You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

CDSPBlockConvolver.h 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600
  1. //$ nocpp
  2. /**
  3. * @file CDSPBlockConvolver.h
  4. *
  5. * @brief Single-block overlap-save convolution processor class.
  6. *
  7. * This file includes single-block overlap-save convolution processor class.
  8. *
  9. * r8brain-free-src Copyright (c) 2013-2014 Aleksey Vaneev
  10. * See the "License.txt" file for license.
  11. */
  12. #ifndef R8B_CDSPBLOCKCONVOLVER_INCLUDED
  13. #define R8B_CDSPBLOCKCONVOLVER_INCLUDED
  14. #include "CDSPFIRFilter.h"
  15. #include "CDSPProcessor.h"
  16. namespace r8b
  17. {
  18. /**
  19. * @brief Single-block overlap-save convolution processing class.
  20. *
  21. * Class that implements single-block overlap-save convolution processing. The
  22. * length of a single FFT block used depends on the length of the filter
  23. * kernel.
  24. *
  25. * The rationale behind "single-block" processing is that increasing the FFT
  26. * block length by 2 is more efficient than performing convolution at the same
  27. * FFT block length but using two blocks.
  28. *
  29. * This class also implements a built-in resampling by any whole-number
  30. * factor, which simplifies the overall resampling objects topology.
  31. */
  32. class CDSPBlockConvolver final : public CDSPProcessor
  33. {
  34. public:
  35. /**
  36. * Constructor initializes internal variables and constants of *this
  37. * object.
  38. *
  39. * @param aFilter Pre-calculated filter data. Reference to this object is
  40. * inhertied by *this object, and the object will be released when *this
  41. * object is destroyed. If upsampling is used, filter's gain should be
  42. * equal to the upsampling factor.
  43. * @param aUpFactor The upsampling factor, positive value. E.g. value of 2
  44. * means 2x upsampling should be performed over the input data.
  45. * @param aDownFactor The downsampling factor, positive value. E.g. value
  46. * of 2 means 2x downsampling should be performed over the output data.
  47. * @param PrevLatency Latency, in samples (any value >=0), which was left
  48. * in the output signal by a previous process. This value is usually
  49. * non-zero if the minimum-phase filters are in use. This value is always
  50. * zero if the linear-phase filters are in use.
  51. * @param aDoConsumeLatency "True" if the output latency should be
  52. * consumed. Does not apply to the fractional part of the latency (if such
  53. * part is available).
  54. */
  55. CDSPBlockConvolver(CDSPFIRFilter& aFilter, const int aUpFactor,
  56. const int aDownFactor, const double PrevLatency = 0.0,
  57. const bool aDoConsumeLatency = true)
  58. : Filter(&aFilter)
  59. , UpFactor(aUpFactor)
  60. , DownFactor(aDownFactor)
  61. , DoConsumeLatency(aDoConsumeLatency)
  62. , BlockLen2(2 << Filter->getBlockLenBits())
  63. {
  64. R8BASSERT(UpFactor > 0);
  65. R8BASSERT(DownFactor > 0);
  66. R8BASSERT(PrevLatency >= 0.0);
  67. int fftinBits;
  68. UpShift = getBitOccupancy(UpFactor) - 1;
  69. if ((1 << UpShift) == UpFactor)
  70. {
  71. fftinBits = Filter->getBlockLenBits() + 1 - UpShift;
  72. PrevInputLen = (Filter->getKernelLen() - 1) / UpFactor;
  73. InputLen = BlockLen2 - PrevInputLen * UpFactor;
  74. }
  75. else
  76. {
  77. UpShift = -1;
  78. fftinBits = Filter->getBlockLenBits() + 1;
  79. PrevInputLen = Filter->getKernelLen() - 1;
  80. InputLen = BlockLen2 - PrevInputLen;
  81. }
  82. OutOffset = Filter->getLatency();
  83. LatencyFrac = Filter->getLatencyFrac() + PrevLatency * UpFactor;
  84. Latency = (int)LatencyFrac;
  85. LatencyFrac -= Latency;
  86. LatencyFrac /= DownFactor;
  87. Latency += InputLen + OutOffset;
  88. int fftoutBits;
  89. InputDelay = 0;
  90. UpSkipInit = 0;
  91. DownSkipInit = 0;
  92. DownShift = getBitOccupancy(DownFactor) - 1;
  93. if ((1 << DownShift) == DownFactor)
  94. {
  95. fftoutBits = Filter->getBlockLenBits() + 1 - DownShift;
  96. if (DownFactor > 1)
  97. {
  98. if (UpShift > 0)
  99. {
  100. // This case never happens in practice due to mutual
  101. // exclusion of "power of 2" DownFactor and UpFactor
  102. // values.
  103. R8BASSERT(UpShift == 0);
  104. }
  105. else
  106. {
  107. int Delay = Latency & (DownFactor - 1);
  108. if (Delay > 0)
  109. {
  110. Delay = DownFactor - Delay;
  111. Latency += Delay;
  112. if (Delay < UpFactor) { UpSkipInit = Delay; }
  113. else
  114. {
  115. UpSkipInit = UpFactor - 1;
  116. InputDelay = Delay - UpSkipInit;
  117. }
  118. }
  119. if (!DoConsumeLatency) { Latency /= DownFactor; }
  120. }
  121. }
  122. }
  123. else
  124. {
  125. fftoutBits = Filter->getBlockLenBits() + 1;
  126. DownShift = -1;
  127. if (!DoConsumeLatency && DownFactor > 1)
  128. {
  129. DownSkipInit = Latency % DownFactor;
  130. Latency /= DownFactor;
  131. }
  132. }
  133. fftin = new CDSPRealFFTKeeper(fftinBits);
  134. if (fftoutBits == fftinBits) { fftout = fftin; }
  135. else
  136. {
  137. ffto2 = new CDSPRealFFTKeeper(fftoutBits);
  138. fftout = ffto2;
  139. }
  140. WorkBlocks.alloc(BlockLen2 * 2 + PrevInputLen);
  141. CurInput = &WorkBlocks[0];
  142. CurOutput = &WorkBlocks[BlockLen2];
  143. PrevInput = &WorkBlocks[BlockLen2 * 2];
  144. clear();
  145. R8BCONSOLE("CDSPBlockConvolver: flt_len=%i in_len=%i io=%i/%i "
  146. "fft=%i/%i latency=%i\n", Filter -> getKernelLen(), InputLen,
  147. UpFactor, DownFactor, (*fftin) -> getLen(), (*fftout) -> getLen(),
  148. getLatency());
  149. }
  150. ~CDSPBlockConvolver() override { Filter->unref(); }
  151. int getLatency() const override { return (DoConsumeLatency ? 0 : Latency); }
  152. double getLatencyFrac() const override { return (LatencyFrac); }
  153. int getInLenBeforeOutStart(const int NextInLen) const override
  154. {
  155. return ((InputLen - InputDelay + NextInLen * DownFactor) /
  156. UpFactor);
  157. }
  158. int getMaxOutLen(const int MaxInLen) const override
  159. {
  160. R8BASSERT(MaxInLen >= 0);
  161. return ((MaxInLen * UpFactor + InputDelay + DownFactor - 1) /
  162. DownFactor);
  163. }
  164. void clear() override
  165. {
  166. memset(&PrevInput[0], 0, PrevInputLen * sizeof(double));
  167. if (DoConsumeLatency) { LatencyLeft = Latency; }
  168. else
  169. {
  170. LatencyLeft = 0;
  171. if (DownShift > 0)
  172. {
  173. memset(&CurOutput[0], 0, (BlockLen2 >> DownShift) *
  174. sizeof(double));
  175. }
  176. else
  177. {
  178. memset(&CurOutput[BlockLen2 - OutOffset], 0, OutOffset *
  179. sizeof(double));
  180. memset(&CurOutput[0], 0, (InputLen - OutOffset) *
  181. sizeof(double));
  182. }
  183. }
  184. memset(CurInput, 0, InputDelay * sizeof(double));
  185. InDataLeft = InputLen - InputDelay;
  186. UpSkip = UpSkipInit;
  187. DownSkip = DownSkipInit;
  188. }
  189. int process(double* ip, int l0, double*& op0) override
  190. {
  191. R8BASSERT(l0 >= 0);
  192. R8BASSERT(UpFactor / DownFactor <= 1 || ip != op0 || l0 == 0);
  193. double* op = op0;
  194. int l = l0 * UpFactor;
  195. l0 = 0;
  196. while (l > 0)
  197. {
  198. const int Offs = InputLen - InDataLeft;
  199. if (l < InDataLeft)
  200. {
  201. InDataLeft -= l;
  202. if (UpShift >= 0)
  203. {
  204. memcpy(&CurInput[Offs >> UpShift], ip,
  205. (l >> UpShift) * sizeof(double));
  206. }
  207. else { copyUpsample(ip, &CurInput[Offs], l); }
  208. copyToOutput(Offs - OutOffset, op, l, l0);
  209. break;
  210. }
  211. const int b = InDataLeft;
  212. l -= b;
  213. InDataLeft = InputLen;
  214. int ilu;
  215. if (UpShift >= 0)
  216. {
  217. const int bu = b >> UpShift;
  218. memcpy(&CurInput[Offs >> UpShift], ip,
  219. bu * sizeof(double));
  220. ip += bu;
  221. ilu = InputLen >> UpShift;
  222. }
  223. else
  224. {
  225. copyUpsample(ip, &CurInput[Offs], b);
  226. ilu = InputLen;
  227. }
  228. const int pil = int(PrevInputLen * sizeof(double));
  229. memcpy(&CurInput[ilu], PrevInput, pil);
  230. memcpy(PrevInput, &CurInput[ilu - PrevInputLen], pil);
  231. (*fftin)->forward(CurInput);
  232. if (UpShift > 0) { mirrorInputSpectrum(); }
  233. if (Filter->isZeroPhase())
  234. {
  235. (*fftout)->multiplyBlocksZ(Filter->getKernelBlock(),
  236. CurInput);
  237. }
  238. else
  239. {
  240. (*fftout)->multiplyBlocks(Filter->getKernelBlock(),
  241. CurInput);
  242. }
  243. if (DownShift > 0)
  244. {
  245. const int z = BlockLen2 >> DownShift;
  246. CurInput[1] = Filter->getKernelBlock()[z] *
  247. CurInput[z];
  248. }
  249. (*fftout)->inverse(CurInput);
  250. copyToOutput(Offs - OutOffset, op, b, l0);
  251. double* const tmp = CurInput;
  252. CurInput = CurOutput;
  253. CurOutput = tmp;
  254. }
  255. return (l0);
  256. }
  257. private:
  258. CDSPFIRFilter* Filter = nullptr; ///< Filter in use.
  259. ///<
  260. CPtrKeeper<CDSPRealFFTKeeper*> fftin; ///< FFT object 1, used to produce
  261. ///< the input spectrum (can embed the "power of 2" upsampling).
  262. ///<
  263. CPtrKeeper<CDSPRealFFTKeeper*> ffto2; ///< FFT object 2 (can be NULL).
  264. ///<
  265. CDSPRealFFTKeeper* fftout = nullptr; ///< FFT object used to produce the output
  266. ///< signal (can embed the "power of 2" downsampling), may point to
  267. ///< either "fftin" or "ffto2".
  268. ///<
  269. int UpFactor = 0; ///< Upsampling factor.
  270. ///<
  271. int DownFactor = 0; ///< Downsampling factor.
  272. ///<
  273. bool DoConsumeLatency; ///< "True" if the output latency should be
  274. ///< consumed. Does not apply to the fractional part of the latency
  275. ///< (if such part is available).
  276. ///<
  277. int BlockLen2 = 0; ///< Equals block length * 2.
  278. ///<
  279. int OutOffset = 0; ///< Output offset, depends on filter's introduced latency.
  280. ///<
  281. int PrevInputLen = 0; ///< The length of previous input data saved, used for
  282. ///< overlap.
  283. ///<
  284. int InputLen = 0; ///< The number of input samples that should be accumulated
  285. ///< before the input block is processed.
  286. ///<
  287. int Latency = 0; ///< Processing latency, in samples.
  288. ///<
  289. double LatencyFrac = 0; ///< Fractional latency, in samples, that is left in
  290. ///< the output signal.
  291. ///<
  292. int UpShift = 0; ///< "Power of 2" upsampling shift. Equals -1 if UpFactor is
  293. ///< not a "power of 2" value. Equals 0 if UpFactor equals 1.
  294. ///<
  295. int DownShift = 0; ///< "Power of 2" downsampling shift. Equals -1 if
  296. ///< DownFactor is not a "power of 2". Equals 0 if DownFactor equals
  297. ///< 1.
  298. ///<
  299. int InputDelay = 0; ///< Additional input delay, in samples. Used to make the
  300. ///< output latency divisible by DownShift. Used only if UpShift <= 0
  301. ///< and DownShift > 0.
  302. ///<
  303. CFixedBuffer<double> WorkBlocks; ///< Previous input data, input and
  304. ///< output data blocks, overall capacity = BlockLen2 * 2 +
  305. ///< PrevInputLen. Used in the flip-flop manner.
  306. ///<
  307. double* PrevInput = nullptr; ///< Previous input data buffer, capacity = BlockLen.
  308. ///<
  309. double* CurInput = nullptr; ///< Input data buffer, capacity = BlockLen2.
  310. ///<
  311. double* CurOutput = nullptr; ///< Output data buffer, capacity = BlockLen2.
  312. ///<
  313. int InDataLeft = 0; ///< Samples left before processing input and output FFT
  314. ///< blocks. Initialized to InputLen on clear.
  315. ///<
  316. int LatencyLeft = 0; ///< Latency in samples left to skip.
  317. ///<
  318. int UpSkip = 0; ///< The current upsampling sample skip (value in the range
  319. ///< 0 to UpFactor - 1).
  320. ///<
  321. int UpSkipInit = 0; ///< The initial UpSkip value after clear().
  322. ///<
  323. int DownSkip = 0; ///< The current downsampling sample skip (value in the
  324. ///< range 0 to DownFactor - 1). Not used if DownShift > 0.
  325. ///<
  326. int DownSkipInit = 0; ///< The initial DownSkip value after clear().
  327. ///<
  328. /**
  329. * Function copies samples from the input buffer to the output buffer
  330. * while inserting zeros inbetween them to perform the whole-numbered
  331. * upsampling.
  332. *
  333. * @param[in,out] ip0 Input buffer. Will be advanced on function's return.
  334. * @param[out] op Output buffer.
  335. * @param l0 The number of samples to fill in the output buffer, including
  336. * both input samples and interpolation (zero) samples.
  337. */
  338. void copyUpsample(double*& ip0, double* op, int l0)
  339. {
  340. int b = min(UpSkip, l0);
  341. if (b > 0)
  342. {
  343. l0 -= b;
  344. UpSkip -= b;
  345. *op = 0.0;
  346. op++;
  347. b--;
  348. while (b > 0)
  349. {
  350. *op = 0.0;
  351. op++;
  352. b--;
  353. }
  354. }
  355. double* ip = ip0;
  356. int l = l0 / UpFactor;
  357. int lz = l0 - l * UpFactor;
  358. if (UpFactor == 3)
  359. {
  360. while (l > 0)
  361. {
  362. op[0] = *ip;
  363. op[1] = 0.0;
  364. op[2] = 0.0;
  365. ip++;
  366. op += UpFactor;
  367. l--;
  368. }
  369. }
  370. else if (UpFactor == 5)
  371. {
  372. while (l > 0)
  373. {
  374. op[0] = *ip;
  375. op[1] = 0.0;
  376. op[2] = 0.0;
  377. op[3] = 0.0;
  378. op[4] = 0.0;
  379. ip++;
  380. op += UpFactor;
  381. l--;
  382. }
  383. }
  384. else
  385. {
  386. while (l > 0)
  387. {
  388. op[0] = *ip;
  389. for (int j = 1; j < UpFactor; ++j) { op[j] = 0.0; }
  390. ip++;
  391. op += UpFactor;
  392. l--;
  393. }
  394. }
  395. if (lz > 0)
  396. {
  397. *op = *ip;
  398. op++;
  399. ip++;
  400. UpSkip = UpFactor - lz;
  401. while (lz > 1)
  402. {
  403. *op = 0.0;
  404. op++;
  405. lz--;
  406. }
  407. }
  408. ip0 = ip;
  409. }
  410. /**
  411. * Function copies sample data from the CurOutput buffer to the specified
  412. * output buffer and advances its position. If necessary, this function
  413. * "consumes" latency and performs downsampling.
  414. *
  415. * @param Offs CurOutput buffer offset, can be negative.
  416. * @param[out] op0 Output buffer pointer, will be advanced.
  417. * @param b The number of output samples available, including those which
  418. * are discarded during whole-number downsampling.
  419. * @param l0 The overall output sample count, will be increased.
  420. */
  421. void copyToOutput(int Offs, double*& op0, int b, int& l0)
  422. {
  423. if (Offs < 0)
  424. {
  425. if (Offs + b <= 0) { Offs += BlockLen2; }
  426. else
  427. {
  428. copyToOutput(Offs + BlockLen2, op0, -Offs, l0);
  429. b += Offs;
  430. Offs = 0;
  431. }
  432. }
  433. if (LatencyLeft > 0)
  434. {
  435. if (LatencyLeft >= b)
  436. {
  437. LatencyLeft -= b;
  438. return;
  439. }
  440. Offs += LatencyLeft;
  441. b -= LatencyLeft;
  442. LatencyLeft = 0;
  443. }
  444. const int df = DownFactor;
  445. if (DownShift > 0)
  446. {
  447. int Skip = Offs & (df - 1);
  448. if (Skip > 0)
  449. {
  450. Skip = df - Skip;
  451. b -= Skip;
  452. Offs += Skip;
  453. }
  454. if (b > 0)
  455. {
  456. b = (b + df - 1) >> DownShift;
  457. memcpy(op0, &CurOutput[Offs >> DownShift],
  458. b * sizeof(double));
  459. op0 += b;
  460. l0 += b;
  461. }
  462. }
  463. else
  464. {
  465. if (df > 1)
  466. {
  467. const double* ip = &CurOutput[Offs + DownSkip];
  468. int l = (b + df - 1 - DownSkip) / df;
  469. DownSkip += l * df - b;
  470. double* op = op0;
  471. l0 += l;
  472. op0 += l;
  473. while (l > 0)
  474. {
  475. *op = *ip;
  476. op++;
  477. ip += df;
  478. l--;
  479. }
  480. }
  481. else
  482. {
  483. memcpy(op0, &CurOutput[Offs], b * sizeof(double));
  484. op0 += b;
  485. l0 += b;
  486. }
  487. }
  488. }
  489. /**
  490. * Function performs input spectrum mirroring which is used to perform a
  491. * fast "power of 2" upsampling. Such mirroring is equivalent to insertion
  492. * of zeros into the input signal.
  493. */
  494. void mirrorInputSpectrum()
  495. {
  496. const int bl1 = BlockLen2 >> UpShift;
  497. const int bl2 = bl1 + bl1;
  498. int i;
  499. for (i = bl1 + 2; i < bl2; i += 2)
  500. {
  501. CurInput[i] = CurInput[bl2 - i];
  502. CurInput[i + 1] = -CurInput[bl2 - i + 1];
  503. }
  504. CurInput[bl1] = CurInput[1];
  505. CurInput[bl1 + 1] = 0.0;
  506. CurInput[1] = CurInput[0];
  507. for (i = 1; i < UpShift; ++i)
  508. {
  509. const int z = bl1 << i;
  510. memcpy(&CurInput[z], CurInput, z * sizeof(double));
  511. CurInput[z + 1] = 0.0;
  512. }
  513. }
  514. };
  515. } // namespace r8b
  516. #endif // R8B_CDSPBLOCKCONVOLVER_INCLUDED