You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

CDSPRealFFT.h 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592
  1. //$ nocpp
  2. /**
  3. * @file CDSPRealFFT.h
  4. *
  5. * @brief Real-valued FFT transform class.
  6. *
  7. * This file includes FFT object implementation. All created FFT objects are
  8. * kept in a global list after use for future reusal. Such approach minimizes
  9. * time necessary to initialize the FFT object of the required length.
  10. *
  11. * r8brain-free-src Copyright (c) 2013-2014 Aleksey Vaneev
  12. * See the "License.txt" file for license.
  13. */
  14. #ifndef R8B_CDSPREALFFT_INCLUDED
  15. #define R8B_CDSPREALFFT_INCLUDED
  16. #include "r8bbase.h"
  17. #if !R8B_IPP
  18. #include "fft4g.h"
  19. #endif // !R8B_IPP
  20. namespace r8b
  21. {
  22. /**
  23. * @brief Real-valued FFT transform class.
  24. *
  25. * Class implements a wrapper for real-valued discrete fast Fourier transform
  26. * functions. The object of this class can only be obtained via the
  27. * CDSPRealFFTKeeper class.
  28. *
  29. * Uses functions from the FFT package by: Copyright(C) 1996-2001 Takuya OOURA
  30. * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
  31. *
  32. * Also uses Intel IPP library functions if available (the R8B_IPP=1 macro was
  33. * defined). Note that IPP library's FFT functions are 2-3 times more
  34. * efficient on the modern Intel Core i7-3770K processor than Ooura's
  35. * functions. It may be worthwhile investing in IPP. Note, that FFT functions
  36. * take less than 20% of the overall sample rate conversion time. However,
  37. * when the "power of 2" resampling is used the performance of FFT functions
  38. * becomes "everything".
  39. */
  40. class CDSPRealFFT : public R8B_BASECLASS
  41. {
  42. R8BNOCTOR(CDSPRealFFT)
  43. friend class CDSPRealFFTKeeper;
  44. public:
  45. /**
  46. * @return A multiplication constant that should be used after inverse
  47. * transform to obtain a correct value scale.
  48. */
  49. double getInvMulConst() const { return (InvMulConst); }
  50. /**
  51. * @return The length (the number of real values in a transform) of *this
  52. * FFT object, expressed as Nth power of 2.
  53. */
  54. int getLenBits() const { return (LenBits); }
  55. /**
  56. * @return The length (the number of real values in a transform) of *this
  57. * FFT object.
  58. */
  59. int getLen() const { return (Len); }
  60. /**
  61. * Function performs in-place forward FFT.
  62. *
  63. * @param[in,out] p Pointer to data block to transform, length should be
  64. * equal to *this object's getLen().
  65. */
  66. void forward(double* const p) const
  67. {
  68. #if R8B_IPP
  69. ippsFFTFwd_RToPerm_64f( p, p, SPtr, WorkBuffer );
  70. #else // R8B_IPP
  71. ooura_fft::rdft(Len, 1, p, wi.getPtr(), wd.getPtr());
  72. #endif // R8B_IPP
  73. }
  74. /**
  75. * Function performs in-place inverse FFT.
  76. *
  77. * @param[in,out] p Pointer to data block to transform, length should be
  78. * equal to *this object's getLen().
  79. */
  80. void inverse(double* const p) const
  81. {
  82. #if R8B_IPP
  83. ippsFFTInv_PermToR_64f( p, p, SPtr, WorkBuffer );
  84. #else // R8B_IPP
  85. ooura_fft::rdft(Len, -1, p, wi.getPtr(), wd.getPtr());
  86. #endif // R8B_IPP
  87. }
  88. /**
  89. * Function multiplies two complex-valued data blocks and places result in
  90. * a new data block. Length of all data blocks should be equal to *this
  91. * object's block length. Input blocks should have been produced with the
  92. * forward() function of *this object.
  93. *
  94. * @param ip1 Input data block 1.
  95. * @param ip2 Input data block 2.
  96. * @param[out] op Output data block, should not be equal to ip1 nor ip2.
  97. */
  98. void multiplyBlocks(const double* const ip1, const double* const ip2, double* const op) const
  99. {
  100. #if R8B_IPP
  101. ippsMulPerm_64f( (Ipp64f*) ip1, (Ipp64f*) ip2, (Ipp64f*) op, Len );
  102. #else // R8B_IPP
  103. op[0] = ip1[0] * ip2[0];
  104. op[1] = ip1[1] * ip2[1];
  105. int i = 2;
  106. while (i < Len)
  107. {
  108. op[i] = ip1[i] * ip2[i] - ip1[i + 1] * ip2[i + 1];
  109. op[i + 1] = ip1[i] * ip2[i + 1] + ip1[i + 1] * ip2[i];
  110. i += 2;
  111. }
  112. #endif // R8B_IPP
  113. }
  114. /**
  115. * Function is similar to the multiplyBlocks() function, but instead of
  116. * replacing data in the output buffer, the data is summed with the output
  117. * buffer.
  118. *
  119. * @param ip1 Input data block 1.
  120. * @param ip2 Input data block 2.
  121. * @param[out] op Output data block, should not be equal to ip1 nor ip2.
  122. */
  123. void multiplyBlocksAdd(const double* const ip1, const double* const ip2, double* const op) const
  124. {
  125. op[0] += ip1[0] * ip2[0];
  126. op[1] += ip1[1] * ip2[1];
  127. #if R8B_IPP
  128. ippsAddProduct_64fc( (const Ipp64fc*) ( ip1 + 2 ), (const Ipp64fc*) ( ip2 + 2 ), (Ipp64fc*) ( op + 2 ), ( Len >> 1 ) - 1 );
  129. #else // R8B_IPP
  130. int i = 2;
  131. while (i < Len)
  132. {
  133. op[i] += ip1[i] * ip2[i] - ip1[i + 1] * ip2[i + 1];
  134. op[i + 1] += ip1[i] * ip2[i + 1] + ip1[i + 1] * ip2[i];
  135. i += 2;
  136. }
  137. #endif // R8B_IPP
  138. }
  139. /**
  140. * Function multiplies two complex-valued data blocks in-place. Length of
  141. * both data blocks should be equal to *this object's block length. Blocks
  142. * should have been produced with the forward() function of *this object.
  143. *
  144. * @param ip Input data block 1.
  145. * @param[in,out] op Output/input data block 2.
  146. */
  147. void multiplyBlocks(const double* const ip, double* const op) const
  148. {
  149. #if R8B_IPP
  150. ippsMulPerm_64f( (Ipp64f*) op, (Ipp64f*) ip, (Ipp64f*) op, Len );
  151. #else // R8B_IPP
  152. op[0] *= ip[0];
  153. op[1] *= ip[1];
  154. int i = 2;
  155. while (i < Len)
  156. {
  157. const double t = op[i] * ip[i] - op[i + 1] * ip[i + 1];
  158. op[i + 1] = op[i] * ip[i + 1] + op[i + 1] * ip[i];
  159. op[i] = t;
  160. i += 2;
  161. }
  162. #endif // R8B_IPP
  163. }
  164. /**
  165. * Function multiplies two complex-valued data blocks in-place,
  166. * considering that the "ip" block contains "zero-phase" response. Length
  167. * of both data blocks should be equal to *this object's block length.
  168. * Blocks should have been produced with the forward() function of *this
  169. * object.
  170. *
  171. * @param ip Input data block 1, "zero-phase" response.
  172. * @param[in,out] op Output/input data block 2.
  173. */
  174. void multiplyBlocksZ(const double* const ip, double* const op) const
  175. {
  176. op[0] *= ip[0];
  177. op[1] *= ip[1];
  178. int i = 2;
  179. while (i < Len)
  180. {
  181. op[i] *= ip[i];
  182. op[i + 1] *= ip[i];
  183. i += 2;
  184. }
  185. }
  186. /**
  187. * Function performs in-place spectrum squaring. May cause aliasing
  188. * if the filter was not zero-padded before the forward() function call.
  189. *
  190. * @param[in,out] p Pointer to data block to square, length should be
  191. * equal to *this object's getLen(). This data block should contain
  192. * complex spectrum data, previously obtained via the forward() function.
  193. */
  194. void sqr(double* const p) const
  195. {
  196. p[0] *= p[0];
  197. p[1] *= p[1];
  198. #if R8B_IPP
  199. ippsSqr_64fc( (Ipp64fc*) ( p + 2 ), (Ipp64fc*) ( p + 2 ),
  200. ( Len >> 1 ) - 1 );
  201. #else // R8B_IPP
  202. int i = 2;
  203. while (i < Len)
  204. {
  205. const double r = p[i] * p[i] - p[i + 1] * p[i + 1];
  206. p[i + 1] = p[i] * (p[i + 1] + p[i + 1]);
  207. p[i] = r;
  208. i += 2;
  209. }
  210. #endif // R8B_IPP
  211. }
  212. private:
  213. int LenBits = 0; ///< Length of FFT block (expressed as Nth power of 2).
  214. ///<
  215. int Len = 0; ///< Length of FFT block (number of real values).
  216. ///<
  217. double InvMulConst = 0; ///< Inverse FFT multiply constant.
  218. ///<
  219. CDSPRealFFT* Next = nullptr; ///< Next object in a singly-linked list.
  220. ///<
  221. #if R8B_IPP
  222. IppsFFTSpec_R_64f* SPtr = nullptr; ///< Pointer to initialized data buffer
  223. ///< to be passed to IPP's FFT functions.
  224. ///<
  225. CFixedBuffer< unsigned char > SpecBuffer; ///< Working buffer.
  226. ///<
  227. CFixedBuffer< unsigned char > WorkBuffer; ///< Working buffer.
  228. ///<
  229. #else // R8B_IPP
  230. CFixedBuffer<int> wi; ///< Working buffer (ints).
  231. ///<
  232. CFixedBuffer<double> wd; ///< Working buffer (doubles).
  233. ///<
  234. #endif // R8B_IPP
  235. /**
  236. * A simple class that keeps the pointer to the object and deletes it
  237. * automatically.
  238. */
  239. class CObjKeeper
  240. {
  241. R8BNOCTOR(CObjKeeper)
  242. public:
  243. CObjKeeper() { }
  244. ~CObjKeeper() { delete Object; }
  245. CObjKeeper& operator =(CDSPRealFFT* const aObject)
  246. {
  247. Object = aObject;
  248. return (*this);
  249. }
  250. operator CDSPRealFFT*() const { return (Object); }
  251. private:
  252. CDSPRealFFT* Object = nullptr; ///< FFT object being kept.
  253. ///<
  254. };
  255. CDSPRealFFT() { }
  256. /**
  257. * Constructor initializes FFT object.
  258. *
  259. * @param aLenBits The length of FFT block (Nth power of 2), specifies the
  260. * number of real values in a block. Values from 1 to 30 inclusive are
  261. * supported.
  262. */
  263. CDSPRealFFT(const int aLenBits)
  264. : LenBits(aLenBits), Len(1 << aLenBits)
  265. #if R8B_IPP
  266. , InvMulConst( 1.0 / Len )
  267. #else // R8B_IPP
  268. , InvMulConst(2.0 / Len)
  269. #endif // R8B_IPP
  270. {
  271. #if R8B_IPP
  272. int SpecSize = 0;
  273. int SpecBufferSize = 0;
  274. int BufferSize = 0;
  275. ippsFFTGetSize_R_64f( LenBits, IPP_FFT_NODIV_BY_ANY,
  276. ippAlgHintFast, &SpecSize, &SpecBufferSize, &BufferSize );
  277. CFixedBuffer< unsigned char > InitBuffer( SpecBufferSize );
  278. SpecBuffer.alloc( SpecSize );
  279. WorkBuffer.alloc( BufferSize );
  280. ippsFFTInit_R_64f( &SPtr, LenBits, IPP_FFT_NODIV_BY_ANY,
  281. ippAlgHintFast, SpecBuffer, InitBuffer );
  282. #else // R8B_IPP
  283. wi.alloc((int)ceil(2.0 + sqrt(double(Len >> 1))));
  284. wi[0] = 0;
  285. wd.alloc(Len >> 1);
  286. #endif // R8B_IPP
  287. }
  288. ~CDSPRealFFT() { delete Next; }
  289. };
  290. /**
  291. * @brief A "keeper" class for real-valued FFT transform objects.
  292. *
  293. * Class implements "keeper" functionality for handling CDSPRealFFT objects.
  294. * The allocated FFT objects are placed on the global static list of objects
  295. * for future reuse instead of deallocation.
  296. */
  297. class CDSPRealFFTKeeper : public R8B_BASECLASS
  298. {
  299. R8BNOCTOR(CDSPRealFFTKeeper)
  300. public:
  301. CDSPRealFFTKeeper() { }
  302. /**
  303. * Function acquires FFT object with the specified block length.
  304. *
  305. * @param LenBits The length of FFT block (Nth power of 2), in the range
  306. * [1; 30] inclusive, specifies the number of real values in a FFT block.
  307. */
  308. CDSPRealFFTKeeper(const int LenBits) { Object = acquire(LenBits); }
  309. ~CDSPRealFFTKeeper() { if (Object != nullptr) { release(Object); } }
  310. /**
  311. * @return Pointer to the acquired FFT object.
  312. */
  313. const CDSPRealFFT* operator ->() const
  314. {
  315. R8BASSERT(Object != nullptr);
  316. return (Object);
  317. }
  318. /**
  319. * Function acquires FFT object with the specified block length. This
  320. * function can be called any number of times.
  321. *
  322. * @param LenBits The length of FFT block (Nth power of 2), in the range
  323. * [1; 30] inclusive, specifies the number of real values in a FFT block.
  324. */
  325. void init(const int LenBits)
  326. {
  327. if (Object != nullptr)
  328. {
  329. if (Object->LenBits == LenBits) { return; }
  330. release(Object);
  331. }
  332. Object = acquire(LenBits);
  333. }
  334. /**
  335. * Function releases a previously acquired FFT object.
  336. */
  337. void reset()
  338. {
  339. if (Object != nullptr)
  340. {
  341. release(Object);
  342. Object = nullptr;
  343. }
  344. }
  345. private:
  346. CDSPRealFFT* Object = nullptr; ///< FFT object.
  347. ///<
  348. static CSyncObject StateSync; ///< FFTObjects synchronizer.
  349. ///<
  350. static CDSPRealFFT::CObjKeeper FFTObjects[]; ///< Pool of FFT objects of
  351. ///< various lengths.
  352. ///<
  353. /**
  354. * Function acquires FFT object from the global pool.
  355. *
  356. * @param LenBits FFT block length (expressed as Nth power of 2).
  357. */
  358. CDSPRealFFT* acquire(const int LenBits)
  359. {
  360. R8BASSERT(LenBits > 0 && LenBits <= 30);
  361. R8BSYNC(StateSync);
  362. if (FFTObjects[LenBits] == nullptr) { return (new CDSPRealFFT(LenBits)); }
  363. CDSPRealFFT* ffto = FFTObjects[LenBits];
  364. FFTObjects[LenBits] = ffto->Next;
  365. return (ffto);
  366. }
  367. /**
  368. * Function releases a previously acquired FFT object.
  369. *
  370. * @param ffto FFT object to release.
  371. */
  372. void release(CDSPRealFFT* const ffto)
  373. {
  374. R8BSYNC(StateSync);
  375. ffto->Next = FFTObjects[ffto->LenBits];
  376. FFTObjects[ffto->LenBits] = ffto;
  377. }
  378. };
  379. /**
  380. * Function calculates the minimum-phase transform of the filter kernel, using
  381. * a discrete Hilbert transform in cepstrum domain.
  382. *
  383. * For more details, see part III.B of
  384. * http://www.hpl.hp.com/personal/Niranjan_Damera-Venkata/files/ComplexMinPhase.pdf
  385. *
  386. * @param[in,out] Kernel Filter kernel buffer.
  387. * @param KernelLen Filter kernel's length, in samples.
  388. * @param LenMult Kernel length multiplier. Used as a coefficient of the
  389. * "oversampling" in the frequency domain. Such oversampling is needed to
  390. * improve the precision of the minimum-phase transform. If the filter's
  391. * attenuation is high, this multiplier should be increased or otherwise the
  392. * required attenuation will not be reached due to "smoothing" effect of this
  393. * transform.
  394. * @param DoFinalMul "True" if the final multiplication after transform should
  395. * be performed or not. Such multiplication returns the gain of the signal to
  396. * its original value. This parameter can be set to "false" if normalization
  397. * of the resulting filter kernel is planned to be used.
  398. * @param[out] DCGroupDelay If not NULL, this variable receives group delay
  399. * at DC offset, in samples (can be a non-integer value).
  400. */
  401. inline void calcMinPhaseTransform(double* const Kernel, const int KernelLen,
  402. const int LenMult = 2, const bool DoFinalMul = true,
  403. double* const DCGroupDelay = nullptr)
  404. {
  405. R8BASSERT(KernelLen > 0);
  406. R8BASSERT(LenMult >= 2);
  407. const int LenBits = getBitOccupancy((KernelLen * LenMult) - 1);
  408. const int Len = 1 << LenBits;
  409. const int Len2 = Len >> 1;
  410. int i;
  411. CFixedBuffer<double> ip(Len);
  412. CFixedBuffer<double> ip2(Len2 + 1);
  413. memcpy(&ip[0], Kernel, KernelLen * sizeof(double));
  414. memset(&ip[KernelLen], 0, (Len - KernelLen) * sizeof(double));
  415. CDSPRealFFTKeeper ffto(LenBits);
  416. ffto->forward(ip);
  417. // Create the "log |c|" spectrum while saving the original power spectrum
  418. // in the "ip2" buffer.
  419. ip2[0] = ip[0];
  420. ip[0] = log(fabs(ip[0]) + 1e-50);
  421. ip2[Len2] = ip[1];
  422. ip[1] = log(fabs(ip[1]) + 1e-50);
  423. for (i = 1; i < Len2; ++i)
  424. {
  425. ip2[i] = sqrt(ip[i * 2] * ip[i * 2] +
  426. ip[i * 2 + 1] * ip[i * 2 + 1]);
  427. ip[i * 2] = log(ip2[i] + 1e-50);
  428. ip[i * 2 + 1] = 0.0;
  429. }
  430. // Convert to cepstrum and apply discrete Hilbert transform.
  431. ffto->inverse(ip);
  432. ip[0] = 0.0;
  433. for (i = 1; i < Len2; ++i) { ip[i] *= ffto->getInvMulConst(); }
  434. ip[Len2] = 0.0;
  435. for (i = Len2 + 1; i < Len; ++i) { ip[i] *= -ffto->getInvMulConst(); }
  436. // Convert Hilbert-transformed cepstrum back to the "log |c|" spectrum and
  437. // perform its exponentiation, multiplied by the power spectrum previously
  438. // saved in the "ip2" buffer.
  439. ffto->forward(ip);
  440. ip[0] = ip2[0];
  441. ip[1] = ip2[Len2];
  442. for (i = 1; i < Len2; ++i)
  443. {
  444. const double p = ip2[i];
  445. ip[i * 2 + 0] = cos(ip[i * 2 + 1]) * p;
  446. ip[i * 2 + 1] = sin(ip[i * 2 + 1]) * p;
  447. }
  448. ffto->inverse(ip);
  449. if (DoFinalMul) { for (i = 0; i < KernelLen; ++i) { Kernel[i] = ip[i] * ffto->getInvMulConst(); } }
  450. else { memcpy(&Kernel[0], &ip[0], KernelLen * sizeof(double)); }
  451. if (DCGroupDelay != nullptr)
  452. {
  453. double tmp;
  454. calcFIRFilterResponseAndGroupDelay(Kernel, KernelLen, 0.0,
  455. tmp, tmp, *DCGroupDelay);
  456. }
  457. }
  458. } // namespace r8b
  459. #endif // VOX_CDSPREALFFT_INCLUDED