//$ nocpp /** * @file CDSPRealFFT.h * * @brief Real-valued FFT transform class. * * This file includes FFT object implementation. All created FFT objects are * kept in a global list after use for future reusal. Such approach minimizes * time necessary to initialize the FFT object of the required length. * * r8brain-free-src Copyright (c) 2013-2014 Aleksey Vaneev * See the "License.txt" file for license. */ #ifndef R8B_CDSPREALFFT_INCLUDED #define R8B_CDSPREALFFT_INCLUDED #include "r8bbase.h" #if !R8B_IPP #include "fft4g.h" #endif // !R8B_IPP namespace r8b { /** * @brief Real-valued FFT transform class. * * Class implements a wrapper for real-valued discrete fast Fourier transform * functions. The object of this class can only be obtained via the * CDSPRealFFTKeeper class. * * Uses functions from the FFT package by: Copyright(C) 1996-2001 Takuya OOURA * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html * * Also uses Intel IPP library functions if available (the R8B_IPP=1 macro was * defined). Note that IPP library's FFT functions are 2-3 times more * efficient on the modern Intel Core i7-3770K processor than Ooura's * functions. It may be worthwhile investing in IPP. Note, that FFT functions * take less than 20% of the overall sample rate conversion time. However, * when the "power of 2" resampling is used the performance of FFT functions * becomes "everything". */ class CDSPRealFFT : public R8B_BASECLASS { R8BNOCTOR(CDSPRealFFT) friend class CDSPRealFFTKeeper; public: /** * @return A multiplication constant that should be used after inverse * transform to obtain a correct value scale. */ double getInvMulConst() const { return (InvMulConst); } /** * @return The length (the number of real values in a transform) of *this * FFT object, expressed as Nth power of 2. */ int getLenBits() const { return (LenBits); } /** * @return The length (the number of real values in a transform) of *this * FFT object. */ int getLen() const { return (Len); } /** * Function performs in-place forward FFT. * * @param[in,out] p Pointer to data block to transform, length should be * equal to *this object's getLen(). */ void forward(double* const p) const { #if R8B_IPP ippsFFTFwd_RToPerm_64f( p, p, SPtr, WorkBuffer ); #else // R8B_IPP ooura_fft::rdft(Len, 1, p, wi.getPtr(), wd.getPtr()); #endif // R8B_IPP } /** * Function performs in-place inverse FFT. * * @param[in,out] p Pointer to data block to transform, length should be * equal to *this object's getLen(). */ void inverse(double* const p) const { #if R8B_IPP ippsFFTInv_PermToR_64f( p, p, SPtr, WorkBuffer ); #else // R8B_IPP ooura_fft::rdft(Len, -1, p, wi.getPtr(), wd.getPtr()); #endif // R8B_IPP } /** * Function multiplies two complex-valued data blocks and places result in * a new data block. Length of all data blocks should be equal to *this * object's block length. Input blocks should have been produced with the * forward() function of *this object. * * @param ip1 Input data block 1. * @param ip2 Input data block 2. * @param[out] op Output data block, should not be equal to ip1 nor ip2. */ void multiplyBlocks(const double* const ip1, const double* const ip2, double* const op) const { #if R8B_IPP ippsMulPerm_64f( (Ipp64f*) ip1, (Ipp64f*) ip2, (Ipp64f*) op, Len ); #else // R8B_IPP op[0] = ip1[0] * ip2[0]; op[1] = ip1[1] * ip2[1]; int i = 2; while (i < Len) { op[i] = ip1[i] * ip2[i] - ip1[i + 1] * ip2[i + 1]; op[i + 1] = ip1[i] * ip2[i + 1] + ip1[i + 1] * ip2[i]; i += 2; } #endif // R8B_IPP } /** * Function is similar to the multiplyBlocks() function, but instead of * replacing data in the output buffer, the data is summed with the output * buffer. * * @param ip1 Input data block 1. * @param ip2 Input data block 2. * @param[out] op Output data block, should not be equal to ip1 nor ip2. */ void multiplyBlocksAdd(const double* const ip1, const double* const ip2, double* const op) const { op[0] += ip1[0] * ip2[0]; op[1] += ip1[1] * ip2[1]; #if R8B_IPP ippsAddProduct_64fc( (const Ipp64fc*) ( ip1 + 2 ), (const Ipp64fc*) ( ip2 + 2 ), (Ipp64fc*) ( op + 2 ), ( Len >> 1 ) - 1 ); #else // R8B_IPP int i = 2; while (i < Len) { op[i] += ip1[i] * ip2[i] - ip1[i + 1] * ip2[i + 1]; op[i + 1] += ip1[i] * ip2[i + 1] + ip1[i + 1] * ip2[i]; i += 2; } #endif // R8B_IPP } /** * Function multiplies two complex-valued data blocks in-place. Length of * both data blocks should be equal to *this object's block length. Blocks * should have been produced with the forward() function of *this object. * * @param ip Input data block 1. * @param[in,out] op Output/input data block 2. */ void multiplyBlocks(const double* const ip, double* const op) const { #if R8B_IPP ippsMulPerm_64f( (Ipp64f*) op, (Ipp64f*) ip, (Ipp64f*) op, Len ); #else // R8B_IPP op[0] *= ip[0]; op[1] *= ip[1]; int i = 2; while (i < Len) { const double t = op[i] * ip[i] - op[i + 1] * ip[i + 1]; op[i + 1] = op[i] * ip[i + 1] + op[i + 1] * ip[i]; op[i] = t; i += 2; } #endif // R8B_IPP } /** * Function multiplies two complex-valued data blocks in-place, * considering that the "ip" block contains "zero-phase" response. Length * of both data blocks should be equal to *this object's block length. * Blocks should have been produced with the forward() function of *this * object. * * @param ip Input data block 1, "zero-phase" response. * @param[in,out] op Output/input data block 2. */ void multiplyBlocksZ(const double* const ip, double* const op) const { op[0] *= ip[0]; op[1] *= ip[1]; int i = 2; while (i < Len) { op[i] *= ip[i]; op[i + 1] *= ip[i]; i += 2; } } /** * Function performs in-place spectrum squaring. May cause aliasing * if the filter was not zero-padded before the forward() function call. * * @param[in,out] p Pointer to data block to square, length should be * equal to *this object's getLen(). This data block should contain * complex spectrum data, previously obtained via the forward() function. */ void sqr(double* const p) const { p[0] *= p[0]; p[1] *= p[1]; #if R8B_IPP ippsSqr_64fc( (Ipp64fc*) ( p + 2 ), (Ipp64fc*) ( p + 2 ), ( Len >> 1 ) - 1 ); #else // R8B_IPP int i = 2; while (i < Len) { const double r = p[i] * p[i] - p[i + 1] * p[i + 1]; p[i + 1] = p[i] * (p[i + 1] + p[i + 1]); p[i] = r; i += 2; } #endif // R8B_IPP } private: int LenBits = 0; ///< Length of FFT block (expressed as Nth power of 2). ///< int Len = 0; ///< Length of FFT block (number of real values). ///< double InvMulConst = 0; ///< Inverse FFT multiply constant. ///< CDSPRealFFT* Next = nullptr; ///< Next object in a singly-linked list. ///< #if R8B_IPP IppsFFTSpec_R_64f* SPtr = nullptr; ///< Pointer to initialized data buffer ///< to be passed to IPP's FFT functions. ///< CFixedBuffer< unsigned char > SpecBuffer; ///< Working buffer. ///< CFixedBuffer< unsigned char > WorkBuffer; ///< Working buffer. ///< #else // R8B_IPP CFixedBuffer wi; ///< Working buffer (ints). ///< CFixedBuffer wd; ///< Working buffer (doubles). ///< #endif // R8B_IPP /** * A simple class that keeps the pointer to the object and deletes it * automatically. */ class CObjKeeper { R8BNOCTOR(CObjKeeper) public: CObjKeeper() { } ~CObjKeeper() { delete Object; } CObjKeeper& operator =(CDSPRealFFT* const aObject) { Object = aObject; return (*this); } operator CDSPRealFFT*() const { return (Object); } private: CDSPRealFFT* Object = nullptr; ///< FFT object being kept. ///< }; CDSPRealFFT() { } /** * Constructor initializes FFT object. * * @param aLenBits The length of FFT block (Nth power of 2), specifies the * number of real values in a block. Values from 1 to 30 inclusive are * supported. */ CDSPRealFFT(const int aLenBits) : LenBits(aLenBits), Len(1 << aLenBits) #if R8B_IPP , InvMulConst( 1.0 / Len ) #else // R8B_IPP , InvMulConst(2.0 / Len) #endif // R8B_IPP { #if R8B_IPP int SpecSize = 0; int SpecBufferSize = 0; int BufferSize = 0; ippsFFTGetSize_R_64f( LenBits, IPP_FFT_NODIV_BY_ANY, ippAlgHintFast, &SpecSize, &SpecBufferSize, &BufferSize ); CFixedBuffer< unsigned char > InitBuffer( SpecBufferSize ); SpecBuffer.alloc( SpecSize ); WorkBuffer.alloc( BufferSize ); ippsFFTInit_R_64f( &SPtr, LenBits, IPP_FFT_NODIV_BY_ANY, ippAlgHintFast, SpecBuffer, InitBuffer ); #else // R8B_IPP wi.alloc((int)ceil(2.0 + sqrt(double(Len >> 1)))); wi[0] = 0; wd.alloc(Len >> 1); #endif // R8B_IPP } ~CDSPRealFFT() { delete Next; } }; /** * @brief A "keeper" class for real-valued FFT transform objects. * * Class implements "keeper" functionality for handling CDSPRealFFT objects. * The allocated FFT objects are placed on the global static list of objects * for future reuse instead of deallocation. */ class CDSPRealFFTKeeper : public R8B_BASECLASS { R8BNOCTOR(CDSPRealFFTKeeper) public: CDSPRealFFTKeeper() { } /** * Function acquires FFT object with the specified block length. * * @param LenBits The length of FFT block (Nth power of 2), in the range * [1; 30] inclusive, specifies the number of real values in a FFT block. */ CDSPRealFFTKeeper(const int LenBits) { Object = acquire(LenBits); } ~CDSPRealFFTKeeper() { if (Object != nullptr) { release(Object); } } /** * @return Pointer to the acquired FFT object. */ const CDSPRealFFT* operator ->() const { R8BASSERT(Object != nullptr); return (Object); } /** * Function acquires FFT object with the specified block length. This * function can be called any number of times. * * @param LenBits The length of FFT block (Nth power of 2), in the range * [1; 30] inclusive, specifies the number of real values in a FFT block. */ void init(const int LenBits) { if (Object != nullptr) { if (Object->LenBits == LenBits) { return; } release(Object); } Object = acquire(LenBits); } /** * Function releases a previously acquired FFT object. */ void reset() { if (Object != nullptr) { release(Object); Object = nullptr; } } private: CDSPRealFFT* Object = nullptr; ///< FFT object. ///< static CSyncObject StateSync; ///< FFTObjects synchronizer. ///< static CDSPRealFFT::CObjKeeper FFTObjects[]; ///< Pool of FFT objects of ///< various lengths. ///< /** * Function acquires FFT object from the global pool. * * @param LenBits FFT block length (expressed as Nth power of 2). */ CDSPRealFFT* acquire(const int LenBits) { R8BASSERT(LenBits > 0 && LenBits <= 30); R8BSYNC(StateSync); if (FFTObjects[LenBits] == nullptr) { return (new CDSPRealFFT(LenBits)); } CDSPRealFFT* ffto = FFTObjects[LenBits]; FFTObjects[LenBits] = ffto->Next; return (ffto); } /** * Function releases a previously acquired FFT object. * * @param ffto FFT object to release. */ void release(CDSPRealFFT* const ffto) { R8BSYNC(StateSync); ffto->Next = FFTObjects[ffto->LenBits]; FFTObjects[ffto->LenBits] = ffto; } }; /** * Function calculates the minimum-phase transform of the filter kernel, using * a discrete Hilbert transform in cepstrum domain. * * For more details, see part III.B of * http://www.hpl.hp.com/personal/Niranjan_Damera-Venkata/files/ComplexMinPhase.pdf * * @param[in,out] Kernel Filter kernel buffer. * @param KernelLen Filter kernel's length, in samples. * @param LenMult Kernel length multiplier. Used as a coefficient of the * "oversampling" in the frequency domain. Such oversampling is needed to * improve the precision of the minimum-phase transform. If the filter's * attenuation is high, this multiplier should be increased or otherwise the * required attenuation will not be reached due to "smoothing" effect of this * transform. * @param DoFinalMul "True" if the final multiplication after transform should * be performed or not. Such multiplication returns the gain of the signal to * its original value. This parameter can be set to "false" if normalization * of the resulting filter kernel is planned to be used. * @param[out] DCGroupDelay If not NULL, this variable receives group delay * at DC offset, in samples (can be a non-integer value). */ inline void calcMinPhaseTransform(double* const Kernel, const int KernelLen, const int LenMult = 2, const bool DoFinalMul = true, double* const DCGroupDelay = nullptr) { R8BASSERT(KernelLen > 0); R8BASSERT(LenMult >= 2); const int LenBits = getBitOccupancy((KernelLen * LenMult) - 1); const int Len = 1 << LenBits; const int Len2 = Len >> 1; int i; CFixedBuffer ip(Len); CFixedBuffer ip2(Len2 + 1); memcpy(&ip[0], Kernel, KernelLen * sizeof(double)); memset(&ip[KernelLen], 0, (Len - KernelLen) * sizeof(double)); CDSPRealFFTKeeper ffto(LenBits); ffto->forward(ip); // Create the "log |c|" spectrum while saving the original power spectrum // in the "ip2" buffer. ip2[0] = ip[0]; ip[0] = log(fabs(ip[0]) + 1e-50); ip2[Len2] = ip[1]; ip[1] = log(fabs(ip[1]) + 1e-50); for (i = 1; i < Len2; ++i) { ip2[i] = sqrt(ip[i * 2] * ip[i * 2] + ip[i * 2 + 1] * ip[i * 2 + 1]); ip[i * 2] = log(ip2[i] + 1e-50); ip[i * 2 + 1] = 0.0; } // Convert to cepstrum and apply discrete Hilbert transform. ffto->inverse(ip); ip[0] = 0.0; for (i = 1; i < Len2; ++i) { ip[i] *= ffto->getInvMulConst(); } ip[Len2] = 0.0; for (i = Len2 + 1; i < Len; ++i) { ip[i] *= -ffto->getInvMulConst(); } // Convert Hilbert-transformed cepstrum back to the "log |c|" spectrum and // perform its exponentiation, multiplied by the power spectrum previously // saved in the "ip2" buffer. ffto->forward(ip); ip[0] = ip2[0]; ip[1] = ip2[Len2]; for (i = 1; i < Len2; ++i) { const double p = ip2[i]; ip[i * 2 + 0] = cos(ip[i * 2 + 1]) * p; ip[i * 2 + 1] = sin(ip[i * 2 + 1]) * p; } ffto->inverse(ip); if (DoFinalMul) { for (i = 0; i < KernelLen; ++i) { Kernel[i] = ip[i] * ffto->getInvMulConst(); } } else { memcpy(&Kernel[0], &ip[0], KernelLen * sizeof(double)); } if (DCGroupDelay != nullptr) { double tmp; calcFIRFilterResponseAndGroupDelay(Kernel, KernelLen, 0.0, tmp, tmp, *DCGroupDelay); } } } // namespace r8b #endif // VOX_CDSPREALFFT_INCLUDED