//$ nocpp /** * @file CDSPBlockConvolver.h * * @brief Single-block overlap-save convolution processor class. * * This file includes single-block overlap-save convolution processor class. * * r8brain-free-src Copyright (c) 2013-2014 Aleksey Vaneev * See the "License.txt" file for license. */ #ifndef R8B_CDSPBLOCKCONVOLVER_INCLUDED #define R8B_CDSPBLOCKCONVOLVER_INCLUDED #include "CDSPFIRFilter.h" #include "CDSPProcessor.h" namespace r8b { /** * @brief Single-block overlap-save convolution processing class. * * Class that implements single-block overlap-save convolution processing. The * length of a single FFT block used depends on the length of the filter * kernel. * * The rationale behind "single-block" processing is that increasing the FFT * block length by 2 is more efficient than performing convolution at the same * FFT block length but using two blocks. * * This class also implements a built-in resampling by any whole-number * factor, which simplifies the overall resampling objects topology. */ class CDSPBlockConvolver final : public CDSPProcessor { public: /** * Constructor initializes internal variables and constants of *this * object. * * @param aFilter Pre-calculated filter data. Reference to this object is * inhertied by *this object, and the object will be released when *this * object is destroyed. If upsampling is used, filter's gain should be * equal to the upsampling factor. * @param aUpFactor The upsampling factor, positive value. E.g. value of 2 * means 2x upsampling should be performed over the input data. * @param aDownFactor The downsampling factor, positive value. E.g. value * of 2 means 2x downsampling should be performed over the output data. * @param PrevLatency Latency, in samples (any value >=0), which was left * in the output signal by a previous process. This value is usually * non-zero if the minimum-phase filters are in use. This value is always * zero if the linear-phase filters are in use. * @param aDoConsumeLatency "True" if the output latency should be * consumed. Does not apply to the fractional part of the latency (if such * part is available). */ CDSPBlockConvolver(CDSPFIRFilter& aFilter, const int aUpFactor, const int aDownFactor, const double PrevLatency = 0.0, const bool aDoConsumeLatency = true) : Filter(&aFilter) , UpFactor(aUpFactor) , DownFactor(aDownFactor) , DoConsumeLatency(aDoConsumeLatency) , BlockLen2(2 << Filter->getBlockLenBits()) { R8BASSERT(UpFactor > 0); R8BASSERT(DownFactor > 0); R8BASSERT(PrevLatency >= 0.0); int fftinBits; UpShift = getBitOccupancy(UpFactor) - 1; if ((1 << UpShift) == UpFactor) { fftinBits = Filter->getBlockLenBits() + 1 - UpShift; PrevInputLen = (Filter->getKernelLen() - 1) / UpFactor; InputLen = BlockLen2 - PrevInputLen * UpFactor; } else { UpShift = -1; fftinBits = Filter->getBlockLenBits() + 1; PrevInputLen = Filter->getKernelLen() - 1; InputLen = BlockLen2 - PrevInputLen; } OutOffset = Filter->getLatency(); LatencyFrac = Filter->getLatencyFrac() + PrevLatency * UpFactor; Latency = (int)LatencyFrac; LatencyFrac -= Latency; LatencyFrac /= DownFactor; Latency += InputLen + OutOffset; int fftoutBits; InputDelay = 0; UpSkipInit = 0; DownSkipInit = 0; DownShift = getBitOccupancy(DownFactor) - 1; if ((1 << DownShift) == DownFactor) { fftoutBits = Filter->getBlockLenBits() + 1 - DownShift; if (DownFactor > 1) { if (UpShift > 0) { // This case never happens in practice due to mutual // exclusion of "power of 2" DownFactor and UpFactor // values. R8BASSERT(UpShift == 0); } else { int Delay = Latency & (DownFactor - 1); if (Delay > 0) { Delay = DownFactor - Delay; Latency += Delay; if (Delay < UpFactor) { UpSkipInit = Delay; } else { UpSkipInit = UpFactor - 1; InputDelay = Delay - UpSkipInit; } } if (!DoConsumeLatency) { Latency /= DownFactor; } } } } else { fftoutBits = Filter->getBlockLenBits() + 1; DownShift = -1; if (!DoConsumeLatency && DownFactor > 1) { DownSkipInit = Latency % DownFactor; Latency /= DownFactor; } } fftin = new CDSPRealFFTKeeper(fftinBits); if (fftoutBits == fftinBits) { fftout = fftin; } else { ffto2 = new CDSPRealFFTKeeper(fftoutBits); fftout = ffto2; } WorkBlocks.alloc(BlockLen2 * 2 + PrevInputLen); CurInput = &WorkBlocks[0]; CurOutput = &WorkBlocks[BlockLen2]; PrevInput = &WorkBlocks[BlockLen2 * 2]; clear(); R8BCONSOLE("CDSPBlockConvolver: flt_len=%i in_len=%i io=%i/%i " "fft=%i/%i latency=%i\n", Filter -> getKernelLen(), InputLen, UpFactor, DownFactor, (*fftin) -> getLen(), (*fftout) -> getLen(), getLatency()); } ~CDSPBlockConvolver() override { Filter->unref(); } int getLatency() const override { return (DoConsumeLatency ? 0 : Latency); } double getLatencyFrac() const override { return (LatencyFrac); } int getInLenBeforeOutStart(const int NextInLen) const override { return ((InputLen - InputDelay + NextInLen * DownFactor) / UpFactor); } int getMaxOutLen(const int MaxInLen) const override { R8BASSERT(MaxInLen >= 0); return ((MaxInLen * UpFactor + InputDelay + DownFactor - 1) / DownFactor); } void clear() override { memset(&PrevInput[0], 0, PrevInputLen * sizeof(double)); if (DoConsumeLatency) { LatencyLeft = Latency; } else { LatencyLeft = 0; if (DownShift > 0) { memset(&CurOutput[0], 0, (BlockLen2 >> DownShift) * sizeof(double)); } else { memset(&CurOutput[BlockLen2 - OutOffset], 0, OutOffset * sizeof(double)); memset(&CurOutput[0], 0, (InputLen - OutOffset) * sizeof(double)); } } memset(CurInput, 0, InputDelay * sizeof(double)); InDataLeft = InputLen - InputDelay; UpSkip = UpSkipInit; DownSkip = DownSkipInit; } int process(double* ip, int l0, double*& op0) override { R8BASSERT(l0 >= 0); R8BASSERT(UpFactor / DownFactor <= 1 || ip != op0 || l0 == 0); double* op = op0; int l = l0 * UpFactor; l0 = 0; while (l > 0) { const int Offs = InputLen - InDataLeft; if (l < InDataLeft) { InDataLeft -= l; if (UpShift >= 0) { memcpy(&CurInput[Offs >> UpShift], ip, (l >> UpShift) * sizeof(double)); } else { copyUpsample(ip, &CurInput[Offs], l); } copyToOutput(Offs - OutOffset, op, l, l0); break; } const int b = InDataLeft; l -= b; InDataLeft = InputLen; int ilu; if (UpShift >= 0) { const int bu = b >> UpShift; memcpy(&CurInput[Offs >> UpShift], ip, bu * sizeof(double)); ip += bu; ilu = InputLen >> UpShift; } else { copyUpsample(ip, &CurInput[Offs], b); ilu = InputLen; } const int pil = int(PrevInputLen * sizeof(double)); memcpy(&CurInput[ilu], PrevInput, pil); memcpy(PrevInput, &CurInput[ilu - PrevInputLen], pil); (*fftin)->forward(CurInput); if (UpShift > 0) { mirrorInputSpectrum(); } if (Filter->isZeroPhase()) { (*fftout)->multiplyBlocksZ(Filter->getKernelBlock(), CurInput); } else { (*fftout)->multiplyBlocks(Filter->getKernelBlock(), CurInput); } if (DownShift > 0) { const int z = BlockLen2 >> DownShift; CurInput[1] = Filter->getKernelBlock()[z] * CurInput[z]; } (*fftout)->inverse(CurInput); copyToOutput(Offs - OutOffset, op, b, l0); double* const tmp = CurInput; CurInput = CurOutput; CurOutput = tmp; } return (l0); } private: CDSPFIRFilter* Filter = nullptr; ///< Filter in use. ///< CPtrKeeper fftin; ///< FFT object 1, used to produce ///< the input spectrum (can embed the "power of 2" upsampling). ///< CPtrKeeper ffto2; ///< FFT object 2 (can be NULL). ///< CDSPRealFFTKeeper* fftout = nullptr; ///< FFT object used to produce the output ///< signal (can embed the "power of 2" downsampling), may point to ///< either "fftin" or "ffto2". ///< int UpFactor = 0; ///< Upsampling factor. ///< int DownFactor = 0; ///< Downsampling factor. ///< bool DoConsumeLatency; ///< "True" if the output latency should be ///< consumed. Does not apply to the fractional part of the latency ///< (if such part is available). ///< int BlockLen2 = 0; ///< Equals block length * 2. ///< int OutOffset = 0; ///< Output offset, depends on filter's introduced latency. ///< int PrevInputLen = 0; ///< The length of previous input data saved, used for ///< overlap. ///< int InputLen = 0; ///< The number of input samples that should be accumulated ///< before the input block is processed. ///< int Latency = 0; ///< Processing latency, in samples. ///< double LatencyFrac = 0; ///< Fractional latency, in samples, that is left in ///< the output signal. ///< int UpShift = 0; ///< "Power of 2" upsampling shift. Equals -1 if UpFactor is ///< not a "power of 2" value. Equals 0 if UpFactor equals 1. ///< int DownShift = 0; ///< "Power of 2" downsampling shift. Equals -1 if ///< DownFactor is not a "power of 2". Equals 0 if DownFactor equals ///< 1. ///< int InputDelay = 0; ///< Additional input delay, in samples. Used to make the ///< output latency divisible by DownShift. Used only if UpShift <= 0 ///< and DownShift > 0. ///< CFixedBuffer WorkBlocks; ///< Previous input data, input and ///< output data blocks, overall capacity = BlockLen2 * 2 + ///< PrevInputLen. Used in the flip-flop manner. ///< double* PrevInput = nullptr; ///< Previous input data buffer, capacity = BlockLen. ///< double* CurInput = nullptr; ///< Input data buffer, capacity = BlockLen2. ///< double* CurOutput = nullptr; ///< Output data buffer, capacity = BlockLen2. ///< int InDataLeft = 0; ///< Samples left before processing input and output FFT ///< blocks. Initialized to InputLen on clear. ///< int LatencyLeft = 0; ///< Latency in samples left to skip. ///< int UpSkip = 0; ///< The current upsampling sample skip (value in the range ///< 0 to UpFactor - 1). ///< int UpSkipInit = 0; ///< The initial UpSkip value after clear(). ///< int DownSkip = 0; ///< The current downsampling sample skip (value in the ///< range 0 to DownFactor - 1). Not used if DownShift > 0. ///< int DownSkipInit = 0; ///< The initial DownSkip value after clear(). ///< /** * Function copies samples from the input buffer to the output buffer * while inserting zeros inbetween them to perform the whole-numbered * upsampling. * * @param[in,out] ip0 Input buffer. Will be advanced on function's return. * @param[out] op Output buffer. * @param l0 The number of samples to fill in the output buffer, including * both input samples and interpolation (zero) samples. */ void copyUpsample(double*& ip0, double* op, int l0) { int b = min(UpSkip, l0); if (b > 0) { l0 -= b; UpSkip -= b; *op = 0.0; op++; b--; while (b > 0) { *op = 0.0; op++; b--; } } double* ip = ip0; int l = l0 / UpFactor; int lz = l0 - l * UpFactor; if (UpFactor == 3) { while (l > 0) { op[0] = *ip; op[1] = 0.0; op[2] = 0.0; ip++; op += UpFactor; l--; } } else if (UpFactor == 5) { while (l > 0) { op[0] = *ip; op[1] = 0.0; op[2] = 0.0; op[3] = 0.0; op[4] = 0.0; ip++; op += UpFactor; l--; } } else { while (l > 0) { op[0] = *ip; for (int j = 1; j < UpFactor; ++j) { op[j] = 0.0; } ip++; op += UpFactor; l--; } } if (lz > 0) { *op = *ip; op++; ip++; UpSkip = UpFactor - lz; while (lz > 1) { *op = 0.0; op++; lz--; } } ip0 = ip; } /** * Function copies sample data from the CurOutput buffer to the specified * output buffer and advances its position. If necessary, this function * "consumes" latency and performs downsampling. * * @param Offs CurOutput buffer offset, can be negative. * @param[out] op0 Output buffer pointer, will be advanced. * @param b The number of output samples available, including those which * are discarded during whole-number downsampling. * @param l0 The overall output sample count, will be increased. */ void copyToOutput(int Offs, double*& op0, int b, int& l0) { if (Offs < 0) { if (Offs + b <= 0) { Offs += BlockLen2; } else { copyToOutput(Offs + BlockLen2, op0, -Offs, l0); b += Offs; Offs = 0; } } if (LatencyLeft > 0) { if (LatencyLeft >= b) { LatencyLeft -= b; return; } Offs += LatencyLeft; b -= LatencyLeft; LatencyLeft = 0; } const int df = DownFactor; if (DownShift > 0) { int Skip = Offs & (df - 1); if (Skip > 0) { Skip = df - Skip; b -= Skip; Offs += Skip; } if (b > 0) { b = (b + df - 1) >> DownShift; memcpy(op0, &CurOutput[Offs >> DownShift], b * sizeof(double)); op0 += b; l0 += b; } } else { if (df > 1) { const double* ip = &CurOutput[Offs + DownSkip]; int l = (b + df - 1 - DownSkip) / df; DownSkip += l * df - b; double* op = op0; l0 += l; op0 += l; while (l > 0) { *op = *ip; op++; ip += df; l--; } } else { memcpy(op0, &CurOutput[Offs], b * sizeof(double)); op0 += b; l0 += b; } } } /** * Function performs input spectrum mirroring which is used to perform a * fast "power of 2" upsampling. Such mirroring is equivalent to insertion * of zeros into the input signal. */ void mirrorInputSpectrum() { const int bl1 = BlockLen2 >> UpShift; const int bl2 = bl1 + bl1; int i; for (i = bl1 + 2; i < bl2; i += 2) { CurInput[i] = CurInput[bl2 - i]; CurInput[i + 1] = -CurInput[bl2 - i + 1]; } CurInput[bl1] = CurInput[1]; CurInput[bl1 + 1] = 0.0; CurInput[1] = CurInput[0]; for (i = 1; i < UpShift; ++i) { const int z = bl1 << i; memcpy(&CurInput[z], CurInput, z * sizeof(double)); CurInput[z + 1] = 0.0; } } }; } // namespace r8b #endif // R8B_CDSPBLOCKCONVOLVER_INCLUDED