From 48cc6df8a769a2ae024ded5bd26504ddcfbc8e9a Mon Sep 17 00:00:00 2001 From: f4exb Date: Sun, 29 Apr 2018 22:56:34 +0200 Subject: [PATCH] Floating point to integer decimator optimization using the even/odd algorithm --- sdrbase/CMakeLists.txt | 2 + sdrbase/dsp/decimatorsfi.cpp | 36 ++--- sdrbase/dsp/decimatorsfi.h | 14 +- sdrbase/dsp/inthalfbandfiltereof.h | 231 +++++++++++++++++++++++++++++ 4 files changed, 258 insertions(+), 25 deletions(-) create mode 100644 sdrbase/dsp/inthalfbandfiltereof.h diff --git a/sdrbase/CMakeLists.txt b/sdrbase/CMakeLists.txt index e42a8097b..7647c6a1b 100644 --- a/sdrbase/CMakeLists.txt +++ b/sdrbase/CMakeLists.txt @@ -130,6 +130,8 @@ set(sdrbase_HEADERS dsp/inthalfbandfilterdbf.h dsp/inthalfbandfiltereo1.h dsp/inthalfbandfiltereo1i.h + dsp/inthalfbandfiltereo2.h + dsp/inthalfbandfiltereof.h dsp/inthalfbandfilterst.h dsp/inthalfbandfiltersti.h dsp/kissfft.h diff --git a/sdrbase/dsp/decimatorsfi.cpp b/sdrbase/dsp/decimatorsfi.cpp index 95f2e47c3..e4e2d8451 100644 --- a/sdrbase/dsp/decimatorsfi.cpp +++ b/sdrbase/dsp/decimatorsfi.cpp @@ -32,7 +32,7 @@ void DecimatorsFI::decimate1(SampleVector::iterator* it, const float* buf, qint3 void DecimatorsFI::decimate2_cen(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ) { - double intbuf[2]; + float intbuf[2]; for (int pos = 0; pos < nbIAndQ - 3; pos += 4) { @@ -54,7 +54,7 @@ void DecimatorsFI::decimate2_cen(SampleVector::iterator* it, const float* buf, q void DecimatorsFI::decimate2_inf(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ) { - double xreal, yimag; + float xreal, yimag; for (int pos = 0; pos < nbIAndQ - 7; pos += 8) { @@ -74,7 +74,7 @@ void DecimatorsFI::decimate2_inf(SampleVector::iterator* it, const float* buf, q void DecimatorsFI::decimate2_sup(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ) { - double xreal, yimag; + float xreal, yimag; for (int pos = 0; pos < nbIAndQ - 7; pos += 8) { @@ -94,7 +94,7 @@ void DecimatorsFI::decimate2_sup(SampleVector::iterator* it, const float* buf, q void DecimatorsFI::decimate4_inf(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ) { - double xreal, yimag; + float xreal, yimag; for (int pos = 0; pos < nbIAndQ - 7; pos += 8) { @@ -116,7 +116,7 @@ void DecimatorsFI::decimate4_sup(SampleVector::iterator* it, const float* buf, q // Inf (LSB): // x y x y x y x y / x -> 0,-3,-4,7 / y -> 1,2,-5,-6 // [ rotate: 0, 1, -3, 2, -4, -5, 7, -6] - double xreal, yimag; + float xreal, yimag; for (int pos = 0; pos < nbIAndQ - 7; pos += 8) { @@ -132,7 +132,7 @@ void DecimatorsFI::decimate4_sup(SampleVector::iterator* it, const float* buf, q void DecimatorsFI::decimate8_inf(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ) { - double xreal[2], yimag[2]; + float xreal[2], yimag[2]; for (int pos = 0; pos < nbIAndQ - 15; pos += 8) { @@ -154,7 +154,7 @@ void DecimatorsFI::decimate8_inf(SampleVector::iterator* it, const float* buf, q void DecimatorsFI::decimate8_sup(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ) { - double xreal[2], yimag[2]; + float xreal[2], yimag[2]; for (int pos = 0; pos < nbIAndQ - 15; pos += 8) { @@ -178,7 +178,7 @@ void DecimatorsFI::decimate16_inf(SampleVector::iterator* it, const float* buf, { // Offset tuning: 4x downsample and rotate, then // downsample 4x more. [ rotate: 0, 1, -3, 2, -4, -5, 7, -6] - double xreal[4], yimag[4]; + float xreal[4], yimag[4]; for (int pos = 0; pos < nbIAndQ - 31; ) { @@ -205,7 +205,7 @@ void DecimatorsFI::decimate16_sup(SampleVector::iterator* it, const float* buf, { // Offset tuning: 4x downsample and rotate, then // downsample 4x more. [ rotate: 1, 0, -2, 3, -5, -4, 6, -7] - double xreal[4], yimag[4]; + float xreal[4], yimag[4]; for (int pos = 0; pos < nbIAndQ - 31; ) { @@ -230,7 +230,7 @@ void DecimatorsFI::decimate16_sup(SampleVector::iterator* it, const float* buf, void DecimatorsFI::decimate32_inf(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ) { - double xreal[8], yimag[8]; + float xreal[8], yimag[8]; for (int pos = 0; pos < nbIAndQ - 63; ) { @@ -260,7 +260,7 @@ void DecimatorsFI::decimate32_inf(SampleVector::iterator* it, const float* buf, void DecimatorsFI::decimate32_sup(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ) { - double xreal[8], yimag[8]; + float xreal[8], yimag[8]; for (int pos = 0; pos < nbIAndQ - 63; ) { @@ -290,7 +290,7 @@ void DecimatorsFI::decimate32_sup(SampleVector::iterator* it, const float* buf, void DecimatorsFI::decimate64_inf(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ) { - double xreal[16], yimag[16]; + float xreal[16], yimag[16]; for (int pos = 0; pos < nbIAndQ - 127; ) { @@ -329,7 +329,7 @@ void DecimatorsFI::decimate64_inf(SampleVector::iterator* it, const float* buf, void DecimatorsFI::decimate64_sup(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ) { - double xreal[16], yimag[16]; + float xreal[16], yimag[16]; for (int pos = 0; pos < nbIAndQ - 127; ) { @@ -368,7 +368,7 @@ void DecimatorsFI::decimate64_sup(SampleVector::iterator* it, const float* buf, void DecimatorsFI::decimate4_cen(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ) { - double intbuf[4]; + float intbuf[4]; for (int pos = 0; pos < nbIAndQ - 7; pos += 8) { @@ -402,7 +402,7 @@ void DecimatorsFI::decimate4_cen(SampleVector::iterator* it, const float* buf, q void DecimatorsFI::decimate8_cen(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ) { - double intbuf[8]; + float intbuf[8]; for (int pos = 0; pos < nbIAndQ - 15; pos += 16) { @@ -461,7 +461,7 @@ void DecimatorsFI::decimate8_cen(SampleVector::iterator* it, const float* buf, q void DecimatorsFI::decimate16_cen(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ) { - double intbuf[16]; + float intbuf[16]; for (int pos = 0; pos < nbIAndQ - 31; pos += 32) { @@ -569,7 +569,7 @@ void DecimatorsFI::decimate16_cen(SampleVector::iterator* it, const float* buf, void DecimatorsFI::decimate32_cen(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ) { - double intbuf[32]; + float intbuf[32]; for (int pos = 0; pos < nbIAndQ - 63; pos += 64) { @@ -774,7 +774,7 @@ void DecimatorsFI::decimate32_cen(SampleVector::iterator* it, const float* buf, void DecimatorsFI::decimate64_cen(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ) { - double intbuf[64]; + float intbuf[64]; for (int pos = 0; pos < nbIAndQ - 127; pos += 128) { diff --git a/sdrbase/dsp/decimatorsfi.h b/sdrbase/dsp/decimatorsfi.h index 45f6ae2fe..cfeb85a82 100644 --- a/sdrbase/dsp/decimatorsfi.h +++ b/sdrbase/dsp/decimatorsfi.h @@ -17,7 +17,7 @@ #ifndef SDRBASE_DSP_DECIMATORSFI_H_ #define SDRBASE_DSP_DECIMATORSFI_H_ -#include "dsp/inthalfbandfilterdbf.h" +#include "dsp/inthalfbandfiltereof.h" #include "export.h" #define DECIMATORSFI_HB_FILTER_ORDER 64 @@ -46,12 +46,12 @@ public: void decimate64_sup(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ); void decimate64_cen(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ); - IntHalfbandFilterDBF m_decimator2; // 1st stages - IntHalfbandFilterDBF m_decimator4; // 2nd stages - IntHalfbandFilterDBF m_decimator8; // 3rd stages - IntHalfbandFilterDBF m_decimator16; // 4th stages - IntHalfbandFilterDBF m_decimator32; // 5th stages - IntHalfbandFilterDBF m_decimator64; // 6th stages + IntHalfbandFilterEOF m_decimator2; // 1st stages + IntHalfbandFilterEOF m_decimator4; // 2nd stages + IntHalfbandFilterEOF m_decimator8; // 3rd stages + IntHalfbandFilterEOF m_decimator16; // 4th stages + IntHalfbandFilterEOF m_decimator32; // 5th stages + IntHalfbandFilterEOF m_decimator64; // 6th stages }; diff --git a/sdrbase/dsp/inthalfbandfiltereof.h b/sdrbase/dsp/inthalfbandfiltereof.h new file mode 100644 index 000000000..1afa92fb4 --- /dev/null +++ b/sdrbase/dsp/inthalfbandfiltereof.h @@ -0,0 +1,231 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2018 F4EXB // +// written by Edouard Griffiths // +// // +// Integer half-band FIR based interpolator and decimator // +// This is the even/odd double buffer variant. Really useful only when SIMD is // +// used // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see . // +/////////////////////////////////////////////////////////////////////////////////// + +#ifndef SDRBASE_DSP_INTHALFBANDFILTEREOF_H_ +#define SDRBASE_DSP_INTHALFBANDFILTEREOF_H_ + +#include +#include +#include "dsp/dsptypes.h" +#include "dsp/hbfiltertraits.h" +#include "export.h" + +template +class SDRBASE_API IntHalfbandFilterEOF { +public: + IntHalfbandFilterEOF(); + + bool workDecimateCenter(float *x, float *y) + { + // insert sample into ring-buffer + storeSample(*x, *y); + + switch(m_state) + { + case 0: + // advance write-pointer + advancePointer(); + // next state + m_state = 1; + // tell caller we don't have a new sample + return false; + + default: + // save result + doFIR(x, y); + // advance write-pointer + advancePointer(); + // next state + m_state = 0; + // tell caller we have a new sample + return true; + } + } + + void myDecimate(float x1, float y1, float *x2, float *y2) + { + storeSample(x1, y1); + advancePointer(); + + storeSample(*x2, *y2); + doFIR(x2, y2); + advancePointer(); + } + + /** Simple zero stuffing and filter */ + void myInterpolateZeroStuffing(float *x1, float *y1, float *x2, float *y2) + { + storeSample(*x1, *y1); + doFIR(x1, y1); + advancePointer(); + + storeSample(0, 0); + doFIR(x2, y2); + advancePointer(); + } + + /** Optimized upsampler by 2 not calculating FIR with inserted null samples */ + void myInterpolate(float *x1, float *y1, float *x2, float *y2) + { + // insert sample into ring double buffer + m_samples[m_ptr][0] = *x1; + m_samples[m_ptr][1] = *y1; + m_samples[m_ptr + HBFIRFilterTraits::hbOrder/2][0] = *x1; + m_samples[m_ptr + HBFIRFilterTraits::hbOrder/2][1] = *y1; + + // advance pointer + if (m_ptr < (HBFIRFilterTraits::hbOrder/2) - 1) { + m_ptr++; + } else { + m_ptr = 0; + } + + // first output sample calculated with the middle peak + *x1 = m_samples[m_ptr + (HBFIRFilterTraits::hbOrder/4) - 1][0]; + *y1 = m_samples[m_ptr + (HBFIRFilterTraits::hbOrder/4) - 1][1]; + + // second sample calculated with the filter + doInterpolateFIR(x2, y2); + } + +protected: + double m_even[2][HBFIRFilterTraits::hbOrder]; // double buffer technique + double m_odd[2][HBFIRFilterTraits::hbOrder]; // double buffer technique + double m_samples[HBFIRFilterTraits::hbOrder][2]; // double buffer technique + + int m_ptr; + int m_size; + int m_state; + + void storeSample(float x, float y) + { + if ((m_ptr % 2) == 0) + { + m_even[0][m_ptr/2] = x; + m_even[1][m_ptr/2] = y; + m_even[0][m_ptr/2 + m_size] = x; + m_even[1][m_ptr/2 + m_size] = y; + } + else + { + m_odd[0][m_ptr/2] = x; + m_odd[1][m_ptr/2] = y; + m_odd[0][m_ptr/2 + m_size] = x; + m_odd[1][m_ptr/2 + m_size] = y; + } + } + + void advancePointer() + { + m_ptr = m_ptr + 1 < 2*m_size ? m_ptr + 1: 0; + } + + void doFIR(float *x, float *y) + { + double iAcc = 0; + double qAcc = 0; + +//#if defined(USE_SSE4_1) && !defined(NO_DSP_SIMD) +// IntHalfbandFilterEO1Intrisics::work( +// m_ptr, +// m_even, +// m_odd, +// iAcc, +// qAcc +// ); +//#else + int a = m_ptr/2 + m_size; // tip pointer + int b = m_ptr/2 + 1; // tail pointer + + for (int i = 0; i < HBFIRFilterTraits::hbOrder / 4; i++) + { + if ((m_ptr % 2) == 0) + { + iAcc += (m_even[0][a] + m_even[0][b]) * HBFIRFilterTraits::hbCoeffsF[i]; + qAcc += (m_even[1][a] + m_even[1][b]) * HBFIRFilterTraits::hbCoeffsF[i]; + } + else + { + iAcc += (m_odd[0][a] + m_odd[0][b]) * HBFIRFilterTraits::hbCoeffsF[i]; + qAcc += (m_odd[1][a] + m_odd[1][b]) * HBFIRFilterTraits::hbCoeffsF[i]; + } + + a -= 1; + b += 1; + } +//#endif + if ((m_ptr % 2) == 0) + { + iAcc += m_odd[0][m_ptr/2 + m_size/2] * 0.5f; + qAcc += m_odd[1][m_ptr/2 + m_size/2] * 0.5f; + } + else + { + iAcc += m_even[0][m_ptr/2 + m_size/2 + 1] * 0.5f; + qAcc += m_even[1][m_ptr/2 + m_size/2 + 1] * 0.5f; + } + + *x = iAcc; // HB_SHIFT incorrect do not loose the gained bit + *y = qAcc; + } + + void doInterpolateFIR(float *x, float *y) + { + qint32 iAcc = 0; + qint32 qAcc = 0; + + qint16 a = m_ptr; + qint16 b = m_ptr + (HBFIRFilterTraits::hbOrder / 2) - 1; + + // go through samples in buffer + for (int i = 0; i < HBFIRFilterTraits::hbOrder / 4; i++) + { + iAcc += (m_samples[a][0] + m_samples[b][0]) * HBFIRFilterTraits::hbCoeffsF[i]; + qAcc += (m_samples[a][1] + m_samples[b][1]) * HBFIRFilterTraits::hbCoeffsF[i]; + a++; + b--; + } + + *x = iAcc * SDR_RX_SCALED; + *y = qAcc * SDR_RX_SCALED; + } +}; + +template +IntHalfbandFilterEOF::IntHalfbandFilterEOF() +{ + m_size = HBFIRFilterTraits::hbOrder/2; + + for (int i = 0; i < 2*m_size; i++) + { + m_even[0][i] = 0.0f; + m_even[1][i] = 0.0f; + m_odd[0][i] = 0.0f; + m_odd[1][i] = 0.0f; + m_samples[i][0] = 0.0f; + m_samples[i][1] = 0.0f; + } + + m_ptr = 0; + m_state = 0; +} + +#endif /* SDRBASE_DSP_INTHALFBANDFILTEREOF_H_ */