From 48cc6df8a769a2ae024ded5bd26504ddcfbc8e9a Mon Sep 17 00:00:00 2001
From: f4exb <f4exb06@gmail.com>
Date: Sun, 29 Apr 2018 22:56:34 +0200
Subject: [PATCH] Floating point to integer decimator optimization using the
 even/odd algorithm

---
 sdrbase/CMakeLists.txt             |   2 +
 sdrbase/dsp/decimatorsfi.cpp       |  36 ++---
 sdrbase/dsp/decimatorsfi.h         |  14 +-
 sdrbase/dsp/inthalfbandfiltereof.h | 231 +++++++++++++++++++++++++++++
 4 files changed, 258 insertions(+), 25 deletions(-)
 create mode 100644 sdrbase/dsp/inthalfbandfiltereof.h

diff --git a/sdrbase/CMakeLists.txt b/sdrbase/CMakeLists.txt
index e42a8097b..7647c6a1b 100644
--- a/sdrbase/CMakeLists.txt
+++ b/sdrbase/CMakeLists.txt
@@ -130,6 +130,8 @@ set(sdrbase_HEADERS
     dsp/inthalfbandfilterdbf.h
     dsp/inthalfbandfiltereo1.h
     dsp/inthalfbandfiltereo1i.h
+    dsp/inthalfbandfiltereo2.h
+    dsp/inthalfbandfiltereof.h
     dsp/inthalfbandfilterst.h
     dsp/inthalfbandfiltersti.h
     dsp/kissfft.h
diff --git a/sdrbase/dsp/decimatorsfi.cpp b/sdrbase/dsp/decimatorsfi.cpp
index 95f2e47c3..e4e2d8451 100644
--- a/sdrbase/dsp/decimatorsfi.cpp
+++ b/sdrbase/dsp/decimatorsfi.cpp
@@ -32,7 +32,7 @@ void DecimatorsFI::decimate1(SampleVector::iterator* it, const float* buf, qint3
 
 void DecimatorsFI::decimate2_cen(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ)
 {
-    double intbuf[2];
+    float intbuf[2];
 
     for (int pos = 0; pos < nbIAndQ - 3; pos += 4)
     {
@@ -54,7 +54,7 @@ void DecimatorsFI::decimate2_cen(SampleVector::iterator* it, const float* buf, q
 
 void DecimatorsFI::decimate2_inf(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ)
 {
-    double xreal, yimag;
+    float xreal, yimag;
 
     for (int pos = 0; pos < nbIAndQ - 7; pos += 8)
     {
@@ -74,7 +74,7 @@ void DecimatorsFI::decimate2_inf(SampleVector::iterator* it, const float* buf, q
 
 void DecimatorsFI::decimate2_sup(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ)
 {
-    double xreal, yimag;
+    float xreal, yimag;
 
     for (int pos = 0; pos < nbIAndQ - 7; pos += 8)
     {
@@ -94,7 +94,7 @@ void DecimatorsFI::decimate2_sup(SampleVector::iterator* it, const float* buf, q
 
 void DecimatorsFI::decimate4_inf(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ)
 {
-    double xreal, yimag;
+    float xreal, yimag;
 
     for (int pos = 0; pos < nbIAndQ - 7; pos += 8)
     {
@@ -116,7 +116,7 @@ void DecimatorsFI::decimate4_sup(SampleVector::iterator* it, const float* buf, q
     // Inf (LSB):
     //            x  y   x  y   x   y  x   y  / x -> 0,-3,-4,7 / y -> 1,2,-5,-6
     // [ rotate:  0, 1, -3, 2, -4, -5, 7, -6]
-    double xreal, yimag;
+    float xreal, yimag;
 
     for (int pos = 0; pos < nbIAndQ - 7; pos += 8)
     {
@@ -132,7 +132,7 @@ void DecimatorsFI::decimate4_sup(SampleVector::iterator* it, const float* buf, q
 
 void DecimatorsFI::decimate8_inf(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ)
 {
-    double xreal[2], yimag[2];
+    float xreal[2], yimag[2];
 
     for (int pos = 0; pos < nbIAndQ - 15; pos += 8)
     {
@@ -154,7 +154,7 @@ void DecimatorsFI::decimate8_inf(SampleVector::iterator* it, const float* buf, q
 
 void DecimatorsFI::decimate8_sup(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ)
 {
-    double xreal[2], yimag[2];
+    float xreal[2], yimag[2];
 
     for (int pos = 0; pos < nbIAndQ - 15; pos += 8)
     {
@@ -178,7 +178,7 @@ void DecimatorsFI::decimate16_inf(SampleVector::iterator* it, const float* buf,
 {
     // Offset tuning: 4x downsample and rotate, then
     // downsample 4x more. [ rotate:  0, 1, -3, 2, -4, -5, 7, -6]
-    double xreal[4], yimag[4];
+    float xreal[4], yimag[4];
 
     for (int pos = 0; pos < nbIAndQ - 31; )
     {
@@ -205,7 +205,7 @@ void DecimatorsFI::decimate16_sup(SampleVector::iterator* it, const float* buf,
 {
     // Offset tuning: 4x downsample and rotate, then
     // downsample 4x more. [ rotate:  1, 0, -2, 3, -5, -4, 6, -7]
-    double xreal[4], yimag[4];
+    float xreal[4], yimag[4];
 
     for (int pos = 0; pos < nbIAndQ - 31; )
     {
@@ -230,7 +230,7 @@ void DecimatorsFI::decimate16_sup(SampleVector::iterator* it, const float* buf,
 
 void DecimatorsFI::decimate32_inf(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ)
 {
-    double xreal[8], yimag[8];
+    float xreal[8], yimag[8];
 
     for (int pos = 0; pos < nbIAndQ - 63; )
     {
@@ -260,7 +260,7 @@ void DecimatorsFI::decimate32_inf(SampleVector::iterator* it, const float* buf,
 
 void DecimatorsFI::decimate32_sup(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ)
 {
-    double xreal[8], yimag[8];
+    float xreal[8], yimag[8];
 
     for (int pos = 0; pos < nbIAndQ - 63; )
     {
@@ -290,7 +290,7 @@ void DecimatorsFI::decimate32_sup(SampleVector::iterator* it, const float* buf,
 
 void DecimatorsFI::decimate64_inf(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ)
 {
-    double xreal[16], yimag[16];
+    float xreal[16], yimag[16];
 
     for (int pos = 0; pos < nbIAndQ - 127; )
     {
@@ -329,7 +329,7 @@ void DecimatorsFI::decimate64_inf(SampleVector::iterator* it, const float* buf,
 
 void DecimatorsFI::decimate64_sup(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ)
 {
-    double xreal[16], yimag[16];
+    float xreal[16], yimag[16];
 
     for (int pos = 0; pos < nbIAndQ - 127; )
     {
@@ -368,7 +368,7 @@ void DecimatorsFI::decimate64_sup(SampleVector::iterator* it, const float* buf,
 
 void DecimatorsFI::decimate4_cen(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ)
 {
-    double intbuf[4];
+    float intbuf[4];
 
     for (int pos = 0; pos < nbIAndQ - 7; pos += 8)
     {
@@ -402,7 +402,7 @@ void DecimatorsFI::decimate4_cen(SampleVector::iterator* it, const float* buf, q
 
 void DecimatorsFI::decimate8_cen(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ)
 {
-    double intbuf[8];
+    float intbuf[8];
 
     for (int pos = 0; pos < nbIAndQ - 15; pos += 16)
     {
@@ -461,7 +461,7 @@ void DecimatorsFI::decimate8_cen(SampleVector::iterator* it, const float* buf, q
 
 void DecimatorsFI::decimate16_cen(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ)
 {
-    double intbuf[16];
+    float intbuf[16];
 
     for (int pos = 0; pos < nbIAndQ - 31; pos += 32)
     {
@@ -569,7 +569,7 @@ void DecimatorsFI::decimate16_cen(SampleVector::iterator* it, const float* buf,
 
 void DecimatorsFI::decimate32_cen(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ)
 {
-    double intbuf[32];
+    float intbuf[32];
 
     for (int pos = 0; pos < nbIAndQ - 63; pos += 64)
     {
@@ -774,7 +774,7 @@ void DecimatorsFI::decimate32_cen(SampleVector::iterator* it, const float* buf,
 
 void DecimatorsFI::decimate64_cen(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ)
 {
-    double intbuf[64];
+    float intbuf[64];
 
     for (int pos = 0; pos < nbIAndQ - 127; pos += 128)
     {
diff --git a/sdrbase/dsp/decimatorsfi.h b/sdrbase/dsp/decimatorsfi.h
index 45f6ae2fe..cfeb85a82 100644
--- a/sdrbase/dsp/decimatorsfi.h
+++ b/sdrbase/dsp/decimatorsfi.h
@@ -17,7 +17,7 @@
 #ifndef SDRBASE_DSP_DECIMATORSFI_H_
 #define SDRBASE_DSP_DECIMATORSFI_H_
 
-#include "dsp/inthalfbandfilterdbf.h"
+#include "dsp/inthalfbandfiltereof.h"
 #include "export.h"
 
 #define DECIMATORSFI_HB_FILTER_ORDER 64
@@ -46,12 +46,12 @@ public:
     void decimate64_sup(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ);
     void decimate64_cen(SampleVector::iterator* it, const float* buf, qint32 nbIAndQ);
 
-    IntHalfbandFilterDBF<double, float, DECIMATORSFI_HB_FILTER_ORDER> m_decimator2;  // 1st stages
-    IntHalfbandFilterDBF<double, float, DECIMATORSFI_HB_FILTER_ORDER> m_decimator4;  // 2nd stages
-    IntHalfbandFilterDBF<double, float, DECIMATORSFI_HB_FILTER_ORDER> m_decimator8;  // 3rd stages
-    IntHalfbandFilterDBF<double, float, DECIMATORSFI_HB_FILTER_ORDER> m_decimator16; // 4th stages
-    IntHalfbandFilterDBF<double, float, DECIMATORSFI_HB_FILTER_ORDER> m_decimator32; // 5th stages
-    IntHalfbandFilterDBF<double, float, DECIMATORSFI_HB_FILTER_ORDER> m_decimator64; // 6th stages
+    IntHalfbandFilterEOF<DECIMATORSFI_HB_FILTER_ORDER> m_decimator2;  // 1st stages
+    IntHalfbandFilterEOF<DECIMATORSFI_HB_FILTER_ORDER> m_decimator4;  // 2nd stages
+    IntHalfbandFilterEOF<DECIMATORSFI_HB_FILTER_ORDER> m_decimator8;  // 3rd stages
+    IntHalfbandFilterEOF<DECIMATORSFI_HB_FILTER_ORDER> m_decimator16; // 4th stages
+    IntHalfbandFilterEOF<DECIMATORSFI_HB_FILTER_ORDER> m_decimator32; // 5th stages
+    IntHalfbandFilterEOF<DECIMATORSFI_HB_FILTER_ORDER> m_decimator64; // 6th stages
 };
 
 
diff --git a/sdrbase/dsp/inthalfbandfiltereof.h b/sdrbase/dsp/inthalfbandfiltereof.h
new file mode 100644
index 000000000..1afa92fb4
--- /dev/null
+++ b/sdrbase/dsp/inthalfbandfiltereof.h
@@ -0,0 +1,231 @@
+///////////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2018 F4EXB                                                      //
+// written by Edouard Griffiths                                                  //
+//                                                                               //
+// Integer half-band FIR based interpolator and decimator                        //
+// This is the even/odd double buffer variant. Really useful only when SIMD is   //
+// used                                                                          //
+//                                                                               //
+// This program is free software; you can redistribute it and/or modify          //
+// it under the terms of the GNU General Public License as published by          //
+// the Free Software Foundation as version 3 of the License, or                  //
+//                                                                               //
+// This program is distributed in the hope that it will be useful,               //
+// but WITHOUT ANY WARRANTY; without even the implied warranty of                //
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                  //
+// GNU General Public License V3 for more details.                               //
+//                                                                               //
+// You should have received a copy of the GNU General Public License             //
+// along with this program. If not, see <http://www.gnu.org/licenses/>.          //
+///////////////////////////////////////////////////////////////////////////////////
+
+#ifndef SDRBASE_DSP_INTHALFBANDFILTEREOF_H_
+#define SDRBASE_DSP_INTHALFBANDFILTEREOF_H_
+
+#include <stdint.h>
+#include <cstdlib>
+#include "dsp/dsptypes.h"
+#include "dsp/hbfiltertraits.h"
+#include "export.h"
+
+template<uint32_t HBFilterOrder>
+class SDRBASE_API IntHalfbandFilterEOF {
+public:
+    IntHalfbandFilterEOF();
+
+    bool workDecimateCenter(float *x, float *y)
+    {
+        // insert sample into ring-buffer
+        storeSample(*x, *y);
+
+        switch(m_state)
+        {
+            case 0:
+                // advance write-pointer
+                advancePointer();
+                // next state
+                m_state = 1;
+                // tell caller we don't have a new sample
+                return false;
+
+            default:
+                // save result
+                doFIR(x, y);
+                // advance write-pointer
+                advancePointer();
+                // next state
+                m_state = 0;
+                // tell caller we have a new sample
+                return true;
+        }
+    }
+
+    void myDecimate(float x1, float y1, float *x2, float *y2)
+    {
+        storeSample(x1, y1);
+        advancePointer();
+
+        storeSample(*x2, *y2);
+        doFIR(x2, y2);
+        advancePointer();
+    }
+
+    /** Simple zero stuffing and filter */
+    void myInterpolateZeroStuffing(float *x1, float *y1, float *x2, float *y2)
+    {
+        storeSample(*x1, *y1);
+        doFIR(x1, y1);
+        advancePointer();
+
+        storeSample(0, 0);
+        doFIR(x2, y2);
+        advancePointer();
+    }
+
+    /** Optimized upsampler by 2 not calculating FIR with inserted null samples */
+    void myInterpolate(float *x1, float *y1, float *x2, float *y2)
+    {
+        // insert sample into ring double buffer
+        m_samples[m_ptr][0] = *x1;
+        m_samples[m_ptr][1] = *y1;
+        m_samples[m_ptr + HBFIRFilterTraits<HBFilterOrder>::hbOrder/2][0] = *x1;
+        m_samples[m_ptr + HBFIRFilterTraits<HBFilterOrder>::hbOrder/2][1] = *y1;
+
+        // advance pointer
+        if (m_ptr < (HBFIRFilterTraits<HBFilterOrder>::hbOrder/2) - 1) {
+            m_ptr++;
+        } else {
+            m_ptr = 0;
+        }
+
+        // first output sample calculated with the middle peak
+        *x1 = m_samples[m_ptr + (HBFIRFilterTraits<HBFilterOrder>::hbOrder/4) - 1][0];
+        *y1 = m_samples[m_ptr + (HBFIRFilterTraits<HBFilterOrder>::hbOrder/4) - 1][1];
+
+        // second sample calculated with the filter
+        doInterpolateFIR(x2, y2);
+    }
+
+protected:
+    double m_even[2][HBFIRFilterTraits<HBFilterOrder>::hbOrder];    // double buffer technique
+    double m_odd[2][HBFIRFilterTraits<HBFilterOrder>::hbOrder];     // double buffer technique
+    double m_samples[HBFIRFilterTraits<HBFilterOrder>::hbOrder][2]; // double buffer technique
+
+    int m_ptr;
+    int m_size;
+    int m_state;
+
+    void storeSample(float x, float y)
+    {
+        if ((m_ptr % 2) == 0)
+        {
+            m_even[0][m_ptr/2] = x;
+            m_even[1][m_ptr/2] = y;
+            m_even[0][m_ptr/2 + m_size] = x;
+            m_even[1][m_ptr/2 + m_size] = y;
+        }
+        else
+        {
+            m_odd[0][m_ptr/2] = x;
+            m_odd[1][m_ptr/2] = y;
+            m_odd[0][m_ptr/2 + m_size] = x;
+            m_odd[1][m_ptr/2 + m_size] = y;
+        }
+    }
+
+    void advancePointer()
+    {
+        m_ptr = m_ptr + 1 < 2*m_size ? m_ptr + 1: 0;
+    }
+
+    void doFIR(float *x, float *y)
+    {
+        double iAcc = 0;
+        double qAcc = 0;
+
+//#if defined(USE_SSE4_1) && !defined(NO_DSP_SIMD)
+//        IntHalfbandFilterEO1Intrisics<HBFilterOrder>::work(
+//                m_ptr,
+//                m_even,
+//                m_odd,
+//                iAcc,
+//                qAcc
+//        );
+//#else
+        int a = m_ptr/2 + m_size; // tip pointer
+        int b = m_ptr/2 + 1; // tail pointer
+
+        for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 4; i++)
+        {
+            if ((m_ptr % 2) == 0)
+            {
+                iAcc += (m_even[0][a] + m_even[0][b]) * HBFIRFilterTraits<HBFilterOrder>::hbCoeffsF[i];
+                qAcc += (m_even[1][a] + m_even[1][b]) * HBFIRFilterTraits<HBFilterOrder>::hbCoeffsF[i];
+            }
+            else
+            {
+                iAcc += (m_odd[0][a] + m_odd[0][b]) * HBFIRFilterTraits<HBFilterOrder>::hbCoeffsF[i];
+                qAcc += (m_odd[1][a] + m_odd[1][b]) * HBFIRFilterTraits<HBFilterOrder>::hbCoeffsF[i];
+            }
+
+            a -= 1;
+            b += 1;
+        }
+//#endif
+        if ((m_ptr % 2) == 0)
+        {
+            iAcc += m_odd[0][m_ptr/2 + m_size/2] * 0.5f;
+            qAcc += m_odd[1][m_ptr/2 + m_size/2] * 0.5f;
+        }
+        else
+        {
+            iAcc += m_even[0][m_ptr/2 + m_size/2 + 1] * 0.5f;
+            qAcc += m_even[1][m_ptr/2 + m_size/2 + 1] * 0.5f;
+        }
+
+        *x = iAcc; // HB_SHIFT incorrect do not loose the gained bit
+        *y = qAcc;
+    }
+
+    void doInterpolateFIR(float *x, float *y)
+    {
+        qint32 iAcc = 0;
+        qint32 qAcc = 0;
+
+        qint16 a = m_ptr;
+        qint16 b = m_ptr + (HBFIRFilterTraits<HBFilterOrder>::hbOrder / 2) - 1;
+
+        // go through samples in buffer
+        for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 4; i++)
+        {
+            iAcc += (m_samples[a][0] + m_samples[b][0]) * HBFIRFilterTraits<HBFilterOrder>::hbCoeffsF[i];
+            qAcc += (m_samples[a][1] + m_samples[b][1]) * HBFIRFilterTraits<HBFilterOrder>::hbCoeffsF[i];
+            a++;
+            b--;
+        }
+
+        *x = iAcc * SDR_RX_SCALED;
+        *y = qAcc * SDR_RX_SCALED;
+    }
+};
+
+template<uint32_t HBFilterOrder>
+IntHalfbandFilterEOF<HBFilterOrder>::IntHalfbandFilterEOF()
+{
+    m_size = HBFIRFilterTraits<HBFilterOrder>::hbOrder/2;
+
+    for (int i = 0; i < 2*m_size; i++)
+    {
+        m_even[0][i] = 0.0f;
+        m_even[1][i] = 0.0f;
+        m_odd[0][i] = 0.0f;
+        m_odd[1][i] = 0.0f;
+        m_samples[i][0] = 0.0f;
+        m_samples[i][1] = 0.0f;
+    }
+
+    m_ptr = 0;
+    m_state = 0;
+}
+
+#endif /* SDRBASE_DSP_INTHALFBANDFILTEREOF_H_ */