diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e1802c2d..7147bdf69 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -211,8 +211,6 @@ set(sdrbase_HEADERS sdrbase/dsp/inthalfbandfilterdb.h sdrbase/dsp/inthalfbandfiltereo1.h sdrbase/dsp/inthalfbandfiltereo1i.h - sdrbase/dsp/inthalfbandfiltereo2.h - sdrbase/dsp/inthalfbandfiltereo2i.h sdrbase/dsp/inthalfbandfilterst.h sdrbase/dsp/inthalfbandfiltersti.h sdrbase/dsp/kissfft.h diff --git a/sdrbase/dsp/inthalfbandfiltereo1.h b/sdrbase/dsp/inthalfbandfiltereo1.h index c6661128b..f7385b8aa 100644 --- a/sdrbase/dsp/inthalfbandfiltereo1.h +++ b/sdrbase/dsp/inthalfbandfiltereo1.h @@ -23,6 +23,7 @@ #define SDRBASE_DSP_INTHALFBANDFILTEREO_H_ #include +#include #include "dsp/dsptypes.h" #include "dsp/hbfiltertraits.h" #include "dsp/inthalfbandfiltereo1i.h" @@ -93,7 +94,7 @@ public: } } - bool workDecimateCenter(qint32 *x, qint32 *y) + bool workDecimateCenter(int32_t *x, int32_t *y) { // insert sample into ring-buffer storeSample(*x, *y); @@ -394,7 +395,7 @@ public: advancePointer(); } - void myDecimate(qint32 x1, qint32 y1, qint32 *x2, qint32 *y2) + void myDecimate(int32_t x1, int32_t y1, int32_t *x2, int32_t *y2) { storeSample(x1, y1); advancePointer(); @@ -405,8 +406,8 @@ public: } protected: - qint32 m_even[2][HBFIRFilterTraits::hbOrder]; // double buffer technique - qint32 m_odd[2][HBFIRFilterTraits::hbOrder]; // double buffer technique + int32_t m_even[2][HBFIRFilterTraits::hbOrder]; // double buffer technique + int32_t m_odd[2][HBFIRFilterTraits::hbOrder]; // double buffer technique int m_ptr; int m_size; @@ -430,7 +431,7 @@ protected: } } - void storeSample(qint32 x, qint32 y) + void storeSample(int32_t x, int32_t y) { if ((m_ptr % 2) == 0) { @@ -453,10 +454,15 @@ protected: m_ptr = m_ptr + 1 < 2*m_size ? m_ptr + 1: 0; } + int32_t rand(int32_t mod) + { + return (RAND_MAX/2 - std::rand()) % mod; + } + void doFIR(Sample* sample) { - qint32 iAcc = 0; - qint32 qAcc = 0; + int32_t iAcc = 0; + int32_t qAcc = 0; #ifdef USE_SSE4_1 IntHalfbandFilterEO1Intrisics::work( @@ -490,23 +496,23 @@ protected: if ((m_ptr % 2) == 0) { - iAcc += ((qint32)m_odd[0][m_ptr/2 + m_size/2]) << (HBFIRFilterTraits::hbShift - 1); - qAcc += ((qint32)m_odd[1][m_ptr/2 + m_size/2]) << (HBFIRFilterTraits::hbShift - 1); + iAcc += ((int32_t)m_odd[0][m_ptr/2 + m_size/2]) << (HBFIRFilterTraits::hbShift - 1); + qAcc += ((int32_t)m_odd[1][m_ptr/2 + m_size/2]) << (HBFIRFilterTraits::hbShift - 1); } else { - iAcc += ((qint32)m_even[0][m_ptr/2 + m_size/2 + 1]) << (HBFIRFilterTraits::hbShift - 1); - qAcc += ((qint32)m_even[1][m_ptr/2 + m_size/2 + 1]) << (HBFIRFilterTraits::hbShift - 1); + iAcc += ((int32_t)m_even[0][m_ptr/2 + m_size/2 + 1]) << (HBFIRFilterTraits::hbShift - 1); + qAcc += ((int32_t)m_even[1][m_ptr/2 + m_size/2 + 1]) << (HBFIRFilterTraits::hbShift - 1); } sample->setReal(iAcc >> HBFIRFilterTraits::hbShift -1); sample->setImag(qAcc >> HBFIRFilterTraits::hbShift -1); } - void doFIR(qint32 *x, qint32 *y) + void doFIR(int32_t *x, int32_t *y) { - qint32 iAcc = 0; - qint32 qAcc = 0; + int32_t iAcc = 0; + int32_t qAcc = 0; #ifdef USE_SSE4_1 IntHalfbandFilterEO1Intrisics::work( @@ -539,13 +545,13 @@ protected: #endif if ((m_ptr % 2) == 0) { - iAcc += ((qint32)m_odd[0][m_ptr/2 + m_size/2]) << (HBFIRFilterTraits::hbShift - 1); - qAcc += ((qint32)m_odd[1][m_ptr/2 + m_size/2]) << (HBFIRFilterTraits::hbShift - 1); + iAcc += ((int32_t)m_odd[0][m_ptr/2 + m_size/2]) << (HBFIRFilterTraits::hbShift - 1); + qAcc += ((int32_t)m_odd[1][m_ptr/2 + m_size/2]) << (HBFIRFilterTraits::hbShift - 1); } else { - iAcc += ((qint32)m_even[0][m_ptr/2 + m_size/2 + 1]) << (HBFIRFilterTraits::hbShift - 1); - qAcc += ((qint32)m_even[1][m_ptr/2 + m_size/2 + 1]) << (HBFIRFilterTraits::hbShift - 1); + iAcc += ((int32_t)m_even[0][m_ptr/2 + m_size/2 + 1]) << (HBFIRFilterTraits::hbShift - 1); + qAcc += ((int32_t)m_even[1][m_ptr/2 + m_size/2 + 1]) << (HBFIRFilterTraits::hbShift - 1); } *x = iAcc >> (HBFIRFilterTraits::hbShift -1); // HB_SHIFT incorrect do not loose the gained bit diff --git a/sdrbase/dsp/inthalfbandfiltereo2.h b/sdrbase/dsp/inthalfbandfiltereo2.h deleted file mode 100644 index 16ebab6fb..000000000 --- a/sdrbase/dsp/inthalfbandfiltereo2.h +++ /dev/null @@ -1,609 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////////// -// Copyright (C) 2016 F4EXB // -// written by Edouard Griffiths // -// // -// Integer half-band FIR based interpolator and decimator // -// This is the even/odd double buffer variant. Really useful only when SIMD is // -// used // -// // -// This program is free software; you can redistribute it and/or modify // -// it under the terms of the GNU General Public License as published by // -// the Free Software Foundation as version 3 of the License, or // -// // -// This program is distributed in the hope that it will be useful, // -// but WITHOUT ANY WARRANTY; without even the implied warranty of // -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // -// GNU General Public License V3 for more details. // -// // -// You should have received a copy of the GNU General Public License // -// along with this program. If not, see . // -/////////////////////////////////////////////////////////////////////////////////// - -#ifndef SDRBASE_DSP_INTHALFBANDFILTEREO2_H_ -#define SDRBASE_DSP_INTHALFBANDFILTEREO2_H_ - -#ifdef USE_SSE4_1 -#include -#endif - -#ifdef USE_NEON -#include -#endif - -#include -#include "dsp/dsptypes.h" -#include "dsp/hbfiltertraits.h" -#include "dsp/inthalfbandfiltereo2i.h" -#include "util/export.h" - -template -class SDRANGEL_API IntHalfbandFilterEO2 { -public: - IntHalfbandFilterEO2(); - - // downsample by 2, return center part of original spectrum - bool workDecimateCenter(Sample* sample) - { - // insert sample into ring-buffer - storeSample((FixReal) sample->real(), (FixReal) sample->imag()); - - switch(m_state) - { - case 0: - // advance write-pointer - advancePointer(); - // next state - m_state = 1; - // tell caller we don't have a new sample - return false; - - default: - // save result - doFIR(sample); - // advance write-pointer - advancePointer(); - // next state - m_state = 0; - - // tell caller we have a new sample - return true; - } - } - - // upsample by 2, return center part of original spectrum - double buffer variant - bool workInterpolateCenter(Sample* sampleIn, Sample *SampleOut) - { - switch(m_state) - { - case 0: - // insert sample into ring-buffer - storeSample(0, 0); - // save result - doFIR(SampleOut); - // advance write-pointer - advancePointer(); - // next state - m_state = 1; - // tell caller we didn't consume the sample - return false; - - default: - // insert sample into ring-buffer - storeSample((FixReal) sampleIn->real(), (FixReal) sampleIn->imag()); - // save result - doFIR(SampleOut); - // advance write-pointer - advancePointer(); - // next state - m_state = 0; - // tell caller we consumed the sample - return true; - } - } - - bool workDecimateCenter(qint32 *x, qint32 *y) - { - // insert sample into ring-buffer - storeSample(*x, *y); - - switch(m_state) - { - case 0: - // advance write-pointer - advancePointer(); - // next state - m_state = 1; - // tell caller we don't have a new sample - return false; - - default: - // save result - doFIR(x, y); - // advance write-pointer - advancePointer(); - // next state - m_state = 0; - // tell caller we have a new sample - return true; - } - } - - // downsample by 2, return lower half of original spectrum - bool workDecimateLowerHalf(Sample* sample) - { - switch(m_state) - { - case 0: - // insert sample into ring-buffer - storeSample((FixReal) -sample->imag(), (FixReal) sample->real()); - // advance write-pointer - advancePointer(); - // next state - m_state = 1; - // tell caller we don't have a new sample - return false; - - case 1: - // insert sample into ring-buffer - storeSample((FixReal) -sample->real(), (FixReal) -sample->imag()); - // save result - doFIR(sample); - // advance write-pointer - advancePointer(); - // next state - m_state = 2; - // tell caller we have a new sample - return true; - - case 2: - // insert sample into ring-buffer - storeSample((FixReal) sample->imag(), (FixReal) -sample->real()); - // advance write-pointer - advancePointer(); - // next state - m_state = 3; - // tell caller we don't have a new sample - return false; - - default: - // insert sample into ring-buffer - storeSample((FixReal) sample->real(), (FixReal) sample->imag()); - // save result - doFIR(sample); - // advance write-pointer - advancePointer(); - // next state - m_state = 0; - // tell caller we have a new sample - return true; - } - } - - // upsample by 2, from lower half of original spectrum - double buffer variant - bool workInterpolateLowerHalf(Sample* sampleIn, Sample *sampleOut) - { - Sample s; - - switch(m_state) - { - case 0: - // insert sample into ring-buffer - storeSample(0, 0); - - // save result - doFIR(&s); - sampleOut->setReal(s.imag()); - sampleOut->setImag(-s.real()); - - // advance write-pointer - advancePointer(); - - // next state - m_state = 1; - - // tell caller we didn't consume the sample - return false; - - case 1: - // insert sample into ring-buffer - storeSample((FixReal) sampleIn->real(), (FixReal) sampleIn->imag()); - - // save result - doFIR(&s); - sampleOut->setReal(-s.real()); - sampleOut->setImag(-s.imag()); - - // advance write-pointer - advancePointer(); - - // next state - m_state = 2; - - // tell caller we consumed the sample - return true; - - case 2: - // insert sample into ring-buffer - storeSample(0, 0); - - // save result - doFIR(&s); - sampleOut->setReal(-s.imag()); - sampleOut->setImag(s.real()); - - // advance write-pointer - advancePointer(); - - // next state - m_state = 3; - - // tell caller we didn't consume the sample - return false; - - default: - // insert sample into ring-buffer - storeSample((FixReal) sampleIn->real(), (FixReal) sampleIn->imag()); - - // save result - doFIR(&s); - sampleOut->setReal(s.real()); - sampleOut->setImag(s.imag()); - - // advance write-pointer - advancePointer(); - - // next state - m_state = 0; - - // tell caller we consumed the sample - return true; - } - } - - // downsample by 2, return upper half of original spectrum - bool workDecimateUpperHalf(Sample* sample) - { - switch(m_state) - { - case 0: - // insert sample into ring-buffer - storeSample((FixReal) sample->imag(), (FixReal) -sample->real()); - // advance write-pointer - advancePointer(); - // next state - m_state = 1; - // tell caller we don't have a new sample - return false; - - case 1: - // insert sample into ring-buffer - storeSample((FixReal) -sample->real(), (FixReal) -sample->imag()); - // save result - doFIR(sample); - // advance write-pointer - advancePointer(); - // next state - m_state = 2; - // tell caller we have a new sample - return true; - - case 2: - // insert sample into ring-buffer - storeSample((FixReal) -sample->imag(), (FixReal) sample->real()); - // advance write-pointer - advancePointer(); - // next state - m_state = 3; - // tell caller we don't have a new sample - return false; - - default: - // insert sample into ring-buffer - storeSample((FixReal) sample->real(), (FixReal) sample->imag()); - // save result - doFIR(sample); - // advance write-pointer - advancePointer(); - // next state - m_state = 0; - // tell caller we have a new sample - return true; - } - } - - // upsample by 2, move original spectrum to upper half - double buffer variant - bool workInterpolateUpperHalf(Sample* sampleIn, Sample *sampleOut) - { - Sample s; - - switch(m_state) - { - case 0: - // insert sample into ring-buffer - storeSample(0, 0); - - // save result - doFIR(&s); - sampleOut->setReal(-s.imag()); - sampleOut->setImag(s.real()); - - // advance write-pointer - advancePointer(); - - // next state - m_state = 1; - - // tell caller we didn't consume the sample - return false; - - case 1: - // insert sample into ring-buffer - storeSample((FixReal) sampleIn->real(), (FixReal) sampleIn->imag()); - - // save result - doFIR(&s); - sampleOut->setReal(-s.real()); - sampleOut->setImag(-s.imag()); - - // advance write-pointer - advancePointer(); - - // next state - m_state = 2; - - // tell caller we consumed the sample - return true; - - case 2: - // insert sample into ring-buffer - storeSample(0, 0); - - // save result - doFIR(&s); - sampleOut->setReal(s.imag()); - sampleOut->setImag(-s.real()); - - // advance write-pointer - advancePointer(); - - // next state - m_state = 3; - - // tell caller we didn't consume the sample - return false; - - default: - // insert sample into ring-buffer - storeSample((FixReal) sampleIn->real(), (FixReal) sampleIn->imag()); - - // save result - doFIR(&s); - sampleOut->setReal(s.real()); - sampleOut->setImag(s.imag()); - - // advance write-pointer - advancePointer(); - - // next state - m_state = 0; - - // tell caller we consumed the sample - return true; - } - } - - void myDecimate(const Sample* sample1, Sample* sample2) - { - storeSample((FixReal) sample1->real(), (FixReal) sample1->imag()); - advancePointer(); - - storeSample((FixReal) sample2->real(), (FixReal) sample2->imag()); - doFIR(sample2); - advancePointer(); - } - - void myDecimate(qint32 x1, qint32 y1, qint32 *x2, qint32 *y2) - { - storeSample(x1, y1); - advancePointer(); - - storeSample(*x2, *y2); - doFIR(x2, y2); - advancePointer(); - } - -protected: - qint32 m_evenB[2][HBFIRFilterTraits::hbOrder]; // double buffer technique - qint32 m_oddB[2][HBFIRFilterTraits::hbOrder]; // double buffer technique - qint32 m_evenA[2][HBFIRFilterTraits::hbOrder]; // double buffer technique - qint32 m_oddA[2][HBFIRFilterTraits::hbOrder]; // double buffer technique - - int m_ptrA; - int m_ptrB; - int m_size; - int m_state; - - void storeSample(const FixReal& sampleI, const FixReal& sampleQ) - { - if ((m_ptrB % 2) == 0) - { - m_evenB[0][m_ptrB/2] = sampleI; - m_evenB[1][m_ptrB/2] = sampleQ; - m_evenB[0][m_ptrB/2 + m_size] = sampleI; - m_evenB[1][m_ptrB/2 + m_size] = sampleQ; - m_evenA[0][m_ptrA/2] = sampleI; - m_evenA[1][m_ptrA/2] = sampleQ; - m_evenA[0][m_ptrA/2 + m_size] = sampleI; - m_evenA[1][m_ptrA/2 + m_size] = sampleQ; - } - else - { - m_oddB[0][m_ptrB/2] = sampleI; - m_oddB[1][m_ptrB/2] = sampleQ; - m_oddB[0][m_ptrB/2 + m_size] = sampleI; - m_oddB[1][m_ptrB/2 + m_size] = sampleQ; - m_oddA[0][m_ptrA/2] = sampleI; - m_oddA[1][m_ptrA/2] = sampleQ; - m_oddA[0][m_ptrA/2 + m_size] = sampleI; - m_oddA[1][m_ptrA/2 + m_size] = sampleQ; - } - } - - void storeSample(qint32 x, qint32 y) - { - if ((m_ptrB % 2) == 0) - { - m_evenB[0][m_ptrB/2] = x; - m_evenB[1][m_ptrB/2] = y; - m_evenB[0][m_ptrB/2 + m_size] = x; - m_evenB[1][m_ptrB/2 + m_size] = y; - m_evenA[0][m_ptrA/2] = x; - m_evenA[1][m_ptrA/2] = y; - m_evenA[0][m_ptrA/2 + m_size] = x; - m_evenA[1][m_ptrA/2 + m_size] = y; - } - else - { - m_oddB[0][m_ptrB/2] = x; - m_oddB[1][m_ptrB/2] = y; - m_oddB[0][m_ptrB/2 + m_size] = x; - m_oddB[1][m_ptrB/2 + m_size] = y; - m_oddA[0][m_ptrA/2] = x; - m_oddA[1][m_ptrA/2] = y; - m_oddA[0][m_ptrA/2 + m_size] = x; - m_oddA[1][m_ptrA/2 + m_size] = y; - } - } - - void advancePointer() - { - m_ptrA = (m_ptrA - 1 + 2*m_size) % (2*m_size); - m_ptrB = (m_ptrB + 1) % (2*m_size); - } - - void doFIR(Sample* sample) - { - int a = m_ptrA/2; // tip pointer - int b = m_ptrB/2 + 1; // tail pointer - - qint32 iAcc = 0; - qint32 qAcc = 0; - -#if defined(USE_AVX2) || defined(USE_SSE4_1) || defined(USE_NEON) - IntHalfbandFilterEO2Intrisics::work( - m_ptrA, - m_ptrB, - m_evenA, - m_evenB, - m_oddA, - m_oddB, - iAcc, - qAcc - ); -#else - for (int i = 0; i < HBFIRFilterTraits::hbOrder / 4; i++) - { - if ((m_ptrB % 2) == 0) - { - iAcc += (m_evenA[0][a] + m_evenB[0][b]) * HBFIRFilterTraits::hbCoeffs[i]; - qAcc += (m_evenA[1][a] + m_evenB[1][b]) * HBFIRFilterTraits::hbCoeffs[i]; - } - else - { - iAcc += (m_oddA[0][a] + m_oddB[0][b]) * HBFIRFilterTraits::hbCoeffs[i]; - qAcc += (m_oddA[1][a] + m_oddB[1][b]) * HBFIRFilterTraits::hbCoeffs[i]; - } - - a += 1; - b += 1; - } -#endif - - if ((m_ptrB % 2) == 0) - { - iAcc += ((qint32)m_oddB[0][m_ptrB/2 + m_size/2]) << (HBFIRFilterTraits::hbShift - 1); - qAcc += ((qint32)m_oddB[1][m_ptrB/2 + m_size/2]) << (HBFIRFilterTraits::hbShift - 1); - } - else - { - iAcc += ((qint32)m_evenB[0][m_ptrB/2 + m_size/2 + 1]) << (HBFIRFilterTraits::hbShift - 1); - qAcc += ((qint32)m_evenB[1][m_ptrB/2 + m_size/2 + 1]) << (HBFIRFilterTraits::hbShift - 1); - } - - sample->setReal(iAcc >> HBFIRFilterTraits::hbShift -1); - sample->setImag(qAcc >> HBFIRFilterTraits::hbShift -1); - } - - void doFIR(qint32 *x, qint32 *y) - { - int a = m_ptrA/2; // tip pointer - int b = m_ptrB/2 + 1; // tail pointer - - qint32 iAcc = 0; - qint32 qAcc = 0; - -#if defined(USE_AVX2) || defined(USE_SSE4_1) || defined(USE_NEON) - IntHalfbandFilterEO2Intrisics::work( - m_ptrA, - m_ptrB, - m_evenA, - m_evenB, - m_oddA, - m_oddB, - iAcc, - qAcc - ); -#else - for (int i = 0; i < HBFIRFilterTraits::hbOrder / 4; i++) - { - if ((m_ptrB % 2) == 0) - { - iAcc += (m_evenA[0][a] + m_evenB[0][b]) * HBFIRFilterTraits::hbCoeffs[i]; - qAcc += (m_evenA[1][a] + m_evenB[1][b]) * HBFIRFilterTraits::hbCoeffs[i]; - } - else - { - iAcc += (m_oddA[0][a] + m_oddB[0][b]) * HBFIRFilterTraits::hbCoeffs[i]; - qAcc += (m_oddA[1][a] + m_oddB[1][b]) * HBFIRFilterTraits::hbCoeffs[i]; - } - - a += 1; - b += 1; - } -#endif - - if ((m_ptrB % 2) == 0) - { - iAcc += ((qint32)m_oddB[0][m_ptrB/2 + m_size/2]) << (HBFIRFilterTraits::hbShift - 1); - qAcc += ((qint32)m_oddB[1][m_ptrB/2 + m_size/2]) << (HBFIRFilterTraits::hbShift - 1); - } - else - { - iAcc += ((qint32)m_evenB[0][m_ptrB/2 + m_size/2 + 1]) << (HBFIRFilterTraits::hbShift - 1); - qAcc += ((qint32)m_evenB[1][m_ptrB/2 + m_size/2 + 1]) << (HBFIRFilterTraits::hbShift - 1); - } - - *x = iAcc >> (HBFIRFilterTraits::hbShift -1); // HB_SHIFT incorrect do not loose the gained bit - *y = qAcc >> (HBFIRFilterTraits::hbShift -1); - } -}; - -template -IntHalfbandFilterEO2::IntHalfbandFilterEO2() -{ - m_size = HBFIRFilterTraits::hbOrder/2; - - for (int i = 0; i < 2*m_size; i++) - { - m_evenB[0][i] = 0; - m_evenB[1][i] = 0; - m_oddB[0][i] = 0; - m_oddB[1][i] = 0; - } - - m_ptrA = 0; - m_ptrB = 0; - m_state = 0; -} - -#endif /* SDRBASE_DSP_INTHALFBANDFILTEREO2_H_ */ diff --git a/sdrbase/dsp/inthalfbandfiltereo2i.h b/sdrbase/dsp/inthalfbandfiltereo2i.h deleted file mode 100644 index e3b1df4d4..000000000 --- a/sdrbase/dsp/inthalfbandfiltereo2i.h +++ /dev/null @@ -1,547 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////////// -// Copyright (C) 2016 F4EXB // -// written by Edouard Griffiths // -// // -// Integer half-band FIR based interpolator and decimator // -// This is the even/odd double buffer variant. Really useful only when SIMD is // -// used // -// // -// This program is free software; you can redistribute it and/or modify // -// it under the terms of the GNU General Public License as published by // -// the Free Software Foundation as version 3 of the License, or // -// // -// This program is distributed in the hope that it will be useful, // -// but WITHOUT ANY WARRANTY; without even the implied warranty of // -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // -// GNU General Public License V3 for more details. // -// // -// You should have received a copy of the GNU General Public License // -// along with this program. If not, see . // -/////////////////////////////////////////////////////////////////////////////////// - -#ifndef INCLUDE_INTHALFBANDFILTEREO2I_H_ -#define INCLUDE_INTHALFBANDFILTEREO2I_H_ - -#include - -#if defined(USE_AVX2) -#include -#elif defined(USE_SSE4_1) -#include -#elif defined(USE_NEON) -#include -#endif - -#include "hbfiltertraits.h" - -template -class IntHalfbandFilterEO2Intrisics -{ -public: - static void work( - int ptrA, - int ptrB, - int32_t evenA[2][HBFilterOrder], - int32_t evenB[2][HBFilterOrder], - int32_t oddA[2][HBFilterOrder], - int32_t oddB[2][HBFilterOrder], - int32_t& iAcc, int32_t& qAcc) - { -#if defined(USE_SSE4_1) - int a = ptrA/2; // tip pointer - int b = ptrB/2 + 1; // tail pointer - const __m128i* h = (const __m128i*) HBFIRFilterTraits::hbCoeffs; - __m128i sumI = _mm_setzero_si128(); - __m128i sumQ = _mm_setzero_si128(); - __m128i sa, sb; - - for (int i = 0; i < HBFIRFilterTraits::hbOrder / 16; i++) - { - if ((ptrB % 2) == 0) - { - sa = _mm_loadu_si128((__m128i*) &(evenA[0][a])); - sb = _mm_loadu_si128((__m128i*) &(evenB[0][b])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - - sa = _mm_loadu_si128((__m128i*) &(evenA[1][a])); - sb = _mm_loadu_si128((__m128i*) &(evenB[1][b])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - } - else - { - sa = _mm_loadu_si128((__m128i*) &(oddA[0][a])); - sb = _mm_loadu_si128((__m128i*) &(oddB[0][b])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - - sa = _mm_loadu_si128((__m128i*) &(oddA[1][a])); - sb = _mm_loadu_si128((__m128i*) &(oddB[1][b])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - } - - a += 4; - b += 4; - ++h; - } - - // horizontal add of four 32 bit partial sums - - sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 8)); - sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 4)); - iAcc = _mm_cvtsi128_si32(sumI); - - sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 8)); - sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 4)); - qAcc = _mm_cvtsi128_si32(sumQ); - -#elif defined(USE_NEON) - int a = ptrA/2; // tip pointer - int b = ptrB/2 + 1; // tail pointer - int32x4_t sumI = vdupq_n_s32(0); - int32x4_t sumQ = vdupq_n_s32(0); - int32x4_t sa, sb, sh; - - for (int i = 0; i < HBFIRFilterTraits::hbOrder / 16; i++) - { - sh = vld1q_s32(&HBFIRFilterTraits::hbCoeffs[4*i]); - - if ((ptrB % 2) == 0) - { - sa = vld1q_s32(&(evenA[0][a])); - sb = vld1q_s32(&(evenB[0][b])); - sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh); - - sa = vld1q_s32(&(evenA[1][a])); - sb = vld1q_s32(&(evenB[1][b])); - sumQ = vmlaq_s32(sumQ, vaddq_s32(sa, sb), sh); - } - else - { - sa = vld1q_s32(&(oddA[0][a])); - sb = vld1q_s32(&(oddB[0][b])); - sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh); - - sa = vld1q_s32(&(oddA[1][a])); - sb = vld1q_s32(&(oddB[1][b])); - sumQ = vmlaq_s32(sumQ, vaddq_s32(sa, sb), sh); - } - - a += 4; - b += 4; - } - - int32x2_t sumI1 = vpadd_s32(vget_high_s32(sumI), vget_low_s32(sumI)); - int32x2_t sumI2 = vpadd_s32(sumI1, sumI1); - iAcc = vget_lane_s32(sumI2, 0); - - int32x2_t sumQ1 = vpadd_s32(vget_high_s32(sumQ), vget_low_s32(sumQ)); - int32x2_t sumQ2 = vpadd_s32(sumQ1, sumQ1); - qAcc = vget_lane_s32(sumQ2, 0); -#endif - } -}; - -template<> -class IntHalfbandFilterEO2Intrisics<48> -{ -public: - static void work( - int ptrA, - int ptrB, - int32_t evenA[2][48], - int32_t evenB[2][48], - int32_t oddA[2][48], - int32_t oddB[2][48], - int32_t& iAcc, int32_t& qAcc) - { -#if defined(USE_SSE4_1) - int a = ptrA/2; // tip pointer - int b = ptrB/2 + 1; // tail pointer - const __m128i* h = (const __m128i*) HBFIRFilterTraits<48>::hbCoeffs; - __m128i sumI = _mm_setzero_si128(); - __m128i sumQ = _mm_setzero_si128(); - __m128i sa, sb; - - if ((ptrB % 2) == 0) - { - // 0 - sa = _mm_loadu_si128((__m128i*) &(evenA[0][a])); - sb = _mm_loadu_si128((__m128i*) &(evenB[0][b])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - sa = _mm_loadu_si128((__m128i*) &(evenA[1][a])); - sb = _mm_loadu_si128((__m128i*) &(evenB[1][b])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - h++; - // 1 - sa = _mm_loadu_si128((__m128i*) &(evenA[0][a+4])); - sb = _mm_loadu_si128((__m128i*) &(evenB[0][b+4])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - sa = _mm_loadu_si128((__m128i*) &(evenA[1][a+4])); - sb = _mm_loadu_si128((__m128i*) &(evenB[1][b+4])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - h++; - // 2 - sa = _mm_loadu_si128((__m128i*) &(evenA[0][a+8])); - sb = _mm_loadu_si128((__m128i*) &(evenB[0][b+8])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - sa = _mm_loadu_si128((__m128i*) &(evenA[1][a+8])); - sb = _mm_loadu_si128((__m128i*) &(evenB[1][b+8])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - } - else - { - // 0 - sa = _mm_loadu_si128((__m128i*) &(oddA[0][a])); - sb = _mm_loadu_si128((__m128i*) &(oddB[0][b])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - sa = _mm_loadu_si128((__m128i*) &(oddA[1][a])); - sb = _mm_loadu_si128((__m128i*) &(oddB[1][b])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - h++; - // 1 - sa = _mm_loadu_si128((__m128i*) &(oddA[0][a+4])); - sb = _mm_loadu_si128((__m128i*) &(oddB[0][b+4])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - sa = _mm_loadu_si128((__m128i*) &(oddA[1][a+4])); - sb = _mm_loadu_si128((__m128i*) &(oddB[1][b+4])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - h++; - // 2 - sa = _mm_loadu_si128((__m128i*) &(oddA[0][a+8])); - sb = _mm_loadu_si128((__m128i*) &(oddB[0][b+8])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - sa = _mm_loadu_si128((__m128i*) &(oddA[1][a+8])); - sb = _mm_loadu_si128((__m128i*) &(oddB[1][b+8])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), *h)); - } - - // horizontal add of four 32 bit partial sums - - sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 8)); - sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 4)); - iAcc = _mm_cvtsi128_si32(sumI); - - sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 8)); - sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 4)); - qAcc = _mm_cvtsi128_si32(sumQ); - -#elif defined(USE_NEON) - int a = ptrA/2; // tip pointer - int b = ptrB/2 + 1; // tail pointer - int32x4_t sumI = vdupq_n_s32(0); - int32x4_t sumQ = vdupq_n_s32(0); - int32x4x3_t sh = vld3q_s32((int32_t const *) &HBFIRFilterTraits<48>::hbCoeffs[0]); - int32x4x3_t sa, sb; - - if ((ptrB % 2) == 0) - { - sa = vld3q_s32((int32_t const *) &(evenA[0][a])); - sb = vld3q_s32((int32_t const *) &(evenB[0][b])); - sumI = vmlaq_s32(sumI, vaddq_s32(sa.val[0], sb.val[0]), sh.val[0]); - sumI = vmlaq_s32(sumI, vaddq_s32(sa.val[1], sb.val[1]), sh.val[1]); - sumI = vmlaq_s32(sumI, vaddq_s32(sa.val[2], sb.val[2]), sh.val[2]); - sa = vld3q_s32((int32_t const *) &(evenA[1][a])); - sb = vld3q_s32((int32_t const *) &(evenB[1][b])); - sumQ = vmlaq_s32(sumQ, vaddq_s32(sa.val[0], sb.val[0]), sh.val[0]); - sumQ = vmlaq_s32(sumQ, vaddq_s32(sa.val[1], sb.val[1]), sh.val[1]); - sumQ = vmlaq_s32(sumQ, vaddq_s32(sa.val[2], sb.val[2]), sh.val[2]); - } - else - { - sa = vld3q_s32((int32_t const *) &(oddA[0][a])); - sb = vld3q_s32((int32_t const *) &(oddB[0][b])); - sumI = vmlaq_s32(sumI, vaddq_s32(sa.val[0], sb.val[0]), sh.val[0]); - sumI = vmlaq_s32(sumI, vaddq_s32(sa.val[1], sb.val[1]), sh.val[1]); - sumI = vmlaq_s32(sumI, vaddq_s32(sa.val[2], sb.val[2]), sh.val[2]); - sa = vld3q_s32((int32_t const *) &(oddA[1][a])); - sb = vld3q_s32((int32_t const *) &(oddB[1][b])); - sumQ = vmlaq_s32(sumQ, vaddq_s32(sa.val[0], sb.val[0]), sh.val[0]); - sumQ = vmlaq_s32(sumQ, vaddq_s32(sa.val[1], sb.val[1]), sh.val[1]); - sumQ = vmlaq_s32(sumQ, vaddq_s32(sa.val[2], sb.val[2]), sh.val[2]); - } - - int32x2_t sumI1 = vpadd_s32(vget_high_s32(sumI), vget_low_s32(sumI)); - int32x2_t sumI2 = vpadd_s32(sumI1, sumI1); - iAcc = vget_lane_s32(sumI2, 0); - - int32x2_t sumQ1 = vpadd_s32(vget_high_s32(sumQ), vget_low_s32(sumQ)); - int32x2_t sumQ2 = vpadd_s32(sumQ1, sumQ1); - qAcc = vget_lane_s32(sumQ2, 0); -#endif - } -}; - - -template<> -class IntHalfbandFilterEO2Intrisics<96> -{ -public: - static void work( - int ptrA, - int ptrB, - int32_t evenA[2][96], - int32_t evenB[2][96], - int32_t oddA[2][96], - int32_t oddB[2][96], - int32_t& iAcc, int32_t& qAcc) - { -#if defined(USE_AVX2) - int a = ptrA/2; // tip pointer - int b = ptrB/2 + 1; // tail pointer - const __m256i* h = (const __m256i*) HBFIRFilterTraits<96>::hbCoeffs; - __m256i sumI = _mm256_setzero_si256(); - __m256i sumQ = _mm256_setzero_si256(); - __m256i sa, sb; - - if ((ptrB % 2) == 0) - { - // I - sa = _mm256_loadu_si256((__m256i*) &(evenA[0][a])); - sb = _mm256_loadu_si256((__m256i*) &(evenB[0][b])); - sumI = _mm256_add_epi32(sumI, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[0])); - sa = _mm256_loadu_si256((__m256i*) &(evenA[0][a+8])); - sb = _mm256_loadu_si256((__m256i*) &(evenB[0][b+8])); - sumI = _mm256_add_epi32(sumI, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[1])); - sa = _mm256_loadu_si256((__m256i*) &(evenA[0][a+16])); - sb = _mm256_loadu_si256((__m256i*) &(evenB[0][b+16])); - sumI = _mm256_add_epi32(sumI, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[2])); - // Q - sa = _mm256_loadu_si256((__m256i*) &(evenA[1][a])); - sb = _mm256_loadu_si256((__m256i*) &(evenB[1][b])); - sumQ = _mm256_add_epi32(sumQ, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[0])); - sa = _mm256_loadu_si256((__m256i*) &(evenA[1][a+8])); - sb = _mm256_loadu_si256((__m256i*) &(evenB[1][b+8])); - sumQ = _mm256_add_epi32(sumQ, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[1])); - sa = _mm256_loadu_si256((__m256i*) &(evenA[1][a+16])); - sb = _mm256_loadu_si256((__m256i*) &(evenB[1][b+16])); - sumQ = _mm256_add_epi32(sumQ, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[2])); - } - else - { - // I - sa = _mm256_loadu_si256((__m256i*) &(oddA[0][a])); - sb = _mm256_loadu_si256((__m256i*) &(oddB[0][b])); - sumI = _mm256_add_epi32(sumI, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[0])); - sa = _mm256_loadu_si256((__m256i*) &(oddA[0][a+8])); - sb = _mm256_loadu_si256((__m256i*) &(oddB[0][b+8])); - sumI = _mm256_add_epi32(sumI, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[1])); - sa = _mm256_loadu_si256((__m256i*) &(oddA[0][a+16])); - sb = _mm256_loadu_si256((__m256i*) &(oddB[0][b+16])); - sumI = _mm256_add_epi32(sumI, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[2])); - // Q - sa = _mm256_loadu_si256((__m256i*) &(oddA[1][a])); - sb = _mm256_loadu_si256((__m256i*) &(oddB[1][b])); - sumQ = _mm256_add_epi32(sumQ, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[0])); - sa = _mm256_loadu_si256((__m256i*) &(oddA[1][a+8])); - sb = _mm256_loadu_si256((__m256i*) &(oddB[1][b+8])); - sumQ = _mm256_add_epi32(sumQ, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[1])); - sa = _mm256_loadu_si256((__m256i*) &(oddA[1][a+16])); - sb = _mm256_loadu_si256((__m256i*) &(oddB[1][b+16])); - sumQ = _mm256_add_epi32(sumQ, _mm256_mullo_epi32(_mm256_add_epi32(sa, sb), h[2])); - } - - // horizontal add - - __m128i vloI = _mm256_castsi256_si128(sumI); - vloI = _mm_add_epi32(vloI, _mm_srli_si128(vloI, 8)); - vloI = _mm_add_epi32(vloI, _mm_srli_si128(vloI, 4)); - iAcc = _mm_cvtsi128_si32(vloI); - __m128i vhiI = _mm256_extracti128_si256(sumI, 1); - vhiI = _mm_add_epi32(vhiI, _mm_srli_si128(vhiI, 8)); - vhiI = _mm_add_epi32(vhiI, _mm_srli_si128(vhiI, 4)); - iAcc += _mm_cvtsi128_si32(vhiI); - - __m128i vloQ = _mm256_castsi256_si128(sumQ); - vloQ = _mm_add_epi32(vloQ, _mm_srli_si128(vloQ, 8)); - vloQ = _mm_add_epi32(vloQ, _mm_srli_si128(vloQ, 4)); - qAcc = _mm_cvtsi128_si32(vloQ); - __m128i vhiQ = _mm256_extracti128_si256(sumQ, 1); - vhiQ = _mm_add_epi32(vhiQ, _mm_srli_si128(vhiQ, 8)); - vhiQ = _mm_add_epi32(vhiQ, _mm_srli_si128(vhiQ, 4)); - qAcc += _mm_cvtsi128_si32(vhiQ); - -#elif defined(USE_SSE4_1) - int a = ptrA/2; // tip pointer - int b = ptrB/2 + 1; // tail pointer - const __m128i* h = (const __m128i*) HBFIRFilterTraits<96>::hbCoeffs; - __m128i sumI = _mm_setzero_si128(); - __m128i sumQ = _mm_setzero_si128(); - __m128i sa, sb; - - if ((ptrB % 2) == 0) - { - // I - sa = _mm_loadu_si128((__m128i*) &(evenA[0][a])); - sb = _mm_loadu_si128((__m128i*) &(evenB[0][b])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[0])); - sa = _mm_loadu_si128((__m128i*) &(evenA[0][a+4])); - sb = _mm_loadu_si128((__m128i*) &(evenB[0][b+4])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[1])); - sa = _mm_loadu_si128((__m128i*) &(evenA[0][a+8])); - sb = _mm_loadu_si128((__m128i*) &(evenB[0][b+8])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[2])); - sa = _mm_loadu_si128((__m128i*) &(evenA[0][a+12])); - sb = _mm_loadu_si128((__m128i*) &(evenB[0][b+12])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[3])); - sa = _mm_loadu_si128((__m128i*) &(evenA[0][a+16])); - sb = _mm_loadu_si128((__m128i*) &(evenB[0][b+16])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[4])); - sa = _mm_loadu_si128((__m128i*) &(evenA[0][a+20])); - sb = _mm_loadu_si128((__m128i*) &(evenB[0][b+20])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[5])); - // Q - sa = _mm_loadu_si128((__m128i*) &(evenA[1][a])); - sb = _mm_loadu_si128((__m128i*) &(evenB[1][b])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[0])); - sa = _mm_loadu_si128((__m128i*) &(evenA[1][a+4])); - sb = _mm_loadu_si128((__m128i*) &(evenB[1][b+4])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[1])); - sa = _mm_loadu_si128((__m128i*) &(evenA[1][a+8])); - sb = _mm_loadu_si128((__m128i*) &(evenB[1][b+8])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[2])); - sa = _mm_loadu_si128((__m128i*) &(evenA[1][a+12])); - sb = _mm_loadu_si128((__m128i*) &(evenB[1][b+12])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[3])); - sa = _mm_loadu_si128((__m128i*) &(evenA[1][a+16])); - sb = _mm_loadu_si128((__m128i*) &(evenB[1][b+16])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[4])); - sa = _mm_loadu_si128((__m128i*) &(evenA[1][a+20])); - sb = _mm_loadu_si128((__m128i*) &(evenB[1][b+20])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[5])); - } - else - { - // I - sa = _mm_loadu_si128((__m128i*) &(oddA[0][a])); - sb = _mm_loadu_si128((__m128i*) &(oddB[0][b])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[0])); - sa = _mm_loadu_si128((__m128i*) &(oddA[0][a+4])); - sb = _mm_loadu_si128((__m128i*) &(oddB[0][b+4])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[1])); - sa = _mm_loadu_si128((__m128i*) &(oddA[0][a+8])); - sb = _mm_loadu_si128((__m128i*) &(oddB[0][b+8])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[2])); - sa = _mm_loadu_si128((__m128i*) &(oddA[0][a+12])); - sb = _mm_loadu_si128((__m128i*) &(oddB[0][b+12])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[3])); - sa = _mm_loadu_si128((__m128i*) &(oddA[0][a+16])); - sb = _mm_loadu_si128((__m128i*) &(oddB[0][b+16])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[4])); - sa = _mm_loadu_si128((__m128i*) &(oddA[0][a+20])); - sb = _mm_loadu_si128((__m128i*) &(oddB[0][b+20])); - sumI = _mm_add_epi32(sumI, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[5])); - // Q - sa = _mm_loadu_si128((__m128i*) &(oddA[1][a])); - sb = _mm_loadu_si128((__m128i*) &(oddB[1][b])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[0])); - sa = _mm_loadu_si128((__m128i*) &(oddA[1][a+4])); - sb = _mm_loadu_si128((__m128i*) &(oddB[1][b+4])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[1])); - sa = _mm_loadu_si128((__m128i*) &(oddA[1][a+8])); - sb = _mm_loadu_si128((__m128i*) &(oddB[1][b+8])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[2])); - sa = _mm_loadu_si128((__m128i*) &(oddA[1][a+12])); - sb = _mm_loadu_si128((__m128i*) &(oddB[1][b+12])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[3])); - sa = _mm_loadu_si128((__m128i*) &(oddA[1][a+16])); - sb = _mm_loadu_si128((__m128i*) &(oddB[1][b+16])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[4])); - sa = _mm_loadu_si128((__m128i*) &(oddA[1][a+20])); - sb = _mm_loadu_si128((__m128i*) &(oddB[1][b+20])); - sumQ = _mm_add_epi32(sumQ, _mm_mullo_epi32(_mm_add_epi32(sa, sb), h[5])); - } - - // horizontal add of four 32 bit partial sums - - sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 8)); - sumI = _mm_add_epi32(sumI, _mm_srli_si128(sumI, 4)); - iAcc = _mm_cvtsi128_si32(sumI); - - sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 8)); - sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 4)); - qAcc = _mm_cvtsi128_si32(sumQ); - -#elif defined(USE_NEON) - int a = ptrA/2; // tip pointer - int b = ptrB/2 + 1; // tail pointer - - int32x4_t sumI = vdupq_n_s32(0); - int32x4_t sumQ = vdupq_n_s32(0); - - int32x4x3_t sh = vld3q_s32((int32_t const *) &HBFIRFilterTraits<96>::hbCoeffs[0]); - int32x4x4_t s4a, s4b, s4h; - int32x4x2_t s2a, s2b, s2h; - - if ((ptrB % 2) == 0) - { - s4h = vld4q_s32((int32_t const *) &HBFIRFilterTraits<96>::hbCoeffs[0]); - - s4a = vld4q_s32((int32_t const *) &(evenA[0][a])); - s4b = vld4q_s32((int32_t const *) &(evenB[0][b])); - sumI = vmlaq_s32(sumI, vaddq_s32(s4a.val[0], s4b.val[0]), s4h.val[0]); - sumI = vmlaq_s32(sumI, vaddq_s32(s4a.val[1], s4b.val[1]), s4h.val[1]); - sumI = vmlaq_s32(sumI, vaddq_s32(s4a.val[2], s4b.val[2]), s4h.val[2]); - sumI = vmlaq_s32(sumI, vaddq_s32(s4a.val[3], s4b.val[3]), s4h.val[3]); - - s4a = vld4q_s32((int32_t const *) &(evenA[1][a])); - s4b = vld4q_s32((int32_t const *) &(evenB[1][b])); - sumQ = vmlaq_s32(sumQ, vaddq_s32(s4a.val[0], s4b.val[0]), s4h.val[0]); - sumQ = vmlaq_s32(sumQ, vaddq_s32(s4a.val[1], s4b.val[1]), s4h.val[1]); - sumQ = vmlaq_s32(sumQ, vaddq_s32(s4a.val[2], s4b.val[2]), s4h.val[2]); - sumQ = vmlaq_s32(sumQ, vaddq_s32(s4a.val[3], s4b.val[3]), s4h.val[3]); - - s2h = vld4q_s32((int32_t const *) &HBFIRFilterTraits<96>::hbCoeffs[16]); - - s2a = vld2q_s32((int32_t const *) &(evenA[0][a+16])); - s2b = vld2q_s32((int32_t const *) &(evenB[0][b+16])); - sumI = vmlaq_s32(sumI, vaddq_s32(s2a.val[0], s2b.val[0]), s2h.val[0]); - sumI = vmlaq_s32(sumI, vaddq_s32(s2a.val[1], s2b.val[1]), s2h.val[1]); - - s2a = vld2q_s32((int32_t const *) &(evenA[1][a+16])); - s2b = vld2q_s32((int32_t const *) &(evenB[1][b+16])); - sumQ = vmlaq_s32(sumQ, vaddq_s32(s2a.val[0], s2b.val[0]), s2h.val[0]); - sumQ = vmlaq_s32(sumQ, vaddq_s32(s2a.val[1], s2b.val[1]), s2h.val[1]); - } - else - { - s4h = vld4q_s32((int32_t const *) &HBFIRFilterTraits<96>::hbCoeffs[0]); - - s4a = vld4q_s32((int32_t const *) &(oddA[0][a])); - s4b = vld4q_s32((int32_t const *) &(oddB[0][b])); - sumI = vmlaq_s32(sumI, vaddq_s32(s4a.val[0], s4b.val[0]), s4h.val[0]); - sumI = vmlaq_s32(sumI, vaddq_s32(s4a.val[1], s4b.val[1]), s4h.val[1]); - sumI = vmlaq_s32(sumI, vaddq_s32(s4a.val[2], s4b.val[2]), s4h.val[2]); - sumI = vmlaq_s32(sumI, vaddq_s32(s4a.val[3], s4b.val[3]), s4h.val[3]); - - s4a = vld4q_s32((int32_t const *) &(oddA[1][a])); - s4b = vld4q_s32((int32_t const *) &(oddB[1][b])); - sumQ = vmlaq_s32(sumQ, vaddq_s32(s4a.val[0], s4b.val[0]), s4h.val[0]); - sumQ = vmlaq_s32(sumQ, vaddq_s32(s4a.val[1], s4b.val[1]), s4h.val[1]); - sumQ = vmlaq_s32(sumQ, vaddq_s32(s4a.val[2], s4b.val[2]), s4h.val[2]); - sumQ = vmlaq_s32(sumQ, vaddq_s32(s4a.val[3], s4b.val[3]), s4h.val[3]); - - s2h = vld4q_s32((int32_t const *) &HBFIRFilterTraits<96>::hbCoeffs[16]); - - s2a = vld2q_s32((int32_t const *) &(oddA[0][a+16])); - s2b = vld2q_s32((int32_t const *) &(oddB[0][b+16])); - sumI = vmlaq_s32(sumI, vaddq_s32(s2a.val[0], s2b.val[0]), s2h.val[0]); - sumI = vmlaq_s32(sumI, vaddq_s32(s2a.val[1], s2b.val[1]), s2h.val[1]); - - s2a = vld2q_s32((int32_t const *) &(oddA[1][a+16])); - s2b = vld2q_s32((int32_t const *) &(oddB[1][b+16])); - sumQ = vmlaq_s32(sumQ, vaddq_s32(s2a.val[0], s2b.val[0]), s2h.val[0]); - sumQ = vmlaq_s32(sumQ, vaddq_s32(s2a.val[1], s2b.val[1]), s2h.val[1]); - } - - int32x2_t sumI1 = vpadd_s32(vget_high_s32(sumI), vget_low_s32(sumI)); - int32x2_t sumI2 = vpadd_s32(sumI1, sumI1); - iAcc = vget_lane_s32(sumI2, 0); - - int32x2_t sumQ1 = vpadd_s32(vget_high_s32(sumQ), vget_low_s32(sumQ)); - int32x2_t sumQ2 = vpadd_s32(sumQ1, sumQ1); - qAcc = vget_lane_s32(sumQ2, 0); -#endif - } -}; - - -#endif /* INCLUDE_INTHALFBANDFILTEREO2I_H_ */ diff --git a/sdrbase/dsp/inthalfbandfilterst.h b/sdrbase/dsp/inthalfbandfilterst.h index 4651fba20..1cf117d77 100644 --- a/sdrbase/dsp/inthalfbandfilterst.h +++ b/sdrbase/dsp/inthalfbandfilterst.h @@ -92,7 +92,7 @@ public: } } - bool workDecimateCenter(qint32 *x, qint32 *y) + bool workDecimateCenter(int32_t *x, int32_t *y) { // insert sample into ring-buffer storeSample(*x, *y); @@ -393,7 +393,7 @@ public: advancePointer(); } - void myDecimate(qint32 x1, qint32 y1, qint32 *x2, qint32 *y2) + void myDecimate(int32_t x1, int32_t y1, int32_t *x2, int32_t *y2) { storeSample(x1, y1); advancePointer(); @@ -404,15 +404,15 @@ public: } protected: - qint32 m_samplesDB[2*HBFilterOrder][2]; // double buffer technique with even/odd amnd I/Q stride - qint32 m_samplesAligned[HBFilterOrder][2] __attribute__ ((aligned (16))); + int32_t m_samplesDB[2*HBFilterOrder][2]; // double buffer technique with even/odd amnd I/Q stride + int32_t m_samplesAligned[HBFilterOrder][2] __attribute__ ((aligned (16))); int m_ptr; int m_size; int m_state; - qint32 m_iEvenAcc; - qint32 m_qEvenAcc; - qint32 m_iOddAcc; - qint32 m_qOddAcc; + int32_t m_iEvenAcc; + int32_t m_qEvenAcc; + int32_t m_iOddAcc; + int32_t m_qOddAcc; void storeSample(const FixReal& sampleI, const FixReal& sampleQ) @@ -423,7 +423,7 @@ protected: m_samplesDB[m_ptr + m_size][1] = sampleQ; } - void storeSample(qint32 x, qint32 y) + void storeSample(int32_t x, int32_t y) { m_samplesDB[m_ptr][0] = x; m_samplesDB[m_ptr][1] = y; @@ -447,7 +447,7 @@ protected: m_iOddAcc = 0; m_qOddAcc = 0; #ifdef USE_SSE4_1 -// memcpy((void *) m_samplesAligned, (const void *) &(m_samplesDB[ m_ptr + 1][0]), HBFilterOrder*2*sizeof(qint32)); +// memcpy((void *) m_samplesAligned, (const void *) &(m_samplesDB[ m_ptr + 1][0]), HBFilterOrder*2*sizeof(int32_t)); IntHalfbandFilterSTIntrinsics::workNA( m_ptr + 1, m_samplesDB, @@ -469,10 +469,10 @@ protected: b += 2; } #endif - m_iEvenAcc += ((qint32)m_samplesDB[m_ptr + m_size/2][0]) << (HBFIRFilterTraits::hbShift - 1); - m_qEvenAcc += ((qint32)m_samplesDB[m_ptr + m_size/2][1]) << (HBFIRFilterTraits::hbShift - 1); - m_iOddAcc += ((qint32)m_samplesDB[m_ptr + m_size/2 + 1][0]) << (HBFIRFilterTraits::hbShift - 1); - m_qOddAcc += ((qint32)m_samplesDB[m_ptr + m_size/2 + 1][1]) << (HBFIRFilterTraits::hbShift - 1); + m_iEvenAcc += ((int32_t)m_samplesDB[m_ptr + m_size/2][0]) << (HBFIRFilterTraits::hbShift - 1); + m_qEvenAcc += ((int32_t)m_samplesDB[m_ptr + m_size/2][1]) << (HBFIRFilterTraits::hbShift - 1); + m_iOddAcc += ((int32_t)m_samplesDB[m_ptr + m_size/2 + 1][0]) << (HBFIRFilterTraits::hbShift - 1); + m_qOddAcc += ((int32_t)m_samplesDB[m_ptr + m_size/2 + 1][1]) << (HBFIRFilterTraits::hbShift - 1); sample->setReal(m_iEvenAcc >> HBFIRFilterTraits::hbShift -1); sample->setImag(m_qEvenAcc >> HBFIRFilterTraits::hbShift -1); @@ -484,7 +484,7 @@ protected: } } - void doFIR(qint32 *x, qint32 *y) + void doFIR(int32_t *x, int32_t *y) { // calculate on odd values @@ -496,7 +496,7 @@ protected: m_qOddAcc = 0; #ifdef USE_SSE4_1 -// memcpy((void *) m_samplesAligned, (const void *) &(m_samplesDB[ m_ptr + 1][0]), HBFilterOrder*2*sizeof(qint32)); +// memcpy((void *) m_samplesAligned, (const void *) &(m_samplesDB[ m_ptr + 1][0]), HBFilterOrder*2*sizeof(int32_t)); IntHalfbandFilterSTIntrinsics::workNA( m_ptr + 1, m_samplesDB, @@ -518,10 +518,10 @@ protected: b += 2; } #endif - m_iEvenAcc += ((qint32)m_samplesDB[m_ptr + m_size/2][0]) << (HBFIRFilterTraits::hbShift - 1); - m_qEvenAcc += ((qint32)m_samplesDB[m_ptr + m_size/2][1]) << (HBFIRFilterTraits::hbShift - 1); - m_iOddAcc += ((qint32)m_samplesDB[m_ptr + m_size/2 + 1][0]) << (HBFIRFilterTraits::hbShift - 1); - m_qOddAcc += ((qint32)m_samplesDB[m_ptr + m_size/2 + 1][1]) << (HBFIRFilterTraits::hbShift - 1); + m_iEvenAcc += ((int32_t)m_samplesDB[m_ptr + m_size/2][0]) << (HBFIRFilterTraits::hbShift - 1); + m_qEvenAcc += ((int32_t)m_samplesDB[m_ptr + m_size/2][1]) << (HBFIRFilterTraits::hbShift - 1); + m_iOddAcc += ((int32_t)m_samplesDB[m_ptr + m_size/2 + 1][0]) << (HBFIRFilterTraits::hbShift - 1); + m_qOddAcc += ((int32_t)m_samplesDB[m_ptr + m_size/2 + 1][1]) << (HBFIRFilterTraits::hbShift - 1); *x = m_iEvenAcc >> HBFIRFilterTraits::hbShift -1; *y = m_qEvenAcc >> HBFIRFilterTraits::hbShift -1; diff --git a/sdrbase/sdrbase.pro b/sdrbase/sdrbase.pro index d757c44b8..10bce8e2c 100644 --- a/sdrbase/sdrbase.pro +++ b/sdrbase/sdrbase.pro @@ -145,8 +145,6 @@ HEADERS += mainwindow.h\ dsp/inthalfbandfilterdb.h\ dsp/inthalfbandfiltereo1.h\ dsp/inthalfbandfiltereo1i.h\ - dsp/inthalfbandfiltereo2.h\ - dsp/inthalfbandfiltereo2i.h\ dsp/inthalfbandfilterst.h\ dsp/inthalfbandfiltersti.h\ dsp/kissfft.h\