From 2504d199fd3a2fbf90be0da30cd2fcfc87e5a2e6 Mon Sep 17 00:00:00 2001 From: f4exb Date: Tue, 8 Nov 2016 17:29:46 +0100 Subject: [PATCH] IntHalfbandFilterST: implemented intrinsics class --- CMakeLists.txt | 1 + sdrbase/dsp/inthalfbandfilterst.h | 39 +++++++++---- sdrbase/dsp/inthalfbandfiltersti.h | 92 ++++++++++++++++++++++++++++++ sdrbase/sdrbase.pro | 2 + 4 files changed, 124 insertions(+), 10 deletions(-) create mode 100644 sdrbase/dsp/inthalfbandfiltersti.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 990e99315..1e1802c2d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -214,6 +214,7 @@ set(sdrbase_HEADERS sdrbase/dsp/inthalfbandfiltereo2.h sdrbase/dsp/inthalfbandfiltereo2i.h sdrbase/dsp/inthalfbandfilterst.h + sdrbase/dsp/inthalfbandfiltersti.h sdrbase/dsp/kissfft.h sdrbase/dsp/kissengine.h sdrbase/dsp/lowpass.h diff --git a/sdrbase/dsp/inthalfbandfilterst.h b/sdrbase/dsp/inthalfbandfilterst.h index c48b2f9a8..ee5875396 100644 --- a/sdrbase/dsp/inthalfbandfilterst.h +++ b/sdrbase/dsp/inthalfbandfilterst.h @@ -3,7 +3,7 @@ // written by Edouard Griffiths // // // // Integer half-band FIR based interpolator and decimator // -// This is the double buffer variant // +// This is the even/odd and I/Q stride with double buffering variant // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // @@ -24,6 +24,7 @@ #include #include "dsp/dsptypes.h" #include "dsp/hbfiltertraits.h" +#include "dsp/inthalfbandfiltersti.h" #include "util/export.h" template @@ -403,7 +404,8 @@ public: } protected: - qint32 m_samplesDB[2*HBFIRFilterTraits::hbOrder][2]; // double buffer technique with even/odd amnd I/Q stride + qint32 m_samplesDB[2*HBFilterOrder][2]; // double buffer technique with even/odd and I/Q stride + qint32 m_samplesAligned[HBFilterOrder][2] __attribute__ ((aligned (16))); int m_ptr; int m_size; int m_state; @@ -431,9 +433,6 @@ protected: void doFIR(Sample* 
sample) { - int a = m_ptr + m_size; // tip pointer - odd - int b = m_ptr + 1; // tail pointer - aven - // calculate on odd values if ((m_ptr % 2) == 1) @@ -442,6 +441,17 @@ protected: m_qEvenAcc = 0; m_iOddAcc = 0; m_qOddAcc = 0; +#ifdef USE_SSE4_1 + memcpy((void *) m_samplesAligned, (const void *) &(m_samplesDB[ m_ptr + 1][0]), HBFilterOrder*2*sizeof(qint32)); + IntHalfbandFilterSTIntrinsics::work( + m_samplesAligned, + m_iEvenAcc, + m_qEvenAcc, + m_iOddAcc, + m_qOddAcc); +#else + int a = m_ptr + m_size; // tip pointer - odd + int b = m_ptr + 1; // tail pointer - even for (int i = 0; i < HBFIRFilterTraits::hbOrder / 4; i++) { @@ -452,7 +462,7 @@ protected: a -= 2; b += 2; } - +#endif m_iEvenAcc += ((qint32)m_samplesDB[m_ptr + m_size/2][0]) << (HBFIRFilterTraits::hbShift - 1); m_qEvenAcc += ((qint32)m_samplesDB[m_ptr + m_size/2][1]) << (HBFIRFilterTraits::hbShift - 1); m_iOddAcc += ((qint32)m_samplesDB[m_ptr + m_size/2 + 1][0]) << (HBFIRFilterTraits::hbShift - 1); @@ -470,9 +480,6 @@ protected: void doFIR(qint32 *x, qint32 *y) { - int a = m_ptr + m_size; // tip pointer - odd - int b = m_ptr + 1; // tail pointer - aven - // calculate on odd values if ((m_ptr % 2) == 1) @@ -482,6 +489,18 @@ protected: m_iOddAcc = 0; m_qOddAcc = 0; +#ifdef USE_SSE4_1 + memcpy((void *) m_samplesAligned, (const void *) &(m_samplesDB[ m_ptr + 1][0]), HBFilterOrder*2*sizeof(qint32)); + IntHalfbandFilterSTIntrinsics::work( + m_samplesAligned, + m_iEvenAcc, + m_qEvenAcc, + m_iOddAcc, + m_qOddAcc); +#else + int a = m_ptr + m_size; // tip pointer - odd + int b = m_ptr + 1; // tail pointer - even + for (int i = 0; i < HBFIRFilterTraits::hbOrder / 4; i++) { m_iEvenAcc += (m_samplesDB[a-1][0] + m_samplesDB[b][0]) * HBFIRFilterTraits::hbCoeffs[i]; @@ -491,7 +510,7 @@ protected: a -= 2; b += 2; } - +#endif m_iEvenAcc += ((qint32)m_samplesDB[m_ptr + m_size/2][0]) << (HBFIRFilterTraits::hbShift - 1); m_qEvenAcc += ((qint32)m_samplesDB[m_ptr + m_size/2][1]) << (HBFIRFilterTraits::hbShift - 1); 
m_iOddAcc += ((qint32)m_samplesDB[m_ptr + m_size/2 + 1][0]) << (HBFIRFilterTraits::hbShift - 1); diff --git a/sdrbase/dsp/inthalfbandfiltersti.h b/sdrbase/dsp/inthalfbandfiltersti.h new file mode 100644 index 000000000..08883d58b --- /dev/null +++ b/sdrbase/dsp/inthalfbandfiltersti.h @@ -0,0 +1,92 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2016 F4EXB // +// written by Edouard Griffiths // +// // +// Integer half-band FIR based interpolator and decimator // +// This is the even/odd and I/Q stride with double buffering variant // +// This is the SIMD intrinsics code // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see <http://www.gnu.org/licenses/>. 
// +/////////////////////////////////////////////////////////////////////////////////// + +#ifndef SDRBASE_DSP_INTHALFBANDFILTERSTI_H_ +#define SDRBASE_DSP_INTHALFBANDFILTERSTI_H_ + +#include + +#if defined(USE_SSE4_1) +#include +#endif + +#include "hbfiltertraits.h" + +template +class IntHalfbandFilterSTIntrinsics +{ +public: + static void work( + int32_t samples[HBFilterOrder][2], + int32_t& iEvenAcc, int32_t& qEvenAcc, + int32_t& iOddAcc, int32_t& qOddAcc) + { +#if defined(USE_SSE4_1) + int a = HBFIRFilterTraits::hbOrder - 2; // tip + int b = 0; // tail + const __m128i* h = (const __m128i*) HBFIRFilterTraits::hbCoeffs; + __m128i sum = _mm_setzero_si128(); + __m128i sh, shh, sa, sb; + int32_t sums[4] __attribute__ ((aligned (16))); + + for (int i = 0; i < HBFIRFilterTraits::hbOrder / 16; i++) + { + sh = _mm_load_si128(h); + shh = _mm_shuffle_epi32(sh, _MM_SHUFFLE(0,0,0,0)); + sa = _mm_load_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq + sb = _mm_load_si128((__m128i*) &(samples[b][0])); + sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh)); + a -= 2; + b += 2; + shh = _mm_shuffle_epi32(sh, _MM_SHUFFLE(1,1,1,1)); + sa = _mm_load_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq + sb = _mm_load_si128((__m128i*) &(samples[b][0])); + sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh)); + a -= 2; + b += 2; + shh = _mm_shuffle_epi32(sh, _MM_SHUFFLE(2,2,2,2)); + sa = _mm_load_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq + sb = _mm_load_si128((__m128i*) &(samples[b][0])); + sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh)); + a -= 2; + b += 2; + shh = _mm_shuffle_epi32(sh, _MM_SHUFFLE(3,3,3,3)); + sa = _mm_load_si128((__m128i*) &(samples[a][0])); // Ei,Eq,Oi,Oq + sb = _mm_load_si128((__m128i*) &(samples[b][0])); + sum = _mm_add_epi32(sum, _mm_mullo_epi32(_mm_add_epi32(sa, sb), shh)); + a -= 2; + b += 2; + ++h; + } + + // Extract values from sum vector + _mm_store_si128((__m128i*) sums, sum); + iEvenAcc = 
sums[0]; + qEvenAcc = sums[1]; + iOddAcc = sums[2]; + qOddAcc = sums[3]; +#endif + } +}; + + + +#endif /* SDRBASE_DSP_INTHALFBANDFILTERSTI_H_ */ diff --git a/sdrbase/sdrbase.pro b/sdrbase/sdrbase.pro index feb4ba0c6..d757c44b8 100644 --- a/sdrbase/sdrbase.pro +++ b/sdrbase/sdrbase.pro @@ -147,6 +147,8 @@ HEADERS += mainwindow.h\ dsp/inthalfbandfiltereo1i.h\ dsp/inthalfbandfiltereo2.h\ dsp/inthalfbandfiltereo2i.h\ + dsp/inthalfbandfilterst.h\ + dsp/inthalfbandfiltersti.h\ dsp/kissfft.h\ dsp/kissengine.h\ dsp/lowpass.h\