///////////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2015 Edouard Griffiths, F4EXB                                   //
//                                                                               //
// This program is free software; you can redistribute it and/or modify          //
// it under the terms of the GNU General Public License as published by          //
// the Free Software Foundation as version 3 of the License, or                  //
// (at your option) any later version.                                           //
//                                                                               //
// This program is distributed in the hope that it will be useful,               //
// but WITHOUT ANY WARRANTY; without even the implied warranty of                //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                  //
// GNU General Public License V3 for more details.                               //
//                                                                               //
// You should have received a copy of the GNU General Public License             //
// along with this program. If not, see <http://www.gnu.org/licenses/>.          //
///////////////////////////////////////////////////////////////////////////////////
#ifndef INCLUDE_GPL_DSP_DECIMATORS_H_
#define INCLUDE_GPL_DSP_DECIMATORS_H_
#include "dsp/dsptypes.h"
#include "dsp/inthalfbandfiltereo.h"
#define DECIMATORS_HB_FILTER_ORDER 64
/**
 * Bit shifts applied around each decimation stage, selected by a pair of bit widths.
 *
 * preN is the left shift applied to the input values before decimating by N (pre1 is
 * used by the plain copy path) and postN is the right shift applied to the result.
 * This primary template is the fallback for width pairs that have no explicit
 * specialization: every shift is zero.
 *
 * NOTE(review): judging by the specializations, the first argument is presumably the
 * output (SDR sample) width and the second the input (device) width - confirm.
 */
template<uint SdrBits, uint InputBits>
struct decimation_shifts
{
    static const uint pre1   = 0;
    static const uint pre2   = 0;
    static const uint post2  = 0;
    static const uint pre4   = 0;
    static const uint post4  = 0;
    static const uint pre8   = 0;
    static const uint post8  = 0;
    static const uint pre16  = 0;
    static const uint post16 = 0;
    static const uint pre32  = 0;
    static const uint post32 = 0;
    static const uint pre64  = 0;
    static const uint post64 = 0;
};
/**
 * Shift table for the <16, 24> width pair.
 * Nothing is shifted on the way in; the result of a decimation by N is shifted right
 * by 8 + log2(N) bits (9 bits for N = 2 up to 14 bits for N = 64).
 */
template<>
struct decimation_shifts<16, 24>
{
    static const uint pre1  = 0;                // no decimation
    static const uint pre2  = 0, post2  = 9;    // decimation by 2
    static const uint pre4  = 0, post4  = 10;   // decimation by 4
    static const uint pre8  = 0, post8  = 11;   // decimation by 8
    static const uint pre16 = 0, post16 = 12;   // decimation by 16
    static const uint pre32 = 0, post32 = 13;   // decimation by 32
    static const uint pre64 = 0, post64 = 14;   // decimation by 64
};
/**
 * Shift table for the <24, 24> width pair.
 * Nothing is shifted on the way in; the result of a decimation by N is shifted right
 * by log2(N) bits (1 bit for N = 2 up to 6 bits for N = 64).
 */
template<>
struct decimation_shifts<24, 24>
{
    static const uint pre1  = 0;               // no decimation
    static const uint pre2  = 0, post2  = 1;   // decimation by 2
    static const uint pre4  = 0, post4  = 2;   // decimation by 4
    static const uint pre8  = 0, post8  = 3;   // decimation by 8
    static const uint pre16 = 0, post16 = 4;   // decimation by 16
    static const uint pre32 = 0, post32 = 5;   // decimation by 32
    static const uint pre64 = 0, post64 = 6;   // decimation by 64
};
/**
 * Shift table for the <16, 16> width pair.
 * Nothing is shifted on the way in; the result of a decimation by N is shifted right
 * by log2(N) bits (1 bit for N = 2 up to 6 bits for N = 64).
 */
template<>
struct decimation_shifts<16, 16>
{
    static const uint pre1  = 0;               // no decimation
    static const uint pre2  = 0, post2  = 1;   // decimation by 2
    static const uint pre4  = 0, post4  = 2;   // decimation by 4
    static const uint pre8  = 0, post8  = 3;   // decimation by 8
    static const uint pre16 = 0, post16 = 4;   // decimation by 16
    static const uint pre32 = 0, post32 = 5;   // decimation by 32
    static const uint pre64 = 0, post64 = 6;   // decimation by 64
};
/**
 * Shift table for the <24, 16> width pair.
 * Input is shifted left by 8 - log2(N) bits before a decimation by N (8 bits on the
 * plain copy path, 2 bits for N = 64); results are never shifted right.
 */
template<>
struct decimation_shifts<24, 16>
{
    static const uint pre1  = 8;               // no decimation
    static const uint pre2  = 7, post2  = 0;   // decimation by 2
    static const uint pre4  = 6, post4  = 0;   // decimation by 4
    static const uint pre8  = 5, post8  = 0;   // decimation by 8
    static const uint pre16 = 4, post16 = 0;   // decimation by 16
    static const uint pre32 = 3, post32 = 0;   // decimation by 32
    static const uint pre64 = 2, post64 = 0;   // decimation by 64
};
/**
 * Shift table for the <16, 12> width pair.
 * The net shift for a decimation by N is 4 - log2(N) bits: applied as a left shift on
 * the input while it is positive (N up to 8), zero for N = 16, and applied as a right
 * shift on the result for N = 32 and N = 64.
 */
template<>
struct decimation_shifts<16, 12>
{
    static const uint pre1  = 4;               // no decimation
    static const uint pre2  = 3, post2  = 0;   // decimation by 2
    static const uint pre4  = 2, post4  = 0;   // decimation by 4
    static const uint pre8  = 1, post8  = 0;   // decimation by 8
    static const uint pre16 = 0, post16 = 0;   // decimation by 16
    static const uint pre32 = 0, post32 = 1;   // decimation by 32
    static const uint pre64 = 0, post64 = 2;   // decimation by 64
};
/**
 * Shift table for the <24, 12> width pair.
 * Input is shifted left by 12 - log2(N) bits before a decimation by N (12 bits on the
 * plain copy path, 6 bits for N = 64); results are never shifted right.
 */
template<>
struct decimation_shifts<24, 12>
{
    static const uint pre1  = 12;               // no decimation
    static const uint pre2  = 11, post2  = 0;   // decimation by 2
    static const uint pre4  = 10, post4  = 0;   // decimation by 4
    static const uint pre8  = 9,  post8  = 0;   // decimation by 8
    static const uint pre16 = 8,  post16 = 0;   // decimation by 16
    static const uint pre32 = 7,  post32 = 0;   // decimation by 32
    static const uint pre64 = 6,  post64 = 0;   // decimation by 64
};
/**
 * Shift table for the <16, 8> width pair.
 * Input is shifted left by 8 - log2(N) bits before a decimation by N (8 bits on the
 * plain copy path, 2 bits for N = 64); results are never shifted right.
 */
template<>
struct decimation_shifts<16, 8>
{
    static const uint pre1  = 8;               // no decimation
    static const uint pre2  = 7, post2  = 0;   // decimation by 2
    static const uint pre4  = 6, post4  = 0;   // decimation by 4
    static const uint pre8  = 5, post8  = 0;   // decimation by 8
    static const uint pre16 = 4, post16 = 0;   // decimation by 16
    static const uint pre32 = 3, post32 = 0;   // decimation by 32
    static const uint pre64 = 2, post64 = 0;   // decimation by 64
};
/**
 * Shift table for the <24, 8> width pair.
 * Input is shifted left by 16 - log2(N) bits before a decimation by N (16 bits on the
 * plain copy path, 10 bits for N = 64); results are never shifted right.
 */
template<>
struct decimation_shifts<24, 8>
{
    static const uint pre1  = 16;               // no decimation
    static const uint pre2  = 15, post2  = 0;   // decimation by 2
    static const uint pre4  = 14, post4  = 0;   // decimation by 4
    static const uint pre8  = 13, post8  = 0;   // decimation by 8
    static const uint pre16 = 12, post16 = 0;   // decimation by 16
    static const uint pre32 = 11, post32 = 0;   // decimation by 32
    static const uint pre64 = 10, post64 = 0;   // decimation by 64
};
/**
 * Three-byte value laid out as b0, b1, b2 with one-byte packing (sizeof is 3).
 *
 * Conversion to T overlays the bytes { 0, b0, b1, b2 } on a 32-bit integer and converts
 * that integer to T without any further shift. On a little-endian host the result is
 * therefore (b2 << 24) | (b1 << 16) | (b0 << 8): the three bytes occupy the upper 24
 * bits and the low byte is zero.
 *
 * The two preprocessor branches differ only in how packing is requested
 * (#pragma pack for MSVC, __attribute__((__packed__)) otherwise).
 */
#ifdef _MSC_VER
#pragma pack(push,1)
template<typename T>
struct TripleByteLE
{
    uint8_t b0;
    uint8_t b1;
    uint8_t b2;
    // Scratch overlay used by the conversion operator: one 32-bit integer
    // aliased with its four individual bytes.
    typedef union {
#pragma pack(push,1)
        struct {
            int32_t i;
        };
#pragma pack(pop)
#pragma pack(push, 1)
        struct {
            uint8_t i0;
            uint8_t i1;
            uint8_t i2;
            uint8_t i3;
        };
#pragma pack(pop)
    } isample;
    operator T() const {
        isample s;
        s.i0 = 0;   // first byte in memory is cleared
        s.i1 = b0;
        s.i2 = b1;
        s.i3 = b2;
        return s.i;
    }
};
#pragma pack(pop)
#else
template<typename T>
struct TripleByteLE
{
    uint8_t b0;
    uint8_t b1;
    uint8_t b2;
    // Scratch overlay used by the conversion operator: one 32-bit integer
    // aliased with its four individual bytes.
    typedef union {
        struct {
            int32_t i;
        } __attribute__((__packed__));
        struct {
            uint8_t i0;
            uint8_t i1;
            uint8_t i2;
            uint8_t i3;
        } __attribute__((__packed__));
    } isample;
    operator T() const {
        isample s;
        s.i0 = 0;   // first byte in memory is cleared
        s.i1 = b0;
        s.i2 = b1;
        s.i3 = b2;
        return s.i;
    }
} __attribute__((__packed__));
#endif
#ifdef _MSC_VER
#pragma pack(push, 1)
template<>
struct TripleByteLE
{
    uint8_t b0;
    uint8_t b1;
    uint8_t b2;
    typedef union {
#pragma pack(push, 1)
        struct {
            qint32 i;
        };
#pragma pack(pop)
#pragma pack(push, 1)
        struct {
            uint8_t i0;
            uint8_t i1;
            uint8_t i2;
            uint8_t i3;
        };
#pragma pack(pop)
    } isample;
    operator qint32() const {
        isample s;
        s.i0 = 0;
        s.i1 = b0;
        s.i2 = b1;
        s.i3 = b2;
        return s.i >> 8;
    }
};
#pragma pack(pop)
#else
template<>
struct TripleByteLE
{
    uint8_t b0;
    uint8_t b1;
    uint8_t b2;
    typedef union {
        struct {
            qint32 i;
        } __attribute__((__packed__));
        struct {
            uint8_t i0;
            uint8_t i1;
            uint8_t i2;
            uint8_t i3;
        } __attribute__((__packed__));
    } isample;
    operator qint32() const {
        isample s;
        s.i0 = 0;
        s.i1 = b0;
        s.i2 = b1;
        s.i3 = b2;
        return s.i >> 8;
    }
} __attribute__((__packed__));
#endif
#ifdef _MSC_VER
#pragma pack(push, 1)
template<>
struct TripleByteLE
{
    uint8_t b0;
    uint8_t b1;
    uint8_t b2;
    typedef union {
#pragma pack(push, 1)
        struct {
            qint64 i;
        };
#pragma pack(pop)
#pragma pack(push, 1)
        struct {
            uint32_t ia;
            uint8_t i0;
            uint8_t i1;
            uint8_t i2;
            uint8_t i3;
        };
#pragma pack(pop)
    } isample;
    operator qint64() const {
        isample s;
        s.ia = 0;
        s.i0 = 0;
        s.i1 = b0;
        s.i2 = b1;
        s.i3 = b2;
        return s.i >> 40;
    }
};
#pragma pack(pop)
#else
template<>
struct TripleByteLE
{
    uint8_t b0;
    uint8_t b1;
    uint8_t b2;
    typedef union {
        struct {
            qint64 i;
        } __attribute__((__packed__));
        struct {
            uint32_t ia;
            uint8_t i0;
            uint8_t i1;
            uint8_t i2;
            uint8_t i3;
        } __attribute__((__packed__));
    } isample;
    operator qint64() const {
        isample s;
        s.ia = 0;
        s.i0 = 0;
        s.i1 = b0;
        s.i2 = b1;
        s.i3 = b2;
        return s.i >> 40;
    }
} __attribute__((__packed__));
#endif
/** Decimators with integer input and integer output */
template
class Decimators
{
public:
    // interleaved I/Q input buffer
	void decimate1(SampleVector::iterator* it, const T* buf, qint32 len);
	void decimate2_u(SampleVector::iterator* it, const T* buf, qint32 len);
    void decimate2_inf(SampleVector::iterator* it, const T* buf, qint32 len);
	void decimate2_sup(SampleVector::iterator* it, const T* buf, qint32 len);
	void decimate2_cen(SampleVector::iterator* it, const T* buf, qint32 len);
    void decimate4_inf(SampleVector::iterator* it, const T* buf, qint32 len);
    void decimate4_inf_txsync(SampleVector::iterator* it, const T* buf, qint32 len);
	void decimate4_sup(SampleVector::iterator* it, const T* buf, qint32 len);
	void decimate4_sup_txsync(SampleVector::iterator* it, const T* buf, qint32 len);
	void decimate4_cen(SampleVector::iterator* it, const T* buf, qint32 len);
    void decimate8_inf(SampleVector::iterator* it, const T* buf, qint32 len);
    void decimate8_inf_txsync(SampleVector::iterator* it, const T* buf, qint32 len);
	void decimate8_sup(SampleVector::iterator* it, const T* buf, qint32 len);
	void decimate8_sup_txsync(SampleVector::iterator* it, const T* buf, qint32 len);
	void decimate8_cen(SampleVector::iterator* it, const T* buf, qint32 len);
    void decimate16_inf(SampleVector::iterator* it, const T* buf, qint32 len);
    void decimate16_inf_txsync(SampleVector::iterator* it, const T* buf, qint32 len);
	void decimate16_sup(SampleVector::iterator* it, const T* buf, qint32 len);
	void decimate16_sup_txsync(SampleVector::iterator* it, const T* buf, qint32 len);
	void decimate16_cen(SampleVector::iterator* it, const T* buf, qint32 len);
    void decimate32_inf(SampleVector::iterator* it, const T* buf, qint32 len);
    void decimate32_inf_txsync(SampleVector::iterator* it, const T* buf, qint32 len);
	void decimate32_sup(SampleVector::iterator* it, const T* buf, qint32 len);
	void decimate32_sup_txsync(SampleVector::iterator* it, const T* buf, qint32 len);
	void decimate32_cen(SampleVector::iterator* it, const T* buf, qint32 len);
    void decimate64_inf(SampleVector::iterator* it, const T* buf, qint32 len);
    void decimate64_inf_txsync(SampleVector::iterator* it, const T* buf, qint32 len);
	void decimate64_sup(SampleVector::iterator* it, const T* buf, qint32 len);
	void decimate64_sup_txsync(SampleVector::iterator* it, const T* buf, qint32 len);
	void decimate64_cen(SampleVector::iterator* it, const T* buf, qint32 len);
    // separate I and Q input buffers
    void decimate1(SampleVector::iterator* it, const T* bufI, const T* bufQ, qint32 len);
    void decimate2_u(SampleVector::iterator* it, const T* bufI, const T* bufQ, qint32 len);
    void decimate2_cen(SampleVector::iterator* it, const T* bufI, const T* bufQ, qint32 len);
    void decimate4_cen(SampleVector::iterator* it, const T* bufI, const T* bufQ, qint32 len);
    void decimate8_cen(SampleVector::iterator* it, const T* bufI, const T* bufQ, qint32 len);
    void decimate16_cen(SampleVector::iterator* it, const T* bufI, const T* bufQ, qint32 len);
    void decimate32_cen(SampleVector::iterator* it, const T* bufI, const T* bufQ, qint32 len);
    void decimate64_cen(SampleVector::iterator* it, const T* bufI, const T* bufQ, qint32 len);
private:
#ifdef SDR_RX_SAMPLE_24BIT
    IntHalfbandFilterEO m_decimator2;  // 1st stages
    IntHalfbandFilterEO m_decimator2s; // 1st stages - straight
    IntHalfbandFilterEO m_decimator4;  // 2nd stages
    IntHalfbandFilterEO m_decimator8;  // 3rd stages
    IntHalfbandFilterEO m_decimator16; // 4th stages
    IntHalfbandFilterEO m_decimator32; // 5th stages
    IntHalfbandFilterEO m_decimator64; // 6th stages
#else
    IntHalfbandFilterEO m_decimator2;  // 1st stages
    IntHalfbandFilterEO m_decimator2s; // 1st stages - straight
    IntHalfbandFilterEO m_decimator4;  // 2nd stages
    IntHalfbandFilterEO m_decimator8;  // 3rd stages
    IntHalfbandFilterEO m_decimator16; // 4th stages
    IntHalfbandFilterEO m_decimator32; // 5th stages
    IntHalfbandFilterEO m_decimator64; // 6th stages
#endif
};
template
void Decimators::decimate1(SampleVector::iterator* it, const T* buf, qint32 len)
{
	qint32 xreal, yimag;
	for (int pos = 0; pos < len - 1; pos += 2)
	{
		xreal = IQOrder ? buf[pos+0] : buf[pos+1];
		yimag = IQOrder ? buf[pos+1] : buf[pos+0];
		(**it).setReal(xreal << decimation_shifts::pre1); // Valgrind optim (2 - comment not repeated)
		(**it).setImag(yimag << decimation_shifts::pre1);
		++(*it); // Valgrind optim (comment not repeated)
	}
}
template
void Decimators::decimate2_u(SampleVector::iterator* it, const T* buf, qint32 len)
{
	StorageType xreal, yimag;
	for (int pos = 0; pos < len - 7; pos += 8)
	{
		xreal = IQOrder ?
            (buf[pos+0] - buf[pos+3]) << decimation_shifts::pre2 :
            (buf[pos+1] + buf[pos+2] - 255) << decimation_shifts::pre2;
		yimag = IQOrder ?
            (buf[pos+1] + buf[pos+2] - 255) << decimation_shifts::pre2 :
            (buf[pos+0] - buf[pos+3]) << decimation_shifts::pre2;
		(**it).setReal(xreal >> decimation_shifts::post2);
		(**it).setImag(yimag >> decimation_shifts::post2);
		++(*it);
		xreal = IQOrder ?
            (buf[pos+7] - buf[pos+4]) << decimation_shifts::pre2 :
            (255 - buf[pos+5] - buf[pos+6]) << decimation_shifts::pre2;
		yimag = IQOrder ?
            (255 - buf[pos+5] - buf[pos+6]) << decimation_shifts::pre2 :
            (buf[pos+7] - buf[pos+4]) << decimation_shifts::pre2;
		(**it).setReal(xreal >> decimation_shifts::post2);
		(**it).setImag(yimag >> decimation_shifts::post2);
		++(*it);
	}
}
template
void Decimators::decimate2_inf(SampleVector::iterator* it, const T* buf, qint32 len)
{
    StorageType buf2[4];
    for (int pos = 0; pos < len - 7; pos += 8)
    {
        m_decimator2.myDecimateInf(
                buf[pos+0] << decimation_shifts::pre2,
                buf[pos+1] << decimation_shifts::pre2,
                buf[pos+2] << decimation_shifts::pre2,
                buf[pos+3] << decimation_shifts::pre2,
                buf[pos+4] << decimation_shifts::pre2,
                buf[pos+5] << decimation_shifts::pre2,
                buf[pos+6] << decimation_shifts::pre2,
                buf[pos+7] << decimation_shifts::pre2,
                &buf2[0]);
        (**it).setReal(buf2[0] >> decimation_shifts::post2);
        (**it).setImag(buf2[1] >> decimation_shifts::post2);
        ++(*it);
        (**it).setReal(buf2[2] >> decimation_shifts::post2);
        (**it).setImag(buf2[3] >> decimation_shifts::post2);
        ++(*it);
    }
}
template
void Decimators::decimate2_sup(SampleVector::iterator* it, const T* buf, qint32 len)
{
    StorageType buf2[4];
    for (int pos = 0; pos < len - 7; pos += 8)
    {
        m_decimator2.myDecimateSup(
                buf[pos+0] << decimation_shifts::pre2,
                buf[pos+1] << decimation_shifts::pre2,
                buf[pos+2] << decimation_shifts::pre2,
                buf[pos+3] << decimation_shifts::pre2,
                buf[pos+4] << decimation_shifts::pre2,
                buf[pos+5] << decimation_shifts::pre2,
                buf[pos+6] << decimation_shifts::pre2,
                buf[pos+7] << decimation_shifts::pre2,
                &buf2[0]);
        (**it).setReal(buf2[0] >> decimation_shifts::post2);
        (**it).setImag(buf2[1] >> decimation_shifts::post2);
        ++(*it);
        (**it).setReal(buf2[2] >> decimation_shifts::post2);
        (**it).setImag(buf2[3] >> decimation_shifts::post2);
        ++(*it);
    }
}
template
void Decimators::decimate2_cen(SampleVector::iterator* it, const T* buf, qint32 len)
{
    StorageType buf2[4];
    for (int pos = 0; pos < len - 7; pos += 8)
    {
        m_decimator2.myDecimateCen(
                buf[pos+0] << decimation_shifts::pre2,
                buf[pos+1] << decimation_shifts::pre2,
                buf[pos+2] << decimation_shifts::pre2,
                buf[pos+3] << decimation_shifts::pre2,
                buf[pos+4] << decimation_shifts::pre2,
                buf[pos+5] << decimation_shifts::pre2,
                buf[pos+6] << decimation_shifts::pre2,
                buf[pos+7] << decimation_shifts::pre2,
                &buf2[0]);
        (**it).setReal(buf2[0] >> decimation_shifts::post2);
        (**it).setImag(buf2[1] >> decimation_shifts::post2);
        ++(*it);
        (**it).setReal(buf2[2] >> decimation_shifts::post2);
        (**it).setImag(buf2[3] >> decimation_shifts::post2);
        ++(*it);
    }
}
template
void Decimators::decimate4_inf(SampleVector::iterator* it, const T* buf, qint32 len)
{
    StorageType buf2[8], buf4[4];
    for (int pos = 0; pos < len - 15; pos += 16)
    {
        m_decimator2s.myDecimateInf(
                buf[pos+0] << decimation_shifts::pre4,
                buf[pos+1] << decimation_shifts::pre4,
                buf[pos+2] << decimation_shifts::pre4,
                buf[pos+3] << decimation_shifts::pre4,
                buf[pos+4] << decimation_shifts::pre4,
                buf[pos+5] << decimation_shifts::pre4,
                buf[pos+6] << decimation_shifts::pre4,
                buf[pos+7] << decimation_shifts::pre4,
                &buf2[0]);
        m_decimator2s.myDecimateInf(
                buf[pos+8] << decimation_shifts::pre4,
                buf[pos+9] << decimation_shifts::pre4,
                buf[pos+10] << decimation_shifts::pre4,
                buf[pos+11] << decimation_shifts::pre4,
                buf[pos+12] << decimation_shifts::pre4,
                buf[pos+13] << decimation_shifts::pre4,
                buf[pos+14] << decimation_shifts::pre4,
                buf[pos+15] << decimation_shifts::pre4,
                &buf2[4]);
        m_decimator4.myDecimateSup(
                buf2[0],
                buf2[1],
                buf2[2],
                buf2[3],
                buf2[4],
                buf2[5],
                buf2[6],
                buf2[7],
                &buf4[0]);
        (**it).setReal(buf4[IQOrder ? 0 : 1] >> decimation_shifts::post4);
        (**it).setImag(buf4[IQOrder ? 1 : 0] >> decimation_shifts::post4);
        ++(*it);
        (**it).setReal(buf4[IQOrder ? 2 : 3] >> decimation_shifts::post4);
        (**it).setImag(buf4[IQOrder ? 3 : 2] >> decimation_shifts::post4);
        ++(*it);
    }
}
template
void Decimators::decimate4_inf_txsync(SampleVector::iterator* it, const T* buf, qint32 len)
{
    StorageType buf2[8], buf4[4];
    for (int pos = 0; pos < len - 15; pos += 16)
    {
        m_decimator2s.myDecimateInf(
                buf[pos+0] << decimation_shifts::pre4,
                buf[pos+1] << decimation_shifts::pre4,
                buf[pos+2] << decimation_shifts::pre4,
                buf[pos+3] << decimation_shifts::pre4,
                buf[pos+4] << decimation_shifts::pre4,
                buf[pos+5] << decimation_shifts::pre4,
                buf[pos+6] << decimation_shifts::pre4,
                buf[pos+7] << decimation_shifts::pre4,
                &buf2[0]);
        m_decimator2s.myDecimateInf(
                buf[pos+8] << decimation_shifts::pre4,
                buf[pos+9] << decimation_shifts::pre4,
                buf[pos+10] << decimation_shifts::pre4,
                buf[pos+11] << decimation_shifts::pre4,
                buf[pos+12] << decimation_shifts::pre4,
                buf[pos+13] << decimation_shifts::pre4,
                buf[pos+14] << decimation_shifts::pre4,
                buf[pos+15] << decimation_shifts::pre4,
                &buf2[4]);
        m_decimator4.myDecimateInf(
                buf2[0],
                buf2[1],
                buf2[2],
                buf2[3],
                buf2[4],
                buf2[5],
                buf2[6],
                buf2[7],
                &buf4[0]);
        (**it).setReal(buf4[IQOrder? 0 : 1] >> decimation_shifts::post4);
        (**it).setImag(buf4[IQOrder? 1 : 0] >> decimation_shifts::post4);
        ++(*it);
        (**it).setReal(buf4[IQOrder? 2 : 3] >> decimation_shifts::post4);
        (**it).setImag(buf4[IQOrder? 3 : 2] >> decimation_shifts::post4);
        ++(*it);
    }
}
template
void Decimators::decimate4_sup(SampleVector::iterator* it, const T* buf, qint32 len)
{
    StorageType buf2[8], buf4[4];
    for (int pos = 0; pos < len - 15; pos += 16)
    {
        m_decimator2s.myDecimateSup(
                buf[pos+0] << decimation_shifts::pre4,
                buf[pos+1] << decimation_shifts::pre4,
                buf[pos+2] << decimation_shifts::pre4,
                buf[pos+3] << decimation_shifts::pre4,
                buf[pos+4] << decimation_shifts::pre4,
                buf[pos+5] << decimation_shifts::pre4,
                buf[pos+6] << decimation_shifts::pre4,
                buf[pos+7] << decimation_shifts::pre4,
                &buf2[0]);
        m_decimator2s.myDecimateSup(
                buf[pos+8] << decimation_shifts::pre4,
                buf[pos+9] << decimation_shifts::pre4,
                buf[pos+10] << decimation_shifts::pre4,
                buf[pos+11] << decimation_shifts::pre4,
                buf[pos+12] << decimation_shifts::pre4,
                buf[pos+13] << decimation_shifts::pre4,
                buf[pos+14] << decimation_shifts::pre4,
                buf[pos+15] << decimation_shifts::pre4,
                &buf2[4]);
        m_decimator4.myDecimateInf(
                buf2[0],
                buf2[1],
                buf2[2],
                buf2[3],
                buf2[4],
                buf2[5],
                buf2[6],
                buf2[7],
                &buf4[0]);
        (**it).setReal(buf4[IQOrder? 0 : 1] >> decimation_shifts::post4);
        (**it).setImag(buf4[IQOrder? 1 : 0] >> decimation_shifts::post4);
        ++(*it);
        (**it).setReal(buf4[IQOrder? 2 : 3] >> decimation_shifts::post4);
        (**it).setImag(buf4[IQOrder? 3 : 2] >> decimation_shifts::post4);
        ++(*it);
    }
}
template
void Decimators::decimate4_sup_txsync(SampleVector::iterator* it, const T* buf, qint32 len)
{
    StorageType buf2[8], buf4[4];
    for (int pos = 0; pos < len - 15; pos += 16)
    {
        m_decimator2s.myDecimateSup(
                buf[pos+0] << decimation_shifts::pre4,
                buf[pos+1] << decimation_shifts::pre4,
                buf[pos+2] << decimation_shifts::pre4,
                buf[pos+3] << decimation_shifts::pre4,
                buf[pos+4] << decimation_shifts::pre4,
                buf[pos+5] << decimation_shifts::pre4,
                buf[pos+6] << decimation_shifts::pre4,
                buf[pos+7] << decimation_shifts::pre4,
                &buf2[0]);
        m_decimator2s.myDecimateSup(
                buf[pos+8] << decimation_shifts::pre4,
                buf[pos+9] << decimation_shifts::pre4,
                buf[pos+10] << decimation_shifts::pre4,
                buf[pos+11] << decimation_shifts::pre4,
                buf[pos+12] << decimation_shifts::pre4,
                buf[pos+13] << decimation_shifts::pre4,
                buf[pos+14] << decimation_shifts::pre4,
                buf[pos+15] << decimation_shifts::pre4,
                &buf2[4]);
        m_decimator4.myDecimateSup(
                buf2[0],
                buf2[1],
                buf2[2],
                buf2[3],
                buf2[4],
                buf2[5],
                buf2[6],
                buf2[7],
                &buf4[0]);
        (**it).setReal(buf4[IQOrder? 0 : 1] >> decimation_shifts::post4);
        (**it).setImag(buf4[IQOrder? 1 : 0] >> decimation_shifts::post4);
        ++(*it);
        (**it).setReal(buf4[IQOrder? 2 : 3] >> decimation_shifts::post4);
        (**it).setImag(buf4[IQOrder? 3 : 2] >> decimation_shifts::post4);
        ++(*it);
    }
}
template
void Decimators::decimate4_cen(SampleVector::iterator* it, const T* buf, qint32 len)
{
	StorageType buf2[8], buf4[4];
	for (int pos = 0; pos < len - 15; pos += 16)
	{
        m_decimator2.myDecimateCen(
                buf[pos+0] << decimation_shifts::pre4,
                buf[pos+1] << decimation_shifts::pre4,
                buf[pos+2] << decimation_shifts::pre4,
                buf[pos+3] << decimation_shifts::pre4,
                buf[pos+4] << decimation_shifts::pre4,
                buf[pos+5] << decimation_shifts::pre4,
                buf[pos+6] << decimation_shifts::pre4,
                buf[pos+7] << decimation_shifts::pre4,
                &buf2[0]);
        m_decimator2.myDecimateCen(
                buf[pos+8] << decimation_shifts::pre4,
                buf[pos+9] << decimation_shifts::pre4,
                buf[pos+10] << decimation_shifts::pre4,
                buf[pos+11] << decimation_shifts::pre4,
                buf[pos+12] << decimation_shifts::pre4,
                buf[pos+13] << decimation_shifts::pre4,
                buf[pos+14] << decimation_shifts::pre4,
                buf[pos+15] << decimation_shifts::pre4,
                &buf2[4]);
        m_decimator4.myDecimateCen(
                &buf2[0],
                &buf4[0]);
        (**it).setReal(buf4[0] >> decimation_shifts::post4);
        (**it).setImag(buf4[1] >> decimation_shifts::post4);
        ++(*it);
        (**it).setReal(buf4[2] >> decimation_shifts::post4);
        (**it).setImag(buf4[3] >> decimation_shifts::post4);
        ++(*it);
	}
}
template
void Decimators::decimate8_inf(SampleVector::iterator* it, const T* buf, qint32 len)
{
    StorageType buf2[16], buf4[8], buf8[4];
	for (int pos = 0; pos < len - 31; pos += 32)
	{
        m_decimator2s.myDecimateInf(
                buf[pos+0] << decimation_shifts::pre8,
                buf[pos+1] << decimation_shifts::pre8,
                buf[pos+2] << decimation_shifts::pre8,
                buf[pos+3] << decimation_shifts::pre8,
                buf[pos+4] << decimation_shifts::pre8,
                buf[pos+5] << decimation_shifts::pre8,
                buf[pos+6] << decimation_shifts::pre8,
                buf[pos+7] << decimation_shifts::pre8,
                &buf2[0]);
        m_decimator2s.myDecimateInf(
                buf[pos+8] << decimation_shifts::pre8,
                buf[pos+9] << decimation_shifts::pre8,
                buf[pos+10] << decimation_shifts::pre8,
                buf[pos+11] << decimation_shifts::pre8,
                buf[pos+12] << decimation_shifts::pre8,
                buf[pos+13] << decimation_shifts::pre8,
                buf[pos+14] << decimation_shifts::pre8,
                buf[pos+15] << decimation_shifts::pre8,
                &buf2[4]);
        m_decimator2s.myDecimateInf(
                buf[pos+16] << decimation_shifts::pre8,
                buf[pos+17] << decimation_shifts::pre8,
                buf[pos+18] << decimation_shifts::pre8,
                buf[pos+19] << decimation_shifts::pre8,
                buf[pos+20] << decimation_shifts::pre8,
                buf[pos+21] << decimation_shifts::pre8,
                buf[pos+22] << decimation_shifts::pre8,
                buf[pos+23] << decimation_shifts::pre8,
                &buf2[8]);
        m_decimator2s.myDecimateInf(
                buf[pos+24] << decimation_shifts::pre8,
                buf[pos+25] << decimation_shifts::pre8,
                buf[pos+26] << decimation_shifts::pre8,
                buf[pos+27] << decimation_shifts::pre8,
                buf[pos+28] << decimation_shifts::pre8,
                buf[pos+29] << decimation_shifts::pre8,
                buf[pos+30] << decimation_shifts::pre8,
                buf[pos+31] << decimation_shifts::pre8,
                &buf2[12]);
        m_decimator4.myDecimateSup(
                &buf2[0],
                &buf4[0]);
        m_decimator4.myDecimateSup(
                &buf2[8],
                &buf4[4]);
        m_decimator8.myDecimateCen(
                &buf4[0],
                &buf8[0]);
        (**it).setReal(buf8[IQOrder? 0 : 1] >> decimation_shifts::post8);
        (**it).setImag(buf8[IQOrder? 1 : 0] >> decimation_shifts::post8);
        ++(*it);
        (**it).setReal(buf8[IQOrder? 2 : 3] >> decimation_shifts::post8);
        (**it).setImag(buf8[IQOrder? 3 : 2] >> decimation_shifts::post8);
        ++(*it);
	}
}
template
void Decimators::decimate8_inf_txsync(SampleVector::iterator* it, const T* buf, qint32 len)
{
    StorageType buf2[16], buf4[8], buf8[4];
	for (int pos = 0; pos < len - 31; pos += 32)
	{
        for (int i = 0; i < 4; i++)
        {
            m_decimator2s.myDecimateInf(
                buf[pos+8*i+0] << decimation_shifts::pre8,
                buf[pos+8*i+1] << decimation_shifts::pre8,
                buf[pos+8*i+2] << decimation_shifts::pre8,
                buf[pos+8*i+3] << decimation_shifts::pre8,
                buf[pos+8*i+4] << decimation_shifts::pre8,
                buf[pos+8*i+5] << decimation_shifts::pre8,
                buf[pos+8*i+6] << decimation_shifts::pre8,
                buf[pos+8*i+7] << decimation_shifts::pre8,
                &buf2[4*i]);
        }
        m_decimator4.myDecimateInf(
                &buf2[0],
                &buf4[0]);
        m_decimator4.myDecimateInf(
                &buf2[8],
                &buf4[4]);
        m_decimator8.myDecimateSup(
                &buf4[0],
                &buf8[0]);
        (**it).setReal(buf8[IQOrder? 0 : 1] >> decimation_shifts::post8);
        (**it).setImag(buf8[IQOrder? 1 : 0] >> decimation_shifts::post8);
        ++(*it);
        (**it).setReal(buf8[IQOrder? 2 : 3] >> decimation_shifts::post8);
        (**it).setImag(buf8[IQOrder? 3 : 2] >> decimation_shifts::post8);
        ++(*it);
	}
}
template
void Decimators::decimate8_sup(SampleVector::iterator* it, const T* buf, qint32 len)
{
    StorageType buf2[16], buf4[8], buf8[4];
    for (int pos = 0; pos < len - 31; pos += 32)
    {
        m_decimator2s.myDecimateSup(
                buf[pos+0] << decimation_shifts::pre8,
                buf[pos+1] << decimation_shifts::pre8,
                buf[pos+2] << decimation_shifts::pre8,
                buf[pos+3] << decimation_shifts::pre8,
                buf[pos+4] << decimation_shifts::pre8,
                buf[pos+5] << decimation_shifts::pre8,
                buf[pos+6] << decimation_shifts::pre8,
                buf[pos+7] << decimation_shifts::pre8,
                &buf2[0]);
        m_decimator2s.myDecimateSup(
                buf[pos+8] << decimation_shifts