Exemple #1
0
        /*
         * Runs every butterfly stage listed in st.factors over fout, in place, starting at
         * index fout_ptr. The buffer is pinned and the pointer-based butterflies are used.
         *
         * st.factors is read as (radix, remaining length) pairs; the pair whose second entry
         * is 1 is the last one. Stages are executed from the last pair back to the first.
         */
        internal static unsafe void opus_fft_impl(FFTState st, int[] fout, int fout_ptr)
        {
            /* strides[s] is the product of the radices of all stages before stage s */
            int[] strides = new int[MAXFACTORS];

            /* st.shift can be -1; anything that is not positive means "no shift" */
            int twiddleShift = (st.shift > 0) ? st.shift : 0;

            strides[0] = 1;

            int stageCount = 0;
            int remaining;
            do
            {
                int radix = st.factors[2 * stageCount];
                remaining = st.factors[2 * stageCount + 1];
                strides[stageCount + 1] = strides[stageCount] * radix;
                stageCount++;
            } while (remaining != 1);

            fixed(int *pinned = fout)
            {
                int *data = pinned + fout_ptr;

                int span = st.factors[2 * stageCount - 1];
                for (int stage = stageCount - 1; stage >= 0; stage--)
                {
                    /* The first stage has no predecessor, so its follow-up span is 1 */
                    int nextSpan = (stage == 0) ? 1 : st.factors[2 * stage - 1];
                    int stride   = strides[stage];

                    switch (st.factors[2 * stage])
                    {
                    case 2:
                        kf_bfly2(data, span, stride);
                        break;

                    case 3:
                        kf_bfly3(data, stride << twiddleShift, st, span, stride, nextSpan);
                        break;

                    case 4:
                        kf_bfly4(data, stride << twiddleShift, st, span, stride, nextSpan);
                        break;

                    case 5:
                        kf_bfly5(data, stride << twiddleShift, st, span, stride, nextSpan);
                        break;
                    }
                    span = nextSpan;
                }
            }
        }
Exemple #2
0
        /*
         * Radix-3 butterfly on interleaved (real, imaginary) ints stored in Fout, in place.
         *
         * Fout/fout_ptr - buffer and index of the first value to process.
         * fstride       - twiddle step; the two twiddle cursors advance by 2*fstride and 4*fstride.
         * st            - supplies st.twiddles, read as interleaved (real, imaginary) entries.
         * m             - butterflies per group; the three inputs of one butterfly are 2*m ints apart.
         * N             - number of groups.
         * mm            - group spacing; group g starts at fout_ptr + 2*g*mm.
         */
        internal static void kf_bfly3(
            int[] Fout,
            int fout_ptr,
            int fstride,
            FFTState st,
            int m,
            int N,
            int mm
            )
        {
            int secondOffset = 2 * m;
            int thirdOffset  = 4 * m;
            int origin       = fout_ptr;

            for (int group = 0; group < N; group++)
            {
                int a   = origin + 2 * group * mm;
                int twB = 0;
                int twC = 0;

                /* For non-custom modes, m is guaranteed to be a multiple of 4. */
                int left = m;
                do
                {
                    int b = a + secondOffset;
                    int c = a + thirdOffset;

                    /* Second and third inputs multiplied by their twiddles */
                    int bRe = (S_MUL(Fout[b], st.twiddles[twB]) - S_MUL(Fout[b + 1], st.twiddles[twB + 1]));
                    int bIm = (S_MUL(Fout[b], st.twiddles[twB + 1]) + S_MUL(Fout[b + 1], st.twiddles[twB]));
                    int cRe = (S_MUL(Fout[c], st.twiddles[twC]) - S_MUL(Fout[c + 1], st.twiddles[twC + 1]));
                    int cIm = (S_MUL(Fout[c], st.twiddles[twC + 1]) + S_MUL(Fout[c + 1], st.twiddles[twC]));

                    twB += fstride * 2;
                    twC += fstride * 4;

                    int sumRe = bRe + cRe;
                    int sumIm = bIm + cIm;
                    int difRe = bRe - cRe;
                    int difIm = bIm - cIm;

                    Fout[b]     = Fout[a] - HALF_OF(sumRe);
                    Fout[b + 1] = Fout[a + 1] - HALF_OF(sumIm);

                    /* -28378 / 32768 is about -0.866 (-sin 60 deg); presumably a Q15 constant for S_MUL */
                    int rotRe = S_MUL(difRe, -28378);
                    int rotIm = S_MUL(difIm, -28378);

                    Fout[a]     += sumRe;
                    Fout[a + 1] += sumIm;

                    /* The third output is built from the half-finished second one before that is completed */
                    Fout[c]     = Fout[b] + rotIm;
                    Fout[c + 1] = Fout[b + 1] - rotRe;

                    Fout[b]     -= rotIm;
                    Fout[b + 1] += rotRe;

                    a += 2;
                } while ((--left) != 0);
            }
        }
Exemple #3
0
        /*
         * Pointer variant of the radix-3 butterfly: works in place on interleaved
         * (real, imaginary) ints starting at Fout.
         *
         * fstride - twiddle step; the two twiddle cursors advance by 2*fstride and 4*fstride.
         * st      - supplies st.twiddles, read as interleaved (real, imaginary) entries.
         * m       - butterflies per group; the three inputs of one butterfly are 2*m ints apart.
         * N       - number of groups.
         * mm      - group spacing; group g starts at Fout + 2*g*mm.
         */
        internal static unsafe void kf_bfly3(
            int *Fout,
            int fstride,
            FFTState st,
            int m,
            int N,
            int mm
            )
        {
            int  secondOffset = 2 * m;
            int  thirdOffset  = 4 * m;
            int *origin       = Fout;

            for (int group = 0; group < N; group++)
            {
                int *a   = origin + 2 * group * mm;
                int  twB = 0;
                int  twC = 0;

                /* For non-custom modes, m is guaranteed to be a multiple of 4. */
                int left = m;
                do
                {
                    int *b = a + secondOffset;
                    int *c = a + thirdOffset;

                    /* Second and third inputs multiplied by their twiddles */
                    int bRe = (S_MUL(b[0], st.twiddles[twB]) - S_MUL(b[1], st.twiddles[twB + 1]));
                    int bIm = (S_MUL(b[0], st.twiddles[twB + 1]) + S_MUL(b[1], st.twiddles[twB]));
                    int cRe = (S_MUL(c[0], st.twiddles[twC]) - S_MUL(c[1], st.twiddles[twC + 1]));
                    int cIm = (S_MUL(c[0], st.twiddles[twC + 1]) + S_MUL(c[1], st.twiddles[twC]));

                    twB += fstride * 2;
                    twC += fstride * 4;

                    int sumRe = bRe + cRe;
                    int sumIm = bIm + cIm;
                    int difRe = bRe - cRe;
                    int difIm = bIm - cIm;

                    b[0] = a[0] - HALF_OF(sumRe);
                    b[1] = a[1] - HALF_OF(sumIm);

                    /* -28378 / 32768 is about -0.866 (-sin 60 deg); presumably a Q15 constant for S_MUL */
                    int rotRe = S_MUL(difRe, -28378);
                    int rotIm = S_MUL(difIm, -28378);

                    a[0] += sumRe;
                    a[1] += sumIm;

                    /* The third output is built from the half-finished second one before that is completed */
                    c[0] = b[0] + rotIm;
                    c[1] = b[1] - rotRe;

                    b[0] -= rotIm;
                    b[1] += rotRe;

                    a += 2;
                } while ((--left) != 0);
            }
        }
Exemple #4
0
        /*
         * Runs every butterfly stage listed in st.factors over fout, in place, starting at
         * index fout_ptr, using the array-indexed butterflies.
         *
         * st.factors is read as (radix, remaining length) pairs; the pair whose second entry
         * is 1 is the last one. Stages are executed from the last pair back to the first.
         * The stride table is taken from fstrides.Value rather than allocated per call.
         */
        internal static void opus_fft_impl(FFTState st, int[] fout, int fout_ptr)
        {
            /* strides[s] is the product of the radices of all stages before stage s */
            int[] strides = fstrides.Value;

            /* st.shift can be -1; anything that is not positive means "no shift" */
            int twiddleShift = (st.shift > 0) ? st.shift : 0;

            strides[0] = 1;

            int stageCount = 0;
            int remaining;
            do
            {
                int radix = st.factors[2 * stageCount];
                remaining = st.factors[2 * stageCount + 1];
                strides[stageCount + 1] = strides[stageCount] * radix;
                stageCount++;
            } while (remaining != 1);

            int span = st.factors[2 * stageCount - 1];
            for (int stage = stageCount - 1; stage >= 0; stage--)
            {
                /* The first stage has no predecessor, so its follow-up span is 1 */
                int nextSpan = (stage == 0) ? 1 : st.factors[2 * stage - 1];
                int stride   = strides[stage];

                switch (st.factors[2 * stage])
                {
                case 2:
                    kf_bfly2(fout, fout_ptr, span, stride);
                    break;

                case 3:
                    kf_bfly3(fout, fout_ptr, stride << twiddleShift, st, span, stride, nextSpan);
                    break;

                case 4:
                    kf_bfly4(fout, fout_ptr, stride << twiddleShift, st, span, stride, nextSpan);
                    break;

                case 5:
                    kf_bfly5(fout, fout_ptr, stride << twiddleShift, st, span, stride, nextSpan);
                    break;
                }
                span = nextSpan;
            }
        }
Exemple #5
0
        /*
         * Out-of-place FFT entry point: copies fin into fout in the order given by st.bitrev,
         * scaling every value on the way, then runs opus_fft_impl on fout from index 0.
         *
         * st   - FFT state (nfft, bitrev, scale, scale_shift).
         * fin  - input, read as interleaved (real, imaginary) ints.
         * fout - output, must be a different array from fin.
         */
        internal static void opus_fft(FFTState st, int[] fin, int[] fout)
        {
            /* Allows us to scale with MULT16_32_Q16() */
            int   downshift = st.scale_shift - 1;
            short gain      = st.scale;

            Inlines.OpusAssert(fin != fout, "In-place FFT not supported");

            /* Bit-reverse the input */
            for (int bin = 0; bin < st.nfft; bin++)
            {
                int scaledRe = Inlines.SHR32(Inlines.MULT16_32_Q16(gain, fin[(2 * bin)]), downshift);
                fout[(2 * st.bitrev[bin])] = scaledRe;

                int scaledIm = Inlines.SHR32(Inlines.MULT16_32_Q16(gain, fin[(2 * bin) + 1]), downshift);
                fout[(2 * st.bitrev[bin] + 1)] = scaledIm;
            }

            opus_fft_impl(st, fout, 0);
        }
Exemple #6
0
        /*
         * Forward MDCT. Windows and folds `input` into a temporary buffer, pre-rotates that buffer
         * with the trig table while scaling and bit-reversing it, runs KissFFT.opus_fft_impl on the
         * result, and post-rotates the FFT output into `output`.
         *
         * l          - lookup supplying the base size (l.n), the trig table (l.trig) and one
         *              FFT state per shift (l.kfft[shift]).
         * input      - source samples, read relative to input_ptr.
         * output     - destination, written relative to output_ptr.
         * window     - window coefficients; indices 0 .. overlap-1 are used.
         * overlap    - window length; it also sets how many folded pairs are windowed.
         * shift      - number of times l.n is halved to obtain the transform size N.
         * stride     - spacing, in elements, between consecutive output coefficients.
         *
         * Original note: "Forward MDCT trashes the input array".
         * NOTE(review): this body only reads `input`; the folded data goes to a freshly allocated
         * buffer, so the note may be a leftover from another implementation - confirm.
         */
        internal static void clt_mdct_forward(MDCTLookup l, int[] input, int input_ptr, int[] output, int output_ptr,
                                              int[] window, int overlap, int shift, int stride)
        {
            int i;
            int N, N2, N4;

            int[]    f;
            int[]    f2;
            FFTState st = l.kfft[shift];

            short[] trig;
            int     trig_ptr = 0;
            int     scale;

            int scale_shift = st.scale_shift - 1;

            scale = st.scale;

            N    = l.n;
            trig = l.trig;
            /* Halve N once per shift and move the trig offset forward by each halved size.
             * Presumably the tables for successive sizes are stored back to back - verify. */
            for (i = 0; i < shift; i++)
            {
                N         = N >> 1;
                trig_ptr += N;
            }
            N2 = N >> 1;
            N4 = N >> 2;

            /* f holds the folded input (N4 pairs); f2 holds the bit-reversed FFT input/output */
            f  = new int[N2];
            f2 = new int[N4 * 2];

            /* Consider the input to be composed of four blocks: [a, b, c, d] */
            /* Window, shuffle, fold */
            {
                /* Temp pointers to make it really clear to the compiler what we're doing */
                /* xp1 walks forward and xp2 backward through input; wp1/wp2 do the same in window */
                int xp1 = input_ptr + (overlap >> 1);
                int xp2 = input_ptr + N2 - 1 + (overlap >> 1);
                int yp  = 0;
                int wp1 = (overlap >> 1);
                int wp2 = ((overlap >> 1) - 1);
                /* First (overlap + 3) >> 2 pairs: windowed, each mixing two samples N2 apart */
                for (i = 0; i < ((overlap + 3) >> 2); i++)
                {
                    /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
                    f[yp++] = Inlines.MULT16_32_Q15(window[wp2], input[xp1 + N2]) + Inlines.MULT16_32_Q15(window[wp1], input[xp2]);
                    f[yp++] = Inlines.MULT16_32_Q15(window[wp1], input[xp1]) - Inlines.MULT16_32_Q15(window[wp2], input[xp2 - N2]);
                    xp1    += 2;
                    xp2    -= 2;
                    wp1    += 2;
                    wp2    -= 2;
                }
                /* Restart the window cursors at both ends for the final windowed section */
                wp1 = 0;
                wp2 = (overlap - 1);
                /* Middle pairs: copied straight from input, no window applied */
                for (; i < N4 - ((overlap + 3) >> 2); i++)
                {
                    /* Real part arranged as a-bR, Imag part arranged as -c-dR */
                    f[yp++] = input[xp2];
                    f[yp++] = input[xp1];
                    xp1    += 2;
                    xp2    -= 2;
                }
                /* Remaining pairs up to N4: windowed again, with the N2 offsets mirrored */
                for (; i < N4; i++)
                {
                    /* Real part arranged as a-bR, Imag part arranged as -c-dR */
                    f[yp++] = Inlines.MULT16_32_Q15(window[wp2], input[xp2]) - Inlines.MULT16_32_Q15(window[wp1], input[xp1 - N2]);
                    f[yp++] = Inlines.MULT16_32_Q15(window[wp2], input[xp1]) + Inlines.MULT16_32_Q15(window[wp1], input[xp2 + N2]);
                    xp1    += 2;
                    xp2    -= 2;
                    wp1    += 2;
                    wp2    -= 2;
                }
            }
            /* Pre-rotation */
            {
                int yp = 0;
                int t  = trig_ptr;
                for (i = 0; i < N4; i++)
                {
                    short t0, t1;
                    int   re, im, yr, yi;
                    /* The two trig factors for pair i sit N4 entries apart */
                    t0 = trig[t + i];
                    t1 = trig[t + N4 + i];
                    re = f[yp++];
                    im = f[yp++];
                    yr = KissFFT.S_MUL(re, t0) - KissFFT.S_MUL(im, t1);
                    yi = KissFFT.S_MUL(im, t0) + KissFFT.S_MUL(re, t1);
                    /* Scale and store at the bit-reversed slot so the FFT can run directly on f2 */
                    f2[2 * st.bitrev[i]]     = Inlines.PSHR32(Inlines.MULT16_32_Q16(scale, yr), scale_shift);
                    f2[2 * st.bitrev[i] + 1] = Inlines.PSHR32(Inlines.MULT16_32_Q16(scale, yi), scale_shift);
                }
            }

            /* N/4 complex FFT, does not downscale anymore */
            KissFFT.opus_fft_impl(st, f2, 0);

            /* Post-rotate */
            {
                /* Temp pointers to make it really clear to the compiler what we're doing */
                /* yp1 starts at the first output slot and moves forward, yp2 starts at slot N2-1
                 * and moves backward, both in steps of two slots (2 * stride elements) */
                int fp  = 0;
                int yp1 = output_ptr;
                int yp2 = output_ptr + (stride * (N2 - 1));
                int t   = trig_ptr;
                for (i = 0; i < N4; i++)
                {
                    int yr, yi;
                    yr          = KissFFT.S_MUL(f2[fp + 1], trig[t + N4 + i]) - KissFFT.S_MUL(f2[fp], trig[t + i]);
                    yi          = KissFFT.S_MUL(f2[fp], trig[t + N4 + i]) + KissFFT.S_MUL(f2[fp + 1], trig[t + i]);
                    output[yp1] = yr;
                    output[yp2] = yi;
                    fp         += 2;
                    yp1        += (2 * stride);
                    yp2        -= (2 * stride);
                }
            }
        }
Exemple #7
0
        /*
         * Radix-5 butterfly on interleaved (real, imaginary) ints stored in Fout, in place.
         *
         * Fout/fout_ptr - buffer and index of the first value to process.
         * fstride       - twiddle step; the four twiddle cursors advance by 2, 4, 6 and 8 times fstride.
         * st            - supplies st.twiddles, read as interleaved (real, imaginary) entries.
         * m             - butterflies per group; the five inputs of one butterfly are 2*m ints apart.
         * N             - number of groups.
         * mm            - group spacing; group i starts at fout_ptr + 2*i*mm.
         */
        internal static void kf_bfly5(
            int[] Fout,
            int fout_ptr,
            int fstride,
            FFTState st,
            int m,
            int N,
            int mm
            )
        {
            int Fout0, Fout1, Fout2, Fout3, Fout4;
            int i, u;
            int scratch0, scratch1, scratch2, scratch3, scratch4, scratch5,
                scratch6, scratch7, scratch8, scratch9, scratch10, scratch11,
                scratch12, scratch13, scratch14, scratch15, scratch16, scratch17,
                scratch18, scratch19, scratch20, scratch21, scratch22, scratch23,
                scratch24, scratch25;

            int Fout_beg = fout_ptr;

            /* Divided by 32768 these are about (0.309, -0.951) and (-0.809, -0.588), i.e. the
             * cos/-sin of 72 and 144 degrees; presumably Q15 constants for S_MUL - confirm. */
            short ya_r = 10126;
            short ya_i = -31164;
            short yb_r = -26510;
            short yb_i = -19261;
            int   tw1, tw2, tw3, tw4;

            for (i = 0; i < N; i++)
            {
                tw1      = tw2 = tw3 = tw4 = 0;
                fout_ptr = Fout_beg + 2 * i * mm;
                /* Indices of the five inputs of the first butterfly in this group */
                Fout0    = fout_ptr;
                Fout1    = fout_ptr + (2 * m);
                Fout2    = fout_ptr + (4 * m);
                Fout3    = fout_ptr + (6 * m);
                Fout4    = fout_ptr + (8 * m);

                /* For non-custom modes, m is guaranteed to be a multiple of 4. */
                for (u = 0; u < m; ++u)
                {
                    /* Keep the untouched first input; Fout0 is overwritten below */
                    scratch0 = Fout[Fout0 + 0];
                    scratch1 = Fout[Fout0 + 1];

                    /* Inputs 1..4 multiplied by their twiddles (real, imaginary pairs) */
                    scratch2 = (S_MUL(Fout[Fout1 + 0], st.twiddles[tw1]) - S_MUL(Fout[Fout1 + 1], st.twiddles[tw1 + 1]));
                    scratch3 = (S_MUL(Fout[Fout1 + 0], st.twiddles[tw1 + 1]) + S_MUL(Fout[Fout1 + 1], st.twiddles[tw1]));
                    scratch4 = (S_MUL(Fout[Fout2 + 0], st.twiddles[tw2]) - S_MUL(Fout[Fout2 + 1], st.twiddles[tw2 + 1]));
                    scratch5 = (S_MUL(Fout[Fout2 + 0], st.twiddles[tw2 + 1]) + S_MUL(Fout[Fout2 + 1], st.twiddles[tw2]));
                    scratch6 = (S_MUL(Fout[Fout3 + 0], st.twiddles[tw3]) - S_MUL(Fout[Fout3 + 1], st.twiddles[tw3 + 1]));
                    scratch7 = (S_MUL(Fout[Fout3 + 0], st.twiddles[tw3 + 1]) + S_MUL(Fout[Fout3 + 1], st.twiddles[tw3]));
                    scratch8 = (S_MUL(Fout[Fout4 + 0], st.twiddles[tw4]) - S_MUL(Fout[Fout4 + 1], st.twiddles[tw4 + 1]));
                    scratch9 = (S_MUL(Fout[Fout4 + 0], st.twiddles[tw4 + 1]) + S_MUL(Fout[Fout4 + 1], st.twiddles[tw4]));

                    tw1 += (2 * fstride);
                    tw2 += (4 * fstride);
                    tw3 += (6 * fstride);
                    tw4 += (8 * fstride);

                    /* Sums and differences of the pairs (1, 4) and (2, 3) */
                    scratch14 = scratch2 + scratch8;
                    scratch15 = scratch3 + scratch9;
                    scratch20 = scratch2 - scratch8;
                    scratch21 = scratch3 - scratch9;
                    scratch16 = scratch4 + scratch6;
                    scratch17 = scratch5 + scratch7;
                    scratch18 = scratch4 - scratch6;
                    scratch19 = scratch5 - scratch7;

                    /* Output 0 is the first input plus all four twiddled inputs */
                    Fout[Fout0 + 0] += scratch14 + scratch16;
                    Fout[Fout0 + 1] += scratch15 + scratch17;

                    scratch10 = scratch0 + S_MUL(scratch14, ya_r) + S_MUL(scratch16, yb_r);
                    scratch11 = scratch1 + S_MUL(scratch15, ya_r) + S_MUL(scratch17, yb_r);

                    scratch12 = S_MUL(scratch21, ya_i) + S_MUL(scratch19, yb_i);
                    scratch13 = 0 - S_MUL(scratch20, ya_i) - S_MUL(scratch18, yb_i);

                    /* Outputs 1 and 4 share the same two terms with opposite signs */
                    Fout[Fout1 + 0] = scratch10 - scratch12;
                    Fout[Fout1 + 1] = scratch11 - scratch13;
                    Fout[Fout4 + 0] = scratch10 + scratch12;
                    Fout[Fout4 + 1] = scratch11 + scratch13;

                    scratch22 = scratch0 + S_MUL(scratch14, yb_r) + S_MUL(scratch16, ya_r);
                    scratch23 = scratch1 + S_MUL(scratch15, yb_r) + S_MUL(scratch17, ya_r);
                    scratch24 = 0 - S_MUL(scratch21, yb_i) + S_MUL(scratch19, ya_i);
                    scratch25 = S_MUL(scratch20, yb_i) - S_MUL(scratch18, ya_i);

                    /* Outputs 2 and 3 likewise, with the roles of the ya/yb constants swapped */
                    Fout[Fout2 + 0] = scratch22 + scratch24;
                    Fout[Fout2 + 1] = scratch23 + scratch25;
                    Fout[Fout3 + 0] = scratch22 - scratch24;
                    Fout[Fout3 + 1] = scratch23 - scratch25;

                    /* Advance all five cursors to the next (real, imaginary) pair */
                    Fout0 += 2;
                    Fout1 += 2;
                    Fout2 += 2;
                    Fout3 += 2;
                    Fout4 += 2;
                }
            }
        }
Exemple #8
0
        /*
         * Radix-4 butterfly on interleaved (real, imaginary) ints stored in Fout, in place.
         *
         * Fout/fout_ptr - buffer and index of the first value to process.
         * fstride       - twiddle step; the three twiddle cursors advance by 2, 4 and 6 times fstride.
         * st            - supplies st.twiddles, read as interleaved (real, imaginary) entries.
         * m             - butterflies per group; m == 1 takes a twiddle-free path over N
         *                 consecutive blocks of 8 ints.
         * N             - number of groups.
         * mm            - group spacing for m != 1; group g starts at fout_ptr + 2*g*mm.
         */
        internal static void kf_bfly4(
            int[] Fout,
            int fout_ptr,
            int fstride,
            FFTState st,
            int m,
            int N,
            int mm)
        {
            if (m == 1)
            {
                /* Degenerate case where all the twiddles are 1. */
                int p = fout_ptr;
                for (int n = 0; n < N; n++)
                {
                    int evenDifRe = Fout[p + 0] - Fout[p + 4];
                    int evenDifIm = Fout[p + 1] - Fout[p + 5];
                    Fout[p + 0] += Fout[p + 4];
                    Fout[p + 1] += Fout[p + 5];

                    int oddSumRe = Fout[p + 2] + Fout[p + 6];
                    int oddSumIm = Fout[p + 3] + Fout[p + 7];
                    /* Uses the partially updated pair 0 before the odd sum is folded into it */
                    Fout[p + 4]  = Fout[p + 0] - oddSumRe;
                    Fout[p + 5]  = Fout[p + 1] - oddSumIm;
                    Fout[p + 0] += oddSumRe;
                    Fout[p + 1] += oddSumIm;

                    int oddDifRe = Fout[p + 2] - Fout[p + 6];
                    int oddDifIm = Fout[p + 3] - Fout[p + 7];
                    Fout[p + 2] = evenDifRe + oddDifIm;
                    Fout[p + 3] = evenDifIm - oddDifRe;
                    Fout[p + 6] = evenDifRe - oddDifIm;
                    Fout[p + 7] = evenDifIm + oddDifRe;

                    p += 8;
                }
                return;
            }

            int origin = fout_ptr;
            for (int group = 0; group < N; group++)
            {
                /* a, b, c, d index the four inputs of the current butterfly */
                int a = origin + 2 * group * mm;
                int b = a + (2 * m);
                int c = a + (4 * m);
                int d = a + (6 * m);
                int twB = 0;
                int twC = 0;
                int twD = 0;

                /* m is guaranteed to be a multiple of 4. */
                for (int j = 0; j < m; j++)
                {
                    /* All three twiddle products are taken before anything is written back */
                    int bRe = (S_MUL(Fout[b], st.twiddles[twB]) - S_MUL(Fout[b + 1], st.twiddles[twB + 1]));
                    int bIm = (S_MUL(Fout[b], st.twiddles[twB + 1]) + S_MUL(Fout[b + 1], st.twiddles[twB]));
                    int cRe = (S_MUL(Fout[c], st.twiddles[twC]) - S_MUL(Fout[c + 1], st.twiddles[twC + 1]));
                    int cIm = (S_MUL(Fout[c], st.twiddles[twC + 1]) + S_MUL(Fout[c + 1], st.twiddles[twC]));
                    int dRe = (S_MUL(Fout[d], st.twiddles[twD]) - S_MUL(Fout[d + 1], st.twiddles[twD + 1]));
                    int dIm = (S_MUL(Fout[d], st.twiddles[twD + 1]) + S_MUL(Fout[d + 1], st.twiddles[twD]));

                    twB += fstride * 2;
                    twC += fstride * 4;
                    twD += fstride * 6;

                    int lowRe = Fout[a] - cRe;
                    int lowIm = Fout[a + 1] - cIm;
                    Fout[a]     += cRe;
                    Fout[a + 1] += cIm;

                    int sumRe = bRe + dRe;
                    int sumIm = bIm + dIm;
                    int difRe = bRe - dRe;
                    int difIm = bIm - dIm;

                    Fout[c]     = Fout[a] - sumRe;
                    Fout[c + 1] = Fout[a + 1] - sumIm;
                    Fout[a]     += sumRe;
                    Fout[a + 1] += sumIm;

                    Fout[b]     = lowRe + difIm;
                    Fout[b + 1] = lowIm - difRe;
                    Fout[d]     = lowRe - difIm;
                    Fout[d + 1] = lowIm + difRe;

                    a += 2;
                    b += 2;
                    c += 2;
                    d += 2;
                }
            }
        }
Exemple #9
0
        /*
         * Forward MDCT, pointer variant. Pins the trig table, input, window and the two temporary
         * buffers, windows and folds the input, pre-rotates it while scaling and bit-reversing,
         * runs KissFFT.opus_fft_impl on the result, and post-rotates the FFT output into `output`.
         *
         * l          - lookup supplying the base size (l.n), the trig table (l.trig) and one
         *              FFT state per shift (l.kfft[shift]).
         * input      - source samples, read relative to input_ptr.
         * output     - destination, written relative to output_ptr.
         * window     - window coefficients; indices 0 .. overlap-1 are used.
         * overlap    - window length; it also sets how many folded pairs are windowed.
         * shift      - number of times l.n is halved to obtain the transform size N.
         * stride     - spacing, in elements, between consecutive output coefficients.
         *
         * Original note: "Forward MDCT trashes the input array".
         * NOTE(review): this body only reads through the input pointer; the folded data goes to a
         * freshly allocated buffer, so the note may be a leftover from another implementation - confirm.
         */
        internal static unsafe void clt_mdct_forward(MDCTLookup l, int[] input, int input_ptr, int[] output, int output_ptr,
                                                     int[] window, int overlap, int shift, int stride)
        {
            int i;
            int N, N2, N4;

            int[]    f;
            int[]    f2;
            FFTState st = l.kfft[shift];
            int      scale;

            int scale_shift = st.scale_shift - 1;

            scale = st.scale;

            N = l.n;
            fixed(short *ptrig_base = l.trig)
            {
                short *trig = ptrig_base;

                /* Halve N once per shift and move the trig pointer forward by each halved size.
                 * Presumably the tables for successive sizes are stored back to back - verify. */
                for (i = 0; i < shift; i++)
                {
                    N     = N >> 1;
                    trig += N;
                }
                N2 = N >> 1;
                N4 = N >> 2;

                /* f holds the folded input (N4 pairs); f2 holds the bit-reversed FFT input/output */
                f  = new int[N2];
                f2 = new int[N4 * 2];
                fixed(int *pinput_base = input, pwindow = window, pf = f, pf2 = f2)
                {
                    int *pinput = pinput_base + input_ptr;

                    /* Consider the input to be composed of four blocks: [a, b, c, d] */
                    /* Window, shuffle, fold */
                    {
                        /* Temp pointers to make it really clear to the compiler what we're doing */
                        /* xp1 walks forward and xp2 backward through input; wp1/wp2 do the same in window */
                        int *xp1 = pinput + (overlap >> 1);
                        int *xp2 = pinput + N2 - 1 + (overlap >> 1);
                        int *yp  = pf;
                        int *wp1 = pwindow + (overlap >> 1);
                        int *wp2 = pwindow + ((overlap >> 1) - 1);
                        /* First (overlap + 3) >> 2 pairs: windowed, each mixing two samples N2 apart */
                        for (i = 0; i < ((overlap + 3) >> 2); i++)
                        {
                            /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
                            *yp++ = Inlines.MULT16_32_Q15(*wp2, xp1[N2]) + Inlines.MULT16_32_Q15(*wp1, *xp2);
                            *yp++ = Inlines.MULT16_32_Q15(*wp1, *xp1) - Inlines.MULT16_32_Q15(*wp2, xp2[0 - N2]);
                            xp1 += 2;
                            xp2 -= 2;
                            wp1 += 2;
                            wp2 -= 2;
                        }
                        /* Restart the window cursors at both ends for the final windowed section */
                        wp1 = pwindow;
                        wp2 = pwindow + (overlap - 1);
                        /* Middle pairs: copied straight from input, no window applied */
                        for (; i < N4 - ((overlap + 3) >> 2); i++)
                        {
                            /* Real part arranged as a-bR, Imag part arranged as -c-dR */
                            *yp++ = *xp2;
                            *yp++ = *xp1;
                            xp1 += 2;
                            xp2 -= 2;
                        }
                        /* Remaining pairs up to N4: windowed again, with the N2 offsets mirrored */
                        for (; i < N4; i++)
                        {
                            /* Real part arranged as a-bR, Imag part arranged as -c-dR */
                            *yp++ = Inlines.MULT16_32_Q15(*wp2, *xp2) - Inlines.MULT16_32_Q15(*wp1, xp1[0 - N2]);
                            *yp++ = Inlines.MULT16_32_Q15(*wp2, *xp1) + Inlines.MULT16_32_Q15(*wp1, xp2[N2]);
                            xp1 += 2;
                            xp2 -= 2;
                            wp1 += 2;
                            wp2 -= 2;
                        }
                    }
                    /* Pre-rotation */
                    {
                        int *  yp = pf;
                        short *t  = trig;
                        for (i = 0; i < N4; i++)
                        {
                            short t0, t1;
                            int   re, im, yr, yi;
                            /* The two trig factors for pair i sit N4 entries apart */
                            t0 = t[i];
                            t1 = t[N4 + i];
                            re = *yp++;
                            im = *yp++;
                            yr = KissFFT.S_MUL(re, t0) - KissFFT.S_MUL(im, t1);
                            yi = KissFFT.S_MUL(im, t0) + KissFFT.S_MUL(re, t1);
                            /* Scale and store at the bit-reversed slot so the FFT can run directly on f2 */
                            pf2[2 * st.bitrev[i]]     = Inlines.PSHR32(Inlines.MULT16_32_Q16(scale, yr), scale_shift);
                            pf2[2 * st.bitrev[i] + 1] = Inlines.PSHR32(Inlines.MULT16_32_Q16(scale, yi), scale_shift);
                        }
                    }

                    /* N/4 complex FFT, does not downscale anymore */
                    /* The managed array f2 is passed here; pf2 is the pinned pointer to that same array */
                    KissFFT.opus_fft_impl(st, f2, 0);

                    /* Post-rotate */
                    fixed(int *poutput_base = output)
                    {
                        /* Temp pointers to make it really clear to the compiler what we're doing */
                        /* yp1 starts at the first output slot and moves forward, yp2 starts at slot N2-1
                         * and moves backward, both in steps of two slots (2 * stride elements) */
                        int *  fp  = pf2;
                        int *  yp1 = poutput_base + output_ptr;
                        int *  yp2 = poutput_base + output_ptr + (stride * (N2 - 1));
                        short *t   = trig;

                        for (i = 0; i < N4; i++)
                        {
                            int yr, yi;
                            yr = KissFFT.S_MUL(fp[1], t[N4 + i]) - KissFFT.S_MUL(fp[0], t[i]);
                            yi = KissFFT.S_MUL(fp[0], t[N4 + i]) + KissFFT.S_MUL(fp[1], t[i]);
                            *yp1 = yr;
                            *yp2 = yi;
                            fp  += 2;
                            yp1 += (2 * stride);
                            yp2 -= (2 * stride);
                        }
                    }
                }
            }
        }
Exemple #10
0
        /*
         * Pointer variant of the radix-4 butterfly: works in place on interleaved
         * (real, imaginary) ints starting at Fout.
         *
         * fstride - twiddle step; the three twiddle cursors advance by 2, 4 and 6 times fstride.
         * st      - supplies st.twiddles, read as interleaved (real, imaginary) entries.
         * m       - butterflies per group; m == 1 takes a twiddle-free path over N
         *           consecutive blocks of 8 ints.
         * N       - number of groups.
         * mm      - group spacing for m != 1; group g starts at Fout + 2*g*mm.
         */
        internal static unsafe void kf_bfly4(
            int *Fout,
            int fstride,
            FFTState st,
            int m,
            int N,
            int mm)
        {
            if (m == 1)
            {
                /* Degenerate case where all the twiddles are 1. */
                int *p = Fout;
                for (int n = 0; n < N; n++)
                {
                    int evenDifRe = p[0] - p[4];
                    int evenDifIm = p[1] - p[5];
                    p[0] += p[4];
                    p[1] += p[5];

                    int oddSumRe = p[2] + p[6];
                    int oddSumIm = p[3] + p[7];
                    /* Uses the partially updated pair 0 before the odd sum is folded into it */
                    p[4]  = p[0] - oddSumRe;
                    p[5]  = p[1] - oddSumIm;
                    p[0] += oddSumRe;
                    p[1] += oddSumIm;

                    int oddDifRe = p[2] - p[6];
                    int oddDifIm = p[3] - p[7];
                    p[2] = evenDifRe + oddDifIm;
                    p[3] = evenDifIm - oddDifRe;
                    p[6] = evenDifRe - oddDifIm;
                    p[7] = evenDifIm + oddDifRe;

                    p += 8;
                }
                return;
            }

            int *origin = Fout;
            for (int group = 0; group < N; group++)
            {
                /* a, b, c, d point at the four inputs of the current butterfly */
                int *a = origin + 2 * group * mm;
                int *b = a + (2 * m);
                int *c = a + (4 * m);
                int *d = a + (6 * m);
                int  twB = 0;
                int  twC = 0;
                int  twD = 0;

                /* m is guaranteed to be a multiple of 4. */
                for (int j = 0; j < m; j++)
                {
                    /* All three twiddle products are taken before anything is written back */
                    int bRe = (S_MUL(b[0], st.twiddles[twB]) - S_MUL(b[1], st.twiddles[twB + 1]));
                    int bIm = (S_MUL(b[0], st.twiddles[twB + 1]) + S_MUL(b[1], st.twiddles[twB]));
                    int cRe = (S_MUL(c[0], st.twiddles[twC]) - S_MUL(c[1], st.twiddles[twC + 1]));
                    int cIm = (S_MUL(c[0], st.twiddles[twC + 1]) + S_MUL(c[1], st.twiddles[twC]));
                    int dRe = (S_MUL(d[0], st.twiddles[twD]) - S_MUL(d[1], st.twiddles[twD + 1]));
                    int dIm = (S_MUL(d[0], st.twiddles[twD + 1]) + S_MUL(d[1], st.twiddles[twD]));

                    twB += fstride * 2;
                    twC += fstride * 4;
                    twD += fstride * 6;

                    int lowRe = a[0] - cRe;
                    int lowIm = a[1] - cIm;
                    a[0] += cRe;
                    a[1] += cIm;

                    int sumRe = bRe + dRe;
                    int sumIm = bIm + dIm;
                    int difRe = bRe - dRe;
                    int difIm = bIm - dIm;

                    c[0]  = a[0] - sumRe;
                    c[1]  = a[1] - sumIm;
                    a[0] += sumRe;
                    a[1] += sumIm;

                    b[0] = lowRe + difIm;
                    b[1] = lowIm - difRe;
                    d[0] = lowRe - difIm;
                    d[1] = lowIm + difRe;

                    a += 2;
                    b += 2;
                    c += 2;
                    d += 2;
                }
            }
        }