Exemple #1
0
        /// <summary>
        /// Fast rational approximation of the two-argument arctangent (per the function name),
        /// built from the coefficients cA, cB, cC and the offset cE declared outside this method.
        /// </summary>
        /// <param name="y">First coordinate; its sign selects the sign of the cE offset.</param>
        /// <param name="x">Second coordinate.</param>
        /// <returns>The approximated angle; when the denominator is exactly zero only the offset term(s) are returned.</returns>
        internal static float fast_atan2f(float y, float x)
        {
            /* Rescale very small inputs so that the squares below do not underflow */
            if (Inlines.ABS16(x) + Inlines.ABS16(y) < 1e-9f)
            {
                x *= 1e12f;
                y *= 1e12f;
            }

            float xSq = x * x;
            float ySq = y * y;

            /* Offset selected by the sign of y; shared by both branches */
            float ySide = y < 0 ? -cE : cE;

            if (xSq < ySq)
            {
                /* |x| < |y| */
                float denom = (ySq + cB * xSq) * (ySq + cC * xSq);
                if (denom == 0)
                {
                    return(ySide);
                }
                return(-x * y * (ySq + cA * xSq) / denom + ySide);
            }
            else
            {
                /* |x| >= |y|: a second offset, selected by the sign of x*y, is subtracted */
                float productSide = x * y < 0 ? -cE : cE;
                float denom       = (xSq + cB * ySq) * (xSq + cC * ySq);
                if (denom == 0)
                {
                    return(ySide - productSide);
                }
                return(x * y * (xSq + cA * ySq) / denom + ySide - productSide);
            }
        }
Exemple #2
0
        /// <summary>
        /// Updates the smoothed energy / cross-correlation statistics held in <paramref name="mem"/>
        /// from one frame of PCM read as consecutive (x, y) pairs, and derives a width value from them.
        /// </summary>
        /// <param name="pcm">Sample buffer; element 2k is used as x and element 2k+1 as y (relative to pcm_ptr).</param>
        /// <param name="pcm_ptr">Index of the first element of <paramref name="pcm"/> to read.</param>
        /// <param name="frame_size">Number of (x, y) pairs; only whole groups of four pairs are consumed.</param>
        /// <param name="Fs">Divided by <paramref name="frame_size"/> to obtain the frame rate.</param>
        /// <param name="mem">State whose XX, XY, YY, smoothed_width and max_follower fields are read and written.</param>
        /// <returns>EXTRACT16 of min(Q15ONE, 20 * mem.max_follower).</returns>
        internal static int compute_stereo_width(short[] pcm, int pcm_ptr, int frame_size, int Fs, StereoWidthState mem)
        {
            int frame_rate  = Fs / frame_size;
            int short_alpha = CeltConstants.Q15ONE - (25 * CeltConstants.Q15ONE / Inlines.IMAX(50, frame_rate));

            /* Frame-level accumulators for x*x, x*y and y*y */
            int xx = 0;
            int xy = 0;
            int yy = 0;

            for (int i = 0; i < frame_size - 3; i += 4)
            {
                int groupStart = pcm_ptr + (2 * i);
                int pxx        = 0;
                int pxy        = 0;
                int pyy        = 0;

                /* Four consecutive (x, y) pairs per group, each product pre-shifted by 2 */
                for (int pair = 0; pair < 4; pair++)
                {
                    int x = pcm[groupStart + (2 * pair)];
                    int y = pcm[groupStart + (2 * pair) + 1];
                    pxx += Inlines.SHR32(Inlines.MULT16_16(x, x), 2);
                    pxy += Inlines.SHR32(Inlines.MULT16_16(x, y), 2);
                    pyy += Inlines.SHR32(Inlines.MULT16_16(y, y), 2);
                }

                xx += Inlines.SHR32(pxx, 10);
                xy += Inlines.SHR32(pxy, 10);
                yy += Inlines.SHR32(pyy, 10);
            }

            /* Move each running statistic towards this frame's value, then clamp it at zero */
            mem.XX += Inlines.MULT16_32_Q15(short_alpha, xx - mem.XX);
            mem.XX  = Inlines.MAX32(0, mem.XX);
            mem.XY += Inlines.MULT16_32_Q15(short_alpha, xy - mem.XY);
            mem.XY  = Inlines.MAX32(0, mem.XY);
            mem.YY += Inlines.MULT16_32_Q15(short_alpha, yy - mem.YY);
            mem.YY  = Inlines.MAX32(0, mem.YY);

            short energyFloor = ((short)(0.5 + (8e-4f) * (((int)1) << (18)))) /*Inlines.QCONST16(8e-4f, 18)*/;

            /* Below the energy floor the smoothed width and peak follower are left untouched */
            if (Inlines.MAX32(mem.XX, mem.YY) > energyFloor)
            {
                int   oneQ30      = ((int)(0.5 + (1.0f) * (((int)1) << (30)))) /*Inlines.QCONST32(1.0f, 30)*/;
                short followDecay = ((short)(0.5 + (.02f) * (((int)1) << (15)))) /*Inlines.QCONST16(.02f, 15)*/;

                int sqrt_xx = Inlines.celt_sqrt(mem.XX);
                int sqrt_yy = Inlines.celt_sqrt(mem.YY);
                int qrrt_xx = Inlines.celt_sqrt(sqrt_xx);
                int qrrt_yy = Inlines.celt_sqrt(sqrt_yy);

                /* Inter-channel correlation */
                mem.XY = Inlines.MIN32(mem.XY, sqrt_xx * sqrt_yy);
                int corrDenominator = CeltConstants.EPSILON + Inlines.MULT16_16(sqrt_xx, sqrt_yy);
                int corr            = Inlines.SHR32(Inlines.frac_div32(mem.XY, corrDenominator), 16);

                /* Approximate loudness difference */
                int ldiff = CeltConstants.Q15ONE * Inlines.ABS16(qrrt_xx - qrrt_yy) / (CeltConstants.EPSILON + qrrt_xx + qrrt_yy);

                int decorrelation = Inlines.celt_sqrt(oneQ30 - Inlines.MULT16_16(corr, corr));
                int width         = Inlines.MULT16_16_Q15(decorrelation, ldiff);

                /* Smoothing over one second */
                mem.smoothed_width += (width - mem.smoothed_width) / frame_rate;
                /* Peak follower */
                mem.max_follower = Inlines.MAX16(mem.max_follower - followDecay / frame_rate, mem.smoothed_width);
            }

            return(Inlines.EXTRACT16(Inlines.MIN32(CeltConstants.Q15ONE, 20 * mem.max_follower)));
        }
Exemple #3
0
        /// <summary>
        /// Quantizes the N values of <paramref name="X"/> starting at <paramref name="X_ptr"/> into an
        /// integer pulse vector holding K pulses, writes that vector to the entropy coder through
        /// CWRS.encode_pulses, and returns the mask computed by extract_collapse_mask.
        /// X is modified in place: it is passed through exp_rotation, stripped of its signs for the
        /// search, and has the signs multiplied back in before returning.
        /// </summary>
        /// <param name="X">Buffer containing the vector to quantize; modified in place.</param>
        /// <param name="X_ptr">Index of the first vector element inside <paramref name="X"/>.</param>
        /// <param name="N">Vector length (asserted to be greater than 1).</param>
        /// <param name="K">Number of pulses to place (asserted to be greater than 0).</param>
        /// <param name="spread">Forwarded to exp_rotation.</param>
        /// <param name="B">Forwarded to exp_rotation and to extract_collapse_mask.</param>
        /// <param name="enc">Entropy coder that receives the encoded pulse vector.</param>
        /// <returns>The collapse mask computed from the final pulse vector.</returns>
        internal static uint alg_quant(int[] X, int X_ptr, int N, int K, int spread, int B, EntropyCoder enc
                                       )
        {
            /* y holds twice the pulse count per position (see the "*= 2" below), iy the pulse
             * count itself, and signx the original sign (+1 / -1) of each input value. */
            int[] y = new int[N];
            int[] iy = new int[N];
            int[] signx = new int[N];
            int   i, j;
            int   s;
            int   pulsesLeft;
            int   sum;
            int   xy;
            int   yy;
            uint  collapse_mask;

            Inlines.OpusAssert(K > 0, "alg_quant() needs at least one pulse");
            Inlines.OpusAssert(N > 1, "alg_quant() needs at least two dimensions");

            exp_rotation(X, X_ptr, N, 1, B, K, spread);

            /* Get rid of the sign */
            sum = 0;
            j   = 0;
            do
            {
                int xpj = X_ptr + j;

                /* OPT: Make sure the following two lines result in conditional moves
                 * rather than branches. */
                /* Note that a value of exactly zero is recorded with sign -1. */
                signx[j] = X[xpj] > 0 ? 1 : -1;
                X[xpj]   = Inlines.ABS16(X[xpj]);

                iy[j] = 0;
                y[j]  = 0;
            } while (++j < N);

            /* xy accumulates the correlation between X and the pulse vector, yy the pulse
             * vector's energy. */
            xy = yy = 0;

            pulsesLeft = K;

            /* Do a pre-search by projecting on the pyramid */
            if (K > (N >> 1))
            {
                int rcp;
                j = 0; do
                {
                    sum += X[X_ptr + j];
                } while (++j < N);

                /* If X is too small, just replace it with a pulse at 0 */

                /* In this integer code "too small" means the magnitudes sum to no more than K.
                 * X is then replaced by a single value of 1.0 in Q14 at index 0 and zeros
                 * elsewhere - presumably so that celt_rcp(sum) below never sees a tiny sum. */
                if (sum <= K)
                {
                    X[X_ptr] = ((short)(0.5 + (1.0f) * (((int)1) << (14)))) /*Inlines.QCONST16(1.0f, 14)*/;
                    j        = X_ptr + 1;
                    do
                    {
                        X[j] = 0;
                    } while (++j < N + X_ptr);

                    sum = ((short)(0.5 + (1.0f) * (((int)1) << (14)))) /*Inlines.QCONST16(1.0f, 14)*/;
                }

                /* Scale factor (K - 1) / sum used to project X onto the pyramid */
                rcp = Inlines.EXTRACT16(Inlines.MULT16_32_Q16((K - 1), Inlines.celt_rcp(sum)));
                j   = 0;

                do
                {
                    /* It's really important to round *towards zero* here */
                    iy[j]       = Inlines.MULT16_16_Q15(X[X_ptr + j], rcp);
                    y[j]        = (int)iy[j];
                    yy          = (Inlines.MAC16_16(yy, y[j], y[j]));
                    xy          = Inlines.MAC16_16(xy, X[X_ptr + j], y[j]);
                    y[j]       *= 2;
                    pulsesLeft -= iy[j];
                } while (++j < N);
            }

            Inlines.OpusAssert(pulsesLeft >= 1, "Allocated too many pulses in the quick pass");

            /* This should never happen, but just in case it does (e.g. on silence)
             * we fill the first bin with pulses. */
            if (pulsesLeft > N + 3)
            {
                int tmp = (int)pulsesLeft;
                yy         = (Inlines.MAC16_16(yy, tmp, tmp));
                yy         = (Inlines.MAC16_16(yy, tmp, y[0]));
                iy[0]     += pulsesLeft;
                pulsesLeft = 0;
            }

            /* Greedy search: place the remaining pulses one at a time, each at the position
             * with the best score. s is the pulse magnitude and is always 1 here. */
            s = 1;
            for (i = 0; i < pulsesLeft; i++)
            {
                int best_id;
                int best_num = 0 - CeltConstants.VERY_LARGE16;
                int best_den = 0;
                /* The shift grows with the number of pulses already placed */
                int rshift   = 1 + Inlines.celt_ilog2(K - pulsesLeft + i + 1);
                best_id = 0;

                /* The squared magnitude term gets added anyway, so we might as well
                 * add it outside the loop */
                yy = Inlines.ADD16(yy, 1); // opus bug - was add32
                j  = 0;
                do
                {
                    int Rxy, Ryy;
                    /* Temporary sums of the new pulse(s) */
                    Rxy = Inlines.EXTRACT16(Inlines.SHR32(Inlines.ADD32(xy, Inlines.EXTEND32(X[X_ptr + j])), rshift));
                    /* We're multiplying y[j] by two so we don't have to do it here */
                    Ryy = Inlines.ADD16(yy, y[j]);

                    /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
                     * Rxy is positive because the sign is pre-computed) */
                    Rxy = Inlines.MULT16_16_Q15(Rxy, Rxy);

                    /* The idea is to check for num/den >= best_num/best_den, but that way
                     * we can do it without any division */
                    /* OPT: Make sure to use conditional moves here */
                    if (Inlines.MULT16_16(best_den, Rxy) > Inlines.MULT16_16(Ryy, best_num))
                    {
                        best_den = Ryy;
                        best_num = Rxy;
                        best_id  = j;
                    }
                } while (++j < N);

                /* Updating the sums of the new pulse(s) */
                xy = Inlines.ADD32(xy, Inlines.EXTEND32(X[X_ptr + best_id]));
                /* We're multiplying y[j] by two so we don't have to do it here */
                yy = Inlines.ADD16(yy, y[best_id]);

                /* Only now that we've made the final choice, update y/iy */
                /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
                y[best_id] = (y[best_id] + (2 * s));
                iy[best_id]++;
            }

            /* Put the original sign back */
            j = 0;
            do
            {
                X[X_ptr + j] = (Inlines.MULT16_16(signx[j], X[X_ptr + j]));

                /* OPT: Make sure your compiler uses a conditional move here rather than
                 *   a branch. */
                iy[j] = signx[j] < 0 ? -iy[j] : iy[j];
            } while (++j < N);

            CWRS.encode_pulses(iy, N, K, enc);

            collapse_mask = extract_collapse_mask(iy, N, B);

            return(collapse_mask);
        }
Exemple #4
0
        /// <summary>
        /// Feeds new input (through <paramref name="downmix"/>) into the analysis buffer tonal.inmem and,
        /// once that buffer holds OpusConstants.ANALYSIS_BUF_SIZE samples, runs one analysis step:
        /// a windowed FFT, per-bin tonality/noisiness estimates, per-band energy statistics, a
        /// bandwidth estimate, and a feature vector that is passed to mlp.mlp_process to update the
        /// music probability. Results are written to the next AnalysisInfo slot of tonal.info.
        /// If the buffer is not yet full the method only updates tonal.mem_fill and returns.
        /// </summary>
        /// <typeparam name="T">The type of signal being handled (either short or float) - changes based on which API is used</typeparam>
        /// <param name="tonal">Analysis state; read and updated throughout.</param>
        /// <param name="celt_mode">Supplies the FFT state (celt_mode.mdct.kfft[0]).</param>
        /// <param name="x">Input signal; only ever handed to <paramref name="downmix"/>.</param>
        /// <param name="x_ptr">Passed to <paramref name="downmix"/> together with x (presumably the start index within x - confirm against the downmix implementations).</param>
        /// <param name="len">Amount of new input; used for buffer-fill bookkeeping and as the downmix length.</param>
        /// <param name="offset">Forwarded to <paramref name="downmix"/>; the second call adds the amount already consumed by the first.</param>
        /// <param name="c1">Forwarded to <paramref name="downmix"/>.</param>
        /// <param name="c2">Forwarded to <paramref name="downmix"/>.</param>
        /// <param name="C">Forwarded to <paramref name="downmix"/>.</param>
        /// <param name="lsb_depth">Used to scale the noise floor of the bandwidth detector.</param>
        /// <param name="downmix">Callback that writes input derived from x into tonal.inmem at a given position.</param>
        internal static void tonality_analysis <T>(TonalityAnalysisState tonal, CeltMode celt_mode, T[] x, int x_ptr, int len, int offset, int c1, int c2, int C, int lsb_depth, Downmix.downmix_func <T> downmix)
        {
            int      i, b;
            FFTState kfft;

            int[] input;
            int[] output;
            int   N = 480, N2 = 240;

            float[] A   = tonal.angle;
            float[] dA  = tonal.d_angle;
            float[] d2A = tonal.d2_angle;
            float[] tonality;
            float[] noisiness;
            float[] band_tonality = new float[OpusConstants.NB_TBANDS];
            float[] logE          = new float[OpusConstants.NB_TBANDS];
            float[] BFCC          = new float[8];
            float[] features      = new float[25];
            float   frame_tonality;
            float   max_frame_tonality;
            /*float tw_sum=0;*/
            float frame_noisiness;
            float pi4   = (float)(M_PI * M_PI * M_PI * M_PI);
            float slope = 0;
            float frame_stationarity;
            float relativeE;

            float[]      frame_probs = new float[2];
            float        alpha, alphaE, alphaE2;
            float        frame_loudness;
            float        bandwidth_mask;
            int          bandwidth = 0;
            float        maxE      = 0;
            float        noise_floor;
            int          remaining;
            AnalysisInfo info; //[porting note] pointer

            tonal.last_transition++;
            /* Smoothing coefficients: 1/(count+1) for the first calls, then capped at 1/20, 1/50 and 1/1000 */
            alpha   = 1.0f / Inlines.IMIN(20, 1 + tonal.count);
            alphaE  = 1.0f / Inlines.IMIN(50, 1 + tonal.count);
            alphaE2 = 1.0f / Inlines.IMIN(1000, 1 + tonal.count);

            if (tonal.count < 4)
            {
                tonal.music_prob = 0.5f;
            }
            kfft = celt_mode.mdct.kfft[0];
            if (tonal.count == 0)
            {
                tonal.mem_fill = 240;
            }

            /* Append as much new input as still fits into the analysis buffer */
            downmix(x, x_ptr, tonal.inmem, tonal.mem_fill, Inlines.IMIN(len, OpusConstants.ANALYSIS_BUF_SIZE - tonal.mem_fill), offset, c1, c2, C);

            if (tonal.mem_fill + len < OpusConstants.ANALYSIS_BUF_SIZE)
            {
                tonal.mem_fill += len;
                /* Don't have enough to update the analysis */
                return;
            }

            /* Claim the next slot of the info ring buffer (write_pos wraps at DETECT_SIZE) */
            info = tonal.info[tonal.write_pos++];
            if (tonal.write_pos >= OpusConstants.DETECT_SIZE)
            {
                tonal.write_pos -= OpusConstants.DETECT_SIZE;
            }

            input     = new int[960];
            output    = new int[960];
            tonality  = new float[240];
            noisiness = new float[240];
            /* Interleaved layout: slot k occupies indices 2k and 2k+1 and receives the windowed
             * inmem[k] and inmem[k + N2] respectively. The window table is applied symmetrically,
             * so slots i and N-1-i share the same weight. */
            for (i = 0; i < N2; i++)
            {
                float w = Tables.analysis_window[i];
                input[2 * i]                 = (int)(w * tonal.inmem[i]);
                input[2 * i + 1]             = (int)(w * tonal.inmem[N2 + i]);
                input[(2 * (N - i - 1))]     = (int)(w * tonal.inmem[N - i - 1]);
                input[(2 * (N - i - 1)) + 1] = (int)(w * tonal.inmem[N + N2 - i - 1]);
            }
            /* Keep the last 240 buffered samples at the start of inmem for the next call
             * (presumably the arguments are array, source index, destination index, count - confirm) */
            Arrays.MemMoveInt(tonal.inmem, OpusConstants.ANALYSIS_BUF_SIZE - 240, 0, 240);

            /* Store whatever part of the new input did not fit before the buffer filled up */
            remaining = len - (OpusConstants.ANALYSIS_BUF_SIZE - tonal.mem_fill);
            downmix(x, x_ptr, tonal.inmem, 240, remaining, offset + OpusConstants.ANALYSIS_BUF_SIZE - tonal.mem_fill, c1, c2, C);
            tonal.mem_fill = 240 + remaining;

            KissFFT.opus_fft(kfft, input, output);

            /* Per-bin phase tracking. Slots i and N-i of the FFT output are combined into two
             * (real, imaginary) pairs, one per packed block; the second difference of each phase
             * (wrapped to the nearest integer) drives the noisiness and tonality estimates. */
            for (i = 1; i < N2; i++)
            {
                float X1r, X2r, X1i, X2i;
                float angle, d_angle, d2_angle;
                float angle2, d_angle2, d2_angle2;
                float mod1, mod2, avg_mod;
                X1r = (float)output[2 * i] + output[2 * (N - i)];
                X1i = (float)output[(2 * i) + 1] - output[2 * (N - i) + 1];
                X2r = (float)output[(2 * i) + 1] + output[2 * (N - i) + 1];
                X2i = (float)output[2 * (N - i)] - output[2 * i];

                angle    = (float)(.5f / M_PI) * fast_atan2f(X1i, X1r);
                d_angle  = angle - A[i];
                d2_angle = d_angle - dA[i];

                angle2    = (float)(.5f / M_PI) * fast_atan2f(X2i, X2r);
                d_angle2  = angle2 - angle;
                d2_angle2 = d_angle2 - d_angle;

                mod1         = d2_angle - (float)Math.Floor(0.5f + d2_angle);
                noisiness[i] = Inlines.ABS16(mod1);
                /* Two squarings: mod1 becomes mod1^4 */
                mod1        *= mod1;
                mod1        *= mod1;

                mod2          = d2_angle2 - (float)Math.Floor(0.5f + d2_angle2);
                noisiness[i] += Inlines.ABS16(mod2);
                mod2         *= mod2;
                mod2         *= mod2;

                /* d2A[i] still holds the mod2 value stored by the previous analysis step */
                avg_mod     = .25f * (d2A[i] + 2.0f * mod1 + mod2);
                tonality[i] = 1.0f / (1.0f + 40.0f * 16.0f * pi4 * avg_mod) - .015f;

                A[i]   = angle2;
                dA[i]  = d_angle2;
                d2A[i] = mod2;
            }

            frame_tonality     = 0;
            max_frame_tonality = 0;
            /*tw_sum = 0;*/
            info.activity      = 0;
            frame_noisiness    = 0;
            frame_stationarity = 0;
            if (tonal.count == 0)
            {
                /* First analysis step: start the per-band log-energy trackers at extreme values */
                for (b = 0; b < OpusConstants.NB_TBANDS; b++)
                {
                    tonal.lowE[b]  = 1e10f;
                    tonal.highE[b] = -1e10f;
                }
            }
            relativeE      = 0;
            frame_loudness = 0;
            /* Per-band statistics over the bands delimited by Tables.tbands */
            for (b = 0; b < OpusConstants.NB_TBANDS; b++)
            {
                float E = 0, tE = 0, nE = 0;
                float L1, L2;
                float stationarity;
                for (i = Tables.tbands[b]; i < Tables.tbands[b + 1]; i++)
                {
                    float binE = output[2 * i] * (float)output[2 * i] + output[2 * (N - i)] * (float)output[2 * (N - i)]
                                 + output[2 * i + 1] * (float)output[2 * i + 1] + output[2 * (N - i) + 1] * (float)output[2 * (N - i) + 1];
                    /* FIXME: It's probably best to change the BFCC filter initial state instead */
                    binE *= 5.55e-17f;
                    E    += binE;
                    tE   += binE * tonality[i];
                    nE   += binE * 2.0f * (.5f - noisiness[i]);
                }

                /* Record this band's energy in the history row for the current frame */
                tonal.E[tonal.E_count][b] = E;
                frame_noisiness          += nE / (1e-15f + E);

                frame_loudness += (float)Math.Sqrt(E + 1e-10f);
                logE[b]         = (float)Math.Log(E + 1e-10f);
                /* lowE may rise by at most 0.01 and highE fall by at most 0.1 per step */
                tonal.lowE[b]   = Inlines.MIN32(logE[b], tonal.lowE[b] + 0.01f);
                tonal.highE[b]  = Inlines.MAX32(logE[b], tonal.highE[b] - 0.1f);
                if (tonal.highE[b] < tonal.lowE[b] + 1.0f)
                {
                    /* Widen the tracked range when the two trackers are less than 1.0 apart */
                    tonal.highE[b] += 0.5f;
                    tonal.lowE[b]  -= 0.5f;
                }
                relativeE += (logE[b] - tonal.lowE[b]) / (1e-15f + tonal.highE[b] - tonal.lowE[b]);

                /* Stationarity: compares the sum of square roots with the square root of the
                 * sum of this band's energy over the stored NB_FRAMES history rows */
                L1 = L2 = 0;
                for (i = 0; i < OpusConstants.NB_FRAMES; i++)
                {
                    L1 += (float)Math.Sqrt(tonal.E[i][b]);
                    L2 += tonal.E[i][b];
                }

                stationarity        = Inlines.MIN16(0.99f, L1 / (float)Math.Sqrt(1e-15 + OpusConstants.NB_FRAMES * L2));
                stationarity       *= stationarity;
                stationarity       *= stationarity;
                frame_stationarity += stationarity;
                /*band_tonality[b] = tE/(1e-15+E)*/
                band_tonality[b] = Inlines.MAX16(tE / (1e-15f + E), stationarity * tonal.prev_band_tonality[b]);
                frame_tonality  += band_tonality[b];
                if (b >= OpusConstants.NB_TBANDS - OpusConstants.NB_TONAL_SKIP_BANDS)
                {
                    /* For the last NB_TONAL_SKIP_BANDS bands, drop the lowest bands from the running sum */
                    frame_tonality -= band_tonality[b - OpusConstants.NB_TBANDS + OpusConstants.NB_TONAL_SKIP_BANDS];
                }
                max_frame_tonality = Inlines.MAX16(max_frame_tonality, (1.0f + .03f * (b - OpusConstants.NB_TBANDS)) * frame_tonality);
                slope += band_tonality[b] * (b - 8);
                tonal.prev_band_tonality[b] = band_tonality[b];
            }

            /* Bandwidth detection over the bands delimited by Tables.extra_bands */
            bandwidth_mask = 0;
            bandwidth      = 0;
            maxE           = 0;
            noise_floor    = 5.7e-4f / (1 << (Inlines.IMAX(0, lsb_depth - 8)));
            noise_floor   *= 1 << (15 + CeltConstants.SIG_SHIFT);
            /* Squared because it is compared against energies */
            noise_floor   *= noise_floor;
            for (b = 0; b < OpusConstants.NB_TOT_BANDS; b++)
            {
                float E = 0;
                int   band_start, band_end;
                /* Keep a margin of 300 Hz for aliasing */
                band_start = Tables.extra_bands[b];
                band_end   = Tables.extra_bands[b + 1];
                for (i = band_start; i < band_end; i++)
                {
                    float binE = output[2 * i] * (float)output[2 * i] + output[2 * (N - i)] * (float)output[2 * (N - i)]
                                 + output[2 * i + 1] * (float)output[2 * i + 1] + output[2 * (N - i) + 1] * (float)output[2 * (N - i) + 1];
                    E += binE;
                }
                maxE           = Inlines.MAX32(maxE, E);
                tonal.meanE[b] = Inlines.MAX32((1 - alphaE2) * tonal.meanE[b], E);
                E = Inlines.MAX32(E, tonal.meanE[b]);
                /* Use a simple follower with 13 dB/Bark slope for spreading function */
                bandwidth_mask = Inlines.MAX32(.05f * bandwidth_mask, E);

                /* Consider the band "active" only if all these conditions are met:
                 * 1) less than 10 dB below the simple follower
                 * 2) less than 90 dB below the peak band (maximal masking possible considering
                 *    both the ATH and the loudness-dependent slope of the spreading function)
                 * 3) above the PCM quantization noise floor
                 */
                if (E > .1 * bandwidth_mask && E * 1e9f > maxE && E > noise_floor * (band_end - band_start))
                {
                    bandwidth = b;
                }
            }
            if (tonal.count <= 2)
            {
                /* Too little history: report a fixed bandwidth index */
                bandwidth = 20;
            }
            frame_loudness   = 20 * (float)Math.Log10(frame_loudness);
            tonal.Etracker   = Inlines.MAX32(tonal.Etracker - .03f, frame_loudness);
            tonal.lowECount *= (1 - alphaE);
            if (frame_loudness < tonal.Etracker - 30)
            {
                tonal.lowECount += alphaE;
            }

            /* BFCC: project the first 16 band log-energies onto 8 rows of Tables.dct_table */
            for (i = 0; i < 8; i++)
            {
                float sum = 0;
                for (b = 0; b < 16; b++)
                {
                    sum += Tables.dct_table[i * 16 + b] * logE[b];
                }
                BFCC[i] = sum;
            }

            frame_stationarity /= OpusConstants.NB_TBANDS;
            relativeE          /= OpusConstants.NB_TBANDS;
            if (tonal.count < 10)
            {
                relativeE = 0.5f;
            }
            frame_noisiness    /= OpusConstants.NB_TBANDS;
            info.activity       = frame_noisiness + (1 - frame_noisiness) * relativeE;
            frame_tonality      = (max_frame_tonality / (OpusConstants.NB_TBANDS - OpusConstants.NB_TONAL_SKIP_BANDS));
            frame_tonality      = Inlines.MAX16(frame_tonality, tonal.prev_tonality * .8f);
            tonal.prev_tonality = frame_tonality;

            slope /= 8 * 8;
            info.tonality_slope = slope;

            tonal.E_count = (tonal.E_count + 1) % OpusConstants.NB_FRAMES;
            tonal.count++;
            info.tonality = frame_tonality;

            /* Feature vector. tonal.mem holds the BFCC values of the four previous steps in
             * blocks of 8 (most recent at offset 0, oldest at offset 24; see the shift below). */
            for (i = 0; i < 4; i++)
            {
                features[i] = -0.12299f * (BFCC[i] + tonal.mem[i + 24]) + 0.49195f * (tonal.mem[i] + tonal.mem[i + 16]) + 0.69693f * tonal.mem[i + 8] - 1.4349f * tonal.cmean[i];
            }

            for (i = 0; i < 4; i++)
            {
                tonal.cmean[i] = (1 - alpha) * tonal.cmean[i] + alpha * BFCC[i];
            }

            for (i = 0; i < 4; i++)
            {
                features[4 + i] = 0.63246f * (BFCC[i] - tonal.mem[i + 24]) + 0.31623f * (tonal.mem[i] - tonal.mem[i + 16]);
            }
            for (i = 0; i < 3; i++)
            {
                features[8 + i] = 0.53452f * (BFCC[i] + tonal.mem[i + 24]) - 0.26726f * (tonal.mem[i] + tonal.mem[i + 16]) - 0.53452f * tonal.mem[i + 8];
            }

            if (tonal.count > 5)
            {
                /* Running mean of the squared features (its square root is used further down) */
                for (i = 0; i < 9; i++)
                {
                    tonal.std[i] = (1 - alpha) * tonal.std[i] + alpha * features[i] * features[i];
                }
            }

            /* Shift the BFCC history by one step and store the current values at the front */
            for (i = 0; i < 8; i++)
            {
                tonal.mem[i + 24] = tonal.mem[i + 16];
                tonal.mem[i + 16] = tonal.mem[i + 8];
                tonal.mem[i + 8]  = tonal.mem[i];
                tonal.mem[i]      = BFCC[i];
            }
            for (i = 0; i < 9; i++)
            {
                features[11 + i] = (float)Math.Sqrt(tonal.std[i]);
            }
            features[20] = info.tonality;
            features[21] = info.activity;
            features[22] = frame_stationarity;
            features[23] = info.tonality_slope;
            features[24] = tonal.lowECount;

            /* Run the network; the two outputs are remapped below with .5 * (v + 1) */
            mlp.mlp_process(Tables.net, features, frame_probs);
            frame_probs[0] = .5f * (frame_probs[0] + 1);
            /* Curve fitting between the MLP probability and the actual probability */
            frame_probs[0] = .01f + 1.21f * frame_probs[0] * frame_probs[0] - .23f * (float)Math.Pow(frame_probs[0], 10);
            /* Probability of active audio (as opposed to silence) */
            frame_probs[1] = .5f * frame_probs[1] + .5f;
            /* Consider that silence has a 50-50 probability. */
            frame_probs[0] = frame_probs[1] * frame_probs[0] + (1 - frame_probs[1]) * .5f;

            /*printf("%f %f ", frame_probs[0], frame_probs[1]);*/
            {
                /* Probability of state transition */
                float tau;

                /* Represents independence of the MLP probabilities, where
                 *  beta=1 means fully independent. */
                float beta;
                /* Denormalized probability of speech (p0) and music (p1) after update */
                float p0, p1;
                /* Probabilities for "all speech" and "all music" */
                float s0, m0;
                /* Probability sum for renormalisation */
                float psum;
                /* Instantaneous probability of speech and music, with beta pre-applied. */
                float speech0;
                float music0;

                /* One transition every 3 minutes of active audio */
                tau  = .00005f * frame_probs[1];
                /* This initial beta is always overwritten by the adaptive value computed below */
                beta = .05f;
                //if (1)
                {
                    /* Adapt beta based on how "unexpected" the new prob is */
                    float p, q;
                    p    = Inlines.MAX16(.05f, Inlines.MIN16(.95f, frame_probs[0]));
                    q    = Inlines.MAX16(.05f, Inlines.MIN16(.95f, tonal.music_prob));
                    beta = .01f + .05f * Inlines.ABS16(p - q) / (p * (1 - q) + q * (1 - p));
                }

                /* p0 and p1 are the probabilities of speech and music at this frame
                 *  using only information from previous frame and applying the
                 *  state transition model */
                p0 = (1 - tonal.music_prob) * (1 - tau) + tonal.music_prob * tau;
                p1 = tonal.music_prob * (1 - tau) + (1 - tonal.music_prob) * tau;

                /* We apply the current probability with exponent beta to work around
                 *  the fact that the probability estimates aren't independent. */
                p0 *= (float)Math.Pow(1 - frame_probs[0], beta);
                p1 *= (float)Math.Pow(frame_probs[0], beta);
                /* Normalise the probabilities to get the Markov probability of music. */
                tonal.music_prob = p1 / (p0 + p1);
                info.music_prob  = tonal.music_prob;

                /* This chunk of code deals with delayed decision. */
                /* Starting from a tiny positive value keeps the 1/psum below finite */
                psum = 1e-20f;
                /* Instantaneous probability of speech and music, with beta pre-applied. */
                speech0 = (float)Math.Pow(1 - frame_probs[0], beta);
                music0  = (float)Math.Pow(frame_probs[0], beta);
                if (tonal.count == 1)
                {
                    /* count was incremented above, so this is the first completed analysis step */
                    tonal.pspeech[0] = 0.5f;
                    tonal.pmusic[0]  = 0.5f;
                }

                /* Updated probability of having only speech (s0) or only music (m0),
                 *  before considering the new observation. */
                s0 = tonal.pspeech[0] + tonal.pspeech[1];
                m0 = tonal.pmusic[0] + tonal.pmusic[1];
                /* Updates s0 and m0 with instantaneous probability. */
                tonal.pspeech[0] = s0 * (1 - tau) * speech0;
                tonal.pmusic[0]  = m0 * (1 - tau) * music0;
                /* Propagate the transition probabilities */
                for (i = 1; i < OpusConstants.DETECT_SIZE - 1; i++)
                {
                    tonal.pspeech[i] = tonal.pspeech[i + 1] * speech0;
                    tonal.pmusic[i]  = tonal.pmusic[i + 1] * music0;
                }
                /* Probability that the latest frame is speech, when all the previous ones were music. */
                tonal.pspeech[OpusConstants.DETECT_SIZE - 1] = m0 * tau * speech0;
                /* Probability that the latest frame is music, when all the previous ones were speech. */
                tonal.pmusic[OpusConstants.DETECT_SIZE - 1] = s0 * tau * music0;

                /* Renormalise probabilities to 1 */
                for (i = 0; i < OpusConstants.DETECT_SIZE; i++)
                {
                    psum += tonal.pspeech[i] + tonal.pmusic[i];
                }
                psum = 1.0f / psum;
                for (i = 0; i < OpusConstants.DETECT_SIZE; i++)
                {
                    tonal.pspeech[i] *= psum;
                    tonal.pmusic[i]  *= psum;
                }
                /* NOTE(review): the psum value accumulated here is not read again in this method */
                psum = tonal.pmusic[0];
                for (i = 1; i < OpusConstants.DETECT_SIZE; i++)
                {
                    psum += tonal.pspeech[i];
                }

                /* Estimate our confidence in the speech/music decisions */
                if (frame_probs[1] > .75)
                {
                    if (tonal.music_prob > .9)
                    {
                        float adapt;
                        /* Running average whose sample count is capped at 500 */
                        adapt = 1.0f / (++tonal.music_confidence_count);
                        tonal.music_confidence_count = Inlines.IMIN(tonal.music_confidence_count, 500);
                        tonal.music_confidence      += adapt * Inlines.MAX16(-.2f, frame_probs[0] - tonal.music_confidence);
                    }
                    if (tonal.music_prob < .1)
                    {
                        float adapt;
                        adapt = 1.0f / (++tonal.speech_confidence_count);
                        tonal.speech_confidence_count = Inlines.IMIN(tonal.speech_confidence_count, 500);
                        tonal.speech_confidence      += adapt * Inlines.MIN16(.2f, frame_probs[0] - tonal.speech_confidence);
                    }
                }
                else
                {
                    /* Default confidences used until the first confident observation of each kind */
                    if (tonal.music_confidence_count == 0)
                    {
                        tonal.music_confidence = .9f;
                    }
                    if (tonal.speech_confidence_count == 0)
                    {
                        tonal.speech_confidence = .1f;
                    }
                }
            }
            /* Reset the transition counter whenever the music/speech decision flips */
            if (tonal.last_music != ((tonal.music_prob > .5f) ? 1 : 0))
            {
                tonal.last_transition = 0;
            }
            tonal.last_music = (tonal.music_prob > .5f) ? 1 : 0;

            info.bandwidth = bandwidth;
            info.noisiness = frame_noisiness;
            info.valid     = 1;
        }