Exemple #1
0
        /// <summary>
        /// Resolves the frame size to encode. When analysis is enabled, the caller asked for
        /// OPUS_FRAMESIZE_VARIABLE and at least Fs / 200 samples are available, the size is
        /// (Fs / 400) shifted left by the value returned from optimize_framesize (reduced until
        /// it fits in the supplied frame); otherwise the decision is delegated to frame_size_select.
        /// </summary>
        /// <typeparam name="T">Sample type of the analysis buffer; only handled through <paramref name="downmix"/>.</typeparam>
        /// <param name="analysis_pcm">Input samples, forwarded to optimize_framesize.</param>
        /// <param name="analysis_pcm_ptr">Start index into <paramref name="analysis_pcm"/>, forwarded to optimize_framesize.</param>
        /// <param name="frame_size">Number of samples offered by the caller.</param>
        /// <param name="variable_duration">Requested frame-size mode.</param>
        /// <param name="C">Channel count, forwarded to optimize_framesize.</param>
        /// <param name="Fs">Sampling rate.</param>
        /// <param name="bitrate_bps">Bitrate, forwarded to optimize_framesize.</param>
        /// <param name="delay_compensation">Passed to optimize_framesize as its buffering argument.</param>
        /// <param name="downmix">Downmix delegate, forwarded to optimize_framesize.</param>
        /// <param name="subframe_mem">State array passed to optimize_framesize as its memory argument.</param>
        /// <param name="analysis_enabled">Variable framing is only considered when this is true.</param>
        /// <returns>The selected frame size, or -1 when the selection produced a negative value.</returns>
        internal static int compute_frame_size <T>(T[] analysis_pcm, int analysis_pcm_ptr, int frame_size,
                                                   OpusFramesize variable_duration, int C, int Fs, int bitrate_bps,
                                                   int delay_compensation, Downmix.downmix_func <T> downmix, float[] subframe_mem, bool analysis_enabled
                                                   )
        {
            bool useVariableFraming = analysis_enabled
                                      && variable_duration == OpusFramesize.OPUS_FRAMESIZE_VARIABLE
                                      && frame_size >= Fs / 200;

            if (!useVariableFraming)
            {
                frame_size = frame_size_select(frame_size, variable_duration, Fs);
            }
            else
            {
                int baseUnit = Fs / 400;
                int shift    = optimize_framesize(analysis_pcm, analysis_pcm_ptr, frame_size, C, Fs, bitrate_bps,
                                                  0, subframe_mem, delay_compensation, downmix);

                /* Never pick a frame longer than what the caller supplied */
                while ((baseUnit << shift) > frame_size)
                {
                    shift--;
                }
                frame_size = baseUnit << shift;
            }

            return(frame_size < 0 ? -1 : frame_size);
        }
Exemple #2
0
        /// <summary>
        /// Feeds the not-yet-analysed part of the input to tonality_analysis in chunks of at most
        /// 480 samples, updates the analysis offset, and then fills <paramref name="analysis_info"/>
        /// through tonality_get_info. When <paramref name="analysis_pcm"/> is null only the last
        /// step is performed.
        /// </summary>
        /// <typeparam name="T">Sample type of the input buffer; only handled through <paramref name="downmix"/>.</typeparam>
        /// <param name="analysis">Analysis state; its analysis_offset field is read and rewritten here.</param>
        /// <param name="celt_mode">Forwarded to tonality_analysis.</param>
        /// <param name="analysis_pcm">Input samples, or null to skip the analysis pass.</param>
        /// <param name="analysis_pcm_ptr">Start index into <paramref name="analysis_pcm"/>.</param>
        /// <param name="analysis_frame_size">Number of samples available for analysis; clamped to (DETECT_SIZE - 5) * Fs / 100.</param>
        /// <param name="frame_size">Frame size subtracted from the stored offset and passed to tonality_get_info.</param>
        /// <param name="c1">Forwarded to tonality_analysis.</param>
        /// <param name="c2">Forwarded to tonality_analysis.</param>
        /// <param name="C">Forwarded to tonality_analysis.</param>
        /// <param name="Fs">Sampling rate, used for the clamp above.</param>
        /// <param name="lsb_depth">Forwarded to tonality_analysis.</param>
        /// <param name="downmix">Downmix delegate, forwarded to tonality_analysis.</param>
        /// <param name="analysis_info">Receives the result; its valid flag is reset to 0 before tonality_get_info runs.</param>
        internal static void run_analysis <T>(TonalityAnalysisState analysis, CeltMode celt_mode, T[] analysis_pcm, int analysis_pcm_ptr,
                                              int analysis_frame_size, int frame_size, int c1, int c2, int C, int Fs,
                                              int lsb_depth, Downmix.downmix_func <T> downmix, AnalysisInfo analysis_info)
        {
            if (analysis_pcm != null)
            {
                /* Clamp the input so the analysis buffer cannot overflow or wrap around */
                int longestAllowed = (OpusConstants.DETECT_SIZE - 5) * Fs / 100;
                analysis_frame_size = Inlines.IMIN(longestAllowed, analysis_frame_size);

                int samplesLeft = analysis_frame_size - analysis.analysis_offset;
                int cursor      = analysis.analysis_offset;

                /* At least one chunk is always processed, even when samplesLeft is not positive */
                while (true)
                {
                    int chunk = Inlines.IMIN(480, samplesLeft);
                    tonality_analysis(analysis, celt_mode, analysis_pcm, analysis_pcm_ptr, chunk, cursor, c1, c2, C, lsb_depth, downmix);
                    cursor      += 480;
                    samplesLeft -= 480;
                    if (samplesLeft <= 0)
                    {
                        break;
                    }
                }

                /* Store the offset relative to the start of the next frame */
                analysis.analysis_offset = analysis_frame_size - frame_size;
            }

            analysis_info.valid = 0;
            tonality_get_info(analysis, analysis_info, frame_size);
        }
Exemple #3
0
        /// <summary>
        /// Computes one energy value per subframe of Fs / 400 samples (sum of squared differences
        /// between consecutive downmixed samples), hands the energies and their reciprocals to
        /// transient_viterbi, and returns the value it selects.
        /// </summary>
        /// <typeparam name="T">Sample type of <paramref name="x"/>; only handled through <paramref name="downmix"/>.</typeparam>
        /// <param name="x">Input samples.</param>
        /// <param name="x_ptr">Start index into <paramref name="x"/>.</param>
        /// <param name="len">Number of samples available.</param>
        /// <param name="C">Channel count; passed to <paramref name="downmix"/> and used in the cost passed to transient_viterbi.</param>
        /// <param name="Fs">Sampling rate.</param>
        /// <param name="bitrate">Bitrate; transient_viterbi receives bitrate / 400.</param>
        /// <param name="tonality">Scales the cost passed to transient_viterbi.</param>
        /// <param name="mem">Energy memory carried between calls: entry 0 is always read and rewritten, entries 1 and 2 only when <paramref name="buffering"/> is non-zero.</param>
        /// <param name="buffering">When non-zero, the first (2 * subframe - buffering) samples are skipped and three remembered energies are used instead of one.</param>
        /// <param name="downmix">Delegate that fills the integer subframe buffer from <paramref name="x"/>.</param>
        /// <returns>The value returned by transient_viterbi.</returns>
        internal static int optimize_framesize <T>(T[] x, int x_ptr, int len, int C, int Fs,
                                                   int bitrate, int tonality, float[] mem, int buffering,
                                                   Downmix.downmix_func <T> downmix)
        {
            float[] energy    = new float[MAX_DYNAMIC_FRAMESIZE + 4];
            float[] invEnergy = new float[MAX_DYNAMIC_FRAMESIZE + 3];

            int   subframe = Fs / 400;
            int[] samples  = new int[subframe];
            bool  buffered = buffering != 0;

            int firstSlot;
            int skip;

            energy[0]    = mem[0];
            invEnergy[0] = 1.0f / (CeltConstants.EPSILON + mem[0]);
            if (!buffered)
            {
                firstSlot = 1;
                skip      = 0;
            }
            else
            {
                /* Consider the CELT delay when not in restricted-lowdelay */
                /* We assume the buffering is between 2.5 and 5 ms */
                skip = 2 * subframe - buffering;
                Inlines.OpusAssert(skip >= 0 && skip <= subframe);
                len -= skip;
                for (int k = 1; k <= 2; k++)
                {
                    energy[k]    = mem[k];
                    invEnergy[k] = 1.0f / (CeltConstants.EPSILON + mem[k]);
                }
                firstSlot = 3;
            }

            int count = Inlines.IMIN(len / subframe, MAX_DYNAMIC_FRAMESIZE);

            /* The zero only silences a warning; the real value is set on the first subframe */
            int previous = 0;
            int slot     = firstSlot;
            for (int n = 0; n < count; n++)
            {
                float acc = CeltConstants.EPSILON;

                downmix(x, x_ptr, samples, 0, subframe, n * subframe + skip, 0, -2, C);
                if (n == 0)
                {
                    previous = samples[0];
                }
                for (int j = 0; j < subframe; j++)
                {
                    int current = samples[j];
                    int delta   = current - previous;
                    acc     += delta * (float)delta;
                    previous = current;
                }
                energy[slot]    = acc;
                invEnergy[slot] = 1.0f / acc;
                slot++;
            }

            /* Hack to get 20 ms working with APPLICATION_AUDIO
             * The real problem is that the corresponding memory needs to use 1.5 ms
             * from this frame and 1 ms from the next frame */
            energy[slot] = energy[slot - 1];
            if (buffered)
            {
                count = Inlines.IMIN(MAX_DYNAMIC_FRAMESIZE, count + 2);
            }

            int bestLM = transient_viterbi(energy, invEnergy, count, (int)((1.0f + .5f * tonality) * (60 * C + 40)), bitrate / 400);

            /* Remember the energies that follow the selected frame for the next call */
            int carry = 1 << bestLM;
            mem[0] = energy[carry];
            if (buffered)
            {
                mem[1] = energy[carry + 1];
                mem[2] = energy[carry + 2];
            }
            return(bestLM);
        }
Exemple #4
0
        /// <summary>
        /// Encodes one frame of multichannel audio: every stream of the layout is encoded with its
        /// own encoder into a scratch packet, which the repacketizer then appends to
        /// <paramref name="data"/>. All streams except the last are written in self-delimited form.
        /// </summary>
        /// <typeparam name="T">Sample type of <paramref name="pcm"/>; only handled through the supplied delegates.</typeparam>
        /// <param name="copy_channel_in">Delegate that copies one channel of <paramref name="pcm"/> into the short work buffer.</param>
        /// <param name="pcm">Interleaved input samples.</param>
        /// <param name="pcm_ptr">Start index into <paramref name="pcm"/>.</param>
        /// <param name="analysis_frame_size">Number of samples offered by the caller; the frame size actually encoded is derived from it by CodecHelpers.compute_frame_size.</param>
        /// <param name="data">Output buffer.</param>
        /// <param name="data_ptr">Index in <paramref name="data"/> where writing starts.</param>
        /// <param name="max_data_bytes">Maximum number of bytes that may be written.</param>
        /// <param name="lsb_depth">Forwarded to each stream encoder.</param>
        /// <param name="downmix">Downmix delegate, forwarded to the frame-size selection and to each stream encoder.</param>
        /// <param name="float_api">Forwarded to each stream encoder.</param>
        /// <returns>
        /// Total number of bytes written, OpusError.OPUS_BAD_ARG for an unsupported frame size,
        /// OpusError.OPUS_BUFFER_TOO_SMALL when the output cannot hold the smallest possible packet,
        /// or the negative value returned by a stream encoder.
        /// </returns>
        internal int opus_multistream_encode_native <T>
        (
            opus_copy_channel_in_func <T> copy_channel_in,
            T[] pcm,
            int pcm_ptr,
            int analysis_frame_size,
            byte[] data,
            int data_ptr,
            int max_data_bytes,
            int lsb_depth,
            Downmix.downmix_func <T> downmix,
            int float_api
        )
        {
            byte[]           packetScratch = new byte[MS_FRAME_TMP];
            OpusRepacketizer rp            = new OpusRepacketizer();
            int[]            bitrates      = new int[256];
            int[]            bandLogE      = new int[42];

            /* The surround state is only handed to the analysis when surround mode is on */
            int[] windowMem  = null;
            int[] preemphMem = null;
            if (this.surround != 0)
            {
                preemphMem = this.preemph_mem;
                windowMem  = this.window_mem;
            }

            /* The first encoder supplies the settings shared by every stream */
            OpusEncoder leadEncoder     = this.encoders[0];
            int         Fs              = leadEncoder.SampleRate;
            bool        constantBitrate = !leadEncoder.UseVBR;
            CeltMode    celt_mode       = leadEncoder.GetCeltMode();

            int codedChannels     = this.layout.nb_streams + this.layout.nb_coupled_streams;
            int delayCompensation = leadEncoder.Lookahead - Fs / 400;
            int frame_size        = CodecHelpers.compute_frame_size(pcm, pcm_ptr, analysis_frame_size,
                                                                    this.variable_duration, codedChannels, Fs, this.bitrate_bps,
                                                                    delayCompensation, downmix, this.subframe_mem, leadEncoder.analysis.enabled);

            if (400 * frame_size < Fs)
            {
                return(OpusError.OPUS_BAD_ARG);
            }

            /* Validate frame_size before using it to size the work buffer.
             * This mirrors the checks in opus_encode[_float](). */
            bool supportedDuration = 400 * frame_size == Fs || 200 * frame_size == Fs ||
                                     100 * frame_size == Fs || 50 * frame_size == Fs ||
                                     25 * frame_size == Fs || 50 * frame_size == 3 * Fs;
            if (!supportedDuration)
            {
                return(OpusError.OPUS_BAD_ARG);
            }

            /* Smallest packet the encoder can produce. */
            int smallestPacket = this.layout.nb_streams * 2 - 1;
            if (max_data_bytes < smallestPacket)
            {
                return(OpusError.OPUS_BUFFER_TOO_SMALL);
            }

            short[] buf     = new short[2 * frame_size];
            int[]   bandSMR = new int[21 * this.layout.nb_channels];
            if (this.surround != 0)
            {
                surround_analysis(celt_mode, pcm, pcm_ptr, bandSMR, windowMem, preemphMem, frame_size, 120, this.layout.nb_channels, Fs, copy_channel_in);
            }

            /* Compute bitrate allocation between streams (this could be a lot better) */
            int rateSum = surround_rate_allocation(bitrates, frame_size);

            if (constantBitrate)
            {
                if (this.bitrate_bps == OpusConstants.OPUS_AUTO)
                {
                    max_data_bytes = Inlines.IMIN(max_data_bytes, 3 * rateSum / (3 * 8 * Fs / frame_size));
                }
                else if (this.bitrate_bps != OpusConstants.OPUS_BITRATE_MAX)
                {
                    int cbrBytes = 3 * this.bitrate_bps / (3 * 8 * Fs / frame_size);
                    max_data_bytes = Inlines.IMIN(max_data_bytes, Inlines.IMAX(smallestPacket, cbrBytes));
                }
            }

            /* First pass: configure every stream encoder */
            for (int s = 0; s < this.layout.nb_streams; s++)
            {
                OpusEncoder enc = this.encoders[s];
                enc.Bitrate = bitrates[s];
                if (this.surround == 0)
                {
                    continue;
                }

                int nbChannels = this.layout.nb_channels;
                int equivRate  = this.bitrate_bps;
                if (frame_size * 50 < Fs)
                {
                    equivRate -= 60 * (Fs / frame_size - 50) * nbChannels;
                }

                if (equivRate > 10000 * nbChannels)
                {
                    enc.Bandwidth = OpusBandwidth.OPUS_BANDWIDTH_FULLBAND;
                }
                else if (equivRate > 7000 * nbChannels)
                {
                    enc.Bandwidth = OpusBandwidth.OPUS_BANDWIDTH_SUPERWIDEBAND;
                }
                else if (equivRate > 5000 * nbChannels)
                {
                    enc.Bandwidth = OpusBandwidth.OPUS_BANDWIDTH_WIDEBAND;
                }
                else
                {
                    enc.Bandwidth = OpusBandwidth.OPUS_BANDWIDTH_NARROWBAND;
                }

                if (s < this.layout.nb_coupled_streams)
                {
                    /* To preserve the spatial image, force stereo CELT on coupled streams */
                    enc.ForceMode     = OpusMode.MODE_CELT_ONLY;
                    enc.ForceChannels = 2;
                }
            }

            /* Second pass: encode each stream and append it to the output (counting ToC) */
            int totalBytes = 0;
            for (int s = 0; s < this.layout.nb_streams; s++)
            {
                int c1;
                int c2;

                rp.Reset();
                OpusEncoder enc = this.encoders[s];

                if (s < this.layout.nb_coupled_streams)
                {
                    int left  = OpusMultistream.get_left_channel(this.layout, s, -1);
                    int right = OpusMultistream.get_right_channel(this.layout, s, -1);

                    /* Interleave the two channels of the pair into the work buffer */
                    copy_channel_in(buf, 0, 2,
                                    pcm, pcm_ptr, this.layout.nb_channels, left, frame_size);
                    copy_channel_in(buf, 1, 2,
                                    pcm, pcm_ptr, this.layout.nb_channels, right, frame_size);
                    if (this.surround != 0)
                    {
                        int leftBase  = 21 * left;
                        int rightBase = 21 * right;
                        for (int band = 0; band < 21; band++)
                        {
                            bandLogE[band]      = bandSMR[leftBase + band];
                            bandLogE[21 + band] = bandSMR[rightBase + band];
                        }
                    }
                    c1 = left;
                    c2 = right;
                }
                else
                {
                    int chan = OpusMultistream.get_mono_channel(this.layout, s, -1);

                    copy_channel_in(buf, 0, 1,
                                    pcm, pcm_ptr, this.layout.nb_channels, chan, frame_size);
                    if (this.surround != 0)
                    {
                        int chanBase = 21 * chan;
                        for (int band = 0; band < 21; band++)
                        {
                            bandLogE[band] = bandSMR[chanBase + band];
                        }
                    }
                    c1 = chan;
                    c2 = -1;
                }

                if (this.surround != 0)
                {
                    enc.SetEnergyMask(bandLogE);
                }

                bool isLastStream = (s == this.layout.nb_streams - 1);

                /* number of bytes left (+Toc) */
                int streamBudget = max_data_bytes - totalBytes;
                /* Reserve one byte for the last stream and two for the others */
                streamBudget -= Inlines.IMAX(0, 2 * (this.layout.nb_streams - s - 1) - 1);
                streamBudget  = Inlines.IMIN(streamBudget, MS_FRAME_TMP);
                if (!isLastStream)
                {
                    /* Repacketizer will add one or two bytes for self-delimited frames */
                    streamBudget -= streamBudget > 253 ? 2 : 1;
                }
                else if (constantBitrate)
                {
                    enc.Bitrate = streamBudget * (8 * Fs / frame_size);
                }

                int len = enc.opus_encode_native(buf, 0, frame_size, packetScratch, 0, streamBudget, lsb_depth,
                                                 pcm, pcm_ptr, analysis_frame_size, c1, c2, this.layout.nb_channels, downmix, float_api);
                if (len < 0)
                {
                    return(len);
                }

                /* We need to use the repacketizer to add the self-delimiting lengths
                 * while taking into account the fact that the encoder can now return
                 * more than one frame at a time (e.g. 60 ms CELT-only) */
                rp.AddPacket(packetScratch, 0, len);
                int selfDelimited = isLastStream ? 0 : 1;
                int padFlag       = (constantBitrate && isLastStream) ? 1 : 0;
                len = rp.opus_repacketizer_out_range_impl(0, rp.GetNumFrames(),
                                                          data, data_ptr, max_data_bytes - totalBytes, selfDelimited, padFlag);
                data_ptr   += len;
                totalBytes += len;
            }

            return(totalBytes);
        }
Exemple #5
0
        /// <summary>
        ///
        /// </summary>
        /// <typeparam name="T">The type of signal being handled (either short or float) - changes based on which API is used</typeparam>
        /// <param name="tonal"></param>
        /// <param name="celt_mode"></param>
        /// <param name="x"></param>
        /// <param name="len"></param>
        /// <param name="offset"></param>
        /// <param name="c1"></param>
        /// <param name="c2"></param>
        /// <param name="C"></param>
        /// <param name="lsb_depth"></param>
        /// <param name="downmix"></param>
        internal static void tonality_analysis <T>(TonalityAnalysisState tonal, CeltMode celt_mode, T[] x, int x_ptr, int len, int offset, int c1, int c2, int C, int lsb_depth, Downmix.downmix_func <T> downmix)
        {
            int      i, b;
            FFTState kfft;

            int[] input;
            int[] output;
            int   N = 480, N2 = 240;

            float[] A   = tonal.angle;
            float[] dA  = tonal.d_angle;
            float[] d2A = tonal.d2_angle;
            float[] tonality;
            float[] noisiness;
            float[] band_tonality = new float[OpusConstants.NB_TBANDS];
            float[] logE          = new float[OpusConstants.NB_TBANDS];
            float[] BFCC          = new float[8];
            float[] features      = new float[25];
            float   frame_tonality;
            float   max_frame_tonality;
            /*float tw_sum=0;*/
            float frame_noisiness;
            float pi4   = (float)(M_PI * M_PI * M_PI * M_PI);
            float slope = 0;
            float frame_stationarity;
            float relativeE;

            float[]      frame_probs = new float[2];
            float        alpha, alphaE, alphaE2;
            float        frame_loudness;
            float        bandwidth_mask;
            int          bandwidth = 0;
            float        maxE      = 0;
            float        noise_floor;
            int          remaining;
            AnalysisInfo info; //[porting note] pointer

            tonal.last_transition++;
            alpha   = 1.0f / Inlines.IMIN(20, 1 + tonal.count);
            alphaE  = 1.0f / Inlines.IMIN(50, 1 + tonal.count);
            alphaE2 = 1.0f / Inlines.IMIN(1000, 1 + tonal.count);

            if (tonal.count < 4)
            {
                tonal.music_prob = 0.5f;
            }
            kfft = celt_mode.mdct.kfft[0];
            if (tonal.count == 0)
            {
                tonal.mem_fill = 240;
            }

            downmix(x, x_ptr, tonal.inmem, tonal.mem_fill, Inlines.IMIN(len, OpusConstants.ANALYSIS_BUF_SIZE - tonal.mem_fill), offset, c1, c2, C);

            if (tonal.mem_fill + len < OpusConstants.ANALYSIS_BUF_SIZE)
            {
                tonal.mem_fill += len;
                /* Don't have enough to update the analysis */
                return;
            }

            info = tonal.info[tonal.write_pos++];
            if (tonal.write_pos >= OpusConstants.DETECT_SIZE)
            {
                tonal.write_pos -= OpusConstants.DETECT_SIZE;
            }

            input     = new int[960];
            output    = new int[960];
            tonality  = new float[240];
            noisiness = new float[240];
            for (i = 0; i < N2; i++)
            {
                float w = Tables.analysis_window[i];
                input[2 * i]                 = (int)(w * tonal.inmem[i]);
                input[2 * i + 1]             = (int)(w * tonal.inmem[N2 + i]);
                input[(2 * (N - i - 1))]     = (int)(w * tonal.inmem[N - i - 1]);
                input[(2 * (N - i - 1)) + 1] = (int)(w * tonal.inmem[N + N2 - i - 1]);
            }
            Arrays.MemMoveInt(tonal.inmem, OpusConstants.ANALYSIS_BUF_SIZE - 240, 0, 240);

            remaining = len - (OpusConstants.ANALYSIS_BUF_SIZE - tonal.mem_fill);
            downmix(x, x_ptr, tonal.inmem, 240, remaining, offset + OpusConstants.ANALYSIS_BUF_SIZE - tonal.mem_fill, c1, c2, C);
            tonal.mem_fill = 240 + remaining;

            KissFFT.opus_fft(kfft, input, output);

            for (i = 1; i < N2; i++)
            {
                float X1r, X2r, X1i, X2i;
                float angle, d_angle, d2_angle;
                float angle2, d_angle2, d2_angle2;
                float mod1, mod2, avg_mod;
                X1r = (float)output[2 * i] + output[2 * (N - i)];
                X1i = (float)output[(2 * i) + 1] - output[2 * (N - i) + 1];
                X2r = (float)output[(2 * i) + 1] + output[2 * (N - i) + 1];
                X2i = (float)output[2 * (N - i)] - output[2 * i];

                angle    = (float)(.5f / M_PI) * fast_atan2f(X1i, X1r);
                d_angle  = angle - A[i];
                d2_angle = d_angle - dA[i];

                angle2    = (float)(.5f / M_PI) * fast_atan2f(X2i, X2r);
                d_angle2  = angle2 - angle;
                d2_angle2 = d_angle2 - d_angle;

                mod1         = d2_angle - (float)Math.Floor(0.5f + d2_angle);
                noisiness[i] = Inlines.ABS16(mod1);
                mod1        *= mod1;
                mod1        *= mod1;

                mod2          = d2_angle2 - (float)Math.Floor(0.5f + d2_angle2);
                noisiness[i] += Inlines.ABS16(mod2);
                mod2         *= mod2;
                mod2         *= mod2;

                avg_mod     = .25f * (d2A[i] + 2.0f * mod1 + mod2);
                tonality[i] = 1.0f / (1.0f + 40.0f * 16.0f * pi4 * avg_mod) - .015f;

                A[i]   = angle2;
                dA[i]  = d_angle2;
                d2A[i] = mod2;
            }

            frame_tonality     = 0;
            max_frame_tonality = 0;
            /*tw_sum = 0;*/
            info.activity      = 0;
            frame_noisiness    = 0;
            frame_stationarity = 0;
            if (tonal.count == 0)
            {
                for (b = 0; b < OpusConstants.NB_TBANDS; b++)
                {
                    tonal.lowE[b]  = 1e10f;
                    tonal.highE[b] = -1e10f;
                }
            }
            relativeE      = 0;
            frame_loudness = 0;
            for (b = 0; b < OpusConstants.NB_TBANDS; b++)
            {
                float E = 0, tE = 0, nE = 0;
                float L1, L2;
                float stationarity;
                for (i = Tables.tbands[b]; i < Tables.tbands[b + 1]; i++)
                {
                    float binE = output[2 * i] * (float)output[2 * i] + output[2 * (N - i)] * (float)output[2 * (N - i)]
                                 + output[2 * i + 1] * (float)output[2 * i + 1] + output[2 * (N - i) + 1] * (float)output[2 * (N - i) + 1];
                    /* FIXME: It's probably best to change the BFCC filter initial state instead */
                    binE *= 5.55e-17f;
                    E    += binE;
                    tE   += binE * tonality[i];
                    nE   += binE * 2.0f * (.5f - noisiness[i]);
                }

                tonal.E[tonal.E_count][b] = E;
                frame_noisiness          += nE / (1e-15f + E);

                frame_loudness += (float)Math.Sqrt(E + 1e-10f);
                logE[b]         = (float)Math.Log(E + 1e-10f);
                tonal.lowE[b]   = Inlines.MIN32(logE[b], tonal.lowE[b] + 0.01f);
                tonal.highE[b]  = Inlines.MAX32(logE[b], tonal.highE[b] - 0.1f);
                if (tonal.highE[b] < tonal.lowE[b] + 1.0f)
                {
                    tonal.highE[b] += 0.5f;
                    tonal.lowE[b]  -= 0.5f;
                }
                relativeE += (logE[b] - tonal.lowE[b]) / (1e-15f + tonal.highE[b] - tonal.lowE[b]);

                L1 = L2 = 0;
                for (i = 0; i < OpusConstants.NB_FRAMES; i++)
                {
                    L1 += (float)Math.Sqrt(tonal.E[i][b]);
                    L2 += tonal.E[i][b];
                }

                stationarity        = Inlines.MIN16(0.99f, L1 / (float)Math.Sqrt(1e-15 + OpusConstants.NB_FRAMES * L2));
                stationarity       *= stationarity;
                stationarity       *= stationarity;
                frame_stationarity += stationarity;
                /*band_tonality[b] = tE/(1e-15+E)*/
                band_tonality[b] = Inlines.MAX16(tE / (1e-15f + E), stationarity * tonal.prev_band_tonality[b]);
                frame_tonality  += band_tonality[b];
                if (b >= OpusConstants.NB_TBANDS - OpusConstants.NB_TONAL_SKIP_BANDS)
                {
                    frame_tonality -= band_tonality[b - OpusConstants.NB_TBANDS + OpusConstants.NB_TONAL_SKIP_BANDS];
                }
                max_frame_tonality = Inlines.MAX16(max_frame_tonality, (1.0f + .03f * (b - OpusConstants.NB_TBANDS)) * frame_tonality);
                slope += band_tonality[b] * (b - 8);
                tonal.prev_band_tonality[b] = band_tonality[b];
            }

            bandwidth_mask = 0;
            bandwidth      = 0;
            maxE           = 0;
            noise_floor    = 5.7e-4f / (1 << (Inlines.IMAX(0, lsb_depth - 8)));
            noise_floor   *= 1 << (15 + CeltConstants.SIG_SHIFT);
            noise_floor   *= noise_floor;
            for (b = 0; b < OpusConstants.NB_TOT_BANDS; b++)
            {
                float E = 0;
                int   band_start, band_end;
                /* Keep a margin of 300 Hz for aliasing */
                band_start = Tables.extra_bands[b];
                band_end   = Tables.extra_bands[b + 1];
                for (i = band_start; i < band_end; i++)
                {
                    float binE = output[2 * i] * (float)output[2 * i] + output[2 * (N - i)] * (float)output[2 * (N - i)]
                                 + output[2 * i + 1] * (float)output[2 * i + 1] + output[2 * (N - i) + 1] * (float)output[2 * (N - i) + 1];
                    E += binE;
                }
                maxE           = Inlines.MAX32(maxE, E);
                tonal.meanE[b] = Inlines.MAX32((1 - alphaE2) * tonal.meanE[b], E);
                E = Inlines.MAX32(E, tonal.meanE[b]);
                /* Use a simple follower with 13 dB/Bark slope for spreading function */
                bandwidth_mask = Inlines.MAX32(.05f * bandwidth_mask, E);

                /* Consider the band "active" only if all these conditions are met:
                 * 1) less than 10 dB below the simple follower
                 * 2) less than 90 dB below the peak band (maximal masking possible considering
                 *    both the ATH and the loudness-dependent slope of the spreading function)
                 * 3) above the PCM quantization noise floor
                 */
                if (E > .1 * bandwidth_mask && E * 1e9f > maxE && E > noise_floor * (band_end - band_start))
                {
                    bandwidth = b;
                }
            }
            if (tonal.count <= 2)
            {
                bandwidth = 20;
            }
            frame_loudness   = 20 * (float)Math.Log10(frame_loudness);
            tonal.Etracker   = Inlines.MAX32(tonal.Etracker - .03f, frame_loudness);
            tonal.lowECount *= (1 - alphaE);
            if (frame_loudness < tonal.Etracker - 30)
            {
                tonal.lowECount += alphaE;
            }

            for (i = 0; i < 8; i++)
            {
                float sum = 0;
                for (b = 0; b < 16; b++)
                {
                    sum += Tables.dct_table[i * 16 + b] * logE[b];
                }
                BFCC[i] = sum;
            }

            frame_stationarity /= OpusConstants.NB_TBANDS;
            relativeE          /= OpusConstants.NB_TBANDS;
            if (tonal.count < 10)
            {
                relativeE = 0.5f;
            }
            frame_noisiness    /= OpusConstants.NB_TBANDS;
            info.activity       = frame_noisiness + (1 - frame_noisiness) * relativeE;
            frame_tonality      = (max_frame_tonality / (OpusConstants.NB_TBANDS - OpusConstants.NB_TONAL_SKIP_BANDS));
            frame_tonality      = Inlines.MAX16(frame_tonality, tonal.prev_tonality * .8f);
            tonal.prev_tonality = frame_tonality;

            slope /= 8 * 8;
            info.tonality_slope = slope;

            tonal.E_count = (tonal.E_count + 1) % OpusConstants.NB_FRAMES;
            tonal.count++;
            info.tonality = frame_tonality;

            for (i = 0; i < 4; i++)
            {
                features[i] = -0.12299f * (BFCC[i] + tonal.mem[i + 24]) + 0.49195f * (tonal.mem[i] + tonal.mem[i + 16]) + 0.69693f * tonal.mem[i + 8] - 1.4349f * tonal.cmean[i];
            }

            for (i = 0; i < 4; i++)
            {
                tonal.cmean[i] = (1 - alpha) * tonal.cmean[i] + alpha * BFCC[i];
            }

            for (i = 0; i < 4; i++)
            {
                features[4 + i] = 0.63246f * (BFCC[i] - tonal.mem[i + 24]) + 0.31623f * (tonal.mem[i] - tonal.mem[i + 16]);
            }
            for (i = 0; i < 3; i++)
            {
                features[8 + i] = 0.53452f * (BFCC[i] + tonal.mem[i + 24]) - 0.26726f * (tonal.mem[i] + tonal.mem[i + 16]) - 0.53452f * tonal.mem[i + 8];
            }

            if (tonal.count > 5)
            {
                for (i = 0; i < 9; i++)
                {
                    tonal.std[i] = (1 - alpha) * tonal.std[i] + alpha * features[i] * features[i];
                }
            }

            for (i = 0; i < 8; i++)
            {
                tonal.mem[i + 24] = tonal.mem[i + 16];
                tonal.mem[i + 16] = tonal.mem[i + 8];
                tonal.mem[i + 8]  = tonal.mem[i];
                tonal.mem[i]      = BFCC[i];
            }
            for (i = 0; i < 9; i++)
            {
                features[11 + i] = (float)Math.Sqrt(tonal.std[i]);
            }
            features[20] = info.tonality;
            features[21] = info.activity;
            features[22] = frame_stationarity;
            features[23] = info.tonality_slope;
            features[24] = tonal.lowECount;

            mlp.mlp_process(Tables.net, features, frame_probs);
            frame_probs[0] = .5f * (frame_probs[0] + 1);
            /* Curve fitting between the MLP probability and the actual probability */
            frame_probs[0] = .01f + 1.21f * frame_probs[0] * frame_probs[0] - .23f * (float)Math.Pow(frame_probs[0], 10);
            /* Probability of active audio (as opposed to silence) */
            frame_probs[1] = .5f * frame_probs[1] + .5f;
            /* Consider that silence has a 50-50 probability. */
            frame_probs[0] = frame_probs[1] * frame_probs[0] + (1 - frame_probs[1]) * .5f;

            /*printf("%f %f ", frame_probs[0], frame_probs[1]);*/
            {
                /* Probability of state transition */
                float tau;

                /* Represents independence of the MLP probabilities, where
                 *  beta=1 means fully independent. */
                float beta;
                /* Denormalized probability of speech (p0) and music (p1) after update */
                float p0, p1;
                /* Probabilities for "all speech" and "all music" */
                float s0, m0;
                /* Probability sum for renormalisation */
                float psum;
                /* Instantaneous probability of speech and music, with beta pre-applied. */
                float speech0;
                float music0;

                /* One transition every 3 minutes of active audio */
                tau  = .00005f * frame_probs[1];
                beta = .05f;
                //if (1)
                {
                    /* Adapt beta based on how "unexpected" the new prob is */
                    float p, q;
                    p    = Inlines.MAX16(.05f, Inlines.MIN16(.95f, frame_probs[0]));
                    q    = Inlines.MAX16(.05f, Inlines.MIN16(.95f, tonal.music_prob));
                    beta = .01f + .05f * Inlines.ABS16(p - q) / (p * (1 - q) + q * (1 - p));
                }

                /* p0 and p1 are the probabilities of speech and music at this frame
                 *  using only information from previous frame and applying the
                 *  state transition model */
                p0 = (1 - tonal.music_prob) * (1 - tau) + tonal.music_prob * tau;
                p1 = tonal.music_prob * (1 - tau) + (1 - tonal.music_prob) * tau;

                /* We apply the current probability with exponent beta to work around
                 *  the fact that the probability estimates aren't independent. */
                p0 *= (float)Math.Pow(1 - frame_probs[0], beta);
                p1 *= (float)Math.Pow(frame_probs[0], beta);
                /* Normalise the probabilities to get the Markov probability of music. */
                tonal.music_prob = p1 / (p0 + p1);
                info.music_prob  = tonal.music_prob;

                /* This chunk of code deals with delayed decision. */
                psum = 1e-20f;
                /* Instantaneous probability of speech and music, with beta pre-applied. */
                speech0 = (float)Math.Pow(1 - frame_probs[0], beta);
                music0  = (float)Math.Pow(frame_probs[0], beta);
                if (tonal.count == 1)
                {
                    tonal.pspeech[0] = 0.5f;
                    tonal.pmusic[0]  = 0.5f;
                }

                /* Updated probability of having only speech (s0) or only music (m0),
                 *  before considering the new observation. */
                s0 = tonal.pspeech[0] + tonal.pspeech[1];
                m0 = tonal.pmusic[0] + tonal.pmusic[1];
                /* Updates s0 and m0 with instantaneous probability. */
                tonal.pspeech[0] = s0 * (1 - tau) * speech0;
                tonal.pmusic[0]  = m0 * (1 - tau) * music0;
                /* Propagate the transition probabilities */
                for (i = 1; i < OpusConstants.DETECT_SIZE - 1; i++)
                {
                    tonal.pspeech[i] = tonal.pspeech[i + 1] * speech0;
                    tonal.pmusic[i]  = tonal.pmusic[i + 1] * music0;
                }
                /* Probability that the latest frame is speech, when all the previous ones were music. */
                tonal.pspeech[OpusConstants.DETECT_SIZE - 1] = m0 * tau * speech0;
                /* Probability that the latest frame is music, when all the previous ones were speech. */
                tonal.pmusic[OpusConstants.DETECT_SIZE - 1] = s0 * tau * music0;

                /* Renormalise probabilities to 1 */
                for (i = 0; i < OpusConstants.DETECT_SIZE; i++)
                {
                    psum += tonal.pspeech[i] + tonal.pmusic[i];
                }
                psum = 1.0f / psum;
                for (i = 0; i < OpusConstants.DETECT_SIZE; i++)
                {
                    tonal.pspeech[i] *= psum;
                    tonal.pmusic[i]  *= psum;
                }
                psum = tonal.pmusic[0];
                for (i = 1; i < OpusConstants.DETECT_SIZE; i++)
                {
                    psum += tonal.pspeech[i];
                }

                /* Estimate our confidence in the speech/music decisions */
                if (frame_probs[1] > .75)
                {
                    if (tonal.music_prob > .9)
                    {
                        float adapt;
                        adapt = 1.0f / (++tonal.music_confidence_count);
                        tonal.music_confidence_count = Inlines.IMIN(tonal.music_confidence_count, 500);
                        tonal.music_confidence      += adapt * Inlines.MAX16(-.2f, frame_probs[0] - tonal.music_confidence);
                    }
                    if (tonal.music_prob < .1)
                    {
                        float adapt;
                        adapt = 1.0f / (++tonal.speech_confidence_count);
                        tonal.speech_confidence_count = Inlines.IMIN(tonal.speech_confidence_count, 500);
                        tonal.speech_confidence      += adapt * Inlines.MIN16(.2f, frame_probs[0] - tonal.speech_confidence);
                    }
                }
                else
                {
                    if (tonal.music_confidence_count == 0)
                    {
                        tonal.music_confidence = .9f;
                    }
                    if (tonal.speech_confidence_count == 0)
                    {
                        tonal.speech_confidence = .1f;
                    }
                }
            }
            if (tonal.last_music != ((tonal.music_prob > .5f) ? 1 : 0))
            {
                tonal.last_transition = 0;
            }
            tonal.last_music = (tonal.music_prob > .5f) ? 1 : 0;

            info.bandwidth = bandwidth;
            info.noisiness = frame_noisiness;
            info.valid     = 1;
        }