/// <summary>
/// Spends leftover bits on one extra bit of energy resolution per channel.
/// Bands are visited in two passes: those with fine_priority 0 first, then those with fine_priority 1.
/// </summary>
/// <param name="m">Mode; only <c>nbEBands</c> is read, as the per-channel stride into <paramref name="oldEBands"/>.</param>
/// <param name="start">First band to consider (inclusive).</param>
/// <param name="end">Last band to consider (exclusive).</param>
/// <param name="oldEBands">Band energies, channel-major; refined in place.</param>
/// <param name="fine_quant">Fine bits already assigned per band; bands at or above MAX_FINE_BITS are skipped.</param>
/// <param name="fine_priority">Pass (0 or 1) in which each band is eligible.</param>
/// <param name="bits_left">Remaining bits; a band is processed only while at least <paramref name="C"/> remain.</param>
/// <param name="dec">Entropy decoder that supplies the raw bits.</param>
/// <param name="C">Number of channels.</param>
internal static void unquant_energy_finalise(CeltMode m, int start, int end, int[] oldEBands, int[] fine_quant, int[] fine_priority, int bits_left, EntropyCoder dec, int C)
{
    // 0.5 in Q(DB_SHIFT); same value as Inlines.QCONST16(.5f, CeltConstants.DB_SHIFT).
    short half = (short)(0.5 + (.5f) * (((int)1) << (CeltConstants.DB_SHIFT)));

    for (int pass = 0; pass < 2; pass++)
    {
        for (int band = start; band < end && bits_left >= C; band++)
        {
            bool eligible = fine_quant[band] < CeltConstants.MAX_FINE_BITS && fine_priority[band] == pass;
            if (!eligible)
            {
                continue;
            }

            int channel = 0;
            do
            {
                // One raw bit selects the upper or lower half of the current quantisation cell.
                int bit = (int)dec.dec_bits(1);
                int delta = Inlines.SHR16(Inlines.SHL16(bit, CeltConstants.DB_SHIFT) - half, fine_quant[band] + 1);
                oldEBands[band + channel * m.nbEBands] += delta;
                bits_left--;
            } while (++channel < C);
        }
    }
}
/// <summary>
/// Finds the pulse-count index whose cached bit cost is closest to the requested number of bits.
/// </summary>
/// <param name="m">Mode providing the pulse cache (<c>cache.bits</c>, <c>cache.index</c>) and <c>nbEBands</c>.</param>
/// <param name="band">Band index.</param>
/// <param name="LM">Frame size shift; the table row for <c>LM + 1</c> is used.</param>
/// <param name="bits">Bit budget to match.</param>
/// <returns>Index into the band's cache row (0 means no pulses).</returns>
internal static int bits2pulses(CeltMode m, int band, int LM, int bits)
{
    byte[] table = m.cache.bits;
    int rowStart = m.cache.index[(LM + 1) * m.nbEBands + band];

    // The cache stores (cost - 1), so compare against (bits - 1).
    int target = bits - 1;

    int below = 0;
    int above = table[rowStart];

    // Fixed-iteration bisection over the row; entry 0 of the row holds the maximum index.
    for (int step = 0; step < CeltConstants.LOG_MAX_PSEUDO; step++)
    {
        int probe = (below + above + 1) >> 1;
        if ((int)table[rowStart + probe] >= target)
        {
            above = probe;
        }
        else
        {
            below = probe;
        }
    }

    // Index 0 is treated as costing -1 so that the comparison below stays symmetric.
    int costBelow = (below == 0) ? -1 : (int)table[rowStart + below];
    int costAbove = (int)table[rowStart + above];

    return (target - costBelow <= costAbove - target) ? below : above;
}
/// <summary>
/// Decodes the fine energy refinement: for every band with a positive fine bit count, reads that
/// many raw bits per channel and adds the resulting offset to the band energy.
/// </summary>
/// <param name="m">Mode; only <c>nbEBands</c> is read, as the per-channel stride.</param>
/// <param name="start">First band (inclusive).</param>
/// <param name="end">Last band (exclusive).</param>
/// <param name="oldEBands">Band energies, channel-major; updated in place.</param>
/// <param name="fine_quant">Number of fine bits per band.</param>
/// <param name="dec">Entropy decoder supplying the raw bits.</param>
/// <param name="C">Number of channels.</param>
internal static void unquant_fine_energy(CeltMode m, int start, int end, int[] oldEBands, int[] fine_quant, EntropyCoder dec, int C)
{
    // 0.5 in Q(DB_SHIFT); same value as Inlines.QCONST16(.5f, CeltConstants.DB_SHIFT).
    short half = (short)(0.5 + (.5f) * (((int)1) << (CeltConstants.DB_SHIFT)));

    for (int band = start; band < end; band++)
    {
        if (fine_quant[band] > 0)
        {
            int channel = 0;
            do
            {
                int code = (int)dec.dec_bits((uint)fine_quant[band]);

                // Centre of the selected cell, then re-centred around zero.
                int cellCentre = Inlines.SHR32(Inlines.SHL32(code, CeltConstants.DB_SHIFT) + half, fine_quant[band]);
                int delta = Inlines.SUB16(cellCentre, half);

                oldEBands[band + channel * m.nbEBands] += delta;
            } while (++channel < C);
        }
    }
}
/// <summary>
/// Feeds new PCM to the tonality analyser in chunks of at most 480 samples, then fills
/// <paramref name="analysis_info"/> for the current frame.
/// </summary>
/// <typeparam name="T">Sample type of the PCM buffer.</typeparam>
/// <param name="analysis">Analyser state; its <c>analysis_offset</c> is read and updated.</param>
/// <param name="celt_mode">Mode passed through to <c>tonality_analysis</c>.</param>
/// <param name="analysis_pcm">PCM to analyse; when null, no new audio is analysed.</param>
/// <param name="analysis_pcm_ptr">Start offset inside <paramref name="analysis_pcm"/>.</param>
/// <param name="analysis_frame_size">Number of samples available for analysis (clamped internally).</param>
/// <param name="frame_size">Size of the frame being encoded.</param>
/// <param name="c1">Passed through to the downmix.</param>
/// <param name="c2">Passed through to the downmix.</param>
/// <param name="C">Channel count passed through to the downmix.</param>
/// <param name="Fs">Sampling rate, used to clamp <paramref name="analysis_frame_size"/>.</param>
/// <param name="lsb_depth">Passed through to <c>tonality_analysis</c>.</param>
/// <param name="downmix">Downmix callback.</param>
/// <param name="analysis_info">Receives the analysis result; <c>valid</c> is cleared first.</param>
internal static void run_analysis<T>(TonalityAnalysisState analysis, CeltMode celt_mode, T[] analysis_pcm, int analysis_pcm_ptr, int analysis_frame_size, int frame_size, int c1, int c2, int C, int Fs, int lsb_depth, Downmix.downmix_func<T> downmix, AnalysisInfo analysis_info)
{
    if (analysis_pcm != null)
    {
        // Clamp so the analysis buffer cannot overflow or wrap around.
        analysis_frame_size = Inlines.IMIN((OpusConstants.DETECT_SIZE - 5) * Fs / 100, analysis_frame_size);

        int position = analysis.analysis_offset;
        int remaining = analysis_frame_size - analysis.analysis_offset;

        // Always runs at least once, matching the original do/while.
        do
        {
            tonality_analysis(analysis, celt_mode, analysis_pcm, analysis_pcm_ptr, Inlines.IMIN(480, remaining), position, c1, c2, C, lsb_depth, downmix);
            position += 480;
            remaining -= 480;
        } while (remaining > 0);

        analysis.analysis_offset = analysis_frame_size - frame_size;
    }

    analysis_info.valid = 0;
    tonality_get_info(analysis, analysis_info, frame_size);
}
/// <summary>
/// Converts band amplitudes to mean-removed log2 energies. Bands below <paramref name="effEnd"/>
/// get <c>celt_log2(bandE &lt;&lt; 2) - (eMeans[band] &lt;&lt; 6)</c>; bands from <paramref name="effEnd"/>
/// up to <paramref name="end"/> are set to -14.0 in Q(DB_SHIFT).
/// </summary>
/// <param name="m">Mode; only <c>nbEBands</c> is read, as the per-channel stride.</param>
/// <param name="effEnd">Number of bands that carry real energy.</param>
/// <param name="end">Total number of bands to write per channel.</param>
/// <param name="bandE">Input band amplitudes, channel-major.</param>
/// <param name="bandLogE">Output buffer for the log energies.</param>
/// <param name="bandLogE_ptr">Offset of the first output element inside <paramref name="bandLogE"/>.</param>
/// <param name="C">Number of channels.</param>
internal static void amp2Log2(CeltMode m, int effEnd, int end, int[] bandE, int[] bandLogE, int bandLogE_ptr, int C)
{
    // -14.0 in Q(DB_SHIFT); same value as -Inlines.QCONST16(14.0f, CeltConstants.DB_SHIFT).
    int emptyBandLevel = 0 - ((short)(0.5 + (14.0f) * (((int)1) << (CeltConstants.DB_SHIFT))));

    int channel = 0;
    do
    {
        int channelBase = channel * m.nbEBands;
        int band = 0;

        while (band < effEnd)
        {
            int logEnergy = Inlines.celt_log2(Inlines.SHL32(bandE[band + channelBase], 2));
            bandLogE[bandLogE_ptr + channelBase + band] = logEnergy - Inlines.SHL16((int)Tables.eMeans[band], 6);
            band++;
        }

        for (band = effEnd; band < end; band++)
        {
            bandLogE[bandLogE_ptr + channelBase + band] = emptyBandLevel;
        }
    } while (++channel < C);
}
/// <summary>
/// Encodes the fine energy refinement: for every band with a positive fine bit count, quantises
/// the remaining coarse error to that many raw bits per channel and updates the stored energy
/// and error accordingly.
/// </summary>
/// <param name="m">Mode (not read by this method).</param>
/// <param name="start">First band (inclusive).</param>
/// <param name="end">Last band (exclusive).</param>
/// <param name="oldEBands">Quantised band energies, indexed [channel][band]; updated in place.</param>
/// <param name="error">Residual quantisation error, indexed [channel][band]; updated in place.</param>
/// <param name="fine_quant">Number of fine bits per band.</param>
/// <param name="enc">Entropy encoder receiving the raw bits.</param>
/// <param name="C">Number of channels.</param>
internal static void quant_fine_energy(CeltMode m, int start, int end, int[][] oldEBands, int[][] error, int[] fine_quant, EntropyCoder enc, int C)
{
    // 0.5 in Q(DB_SHIFT); same value as Inlines.QCONST16(.5f, CeltConstants.DB_SHIFT).
    short half = (short)(0.5 + (.5f) * (((int)1) << (CeltConstants.DB_SHIFT)));

    for (int band = start; band < end; band++)
    {
        if (fine_quant[band] > 0)
        {
            int levels = 1 << fine_quant[band];
            int channel = 0;
            do
            {
                // Truncating shift on purpose: this step has to be done without rounding.
                int code = (error[channel][band] + half) >> (CeltConstants.DB_SHIFT - fine_quant[band]);

                // Clamp to [0, levels - 1]: upper bound first, then lower bound.
                code = Inlines.IMAX(0, Inlines.IMIN(code, levels - 1));

                enc.enc_bits((uint)code, (uint)fine_quant[band]);

                int cellCentre = Inlines.SHR32(Inlines.SHL32(code, CeltConstants.DB_SHIFT) + half, fine_quant[band]);
                int delta = Inlines.SUB16(cellCentre, half);

                oldEBands[channel][band] += delta;
                error[channel][band] -= delta;
            } while (++channel < C);
        }
    }
}
/// <summary>
/// Single-pass coarse energy encoder. Each band/channel energy is predicted from the previous
/// frame (scaled by <c>coef</c>) and from the running inter-band predictor, the residual is
/// rounded to an integer step and entropy coded, and the reconstructed energy and the remaining
/// error are stored.
/// </summary>
/// <param name="m">Mode (not read by this method).</param>
/// <param name="start">First band (inclusive).</param>
/// <param name="end">Last band (exclusive).</param>
/// <param name="eBands">Target log energies, indexed [channel][band].</param>
/// <param name="oldEBands">Previous-frame energies on entry, reconstructed energies on exit.</param>
/// <param name="budget">Total bit budget.</param>
/// <param name="tell">Bits used so far on entry; refreshed from the encoder inside the loop.</param>
/// <param name="prob_model">Laplace model parameters, two bytes per band.</param>
/// <param name="error">Receives the residual error per [channel][band].</param>
/// <param name="enc">Entropy encoder.</param>
/// <param name="C">Number of channels.</param>
/// <param name="LM">Frame size shift; selects the prediction coefficients.</param>
/// <param name="intra">Non-zero to code without inter-frame prediction.</param>
/// <param name="max_decay">Maximum allowed energy drop relative to the previous frame.</param>
/// <param name="lfe">Non-zero for an LFE channel.</param>
/// <returns>Accumulated |wanted step - coded step| over all bands, or 0 when <paramref name="lfe"/> is set.</returns>
internal static int quant_coarse_energy_impl(CeltMode m, int start, int end, int[][] eBands, int[][] oldEBands, int budget, int tell, byte[] prob_model, int[][] error, EntropyCoder enc, int C, int LM, int intra, int max_decay, int lfe)
{
    int totalBadness = 0;
    int[] predictor = { 0, 0 };
    int coef;
    int beta;

    // The intra flag is only signalled when there is room for it.
    if (tell + 3 <= budget)
    {
        enc.enc_bit_logp(intra, 3);
    }

    if (intra == 0)
    {
        beta = beta_coef[LM];
        coef = pred_coef[LM];
    }
    else
    {
        coef = 0;
        beta = beta_intra;
    }

    /* Encode at a fixed coarse resolution */
    for (int band = start; band < end; band++)
    {
        int channel = 0;
        do
        {
            int x = eBands[channel][band];

            // Previous energy, floored at -9.0 in Q(DB_SHIFT) (Inlines.QCONST16(9.0f, DB_SHIFT)).
            int oldE = Inlines.MAX16(-((short)(0.5 + (9.0f) * (((int)1) << (CeltConstants.DB_SHIFT)))), oldEBands[channel][band]);

            int prediction = Inlines.PSHR32(Inlines.MULT16_16(coef, oldE), 8);
            int f = Inlines.SHL32(Inlines.EXTEND32(x), 7) - prediction - predictor[channel];

            /* Rounding to nearest integer here is really important! */
            int qi = (f + ((int)(0.5 + (.5f) * (((int)1) << (CeltConstants.DB_SHIFT + 7))))) >> (CeltConstants.DB_SHIFT + 7);

            int decay_bound = Inlines.EXTRACT16(
                Inlines.MAX32(
                    -((short)(0.5 + (28.0f) * (((int)1) << (CeltConstants.DB_SHIFT)))),
                    Inlines.SUB32((int)oldEBands[channel][band], max_decay)));

            /* Prevent the energy from going down too quickly (e.g. for bands
             * that have just one bin) */
            if (qi < 0 && x < decay_bound)
            {
                qi += (int)Inlines.SHR16(Inlines.SUB16(decay_bound, x), CeltConstants.DB_SHIFT);
                if (qi > 0)
                {
                    qi = 0;
                }
            }

            int wanted = qi;

            /* If we don't have enough bits to encode all the energy, just assume
             * something safe. */
            tell = enc.tell();
            int bits_left = budget - tell - 3 * C * (end - band);
            if (band != start && bits_left < 30)
            {
                if (bits_left < 24)
                {
                    qi = Inlines.IMIN(1, qi);
                }
                if (bits_left < 16)
                {
                    qi = Inlines.IMAX(-1, qi);
                }
            }

            if (lfe != 0 && band >= 2)
            {
                qi = Inlines.IMIN(qi, 0);
            }

            // Choose the coding method from the bits still available.
            int available = budget - tell;
            if (available >= 15)
            {
                int pi = 2 * Inlines.IMIN(band, 20);
                Laplace.ec_laplace_encode(enc, ref qi, (((uint)prob_model[pi]) << 7), ((int)prob_model[pi + 1]) << 6);
            }
            else if (available >= 2)
            {
                qi = Inlines.IMAX(-1, Inlines.IMIN(qi, 1));
                enc.enc_icdf(2 * qi ^ (0 - (qi < 0 ? 1 : 0)), small_energy_icdf, 2);
            }
            else if (available >= 1)
            {
                qi = Inlines.IMIN(0, qi);
                enc.enc_bit_logp(-qi, 1);
            }
            else
            {
                qi = -1;
            }

            error[channel][band] = Inlines.PSHR32(f, 7) - Inlines.SHL16((qi), CeltConstants.DB_SHIFT);
            totalBadness += Inlines.abs(wanted - qi);

            int q = (int)Inlines.SHL32(qi, CeltConstants.DB_SHIFT);
            int qScaled = Inlines.SHL32(q, 7);

            // Reconstructed energy, floored at -28.0 in Q(DB_SHIFT + 7).
            int tmp = prediction + predictor[channel] + qScaled;
            tmp = Inlines.MAX32(-((int)(0.5 + (28.0f) * (((int)1) << (CeltConstants.DB_SHIFT + 7)))), tmp);
            oldEBands[channel][band] = Inlines.PSHR32(tmp, 7);

            predictor[channel] = predictor[channel] + qScaled - Inlines.MULT16_16(beta, Inlines.PSHR32(q, 8));
        } while (++channel < C);
    }

    return (lfe != 0) ? 0 : totalBadness;
}
/// <summary>
/// Decodes the coarse band energies: reads one integer step per band and channel, choosing the
/// coding method from the bits still available, and rebuilds the energy from the previous-frame
/// prediction and the running inter-band predictor.
/// </summary>
/// <param name="m">Mode; only <c>nbEBands</c> is read, as the per-channel stride.</param>
/// <param name="start">First band (inclusive).</param>
/// <param name="end">Last band (exclusive).</param>
/// <param name="oldEBands">Previous-frame energies on entry, decoded energies on exit; channel-major.</param>
/// <param name="intra">Non-zero when the frame was coded without inter-frame prediction.</param>
/// <param name="dec">Entropy decoder.</param>
/// <param name="C">Number of channels (asserted to be at most 2).</param>
/// <param name="LM">Frame size shift; selects the probability model and prediction coefficients.</param>
internal static void unquant_coarse_energy(CeltMode m, int start, int end, int[] oldEBands, int intra, EntropyCoder dec, int C, int LM)
{
    byte[] prob_model = Tables.e_prob_model[LM][intra];
    int[] predictor = { 0, 0 };
    int coef;
    int beta;

    if (intra == 0)
    {
        beta = beta_coef[LM];
        coef = pred_coef[LM];
    }
    else
    {
        coef = 0;
        beta = beta_intra;
    }

    int budget = (int)dec.storage * 8;

    /* Decode at a fixed coarse resolution */
    for (int band = start; band < end; band++)
    {
        int channel = 0;
        do
        {
            int qi;

            /* It would be better to express this invariant as a
             * test on C at function entry, but that isn't enough
             * to make the static analyzer happy. */
            Inlines.OpusAssert(channel < 2);

            int tell = dec.tell();
            int available = budget - tell;
            if (available >= 15)
            {
                int pi = 2 * Inlines.IMIN(band, 20);
                qi = Laplace.ec_laplace_decode(dec, (uint)prob_model[pi] << 7, prob_model[pi + 1] << 6);
            }
            else if (available >= 2)
            {
                qi = dec.dec_icdf(small_energy_icdf, 2);
                // Undo the zig-zag mapping applied by the encoder's 2 * qi ^ -(qi < 0).
                qi = (qi >> 1) ^ -(qi & 1);
            }
            else if (available >= 1)
            {
                qi = 0 - dec.dec_bit_logp(1);
            }
            else
            {
                qi = -1;
            }

            int q = (int)Inlines.SHL32(qi, CeltConstants.DB_SHIFT);
            int qScaled = Inlines.SHL32(q, 7);
            int slot = band + channel * m.nbEBands;

            // Floor the previous energy at -9.0 in Q(DB_SHIFT) before predicting from it.
            oldEBands[slot] = Inlines.MAX16((0 - ((short)(0.5 + (9.0f) * (((int)1) << (CeltConstants.DB_SHIFT))))), oldEBands[slot]);

            int tmp = Inlines.PSHR32(Inlines.MULT16_16(coef, oldEBands[slot]), 8) + predictor[channel] + qScaled;
            // Floor the reconstruction at -28.0 in Q(DB_SHIFT + 7).
            tmp = Inlines.MAX32(-((int)(0.5 + (28.0f) * (((int)1) << (CeltConstants.DB_SHIFT + 7)))), tmp);
            oldEBands[slot] = Inlines.PSHR32(tmp, 7);

            predictor[channel] = predictor[channel] + qScaled - Inlines.MULT16_16(beta, Inlines.PSHR32(q, 8));
        } while (++channel < C);
    }
}
/// <summary>
/// Encodes the coarse band energies, choosing between intra and inter coding. Depending on
/// <paramref name="force_intra"/>, <paramref name="two_pass"/> and the accumulated distortion it
/// runs the intra coder, the inter coder, or both, and keeps the result with the lower badness
/// (ties are broken on bit usage plus a loss-rate bias).
/// </summary>
/// <param name="m">Mode; <c>nbEBands</c> gives the per-channel array length.</param>
/// <param name="start">First band (inclusive).</param>
/// <param name="end">Last band (exclusive).</param>
/// <param name="effEnd">Last band used for the distortion estimate.</param>
/// <param name="eBands">Target log energies, indexed [channel][band].</param>
/// <param name="oldEBands">Previous-frame energies on entry, reconstructed energies on exit.</param>
/// <param name="budget">Total bit budget.</param>
/// <param name="error">Receives the residual error per [channel][band].</param>
/// <param name="enc">Entropy encoder; left in the state of the chosen pass.</param>
/// <param name="C">Number of channels.</param>
/// <param name="LM">Frame size shift.</param>
/// <param name="nbAvailableBytes">Bytes available for the frame.</param>
/// <param name="force_intra">Non-zero to force intra coding.</param>
/// <param name="delayedIntra">Running distortion estimate; updated on exit.</param>
/// <param name="two_pass">Non-zero to try both intra and inter coding.</param>
/// <param name="loss_rate">Expected packet loss, used to bias the choice towards intra.</param>
/// <param name="lfe">Non-zero for an LFE channel.</param>
internal static void quant_coarse_energy(CeltMode m, int start, int end, int effEnd, int[][] eBands, int[][] oldEBands, uint budget, int[][] error, EntropyCoder enc, int C, int LM, int nbAvailableBytes, int force_intra, ref int delayedIntra, int two_pass, int loss_rate, int lfe)
{
    int badness1 = 0;

    bool wantIntra = force_intra != 0
        || (two_pass == 0 && delayedIntra > 2 * C * (end - start) && nbAvailableBytes > (end - start) * C);
    int intra = wantIntra ? 1 : 0;

    int intra_bias = (int)((budget * delayedIntra * loss_rate) / (C * 512));
    int new_distortion = loss_distortion(eBands, oldEBands, start, effEnd, m.nbEBands, C);

    // Without room for the intra flag neither intra coding nor a second pass makes sense.
    uint tell = (uint)enc.tell();
    if (tell + 3 > budget)
    {
        two_pass = 0;
        intra = 0;
    }

    // 16.0 in Q(DB_SHIFT), possibly tightened for wide band ranges, or 3.0 for LFE.
    int max_decay = ((short)(0.5 + (16.0f) * (((int)1) << (CeltConstants.DB_SHIFT))));
    if (end - start > 10)
    {
        max_decay = Inlines.MIN32(max_decay, Inlines.SHL32(nbAvailableBytes, CeltConstants.DB_SHIFT - 3));
    }
    if (lfe != 0)
    {
        max_decay = ((short)(0.5 + (3.0f) * (((int)1) << (CeltConstants.DB_SHIFT))));
    }

    // Snapshot of the encoder before any coarse energy is written.
    EntropyCoder enc_start_state = new EntropyCoder();
    enc_start_state.Assign(enc);

    // Scratch copies that the intra pass works on.
    int[][] oldEBands_intra = Arrays.InitTwoDimensionalArray<int>(C, m.nbEBands);
    int[][] error_intra = Arrays.InitTwoDimensionalArray<int>(C, m.nbEBands);
    Array.Copy(oldEBands[0], 0, oldEBands_intra[0], 0, m.nbEBands);
    if (C == 2)
    {
        Array.Copy(oldEBands[1], 0, oldEBands_intra[1], 0, m.nbEBands);
    }

    if (two_pass != 0 || intra != 0)
    {
        badness1 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands_intra, (int)budget, (int)tell, Tables.e_prob_model[LM][1], error_intra, enc, C, LM, 1, max_decay, lfe);
    }

    bool adoptIntraResult;
    if (intra == 0)
    {
        adoptIntraResult = false;

        // Remember what the intra pass produced so it can be restored if it wins.
        int tell_intra = (int)enc.tell_frac();
        EntropyCoder enc_intra_state = new EntropyCoder();
        enc_intra_state.Assign(enc);

        uint nstart_bytes = enc_start_state.range_bytes();
        uint nintra_bytes = enc_intra_state.range_bytes();
        int intra_buf = enc_intra_state.buf_ptr + (int)nstart_bytes;
        uint save_bytes = nintra_bytes - nstart_bytes;

        byte[] intra_bits = null;
        if (save_bytes != 0)
        {
            /* Copy bits from intra bit-stream */
            intra_bits = new byte[(int)save_bytes];
            Array.Copy(enc_intra_state.buf, intra_buf, intra_bits, 0, (int)save_bytes);
        }

        // Rewind and run the inter pass directly on the caller's arrays.
        enc.Assign(enc_start_state);
        int badness2 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands, (int)budget, (int)tell, Tables.e_prob_model[LM][intra], error, enc, C, LM, 0, max_decay, lfe);

        bool intraWins = two_pass != 0
            && (badness1 < badness2 || (badness1 == badness2 && ((int)enc.tell_frac()) + intra_bias > tell_intra));
        if (intraWins)
        {
            enc.Assign(enc_intra_state);
            /* Copy intra bits to bit-stream */
            if (intra_bits != null)
            {
                Array.Copy(intra_bits, 0, enc_intra_state.buf, intra_buf, (int)save_bytes);
            }
            intra = 1;
            adoptIntraResult = true;
        }
    }
    else
    {
        adoptIntraResult = true;
    }

    if (adoptIntraResult)
    {
        Array.Copy(oldEBands_intra[0], 0, oldEBands[0], 0, m.nbEBands);
        Array.Copy(error_intra[0], 0, error[0], 0, m.nbEBands);
        if (C == 2)
        {
            Array.Copy(oldEBands_intra[1], 0, oldEBands[1], 0, m.nbEBands);
            Array.Copy(error_intra[1], 0, error[1], 0, m.nbEBands);
        }
    }

    delayedIntra = (intra != 0)
        ? new_distortion
        : Inlines.ADD32(Inlines.MULT16_32_Q15(Inlines.MULT16_16_Q15(pred_coef[LM], pred_coef[LM]), delayedIntra), new_distortion);
}
/// <summary>
/// Looks up the bit cost of a pulse-count index in the mode's pulse cache.
/// </summary>
/// <param name="m">Mode providing the pulse cache and <c>nbEBands</c>.</param>
/// <param name="band">Band index.</param>
/// <param name="LM">Frame size shift; the table row for <c>LM + 1</c> is used.</param>
/// <param name="pulses">Pulse-count index; 0 means no pulses.</param>
/// <returns>0 when <paramref name="pulses"/> is 0, otherwise the cached value plus one.</returns>
internal static int pulses2bits(CeltMode m, int band, int LM, int pulses)
{
    if (pulses == 0)
    {
        return 0;
    }

    int rowStart = m.cache.index[(LM + 1) * m.nbEBands + band];
    return m.cache.bits[rowStart + pulses] + 1;
}
/// <summary>
/// Computes the per-band bit allocation. Reserves bits for the skip, intensity and dual-stereo
/// signalling, bisects the mode's allocation vectors to find the two that bracket the budget,
/// and hands both to <c>interp_bits2pulses</c> for interpolation and final assignment.
/// </summary>
/// <param name="m">Mode providing <c>eBands</c>, <c>allocVectors</c>, <c>nbAllocVectors</c> and <c>nbEBands</c>.</param>
/// <param name="start">First band (inclusive).</param>
/// <param name="end">Last band (exclusive).</param>
/// <param name="offsets">Per-band boost added to the allocation.</param>
/// <param name="cap">Per-band maximum allocation.</param>
/// <param name="alloc_trim">Allocation tilt parameter.</param>
/// <param name="intensity">Intensity stereo start band; passed through by reference.</param>
/// <param name="dual_stereo">Dual stereo flag; passed through by reference.</param>
/// <param name="total">Bits available, scaled by <c>1 &lt;&lt; EntropyCoder.BITRES</c>; negative values are treated as 0.</param>
/// <param name="balance">Receives the leftover balance from <c>interp_bits2pulses</c>.</param>
/// <param name="pulses">Receives the per-band PVQ bit allocation.</param>
/// <param name="ebits">Receives the per-band fine energy bits.</param>
/// <param name="fine_priority">Receives the per-band fine energy priority.</param>
/// <param name="C">Number of channels.</param>
/// <param name="LM">Frame size shift.</param>
/// <param name="ec">Entropy coder used for the signalling.</param>
/// <param name="encode">Non-zero when encoding, zero when decoding.</param>
/// <param name="prev">Passed through to <c>interp_bits2pulses</c>.</param>
/// <param name="signalBandwidth">Passed through to <c>interp_bits2pulses</c>.</param>
/// <returns>The number of coded bands reported by <c>interp_bits2pulses</c>.</returns>
internal static int compute_allocation(CeltMode m, int start, int end, int[] offsets, int[] cap, int alloc_trim, ref int intensity, ref int dual_stereo, int total, out int balance, int[] pulses, int[] ebits, int[] fine_priority, int C, int LM, EntropyCoder ec, int encode, int prev, int signalBandwidth)
{
    int oneBit = 1 << EntropyCoder.BITRES;
    int bitPerChannel = C << EntropyCoder.BITRES;
    int len = m.nbEBands;
    int skip_start = start;
    int band;

    total = Inlines.IMAX(total, 0);

    /* Reserve a bit to signal the end of manually skipped bands. */
    int skip_rsv = (total >= oneBit) ? oneBit : 0;
    total -= skip_rsv;

    /* Reserve bits for the intensity and dual stereo parameters. */
    int intensity_rsv = 0;
    int dual_stereo_rsv = 0;
    if (C == 2)
    {
        intensity_rsv = LOG2_FRAC_TABLE[end - start];
        if (intensity_rsv > total)
        {
            intensity_rsv = 0;
        }
        else
        {
            total -= intensity_rsv;
            dual_stereo_rsv = (total >= oneBit) ? oneBit : 0;
            total -= dual_stereo_rsv;
        }
    }

    int[] lowAlloc = new int[len];
    int[] allocSpan = new int[len];
    int[] thresh = new int[len];
    int[] trim_offset = new int[len];

    for (band = start; band < end; band++)
    {
        int width = m.eBands[band + 1] - m.eBands[band];

        /* Below this threshold, we're sure not to allocate any PVQ bits */
        thresh[band] = Inlines.IMAX(bitPerChannel, (3 * width << LM << EntropyCoder.BITRES) >> 4);

        /* Tilt of the allocation curve */
        trim_offset[band] = C * width * (alloc_trim - 5 - LM) * (end - band - 1) * (1 << (LM + EntropyCoder.BITRES)) >> 6;

        /* Giving less resolution to single-coefficient bands because they get
         * more benefit from having one coarse value per coefficient*/
        if ((width << LM) == 1)
        {
            trim_offset[band] -= bitPerChannel;
        }
    }

    // Bisect the allocation vectors for the highest one that still fits the budget.
    int lo = 1;
    int hi = m.nbAllocVectors - 1;
    do
    {
        bool reachedThreshold = false;
        int psum = 0;
        int mid = (lo + hi) >> 1;

        for (band = end - 1; band >= start; band--)
        {
            int N = m.eBands[band + 1] - m.eBands[band];
            int bandBits = C * N * m.allocVectors[mid * len + band] << LM >> 2;
            if (bandBits > 0)
            {
                bandBits = Inlines.IMAX(0, bandBits + trim_offset[band]);
            }
            bandBits += offsets[band];

            if (bandBits >= thresh[band] || reachedThreshold)
            {
                reachedThreshold = true;
                /* Don't allocate more than we can actually use */
                psum += Inlines.IMIN(bandBits, cap[band]);
            }
            else if (bandBits >= bitPerChannel)
            {
                psum += bitPerChannel;
            }
        }

        if (psum > total)
        {
            hi = mid - 1;
        }
        else
        {
            lo = mid + 1;
        }
    } while (lo <= hi);

    // lo is now the first vector that does not fit; interpolate between lo - 1 and lo.
    hi = lo;
    lo--;

    for (band = start; band < end; band++)
    {
        int N = m.eBands[band + 1] - m.eBands[band];
        int lowBits = C * N * m.allocVectors[lo * len + band] << LM >> 2;
        int highBits;
        if (hi >= m.nbAllocVectors)
        {
            highBits = cap[band];
        }
        else
        {
            highBits = C * N * m.allocVectors[hi * len + band] << LM >> 2;
        }

        if (lowBits > 0)
        {
            lowBits = Inlines.IMAX(0, lowBits + trim_offset[band]);
        }
        if (highBits > 0)
        {
            highBits = Inlines.IMAX(0, highBits + trim_offset[band]);
        }

        if (lo > 0)
        {
            lowBits += offsets[band];
        }
        highBits += offsets[band];

        // The last boosted band marks where skipping has to stop.
        if (offsets[band] > 0)
        {
            skip_start = band;
        }

        lowAlloc[band] = lowBits;
        allocSpan[band] = Inlines.IMAX(0, highBits - lowBits);
    }

    return interp_bits2pulses(m, start, end, skip_start, lowAlloc, allocSpan, thresh, cap, total, out balance, skip_rsv, ref intensity, intensity_rsv, ref dual_stereo, dual_stereo_rsv, pulses, ebits, fine_priority, C, LM, ec, encode, prev, signalBandwidth);
}
/// <summary>
/// Interpolates between two candidate allocations (<paramref name="bits1"/> plus a fraction of
/// <paramref name="bits2"/>) to fit <paramref name="total"/>, decides which trailing bands are
/// skipped (coding or decoding one flag per candidate band), codes the intensity and dual-stereo
/// parameters, distributes the leftover bits, and finally splits each band's allocation into
/// fine energy bits (<paramref name="ebits"/>) and the remainder left in <paramref name="bits"/>.
/// </summary>
/// <param name="m">Mode; <c>eBands</c> and <c>logN</c> are read.</param>
/// <param name="start">First band (inclusive).</param>
/// <param name="end">Last band (exclusive).</param>
/// <param name="skip_start">Bands at or below this index are never skipped.</param>
/// <param name="bits1">Lower candidate allocation per band.</param>
/// <param name="bits2">Per-band amount added to <paramref name="bits1"/> at full interpolation.</param>
/// <param name="thresh">Per-band threshold below which the band only gets the floor allocation or nothing.</param>
/// <param name="cap">Per-band maximum allocation.</param>
/// <param name="total">Bits available for the bands.</param>
/// <param name="_balance">Receives the excess left over after the last coded band.</param>
/// <param name="skip_rsv">Bits reserved for ending the skip signalling; returned to <paramref name="total"/> when skipping stops at <paramref name="skip_start"/>.</param>
/// <param name="intensity">Intensity parameter: clamped and encoded when encoding, overwritten when decoding, set to 0 when no bits are reserved for it.</param>
/// <param name="intensity_rsv">Bits reserved for the intensity parameter.</param>
/// <param name="dual_stereo">Dual stereo flag: encoded when encoding, overwritten when decoding, set to 0 when no bit is reserved for it.</param>
/// <param name="dual_stereo_rsv">Bits reserved for the dual stereo flag.</param>
/// <param name="bits">Receives the per-band allocation that remains after fine energy bits are removed.</param>
/// <param name="ebits">Receives the per-band fine energy bits.</param>
/// <param name="fine_priority">Receives the per-band fine energy priority (0 or 1).</param>
/// <param name="C">Number of channels.</param>
/// <param name="LM">Frame size shift.</param>
/// <param name="ec">Entropy coder used for the skip, intensity and dual-stereo symbols.</param>
/// <param name="encode">Non-zero to encode the symbols, zero to decode them.</param>
/// <param name="prev">Band index used for the skip hysteresis: bands below it use the lower threshold factor (7 instead of 9).</param>
/// <param name="signalBandwidth">Highest band index that may satisfy the second keep condition when encoding.</param>
/// <returns>The number of coded bands (one past the last band that is not skipped).</returns>
internal static int interp_bits2pulses(CeltMode m, int start, int end, int skip_start, int[] bits1, int[] bits2, int[] thresh, int[] cap, int total, out int _balance, int skip_rsv, ref int intensity, int intensity_rsv, ref int dual_stereo, int dual_stereo_rsv, int[] bits, int[] ebits, int[] fine_priority, int C, int LM, EntropyCoder ec, int encode, int prev, int signalBandwidth)
{
    int psum;
    int lo, hi;
    int i, j;
    int logM;
    int stereo;
    int codedBands = -1;
    int alloc_floor;
    int left, percoeff;
    int done;
    int balance;

    /* Minimum useful allocation: one bit per channel. */
    alloc_floor = C << EntropyCoder.BITRES;
    stereo = C > 1 ? 1 : 0;
    logM = LM << EntropyCoder.BITRES;

    /* Bisect the interpolation factor in [0, 1 << ALLOC_STEPS) for the largest
     * value whose allocation still fits in total. */
    lo = 0;
    hi = 1 << ALLOC_STEPS;
    for (i = 0; i < ALLOC_STEPS; i++)
    {
        int mid = (lo + hi) >> 1;
        psum = 0;
        done = 0;
        /* Walk from the highest band down; once one band reaches its threshold,
         * every lower band is counted in full. */
        for (j = end; j-- > start;)
        {
            int tmp = bits1[j] + (mid * (int)bits2[j] >> ALLOC_STEPS);
            if (tmp >= thresh[j] || done != 0)
            {
                done = 1;
                /* Don't allocate more than we can actually use */
                psum += Inlines.IMIN(tmp, cap[j]);
            }
            else
            {
                if (tmp >= alloc_floor)
                {
                    psum += alloc_floor;
                }
            }
        }
        if (psum > total)
        {
            hi = mid;
        }
        else
        {
            lo = mid;
        }
    }

    /* Apply the chosen interpolation factor and record the initial allocation. */
    psum = 0;
    /*printf ("interp bisection gave %d\n", lo);*/
    done = 0;
    for (j = end; j-- > start;)
    {
        int tmp = bits1[j] + (lo * bits2[j] >> ALLOC_STEPS);
        if (tmp < thresh[j] && done == 0)
        {
            if (tmp >= alloc_floor)
            {
                tmp = alloc_floor;
            }
            else
            {
                tmp = 0;
            }
        }
        else
        {
            done = 1;
        }
        /* Don't allocate more than we can actually use */
        tmp = Inlines.IMIN(tmp, cap[j]);
        bits[j] = tmp;
        psum += tmp;
    }

    /* Decide which bands to skip, working backwards from the end. */
    for (codedBands = end; ; codedBands--)
    {
        int band_width;
        int band_bits;
        int rem;
        j = codedBands - 1;
        /* Never skip the first band, nor a band that has been boosted by
         * dynalloc.
         * In the first case, we'd be coding a bit to signal we're going to waste
         * all the other bits.
         * In the second case, we'd be coding a bit to redistribute all the bits
         * we just signaled should be concentrated in this band. */
        if (j <= skip_start)
        {
            /* Give the bit we reserved to end skipping back. */
            total += skip_rsv;
            break;
        }
        /* Figure out how many left-over bits we would be adding to this band.
         * This can include bits we've stolen back from higher, skipped bands. */
        left = total - psum;
        percoeff = Inlines.celt_udiv(left, m.eBands[codedBands] - m.eBands[start]);
        left -= (m.eBands[codedBands] - m.eBands[start]) * percoeff;
        rem = Inlines.IMAX(left - (m.eBands[j] - m.eBands[start]), 0);
        band_width = m.eBands[codedBands] - m.eBands[j];
        band_bits = (int)(bits[j] + percoeff * band_width + rem);
        /* Only code a skip decision if we're above the threshold for this band.
         * Otherwise it is force-skipped.
         * This ensures that we have enough bits to code the skip flag. */
        if (band_bits >= Inlines.IMAX(thresh[j], alloc_floor + (1 << EntropyCoder.BITRES)))
        {
            if (encode != 0)
            {
                /* This if() block is the only part of the allocation function that
                 * is not a mandatory part of the bitstream: any bands we choose to
                 * skip here must be explicitly signaled. */
                /* Choose a threshold with some hysteresis to keep bands from
                 * fluctuating in and out. */
#if FUZZING
                if ((new Random().Next() & 0x1) == 0)
#else
                if (codedBands <= start + 2 || (band_bits > ((j < prev ? 7 : 9) * band_width << LM << EntropyCoder.BITRES) >> 4 && j <= signalBandwidth))
#endif
                {
                    /* Flag value 1: stop skipping, this band is coded. */
                    ec.enc_bit_logp(1, 1);
                    break;
                }
                ec.enc_bit_logp(0, 1);
            }
            else if (ec.dec_bit_logp(1) != 0)
            {
                break;
            }
            /* We used a bit to skip this band. */
            psum += 1 << EntropyCoder.BITRES;
            band_bits -= 1 << EntropyCoder.BITRES;
        }
        /* Reclaim the bits originally allocated to this band. */
        psum -= bits[j] + intensity_rsv;
        /* The intensity reservation is re-read for the reduced number of bands. */
        if (intensity_rsv > 0)
        {
            intensity_rsv = LOG2_FRAC_TABLE[j - start];
        }
        psum += intensity_rsv;
        if (band_bits >= alloc_floor)
        {
            /* If we have enough for a fine energy bit per channel, use it. */
            psum += alloc_floor;
            bits[j] = alloc_floor;
        }
        else
        {
            /* Otherwise this band gets nothing at all. */
            bits[j] = 0;
        }
    }

    Inlines.OpusAssert(codedBands > start);

    /* Code the intensity and dual stereo parameters. */
    if (intensity_rsv > 0)
    {
        if (encode != 0)
        {
            intensity = Inlines.IMIN(intensity, codedBands);
            ec.enc_uint((uint)(intensity - start), (uint)(codedBands + 1 - start));
        }
        else
        {
            intensity = start + (int)ec.dec_uint((uint)(codedBands + 1 - start));
        }
    }
    else
    {
        intensity = 0;
    }
    /* When intensity is at or below start, the dual stereo reservation is returned to the pool. */
    if (intensity <= start)
    {
        total += dual_stereo_rsv;
        dual_stereo_rsv = 0;
    }
    if (dual_stereo_rsv > 0)
    {
        if (encode != 0)
        {
            ec.enc_bit_logp(dual_stereo, 1);
        }
        else
        {
            dual_stereo = ec.dec_bit_logp(1);
        }
    }
    else
    {
        dual_stereo = 0;
    }

    /* Allocate the remaining bits: an equal share per coefficient first, then the
     * remainder handed out band by band from the lowest band up. */
    left = total - psum;
    percoeff = Inlines.celt_udiv(left, m.eBands[codedBands] - m.eBands[start]);
    left -= (m.eBands[codedBands] - m.eBands[start]) * percoeff;
    for (j = start; j < codedBands; j++)
    {
        bits[j] += ((int)percoeff * (m.eBands[j + 1] - m.eBands[j]));
    }
    for (j = start; j < codedBands; j++)
    {
        int tmp = (int)Inlines.IMIN(left, m.eBands[j + 1] - m.eBands[j]);
        bits[j] += tmp;
        left -= tmp;
    }
    /*for (j=0;j<end;j++)printf("%d ", bits[j]);printf("\n");*/

    /* Split each coded band's allocation into fine energy bits and the rest,
     * carrying any excess over the cap forward in balance. */
    balance = 0;
    for (j = start; j < codedBands; j++)
    {
        int N0, N, den;
        int offset;
        int NClogN;
        int excess, bit;

        Inlines.OpusAssert(bits[j] >= 0);
        N0 = m.eBands[j + 1] - m.eBands[j];
        N = N0 << LM;
        bit = (int)bits[j] + balance;

        if (N > 1)
        {
            excess = Inlines.MAX32(bit - cap[j], 0);
            bits[j] = bit - excess;

            /* Compensate for the extra DoF in stereo */
            den = (C * N + ((C == 2 && N > 2 && (dual_stereo == 0) && j < intensity) ? 1 : 0));

            NClogN = den * (m.logN[j] + logM);

            /* Offset for the number of fine bits by log2(N)/2 + FINE_OFFSET
             * compared to their "fair share" of total/N */
            offset = (NClogN >> 1) - den * CeltConstants.FINE_OFFSET;

            /* N=2 is the only point that doesn't match the curve */
            if (N == 2)
            {
                offset += den << EntropyCoder.BITRES >> 2;
            }

            /* Changing the offset for allocating the second and third
             * fine energy bit */
            if (bits[j] + offset < den * 2 << EntropyCoder.BITRES)
            {
                offset += NClogN >> 2;
            }
            else if (bits[j] + offset < den * 3 << EntropyCoder.BITRES)
            {
                offset += NClogN >> 3;
            }

            /* Divide with rounding */
            ebits[j] = Inlines.IMAX(0, (bits[j] + offset + (den << (EntropyCoder.BITRES - 1))));
            ebits[j] = Inlines.celt_udiv(ebits[j], den) >> EntropyCoder.BITRES;

            /* Make sure not to bust */
            if (C * ebits[j] > (bits[j] >> EntropyCoder.BITRES))
            {
                ebits[j] = bits[j] >> stereo >> EntropyCoder.BITRES;
            }

            /* More than that is useless because that's about as far as PVQ can go */
            ebits[j] = Inlines.IMIN(ebits[j], CeltConstants.MAX_FINE_BITS);

            /* If we rounded down or capped this band, make it a candidate for the
             * final fine energy pass */
            fine_priority[j] = (ebits[j] * (den << EntropyCoder.BITRES) >= bits[j] + offset) ? 1 : 0;

            /* Remove the allocated fine bits; the rest are assigned to PVQ */
            bits[j] -= C * ebits[j] << EntropyCoder.BITRES;
        }
        else
        {
            /* For N=1, all bits go to fine energy except for a single sign bit */
            excess = Inlines.MAX32(0, bit - (C << EntropyCoder.BITRES));
            bits[j] = bit - excess;
            ebits[j] = 0;
            fine_priority[j] = 1;
        }

        /* Fine energy can't take advantage of the re-balancing in
         * quant_all_bands().
         * Instead, do the re-balancing here. */
        if (excess > 0)
        {
            int extra_fine;
            int extra_bits;
            extra_fine = Inlines.IMIN(excess >> (stereo + EntropyCoder.BITRES), CeltConstants.MAX_FINE_BITS - ebits[j]);
            ebits[j] += extra_fine;
            extra_bits = extra_fine * C << EntropyCoder.BITRES;
            fine_priority[j] = (extra_bits >= excess - balance) ? 1 : 0;
            excess -= extra_bits;
        }
        balance = excess;

        Inlines.OpusAssert(bits[j] >= 0);
        Inlines.OpusAssert(ebits[j] >= 0);
    }
    /* Save any remaining bits over the cap for the rebalancing in
     * quant_all_bands(). */
    _balance = balance;

    /* The skipped bands use all their bits for fine energy.
     * j continues from codedBands, where the previous loop stopped. */
    for (; j < end; j++)
    {
        ebits[j] = bits[j] >> stereo >> EntropyCoder.BITRES;
        Inlines.OpusAssert(C * ebits[j] << EntropyCoder.BITRES == bits[j]);
        bits[j] = 0;
        fine_priority[j] = (ebits[j] < 1) ? 1 : 0;
    }
    return(codedBands);
}
// fixme: test the perf of this alternate implementation
//int logSum(int a, int b)
//{
//    return log2(pow(4, a) + pow(4, b)) / 2;
//}

/// <summary>
/// Surround masking analysis. For every channel it computes 21 log band energies, applies a
/// spreading function, accumulates left/right masks according to the channel position, and
/// finally replaces each channel's band energies in <paramref name="bandLogE"/> with their
/// difference to the applicable mask (or 0 for channels with position 0).
/// </summary>
/// <typeparam name="T">Sample type of the PCM buffer.</typeparam>
/// <param name="celt_mode">Mode providing MDCT, window, pre-emphasis and frame size information.</param>
/// <param name="pcm">Interleaved input PCM.</param>
/// <param name="pcm_ptr">Start offset inside <paramref name="pcm"/>.</param>
/// <param name="bandLogE">Output, 21 values per channel.</param>
/// <param name="mem">Per-channel overlap memory (<paramref name="overlap"/> values each); updated.</param>
/// <param name="preemph_mem">Per-channel pre-emphasis state; updated.</param>
/// <param name="len">Number of input samples per channel.</param>
/// <param name="overlap">MDCT overlap length.</param>
/// <param name="channels">Number of channels; the code divides by <c>channels - 1</c>.</param>
/// <param name="rate">Sampling rate, used to derive the upsampling factor.</param>
/// <param name="copy_channel_in">Callback that extracts one channel from <paramref name="pcm"/>.</param>
internal static void surround_analysis<T>(CeltMode celt_mode, T[] pcm, int pcm_ptr, int[] bandLogE, int[] mem, int[] preemph_mem, int len, int overlap, int channels, int rate, opus_copy_channel_in_func<T> copy_channel_in)
{
    int ch;
    int band;
    int[] pos = new int[8];
    int[][] bandE = Arrays.InitTwoDimensionalArray<int>(1, 21);
    int[][] maskLogE = Arrays.InitTwoDimensionalArray<int>(3, 21);

    int upsample = CeltCommon.resampling_factor(rate);
    int frame_size = len * upsample;

    // Find the frame size shift that matches frame_size (maxLM if none does).
    int LM = 0;
    while (LM < celt_mode.maxLM && (celt_mode.shortMdctSize << LM) != frame_size)
    {
        LM++;
    }

    int[] input = new int[frame_size + overlap];
    short[] x = new short[len];
    int[][] freq = Arrays.InitTwoDimensionalArray<int>(1, frame_size);

    channel_pos(channels, pos);

    // Q(DB_SHIFT) constants; same values as the corresponding Inlines.QCONST16 calls.
    short halfDb = (short)(0.5 + (.5f) * (((int)1) << (CeltConstants.DB_SHIFT)));
    short oneDb = (short)(0.5 + (1.0f) * (((int)1) << (CeltConstants.DB_SHIFT)));
    short twoDb = (short)(0.5 + (2.0f) * (((int)1) << (CeltConstants.DB_SHIFT)));
    int maskFloor = -((short)(0.5 + (28.0f) * (((int)1) << (CeltConstants.DB_SHIFT))));

    for (ch = 0; ch < 3; ch++)
    {
        for (band = 0; band < 21; band++)
        {
            maskLogE[ch][band] = maskFloor;
        }
    }

    for (ch = 0; ch < channels; ch++)
    {
        int row = 21 * ch;

        Array.Copy(mem, ch * overlap, input, 0, overlap);
        copy_channel_in(x, 0, 1, pcm, pcm_ptr, channels, ch, len);

        BoxedValueInt boxed_preemph = new BoxedValueInt(preemph_mem[ch]);
        CeltCommon.celt_preemphasis(x, input, overlap, frame_size, 1, upsample, celt_mode.preemph, boxed_preemph, 0);
        preemph_mem[ch] = boxed_preemph.Val;

        MDCT.clt_mdct_forward(celt_mode.mdct, input, 0, freq[0], 0, celt_mode.window, overlap, celt_mode.maxLM - LM, 1);

        if (upsample != 1)
        {
            // Scale the first len bins and clear everything above them.
            int bound = len;
            for (band = 0; band < bound; band++)
            {
                freq[0][band] *= upsample;
            }
            for (; band < frame_size; band++)
            {
                freq[0][band] = 0;
            }
        }

        Bands.compute_band_energies(celt_mode, freq, bandE, 21, 1, LM);
        QuantizeBands.amp2Log2(celt_mode, 21, 21, bandE[0], bandLogE, row, 1);

        /* Apply spreading function with -6 dB/band going up and -12 dB/band going down. */
        for (band = 1; band < 21; band++)
        {
            bandLogE[row + band] = Inlines.MAX16(bandLogE[row + band], bandLogE[row + band - 1] - oneDb);
        }
        for (band = 19; band >= 0; band--)
        {
            bandLogE[row + band] = Inlines.MAX16(bandLogE[row + band], bandLogE[row + band + 1] - twoDb);
        }

        // Position 1 feeds mask 0, position 3 feeds mask 2, position 2 feeds both at -0.5.
        switch (pos[ch])
        {
            case 1:
                for (band = 0; band < 21; band++)
                {
                    maskLogE[0][band] = logSum(maskLogE[0][band], bandLogE[row + band]);
                }
                break;
            case 3:
                for (band = 0; band < 21; band++)
                {
                    maskLogE[2][band] = logSum(maskLogE[2][band], bandLogE[row + band]);
                }
                break;
            case 2:
                for (band = 0; band < 21; band++)
                {
                    maskLogE[0][band] = logSum(maskLogE[0][band], bandLogE[row + band] - halfDb);
                    maskLogE[2][band] = logSum(maskLogE[2][band], bandLogE[row + band] - halfDb);
                }
                break;
            default:
                break;
        }

        Array.Copy(input, frame_size, mem, ch * overlap, overlap);
    }

    // The middle mask is the smaller of the two side masks.
    for (band = 0; band < 21; band++)
    {
        maskLogE[1][band] = Inlines.MIN32(maskLogE[0][band], maskLogE[2][band]);
    }

    int channel_offset = Inlines.HALF16(Inlines.celt_log2(((int)(0.5 + (2.0f) * (((int)1) << (14)))) / (channels - 1)));
    for (ch = 0; ch < 3; ch++)
    {
        for (band = 0; band < 21; band++)
        {
            maskLogE[ch][band] += channel_offset;
        }
    }

    for (ch = 0; ch < channels; ch++)
    {
        int row = 21 * ch;
        if (pos[ch] != 0)
        {
            int[] mask = maskLogE[pos[ch] - 1];
            for (band = 0; band < 21; band++)
            {
                bandLogE[row + band] -= mask[band];
            }
        }
        else
        {
            for (band = 0; band < 21; band++)
            {
                bandLogE[row + band] = 0;
            }
        }
    }
}
/// <summary>
/// Runs one step of the tonality / speech-vs-music analysis.
/// New input is downmixed into <c>tonal.inmem</c>. If the buffer still holds fewer than
/// <c>OpusConstants.ANALYSIS_BUF_SIZE</c> samples, only the fill level is updated and the method returns.
/// Otherwise the method windows the buffered samples, runs one FFT, derives per-band energy,
/// tonality, noisiness and stationarity measures, assembles a 25-element feature vector for
/// <c>mlp.mlp_process</c>, updates the running music probability and its delayed-decision
/// state in <paramref name="tonal"/>, and fills the next <c>AnalysisInfo</c> entry of <c>tonal.info</c>.
/// </summary>
/// <typeparam name="T">The type of signal being handled (either short or float) - changes based on which API is used</typeparam>
/// <param name="tonal">Analysis state; read and updated in place.</param>
/// <param name="celt_mode">Mode object; only <c>celt_mode.mdct.kfft[0]</c> is used, as the FFT state.</param>
/// <param name="x">Input signal, accessed only through <paramref name="downmix"/>.</param>
/// <param name="x_ptr">Passed to <paramref name="downmix"/> with <paramref name="x"/> (presumably the start index within x - confirm against the downmix implementations).</param>
/// <param name="len">Number of new samples to consume; the buffer fill level grows by this amount.</param>
/// <param name="offset">Offset forwarded to <paramref name="downmix"/>; the second downmix call advances it by the number of samples consumed by the first.</param>
/// <param name="c1">Forwarded unchanged to <paramref name="downmix"/> (presumably a channel selector - confirm).</param>
/// <param name="c2">Forwarded unchanged to <paramref name="downmix"/> (presumably a channel selector - confirm).</param>
/// <param name="C">Forwarded unchanged to <paramref name="downmix"/> (presumably the channel count - confirm).</param>
/// <param name="lsb_depth">Bit depth of the input; every bit above 8 halves the noise floor used for bandwidth detection.</param>
/// <param name="downmix">Callback that writes downmixed input samples into <c>tonal.inmem</c>.</param>
internal static void tonality_analysis <T>(TonalityAnalysisState tonal, CeltMode celt_mode, T[] x, int x_ptr, int len, int offset, int c1, int c2, int C, int lsb_depth, Downmix.downmix_func <T> downmix)
{
    int i, b;
    FFTState kfft;
    int[] input;
    int[] output;
    int N = 480, N2 = 240;
    float[] A = tonal.angle;
    float[] dA = tonal.d_angle;
    float[] d2A = tonal.d2_angle;
    float[] tonality;
    float[] noisiness;
    float[] band_tonality = new float[OpusConstants.NB_TBANDS];
    float[] logE = new float[OpusConstants.NB_TBANDS];
    float[] BFCC = new float[8];
    float[] features = new float[25];
    float frame_tonality;
    float max_frame_tonality;
    /*float tw_sum=0;*/
    float frame_noisiness;
    float pi4 = (float)(M_PI * M_PI * M_PI * M_PI);
    float slope = 0;
    float frame_stationarity;
    float relativeE;
    float[] frame_probs = new float[2];
    float alpha, alphaE, alphaE2;
    float frame_loudness;
    float bandwidth_mask;
    int bandwidth = 0;
    float maxE = 0;
    float noise_floor;
    int remaining;
    AnalysisInfo info; //[porting note] pointer

    tonal.last_transition++;

    /* Smoothing coefficients: 1/(1+count) while few frames have been seen, then capped at 1/20, 1/50 and 1/1000. */
    alpha = 1.0f / Inlines.IMIN(20, 1 + tonal.count);
    alphaE = 1.0f / Inlines.IMIN(50, 1 + tonal.count);
    alphaE2 = 1.0f / Inlines.IMIN(1000, 1 + tonal.count);

    /* For the first four analysed frames the music probability is pinned to 0.5 */
    if (tonal.count < 4)
    {
        tonal.music_prob = 0.5f;
    }
    kfft = celt_mode.mdct.kfft[0];

    /* On the very first analysis the buffer is treated as already holding 240 samples */
    if (tonal.count == 0)
    {
        tonal.mem_fill = 240;
    }

    /* Append as much of the new input as fits into the analysis buffer */
    downmix(x, x_ptr, tonal.inmem, tonal.mem_fill, Inlines.IMIN(len, OpusConstants.ANALYSIS_BUF_SIZE - tonal.mem_fill), offset, c1, c2, C);
    if (tonal.mem_fill + len < OpusConstants.ANALYSIS_BUF_SIZE)
    {
        tonal.mem_fill += len;
        /* Don't have enough to update the analysis */
        return;
    }

    /* Claim the next slot of the info ring; write_pos wraps at DETECT_SIZE */
    info = tonal.info[tonal.write_pos++];
    if (tonal.write_pos >= OpusConstants.DETECT_SIZE)
    {
        tonal.write_pos -= OpusConstants.DETECT_SIZE;
    }

    input = new int[960];
    output = new int[960];
    tonality = new float[240];
    noisiness = new float[240];

    /* Window the buffer and pack it into the FFT input: even slots take inmem[0..N), odd slots take
     * inmem[N2..N2+N). The window table holds N2 entries and is applied symmetrically from both ends.
     * NOTE(review): even/odd slots are presumably the interleaved real/imaginary parts expected by
     * KissFFT.opus_fft - confirm. */
    for (i = 0; i < N2; i++)
    {
        float w = Tables.analysis_window[i];
        input[2 * i] = (int)(w * tonal.inmem[i]);
        input[2 * i + 1] = (int)(w * tonal.inmem[N2 + i]);
        input[(2 * (N - i - 1))] = (int)(w * tonal.inmem[N - i - 1]);
        input[(2 * (N - i - 1)) + 1] = (int)(w * tonal.inmem[N + N2 - i - 1]);
    }

    /* Presumably moves the last 240 buffered samples to the front of inmem (argument order of
     * MemMoveInt not visible here - confirm), then appends whatever input did not fit earlier. */
    Arrays.MemMoveInt(tonal.inmem, OpusConstants.ANALYSIS_BUF_SIZE - 240, 0, 240);
    remaining = len - (OpusConstants.ANALYSIS_BUF_SIZE - tonal.mem_fill);
    downmix(x, x_ptr, tonal.inmem, 240, remaining, offset + OpusConstants.ANALYSIS_BUF_SIZE - tonal.mem_fill, c1, c2, C);
    tonal.mem_fill = 240 + remaining;

    KissFFT.opus_fft(kfft, input, output);

    /* Per-bin phase tracking. Bin i is combined with its mirror bin N-i to form two complex values
     * (X1, X2) - presumably the separate spectra of the two blocks packed above (confirm). */
    for (i = 1; i < N2; i++)
    {
        float X1r, X2r, X1i, X2i;
        float angle, d_angle, d2_angle;
        float angle2, d_angle2, d2_angle2;
        float mod1, mod2, avg_mod;

        X1r = (float)output[2 * i] + output[2 * (N - i)];
        X1i = (float)output[(2 * i) + 1] - output[2 * (N - i) + 1];
        X2r = (float)output[(2 * i) + 1] + output[2 * (N - i) + 1];
        X2i = (float)output[2 * (N - i)] - output[2 * i];

        /* Angles are scaled by 1/(2*pi); first and second differences are taken against the
         * values stored from the previous call (A, dA). */
        angle = (float)(.5f / M_PI) * fast_atan2f(X1i, X1r);
        d_angle = angle - A[i];
        d2_angle = d_angle - dA[i];

        angle2 = (float)(.5f / M_PI) * fast_atan2f(X2i, X2r);
        d_angle2 = angle2 - angle;
        d2_angle2 = d_angle2 - d_angle;

        /* Distance of the second difference from the nearest integer, then raised to the 4th power */
        mod1 = d2_angle - (float)Math.Floor(0.5f + d2_angle);
        noisiness[i] = Inlines.ABS16(mod1);
        mod1 *= mod1;
        mod1 *= mod1;

        mod2 = d2_angle2 - (float)Math.Floor(0.5f + d2_angle2);
        noisiness[i] += Inlines.ABS16(mod2);
        mod2 *= mod2;
        mod2 *= mod2;

        /* Weighted average of the previous call's value (d2A) and the two new ones */
        avg_mod = .25f * (d2A[i] + 2.0f * mod1 + mod2);
        tonality[i] = 1.0f / (1.0f + 40.0f * 16.0f * pi4 * avg_mod) - .015f;

        /* Store state for the next call */
        A[i] = angle2;
        dA[i] = d_angle2;
        d2A[i] = mod2;
    }

    frame_tonality = 0;
    max_frame_tonality = 0;
    /*tw_sum = 0;*/
    info.activity = 0;
    frame_noisiness = 0;
    frame_stationarity = 0;

    /* First analysis: initialise the per-band log-energy trackers to extreme values */
    if (tonal.count == 0)
    {
        for (b = 0; b < OpusConstants.NB_TBANDS; b++)
        {
            tonal.lowE[b] = 1e10f;
            tonal.highE[b] = -1e10f;
        }
    }
    relativeE = 0;
    frame_loudness = 0;

    /* Per-band energy, tonality, noisiness and stationarity */
    for (b = 0; b < OpusConstants.NB_TBANDS; b++)
    {
        float E = 0, tE = 0, nE = 0;
        float L1, L2;
        float stationarity;
        for (i = Tables.tbands[b]; i < Tables.tbands[b + 1]; i++)
        {
            /* Energy of bin i plus its mirror bin N-i */
            float binE = output[2 * i] * (float)output[2 * i] + output[2 * (N - i)] * (float)output[2 * (N - i)]
                         + output[2 * i + 1] * (float)output[2 * i + 1] + output[2 * (N - i) + 1] * (float)output[2 * (N - i) + 1];
            /* FIXME: It's probably best to change the BFCC filter initial state instead */
            binE *= 5.55e-17f;
            E += binE;
            tE += binE * tonality[i];
            nE += binE * 2.0f * (.5f - noisiness[i]);
        }

        /* tonal.E keeps the band energies of the last NB_FRAMES analyses (E_count is the write index) */
        tonal.E[tonal.E_count][b] = E;
        frame_noisiness += nE / (1e-15f + E);

        frame_loudness += (float)Math.Sqrt(E + 1e-10f);
        logE[b] = (float)Math.Log(E + 1e-10f);

        /* lowE follows minima and creeps up by 0.01 per frame; highE follows maxima and decays by 0.1 per frame */
        tonal.lowE[b] = Inlines.MIN32(logE[b], tonal.lowE[b] + 0.01f);
        tonal.highE[b] = Inlines.MAX32(logE[b], tonal.highE[b] - 0.1f);
        /* Keep the two trackers at least 1.0 apart */
        if (tonal.highE[b] < tonal.lowE[b] + 1.0f)
        {
            tonal.highE[b] += 0.5f;
            tonal.lowE[b] -= 0.5f;
        }
        relativeE += (logE[b] - tonal.lowE[b]) / (1e-15f + tonal.highE[b] - tonal.lowE[b]);

        /* Stationarity from the ratio of summed amplitudes (L1) to summed energies (L2) over the stored frames */
        L1 = L2 = 0;
        for (i = 0; i < OpusConstants.NB_FRAMES; i++)
        {
            L1 += (float)Math.Sqrt(tonal.E[i][b]);
            L2 += tonal.E[i][b];
        }

        stationarity = Inlines.MIN16(0.99f, L1 / (float)Math.Sqrt(1e-15 + OpusConstants.NB_FRAMES * L2));
        stationarity *= stationarity;
        stationarity *= stationarity;
        frame_stationarity += stationarity;
        /*band_tonality[b] = tE/(1e-15+E)*/
        band_tonality[b] = Inlines.MAX16(tE / (1e-15f + E), stationarity * tonal.prev_band_tonality[b]);

        /* Running sum of band tonality; once b reaches the last NB_TONAL_SKIP_BANDS bands,
         * the lowest bands are dropped from the sum one at a time. */
        frame_tonality += band_tonality[b];
        if (b >= OpusConstants.NB_TBANDS - OpusConstants.NB_TONAL_SKIP_BANDS)
        {
            frame_tonality -= band_tonality[b - OpusConstants.NB_TBANDS + OpusConstants.NB_TONAL_SKIP_BANDS];
        }
        max_frame_tonality = Inlines.MAX16(max_frame_tonality, (1.0f + .03f * (b - OpusConstants.NB_TBANDS)) * frame_tonality);
        slope += band_tonality[b] * (b - 8);
        tonal.prev_band_tonality[b] = band_tonality[b];
    }

    /* Bandwidth detection: find the highest band that still counts as active */
    bandwidth_mask = 0;
    bandwidth = 0;
    maxE = 0;
    /* Noise floor shrinks by a factor of two per bit of depth above 8, is rescaled by 2^(15 + SIG_SHIFT), then squared */
    noise_floor = 5.7e-4f / (1 << (Inlines.IMAX(0, lsb_depth - 8)));
    noise_floor *= 1 << (15 + CeltConstants.SIG_SHIFT);
    noise_floor *= noise_floor;
    for (b = 0; b < OpusConstants.NB_TOT_BANDS; b++)
    {
        float E = 0;
        int band_start, band_end;
        /* Keep a margin of 300 Hz for aliasing */
        band_start = Tables.extra_bands[b];
        band_end = Tables.extra_bands[b + 1];
        for (i = band_start; i < band_end; i++)
        {
            float binE = output[2 * i] * (float)output[2 * i] + output[2 * (N - i)] * (float)output[2 * (N - i)]
                         + output[2 * i + 1] * (float)output[2 * i + 1] + output[2 * (N - i) + 1] * (float)output[2 * (N - i) + 1];
            E += binE;
        }
        maxE = Inlines.MAX32(maxE, E);
        /* Slowly decaying per-band peak energy */
        tonal.meanE[b] = Inlines.MAX32((1 - alphaE2) * tonal.meanE[b], E);
        E = Inlines.MAX32(E, tonal.meanE[b]);
        /* Use a simple follower with 13 dB/Bark slope for spreading function */
        bandwidth_mask = Inlines.MAX32(.05f * bandwidth_mask, E);
        /* Consider the band "active" only if all these conditions are met:
         * 1) less than 10 dB below the simple follower
         * 2) less than 90 dB below the peak band (maximal masking possible considering
         *    both the ATH and the loudness-dependent slope of the spreading function)
         * 3) above the PCM quantization noise floor */
        if (E > .1 * bandwidth_mask && E * 1e9f > maxE && E > noise_floor * (band_end - band_start))
        {
            bandwidth = b;
        }
    }
    /* The detected bandwidth is overridden with 20 during the first three analyses */
    if (tonal.count <= 2)
    {
        bandwidth = 20;
    }

    /* Loudness tracking: Etracker follows the peak and decays by 0.03 per frame; lowECount is a
     * smoothed indicator of frames more than 30 below that tracker. */
    frame_loudness = 20 * (float)Math.Log10(frame_loudness);
    tonal.Etracker = Inlines.MAX32(tonal.Etracker - .03f, frame_loudness);
    tonal.lowECount *= (1 - alphaE);
    if (frame_loudness < tonal.Etracker - 30)
    {
        tonal.lowECount += alphaE;
    }

    /* Project the first 16 band log-energies onto 8 rows of the DCT table */
    for (i = 0; i < 8; i++)
    {
        float sum = 0;
        for (b = 0; b < 16; b++)
        {
            sum += Tables.dct_table[i * 16 + b] * logE[b];
        }
        BFCC[i] = sum;
    }

    /* Turn the per-band sums into per-frame averages */
    frame_stationarity /= OpusConstants.NB_TBANDS;
    relativeE /= OpusConstants.NB_TBANDS;
    if (tonal.count < 10)
    {
        relativeE = 0.5f;
    }
    frame_noisiness /= OpusConstants.NB_TBANDS;
    info.activity = frame_noisiness + (1 - frame_noisiness) * relativeE;
    frame_tonality = (max_frame_tonality / (OpusConstants.NB_TBANDS - OpusConstants.NB_TONAL_SKIP_BANDS));
    /* Tonality may drop by at most 20% from one analysis to the next */
    frame_tonality = Inlines.MAX16(frame_tonality, tonal.prev_tonality * .8f);
    tonal.prev_tonality = frame_tonality;

    slope /= 8 * 8;
    info.tonality_slope = slope;

    tonal.E_count = (tonal.E_count + 1) % OpusConstants.NB_FRAMES;
    tonal.count++;
    info.tonality = frame_tonality;

    /* Features 0..10: weighted combinations of the current BFCC and the three previous sets kept
     * in tonal.mem (offsets 0, 8, 16 and 24, newest first). */
    for (i = 0; i < 4; i++)
    {
        features[i] = -0.12299f * (BFCC[i] + tonal.mem[i + 24]) + 0.49195f * (tonal.mem[i] + tonal.mem[i + 16]) + 0.69693f * tonal.mem[i + 8] - 1.4349f * tonal.cmean[i];
    }

    /* Running mean of the first four BFCC values */
    for (i = 0; i < 4; i++)
    {
        tonal.cmean[i] = (1 - alpha) * tonal.cmean[i] + alpha * BFCC[i];
    }

    for (i = 0; i < 4; i++)
    {
        features[4 + i] = 0.63246f * (BFCC[i] - tonal.mem[i + 24]) + 0.31623f * (tonal.mem[i] - tonal.mem[i + 16]);
    }
    for (i = 0; i < 3; i++)
    {
        features[8 + i] = 0.53452f * (BFCC[i] + tonal.mem[i + 24]) - 0.26726f * (tonal.mem[i] + tonal.mem[i + 16]) - 0.53452f * tonal.mem[i + 8];
    }

    /* Running mean of the squared features, updated only after the first few analyses */
    if (tonal.count > 5)
    {
        for (i = 0; i < 9; i++)
        {
            tonal.std[i] = (1 - alpha) * tonal.std[i] + alpha * features[i] * features[i];
        }
    }

    /* Shift the BFCC history by one frame and store the current values at the front */
    for (i = 0; i < 8; i++)
    {
        tonal.mem[i + 24] = tonal.mem[i + 16];
        tonal.mem[i + 16] = tonal.mem[i + 8];
        tonal.mem[i + 8] = tonal.mem[i];
        tonal.mem[i] = BFCC[i];
    }
    /* Features 11..19: square roots of the running squared-feature means; 20..24: frame-level measures */
    for (i = 0; i < 9; i++)
    {
        features[11 + i] = (float)Math.Sqrt(tonal.std[i]);
    }
    features[20] = info.tonality;
    features[21] = info.activity;
    features[22] = frame_stationarity;
    features[23] = info.tonality_slope;
    features[24] = tonal.lowECount;

    mlp.mlp_process(Tables.net, features, frame_probs);
    /* Map the first MLP output from [-1, 1] to [0, 1] (assumes that output range - confirm against mlp_process) */
    frame_probs[0] = .5f * (frame_probs[0] + 1);
    /* Curve fitting between the MLP probability and the actual probability */
    frame_probs[0] = .01f + 1.21f * frame_probs[0] * frame_probs[0] - .23f * (float)Math.Pow(frame_probs[0], 10);
    /* Probability of active audio (as opposed to silence) */
    frame_probs[1] = .5f * frame_probs[1] + .5f;
    /* Consider that silence has a 50-50 probability. */
    frame_probs[0] = frame_probs[1] * frame_probs[0] + (1 - frame_probs[1]) * .5f;

    /*printf("%f %f ", frame_probs[0], frame_probs[1]);*/
    {
        /* Probability of state transition */
        float tau;
        /* Represents independence of the MLP probabilities, where
         * beta=1 means fully independent. */
        float beta;
        /* Denormalized probability of speech (p0) and music (p1) after update */
        float p0, p1;
        /* Probabilities for "all speech" and "all music" */
        float s0, m0;
        /* Probability sum for renormalisation */
        float psum;
        /* Instantaneous probability of speech and music, with beta pre-applied. */
        float speech0;
        float music0;

        /* One transition every 3 minutes of active audio */
        tau = .00005f * frame_probs[1];
        beta = .05f;
        //if (1)
        {
            /* Adapt beta based on how "unexpected" the new prob is */
            float p, q;
            p = Inlines.MAX16(.05f, Inlines.MIN16(.95f, frame_probs[0]));
            q = Inlines.MAX16(.05f, Inlines.MIN16(.95f, tonal.music_prob));
            beta = .01f + .05f * Inlines.ABS16(p - q) / (p * (1 - q) + q * (1 - p));
        }
        /* p0 and p1 are the probabilities of speech and music at this frame
         * using only information from previous frame and applying the
         * state transition model */
        p0 = (1 - tonal.music_prob) * (1 - tau) + tonal.music_prob * tau;
        p1 = tonal.music_prob * (1 - tau) + (1 - tonal.music_prob) * tau;
        /* We apply the current probability with exponent beta to work around
         * the fact that the probability estimates aren't independent. */
        p0 *= (float)Math.Pow(1 - frame_probs[0], beta);
        p1 *= (float)Math.Pow(frame_probs[0], beta);
        /* Normalise the probabilities to get the Markov probability of music. */
        tonal.music_prob = p1 / (p0 + p1);
        info.music_prob = tonal.music_prob;

        /* This chunk of code deals with delayed decision. */
        psum = 1e-20f;
        /* Instantaneous probability of speech and music, with beta pre-applied. */
        speech0 = (float)Math.Pow(1 - frame_probs[0], beta);
        music0 = (float)Math.Pow(frame_probs[0], beta);
        /* count was incremented above, so this is true on the first analysis only */
        if (tonal.count == 1)
        {
            tonal.pspeech[0] = 0.5f;
            tonal.pmusic[0] = 0.5f;
        }
        /* Updated probability of having only speech (s0) or only music (m0),
         * before considering the new observation. */
        s0 = tonal.pspeech[0] + tonal.pspeech[1];
        m0 = tonal.pmusic[0] + tonal.pmusic[1];
        /* Updates s0 and m0 with instantaneous probability. */
        tonal.pspeech[0] = s0 * (1 - tau) * speech0;
        tonal.pmusic[0] = m0 * (1 - tau) * music0;
        /* Propagate the transition probabilities */
        for (i = 1; i < OpusConstants.DETECT_SIZE - 1; i++)
        {
            tonal.pspeech[i] = tonal.pspeech[i + 1] * speech0;
            tonal.pmusic[i] = tonal.pmusic[i + 1] * music0;
        }
        /* Probability that the latest frame is speech, when all the previous ones were music. */
        tonal.pspeech[OpusConstants.DETECT_SIZE - 1] = m0 * tau * speech0;
        /* Probability that the latest frame is music, when all the previous ones were speech. */
        tonal.pmusic[OpusConstants.DETECT_SIZE - 1] = s0 * tau * music0;

        /* Renormalise probabilities to 1 */
        for (i = 0; i < OpusConstants.DETECT_SIZE; i++)
        {
            psum += tonal.pspeech[i] + tonal.pmusic[i];
        }
        psum = 1.0f / psum;
        for (i = 0; i < OpusConstants.DETECT_SIZE; i++)
        {
            tonal.pspeech[i] *= psum;
            tonal.pmusic[i] *= psum;
        }
        /* NOTE(review): psum is recomputed here but never read again in this method. */
        psum = tonal.pmusic[0];
        for (i = 1; i < OpusConstants.DETECT_SIZE; i++)
        {
            psum += tonal.pspeech[i];
        }

        /* Estimate our confidence in the speech/music decisions */
        if (frame_probs[1] > .75)
        {
            if (tonal.music_prob > .9)
            {
                /* Running average of frame_probs[0] over confident-music frames; the count is capped at 500 */
                float adapt;
                adapt = 1.0f / (++tonal.music_confidence_count);
                tonal.music_confidence_count = Inlines.IMIN(tonal.music_confidence_count, 500);
                tonal.music_confidence += adapt * Inlines.MAX16(-.2f, frame_probs[0] - tonal.music_confidence);
            }
            if (tonal.music_prob < .1)
            {
                /* Same running average for confident-speech frames */
                float adapt;
                adapt = 1.0f / (++tonal.speech_confidence_count);
                tonal.speech_confidence_count = Inlines.IMIN(tonal.speech_confidence_count, 500);
                tonal.speech_confidence += adapt * Inlines.MIN16(.2f, frame_probs[0] - tonal.speech_confidence);
            }
        }
        else
        {
            /* Defaults used until the first confident frame of each kind has been seen */
            if (tonal.music_confidence_count == 0)
            {
                tonal.music_confidence = .9f;
            }
            if (tonal.speech_confidence_count == 0)
            {
                tonal.speech_confidence = .1f;
            }
        }
    }

    /* Reset the transition counter whenever the speech/music decision flips */
    if (tonal.last_music != ((tonal.music_prob > .5f) ? 1 : 0))
    {
        tonal.last_transition = 0;
    }
    tonal.last_music = (tonal.music_prob > .5f) ? 1 : 0;

    info.bandwidth = bandwidth;
    info.noisiness = frame_noisiness;
    info.valid = 1;
}