/// <summary> /// Get the speech activity level in Q8 /// </summary> /// <param name="psEncC">I/O Encoder state</param> /// <param name="pIn">I PCM input</param> /// <param name="pIn_ptr">I Offset into pIn where the input frame starts</param> /// <returns>0 if success</returns> internal static int silk_VAD_GetSA_Q8( SilkChannelEncoder psEncC, short[] pIn, int pIn_ptr) { int SA_Q15, pSNR_dB_Q7, input_tilt; int decimated_framelength1, decimated_framelength2; int decimated_framelength; int dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s; int sumSquared = 0, smooth_coef_Q16; short HPstateTmp; short[] X; int[] Xnrg = new int[SilkConstants.VAD_N_BANDS]; int[] NrgToNoiseRatio_Q8 = new int[SilkConstants.VAD_N_BANDS]; int speech_nrg, x_tmp; int[] X_offset = new int[SilkConstants.VAD_N_BANDS]; int ret = 0; SilkVADState psSilk_VAD = psEncC.sVAD; /* Safety checks */ Inlines.OpusAssert(SilkConstants.VAD_N_BANDS == 4); Inlines.OpusAssert(SilkConstants.MAX_FRAME_LENGTH >= psEncC.frame_length); Inlines.OpusAssert(psEncC.frame_length <= 512); Inlines.OpusAssert(psEncC.frame_length == 8 * Inlines.silk_RSHIFT(psEncC.frame_length, 3)); /***********************/ /* Filter and Decimate */ /***********************/ decimated_framelength1 = Inlines.silk_RSHIFT(psEncC.frame_length, 1); decimated_framelength2 = Inlines.silk_RSHIFT(psEncC.frame_length, 2); decimated_framelength = Inlines.silk_RSHIFT(psEncC.frame_length, 3); /* Decimate into 4 bands. With L = frame_length, the scratch buffer X is laid out as [0-1 kHz | temp. | 1-2 kHz | 2-4 kHz | 4-8 kHz], with section boundaries at 0, L/8, 3L/8, L/2, 3L/4 and 5L/4. The bands are arranged to allow the minimal ( frame_length / 4 ) extra scratch space during the downsampling process */ X_offset[0] = 0; X_offset[1] = decimated_framelength + decimated_framelength2; X_offset[2] = X_offset[1] + decimated_framelength; X_offset[3] = X_offset[2] + decimated_framelength2; X = new short[X_offset[3] + decimated_framelength1]; /* 0-8 kHz to 0-4 kHz and 4-8 kHz */ Filters.silk_ana_filt_bank_1(pIn, pIn_ptr, psSilk_VAD.AnaState, X, X, X_offset[3], psEncC.frame_length); /* 0-4 kHz to 0-2 kHz and 2-4 kHz */ Filters.silk_ana_filt_bank_1(X, 0, psSilk_VAD.AnaState1, X, X, X_offset[2], decimated_framelength1); /* 0-2 kHz to 0-1 kHz and 1-2 kHz */ Filters.silk_ana_filt_bank_1(X, 0, psSilk_VAD.AnaState2, X, X, X_offset[1], decimated_framelength2); /*********************************************/ /* HP filter on lowest band (differentiator) */ /*********************************************/ X[decimated_framelength - 1] = (short)(Inlines.silk_RSHIFT(X[decimated_framelength - 1], 1)); HPstateTmp = X[decimated_framelength - 1]; for (i = decimated_framelength - 1; i > 0; i--) { X[i - 1] = (short)(Inlines.silk_RSHIFT(X[i - 1], 1)); X[i] -= X[i - 1]; } X[0] -= psSilk_VAD.HPstate; psSilk_VAD.HPstate = HPstateTmp; /*************************************/ /* Calculate the energy in each band */ /*************************************/ for (b = 0; b < SilkConstants.VAD_N_BANDS; b++) { /* Find the decimated framelength in the non-uniformly divided bands */ decimated_framelength = Inlines.silk_RSHIFT(psEncC.frame_length, Inlines.silk_min_int(SilkConstants.VAD_N_BANDS - b, SilkConstants.VAD_N_BANDS - 1)); /* Split length into subframe lengths */ dec_subframe_length = Inlines.silk_RSHIFT(decimated_framelength, SilkConstants.VAD_INTERNAL_SUBFRAMES_LOG2); dec_subframe_offset = 0; /* Compute energy per sub-frame */ /* initialize with summed energy of last subframe */ Xnrg[b] = psSilk_VAD.XnrgSubfr[b]; for (s = 0; s < SilkConstants.VAD_INTERNAL_SUBFRAMES; s++) { sumSquared = 0; for (i = 0; i < dec_subframe_length; i++) {
/* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2. */ /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128) */ x_tmp = Inlines.silk_RSHIFT( X[X_offset[b] + i + dec_subframe_offset], 3); sumSquared = Inlines.silk_SMLABB(sumSquared, x_tmp, x_tmp); /* Safety check */ Inlines.OpusAssert(sumSquared >= 0); } /* Add/saturate summed energy of current subframe */ if (s < SilkConstants.VAD_INTERNAL_SUBFRAMES - 1) { Xnrg[b] = Inlines.silk_ADD_POS_SAT32(Xnrg[b], sumSquared); } else { /* Look-ahead subframe */ Xnrg[b] = Inlines.silk_ADD_POS_SAT32(Xnrg[b], Inlines.silk_RSHIFT(sumSquared, 1)); } dec_subframe_offset += dec_subframe_length; } psSilk_VAD.XnrgSubfr[b] = sumSquared; } /********************/ /* Noise estimation */ /********************/ silk_VAD_GetNoiseLevels(Xnrg, psSilk_VAD); /***********************************************/ /* Signal-plus-noise to noise ratio estimation */ /***********************************************/ sumSquared = 0; input_tilt = 0; for (b = 0; b < SilkConstants.VAD_N_BANDS; b++) { speech_nrg = Xnrg[b] - psSilk_VAD.NL[b]; if (speech_nrg > 0) { /* Divide, with sufficient resolution */ if ((Xnrg[b] & 0xFF800000) == 0) { NrgToNoiseRatio_Q8[b] = Inlines.silk_DIV32(Inlines.silk_LSHIFT(Xnrg[b], 8), psSilk_VAD.NL[b] + 1); } else { NrgToNoiseRatio_Q8[b] = Inlines.silk_DIV32(Xnrg[b], Inlines.silk_RSHIFT(psSilk_VAD.NL[b], 8) + 1); } /* Convert to log domain */ SNR_Q7 = Inlines.silk_lin2log(NrgToNoiseRatio_Q8[b]) - 8 * 128; /* Sum-of-squares */ sumSquared = Inlines.silk_SMLABB(sumSquared, SNR_Q7, SNR_Q7); /* Q14 */ /* Tilt measure */ if (speech_nrg < ((int)1 << 20)) { /* Scale down SNR value for small subband speech energies */ SNR_Q7 = Inlines.silk_SMULWB(Inlines.silk_LSHIFT(Inlines.silk_SQRT_APPROX(speech_nrg), 6), SNR_Q7); } input_tilt = Inlines.silk_SMLAWB(input_tilt, tiltWeights[b], SNR_Q7); } else { NrgToNoiseRatio_Q8[b] = 256; } } /* Mean-of-squares */ sumSquared = Inlines.silk_DIV32_16(sumSquared, SilkConstants.VAD_N_BANDS); /* Q14 */ /* Root-mean-square approximation, scale to dBs, and write to output pointer */ pSNR_dB_Q7 = (short)(3 * Inlines.silk_SQRT_APPROX(sumSquared)); /* Q7 */ /*********************************/ /* Speech Probability Estimation */ /*********************************/ SA_Q15 = Sigmoid.silk_sigm_Q15(Inlines.silk_SMULWB(SilkConstants.VAD_SNR_FACTOR_Q16, pSNR_dB_Q7) - SilkConstants.VAD_NEGATIVE_OFFSET_Q5); /**************************/ /* Frequency Tilt Measure */ /**************************/ psEncC.input_tilt_Q15 = Inlines.silk_LSHIFT(Sigmoid.silk_sigm_Q15(input_tilt) - 16384, 1); /**************************************************/ /* Scale the sigmoid output based on power levels */ /**************************************************/ speech_nrg = 0; for (b = 0; b < SilkConstants.VAD_N_BANDS; b++) { /* Accumulate signal-without-noise energies, higher frequency bands have more weight */ speech_nrg += (b + 1) * Inlines.silk_RSHIFT(Xnrg[b] - psSilk_VAD.NL[b], 4); } /* Power scaling */ if (speech_nrg <= 0) { SA_Q15 = Inlines.silk_RSHIFT(SA_Q15, 1); } else if (speech_nrg < 32768) { if (psEncC.frame_length == 10 * psEncC.fs_kHz) { speech_nrg = Inlines.silk_LSHIFT_SAT32(speech_nrg, 16); } else { speech_nrg = Inlines.silk_LSHIFT_SAT32(speech_nrg, 15); } /* square-root */ speech_nrg = Inlines.silk_SQRT_APPROX(speech_nrg); SA_Q15 = Inlines.silk_SMULWB(32768 + speech_nrg, SA_Q15); } /* Copy the resulting speech activity in Q8 */ psEncC.speech_activity_Q8 = 
Inlines.silk_min_int(Inlines.silk_RSHIFT(SA_Q15, 7), byte.MaxValue); /***********************************/ /* Energy Level and SNR estimation */ /***********************************/ /* Smoothing coefficient */ smooth_coef_Q16 = Inlines.silk_SMULWB(SilkConstants.VAD_SNR_SMOOTH_COEF_Q18, Inlines.silk_SMULWB((int)SA_Q15, SA_Q15)); if (psEncC.frame_length == 10 * psEncC.fs_kHz) { smooth_coef_Q16 >>= 1; } for (b = 0; b < SilkConstants.VAD_N_BANDS; b++) { /* compute smoothed energy-to-noise ratio per band */ psSilk_VAD.NrgRatioSmth_Q8[b] = Inlines.silk_SMLAWB(psSilk_VAD.NrgRatioSmth_Q8[b], NrgToNoiseRatio_Q8[b] - psSilk_VAD.NrgRatioSmth_Q8[b], smooth_coef_Q16); /* signal to noise ratio in dB per band */ SNR_Q7 = 3 * (Inlines.silk_lin2log(psSilk_VAD.NrgRatioSmth_Q8[b]) - 8 * 128); /* quality = sigmoid( 0.25 * ( SNR_dB - 16 ) ); */ psEncC.input_quality_bands_Q15[b] = Sigmoid.silk_sigm_Q15(Inlines.silk_RSHIFT(SNR_Q7 - 16 * 128, 4)); } return(ret); }
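/* The power scaling and final conversion above rely on two fixed-point conventions that recur
   throughout this file: silk_SMULWB(a, b) behaves like (a * (short)b) >> 16, so multiplying a
   Q15 activity by a Q16 scale factor in the range 0.5..1.0 keeps the result in Q15, and that
   result is then reduced to Q8 and clamped to one byte for speech_activity_Q8. A minimal
   stand-alone sketch of that scaling; the helper name is illustrative and not part of the library. */
internal static int ExampleScaleActivityToQ8(int activityQ15, int scaleQ16)
{
    /* Q16 scale times Q15 activity, shifted down by 16, stays in Q15
       (this mirrors silk_SMULWB when the Q15 value fits in 16 bits) */
    int scaledQ15 = (int)(((long)scaleQ16 * (short)activityQ15) >> 16);

    /* Drop 7 fractional bits (Q15 -> Q8) and clamp to the 0..255 range used by speech_activity_Q8 */
    return Inlines.silk_min_int(Inlines.silk_RSHIFT(scaledQ15, 7), byte.MaxValue);
}
/* For example, ExampleScaleActivityToQ8(32767, 65536) returns 255: full activity at unity scale. */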
/* Processing of gains */ internal static void silk_process_gains( SilkChannelEncoder psEnc, /* I/O Encoder state */ SilkEncoderControl psEncCtrl, /* I/O Encoder control */ int condCoding /* I The type of conditional coding to use */ ) { SilkShapeState psShapeSt = psEnc.sShape; int k; int s_Q16, InvMaxSqrVal_Q16, gain, gain_squared, ResNrg, ResNrgPart, quant_offset_Q10; /* Gain reduction when LTP coding gain is high */ if (psEnc.indices.signalType == SilkConstants.TYPE_VOICED) { /*s = -0.5f * silk_sigmoid( 0.25f * ( psEncCtrl.LTPredCodGain - 12.0f ) ); */ s_Q16 = 0 - Sigmoid.silk_sigm_Q15(Inlines.silk_RSHIFT_ROUND(psEncCtrl.LTPredCodGain_Q7 - ((int)((12.0f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(12.0f, 7)*/, 4)); for (k = 0; k < psEnc.nb_subfr; k++) { psEncCtrl.Gains_Q16[k] = Inlines.silk_SMLAWB(psEncCtrl.Gains_Q16[k], psEncCtrl.Gains_Q16[k], s_Q16); } } /* Limit the quantized signal */ /* InvMaxSqrVal = pow( 2.0f, 0.33f * ( 21.0f - SNR_dB ) ) / subfr_length; */ InvMaxSqrVal_Q16 = Inlines.silk_DIV32_16(Inlines.silk_log2lin( Inlines.silk_SMULWB(((int)((21 + 16 / 0.33f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(21 + 16 / 0.33f, 7)*/ - psEnc.SNR_dB_Q7, ((int)((0.33f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(0.33f, 16)*/)), psEnc.subfr_length); for (k = 0; k < psEnc.nb_subfr; k++) { /* Soft limit on ratio residual energy and squared gains */ ResNrg = psEncCtrl.ResNrg[k]; ResNrgPart = Inlines.silk_SMULWW(ResNrg, InvMaxSqrVal_Q16); if (psEncCtrl.ResNrgQ[k] > 0) { ResNrgPart = Inlines.silk_RSHIFT_ROUND(ResNrgPart, psEncCtrl.ResNrgQ[k]); } else { if (ResNrgPart >= Inlines.silk_RSHIFT(int.MaxValue, -psEncCtrl.ResNrgQ[k])) { ResNrgPart = int.MaxValue; } else { ResNrgPart = Inlines.silk_LSHIFT(ResNrgPart, -psEncCtrl.ResNrgQ[k]); } } gain = psEncCtrl.Gains_Q16[k]; gain_squared = Inlines.silk_ADD_SAT32(ResNrgPart, Inlines.silk_SMMUL(gain, gain)); if (gain_squared < short.MaxValue) { /* recalculate with higher precision */ gain_squared = Inlines.silk_SMLAWW(Inlines.silk_LSHIFT(ResNrgPart, 16), gain, gain); Inlines.OpusAssert(gain_squared > 0); gain = Inlines.silk_SQRT_APPROX(gain_squared); /* Q8 */ gain = Inlines.silk_min(gain, int.MaxValue >> 8); psEncCtrl.Gains_Q16[k] = Inlines.silk_LSHIFT_SAT32(gain, 8); /* Q16 */ } else { gain = Inlines.silk_SQRT_APPROX(gain_squared); /* Q0 */ gain = Inlines.silk_min(gain, int.MaxValue >> 16); psEncCtrl.Gains_Q16[k] = Inlines.silk_LSHIFT_SAT32(gain, 16); /* Q16 */ } } /* Save unquantized gains and gain Index */ Array.Copy(psEncCtrl.Gains_Q16, psEncCtrl.GainsUnq_Q16, psEnc.nb_subfr); psEncCtrl.lastGainIndexPrev = psShapeSt.LastGainIndex; /* Quantize gains */ BoxedValueSbyte boxed_lastGainIndex = new BoxedValueSbyte(psShapeSt.LastGainIndex); GainQuantization.silk_gains_quant(psEnc.indices.GainsIndices, psEncCtrl.Gains_Q16, boxed_lastGainIndex, condCoding == SilkConstants.CODE_CONDITIONALLY ? 1 : 0, psEnc.nb_subfr); psShapeSt.LastGainIndex = boxed_lastGainIndex.Val; /* Set quantizer offset for voiced signals. 
Larger offset when LTP coding gain is low or tilt is high (ie low-pass) */ if (psEnc.indices.signalType == SilkConstants.TYPE_VOICED) { if (psEncCtrl.LTPredCodGain_Q7 + Inlines.silk_RSHIFT(psEnc.input_tilt_Q15, 8) > ((int)((1.0f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 7)*/) { psEnc.indices.quantOffsetType = 0; } else { psEnc.indices.quantOffsetType = 1; } } /* Quantizer boundary adjustment */ quant_offset_Q10 = Tables.silk_Quantization_Offsets_Q10[psEnc.indices.signalType >> 1][psEnc.indices.quantOffsetType]; psEncCtrl.Lambda_Q10 = ((int)((TuningParameters.LAMBDA_OFFSET) * ((long)1 << (10)) + 0.5))/*Inlines.SILK_CONST(TuningParameters.LAMBDA_OFFSET, 10)*/ + Inlines.silk_SMULBB(((int)((TuningParameters.LAMBDA_DELAYED_DECISIONS) * ((long)1 << (10)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LAMBDA_DELAYED_DECISIONS, 10)*/, psEnc.nStatesDelayedDecision) + Inlines.silk_SMULWB(((int)((TuningParameters.LAMBDA_SPEECH_ACT) * ((long)1 << (18)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LAMBDA_SPEECH_ACT, 18)*/, psEnc.speech_activity_Q8) + Inlines.silk_SMULWB(((int)((TuningParameters.LAMBDA_INPUT_QUALITY) * ((long)1 << (12)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LAMBDA_INPUT_QUALITY, 12)*/, psEncCtrl.input_quality_Q14) + Inlines.silk_SMULWB(((int)((TuningParameters.LAMBDA_CODING_QUALITY) * ((long)1 << (12)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LAMBDA_CODING_QUALITY, 12)*/, psEncCtrl.coding_quality_Q14) + Inlines.silk_SMULWB(((int)((TuningParameters.LAMBDA_QUANT_OFFSET) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LAMBDA_QUANT_OFFSET, 16)*/, quant_offset_Q10); Inlines.OpusAssert(psEncCtrl.Lambda_Q10 > 0); Inlines.OpusAssert(psEncCtrl.Lambda_Q10 < ((int)((2) * ((long)1 << (10)) + 0.5)) /*Inlines.SILK_CONST(2, 10)*/); }
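/* The ((int)((x) * ((long)1 << (Q)) + 0.5)) expressions used above, and throughout this file,
   are the inlined form of the reference SILK_CONST(x, Q) macro preserved in the adjacent comments:
   they turn a float tuning constant into its Q-format integer by scaling with 2^Q and rounding
   (adding 0.5 and truncating). A small stand-alone sketch of the same conversion; the helper name
   is illustrative and not part of the library. */
internal static int ExampleSilkConst(float value, int q)
{
    /* Scale by 2^q, add 0.5 and truncate, giving the Qq integer representation of the constant */
    return (int)(value * ((long)1 << q) + 0.5);
}
/* For example, ExampleSilkConst(12.0f, 7) == 1536, the Q7 threshold compared against LTPredCodGain_Q7 above. */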
/**************************************************************/ /* Compute noise shaping coefficients and initial gain values */ /**************************************************************/ internal static void silk_noise_shape_analysis( SilkChannelEncoder psEnc, /* I/O Encoder state FIX */ SilkEncoderControl psEncCtrl, /* I/O Encoder control FIX */ short[] pitch_res, /* I LPC residual from pitch analysis */ int pitch_res_ptr, short[] x, /* I Input signal [ frame_length + la_shape ] */ int x_ptr ) { SilkShapeState psShapeSt = psEnc.sShape; int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0; int SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32; int nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7; int delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8; int[] auto_corr = new int[SilkConstants.MAX_SHAPE_LPC_ORDER + 1]; int[] refl_coef_Q16 = new int[SilkConstants.MAX_SHAPE_LPC_ORDER]; int[] AR1_Q24 = new int[SilkConstants.MAX_SHAPE_LPC_ORDER]; int[] AR2_Q24 = new int[SilkConstants.MAX_SHAPE_LPC_ORDER]; short[] x_windowed; int pitch_res_ptr2; int x_ptr2; /* Point to start of first LPC analysis block */ x_ptr2 = x_ptr - psEnc.la_shape; /****************/ /* GAIN CONTROL */ /****************/ SNR_adj_dB_Q7 = psEnc.SNR_dB_Q7; /* Input quality is the average of the quality in the lowest two VAD bands */ psEncCtrl.input_quality_Q14 = (int)Inlines.silk_RSHIFT((int)psEnc.input_quality_bands_Q15[0] + psEnc.input_quality_bands_Q15[1], 2); /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */ psEncCtrl.coding_quality_Q14 = Inlines.silk_RSHIFT(Sigmoid.silk_sigm_Q15(Inlines.silk_RSHIFT_ROUND(SNR_adj_dB_Q7 - ((int)((20.0f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(20.0f, 7)*/, 4)), 1); /* Reduce coding SNR during low speech activity */ if (psEnc.useCBR == 0) { b_Q8 = ((int)((1.0f) * ((long)1 << (8)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 8)*/ - psEnc.speech_activity_Q8; b_Q8 = Inlines.silk_SMULWB(Inlines.silk_LSHIFT(b_Q8, 8), b_Q8); SNR_adj_dB_Q7 = Inlines.silk_SMLAWB(SNR_adj_dB_Q7, Inlines.silk_SMULBB(((int)((0 - TuningParameters.BG_SNR_DECR_dB) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(0 - TuningParameters.BG_SNR_DECR_dB, 7)*/ >> (4 + 1), b_Q8), /* Q11*/ Inlines.silk_SMULWB(((int)((1.0f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 14)*/ + psEncCtrl.input_quality_Q14, psEncCtrl.coding_quality_Q14)); /* Q12*/ } if (psEnc.indices.signalType == SilkConstants.TYPE_VOICED) { /* Reduce gains for periodic signals */ SNR_adj_dB_Q7 = Inlines.silk_SMLAWB(SNR_adj_dB_Q7, ((int)((TuningParameters.HARM_SNR_INCR_dB) * ((long)1 << (8)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HARM_SNR_INCR_dB, 8)*/, psEnc.LTPCorr_Q15); } else { /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */ SNR_adj_dB_Q7 = Inlines.silk_SMLAWB(SNR_adj_dB_Q7, Inlines.silk_SMLAWB(((int)((6.0f) * ((long)1 << (9)) + 0.5)) /*Inlines.SILK_CONST(6.0f, 9)*/, -((int)((0.4f) * ((long)1 << (18)) + 0.5)) /*Inlines.SILK_CONST(0.4f, 18)*/, psEnc.SNR_dB_Q7), ((int)((1.0f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 14)*/ - psEncCtrl.input_quality_Q14); } /*************************/ /* SPARSENESS PROCESSING */ /*************************/ /* Set quantizer offset */ if (psEnc.indices.signalType == SilkConstants.TYPE_VOICED) { /* Initially set to 0; may be overruled in process_gains(..) 
*/ psEnc.indices.quantOffsetType = 0; psEncCtrl.sparseness_Q8 = 0; } else { /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ nSamples = Inlines.silk_LSHIFT(psEnc.fs_kHz, 1); energy_variation_Q7 = 0; log_energy_prev_Q7 = 0; pitch_res_ptr2 = pitch_res_ptr; for (k = 0; k < Inlines.silk_SMULBB(SilkConstants.SUB_FRAME_LENGTH_MS, psEnc.nb_subfr) / 2; k++) { SumSqrShift.silk_sum_sqr_shift(out nrg, out scale, pitch_res, pitch_res_ptr2, nSamples); nrg += Inlines.silk_RSHIFT(nSamples, scale); /* Q(-scale)*/ log_energy_Q7 = Inlines.silk_lin2log(nrg); if (k > 0) { energy_variation_Q7 += Inlines.silk_abs(log_energy_Q7 - log_energy_prev_Q7); } log_energy_prev_Q7 = log_energy_Q7; pitch_res_ptr2 += nSamples; } psEncCtrl.sparseness_Q8 = Inlines.silk_RSHIFT(Sigmoid.silk_sigm_Q15(Inlines.silk_SMULWB(energy_variation_Q7 - ((int)((5.0f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(5.0f, 7)*/, ((int)((0.1f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(0.1f, 16)*/)), 7); /* Set quantization offset depending on sparseness measure */ if (psEncCtrl.sparseness_Q8 > ((int)((TuningParameters.SPARSENESS_THRESHOLD_QNT_OFFSET) * ((long)1 << (8)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.SPARSENESS_THRESHOLD_QNT_OFFSET, 8)*/) { psEnc.indices.quantOffsetType = 0; } else { psEnc.indices.quantOffsetType = 1; } /* Increase coding SNR for sparse signals */ SNR_adj_dB_Q7 = Inlines.silk_SMLAWB(SNR_adj_dB_Q7, ((int)((TuningParameters.SPARSE_SNR_INCR_dB) * ((long)1 << (15)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.SPARSE_SNR_INCR_dB, 15)*/, psEncCtrl.sparseness_Q8 - ((int)((0.5f) * ((long)1 << (8)) + 0.5)) /*Inlines.SILK_CONST(0.5f, 8)*/); } /*******************************/ /* Control bandwidth expansion */ /*******************************/ /* More BWE for signals with high prediction gain */ strength_Q16 = Inlines.silk_SMULWB(psEncCtrl.predGain_Q16, ((int)((TuningParameters.FIND_PITCH_WHITE_NOISE_FRACTION) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.FIND_PITCH_WHITE_NOISE_FRACTION, 16)*/); BWExp1_Q16 = BWExp2_Q16 = Inlines.silk_DIV32_varQ(((int)((TuningParameters.BANDWIDTH_EXPANSION) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.BANDWIDTH_EXPANSION, 16)*/, Inlines.silk_SMLAWW(((int)((1.0f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 16)*/, strength_Q16, strength_Q16), 16); delta_Q16 = Inlines.silk_SMULWB(((int)((1.0f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 16)*/ - Inlines.silk_SMULBB(3, psEncCtrl.coding_quality_Q14), ((int)((TuningParameters.LOW_RATE_BANDWIDTH_EXPANSION_DELTA) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16)*/); BWExp1_Q16 = Inlines.silk_SUB32(BWExp1_Q16, delta_Q16); BWExp2_Q16 = Inlines.silk_ADD32(BWExp2_Q16, delta_Q16); /* BWExp1 will be applied after BWExp2, so make it relative */ BWExp1_Q16 = Inlines.silk_DIV32_16(Inlines.silk_LSHIFT(BWExp1_Q16, 14), Inlines.silk_RSHIFT(BWExp2_Q16, 2)); if (psEnc.warping_Q16 > 0) { /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */ warping_Q16 = Inlines.silk_SMLAWB(psEnc.warping_Q16, (int)psEncCtrl.coding_quality_Q14, ((int)((0.01f) * ((long)1 << (18)) + 0.5)) /*Inlines.SILK_CONST(0.01f, 18)*/); } else { warping_Q16 = 0; } /********************************************/ /* Compute noise shaping AR coefs and gains */ /********************************************/ x_windowed = new short[psEnc.shapeWinLength]; for (k = 0; k < psEnc.nb_subfr; 
k++) { /* Apply window: sine slope followed by flat part followed by cosine slope */ int shift, slope_part, flat_part; flat_part = psEnc.fs_kHz * 3; slope_part = Inlines.silk_RSHIFT(psEnc.shapeWinLength - flat_part, 1); ApplySineWindow.silk_apply_sine_window(x_windowed, 0, x, x_ptr2, 1, slope_part); shift = slope_part; Array.Copy(x, x_ptr2 + shift, x_windowed, shift, flat_part); shift += flat_part; ApplySineWindow.silk_apply_sine_window(x_windowed, shift, x, x_ptr2 + shift, 2, slope_part); /* Update pointer: next LPC analysis block */ x_ptr2 += psEnc.subfr_length; BoxedValueInt scale_boxed = new BoxedValueInt(scale); if (psEnc.warping_Q16 > 0) { /* Calculate warped auto correlation */ Autocorrelation.silk_warped_autocorrelation(auto_corr, scale_boxed, x_windowed, warping_Q16, psEnc.shapeWinLength, psEnc.shapingLPCOrder); } else { /* Calculate regular auto correlation */ Autocorrelation.silk_autocorr(auto_corr, scale_boxed, x_windowed, psEnc.shapeWinLength, psEnc.shapingLPCOrder + 1); } scale = scale_boxed.Val; /* Add white noise, as a fraction of energy */ auto_corr[0] = Inlines.silk_ADD32(auto_corr[0], Inlines.silk_max_32(Inlines.silk_SMULWB(Inlines.silk_RSHIFT(auto_corr[0], 4), ((int)((TuningParameters.SHAPE_WHITE_NOISE_FRACTION) * ((long)1 << (20)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.SHAPE_WHITE_NOISE_FRACTION, 20)*/), 1)); /* Calculate the reflection coefficients using schur */ nrg = Schur.silk_schur64(refl_coef_Q16, auto_corr, psEnc.shapingLPCOrder); Inlines.OpusAssert(nrg >= 0); /* Convert reflection coefficients to prediction coefficients */ K2A.silk_k2a_Q16(AR2_Q24, refl_coef_Q16, psEnc.shapingLPCOrder); Qnrg = -scale; /* range: -12...30*/ Inlines.OpusAssert(Qnrg >= -12); Inlines.OpusAssert(Qnrg <= 30); /* Make sure that Qnrg is an even number */ if ((Qnrg & 1) != 0) { Qnrg -= 1; nrg >>= 1; } tmp32 = Inlines.silk_SQRT_APPROX(nrg); Qnrg >>= 1; /* range: -6...15*/ psEncCtrl.Gains_Q16[k] = Inlines.silk_LSHIFT_SAT32(tmp32, 16 - Qnrg); if (psEnc.warping_Q16 > 0) { /* Adjust gain for warping */ gain_mult_Q16 = warped_gain(AR2_Q24, warping_Q16, psEnc.shapingLPCOrder); Inlines.OpusAssert(psEncCtrl.Gains_Q16[k] >= 0); if (Inlines.silk_SMULWW(Inlines.silk_RSHIFT_ROUND(psEncCtrl.Gains_Q16[k], 1), gain_mult_Q16) >= (int.MaxValue >> 1)) { psEncCtrl.Gains_Q16[k] = int.MaxValue; } else { psEncCtrl.Gains_Q16[k] = Inlines.silk_SMULWW(psEncCtrl.Gains_Q16[k], gain_mult_Q16); } } /* Bandwidth expansion for synthesis filter shaping */ BWExpander.silk_bwexpander_32(AR2_Q24, psEnc.shapingLPCOrder, BWExp2_Q16); /* Compute noise shaping filter coefficients */ Array.Copy(AR2_Q24, AR1_Q24, psEnc.shapingLPCOrder); /* Bandwidth expansion for analysis filter shaping */ Inlines.OpusAssert(BWExp1_Q16 <= ((int)((1.0f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 16)*/); BWExpander.silk_bwexpander_32(AR1_Q24, psEnc.shapingLPCOrder, BWExp1_Q16); /* Ratio of prediction gains, in energy domain */ pre_nrg_Q30 = LPCInversePredGain.silk_LPC_inverse_pred_gain_Q24(AR2_Q24, psEnc.shapingLPCOrder); nrg = LPCInversePredGain.silk_LPC_inverse_pred_gain_Q24(AR1_Q24, psEnc.shapingLPCOrder); /*psEncCtrl.GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/ pre_nrg_Q30 = Inlines.silk_LSHIFT32(Inlines.silk_SMULWB(pre_nrg_Q30, ((int)((0.7f) * ((long)1 << (15)) + 0.5)) /*Inlines.SILK_CONST(0.7f, 15)*/), 1); psEncCtrl.GainsPre_Q14[k] = (int)((int)((0.3f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(0.3f, 14)*/ + Inlines.silk_DIV32_varQ(pre_nrg_Q30, nrg, 14); /* Convert to monic 
warped prediction coefficients and limit absolute values */ limit_warped_coefs(AR2_Q24, AR1_Q24, warping_Q16, ((int)((3.999f) * ((long)1 << (24)) + 0.5)) /*Inlines.SILK_CONST(3.999f, 24)*/, psEnc.shapingLPCOrder); /* Convert from Q24 to Q13 and store in int16 */ for (i = 0; i < psEnc.shapingLPCOrder; i++) { psEncCtrl.AR1_Q13[k * SilkConstants.MAX_SHAPE_LPC_ORDER + i] = (short)Inlines.silk_SAT16(Inlines.silk_RSHIFT_ROUND(AR1_Q24[i], 11)); psEncCtrl.AR2_Q13[k * SilkConstants.MAX_SHAPE_LPC_ORDER + i] = (short)Inlines.silk_SAT16(Inlines.silk_RSHIFT_ROUND(AR2_Q24[i], 11)); } } /*****************/ /* Gain tweaking */ /*****************/ /* Increase gains during low speech activity and put lower limit on gains */ gain_mult_Q16 = Inlines.silk_log2lin(-Inlines.silk_SMLAWB(-((int)((16.0f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(16.0f, 7)*/, SNR_adj_dB_Q7, ((int)((0.16f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(0.16f, 16)*/)); gain_add_Q16 = Inlines.silk_log2lin(Inlines.silk_SMLAWB(((int)((16.0f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(16.0f, 7)*/, ((int)((SilkConstants.MIN_QGAIN_DB) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.MIN_QGAIN_DB, 7)*/, ((int)((0.16f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(0.16f, 16)*/)); Inlines.OpusAssert(gain_mult_Q16 > 0); for (k = 0; k < psEnc.nb_subfr; k++) { psEncCtrl.Gains_Q16[k] = Inlines.silk_SMULWW(psEncCtrl.Gains_Q16[k], gain_mult_Q16); Inlines.OpusAssert(psEncCtrl.Gains_Q16[k] >= 0); psEncCtrl.Gains_Q16[k] = Inlines.silk_ADD_POS_SAT32(psEncCtrl.Gains_Q16[k], gain_add_Q16); } gain_mult_Q16 = ((int)((1.0f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 16)*/ + Inlines.silk_RSHIFT_ROUND(Inlines.silk_MLA(((int)((TuningParameters.INPUT_TILT) * ((long)1 << (26)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.INPUT_TILT, 26)*/, psEncCtrl.coding_quality_Q14, ((int)((TuningParameters.HIGH_RATE_INPUT_TILT) * ((long)1 << (12)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HIGH_RATE_INPUT_TILT, 12)*/), 10); for (k = 0; k < psEnc.nb_subfr; k++) { psEncCtrl.GainsPre_Q14[k] = Inlines.silk_SMULWB(gain_mult_Q16, psEncCtrl.GainsPre_Q14[k]); } /************************************************/ /* Control low-frequency shaping and noise tilt */ /************************************************/ /* Less low frequency shaping for noisy inputs */ strength_Q16 = Inlines.silk_MUL(((int)((TuningParameters.LOW_FREQ_SHAPING) * ((long)1 << (4)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LOW_FREQ_SHAPING, 4)*/, Inlines.silk_SMLAWB(((int)((1.0f) * ((long)1 << (12)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 12)*/, ((int)((TuningParameters.LOW_QUALITY_LOW_FREQ_SHAPING_DECR) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LOW_QUALITY_LOW_FREQ_SHAPING_DECR, 13)*/, psEnc.input_quality_bands_Q15[0] - ((int)((1.0f) * ((long)1 << (15)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 15)*/)); strength_Q16 = Inlines.silk_RSHIFT(Inlines.silk_MUL(strength_Q16, psEnc.speech_activity_Q8), 8); if (psEnc.indices.signalType == SilkConstants.TYPE_VOICED) { /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */ /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/ int fs_kHz_inv = Inlines.silk_DIV32_16(((int)((0.2f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(0.2f, 14)*/, psEnc.fs_kHz); for (k = 0; k < psEnc.nb_subfr; k++) { b_Q14 = fs_kHz_inv + Inlines.silk_DIV32_16(((int)((3.0f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(3.0f, 14)*/, psEncCtrl.pitchL[k]); 
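/* b_Q14 is roughly 0.2 / fs_kHz + 3.0 / pitchL[k] in Q14, so lower sample rates and shorter pitch lags yield a stronger low-frequency shaping coefficient */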
/* Pack two coefficients in one int32 */ psEncCtrl.LF_shp_Q14[k] = Inlines.silk_LSHIFT(((int)((1.0f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 14)*/ - b_Q14 - Inlines.silk_SMULWB(strength_Q16, b_Q14), 16); psEncCtrl.LF_shp_Q14[k] |= (b_Q14 - ((int)((1.0f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 14)*/) & 0xFFFF; // opus bug: again, cast to ushort was done here where bitwise masking was intended } Inlines.OpusAssert(((int)((TuningParameters.HARM_HP_NOISE_COEF) * ((long)1 << (24)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HARM_HP_NOISE_COEF, 24)*/ < ((int)((0.5f) * ((long)1 << (24)) + 0.5)) /*Inlines.SILK_CONST(0.5f, 24)*/); /* Guarantees that second argument to SMULWB() is within range of an short*/ Tilt_Q16 = -((int)((TuningParameters.HP_NOISE_COEF) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HP_NOISE_COEF, 16)*/ - Inlines.silk_SMULWB(((int)((1.0f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 16)*/ - ((int)((TuningParameters.HP_NOISE_COEF) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HP_NOISE_COEF, 16)*/, Inlines.silk_SMULWB(((int)((TuningParameters.HARM_HP_NOISE_COEF) * ((long)1 << (24)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HARM_HP_NOISE_COEF, 24)*/, psEnc.speech_activity_Q8)); } else { b_Q14 = Inlines.silk_DIV32_16(21299, psEnc.fs_kHz); /* 1.3_Q0 = 21299_Q14*/ /* Pack two coefficients in one int32 */ psEncCtrl.LF_shp_Q14[0] = Inlines.silk_LSHIFT(((int)((1.0f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 14)*/ - b_Q14 - Inlines.silk_SMULWB(strength_Q16, Inlines.silk_SMULWB(((int)((0.6f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(0.6f, 16)*/, b_Q14)), 16); psEncCtrl.LF_shp_Q14[0] |= (b_Q14 - ((int)((1.0f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 14)*/) & 0xFFFF; // opus bug: cast to ushort is better expressed as a bitwise operator, otherwise runtime analysis might flag it as an overflow error for (k = 1; k < psEnc.nb_subfr; k++) { psEncCtrl.LF_shp_Q14[k] = psEncCtrl.LF_shp_Q14[0]; } Tilt_Q16 = -((int)((TuningParameters.HP_NOISE_COEF) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HP_NOISE_COEF, 16)*/; } /****************************/ /* HARMONIC SHAPING CONTROL */ /****************************/ /* Control boosting of harmonic frequencies */ HarmBoost_Q16 = Inlines.silk_SMULWB(Inlines.silk_SMULWB(((int)((1.0f) * ((long)1 << (17)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 17)*/ - Inlines.silk_LSHIFT(psEncCtrl.coding_quality_Q14, 3), psEnc.LTPCorr_Q15), ((int)((TuningParameters.LOW_RATE_HARMONIC_BOOST) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LOW_RATE_HARMONIC_BOOST, 16)*/); /* More harmonic boost for noisy input signals */ HarmBoost_Q16 = Inlines.silk_SMLAWB(HarmBoost_Q16, ((int)((1.0f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 16)*/ - Inlines.silk_LSHIFT(psEncCtrl.input_quality_Q14, 2), ((int)((TuningParameters.LOW_INPUT_QUALITY_HARMONIC_BOOST) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LOW_INPUT_QUALITY_HARMONIC_BOOST, 16)*/); if (SilkConstants.USE_HARM_SHAPING != 0 && psEnc.indices.signalType == SilkConstants.TYPE_VOICED) { /* More harmonic noise shaping for high bitrates or noisy input */ HarmShapeGain_Q16 = Inlines.silk_SMLAWB(((int)((TuningParameters.HARMONIC_SHAPING) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HARMONIC_SHAPING, 16)*/, ((int)((1.0f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 16)*/ - Inlines.silk_SMULWB(((int)((1.0f) * ((long)1 << (18)) + 0.5)) 
/*Inlines.SILK_CONST(1.0f, 18)*/ - Inlines.silk_LSHIFT(psEncCtrl.coding_quality_Q14, 4), psEncCtrl.input_quality_Q14), ((int)((TuningParameters.HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING, 16)*/); /* Less harmonic noise shaping for less periodic signals */ HarmShapeGain_Q16 = Inlines.silk_SMULWB(Inlines.silk_LSHIFT(HarmShapeGain_Q16, 1), Inlines.silk_SQRT_APPROX(Inlines.silk_LSHIFT(psEnc.LTPCorr_Q15, 15))); } else { HarmShapeGain_Q16 = 0; } /*************************/ /* Smooth over subframes */ /*************************/ for (k = 0; k < SilkConstants.MAX_NB_SUBFR; k++) { psShapeSt.HarmBoost_smth_Q16 = Inlines.silk_SMLAWB(psShapeSt.HarmBoost_smth_Q16, HarmBoost_Q16 - psShapeSt.HarmBoost_smth_Q16, ((int)((TuningParameters.SUBFR_SMTH_COEF) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.SUBFR_SMTH_COEF, 16)*/); psShapeSt.HarmShapeGain_smth_Q16 = Inlines.silk_SMLAWB(psShapeSt.HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt.HarmShapeGain_smth_Q16, ((int)((TuningParameters.SUBFR_SMTH_COEF) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.SUBFR_SMTH_COEF, 16)*/); psShapeSt.Tilt_smth_Q16 = Inlines.silk_SMLAWB(psShapeSt.Tilt_smth_Q16, Tilt_Q16 - psShapeSt.Tilt_smth_Q16, ((int)((TuningParameters.SUBFR_SMTH_COEF) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.SUBFR_SMTH_COEF, 16)*/); psEncCtrl.HarmBoost_Q14[k] = (int)Inlines.silk_RSHIFT_ROUND(psShapeSt.HarmBoost_smth_Q16, 2); psEncCtrl.HarmShapeGain_Q14[k] = (int)Inlines.silk_RSHIFT_ROUND(psShapeSt.HarmShapeGain_smth_Q16, 2); psEncCtrl.Tilt_Q14[k] = (int)Inlines.silk_RSHIFT_ROUND(psShapeSt.Tilt_smth_Q16, 2); } }
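/* LF_shp_Q14 above packs two Q14 low-frequency shaping coefficients into a single int32: the high
   16 bits hold the coefficient built from (1.0 - b_Q14) minus a strength-dependent term, and the
   low 16 bits hold (b_Q14 - 1.0), which is negative and is therefore stored as its two's-complement
   low half via the 0xFFFF mask. A minimal pack/unpack sketch of that layout; the helper names are
   illustrative and not part of the library. */
internal static int ExamplePackLFShape(int highCoefQ14, int lowCoefQ14)
{
    /* High half shifted into the top 16 bits; low half truncated to 16 bits, matching the "& 0xFFFF" used above */
    return (highCoefQ14 << 16) | (lowCoefQ14 & 0xFFFF);
}

internal static void ExampleUnpackLFShape(int packed, out int highCoefQ14, out int lowCoefQ14)
{
    highCoefQ14 = packed >> 16;            /* arithmetic shift keeps the sign of the high half */
    lowCoefQ14 = (short)(packed & 0xFFFF); /* cast to short sign-extends the low half back to its (negative) Q14 value */
}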