/* Prefilter for finding the weighted input signal: per subframe, runs a warped-LPC
   short-term analysis filter, applies a 2-tap low-frequency emphasis filter, and
   hands the result to silk_prefilt for harmonic/tilt/LF noise shaping.
   Output xw_Q3 is the weighted signal used by the noise-shaping quantizer. */
internal static void silk_prefilter(
    SilkChannelEncoder psEnc,       /* I/O Encoder state */
    SilkEncoderControl psEncCtrl,   /* I    Encoder control */
    int[] xw_Q3,                    /* O    Weighted signal */
    short[] x,                      /* I    Speech signal */
    int x_ptr)                      /* I    Read offset into x */
{
    SilkPrefilterState P = psEnc.sPrefilt;
    int j, k, lag;
    int tmp_32;
    int AR1_shp_Q13;                /* offset into the per-subframe AR shaping coefficient array */
    int px;                         /* read index into x */
    int pxw_Q3;                     /* write index into xw_Q3 */
    int HarmShapeGain_Q12, Tilt_Q14;
    int HarmShapeFIRPacked_Q12, LF_shp_Q14;
    int[] x_filt_Q12;
    int[] st_res_Q2;
    short[] B_Q10 = new short[2];   /* 2-tap LF emphasis filter coefficients */

    /* Set up pointers */
    px = x_ptr;
    pxw_Q3 = 0;
    /* Carry the previous frame's pitch lag; only updated below for voiced frames */
    lag = P.lagPrev;
    x_filt_Q12 = new int[psEnc.subfr_length];
    st_res_Q2 = new int[psEnc.subfr_length];

    for (k = 0; k < psEnc.nb_subfr; k++)
    {
        /* Update Variables that change per sub frame */
        if (psEnc.indices.signalType == SilkConstants.TYPE_VOICED)
        {
            lag = psEncCtrl.pitchL[k];
        }

        /* Noise shape parameters: scale down harmonic shaping gain by the harmonic boost */
        HarmShapeGain_Q12 = Inlines.silk_SMULWB((int)psEncCtrl.HarmShapeGain_Q14[k], 16384 - psEncCtrl.HarmBoost_Q14[k]);
        Inlines.OpusAssert(HarmShapeGain_Q12 >= 0);
        /* Pack the 3-tap harmonic FIR: gain/4 in the low 16 bits, gain/2 in the high 16 bits */
        HarmShapeFIRPacked_Q12 = Inlines.silk_RSHIFT(HarmShapeGain_Q12, 2);
        HarmShapeFIRPacked_Q12 |= Inlines.silk_LSHIFT((int)Inlines.silk_RSHIFT(HarmShapeGain_Q12, 1), 16);
        Tilt_Q14 = psEncCtrl.Tilt_Q14[k];
        LF_shp_Q14 = psEncCtrl.LF_shp_Q14[k];
        AR1_shp_Q13 = k * SilkConstants.MAX_SHAPE_LPC_ORDER;

        /* Short term FIR filtering */
        silk_warped_LPC_analysis_filter(P.sAR_shp, st_res_Q2, psEncCtrl.AR1_Q13, AR1_shp_Q13, x, px,
            (short)(psEnc.warping_Q16), psEnc.subfr_length, psEnc.shapingLPCOrder);

        /* Reduce (mainly) low frequencies during harmonic emphasis */
        B_Q10[0] = (short)(Inlines.silk_RSHIFT_ROUND(psEncCtrl.GainsPre_Q14[k], 4));
        tmp_32 = Inlines.silk_SMLABB(((int)((TuningParameters.INPUT_TILT) * ((long)1 << (26)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.INPUT_TILT, 26)*/, psEncCtrl.HarmBoost_Q14[k], HarmShapeGain_Q12); /* Q26 */
        tmp_32 = Inlines.silk_SMLABB(tmp_32, psEncCtrl.coding_quality_Q14, ((int)((TuningParameters.HIGH_RATE_INPUT_TILT) * ((long)1 << (12)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HIGH_RATE_INPUT_TILT, 12)*/); /* Q26 */
        tmp_32 = Inlines.silk_SMULWB(tmp_32, -psEncCtrl.GainsPre_Q14[k]); /* Q24 */
        tmp_32 = Inlines.silk_RSHIFT_ROUND(tmp_32, 14); /* Q10 */
        B_Q10[1] = (short)(Inlines.silk_SAT16(tmp_32));

        /* First sample uses the persisted filter state as the "previous" residual */
        x_filt_Q12[0] = Inlines.silk_MLA(Inlines.silk_MUL(st_res_Q2[0], B_Q10[0]), P.sHarmHP_Q2, B_Q10[1]);
        for (j = 1; j < psEnc.subfr_length; j++)
        {
            x_filt_Q12[j] = Inlines.silk_MLA(Inlines.silk_MUL(st_res_Q2[j], B_Q10[0]), st_res_Q2[j - 1], B_Q10[1]);
        }
        /* Save last residual sample as state for the next subframe/frame */
        P.sHarmHP_Q2 = st_res_Q2[psEnc.subfr_length - 1];

        silk_prefilt(P, x_filt_Q12, xw_Q3, pxw_Q3, HarmShapeFIRPacked_Q12, Tilt_Q14, LF_shp_Q14, lag, psEnc.subfr_length);

        px += psEnc.subfr_length;
        pxw_Q3 += psEnc.subfr_length;
    }

    P.lagPrev = psEncCtrl.pitchL[psEnc.nb_subfr - 1];
}
/* Processing of gains: reduces gains when LTP coding gain is high, soft-limits them
   against the residual energy, quantizes them, selects the quantizer offset type for
   voiced frames, and computes the rate/distortion tradeoff parameter Lambda_Q10. */
internal static void silk_process_gains(
    SilkChannelEncoder psEnc,       /* I/O Encoder state */
    SilkEncoderControl psEncCtrl,   /* I/O Encoder control */
    int condCoding                  /* I   The type of conditional coding to use */
    )
{
    SilkShapeState psShapeSt = psEnc.sShape;
    int k;
    int s_Q16, InvMaxSqrVal_Q16, gain, gain_squared, ResNrg, ResNrgPart, quant_offset_Q10;

    /* Gain reduction when LTP coding gain is high */
    if (psEnc.indices.signalType == SilkConstants.TYPE_VOICED)
    {
        /* s = -0.5f * silk_sigmoid( 0.25f * ( psEncCtrl.LTPredCodGain - 12.0f ) ); */
        s_Q16 = 0 - Sigmoid.silk_sigm_Q15(Inlines.silk_RSHIFT_ROUND(psEncCtrl.LTPredCodGain_Q7 - ((int)((12.0f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(12.0f, 7)*/, 4));
        for (k = 0; k < psEnc.nb_subfr; k++)
        {
            /* Gains_Q16[k] *= (1 + s); s is negative, so this shrinks the gain */
            psEncCtrl.Gains_Q16[k] = Inlines.silk_SMLAWB(psEncCtrl.Gains_Q16[k], psEncCtrl.Gains_Q16[k], s_Q16);
        }
    }

    /* Limit the quantized signal */
    /* InvMaxSqrVal = pow( 2.0f, 0.33f * ( 21.0f - SNR_dB ) ) / subfr_length; */
    InvMaxSqrVal_Q16 = Inlines.silk_DIV32_16(Inlines.silk_log2lin(
        Inlines.silk_SMULWB(((int)((21 + 16 / 0.33f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(21 + 16 / 0.33f, 7)*/ - psEnc.SNR_dB_Q7, ((int)((0.33f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(0.33f, 16)*/)), psEnc.subfr_length);

    for (k = 0; k < psEnc.nb_subfr; k++)
    {
        /* Soft limit on ratio residual energy and squared gains */
        ResNrg = psEncCtrl.ResNrg[k];
        ResNrgPart = Inlines.silk_SMULWW(ResNrg, InvMaxSqrVal_Q16);
        if (psEncCtrl.ResNrgQ[k] > 0)
        {
            ResNrgPart = Inlines.silk_RSHIFT_ROUND(ResNrgPart, psEncCtrl.ResNrgQ[k]);
        }
        else
        {
            /* Negative Q value means a left shift; saturate instead of overflowing */
            if (ResNrgPart >= Inlines.silk_RSHIFT(int.MaxValue, -psEncCtrl.ResNrgQ[k]))
            {
                ResNrgPart = int.MaxValue;
            }
            else
            {
                ResNrgPart = Inlines.silk_LSHIFT(ResNrgPart, -psEncCtrl.ResNrgQ[k]);
            }
        }
        gain = psEncCtrl.Gains_Q16[k];
        gain_squared = Inlines.silk_ADD_SAT32(ResNrgPart, Inlines.silk_SMMUL(gain, gain));
        if (gain_squared < short.MaxValue)
        {
            /* recalculate with higher precision */
            gain_squared = Inlines.silk_SMLAWW(Inlines.silk_LSHIFT(ResNrgPart, 16), gain, gain);
            Inlines.OpusAssert(gain_squared > 0);
            gain = Inlines.silk_SQRT_APPROX(gain_squared); /* Q8 */
            gain = Inlines.silk_min(gain, int.MaxValue >> 8);
            psEncCtrl.Gains_Q16[k] = Inlines.silk_LSHIFT_SAT32(gain, 8); /* Q16 */
        }
        else
        {
            gain = Inlines.silk_SQRT_APPROX(gain_squared); /* Q0 */
            gain = Inlines.silk_min(gain, int.MaxValue >> 16);
            psEncCtrl.Gains_Q16[k] = Inlines.silk_LSHIFT_SAT32(gain, 16); /* Q16 */
        }
    }

    /* Save unquantized gains and gain Index */
    Array.Copy(psEncCtrl.Gains_Q16, psEncCtrl.GainsUnq_Q16, psEnc.nb_subfr);
    psEncCtrl.lastGainIndexPrev = psShapeSt.LastGainIndex;

    /* Quantize gains */
    BoxedValueSbyte boxed_lastGainIndex = new BoxedValueSbyte(psShapeSt.LastGainIndex);
    GainQuantization.silk_gains_quant(psEnc.indices.GainsIndices, psEncCtrl.Gains_Q16, boxed_lastGainIndex,
        condCoding == SilkConstants.CODE_CONDITIONALLY ? 1 : 0, psEnc.nb_subfr);
    psShapeSt.LastGainIndex = boxed_lastGainIndex.Val;

    /* Set quantizer offset for voiced signals.
       Larger offset when LTP coding gain is low or tilt is high (ie low-pass) */
    if (psEnc.indices.signalType == SilkConstants.TYPE_VOICED)
    {
        if (psEncCtrl.LTPredCodGain_Q7 + Inlines.silk_RSHIFT(psEnc.input_tilt_Q15, 8) > ((int)((1.0f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 7)*/)
        {
            psEnc.indices.quantOffsetType = 0;
        }
        else
        {
            psEnc.indices.quantOffsetType = 1;
        }
    }

    /* Quantizer boundary adjustment: Lambda is a weighted sum of tuning terms */
    quant_offset_Q10 = Tables.silk_Quantization_Offsets_Q10[psEnc.indices.signalType >> 1][psEnc.indices.quantOffsetType];
    psEncCtrl.Lambda_Q10 = ((int)((TuningParameters.LAMBDA_OFFSET) * ((long)1 << (10)) + 0.5))/*Inlines.SILK_CONST(TuningParameters.LAMBDA_OFFSET, 10)*/
        + Inlines.silk_SMULBB(((int)((TuningParameters.LAMBDA_DELAYED_DECISIONS) * ((long)1 << (10)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LAMBDA_DELAYED_DECISIONS, 10)*/, psEnc.nStatesDelayedDecision)
        + Inlines.silk_SMULWB(((int)((TuningParameters.LAMBDA_SPEECH_ACT) * ((long)1 << (18)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LAMBDA_SPEECH_ACT, 18)*/, psEnc.speech_activity_Q8)
        + Inlines.silk_SMULWB(((int)((TuningParameters.LAMBDA_INPUT_QUALITY) * ((long)1 << (12)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LAMBDA_INPUT_QUALITY, 12)*/, psEncCtrl.input_quality_Q14)
        + Inlines.silk_SMULWB(((int)((TuningParameters.LAMBDA_CODING_QUALITY) * ((long)1 << (12)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LAMBDA_CODING_QUALITY, 12)*/, psEncCtrl.coding_quality_Q14)
        + Inlines.silk_SMULWB(((int)((TuningParameters.LAMBDA_QUANT_OFFSET) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LAMBDA_QUANT_OFFSET, 16)*/, quant_offset_Q10);

    Inlines.OpusAssert(psEncCtrl.Lambda_Q10 > 0);
    Inlines.OpusAssert(psEncCtrl.Lambda_Q10 < ((int)((2) * ((long)1 << (10)) + 0.5)) /*Inlines.SILK_CONST(2, 10)*/);
}
/* Finds LPC vector from correlations, and converts to NLSF.
   Runs Burg AR analysis over the full frame; when NLSF interpolation is enabled it
   also searches interpolation factors k = 3..0 for the one that minimizes the
   residual energy of the first half-frame, storing the winner in
   psEncC.indices.NLSFInterpCoef_Q2 (4 = no interpolation). */
internal static void silk_find_LPC(
    SilkChannelEncoder psEncC,  /* I/O Encoder state */
    short[] NLSF_Q15,           /* O   NLSFs */
    short[] x,                  /* I   Input signal */
    int minInvGain_Q30          /* I   Inverse of max prediction gain */
    )
{
    int k, subfr_length;
    int[] a_Q16 = new int[SilkConstants.MAX_LPC_ORDER];
    int isInterpLower, shift;
    int res_nrg0, res_nrg1;
    int rshift0, rshift1;
    BoxedValueInt scratch_box1 = new BoxedValueInt();
    BoxedValueInt scratch_box2 = new BoxedValueInt();

    /* Used only for LSF interpolation */
    int[] a_tmp_Q16 = new int[SilkConstants.MAX_LPC_ORDER];
    int res_nrg_interp, res_nrg, res_tmp_nrg;
    /* The _Q variables track the (variable) Q format of each energy value */
    int res_nrg_interp_Q, res_nrg_Q, res_tmp_nrg_Q;
    short[] a_tmp_Q12 = new short[SilkConstants.MAX_LPC_ORDER];
    short[] NLSF0_Q15 = new short[SilkConstants.MAX_LPC_ORDER];

    subfr_length = psEncC.subfr_length + psEncC.predictLPCOrder;

    /* Default: no interpolation */
    psEncC.indices.NLSFInterpCoef_Q2 = 4;

    /* Burg AR analysis for the full frame */
    BurgModified.silk_burg_modified(scratch_box1, scratch_box2, a_Q16, x, 0, minInvGain_Q30, subfr_length, psEncC.nb_subfr, psEncC.predictLPCOrder);
    res_nrg = scratch_box1.Val;
    res_nrg_Q = scratch_box2.Val;

    if (psEncC.useInterpolatedNLSFs != 0 && psEncC.first_frame_after_reset == 0 && psEncC.nb_subfr == SilkConstants.MAX_NB_SUBFR)
    {
        short[] LPC_res;

        /* Optimal solution for last 10 ms */
        BurgModified.silk_burg_modified(scratch_box1, scratch_box2, a_tmp_Q16, x, (2 * subfr_length), minInvGain_Q30, subfr_length, 2, psEncC.predictLPCOrder);
        res_tmp_nrg = scratch_box1.Val;
        res_tmp_nrg_Q = scratch_box2.Val;

        /* subtract residual energy here, as that's easier than adding it to the */
        /* residual energy of the first 10 ms in each iteration of the search below */
        shift = res_tmp_nrg_Q - res_nrg_Q;
        if (shift >= 0)
        {
            if (shift < 32)
            {
                res_nrg = res_nrg - Inlines.silk_RSHIFT(res_tmp_nrg, shift);
            }
        }
        else
        {
            Inlines.OpusAssert(shift > -32);
            /* Align res_nrg to res_tmp_nrg's Q format, then subtract */
            res_nrg = Inlines.silk_RSHIFT(res_nrg, -shift) - res_tmp_nrg;
            res_nrg_Q = res_tmp_nrg_Q;
        }

        /* Convert to NLSFs */
        NLSF.silk_A2NLSF(NLSF_Q15, a_tmp_Q16, psEncC.predictLPCOrder);

        LPC_res = new short[2 * subfr_length];

        /* Search over interpolation indices to find the one with lowest residual energy */
        for (k = 3; k >= 0; k--)
        {
            /* Interpolate NLSFs for first half */
            Inlines.silk_interpolate(NLSF0_Q15, psEncC.prev_NLSFq_Q15, NLSF_Q15, k, psEncC.predictLPCOrder);

            /* Convert to LPC for residual energy evaluation */
            NLSF.silk_NLSF2A(a_tmp_Q12, NLSF0_Q15, psEncC.predictLPCOrder);

            /* Calculate residual energy with NLSF interpolation */
            Filters.silk_LPC_analysis_filter(LPC_res, 0, x, 0, a_tmp_Q12, 0, 2 * subfr_length, psEncC.predictLPCOrder);

            SumSqrShift.silk_sum_sqr_shift(out res_nrg0, out rshift0, LPC_res, psEncC.predictLPCOrder, subfr_length - psEncC.predictLPCOrder);
            SumSqrShift.silk_sum_sqr_shift(out res_nrg1, out rshift1, LPC_res, psEncC.predictLPCOrder + subfr_length, subfr_length - psEncC.predictLPCOrder);

            /* Add subframe energies from first half frame */
            shift = rshift0 - rshift1;
            if (shift >= 0)
            {
                res_nrg1 = Inlines.silk_RSHIFT(res_nrg1, shift);
                res_nrg_interp_Q = -rshift0;
            }
            else
            {
                res_nrg0 = Inlines.silk_RSHIFT(res_nrg0, -shift);
                res_nrg_interp_Q = -rshift1;
            }
            res_nrg_interp = Inlines.silk_ADD32(res_nrg0, res_nrg1);

            /* Compare with first half energy without NLSF interpolation, or best interpolated value so far */
            shift = res_nrg_interp_Q - res_nrg_Q;
            if (shift >= 0)
            {
                if (Inlines.silk_RSHIFT(res_nrg_interp, shift) < res_nrg)
                {
                    isInterpLower = (true ? 1 : 0);
                }
                else
                {
                    isInterpLower = (false ? 1 : 0);
                }
            }
            else
            {
                if (-shift < 32)
                {
                    if (res_nrg_interp < Inlines.silk_RSHIFT(res_nrg, -shift))
                    {
                        isInterpLower = (true ? 1 : 0);
                    }
                    else
                    {
                        isInterpLower = (false ? 1 : 0);
                    }
                }
                else
                {
                    /* Shift would overflow; treat interpolated energy as not lower */
                    isInterpLower = (false ? 1 : 0);
                }
            }

            /* Determine whether current interpolated NLSFs are best so far */
            if (isInterpLower == (true ? 1 : 0))
            {
                /* Interpolation has lower residual energy */
                res_nrg = res_nrg_interp;
                res_nrg_Q = res_nrg_interp_Q;
                psEncC.indices.NLSFInterpCoef_Q2 = (sbyte)k;
            }
        }
    }

    if (psEncC.indices.NLSFInterpCoef_Q2 == 4)
    {
        /* NLSF interpolation is currently inactive, calculate NLSFs from full frame AR coefficients */
        NLSF.silk_A2NLSF(NLSF_Q15, a_Q16, psEncC.predictLPCOrder);
    }

    Inlines.OpusAssert(psEncC.indices.NLSFInterpCoef_Q2 == 4 || (psEncC.useInterpolatedNLSFs != 0 && psEncC.first_frame_after_reset == 0 && psEncC.nb_subfr == SilkConstants.MAX_NB_SUBFR));
}
/// <summary>
/// Get the speech activity level in Q8.
/// Splits the input into 4 frequency bands via a 3-stage analysis filter bank,
/// estimates per-band energy and noise level, maps the resulting SNR through a
/// sigmoid into a speech-activity probability, and updates the per-band
/// smoothed SNR/quality state on psEncC.
/// </summary>
/// <param name="psEncC">I/O Encoder state</param>
/// <param name="pIn">I PCM input</param>
/// <param name="pIn_ptr">I Read offset into pIn</param>
/// <returns>0 if success</returns>
internal static int silk_VAD_GetSA_Q8(
    SilkChannelEncoder psEncC,
    short[] pIn,
    int pIn_ptr)
{
    int SA_Q15, pSNR_dB_Q7, input_tilt;
    int decimated_framelength1, decimated_framelength2;
    int decimated_framelength;
    int dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s;
    int sumSquared = 0, smooth_coef_Q16;
    short HPstateTmp;
    short[] X;
    int[] Xnrg = new int[SilkConstants.VAD_N_BANDS];
    int[] NrgToNoiseRatio_Q8 = new int[SilkConstants.VAD_N_BANDS];
    int speech_nrg, x_tmp;
    int[] X_offset = new int[SilkConstants.VAD_N_BANDS];
    int ret = 0;
    SilkVADState psSilk_VAD = psEncC.sVAD;

    /* Safety checks */
    Inlines.OpusAssert(SilkConstants.VAD_N_BANDS == 4);
    Inlines.OpusAssert(SilkConstants.MAX_FRAME_LENGTH >= psEncC.frame_length);
    Inlines.OpusAssert(psEncC.frame_length <= 512);
    Inlines.OpusAssert(psEncC.frame_length == 8 * Inlines.silk_RSHIFT(psEncC.frame_length, 3));

    /***********************/
    /* Filter and Decimate */
    /***********************/
    decimated_framelength1 = Inlines.silk_RSHIFT(psEncC.frame_length, 1);
    decimated_framelength2 = Inlines.silk_RSHIFT(psEncC.frame_length, 2);
    decimated_framelength = Inlines.silk_RSHIFT(psEncC.frame_length, 3);

    /* Decimate into 4 bands:
       0       L      3L       L      3L      5L
       -       --     --       -      --      --
       8       8      8        2      4       4

       [0-1 kHz| temp. |1-2 kHz|    2-4 kHz    |    4-8 kHz |

       They're arranged to allow the minimal ( frame_length / 4 ) extra
       scratch space during the downsampling process.
       NOTE: X is used as both input and output of the filter bank below;
       the offset layout makes this aliasing safe. */
    X_offset[0] = 0;
    X_offset[1] = decimated_framelength + decimated_framelength2;
    X_offset[2] = X_offset[1] + decimated_framelength;
    X_offset[3] = X_offset[2] + decimated_framelength2;
    X = new short[X_offset[3] + decimated_framelength1];

    /* 0-8 kHz to 0-4 kHz and 4-8 kHz */
    Filters.silk_ana_filt_bank_1(pIn, pIn_ptr, psSilk_VAD.AnaState, X, X, X_offset[3], psEncC.frame_length);

    /* 0-4 kHz to 0-2 kHz and 2-4 kHz */
    Filters.silk_ana_filt_bank_1(X, 0, psSilk_VAD.AnaState1, X, X, X_offset[2], decimated_framelength1);

    /* 0-2 kHz to 0-1 kHz and 1-2 kHz */
    Filters.silk_ana_filt_bank_1(X, 0, psSilk_VAD.AnaState2, X, X, X_offset[1], decimated_framelength2);

    /*********************************************/
    /* HP filter on lowest band (differentiator) */
    /*********************************************/
    X[decimated_framelength - 1] = (short)(Inlines.silk_RSHIFT(X[decimated_framelength - 1], 1));
    HPstateTmp = X[decimated_framelength - 1];

    /* Backwards loop so each sample differences against the already-halved previous one */
    for (i = decimated_framelength - 1; i > 0; i--)
    {
        X[i - 1] = (short)(Inlines.silk_RSHIFT(X[i - 1], 1));
        X[i] -= X[i - 1];
    }

    X[0] -= psSilk_VAD.HPstate;
    psSilk_VAD.HPstate = HPstateTmp;

    /*************************************/
    /* Calculate the energy in each band */
    /*************************************/
    for (b = 0; b < SilkConstants.VAD_N_BANDS; b++)
    {
        /* Find the decimated framelength in the non-uniformly divided bands */
        decimated_framelength = Inlines.silk_RSHIFT(psEncC.frame_length, Inlines.silk_min_int(SilkConstants.VAD_N_BANDS - b, SilkConstants.VAD_N_BANDS - 1));

        /* Split length into subframe lengths */
        dec_subframe_length = Inlines.silk_RSHIFT(decimated_framelength, SilkConstants.VAD_INTERNAL_SUBFRAMES_LOG2);
        dec_subframe_offset = 0;

        /* Compute energy per sub-frame */
        /* initialize with summed energy of last subframe */
        Xnrg[b] = psSilk_VAD.XnrgSubfr[b];
        for (s = 0; s < SilkConstants.VAD_INTERNAL_SUBFRAMES; s++)
        {
            sumSquared = 0;

            for (i = 0; i < dec_subframe_length; i++)
            {
                /* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2.           */
                /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128)  */
                x_tmp = Inlines.silk_RSHIFT(X[X_offset[b] + i + dec_subframe_offset], 3);
                sumSquared = Inlines.silk_SMLABB(sumSquared, x_tmp, x_tmp);

                /* Safety check */
                Inlines.OpusAssert(sumSquared >= 0);
            }

            /* Add/saturate summed energy of current subframe */
            if (s < SilkConstants.VAD_INTERNAL_SUBFRAMES - 1)
            {
                Xnrg[b] = Inlines.silk_ADD_POS_SAT32(Xnrg[b], sumSquared);
            }
            else
            {
                /* Look-ahead subframe: only half its energy counts this frame */
                Xnrg[b] = Inlines.silk_ADD_POS_SAT32(Xnrg[b], Inlines.silk_RSHIFT(sumSquared, 1));
            }

            dec_subframe_offset += dec_subframe_length;
        }

        /* Remember the last subframe's energy for the next frame's initialization */
        psSilk_VAD.XnrgSubfr[b] = sumSquared;
    }

    /********************/
    /* Noise estimation */
    /********************/
    silk_VAD_GetNoiseLevels(Xnrg, psSilk_VAD);

    /***********************************************/
    /* Signal-plus-noise to noise ratio estimation */
    /***********************************************/
    sumSquared = 0;
    input_tilt = 0;
    for (b = 0; b < SilkConstants.VAD_N_BANDS; b++)
    {
        speech_nrg = Xnrg[b] - psSilk_VAD.NL[b];
        if (speech_nrg > 0)
        {
            /* Divide, with sufficient resolution */
            if ((Xnrg[b] & 0xFF800000) == 0)
            {
                NrgToNoiseRatio_Q8[b] = Inlines.silk_DIV32(Inlines.silk_LSHIFT(Xnrg[b], 8), psSilk_VAD.NL[b] + 1);
            }
            else
            {
                NrgToNoiseRatio_Q8[b] = Inlines.silk_DIV32(Xnrg[b], Inlines.silk_RSHIFT(psSilk_VAD.NL[b], 8) + 1);
            }

            /* Convert to log domain */
            SNR_Q7 = Inlines.silk_lin2log(NrgToNoiseRatio_Q8[b]) - 8 * 128;

            /* Sum-of-squares */
            sumSquared = Inlines.silk_SMLABB(sumSquared, SNR_Q7, SNR_Q7); /* Q14 */

            /* Tilt measure */
            if (speech_nrg < ((int)1 << 20))
            {
                /* Scale down SNR value for small subband speech energies */
                SNR_Q7 = Inlines.silk_SMULWB(Inlines.silk_LSHIFT(Inlines.silk_SQRT_APPROX(speech_nrg), 6), SNR_Q7);
            }

            input_tilt = Inlines.silk_SMLAWB(input_tilt, tiltWeights[b], SNR_Q7);
        }
        else
        {
            NrgToNoiseRatio_Q8[b] = 256;
        }
    }

    /* Mean-of-squares */
    sumSquared = Inlines.silk_DIV32_16(sumSquared, SilkConstants.VAD_N_BANDS); /* Q14 */

    /* Root-mean-square approximation, scale to dBs, and write to output pointer */
    pSNR_dB_Q7 = (short)(3 * Inlines.silk_SQRT_APPROX(sumSquared)); /* Q7 */

    /*********************************/
    /* Speech Probability Estimation */
    /*********************************/
    SA_Q15 = Sigmoid.silk_sigm_Q15(Inlines.silk_SMULWB(SilkConstants.VAD_SNR_FACTOR_Q16, pSNR_dB_Q7) - SilkConstants.VAD_NEGATIVE_OFFSET_Q5);

    /**************************/
    /* Frequency Tilt Measure */
    /**************************/
    psEncC.input_tilt_Q15 = Inlines.silk_LSHIFT(Sigmoid.silk_sigm_Q15(input_tilt) - 16384, 1);

    /**************************************************/
    /* Scale the sigmoid output based on power levels */
    /**************************************************/
    speech_nrg = 0;
    for (b = 0; b < SilkConstants.VAD_N_BANDS; b++)
    {
        /* Accumulate signal-without-noise energies, higher frequency bands have more weight */
        speech_nrg += (b + 1) * Inlines.silk_RSHIFT(Xnrg[b] - psSilk_VAD.NL[b], 4);
    }

    /* Power scaling */
    if (speech_nrg <= 0)
    {
        SA_Q15 = Inlines.silk_RSHIFT(SA_Q15, 1);
    }
    else if (speech_nrg < 32768)
    {
        if (psEncC.frame_length == 10 * psEncC.fs_kHz)
        {
            speech_nrg = Inlines.silk_LSHIFT_SAT32(speech_nrg, 16);
        }
        else
        {
            speech_nrg = Inlines.silk_LSHIFT_SAT32(speech_nrg, 15);
        }

        /* square-root */
        speech_nrg = Inlines.silk_SQRT_APPROX(speech_nrg);
        SA_Q15 = Inlines.silk_SMULWB(32768 + speech_nrg, SA_Q15);
    }

    /* Copy the resulting speech activity in Q8 */
    psEncC.speech_activity_Q8 = Inlines.silk_min_int(Inlines.silk_RSHIFT(SA_Q15, 7), byte.MaxValue);

    /***********************************/
    /* Energy Level and SNR estimation */
    /***********************************/
    /* Smoothing coefficient */
    smooth_coef_Q16 = Inlines.silk_SMULWB(SilkConstants.VAD_SNR_SMOOTH_COEF_Q18, Inlines.silk_SMULWB((int)SA_Q15, SA_Q15));

    if (psEncC.frame_length == 10 * psEncC.fs_kHz)
    {
        /* 10 ms frames update the state twice as often, so halve the coefficient */
        smooth_coef_Q16 >>= 1;
    }

    for (b = 0; b < SilkConstants.VAD_N_BANDS; b++)
    {
        /* compute smoothed energy-to-noise ratio per band */
        psSilk_VAD.NrgRatioSmth_Q8[b] = Inlines.silk_SMLAWB(psSilk_VAD.NrgRatioSmth_Q8[b], NrgToNoiseRatio_Q8[b] - psSilk_VAD.NrgRatioSmth_Q8[b], smooth_coef_Q16);

        /* signal to noise ratio in dB per band */
        SNR_Q7 = 3 * (Inlines.silk_lin2log(psSilk_VAD.NrgRatioSmth_Q8[b]) - 8 * 128);

        /* quality = sigmoid( 0.25 * ( SNR_dB - 16 ) ); */
        psEncC.input_quality_bands_Q15[b] = Sigmoid.silk_sigm_Q15(Inlines.silk_RSHIFT(SNR_Q7 - 16 * 128, 4));
    }

    return (ret);
}
/// <summary>
/// Encode side-information parameters to payload.
/// NOTE: the order of enc_icdf calls below defines the SILK bitstream layout —
/// signal type/offset, gains, NLSFs, interpolation factor, pitch lags, LTP gains,
/// LTP scaling, then seed. Do not reorder.
/// </summary>
/// <param name="psEncC">I/O Encoder state</param>
/// <param name="psRangeEnc">I/O Compressor data structure</param>
/// <param name="FrameIndex">I Frame number</param>
/// <param name="encode_LBRR">I Flag indicating LBRR data is being encoded</param>
/// <param name="condCoding">I The type of conditional coding to use</param>
internal static void silk_encode_indices(
    SilkChannelEncoder psEncC,
    EntropyCoder psRangeEnc,
    int FrameIndex,
    int encode_LBRR,
    int condCoding)
{
    int i, k, typeOffset;
    int encode_absolute_lagIndex, delta_lagIndex;
    short[] ec_ix = new short[SilkConstants.MAX_LPC_ORDER];
    byte[] pred_Q8 = new byte[SilkConstants.MAX_LPC_ORDER];
    SideInfoIndices psIndices;

    /* LBRR frames carry their own per-frame side info */
    if (encode_LBRR != 0)
    {
        psIndices = psEncC.indices_LBRR[FrameIndex];
    }
    else
    {
        psIndices = psEncC.indices;
    }

    /*******************************************/
    /* Encode signal type and quantizer offset */
    /*******************************************/
    typeOffset = 2 * psIndices.signalType + psIndices.quantOffsetType;
    Inlines.OpusAssert(typeOffset >= 0 && typeOffset < 6);
    Inlines.OpusAssert(encode_LBRR == 0 || typeOffset >= 2);
    if (encode_LBRR != 0 || typeOffset >= 2)
    {
        psRangeEnc.enc_icdf(typeOffset - 2, Tables.silk_type_offset_VAD_iCDF, 8);
    }
    else
    {
        psRangeEnc.enc_icdf(typeOffset, Tables.silk_type_offset_no_VAD_iCDF, 8);
    }

    /****************/
    /* Encode gains */
    /****************/
    /* first subframe */
    if (condCoding == SilkConstants.CODE_CONDITIONALLY)
    {
        /* conditional coding */
        Inlines.OpusAssert(psIndices.GainsIndices[0] >= 0 && psIndices.GainsIndices[0] < SilkConstants.MAX_DELTA_GAIN_QUANT - SilkConstants.MIN_DELTA_GAIN_QUANT + 1);
        psRangeEnc.enc_icdf(psIndices.GainsIndices[0], Tables.silk_delta_gain_iCDF, 8);
    }
    else
    {
        /* independent coding, in two stages: MSB bits followed by 3 LSBs */
        Inlines.OpusAssert(psIndices.GainsIndices[0] >= 0 && psIndices.GainsIndices[0] < SilkConstants.N_LEVELS_QGAIN);
        psRangeEnc.enc_icdf(Inlines.silk_RSHIFT(psIndices.GainsIndices[0], 3), Tables.silk_gain_iCDF[psIndices.signalType], 8);
        psRangeEnc.enc_icdf(psIndices.GainsIndices[0] & 7, Tables.silk_uniform8_iCDF, 8);
    }

    /* remaining subframes: delta-coded against the previous subframe's gain */
    for (i = 1; i < psEncC.nb_subfr; i++)
    {
        Inlines.OpusAssert(psIndices.GainsIndices[i] >= 0 && psIndices.GainsIndices[i] < SilkConstants.MAX_DELTA_GAIN_QUANT - SilkConstants.MIN_DELTA_GAIN_QUANT + 1);
        psRangeEnc.enc_icdf(psIndices.GainsIndices[i], Tables.silk_delta_gain_iCDF, 8);
    }

    /****************/
    /* Encode NLSFs */
    /****************/
    psRangeEnc.enc_icdf(psIndices.NLSFIndices[0], psEncC.psNLSF_CB.CB1_iCDF, ((psIndices.signalType >> 1) * psEncC.psNLSF_CB.nVectors), 8);
    NLSF.silk_NLSF_unpack(ec_ix, pred_Q8, psEncC.psNLSF_CB, psIndices.NLSFIndices[0]);
    Inlines.OpusAssert(psEncC.psNLSF_CB.order == psEncC.predictLPCOrder);

    /* Per-coefficient residual indices; out-of-range values use an escape code
       plus an extension symbol */
    for (i = 0; i < psEncC.psNLSF_CB.order; i++)
    {
        if (psIndices.NLSFIndices[i + 1] >= SilkConstants.NLSF_QUANT_MAX_AMPLITUDE)
        {
            psRangeEnc.enc_icdf(2 * SilkConstants.NLSF_QUANT_MAX_AMPLITUDE, psEncC.psNLSF_CB.ec_iCDF, (ec_ix[i]), 8);
            psRangeEnc.enc_icdf(psIndices.NLSFIndices[i + 1] - SilkConstants.NLSF_QUANT_MAX_AMPLITUDE, Tables.silk_NLSF_EXT_iCDF, 8);
        }
        else if (psIndices.NLSFIndices[i + 1] <= 0 - SilkConstants.NLSF_QUANT_MAX_AMPLITUDE)
        {
            psRangeEnc.enc_icdf(0, psEncC.psNLSF_CB.ec_iCDF, ec_ix[i], 8);
            psRangeEnc.enc_icdf(-psIndices.NLSFIndices[i + 1] - SilkConstants.NLSF_QUANT_MAX_AMPLITUDE, Tables.silk_NLSF_EXT_iCDF, 8);
        }
        else
        {
            psRangeEnc.enc_icdf(psIndices.NLSFIndices[i + 1] + SilkConstants.NLSF_QUANT_MAX_AMPLITUDE, psEncC.psNLSF_CB.ec_iCDF, ec_ix[i], 8);
        }
    }

    /* Encode NLSF interpolation factor */
    if (psEncC.nb_subfr == SilkConstants.MAX_NB_SUBFR)
    {
        Inlines.OpusAssert(psIndices.NLSFInterpCoef_Q2 >= 0 && psIndices.NLSFInterpCoef_Q2 < 5);
        psRangeEnc.enc_icdf(psIndices.NLSFInterpCoef_Q2, Tables.silk_NLSF_interpolation_factor_iCDF, 8);
    }

    if (psIndices.signalType == SilkConstants.TYPE_VOICED)
    {
        /*********************/
        /* Encode pitch lags */
        /*********************/
        /* lag index */
        encode_absolute_lagIndex = 1;
        if (condCoding == SilkConstants.CODE_CONDITIONALLY && psEncC.ec_prevSignalType == SilkConstants.TYPE_VOICED)
        {
            /* Delta Encoding */
            delta_lagIndex = psIndices.lagIndex - psEncC.ec_prevLagIndex;

            if (delta_lagIndex < -8 || delta_lagIndex > 11)
            {
                /* Delta out of range: symbol 0 signals "absolute lag follows" */
                delta_lagIndex = 0;
            }
            else
            {
                delta_lagIndex = delta_lagIndex + 9;
                encode_absolute_lagIndex = 0; /* Only use delta */
            }

            Inlines.OpusAssert(delta_lagIndex >= 0 && delta_lagIndex < 21);
            psRangeEnc.enc_icdf(delta_lagIndex, Tables.silk_pitch_delta_iCDF, 8);
        }

        if (encode_absolute_lagIndex != 0)
        {
            /* Absolute encoding */
            int pitch_high_bits, pitch_low_bits;
            pitch_high_bits = Inlines.silk_DIV32_16(psIndices.lagIndex, Inlines.silk_RSHIFT(psEncC.fs_kHz, 1));
            pitch_low_bits = psIndices.lagIndex - Inlines.silk_SMULBB(pitch_high_bits, Inlines.silk_RSHIFT(psEncC.fs_kHz, 1));
            Inlines.OpusAssert(pitch_low_bits < psEncC.fs_kHz / 2);
            Inlines.OpusAssert(pitch_high_bits < 32);
            psRangeEnc.enc_icdf(pitch_high_bits, Tables.silk_pitch_lag_iCDF, 8);
            psRangeEnc.enc_icdf(pitch_low_bits, psEncC.pitch_lag_low_bits_iCDF, 8);
        }
        psEncC.ec_prevLagIndex = psIndices.lagIndex;

        /* Countour index */
        Inlines.OpusAssert(psIndices.contourIndex >= 0);
        Inlines.OpusAssert((psIndices.contourIndex < 34 && psEncC.fs_kHz > 8 && psEncC.nb_subfr == 4) ||
            (psIndices.contourIndex < 11 && psEncC.fs_kHz == 8 && psEncC.nb_subfr == 4) ||
            (psIndices.contourIndex < 12 && psEncC.fs_kHz > 8 && psEncC.nb_subfr == 2) ||
            (psIndices.contourIndex < 3 && psEncC.fs_kHz == 8 && psEncC.nb_subfr == 2));
        psRangeEnc.enc_icdf(psIndices.contourIndex, psEncC.pitch_contour_iCDF, 8);

        /********************/
        /* Encode LTP gains */
        /********************/
        /* PERIndex value */
        Inlines.OpusAssert(psIndices.PERIndex >= 0 && psIndices.PERIndex < 3);
        psRangeEnc.enc_icdf(psIndices.PERIndex, Tables.silk_LTP_per_index_iCDF, 8);

        /* Codebook Indices */
        for (k = 0; k < psEncC.nb_subfr; k++)
        {
            Inlines.OpusAssert(psIndices.LTPIndex[k] >= 0 && psIndices.LTPIndex[k] < (8 << psIndices.PERIndex));
            psRangeEnc.enc_icdf(psIndices.LTPIndex[k], Tables.silk_LTP_gain_iCDF_ptrs[psIndices.PERIndex], 8);
        }

        /**********************/
        /* Encode LTP scaling */
        /**********************/
        if (condCoding == SilkConstants.CODE_INDEPENDENTLY)
        {
            Inlines.OpusAssert(psIndices.LTP_scaleIndex >= 0 && psIndices.LTP_scaleIndex < 3);
            psRangeEnc.enc_icdf(psIndices.LTP_scaleIndex, Tables.silk_LTPscale_iCDF, 8);
        }

        Inlines.OpusAssert(condCoding == 0 || psIndices.LTP_scaleIndex == 0);
    }

    psEncC.ec_prevSignalType = psIndices.signalType;

    /***************/
    /* Encode seed */
    /***************/
    Inlines.OpusAssert(psIndices.Seed >= 0 && psIndices.Seed < 4);
    psRangeEnc.enc_icdf(psIndices.Seed, Tables.silk_uniform4_iCDF, 8);
}
/* Find the LPC and LTP prediction coefficients for the frame: normalizes gains,
   runs LTP analysis + quantization for voiced frames (or zeroes LTP for unvoiced),
   derives the prediction-gain limit, finds/quantizes the LPC coefficients as NLSFs,
   and computes the per-subframe residual energies. */
internal static void silk_find_pred_coefs(
    SilkChannelEncoder psEnc,       /* I/O encoder state */
    SilkEncoderControl psEncCtrl,   /* I/O encoder control */
    short[] res_pitch,              /* I   Residual from pitch analysis */
    short[] x,                      /* I   Speech signal */
    int x_ptr,                      /* I   Read offset into x */
    int condCoding                  /* I   The type of conditional coding to use */
    )
{
    int i;
    int[] invGains_Q16 = new int[SilkConstants.MAX_NB_SUBFR];
    int[] local_gains = new int[SilkConstants.MAX_NB_SUBFR];
    int[] Wght_Q15 = new int[SilkConstants.MAX_NB_SUBFR];
    short[] NLSF_Q15 = new short[SilkConstants.MAX_LPC_ORDER];
    int x_ptr2;
    int x_pre_ptr;
    short[] LPC_in_pre;
    int tmp, min_gain_Q16, minInvGain_Q30;
    int[] LTP_corrs_rshift = new int[SilkConstants.MAX_NB_SUBFR];

    /* weighting for weighted least squares */
    min_gain_Q16 = int.MaxValue >> 6;
    for (i = 0; i < psEnc.nb_subfr; i++)
    {
        min_gain_Q16 = Inlines.silk_min(min_gain_Q16, psEncCtrl.Gains_Q16[i]);
    }

    for (i = 0; i < psEnc.nb_subfr; i++)
    {
        /* Divide to Q16 */
        Inlines.OpusAssert(psEncCtrl.Gains_Q16[i] > 0);

        /* Invert and normalize gains, and ensure that maximum invGains_Q16 is within range of a 16 bit int */
        invGains_Q16[i] = Inlines.silk_DIV32_varQ(min_gain_Q16, psEncCtrl.Gains_Q16[i], 16 - 2);

        /* Ensure Wght_Q15 a minimum value 1 */
        invGains_Q16[i] = Inlines.silk_max(invGains_Q16[i], 363);

        /* Square the inverted gains */
        Inlines.OpusAssert(invGains_Q16[i] == Inlines.silk_SAT16(invGains_Q16[i]));
        tmp = Inlines.silk_SMULWB(invGains_Q16[i], invGains_Q16[i]);
        Wght_Q15[i] = Inlines.silk_RSHIFT(tmp, 1);

        /* Invert the inverted and normalized gains */
        local_gains[i] = Inlines.silk_DIV32(((int)1 << 16), invGains_Q16[i]);
    }

    LPC_in_pre = new short[psEnc.nb_subfr * psEnc.predictLPCOrder + psEnc.frame_length];
    if (psEnc.indices.signalType == SilkConstants.TYPE_VOICED)
    {
        int[] WLTP;

        /**********/
        /* VOICED */
        /**********/
        Inlines.OpusAssert(psEnc.ltp_mem_length - psEnc.predictLPCOrder >= psEncCtrl.pitchL[0] + SilkConstants.LTP_ORDER / 2);

        WLTP = new int[psEnc.nb_subfr * SilkConstants.LTP_ORDER * SilkConstants.LTP_ORDER];

        /* LTP analysis */
        BoxedValueInt boxed_codgain = new BoxedValueInt(psEncCtrl.LTPredCodGain_Q7);
        FindLTP.silk_find_LTP(psEncCtrl.LTPCoef_Q14, WLTP, boxed_codgain, res_pitch,
            psEncCtrl.pitchL, Wght_Q15, psEnc.subfr_length, psEnc.nb_subfr, psEnc.ltp_mem_length, LTP_corrs_rshift);
        psEncCtrl.LTPredCodGain_Q7 = boxed_codgain.Val;

        /* Quantize LTP gain parameters */
        BoxedValueSbyte boxed_periodicity = new BoxedValueSbyte(psEnc.indices.PERIndex);
        BoxedValueInt boxed_gain = new BoxedValueInt(psEnc.sum_log_gain_Q7);
        QuantizeLTPGains.silk_quant_LTP_gains(psEncCtrl.LTPCoef_Q14, psEnc.indices.LTPIndex, boxed_periodicity,
            boxed_gain, WLTP, psEnc.mu_LTP_Q9, psEnc.LTPQuantLowComplexity, psEnc.nb_subfr);
        psEnc.indices.PERIndex = boxed_periodicity.Val;
        psEnc.sum_log_gain_Q7 = boxed_gain.Val;

        /* Control LTP scaling */
        LTPScaleControl.silk_LTP_scale_ctrl(psEnc, psEncCtrl, condCoding);

        /* Create LTP residual */
        LTPAnalysisFilter.silk_LTP_analysis_filter(LPC_in_pre, x, x_ptr - psEnc.predictLPCOrder,
            psEncCtrl.LTPCoef_Q14, psEncCtrl.pitchL, invGains_Q16, psEnc.subfr_length, psEnc.nb_subfr, psEnc.predictLPCOrder);
    }
    else
    {
        /************/
        /* UNVOICED */
        /************/
        /* Create signal with prepended subframes, scaled by inverse gains */
        x_ptr2 = x_ptr - psEnc.predictLPCOrder;
        x_pre_ptr = 0;
        for (i = 0; i < psEnc.nb_subfr; i++)
        {
            Inlines.silk_scale_copy_vector16(LPC_in_pre, x_pre_ptr, x, x_ptr2,
                invGains_Q16[i], psEnc.subfr_length + psEnc.predictLPCOrder);
            x_pre_ptr += psEnc.subfr_length + psEnc.predictLPCOrder;
            x_ptr2 += psEnc.subfr_length;
        }

        /* No long-term prediction for unvoiced frames */
        Arrays.MemSetShort(psEncCtrl.LTPCoef_Q14, 0, psEnc.nb_subfr * SilkConstants.LTP_ORDER);
        psEncCtrl.LTPredCodGain_Q7 = 0;
        psEnc.sum_log_gain_Q7 = 0;
    }

    /* Limit on total predictive coding gain */
    if (psEnc.first_frame_after_reset != 0)
    {
        minInvGain_Q30 = ((int)((1.0f / SilkConstants.MAX_PREDICTION_POWER_GAIN_AFTER_RESET) * ((long)1 << (30)) + 0.5)) /*Inlines.SILK_CONST(1.0f / SilkConstants.MAX_PREDICTION_POWER_GAIN_AFTER_RESET, 30)*/;
    }
    else
    {
        /* Tighter limit when the LTP already provides coding gain */
        minInvGain_Q30 = Inlines.silk_log2lin(Inlines.silk_SMLAWB(16 << 7, (int)psEncCtrl.LTPredCodGain_Q7, ((int)((1.0f / 3f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1.0f / 3f, 16)*/)); /* Q16 */
        minInvGain_Q30 = Inlines.silk_DIV32_varQ(minInvGain_Q30,
            Inlines.silk_SMULWW(((int)((SilkConstants.MAX_PREDICTION_POWER_GAIN) * ((long)1 << (0)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.MAX_PREDICTION_POWER_GAIN, 0)*/,
                Inlines.silk_SMLAWB(((int)((0.25f) * ((long)1 << (18)) + 0.5)) /*Inlines.SILK_CONST(0.25f, 18)*/, ((int)((0.75f) * ((long)1 << (18)) + 0.5)) /*Inlines.SILK_CONST(0.75f, 18)*/, psEncCtrl.coding_quality_Q14)), 14);
    }

    /* LPC_in_pre contains the LTP-filtered input for voiced, and the unfiltered input for unvoiced */
    FindLPC.silk_find_LPC(psEnc, NLSF_Q15, LPC_in_pre, minInvGain_Q30);

    /* Quantize LSFs */
    NLSF.silk_process_NLSFs(psEnc, psEncCtrl.PredCoef_Q12, NLSF_Q15, psEnc.prev_NLSFq_Q15);

    /* Calculate residual energy using quantized LPC coefficients */
    ResidualEnergy.silk_residual_energy(psEncCtrl.ResNrg, psEncCtrl.ResNrgQ, LPC_in_pre, psEncCtrl.PredCoef_Q12, local_gains,
        psEnc.subfr_length, psEnc.nb_subfr, psEnc.predictLPCOrder);

    /* Copy to prediction struct for use in next frame for interpolation */
    Array.Copy(NLSF_Q15, psEnc.prev_NLSFq_Q15, SilkConstants.MAX_LPC_ORDER);
}
/**************************************************************/
/* Compute noise shaping coefficients and initial gain values */
/**************************************************************/
/// <summary>
/// Noise shaping analysis for one frame: derives the per-subframe quantization
/// gains (Gains_Q16 / GainsPre_Q14), the analysis/synthesis shaping AR filters
/// (AR1_Q13 / AR2_Q13), low-frequency shaping coefficients (LF_shp_Q14), and the
/// smoothed harmonic-boost / harmonic-shape / tilt parameters. All arithmetic is
/// fixed-point; the _Qn suffixes give the binary point position of each value.
/// </summary>
internal static void silk_noise_shape_analysis(
    SilkChannelEncoder psEnc,       /* I/O Encoder state FIX */
    SilkEncoderControl psEncCtrl,   /* I/O Encoder control FIX */
    short[] pitch_res,              /* I LPC residual from pitch analysis */
    int pitch_res_ptr,              /* I Offset into pitch_res where the residual starts */
    short[] x,                      /* I Input signal [ frame_length + la_shape ] */
    int x_ptr                       /* I Offset into x of the current frame */
    )
{
    SilkShapeState psShapeSt = psEnc.sShape;
    int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0;
    int SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32;
    int nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7;
    int delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8;
    int[] auto_corr = new int[SilkConstants.MAX_SHAPE_LPC_ORDER + 1];
    int[] refl_coef_Q16 = new int[SilkConstants.MAX_SHAPE_LPC_ORDER];
    int[] AR1_Q24 = new int[SilkConstants.MAX_SHAPE_LPC_ORDER];
    int[] AR2_Q24 = new int[SilkConstants.MAX_SHAPE_LPC_ORDER];
    short[] x_windowed;
    int pitch_res_ptr2;
    int x_ptr2;

    /* Point to start of first LPC analysis block (la_shape samples before the frame) */
    x_ptr2 = x_ptr - psEnc.la_shape;

    /****************/
    /* GAIN CONTROL */
    /****************/
    SNR_adj_dB_Q7 = psEnc.SNR_dB_Q7;

    /* Input quality is the average of the quality in the lowest two VAD bands */
    psEncCtrl.input_quality_Q14 = (int)Inlines.silk_RSHIFT((int)psEnc.input_quality_bands_Q15[0]
        + psEnc.input_quality_bands_Q15[1], 2);

    /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */
    /* (sigmoid of the SNR headroom above 20 dB) */
    psEncCtrl.coding_quality_Q14 = Inlines.silk_RSHIFT(Sigmoid.silk_sigm_Q15(Inlines.silk_RSHIFT_ROUND(SNR_adj_dB_Q7 -
        ((int)((20.0f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(20.0f, 7)*/, 4)), 1);

    /* Reduce coding SNR during low speech activity */
    if (psEnc.useCBR == 0)
    {
        /* b_Q8 = (1 - speech_activity)^2, emphasizing long inactive stretches */
        b_Q8 = ((int)((1.0f) * ((long)1 << (8)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 8)*/ - psEnc.speech_activity_Q8;
        b_Q8 = Inlines.silk_SMULWB(Inlines.silk_LSHIFT(b_Q8, 8), b_Q8);
        SNR_adj_dB_Q7 = Inlines.silk_SMLAWB(SNR_adj_dB_Q7,
            Inlines.silk_SMULBB(((int)((0 - TuningParameters.BG_SNR_DECR_dB) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(0 - TuningParameters.BG_SNR_DECR_dB, 7)*/ >> (4 + 1), b_Q8), /* Q11*/
            Inlines.silk_SMULWB(((int)((1.0f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 14)*/ + psEncCtrl.input_quality_Q14, psEncCtrl.coding_quality_Q14)); /* Q12*/
    }

    if (psEnc.indices.signalType == SilkConstants.TYPE_VOICED)
    {
        /* Reduce gains for periodic signals (scaled by LTP correlation) */
        SNR_adj_dB_Q7 = Inlines.silk_SMLAWB(SNR_adj_dB_Q7, ((int)((TuningParameters.HARM_SNR_INCR_dB) * ((long)1 << (8)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HARM_SNR_INCR_dB, 8)*/, psEnc.LTPCorr_Q15);
    }
    else
    {
        /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */
        SNR_adj_dB_Q7 = Inlines.silk_SMLAWB(SNR_adj_dB_Q7,
            Inlines.silk_SMLAWB(((int)((6.0f) * ((long)1 << (9)) + 0.5)) /*Inlines.SILK_CONST(6.0f, 9)*/, -((int)((0.4f) * ((long)1 << (18)) + 0.5)) /*Inlines.SILK_CONST(0.4f, 18)*/, psEnc.SNR_dB_Q7),
            ((int)((1.0f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 14)*/ - psEncCtrl.input_quality_Q14);
    }

    /*************************/
    /* SPARSENESS PROCESSING */
    /*************************/
    /* Set quantizer offset */
    if (psEnc.indices.signalType == SilkConstants.TYPE_VOICED)
    {
        /* Initially set to 0; may be overruled in process_gains(..) */
        psEnc.indices.quantOffsetType = 0;
        psEncCtrl.sparseness_Q8 = 0;
    }
    else
    {
        /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */
        nSamples = Inlines.silk_LSHIFT(psEnc.fs_kHz, 1);    /* 2 ms worth of samples */
        energy_variation_Q7 = 0;
        log_energy_prev_Q7 = 0;
        pitch_res_ptr2 = pitch_res_ptr;
        /* Walk the pitch residual in 2 ms chunks, accumulating |delta log-energy| */
        for (k = 0; k < Inlines.silk_SMULBB(SilkConstants.SUB_FRAME_LENGTH_MS, psEnc.nb_subfr) / 2; k++)
        {
            SumSqrShift.silk_sum_sqr_shift(out nrg, out scale, pitch_res, pitch_res_ptr2, nSamples);
            /* Add a small floor so log of zero-energy chunks is well defined */
            nrg += Inlines.silk_RSHIFT(nSamples, scale);           /* Q(-scale)*/

            log_energy_Q7 = Inlines.silk_lin2log(nrg);
            if (k > 0)
            {
                energy_variation_Q7 += Inlines.silk_abs(log_energy_Q7 - log_energy_prev_Q7);
            }
            log_energy_prev_Q7 = log_energy_Q7;
            pitch_res_ptr2 += nSamples;
        }

        /* Map energy variation through a sigmoid to get a 0..1 (Q8) sparseness value */
        psEncCtrl.sparseness_Q8 = Inlines.silk_RSHIFT(Sigmoid.silk_sigm_Q15(Inlines.silk_SMULWB(energy_variation_Q7 -
            ((int)((5.0f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(5.0f, 7)*/, ((int)((0.1f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(0.1f, 16)*/)), 7);

        /* Set quantization offset depending on sparseness measure */
        if (psEncCtrl.sparseness_Q8 > ((int)((TuningParameters.SPARSENESS_THRESHOLD_QNT_OFFSET) * ((long)1 << (8)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.SPARSENESS_THRESHOLD_QNT_OFFSET, 8)*/)
        {
            psEnc.indices.quantOffsetType = 0;
        }
        else
        {
            psEnc.indices.quantOffsetType = 1;
        }

        /* Increase coding SNR for sparse signals */
        SNR_adj_dB_Q7 = Inlines.silk_SMLAWB(SNR_adj_dB_Q7, ((int)((TuningParameters.SPARSE_SNR_INCR_dB) * ((long)1 << (15)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.SPARSE_SNR_INCR_dB, 15)*/, psEncCtrl.sparseness_Q8 - ((int)((0.5f) * ((long)1 << (8)) + 0.5)) /*Inlines.SILK_CONST(0.5f, 8)*/);
    }

    /*******************************/
    /* Control bandwidth expansion */
    /*******************************/
    /* More BWE for signals with high prediction gain */
    strength_Q16 = Inlines.silk_SMULWB(psEncCtrl.predGain_Q16, ((int)((TuningParameters.FIND_PITCH_WHITE_NOISE_FRACTION) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.FIND_PITCH_WHITE_NOISE_FRACTION, 16)*/);
    BWExp1_Q16 = BWExp2_Q16 = Inlines.silk_DIV32_varQ(((int)((TuningParameters.BANDWIDTH_EXPANSION) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.BANDWIDTH_EXPANSION, 16)*/,
        Inlines.silk_SMLAWW(((int)((1.0f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 16)*/, strength_Q16, strength_Q16), 16);
    /* Spread the two expansion factors apart by a rate/quality dependent delta */
    delta_Q16 = Inlines.silk_SMULWB(((int)((1.0f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 16)*/ - Inlines.silk_SMULBB(3, psEncCtrl.coding_quality_Q14),
        ((int)((TuningParameters.LOW_RATE_BANDWIDTH_EXPANSION_DELTA) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16)*/);
    BWExp1_Q16 = Inlines.silk_SUB32(BWExp1_Q16, delta_Q16);
    BWExp2_Q16 = Inlines.silk_ADD32(BWExp2_Q16, delta_Q16);
    /* BWExp1 will be applied after BWExp2, so make it relative */
    BWExp1_Q16 = Inlines.silk_DIV32_16(Inlines.silk_LSHIFT(BWExp1_Q16, 14), Inlines.silk_RSHIFT(BWExp2_Q16, 2));

    if (psEnc.warping_Q16 > 0)
    {
        /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */
        warping_Q16 = Inlines.silk_SMLAWB(psEnc.warping_Q16, (int)psEncCtrl.coding_quality_Q14, ((int)((0.01f) * ((long)1 << (18)) + 0.5)) /*Inlines.SILK_CONST(0.01f, 18)*/);
    }
    else
    {
        warping_Q16 = 0;
    }

    /********************************************/
    /* Compute noise shaping AR coefs and gains */
    /********************************************/
    x_windowed = new short[psEnc.shapeWinLength];
    for (k = 0; k < psEnc.nb_subfr; k++)
    {
        /* Apply window: sine slope followed by flat part followed by cosine slope */
        int shift, slope_part, flat_part;
        flat_part = psEnc.fs_kHz * 3;    /* 3 ms flat section in the middle */
        slope_part = Inlines.silk_RSHIFT(psEnc.shapeWinLength - flat_part, 1);

        ApplySineWindow.silk_apply_sine_window(x_windowed, 0, x, x_ptr2, 1, slope_part);
        shift = slope_part;
        Array.Copy(x, x_ptr2 + shift, x_windowed, shift, flat_part);
        shift += flat_part;
        ApplySineWindow.silk_apply_sine_window(x_windowed, shift, x, x_ptr2 + shift, 2, slope_part);

        /* Update pointer: next LPC analysis block */
        x_ptr2 += psEnc.subfr_length;

        BoxedValueInt scale_boxed = new BoxedValueInt(scale);
        if (psEnc.warping_Q16 > 0)
        {
            /* Calculate warped auto correlation */
            Autocorrelation.silk_warped_autocorrelation(auto_corr, scale_boxed, x_windowed, warping_Q16, psEnc.shapeWinLength, psEnc.shapingLPCOrder);
        }
        else
        {
            /* Calculate regular auto correlation */
            Autocorrelation.silk_autocorr(auto_corr, scale_boxed, x_windowed, psEnc.shapeWinLength, psEnc.shapingLPCOrder + 1);
        }
        scale = scale_boxed.Val;

        /* Add white noise, as a fraction of energy (regularizes Schur recursion) */
        auto_corr[0] = Inlines.silk_ADD32(auto_corr[0], Inlines.silk_max_32(Inlines.silk_SMULWB(Inlines.silk_RSHIFT(auto_corr[0], 4),
            ((int)((TuningParameters.SHAPE_WHITE_NOISE_FRACTION) * ((long)1 << (20)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.SHAPE_WHITE_NOISE_FRACTION, 20)*/), 1));

        /* Calculate the reflection coefficients using schur */
        nrg = Schur.silk_schur64(refl_coef_Q16, auto_corr, psEnc.shapingLPCOrder);
        Inlines.OpusAssert(nrg >= 0);

        /* Convert reflection coefficients to prediction coefficients */
        K2A.silk_k2a_Q16(AR2_Q24, refl_coef_Q16, psEnc.shapingLPCOrder);

        Qnrg = -scale;          /* range: -12...30*/
        Inlines.OpusAssert(Qnrg >= -12);
        Inlines.OpusAssert(Qnrg <= 30);

        /* Make sure that Qnrg is an even number, so sqrt halves it exactly */
        if ((Qnrg & 1) != 0)
        {
            Qnrg -= 1;
            nrg >>= 1;
        }

        tmp32 = Inlines.silk_SQRT_APPROX(nrg);
        Qnrg >>= 1;             /* range: -6...15*/

        /* Subframe gain = sqrt(residual energy), renormalized to Q16 */
        psEncCtrl.Gains_Q16[k] = Inlines.silk_LSHIFT_SAT32(tmp32, 16 - Qnrg);

        if (psEnc.warping_Q16 > 0)
        {
            /* Adjust gain for warping */
            gain_mult_Q16 = warped_gain(AR2_Q24, warping_Q16, psEnc.shapingLPCOrder);
            Inlines.OpusAssert(psEncCtrl.Gains_Q16[k] >= 0);
            /* Saturate instead of overflowing when the product would exceed int range */
            if (Inlines.silk_SMULWW(Inlines.silk_RSHIFT_ROUND(psEncCtrl.Gains_Q16[k], 1), gain_mult_Q16) >= (int.MaxValue >> 1))
            {
                psEncCtrl.Gains_Q16[k] = int.MaxValue;
            }
            else
            {
                psEncCtrl.Gains_Q16[k] = Inlines.silk_SMULWW(psEncCtrl.Gains_Q16[k], gain_mult_Q16);
            }
        }

        /* Bandwidth expansion for synthesis filter shaping */
        BWExpander.silk_bwexpander_32(AR2_Q24, psEnc.shapingLPCOrder, BWExp2_Q16);

        /* Compute noise shaping filter coefficients */
        Array.Copy(AR2_Q24, AR1_Q24, psEnc.shapingLPCOrder);

        /* Bandwidth expansion for analysis filter shaping */
        Inlines.OpusAssert(BWExp1_Q16 <= ((int)((1.0f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 16)*/);
        BWExpander.silk_bwexpander_32(AR1_Q24, psEnc.shapingLPCOrder, BWExp1_Q16);

        /* Ratio of prediction gains, in energy domain */
        pre_nrg_Q30 = LPCInversePredGain.silk_LPC_inverse_pred_gain_Q24(AR2_Q24, psEnc.shapingLPCOrder);
        nrg = LPCInversePredGain.silk_LPC_inverse_pred_gain_Q24(AR1_Q24, psEnc.shapingLPCOrder);

        /*psEncCtrl.GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/
        pre_nrg_Q30 = Inlines.silk_LSHIFT32(Inlines.silk_SMULWB(pre_nrg_Q30, ((int)((0.7f) * ((long)1 << (15)) + 0.5)) /*Inlines.SILK_CONST(0.7f, 15)*/), 1);
        psEncCtrl.GainsPre_Q14[k] = (int)((int)((0.3f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(0.3f, 14)*/ + Inlines.silk_DIV32_varQ(pre_nrg_Q30, nrg, 14);

        /* Convert to monic warped prediction coefficients and limit absolute values */
        limit_warped_coefs(AR2_Q24, AR1_Q24, warping_Q16, ((int)((3.999f) * ((long)1 << (24)) + 0.5)) /*Inlines.SILK_CONST(3.999f, 24)*/, psEnc.shapingLPCOrder);

        /* Convert from Q24 to Q13 and store in int16 */
        for (i = 0; i < psEnc.shapingLPCOrder; i++)
        {
            psEncCtrl.AR1_Q13[k * SilkConstants.MAX_SHAPE_LPC_ORDER + i] = (short)Inlines.silk_SAT16(Inlines.silk_RSHIFT_ROUND(AR1_Q24[i], 11));
            psEncCtrl.AR2_Q13[k * SilkConstants.MAX_SHAPE_LPC_ORDER + i] = (short)Inlines.silk_SAT16(Inlines.silk_RSHIFT_ROUND(AR2_Q24[i], 11));
        }
    }

    /*****************/
    /* Gain tweaking */
    /*****************/
    /* Increase gains during low speech activity and put lower limit on gains */
    gain_mult_Q16 = Inlines.silk_log2lin(-Inlines.silk_SMLAWB(-((int)((16.0f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(16.0f, 7)*/, SNR_adj_dB_Q7, ((int)((0.16f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(0.16f, 16)*/));
    gain_add_Q16 = Inlines.silk_log2lin(Inlines.silk_SMLAWB(((int)((16.0f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(16.0f, 7)*/, ((int)((SilkConstants.MIN_QGAIN_DB) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.MIN_QGAIN_DB, 7)*/, ((int)((0.16f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(0.16f, 16)*/));
    Inlines.OpusAssert(gain_mult_Q16 > 0);

    for (k = 0; k < psEnc.nb_subfr; k++)
    {
        psEncCtrl.Gains_Q16[k] = Inlines.silk_SMULWW(psEncCtrl.Gains_Q16[k], gain_mult_Q16);
        Inlines.OpusAssert(psEncCtrl.Gains_Q16[k] >= 0);
        psEncCtrl.Gains_Q16[k] = Inlines.silk_ADD_POS_SAT32(psEncCtrl.Gains_Q16[k], gain_add_Q16);
    }

    /* Tilt-dependent multiplier applied to the pre-filter gains */
    gain_mult_Q16 = ((int)((1.0f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 16)*/ +
        Inlines.silk_RSHIFT_ROUND(Inlines.silk_MLA(((int)((TuningParameters.INPUT_TILT) * ((long)1 << (26)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.INPUT_TILT, 26)*/,
            psEncCtrl.coding_quality_Q14, ((int)((TuningParameters.HIGH_RATE_INPUT_TILT) * ((long)1 << (12)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HIGH_RATE_INPUT_TILT, 12)*/), 10);
    for (k = 0; k < psEnc.nb_subfr; k++)
    {
        psEncCtrl.GainsPre_Q14[k] = Inlines.silk_SMULWB(gain_mult_Q16, psEncCtrl.GainsPre_Q14[k]);
    }

    /************************************************/
    /* Control low-frequency shaping and noise tilt */
    /************************************************/
    /* Less low frequency shaping for noisy inputs */
    strength_Q16 = Inlines.silk_MUL(((int)((TuningParameters.LOW_FREQ_SHAPING) * ((long)1 << (4)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LOW_FREQ_SHAPING, 4)*/,
        Inlines.silk_SMLAWB(((int)((1.0f) * ((long)1 << (12)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 12)*/,
            ((int)((TuningParameters.LOW_QUALITY_LOW_FREQ_SHAPING_DECR) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LOW_QUALITY_LOW_FREQ_SHAPING_DECR, 13)*/,
            psEnc.input_quality_bands_Q15[0] - ((int)((1.0f) * ((long)1 << (15)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 15)*/));
    strength_Q16 = Inlines.silk_RSHIFT(Inlines.silk_MUL(strength_Q16, psEnc.speech_activity_Q8), 8);

    if (psEnc.indices.signalType == SilkConstants.TYPE_VOICED)
    {
        /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */
        /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/
        int fs_kHz_inv = Inlines.silk_DIV32_16(((int)((0.2f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(0.2f, 14)*/, psEnc.fs_kHz);
        for (k = 0; k < psEnc.nb_subfr; k++)
        {
            b_Q14 = fs_kHz_inv + Inlines.silk_DIV32_16(((int)((3.0f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(3.0f, 14)*/, psEncCtrl.pitchL[k]);
            /* Pack two coefficients in one int32: high half carries the AR part, low half the MA part */
            psEncCtrl.LF_shp_Q14[k] = Inlines.silk_LSHIFT(((int)((1.0f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 14)*/ - b_Q14 - Inlines.silk_SMULWB(strength_Q16, b_Q14), 16);
            psEncCtrl.LF_shp_Q14[k] |= (b_Q14 - ((int)((1.0f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 14)*/) & 0xFFFF; // opus bug: again, cast to ushort was done here where bitwise masking was intended
        }
        Inlines.OpusAssert(((int)((TuningParameters.HARM_HP_NOISE_COEF) * ((long)1 << (24)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HARM_HP_NOISE_COEF, 24)*/ < ((int)((0.5f) * ((long)1 << (24)) + 0.5)) /*Inlines.SILK_CONST(0.5f, 24)*/); /* Guarantees that second argument to SMULWB() is within range of an short*/
        Tilt_Q16 = -((int)((TuningParameters.HP_NOISE_COEF) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HP_NOISE_COEF, 16)*/ -
            Inlines.silk_SMULWB(((int)((1.0f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 16)*/ - ((int)((TuningParameters.HP_NOISE_COEF) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HP_NOISE_COEF, 16)*/,
                Inlines.silk_SMULWB(((int)((TuningParameters.HARM_HP_NOISE_COEF) * ((long)1 << (24)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HARM_HP_NOISE_COEF, 24)*/, psEnc.speech_activity_Q8));
    }
    else
    {
        b_Q14 = Inlines.silk_DIV32_16(21299, psEnc.fs_kHz);     /* 1.3_Q0 = 21299_Q14*/
        /* Pack two coefficients in one int32 */
        psEncCtrl.LF_shp_Q14[0] = Inlines.silk_LSHIFT(((int)((1.0f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 14)*/ - b_Q14 -
            Inlines.silk_SMULWB(strength_Q16, Inlines.silk_SMULWB(((int)((0.6f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(0.6f, 16)*/, b_Q14)), 16);
        psEncCtrl.LF_shp_Q14[0] |= (b_Q14 - ((int)((1.0f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 14)*/) & 0xFFFF; // opus bug: cast to ushort is better expressed as a bitwise operator, otherwise runtime analysis might flag it as an overflow error
        /* All subframes share the same LF shaping in the unvoiced case */
        for (k = 1; k < psEnc.nb_subfr; k++)
        {
            psEncCtrl.LF_shp_Q14[k] = psEncCtrl.LF_shp_Q14[0];
        }
        Tilt_Q16 = -((int)((TuningParameters.HP_NOISE_COEF) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HP_NOISE_COEF, 16)*/;
    }

    /****************************/
    /* HARMONIC SHAPING CONTROL */
    /****************************/
    /* Control boosting of harmonic frequencies */
    HarmBoost_Q16 = Inlines.silk_SMULWB(Inlines.silk_SMULWB(((int)((1.0f) * ((long)1 << (17)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 17)*/ - Inlines.silk_LSHIFT(psEncCtrl.coding_quality_Q14, 3),
        psEnc.LTPCorr_Q15), ((int)((TuningParameters.LOW_RATE_HARMONIC_BOOST) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LOW_RATE_HARMONIC_BOOST, 16)*/);

    /* More harmonic boost for noisy input signals */
    HarmBoost_Q16 = Inlines.silk_SMLAWB(HarmBoost_Q16,
        ((int)((1.0f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 16)*/ - Inlines.silk_LSHIFT(psEncCtrl.input_quality_Q14, 2), ((int)((TuningParameters.LOW_INPUT_QUALITY_HARMONIC_BOOST) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LOW_INPUT_QUALITY_HARMONIC_BOOST, 16)*/);

    if (SilkConstants.USE_HARM_SHAPING != 0 && psEnc.indices.signalType == SilkConstants.TYPE_VOICED)
    {
        /* More harmonic noise shaping for high bitrates or noisy input */
        HarmShapeGain_Q16 = Inlines.silk_SMLAWB(((int)((TuningParameters.HARMONIC_SHAPING) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HARMONIC_SHAPING, 16)*/,
            ((int)((1.0f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 16)*/ - Inlines.silk_SMULWB(((int)((1.0f) * ((long)1 << (18)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 18)*/ - Inlines.silk_LSHIFT(psEncCtrl.coding_quality_Q14, 4),
            psEncCtrl.input_quality_Q14), ((int)((TuningParameters.HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING, 16)*/);

        /* Less harmonic noise shaping for less periodic signals */
        HarmShapeGain_Q16 = Inlines.silk_SMULWB(Inlines.silk_LSHIFT(HarmShapeGain_Q16, 1),
            Inlines.silk_SQRT_APPROX(Inlines.silk_LSHIFT(psEnc.LTPCorr_Q15, 15)));
    }
    else
    {
        HarmShapeGain_Q16 = 0;
    }

    /*************************/
    /* Smooth over subframes */
    /*************************/
    /* One-pole smoothing of boost/shape/tilt toward the targets computed above; */
    /* smoothed state persists in psShapeSt across frames. */
    for (k = 0; k < SilkConstants.MAX_NB_SUBFR; k++)
    {
        psShapeSt.HarmBoost_smth_Q16 = Inlines.silk_SMLAWB(psShapeSt.HarmBoost_smth_Q16, HarmBoost_Q16 - psShapeSt.HarmBoost_smth_Q16, ((int)((TuningParameters.SUBFR_SMTH_COEF) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.SUBFR_SMTH_COEF, 16)*/);
        psShapeSt.HarmShapeGain_smth_Q16 = Inlines.silk_SMLAWB(psShapeSt.HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt.HarmShapeGain_smth_Q16, ((int)((TuningParameters.SUBFR_SMTH_COEF) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.SUBFR_SMTH_COEF, 16)*/);
        psShapeSt.Tilt_smth_Q16 = Inlines.silk_SMLAWB(psShapeSt.Tilt_smth_Q16, Tilt_Q16 - psShapeSt.Tilt_smth_Q16, ((int)((TuningParameters.SUBFR_SMTH_COEF) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.SUBFR_SMTH_COEF, 16)*/);

        psEncCtrl.HarmBoost_Q14[k] = (int)Inlines.silk_RSHIFT_ROUND(psShapeSt.HarmBoost_smth_Q16, 2);
        psEncCtrl.HarmShapeGain_Q14[k] = (int)Inlines.silk_RSHIFT_ROUND(psShapeSt.HarmShapeGain_smth_Q16, 2);
        psEncCtrl.Tilt_Q14[k] = (int)Inlines.silk_RSHIFT_ROUND(psShapeSt.Tilt_smth_Q16, 2);
    }
}
/* Find pitch lags */
/// <summary>
/// Pitch analysis for one frame: whitens the input with a short-term LPC
/// analysis filter (writing the residual to res), then runs the pitch
/// estimator on the residual to fill psEncCtrl.pitchL and the lag/contour
/// indices, classifying the frame as voiced or unvoiced. When the frame has
/// no voice activity or is the first after a reset, pitch lags are zeroed
/// instead.
/// </summary>
internal static void silk_find_pitch_lags(
    SilkChannelEncoder psEnc,       /* I/O encoder state */
    SilkEncoderControl psEncCtrl,   /* I/O encoder control */
    short[] res,                    /* O residual */
    short[] x,                      /* I Speech signal */
    int x_ptr                       /* I Offset into x of the current frame */
    )
{
    int buf_len, i, scale;
    int thrhld_Q13, res_nrg;
    int x_buf, x_buf_ptr;
    short[] Wsig;
    int Wsig_ptr;
    int[] auto_corr = new int[SilkConstants.MAX_FIND_PITCH_LPC_ORDER + 1];
    short[] rc_Q15 = new short[SilkConstants.MAX_FIND_PITCH_LPC_ORDER];
    int[] A_Q24 = new int[SilkConstants.MAX_FIND_PITCH_LPC_ORDER];
    short[] A_Q12 = new short[SilkConstants.MAX_FIND_PITCH_LPC_ORDER];

    /******************************************/
    /* Set up buffer lengths etc based on Fs  */
    /******************************************/
    buf_len = psEnc.la_pitch + psEnc.frame_length + psEnc.ltp_mem_length;

    /* Safety check */
    Inlines.OpusAssert(buf_len >= psEnc.pitch_LPC_win_length);

    /* Analysis buffer starts ltp_mem_length samples before the current frame */
    x_buf = x_ptr - psEnc.ltp_mem_length;

    /*************************************/
    /* Estimate LPC AR coefficients      */
    /*************************************/

    /* Calculate windowed signal */
    Wsig = new short[psEnc.pitch_LPC_win_length];

    /* First LA_LTP samples */
    x_buf_ptr = x_buf + buf_len - psEnc.pitch_LPC_win_length;
    Wsig_ptr = 0;
    ApplySineWindow.silk_apply_sine_window(Wsig, Wsig_ptr, x, x_buf_ptr, 1, psEnc.la_pitch);

    /* Middle un - windowed samples */
    Wsig_ptr += psEnc.la_pitch;
    x_buf_ptr += psEnc.la_pitch;
    Array.Copy(x, x_buf_ptr, Wsig, Wsig_ptr, (psEnc.pitch_LPC_win_length - Inlines.silk_LSHIFT(psEnc.la_pitch, 1)));

    /* Last LA_LTP samples */
    Wsig_ptr += psEnc.pitch_LPC_win_length - Inlines.silk_LSHIFT(psEnc.la_pitch, 1);
    x_buf_ptr += psEnc.pitch_LPC_win_length - Inlines.silk_LSHIFT(psEnc.la_pitch, 1);
    ApplySineWindow.silk_apply_sine_window(Wsig, Wsig_ptr, x, x_buf_ptr, 2, psEnc.la_pitch);

    /* Calculate autocorrelation sequence */
    BoxedValueInt boxed_scale = new BoxedValueInt();
    Autocorrelation.silk_autocorr(auto_corr, boxed_scale, Wsig, psEnc.pitch_LPC_win_length, psEnc.pitchEstimationLPCOrder + 1);
    scale = boxed_scale.Val;

    /* Add white noise, as fraction of energy (+1 keeps auto_corr[0] strictly positive) */
    auto_corr[0] = Inlines.silk_SMLAWB(auto_corr[0], auto_corr[0], ((int)((TuningParameters.FIND_PITCH_WHITE_NOISE_FRACTION) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.FIND_PITCH_WHITE_NOISE_FRACTION, 16)*/) + 1;

    /* Calculate the reflection coefficients using schur */
    res_nrg = Schur.silk_schur(rc_Q15, auto_corr, psEnc.pitchEstimationLPCOrder);

    /* Prediction gain = total energy / residual energy (floored at 1 to avoid div by zero) */
    psEncCtrl.predGain_Q16 = Inlines.silk_DIV32_varQ(auto_corr[0], Inlines.silk_max_int(res_nrg, 1), 16);

    /* Convert reflection coefficients to prediction coefficients */
    K2A.silk_k2a(A_Q24, rc_Q15, psEnc.pitchEstimationLPCOrder);

    /* Convert From 32 bit Q24 to 16 bit Q12 coefs */
    for (i = 0; i < psEnc.pitchEstimationLPCOrder; i++)
    {
        A_Q12[i] = (short)Inlines.silk_SAT16(Inlines.silk_RSHIFT(A_Q24[i], 12));
    }

    /* Do BWE */
    BWExpander.silk_bwexpander(A_Q12, psEnc.pitchEstimationLPCOrder, ((int)((TuningParameters.FIND_PITCH_BANDWIDTH_EXPANSION) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.FIND_PITCH_BANDWIDTH_EXPANSION, 16)*/);

    /*****************************************/
    /* LPC analysis filtering                */
    /*****************************************/
    Filters.silk_LPC_analysis_filter(res, 0, x, x_buf, A_Q12, 0, buf_len, psEnc.pitchEstimationLPCOrder);

    if (psEnc.indices.signalType != SilkConstants.TYPE_NO_VOICE_ACTIVITY && psEnc.first_frame_after_reset == 0)
    {
        /* Threshold for pitch estimator: base 0.6, lowered by LPC order, speech */
        /* activity, previous voicing, and input tilt */
        thrhld_Q13 = ((int)((0.6f) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(0.6f, 13)*/;
        thrhld_Q13 = Inlines.silk_SMLABB(thrhld_Q13, ((int)((-0.004f) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(-0.004f, 13)*/, psEnc.pitchEstimationLPCOrder);
        thrhld_Q13 = Inlines.silk_SMLAWB(thrhld_Q13, ((int)((-0.1f) * ((long)1 << (21)) + 0.5)) /*Inlines.SILK_CONST(-0.1f, 21)*/, psEnc.speech_activity_Q8);
        thrhld_Q13 = Inlines.silk_SMLABB(thrhld_Q13, ((int)((-0.15f) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(-0.15f, 13)*/, Inlines.silk_RSHIFT(psEnc.prevSignalType, 1));
        thrhld_Q13 = Inlines.silk_SMLAWB(thrhld_Q13, ((int)((-0.1f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(-0.1f, 14)*/, psEnc.input_tilt_Q15);
        thrhld_Q13 = Inlines.silk_SAT16(thrhld_Q13);

        /*****************************************/
        /* Call pitch estimator                  */
        /*****************************************/
        BoxedValueShort boxed_lagIndex = new BoxedValueShort(psEnc.indices.lagIndex);
        BoxedValueSbyte boxed_contourIndex = new BoxedValueSbyte(psEnc.indices.contourIndex);
        BoxedValueInt boxed_LTPcorr = new BoxedValueInt(psEnc.LTPCorr_Q15);
        /* Return value 0 means a pitch was found: frame is voiced */
        if (PitchAnalysisCore.silk_pitch_analysis_core(res, psEncCtrl.pitchL, boxed_lagIndex, boxed_contourIndex,
            boxed_LTPcorr, psEnc.prevLag, psEnc.pitchEstimationThreshold_Q16,
            (int)thrhld_Q13, psEnc.fs_kHz, psEnc.pitchEstimationComplexity, psEnc.nb_subfr) == 0)
        {
            psEnc.indices.signalType = SilkConstants.TYPE_VOICED;
        }
        else
        {
            psEnc.indices.signalType = SilkConstants.TYPE_UNVOICED;
        }
        psEnc.indices.lagIndex = boxed_lagIndex.Val;
        psEnc.indices.contourIndex = boxed_contourIndex.Val;
        psEnc.LTPCorr_Q15 = boxed_LTPcorr.Val;
    }
    else
    {
        /* No voice activity, or first frame after reset: clear all pitch state */
        Arrays.MemSetInt(psEncCtrl.pitchL, 0, SilkConstants.MAX_NB_SUBFR);
        psEnc.indices.lagIndex = 0;
        psEnc.indices.contourIndex = 0;
        psEnc.LTPCorr_Q15 = 0;
    }
}