/// <summary>
/// Gains scalar dequantization, uniform on log scale.
/// Walks the subframes in order, updating the running gain index held in
/// <paramref name="prev_ind"/> and writing one linear-scale gain per subframe.
/// </summary>
/// <param name="gain_Q16">O quantized gains [MAX_NB_SUBFR]</param>
/// <param name="ind">I gain indices [MAX_NB_SUBFR]</param>
/// <param name="prev_ind">I/O last index in previous frame [Porting note] original implementation passed this as an int8*</param>
/// <param name="conditional">I first gain is delta coded if 1</param>
/// <param name="nb_subfr">I number of subframes</param>
internal static void silk_gains_dequant(
    int[] gain_Q16,
    sbyte[] ind,
    BoxedValueSbyte prev_ind,
    int conditional,
    int nb_subfr)
{
    int highestLevel = SilkConstants.N_LEVELS_QGAIN - 1;

    for (int subfr = 0; subfr < nb_subfr; subfr++)
    {
        /* Only the first subframe of a non-conditionally coded frame carries an absolute index */
        bool absolutelyCoded = (subfr == 0) && (conditional == 0);

        if (absolutelyCoded)
        {
            /* Gain index is not allowed to go down more than 16 steps (~21.8 dB) */
            prev_ind.Val = (sbyte)(Inlines.silk_max_int(ind[subfr], prev_ind.Val - 16));
        }
        else
        {
            /* Delta index */
            int delta = ind[subfr] + SilkConstants.MIN_DELTA_GAIN_QUANT;

            /* Deltas above this threshold advance the index in double-sized steps */
            int doubleStepThreshold = 2 * SilkConstants.MAX_DELTA_GAIN_QUANT - SilkConstants.N_LEVELS_QGAIN + prev_ind.Val;

            /* Accumulate deltas */
            sbyte increment = (delta > doubleStepThreshold)
                ? (sbyte)(Inlines.silk_LSHIFT(delta, 1) - doubleStepThreshold)
                : (sbyte)(delta);
            prev_ind.Val += increment;
        }

        /* Keep the running index inside the valid quantizer range */
        prev_ind.Val = (sbyte)(Inlines.silk_LIMIT_int(prev_ind.Val, 0, highestLevel));

        /* Scale and convert to linear scale */
        int logGain_Q7 = Inlines.silk_SMULWB(INV_SCALE_Q16, prev_ind.Val) + OFFSET;
        gain_Q16[subfr] = Inlines.silk_log2lin(Inlines.silk_min_32(logGain_Q7, 3967)); /* 3967 = 31 in Q7 */
    }
}
/// <summary>
/// Encode frame with Silk.
/// Buffers and resamples the API input into the per-channel input buffers and, each time a
/// full internal frame is available, runs LBRR coding (first frame of a packet only),
/// optional Left/Right to Mid/Side conversion, VAD and frame encoding for every internal channel.
/// Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what
/// encControl.payloadSize_ms is set to
/// </summary>
/// <param name="psEnc">I/O State</param>
/// <param name="encControl">I Control status (several fields are also written back before returning)</param>
/// <param name="samplesIn">I Speech sample input vector (read interleaved, two values per sample, when encControl.nChannelsAPI == 2)</param>
/// <param name="nSamplesIn">I Number of samples in input vector; must be a non-negative multiple of 10 ms and no more than one packet</param>
/// <param name="psRangeEnc">I/O Compressor data structure</param>
/// <param name="nBytesOut">I/O Number of bytes in payload (input: Max bytes); reset to 0 on entry and forced to 0 when all channels are in DTX</param>
/// <param name="prefillFlag">I Flag to indicate prefilling buffers no coding</param>
/// <returns>error code (sum of the error codes returned by the helpers called, or SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES for a bad input length)</returns>
internal static int silk_Encode(
    SilkEncoder psEnc,
    EncControlState encControl,
    short[] samplesIn,
    int nSamplesIn,
    EntropyCoder psRangeEnc,
    BoxedValueInt nBytesOut,
    int prefillFlag)
{
    int ret = SilkError.SILK_NO_ERROR;
    int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0;
    int nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms;
    int nSamplesFromInput = 0, nSamplesFromInputMax;
    int speech_act_thr_for_switch_Q8;
    int TargetRate_bps, channelRate_bps, LBRR_symbol, sum;
    int[] MStargetRates_bps = new int[2];
    short[] buf;
    int transition, curr_block, tot_blocks;

    /* The incoming value of nBytesOut is discarded here; it is handed to silk_encode_frame below */
    nBytesOut.Val = 0;

    if (encControl.reducedDependency != 0)
    {
        psEnc.state_Fxx[0].first_frame_after_reset = 1;
        psEnc.state_Fxx[1].first_frame_after_reset = 1;
    }
    psEnc.state_Fxx[0].nFramesEncoded = psEnc.state_Fxx[1].nFramesEncoded = 0;

    /* Check values in encoder control structure */
    ret += encControl.check_control_input();
    if (ret != SilkError.SILK_NO_ERROR)
    {
        Inlines.OpusAssert(false);
        return(ret);
    }

    encControl.switchReady = 0;

    if (encControl.nChannelsInternal > psEnc.nChannelsInternal)
    {
        /* Mono -> Stereo transition: init state of second channel and stereo state */
        ret += SilkEncoder.silk_init_encoder(psEnc.state_Fxx[1]);
        Arrays.MemSetShort(psEnc.sStereo.pred_prev_Q13, 0, 2);
        Arrays.MemSetShort(psEnc.sStereo.sSide, 0, 2);
        psEnc.sStereo.mid_side_amp_Q0[0] = 0;
        psEnc.sStereo.mid_side_amp_Q0[1] = 1;
        psEnc.sStereo.mid_side_amp_Q0[2] = 0;
        psEnc.sStereo.mid_side_amp_Q0[3] = 1;
        psEnc.sStereo.width_prev_Q14 = 0;
        psEnc.sStereo.smth_width_Q14 = (short)(((int)((1.0f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 14)*/);
        if (psEnc.nChannelsAPI == 2)
        {
            /* Second channel starts from a copy of the first channel's resampler and high-pass state */
            psEnc.state_Fxx[1].resampler_state.Assign(psEnc.state_Fxx[0].resampler_state);
            Array.Copy(psEnc.state_Fxx[0].In_HP_State, psEnc.state_Fxx[1].In_HP_State, 2);
        }
    }

    /* A transition is a change of packet size or of the internal channel count; it clears the LBRR flags below */
    transition = ((encControl.payloadSize_ms != psEnc.state_Fxx[0].PacketSize_ms) || (psEnc.nChannelsInternal != encControl.nChannelsInternal)) ? 1 : 0;

    psEnc.nChannelsAPI = encControl.nChannelsAPI;
    psEnc.nChannelsInternal = encControl.nChannelsInternal;

    nBlocksOf10ms = Inlines.silk_DIV32(100 * nSamplesIn, encControl.API_sampleRate);
    /* tot_blocks is half the number of 10 ms blocks, but at least 1; curr_block counts loop passes that encoded a frame */
    tot_blocks = (nBlocksOf10ms > 1) ? nBlocksOf10ms >> 1 : 1;
    curr_block = 0;
    if (prefillFlag != 0)
    {
        /* Only accept input length of 10 ms */
        if (nBlocksOf10ms != 1)
        {
            Inlines.OpusAssert(false);
            return(SilkError.SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES);
        }
        /* Reset Encoder */
        for (n = 0; n < encControl.nChannelsInternal; n++)
        {
            ret += SilkEncoder.silk_init_encoder(psEnc.state_Fxx[n]);
            Inlines.OpusAssert(ret == SilkError.SILK_NO_ERROR);
        }
        /* Temporarily override payload size and complexity; both are restored at the end of this method. */
        /* NOTE(review): the error return after silk_control_encoder below leaves these overrides in place - confirm intended */
        tmp_payloadSize_ms = encControl.payloadSize_ms;
        encControl.payloadSize_ms = 10;
        tmp_complexity = encControl.complexity;
        encControl.complexity = 0;
        for (n = 0; n < encControl.nChannelsInternal; n++)
        {
            psEnc.state_Fxx[n].controlled_since_last_payload = 0;
            psEnc.state_Fxx[n].prefillFlag = 1;
        }
    }
    else
    {
        /* Only accept input lengths that are a multiple of 10 ms */
        if (nBlocksOf10ms * encControl.API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0)
        {
            Inlines.OpusAssert(false);
            return(SilkError.SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES);
        }
        /* Make sure no more than one packet can be produced */
        if (1000 * (int)nSamplesIn > encControl.payloadSize_ms * encControl.API_sampleRate)
        {
            Inlines.OpusAssert(false);
            return(SilkError.SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES);
        }
    }

    /* Per-channel rate used for encoder control: the bitrate is halved when there are two internal channels */
    TargetRate_bps = Inlines.silk_RSHIFT32(encControl.bitRate, encControl.nChannelsInternal - 1);
    for (n = 0; n < encControl.nChannelsInternal; n++)
    {
        /* Force the side channel to the same rate as the mid */
        int force_fs_kHz = (n == 1) ? psEnc.state_Fxx[0].fs_kHz : 0;
        ret += psEnc.state_Fxx[n].silk_control_encoder(encControl, TargetRate_bps, psEnc.allowBandwidthSwitch, n, force_fs_kHz);
        if (ret != SilkError.SILK_NO_ERROR)
        {
            Inlines.OpusAssert(false);
            return(ret);
        }
        if (psEnc.state_Fxx[n].first_frame_after_reset != 0 || transition != 0)
        {
            for (i = 0; i < psEnc.state_Fxx[0].nFramesPerPacket; i++)
            {
                psEnc.state_Fxx[n].LBRR_flags[i] = 0;
            }
        }
        psEnc.state_Fxx[n].inDTX = psEnc.state_Fxx[n].useDTX;
    }
    Inlines.OpusAssert(encControl.nChannelsInternal == 1 || psEnc.state_Fxx[0].fs_kHz == psEnc.state_Fxx[1].fs_kHz);

    /* Input buffering/resampling and encoding */
    nSamplesToBufferMax = 10 * nBlocksOf10ms * psEnc.state_Fxx[0].fs_kHz;
    nSamplesFromInputMax = Inlines.silk_DIV32_16(nSamplesToBufferMax * psEnc.state_Fxx[0].API_fs_Hz, (short)(psEnc.state_Fxx[0].fs_kHz * 1000));
    buf = new short[nSamplesFromInputMax];
    /* Read position in samplesIn; advanced by the number of values consumed on every pass */
    int samplesIn_ptr = 0;
    while (true)
    {
        nSamplesToBuffer = psEnc.state_Fxx[0].frame_length - psEnc.state_Fxx[0].inputBufIx;
        nSamplesToBuffer = Inlines.silk_min(nSamplesToBuffer, nSamplesToBufferMax);
        nSamplesFromInput = Inlines.silk_DIV32_16(nSamplesToBuffer * psEnc.state_Fxx[0].API_fs_Hz, psEnc.state_Fxx[0].fs_kHz * 1000);
        /* Resample and write to buffer */
        /* Resampler output is written at inputBufIx + 2, leaving the first two entries of inputBuf free */
        if (encControl.nChannelsAPI == 2 && encControl.nChannelsInternal == 2)
        {
            int id = psEnc.state_Fxx[0].nFramesEncoded;
            /* De-interleave: even positions feed channel 0 */
            for (n = 0; n < nSamplesFromInput; n++)
            {
                buf[n] = samplesIn[samplesIn_ptr + (2 * n)];
            }
            /* Making sure to start both resamplers from the same state when switching from mono to stereo */
            if (psEnc.nPrevChannelsInternal == 1 && id == 0)
            {
                //silk_memcpy(&psEnc.state_Fxx[1].resampler_state, &psEnc.state_Fxx[0].resampler_state, sizeof(psEnc.state_Fxx[1].resampler_state));
                psEnc.state_Fxx[1].resampler_state.Assign(psEnc.state_Fxx[0].resampler_state);
            }

            ret += Resampler.silk_resampler(
                psEnc.state_Fxx[0].resampler_state,
                psEnc.state_Fxx[0].inputBuf,
                psEnc.state_Fxx[0].inputBufIx + 2,
                buf,
                0,
                nSamplesFromInput);
            psEnc.state_Fxx[0].inputBufIx += nSamplesToBuffer;

            nSamplesToBuffer = psEnc.state_Fxx[1].frame_length - psEnc.state_Fxx[1].inputBufIx;
            nSamplesToBuffer = Inlines.silk_min(nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc.state_Fxx[1].fs_kHz);
            /* De-interleave: odd positions feed channel 1 */
            for (n = 0; n < nSamplesFromInput; n++)
            {
                buf[n] = samplesIn[samplesIn_ptr + (2 * n) + 1];
            }
            ret += Resampler.silk_resampler(
                psEnc.state_Fxx[1].resampler_state,
                psEnc.state_Fxx[1].inputBuf,
                psEnc.state_Fxx[1].inputBufIx + 2,
                buf,
                0,
                nSamplesFromInput);

            psEnc.state_Fxx[1].inputBufIx += nSamplesToBuffer;
        }
        else if (encControl.nChannelsAPI == 2 && encControl.nChannelsInternal == 1)
        {
            /* Combine left and right channels before resampling */
            for (n = 0; n < nSamplesFromInput; n++)
            {
                sum = samplesIn[samplesIn_ptr + (2 * n)] + samplesIn[samplesIn_ptr + (2 * n) + 1];
                buf[n] = (short)Inlines.silk_RSHIFT_ROUND(sum, 1);
            }
            ret += Resampler.silk_resampler(
                psEnc.state_Fxx[0].resampler_state,
                psEnc.state_Fxx[0].inputBuf,
                psEnc.state_Fxx[0].inputBufIx + 2,
                buf,
                0,
                nSamplesFromInput);
            /* On the first mono frame, average the results for the two resampler states */
            if (psEnc.nPrevChannelsInternal == 2 && psEnc.state_Fxx[0].nFramesEncoded == 0)
            {
                ret += Resampler.silk_resampler(
                    psEnc.state_Fxx[1].resampler_state,
                    psEnc.state_Fxx[1].inputBuf,
                    psEnc.state_Fxx[1].inputBufIx + 2,
                    buf,
                    0,
                    nSamplesFromInput);
                for (n = 0; n < psEnc.state_Fxx[0].frame_length; n++)
                {
                    psEnc.state_Fxx[0].inputBuf[psEnc.state_Fxx[0].inputBufIx + n + 2] =
                        (short)(Inlines.silk_RSHIFT(psEnc.state_Fxx[0].inputBuf[psEnc.state_Fxx[0].inputBufIx + n + 2]
                                  + psEnc.state_Fxx[1].inputBuf[psEnc.state_Fxx[1].inputBufIx + n + 2], 1));
                }
            }
            psEnc.state_Fxx[0].inputBufIx += nSamplesToBuffer;
        }
        else
        {
            Inlines.OpusAssert(encControl.nChannelsAPI == 1 && encControl.nChannelsInternal == 1);
            Array.Copy(samplesIn, samplesIn_ptr, buf, 0, nSamplesFromInput);
            ret += Resampler.silk_resampler(
                psEnc.state_Fxx[0].resampler_state,
                psEnc.state_Fxx[0].inputBuf,
                psEnc.state_Fxx[0].inputBufIx + 2,
                buf,
                0,
                nSamplesFromInput);
            psEnc.state_Fxx[0].inputBufIx += nSamplesToBuffer;
        }

        samplesIn_ptr += (nSamplesFromInput * encControl.nChannelsAPI);
        nSamplesIn -= nSamplesFromInput;

        /* Default */
        psEnc.allowBandwidthSwitch = 0;

        /* Silk encoder */
        if (psEnc.state_Fxx[0].inputBufIx >= psEnc.state_Fxx[0].frame_length)
        {
            /* Enough data in input buffer, so encode */
            Inlines.OpusAssert(psEnc.state_Fxx[0].inputBufIx == psEnc.state_Fxx[0].frame_length);
            Inlines.OpusAssert(encControl.nChannelsInternal == 1 || psEnc.state_Fxx[1].inputBufIx == psEnc.state_Fxx[1].frame_length);

            /* Deal with LBRR data */
            if (psEnc.state_Fxx[0].nFramesEncoded == 0 && prefillFlag == 0)
            {
                /* Create space at start of payload for VAD and FEC flags */
                /* The placeholder is overwritten by enc_patch_initial_bits once all frames of the packet are encoded */
                byte[] iCDF = { 0, 0 };
                iCDF[0] = (byte)(256 - Inlines.silk_RSHIFT(256, (psEnc.state_Fxx[0].nFramesPerPacket + 1) * encControl.nChannelsInternal));
                psRangeEnc.enc_icdf(0, iCDF, 8);

                /* Encode any LBRR data from previous packet */
                /* Encode LBRR flags */
                for (n = 0; n < encControl.nChannelsInternal; n++)
                {
                    /* Pack the per-frame LBRR flags into a bit mask, frame i in bit i */
                    LBRR_symbol = 0;
                    for (i = 0; i < psEnc.state_Fxx[n].nFramesPerPacket; i++)
                    {
                        LBRR_symbol |= Inlines.silk_LSHIFT(psEnc.state_Fxx[n].LBRR_flags[i], i);
                    }
                    psEnc.state_Fxx[n].LBRR_flag = (sbyte)(LBRR_symbol > 0 ? 1 : 0);
                    if (LBRR_symbol != 0 && psEnc.state_Fxx[n].nFramesPerPacket > 1)
                    {
                        psRangeEnc.enc_icdf(LBRR_symbol - 1, Tables.silk_LBRR_flags_iCDF_ptr[psEnc.state_Fxx[n].nFramesPerPacket - 2], 8);
                    }
                }

                /* Code LBRR indices and excitation signals */
                for (i = 0; i < psEnc.state_Fxx[0].nFramesPerPacket; i++)
                {
                    for (n = 0; n < encControl.nChannelsInternal; n++)
                    {
                        if (psEnc.state_Fxx[n].LBRR_flags[i] != 0)
                        {
                            int condCoding;

                            if (encControl.nChannelsInternal == 2 && n == 0)
                            {
                                Stereo.silk_stereo_encode_pred(psRangeEnc, psEnc.sStereo.predIx[i]);
                                /* For LBRR data there's no need to code the mid-only flag if the side-channel LBRR flag is set */
                                if (psEnc.state_Fxx[1].LBRR_flags[i] == 0)
                                {
                                    Stereo.silk_stereo_encode_mid_only(psRangeEnc, psEnc.sStereo.mid_only_flags[i]);
                                }
                            }
                            /* Use conditional coding if previous frame available */
                            if (i > 0 && psEnc.state_Fxx[n].LBRR_flags[i - 1] != 0)
                            {
                                condCoding = SilkConstants.CODE_CONDITIONALLY;
                            }
                            else
                            {
                                condCoding = SilkConstants.CODE_INDEPENDENTLY;
                            }
                            EncodeIndices.silk_encode_indices(psEnc.state_Fxx[n], psRangeEnc, i, 1, condCoding);
                            EncodePulses.silk_encode_pulses(psRangeEnc, psEnc.state_Fxx[n].indices_LBRR[i].signalType, psEnc.state_Fxx[n].indices_LBRR[i].quantOffsetType,
                                psEnc.state_Fxx[n].pulses_LBRR[i], psEnc.state_Fxx[n].frame_length);
                        }
                    }
                }

                /* Reset LBRR flags */
                for (n = 0; n < encControl.nChannelsInternal; n++)
                {
                    Arrays.MemSetInt(psEnc.state_Fxx[n].LBRR_flags, 0, SilkConstants.MAX_FRAMES_PER_PACKET);
                }

                /* Remember how many bits the flag placeholder and LBRR data used; subtracted from the bit budget below */
                psEnc.nBitsUsedLBRR = psRangeEnc.tell();
            }

            HPVariableCutoff.silk_HP_variable_cutoff(psEnc.state_Fxx);

            /* Total target bits for packet */
            nBits = Inlines.silk_DIV32_16(Inlines.silk_MUL(encControl.bitRate, encControl.payloadSize_ms), 1000);
            /* Subtract bits used for LBRR */
            if (prefillFlag == 0)
            {
                nBits -= psEnc.nBitsUsedLBRR;
            }
            /* Divide by number of uncoded frames left in packet */
            nBits = Inlines.silk_DIV32_16(nBits, psEnc.state_Fxx[0].nFramesPerPacket);
            /* Convert to bits/second */
            if (encControl.payloadSize_ms == 10)
            {
                TargetRate_bps = Inlines.silk_SMULBB(nBits, 100);
            }
            else
            {
                TargetRate_bps = Inlines.silk_SMULBB(nBits, 50);
            }
            /* Subtract fraction of bits in excess of target in previous frames and packets */
            TargetRate_bps -= Inlines.silk_DIV32_16(Inlines.silk_MUL(psEnc.nBitsExceeded, 1000), TuningParameters.BITRESERVOIR_DECAY_TIME_MS);
            if (prefillFlag == 0 && psEnc.state_Fxx[0].nFramesEncoded > 0)
            {
                /* Compare actual vs target bits so far in this packet */
                int bitsBalance = psRangeEnc.tell() - psEnc.nBitsUsedLBRR - nBits * psEnc.state_Fxx[0].nFramesEncoded;
                TargetRate_bps -= Inlines.silk_DIV32_16(Inlines.silk_MUL(bitsBalance, 1000), TuningParameters.BITRESERVOIR_DECAY_TIME_MS);
            }
            /* Never exceed input bitrate */
            TargetRate_bps = Inlines.silk_LIMIT(TargetRate_bps, encControl.bitRate, 5000);

            /* Convert Left/Right to Mid/Side */
            if (encControl.nChannelsInternal == 2)
            {
                /* The flag is boxed so the callee can write it; the result is copied back into mid_only_flags right after the call */
                BoxedValueSbyte midOnlyFlagBoxed = new BoxedValueSbyte(psEnc.sStereo.mid_only_flags[psEnc.state_Fxx[0].nFramesEncoded]);
                Stereo.silk_stereo_LR_to_MS(psEnc.sStereo, psEnc.state_Fxx[0].inputBuf, 2, psEnc.state_Fxx[1].inputBuf, 2,
                    psEnc.sStereo.predIx[psEnc.state_Fxx[0].nFramesEncoded], midOnlyFlagBoxed,
                    MStargetRates_bps, TargetRate_bps, psEnc.state_Fxx[0].speech_activity_Q8, encControl.toMono,
                    psEnc.state_Fxx[0].fs_kHz, psEnc.state_Fxx[0].frame_length);
                psEnc.sStereo.mid_only_flags[psEnc.state_Fxx[0].nFramesEncoded] = midOnlyFlagBoxed.Val;
                if (midOnlyFlagBoxed.Val == 0)
                {
                    /* Reset side channel encoder memory for first frame with side coding */
                    if (psEnc.prev_decode_only_middle == 1)
                    {
                        psEnc.state_Fxx[1].sShape.Reset();
                        psEnc.state_Fxx[1].sPrefilt.Reset();
                        psEnc.state_Fxx[1].sNSQ.Reset();
                        Arrays.MemSetShort(psEnc.state_Fxx[1].prev_NLSFq_Q15, 0, SilkConstants.MAX_LPC_ORDER);
                        Arrays.MemSetInt(psEnc.state_Fxx[1].sLP.In_LP_State, 0, 2);
                        psEnc.state_Fxx[1].prevLag = 100;
                        psEnc.state_Fxx[1].sNSQ.lagPrev = 100;
                        psEnc.state_Fxx[1].sShape.LastGainIndex = 10;
                        psEnc.state_Fxx[1].prevSignalType = SilkConstants.TYPE_NO_VOICE_ACTIVITY;
                        psEnc.state_Fxx[1].sNSQ.prev_gain_Q16 = 65536;
                        psEnc.state_Fxx[1].first_frame_after_reset = 1;
                    }
                    psEnc.state_Fxx[1].silk_encode_do_VAD();
                }
                else
                {
                    psEnc.state_Fxx[1].VAD_flags[psEnc.state_Fxx[0].nFramesEncoded] = 0;
                }
                if (prefillFlag == 0)
                {
                    Stereo.silk_stereo_encode_pred(psRangeEnc, psEnc.sStereo.predIx[psEnc.state_Fxx[0].nFramesEncoded]);
                    /* The mid-only flag is only coded when the side channel VAD flag for this frame is 0 */
                    if (psEnc.state_Fxx[1].VAD_flags[psEnc.state_Fxx[0].nFramesEncoded] == 0)
                    {
                        Stereo.silk_stereo_encode_mid_only(psRangeEnc, psEnc.sStereo.mid_only_flags[psEnc.state_Fxx[0].nFramesEncoded]);
                    }
                }
            }
            else
            {
                /* Buffering */
                /* Restore the two saved samples to the front of inputBuf, then save the two samples at index frame_length */
                Array.Copy(psEnc.sStereo.sMid, psEnc.state_Fxx[0].inputBuf, 2);
                Array.Copy(psEnc.state_Fxx[0].inputBuf, psEnc.state_Fxx[0].frame_length, psEnc.sStereo.sMid, 0, 2);
            }
            psEnc.state_Fxx[0].silk_encode_do_VAD();

            /* Encode */
            for (n = 0; n < encControl.nChannelsInternal; n++)
            {
                int maxBits, useCBR;

                /* Handling rate constraints */
                maxBits = encControl.maxBits;
                if (tot_blocks == 2 && curr_block == 0)
                {
                    maxBits = maxBits * 3 / 5;
                }
                else if (tot_blocks == 3)
                {
                    if (curr_block == 0)
                    {
                        maxBits = maxBits * 2 / 5;
                    }
                    else if (curr_block == 1)
                    {
                        maxBits = maxBits * 3 / 4;
                    }
                }
                /* CBR is only requested for the last block */
                useCBR = (encControl.useCBR != 0 && curr_block == tot_blocks - 1) ? 1 : 0;

                if (encControl.nChannelsInternal == 1)
                {
                    channelRate_bps = TargetRate_bps;
                }
                else
                {
                    channelRate_bps = MStargetRates_bps[n];
                    if (n == 0 && MStargetRates_bps[1] > 0)
                    {
                        useCBR = 0;
                        /* Give mid up to 1/2 of the max bits for that frame */
                        maxBits -= encControl.maxBits / (tot_blocks * 2);
                    }
                }

                /* A channel with a zero rate is not encoded, but its counters below are still updated */
                if (channelRate_bps > 0)
                {
                    int condCoding;

                    psEnc.state_Fxx[n].silk_control_SNR(channelRate_bps);

                    /* Use independent coding if no previous frame available */
                    if (psEnc.state_Fxx[0].nFramesEncoded - n <= 0)
                    {
                        condCoding = SilkConstants.CODE_INDEPENDENTLY;
                    }
                    else if (n > 0 && psEnc.prev_decode_only_middle != 0)
                    {
                        /* If we skipped a side frame in this packet, we don't
                         * need LTP scaling; the LTP state is well-defined. */
                        condCoding = SilkConstants.CODE_INDEPENDENTLY_NO_LTP_SCALING;
                    }
                    else
                    {
                        condCoding = SilkConstants.CODE_CONDITIONALLY;
                    }
                    ret += psEnc.state_Fxx[n].silk_encode_frame(nBytesOut, psRangeEnc, condCoding, maxBits, useCBR);
                    Inlines.OpusAssert(ret == SilkError.SILK_NO_ERROR);
                }
                psEnc.state_Fxx[n].controlled_since_last_payload = 0;
                psEnc.state_Fxx[n].inputBufIx = 0;
                psEnc.state_Fxx[n].nFramesEncoded++;
            }
            /* nFramesEncoded was just incremented, so index - 1 is the frame encoded in this pass */
            psEnc.prev_decode_only_middle = psEnc.sStereo.mid_only_flags[psEnc.state_Fxx[0].nFramesEncoded - 1];

            /* Insert VAD and FEC flags at beginning of bitstream */
            if (nBytesOut.Val > 0 && psEnc.state_Fxx[0].nFramesEncoded == psEnc.state_Fxx[0].nFramesPerPacket)
            {
                /* Per channel: one VAD bit per frame followed by one LBRR bit, most significant bit first */
                flags = 0;
                for (n = 0; n < encControl.nChannelsInternal; n++)
                {
                    for (i = 0; i < psEnc.state_Fxx[n].nFramesPerPacket; i++)
                    {
                        flags = Inlines.silk_LSHIFT(flags, 1);
                        flags |= (int)psEnc.state_Fxx[n].VAD_flags[i];
                    }
                    flags = Inlines.silk_LSHIFT(flags, 1);
                    flags |= (int)psEnc.state_Fxx[n].LBRR_flag;
                }
                if (prefillFlag == 0)
                {
                    psRangeEnc.enc_patch_initial_bits((uint)flags, (uint)((psEnc.state_Fxx[0].nFramesPerPacket + 1) * encControl.nChannelsInternal));
                }

                /* Return zero bytes if all channels DTXed */
                if (psEnc.state_Fxx[0].inDTX != 0 && (encControl.nChannelsInternal == 1 || psEnc.state_Fxx[1].inDTX != 0))
                {
                    nBytesOut.Val = 0;
                }

                /* Track bits spent beyond the packet target, clamped to [0, 10000] */
                psEnc.nBitsExceeded += nBytesOut.Val * 8;
                psEnc.nBitsExceeded -= Inlines.silk_DIV32_16(Inlines.silk_MUL(encControl.bitRate, encControl.payloadSize_ms), 1000);
                psEnc.nBitsExceeded = Inlines.silk_LIMIT(psEnc.nBitsExceeded, 0, 10000);

                /* Update flag indicating if bandwidth switching is allowed */
                speech_act_thr_for_switch_Q8 = Inlines.silk_SMLAWB(((int)((TuningParameters.SPEECH_ACTIVITY_DTX_THRES) * ((long)1 << (8)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.SPEECH_ACTIVITY_DTX_THRES, 8)*/,
                    ((int)(((1 - TuningParameters.SPEECH_ACTIVITY_DTX_THRES) / TuningParameters.MAX_BANDWIDTH_SWITCH_DELAY_MS) * ((long)1 << (16 + 8)) + 0.5)) /*Inlines.SILK_CONST((1 - TuningParameters.SPEECH_ACTIVITY_DTX_THRES) / TuningParameters.MAX_BANDWIDTH_SWITCH_DELAY_MS, 16 + 8)*/, psEnc.timeSinceSwitchAllowed_ms);
                if (psEnc.state_Fxx[0].speech_activity_Q8 < speech_act_thr_for_switch_Q8)
                {
                    psEnc.allowBandwidthSwitch = 1;
                    psEnc.timeSinceSwitchAllowed_ms = 0;
                }
                else
                {
                    psEnc.allowBandwidthSwitch = 0;
                    psEnc.timeSinceSwitchAllowed_ms += encControl.payloadSize_ms;
                }
            }

            /* All input consumed */
            if (nSamplesIn == 0)
            {
                break;
            }
        }
        else
        {
            /* Not enough buffered samples for a full frame; keep them for the next call */
            break;
        }
        curr_block++;
    }

    psEnc.nPrevChannelsInternal = encControl.nChannelsInternal;

    /* Report encoder status back through the control structure */
    encControl.allowBandwidthSwitch = psEnc.allowBandwidthSwitch;
    encControl.inWBmodeWithoutVariableLP = (psEnc.state_Fxx[0].fs_kHz == 16 && psEnc.state_Fxx[0].sLP.mode == 0) ? 1 : 0;
    encControl.internalSampleRate = Inlines.silk_SMULBB(psEnc.state_Fxx[0].fs_kHz, 1000);
    encControl.stereoWidth_Q14 = encControl.toMono != 0 ? 0 : psEnc.sStereo.smth_width_Q14;
    if (prefillFlag != 0)
    {
        /* Undo the temporary prefill overrides made earlier in this method */
        encControl.payloadSize_ms = tmp_payloadSize_ms;
        encControl.complexity = tmp_complexity;
        for (n = 0; n < encControl.nChannelsInternal; n++)
        {
            psEnc.state_Fxx[n].controlled_since_last_payload = 0;
            psEnc.state_Fxx[n].prefillFlag = 0;
        }
    }

    return(ret);
}
/// <summary>
/// Convert Left/Right stereo signal to adaptive Mid/Side representation.
/// The mid signal replaces the left signal in <paramref name="x1"/>; the side signal, after
/// subtracting the (interpolated) prediction from mid, is written to <paramref name="x2"/>.
/// </summary>
/// <param name="state">I/O State</param>
/// <param name="x1">I/O Left input signal, becomes mid signal</param>
/// <param name="x1_ptr">I Offset of the frame in x1; the two entries before it are also read and overwritten</param>
/// <param name="x2">I/O Right input signal, becomes side signal</param>
/// <param name="x2_ptr">I Offset of the frame in x2; the two entries before it are also read, output starts at x2_ptr - 1</param>
/// <param name="ix">O Quantization indices [ 2 ][ 3 ]</param>
/// <param name="mid_only_flag">O Flag: only mid signal coded</param>
/// <param name="mid_side_rates_bps">O Bitrates for mid and side signals</param>
/// <param name="total_rate_bps">I Total bitrate</param>
/// <param name="prev_speech_act_Q8">I Speech activity level in previous frame</param>
/// <param name="toMono">I Last frame before a stereo->mono transition</param>
/// <param name="fs_kHz">I Sample rate (kHz)</param>
/// <param name="frame_length">I Number of samples</param>
internal static void silk_stereo_LR_to_MS(
    StereoEncodeState state,
    short[] x1,
    int x1_ptr,
    short[] x2,
    int x2_ptr,
    sbyte[][] ix,
    BoxedValueSbyte mid_only_flag,
    int[] mid_side_rates_bps,
    int total_rate_bps,
    int prev_speech_act_Q8,
    int toMono,
    int fs_kHz,
    int frame_length)
{
    int one_Q14 = ((int)((1) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(1, 14)*/;
    int one_Q16 = ((int)((1) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1, 16)*/;
    int interpLength = SilkConstants.STEREO_INTERP_LEN_MS * fs_kHz;

    int[] predictors_Q13 = new int[2];
    BoxedValueInt lowRatio_Q14 = new BoxedValueInt();
    BoxedValueInt highRatio_Q14 = new BoxedValueInt();
    int pos, acc, width_Q14;

    /* Both signals are addressed starting two samples before the frame */
    int midBase = x1_ptr - 2;
    int rightBase = x2_ptr - 2;
    int paddedLength = frame_length + 2;
    short[] sideBuf = new short[paddedLength];

    /* Convert to basic mid/side signals */
    for (pos = 0; pos < paddedLength; pos++)
    {
        int left = x1[midBase + pos];
        int right = (int)x2[rightBase + pos];
        x1[midBase + pos] = (short)Inlines.silk_RSHIFT_ROUND(left + right, 1);
        sideBuf[pos] = (short)Inlines.silk_SAT16(Inlines.silk_RSHIFT_ROUND(left - right, 1));
    }

    /* Buffering: restore the two samples saved by the previous call, then save the two samples at index frame_length */
    Array.Copy(state.sMid, 0, x1, midBase, 2);
    Array.Copy(state.sSide, 0, sideBuf, 0, 2);
    Array.Copy(x1, midBase + frame_length, state.sMid, 0, 2);
    Array.Copy(sideBuf, frame_length, state.sSide, 0, 2);

    /* LP and HP filter mid and side signals */
    short[] lowMid = new short[frame_length];
    short[] highMid = new short[frame_length];
    short[] lowSide = new short[frame_length];
    short[] highSide = new short[frame_length];
    for (pos = 0; pos < frame_length; pos++)
    {
        acc = Inlines.silk_RSHIFT_ROUND(Inlines.silk_ADD_LSHIFT32(x1[midBase + pos] + x1[midBase + pos + 2], x1[midBase + pos + 1], 1), 2);
        lowMid[pos] = (short)(acc);
        highMid[pos] = (short)(x1[midBase + pos + 1] - acc);

        acc = Inlines.silk_RSHIFT_ROUND(Inlines.silk_ADD_LSHIFT32(sideBuf[pos] + sideBuf[pos + 2], sideBuf[pos + 1], 1), 2);
        lowSide[pos] = (short)(acc);
        highSide[pos] = (short)(sideBuf[pos + 1] - acc);
    }

    /* Find energies and predictors */
    bool tenMsFrame = (frame_length == 10 * fs_kHz);
    int smoothCoef_Q16;
    if (tenMsFrame)
    {
        smoothCoef_Q16 = ((int)((SilkConstants.STEREO_RATIO_SMOOTH_COEF / 2) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.STEREO_RATIO_SMOOTH_COEF / 2, 16)*/;
    }
    else
    {
        smoothCoef_Q16 = ((int)((SilkConstants.STEREO_RATIO_SMOOTH_COEF) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.STEREO_RATIO_SMOOTH_COEF, 16)*/;
    }
    smoothCoef_Q16 = Inlines.silk_SMULWB(Inlines.silk_SMULBB(prev_speech_act_Q8, prev_speech_act_Q8), smoothCoef_Q16);

    predictors_Q13[0] = silk_stereo_find_predictor(lowRatio_Q14, lowMid, lowSide, state.mid_side_amp_Q0, 0, frame_length, smoothCoef_Q16);
    predictors_Q13[1] = silk_stereo_find_predictor(highRatio_Q14, highMid, highSide, state.mid_side_amp_Q0, 2, frame_length, smoothCoef_Q16);

    /* Ratio of the norms of residual and mid signals */
    int frac_Q16 = Inlines.silk_SMLABB(highRatio_Q14.Val, lowRatio_Q14.Val, 3);
    frac_Q16 = Inlines.silk_min(frac_Q16, one_Q16);

    /* Determine bitrate distribution between mid and side, and possibly reduce stereo width */
    /* Subtract approximate bitrate for coding stereo parameters */
    if (tenMsFrame)
    {
        total_rate_bps -= 1200;
    }
    else
    {
        total_rate_bps -= 600;
    }
    if (total_rate_bps < 1)
    {
        total_rate_bps = 1;
    }
    int minMidRate_bps = Inlines.silk_SMLABB(2000, fs_kHz, 900);
    Inlines.OpusAssert(minMidRate_bps < 32767);

    /* Default bitrate distribution: 8 parts for Mid and (5+3*frac) parts for Side. so: mid_rate = ( 8 / ( 13 + 3 * frac ) ) * total_ rate */
    int frac3_Q16 = Inlines.silk_MUL(3, frac_Q16);
    mid_side_rates_bps[0] = Inlines.silk_DIV32_varQ(total_rate_bps, ((int)((8 + 5) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(8 + 5, 16)*/ + frac3_Q16, 16 + 3);

    /* If Mid bitrate below minimum, reduce stereo width */
    bool midBelowMinimum = mid_side_rates_bps[0] < minMidRate_bps;
    if (midBelowMinimum)
    {
        mid_side_rates_bps[0] = minMidRate_bps;
    }
    mid_side_rates_bps[1] = total_rate_bps - mid_side_rates_bps[0];
    if (midBelowMinimum)
    {
        /* width = 4 * ( 2 * side_rate - min_rate ) / ( ( 1 + 3 * frac ) * min_rate ) */
        width_Q14 = Inlines.silk_DIV32_varQ(
            Inlines.silk_LSHIFT(mid_side_rates_bps[1], 1) - minMidRate_bps,
            Inlines.silk_SMULWB(one_Q16 + frac3_Q16, minMidRate_bps),
            14 + 2);
        width_Q14 = Inlines.silk_LIMIT(width_Q14, 0, one_Q14);
    }
    else
    {
        width_Q14 = one_Q14;
    }

    /* Smoother */
    state.smth_width_Q14 = (short)Inlines.silk_SMLAWB(state.smth_width_Q14, width_Q14 - state.smth_width_Q14, smoothCoef_Q16);

    /* At very low bitrates or for inputs that are nearly amplitude panned, switch to panned-mono coding */
    mid_only_flag.Val = 0;
    if (toMono != 0)
    {
        /* Last frame before stereo->mono transition; collapse stereo width */
        width_Q14 = 0;
        predictors_Q13[0] = 0;
        predictors_Q13[1] = 0;
        silk_stereo_quant_pred(predictors_Q13, ix);
    }
    else
    {
        /* The collapse thresholds depend on whether the previous frame already had zero width */
        bool prevWidthWasZero = state.width_prev_Q14 == 0;
        int rateFactor = prevWidthWasZero ? 13 : 11;
        int fracWidthThreshold_Q14 = prevWidthWasZero
            ? ((int)((0.05f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(0.05f, 14)*/
            : ((int)((0.02f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(0.02f, 14)*/;

        if (8 * total_rate_bps < rateFactor * minMidRate_bps
            || Inlines.silk_SMULWB(frac_Q16, state.smth_width_Q14) < fracWidthThreshold_Q14)
        {
            /* Scale down and quantize predictors */
            predictors_Q13[0] = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(state.smth_width_Q14, predictors_Q13[0]), 14);
            predictors_Q13[1] = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(state.smth_width_Q14, predictors_Q13[1]), 14);
            silk_stereo_quant_pred(predictors_Q13, ix);

            /* Collapse stereo width */
            width_Q14 = 0;
            predictors_Q13[0] = 0;
            predictors_Q13[1] = 0;

            if (prevWidthWasZero)
            {
                /* Code as panned-mono; previous frame already had zero width */
                mid_side_rates_bps[0] = total_rate_bps;
                mid_side_rates_bps[1] = 0;
                mid_only_flag.Val = 1;
            }
            /* Otherwise this frame is the transition to zero-width stereo */
        }
        else if (state.smth_width_Q14 > ((int)((0.95f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(0.95f, 14)*/)
        {
            /* Full-width stereo coding */
            silk_stereo_quant_pred(predictors_Q13, ix);
            width_Q14 = one_Q14;
        }
        else
        {
            /* Reduced-width stereo coding; scale down and quantize predictors */
            predictors_Q13[0] = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(state.smth_width_Q14, predictors_Q13[0]), 14);
            predictors_Q13[1] = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(state.smth_width_Q14, predictors_Q13[1]), 14);
            silk_stereo_quant_pred(predictors_Q13, ix);
            width_Q14 = state.smth_width_Q14;
        }
    }

    /* Make sure to keep on encoding until the tapered output has been transmitted */
    if (mid_only_flag.Val != 1)
    {
        state.silent_side_len = 0;
    }
    else
    {
        state.silent_side_len += (short)(frame_length - interpLength);
        if (state.silent_side_len < SilkConstants.LA_SHAPE_MS * fs_kHz)
        {
            mid_only_flag.Val = 0;
        }
        else
        {
            /* Limit to avoid wrapping around */
            state.silent_side_len = 10000;
        }
    }

    /* When the side channel is still coded it gets a rate of at least 1 */
    if (mid_only_flag.Val == 0 && mid_side_rates_bps[1] < 1)
    {
        mid_side_rates_bps[1] = 1;
        mid_side_rates_bps[0] = Inlines.silk_max_int(1, total_rate_bps - mid_side_rates_bps[1]);
    }

    /* Interpolate predictors and subtract prediction from side channel */
    int negPred0_Q13 = -state.pred_prev_Q13[0];
    int negPred1_Q13 = -state.pred_prev_Q13[1];
    int weight_Q24 = Inlines.silk_LSHIFT(state.width_prev_Q14, 10);
    int denom_Q16 = Inlines.silk_DIV32_16((int)1 << 16, interpLength);
    int step0_Q13 = 0 - Inlines.silk_RSHIFT_ROUND(Inlines.silk_SMULBB(predictors_Q13[0] - state.pred_prev_Q13[0], denom_Q16), 16);
    int step1_Q13 = 0 - Inlines.silk_RSHIFT_ROUND(Inlines.silk_SMULBB(predictors_Q13[1] - state.pred_prev_Q13[1], denom_Q16), 16);
    int weightStep_Q24 = Inlines.silk_LSHIFT(Inlines.silk_SMULWB(width_Q14 - state.width_prev_Q14, denom_Q16), 10);

    /* First part of the frame: ramp from the previous predictors/width to the new ones */
    for (pos = 0; pos < interpLength; pos++)
    {
        negPred0_Q13 += step0_Q13;
        negPred1_Q13 += step1_Q13;
        weight_Q24 += weightStep_Q24;
        acc = Inlines.silk_LSHIFT(Inlines.silk_ADD_LSHIFT(x1[midBase + pos] + x1[midBase + pos + 2], x1[midBase + pos + 1], 1), 9); /* Q11 */
        acc = Inlines.silk_SMLAWB(Inlines.silk_SMULWB(weight_Q24, sideBuf[pos + 1]), acc, negPred0_Q13); /* Q8 */
        acc = Inlines.silk_SMLAWB(acc, Inlines.silk_LSHIFT((int)x1[midBase + pos + 1], 11), negPred1_Q13); /* Q8 */
        x2[x2_ptr + pos - 1] = (short)Inlines.silk_SAT16(Inlines.silk_RSHIFT_ROUND(acc, 8));
    }

    /* Remainder of the frame: use the new predictors and width unchanged */
    negPred0_Q13 = 0 - predictors_Q13[0];
    negPred1_Q13 = 0 - predictors_Q13[1];
    weight_Q24 = Inlines.silk_LSHIFT(width_Q14, 10);
    for (pos = interpLength; pos < frame_length; pos++)
    {
        acc = Inlines.silk_LSHIFT(Inlines.silk_ADD_LSHIFT(x1[midBase + pos] + x1[midBase + pos + 2], x1[midBase + pos + 1], 1), 9); /* Q11 */
        acc = Inlines.silk_SMLAWB(Inlines.silk_SMULWB(weight_Q24, sideBuf[pos + 1]), acc, negPred0_Q13); /* Q8 */
        acc = Inlines.silk_SMLAWB(acc, Inlines.silk_LSHIFT((int)x1[midBase + pos + 1], 11), negPred1_Q13); /* Q8 */
        x2[x2_ptr + pos - 1] = (short)Inlines.silk_SAT16(Inlines.silk_RSHIFT_ROUND(acc, 8));
    }

    /* Remember this frame's predictors and width for the next call's interpolation */
    state.pred_prev_Q13[0] = (short)predictors_Q13[0];
    state.pred_prev_Q13[1] = (short)predictors_Q13[1];
    state.width_prev_Q14 = (short)width_Q14;
}
/* Processing of gains */
/*
 * Works on the per-subframe gains held in psEncCtrl.Gains_Q16:
 *   1. for voiced frames, scales every gain by a sigmoid of LTPredCodGain_Q7,
 *   2. recomputes each gain as the square root of (scaled residual energy + gain^2),
 *   3. stores the unquantized gains and the previous gain index, then quantizes the gains,
 *   4. picks the quantizer offset type (voiced frames only) and computes Lambda_Q10.
 */
internal static void silk_process_gains(
    SilkChannelEncoder psEnc,       /* I/O  Encoder state                           */
    SilkEncoderControl psEncCtrl,   /* I/O  Encoder control                         */
    int condCoding                  /* I    The type of conditional coding to use   */
)
{
    SilkShapeState shapeState = psEnc.sShape;
    int[] gains_Q16 = psEncCtrl.Gains_Q16;
    int subfrCount = psEnc.nb_subfr;

    /* Gain reduction when LTP coding gain is high */
    if (psEnc.indices.signalType == SilkConstants.TYPE_VOICED)
    {
        /* s = -0.5f * silk_sigmoid( 0.25f * ( psEncCtrl.LTPredCodGain - 12.0f ) ); */
        int twelve_Q7 = ((int)((12.0f) * ((long)1 << (7)) + 0.5)); /* Inlines.SILK_CONST(12.0f, 7) */
        int reduction_Q16 = 0 - Sigmoid.silk_sigm_Q15(
            Inlines.silk_RSHIFT_ROUND(psEncCtrl.LTPredCodGain_Q7 - twelve_Q7, 4));

        for (int sf = 0; sf < subfrCount; sf++)
        {
            gains_Q16[sf] = Inlines.silk_SMLAWB(gains_Q16[sf], gains_Q16[sf], reduction_Q16);
        }
    }

    /* Limit the quantized signal */
    /* InvMaxSqrVal = pow( 2.0f, 0.33f * ( 21.0f - SNR_dB ) ) / subfr_length; */
    int snrOffset_Q7 = ((int)((21 + 16 / 0.33f) * ((long)1 << (7)) + 0.5)); /* Inlines.SILK_CONST(21 + 16 / 0.33f, 7) */
    int snrSlope_Q16 = ((int)((0.33f) * ((long)1 << (16)) + 0.5));          /* Inlines.SILK_CONST(0.33f, 16) */
    int invMaxSqrVal_Q16 = Inlines.silk_DIV32_16(
        Inlines.silk_log2lin(Inlines.silk_SMULWB(snrOffset_Q7 - psEnc.SNR_dB_Q7, snrSlope_Q16)),
        psEnc.subfr_length);

    for (int sf = 0; sf < subfrCount; sf++)
    {
        /* Soft limit on ratio residual energy and squared gains */
        int resNrgQ = psEncCtrl.ResNrgQ[sf];
        int resNrgPart = Inlines.silk_SMULWW(psEncCtrl.ResNrg[sf], invMaxSqrVal_Q16);

        if (resNrgQ > 0)
        {
            resNrgPart = Inlines.silk_RSHIFT_ROUND(resNrgPart, resNrgQ);
        }
        else if (resNrgPart >= Inlines.silk_RSHIFT(int.MaxValue, -resNrgQ))
        {
            /* The left shift below would overflow: saturate instead */
            resNrgPart = int.MaxValue;
        }
        else
        {
            resNrgPart = Inlines.silk_LSHIFT(resNrgPart, -resNrgQ);
        }

        int gain = gains_Q16[sf];
        int gainSquared = Inlines.silk_ADD_SAT32(resNrgPart, Inlines.silk_SMMUL(gain, gain));

        /* Left shift that brings the square root back to Q16: 8 for the high-precision path
           (root in Q8), 16 otherwise (root in Q0) */
        int shiftToQ16 = 16;
        if (gainSquared < short.MaxValue)
        {
            /* recalculate with higher precision */
            gainSquared = Inlines.silk_SMLAWW(Inlines.silk_LSHIFT(resNrgPart, 16), gain, gain);
            Inlines.OpusAssert(gainSquared > 0);
            shiftToQ16 = 8;
        }

        gain = Inlines.silk_SQRT_APPROX(gainSquared);
        gain = Inlines.silk_min(gain, int.MaxValue >> shiftToQ16);
        gains_Q16[sf] = Inlines.silk_LSHIFT_SAT32(gain, shiftToQ16); /* Q16 */
    }

    /* Save unquantized gains and gain Index */
    Array.Copy(gains_Q16, psEncCtrl.GainsUnq_Q16, subfrCount);
    psEncCtrl.lastGainIndexPrev = shapeState.LastGainIndex;

    /* Quantize gains; the last gain index is passed through a box and written back */
    int conditional = (condCoding == SilkConstants.CODE_CONDITIONALLY) ? 1 : 0;
    BoxedValueSbyte lastGainIndexBox = new BoxedValueSbyte(shapeState.LastGainIndex);
    GainQuantization.silk_gains_quant(psEnc.indices.GainsIndices, gains_Q16, lastGainIndexBox, conditional, subfrCount);
    shapeState.LastGainIndex = lastGainIndexBox.Val;

    /* Set quantizer offset for voiced signals. Larger offset when LTP coding gain is low or tilt is high (ie low-pass) */
    if (psEnc.indices.signalType == SilkConstants.TYPE_VOICED)
    {
        int one_Q7 = ((int)((1.0f) * ((long)1 << (7)) + 0.5)); /* Inlines.SILK_CONST(1.0f, 7) */
        if (psEncCtrl.LTPredCodGain_Q7 + Inlines.silk_RSHIFT(psEnc.input_tilt_Q15, 8) <= one_Q7)
        {
            psEnc.indices.quantOffsetType = 1;
        }
        else
        {
            psEnc.indices.quantOffsetType = 0;
        }
    }

    /* Quantizer boundary adjustment */
    int quantOffset_Q10 = Tables.silk_Quantization_Offsets_Q10[psEnc.indices.signalType >> 1][psEnc.indices.quantOffsetType];

    int lambda_Q10 = ((int)((TuningParameters.LAMBDA_OFFSET) * ((long)1 << (10)) + 0.5)); /* Inlines.SILK_CONST(TuningParameters.LAMBDA_OFFSET, 10) */
    lambda_Q10 += Inlines.silk_SMULBB(
        ((int)((TuningParameters.LAMBDA_DELAYED_DECISIONS) * ((long)1 << (10)) + 0.5)), /* Inlines.SILK_CONST(TuningParameters.LAMBDA_DELAYED_DECISIONS, 10) */
        psEnc.nStatesDelayedDecision);
    lambda_Q10 += Inlines.silk_SMULWB(
        ((int)((TuningParameters.LAMBDA_SPEECH_ACT) * ((long)1 << (18)) + 0.5)), /* Inlines.SILK_CONST(TuningParameters.LAMBDA_SPEECH_ACT, 18) */
        psEnc.speech_activity_Q8);
    lambda_Q10 += Inlines.silk_SMULWB(
        ((int)((TuningParameters.LAMBDA_INPUT_QUALITY) * ((long)1 << (12)) + 0.5)), /* Inlines.SILK_CONST(TuningParameters.LAMBDA_INPUT_QUALITY, 12) */
        psEncCtrl.input_quality_Q14);
    lambda_Q10 += Inlines.silk_SMULWB(
        ((int)((TuningParameters.LAMBDA_CODING_QUALITY) * ((long)1 << (12)) + 0.5)), /* Inlines.SILK_CONST(TuningParameters.LAMBDA_CODING_QUALITY, 12) */
        psEncCtrl.coding_quality_Q14);
    lambda_Q10 += Inlines.silk_SMULWB(
        ((int)((TuningParameters.LAMBDA_QUANT_OFFSET) * ((long)1 << (16)) + 0.5)), /* Inlines.SILK_CONST(TuningParameters.LAMBDA_QUANT_OFFSET, 16) */
        quantOffset_Q10);
    psEncCtrl.Lambda_Q10 = lambda_Q10;

    Inlines.OpusAssert(psEncCtrl.Lambda_Q10 > 0);
    Inlines.OpusAssert(psEncCtrl.Lambda_Q10 < ((int)((2) * ((long)1 << (10)) + 0.5)) /*Inlines.SILK_CONST(2, 10)*/);
}
/* Decode parameters from payload */
/*
 * Turns the indices stored in psDec.indices into decoder control values:
 * gains, two sets of LPC coefficients (PredCoef_Q12[0] and PredCoef_Q12[1]),
 * and - for voiced frames - pitch lags, LTP coefficients and the LTP scale.
 * Also updates psDec.LastGainIndex and psDec.prevNLSF_Q15 for the next frame.
 */
internal static void silk_decode_parameters(
    SilkChannelDecoder psDec,       /* I/O  State                                   */
    SilkDecoderControl psDecCtrl,   /* I/O  Decoder control                         */
    int condCoding                  /* I    The type of conditional coding to use   */
)
{
    int order = psDec.LPC_order;
    short[] nlsf_Q15 = new short[order];
    short[] nlsfInterp_Q15 = new short[order];

    /* Dequant Gains */
    int conditional = (condCoding == SilkConstants.CODE_CONDITIONALLY) ? 1 : 0;
    BoxedValueSbyte lastGainIndexBox = new BoxedValueSbyte(psDec.LastGainIndex);
    GainQuantization.silk_gains_dequant(
        psDecCtrl.Gains_Q16,
        psDec.indices.GainsIndices,
        lastGainIndexBox,
        conditional,
        psDec.nb_subfr);
    psDec.LastGainIndex = lastGainIndexBox.Val;

    /****************/
    /* Decode NLSFs */
    /****************/
    NLSF.silk_NLSF_decode(nlsf_Q15, psDec.indices.NLSFIndices, psDec.psNLSF_CB);

    /* Convert NLSF parameters to AR prediction filter coefficients */
    NLSF.silk_NLSF2A(psDecCtrl.PredCoef_Q12[1], nlsf_Q15, order);

    /* If just reset, e.g., because internal Fs changed, do not allow interpolation */
    /* improves the case of packet loss in the first frame after a switch           */
    if (psDec.first_frame_after_reset == 1)
    {
        psDec.indices.NLSFInterpCoef_Q2 = 4;
    }

    if (psDec.indices.NLSFInterpCoef_Q2 >= 4)
    {
        /* Copy LPC coefficients for first half from second half */
        Array.Copy(psDecCtrl.PredCoef_Q12[1], psDecCtrl.PredCoef_Q12[0], order);
    }
    else
    {
        /* Calculation of the interpolated NLSF0 vector from the interpolation factor, */
        /* the previous NLSF1, and the current NLSF1                                   */
        for (int n = 0; n < order; n++)
        {
            int step = Inlines.silk_RSHIFT(
                Inlines.silk_MUL(psDec.indices.NLSFInterpCoef_Q2, nlsf_Q15[n] - psDec.prevNLSF_Q15[n]),
                2);
            nlsfInterp_Q15[n] = (short)(psDec.prevNLSF_Q15[n] + step);
        }

        /* Convert NLSF parameters to AR prediction filter coefficients */
        NLSF.silk_NLSF2A(psDecCtrl.PredCoef_Q12[0], nlsfInterp_Q15, order);
    }

    /* Remember the current NLSFs for interpolation in the next frame */
    Array.Copy(nlsf_Q15, psDec.prevNLSF_Q15, order);

    /* After a packet loss do BWE of LPC coefs */
    if (psDec.lossCnt != 0)
    {
        for (int half = 0; half < 2; half++)
        {
            BWExpander.silk_bwexpander(psDecCtrl.PredCoef_Q12[half], order, SilkConstants.BWE_AFTER_LOSS_Q16);
        }
    }

    if (psDec.indices.signalType != SilkConstants.TYPE_VOICED)
    {
        /* Not voiced: clear all pitch / LTP related outputs */
        Arrays.MemSetInt(psDecCtrl.pitchL, 0, psDec.nb_subfr);
        Arrays.MemSetShort(psDecCtrl.LTPCoef_Q14, 0, SilkConstants.LTP_ORDER * psDec.nb_subfr);
        psDec.indices.PERIndex = 0;
        psDecCtrl.LTP_scale_Q14 = 0;
        return;
    }

    /*********************/
    /* Decode pitch lags */
    /*********************/

    /* Decode pitch values */
    DecodePitch.silk_decode_pitch(
        psDec.indices.lagIndex,
        psDec.indices.contourIndex,
        psDecCtrl.pitchL,
        psDec.fs_kHz,
        psDec.nb_subfr);

    /* Decode Codebook Index */
    sbyte[][] codebook_Q7 = Tables.silk_LTP_vq_ptrs_Q7[psDec.indices.PERIndex]; /* set pointer to start of codebook */

    for (int sf = 0; sf < psDec.nb_subfr; sf++)
    {
        int vectorIndex = psDec.indices.LTPIndex[sf];
        int outBase = sf * SilkConstants.LTP_ORDER;
        for (int tap = 0; tap < SilkConstants.LTP_ORDER; tap++)
        {
            /* Codebook entries are Q7; shift left by 7 to store them as Q14 */
            psDecCtrl.LTPCoef_Q14[outBase + tap] = (short)(Inlines.silk_LSHIFT(codebook_Q7[vectorIndex][tap], 7));
        }
    }

    /**********************/
    /* Decode LTP scaling */
    /**********************/
    int scaleIndex = psDec.indices.LTP_scaleIndex;
    psDecCtrl.LTP_scale_Q14 = Tables.silk_LTPScales_table_Q14[scaleIndex];
}
/*************************************************************/
/*      FIXED POINT CORE PITCH ANALYSIS FUNCTION             */
/*************************************************************/
/*
 * Multi-stage pitch lag search.
 *   Stage 1: the input is decimated to 8 kHz and then to 4 kHz; a normalized correlation is
 *            computed for every lag in MIN_LAG_4KHZ..MAX_LAG_4KHZ and the strongest lags are
 *            kept as candidates.
 *   Stage 2: the candidates are re-evaluated on the 8 kHz signal, per subframe, against the
 *            stage-2 lag contour codebook, with a bias towards short lags and towards prevLag.
 *   Stage 3: only when Fs_kHz > 8; the lag is refined on the (possibly down-scaled copy of the)
 *            input signal within lag-2..lag+2 using the stage-3 contour codebook.
 *
 * Returns 1 after zeroing pitch_out[0..nb_subfr-1], LTPCorr_Q15, lagIndex and contourIndex when
 * the stage-1 peak is below 0.2 (Q14) or when no stage-2 candidate qualifies. Otherwise returns 0
 * with pitch_out, lagIndex, contourIndex and LTPCorr_Q15 filled in.
 */
internal static int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */
    short[] frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */
    int[] pitch_out, /* O 4 pitch lag values */
    BoxedValueShort lagIndex, /* O Lag Index */
    BoxedValueSbyte contourIndex, /* O Pitch contour Index */
    BoxedValueInt LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */
    int prevLag, /* I Last lag of previous frame; set to zero if unvoiced */
    int search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */
    int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */
    int Fs_kHz, /* I Sample frequency (kHz) */
    int complexity, /* I Complexity setting, 0-2, where 2 is highest */
    int nb_subfr /* I number of 5 ms subframes */
    )
{
    short[] frame_8kHz;
    short[] frame_4kHz;
    int[] filt_state = new int[6];
    short[] input_frame_ptr;
    int i, k, d, j;
    short[] C;
    int[] xcorr32;
    short[] basis;
    int basis_ptr;
    short[] target;
    int target_ptr;
    int cross_corr, normalizer, energy, shift, energy_basis, energy_target;
    int Cmax, length_d_srch, length_d_comp;
    int[] d_srch = new int[SilkConstants.PE_D_SRCH_LENGTH];
    short[] d_comp;
    int sum, threshold, lag_counter;
    int CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;
    int CCmax, CCmax_b, CCmax_new_b, CCmax_new;
    int[] CC = new int[SilkConstants.PE_NB_CBKS_STAGE2_EXT];
    silk_pe_stage3_vals[] energies_st3;
    silk_pe_stage3_vals[] cross_corr_st3;
    int frame_length, frame_length_8kHz, frame_length_4kHz;
    int sf_length;
    int min_lag;
    int max_lag;
    int contour_bias_Q15, diff;
    int nb_cbk_search;
    int delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
    sbyte[][] Lag_CB_ptr;

    /* Check for valid sampling frequency */
    Inlines.OpusAssert(Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16);

    /* Check for valid complexity setting */
    Inlines.OpusAssert(complexity >= SilkConstants.SILK_PE_MIN_COMPLEX);
    Inlines.OpusAssert(complexity <= SilkConstants.SILK_PE_MAX_COMPLEX);

    Inlines.OpusAssert(search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1 << 16));
    Inlines.OpusAssert(search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1 << 13));

    /* Set up frame lengths max / min lag for the sampling frequency */
    frame_length = (SilkConstants.PE_LTP_MEM_LENGTH_MS + nb_subfr * SilkConstants.PE_SUBFR_LENGTH_MS) * Fs_kHz;
    frame_length_4kHz = (SilkConstants.PE_LTP_MEM_LENGTH_MS + nb_subfr * SilkConstants.PE_SUBFR_LENGTH_MS) * 4;
    frame_length_8kHz = (SilkConstants.PE_LTP_MEM_LENGTH_MS + nb_subfr * SilkConstants.PE_SUBFR_LENGTH_MS) * 8;
    sf_length = SilkConstants.PE_SUBFR_LENGTH_MS * Fs_kHz;
    min_lag = SilkConstants.PE_MIN_LAG_MS * Fs_kHz;
    max_lag = SilkConstants.PE_MAX_LAG_MS * Fs_kHz - 1;

    /* Resample from input sampled at Fs_kHz to 8 kHz */
    frame_8kHz = new short[frame_length_8kHz];
    if (Fs_kHz == 16)
    {
        Arrays.MemSetInt(filt_state, 0, 2);
        Resampler.silk_resampler_down2(filt_state, frame_8kHz, frame, frame_length);
    }
    else if (Fs_kHz == 12)
    {
        Arrays.MemSetInt(filt_state, 0, 6);
        Resampler.silk_resampler_down2_3(filt_state, frame_8kHz, frame, frame_length);
    }
    else
    {
        /* Input is already at 8 kHz: plain copy, so later in-place scaling never touches frame */
        Inlines.OpusAssert(Fs_kHz == 8);
        Array.Copy(frame, frame_8kHz, frame_length_8kHz);
    }

    /* Decimate again to 4 kHz */
    Arrays.MemSetInt(filt_state, 0, 2); /* Set state to zero */
    frame_4kHz = new short[frame_length_4kHz];
    Resampler.silk_resampler_down2(filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz);

    /* Low-pass filter */
    /* Runs backwards so that frame_4kHz[i - 1] is still the unfiltered sample when it is added */
    for (i = frame_length_4kHz - 1; i > 0; i--)
    {
        frame_4kHz[i] = Inlines.silk_ADD_SAT16(frame_4kHz[i], frame_4kHz[i - 1]);
    }

    /*******************************************************************************
    ** Scale 4 kHz signal down to prevent correlations measures from overflowing
    ** find scaling as max scaling for each 8kHz(?) subframe
    *******************************************************************************/

    /* Inner product is calculated with different lengths, so scale for the worst case */
    SumSqrShift.silk_sum_sqr_shift(out energy, out shift, frame_4kHz, frame_length_4kHz);
    if (shift > 0)
    {
        /* The reported shift is halved before it is applied to the samples */
        shift = Inlines.silk_RSHIFT(shift, 1);
        for (i = 0; i < frame_length_4kHz; i++)
        {
            frame_4kHz[i] = Inlines.silk_RSHIFT16(frame_4kHz[i], shift);
        }
    }

    /******************************************************************************
    * FIRST STAGE, operating in 4 khz
    ******************************************************************************/
    /* C is allocated with the stage-2 size (nb_subfr * CSTRIDE_8KHZ) because it is reused there */
    C = new short[nb_subfr * CSTRIDE_8KHZ];
    xcorr32 = new int[MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1];
    Arrays.MemSetShort(C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ);
    target = frame_4kHz;
    target_ptr = Inlines.silk_LSHIFT(SF_LENGTH_4KHZ, 2);
    for (k = 0; k < nb_subfr >> 1; k++)
    {
        basis = target;
        basis_ptr = target_ptr - MIN_LAG_4KHZ;

        CeltPitchXCorr.pitch_xcorr(target, target_ptr, target, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1);

        /* Calculate first vector products before loop */
        /* The correlation for lag d is read back as xcorr32[MAX_LAG_4KHZ - d] */
        cross_corr = xcorr32[MAX_LAG_4KHZ - MIN_LAG_4KHZ];
        normalizer = Inlines.silk_inner_prod_self(target, target_ptr, SF_LENGTH_8KHZ);
        normalizer = Inlines.silk_ADD32(normalizer, Inlines.silk_inner_prod_self(basis, basis_ptr, SF_LENGTH_8KHZ));
        normalizer = Inlines.silk_ADD32(normalizer, Inlines.silk_SMULBB(SF_LENGTH_8KHZ, 4000));

        Inlines.MatrixSet(C, k, 0, CSTRIDE_4KHZ, (short)Inlines.silk_DIV32_varQ(cross_corr, normalizer, 13 + 1)); /* Q13 */

        /* From now on normalizer is computed recursively */
        for (d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++)
        {
            basis_ptr--;

            cross_corr = xcorr32[MAX_LAG_4KHZ - d];

            /* Add contribution of new sample and remove contribution from oldest sample */
            normalizer = Inlines.silk_ADD32(normalizer, Inlines.silk_SMULBB(basis[basis_ptr], basis[basis_ptr]) - Inlines.silk_SMULBB(basis[basis_ptr + SF_LENGTH_8KHZ], basis[basis_ptr + SF_LENGTH_8KHZ]));

            Inlines.MatrixSet(C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ, (short)Inlines.silk_DIV32_varQ(cross_corr, normalizer, 13 + 1)); /* Q13 */
        }
        /* Update target pointer */
        target_ptr += SF_LENGTH_8KHZ;
    }

    /* Combine two subframes into single correlation measure and apply short-lag bias */
    /* In both branches the result overwrites the first row of C (C[i - MIN_LAG_4KHZ]) */
    if (nb_subfr == SilkConstants.PE_MAX_NB_SUBFR)
    {
        for (i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i--)
        {
            sum = (int)Inlines.MatrixGet(C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ) + (int)Inlines.MatrixGet(C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ); /* Q14 */
            sum = Inlines.silk_SMLAWB(sum, sum, Inlines.silk_LSHIFT(-i, 4)); /* Q14 */
            C[i - MIN_LAG_4KHZ] = (short)sum; /* Q14 */
        }
    }
    else
    {
        /* Only short-lag bias */
        for (i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i--)
        {
            sum = Inlines.silk_LSHIFT((int)C[i - MIN_LAG_4KHZ], 1); /* Q14 */
            sum = Inlines.silk_SMLAWB(sum, sum, Inlines.silk_LSHIFT(-i, 4)); /* Q14 */
            C[i - MIN_LAG_4KHZ] = (short)sum; /* Q14 */
        }
    }

    /* Sort */
    /* Number of candidates kept: 4 + 2 * complexity */
    length_d_srch = Inlines.silk_ADD_LSHIFT32(4, complexity, 1);
    Inlines.OpusAssert(3 * length_d_srch <= SilkConstants.PE_D_SRCH_LENGTH);
    /* NOTE(review): presumably leaves the largest values at the front of C and their original
       positions in d_srch - the code below uses d_srch[i] + MIN_LAG_4KHZ as a lag; confirm */
    Sort.silk_insertion_sort_decreasing_int16(C, d_srch, CSTRIDE_4KHZ, length_d_srch);

    /* Escape if correlation is very low already here */
    Cmax = (int)C[0]; /* Q14 */
    if (Cmax < ((int)((0.2f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(0.2f, 14)*/)
    {
        Arrays.MemSetInt(pitch_out, 0, nb_subfr);
        LTPCorr_Q15.Val = 0;
        lagIndex.Val = 0;
        contourIndex.Val = 0;
        return(1);
    }

    threshold = Inlines.silk_SMULWB(search_thres1_Q16, Cmax);
    for (i = 0; i < length_d_srch; i++)
    {
        /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
        if (C[i] > threshold)
        {
            d_srch[i] = Inlines.silk_LSHIFT(d_srch[i] + MIN_LAG_4KHZ, 1);
        }
        else
        {
            /* First entry at or below the threshold ends the candidate list */
            length_d_srch = i;
            break;
        }
    }
    Inlines.OpusAssert(length_d_srch > 0);

    /* d_comp is first used as a per-lag marker array indexed by (lag - D_COMP_MIN) */
    d_comp = new short[D_COMP_STRIDE];
    for (i = D_COMP_MIN; i < D_COMP_MAX; i++)
    {
        d_comp[i - D_COMP_MIN] = 0;
    }
    for (i = 0; i < length_d_srch; i++)
    {
        d_comp[d_srch[i] - D_COMP_MIN] = 1;
    }

    /* Convolution */
    /* Backwards in-place pass: every marker is spread onto the two following lags */
    for (i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i--)
    {
        d_comp[i - D_COMP_MIN] += (short)(d_comp[i - 1 - D_COMP_MIN] + d_comp[i - 2 - D_COMP_MIN]);
    }

    /* Rebuild d_srch as the list of 8 kHz lags to search in stage 2 */
    length_d_srch = 0;
    for (i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++)
    {
        if (d_comp[i + 1 - D_COMP_MIN] > 0)
        {
            d_srch[length_d_srch] = i;
            length_d_srch++;
        }
    }

    /* Convolution */
    for (i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i--)
    {
        d_comp[i - D_COMP_MIN] += (short)(d_comp[i - 1 - D_COMP_MIN] + d_comp[i - 2 - D_COMP_MIN] + d_comp[i - 3 - D_COMP_MIN]);
    }

    /* From here on d_comp is overwritten from the front with the list of lags (i - 2) for which
       correlations must be computed. */
    /* NOTE(review): the in-place rewrite assumes the write index never passes the read index
       (i - D_COMP_MIN); this depends on D_COMP_MIN relative to MIN_LAG_8KHZ - confirm */
    length_d_comp = 0;
    for (i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++)
    {
        if (d_comp[i - D_COMP_MIN] > 0)
        {
            d_comp[length_d_comp] = (short)(i - 2);
            length_d_comp++;
        }
    }

    /**********************************************************************************
    ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
    *************************************************************************************/

    /******************************************************************************
    ** Scale signal down to avoid correlations measures from overflowing
    *******************************************************************************/
    /* find scaling as max scaling for each subframe */
    SumSqrShift.silk_sum_sqr_shift(out energy, out shift, frame_8kHz, frame_length_8kHz);
    if (shift > 0)
    {
        shift = Inlines.silk_RSHIFT(shift, 1);
        for (i = 0; i < frame_length_8kHz; i++)
        {
            frame_8kHz[i] = Inlines.silk_RSHIFT16(frame_8kHz[i], shift);
        }
    }

    /*********************************************************************************
    * Find energy of each subframe projected onto its history, for a range of delays
    *********************************************************************************/
    /* C is reused: row k, column d - (MIN_LAG_8KHZ - 2) holds the measure for subframe k, lag d */
    Arrays.MemSetShort(C, 0, nb_subfr * CSTRIDE_8KHZ);

    target = frame_8kHz;
    target_ptr = SilkConstants.PE_LTP_MEM_LENGTH_MS * 8;
    for (k = 0; k < nb_subfr; k++)
    {
        /* The added 1 keeps the denominator below from being zero */
        energy_target = Inlines.silk_ADD32(Inlines.silk_inner_prod(target, target_ptr, target, target_ptr, SF_LENGTH_8KHZ), 1);
        for (j = 0; j < length_d_comp; j++)
        {
            d = d_comp[j];
            basis = target;
            basis_ptr = target_ptr - d;

            cross_corr = Inlines.silk_inner_prod(target, target_ptr, basis, basis_ptr, SF_LENGTH_8KHZ);
            if (cross_corr > 0)
            {
                energy_basis = Inlines.silk_inner_prod_self(basis, basis_ptr, SF_LENGTH_8KHZ);
                Inlines.MatrixSet(C, k, d - (MIN_LAG_8KHZ - 2), CSTRIDE_8KHZ, (short)Inlines.silk_DIV32_varQ(cross_corr, Inlines.silk_ADD32(energy_target, energy_basis), 13 + 1)); /* Q13 */
            }
            else
            {
                /* Non-positive correlations are stored as zero */
                Inlines.MatrixSet <short>(C, k, d - (MIN_LAG_8KHZ - 2), CSTRIDE_8KHZ, 0);
            }
        }
        target_ptr += SF_LENGTH_8KHZ;
    }

    /* search over lag range and lags codebook */
    /* scale factor for lag codebook, as a function of center lag */

    CCmax = int.MinValue;
    CCmax_b = int.MinValue;

    CBimax = 0; /* To avoid returning undefined lag values */
    lag = -1; /* To check if lag with strong enough correlation has been found */

    if (prevLag > 0)
    {
        /* Convert prevLag from the input rate to the 8 kHz domain: x2/3 at 12 kHz, /2 at 16 kHz */
        if (Fs_kHz == 12)
        {
            prevLag = Inlines.silk_DIV32_16(Inlines.silk_LSHIFT(prevLag, 1), 3);
        }
        else if (Fs_kHz == 16)
        {
            prevLag = Inlines.silk_RSHIFT(prevLag, 1);
        }
        prevLag_log2_Q7 = Inlines.silk_lin2log((int)prevLag);
    }
    else
    {
        prevLag_log2_Q7 = 0;
    }
    Inlines.OpusAssert(search_thres2_Q13 == Inlines.silk_SAT16(search_thres2_Q13));
    /* Set up stage 2 codebook based on number of subframes */
    if (nb_subfr == SilkConstants.PE_MAX_NB_SUBFR)
    {
        Lag_CB_ptr = Tables.silk_CB_lags_stage2;
        if (Fs_kHz == 8 && complexity > SilkConstants.SILK_PE_MIN_COMPLEX)
        {
            /* If input is 8 khz use a larger codebook here because it is last stage */
            nb_cbk_search = SilkConstants.PE_NB_CBKS_STAGE2_EXT;
        }
        else
        {
            nb_cbk_search = SilkConstants.PE_NB_CBKS_STAGE2;
        }
    }
    else
    {
        Lag_CB_ptr = Tables.silk_CB_lags_stage2_10_ms;
        nb_cbk_search = SilkConstants.PE_NB_CBKS_STAGE2_10MS;
    }

    for (k = 0; k < length_d_srch; k++)
    {
        d = d_srch[k];
        /* CC[j] = sum over subframes of the stage-2 measure at the lag given by contour j */
        for (j = 0; j < nb_cbk_search; j++)
        {
            CC[j] = 0;
            for (i = 0; i < nb_subfr; i++)
            {
                int d_subfr;
                /* Try all codebooks */
                d_subfr = d + Lag_CB_ptr[i][j];
                CC[j] = CC[j] + (int)Inlines.MatrixGet(C, i, d_subfr - (MIN_LAG_8KHZ - 2), CSTRIDE_8KHZ);
            }
        }
        /* Find best codebook */
        CCmax_new = int.MinValue;
        CBimax_new = 0;
        for (i = 0; i < nb_cbk_search; i++)
        {
            if (CC[i] > CCmax_new)
            {
                CCmax_new = CC[i];
                CBimax_new = i;
            }
        }

        /* Bias towards shorter lags */
        lag_log2_Q7 = Inlines.silk_lin2log(d); /* Q7 */
        Inlines.OpusAssert(lag_log2_Q7 == Inlines.silk_SAT16(lag_log2_Q7));
        Inlines.OpusAssert(nb_subfr * ((int)((SilkConstants.PE_SHORTLAG_BIAS) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.PE_SHORTLAG_BIAS, 13)*/ == Inlines.silk_SAT16(nb_subfr * ((int)((SilkConstants.PE_SHORTLAG_BIAS) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.PE_SHORTLAG_BIAS, 13)*/));
        CCmax_new_b = CCmax_new - Inlines.silk_RSHIFT(Inlines.silk_SMULBB(nb_subfr * ((int)((SilkConstants.PE_SHORTLAG_BIAS) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.PE_SHORTLAG_BIAS, 13)*/, lag_log2_Q7), 7); /* Q13 */

        /* Bias towards previous lag */
        Inlines.OpusAssert(nb_subfr * ((int)((SilkConstants.PE_PREVLAG_BIAS) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.PE_PREVLAG_BIAS, 13)*/ == Inlines.silk_SAT16(nb_subfr * ((int)((SilkConstants.PE_PREVLAG_BIAS) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.PE_PREVLAG_BIAS, 13)*/));
        if (prevLag > 0)
        {
            /* Penalty grows with the squared log2 distance between this lag and prevLag and is
               scaled by the previous frame's correlation (LTPCorr_Q15 input value) */
            delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7;
            Inlines.OpusAssert(delta_lag_log2_sqr_Q7 == Inlines.silk_SAT16(delta_lag_log2_sqr_Q7));
            delta_lag_log2_sqr_Q7 = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7), 7);
            prev_lag_bias_Q13 = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(nb_subfr * ((int)((SilkConstants.PE_PREVLAG_BIAS) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.PE_PREVLAG_BIAS, 13)*/, LTPCorr_Q15.Val), 15); /* Q13 */
            prev_lag_bias_Q13 = Inlines.silk_DIV32(Inlines.silk_MUL(prev_lag_bias_Q13, delta_lag_log2_sqr_Q7), delta_lag_log2_sqr_Q7 + ((int)((0.5f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(0.5f, 7)*/);
            CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */
        }

        /* NOTE(review): the range check below always reads Tables.silk_CB_lags_stage2, even when
           Lag_CB_ptr is the 10 ms table - confirm this is intended */
        if (CCmax_new_b > CCmax_b && /* Find maximum biased correlation */
            CCmax_new > Inlines.silk_SMULBB(nb_subfr, search_thres2_Q13) && /* Correlation needs to be high enough to be voiced */
            Tables.silk_CB_lags_stage2[0][CBimax_new] <= MIN_LAG_8KHZ /* Lag must be in range */
            )
        {
            CCmax_b = CCmax_new_b;
            CCmax = CCmax_new;
            lag = d;
            CBimax = CBimax_new;
        }
    }

    if (lag == -1)
    {
        /* No suitable candidate found */
        Arrays.MemSetInt(pitch_out, 0, nb_subfr);
        LTPCorr_Q15.Val = 0;
        lagIndex.Val = 0;
        contourIndex.Val = 0;
        return(1);
    }

    /* Output normalized correlation */
    LTPCorr_Q15.Val = (int)Inlines.silk_LSHIFT(Inlines.silk_DIV32_16(CCmax, nb_subfr), 2);
    Inlines.OpusAssert(LTPCorr_Q15.Val >= 0);

    if (Fs_kHz > 8)
    {
        short[] scratch_mem;
        /***************************************************************************/
        /* Scale input signal down to avoid correlations measures from overflowing */
        /***************************************************************************/
        /* find scaling as max scaling for each subframe */
        SumSqrShift.silk_sum_sqr_shift(out energy, out shift, frame, frame_length);
        if (shift > 0)
        {
            scratch_mem = new short[frame_length];
            /* Move signal to scratch mem because the input signal should be unchanged */
            shift = Inlines.silk_RSHIFT(shift, 1);
            for (i = 0; i < frame_length; i++)
            {
                scratch_mem[i] = Inlines.silk_RSHIFT16(frame[i], shift);
            }
            input_frame_ptr = scratch_mem;
        }
        else
        {
            input_frame_ptr = frame;
        }

        /* Search in original signal */

        CBimax_old = CBimax;
        /* Compensate for decimation */
        Inlines.OpusAssert(lag == Inlines.silk_SAT16(lag));
        if (Fs_kHz == 12)
        {
            lag = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(lag, 3), 1);
        }
        else if (Fs_kHz == 16)
        {
            lag = Inlines.silk_LSHIFT(lag, 1);
        }
        else
        {
            /* Not reached when the Fs_kHz assertion at the top holds (only 12 and 16 are > 8) */
            lag = Inlines.silk_SMULBB(lag, 3);
        }

        lag = Inlines.silk_LIMIT_int(lag, min_lag, max_lag);
        start_lag = Inlines.silk_max_int(lag - 2, min_lag);
        end_lag = Inlines.silk_min_int(lag + 2, max_lag);
        lag_new = lag; /* to avoid undefined lag */
        CBimax = 0; /* to avoid undefined lag */

        CCmax = int.MinValue;
        /* pitch lags according to second stage */
        /* These values are overwritten by the stage-3 result after the search below */
        for (k = 0; k < nb_subfr; k++)
        {
            pitch_out[k] = lag + 2 * Tables.silk_CB_lags_stage2[k][CBimax_old];
        }

        /* Set up codebook parameters according to complexity setting and frame length */
        if (nb_subfr == SilkConstants.PE_MAX_NB_SUBFR)
        {
            nb_cbk_search = (int)Tables.silk_nb_cbk_searchs_stage3[complexity];
            Lag_CB_ptr = Tables.silk_CB_lags_stage3;
        }
        else
        {
            nb_cbk_search = SilkConstants.PE_NB_CBKS_STAGE3_10MS;
            Lag_CB_ptr = Tables.silk_CB_lags_stage3_10_ms;
        }

        /* Calculate the correlations and energies needed in stage 3 */
        energies_st3 = new silk_pe_stage3_vals[nb_subfr * nb_cbk_search];
        cross_corr_st3 = new silk_pe_stage3_vals[nb_subfr * nb_cbk_search];
        for (int c = 0; c < nb_subfr * nb_cbk_search; c++)
        {
            energies_st3[c] = new silk_pe_stage3_vals(); // fixme: these can be replaced with a linearized array probably, or at least a struct
            cross_corr_st3[c] = new silk_pe_stage3_vals();
        }
        silk_P_Ana_calc_corr_st3(cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity);
        silk_P_Ana_calc_energy_st3(energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity);

        /* lag_counter indexes the Values array of each stage-3 entry: 0 is start_lag, +1 per lag */
        lag_counter = 0;
        Inlines.OpusAssert(lag == Inlines.silk_SAT16(lag));
        contour_bias_Q15 = Inlines.silk_DIV32_16(((int)((SilkConstants.PE_FLATCONTOUR_BIAS) * ((long)1 << (15)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.PE_FLATCONTOUR_BIAS, 15)*/, lag);

        target = input_frame_ptr;
        target_ptr = SilkConstants.PE_LTP_MEM_LENGTH_MS * Fs_kHz;
        energy_target = Inlines.silk_ADD32(Inlines.silk_inner_prod_self(target, target_ptr, nb_subfr * sf_length), 1);
        for (d = start_lag; d <= end_lag; d++)
        {
            for (j = 0; j < nb_cbk_search; j++)
            {
                cross_corr = 0;
                energy = energy_target;
                for (k = 0; k < nb_subfr; k++)
                {
                    cross_corr = Inlines.silk_ADD32(cross_corr, Inlines.MatrixGet(cross_corr_st3, k, j, nb_cbk_search).Values[lag_counter]);
                    energy = Inlines.silk_ADD32(energy, Inlines.MatrixGet(energies_st3, k, j, nb_cbk_search).Values[lag_counter]);
                    Inlines.OpusAssert(energy >= 0);
                }
                if (cross_corr > 0)
                {
                    CCmax_new = Inlines.silk_DIV32_varQ(cross_corr, energy, 13 + 1); /* Q13 */
                    /* Reduce depending on flatness of contour */
                    diff = short.MaxValue - Inlines.silk_MUL(contour_bias_Q15, j); /* Q15 */
                    Inlines.OpusAssert(diff == Inlines.silk_SAT16(diff));
                    CCmax_new = Inlines.silk_SMULWB(CCmax_new, diff); /* Q14 */
                }
                else
                {
                    CCmax_new = 0;
                }

                /* NOTE(review): the range check reads Tables.silk_CB_lags_stage3 even when
                   Lag_CB_ptr is the 10 ms table - confirm this is intended */
                if (CCmax_new > CCmax && (d + Tables.silk_CB_lags_stage3[0][j]) <= max_lag)
                {
                    CCmax = CCmax_new;
                    lag_new = d;
                    CBimax = j;
                }
            }
            lag_counter++;
        }

        for (k = 0; k < nb_subfr; k++)
        {
            pitch_out[k] = lag_new + Lag_CB_ptr[k][CBimax];
            pitch_out[k] = Inlines.silk_LIMIT(pitch_out[k], min_lag, SilkConstants.PE_MAX_LAG_MS * Fs_kHz);
        }
        lagIndex.Val = (short)(lag_new - min_lag);
        contourIndex.Val = (sbyte)CBimax;
    }
    else /* Fs_kHz == 8 */
    /* Save Lags */
    {
        /* Lag_CB_ptr and CBimax still refer to the stage-2 codebook here */
        for (k = 0; k < nb_subfr; k++)
        {
            pitch_out[k] = lag + Lag_CB_ptr[k][CBimax];
            pitch_out[k] = Inlines.silk_LIMIT(pitch_out[k], MIN_LAG_8KHZ, SilkConstants.PE_MAX_LAG_MS * 8);
        }
        lagIndex.Val = (short)(lag - MIN_LAG_8KHZ);
        contourIndex.Val = (sbyte)CBimax;
    }
    Inlines.OpusAssert(lagIndex.Val >= 0);
    /* return as voiced */
    return(0);
}
/*
 * Finds the prediction coefficients for one frame:
 *   - derives per-subframe inverse gains, squared weights and local gains from psEncCtrl.Gains_Q16,
 *   - voiced frames: runs LTP analysis and quantization, LTP scale control and builds the LTP residual;
 *     other frames: builds the gain-scaled input and clears the LTP state,
 *   - computes the limit on predictive coding gain, then runs LPC analysis, NLSF processing
 *     and the residual energy calculation,
 *   - stores the NLSFs in psEnc.prev_NLSFq_Q15 for the next frame.
 */
internal static void silk_find_pred_coefs(
    SilkChannelEncoder psEnc,       /* I/O  encoder state                           */
    SilkEncoderControl psEncCtrl,   /* I/O  encoder control                         */
    short[] res_pitch,              /* I    Residual from pitch analysis            */
    short[] x,                      /* I    Speech signal                           */
    int x_ptr,
    int condCoding                  /* I    The type of conditional coding to use   */
)
{
    int[] invGains_Q16 = new int[SilkConstants.MAX_NB_SUBFR];
    int[] local_gains = new int[SilkConstants.MAX_NB_SUBFR];
    int[] Wght_Q15 = new int[SilkConstants.MAX_NB_SUBFR];
    short[] NLSF_Q15 = new short[SilkConstants.MAX_LPC_ORDER];
    int[] LTP_corrs_rshift = new int[SilkConstants.MAX_NB_SUBFR];
    int sf;

    /* weighting for weighted least squares */
    int smallestGain_Q16 = int.MaxValue >> 6;
    for (sf = 0; sf < psEnc.nb_subfr; sf++)
    {
        smallestGain_Q16 = Inlines.silk_min(smallestGain_Q16, psEncCtrl.Gains_Q16[sf]);
    }

    for (sf = 0; sf < psEnc.nb_subfr; sf++)
    {
        /* Divide to Q16 */
        Inlines.OpusAssert(psEncCtrl.Gains_Q16[sf] > 0);

        /* Invert and normalize gains, and ensure that maximum invGains_Q16 is within range of a 16 bit int */
        int inverse = Inlines.silk_DIV32_varQ(smallestGain_Q16, psEncCtrl.Gains_Q16[sf], 16 - 2);

        /* Ensure Wght_Q15 a minimum value 1 */
        inverse = Inlines.silk_max(inverse, 363);
        invGains_Q16[sf] = inverse;

        /* Square the inverted gains */
        Inlines.OpusAssert(inverse == Inlines.silk_SAT16(inverse));
        Wght_Q15[sf] = Inlines.silk_RSHIFT(Inlines.silk_SMULWB(inverse, inverse), 1);

        /* Invert the inverted and normalized gains */
        local_gains[sf] = Inlines.silk_DIV32(((int)1 << 16), inverse);
    }

    short[] LPC_in_pre = new short[psEnc.nb_subfr * psEnc.predictLPCOrder + psEnc.frame_length];

    if (psEnc.indices.signalType != SilkConstants.TYPE_VOICED)
    {
        /************/
        /* UNVOICED */
        /************/
        /* Create signal with prepended subframes, scaled by inverse gains */
        int blockLength = psEnc.subfr_length + psEnc.predictLPCOrder;
        int firstSource = x_ptr - psEnc.predictLPCOrder;
        for (sf = 0; sf < psEnc.nb_subfr; sf++)
        {
            Inlines.silk_scale_copy_vector16(
                LPC_in_pre, sf * blockLength,
                x, firstSource + sf * psEnc.subfr_length,
                invGains_Q16[sf],
                blockLength);
        }

        Arrays.MemSetShort(psEncCtrl.LTPCoef_Q14, 0, psEnc.nb_subfr * SilkConstants.LTP_ORDER);
        psEncCtrl.LTPredCodGain_Q7 = 0;
        psEnc.sum_log_gain_Q7 = 0;
    }
    else
    {
        /**********/
        /* VOICED */
        /**********/
        Inlines.OpusAssert(psEnc.ltp_mem_length - psEnc.predictLPCOrder >= psEncCtrl.pitchL[0] + SilkConstants.LTP_ORDER / 2);

        int[] WLTP = new int[psEnc.nb_subfr * SilkConstants.LTP_ORDER * SilkConstants.LTP_ORDER];

        /* LTP analysis */
        BoxedValueInt codGainBox = new BoxedValueInt(psEncCtrl.LTPredCodGain_Q7);
        FindLTP.silk_find_LTP(
            psEncCtrl.LTPCoef_Q14, WLTP, codGainBox,
            res_pitch, psEncCtrl.pitchL, Wght_Q15,
            psEnc.subfr_length, psEnc.nb_subfr, psEnc.ltp_mem_length, LTP_corrs_rshift);
        psEncCtrl.LTPredCodGain_Q7 = codGainBox.Val;

        /* Quantize LTP gain parameters */
        BoxedValueSbyte periodicityBox = new BoxedValueSbyte(psEnc.indices.PERIndex);
        BoxedValueInt sumLogGainBox = new BoxedValueInt(psEnc.sum_log_gain_Q7);
        QuantizeLTPGains.silk_quant_LTP_gains(
            psEncCtrl.LTPCoef_Q14, psEnc.indices.LTPIndex, periodicityBox, sumLogGainBox,
            WLTP, psEnc.mu_LTP_Q9, psEnc.LTPQuantLowComplexity, psEnc.nb_subfr);
        psEnc.indices.PERIndex = periodicityBox.Val;
        psEnc.sum_log_gain_Q7 = sumLogGainBox.Val;

        /* Control LTP scaling */
        LTPScaleControl.silk_LTP_scale_ctrl(psEnc, psEncCtrl, condCoding);

        /* Create LTP residual */
        LTPAnalysisFilter.silk_LTP_analysis_filter(
            LPC_in_pre, x, x_ptr - psEnc.predictLPCOrder,
            psEncCtrl.LTPCoef_Q14, psEncCtrl.pitchL, invGains_Q16,
            psEnc.subfr_length, psEnc.nb_subfr, psEnc.predictLPCOrder);
    }

    /* Limit on total predictive coding gain */
    int minInvGain_Q30;
    if (psEnc.first_frame_after_reset == 0)
    {
        int log2Arg = Inlines.silk_SMLAWB(
            16 << 7,
            (int)psEncCtrl.LTPredCodGain_Q7,
            ((int)((1.0f / 3f) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1.0f / 3f, 16)*/);
        int numerator = Inlines.silk_log2lin(log2Arg); /* Q16 */

        int qualityFactor = Inlines.silk_SMLAWB(
            ((int)((0.25f) * ((long)1 << (18)) + 0.5)) /*Inlines.SILK_CONST(0.25f, 18)*/,
            ((int)((0.75f) * ((long)1 << (18)) + 0.5)) /*Inlines.SILK_CONST(0.75f, 18)*/,
            psEncCtrl.coding_quality_Q14);
        int denominator = Inlines.silk_SMULWW(
            ((int)((SilkConstants.MAX_PREDICTION_POWER_GAIN) * ((long)1 << (0)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.MAX_PREDICTION_POWER_GAIN, 0)*/,
            qualityFactor);

        minInvGain_Q30 = Inlines.silk_DIV32_varQ(numerator, denominator, 14);
    }
    else
    {
        /* First frame after a reset: use the fixed after-reset limit */
        minInvGain_Q30 = ((int)((1.0f / SilkConstants.MAX_PREDICTION_POWER_GAIN_AFTER_RESET) * ((long)1 << (30)) + 0.5)) /*Inlines.SILK_CONST(1.0f / SilkConstants.MAX_PREDICTION_POWER_GAIN_AFTER_RESET, 30)*/;
    }

    /* LPC_in_pre contains the LTP-filtered input for voiced, and the unfiltered input for unvoiced */
    FindLPC.silk_find_LPC(psEnc, NLSF_Q15, LPC_in_pre, minInvGain_Q30);

    /* Quantize LSFs */
    NLSF.silk_process_NLSFs(psEnc, psEncCtrl.PredCoef_Q12, NLSF_Q15, psEnc.prev_NLSFq_Q15);

    /* Calculate residual energy using quantized LPC coefficients */
    ResidualEnergy.silk_residual_energy(
        psEncCtrl.ResNrg, psEncCtrl.ResNrgQ, LPC_in_pre, psEncCtrl.PredCoef_Q12, local_gains,
        psEnc.subfr_length, psEnc.nb_subfr, psEnc.predictLPCOrder);

    /* Copy to prediction struct for use in next frame for interpolation */
    Array.Copy(NLSF_Q15, psEnc.prev_NLSFq_Q15, SilkConstants.MAX_LPC_ORDER);
}
/*
 * Quantizes the LTP gains of all subframes.
 * Each of the three LTP codebooks is tried in turn; for every subframe the best vector is
 * picked by silk_VQ_WMat_EC and the per-subframe rate/distortion values are accumulated.
 * The codebook with the lowest total wins: its number is written to periodicity_index, its
 * per-subframe vector indices to cbk_index, B_Q14 is overwritten with the chosen vectors
 * (shifted left by 7), and sum_log_gain_Q7 is updated with the value tracked for that codebook.
 */
internal static void silk_quant_LTP_gains(
    short[] B_Q14,                      /* I/O  (un)quantized LTP gains [MAX_NB_SUBFR * LTP_ORDER]              */
    sbyte[] cbk_index,                  /* O    Codebook Index [MAX_NB_SUBFR]                                   */
    BoxedValueSbyte periodicity_index,  /* O    Periodicity Index                                               */
    BoxedValueInt sum_log_gain_Q7,      /* I/O  Cumulative max prediction gain                                  */
    int[] W_Q18,                        /* I    Error Weights in Q18 [MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER]     */
    int mu_Q9,                          /* I    Mu value (R/D tradeoff)                                         */
    int lowComplexity,                  /* I    Flag for low complexity                                         */
    int nb_subfr                        /* I    number of subframes                                             */
)
{
    const int vectorStride = SilkConstants.LTP_ORDER;
    const int weightStride = SilkConstants.LTP_ORDER * SilkConstants.LTP_ORDER;

    /* Safety margin for pitch gain control, to take into account factors
     * such as state rescaling/rewhitening. */
    int gainSafety = ((int)((0.4f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(0.4f, 7)*/;
    int seven_Q7 = ((int)((7) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(7, 7)*/;
    int maxSumLogGain_Q7 = ((int)((TuningParameters.MAX_SUM_LOG_GAIN_DB / 6.0f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.MAX_SUM_LOG_GAIN_DB / 6.0f, 7)*/;

    sbyte[] trialIndices = new sbyte[SilkConstants.MAX_NB_SUBFR];
    int lowestRateDist_Q14 = int.MaxValue;
    int winningSumLogGain_Q7 = 0;

    /***************************************************/
    /* iterate over different codebooks with different */
    /* rates/distortions, and choose best              */
    /***************************************************/
    for (int cbk = 0; cbk < 3; cbk++)
    {
        byte[] codeLengths_Q5 = Tables.silk_LTP_gain_BITS_Q5_ptrs[cbk];
        sbyte[][] vectors_Q7 = Tables.silk_LTP_vq_ptrs_Q7[cbk];
        byte[] vectorGains_Q7 = Tables.silk_LTP_vq_gain_ptrs_Q7[cbk];
        int vectorCount = Tables.silk_LTP_vq_sizes[cbk];

        int totalRateDist_Q14 = 0;
        int runningSumLogGain_Q7 = sum_log_gain_Q7.Val;

        for (int sf = 0; sf < nb_subfr; sf++)
        {
            int gainLimit_Q7 = Inlines.silk_log2lin((maxSumLogGain_Q7 - runningSumLogGain_Q7) + seven_Q7) - gainSafety;

            BoxedValueSbyte indexBox = new BoxedValueSbyte(trialIndices[sf]);
            BoxedValueInt rateDistBox = new BoxedValueInt();
            BoxedValueInt gainBox = new BoxedValueInt();

            VQ_WMat_EC.silk_VQ_WMat_EC(
                indexBox,               /* O    index of best codebook vector                   */
                rateDistBox,            /* O    best weighted quantization error + mu * rate    */
                gainBox,                /* O    sum of absolute LTP coefficients                */
                B_Q14,
                sf * vectorStride,      /* I    input vector to be quantized                    */
                W_Q18,
                sf * weightStride,      /* I    weighting matrix                                */
                vectors_Q7,             /* I    codebook                                        */
                vectorGains_Q7,         /* I    codebook effective gains                        */
                codeLengths_Q5,         /* I    code length for each codebook vector            */
                mu_Q9,                  /* I    tradeoff between weighted error and rate        */
                gainLimit_Q7,           /* I    maximum sum of absolute LTP coefficients        */
                vectorCount             /* I    number of vectors in codebook                   */
                );

            trialIndices[sf] = indexBox.Val;
            totalRateDist_Q14 = Inlines.silk_ADD_POS_SAT32(totalRateDist_Q14, rateDistBox.Val);
            runningSumLogGain_Q7 = Inlines.silk_max(
                0,
                runningSumLogGain_Q7 + Inlines.silk_lin2log(gainSafety + gainBox.Val) - seven_Q7);
        }

        /* Avoid never finding a codebook */
        totalRateDist_Q14 = Inlines.silk_min(int.MaxValue - 1, totalRateDist_Q14);

        if (totalRateDist_Q14 < lowestRateDist_Q14)
        {
            lowestRateDist_Q14 = totalRateDist_Q14;
            periodicity_index.Val = (sbyte)cbk;
            Array.Copy(trialIndices, 0, cbk_index, 0, nb_subfr);
            winningSumLogGain_Q7 = runningSumLogGain_Q7;
        }

        /* Break early in low-complexity mode if rate distortion is below threshold */
        if (lowComplexity != 0 && (totalRateDist_Q14 < Tables.silk_LTP_gain_middle_avg_RD_Q14))
        {
            break;
        }
    }

    /* Replace the input gains with the selected codebook vectors (Q7 entries shifted left by 7) */
    sbyte[][] winningVectors_Q7 = Tables.silk_LTP_vq_ptrs_Q7[periodicity_index.Val];
    for (int sf = 0; sf < nb_subfr; sf++)
    {
        for (int tap = 0; tap < SilkConstants.LTP_ORDER; tap++)
        {
            B_Q14[sf * SilkConstants.LTP_ORDER + tap] = (short)(Inlines.silk_LSHIFT(winningVectors_Q7[cbk_index[sf]][tap], 7));
        }
    }

    sum_log_gain_Q7.Val = winningSumLogGain_Q7;
}
/// <summary>
/// Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single
/// input data vector. Every codebook vector is scored as
/// (mu * code length) + (penalty for exceeding max_gain_Q7) + (weighted quantization error),
/// and the lowest score is reported. The outputs other than rate_dist_Q14 are only written
/// when at least one vector scores below int.MaxValue.
/// </summary>
/// <param name="ind">O index of best codebook vector</param>
/// <param name="rate_dist_Q14">O best weighted quant error + mu * rate</param>
/// <param name="gain_Q7">O sum of absolute LTP coefficients</param>
/// <param name="in_Q14">I input vector to be quantized</param>
/// <param name="in_Q14_ptr">I offset of the first of the 5 input elements in in_Q14</param>
/// <param name="W_Q18">I weighting matrix</param>
/// <param name="W_Q18_ptr">I offset of the first element of the 5x5 matrix in W_Q18</param>
/// <param name="cb_Q7">I codebook</param>
/// <param name="cb_gain_Q7">I codebook effective gain</param>
/// <param name="cl_Q5">I code length for each codebook vector</param>
/// <param name="mu_Q9">I tradeoff betw. weighted error and rate</param>
/// <param name="max_gain_Q7">I maximum sum of absolute LTP coefficients</param>
/// <param name="L">I number of vectors in codebook</param>
internal static void silk_VQ_WMat_EC(
    BoxedValueSbyte ind,
    BoxedValueInt rate_dist_Q14,
    BoxedValueInt gain_Q7,
    short[] in_Q14,
    int in_Q14_ptr,
    int[] W_Q18,
    int W_Q18_ptr,
    sbyte[][] cb_Q7,
    byte[] cb_gain_Q7,
    byte[] cl_Q5,
    int mu_Q9,
    int max_gain_Q7,
    int L)
{
    int w = W_Q18_ptr;

    rate_dist_Q14.Val = int.MaxValue;

    /* Loop over codebook */
    for (int vec = 0; vec < L; vec++)
    {
        sbyte[] row_Q7 = cb_Q7[vec];
        int vecGain_Q7 = cb_gain_Q7[vec];

        /* Quantization error of each of the five coefficients */
        short d0 = (short)(in_Q14[in_Q14_ptr] - Inlines.silk_LSHIFT(row_Q7[0], 7));
        short d1 = (short)(in_Q14[in_Q14_ptr + 1] - Inlines.silk_LSHIFT(row_Q7[1], 7));
        short d2 = (short)(in_Q14[in_Q14_ptr + 2] - Inlines.silk_LSHIFT(row_Q7[2], 7));
        short d3 = (short)(in_Q14[in_Q14_ptr + 3] - Inlines.silk_LSHIFT(row_Q7[3], 7));
        short d4 = (short)(in_Q14[in_Q14_ptr + 4] - Inlines.silk_LSHIFT(row_Q7[4], 7));

        /* Weighted rate */
        int cost_Q14 = Inlines.silk_SMULBB(mu_Q9, cl_Q5[vec]);

        /* Penalty for too large gain */
        int excessGain_Q7 = Inlines.silk_max(Inlines.silk_SUB32(vecGain_Q7, max_gain_Q7), 0);
        cost_Q14 = Inlines.silk_ADD_LSHIFT32(cost_Q14, excessGain_Q7, 10);

        Inlines.OpusAssert(cost_Q14 >= 0);

        /* For each row only the diagonal entry and the entries to its right are read; the
         * off-diagonal partial sum is doubled before the diagonal term is added
         * (presumably because W is symmetric -- not verifiable from this method alone). */
        int acc_Q16;

        /* first row of W_Q18 */
        acc_Q16 = Inlines.silk_SMULWB(W_Q18[w + 1], d1);
        acc_Q16 = Inlines.silk_SMLAWB(acc_Q16, W_Q18[w + 2], d2);
        acc_Q16 = Inlines.silk_SMLAWB(acc_Q16, W_Q18[w + 3], d3);
        acc_Q16 = Inlines.silk_SMLAWB(acc_Q16, W_Q18[w + 4], d4);
        acc_Q16 = Inlines.silk_LSHIFT(acc_Q16, 1);
        acc_Q16 = Inlines.silk_SMLAWB(acc_Q16, W_Q18[w], d0);
        cost_Q14 = Inlines.silk_SMLAWB(cost_Q14, acc_Q16, d0);

        /* second row of W_Q18 */
        acc_Q16 = Inlines.silk_SMULWB(W_Q18[w + 7], d2);
        acc_Q16 = Inlines.silk_SMLAWB(acc_Q16, W_Q18[w + 8], d3);
        acc_Q16 = Inlines.silk_SMLAWB(acc_Q16, W_Q18[w + 9], d4);
        acc_Q16 = Inlines.silk_LSHIFT(acc_Q16, 1);
        acc_Q16 = Inlines.silk_SMLAWB(acc_Q16, W_Q18[w + 6], d1);
        cost_Q14 = Inlines.silk_SMLAWB(cost_Q14, acc_Q16, d1);

        /* third row of W_Q18 */
        acc_Q16 = Inlines.silk_SMULWB(W_Q18[w + 13], d3);
        acc_Q16 = Inlines.silk_SMLAWB(acc_Q16, W_Q18[w + 14], d4);
        acc_Q16 = Inlines.silk_LSHIFT(acc_Q16, 1);
        acc_Q16 = Inlines.silk_SMLAWB(acc_Q16, W_Q18[w + 12], d2);
        cost_Q14 = Inlines.silk_SMLAWB(cost_Q14, acc_Q16, d2);

        /* fourth row of W_Q18 */
        acc_Q16 = Inlines.silk_SMULWB(W_Q18[w + 19], d4);
        acc_Q16 = Inlines.silk_LSHIFT(acc_Q16, 1);
        acc_Q16 = Inlines.silk_SMLAWB(acc_Q16, W_Q18[w + 18], d3);
        cost_Q14 = Inlines.silk_SMLAWB(cost_Q14, acc_Q16, d3);

        /* last row of W_Q18 */
        acc_Q16 = Inlines.silk_SMULWB(W_Q18[w + 24], d4);
        cost_Q14 = Inlines.silk_SMLAWB(cost_Q14, acc_Q16, d4);

        Inlines.OpusAssert(cost_Q14 >= 0);

        /* find best */
        if (cost_Q14 < rate_dist_Q14.Val)
        {
            rate_dist_Q14.Val = cost_Q14;
            ind.Val = (sbyte)vec;
            gain_Q7.Val = vecGain_Q7;
        }
    }
}
/// <summary>
/// Find pitch lags. A sine-windowed copy of the last pitch_LPC_win_length samples of the
/// analysis buffer is used to estimate LPC coefficients; the whole analysis buffer is then
/// passed through the LPC analysis filter into res, and, unless the frame is flagged as
/// having no voice activity or is the first frame after a reset, the pitch analysis core
/// is run on that residual to classify the frame as voiced/unvoiced and fill in the lags.
/// </summary>
/// <param name="psEnc">I/O encoder state</param>
/// <param name="psEncCtrl">I/O encoder control</param>
/// <param name="res">O residual</param>
/// <param name="x">I Speech signal</param>
/// <param name="x_ptr">I offset into x; the analysis buffer starts ltp_mem_length samples before it</param>
internal static void silk_find_pitch_lags(
    SilkChannelEncoder psEnc,
    SilkEncoderControl psEncCtrl,
    short[] res,
    short[] x,
    int x_ptr)
{
    int[] autoCorr = new int[SilkConstants.MAX_FIND_PITCH_LPC_ORDER + 1];
    short[] reflCoef_Q15 = new short[SilkConstants.MAX_FIND_PITCH_LPC_ORDER];
    int[] lpc_Q24 = new int[SilkConstants.MAX_FIND_PITCH_LPC_ORDER];
    short[] lpc_Q12 = new short[SilkConstants.MAX_FIND_PITCH_LPC_ORDER];

    /******************************************/
    /* Set up buffer lengths etc based on Fs  */
    /******************************************/
    int lookahead = psEnc.la_pitch;
    int windowLength = psEnc.pitch_LPC_win_length;
    int lpcOrder = psEnc.pitchEstimationLPCOrder;
    int analysisLength = lookahead + psEnc.frame_length + psEnc.ltp_mem_length;

    /* Safety check */
    Inlines.OpusAssert(analysisLength >= windowLength);

    int analysisStart = x_ptr - psEnc.ltp_mem_length;

    /*************************************/
    /* Estimate LPC AR coefficients      */
    /*************************************/

    /* Windowed signal: sine-window call (type 1), flat middle copy, sine-window call (type 2) */
    short[] windowed = new short[windowLength];
    int windowStart = analysisStart + analysisLength - windowLength;
    int flatLength = windowLength - Inlines.silk_LSHIFT(lookahead, 1);

    /* First LA_LTP samples */
    ApplySineWindow.silk_apply_sine_window(windowed, 0, x, windowStart, 1, lookahead);

    /* Middle un-windowed samples */
    Array.Copy(x, windowStart + lookahead, windowed, lookahead, flatLength);

    /* Last LA_LTP samples */
    ApplySineWindow.silk_apply_sine_window(
        windowed,
        lookahead + flatLength,
        x,
        windowStart + lookahead + flatLength,
        2,
        lookahead);

    /* Calculate autocorrelation sequence (the scale reported by silk_autocorr is not used here) */
    BoxedValueInt autoCorrScale = new BoxedValueInt();
    Autocorrelation.silk_autocorr(autoCorr, autoCorrScale, windowed, windowLength, lpcOrder + 1);

    /* Add white noise, as fraction of energy */
    autoCorr[0] = Inlines.silk_SMLAWB(
        autoCorr[0],
        autoCorr[0],
        ((int)((TuningParameters.FIND_PITCH_WHITE_NOISE_FRACTION) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.FIND_PITCH_WHITE_NOISE_FRACTION, 16)*/) + 1;

    /* Calculate the reflection coefficients using schur */
    int residualEnergy = Schur.silk_schur(reflCoef_Q15, autoCorr, lpcOrder);

    /* Prediction gain */
    psEncCtrl.predGain_Q16 = Inlines.silk_DIV32_varQ(autoCorr[0], Inlines.silk_max_int(residualEnergy, 1), 16);

    /* Convert reflection coefficients to prediction coefficients */
    K2A.silk_k2a(lpc_Q24, reflCoef_Q15, lpcOrder);

    /* Convert From 32 bit Q24 to 16 bit Q12 coefs */
    for (int c = 0; c < lpcOrder; c++)
    {
        lpc_Q12[c] = (short)Inlines.silk_SAT16(Inlines.silk_RSHIFT(lpc_Q24[c], 12));
    }

    /* Do BWE */
    BWExpander.silk_bwexpander(
        lpc_Q12,
        lpcOrder,
        ((int)((TuningParameters.FIND_PITCH_BANDWIDTH_EXPANSION) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.FIND_PITCH_BANDWIDTH_EXPANSION, 16)*/);

    /*****************************************/
    /* LPC analysis filtering                */
    /*****************************************/
    Filters.silk_LPC_analysis_filter(res, 0, x, analysisStart, lpc_Q12, 0, analysisLength, lpcOrder);

    /* No pitch search for inactive frames or right after a reset: clear the pitch outputs */
    if (psEnc.indices.signalType == SilkConstants.TYPE_NO_VOICE_ACTIVITY || psEnc.first_frame_after_reset != 0)
    {
        Arrays.MemSetInt(psEncCtrl.pitchL, 0, SilkConstants.MAX_NB_SUBFR);
        psEnc.indices.lagIndex = 0;
        psEnc.indices.contourIndex = 0;
        psEnc.LTPCorr_Q15 = 0;
        return;
    }

    /* Threshold for pitch estimator: 0.6 adjusted by LPC order, speech activity,
     * previous signal type and input tilt, then saturated to 16 bits */
    int threshold_Q13 = ((int)((0.6f) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(0.6f, 13)*/;
    threshold_Q13 = Inlines.silk_SMLABB(
        threshold_Q13,
        ((int)((-0.004f) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(-0.004f, 13)*/,
        lpcOrder);
    threshold_Q13 = Inlines.silk_SMLAWB(
        threshold_Q13,
        ((int)((-0.1f) * ((long)1 << (21)) + 0.5)) /*Inlines.SILK_CONST(-0.1f, 21)*/,
        psEnc.speech_activity_Q8);
    threshold_Q13 = Inlines.silk_SMLABB(
        threshold_Q13,
        ((int)((-0.15f) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(-0.15f, 13)*/,
        Inlines.silk_RSHIFT(psEnc.prevSignalType, 1));
    threshold_Q13 = Inlines.silk_SMLAWB(
        threshold_Q13,
        ((int)((-0.1f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(-0.1f, 14)*/,
        psEnc.input_tilt_Q15);
    threshold_Q13 = Inlines.silk_SAT16(threshold_Q13);

    /*****************************************/
    /* Call pitch estimator                  */
    /*****************************************/
    BoxedValueShort lagIndexBox = new BoxedValueShort(psEnc.indices.lagIndex);
    BoxedValueSbyte contourIndexBox = new BoxedValueSbyte(psEnc.indices.contourIndex);
    BoxedValueInt ltpCorrBox = new BoxedValueInt(psEnc.LTPCorr_Q15);

    int analysisResult = PitchAnalysisCore.silk_pitch_analysis_core(
        res,
        psEncCtrl.pitchL,
        lagIndexBox,
        contourIndexBox,
        ltpCorrBox,
        psEnc.prevLag,
        psEnc.pitchEstimationThreshold_Q16,
        threshold_Q13,
        psEnc.fs_kHz,
        psEnc.pitchEstimationComplexity,
        psEnc.nb_subfr);

    /* A zero return value from the analysis core marks the frame as voiced */
    if (analysisResult == 0)
    {
        psEnc.indices.signalType = SilkConstants.TYPE_VOICED;
    }
    else
    {
        psEnc.indices.signalType = SilkConstants.TYPE_UNVOICED;
    }

    /* Copy the boxed outputs back into the encoder state */
    psEnc.indices.lagIndex = lagIndexBox.Val;
    psEnc.indices.contourIndex = contourIndexBox.Val;
    psEnc.LTPCorr_Q15 = ltpCorrBox.Val;
}
/// <summary>
/// Gain scalar quantization with hysteresis, uniform on log scale.
/// For every subframe the gain is converted to a log-domain index, coded either as a full
/// index (first subframe when <paramref name="conditional"/> is 0) or as a delta against the
/// running index in <paramref name="prev_ind"/>, and then overwritten in
/// <paramref name="gain_Q16"/> by the value the running index maps back to.
/// The running index is kept within the index range so that it matches the state kept by
/// silk_gains_dequant, which clamps its own running index to [0, N_LEVELS_QGAIN - 1].
/// </summary>
/// <param name="ind">O gain indices [MAX_NB_SUBFR]</param>
/// <param name="gain_Q16">I/O gains (quantized out) [MAX_NB_SUBFR]</param>
/// <param name="prev_ind">I/O last index in previous frame. [Porting note] original implementation passed this as an int8*</param>
/// <param name="conditional">I first gain is delta coded if 1</param>
/// <param name="nb_subfr">I number of subframes</param>
internal static void silk_gains_quant(
    sbyte[] ind,
    int[] gain_Q16,
    BoxedValueSbyte prev_ind,
    int conditional,
    int nb_subfr)
{
    int k, double_step_size_threshold;

    for (k = 0; k < nb_subfr; k++)
    {
        /* Convert to log scale, scale, floor() */
        ind[k] = (sbyte)(Inlines.silk_SMULWB(SCALE_Q16, Inlines.silk_lin2log(gain_Q16[k]) - OFFSET));

        /* Round towards previous quantized gain (hysteresis) */
        if (ind[k] < prev_ind.Val)
        {
            ind[k]++;
        }

        ind[k] = (sbyte)(Inlines.silk_LIMIT_int(ind[k], 0, SilkConstants.N_LEVELS_QGAIN - 1));

        /* Compute delta indices and limit */
        if (k == 0 && conditional == 0)
        {
            /* Full index; limited from below by prev_ind + MIN_DELTA_GAIN_QUANT */
            ind[k] = (sbyte)(Inlines.silk_LIMIT_int(ind[k], prev_ind.Val + SilkConstants.MIN_DELTA_GAIN_QUANT, SilkConstants.N_LEVELS_QGAIN - 1));
            prev_ind.Val = ind[k];
        }
        else
        {
            /* Delta index */
            ind[k] = (sbyte)(ind[k] - prev_ind.Val);

            /* Double the quantization step size for large gain increases, so that the max gain level can be reached */
            double_step_size_threshold = 2 * SilkConstants.MAX_DELTA_GAIN_QUANT - SilkConstants.N_LEVELS_QGAIN + prev_ind.Val;
            if (ind[k] > double_step_size_threshold)
            {
                ind[k] = (sbyte)(double_step_size_threshold + Inlines.silk_RSHIFT(ind[k] - double_step_size_threshold + 1, 1));
            }

            ind[k] = (sbyte)(Inlines.silk_LIMIT_int(ind[k], SilkConstants.MIN_DELTA_GAIN_QUANT, SilkConstants.MAX_DELTA_GAIN_QUANT));

            /* Accumulate deltas */
            if (ind[k] > double_step_size_threshold)
            {
                prev_ind.Val += (sbyte)(Inlines.silk_LSHIFT(ind[k], 1) - double_step_size_threshold);

                /* The doubled step can land one past the top level (the largest delta yields
                 * N_LEVELS_QGAIN). The dequantizer clamps its running index to
                 * N_LEVELS_QGAIN - 1, so clamp here too; otherwise the two running indices
                 * (and every later delta) would disagree. */
                prev_ind.Val = (sbyte)(Inlines.silk_min_32(prev_ind.Val, SilkConstants.N_LEVELS_QGAIN - 1));
            }
            else
            {
                prev_ind.Val += ind[k];
            }

            /* Shift to make non-negative */
            ind[k] -= SilkConstants.MIN_DELTA_GAIN_QUANT;
        }

        /* Scale and convert to linear scale */
        gain_Q16[k] = Inlines.silk_log2lin(Inlines.silk_min_32(Inlines.silk_SMULWB(INV_SCALE_Q16, prev_ind.Val) + OFFSET, 3967)); /* 3967 = 31 in Q7 */
    }
}