/// <summary>
/// Decodes one SILK frame for every internal channel and writes the result, resampled to
/// <c>decControl.API_sampleRate</c>, into <paramref name="samplesOut"/> starting at
/// <paramref name="samplesOut_ptr"/>. With two API channels the output is interleaved.
/// </summary>
/// <remarks>
/// Order of work: per-packet setup (frame counters, frames per packet, internal rate),
/// reading the VAD/LBRR flags and skipping LBRR payload on the first call for a payload,
/// reading the stereo predictor, decoding each channel, mid/side to left/right conversion,
/// resampling and interleaving. Bitstream symbols are read from <paramref name="psRangeDec"/>
/// in exactly that order, so the statement order below matters.
/// </remarks>
/// <param name="psDec">I/O decoder state (channel states, stereo state, channel counts of the previous call).</param>
/// <param name="decControl">I/O control structure; <c>prevPitchLag</c> is written before a normal return.</param>
/// <param name="lostFlag">I compared against <c>DecoderAPIFlag.FLAG_DECODE_NORMAL</c>, <c>FLAG_PACKET_LOST</c> and <c>FLAG_DECODE_LBRR</c>.</param>
/// <param name="newPacketFlag">I non-zero resets the per-channel decoded-frame counters.</param>
/// <param name="psRangeDec">I/O range decoder the symbols are read from.</param>
/// <param name="samplesOut">O output buffer; also used as scratch space for the decoded frame when it is large enough.</param>
/// <param name="samplesOut_ptr">I index in <paramref name="samplesOut"/> at which output starts.</param>
/// <param name="nSamplesOut">O number of output samples per channel at the API rate; 0 on the early error returns.</param>
/// <returns>
/// <c>SilkError.SILK_NO_ERROR</c> plus the sum of the codes returned by the called routines, or
/// <c>SILK_DEC_INVALID_FRAME_SIZE</c> / <c>SILK_DEC_INVALID_SAMPLING_FREQUENCY</c> for invalid control values.
/// </returns>
internal static int silk_Decode( /* O Returns error code */
    SilkDecoder psDec, /* I/O State */
    DecControlState decControl, /* I/O Control Structure */
    int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */
    int newPacketFlag, /* I Indicates first decoder call for this packet */
    EntropyCoder psRangeDec, /* I/O Compressor data structure */
    short[] samplesOut, /* O Decoded output speech vector */
    int samplesOut_ptr,
    out int nSamplesOut /* O Number of samples decoded */
    )
{
    int i, n, decode_only_middle = 0, ret = SilkError.SILK_NO_ERROR;
    int LBRR_symbol;
    BoxedValueInt nSamplesOutDec = new BoxedValueInt();
    short[] samplesOut_tmp;
    int[] samplesOut_tmp_ptrs = new int[2];
    short[] samplesOut1_tmp_storage1;
    short[] samplesOut1_tmp_storage2;
    short[] samplesOut2_tmp;
    int[] MS_pred_Q13 = new int[] { 0, 0 };
    short[] resample_out;
    int resample_out_ptr;
    SilkChannelDecoder[] channel_state = psDec.channel_state;
    int has_side;
    int stereo_to_mono;
    int delay_stack_alloc;

    /* The out parameter must be assigned before any of the early error returns below */
    nSamplesOut = 0;

    Inlines.OpusAssert(decControl.nChannelsInternal == 1 || decControl.nChannelsInternal == 2);

    /**********************************/
    /* Test if first frame in payload */
    /**********************************/
    if (newPacketFlag != 0)
    {
        for (n = 0; n < decControl.nChannelsInternal; n++)
        {
            channel_state[n].nFramesDecoded = 0; /* Used to count frames in packet */
        }
    }

    /* If Mono -> Stereo transition in bitstream: init state of second channel */
    if (decControl.nChannelsInternal > psDec.nChannelsInternal)
    {
        ret += channel_state[1].silk_init_decoder();
    }

    /* psDec.nChannelsInternal still holds the previous call's value here (it is updated further down).
     * The flag is set when this call is mono, the previous one was stereo, and the requested internal
     * rate equals the rate channel 0 is currently configured for. */
    stereo_to_mono = (decControl.nChannelsInternal == 1 && psDec.nChannelsInternal == 2 && (decControl.internalSampleRate == 1000 * channel_state[0].fs_kHz)) ? 1 : 0;

    /* First frame of a packet: derive frames per packet and subframes per frame from the payload
     * size, then configure the internal sampling rate of every channel */
    if (channel_state[0].nFramesDecoded == 0)
    {
        for (n = 0; n < decControl.nChannelsInternal; n++)
        {
            int fs_kHz_dec;
            if (decControl.payloadSize_ms == 0)
            {
                /* Assuming packet loss, use 10 ms */
                channel_state[n].nFramesPerPacket = 1;
                channel_state[n].nb_subfr = 2;
            }
            else if (decControl.payloadSize_ms == 10)
            {
                channel_state[n].nFramesPerPacket = 1;
                channel_state[n].nb_subfr = 2;
            }
            else if (decControl.payloadSize_ms == 20)
            {
                channel_state[n].nFramesPerPacket = 1;
                channel_state[n].nb_subfr = 4;
            }
            else if (decControl.payloadSize_ms == 40)
            {
                channel_state[n].nFramesPerPacket = 2;
                channel_state[n].nb_subfr = 4;
            }
            else if (decControl.payloadSize_ms == 60)
            {
                channel_state[n].nFramesPerPacket = 3;
                channel_state[n].nb_subfr = 4;
            }
            else
            {
                Inlines.OpusAssert(false);
                return(SilkError.SILK_DEC_INVALID_FRAME_SIZE);
            }

            /* (Hz >> 10) + 1 maps 8000 / 12000 / 16000 Hz to 8 / 12 / 16 (7+1, 11+1, 15+1) */
            fs_kHz_dec = (decControl.internalSampleRate >> 10) + 1;
            if (fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16)
            {
                Inlines.OpusAssert(false);
                return(SilkError.SILK_DEC_INVALID_SAMPLING_FREQUENCY);
            }
            ret += channel_state[n].silk_decoder_set_fs(fs_kHz_dec, decControl.API_sampleRate);
        }
    }

    /* Both channel counts are 2 now but at least one of them was 1 on the previous call:
     * clear the stereo predictor / side memory and start channel 1's resampler from channel 0's state */
    if (decControl.nChannelsAPI == 2 && decControl.nChannelsInternal == 2 && (psDec.nChannelsAPI == 1 || psDec.nChannelsInternal == 1))
    {
        Arrays.MemSetShort(psDec.sStereo.pred_prev_Q13, 0, 2);
        Arrays.MemSetShort(psDec.sStereo.sSide, 0, 2);
        channel_state[1].resampler_state.Assign(channel_state[0].resampler_state);
    }
    psDec.nChannelsAPI = decControl.nChannelsAPI;
    psDec.nChannelsInternal = decControl.nChannelsInternal;

    /* Note: this range check runs after silk_decoder_set_fs() above has already received API_sampleRate */
    if (decControl.API_sampleRate > (int)SilkConstants.MAX_API_FS_KHZ * 1000 || decControl.API_sampleRate < 8000)
    {
        ret = SilkError.SILK_DEC_INVALID_SAMPLING_FREQUENCY;
        return(ret);
    }

    if (lostFlag != DecoderAPIFlag.FLAG_PACKET_LOST && channel_state[0].nFramesDecoded == 0)
    {
        /* First decoder call for this payload */
        /* Decode VAD flags and LBRR flag */
        for (n = 0; n < decControl.nChannelsInternal; n++)
        {
            for (i = 0; i < channel_state[n].nFramesPerPacket; i++)
            {
                channel_state[n].VAD_flags[i] = psRangeDec.dec_bit_logp(1);
            }
            channel_state[n].LBRR_flag = psRangeDec.dec_bit_logp(1);
        }

        /* Decode LBRR flags */
        for (n = 0; n < decControl.nChannelsInternal; n++)
        {
            Arrays.MemSetInt(channel_state[n].LBRR_flags, 0, SilkConstants.MAX_FRAMES_PER_PACKET);
            if (channel_state[n].LBRR_flag != 0)
            {
                if (channel_state[n].nFramesPerPacket == 1)
                {
                    /* Single frame: the packet-level flag is the per-frame flag, nothing more to read */
                    channel_state[n].LBRR_flags[0] = 1;
                }
                else
                {
                    /* Decoded symbol + 1 is a bitmask: bit i is the LBRR flag of frame i */
                    LBRR_symbol = psRangeDec.dec_icdf(Tables.silk_LBRR_flags_iCDF_ptr[channel_state[n].nFramesPerPacket - 2], 8) + 1;
                    for (i = 0; i < channel_state[n].nFramesPerPacket; i++)
                    {
                        channel_state[n].LBRR_flags[i] = Inlines.silk_RSHIFT(LBRR_symbol, i) & 1;
                    }
                }
            }
        }

        if (lostFlag == DecoderAPIFlag.FLAG_DECODE_NORMAL)
        {
            /* Regular decoding: skip all LBRR data */
            /* The LBRR symbols are still decoded so the range decoder moves past them;
             * the pulses buffer is scratch and is not used afterwards */
            for (i = 0; i < channel_state[0].nFramesPerPacket; i++)
            {
                for (n = 0; n < decControl.nChannelsInternal; n++)
                {
                    if (channel_state[n].LBRR_flags[i] != 0)
                    {
                        short[] pulses = new short[SilkConstants.MAX_FRAME_LENGTH];
                        int condCoding;

                        if (decControl.nChannelsInternal == 2 && n == 0)
                        {
                            Stereo.silk_stereo_decode_pred(psRangeDec, MS_pred_Q13);
                            if (channel_state[1].LBRR_flags[i] == 0)
                            {
                                /* Boxed copy stands in for the by-reference int of the callee */
                                BoxedValueInt decodeOnlyMiddleBoxed = new BoxedValueInt(decode_only_middle);
                                Stereo.silk_stereo_decode_mid_only(psRangeDec, decodeOnlyMiddleBoxed);
                                decode_only_middle = decodeOnlyMiddleBoxed.Val;
                            }
                        }

                        /* Use conditional coding if previous frame available */
                        if (i > 0 && (channel_state[n].LBRR_flags[i - 1] != 0))
                        {
                            condCoding = SilkConstants.CODE_CONDITIONALLY;
                        }
                        else
                        {
                            condCoding = SilkConstants.CODE_INDEPENDENTLY;
                        }
                        DecodeIndices.silk_decode_indices(channel_state[n], psRangeDec, i, 1, condCoding);
                        DecodePulses.silk_decode_pulses(psRangeDec, pulses, channel_state[n].indices.signalType, channel_state[n].indices.quantOffsetType, channel_state[n].frame_length);
                    }
                }
            }
        }
    }

    /* Get MS predictor index */
    if (decControl.nChannelsInternal == 2)
    {
        if (lostFlag == DecoderAPIFlag.FLAG_DECODE_NORMAL || (lostFlag == DecoderAPIFlag.FLAG_DECODE_LBRR && channel_state[0].LBRR_flags[channel_state[0].nFramesDecoded] == 1))
        {
            Stereo.silk_stereo_decode_pred(psRangeDec, MS_pred_Q13);
            /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
            /* (for normal decoding the side channel's VAD flag plays the same role) */
            if ((lostFlag == DecoderAPIFlag.FLAG_DECODE_NORMAL && channel_state[1].VAD_flags[channel_state[0].nFramesDecoded] == 0) || (lostFlag == DecoderAPIFlag.FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[channel_state[0].nFramesDecoded] == 0))
            {
                BoxedValueInt decodeOnlyMiddleBoxed = new BoxedValueInt(decode_only_middle);
                Stereo.silk_stereo_decode_mid_only(psRangeDec, decodeOnlyMiddleBoxed);
                decode_only_middle = decodeOnlyMiddleBoxed.Val;
            }
            else
            {
                decode_only_middle = 0;
            }
        }
        else
        {
            /* No predictor in the bitstream for this frame: reuse the previous one */
            for (n = 0; n < 2; n++)
            {
                MS_pred_Q13[n] = psDec.sStereo.pred_prev_Q13[n];
            }
        }
    }

    /* Reset side channel decoder prediction memory for first frame with side coding */
    if (decControl.nChannelsInternal == 2 && decode_only_middle == 0 && psDec.prev_decode_only_middle == 1)
    {
        Arrays.MemSetShort(psDec.channel_state[1].outBuf, 0, SilkConstants.MAX_FRAME_LENGTH + 2 * SilkConstants.MAX_SUB_FRAME_LENGTH);
        Arrays.MemSetInt(psDec.channel_state[1].sLPC_Q14_buf, 0, SilkConstants.MAX_LPC_ORDER);
        psDec.channel_state[1].lagPrev = 100;
        psDec.channel_state[1].LastGainIndex = 10;
        psDec.channel_state[1].prevSignalType = SilkConstants.TYPE_NO_VOICE_ACTIVITY;
        psDec.channel_state[1].first_frame_after_reset = 1;
    }

    /* Check if the temp buffer fits into the output PCM buffer. If it fits,
     * we can delay allocating the temp buffer until after the SILK peak stack
     * usage. We need to use a < and not a <= because of the two extra samples. */
    /* In this port the flag only selects where the frame is decoded to: directly into samplesOut
     * (copied to a fresh array before resampling, see below) or into a fresh temp array. */
    delay_stack_alloc = (decControl.internalSampleRate * decControl.nChannelsInternal < decControl.API_sampleRate * decControl.nChannelsAPI) ? 1 : 0;

    /* Temp buffer layout: each channel owns frame_length + 2 slots; samplesOut_tmp_ptrs[n] is the
     * start of channel n's region and the decoded frame itself begins 2 slots after that start */
    if (delay_stack_alloc != 0)
    {
        samplesOut_tmp = samplesOut;
        samplesOut_tmp_ptrs[0] = samplesOut_ptr;
        samplesOut_tmp_ptrs[1] = samplesOut_ptr + channel_state[0].frame_length + 2;
    }
    else
    {
        samplesOut1_tmp_storage1 = new short[decControl.nChannelsInternal * (channel_state[0].frame_length + 2)];
        samplesOut_tmp = samplesOut1_tmp_storage1;
        samplesOut_tmp_ptrs[0] = 0;
        samplesOut_tmp_ptrs[1] = channel_state[0].frame_length + 2;
    }

    if (lostFlag == DecoderAPIFlag.FLAG_DECODE_NORMAL)
    {
        has_side = (decode_only_middle == 0) ? 1 : 0;
    }
    else
    {
        has_side = (psDec.prev_decode_only_middle == 0 || (decControl.nChannelsInternal == 2 && lostFlag == DecoderAPIFlag.FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[channel_state[1].nFramesDecoded] == 1)) ? 1 : 0;
    }

    /* Call decoder for one frame */
    for (n = 0; n < decControl.nChannelsInternal; n++)
    {
        if (n == 0 || (has_side != 0))
        {
            int FrameIndex;
            int condCoding;

            /* Channel 0's counter has not been incremented yet when n == 0 but has been when n == 1,
             * so subtracting n gives the index of the current frame for both channels */
            FrameIndex = channel_state[0].nFramesDecoded - n;

            /* Use independent coding if no previous frame available */
            if (FrameIndex <= 0)
            {
                condCoding = SilkConstants.CODE_INDEPENDENTLY;
            }
            else if (lostFlag == DecoderAPIFlag.FLAG_DECODE_LBRR)
            {
                condCoding = (channel_state[n].LBRR_flags[FrameIndex - 1] != 0) ? SilkConstants.CODE_CONDITIONALLY : SilkConstants.CODE_INDEPENDENTLY;
            }
            else if (n > 0 && (psDec.prev_decode_only_middle != 0))
            {
                /* If we skipped a side frame in this packet, we don't
                 * need LTP scaling; the LTP state is well-defined. */
                condCoding = SilkConstants.CODE_INDEPENDENTLY_NO_LTP_SCALING;
            }
            else
            {
                condCoding = SilkConstants.CODE_CONDITIONALLY;
            }
            ret += channel_state[n].silk_decode_frame(psRangeDec, samplesOut_tmp, samplesOut_tmp_ptrs[n] + 2, nSamplesOutDec, lostFlag, condCoding);
        }
        else
        {
            /* Side channel not coded: zero its frame. nSamplesOutDec is presumably filled in by the
             * channel-0 silk_decode_frame() call above - confirm against that method. */
            Arrays.MemSetWithOffset <short>(samplesOut_tmp, 0, samplesOut_tmp_ptrs[n] + 2, nSamplesOutDec.Val);
        }
        channel_state[n].nFramesDecoded++;
    }

    if (decControl.nChannelsAPI == 2 && decControl.nChannelsInternal == 2)
    {
        /* Convert Mid/Side to Left/Right */
        Stereo.silk_stereo_MS_to_LR(psDec.sStereo, samplesOut_tmp, samplesOut_tmp_ptrs[0], samplesOut_tmp, samplesOut_tmp_ptrs[1], MS_pred_Q13, channel_state[0].fs_kHz, nSamplesOutDec.Val);
    }
    else
    {
        /* Buffering */
        /* Fill the 2 leading slots from sStereo.sMid, then save the 2 slots that start
         * nSamplesOutDec after the region start (the tail of this frame) back into sMid */
        Array.Copy(psDec.sStereo.sMid, 0, samplesOut_tmp, samplesOut_tmp_ptrs[0], 2);
        Array.Copy(samplesOut_tmp, samplesOut_tmp_ptrs[0] + nSamplesOutDec.Val, psDec.sStereo.sMid, 0, 2);
    }

    /* Number of output samples */
    nSamplesOut = Inlines.silk_DIV32(nSamplesOutDec.Val * decControl.API_sampleRate, Inlines.silk_SMULBB(channel_state[0].fs_kHz, 1000));

    /* Set up pointers to temp buffers */
    /* Stereo output is resampled into a scratch array and interleaved afterwards;
     * mono output is resampled straight into samplesOut */
    if (decControl.nChannelsAPI == 2)
    {
        samplesOut2_tmp = new short[nSamplesOut];
        resample_out = samplesOut2_tmp;
        resample_out_ptr = 0;
    }
    else
    {
        resample_out = samplesOut;
        resample_out_ptr = samplesOut_ptr;
    }

    /* The frame was decoded in place inside samplesOut: move it to its own array so the
     * resampler / interleaver can overwrite samplesOut without clobbering its input */
    if (delay_stack_alloc != 0)
    {
        samplesOut1_tmp_storage2 = new short[decControl.nChannelsInternal * (channel_state[0].frame_length + 2)];
        Array.Copy(samplesOut, samplesOut_ptr, samplesOut1_tmp_storage2, 0, decControl.nChannelsInternal * (channel_state[0].frame_length + 2));
        samplesOut_tmp = samplesOut1_tmp_storage2;
        samplesOut_tmp_ptrs[0] = 0;
        samplesOut_tmp_ptrs[1] = channel_state[0].frame_length + 2;
    }

    for (n = 0; n < Inlines.silk_min(decControl.nChannelsAPI, decControl.nChannelsInternal); n++)
    {
        /* Resample decoded signal to API_sampleRate */
        /* Input starts at region start + 1, i.e. one slot before the decoded frame */
        ret += Resampler.silk_resampler(channel_state[n].resampler_state, resample_out, resample_out_ptr, samplesOut_tmp, samplesOut_tmp_ptrs[n] + 1, nSamplesOutDec.Val);

        /* Interleave if stereo output and stereo stream */
        if (decControl.nChannelsAPI == 2)
        {
            int nptr = samplesOut_ptr + n;
            for (i = 0; i < nSamplesOut; i++)
            {
                samplesOut[nptr + 2 * i] = resample_out[resample_out_ptr + i];
            }
        }
    }

    /* Create two channel output from mono stream */
    if (decControl.nChannelsAPI == 2 && decControl.nChannelsInternal == 1)
    {
        if (stereo_to_mono != 0)
        {
            /* Resample right channel for newly collapsed stereo just in case
             * we weren't doing collapsing when switching to mono */
            ret += Resampler.silk_resampler(channel_state[1].resampler_state, resample_out, resample_out_ptr, samplesOut_tmp, samplesOut_tmp_ptrs[0] + 1, nSamplesOutDec.Val);

            for (i = 0; i < nSamplesOut; i++)
            {
                samplesOut[samplesOut_ptr + 1 + 2 * i] = resample_out[resample_out_ptr + i];
            }
        }
        else
        {
            /* Duplicate the left sample of each interleaved pair into the right slot */
            for (i = 0; i < nSamplesOut; i++)
            {
                samplesOut[samplesOut_ptr + 1 + 2 * i] = samplesOut[samplesOut_ptr + 2 * i];
            }
        }
    }

    /* Export pitch lag, measured at 48 kHz sampling rate */
    if (channel_state[0].prevSignalType == SilkConstants.TYPE_VOICED)
    {
        /* (fs_kHz - 8) >> 2 maps 8 / 12 / 16 kHz to index 0 / 1 / 2; the factors are 48 / fs_kHz */
        int[] mult_tab = { 6, 4, 3 };
        decControl.prevPitchLag = channel_state[0].lagPrev * mult_tab[(channel_state[0].fs_kHz - 8) >> 2];
    }
    else
    {
        decControl.prevPitchLag = 0;
    }

    if (lostFlag == DecoderAPIFlag.FLAG_PACKET_LOST)
    {
        /* On packet loss, remove the gain clamping to prevent having the energy "bounce back"
         * if we lose packets when the energy is going down */
        for (i = 0; i < psDec.nChannelsInternal; i++)
        {
            psDec.channel_state[i].LastGainIndex = 10;
        }
    }
    else
    {
        psDec.prev_decode_only_middle = decode_only_middle;
    }

    return(ret);
}
/// <summary>
/// Encode frame with Silk.
/// Buffers and resamples the input to the internal rate and, whenever a full internal frame is
/// available, encodes it for every internal channel into <paramref name="psRangeEnc"/>.
/// Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what
/// encControl.payloadSize_ms is set to.
/// </summary>
/// <remarks>
/// Symbols are written to the range encoder in a fixed order (flag placeholder, LBRR data of the
/// previous packet, stereo predictor, frame data), so the statement order below matters.
/// On return <c>allowBandwidthSwitch</c>, <c>inWBmodeWithoutVariableLP</c>,
/// <c>internalSampleRate</c> and <c>stereoWidth_Q14</c> of <paramref name="encControl"/> are updated.
/// </remarks>
/// <param name="psEnc">I/O State</param>
/// <param name="encControl">I Control status (some fields are written back, see remarks)</param>
/// <param name="samplesIn">I Speech sample input vector (interleaved when encControl.nChannelsAPI is 2)</param>
/// <param name="nSamplesIn">I Number of samples in input vector (per channel)</param>
/// <param name="psRangeEnc">I/O Compressor data structure</param>
/// <param name="nBytesOut">I/O Number of bytes in payload. Note: Val is reset to 0 on entry, so the incoming value is not read by this method.</param>
/// <param name="prefillFlag">I Flag to indicate prefilling buffers no coding</param>
/// <returns>
/// error code: <c>SilkError.SILK_NO_ERROR</c> plus the sum of the codes returned by the called routines,
/// or <c>SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES</c> when the input length is not acceptable.
/// </returns>
internal static int silk_Encode(
    SilkEncoder psEnc,
    EncControlState encControl,
    short[] samplesIn,
    int nSamplesIn,
    EntropyCoder psRangeEnc,
    BoxedValueInt nBytesOut,
    int prefillFlag)
{
    int ret = SilkError.SILK_NO_ERROR;
    int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0;
    int nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms;
    int nSamplesFromInput = 0, nSamplesFromInputMax;
    int speech_act_thr_for_switch_Q8;
    int TargetRate_bps, channelRate_bps, LBRR_symbol, sum;
    int[] MStargetRates_bps = new int[2];
    short[] buf;
    int transition, curr_block, tot_blocks;

    nBytesOut.Val = 0;

    if (encControl.reducedDependency != 0)
    {
        psEnc.state_Fxx[0].first_frame_after_reset = 1;
        psEnc.state_Fxx[1].first_frame_after_reset = 1;
    }
    psEnc.state_Fxx[0].nFramesEncoded = psEnc.state_Fxx[1].nFramesEncoded = 0;

    /* Check values in encoder control structure */
    ret += encControl.check_control_input();
    if (ret != SilkError.SILK_NO_ERROR)
    {
        Inlines.OpusAssert(false);
        return(ret);
    }

    encControl.switchReady = 0;

    if (encControl.nChannelsInternal > psEnc.nChannelsInternal)
    {
        /* Mono -> Stereo transition: init state of second channel and stereo state */
        ret += SilkEncoder.silk_init_encoder(psEnc.state_Fxx[1]);
        Arrays.MemSetShort(psEnc.sStereo.pred_prev_Q13, 0, 2);
        Arrays.MemSetShort(psEnc.sStereo.sSide, 0, 2);
        psEnc.sStereo.mid_side_amp_Q0[0] = 0;
        psEnc.sStereo.mid_side_amp_Q0[1] = 1;
        psEnc.sStereo.mid_side_amp_Q0[2] = 0;
        psEnc.sStereo.mid_side_amp_Q0[3] = 1;
        psEnc.sStereo.width_prev_Q14 = 0;
        /* Expanded form of Inlines.SILK_CONST(1.0f, 14), i.e. 1.0 in Q14 */
        psEnc.sStereo.smth_width_Q14 = (short)(((int)((1.0f) * ((long)1 << (14)) + 0.5)));
        if (psEnc.nChannelsAPI == 2)
        {
            psEnc.state_Fxx[1].resampler_state.Assign(psEnc.state_Fxx[0].resampler_state);
            Array.Copy(psEnc.state_Fxx[0].In_HP_State, psEnc.state_Fxx[1].In_HP_State, 2);
        }
    }

    /* Must be evaluated before psEnc.nChannelsInternal is overwritten just below */
    transition = ((encControl.payloadSize_ms != psEnc.state_Fxx[0].PacketSize_ms) || (psEnc.nChannelsInternal != encControl.nChannelsInternal)) ? 1 : 0;

    psEnc.nChannelsAPI = encControl.nChannelsAPI;
    psEnc.nChannelsInternal = encControl.nChannelsInternal;

    nBlocksOf10ms = Inlines.silk_DIV32(100 * nSamplesIn, encControl.API_sampleRate);
    /* Half the number of 10 ms blocks, but at least 1; used below to split maxBits across blocks */
    tot_blocks = (nBlocksOf10ms > 1) ? nBlocksOf10ms >> 1 : 1;
    curr_block = 0;
    if (prefillFlag != 0)
    {
        /* Only accept input length of 10 ms */
        if (nBlocksOf10ms != 1)
        {
            Inlines.OpusAssert(false);
            return(SilkError.SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES);
        }

        /* Reset Encoder */
        for (n = 0; n < encControl.nChannelsInternal; n++)
        {
            ret += SilkEncoder.silk_init_encoder(psEnc.state_Fxx[n]);
            Inlines.OpusAssert(ret == SilkError.SILK_NO_ERROR);
        }

        /* Temporarily force 10 ms payload and complexity 0; both are restored before returning */
        tmp_payloadSize_ms = encControl.payloadSize_ms;
        encControl.payloadSize_ms = 10;
        tmp_complexity = encControl.complexity;
        encControl.complexity = 0;
        for (n = 0; n < encControl.nChannelsInternal; n++)
        {
            psEnc.state_Fxx[n].controlled_since_last_payload = 0;
            psEnc.state_Fxx[n].prefillFlag = 1;
        }
    }
    else
    {
        /* Only accept input lengths that are a multiple of 10 ms */
        if (nBlocksOf10ms * encControl.API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0)
        {
            Inlines.OpusAssert(false);
            return(SilkError.SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES);
        }

        /* Make sure no more than one packet can be produced */
        if (1000 * (int)nSamplesIn > encControl.payloadSize_ms * encControl.API_sampleRate)
        {
            Inlines.OpusAssert(false);
            return(SilkError.SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES);
        }
    }

    /* Per-channel starting rate: the total bitrate, halved when there are two internal channels */
    TargetRate_bps = Inlines.silk_RSHIFT32(encControl.bitRate, encControl.nChannelsInternal - 1);
    for (n = 0; n < encControl.nChannelsInternal; n++)
    {
        /* Force the side channel to the same rate as the mid */
        int force_fs_kHz = (n == 1) ? psEnc.state_Fxx[0].fs_kHz : 0;
        ret += psEnc.state_Fxx[n].silk_control_encoder(encControl, TargetRate_bps, psEnc.allowBandwidthSwitch, n, force_fs_kHz);
        if (ret != SilkError.SILK_NO_ERROR)
        {
            Inlines.OpusAssert(false);
            return(ret);
        }

        /* LBRR data stored from the previous packet is dropped after a reset or a transition */
        if (psEnc.state_Fxx[n].first_frame_after_reset != 0 || transition != 0)
        {
            for (i = 0; i < psEnc.state_Fxx[0].nFramesPerPacket; i++)
            {
                psEnc.state_Fxx[n].LBRR_flags[i] = 0;
            }
        }
        psEnc.state_Fxx[n].inDTX = psEnc.state_Fxx[n].useDTX;
    }
    Inlines.OpusAssert(encControl.nChannelsInternal == 1 || psEnc.state_Fxx[0].fs_kHz == psEnc.state_Fxx[1].fs_kHz);

    /* Input buffering/resampling and encoding */
    nSamplesToBufferMax = 10 * nBlocksOf10ms * psEnc.state_Fxx[0].fs_kHz;
    nSamplesFromInputMax = Inlines.silk_DIV32_16(nSamplesToBufferMax * psEnc.state_Fxx[0].API_fs_Hz, (short)(psEnc.state_Fxx[0].fs_kHz * 1000));
    buf = new short[nSamplesFromInputMax];

    /* Read position in samplesIn (replaces pointer arithmetic on the input vector) */
    int samplesIn_ptr = 0;
    while (true)
    {
        nSamplesToBuffer = psEnc.state_Fxx[0].frame_length - psEnc.state_Fxx[0].inputBufIx;
        nSamplesToBuffer = Inlines.silk_min(nSamplesToBuffer, nSamplesToBufferMax);
        nSamplesFromInput = Inlines.silk_DIV32_16(nSamplesToBuffer * psEnc.state_Fxx[0].API_fs_Hz, psEnc.state_Fxx[0].fs_kHz * 1000);

        /* Resample and write to buffer */
        /* The resampled signal is stored 2 slots into inputBuf in every branch */
        if (encControl.nChannelsAPI == 2 && encControl.nChannelsInternal == 2)
        {
            int id = psEnc.state_Fxx[0].nFramesEncoded;

            /* De-interleave the left channel */
            for (n = 0; n < nSamplesFromInput; n++)
            {
                buf[n] = samplesIn[samplesIn_ptr + (2 * n)];
            }

            /* Making sure to start both resamplers from the same state when switching from mono to stereo */
            if (psEnc.nPrevChannelsInternal == 1 && id == 0)
            {
                psEnc.state_Fxx[1].resampler_state.Assign(psEnc.state_Fxx[0].resampler_state);
            }

            ret += Resampler.silk_resampler(
                psEnc.state_Fxx[0].resampler_state,
                psEnc.state_Fxx[0].inputBuf,
                psEnc.state_Fxx[0].inputBufIx + 2,
                buf,
                0,
                nSamplesFromInput);
            psEnc.state_Fxx[0].inputBufIx += nSamplesToBuffer;

            nSamplesToBuffer = psEnc.state_Fxx[1].frame_length - psEnc.state_Fxx[1].inputBufIx;
            nSamplesToBuffer = Inlines.silk_min(nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc.state_Fxx[1].fs_kHz);

            /* De-interleave the right channel */
            for (n = 0; n < nSamplesFromInput; n++)
            {
                buf[n] = samplesIn[samplesIn_ptr + (2 * n) + 1];
            }
            ret += Resampler.silk_resampler(
                psEnc.state_Fxx[1].resampler_state,
                psEnc.state_Fxx[1].inputBuf,
                psEnc.state_Fxx[1].inputBufIx + 2,
                buf,
                0,
                nSamplesFromInput);

            psEnc.state_Fxx[1].inputBufIx += nSamplesToBuffer;
        }
        else if (encControl.nChannelsAPI == 2 && encControl.nChannelsInternal == 1)
        {
            /* Combine left and right channels before resampling */
            for (n = 0; n < nSamplesFromInput; n++)
            {
                sum = samplesIn[samplesIn_ptr + (2 * n)] + samplesIn[samplesIn_ptr + (2 * n) + 1];
                buf[n] = (short)Inlines.silk_RSHIFT_ROUND(sum, 1);
            }

            ret += Resampler.silk_resampler(
                psEnc.state_Fxx[0].resampler_state,
                psEnc.state_Fxx[0].inputBuf,
                psEnc.state_Fxx[0].inputBufIx + 2,
                buf,
                0,
                nSamplesFromInput);

            /* On the first mono frame, average the results for the two resampler states */
            if (psEnc.nPrevChannelsInternal == 2 && psEnc.state_Fxx[0].nFramesEncoded == 0)
            {
                ret += Resampler.silk_resampler(
                    psEnc.state_Fxx[1].resampler_state,
                    psEnc.state_Fxx[1].inputBuf,
                    psEnc.state_Fxx[1].inputBufIx + 2,
                    buf,
                    0,
                    nSamplesFromInput);

                for (n = 0; n < psEnc.state_Fxx[0].frame_length; n++)
                {
                    psEnc.state_Fxx[0].inputBuf[psEnc.state_Fxx[0].inputBufIx + n + 2] = (short)(Inlines.silk_RSHIFT(psEnc.state_Fxx[0].inputBuf[psEnc.state_Fxx[0].inputBufIx + n + 2] + psEnc.state_Fxx[1].inputBuf[psEnc.state_Fxx[1].inputBufIx + n + 2], 1));
                }
            }

            psEnc.state_Fxx[0].inputBufIx += nSamplesToBuffer;
        }
        else
        {
            Inlines.OpusAssert(encControl.nChannelsAPI == 1 && encControl.nChannelsInternal == 1);
            Array.Copy(samplesIn, samplesIn_ptr, buf, 0, nSamplesFromInput);
            ret += Resampler.silk_resampler(
                psEnc.state_Fxx[0].resampler_state,
                psEnc.state_Fxx[0].inputBuf,
                psEnc.state_Fxx[0].inputBufIx + 2,
                buf,
                0,
                nSamplesFromInput);
            psEnc.state_Fxx[0].inputBufIx += nSamplesToBuffer;
        }

        samplesIn_ptr += (nSamplesFromInput * encControl.nChannelsAPI);
        nSamplesIn -= nSamplesFromInput;

        /* Default */
        psEnc.allowBandwidthSwitch = 0;

        /* Silk encoder */
        if (psEnc.state_Fxx[0].inputBufIx >= psEnc.state_Fxx[0].frame_length)
        {
            /* Enough data in input buffer, so encode */
            Inlines.OpusAssert(psEnc.state_Fxx[0].inputBufIx == psEnc.state_Fxx[0].frame_length);
            Inlines.OpusAssert(encControl.nChannelsInternal == 1 || psEnc.state_Fxx[1].inputBufIx == psEnc.state_Fxx[1].frame_length);

            /* Deal with LBRR data */
            if (psEnc.state_Fxx[0].nFramesEncoded == 0 && prefillFlag == 0)
            {
                /* Create space at start of payload for VAD and FEC flags */
                /* Reserves (nFramesPerPacket + 1) * nChannelsInternal bits; the same bit count is
                 * patched with the real flags by enc_patch_initial_bits() once the packet is complete */
                byte[] iCDF = { 0, 0 };
                iCDF[0] = (byte)(256 - Inlines.silk_RSHIFT(256, (psEnc.state_Fxx[0].nFramesPerPacket + 1) * encControl.nChannelsInternal));
                psRangeEnc.enc_icdf(0, iCDF, 8);

                /* Encode any LBRR data from previous packet */
                /* Encode LBRR flags */
                for (n = 0; n < encControl.nChannelsInternal; n++)
                {
                    /* Bitmask: bit i is the LBRR flag of frame i */
                    LBRR_symbol = 0;
                    for (i = 0; i < psEnc.state_Fxx[n].nFramesPerPacket; i++)
                    {
                        LBRR_symbol |= Inlines.silk_LSHIFT(psEnc.state_Fxx[n].LBRR_flags[i], i);
                    }

                    psEnc.state_Fxx[n].LBRR_flag = (sbyte)(LBRR_symbol > 0 ? 1 : 0);
                    if (LBRR_symbol != 0 && psEnc.state_Fxx[n].nFramesPerPacket > 1)
                    {
                        psRangeEnc.enc_icdf(LBRR_symbol - 1, Tables.silk_LBRR_flags_iCDF_ptr[psEnc.state_Fxx[n].nFramesPerPacket - 2], 8);
                    }
                }

                /* Code LBRR indices and excitation signals */
                for (i = 0; i < psEnc.state_Fxx[0].nFramesPerPacket; i++)
                {
                    for (n = 0; n < encControl.nChannelsInternal; n++)
                    {
                        if (psEnc.state_Fxx[n].LBRR_flags[i] != 0)
                        {
                            int condCoding;

                            if (encControl.nChannelsInternal == 2 && n == 0)
                            {
                                Stereo.silk_stereo_encode_pred(psRangeEnc, psEnc.sStereo.predIx[i]);
                                /* For LBRR data there's no need to code the mid-only flag if the side-channel LBRR flag is set */
                                if (psEnc.state_Fxx[1].LBRR_flags[i] == 0)
                                {
                                    Stereo.silk_stereo_encode_mid_only(psRangeEnc, psEnc.sStereo.mid_only_flags[i]);
                                }
                            }

                            /* Use conditional coding if previous frame available */
                            if (i > 0 && psEnc.state_Fxx[n].LBRR_flags[i - 1] != 0)
                            {
                                condCoding = SilkConstants.CODE_CONDITIONALLY;
                            }
                            else
                            {
                                condCoding = SilkConstants.CODE_INDEPENDENTLY;
                            }

                            EncodeIndices.silk_encode_indices(psEnc.state_Fxx[n], psRangeEnc, i, 1, condCoding);
                            EncodePulses.silk_encode_pulses(psRangeEnc, psEnc.state_Fxx[n].indices_LBRR[i].signalType, psEnc.state_Fxx[n].indices_LBRR[i].quantOffsetType, psEnc.state_Fxx[n].pulses_LBRR[i], psEnc.state_Fxx[n].frame_length);
                        }
                    }
                }

                /* Reset LBRR flags */
                for (n = 0; n < encControl.nChannelsInternal; n++)
                {
                    Arrays.MemSetInt(psEnc.state_Fxx[n].LBRR_flags, 0, SilkConstants.MAX_FRAMES_PER_PACKET);
                }

                psEnc.nBitsUsedLBRR = psRangeEnc.tell();
            }

            HPVariableCutoff.silk_HP_variable_cutoff(psEnc.state_Fxx);

            /* Total target bits for packet */
            nBits = Inlines.silk_DIV32_16(Inlines.silk_MUL(encControl.bitRate, encControl.payloadSize_ms), 1000);

            /* Subtract bits used for LBRR */
            if (prefillFlag == 0)
            {
                nBits -= psEnc.nBitsUsedLBRR;
            }

            /* Divide by number of uncoded frames left in packet */
            nBits = Inlines.silk_DIV32_16(nBits, psEnc.state_Fxx[0].nFramesPerPacket);

            /* Convert to bits/second */
            if (encControl.payloadSize_ms == 10)
            {
                TargetRate_bps = Inlines.silk_SMULBB(nBits, 100);
            }
            else
            {
                TargetRate_bps = Inlines.silk_SMULBB(nBits, 50);
            }

            /* Subtract fraction of bits in excess of target in previous frames and packets */
            TargetRate_bps -= Inlines.silk_DIV32_16(Inlines.silk_MUL(psEnc.nBitsExceeded, 1000), TuningParameters.BITRESERVOIR_DECAY_TIME_MS);
            if (prefillFlag == 0 && psEnc.state_Fxx[0].nFramesEncoded > 0)
            {
                /* Compare actual vs target bits so far in this packet */
                int bitsBalance = psRangeEnc.tell() - psEnc.nBitsUsedLBRR - nBits * psEnc.state_Fxx[0].nFramesEncoded;
                TargetRate_bps -= Inlines.silk_DIV32_16(Inlines.silk_MUL(bitsBalance, 1000), TuningParameters.BITRESERVOIR_DECAY_TIME_MS);
            }

            /* Never exceed input bitrate */
            TargetRate_bps = Inlines.silk_LIMIT(TargetRate_bps, encControl.bitRate, 5000);

            /* Convert Left/Right to Mid/Side */
            if (encControl.nChannelsInternal == 2)
            {
                /* The boxed value stands in for a by-reference sbyte; it is copied back right after the call */
                BoxedValueSbyte midOnlyFlagBoxed = new BoxedValueSbyte(psEnc.sStereo.mid_only_flags[psEnc.state_Fxx[0].nFramesEncoded]);
                Stereo.silk_stereo_LR_to_MS(
                    psEnc.sStereo,
                    psEnc.state_Fxx[0].inputBuf,
                    2,
                    psEnc.state_Fxx[1].inputBuf,
                    2,
                    psEnc.sStereo.predIx[psEnc.state_Fxx[0].nFramesEncoded],
                    midOnlyFlagBoxed,
                    MStargetRates_bps,
                    TargetRate_bps,
                    psEnc.state_Fxx[0].speech_activity_Q8,
                    encControl.toMono,
                    psEnc.state_Fxx[0].fs_kHz,
                    psEnc.state_Fxx[0].frame_length);
                psEnc.sStereo.mid_only_flags[psEnc.state_Fxx[0].nFramesEncoded] = midOnlyFlagBoxed.Val;

                if (midOnlyFlagBoxed.Val == 0)
                {
                    /* Reset side channel encoder memory for first frame with side coding */
                    if (psEnc.prev_decode_only_middle == 1)
                    {
                        psEnc.state_Fxx[1].sShape.Reset();
                        psEnc.state_Fxx[1].sPrefilt.Reset();
                        psEnc.state_Fxx[1].sNSQ.Reset();
                        Arrays.MemSetShort(psEnc.state_Fxx[1].prev_NLSFq_Q15, 0, SilkConstants.MAX_LPC_ORDER);
                        Arrays.MemSetInt(psEnc.state_Fxx[1].sLP.In_LP_State, 0, 2);
                        psEnc.state_Fxx[1].prevLag = 100;
                        psEnc.state_Fxx[1].sNSQ.lagPrev = 100;
                        psEnc.state_Fxx[1].sShape.LastGainIndex = 10;
                        psEnc.state_Fxx[1].prevSignalType = SilkConstants.TYPE_NO_VOICE_ACTIVITY;
                        psEnc.state_Fxx[1].sNSQ.prev_gain_Q16 = 65536;
                        psEnc.state_Fxx[1].first_frame_after_reset = 1;
                    }

                    psEnc.state_Fxx[1].silk_encode_do_VAD();
                }
                else
                {
                    psEnc.state_Fxx[1].VAD_flags[psEnc.state_Fxx[0].nFramesEncoded] = 0;
                }

                if (prefillFlag == 0)
                {
                    Stereo.silk_stereo_encode_pred(psRangeEnc, psEnc.sStereo.predIx[psEnc.state_Fxx[0].nFramesEncoded]);
                    if (psEnc.state_Fxx[1].VAD_flags[psEnc.state_Fxx[0].nFramesEncoded] == 0)
                    {
                        Stereo.silk_stereo_encode_mid_only(psRangeEnc, psEnc.sStereo.mid_only_flags[psEnc.state_Fxx[0].nFramesEncoded]);
                    }
                }
            }
            else
            {
                /* Buffering */
                /* Fill the 2 leading slots of inputBuf from sStereo.sMid, then save the 2 slots
                 * starting at frame_length (the tail of this frame) back into sMid */
                Array.Copy(psEnc.sStereo.sMid, psEnc.state_Fxx[0].inputBuf, 2);
                Array.Copy(psEnc.state_Fxx[0].inputBuf, psEnc.state_Fxx[0].frame_length, psEnc.sStereo.sMid, 0, 2);
            }

            psEnc.state_Fxx[0].silk_encode_do_VAD();

            /* Encode */
            for (n = 0; n < encControl.nChannelsInternal; n++)
            {
                int maxBits, useCBR;

                /* Handling rate constraints */
                maxBits = encControl.maxBits;
                if (tot_blocks == 2 && curr_block == 0)
                {
                    maxBits = maxBits * 3 / 5;
                }
                else if (tot_blocks == 3)
                {
                    if (curr_block == 0)
                    {
                        maxBits = maxBits * 2 / 5;
                    }
                    else if (curr_block == 1)
                    {
                        maxBits = maxBits * 3 / 4;
                    }
                }

                /* CBR is only applied on the last block of the packet */
                useCBR = (encControl.useCBR != 0 && curr_block == tot_blocks - 1) ? 1 : 0;

                if (encControl.nChannelsInternal == 1)
                {
                    channelRate_bps = TargetRate_bps;
                }
                else
                {
                    channelRate_bps = MStargetRates_bps[n];
                    if (n == 0 && MStargetRates_bps[1] > 0)
                    {
                        useCBR = 0;
                        /* Give mid up to 1/2 of the max bits for that frame */
                        maxBits -= encControl.maxBits / (tot_blocks * 2);
                    }
                }

                /* A channel with a zero target rate is not coded for this frame */
                if (channelRate_bps > 0)
                {
                    int condCoding;

                    psEnc.state_Fxx[n].silk_control_SNR(channelRate_bps);

                    /* Use independent coding if no previous frame available */
                    if (psEnc.state_Fxx[0].nFramesEncoded - n <= 0)
                    {
                        condCoding = SilkConstants.CODE_INDEPENDENTLY;
                    }
                    else if (n > 0 && psEnc.prev_decode_only_middle != 0)
                    {
                        /* If we skipped a side frame in this packet, we don't
                         * need LTP scaling; the LTP state is well-defined. */
                        condCoding = SilkConstants.CODE_INDEPENDENTLY_NO_LTP_SCALING;
                    }
                    else
                    {
                        condCoding = SilkConstants.CODE_CONDITIONALLY;
                    }

                    ret += psEnc.state_Fxx[n].silk_encode_frame(nBytesOut, psRangeEnc, condCoding, maxBits, useCBR);
                    Inlines.OpusAssert(ret == SilkError.SILK_NO_ERROR);
                }

                psEnc.state_Fxx[n].controlled_since_last_payload = 0;
                psEnc.state_Fxx[n].inputBufIx = 0;
                psEnc.state_Fxx[n].nFramesEncoded++;
            }

            psEnc.prev_decode_only_middle = psEnc.sStereo.mid_only_flags[psEnc.state_Fxx[0].nFramesEncoded - 1];

            /* Insert VAD and FEC flags at beginning of bitstream */
            if (nBytesOut.Val > 0 && psEnc.state_Fxx[0].nFramesEncoded == psEnc.state_Fxx[0].nFramesPerPacket)
            {
                /* Per channel: one VAD bit per frame followed by the LBRR bit, first written = most significant */
                flags = 0;
                for (n = 0; n < encControl.nChannelsInternal; n++)
                {
                    for (i = 0; i < psEnc.state_Fxx[n].nFramesPerPacket; i++)
                    {
                        flags = Inlines.silk_LSHIFT(flags, 1);
                        flags |= (int)psEnc.state_Fxx[n].VAD_flags[i];
                    }

                    flags = Inlines.silk_LSHIFT(flags, 1);
                    flags |= (int)psEnc.state_Fxx[n].LBRR_flag;
                }

                if (prefillFlag == 0)
                {
                    psRangeEnc.enc_patch_initial_bits((uint)flags, (uint)((psEnc.state_Fxx[0].nFramesPerPacket + 1) * encControl.nChannelsInternal));
                }

                /* Return zero bytes if all channels DTXed */
                if (psEnc.state_Fxx[0].inDTX != 0 && (encControl.nChannelsInternal == 1 || psEnc.state_Fxx[1].inDTX != 0))
                {
                    nBytesOut.Val = 0;
                }

                psEnc.nBitsExceeded += nBytesOut.Val * 8;
                psEnc.nBitsExceeded -= Inlines.silk_DIV32_16(Inlines.silk_MUL(encControl.bitRate, encControl.payloadSize_ms), 1000);
                psEnc.nBitsExceeded = Inlines.silk_LIMIT(psEnc.nBitsExceeded, 0, 10000);

                /* Update flag indicating if bandwidth switching is allowed */
                /* The two constant arguments are the expanded forms of
                 * Inlines.SILK_CONST(TuningParameters.SPEECH_ACTIVITY_DTX_THRES, 8) and
                 * Inlines.SILK_CONST((1 - TuningParameters.SPEECH_ACTIVITY_DTX_THRES) / TuningParameters.MAX_BANDWIDTH_SWITCH_DELAY_MS, 16 + 8) */
                speech_act_thr_for_switch_Q8 = Inlines.silk_SMLAWB(
                    ((int)((TuningParameters.SPEECH_ACTIVITY_DTX_THRES) * ((long)1 << (8)) + 0.5)),
                    ((int)(((1 - TuningParameters.SPEECH_ACTIVITY_DTX_THRES) / TuningParameters.MAX_BANDWIDTH_SWITCH_DELAY_MS) * ((long)1 << (16 + 8)) + 0.5)),
                    psEnc.timeSinceSwitchAllowed_ms);
                if (psEnc.state_Fxx[0].speech_activity_Q8 < speech_act_thr_for_switch_Q8)
                {
                    psEnc.allowBandwidthSwitch = 1;
                    psEnc.timeSinceSwitchAllowed_ms = 0;
                }
                else
                {
                    psEnc.allowBandwidthSwitch = 0;
                    psEnc.timeSinceSwitchAllowed_ms += encControl.payloadSize_ms;
                }
            }

            /* All input consumed */
            if (nSamplesIn == 0)
            {
                break;
            }
        }
        else
        {
            /* Not enough buffered samples for a full frame yet */
            break;
        }

        curr_block++;
    }

    psEnc.nPrevChannelsInternal = encControl.nChannelsInternal;

    encControl.allowBandwidthSwitch = psEnc.allowBandwidthSwitch;
    encControl.inWBmodeWithoutVariableLP = (psEnc.state_Fxx[0].fs_kHz == 16 && psEnc.state_Fxx[0].sLP.mode == 0) ? 1 : 0;
    encControl.internalSampleRate = Inlines.silk_SMULBB(psEnc.state_Fxx[0].fs_kHz, 1000);
    encControl.stereoWidth_Q14 = encControl.toMono != 0 ? 0 : psEnc.sStereo.smth_width_Q14;

    if (prefillFlag != 0)
    {
        /* Restore the control values that were overridden for prefilling */
        encControl.payloadSize_ms = tmp_payloadSize_ms;
        encControl.complexity = tmp_complexity;
        for (n = 0; n < encControl.nChannelsInternal; n++)
        {
            psEnc.state_Fxx[n].controlled_since_last_payload = 0;
            psEnc.state_Fxx[n].prefillFlag = 0;
        }
    }

    return(ret);
}
/*************************************************************/
/*      FIXED POINT CORE PITCH ANALYSIS FUNCTION             */
/*************************************************************/
/*
 * Three-stage pitch lag search:
 *   1. The input is decimated to 8 kHz and then 4 kHz; a normalized correlation is
 *      computed per lag on the 4 kHz signal and the strongest lags are kept.
 *   2. The surviving lags (expanded to neighbouring lags) are re-evaluated at 8 kHz
 *      against the stage-2 lag contour codebook, with a bias towards short lags and
 *      towards the previous frame's lag.
 *   3. For Fs_kHz > 8 only, the lag is refined at the input sampling rate against
 *      the stage-3 contour codebook.
 *
 * Returns 0 when a voiced lag was found (pitch_out, lagIndex, contourIndex and
 * LTPCorr_Q15 are filled in), or 1 when the frame is classified as unvoiced (the
 * first nb_subfr entries of pitch_out and all three boxed outputs are set to 0).
 */
internal static int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */
    short[] frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */
    int[] pitch_out, /* O 4 pitch lag values */
    BoxedValueShort lagIndex, /* O Lag Index */
    BoxedValueSbyte contourIndex, /* O Pitch contour Index */
    BoxedValueInt LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */
    int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */
    int search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */
    int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */
    int Fs_kHz, /* I Sample frequency (kHz) */
    int complexity, /* I Complexity setting, 0-2, where 2 is highest */
    int nb_subfr /* I number of 5 ms subframes */
    )
{
    short[] frame_8kHz;
    short[] frame_4kHz;
    int[] filt_state = new int[6];
    short[] input_frame_ptr;
    int i, k, d, j;
    short[] C;
    int[] xcorr32;
    short[] basis;
    int basis_ptr;
    short[] target;
    int target_ptr;
    int cross_corr, normalizer, energy, shift, energy_basis, energy_target;
    int Cmax, length_d_srch, length_d_comp;
    int[] d_srch = new int[SilkConstants.PE_D_SRCH_LENGTH];
    short[] d_comp;
    int sum, threshold, lag_counter;
    int CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;
    int CCmax, CCmax_b, CCmax_new_b, CCmax_new;
    int[] CC = new int[SilkConstants.PE_NB_CBKS_STAGE2_EXT];
    silk_pe_stage3_vals[] energies_st3;
    silk_pe_stage3_vals[] cross_corr_st3;
    int frame_length, frame_length_8kHz, frame_length_4kHz;
    int sf_length;
    int min_lag;
    int max_lag;
    int contour_bias_Q15, diff;
    int nb_cbk_search;
    int delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
    int nb_subfr_shortlag_bias_Q13, nb_subfr_prevlag_bias_Q13;
    sbyte[][] Lag_CB_ptr;

    /* Check for valid sampling frequency */
    Inlines.OpusAssert(Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16);

    /* Check for valid complexity setting */
    Inlines.OpusAssert(complexity >= SilkConstants.SILK_PE_MIN_COMPLEX);
    Inlines.OpusAssert(complexity <= SilkConstants.SILK_PE_MAX_COMPLEX);

    Inlines.OpusAssert(search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1 << 16));
    Inlines.OpusAssert(search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1 << 13));

    /* Set up frame lengths max / min lag for the sampling frequency */
    frame_length = (SilkConstants.PE_LTP_MEM_LENGTH_MS + nb_subfr * SilkConstants.PE_SUBFR_LENGTH_MS) * Fs_kHz;
    frame_length_4kHz = (SilkConstants.PE_LTP_MEM_LENGTH_MS + nb_subfr * SilkConstants.PE_SUBFR_LENGTH_MS) * 4;
    frame_length_8kHz = (SilkConstants.PE_LTP_MEM_LENGTH_MS + nb_subfr * SilkConstants.PE_SUBFR_LENGTH_MS) * 8;
    sf_length = SilkConstants.PE_SUBFR_LENGTH_MS * Fs_kHz;
    min_lag = SilkConstants.PE_MIN_LAG_MS * Fs_kHz;
    max_lag = SilkConstants.PE_MAX_LAG_MS * Fs_kHz - 1;

    /* Resample from input sampled at Fs_kHz to 8 kHz */
    frame_8kHz = new short[frame_length_8kHz];
    if (Fs_kHz == 16)
    {
        Arrays.MemSetInt(filt_state, 0, 2);
        Resampler.silk_resampler_down2(filt_state, frame_8kHz, frame, frame_length);
    }
    else if (Fs_kHz == 12)
    {
        Arrays.MemSetInt(filt_state, 0, 6);
        Resampler.silk_resampler_down2_3(filt_state, frame_8kHz, frame, frame_length);
    }
    else
    {
        Inlines.OpusAssert(Fs_kHz == 8);
        Array.Copy(frame, frame_8kHz, frame_length_8kHz);
    }

    /* Decimate again to 4 kHz */
    Arrays.MemSetInt(filt_state, 0, 2); /* Set state to zero */
    frame_4kHz = new short[frame_length_4kHz];
    Resampler.silk_resampler_down2(filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz);

    /* Low-pass filter (runs backwards so each sample is summed with its unmodified predecessor) */
    for (i = frame_length_4kHz - 1; i > 0; i--)
    {
        frame_4kHz[i] = Inlines.silk_ADD_SAT16(frame_4kHz[i], frame_4kHz[i - 1]);
    }

    /*******************************************************************************
    ** Scale 4 kHz signal down to prevent correlations measures from overflowing
    ** find scaling as max scaling for each 8kHz(?) subframe
    *******************************************************************************/

    /* Inner product is calculated with different lengths, so scale for the worst case */
    SumSqrShift.silk_sum_sqr_shift(out energy, out shift, frame_4kHz, frame_length_4kHz);
    if (shift > 0)
    {
        shift = Inlines.silk_RSHIFT(shift, 1);
        for (i = 0; i < frame_length_4kHz; i++)
        {
            frame_4kHz[i] = Inlines.silk_RSHIFT16(frame_4kHz[i], shift);
        }
    }

    /******************************************************************************
    * FIRST STAGE, operating in 4 khz
    ******************************************************************************/
    /* C is sized for the (larger) 8 kHz stage because it is reused in the second stage */
    C = new short[nb_subfr * CSTRIDE_8KHZ];
    xcorr32 = new int[MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1];
    Arrays.MemSetShort(C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ);
    target = frame_4kHz;
    target_ptr = Inlines.silk_LSHIFT(SF_LENGTH_4KHZ, 2);
    for (k = 0; k < nb_subfr >> 1; k++)
    {
        basis = target;
        basis_ptr = target_ptr - MIN_LAG_4KHZ;

        CeltPitchXCorr.pitch_xcorr(target, target_ptr, target, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1);

        /* Calculate first vector products before loop */
        cross_corr = xcorr32[MAX_LAG_4KHZ - MIN_LAG_4KHZ];
        normalizer = Inlines.silk_inner_prod_self(target, target_ptr, SF_LENGTH_8KHZ);
        normalizer = Inlines.silk_ADD32(normalizer, Inlines.silk_inner_prod_self(basis, basis_ptr, SF_LENGTH_8KHZ));
        normalizer = Inlines.silk_ADD32(normalizer, Inlines.silk_SMULBB(SF_LENGTH_8KHZ, 4000));

        Inlines.MatrixSet(C, k, 0, CSTRIDE_4KHZ, (short)Inlines.silk_DIV32_varQ(cross_corr, normalizer, 13 + 1)); /* Q13 */

        /* From now on normalizer is computed recursively */
        for (d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++)
        {
            basis_ptr--;

            cross_corr = xcorr32[MAX_LAG_4KHZ - d];

            /* Add contribution of new sample and remove contribution from oldest sample */
            normalizer = Inlines.silk_ADD32(
                normalizer,
                Inlines.silk_SMULBB(basis[basis_ptr], basis[basis_ptr]) -
                Inlines.silk_SMULBB(basis[basis_ptr + SF_LENGTH_8KHZ], basis[basis_ptr + SF_LENGTH_8KHZ]));

            Inlines.MatrixSet(C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ, (short)Inlines.silk_DIV32_varQ(cross_corr, normalizer, 13 + 1)); /* Q13 */
        }

        /* Update target pointer */
        target_ptr += SF_LENGTH_8KHZ;
    }

    /* Combine two subframes into single correlation measure and apply short-lag bias */
    if (nb_subfr == SilkConstants.PE_MAX_NB_SUBFR)
    {
        for (i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i--)
        {
            sum = (int)Inlines.MatrixGet(C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ)
                + (int)Inlines.MatrixGet(C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ); /* Q14 */
            sum = Inlines.silk_SMLAWB(sum, sum, Inlines.silk_LSHIFT(-i, 4)); /* Q14 */
            C[i - MIN_LAG_4KHZ] = (short)sum; /* Q14 */
        }
    }
    else
    {
        /* Only short-lag bias */
        for (i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i--)
        {
            sum = Inlines.silk_LSHIFT((int)C[i - MIN_LAG_4KHZ], 1); /* Q14 */
            sum = Inlines.silk_SMLAWB(sum, sum, Inlines.silk_LSHIFT(-i, 4)); /* Q14 */
            C[i - MIN_LAG_4KHZ] = (short)sum; /* Q14 */
        }
    }

    /* Sort */
    length_d_srch = Inlines.silk_ADD_LSHIFT32(4, complexity, 1);
    Inlines.OpusAssert(3 * length_d_srch <= SilkConstants.PE_D_SRCH_LENGTH);
    Sort.silk_insertion_sort_decreasing_int16(C, d_srch, CSTRIDE_4KHZ, length_d_srch);

    /* Escape if correlation is very low already here */
    Cmax = (int)C[0]; /* Q14 */
    if (Cmax < ((int)((0.2f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(0.2f, 14)*/)
    {
        Arrays.MemSetInt(pitch_out, 0, nb_subfr);
        LTPCorr_Q15.Val = 0;
        lagIndex.Val = 0;
        contourIndex.Val = 0;
        return (1);
    }

    threshold = Inlines.silk_SMULWB(search_thres1_Q16, Cmax);
    for (i = 0; i < length_d_srch; i++)
    {
        /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
        if (C[i] > threshold)
        {
            d_srch[i] = Inlines.silk_LSHIFT(d_srch[i] + MIN_LAG_4KHZ, 1);
        }
        else
        {
            length_d_srch = i;
            break;
        }
    }
    Inlines.OpusAssert(length_d_srch > 0);

    /* d_comp starts as an indicator array (indexed by lag - D_COMP_MIN) of the candidate lags */
    d_comp = new short[D_COMP_STRIDE];
    for (i = D_COMP_MIN; i < D_COMP_MAX; i++)
    {
        d_comp[i - D_COMP_MIN] = 0;
    }
    for (i = 0; i < length_d_srch; i++)
    {
        d_comp[d_srch[i] - D_COMP_MIN] = 1;
    }

    /* Convolution */
    for (i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i--)
    {
        d_comp[i - D_COMP_MIN] += (short)(d_comp[i - 1 - D_COMP_MIN] + d_comp[i - 2 - D_COMP_MIN]);
    }

    /* Rebuild the list of 8 kHz lags to search from the widened indicator */
    length_d_srch = 0;
    for (i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++)
    {
        if (d_comp[i + 1 - D_COMP_MIN] > 0)
        {
            d_srch[length_d_srch] = i;
            length_d_srch++;
        }
    }

    /* Convolution */
    for (i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i--)
    {
        d_comp[i - D_COMP_MIN] += (short)(d_comp[i - 1 - D_COMP_MIN] + d_comp[i - 2 - D_COMP_MIN] + d_comp[i - 3 - D_COMP_MIN]);
    }

    /* Compact in place: the front of d_comp becomes the list of lags to correlate at */
    length_d_comp = 0;
    for (i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++)
    {
        if (d_comp[i - D_COMP_MIN] > 0)
        {
            d_comp[length_d_comp] = (short)(i - 2);
            length_d_comp++;
        }
    }

    /**********************************************************************************
    ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
    *************************************************************************************/

    /******************************************************************************
    ** Scale signal down to avoid correlations measures from overflowing
    *******************************************************************************/
    /* find scaling as max scaling for each subframe */
    SumSqrShift.silk_sum_sqr_shift(out energy, out shift, frame_8kHz, frame_length_8kHz);
    if (shift > 0)
    {
        shift = Inlines.silk_RSHIFT(shift, 1);
        for (i = 0; i < frame_length_8kHz; i++)
        {
            frame_8kHz[i] = Inlines.silk_RSHIFT16(frame_8kHz[i], shift);
        }
    }

    /*********************************************************************************
    * Find energy of each subframe projected onto its history, for a range of delays
    *********************************************************************************/
    Arrays.MemSetShort(C, 0, nb_subfr * CSTRIDE_8KHZ);

    target = frame_8kHz;
    target_ptr = SilkConstants.PE_LTP_MEM_LENGTH_MS * 8;
    for (k = 0; k < nb_subfr; k++)
    {
        energy_target = Inlines.silk_ADD32(Inlines.silk_inner_prod(target, target_ptr, target, target_ptr, SF_LENGTH_8KHZ), 1);
        for (j = 0; j < length_d_comp; j++)
        {
            d = d_comp[j];
            basis = target;
            basis_ptr = target_ptr - d;

            cross_corr = Inlines.silk_inner_prod(target, target_ptr, basis, basis_ptr, SF_LENGTH_8KHZ);
            if (cross_corr > 0)
            {
                energy_basis = Inlines.silk_inner_prod_self(basis, basis_ptr, SF_LENGTH_8KHZ);
                Inlines.MatrixSet(
                    C, k, d - (MIN_LAG_8KHZ - 2), CSTRIDE_8KHZ,
                    (short)Inlines.silk_DIV32_varQ(cross_corr, Inlines.silk_ADD32(energy_target, energy_basis), 13 + 1)); /* Q13 */
            }
            else
            {
                Inlines.MatrixSet<short>(C, k, d - (MIN_LAG_8KHZ - 2), CSTRIDE_8KHZ, 0);
            }
        }
        target_ptr += SF_LENGTH_8KHZ;
    }

    /* search over lag range and lags codebook */
    /* scale factor for lag codebook, as a function of center lag */

    CCmax = int.MinValue;
    CCmax_b = int.MinValue;

    CBimax = 0; /* To avoid returning undefined lag values */
    lag = -1; /* To check if lag with strong enough correlation has been found */

    if (prevLag > 0)
    {
        /* Convert the previous lag from the input rate to the 8 kHz domain */
        if (Fs_kHz == 12)
        {
            prevLag = Inlines.silk_DIV32_16(Inlines.silk_LSHIFT(prevLag, 1), 3);
        }
        else if (Fs_kHz == 16)
        {
            prevLag = Inlines.silk_RSHIFT(prevLag, 1);
        }
        prevLag_log2_Q7 = Inlines.silk_lin2log((int)prevLag);
    }
    else
    {
        prevLag_log2_Q7 = 0;
    }
    Inlines.OpusAssert(search_thres2_Q13 == Inlines.silk_SAT16(search_thres2_Q13));

    /* Set up stage 2 codebook based on number of subframes */
    if (nb_subfr == SilkConstants.PE_MAX_NB_SUBFR)
    {
        Lag_CB_ptr = Tables.silk_CB_lags_stage2;
        if (Fs_kHz == 8 && complexity > SilkConstants.SILK_PE_MIN_COMPLEX)
        {
            /* If input is 8 khz use a larger codebook here because it is last stage */
            nb_cbk_search = SilkConstants.PE_NB_CBKS_STAGE2_EXT;
        }
        else
        {
            nb_cbk_search = SilkConstants.PE_NB_CBKS_STAGE2;
        }
    }
    else
    {
        Lag_CB_ptr = Tables.silk_CB_lags_stage2_10_ms;
        nb_cbk_search = SilkConstants.PE_NB_CBKS_STAGE2_10MS;
    }

    /* The bias weights depend only on nb_subfr, so compute and range-check them once */
    nb_subfr_shortlag_bias_Q13 = nb_subfr * ((int)((SilkConstants.PE_SHORTLAG_BIAS) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.PE_SHORTLAG_BIAS, 13)*/;
    nb_subfr_prevlag_bias_Q13 = nb_subfr * ((int)((SilkConstants.PE_PREVLAG_BIAS) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.PE_PREVLAG_BIAS, 13)*/;
    Inlines.OpusAssert(nb_subfr_shortlag_bias_Q13 == Inlines.silk_SAT16(nb_subfr_shortlag_bias_Q13));
    Inlines.OpusAssert(nb_subfr_prevlag_bias_Q13 == Inlines.silk_SAT16(nb_subfr_prevlag_bias_Q13));

    for (k = 0; k < length_d_srch; k++)
    {
        d = d_srch[k];
        for (j = 0; j < nb_cbk_search; j++)
        {
            CC[j] = 0;
            for (i = 0; i < nb_subfr; i++)
            {
                int d_subfr;
                /* Try all codebooks */
                d_subfr = d + Lag_CB_ptr[i][j];
                CC[j] = CC[j] + (int)Inlines.MatrixGet(C, i, d_subfr - (MIN_LAG_8KHZ - 2), CSTRIDE_8KHZ);
            }
        }

        /* Find best codebook */
        CCmax_new = int.MinValue;
        CBimax_new = 0;
        for (i = 0; i < nb_cbk_search; i++)
        {
            if (CC[i] > CCmax_new)
            {
                CCmax_new = CC[i];
                CBimax_new = i;
            }
        }

        /* Bias towards shorter lags */
        lag_log2_Q7 = Inlines.silk_lin2log(d); /* Q7 */
        Inlines.OpusAssert(lag_log2_Q7 == Inlines.silk_SAT16(lag_log2_Q7));
        CCmax_new_b = CCmax_new - Inlines.silk_RSHIFT(Inlines.silk_SMULBB(nb_subfr_shortlag_bias_Q13, lag_log2_Q7), 7); /* Q13 */

        /* Bias towards previous lag */
        if (prevLag > 0)
        {
            delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7;
            Inlines.OpusAssert(delta_lag_log2_sqr_Q7 == Inlines.silk_SAT16(delta_lag_log2_sqr_Q7));
            delta_lag_log2_sqr_Q7 = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7), 7);
            prev_lag_bias_Q13 = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(nb_subfr_prevlag_bias_Q13, LTPCorr_Q15.Val), 15); /* Q13 */
            prev_lag_bias_Q13 = Inlines.silk_DIV32(
                Inlines.silk_MUL(prev_lag_bias_Q13, delta_lag_log2_sqr_Q7),
                delta_lag_log2_sqr_Q7 + ((int)((0.5f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(0.5f, 7)*/);
            CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */
        }

        /* NOTE(review): the range test below always reads row 0 of the 20 ms stage-2 table, */
        /* even when Lag_CB_ptr selects the 10 ms table, and does not involve d. Left        */
        /* untouched so results stay identical - confirm against the reference codec.        */
        if (CCmax_new_b > CCmax_b && /* Find maximum biased correlation */
            CCmax_new > Inlines.silk_SMULBB(nb_subfr, search_thres2_Q13) && /* Correlation needs to be high enough to be voiced */
            Tables.silk_CB_lags_stage2[0][CBimax_new] <= MIN_LAG_8KHZ /* Lag must be in range */
            )
        {
            CCmax_b = CCmax_new_b;
            CCmax = CCmax_new;
            lag = d;
            CBimax = CBimax_new;
        }
    }

    if (lag == -1)
    {
        /* No suitable candidate found */
        Arrays.MemSetInt(pitch_out, 0, nb_subfr);
        LTPCorr_Q15.Val = 0;
        lagIndex.Val = 0;
        contourIndex.Val = 0;
        return (1);
    }

    /* Output normalized correlation */
    LTPCorr_Q15.Val = (int)Inlines.silk_LSHIFT(Inlines.silk_DIV32_16(CCmax, nb_subfr), 2);
    Inlines.OpusAssert(LTPCorr_Q15.Val >= 0);

    if (Fs_kHz > 8)
    {
        short[] scratch_mem;
        /***************************************************************************/
        /* Scale input signal down to avoid correlations measures from overflowing */
        /***************************************************************************/
        /* find scaling as max scaling for each subframe */
        SumSqrShift.silk_sum_sqr_shift(out energy, out shift, frame, frame_length);
        if (shift > 0)
        {
            scratch_mem = new short[frame_length];
            /* Move signal to scratch mem because the input signal should be unchanged */
            shift = Inlines.silk_RSHIFT(shift, 1);
            for (i = 0; i < frame_length; i++)
            {
                scratch_mem[i] = Inlines.silk_RSHIFT16(frame[i], shift);
            }
            input_frame_ptr = scratch_mem;
        }
        else
        {
            input_frame_ptr = frame;
        }

        /* Search in original signal */

        CBimax_old = CBimax;
        /* Compensate for decimation */
        Inlines.OpusAssert(lag == Inlines.silk_SAT16(lag));
        if (Fs_kHz == 12)
        {
            lag = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(lag, 3), 1);
        }
        else if (Fs_kHz == 16)
        {
            lag = Inlines.silk_LSHIFT(lag, 1);
        }
        else
        {
            lag = Inlines.silk_SMULBB(lag, 3);
        }

        lag = Inlines.silk_LIMIT_int(lag, min_lag, max_lag);
        start_lag = Inlines.silk_max_int(lag - 2, min_lag);
        end_lag = Inlines.silk_min_int(lag + 2, max_lag);
        lag_new = lag; /* to avoid undefined lag */
        CBimax = 0; /* to avoid undefined lag */

        CCmax = int.MinValue;
        /* pitch lags according to second stage */
        for (k = 0; k < nb_subfr; k++)
        {
            pitch_out[k] = lag + 2 * Tables.silk_CB_lags_stage2[k][CBimax_old];
        }

        /* Set up codebook parameters according to complexity setting and frame length */
        if (nb_subfr == SilkConstants.PE_MAX_NB_SUBFR)
        {
            nb_cbk_search = (int)Tables.silk_nb_cbk_searchs_stage3[complexity];
            Lag_CB_ptr = Tables.silk_CB_lags_stage3;
        }
        else
        {
            nb_cbk_search = SilkConstants.PE_NB_CBKS_STAGE3_10MS;
            Lag_CB_ptr = Tables.silk_CB_lags_stage3_10_ms;
        }

        /* Calculate the correlations and energies needed in stage 3 */
        energies_st3 = new silk_pe_stage3_vals[nb_subfr * nb_cbk_search];
        cross_corr_st3 = new silk_pe_stage3_vals[nb_subfr * nb_cbk_search];
        for (int c = 0; c < nb_subfr * nb_cbk_search; c++)
        {
            /* fixme: these can be replaced with a linearized array probably, or at least a struct */
            /* (kept as a block comment so it cannot swallow the statements that follow it) */
            energies_st3[c] = new silk_pe_stage3_vals();
            cross_corr_st3[c] = new silk_pe_stage3_vals();
        }
        silk_P_Ana_calc_corr_st3(cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity);
        silk_P_Ana_calc_energy_st3(energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity);

        lag_counter = 0;
        Inlines.OpusAssert(lag == Inlines.silk_SAT16(lag));
        contour_bias_Q15 = Inlines.silk_DIV32_16(((int)((SilkConstants.PE_FLATCONTOUR_BIAS) * ((long)1 << (15)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.PE_FLATCONTOUR_BIAS, 15)*/, lag);

        target = input_frame_ptr;
        target_ptr = SilkConstants.PE_LTP_MEM_LENGTH_MS * Fs_kHz;
        energy_target = Inlines.silk_ADD32(Inlines.silk_inner_prod_self(target, target_ptr, nb_subfr * sf_length), 1);
        for (d = start_lag; d <= end_lag; d++)
        {
            for (j = 0; j < nb_cbk_search; j++)
            {
                cross_corr = 0;
                energy = energy_target;
                for (k = 0; k < nb_subfr; k++)
                {
                    cross_corr = Inlines.silk_ADD32(cross_corr, Inlines.MatrixGet(cross_corr_st3, k, j, nb_cbk_search).Values[lag_counter]);
                    energy = Inlines.silk_ADD32(energy, Inlines.MatrixGet(energies_st3, k, j, nb_cbk_search).Values[lag_counter]);
                    Inlines.OpusAssert(energy >= 0);
                }
                if (cross_corr > 0)
                {
                    CCmax_new = Inlines.silk_DIV32_varQ(cross_corr, energy, 13 + 1); /* Q13 */
                    /* Reduce depending on flatness of contour */
                    diff = short.MaxValue - Inlines.silk_MUL(contour_bias_Q15, j); /* Q15 */
                    Inlines.OpusAssert(diff == Inlines.silk_SAT16(diff));
                    CCmax_new = Inlines.silk_SMULWB(CCmax_new, diff); /* Q14 */
                }
                else
                {
                    CCmax_new = 0;
                }

                if (CCmax_new > CCmax && (d + Tables.silk_CB_lags_stage3[0][j]) <= max_lag)
                {
                    CCmax = CCmax_new;
                    lag_new = d;
                    CBimax = j;
                }
            }
            lag_counter++;
        }

        for (k = 0; k < nb_subfr; k++)
        {
            pitch_out[k] = lag_new + Lag_CB_ptr[k][CBimax];
            pitch_out[k] = Inlines.silk_LIMIT(pitch_out[k], min_lag, SilkConstants.PE_MAX_LAG_MS * Fs_kHz);
        }
        lagIndex.Val = (short)(lag_new - min_lag);
        contourIndex.Val = (sbyte)CBimax;
    }
    else /* Fs_kHz == 8 */
    {
        /* Save Lags */
        for (k = 0; k < nb_subfr; k++)
        {
            pitch_out[k] = lag + Lag_CB_ptr[k][CBimax];
            pitch_out[k] = Inlines.silk_LIMIT(pitch_out[k], MIN_LAG_8KHZ, SilkConstants.PE_MAX_LAG_MS * 8);
        }
        lagIndex.Val = (short)(lag - MIN_LAG_8KHZ);
        contourIndex.Val = (sbyte)CBimax;
    }
    Inlines.OpusAssert(lagIndex.Val >= 0);

    /* return as voiced */
    return (0);
}