/// <summary>
/// Writes the mid/side predictor quantization indices to the range coder.
/// </summary>
/// <remarks>
/// One joint symbol (5 * ix[0][2] + ix[1][2]) is coded first, followed by
/// ix[n][0] and ix[n][1] for each of the two predictors.
/// </remarks>
/// <param name="psRangeEnc">I/O Compressor data structure</param>
/// <param name="ix">I Quantization indices [ 2 ][ 3 ]</param>
internal static void silk_stereo_encode_pred(EntropyCoder psRangeEnc, sbyte[][] ix)
{
    /* Third index of both predictors, packed into a single symbol */
    int jointSymbol = 5 * ix[0][2] + ix[1][2];
    Inlines.OpusAssert(jointSymbol < 25);
    psRangeEnc.enc_icdf(jointSymbol, Tables.silk_stereo_pred_joint_iCDF, 8);

    /* Remaining two indices of each predictor */
    for (int predictor = 0; predictor < 2; predictor++)
    {
        sbyte[] indices = ix[predictor];
        Inlines.OpusAssert(indices[0] < 3);
        Inlines.OpusAssert(indices[1] < SilkConstants.STEREO_QUANT_SUB_STEPS);
        psRangeEnc.enc_icdf(indices[0], Tables.silk_uniform3_iCDF, 8);
        psRangeEnc.enc_icdf(indices[1], Tables.silk_uniform5_iCDF, 8);
    }
}
/// <summary>
/// Codes the pulse amplitude of the first child subframe, using the section of
/// the shell table selected by the amplitude of the current subframe.
/// Nothing is written when <paramref name="p"/> is not positive.
/// </summary>
/// <param name="psRangeEnc">I/O compressor data structure</param>
/// <param name="p_child1">I pulse amplitude of first child subframe</param>
/// <param name="p">I pulse amplitude of current subframe</param>
/// <param name="shell_table">I table of shell cdfs</param>
internal static void encode_split(
    EntropyCoder psRangeEnc,
    int p_child1,
    int p,
    byte[] shell_table)
{
    if (p <= 0)
    {
        return;
    }

    /* Start of the iCDF for this parent amplitude inside shell_table */
    var tableOffset = Tables.silk_shell_code_table_offsets[p];
    psRangeEnc.enc_icdf(p_child1, shell_table, tableOffset, 8);
}
/// <summary>
/// Encodes the signs of the non-zero excitation pulses, one shell block at a time.
/// </summary>
/// <remarks>
/// Blocks whose entry in <paramref name="sum_pulses"/> is not positive are skipped.
/// For the others, a two-entry iCDF is built from Tables.silk_sign_iCDF (selected by
/// signal type, quantization offset type and the block's pulse sum) and every
/// non-zero pulse of the block is coded with it.
/// </remarks>
/// <param name="psRangeEnc">I/O Compressor data structure</param>
/// <param name="pulses">I pulse signal</param>
/// <param name="length">I length of input</param>
/// <param name="signalType">I Signal type</param>
/// <param name="quantOffsetType">I Quantization offset type</param>
/// <param name="sum_pulses">I Sum of absolute pulses per block [MAX_NB_SHELL_BLOCKS]</param>
internal static void silk_encode_signs(
    EntropyCoder psRangeEnc,
    sbyte[] pulses,
    int length,
    int signalType,
    int quantOffsetType,
    int[] sum_pulses)
{
    /* Two-symbol iCDF; only the first entry changes from block to block */
    byte[] icdf = new byte[2];
    byte[] signTable = Tables.silk_sign_iCDF;
    icdf[1] = 0;

    /* Seven table entries per (signalType, quantOffsetType) combination */
    int tableBase = Inlines.silk_SMULBB(7, Inlines.silk_ADD_LSHIFT(quantOffsetType, signalType, 1));

    /* Number of shell blocks, rounding the length to the nearest block */
    int blockCount = Inlines.silk_RSHIFT(
        length + (SilkConstants.SHELL_CODEC_FRAME_LENGTH / 2),
        SilkConstants.LOG2_SHELL_CODEC_FRAME_LENGTH);

    for (int block = 0, blockStart = 0;
         block < blockCount;
         block++, blockStart += SilkConstants.SHELL_CODEC_FRAME_LENGTH)
    {
        int blockSum = sum_pulses[block];
        if (blockSum <= 0)
        {
            continue;
        }

        icdf[0] = signTable[tableBase + Inlines.silk_min(blockSum & 0x1F, 6)];

        int blockEnd = blockStart + SilkConstants.SHELL_CODEC_FRAME_LENGTH;
        for (int pos = blockStart; pos < blockEnd; pos++)
        {
            sbyte pulse = pulses[pos];
            if (pulse != 0)
            {
                psRangeEnc.enc_icdf(silk_enc_map(pulse), icdf, 8);
            }
        }
    }
}
/// <summary>
/// Encode frame with Silk
/// Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what
/// encControl.payloadSize_ms is set to
/// </summary>
/// <remarks>
/// Overall flow, as implemented below:
/// validate the control structure, handle a mono-to-stereo transition, check the
/// input length, configure each internal channel, then loop: buffer/resample input
/// into the per-channel input buffers and, whenever a full frame is buffered,
/// emit LBRR data (first frame of a packet only), derive the target rate,
/// optionally convert L/R to M/S, and encode each channel. After the last frame
/// of a packet the VAD/LBRR flags are patched into the start of the bitstream.
/// Several fields of <paramref name="encControl"/> are written on exit
/// (allowBandwidthSwitch, inWBmodeWithoutVariableLP, internalSampleRate,
/// stereoWidth_Q14); switchReady is cleared earlier, right after the control check.
/// </remarks>
/// <param name="psEnc">I/O State</param>
/// <param name="encControl">I Control status</param>
/// <param name="samplesIn">I Speech sample input vector</param>
/// <param name="nSamplesIn">I Number of samples in input vector</param>
/// <param name="psRangeEnc">I/O Compressor data structure</param>
/// <param name="nBytesOut">I/O Number of bytes in payload (input: Max bytes).
/// NOTE(review): this method sets Val to 0 on entry before reading it, so the incoming value is not used here.</param>
/// <param name="prefillFlag">I Flag to indicate prefilling buffers no coding</param>
/// <returns>error code</returns>
internal static int silk_Encode(
    SilkEncoder psEnc,
    EncControlState encControl,
    short[] samplesIn,
    int nSamplesIn,
    EntropyCoder psRangeEnc,
    BoxedValueInt nBytesOut,
    int prefillFlag)
{
    int ret = SilkError.SILK_NO_ERROR;
    int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0;
    int nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms;
    int nSamplesFromInput = 0, nSamplesFromInputMax;
    int speech_act_thr_for_switch_Q8;
    int TargetRate_bps, channelRate_bps, LBRR_symbol, sum;
    int[] MStargetRates_bps = new int[2];
    short[] buf;
    int transition, curr_block, tot_blocks;

    nBytesOut.Val = 0;

    if (encControl.reducedDependency != 0)
    {
        /* Both channel states are flagged, regardless of the current channel count */
        psEnc.state_Fxx[0].first_frame_after_reset = 1;
        psEnc.state_Fxx[1].first_frame_after_reset = 1;
    }
    psEnc.state_Fxx[0].nFramesEncoded = psEnc.state_Fxx[1].nFramesEncoded = 0;

    /* Check values in encoder control structure */
    ret += encControl.check_control_input();
    if (ret != SilkError.SILK_NO_ERROR)
    {
        Inlines.OpusAssert(false);
        return(ret);
    }

    encControl.switchReady = 0;

    if (encControl.nChannelsInternal > psEnc.nChannelsInternal)
    {
        /* Mono -> Stereo transition: init state of second channel and stereo state */
        ret += SilkEncoder.silk_init_encoder(psEnc.state_Fxx[1]);
        Arrays.MemSetShort(psEnc.sStereo.pred_prev_Q13, 0, 2);
        Arrays.MemSetShort(psEnc.sStereo.sSide, 0, 2);
        psEnc.sStereo.mid_side_amp_Q0[0] = 0;
        psEnc.sStereo.mid_side_amp_Q0[1] = 1;
        psEnc.sStereo.mid_side_amp_Q0[2] = 0;
        psEnc.sStereo.mid_side_amp_Q0[3] = 1;
        psEnc.sStereo.width_prev_Q14 = 0;
        /* 1.0 expressed in Q14 */
        psEnc.sStereo.smth_width_Q14 = (short)(((int)((1.0f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(1.0f, 14)*/);
        if (psEnc.nChannelsAPI == 2)
        {
            /* Previous call already had two API channels: start channel 1 from channel 0's resampler and HP filter state */
            psEnc.state_Fxx[1].resampler_state.Assign(psEnc.state_Fxx[0].resampler_state);
            Array.Copy(psEnc.state_Fxx[0].In_HP_State, psEnc.state_Fxx[1].In_HP_State, 2);
        }
    }

    /* A change of packet size or of internal channel count counts as a transition (used below to clear LBRR flags) */
    transition = ((encControl.payloadSize_ms != psEnc.state_Fxx[0].PacketSize_ms) || (psEnc.nChannelsInternal != encControl.nChannelsInternal)) ? 1 : 0;

    psEnc.nChannelsAPI = encControl.nChannelsAPI;
    psEnc.nChannelsInternal = encControl.nChannelsInternal;

    nBlocksOf10ms = Inlines.silk_DIV32(100 * nSamplesIn, encControl.API_sampleRate);
    /* tot_blocks / curr_block are only used further down to scale maxBits and to decide when CBR applies */
    tot_blocks = (nBlocksOf10ms > 1) ? nBlocksOf10ms >> 1 : 1;
    curr_block = 0;
    if (prefillFlag != 0)
    {
        /* Only accept input length of 10 ms */
        if (nBlocksOf10ms != 1)
        {
            Inlines.OpusAssert(false);
            return(SilkError.SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES);
        }
        /* Reset Encoder */
        for (n = 0; n < encControl.nChannelsInternal; n++)
        {
            ret += SilkEncoder.silk_init_encoder(psEnc.state_Fxx[n]);
            Inlines.OpusAssert(ret == SilkError.SILK_NO_ERROR);
        }
        /* Temporarily force 10 ms payload and complexity 0; both are restored before returning */
        tmp_payloadSize_ms = encControl.payloadSize_ms;
        encControl.payloadSize_ms = 10;
        tmp_complexity = encControl.complexity;
        encControl.complexity = 0;
        for (n = 0; n < encControl.nChannelsInternal; n++)
        {
            psEnc.state_Fxx[n].controlled_since_last_payload = 0;
            psEnc.state_Fxx[n].prefillFlag = 1;
        }
    }
    else
    {
        /* Only accept input lengths that are a multiple of 10 ms */
        if (nBlocksOf10ms * encControl.API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0)
        {
            Inlines.OpusAssert(false);
            return(SilkError.SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES);
        }
        /* Make sure no more than one packet can be produced */
        if (1000 * (int)nSamplesIn > encControl.payloadSize_ms * encControl.API_sampleRate)
        {
            Inlines.OpusAssert(false);
            return(SilkError.SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES);
        }
    }

    /* Initial per-channel rate: the total bitrate shifted right by (channels - 1) */
    TargetRate_bps = Inlines.silk_RSHIFT32(encControl.bitRate, encControl.nChannelsInternal - 1);
    for (n = 0; n < encControl.nChannelsInternal; n++)
    {
        /* Force the side channel to the same rate as the mid */
        int force_fs_kHz = (n == 1) ? psEnc.state_Fxx[0].fs_kHz : 0;
        ret += psEnc.state_Fxx[n].silk_control_encoder(encControl, TargetRate_bps, psEnc.allowBandwidthSwitch, n, force_fs_kHz);
        if (ret != SilkError.SILK_NO_ERROR)
        {
            Inlines.OpusAssert(false);
            return(ret);
        }
        if (psEnc.state_Fxx[n].first_frame_after_reset != 0 || transition != 0)
        {
            /* Note: the loop bound uses channel 0's nFramesPerPacket for every channel */
            for (i = 0; i < psEnc.state_Fxx[0].nFramesPerPacket; i++)
            {
                psEnc.state_Fxx[n].LBRR_flags[i] = 0;
            }
        }
        psEnc.state_Fxx[n].inDTX = psEnc.state_Fxx[n].useDTX;
    }
    Inlines.OpusAssert(encControl.nChannelsInternal == 1 || psEnc.state_Fxx[0].fs_kHz == psEnc.state_Fxx[1].fs_kHz);

    /* Input buffering/resampling and encoding */
    nSamplesToBufferMax = 10 * nBlocksOf10ms * psEnc.state_Fxx[0].fs_kHz;
    nSamplesFromInputMax = Inlines.silk_DIV32_16(nSamplesToBufferMax * psEnc.state_Fxx[0].API_fs_Hz, (short)(psEnc.state_Fxx[0].fs_kHz * 1000));
    /* Scratch buffer holding one de-interleaved (or down-mixed) channel of API-rate input */
    buf = new short[nSamplesFromInputMax];
    /* Read position in samplesIn, counted in interleaved samples */
    int samplesIn_ptr = 0;
    while (true)
    {
        nSamplesToBuffer = psEnc.state_Fxx[0].frame_length - psEnc.state_Fxx[0].inputBufIx;
        nSamplesToBuffer = Inlines.silk_min(nSamplesToBuffer, nSamplesToBufferMax);
        nSamplesFromInput = Inlines.silk_DIV32_16(nSamplesToBuffer * psEnc.state_Fxx[0].API_fs_Hz, psEnc.state_Fxx[0].fs_kHz * 1000);
        /* Resample and write to buffer */
        /* Resampler output is written at inputBufIx + 2; indices 0..1 of inputBuf are filled separately (see the "Buffering" branch below for the mono case) */
        if (encControl.nChannelsAPI == 2 && encControl.nChannelsInternal == 2)
        {
            int id = psEnc.state_Fxx[0].nFramesEncoded;
            /* Even-indexed interleaved samples go to channel 0 */
            for (n = 0; n < nSamplesFromInput; n++)
            {
                buf[n] = samplesIn[samplesIn_ptr + (2 * n)];
            }
            /* Making sure to start both resamplers from the same state when switching from mono to stereo */
            if (psEnc.nPrevChannelsInternal == 1 && id == 0)
            {
                //silk_memcpy(&psEnc.state_Fxx[1].resampler_state, &psEnc.state_Fxx[0].resampler_state, sizeof(psEnc.state_Fxx[1].resampler_state));
                psEnc.state_Fxx[1].resampler_state.Assign(psEnc.state_Fxx[0].resampler_state);
            }

            ret += Resampler.silk_resampler(
                psEnc.state_Fxx[0].resampler_state,
                psEnc.state_Fxx[0].inputBuf,
                psEnc.state_Fxx[0].inputBufIx + 2,
                buf,
                0,
                nSamplesFromInput);

            psEnc.state_Fxx[0].inputBufIx += nSamplesToBuffer;

            nSamplesToBuffer = psEnc.state_Fxx[1].frame_length - psEnc.state_Fxx[1].inputBufIx;
            nSamplesToBuffer = Inlines.silk_min(nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc.state_Fxx[1].fs_kHz);
            /* Odd-indexed interleaved samples go to channel 1 */
            for (n = 0; n < nSamplesFromInput; n++)
            {
                buf[n] = samplesIn[samplesIn_ptr + (2 * n) + 1];
            }
            ret += Resampler.silk_resampler(
                psEnc.state_Fxx[1].resampler_state,
                psEnc.state_Fxx[1].inputBuf,
                psEnc.state_Fxx[1].inputBufIx + 2,
                buf,
                0,
                nSamplesFromInput);

            psEnc.state_Fxx[1].inputBufIx += nSamplesToBuffer;
        }
        else if (encControl.nChannelsAPI == 2 && encControl.nChannelsInternal == 1)
        {
            /* Combine left and right channels before resampling */
            for (n = 0; n < nSamplesFromInput; n++)
            {
                sum = samplesIn[samplesIn_ptr + (2 * n)] + samplesIn[samplesIn_ptr + (2 * n) + 1];
                buf[n] = (short)Inlines.silk_RSHIFT_ROUND(sum, 1);
            }

            ret += Resampler.silk_resampler(
                psEnc.state_Fxx[0].resampler_state,
                psEnc.state_Fxx[0].inputBuf,
                psEnc.state_Fxx[0].inputBufIx + 2,
                buf,
                0,
                nSamplesFromInput);

            /* On the first mono frame, average the results for the two resampler states */
            if (psEnc.nPrevChannelsInternal == 2 && psEnc.state_Fxx[0].nFramesEncoded == 0)
            {
                ret += Resampler.silk_resampler(
                    psEnc.state_Fxx[1].resampler_state,
                    psEnc.state_Fxx[1].inputBuf,
                    psEnc.state_Fxx[1].inputBufIx + 2,
                    buf,
                    0,
                    nSamplesFromInput);

                for (n = 0; n < psEnc.state_Fxx[0].frame_length; n++)
                {
                    psEnc.state_Fxx[0].inputBuf[psEnc.state_Fxx[0].inputBufIx + n + 2] = (short)(Inlines.silk_RSHIFT(psEnc.state_Fxx[0].inputBuf[psEnc.state_Fxx[0].inputBufIx + n + 2] + psEnc.state_Fxx[1].inputBuf[psEnc.state_Fxx[1].inputBufIx + n + 2], 1));
                }
            }

            psEnc.state_Fxx[0].inputBufIx += nSamplesToBuffer;
        }
        else
        {
            Inlines.OpusAssert(encControl.nChannelsAPI == 1 && encControl.nChannelsInternal == 1);
            Array.Copy(samplesIn, samplesIn_ptr, buf, 0, nSamplesFromInput);
            ret += Resampler.silk_resampler(
                psEnc.state_Fxx[0].resampler_state,
                psEnc.state_Fxx[0].inputBuf,
                psEnc.state_Fxx[0].inputBufIx + 2,
                buf,
                0,
                nSamplesFromInput);

            psEnc.state_Fxx[0].inputBufIx += nSamplesToBuffer;
        }

        /* Advance past the consumed input (all API channels) and track what remains */
        samplesIn_ptr += (nSamplesFromInput * encControl.nChannelsAPI);
        nSamplesIn -= nSamplesFromInput;

        /* Default */
        psEnc.allowBandwidthSwitch = 0;

        /* Silk encoder */
        if (psEnc.state_Fxx[0].inputBufIx >= psEnc.state_Fxx[0].frame_length)
        {
            /* Enough data in input buffer, so encode */
            Inlines.OpusAssert(psEnc.state_Fxx[0].inputBufIx == psEnc.state_Fxx[0].frame_length);
            Inlines.OpusAssert(encControl.nChannelsInternal == 1 || psEnc.state_Fxx[1].inputBufIx == psEnc.state_Fxx[1].frame_length);

            /* Deal with LBRR data */
            if (psEnc.state_Fxx[0].nFramesEncoded == 0 && prefillFlag == 0)
            {
                /* Create space at start of payload for VAD and FEC flags */
                /* One placeholder symbol is coded here; the real flag bits are patched in by enc_patch_initial_bits after the last frame of the packet */
                byte[] iCDF = { 0, 0 };
                iCDF[0] = (byte)(256 - Inlines.silk_RSHIFT(256, (psEnc.state_Fxx[0].nFramesPerPacket + 1) * encControl.nChannelsInternal));
                psRangeEnc.enc_icdf(0, iCDF, 8);

                /* Encode any LBRR data from previous packet */
                /* Encode LBRR flags */
                for (n = 0; n < encControl.nChannelsInternal; n++)
                {
                    /* Pack the per-frame flags into a bit mask, frame i in bit i */
                    LBRR_symbol = 0;
                    for (i = 0; i < psEnc.state_Fxx[n].nFramesPerPacket; i++)
                    {
                        LBRR_symbol |= Inlines.silk_LSHIFT(psEnc.state_Fxx[n].LBRR_flags[i], i);
                    }
                    psEnc.state_Fxx[n].LBRR_flag = (sbyte)(LBRR_symbol > 0 ? 1 : 0);
                    /* The mask itself is only coded for multi-frame packets; for a single frame LBRR_flag carries all the information */
                    if (LBRR_symbol != 0 && psEnc.state_Fxx[n].nFramesPerPacket > 1)
                    {
                        psRangeEnc.enc_icdf(LBRR_symbol - 1, Tables.silk_LBRR_flags_iCDF_ptr[psEnc.state_Fxx[n].nFramesPerPacket - 2], 8);
                    }
                }

                /* Code LBRR indices and excitation signals */
                for (i = 0; i < psEnc.state_Fxx[0].nFramesPerPacket; i++)
                {
                    for (n = 0; n < encControl.nChannelsInternal; n++)
                    {
                        if (psEnc.state_Fxx[n].LBRR_flags[i] != 0)
                        {
                            int condCoding;

                            if (encControl.nChannelsInternal == 2 && n == 0)
                            {
                                Stereo.silk_stereo_encode_pred(psRangeEnc, psEnc.sStereo.predIx[i]);
                                /* For LBRR data there's no need to code the mid-only flag if the side-channel LBRR flag is set */
                                if (psEnc.state_Fxx[1].LBRR_flags[i] == 0)
                                {
                                    Stereo.silk_stereo_encode_mid_only(psRangeEnc, psEnc.sStereo.mid_only_flags[i]);
                                }
                            }

                            /* Use conditional coding if previous frame available */
                            if (i > 0 && psEnc.state_Fxx[n].LBRR_flags[i - 1] != 0)
                            {
                                condCoding = SilkConstants.CODE_CONDITIONALLY;
                            }
                            else
                            {
                                condCoding = SilkConstants.CODE_INDEPENDENTLY;
                            }

                            EncodeIndices.silk_encode_indices(psEnc.state_Fxx[n], psRangeEnc, i, 1, condCoding);
                            EncodePulses.silk_encode_pulses(psRangeEnc, psEnc.state_Fxx[n].indices_LBRR[i].signalType, psEnc.state_Fxx[n].indices_LBRR[i].quantOffsetType, psEnc.state_Fxx[n].pulses_LBRR[i], psEnc.state_Fxx[n].frame_length);
                        }
                    }
                }

                /* Reset LBRR flags */
                for (n = 0; n < encControl.nChannelsInternal; n++)
                {
                    Arrays.MemSetInt(psEnc.state_Fxx[n].LBRR_flags, 0, SilkConstants.MAX_FRAMES_PER_PACKET);
                }

                /* Remember how many bits the header and LBRR data consumed, for the rate computation below */
                psEnc.nBitsUsedLBRR = psRangeEnc.tell();
            }

            HPVariableCutoff.silk_HP_variable_cutoff(psEnc.state_Fxx);

            /* Total target bits for packet */
            nBits = Inlines.silk_DIV32_16(Inlines.silk_MUL(encControl.bitRate, encControl.payloadSize_ms), 1000);
            /* Subtract bits used for LBRR */
            if (prefillFlag == 0)
            {
                nBits -= psEnc.nBitsUsedLBRR;
            }
            /* Divide by number of uncoded frames left in packet */
            nBits = Inlines.silk_DIV32_16(nBits, psEnc.state_Fxx[0].nFramesPerPacket);
            /* Convert to bits/second */
            if (encControl.payloadSize_ms == 10)
            {
                TargetRate_bps = Inlines.silk_SMULBB(nBits, 100);
            }
            else
            {
                TargetRate_bps = Inlines.silk_SMULBB(nBits, 50);
            }
            /* Subtract fraction of bits in excess of target in previous frames and packets */
            TargetRate_bps -= Inlines.silk_DIV32_16(Inlines.silk_MUL(psEnc.nBitsExceeded, 1000), TuningParameters.BITRESERVOIR_DECAY_TIME_MS);
            if (prefillFlag == 0 && psEnc.state_Fxx[0].nFramesEncoded > 0)
            {
                /* Compare actual vs target bits so far in this packet */
                int bitsBalance = psRangeEnc.tell() - psEnc.nBitsUsedLBRR - nBits * psEnc.state_Fxx[0].nFramesEncoded;
                TargetRate_bps -= Inlines.silk_DIV32_16(Inlines.silk_MUL(bitsBalance, 1000), TuningParameters.BITRESERVOIR_DECAY_TIME_MS);
            }
            /* Never exceed input bitrate */
            /* Limits are passed as (bitRate, 5000); presumably silk_LIMIT accepts its bounds in either order - confirm in Inlines */
            TargetRate_bps = Inlines.silk_LIMIT(TargetRate_bps, encControl.bitRate, 5000);

            /* Convert Left/Right to Mid/Side */
            if (encControl.nChannelsInternal == 2)
            {
                /* The mid-only flag is passed through a boxed value and copied back into mid_only_flags after the call */
                BoxedValueSbyte midOnlyFlagBoxed = new BoxedValueSbyte(psEnc.sStereo.mid_only_flags[psEnc.state_Fxx[0].nFramesEncoded]);
                Stereo.silk_stereo_LR_to_MS(psEnc.sStereo, psEnc.state_Fxx[0].inputBuf, 2, psEnc.state_Fxx[1].inputBuf, 2, psEnc.sStereo.predIx[psEnc.state_Fxx[0].nFramesEncoded], midOnlyFlagBoxed, MStargetRates_bps, TargetRate_bps, psEnc.state_Fxx[0].speech_activity_Q8, encControl.toMono, psEnc.state_Fxx[0].fs_kHz, psEnc.state_Fxx[0].frame_length);

                psEnc.sStereo.mid_only_flags[psEnc.state_Fxx[0].nFramesEncoded] = midOnlyFlagBoxed.Val;

                if (midOnlyFlagBoxed.Val == 0)
                {
                    /* Reset side channel encoder memory for first frame with side coding */
                    if (psEnc.prev_decode_only_middle == 1)
                    {
                        psEnc.state_Fxx[1].sShape.Reset();
                        psEnc.state_Fxx[1].sPrefilt.Reset();
                        psEnc.state_Fxx[1].sNSQ.Reset();
                        Arrays.MemSetShort(psEnc.state_Fxx[1].prev_NLSFq_Q15, 0, SilkConstants.MAX_LPC_ORDER);
                        Arrays.MemSetInt(psEnc.state_Fxx[1].sLP.In_LP_State, 0, 2);

                        psEnc.state_Fxx[1].prevLag = 100;
                        psEnc.state_Fxx[1].sNSQ.lagPrev = 100;
                        psEnc.state_Fxx[1].sShape.LastGainIndex = 10;
                        psEnc.state_Fxx[1].prevSignalType = SilkConstants.TYPE_NO_VOICE_ACTIVITY;
                        psEnc.state_Fxx[1].sNSQ.prev_gain_Q16 = 65536;
                        psEnc.state_Fxx[1].first_frame_after_reset = 1;
                    }
                    psEnc.state_Fxx[1].silk_encode_do_VAD();
                }
                else
                {
                    /* Side channel is not coded for this frame: clear its VAD flag */
                    psEnc.state_Fxx[1].VAD_flags[psEnc.state_Fxx[0].nFramesEncoded] = 0;
                }

                if (prefillFlag == 0)
                {
                    Stereo.silk_stereo_encode_pred(psRangeEnc, psEnc.sStereo.predIx[psEnc.state_Fxx[0].nFramesEncoded]);
                    /* The mid-only flag is only coded when the side channel's VAD flag for this frame is 0 */
                    if (psEnc.state_Fxx[1].VAD_flags[psEnc.state_Fxx[0].nFramesEncoded] == 0)
                    {
                        Stereo.silk_stereo_encode_mid_only(psRangeEnc, psEnc.sStereo.mid_only_flags[psEnc.state_Fxx[0].nFramesEncoded]);
                    }
                }
            }
            else
            {
                /* Buffering */
                /* Put the two samples saved in sStereo.sMid at the start of inputBuf, then save the two samples at index frame_length for the next frame */
                Array.Copy(psEnc.sStereo.sMid, psEnc.state_Fxx[0].inputBuf, 2);
                Array.Copy(psEnc.state_Fxx[0].inputBuf, psEnc.state_Fxx[0].frame_length, psEnc.sStereo.sMid, 0, 2);
            }

            psEnc.state_Fxx[0].silk_encode_do_VAD();

            /* Encode */
            for (n = 0; n < encControl.nChannelsInternal; n++)
            {
                int maxBits, useCBR;

                /* Handling rate constraints */
                maxBits = encControl.maxBits;
                if (tot_blocks == 2 && curr_block == 0)
                {
                    maxBits = maxBits * 3 / 5;
                }
                else if (tot_blocks == 3)
                {
                    if (curr_block == 0)
                    {
                        maxBits = maxBits * 2 / 5;
                    }
                    else if (curr_block == 1)
                    {
                        maxBits = maxBits * 3 / 4;
                    }
                }

                /* CBR is only applied on the last block of the call */
                useCBR = (encControl.useCBR != 0 && curr_block == tot_blocks - 1) ? 1 : 0;

                if (encControl.nChannelsInternal == 1)
                {
                    channelRate_bps = TargetRate_bps;
                }
                else
                {
                    channelRate_bps = MStargetRates_bps[n];
                    if (n == 0 && MStargetRates_bps[1] > 0)
                    {
                        useCBR = 0;
                        /* Give mid up to 1/2 of the max bits for that frame */
                        maxBits -= encControl.maxBits / (tot_blocks * 2);
                    }
                }

                /* A channel with a non-positive rate is not encoded, but its frame counters below are still advanced */
                if (channelRate_bps > 0)
                {
                    int condCoding;

                    psEnc.state_Fxx[n].silk_control_SNR(channelRate_bps);

                    /* Use independent coding if no previous frame available */
                    if (psEnc.state_Fxx[0].nFramesEncoded - n <= 0)
                    {
                        condCoding = SilkConstants.CODE_INDEPENDENTLY;
                    }
                    else if (n > 0 && psEnc.prev_decode_only_middle != 0)
                    {
                        /* If we skipped a side frame in this packet, we don't
                         * need LTP scaling; the LTP state is well-defined. */
                        condCoding = SilkConstants.CODE_INDEPENDENTLY_NO_LTP_SCALING;
                    }
                    else
                    {
                        condCoding = SilkConstants.CODE_CONDITIONALLY;
                    }

                    ret += psEnc.state_Fxx[n].silk_encode_frame(nBytesOut, psRangeEnc, condCoding, maxBits, useCBR);
                    Inlines.OpusAssert(ret == SilkError.SILK_NO_ERROR);
                }

                psEnc.state_Fxx[n].controlled_since_last_payload = 0;
                psEnc.state_Fxx[n].inputBufIx = 0;
                psEnc.state_Fxx[n].nFramesEncoded++;
            }

            /* Remember the mid-only flag of the frame just encoded (nFramesEncoded was incremented above) */
            psEnc.prev_decode_only_middle = psEnc.sStereo.mid_only_flags[psEnc.state_Fxx[0].nFramesEncoded - 1];

            /* Insert VAD and FEC flags at beginning of bitstream */
            if (nBytesOut.Val > 0 && psEnc.state_Fxx[0].nFramesEncoded == psEnc.state_Fxx[0].nFramesPerPacket)
            {
                /* Per channel: one VAD bit per frame followed by one LBRR bit, most significant first */
                flags = 0;
                for (n = 0; n < encControl.nChannelsInternal; n++)
                {
                    for (i = 0; i < psEnc.state_Fxx[n].nFramesPerPacket; i++)
                    {
                        flags = Inlines.silk_LSHIFT(flags, 1);
                        flags |= (int)psEnc.state_Fxx[n].VAD_flags[i];
                    }
                    flags = Inlines.silk_LSHIFT(flags, 1);
                    flags |= (int)psEnc.state_Fxx[n].LBRR_flag;
                }

                if (prefillFlag == 0)
                {
                    psRangeEnc.enc_patch_initial_bits((uint)flags, (uint)((psEnc.state_Fxx[0].nFramesPerPacket + 1) * encControl.nChannelsInternal));
                }

                /* Return zero bytes if all channels DTXed */
                if (psEnc.state_Fxx[0].inDTX != 0 && (encControl.nChannelsInternal == 1 || psEnc.state_Fxx[1].inDTX != 0))
                {
                    nBytesOut.Val = 0;
                }

                /* Bit reservoir: add bits produced, subtract the packet's nominal budget, clamp to [0, 10000] */
                psEnc.nBitsExceeded += nBytesOut.Val * 8;
                psEnc.nBitsExceeded -= Inlines.silk_DIV32_16(Inlines.silk_MUL(encControl.bitRate, encControl.payloadSize_ms), 1000);
                psEnc.nBitsExceeded = Inlines.silk_LIMIT(psEnc.nBitsExceeded, 0, 10000);

                /* Update flag indicating if bandwidth switching is allowed */
                speech_act_thr_for_switch_Q8 = Inlines.silk_SMLAWB(((int)((TuningParameters.SPEECH_ACTIVITY_DTX_THRES) * ((long)1 << (8)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.SPEECH_ACTIVITY_DTX_THRES, 8)*/, ((int)(((1 - TuningParameters.SPEECH_ACTIVITY_DTX_THRES) / TuningParameters.MAX_BANDWIDTH_SWITCH_DELAY_MS) * ((long)1 << (16 + 8)) + 0.5)) /*Inlines.SILK_CONST((1 - TuningParameters.SPEECH_ACTIVITY_DTX_THRES) / TuningParameters.MAX_BANDWIDTH_SWITCH_DELAY_MS, 16 + 8)*/, psEnc.timeSinceSwitchAllowed_ms);
                if (psEnc.state_Fxx[0].speech_activity_Q8 < speech_act_thr_for_switch_Q8)
                {
                    psEnc.allowBandwidthSwitch = 1;
                    psEnc.timeSinceSwitchAllowed_ms = 0;
                }
                else
                {
                    psEnc.allowBandwidthSwitch = 0;
                    psEnc.timeSinceSwitchAllowed_ms += encControl.payloadSize_ms;
                }
            }

            /* All input consumed: done */
            if (nSamplesIn == 0)
            {
                break;
            }
        }
        else
        {
            /* Not enough buffered samples for a full frame yet: wait for the next call */
            break;
        }
        curr_block++;
    }

    psEnc.nPrevChannelsInternal = encControl.nChannelsInternal;

    /* Report encoder status back through the control structure */
    encControl.allowBandwidthSwitch = psEnc.allowBandwidthSwitch;
    encControl.inWBmodeWithoutVariableLP = (psEnc.state_Fxx[0].fs_kHz == 16 && psEnc.state_Fxx[0].sLP.mode == 0) ? 1 : 0;
    encControl.internalSampleRate = Inlines.silk_SMULBB(psEnc.state_Fxx[0].fs_kHz, 1000);
    encControl.stereoWidth_Q14 = encControl.toMono != 0 ? 0 : psEnc.sStereo.smth_width_Q14;
    if (prefillFlag != 0)
    {
        /* Undo the temporary prefill overrides made above */
        encControl.payloadSize_ms = tmp_payloadSize_ms;
        encControl.complexity = tmp_complexity;
        for (n = 0; n < encControl.nChannelsInternal; n++)
        {
            psEnc.state_Fxx[n].controlled_since_last_payload = 0;
            psEnc.state_Fxx[n].prefillFlag = 0;
        }
    }

    return(ret);
}
/// <summary>
/// Quantizes and entropy-codes the coarse energy of bands [start, end) for C channels.
/// For every band/channel an integer step qi is derived from the prediction residual,
/// possibly restricted by the remaining bit budget, written to <paramref name="enc"/>,
/// and then used to update <paramref name="oldEBands"/> and <paramref name="error"/>.
/// </summary>
/// <param name="m">Mode; not referenced in this method body</param>
/// <param name="start">First band to code (inclusive)</param>
/// <param name="end">Last band to code (exclusive)</param>
/// <param name="eBands">Input band energies, indexed [channel][band]</param>
/// <param name="oldEBands">I/O Previous band energies, indexed [channel][band]; overwritten with the quantized energies</param>
/// <param name="budget">Bit budget compared against the coder's bit count</param>
/// <param name="tell">Bit count used for the initial intra-flag check; overwritten with enc.tell() inside the loop</param>
/// <param name="prob_model">Laplace model parameters, read at indices 2*min(band,20) and 2*min(band,20)+1</param>
/// <param name="error">Output: residual left after coarse quantization, indexed [channel][band]</param>
/// <param name="enc">I/O Entropy coder</param>
/// <param name="C">Number of channels processed per band (the do/while runs at least once)</param>
/// <param name="LM">Index into beta_coef / pred_coef when intra is 0</param>
/// <param name="intra">Non-zero selects intra coding (coef = 0, beta = beta_intra)</param>
/// <param name="max_decay">Largest allowed drop relative to oldEBands, used for decay_bound</param>
/// <param name="lfe">Non-zero forces qi to be non-positive for bands with index 2 and above, and makes the method return 0</param>
/// <returns>Sum of |unrestricted qi - coded qi| over all bands/channels, or 0 when lfe is non-zero</returns>
internal static int quant_coarse_energy_impl(CeltMode m, int start, int end, int[][] eBands, int[][] oldEBands, int budget, int tell, byte[] prob_model, int[][] error, EntropyCoder enc, int C, int LM, int intra, int max_decay, int lfe)
{
    int i, c;
    int badness = 0;
    /* Running inter-band prediction state, one entry per channel */
    int[] prev = { 0, 0 };
    int coef;
    int beta;

    /* The intra flag is only written when at least 3 bits remain in the budget */
    if (tell + 3 <= budget)
    {
        enc.enc_bit_logp(intra, 3);
    }

    if (intra != 0)
    {
        coef = 0;
        beta = beta_intra;
    }
    else
    {
        beta = beta_coef[LM];
        coef = pred_coef[LM];
    }

    /* Encode at a fixed coarse resolution */
    for (i = start; i < end; i++)
    {
        c = 0;
        do
        {
            int bits_left;
            int qi, qi0;
            int q;
            int x;
            int f, tmp;
            int oldE;
            int decay_bound;
            x = eBands[c][i];
            /* Previous energy, floored at -9.0 (in DB_SHIFT fixed point) */
            oldE = Inlines.MAX16(-((short)(0.5 + (9.0f) * (((int)1) << (CeltConstants.DB_SHIFT)))) /*Inlines.QCONST16(9.0f, CeltConstants.DB_SHIFT)*/, oldEBands[c][i]);
            /* Prediction residual with 7 extra fractional bits */
            f = Inlines.SHL32(Inlines.EXTEND32(x), 7) - Inlines.PSHR32(Inlines.MULT16_16(coef, oldE), 8) - prev[c];
            /* Rounding to nearest integer here is really important! */
            qi = (f + ((int)(0.5 + (.5f) * (((int)1) << (CeltConstants.DB_SHIFT + 7)))) /*Inlines.QCONST32(.5f, CeltConstants.DB_SHIFT + 7)*/) >> (CeltConstants.DB_SHIFT + 7);
            /* Lowest energy allowed for this band: max(-28.0, oldEBands - max_decay) */
            decay_bound = Inlines.EXTRACT16(Inlines.MAX32(-((short)(0.5 + (28.0f) * (((int)1) << (CeltConstants.DB_SHIFT)))) /*Inlines.QCONST16(28.0f, CeltConstants.DB_SHIFT)*/, Inlines.SUB32((int)oldEBands[c][i], max_decay)));
            /* Prevent the energy from going down too quickly (e.g. for bands
             * that have just one bin) */
            if (qi < 0 && x < decay_bound)
            {
                qi += (int)Inlines.SHR16(Inlines.SUB16(decay_bound, x), CeltConstants.DB_SHIFT);
                if (qi > 0)
                {
                    qi = 0;
                }
            }
            /* Remember the unrestricted value so the badness metric can measure later clamping */
            qi0 = qi;
            /* If we don't have enough bits to encode all the energy, just assume
             * something safe. */
            tell = enc.tell();
            /* Bits left after reserving 3 bits per channel for every band still to be coded (including this one) */
            bits_left = budget - tell - 3 * C * (end - i);
            if (i != start && bits_left < 30)
            {
                if (bits_left < 24)
                {
                    qi = Inlines.IMIN(1, qi);
                }
                if (bits_left < 16)
                {
                    qi = Inlines.IMAX(-1, qi);
                }
            }
            if (lfe != 0 && i >= 2)
            {
                qi = Inlines.IMIN(qi, 0);
            }
            /* Pick the coding method by how many bits remain; each fallback narrows the range of qi */
            if (budget - tell >= 15)
            {
                int pi;
                pi = 2 * Inlines.IMIN(i, 20);
                /* qi is passed by reference, so the Laplace coder may modify it */
                Laplace.ec_laplace_encode(enc, ref qi, (((uint)prob_model[pi]) << 7), ((int)prob_model[pi + 1]) << 6);
            }
            else if (budget - tell >= 2)
            {
                /* qi restricted to {-1, 0, 1}; the expression below maps 0 -> 0, -1 -> 1, 1 -> 2 */
                qi = Inlines.IMAX(-1, Inlines.IMIN(qi, 1));
                enc.enc_icdf(2 * qi ^ (0 - (qi < 0 ? 1 : 0)), small_energy_icdf, 2);
            }
            else if (budget - tell >= 1)
            {
                /* qi restricted to {-1, 0}, coded as a single bit */
                qi = Inlines.IMIN(0, qi);
                enc.enc_bit_logp(-qi, 1);
            }
            else
            {
                /* No bits left: nothing is written and qi is fixed at -1 */
                qi = -1;
            }
            error[c][i] = (Inlines.PSHR32(f, 7) - Inlines.SHL16((qi), CeltConstants.DB_SHIFT));
            badness += Inlines.abs(qi0 - qi);
            q = (int)Inlines.SHL32(qi, CeltConstants.DB_SHIFT); // opus bug: useless extend32

            /* Reconstructed energy, floored at -28.0, stored back for use as the next prediction */
            tmp = Inlines.PSHR32(Inlines.MULT16_16(coef, oldE), 8) + prev[c] + Inlines.SHL32(q, 7);
            tmp = Inlines.MAX32(-((int)(0.5 + (28.0f) * (((int)1) << (CeltConstants.DB_SHIFT + 7)))) /*Inlines.QCONST32(28.0f, CeltConstants.DB_SHIFT + 7)*/, tmp);
            oldEBands[c][i] = (Inlines.PSHR32(tmp, 7));
            prev[c] = prev[c] + Inlines.SHL32(q, 7) - Inlines.MULT16_16(beta, Inlines.PSHR32(q, 8));
        } while (++c < C);
    }
    return(lfe != 0 ? 0 : badness);
}
/// <summary>
/// Writes the mid-only flag to the range coder as a single symbol.
/// </summary>
/// <param name="psRangeEnc">I/O Compressor data structure</param>
/// <param name="mid_only_flag">I Flag value to encode</param>
internal static void silk_stereo_encode_mid_only(EntropyCoder psRangeEnc, sbyte mid_only_flag)
{
    /* Encode flag that only mid channel is coded, using its dedicated iCDF table */
    var midOnlyTable = Tables.silk_stereo_only_code_mid_iCDF;
    psRangeEnc.enc_icdf(mid_only_flag, midOnlyTable, 8);
}
/// <summary>
/// Encodes the excitation quantization indices of one frame.
/// </summary>
/// <remarks>
/// Steps, in bitstream order: rate level, pulse count per shell block (with
/// escape symbols for blocks that had to be down-scaled), shell-coded pulse
/// positions, least significant bits of down-scaled blocks, and finally the signs.
/// When the frame length is not a whole number of shell blocks, the tail of
/// <paramref name="pulses"/> past <paramref name="frame_length"/> is zeroed in place.
/// </remarks>
/// <param name="psRangeEnc">I/O compressor data structure</param>
/// <param name="signalType">I Signal type</param>
/// <param name="quantOffsetType">I quantOffsetType</param>
/// <param name="pulses">I quantization indices</param>
/// <param name="frame_length">I Frame length</param>
internal static void silk_encode_pulses(
    EntropyCoder psRangeEnc,
    int signalType,
    int quantOffsetType,
    sbyte[] pulses,
    int frame_length)
{
    /* Scratch space for the pairwise sums built by combine_and_check */
    int[] combined = new int[8];
    Arrays.MemSetInt(combined, 0, 8);

    /* ------------------------ */
    /* Prepare for shell coding */
    /* ------------------------ */

    /* Number of shell blocks, rounded up */
    Inlines.OpusAssert(1 << SilkConstants.LOG2_SHELL_CODEC_FRAME_LENGTH == SilkConstants.SHELL_CODEC_FRAME_LENGTH);
    int blockCount = Inlines.silk_RSHIFT(frame_length, SilkConstants.LOG2_SHELL_CODEC_FRAME_LENGTH);
    if (blockCount * SilkConstants.SHELL_CODEC_FRAME_LENGTH < frame_length)
    {
        /* Make sure only happens for 10 ms @ 12 kHz */
        Inlines.OpusAssert(frame_length == 12 * 10);
        blockCount++;
        Arrays.MemSetWithOffset<sbyte>(pulses, 0, frame_length, SilkConstants.SHELL_CODEC_FRAME_LENGTH);
    }

    /* Magnitudes of all pulses, padded to whole blocks */
    int paddedLength = blockCount * SilkConstants.SHELL_CODEC_FRAME_LENGTH;
    int[] magnitudes = new int[paddedLength];
    Inlines.OpusAssert((SilkConstants.SHELL_CODEC_FRAME_LENGTH & 3) == 0);
    for (int pos = 0; pos < paddedLength; pos++)
    {
        magnitudes[pos] = (int)Inlines.silk_abs(pulses[pos]);
    }

    /* Pulse sum per block; blocks whose sums exceed the table limits are halved until they fit */
    int[] blockSums = new int[blockCount];
    int[] shifts = new int[blockCount];
    for (int block = 0, blockStart = 0;
         block < blockCount;
         block++, blockStart += SilkConstants.SHELL_CODEC_FRAME_LENGTH)
    {
        shifts[block] = 0;

        for (;;)
        {
            /* 1+1 -> 2 */
            int overflow = combine_and_check(combined, 0, magnitudes, blockStart, Tables.silk_max_pulses_table[0], 8);
            /* 2+2 -> 4 */
            overflow += combine_and_check(combined, combined, Tables.silk_max_pulses_table[1], 4);
            /* 4+4 -> 8 */
            overflow += combine_and_check(combined, combined, Tables.silk_max_pulses_table[2], 2);
            /* 8+8 -> 16 */
            overflow += combine_and_check(blockSums, block, combined, 0, Tables.silk_max_pulses_table[3], 1);

            if (overflow == 0)
            {
                /* Block fits; go to the next shell coding frame */
                break;
            }

            /* We need to downscale the quantization signal */
            shifts[block]++;
            int blockEnd = blockStart + SilkConstants.SHELL_CODEC_FRAME_LENGTH;
            for (int pos = blockStart; pos < blockEnd; pos++)
            {
                magnitudes[pos] = Inlines.silk_RSHIFT(magnitudes[pos], 1);
            }
        }
    }

    /* ---------- */
    /* Rate level */
    /* ---------- */

    /* Find the rate level that needs the fewest bits for the pulses-per-block info */
    int bestLevel = 0;
    int fewestBits_Q5 = int.MaxValue;
    for (int level = 0; level < SilkConstants.N_RATE_LEVELS - 1; level++)
    {
        byte[] levelBits_Q5 = Tables.silk_pulses_per_block_BITS_Q5[level];
        int totalBits_Q5 = Tables.silk_rate_levels_BITS_Q5[signalType >> 1][level];
        for (int block = 0; block < blockCount; block++)
        {
            /* Down-scaled blocks are charged the cost of the escape symbol */
            int symbol = (shifts[block] > 0) ? SilkConstants.SILK_MAX_PULSES + 1 : blockSums[block];
            totalBits_Q5 += levelBits_Q5[symbol];
        }

        if (totalBits_Q5 < fewestBits_Q5)
        {
            fewestBits_Q5 = totalBits_Q5;
            bestLevel = level;
        }
    }
    psRangeEnc.enc_icdf(bestLevel, Tables.silk_rate_levels_iCDF[signalType >> 1], 8);

    /* ---------------------------- */
    /* Sum-Weighted-Pulses Encoding */
    /* ---------------------------- */
    var levelTable = Tables.silk_pulses_per_block_iCDF[bestLevel];
    var escapeTable = Tables.silk_pulses_per_block_iCDF[SilkConstants.N_RATE_LEVELS - 1];
    for (int block = 0; block < blockCount; block++)
    {
        int blockShifts = shifts[block];
        if (blockShifts == 0)
        {
            psRangeEnc.enc_icdf(blockSums[block], levelTable, 8);
            continue;
        }

        /* One escape symbol per shift (the first with the level table), then the down-scaled sum */
        psRangeEnc.enc_icdf(SilkConstants.SILK_MAX_PULSES + 1, levelTable, 8);
        for (int extra = 1; extra < blockShifts; extra++)
        {
            psRangeEnc.enc_icdf(SilkConstants.SILK_MAX_PULSES + 1, escapeTable, 8);
        }
        psRangeEnc.enc_icdf(blockSums[block], escapeTable, 8);
    }

    /* -------------- */
    /* Shell Encoding */
    /* -------------- */
    for (int block = 0; block < blockCount; block++)
    {
        if (blockSums[block] > 0)
        {
            ShellCoder.silk_shell_encoder(psRangeEnc, magnitudes, block * SilkConstants.SHELL_CODEC_FRAME_LENGTH);
        }
    }

    /* ------------ */
    /* LSB Encoding */
    /* ------------ */
    for (int block = 0; block < blockCount; block++)
    {
        if (shifts[block] <= 0)
        {
            continue;
        }

        int blockStart = block * SilkConstants.SHELL_CODEC_FRAME_LENGTH;
        int highestLsb = shifts[block] - 1;
        for (int offset = 0; offset < SilkConstants.SHELL_CODEC_FRAME_LENGTH; offset++)
        {
            int magnitude = (sbyte)Inlines.silk_abs(pulses[blockStart + offset]);

            /* Bits that were shifted out, most significant first */
            for (int bitPos = highestLsb; bitPos > 0; bitPos--)
            {
                int upperBit = Inlines.silk_RSHIFT(magnitude, bitPos) & 1;
                psRangeEnc.enc_icdf(upperBit, Tables.silk_lsb_iCDF, 8);
            }

            int lowestBit = magnitude & 1;
            psRangeEnc.enc_icdf(lowestBit, Tables.silk_lsb_iCDF, 8);
        }
    }

    /* ------------ */
    /* Encode signs */
    /* ------------ */
    CodeSigns.silk_encode_signs(psRangeEnc, pulses, frame_length, signalType, quantOffsetType, blockSums);
}
/// <summary>
/// Writes the side-information indices of one frame to the range coder, in this order:
/// signal type / quantizer offset, subframe gains, NLSF indices, NLSF interpolation
/// factor (only when nb_subfr equals MAX_NB_SUBFR), the pitch and LTP parameters
/// (voiced frames only) and finally the seed.
/// Also updates psEncC.ec_prevSignalType, and psEncC.ec_prevLagIndex for voiced frames.
/// </summary>
/// <param name="psEncC">I/O Encoder state</param>
/// <param name="psRangeEnc">I/O Compressor data structure</param>
/// <param name="FrameIndex">I Frame number; selects the entry of indices_LBRR when encode_LBRR is non-zero</param>
/// <param name="encode_LBRR">I Flag indicating LBRR data is being encoded</param>
/// <param name="condCoding">I The type of conditional coding to use</param>
internal static void silk_encode_indices(
    SilkChannelEncoder psEncC,
    EntropyCoder psRangeEnc,
    int FrameIndex,
    int encode_LBRR,
    int condCoding)
{
    short[] ecOffsets = new short[SilkConstants.MAX_LPC_ORDER];
    byte[] unpackedPred_Q8 = new byte[SilkConstants.MAX_LPC_ORDER];

    /* Pick the index set: the LBRR copy of this frame, or the regular one */
    SideInfoIndices psIndices = (encode_LBRR != 0)
        ? psEncC.indices_LBRR[FrameIndex]
        : psEncC.indices;

    int signalType = psIndices.signalType;
    int subframeCount = psEncC.nb_subfr;
    int deltaGainLevels = SilkConstants.MAX_DELTA_GAIN_QUANT - SilkConstants.MIN_DELTA_GAIN_QUANT + 1;

    /* ---------------------------------------- */
    /* Signal type and quantizer offset         */
    /* ---------------------------------------- */
    int typeOffset = 2 * signalType + psIndices.quantOffsetType;
    Inlines.OpusAssert(typeOffset >= 0 && typeOffset < 6);
    Inlines.OpusAssert(encode_LBRR == 0 || typeOffset >= 2);
    if (encode_LBRR != 0 || typeOffset >= 2)
    {
        psRangeEnc.enc_icdf(typeOffset - 2, Tables.silk_type_offset_VAD_iCDF, 8);
    }
    else
    {
        psRangeEnc.enc_icdf(typeOffset, Tables.silk_type_offset_no_VAD_iCDF, 8);
    }

    /* ---------------------------------------- */
    /* Gains                                    */
    /* ---------------------------------------- */
    int firstGain = psIndices.GainsIndices[0];
    if (condCoding == SilkConstants.CODE_CONDITIONALLY)
    {
        /* First subframe, conditional coding: a single delta-gain symbol */
        Inlines.OpusAssert(firstGain >= 0 && firstGain < deltaGainLevels);
        psRangeEnc.enc_icdf(firstGain, Tables.silk_delta_gain_iCDF, 8);
    }
    else
    {
        /* First subframe, independent coding: MSBs first, then the 3 LSBs */
        Inlines.OpusAssert(firstGain >= 0 && firstGain < SilkConstants.N_LEVELS_QGAIN);
        psRangeEnc.enc_icdf(Inlines.silk_RSHIFT(firstGain, 3), Tables.silk_gain_iCDF[signalType], 8);
        psRangeEnc.enc_icdf(firstGain & 7, Tables.silk_uniform8_iCDF, 8);
    }

    /* Every later subframe is written as a delta-gain symbol */
    for (int subfr = 1; subfr < subframeCount; subfr++)
    {
        int gainIndex = psIndices.GainsIndices[subfr];
        Inlines.OpusAssert(gainIndex >= 0 && gainIndex < deltaGainLevels);
        psRangeEnc.enc_icdf(gainIndex, Tables.silk_delta_gain_iCDF, 8);
    }

    /* ---------------------------------------- */
    /* NLSFs                                    */
    /* ---------------------------------------- */
    var codebook = psEncC.psNLSF_CB;
    int maxAmplitude = SilkConstants.NLSF_QUANT_MAX_AMPLITUDE;

    /* First-stage index; the iCDF offset is chosen from signalType >> 1 */
    psRangeEnc.enc_icdf(psIndices.NLSFIndices[0], codebook.CB1_iCDF, ((signalType >> 1) * codebook.nVectors), 8);
    NLSF.silk_NLSF_unpack(ecOffsets, unpackedPred_Q8, codebook, psIndices.NLSFIndices[0]);
    Inlines.OpusAssert(codebook.order == psEncC.predictLPCOrder);

    for (int coef = 0; coef < codebook.order; coef++)
    {
        int nlsfIndex = psIndices.NLSFIndices[coef + 1];
        int tableOffset = ecOffsets[coef];

        if (nlsfIndex >= maxAmplitude)
        {
            /* At or above the upper limit: top symbol, then the excess */
            psRangeEnc.enc_icdf(2 * maxAmplitude, codebook.ec_iCDF, tableOffset, 8);
            psRangeEnc.enc_icdf(nlsfIndex - maxAmplitude, Tables.silk_NLSF_EXT_iCDF, 8);
        }
        else if (nlsfIndex <= -maxAmplitude)
        {
            /* At or below the lower limit: bottom symbol, then the excess */
            psRangeEnc.enc_icdf(0, codebook.ec_iCDF, tableOffset, 8);
            psRangeEnc.enc_icdf(-nlsfIndex - maxAmplitude, Tables.silk_NLSF_EXT_iCDF, 8);
        }
        else
        {
            /* Inside the limits: one symbol, shifted to be non-negative */
            psRangeEnc.enc_icdf(nlsfIndex + maxAmplitude, codebook.ec_iCDF, tableOffset, 8);
        }
    }

    /* NLSF interpolation factor, only written when nb_subfr == MAX_NB_SUBFR */
    if (subframeCount == SilkConstants.MAX_NB_SUBFR)
    {
        int interpCoef_Q2 = psIndices.NLSFInterpCoef_Q2;
        Inlines.OpusAssert(interpCoef_Q2 >= 0 && interpCoef_Q2 < 5);
        psRangeEnc.enc_icdf(interpCoef_Q2, Tables.silk_NLSF_interpolation_factor_iCDF, 8);
    }

    if (signalType == SilkConstants.TYPE_VOICED)
    {
        /* ---------------------------------------- */
        /* Pitch lags                               */
        /* ---------------------------------------- */
        int lagIndex = psIndices.lagIndex;
        bool writeAbsoluteLag = true;

        if (condCoding == SilkConstants.CODE_CONDITIONALLY && psEncC.ec_prevSignalType == SilkConstants.TYPE_VOICED)
        {
            /* Try coding the lag relative to the previous frame's lag index */
            int lagDelta = lagIndex - psEncC.ec_prevLagIndex;
            if (lagDelta >= -8 && lagDelta <= 11)
            {
                /* Delta fits: shift it into the symbol range, no absolute lag needed */
                lagDelta = lagDelta + 9;
                writeAbsoluteLag = false;
            }
            else
            {
                /* Delta out of range: write symbol 0; the absolute lag follows */
                lagDelta = 0;
            }
            Inlines.OpusAssert(lagDelta >= 0 && lagDelta < 21);
            psRangeEnc.enc_icdf(lagDelta, Tables.silk_pitch_delta_iCDF, 8);
        }

        if (writeAbsoluteLag)
        {
            /* Absolute coding: split the lag index into quotient and remainder by fs_kHz / 2 */
            int halfFs_kHz = Inlines.silk_RSHIFT(psEncC.fs_kHz, 1);
            int lagHighBits = Inlines.silk_DIV32_16(lagIndex, halfFs_kHz);
            int lagLowBits = lagIndex - Inlines.silk_SMULBB(lagHighBits, halfFs_kHz);
            Inlines.OpusAssert(lagLowBits < psEncC.fs_kHz / 2);
            Inlines.OpusAssert(lagHighBits < 32);
            psRangeEnc.enc_icdf(lagHighBits, Tables.silk_pitch_lag_iCDF, 8);
            psRangeEnc.enc_icdf(lagLowBits, psEncC.pitch_lag_low_bits_iCDF, 8);
        }

        /* Remember this lag index for the next conditionally coded frame */
        psEncC.ec_prevLagIndex = psIndices.lagIndex;

        /* Contour index; its allowed range depends on fs_kHz and nb_subfr */
        int contourIndex = psIndices.contourIndex;
        Inlines.OpusAssert(contourIndex >= 0);
        Inlines.OpusAssert(
            (contourIndex < 34 && psEncC.fs_kHz > 8 && psEncC.nb_subfr == 4) ||
            (contourIndex < 11 && psEncC.fs_kHz == 8 && psEncC.nb_subfr == 4) ||
            (contourIndex < 12 && psEncC.fs_kHz > 8 && psEncC.nb_subfr == 2) ||
            (contourIndex < 3 && psEncC.fs_kHz == 8 && psEncC.nb_subfr == 2));
        psRangeEnc.enc_icdf(contourIndex, psEncC.pitch_contour_iCDF, 8);

        /* ---------------------------------------- */
        /* LTP gains                                */
        /* ---------------------------------------- */
        /* PERIndex value; it also selects the iCDF used for the LTP indices below */
        int perIndex = psIndices.PERIndex;
        Inlines.OpusAssert(perIndex >= 0 && perIndex < 3);
        psRangeEnc.enc_icdf(perIndex, Tables.silk_LTP_per_index_iCDF, 8);

        /* One codebook index per subframe */
        for (int subfr = 0; subfr < subframeCount; subfr++)
        {
            int ltpIndex = psIndices.LTPIndex[subfr];
            Inlines.OpusAssert(ltpIndex >= 0 && ltpIndex < (8 << perIndex));
            psRangeEnc.enc_icdf(ltpIndex, Tables.silk_LTP_gain_iCDF_ptrs[perIndex], 8);
        }

        /* ---------------------------------------- */
        /* LTP scaling                              */
        /* ---------------------------------------- */
        /* Written only for independently coded frames */
        if (condCoding == SilkConstants.CODE_INDEPENDENTLY)
        {
            Inlines.OpusAssert(psIndices.LTP_scaleIndex >= 0 && psIndices.LTP_scaleIndex < 3);
            psRangeEnc.enc_icdf(psIndices.LTP_scaleIndex, Tables.silk_LTPscale_iCDF, 8);
        }
        Inlines.OpusAssert(condCoding == 0 || psIndices.LTP_scaleIndex == 0);
    }

    /* Remember the signal type for the next frame's delta-lag decision */
    psEncC.ec_prevSignalType = psIndices.signalType;

    /* ---------------------------------------- */
    /* Seed                                     */
    /* ---------------------------------------- */
    Inlines.OpusAssert(psIndices.Seed >= 0 && psIndices.Seed < 4);
    psRangeEnc.enc_icdf(psIndices.Seed, Tables.silk_uniform4_iCDF, 8);
}