/// <summary>
/// Creates a multichannel Opus encoder using the "new API". Supports the predefined Vorbis
/// channel mappings as well as fully custom (raw) channel mappings.
/// </summary>
/// <param name="Fs">The samples rate of the input</param>
/// <param name="channels">The total number of channels to encode (1 - 255)</param>
/// <param name="mapping_family">The mapping family to use. 0 = mono/stereo, 1 = use Vorbis mappings, 255 = use raw channel mapping</param>
/// <param name="streams">The number of streams to encode</param>
/// <param name="coupled_streams">The number of coupled streams</param>
/// <param name="mapping">A raw mapping of input/output channels</param>
/// <param name="application">The application to use for the encoders</param>
public static OpusMSEncoder CreateSurround(int Fs, int channels, int mapping_family, out int streams, out int coupled_streams, byte[] mapping, OpusApplication application)
{
    // Reject impossible channel counts and the placeholder application value up front.
    if (channels < 1 || channels > 255 || application == OpusApplication.OPUS_APPLICATION_UNIMPLEMENTED)
    {
        throw new ArgumentException("Invalid channel count or application");
    }

    // Derive the stream layout implied by the mapping family before allocating the encoder.
    BoxedValueInt streamCount = new BoxedValueInt();
    BoxedValueInt coupledCount = new BoxedValueInt();
    GetStreamCount(channels, mapping_family, streamCount, coupledCount);

    OpusMSEncoder encoder = new OpusMSEncoder(streamCount.Val, coupledCount.Val);
    int initStatus = encoder.opus_multistream_surround_encoder_init(Fs, channels, mapping_family, out streams, out coupled_streams, mapping, application);
    if (initStatus != OpusError.OPUS_OK)
    {
        // Distinguish caller errors from internal failures, mirroring the native API's error codes.
        if (initStatus == OpusError.OPUS_BAD_ARG)
        {
            throw new ArgumentException("Bad argument passed to CreateSurround");
        }
        throw new OpusException("Could not create multistream encoder", initStatus);
    }

    return encoder;
}
/**
 * Update pointers after we processed a frame. A complex logic used in two places in fe_process_frames.
 *
 * When the VAD is currently in speech state:
 *  - if the prespeech buffer holds frames, the frame just written is assumed to also sit at the
 *    end of that buffer, so the buffered frames are copied out over it (see fe_copy_from_prespch)
 *    and the start frame index is reported back to the caller for timing updates;
 *  - otherwise the frame just written is simply accepted (advance output index, consume a frame slot).
 *
 * @param fet            front-end state (holds VAD state and overflow bookkeeping)
 * @param inout_nframes  I/O remaining capacity of buf_cep, decremented per emitted frame
 * @param buf_cep        output array of cepstral frames
 * @param outidx         current write position in buf_cep
 * @param out_frameidx   O (optional, may be null) absolute frame index where returned data starts
 * @param inout_nsamps   I/O samples still unconsumed from the original input
 * @param orig_nsamps    sample count of the original input, used to recover how many were consumed
 * @return updated write position in buf_cep
 */
public static int fe_check_prespeech(Pointer<fe_t> fet, BoxedValueInt inout_nframes, Pointer<Pointer<float>> buf_cep, int outidx, BoxedValueInt out_frameidx, BoxedValueInt inout_nsamps, int orig_nsamps)
{
    if (fet.Deref.vad_data.Deref.in_speech != 0)
    {
        if (fe_prespch_buf.fe_prespch_ncep(fet.Deref.vad_data.Deref.prespch_buf) > 0)
        {
            /* Previous frame triggered vad into speech state. Last frame is in the end of
             * prespeech buffer, so overwrite it */
            outidx = fe_copy_from_prespch(fet, inout_nframes, buf_cep, outidx);

            /* Sets the start frame for the returned data so that caller can update timings */
            // (consumed samples) / frame_shift gives the current frame; back off by the
            // pre_speech frame count that was replayed from the prespeech buffer.
            if (out_frameidx != null)
            {
                out_frameidx.Val = checked ((int)(fet.Deref.num_processed_samps + orig_nsamps - inout_nsamps.Val) / fet.Deref.frame_shift - fet.Deref.pre_speech);
            }
        }
        else
        {
            // Frame already written to buf_cep; just commit it.
            outidx++;
            (inout_nframes.Val)--;
        }
    }

    /* Amount of data behind the original input which is still needed. */
    // NOTE(review): only decremented while positive; presumably each processed frame
    // consumes one frame_shift of the overflow carried over from the previous call — confirm.
    if (fet.Deref.num_overflow_samps > 0)
    {
        fet.Deref.num_overflow_samps -= fet.Deref.frame_shift;
    }
    return(outidx);
}
/// <summary>
/// Determines the number of elementary and coupled Opus streams implied by a
/// channel count and mapping family, writing the results into the two boxed outputs.
/// </summary>
/// <param name="channels">Total channel count</param>
/// <param name="mapping_family">0 = mono/stereo, 1 = Vorbis layouts (1-8 channels), 255 = one stream per channel</param>
/// <param name="nb_streams">O Number of streams</param>
/// <param name="nb_coupled_streams">O Number of coupled (stereo) streams</param>
internal static void GetStreamCount(int channels, int mapping_family, BoxedValueInt nb_streams, BoxedValueInt nb_coupled_streams)
{
    switch (mapping_family)
    {
        case 0:
            // Plain mono or stereo only; anything else needs an explicit mapping.
            if (channels == 1)
            {
                nb_streams.Val = 1;
                nb_coupled_streams.Val = 0;
            }
            else if (channels == 2)
            {
                nb_streams.Val = 1;
                nb_coupled_streams.Val = 1;
            }
            else
            {
                throw new ArgumentException("More than 2 channels requires custom mappings");
            }
            break;

        case 1:
            // Vorbis surround layouts are tabulated for 1 through 8 channels.
            if (channels >= 1 && channels <= 8)
            {
                nb_streams.Val = VorbisLayout.vorbis_mappings[channels - 1].nb_streams;
                nb_coupled_streams.Val = VorbisLayout.vorbis_mappings[channels - 1].nb_coupled_streams;
            }
            else
            {
                throw new ArgumentException("Invalid mapping family");
            }
            break;

        case 255:
            // Raw mapping: every channel rides in its own uncoupled stream.
            nb_streams.Val = channels;
            nb_coupled_streams.Val = 0;
            break;

        default:
            throw new ArgumentException("Invalid mapping family");
    }
}
/// <summary>
/// Runs one frame of keyword-spotting search: activates senones (if needed),
/// scores them, then evaluates, prunes, and transitions the keyphrase HMMs.
/// </summary>
public static int kws_search_step(ps_search_t search, int frame_idx)
{
    kws_search_t kwsSearch = (kws_search_t)search;
    Pointer<acmod_t> acousticModel = search.acmod;

    // Narrow the active senone set unless the model computes all senones every frame.
    if (acousticModel.Deref.compallsen == 0)
    {
        kws_search_sen_active(kwsSearch);
    }

    // Score senones for the current frame; acmod_score may adjust the frame index.
    BoxedValueInt frameBox = new BoxedValueInt(frame_idx);
    Pointer<short> senoneScores = acmod.acmod_score(acousticModel, frameBox);
    frame_idx = frameBox.Val;

    kws_search_hmm_eval(kwsSearch, senoneScores);  // evaluate phone-loop and keyphrase HMMs
    kws_search_hmm_prune(kwsSearch);               // drop low-probability HMMs
    kws_search_trans(kwsSearch);                   // perform HMM transitions

    ++kwsSearch.frame;
    return 0;
}
/* Autocorrelations for a warped frequency axis.
 * Runs the input through a chain of first-order allpass sections (warping) and accumulates
 * the correlation of the warped states against the current input sample in 64-bit Q(QC)
 * precision, then normalizes the result into 32 bits with a common scale factor. */
internal static void silk_warped_autocorrelation(
    int[] corr,                  /* O    Result [order + 1]                                                          */
    BoxedValueInt scale,         /* O    Scaling of the correlation vector                                           */
    short[] input,               /* I    Input data to correlate                                                     */
    int warping_Q16,             /* I    Warping coefficient                                                         */
    int length,                  /* I    Length of input                                                             */
    int order                    /* I    Correlation order (even)                                                    */
    )
{
    int n, i, lsh;
    int tmp1_QS, tmp2_QS;
    int[] state_QS = new int[order + 1];  // = { 0 };  allpass filter state, Q(QS)
    long[] corr_QC = new long[order + 1]; // = { 0 };  correlation accumulators, Q(QC)

    /* Order must be even */
    Inlines.OpusAssert((order & 1) == 0);
    Inlines.OpusAssert(2 * QS - QC >= 0);

    /* Loop over samples */
    for (n = 0; n < length; n++)
    {
        tmp1_QS = Inlines.silk_LSHIFT32((int)input[n], QS);

        /* Loop over allpass sections (two per iteration) */
        for (i = 0; i < order; i += 2)
        {
            /* Output of allpass section */
            tmp2_QS = Inlines.silk_SMLAWB(state_QS[i], state_QS[i + 1] - tmp1_QS, warping_Q16);
            state_QS[i] = tmp1_QS;
            // Accumulate lag-i correlation of the warped state against the newest sample.
            corr_QC[i] += Inlines.silk_RSHIFT64(Inlines.silk_SMULL(tmp1_QS, state_QS[0]), 2 * QS - QC);

            /* Output of allpass section */
            tmp1_QS = Inlines.silk_SMLAWB(state_QS[i + 1], state_QS[i + 2] - tmp2_QS, warping_Q16);
            state_QS[i + 1] = tmp2_QS;
            corr_QC[i + 1] += Inlines.silk_RSHIFT64(Inlines.silk_SMULL(tmp2_QS, state_QS[0]), 2 * QS - QC);
        }
        state_QS[order] = tmp1_QS;
        corr_QC[order] += Inlines.silk_RSHIFT64(Inlines.silk_SMULL(tmp1_QS, state_QS[0]), 2 * QS - QC);
    }

    // Choose a left-shift that keeps corr_QC[0] (the energy, the largest term) in 32 bits.
    lsh = Inlines.silk_CLZ64(corr_QC[0]) - 35;
    lsh = Inlines.silk_LIMIT(lsh, -12 - QC, 30 - QC);
    scale.Val = -(QC + lsh);
    Inlines.OpusAssert(scale.Val >= -30 && scale.Val <= 12);
    if (lsh >= 0)
    {
        for (i = 0; i < order + 1; i++)
        {
            corr[i] = (int)(Inlines.silk_LSHIFT64(corr_QC[i], lsh));
        }
    }
    else
    {
        for (i = 0; i < order + 1; i++)
        {
            corr[i] = (int)(Inlines.silk_RSHIFT64(corr_QC[i], -lsh));
        }
    }
    Inlines.OpusAssert(corr_QC[0] >= 0); /* If breaking, decrease QC*/
}
/// <summary>
/// Reads the stereo "mid only" flag from the range decoder.
/// </summary>
/// <param name="psRangeDec">I/O Compressor data structure</param>
/// <param name="decode_only_mid">O Flag that only mid channel has been coded</param>
internal static void silk_stereo_decode_mid_only(
    EntropyCoder psRangeDec,
    BoxedValueInt decode_only_mid)
{
    // A single iCDF symbol (8-bit table) carries the mid-only decision.
    int midOnlyFlag = psRangeDec.dec_icdf(Tables.silk_stereo_only_code_mid_iCDF, 8);
    decode_only_mid.Val = midOnlyFlag;
}
/**
 * Drains frames accumulated in the prespeech buffer into the output cepstra array.
 * Decrements inout_nframes for every frame copied and returns the advanced output index.
 */
public static int fe_copy_from_prespch(Pointer<fe_t> fet, BoxedValueInt inout_nframes, Pointer<Pointer<float>> buf_cep, int outidx)
{
    for (; inout_nframes.Val > 0; outidx++, inout_nframes.Val--)
    {
        // Stop as soon as the prespeech buffer has nothing left to hand out.
        if (fe_prespch_buf.fe_prespch_read_cep(fet.Deref.vad_data.Deref.prespch_buf, buf_cep[outidx]) <= 0)
        {
            break;
        }
    }
    return outidx;
}
/// <summary>
/// Convenience overload of fe_process_frames_ext that requests no voiced-speech output buffers.
/// </summary>
public static int fe_process_frames(Pointer<fe_t> fet, BoxedValue<Pointer<short>> inout_spch, BoxedValueInt inout_nsamps, Pointer<Pointer<float>> buf_cep, BoxedValueInt inout_nframes, BoxedValueInt out_frameidx)
{
    // Pass NULL pointers for the optional voiced-audio outputs.
    Pointer<short> noVoicedSpeech = PointerHelpers.NULL<short>();
    Pointer<int> noVoicedSampleCount = PointerHelpers.NULL<int>();
    return fe_process_frames_ext(fet, inout_spch, inout_nsamps, buf_cep, inout_nframes, noVoicedSpeech, noVoicedSampleCount, out_frameidx);
}
/// <summary>
/// Retrieves the current hypothesis string from the decoder's active search,
/// timing the extraction with the decoder's performance timer.
/// </summary>
public static Pointer<byte> ps_get_hyp(Pointer<ps_decoder_t> ps, BoxedValueInt out_best_score)
{
    profile.ptmr_start(ps.Deref.perf);
    Pointer<byte> hypothesis = ps_search_hyp(ps.Deref.search, out_best_score);
    profile.ptmr_stop(ps.Deref.perf);
    return hypothesis;
}
/// <summary>
/// Looks up a context-independent phone name in the model definition's hash table.
/// Returns the phone's integer id, or -1 when the name is unknown.
/// </summary>
public static int mdef_ciphone_id(Pointer<mdef_t> m, Pointer<byte> ci)
{
    BoxedValueInt phoneId = new BoxedValueInt();
    int lookupStatus = hash_table.hash_table_lookup_int32(m.Deref.ciphone_ht, ci, phoneId);
    return lookupStatus < 0 ? -1 : phoneId.Val;
}
/// <summary>
/// Produces one output feature frame: magnitude spectrum, mel spectrum (with SNR
/// tracking for VAD), mel cepstrum, liftering, then VAD hangover handling.
/// The call order is significant — each stage consumes state the previous one wrote.
/// </summary>
public static void fe_write_frame(Pointer<fe_t> fe, Pointer<float> feat, int store_pcm)
{
    BoxedValueInt vadDecision = new BoxedValueInt();

    fe_spec_magnitude(fe);
    fe_mel_spec(fe);
    fe_noise.fe_track_snr(fe, vadDecision); // also produces the speech/non-speech decision
    fe_mel_cep(fe, feat);
    fe_lifter(fe, feat);

    // Apply hangover logic using the decision produced by the SNR tracker above.
    fe_noise.fe_vad_hangover(fe, feat, vadDecision.Val, store_pcm);
}
/* Compute autocorrelation of the input, delegating to the CELT autocorrelation kernel. */
internal static void silk_autocorr(
    int[] results,           /* O    Result (length correlationCount)      */
    BoxedValueInt scale,     /* O    Scaling of the correlation vector     */
    short[] inputData,       /* I    Input data to correlate               */
    int inputDataSize,       /* I    Length of input                       */
    int correlationCount     /* I    Number of correlation taps to compute */
    )
{
    // Never compute more taps than there are input samples.
    int nTaps = Inlines.silk_min_int(inputDataSize, correlationCount);
    scale.Val = Autocorrelation._celt_autocorr(inputData, results, nTaps - 1, inputDataSize);
}
/// <summary>
/// Resolves a word string to its dictionary word id via the dictionary hash table.
/// Returns BAD_S3WID when the word is not present.
/// </summary>
public static int dict_wordid(Pointer<dict_t> d, Pointer<byte> word)
{
    SphinxAssert.assert(d.IsNonNull);
    SphinxAssert.assert(word.IsNonNull);

    BoxedValueInt wordId = new BoxedValueInt();
    int lookupStatus = hash_table.hash_table_lookup_int32(d.Deref.ht, word, wordId);
    return lookupStatus < 0 ? s3types.BAD_S3WID : wordId.Val;
}
/// <summary>
/// Reports the start and end frames of a segment, shifted by the acoustic
/// model's stream offset. Either output box may be null if not wanted.
/// </summary>
public static void ps_seg_frames(ps_seg_t seg, BoxedValueInt out_sf, BoxedValueInt out_ef)
{
    int streamOffset = acmod.acmod_stream_offset(seg.search.acmod);

    if (out_sf != null)
    {
        out_sf.Val = seg.sf + streamOffset;
    }
    if (out_ef != null)
    {
        out_ef.Val = seg.ef + streamOffset;
    }
}
/// <summary>
/// Copies the segment's component scores (seg.ascr, seg.lscr, seg.lback) into
/// whichever optional output boxes the caller supplied, and returns seg.prob.
/// </summary>
public static int ps_seg_prob(ps_seg_t seg, BoxedValueInt out_ascr, BoxedValueInt out_lscr, BoxedValueInt out_lback)
{
    // Each output is optional; null means "not requested".
    if (out_ascr != null)
    {
        out_ascr.Val = seg.ascr;
    }
    if (out_lscr != null)
    {
        out_lscr.Val = seg.lscr;
    }
    if (out_lback != null)
    {
        out_lback.Val = seg.lback;
    }

    return seg.prob;
}
/// <summary>
/// Builds and returns the keyword-spotting hypothesis string from accumulated
/// detections. This search always reports a score of 0 when asked.
/// </summary>
public static Pointer<byte> kws_search_hyp(ps_search_t search, BoxedValueInt out_score)
{
    kws_search_t kwsSearch = (kws_search_t)search;

    if (out_score != null)
    {
        out_score.Val = 0;
    }

    // Release any previously cached hypothesis string before generating a fresh one.
    if (search.hyp_str.IsNonNull)
    {
        ckd_alloc.ckd_free(search.hyp_str);
    }
    search.hyp_str = kws_detections.kws_detections_hyp_str(kwsSearch.detections, kwsSearch.frame, kwsSearch.delay);

    return search.hyp_str;
}
/// <summary>
/// Find least-squares prediction gain for one signal based on another and quantize it.
/// Also updates the smoothed mid/residual norms and reports their ratio.
/// </summary>
/// <param name="ratio_Q14">O Ratio of residual and mid energies</param>
/// <param name="x">I Basis signal</param>
/// <param name="y">I Target signal</param>
/// <param name="mid_res_amp_Q0">I/O Smoothed mid, residual norms</param>
/// <param name="mid_res_amp_Q0_ptr">Offset into mid_res_amp_Q0 of the two norms updated here</param>
/// <param name="length">I Number of samples</param>
/// <param name="smooth_coef_Q16">I Smoothing coefficient</param>
/// <returns>O Returns predictor in Q13</returns>
internal static int silk_stereo_find_predictor(
    BoxedValueInt ratio_Q14,
    short[] x,
    short[] y,
    int[] mid_res_amp_Q0,
    int mid_res_amp_Q0_ptr,
    int length,
    int smooth_coef_Q16)
{
    int scale;
    int nrgx, nrgy, scale1, scale2;
    int corr, pred_Q13, pred2_Q10;

    /* Find predictor */
    SumSqrShift.silk_sum_sqr_shift(out nrgx, out scale1, x, length);
    SumSqrShift.silk_sum_sqr_shift(out nrgy, out scale2, y, length);
    // Bring both energies to a common (even) scale before dividing.
    scale = Inlines.silk_max_int(scale1, scale2);
    scale = scale + (scale & 1); /* make even */
    nrgy = Inlines.silk_RSHIFT32(nrgy, scale - scale2);
    nrgx = Inlines.silk_RSHIFT32(nrgx, scale - scale1);
    nrgx = Inlines.silk_max_int(nrgx, 1); // avoid division by zero below
    corr = Inlines.silk_inner_prod_aligned_scale(x, y, scale, length);
    // pred = corr / nrgx, then clamped to +/- 2 in Q13.
    pred_Q13 = Inlines.silk_DIV32_varQ(corr, nrgx, 13);
    pred_Q13 = Inlines.silk_LIMIT(pred_Q13, -(1 << 14), 1 << 14);
    pred2_Q10 = Inlines.silk_SMULWB(pred_Q13, pred_Q13);

    /* Faster update for signals with large prediction parameters */
    smooth_coef_Q16 = (int)Inlines.silk_max_int(smooth_coef_Q16, Inlines.silk_abs(pred2_Q10));

    /* Smoothed mid and residual norms */
    Inlines.OpusAssert(smooth_coef_Q16 < 32768);
    scale = Inlines.silk_RSHIFT(scale, 1);
    // First-order smoothing of the mid norm (sqrt of mid energy, undone scaling).
    mid_res_amp_Q0[mid_res_amp_Q0_ptr] = Inlines.silk_SMLAWB(mid_res_amp_Q0[mid_res_amp_Q0_ptr],
        Inlines.silk_LSHIFT(Inlines.silk_SQRT_APPROX(nrgx), scale) - mid_res_amp_Q0[mid_res_amp_Q0_ptr],
        smooth_coef_Q16);
    /* Residual energy = nrgy - 2 * pred * corr + pred^2 * nrgx */
    nrgy = Inlines.silk_SUB_LSHIFT32(nrgy, Inlines.silk_SMULWB(corr, pred_Q13), 3 + 1);
    nrgy = Inlines.silk_ADD_LSHIFT32(nrgy, Inlines.silk_SMULWB(nrgx, pred2_Q10), 6);
    // First-order smoothing of the residual norm.
    mid_res_amp_Q0[mid_res_amp_Q0_ptr + 1] = Inlines.silk_SMLAWB(mid_res_amp_Q0[mid_res_amp_Q0_ptr + 1],
        Inlines.silk_LSHIFT(Inlines.silk_SQRT_APPROX(nrgy), scale) - mid_res_amp_Q0[mid_res_amp_Q0_ptr + 1],
        smooth_coef_Q16);

    /* Ratio of smoothed residual and mid norms */
    ratio_Q14.Val = Inlines.silk_DIV32_varQ(mid_res_amp_Q0[mid_res_amp_Q0_ptr + 1],
        Inlines.silk_max(mid_res_amp_Q0[mid_res_amp_Q0_ptr], 1), 14);
    ratio_Q14.Val = Inlines.silk_LIMIT(ratio_Q14.Val, 0, 32767);

    return(pred_Q13);
}
/// <summary>
/// Runs one frame of the phone-loop search: activates all CI phone HMMs (when the
/// model does not score all senones anyway), scores senones, renormalizes if the
/// best score approaches the WORST_SCORE floor, then evaluates, stores, prunes,
/// and transitions the phone HMMs.
/// </summary>
public static int phone_loop_search_step(ps_search_t search, int frame_idx)
{
    phone_loop_search_t phoneLoop = (phone_loop_search_t)search;
    Pointer<acmod_t> am = pocketsphinx.ps_search_acmod(search);

    /* All CI senones are active all the time. */
    if (pocketsphinx.ps_search_acmod(phoneLoop).Deref.compallsen == 0)
    {
        acmod.acmod_clear_active(pocketsphinx.ps_search_acmod(phoneLoop));
        for (int phone = 0; phone < phoneLoop.n_phones; ++phone)
        {
            acmod.acmod_activate_hmm(am, phoneLoop.hmms.Point(phone));
        }
    }

    // Score senones for the current frame; acmod_score may adjust the frame index.
    BoxedValueInt frameBox = new BoxedValueInt(frame_idx);
    Pointer<short> senoneScores = acmod.acmod_score(am, frameBox);
    frame_idx = frameBox.Val;

    /* Renormalize, if necessary. */
    if (phoneLoop.best_score + (2 * phoneLoop.beam) < hmm.WORST_SCORE)
    {
        err.E_INFO(string.Format("Renormalizing Scores at frame {0}, best score {1}\n", frame_idx, phoneLoop.best_score));
        renormalize_hmms(phoneLoop, frame_idx, phoneLoop.best_score);
    }

    evaluate_hmms(phoneLoop, senoneScores, frame_idx); // evaluate phone HMMs for this frame
    store_scores(phoneLoop, frame_idx);                // keep scores for senone penalty calculation
    prune_hmms(phoneLoop, frame_idx);                  // prune low-scoring phone HMMs
    phone_transition(phoneLoop, frame_idx);            // do phone transitions

    return 0;
}
/// <summary>
/// Finishes an utterance: flushes any samples remaining in the overflow buffer as
/// one final (possibly partial) frame, then resets the overflow bookkeeping.
/// Sets nframes to 1 when a trailing speech frame was written, otherwise 0.
/// </summary>
public static int fe_end_utt(Pointer<fe_t> fet, Pointer<float> cepvector, BoxedValueInt nframes)
{
    // Default: no trailing frame is emitted.
    nframes.Val = 0;

    /* Process any remaining data, not very accurate for the VAD */
    int leftoverSamples = fet.Deref.num_overflow_samps;
    if (leftoverSamples > 0)
    {
        fe_sigproc.fe_read_frame(fet, fet.Deref.overflow_samps, leftoverSamples);
        fe_sigproc.fe_write_frame(fet, cepvector, 0);
        if (fet.Deref.vad_data.Deref.in_speech != 0)
        {
            nframes.Val = 1;
        }
    }

    /* reset overflow buffers... */
    fet.Deref.num_overflow_samps = 0;
    return 0;
}
/// <summary>
/// Randomized encode+decode fuzz test (port of the libopus test_opus_encode harness).
/// Sweeps three rate-control modes (VBR / constrained VBR / CBR) across a table of
/// mode/bitrate/frame-size combinations, encoding music and checking that decode
/// lengths and final range-coder states match between encoder and decoder.
/// Returns 0 on success; any check failure calls TestFailed().
/// </summary>
// NOTE(review): the no_fuzz parameter is never read in this body — presumably used by
// a part of the harness not visible here; confirm before removing.
internal static int RunTest1(bool no_fuzz)
{
    byte[] mapping /*[256]*/ = { 0, 1, 255 };
    byte[] db62 = new byte[36];
    int i;
    int rc, j;
    // NOTE(review): err is never written by the Create calls below (the C# port's
    // Create factories do not take an error box), so the err.Val checks are vacuous — confirm.
    BoxedValueInt err = new BoxedValueInt();
    OpusEncoder enc;
    OpusDecoder dec;
    OpusDecoder[] dec_err = new OpusDecoder[10];
    Pointer<short> inbuf;
    Pointer<short> outbuf;
    Pointer<short> out2buf;
    //int bitrate_bps;
    Pointer<byte> packet = Pointer.Malloc<byte>(MAX_PACKET + 257);
    uint enc_final_range;
    uint dec_final_range;
    //int fswitch;
    //int fsize;
    int count;

    /*FIXME: encoder api tests, fs!=48k, mono, VBR*/
    Console.WriteLine(" Encode+Decode tests.");
    enc = OpusEncoder.Create(48000, 2, OpusApplication.VOIP);
    if (err.Val != OpusError.OPUS_OK || enc == null)
    {
        TestFailed();
    }
    dec = OpusDecoder.Create(48000, 2);
    if (err.Val != OpusError.OPUS_OK || dec == null)
    {
        TestFailed();
    }

    // fixme: this tests assign() performed on a decoder struct, which doesn't exist
    //dec_err[0] = (OpusDecoder*)malloc(OpusDecoder_get_size(2));
    //memcpy(dec_err[0], dec, OpusDecoder_get_size(2));
    // A battery of decoders at every supported rate/channel combination, used to
    // exercise decoding at sample rates other than the encoder's.
    dec_err[0] = OpusDecoder.Create(48000, 2);
    dec_err[1] = OpusDecoder.Create(48000, 1);
    dec_err[2] = OpusDecoder.Create(24000, 2);
    dec_err[3] = OpusDecoder.Create(24000, 1);
    dec_err[4] = OpusDecoder.Create(16000, 2);
    dec_err[5] = OpusDecoder.Create(16000, 1);
    dec_err[6] = OpusDecoder.Create(12000, 2);
    dec_err[7] = OpusDecoder.Create(12000, 1);
    dec_err[8] = OpusDecoder.Create(8000, 2);
    dec_err[9] = OpusDecoder.Create(8000, 1);
    // NOTE(review): this null check starts at index 1, skipping dec_err[0] — in the
    // original C test dec_err[0] was a memcpy of dec, but here it is Create()d like
    // the others, so starting at 0 may have been intended; confirm.
    for (i = 1; i < 10; i++)
    {
        if (dec_err[i] == null)
        {
            TestFailed();
        }
    }

    //{
    //   OpusEncoder* enccpy;
    //   /*The opus state structures contain no pointers and can be freely copied*/
    //   enccpy = (OpusEncoder*)malloc(opus_encoder_get_size(2));
    //   memcpy(enccpy, enc, opus_encoder_get_size(2));
    //   memset(enc, 255, opus_encoder_get_size(2));
    //   opus_encoder_destroy(enc);
    //   enc = enccpy;
    //}

    inbuf = Pointer.Malloc<short>(SAMPLES * 2);
    outbuf = Pointer.Malloc<short>(SAMPLES * 2);
    out2buf = Pointer.Malloc<short>(MAX_FRAME_SAMP * 3);
    if (inbuf == null || outbuf == null || out2buf == null)
    {
        TestFailed();
    }

    GenerateMusic(inbuf, SAMPLES);

    ///* FILE *foo;
    //foo = fopen("foo.sw", "wb+");
    //fwrite(inbuf, 1, SAMPLES*2*2, foo);
    //fclose(foo);*/

    enc.Bandwidth = (OpusBandwidth.OPUS_BANDWIDTH_AUTO);

    // rc selects the rate-control mode: 0 = VBR (+FEC), 1 = constrained VBR, 2 = CBR.
    for (rc = 0; rc < 3; rc++)
    {
        enc.UseVBR = (rc < 2);
        enc.UseConstrainedVBR = (rc == 1);
        enc.UseInbandFEC = (rc == 0);

        // Parallel tables: coding mode (0=SILK, 1=hybrid, 2=CELT), target bitrate, frame size.
        int[] modes = { 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2 };
        int[] rates = { 6000, 12000, 48000, 16000, 32000, 48000, 64000, 512000, 13000, 24000, 48000, 64000, 96000 };
        int[] frame = { 960 * 2, 960, 480, 960, 960, 960, 480, 960 * 3, 960 * 3, 960, 480, 240, 120 };

        for (j = 0; j < modes.Length; j++)
        {
            int rate;
            // Jitter the nominal bitrate randomly upward by up to 100%.
            rate = rates[j] + (int)FastRand() % rates[j];
            count = i = 0;
            do
            {
                OpusBandwidth bw;
                int len, out_samples, frame_size;
                frame_size = frame[j];

                // Occasionally reset encoder/decoder state mid-stream.
                if ((FastRand() & 255) == 0)
                {
                    enc.ResetState();
                    dec.ResetState();
                    if ((FastRand() & 1) != 0)
                    {
                        dec_err[FastRand() & 1].ResetState();
                    }
                }
                if ((FastRand() & 127) == 0)
                {
                    dec_err[FastRand() & 1].ResetState();
                }
                if (FastRand() % 10 == 0)
                {
                    int complex = (int)(FastRand() % 11);
                    enc.Complexity = (complex);
                }
                if (FastRand() % 50 == 0)
                {
                    dec.ResetState();
                }

                // Randomize the encoder configuration for this frame.
                enc.UseInbandFEC = (rc == 0);
                enc.ForceMode = (OpusMode.MODE_SILK_ONLY + modes[j]);
                enc.UseDTX = ((FastRand() & 1) != 0);
                enc.Bitrate = (rate);
                enc.ForceChannels = (rates[j] >= 64000 ? 2 : 1);
                enc.Complexity = ((count >> 2) % 11);
                enc.PacketLossPercent = ((int)((FastRand() & 15) & (FastRand() % 15)));

                // Pick a bandwidth legal for the forced mode (SILK: NB..WB, hybrid: SWB/FB, CELT: any but MB).
                bw = modes[j] == 0 ? OpusBandwidth.OPUS_BANDWIDTH_NARROWBAND + (int)(FastRand() % 3) :
                     modes[j] == 1 ? OpusBandwidth.OPUS_BANDWIDTH_SUPERWIDEBAND + (int)(FastRand() & 1) :
                     OpusBandwidth.OPUS_BANDWIDTH_NARROWBAND + (int)(FastRand() % 5);
                if (modes[j] == 2 && bw == OpusBandwidth.OPUS_BANDWIDTH_MEDIUMBAND)
                {
                    bw += 3; // CELT has no mediumband; bump to a valid value
                }
                enc.Bandwidth = (bw);

                len = enc.Encode(inbuf.Data, i << 1, frame_size, packet.Data, 0, MAX_PACKET);
                if (len < 0 || len > MAX_PACKET)
                {
                    TestFailed();
                }
                enc_final_range = enc.FinalRange;

                // Randomly pad/unpad the packet via the repacketizer; decode must be unaffected.
                if ((FastRand() & 3) == 0)
                {
                    if (OpusRepacketizer.PadPacket(packet.Data, packet.Offset, len, len + 1) != OpusError.OPUS_OK)
                    {
                        TestFailed();
                    }
                    len++;
                }
                if ((FastRand() & 7) == 0)
                {
                    if (OpusRepacketizer.PadPacket(packet.Data, packet.Offset, len, len + 256) != OpusError.OPUS_OK)
                    {
                        TestFailed();
                    }
                    len += 256;
                }
                if ((FastRand() & 3) == 0)
                {
                    len = OpusRepacketizer.UnpadPacket(packet.Data, packet.Offset, len);
                    if (len < 1)
                    {
                        TestFailed();
                    }
                }

                out_samples = dec.Decode(packet.Data, 0, len, outbuf.Data, i << 1, MAX_FRAME_SAMP, false);
                if (out_samples != frame_size)
                {
                    TestFailed();
                }
                // Encoder and decoder range coders must end in the same final state.
                dec_final_range = dec.FinalRange;
                if (enc_final_range != dec_final_range)
                {
                    TestFailed();
                }

                /*LBRR decode*/
                out_samples = dec_err[0].Decode(packet.Data, 0, len, out2buf.Data, 0, frame_size, ((int)FastRand() & 3) != 0);
                if (out_samples != frame_size)
                {
                    TestFailed();
                }
                // Also decode with random packet loss (length 0) on the mono decoder.
                out_samples = dec_err[1].Decode(packet.Data, 0, (FastRand() & 3) == 0 ? 0 : len, out2buf.Data, 0, /*MAX_FRAME_SAMP*/ frame_size, ((int)FastRand() & 7) != 0);
                if (out_samples < 120)
                {
                    TestFailed();
                }

                i += frame_size;
                count++;
            } while (i < (SSAMPLES - MAX_FRAME_SAMP));

            Console.WriteLine(" Mode {0} FB encode {1}, {2} bps OK.", mstrings[modes[j]], rc == 0 ? " VBR" : rc == 1 ? "CVBR" : " CBR", rate);
        }
    }

    //if (opus_encoder_ctl(enc, OPUS_RESET_STATE) != OpusError.OPUS_OK) test_failed();
    //opus_encoder_destroy(enc);
    //if (opus_multistream_encoder_ctl(MSenc, OPUS_RESET_STATE) != OpusError.OPUS_OK) test_failed();
    //opus_multistream_encoder_destroy(MSenc);
    //if (OpusDecoder_ctl(dec, OPUS_RESET_STATE) != OpusError.OPUS_OK) test_failed();
    //OpusDecoder_destroy(dec);
    //if (opus_multistream_decoder_ctl(MSdec, OPUS_RESET_STATE) != OpusError.OPUS_OK) test_failed();
    return(0);
}
/* Finds LPC vector from correlations, and converts to NLSF.
 * First runs a Burg analysis over the whole frame; when NLSF interpolation is enabled,
 * it additionally analyzes the last half-frame and searches interpolation factors k=3..0
 * for the one with the lowest total residual energy, recording it in NLSFInterpCoef_Q2
 * (4 means "no interpolation"). */
internal static void silk_find_LPC(
    SilkChannelEncoder psEncC,       /* I/O  Encoder state                          */
    short[] NLSF_Q15,                /* O    NLSFs                                  */
    short[] x,                       /* I    Input signal                           */
    int minInvGain_Q30               /* I    Inverse of max prediction gain         */
    )
{
    int k, subfr_length;
    int[] a_Q16 = new int[SilkConstants.MAX_LPC_ORDER];
    int isInterpLower, shift;
    int res_nrg0, res_nrg1;
    int rshift0, rshift1;
    BoxedValueInt scratch_box1 = new BoxedValueInt();
    BoxedValueInt scratch_box2 = new BoxedValueInt();

    /* Used only for LSF interpolation */
    int[] a_tmp_Q16 = new int[SilkConstants.MAX_LPC_ORDER];
    int res_nrg_interp, res_nrg, res_tmp_nrg;
    int res_nrg_interp_Q, res_nrg_Q, res_tmp_nrg_Q;
    short[] a_tmp_Q12 = new short[SilkConstants.MAX_LPC_ORDER];
    short[] NLSF0_Q15 = new short[SilkConstants.MAX_LPC_ORDER];

    subfr_length = psEncC.subfr_length + psEncC.predictLPCOrder;

    /* Default: no interpolation */
    psEncC.indices.NLSFInterpCoef_Q2 = 4;

    /* Burg AR analysis for the full frame */
    BurgModified.silk_burg_modified(scratch_box1, scratch_box2, a_Q16, x, 0, minInvGain_Q30, subfr_length, psEncC.nb_subfr, psEncC.predictLPCOrder);
    res_nrg = scratch_box1.Val;   // residual energy of the full-frame fit
    res_nrg_Q = scratch_box2.Val; // and its Q (scaling) value

    if (psEncC.useInterpolatedNLSFs != 0 && psEncC.first_frame_after_reset == 0 && psEncC.nb_subfr == SilkConstants.MAX_NB_SUBFR)
    {
        short[] LPC_res;

        /* Optimal solution for last 10 ms */
        BurgModified.silk_burg_modified(scratch_box1, scratch_box2, a_tmp_Q16, x, (2 * subfr_length), minInvGain_Q30, subfr_length, 2, psEncC.predictLPCOrder);
        res_tmp_nrg = scratch_box1.Val;
        res_tmp_nrg_Q = scratch_box2.Val;

        /* subtract residual energy here, as that's easier than adding it to the */
        /* residual energy of the first 10 ms in each iteration of the search below */
        // The two energies may carry different Q values; align them before subtracting.
        shift = res_tmp_nrg_Q - res_nrg_Q;
        if (shift >= 0)
        {
            if (shift < 32)
            {
                res_nrg = res_nrg - Inlines.silk_RSHIFT(res_tmp_nrg, shift);
            }
        }
        else
        {
            Inlines.OpusAssert(shift > -32);
            res_nrg = Inlines.silk_RSHIFT(res_nrg, -shift) - res_tmp_nrg;
            res_nrg_Q = res_tmp_nrg_Q;
        }

        /* Convert to NLSFs */
        NLSF.silk_A2NLSF(NLSF_Q15, a_tmp_Q16, psEncC.predictLPCOrder);

        LPC_res = new short[2 * subfr_length];

        /* Search over interpolation indices to find the one with lowest residual energy */
        for (k = 3; k >= 0; k--)
        {
            /* Interpolate NLSFs for first half */
            Inlines.silk_interpolate(NLSF0_Q15, psEncC.prev_NLSFq_Q15, NLSF_Q15, k, psEncC.predictLPCOrder);

            /* Convert to LPC for residual energy evaluation */
            NLSF.silk_NLSF2A(a_tmp_Q12, NLSF0_Q15, psEncC.predictLPCOrder);

            /* Calculate residual energy with NLSF interpolation */
            Filters.silk_LPC_analysis_filter(LPC_res, 0, x, 0, a_tmp_Q12, 0, 2 * subfr_length, psEncC.predictLPCOrder);
            SumSqrShift.silk_sum_sqr_shift(out res_nrg0, out rshift0, LPC_res, psEncC.predictLPCOrder, subfr_length - psEncC.predictLPCOrder);
            SumSqrShift.silk_sum_sqr_shift(out res_nrg1, out rshift1, LPC_res, psEncC.predictLPCOrder + subfr_length, subfr_length - psEncC.predictLPCOrder);

            /* Add subframe energies from first half frame */
            // Align the two subframe energies to the coarser scaling before summing.
            shift = rshift0 - rshift1;
            if (shift >= 0)
            {
                res_nrg1 = Inlines.silk_RSHIFT(res_nrg1, shift);
                res_nrg_interp_Q = -rshift0;
            }
            else
            {
                res_nrg0 = Inlines.silk_RSHIFT(res_nrg0, -shift);
                res_nrg_interp_Q = -rshift1;
            }
            res_nrg_interp = Inlines.silk_ADD32(res_nrg0, res_nrg1);

            /* Compare with first half energy without NLSF interpolation, or best interpolated value so far */
            shift = res_nrg_interp_Q - res_nrg_Q;
            if (shift >= 0)
            {
                if (Inlines.silk_RSHIFT(res_nrg_interp, shift) < res_nrg)
                {
                    isInterpLower = (true ? 1 : 0);
                }
                else
                {
                    isInterpLower = (false ? 1 : 0);
                }
            }
            else
            {
                if (-shift < 32)
                {
                    if (res_nrg_interp < Inlines.silk_RSHIFT(res_nrg, -shift))
                    {
                        isInterpLower = (true ? 1 : 0);
                    }
                    else
                    {
                        isInterpLower = (false ? 1 : 0);
                    }
                }
                else
                {
                    isInterpLower = (false ? 1 : 0);
                }
            }

            /* Determine whether current interpolated NLSFs are best so far */
            if (isInterpLower == (true ? 1 : 0))
            {
                /* Interpolation has lower residual energy */
                res_nrg = res_nrg_interp;
                res_nrg_Q = res_nrg_interp_Q;
                psEncC.indices.NLSFInterpCoef_Q2 = (sbyte)k;
            }
        }
    }

    if (psEncC.indices.NLSFInterpCoef_Q2 == 4)
    {
        /* NLSF interpolation is currently inactive, calculate NLSFs from full frame AR coefficients */
        NLSF.silk_A2NLSF(NLSF_Q15, a_Q16, psEncC.predictLPCOrder);
    }

    Inlines.OpusAssert(psEncC.indices.NLSFInterpCoef_Q2 == 4 || (psEncC.useInterpolatedNLSFs != 0 && psEncC.first_frame_after_reset == 0 && psEncC.nb_subfr == SilkConstants.MAX_NB_SUBFR));
}
/// <summary>
/// Adds a word (with an optional pronunciation) to the dictionary and registers it
/// in the dictionary's hash table.
/// </summary>
/// <param name="d">Dictionary to add the word to; its word table is grown in place when full.</param>
/// <param name="word">Word string; a private copy is made, the caller keeps ownership of this buffer.</param>
/// <param name="p">Pronunciation as CI phone IDs, or a null pointer for no pronunciation.</param>
/// <param name="np">Number of phones in <paramref name="p"/>; ignored unless p is non-null and np &gt; 0.</param>
/// <returns>The new word ID, or s3types.BAD_S3WID on failure (missing base word, or duplicate hash entry).</returns>
public static int dict_add_word(Pointer <dict_t> d, Pointer <byte> word, Pointer <short> p, int np)
{
    int len;
    Pointer <dictword_t> wordp;
    int newwid;
    Pointer <byte> wword;

    /* Grow the word table by S3DICT_INC_SZ entries when it is full. */
    /* NOTE(review): the "* 28" in the log message looks like a hard-coded sizeof(dictword_t)
     * from the original C; the KiB figure is informational only — confirm against dictword_t. */
    if (d.Deref.n_word >= d.Deref.max_words)
    {
        err.E_INFO(string.Format("Reallocating to {0} KiB for word entries\n",
                                 (d.Deref.max_words + S3DICT_INC_SZ) * 28 / 1024));
        d.Deref.word =
            ckd_alloc.ckd_realloc(d.Deref.word, (d.Deref.max_words + S3DICT_INC_SZ));
        d.Deref.max_words = d.Deref.max_words + S3DICT_INC_SZ;
    }

    wordp = d.Deref.word + d.Deref.n_word;
    wordp.Deref.word = (Pointer <byte>)ckd_alloc.ckd_salloc(word);       /* Freed in dict_free */

    /* Determine base/alt wids: wword is a scratch copy that dict_word2basestr
     * truncates to the base-word string (e.g. strips an alternate-pronunciation suffix). */
    wword = ckd_alloc.ckd_salloc(word);
    if ((len = dict_word2basestr(wword)) > 0)
    {
        BoxedValueInt w = new BoxedValueInt();

        /* Truncated to a baseword string; find its ID.
         * Alternates may only be added after their base word exists. */
        if (hash_table.hash_table_lookup_int32(d.Deref.ht, wword, w) < 0)
        {
            err.E_ERROR(string.Format("Missing base word for: {0}\n", cstring.FromCString(word)));
            /* Roll back the word-string allocation before failing. */
            ckd_alloc.ckd_free(wword);
            ckd_alloc.ckd_free(wordp.Deref.word);
            wordp.Deref.word = PointerHelpers.NULL <byte>();
            return(s3types.BAD_S3WID);
        }

        /* Link into alt list: new word becomes the head successor of the base word's
         * singly-linked alternate chain. */
        wordp.Deref.basewid = w.Val;
        wordp.Deref.alt = d.Deref.word[w.Val].alt;
        d.Deref.word[w.Val].alt = d.Deref.n_word;
    }
    else
    {
        /* This IS a base word: no alternates yet, and it is its own base. */
        wordp.Deref.alt = s3types.BAD_S3WID;
        wordp.Deref.basewid = d.Deref.n_word;
    }
    ckd_alloc.ckd_free(wword);

    /* Associate word string with d.Deref.n_word in hash table.
     * A mismatched return value means the word was already present (duplicate). */
    if (hash_table.hash_table_enter_int32(d.Deref.ht, wordp.Deref.word, d.Deref.n_word) != d.Deref.n_word)
    {
        ckd_alloc.ckd_free(wordp.Deref.word);
        wordp.Deref.word = PointerHelpers.NULL <byte>();
        return(s3types.BAD_S3WID);
    }

    /* Fill in word entry, and set defaults */
    if (p.IsNonNull && (np > 0))
    {
        wordp.Deref.ciphone = ckd_alloc.ckd_malloc <short>(np);     /* Freed in dict_free */
        p.MemCopyTo(wordp.Deref.ciphone, np);
        wordp.Deref.pronlen = np;
    }
    else
    {
        wordp.Deref.ciphone = PointerHelpers.NULL <short>();
        wordp.Deref.pronlen = 0;
    }

    /* Commit: bump the word count only after every failure path has been passed. */
    newwid = d.Deref.n_word++;

    return(newwid);
}
/* Compute reflection coefficients from input signal.
 *
 * Burg's method, modified to:
 *  - accumulate autocorrelations over nb_subfr stacked subframes,
 *  - cap the prediction gain at 1/minInvGain_Q30 (when the cap is hit, the last
 *    reflection coefficient is adjusted so the cap is met exactly and the
 *    remaining coefficients are zeroed),
 *  - run entirely in 32-bit fixed point, with a dynamic right-shift (rshifts)
 *    chosen from the signal energy so correlations fit in 32 bits.
 *
 * Outputs: A_Q16 holds D prediction coefficients in Q16 (negated Af),
 * res_nrg/res_nrg_Q return the residual energy and its Q exponent.
 */
internal static void silk_burg_modified(
    BoxedValueInt res_nrg,           /* O    Residual energy                                             */
    BoxedValueInt res_nrg_Q,         /* O    Residual energy Q value                                     */
    int[] A_Q16,                     /* O    Prediction coefficients (length order)                      */
    short[] x,                       /* I    Input signal, length: nb_subfr * ( D + subfr_length )      */
    int x_ptr,                       /* I    Offset of first sample in x                                 */
    int minInvGain_Q30,              /* I    Inverse of max prediction gain                              */
    int subfr_length,                /* I    Input signal subframe length (incl. D preceding samples)    */
    int nb_subfr,                    /* I    Number of subframes stacked in x                            */
    int D                            /* I    Order                                                       */
)
{
    int k, n, s, lz, rshifts, reached_max_gain;
    int C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
    int x_offset;
    int[] C_first_row = new int[SilkConstants.SILK_MAX_ORDER_LPC];
    int[] C_last_row = new int[SilkConstants.SILK_MAX_ORDER_LPC];
    int[] Af_QA = new int[SilkConstants.SILK_MAX_ORDER_LPC];
    int[] CAf = new int[SilkConstants.SILK_MAX_ORDER_LPC + 1];
    int[] CAb = new int[SilkConstants.SILK_MAX_ORDER_LPC + 1];
    int[] xcorr = new int[SilkConstants.SILK_MAX_ORDER_LPC];
    long C0_64;

    Inlines.OpusAssert(subfr_length * nb_subfr <= MAX_FRAME_SIZE);

    /* Compute autocorrelations, added over subframes.
     * rshifts is derived from the leading zeros of the 64-bit energy so that
     * shifted correlations keep N_BITS_HEAD_ROOM bits of headroom in 32 bits;
     * a negative rshifts means the energy is small and values are left-shifted up. */
    C0_64 = Inlines.silk_inner_prod16_aligned_64(x, x_ptr, x, x_ptr, subfr_length * nb_subfr);
    lz = Inlines.silk_CLZ64(C0_64);
    rshifts = 32 + 1 + N_BITS_HEAD_ROOM - lz;
    if (rshifts > MAX_RSHIFTS) { rshifts = MAX_RSHIFTS; }
    if (rshifts < MIN_RSHIFTS) { rshifts = MIN_RSHIFTS; }

    if (rshifts > 0)
    {
        C0 = (int)Inlines.silk_RSHIFT64(C0_64, rshifts);
    }
    else
    {
        C0 = Inlines.silk_LSHIFT32((int)C0_64, -rshifts);
    }

    /* Regularize with C0 * FIND_LPC_COND_FAC + 1 (white-noise fraction) to keep the
     * normal equations well conditioned.
     * NOTE(review): this same assignment is repeated after the correlation loop below;
     * the first occurrence appears redundant but is harmless. */
    CAb[0] = CAf[0] = C0 + Inlines.silk_SMMUL(((int)((TuningParameters.FIND_LPC_COND_FAC) * ((long)1 << (32)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.FIND_LPC_COND_FAC, 32)*/, C0) + 1;                                /* Q(-rshifts) */
    Arrays.MemSetInt(C_first_row, 0, SilkConstants.SILK_MAX_ORDER_LPC);
    if (rshifts > 0)
    {
        /* Large energy: accumulate lagged inner products in 64 bits, then shift down. */
        for (s = 0; s < nb_subfr; s++)
        {
            x_offset = x_ptr + s * subfr_length;
            for (n = 1; n < D + 1; n++)
            {
                C_first_row[n - 1] += (int)Inlines.silk_RSHIFT64(
                    Inlines.silk_inner_prod16_aligned_64(x, x_offset, x, x_offset + n, subfr_length - n), rshifts);
            }
        }
    }
    else
    {
        /* Small energy: use the CELT cross-correlation kernel, then patch up the
         * tail terms it does not cover and apply the (left) shift. */
        for (s = 0; s < nb_subfr; s++)
        {
            int i;
            int d;
            x_offset = x_ptr + s * subfr_length;
            CeltPitchXCorr.pitch_xcorr(x, x_offset, x, x_offset + 1, xcorr, subfr_length - D, D);
            for (n = 1; n < D + 1; n++)
            {
                for (i = n + subfr_length - D, d = 0; i < subfr_length; i++)
                {
                    d = Inlines.MAC16_16(d, x[x_offset + i], x[x_offset + i - n]);
                }
                xcorr[n - 1] += d;
            }
            for (n = 1; n < D + 1; n++)
            {
                C_first_row[n - 1] += Inlines.silk_LSHIFT32(xcorr[n - 1], -rshifts);
            }
        }
    }
    Array.Copy(C_first_row, C_last_row, SilkConstants.SILK_MAX_ORDER_LPC);

    /* Initialize */
    CAb[0] = CAf[0] = C0 + Inlines.silk_SMMUL(((int)((TuningParameters.FIND_LPC_COND_FAC) * ((long)1 << (32)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.FIND_LPC_COND_FAC, 32)*/, C0) + 1;                                /* Q(-rshifts) */

    invGain_Q30 = (int)1 << 30;
    reached_max_gain = 0;
    /* Main Burg recursion: one reflection coefficient per iteration. */
    for (n = 0; n < D; n++)
    {
        /* Update first row of correlation matrix (without first element) */
        /* Update last row of correlation matrix (without last element, stored in reversed order) */
        /* Update C * Af */
        /* Update C * flipud(Af) (stored in reversed order) */
        if (rshifts > -2)
        {
            for (s = 0; s < nb_subfr; s++)
            {
                x_offset = x_ptr + s * subfr_length;
                x1 = -Inlines.silk_LSHIFT32((int)x[x_offset + n], 16 - rshifts);                    /* Q(16-rshifts) */
                x2 = -Inlines.silk_LSHIFT32((int)x[x_offset + subfr_length - n - 1], 16 - rshifts); /* Q(16-rshifts) */
                tmp1 = Inlines.silk_LSHIFT32((int)x[x_offset + n], QA - 16);                        /* Q(QA-16) */
                tmp2 = Inlines.silk_LSHIFT32((int)x[x_offset + subfr_length - n - 1], QA - 16);     /* Q(QA-16) */
                for (k = 0; k < n; k++)
                {
                    C_first_row[k] = Inlines.silk_SMLAWB(C_first_row[k], x1, x[x_offset + n - k - 1]);            /* Q( -rshifts ) */
                    C_last_row[k] = Inlines.silk_SMLAWB(C_last_row[k], x2, x[x_offset + subfr_length - n + k]);   /* Q( -rshifts ) */
                    Atmp_QA = Af_QA[k];
                    tmp1 = Inlines.silk_SMLAWB(tmp1, Atmp_QA, x[x_offset + n - k - 1]);                           /* Q(QA-16) */
                    tmp2 = Inlines.silk_SMLAWB(tmp2, Atmp_QA, x[x_offset + subfr_length - n + k]);                /* Q(QA-16) */
                }
                tmp1 = Inlines.silk_LSHIFT32(-tmp1, 32 - QA - rshifts);                                           /* Q(16-rshifts) */
                tmp2 = Inlines.silk_LSHIFT32(-tmp2, 32 - QA - rshifts);                                           /* Q(16-rshifts) */
                for (k = 0; k <= n; k++)
                {
                    CAf[k] = Inlines.silk_SMLAWB(CAf[k], tmp1, x[x_offset + n - k]);                              /* Q( -rshift ) */
                    CAb[k] = Inlines.silk_SMLAWB(CAb[k], tmp2, x[x_offset + subfr_length - n + k - 1]);           /* Q( -rshift ) */
                }
            }
        }
        else
        {
            /* rshifts <= -2: use full 32x32 MACs (silk_MLA/silk_SMLAWW) instead of
             * the 32x16 SMLAWB path, with predictors rounded to Q17. */
            for (s = 0; s < nb_subfr; s++)
            {
                x_offset = x_ptr + s * subfr_length;
                x1 = -Inlines.silk_LSHIFT32((int)x[x_offset + n], -rshifts);                        /* Q( -rshifts ) */
                x2 = -Inlines.silk_LSHIFT32((int)x[x_offset + subfr_length - n - 1], -rshifts);     /* Q( -rshifts ) */
                tmp1 = Inlines.silk_LSHIFT32((int)x[x_offset + n], 17);                             /* Q17 */
                tmp2 = Inlines.silk_LSHIFT32((int)x[x_offset + subfr_length - n - 1], 17);          /* Q17 */
                for (k = 0; k < n; k++)
                {
                    C_first_row[k] = Inlines.silk_MLA(C_first_row[k], x1, x[x_offset + n - k - 1]);          /* Q( -rshifts ) */
                    C_last_row[k] = Inlines.silk_MLA(C_last_row[k], x2, x[x_offset + subfr_length - n + k]); /* Q( -rshifts ) */
                    Atmp1 = Inlines.silk_RSHIFT_ROUND(Af_QA[k], QA - 17);                                    /* Q17 */
                    tmp1 = Inlines.silk_MLA(tmp1, x[x_offset + n - k - 1], Atmp1);                           /* Q17 */
                    tmp2 = Inlines.silk_MLA(tmp2, x[x_offset + subfr_length - n + k], Atmp1);                /* Q17 */
                }
                tmp1 = -tmp1;                                                                                /* Q17 */
                tmp2 = -tmp2;                                                                                /* Q17 */
                for (k = 0; k <= n; k++)
                {
                    CAf[k] = Inlines.silk_SMLAWW(CAf[k], tmp1,
                        Inlines.silk_LSHIFT32((int)x[x_offset + n - k], -rshifts - 1));                      /* Q( -rshift ) */
                    CAb[k] = Inlines.silk_SMLAWW(CAb[k], tmp2,
                        Inlines.silk_LSHIFT32((int)x[x_offset + subfr_length - n + k - 1], -rshifts - 1));   /* Q( -rshift ) */
                }
            }
        }

        /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
        tmp1 = C_first_row[n];                                                              /* Q( -rshifts ) */
        tmp2 = C_last_row[n];                                                               /* Q( -rshifts ) */
        num = 0;                                                                            /* Q( -rshifts ) */
        nrg = Inlines.silk_ADD32(CAb[0], CAf[0]);                                           /* Q( 1-rshifts ) */
        for (k = 0; k < n; k++)
        {
            /* Normalize Af_QA[k] by its leading zeros before the SMMUL so precision
             * is not lost on small coefficients. */
            Atmp_QA = Af_QA[k];
            lz = Inlines.silk_CLZ32(Inlines.silk_abs(Atmp_QA)) - 1;
            lz = Inlines.silk_min(32 - QA, lz);
            Atmp1 = Inlines.silk_LSHIFT32(Atmp_QA, lz);                                     /* Q( QA + lz ) */
            tmp1 = Inlines.silk_ADD_LSHIFT32(tmp1, Inlines.silk_SMMUL(C_last_row[n - k - 1], Atmp1), 32 - QA - lz);  /* Q( -rshifts ) */
            tmp2 = Inlines.silk_ADD_LSHIFT32(tmp2, Inlines.silk_SMMUL(C_first_row[n - k - 1], Atmp1), 32 - QA - lz); /* Q( -rshifts ) */
            num = Inlines.silk_ADD_LSHIFT32(num, Inlines.silk_SMMUL(CAb[n - k], Atmp1), 32 - QA - lz);               /* Q( -rshifts ) */
            nrg = Inlines.silk_ADD_LSHIFT32(nrg, Inlines.silk_SMMUL(
                Inlines.silk_ADD32(CAb[k + 1], CAf[k + 1]), Atmp1), 32 - QA - lz);                                   /* Q( 1-rshifts ) */
        }
        CAf[n + 1] = tmp1;                                                                  /* Q( -rshifts ) */
        CAb[n + 1] = tmp2;                                                                  /* Q( -rshifts ) */
        num = Inlines.silk_ADD32(num, tmp2);                                                /* Q( -rshifts ) */
        num = Inlines.silk_LSHIFT32(-num, 1);                                               /* Q( 1-rshifts ) */

        /* Calculate the next order reflection (parcor) coefficient;
         * clamp to +/-1 (Q31 extremes) when |num| >= nrg. */
        if (Inlines.silk_abs(num) < nrg)
        {
            rc_Q31 = Inlines.silk_DIV32_varQ(num, nrg, 31);
        }
        else
        {
            rc_Q31 = (num > 0) ? int.MaxValue : int.MinValue;
        }

        /* Update inverse prediction gain: invGain *= (1 - rc^2). */
        tmp1 = ((int)1 << 30) - Inlines.silk_SMMUL(rc_Q31, rc_Q31);
        tmp1 = Inlines.silk_LSHIFT(Inlines.silk_SMMUL(invGain_Q30, tmp1), 2);
        if (tmp1 <= minInvGain_Q30)
        {
            /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
            tmp2 = ((int)1 << 30) - Inlines.silk_DIV32_varQ(minInvGain_Q30, invGain_Q30, 30);            /* Q30 */
            rc_Q31 = Inlines.silk_SQRT_APPROX(tmp2);                                                     /* Q15 */
            /* Newton-Raphson iteration */
            rc_Q31 = Inlines.silk_RSHIFT32(rc_Q31 + Inlines.silk_DIV32(tmp2, rc_Q31), 1);                /* Q15 */
            rc_Q31 = Inlines.silk_LSHIFT32(rc_Q31, 16);                                                  /* Q31 */
            if (num < 0)
            {
                /* Ensure adjusted reflection coefficients has the original sign */
                rc_Q31 = -rc_Q31;
            }
            invGain_Q30 = minInvGain_Q30;
            reached_max_gain = 1;
        }
        else
        {
            invGain_Q30 = tmp1;
        }

        /* Update the AR coefficients (Levinson-style symmetric pairwise update). */
        for (k = 0; k < (n + 1) >> 1; k++)
        {
            tmp1 = Af_QA[k];                                                                 /* QA */
            tmp2 = Af_QA[n - k - 1];                                                         /* QA */
            Af_QA[k] = Inlines.silk_ADD_LSHIFT32(tmp1, Inlines.silk_SMMUL(tmp2, rc_Q31), 1); /* QA */
            Af_QA[n - k - 1] = Inlines.silk_ADD_LSHIFT32(tmp2, Inlines.silk_SMMUL(tmp1, rc_Q31), 1); /* QA */
        }
        Af_QA[n] = Inlines.silk_RSHIFT32(rc_Q31, 31 - QA);                                   /* QA */

        if (reached_max_gain != 0)
        {
            /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
            for (k = n + 1; k < D; k++)
            {
                Af_QA[k] = 0;
            }
            break;
        }

        /* Update C * Af and C * Ab */
        for (k = 0; k <= n + 1; k++)
        {
            tmp1 = CAf[k];                                                                   /* Q( -rshifts ) */
            tmp2 = CAb[n - k + 1];                                                           /* Q( -rshifts ) */
            CAf[k] = Inlines.silk_ADD_LSHIFT32(tmp1, Inlines.silk_SMMUL(tmp2, rc_Q31), 1);   /* Q( -rshifts ) */
            CAb[n - k + 1] = Inlines.silk_ADD_LSHIFT32(tmp2, Inlines.silk_SMMUL(tmp1, rc_Q31), 1); /* Q( -rshifts ) */
        }
    }

    if (reached_max_gain != 0)
    {
        for (k = 0; k < D; k++)
        {
            /* Scale coefficients */
            A_Q16[k] = -Inlines.silk_RSHIFT_ROUND(Af_QA[k], QA - 16);
        }
        /* Subtract energy of preceding samples from C0 */
        if (rshifts > 0)
        {
            for (s = 0; s < nb_subfr; s++)
            {
                x_offset = x_ptr + s * subfr_length;
                C0 -= (int)Inlines.silk_RSHIFT64(Inlines.silk_inner_prod16_aligned_64(x, x_offset, x, x_offset, D), rshifts);
            }
        }
        else
        {
            for (s = 0; s < nb_subfr; s++)
            {
                x_offset = x_ptr + s * subfr_length;
                C0 -= Inlines.silk_LSHIFT32(Inlines.silk_inner_prod_self(x, x_offset, D), -rshifts);
            }
        }
        /* Approximate residual energy */
        res_nrg.Val = Inlines.silk_LSHIFT(Inlines.silk_SMMUL(invGain_Q30, C0), 2);
        res_nrg_Q.Val = 0 - rshifts;
    }
    else
    {
        /* Return residual energy */
        nrg = CAf[0];                                                                        /* Q( -rshifts ) */
        tmp1 = (int)1 << 16;                                                                 /* Q16 */
        for (k = 0; k < D; k++)
        {
            Atmp1 = Inlines.silk_RSHIFT_ROUND(Af_QA[k], QA - 16);                            /* Q16 */
            nrg = Inlines.silk_SMLAWW(nrg, CAf[k + 1], Atmp1);                               /* Q( -rshifts ) */
            tmp1 = Inlines.silk_SMLAWW(tmp1, Atmp1, Atmp1);                                  /* Q16 */
            A_Q16[k] = -Atmp1;
        }
        /* Remove the regularization contribution from the reported residual energy. */
        res_nrg.Val = Inlines.silk_SMLAWW(nrg, Inlines.silk_SMMUL(((int)((TuningParameters.FIND_LPC_COND_FAC) * ((long)1 << (32)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.FIND_LPC_COND_FAC, 32)*/, C0), -tmp1);/* Q( -rshifts ) */
        res_nrg_Q.Val = -rshifts;
    }
}
/* Calculates correlation matrix X'*X, where X is the [L x order] data matrix
 * formed from lagged copies of x. The matrix is symmetric; each off-diagonal
 * element is written to both (row, col) and (col, row). To keep everything in
 * 32 bits, correlations are right-shifted by rshifts_local, which is derived
 * from the signal energy plus the requested head_room. The final shift used is
 * returned through the rshifts in/out parameter (never smaller than its input). */
internal static void silk_corrMatrix(
    short[] x,               /* I    x vector [L + order - 1] used to form data matrix X  */
    int x_ptr,               /* I    Offset of first sample in x                          */
    int L,                   /* I    Length of vectors                                    */
    int order,               /* I    Max lag for correlation                              */
    int head_room,           /* I    Desired headroom                                     */
    int[] XX,                /* O    Pointer to X'*X correlation matrix [ order x order ] */
    int XX_ptr,              /* I    Offset into XX                                       */
    BoxedValueInt rshifts    /* I/O  Right shifts of correlations                         */
)
{
    int i, j, lag, head_room_rshifts;
    int energy, rshifts_local;
    int ptr1, ptr2;

    /* Calculate energy to find shift used to fit in 32 bits */
    SumSqrShift.silk_sum_sqr_shift(out energy, out rshifts_local, x, x_ptr, L + order - 1);

    /* Add shifts to get the desired head room */
    head_room_rshifts = Inlines.silk_max(head_room - Inlines.silk_CLZ32(energy), 0);

    energy = Inlines.silk_RSHIFT32(energy, head_room_rshifts);
    rshifts_local += head_room_rshifts;

    /* Calculate energy of first column (0) of X: X[:,0]'*X[:,0] */
    /* Remove contribution of first order - 1 samples */
    for (i = x_ptr; i < x_ptr + order - 1; i++)
    {
        energy -= Inlines.silk_RSHIFT32(Inlines.silk_SMULBB(x[i], x[i]), rshifts_local);
    }
    if (rshifts_local < rshifts.Val)
    {
        /* Adjust energy: the caller demands at least rshifts.Val shifts. */
        energy = Inlines.silk_RSHIFT32(energy, rshifts.Val - rshifts_local);
        rshifts_local = rshifts.Val;
    }

    /* Calculate energy of remaining columns of X: X[:,j]'*X[:,j] */
    /* Fill out the diagonal of the correlation matrix.
     * Each column energy is derived from the previous by a sliding-window
     * update: drop the trailing sample, add the new leading sample. */
    Inlines.MatrixSet(XX, XX_ptr, 0, 0, order, energy);
    ptr1 = x_ptr + order - 1; /* First sample of column 0 of X */
    for (j = 1; j < order; j++)
    {
        energy = Inlines.silk_SUB32(energy, Inlines.silk_RSHIFT32(Inlines.silk_SMULBB(x[ptr1 + L - j], x[ptr1 + L - j]), rshifts_local));
        energy = Inlines.silk_ADD32(energy, Inlines.silk_RSHIFT32(Inlines.silk_SMULBB(x[ptr1 - j], x[ptr1 - j]), rshifts_local));
        Inlines.MatrixSet(XX, XX_ptr, j, j, order, energy);
    }

    ptr2 = x_ptr + order - 2; /* First sample of column 1 of X */
    /* Calculate the remaining elements of the correlation matrix */
    if (rshifts_local > 0)
    {
        /* Right shifting used: accumulate each product pre-shifted so the sum
         * stays within 32 bits. */
        for (lag = 1; lag < order; lag++)
        {
            /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */
            energy = 0;
            for (i = 0; i < L; i++)
            {
                energy += Inlines.silk_RSHIFT32(Inlines.silk_SMULBB(x[ptr1 + i], x[ptr2 + i]), rshifts_local);
            }
            /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag]
             * via the same sliding-window update along the diagonal band. */
            Inlines.MatrixSet(XX, XX_ptr, lag, 0, order, energy);
            Inlines.MatrixSet(XX, XX_ptr, 0, lag, order, energy);
            for (j = 1; j < (order - lag); j++)
            {
                energy = Inlines.silk_SUB32(energy, Inlines.silk_RSHIFT32(Inlines.silk_SMULBB(x[ptr1 + L - j], x[ptr2 + L - j]), rshifts_local));
                energy = Inlines.silk_ADD32(energy, Inlines.silk_RSHIFT32(Inlines.silk_SMULBB(x[ptr1 - j], x[ptr2 - j]), rshifts_local));
                Inlines.MatrixSet(XX, XX_ptr, lag + j, j, order, energy);
                Inlines.MatrixSet(XX, XX_ptr, j, lag + j, order, energy);
            }
            ptr2--; /* Update pointer to first sample of next column (lag) in X */
        }
    }
    else
    {
        /* No shifting needed: use the unshifted inner-product helper. */
        for (lag = 1; lag < order; lag++)
        {
            /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */
            energy = Inlines.silk_inner_prod(x, ptr1, x, ptr2, L);
            Inlines.MatrixSet(XX, XX_ptr, lag, 0, order, energy);
            Inlines.MatrixSet(XX, XX_ptr, 0, lag, order, energy);
            /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */
            for (j = 1; j < (order - lag); j++)
            {
                energy = Inlines.silk_SUB32(energy, Inlines.silk_SMULBB(x[ptr1 + L - j], x[ptr2 + L - j]));
                energy = Inlines.silk_SMLABB(energy, x[ptr1 - j], x[ptr2 - j]);
                Inlines.MatrixSet(XX, XX_ptr, lag + j, j, order, energy);
                Inlines.MatrixSet(XX, XX_ptr, j, lag + j, order, energy);
            }
            ptr2--;/* Update pointer to first sample of next column (lag) in X */
        }
    }
    rshifts.Val = rshifts_local;
}
/// <summary>
/// Convert Left/Right stereo signal to adaptive Mid/Side representation.
/// The mid signal is written in place over x1 (starting 2 samples before x1_ptr,
/// using the 2-sample history kept in state.sMid), and the side-prediction
/// residual is written in place over x2 (starting 1 sample before x2_ptr).
/// Also decides the mid/side bitrate split, smooths the stereo width, and may
/// collapse to panned-mono coding at low rates or for strongly panned input.
/// </summary>
/// <param name="state">I/O State</param>
/// <param name="x1">I/O Left input signal, becomes mid signal</param>
/// <param name="x1_ptr">I Offset into x1; samples at [x1_ptr-2 .. x1_ptr+frame_length-1] are accessed</param>
/// <param name="x2">I/O Right input signal, becomes side signal</param>
/// <param name="x2_ptr">I Offset into x2; output starts at x2_ptr-1</param>
/// <param name="ix">O Quantization indices [ 2 ][ 3 ]</param>
/// <param name="mid_only_flag">O Flag: only mid signal coded</param>
/// <param name="mid_side_rates_bps">O Bitrates for mid and side signals</param>
/// <param name="total_rate_bps">I Total bitrate</param>
/// <param name="prev_speech_act_Q8">I Speech activity level in previous frame</param>
/// <param name="toMono">I Last frame before a stereo.mono transition</param>
/// <param name="fs_kHz">I Sample rate (kHz)</param>
/// <param name="frame_length">I Number of samples</param>
internal static void silk_stereo_LR_to_MS(
    StereoEncodeState state,
    short[] x1,
    int x1_ptr,
    short[] x2,
    int x2_ptr,
    sbyte[][] ix,
    BoxedValueSbyte mid_only_flag,
    int[] mid_side_rates_bps,
    int total_rate_bps,
    int prev_speech_act_Q8,
    int toMono,
    int fs_kHz,
    int frame_length)
{
    int n, is10msFrame, denom_Q16, delta0_Q13, delta1_Q13;
    int sum, diff, smooth_coef_Q16, pred0_Q13, pred1_Q13;
    int[] pred_Q13 = new int[2];
    int frac_Q16, frac_3_Q16, min_mid_rate_bps, width_Q14, w_Q24, deltaw_Q24;
    BoxedValueInt LP_ratio_Q14 = new BoxedValueInt();
    BoxedValueInt HP_ratio_Q14 = new BoxedValueInt();
    short[] side;
    short[] LP_mid;
    short[] HP_mid;
    short[] LP_side;
    short[] HP_side;
    /* Mid signal is written in place into x1, offset back by 2 history samples. */
    int mid = x1_ptr - 2;

    side = new short[frame_length + 2];
    /* Convert to basic mid/side signals: mid = (L+R)/2, side = sat16((L-R)/2).
     * The loop runs 2 samples "early"; those are overwritten by the buffered
     * history just below, so only indices [2..] of this pass survive. */
    for (n = 0; n < frame_length + 2; n++)
    {
        sum = x1[x1_ptr + n - 2] + (int)x2[x2_ptr + n - 2];
        diff = x1[x1_ptr + n - 2] - (int)x2[x2_ptr + n - 2];

        x1[mid + n] = (short)Inlines.silk_RSHIFT_ROUND(sum, 1);
        side[n] = (short)Inlines.silk_SAT16(Inlines.silk_RSHIFT_ROUND(diff, 1));
    }

    /* Buffering: prepend last frame's 2 trailing mid/side samples, and save
     * this frame's 2 trailing samples for the next call. */
    Array.Copy(state.sMid, 0, x1, mid, 2);
    Array.Copy(state.sSide, side, 2);
    Array.Copy(x1, mid + frame_length, state.sMid, 0, 2);
    Array.Copy(side, frame_length, state.sSide, 0, 2);

    /* LP and HP filter mid signal: 3-tap [1 2 1]/4 low-pass; HP is the complement. */
    LP_mid = new short[frame_length];
    HP_mid = new short[frame_length];
    for (n = 0; n < frame_length; n++)
    {
        sum = Inlines.silk_RSHIFT_ROUND(Inlines.silk_ADD_LSHIFT32(x1[mid + n] + x1[mid + n + 2], x1[mid + n + 1], 1), 2);
        LP_mid[n] = (short)(sum);
        HP_mid[n] = (short)(x1[mid + n + 1] - sum);
    }

    /* LP and HP filter side signal */
    LP_side = new short[frame_length];
    HP_side = new short[frame_length];
    for (n = 0; n < frame_length; n++)
    {
        sum = Inlines.silk_RSHIFT_ROUND(Inlines.silk_ADD_LSHIFT32(side[n] + side[n + 2], side[n + 1], 1), 2);
        LP_side[n] = (short)(sum);
        HP_side[n] = (short)(side[n + 1] - sum);
    }

    /* Find energies and predictors; smoothing is scaled by squared speech activity. */
    is10msFrame = (frame_length == 10 * fs_kHz ? 1 : 0);
    smooth_coef_Q16 = is10msFrame != 0 ?
        ((int)((SilkConstants.STEREO_RATIO_SMOOTH_COEF / 2) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.STEREO_RATIO_SMOOTH_COEF / 2, 16)*/ :
        ((int)((SilkConstants.STEREO_RATIO_SMOOTH_COEF) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.STEREO_RATIO_SMOOTH_COEF, 16)*/;
    smooth_coef_Q16 = Inlines.silk_SMULWB(Inlines.silk_SMULBB(prev_speech_act_Q8, prev_speech_act_Q8), smooth_coef_Q16);

    pred_Q13[0] = silk_stereo_find_predictor(LP_ratio_Q14, LP_mid, LP_side, state.mid_side_amp_Q0, 0, frame_length, smooth_coef_Q16);
    pred_Q13[1] = silk_stereo_find_predictor(HP_ratio_Q14, HP_mid, HP_side, state.mid_side_amp_Q0, 2, frame_length, smooth_coef_Q16);

    /* Ratio of the norms of residual and mid signals */
    frac_Q16 = Inlines.silk_SMLABB(HP_ratio_Q14.Val, LP_ratio_Q14.Val, 3);
    frac_Q16 = Inlines.silk_min(frac_Q16, ((int)((1) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1, 16)*/);

    /* Determine bitrate distribution between mid and side, and possibly reduce stereo width */
    total_rate_bps -= is10msFrame != 0 ? 1200 : 600;      /* Subtract approximate bitrate for coding stereo parameters */
    if (total_rate_bps < 1)
    {
        total_rate_bps = 1;
    }

    min_mid_rate_bps = Inlines.silk_SMLABB(2000, fs_kHz, 900);
    Inlines.OpusAssert(min_mid_rate_bps < 32767);
    /* Default bitrate distribution: 8 parts for Mid and (5+3*frac) parts for Side. so: mid_rate = ( 8 / ( 13 + 3 * frac ) ) * total_ rate */
    frac_3_Q16 = Inlines.silk_MUL(3, frac_Q16);
    mid_side_rates_bps[0] = Inlines.silk_DIV32_varQ(total_rate_bps, ((int)((8 + 5) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(8 + 5, 16)*/ + frac_3_Q16, 16 + 3);

    /* If Mid bitrate below minimum, reduce stereo width */
    if (mid_side_rates_bps[0] < min_mid_rate_bps)
    {
        mid_side_rates_bps[0] = min_mid_rate_bps;
        mid_side_rates_bps[1] = total_rate_bps - mid_side_rates_bps[0];
        /* width = 4 * ( 2 * side_rate - min_rate ) / ( ( 1 + 3 * frac ) * min_rate ) */
        width_Q14 = Inlines.silk_DIV32_varQ(Inlines.silk_LSHIFT(mid_side_rates_bps[1], 1) - min_mid_rate_bps,
            Inlines.silk_SMULWB(((int)((1) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(1, 16)*/ + frac_3_Q16, min_mid_rate_bps), 14 + 2);
        width_Q14 = Inlines.silk_LIMIT(width_Q14, 0, ((int)((1) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(1, 14)*/);
    }
    else
    {
        mid_side_rates_bps[1] = total_rate_bps - mid_side_rates_bps[0];
        width_Q14 = ((int)((1) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(1, 14)*/;
    }

    /* Smoother */
    state.smth_width_Q14 = (short)Inlines.silk_SMLAWB(state.smth_width_Q14, width_Q14 - state.smth_width_Q14, smooth_coef_Q16);

    /* At very low bitrates or for inputs that are nearly amplitude panned, switch to panned-mono coding */
    mid_only_flag.Val = 0;
    if (toMono != 0)
    {
        /* Last frame before stereo.mono transition; collapse stereo width */
        width_Q14 = 0;
        pred_Q13[0] = 0;
        pred_Q13[1] = 0;
        silk_stereo_quant_pred(pred_Q13, ix);
    }
    else if (state.width_prev_Q14 == 0 &&
        (8 * total_rate_bps < 13 * min_mid_rate_bps || Inlines.silk_SMULWB(frac_Q16, state.smth_width_Q14) < ((int)((0.05f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(0.05f, 14)*/))
    {
        /* Code as panned-mono; previous frame already had zero width */
        /* Scale down and quantize predictors */
        pred_Q13[0] = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(state.smth_width_Q14, pred_Q13[0]), 14);
        pred_Q13[1] = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(state.smth_width_Q14, pred_Q13[1]), 14);
        silk_stereo_quant_pred(pred_Q13, ix);
        /* Collapse stereo width */
        width_Q14 = 0;
        pred_Q13[0] = 0;
        pred_Q13[1] = 0;
        mid_side_rates_bps[0] = total_rate_bps;
        mid_side_rates_bps[1] = 0;
        mid_only_flag.Val = 1;
    }
    else if (state.width_prev_Q14 != 0 &&
        (8 * total_rate_bps < 11 * min_mid_rate_bps || Inlines.silk_SMULWB(frac_Q16, state.smth_width_Q14) < ((int)((0.02f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(0.02f, 14)*/))
    {
        /* Transition to zero-width stereo (hysteresis: stricter threshold than
         * the already-zero-width case above) */
        /* Scale down and quantize predictors */
        pred_Q13[0] = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(state.smth_width_Q14, pred_Q13[0]), 14);
        pred_Q13[1] = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(state.smth_width_Q14, pred_Q13[1]), 14);
        silk_stereo_quant_pred(pred_Q13, ix);
        /* Collapse stereo width */
        width_Q14 = 0;
        pred_Q13[0] = 0;
        pred_Q13[1] = 0;
    }
    else if (state.smth_width_Q14 > ((int)((0.95f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(0.95f, 14)*/)
    {
        /* Full-width stereo coding */
        silk_stereo_quant_pred(pred_Q13, ix);
        width_Q14 = ((int)((1) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(1, 14)*/;
    }
    else
    {
        /* Reduced-width stereo coding; scale down and quantize predictors */
        pred_Q13[0] = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(state.smth_width_Q14, pred_Q13[0]), 14);
        pred_Q13[1] = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(state.smth_width_Q14, pred_Q13[1]), 14);
        silk_stereo_quant_pred(pred_Q13, ix);
        width_Q14 = state.smth_width_Q14;
    }

    /* Make sure to keep on encoding until the tapered output has been transmitted */
    if (mid_only_flag.Val == 1)
    {
        state.silent_side_len += (short)(frame_length - SilkConstants.STEREO_INTERP_LEN_MS * fs_kHz);
        if (state.silent_side_len < SilkConstants.LA_SHAPE_MS * fs_kHz)
        {
            /* Side not yet silent long enough; keep coding it this frame. */
            mid_only_flag.Val = 0;
        }
        else
        {
            /* Limit to avoid wrapping around */
            state.silent_side_len = 10000;
        }
    }
    else
    {
        state.silent_side_len = 0;
    }

    if (mid_only_flag.Val == 0 && mid_side_rates_bps[1] < 1)
    {
        /* Side channel is being coded: guarantee it at least 1 bps. */
        mid_side_rates_bps[1] = 1;
        mid_side_rates_bps[0] = Inlines.silk_max_int(1, total_rate_bps - mid_side_rates_bps[1]);
    }

    /* Interpolate predictors and subtract prediction from side channel.
     * First STEREO_INTERP_LEN_MS ms: linearly ramp predictors/width from the
     * previous frame's values to this frame's to avoid discontinuities. */
    pred0_Q13 = -state.pred_prev_Q13[0];
    pred1_Q13 = -state.pred_prev_Q13[1];
    w_Q24 = Inlines.silk_LSHIFT(state.width_prev_Q14, 10);
    denom_Q16 = Inlines.silk_DIV32_16((int)1 << 16, SilkConstants.STEREO_INTERP_LEN_MS * fs_kHz);
    delta0_Q13 = 0 - Inlines.silk_RSHIFT_ROUND(Inlines.silk_SMULBB(pred_Q13[0] - state.pred_prev_Q13[0], denom_Q16), 16);
    delta1_Q13 = 0 - Inlines.silk_RSHIFT_ROUND(Inlines.silk_SMULBB(pred_Q13[1] - state.pred_prev_Q13[1], denom_Q16), 16);
    deltaw_Q24 = Inlines.silk_LSHIFT(Inlines.silk_SMULWB(width_Q14 - state.width_prev_Q14, denom_Q16), 10);
    for (n = 0; n < SilkConstants.STEREO_INTERP_LEN_MS * fs_kHz; n++)
    {
        pred0_Q13 += delta0_Q13;
        pred1_Q13 += delta1_Q13;
        w_Q24 += deltaw_Q24;
        sum = Inlines.silk_LSHIFT(Inlines.silk_ADD_LSHIFT(x1[mid + n] + x1[mid + n + 2], x1[mid + n + 1], 1), 9);    /* Q11 */
        sum = Inlines.silk_SMLAWB(Inlines.silk_SMULWB(w_Q24, side[n + 1]), sum, pred0_Q13);                          /* Q8  */
        sum = Inlines.silk_SMLAWB(sum, Inlines.silk_LSHIFT((int)x1[mid + n + 1], 11), pred1_Q13);                    /* Q8  */
        x2[x2_ptr + n - 1] = (short)Inlines.silk_SAT16(Inlines.silk_RSHIFT_ROUND(sum, 8));
    }

    /* Remainder of the frame: fixed predictors/width (this frame's final values). */
    pred0_Q13 = 0 - pred_Q13[0];
    pred1_Q13 = 0 - pred_Q13[1];
    w_Q24 = Inlines.silk_LSHIFT(width_Q14, 10);
    for (n = SilkConstants.STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++)
    {
        sum = Inlines.silk_LSHIFT(Inlines.silk_ADD_LSHIFT(x1[mid + n] + x1[mid + n + 2], x1[mid + n + 1], 1), 9);    /* Q11 */
        sum = Inlines.silk_SMLAWB(Inlines.silk_SMULWB(w_Q24, side[n + 1]), sum, pred0_Q13);                          /* Q8  */
        sum = Inlines.silk_SMLAWB(sum, Inlines.silk_LSHIFT((int)x1[mid + n + 1], 11), pred1_Q13);                    /* Q8  */
        x2[x2_ptr + n - 1] = (short)Inlines.silk_SAT16(Inlines.silk_RSHIFT_ROUND(sum, 8));
    }
    state.pred_prev_Q13[0] = (short)pred_Q13[0];
    state.pred_prev_Q13[1] = (short)pred_Q13[1];
    state.width_prev_Q14 = (short)width_Q14;
}
/// <summary>
/// Multistream (MS) encoder/decoder fuzz test (port of opus_encode regression test #2):
/// exercises invalid-argument handling of OpusMSEncoder.Create, a few multistream
/// encoder API properties, then encodes/decodes dual-mono 8 kHz music with randomized
/// settings across SILK/CELT modes, VBR/CVBR/CBR, random packet padding, and a
/// lossy "error" decoder, verifying lengths, sample counts, and final range coder state.
/// Calls TestFailed() (which is expected to abort/throw) on any mismatch.
/// </summary>
/// <param name="no_fuzz">Unused in the visible body; kept for signature parity with the C test.</param>
/// <returns>0 on success.</returns>
internal static int RunTest2(bool no_fuzz)
{
    byte[] mapping /*[256]*/ = { 0, 1, 255 };  /* channel 2 (255) is unmapped */
    byte[] db62 = new byte[36];
    int i;
    int rc, j;
    /* NOTE(review): err is a leftover from the C API, where constructors reported
     * errors via an out parameter; in this port they throw instead, so err.Val
     * stays OPUS_OK and the checks below are vestigial. */
    BoxedValueInt err = new BoxedValueInt(0);
    OpusMSEncoder MSenc;
    OpusMSDecoder MSdec;
    OpusMSDecoder MSdec_err;
    OpusDecoder[] dec_err = new OpusDecoder[10];
    short[] inbuf;
    short[] out2buf;
    byte[] packet = new byte[MAX_PACKET + 257];
    uint enc_final_range;
    uint dec_final_range;
    int count;

    inbuf = new short[SAMPLES * 2];
    out2buf = new short[MAX_FRAME_SAMP * 3];
    /* Vestigial malloc-failure check from the C original; 'new' never yields null. */
    if (inbuf == null || out2buf == null) { TestFailed(); }

    GenerateMusic(inbuf.GetPointer(), SAMPLES);

    /* Each invalid Create call must throw ArgumentException; reaching TestFailed()
     * means the bad arguments were accepted. Run twice for good measure. */
    for (i = 0; i < 2; i++)
    {
        try { MSenc = OpusMSEncoder.Create(8000, 2, 2, 0, mapping, OpusApplication.OPUS_APPLICATION_UNIMPLEMENTED); TestFailed(); } catch (ArgumentException) { }   /* bad application */
        try { MSenc = OpusMSEncoder.Create(8000, 0, 1, 0, mapping, OpusApplication.VOIP); TestFailed(); } catch (ArgumentException) { }                             /* zero channels */
        try { MSenc = OpusMSEncoder.Create(44100, 2, 2, 0, mapping, OpusApplication.VOIP); TestFailed(); } catch (ArgumentException) { }                            /* unsupported Fs */
        try { MSenc = OpusMSEncoder.Create(8000, 2, 2, 3, mapping, OpusApplication.VOIP); TestFailed(); } catch (ArgumentException) { }                             /* coupled > streams */
        try { MSenc = OpusMSEncoder.Create(8000, 2, -1, 0, mapping, OpusApplication.VOIP); TestFailed(); } catch (ArgumentException) { }                            /* negative streams */
        try { MSenc = OpusMSEncoder.Create(8000, 256, 2, 0, mapping, OpusApplication.VOIP); TestFailed(); } catch (ArgumentException) { }                           /* too many channels */
    }

    MSenc = OpusMSEncoder.Create(8000, 2, 2, 0, mapping, OpusApplication.AUDIO);
    if (err.Val != OpusError.OPUS_OK || MSenc == null) { TestFailed(); }

    MSdec = new OpusMSDecoder(48000, 2, 2, 0, mapping);
    if (err.Val != OpusError.OPUS_OK || MSdec == null) { TestFailed(); }

    /* 3-channel decoder used for loss/LBRR testing against the 2-channel stream. */
    MSdec_err = new OpusMSDecoder(48000, 3, 2, 0, mapping);
    if (err.Val != OpusError.OPUS_OK || MSdec_err == null) { TestFailed(); }

    /*Some multistream encoder API tests*/
    i = MSenc.Bitrate;
    i = MSenc.LSBDepth;
    if (i < 16) { TestFailed(); }

    {
        OpusEncoder tmp_enc;
        /* Stream 1 must exist and agree with the top-level LSBDepth... */
        tmp_enc = MSenc.GetMultistreamEncoderState(1);
        if (tmp_enc == null) { TestFailed(); }
        j = tmp_enc.LSBDepth;
        if (i != j) { TestFailed(); }
        /* ...and stream index 2 is out of range for a 2-stream encoder. */
        try { MSenc.GetMultistreamEncoderState(2); TestFailed(); } catch (ArgumentException) { }
    }

    /* Parallel tables: modes[j], rates[j], frame[j] (frame sizes in 8 kHz samples). */
    OpusMode[] modes = {
        OpusMode.MODE_SILK_ONLY, OpusMode.MODE_SILK_ONLY, OpusMode.MODE_SILK_ONLY, OpusMode.MODE_SILK_ONLY,
        OpusMode.MODE_SILK_ONLY, OpusMode.MODE_SILK_ONLY, OpusMode.MODE_SILK_ONLY, OpusMode.MODE_SILK_ONLY,
        OpusMode.MODE_CELT_ONLY, OpusMode.MODE_CELT_ONLY, OpusMode.MODE_CELT_ONLY, OpusMode.MODE_CELT_ONLY,
        OpusMode.MODE_CELT_ONLY, OpusMode.MODE_CELT_ONLY, OpusMode.MODE_CELT_ONLY, OpusMode.MODE_CELT_ONLY };
    int[] rates = { 4000, 12000, 32000, 8000, 16000, 32000, 48000, 88000, 4000, 12000, 32000, 8000, 16000, 32000, 48000, 88000 };
    int[] frame = { 160 * 1, 160, 80, 160, 160, 80, 40, 20, 160 * 1, 160, 80, 160, 160, 80, 40, 20 };

    /* rc selects the rate-control flavor: 0 = VBR(+FEC sweep), 1 = constrained VBR, 2 = CBR. */
    for (rc = 0; rc < 3; rc++)
    {
        MSenc.UseVBR = (rc < 2);
        MSenc.UseConstrainedVBR = (rc == 1);
        MSenc.UseInbandFEC = (rc == 0);
        for (j = 0; j < 16; j++)
        {
            int rate;
            MSenc.UseInbandFEC = (rc == 0 && j == 1);
            MSenc.ForceMode = (modes[j]);
            /* Randomize the target rate in [rates[j], 2*rates[j]). */
            rate = rates[j] + ((int)FastRand() % rates[j]);
            MSenc.UseDTX = ((FastRand() & 1U) != 0);
            MSenc.Bitrate = (rate);
            count = i = 0;
            do
            {
                int len, out_samples, frame_size;
                bool loss;
                /* Randomly toggle prediction, biased toward flipping the current state. */
                bool pred = MSenc.PredictionDisabled;
                MSenc.PredictionDisabled = ((int)(FastRand() & 15) < (pred ? 11 : 4));
                frame_size = frame[j];
                MSenc.Complexity = ((count >> 2) % 11);
                MSenc.PacketLossPercent = (((int)FastRand() & 15) & ((int)FastRand() % 15));
                /* Occasionally reset encoder/decoders mid-stream. */
                if ((FastRand() & 255) == 0)
                {
                    MSenc.ResetState();
                    MSdec.ResetState();
                    if ((FastRand() & 3) != 0)
                    {
                        MSdec_err.ResetState();
                    }
                }
                if ((FastRand() & 255) == 0)
                {
                    MSdec_err.ResetState();
                }
                len = MSenc.EncodeMultistream(inbuf, i << 1, frame_size, packet, 0, MAX_PACKET);
                if (len < 0 || len > MAX_PACKET) { TestFailed(); }
                enc_final_range = MSenc.FinalRange;
                /* Randomly pad the packet by 1 byte and/or 256 bytes; padding must
                 * not affect decode results or the final range. */
                if ((FastRand() & 3) == 0)
                {
                    if (OpusRepacketizer.PadMultistreamPacket(packet, 0, len, len + 1, 2) != OpusError.OPUS_OK) { TestFailed(); }
                    len++;
                }
                if ((FastRand() & 7) == 0)
                {
                    if (OpusRepacketizer.PadMultistreamPacket(packet, 0, len, len + 256, 2) != OpusError.OPUS_OK) { TestFailed(); }
                    len += 256;
                }
                /* Unpad path not ported yet; kept from the C original for reference. */
                //if ((fast_rand() & 3) == 0)
                //{
                //    len = Repacketizer.opus_multistream_packet_unpad(packet, len, 2);
                //    if (len < 1) test_failed();
                //}
                /* Decode at 48 kHz: expect 6x the 8 kHz frame size. */
                out_samples = MSdec.DecodeMultistream(packet, 0, len, out2buf, 0, MAX_FRAME_SAMP, 0);
                if (out_samples != frame_size * 6) { TestFailed(); }
                dec_final_range = MSdec.FinalRange;
                if (enc_final_range != dec_final_range) { TestFailed(); }
                /*LBRR decode*/
                loss = (FastRand() & 63) == 0;
                out_samples = MSdec_err.DecodeMultistream(packet, 0, loss ? 0 : len, out2buf, 0, frame_size * 6, ((FastRand() & 3) != 0) ? 1 : 0);
                if (out_samples != (frame_size * 6)) { TestFailed(); }
                i += frame_size;
                count++;
            } while (i < (SSAMPLES / 12 - MAX_FRAME_SAMP));
            Console.WriteLine(" Mode {0} NB dual-mono MS encode {1}, {2} bps OK.", mstrings[(int)modes[j] - (int)OpusMode.MODE_SILK_ONLY], rc == 0 ? " VBR" : rc == 1 ? "CVBR" : " CBR", rate);
        }
    }

    return(0);
}
public static Pointer <byte> phone_loop_search_hyp(ps_search_t search, BoxedValueInt out_score) { err.E_WARN("Hypotheses are not returned from phone loop search"); return(PointerHelpers.NULL <byte>()); }
/// <summary> /// Finds linear prediction coeffecients and weights /// </summary> /// <param name="b_Q14"></param> /// <param name="WLTP"></param> /// <param name="LTPredCodGain_Q7"></param> /// <param name="r_lpc"></param> /// <param name="lag"></param> /// <param name="Wght_Q15"></param> /// <param name="subfr_length"></param> /// <param name="nb_subfr"></param> /// <param name="mem_offset"></param> /// <param name="corr_rshifts"></param> /// <param name="arch"></param> internal static void silk_find_LTP( short[] b_Q14, /* O LTP coefs [SilkConstants.MAX_NB_SUBFR * SilkConstants.LTP_ORDER] */ int[] WLTP, /* O Weight for LTP quantization [SilkConstants.MAX_NB_SUBFR * SilkConstants.LTP_ORDER * SilkConstants.LTP_ORDER] */ BoxedValueInt LTPredCodGain_Q7, /* O LTP coding gain */ short[] r_lpc, /* I residual signal after LPC signal + state for first 10 ms */ int[] lag, /* I LTP lags [SilkConstants.MAX_NB_SUBFR] */ int[] Wght_Q15, /* I weights [SilkConstants.MAX_NB_SUBFR] */ int subfr_length, /* I subframe length */ int nb_subfr, /* I number of subframes */ int mem_offset, /* I number of samples in LTP memory */ int[] corr_rshifts /* O right shifts applied to correlations [SilkConstants.MAX_NB_SUBFR] */ ) { int i, k, lshift; int r_ptr; int lag_ptr; int b_Q14_ptr; int regu; int WLTP_ptr; int[] b_Q16 = new int[SilkConstants.LTP_ORDER]; int[] delta_b_Q14 = new int[SilkConstants.LTP_ORDER]; int[] d_Q14 = new int[SilkConstants.MAX_NB_SUBFR]; int[] nrg = new int[SilkConstants.MAX_NB_SUBFR]; int g_Q26; int[] w = new int[SilkConstants.MAX_NB_SUBFR]; int WLTP_max, max_abs_d_Q14, max_w_bits; int temp32, denom32; int extra_shifts; int rr_shifts, maxRshifts, maxRshifts_wxtra, LZs; int LPC_res_nrg, LPC_LTP_res_nrg, div_Q16; int[] Rr = new int[SilkConstants.LTP_ORDER]; int[] rr = new int[SilkConstants.MAX_NB_SUBFR]; int wd, m_Q12; b_Q14_ptr = 0; WLTP_ptr = 0; r_ptr = mem_offset; for (k = 0; k < nb_subfr; k++) { lag_ptr = r_ptr - (lag[k] + SilkConstants.LTP_ORDER / 2); 
SumSqrShift.silk_sum_sqr_shift(out rr[k], out rr_shifts, r_lpc, r_ptr, subfr_length); /* rr[ k ] in Q( -rr_shifts ) */ /* Assure headroom */ LZs = Inlines.silk_CLZ32(rr[k]); if (LZs < LTP_CORRS_HEAD_ROOM) { rr[k] = Inlines.silk_RSHIFT_ROUND(rr[k], LTP_CORRS_HEAD_ROOM - LZs); rr_shifts += (LTP_CORRS_HEAD_ROOM - LZs); } corr_rshifts[k] = rr_shifts; BoxedValueInt boxed_shifts = new BoxedValueInt(corr_rshifts[k]); CorrelateMatrix.silk_corrMatrix(r_lpc, lag_ptr, subfr_length, SilkConstants.LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP, WLTP_ptr, boxed_shifts); /* WLTP_ptr in Q( -corr_rshifts[ k ] ) */ corr_rshifts[k] = boxed_shifts.Val; /* The correlation vector always has lower max abs value than rr and/or RR so head room is assured */ CorrelateMatrix.silk_corrVector(r_lpc, lag_ptr, r_lpc, r_ptr, subfr_length, SilkConstants.LTP_ORDER, Rr, corr_rshifts[k]); /* Rr_ptr in Q( -corr_rshifts[ k ] ) */ if (corr_rshifts[k] > rr_shifts) { rr[k] = Inlines.silk_RSHIFT(rr[k], corr_rshifts[k] - rr_shifts); /* rr[ k ] in Q( -corr_rshifts[ k ] ) */ } Inlines.OpusAssert(rr[k] >= 0); regu = 1; regu = Inlines.silk_SMLAWB(regu, rr[k], ((int)((TuningParameters.LTP_DAMPING / 3) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LTP_DAMPING / 3, 16)*/); regu = Inlines.silk_SMLAWB(regu, Inlines.MatrixGet(WLTP, WLTP_ptr, 0, 0, SilkConstants.LTP_ORDER), ((int)((TuningParameters.LTP_DAMPING / 3) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LTP_DAMPING / 3, 16)*/); regu = Inlines.silk_SMLAWB(regu, Inlines.MatrixGet(WLTP, WLTP_ptr, SilkConstants.LTP_ORDER - 1, SilkConstants.LTP_ORDER - 1, SilkConstants.LTP_ORDER), ((int)((TuningParameters.LTP_DAMPING / 3) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LTP_DAMPING / 3, 16)*/); RegularizeCorrelations.silk_regularize_correlations(WLTP, WLTP_ptr, rr, k, regu, SilkConstants.LTP_ORDER); LinearAlgebra.silk_solve_LDL(WLTP, WLTP_ptr, SilkConstants.LTP_ORDER, Rr, b_Q16); /* WLTP_ptr and Rr_ptr both in 
Q(-corr_rshifts[k]) */ /* Limit and store in Q14 */ silk_fit_LTP(b_Q16, b_Q14, b_Q14_ptr); /* Calculate residual energy */ nrg[k] = ResidualEnergy.silk_residual_energy16_covar(b_Q14, b_Q14_ptr, WLTP, WLTP_ptr, Rr, rr[k], SilkConstants.LTP_ORDER, 14); /* nrg in Q( -corr_rshifts[ k ] ) */ /* temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length ); */ extra_shifts = Inlines.silk_min_int(corr_rshifts[k], LTP_CORRS_HEAD_ROOM); denom32 = Inlines.silk_LSHIFT_SAT32(Inlines.silk_SMULWB(nrg[k], Wght_Q15[k]), 1 + extra_shifts) + /* Q( -corr_rshifts[ k ] + extra_shifts ) */ Inlines.silk_RSHIFT(Inlines.silk_SMULWB((int)subfr_length, 655), corr_rshifts[k] - extra_shifts); /* Q( -corr_rshifts[ k ] + extra_shifts ) */ denom32 = Inlines.silk_max(denom32, 1); Inlines.OpusAssert(((long)Wght_Q15[k] << 16) < int.MaxValue); /* Wght always < 0.5 in Q0 */ temp32 = Inlines.silk_DIV32(Inlines.silk_LSHIFT((int)Wght_Q15[k], 16), denom32); /* Q( 15 + 16 + corr_rshifts[k] - extra_shifts ) */ temp32 = Inlines.silk_RSHIFT(temp32, 31 + corr_rshifts[k] - extra_shifts - 26); /* Q26 */ /* Limit temp such that the below scaling never wraps around */ WLTP_max = 0; for (i = WLTP_ptr; i < WLTP_ptr + (SilkConstants.LTP_ORDER * SilkConstants.LTP_ORDER); i++) { WLTP_max = Inlines.silk_max(WLTP[i], WLTP_max); } lshift = Inlines.silk_CLZ32(WLTP_max) - 1 - 3; /* keep 3 bits free for vq_nearest_neighbor */ Inlines.OpusAssert(26 - 18 + lshift >= 0); if (26 - 18 + lshift < 31) { temp32 = Inlines.silk_min_32(temp32, Inlines.silk_LSHIFT((int)1, 26 - 18 + lshift)); } Inlines.silk_scale_vector32_Q26_lshift_18(WLTP, WLTP_ptr, temp32, SilkConstants.LTP_ORDER * SilkConstants.LTP_ORDER); /* WLTP_ptr in Q( 18 - corr_rshifts[ k ] ) */ w[k] = Inlines.MatrixGet(WLTP, WLTP_ptr, SilkConstants.LTP_ORDER / 2, SilkConstants.LTP_ORDER / 2, SilkConstants.LTP_ORDER); /* w in Q( 18 - corr_rshifts[ k ] ) */ Inlines.OpusAssert(w[k] >= 0); r_ptr += subfr_length; b_Q14_ptr += SilkConstants.LTP_ORDER; WLTP_ptr += 
(SilkConstants.LTP_ORDER * SilkConstants.LTP_ORDER); } maxRshifts = 0; for (k = 0; k < nb_subfr; k++) { maxRshifts = Inlines.silk_max_int(corr_rshifts[k], maxRshifts); } /* Compute LTP coding gain */ if (LTPredCodGain_Q7 != null) { LPC_LTP_res_nrg = 0; LPC_res_nrg = 0; Inlines.OpusAssert(LTP_CORRS_HEAD_ROOM >= 2); /* Check that no overflow will happen when adding */ for (k = 0; k < nb_subfr; k++) { LPC_res_nrg = Inlines.silk_ADD32(LPC_res_nrg, Inlines.silk_RSHIFT(Inlines.silk_ADD32(Inlines.silk_SMULWB(rr[k], Wght_Q15[k]), 1), 1 + (maxRshifts - corr_rshifts[k]))); /* Q( -maxRshifts ) */ LPC_LTP_res_nrg = Inlines.silk_ADD32(LPC_LTP_res_nrg, Inlines.silk_RSHIFT(Inlines.silk_ADD32(Inlines.silk_SMULWB(nrg[k], Wght_Q15[k]), 1), 1 + (maxRshifts - corr_rshifts[k]))); /* Q( -maxRshifts ) */ } LPC_LTP_res_nrg = Inlines.silk_max(LPC_LTP_res_nrg, 1); /* avoid division by zero */ div_Q16 = Inlines.silk_DIV32_varQ(LPC_res_nrg, LPC_LTP_res_nrg, 16); LTPredCodGain_Q7.Val = (int)Inlines.silk_SMULBB(3, Inlines.silk_lin2log(div_Q16) - (16 << 7)); Inlines.OpusAssert(LTPredCodGain_Q7.Val == (int)Inlines.silk_SAT16(Inlines.silk_MUL(3, Inlines.silk_lin2log(div_Q16) - (16 << 7)))); } /* smoothing */ /* d = sum( B, 1 ); */ b_Q14_ptr = 0; for (k = 0; k < nb_subfr; k++) { d_Q14[k] = 0; for (i = b_Q14_ptr; i < b_Q14_ptr + SilkConstants.LTP_ORDER; i++) { d_Q14[k] += b_Q14[i]; } b_Q14_ptr += SilkConstants.LTP_ORDER; } /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */ /* Find maximum absolute value of d_Q14 and the bits used by w in Q0 */ max_abs_d_Q14 = 0; max_w_bits = 0; for (k = 0; k < nb_subfr; k++) { max_abs_d_Q14 = Inlines.silk_max_32(max_abs_d_Q14, Inlines.silk_abs(d_Q14[k])); /* w[ k ] is in Q( 18 - corr_rshifts[ k ] ) */ /* Find bits needed in Q( 18 - maxRshifts ) */ max_w_bits = Inlines.silk_max_32(max_w_bits, 32 - Inlines.silk_CLZ32(w[k]) + corr_rshifts[k] - maxRshifts); } /* max_abs_d_Q14 = (5 << 15); worst case, i.e. 
SilkConstants.LTP_ORDER * -silk_int16_MIN */ Inlines.OpusAssert(max_abs_d_Q14 <= (5 << 15)); /* How many bits is needed for w*d' in Q( 18 - maxRshifts ) in the worst case, of all d_Q14's being equal to max_abs_d_Q14 */ extra_shifts = max_w_bits + 32 - Inlines.silk_CLZ32(max_abs_d_Q14) - 14; /* Subtract what we got available; bits in output var plus maxRshifts */ extra_shifts -= (32 - 1 - 2 + maxRshifts); /* Keep sign bit free as well as 2 bits for accumulation */ extra_shifts = Inlines.silk_max_int(extra_shifts, 0); maxRshifts_wxtra = maxRshifts + extra_shifts; temp32 = Inlines.silk_RSHIFT(262, maxRshifts + extra_shifts) + 1; /* 1e-3f in Q( 18 - (maxRshifts + extra_shifts) ) */ wd = 0; for (k = 0; k < nb_subfr; k++) { /* w has at least 2 bits of headroom so no overflow should happen */ temp32 = Inlines.silk_ADD32(temp32, Inlines.silk_RSHIFT(w[k], maxRshifts_wxtra - corr_rshifts[k])); /* Q( 18 - maxRshifts_wxtra ) */ wd = Inlines.silk_ADD32(wd, Inlines.silk_LSHIFT(Inlines.silk_SMULWW(Inlines.silk_RSHIFT(w[k], maxRshifts_wxtra - corr_rshifts[k]), d_Q14[k]), 2)); /* Q( 18 - maxRshifts_wxtra ) */ } m_Q12 = Inlines.silk_DIV32_varQ(wd, temp32, 12); b_Q14_ptr = 0; for (k = 0; k < nb_subfr; k++) { /* w[ k ] from Q( 18 - corr_rshifts[ k ] ) to Q( 16 ) */ if (2 - corr_rshifts[k] > 0) { temp32 = Inlines.silk_RSHIFT(w[k], 2 - corr_rshifts[k]); } else { temp32 = Inlines.silk_LSHIFT_SAT32(w[k], corr_rshifts[k] - 2); } g_Q26 = Inlines.silk_MUL( Inlines.silk_DIV32( ((int)((TuningParameters.LTP_SMOOTHING) * ((long)1 << (26)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LTP_SMOOTHING, 26)*/, Inlines.silk_RSHIFT(((int)((TuningParameters.LTP_SMOOTHING) * ((long)1 << (26)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.LTP_SMOOTHING, 26)*/, 10) + temp32), /* Q10 */ Inlines.silk_LSHIFT_SAT32(Inlines.silk_SUB_SAT32((int)m_Q12, Inlines.silk_RSHIFT(d_Q14[k], 2)), 4)); /* Q16 */ temp32 = 0; for (i = 0; i < SilkConstants.LTP_ORDER; i++) { delta_b_Q14[i] = Inlines.silk_max_16(b_Q14[b_Q14_ptr 
+ i], 1638); /* 1638_Q14 = 0.1_Q0 */ temp32 += delta_b_Q14[i]; /* Q14 */ } temp32 = Inlines.silk_DIV32(g_Q26, temp32); /* Q14 . Q12 */ for (i = 0; i < SilkConstants.LTP_ORDER; i++) { b_Q14[b_Q14_ptr + i] = (short)(Inlines.silk_LIMIT_32((int)b_Q14[b_Q14_ptr + i] + Inlines.silk_SMULWB(Inlines.silk_LSHIFT_SAT32(temp32, 4), delta_b_Q14[i]), -16000, 28000)); } b_Q14_ptr += SilkConstants.LTP_ORDER; } }
/****************/ /* Decode frame */ /****************/ internal int silk_decode_frame( EntropyCoder psRangeDec, /* I/O Compressor data structure */ short[] pOut, /* O Pointer to output speech frame */ int pOut_ptr, BoxedValueInt pN, /* O Pointer to size of output frame */ int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ int condCoding /* I The type of conditional coding to use */ ) { SilkDecoderControl thisCtrl = new SilkDecoderControl(); int L, mv_len, ret = 0; L = this.frame_length; thisCtrl.LTP_scale_Q14 = 0; /* Safety checks */ Inlines.OpusAssert(L > 0 && L <= SilkConstants.MAX_FRAME_LENGTH); if (lostFlag == DecoderAPIFlag.FLAG_DECODE_NORMAL || (lostFlag == DecoderAPIFlag.FLAG_DECODE_LBRR && this.LBRR_flags[this.nFramesDecoded] == 1)) { short[] pulses = new short[(L + SilkConstants.SHELL_CODEC_FRAME_LENGTH - 1) & ~(SilkConstants.SHELL_CODEC_FRAME_LENGTH - 1)]; /*********************************************/ /* Decode quantization indices of side info */ /*********************************************/ DecodeIndices.silk_decode_indices(this, psRangeDec, this.nFramesDecoded, lostFlag, condCoding); /*********************************************/ /* Decode quantization indices of excitation */ /*********************************************/ DecodePulses.silk_decode_pulses(psRangeDec, pulses, this.indices.signalType, this.indices.quantOffsetType, this.frame_length); /********************************************/ /* Decode parameters and pulse signal */ /********************************************/ DecodeParameters.silk_decode_parameters(this, thisCtrl, condCoding); /********************************************************/ /* Run inverse NSQ */ /********************************************************/ DecodeCore.silk_decode_core(this, thisCtrl, pOut, pOut_ptr, pulses); /********************************************************/ /* Update PLC state */ /********************************************************/ PLC.silk_PLC(this, thisCtrl, pOut, pOut_ptr, 0); 
this.lossCnt = 0; this.prevSignalType = this.indices.signalType; Inlines.OpusAssert(this.prevSignalType >= 0 && this.prevSignalType <= 2); /* A frame has been decoded without errors */ this.first_frame_after_reset = 0; } else { /* Handle packet loss by extrapolation */ PLC.silk_PLC(this, thisCtrl, pOut, pOut_ptr, 1); } /*************************/ /* Update output buffer. */ /*************************/ Inlines.OpusAssert(this.ltp_mem_length >= this.frame_length); mv_len = this.ltp_mem_length - this.frame_length; Arrays.MemMoveShort(this.outBuf, this.frame_length, 0, mv_len); Array.Copy(pOut, pOut_ptr, this.outBuf, mv_len, this.frame_length); /************************************************/ /* Comfort noise generation / estimation */ /************************************************/ CNG.silk_CNG(this, thisCtrl, pOut, pOut_ptr, L); /****************************************************************/ /* Ensure smooth connection of extrapolated and good frames */ /****************************************************************/ PLC.silk_PLC_glue_frames(this, pOut, pOut_ptr, L); /* Update some decoder state variables */ this.lagPrev = thisCtrl.pitchL[this.nb_subfr - 1]; /* Set output frame length */ pN.Val = L; return(ret); }