/*
 * Decodes a frame-length field located at data[data_ptr].
 *
 * data      buffer holding the packet bytes
 * data_ptr  index of the first byte of the length field
 * len       number of bytes still available starting at data_ptr
 * size      receives the decoded length, or -1 when the field does not fit
 *
 * Returns the number of bytes the field occupies (1 or 2), or -1 when fewer
 * bytes are available than the field needs.
 */
internal static int parse_size(byte[] data, int data_ptr, int len, BoxedValueShort size)
{
    if (len >= 1)
    {
        int first = data[data_ptr];

        if (first < 252)
        {
            /* One-byte form: the byte is the length itself. */
            size.Val = (short)first;
            return 1;
        }

        if (len >= 2)
        {
            /* Two-byte form: the second byte is weighted by four and added to the first. */
            size.Val = (short)(4 * data[data_ptr + 1] + first);
            return 2;
        }
    }

    /* Not enough bytes left to hold the length field. */
    size.Val = -1;
    return -1;
}
/*
 * Splits one Opus packet into its frames.
 *
 * data / data_ptr   packet buffer and index of the TOC byte
 * len               number of packet bytes available from data_ptr
 * self_delimited    non-zero when the last frame carries an explicit size field
 * out_toc           receives the TOC byte (0 on every error return)
 * frames            optional; entry frames_ptr + i is set to the packet buffer
 * frames_ptrs       optional; entry frames_ptr + i is set to the start index of frame i
 * sizes / sizes_ptr receives the byte length of frame i at sizes[sizes_ptr + i]
 * payload_offset    receives the distance from the TOC byte to the first frame byte
 * packet_offset     receives the distance from the TOC byte to the end of the
 *                   last frame, plus the padding amount that was read
 *
 * Returns the number of frames, OpusError.OPUS_BAD_ARG when sizes is null or
 * len is negative, or OpusError.OPUS_INVALID_PACKET when the framing is
 * inconsistent with len.
 */
internal static int opus_packet_parse_impl(byte[] data, int data_ptr, int len, int self_delimited, out byte out_toc, byte[][] frames, int[] frames_ptrs, int frames_ptr, short[] sizes, int sizes_ptr, out int payload_offset, out int packet_offset)
{
    int start = data_ptr;
    int padding = 0;
    bool is_cbr = false;
    int frame_count;
    int consumed;
    int last_size;
    BoxedValueShort parsed;

    out_toc = 0;
    payload_offset = 0;
    packet_offset = 0;

    if (sizes == null || len < 0)
    {
        return OpusError.OPUS_BAD_ARG;
    }

    if (len == 0)
    {
        return OpusError.OPUS_INVALID_PACKET;
    }

    int samples_per_frame = GetNumSamplesPerFrame(data, data_ptr, 48000);

    byte toc = data[data_ptr++];
    len--;
    last_size = len;

    int code = toc & 0x3;
    if (code == 0)
    {
        /* One frame */
        frame_count = 1;
    }
    else if (code == 1)
    {
        /* Two CBR frames */
        frame_count = 2;
        is_cbr = true;
        if (self_delimited == 0)
        {
            if ((len & 0x1) != 0)
            {
                return OpusError.OPUS_INVALID_PACKET;
            }

            last_size = len / 2;
            /* A value too large for sizes[] is rejected further down. */
            sizes[sizes_ptr] = (short)last_size;
        }
    }
    else if (code == 2)
    {
        /* Two VBR frames: the first one carries an explicit size. */
        frame_count = 2;
        parsed = new BoxedValueShort(sizes[sizes_ptr]);
        consumed = parse_size(data, data_ptr, len, parsed);
        sizes[sizes_ptr] = parsed.Val;
        len -= consumed;
        if (sizes[sizes_ptr] < 0 || sizes[sizes_ptr] > len)
        {
            return OpusError.OPUS_INVALID_PACKET;
        }

        data_ptr += consumed;
        last_size = len - sizes[sizes_ptr];
    }
    else
    {
        /* Multiple CBR/VBR frames (from 0 to 120 ms) */
        if (len < 1)
        {
            return OpusError.OPUS_INVALID_PACKET;
        }

        /* Frame count lives in bits 0 to 5 */
        byte count_byte = data[data_ptr++];
        frame_count = count_byte & 0x3F;
        if (frame_count <= 0 || samples_per_frame * frame_count > 5760)
        {
            return OpusError.OPUS_INVALID_PACKET;
        }

        len--;

        /* Padding flag is bit 6 */
        if ((count_byte & 0x40) != 0)
        {
            while (true)
            {
                if (len <= 0)
                {
                    return OpusError.OPUS_INVALID_PACKET;
                }

                int p = data[data_ptr++];
                len--;
                /* A byte of 255 contributes 254 and means another padding byte follows. */
                int chunk = (p == 255) ? 254 : p;
                len -= chunk;
                padding += chunk;

                if (p != 255)
                {
                    break;
                }
            }
        }

        if (len < 0)
        {
            return OpusError.OPUS_INVALID_PACKET;
        }

        /* VBR flag is bit 7 */
        is_cbr = (count_byte & 0x80) == 0;
        if (!is_cbr)
        {
            /* VBR case: every frame but the last has an explicit size. */
            last_size = len;
            for (int f = 0; f < frame_count - 1; f++)
            {
                int slot = sizes_ptr + f;
                parsed = new BoxedValueShort(sizes[slot]);
                consumed = parse_size(data, data_ptr, len, parsed);
                sizes[slot] = parsed.Val;
                len -= consumed;
                if (sizes[slot] < 0 || sizes[slot] > len)
                {
                    return OpusError.OPUS_INVALID_PACKET;
                }

                data_ptr += consumed;
                last_size -= consumed + sizes[slot];
            }

            if (last_size < 0)
            {
                return OpusError.OPUS_INVALID_PACKET;
            }
        }
        else if (self_delimited == 0)
        {
            /* CBR case: the remaining bytes must divide evenly among the frames. */
            last_size = len / frame_count;
            if (last_size * frame_count != len)
            {
                return OpusError.OPUS_INVALID_PACKET;
            }

            for (int f = 0; f < frame_count - 1; f++)
            {
                sizes[sizes_ptr + f] = (short)last_size;
            }
        }
    }

    int last = sizes_ptr + frame_count - 1;

    if (self_delimited != 0)
    {
        /* Self-delimited framing has an extra size for the last frame. */
        parsed = new BoxedValueShort(sizes[last]);
        consumed = parse_size(data, data_ptr, len, parsed);
        sizes[last] = parsed.Val;
        len -= consumed;
        if (sizes[last] < 0 || sizes[last] > len)
        {
            return OpusError.OPUS_INVALID_PACKET;
        }

        data_ptr += consumed;

        if (is_cbr)
        {
            /* For CBR packets, apply the size to all the frames. */
            if (sizes[last] * frame_count > len)
            {
                return OpusError.OPUS_INVALID_PACKET;
            }

            for (int f = 0; f < frame_count - 1; f++)
            {
                sizes[sizes_ptr + f] = sizes[last];
            }
        }
        else if (consumed + sizes[last] > last_size)
        {
            return OpusError.OPUS_INVALID_PACKET;
        }
    }
    else
    {
        /* The last size (or every size, for CBR) is implicit here, so it can
         * exceed 1275; reject such packets. */
        if (last_size > 1275)
        {
            return OpusError.OPUS_INVALID_PACKET;
        }

        sizes[last] = (short)last_size;
    }

    payload_offset = (int)(data_ptr - start);

    for (int f = 0; f < frame_count; f++)
    {
        if (frames != null)
        {
            frames[frames_ptr + f] = data;
        }

        if (frames_ptrs != null)
        {
            frames_ptrs[frames_ptr + f] = data_ptr;
        }

        data_ptr += sizes[sizes_ptr + f];
    }

    packet_offset = padding + (int)(data_ptr - start);
    out_toc = toc;

    return frame_count;
}
/*************************************************************/
/*      FIXED POINT CORE PITCH ANALYSIS FUNCTION             */
/*************************************************************/
/*
 * Estimates the pitch lag of a frame in up to three stages:
 *   1. a normalized-correlation search on a copy decimated to 4 kHz, which
 *      short-lists candidate lags;
 *   2. a search at 8 kHz over the short-listed lags combined with the
 *      stage-2 lag contour codebook;
 *   3. only when Fs_kHz > 8: a search at the input rate within +/-2 samples
 *      of the stage-2 lag, using the stage-3 contour codebook.
 *
 * Returns 0 when a lag was found (voiced) and 1 otherwise. On both unvoiced
 * exits the first nb_subfr entries of pitch_out, LTPCorr_Q15, lagIndex and
 * contourIndex are set to zero.
 */
internal static int silk_pitch_analysis_core(    /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
    short[] frame,                               /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
    int[] pitch_out,                             /* O    Pitch lag values; nb_subfr entries are written              */
    BoxedValueShort lagIndex,                    /* O    Lag Index                                                   */
    BoxedValueSbyte contourIndex,                /* O    Pitch contour Index                                         */
    BoxedValueInt LTPCorr_Q15,                   /* I/O  Normalized correlation; input: value from previous frame    */
    int prevLag,                                 /* I    Last lag of previous frame; set to zero if unvoiced         */
    int search_thres1_Q16,                       /* I    First stage threshold for lag candidates 0 - 1              */
    int search_thres2_Q13,                       /* I    Final threshold for lag candidates 0 - 1                    */
    int Fs_kHz,                                  /* I    Sample frequency (kHz); must be 8, 12 or 16                 */
    int complexity,                              /* I    Complexity setting, 0-2, where 2 is highest                 */
    int nb_subfr                                 /* I    number of 5 ms subframes                                    */
    )
{
    short[] frame_8kHz;
    short[] frame_4kHz;
    int[] filt_state = new int[6];
    short[] input_frame_ptr;
    int i, k, d, j;
    short[] C;
    int[] xcorr32;
    short[] basis;
    int basis_ptr;
    short[] target;
    int target_ptr;
    int cross_corr, normalizer, energy, shift, energy_basis, energy_target;
    int Cmax, length_d_srch, length_d_comp;
    int[] d_srch = new int[SilkConstants.PE_D_SRCH_LENGTH];
    short[] d_comp;
    int sum, threshold, lag_counter;
    int CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;
    int CCmax, CCmax_b, CCmax_new_b, CCmax_new;
    int[] CC = new int[SilkConstants.PE_NB_CBKS_STAGE2_EXT];
    silk_pe_stage3_vals[] energies_st3;
    silk_pe_stage3_vals[] cross_corr_st3;
    int frame_length, frame_length_8kHz, frame_length_4kHz;
    int sf_length;
    int min_lag;
    int max_lag;
    int contour_bias_Q15, diff;
    int nb_cbk_search;
    int delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
    sbyte[][] Lag_CB_ptr;

    /* Check for valid sampling frequency */
    Inlines.OpusAssert(Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16);

    /* Check for valid complexity setting */
    Inlines.OpusAssert(complexity >= SilkConstants.SILK_PE_MIN_COMPLEX);
    Inlines.OpusAssert(complexity <= SilkConstants.SILK_PE_MAX_COMPLEX);

    Inlines.OpusAssert(search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1 << 16));
    Inlines.OpusAssert(search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1 << 13));

    /* Set up frame lengths max / min lag for the sampling frequency */
    frame_length = (SilkConstants.PE_LTP_MEM_LENGTH_MS + nb_subfr * SilkConstants.PE_SUBFR_LENGTH_MS) * Fs_kHz;
    frame_length_4kHz = (SilkConstants.PE_LTP_MEM_LENGTH_MS + nb_subfr * SilkConstants.PE_SUBFR_LENGTH_MS) * 4;
    frame_length_8kHz = (SilkConstants.PE_LTP_MEM_LENGTH_MS + nb_subfr * SilkConstants.PE_SUBFR_LENGTH_MS) * 8;
    sf_length = SilkConstants.PE_SUBFR_LENGTH_MS * Fs_kHz;
    min_lag = SilkConstants.PE_MIN_LAG_MS * Fs_kHz;
    max_lag = SilkConstants.PE_MAX_LAG_MS * Fs_kHz - 1;

    /* Resample from input sampled at Fs_kHz to 8 kHz */
    frame_8kHz = new short[frame_length_8kHz];
    if (Fs_kHz == 16)
    {
        Arrays.MemSetInt(filt_state, 0, 2);
        Resampler.silk_resampler_down2(filt_state, frame_8kHz, frame, frame_length);
    }
    else if (Fs_kHz == 12)
    {
        Arrays.MemSetInt(filt_state, 0, 6);
        Resampler.silk_resampler_down2_3(filt_state, frame_8kHz, frame, frame_length);
    }
    else
    {
        Inlines.OpusAssert(Fs_kHz == 8);
        Array.Copy(frame, frame_8kHz, frame_length_8kHz);
    }

    /* Decimate again to 4 kHz */
    Arrays.MemSetInt(filt_state, 0, 2); /* Set state to zero */
    frame_4kHz = new short[frame_length_4kHz];
    Resampler.silk_resampler_down2(filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz);

    /* Low-pass filter: runs from the end so each sample adds its not-yet-updated predecessor */
    for (i = frame_length_4kHz - 1; i > 0; i--)
    {
        frame_4kHz[i] = Inlines.silk_ADD_SAT16(frame_4kHz[i], frame_4kHz[i - 1]);
    }

    /*******************************************************************************
    ** Scale 4 kHz signal down to prevent correlations measures from overflowing
    ** find scaling as max scaling for each 8kHz(?) subframe
    *******************************************************************************/

    /* Inner product is calculated with different lengths, so scale for the worst case */
    SumSqrShift.silk_sum_sqr_shift(out energy, out shift, frame_4kHz, frame_length_4kHz);
    if (shift > 0)
    {
        shift = Inlines.silk_RSHIFT(shift, 1);
        for (i = 0; i < frame_length_4kHz; i++)
        {
            frame_4kHz[i] = Inlines.silk_RSHIFT16(frame_4kHz[i], shift);
        }
    }

    /******************************************************************************
    * FIRST STAGE, operating in 4 khz
    ******************************************************************************/
    /* C is sized for the 8 kHz stage because the same buffer is reused in stage 2 */
    C = new short[nb_subfr * CSTRIDE_8KHZ];
    xcorr32 = new int[MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1];
    Arrays.MemSetShort(C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ);
    target = frame_4kHz;
    target_ptr = Inlines.silk_LSHIFT(SF_LENGTH_4KHZ, 2);
    for (k = 0; k < nb_subfr >> 1; k++)
    {
        basis = target;
        basis_ptr = target_ptr - MIN_LAG_4KHZ;

        CeltPitchXCorr.pitch_xcorr(target, target_ptr, target, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1);

        /* Calculate first vector products before loop */
        cross_corr = xcorr32[MAX_LAG_4KHZ - MIN_LAG_4KHZ];
        normalizer = Inlines.silk_inner_prod_self(target, target_ptr, SF_LENGTH_8KHZ);
        normalizer = Inlines.silk_ADD32(normalizer, Inlines.silk_inner_prod_self(basis, basis_ptr, SF_LENGTH_8KHZ));
        normalizer = Inlines.silk_ADD32(normalizer, Inlines.silk_SMULBB(SF_LENGTH_8KHZ, 4000));

        Inlines.MatrixSet(C, k, 0, CSTRIDE_4KHZ,
            (short)Inlines.silk_DIV32_varQ(cross_corr, normalizer, 13 + 1)); /* Q13 */

        /* From now on normalizer is computed recursively */
        for (d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++)
        {
            basis_ptr--;

            cross_corr = xcorr32[MAX_LAG_4KHZ - d];

            /* Add contribution of new sample and remove contribution from oldest sample */
            normalizer = Inlines.silk_ADD32(normalizer,
                Inlines.silk_SMULBB(basis[basis_ptr], basis[basis_ptr]) -
                Inlines.silk_SMULBB(basis[basis_ptr + SF_LENGTH_8KHZ], basis[basis_ptr + SF_LENGTH_8KHZ]));

            Inlines.MatrixSet(C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ,
                (short)Inlines.silk_DIV32_varQ(cross_corr, normalizer, 13 + 1)); /* Q13 */
        }

        /* Update target pointer */
        target_ptr += SF_LENGTH_8KHZ;
    }

    /* Combine two subframes into single correlation measure and apply short-lag bias */
    if (nb_subfr == SilkConstants.PE_MAX_NB_SUBFR)
    {
        for (i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i--)
        {
            sum = (int)Inlines.MatrixGet(C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ)
                + (int)Inlines.MatrixGet(C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ); /* Q14 */
            sum = Inlines.silk_SMLAWB(sum, sum, Inlines.silk_LSHIFT(-i, 4)); /* Q14 */
            C[i - MIN_LAG_4KHZ] = (short)sum; /* Q14 */
        }
    }
    else
    {
        /* Only short-lag bias */
        for (i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i--)
        {
            sum = Inlines.silk_LSHIFT((int)C[i - MIN_LAG_4KHZ], 1); /* Q14 */
            sum = Inlines.silk_SMLAWB(sum, sum, Inlines.silk_LSHIFT(-i, 4)); /* Q14 */
            C[i - MIN_LAG_4KHZ] = (short)sum; /* Q14 */
        }
    }

    /* Sort */
    length_d_srch = Inlines.silk_ADD_LSHIFT32(4, complexity, 1);
    Inlines.OpusAssert(3 * length_d_srch <= SilkConstants.PE_D_SRCH_LENGTH);
    Sort.silk_insertion_sort_decreasing_int16(C, d_srch, CSTRIDE_4KHZ, length_d_srch);

    /* Escape if correlation is very low already here */
    Cmax = (int)C[0]; /* Q14 */
    if (Cmax < ((int)((0.2f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(0.2f, 14)*/)
    {
        Arrays.MemSetInt(pitch_out, 0, nb_subfr);
        LTPCorr_Q15.Val = 0;
        lagIndex.Val = 0;
        contourIndex.Val = 0;

        return 1;
    }

    threshold = Inlines.silk_SMULWB(search_thres1_Q16, Cmax);
    for (i = 0; i < length_d_srch; i++)
    {
        /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
        if (C[i] > threshold)
        {
            d_srch[i] = Inlines.silk_LSHIFT(d_srch[i] + MIN_LAG_4KHZ, 1);
        }
        else
        {
            /* C is sorted in decreasing order, so nothing further can pass */
            length_d_srch = i;
            break;
        }
    }
    Inlines.OpusAssert(length_d_srch > 0);

    /* Mark the candidate lags, then widen the marks by convolution */
    d_comp = new short[D_COMP_STRIDE];
    for (i = D_COMP_MIN; i < D_COMP_MAX; i++)
    {
        d_comp[i - D_COMP_MIN] = 0;
    }
    for (i = 0; i < length_d_srch; i++)
    {
        d_comp[d_srch[i] - D_COMP_MIN] = 1;
    }

    /* Convolution */
    for (i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i--)
    {
        d_comp[i - D_COMP_MIN] += (short)(d_comp[i - 1 - D_COMP_MIN] + d_comp[i - 2 - D_COMP_MIN]);
    }

    /* Rebuild d_srch as the list of 8 kHz lags to search in stage 2 */
    length_d_srch = 0;
    for (i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++)
    {
        if (d_comp[i + 1 - D_COMP_MIN] > 0)
        {
            d_srch[length_d_srch] = i;
            length_d_srch++;
        }
    }

    /* Convolution */
    for (i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i--)
    {
        d_comp[i - D_COMP_MIN] += (short)(d_comp[i - 1 - D_COMP_MIN] + d_comp[i - 2 - D_COMP_MIN] + d_comp[i - 3 - D_COMP_MIN]);
    }

    /* Compact d_comp in place into the list of lags whose correlation is computed */
    length_d_comp = 0;
    for (i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++)
    {
        if (d_comp[i - D_COMP_MIN] > 0)
        {
            d_comp[length_d_comp] = (short)(i - 2);
            length_d_comp++;
        }
    }

    /**********************************************************************************
    ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
    *************************************************************************************/

    /******************************************************************************
    ** Scale signal down to avoid correlations measures from overflowing
    *******************************************************************************/
    /* find scaling as max scaling for each subframe */
    SumSqrShift.silk_sum_sqr_shift(out energy, out shift, frame_8kHz, frame_length_8kHz);
    if (shift > 0)
    {
        shift = Inlines.silk_RSHIFT(shift, 1);
        for (i = 0; i < frame_length_8kHz; i++)
        {
            frame_8kHz[i] = Inlines.silk_RSHIFT16(frame_8kHz[i], shift);
        }
    }

    /*********************************************************************************
    * Find energy of each subframe projected onto its history, for a range of delays
    *********************************************************************************/
    Arrays.MemSetShort(C, 0, nb_subfr * CSTRIDE_8KHZ);

    target = frame_8kHz;
    target_ptr = SilkConstants.PE_LTP_MEM_LENGTH_MS * 8;
    for (k = 0; k < nb_subfr; k++)
    {
        energy_target = Inlines.silk_ADD32(Inlines.silk_inner_prod(target, target_ptr, target, target_ptr, SF_LENGTH_8KHZ), 1);
        for (j = 0; j < length_d_comp; j++)
        {
            d = d_comp[j];
            basis = target;
            basis_ptr = target_ptr - d;

            cross_corr = Inlines.silk_inner_prod(target, target_ptr, basis, basis_ptr, SF_LENGTH_8KHZ);
            if (cross_corr > 0)
            {
                energy_basis = Inlines.silk_inner_prod_self(basis, basis_ptr, SF_LENGTH_8KHZ);
                Inlines.MatrixSet(C, k, d - (MIN_LAG_8KHZ - 2), CSTRIDE_8KHZ,
                    (short)Inlines.silk_DIV32_varQ(cross_corr,
                        Inlines.silk_ADD32(energy_target, energy_basis),
                        13 + 1)); /* Q13 */
            }
            else
            {
                Inlines.MatrixSet<short>(C, k, d - (MIN_LAG_8KHZ - 2), CSTRIDE_8KHZ, 0);
            }
        }
        target_ptr += SF_LENGTH_8KHZ;
    }

    /* search over lag range and lags codebook */
    /* scale factor for lag codebook, as a function of center lag */

    CCmax = int.MinValue;
    CCmax_b = int.MinValue;

    CBimax = 0; /* To avoid returning undefined lag values */
    lag = -1;   /* To check if lag with strong enough correlation has been found */

    if (prevLag > 0)
    {
        /* Bring the previous lag to the 8 kHz domain before taking its log */
        if (Fs_kHz == 12)
        {
            prevLag = Inlines.silk_DIV32_16(Inlines.silk_LSHIFT(prevLag, 1), 3);
        }
        else if (Fs_kHz == 16)
        {
            prevLag = Inlines.silk_RSHIFT(prevLag, 1);
        }
        prevLag_log2_Q7 = Inlines.silk_lin2log((int)prevLag);
    }
    else
    {
        prevLag_log2_Q7 = 0;
    }
    Inlines.OpusAssert(search_thres2_Q13 == Inlines.silk_SAT16(search_thres2_Q13));

    /* Set up stage 2 codebook based on number of subframes */
    if (nb_subfr == SilkConstants.PE_MAX_NB_SUBFR)
    {
        Lag_CB_ptr = Tables.silk_CB_lags_stage2;
        if (Fs_kHz == 8 && complexity > SilkConstants.SILK_PE_MIN_COMPLEX)
        {
            /* If input is 8 khz use a larger codebook here because it is last stage */
            nb_cbk_search = SilkConstants.PE_NB_CBKS_STAGE2_EXT;
        }
        else
        {
            nb_cbk_search = SilkConstants.PE_NB_CBKS_STAGE2;
        }
    }
    else
    {
        Lag_CB_ptr = Tables.silk_CB_lags_stage2_10_ms;
        nb_cbk_search = SilkConstants.PE_NB_CBKS_STAGE2_10MS;
    }

    for (k = 0; k < length_d_srch; k++)
    {
        d = d_srch[k];
        for (j = 0; j < nb_cbk_search; j++)
        {
            CC[j] = 0;
            for (i = 0; i < nb_subfr; i++)
            {
                int d_subfr;
                /* Try all codebooks */
                d_subfr = d + Lag_CB_ptr[i][j];
                CC[j] = CC[j] + (int)Inlines.MatrixGet(C, i, d_subfr - (MIN_LAG_8KHZ - 2), CSTRIDE_8KHZ);
            }
        }

        /* Find best codebook */
        CCmax_new = int.MinValue;
        CBimax_new = 0;
        for (i = 0; i < nb_cbk_search; i++)
        {
            if (CC[i] > CCmax_new)
            {
                CCmax_new = CC[i];
                CBimax_new = i;
            }
        }

        /* Bias towards shorter lags */
        lag_log2_Q7 = Inlines.silk_lin2log(d); /* Q7 */
        Inlines.OpusAssert(lag_log2_Q7 == Inlines.silk_SAT16(lag_log2_Q7));
        Inlines.OpusAssert(nb_subfr * ((int)((SilkConstants.PE_SHORTLAG_BIAS) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.PE_SHORTLAG_BIAS, 13)*/ == Inlines.silk_SAT16(nb_subfr * ((int)((SilkConstants.PE_SHORTLAG_BIAS) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.PE_SHORTLAG_BIAS, 13)*/));
        CCmax_new_b = CCmax_new - Inlines.silk_RSHIFT(Inlines.silk_SMULBB(nb_subfr * ((int)((SilkConstants.PE_SHORTLAG_BIAS) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.PE_SHORTLAG_BIAS, 13)*/, lag_log2_Q7), 7); /* Q13 */

        /* Bias towards previous lag */
        Inlines.OpusAssert(nb_subfr * ((int)((SilkConstants.PE_PREVLAG_BIAS) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.PE_PREVLAG_BIAS, 13)*/ == Inlines.silk_SAT16(nb_subfr * ((int)((SilkConstants.PE_PREVLAG_BIAS) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.PE_PREVLAG_BIAS, 13)*/));
        if (prevLag > 0)
        {
            delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7;
            Inlines.OpusAssert(delta_lag_log2_sqr_Q7 == Inlines.silk_SAT16(delta_lag_log2_sqr_Q7));
            delta_lag_log2_sqr_Q7 = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7), 7);
            prev_lag_bias_Q13 = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(nb_subfr * ((int)((SilkConstants.PE_PREVLAG_BIAS) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.PE_PREVLAG_BIAS, 13)*/, LTPCorr_Q15.Val), 15); /* Q13 */
            prev_lag_bias_Q13 = Inlines.silk_DIV32(Inlines.silk_MUL(prev_lag_bias_Q13, delta_lag_log2_sqr_Q7), delta_lag_log2_sqr_Q7 + ((int)((0.5f) * ((long)1 << (7)) + 0.5)) /*Inlines.SILK_CONST(0.5f, 7)*/);
            CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */
        }

        if (CCmax_new_b > CCmax_b &&                                          /* Find maximum biased correlation                  */
            CCmax_new > Inlines.silk_SMULBB(nb_subfr, search_thres2_Q13) &&   /* Correlation needs to be high enough to be voiced */
            Tables.silk_CB_lags_stage2[0][CBimax_new] <= MIN_LAG_8KHZ         /* Lag must be in range                             */
            )
        {
            CCmax_b = CCmax_new_b;
            CCmax = CCmax_new;
            lag = d;
            CBimax = CBimax_new;
        }
    }

    if (lag == -1)
    {
        /* No suitable candidate found */
        Arrays.MemSetInt(pitch_out, 0, nb_subfr);
        LTPCorr_Q15.Val = 0;
        lagIndex.Val = 0;
        contourIndex.Val = 0;

        return 1;
    }

    /* Output normalized correlation */
    LTPCorr_Q15.Val = (int)Inlines.silk_LSHIFT(Inlines.silk_DIV32_16(CCmax, nb_subfr), 2);
    Inlines.OpusAssert(LTPCorr_Q15.Val >= 0);

    if (Fs_kHz > 8)
    {
        short[] scratch_mem;
        /***************************************************************************/
        /* Scale input signal down to avoid correlations measures from overflowing */
        /***************************************************************************/
        /* find scaling as max scaling for each subframe */
        SumSqrShift.silk_sum_sqr_shift(out energy, out shift, frame, frame_length);
        if (shift > 0)
        {
            scratch_mem = new short[frame_length];
            /* Move signal to scratch mem because the input signal should be unchanged */
            shift = Inlines.silk_RSHIFT(shift, 1);
            for (i = 0; i < frame_length; i++)
            {
                scratch_mem[i] = Inlines.silk_RSHIFT16(frame[i], shift);
            }
            input_frame_ptr = scratch_mem;
        }
        else
        {
            input_frame_ptr = frame;
        }

        /* Search in original signal */

        CBimax_old = CBimax;
        /* Compensate for decimation */
        Inlines.OpusAssert(lag == Inlines.silk_SAT16(lag));
        if (Fs_kHz == 12)
        {
            lag = Inlines.silk_RSHIFT(Inlines.silk_SMULBB(lag, 3), 1);
        }
        else if (Fs_kHz == 16)
        {
            lag = Inlines.silk_LSHIFT(lag, 1);
        }
        else
        {
            lag = Inlines.silk_SMULBB(lag, 3);
        }

        lag = Inlines.silk_LIMIT_int(lag, min_lag, max_lag);
        start_lag = Inlines.silk_max_int(lag - 2, min_lag);
        end_lag = Inlines.silk_min_int(lag + 2, max_lag);
        lag_new = lag; /* to avoid undefined lag */
        CBimax = 0;    /* to avoid undefined lag */

        CCmax = int.MinValue;
        /* pitch lags according to second stage */
        for (k = 0; k < nb_subfr; k++)
        {
            pitch_out[k] = lag + 2 * Tables.silk_CB_lags_stage2[k][CBimax_old];
        }

        /* Set up codebook parameters according to complexity setting and frame length */
        if (nb_subfr == SilkConstants.PE_MAX_NB_SUBFR)
        {
            nb_cbk_search = (int)Tables.silk_nb_cbk_searchs_stage3[complexity];
            Lag_CB_ptr = Tables.silk_CB_lags_stage3;
        }
        else
        {
            nb_cbk_search = SilkConstants.PE_NB_CBKS_STAGE3_10MS;
            Lag_CB_ptr = Tables.silk_CB_lags_stage3_10_ms;
        }

        /* Calculate the correlations and energies needed in stage 3 */
        energies_st3 = new silk_pe_stage3_vals[nb_subfr * nb_cbk_search];
        cross_corr_st3 = new silk_pe_stage3_vals[nb_subfr * nb_cbk_search];
        for (int c = 0; c < nb_subfr * nb_cbk_search; c++)
        {
            /* fixme: these can be replaced with a linearized array probably, or at least a struct */
            /* This note is deliberately a block comment: as a line comment on a collapsed
             * physical line it would also comment out the statements that follow it. */
            energies_st3[c] = new silk_pe_stage3_vals();
            cross_corr_st3[c] = new silk_pe_stage3_vals();
        }

        silk_P_Ana_calc_corr_st3(cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity);
        silk_P_Ana_calc_energy_st3(energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity);

        lag_counter = 0;
        Inlines.OpusAssert(lag == Inlines.silk_SAT16(lag));
        contour_bias_Q15 = Inlines.silk_DIV32_16(((int)((SilkConstants.PE_FLATCONTOUR_BIAS) * ((long)1 << (15)) + 0.5)) /*Inlines.SILK_CONST(SilkConstants.PE_FLATCONTOUR_BIAS, 15)*/, lag);

        target = input_frame_ptr;
        target_ptr = SilkConstants.PE_LTP_MEM_LENGTH_MS * Fs_kHz;
        energy_target = Inlines.silk_ADD32(Inlines.silk_inner_prod_self(target, target_ptr, nb_subfr * sf_length), 1);
        for (d = start_lag; d <= end_lag; d++)
        {
            for (j = 0; j < nb_cbk_search; j++)
            {
                cross_corr = 0;
                energy = energy_target;
                for (k = 0; k < nb_subfr; k++)
                {
                    cross_corr = Inlines.silk_ADD32(cross_corr,
                        Inlines.MatrixGet(cross_corr_st3, k, j, nb_cbk_search).Values[lag_counter]);
                    energy = Inlines.silk_ADD32(energy,
                        Inlines.MatrixGet(energies_st3, k, j, nb_cbk_search).Values[lag_counter]);
                    Inlines.OpusAssert(energy >= 0);
                }
                if (cross_corr > 0)
                {
                    CCmax_new = Inlines.silk_DIV32_varQ(cross_corr, energy, 13 + 1); /* Q13 */
                    /* Reduce depending on flatness of contour */
                    diff = short.MaxValue - Inlines.silk_MUL(contour_bias_Q15, j); /* Q15 */
                    Inlines.OpusAssert(diff == Inlines.silk_SAT16(diff));
                    CCmax_new = Inlines.silk_SMULWB(CCmax_new, diff); /* Q14 */
                }
                else
                {
                    CCmax_new = 0;
                }

                if (CCmax_new > CCmax && (d + Tables.silk_CB_lags_stage3[0][j]) <= max_lag)
                {
                    CCmax = CCmax_new;
                    lag_new = d;
                    CBimax = j;
                }
            }
            /* lag_counter indexes the per-lag Values[] entries, one per d */
            lag_counter++;
        }

        for (k = 0; k < nb_subfr; k++)
        {
            pitch_out[k] = lag_new + Lag_CB_ptr[k][CBimax];
            pitch_out[k] = Inlines.silk_LIMIT(pitch_out[k], min_lag, SilkConstants.PE_MAX_LAG_MS * Fs_kHz);
        }
        lagIndex.Val = (short)(lag_new - min_lag);
        contourIndex.Val = (sbyte)CBimax;
    }
    else /* Fs_kHz == 8 */
    {
        /* Save Lags */
        for (k = 0; k < nb_subfr; k++)
        {
            pitch_out[k] = lag + Lag_CB_ptr[k][CBimax];
            pitch_out[k] = Inlines.silk_LIMIT(pitch_out[k], MIN_LAG_8KHZ, SilkConstants.PE_MAX_LAG_MS * 8);
        }
        lagIndex.Val = (short)(lag - MIN_LAG_8KHZ);
        contourIndex.Val = (sbyte)CBimax;
    }
    Inlines.OpusAssert(lagIndex.Val >= 0);

    /* return as voiced */
    return 0;
}
/*
 * Find pitch lags.
 *
 * Builds a sine-windowed copy of the most recent pitch_LPC_win_length input
 * samples, derives prediction coefficients from its autocorrelation, and
 * filters the analysis buffer with them to produce the residual in res.
 * Unless the frame is flagged as having no voice activity or is the first
 * frame after a reset, the pitch estimator is then run on that residual and
 * its outcome is stored in psEnc / psEncCtrl; otherwise the pitch outputs
 * are reset to zero.
 */
internal static void silk_find_pitch_lags(
    SilkChannelEncoder psEnc,       /* I/O  encoder state                           */
    SilkEncoderControl psEncCtrl,   /* I/O  encoder control                         */
    short[] res,                    /* O    residual                                */
    short[] x,                      /* I    Speech signal                           */
    int x_ptr                       /* I    index into x; the analysis buffer starts
                                            ltp_mem_length samples before it        */
    )
{
    int lpc_order = psEnc.pitchEstimationLPCOrder;
    int win_length = psEnc.pitch_LPC_win_length;
    int la_pitch = psEnc.la_pitch;

    /******************************************/
    /* Set up buffer lengths etc based on Fs  */
    /******************************************/
    int buf_len = psEnc.la_pitch + psEnc.frame_length + psEnc.ltp_mem_length;

    /* Safety check */
    Inlines.OpusAssert(buf_len >= psEnc.pitch_LPC_win_length);

    int x_buf = x_ptr - psEnc.ltp_mem_length;

    /*************************************/
    /* Estimate LPC AR coefficients      */
    /*************************************/

    /* The window covers the last win_length samples of the analysis buffer:
     * la_pitch tapered samples, an untouched middle part, la_pitch tapered samples. */
    int middle_length = win_length - Inlines.silk_LSHIFT(la_pitch, 1);
    short[] windowed = new short[win_length];
    int src = x_buf + buf_len - win_length;
    int dst = 0;

    /* First LA_LTP samples */
    ApplySineWindow.silk_apply_sine_window(windowed, dst, x, src, 1, la_pitch);
    dst += la_pitch;
    src += la_pitch;

    /* Middle un - windowed samples */
    Array.Copy(x, src, windowed, dst, middle_length);
    dst += middle_length;
    src += middle_length;

    /* Last LA_LTP samples */
    ApplySineWindow.silk_apply_sine_window(windowed, dst, x, src, 2, la_pitch);

    /* Calculate autocorrelation sequence; the reported scale is not used afterwards */
    int[] auto_corr = new int[SilkConstants.MAX_FIND_PITCH_LPC_ORDER + 1];
    BoxedValueInt boxed_scale = new BoxedValueInt();
    Autocorrelation.silk_autocorr(auto_corr, boxed_scale, windowed, win_length, lpc_order + 1);

    /* Add white noise, as fraction of energy */
    int white_noise_Q16 = ((int)((TuningParameters.FIND_PITCH_WHITE_NOISE_FRACTION) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.FIND_PITCH_WHITE_NOISE_FRACTION, 16)*/;
    auto_corr[0] = Inlines.silk_SMLAWB(auto_corr[0], auto_corr[0], white_noise_Q16) + 1;

    /* Calculate the reflection coefficients using schur */
    short[] rc_Q15 = new short[SilkConstants.MAX_FIND_PITCH_LPC_ORDER];
    int res_nrg = Schur.silk_schur(rc_Q15, auto_corr, lpc_order);

    /* Prediction gain */
    psEncCtrl.predGain_Q16 = Inlines.silk_DIV32_varQ(auto_corr[0], Inlines.silk_max_int(res_nrg, 1), 16);

    /* Convert reflection coefficients to prediction coefficients */
    int[] A_Q24 = new int[SilkConstants.MAX_FIND_PITCH_LPC_ORDER];
    K2A.silk_k2a(A_Q24, rc_Q15, lpc_order);

    /* Convert From 32 bit Q24 to 16 bit Q12 coefs */
    short[] A_Q12 = new short[SilkConstants.MAX_FIND_PITCH_LPC_ORDER];
    for (int c = 0; c < lpc_order; c++)
    {
        A_Q12[c] = (short)Inlines.silk_SAT16(Inlines.silk_RSHIFT(A_Q24[c], 12));
    }

    /* Do BWE */
    int bwe_Q16 = ((int)((TuningParameters.FIND_PITCH_BANDWIDTH_EXPANSION) * ((long)1 << (16)) + 0.5)) /*Inlines.SILK_CONST(TuningParameters.FIND_PITCH_BANDWIDTH_EXPANSION, 16)*/;
    BWExpander.silk_bwexpander(A_Q12, lpc_order, bwe_Q16);

    /*****************************************/
    /* LPC analysis filtering                */
    /*****************************************/
    Filters.silk_LPC_analysis_filter(res, 0, x, x_buf, A_Q12, 0, buf_len, lpc_order);

    /* No pitch analysis for frames without voice activity or right after a reset */
    if (psEnc.indices.signalType == SilkConstants.TYPE_NO_VOICE_ACTIVITY || psEnc.first_frame_after_reset != 0)
    {
        Arrays.MemSetInt(psEncCtrl.pitchL, 0, SilkConstants.MAX_NB_SUBFR);
        psEnc.indices.lagIndex = 0;
        psEnc.indices.contourIndex = 0;
        psEnc.LTPCorr_Q15 = 0;
        return;
    }

    /* Threshold for pitch estimator: a base value lowered by terms that depend on the
     * LPC order, speech activity, previous signal type and input tilt */
    int thrhld_Q13 = ((int)((0.6f) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(0.6f, 13)*/;
    thrhld_Q13 = Inlines.silk_SMLABB(
        thrhld_Q13,
        ((int)((-0.004f) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(-0.004f, 13)*/,
        lpc_order);
    thrhld_Q13 = Inlines.silk_SMLAWB(
        thrhld_Q13,
        ((int)((-0.1f) * ((long)1 << (21)) + 0.5)) /*Inlines.SILK_CONST(-0.1f, 21)*/,
        psEnc.speech_activity_Q8);
    thrhld_Q13 = Inlines.silk_SMLABB(
        thrhld_Q13,
        ((int)((-0.15f) * ((long)1 << (13)) + 0.5)) /*Inlines.SILK_CONST(-0.15f, 13)*/,
        Inlines.silk_RSHIFT(psEnc.prevSignalType, 1));
    thrhld_Q13 = Inlines.silk_SMLAWB(
        thrhld_Q13,
        ((int)((-0.1f) * ((long)1 << (14)) + 0.5)) /*Inlines.SILK_CONST(-0.1f, 14)*/,
        psEnc.input_tilt_Q15);
    thrhld_Q13 = Inlines.silk_SAT16(thrhld_Q13);

    /*****************************************/
    /* Call pitch estimator                  */
    /*****************************************/
    BoxedValueShort boxed_lagIndex = new BoxedValueShort(psEnc.indices.lagIndex);
    BoxedValueSbyte boxed_contourIndex = new BoxedValueSbyte(psEnc.indices.contourIndex);
    BoxedValueInt boxed_LTPcorr = new BoxedValueInt(psEnc.LTPCorr_Q15);

    int unvoiced = PitchAnalysisCore.silk_pitch_analysis_core(
        res,
        psEncCtrl.pitchL,
        boxed_lagIndex,
        boxed_contourIndex,
        boxed_LTPcorr,
        psEnc.prevLag,
        psEnc.pitchEstimationThreshold_Q16,
        (int)thrhld_Q13,
        psEnc.fs_kHz,
        psEnc.pitchEstimationComplexity,
        psEnc.nb_subfr);

    if (unvoiced != 0)
    {
        psEnc.indices.signalType = SilkConstants.TYPE_UNVOICED;
    }
    else
    {
        psEnc.indices.signalType = SilkConstants.TYPE_VOICED;
    }

    /* Copy the boxed outputs back into the encoder state */
    psEnc.indices.lagIndex = boxed_lagIndex.Val;
    psEnc.indices.contourIndex = boxed_contourIndex.Val;
    psEnc.LTPCorr_Q15 = boxed_LTPcorr.Val;
}