/// <summary>
/// Pitch estimation: from spectral peaks.
/// The analyzed fragment is weighted with a Hann window, its power spectrum is computed
/// and the result is passed to the spectrum-based overload of this method.
/// </summary>
/// <param name="signal">Signal to analyze; the window is applied to a slice of it, not to the signal itself</param>
/// <param name="startPos">Start position of the analyzed fragment (first argument of the signal's range indexer)</param>
/// <param name="endPos">End position of the analyzed fragment (second argument of the signal's range indexer); -1 stands for <c>signal.Length</c></param>
/// <param name="low">Lower bound forwarded to the spectrum-based overload</param>
/// <param name="high">Upper bound forwarded to the spectrum-based overload</param>
/// <param name="fftSize">FFT size; when it is not positive, <c>MathUtils.NextPowerOfTwo</c> of the fragment length is used</param>
/// <returns>Value returned by the spectrum-based overload</returns>
public static float FromSpectralPeaks(DiscreteSignal signal, int startPos = 0, int endPos = -1, float low = 80, float high = 400, int fftSize = 0)
{
    if (endPos == -1)
    {
        endPos = signal.Length;
    }

    // Always take a slice, even for the full range: the window is applied in place,
    // so windowing 'signal' directly would silently modify the caller's data.
    // NOTE(review): assumes the range indexer returns a new signal (not a view) - confirm.
    var fragment = signal[startPos, endPos];

    fragment.ApplyWindow(WindowTypes.Hann);

    var size = fftSize > 0 ? fftSize : MathUtils.NextPowerOfTwo(fragment.Length);
    var fft = new RealFft(size);

    var spectrum = fft.PowerSpectrum(fragment, false).Samples;

    return FromSpectralPeaks(spectrum, fragment.SamplingRate, low, high);
}
// Returns the samples of the non-normalized power spectrum of the fragment
// _signal[start, start + 512], where start is derived from the hop number idx.
float[] ComputeSpectrum(int idx)
{
    var start = (int)(_signal.SamplingRate * HopDuration * idx);

    var fragment = _signal[start, start + 512];
    var powerSpectrum = _fft.PowerSpectrum(fragment, normalize: false);

    return powerSpectrum.Samples;
}
/// <summary>
/// Reads FFT size, cepstrum size and hop size from the text boxes, recreates the FFT /
/// cepstral transform objects when the sizes have changed, and then recomputes and shows
/// the spectrum, its cepstrum-based estimate, the cepstrum and the pitch mark
/// for the current block of the signal.
/// </summary>
private void UpdateSpectrumAndCepstrum()
{
    var requestedFftSize = int.Parse(fftSizeTextBox.Text);
    var requestedCepstrumSize = int.Parse(cepstrumSizeTextBox.Text);
    _hopSize = int.Parse(hopSizeTextBox.Text);

    // a new FFT size requires a new FFT object and a new cepstral transform
    if (_fftSize != requestedFftSize)
    {
        _fftSize = requestedFftSize;
        _fft = new RealFft(requestedFftSize);
        _cepstralTransform = new CepstralTransform(requestedCepstrumSize, _fftSize);
    }

    // a new cepstrum size requires only a new cepstral transform
    if (_cepstrumSize != requestedCepstrumSize)
    {
        _cepstrumSize = requestedCepstrumSize;
        _cepstralTransform = new CepstralTransform(_cepstrumSize, _fftSize);
    }

    // current block of the signal
    var start = _hopSize * _specNo;
    var block = _signal[start, start + _fftSize];

    var cepstrum = new float[_fftSize];
    _cepstralTransform.RealCepstrum(block.Samples, cepstrum);

    // ------------------------------------------------------------------
    // visualization only: spectrum estimated from the first 32 cepstral
    // coefficients (the remaining coefficients are left at zero)
    // ------------------------------------------------------------------
    var re = new float[_fftSize];
    var im = new float[_fftSize];

    for (var n = 0; n < 32; n++)
    {
        re[n] = cepstrum[n];
    }

    _fft.Direct(re, re, im);

    var powerSpectrum = _fft.PowerSpectrum(block, normalize: false);
    var spectrum = powerSpectrum.Samples;

    var avg = spectrum.Average(s => LevelScale.ToDecibel(s));

    var halfSpectrum = re.Take(_fftSize / 2 + 1);
    var spectrumEstimate = halfSpectrum.Select(s => (float)LevelScale.FromDecibel(s * 40 - avg))
                                       .ToArray();

    spectrumPanel.Line = spectrum;
    spectrumPanel.Markline = spectrumEstimate;
    spectrumPanel.ToDecibel();

    var pitch = Pitch.FromCepstrum(block);

    cepstrumPanel.Line = cepstrum;
    cepstrumPanel.Mark = (int)(_signal.SamplingRate / pitch);
}
/// <summary>
/// <para>Computes S(implified)PNCC vector in one frame according to [Kim and Stern, 2016].</para>
/// <para>
/// Steps performed here (the window is applied beforehand by the base extractor):
/// <list type="number">
///     <item>Power spectrum</item>
///     <item>Filterbank (<c>FilterBank</c>) applied to the spectrum</item>
///     <item>Mean power normalization with a running mean</item>
///     <item>Nonlinearity (power 1/<c>_power</c>, or Log10 if <c>_power</c> is zero)</item>
///     <item>Normalized DCT</item>
///     <item>Optionally: first coefficient replaced with log-energy of the block</item>
/// </list>
/// </para>
/// </summary>
/// <param name="block">Block of data</param>
/// <param name="features">Features (one SPNCC feature vector) computed in the block</param>
public override void ProcessFrame(float[] block, float[] features)
{
    const float meanPower = 1e10f;

    // power spectrum -> filterbank

    _fft.PowerSpectrum(block, _spectrum, false);
    FilterBanks.Apply(FilterBank, _spectrum, _filteredSpectrum);

    // mean power normalization: update the running mean with the total power of this frame

    var totalPower = 0.0f;

    foreach (var bandPower in _filteredSpectrum)
    {
        totalPower += bandPower;
    }

    _mean = LambdaMu * _mean + (1 - LambdaMu) * totalPower;

    for (var band = 0; band < _filteredSpectrum.Length; band++)
    {
        _filteredSpectrum[band] *= meanPower / _mean;
    }

    // nonlinearity: Log10 if no power is specified, root of degree _power otherwise

    if (_power == 0)
    {
        for (var band = 0; band < _filteredSpectrum.Length; band++)
        {
            _filteredSpectrum[band] = (float)Math.Log10(_filteredSpectrum[band] + float.Epsilon);
        }
    }
    else
    {
        for (var band = 0; band < _filteredSpectrum.Length; band++)
        {
            _filteredSpectrum[band] = (float)Math.Pow(_filteredSpectrum[band], 1.0 / _power);
        }
    }

    // normalized DCT writes the resulting coefficients to the output vector

    _dct.DirectNorm(_filteredSpectrum, features);

    // optional: log-energy of the block (floored) instead of the first coefficient

    if (_includeEnergy)
    {
        var energy = block.Sum(x => x * x);

        features[0] = (float)Math.Log(Math.Max(energy, _logEnergyFloor));
    }
}
/// <summary>
/// <para>Computes PLP-RASTA feature vector in one frame.</para>
/// <para>
/// Steps performed here (the window is applied beforehand by the base extractor):
/// <list type="number">
///     <item>Power spectrum (not normalized)</item>
///     <item>Filterbank (<c>FilterBank</c>) applied to the spectrum</item>
///     <item>[Optional] RASTA filtering of each band in log-domain</item>
///     <item>Equal loudness curve and power 0.33</item>
///     <item>Autocorrelation via the IDFT table, then LPC (Levinson-Durbin)</item>
///     <item>LPC to cepstrum</item>
///     <item>[Optional] liftering</item>
///     <item>[Optional] first coefficient replaced with log-energy of the block</item>
/// </list>
/// </para>
/// </summary>
/// <param name="block">Block of data</param>
/// <param name="features">Features (one PLP feature vector) computed in the block</param>
public override void ProcessFrame(float[] block, float[] features)
{
    // power spectrum -> band spectrum

    _fft.PowerSpectrum(block, _spectrum, false);
    FilterBanks.Apply(FilterBank, _spectrum, _bandSpectrum);

    // optional RASTA filtering: log -> per-band filter -> exp

    if (_rasta > 0)
    {
        for (var band = 0; band < _bandSpectrum.Length; band++)
        {
            var logPower = (float)Math.Log(_bandSpectrum[band] + float.Epsilon);

            float filtered = _rastaFilters[band].Process(logPower);

            _bandSpectrum[band] = (float)Math.Exp(filtered);
        }
    }

    // equal loudness curve and compression (band values are floored at 1.0 first)

    for (var band = 0; band < _bandSpectrum.Length; band++)
    {
        var floored = Math.Max(_bandSpectrum[band], 1.0);

        _bandSpectrum[band] = (float)Math.Pow(floored * _equalLoudnessCurve[band], 0.33);
    }

    // autocorrelation samples from the post-processed spectrum (via IDFT table);
    // the first and the last table columns are multiplied by the duplicated edge bands

    var n = _idftTable[0].Length;
    var last = n - 1;

    for (var k = 0; k < _idftTable.Length; k++)
    {
        var row = _idftTable[k];

        var acc = row[0] * _bandSpectrum[0] + row[last] * _bandSpectrum[n - 3];

        for (var j = 1; j < last; j++)
        {
            acc += row[j] * _bandSpectrum[j - 1];
        }

        _cc[k] = acc / (2 * last);
    }

    // LPC (coefficients are reset before each run)

    Array.Clear(_lpc, 0, _lpc.Length);

    var err = Lpc.LevinsonDurbin(_cc, _lpc, _lpcOrder);

    // LPC -> LPCC

    Lpc.ToCepstrum(_lpc, err, features);

    // optional liftering

    if (_lifterCoeffs != null)
    {
        features.ApplyWindow(_lifterCoeffs);
    }

    // optional: log-energy of the block (floored) instead of the first coefficient

    if (_includeEnergy)
    {
        var energy = block.Sum(x => x * x);

        features[0] = (float)Math.Log(Math.Max(energy, _logEnergyFloor));
    }
}
/// <summary>
/// Computes chroma feature vector in one frame:
/// the power spectrum of the block is passed through the filter bank,
/// and the filter bank output is the feature vector.
/// </summary>
/// <param name="block">Block of data</param>
/// <param name="features">Features (one chroma feature vector) computed in the block</param>
public override void ProcessFrame(float[] block, float[] features)
{
    const bool normalize = false;

    // the spectrum goes to the reusable buffer...
    _fft.PowerSpectrum(block, _spectrum, normalize);

    // ...and the filter bank writes its result directly to the output vector
    FilterBanks.Apply(_filterBank, _spectrum, features);
}
/// <summary>
/// Method for computing modulation spectra.
/// Each vector representing one modulation spectrum is a flattened version of 2D spectrum
/// (power spectra of all envelopes written one after another).
/// <para>
/// If no featuregram was provided, the envelopes are computed here: the signal is processed
/// frame by frame (pre-emphasis, window, power spectrum, filterbank) and each filterbank
/// output forms one envelope. Otherwise the envelopes are the columns of the featuregram
/// and <paramref name="samples"/> are not analyzed.
/// </para>
/// </summary>
/// <param name="samples">Samples for analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of flattened modulation spectra</returns>
public override List<float[]> ComputeFrom(float[] samples, int startSample, int endSample)
{
    Guard.AgainstInvalidRange(startSample, endSample, "starting pos", "ending pos");

    var frameSize = FrameSize;
    var hopSize = HopSize;

    var featureVectors = new List<float[]>();

    var en = 0;             // number of values stored in each envelope
    var i = startSample;

    if (_featuregram == null)
    {
        _envelopes = new float[_filterbank.Length][];

        for (var n = 0; n < _envelopes.Length; n++)
        {
            _envelopes[n] = new float[samples.Length / hopSize];
        }

        var prevSample = startSample > 0 ? samples[startSample - 1] : 0.0f;

        var lastSample = endSample - Math.Max(frameSize, hopSize);

        // ===================== compute local FFTs (do STFT) =======================

        for (i = startSample; i < lastSample; i += hopSize)
        {
            // copy frameSize samples
            samples.FastCopyTo(_block, frameSize, i);

            // fill zeros to fftSize if frameSize < fftSize
            for (var k = frameSize; k < _block.Length; k++)
            {
                _block[k] = 0;
            }

            // 0) pre-emphasis (if needed)

            if (_preEmphasis > 1e-10f)
            {
                for (var k = 0; k < frameSize; k++)
                {
                    var y = _block[k] - prevSample * _preEmphasis;
                    prevSample = _block[k];
                    _block[k] = y;
                }

                // the next frame starts hopSize samples further,
                // so its "previous sample" is the one right before that position
                prevSample = samples[i + hopSize - 1];
            }

            // 1) apply window

            if (_window != WindowTypes.Rectangular)
            {
                _block.ApplyWindow(_windowSamples);
            }

            // 2) calculate power spectrum

            _fft.PowerSpectrum(_block, _spectrum);

            // 3) apply filterbank...

            FilterBanks.Apply(_filterbank, _spectrum, _filteredSpectrum);

            // ...and save results for future calculations

            for (var n = 0; n < _envelopes.Length; n++)
            {
                _envelopes[n][en] = _filteredSpectrum[n];
            }

            en++;
        }
    }
    else
    {
        // envelopes are the columns of the featuregram

        en = _featuregram.Length;

        _envelopes = new float[_featuregram[0].Length][];

        for (var n = 0; n < _envelopes.Length; n++)
        {
            _envelopes[n] = new float[en];

            for (i = 0; i < en; i++)
            {
                _envelopes[n][i] = _featuregram[i][n];
            }
        }
    }

    // =========================== modulation analysis =======================

    var envelopeLength = en;

    // long-term AVG-normalization

    foreach (var envelope in _envelopes)
    {
        var avg = 0.0f;

        for (var k = 0; k < envelopeLength; k++)
        {
            // average of absolute values: the sign of the VALUE must be checked
            // (the former check of the index 'k >= 0' was always true, so negative
            // values, e.g. coming from a featuregram, were summed with their sign)
            avg += Math.Abs(envelope[k]);
        }

        avg /= envelopeLength;

        if (avg >= 1e-10f)   // this happens more frequently
        {
            for (var k = 0; k < envelopeLength; k++)
            {
                envelope[k] /= avg;
            }
        }
    }

    i = 0;
    while (i < envelopeLength)
    {
        var vector = new float[_envelopes.Length * (_modulationFftSize / 2 + 1)];
        var offset = 0;

        foreach (var envelope in _envelopes)
        {
            // copy modFftSize samples (or envelopeLength - i in the end)
            var len = Math.Min(_modulationFftSize, envelopeLength - i);
            envelope.FastCopyTo(_modBlock, len, i);

            // fill zeros to modFftSize if len < modFftSize
            for (var k = len; k < _modBlock.Length; k++)
            {
                _modBlock[k] = 0;
            }

            _modulationFft.PowerSpectrum(_modBlock, _modSpectrum);
            _modSpectrum.FastCopyTo(vector, _modSpectrum.Length, 0, offset);

            offset += _modSpectrum.Length;
        }

        featureVectors.Add(vector);

        i += _modulationHopSize;
    }

    return featureVectors;
}
/// <summary>
/// Constructor. The FFT size is derived from the filterbank:
/// 2 * (length of one filter - 1), and the frame size must not exceed it.
/// </summary>
/// <param name="samplingRate">Sampling rate (passed to the base extractor)</param>
/// <param name="filterbank">Filterbank; the number of filters defines the number of features</param>
/// <param name="frameDuration">Frame duration, in seconds (passed to the base extractor)</param>
/// <param name="hopDuration">Hop duration, in seconds (passed to the base extractor)</param>
/// <param name="preEmphasis">Pre-emphasis coefficient (passed to the base extractor)</param>
/// <param name="nonLinearity">Nonlinearity applied together with the filterbank (by default the filterbank is applied as is)</param>
/// <param name="spectrumType">Type of spectrum the filterbank is applied to (unlisted values are treated as <c>SpectrumType.Power</c>)</param>
/// <param name="window">Window type; window samples are generated for the frame size</param>
/// <param name="logFloor">Floor value passed to the Log10 / LogE / ToDecibel post-processing</param>
public FilterbankExtractor(int samplingRate,
                           float[][] filterbank,
                           double frameDuration = 0.0256/*sec*/,
                           double hopDuration = 0.010/*sec*/,
                           double preEmphasis = 0,
                           NonLinearityType nonLinearity = NonLinearityType.None,
                           SpectrumType spectrumType = SpectrumType.Power,
                           WindowTypes window = WindowTypes.Hamming,
                           float logFloor = float.Epsilon)

    : base(samplingRate, frameDuration, hopDuration, preEmphasis)
{
    FilterBank = filterbank;
    FeatureCount = filterbank.Length;

    _blockSize = 2 * (filterbank[0].Length - 1);

    Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");

    _fft = new RealFft(_blockSize);

    _window = window;
    _windowSamples = Window.OfType(_window, FrameSize);

    // setup spectrum post-processing: =======================================================

    _logFloor = logFloor;
    _nonLinearityType = nonLinearity;

    switch (nonLinearity)
    {
        case NonLinearityType.Log10:
            _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _bandSpectrum, _logFloor);
            break;
        case NonLinearityType.LogE:
            _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _bandSpectrum, _logFloor);
            break;
        case NonLinearityType.ToDecibel:
            _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _bandSpectrum, _logFloor);
            break;
        case NonLinearityType.CubicRoot:
            _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _bandSpectrum, 0.33);
            break;
        default:
            _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _bandSpectrum);
            break;
    }

    // setup spectrum computation: ===========================================================

    _spectrumType = spectrumType;

    switch (_spectrumType)
    {
        case SpectrumType.Magnitude:
            _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false);
            break;
        case SpectrumType.MagnitudeNormalized:
            _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true);
            break;
        case SpectrumType.PowerNormalized:
            _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true);
            break;
        // the power spectrum is also the fallback, so that the delegate is never left unassigned
        case SpectrumType.Power:
        default:
            _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false);
            break;
    }

    // reserve memory for reusable blocks
    // (the delegates above read these fields only when they are invoked)

    _spectrum = new float[_blockSize / 2 + 1];
    _bandSpectrum = new float[filterbank.Length];
}
/// <summary>
/// Constructs extractor from configuration <paramref name="options"/>.
/// If the options contain no filterbank, a triangular mel filterbank is created;
/// otherwise the given filterbank is used and the FFT size is derived from it.
/// </summary>
/// <param name="options">Extractor configuration</param>
public MfccExtractor(MfccOptions options) : base(options)
{
    FeatureCount = options.FeatureCount;

    var bandCount = options.FilterBankSize;

    // filterbank and FFT size: ==============================================================

    if (options.FilterBank is null)
    {
        if (options.FftSize > FrameSize)
        {
            _blockSize = options.FftSize;
        }
        else
        {
            _blockSize = MathUtils.NextPowerOfTwo(FrameSize);
        }

        var melBands = FilterBanks.MelBands(bandCount, SamplingRate, options.LowFrequency, options.HighFrequency);

        // HTK/Kaldi-style
        FilterBank = FilterBanks.Triangular(_blockSize, SamplingRate, melBands, mapper: Scale.HerzToMel);
    }
    else
    {
        FilterBank = options.FilterBank;
        bandCount = FilterBank.Length;
        _blockSize = 2 * (FilterBank[0].Length - 1);

        Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
    }

    _fft = new RealFft(_blockSize);

    // liftering and energy: =================================================================

    _lifterSize = options.LifterSize;

    if (_lifterSize > 0)
    {
        _lifterCoeffs = Window.Liftering(FeatureCount, _lifterSize);
    }
    else
    {
        _lifterCoeffs = null;
    }

    _includeEnergy = options.IncludeEnergy;
    _logEnergyFloor = options.LogEnergyFloor;

    // DCT: the first character selects the type (DCT-2 unless it is '1', '3' or '4'),
    //      the trailing 'N' (in any case) selects the normalized version ====================

    _dctType = options.DctType;

    var dctKind = _dctType[0];

    if (dctKind == '1')
    {
        _dct = new Dct1(bandCount);
    }
    else if (dctKind == '3')
    {
        _dct = new Dct3(bandCount);
    }
    else if (dctKind == '4')
    {
        _dct = new Dct4(bandCount);
    }
    else
    {
        _dct = new Dct2(bandCount);
    }

    var normalizedDct = _dctType.EndsWith("N", StringComparison.OrdinalIgnoreCase);

    if (normalizedDct)
    {
        _applyDct = mfccs => _dct.DirectNorm(_melSpectrum, mfccs);
    }
    else
    {
        _applyDct = mfccs => _dct.Direct(_melSpectrum, mfccs);
    }

    // spectrum post-processing (filterbank + nonlinearity): =================================

    _logFloor = options.LogFloor;
    _nonLinearityType = options.NonLinearity;

    switch (_nonLinearityType)
    {
        case NonLinearityType.Log10:
            _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _melSpectrum, _logFloor);
            break;

        case NonLinearityType.LogE:
            _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _melSpectrum, _logFloor);
            break;

        case NonLinearityType.ToDecibel:
            _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _melSpectrum, _logFloor);
            break;

        case NonLinearityType.CubicRoot:
            _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _melSpectrum, 0.33);
            break;

        default:
            _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _melSpectrum);
            break;
    }

    // spectrum computation (non-normalized power spectrum unless specified otherwise): ======

    _spectrumType = options.SpectrumType;

    switch (_spectrumType)
    {
        case SpectrumType.Magnitude:
            _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false);
            break;

        case SpectrumType.MagnitudeNormalized:
            _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true);
            break;

        case SpectrumType.PowerNormalized:
            _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true);
            break;

        default:
            _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false);
            break;
    }

    // reusable buffers (the delegates above read these fields only when invoked)

    _spectrum = new float[_blockSize / 2 + 1];
    _melSpectrum = new float[bandCount];
}
/// <summary>
/// Constructs extractor from configuration <paramref name="options"/>.
/// If the options contain a filterbank, it is used and the FFT size is derived from it;
/// otherwise a rectangular mel filterbank is created.
/// </summary>
/// <param name="options">Filterbank options</param>
public FilterbankExtractor(FilterbankOptions options) : base(options)
{
    var bandCount = options.FilterBankSize;

    // filterbank and FFT size: ==============================================================

    if (options.FilterBank != null)
    {
        FilterBank = options.FilterBank;
        bandCount = FilterBank.Length;
        _blockSize = 2 * (FilterBank[0].Length - 1);

        Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
    }
    else
    {
        if (options.FftSize > FrameSize)
        {
            _blockSize = options.FftSize;
        }
        else
        {
            _blockSize = MathUtils.NextPowerOfTwo(FrameSize);
        }

        var melBands = FilterBanks.MelBands(bandCount, SamplingRate, options.LowFrequency, options.HighFrequency, false);

        FilterBank = FilterBanks.Rectangular(_blockSize, SamplingRate, melBands, mapper: Scale.HerzToMel);
    }

    FeatureCount = bandCount;

    _fft = new RealFft(_blockSize);

    // spectrum post-processing (filterbank + nonlinearity): =================================

    _logFloor = options.LogFloor;
    _nonLinearityType = options.NonLinearity;

    switch (_nonLinearityType)
    {
        case NonLinearityType.Log10:
            _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _bandSpectrum, _logFloor);
            break;

        case NonLinearityType.LogE:
            _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _bandSpectrum, _logFloor);
            break;

        case NonLinearityType.ToDecibel:
            _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _bandSpectrum, _logFloor);
            break;

        case NonLinearityType.CubicRoot:
            _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _bandSpectrum, 0.33);
            break;

        default:
            _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _bandSpectrum);
            break;
    }

    // spectrum computation (non-normalized power spectrum unless specified otherwise): ======

    _spectrumType = options.SpectrumType;

    switch (_spectrumType)
    {
        case SpectrumType.Magnitude:
            _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false);
            break;

        case SpectrumType.MagnitudeNormalized:
            _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true);
            break;

        case SpectrumType.PowerNormalized:
            _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true);
            break;

        default:
            _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false);
            break;
    }

    // reusable buffers (the delegates above read these fields only when invoked)

    _spectrum = new float[_blockSize / 2 + 1];
    _bandSpectrum = new float[bandCount];
}
/// <summary>
/// Constructor.
/// If no filterbank is given, a triangular mel filterbank is created;
/// otherwise the given filterbank is used and the FFT size is derived from it
/// as 2 * (length of one filter - 1).
/// </summary>
/// <param name="samplingRate">Sampling rate (passed to the base extractor)</param>
/// <param name="featureCount">Number of features (also the size of the liftering window)</param>
/// <param name="frameDuration">Frame duration, in seconds (passed to the base extractor)</param>
/// <param name="hopDuration">Hop duration, in seconds (passed to the base extractor)</param>
/// <param name="filterbankSize">Number of mel bands; replaced with the number of filters if <paramref name="filterbank"/> is given</param>
/// <param name="lowFreq">Lower frequency passed to the mel bands generator (not used if <paramref name="filterbank"/> is given)</param>
/// <param name="highFreq">Upper frequency passed to the mel bands generator (not used if <paramref name="filterbank"/> is given)</param>
/// <param name="fftSize">FFT size; used only if it exceeds the frame size, otherwise <c>MathUtils.NextPowerOfTwo</c> of the frame size is used (not used if <paramref name="filterbank"/> is given)</param>
/// <param name="filterbank">Custom filterbank, or null to create the mel filterbank</param>
/// <param name="lifterSize">Liftering size; liftering coefficients are created only if it is positive</param>
/// <param name="preEmphasis">Pre-emphasis coefficient (passed to the base extractor)</param>
/// <param name="includeEnergy">Flag stored for the frame processing code (not used in this constructor)</param>
/// <param name="dctType">"1", "1N", "2", "2N", "3", "3N", "4", "4N"</param>
/// <param name="nonLinearity">Nonlinearity applied together with the filterbank (for unlisted values the filterbank is applied as is)</param>
/// <param name="spectrumType">Type of spectrum the filterbank is applied to (unlisted values are treated as <c>SpectrumType.Power</c>)</param>
/// <param name="window">Window type; window samples are generated for the frame size</param>
/// <param name="logFloor">Floor value passed to the Log10 / LogE / ToDecibel post-processing</param>
/// <exception cref="ArgumentException">DCT type is null, empty or does not start with '1', '2', '3' or '4'</exception>
public MfccExtractor(int samplingRate,
                     int featureCount,
                     double frameDuration = 0.0256/*sec*/,
                     double hopDuration = 0.010/*sec*/,
                     int filterbankSize = 24,
                     double lowFreq = 0,
                     double highFreq = 0,
                     int fftSize = 0,
                     float[][] filterbank = null,
                     int lifterSize = 0,
                     double preEmphasis = 0,
                     bool includeEnergy = false,
                     string dctType = "2N",
                     NonLinearityType nonLinearity = NonLinearityType.Log10,
                     SpectrumType spectrumType = SpectrumType.Power,
                     WindowTypes window = WindowTypes.Hamming,
                     float logFloor = float.Epsilon)

    : base(samplingRate, frameDuration, hopDuration, preEmphasis)
{
    // a null or empty DCT type is reported the same way as an unsupported one
    // (instead of failing with NullReferenceException / IndexOutOfRangeException)

    if (string.IsNullOrEmpty(dctType))
    {
        throw new ArgumentException("Only DCT-1, 2, 3 and 4 are supported!");
    }

    FeatureCount = featureCount;

    _lowFreq = lowFreq;
    _highFreq = highFreq;

    if (filterbank == null)
    {
        _blockSize = fftSize > FrameSize ? fftSize : MathUtils.NextPowerOfTwo(FrameSize);

        var melBands = FilterBanks.MelBands(filterbankSize, _blockSize, SamplingRate, _lowFreq, _highFreq);
        FilterBank = FilterBanks.Triangular(_blockSize, SamplingRate, melBands, mapper: Scale.HerzToMel);   // HTK/Kaldi-style
    }
    else
    {
        FilterBank = filterbank;
        filterbankSize = filterbank.Length;
        _blockSize = 2 * (filterbank[0].Length - 1);

        Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
    }

    _fft = new RealFft(_blockSize);

    _window = window;
    _windowSamples = Window.OfType(_window, FrameSize);

    _lifterSize = lifterSize;
    _lifterCoeffs = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;

    _includeEnergy = includeEnergy;

    // setup DCT: ============================================================================

    _dctType = dctType;

    switch (dctType[0])
    {
        case '1':
            _dct = new Dct1(filterbankSize);
            break;
        case '2':
            _dct = new Dct2(filterbankSize);
            break;
        case '3':
            _dct = new Dct3(filterbankSize);
            break;
        case '4':
            _dct = new Dct4(filterbankSize);
            break;
        default:
            throw new ArgumentException("Only DCT-1, 2, 3 and 4 are supported!");
    }

    // the second character 'N' (in any case) selects the normalized DCT;
    // the check is culture-invariant since the DCT type is an identifier, not a text for users

    if (dctType.Length > 1 && char.ToUpperInvariant(dctType[1]) == 'N')
    {
        _applyDct = mfccs => _dct.DirectNorm(_melSpectrum, mfccs);
    }
    else
    {
        _applyDct = mfccs => _dct.Direct(_melSpectrum, mfccs);
    }

    // setup spectrum post-processing: =======================================================

    _logFloor = logFloor;
    _nonLinearityType = nonLinearity;

    switch (nonLinearity)
    {
        case NonLinearityType.Log10:
            _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _melSpectrum, _logFloor);
            break;
        case NonLinearityType.LogE:
            _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _melSpectrum, _logFloor);
            break;
        case NonLinearityType.ToDecibel:
            _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _melSpectrum, _logFloor);
            break;
        case NonLinearityType.CubicRoot:
            _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _melSpectrum, 0.33);
            break;
        default:
            _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _melSpectrum);
            break;
    }

    // setup spectrum computation: ===========================================================

    _spectrumType = spectrumType;

    switch (_spectrumType)
    {
        case SpectrumType.Magnitude:
            _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false);
            break;
        case SpectrumType.MagnitudeNormalized:
            _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true);
            break;
        case SpectrumType.PowerNormalized:
            _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true);
            break;
        // the power spectrum is also the fallback, so that the delegate is never left unassigned
        case SpectrumType.Power:
        default:
            _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false);
            break;
    }

    // reserve memory for reusable blocks
    // (the delegates above read these fields only when they are invoked)

    _spectrum = new float[_blockSize / 2 + 1];
    _melSpectrum = new float[filterbankSize];
}
/// <summary>
/// S(implified)PNCC algorithm according to [Kim and Stern, 2016].
/// In each frame do:
///
///     0) Zero the part of the block after the frame
///     1) Apply window
///     2) Obtain power spectrum
///     3) Apply filterbank (<c>FilterBank</c>)
///     4) Mean power normalization with a running mean
///     5) Apply nonlinearity (power 1/<c>_power</c>, or Log10 if <c>_power</c> is zero)
///     6) Do normalized DCT
///
/// </summary>
/// <param name="block">Samples for analysis (modified in place: zero-padded and windowed)</param>
/// <returns>New array with <c>FeatureCount</c> SPNCC coefficients</returns>
public override float[] ProcessFrame(float[] block)
{
    const float meanPower = 1e10f;

    // samples after the frame are zeroed (matters if frame size is less than block size)

    for (var pos = FrameSize; pos < block.Length; pos++)
    {
        block[pos] = 0;
    }

    // window -> power spectrum -> filterbank

    block.ApplyWindow(_windowSamples);

    _fft.PowerSpectrum(block, _spectrum, false);

    FilterBanks.Apply(FilterBank, _spectrum, _filteredSpectrum);

    // mean power normalization: update the running mean with the total power of this frame

    var totalPower = 0.0f;

    foreach (var bandPower in _filteredSpectrum)
    {
        totalPower += bandPower;
    }

    _mean = LambdaMu * _mean + (1 - LambdaMu) * totalPower;

    for (var band = 0; band < _filteredSpectrum.Length; band++)
    {
        _filteredSpectrum[band] *= meanPower / _mean;
    }

    // nonlinearity: Log10 if no power is specified, root of degree _power otherwise

    if (_power == 0)
    {
        for (var band = 0; band < _filteredSpectrum.Length; band++)
        {
            _filteredSpectrum[band] = (float)Math.Log10(_filteredSpectrum[band] + float.Epsilon);
        }
    }
    else
    {
        for (var band = 0; band < _filteredSpectrum.Length; band++)
        {
            _filteredSpectrum[band] = (float)Math.Pow(_filteredSpectrum[band], 1.0 / _power);
        }
    }

    // normalized DCT writes the coefficients to a newly allocated vector

    var coefficients = new float[FeatureCount];

    _dct.DirectNorm(_filteredSpectrum, coefficients);

    return coefficients;
}
/// <summary>
/// Standard method for computing PLP features.
/// In each frame do:
///
///     0) Zero the part of the block after the frame
///     1) Apply window
///     2) Obtain power spectrum (not normalized)
///     3) Apply filterbank (<c>FilterBank</c>)
///     4) [Optional] filter each band with a RASTA filter in log-domain
///     5) Apply equal loudness curve
///     6) Raise to the power 0.33
///     7) Do LPC (autocorrelation via the IDFT table, then Levinson-Durbin)
///     8) Convert LPC to cepstrum
///     9) [Optional] lifter cepstrum
///
/// </summary>
/// <param name="block">Samples for analysis (modified in place: zero-padded and windowed)</param>
/// <returns>New array with <c>FeatureCount</c> PLP coefficients</returns>
public override float[] ProcessFrame(float[] block)
{
    // samples after the frame are zeroed (matters if frame size is less than block size)

    for (var pos = FrameSize; pos < block.Length; pos++)
    {
        block[pos] = 0;
    }

    // window -> power spectrum -> band spectrum

    block.ApplyWindow(_windowSamples);

    _fft.PowerSpectrum(block, _spectrum, false);

    FilterBanks.Apply(FilterBank, _spectrum, _bandSpectrum);

    // optional RASTA filtering: log -> per-band filter -> exp

    if (_rasta > 0)
    {
        for (var band = 0; band < _bandSpectrum.Length; band++)
        {
            var logPower = (float)Math.Log(_bandSpectrum[band] + float.Epsilon);

            float filtered = _rastaFilters[band].Process(logPower);

            _bandSpectrum[band] = (float)Math.Exp(filtered);
        }
    }

    // equal loudness curve and compression (band values are floored at 1.0 first)

    for (var band = 0; band < _bandSpectrum.Length; band++)
    {
        var floored = Math.Max(_bandSpectrum[band], 1.0);

        _bandSpectrum[band] = (float)Math.Pow(floored * _equalLoudnessCurve[band], 0.33);
    }

    // autocorrelation samples from the post-processed spectrum (via IDFT table);
    // the first and the last table columns are multiplied by the duplicated edge bands

    var n = _idftTable[0].Length;
    var last = n - 1;

    for (var k = 0; k < _idftTable.Length; k++)
    {
        var row = _idftTable[k];

        var acc = row[0] * _bandSpectrum[0] + row[last] * _bandSpectrum[n - 3];

        for (var j = 1; j < last; j++)
        {
            acc += row[j] * _bandSpectrum[j - 1];
        }

        _cc[k] = acc / (2 * last);
    }

    // LPC (coefficients are reset before each run)

    Array.Clear(_lpc, 0, _lpc.Length);

    var err = Lpc.LevinsonDurbin(_cc, _lpc, _lpcOrder);

    // LPC -> LPCC, written to a newly allocated vector

    var coefficients = new float[FeatureCount];

    Lpc.ToCepstrum(_lpc, err, coefficients);

    // optional liftering

    if (_lifterCoeffs != null)
    {
        coefficients.ApplyWindow(_lifterCoeffs);
    }

    return coefficients;
}