/// <summary>
/// Method for computing direct STFT of a signal block.
/// STFT (spectrogram) is essentially the list of spectra in time.
/// </summary>
/// <param name="samples">The samples of signal</param>
/// <returns>
/// STFT of the signal: one (real, imaginary) pair of arrays per complete frame.
/// The list is empty if the signal is shorter than one window.
/// </returns>
public List<Tuple<float[], float[]>> Direct(float[] samples)
{
    // Number of complete frames that fit into the signal. The same count drives both
    // the pre-allocation and the processing loop, so no frame is left unprocessed
    // (previously the last frame stayed all-zero when the signal ended exactly on a frame boundary).
    var frameCount = samples.Length >= _windowSize
                   ? (samples.Length - _windowSize) / _hopSize + 1
                   : 0;

    // pre-allocate memory:
    var stft = new List<Tuple<float[], float[]>>(frameCount);

    for (var i = 0; i < frameCount; i++)
    {
        stft.Add(new Tuple<float[], float[]>(new float[_fftSize], new float[_fftSize]));
    }

    // stft:
    var windowedBuffer = new float[_windowSize];

    for (int pos = 0, i = 0; i < frameCount; pos += _hopSize, i++)
    {
        samples.FastCopyTo(windowedBuffer, _windowSize, pos);

        if (_window != WindowTypes.Rectangular)
        {
            windowedBuffer.ApplyWindow(_windowSamples);
        }

        _fft.Direct(windowedBuffer, stft[i].Item1, stft[i].Item2);
    }

    return stft;
}
/// <summary>
/// Method for computing a spectrogram.
/// The spectrogram is essentially a list of power spectra in time.
/// </summary>
/// <param name="samples">The samples of signal</param>
/// <returns>
/// Spectrogram of the signal: one array of (fftSize / 2 + 1) values per complete frame
/// </returns>
public List<float[]> Spectrogram(float[] samples)
{
    var block = new float[_fftSize];
    var zeroblock = new float[_fftSize];

    var spectrogram = new List<float[]>();

    // Inclusive bound: a frame that ends exactly at the last sample is a complete frame
    // and must be processed (a strict comparison silently dropped it).
    for (var pos = 0; pos + _windowSize <= samples.Length; pos += _hopSize)
    {
        // reset the whole block, then copy the next frame into its beginning
        zeroblock.FastCopyTo(block, _fftSize);
        samples.FastCopyTo(block, _windowSize, pos);

        if (_window != WindowTypes.Rectangular)
        {
            block.ApplyWindow(_windowSamples);
        }

        var spectrum = new float[_fftSize / 2 + 1];
        _fft.PowerSpectrum(block, spectrum);

        spectrogram.Add(spectrum);
    }

    return spectrogram;
}
/// <summary>
/// Method for computing direct STFT of a signal block.
/// STFT (spectrogram) is essentially the list of spectra in time.
/// </summary>
/// <param name="samples">The samples of signal</param>
/// <returns>
/// STFT of the signal: one (real, imaginary) pair of arrays per complete frame.
/// The list is empty if the signal is shorter than one window.
/// </returns>
public List<(float[], float[])> Direct(float[] samples)
{
    // Number of complete frames. Computing it explicitly avoids two problems of the
    // "(length - window) / hop" shortcut: a negative list capacity for short signals,
    // and a pre-allocated final frame that the processing loop never filled.
    var frameCount = samples.Length >= _windowSize
                   ? (samples.Length - _windowSize) / _hopSize + 1
                   : 0;

    // pre-allocate memory:
    var stft = new List<(float[], float[])>(frameCount);

    for (var i = 0; i < frameCount; i++)
    {
        stft.Add((new float[_fftSize], new float[_fftSize]));
    }

    // stft:
    var windowedBuffer = new float[_fftSize];

    for (int pos = 0, i = 0; i < frameCount; pos += _hopSize, i++)
    {
        samples.FastCopyTo(windowedBuffer, _windowSize, pos);

        windowedBuffer.ApplyWindow(_windowSamples);

        var (re, im) = stft[i];
        _fft.Direct(windowedBuffer, re, im);
    }

    return stft;
}
/// <summary>
/// Fills the given feature vectors, one per frame, from a range of the sample array.
/// </summary>
/// <param name="samples">Array of real-valued samples</param>
/// <param name="startSample">The offset (position) of the first sample for processing</param>
/// <param name="endSample">The offset (position) of last sample for processing</param>
/// <param name="vectors">Pre-allocated sequence of feature vectors</param>
public virtual void ComputeFrom(float[] samples, int startSample, int endSample, IList<float[]> vectors)
{
    Guard.AgainstInvalidRange(startSample, endSample, "starting pos", "ending pos");

    var frameLength = FrameSize;
    var hopLength = HopSize;

    // sample preceding the range (needed by pre-emphasis); zero at the very beginning
    var previous = startSample > 0 ? samples[startSample - 1] : 0f;

    // start position of the last frame that still fits before endSample
    var finalStart = endSample - frameLength;

    // Reusable work buffer. It may be longer than a frame, so its tail is re-zeroed
    // for every frame; a plain loop is used for that on purpose.
    var buffer = new float[_blockSize];

    var offset = startSample;
    var frameIndex = 0;

    while (offset <= finalStart)
    {
        // copy the frame and zero-pad the rest of the buffer
        samples.FastCopyTo(buffer, frameLength, offset);

        for (var n = frameLength; n < buffer.Length; n++)
        {
            buffer[n] = 0;
        }

        // optional pre-emphasis
        if (_preEmphasis > 1e-10f)
        {
            for (var n = 0; n < frameLength; n++)
            {
                var current = buffer[n];
                buffer[n] = current - previous * _preEmphasis;
                previous = current;
            }

            // the next frame starts hopLength samples later, so remember the sample just before it
            previous = samples[offset + hopLength - 1];
        }

        // optional windowing
        if (_windowSamples != null)
        {
            buffer.ApplyWindow(_windowSamples);
        }

        // compute the features of this frame into its pre-allocated vector
        ProcessFrame(buffer, vectors[frameIndex]);

        offset += hopLength;
        frameIndex++;
    }
}
/// <summary>
/// Method for computing direct STFT of a signal block.
/// STFT (spectrogram) is essentially the list of spectra in time.
/// </summary>
/// <param name="input">Samples of input signal</param>
/// <returns>
/// STFT of the signal: one (real, imaginary) pair per complete frame,
/// followed by one extra pair for the zero-padded remainder of the signal.
/// </returns>
public List<(float[], float[])> Direct(float[] input)
{
    // Number of complete frames. Guarding the short-signal case avoids a negative
    // list capacity and avoids copying a full window from a signal that is shorter than it.
    var len = input.Length >= _windowSize
            ? (input.Length - _windowSize) / _hopSize + 1
            : 0;

    // pre-allocate memory (complete frames + the last incomplete one):
    var stft = new List<(float[], float[])>(len + 1);

    for (var i = 0; i < len; i++)
    {
        stft.Add((new float[_fftSize], new float[_fftSize]));
    }

    // stft:
    var windowedBuffer = new float[_fftSize];

    var pos = 0;

    for (var i = 0; i < len; pos += _hopSize, i++)
    {
        input.FastCopyTo(windowedBuffer, _windowSize, pos);

        windowedBuffer.ApplyWindow(_windowSamples);

        var (re, im) = stft[i];
        _fft.Direct(windowedBuffer, re, im);
    }

    // last (incomplete) frame:
    var lre = new float[_fftSize];
    var lim = new float[_fftSize];
    stft.Add((lre, lim));

    Array.Clear(windowedBuffer, 0, _fftSize);

    // 'pos' can step past the end of the signal when the hop is larger than the window;
    // in that case (or when nothing is left) the last frame stays all zeros.
    var remaining = input.Length - pos;

    if (remaining > 0)
    {
        input.FastCopyTo(windowedBuffer, remaining, pos);
    }

    windowedBuffer.ApplyWindow(_windowSamples);

    _fft.Direct(windowedBuffer, lre, lim);

    return stft;
}
/// <summary>
/// Computes the MFCC vector of one frame. The steps are:
///
///     1) Apply window
///     2) Obtain spectrum
///     3) Apply filterbank and post-process the result
///     4) Do dct
///     5) [Optional] liftering of mfcc
///     6) [Optional] replace the first coefficient with log-energy
///
/// </summary>
/// <param name="block">Samples for analysis (modified in place)</param>
/// <returns>MFCC vector</returns>
public override float[] ProcessFrame(float[] block)
{
    // everything beyond the frame is zero padding
    var tail = FrameSize;
    while (tail < block.Length)
    {
        block[tail] = 0;
        tail++;
    }

    // 1) windowing
    block.ApplyWindow(_windowSamples);

    // 2) block -> _spectrum
    _getSpectrum(block);

    // 3) _spectrum -> _melSpectrum
    _postProcessSpectrum();

    // 4) _melSpectrum -> coefficients
    var coefficients = new float[FeatureCount];
    _applyDct(coefficients);

    // 5) liftering is applied only if lifter coefficients are available
    if (_lifterCoeffs != null)
    {
        coefficients.ApplyWindow(_lifterCoeffs);
    }

    // 6) log of the sum of squares of the block in its current state
    if (_includeEnergy)
    {
        var energy = block.Sum(x => x * x);
        coefficients[0] = (float)Math.Log(energy);
    }

    return coefficients;
}
/// <summary>
/// Method for computing a spectrogram as arrays of Magnitude and Phase.
/// </summary>
/// <param name="samples">The samples of signal</param>
/// <returns>
/// Magnitude-Phase spectrogram of the signal: for each complete frame, one magnitude array
/// and one phase array of (fftSize / 2 + 1) values. Both lists are empty if the signal
/// is shorter than one window.
/// </returns>
public MagnitudePhaseList MagnitudePhaseSpectrogram(float[] samples)
{
    // Number of complete frames. Using one count for allocation and for processing
    // guarantees that every allocated frame is actually computed
    // (previously the last one could be left all-zero).
    var frameCount = samples.Length >= _windowSize
                   ? (samples.Length - _windowSize) / _hopSize + 1
                   : 0;

    var binCount = _fftSize / 2 + 1;

    // pre-allocate memory:
    var mag = new List<float[]>(frameCount);
    var phase = new List<float[]>(frameCount);

    for (var i = 0; i < frameCount; i++)
    {
        mag.Add(new float[binCount]);
        phase.Add(new float[binCount]);
    }

    // magnitude-phase spectrogram:
    var windowedBuffer = new float[_windowSize];
    var re = new float[binCount];
    var im = new float[binCount];

    for (int pos = 0, i = 0; i < frameCount; pos += _hopSize, i++)
    {
        samples.FastCopyTo(windowedBuffer, _windowSize, pos);

        if (_window != WindowTypes.Rectangular)
        {
            windowedBuffer.ApplyWindow(_windowSamples);
        }

        _fft.Direct(windowedBuffer, re, im);

        // convert each complex bin to polar form
        for (var j = 0; j <= _fftSize / 2; j++)
        {
            mag[i][j] = (float)(Math.Sqrt(re[j] * re[j] + im[j] * im[j]));
            phase[i][j] = (float)(Math.Atan2(im[j], re[j]));
        }
    }

    return new MagnitudePhaseList { Magnitudes = mag, Phases = phase };
}
/// <summary>
/// Computes the LPCC vector of one frame.
/// The LPC part is done here directly (instead of delegating to an LPC extractor)
/// and the LPC vector is then converted to cepstral coefficients.
/// </summary>
/// <param name="block">Samples for analysis (modified in place if a window is applied)</param>
/// <returns>LPCC vector</returns>
public override float[] ProcessFrame(float[] block)
{
    // 1) windowing is skipped for the rectangular window
    var needsWindow = _window != WindowTypes.Rectangular;

    if (needsWindow)
    {
        block.ApplyWindow(_windowSamples);
    }

    // 2) cross-correlation of the block with its copy
    block.FastCopyTo(_reversed, FrameSize);
    _convolver.CrossCorrelate(block, _reversed, _cc);

    // 3) Levinson-Durbin on a zeroed LPC buffer
    for (var n = 0; n < _lpc.Length; n++)
    {
        _lpc[n] = 0;
    }

    var levinsonError = Lpc.LevinsonDurbin(_cc, _lpc, _order, FrameSize - 1);

    // 4) LPC -> cepstrum
    var cepstrum = new float[FeatureCount];
    Lpc.ToCepstrum(_lpc, levinsonError, cepstrum);

    // 5) liftering is applied only if lifter coefficients are available
    if (_lifterCoeffs == null)
    {
        return cepstrum;
    }

    cepstrum.ApplyWindow(_lifterCoeffs);

    return cepstrum;
}
/// <summary>
/// Method for computing direct STFT of a signal block.
/// STFT (spectrogram) is essentially the list of spectra in time.
/// </summary>
/// <param name="samples">The samples of signal</param>
/// <returns>
/// STFT of the signal: one (real, imaginary) pair of arrays of size fftSize per complete frame
/// </returns>
public List<Tuple<float[], float[]>> Direct(float[] samples)
{
    var stft = new List<Tuple<float[], float[]>>();

    // Inclusive bound: a frame that ends exactly at the last sample is a complete frame
    // and must be processed (a strict comparison silently dropped it).
    for (var pos = 0; pos + _windowSize <= samples.Length; pos += _hopSize)
    {
        // fresh arrays per frame: they are handed over to the caller inside the tuple
        var re = new float[_fftSize];
        var im = new float[_fftSize];

        samples.FastCopyTo(re, _windowSize, pos);

        if (_window != WindowTypes.Rectangular)
        {
            re.ApplyWindow(_windowSamples);
        }

        _fft.Direct(re, im);

        stft.Add(new Tuple<float[], float[]>(re, im));
    }

    return stft;
}
/// <summary>
/// Method for computing a spectrogram.
/// The spectrogram is essentially a list of power spectra in time.
/// </summary>
/// <param name="samples">The samples of signal</param>
/// <returns>
/// Spectrogram of the signal: one array of (fftSize / 2 + 1) values per complete frame.
/// The list is empty if the signal is shorter than one window.
/// </returns>
public List<float[]> Spectrogram(float[] samples)
{
    // Number of complete frames. Using one count for allocation and for processing
    // guarantees that every allocated spectrum is actually computed
    // (previously the last one could be left all-zero).
    var frameCount = samples.Length >= _windowSize
                   ? (samples.Length - _windowSize) / _hopSize + 1
                   : 0;

    // pre-allocate memory:
    var spectrogram = new List<float[]>(frameCount);

    for (var i = 0; i < frameCount; i++)
    {
        spectrogram.Add(new float[_fftSize / 2 + 1]);
    }

    // spectrogram:
    var windowedBuffer = new float[_fftSize];

    for (int pos = 0, i = 0; i < frameCount; pos += _hopSize, i++)
    {
        // the buffer is reset only when the window does not cover it entirely
        if (_windowSize < _fftSize)
        {
            Array.Clear(windowedBuffer, 0, _fftSize);
        }

        samples.FastCopyTo(windowedBuffer, _windowSize, pos);

        if (_window != WindowTypes.Rectangular)
        {
            windowedBuffer.ApplyWindow(_windowSamples);
        }

        _fft.PowerSpectrum(windowedBuffer, spectrogram[i]);
    }

    return spectrogram;
}
/// <summary>
/// Computes LPCC feature vectors from a range of samples.
/// The LPC part is done here directly (instead of delegating to an LPC extractor)
/// and each LPC vector is then converted to LPCC coefficients.
/// </summary>
/// <param name="samples">Samples for analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of LPCC vectors with their time positions</returns>
public override List<FeatureVector> ComputeFrom(float[] samples, int startSample, int endSample)
{
    Guard.AgainstInvalidRange(startSample, endSample, "starting pos", "ending pos");

    var hopLength = HopSize;
    var frameLength = FrameSize;

    var result = new List<FeatureVector>();

    // sample preceding the range (needed by pre-emphasis); zero at the very beginning
    var previous = startSample > 0 ? samples[startSample - 1] : 0.0f;

    // frames start strictly before this position
    var limit = endSample - Math.Max(frameLength, hopLength);

    var offset = startSample;

    while (offset < limit)
    {
        // load the next frame
        samples.FastCopyTo(_block, frameLength, offset);

        // 0) optional pre-emphasis
        if (_preEmphasis > 1e-10)
        {
            for (var n = 0; n < frameLength; n++)
            {
                var current = _block[n];
                _block[n] = current - previous * _preEmphasis;
                previous = current;
            }

            // the next frame starts hopLength samples later, so remember the sample just before it
            previous = samples[offset + hopLength - 1];
        }

        // 1) windowing is skipped for the rectangular window
        if (_window != WindowTypes.Rectangular)
        {
            _block.ApplyWindow(_windowSamples);
        }

        // 2) cross-correlation of the block with its copy
        _block.FastCopyTo(_reversed, frameLength);
        _convolver.CrossCorrelate(_block, _reversed, _cc);

        // 3) Levinson-Durbin on a zeroed LPC buffer
        for (var n = 0; n < _lpc.Length; n++)
        {
            _lpc[n] = 0;
        }

        var err = MathUtils.LevinsonDurbin(_cc, _lpc, _order, frameLength - 1);

        // 4) recursive conversion LPC -> LPCC
        var cepstrum = new float[FeatureCount];

        cepstrum[0] = (float)Math.Log(err);

        for (var n = 1; n < FeatureCount; n++)
        {
            var acc = 0.0f;

            for (var k = 1; k < n; k++)
            {
                acc += k * cepstrum[k] * _lpc[n - k];
            }

            cepstrum[n] = -_lpc[n] - acc / n;
        }

        // 5) liftering is applied only if lifter coefficients are available
        if (_lifterCoeffs != null)
        {
            cepstrum.ApplyWindow(_lifterCoeffs);
        }

        // store the vector together with the time of the frame start
        var vector = new FeatureVector
        {
            Features = cepstrum,
            TimePosition = (double)offset / SamplingRate
        };

        result.Add(vector);

        offset += hopLength;
    }

    return result;
}
/// <summary>
/// Phase locking procedure.
/// Processes the signal frame by frame (frame length is _fftSize, analysis hop is _hopAnalysis):
/// each frame is windowed and transformed, its phases are re-synthesized for the synthesis hop,
/// and the inverse-transformed frames are overlap-added at steps of _hopSynthesis.
/// </summary>
/// <param name="signal">Input signal</param>
/// <returns>
/// New signal with the sampling rate of the input and
/// (int)(input length * _stretch) + _fftSize samples
/// </returns>
private DiscreteSignal PhaseLocking(DiscreteSignal signal)
{
    var input = signal.Samples;
    var output = new float[(int)(input.Length * _stretch) + _fftSize];

    // per-sample sum of squared window values, used to normalize the overlap-add result
    var windowSum = new float[output.Length];

    var re = new float[_fftSize];
    var im = new float[_fftSize];
    var zeroblock = new float[_fftSize];

    // per-bin state for bins 0 .. _fftSize / 2
    var mag = new double[_fftSize / 2 + 1];
    var phase = new double[_fftSize / 2 + 1];
    var prevPhase = new double[_fftSize / 2 + 1];     // phases of the previous frame
    var phaseTotal = new double[_fftSize / 2 + 1];    // accumulated synthesis phases
    var delta = new double[_fftSize / 2 + 1];         // phase difference to the previous frame

    var posSynthesis = 0;
    for (var posAnalysis = 0; posAnalysis + _fftSize < input.Length; posAnalysis += _hopAnalysis)
    {
        // load the frame into 're', zero 'im', apply window and transform
        input.FastCopyTo(re, _fftSize, posAnalysis);
        zeroblock.FastCopyTo(im, _fftSize);

        re.ApplyWindow(_window);

        _fft.Direct(re, im);

        // magnitude, phase and phase difference of each bin
        for (var j = 0; j < mag.Length; j++)
        {
            mag[j] = Math.Sqrt(re[j] * re[j] + im[j] * im[j]);
            phase[j] = Math.Atan2(im[j], re[j]);
            delta[j] = phase[j] - prevPhase[j];
            prevPhase[j] = phase[j];
        }

        // assign phases at peaks to all neighboring frequency bins
        // NOTE(review): the values written to 'phase' in this section are not read
        // anywhere below in this method (the phase adaptation uses 'delta', which was
        // computed above) - confirm whether the locked phases were meant to be used.

        var prevIndex = 0;
        var prevPhi = 0.0;
        for (var j = 2; j < mag.Length - 2; j++)
        {
            // a peak is strictly greater than its two neighbors on each side
            if (mag[j] <= mag[j - 1] || mag[j] <= mag[j - 2] ||
                mag[j] <= mag[j + 1] || mag[j] <= mag[j + 2])
            {
                continue;   // if not a peak
            }

            // bins between the previous peak and this one are split at the midpoint:
            // the lower part takes the previous peak's phase, the upper part takes this peak's phase
            // (before the first peak, mid is 0, so all bins below it take this peak's phase)
            var mid = prevIndex == 0 ? 0 : (prevIndex + j) / 2;
            for (var k = prevIndex; k < mid; k++)
            {
                phase[k] = prevPhi;
            }
            for (var k = mid; k < j; k++)
            {
                phase[k] = phase[j];
            }

            prevIndex = j;
            prevPhi = phase[j];
        }

        // bins from the last peak upwards take the last peak's phase
        for (var j = prevIndex; j < mag.Length; j++)
        {
            phase[j] = prevPhi;
        }

        // phase adaptation:
        // the deviation from the phase advance _hopAnalysis * _omega[j] is wrapped to [-pi, pi)
        // (assuming MathUtils.Mod returns a non-negative remainder - TODO confirm),
        // converted to a frequency and accumulated over the synthesis hop

        for (var j = 0; j < mag.Length; j++)
        {
            var deltaUnwrapped = delta[j] - _hopAnalysis * _omega[j];
            var deltaWrapped = MathUtils.Mod(deltaUnwrapped + Math.PI, 2 * Math.PI) - Math.PI;

            var freq = _omega[j] + deltaWrapped / _hopAnalysis;

            phaseTotal[j] += _hopSynthesis * freq;

            re[j] = (float)(mag[j] * Math.Cos(phaseTotal[j]));
            im[j] = (float)(mag[j] * Math.Sin(phaseTotal[j]));
        }

        // bins above _fftSize / 2 are zeroed before the inverse transform
        for (var j = _fftSize / 2 + 1; j < _fftSize; j++)
        {
            re[j] = im[j] = 0.0f;
        }

        _fft.Inverse(re, im);

        // windowed overlap-add at the synthesis position
        for (var j = 0; j < re.Length; j++)
        {
            output[posSynthesis + j] += re[j] * _window[j];
            windowSum[posSynthesis + j] += _windowSquared[j];
        }

        posSynthesis += _hopSynthesis;
    }

    // normalization; samples with a (nearly) zero window sum are left untouched
    for (var j = 0; j < output.Length; j++)
    {
        if (windowSum[j] < 1e-3)
        {
            continue;
        }
        output[j] /= (windowSum[j] * _fftSize / 2);
    }

    return(new DiscreteSignal(signal.SamplingRate, output));
}
/// <summary>
/// Spectral subtraction algorithm according to
///
/// [1979] M. Berouti, R. Schwartz, J. Makhoul
/// "Enhancement of Speech Corrupted by Acoustic Noise".
///
/// The noise power spectrum is estimated by averaging the power spectra of the noise frames
/// (with 3-point smoothing across frequency bins). For each Hann-windowed frame of the signal
/// an SNR-dependent multiple of that estimate is subtracted from the power spectrum,
/// and the frames are recombined by normalized overlap-add.
/// </summary>
/// <param name="signal">Signal to be processed</param>
/// <param name="noise">
/// Noise signal used for estimating the noise power spectrum.
/// It must be longer than <paramref name="fftSize"/> samples,
/// otherwise no noise frame is available and the estimate is undefined.
/// </param>
/// <param name="fftSize">FFT size (frame length)</param>
/// <param name="hopSize">Hop size between consecutive frames</param>
/// <returns>New signal of the same length and sampling rate as <paramref name="signal"/></returns>
public static DiscreteSignal SpectralSubtract(DiscreteSignal signal,
                                              DiscreteSignal noise,
                                              int fftSize = 1024,
                                              int hopSize = 410)
{
    var input = signal.Samples;
    var output = new float[input.Length];

    // spectral floor factor
    const float beta = 0.009f;

    // over-subtraction factor: decreases linearly from alphaMax (at snrMin) to alphaMin (at snrMax)
    const float alphaMin = 2f;
    const float alphaMax = 5f;
    const float snrMin = -5f;
    const float snrMax = 20f;

    const float k = (alphaMin - alphaMax) / (snrMax - snrMin);
    const float b = alphaMax - k * snrMin;

    var fft = new Fft(fftSize);
    var hannWindow = Window.OfType(WindowTypes.Hann, fftSize);
    var windowSquared = hannWindow.Select(w => w * w).ToArray();
    var windowSum = new float[output.Length];

    var re = new float[fftSize];
    var im = new float[fftSize];
    var zeroblock = new float[fftSize];

    // estimate noise power spectrum

    var noiseAcc = new float[fftSize / 2 + 1];
    var noiseEstimate = new float[fftSize / 2 + 1];

    var numFrames = 0;
    var pos = 0;
    for (; pos + fftSize < noise.Length; pos += hopSize, numFrames++)
    {
        noise.Samples.FastCopyTo(re, fftSize, pos);
        zeroblock.FastCopyTo(im, fftSize);

        fft.Direct(re, im);

        for (var j = 0; j <= fftSize / 2; j++)
        {
            noiseAcc[j] += re[j] * re[j] + im[j] * im[j];
        }
    }

    // (including smoothing)

    for (var j = 1; j < fftSize / 2; j++)
    {
        noiseEstimate[j] = (noiseAcc[j - 1] + noiseAcc[j] + noiseAcc[j + 1]) / (3 * numFrames);
    }

    // The two edge bins cannot be smoothed with both neighbors, so they get the plain average.
    // They must be taken from the accumulator: dividing the (still zero) estimate by the
    // number of frames left both bins with a zero noise estimate.
    noiseEstimate[0] = noiseAcc[0] / numFrames;
    noiseEstimate[fftSize / 2] = noiseAcc[fftSize / 2] / numFrames;

    // spectral subtraction

    for (pos = 0; pos + fftSize < input.Length; pos += hopSize)
    {
        input.FastCopyTo(re, fftSize, pos);
        zeroblock.FastCopyTo(im, fftSize);

        re.ApplyWindow(hannWindow);

        fft.Direct(re, im);

        for (var j = 0; j <= fftSize / 2; j++)
        {
            var power = re[j] * re[j] + im[j] * im[j];
            var phase = Math.Atan2(im[j], re[j]);

            var noisePower = noiseEstimate[j];

            // over-subtraction factor is chosen from the SNR of this bin
            var snr = 10 * Math.Log10(power / noisePower);
            var alpha = Math.Max(Math.Min(k * snr + b, alphaMax), alphaMin);

            // subtract, but never go below the spectral floor
            var diff = power - alpha * noisePower;

            var mag = Math.Sqrt(Math.Max(diff, beta * noisePower));

            // new magnitude with the original phase
            re[j] = (float)(mag * Math.Cos(phase));
            im[j] = (float)(mag * Math.Sin(phase));
        }

        // bins above fftSize / 2 are zeroed before the inverse transform
        for (var j = fftSize / 2 + 1; j < fftSize; j++)
        {
            re[j] = im[j] = 0.0f;
        }

        fft.Inverse(re, im);

        // windowed overlap-add
        for (var j = 0; j < re.Length; j++)
        {
            output[pos + j] += re[j] * hannWindow[j];
            windowSum[pos + j] += windowSquared[j];
        }
    }

    // normalization; samples with a (nearly) zero window sum are left untouched
    for (var j = 0; j < output.Length; j++)
    {
        if (windowSum[j] < 1e-3)
        {
            continue;
        }

        output[j] /= (windowSum[j] * fftSize / 2);
    }

    return new DiscreteSignal(signal.SamplingRate, output);
}
/// <summary>
/// Computes MFCC feature vectors from a range of samples.
///
/// The range is decomposed into frames taken every hopSize samples. For each frame:
///
///     0) [Optional] pre-emphasis
///     1) Apply window (nothing is done for the rectangular window)
///     2) Obtain power spectrum
///     3) Apply the filterbank and take the logarithm of the result
///     4) Do dct
///     5) [Optional] liftering of mfcc
///
/// </summary>
/// <param name="samples">Samples for analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of mfcc vectors with their time positions</returns>
public override List<FeatureVector> ComputeFrom(float[] samples, int startSample, int endSample)
{
    Guard.AgainstInvalidRange(startSample, endSample, "starting pos", "ending pos");

    var hopLength = HopSize;
    var frameLength = FrameSize;

    var result = new List<FeatureVector>();

    // sample preceding the range (needed by pre-emphasis); zero at the very beginning
    var previous = startSample > 0 ? samples[startSample - 1] : 0.0f;

    for (var offset = startSample; offset + frameLength < endSample; offset += hopLength)
    {
        // reset the work block and load the next frame into its beginning
        _zeroblock.FastCopyTo(_block, _fftSize);
        samples.FastCopyTo(_block, _windowSamples.Length, offset);

        // 0) optional pre-emphasis
        if (_preEmphasis > 0.0)
        {
            for (var n = 0; n < frameLength; n++)
            {
                var current = _block[n];
                _block[n] = current - previous * _preEmphasis;
                previous = current;
            }

            // the next frame starts hopLength samples later, so remember the sample just before it
            previous = samples[offset + hopLength - 1];
        }

        // 1) windowing is skipped for the rectangular window
        if (_window != WindowTypes.Rectangular)
        {
            _block.ApplyWindow(_windowSamples);
        }

        // 2) _block -> _spectrum
        _fft.PowerSpectrum(_block, _spectrum);

        // 3) _spectrum -> _logMelSpectrum
        FilterBanks.ApplyAndLog(FilterBank, _spectrum, _logMelSpectrum);

        // 4) _logMelSpectrum -> coefficients
        var coefficients = new float[FeatureCount];
        _dct.Direct(_logMelSpectrum, coefficients);

        // 5) liftering is applied only if lifter coefficients are available
        if (_lifterCoeffs != null)
        {
            coefficients.ApplyWindow(_lifterCoeffs);
        }

        // store the vector together with the time of the frame start
        var vector = new FeatureVector
        {
            Features = coefficients,
            TimePosition = (double)offset / SamplingRate
        };

        result.Add(vector);
    }

    return result;
}
/// <summary>
/// S(implified)PNCC algorithm according to [Kim &amp; Stern, 2016]:
///     0) [Optional] pre-emphasis
///
/// Decompose signal into overlapping (hopSize) frames of length fftSize. In each frame do:
///
///     1) Apply window (if rectangular window was specified then just do nothing)
///     2) Obtain power spectrum
///     3) Apply gammatone filters (squared)
///     4) Mean power normalization
///     5) Apply nonlinearity
///     6) Do dct-II (normalized)
///
/// </summary>
/// <param name="signal">Signal for analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of pncc vectors with their time positions</returns>
public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
{
    // ====================================== PREPARE =======================================

    var hopSize = (int)(signal.SamplingRate * HopSize);
    var frameSize = (int)(signal.SamplingRate * FrameSize);
    var windowSamples = Window.OfType(_window, frameSize);

    // the FFT must be able to hold an entire frame
    var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

    // The filterbank is built for the FFT size that is actually used below.
    // Building it for _fftSize made it inconsistent with the spectrum
    // (fftSize / 2 + 1 values) whenever the frame is longer than _fftSize.
    _gammatoneFilterBank = FilterBanks.Erb(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq);

    // use power spectrum:

    foreach (var filter in _gammatoneFilterBank)
    {
        for (var j = 0; j < filter.Length; j++)
        {
            var ps = filter[j] * filter[j];
            filter[j] = ps;
        }
    }

    var fft = new Fft(fftSize);
    var dct = new Dct2(_filterbankSize, FeatureCount);

    var gammatoneSpectrum = new float[_filterbankSize];

    // target mean power and the initial value of the running mean
    const float meanPower = 1e10f;
    var mean = 4e07f;

    // exponent of the power nonlinearity (not used when _power is zero)
    var d = _power != 0 ? 1.0 / _power : 0.0;

    var block = new float[fftSize];             // buffer for a signal block at each step
    var zeroblock = new float[fftSize];         // buffer of zeros for quick memset
    var spectrum = new float[fftSize / 2 + 1];

    // ================================= MAIN PROCESSING ==================================

    var featureVectors = new List<FeatureVector>();

    var prevSample = startSample > 0 ? signal[startSample - 1] : 0.0f;

    var i = startSample;
    while (i + frameSize < endSample)
    {
        // prepare next block for processing

        zeroblock.FastCopyTo(block, zeroblock.Length);
        signal.Samples.FastCopyTo(block, frameSize, i);

        // 0) pre-emphasis (if needed)

        if (_preEmphasis > 0.0)
        {
            for (var k = 0; k < frameSize; k++)
            {
                var y = block[k] - prevSample * _preEmphasis;
                prevSample = block[k];
                block[k] = y;
            }

            // the next frame starts hopSize samples later, so remember the sample just before it
            prevSample = signal[i + hopSize - 1];
        }

        // 1) apply window

        if (_window != WindowTypes.Rectangular)
        {
            block.ApplyWindow(windowSamples);
        }

        // 2) calculate power spectrum

        fft.PowerSpectrum(block, spectrum);

        // 3) apply gammatone filterbank

        FilterBanks.Apply(_gammatoneFilterBank, spectrum, gammatoneSpectrum);

        // 4) mean power normalization:

        var sumPower = 0.0f;
        for (var j = 0; j < gammatoneSpectrum.Length; j++)
        {
            sumPower += gammatoneSpectrum[j];
        }

        // running mean of the frame power, weighted by LambdaMu
        mean = LambdaMu * mean + (1 - LambdaMu) * sumPower;

        for (var j = 0; j < gammatoneSpectrum.Length; j++)
        {
            gammatoneSpectrum[j] *= meanPower / mean;
        }

        // 5) nonlinearity (power ^ d    or    Log10)

        if (_power != 0)
        {
            for (var j = 0; j < gammatoneSpectrum.Length; j++)
            {
                gammatoneSpectrum[j] = (float)Math.Pow(gammatoneSpectrum[j], d);
            }
        }
        else
        {
            // the epsilon keeps the argument of the logarithm positive
            for (var j = 0; j < gammatoneSpectrum.Length; j++)
            {
                gammatoneSpectrum[j] = (float)Math.Log10(gammatoneSpectrum[j] + float.Epsilon);
            }
        }

        // 6) dct-II (normalized)

        var spnccs = new float[FeatureCount];
        dct.DirectN(gammatoneSpectrum, spnccs);

        // add pncc vector to output sequence

        featureVectors.Add(new FeatureVector
        {
            Features = spnccs,
            TimePosition = (double)i / signal.SamplingRate
        });

        i += hopSize;
    }

    return featureVectors;
}
/// <summary>
/// Phase Vocoder algorithm.
/// Frames of length _fftSize are taken every _hopAnalysis samples, Hann-windowed and transformed;
/// the phases are re-synthesized for the synthesis hop and the inverse-transformed frames
/// are overlap-added every _hopSynthesis samples.
/// </summary>
/// <param name="signal">Input signal</param>
/// <param name="filteringOptions">Filtering options (not used by this method)</param>
/// <returns>
/// New signal with the sampling rate of the input and
/// (int)(input length * _hopSynthesis / _hopAnalysis) + _fftSize samples
/// </returns>
public DiscreteSignal ApplyTo(DiscreteSignal signal,
                              FilteringOptions filteringOptions = FilteringOptions.Auto)
{
    var stretch = (float)_hopSynthesis / _hopAnalysis;

    var input = signal.Samples;
    var output = new float[(int)(input.Length * stretch) + _fftSize];

    var fft = new Fft(_fftSize);
    var hannWindow = Window.OfType(WindowTypes.Hann, _fftSize);

    // gain applied to every overlap-added sample
    var ratio = _fftSize / (2.0f * _hopAnalysis);
    var norm = 4.0f / (_fftSize * ratio);

    // bins 0 .. _fftSize / 2 and their frequencies in radians per sample
    var binCount = _fftSize / 2 + 1;

    var omega = new double[binCount];
    for (var f = 0; f < binCount; f++)
    {
        omega[f] = 2 * Math.PI * f / _fftSize;
    }

    var re = new float[_fftSize];
    var im = new float[_fftSize];
    var zeroblock = new float[_fftSize];

    var prevPhase = new double[binCount];     // phases of the previous frame
    var phaseTotal = new double[binCount];    // accumulated synthesis phases

    var posAnalysis = 0;
    var posSynthesis = 0;

    while (posAnalysis + _fftSize < input.Length)
    {
        // load the frame into 're', zero 'im', apply window and transform
        input.FastCopyTo(re, _fftSize, posAnalysis);
        zeroblock.FastCopyTo(im, _fftSize);

        re.ApplyWindow(hannWindow);

        fft.Direct(re, im);

        for (var bin = 0; bin < binCount; bin++)
        {
            var mag = Math.Sqrt(re[bin] * re[bin] + im[bin] * im[bin]);
            var phase = Math.Atan2(im[bin], re[bin]);

            // deviation of the phase difference from the advance _hopAnalysis * omega
            var delta = phase - prevPhase[bin];
            var deltaUnwrapped = delta - _hopAnalysis * omega[bin];
            var deltaWrapped = MathUtils.Mod(deltaUnwrapped + Math.PI, 2 * Math.PI) - Math.PI;

            // frequency of the bin, accumulated over the synthesis hop
            var freq = omega[bin] + deltaWrapped / _hopAnalysis;
            phaseTotal[bin] += _hopSynthesis * freq;

            prevPhase[bin] = phase;

            // same magnitude, new phase
            re[bin] = (float)(mag * Math.Cos(phaseTotal[bin]));
            im[bin] = (float)(mag * Math.Sin(phaseTotal[bin]));
        }

        // bins above _fftSize / 2 are zeroed before the inverse transform
        for (var bin = binCount; bin < _fftSize; bin++)
        {
            im[bin] = 0.0f;
            re[bin] = 0.0f;
        }

        fft.Inverse(re, im);

        // windowed overlap-add at the synthesis position
        for (var n = 0; n < re.Length; n++)
        {
            output[posSynthesis + n] += re[n] * hannWindow[n] * norm;
        }

        posSynthesis += _hopSynthesis;
        posAnalysis += _hopAnalysis;
    }

    return new DiscreteSignal(signal.SamplingRate, output);
}
/// <summary>
/// Computes LPC feature vectors from a range of the signal.
///
/// Note:
///     In each resulting vector the first element is replaced
///     with the error value returned by the Levinson-Durbin routine.
///
/// </summary>
/// <param name="signal">Signal for analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of LPC vectors with their time positions</returns>
public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
{
    // ---------------------------------- preparation ----------------------------------

    var hopLength = (int)(signal.SamplingRate * HopSize);
    var frameLength = (int)(signal.SamplingRate * FrameSize);
    var windowSamples = Window.OfType(_window, frameLength);

    var fftSize = MathUtils.NextPowerOfTwo(2 * frameLength - 1);

    // work buffers for the cross-correlation (real / imaginary parts of the block and of its reversed copy)
    var blockReal = new float[fftSize];
    var blockImag = new float[fftSize];
    var reversedReal = new float[fftSize];
    var reversedImag = new float[fftSize];

    var zeros = new float[fftSize];             // source of zeros for resetting the buffers
    var correlation = new float[frameLength];   // (truncated) cross-correlation signal

    // ----------------------------------- processing ----------------------------------

    var result = new List<FeatureVector>();

    // sample preceding the range (needed by pre-emphasis); zero at the very beginning
    var previous = startSample > 0 ? signal[startSample - 1] : 0.0f;

    for (var offset = startSample; offset + frameLength < endSample; offset += hopLength)
    {
        // reset all buffers and load the next frame
        zeros.FastCopyTo(blockReal, fftSize);
        zeros.FastCopyTo(blockImag, fftSize);
        zeros.FastCopyTo(reversedReal, fftSize);
        zeros.FastCopyTo(reversedImag, fftSize);

        signal.Samples.FastCopyTo(blockReal, frameLength, offset);

        // 0) optional pre-emphasis
        if (_preEmphasis > 0.0)
        {
            for (var n = 0; n < frameLength; n++)
            {
                var current = blockReal[n];
                blockReal[n] = current - previous * _preEmphasis;
                previous = current;
            }

            // the next frame starts hopLength samples later, so remember the sample just before it
            previous = signal[offset + hopLength - 1];
        }

        // 1) windowing is skipped for the rectangular window
        if (_window != WindowTypes.Rectangular)
        {
            blockReal.ApplyWindow(windowSamples);
        }

        // 2) autocorrelation
        Operation.CrossCorrelate(blockReal, blockImag, reversedReal, reversedImag, correlation, frameLength);

        // 3) Levinson-Durbin; the returned error goes to the first position
        var coefficients = new float[_order + 1];
        var err = MathUtils.LevinsonDurbin(correlation, coefficients, _order);
        coefficients[0] = err;

        // store the vector together with the time of the frame start
        var vector = new FeatureVector
        {
            Features = coefficients,
            TimePosition = (double)offset / signal.SamplingRate
        };

        result.Add(vector);
    }

    return result;
}
/// <summary>
/// Computes LPCC feature vectors from a range of the signal.
/// The LPC part is done here directly (instead of delegating to an LPC extractor)
/// and each LPC vector is then converted to LPCC coefficients.
/// </summary>
/// <param name="signal">Signal for analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of LPCC vectors with their time positions</returns>
public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
{
    // ---------------------------------- preparation ----------------------------------

    var hopLength = (int)(signal.SamplingRate * HopSize);
    var frameLength = (int)(signal.SamplingRate * FrameSize);
    var windowSamples = Window.OfType(_window, frameLength);

    var fftSize = MathUtils.NextPowerOfTwo(2 * frameLength - 1);

    // lifter coefficients are prepared only for a positive lifter size
    var lifterCoeffs = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;

    // work buffers for the cross-correlation (real / imaginary parts of the block and of its reversed copy)
    var blockReal = new float[fftSize];
    var blockImag = new float[fftSize];
    var reversedReal = new float[fftSize];
    var reversedImag = new float[fftSize];

    var zeros = new float[fftSize];             // source of zeros for resetting the buffers
    var correlation = new float[frameLength];   // (truncated) cross-correlation signal
    var lpc = new float[_order + 1];            // LPC coefficients of the current frame

    // ----------------------------------- processing ----------------------------------

    var result = new List<FeatureVector>();

    // sample preceding the range (needed by pre-emphasis); zero at the very beginning
    var previous = startSample > 0 ? signal[startSample - 1] : 0.0f;

    for (var offset = startSample; offset + frameLength < endSample; offset += hopLength)
    {
        // reset all buffers and load the next frame
        zeros.FastCopyTo(blockReal, fftSize);
        zeros.FastCopyTo(blockImag, fftSize);
        zeros.FastCopyTo(reversedReal, fftSize);
        zeros.FastCopyTo(reversedImag, fftSize);

        signal.Samples.FastCopyTo(blockReal, frameLength, offset);

        // 0) optional pre-emphasis
        if (_preEmphasis > 0.0)
        {
            for (var n = 0; n < frameLength; n++)
            {
                var current = blockReal[n];
                blockReal[n] = current - previous * _preEmphasis;
                previous = current;
            }

            // the next frame starts hopLength samples later, so remember the sample just before it
            previous = signal[offset + hopLength - 1];
        }

        // 1) windowing is skipped for the rectangular window
        if (_window != WindowTypes.Rectangular)
        {
            blockReal.ApplyWindow(windowSamples);
        }

        // 2) autocorrelation
        Operation.CrossCorrelate(blockReal, blockImag, reversedReal, reversedImag, correlation, frameLength);

        // 3) Levinson-Durbin on a zeroed LPC buffer
        zeros.FastCopyTo(lpc, lpc.Length);
        var err = MathUtils.LevinsonDurbin(correlation, lpc, _order);

        // 4) recursive conversion LPC -> LPCC
        var cepstrum = new float[FeatureCount];

        cepstrum[0] = (float)Math.Log(err);

        for (var n = 1; n < FeatureCount; n++)
        {
            var acc = 0.0f;

            for (var k = 1; k < n; k++)
            {
                acc += k * cepstrum[k] * lpc[n - k];
            }

            cepstrum[n] = -lpc[n] - acc / n;
        }

        // 5) optional liftering
        if (lifterCoeffs != null)
        {
            cepstrum.ApplyWindow(lifterCoeffs);
        }

        // store the vector together with the time of the frame start
        var vector = new FeatureVector
        {
            Features = cepstrum,
            TimePosition = (double)offset / signal.SamplingRate
        };

        result.Add(vector);
    }

    return result;
}
/// <summary>
/// Computes MFCC feature vectors of a signal:
///
///     0) [Optional] pre-emphasis
///
/// The signal is decomposed into overlapping frames (each frame starts hopSize samples
/// after the previous one) that are zero-padded to fftSize. In each frame do:
///
///     1) Apply window (if rectangular window was specified then just do nothing)
///     2) Obtain power spectrum X
///     3) Apply mel filters and log() the result: Y = Log10(X * H)
///     4) Do dct-II: mfcc = Dct(Y)
///     5) [Optional] liftering of mfcc
///
/// </summary>
/// <param name="signal">Signal for analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of mfcc vectors</returns>
public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
{
    // ---------------------------------- setup ----------------------------------

    var hopSize = (int)(signal.SamplingRate * HopSize);
    var frameSize = (int)(signal.SamplingRate * FrameSize);
    var windowSamples = Window.OfType(_window, frameSize);

    var fftSize = _fftSize;
    if (fftSize < frameSize)
    {
        fftSize = MathUtils.NextPowerOfTwo(frameSize);
    }

    var melBands = FilterBanks.MelBands(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq);
    _melFilterBank = FilterBanks.Triangular(fftSize, signal.SamplingRate, melBands);

    var lifterCoeffs = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;

    var fft = new Fft(fftSize);
    var dct = new Dct2(_filterbankSize, FeatureCount);

    // buffers that are reused by every frame

    var powerSpectrum = new float[fftSize / 2 + 1];
    var logMel = new float[_filterbankSize];
    var frame = new float[fftSize];     // currently processed (zero-padded) frame
    var zeros = new float[fftSize];     // source of zeros for resetting the frame

    // ------------------------------ frame-by-frame ------------------------------

    var result = new List<FeatureVector>();

    // sample preceding the current frame (input of the pre-emphasis recurrence)
    var previous = startSample > 0 ? signal[startSample - 1] : 0.0f;

    for (var pos = startSample; pos + frameSize < endSample; pos += hopSize)
    {
        // reset the frame and load the next portion of samples

        zeros.FastCopyTo(frame, zeros.Length);
        signal.Samples.FastCopyTo(frame, windowSamples.Length, pos);

        // 0) pre-emphasis (if needed)

        if (_preEmphasis > 0.0)
        {
            for (var n = 0; n < frameSize; n++)
            {
                var current = frame[n];
                frame[n] = current - previous * _preEmphasis;
                previous = current;
            }

            // the next frame starts hopSize samples later,
            // so remember the sample right before its beginning
            previous = signal[pos + hopSize - 1];
        }

        // 1) apply window

        if (_window != WindowTypes.Rectangular)
        {
            frame.ApplyWindow(windowSamples);
        }

        // 2) calculate power spectrum

        fft.PowerSpectrum(frame, powerSpectrum);

        // 3) apply mel filterbank and take log() of the result

        FilterBanks.ApplyAndLog(_melFilterBank, powerSpectrum, logMel);

        // 4) dct-II

        var coeffs = new float[FeatureCount];
        dct.Direct(logMel, coeffs);

        // 5) (optional) liftering

        if (lifterCoeffs != null)
        {
            coeffs.ApplyWindow(lifterCoeffs);
        }

        // store the vector together with the position of the frame start

        var vector = new FeatureVector
        {
            Features = coeffs,
            TimePosition = (double)pos / signal.SamplingRate
        };

        result.Add(vector);
    }

    return result;
}
/// <summary>
/// Method for computing modulation spectra.
/// Each vector representing one modulation spectrum is a flattened version of 2D spectrum.
///
/// If no featuregram was provided, the band envelopes are obtained from the signal:
///
///     0) [Optional] pre-emphasis
///     1) Apply window (if rectangular window was specified then just do nothing)
///     2) Obtain power spectrum
///     3) Apply filterbank (12 mel bands in range 100..3200 Hz, if no filterbank was set)
///
/// Otherwise the columns of the featuregram are used as envelopes and the samples of the signal
/// are not analyzed at all (only the sampling rate is used).
///
/// Then each envelope is normalized by its long-term average magnitude and
/// the power spectrum of each envelope is computed in blocks of modulation FFT size.
///
/// The method overwrites the fields _filterbank (if it was null),
/// _featureCount, _featureDescriptions and _envelopes.
/// </summary>
/// <param name="signal">Signal under analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of flattened modulation spectra</returns>
public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
{
    // ====================================== PREPARE =======================================

    var hopSize = (int)(signal.SamplingRate * HopSize);
    var frameSize = (int)(signal.SamplingRate * FrameSize);
    var windowSamples = Window.OfType(_window, frameSize);
    var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

    var fft = new Fft(fftSize);
    var modulationFft = new Fft(_modulationFftSize);

    var modulationBinCount = _modulationFftSize / 2 + 1;

    // number of envelopes (bands); it must agree with the number of rows in _envelopes below
    int bandCount;

    if (_featuregram == null)
    {
        if (_filterbank == null)
        {
            // the filterbank must match the size of the spectrum computed below,
            // so it is built for the effective FFT size (not for the _fftSize field)
            _filterbank = FilterBanks.Triangular(fftSize, signal.SamplingRate,
                              FilterBanks.MelBands(12, fftSize, signal.SamplingRate, 100, 3200));
        }

        bandCount = _filterbank.Length;
    }
    else
    {
        bandCount = _featuregram[0].Length;
    }

    _featureCount = bandCount * modulationBinCount;

    // each envelope gets one value per hop, hence the sampling rate of the envelopes:
    var modulationSamplingRate = (float)signal.SamplingRate / hopSize;
    var resolution = modulationSamplingRate / _modulationFftSize;

    _featureDescriptions = new string[bandCount * modulationBinCount];

    var idx = 0;
    for (var fi = 0; fi < bandCount; fi++)
    {
        for (var fj = 0; fj <= _modulationFftSize / 2; fj++)
        {
            _featureDescriptions[idx++] = string.Format("band_{0}_mf_{1:F2}_Hz", fi + 1, fj * resolution);
        }
    }

    // ================================= MAIN PROCESSING ==================================

    var featureVectors = new List<FeatureVector>();

    var en = 0;
    var i = startSample;

    if (_featuregram == null)
    {
        _envelopes = new float[_filterbank.Length][];
        for (var n = 0; n < _envelopes.Length; n++)
        {
            _envelopes[n] = new float[signal.Length / hopSize];
        }

        // the sample preceding the first frame (needed by the pre-emphasis recurrence)
        var prevSample = startSample > 0 ? signal[startSample - 1] : 0.0f;

        // ===================== compute local FFTs (do STFT) =======================

        var spectrum = new float[fftSize / 2 + 1];
        var filteredSpectrum = new float[_filterbank.Length];

        var block = new float[fftSize];           // buffer for currently processed signal block at each step
        var zeroblock = new float[fftSize];       // buffer of zeros for quick memset

        while (i + frameSize < endSample)
        {
            zeroblock.FastCopyTo(block, zeroblock.Length);
            signal.Samples.FastCopyTo(block, frameSize, i);

            // 0) pre-emphasis (if needed);
            //    it is done only here (frame by frame), so that it is applied exactly once

            if (_preEmphasis > 0.0)
            {
                for (var k = 0; k < frameSize; k++)
                {
                    var y = block[k] - prevSample * _preEmphasis;
                    prevSample = block[k];
                    block[k] = y;
                }

                // restart the recurrence from the sample right before the next frame
                prevSample = signal[i + hopSize - 1];
            }

            // 1) apply window

            if (_window != WindowTypes.Rectangular)
            {
                block.ApplyWindow(windowSamples);
            }

            // 2) calculate power spectrum

            fft.PowerSpectrum(block, spectrum);

            // 3) apply filterbank...

            FilterBanks.Apply(_filterbank, spectrum, filteredSpectrum);

            // ...and save results for future calculations

            for (var n = 0; n < _envelopes.Length; n++)
            {
                _envelopes[n][en] = filteredSpectrum[n];
            }

            en++;

            i += hopSize;
        }
    }
    else
    {
        // transpose the featuregram: each feature (column) becomes an envelope

        en = _featuregram.Length;

        _envelopes = new float[_featuregram[0].Length][];

        for (var n = 0; n < _envelopes.Length; n++)
        {
            _envelopes[n] = new float[en];

            for (i = 0; i < en; i++)
            {
                _envelopes[n][i] = _featuregram[i][n];
            }
        }
    }

    // =========================== modulation analysis =======================

    var envelopeLength = en;

    // long-term AVG-normalization (by the average magnitude of an envelope)

    foreach (var envelope in _envelopes)
    {
        var avg = 0.0f;
        for (var k = 0; k < envelopeLength; k++)
        {
            // values coming from a featuregram may be negative, hence the absolute value
            avg += Math.Abs(envelope[k]);
        }
        avg /= envelopeLength;

        if (avg >= 1e-10)   // this happens more frequently
        {
            for (var k = 0; k < envelopeLength; k++)
            {
                envelope[k] /= avg;
            }
        }
    }

    var modBlock = new float[_modulationFftSize];
    var zeroModblock = new float[_modulationFftSize];
    var modSpectrum = new float[modulationBinCount];

    i = 0;
    while (i < envelopeLength)
    {
        var vector = new float[_envelopes.Length * modulationBinCount];
        var offset = 0;

        foreach (var envelope in _envelopes)
        {
            // the last blocks can be shorter than the modulation FFT size; they are zero-padded
            zeroModblock.FastCopyTo(modBlock, _modulationFftSize);
            envelope.FastCopyTo(modBlock, Math.Min(_modulationFftSize, envelopeLength - i), i);

            modulationFft.PowerSpectrum(modBlock, modSpectrum);
            modSpectrum.FastCopyTo(vector, modSpectrum.Length, 0, offset);

            offset += modSpectrum.Length;
        }

        featureVectors.Add(new FeatureVector
        {
            Features = vector,
            TimePosition = (double)i * hopSize / signal.SamplingRate
        });

        i += _modulationHopSize;
    }

    return featureVectors;
}
/// <summary>
/// Phase Vocoder algorithm.
///
/// If phase locking is switched on, all work is delegated to PhaseLocking().
/// Otherwise the signal is analyzed in windowed blocks of FFT size taken every
/// analysis hop; in each block the phases of the bins are re-accumulated for
/// the synthesis hop, and the re-synthesized blocks are overlap-added every synthesis hop.
/// </summary>
/// <param name="signal">Signal to process</param>
/// <param name="filteringOptions">Not used by this method</param>
/// <returns>New signal with the sampling rate of the input signal</returns>
public DiscreteSignal ApplyTo(DiscreteSignal signal, FilteringOptions filteringOptions = FilteringOptions.Auto)
{
    if (_phaseLocking)
    {
        return PhaseLocking(signal);
    }

    var source = signal.Samples;
    var stretched = new float[(int)(source.Length * _stretch) + _fftSize];
    var overlapWeights = new float[stretched.Length];

    var real = new float[_fftSize];
    var imag = new float[_fftSize];
    var zeros = new float[_fftSize];

    var binCount = _fftSize / 2 + 1;

    var lastPhase = new double[binCount];
    var synthesisPhase = new double[binCount];

    var writePos = 0;
    var readPos = 0;

    while (readPos + _fftSize < source.Length)
    {
        // analysis: windowed block -> spectrum

        source.FastCopyTo(real, _fftSize, readPos);
        zeros.FastCopyTo(imag, _fftSize);

        real.ApplyWindow(_window);

        _fft.Direct(real, imag);

        // processing: estimate the true frequency of each bin and advance its synthesis phase

        for (var bin = 0; bin < binCount; bin++)
        {
            var magnitude = Math.Sqrt(real[bin] * real[bin] + imag[bin] * imag[bin]);
            var phase = Math.Atan2(imag[bin], real[bin]);

            var delta = phase - lastPhase[bin];
            lastPhase[bin] = phase;

            // deviation of the phase increment from the increment expected for the bin frequency,
            // wrapped to the range [-pi, pi)
            var deviation = delta - _hopAnalysis * _omega[bin];
            var wrapped = MathUtils.Mod(deviation + Math.PI, 2 * Math.PI) - Math.PI;

            var frequency = _omega[bin] + wrapped / _hopAnalysis;

            synthesisPhase[bin] += _hopSynthesis * frequency;

            real[bin] = (float)(magnitude * Math.Cos(synthesisPhase[bin]));
            imag[bin] = (float)(magnitude * Math.Sin(synthesisPhase[bin]));
        }

        // the upper part of the spectrum is zeroed
        // (this is compensated by the division by _fftSize / 2 at the end)

        for (var bin = binCount; bin < _fftSize; bin++)
        {
            real[bin] = 0.0f;
            imag[bin] = 0.0f;
        }

        // synthesis: spectrum -> block, windowed overlap-add

        _fft.Inverse(real, imag);

        for (var n = 0; n < real.Length; n++)
        {
            var target = writePos + n;

            stretched[target] += real[n] * _window[n];
            overlapWeights[target] += _windowSquared[n];
        }

        writePos += _hopSynthesis;
        readPos += _hopAnalysis;
    }

    // normalization by the accumulated squared window (where it is not too close to zero)

    for (var n = 0; n < stretched.Length; n++)
    {
        if (overlapWeights[n] < 1e-3)
        {
            continue;
        }

        stretched[n] /= (overlapWeights[n] * _fftSize / 2);
    }

    return new DiscreteSignal(signal.SamplingRate, stretched);
}
/// <summary>
/// PNCC algorithm according to [Kim and Stern, 2016]:
///
///     0) [Optional] pre-emphasis (of the entire signal)
///
/// Decompose signal into overlapping frames of length frameSize (each frame starts hopSize samples
/// after the previous one) and zero-pad each frame to fftSize. In each frame do:
///
///     1) Apply window (if rectangular window was specified then just do nothing)
///     2) Obtain power spectrum
///     3) Apply gammatone filters (squared)
///     4) Medium-time processing (asymmetric noise suppression, temporal masking, spectral smoothing)
///     5) Apply nonlinearity
///     6) Do dct-II (normalized)
///
/// The first 2 * M frames only fill the ring buffer of spectra: no vectors are produced for them.
/// </summary>
/// <param name="signal">Signal for analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of pncc vectors</returns>
public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
{
    // ====================================== PREPARE =======================================

    var hopSize = (int)(signal.SamplingRate * HopSize);
    var frameSize = (int)(signal.SamplingRate * FrameSize);
    var windowSamples = Window.OfType(_window, frameSize);
    var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

    // the filterbank must match the size of the spectrum computed below (fftSize / 2 + 1),
    // so it is built for the effective FFT size (not for the _fftSize field)
    _gammatoneFilterBank = FilterBanks.Erb(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq);

    // use power spectrum:

    foreach (var filter in _gammatoneFilterBank)
    {
        for (var j = 0; j < filter.Length; j++)
        {
            var ps = filter[j] * filter[j];
            filter[j] = ps;
        }
    }

    var fft = new Fft(fftSize);
    var dct = new Dct2(_filterbankSize, FeatureCount);

    var gammatoneSpectrum = new float[_filterbankSize];

    var spectrumQOut = new float[_filterbankSize];
    var filteredSpectrumQ = new float[_filterbankSize];
    var spectrumS = new float[_filterbankSize];
    var smoothedSpectrumS = new float[_filterbankSize];
    var avgSpectrumQ1 = new float[_filterbankSize];
    var avgSpectrumQ2 = new float[_filterbankSize];
    var smoothedSpectrum = new float[_filterbankSize];

    const float meanPower = 1e10f;
    var mean = 4e07f;

    // exponent of the power nonlinearity (not used if _power == 0, i.e. in case of Log10)
    var d = _power != 0 ? 1.0 / _power : 0.0;

    var block = new float[fftSize];           // buffer for currently processed signal block at each step
    var zeroblock = new float[fftSize];       // buffer of zeros for quick memset

    _ringBuffer = new SpectraRingBuffer(2 * M + 1, _filterbankSize);

    var spectrum = new float[fftSize / 2 + 1];

    // 0) pre-emphasis (if needed)

    if (_preEmphasis > 0.0)
    {
        var preemphasisFilter = new PreEmphasisFilter(_preEmphasis);
        signal = preemphasisFilter.ApplyTo(signal);
    }

    // ================================= MAIN PROCESSING ==================================

    var featureVectors = new List<FeatureVector>();

    var i = 0;                      // index of the current frame
    var timePos = startSample;      // position of the first sample of the current frame

    while (timePos + frameSize < endSample)
    {
        // prepare next block for processing

        zeroblock.FastCopyTo(block, zeroblock.Length);
        signal.Samples.FastCopyTo(block, frameSize, timePos);

        // 1) apply window

        if (_window != WindowTypes.Rectangular)
        {
            block.ApplyWindow(windowSamples);
        }

        // 2) calculate power spectrum

        fft.PowerSpectrum(block, spectrum);

        // 3) apply gammatone filterbank

        FilterBanks.Apply(_gammatoneFilterBank, spectrum, gammatoneSpectrum);

        // =============================================================
        // 4) medium-time processing blocks:

        // 4.1) temporal integration (zero-phase moving average filter)

        _ringBuffer.Add(gammatoneSpectrum);

        var spectrumQ = _ringBuffer.AverageSpectrum;

        // 4.2) asymmetric noise suppression

        if (i == 2 * M)
        {
            // initialization at the first frame for which the output is produced
            for (var j = 0; j < spectrumQOut.Length; j++)
            {
                spectrumQOut[j] = spectrumQ[j] * 0.9f;
            }
        }

        if (i >= 2 * M)
        {
            for (var j = 0; j < spectrumQOut.Length; j++)
            {
                if (spectrumQ[j] > spectrumQOut[j])
                {
                    spectrumQOut[j] = LambdaA * spectrumQOut[j] + (1 - LambdaA) * spectrumQ[j];
                }
                else
                {
                    spectrumQOut[j] = LambdaB * spectrumQOut[j] + (1 - LambdaB) * spectrumQ[j];
                }
            }

            for (var j = 0; j < filteredSpectrumQ.Length; j++)
            {
                filteredSpectrumQ[j] = Math.Max(spectrumQ[j] - spectrumQOut[j], 0.0f);

                if (i == 2 * M)
                {
                    avgSpectrumQ1[j] = 0.9f * filteredSpectrumQ[j];
                    avgSpectrumQ2[j] = filteredSpectrumQ[j];
                }

                if (filteredSpectrumQ[j] > avgSpectrumQ1[j])
                {
                    avgSpectrumQ1[j] = LambdaA * avgSpectrumQ1[j] + (1 - LambdaA) * filteredSpectrumQ[j];
                }
                else
                {
                    avgSpectrumQ1[j] = LambdaB * avgSpectrumQ1[j] + (1 - LambdaB) * filteredSpectrumQ[j];
                }

                // 4.3) temporal masking

                var threshold = filteredSpectrumQ[j];

                avgSpectrumQ2[j] *= LambdaT;

                if (spectrumQ[j] < C * spectrumQOut[j])
                {
                    filteredSpectrumQ[j] = avgSpectrumQ1[j];
                }
                else
                {
                    if (filteredSpectrumQ[j] <= avgSpectrumQ2[j])
                    {
                        filteredSpectrumQ[j] = MuT * avgSpectrumQ2[j];
                    }
                }

                avgSpectrumQ2[j] = Math.Max(avgSpectrumQ2[j], threshold);

                filteredSpectrumQ[j] = Math.Max(filteredSpectrumQ[j], avgSpectrumQ1[j]);
            }

            // 4.4) spectral smoothing

            for (var j = 0; j < spectrumS.Length; j++)
            {
                spectrumS[j] = filteredSpectrumQ[j] / Math.Max(spectrumQ[j], float.Epsilon);
            }

            for (var j = 0; j < smoothedSpectrumS.Length; j++)
            {
                smoothedSpectrumS[j] = 0.0f;

                // average over the neighbouring bands [j - N, j + N] clipped to the valid range
                var total = 0;
                for (var k = Math.Max(j - N, 0); k < Math.Min(j + N + 1, _filterbankSize); k++, total++)
                {
                    smoothedSpectrumS[j] += spectrumS[k];
                }
                smoothedSpectrumS[j] /= total;
            }

            // 4.5) mean power normalization

            var centralSpectrum = _ringBuffer.CentralSpectrum;

            var sumPower = 0.0f;
            for (var j = 0; j < smoothedSpectrum.Length; j++)
            {
                smoothedSpectrum[j] = smoothedSpectrumS[j] * centralSpectrum[j];
                sumPower += smoothedSpectrum[j];
            }

            mean = LambdaMu * mean + (1 - LambdaMu) * sumPower;

            for (var j = 0; j < smoothedSpectrum.Length; j++)
            {
                smoothedSpectrum[j] *= meanPower / mean;
            }

            // =============================================================

            // 5) nonlinearity (power ^ d    or    Log10)

            if (_power != 0)
            {
                for (var j = 0; j < smoothedSpectrum.Length; j++)
                {
                    smoothedSpectrum[j] = (float)Math.Pow(smoothedSpectrum[j], d);
                }
            }
            else
            {
                for (var j = 0; j < smoothedSpectrum.Length; j++)
                {
                    smoothedSpectrum[j] = (float)Math.Log10(smoothedSpectrum[j] + float.Epsilon);
                }
            }

            // 6) dct-II (normalized)

            var pnccs = new float[FeatureCount];
            dct.DirectN(smoothedSpectrum, pnccs);

            // add pncc vector to output sequence
            // (the time position is the beginning of the frame that has just been read)

            featureVectors.Add(new FeatureVector
            {
                Features = pnccs,
                TimePosition = (double)timePos / signal.SamplingRate
            });
        }

        i++;

        timePos += hopSize;
    }

    return featureVectors;
}
/// <summary>
/// Computes PLP feature vector in one frame. The processing chain:
///
///     1) Apply window
///     2) Obtain power spectrum
///     3) Apply filterbank of bark bands (or mel bands)
///     4) [Optional] filter each component of the processed spectrum with a RASTA filter
///     5) Apply equal loudness curve
///     6) Take cubic root
///     7) Do LPC
///     8) Convert LPC to cepstrum
///     9) [Optional] lifter cepstrum
///
/// Note: the samples in <paramref name="block"/> are modified in-place
/// (zero-padding and windowing).
/// </summary>
/// <param name="block">Samples for analysis</param>
/// <returns>PLP vector</returns>
public override float[] ProcessFrame(float[] block)
{
    // everything after the first FrameSize samples is padding: fill it with zeros

    for (var pos = FrameSize; pos < block.Length; pos++)
    {
        block[pos] = 0;
    }

    // 1) apply window

    block.ApplyWindow(_windowSamples);

    // 2) calculate power spectrum (without normalization)

    _fft.PowerSpectrum(block, _spectrum, false);

    // 3) apply filterbank on the result (bark frequencies by default)

    FilterBanks.Apply(FilterBank, _spectrum, _bandSpectrum);

    // 4) RASTA filtering in log-domain [optional]

    if (_rasta > 0)
    {
        for (var band = 0; band < _bandSpectrum.Length; band++)
        {
            var logPower = (float)Math.Log(_bandSpectrum[band] + float.Epsilon);

            logPower = _rastaFilters[band].Process(logPower);

            _bandSpectrum[band] = (float)Math.Exp(logPower);
        }
    }

    // 5) and 6) apply equal loudness curve and take cubic root (the exponent used is 0.33)

    for (var band = 0; band < _bandSpectrum.Length; band++)
    {
        var loudness = Math.Max(_bandSpectrum[band], 1.0) * _equalLoudnessCurve[band];

        _bandSpectrum[band] = (float)Math.Pow(loudness, 0.33);
    }

    // 7) LPC from power spectrum:

    var columns = _idftTable[0].Length;

    // get autocorrelation samples from post-processed power spectrum (via IDFT):

    for (var row = 0; row < _idftTable.Length; row++)
    {
        var cosines = _idftTable[row];

        // the two edge bands are duplicated, so their contributions are added right away

        var sum = cosines[0] * _bandSpectrum[0] +
                  cosines[columns - 1] * _bandSpectrum[columns - 3];

        for (var col = 1; col < columns - 1; col++)
        {
            sum += cosines[col] * _bandSpectrum[col - 1];
        }

        _cc[row] = sum / (2 * (columns - 1));
    }

    // LPC (the buffer of coefficients is reset before the recursion):

    for (var pos = 0; pos < _lpc.Length; pos++)
    {
        _lpc[pos] = 0;
    }

    var err = Lpc.LevinsonDurbin(_cc, _lpc, _lpcOrder);

    // 8) compute LPCC coefficients from LPC

    var cepstrum = new float[FeatureCount];

    Lpc.ToCepstrum(_lpc, err, cepstrum);

    // 9) (optional) liftering

    if (_lifterCoeffs != null)
    {
        cepstrum.ApplyWindow(_lifterCoeffs);
    }

    return cepstrum;
}