/// <summary>
/// Pitch estimation: from spectral peaks.
/// The requested fragment of the signal is Hann-windowed, its (non-normalized) power spectrum
/// is computed, and the spectrum-based FromSpectralPeaks overload does the actual estimation.
/// </summary>
/// <param name="signal">Signal to analyze</param>
/// <param name="startPos">Start position of the analyzed fragment (default: 0)</param>
/// <param name="endPos">End position of the analyzed fragment; -1 (default) stands for signal.Length</param>
/// <param name="low">Passed through to the spectrum-based overload (default: 80)</param>
/// <param name="high">Passed through to the spectrum-based overload (default: 400)</param>
/// <param name="fftSize">FFT size; when not positive, the next power of two of the fragment length is used</param>
/// <returns>Value returned by the spectrum-based FromSpectralPeaks overload</returns>
public static float FromSpectralPeaks(DiscreteSignal signal, int startPos = 0, int endPos = -1, float low = 80, float high = 400, int fftSize = 0)
{
    if (endPos == -1)
    {
        endPos = signal.Length;
    }

    // Always window a slice, never the caller's signal: previously the full-range case
    // applied the Hann window to the argument itself and silently modified the caller's data.
    // NOTE(review): relies on the range indexer returning a signal with its own sample buffer - confirm.
    var fragment = signal[startPos, endPos];

    fragment.ApplyWindow(WindowTypes.Hann);

    var size = fftSize > 0 ? fftSize : MathUtils.NextPowerOfTwo(fragment.Length);
    var fft = new Fft(size);

    var spectrum = fft.PowerSpectrum(fragment, false).Samples;

    return FromSpectralPeaks(spectrum, fragment.SamplingRate, low, high);
}
// Returns the non-normalized power spectrum of the 512-sample fragment of _signal
// whose start position is SamplingRate * HopSize * idx (truncated to int).
float[] ComputeSpectrum(int idx)
{
    var start = (int)(_signal.SamplingRate * HopSize * idx);
    var end = start + 512;

    var fragment = _signal[start, end];
    var powerSpectrum = _fft.PowerSpectrum(fragment, normalize: false);

    return powerSpectrum.Samples;
}
/// <summary>
/// Re-reads FFT size, cepstrum size and hop size from the text boxes, recreates the FFT and
/// cepstral transform objects when sizes have changed, and then refreshes the spectrum and
/// cepstrum panels for the block number _specNo.
/// </summary>
private void UpdateSpectrumAndCepstrum()
{
    var fftSize = int.Parse(fftSizeTextBox.Text);
    var cepstrumSize = int.Parse(cepstrumSizeTextBox.Text);
    _hopSize = int.Parse(hopSizeTextBox.Text);

    var fftSizeChanged = fftSize != _fftSize;
    var cepstrumSizeChanged = cepstrumSize != _cepstrumSize;

    if (fftSizeChanged)
    {
        _fftSize = fftSize;
        _fft = new Fft(fftSize);
    }

    // The cepstral transform depends on both sizes; build it once
    // (previously it was constructed twice when both sizes changed).
    if (fftSizeChanged || cepstrumSizeChanged)
    {
        _cepstrumSize = cepstrumSize;
        _cepstralTransform = new CepstralTransform(_cepstrumSize, _fftSize);
    }

    var pos = _hopSize * _specNo;
    var block = _signal[pos, pos + _fftSize];

    //block.ApplyWindow(WindowTypes.Hamming);

    var cepstrum = _cepstralTransform.Direct(block);
    var pitch = Pitch.FromCepstrum(block);

    // ************************************************************************
    //      just visualize spectrum estimated from cepstral coefficients:
    // ************************************************************************

    var real = new float[_fftSize];
    var imag = new float[_fftSize];

    // Keep at most the first 32 cepstral coefficients, but never read past the cepstrum
    // or write past the FFT buffer (the fixed bound of 32 threw for smaller sizes).
    var keptCoeffs = Math.Min(32, Math.Min(cepstrum.Samples.Length, real.Length));

    for (var i = 0; i < keptCoeffs; i++)
    {
        real[i] = cepstrum[i];
    }

    _fft.Direct(real, imag);

    var spectrum = _fft.PowerSpectrum(block, normalize: false).Samples;
    var avg = spectrum.Average(s => LevelScale.ToDecibel(s));

    var spectrumEstimate = real.Take(_fftSize / 2 + 1)
                               .Select(s => (float)LevelScale.FromDecibel(s * 40 / _fftSize - avg))
                               .ToArray();

    spectrumPanel.Line = spectrum;
    spectrumPanel.Markline = spectrumEstimate;
    spectrumPanel.ToDecibel();

    cepstrumPanel.Line = cepstrum.Samples;
    cepstrumPanel.Mark = (int)(_signal.SamplingRate / pitch);
}
/// <summary>
/// Builds the signal selected in builderComboBox (white noise for any unrecognized name),
/// lists the builder parameters together with min / max / avg of the generated samples,
/// superimposes the generated signal onto _signal1 (when one is present) and shows
/// the power spectrum of the first 512 generated samples.
/// </summary>
/// <param name="sender">Event source (not used)</param>
/// <param name="e">Event arguments (not used)</param>
private void generateSignalButton_Click(object sender, EventArgs e)
{
    // The duration comes from a free-form text box: ignore the click instead of throwing
    // out of the UI handler when the text is not a positive integer.
    int sampleCount;
    if (!int.TryParse(durationTextBox.Text, out sampleCount) || sampleCount <= 0)
    {
        return;
    }

    var samplingRate = _signal1?.SamplingRate ?? 16000;

    SignalBuilder signalBuilder;

    switch (builderComboBox.Text)
    {
        case "Sinusoid":
            signalBuilder = new SineBuilder();
            _signal2 = signalBuilder
                            .SetParameter("low", -0.4f)
                            .SetParameter("high", 0.4f)
                            .SetParameter("freq", 233 /*Hz*/)
                            .OfLength(sampleCount)
                            .SampledAt(samplingRate)
                            .Build();
            break;

        case "Sawtooth":
            signalBuilder = new SawtoothBuilder();
            _signal2 = signalBuilder
                            .SetParameter("low", -0.3f)
                            .SetParameter("high", 0.3f)
                            .SetParameter("freq", 233 /*Hz*/)
                            .OfLength(sampleCount)
                            .SampledAt(samplingRate)
                            .Build();
            break;

        case "Triangle Wave":
            signalBuilder = new TriangleWaveBuilder();
            _signal2 = signalBuilder
                            .SetParameter("low", -0.3f)
                            .SetParameter("high", 0.3f)
                            .SetParameter("freq", 233 /*Hz*/)
                            .OfLength(sampleCount)
                            .SampledAt(samplingRate)
                            .Build();
            break;

        case "Square Wave":
            signalBuilder = new SquareWaveBuilder();
            _signal2 = signalBuilder
                            .SetParameter("low", -0.25f)
                            .SetParameter("high", 0.25f)
                            .SetParameter("freq", 233 /*Hz*/)
                            .OfLength(sampleCount)
                            .SampledAt(samplingRate)
                            .Build();
            break;

        case "Pulse Wave":
            signalBuilder = new PulseWaveBuilder();
            _signal2 = signalBuilder
                            .SetParameter("min", 0)
                            .SetParameter("max", 0.5f)
                            .SetParameter("pulse", 0.007f /*sec*/)
                            .SetParameter("period", 0.020f /*sec*/)
                            .OfLength(sampleCount)
                            .DelayedBy(50)
                            .SampledAt(samplingRate)
                            .Build();
            break;

        case "Chirp":
            signalBuilder = new ChirpBuilder();
            _signal2 = signalBuilder
                            .SetParameter("min", -0.3f)
                            .SetParameter("max", 0.3f)
                            .OfLength(sampleCount)
                            .RepeatedTimes(3)
                            .SampledAt(samplingRate)
                            .Build();
            break;

        case "Sinc":
            signalBuilder = new SincBuilder();
            _signal2 = signalBuilder
                            .SetParameter("min", 0)
                            .SetParameter("max", 0.5f)
                            .SetParameter("freq", 700 /*Hz*/)
                            .OfLength(sampleCount)
                            .SampledAt(samplingRate)
                            .Build();
            break;

        case "Ramp":
            signalBuilder = new RampBuilder();
            _signal2 = signalBuilder
                            .SetParameter("slope", 0.0007f)
                            .SetParameter("intercept", -0.5f)
                            .OfLength(sampleCount)
                            .SampledAt(samplingRate)
                            .Build();
            break;

        case "AWGN":
            signalBuilder = new AwgnBuilder();
            _signal2 = signalBuilder
                            .SetParameter("sigma", 0.25f)
                            .OfLength(sampleCount)
                            .SampledAt(samplingRate)
                            .Build();
            break;

        case "Pink Noise":
            signalBuilder = new PinkNoiseBuilder();
            _signal2 = signalBuilder
                            .SetParameter("min", -0.5f)
                            .SetParameter("max", 0.5f)
                            .OfLength(sampleCount)
                            .SampledAt(samplingRate)
                            .Build();
            break;

        case "Red Noise":
            signalBuilder = new RedNoiseBuilder();
            _signal2 = signalBuilder
                            .SetParameter("min", -0.5f)
                            .SetParameter("max", 0.5f)
                            .OfLength(sampleCount)
                            .SampledAt(samplingRate)
                            .Build();
            break;

        case "Perlin Noise":
            signalBuilder = new PerlinNoiseBuilder();
            _signal2 = signalBuilder
                            .SetParameter("min", -0.3f)
                            .SetParameter("max", 0.7f)
                            .OfLength(sampleCount)
                            .SampledAt(samplingRate)
                            .Build();
            break;

        default:
            signalBuilder = new WhiteNoiseBuilder();
            _signal2 = signalBuilder
                            .SetParameter("min", -0.5f)
                            .SetParameter("max", 0.5f)
                            .OfLength(sampleCount)
                            .SampledAt(samplingRate)
                            .Build();
            break;
    }

    // builder parameters followed by basic statistics of the generated samples
    builderParametersListBox.Items.Clear();
    builderParametersListBox.Items.AddRange(signalBuilder.GetParametersInfo());
    builderParametersListBox.Items.Add("");
    builderParametersListBox.Items.Add($"min: {_signal2.Samples.Min():F2}");
    builderParametersListBox.Items.Add($"max: {_signal2.Samples.Max():F2}");
    builderParametersListBox.Items.Add($"avg: {_signal2.Samples.Average():F4}");

    if (_signal1 != null)
    {
        // three copies of the generated signal, separated by gaps of 2000 samples
        var positions = Enumerable.Range(0, 3).Select(pos => pos * (_signal2.Length + 2000)).ToArray();
        _signal3 = _signal1.SuperimposeMany(_signal2, positions);
        superimposedSignalPanel.Signal = _signal3;
    }

    generatedSignalPanel.Stride = 1;
    generatedSignalPanel.Signal = _signal2;

    var spectrum = _fft.PowerSpectrum(_signal2.First(512));

    spectrumPanel.Line = spectrum.Samples;
    spectrumPanel.ToDecibel();
}
/// <summary>
/// PNCC algorithm according to [Kim and Stern, 2016]:
///     0) [Optional] pre-emphasis
///
/// Decompose signal into overlapping (hopSize) frames of length fftSize. In each frame do:
///
///     1) Apply window (if rectangular window was specified then just do nothing)
///     2) Obtain power spectrum
///     3) Apply gammatone filters (squared)
///     4) Medium-time processing (asymmetric noise suppression, temporal masking, spectral smoothing)
///     5) Apply nonlinearity
///     6) Do dct-II (normalized)
///
/// No vector is produced for the first 2*M frames: they only fill the ring buffer
/// used for temporal integration.
/// </summary>
/// <param name="signal">Signal for analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of pncc vectors</returns>
public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
{
    // ====================================== PREPARE =======================================

    var hopSize = (int)(signal.SamplingRate * HopSize);
    var frameSize = (int)(signal.SamplingRate * FrameSize);
    var windowSamples = Window.OfType(_window, frameSize);

    var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

    // The filter bank must be designed for the FFT size that is actually used below.
    // It was previously built with _fftSize, which disagrees with the spectrum length
    // whenever _fftSize is smaller than the frame size.
    _gammatoneFilterBank = FilterBanks.Erb(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq);

    // use power spectrum:

    foreach (var filter in _gammatoneFilterBank)
    {
        for (var j = 0; j < filter.Length; j++)
        {
            var ps = filter[j] * filter[j];
            filter[j] = ps;
        }
    }

    var fft = new Fft(fftSize);
    var dct = new Dct2(_filterbankSize, FeatureCount);

    var gammatoneSpectrum = new float[_filterbankSize];

    var spectrumQOut = new float[_filterbankSize];
    var filteredSpectrumQ = new float[_filterbankSize];
    var spectrumS = new float[_filterbankSize];
    var smoothedSpectrumS = new float[_filterbankSize];
    var avgSpectrumQ1 = new float[_filterbankSize];
    var avgSpectrumQ2 = new float[_filterbankSize];
    var smoothedSpectrum = new float[_filterbankSize];

    const float meanPower = 1e10f;
    var mean = 4e07f;

    var d = _power != 0 ? 1.0 / _power : 0.0;

    var block = new float[fftSize];       // buffer for currently processed signal block at each step
    var zeroblock = new float[fftSize];   // buffer of zeros for quick memset

    _ringBuffer = new SpectraRingBuffer(2 * M + 1, _filterbankSize);

    var spectrum = new float[fftSize / 2 + 1];

    // 0) pre-emphasis (if needed)

    if (_preEmphasis > 0.0)
    {
        var preemphasisFilter = new PreEmphasisFilter(_preEmphasis);
        signal = preemphasisFilter.ApplyTo(signal);
    }

    // ================================= MAIN PROCESSING ==================================

    var featureVectors = new List<FeatureVector>();

    var i = 0;
    var timePos = startSample;

    while (timePos + frameSize < endSample)
    {
        // prepare next block for processing

        zeroblock.FastCopyTo(block, zeroblock.Length);
        signal.Samples.FastCopyTo(block, frameSize, timePos);

        // 1) apply window

        if (_window != WindowTypes.Rectangular)
        {
            block.ApplyWindow(windowSamples);
        }

        // 2) calculate power spectrum

        fft.PowerSpectrum(block, spectrum);

        // 3) apply gammatone filterbank

        FilterBanks.Apply(_gammatoneFilterBank, spectrum, gammatoneSpectrum);

        // =============================================================
        // 4) medium-time processing blocks:

        // 4.1) temporal integration (zero-phase moving average filter)

        _ringBuffer.Add(gammatoneSpectrum);

        var spectrumQ = _ringBuffer.AverageSpectrum;

        // 4.2) asymmetric noise suppression

        if (i == 2 * M)
        {
            // first frame with a full ring buffer: initialize the noise floor estimate
            for (var j = 0; j < spectrumQOut.Length; j++)
            {
                spectrumQOut[j] = spectrumQ[j] * 0.9f;
            }
        }

        if (i >= 2 * M)
        {
            for (var j = 0; j < spectrumQOut.Length; j++)
            {
                if (spectrumQ[j] > spectrumQOut[j])
                {
                    spectrumQOut[j] = LambdaA * spectrumQOut[j] + (1 - LambdaA) * spectrumQ[j];
                }
                else
                {
                    spectrumQOut[j] = LambdaB * spectrumQOut[j] + (1 - LambdaB) * spectrumQ[j];
                }
            }

            for (var j = 0; j < filteredSpectrumQ.Length; j++)
            {
                filteredSpectrumQ[j] = Math.Max(spectrumQ[j] - spectrumQOut[j], 0.0f);

                if (i == 2 * M)
                {
                    avgSpectrumQ1[j] = 0.9f * filteredSpectrumQ[j];
                    avgSpectrumQ2[j] = filteredSpectrumQ[j];
                }

                if (filteredSpectrumQ[j] > avgSpectrumQ1[j])
                {
                    avgSpectrumQ1[j] = LambdaA * avgSpectrumQ1[j] + (1 - LambdaA) * filteredSpectrumQ[j];
                }
                else
                {
                    avgSpectrumQ1[j] = LambdaB * avgSpectrumQ1[j] + (1 - LambdaB) * filteredSpectrumQ[j];
                }

                // 4.3) temporal masking

                var threshold = filteredSpectrumQ[j];

                avgSpectrumQ2[j] *= LambdaT;

                if (spectrumQ[j] < C * spectrumQOut[j])
                {
                    filteredSpectrumQ[j] = avgSpectrumQ1[j];
                }
                else if (filteredSpectrumQ[j] <= avgSpectrumQ2[j])
                {
                    filteredSpectrumQ[j] = MuT * avgSpectrumQ2[j];
                }

                avgSpectrumQ2[j] = Math.Max(avgSpectrumQ2[j], threshold);

                filteredSpectrumQ[j] = Math.Max(filteredSpectrumQ[j], avgSpectrumQ1[j]);
            }

            // 4.4) spectral smoothing

            for (var j = 0; j < spectrumS.Length; j++)
            {
                spectrumS[j] = filteredSpectrumQ[j] / Math.Max(spectrumQ[j], float.Epsilon);
            }

            for (var j = 0; j < smoothedSpectrumS.Length; j++)
            {
                smoothedSpectrumS[j] = 0.0f;

                // average over the channels [j - N, j + N], clipped to the filter bank range
                var total = 0;
                for (var k = Math.Max(j - N, 0);
                         k < Math.Min(j + N + 1, _filterbankSize);
                         k++, total++)
                {
                    smoothedSpectrumS[j] += spectrumS[k];
                }
                smoothedSpectrumS[j] /= total;
            }

            // 4.5) mean power normalization

            var centralSpectrum = _ringBuffer.CentralSpectrum;

            var sumPower = 0.0f;
            for (var j = 0; j < smoothedSpectrum.Length; j++)
            {
                smoothedSpectrum[j] = smoothedSpectrumS[j] * centralSpectrum[j];
                sumPower += smoothedSpectrum[j];
            }

            mean = LambdaMu * mean + (1 - LambdaMu) * sumPower;

            for (var j = 0; j < smoothedSpectrum.Length; j++)
            {
                smoothedSpectrum[j] *= meanPower / mean;
            }

            // =============================================================

            // 5) nonlinearity (power ^ d    or    Log10)

            if (_power != 0)
            {
                for (var j = 0; j < smoothedSpectrum.Length; j++)
                {
                    smoothedSpectrum[j] = (float)Math.Pow(smoothedSpectrum[j], d);
                }
            }
            else
            {
                for (var j = 0; j < smoothedSpectrum.Length; j++)
                {
                    smoothedSpectrum[j] = (float)Math.Log10(smoothedSpectrum[j] + float.Epsilon);
                }
            }

            // 6) dct-II (normalized)

            var pnccs = new float[FeatureCount];
            dct.DirectN(smoothedSpectrum, pnccs);

            // add pncc vector to output sequence

            featureVectors.Add(new FeatureVector
            {
                Features = pnccs,
                TimePosition = (double)timePos / signal.SamplingRate
            });
        }

        i++;

        timePos += hopSize;
    }

    return featureVectors;
}
/// <summary>
/// Computes MFCC vectors frame by frame. For every frame:
///
///     0) [Optional] pre-emphasis
///     1) Windowing (skipped for the rectangular window)
///     2) Power spectrum X
///     3) Mel filter bank followed by log(): Y = Log10(X * H)
///     4) DCT-II: mfcc = Dct(Y)
///     5) [Optional] liftering of the coefficients
///
/// Consecutive frames start HopSize samples apart; a frame is processed
/// only while (frame start + FrameSize) is less than endSample.
/// </summary>
/// <param name="samples">Samples for analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of mfcc vectors</returns>
public override List<FeatureVector> ComputeFrom(float[] samples, int startSample, int endSample)
{
    Guard.AgainstInvalidRange(startSample, endSample, "starting pos", "ending pos");

    var hop = HopSize;
    var frameLength = FrameSize;

    var result = new List<FeatureVector>();

    // sample that precedes the current one (needed by the pre-emphasis difference)
    var previous = startSample > 0 ? samples[startSample - 1] : 0.0f;

    for (var pos = startSample; pos + frameLength < endSample; pos += hop)
    {
        // clear the working block and load the next frame into it
        _zeroblock.FastCopyTo(_block, _fftSize);
        samples.FastCopyTo(_block, _windowSamples.Length, pos);

        // 0) pre-emphasis (if needed)
        if (_preEmphasis > 0.0)
        {
            for (var k = 0; k < frameLength; k++)
            {
                var current = _block[k];
                _block[k] = current - previous * _preEmphasis;
                previous = current;
            }

            // the next frame starts at pos + hop, so its predecessor is samples[pos + hop - 1]
            previous = samples[pos + hop - 1];
        }

        // 1) windowing
        if (_window != WindowTypes.Rectangular)
        {
            _block.ApplyWindow(_windowSamples);
        }

        // 2) power spectrum
        _fft.PowerSpectrum(_block, _spectrum);

        // 3) mel filter bank + log()
        FilterBanks.ApplyAndLog(FilterBank, _spectrum, _logMelSpectrum);

        // 4) dct-II
        var coefficients = new float[FeatureCount];
        _dct.Direct(_logMelSpectrum, coefficients);

        // 5) (optional) liftering
        if (_lifterCoeffs != null)
        {
            coefficients.ApplyWindow(_lifterCoeffs);
        }

        // append the vector for this frame
        var vector = new FeatureVector
        {
            Features = coefficients,
            TimePosition = (double)pos / SamplingRate
        };
        result.Add(vector);
    }

    return result;
}
// TODO: remove this )))
/// <summary>
/// For the frame selected in featuresListView: computes its power spectrum (512-point FFT),
/// finds 10 harmonic peaks, lists their frequencies in peaksListBox and renders the spectrum
/// with the peaks marked in red into spectrumPictureBox.
/// </summary>
/// <param name="sender">Event source (not used)</param>
/// <param name="e">Event arguments (not used)</param>
private void featuresListView_SelectedIndexChanged(object sender, EventArgs e)
{
    if (featuresListView.SelectedItems.Count == 0)
    {
        return;
    }

    var pos = featuresListView.SelectedIndices[0];

    var fft = new Fft(512);
    var spectrum = fft.PowerSpectrum(_signal[pos * _hopSize, pos * _hopSize + _frameSize]).Samples;

    var peaks = new int[10];
    var freqs = new float[10];

    Harmonic.Peaks(spectrum, peaks, freqs, _signal.SamplingRate);

    peaksListBox.Items.Clear();
    for (var p = 0; p < peaks.Length; p++)
    {
        peaksListBox.Items.Add($"peak #{p+1,-2} : {freqs[p],-7} Hz");
    }

    const int stride = 4;
    const int paddingX = 5;
    const int paddingY = 5;

    // The bitmap shown so far is released once the new one is in place;
    // previously a new Bitmap was leaked on every selection change.
    var previousImage = _spectrumImage;

    var image = new Bitmap(512, spectrumPictureBox.Height);

    try
    {
        // 'using' guarantees that GDI+ objects are released even if drawing throws
        using (var g = Graphics.FromImage(image))
        using (var pen = new Pen(ForeColor))
        using (var redpen = new Pen(Color.Red, 2))
        {
            g.Clear(Color.White);

            var min = spectrum.Min();
            var max = spectrum.Max();

            var height = image.Height;

            // scale the spectrum to the image height (unit gain for a flat spectrum)
            var gain = max - min < 1e-6 ? 1 : (height - 2 * paddingY) / (max - min);
            var offset = (int)(height - paddingY + min * gain);

            var x = paddingX + stride;

            for (var i = 1; i < spectrum.Length; i++)
            {
                g.DrawLine(pen, x - stride, -spectrum[i - 1] * gain + offset, x, -spectrum[i] * gain + offset);
                x += stride;
            }

            for (var i = 0; i < peaks.Length; i++)
            {
                g.DrawLine(redpen, paddingX + peaks[i] * stride, paddingY + offset,
                                   paddingX + peaks[i] * stride, -paddingY - spectrum[peaks[i]] * gain + offset);
            }
        }
    }
    catch
    {
        // nobody will own the half-drawn bitmap
        image.Dispose();
        throw;
    }

    _spectrumImage = image;
    spectrumPictureBox.Image = image;

    previousImage?.Dispose();
}
/// <summary>
/// Method for computing modulation spectra.
/// Each vector representing one modulation spectrum is a flattened version of 2D spectrum.
///
/// When no featuregram was supplied, band envelopes are obtained from the samples
/// (STFT power spectra passed through the filterbank); otherwise the columns of the
/// featuregram serve as envelopes. Every envelope is divided by its mean absolute value
/// and then analyzed with the modulation FFT in steps of _modulationHopSize.
/// </summary>
/// <param name="samples">Samples for analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of flattened modulation spectra</returns>
public override List<FeatureVector> ComputeFrom(float[] samples, int startSample, int endSample)
{
    Guard.AgainstInvalidRange(startSample, endSample, "starting pos", "ending pos");

    var frameSize = FrameSize;
    var hopSize = HopSize;

    var featureVectors = new List<FeatureVector>();

    var en = 0;
    var i = startSample;

    if (_featuregram == null)
    {
        _envelopes = new float[_filterbank.Length][];
        for (var n = 0; n < _envelopes.Length; n++)
        {
            _envelopes[n] = new float[samples.Length / hopSize];
        }

        var prevSample = startSample > 0 ? samples[startSample - 1] : 0.0f;

        var lastSample = endSample - Math.Max(frameSize, hopSize);

        // ===================== compute local FFTs (do STFT) =======================

        for (i = startSample; i < lastSample; i += hopSize)
        {
            _zeroblock.FastCopyTo(_block, _zeroblock.Length);
            samples.FastCopyTo(_block, frameSize, i);

            // 0) pre-emphasis (if needed)

            if (_preEmphasis > 1e-10)
            {
                for (var k = 0; k < frameSize; k++)
                {
                    var y = _block[k] - prevSample * _preEmphasis;
                    prevSample = _block[k];
                    _block[k] = y;
                }
                prevSample = samples[i + hopSize - 1];
            }

            // 1) apply window

            if (_window != WindowTypes.Rectangular)
            {
                _block.ApplyWindow(_windowSamples);
            }

            // 2) calculate power spectrum

            _fft.PowerSpectrum(_block, _spectrum);

            // 3) apply filterbank...

            FilterBanks.Apply(_filterbank, _spectrum, _filteredSpectrum);

            // ...and save results for future calculations

            for (var n = 0; n < _envelopes.Length; n++)
            {
                _envelopes[n][en] = _filteredSpectrum[n];
            }
            en++;
        }
    }
    else
    {
        // envelopes are the columns of the precomputed featuregram
        en = _featuregram.Length;
        _envelopes = new float[_featuregram[0].Length][];

        for (var n = 0; n < _envelopes.Length; n++)
        {
            _envelopes[n] = new float[en];
            for (i = 0; i < en; i++)
            {
                _envelopes[n][i] = _featuregram[i][n];
            }
        }
    }

    // =========================== modulation analysis =======================

    var envelopeLength = en;

    // long-term AVG-normalization

    foreach (var envelope in _envelopes)
    {
        var avg = 0.0f;
        for (var k = 0; k < envelopeLength; k++)
        {
            // mean of absolute values: the previous code tested the index (k >= 0),
            // so negative samples were never negated
            avg += Math.Abs(envelope[k]);
        }
        avg /= envelopeLength;

        if (avg >= 1e-10)   // this happens more frequently
        {
            for (var k = 0; k < envelopeLength; k++)
            {
                envelope[k] /= avg;
            }
        }
    }

    i = 0;
    while (i < envelopeLength)
    {
        var vector = new float[_envelopes.Length * (_modulationFftSize / 2 + 1)];
        var offset = 0;

        foreach (var envelope in _envelopes)
        {
            // the last blocks may be shorter than the modulation FFT size: the rest stays zero
            _zeroModblock.FastCopyTo(_modBlock, _modulationFftSize);
            envelope.FastCopyTo(_modBlock, Math.Min(_modulationFftSize, envelopeLength - i), i);

            _modulationFft.PowerSpectrum(_modBlock, _modSpectrum);
            _modSpectrum.FastCopyTo(vector, _modSpectrum.Length, 0, offset);

            offset += _modSpectrum.Length;
        }

        featureVectors.Add(new FeatureVector
        {
            Features = vector,
            TimePosition = (double)i * hopSize / SamplingRate
        });

        i += _modulationHopSize;
    }

    return featureVectors;
}
/// <summary>
/// Method creates overlapping ERB filters (ported from Malcolm Slaney's MATLAB code).
/// Each filter is obtained as the power spectrum of the impulse response of a cascade
/// of four second-order IIR sections.
/// </summary>
/// <param name="erbFilterCount">Number of ERB filters</param>
/// <param name="fftSize">Assumed size of FFT</param>
/// <param name="samplingRate">Assumed sampling rate</param>
/// <param name="lowFreq">Lower bound of the frequency range (negative values are replaced with 0)</param>
/// <param name="highFreq">Upper bound of the frequency range (values not exceeding lowFreq are replaced with samplingRate / 2)</param>
/// <param name="normalizeGain">True if gain should be normalized; false if all filters should have same height 1.0</param>
/// <returns>Array of ERB filters</returns>
public static float[][] Erb(
    int erbFilterCount, int fftSize, int samplingRate, double lowFreq = 0, double highFreq = 0, bool normalizeGain = true)
{
    if (lowFreq < 0)
    {
        lowFreq = 0;
    }
    if (highFreq <= lowFreq)
    {
        highFreq = samplingRate / 2.0;
    }

    const double earQ = 9.26449;
    const double minBw = 24.7;
    const double bw = earQ * minBw;
    const int order = 1;

    var t = 1.0 / samplingRate;

    // center frequencies, stored in ascending order
    var frequencies = new double[erbFilterCount];
    for (var i = 1; i <= erbFilterCount; i++)
    {
        frequencies[erbFilterCount - i] =
            -bw + Math.Exp(i * (-Math.Log(highFreq + bw) + Math.Log(lowFreq + bw)) / erbFilterCount) * (highFreq + bw);
    }

    // (the unit-circle table that used to be computed here was never read and has been dropped)

    var rootPos = Math.Sqrt(3 + Math.Pow(2, 1.5));
    var rootNeg = Math.Sqrt(3 - Math.Pow(2, 1.5));

    var fft = new Fft(fftSize);

    var erbFilterBank = new float[erbFilterCount][];

    for (var i = 0; i < erbFilterCount; i++)
    {
        var cf = frequencies[i];
        var erb = Math.Pow(Math.Pow(cf / earQ, order) + Math.Pow(minBw, order), 1.0 / order);
        var b = 1.019 * 2 * Math.PI * erb;

        var theta = 2 * cf * Math.PI * t;
        var itheta = Complex.Exp(2 * Complex.ImaginaryOne * theta);

        // coefficients shared by all four second-order sections
        var a0 = t;
        var a2 = 0.0;
        var b0 = 1.0;
        var b1 = -2 * Math.Cos(theta) / Math.Exp(b * t);
        var b2 = Math.Exp(-2 * b * t);

        var common = -t * Math.Exp(-b * t);

        var k1 = Math.Cos(theta) + rootPos * Math.Sin(theta);
        var k2 = Math.Cos(theta) - rootPos * Math.Sin(theta);
        var k3 = Math.Cos(theta) + rootNeg * Math.Sin(theta);
        var k4 = Math.Cos(theta) - rootNeg * Math.Sin(theta);

        // middle coefficient: the only one that differs between the sections
        var a11 = common * k1;
        var a12 = common * k2;
        var a13 = common * k3;
        var a14 = common * k4;

        var gainArg = Complex.Exp(Complex.ImaginaryOne * theta - b * t);

        var gain = Complex.Abs(
                        (itheta - gainArg * k1) *
                        (itheta - gainArg * k2) *
                        (itheta - gainArg * k3) *
                        (itheta - gainArg * k4) *
                        Complex.Pow(t * Math.Exp(b * t) / (-1.0 / Math.Exp(b * t) + 1 + itheta * (1 - Math.Exp(b * t))), 4.0));

        var filter1 = new IirFilter(new[] { a0, a11, a2 }, new[] { b0, b1, b2 });
        var filter2 = new IirFilter(new[] { a0, a12, a2 }, new[] { b0, b1, b2 });
        var filter3 = new IirFilter(new[] { a0, a13, a2 }, new[] { b0, b1, b2 });
        var filter4 = new IirFilter(new[] { a0, a14, a2 }, new[] { b0, b1, b2 });

        // unit impulse
        var ir = new double[fftSize];
        ir[0] = 1.0;

        // for doubles the following code will work ok
        // (however there's a crucial loss of precision in case of floats):

        //var filter = filter1 * filter2 * filter3 * filter4;
        //ir = filter.ApplyTo(ir);

        // this code is ok both for floats and for doubles:

        ir = filter1.ApplyTo(ir);
        ir = filter2.ApplyTo(ir);
        ir = filter3.ApplyTo(ir);
        ir = filter4.ApplyTo(ir);

        var kernel = new DiscreteSignal(1, ir.Select(s => (float)(s / gain)));

        erbFilterBank[i] = fft.PowerSpectrum(kernel, false).Samples;
    }

    // normalize gain (by default)

    if (!normalizeGain)
    {
        return erbFilterBank;
    }

    foreach (var filter in erbFilterBank)
    {
        var sum = 0.0;
        for (var j = 0; j < filter.Length; j++)
        {
            sum += Math.Abs(filter[j] * filter[j]);
        }

        var weight = Math.Sqrt(sum * samplingRate / fftSize);

        for (var j = 0; j < filter.Length; j++)
        {
            filter[j] = (float)(filter[j] / weight);
        }
    }

    return erbFilterBank;
}
/// <summary>
/// Method for computing modulation spectra.
/// Each vector representing one modulation spectrum is a flattened version of 2D spectrum.
///
/// When no featuregram was supplied, band envelopes are obtained from the signal
/// (STFT power spectra passed through the filterbank; a 12-band mel filterbank for
/// 100..3200 Hz is created if none was set); otherwise the columns of the featuregram
/// serve as envelopes. Every envelope is divided by its mean absolute value and then
/// analyzed with the modulation FFT in steps of _modulationHopSize.
/// </summary>
/// <param name="signal">Signal under analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of flattened modulation spectra</returns>
public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
{
    // ====================================== PREPARE =======================================

    var hopSize = (int)(signal.SamplingRate * HopSize);
    var frameSize = (int)(signal.SamplingRate * FrameSize);
    var windowSamples = Window.OfType(_window, frameSize);

    var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

    var fft = new Fft(fftSize);
    var modulationFft = new Fft(_modulationFftSize);

    if (_featuregram == null)
    {
        if (_filterbank == null)
        {
            // The default filterbank must match the FFT size that is actually used below
            // (it was previously designed for _fftSize, which may be smaller than fftSize).
            _filterbank = FilterBanks.Triangular(fftSize, signal.SamplingRate,
                                FilterBanks.MelBands(12, fftSize, signal.SamplingRate, 100, 3200));
        }

        _featureCount = _filterbank.Length * (_modulationFftSize / 2 + 1);
    }
    else
    {
        _featureCount = _featuregram[0].Length * (_modulationFftSize / 2 + 1);
    }

    var length = _filterbank?.Length ?? _featuregram[0].Length;

    var modulationSamplingRate = (float)signal.SamplingRate / hopSize;
    var resolution = modulationSamplingRate / _modulationFftSize;

    _featureDescriptions = new string[length * (_modulationFftSize / 2 + 1)];

    var idx = 0;
    for (var fi = 0; fi < length; fi++)
    {
        for (var fj = 0; fj <= _modulationFftSize / 2; fj++)
        {
            _featureDescriptions[idx++] = string.Format("band_{0}_mf_{1:F2}_Hz", fi + 1, fj * resolution);
        }
    }

    // NOTE: pre-emphasis is applied exactly once, frame by frame, in the STFT loop below.
    // The whole-signal pre-emphasis filter that used to run here as well made
    // the signal pre-emphasized twice.

    // ================================= MAIN PROCESSING ==================================

    var featureVectors = new List<FeatureVector>();

    var en = 0;
    var i = startSample;

    if (_featuregram == null)
    {
        _envelopes = new float[_filterbank.Length][];
        for (var n = 0; n < _envelopes.Length; n++)
        {
            _envelopes[n] = new float[signal.Length / hopSize];
        }

        var prevSample = startSample > 0 ? signal[startSample - 1] : 0.0f;

        // ===================== compute local FFTs (do STFT) =======================

        var spectrum = new float[fftSize / 2 + 1];
        var filteredSpectrum = new float[_filterbank.Length];

        var block = new float[fftSize];           // buffer for currently processed signal block at each step
        var zeroblock = new float[fftSize];       // buffer of zeros for quick memset

        while (i + frameSize < endSample)
        {
            zeroblock.FastCopyTo(block, zeroblock.Length);
            signal.Samples.FastCopyTo(block, frameSize, i);

            // 0) pre-emphasis (if needed)

            if (_preEmphasis > 0.0)
            {
                for (var k = 0; k < frameSize; k++)
                {
                    var y = block[k] - prevSample * _preEmphasis;
                    prevSample = block[k];
                    block[k] = y;
                }
                prevSample = signal[i + hopSize - 1];
            }

            // 1) apply window

            if (_window != WindowTypes.Rectangular)
            {
                block.ApplyWindow(windowSamples);
            }

            // 2) calculate power spectrum

            fft.PowerSpectrum(block, spectrum);

            // 3) apply filterbank...

            FilterBanks.Apply(_filterbank, spectrum, filteredSpectrum);

            // ...and save results for future calculations

            for (var n = 0; n < _envelopes.Length; n++)
            {
                _envelopes[n][en] = filteredSpectrum[n];
            }
            en++;

            i += hopSize;
        }
    }
    else
    {
        // envelopes are the columns of the precomputed featuregram
        en = _featuregram.Length;
        _envelopes = new float[_featuregram[0].Length][];

        for (var n = 0; n < _envelopes.Length; n++)
        {
            _envelopes[n] = new float[en];
            for (i = 0; i < en; i++)
            {
                _envelopes[n][i] = _featuregram[i][n];
            }
        }
    }

    // =========================== modulation analysis =======================

    var envelopeLength = en;

    // long-term AVG-normalization

    foreach (var envelope in _envelopes)
    {
        var avg = 0.0f;
        for (var k = 0; k < envelopeLength; k++)
        {
            // mean of absolute values: the previous code tested the index (k >= 0),
            // so negative samples were never negated
            avg += Math.Abs(envelope[k]);
        }
        avg /= envelopeLength;

        if (avg >= 1e-10)   // this happens more frequently
        {
            for (var k = 0; k < envelopeLength; k++)
            {
                envelope[k] /= avg;
            }
        }
    }

    var modBlock = new float[_modulationFftSize];
    var zeroModblock = new float[_modulationFftSize];
    var modSpectrum = new float[_modulationFftSize / 2 + 1];

    i = 0;
    while (i < envelopeLength)
    {
        var vector = new float[_envelopes.Length * (_modulationFftSize / 2 + 1)];
        var offset = 0;

        foreach (var envelope in _envelopes)
        {
            // the last blocks may be shorter than the modulation FFT size: the rest stays zero
            zeroModblock.FastCopyTo(modBlock, _modulationFftSize);
            envelope.FastCopyTo(modBlock, Math.Min(_modulationFftSize, envelopeLength - i), i);

            modulationFft.PowerSpectrum(modBlock, modSpectrum);
            modSpectrum.FastCopyTo(vector, modSpectrum.Length, 0, offset);

            offset += modSpectrum.Length;
        }

        featureVectors.Add(new FeatureVector
        {
            Features = vector,
            TimePosition = (double)i * hopSize / signal.SamplingRate
        });

        i += _modulationHopSize;
    }

    return featureVectors;
}
/// <summary>
/// Computes MFCC vectors of a signal frame by frame. For every frame:
///
///     0) [Optional] pre-emphasis
///     1) Windowing (skipped for the rectangular window)
///     2) Power spectrum X
///     3) Mel filter bank followed by log(): Y = Log10(X * H)
///     4) DCT-II: mfcc = Dct(Y)
///     5) [Optional] liftering of the coefficients
///
/// Frame and hop lengths in samples are derived from FrameSize / HopSize and the sampling
/// rate of the signal; the mel filter bank is rebuilt (and stored in _melFilterBank) on every call.
/// </summary>
/// <param name="signal">Signal for analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of mfcc vectors</returns>
public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
{
    // ---------------------------------- preparation ----------------------------------

    var hop = (int)(signal.SamplingRate * HopSize);
    var frameLength = (int)(signal.SamplingRate * FrameSize);
    var window = Window.OfType(_window, frameLength);

    // FFT must be able to hold the whole frame
    int fftLength;
    if (_fftSize >= frameLength)
    {
        fftLength = _fftSize;
    }
    else
    {
        fftLength = MathUtils.NextPowerOfTwo(frameLength);
    }

    var melBands = FilterBanks.MelBands(_filterbankSize, fftLength, signal.SamplingRate, _lowFreq, _highFreq);
    _melFilterBank = FilterBanks.Triangular(fftLength, signal.SamplingRate, melBands);

    var lifter = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;

    var fft = new Fft(fftLength);
    var dct = new Dct2(_filterbankSize, FeatureCount);

    // buffers reused for every frame
    var powerSpectrum = new float[fftLength / 2 + 1];
    var logMel = new float[_filterbankSize];
    var frame = new float[fftLength];     // currently processed (zero-padded) frame
    var zeros = new float[fftLength];     // source of zeros for clearing the frame buffer

    // ---------------------------------- processing -----------------------------------

    var result = new List<FeatureVector>();

    // sample that precedes the current one (needed by the pre-emphasis difference)
    var previous = startSample > 0 ? signal[startSample - 1] : 0.0f;

    for (var pos = startSample; pos + frameLength < endSample; pos += hop)
    {
        // clear the buffer and load the next frame into it
        zeros.FastCopyTo(frame, zeros.Length);
        signal.Samples.FastCopyTo(frame, window.Length, pos);

        // 0) pre-emphasis (if needed)
        if (_preEmphasis > 0.0)
        {
            for (var k = 0; k < frameLength; k++)
            {
                var current = frame[k];
                frame[k] = current - previous * _preEmphasis;
                previous = current;
            }

            // the next frame starts at pos + hop, so its predecessor is signal[pos + hop - 1]
            previous = signal[pos + hop - 1];
        }

        // 1) windowing
        if (_window != WindowTypes.Rectangular)
        {
            frame.ApplyWindow(window);
        }

        // 2) power spectrum
        fft.PowerSpectrum(frame, powerSpectrum);

        // 3) mel filter bank + log()
        FilterBanks.ApplyAndLog(_melFilterBank, powerSpectrum, logMel);

        // 4) dct-II
        var coefficients = new float[FeatureCount];
        dct.Direct(logMel, coefficients);

        // 5) (optional) liftering
        if (lifter != null)
        {
            coefficients.ApplyWindow(lifter);
        }

        // append the vector for this frame
        var vector = new FeatureVector
        {
            Features = coefficients,
            TimePosition = (double)pos / signal.SamplingRate
        };
        result.Add(vector);
    }

    return result;
}
/// <summary>
/// S(implified)PNCC algorithm according to [Kim and Stern, 2016], applied to an array of samples.
///
/// The range is scanned in frames that start every HopSize samples; each frame holds
/// FrameSize samples copied into a zeroed internal block. In each frame do:
///
///     0) [Optional] pre-emphasis
///     1) Apply window (nothing is done for a rectangular window)
///     2) Obtain power spectrum
///     3) Apply gammatone filterbank
///     4) Mean power normalization
///     5) Apply nonlinearity (power ^ (1 / _power), or Log10 if _power is zero)
///     6) Do dct-II (normalized)
///
/// A frame is processed only while (frame start + FrameSize) is strictly less than endSample.
/// The running mean power starts from the same initial value on every call.
/// </summary>
/// <param name="samples">Samples for analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of pncc vectors; TimePosition of each vector is the frame start divided by SamplingRate</returns>
public override List<FeatureVector> ComputeFrom(float[] samples, int startSample, int endSample)
{
    Guard.AgainstInvalidRange(startSample, endSample, "starting pos", "ending pos");

    var frameSize = FrameSize;
    var hopSize = HopSize;

    const float meanPower = 1e10f;      // target level of the mean power normalization
    var mean = 4e07f;                   // initial value of the running mean power

    // exponent of the power nonlinearity (not used in the Log10 case)
    var d = _power != 0 ? 1.0 / _power : 0.0;

    var featureVectors = new List<FeatureVector>();

    // the sample that precedes the current frame (input of the pre-emphasis filter)
    var prevSample = startSample > 0 ? samples[startSample - 1] : 0.0f;

    for (var pos = startSample; pos + frameSize < endSample; pos += hopSize)
    {
        // reset the block and load the next frame into it
        _zeroblock.FastCopyTo(_block, _zeroblock.Length);
        samples.FastCopyTo(_block, frameSize, pos);

        // 0) pre-emphasis (if needed)
        if (_preEmphasis > 0.0)
        {
            for (var k = 0; k < frameSize; k++)
            {
                var current = _block[k];
                _block[k] = current - prevSample * _preEmphasis;
                prevSample = current;
            }

            // the next frame starts hopSize samples later, so remember the sample right before it
            prevSample = samples[pos + hopSize - 1];
        }

        // 1) apply window
        if (_window != WindowTypes.Rectangular)
        {
            _block.ApplyWindow(_windowSamples);
        }

        // 2) calculate power spectrum
        _fft.PowerSpectrum(_block, _spectrum);

        // 3) apply gammatone filterbank
        FilterBanks.Apply(FilterBank, _spectrum, _filteredSpectrum);

        // 4) mean power normalization:
        //    total power of the frame updates the running mean, then all bands are rescaled
        var sumPower = 0.0f;
        foreach (var bandPower in _filteredSpectrum)
        {
            sumPower += bandPower;
        }

        mean = LambdaMu * mean + (1 - LambdaMu) * sumPower;

        for (var j = 0; j < _filteredSpectrum.Length; j++)
        {
            _filteredSpectrum[j] *= meanPower / mean;
        }

        // 5) nonlinearity (Log10 or power ^ d)
        if (_power == 0)
        {
            for (var j = 0; j < _filteredSpectrum.Length; j++)
            {
                // epsilon is added before the logarithm is taken
                _filteredSpectrum[j] = (float)Math.Log10(_filteredSpectrum[j] + float.Epsilon);
            }
        }
        else
        {
            for (var j = 0; j < _filteredSpectrum.Length; j++)
            {
                _filteredSpectrum[j] = (float)Math.Pow(_filteredSpectrum[j], d);
            }
        }

        // 6) dct-II (normalized); a new array per frame, since it is stored in the output
        var spnccs = new float[FeatureCount];
        _dct.DirectN(_filteredSpectrum, spnccs);

        featureVectors.Add(new FeatureVector
        {
            Features = spnccs,
            TimePosition = (double)pos / SamplingRate
        });
    }

    return featureVectors;
}
/// <summary>
/// S(implified)PNCC algorithm according to [Kim and Stern, 2016]:
///
/// The range is scanned in frames that start every hopSize samples; each frame holds
/// frameSize samples and is zero-padded up to fftSize. In each frame do:
///
///     0) [Optional] pre-emphasis
///     1) Apply window (if rectangular window was specified then just do nothing)
///     2) Obtain power spectrum
///     3) Apply gammatone filters (squared)
///     4) Mean power normalization
///     5) Apply nonlinearity (power ^ (1 / _power), or Log10 if _power is zero)
///     6) Do dct-II (normalized)
///
/// A frame is processed only while (frame start + frameSize) is strictly less than endSample.
/// The gammatone filterbank field is rebuilt on every call, using the sampling rate of the signal.
/// </summary>
/// <param name="signal">Signal for analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of pncc vectors; TimePosition of each vector is the frame start divided by the sampling rate</returns>
public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
{
    // ====================================== PREPARE =======================================

    var hopSize = (int)(signal.SamplingRate * HopSize);
    var frameSize = (int)(signal.SamplingRate * FrameSize);
    var windowSamples = Window.OfType(_window, frameSize);

    // the configured FFT size is used only if it can hold an entire frame
    var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

    // The filterbank is designed for the effective FFT size (fftSize, not _fftSize):
    // it is applied to the spectrum produced by the FFT of that very size,
    // and fftSize differs from _fftSize whenever _fftSize is smaller than the frame.
    _gammatoneFilterBank = FilterBanks.Erb(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq);

    // use power spectrum: square each filter weight in place
    foreach (var filter in _gammatoneFilterBank)
    {
        for (var j = 0; j < filter.Length; j++)
        {
            var ps = filter[j] * filter[j];
            filter[j] = ps;
        }
    }

    var fft = new Fft(fftSize);
    var dct = new Dct2(_filterbankSize, FeatureCount);

    var gammatoneSpectrum = new float[_filterbankSize];

    const float meanPower = 1e10f;      // target level of the mean power normalization
    var mean = 4e07f;                   // initial value of the running mean power

    // exponent of the power nonlinearity (not used in the Log10 case)
    var d = _power != 0 ? 1.0 / _power : 0.0;

    var block = new float[fftSize];             // buffer for a signal block at each step
    var zeroblock = new float[fftSize];         // buffer of zeros for quick memset
    var spectrum = new float[fftSize / 2 + 1];

    // ================================= MAIN PROCESSING ==================================

    var featureVectors = new List<FeatureVector>();

    // the sample that precedes the current frame (input of the pre-emphasis filter)
    var prevSample = startSample > 0 ? signal[startSample - 1] : 0.0f;

    var i = startSample;
    while (i + frameSize < endSample)
    {
        // prepare next block for processing
        zeroblock.FastCopyTo(block, zeroblock.Length);
        signal.Samples.FastCopyTo(block, frameSize, i);

        // 0) pre-emphasis (if needed)
        if (_preEmphasis > 0.0)
        {
            for (var k = 0; k < frameSize; k++)
            {
                var y = block[k] - prevSample * _preEmphasis;
                prevSample = block[k];
                block[k] = y;
            }

            // the next frame starts hopSize samples later, so remember the sample right before it
            prevSample = signal[i + hopSize - 1];
        }

        // 1) apply window
        if (_window != WindowTypes.Rectangular)
        {
            block.ApplyWindow(windowSamples);
        }

        // 2) calculate power spectrum
        fft.PowerSpectrum(block, spectrum);

        // 3) apply gammatone filterbank
        FilterBanks.Apply(_gammatoneFilterBank, spectrum, gammatoneSpectrum);

        // 4) mean power normalization:
        //    total power of the frame updates the running mean, then all bands are rescaled
        var sumPower = 0.0f;
        for (var j = 0; j < gammatoneSpectrum.Length; j++)
        {
            sumPower += gammatoneSpectrum[j];
        }

        mean = LambdaMu * mean + (1 - LambdaMu) * sumPower;

        for (var j = 0; j < gammatoneSpectrum.Length; j++)
        {
            gammatoneSpectrum[j] *= meanPower / mean;
        }

        // 5) nonlinearity (power ^ d or Log10)
        if (_power != 0)
        {
            for (var j = 0; j < gammatoneSpectrum.Length; j++)
            {
                gammatoneSpectrum[j] = (float)Math.Pow(gammatoneSpectrum[j], d);
            }
        }
        else
        {
            for (var j = 0; j < gammatoneSpectrum.Length; j++)
            {
                // epsilon is added before the logarithm is taken
                gammatoneSpectrum[j] = (float)Math.Log10(gammatoneSpectrum[j] + float.Epsilon);
            }
        }

        // 6) dct-II (normalized); a new array per frame, since it is stored in the output
        var spnccs = new float[FeatureCount];
        dct.DirectN(gammatoneSpectrum, spnccs);

        // add pncc vector to output sequence
        featureVectors.Add(new FeatureVector
        {
            Features = spnccs,
            TimePosition = (double)i / signal.SamplingRate
        });

        i += hopSize;
    }

    return featureVectors;
}