/// <summary>
/// Handles a click on the "compute" button: reads the filter bank and extractor settings
/// from the form controls, builds the filter bank, runs an <c>MfccExtractor</c> over
/// <c>_signal</c> and shows the resulting feature vectors and filter bank in the UI.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event arguments (not used).</param>
/// <remarks>
/// Numeric text boxes are read with <c>int.Parse</c> / <c>float.Parse</c>,
/// so malformed input surfaces as an exception from those calls.
/// </remarks>
private void buttonCompute_Click(object sender, EventArgs e)
{
    var filterCount = int.Parse(textBoxSize.Text);
    var samplingRate = _signal.SamplingRate;
    var fftSize = int.Parse(textBoxFftSize.Text);
    var lowFreq = float.Parse(textBoxLowFreq.Text);
    var highFreq = float.Parse(textBoxHighFreq.Text);

    Tuple<double, double, double>[] bands;
    float[][] filterbank = null;
    VtlnWarper vtln = null;

    // Optional VTLN warping; it is handed to the filter bank builders that accept it.
    if (checkBoxVtln.Checked)
    {
        var alpha = float.Parse(textBoxVtlnAlpha.Text);
        var vtlnLow = float.Parse(textBoxVtlnLow.Text);
        var vtlnHigh = float.Parse(textBoxVtlnHigh.Text);

        vtln = new VtlnWarper(alpha, lowFreq, highFreq, vtlnLow, vtlnHigh);
    }

    // Step 1: frequency bands. The "Slaney" variants and ERB also produce a ready-made
    // filter bank, in which case the shape selection in step 2 is skipped.
    switch (comboBoxFilterbank.Text)
    {
        case "Mel":
            bands = FilterBanks.MelBands(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxOverlap.Checked);
            break;
        case "Mel Slaney":
            bands = FilterBanks.MelBandsSlaney(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxOverlap.Checked);
            filterbank = FilterBanks.MelBankSlaney(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxNormalize.Checked, vtln);
            break;
        case "Bark":
            bands = FilterBanks.BarkBands(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxOverlap.Checked);
            break;
        case "Bark Slaney":
            bands = FilterBanks.BarkBandsSlaney(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxOverlap.Checked);
            filterbank = FilterBanks.BarkBankSlaney(filterCount, fftSize, samplingRate, lowFreq, highFreq);
            break;
        case "Critical bands":
            bands = FilterBanks.CriticalBands(filterCount, fftSize, samplingRate, lowFreq, highFreq);
            break;
        case "Octave bands":
            bands = FilterBanks.OctaveBands(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxOverlap.Checked);
            break;
        case "ERB":
            bands = null;
            filterbank = FilterBanks.Erb(filterCount, fftSize, samplingRate, lowFreq, highFreq);
            break;
        default:
            bands = FilterBanks.HerzBands(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxOverlap.Checked);
            break;
    }

    // Step 2: if only bands were produced, build the filter bank with the selected shape.
    if (bands != null && filterbank == null)
    {
        switch (comboBoxShape.Text)
        {
            case "Triangular":
                filterbank = FilterBanks.Triangular(fftSize, samplingRate, bands, vtln, Utils.Scale.HerzToMel);
                break;
            case "Trapezoidal":
                filterbank = FilterBanks.Trapezoidal(fftSize, samplingRate, bands, vtln);
                break;
            case "BiQuad":
                filterbank = FilterBanks.BiQuad(fftSize, samplingRate, bands);
                break;
            default:
                filterbank = FilterBanks.Rectangular(fftSize, samplingRate, bands, vtln);
                break;
        }

        if (checkBoxNormalize.Checked)
        {
            FilterBanks.Normalize(filterCount, bands, filterbank);
        }
    }

    var spectrumType = (SpectrumType)comboBoxSpectrum.SelectedIndex;
    var nonLinearity = (NonLinearityType)comboBoxNonLinearity.SelectedIndex;
    var logFloor = float.Parse(textBoxLogFloor.Text);

    // 13 features; the third and fourth arguments are presumably the frame duration
    // (here the duration of 512 samples) and the hop duration in seconds - confirm against
    // the MfccExtractor signature. Other options (filterbankSize, highFreq, preEmphasis,
    // lifterSize, includeEnergy) are intentionally not passed here.
    var mfccExtractor = new MfccExtractor(samplingRate, 13, 512.0 / samplingRate, 0.01,
                                          filterbank: filterbank,
                                          spectrumType: spectrumType,
                                          nonLinearity: nonLinearity,
                                          dctType: comboBoxDct.Text,
                                          window: WindowTypes.Hamming,
                                          logFloor: logFloor);

    _mfccVectors = mfccExtractor.ComputeFrom(_signal);

    // Post-processing is currently disabled. If it is re-enabled:
    //  - FeaturePostProcessing.NormalizeMean(_mfccVectors) is optional (but REQUIRED for PNCC);
    //  - after FeaturePostProcessing.AddDeltas(_mfccVectors) the header must also include
    //    DeltaFeatureDescriptions and DeltaDeltaFeatureDescriptions.
    var header = mfccExtractor.FeatureDescriptions;

    FillFeaturesList(_mfccVectors, header);

    // Every [0] access below is guarded: when the extractor yields no vectors,
    // unconditional indexing would throw ArgumentOutOfRangeException.
    if (mfccListView.Items.Count > 0)
    {
        mfccListView.Items[0].Selected = true;
    }

    melFilterBankPanel.Groups = mfccExtractor.FilterBank;

    if (_mfccVectors.Count > 0)
    {
        mfccPanel.Line = _mfccVectors[0].Features;
    }
}
/// <summary>
/// Creates a PLP extractor configured by <paramref name="options"/>.
/// </summary>
/// <param name="options">
/// Extractor settings. This constructor reads FeatureCount, FilterBankSize, FilterBank, FftSize,
/// LowFrequency, HighFrequency, CenterFrequencies, Rasta, LpcOrder, LifterSize, IncludeEnergy
/// and LogEnergyFloor; the object is also forwarded to the base constructor.
/// </param>
public PlpExtractor(PlpOptions options) : base(options)
{
    FeatureCount = options.FeatureCount;

    // ---------------- Filter bank and center frequencies ----------------

    var bankSize = options.FilterBankSize;

    if (options.FilterBank is null)
    {
        // No custom filter bank: generate a bark filter bank.
        // The requested FFT size is honoured only if it exceeds the frame size.
        if (options.FftSize > FrameSize)
        {
            _blockSize = options.FftSize;
        }
        else
        {
            _blockSize = MathUtils.NextPowerOfTwo(FrameSize);
        }

        var lowFrequency = options.LowFrequency;
        var highFrequency = options.HighFrequency;

        FilterBank = FilterBanks.BarkBankSlaney(bankSize, _blockSize, SamplingRate, lowFrequency, highFrequency);

        // the second tuple item of each band is taken as its center frequency
        _centerFrequencies = FilterBanks.BarkBandsSlaney(bankSize, SamplingRate, lowFrequency, highFrequency)
                                        .Select(band => band.Item2)
                                        .ToArray();
    }
    else
    {
        // Custom filter bank: its dimensions dictate the bank size and the FFT size.
        FilterBank = options.FilterBank;
        bankSize = FilterBank.Length;
        _blockSize = 2 * (FilterBank[0].Length - 1);

        Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");

        _centerFrequencies = options.CenterFrequencies ?? EstimateCenterFrequencies();
    }

    // ---------------- Equal loudness curve ----------------

    _equalLoudnessCurve = new double[bankSize];

    for (var band = 0; band < _centerFrequencies.Length; band++)
    {
        var squared = _centerFrequencies[band] * _centerFrequencies[band];

        var firstFactor = Math.Pow(squared / (squared + 1.6e5), 2);
        var secondFactor = (squared + 1.44e6) / (squared + 9.61e6);

        _equalLoudnessCurve[band] = firstFactor * secondFactor;
    }

    // ---------------- RASTA filters (only when requested) ----------------

    _rasta = options.Rasta;

    if (_rasta > 0)
    {
        // one independent filter per band
        var rastaFilters = new RastaFilter[bankSize];

        for (var band = 0; band < rastaFilters.Length; band++)
        {
            rastaFilters[band] = new RastaFilter(_rasta);
        }

        _rastaFilters = rastaFilters;
    }

    // -------- IDFT table: autocorrelation coefficients from the power spectrum --------

    _lpcOrder = options.LpcOrder > 0 ? options.LpcOrder : FeatureCount - 1;

    var bandCount = bankSize + 2;   // +2 duplicated edges
    var lastBand = bandCount - 1;
    var step = Math.PI / lastBand;

    _idftTable = new float[_lpcOrder + 1][];

    for (var order = 0; order < _idftTable.Length; order++)
    {
        var row = new float[bandCount];

        row[0] = 1.0f;

        // inner bands are weighted by 2, the two edge bands by 1
        for (var band = 1; band < lastBand; band++)
        {
            row[band] = 2 * (float)Math.Cos(step * order * band);
        }

        row[lastBand] = (float)Math.Cos(step * order * lastBand);

        _idftTable[order] = row;
    }

    _lpc = new float[_lpcOrder + 1];
    _cc = new float[bandCount];

    // ---------------- Remaining state and work buffers ----------------

    _fft = new RealFft(_blockSize);

    _lifterSize = options.LifterSize;

    if (_lifterSize > 0)
    {
        _lifterCoeffs = Window.Liftering(FeatureCount, _lifterSize);
    }
    else
    {
        _lifterCoeffs = null;
    }

    _includeEnergy = options.IncludeEnergy;
    _logEnergyFloor = options.LogEnergyFloor;

    _spectrum = new float[_blockSize / 2 + 1];
    _bandSpectrum = new float[bankSize];

    // Tries to derive center frequencies from the filter bank weights: for every filter
    // the first positive weight and the first zero weight at or after it delimit the band,
    // and the midpoint of the two positions is converted to Hz.
    double[] EstimateCenterFrequencies()
    {
        var herzPerBin = (double)SamplingRate / _blockSize;
        var centers = new double[bankSize];

        for (var band = 0; band < FilterBank.Length; band++)
        {
            var weights = FilterBank[band];

            // first position with a positive weight (0 if there is none)
            var pos = 0;
            while (pos < weights.Length && !(weights[pos] > 0))
            {
                pos++;
            }
            var left = pos < weights.Length ? pos : 0;

            // first zero weight starting from 'left' (half of the block size if there is none)
            pos = left;
            while (pos < weights.Length && weights[pos] != 0)
            {
                pos++;
            }
            var right = pos < weights.Length ? pos : _blockSize / 2;

            centers[band] = herzPerBin * (right + left) / 2;
        }

        return centers;
    }
}
/// <summary>
/// Creates a PLP extractor from individual settings.
/// </summary>
/// <param name="samplingRate">Sampling rate; forwarded to the base constructor and used for filter bank generation.</param>
/// <param name="featureCount">Value stored in <c>FeatureCount</c>.</param>
/// <param name="frameDuration">Frame duration (in seconds); forwarded to the base constructor.</param>
/// <param name="hopDuration">Hop duration (in seconds); forwarded to the base constructor.</param>
/// <param name="lpcOrder">LPC order; if it is not positive, <paramref name="featureCount"/> - 1 is used.</param>
/// <param name="rasta">Coefficient passed to each <c>RastaFilter</c>; the filters are created only if it is positive.</param>
/// <param name="filterbankSize">Number of filters in the generated bark filter bank; replaced by the length of <paramref name="filterbank"/> when one is given.</param>
/// <param name="lowFreq">Lower frequency passed to the bark filter bank generator.</param>
/// <param name="highFreq">Upper frequency passed to the bark filter bank generator.</param>
/// <param name="fftSize">FFT size; used only if it exceeds the frame size and no <paramref name="filterbank"/> is given, otherwise the next power of two of the frame size is taken.</param>
/// <param name="lifterSize">Liftering size; liftering coefficients are computed only if it is positive.</param>
/// <param name="preEmphasis">Pre-emphasis coefficient; forwarded to the base constructor.</param>
/// <param name="window">Window type used to generate the window samples.</param>
/// <param name="filterbank">Custom filter bank; when given, the FFT size is derived from the length of its first filter.</param>
/// <param name="centerFrequencies">Center frequencies for a custom <paramref name="filterbank"/>; when null they are estimated from the filter weights.</param>
public PlpExtractor(int samplingRate,
                    int featureCount,
                    double frameDuration = 0.0256/*sec*/,
                    double hopDuration = 0.010/*sec*/,
                    int lpcOrder = 0, // will be autocalculated as featureCount - 1
                    double rasta = 0,
                    int filterbankSize = 24,
                    double lowFreq = 0,
                    double highFreq = 0,
                    int fftSize = 0,
                    int lifterSize = 0,
                    double preEmphasis = 0,
                    WindowTypes window = WindowTypes.Hamming,
                    float[][] filterbank = null,
                    double[] centerFrequencies = null)

    : base(samplingRate, frameDuration, hopDuration, preEmphasis)
{
    FeatureCount = featureCount;

    // ---------------- Filter bank and center frequencies ----------------

    _lowFreq = lowFreq;
    _highFreq = highFreq;

    if (filterbank != null)
    {
        // Custom filter bank: its dimensions dictate the bank size and the FFT size.
        FilterBank = filterbank;
        filterbankSize = filterbank.Length;
        _blockSize = 2 * (filterbank[0].Length - 1);

        Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");

        if (centerFrequencies == null)
        {
            // Try to derive the center frequencies from the filter weights: the first positive
            // weight and the first zero weight at or after it delimit each band.
            var herzPerBin = (double)samplingRate / _blockSize;

            _centerFrequencies = new double[filterbankSize];

            for (var band = 0; band < filterbank.Length; band++)
            {
                var weights = filterbank[band];

                // first position with a positive weight (0 if there is none)
                var pos = 0;
                while (pos < weights.Length && !(weights[pos] > 0))
                {
                    pos++;
                }
                var left = pos < weights.Length ? pos : 0;

                // first zero weight starting from 'left' (half of the block size if there is none)
                pos = left;
                while (pos < weights.Length && weights[pos] != 0)
                {
                    pos++;
                }
                var right = pos < weights.Length ? pos : _blockSize / 2;

                _centerFrequencies[band] = herzPerBin * (right + left) / 2;
            }
        }
        else
        {
            _centerFrequencies = centerFrequencies;
        }
    }
    else
    {
        // No custom filter bank: generate a bark filter bank.
        // The requested FFT size is honoured only if it exceeds the frame size.
        if (fftSize > FrameSize)
        {
            _blockSize = fftSize;
        }
        else
        {
            _blockSize = MathUtils.NextPowerOfTwo(FrameSize);
        }

        var barkBands = FilterBanks.BarkBandsSlaney(filterbankSize, _blockSize, samplingRate, _lowFreq, _highFreq);

        FilterBank = FilterBanks.BarkBankSlaney(filterbankSize, _blockSize, samplingRate, _lowFreq, _highFreq);

        // the second tuple item of each band is taken as its center frequency
        _centerFrequencies = barkBands.Select(band => band.Item2).ToArray();
    }

    // ---------------- Equal loudness curve ----------------

    _equalLoudnessCurve = new double[filterbankSize];

    for (var band = 0; band < _centerFrequencies.Length; band++)
    {
        var center = _centerFrequencies[band];
        var squared = center * center;

        _equalLoudnessCurve[band] = Math.Pow(squared / (squared + 1.6e5), 2) * ((squared + 1.44e6) / (squared + 9.61e6));
    }

    // ---------------- RASTA filters (only when requested) ----------------

    _rasta = rasta;

    if (rasta > 0)
    {
        // one independent filter per band
        _rastaFilters = Enumerable.Range(0, filterbankSize)
                                  .Select(_ => new RastaFilter(rasta))
                                  .ToArray();
    }

    // -------- IDFT table: autocorrelation coefficients from the power spectrum --------

    if (lpcOrder > 0)
    {
        _lpcOrder = lpcOrder;
    }
    else
    {
        _lpcOrder = FeatureCount - 1;
    }

    var bandCount = filterbankSize + 2;     // +2 duplicated edges
    var lastBand = bandCount - 1;
    var step = Math.PI / lastBand;

    _idftTable = new float[_lpcOrder + 1][];

    for (var order = 0; order < _idftTable.Length; order++)
    {
        var row = new float[bandCount];

        row[0] = 1.0f;

        // inner bands are weighted by 2, the two edge bands by 1
        for (var band = 1; band < lastBand; band++)
        {
            row[band] = 2 * (float)Math.Cos(step * order * band);
        }

        row[lastBand] = (float)Math.Cos(step * order * lastBand);

        _idftTable[order] = row;
    }

    _lpc = new float[_lpcOrder + 1];
    _cc = new float[bandCount];

    // ---------------- Remaining state and work buffers ----------------

    _fft = new RealFft(_blockSize);

    _window = window;
    _windowSamples = Window.OfType(_window, FrameSize);

    _lifterSize = lifterSize;
    _lifterCoeffs = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;

    _spectrum = new float[_blockSize / 2 + 1];
    _bandSpectrum = new float[filterbankSize];
}