/// <summary>
/// Computes spectral features of one frame and stores them in <paramref name="features"/>.
/// </summary>
/// <param name="block">Frame samples whose magnitude spectrum is computed</param>
/// <param name="features">Output array: element j receives the result of the j-th extractor</param>
public override void ProcessFrame(float[] block, float[] features)
{
    // magnitude spectrum of the frame goes to the reusable buffer

    _fft.MagnitudeSpectrum(block, _spectrum);

    // if the frequency array differs in size from the spectrum,
    // only the spectral bins listed in _frequencyPositions are taken;
    // otherwise the spectrum buffer itself serves as the mapped spectrum

    if (_spectrum.Length != _frequencies.Length)
    {
        for (var bin = 0; bin < _mappedSpectrum.Length; bin++)
        {
            var position = _frequencyPositions[bin];
            _mappedSpectrum[bin] = _spectrum[position];
        }
    }
    else
    {
        _mappedSpectrum = _spectrum;
    }

    // run every extractor on the mapped spectrum

    for (var n = 0; n < _extractors.Count; n++)
    {
        var extract = _extractors[n];
        features[n] = extract(_mappedSpectrum, _frequencies);
    }
}
/// <summary>
/// Computes MPEG7 spectral features (and, if configured, harmonic features) of one frame.
/// </summary>
/// <param name="block">
/// Frame samples; the array is modified in place (tail starting at FrameSize is zeroed, then the window is applied)
/// </param>
/// <returns>New array of FeatureCount values: spectral features first, harmonic features after them</returns>
public override float[] ProcessFrame(float[] block)
{
    // zero everything from FrameSize up to the end of the block

    for (var pos = FrameSize; pos < block.Length; pos++)
    {
        block[pos] = 0;
    }

    // windowing

    block.ApplyWindow(_windowSamples);

    // magnitude spectrum of the windowed block

    _fft.MagnitudeSpectrum(block, _spectrum);

    // filterbank results are stored at positions 1..N;
    // position 0 of the mapped spectrum is not written here

    for (var band = 0; band < _filterbank.Length; band++)
    {
        var target = band + 1;
        var weights = _filterbank[band];

        _mappedSpectrum[target] = 0.0f;

        for (var bin = 0; bin < _spectrum.Length; bin++)
        {
            _mappedSpectrum[target] += weights[bin] * _spectrum[bin];
        }
    }

    // spectral features

    var featureVector = new float[FeatureCount];

    for (var n = 0; n < _extractors.Count; n++)
    {
        featureVector[n] = _extractors[n](_mappedSpectrum, _frequencies);
    }

    // no harmonic extractors - nothing more to do

    if (_harmonicExtractors == null)
    {
        return featureVector;
    }

    // pitch comes from the precomputed track if there is one (advancing the track position),
    // otherwise it is estimated from the current spectrum

    var pitch = _pitchTrack != null ? _pitchTrack[_pitchPos++] : _pitchEstimator(_spectrum);

    _peaksDetector(_spectrum, _peaks, _peakFrequencies, SamplingRate, pitch);

    // harmonic features are placed right after the spectral ones

    var start = _extractors.Count;

    for (var n = 0; n < _harmonicExtractors.Count; n++)
    {
        featureVector[start + n] = _harmonicExtractors[n](_spectrum, _peaks, _peakFrequencies);
    }

    return featureVector;
}
/// <summary>
/// Estimates the gain that normalizes the filter's frequency response onto [0, 1] range.
/// The gain is the reciprocal of the largest absolute value in the magnitude spectrum
/// of the filter's response to the signal produced by DiscreteSignal.Unit.
/// </summary>
/// <param name="filter">Filter to analyze; its Process method is called once per sample of the test signal</param>
/// <param name="fftSize">Length of the test signal and size of FFT</param>
/// <returns>Gain for filtering operations</returns>
public static float EstimateGain(this IOnlineFilter filter, int fftSize = 512)
{
    // NOTE(review): the filter processes fftSize samples here; if it keeps internal state,
    // that state is presumably left modified after the call - confirm against callers

    var testSignal = DiscreteSignal.Unit(fftSize);

    // response of the filter to the test signal

    var response = testSignal.Samples
                             .Select(sample => filter.Process(sample))
                             .ToArray();

    // magnitude spectrum of the response

    var realFft = new RealFft(fftSize);
    var magnitudes = new float[fftSize / 2 + 1];

    realFft.MagnitudeSpectrum(response, magnitudes);

    var peak = magnitudes.Select(m => Math.Abs(m)).Max();

    return 1 / peak;
}
/// <summary>
/// Computes spectral features of one frame.
/// </summary>
/// <param name="block">
/// Frame samples; the array is modified in place (tail starting at FrameSize is zeroed, then the window is applied)
/// </param>
/// <returns>New array of FeatureCount values where element j is the result of the j-th extractor</returns>
public override float[] ProcessFrame(float[] block)
{
    // zero everything from FrameSize up to the end of the block

    for (var pos = FrameSize; pos < block.Length; pos++)
    {
        block[pos] = 0;
    }

    // windowing

    block.ApplyWindow(_windowSamples);

    // magnitude spectrum of the windowed block

    _fft.MagnitudeSpectrum(block, _spectrum);

    var featureVector = new float[FeatureCount];

    // if the frequency array differs in size from the spectrum,
    // only the spectral bins listed in _frequencyPositions are taken;
    // otherwise the spectrum buffer itself serves as the mapped spectrum

    if (_spectrum.Length != _frequencies.Length)
    {
        for (var bin = 0; bin < _mappedSpectrum.Length; bin++)
        {
            var position = _frequencyPositions[bin];
            _mappedSpectrum[bin] = _spectrum[position];
        }
    }
    else
    {
        _mappedSpectrum = _spectrum;
    }

    // run every extractor on the mapped spectrum

    for (var n = 0; n < _extractors.Count; n++)
    {
        var extract = _extractors[n];
        featureVector[n] = extract(_mappedSpectrum, _frequencies);
    }

    return featureVector;
}
/// <summary>
/// Constructs extractor that applies the given filterbank to the spectrum of each frame.
/// The number of features equals the number of filters, and the FFT size
/// is derived from the length of the first filter: 2 * (length - 1).
/// </summary>
/// <param name="samplingRate">Sampling rate (passed to the base extractor)</param>
/// <param name="filterbank">Filterbank weights: one array per band</param>
/// <param name="frameDuration">Frame duration in seconds (passed to the base extractor)</param>
/// <param name="hopDuration">Hop duration in seconds (passed to the base extractor)</param>
/// <param name="preEmphasis">Pre-emphasis coefficient (passed to the base extractor)</param>
/// <param name="nonLinearity">Non-linearity applied together with the filterbank; unlisted values mean no non-linearity</param>
/// <param name="spectrumType">Spectrum type; unlisted values fall back to non-normalized power spectrum</param>
/// <param name="window">Type of the window function</param>
/// <param name="logFloor">Floor value passed to the logarithmic post-processing routines</param>
public FilterbankExtractor(int samplingRate,
                           float[][] filterbank,
                           double frameDuration = 0.0256/*sec*/,
                           double hopDuration = 0.010/*sec*/,
                           double preEmphasis = 0,
                           NonLinearityType nonLinearity = NonLinearityType.None,
                           SpectrumType spectrumType = SpectrumType.Power,
                           WindowTypes window = WindowTypes.Hamming,
                           float logFloor = float.Epsilon)

    : base(samplingRate, frameDuration, hopDuration, preEmphasis)
{
    FilterBank = filterbank;
    FeatureCount = filterbank.Length;

    // each filter covers (fftSize / 2 + 1) spectral bins, hence:
    _blockSize = 2 * (filterbank[0].Length - 1);

    // presumably rejects a frame that is longer than FFT size - see Guard
    Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");

    _fft = new RealFft(_blockSize);

    _window = window;
    _windowSamples = Window.OfType(_window, FrameSize);

    // setup spectrum post-processing: =======================================================

    _logFloor = logFloor;
    _nonLinearityType = nonLinearity;

    switch (nonLinearity)
    {
        case NonLinearityType.Log10:
            _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _bandSpectrum, _logFloor);
            break;
        case NonLinearityType.LogE:
            _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _bandSpectrum, _logFloor);
            break;
        case NonLinearityType.ToDecibel:
            _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _bandSpectrum, _logFloor);
            break;
        case NonLinearityType.CubicRoot:
            _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _bandSpectrum, 0.33);
            break;
        default:
            _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _bandSpectrum);
            break;
    }

    // setup spectrum computation: ===========================================================

    _spectrumType = spectrumType;

    switch (_spectrumType)
    {
        case SpectrumType.Magnitude:
            _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false);
            break;
        case SpectrumType.MagnitudeNormalized:
            _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true);
            break;
        case SpectrumType.PowerNormalized:
            _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true);
            break;
        // SpectrumType.Power and any undefined value: without this fallback
        // _getSpectrum would stay null and fail only later, on first use
        default:
            _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false);
            break;
    }

    // reserve memory for reusable blocks

    _spectrum = new float[_blockSize / 2 + 1];
    _bandSpectrum = new float[filterbank.Length];
}
/// <summary>
/// Creates MFCC extractor configured by <paramref name="options"/>.
/// </summary>
/// <param name="options">
/// MFCC options. If no filterbank is specified in the options, a triangular mel filterbank
/// is built from the filterbank size and the frequency range; otherwise the given filterbank
/// defines both the number of bands and the FFT size.
/// </param>
public MfccExtractor(MfccOptions options) : base(options)
{
    FeatureCount = options.FeatureCount;

    var bandCount = options.FilterBankSize;

    if (options.FilterBank is null)
    {
        // FFT size from options is used only if it exceeds the frame size

        if (options.FftSize > FrameSize)
        {
            _blockSize = options.FftSize;
        }
        else
        {
            _blockSize = MathUtils.NextPowerOfTwo(FrameSize);
        }

        var bands = FilterBanks.MelBands(bandCount, SamplingRate, options.LowFrequency, options.HighFrequency);

        FilterBank = FilterBanks.Triangular(_blockSize, SamplingRate, bands, mapper: Scale.HerzToMel);   // HTK/Kaldi-style
    }
    else
    {
        // custom filterbank: each filter covers (fftSize / 2 + 1) spectral bins

        FilterBank = options.FilterBank;
        bandCount = FilterBank.Length;
        _blockSize = 2 * (FilterBank[0].Length - 1);

        Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
    }

    _fft = new RealFft(_blockSize);

    // liftering coefficients are prepared only for positive lifter size

    _lifterSize = options.LifterSize;

    if (_lifterSize > 0)
    {
        _lifterCoeffs = Window.Liftering(FeatureCount, _lifterSize);
    }
    else
    {
        _lifterCoeffs = null;
    }

    _includeEnergy = options.IncludeEnergy;
    _logEnergyFloor = options.LogEnergyFloor;

    // DCT: the first character selects the type (anything but '1', '3', '4' means DCT-2),
    //      trailing "N" (case-insensitive) selects the normalized version

    _dctType = options.DctType;

    var dctKind = _dctType[0];

    if (dctKind == '1')
    {
        _dct = new Dct1(bandCount);
    }
    else if (dctKind == '3')
    {
        _dct = new Dct3(bandCount);
    }
    else if (dctKind == '4')
    {
        _dct = new Dct4(bandCount);
    }
    else
    {
        _dct = new Dct2(bandCount);
    }

    var normalizedDct = _dctType.EndsWith("N", StringComparison.OrdinalIgnoreCase);

    if (!normalizedDct)
    {
        _applyDct = mfccs => _dct.Direct(_melSpectrum, mfccs);
    }
    else
    {
        _applyDct = mfccs => _dct.DirectNorm(_melSpectrum, mfccs);
    }

    // spectrum post-processing: filterbank combined with the selected non-linearity

    _logFloor = options.LogFloor;
    _nonLinearityType = options.NonLinearity;

    if (_nonLinearityType == NonLinearityType.Log10)
    {
        _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _melSpectrum, _logFloor);
    }
    else if (_nonLinearityType == NonLinearityType.LogE)
    {
        _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _melSpectrum, _logFloor);
    }
    else if (_nonLinearityType == NonLinearityType.ToDecibel)
    {
        _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _melSpectrum, _logFloor);
    }
    else if (_nonLinearityType == NonLinearityType.CubicRoot)
    {
        _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _melSpectrum, 0.33);
    }
    else
    {
        _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _melSpectrum);
    }

    // spectrum computation: anything not listed explicitly means non-normalized power spectrum

    _spectrumType = options.SpectrumType;

    if (_spectrumType == SpectrumType.Magnitude)
    {
        _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false);
    }
    else if (_spectrumType == SpectrumType.MagnitudeNormalized)
    {
        _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true);
    }
    else if (_spectrumType == SpectrumType.PowerNormalized)
    {
        _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true);
    }
    else
    {
        _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false);
    }

    // reusable buffers

    _spectrum = new float[_blockSize / 2 + 1];
    _melSpectrum = new float[bandCount];
}
/// <summary>
/// Creates filterbank extractor configured by <paramref name="options"/>.
/// </summary>
/// <param name="options">
/// Filterbank options. If no filterbank is specified in the options, a rectangular mel-band
/// filterbank is built from the filterbank size and the frequency range; otherwise the given
/// filterbank defines both the number of features and the FFT size.
/// </param>
public FilterbankExtractor(FilterbankOptions options) : base(options)
{
    var bandCount = options.FilterBankSize;

    if (options.FilterBank != null)
    {
        // custom filterbank: each filter covers (fftSize / 2 + 1) spectral bins

        FilterBank = options.FilterBank;
        bandCount = FilterBank.Length;
        _blockSize = 2 * (FilterBank[0].Length - 1);

        Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
    }
    else
    {
        // FFT size from options is used only if it exceeds the frame size

        if (options.FftSize > FrameSize)
        {
            _blockSize = options.FftSize;
        }
        else
        {
            _blockSize = MathUtils.NextPowerOfTwo(FrameSize);
        }

        var bands = FilterBanks.MelBands(bandCount, SamplingRate, options.LowFrequency, options.HighFrequency, false);

        FilterBank = FilterBanks.Rectangular(_blockSize, SamplingRate, bands, mapper: Scale.HerzToMel);
    }

    // one feature per filterbank channel

    FeatureCount = bandCount;

    _fft = new RealFft(_blockSize);

    // spectrum post-processing: filterbank combined with the selected non-linearity

    _logFloor = options.LogFloor;
    _nonLinearityType = options.NonLinearity;

    if (_nonLinearityType == NonLinearityType.Log10)
    {
        _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _bandSpectrum, _logFloor);
    }
    else if (_nonLinearityType == NonLinearityType.LogE)
    {
        _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _bandSpectrum, _logFloor);
    }
    else if (_nonLinearityType == NonLinearityType.ToDecibel)
    {
        _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _bandSpectrum, _logFloor);
    }
    else if (_nonLinearityType == NonLinearityType.CubicRoot)
    {
        _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _bandSpectrum, 0.33);
    }
    else
    {
        _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _bandSpectrum);
    }

    // spectrum computation: anything not listed explicitly means non-normalized power spectrum

    _spectrumType = options.SpectrumType;

    if (_spectrumType == SpectrumType.Magnitude)
    {
        _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false);
    }
    else if (_spectrumType == SpectrumType.MagnitudeNormalized)
    {
        _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true);
    }
    else if (_spectrumType == SpectrumType.PowerNormalized)
    {
        _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true);
    }
    else
    {
        _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false);
    }

    // reusable buffers

    _spectrum = new float[_blockSize / 2 + 1];
    _bandSpectrum = new float[bandCount];
}
/// <summary>
/// Constructs MFCC extractor. If <paramref name="filterbank"/> is not specified, a triangular
/// mel filterbank is built from <paramref name="filterbankSize"/> and the frequency range;
/// otherwise the given filterbank defines both the number of bands and the FFT size.
/// </summary>
/// <param name="samplingRate">Sampling rate (passed to the base extractor)</param>
/// <param name="featureCount">Number of MFCC coefficients</param>
/// <param name="frameDuration">Frame duration in seconds (passed to the base extractor)</param>
/// <param name="hopDuration">Hop duration in seconds (passed to the base extractor)</param>
/// <param name="filterbankSize">Number of mel bands (ignored if <paramref name="filterbank"/> is given)</param>
/// <param name="lowFreq">Lower frequency passed to the mel bands computation</param>
/// <param name="highFreq">Upper frequency passed to the mel bands computation</param>
/// <param name="fftSize">FFT size; used only if it exceeds the frame size, otherwise the next power of two of the frame size is taken</param>
/// <param name="filterbank">Custom filterbank weights (one array per band) or null</param>
/// <param name="lifterSize">Liftering size; liftering coefficients are prepared only if it is positive</param>
/// <param name="preEmphasis">Pre-emphasis coefficient (passed to the base extractor)</param>
/// <param name="includeEnergy">Stored flag telling whether energy is included in the feature vector</param>
/// <param name="dctType">"1", "1N", "2", "2N", "3", "3N", "4", "4N"</param>
/// <param name="nonLinearity">Non-linearity applied together with the filterbank; unlisted values mean no non-linearity</param>
/// <param name="spectrumType">Spectrum type; unlisted values fall back to non-normalized power spectrum</param>
/// <param name="window">Type of the window function</param>
/// <param name="logFloor">Floor value passed to the logarithmic post-processing routines</param>
/// <exception cref="ArgumentException">
/// Thrown if <paramref name="dctType"/> is null, empty or does not start with '1', '2', '3' or '4'
/// </exception>
public MfccExtractor(int samplingRate,
                     int featureCount,
                     double frameDuration = 0.0256/*sec*/,
                     double hopDuration = 0.010/*sec*/,
                     int filterbankSize = 24,
                     double lowFreq = 0,
                     double highFreq = 0,
                     int fftSize = 0,
                     float[][] filterbank = null,
                     int lifterSize = 0,
                     double preEmphasis = 0,
                     bool includeEnergy = false,
                     string dctType = "2N",
                     NonLinearityType nonLinearity = NonLinearityType.Log10,
                     SpectrumType spectrumType = SpectrumType.Power,
                     WindowTypes window = WindowTypes.Hamming,
                     float logFloor = float.Epsilon)

    : base(samplingRate, frameDuration, hopDuration, preEmphasis)
{
    // fail with a descriptive error instead of NullReferenceException / IndexOutOfRangeException
    // at dctType[0] below
    if (string.IsNullOrEmpty(dctType))
    {
        throw new ArgumentException("DCT type must not be null or empty!", "dctType");
    }

    FeatureCount = featureCount;

    _lowFreq = lowFreq;
    _highFreq = highFreq;

    if (filterbank == null)
    {
        _blockSize = fftSize > FrameSize ? fftSize : MathUtils.NextPowerOfTwo(FrameSize);

        var melBands = FilterBanks.MelBands(filterbankSize, _blockSize, SamplingRate, _lowFreq, _highFreq);

        FilterBank = FilterBanks.Triangular(_blockSize, SamplingRate, melBands, mapper: Scale.HerzToMel);   // HTK/Kaldi-style
    }
    else
    {
        FilterBank = filterbank;
        filterbankSize = filterbank.Length;

        // each filter covers (fftSize / 2 + 1) spectral bins, hence:
        _blockSize = 2 * (filterbank[0].Length - 1);

        // presumably rejects a frame that is longer than FFT size - see Guard
        Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
    }

    _fft = new RealFft(_blockSize);

    _window = window;
    _windowSamples = Window.OfType(_window, FrameSize);

    _lifterSize = lifterSize;
    _lifterCoeffs = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;

    _includeEnergy = includeEnergy;

    // setup DCT: ============================================================================

    _dctType = dctType;

    switch (dctType[0])
    {
        case '1':
            _dct = new Dct1(filterbankSize);
            break;
        case '2':
            _dct = new Dct2(filterbankSize);
            break;
        case '3':
            _dct = new Dct3(filterbankSize);
            break;
        case '4':
            _dct = new Dct4(filterbankSize);
            break;
        default:
            throw new ArgumentException("Only DCT-1, 2, 3 and 4 are supported!");
    }

    // second character 'N' (any case) selects the normalized DCT
    if (dctType.Length > 1 && char.ToUpper(dctType[1]) == 'N')
    {
        _applyDct = mfccs => _dct.DirectNorm(_melSpectrum, mfccs);
    }
    else
    {
        _applyDct = mfccs => _dct.Direct(_melSpectrum, mfccs);
    }

    // setup spectrum post-processing: =======================================================

    _logFloor = logFloor;
    _nonLinearityType = nonLinearity;

    switch (nonLinearity)
    {
        case NonLinearityType.Log10:
            _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _melSpectrum, _logFloor);
            break;
        case NonLinearityType.LogE:
            _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _melSpectrum, _logFloor);
            break;
        case NonLinearityType.ToDecibel:
            _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _melSpectrum, _logFloor);
            break;
        case NonLinearityType.CubicRoot:
            _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _melSpectrum, 0.33);
            break;
        default:
            _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _melSpectrum);
            break;
    }

    // setup spectrum computation: ===========================================================

    _spectrumType = spectrumType;

    switch (_spectrumType)
    {
        case SpectrumType.Magnitude:
            _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false);
            break;
        case SpectrumType.MagnitudeNormalized:
            _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true);
            break;
        case SpectrumType.PowerNormalized:
            _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true);
            break;
        // SpectrumType.Power and any undefined value: without this fallback
        // _getSpectrum would stay null and fail only later, on first use
        default:
            _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false);
            break;
    }

    // reserve memory for reusable blocks

    _spectrum = new float[_blockSize / 2 + 1];
    _melSpectrum = new float[filterbankSize];
}