/// <summary>
/// Builds the extractor from the settings carried by <paramref name="options"/>.
/// </summary>
/// <param name="options">
/// Configuration object; its feature count, filterbank (or filterbank size and frequency range),
/// FFT size, power, energy flag and log-energy floor are read here.
/// </param>
public SpnccExtractor(PnccOptions options) : base(options)
{
    FeatureCount = options.FeatureCount;

    var bandCount = options.FilterBankSize;
    var customBank = options.FilterBank;

    if (customBank != null)
    {
        // A ready-made filterbank dictates both the number of bands and the FFT size.
        FilterBank = customBank;
        bandCount = customBank.Length;
        _blockSize = 2 * (customBank[0].Length - 1);

        Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
    }
    else
    {
        // No filterbank given: pick the FFT size first, then generate ERB filters for it.
        if (options.FftSize > FrameSize)
        {
            _blockSize = options.FftSize;
        }
        else
        {
            _blockSize = MathUtils.NextPowerOfTwo(FrameSize);
        }

        FilterBank = FilterBanks.Erb(bandCount,
                                     _blockSize,
                                     SamplingRate,
                                     options.LowFrequency,
                                     options.HighFrequency);
    }

    _power = options.Power;
    _includeEnergy = options.IncludeEnergy;
    _logEnergyFloor = options.LogEnergyFloor;

    _fft = new RealFft(_blockSize);
    _dct = new Dct2(bandCount);

    // reusable work buffers
    _spectrum = new float[_blockSize / 2 + 1];
    _filteredSpectrum = new float[bandCount];
}
/// <summary>
/// Creates a PNCC extractor and allocates all of its internal work buffers.
/// </summary>
/// <param name="samplingRate">Sampling rate; forwarded to the base class and to the ERB filterbank generator</param>
/// <param name="featureCount">Value assigned to <c>FeatureCount</c></param>
/// <param name="frameDuration">Length of analysis window (in seconds)</param>
/// <param name="hopDuration">Hop duration (in seconds)</param>
/// <param name="power">Value stored as the extractor's power setting</param>
/// <param name="lowFreq">Lower frequency passed to the ERB filterbank generator</param>
/// <param name="highFreq">Upper frequency passed to the ERB filterbank generator</param>
/// <param name="filterbankSize">Number of ERB filters to generate (ignored when <paramref name="filterbank"/> is given)</param>
/// <param name="filterbank">Optional ready-made filterbank; when given, the FFT size is derived from its first filter</param>
/// <param name="fftSize">Size of FFT (in samples); used only if it exceeds the frame size</param>
/// <param name="preEmphasis">Pre-emphasis value forwarded to the base class</param>
/// <param name="window">Window type applied to each frame</param>
public PnccExtractor(int samplingRate,
                     int featureCount,
                     double frameDuration = 0.0256 /*sec*/,
                     double hopDuration = 0.010 /*sec*/,
                     int power = 15,
                     double lowFreq = 100,
                     double highFreq = 6800,
                     int filterbankSize = 40,
                     float[][] filterbank = null,
                     int fftSize = 0,
                     double preEmphasis = 0,
                     WindowTypes window = WindowTypes.Hamming)
    : base(samplingRate, frameDuration, hopDuration, preEmphasis)
{
    FeatureCount = featureCount;

    _lowFreq = lowFreq;
    _highFreq = highFreq;

    if (filterbank != null)
    {
        // A caller-supplied filterbank fixes the band count and the FFT size.
        FilterBank = filterbank;
        filterbankSize = filterbank.Length;
        _blockSize = 2 * (filterbank[0].Length - 1);

        Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
    }
    else
    {
        if (fftSize > FrameSize)
        {
            _blockSize = fftSize;
        }
        else
        {
            _blockSize = MathUtils.NextPowerOfTwo(FrameSize);
        }

        FilterBank = FilterBanks.Erb(filterbankSize, _blockSize, samplingRate, _lowFreq, _highFreq);
    }

    _fft = new RealFft(_blockSize);
    _dct = new Dct2(filterbankSize);

    _power = power;

    _window = window;
    _windowSamples = Window.OfType(_window, FrameSize);

    // one spectrum buffer plus per-band work buffers
    _spectrum = new float[_blockSize / 2 + 1];
    _spectrumQOut = new float[filterbankSize];
    _gammatoneSpectrum = new float[filterbankSize];
    _filteredSpectrumQ = new float[filterbankSize];
    _spectrumS = new float[filterbankSize];
    _smoothedSpectrumS = new float[filterbankSize];
    _avgSpectrumQ1 = new float[filterbankSize];
    _avgSpectrumQ2 = new float[filterbankSize];
    _smoothedSpectrum = new float[filterbankSize];

    _ringBuffer = new SpectraRingBuffer(2 * M + 1, filterbankSize);

    _step = M - 1;
}
/// <summary>
/// Creates an SPNCC extractor, either around a caller-supplied filterbank
/// or around a generated ERB filterbank whose weights are squared in place.
/// </summary>
/// <param name="samplingRate">Sampling rate; forwarded to the base class and to the ERB filterbank generator</param>
/// <param name="featureCount">Value assigned to <c>FeatureCount</c></param>
/// <param name="frameDuration">Length of analysis window (in seconds)</param>
/// <param name="hopDuration">Hop duration (in seconds)</param>
/// <param name="power">Value stored as the extractor's power setting</param>
/// <param name="lowFreq">Lower frequency passed to the ERB filterbank generator</param>
/// <param name="highFreq">Upper frequency passed to the ERB filterbank generator</param>
/// <param name="filterbankSize">Number of ERB filters to generate (ignored when <paramref name="filterbank"/> is given)</param>
/// <param name="filterbank">Optional ready-made filterbank; it is used as is (not squared)</param>
/// <param name="fftSize">Size of FFT (in samples); used only if it exceeds the frame size</param>
/// <param name="preEmphasis">Pre-emphasis value forwarded to the base class</param>
/// <param name="window">Window type applied to each frame</param>
public SpnccExtractor(int samplingRate,
                      int featureCount,
                      double frameDuration = 0.0256 /*sec*/,
                      double hopDuration = 0.010 /*sec*/,
                      int power = 15,
                      double lowFreq = 100,
                      double highFreq = 6800,
                      int filterbankSize = 40,
                      float[][] filterbank = null,
                      int fftSize = 0,
                      double preEmphasis = 0,
                      WindowTypes window = WindowTypes.Hamming)
    : base(samplingRate, frameDuration, hopDuration, preEmphasis)
{
    FeatureCount = featureCount;

    _power = power;

    if (filterbank != null)
    {
        // A caller-supplied filterbank fixes the band count and the FFT size.
        FilterBank = filterbank;
        filterbankSize = filterbank.Length;
        _blockSize = 2 * (filterbank[0].Length - 1);

        Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
    }
    else
    {
        if (fftSize > FrameSize)
        {
            _blockSize = fftSize;
        }
        else
        {
            _blockSize = MathUtils.NextPowerOfTwo(FrameSize);
        }

        _lowFreq = lowFreq;
        _highFreq = highFreq;

        FilterBank = FilterBanks.Erb(filterbankSize, _blockSize, samplingRate, _lowFreq, _highFreq);

        // use power spectrum: every filter weight is replaced by its square
        foreach (var weights in FilterBank)
        {
            for (var k = 0; k < weights.Length; k++)
            {
                weights[k] *= weights[k];
            }
        }
    }

    _fft = new RealFft(_blockSize);
    _dct = new Dct2(filterbankSize);

    _window = window;
    _windowSamples = Window.OfType(_window, FrameSize);

    // reusable work buffers
    _spectrum = new float[_blockSize / 2 + 1];
    _filteredSpectrum = new float[filterbankSize];
}
/// <summary>
/// Creates an MFCC extractor, either around a caller-supplied filterbank
/// or around a generated triangular mel filterbank, and reserves its work buffers.
/// </summary>
/// <param name="samplingRate">Sampling rate forwarded to the base class</param>
/// <param name="featureCount">Value assigned to <c>FeatureCount</c>; also the DCT output size and liftering length</param>
/// <param name="frameDuration">Frame duration forwarded to the base class</param>
/// <param name="hopDuration">Hop duration forwarded to the base class</param>
/// <param name="filterbankSize">Number of mel bands to generate (ignored when <paramref name="filterbank"/> is given)</param>
/// <param name="lowFreq">Lower frequency passed to the mel band generator</param>
/// <param name="highFreq">Upper frequency passed to the mel band generator</param>
/// <param name="fftSize">Size of FFT; used only if it exceeds the frame size</param>
/// <param name="filterbank">Optional ready-made filterbank; when given, the FFT size is derived from its first filter</param>
/// <param name="lifterSize">Liftering size; liftering coefficients are created only when it is positive</param>
/// <param name="preEmphasis">Pre-emphasis value (stored as float)</param>
/// <param name="window">Window type; window samples are created for every type except rectangular</param>
public MfccExtractor(int samplingRate,
                     int featureCount,
                     double frameDuration = 0.0256 /*sec*/,
                     double hopDuration = 0.010 /*sec*/,
                     int filterbankSize = 20,
                     double lowFreq = 0,
                     double highFreq = 0,
                     int fftSize = 0,
                     float[][] filterbank = null,
                     int lifterSize = 22,
                     double preEmphasis = 0.0,
                     WindowTypes window = WindowTypes.Hamming)
    : base(samplingRate, frameDuration, hopDuration)
{
    FeatureCount = featureCount;

    if (filterbank != null)
    {
        // A caller-supplied filterbank fixes the band count and the FFT size.
        FilterBank = filterbank;
        _filterbankSize = filterbank.Length;
        _fftSize = 2 * (filterbank[0].Length - 1);
    }
    else
    {
        if (fftSize > FrameSize)
        {
            _fftSize = fftSize;
        }
        else
        {
            _fftSize = MathUtils.NextPowerOfTwo(FrameSize);
        }

        _filterbankSize = filterbankSize;
        _lowFreq = lowFreq;
        _highFreq = highFreq;

        var melBands = FilterBanks.MelBands(_filterbankSize, _fftSize, SamplingRate, _lowFreq, _highFreq);
        FilterBank = FilterBanks.Triangular(_fftSize, SamplingRate, melBands);
    }

    _fft = new Fft(_fftSize);
    _dct = new Dct2(_filterbankSize, FeatureCount);

    _window = window;
    if (_window != WindowTypes.Rectangular)
    {
        _windowSamples = Window.OfType(_window, FrameSize);
    }

    _lifterSize = lifterSize;
    if (_lifterSize > 0)
    {
        _lifterCoeffs = Window.Liftering(FeatureCount, _lifterSize);
    }
    else
    {
        _lifterCoeffs = null;
    }

    _preEmphasis = (float)preEmphasis;

    // reserve memory for reusable blocks
    _spectrum = new float[_fftSize / 2 + 1];
    _logMelSpectrum = new float[_filterbankSize];
    _block = new float[_fftSize];
    _zeroblock = new float[_fftSize];
}
/// <summary>
/// Runs <c>DirectNorm</c> of an 8-point DCT-II on the shared <c>_test</c> input
/// and compares the first six coefficients against reference values.
/// </summary>
public void TestDct2Norm()
{
    var expected = new[]
    {
        0.91923882f, -0.11214018f, 0.35370055f, -0.30289775f, 0.49497475f, 0.18332565f
    };
    var actual = new float[6];

    var transform = new Dct2(8);
    transform.DirectNorm(_test, actual);

    Assert.That(actual, Is.EqualTo(expected).Within(1e-5));
}
/// <summary>
/// Runs <c>Direct</c> of an 8-point DCT-II on the shared <c>_test</c> input
/// and compares the first six coefficients against reference values.
/// </summary>
public void TestDct2()
{
    var expected = new[]
    {
        5.2f, -0.44856072f, 1.41480218f, -1.21159099f, 1.97989899f, 0.73330259f
    };
    var actual = new float[6];

    var transform = new Dct2(8);
    transform.Direct(_test, actual);

    Assert.That(actual, Is.EqualTo(expected).Within(1e-5));
}
/// <summary>
/// Feeds eight reference coefficients to <c>Inverse</c> of a <c>Dct2(8, 8)</c>
/// and checks that the result matches the shared <c>_test</c> array.
/// </summary>
public void TestIdct2()
{
    var coefficients = new[]
    {
        2.6f, -0.22428036f, 0.70740109f, -0.6057955f, 0.98994949f, 0.3666513f, -0.13994175f, -0.41021575f
    };
    var restored = new float[8];

    var transform = new Dct2(8, 8);
    transform.Inverse(coefficients, restored);

    Assert.That(restored, Is.EqualTo(_test).Within(1e-5));
}
/// <summary>
/// Runs <c>Direct</c> of a <c>Dct2(8, 6)</c> on the shared <c>_test</c> input
/// and compares the six output coefficients against reference values.
/// </summary>
public void TestDct2()
{
    var expected = new[]
    {
        2.6f, -0.22428036f, 0.70740109f, -0.6057955f, 0.98994949f, 0.3666513f
    };
    var actual = new float[6];

    var transform = new Dct2(8, 6);
    transform.Direct(_test, actual);

    Assert.That(actual, Is.EqualTo(expected).Within(1e-5));
}
/// <summary>
/// Feeds six reference coefficients to <c>Inverse</c> of a <c>Dct2(8)</c>
/// and compares the eight reconstructed samples against reference values.
/// </summary>
public void TestIdct2()
{
    var coefficients = new[]
    {
        5.2f, -0.44856072f, 1.41480218f, -1.21159099f, 1.97989899f, 0.73330259f
    };
    var reference = new[]
    {
        8.53433006f, 1.77122807f, 3.48148502f, 7.77645215f, 2.99512072f, -0.84717044f, 5.19445736f, 12.69409707f
    };
    var restored = new float[8];

    var transform = new Dct2(8);
    transform.Inverse(coefficients, restored);

    Assert.That(restored, Is.EqualTo(reference).Within(1e-5));
}
/// <summary>
/// PNCC algorithm according to [Kim &amp; Stern, 2016]:
///     0) [Optional] pre-emphasis
///
/// Decompose signal into overlapping (hopSize) frames of length frameSize
/// (each frame is zero-padded to fftSize). In each frame do:
///
///     1) Apply window (if rectangular window was specified then just do nothing)
///     2) Obtain power spectrum
///     3) Apply gammatone filters (squared)
///     4) Medium-time processing (asymmetric noise suppression, temporal masking, spectral smoothing)
///     5) Apply nonlinearity
///     6) Do dct-II (normalized)
///
/// No feature vector is emitted for the first 2*M frames: they only fill the ring buffer
/// used for temporal integration.
/// </summary>
/// <param name="signal">Signal for analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of pncc vectors</returns>
public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
{
    // ====================================== PREPARE =======================================

    var hopSize = (int)(signal.SamplingRate * HopSize);
    var frameSize = (int)(signal.SamplingRate * FrameSize);
    var windowSamples = Window.OfType(_window, frameSize);

    var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

    // The filterbank has to be built for the FFT size that is really used below (fftSize),
    // not for the configured _fftSize: the two differ whenever _fftSize < frameSize, and then
    // the filters would not match the length of the computed power spectrum.
    _gammatoneFilterBank = FilterBanks.Erb(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq);

    // use power spectrum:

    foreach (var filter in _gammatoneFilterBank)
    {
        for (var j = 0; j < filter.Length; j++)
        {
            var ps = filter[j] * filter[j];
            filter[j] = ps;
        }
    }

    var fft = new Fft(fftSize);
    var dct = new Dct2(_filterbankSize, FeatureCount);

    var gammatoneSpectrum = new float[_filterbankSize];

    var spectrumQOut = new float[_filterbankSize];
    var filteredSpectrumQ = new float[_filterbankSize];
    var spectrumS = new float[_filterbankSize];
    var smoothedSpectrumS = new float[_filterbankSize];
    var avgSpectrumQ1 = new float[_filterbankSize];
    var avgSpectrumQ2 = new float[_filterbankSize];
    var smoothedSpectrum = new float[_filterbankSize];

    const float meanPower = 1e10f;
    var mean = 4e07f;

    // exponent of the power nonlinearity; a zero _power selects Log10 instead (see step 5)
    var d = _power != 0 ? 1.0 / _power : 0.0;

    var block = new float[fftSize];       // buffer for currently processed signal block at each step
    var zeroblock = new float[fftSize];   // buffer of zeros for quick memset

    _ringBuffer = new SpectraRingBuffer(2 * M + 1, _filterbankSize);

    var spectrum = new float[fftSize / 2 + 1];

    // 0) pre-emphasis (if needed)

    if (_preEmphasis > 0.0)
    {
        var preemphasisFilter = new PreEmphasisFilter(_preEmphasis);
        signal = preemphasisFilter.ApplyTo(signal);
    }

    // ================================= MAIN PROCESSING ==================================

    var featureVectors = new List<FeatureVector>();

    var i = 0;              // index of the current frame
    var timePos = startSample;

    while (timePos + frameSize < endSample)
    {
        // prepare next block for processing

        zeroblock.FastCopyTo(block, zeroblock.Length);
        signal.Samples.FastCopyTo(block, frameSize, timePos);

        // 1) apply window

        if (_window != WindowTypes.Rectangular)
        {
            block.ApplyWindow(windowSamples);
        }

        // 2) calculate power spectrum

        fft.PowerSpectrum(block, spectrum);

        // 3) apply gammatone filterbank

        FilterBanks.Apply(_gammatoneFilterBank, spectrum, gammatoneSpectrum);

        // =============================================================
        // 4) medium-time processing blocks:

        // 4.1) temporal integration (zero-phase moving average filter)

        _ringBuffer.Add(gammatoneSpectrum);

        var spectrumQ = _ringBuffer.AverageSpectrum;

        // 4.2) asymmetric noise suppression

        if (i == 2 * M)
        {
            // first frame with a full ring buffer: initialize the lower envelope
            for (var j = 0; j < spectrumQOut.Length; j++)
            {
                spectrumQOut[j] = spectrumQ[j] * 0.9f;
            }
        }

        if (i >= 2 * M)
        {
            for (var j = 0; j < spectrumQOut.Length; j++)
            {
                if (spectrumQ[j] > spectrumQOut[j])
                {
                    spectrumQOut[j] = LambdaA * spectrumQOut[j] + (1 - LambdaA) * spectrumQ[j];
                }
                else
                {
                    spectrumQOut[j] = LambdaB * spectrumQOut[j] + (1 - LambdaB) * spectrumQ[j];
                }
            }

            for (var j = 0; j < filteredSpectrumQ.Length; j++)
            {
                filteredSpectrumQ[j] = Math.Max(spectrumQ[j] - spectrumQOut[j], 0.0f);

                if (i == 2 * M)
                {
                    avgSpectrumQ1[j] = 0.9f * filteredSpectrumQ[j];
                    avgSpectrumQ2[j] = filteredSpectrumQ[j];
                }

                if (filteredSpectrumQ[j] > avgSpectrumQ1[j])
                {
                    avgSpectrumQ1[j] = LambdaA * avgSpectrumQ1[j] + (1 - LambdaA) * filteredSpectrumQ[j];
                }
                else
                {
                    avgSpectrumQ1[j] = LambdaB * avgSpectrumQ1[j] + (1 - LambdaB) * filteredSpectrumQ[j];
                }

                // 4.3) temporal masking

                var threshold = filteredSpectrumQ[j];

                avgSpectrumQ2[j] *= LambdaT;

                if (spectrumQ[j] < C * spectrumQOut[j])
                {
                    filteredSpectrumQ[j] = avgSpectrumQ1[j];
                }
                else
                {
                    if (filteredSpectrumQ[j] <= avgSpectrumQ2[j])
                    {
                        filteredSpectrumQ[j] = MuT * avgSpectrumQ2[j];
                    }
                }

                avgSpectrumQ2[j] = Math.Max(avgSpectrumQ2[j], threshold);

                filteredSpectrumQ[j] = Math.Max(filteredSpectrumQ[j], avgSpectrumQ1[j]);
            }

            // 4.4) spectral smoothing

            for (var j = 0; j < spectrumS.Length; j++)
            {
                // float.Epsilon guards against division by zero
                spectrumS[j] = filteredSpectrumQ[j] / Math.Max(spectrumQ[j], float.Epsilon);
            }

            for (var j = 0; j < smoothedSpectrumS.Length; j++)
            {
                smoothedSpectrumS[j] = 0.0f;

                // average over up to 2*N+1 neighbouring bands, clipped at the filterbank edges
                var total = 0;
                for (var k = Math.Max(j - N, 0); k < Math.Min(j + N + 1, _filterbankSize); k++, total++)
                {
                    smoothedSpectrumS[j] += spectrumS[k];
                }

                smoothedSpectrumS[j] /= total;
            }

            // 4.5) mean power normalization

            var centralSpectrum = _ringBuffer.CentralSpectrum;

            var sumPower = 0.0f;
            for (var j = 0; j < smoothedSpectrum.Length; j++)
            {
                smoothedSpectrum[j] = smoothedSpectrumS[j] * centralSpectrum[j];
                sumPower += smoothedSpectrum[j];
            }

            mean = LambdaMu * mean + (1 - LambdaMu) * sumPower;

            for (var j = 0; j < smoothedSpectrum.Length; j++)
            {
                smoothedSpectrum[j] *= meanPower / mean;
            }

            // =============================================================

            // 5) nonlinearity (power ^ d  or  Log10)

            if (_power != 0)
            {
                for (var j = 0; j < smoothedSpectrum.Length; j++)
                {
                    smoothedSpectrum[j] = (float)Math.Pow(smoothedSpectrum[j], d);
                }
            }
            else
            {
                for (var j = 0; j < smoothedSpectrum.Length; j++)
                {
                    smoothedSpectrum[j] = (float)Math.Log10(smoothedSpectrum[j] + float.Epsilon);
                }
            }

            // 6) dct-II (normalized)

            var pnccs = new float[FeatureCount];
            dct.DirectN(smoothedSpectrum, pnccs);

            // add pncc vector to output sequence

            featureVectors.Add(new FeatureVector
            {
                Features = pnccs,
                TimePosition = (double)timePos / signal.SamplingRate
            });
        }

        i++;

        timePos += hopSize;
    }

    return featureVectors;
}
/// <summary>
/// Tests the express where-clause specified in param 'clause'
/// </summary>
/// <remarks>
/// Any exception thrown while evaluating the clause is caught and logged (when a logger is available);
/// in that case the method returns false. A clause value not handled by the switch also yields false.
/// </remarks>
/// <param name="clause">The express clause to test</param>
/// <returns>true if the clause is satisfied.</returns>
public bool ValidateClause(IfcDimensionCurveClause clause)
{
    var retVal = false;
    try
    {
        switch (clause)
        {
            case IfcDimensionCurveClause.WR51:
                retVal = Functions.SIZEOF(Functions.USEDIN(this, "IFC2X3.IFCDRAUGHTINGCALLOUT.CONTENTS")) >= 1;
                break;
            case IfcDimensionCurveClause.WR52:
                // at most one ORIGIN terminator and at most one TARGET terminator
                retVal = (Functions.SIZEOF(Functions.USEDIN(this, "IFC2X3." + "IFCTERMINATORSYMBOL.ANNOTATEDCURVE").Where(Dct1 => (Dct1.AsIfcDimensionCurveTerminator().Role == IfcDimensionExtentUsage.ORIGIN))) <= 1) && (Functions.SIZEOF(Functions.USEDIN(this, "IFC2X3." + "IFCTERMINATORSYMBOL.ANNOTATEDCURVE").Where(Dct2 => (Dct2.AsIfcDimensionCurveTerminator().Role == IfcDimensionExtentUsage.TARGET))) <= 1);
                break;
            case IfcDimensionCurveClause.WR53:
                // every annotating symbol must be a dimension curve terminator
                retVal = Functions.SIZEOF(AnnotatedBySymbols.Where(Dct => !(Functions.TYPEOF(Dct).Contains("IFC2X3.IFCDIMENSIONCURVETERMINATOR")))) == 0;
                break;
        }
    }
    catch (Exception ex)
    {
        var log = Validation.ValidationLogging.CreateLogger<Xbim.Ifc2x3.PresentationDimensioningResource.IfcDimensionCurve>();

        // Pass the exception as the exception argument (first parameter). When it is passed as a
        // trailing argument it is treated as a message-template value and, since the message has
        // no placeholder for it, the exception and its stack trace never reach the log.
        log?.LogError(ex, "Exception thrown evaluating where-clause 'IfcDimensionCurve.{0}' for #{1}.", clause, EntityLabel);
    }
    return retVal;
}
/// <summary>
/// Creates a PNCC extractor, either around a caller-supplied filterbank
/// or around a generated ERB filterbank whose weights are squared in place,
/// and allocates all of its internal work buffers.
/// </summary>
/// <param name="samplingRate">Sampling rate; forwarded to the base class and to the ERB filterbank generator</param>
/// <param name="featureCount">Value assigned to <c>FeatureCount</c>; also the DCT output size</param>
/// <param name="frameDuration">Length of analysis window (in seconds)</param>
/// <param name="hopDuration">Hop duration (in seconds)</param>
/// <param name="power">Value stored as the extractor's power setting</param>
/// <param name="lowFreq">Lower frequency passed to the ERB filterbank generator</param>
/// <param name="highFreq">Upper frequency passed to the ERB filterbank generator</param>
/// <param name="filterbankSize">Number of ERB filters to generate (ignored when <paramref name="filterbank"/> is given)</param>
/// <param name="filterbank">Optional ready-made filterbank; it is used as is (not squared)</param>
/// <param name="fftSize">Size of FFT (in samples); used only if it exceeds the frame size</param>
/// <param name="preEmphasis">Pre-emphasis value (stored as float)</param>
/// <param name="window">Window type; window samples are created for every type except rectangular</param>
public PnccExtractor(int samplingRate,
                     int featureCount,
                     double frameDuration = 0.0256 /*sec*/,
                     double hopDuration = 0.010 /*sec*/,
                     int power = 15,
                     double lowFreq = 100,
                     double highFreq = 6800,
                     int filterbankSize = 40,
                     float[][] filterbank = null,
                     int fftSize = 0,
                     double preEmphasis = 0.0,
                     WindowTypes window = WindowTypes.Hamming)
    : base(samplingRate, frameDuration, hopDuration)
{
    FeatureCount = featureCount;

    _power = power;

    if (filterbank != null)
    {
        // A caller-supplied filterbank fixes the band count and the FFT size.
        FilterBank = filterbank;
        _filterbankSize = filterbank.Length;
        _fftSize = 2 * (filterbank[0].Length - 1);
    }
    else
    {
        if (fftSize > FrameSize)
        {
            _fftSize = fftSize;
        }
        else
        {
            _fftSize = MathUtils.NextPowerOfTwo(FrameSize);
        }

        _filterbankSize = filterbankSize;
        _lowFreq = lowFreq;
        _highFreq = highFreq;

        FilterBank = FilterBanks.Erb(_filterbankSize, _fftSize, samplingRate, _lowFreq, _highFreq);

        // use power spectrum: every filter weight is replaced by its square
        foreach (var weights in FilterBank)
        {
            for (var k = 0; k < weights.Length; k++)
            {
                weights[k] *= weights[k];
            }
        }
    }

    _fft = new Fft(_fftSize);
    _dct = new Dct2(_filterbankSize, FeatureCount);

    _preEmphasis = (float)preEmphasis;

    _window = window;
    if (_window != WindowTypes.Rectangular)
    {
        _windowSamples = Window.OfType(_window, FrameSize);
    }

    // FFT-sized buffers
    _block = new float[_fftSize];
    _spectrum = new float[_fftSize / 2 + 1];

    // per-band work buffers
    _spectrumQOut = new float[_filterbankSize];
    _gammatoneSpectrum = new float[_filterbankSize];
    _filteredSpectrumQ = new float[_filterbankSize];
    _spectrumS = new float[_filterbankSize];
    _smoothedSpectrumS = new float[_filterbankSize];
    _avgSpectrumQ1 = new float[_filterbankSize];
    _avgSpectrumQ2 = new float[_filterbankSize];
    _smoothedSpectrum = new float[_filterbankSize];

    _zeroblock = new float[_fftSize];

    _ringBuffer = new SpectraRingBuffer(2 * M + 1, _filterbankSize);
}
/// <summary>
/// Computes MFCC feature vectors frame by frame.
///
/// The signal is cut into frames of frameSize samples taken every hopSize samples;
/// each frame is zero-padded to fftSize and then goes through:
///
///     0) [Optional] pre-emphasis
///     1) Windowing (skipped for the rectangular window)
///     2) Power spectrum X
///     3) Mel filterbank followed by log: Y = Log10(X * H)
///     4) DCT-II: mfcc = Dct(Y)
///     5) [Optional] liftering of mfcc
///
/// </summary>
/// <param name="signal">Signal for analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of mfcc vectors</returns>
public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
{
    // ------------------------------- preparation -------------------------------

    var hopSize = (int)(signal.SamplingRate * HopSize);
    var frameSize = (int)(signal.SamplingRate * FrameSize);
    var windowSamples = Window.OfType(_window, frameSize);

    int fftSize;
    if (_fftSize >= frameSize)
    {
        fftSize = _fftSize;
    }
    else
    {
        fftSize = MathUtils.NextPowerOfTwo(frameSize);
    }

    var melBands = FilterBanks.MelBands(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq);
    _melFilterBank = FilterBanks.Triangular(fftSize, signal.SamplingRate, melBands);

    var lifterCoeffs = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;

    var fft = new Fft(fftSize);
    var dct = new Dct2(_filterbankSize, FeatureCount);

    // buffers reused across frames
    var spectrum = new float[fftSize / 2 + 1];
    var logMelSpectrum = new float[_filterbankSize];
    var frame = new float[fftSize];      // currently processed (zero-padded) frame
    var zeros = new float[fftSize];      // all-zero buffer used to clear the frame quickly

    // ----------------------------- main processing -----------------------------

    var result = new List<FeatureVector>();

    // sample that precedes the current frame (needed by the pre-emphasis recursion)
    var prevSample = startSample > 0 ? signal[startSample - 1] : 0.0f;

    for (var pos = startSample; pos + frameSize < endSample; pos += hopSize)
    {
        // clear the frame and load the next portion of the signal
        zeros.FastCopyTo(frame, zeros.Length);
        signal.Samples.FastCopyTo(frame, windowSamples.Length, pos);

        // 0) pre-emphasis (if needed)
        if (_preEmphasis > 0.0)
        {
            for (var n = 0; n < frameSize; n++)
            {
                var emphasized = frame[n] - prevSample * _preEmphasis;
                prevSample = frame[n];
                frame[n] = emphasized;
            }

            // the next frame starts hopSize samples later, so remember the sample just before it
            prevSample = signal[pos + hopSize - 1];
        }

        // 1) windowing
        if (_window != WindowTypes.Rectangular)
        {
            frame.ApplyWindow(windowSamples);
        }

        // 2) power spectrum
        fft.PowerSpectrum(frame, spectrum);

        // 3) mel filterbank + log
        FilterBanks.ApplyAndLog(_melFilterBank, spectrum, logMelSpectrum);

        // 4) dct-II
        var mfccs = new float[FeatureCount];
        dct.Direct(logMelSpectrum, mfccs);

        // 5) (optional) liftering
        if (lifterCoeffs != null)
        {
            mfccs.ApplyWindow(lifterCoeffs);
        }

        // append the vector for this frame
        var vector = new FeatureVector
        {
            Features = mfccs,
            TimePosition = (double)pos / signal.SamplingRate
        };
        result.Add(vector);
    }

    return result;
}
/// <summary>
/// S(implified)PNCC algorithm according to [Kim &amp; Stern, 2016]:
///     0) [Optional] pre-emphasis
///
/// Decompose signal into overlapping (hopSize) frames of length frameSize
/// (each frame is zero-padded to fftSize). In each frame do:
///
///     1) Apply window (if rectangular window was specified then just do nothing)
///     2) Obtain power spectrum
///     3) Apply gammatone filters (squared)
///     4) Mean power normalization
///     5) Apply nonlinearity
///     6) Do dct-II (normalized)
///
/// </summary>
/// <param name="signal">Signal for analysis</param>
/// <param name="startSample">The number (position) of the first sample for processing</param>
/// <param name="endSample">The number (position) of last sample for processing</param>
/// <returns>List of pncc vectors</returns>
public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
{
    // ====================================== PREPARE =======================================

    var hopSize = (int)(signal.SamplingRate * HopSize);
    var frameSize = (int)(signal.SamplingRate * FrameSize);
    var windowSamples = Window.OfType(_window, frameSize);

    var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

    // The filterbank has to be built for the FFT size that is really used below (fftSize),
    // not for the configured _fftSize: the two differ whenever _fftSize < frameSize, and then
    // the filters would not match the length of the computed power spectrum.
    _gammatoneFilterBank = FilterBanks.Erb(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq);

    // use power spectrum:

    foreach (var filter in _gammatoneFilterBank)
    {
        for (var j = 0; j < filter.Length; j++)
        {
            var ps = filter[j] * filter[j];
            filter[j] = ps;
        }
    }

    var fft = new Fft(fftSize);
    var dct = new Dct2(_filterbankSize, FeatureCount);

    var gammatoneSpectrum = new float[_filterbankSize];

    const float meanPower = 1e10f;
    var mean = 4e07f;

    // exponent of the power nonlinearity; a zero _power selects Log10 instead (see step 5)
    var d = _power != 0 ? 1.0 / _power : 0.0;

    var block = new float[fftSize];       // buffer for a signal block at each step
    var zeroblock = new float[fftSize];   // buffer of zeros for quick memset

    var spectrum = new float[fftSize / 2 + 1];

    // ================================= MAIN PROCESSING ==================================

    var featureVectors = new List<FeatureVector>();

    // sample that precedes the current frame (needed by the pre-emphasis recursion)
    var prevSample = startSample > 0 ? signal[startSample - 1] : 0.0f;

    var i = startSample;
    while (i + frameSize < endSample)
    {
        // prepare next block for processing

        zeroblock.FastCopyTo(block, zeroblock.Length);
        signal.Samples.FastCopyTo(block, frameSize, i);

        // 0) pre-emphasis (if needed)

        if (_preEmphasis > 0.0)
        {
            for (var k = 0; k < frameSize; k++)
            {
                var y = block[k] - prevSample * _preEmphasis;
                prevSample = block[k];
                block[k] = y;
            }

            // the next frame starts hopSize samples later, so remember the sample just before it
            prevSample = signal[i + hopSize - 1];
        }

        // 1) apply window

        if (_window != WindowTypes.Rectangular)
        {
            block.ApplyWindow(windowSamples);
        }

        // 2) calculate power spectrum

        fft.PowerSpectrum(block, spectrum);

        // 3) apply gammatone filterbank

        FilterBanks.Apply(_gammatoneFilterBank, spectrum, gammatoneSpectrum);

        // 4) mean power normalization:

        var sumPower = 0.0f;
        for (var j = 0; j < gammatoneSpectrum.Length; j++)
        {
            sumPower += gammatoneSpectrum[j];
        }

        mean = LambdaMu * mean + (1 - LambdaMu) * sumPower;

        for (var j = 0; j < gammatoneSpectrum.Length; j++)
        {
            gammatoneSpectrum[j] *= meanPower / mean;
        }

        // 5) nonlinearity (power ^ d  or  Log10)

        if (_power != 0)
        {
            for (var j = 0; j < gammatoneSpectrum.Length; j++)
            {
                gammatoneSpectrum[j] = (float)Math.Pow(gammatoneSpectrum[j], d);
            }
        }
        else
        {
            for (var j = 0; j < gammatoneSpectrum.Length; j++)
            {
                gammatoneSpectrum[j] = (float)Math.Log10(gammatoneSpectrum[j] + float.Epsilon);
            }
        }

        // 6) dct-II (normalized)

        var spnccs = new float[FeatureCount];
        dct.DirectN(gammatoneSpectrum, spnccs);

        // add pncc vector to output sequence

        featureVectors.Add(new FeatureVector
        {
            Features = spnccs,
            TimePosition = (double)i / signal.SamplingRate
        });

        i += hopSize;
    }

    return featureVectors;
}