Пример #1
0
        /// <summary>
        /// Constructor
        /// </summary>
        /// <param name="samplingRate"></param>
        /// <param name="featureCount"></param>
        /// <param name="filterbank"></param>
        /// <param name="frameDuration"></param>
        /// <param name="hopDuration"></param>
        /// <param name="preEmphasis"></param>
        /// <param name="nonLinearity"></param>
        /// <param name="spectrumType"></param>
        /// <param name="window"></param>
        /// <param name="logFloor"></param>
        public FilterbankExtractor(int samplingRate,
                                   float[][] filterbank,
                                   double frameDuration          = 0.0256 /*sec*/,
                                   double hopDuration            = 0.010 /*sec*/,
                                   double preEmphasis            = 0,
                                   NonLinearityType nonLinearity = NonLinearityType.None,
                                   SpectrumType spectrumType     = SpectrumType.Power,
                                   WindowTypes window            = WindowTypes.Hamming,
                                   float logFloor = float.Epsilon)

            : base(samplingRate, frameDuration, hopDuration, preEmphasis)
        {
            FilterBank   = filterbank;
            FeatureCount = filterbank.Length;

            _blockSize = 2 * (filterbank[0].Length - 1);

            Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");

            _fft = new RealFft(_blockSize);

            _window        = window;
            _windowSamples = Window.OfType(_window, FrameSize);

            // setup spectrum post-processing: =======================================================

            _logFloor         = logFloor;
            _nonLinearityType = nonLinearity;
            switch (nonLinearity)
            {
            case NonLinearityType.Log10:
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _bandSpectrum, _logFloor);
                break;

            case NonLinearityType.LogE:
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _bandSpectrum, _logFloor);
                break;

            case NonLinearityType.ToDecibel:
                _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _bandSpectrum, _logFloor);
                break;

            case NonLinearityType.CubicRoot:
                _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _bandSpectrum, 0.33);
                break;

            default:
                _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _bandSpectrum);
                break;
            }

            _spectrumType = spectrumType;
            switch (_spectrumType)
            {
            case SpectrumType.Magnitude:
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false);
                break;

            case SpectrumType.Power:
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false);
                break;

            case SpectrumType.MagnitudeNormalized:
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true);
                break;

            case SpectrumType.PowerNormalized:
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true);
                break;
            }

            // reserve memory for reusable blocks

            _spectrum     = new float[_blockSize / 2 + 1];
            _bandSpectrum = new float[filterbank.Length];
        }
Пример #2
0
        /// <summary>
        /// Standard method for computing mfcc features:
        ///     0) [Optional] pre-emphasis
        ///
        /// Decompose signal into overlapping (hopSize) frames of length fftSize. In each frame do:
        ///
        ///     1) Apply window (if rectangular window was specified then just do nothing)
        ///     2) Obtain power spectrum X
        ///     3) Apply mel filters and log() the result: Y = Log10(X * H)
        ///     4) Do dct-II: mfcc = Dct(Y)
        ///     5) [Optional] liftering of mfcc
        ///
        /// </summary>
        /// <param name="samples">Samples for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of mfcc vectors</returns>
        public override List <FeatureVector> ComputeFrom(float[] samples, int startSample, int endSample)
        {
            Guard.AgainstInvalidRange(startSample, endSample, "starting pos", "ending pos");

            var hopSize   = HopSize;
            var frameSize = FrameSize;

            var featureVectors = new List <FeatureVector>();

            var prevSample = startSample > 0 ? samples[startSample - 1] : 0.0f;

            var i = startSample;

            while (i + frameSize < endSample)
            {
                // prepare next block for processing

                _zeroblock.FastCopyTo(_block, _fftSize);
                samples.FastCopyTo(_block, _windowSamples.Length, i);


                // 0) pre-emphasis (if needed)

                if (_preEmphasis > 0.0)
                {
                    for (var k = 0; k < frameSize; k++)
                    {
                        var y = _block[k] - prevSample * _preEmphasis;
                        prevSample = _block[k];
                        _block[k]  = y;
                    }
                    prevSample = samples[i + hopSize - 1];
                }


                // 1) apply window

                if (_window != WindowTypes.Rectangular)
                {
                    _block.ApplyWindow(_windowSamples);
                }


                // 2) calculate power spectrum

                _fft.PowerSpectrum(_block, _spectrum);


                // 3) apply mel filterbank and take log() of the result

                FilterBanks.ApplyAndLog(FilterBank, _spectrum, _logMelSpectrum);


                // 4) dct-II

                var mfccs = new float[FeatureCount];
                _dct.Direct(_logMelSpectrum, mfccs);


                // 5) (optional) liftering

                if (_lifterCoeffs != null)
                {
                    mfccs.ApplyWindow(_lifterCoeffs);
                }


                // add mfcc vector to output sequence

                featureVectors.Add(new FeatureVector
                {
                    Features     = mfccs,
                    TimePosition = (double)i / SamplingRate
                });

                i += hopSize;
            }

            return(featureVectors);
        }
Пример #3
0
        /// <summary>
        /// Constructs extractor from configuration <paramref name="options"/>.
        /// </summary>
        public MfccExtractor(MfccOptions options) : base(options)
        {
            FeatureCount = options.FeatureCount;

            var filterbankSize = options.FilterBankSize;

            if (options.FilterBank is null)
            {
                _blockSize = options.FftSize > FrameSize ? options.FftSize : MathUtils.NextPowerOfTwo(FrameSize);

                var melBands = FilterBanks.MelBands(filterbankSize, SamplingRate, options.LowFrequency, options.HighFrequency);
                FilterBank = FilterBanks.Triangular(_blockSize, SamplingRate, melBands, mapper: Scale.HerzToMel);   // HTK/Kaldi-style
            }
            else
            {
                FilterBank     = options.FilterBank;
                filterbankSize = FilterBank.Length;
                _blockSize     = 2 * (FilterBank[0].Length - 1);

                Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
            }

            _fft = new RealFft(_blockSize);

            _lifterSize   = options.LifterSize;
            _lifterCoeffs = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;

            _includeEnergy  = options.IncludeEnergy;
            _logEnergyFloor = options.LogEnergyFloor;

            // setup DCT: ============================================================================

            _dctType = options.DctType;
            switch (_dctType[0])
            {
            case '1': _dct = new Dct1(filterbankSize); break;

            case '3': _dct = new Dct3(filterbankSize); break;

            case '4': _dct = new Dct4(filterbankSize); break;

            default:  _dct = new Dct2(filterbankSize); break;
            }

            if (_dctType.EndsWith("N", StringComparison.OrdinalIgnoreCase))
            {
                _applyDct = mfccs => _dct.DirectNorm(_melSpectrum, mfccs);
            }
            else
            {
                _applyDct = mfccs => _dct.Direct(_melSpectrum, mfccs);
            }

            // setup spectrum post-processing: =======================================================

            _logFloor         = options.LogFloor;
            _nonLinearityType = options.NonLinearity;
            switch (_nonLinearityType)
            {
            case NonLinearityType.Log10:
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _melSpectrum, _logFloor); break;

            case NonLinearityType.LogE:
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _melSpectrum, _logFloor); break;

            case NonLinearityType.ToDecibel:
                _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _melSpectrum, _logFloor); break;

            case NonLinearityType.CubicRoot:
                _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _melSpectrum, 0.33); break;

            default:
                _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _melSpectrum); break;
            }

            _spectrumType = options.SpectrumType;
            switch (_spectrumType)
            {
            case SpectrumType.Magnitude:
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false); break;

            case SpectrumType.MagnitudeNormalized:
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true); break;

            case SpectrumType.PowerNormalized:
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true); break;

            default:
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false); break;
            }

            // reserve memory for reusable blocks

            _spectrum    = new float[_blockSize / 2 + 1];
            _melSpectrum = new float[filterbankSize];
        }
Пример #4
0
        /// <summary>
        /// Constructor
        /// </summary>
        /// <param name="options">Filterbank options</param>
        public FilterbankExtractor(FilterbankOptions options) : base(options)
        {
            var filterbankSize = options.FilterBankSize;

            if (options.FilterBank == null)
            {
                _blockSize = options.FftSize > FrameSize ? options.FftSize : MathUtils.NextPowerOfTwo(FrameSize);

                var melBands = FilterBanks.MelBands(filterbankSize, SamplingRate, options.LowFrequency, options.HighFrequency, false);
                FilterBank = FilterBanks.Rectangular(_blockSize, SamplingRate, melBands, mapper: Scale.HerzToMel);
            }
            else
            {
                FilterBank     = options.FilterBank;
                filterbankSize = FilterBank.Length;
                _blockSize     = 2 * (FilterBank[0].Length - 1);

                Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
            }

            FeatureCount = filterbankSize;

            _fft = new RealFft(_blockSize);

            // setup spectrum post-processing: =======================================================

            _logFloor         = options.LogFloor;
            _nonLinearityType = options.NonLinearity;
            switch (_nonLinearityType)
            {
            case NonLinearityType.Log10:
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _bandSpectrum, _logFloor); break;

            case NonLinearityType.LogE:
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _bandSpectrum, _logFloor); break;

            case NonLinearityType.ToDecibel:
                _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _bandSpectrum, _logFloor); break;

            case NonLinearityType.CubicRoot:
                _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _bandSpectrum, 0.33); break;

            default:
                _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _bandSpectrum); break;
            }

            _spectrumType = options.SpectrumType;
            switch (_spectrumType)
            {
            case SpectrumType.Magnitude:
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false); break;

            case SpectrumType.MagnitudeNormalized:
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true); break;

            case SpectrumType.PowerNormalized:
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true); break;

            default:
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false); break;
            }

            // reserve memory for reusable blocks

            _spectrum     = new float[_blockSize / 2 + 1];
            _bandSpectrum = new float[filterbankSize];
        }
Пример #5
0
        /// <summary>
        /// Constructor
        /// </summary>
        /// <param name="samplingRate"></param>
        /// <param name="featureCount"></param>
        /// <param name="frameDuration"></param>
        /// <param name="hopDuration"></param>
        /// <param name="filterbankSize"></param>
        /// <param name="lowFreq"></param>
        /// <param name="highFreq"></param>
        /// <param name="fftSize"></param>
        /// <param name="filterbank"></param>
        /// <param name="lifterSize"></param>
        /// <param name="preEmphasis"></param>
        /// <param name="includeEnergy"></param>
        /// <param name="dctType">"1", "1N", "2", "2N", "3", "3N", "4", "4N"</param>
        /// <param name="nonLinearity"></param>
        /// <param name="spectrumType"></param>
        /// <param name="window"></param>
        /// <param name="logFloor"></param>
        public MfccExtractor(int samplingRate,
                             int featureCount,
                             double frameDuration          = 0.0256 /*sec*/,
                             double hopDuration            = 0.010 /*sec*/,
                             int filterbankSize            = 24,
                             double lowFreq                = 0,
                             double highFreq               = 0,
                             int fftSize                   = 0,
                             float[][] filterbank          = null,
                             int lifterSize                = 0,
                             double preEmphasis            = 0,
                             bool includeEnergy            = false,
                             string dctType                = "2N",
                             NonLinearityType nonLinearity = NonLinearityType.Log10,
                             SpectrumType spectrumType     = SpectrumType.Power,
                             WindowTypes window            = WindowTypes.Hamming,
                             float logFloor                = float.Epsilon)

            : base(samplingRate, frameDuration, hopDuration, preEmphasis)
        {
            FeatureCount = featureCount;

            _lowFreq  = lowFreq;
            _highFreq = highFreq;

            if (filterbank == null)
            {
                _blockSize = fftSize > FrameSize ? fftSize : MathUtils.NextPowerOfTwo(FrameSize);

                var melBands = FilterBanks.MelBands(filterbankSize, _blockSize, SamplingRate, _lowFreq, _highFreq);
                FilterBank = FilterBanks.Triangular(_blockSize, SamplingRate, melBands, mapper: Scale.HerzToMel);   // HTK/Kaldi-style
            }
            else
            {
                FilterBank     = filterbank;
                filterbankSize = filterbank.Length;
                _blockSize     = 2 * (filterbank[0].Length - 1);

                Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
            }

            _fft = new RealFft(_blockSize);

            _window        = window;
            _windowSamples = Window.OfType(_window, FrameSize);

            _lifterSize   = lifterSize;
            _lifterCoeffs = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;

            _includeEnergy = includeEnergy;

            // setup DCT: ============================================================================

            _dctType = dctType;
            switch (dctType[0])
            {
            case '1':
                _dct = new Dct1(filterbankSize);
                break;

            case '2':
                _dct = new Dct2(filterbankSize);
                break;

            case '3':
                _dct = new Dct3(filterbankSize);
                break;

            case '4':
                _dct = new Dct4(filterbankSize);
                break;

            default:
                throw new ArgumentException("Only DCT-1, 2, 3 and 4 are supported!");
            }

            if (dctType.Length > 1 && char.ToUpper(dctType[1]) == 'N')
            {
                _applyDct = mfccs => _dct.DirectNorm(_melSpectrum, mfccs);
            }
            else
            {
                _applyDct = mfccs => _dct.Direct(_melSpectrum, mfccs);
            }

            // setup spectrum post-processing: =======================================================

            _logFloor         = logFloor;
            _nonLinearityType = nonLinearity;
            switch (nonLinearity)
            {
            case NonLinearityType.Log10:
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _melSpectrum, _logFloor);
                break;

            case NonLinearityType.LogE:
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _melSpectrum, _logFloor);
                break;

            case NonLinearityType.ToDecibel:
                _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _melSpectrum, _logFloor);
                break;

            case NonLinearityType.CubicRoot:
                _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _melSpectrum, 0.33);
                break;

            default:
                _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _melSpectrum);
                break;
            }

            _spectrumType = spectrumType;
            switch (_spectrumType)
            {
            case SpectrumType.Magnitude:
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false);
                break;

            case SpectrumType.Power:
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false);
                break;

            case SpectrumType.MagnitudeNormalized:
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true);
                break;

            case SpectrumType.PowerNormalized:
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true);
                break;
            }

            // reserve memory for reusable blocks

            _spectrum    = new float[_blockSize / 2 + 1];
            _melSpectrum = new float[filterbankSize];
        }
Пример #6
0
        /// <summary>
        /// Standard method for computing mfcc features:
        ///     0) [Optional] pre-emphasis
        ///
        /// Decompose signal into overlapping (hopSize) frames of length fftSize. In each frame do:
        ///
        ///     1) Apply window (if rectangular window was specified then just do nothing)
        ///     2) Obtain power spectrum X
        ///     3) Apply mel filters and log() the result: Y = Log10(X * H)
        ///     4) Do dct-II: mfcc = Dct(Y)
        ///     5) [Optional] liftering of mfcc
        ///
        /// </summary>
        /// <param name="signal">Signal for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of mfcc vectors</returns>
        public override List <FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ====================================== PREPARE =======================================

            var hopSize       = (int)(signal.SamplingRate * HopSize);
            var frameSize     = (int)(signal.SamplingRate * FrameSize);
            var windowSamples = Window.OfType(_window, frameSize);

            var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

            _melFilterBank = FilterBanks.Triangular(fftSize, signal.SamplingRate,
                                                    FilterBanks.MelBands(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq));

            var lifterCoeffs = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;

            var fft = new Fft(fftSize);
            var dct = new Dct2(_filterbankSize, FeatureCount);


            // reserve memory for reusable blocks

            var spectrum       = new float[fftSize / 2 + 1];
            var logMelSpectrum = new float[_filterbankSize];

            var block     = new float[fftSize];   // buffer for currently processed signal block at each step
            var zeroblock = new float[fftSize];   // just a buffer of zeros for quick memset


            // ================================= MAIN PROCESSING ==================================

            var featureVectors = new List <FeatureVector>();

            var prevSample = startSample > 0 ? signal[startSample - 1] : 0.0f;

            var i = startSample;

            while (i + frameSize < endSample)
            {
                // prepare next block for processing

                zeroblock.FastCopyTo(block, zeroblock.Length);
                signal.Samples.FastCopyTo(block, windowSamples.Length, i);


                // 0) pre-emphasis (if needed)

                if (_preEmphasis > 0.0)
                {
                    for (var k = 0; k < frameSize; k++)
                    {
                        var y = block[k] - prevSample * _preEmphasis;
                        prevSample = block[k];
                        block[k]   = y;
                    }
                    prevSample = signal[i + hopSize - 1];
                }


                // 1) apply window

                if (_window != WindowTypes.Rectangular)
                {
                    block.ApplyWindow(windowSamples);
                }


                // 2) calculate power spectrum

                fft.PowerSpectrum(block, spectrum);


                // 3) apply mel filterbank and take log() of the result

                FilterBanks.ApplyAndLog(_melFilterBank, spectrum, logMelSpectrum);


                // 4) dct-II

                var mfccs = new float[FeatureCount];
                dct.Direct(logMelSpectrum, mfccs);


                // 5) (optional) liftering

                if (lifterCoeffs != null)
                {
                    mfccs.ApplyWindow(lifterCoeffs);
                }


                // add mfcc vector to output sequence

                featureVectors.Add(new FeatureVector
                {
                    Features     = mfccs,
                    TimePosition = (double)i / signal.SamplingRate
                });

                i += hopSize;
            }

            return(featureVectors);
        }