Beispiel #1
0
        /// <summary>
        /// Pitch estimation: from spectral peaks
        /// </summary>
        /// <param name="signal"></param>
        /// <param name="startPos"></param>
        /// <param name="endPos"></param>
        /// <returns></returns>
        public static float FromSpectralPeaks(DiscreteSignal signal,
                                              int startPos = 0,
                                              int endPos   = -1,
                                              float low    = 80,
                                              float high   = 400,
                                              int fftSize  = 0)
        {
            if (endPos == -1)
            {
                endPos = signal.Length;
            }

            if (startPos != 0 || endPos != signal.Length)
            {
                signal = signal[startPos, endPos];
            }

            signal.ApplyWindow(WindowTypes.Hann);

            var size = fftSize > 0 ? fftSize : MathUtils.NextPowerOfTwo(signal.Length);
            var fft  = new RealFft(size);

            var spectrum = fft.PowerSpectrum(signal, false).Samples;

            return(FromSpectralPeaks(spectrum, signal.SamplingRate, low, high));
        }
Beispiel #2
0
        float[] ComputeSpectrum(int idx)
        {
            var pos = (int)(_signal.SamplingRate * HopDuration * idx);

            return(_fft.PowerSpectrum(_signal[pos, pos + 512], normalize: false)
                   .Samples);
        }
Beispiel #3
0
        private void UpdateSpectrumAndCepstrum()
        {
            var fftSize      = int.Parse(fftSizeTextBox.Text);
            var cepstrumSize = int.Parse(cepstrumSizeTextBox.Text);

            _hopSize = int.Parse(hopSizeTextBox.Text);

            if (fftSize != _fftSize)
            {
                _fftSize           = fftSize;
                _fft               = new RealFft(fftSize);
                _cepstralTransform = new CepstralTransform(cepstrumSize, _fftSize);
            }

            if (cepstrumSize != _cepstrumSize)
            {
                _cepstrumSize      = cepstrumSize;
                _cepstralTransform = new CepstralTransform(_cepstrumSize, _fftSize);
            }

            var pos   = _hopSize * _specNo;
            var block = _signal[pos, pos + _fftSize];

            //block.ApplyWindow(WindowTypes.Hamming);

            var cepstrum = new float[_fftSize];

            _cepstralTransform.RealCepstrum(block.Samples, cepstrum);

            // ************************************************************************
            //      just visualize spectrum estimated from cepstral coefficients:
            // ************************************************************************

            var real = new float[_fftSize];
            var imag = new float[_fftSize];

            for (var i = 0; i < 32; i++)
            {
                real[i] = cepstrum[i];
            }

            _fft.Direct(real, real, imag);

            var spectrum = _fft.PowerSpectrum(block, normalize: false).Samples;
            var avg      = spectrum.Average(s => LevelScale.ToDecibel(s));

            var spectrumEstimate = real.Take(_fftSize / 2 + 1)
                                   .Select(s => (float)LevelScale.FromDecibel(s * 40 - avg))
                                   .ToArray();

            spectrumPanel.Line     = spectrum;
            spectrumPanel.Markline = spectrumEstimate;
            spectrumPanel.ToDecibel();

            var pitch = Pitch.FromCepstrum(block);

            cepstrumPanel.Line = cepstrum;
            cepstrumPanel.Mark = (int)(_signal.SamplingRate / pitch);
        }
Beispiel #4
0
        /// <summary>
        /// <para>Computes S(implified)PNCC vector in one frame according to [Kim and Stern, 2016].</para>
        /// <para>
        /// General algorithm:
        /// <list type="number">
        ///     <item>Apply window</item>
        ///     <item>Obtain power spectrum</item>
        ///     <item>Apply gammatone filters (squared)</item>
        ///     <item>Mean power normalization</item>
        ///     <item>Apply nonlinearity</item>
        ///     <item>Do DCT-II (normalized)</item>
        /// </list>
        /// </para>
        /// </summary>
        /// <param name="block">Block of data</param>
        /// <param name="features">Features (one SPNCC feature vector) computed in the block</param>
        public override void ProcessFrame(float[] block, float[] features)
        {
            const float meanPower = 1e10f;

            // 0) base extractor applies window

            // 1) calculate power spectrum

            _fft.PowerSpectrum(block, _spectrum, false);

            // 2) apply gammatone filterbank

            FilterBanks.Apply(FilterBank, _spectrum, _filteredSpectrum);

            // 3) mean power normalization:

            var sumPower = 0.0f;

            for (var j = 0; j < _filteredSpectrum.Length; j++)
            {
                sumPower += _filteredSpectrum[j];
            }

            _mean = LambdaMu * _mean + (1 - LambdaMu) * sumPower;

            for (var j = 0; j < _filteredSpectrum.Length; j++)
            {
                _filteredSpectrum[j] *= meanPower / _mean;
            }

            // 4) nonlinearity (pow ^ d  or  Log10)

            if (_power != 0)
            {
                for (var j = 0; j < _filteredSpectrum.Length; j++)
                {
                    _filteredSpectrum[j] = (float)Math.Pow(_filteredSpectrum[j], 1.0 / _power);
                }
            }
            else
            {
                for (var j = 0; j < _filteredSpectrum.Length; j++)
                {
                    _filteredSpectrum[j] = (float)Math.Log10(_filteredSpectrum[j] + float.Epsilon);
                }
            }

            // 5) dct-II (normalized)

            _dct.DirectNorm(_filteredSpectrum, features);

            // 6) (optional) replace first coeff with log(energy)

            if (_includeEnergy)
            {
                features[0] = (float)Math.Log(Math.Max(block.Sum(x => x * x), _logEnergyFloor));
            }
        }
Beispiel #5
0
        /// <summary>
        /// <para>Computes PLP-RASTA feature vector in one frame.</para>
        /// <para>
        /// General algorithm:
        /// <list type="number">
        ///     <item>Apply window</item>
        ///     <item>Obtain power spectrum</item>
        ///     <item>Apply filterbank of bark bands (or mel bands)</item>
        ///     <item>[Optional] filter each component of the processed spectrum with a RASTA filter</item>
        ///     <item>Apply equal loudness curve</item>
        ///     <item>Apply nonlinearity (take cubic root)</item>
        ///     <item>Do LPC</item>
        ///     <item>Convert LPC to cepstrum</item>
        ///     <item>[Optional] lifter cepstrum</item>
        /// </list>
        /// </para>
        /// </summary>
        /// <param name="block">Block of data</param>
        /// <param name="features">Features (one PLP feature vector) computed in the block</param>
        public override void ProcessFrame(float[] block, float[] features)
        {
            // 0) base extractor applies window

            // 1) calculate power spectrum (without normalization)

            _fft.PowerSpectrum(block, _spectrum, false);

            // 2) apply filterbank on the result (bark frequencies by default)

            FilterBanks.Apply(FilterBank, _spectrum, _bandSpectrum);

            // 3) RASTA filtering in log-domain [optional]

            if (_rasta > 0)
            {
                for (var k = 0; k < _bandSpectrum.Length; k++)
                {
                    var log = (float)Math.Log(_bandSpectrum[k] + float.Epsilon);

                    log = _rastaFilters[k].Process(log);

                    _bandSpectrum[k] = (float)Math.Exp(log);
                }
            }

            // 4) and 5) apply equal loudness curve and take cubic root

            for (var k = 0; k < _bandSpectrum.Length; k++)
            {
                _bandSpectrum[k] = (float)Math.Pow(Math.Max(_bandSpectrum[k], 1.0) * _equalLoudnessCurve[k], 0.33);
            }

            // 6) LPC from power spectrum:

            var n = _idftTable[0].Length;

            // get autocorrelation samples from post-processed power spectrum (via IDFT):

            for (var k = 0; k < _idftTable.Length; k++)
            {
                var acc = _idftTable[k][0] * _bandSpectrum[0] +
                          _idftTable[k][n - 1] * _bandSpectrum[n - 3];  // add values at two duplicated edges right away

                for (var j = 1; j < n - 1; j++)
                {
                    acc += _idftTable[k][j] * _bandSpectrum[j - 1];
                }

                _cc[k] = acc / (2 * (n - 1));
            }

            // LPC:

            for (var k = 0; k < _lpc.Length; _lpc[k] = 0, k++)
            {
                ;
            }

            var err = Lpc.LevinsonDurbin(_cc, _lpc, _lpcOrder);

            // 7) compute LPCC coefficients from LPC

            Lpc.ToCepstrum(_lpc, err, features);


            // 8) (optional) liftering

            if (_lifterCoeffs != null)
            {
                features.ApplyWindow(_lifterCoeffs);
            }

            // 9) (optional) replace first coeff with log(energy)

            if (_includeEnergy)
            {
                features[0] = (float)Math.Log(Math.Max(block.Sum(x => x * x), _logEnergyFloor));
            }
        }
Beispiel #6
0
        /// <summary>
        /// Computes chroma feature vector in one frame.
        /// </summary>
        /// <param name="block">Block of data</param>
        /// <param name="features">Features (one chroma feature vector) computed in the block</param>
        public override void ProcessFrame(float[] block, float[] features)
        {
            _fft.PowerSpectrum(block, _spectrum, false);

            FilterBanks.Apply(_filterBank, _spectrum, features);
        }
Beispiel #7
0
        /// <summary>
        /// Method for computing modulation spectra.
        /// Each vector representing one modulation spectrum is a flattened version of 2D spectrum.
        /// </summary>
        /// <param name="samples">Samples for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of flattened modulation spectra</returns>
        public override List <float[]> ComputeFrom(float[] samples, int startSample, int endSample)
        {
            Guard.AgainstInvalidRange(startSample, endSample, "starting pos", "ending pos");

            var frameSize = FrameSize;
            var hopSize   = HopSize;

            var featureVectors = new List <float[]>();

            var en = 0;
            var i  = startSample;

            if (_featuregram == null)
            {
                _envelopes = new float[_filterbank.Length][];
                for (var n = 0; n < _envelopes.Length; n++)
                {
                    _envelopes[n] = new float[samples.Length / hopSize];
                }

                var prevSample = startSample > 0 ? samples[startSample - 1] : 0.0f;

                var lastSample = endSample - Math.Max(frameSize, hopSize);

                // ===================== compute local FFTs (do STFT) =======================

                for (i = startSample; i < lastSample; i += hopSize)
                {
                    // copy frameSize samples
                    samples.FastCopyTo(_block, frameSize, i);
                    // fill zeros to fftSize if frameSize < fftSize
                    for (var k = frameSize; k < _block.Length; _block[k++] = 0)
                    {
                        ;
                    }


                    // 0) pre-emphasis (if needed)

                    if (_preEmphasis > 1e-10f)
                    {
                        for (var k = 0; k < frameSize; k++)
                        {
                            var y = _block[k] - prevSample * _preEmphasis;
                            prevSample = _block[k];
                            _block[k]  = y;
                        }
                        prevSample = samples[i + hopSize - 1];
                    }

                    // 1) apply window

                    if (_window != WindowTypes.Rectangular)
                    {
                        _block.ApplyWindow(_windowSamples);
                    }

                    // 2) calculate power spectrum

                    _fft.PowerSpectrum(_block, _spectrum);

                    // 3) apply filterbank...

                    FilterBanks.Apply(_filterbank, _spectrum, _filteredSpectrum);

                    // ...and save results for future calculations

                    for (var n = 0; n < _envelopes.Length; n++)
                    {
                        _envelopes[n][en] = _filteredSpectrum[n];
                    }
                    en++;
                }
            }
            else
            {
                en         = _featuregram.Length;
                _envelopes = new float[_featuregram[0].Length][];

                for (var n = 0; n < _envelopes.Length; n++)
                {
                    _envelopes[n] = new float[en];
                    for (i = 0; i < en; i++)
                    {
                        _envelopes[n][i] = _featuregram[i][n];
                    }
                }
            }

            // =========================== modulation analysis =======================

            var envelopeLength = en;

            // long-term AVG-normalization

            foreach (var envelope in _envelopes)
            {
                var avg = 0.0f;
                for (var k = 0; k < envelopeLength; k++)
                {
                    avg += (k >= 0) ? envelope[k] : -envelope[k];
                }
                avg /= envelopeLength;

                if (avg >= 1e-10f)   // this happens more frequently
                {
                    for (var k = 0; k < envelopeLength; k++)
                    {
                        envelope[k] /= avg;
                    }
                }
            }

            i = 0;
            while (i < envelopeLength)
            {
                var vector = new float[_envelopes.Length * (_modulationFftSize / 2 + 1)];
                var offset = 0;

                foreach (var envelope in _envelopes)
                {
                    // copy modFftSize samples (or envelopeLength - i in the end)
                    var len = Math.Min(_modulationFftSize, envelopeLength - i);
                    envelope.FastCopyTo(_modBlock, len, i);
                    // fill zeros to modFftSize if len < modFftSize
                    for (var k = len; k < _modBlock.Length; _modBlock[k++] = 0)
                    {
                    }

                    _modulationFft.PowerSpectrum(_modBlock, _modSpectrum);
                    _modSpectrum.FastCopyTo(vector, _modSpectrum.Length, 0, offset);

                    offset += _modSpectrum.Length;
                }

                featureVectors.Add(vector);

                i += _modulationHopSize;
            }

            return(featureVectors);
        }
        /// <summary>
        /// Constructor
        /// </summary>
        /// <param name="samplingRate"></param>
        /// <param name="featureCount"></param>
        /// <param name="filterbank"></param>
        /// <param name="frameDuration"></param>
        /// <param name="hopDuration"></param>
        /// <param name="preEmphasis"></param>
        /// <param name="nonLinearity"></param>
        /// <param name="spectrumType"></param>
        /// <param name="window"></param>
        /// <param name="logFloor"></param>
        public FilterbankExtractor(int samplingRate,
                                   float[][] filterbank,
                                   double frameDuration          = 0.0256 /*sec*/,
                                   double hopDuration            = 0.010 /*sec*/,
                                   double preEmphasis            = 0,
                                   NonLinearityType nonLinearity = NonLinearityType.None,
                                   SpectrumType spectrumType     = SpectrumType.Power,
                                   WindowTypes window            = WindowTypes.Hamming,
                                   float logFloor = float.Epsilon)

            : base(samplingRate, frameDuration, hopDuration, preEmphasis)
        {
            FilterBank   = filterbank;
            FeatureCount = filterbank.Length;

            _blockSize = 2 * (filterbank[0].Length - 1);

            Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");

            _fft = new RealFft(_blockSize);

            _window        = window;
            _windowSamples = Window.OfType(_window, FrameSize);

            // setup spectrum post-processing: =======================================================

            _logFloor         = logFloor;
            _nonLinearityType = nonLinearity;
            switch (nonLinearity)
            {
            case NonLinearityType.Log10:
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _bandSpectrum, _logFloor);
                break;

            case NonLinearityType.LogE:
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _bandSpectrum, _logFloor);
                break;

            case NonLinearityType.ToDecibel:
                _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _bandSpectrum, _logFloor);
                break;

            case NonLinearityType.CubicRoot:
                _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _bandSpectrum, 0.33);
                break;

            default:
                _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _bandSpectrum);
                break;
            }

            _spectrumType = spectrumType;
            switch (_spectrumType)
            {
            case SpectrumType.Magnitude:
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false);
                break;

            case SpectrumType.Power:
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false);
                break;

            case SpectrumType.MagnitudeNormalized:
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true);
                break;

            case SpectrumType.PowerNormalized:
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true);
                break;
            }

            // reserve memory for reusable blocks

            _spectrum     = new float[_blockSize / 2 + 1];
            _bandSpectrum = new float[filterbank.Length];
        }
Beispiel #9
0
        /// <summary>
        /// Constructs extractor from configuration <paramref name="options"/>.
        /// </summary>
        public MfccExtractor(MfccOptions options) : base(options)
        {
            FeatureCount = options.FeatureCount;

            var filterbankSize = options.FilterBankSize;

            if (options.FilterBank is null)
            {
                _blockSize = options.FftSize > FrameSize ? options.FftSize : MathUtils.NextPowerOfTwo(FrameSize);

                var melBands = FilterBanks.MelBands(filterbankSize, SamplingRate, options.LowFrequency, options.HighFrequency);
                FilterBank = FilterBanks.Triangular(_blockSize, SamplingRate, melBands, mapper: Scale.HerzToMel);   // HTK/Kaldi-style
            }
            else
            {
                FilterBank     = options.FilterBank;
                filterbankSize = FilterBank.Length;
                _blockSize     = 2 * (FilterBank[0].Length - 1);

                Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
            }

            _fft = new RealFft(_blockSize);

            _lifterSize   = options.LifterSize;
            _lifterCoeffs = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;

            _includeEnergy  = options.IncludeEnergy;
            _logEnergyFloor = options.LogEnergyFloor;

            // setup DCT: ============================================================================

            _dctType = options.DctType;
            switch (_dctType[0])
            {
            case '1': _dct = new Dct1(filterbankSize); break;

            case '3': _dct = new Dct3(filterbankSize); break;

            case '4': _dct = new Dct4(filterbankSize); break;

            default:  _dct = new Dct2(filterbankSize); break;
            }

            if (_dctType.EndsWith("N", StringComparison.OrdinalIgnoreCase))
            {
                _applyDct = mfccs => _dct.DirectNorm(_melSpectrum, mfccs);
            }
            else
            {
                _applyDct = mfccs => _dct.Direct(_melSpectrum, mfccs);
            }

            // setup spectrum post-processing: =======================================================

            _logFloor         = options.LogFloor;
            _nonLinearityType = options.NonLinearity;
            switch (_nonLinearityType)
            {
            case NonLinearityType.Log10:
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _melSpectrum, _logFloor); break;

            case NonLinearityType.LogE:
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _melSpectrum, _logFloor); break;

            case NonLinearityType.ToDecibel:
                _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _melSpectrum, _logFloor); break;

            case NonLinearityType.CubicRoot:
                _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _melSpectrum, 0.33); break;

            default:
                _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _melSpectrum); break;
            }

            _spectrumType = options.SpectrumType;
            switch (_spectrumType)
            {
            case SpectrumType.Magnitude:
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false); break;

            case SpectrumType.MagnitudeNormalized:
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true); break;

            case SpectrumType.PowerNormalized:
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true); break;

            default:
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false); break;
            }

            // reserve memory for reusable blocks

            _spectrum    = new float[_blockSize / 2 + 1];
            _melSpectrum = new float[filterbankSize];
        }
Beispiel #10
0
        /// <summary>
        /// Constructor
        /// </summary>
        /// <param name="options">Filterbank options</param>
        public FilterbankExtractor(FilterbankOptions options) : base(options)
        {
            var filterbankSize = options.FilterBankSize;

            if (options.FilterBank == null)
            {
                _blockSize = options.FftSize > FrameSize ? options.FftSize : MathUtils.NextPowerOfTwo(FrameSize);

                var melBands = FilterBanks.MelBands(filterbankSize, SamplingRate, options.LowFrequency, options.HighFrequency, false);
                FilterBank = FilterBanks.Rectangular(_blockSize, SamplingRate, melBands, mapper: Scale.HerzToMel);
            }
            else
            {
                FilterBank     = options.FilterBank;
                filterbankSize = FilterBank.Length;
                _blockSize     = 2 * (FilterBank[0].Length - 1);

                Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
            }

            FeatureCount = filterbankSize;

            _fft = new RealFft(_blockSize);

            // setup spectrum post-processing: =======================================================

            _logFloor         = options.LogFloor;
            _nonLinearityType = options.NonLinearity;
            switch (_nonLinearityType)
            {
            case NonLinearityType.Log10:
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _bandSpectrum, _logFloor); break;

            case NonLinearityType.LogE:
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _bandSpectrum, _logFloor); break;

            case NonLinearityType.ToDecibel:
                _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _bandSpectrum, _logFloor); break;

            case NonLinearityType.CubicRoot:
                _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _bandSpectrum, 0.33); break;

            default:
                _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _bandSpectrum); break;
            }

            _spectrumType = options.SpectrumType;
            switch (_spectrumType)
            {
            case SpectrumType.Magnitude:
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false); break;

            case SpectrumType.MagnitudeNormalized:
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true); break;

            case SpectrumType.PowerNormalized:
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true); break;

            default:
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false); break;
            }

            // reserve memory for reusable blocks

            _spectrum     = new float[_blockSize / 2 + 1];
            _bandSpectrum = new float[filterbankSize];
        }
Beispiel #11
0
        /// <summary>
        /// Constructor
        /// </summary>
        /// <param name="samplingRate"></param>
        /// <param name="featureCount"></param>
        /// <param name="frameDuration"></param>
        /// <param name="hopDuration"></param>
        /// <param name="filterbankSize"></param>
        /// <param name="lowFreq"></param>
        /// <param name="highFreq"></param>
        /// <param name="fftSize"></param>
        /// <param name="filterbank"></param>
        /// <param name="lifterSize"></param>
        /// <param name="preEmphasis"></param>
        /// <param name="includeEnergy"></param>
        /// <param name="dctType">"1", "1N", "2", "2N", "3", "3N", "4", "4N"</param>
        /// <param name="nonLinearity"></param>
        /// <param name="spectrumType"></param>
        /// <param name="window"></param>
        /// <param name="logFloor"></param>
        public MfccExtractor(int samplingRate,
                             int featureCount,
                             double frameDuration          = 0.0256 /*sec*/,
                             double hopDuration            = 0.010 /*sec*/,
                             int filterbankSize            = 24,
                             double lowFreq                = 0,
                             double highFreq               = 0,
                             int fftSize                   = 0,
                             float[][] filterbank          = null,
                             int lifterSize                = 0,
                             double preEmphasis            = 0,
                             bool includeEnergy            = false,
                             string dctType                = "2N",
                             NonLinearityType nonLinearity = NonLinearityType.Log10,
                             SpectrumType spectrumType     = SpectrumType.Power,
                             WindowTypes window            = WindowTypes.Hamming,
                             float logFloor                = float.Epsilon)

            : base(samplingRate, frameDuration, hopDuration, preEmphasis)
        {
            FeatureCount = featureCount;

            _lowFreq  = lowFreq;
            _highFreq = highFreq;

            if (filterbank == null)
            {
                _blockSize = fftSize > FrameSize ? fftSize : MathUtils.NextPowerOfTwo(FrameSize);

                var melBands = FilterBanks.MelBands(filterbankSize, _blockSize, SamplingRate, _lowFreq, _highFreq);
                FilterBank = FilterBanks.Triangular(_blockSize, SamplingRate, melBands, mapper: Scale.HerzToMel);   // HTK/Kaldi-style
            }
            else
            {
                FilterBank     = filterbank;
                filterbankSize = filterbank.Length;
                _blockSize     = 2 * (filterbank[0].Length - 1);

                Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
            }

            _fft = new RealFft(_blockSize);

            _window        = window;
            _windowSamples = Window.OfType(_window, FrameSize);

            _lifterSize   = lifterSize;
            _lifterCoeffs = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;

            _includeEnergy = includeEnergy;

            // setup DCT: ============================================================================

            _dctType = dctType;
            switch (dctType[0])
            {
            case '1':
                _dct = new Dct1(filterbankSize);
                break;

            case '2':
                _dct = new Dct2(filterbankSize);
                break;

            case '3':
                _dct = new Dct3(filterbankSize);
                break;

            case '4':
                _dct = new Dct4(filterbankSize);
                break;

            default:
                throw new ArgumentException("Only DCT-1, 2, 3 and 4 are supported!");
            }

            if (dctType.Length > 1 && char.ToUpper(dctType[1]) == 'N')
            {
                _applyDct = mfccs => _dct.DirectNorm(_melSpectrum, mfccs);
            }
            else
            {
                _applyDct = mfccs => _dct.Direct(_melSpectrum, mfccs);
            }

            // setup spectrum post-processing: =======================================================

            _logFloor         = logFloor;
            _nonLinearityType = nonLinearity;
            switch (nonLinearity)
            {
            case NonLinearityType.Log10:
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _melSpectrum, _logFloor);
                break;

            case NonLinearityType.LogE:
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _melSpectrum, _logFloor);
                break;

            case NonLinearityType.ToDecibel:
                _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _melSpectrum, _logFloor);
                break;

            case NonLinearityType.CubicRoot:
                _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _melSpectrum, 0.33);
                break;

            default:
                _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _melSpectrum);
                break;
            }

            _spectrumType = spectrumType;
            switch (_spectrumType)
            {
            case SpectrumType.Magnitude:
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false);
                break;

            case SpectrumType.Power:
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false);
                break;

            case SpectrumType.MagnitudeNormalized:
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true);
                break;

            case SpectrumType.PowerNormalized:
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true);
                break;
            }

            // reserve memory for reusable blocks

            _spectrum    = new float[_blockSize / 2 + 1];
            _melSpectrum = new float[filterbankSize];
        }
Beispiel #12
0
        /// <summary>
        /// S(implified)PNCC algorithm according to [Kim & Stern, 2016].
        /// In each frame do:
        ///
        ///     1) Apply window (if rectangular window was specified then just do nothing)
        ///     2) Obtain power spectrum
        ///     3) Apply gammatone filters (squared)
        ///     4) Mean power normalization
        ///     5) Apply nonlinearity
        ///     6) Do dct-II (normalized)
        ///
        /// </summary>
        /// <param name="samples">Samples for analysis</param>
        /// <returns>List of pncc vectors</returns>
        public override float[] ProcessFrame(float[] block)
        {
            const float meanPower = 1e10f;

            // fill zeros to fftSize if frameSize < fftSize

            for (var k = FrameSize; k < block.Length; block[k++] = 0)
            {
                ;
            }

            // 1) apply window

            block.ApplyWindow(_windowSamples);

            // 2) calculate power spectrum

            _fft.PowerSpectrum(block, _spectrum, false);

            // 3) apply gammatone filterbank

            FilterBanks.Apply(FilterBank, _spectrum, _filteredSpectrum);

            // 4) mean power normalization:

            var sumPower = 0.0f;

            for (var j = 0; j < _filteredSpectrum.Length; j++)
            {
                sumPower += _filteredSpectrum[j];
            }

            _mean = LambdaMu * _mean + (1 - LambdaMu) * sumPower;

            for (var j = 0; j < _filteredSpectrum.Length; j++)
            {
                _filteredSpectrum[j] *= meanPower / _mean;
            }

            // 5) nonlinearity (pow ^ d  or  Log10)

            if (_power != 0)
            {
                for (var j = 0; j < _filteredSpectrum.Length; j++)
                {
                    _filteredSpectrum[j] = (float)Math.Pow(_filteredSpectrum[j], 1.0 / _power);
                }
            }
            else
            {
                for (var j = 0; j < _filteredSpectrum.Length; j++)
                {
                    _filteredSpectrum[j] = (float)Math.Log10(_filteredSpectrum[j] + float.Epsilon);
                }
            }

            // 6) dct-II (normalized)

            var spnccs = new float[FeatureCount];

            _dct.DirectNorm(_filteredSpectrum, spnccs);

            return(spnccs);
        }
Beispiel #13
0
        /// <summary>
        /// Standard method for computing PLP features.
        /// In each frame do:
        ///
        ///     1) Apply window
        ///     2) Obtain power spectrum
        ///     3) Apply filterbank of bark bands (or mel bands)
        ///     4) [Optional] filter each component of the processed spectrum with a RASTA filter
        ///     5) Apply equal loudness curve
        ///     6) Take cubic root
        ///     7) Do LPC
        ///     8) Convert LPC to cepstrum
        ///     9) [Optional] lifter cepstrum
        ///
        /// </summary>
        /// <param name="block">Samples for analysis</param>
        /// <returns>PLP vector</returns>
        public override float[] ProcessFrame(float[] block)
        {
            // fill zeros to fftSize if frameSize < fftSize (blockSize)

            for (var k = FrameSize; k < block.Length; block[k++] = 0)
            {
                ;
            }

            // 1) apply window

            block.ApplyWindow(_windowSamples);

            // 2) calculate power spectrum (without normalization)

            _fft.PowerSpectrum(block, _spectrum, false);

            // 3) apply filterbank on the result (bark frequencies by default)

            FilterBanks.Apply(FilterBank, _spectrum, _bandSpectrum);

            // 4) RASTA filtering in log-domain [optional]

            if (_rasta > 0)
            {
                for (var k = 0; k < _bandSpectrum.Length; k++)
                {
                    var log = (float)Math.Log(_bandSpectrum[k] + float.Epsilon);

                    log = _rastaFilters[k].Process(log);

                    _bandSpectrum[k] = (float)Math.Exp(log);
                }
            }

            // 5) and 6) apply equal loudness curve and take cubic root

            for (var k = 0; k < _bandSpectrum.Length; k++)
            {
                _bandSpectrum[k] = (float)Math.Pow(Math.Max(_bandSpectrum[k], 1.0) * _equalLoudnessCurve[k], 0.33);
            }

            // 7) LPC from power spectrum:

            var n = _idftTable[0].Length;

            // get autocorrelation samples from post-processed power spectrum (via IDFT):

            for (var k = 0; k < _idftTable.Length; k++)
            {
                var acc = _idftTable[k][0] * _bandSpectrum[0] +
                          _idftTable[k][n - 1] * _bandSpectrum[n - 3];  // add values at two duplicated edges right away

                for (var j = 1; j < n - 1; j++)
                {
                    acc += _idftTable[k][j] * _bandSpectrum[j - 1];
                }

                _cc[k] = acc / (2 * (n - 1));
            }

            // LPC:

            for (var k = 0; k < _lpc.Length; _lpc[k] = 0, k++)
            {
                ;
            }

            var err = Lpc.LevinsonDurbin(_cc, _lpc, _lpcOrder);

            // 8) compute LPCC coefficients from LPC

            var lpcc = new float[FeatureCount];

            Lpc.ToCepstrum(_lpc, err, lpcc);


            // 9) (optional) liftering

            if (_lifterCoeffs != null)
            {
                lpcc.ApplyWindow(_lifterCoeffs);
            }

            return(lpcc);
        }