示例#1
0
        /// <summary>
        /// Computes MFCC feature vectors for a range of samples of <paramref name="signal"/>.
        ///
        /// Hop and frame sizes (in samples) are derived from HopSize and FrameSize multiplied by
        /// the sampling rate of the signal. Every frame is zero-padded to the FFT size and then:
        ///
        ///     0) [Optional] pre-emphasized (if _preEmphasis is positive)
        ///     1) multiplied by the window (skipped for rectangular window)
        ///     2) transformed to a power spectrum
        ///     3) passed through the mel filterbank, with a log taken of the result
        ///     4) transformed with dct-II
        ///     5) [Optional] liftered (if _lifterSize is positive)
        ///
        /// The mel filterbank is rebuilt on each call and stored in _melFilterBank.
        /// </summary>
        /// <param name="signal">Signal for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">
        /// Upper bound of the processed range: a frame starting at position i is processed
        /// only while i + frameSize &lt; endSample
        /// </param>
        /// <returns>List of mfcc vectors, one per processed frame, each stamped with its start time</returns>
        public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ====================================== PREPARE =======================================

            var hopSize       = (int)(signal.SamplingRate * HopSize);
            var frameSize     = (int)(signal.SamplingRate * FrameSize);
            var windowSamples = Window.OfType(_window, frameSize);

            // use the configured FFT size if it can hold a frame, otherwise the next power of two
            var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

            _melFilterBank = FilterBanks.Triangular(fftSize, signal.SamplingRate,
                                                    FilterBanks.MelBands(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq));

            var lifterCoeffs = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;

            var fft = new Fft(fftSize);
            var dct = new Dct2(_filterbankSize, FeatureCount);


            // reserve memory for reusable blocks

            var spectrum       = new float[fftSize / 2 + 1];
            var logMelSpectrum = new float[_filterbankSize];

            var block     = new float[fftSize];   // buffer for currently processed signal block at each step
            var zeroblock = new float[fftSize];   // just a buffer of zeros for quick memset


            // ================================= MAIN PROCESSING ==================================

            var featureVectors = new List<FeatureVector>();

            // pre-emphasis of the very first sample needs the sample preceding the range (or 0)
            var prevSample = startSample > 0 ? signal[startSample - 1] : 0.0f;

            var i = startSample;

            while (i + frameSize < endSample)
            {
                // prepare next block for processing: clear the buffer, then copy one frame into it

                zeroblock.FastCopyTo(block, zeroblock.Length);
                signal.Samples.FastCopyTo(block, windowSamples.Length, i);


                // 0) pre-emphasis (if needed)

                if (_preEmphasis > 0.0)
                {
                    for (var k = 0; k < frameSize; k++)
                    {
                        var y = block[k] - prevSample * _preEmphasis;
                        prevSample = block[k];
                        block[k]   = y;
                    }

                    // The next frame starts at i + hopSize, so its predecessor is the sample right
                    // before that position. When hopSize > frameSize this position may lie beyond
                    // the end of the signal; in that case there is no next frame (the loop condition
                    // fails on the next check), so the read is simply skipped instead of throwing.
                    var beforeNextFrame = i + hopSize - 1;
                    if (beforeNextFrame < signal.Samples.Length)
                    {
                        prevSample = signal[beforeNextFrame];
                    }
                }


                // 1) apply window

                if (_window != WindowTypes.Rectangular)
                {
                    block.ApplyWindow(windowSamples);
                }


                // 2) calculate power spectrum

                fft.PowerSpectrum(block, spectrum);


                // 3) apply mel filterbank and take log() of the result

                FilterBanks.ApplyAndLog(_melFilterBank, spectrum, logMelSpectrum);


                // 4) dct-II (a fresh array per frame, since it is handed over to the feature vector)

                var mfccs = new float[FeatureCount];
                dct.Direct(logMelSpectrum, mfccs);


                // 5) (optional) liftering

                if (lifterCoeffs != null)
                {
                    mfccs.ApplyWindow(lifterCoeffs);
                }


                // add mfcc vector to output sequence

                featureVectors.Add(new FeatureVector
                {
                    Features     = mfccs,
                    TimePosition = (double)i / signal.SamplingRate
                });

                i += hopSize;
            }

            return featureVectors;
        }
示例#2
0
        /// <summary>
        /// Computes S(implified)PNCC coefficients of one frame, following [Kim &amp; Stern, 2016].
        /// The frame goes through these stages:
        ///
        ///     1) Windowing (after the tail of the block beyond FrameSize has been zeroed)
        ///     2) Power spectrum
        ///     3) Filterbank (gammatone, according to the original notes)
        ///     4) Mean power normalization (the running mean is kept in _mean between calls)
        ///     5) Nonlinearity: power-law if _power is non-zero, otherwise Log10
        ///     6) Normalized dct-II
        ///
        /// </summary>
        /// <param name="block">Samples of the frame; the part beyond FrameSize is zeroed in place</param>
        /// <returns>Newly allocated array of FeatureCount coefficients</returns>
        public override float[] ProcessFrame(float[] block)
        {
            const float targetPower = 1e10f;

            // zero-pad: everything after the frame itself must not contribute to the FFT

            for (var pos = FrameSize; pos < block.Length; pos++)
            {
                block[pos] = 0;
            }

            // 1) window, 2) power spectrum, 3) filterbank

            block.ApplyWindow(_windowSamples);

            _fft.PowerSpectrum(block, _spectrum, false);

            FilterBanks.Apply(FilterBank, _spectrum, _filteredSpectrum);

            // 4) mean power normalization: update the running mean with the total power
            //    of this frame, then rescale every band

            var bandCount  = _filteredSpectrum.Length;
            var totalPower = 0.0f;

            for (var band = 0; band < bandCount; band++)
            {
                totalPower += _filteredSpectrum[band];
            }

            _mean = LambdaMu * _mean + (1 - LambdaMu) * totalPower;

            for (var band = 0; band < bandCount; band++)
            {
                _filteredSpectrum[band] *= targetPower / _mean;
            }

            // 5) nonlinearity (Log10  or  pow ^ (1 / _power))

            if (_power == 0)
            {
                for (var band = 0; band < bandCount; band++)
                {
                    // epsilon keeps the argument of the logarithm away from zero
                    _filteredSpectrum[band] = (float)Math.Log10(_filteredSpectrum[band] + float.Epsilon);
                }
            }
            else
            {
                var exponent = 1.0 / _power;

                for (var band = 0; band < bandCount; band++)
                {
                    _filteredSpectrum[band] = (float)Math.Pow(_filteredSpectrum[band], exponent);
                }
            }

            // 6) dct-II (normalized)

            var coefficients = new float[FeatureCount];

            _dct.DirectNorm(_filteredSpectrum, coefficients);

            return coefficients;
        }
示例#3
0
        /// <summary>
        /// Computes PLP coefficients of one frame. The frame goes through these stages:
        ///
        ///     1) Windowing (after the tail of the block beyond FrameSize has been zeroed)
        ///     2) Power spectrum (computed with the normalization flag set to false)
        ///     3) Filterbank (bark bands by default, according to the original notes)
        ///     4) [Optional] RASTA filtering of each band in the log domain (if _rasta is positive)
        ///     5) Equal loudness weighting of band values floored at 1.0
        ///     6) Compression with exponent 0.33 (approximate cubic root)
        ///     7) LPC: autocorrelation via the precomputed IDFT table, then Levinson-Durbin
        ///     8) Conversion of LPC to cepstrum
        ///     9) [Optional] liftering (if lifter coefficients are set)
        ///
        /// </summary>
        /// <param name="block">Samples of the frame; the part beyond FrameSize is zeroed in place</param>
        /// <returns>Newly allocated PLP vector of FeatureCount coefficients</returns>
        public override float[] ProcessFrame(float[] block)
        {
            // zero-pad: everything after the frame itself must not contribute to the FFT

            for (var pos = FrameSize; pos < block.Length; pos++)
            {
                block[pos] = 0;
            }

            // 1) window, 2) power spectrum (without normalization), 3) filterbank

            block.ApplyWindow(_windowSamples);

            _fft.PowerSpectrum(block, _spectrum, false);

            FilterBanks.Apply(FilterBank, _spectrum, _bandSpectrum);

            var bandCount = _bandSpectrum.Length;

            // 4) RASTA filtering in log-domain [optional]: each band has its own filter

            if (_rasta > 0)
            {
                for (var band = 0; band < bandCount; band++)
                {
                    var logPower = (float)Math.Log(_bandSpectrum[band] + float.Epsilon);

                    float filtered = _rastaFilters[band].Process(logPower);

                    _bandSpectrum[band] = (float)Math.Exp(filtered);
                }
            }

            // 5) and 6) equal loudness curve and compression (values below 1.0 are raised to 1.0 first)

            for (var band = 0; band < bandCount; band++)
            {
                var floored = Math.Max(_bandSpectrum[band], 1.0);

                _bandSpectrum[band] = (float)Math.Pow(floored * _equalLoudnessCurve[band], 0.33);
            }

            // 7) LPC from power spectrum.
            //    Autocorrelation samples are obtained from the post-processed spectrum via IDFT.
            //    A table row is two entries longer than the band spectrum: the first and the last
            //    band are each used twice (duplicated edges).

            var tableWidth = _idftTable[0].Length;
            var scale      = 2 * (tableWidth - 1);

            for (var lag = 0; lag < _idftTable.Length; lag++)
            {
                var row = _idftTable[lag];

                // the two duplicated edges go first...
                var sum = row[0] * _bandSpectrum[0] +
                          row[tableWidth - 1] * _bandSpectrum[tableWidth - 3];

                // ...followed by the inner entries, shifted by one against the band index
                for (var j = 1; j < tableWidth - 1; j++)
                {
                    sum += row[j] * _bandSpectrum[j - 1];
                }

                _cc[lag] = sum / scale;
            }

            // reset LPC coefficients left over from the previous frame

            for (var k = 0; k < _lpc.Length; k++)
            {
                _lpc[k] = 0;
            }

            var predictionError = Lpc.LevinsonDurbin(_cc, _lpc, _lpcOrder);

            // 8) compute LPCC coefficients from LPC

            var cepstrum = new float[FeatureCount];

            Lpc.ToCepstrum(_lpc, predictionError, cepstrum);

            // 9) (optional) liftering

            if (_lifterCoeffs != null)
            {
                cepstrum.ApplyWindow(_lifterCoeffs);
            }

            return cepstrum;
        }
        /// <summary>
        /// S(implified)PNCC algorithm according to [Kim &amp; Stern, 2016].
        ///
        /// The samples are decomposed into frames of FrameSize samples taken every HopSize samples.
        /// The work buffer is zeroed before each frame is copied into it. In each frame do:
        ///
        ///     0) [Optional] pre-emphasis (if _preEmphasis is positive)
        ///     1) Apply window (if rectangular window was specified then just do nothing)
        ///     2) Obtain power spectrum
        ///     3) Apply gammatone filters (squared)
        ///     4) Mean power normalization (the running mean is local to one call)
        ///     5) Apply nonlinearity: power-law if _power is non-zero, otherwise Log10
        ///     6) Do dct-II (normalized)
        ///
        /// </summary>
        /// <param name="samples">Samples for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">
        /// Upper bound of the processed range: a frame starting at position i is processed
        /// only while i + FrameSize &lt; endSample
        /// </param>
        /// <returns>List of pncc vectors, one per processed frame, each stamped with its start time</returns>
        public override List<FeatureVector> ComputeFrom(float[] samples, int startSample, int endSample)
        {
            Guard.AgainstInvalidRange(startSample, endSample, "starting pos", "ending pos");

            var frameSize = FrameSize;
            var hopSize   = HopSize;

            const float meanPower = 1e10f;
            var         mean      = 4e07f;   // initial value of the running mean power

            var d = _power != 0 ? 1.0 / _power : 0.0;

            var featureVectors = new List<FeatureVector>();

            // pre-emphasis of the very first sample needs the sample preceding the range (or 0)
            var prevSample = startSample > 0 ? samples[startSample - 1] : 0.0f;

            var i = startSample;

            while (i + frameSize < endSample)
            {
                // prepare next block for processing: clear the buffer, then copy one frame into it

                _zeroblock.FastCopyTo(_block, _zeroblock.Length);
                samples.FastCopyTo(_block, frameSize, i);


                // 0) pre-emphasis (if needed)

                if (_preEmphasis > 0.0)
                {
                    for (var k = 0; k < frameSize; k++)
                    {
                        var y = _block[k] - prevSample * _preEmphasis;
                        prevSample = _block[k];
                        _block[k]  = y;
                    }

                    // The next frame starts at i + hopSize, so its predecessor is the sample right
                    // before that position. When hopSize > frameSize this position may lie beyond
                    // the end of the array; in that case there is no next frame (the loop condition
                    // fails on the next check), so the read is simply skipped instead of throwing.
                    var beforeNextFrame = i + hopSize - 1;
                    if (beforeNextFrame < samples.Length)
                    {
                        prevSample = samples[beforeNextFrame];
                    }
                }


                // 1) apply window

                if (_window != WindowTypes.Rectangular)
                {
                    _block.ApplyWindow(_windowSamples);
                }


                // 2) calculate power spectrum

                _fft.PowerSpectrum(_block, _spectrum);


                // 3) apply gammatone filterbank

                FilterBanks.Apply(FilterBank, _spectrum, _filteredSpectrum);


                // 4) mean power normalization: update the running mean with the total power
                //    of this frame, then rescale every band

                var sumPower = 0.0f;
                for (var j = 0; j < _filteredSpectrum.Length; j++)
                {
                    sumPower += _filteredSpectrum[j];
                }

                mean = LambdaMu * mean + (1 - LambdaMu) * sumPower;

                for (var j = 0; j < _filteredSpectrum.Length; j++)
                {
                    _filteredSpectrum[j] *= meanPower / mean;
                }


                // 5) nonlinearity (power ^ d     or     Log10)

                if (_power != 0)
                {
                    for (var j = 0; j < _filteredSpectrum.Length; j++)
                    {
                        _filteredSpectrum[j] = (float)Math.Pow(_filteredSpectrum[j], d);
                    }
                }
                else
                {
                    for (var j = 0; j < _filteredSpectrum.Length; j++)
                    {
                        // epsilon keeps the argument of the logarithm away from zero
                        _filteredSpectrum[j] = (float)Math.Log10(_filteredSpectrum[j] + float.Epsilon);
                    }
                }


                // 6) dct-II (normalized); a fresh array per frame, since it is handed over to the feature vector

                var spnccs = new float[FeatureCount];
                _dct.DirectN(_filteredSpectrum, spnccs);


                // add pncc vector to output sequence

                featureVectors.Add(new FeatureVector
                {
                    Features     = spnccs,
                    TimePosition = (double)i / SamplingRate
                });

                i += hopSize;
            }

            return featureVectors;
        }
示例#5
0
        /// <summary>
        /// Click handler: reads the filterbank and extractor settings from the form controls,
        /// builds the filterbank, computes MFCC vectors of the loaded signal and displays them.
        /// If no vector could be computed, only the feature list and the filterbank plot are
        /// updated (there is nothing to select or to plot).
        /// </summary>
        /// <param name="sender">Event source (not used)</param>
        /// <param name="e">Event arguments (not used)</param>
        private void buttonCompute_Click(object sender, EventArgs e)
        {
            var filterCount  = int.Parse(textBoxSize.Text);
            var samplingRate = _signal.SamplingRate;
            var fftSize      = int.Parse(textBoxFftSize.Text);
            var lowFreq      = float.Parse(textBoxLowFreq.Text);
            var highFreq     = float.Parse(textBoxHighFreq.Text);

            Tuple<double, double, double>[] bands;
            float[][]  filterbank = null;
            VtlnWarper vtln       = null;

            // optional VTLN warping, passed on to the filterbank builders that accept it

            if (checkBoxVtln.Checked)
            {
                var alpha    = float.Parse(textBoxVtlnAlpha.Text);
                var vtlnLow  = float.Parse(textBoxVtlnLow.Text);
                var vtlnHigh = float.Parse(textBoxVtlnHigh.Text);

                vtln = new VtlnWarper(alpha, lowFreq, highFreq, vtlnLow, vtlnHigh);
            }

            // Frequency bands for the selected scale. The "Slaney" and "ERB" options produce
            // a ready filterbank right away, so the shape selection below is skipped for them.

            switch (comboBoxFilterbank.Text)
            {
            case "Mel":
                bands = FilterBanks.MelBands(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxOverlap.Checked);
                break;

            case "Mel Slaney":
                bands      = FilterBanks.MelBandsSlaney(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxOverlap.Checked);
                filterbank = FilterBanks.MelBankSlaney(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxNormalize.Checked, vtln);
                break;

            case "Bark":
                bands = FilterBanks.BarkBands(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxOverlap.Checked);
                break;

            case "Bark Slaney":
                bands      = FilterBanks.BarkBandsSlaney(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxOverlap.Checked);
                filterbank = FilterBanks.BarkBankSlaney(filterCount, fftSize, samplingRate, lowFreq, highFreq);
                break;

            case "Critical bands":
                bands = FilterBanks.CriticalBands(filterCount, fftSize, samplingRate, lowFreq, highFreq);
                break;

            case "Octave bands":
                bands = FilterBanks.OctaveBands(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxOverlap.Checked);
                break;

            case "ERB":
                bands      = null;
                filterbank = FilterBanks.Erb(filterCount, fftSize, samplingRate, lowFreq, highFreq);
                break;

            default:
                bands = FilterBanks.HerzBands(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxOverlap.Checked);
                break;
            }

            // build the filterbank of the selected shape from the bands (unless it exists already)

            if (bands != null && filterbank == null)
            {
                switch (comboBoxShape.Text)
                {
                case "Triangular":
                    filterbank = FilterBanks.Triangular(fftSize, samplingRate, bands, vtln, Utils.Scale.HerzToMel);
                    break;

                case "Trapezoidal":
                    filterbank = FilterBanks.Trapezoidal(fftSize, samplingRate, bands, vtln);
                    break;

                case "BiQuad":
                    filterbank = FilterBanks.BiQuad(fftSize, samplingRate, bands);
                    break;

                default:
                    filterbank = FilterBanks.Rectangular(fftSize, samplingRate, bands, vtln);
                    break;
                }

                if (checkBoxNormalize.Checked)
                {
                    FilterBanks.Normalize(filterCount, bands, filterbank);
                }
            }


            var spectrumType = (SpectrumType)comboBoxSpectrum.SelectedIndex;
            var nonLinearity = (NonLinearityType)comboBoxNonLinearity.SelectedIndex;
            var logFloor     = float.Parse(textBoxLogFloor.Text);

            // Positional arguments: presumably sampling rate, number of coefficients, frame duration
            // (512 samples expressed in seconds) and hop duration - TODO confirm against the
            // MfccExtractor constructor. An earlier variant used (samplingRate, 13, 0.025, 0.01).
            // Other named options that were tried here: filterbankSize: 26, highFreq: 8000,
            // preEmphasis: 0.97, lifterSize: 22, includeEnergy: true.

            var mfccExtractor = new MfccExtractor(
                samplingRate, 13, 512.0 / samplingRate, 0.01,
                filterbank: filterbank,
                spectrumType: spectrumType,
                nonLinearity: nonLinearity,
                dctType: comboBoxDct.Text,
                window: WindowTypes.Hamming,
                logFloor: logFloor);

            _mfccVectors = mfccExtractor.ComputeFrom(_signal);

            // NOTE: post-processing may be added at this point, e.g.
            //   FeaturePostProcessing.NormalizeMean(_mfccVectors);   // optional (but REQUIRED for PNCC!)
            //   FeaturePostProcessing.AddDeltas(_mfccVectors);
            // in which case mfccExtractor.DeltaFeatureDescriptions and
            // mfccExtractor.DeltaDeltaFeatureDescriptions must be concatenated to the header.

            var header = mfccExtractor.FeatureDescriptions;

            FillFeaturesList(_mfccVectors, header);

            // A signal that is too short yields no vectors at all: indexing the first list item
            // or the first vector would throw, so only the filterbank plot is refreshed then.

            if (_mfccVectors.Count == 0 || mfccListView.Items.Count == 0)
            {
                melFilterBankPanel.Groups = mfccExtractor.FilterBank;
                return;
            }

            mfccListView.Items[0].Selected = true;

            melFilterBankPanel.Groups = mfccExtractor.FilterBank;

            mfccPanel.Line = _mfccVectors[0].Features;
        }
示例#6
0
        /// <summary>
        /// Method for computing modulation spectra.
        /// Each vector representing one modulation spectrum is a flattened version of 2D spectrum:
        /// the modulation power spectra of all envelopes, concatenated one after another.
        ///
        /// The envelopes come from one of two sources:
        ///
        ///     - if _featuregram is null, they are the filterbank outputs of a short-time analysis
        ///       of the samples ([optional] pre-emphasis, window, power spectrum, filterbank);
        ///     - otherwise they are the columns of _featuregram (the samples are not analyzed).
        ///
        /// Each envelope is divided by the average of its absolute values (unless that average
        /// is below 1e-10) before the modulation analysis.
        /// </summary>
        /// <param name="samples">Samples for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of flattened modulation spectra</returns>
        public override List<FeatureVector> ComputeFrom(float[] samples, int startSample, int endSample)
        {
            Guard.AgainstInvalidRange(startSample, endSample, "starting pos", "ending pos");

            var frameSize = FrameSize;
            var hopSize   = HopSize;

            var featureVectors = new List<FeatureVector>();

            var en = 0;             // number of points in each envelope
            var i  = startSample;

            if (_featuregram == null)
            {
                _envelopes = new float[_filterbank.Length][];
                for (var n = 0; n < _envelopes.Length; n++)
                {
                    _envelopes[n] = new float[samples.Length / hopSize];
                }

                // pre-emphasis of the very first sample needs the sample preceding the range (or 0)
                var prevSample = startSample > 0 ? samples[startSample - 1] : 0.0f;

                // the larger of frame and hop size is subtracted, so that neither the frame copy
                // nor the read of samples[i + hopSize - 1] below can go past endSample
                var lastSample = endSample - Math.Max(frameSize, hopSize);

                // ===================== compute local FFTs (do STFT) =======================

                for (i = startSample; i < lastSample; i += hopSize)
                {
                    _zeroblock.FastCopyTo(_block, _zeroblock.Length);
                    samples.FastCopyTo(_block, frameSize, i);

                    // 0) pre-emphasis (if needed)

                    if (_preEmphasis > 1e-10)
                    {
                        for (var k = 0; k < frameSize; k++)
                        {
                            var y = _block[k] - prevSample * _preEmphasis;
                            prevSample = _block[k];
                            _block[k]  = y;
                        }
                        // predecessor of the first sample of the next frame
                        prevSample = samples[i + hopSize - 1];
                    }

                    // 1) apply window

                    if (_window != WindowTypes.Rectangular)
                    {
                        _block.ApplyWindow(_windowSamples);
                    }

                    // 2) calculate power spectrum

                    _fft.PowerSpectrum(_block, _spectrum);

                    // 3) apply filterbank...

                    FilterBanks.Apply(_filterbank, _spectrum, _filteredSpectrum);

                    // ...and save results for future calculations

                    for (var n = 0; n < _envelopes.Length; n++)
                    {
                        _envelopes[n][en] = _filteredSpectrum[n];
                    }
                    en++;
                }
            }
            else
            {
                // transpose the featuregram: one envelope per feature, one point per vector

                en         = _featuregram.Length;
                _envelopes = new float[_featuregram[0].Length][];

                for (var n = 0; n < _envelopes.Length; n++)
                {
                    _envelopes[n] = new float[en];
                    for (i = 0; i < en; i++)
                    {
                        _envelopes[n][i] = _featuregram[i][n];
                    }
                }
            }

            // =========================== modulation analysis =======================

            var envelopeLength = en;

            // long-term AVG-normalization

            foreach (var envelope in _envelopes)
            {
                // Average of absolute values. (The sign test used to be applied to the index k,
                // which is never negative, so negative values - possible when the envelopes come
                // from a featuregram - were summed with their sign.)
                var avg = 0.0f;
                for (var k = 0; k < envelopeLength; k++)
                {
                    avg += Math.Abs(envelope[k]);
                }
                avg /= envelopeLength;

                if (avg >= 1e-10)   // this happens more frequently
                {
                    for (var k = 0; k < envelopeLength; k++)
                    {
                        envelope[k] /= avg;
                    }
                }
            }

            i = 0;
            while (i < envelopeLength)
            {
                var vector = new float[_envelopes.Length * (_modulationFftSize / 2 + 1)];
                var offset = 0;

                foreach (var envelope in _envelopes)
                {
                    // clear the buffer, then copy as many envelope points as are left (at most one FFT block)

                    _zeroModblock.FastCopyTo(_modBlock, _modulationFftSize);
                    envelope.FastCopyTo(_modBlock, Math.Min(_modulationFftSize, envelopeLength - i), i);

                    _modulationFft.PowerSpectrum(_modBlock, _modSpectrum);
                    _modSpectrum.FastCopyTo(vector, _modSpectrum.Length, 0, offset);

                    offset += _modSpectrum.Length;
                }

                featureVectors.Add(new FeatureVector
                {
                    Features     = vector,
                    TimePosition = (double)i * hopSize / SamplingRate   // i counts envelope points, i.e. hops
                });

                i += _modulationHopSize;
            }

            return featureVectors;
        }
示例#7
0
        /// <summary>
        /// PNCC algorithm according to [Kim & Stern, 2016]:
        ///     0) [Optional] pre-emphasis
        ///
        /// Decompose signal into overlapping (hopSize) frames of length fftSize. In each frame do:
        ///
        ///     1) Apply window (if rectangular window was specified then just do nothing)
        ///     2) Obtain power spectrum
        ///     3) Apply gammatone filters (squared)
        ///     4) Medium-time processing (asymmetric noise suppression, temporal masking, spectral smoothing)
        ///     5) Apply nonlinearity
        ///     6) Do dct-II (normalized)
        ///
        /// </summary>
        /// <param name="signal">Signal for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of pncc vectors</returns>
        public override List <FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ====================================== PREPARE =======================================

            var hopSize       = (int)(signal.SamplingRate * HopSize);
            var frameSize     = (int)(signal.SamplingRate * FrameSize);
            var windowSamples = Window.OfType(_window, frameSize);

            var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

            _gammatoneFilterBank = FilterBanks.Erb(_filterbankSize, _fftSize, signal.SamplingRate, _lowFreq, _highFreq);

            // use power spectrum:

            foreach (var filter in _gammatoneFilterBank)
            {
                for (var j = 0; j < filter.Length; j++)
                {
                    var ps = filter[j] * filter[j];
                    filter[j] = ps;
                }
            }


            var fft = new Fft(fftSize);
            var dct = new Dct2(_filterbankSize, FeatureCount);


            var gammatoneSpectrum = new float[_filterbankSize];

            var spectrumQOut      = new float[_filterbankSize];
            var filteredSpectrumQ = new float[_filterbankSize];
            var spectrumS         = new float[_filterbankSize];
            var smoothedSpectrumS = new float[_filterbankSize];
            var avgSpectrumQ1     = new float[_filterbankSize];
            var avgSpectrumQ2     = new float[_filterbankSize];
            var smoothedSpectrum  = new float[_filterbankSize];

            const float meanPower = 1e10f;
            var         mean      = 4e07f;

            var d = _power != 0 ? 1.0 / _power : 0.0;

            var block     = new float[fftSize];       // buffer for currently processed signal block at each step
            var zeroblock = new float[fftSize];       // buffer of zeros for quick memset

            _ringBuffer = new SpectraRingBuffer(2 * M + 1, _filterbankSize);

            var spectrum = new float[fftSize / 2 + 1];


            // 0) pre-emphasis (if needed)

            if (_preEmphasis > 0.0)
            {
                var preemphasisFilter = new PreEmphasisFilter(_preEmphasis);
                signal = preemphasisFilter.ApplyTo(signal);
            }


            // ================================= MAIN PROCESSING ==================================

            var featureVectors = new List <FeatureVector>();

            var i       = 0;
            var timePos = startSample;

            while (timePos + frameSize < endSample)
            {
                // prepare next block for processing

                zeroblock.FastCopyTo(block, zeroblock.Length);
                signal.Samples.FastCopyTo(block, frameSize, timePos);


                // 1) apply window

                if (_window != WindowTypes.Rectangular)
                {
                    block.ApplyWindow(windowSamples);
                }


                // 2) calculate power spectrum

                fft.PowerSpectrum(block, spectrum);


                // 3) apply gammatone filterbank

                FilterBanks.Apply(_gammatoneFilterBank, spectrum, gammatoneSpectrum);



                // =============================================================
                // 4) medium-time processing blocks:

                // 4.1) temporal integration (zero-phase moving average filter)

                _ringBuffer.Add(gammatoneSpectrum);
                var spectrumQ = _ringBuffer.AverageSpectrum;

                // 4.2) asymmetric noise suppression

                if (i == 2 * M)
                {
                    for (var j = 0; j < spectrumQOut.Length; j++)
                    {
                        spectrumQOut[j] = spectrumQ[j] * 0.9f;
                    }
                }

                if (i >= 2 * M)
                {
                    for (var j = 0; j < spectrumQOut.Length; j++)
                    {
                        if (spectrumQ[j] > spectrumQOut[j])
                        {
                            spectrumQOut[j] = LambdaA * spectrumQOut[j] + (1 - LambdaA) * spectrumQ[j];
                        }
                        else
                        {
                            spectrumQOut[j] = LambdaB * spectrumQOut[j] + (1 - LambdaB) * spectrumQ[j];
                        }
                    }

                    for (var j = 0; j < filteredSpectrumQ.Length; j++)
                    {
                        filteredSpectrumQ[j] = Math.Max(spectrumQ[j] - spectrumQOut[j], 0.0f);

                        if (i == 2 * M)
                        {
                            avgSpectrumQ1[j] = 0.9f * filteredSpectrumQ[j];
                            avgSpectrumQ2[j] = filteredSpectrumQ[j];
                        }

                        if (filteredSpectrumQ[j] > avgSpectrumQ1[j])
                        {
                            avgSpectrumQ1[j] = LambdaA * avgSpectrumQ1[j] + (1 - LambdaA) * filteredSpectrumQ[j];
                        }
                        else
                        {
                            avgSpectrumQ1[j] = LambdaB * avgSpectrumQ1[j] + (1 - LambdaB) * filteredSpectrumQ[j];
                        }

                        // 4.3) temporal masking

                        var threshold = filteredSpectrumQ[j];

                        avgSpectrumQ2[j] *= LambdaT;
                        if (spectrumQ[j] < C * spectrumQOut[j])
                        {
                            filteredSpectrumQ[j] = avgSpectrumQ1[j];
                        }
                        else
                        {
                            if (filteredSpectrumQ[j] <= avgSpectrumQ2[j])
                            {
                                filteredSpectrumQ[j] = MuT * avgSpectrumQ2[j];
                            }
                        }
                        avgSpectrumQ2[j] = Math.Max(avgSpectrumQ2[j], threshold);

                        filteredSpectrumQ[j] = Math.Max(filteredSpectrumQ[j], avgSpectrumQ1[j]);
                    }


                    // 4.4) spectral smoothing

                    for (var j = 0; j < spectrumS.Length; j++)
                    {
                        spectrumS[j] = filteredSpectrumQ[j] / Math.Max(spectrumQ[j], float.Epsilon);
                    }

                    for (var j = 0; j < smoothedSpectrumS.Length; j++)
                    {
                        smoothedSpectrumS[j] = 0.0f;

                        var total = 0;
                        for (var k = Math.Max(j - N, 0);
                             k < Math.Min(j + N + 1, _filterbankSize);
                             k++, total++)
                        {
                            smoothedSpectrumS[j] += spectrumS[k];
                        }
                        smoothedSpectrumS[j] /= total;
                    }

                    // 4.5) mean power normalization

                    var centralSpectrum = _ringBuffer.CentralSpectrum;

                    var sumPower = 0.0f;
                    for (var j = 0; j < smoothedSpectrum.Length; j++)
                    {
                        smoothedSpectrum[j] = smoothedSpectrumS[j] * centralSpectrum[j];
                        sumPower           += smoothedSpectrum[j];
                    }

                    mean = LambdaMu * mean + (1 - LambdaMu) * sumPower;

                    for (var j = 0; j < smoothedSpectrum.Length; j++)
                    {
                        smoothedSpectrum[j] *= meanPower / mean;
                    }

                    // =============================================================


                    // 5) nonlinearity (power ^ d    or    Log10)

                    if (_power != 0)
                    {
                        for (var j = 0; j < smoothedSpectrum.Length; j++)
                        {
                            smoothedSpectrum[j] = (float)Math.Pow(smoothedSpectrum[j], d);
                        }
                    }
                    else
                    {
                        for (var j = 0; j < smoothedSpectrum.Length; j++)
                        {
                            smoothedSpectrum[j] = (float)Math.Log10(smoothedSpectrum[j] + float.Epsilon);
                        }
                    }

                    // 6) dct-II (normalized)

                    var pnccs = new float[FeatureCount];
                    dct.DirectN(smoothedSpectrum, pnccs);


                    // add pncc vector to output sequence

                    featureVectors.Add(new FeatureVector
                    {
                        Features     = pnccs,
                        TimePosition = (double)timePos / signal.SamplingRate
                    });
                }

                i++;

                timePos += hopSize;
            }

            return(featureVectors);
        }
示例#8
0
        /// <summary>
        /// Builds a filterbank extractor from <paramref name="options"/>.
        /// If the options carry no filterbank, a rectangular mel-spaced bank is generated for the chosen FFT size;
        /// otherwise the supplied bank is used as is and the FFT size is recovered from the width of its filters.
        /// </summary>
        /// <param name="options">Filterbank options</param>
        public FilterbankExtractor(FilterbankOptions options) : base(options)
        {
            var bandCount = options.FilterBankSize;

            if (options.FilterBank != null)
            {
                // custom bank: FFT size is recovered as 2 * (filter length - 1)

                FilterBank = options.FilterBank;
                bandCount  = FilterBank.Length;
                _blockSize = 2 * (FilterBank[0].Length - 1);

                Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
            }
            else
            {
                // default bank: honour the requested FFT size only if it is larger than the frame

                if (options.FftSize > FrameSize)
                {
                    _blockSize = options.FftSize;
                }
                else
                {
                    _blockSize = MathUtils.NextPowerOfTwo(FrameSize);
                }

                FilterBank = FilterBanks.Rectangular(
                    _blockSize,
                    SamplingRate,
                    FilterBanks.MelBands(bandCount, SamplingRate, options.LowFrequency, options.HighFrequency, false),
                    mapper: Scale.HerzToMel);
            }

            FeatureCount = bandCount;

            // FFT and reusable buffers

            _fft          = new RealFft(_blockSize);
            _spectrum     = new float[_blockSize / 2 + 1];
            _bandSpectrum = new float[bandCount];

            // how the filterbank output is post-processed

            _logFloor         = options.LogFloor;
            _nonLinearityType = options.NonLinearity;

            switch (_nonLinearityType)
            {
                case NonLinearityType.Log10:
                    _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _bandSpectrum, _logFloor);
                    break;

                case NonLinearityType.LogE:
                    _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _bandSpectrum, _logFloor);
                    break;

                case NonLinearityType.ToDecibel:
                    _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _bandSpectrum, _logFloor);
                    break;

                case NonLinearityType.CubicRoot:
                    _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _bandSpectrum, 0.33);
                    break;

                default:
                    _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _bandSpectrum);
                    break;
            }

            // which spectrum is fed to the filterbank (plain power spectrum unless stated otherwise)

            _spectrumType = options.SpectrumType;

            switch (_spectrumType)
            {
                case SpectrumType.Magnitude:
                    _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false);
                    break;

                case SpectrumType.MagnitudeNormalized:
                    _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true);
                    break;

                case SpectrumType.PowerNormalized:
                    _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true);
                    break;

                default:
                    _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false);
                    break;
            }
        }
示例#9
0
        /// <summary>
        /// Standard method for computing PLP features.
        /// In each frame do:
        ///
        ///     0) Apply window (base extractor does it)
        ///     1) Obtain power spectrum
        ///     2) Apply filterbank of bark bands (or mel bands)
        ///     3) [Optional] filter each component of the processed spectrum with a RASTA filter
        ///     4) Apply equal loudness curve
        ///     5) Take cubic root
        ///     6) Do LPC
        ///     7) Convert LPC to cepstrum
        ///     8) [Optional] lifter cepstrum
        ///     9) [Optional] replace the first coefficient with log-energy of the frame
        ///
        /// </summary>
        /// <param name="block">Samples for analysis</param>
        /// <param name="features">Output array that receives the PLP coefficients of this frame</param>
        public override void ProcessFrame(float[] block, float[] features)
        {
            // 1) calculate power spectrum (without normalization)

            _fft.PowerSpectrum(block, _spectrum, false);

            // 2) apply filterbank on the result (bark frequencies by default)

            FilterBanks.Apply(FilterBank, _spectrum, _bandSpectrum);

            // 3) RASTA filtering in log-domain [optional]

            if (_rasta > 0)
            {
                for (var k = 0; k < _bandSpectrum.Length; k++)
                {
                    // float.Epsilon keeps the logarithm finite for empty bands
                    var log = (float)Math.Log(_bandSpectrum[k] + float.Epsilon);

                    log = _rastaFilters[k].Process(log);

                    _bandSpectrum[k] = (float)Math.Exp(log);
                }
            }

            // 4) and 5) apply equal loudness curve and take cubic root
            //           (band power is floored at 1.0 before weighting)

            for (var k = 0; k < _bandSpectrum.Length; k++)
            {
                _bandSpectrum[k] = (float)Math.Pow(Math.Max(_bandSpectrum[k], 1.0) * _equalLoudnessCurve[k], 0.33);
            }

            // 6) LPC from power spectrum:

            var n = _idftTable[0].Length;

            // get autocorrelation samples from post-processed power spectrum (via IDFT):

            for (var k = 0; k < _idftTable.Length; k++)
            {
                var acc = _idftTable[k][0] * _bandSpectrum[0] +
                          _idftTable[k][n - 1] * _bandSpectrum[n - 3];  // add values at two duplicated edges right away

                // table columns 1..n-2 correspond to bands 0..n-3
                for (var j = 1; j < n - 1; j++)
                {
                    acc += _idftTable[k][j] * _bandSpectrum[j - 1];
                }

                _cc[k] = acc / (2 * (n - 1));
            }

            // LPC (coefficients are reset before each run):

            Array.Clear(_lpc, 0, _lpc.Length);

            var err = Lpc.LevinsonDurbin(_cc, _lpc, _lpcOrder);

            // 7) compute LPCC coefficients from LPC

            Lpc.ToCepstrum(_lpc, err, features);


            // 8) (optional) liftering

            if (_lifterCoeffs != null)
            {
                features.ApplyWindow(_lifterCoeffs);
            }

            // 9) (optional) replace first coeff with log(energy)

            if (_includeEnergy)
            {
                features[0] = (float)Math.Log(Math.Max(block.Sum(x => x * x), _logEnergyFloor));
            }
        }
示例#10
0
        /// <summary>
        /// Rebuilds the filterbank from the values currently entered in the form
        /// (filter count, sampling rate, FFT size, frequency range, band type and filter shape),
        /// refreshes the band selectors and hands the new filterbank to the plotting panel.
        /// </summary>
        /// <param name="sender">Event source (not used)</param>
        /// <param name="e">Event arguments (not used)</param>
        private void filterbankButton_Click(object sender, EventArgs e)
        {
            var filterCount  = int.Parse(filterCountTextBox.Text);
            var samplingRate = int.Parse(samplingRateTextBox.Text);
            var fftSize      = int.Parse(fftSizeTextBox.Text);
            var lowFreq      = float.Parse(lowFreqTextBox.Text);
            var highFreq     = float.Parse(highFreqTextBox.Text);

            // frequency bands for the chosen scale; stays null when the filterbank is built directly (ERB)
            Tuple <double, double, double>[] bands;

            switch (filterbankComboBox.Text)
            {
            case "Mel":
                bands = FilterBanks.MelBands(filterCount, fftSize, samplingRate, lowFreq, highFreq, overlapCheckBox.Checked);
                break;

            case "Bark":
                bands = FilterBanks.BarkBands(filterCount, fftSize, samplingRate, lowFreq, highFreq, overlapCheckBox.Checked);
                break;

            case "Critical bands":
                bands = FilterBanks.CriticalBands(filterCount, fftSize, samplingRate, lowFreq, highFreq);
                break;

            case "Octave bands":
                bands = FilterBanks.OctaveBands(filterCount, fftSize, samplingRate, lowFreq, highFreq, overlapCheckBox.Checked);
                break;

            case "ERB":
                bands       = null;
                _filterbank = FilterBanks.Erb(filterCount, fftSize, samplingRate, lowFreq, highFreq);

                // ====================================================
                // ===================  ! SQUARE ! ====================

                //foreach (var filter in _filterbank)
                //{
                //    for (var j = 0; j < filter.Length; j++)
                //    {
                //        var squared = filter[j] * filter[j];
                //        filter[j] = squared;
                //    }
                //}

                // normalization coefficient (for plotting):
                // scale in floating point and truncate only once, otherwise any peak above 1.0
                // truncates the coefficient to 0 and the plot gain collapses to zero
                var peak = _filterbank.Max(f => f.Max());
                if (peak > 0)
                {
                    filterbankPanel.Gain = (int)(100.0 / peak);
                }

                break;

            default:
                bands = FilterBanks.HerzBands(filterCount, fftSize, samplingRate, lowFreq, highFreq, overlapCheckBox.Checked);
                break;
            }

            // shape the filters over the bands (skipped for ERB, which is already a complete filterbank)
            if (bands != null)
            {
                switch (shapeComboBox.Text)
                {
                case "Triangular":
                    _filterbank = FilterBanks.Triangular(fftSize, samplingRate, bands);
                    break;

                case "Trapezoidal":
                    _filterbank = FilterBanks.Trapezoidal(fftSize, samplingRate, bands);
                    break;

                case "BiQuad":
                    _filterbank = FilterBanks.BiQuad(fftSize, samplingRate, bands);
                    break;

                default:
                    _filterbank = FilterBanks.Rectangular(fftSize, samplingRate, bands);
                    break;
                }
            }

            // each selector gets its own array instance as data source
            band1ComboBox.DataSource = Enumerable.Range(1, filterCount).ToArray();
            band2ComboBox.DataSource = Enumerable.Range(1, filterCount).ToArray();
            band3ComboBox.DataSource = Enumerable.Range(1, filterCount).ToArray();
            band4ComboBox.DataSource = Enumerable.Range(1, filterCount).ToArray();
            band1ComboBox.Text       = "1";
            band2ComboBox.Text       = "2";
            band3ComboBox.Text       = "3";
            band4ComboBox.Text       = "4";

            filterbankPanel.Groups = _filterbank;
        }
示例#11
0
        /// <summary>
        /// Constructs PLP extractor from configuration <paramref name="options"/>:
        /// prepares the filterbank with its center frequencies, the equal loudness curve,
        /// optional RASTA filters, the IDFT table used to get autocorrelation from the band spectrum,
        /// and all reusable buffers.
        /// </summary>
        /// <param name="options">PLP options</param>
        public PlpExtractor(PlpOptions options) : base(options)
        {
            FeatureCount = options.FeatureCount;

            // ---------------------------- filter bank and center frequencies ----------------------------

            var bandTotal = options.FilterBankSize;

            if (options.FilterBank != null)
            {
                // custom bank: FFT size is recovered as 2 * (filter length - 1)

                FilterBank = options.FilterBank;
                bandTotal  = FilterBank.Length;
                _blockSize = 2 * (FilterBank[0].Length - 1);

                Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");

                if (options.CenterFrequencies != null)
                {
                    _centerFrequencies = options.CenterFrequencies;
                }
                else
                {
                    // no center frequencies given: estimate each one as the middle of the span between
                    // the first positive weight and the first zero weight that follows it

                    var hzPerBin = (double)SamplingRate / _blockSize;

                    _centerFrequencies = new double[bandTotal];

                    for (var band = 0; band < FilterBank.Length; band++)
                    {
                        var weights = FilterBank[band];

                        var firstBin = 0;
                        for (var bin = 0; bin < weights.Length; bin++)
                        {
                            if (weights[bin] > 0)
                            {
                                firstBin = bin;
                                break;
                            }
                        }

                        var lastBin = _blockSize / 2;
                        for (var bin = firstBin; bin < weights.Length; bin++)
                        {
                            if (weights[bin] == 0)
                            {
                                lastBin = bin;
                                break;
                            }
                        }

                        _centerFrequencies[band] = hzPerBin * (lastBin + firstBin) / 2;
                    }
                }
            }
            else
            {
                // default bank: Slaney-style bark filters; requested FFT size is honoured only if it exceeds the frame

                if (options.FftSize > FrameSize)
                {
                    _blockSize = options.FftSize;
                }
                else
                {
                    _blockSize = MathUtils.NextPowerOfTwo(FrameSize);
                }

                FilterBank = FilterBanks.BarkBankSlaney(bandTotal, _blockSize, SamplingRate, options.LowFrequency, options.HighFrequency);

                _centerFrequencies = FilterBanks.BarkBandsSlaney(bandTotal, SamplingRate, options.LowFrequency, options.HighFrequency)
                                                .Select(b => b.Item2)
                                                .ToArray();
            }

            // ---------------------------------- equal loudness curve ------------------------------------

            _equalLoudnessCurve = new double[bandTotal];

            for (var band = 0; band < _centerFrequencies.Length; band++)
            {
                var squaredFreq = _centerFrequencies[band] * _centerFrequencies[band];

                var lowTerm  = Math.Pow(squaredFreq / (squaredFreq + 1.6e5), 2);
                var highTerm = (squaredFreq + 1.44e6) / (squaredFreq + 9.61e6);

                _equalLoudnessCurve[band] = lowTerm * highTerm;
            }

            // --------------------------- RASTA filters (one per band, if requested) ---------------------

            _rasta = options.Rasta;

            if (_rasta > 0)
            {
                _rastaFilters = new RastaFilter[bandTotal];

                for (var band = 0; band < _rastaFilters.Length; band++)
                {
                    _rastaFilters[band] = new RastaFilter(_rasta);
                }
            }

            // ------------- IDFT table for obtaining autocorrelation coeffs from power spectrum ----------

            if (options.LpcOrder > 0)
            {
                _lpcOrder = options.LpcOrder;
            }
            else
            {
                _lpcOrder = FeatureCount - 1;
            }

            var extendedBands = bandTotal + 2;     // two duplicated edge bands are added
            var lastColumn    = extendedBands - 1;
            var freq          = Math.PI / lastColumn;

            _idftTable = new float[_lpcOrder + 1][];

            for (var lag = 0; lag < _idftTable.Length; lag++)
            {
                var row = new float[extendedBands];

                row[0] = 1.0f;

                for (var col = 1; col < lastColumn; col++)
                {
                    row[col] = 2 * (float)Math.Cos(freq * lag * col);
                }

                row[lastColumn] = (float)Math.Cos(freq * lag * lastColumn);

                _idftTable[lag] = row;
            }

            _lpc = new float[_lpcOrder + 1];
            _cc  = new float[extendedBands];

            // ------------------------------------- everything else --------------------------------------

            _fft          = new RealFft(_blockSize);
            _spectrum     = new float[_blockSize / 2 + 1];
            _bandSpectrum = new float[bandTotal];

            _lifterSize = options.LifterSize;

            if (_lifterSize > 0)
            {
                _lifterCoeffs = Window.Liftering(FeatureCount, _lifterSize);
            }
            else
            {
                _lifterCoeffs = null;
            }

            _includeEnergy  = options.IncludeEnergy;
            _logEnergyFloor = options.LogEnergyFloor;
        }
示例#12
0
        /// <summary>
        /// Constructs extractor from configuration <paramref name="options"/>.
        /// Envelopes come either from a precomputed featuregram (if the options provide one)
        /// or from a filterbank applied to the signal spectrum; in the latter case
        /// a 12-band triangular mel bank is created when no filterbank is given.
        /// </summary>
        public AmsExtractor(AmsOptions options) : base(options)
        {
            _modulationFftSize = options.ModulationFftSize;
            _modulationHopSize = options.ModulationHopSize;
            _modulationFft     = new RealFft(_modulationFftSize);

            // number of points in one modulation spectrum
            var modulationBins = _modulationFftSize / 2 + 1;

            _featuregram = options.Featuregram?.ToArray();

            int bandCount;

            if (_featuregram == null)
            {
                if (options.FilterBank != null)
                {
                    // custom bank: FFT size is recovered as 2 * (filter length - 1)

                    _filterbank = options.FilterBank;
                    _fftSize    = 2 * (_filterbank[0].Length - 1);

                    Guard.AgainstExceedance(FrameSize, _fftSize, "frame size", "FFT size");
                }
                else
                {
                    if (options.FftSize > FrameSize)
                    {
                        _fftSize = options.FftSize;
                    }
                    else
                    {
                        _fftSize = MathUtils.NextPowerOfTwo(FrameSize);
                    }

                    var melBands = FilterBanks.MelBands(12, SamplingRate, 100, 3200);

                    _filterbank = FilterBanks.Triangular(_fftSize, SamplingRate, melBands);
                }

                _fft = new RealFft(_fftSize);

                bandCount    = _filterbank.Length;
                FeatureCount = bandCount * modulationBins;

                _spectrum         = new float[_fftSize / 2 + 1];
                _filteredSpectrum = new float[bandCount];
                _block            = new float[_fftSize];
            }
            else
            {
                // one envelope per featuregram column
                bandCount    = _featuregram[0].Length;
                FeatureCount = bandCount * modulationBins;
            }

            _modBlock    = new float[_modulationFftSize];
            _modSpectrum = new float[modulationBins];

            // feature descriptions: one entry per (band, modulation frequency) pair

            var modulationSamplingRate = (float)SamplingRate / HopSize;
            var resolution             = modulationSamplingRate / _modulationFftSize;

            var descriptions = new List <string>();

            for (var band = 1; band <= bandCount; band++)
            {
                for (var bin = 0; bin < modulationBins; bin++)
                {
                    descriptions.Add(string.Format("band_{0}_mf_{1:F2}_Hz", band, bin * resolution));
                }
            }

            FeatureDescriptions = descriptions;
        }
示例#13
0
        /// <summary>
        /// Constructs a filterbank extractor around a ready-made <paramref name="filterbank"/>.
        /// FFT size is derived from the filterbank: 2 * (length of the first filter - 1).
        /// </summary>
        /// <param name="samplingRate">Sampling rate of the signals to analyze (passed to the base extractor)</param>
        /// <param name="featureCount">Number of features reported by the extractor</param>
        /// <param name="filterbank">Filterbank weights, one array per band</param>
        /// <param name="frameDuration">Frame duration (in seconds)</param>
        /// <param name="hopDuration">Hop duration (in seconds)</param>
        /// <param name="preEmphasis">Pre-emphasis coefficient (passed to the base extractor)</param>
        /// <param name="nonLinearity">Non-linearity applied together with the filterbank (none by default)</param>
        /// <param name="spectrumType">Type of the spectrum fed to the filterbank (power spectrum by default)</param>
        /// <param name="window">Type of the analysis window</param>
        /// <param name="logFloor">Floor value handed to the logarithmic post-processing modes</param>
        public FilterbankExtractor(int samplingRate,
                                   int featureCount,
                                   float[][] filterbank,
                                   double frameDuration          = 0.0256 /*sec*/,
                                   double hopDuration            = 0.010 /*sec*/,
                                   double preEmphasis            = 0,
                                   NonLinearityType nonLinearity = NonLinearityType.None,
                                   SpectrumType spectrumType     = SpectrumType.Power,
                                   WindowTypes window            = WindowTypes.Hamming,
                                   float logFloor = float.Epsilon)

            : base(samplingRate, frameDuration, hopDuration, preEmphasis)
        {
            FeatureCount = featureCount;

            FilterBank = filterbank;

            _blockSize = 2 * (filterbank[0].Length - 1);

            Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");

            _fft = new RealFft(_blockSize);

            _window        = window;
            _windowSamples = Window.OfType(_window, FrameSize);

            // setup spectrum post-processing: =======================================================

            _logFloor         = logFloor;
            _nonLinearityType = nonLinearity;
            switch (nonLinearity)
            {
                case NonLinearityType.Log10:
                    _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _bandSpectrum, _logFloor);
                    break;

                case NonLinearityType.LogE:
                    _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _bandSpectrum, _logFloor);
                    break;

                case NonLinearityType.ToDecibel:
                    _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _bandSpectrum, _logFloor);
                    break;

                case NonLinearityType.CubicRoot:
                    _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _bandSpectrum, 0.33);
                    break;

                default:
                    _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _bandSpectrum);
                    break;
            }

            // setup spectrum computation: ===========================================================
            // plain power spectrum doubles as the fallback, so that the delegate is never left unassigned

            _spectrumType = spectrumType;
            switch (_spectrumType)
            {
                case SpectrumType.Magnitude:
                    _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false);
                    break;

                case SpectrumType.MagnitudeNormalized:
                    _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true);
                    break;

                case SpectrumType.PowerNormalized:
                    _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true);
                    break;

                case SpectrumType.Power:
                default:
                    _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false);
                    break;
            }

            // reserve memory for reusable blocks

            _spectrum     = new float[_blockSize / 2 + 1];
            _bandSpectrum = new float[filterbank.Length];
        }
示例#14
0
        /// <summary>
        /// Method for computing modulation spectra.
        /// Each vector representing one modulation spectrum is a flattened version of 2D spectrum
        /// (modulation power spectra of all bands, stored band after band).
        ///
        /// If no featuregram was given, band envelopes are computed from the signal. In each frame do:
        ///
        ///     0) [Optional] pre-emphasis (applied once, frame by frame)
        ///     1) Apply window (if rectangular window was specified then just do nothing)
        ///     2) Obtain power spectrum
        ///     3) Apply filterbank and store the band values as the next point of each envelope
        ///
        /// Otherwise each column of the featuregram is used as an envelope.
        /// Envelopes are then normalized by their long-term mean absolute value
        /// and analyzed block by block with the modulation FFT.
        /// </summary>
        /// <param name="signal">Signal under analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of flattened modulation spectra</returns>
        public override List <FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ====================================== PREPARE =======================================

            var hopSize       = (int)(signal.SamplingRate * HopSize);
            var frameSize     = (int)(signal.SamplingRate * FrameSize);
            var windowSamples = Window.OfType(_window, frameSize);

            var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

            var fft           = new Fft(fftSize);
            var modulationFft = new Fft(_modulationFftSize);

            var modulationBins = _modulationFftSize / 2 + 1;

            // the featuregram (if any) has priority over the filterbank, here and in the main processing below

            int bandCount;

            if (_featuregram == null)
            {
                if (_filterbank == null)
                {
                    // default filterbank must match the FFT size that is actually used for the spectrum

                    _filterbank = FilterBanks.Triangular(fftSize, signal.SamplingRate,
                                                         FilterBanks.MelBands(12, fftSize, signal.SamplingRate, 100, 3200));
                }

                bandCount = _filterbank.Length;
            }
            else
            {
                bandCount = _featuregram[0].Length;
            }

            _featureCount = bandCount * modulationBins;

            var modulationSamplingRate = (float)signal.SamplingRate / hopSize;
            var resolution             = modulationSamplingRate / _modulationFftSize;


            _featureDescriptions = new string[bandCount * modulationBins];

            var idx = 0;

            for (var fi = 0; fi < bandCount; fi++)
            {
                for (var fj = 0; fj < modulationBins; fj++)
                {
                    _featureDescriptions[idx++] = string.Format("band_{0}_mf_{1:F2}_Hz", fi + 1, fj * resolution);
                }
            }


            // ================================= MAIN PROCESSING ==================================

            var featureVectors = new List <FeatureVector>();

            var en = 0;             // number of envelope points filled so far
            var i  = startSample;

            if (_featuregram == null)
            {
                _envelopes = new float[_filterbank.Length][];
                for (var n = 0; n < _envelopes.Length; n++)
                {
                    _envelopes[n] = new float[signal.Length / hopSize];
                }

                // sample preceding the current frame (needed by the pre-emphasis filter)

                var prevSample = startSample > 0 ? signal[startSample - 1] : 0.0f;


                // ===================== compute local FFTs (do STFT) =======================

                var spectrum         = new float[fftSize / 2 + 1];
                var filteredSpectrum = new float[_filterbank.Length];

                var block     = new float[fftSize];       // buffer for currently processed signal block at each step
                var zeroblock = new float[fftSize];       // buffer of zeros for quick memset

                while (i + frameSize < endSample)
                {
                    zeroblock.FastCopyTo(block, zeroblock.Length);
                    signal.Samples.FastCopyTo(block, frameSize, i);

                    // 0) pre-emphasis (if needed);
                    //    this is the only place where it is applied: the signal itself is left untouched

                    if (_preEmphasis > 0.0)
                    {
                        for (var k = 0; k < frameSize; k++)
                        {
                            var y = block[k] - prevSample * _preEmphasis;
                            prevSample = block[k];
                            block[k]   = y;
                        }

                        // frames overlap, so the next frame starts right after this sample
                        prevSample = signal[i + hopSize - 1];
                    }

                    // 1) apply window

                    if (_window != WindowTypes.Rectangular)
                    {
                        block.ApplyWindow(windowSamples);
                    }

                    // 2) calculate power spectrum

                    fft.PowerSpectrum(block, spectrum);

                    // 3) apply filterbank...

                    FilterBanks.Apply(_filterbank, spectrum, filteredSpectrum);

                    // ...and save results for future calculations

                    for (var n = 0; n < _envelopes.Length; n++)
                    {
                        _envelopes[n][en] = filteredSpectrum[n];
                    }
                    en++;

                    i += hopSize;
                }
            }
            else
            {
                // envelopes are the columns of the featuregram

                en         = _featuregram.Length;
                _envelopes = new float[_featuregram[0].Length][];

                for (var n = 0; n < _envelopes.Length; n++)
                {
                    _envelopes[n] = new float[en];
                    for (var t = 0; t < en; t++)
                    {
                        _envelopes[n][t] = _featuregram[t][n];
                    }
                }
            }

            // =========================== modulation analysis =======================

            var envelopeLength = en;

            // long-term AVG-normalization (by mean absolute value)

            foreach (var envelope in _envelopes)
            {
                var avg = 0.0f;
                for (var k = 0; k < envelopeLength; k++)
                {
                    avg += Math.Abs(envelope[k]);
                }
                avg /= envelopeLength;

                if (avg >= 1e-10)   // this happens more frequently
                {
                    for (var k = 0; k < envelopeLength; k++)
                    {
                        envelope[k] /= avg;
                    }
                }
            }

            var modBlock     = new float[_modulationFftSize];
            var zeroModblock = new float[_modulationFftSize];
            var modSpectrum  = new float[modulationBins];

            i = 0;
            while (i < envelopeLength)
            {
                var vector = new float[_envelopes.Length * modulationBins];
                var offset = 0;

                foreach (var envelope in _envelopes)
                {
                    // the last blocks may be shorter than the modulation FFT size: the rest stays zero-padded

                    zeroModblock.FastCopyTo(modBlock, _modulationFftSize);
                    envelope.FastCopyTo(modBlock, Math.Min(_modulationFftSize, envelopeLength - i), i);

                    modulationFft.PowerSpectrum(modBlock, modSpectrum);
                    modSpectrum.FastCopyTo(vector, modSpectrum.Length, 0, offset);

                    offset += modSpectrum.Length;
                }

                featureVectors.Add(new FeatureVector
                {
                    Features     = vector,
                    TimePosition = (double)i * hopSize / signal.SamplingRate
                });

                i += _modulationHopSize;
            }

            return featureVectors;
        }
示例#15
0
        /// <summary>
        /// Computes PNCC coefficients of a single frame according to [Kim &amp; Stern, 2016]:
        ///
        ///     1) Apply window
        ///     2) Obtain power spectrum
        ///     3) Apply gammatone filters (squared)
        ///     4) Medium-time processing (asymmetric noise suppression, temporal masking, spectral smoothing)
        ///     5) Apply nonlinearity
        ///     6) Do dct-II (normalized)
        ///
        /// The method is stateful: every call increments the internal step counter, pushes the
        /// current gammatone spectrum into the ring buffer and updates the running filter states
        /// and the running mean power, so frames must be passed in their natural order.
        ///
        /// NOTE(review): pre-emphasis and decomposition of the signal into frames are not done here;
        /// presumably they are handled by the caller / base class - confirm.
        /// </summary>
        /// <param name="block">
        /// Samples of one frame. The array is modified in place: elements starting from index
        /// FrameSize are zeroed and the window is applied.
        /// </param>
        /// <returns>
        /// New array of FeatureCount PNCC coefficients. While the internal step counter is less
        /// than 2*M the array contains only zeros.
        /// </returns>
        public override float[] ProcessFrame(float[] block)
        {
            // scale applied to the spectrum after it has been divided by the running mean power
            const float MeanPower = 1e10f;
            // small positive constant guarding the division in 4.4) and the logarithm in 5)
            const float Epsilon   = 2.22e-16f;

            _step++;

            // fill zeros to fftSize if frameSize < fftSize
            // (the assignment is done in the iterator section of the loop, hence the empty body)

            for (var k = FrameSize; k < block.Length; block[k++] = 0)
            {
                ;
            }

            // 1) apply window

            block.ApplyWindow(_windowSamples);

            // 2) calculate power spectrum

            _fft.PowerSpectrum(block, _spectrum, false);

            // 3) apply gammatone filterbank

            FilterBanks.Apply(FilterBank, _spectrum, _gammatoneSpectrum);


            // =============================================================
            // 4) medium-time processing blocks:

            // 4.1) temporal integration (zero-phase moving average filter)

            _ringBuffer.Add(_gammatoneSpectrum);

            var spectrumQ = _ringBuffer.AverageSpectrum;

            // 4.2) asymmetric noise suppression

            // one-time seeding of the asymmetric filter output (90% of the averaged spectrum)
            // at the first step for which output is produced

            if (_step == 2 * M)
            {
                for (var j = 0; j < _spectrumQOut.Length; j++)
                {
                    _spectrumQOut[j] = spectrumQ[j] * 0.9f;
                }
            }

            if (_step >= 2 * M)
            {
                // asymmetric first-order smoothing: LambdaA is used when the input exceeds
                // the current estimate, LambdaB otherwise

                for (var j = 0; j < _spectrumQOut.Length; j++)
                {
                    if (spectrumQ[j] > _spectrumQOut[j])
                    {
                        _spectrumQOut[j] = LambdaA * _spectrumQOut[j] + (1 - LambdaA) * spectrumQ[j];
                    }
                    else
                    {
                        _spectrumQOut[j] = LambdaB * _spectrumQOut[j] + (1 - LambdaB) * spectrumQ[j];
                    }
                }

                for (var j = 0; j < _filteredSpectrumQ.Length; j++)
                {
                    // half-wave rectified difference between the input and its asymmetric estimate

                    _filteredSpectrumQ[j] = Math.Max(spectrumQ[j] - _spectrumQOut[j], 0.0f);

                    // one-time seeding of both running states from the first rectified value

                    if (_step == 2 * M)
                    {
                        _avgSpectrumQ1[j] = 0.9f * _filteredSpectrumQ[j];
                        _avgSpectrumQ2[j] = _filteredSpectrumQ[j];
                    }

                    // the same asymmetric smoothing, now applied to the rectified signal

                    if (_filteredSpectrumQ[j] > _avgSpectrumQ1[j])
                    {
                        _avgSpectrumQ1[j] = LambdaA * _avgSpectrumQ1[j] + (1 - LambdaA) * _filteredSpectrumQ[j];
                    }
                    else
                    {
                        _avgSpectrumQ1[j] = LambdaB * _avgSpectrumQ1[j] + (1 - LambdaB) * _filteredSpectrumQ[j];
                    }

                    // 4.3) temporal masking

                    // remember the rectified value before it is possibly overwritten below:
                    // it is used to update the decaying peak state at the end of the iteration

                    var threshold = _filteredSpectrumQ[j];

                    _avgSpectrumQ2[j] *= LambdaT;
                    if (spectrumQ[j] < C * _spectrumQOut[j])
                    {
                        _filteredSpectrumQ[j] = _avgSpectrumQ1[j];
                    }
                    else
                    {
                        if (_filteredSpectrumQ[j] <= _avgSpectrumQ2[j])
                        {
                            _filteredSpectrumQ[j] = MuT * _avgSpectrumQ2[j];
                        }
                    }
                    _avgSpectrumQ2[j] = Math.Max(_avgSpectrumQ2[j], threshold);

                    // the result is never allowed to drop below the smoothed estimate

                    _filteredSpectrumQ[j] = Math.Max(_filteredSpectrumQ[j], _avgSpectrumQ1[j]);
                }


                // 4.4) spectral smoothing

                // per-channel ratio of processed to averaged power (Epsilon prevents division by zero)

                for (var j = 0; j < _spectrumS.Length; j++)
                {
                    _spectrumS[j] = _filteredSpectrumQ[j] / Math.Max(spectrumQ[j], Epsilon);
                }

                // average the ratios over channels [j - N, j + N], clipped to the filterbank range;
                // 'total' counts the channels that actually took part in the sum

                for (var j = 0; j < _smoothedSpectrumS.Length; j++)
                {
                    _smoothedSpectrumS[j] = 0.0f;

                    var total = 0;
                    for (var k = Math.Max(j - N, 0);
                         k < Math.Min(j + N + 1, FilterBank.Length);
                         k++, total++)
                    {
                        _smoothedSpectrumS[j] += _spectrumS[k];
                    }
                    _smoothedSpectrumS[j] /= total;
                }

                // 4.5) mean power normalization

                var centralSpectrum = _ringBuffer.CentralSpectrum;

                // weight the central spectrum of the ring buffer by the smoothed ratios
                // and accumulate the total power of the frame

                var sumPower = 0.0f;
                for (var j = 0; j < _smoothedSpectrum.Length; j++)
                {
                    _smoothedSpectrum[j] = _smoothedSpectrumS[j] * centralSpectrum[j];
                    sumPower            += _smoothedSpectrum[j];
                }

                // running (exponentially weighted) mean of the frame power, kept between calls

                _mean = LambdaMu * _mean + (1 - LambdaMu) * sumPower;

                for (var j = 0; j < _smoothedSpectrum.Length; j++)
                {
                    _smoothedSpectrum[j] /= _mean;
                    _smoothedSpectrum[j] *= MeanPower;
                }

                // =============================================================

                // 5) nonlinearity (power ^ d  or  Log)
                //    non-zero _power: raise to the power 1/_power; zero _power: natural logarithm

                if (_power != 0)
                {
                    for (var j = 0; j < _smoothedSpectrum.Length; j++)
                    {
                        _smoothedSpectrum[j] = (float)Math.Pow(_smoothedSpectrum[j], 1.0 / _power);
                    }
                }
                else
                {
                    for (var j = 0; j < _smoothedSpectrum.Length; j++)
                    {
                        _smoothedSpectrum[j] = (float)Math.Log(_smoothedSpectrum[j] + Epsilon);
                    }
                }

                // 6) dct-II (Norm = normalized)

                var pnccs = new float[FeatureCount];
                _dct.DirectNorm(_smoothedSpectrum, pnccs);

                // protect the step counter from integer overflow: rewind it to a value that is
                // still greater than 2*M, so that the one-time seeding branches are not re-entered

                if (_step == int.MaxValue - 1)
                {
                    _step = 2 * M + 1;
                }

                return(pnccs);
            }

            // first 2*M vectors are zeros

            return(new float[FeatureCount]);
        }
示例#16
0
        /// <summary>
        /// Creates an MFCC extractor from individual settings: selects (or builds) the filterbank,
        /// derives the FFT block size, and wires up the DCT, the spectrum type and the
        /// non-linearity that are applied to each frame.
        /// </summary>
        /// <param name="samplingRate">Sampling rate; passed to the base extractor</param>
        /// <param name="featureCount">Number of coefficients (stored in FeatureCount)</param>
        /// <param name="frameDuration">Frame duration, in seconds; passed to the base extractor</param>
        /// <param name="hopDuration">Hop duration, in seconds; passed to the base extractor</param>
        /// <param name="filterbankSize">Number of mel bands to generate; replaced by <paramref name="filterbank"/>.Length if a custom filterbank is given</param>
        /// <param name="lowFreq">Lower frequency passed to the mel band generator</param>
        /// <param name="highFreq">Upper frequency passed to the mel band generator</param>
        /// <param name="fftSize">FFT size; honored only if it exceeds the frame size and no custom filterbank is given (otherwise the next power of two of the frame size is used)</param>
        /// <param name="filterbank">Custom filterbank; if given, the FFT size is derived as 2 * (filterbank[0].Length - 1)</param>
        /// <param name="lifterSize">Liftering size; liftering coefficients are created only if it is positive</param>
        /// <param name="preEmphasis">Pre-emphasis coefficient; passed to the base extractor</param>
        /// <param name="includeEnergy">Flag stored in the extractor (NOTE(review): presumably makes the extractor output frame energy - confirm at the point of use)</param>
        /// <param name="dctType">"1", "1N", "2", "2N", "3", "3N", "4", "4N" (the digit selects the DCT, the 'N' suffix selects its normalized version)</param>
        /// <param name="nonLinearity">Post-processing of the filterbank output (Log10, LogE, ToDecibel, CubicRoot; any other value means plain filterbank application)</param>
        /// <param name="spectrumType">Magnitude or power spectrum, optionally normalized</param>
        /// <param name="window">Type of the window whose samples are precomputed for the frame size</param>
        /// <param name="logFloor">Floor value passed to the logarithm-based post-processing</param>
        /// <exception cref="ArgumentException">The first character of <paramref name="dctType"/> is not '1', '2', '3' or '4'</exception>
        public MfccExtractor(int samplingRate,
                             int featureCount,
                             double frameDuration          = 0.0256 /*sec*/,
                             double hopDuration            = 0.010 /*sec*/,
                             int filterbankSize            = 24,
                             double lowFreq                = 0,
                             double highFreq               = 0,
                             int fftSize                   = 0,
                             float[][] filterbank          = null,
                             int lifterSize                = 0,
                             double preEmphasis            = 0,
                             bool includeEnergy            = false,
                             string dctType                = "2N",
                             NonLinearityType nonLinearity = NonLinearityType.Log10,
                             SpectrumType spectrumType     = SpectrumType.Power,
                             WindowTypes window            = WindowTypes.Hamming,
                             float logFloor                = float.Epsilon)

            : base(samplingRate, frameDuration, hopDuration, preEmphasis)
        {
            FeatureCount = featureCount;
            _lowFreq     = lowFreq;
            _highFreq    = highFreq;

            // ---- filterbank and FFT block size ------------------------------------------------

            if (filterbank != null)
            {
                // custom filterbank dictates both the number of bands and the FFT size

                FilterBank     = filterbank;
                filterbankSize = filterbank.Length;
                _blockSize     = 2 * (filterbank[0].Length - 1);

                Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
            }
            else
            {
                if (fftSize > FrameSize)
                {
                    _blockSize = fftSize;
                }
                else
                {
                    _blockSize = MathUtils.NextPowerOfTwo(FrameSize);
                }

                // HTK/Kaldi-style triangular mel filterbank

                var bands = FilterBanks.MelBands(filterbankSize, _blockSize, SamplingRate, _lowFreq, _highFreq);
                FilterBank = FilterBanks.Triangular(_blockSize, SamplingRate, bands, mapper: Scale.HerzToMel);
            }

            _fft = new RealFft(_blockSize);

            // ---- window, liftering, energy ----------------------------------------------------

            _window        = window;
            _windowSamples = Window.OfType(_window, FrameSize);

            _lifterSize = lifterSize;
            if (_lifterSize > 0)
            {
                _lifterCoeffs = Window.Liftering(FeatureCount, _lifterSize);
            }
            else
            {
                _lifterCoeffs = null;
            }

            _includeEnergy = includeEnergy;

            // ---- DCT --------------------------------------------------------------------------

            _dctType = dctType;

            var dctKind = dctType[0];

            if (dctKind == '1')
            {
                _dct = new Dct1(filterbankSize);
            }
            else if (dctKind == '2')
            {
                _dct = new Dct2(filterbankSize);
            }
            else if (dctKind == '3')
            {
                _dct = new Dct3(filterbankSize);
            }
            else if (dctKind == '4')
            {
                _dct = new Dct4(filterbankSize);
            }
            else
            {
                throw new ArgumentException("Only DCT-1, 2, 3 and 4 are supported!");
            }

            // second character 'N' (any case) requests the normalized transform

            var useNormalizedDct = dctType.Length > 1 && char.ToUpper(dctType[1]) == 'N';

            if (!useNormalizedDct)
            {
                _applyDct = mfccs => _dct.Direct(_melSpectrum, mfccs);
            }
            else
            {
                _applyDct = mfccs => _dct.DirectNorm(_melSpectrum, mfccs);
            }

            // ---- spectrum post-processing -----------------------------------------------------

            _logFloor         = logFloor;
            _nonLinearityType = nonLinearity;

            if (nonLinearity == NonLinearityType.Log10)
            {
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _melSpectrum, _logFloor);
            }
            else if (nonLinearity == NonLinearityType.LogE)
            {
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _melSpectrum, _logFloor);
            }
            else if (nonLinearity == NonLinearityType.ToDecibel)
            {
                _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _melSpectrum, _logFloor);
            }
            else if (nonLinearity == NonLinearityType.CubicRoot)
            {
                _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _melSpectrum, 0.33);
            }
            else
            {
                _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _melSpectrum);
            }

            // ---- spectrum type ----------------------------------------------------------------
            // (an unrecognized value leaves _getSpectrum unassigned)

            _spectrumType = spectrumType;

            if (_spectrumType == SpectrumType.Magnitude)
            {
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false);
            }
            else if (_spectrumType == SpectrumType.Power)
            {
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false);
            }
            else if (_spectrumType == SpectrumType.MagnitudeNormalized)
            {
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true);
            }
            else if (_spectrumType == SpectrumType.PowerNormalized)
            {
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true);
            }

            // ---- reusable buffers (captured by the delegates above) ---------------------------

            _spectrum    = new float[_blockSize / 2 + 1];
            _melSpectrum = new float[filterbankSize];
        }
示例#17
0
        /// <summary>
        /// S(implified)PNCC algorithm according to [Kim &amp; Stern, 2016]:
        ///     0) [Optional] pre-emphasis
        ///
        /// Decompose signal into overlapping (hopSize) frames, zero-padded to fftSize. In each frame do:
        ///
        ///     1) Apply window (if rectangular window was specified then just do nothing)
        ///     2) Obtain power spectrum
        ///     3) Apply gammatone filters (squared)
        ///     4) Mean power normalization
        ///     5) Apply nonlinearity
        ///     6) Do dct-II (normalized)
        ///
        /// The gammatone filterbank is rebuilt on every call for the sampling rate of the given signal
        /// and stored in the _gammatoneFilterBank field.
        /// </summary>
        /// <param name="signal">Signal for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of spncc vectors; TimePosition of each vector is the frame start, in seconds</returns>
        public override List <FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ====================================== PREPARE =======================================

            var hopSize       = (int)(signal.SamplingRate * HopSize);
            var frameSize     = (int)(signal.SamplingRate * FrameSize);
            var windowSamples = Window.OfType(_window, frameSize);

            // effective FFT size: the requested one if it can hold a frame, otherwise next power of two

            var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

            // The filterbank must be built for the *effective* FFT size (the same one used for the FFT
            // and the spectrum buffer below); using the raw _fftSize produced filters whose length
            // did not match the spectrum whenever _fftSize was smaller than the frame size.

            _gammatoneFilterBank = FilterBanks.Erb(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq);

            // use power spectrum, so square the filter weights:

            foreach (var filter in _gammatoneFilterBank)
            {
                for (var j = 0; j < filter.Length; j++)
                {
                    filter[j] *= filter[j];
                }
            }


            var fft = new Fft(fftSize);
            var dct = new Dct2(_filterbankSize, FeatureCount);


            var gammatoneSpectrum = new float[_filterbankSize];

            const float meanPower = 1e10f;      // scale applied after division by the running mean
            var         mean      = 4e07f;      // initial value of the running mean power

            // exponent of the power nonlinearity (unused if _power is zero)

            var d = _power != 0 ? 1.0 / _power : 0.0;

            var block     = new float[fftSize];       // buffer for a signal block at each step
            var zeroblock = new float[fftSize];       // buffer of zeros for quick memset

            var spectrum = new float[fftSize / 2 + 1];


            // ================================= MAIN PROCESSING ==================================

            var featureVectors = new List <FeatureVector>();

            // pre-emphasis state: the sample that precedes the first processed sample

            var prevSample = startSample > 0 ? signal[startSample - 1] : 0.0f;

            var i = startSample;

            while (i + frameSize < endSample)
            {
                // prepare next block for processing

                zeroblock.FastCopyTo(block, zeroblock.Length);
                signal.Samples.FastCopyTo(block, frameSize, i);


                // 0) pre-emphasis (if needed)

                if (_preEmphasis > 0.0)
                {
                    for (var k = 0; k < frameSize; k++)
                    {
                        var y = block[k] - prevSample * _preEmphasis;
                        prevSample = block[k];
                        block[k]   = y;
                    }

                    // frames overlap, so the state for the next frame is the sample
                    // right before the next frame start (i + hopSize)

                    prevSample = signal[i + hopSize - 1];
                }


                // 1) apply window

                if (_window != WindowTypes.Rectangular)
                {
                    block.ApplyWindow(windowSamples);
                }


                // 2) calculate power spectrum

                fft.PowerSpectrum(block, spectrum);


                // 3) apply gammatone filterbank

                FilterBanks.Apply(_gammatoneFilterBank, spectrum, gammatoneSpectrum);


                // 4) mean power normalization:

                var sumPower = 0.0f;
                for (var j = 0; j < gammatoneSpectrum.Length; j++)
                {
                    sumPower += gammatoneSpectrum[j];
                }

                mean = LambdaMu * mean + (1 - LambdaMu) * sumPower;

                var gain = meanPower / mean;

                for (var j = 0; j < gammatoneSpectrum.Length; j++)
                {
                    gammatoneSpectrum[j] *= gain;
                }


                // 5) nonlinearity (power ^ d     or     Log10)

                if (_power != 0)
                {
                    for (var j = 0; j < gammatoneSpectrum.Length; j++)
                    {
                        gammatoneSpectrum[j] = (float)Math.Pow(gammatoneSpectrum[j], d);
                    }
                }
                else
                {
                    for (var j = 0; j < gammatoneSpectrum.Length; j++)
                    {
                        gammatoneSpectrum[j] = (float)Math.Log10(gammatoneSpectrum[j] + float.Epsilon);
                    }
                }


                // 6) dct-II (normalized)

                var spnccs = new float[FeatureCount];
                dct.DirectN(gammatoneSpectrum, spnccs);


                // add spncc vector to output sequence

                featureVectors.Add(new FeatureVector
                {
                    Features     = spnccs,
                    TimePosition = (double)i / signal.SamplingRate
                });

                i += hopSize;
            }

            return featureVectors;
        }
示例#18
0
        /// <summary>
        /// Creates an MFCC extractor from an options object: selects (or builds) the filterbank,
        /// derives the FFT block size, and wires up the DCT, the spectrum type and the
        /// non-linearity that are applied to each frame.
        /// </summary>
        /// <param name="options">
        /// MFCC options. Members read here: FeatureCount, FilterBankSize, FilterBank, FftSize,
        /// LowFrequency, HighFrequency, LifterSize, IncludeEnergy, LogEnergyFloor, DctType,
        /// LogFloor, NonLinearity, SpectrumType. The object is also passed to the base constructor.
        /// </param>
        public MfccExtractor(MfccOptions options) : base(options)
        {
            FeatureCount = options.FeatureCount;

            var filterbankSize = options.FilterBankSize;

            // ---- filterbank and FFT block size ------------------------------------------------

            if (options.FilterBank != null)
            {
                // custom filterbank dictates both the number of bands and the FFT size

                FilterBank     = options.FilterBank;
                filterbankSize = FilterBank.Length;
                _blockSize     = 2 * (FilterBank[0].Length - 1);

                Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
            }
            else
            {
                if (options.FftSize > FrameSize)
                {
                    _blockSize = options.FftSize;
                }
                else
                {
                    _blockSize = MathUtils.NextPowerOfTwo(FrameSize);
                }

                // HTK/Kaldi-style triangular mel filterbank

                var bands = FilterBanks.MelBands(filterbankSize, SamplingRate, options.LowFrequency, options.HighFrequency);
                FilterBank = FilterBanks.Triangular(_blockSize, SamplingRate, bands, mapper: Scale.HerzToMel);
            }

            _fft = new RealFft(_blockSize);

            // ---- liftering and energy ---------------------------------------------------------

            _lifterSize = options.LifterSize;
            if (_lifterSize > 0)
            {
                _lifterCoeffs = Window.Liftering(FeatureCount, _lifterSize);
            }
            else
            {
                _lifterCoeffs = null;
            }

            _includeEnergy  = options.IncludeEnergy;
            _logEnergyFloor = options.LogEnergyFloor;

            // ---- DCT: first character selects the type, anything unrecognized means DCT-II ----

            _dctType = options.DctType;

            var dctKind = _dctType[0];

            if (dctKind == '1')
            {
                _dct = new Dct1(filterbankSize);
            }
            else if (dctKind == '3')
            {
                _dct = new Dct3(filterbankSize);
            }
            else if (dctKind == '4')
            {
                _dct = new Dct4(filterbankSize);
            }
            else
            {
                _dct = new Dct2(filterbankSize);
            }

            // trailing 'N' (any case) requests the normalized transform

            var useNormalizedDct = _dctType.EndsWith("N", StringComparison.OrdinalIgnoreCase);

            if (!useNormalizedDct)
            {
                _applyDct = mfccs => _dct.Direct(_melSpectrum, mfccs);
            }
            else
            {
                _applyDct = mfccs => _dct.DirectNorm(_melSpectrum, mfccs);
            }

            // ---- spectrum post-processing -----------------------------------------------------

            _logFloor         = options.LogFloor;
            _nonLinearityType = options.NonLinearity;

            if (_nonLinearityType == NonLinearityType.Log10)
            {
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog10(FilterBank, _spectrum, _melSpectrum, _logFloor);
            }
            else if (_nonLinearityType == NonLinearityType.LogE)
            {
                _postProcessSpectrum = () => FilterBanks.ApplyAndLog(FilterBank, _spectrum, _melSpectrum, _logFloor);
            }
            else if (_nonLinearityType == NonLinearityType.ToDecibel)
            {
                _postProcessSpectrum = () => FilterBanks.ApplyAndToDecibel(FilterBank, _spectrum, _melSpectrum, _logFloor);
            }
            else if (_nonLinearityType == NonLinearityType.CubicRoot)
            {
                _postProcessSpectrum = () => FilterBanks.ApplyAndPow(FilterBank, _spectrum, _melSpectrum, 0.33);
            }
            else
            {
                _postProcessSpectrum = () => FilterBanks.Apply(FilterBank, _spectrum, _melSpectrum);
            }

            // ---- spectrum type: anything unrecognized means non-normalized power spectrum -----

            _spectrumType = options.SpectrumType;

            if (_spectrumType == SpectrumType.Magnitude)
            {
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, false);
            }
            else if (_spectrumType == SpectrumType.MagnitudeNormalized)
            {
                _getSpectrum = block => _fft.MagnitudeSpectrum(block, _spectrum, true);
            }
            else if (_spectrumType == SpectrumType.PowerNormalized)
            {
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, true);
            }
            else
            {
                _getSpectrum = block => _fft.PowerSpectrum(block, _spectrum, false);
            }

            // ---- reusable buffers (captured by the delegates above) ---------------------------

            _spectrum    = new float[_blockSize / 2 + 1];
            _melSpectrum = new float[filterbankSize];
        }
示例#19
0
        /// <summary>
        /// PNCC algorithm according to [Kim &amp; Stern, 2016]:
        ///     0) [Optional] pre-emphasis
        ///
        /// Decompose signal into overlapping (hopSize) frames, zero-padded to fftSize. In each frame do:
        ///
        ///     1) Apply window (if rectangular window was specified then just do nothing)
        ///     2) Obtain power spectrum
        ///     3) Apply gammatone filters (squared)
        ///     4) Medium-time processing (asymmetric noise suppression, temporal masking, spectral smoothing)
        ///     5) Apply nonlinearity
        ///     6) Do dct-II (normalized)
        ///
        /// Frames with index less than 2*M only feed the ring buffer; no feature vector is produced for them.
        /// </summary>
        /// <param name="samples">Samples for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of pncc vectors; TimePosition of each vector is the frame start, in seconds</returns>
        public override List <FeatureVector> ComputeFrom(float[] samples, int startSample, int endSample)
        {
            Guard.AgainstInvalidRange(startSample, endSample, "starting pos", "ending pos");

            var hopSize   = HopSize;
            var frameSize = FrameSize;

            const float meanPower = 1e10f;      // scale applied after division by the running mean
            var         mean      = 4e07f;      // initial value of the running mean power

            // exponent of the power nonlinearity (unused if _power is zero)

            var d = _power != 0 ? 1.0 / _power : 0.0;

            // pre-emphasis state: the sample that precedes the first processed sample

            var prevSample = startSample > 0 ? samples[startSample - 1] : 0.0f;

            var featureVectors = new List <FeatureVector>();

            var i       = 0;                // frame counter
            var timePos = startSample;      // position of the current frame in samples

            while (timePos + frameSize < endSample)
            {
                // prepare next block for processing

                _zeroblock.FastCopyTo(_block, _fftSize);
                samples.FastCopyTo(_block, frameSize, timePos);

                // 0) pre-emphasis (if needed)

                if (_preEmphasis > 0.0)
                {
                    for (var k = 0; k < frameSize; k++)
                    {
                        var y = _block[k] - prevSample * _preEmphasis;
                        prevSample = _block[k];
                        _block[k]  = y;
                    }

                    // Frames overlap, so the state for the next frame is the sample right before
                    // the next frame start. It must be addressed via the sample position (timePos);
                    // the frame counter 'i' is not an index into 'samples'.

                    prevSample = samples[timePos + hopSize - 1];
                }

                // 1) apply window

                if (_window != WindowTypes.Rectangular)
                {
                    _block.ApplyWindow(_windowSamples);
                }


                // 2) calculate power spectrum

                _fft.PowerSpectrum(_block, _spectrum);


                // 3) apply gammatone filterbank

                FilterBanks.Apply(FilterBank, _spectrum, _gammatoneSpectrum);



                // =============================================================
                // 4) medium-time processing blocks:

                // 4.1) temporal integration (zero-phase moving average filter)

                _ringBuffer.Add(_gammatoneSpectrum);
                var spectrumQ = _ringBuffer.AverageSpectrum;

                // 4.2) asymmetric noise suppression

                // one-time seeding of the asymmetric filter output at the first frame that yields output

                if (i == 2 * M)
                {
                    for (var j = 0; j < _spectrumQOut.Length; j++)
                    {
                        _spectrumQOut[j] = spectrumQ[j] * 0.9f;
                    }
                }

                if (i >= 2 * M)
                {
                    // asymmetric first-order smoothing: LambdaA is used when the input exceeds
                    // the current estimate, LambdaB otherwise

                    for (var j = 0; j < _spectrumQOut.Length; j++)
                    {
                        if (spectrumQ[j] > _spectrumQOut[j])
                        {
                            _spectrumQOut[j] = LambdaA * _spectrumQOut[j] + (1 - LambdaA) * spectrumQ[j];
                        }
                        else
                        {
                            _spectrumQOut[j] = LambdaB * _spectrumQOut[j] + (1 - LambdaB) * spectrumQ[j];
                        }
                    }

                    for (var j = 0; j < _filteredSpectrumQ.Length; j++)
                    {
                        // half-wave rectified difference between the input and its asymmetric estimate

                        _filteredSpectrumQ[j] = Math.Max(spectrumQ[j] - _spectrumQOut[j], 0.0f);

                        // one-time seeding of both running states from the first rectified value

                        if (i == 2 * M)
                        {
                            _avgSpectrumQ1[j] = 0.9f * _filteredSpectrumQ[j];
                            _avgSpectrumQ2[j] = _filteredSpectrumQ[j];
                        }

                        if (_filteredSpectrumQ[j] > _avgSpectrumQ1[j])
                        {
                            _avgSpectrumQ1[j] = LambdaA * _avgSpectrumQ1[j] + (1 - LambdaA) * _filteredSpectrumQ[j];
                        }
                        else
                        {
                            _avgSpectrumQ1[j] = LambdaB * _avgSpectrumQ1[j] + (1 - LambdaB) * _filteredSpectrumQ[j];
                        }

                        // 4.3) temporal masking

                        // remember the rectified value before it is possibly overwritten below:
                        // it is used to update the decaying peak state at the end of the iteration

                        var threshold = _filteredSpectrumQ[j];

                        _avgSpectrumQ2[j] *= LambdaT;
                        if (spectrumQ[j] < C * _spectrumQOut[j])
                        {
                            _filteredSpectrumQ[j] = _avgSpectrumQ1[j];
                        }
                        else
                        {
                            if (_filteredSpectrumQ[j] <= _avgSpectrumQ2[j])
                            {
                                _filteredSpectrumQ[j] = MuT * _avgSpectrumQ2[j];
                            }
                        }
                        _avgSpectrumQ2[j] = Math.Max(_avgSpectrumQ2[j], threshold);

                        // the result is never allowed to drop below the smoothed estimate

                        _filteredSpectrumQ[j] = Math.Max(_filteredSpectrumQ[j], _avgSpectrumQ1[j]);
                    }


                    // 4.4) spectral smoothing

                    for (var j = 0; j < _spectrumS.Length; j++)
                    {
                        _spectrumS[j] = _filteredSpectrumQ[j] / Math.Max(spectrumQ[j], float.Epsilon);
                    }

                    // average the ratios over channels [j - N, j + N], clipped to the filterbank range;
                    // 'total' counts the channels that actually took part in the sum

                    for (var j = 0; j < _smoothedSpectrumS.Length; j++)
                    {
                        _smoothedSpectrumS[j] = 0.0f;

                        var total = 0;
                        for (var k = Math.Max(j - N, 0);
                             k < Math.Min(j + N + 1, _filterbankSize);
                             k++, total++)
                        {
                            _smoothedSpectrumS[j] += _spectrumS[k];
                        }
                        _smoothedSpectrumS[j] /= total;
                    }

                    // 4.5) mean power normalization

                    var centralSpectrum = _ringBuffer.CentralSpectrum;

                    var sumPower = 0.0f;
                    for (var j = 0; j < _smoothedSpectrum.Length; j++)
                    {
                        _smoothedSpectrum[j] = _smoothedSpectrumS[j] * centralSpectrum[j];
                        sumPower            += _smoothedSpectrum[j];
                    }

                    mean = LambdaMu * mean + (1 - LambdaMu) * sumPower;

                    var gain = meanPower / mean;

                    for (var j = 0; j < _smoothedSpectrum.Length; j++)
                    {
                        _smoothedSpectrum[j] *= gain;
                    }

                    // =============================================================


                    // 5) nonlinearity (power ^ d     or    Log10)

                    if (_power != 0)
                    {
                        for (var j = 0; j < _smoothedSpectrum.Length; j++)
                        {
                            _smoothedSpectrum[j] = (float)Math.Pow(_smoothedSpectrum[j], d);
                        }
                    }
                    else
                    {
                        for (var j = 0; j < _smoothedSpectrum.Length; j++)
                        {
                            _smoothedSpectrum[j] = (float)Math.Log10(_smoothedSpectrum[j] + float.Epsilon);
                        }
                    }

                    // 6) dct-II (normalized)

                    var pnccs = new float[FeatureCount];
                    _dct.DirectN(_smoothedSpectrum, pnccs);


                    // add pncc vector to output sequence

                    featureVectors.Add(new FeatureVector
                    {
                        Features     = pnccs,
                        TimePosition = (double)timePos / SamplingRate
                    });
                }

                i++;

                timePos += hopSize;
            }

            return featureVectors;
        }
示例#20
0
        /// <summary>
        /// Main constructor.
        /// 
        /// The extractor works in one of two modes, selected by <paramref name="featuregram"/>:
        /// 
        ///     1) a featuregram is supplied: it is stored as is, and spectral analysis settings
        ///        (<paramref name="fftSize"/>, <paramref name="filterbank"/>, <paramref name="window"/>)
        ///        are not used by this constructor;
        /// 
        ///     2) no featuregram is supplied: a filterbank (the given one or a default one)
        ///        and all buffers required for spectral analysis are prepared.
        /// 
        /// In both modes one feature description of the form "band_{n}_mf_{f}_Hz" is generated
        /// for each (band, modulation frequency bin) pair.
        /// </summary>
        /// <param name="samplingRate">Sampling rate passed to the base extractor and to filterbank design</param>
        /// <param name="frameDuration">In seconds</param>
        /// <param name="hopDuration">In seconds</param>
        /// <param name="modulationFftSize">In samples</param>
        /// <param name="modulationHopSize">In samples</param>
        /// <param name="fftSize">In samples. Used only if it exceeds the frame size and no filterbank is given;
        /// otherwise the next power of two of the frame size is used</param>
        /// <param name="featuregram">Optional precomputed sequence of feature vectors (must not be empty if given)</param>
        /// <param name="filterbank">Optional filterbank. If given, FFT size is derived from the length of its first filter;
        /// if null, a triangular 12-band mel filterbank (100..3200 Hz) is created</param>
        /// <param name="preEmphasis">Pre-emphasis coefficient (stored as float)</param>
        /// <param name="window">Window type; window samples are precomputed only for non-rectangular windows</param>
        public AmsExtractor(int samplingRate,
                            double frameDuration              = 0.0256 /*sec*/,
                            double hopDuration                = 0.010 /*sec*/,
                            int modulationFftSize             = 64,
                            int modulationHopSize             = 4,
                            int fftSize                       = 0,
                            IEnumerable <float[]> featuregram = null,
                            float[][] filterbank              = null,
                            double preEmphasis                = 0.0,
                            WindowTypes window                = WindowTypes.Rectangular)

            : base(samplingRate, frameDuration, hopDuration)
        {
            _modulationFftSize = modulationFftSize;
            _modulationHopSize = modulationHopSize;
            _modulationFft     = new Fft(_modulationFftSize);

            _featuregram = featuregram?.ToArray();

            if (featuregram != null)
            {
                _featureCount = _featuregram[0].Length * (_modulationFftSize / 2 + 1);
            }
            else
            {
                // The PARAMETER must be tested here, not the _filterbank field:
                // the field is not yet assigned at this point, so testing it
                // would silently discard any filterbank supplied by the caller.
                if (filterbank == null)
                {
                    _fftSize = fftSize > FrameSize ? fftSize : MathUtils.NextPowerOfTwo(FrameSize);

                    _filterbank = FilterBanks.Triangular(_fftSize, samplingRate,
                                                         FilterBanks.MelBands(12, _fftSize, samplingRate, 100, 3200));
                }
                else
                {
                    _filterbank = filterbank;

                    // each filter covers fftSize / 2 + 1 spectral bins
                    _fftSize = 2 * (filterbank[0].Length - 1);
                }

                _fft = new Fft(_fftSize);

                _featureCount = _filterbank.Length * (_modulationFftSize / 2 + 1);

                _window = window;
                if (_window != WindowTypes.Rectangular)
                {
                    _windowSamples = Window.OfType(_window, FrameSize);
                }

                // reserve memory for reusable blocks

                _spectrum         = new float[_fftSize / 2 + 1];
                _filteredSpectrum = new float[_filterbank.Length];
                _block            = new float[_fftSize];
                _zeroblock        = new float[_fftSize];
            }

            _preEmphasis = (float)preEmphasis;

            _modBlock     = new float[_modulationFftSize];
            _zeroModblock = new float[_modulationFftSize];
            _modSpectrum  = new float[_modulationFftSize / 2 + 1];

            // feature descriptions:
            // number of bands is the featuregram vector size or the number of filters

            var length = _featuregram != null ? _featuregram[0].Length : _filterbank.Length;

            _featureDescriptions = new List <string>();

            var modulationSamplingRate = (float)samplingRate / HopSize;
            var resolution             = modulationSamplingRate / _modulationFftSize;

            for (var fi = 0; fi < length; fi++)
            {
                for (var fj = 0; fj <= _modulationFftSize / 2; fj++)
                {
                    _featureDescriptions.Add(string.Format("band_{0}_mf_{1:F2}_Hz", fi + 1, fj * resolution));
                }
            }
        }
示例#21
0
        /// <summary>
        /// Main constructor: stores the settings of the SPNCC extractor, prepares the filterbank
        /// (a user-defined one or an ERB filterbank with squared weights) and allocates
        /// FFT / DCT transformers and reusable buffers.
        /// </summary>
        /// <param name="samplingRate">Sampling rate passed to the base extractor and to filterbank design</param>
        /// <param name="featureCount">Number of coefficients produced by DCT</param>
        /// <param name="frameDuration">Length of analysis window (in seconds)</param>
        /// <param name="hopDuration">Length of overlap (in seconds)</param>
        /// <param name="power">Stored for the feature computation stage (not used in the constructor itself)</param>
        /// <param name="lowFreq">Lower frequency of the ERB filterbank (used only if no filterbank is given)</param>
        /// <param name="highFreq">Upper frequency of the ERB filterbank (used only if no filterbank is given)</param>
        /// <param name="filterbankSize">Number of ERB filters (used only if no filterbank is given)</param>
        /// <param name="filterbank">Optional user-defined filterbank; it is used as is (weights are not squared)</param>
        /// <param name="fftSize">Size of FFT (in samples); ignored if a filterbank is given</param>
        /// <param name="preEmphasis">Pre-emphasis coefficient (stored as float)</param>
        /// <param name="window">Window type; window samples are precomputed only for non-rectangular windows</param>
        public SpnccExtractor(int samplingRate,
                              int featureCount,
                              double frameDuration = 0.0256 /*sec*/,
                              double hopDuration   = 0.010 /*sec*/,
                              int power            = 15,
                              double lowFreq       = 100,
                              double highFreq      = 6800,
                              int filterbankSize   = 40,
                              float[][] filterbank = null,
                              int fftSize          = 0,
                              double preEmphasis   = 0.0,
                              WindowTypes window   = WindowTypes.Hamming)

            : base(samplingRate, frameDuration, hopDuration)
        {
            FeatureCount = featureCount;

            _power       = power;
            _preEmphasis = (float)preEmphasis;
            _window      = window;

            if (filterbank != null)
            {
                // user-defined filterbank dictates both the number of bands and the FFT size

                FilterBank      = filterbank;
                _filterbankSize = filterbank.Length;
                _fftSize        = 2 * (filterbank[0].Length - 1);
            }
            else
            {
                _fftSize        = fftSize > FrameSize ? fftSize : MathUtils.NextPowerOfTwo(FrameSize);
                _filterbankSize = filterbankSize;

                _lowFreq  = lowFreq;
                _highFreq = highFreq;

                FilterBank = FilterBanks.Erb(_filterbankSize, _fftSize, samplingRate, _lowFreq, _highFreq);

                // square every weight in place, since filters are applied to power spectrum

                foreach (var weights in FilterBank)
                {
                    for (var k = 0; k < weights.Length; k++)
                    {
                        weights[k] *= weights[k];
                    }
                }
            }

            // transformers

            _fft = new Fft(_fftSize);
            _dct = new Dct2(_filterbankSize, FeatureCount);

            // rectangular window requires no precomputed samples

            if (_window != WindowTypes.Rectangular)
            {
                _windowSamples = Window.OfType(_window, FrameSize);
            }

            // reusable buffers

            _block            = new float[_fftSize];
            _zeroblock        = new float[_fftSize];
            _spectrum         = new float[_fftSize / 2 + 1];
            _filteredSpectrum = new float[_filterbankSize];
        }
示例#22
0
        /// <summary>
        /// Constructor.
        /// 
        /// Parses the list of requested features, maps each feature name to a function
        /// (spectrum, frequencies) -> value, and prepares the FFT, window, frequency bands,
        /// rectangular filterbank and reusable buffers.
        /// 
        /// Recognized feature names (case-insensitive): sc / centroid, ss / spread, sfm / flatness,
        /// sn / noiseness, rolloff, crest, ent / entropy, sd / decrease, loud / loudness, sharp / sharpness.
        /// Any other name is mapped to a function that always returns 0.
        /// </summary>
        /// <param name="samplingRate">Sampling rate passed to the base extractor and to filterbank design</param>
        /// <param name="featureList">Feature names separated by any of the characters ',', '+', '-', ';', ':';
        /// the values "all" and "full" select the complete feature set</param>
        /// <param name="frameDuration">Frame duration (in seconds)</param>
        /// <param name="hopDuration">Hop duration (in seconds)</param>
        /// <param name="fftSize">Size of FFT (in samples). Used only if it exceeds the frame size;
        /// otherwise the next power of two of the frame size is used</param>
        /// <param name="frequencyBands">Frequency bands as tuples whose second item is treated as the center frequency;
        /// if null, octave bands are generated</param>
        /// <param name="window">Window type; window samples are precomputed only for non-rectangular windows</param>
        /// <param name="parameters">Optional feature parameters. Recognized keys: "minLevel" (flatness),
        /// "noiseFrequency" (noiseness), "rolloffPercent" (rolloff). Values may be of any numeric type
        /// convertible to float</param>
        public Mpeg7SpectralFeaturesExtractor(int samplingRate,
                                              string featureList,
                                              double frameDuration = 0.0256 /*sec*/,
                                              double hopDuration   = 0.010 /*sec*/,
                                              int fftSize          = 0,
                                              Tuple <double, double, double>[] frequencyBands = null,
                                              WindowTypes window = WindowTypes.Hamming,
                                              IReadOnlyDictionary <string, object> parameters = null)

            : base(samplingRate, frameDuration, hopDuration)
        {
            if (featureList == "all" || featureList == "full")
            {
                featureList = FeatureSet;
            }

            // Materialize the names once: they are used both for building extractors
            // and as feature descriptions. Invariant lower-casing keeps keyword matching
            // independent of the current culture (e.g. Turkish 'I' -> dotless 'i').

            var features = featureList.Split(',', '+', '-', ';', ':')
                           .Select(f => f.Trim().ToLowerInvariant())
                           .ToList();

            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            _extractors = features.Select <string, Func <float[], float[], float> >(feature =>
            {
                // Raw (boxed) parameter value. It is converted with Convert.ToSingle,
                // because a direct (float) unboxing cast throws InvalidCastException
                // for any boxed value that is not exactly a float (e.g. double or int).
                object raw;

                switch (feature)
                {
                case "sc":
                case "centroid":
                    return Spectral.Centroid;

                case "ss":
                case "spread":
                    return Spectral.Spread;

                case "sfm":
                case "flatness":
                    if (parameters != null && parameters.TryGetValue("minLevel", out raw))
                    {
                        var minLevel = Convert.ToSingle(raw, invariant);
                        return (spectrum, freqs) => Spectral.Flatness(spectrum, minLevel);
                    }
                    return (spectrum, freqs) => Spectral.Flatness(spectrum);

                case "sn":
                case "noiseness":
                    if (parameters != null && parameters.TryGetValue("noiseFrequency", out raw))
                    {
                        var noiseFrequency = Convert.ToSingle(raw, invariant);
                        return (spectrum, freqs) => Spectral.Noiseness(spectrum, freqs, noiseFrequency);
                    }
                    return (spectrum, freqs) => Spectral.Noiseness(spectrum, freqs);

                case "rolloff":
                    if (parameters != null && parameters.TryGetValue("rolloffPercent", out raw))
                    {
                        var rolloffPercent = Convert.ToSingle(raw, invariant);
                        return (spectrum, freqs) => Spectral.Rolloff(spectrum, freqs, rolloffPercent);
                    }
                    return (spectrum, freqs) => Spectral.Rolloff(spectrum, freqs);

                case "crest":
                    return (spectrum, freqs) => Spectral.Crest(spectrum);

                case "entropy":
                case "ent":
                    return (spectrum, freqs) => Spectral.Entropy(spectrum);

                case "sd":
                case "decrease":
                    return (spectrum, freqs) => Spectral.Decrease(spectrum);

                case "loud":
                case "loudness":
                    return (spectrum, freqs) => Perceptual.Loudness(spectrum);

                case "sharp":
                case "sharpness":
                    return (spectrum, freqs) => Perceptual.Sharpness(spectrum);

                default:
                    // unknown feature name: keep its slot, always produce zero
                    return (spectrum, freqs) => 0;
                }
            }).ToList();

            FeatureDescriptions = features;

            _fftSize = fftSize > FrameSize ? fftSize : MathUtils.NextPowerOfTwo(FrameSize);
            _fft     = new Fft(_fftSize);

            _window = window;
            if (_window != WindowTypes.Rectangular)
            {
                _windowSamples = Window.OfType(_window, FrameSize);
            }

            _frequencyBands = frequencyBands ?? FilterBanks.OctaveBands(6, _fftSize, samplingRate);
            _filterbank     = FilterBanks.Rectangular(_fftSize, samplingRate, _frequencyBands);

            var cfs = _frequencyBands.Select(b => b.Item2).ToList();

            // insert zero frequency so that it'll be ignored during calculations
            // just like in case of FFT spectrum (0th DC component)
            cfs.Insert(0, 0);
            _frequencies = cfs.ToFloats();

            _parameters = parameters;

            // reserve memory for reusable blocks

            _spectrum       = new float[_fftSize / 2 + 1];          // buffer for magnitude spectrum
            _mappedSpectrum = new float[_filterbank.Length + 1];    // buffer for total energies in bands
            _block          = new float[_fftSize];                  // buffer for currently processed block
            _zeroblock      = new float[_fftSize];                  // just a buffer of zeros for quick memset
        }
示例#23
0
        /// <summary>
        /// Constructor: prepares the filterbank with its center frequencies, the equal loudness curve,
        /// optional RASTA filters, the cosine (IDFT) table used for obtaining autocorrelation
        /// coefficients from power spectrum, and all reusable buffers.
        /// </summary>
        /// <param name="samplingRate">Sampling rate passed to the base extractor and to filterbank design</param>
        /// <param name="featureCount">Number of features</param>
        /// <param name="frameDuration">Frame duration (in seconds)</param>
        /// <param name="hopDuration">Hop duration (in seconds)</param>
        /// <param name="lpcOrder">LPC order; if not positive, featureCount - 1 is used</param>
        /// <param name="rasta">RASTA coefficient; RASTA filters are created only if it is positive</param>
        /// <param name="filterbankSize">Number of bark filters (replaced with the number of filters if a filterbank is given)</param>
        /// <param name="lowFreq">Lower frequency passed to bark filterbank design</param>
        /// <param name="highFreq">Upper frequency passed to bark filterbank design</param>
        /// <param name="fftSize">Size of FFT (in samples); used only if it exceeds the frame size and no filterbank is given</param>
        /// <param name="lifterSize">Size of the lifter; liftering coefficients are computed only if it is positive</param>
        /// <param name="preEmphasis">Pre-emphasis coefficient (passed to the base extractor)</param>
        /// <param name="window">Window type</param>
        /// <param name="filterbank">Optional user-defined filterbank</param>
        /// <param name="centerFrequencies">Optional center frequencies of the user-defined filterbank;
        /// if null, they are estimated from the filterbank weights</param>
        public PlpExtractor(int samplingRate,
                            int featureCount,
                            double frameDuration       = 0.0256 /*sec*/,
                            double hopDuration         = 0.010 /*sec*/,
                            int lpcOrder               = 0,         // will be autocalculated as featureCount - 1
                            double rasta               = 0,
                            int filterbankSize         = 24,
                            double lowFreq             = 0,
                            double highFreq            = 0,
                            int fftSize                = 0,
                            int lifterSize             = 0,
                            double preEmphasis         = 0,
                            WindowTypes window         = WindowTypes.Hamming,
                            float[][] filterbank       = null,
                            double[] centerFrequencies = null)

            : base(samplingRate, frameDuration, hopDuration, preEmphasis)
        {
            FeatureCount = featureCount;

            // ---------------------------- filterbank and center frequencies ----------------------------

            _lowFreq  = lowFreq;
            _highFreq = highFreq;

            if (filterbank != null)
            {
                FilterBank     = filterbank;
                filterbankSize = filterbank.Length;
                _blockSize     = 2 * (filterbank[0].Length - 1);

                Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");

                if (centerFrequencies == null)
                {
                    // estimate center frequency of each band as the middle of the bin range
                    // between the first positive weight and the first zero weight after it

                    var binWidth = (double)samplingRate / _blockSize;

                    _centerFrequencies = new double[filterbankSize];

                    for (var band = 0; band < filterbank.Length; band++)
                    {
                        var weights = filterbank[band];

                        var minPos = 0;
                        var maxPos = _blockSize / 2;

                        // first bin with positive weight (stays 0 if there is none)

                        var k = 0;
                        while (k < weights.Length && !(weights[k] > 0))
                        {
                            k++;
                        }
                        if (k < weights.Length)
                        {
                            minPos = k;
                        }

                        // first bin with zero weight, starting from minPos
                        // (stays at the last spectral bin if there is none)

                        k = minPos;
                        while (k < weights.Length && weights[k] != 0)
                        {
                            k++;
                        }
                        if (k < weights.Length)
                        {
                            maxPos = k;
                        }

                        _centerFrequencies[band] = binWidth * (maxPos + minPos) / 2;
                    }
                }
                else
                {
                    _centerFrequencies = centerFrequencies;
                }
            }
            else
            {
                _blockSize = fftSize > FrameSize ? fftSize : MathUtils.NextPowerOfTwo(FrameSize);

                var barkBands = FilterBanks.BarkBandsSlaney(filterbankSize, _blockSize, samplingRate, _lowFreq, _highFreq);
                FilterBank = FilterBanks.BarkBankSlaney(filterbankSize, _blockSize, samplingRate, _lowFreq, _highFreq);

                _centerFrequencies = barkBands.Select(b => b.Item2).ToArray();
            }

            // ----------------------------------- equal loudness curve ----------------------------------

            _equalLoudnessCurve = new double[filterbankSize];

            for (var band = 0; band < _centerFrequencies.Length; band++)
            {
                var level2 = _centerFrequencies[band] * _centerFrequencies[band];

                _equalLoudnessCurve[band] = Math.Pow(level2 / (level2 + 1.6e5), 2) * ((level2 + 1.44e6) / (level2 + 9.61e6));
            }

            // ------------------------------ RASTA filters (one per band) -------------------------------

            _rasta = rasta;

            if (rasta > 0)
            {
                var rastaFilters = new RastaFilter[filterbankSize];

                for (var band = 0; band < rastaFilters.Length; band++)
                {
                    rastaFilters[band] = new RastaFilter(rasta);
                }

                _rastaFilters = rastaFilters;
            }

            // ------- IDFT table for obtaining autocorrelation coeffs from power spectrum ---------------

            _lpcOrder = lpcOrder > 0 ? lpcOrder : FeatureCount - 1;

            var bandCount = filterbankSize + 2;     // +2 duplicated edges
            var freq      = Math.PI / (bandCount - 1);

            _idftTable = new float[_lpcOrder + 1][];

            for (var i = 0; i < _idftTable.Length; i++)
            {
                var row = new float[bandCount];

                // edge bands are taken once, all inner bands are taken twice

                row[0] = 1.0f;

                for (var j = 1; j < bandCount - 1; j++)
                {
                    row[j] = 2 * (float)Math.Cos(freq * i * j);
                }

                row[bandCount - 1] = (float)Math.Cos(freq * i * (bandCount - 1));

                _idftTable[i] = row;
            }

            _lpc = new float[_lpcOrder + 1];
            _cc  = new float[bandCount];

            // ----------------------------------- everything else ---------------------------------------

            _fft = new RealFft(_blockSize);

            _window        = window;
            _windowSamples = Window.OfType(_window, FrameSize);

            _lifterSize   = lifterSize;
            _lifterCoeffs = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;

            _spectrum     = new float[_blockSize / 2 + 1];
            _bandSpectrum = new float[filterbankSize];
        }