Example #1
0
        /// <summary>
        /// Creates an SPNCC extractor configured by <paramref name="options"/>.
        /// </summary>
        /// <param name="options">
        /// Extractor settings. This constructor reads FeatureCount, FilterBankSize, FilterBank, FftSize,
        /// LowFrequency, HighFrequency, Power, IncludeEnergy and LogEnergyFloor from it;
        /// the same object is also handed to the base constructor.
        /// </param>
        public SpnccExtractor(PnccOptions options) : base(options)
        {
            FeatureCount = options.FeatureCount;

            var bandCount = options.FilterBankSize;

            if (options.FilterBank is null)
            {
                // No ready-made filterbank: choose the FFT size first, then generate ERB filters for it.
                // The requested FFT size is honoured only if it is larger than the frame size.
                if (options.FftSize > FrameSize)
                {
                    _blockSize = options.FftSize;
                }
                else
                {
                    _blockSize = MathUtils.NextPowerOfTwo(FrameSize);
                }

                FilterBank = FilterBanks.Erb(bandCount, _blockSize, SamplingRate, options.LowFrequency, options.HighFrequency);
            }
            else
            {
                // Ready-made filterbank: the band count and the FFT size are derived from its dimensions
                // (each filter is expected to hold blockSize / 2 + 1 weights).
                FilterBank = options.FilterBank;
                bandCount  = FilterBank.Length;
                _blockSize = (FilterBank[0].Length - 1) * 2;

                Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
            }

            // transformers
            _fft = new RealFft(_blockSize);
            _dct = new Dct2(bandCount);

            // reusable work buffers
            _spectrum         = new float[_blockSize / 2 + 1];
            _filteredSpectrum = new float[bandCount];

            // remaining settings
            _power          = options.Power;
            _includeEnergy  = options.IncludeEnergy;
            _logEnergyFloor = options.LogEnergyFloor;
        }
Example #2
0
        /// <summary>
        /// Creates a PNCC extractor together with its FFT / DCT transformers, window and work buffers.
        /// </summary>
        /// <param name="samplingRate">Sampling rate; passed to the base constructor and to the ERB filterbank generator</param>
        /// <param name="featureCount">Value assigned to FeatureCount</param>
        /// <param name="frameDuration">Length of analysis window (in seconds)</param>
        /// <param name="hopDuration">Hop length, passed to the base constructor (in seconds)</param>
        /// <param name="power">Value stored in the _power field</param>
        /// <param name="lowFreq">Lower frequency passed to the ERB filterbank generator</param>
        /// <param name="highFreq">Upper frequency passed to the ERB filterbank generator</param>
        /// <param name="filterbankSize">Number of ERB filters to generate; ignored if <paramref name="filterbank"/> is given</param>
        /// <param name="filterbank">Ready-made filterbank; if null, an ERB filterbank is generated</param>
        /// <param name="fftSize">Size of FFT (in samples); used only if it exceeds the frame size and no filterbank is given</param>
        /// <param name="preEmphasis">Pre-emphasis coefficient, passed to the base constructor</param>
        /// <param name="window">Type of the window whose samples are generated for the frame size</param>
        public PnccExtractor(int samplingRate,
                             int featureCount,
                             double frameDuration = 0.0256 /*sec*/,
                             double hopDuration   = 0.010 /*sec*/,
                             int power            = 15,
                             double lowFreq       = 100,
                             double highFreq      = 6800,
                             int filterbankSize   = 40,
                             float[][] filterbank = null,
                             int fftSize          = 0,
                             double preEmphasis   = 0,
                             WindowTypes window   = WindowTypes.Hamming)

            : base(samplingRate, frameDuration, hopDuration, preEmphasis)
        {
            FeatureCount = featureCount;

            _power    = power;
            _lowFreq  = lowFreq;
            _highFreq = highFreq;

            var bandCount = filterbankSize;

            if (filterbank != null)
            {
                // Custom filterbank: band count and FFT size follow from its dimensions.
                FilterBank = filterbank;
                bandCount  = filterbank.Length;
                _blockSize = (filterbank[0].Length - 1) * 2;

                Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
            }
            else
            {
                // Generated filterbank: the requested FFT size is honoured only if it exceeds the frame size.
                if (fftSize > FrameSize)
                {
                    _blockSize = fftSize;
                }
                else
                {
                    _blockSize = MathUtils.NextPowerOfTwo(FrameSize);
                }

                FilterBank = FilterBanks.Erb(bandCount, _blockSize, samplingRate, lowFreq, highFreq);
            }

            // transformers
            _fft = new RealFft(_blockSize);
            _dct = new Dct2(bandCount);

            // window
            _window        = window;
            _windowSamples = Window.OfType(window, FrameSize);

            // one spectrum buffer of blockSize / 2 + 1 bins, all the others hold one value per band
            _spectrum          = new float[_blockSize / 2 + 1];
            _gammatoneSpectrum = new float[bandCount];
            _spectrumQOut      = new float[bandCount];
            _filteredSpectrumQ = new float[bandCount];
            _avgSpectrumQ1     = new float[bandCount];
            _avgSpectrumQ2     = new float[bandCount];
            _spectrumS         = new float[bandCount];
            _smoothedSpectrumS = new float[bandCount];
            _smoothedSpectrum  = new float[bandCount];

            // ring buffer with room for 2M + 1 band spectra
            _ringBuffer = new SpectraRingBuffer(2 * M + 1, bandCount);

            _step = M - 1;
        }
Example #3
0
        /// <summary>
        /// Creates an SPNCC extractor together with its FFT / DCT transformers, window and work buffers.
        /// </summary>
        /// <param name="samplingRate">Sampling rate; passed to the base constructor and to the ERB filterbank generator</param>
        /// <param name="featureCount">Value assigned to FeatureCount</param>
        /// <param name="frameDuration">Length of analysis window (in seconds)</param>
        /// <param name="hopDuration">Hop length, passed to the base constructor (in seconds)</param>
        /// <param name="power">Value stored in the _power field</param>
        /// <param name="lowFreq">Lower frequency of the generated filterbank; not stored if <paramref name="filterbank"/> is given</param>
        /// <param name="highFreq">Upper frequency of the generated filterbank; not stored if <paramref name="filterbank"/> is given</param>
        /// <param name="filterbankSize">Number of ERB filters to generate; ignored if <paramref name="filterbank"/> is given</param>
        /// <param name="filterbank">Ready-made filterbank, used as is (not squared); if null, a squared ERB filterbank is generated</param>
        /// <param name="fftSize">Size of FFT (in samples); used only if it exceeds the frame size and no filterbank is given</param>
        /// <param name="preEmphasis">Pre-emphasis coefficient, passed to the base constructor</param>
        /// <param name="window">Type of the window whose samples are generated for the frame size</param>
        public SpnccExtractor(int samplingRate,
                              int featureCount,
                              double frameDuration = 0.0256 /*sec*/,
                              double hopDuration   = 0.010 /*sec*/,
                              int power            = 15,
                              double lowFreq       = 100,
                              double highFreq      = 6800,
                              int filterbankSize   = 40,
                              float[][] filterbank = null,
                              int fftSize          = 0,
                              double preEmphasis   = 0,
                              WindowTypes window   = WindowTypes.Hamming)

            : base(samplingRate, frameDuration, hopDuration, preEmphasis)
        {
            FeatureCount = featureCount;
            _power       = power;

            var bandCount = filterbankSize;

            if (filterbank != null)
            {
                // Custom filterbank: band count and FFT size follow from its dimensions.
                FilterBank = filterbank;
                bandCount  = filterbank.Length;
                _blockSize = (filterbank[0].Length - 1) * 2;

                Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
            }
            else
            {
                // Generated filterbank: the requested FFT size is honoured only if it exceeds the frame size.
                if (fftSize > FrameSize)
                {
                    _blockSize = fftSize;
                }
                else
                {
                    _blockSize = MathUtils.NextPowerOfTwo(FrameSize);
                }

                _lowFreq  = lowFreq;
                _highFreq = highFreq;

                FilterBank = FilterBanks.Erb(bandCount, _blockSize, samplingRate, _lowFreq, _highFreq);

                // The filters are applied to a power spectrum, so every weight is squared in place.
                foreach (var weights in FilterBank)
                {
                    for (var k = 0; k < weights.Length; k++)
                    {
                        weights[k] *= weights[k];
                    }
                }
            }

            // transformers
            _fft = new RealFft(_blockSize);
            _dct = new Dct2(bandCount);

            // window
            _window        = window;
            _windowSamples = Window.OfType(window, FrameSize);

            // reusable work buffers
            _spectrum         = new float[_blockSize / 2 + 1];
            _filteredSpectrum = new float[bandCount];
        }
Example #4
0
        /// <summary>
        /// Creates an MFCC extractor together with its FFT / DCT transformers, window, lifter and work buffers.
        /// </summary>
        /// <param name="samplingRate">Sampling rate, passed to the base constructor</param>
        /// <param name="featureCount">Value assigned to FeatureCount; also the second argument of the DCT-II transformer</param>
        /// <param name="frameDuration">Frame duration, passed to the base constructor</param>
        /// <param name="hopDuration">Hop duration, passed to the base constructor</param>
        /// <param name="filterbankSize">Number of mel bands to generate; ignored if <paramref name="filterbank"/> is given</param>
        /// <param name="lowFreq">Lower frequency passed to the mel band generator; not stored if <paramref name="filterbank"/> is given</param>
        /// <param name="highFreq">Upper frequency passed to the mel band generator; not stored if <paramref name="filterbank"/> is given</param>
        /// <param name="fftSize">Size of FFT; used only if it exceeds the frame size and no filterbank is given</param>
        /// <param name="filterbank">Ready-made filterbank; if null, a triangular mel filterbank is generated</param>
        /// <param name="lifterSize">Size of the lifter; liftering coefficients are created only if it is positive</param>
        /// <param name="preEmphasis">Pre-emphasis coefficient (stored as float)</param>
        /// <param name="window">Type of the window; its samples are generated unless it is rectangular</param>
        public MfccExtractor(int samplingRate,
                             int featureCount,
                             double frameDuration = 0.0256 /*sec*/,
                             double hopDuration   = 0.010 /*sec*/,
                             int filterbankSize   = 20,
                             double lowFreq       = 0,
                             double highFreq      = 0,
                             int fftSize          = 0,
                             float[][] filterbank = null,
                             int lifterSize       = 22,
                             double preEmphasis   = 0.0,
                             WindowTypes window   = WindowTypes.Hamming)

            : base(samplingRate, frameDuration, hopDuration)
        {
            FeatureCount = featureCount;
            _preEmphasis = (float)preEmphasis;

            if (filterbank != null)
            {
                // Custom filterbank: band count and FFT size follow from its dimensions.
                FilterBank      = filterbank;
                _filterbankSize = filterbank.Length;
                _fftSize        = (filterbank[0].Length - 1) * 2;
            }
            else
            {
                // Generated filterbank: the requested FFT size is honoured only if it exceeds the frame size.
                if (fftSize > FrameSize)
                {
                    _fftSize = fftSize;
                }
                else
                {
                    _fftSize = MathUtils.NextPowerOfTwo(FrameSize);
                }

                _filterbankSize = filterbankSize;
                _lowFreq        = lowFreq;
                _highFreq       = highFreq;

                var melBands = FilterBanks.MelBands(_filterbankSize, _fftSize, SamplingRate, _lowFreq, _highFreq);

                FilterBank = FilterBanks.Triangular(_fftSize, SamplingRate, melBands);
            }

            // transformers
            _fft = new Fft(_fftSize);
            _dct = new Dct2(_filterbankSize, FeatureCount);

            // window samples are not generated for a rectangular window
            _window = window;
            if (window != WindowTypes.Rectangular)
            {
                _windowSamples = Window.OfType(window, FrameSize);
            }

            // liftering is switched off by a non-positive lifter size
            _lifterSize = lifterSize;
            if (lifterSize > 0)
            {
                _lifterCoeffs = Window.Liftering(FeatureCount, lifterSize);
            }
            else
            {
                _lifterCoeffs = null;
            }

            // reusable work buffers
            _block          = new float[_fftSize];
            _zeroblock      = new float[_fftSize];
            _spectrum       = new float[_fftSize / 2 + 1];
            _logMelSpectrum = new float[_filterbankSize];
        }
Example #5
0
        public void TestDct2Norm()
        {
            // Reference values for the first six coefficients produced by DirectNorm for the _test input.
            float[] expected = { 0.91923882f, -0.11214018f, 0.35370055f, -0.30289775f, 0.49497475f, 0.18332565f };

            var actual    = new float[expected.Length];
            var transform = new Dct2(8);

            transform.DirectNorm(_test, actual);

            Assert.That(actual, Is.EqualTo(expected).Within(1e-5));
        }
Example #6
0
        public void TestDct2()
        {
            // Reference values for the first six coefficients produced by Direct for the _test input.
            float[] expected = { 5.2f, -0.44856072f, 1.41480218f, -1.21159099f, 1.97989899f, 0.73330259f };

            var actual    = new float[expected.Length];
            var transform = new Dct2(8);

            transform.Direct(_test, actual);

            Assert.That(actual, Is.EqualTo(expected).Within(1e-5));
        }
Example #7
0
        public void TestIdct2()
        {
            // Eight DCT-II coefficients; inverting them is expected to give back the _test array.
            float[] coefficients = { 2.6f, -0.22428036f, 0.70740109f, -0.6057955f, 0.98994949f, 0.3666513f, -0.13994175f, -0.41021575f };

            var restored  = new float[8];
            var transform = new Dct2(8, 8);

            transform.Inverse(coefficients, restored);

            Assert.That(restored, Is.EqualTo(_test).Within(1e-5));
        }
Example #8
0
        public void TestDct2()
        {
            // Reference values for the six coefficients produced by Direct for the _test input.
            float[] expected = { 2.6f, -0.22428036f, 0.70740109f, -0.6057955f, 0.98994949f, 0.3666513f };

            var actual    = new float[expected.Length];
            var transform = new Dct2(8, 6);

            transform.Direct(_test, actual);

            Assert.That(actual, Is.EqualTo(expected).Within(1e-5));
        }
Example #9
0
        public void TestIdct2()
        {
            // Only six coefficients are supplied to Inverse, while eight samples are reconstructed.
            float[] coefficients = { 5.2f, -0.44856072f, 1.41480218f, -1.21159099f, 1.97989899f, 0.73330259f };

            // Reference values for the eight reconstructed samples.
            float[] expected =
            {
                8.53433006f, 1.77122807f, 3.48148502f, 7.77645215f,
                2.99512072f, -0.84717044f, 5.19445736f, 12.69409707f
            };

            var restored  = new float[expected.Length];
            var transform = new Dct2(8);

            transform.Inverse(coefficients, restored);

            Assert.That(restored, Is.EqualTo(expected).Within(1e-5));
        }
        /// <summary>
        /// PNCC algorithm according to [Kim &amp; Stern, 2016]:
        ///     0) [Optional] pre-emphasis
        ///
        /// Decompose signal into overlapping (hopSize) frames of length frameSize
        /// (each frame is zero-padded to fftSize). In each frame do:
        ///
        ///     1) Apply window (if rectangular window was specified then just do nothing)
        ///     2) Obtain power spectrum
        ///     3) Apply gammatone filters (squared)
        ///     4) Medium-time processing (asymmetric noise suppression, temporal masking, spectral smoothing)
        ///     5) Apply nonlinearity
        ///     6) Do dct-II (normalized)
        ///
        /// The first 2 * M frames only fill the ring buffer of band spectra;
        /// feature vectors are produced starting from frame number 2 * M.
        /// </summary>
        /// <param name="signal">Signal for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">Upper bound of processing: frames are taken while (frame start + frameSize) is less than this position</param>
        /// <returns>List of pncc vectors</returns>
        public override List <FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ====================================== PREPARE =======================================

            var hopSize       = (int)(signal.SamplingRate * HopSize);
            var frameSize     = (int)(signal.SamplingRate * FrameSize);
            var windowSamples = Window.OfType(_window, frameSize);

            // the configured FFT size is used only if a whole frame fits into it
            var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

            // The filterbank must be built for the FFT size that is actually used below
            // (the local fftSize, not the _fftSize field): the filters are applied to
            // a spectrum of fftSize / 2 + 1 bins.
            _gammatoneFilterBank = FilterBanks.Erb(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq);

            // use power spectrum:

            foreach (var filter in _gammatoneFilterBank)
            {
                for (var j = 0; j < filter.Length; j++)
                {
                    var ps = filter[j] * filter[j];
                    filter[j] = ps;
                }
            }


            var fft = new Fft(fftSize);
            var dct = new Dct2(_filterbankSize, FeatureCount);


            var gammatoneSpectrum = new float[_filterbankSize];

            var spectrumQOut      = new float[_filterbankSize];
            var filteredSpectrumQ = new float[_filterbankSize];
            var spectrumS         = new float[_filterbankSize];
            var smoothedSpectrumS = new float[_filterbankSize];
            var avgSpectrumQ1     = new float[_filterbankSize];
            var avgSpectrumQ2     = new float[_filterbankSize];
            var smoothedSpectrum  = new float[_filterbankSize];

            const float meanPower = 1e10f;      // numerator of the normalization factor in step 4.5
            var         mean      = 4e07f;      // initial value of the running mean of frame power

            // exponent of the power nonlinearity (zero power switches to Log10 in step 5)
            var d = _power != 0 ? 1.0 / _power : 0.0;

            var block     = new float[fftSize];       // buffer for currently processed signal block at each step
            var zeroblock = new float[fftSize];       // buffer of zeros for quick memset

            _ringBuffer = new SpectraRingBuffer(2 * M + 1, _filterbankSize);

            var spectrum = new float[fftSize / 2 + 1];


            // 0) pre-emphasis (if needed)

            if (_preEmphasis > 0.0)
            {
                var preemphasisFilter = new PreEmphasisFilter(_preEmphasis);
                signal = preemphasisFilter.ApplyTo(signal);
            }


            // ================================= MAIN PROCESSING ==================================

            var featureVectors = new List <FeatureVector>();

            var i       = 0;                // number of the current frame
            var timePos = startSample;      // position of the first sample of the current frame

            while (timePos + frameSize < endSample)
            {
                // prepare next block for processing:
                // clear the whole block, then copy one frame to its beginning (the rest stays zero-padded)

                zeroblock.FastCopyTo(block, zeroblock.Length);
                signal.Samples.FastCopyTo(block, frameSize, timePos);


                // 1) apply window

                if (_window != WindowTypes.Rectangular)
                {
                    block.ApplyWindow(windowSamples);
                }


                // 2) calculate power spectrum

                fft.PowerSpectrum(block, spectrum);


                // 3) apply gammatone filterbank

                FilterBanks.Apply(_gammatoneFilterBank, spectrum, gammatoneSpectrum);



                // =============================================================
                // 4) medium-time processing blocks:

                // 4.1) temporal integration (zero-phase moving average filter)

                _ringBuffer.Add(gammatoneSpectrum);
                var spectrumQ = _ringBuffer.AverageSpectrum;

                // 4.2) asymmetric noise suppression

                // one-time initialization of the lower envelope at frame number 2 * M
                if (i == 2 * M)
                {
                    for (var j = 0; j < spectrumQOut.Length; j++)
                    {
                        spectrumQOut[j] = spectrumQ[j] * 0.9f;
                    }
                }

                if (i >= 2 * M)
                {
                    // asymmetric smoothing: LambdaA is used while the input is above the envelope, LambdaB otherwise
                    for (var j = 0; j < spectrumQOut.Length; j++)
                    {
                        if (spectrumQ[j] > spectrumQOut[j])
                        {
                            spectrumQOut[j] = LambdaA * spectrumQOut[j] + (1 - LambdaA) * spectrumQ[j];
                        }
                        else
                        {
                            spectrumQOut[j] = LambdaB * spectrumQOut[j] + (1 - LambdaB) * spectrumQ[j];
                        }
                    }

                    for (var j = 0; j < filteredSpectrumQ.Length; j++)
                    {
                        // half-wave rectification of the difference between the input and its lower envelope
                        filteredSpectrumQ[j] = Math.Max(spectrumQ[j] - spectrumQOut[j], 0.0f);

                        // one-time initialization of both running values at frame number 2 * M
                        if (i == 2 * M)
                        {
                            avgSpectrumQ1[j] = 0.9f * filteredSpectrumQ[j];
                            avgSpectrumQ2[j] = filteredSpectrumQ[j];
                        }

                        if (filteredSpectrumQ[j] > avgSpectrumQ1[j])
                        {
                            avgSpectrumQ1[j] = LambdaA * avgSpectrumQ1[j] + (1 - LambdaA) * filteredSpectrumQ[j];
                        }
                        else
                        {
                            avgSpectrumQ1[j] = LambdaB * avgSpectrumQ1[j] + (1 - LambdaB) * filteredSpectrumQ[j];
                        }

                        // 4.3) temporal masking

                        // remember the rectified value: it updates avgSpectrumQ2 after filteredSpectrumQ[j] is overwritten
                        var threshold = filteredSpectrumQ[j];

                        avgSpectrumQ2[j] *= LambdaT;
                        if (spectrumQ[j] < C * spectrumQOut[j])
                        {
                            filteredSpectrumQ[j] = avgSpectrumQ1[j];
                        }
                        else
                        {
                            if (filteredSpectrumQ[j] <= avgSpectrumQ2[j])
                            {
                                filteredSpectrumQ[j] = MuT * avgSpectrumQ2[j];
                            }
                        }
                        avgSpectrumQ2[j] = Math.Max(avgSpectrumQ2[j], threshold);

                        filteredSpectrumQ[j] = Math.Max(filteredSpectrumQ[j], avgSpectrumQ1[j]);
                    }


                    // 4.4) spectral smoothing

                    // ratio of processed to unprocessed band power (the divisor is kept above zero)
                    for (var j = 0; j < spectrumS.Length; j++)
                    {
                        spectrumS[j] = filteredSpectrumQ[j] / Math.Max(spectrumQ[j], float.Epsilon);
                    }

                    // average of the ratio over up to N neighbouring bands on each side (fewer at the edges)
                    for (var j = 0; j < smoothedSpectrumS.Length; j++)
                    {
                        smoothedSpectrumS[j] = 0.0f;

                        var total = 0;
                        for (var k = Math.Max(j - N, 0);
                             k < Math.Min(j + N + 1, _filterbankSize);
                             k++, total++)
                        {
                            smoothedSpectrumS[j] += spectrumS[k];
                        }
                        smoothedSpectrumS[j] /= total;
                    }

                    // 4.5) mean power normalization

                    // NOTE(review): the smoothed ratio is applied to the central spectrum of the ring buffer,
                    // whereas TimePosition below is the start of the most recently added frame - confirm this is intended.
                    var centralSpectrum = _ringBuffer.CentralSpectrum;

                    var sumPower = 0.0f;
                    for (var j = 0; j < smoothedSpectrum.Length; j++)
                    {
                        smoothedSpectrum[j] = smoothedSpectrumS[j] * centralSpectrum[j];
                        sumPower           += smoothedSpectrum[j];
                    }

                    // running mean of the frame power
                    mean = LambdaMu * mean + (1 - LambdaMu) * sumPower;

                    for (var j = 0; j < smoothedSpectrum.Length; j++)
                    {
                        smoothedSpectrum[j] *= meanPower / mean;
                    }

                    // =============================================================


                    // 5) nonlinearity (power ^ d    or    Log10)

                    if (_power != 0)
                    {
                        for (var j = 0; j < smoothedSpectrum.Length; j++)
                        {
                            smoothedSpectrum[j] = (float)Math.Pow(smoothedSpectrum[j], d);
                        }
                    }
                    else
                    {
                        for (var j = 0; j < smoothedSpectrum.Length; j++)
                        {
                            smoothedSpectrum[j] = (float)Math.Log10(smoothedSpectrum[j] + float.Epsilon);
                        }
                    }

                    // 6) dct-II (normalized)

                    // a new array per frame: it becomes the property of the output feature vector
                    var pnccs = new float[FeatureCount];
                    dct.DirectN(smoothedSpectrum, pnccs);


                    // add pncc vector to output sequence

                    featureVectors.Add(new FeatureVector
                    {
                        Features     = pnccs,
                        TimePosition = (double)timePos / signal.SamplingRate
                    });
                }

                i++;

                timePos += hopSize;
            }

            return(featureVectors);
        }
Example #11
0
        /// <summary>
        /// Tests the express where-clause specified in param 'clause'
        /// </summary>
        /// <param name="clause">The express clause to test</param>
        /// <returns>
        /// true if the clause is satisfied; false if it is not satisfied, if it is not one of
        /// the clauses handled here, or if its evaluation threw an exception (which is logged, not rethrown).
        /// </returns>
        public bool ValidateClause(IfcDimensionCurveClause clause)
        {
            var retVal = false;

            try
            {
                switch (clause)
                {
                // WR51: at least one IFCDRAUGHTINGCALLOUT.CONTENTS usage of this curve
                case IfcDimensionCurveClause.WR51:
                    retVal = Functions.SIZEOF(Functions.USEDIN(this, "IFC2X3.IFCDRAUGHTINGCALLOUT.CONTENTS")) >= 1;
                    break;

                // WR52: at most one terminator with the ORIGIN role and at most one with the TARGET role
                case IfcDimensionCurveClause.WR52:
                    retVal = (Functions.SIZEOF(Functions.USEDIN(this, "IFC2X3." + "IFCTERMINATORSYMBOL.ANNOTATEDCURVE").Where(Dct1 => (Dct1.AsIfcDimensionCurveTerminator().Role == IfcDimensionExtentUsage.ORIGIN))) <= 1) && (Functions.SIZEOF(Functions.USEDIN(this, "IFC2X3." + "IFCTERMINATORSYMBOL.ANNOTATEDCURVE").Where(Dct2 => (Dct2.AsIfcDimensionCurveTerminator().Role == IfcDimensionExtentUsage.TARGET))) <= 1);
                    break;

                // WR53: no annotating symbol whose type list lacks IFCDIMENSIONCURVETERMINATOR
                case IfcDimensionCurveClause.WR53:
                    retVal = Functions.SIZEOF(AnnotatedBySymbols.Where(Dct => !(Functions.TYPEOF(Dct).Contains("IFC2X3.IFCDIMENSIONCURVETERMINATOR")))) == 0;
                    break;
                }
            } catch (Exception ex) {
                var log = Validation.ValidationLogging.CreateLogger <Xbim.Ifc2x3.PresentationDimensioningResource.IfcDimensionCurve>();

                // The exception goes first so that it is recorded as the exception of the log entry.
                // Passed after the message it is treated as a format argument, and since the message
                // has no placeholder for it, the exception details are lost.
                log?.LogError(ex, "Exception thrown evaluating where-clause 'IfcDimensionCurve.{Clause}' for #{EntityLabel}.", clause, EntityLabel);
            }
            return(retVal);
        }
Example #12
0
        /// <summary>
        /// Creates a PNCC extractor together with its FFT / DCT transformers, window and work buffers.
        /// </summary>
        /// <param name="samplingRate">Sampling rate; passed to the base constructor and to the ERB filterbank generator</param>
        /// <param name="featureCount">Value assigned to FeatureCount; also the second argument of the DCT-II transformer</param>
        /// <param name="frameDuration">Length of analysis window (in seconds)</param>
        /// <param name="hopDuration">Hop length, passed to the base constructor (in seconds)</param>
        /// <param name="power">Value stored in the _power field</param>
        /// <param name="lowFreq">Lower frequency of the generated filterbank; not stored if <paramref name="filterbank"/> is given</param>
        /// <param name="highFreq">Upper frequency of the generated filterbank; not stored if <paramref name="filterbank"/> is given</param>
        /// <param name="filterbankSize">Number of ERB filters to generate; ignored if <paramref name="filterbank"/> is given</param>
        /// <param name="filterbank">Ready-made filterbank, used as is (not squared); if null, a squared ERB filterbank is generated</param>
        /// <param name="fftSize">Size of FFT (in samples); used only if it exceeds the frame size and no filterbank is given</param>
        /// <param name="preEmphasis">Pre-emphasis coefficient (stored as float)</param>
        /// <param name="window">Type of the window; its samples are generated unless it is rectangular</param>
        public PnccExtractor(int samplingRate,
                             int featureCount,
                             double frameDuration = 0.0256 /*sec*/,
                             double hopDuration   = 0.010 /*sec*/,
                             int power            = 15,
                             double lowFreq       = 100,
                             double highFreq      = 6800,
                             int filterbankSize   = 40,
                             float[][] filterbank = null,
                             int fftSize          = 0,
                             double preEmphasis   = 0.0,
                             WindowTypes window   = WindowTypes.Hamming)

            : base(samplingRate, frameDuration, hopDuration)
        {
            FeatureCount = featureCount;
            _power       = power;
            _preEmphasis = (float)preEmphasis;

            if (filterbank != null)
            {
                // Custom filterbank: band count and FFT size follow from its dimensions.
                FilterBank      = filterbank;
                _filterbankSize = filterbank.Length;
                _fftSize        = (filterbank[0].Length - 1) * 2;
            }
            else
            {
                // Generated filterbank: the requested FFT size is honoured only if it exceeds the frame size.
                if (fftSize > FrameSize)
                {
                    _fftSize = fftSize;
                }
                else
                {
                    _fftSize = MathUtils.NextPowerOfTwo(FrameSize);
                }

                _filterbankSize = filterbankSize;
                _lowFreq        = lowFreq;
                _highFreq       = highFreq;

                FilterBank = FilterBanks.Erb(_filterbankSize, _fftSize, samplingRate, _lowFreq, _highFreq);

                // The filters are applied to a power spectrum, so every weight is squared in place.
                foreach (var weights in FilterBank)
                {
                    for (var k = 0; k < weights.Length; k++)
                    {
                        weights[k] *= weights[k];
                    }
                }
            }

            // transformers
            _fft = new Fft(_fftSize);
            _dct = new Dct2(_filterbankSize, FeatureCount);

            // window samples are not generated for a rectangular window
            _window = window;
            if (window != WindowTypes.Rectangular)
            {
                _windowSamples = Window.OfType(window, FrameSize);
            }

            // buffers of FFT size (signal block and a block of zeros) and the spectrum buffer
            _block     = new float[_fftSize];
            _zeroblock = new float[_fftSize];
            _spectrum  = new float[_fftSize / 2 + 1];

            // buffers holding one value per filterbank band
            _gammatoneSpectrum = new float[_filterbankSize];
            _spectrumQOut      = new float[_filterbankSize];
            _filteredSpectrumQ = new float[_filterbankSize];
            _avgSpectrumQ1     = new float[_filterbankSize];
            _avgSpectrumQ2     = new float[_filterbankSize];
            _spectrumS         = new float[_filterbankSize];
            _smoothedSpectrumS = new float[_filterbankSize];
            _smoothedSpectrum  = new float[_filterbankSize];

            // ring buffer with room for 2M + 1 band spectra
            _ringBuffer = new SpectraRingBuffer(2 * M + 1, _filterbankSize);
        }
Example #13
0
        /// <summary>
        /// Computes mfcc vectors for a section of a signal.
        ///
        /// The section is cut into frames of frameSize samples taken every hopSize samples;
        /// each frame is zero-padded to the FFT size and goes through the following steps:
        ///
        ///     0) [Optional] pre-emphasis
        ///     1) Windowing (skipped for rectangular window)
        ///     2) Power spectrum X
        ///     3) Mel filterbank and log() of the result: Y = Log10(X * H)
        ///     4) dct-II: mfcc = Dct(Y)
        ///     5) [Optional] liftering of mfcc
        ///
        /// </summary>
        /// <param name="signal">Signal for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">Upper bound of processing: frames are taken while (frame start + frameSize) is less than this position</param>
        /// <returns>List of mfcc vectors</returns>
        public override List <FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ====================================== PREPARE =======================================

            var samplingRate = signal.SamplingRate;

            var hopSize       = (int)(samplingRate * HopSize);
            var frameSize     = (int)(samplingRate * FrameSize);
            var windowSamples = Window.OfType(_window, frameSize);

            // the configured FFT size is used only if a whole frame fits into it
            var fftSize = _fftSize;
            if (fftSize < frameSize)
            {
                fftSize = MathUtils.NextPowerOfTwo(frameSize);
            }

            var melBands = FilterBanks.MelBands(_filterbankSize, fftSize, samplingRate, _lowFreq, _highFreq);

            _melFilterBank = FilterBanks.Triangular(fftSize, samplingRate, melBands);

            // liftering is switched off by a non-positive lifter size
            float[] lifterCoeffs = null;
            if (_lifterSize > 0)
            {
                lifterCoeffs = Window.Liftering(FeatureCount, _lifterSize);
            }

            var fft = new Fft(fftSize);
            var dct = new Dct2(_filterbankSize, FeatureCount);

            // buffers reused for every frame

            var block     = new float[fftSize];   // currently processed (zero-padded) frame
            var zeroblock = new float[fftSize];   // zeros, copied over the block to clear it

            var spectrum       = new float[fftSize / 2 + 1];
            var logMelSpectrum = new float[_filterbankSize];


            // ================================= MAIN PROCESSING ==================================

            var featureVectors = new List <FeatureVector>();

            // sample preceding the first frame (zero at the very beginning of the signal)
            var prevSample = 0.0f;
            if (startSample > 0)
            {
                prevSample = signal[startSample - 1];
            }

            for (var pos = startSample; pos + frameSize < endSample; pos += hopSize)
            {
                // clear the block, then copy one frame to its beginning

                zeroblock.FastCopyTo(block, zeroblock.Length);
                signal.Samples.FastCopyTo(block, windowSamples.Length, pos);


                // 0) pre-emphasis (if needed)

                if (_preEmphasis > 0.0)
                {
                    for (var k = 0; k < frameSize; k++)
                    {
                        var current = block[k];
                        block[k]    = current - prevSample * _preEmphasis;
                        prevSample  = current;
                    }

                    // the next frame starts at pos + hopSize, so this is the sample right before it
                    prevSample = signal[pos + hopSize - 1];
                }


                // 1) windowing

                if (_window != WindowTypes.Rectangular)
                {
                    block.ApplyWindow(windowSamples);
                }


                // 2) power spectrum

                fft.PowerSpectrum(block, spectrum);


                // 3) mel filterbank and log() of the result

                FilterBanks.ApplyAndLog(_melFilterBank, spectrum, logMelSpectrum);


                // 4) dct-II (a new array per frame: it is stored in the output vector)

                var mfccs = new float[FeatureCount];
                dct.Direct(logMelSpectrum, mfccs);


                // 5) (optional) liftering

                if (lifterCoeffs != null)
                {
                    mfccs.ApplyWindow(lifterCoeffs);
                }


                // append the vector, stamped with the start time of its frame

                var vector = new FeatureVector
                {
                    Features     = mfccs,
                    TimePosition = (double)pos / samplingRate
                };

                featureVectors.Add(vector);
            }

            return featureVectors;
        }
Example #14
0
        /// <summary>
        /// S(implified)PNCC algorithm according to [Kim and Stern, 2016]:
        ///     0) [Optional] pre-emphasis
        ///
        /// Decompose signal into overlapping frames of length frameSize (step is hopSize);
        /// each frame is zero-padded to fftSize. In each frame do:
        ///
        ///     1) Apply window (if rectangular window was specified then just do nothing)
        ///     2) Obtain power spectrum
        ///     3) Apply gammatone filters (squared)
        ///     4) Mean power normalization
        ///     5) Apply nonlinearity (power law if the power parameter is non-zero, Log10 otherwise)
        ///     6) Do dct-II (normalized)
        ///
        /// The gammatone filter bank is rebuilt on every call for the sampling rate of
        /// <paramref name="signal"/> and is sized for the FFT length that is actually used.
        /// </summary>
        /// <param name="signal">Signal for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of pncc vectors (one per processed frame, time-stamped with the frame start in seconds)</returns>
        public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ====================================== PREPARE =======================================

            var hopSize       = (int)(signal.SamplingRate * HopSize);
            var frameSize     = (int)(signal.SamplingRate * FrameSize);
            var windowSamples = Window.OfType(_window, frameSize);

            // effective FFT size: the configured one if it can hold a whole frame,
            // otherwise the nearest power of two that can

            var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

            // the filter bank must be built for the effective FFT size (not the configured one),
            // otherwise the filters would not match the spectrum computed below

            _gammatoneFilterBank = FilterBanks.Erb(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq);

            // use power spectrum, so square the filter weights:

            foreach (var filter in _gammatoneFilterBank)
            {
                for (var j = 0; j < filter.Length; j++)
                {
                    filter[j] *= filter[j];
                }
            }


            var fft = new Fft(fftSize);
            var dct = new Dct2(_filterbankSize, FeatureCount);


            var gammatoneSpectrum = new float[_filterbankSize];

            // target level of the normalized power and the initial value of the running mean
            // NOTE(review): presumably taken from the reference PNCC implementation - confirm

            const float meanPower = 1e10f;
            var         mean      = 4e07f;

            // exponent of the power-law nonlinearity (unused when _power == 0, i.e. in Log10 mode)

            var d = _power != 0 ? 1.0 / _power : 0.0;

            var block     = new float[fftSize];       // buffer for a signal block at each step
            var zeroblock = new float[fftSize];       // buffer of zeros for quick memset

            var spectrum = new float[fftSize / 2 + 1];


            // ================================= MAIN PROCESSING ==================================

            var featureVectors = new List<FeatureVector>();

            // pre-emphasis filter state: the sample right before the first frame (if there is one)

            var prevSample = startSample > 0 ? signal[startSample - 1] : 0.0f;

            var i = startSample;

            while (i + frameSize < endSample)
            {
                // prepare next block for processing:
                // clear the whole buffer, then copy one frame to its beginning (the rest stays zero-padded)

                zeroblock.FastCopyTo(block, zeroblock.Length);
                signal.Samples.FastCopyTo(block, frameSize, i);


                // 0) pre-emphasis (if needed)

                if (_preEmphasis > 0.0)
                {
                    for (var k = 0; k < frameSize; k++)
                    {
                        var y = block[k] - prevSample * _preEmphasis;
                        prevSample = block[k];
                        block[k]   = y;
                    }

                    // frames overlap, so the filter state for the next frame
                    // is the sample right before its first sample

                    prevSample = signal[i + hopSize - 1];
                }


                // 1) apply window

                if (_window != WindowTypes.Rectangular)
                {
                    block.ApplyWindow(windowSamples);
                }


                // 2) calculate power spectrum

                fft.PowerSpectrum(block, spectrum);


                // 3) apply gammatone filterbank

                FilterBanks.Apply(_gammatoneFilterBank, spectrum, gammatoneSpectrum);


                // 4) mean power normalization:

                var sumPower = 0.0f;
                for (var j = 0; j < gammatoneSpectrum.Length; j++)
                {
                    sumPower += gammatoneSpectrum[j];
                }

                // running (exponentially smoothed) estimate of the frame power

                mean = LambdaMu * mean + (1 - LambdaMu) * sumPower;

                var normalizer = meanPower / mean;

                for (var j = 0; j < gammatoneSpectrum.Length; j++)
                {
                    gammatoneSpectrum[j] *= normalizer;
                }


                // 5) nonlinearity (power ^ d     or     Log10)

                if (_power != 0)
                {
                    for (var j = 0; j < gammatoneSpectrum.Length; j++)
                    {
                        gammatoneSpectrum[j] = (float)Math.Pow(gammatoneSpectrum[j], d);
                    }
                }
                else
                {
                    // float.Epsilon keeps the argument of Log10 away from zero

                    for (var j = 0; j < gammatoneSpectrum.Length; j++)
                    {
                        gammatoneSpectrum[j] = (float)Math.Log10(gammatoneSpectrum[j] + float.Epsilon);
                    }
                }


                // 6) dct-II (normalized)

                var spnccs = new float[FeatureCount];
                dct.DirectN(gammatoneSpectrum, spnccs);


                // add pncc vector to output sequence

                featureVectors.Add(new FeatureVector
                {
                    Features     = spnccs,
                    TimePosition = (double)i / signal.SamplingRate
                });

                i += hopSize;
            }

            return featureVectors;
        }