Exemple #1
0
        /// <summary>
        /// Creates PNCC extractor from explicitly given parameters.
        /// </summary>
        /// <param name="samplingRate">Sampling rate; forwarded to the base extractor and to the ERB filterbank generator</param>
        /// <param name="featureCount">Value assigned to <c>FeatureCount</c></param>
        /// <param name="frameDuration">Length of analysis window (in seconds)</param>
        /// <param name="hopDuration">Hop duration (in seconds); forwarded to the base extractor</param>
        /// <param name="power">Value stored as the power setting of the nonlinearity stage</param>
        /// <param name="lowFreq">Lower frequency bound passed to the ERB filterbank generator</param>
        /// <param name="highFreq">Upper frequency bound passed to the ERB filterbank generator</param>
        /// <param name="filterbankSize">Number of ERB filters to generate (ignored if <paramref name="filterbank"/> is given)</param>
        /// <param name="filterbank">Custom filterbank; if null, an ERB filterbank is generated</param>
        /// <param name="fftSize">Size of FFT (in samples); used only if it exceeds the frame size and no custom filterbank is given</param>
        /// <param name="preEmphasis">Pre-emphasis coefficient; forwarded to the base extractor</param>
        /// <param name="window">Type of the window whose samples are precomputed for the frame size</param>
        public PnccExtractor(int samplingRate,
                             int featureCount,
                             double frameDuration = 0.0256 /*sec*/,
                             double hopDuration   = 0.010 /*sec*/,
                             int power            = 15,
                             double lowFreq       = 100,
                             double highFreq      = 6800,
                             int filterbankSize   = 40,
                             float[][] filterbank = null,
                             int fftSize          = 0,
                             double preEmphasis   = 0,
                             WindowTypes window   = WindowTypes.Hamming)

            : base(samplingRate, frameDuration, hopDuration, preEmphasis)
        {
            FeatureCount = featureCount;

            _power    = power;
            _window   = window;
            _lowFreq  = lowFreq;
            _highFreq = highFreq;

            // number of bands actually used: either requested or dictated by the custom filterbank
            int bandCount;

            if (filterbank != null)
            {
                // custom filterbank: the FFT size follows from the number of bins in each filter
                FilterBank = filterbank;
                bandCount  = filterbank.Length;
                _blockSize = 2 * (filterbank[0].Length - 1);

                Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
            }
            else
            {
                bandCount = filterbankSize;

                // requested FFT size is honoured only if it is larger than the frame
                if (fftSize > FrameSize)
                {
                    _blockSize = fftSize;
                }
                else
                {
                    _blockSize = MathUtils.NextPowerOfTwo(FrameSize);
                }

                FilterBank = FilterBanks.Erb(bandCount, _blockSize, samplingRate, _lowFreq, _highFreq);
            }

            _fft = new RealFft(_blockSize);
            _dct = new Dct2(bandCount);

            _windowSamples = Window.OfType(_window, FrameSize);

            // reusable work buffers

            _spectrum = new float[_blockSize / 2 + 1];

            _gammatoneSpectrum = new float[bandCount];
            _spectrumQOut      = new float[bandCount];
            _filteredSpectrumQ = new float[bandCount];
            _avgSpectrumQ1     = new float[bandCount];
            _avgSpectrumQ2     = new float[bandCount];
            _spectrumS         = new float[bandCount];
            _smoothedSpectrumS = new float[bandCount];
            _smoothedSpectrum  = new float[bandCount];

            _ringBuffer = new SpectraRingBuffer(2 * M + 1, bandCount);

            _step = M - 1;
        }
Exemple #2
0
        /// <summary>
        /// Creates PNCC extractor configured by <paramref name="options"/>.
        /// </summary>
        /// <param name="options">
        /// Configuration object. If its filterbank is null, an ERB filterbank is generated
        /// from the filterbank size, FFT size and frequency bounds in the options;
        /// otherwise the given filterbank is used and the FFT size is derived from it.
        /// </param>
        public PnccExtractor(PnccOptions options) : base(options)
        {
            FeatureCount = options.FeatureCount;

            var bandCount = options.FilterBankSize;

            if (options.FilterBank != null)
            {
                // custom filterbank: band count and FFT size follow from its dimensions
                FilterBank = options.FilterBank;
                bandCount  = FilterBank.Length;
                _blockSize = 2 * (FilterBank[0].Length - 1);

                Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");
            }
            else
            {
                // requested FFT size is honoured only if it is larger than the frame
                if (options.FftSize > FrameSize)
                {
                    _blockSize = options.FftSize;
                }
                else
                {
                    _blockSize = MathUtils.NextPowerOfTwo(FrameSize);
                }

                FilterBank = FilterBanks.Erb(options.FilterBankSize,
                                             _blockSize,
                                             SamplingRate,
                                             options.LowFrequency,
                                             options.HighFrequency);
            }

            _fft = new RealFft(_blockSize);
            _dct = new Dct2(bandCount);

            _power          = options.Power;
            _includeEnergy  = options.IncludeEnergy;
            _logEnergyFloor = options.LogEnergyFloor;

            // reusable work buffers

            _spectrum = new float[_blockSize / 2 + 1];

            _gammatoneSpectrum = new float[bandCount];
            _spectrumQOut      = new float[bandCount];
            _filteredSpectrumQ = new float[bandCount];
            _avgSpectrumQ1     = new float[bandCount];
            _avgSpectrumQ2     = new float[bandCount];
            _spectrumS         = new float[bandCount];
            _smoothedSpectrumS = new float[bandCount];
            _smoothedSpectrum  = new float[bandCount];

            _ringBuffer = new SpectraRingBuffer(2 * M + 1, bandCount);

            _step = M - 1;
        }
        /// <summary>
        /// PNCC algorithm according to [Kim and Stern, 2016]:
        ///     0) [Optional] pre-emphasis
        ///
        /// Decompose signal into frames of length frameSize taken every hopSize samples
        /// (each frame is zero-padded to fftSize). In each frame do:
        ///
        ///     1) Apply window (if rectangular window was specified then just do nothing)
        ///     2) Obtain power spectrum
        ///     3) Apply gammatone filters (squared)
        ///     4) Medium-time processing (asymmetric noise suppression, temporal masking, spectral smoothing)
        ///     5) Apply nonlinearity
        ///     6) Do dct-II (normalized)
        ///
        /// No feature vector is produced for the first 2*M frames:
        /// they are only accumulated in the ring buffer used for temporal integration.
        /// </summary>
        /// <param name="signal">Signal for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of pncc vectors</returns>
        public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ====================================== PREPARE =======================================

            // HopSize and FrameSize are scaled by the sampling rate here to obtain sizes in samples
            var hopSize       = (int)(signal.SamplingRate * HopSize);
            var frameSize     = (int)(signal.SamplingRate * FrameSize);
            var windowSamples = Window.OfType(_window, frameSize);

            // FFT must be at least as long as the frame; otherwise fall back to next power of two
            var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

            // The filterbank must be built for the FFT size that is actually used below
            // (fftSize, not _fftSize), so that each filter has fftSize / 2 + 1 bins
            // matching the length of the power spectrum.
            _gammatoneFilterBank = FilterBanks.Erb(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq);

            // use power spectrum:

            foreach (var filter in _gammatoneFilterBank)
            {
                for (var j = 0; j < filter.Length; j++)
                {
                    var ps = filter[j] * filter[j];
                    filter[j] = ps;
                }
            }


            var fft = new Fft(fftSize);
            var dct = new Dct2(_filterbankSize, FeatureCount);


            var gammatoneSpectrum = new float[_filterbankSize];

            var spectrumQOut      = new float[_filterbankSize];
            var filteredSpectrumQ = new float[_filterbankSize];
            var spectrumS         = new float[_filterbankSize];
            var smoothedSpectrumS = new float[_filterbankSize];
            var avgSpectrumQ1     = new float[_filterbankSize];
            var avgSpectrumQ2     = new float[_filterbankSize];
            var smoothedSpectrum  = new float[_filterbankSize];

            const float meanPower = 1e10f;
            var         mean      = 4e07f;

            // exponent of the power-law nonlinearity (unused when _power == 0, i.e. Log10 is applied)
            var d = _power != 0 ? 1.0 / _power : 0.0;

            var block     = new float[fftSize];       // buffer for currently processed signal block at each step
            var zeroblock = new float[fftSize];       // buffer of zeros for quick memset

            _ringBuffer = new SpectraRingBuffer(2 * M + 1, _filterbankSize);

            var spectrum = new float[fftSize / 2 + 1];


            // 0) pre-emphasis (if needed)

            if (_preEmphasis > 0.0)
            {
                var preemphasisFilter = new PreEmphasisFilter(_preEmphasis);
                signal = preemphasisFilter.ApplyTo(signal);
            }


            // ================================= MAIN PROCESSING ==================================

            var featureVectors = new List<FeatureVector>();

            var i       = 0;            // index of the current frame
            var timePos = startSample;

            while (timePos + frameSize < endSample)
            {
                // prepare next block for processing

                zeroblock.FastCopyTo(block, zeroblock.Length);
                signal.Samples.FastCopyTo(block, frameSize, timePos);


                // 1) apply window

                if (_window != WindowTypes.Rectangular)
                {
                    block.ApplyWindow(windowSamples);
                }


                // 2) calculate power spectrum

                fft.PowerSpectrum(block, spectrum);


                // 3) apply gammatone filterbank

                FilterBanks.Apply(_gammatoneFilterBank, spectrum, gammatoneSpectrum);



                // =============================================================
                // 4) medium-time processing blocks:

                // 4.1) temporal integration (zero-phase moving average filter)

                _ringBuffer.Add(gammatoneSpectrum);
                var spectrumQ = _ringBuffer.AverageSpectrum;

                // 4.2) asymmetric noise suppression

                // initialize the lower envelope once, at the first frame that is actually processed
                if (i == 2 * M)
                {
                    for (var j = 0; j < spectrumQOut.Length; j++)
                    {
                        spectrumQOut[j] = spectrumQ[j] * 0.9f;
                    }
                }

                if (i >= 2 * M)
                {
                    for (var j = 0; j < spectrumQOut.Length; j++)
                    {
                        if (spectrumQ[j] > spectrumQOut[j])
                        {
                            spectrumQOut[j] = LambdaA * spectrumQOut[j] + (1 - LambdaA) * spectrumQ[j];
                        }
                        else
                        {
                            spectrumQOut[j] = LambdaB * spectrumQOut[j] + (1 - LambdaB) * spectrumQ[j];
                        }
                    }

                    for (var j = 0; j < filteredSpectrumQ.Length; j++)
                    {
                        // half-wave rectification of the difference
                        filteredSpectrumQ[j] = Math.Max(spectrumQ[j] - spectrumQOut[j], 0.0f);

                        if (i == 2 * M)
                        {
                            avgSpectrumQ1[j] = 0.9f * filteredSpectrumQ[j];
                            avgSpectrumQ2[j] = filteredSpectrumQ[j];
                        }

                        if (filteredSpectrumQ[j] > avgSpectrumQ1[j])
                        {
                            avgSpectrumQ1[j] = LambdaA * avgSpectrumQ1[j] + (1 - LambdaA) * filteredSpectrumQ[j];
                        }
                        else
                        {
                            avgSpectrumQ1[j] = LambdaB * avgSpectrumQ1[j] + (1 - LambdaB) * filteredSpectrumQ[j];
                        }

                        // 4.3) temporal masking

                        // remember the value before masking: it updates the peak tracker below
                        var threshold = filteredSpectrumQ[j];

                        avgSpectrumQ2[j] *= LambdaT;
                        if (spectrumQ[j] < C * spectrumQOut[j])
                        {
                            filteredSpectrumQ[j] = avgSpectrumQ1[j];
                        }
                        else
                        {
                            if (filteredSpectrumQ[j] <= avgSpectrumQ2[j])
                            {
                                filteredSpectrumQ[j] = MuT * avgSpectrumQ2[j];
                            }
                        }
                        avgSpectrumQ2[j] = Math.Max(avgSpectrumQ2[j], threshold);

                        filteredSpectrumQ[j] = Math.Max(filteredSpectrumQ[j], avgSpectrumQ1[j]);
                    }


                    // 4.4) spectral smoothing

                    for (var j = 0; j < spectrumS.Length; j++)
                    {
                        // float.Epsilon guards against division by zero
                        spectrumS[j] = filteredSpectrumQ[j] / Math.Max(spectrumQ[j], float.Epsilon);
                    }

                    for (var j = 0; j < smoothedSpectrumS.Length; j++)
                    {
                        smoothedSpectrumS[j] = 0.0f;

                        // average over up to 2N+1 neighbouring bands (fewer at the edges)
                        var total = 0;
                        for (var k = Math.Max(j - N, 0);
                             k < Math.Min(j + N + 1, _filterbankSize);
                             k++, total++)
                        {
                            smoothedSpectrumS[j] += spectrumS[k];
                        }
                        smoothedSpectrumS[j] /= total;
                    }

                    // 4.5) mean power normalization

                    var centralSpectrum = _ringBuffer.CentralSpectrum;

                    var sumPower = 0.0f;
                    for (var j = 0; j < smoothedSpectrum.Length; j++)
                    {
                        smoothedSpectrum[j] = smoothedSpectrumS[j] * centralSpectrum[j];
                        sumPower           += smoothedSpectrum[j];
                    }

                    mean = LambdaMu * mean + (1 - LambdaMu) * sumPower;

                    for (var j = 0; j < smoothedSpectrum.Length; j++)
                    {
                        smoothedSpectrum[j] *= meanPower / mean;
                    }

                    // =============================================================


                    // 5) nonlinearity (power ^ d    or    Log10)

                    if (_power != 0)
                    {
                        for (var j = 0; j < smoothedSpectrum.Length; j++)
                        {
                            smoothedSpectrum[j] = (float)Math.Pow(smoothedSpectrum[j], d);
                        }
                    }
                    else
                    {
                        for (var j = 0; j < smoothedSpectrum.Length; j++)
                        {
                            smoothedSpectrum[j] = (float)Math.Log10(smoothedSpectrum[j] + float.Epsilon);
                        }
                    }

                    // 6) dct-II (normalized)

                    var pnccs = new float[FeatureCount];
                    dct.DirectN(smoothedSpectrum, pnccs);


                    // add pncc vector to output sequence

                    featureVectors.Add(new FeatureVector
                    {
                        Features     = pnccs,
                        TimePosition = (double)timePos / signal.SamplingRate
                    });
                }

                i++;

                timePos += hopSize;
            }

            return featureVectors;
        }
Exemple #4
0
        /// <summary>
        /// Main constructor
        /// </summary>
        /// <param name="samplingRate">Sampling rate; forwarded to the base extractor and to the ERB filterbank generator</param>
        /// <param name="featureCount">Value assigned to <c>FeatureCount</c>; also passed to the DCT-II constructor</param>
        /// <param name="frameDuration">Length of analysis window (in seconds)</param>
        /// <param name="hopDuration">Hop duration (in seconds); forwarded to the base extractor</param>
        /// <param name="power">Value stored as the power setting of the nonlinearity stage</param>
        /// <param name="lowFreq">Lower frequency bound of the generated ERB filterbank (not stored if <paramref name="filterbank"/> is given)</param>
        /// <param name="highFreq">Upper frequency bound of the generated ERB filterbank (not stored if <paramref name="filterbank"/> is given)</param>
        /// <param name="filterbankSize">Number of ERB filters to generate (ignored if <paramref name="filterbank"/> is given)</param>
        /// <param name="filterbank">
        /// Custom filterbank; if null, an ERB filterbank is generated and squared.
        /// A custom filterbank is used as is (it is not squared here).
        /// </param>
        /// <param name="fftSize">Size of FFT (in samples); used only if it exceeds the frame size and no custom filterbank is given</param>
        /// <param name="preEmphasis">Pre-emphasis coefficient (stored as float)</param>
        /// <param name="window">Type of the window; window samples are precomputed unless it is rectangular</param>
        /// <exception cref="System.ArgumentException">
        /// Thrown if the FFT size derived from a custom <paramref name="filterbank"/> is less than the frame size.
        /// </exception>
        public PnccExtractor(int samplingRate,
                             int featureCount,
                             double frameDuration = 0.0256 /*sec*/,
                             double hopDuration   = 0.010 /*sec*/,
                             int power            = 15,
                             double lowFreq       = 100,
                             double highFreq      = 6800,
                             int filterbankSize   = 40,
                             float[][] filterbank = null,
                             int fftSize          = 0,
                             double preEmphasis   = 0.0,
                             WindowTypes window   = WindowTypes.Hamming)

            : base(samplingRate, frameDuration, hopDuration)
        {
            FeatureCount = featureCount;

            _power = power;

            if (filterbank == null)
            {
                // requested FFT size is honoured only if it is larger than the frame
                _fftSize        = fftSize > FrameSize ? fftSize : MathUtils.NextPowerOfTwo(FrameSize);
                _filterbankSize = filterbankSize;

                _lowFreq  = lowFreq;
                _highFreq = highFreq;

                FilterBank = FilterBanks.Erb(_filterbankSize, _fftSize, samplingRate, _lowFreq, _highFreq);

                // use power spectrum:

                foreach (var filter in FilterBank)
                {
                    for (var j = 0; j < filter.Length; j++)
                    {
                        filter[j] *= filter[j];
                    }
                }
            }
            else
            {
                // custom filterbank: the FFT size follows from the number of bins in each filter
                FilterBank      = filterbank;
                _filterbankSize = filterbank.Length;
                _fftSize        = 2 * (filterbank[0].Length - 1);

                // The processing block is allocated with _fftSize samples, so a frame
                // that is longer than that cannot be placed into it. Fail fast here
                // instead of failing later during feature extraction.
                if (FrameSize > _fftSize)
                {
                    throw new System.ArgumentException(
                        "Frame size must not exceed FFT size derived from the filterbank (" + _fftSize + ")",
                        nameof(filterbank));
                }
            }

            _fft = new Fft(_fftSize);
            _dct = new Dct2(_filterbankSize, FeatureCount);

            _preEmphasis = (float)preEmphasis;

            _window = window;
            if (_window != WindowTypes.Rectangular)
            {
                _windowSamples = Window.OfType(_window, FrameSize);
            }

            // reusable work buffers

            _block             = new float[_fftSize];
            _spectrum          = new float[_fftSize / 2 + 1];
            _spectrumQOut      = new float[_filterbankSize];
            _gammatoneSpectrum = new float[_filterbankSize];
            _filteredSpectrumQ = new float[_filterbankSize];
            _spectrumS         = new float[_filterbankSize];
            _smoothedSpectrumS = new float[_filterbankSize];
            _avgSpectrumQ1     = new float[_filterbankSize];
            _avgSpectrumQ2     = new float[_filterbankSize];
            _smoothedSpectrum  = new float[_filterbankSize];
            _zeroblock         = new float[_fftSize];       // buffer of zeros for quick clearing of _block

            _ringBuffer = new SpectraRingBuffer(2 * M + 1, _filterbankSize);
        }