Beispiel #1
0
        /// <summary>
        /// Computes the direct STFT of a signal block.
        /// The STFT is the list of complex spectra of successive (overlapping) frames.
        /// </summary>
        /// <param name="samples">The samples of signal</param>
        /// <returns>
        /// One (real, imaginary) pair of arrays of length <c>_fftSize</c> per complete frame;
        /// an empty list if the signal is shorter than one window.
        /// </returns>
        public List<Tuple<float[], float[]>> Direct(float[] samples)
        {
            // Number of complete frames that fit into the signal.
            // The explicit length check is required: integer division truncates toward zero,
            // so a signal slightly shorter than the window would otherwise still yield one frame.
            var frameCount = samples.Length >= _windowSize
                           ? (samples.Length - _windowSize) / _hopSize + 1
                           : 0;

            // pre-allocate memory:

            var stft = new List<Tuple<float[], float[]>>(frameCount);

            for (var i = 0; i < frameCount; i++)
            {
                stft.Add(new Tuple<float[], float[]>(new float[_fftSize], new float[_fftSize]));
            }

            // stft:

            var windowedBuffer = new float[_windowSize];

            // The loop is driven by the frame count, so every pre-allocated frame gets filled,
            // including the one that ends exactly at the last sample.
            for (int i = 0, pos = 0; i < frameCount; i++, pos += _hopSize)
            {
                samples.FastCopyTo(windowedBuffer, _windowSize, pos);

                if (_window != WindowTypes.Rectangular)
                {
                    windowedBuffer.ApplyWindow(_windowSamples);
                }

                _fft.Direct(windowedBuffer, stft[i].Item1, stft[i].Item2);
            }

            return stft;
        }
Beispiel #2
0
        /// <summary>
        /// Computes a spectrogram.
        /// The spectrogram is the list of power spectra of successive (overlapping) frames.
        /// </summary>
        /// <param name="samples">The samples of signal</param>
        /// <returns>
        /// One power spectrum of length <c>_fftSize / 2 + 1</c> per complete frame;
        /// an empty list if the signal is shorter than one window.
        /// </returns>
        public List<float[]> Spectrogram(float[] samples)
        {
            var block     = new float[_fftSize];
            var zeroblock = new float[_fftSize];   // source of zeros for resetting 'block'

            var spectrogram = new List<float[]>();

            // "<=" (not "<"): the frame that ends exactly at the last sample is a complete frame
            // and must be processed as well (e.g. a signal of exactly one window yields one spectrum).
            for (var pos = 0; pos + _windowSize <= samples.Length; pos += _hopSize)
            {
                // reset the whole block, then put the current frame at its start
                zeroblock.FastCopyTo(block, _fftSize);
                samples.FastCopyTo(block, _windowSize, pos);

                if (_window != WindowTypes.Rectangular)
                {
                    block.ApplyWindow(_windowSamples);
                }

                // each frame gets its own array because it is stored in the result
                var spectrum = new float[_fftSize / 2 + 1];
                _fft.PowerSpectrum(block, spectrum);

                spectrogram.Add(spectrum);
            }

            return spectrogram;
        }
Beispiel #3
0
        /// <summary>
        /// Computes the direct STFT of a signal block.
        /// The STFT is the list of complex spectra of successive (overlapping) frames.
        /// </summary>
        /// <param name="samples">The samples of signal</param>
        /// <returns>
        /// One (real, imaginary) pair of arrays of length <c>_fftSize</c> per complete frame;
        /// an empty list if the signal is shorter than one window.
        /// </returns>
        public List<(float[], float[])> Direct(float[] samples)
        {
            // Number of complete frames that fit into the signal.
            // The explicit length check is required: integer division truncates toward zero,
            // so a signal slightly shorter than the window would otherwise still yield one frame.
            var frameCount = samples.Length >= _windowSize
                           ? (samples.Length - _windowSize) / _hopSize + 1
                           : 0;

            // pre-allocate memory:

            var stft = new List<(float[], float[])>(frameCount);

            for (var i = 0; i < frameCount; i++)
            {
                stft.Add((new float[_fftSize], new float[_fftSize]));
            }

            // stft:

            // only the first _windowSize elements are overwritten per frame
            var windowedBuffer = new float[_fftSize];

            // The loop is driven by the frame count, so every pre-allocated frame gets filled,
            // including the one that ends exactly at the last sample.
            for (int i = 0, pos = 0; i < frameCount; i++, pos += _hopSize)
            {
                samples.FastCopyTo(windowedBuffer, _windowSize, pos);

                windowedBuffer.ApplyWindow(_windowSamples);

                var (re, im) = stft[i];

                _fft.Direct(windowedBuffer, re, im);
            }

            return stft;
        }
Beispiel #4
0
        /// <summary>
        /// Fills the given sequence with feature vectors computed from a part of the array of samples.
        /// Frames of <c>FrameSize</c> samples are taken every <c>HopSize</c> samples;
        /// the i-th frame is written to the i-th pre-allocated vector.
        /// </summary>
        /// <param name="samples">Array of real-valued samples</param>
        /// <param name="startSample">The offset (position) of the first sample for processing</param>
        /// <param name="endSample">The offset (position) of last sample for processing</param>
        /// <param name="vectors">Pre-allocated sequence of feature vectors</param>
        public virtual void ComputeFrom(float[] samples, int startSample, int endSample, IList<float[]> vectors)
        {
            Guard.AgainstInvalidRange(startSample, endSample, "starting pos", "ending pos");

            var frameLength = FrameSize;
            var step        = HopSize;

            // the sample right before the current frame (input to the pre-emphasis filter)
            var previous = startSample > 0 ? samples[startSample - 1] : 0f;

            // the last position at which a complete frame still fits
            var lastFrameStart = endSample - frameLength;

            // The working buffer is reused for every frame. It may be longer than a frame,
            // so its tail has to be zeroed again on each iteration.
            var buffer = new float[_blockSize];

            var frameIndex = 0;
            var offset     = startSample;

            while (offset <= lastFrameStart)
            {
                // copy the frame to the start of the buffer and zero-pad the rest

                samples.FastCopyTo(buffer, frameLength, offset);

                for (var k = frameLength; k < buffer.Length; k++)
                {
                    buffer[k] = 0;
                }

                // optional pre-emphasis

                if (_preEmphasis > 1e-10f)
                {
                    for (var k = 0; k < frameLength; k++)
                    {
                        var current = buffer[k];
                        buffer[k] = current - previous * _preEmphasis;
                        previous  = current;
                    }

                    // the next frame starts 'step' samples further,
                    // so its predecessor is the sample just before that position
                    previous = samples[offset + step - 1];
                }

                // optional windowing

                if (_windowSamples != null)
                {
                    buffer.ApplyWindow(_windowSamples);
                }

                // compute the features of this frame

                ProcessFrame(buffer, vectors[frameIndex]);

                offset += step;
                frameIndex++;
            }
        }
Beispiel #5
0
        /// <summary>
        /// Computes the direct STFT of a signal block.
        /// The STFT is the list of complex spectra of successive (overlapping) frames.
        /// After all complete frames, one more frame is appended that holds the remaining
        /// samples of the signal (if any) padded with zeros.
        /// </summary>
        /// <param name="input">Samples of input signal</param>
        /// <returns>
        /// One (real, imaginary) pair of arrays of length <c>_fftSize</c> per frame;
        /// the last pair corresponds to the final (incomplete) frame.
        /// </returns>
        public List<(float[], float[])> Direct(float[] input)
        {
            // Number of complete frames that fit into the signal.
            // The explicit length check is required: integer division truncates toward zero,
            // so for short inputs the plain formula gives either a bogus frame that reads
            // past the end of the input or a negative value (invalid list capacity).
            var fullFrames = input.Length >= _windowSize
                           ? (input.Length - _windowSize) / _hopSize + 1
                           : 0;

            // pre-allocate memory (complete frames + the final incomplete one):

            var stft = new List<(float[], float[])>(fullFrames + 1);

            for (var i = 0; i <= fullFrames; i++)
            {
                stft.Add((new float[_fftSize], new float[_fftSize]));
            }

            // stft:

            var windowedBuffer = new float[_fftSize];

            var pos = 0;

            for (var i = 0; i < fullFrames; pos += _hopSize, i++)
            {
                input.FastCopyTo(windowedBuffer, _windowSize, pos);

                windowedBuffer.ApplyWindow(_windowSamples);

                var (re, im) = stft[i];

                _fft.Direct(windowedBuffer, re, im);
            }

            // last (incomplete) frame:

            Array.Clear(windowedBuffer, 0, _fftSize);

            // 'pos' may be at or beyond the end of the input (e.g. when the hop exceeds the window);
            // in that case there is nothing left to copy and the frame stays all zeros.
            var remaining = input.Length - pos;

            if (remaining > 0)
            {
                input.FastCopyTo(windowedBuffer, remaining, pos);
            }

            windowedBuffer.ApplyWindow(_windowSamples);

            var (lastRe, lastIm) = stft[stft.Count - 1];

            _fft.Direct(windowedBuffer, lastRe, lastIm);

            return stft;
        }
Beispiel #6
0
        /// <summary>
        /// Computes the MFCC vector of one frame. The steps are:
        ///
        ///     1) Zero the part of the block after the frame and apply the window
        ///     2) Compute the spectrum of the block
        ///     3) Post-process the spectrum (mel filterbank and non-linearity)
        ///     4) Apply DCT
        ///     5) [Optional] lifter the coefficients
        ///     6) [Optional] replace the first coefficient with log-energy of the block
        ///
        /// Note that the given block is modified.
        /// </summary>
        /// <param name="block">Samples for analysis</param>
        /// <returns>MFCC vector</returns>
        public override float[] ProcessFrame(float[] block)
        {
            // everything after the frame is zeroed (matters only if the block is longer than the frame)

            var padIndex = FrameSize;

            while (padIndex < block.Length)
            {
                block[padIndex] = 0;
                padIndex++;
            }

            // windowing

            block.ApplyWindow(_windowSamples);

            // block -> _spectrum

            _getSpectrum(block);

            // _spectrum -> _melSpectrum

            _postProcessSpectrum();

            // _melSpectrum -> coefficients

            var coefficients = new float[FeatureCount];

            _applyDct(coefficients);

            // liftering is applied only if lifter coefficients are available

            if (_lifterCoeffs != null)
            {
                coefficients.ApplyWindow(_lifterCoeffs);
            }

            if (!_includeEnergy)
            {
                return coefficients;
            }

            // the first coefficient is replaced with the natural log of the block's energy

            var energy = block.Sum(x => x * x);

            coefficients[0] = (float)Math.Log(energy);

            return coefficients;
        }
Beispiel #7
0
        /// <summary>
        /// Computes a spectrogram as arrays of magnitude and phase.
        /// </summary>
        /// <param name="samples">The samples of signal</param>
        /// <returns>
        /// Magnitude-phase spectrogram of the signal: one magnitude array and one phase array
        /// (each of length <c>_fftSize / 2 + 1</c>) per complete frame;
        /// both lists are empty if the signal is shorter than one window.
        /// </returns>
        public MagnitudePhaseList MagnitudePhaseSpectrogram(float[] samples)
        {
            var spectrumSize = _fftSize / 2 + 1;

            // Number of complete frames that fit into the signal.
            // The explicit length check is required: integer division truncates toward zero,
            // so a signal slightly shorter than the window would otherwise still yield one frame.
            var frameCount = samples.Length >= _windowSize
                           ? (samples.Length - _windowSize) / _hopSize + 1
                           : 0;

            // pre-allocate memory:

            var mag   = new List<float[]>(frameCount);
            var phase = new List<float[]>(frameCount);

            for (var i = 0; i < frameCount; i++)
            {
                mag.Add(new float[spectrumSize]);
                phase.Add(new float[spectrumSize]);
            }

            // magnitude-phase spectrogram:

            var windowedBuffer = new float[_windowSize];
            var re             = new float[spectrumSize];
            var im             = new float[spectrumSize];

            // The loop is driven by the frame count, so every pre-allocated frame gets filled,
            // including the one that ends exactly at the last sample.
            for (int i = 0, pos = 0; i < frameCount; i++, pos += _hopSize)
            {
                samples.FastCopyTo(windowedBuffer, _windowSize, pos);

                if (_window != WindowTypes.Rectangular)
                {
                    windowedBuffer.ApplyWindow(_windowSamples);
                }

                _fft.Direct(windowedBuffer, re, im);

                var frameMag   = mag[i];
                var framePhase = phase[i];

                // convert each complex bin from rectangular to polar form
                for (var j = 0; j < spectrumSize; j++)
                {
                    frameMag[j]   = (float)(Math.Sqrt(re[j] * re[j] + im[j] * im[j]));
                    framePhase[j] = (float)(Math.Atan2(im[j], re[j]));
                }
            }

            return new MagnitudePhaseList
            {
                Magnitudes = mag,
                Phases     = phase
            };
        }
Beispiel #8
0
        /// <summary>
        /// Computes the LPCC vector of one frame.
        /// The LPC part duplicates the LPC extractor code
        /// (for efficient memory usage the work is not delegated to LpcExtractor);
        /// the LPC vector is then converted to LPCC coefficients.
        /// Note that the given block is modified if a non-rectangular window is used.
        /// </summary>
        /// <param name="block">Samples for analysis</param>
        /// <returns>LPCC vector</returns>
        public override float[] ProcessFrame(float[] block)
        {
            // windowing (usually the signal isn't windowed for LPC, hence the check)

            var isWindowed = _window != WindowTypes.Rectangular;

            if (isWindowed)
            {
                block.ApplyWindow(_windowSamples);
            }

            // autocorrelation, computed as cross-correlation of the block with a copy of its frame

            block.FastCopyTo(_reversed, FrameSize);

            _convolver.CrossCorrelate(block, _reversed, _cc);

            // Levinson-Durbin recursion starts from zeroed LPC coefficients

            for (var index = 0; index < _lpc.Length; index++)
            {
                _lpc[index] = 0;
            }

            var error = Lpc.LevinsonDurbin(_cc, _lpc, _order, FrameSize - 1);

            // LPC -> LPCC

            var cepstrum = new float[FeatureCount];

            Lpc.ToCepstrum(_lpc, error, cepstrum);

            // liftering is applied only if lifter coefficients are available

            if (_lifterCoeffs != null)
            {
                cepstrum.ApplyWindow(_lifterCoeffs);
            }

            return cepstrum;
        }
Beispiel #9
0
        /// <summary>
        /// Computes the direct STFT of a signal block.
        /// The STFT is the list of complex spectra of successive (overlapping) frames.
        /// </summary>
        /// <param name="samples">The samples of signal</param>
        /// <returns>
        /// One (real, imaginary) pair of arrays of length <c>_fftSize</c> per complete frame;
        /// an empty list if the signal is shorter than one window.
        /// </returns>
        public List<Tuple<float[], float[]>> Direct(float[] samples)
        {
            var stft = new List<Tuple<float[], float[]>>();

            // "<=" (not "<"): the frame that ends exactly at the last sample is a complete frame
            // and must be processed as well (e.g. a signal of exactly one window yields one spectrum).
            for (var pos = 0; pos + _windowSize <= samples.Length; pos += _hopSize)
            {
                // Fresh arrays per frame: they are handed to the FFT and then stored in the result.
                // The part of 're' after the frame stays zero (zero-padding up to _fftSize).
                var re = new float[_fftSize];
                var im = new float[_fftSize];
                samples.FastCopyTo(re, _windowSize, pos);

                if (_window != WindowTypes.Rectangular)
                {
                    re.ApplyWindow(_windowSamples);
                }

                _fft.Direct(re, im);

                stft.Add(new Tuple<float[], float[]>(re, im));
            }

            return stft;
        }
Beispiel #10
0
        /// <summary>
        /// Computes a spectrogram.
        /// The spectrogram is the list of power spectra of successive (overlapping) frames.
        /// </summary>
        /// <param name="samples">The samples of signal</param>
        /// <returns>
        /// One power spectrum of length <c>_fftSize / 2 + 1</c> per complete frame;
        /// an empty list if the signal is shorter than one window.
        /// </returns>
        public List<float[]> Spectrogram(float[] samples)
        {
            // Number of complete frames that fit into the signal.
            // The explicit length check is required: integer division truncates toward zero,
            // so a signal slightly shorter than the window would otherwise still yield one frame.
            var frameCount = samples.Length >= _windowSize
                           ? (samples.Length - _windowSize) / _hopSize + 1
                           : 0;

            // pre-allocate memory:

            var spectrogram = new List<float[]>(frameCount);

            for (var i = 0; i < frameCount; i++)
            {
                spectrogram.Add(new float[_fftSize / 2 + 1]);
            }

            // spectrogram:

            var windowedBuffer = new float[_fftSize];

            // The loop is driven by the frame count, so every pre-allocated frame gets filled,
            // including the one that ends exactly at the last sample.
            for (int i = 0, pos = 0; i < frameCount; i++, pos += _hopSize)
            {
                // the buffer is reused, so it is reset whenever the frame doesn't cover it completely
                if (_windowSize < _fftSize)
                {
                    Array.Clear(windowedBuffer, 0, _fftSize);
                }

                samples.FastCopyTo(windowedBuffer, _windowSize, pos);

                if (_window != WindowTypes.Rectangular)
                {
                    windowedBuffer.ApplyWindow(_windowSamples);
                }

                _fft.PowerSpectrum(windowedBuffer, spectrogram[i]);
            }

            return spectrogram;
        }
Beispiel #11
0
        /// <summary>
        /// Computes LPCC feature vectors from a part of the array of samples.
        /// The LPC part duplicates the LPC extractor code
        /// (for efficient memory usage the work is not delegated to LpcExtractor);
        /// each LPC vector is then converted to LPCC coefficients.
        /// </summary>
        /// <param name="samples">Samples for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of LPCC feature vectors, each with the time position of its frame</returns>
        public override List<FeatureVector> ComputeFrom(float[] samples, int startSample, int endSample)
        {
            Guard.AgainstInvalidRange(startSample, endSample, "starting pos", "ending pos");

            var step        = HopSize;
            var frameLength = FrameSize;

            var result = new List<FeatureVector>();

            // the sample right before the current frame (input to the pre-emphasis filter)
            var previous = startSample > 0 ? samples[startSample - 1] : 0.0f;

            // frames start strictly before this position
            var limit = endSample - Math.Max(frameLength, step);

            var offset = startSample;

            while (offset < limit)
            {
                // copy the current frame to the working block

                samples.FastCopyTo(_block, frameLength, offset);

                // optional pre-emphasis

                if (_preEmphasis > 1e-10)
                {
                    for (var k = 0; k < frameLength; k++)
                    {
                        var current = _block[k];
                        _block[k] = current - previous * _preEmphasis;
                        previous  = current;
                    }

                    // the next frame starts 'step' samples further,
                    // so its predecessor is the sample just before that position
                    previous = samples[offset + step - 1];
                }

                // optional windowing

                if (_window != WindowTypes.Rectangular)
                {
                    _block.ApplyWindow(_windowSamples);
                }

                // autocorrelation, computed as cross-correlation of the block with a copy of its frame

                _block.FastCopyTo(_reversed, frameLength);

                _convolver.CrossCorrelate(_block, _reversed, _cc);

                // Levinson-Durbin recursion starts from zeroed LPC coefficients

                var lpcIndex = 0;

                while (lpcIndex < _lpc.Length)
                {
                    _lpc[lpcIndex] = 0;
                    lpcIndex++;
                }

                var error = MathUtils.LevinsonDurbin(_cc, _lpc, _order, frameLength - 1);

                // LPC -> LPCC recursion: the first coefficient is the log of the error,
                // every next one is derived from the LPC vector and the previous LPCC coefficients

                var cepstrum = new float[FeatureCount];

                cepstrum[0] = (float)Math.Log(error);

                for (var n = 1; n < FeatureCount; n++)
                {
                    var acc = 0.0f;

                    var k = 1;

                    while (k < n)
                    {
                        acc += k * cepstrum[k] * _lpc[n - k];
                        k++;
                    }

                    cepstrum[n] = -_lpc[n] - acc / n;
                }

                // liftering is applied only if lifter coefficients are available

                if (_lifterCoeffs != null)
                {
                    cepstrum.ApplyWindow(_lifterCoeffs);
                }

                // store the vector together with the time position of its frame

                var vector = new FeatureVector
                {
                    Features     = cepstrum,
                    TimePosition = (double)offset / SamplingRate
                };

                result.Add(vector);

                offset += step;
            }

            return result;
        }
Beispiel #12
0
        /// <summary>
        /// Phase locking procedure.
        /// Processes the signal frame by frame: forward FFT, phase adaptation, inverse FFT
        /// and windowed overlap-add. Frames are read every _hopAnalysis samples
        /// and written every _hopSynthesis samples.
        /// </summary>
        /// <param name="signal">Input signal (its samples are read; its sampling rate is reused for the result)</param>
        /// <returns>New signal containing (int)(input length * _stretch) + _fftSize samples</returns>
        private DiscreteSignal PhaseLocking(DiscreteSignal signal)
        {
            var input  = signal.Samples;
            var output = new float[(int)(input.Length * _stretch) + _fftSize];

            // per-sample sum of squared window values of all frames overlapping that sample
            // (used for normalization at the end)
            var windowSum = new float[output.Length];

            var re        = new float[_fftSize];
            var im        = new float[_fftSize];
            var zeroblock = new float[_fftSize];    // source of zeros for resetting 'im'

            // only the first half of the spectrum (including the middle bin) is analyzed
            var mag   = new double[_fftSize / 2 + 1];
            var phase = new double[_fftSize / 2 + 1];

            // state that is carried over from frame to frame
            var prevPhase  = new double[_fftSize / 2 + 1];
            var phaseTotal = new double[_fftSize / 2 + 1];
            var delta      = new double[_fftSize / 2 + 1];

            var posSynthesis = 0;

            // note the strict "<": a frame that would end exactly at the last sample is not processed
            for (var posAnalysis = 0; posAnalysis + _fftSize < input.Length; posAnalysis += _hopAnalysis)
            {
                input.FastCopyTo(re, _fftSize, posAnalysis);
                zeroblock.FastCopyTo(im, _fftSize);

                re.ApplyWindow(_window);

                _fft.Direct(re, im);


                // spectral peaks in magnitude spectrum

                for (var j = 0; j < mag.Length; j++)
                {
                    mag[j]   = Math.Sqrt(re[j] * re[j] + im[j] * im[j]);
                    phase[j] = Math.Atan2(im[j], re[j]);

                    // phase difference between this frame and the previous one
                    delta[j] = phase[j] - prevPhase[j];

                    prevPhase[j] = phase[j];
                }

                // assign phases at peaks to all neighboring frequency bins
                //
                // NOTE(review): the values written to phase[] in this section are not read again
                // anywhere below in this method (the phase adaptation uses delta[] and phaseTotal[]),
                // so this step does not influence the output as written - confirm the intent.

                var prevIndex = 0;
                var prevPhi   = 0.0;

                for (var j = 2; j < mag.Length - 2; j++)
                {
                    // a peak is strictly greater than its two neighbors on each side
                    if (mag[j] <= mag[j - 1] || mag[j] <= mag[j - 2] ||
                        mag[j] <= mag[j + 1] || mag[j] <= mag[j + 2])
                    {
                        continue;   // if not a peak
                    }

                    // bins between two peaks are split at the midpoint;
                    // before the first peak is found (prevIndex == 0) all bins go to the current peak
                    var mid = prevIndex == 0 ? 0 : (prevIndex + j) / 2;

                    for (var k = prevIndex; k < mid; k++)
                    {
                        phase[k] = prevPhi;
                    }

                    for (var k = mid; k < j; k++)
                    {
                        phase[k] = phase[j];
                    }

                    prevIndex = j;
                    prevPhi   = phase[j];
                }

                // bins from the last peak up to the end get the phase of the last peak
                // (or 0.0 for all bins if no peak was found)
                for (var j = prevIndex; j < mag.Length; j++)
                {
                    phase[j] = prevPhi;
                }


                // phase adaptation

                for (var j = 0; j < mag.Length; j++)
                {
                    // deviation of the measured phase advance from the advance expected for _omega[j]
                    var deltaUnwrapped = delta[j] - _hopAnalysis * _omega[j];

                    // presumably maps the deviation to [-PI, PI) - depends on MathUtils.Mod
                    // returning a non-negative remainder; TODO confirm
                    var deltaWrapped   = MathUtils.Mod(deltaUnwrapped + Math.PI, 2 * Math.PI) - Math.PI;

                    var freq = _omega[j] + deltaWrapped / _hopAnalysis;

                    // accumulated phase advances by the synthesis hop
                    phaseTotal[j] += _hopSynthesis * freq;

                    // back to rectangular form: original magnitude, adapted phase
                    re[j] = (float)(mag[j] * Math.Cos(phaseTotal[j]));
                    im[j] = (float)(mag[j] * Math.Sin(phaseTotal[j]));
                }

                // the upper part of the spectrum is zeroed before the inverse transform
                for (var j = _fftSize / 2 + 1; j < _fftSize; j++)
                {
                    re[j] = im[j] = 0.0f;
                }

                _fft.Inverse(re, im);

                // windowed overlap-add at the synthesis position
                for (var j = 0; j < re.Length; j++)
                {
                    output[posSynthesis + j]    += re[j] * _window[j];
                    windowSum[posSynthesis + j] += _windowSquared[j];
                }

                posSynthesis += _hopSynthesis;
            }

            // normalization of the overlap-added result
            for (var j = 0; j < output.Length; j++)
            {
                // samples with (almost) no window coverage are left as they are
                // to avoid division by a value close to zero
                if (windowSum[j] < 1e-3)
                {
                    continue;
                }
                // NOTE(review): the factor _fftSize / 2 presumably compensates for the scaling
                // of the FFT pair and the zeroed upper part of the spectrum - confirm
                output[j] /= (windowSum[j] * _fftSize / 2);
            }

            return(new DiscreteSignal(signal.SamplingRate, output));
        }
        /// <summary>
        /// Spectral subtraction algorithm according to
        ///
        /// [1979] M. Berouti, R. Schwartz, J. Makhoul
        /// "Enhancement of Speech Corrupted by Acoustic Noise".
        ///
        /// The noise power spectrum is estimated as the (smoothed) average power spectrum
        /// of the frames of <paramref name="noise"/>. Then a scaled version of it is subtracted
        /// from the power spectrum of each Hann-windowed frame of <paramref name="signal"/>,
        /// and the frames are overlap-added to form the output.
        /// If <paramref name="noise"/> is too short to contain a single frame, nothing is subtracted.
        /// </summary>
        /// <param name="signal">Signal to process</param>
        /// <param name="noise">Signal from which the noise power spectrum is estimated</param>
        /// <param name="fftSize">Size of FFT (and of each frame)</param>
        /// <param name="hopSize">Number of samples between the starts of successive frames</param>
        /// <returns>New signal with the same number of samples and the same sampling rate as <paramref name="signal"/></returns>
        public static DiscreteSignal SpectralSubtract(DiscreteSignal signal,
                                                      DiscreteSignal noise,
                                                      int fftSize = 1024,
                                                      int hopSize = 410)
        {
            var input  = signal.Samples;
            var output = new float[input.Length];

            const float beta     = 0.009f;  // spectral floor: the result never drops below beta * noise power
            const float alphaMin = 2f;      // range of the subtraction factor alpha
            const float alphaMax = 5f;
            const float snrMin   = -5f;     // range of SNR (in dB) over which alpha varies
            const float snrMax   = 20f;

            // alpha decreases linearly from alphaMax (at snrMin) to alphaMin (at snrMax): alpha = k * snr + b
            const float k = (alphaMin - alphaMax) / (snrMax - snrMin);
            const float b = alphaMax - k * snrMin;

            var lastBin = fftSize / 2;

            var fft           = new Fft(fftSize);
            var hannWindow    = Window.OfType(WindowTypes.Hann, fftSize);
            var windowSquared = hannWindow.Select(w => w * w).ToArray();
            var windowSum     = new float[output.Length];

            var re        = new float[fftSize];
            var im        = new float[fftSize];
            var zeroblock = new float[fftSize];     // source of zeros for resetting 'im'


            // estimate noise power spectrum

            var noiseAcc      = new float[lastBin + 1];
            var noiseEstimate = new float[lastBin + 1];

            var numFrames = 0;

            // "<=": the frame that ends exactly at the last sample is a complete frame, too
            for (var pos = 0; pos + fftSize <= noise.Length; pos += hopSize, numFrames++)
            {
                noise.Samples.FastCopyTo(re, fftSize, pos);
                zeroblock.FastCopyTo(im, fftSize);

                fft.Direct(re, im);

                for (var j = 0; j <= lastBin; j++)
                {
                    noiseAcc[j] += re[j] * re[j] + im[j] * im[j];
                }
            }

            // Average over frames (including smoothing over three adjacent bins).
            // Without any noise frame the estimate stays zero instead of becoming NaN (0 / 0).

            if (numFrames > 0)
            {
                for (var j = 1; j < lastBin; j++)
                {
                    noiseEstimate[j] = (noiseAcc[j - 1] + noiseAcc[j] + noiseAcc[j + 1]) / (3 * numFrames);
                }

                // The edge bins have only one neighbor, so they are averaged without smoothing.
                // They must be computed from the accumulator: dividing the (still zero) estimate
                // by the number of frames would leave them at zero.
                noiseEstimate[0]       = noiseAcc[0] / numFrames;
                noiseEstimate[lastBin] = noiseAcc[lastBin] / numFrames;
            }


            // spectral subtraction

            // "<=": the frame that ends exactly at the last sample is processed, too
            for (var pos = 0; pos + fftSize <= input.Length; pos += hopSize)
            {
                input.FastCopyTo(re, fftSize, pos);
                zeroblock.FastCopyTo(im, fftSize);

                re.ApplyWindow(hannWindow);

                fft.Direct(re, im);

                for (var j = 0; j <= lastBin; j++)
                {
                    var noisePower = noiseEstimate[j];

                    // Nothing to subtract in this bin: it is kept as it is.
                    // This also avoids the division by zero below, which would give NaN
                    // for a bin with zero power and spoil the entire frame after the inverse FFT.
                    if (noisePower <= 0)
                    {
                        continue;
                    }

                    var power = re[j] * re[j] + im[j] * im[j];
                    var phase = Math.Atan2(im[j], re[j]);

                    // the subtraction factor depends on SNR (in dB) of the bin and is limited to [alphaMin, alphaMax]
                    var snr   = 10 * Math.Log10(power / noisePower);
                    var alpha = Math.Max(Math.Min(k * snr + b, alphaMax), alphaMin);

                    var diff = power - alpha * noisePower;

                    // the result of subtraction is limited from below by the spectral floor
                    var mag = Math.Sqrt(Math.Max(diff, beta * noisePower));

                    // new magnitude, original phase
                    re[j] = (float)(mag * Math.Cos(phase));
                    im[j] = (float)(mag * Math.Sin(phase));
                }

                // the upper part of the spectrum is zeroed before the inverse transform
                for (var j = lastBin + 1; j < fftSize; j++)
                {
                    re[j] = im[j] = 0.0f;
                }

                fft.Inverse(re, im);

                // windowed overlap-add
                for (var j = 0; j < re.Length; j++)
                {
                    output[pos + j]    += re[j] * hannWindow[j];
                    windowSum[pos + j] += windowSquared[j];
                }
            }

            // normalization of the overlap-added result

            for (var j = 0; j < output.Length; j++)
            {
                // samples with (almost) no window coverage are left as they are
                // to avoid division by a value close to zero
                if (windowSum[j] < 1e-3)
                {
                    continue;
                }
                output[j] /= (windowSum[j] * fftSize / 2);
            }

            return new DiscreteSignal(signal.SamplingRate, output);
        }
Beispiel #14
0
        /// <summary>
        /// Computes MFCC feature vectors from a part of the array of samples.
        /// The signal is decomposed into overlapping frames (hop between frames is HopSize).
        /// In each frame do:
        ///
        ///     0) [Optional] pre-emphasis
        ///     1) Apply window (nothing is done in case of rectangular window)
        ///     2) Obtain power spectrum
        ///     3) Apply mel filters and take log of the result
        ///     4) Do dct-II
        ///     5) [Optional] liftering of mfcc
        ///
        /// </summary>
        /// <param name="samples">Samples for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of mfcc vectors, each with the time position of its frame</returns>
        public override List<FeatureVector> ComputeFrom(float[] samples, int startSample, int endSample)
        {
            Guard.AgainstInvalidRange(startSample, endSample, "starting pos", "ending pos");

            var step        = HopSize;
            var frameLength = FrameSize;

            var result = new List<FeatureVector>();

            // the sample right before the current frame (input to the pre-emphasis filter)
            var previous = startSample > 0 ? samples[startSample - 1] : 0.0f;

            for (var offset = startSample; offset + frameLength < endSample; offset += step)
            {
                // reset the working block and put the current frame at its start

                _zeroblock.FastCopyTo(_block, _fftSize);
                samples.FastCopyTo(_block, _windowSamples.Length, offset);

                // optional pre-emphasis

                if (_preEmphasis > 0.0)
                {
                    for (var k = 0; k < frameLength; k++)
                    {
                        var current = _block[k];
                        _block[k] = current - previous * _preEmphasis;
                        previous  = current;
                    }

                    // the next frame starts 'step' samples further,
                    // so its predecessor is the sample just before that position
                    previous = samples[offset + step - 1];
                }

                // optional windowing

                if (_window != WindowTypes.Rectangular)
                {
                    _block.ApplyWindow(_windowSamples);
                }

                // _block -> _spectrum (power spectrum)

                _fft.PowerSpectrum(_block, _spectrum);

                // _spectrum -> _logMelSpectrum (mel filterbank and log)

                FilterBanks.ApplyAndLog(FilterBank, _spectrum, _logMelSpectrum);

                // _logMelSpectrum -> coefficients (dct-II)

                var coefficients = new float[FeatureCount];

                _dct.Direct(_logMelSpectrum, coefficients);

                // liftering is applied only if lifter coefficients are available

                if (_lifterCoeffs != null)
                {
                    coefficients.ApplyWindow(_lifterCoeffs);
                }

                // store the vector together with the time position of its frame

                var vector = new FeatureVector
                {
                    Features     = coefficients,
                    TimePosition = (double)offset / SamplingRate
                };

                result.Add(vector);
            }

            return result;
        }
Beispiel #15
0
        /// <summary>
        /// S(implified)PNCC algorithm according to [Kim &amp; Stern, 2016]:
        ///     0) [Optional] pre-emphasis
        ///
        /// Decompose signal into overlapping frames of frameSize samples (consecutive frames start
        /// hopSize samples apart); every frame is placed into a zeroed buffer of fftSize samples.
        /// In each frame do:
        ///
        ///     1) Apply window (if rectangular window was specified then just do nothing)
        ///     2) Obtain power spectrum
        ///     3) Apply gammatone filters (squared)
        ///     4) Mean power normalization
        ///     5) Apply nonlinearity
        ///     6) Do dct-II (normalized)
        ///
        /// </summary>
        /// <param name="signal">Signal for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">
        /// The number (position) of last sample for processing;
        /// a frame starting at position i is processed only while i + frameSize &lt; endSample
        /// </param>
        /// <returns>List of pncc vectors (one per processed frame, in time order)</returns>
        public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ====================================== PREPARE =======================================

            var hopSize       = (int)(signal.SamplingRate * HopSize);
            var frameSize     = (int)(signal.SamplingRate * FrameSize);
            var windowSamples = Window.OfType(_window, frameSize);

            // the FFT must be able to hold an entire frame:

            var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

            // The filterbank must be designed for the FFT size that is actually used below
            // (the local fftSize, which exceeds _fftSize whenever _fftSize < frameSize);
            // otherwise the filters would not line up with the fftSize / 2 + 1 spectrum bins.

            _gammatoneFilterBank = FilterBanks.Erb(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq);

            // use power spectrum, so square every filter coefficient in-place:

            foreach (var filter in _gammatoneFilterBank)
            {
                for (var j = 0; j < filter.Length; j++)
                {
                    filter[j] *= filter[j];
                }
            }


            var fft = new Fft(fftSize);
            var dct = new Dct2(_filterbankSize, FeatureCount);


            var gammatoneSpectrum = new float[_filterbankSize];

            const float meanPower = 1e10f;      // target power level used in step 4
            var         mean      = 4e07f;      // starting value of the running mean power

            // exponent of the power nonlinearity (not used when _power == 0, i.e. in Log10 mode)

            var d = _power != 0 ? 1.0 / _power : 0.0;

            var block     = new float[fftSize];       // buffer for a signal block at each step
            var zeroblock = new float[fftSize];       // buffer of zeros for quick memset

            var spectrum = new float[fftSize / 2 + 1];


            // ================================= MAIN PROCESSING ==================================

            var featureVectors = new List<FeatureVector>();

            // pre-emphasis needs the sample that precedes the first frame

            var prevSample = startSample > 0 ? signal[startSample - 1] : 0.0f;

            var i = startSample;

            while (i + frameSize < endSample)
            {
                // prepare next block for processing

                zeroblock.FastCopyTo(block, zeroblock.Length);
                signal.Samples.FastCopyTo(block, frameSize, i);


                // 0) pre-emphasis (if needed)

                if (_preEmphasis > 0.0)
                {
                    for (var k = 0; k < frameSize; k++)
                    {
                        var y = block[k] - prevSample * _preEmphasis;
                        prevSample = block[k];
                        block[k]   = y;
                    }

                    // frames overlap, so the predecessor of the next frame
                    // is the (unfiltered) sample right before position i + hopSize

                    prevSample = signal[i + hopSize - 1];
                }


                // 1) apply window

                if (_window != WindowTypes.Rectangular)
                {
                    block.ApplyWindow(windowSamples);
                }


                // 2) calculate power spectrum

                fft.PowerSpectrum(block, spectrum);


                // 3) apply gammatone filterbank

                FilterBanks.Apply(_gammatoneFilterBank, spectrum, gammatoneSpectrum);


                // 4) mean power normalization:

                var sumPower = 0.0f;
                for (var j = 0; j < gammatoneSpectrum.Length; j++)
                {
                    sumPower += gammatoneSpectrum[j];
                }

                // running (exponentially smoothed) estimate of the frame power

                mean = LambdaMu * mean + (1 - LambdaMu) * sumPower;

                // the scale factor is the same for all bands of the frame, so compute it once

                var normalization = meanPower / mean;

                for (var j = 0; j < gammatoneSpectrum.Length; j++)
                {
                    gammatoneSpectrum[j] *= normalization;
                }


                // 5) nonlinearity (power ^ d     or     Log10)

                if (_power != 0)
                {
                    for (var j = 0; j < gammatoneSpectrum.Length; j++)
                    {
                        gammatoneSpectrum[j] = (float)Math.Pow(gammatoneSpectrum[j], d);
                    }
                }
                else
                {
                    for (var j = 0; j < gammatoneSpectrum.Length; j++)
                    {
                        // float.Epsilon keeps the argument of Log10 away from exact zero
                        gammatoneSpectrum[j] = (float)Math.Log10(gammatoneSpectrum[j] + float.Epsilon);
                    }
                }


                // 6) dct-II (normalized)

                // a fresh array per frame: it is stored in the resulting feature vector
                var spnccs = new float[FeatureCount];
                dct.DirectN(gammatoneSpectrum, spnccs);


                // add pncc vector to output sequence

                featureVectors.Add(new FeatureVector
                {
                    Features     = spnccs,
                    TimePosition = (double)i / signal.SamplingRate
                });

                i += hopSize;
            }

            return featureVectors;
        }
        /// <summary>
        /// Phase Vocoder algorithm.
        /// Frames of _fftSize samples are taken every _hopAnalysis samples, windowed (Hann) and
        /// transformed; the phase of each bin is re-accumulated for the synthesis hop, and the
        /// inverse-transformed frames are overlap-added every _hopSynthesis samples.
        /// </summary>
        /// <param name="signal">Signal to process</param>
        /// <param name="filteringOptions">Not used by this method</param>
        /// <returns>New signal with the sampling rate of the input signal</returns>
        public DiscreteSignal ApplyTo(DiscreteSignal signal,
                                      FilteringOptions filteringOptions = FilteringOptions.Auto)
        {
            var source = signal.Samples;

            // output is longer/shorter than input by the ratio of the hops (plus one frame of room)

            var stretchFactor = (float)_hopSynthesis / _hopAnalysis;
            var result        = new float[(int)(source.Length * stretchFactor) + _fftSize];

            var transform = new Fft(_fftSize);
            var window    = Window.OfType(WindowTypes.Hann, _fftSize);

            // gain applied to every overlap-added sample

            var overlapRatio = _fftSize / (2.0f * _hopAnalysis);
            var gain         = 4.0f / (_fftSize * overlapRatio);

            // number of bins processed in each frame

            var binCount = _fftSize / 2 + 1;

            // phase advance per sample for each bin

            var binFrequencies = new double[binCount];
            for (var bin = 0; bin < binCount; bin++)
            {
                binFrequencies[bin] = 2 * Math.PI * bin / _fftSize;
            }

            var real  = new float[_fftSize];
            var imag  = new float[_fftSize];
            var zeros = new float[_fftSize];

            var lastPhase        = new double[binCount];
            var accumulatedPhase = new double[binCount];

            var writePos = 0;
            var readPos  = 0;

            while (readPos + _fftSize < source.Length)
            {
                // ---------------------------- analysis ----------------------------

                source.FastCopyTo(real, _fftSize, readPos);
                zeros.FastCopyTo(imag, _fftSize);

                real.ApplyWindow(window);

                transform.Direct(real, imag);

                // ------------------------ phase processing ------------------------

                for (var bin = 0; bin < binCount; bin++)
                {
                    var magnitude    = Math.Sqrt(real[bin] * real[bin] + imag[bin] * imag[bin]);
                    var currentPhase = Math.Atan2(imag[bin], real[bin]);

                    // deviation of the measured phase increment from the one expected
                    // for the bin frequency, mapped into [-pi, pi)

                    var deviation = (currentPhase - lastPhase[bin]) - _hopAnalysis * binFrequencies[bin];
                    var wrapped   = MathUtils.Mod(deviation + Math.PI, 2 * Math.PI) - Math.PI;

                    var frequency = binFrequencies[bin] + wrapped / _hopAnalysis;

                    accumulatedPhase[bin] += _hopSynthesis * frequency;
                    lastPhase[bin]         = currentPhase;

                    real[bin] = (float)(magnitude * Math.Cos(accumulatedPhase[bin]));
                    imag[bin] = (float)(magnitude * Math.Sin(accumulatedPhase[bin]));
                }

                // the remaining bins are zeroed before going back to time domain

                Array.Clear(real, binCount, _fftSize - binCount);
                Array.Clear(imag, binCount, _fftSize - binCount);

                // ---------------------------- synthesis ---------------------------

                transform.Inverse(real, imag);

                for (var k = 0; k < _fftSize; k++)
                {
                    result[writePos + k] += real[k] * window[k] * gain;
                }

                writePos += _hopSynthesis;
                readPos  += _hopAnalysis;
            }

            return new DiscreteSignal(signal.SamplingRate, result);
        }
Beispiel #17
0
        /// <summary>
        /// Computes LPC feature vectors frame by frame:
        ///     0) [Optional] pre-emphasis
        ///     1) Apply window (skipped for rectangular window)
        ///     2) Autocorrelation of the frame
        ///     3) Levinson-Durbin recursion
        ///
        /// Note:
        ///     Element 0 of each resulting vector does not hold an LP coefficient:
        ///     it is overwritten with the prediction error returned by Levinson-Durbin.
        ///
        /// </summary>
        /// <param name="signal">Signal for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of LPC vectors (each of size _order + 1)</returns>
        public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ------------------------------------ preparation ------------------------------------

            var hop         = (int)(signal.SamplingRate * HopSize);
            var frameLength = (int)(signal.SamplingRate * FrameSize);
            var window      = Window.OfType(_window, frameLength);

            // buffers passed to the correlation routine are sized to at least 2 * frameLength - 1

            var paddedLength = MathUtils.NextPowerOfTwo(2 * frameLength - 1);

            var zeros   = new float[paddedLength];      // source of zeros for resetting buffers
            var real    = new float[paddedLength];      // frame samples (real part)
            var imag    = new float[paddedLength];      // frame samples (imaginary part)
            var revReal = new float[paddedLength];      // work buffer: reversed frame (real part)
            var revImag = new float[paddedLength];      // work buffer: reversed frame (imaginary part)

            var autocorrelation = new float[frameLength];   // truncated correlation result


            // ---------------------------------- main processing ----------------------------------

            var vectors = new List<FeatureVector>();

            var previous = startSample > 0 ? signal[startSample - 1] : 0.0f;

            for (var pos = startSample; pos + frameLength < endSample; pos += hop)
            {
                // reset all work buffers and load the current frame

                zeros.FastCopyTo(real, paddedLength);
                zeros.FastCopyTo(imag, paddedLength);
                zeros.FastCopyTo(revReal, paddedLength);
                zeros.FastCopyTo(revImag, paddedLength);

                signal.Samples.FastCopyTo(real, frameLength, pos);

                // 0) pre-emphasis (if needed)

                if (_preEmphasis > 0.0)
                {
                    for (var k = 0; k < frameLength; k++)
                    {
                        var raw = real[k];
                        real[k]  = raw - previous * _preEmphasis;
                        previous = raw;
                    }

                    // predecessor of the first sample of the next frame
                    previous = signal[pos + hop - 1];
                }

                // 1) apply window

                if (_window != WindowTypes.Rectangular)
                {
                    real.ApplyWindow(window);
                }

                // 2) autocorrelation

                Operation.CrossCorrelate(real, imag, revReal, revImag, autocorrelation, frameLength);

                // 3) levinson-durbin

                var coefficients = new float[_order + 1];
                var error        = MathUtils.LevinsonDurbin(autocorrelation, coefficients, _order);

                coefficients[0] = error;

                // add LPC vector to output sequence

                var vector = new FeatureVector
                {
                    Features     = coefficients,
                    TimePosition = (double)pos / signal.SamplingRate
                };

                vectors.Add(vector);
            }

            return vectors;
        }
Beispiel #18
0
        /// <summary>
        /// Method for computing LPCC features.
        /// It essentially duplicates LPC extractor code
        /// (for efficient memory usage it doesn't just delegate its work to LpcExtractor)
        /// and then post-processes LPC vectors to obtain LPCC coefficients.
        ///
        /// In each frame do:
        ///     0) [Optional] pre-emphasis
        ///     1) Apply window (skipped for rectangular window)
        ///     2) Autocorrelation
        ///     3) Levinson-Durbin recursion
        ///     4) LPC -> LPCC conversion
        ///     5) [Optional] liftering
        ///
        /// </summary>
        /// <param name="signal">Signal for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of LPCC vectors (each of size FeatureCount)</returns>
        public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ====================================== PREPARE =======================================

            var hopSize       = (int)(signal.SamplingRate * HopSize);
            var frameSize     = (int)(signal.SamplingRate * FrameSize);
            var windowSamples = Window.OfType(_window, frameSize);
            var fftSize       = MathUtils.NextPowerOfTwo(2 * frameSize - 1);

            var lifterCoeffs = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;


            var blockReal    = new float[fftSize];   // buffer for real parts of the currently processed block
            var blockImag    = new float[fftSize];   // buffer for imaginary parts of the currently processed block
            var reversedReal = new float[fftSize];   // buffer for real parts of currently processed reversed block
            var reversedImag = new float[fftSize];   // buffer for imaginary parts of currently processed reversed block
            var zeroblock    = new float[fftSize];   // just a buffer of zeros for quick memset

            var cc  = new float[frameSize];          // buffer for (truncated) cross-correlation signal
            var lpc = new float[_order + 1];         // buffer for LPC coefficients

            // Only lpc[1] .. lpc[order] exist, so the plain recursion is valid for n <= order;
            // cepstral coefficients beyond that are computed by the second form of the recursion.

            var order       = lpc.Length - 1;
            var directCount = Math.Min(FeatureCount, lpc.Length);


            // ================================= MAIN PROCESSING ==================================

            var featureVectors = new List<FeatureVector>();

            var prevSample = startSample > 0 ? signal[startSample - 1] : 0.0f;

            var i = startSample;

            while (i + frameSize < endSample)
            {
                // prepare all blocks in memory for the current step:

                zeroblock.FastCopyTo(blockReal, fftSize);
                zeroblock.FastCopyTo(blockImag, fftSize);
                zeroblock.FastCopyTo(reversedReal, fftSize);
                zeroblock.FastCopyTo(reversedImag, fftSize);

                signal.Samples.FastCopyTo(blockReal, frameSize, i);


                // 0) pre-emphasis (if needed)

                if (_preEmphasis > 0.0)
                {
                    for (var k = 0; k < frameSize; k++)
                    {
                        var y = blockReal[k] - prevSample * _preEmphasis;
                        prevSample   = blockReal[k];
                        blockReal[k] = y;
                    }

                    // predecessor of the first sample of the next (overlapping) frame
                    prevSample = signal[i + hopSize - 1];
                }

                // 1) apply window

                if (_window != WindowTypes.Rectangular)
                {
                    blockReal.ApplyWindow(windowSamples);
                }

                // 2) autocorrelation

                Operation.CrossCorrelate(blockReal, blockImag, reversedReal, reversedImag, cc, frameSize);

                // 3) Levinson-Durbin

                zeroblock.FastCopyTo(lpc, lpc.Length);
                var err = MathUtils.LevinsonDurbin(cc, lpc, _order);

                // 4) simple and efficient algorithm for obtaining LPCC coefficients from LPC

                // a fresh array per frame: it is stored in the resulting feature vector
                var lpcc = new float[FeatureCount];

                lpcc[0] = (float)Math.Log(err);

                // 4.1) n <= order:   c[n] = -a[n] - 1/n * SUM(k = 1 .. n-1) { k * c[k] * a[n-k] }

                for (var n = 1; n < directCount; n++)
                {
                    var acc = 0.0f;
                    for (var k = 1; k < n; k++)
                    {
                        acc += k * lpcc[k] * lpc[n - k];
                    }
                    lpcc[n] = -lpc[n] - acc / n;
                }

                // 4.2) n > order:    c[n] = -1/n * SUM(k = n-order .. n-1) { k * c[k] * a[n-k] }
                //      (there's no a[n] term, and the sum is restricted so that n-k stays within 1 .. order)

                for (var n = lpc.Length; n < FeatureCount; n++)
                {
                    var acc = 0.0f;
                    for (var k = n - order; k < n; k++)
                    {
                        acc += k * lpcc[k] * lpc[n - k];
                    }
                    lpcc[n] = -acc / n;
                }

                // 5) (optional) liftering

                if (lifterCoeffs != null)
                {
                    lpcc.ApplyWindow(lifterCoeffs);
                }


                // add LPCC vector to output sequence

                featureVectors.Add(new FeatureVector
                {
                    Features     = lpcc,
                    TimePosition = (double)i / signal.SamplingRate
                });

                i += hopSize;
            }

            return featureVectors;
        }
Beispiel #19
0
        /// <summary>
        /// Computes MFCC feature vectors.
        ///
        /// The signal is cut into overlapping frames (consecutive frames start hopSize samples apart);
        /// each frame is placed into a zeroed buffer of fftSize samples and goes through these stages:
        ///
        ///     0) [Optional] pre-emphasis
        ///     1) Windowing (skipped if rectangular window was specified)
        ///     2) Power spectrum
        ///     3) Mel filterbank + log (FilterBanks.ApplyAndLog)
        ///     4) DCT-II
        ///     5) [Optional] liftering
        ///
        /// </summary>
        /// <param name="signal">Signal for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of mfcc vectors</returns>
        public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ------------------------------------ preparation ------------------------------------

            var hop         = (int)(signal.SamplingRate * HopSize);
            var frameLength = (int)(signal.SamplingRate * FrameSize);
            var window      = Window.OfType(_window, frameLength);

            // FFT size is enlarged if the configured one can't hold the whole frame

            var fftSize = _fftSize;
            if (fftSize < frameLength)
            {
                fftSize = MathUtils.NextPowerOfTwo(frameLength);
            }

            var melBands = FilterBanks.MelBands(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq);

            _melFilterBank = FilterBanks.Triangular(fftSize, signal.SamplingRate, melBands);

            var lifter = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;

            var fft = new Fft(fftSize);
            var dct = new Dct2(_filterbankSize, FeatureCount);

            // buffers reused for every frame

            var powerSpectrum = new float[fftSize / 2 + 1];
            var logMel        = new float[_filterbankSize];

            var frame = new float[fftSize];     // currently processed (zero-padded) frame
            var zeros = new float[fftSize];     // source of zeros for resetting the frame buffer


            // ---------------------------------- main processing ----------------------------------

            var vectors = new List<FeatureVector>();

            var previous = startSample > 0 ? signal[startSample - 1] : 0.0f;

            for (var pos = startSample; pos + frameLength < endSample; pos += hop)
            {
                // load next frame

                zeros.FastCopyTo(frame, zeros.Length);
                signal.Samples.FastCopyTo(frame, window.Length, pos);

                // 0) pre-emphasis (if needed)

                if (_preEmphasis > 0.0)
                {
                    for (var k = 0; k < frameLength; k++)
                    {
                        var raw = frame[k];
                        frame[k] = raw - previous * _preEmphasis;
                        previous = raw;
                    }

                    // predecessor of the first sample of the next frame
                    previous = signal[pos + hop - 1];
                }

                // 1) apply window

                if (_window != WindowTypes.Rectangular)
                {
                    frame.ApplyWindow(window);
                }

                // 2) calculate power spectrum

                fft.PowerSpectrum(frame, powerSpectrum);

                // 3) apply mel filterbank and take log() of the result

                FilterBanks.ApplyAndLog(_melFilterBank, powerSpectrum, logMel);

                // 4) dct-II

                var coefficients = new float[FeatureCount];
                dct.Direct(logMel, coefficients);

                // 5) (optional) liftering

                if (lifter != null)
                {
                    coefficients.ApplyWindow(lifter);
                }

                // add mfcc vector to output sequence

                var vector = new FeatureVector
                {
                    Features     = coefficients,
                    TimePosition = (double)pos / signal.SamplingRate
                };

                vectors.Add(vector);
            }

            return vectors;
        }
Beispiel #20
0
        /// <summary>
        /// Method for computing modulation spectra.
        /// Each vector representing one modulation spectrum is a flattened version of 2D spectrum
        /// (for every band: _modulationFftSize / 2 + 1 values of the power spectrum of the band envelope).
        ///
        /// Band envelopes are taken from the featuregram if it was specified;
        /// otherwise they are computed from the signal (STFT + filterbank).
        /// </summary>
        /// <param name="signal">Signal under analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of flattened modulation spectra</returns>
        public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ====================================== PREPARE =======================================

            var hopSize       = (int)(signal.SamplingRate * HopSize);
            var frameSize     = (int)(signal.SamplingRate * FrameSize);
            var windowSamples = Window.OfType(_window, frameSize);

            var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

            var fft           = new Fft(fftSize);
            var modulationFft = new Fft(_modulationFftSize);

            var modulationBins = _modulationFftSize / 2 + 1;


            if (_featuregram == null && _filterbank == null)
            {
                // default filterbank; it's designed for the FFT size that is actually used for the STFT
                // (the local fftSize), so that the filters match the fftSize / 2 + 1 spectrum bins

                _filterbank = FilterBanks.Triangular(fftSize, signal.SamplingRate,
                                                     FilterBanks.MelBands(12, fftSize, signal.SamplingRate, 100, 3200));
            }

            // number of bands; it's taken from the very same source the envelopes are taken from below

            var length = _featuregram == null ? _filterbank.Length : _featuregram[0].Length;

            _featureCount = length * modulationBins;

            var modulationSamplingRate = (float)signal.SamplingRate / hopSize;
            var resolution             = modulationSamplingRate / _modulationFftSize;


            _featureDescriptions = new string[length * modulationBins];

            var idx = 0;

            for (var fi = 0; fi < length; fi++)
            {
                for (var fj = 0; fj < modulationBins; fj++)
                {
                    _featureDescriptions[idx++] = string.Format("band_{0}_mf_{1:F2}_Hz", fi + 1, fj * resolution);
                }
            }


            // ================================= MAIN PROCESSING ==================================

            var featureVectors = new List<FeatureVector>();

            int envelopeLength;

            if (_featuregram == null)
            {
                // number of frames produced by the loop below

                var span       = endSample - startSample - frameSize;
                var frameCount = span > 0 ? (span - 1) / hopSize + 1 : 0;

                // envelopes keep their usual size, unless it's insufficient for all frames

                var capacity = Math.Max(signal.Length / hopSize, frameCount);

                _envelopes = new float[_filterbank.Length][];
                for (var n = 0; n < _envelopes.Length; n++)
                {
                    _envelopes[n] = new float[capacity];
                }

                var prevSample = startSample > 0 ? signal[startSample - 1] : 0.0f;


                // ===================== compute local FFTs (do STFT) =======================

                var spectrum         = new float[fftSize / 2 + 1];
                var filteredSpectrum = new float[_filterbank.Length];

                var block     = new float[fftSize];       // buffer for currently processed signal block at each step
                var zeroblock = new float[fftSize];       // buffer of zeros for quick memset

                var en = 0;

                for (var i = startSample; i + frameSize < endSample; i += hopSize)
                {
                    zeroblock.FastCopyTo(block, zeroblock.Length);
                    signal.Samples.FastCopyTo(block, frameSize, i);

                    // 0) pre-emphasis (if needed)
                    //    (this is the only place where pre-emphasis is done:
                    //     the signal must not be pre-filtered as a whole in addition to this)

                    if (_preEmphasis > 0.0)
                    {
                        for (var k = 0; k < frameSize; k++)
                        {
                            var y = block[k] - prevSample * _preEmphasis;
                            prevSample = block[k];
                            block[k]   = y;
                        }

                        // predecessor of the first sample of the next (overlapping) frame
                        prevSample = signal[i + hopSize - 1];
                    }

                    // 1) apply window

                    if (_window != WindowTypes.Rectangular)
                    {
                        block.ApplyWindow(windowSamples);
                    }

                    // 2) calculate power spectrum

                    fft.PowerSpectrum(block, spectrum);

                    // 3) apply filterbank...

                    FilterBanks.Apply(_filterbank, spectrum, filteredSpectrum);

                    // ...and save results for future calculations

                    for (var n = 0; n < _envelopes.Length; n++)
                    {
                        _envelopes[n][en] = filteredSpectrum[n];
                    }
                    en++;
                }

                envelopeLength = en;
            }
            else
            {
                // envelopes are the columns of the featuregram

                envelopeLength = _featuregram.Length;

                _envelopes = new float[_featuregram[0].Length][];

                for (var n = 0; n < _envelopes.Length; n++)
                {
                    _envelopes[n] = new float[envelopeLength];
                    for (var t = 0; t < envelopeLength; t++)
                    {
                        _envelopes[n][t] = _featuregram[t][n];
                    }
                }
            }

            // =========================== modulation analysis =======================

            // long-term AVG-normalization (by the mean absolute value of each envelope)

            foreach (var envelope in _envelopes)
            {
                var avg = 0.0f;
                for (var k = 0; k < envelopeLength; k++)
                {
                    avg += Math.Abs(envelope[k]);
                }
                avg /= envelopeLength;

                if (avg >= 1e-10)   // this happens more frequently
                {
                    for (var k = 0; k < envelopeLength; k++)
                    {
                        envelope[k] /= avg;
                    }
                }
            }

            var modBlock     = new float[_modulationFftSize];
            var zeroModblock = new float[_modulationFftSize];
            var modSpectrum  = new float[modulationBins];

            for (var pos = 0; pos < envelopeLength; pos += _modulationHopSize)
            {
                var vector = new float[_envelopes.Length * modulationBins];
                var offset = 0;

                // the last portions of envelopes may be shorter than the modulation FFT size

                var count = Math.Min(_modulationFftSize, envelopeLength - pos);

                foreach (var envelope in _envelopes)
                {
                    zeroModblock.FastCopyTo(modBlock, _modulationFftSize);
                    envelope.FastCopyTo(modBlock, count, pos);

                    modulationFft.PowerSpectrum(modBlock, modSpectrum);
                    modSpectrum.FastCopyTo(vector, modSpectrum.Length, 0, offset);

                    offset += modSpectrum.Length;
                }

                featureVectors.Add(new FeatureVector
                {
                    Features     = vector,
                    TimePosition = (double)pos * hopSize / signal.SamplingRate
                });
            }

            return featureVectors;
        }
Beispiel #21
0
        /// <summary>
        /// Phase Vocoder algorithm.
        /// If phase locking is switched on, the work is delegated to PhaseLocking().
        /// Otherwise frames of _fftSize samples are taken every _hopAnalysis samples, windowed and
        /// transformed; the phase of each bin is re-accumulated for the synthesis hop, the frames are
        /// overlap-added every _hopSynthesis samples and the result is normalized by the
        /// accumulated squared window.
        /// </summary>
        /// <param name="signal">Signal to process</param>
        /// <param name="filteringOptions">Not used by this method</param>
        /// <returns>New signal with the sampling rate of the input signal</returns>
        public DiscreteSignal ApplyTo(DiscreteSignal signal,
                                      FilteringOptions filteringOptions = FilteringOptions.Auto)
        {
            if (_phaseLocking)
            {
                return PhaseLocking(signal);
            }

            var source = signal.Samples;
            var result = new float[(int)(source.Length * _stretch) + _fftSize];

            // sum of squared window values that contributed to each output sample

            var windowEnergy = new float[result.Length];

            var real  = new float[_fftSize];
            var imag  = new float[_fftSize];
            var zeros = new float[_fftSize];

            // number of bins processed in each frame

            var binCount = _fftSize / 2 + 1;

            var lastPhase        = new double[binCount];
            var accumulatedPhase = new double[binCount];

            var writePos = 0;
            var readPos  = 0;

            while (readPos + _fftSize < source.Length)
            {
                // ---------------------------- analysis ----------------------------

                source.FastCopyTo(real, _fftSize, readPos);
                zeros.FastCopyTo(imag, _fftSize);

                real.ApplyWindow(_window);

                _fft.Direct(real, imag);

                // ------------------------ phase processing ------------------------

                for (var bin = 0; bin < binCount; bin++)
                {
                    var magnitude    = Math.Sqrt(real[bin] * real[bin] + imag[bin] * imag[bin]);
                    var currentPhase = Math.Atan2(imag[bin], real[bin]);

                    // deviation of the measured phase increment from the one expected
                    // for the bin frequency, mapped into [-pi, pi)

                    var deviation = (currentPhase - lastPhase[bin]) - _hopAnalysis * _omega[bin];
                    var wrapped   = MathUtils.Mod(deviation + Math.PI, 2 * Math.PI) - Math.PI;

                    var frequency = _omega[bin] + wrapped / _hopAnalysis;

                    accumulatedPhase[bin] += _hopSynthesis * frequency;
                    lastPhase[bin]         = currentPhase;

                    real[bin] = (float)(magnitude * Math.Cos(accumulatedPhase[bin]));
                    imag[bin] = (float)(magnitude * Math.Sin(accumulatedPhase[bin]));
                }

                // the remaining bins are zeroed before going back to time domain

                Array.Clear(real, binCount, _fftSize - binCount);
                Array.Clear(imag, binCount, _fftSize - binCount);

                // ---------------------------- synthesis ---------------------------

                _fft.Inverse(real, imag);

                for (var k = 0; k < _fftSize; k++)
                {
                    result[writePos + k]       += real[k] * _window[k];
                    windowEnergy[writePos + k] += _windowSquared[k];
                }

                writePos += _hopSynthesis;
                readPos  += _hopAnalysis;
            }

            // normalization; samples with (almost) no window contribution are left as they are

            for (var k = 0; k < result.Length; k++)
            {
                if (!(windowEnergy[k] < 1e-3))
                {
                    result[k] /= (windowEnergy[k] * _fftSize / 2);
                }
            }

            return new DiscreteSignal(signal.SamplingRate, result);
        }
        /// <summary>
        /// PNCC algorithm according to [Kim & Stern, 2016]:
        ///     0) [Optional] pre-emphasis
        ///
        /// Decompose signal into overlapping (hopSize) frames of length fftSize. In each frame do:
        ///
        ///     1) Apply window (if rectangular window was specified then just do nothing)
        ///     2) Obtain power spectrum
        ///     3) Apply gammatone filters (squared)
        ///     4) Medium-time processing (asymmetric noise suppression, temporal masking, spectral smoothing)
        ///     5) Apply nonlinearity
        ///     6) Do dct-II (normalized)
        ///
        /// </summary>
        /// <param name="signal">Signal for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of pncc vectors</returns>
        public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ====================================== PREPARE =======================================

            // frame / hop durations are converted to sample counts for this signal's sampling rate
            var hopSize = (int)(signal.SamplingRate * HopSize);
            var frameSize = (int)(signal.SamplingRate * FrameSize);
            var windowSamples = Window.OfType(_window, frameSize);

            // effective FFT size: the configured one if it can hold a whole frame,
            // otherwise the next power of two that can
            var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

            // The filter bank is built for the *effective* FFT size, i.e. the same size that is used
            // for the FFT and for the spectrum buffer below. (Using the raw configured _fftSize here
            // would make the filter bank inconsistent with the spectrum whenever _fftSize < frameSize.)
            _gammatoneFilterBank = FilterBanks.Erb(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq);

            // use power spectrum: square every filter weight in place

            foreach (var filter in _gammatoneFilterBank)
            {
                for (var j = 0; j < filter.Length; j++)
                {
                    var ps = filter[j] * filter[j];
                    filter[j] = ps;
                }
            }


            var fft = new Fft(fftSize);
            var dct = new Dct2(_filterbankSize, FeatureCount);


            // working buffers (one value per filter bank channel), reused across frames

            var gammatoneSpectrum = new float[_filterbankSize];

            var spectrumQOut = new float[_filterbankSize];
            var filteredSpectrumQ = new float[_filterbankSize];
            var spectrumS = new float[_filterbankSize];
            var smoothedSpectrumS = new float[_filterbankSize];
            var avgSpectrumQ1 = new float[_filterbankSize];
            var avgSpectrumQ2 = new float[_filterbankSize];
            var smoothedSpectrum = new float[_filterbankSize];

            const float meanPower = 1e10f;      // target power used in mean power normalization (4.5)
            var mean = 4e07f;                   // initial value of the running mean power estimate

            // exponent of the power nonlinearity; unused when _power == 0 (Log10 is applied instead)
            var d = _power != 0 ? 1.0 / _power : 0.0;

            var block = new float[fftSize];       // buffer for currently processed signal block at each step
            var zeroblock = new float[fftSize];   // buffer of zeros for quick memset

            // ring buffer holds the last 2M+1 gammatone spectra for temporal integration
            _ringBuffer = new SpectraRingBuffer(2 * M + 1, _filterbankSize);

            var spectrum = new float[fftSize / 2 + 1];


            // 0) pre-emphasis (if needed)

            if (_preEmphasis > 0.0)
            {
                var preemphasisFilter = new PreEmphasisFilter(_preEmphasis);
                signal = preemphasisFilter.ApplyTo(signal);
            }


            // ================================= MAIN PROCESSING ==================================

            var featureVectors = new List<FeatureVector>();

            var i = 0;                      // index of the current frame
            var timePos = startSample;      // position of the first sample of the current frame

            while (timePos + frameSize < endSample)
            {
                // prepare next block for processing:
                // reset the block to zeros, then copy the next frame into its beginning

                zeroblock.FastCopyTo(block, zeroblock.Length);
                signal.Samples.FastCopyTo(block, frameSize, timePos);


                // 1) apply window

                if (_window != WindowTypes.Rectangular)
                {
                    block.ApplyWindow(windowSamples);
                }


                // 2) calculate power spectrum

                fft.PowerSpectrum(block, spectrum);


                // 3) apply gammatone filterbank

                FilterBanks.Apply(_gammatoneFilterBank, spectrum, gammatoneSpectrum);


                // =============================================================
                // 4) medium-time processing blocks:

                // 4.1) temporal integration (zero-phase moving average filter)

                _ringBuffer.Add(gammatoneSpectrum);
                var spectrumQ = _ringBuffer.AverageSpectrum;

                // 4.2) asymmetric noise suppression

                // first frame that is processed (i == 2M): initialize the lower envelope
                if (i == 2 * M)
                {
                    for (var j = 0; j < spectrumQOut.Length; j++)
                    {
                        spectrumQOut[j] = spectrumQ[j] * 0.9f;
                    }
                }

                // no feature vectors are produced for the first 2M frames
                if (i >= 2 * M)
                {
                    // asymmetric smoothing: LambdaA is used while the input is above the envelope,
                    // LambdaB otherwise
                    for (var j = 0; j < spectrumQOut.Length; j++)
                    {
                        if (spectrumQ[j] > spectrumQOut[j])
                        {
                            spectrumQOut[j] = LambdaA * spectrumQOut[j] + (1 - LambdaA) * spectrumQ[j];
                        }
                        else
                        {
                            spectrumQOut[j] = LambdaB * spectrumQOut[j] + (1 - LambdaB) * spectrumQ[j];
                        }
                    }

                    for (var j = 0; j < filteredSpectrumQ.Length; j++)
                    {
                        // half-wave rectification of (input - envelope)
                        filteredSpectrumQ[j] = Math.Max(spectrumQ[j] - spectrumQOut[j], 0.0f);

                        // first processed frame: initialize the floor and the masking peak
                        if (i == 2 * M)
                        {
                            avgSpectrumQ1[j] = 0.9f * filteredSpectrumQ[j];
                            avgSpectrumQ2[j] = filteredSpectrumQ[j];
                        }

                        // second asymmetric filter: tracks the floor of the rectified spectrum
                        if (filteredSpectrumQ[j] > avgSpectrumQ1[j])
                        {
                            avgSpectrumQ1[j] = LambdaA * avgSpectrumQ1[j] + (1 - LambdaA) * filteredSpectrumQ[j];
                        }
                        else
                        {
                            avgSpectrumQ1[j] = LambdaB * avgSpectrumQ1[j] + (1 - LambdaB) * filteredSpectrumQ[j];
                        }

                        // 4.3) temporal masking

                        // remember the rectified value before it is possibly replaced below
                        var threshold = filteredSpectrumQ[j];

                        avgSpectrumQ2[j] *= LambdaT;
                        if (spectrumQ[j] < C * spectrumQOut[j])
                        {
                            filteredSpectrumQ[j] = avgSpectrumQ1[j];
                        }
                        else
                        {
                            if (filteredSpectrumQ[j] <= avgSpectrumQ2[j])
                            {
                                filteredSpectrumQ[j] = MuT * avgSpectrumQ2[j];
                            }
                        }
                        avgSpectrumQ2[j] = Math.Max(avgSpectrumQ2[j], threshold);

                        // the result never falls below the floor
                        filteredSpectrumQ[j] = Math.Max(filteredSpectrumQ[j], avgSpectrumQ1[j]);
                    }


                    // 4.4) spectral smoothing

                    // per-channel ratio (guarded against division by zero)
                    for (var j = 0; j < spectrumS.Length; j++)
                    {
                        spectrumS[j] = filteredSpectrumQ[j] / Math.Max(spectrumQ[j], float.Epsilon);
                    }

                    // average the ratio over up to N neighbouring channels on each side
                    // (the range is clipped at the edges of the filter bank)
                    for (var j = 0; j < smoothedSpectrumS.Length; j++)
                    {
                        smoothedSpectrumS[j] = 0.0f;

                        var total = 0;
                        for (var k = Math.Max(j - N, 0);
                             k < Math.Min(j + N + 1, _filterbankSize);
                             k++, total++)
                        {
                            smoothedSpectrumS[j] += spectrumS[k];
                        }
                        smoothedSpectrumS[j] /= total;
                    }

                    // 4.5) mean power normalization

                    var centralSpectrum = _ringBuffer.CentralSpectrum;

                    var sumPower = 0.0f;
                    for (var j = 0; j < smoothedSpectrum.Length; j++)
                    {
                        smoothedSpectrum[j] = smoothedSpectrumS[j] * centralSpectrum[j];
                        sumPower += smoothedSpectrum[j];
                    }

                    // running estimate of the mean power
                    mean = LambdaMu * mean + (1 - LambdaMu) * sumPower;

                    // the gain is the same for all channels of the frame
                    var gain = meanPower / mean;

                    for (var j = 0; j < smoothedSpectrum.Length; j++)
                    {
                        smoothedSpectrum[j] *= gain;
                    }

                    // =============================================================


                    // 5) nonlinearity (power ^ d    or    Log10)

                    if (_power != 0)
                    {
                        for (var j = 0; j < smoothedSpectrum.Length; j++)
                        {
                            smoothedSpectrum[j] = (float)Math.Pow(smoothedSpectrum[j], d);
                        }
                    }
                    else
                    {
                        for (var j = 0; j < smoothedSpectrum.Length; j++)
                        {
                            smoothedSpectrum[j] = (float)Math.Log10(smoothedSpectrum[j] + float.Epsilon);
                        }
                    }

                    // 6) dct-II (normalized)

                    var pnccs = new float[FeatureCount];
                    dct.DirectN(smoothedSpectrum, pnccs);


                    // add pncc vector to output sequence

                    featureVectors.Add(new FeatureVector
                    {
                        Features = pnccs,
                        TimePosition = (double)timePos / signal.SamplingRate
                    });
                }

                i++;

                timePos += hopSize;
            }

            return featureVectors;
        }
Beispiel #23
0
        /// <summary>
        /// Computes the PLP feature vector of one frame.
        /// The frame goes through the following stages:
        ///
        ///     1) Windowing
        ///     2) Power spectrum
        ///     3) Filterbank of bark bands (or mel bands)
        ///     4) [Optional] RASTA filtering of each component of the band spectrum
        ///     5) Equal loudness curve
        ///     6) Cubic root
        ///     7) LPC
        ///     8) Conversion of LPC to cepstrum
        ///     9) [Optional] liftering of the cepstrum
        ///
        /// </summary>
        /// <param name="block">Samples for analysis (the array is overwritten: its tail is zeroed and the window is applied)</param>
        /// <returns>PLP vector</returns>
        public override float[] ProcessFrame(float[] block)
        {
            // zero everything past the frame (padding up to the block size)

            for (var pad = FrameSize; pad < block.Length; pad++)
            {
                block[pad] = 0;
            }

            // 1) windowing

            block.ApplyWindow(_windowSamples);

            // 2) power spectrum (without normalization)

            _fft.PowerSpectrum(block, _spectrum, false);

            // 3) filterbank (bark frequencies by default)

            FilterBanks.Apply(FilterBank, _spectrum, _bandSpectrum);

            // 4) [optional] RASTA filtering in log-domain

            if (_rasta > 0)
            {
                for (var band = 0; band < _bandSpectrum.Length; band++)
                {
                    float logEnergy = (float)Math.Log(_bandSpectrum[band] + float.Epsilon);
                    float filtered = _rastaFilters[band].Process(logEnergy);

                    _bandSpectrum[band] = (float)Math.Exp(filtered);
                }
            }

            // 5) and 6) equal loudness curve followed by cubic root

            for (var band = 0; band < _bandSpectrum.Length; band++)
            {
                var weighted = Math.Max(_bandSpectrum[band], 1.0) * _equalLoudnessCurve[band];

                _bandSpectrum[band] = (float)Math.Pow(weighted, 0.33);
            }

            // 7) LPC from power spectrum:
            //    autocorrelation samples are obtained from the post-processed power spectrum (via IDFT)

            var tableWidth = _idftTable[0].Length;
            var lastBand = tableWidth - 3;
            var scale = 2 * (tableWidth - 1);

            for (var lag = 0; lag < _idftTable.Length; lag++)
            {
                var row = _idftTable[lag];

                // the two duplicated edges go first
                var sum = row[0] * _bandSpectrum[0] +
                          row[tableWidth - 1] * _bandSpectrum[lastBand];

                for (var bin = 1; bin < tableWidth - 1; bin++)
                {
                    sum += row[bin] * _bandSpectrum[bin - 1];
                }

                _cc[lag] = sum / scale;
            }

            // reset LPC coefficients before running Levinson-Durbin

            for (var c = 0; c < _lpc.Length; c++)
            {
                _lpc[c] = 0;
            }

            var err = Lpc.LevinsonDurbin(_cc, _lpc, _lpcOrder);

            // 8) LPCC coefficients from LPC

            var lpcc = new float[FeatureCount];

            Lpc.ToCepstrum(_lpc, err, lpcc);

            // 9) [optional] liftering

            if (_lifterCoeffs == null)
            {
                return lpcc;
            }

            lpcc.ApplyWindow(_lifterCoeffs);

            return lpcc;
        }