Beispiel #1
0
        /// <summary>
        /// Pitch estimation: from spectral peaks.
        /// The requested fragment of the signal is weighted with a Hann window, its power spectrum
        /// is computed, and the estimation itself is delegated to the spectrum-based overload.
        /// </summary>
        /// <param name="signal">Signal to analyze. Windowing is applied to a fragment taken through the
        /// range indexer rather than to this instance (assumes the indexer returns a copy - confirm).</param>
        /// <param name="startPos">Position of the first sample of the analyzed fragment</param>
        /// <param name="endPos">End position of the analyzed fragment; -1 stands for the signal length</param>
        /// <param name="low">Lower bound forwarded to the spectrum-based overload (presumably pitch range in Hz - confirm)</param>
        /// <param name="high">Upper bound forwarded to the spectrum-based overload (presumably pitch range in Hz - confirm)</param>
        /// <param name="fftSize">FFT size; if not positive, MathUtils.NextPowerOfTwo of the fragment length is used</param>
        /// <returns>Pitch estimate produced by the spectrum-based overload</returns>
        public static float FromSpectralPeaks(DiscreteSignal signal,
                                              int startPos = 0,
                                              int endPos   = -1,
                                              float low    = 80,
                                              float high   = 400,
                                              int fftSize  = 0)
        {
            if (endPos == -1)
            {
                endPos = signal.Length;
            }

            // ApplyWindow modifies the signal it is called on. Always take the fragment through
            // the indexer (also when the whole signal is requested), so that the window is never
            // applied directly to the instance passed in by the caller.
            var block = signal[startPos, endPos];

            block.ApplyWindow(WindowTypes.Hann);

            var size = fftSize > 0 ? fftSize : MathUtils.NextPowerOfTwo(block.Length);
            var fft  = new Fft(size);

            var spectrum = fft.PowerSpectrum(block, false).Samples;

            return FromSpectralPeaks(spectrum, block.SamplingRate, low, high);
        }
Beispiel #2
0
        /// <summary>
        /// Computes the non-normalized power spectrum of the 512-sample fragment
        /// that starts at the hop with the given index.
        /// </summary>
        /// <param name="idx">Index of the hop</param>
        /// <returns>Samples of the power spectrum</returns>
        float[] ComputeSpectrum(int idx)
        {
            // HopSize is multiplied by the sampling rate to get the offset in samples
            var start = (int)(_signal.SamplingRate * HopSize * idx);
            var frame = _signal[start, start + 512];

            var powerSpectrum = _fft.PowerSpectrum(frame, normalize: false);

            return powerSpectrum.Samples;
        }
Beispiel #3
0
        /// <summary>
        /// Re-reads FFT size, cepstrum size and hop size from the text boxes, recomputes
        /// the power spectrum, cepstrum and pitch of the current block and refreshes both plots.
        /// If any of the text boxes does not contain a valid number, nothing is updated.
        /// </summary>
        private void UpdateSpectrumAndCepstrum()
        {
            // The text boxes hold free-form user input: skip the update instead of throwing
            // FormatException / OverflowException from inside a UI handler.
            int fftSize;
            int cepstrumSize;
            int hopSize;

            if (!int.TryParse(fftSizeTextBox.Text, out fftSize) ||
                !int.TryParse(cepstrumSizeTextBox.Text, out cepstrumSize) ||
                !int.TryParse(hopSizeTextBox.Text, out hopSize))
            {
                return;
            }

            // both values are used as sizes (FFT buffers below are allocated with fftSize)
            if (fftSize <= 0 || cepstrumSize <= 0)
            {
                return;
            }

            _hopSize = hopSize;

            if (fftSize != _fftSize)
            {
                _fftSize           = fftSize;
                _fft               = new Fft(fftSize);
                _cepstralTransform = new CepstralTransform(cepstrumSize, _fftSize);
            }

            if (cepstrumSize != _cepstrumSize)
            {
                _cepstrumSize      = cepstrumSize;
                _cepstralTransform = new CepstralTransform(_cepstrumSize, _fftSize);
            }

            var pos   = _hopSize * _specNo;
            var block = _signal[pos, pos + _fftSize];

            //block.ApplyWindow(WindowTypes.Hamming);

            var cepstrum = _cepstralTransform.Direct(block);

            var pitch = Pitch.FromCepstrum(block);

            // ************************************************************************
            //      just visualize spectrum estimated from cepstral coefficients:
            // ************************************************************************

            var real = new float[_fftSize];
            var imag = new float[_fftSize];

            // At most 32 first cepstral coefficients are used for the estimate;
            // fewer are taken if the FFT buffer or the cepstrum itself is shorter than that.
            var coeffCount = Math.Min(32, Math.Min(real.Length, cepstrum.Length));

            for (var i = 0; i < coeffCount; i++)
            {
                real[i] = cepstrum[i];
            }

            _fft.Direct(real, imag);

            var spectrum = _fft.PowerSpectrum(block, normalize: false).Samples;
            var avg      = spectrum.Average(s => LevelScale.ToDecibel(s));

            var spectrumEstimate = real.Take(_fftSize / 2 + 1)
                                   .Select(s => (float)LevelScale.FromDecibel(s * 40 / _fftSize - avg))
                                   .ToArray();

            spectrumPanel.Line     = spectrum;
            spectrumPanel.Markline = spectrumEstimate;
            spectrumPanel.ToDecibel();

            cepstrumPanel.Line = cepstrum.Samples;
            cepstrumPanel.Mark = (int)(_signal.SamplingRate / pitch);
        }
        /// <summary>
        /// Generates a signal with the builder selected in the combo box, shows the builder
        /// parameters and basic statistics of the generated samples, superimposes the generated
        /// signal onto the first signal (if there is one) and plots the signal and the power
        /// spectrum of its first 512 samples.
        /// Nothing is done if the duration text box does not contain a positive integer.
        /// </summary>
        /// <param name="sender">Event source (not used)</param>
        /// <param name="e">Event arguments (not used)</param>
        private void generateSignalButton_Click(object sender, EventArgs e)
        {
            // The duration is free-form user input: ignore the click instead of throwing.
            // A non-positive length is rejected too (min / max / avg below need at least one sample).
            int sampleCount;

            if (!int.TryParse(durationTextBox.Text, out sampleCount) || sampleCount <= 0)
            {
                return;
            }

            // reuse the sampling rate of the first signal if it is present
            var samplingRate = _signal1?.SamplingRate ?? 16000;

            SignalBuilder signalBuilder;

            switch (builderComboBox.Text)
            {
            case "Sinusoid":
                signalBuilder = new SineBuilder();
                _signal2      = signalBuilder
                                .SetParameter("low", -0.4f)
                                .SetParameter("high", 0.4f)
                                .SetParameter("freq", 233 /*Hz*/)
                                .OfLength(sampleCount)
                                .SampledAt(samplingRate)
                                .Build();
                break;

            case "Sawtooth":
                signalBuilder = new SawtoothBuilder();
                _signal2      = signalBuilder
                                .SetParameter("low", -0.3f)
                                .SetParameter("high", 0.3f)
                                .SetParameter("freq", 233 /*Hz*/)
                                .OfLength(sampleCount)
                                .SampledAt(samplingRate)
                                .Build();
                break;

            case "Triangle Wave":
                signalBuilder = new TriangleWaveBuilder();
                _signal2      = signalBuilder
                                .SetParameter("low", -0.3f)
                                .SetParameter("high", 0.3f)
                                .SetParameter("freq", 233 /*Hz*/)
                                .OfLength(sampleCount)
                                .SampledAt(samplingRate)
                                .Build();
                break;

            case "Square Wave":
                signalBuilder = new SquareWaveBuilder();
                _signal2      = signalBuilder
                                .SetParameter("low", -0.25f)
                                .SetParameter("high", 0.25f)
                                .SetParameter("freq", 233 /*Hz*/)
                                .OfLength(sampleCount)
                                .SampledAt(samplingRate)
                                .Build();
                break;

            case "Pulse Wave":
                signalBuilder = new PulseWaveBuilder();
                _signal2      = signalBuilder
                                .SetParameter("min", 0)
                                .SetParameter("max", 0.5f)
                                .SetParameter("pulse", 0.007f /*sec*/)
                                .SetParameter("period", 0.020f /*sec*/)
                                .OfLength(sampleCount)
                                .DelayedBy(50)
                                .SampledAt(samplingRate)
                                .Build();
                break;

            case "Chirp":
                signalBuilder = new ChirpBuilder();
                _signal2      = signalBuilder
                                .SetParameter("min", -0.3f)
                                .SetParameter("max", 0.3f)
                                .OfLength(sampleCount)
                                .RepeatedTimes(3)
                                .SampledAt(samplingRate)
                                .Build();
                break;

            case "Sinc":
                signalBuilder = new SincBuilder();
                _signal2      = signalBuilder
                                .SetParameter("min", 0)
                                .SetParameter("max", 0.5f)
                                .SetParameter("freq", 700 /*Hz*/)
                                .OfLength(sampleCount)
                                .SampledAt(samplingRate)
                                .Build();
                break;

            case "Ramp":
                signalBuilder = new RampBuilder();
                _signal2      = signalBuilder
                                .SetParameter("slope", 0.0007f)
                                .SetParameter("intercept", -0.5f)
                                .OfLength(sampleCount)
                                .SampledAt(samplingRate)
                                .Build();
                break;

            case "AWGN":
                signalBuilder = new AwgnBuilder();
                _signal2      = signalBuilder
                                .SetParameter("sigma", 0.25f)
                                .OfLength(sampleCount)
                                .SampledAt(samplingRate)
                                .Build();
                break;

            case "Pink Noise":
                signalBuilder = new PinkNoiseBuilder();
                _signal2      = signalBuilder
                                .SetParameter("min", -0.5f)
                                .SetParameter("max", 0.5f)
                                .OfLength(sampleCount)
                                .SampledAt(samplingRate)
                                .Build();
                break;

            case "Red Noise":
                signalBuilder = new RedNoiseBuilder();
                _signal2      = signalBuilder
                                .SetParameter("min", -0.5f)
                                .SetParameter("max", 0.5f)
                                .OfLength(sampleCount)
                                .SampledAt(samplingRate)
                                .Build();
                break;

            case "Perlin Noise":
                signalBuilder = new PerlinNoiseBuilder();
                _signal2      = signalBuilder
                                .SetParameter("min", -0.3f)
                                .SetParameter("max", 0.7f)
                                .OfLength(sampleCount)
                                .SampledAt(samplingRate)
                                .Build();
                break;

            // any other text in the combo box falls back to white noise
            default:
                signalBuilder = new WhiteNoiseBuilder();
                _signal2      = signalBuilder
                                .SetParameter("min", -0.5f)
                                .SetParameter("max", 0.5f)
                                .OfLength(sampleCount)
                                .SampledAt(samplingRate)
                                .Build();
                break;
            }

            // builder parameters followed by min / max / avg of the generated samples
            builderParametersListBox.Items.Clear();
            builderParametersListBox.Items.AddRange(signalBuilder.GetParametersInfo());
            builderParametersListBox.Items.Add("");
            builderParametersListBox.Items.Add($"min: {_signal2.Samples.Min():F2}");
            builderParametersListBox.Items.Add($"max: {_signal2.Samples.Max():F2}");
            builderParametersListBox.Items.Add($"avg: {_signal2.Samples.Average():F4}");

            if (_signal1 != null)
            {
                //_signal3 = _signal1 + _signal2;

                // three insertion positions spaced by the generated signal length plus 2000 samples
                var positions = Enumerable.Range(0, 3).Select(pos => pos * (_signal2.Length + 2000)).ToArray();
                _signal3 = _signal1.SuperimposeMany(_signal2, positions);
                superimposedSignalPanel.Signal = _signal3;
            }

            generatedSignalPanel.Stride = 1;
            generatedSignalPanel.Signal = _signal2;

            var spectrum = _fft.PowerSpectrum(_signal2.First(512));

            spectrumPanel.Line = spectrum.Samples;
            spectrumPanel.ToDecibel();
        }
        /// <summary>
        /// PNCC algorithm according to [Kim and Stern, 2016]:
        ///     0) [Optional] pre-emphasis
        ///
        /// Decompose signal into overlapping (hopSize) frames of length fftSize. In each frame do:
        ///
        ///     1) Apply window (if rectangular window was specified then just do nothing)
        ///     2) Obtain power spectrum
        ///     3) Apply gammatone filters (squared)
        ///     4) Medium-time processing (asymmetric noise suppression, temporal masking, spectral smoothing)
        ///     5) Apply nonlinearity
        ///     6) Do dct-II (normalized)
        ///
        /// The first 2*M frames only fill the ring buffer used for temporal integration;
        /// feature vectors are produced starting from the frame with index 2*M.
        /// </summary>
        /// <param name="signal">Signal for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of pncc vectors</returns>
        public override List <FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ====================================== PREPARE =======================================

            var hopSize       = (int)(signal.SamplingRate * HopSize);
            var frameSize     = (int)(signal.SamplingRate * FrameSize);
            var windowSamples = Window.OfType(_window, frameSize);

            // FFT size actually used: the configured one if it can hold a frame,
            // otherwise the next power of two of the frame size
            var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

            // The filter bank must be built for the effective FFT size (the same one that is used
            // for the FFT and the spectrum buffer below), not for the configured field value:
            // the two differ whenever _fftSize is smaller than the frame size.
            _gammatoneFilterBank = FilterBanks.Erb(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq);

            // use power spectrum:

            foreach (var filter in _gammatoneFilterBank)
            {
                for (var j = 0; j < filter.Length; j++)
                {
                    var ps = filter[j] * filter[j];
                    filter[j] = ps;
                }
            }


            var fft = new Fft(fftSize);
            var dct = new Dct2(_filterbankSize, FeatureCount);


            var gammatoneSpectrum = new float[_filterbankSize];

            var spectrumQOut      = new float[_filterbankSize];
            var filteredSpectrumQ = new float[_filterbankSize];
            var spectrumS         = new float[_filterbankSize];
            var smoothedSpectrumS = new float[_filterbankSize];
            var avgSpectrumQ1     = new float[_filterbankSize];
            var avgSpectrumQ2     = new float[_filterbankSize];
            var smoothedSpectrum  = new float[_filterbankSize];

            const float meanPower = 1e10f;
            var         mean      = 4e07f;

            // exponent of the power nonlinearity (not used if _power is zero: Log10 is applied then)
            var d = _power != 0 ? 1.0 / _power : 0.0;

            var block     = new float[fftSize];       // buffer for currently processed signal block at each step
            var zeroblock = new float[fftSize];       // buffer of zeros for quick memset

            _ringBuffer = new SpectraRingBuffer(2 * M + 1, _filterbankSize);

            var spectrum = new float[fftSize / 2 + 1];


            // 0) pre-emphasis (if needed)

            if (_preEmphasis > 0.0)
            {
                var preemphasisFilter = new PreEmphasisFilter(_preEmphasis);
                signal = preemphasisFilter.ApplyTo(signal);
            }


            // ================================= MAIN PROCESSING ==================================

            var featureVectors = new List <FeatureVector>();

            var i       = 0;                // index of the current frame
            var timePos = startSample;      // position of the first sample of the current frame

            while (timePos + frameSize < endSample)
            {
                // prepare next block for processing

                zeroblock.FastCopyTo(block, zeroblock.Length);
                signal.Samples.FastCopyTo(block, frameSize, timePos);


                // 1) apply window

                if (_window != WindowTypes.Rectangular)
                {
                    block.ApplyWindow(windowSamples);
                }


                // 2) calculate power spectrum

                fft.PowerSpectrum(block, spectrum);


                // 3) apply gammatone filterbank

                FilterBanks.Apply(_gammatoneFilterBank, spectrum, gammatoneSpectrum);



                // =============================================================
                // 4) medium-time processing blocks:

                // 4.1) temporal integration (zero-phase moving average filter)

                _ringBuffer.Add(gammatoneSpectrum);
                var spectrumQ = _ringBuffer.AverageSpectrum;

                // 4.2) asymmetric noise suppression

                // initial state of the lower envelope (set once, at frame 2*M)
                if (i == 2 * M)
                {
                    for (var j = 0; j < spectrumQOut.Length; j++)
                    {
                        spectrumQOut[j] = spectrumQ[j] * 0.9f;
                    }
                }

                if (i >= 2 * M)
                {
                    for (var j = 0; j < spectrumQOut.Length; j++)
                    {
                        if (spectrumQ[j] > spectrumQOut[j])
                        {
                            spectrumQOut[j] = LambdaA * spectrumQOut[j] + (1 - LambdaA) * spectrumQ[j];
                        }
                        else
                        {
                            spectrumQOut[j] = LambdaB * spectrumQOut[j] + (1 - LambdaB) * spectrumQ[j];
                        }
                    }

                    for (var j = 0; j < filteredSpectrumQ.Length; j++)
                    {
                        // half-wave rectification
                        filteredSpectrumQ[j] = Math.Max(spectrumQ[j] - spectrumQOut[j], 0.0f);

                        if (i == 2 * M)
                        {
                            avgSpectrumQ1[j] = 0.9f * filteredSpectrumQ[j];
                            avgSpectrumQ2[j] = filteredSpectrumQ[j];
                        }

                        if (filteredSpectrumQ[j] > avgSpectrumQ1[j])
                        {
                            avgSpectrumQ1[j] = LambdaA * avgSpectrumQ1[j] + (1 - LambdaA) * filteredSpectrumQ[j];
                        }
                        else
                        {
                            avgSpectrumQ1[j] = LambdaB * avgSpectrumQ1[j] + (1 - LambdaB) * filteredSpectrumQ[j];
                        }

                        // 4.3) temporal masking

                        // remember the rectified value before it gets overwritten below
                        var threshold = filteredSpectrumQ[j];

                        avgSpectrumQ2[j] *= LambdaT;
                        if (spectrumQ[j] < C * spectrumQOut[j])
                        {
                            filteredSpectrumQ[j] = avgSpectrumQ1[j];
                        }
                        else
                        {
                            if (filteredSpectrumQ[j] <= avgSpectrumQ2[j])
                            {
                                filteredSpectrumQ[j] = MuT * avgSpectrumQ2[j];
                            }
                        }
                        avgSpectrumQ2[j] = Math.Max(avgSpectrumQ2[j], threshold);

                        filteredSpectrumQ[j] = Math.Max(filteredSpectrumQ[j], avgSpectrumQ1[j]);
                    }


                    // 4.4) spectral smoothing

                    for (var j = 0; j < spectrumS.Length; j++)
                    {
                        // float.Epsilon guards against division by zero
                        spectrumS[j] = filteredSpectrumQ[j] / Math.Max(spectrumQ[j], float.Epsilon);
                    }

                    for (var j = 0; j < smoothedSpectrumS.Length; j++)
                    {
                        smoothedSpectrumS[j] = 0.0f;

                        // average over up to N neighbouring channels on each side (clipped at the edges)
                        var total = 0;
                        for (var k = Math.Max(j - N, 0);
                             k < Math.Min(j + N + 1, _filterbankSize);
                             k++, total++)
                        {
                            smoothedSpectrumS[j] += spectrumS[k];
                        }
                        smoothedSpectrumS[j] /= total;
                    }

                    // 4.5) mean power normalization

                    var centralSpectrum = _ringBuffer.CentralSpectrum;

                    var sumPower = 0.0f;
                    for (var j = 0; j < smoothedSpectrum.Length; j++)
                    {
                        smoothedSpectrum[j] = smoothedSpectrumS[j] * centralSpectrum[j];
                        sumPower           += smoothedSpectrum[j];
                    }

                    // running estimate of the mean power
                    mean = LambdaMu * mean + (1 - LambdaMu) * sumPower;

                    for (var j = 0; j < smoothedSpectrum.Length; j++)
                    {
                        smoothedSpectrum[j] *= meanPower / mean;
                    }

                    // =============================================================


                    // 5) nonlinearity (power ^ d    or    Log10)

                    if (_power != 0)
                    {
                        for (var j = 0; j < smoothedSpectrum.Length; j++)
                        {
                            smoothedSpectrum[j] = (float)Math.Pow(smoothedSpectrum[j], d);
                        }
                    }
                    else
                    {
                        for (var j = 0; j < smoothedSpectrum.Length; j++)
                        {
                            smoothedSpectrum[j] = (float)Math.Log10(smoothedSpectrum[j] + float.Epsilon);
                        }
                    }

                    // 6) dct-II (normalized)

                    var pnccs = new float[FeatureCount];
                    dct.DirectN(smoothedSpectrum, pnccs);


                    // add pncc vector to output sequence

                    featureVectors.Add(new FeatureVector
                    {
                        Features     = pnccs,
                        TimePosition = (double)timePos / signal.SamplingRate
                    });
                }

                i++;

                timePos += hopSize;
            }

            return(featureVectors);
        }
Beispiel #6
0
        /// <summary>
        /// Standard method for computing mfcc features:
        ///     0) [Optional] pre-emphasis
        ///
        /// Decompose signal into overlapping (hopSize) frames of length fftSize. In each frame do:
        ///
        ///     1) Apply window (if rectangular window was specified then just do nothing)
        ///     2) Obtain power spectrum X
        ///     3) Apply mel filters and log() the result: Y = Log10(X * H)
        ///     4) Do dct-II: mfcc = Dct(Y)
        ///     5) [Optional] liftering of mfcc
        ///
        /// </summary>
        /// <param name="samples">Samples for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of mfcc vectors</returns>
        public override List <FeatureVector> ComputeFrom(float[] samples, int startSample, int endSample)
        {
            Guard.AgainstInvalidRange(startSample, endSample, "starting pos", "ending pos");

            var hopSize   = HopSize;
            var frameSize = FrameSize;

            var featureVectors = new List <FeatureVector>();

            // sample preceding the first frame (needed by the pre-emphasis filter)
            var prevSample = startSample > 0 ? samples[startSample - 1] : 0.0f;

            var i = startSample;

            while (i + frameSize < endSample)
            {
                // prepare next block for processing

                _zeroblock.FastCopyTo(_block, _fftSize);
                samples.FastCopyTo(_block, _windowSamples.Length, i);


                // 0) pre-emphasis (if needed)

                if (_preEmphasis > 0.0)
                {
                    for (var k = 0; k < frameSize; k++)
                    {
                        var y = _block[k] - prevSample * _preEmphasis;
                        prevSample = _block[k];
                        _block[k]  = y;
                    }

                    // The next frame starts at (i + hopSize), so its predecessor is the sample
                    // right before that position. With a hop longer than the frame this index
                    // can point past the end of the array; in that case there is no next frame
                    // and the carried-over sample is not needed.
                    var carryPos = i + hopSize - 1;
                    if (carryPos < samples.Length)
                    {
                        prevSample = samples[carryPos];
                    }
                }


                // 1) apply window

                if (_window != WindowTypes.Rectangular)
                {
                    _block.ApplyWindow(_windowSamples);
                }


                // 2) calculate power spectrum

                _fft.PowerSpectrum(_block, _spectrum);


                // 3) apply mel filterbank and take log() of the result

                FilterBanks.ApplyAndLog(FilterBank, _spectrum, _logMelSpectrum);


                // 4) dct-II

                var mfccs = new float[FeatureCount];
                _dct.Direct(_logMelSpectrum, mfccs);


                // 5) (optional) liftering

                if (_lifterCoeffs != null)
                {
                    mfccs.ApplyWindow(_lifterCoeffs);
                }


                // add mfcc vector to output sequence

                featureVectors.Add(new FeatureVector
                {
                    Features     = mfccs,
                    TimePosition = (double)i / SamplingRate
                });

                i += hopSize;
            }

            return(featureVectors);
        }
Beispiel #7
0
        // TODO: remove this )))

        /// <summary>
        /// Computes the power spectrum of the frame that corresponds to the selected list item,
        /// lists its peaks and draws the spectrum (with peaks marked in red) into the picture box.
        /// </summary>
        /// <param name="sender">Event source (not used)</param>
        /// <param name="e">Event arguments (not used)</param>
        private void featuresListView_SelectedIndexChanged(object sender, EventArgs e)
        {
            if (featuresListView.SelectedItems.Count == 0)
            {
                return;
            }

            var pos = featuresListView.SelectedIndices[0];

            var fft = new Fft(512);

            var spectrum = fft.PowerSpectrum(_signal[pos * _hopSize, pos * _hopSize + _frameSize]).Samples;

            var peaks = new int[10];
            var freqs = new float[10];


            Harmonic.Peaks(spectrum, peaks, freqs, _signal.SamplingRate);


            peaksListBox.Items.Clear();
            for (var p = 0; p < peaks.Length; p++)
            {
                peaksListBox.Items.Add($"peak #{p+1,-2} : {freqs[p],-7} Hz");
            }


            _spectrumImage = new Bitmap(512, spectrumPictureBox.Height);

            const int stride   = 4;     // horizontal distance (in pixels) between neighbouring spectrum bins
            const int paddingX = 5;
            const int paddingY = 5;

            var min = spectrum.Min();
            var max = spectrum.Max();

            // vertical scale: fit [min, max] into the image height (no scaling for a flat spectrum)
            var height = _spectrumImage.Height;
            var gain   = max - min < 1e-6 ? 1 : (height - 2 * paddingY) / (max - min);

            var offset = (int)(height - paddingY + min * gain);

            // 'using' guarantees that GDI+ objects are released even if drawing throws
            using (var g = Graphics.FromImage(_spectrumImage))
            using (var pen = new Pen(ForeColor))
            using (var redpen = new Pen(Color.Red, 2))
            {
                g.Clear(Color.White);

                // spectrum as a polyline

                var x = paddingX + stride;

                for (var i = 1; i < spectrum.Length; i++)
                {
                    g.DrawLine(pen, x - stride, -spectrum[i - 1] * gain + offset,
                               x, -spectrum[i] * gain + offset);
                    x += stride;
                }

                // vertical red marker at each peak position

                for (var i = 0; i < peaks.Length; i++)
                {
                    g.DrawLine(redpen, paddingX + peaks[i] * stride, paddingY + offset,
                               paddingX + peaks[i] * stride, -paddingY - spectrum[peaks[i]] * gain + offset);
                }
            }

            spectrumPictureBox.Image = _spectrumImage;
        }
Beispiel #8
0
        /// <summary>
        /// Method for computing modulation spectra.
        /// Each vector representing one modulation spectrum is a flattened version of 2D spectrum.
        /// The envelopes are either computed from the samples (filter bank applied to the power
        /// spectrum of each frame) or, if a featuregram was specified, taken from its columns.
        /// </summary>
        /// <param name="samples">Samples for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of flattened modulation spectra</returns>
        public override List <FeatureVector> ComputeFrom(float[] samples, int startSample, int endSample)
        {
            Guard.AgainstInvalidRange(startSample, endSample, "starting pos", "ending pos");

            var frameSize = FrameSize;
            var hopSize   = HopSize;

            var featureVectors = new List <FeatureVector>();

            var en = 0;             // number of envelope points computed so far
            var i  = startSample;

            if (_featuregram == null)
            {
                _envelopes = new float[_filterbank.Length][];
                for (var n = 0; n < _envelopes.Length; n++)
                {
                    _envelopes[n] = new float[samples.Length / hopSize];
                }

                var prevSample = startSample > 0 ? samples[startSample - 1] : 0.0f;

                // the larger of (frameSize, hopSize) is subtracted so that both the frame
                // and the pre-emphasis carry-over sample stay inside the processed range
                var lastSample = endSample - Math.Max(frameSize, hopSize);

                // ===================== compute local FFTs (do STFT) =======================

                for (i = startSample; i < lastSample; i += hopSize)
                {
                    _zeroblock.FastCopyTo(_block, _zeroblock.Length);
                    samples.FastCopyTo(_block, frameSize, i);

                    // 0) pre-emphasis (if needed)

                    if (_preEmphasis > 1e-10)
                    {
                        for (var k = 0; k < frameSize; k++)
                        {
                            var y = _block[k] - prevSample * _preEmphasis;
                            prevSample = _block[k];
                            _block[k]  = y;
                        }
                        prevSample = samples[i + hopSize - 1];
                    }

                    // 1) apply window

                    if (_window != WindowTypes.Rectangular)
                    {
                        _block.ApplyWindow(_windowSamples);
                    }

                    // 2) calculate power spectrum

                    _fft.PowerSpectrum(_block, _spectrum);

                    // 3) apply filterbank...

                    FilterBanks.Apply(_filterbank, _spectrum, _filteredSpectrum);

                    // ...and save results for future calculations

                    for (var n = 0; n < _envelopes.Length; n++)
                    {
                        _envelopes[n][en] = _filteredSpectrum[n];
                    }
                    en++;
                }
            }
            else
            {
                // transpose the featuregram: each envelope is one feature tracked over time

                en         = _featuregram.Length;
                _envelopes = new float[_featuregram[0].Length][];

                for (var n = 0; n < _envelopes.Length; n++)
                {
                    _envelopes[n] = new float[en];
                    for (i = 0; i < en; i++)
                    {
                        _envelopes[n][i] = _featuregram[i][n];
                    }
                }
            }

            // =========================== modulation analysis =======================

            var envelopeLength = en;

            // long-term AVG-normalization

            foreach (var envelope in _envelopes)
            {
                // Mean absolute value of the envelope. The sign of the VALUE has to be checked
                // here (the loop index is never negative), which matters for featuregram-based
                // envelopes that may contain negative values.
                var avg = 0.0f;
                for (var k = 0; k < envelopeLength; k++)
                {
                    avg += Math.Abs(envelope[k]);
                }
                avg /= envelopeLength;

                if (avg >= 1e-10)   // this happens more frequently
                {
                    for (var k = 0; k < envelopeLength; k++)
                    {
                        envelope[k] /= avg;
                    }
                }
            }

            i = 0;
            while (i < envelopeLength)
            {
                // power spectra of all envelopes (for the current position) are stored one after another
                var vector = new float[_envelopes.Length * (_modulationFftSize / 2 + 1)];
                var offset = 0;

                foreach (var envelope in _envelopes)
                {
                    // the last block may be shorter than the modulation FFT size: the rest stays zero
                    _zeroModblock.FastCopyTo(_modBlock, _modulationFftSize);
                    envelope.FastCopyTo(_modBlock, Math.Min(_modulationFftSize, envelopeLength - i), i);

                    _modulationFft.PowerSpectrum(_modBlock, _modSpectrum);
                    _modSpectrum.FastCopyTo(vector, _modSpectrum.Length, 0, offset);

                    offset += _modSpectrum.Length;
                }

                featureVectors.Add(new FeatureVector
                {
                    Features     = vector,
                    TimePosition = (double)i * hopSize / SamplingRate
                });

                i += _modulationHopSize;
            }

            return(featureVectors);
        }
Beispiel #9
0
        /// <summary>
        /// Method creates overlapping ERB filters (ported from Malcolm Slaney's MATLAB code).
        /// For every center frequency a unit impulse is passed through a cascade of four
        /// second-order IIR sections, the response is divided by the analytically computed gain,
        /// and the filter is stored as the (non-normalized) result of <c>Fft.PowerSpectrum</c>.
        /// </summary>
        /// <param name="erbFilterCount">Number of ERB filters</param>
        /// <param name="fftSize">Assumed size of FFT (also the length of the analyzed impulse response)</param>
        /// <param name="samplingRate">Assumed sampling rate</param>
        /// <param name="lowFreq">Lower bound of the frequency range (negative values are replaced with 0)</param>
        /// <param name="highFreq">Upper bound of the frequency range (if it does not exceed <paramref name="lowFreq"/>, samplingRate / 2 is used)</param>
        /// <param name="normalizeGain">True if gain should be normalized; false if all filters should have same height 1.0</param>
        /// <returns>Array of ERB filters, ordered by ascending center frequency</returns>
        public static float[][] Erb(
            int erbFilterCount, int fftSize, int samplingRate, double lowFreq = 0, double highFreq = 0, bool normalizeGain = true)
        {
            if (lowFreq < 0)
            {
                lowFreq = 0;
            }
            if (highFreq <= lowFreq)
            {
                highFreq = samplingRate / 2.0;
            }

            const double earQ  = 9.26449;
            const double minBw = 24.7;
            const double bw    = earQ * minBw;
            const int    order = 1;

            var t = 1.0 / samplingRate;

            // center frequencies: filled from the last index (closest to highFreq)
            // down to index 0 (equal to lowFreq)

            var frequencies = new double[erbFilterCount];

            for (var i = 1; i <= erbFilterCount; i++)
            {
                frequencies[erbFilterCount - i] =
                    -bw + Math.Exp(i * (-Math.Log(highFreq + bw) + Math.Log(lowFreq + bw)) / erbFilterCount) * (highFreq + bw);
            }

            var rootPos = Math.Sqrt(3 + Math.Pow(2, 1.5));
            var rootNeg = Math.Sqrt(3 - Math.Pow(2, 1.5));

            var fft = new Fft(fftSize);

            var erbFilterBank = new float[erbFilterCount][];

            for (var i = 0; i < erbFilterCount; i++)
            {
                var cf  = frequencies[i];
                var erb = Math.Pow(Math.Pow(cf / earQ, order) + Math.Pow(minBw, order), 1.0 / order);
                var b   = 1.019 * 2 * Math.PI * erb;

                var theta  = 2 * cf * Math.PI * t;
                var itheta = Complex.Exp(2 * Complex.ImaginaryOne * theta);

                // coefficients shared by all four second-order sections

                var a0 = t;
                var a2 = 0.0;
                var b0 = 1.0;
                var b1 = -2 * Math.Cos(theta) / Math.Exp(b * t);
                var b2 = Math.Exp(-2 * b * t);

                var common = -t * Math.Exp(-b * t);

                var k1 = Math.Cos(theta) + rootPos * Math.Sin(theta);
                var k2 = Math.Cos(theta) - rootPos * Math.Sin(theta);
                var k3 = Math.Cos(theta) + rootNeg * Math.Sin(theta);
                var k4 = Math.Cos(theta) - rootNeg * Math.Sin(theta);

                // the sections differ only in their middle numerator coefficient

                var a11 = common * k1;
                var a12 = common * k2;
                var a13 = common * k3;
                var a14 = common * k4;

                var gainArg = Complex.Exp(Complex.ImaginaryOne * theta - b * t);

                var gain = Complex.Abs(
                    (itheta - gainArg * k1) *
                    (itheta - gainArg * k2) *
                    (itheta - gainArg * k3) *
                    (itheta - gainArg * k4) *
                    Complex.Pow(t * Math.Exp(b * t) / (-1.0 / Math.Exp(b * t) + 1 + itheta * (1 - Math.Exp(b * t))), 4.0));

                var filter1 = new IirFilter(new[] { a0, a11, a2 }, new[] { b0, b1, b2 });
                var filter2 = new IirFilter(new[] { a0, a12, a2 }, new[] { b0, b1, b2 });
                var filter3 = new IirFilter(new[] { a0, a13, a2 }, new[] { b0, b1, b2 });
                var filter4 = new IirFilter(new[] { a0, a14, a2 }, new[] { b0, b1, b2 });

                // unit impulse of length fftSize

                var ir = new double[fftSize];
                ir[0] = 1.0;

                // The sections are applied one after another instead of being multiplied into
                // a single filter (filter1 * filter2 * filter3 * filter4): the combined filter
                // works ok for doubles, but there's a crucial loss of precision in case of floats.

                ir = filter1.ApplyTo(ir);
                ir = filter2.ApplyTo(ir);
                ir = filter3.ApplyTo(ir);
                ir = filter4.ApplyTo(ir);

                var kernel = new DiscreteSignal(1, ir.Select(s => (float)(s / gain)));

                erbFilterBank[i] = fft.PowerSpectrum(kernel, false).Samples;
            }

            // normalize gain (by default)

            if (!normalizeGain)
            {
                return erbFilterBank;
            }

            foreach (var filter in erbFilterBank)
            {
                var sum = 0.0;
                for (var j = 0; j < filter.Length; j++)
                {
                    sum += filter[j] * filter[j];
                }

                var weight = Math.Sqrt(sum * samplingRate / fftSize);

                for (var j = 0; j < filter.Length; j++)
                {
                    filter[j] = (float)(filter[j] / weight);
                }
            }

            return erbFilterBank;
        }
Beispiel #10
0
        /// <summary>
        /// Method for computing modulation spectra.
        /// Each vector representing one modulation spectrum is a flattened version of 2D spectrum
        /// (for every band: the power spectrum of a block of that band's envelope).
        /// If no featuregram was provided, the envelopes are obtained from the signal
        /// (STFT power spectra passed through the filterbank); otherwise the columns
        /// of the featuregram are used as envelopes and the signal samples are not analyzed.
        /// The method updates the fields holding the filterbank (if it was not set),
        /// the envelopes, the feature count and the feature descriptions.
        /// </summary>
        /// <param name="signal">Signal under analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of flattened modulation spectra</returns>
        public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ====================================== PREPARE =======================================

            var hopSize       = (int)(signal.SamplingRate * HopSize);
            var frameSize     = (int)(signal.SamplingRate * FrameSize);
            var windowSamples = Window.OfType(_window, frameSize);

            var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

            var fft           = new Fft(fftSize);
            var modulationFft = new Fft(_modulationFftSize);

            var modulationBinCount = _modulationFftSize / 2 + 1;

            if (_featuregram == null && _filterbank == null)
            {
                // The default filterbank must be built for the FFT size that is actually used
                // for the spectra below (fftSize), which differs from _fftSize
                // whenever _fftSize is smaller than the frame size.

                _filterbank = FilterBanks.Triangular(fftSize, signal.SamplingRate,
                                                     FilterBanks.MelBands(12, fftSize, signal.SamplingRate, 100, 3200));
            }

            // number of bands (envelopes): the same value drives the feature count,
            // the feature descriptions and the envelopes created below

            var length = _featuregram == null ? _filterbank.Length : _featuregram[0].Length;

            _featureCount = length * modulationBinCount;

            var modulationSamplingRate = (float)signal.SamplingRate / hopSize;
            var resolution             = modulationSamplingRate / _modulationFftSize;


            _featureDescriptions = new string[length * modulationBinCount];

            var idx = 0;

            for (var fi = 0; fi < length; fi++)
            {
                for (var fj = 0; fj <= _modulationFftSize / 2; fj++)
                {
                    _featureDescriptions[idx++] = string.Format("band_{0}_mf_{1:F2}_Hz", fi + 1, fj * resolution);
                }
            }


            // ================================= MAIN PROCESSING ==================================

            var featureVectors = new List<FeatureVector>();

            var en = 0;
            var i  = startSample;

            if (_featuregram == null)
            {
                _envelopes = new float[_filterbank.Length][];
                for (var n = 0; n < _envelopes.Length; n++)
                {
                    _envelopes[n] = new float[signal.Length / hopSize];
                }

                var prevSample = startSample > 0 ? signal[startSample - 1] : 0.0f;


                // ===================== compute local FFTs (do STFT) =======================

                var spectrum         = new float[fftSize / 2 + 1];
                var filteredSpectrum = new float[_filterbank.Length];

                var block     = new float[fftSize];       // buffer for currently processed signal block at each step
                var zeroblock = new float[fftSize];       // buffer of zeros for quick memset

                while (i + frameSize < endSample)
                {
                    zeroblock.FastCopyTo(block, zeroblock.Length);
                    signal.Samples.FastCopyTo(block, frameSize, i);

                    // 0) pre-emphasis (if needed); it is done here, frame by frame, and only here
                    //    (filtering the entire signal beforehand as well would apply it twice)

                    if (_preEmphasis > 0.0)
                    {
                        for (var k = 0; k < frameSize; k++)
                        {
                            var y = block[k] - prevSample * _preEmphasis;
                            prevSample = block[k];
                            block[k]   = y;
                        }

                        // the next frame starts at (i + hopSize), so its predecessor
                        // is the raw sample right before that position

                        prevSample = signal[i + hopSize - 1];
                    }

                    // 1) apply window

                    if (_window != WindowTypes.Rectangular)
                    {
                        block.ApplyWindow(windowSamples);
                    }

                    // 2) calculate power spectrum

                    fft.PowerSpectrum(block, spectrum);

                    // 3) apply filterbank...

                    FilterBanks.Apply(_filterbank, spectrum, filteredSpectrum);

                    // ...and save results for future calculations

                    for (var n = 0; n < _envelopes.Length; n++)
                    {
                        _envelopes[n][en] = filteredSpectrum[n];
                    }
                    en++;

                    i += hopSize;
                }
            }
            else
            {
                // envelopes are the columns of the featuregram

                en         = _featuregram.Length;
                _envelopes = new float[_featuregram[0].Length][];

                for (var n = 0; n < _envelopes.Length; n++)
                {
                    _envelopes[n] = new float[en];
                    for (var t = 0; t < en; t++)
                    {
                        _envelopes[n][t] = _featuregram[t][n];
                    }
                }
            }

            // =========================== modulation analysis =======================

            var envelopeLength = en;

            // long-term AVG-normalization (by the mean absolute value of each envelope)

            foreach (var envelope in _envelopes)
            {
                var avg = 0.0f;
                for (var k = 0; k < envelopeLength; k++)
                {
                    avg += Math.Abs(envelope[k]);
                }
                avg /= envelopeLength;

                if (avg >= 1e-10)   // this happens more frequently
                {
                    for (var k = 0; k < envelopeLength; k++)
                    {
                        envelope[k] /= avg;
                    }
                }
            }

            var modBlock     = new float[_modulationFftSize];
            var zeroModblock = new float[_modulationFftSize];
            var modSpectrum  = new float[modulationBinCount];

            i = 0;
            while (i < envelopeLength)
            {
                var vector = new float[_envelopes.Length * modulationBinCount];
                var offset = 0;

                foreach (var envelope in _envelopes)
                {
                    // the last block may be shorter than the modulation FFT size:
                    // the rest of the buffer stays zeroed

                    zeroModblock.FastCopyTo(modBlock, _modulationFftSize);
                    envelope.FastCopyTo(modBlock, Math.Min(_modulationFftSize, envelopeLength - i), i);

                    modulationFft.PowerSpectrum(modBlock, modSpectrum);
                    modSpectrum.FastCopyTo(vector, modSpectrum.Length, 0, offset);

                    offset += modSpectrum.Length;
                }

                featureVectors.Add(new FeatureVector
                {
                    Features     = vector,
                    TimePosition = (double)i * hopSize / signal.SamplingRate
                });

                i += _modulationHopSize;
            }

            return featureVectors;
        }
Beispiel #11
0
        /// <summary>
        /// Computes MFCC feature vectors. The signal is processed frame by frame
        /// (frames are taken every hopSize samples); in each frame:
        ///
        ///     0) [Optional] pre-emphasis
        ///     1) Windowing (skipped for rectangular window)
        ///     2) Power spectrum X
        ///     3) Mel filterbank followed by log(): Y = Log10(X * H)
        ///     4) DCT-II: mfcc = Dct(Y)
        ///     5) [Optional] liftering of mfcc
        ///
        /// The mel filterbank field is rebuilt on every call.
        /// </summary>
        /// <param name="signal">Signal for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of mfcc vectors</returns>
        public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ---------------------------------- setup ----------------------------------

            var rate = signal.SamplingRate;

            var hopLength   = (int)(rate * HopSize);
            var frameLength = (int)(rate * FrameSize);
            var window      = Window.OfType(_window, frameLength);

            var fftLength = _fftSize;
            if (fftLength < frameLength)
            {
                fftLength = MathUtils.NextPowerOfTwo(frameLength);
            }

            var melBands = FilterBanks.MelBands(_filterbankSize, fftLength, rate, _lowFreq, _highFreq);
            _melFilterBank = FilterBanks.Triangular(fftLength, rate, melBands);

            var lifter = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;

            var transform = new Fft(fftLength);
            var cosine    = new Dct2(_filterbankSize, FeatureCount);

            // buffers reused across frames

            var powerSpectrum = new float[fftLength / 2 + 1];
            var logMel        = new float[_filterbankSize];
            var frame         = new float[fftLength];   // current (zero-padded) frame
            var zeros         = new float[fftLength];   // source of zeros for clearing the frame

            var emphasize = _preEmphasis > 0.0;
            var taper     = _window != WindowTypes.Rectangular;

            // ------------------------------- processing --------------------------------

            var result = new List<FeatureVector>();

            var previous = startSample > 0 ? signal[startSample - 1] : 0.0f;

            for (var pos = startSample; pos + frameLength < endSample; pos += hopLength)
            {
                // clear the buffer and load the next frame

                zeros.FastCopyTo(frame, zeros.Length);
                signal.Samples.FastCopyTo(frame, window.Length, pos);

                // 0) pre-emphasis

                if (emphasize)
                {
                    for (var k = 0; k < frameLength; k++)
                    {
                        var raw = frame[k];
                        frame[k] = raw - previous * _preEmphasis;
                        previous = raw;
                    }

                    // predecessor of the first sample of the next frame
                    previous = signal[pos + hopLength - 1];
                }

                // 1) window

                if (taper)
                {
                    frame.ApplyWindow(window);
                }

                // 2) power spectrum

                transform.PowerSpectrum(frame, powerSpectrum);

                // 3) mel filterbank + log

                FilterBanks.ApplyAndLog(_melFilterBank, powerSpectrum, logMel);

                // 4) dct-II

                var coeffs = new float[FeatureCount];
                cosine.Direct(logMel, coeffs);

                // 5) liftering

                if (lifter != null)
                {
                    coeffs.ApplyWindow(lifter);
                }

                // store the vector together with the time of the frame start

                result.Add(new FeatureVector
                {
                    Features     = coeffs,
                    TimePosition = (double)pos / signal.SamplingRate
                });
            }

            return result;
        }
        /// <summary>
        /// S(implified)PNCC algorithm according to [Kim & Stern, 2016].
        /// The samples are processed frame by frame (frames are taken every hopSize samples); in each frame:
        ///
        ///     0) [Optional] pre-emphasis
        ///     1) Windowing (skipped for rectangular window)
        ///     2) Power spectrum
        ///     3) Filterbank (gammatone filters, squared)
        ///     4) Mean power normalization
        ///     5) Nonlinearity (power law, or Log10 if the power is zero)
        ///     6) DCT-II (normalized)
        ///
        /// </summary>
        /// <param name="samples">Samples for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of pncc vectors</returns>
        public override List<FeatureVector> ComputeFrom(float[] samples, int startSample, int endSample)
        {
            Guard.AgainstInvalidRange(startSample, endSample, "starting pos", "ending pos");

            var frameLength = FrameSize;
            var hopLength   = HopSize;

            const float targetPower = 1e10f;
            var         runningMean = 4e07f;     // running estimate of the mean power, updated in every frame

            var usePowerLaw = _power != 0;
            var exponent    = usePowerLaw ? 1.0 / _power : 0.0;

            var emphasize = _preEmphasis > 0.0;
            var taper     = _window != WindowTypes.Rectangular;

            var result = new List<FeatureVector>();

            var previous = startSample > 0 ? samples[startSample - 1] : 0.0f;

            for (var pos = startSample; pos + frameLength < endSample; pos += hopLength)
            {
                // clear the buffer and load the next frame

                _zeroblock.FastCopyTo(_block, _zeroblock.Length);
                samples.FastCopyTo(_block, frameLength, pos);

                // 0) pre-emphasis

                if (emphasize)
                {
                    for (var k = 0; k < frameLength; k++)
                    {
                        var raw = _block[k];
                        _block[k] = raw - previous * _preEmphasis;
                        previous  = raw;
                    }

                    // predecessor of the first sample of the next frame
                    previous = samples[pos + hopLength - 1];
                }

                // 1) window

                if (taper)
                {
                    _block.ApplyWindow(_windowSamples);
                }

                // 2) power spectrum

                _fft.PowerSpectrum(_block, _spectrum);

                // 3) filterbank

                FilterBanks.Apply(FilterBank, _spectrum, _filteredSpectrum);

                // 4) mean power normalization

                var totalPower = 0.0f;
                foreach (var bandPower in _filteredSpectrum)
                {
                    totalPower += bandPower;
                }

                runningMean = LambdaMu * runningMean + (1 - LambdaMu) * totalPower;

                for (var j = 0; j < _filteredSpectrum.Length; j++)
                {
                    _filteredSpectrum[j] *= targetPower / runningMean;
                }

                // 5) nonlinearity (power ^ exponent     or     Log10)

                for (var j = 0; j < _filteredSpectrum.Length; j++)
                {
                    if (usePowerLaw)
                    {
                        _filteredSpectrum[j] = (float)Math.Pow(_filteredSpectrum[j], exponent);
                    }
                    else
                    {
                        _filteredSpectrum[j] = (float)Math.Log10(_filteredSpectrum[j] + float.Epsilon);
                    }
                }

                // 6) dct-II (normalized)

                var coeffs = new float[FeatureCount];
                _dct.DirectN(_filteredSpectrum, coeffs);

                // store the vector together with the time of the frame start

                result.Add(new FeatureVector
                {
                    Features     = coeffs,
                    TimePosition = (double)pos / SamplingRate
                });
            }

            return result;
        }
Beispiel #13
0
        /// <summary>
        /// S(implified)PNCC algorithm according to [Kim & Stern, 2016]:
        ///     0) [Optional] pre-emphasis
        ///
        /// Decompose signal into overlapping (hopSize) frames of length fftSize. In each frame do:
        ///
        ///     1) Apply window (if rectangular window was specified then just do nothing)
        ///     2) Obtain power spectrum
        ///     3) Apply gammatone filters (squared)
        ///     4) Mean power normalization
        ///     5) Apply nonlinearity (power law, or Log10 if the power is zero)
        ///     6) Do dct-II (normalized)
        ///
        /// The gammatone filterbank field is rebuilt on every call.
        /// </summary>
        /// <param name="signal">Signal for analysis</param>
        /// <param name="startSample">The number (position) of the first sample for processing</param>
        /// <param name="endSample">The number (position) of last sample for processing</param>
        /// <returns>List of pncc vectors</returns>
        public override List<FeatureVector> ComputeFrom(DiscreteSignal signal, int startSample, int endSample)
        {
            // ====================================== PREPARE =======================================

            var hopSize       = (int)(signal.SamplingRate * HopSize);
            var frameSize     = (int)(signal.SamplingRate * FrameSize);
            var windowSamples = Window.OfType(_window, frameSize);

            var fftSize = _fftSize >= frameSize ? _fftSize : MathUtils.NextPowerOfTwo(frameSize);

            // The filterbank must be built for the FFT size that is actually used
            // for the spectra below (fftSize), which differs from _fftSize
            // whenever _fftSize is smaller than the frame size.

            _gammatoneFilterBank = FilterBanks.Erb(_filterbankSize, fftSize, signal.SamplingRate, _lowFreq, _highFreq);

            // use power spectrum (square each filter weight in place):

            foreach (var filter in _gammatoneFilterBank)
            {
                for (var j = 0; j < filter.Length; j++)
                {
                    var ps = filter[j] * filter[j];
                    filter[j] = ps;
                }
            }


            var fft = new Fft(fftSize);
            var dct = new Dct2(_filterbankSize, FeatureCount);


            var gammatoneSpectrum = new float[_filterbankSize];

            const float meanPower = 1e10f;
            var         mean      = 4e07f;        // running estimate of the mean power, updated in every frame

            var d = _power != 0 ? 1.0 / _power : 0.0;

            var block     = new float[fftSize];       // buffer for a signal block at each step
            var zeroblock = new float[fftSize];       // buffer of zeros for quick memset

            var spectrum = new float[fftSize / 2 + 1];


            // ================================= MAIN PROCESSING ==================================

            var featureVectors = new List<FeatureVector>();

            var prevSample = startSample > 0 ? signal[startSample - 1] : 0.0f;

            var i = startSample;

            while (i + frameSize < endSample)
            {
                // prepare next block for processing

                zeroblock.FastCopyTo(block, zeroblock.Length);
                signal.Samples.FastCopyTo(block, frameSize, i);


                // 0) pre-emphasis (if needed)

                if (_preEmphasis > 0.0)
                {
                    for (var k = 0; k < frameSize; k++)
                    {
                        var y = block[k] - prevSample * _preEmphasis;
                        prevSample = block[k];
                        block[k]   = y;
                    }

                    // the next frame starts at (i + hopSize), so its predecessor
                    // is the raw sample right before that position

                    prevSample = signal[i + hopSize - 1];
                }


                // 1) apply window

                if (_window != WindowTypes.Rectangular)
                {
                    block.ApplyWindow(windowSamples);
                }


                // 2) calculate power spectrum

                fft.PowerSpectrum(block, spectrum);


                // 3) apply gammatone filterbank

                FilterBanks.Apply(_gammatoneFilterBank, spectrum, gammatoneSpectrum);


                // 4) mean power normalization:

                var sumPower = 0.0f;
                for (var j = 0; j < gammatoneSpectrum.Length; j++)
                {
                    sumPower += gammatoneSpectrum[j];
                }

                mean = LambdaMu * mean + (1 - LambdaMu) * sumPower;

                for (var j = 0; j < gammatoneSpectrum.Length; j++)
                {
                    gammatoneSpectrum[j] *= meanPower / mean;
                }


                // 5) nonlinearity (power ^ d     or     Log10)

                if (_power != 0)
                {
                    for (var j = 0; j < gammatoneSpectrum.Length; j++)
                    {
                        gammatoneSpectrum[j] = (float)Math.Pow(gammatoneSpectrum[j], d);
                    }
                }
                else
                {
                    for (var j = 0; j < gammatoneSpectrum.Length; j++)
                    {
                        gammatoneSpectrum[j] = (float)Math.Log10(gammatoneSpectrum[j] + float.Epsilon);
                    }
                }


                // 6) dct-II (normalized)

                var spnccs = new float[FeatureCount];
                dct.DirectN(gammatoneSpectrum, spnccs);


                // add pncc vector to output sequence

                featureVectors.Add(new FeatureVector
                {
                    Features     = spnccs,
                    TimePosition = (double)i / signal.SamplingRate
                });

                i += hopSize;
            }

            return featureVectors;
        }