Exemplo n.º 1
0
        /// <summary>
        /// Handles a click on the compute button: reads the analysis parameters from the form controls,
        /// builds the selected filter bank, runs MFCC extraction on the current signal (<c>_signal</c>)
        /// and pushes the results into the list view and the plot panels.
        /// </summary>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        /// <remarks>
        /// Numeric text boxes are read with <c>int.Parse</c> / <c>float.Parse</c>, so malformed input
        /// surfaces as a <see cref="FormatException"/>.
        /// </remarks>
        private void buttonCompute_Click(object sender, EventArgs e)
        {
            var filterCount  = int.Parse(textBoxSize.Text);
            var samplingRate = _signal.SamplingRate;
            var fftSize      = int.Parse(textBoxFftSize.Text);
            var lowFreq      = float.Parse(textBoxLowFreq.Text);
            var highFreq     = float.Parse(textBoxHighFreq.Text);

            Tuple<double, double, double>[] bands;
            float[][]  filterbank = null;
            VtlnWarper vtln       = null;

            // VTLN warping is optional; when disabled, null is passed on to the filter bank builders.
            if (checkBoxVtln.Checked)
            {
                var alpha    = float.Parse(textBoxVtlnAlpha.Text);
                var vtlnLow  = float.Parse(textBoxVtlnLow.Text);
                var vtlnHigh = float.Parse(textBoxVtlnHigh.Text);

                vtln = new VtlnWarper(alpha, lowFreq, highFreq, vtlnLow, vtlnHigh);
            }

            // Step 1: frequency bands for the selected scale.
            // The "Slaney" variants and ERB also produce the final filter bank right away,
            // in which case the shape selection below is skipped.
            switch (comboBoxFilterbank.Text)
            {
                case "Mel":
                    bands = FilterBanks.MelBands(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxOverlap.Checked);
                    break;

                case "Mel Slaney":
                    bands      = FilterBanks.MelBandsSlaney(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxOverlap.Checked);
                    filterbank = FilterBanks.MelBankSlaney(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxNormalize.Checked, vtln);
                    break;

                case "Bark":
                    bands = FilterBanks.BarkBands(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxOverlap.Checked);
                    break;

                case "Bark Slaney":
                    bands      = FilterBanks.BarkBandsSlaney(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxOverlap.Checked);
                    filterbank = FilterBanks.BarkBankSlaney(filterCount, fftSize, samplingRate, lowFreq, highFreq);
                    break;

                case "Critical bands":
                    bands = FilterBanks.CriticalBands(filterCount, fftSize, samplingRate, lowFreq, highFreq);
                    break;

                case "Octave bands":
                    bands = FilterBanks.OctaveBands(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxOverlap.Checked);
                    break;

                case "ERB":
                    // ERB has no band description here; only the ready-made filter bank is produced.
                    bands      = null;
                    filterbank = FilterBanks.Erb(filterCount, fftSize, samplingRate, lowFreq, highFreq);
                    break;

                default:
                    bands = FilterBanks.HerzBands(filterCount, fftSize, samplingRate, lowFreq, highFreq, checkBoxOverlap.Checked);
                    break;
            }

            // Step 2: if only bands are known so far, shape the filters over them.
            if (bands != null && filterbank == null)
            {
                switch (comboBoxShape.Text)
                {
                    case "Triangular":
                        filterbank = FilterBanks.Triangular(fftSize, samplingRate, bands, vtln, Utils.Scale.HerzToMel);
                        break;

                    case "Trapezoidal":
                        filterbank = FilterBanks.Trapezoidal(fftSize, samplingRate, bands, vtln);
                        break;

                    case "BiQuad":
                        filterbank = FilterBanks.BiQuad(fftSize, samplingRate, bands);
                        break;

                    default:
                        filterbank = FilterBanks.Rectangular(fftSize, samplingRate, bands, vtln);
                        break;
                }

                if (checkBoxNormalize.Checked)
                {
                    FilterBanks.Normalize(filterCount, bands, filterbank);
                }
            }

            // Step 3: configure and run the extractor.
            // NOTE(review): the enum values are taken directly from the combo box indices, so the item order
            // in the combo boxes presumably mirrors the enum declaration order - confirm in the designer.
            var spectrumType = (SpectrumType)comboBoxSpectrum.SelectedIndex;
            var nonLinearity = (NonLinearityType)comboBoxNonLinearity.SelectedIndex;
            var logFloor     = float.Parse(textBoxLogFloor.Text);

            // The third argument is 512.0 / samplingRate, i.e. the duration of exactly 512 samples
            // (presumably the frame duration - an earlier variant passed 0.025 here).
            var mfccExtractor = new MfccExtractor(
                samplingRate, 13, 512.0 / samplingRate, 0.01,
                filterbank: filterbank,
                spectrumType: spectrumType,
                nonLinearity: nonLinearity,
                dctType: comboBoxDct.Text,
                window: WindowTypes.Hamming,
                logFloor: logFloor);

            _mfccVectors = mfccExtractor.ComputeFrom(_signal);

            // Optional post-processing could be applied here (FeaturePostProcessing.NormalizeMean,
            // FeaturePostProcessing.AddDeltas). If deltas are added, the header below has to be extended
            // with mfccExtractor.DeltaFeatureDescriptions / DeltaDeltaFeatureDescriptions accordingly.

            var header = mfccExtractor.FeatureDescriptions;

            FillFeaturesList(_mfccVectors, header);

            // Nothing to select if no feature vectors were produced;
            // indexing [0] unconditionally would throw in that case.
            if (mfccListView.Items.Count > 0)
            {
                mfccListView.Items[0].Selected = true;
            }

            melFilterBankPanel.Groups = mfccExtractor.FilterBank;

            if (_mfccVectors.Count > 0)
            {
                mfccPanel.Line = _mfccVectors[0].Features;
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Constructs extractor from configuration <paramref name="options"/>.
        /// </summary>
        /// <param name="options">
        /// PLP configuration. If <c>options.FilterBank</c> is null, a Bark (Slaney) filter bank of
        /// <c>options.FilterBankSize</c> filters is built; otherwise the given filter bank is used as is
        /// and the FFT size is derived from the length of its first filter.
        /// </param>
        /// <exception cref="ArgumentException">
        /// Thrown if the custom filter bank is empty, or if <c>options.CenterFrequencies</c> is given
        /// but its length differs from the number of filters.
        /// </exception>
        public PlpExtractor(PlpOptions options) : base(options)
        {
            FeatureCount = options.FeatureCount;

            // ================================ Prepare filter bank and center frequencies: ===========================================

            var filterbankSize = options.FilterBankSize;

            if (options.FilterBank is null)
            {
                // FFT size from options is honored only if it exceeds the frame size
                _blockSize = options.FftSize > FrameSize ? options.FftSize : MathUtils.NextPowerOfTwo(FrameSize);

                var low  = options.LowFrequency;
                var high = options.HighFrequency;

                FilterBank = FilterBanks.BarkBankSlaney(filterbankSize, _blockSize, SamplingRate, low, high);

                // the middle item of each band tuple is taken as the band's center frequency
                var barkBands = FilterBanks.BarkBandsSlaney(filterbankSize, SamplingRate, low, high);
                _centerFrequencies = barkBands.Select(b => b.Item2).ToArray();
            }
            else
            {
                FilterBank     = options.FilterBank;
                filterbankSize = FilterBank.Length;

                if (filterbankSize == 0)
                {
                    throw new ArgumentException("Filter bank must contain at least one filter", nameof(options));
                }

                // each filter holds (fftSize / 2 + 1) weights
                _blockSize = 2 * (FilterBank[0].Length - 1);

                Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");

                if (options.CenterFrequencies != null)
                {
                    // one center frequency per filter is needed for the equal loudness curve
                    if (options.CenterFrequencies.Length != filterbankSize)
                    {
                        throw new ArgumentException(
                            "Number of center frequencies must be equal to the number of filters in filter bank",
                            nameof(options));
                    }

                    _centerFrequencies = options.CenterFrequencies;
                }
                else
                {
                    var herzResolution = (double)SamplingRate / _blockSize;

                    // try to determine center frequencies automatically from filterbank weights:

                    _centerFrequencies = EstimateCenterFrequencies(FilterBank, herzResolution, _blockSize / 2);
                }
            }

            // ==================================== Compute equal loudness curve: =========================================

            _equalLoudnessCurve = ComputeEqualLoudnessCurve(_centerFrequencies, filterbankSize);

            // ============================== Prepare RASTA filters (if necessary): =======================================

            _rasta = options.Rasta;

            if (_rasta > 0)
            {
                // one independent filter per band
                _rastaFilters = Enumerable.Range(0, filterbankSize)
                                .Select(f => new RastaFilter(_rasta))
                                .ToArray();
            }

            // ============== Precompute IDFT table for obtaining autocorrelation coeffs from power spectrum: =============

            _lpcOrder = options.LpcOrder > 0 ? options.LpcOrder : FeatureCount - 1;

            var bandCount = filterbankSize + 2;     // +2 duplicated edges

            _idftTable = BuildIdftTable(_lpcOrder, bandCount);

            _lpc = new float[_lpcOrder + 1];
            _cc  = new float[bandCount];

            // =================================== Prepare everything else: ==============================

            _fft = new RealFft(_blockSize);

            _lifterSize   = options.LifterSize;
            _lifterCoeffs = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;

            _includeEnergy  = options.IncludeEnergy;
            _logEnergyFloor = options.LogEnergyFloor;

            _spectrum     = new float[_blockSize / 2 + 1];
            _bandSpectrum = new float[filterbankSize];
        }

        /// <summary>
        /// Estimates the center frequency of each filter as the midpoint between the first bin
        /// with positive weight and the first zero-weight bin that follows it.
        /// </summary>
        /// <param name="filterbank">Filter bank weights (one array per filter).</param>
        /// <param name="herzResolution">Width of one spectral bin in Hz.</param>
        /// <param name="lastBin">Upper bin used when no zero weight follows the positive region.</param>
        /// <returns>Array of estimated center frequencies, one per filter.</returns>
        private static double[] EstimateCenterFrequencies(float[][] filterbank, double herzResolution, int lastBin)
        {
            var centerFrequencies = new double[filterbank.Length];

            for (var i = 0; i < filterbank.Length; i++)
            {
                var weights = filterbank[i];

                // lower edge: first bin with positive weight (stays 0 if there is none)
                var minPos = 0;

                for (var j = 0; j < weights.Length; j++)
                {
                    if (weights[j] > 0)
                    {
                        minPos = j;
                        break;
                    }
                }

                // upper edge: first zero-weight bin at or after the lower edge
                var maxPos = lastBin;

                for (var j = minPos; j < weights.Length; j++)
                {
                    if (weights[j] == 0)
                    {
                        maxPos = j;
                        break;
                    }
                }

                centerFrequencies[i] = herzResolution * (maxPos + minPos) / 2;
            }

            return centerFrequencies;
        }

        /// <summary>
        /// Computes equal loudness weights for the given center frequencies using the formula
        /// (f^2 / (f^2 + 1.6e5))^2 * (f^2 + 1.44e6) / (f^2 + 9.61e6).
        /// </summary>
        /// <param name="centerFrequencies">Center frequencies of the bands.</param>
        /// <param name="size">Size of the resulting array.</param>
        /// <returns>Equal loudness weights.</returns>
        private static double[] ComputeEqualLoudnessCurve(double[] centerFrequencies, int size)
        {
            var curve = new double[size];

            for (var i = 0; i < centerFrequencies.Length; i++)
            {
                var level2 = centerFrequencies[i] * centerFrequencies[i];

                curve[i] = Math.Pow(level2 / (level2 + 1.6e5), 2) * ((level2 + 1.44e6) / (level2 + 9.61e6));
            }

            return curve;
        }

        /// <summary>
        /// Precomputes the cosine table with (<paramref name="lpcOrder"/> + 1) rows and
        /// <paramref name="bandCount"/> columns. Inner columns carry weight 2, the two edge columns weight 1.
        /// </summary>
        /// <param name="lpcOrder">LPC order.</param>
        /// <param name="bandCount">Number of bands including the two duplicated edges.</param>
        /// <returns>IDFT table.</returns>
        private static float[][] BuildIdftTable(int lpcOrder, int bandCount)
        {
            var table = new float[lpcOrder + 1][];
            var freq  = Math.PI / (bandCount - 1);

            for (var i = 0; i < table.Length; i++)
            {
                var row = new float[bandCount];

                row[0] = 1.0f;

                for (var j = 1; j < bandCount - 1; j++)
                {
                    row[j] = 2 * (float)Math.Cos(freq * i * j);
                }

                row[bandCount - 1] = (float)Math.Cos(freq * i * (bandCount - 1));

                table[i] = row;
            }

            return table;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Constructs PLP extractor from explicit parameters.
        /// </summary>
        /// <param name="samplingRate">Sampling rate of the signals to analyze (passed to the base extractor)</param>
        /// <param name="featureCount">Number of features to extract</param>
        /// <param name="frameDuration">Frame duration (in seconds)</param>
        /// <param name="hopDuration">Hop duration (in seconds)</param>
        /// <param name="lpcOrder">LPC order; if not positive, (<paramref name="featureCount"/> - 1) is used</param>
        /// <param name="rasta">Coefficient passed to each RASTA filter; RASTA filters are created only if it is positive</param>
        /// <param name="filterbankSize">
        /// Number of filters in the Bark filter bank built when <paramref name="filterbank"/> is null
        /// (ignored otherwise: the size of the given filter bank is used)
        /// </param>
        /// <param name="lowFreq">Lower frequency passed to the Bark filter bank builder</param>
        /// <param name="highFreq">Upper frequency passed to the Bark filter bank builder</param>
        /// <param name="fftSize">
        /// FFT size; used only if it exceeds the frame size and no custom filter bank is given,
        /// otherwise the next power of two of the frame size is used
        /// </param>
        /// <param name="lifterSize">Size of the cepstral lifter; liftering coefficients are prepared only if it is positive</param>
        /// <param name="preEmphasis">Pre-emphasis coefficient (passed to the base extractor)</param>
        /// <param name="window">Type of the window applied to each frame</param>
        /// <param name="filterbank">
        /// Custom filter bank weights (one array of (fftSize / 2 + 1) weights per filter);
        /// if null, the Bark (Slaney) filter bank is built
        /// </param>
        /// <param name="centerFrequencies">
        /// Center frequencies of the custom filter bank (one per filter);
        /// if null, they are estimated from the filter bank weights
        /// </param>
        /// <exception cref="ArgumentException">
        /// Thrown if <paramref name="filterbank"/> is empty, or if the length of
        /// <paramref name="centerFrequencies"/> differs from the number of filters.
        /// </exception>
        public PlpExtractor(int samplingRate,
                            int featureCount,
                            double frameDuration       = 0.0256 /*sec*/,
                            double hopDuration         = 0.010 /*sec*/,
                            int lpcOrder               = 0,         // will be autocalculated as featureCount - 1
                            double rasta               = 0,
                            int filterbankSize         = 24,
                            double lowFreq             = 0,
                            double highFreq            = 0,
                            int fftSize                = 0,
                            int lifterSize             = 0,
                            double preEmphasis         = 0,
                            WindowTypes window         = WindowTypes.Hamming,
                            float[][] filterbank       = null,
                            double[] centerFrequencies = null)

            : base(samplingRate, frameDuration, hopDuration, preEmphasis)
        {
            FeatureCount = featureCount;

            // ================================ Prepare filter bank and center frequencies: ===========================================

            _lowFreq  = lowFreq;
            _highFreq = highFreq;

            if (filterbank == null)
            {
                // requested FFT size is honored only if it exceeds the frame size
                _blockSize = fftSize > FrameSize ? fftSize : MathUtils.NextPowerOfTwo(FrameSize);

                var barkBands = FilterBanks.BarkBandsSlaney(filterbankSize, _blockSize, samplingRate, _lowFreq, _highFreq);
                FilterBank = FilterBanks.BarkBankSlaney(filterbankSize, _blockSize, samplingRate, _lowFreq, _highFreq);

                // the middle item of each band tuple is taken as the band's center frequency
                _centerFrequencies = barkBands.Select(b => b.Item2).ToArray();
            }
            else
            {
                if (filterbank.Length == 0)
                {
                    throw new ArgumentException("Filter bank must contain at least one filter", nameof(filterbank));
                }

                FilterBank     = filterbank;
                filterbankSize = filterbank.Length;
                _blockSize     = 2 * (filterbank[0].Length - 1);    // each filter holds (fftSize / 2 + 1) weights

                Guard.AgainstExceedance(FrameSize, _blockSize, "frame size", "FFT size");

                if (centerFrequencies != null)
                {
                    // one center frequency per filter is needed for the equal loudness curve
                    if (centerFrequencies.Length != filterbankSize)
                    {
                        throw new ArgumentException(
                            "Number of center frequencies must be equal to the number of filters in filter bank",
                            nameof(centerFrequencies));
                    }

                    _centerFrequencies = centerFrequencies;
                }
                else
                {
                    var herzResolution = (double)samplingRate / _blockSize;

                    // try to determine center frequencies automatically from filterbank weights:

                    _centerFrequencies = GuessCenterFrequencies(filterbank, herzResolution, _blockSize / 2);
                }
            }

            // ==================================== Compute equal loudness curve: =========================================

            _equalLoudnessCurve = MakeEqualLoudnessCurve(_centerFrequencies, filterbankSize);

            // ============================== Prepare RASTA filters (if necessary): =======================================

            _rasta = rasta;

            if (rasta > 0)
            {
                // one independent filter per band
                _rastaFilters = Enumerable.Range(0, filterbankSize)
                                .Select(f => new RastaFilter(rasta))
                                .ToArray();
            }

            // ============== Precompute IDFT table for obtaining autocorrelation coeffs from power spectrum: =============

            _lpcOrder = lpcOrder > 0 ? lpcOrder : FeatureCount - 1;

            var bandCount = filterbankSize + 2;     // +2 duplicated edges

            _idftTable = MakeIdftTable(_lpcOrder, bandCount);

            _lpc = new float[_lpcOrder + 1];
            _cc  = new float[bandCount];

            // =================================== Prepare everything else: ==============================

            _fft = new RealFft(_blockSize);

            _window        = window;
            _windowSamples = Window.OfType(_window, FrameSize);

            _lifterSize   = lifterSize;
            _lifterCoeffs = _lifterSize > 0 ? Window.Liftering(FeatureCount, _lifterSize) : null;

            _spectrum     = new float[_blockSize / 2 + 1];
            _bandSpectrum = new float[filterbankSize];
        }

        /// <summary>
        /// Guesses the center frequency of each filter as the midpoint between the first bin
        /// with positive weight and the first zero-weight bin that follows it.
        /// </summary>
        /// <param name="weights">Filter bank weights (one array per filter)</param>
        /// <param name="herzResolution">Width of one spectral bin in Hz</param>
        /// <param name="lastBin">Upper bin used when no zero weight follows the positive region</param>
        /// <returns>Array of center frequencies, one per filter</returns>
        private static double[] GuessCenterFrequencies(float[][] weights, double herzResolution, int lastBin)
        {
            var frequencies = new double[weights.Length];

            for (var i = 0; i < weights.Length; i++)
            {
                var filter = weights[i];

                // lower edge: first bin with positive weight (stays 0 if there is none)
                var minPos = 0;

                for (var j = 0; j < filter.Length; j++)
                {
                    if (filter[j] > 0)
                    {
                        minPos = j;
                        break;
                    }
                }

                // upper edge: first zero-weight bin at or after the lower edge
                var maxPos = lastBin;

                for (var j = minPos; j < filter.Length; j++)
                {
                    if (filter[j] == 0)
                    {
                        maxPos = j;
                        break;
                    }
                }

                frequencies[i] = herzResolution * (maxPos + minPos) / 2;
            }

            return frequencies;
        }

        /// <summary>
        /// Computes equal loudness weights for the given center frequencies using the formula
        /// (f^2 / (f^2 + 1.6e5))^2 * (f^2 + 1.44e6) / (f^2 + 9.61e6).
        /// </summary>
        /// <param name="frequencies">Center frequencies of the bands</param>
        /// <param name="size">Size of the resulting array</param>
        /// <returns>Equal loudness weights</returns>
        private static double[] MakeEqualLoudnessCurve(double[] frequencies, int size)
        {
            var curve = new double[size];

            for (var i = 0; i < frequencies.Length; i++)
            {
                var level2 = frequencies[i] * frequencies[i];

                curve[i] = Math.Pow(level2 / (level2 + 1.6e5), 2) * ((level2 + 1.44e6) / (level2 + 9.61e6));
            }

            return curve;
        }

        /// <summary>
        /// Precomputes the cosine table with (<paramref name="lpcOrder"/> + 1) rows and
        /// <paramref name="bandCount"/> columns. Inner columns carry weight 2, the two edge columns weight 1.
        /// </summary>
        /// <param name="lpcOrder">LPC order</param>
        /// <param name="bandCount">Number of bands including the two duplicated edges</param>
        /// <returns>IDFT table</returns>
        private static float[][] MakeIdftTable(int lpcOrder, int bandCount)
        {
            var table = new float[lpcOrder + 1][];
            var freq  = Math.PI / (bandCount - 1);

            for (var i = 0; i < table.Length; i++)
            {
                var row = new float[bandCount];

                row[0] = 1.0f;

                for (var j = 1; j < bandCount - 1; j++)
                {
                    row[j] = 2 * (float)Math.Cos(freq * i * j);
                }

                row[bandCount - 1] = (float)Math.Cos(freq * i * (bandCount - 1));

                table[i] = row;
            }

            return table;
        }