Exemple #1
0
        /// <summary>
        /// Loads the signal for <paramref name="filePath"/> through the audio service, extracts its
        /// MFCC descriptors and derives two further matrices by applying <c>performDelta2D</c>
        /// once and twice to the coefficient matrix.
        /// </summary>
        /// <param name="filePath">Path handed to the audio service to obtain the signal.</param>
        /// <returns>
        /// A tuple of row lists: the cepstrum coefficients, their delta, and the delta of the delta.
        /// </returns>
        public (List <double[]> composed_mfcc, List <double[]> composed_delta, List <double[]> composed_delta_delta) ExtractCoefficientFromAudio(string filePath)
        {
            var signal = _audioService.GetSignal(filePath);

            // The upper frequency and sampling rate are both taken from the loaded signal.
            var extractor = new MelFrequencyCepstrumCoefficient(
                filterCount: 26,
                cepstrumCount: 13,
                lowerFrequency: 300,
                upperFrequency: signal.SampleRate / 2,
                alpha: 0.97,
                samplingRate: signal.SampleRate,
                frameRate: 25,
                windowLength: 0.0257,
                numberOfBins: 512);

            // One coefficient vector per descriptor returned by the transform.
            var frames = extractor.Transform(signal)
                                  .Select(frame => frame.Descriptor)
                                  .ToList();

            double[,] coefficients = frames.CreateRectangularArray();
            var firstOrder         = coefficients.performDelta2D();
            var secondOrder        = firstOrder.performDelta2D();

            return (
                composed_mfcc: coefficients.ToJaggedArray().ToList(),
                composed_delta: firstOrder.ToJaggedArray().ToList(),
                composed_delta_delta: secondOrder.ToJaggedArray().ToList());
        }
        /// <summary>
        /// Runs a default-configured MFCC extractor over three Free Spoken Digits recordings and
        /// checks the number of descriptors and the first descriptor of each against known values.
        /// </summary>
        public void sample_test()
        {
            const double tolerance = 1e-8;

            string datasetPath = Path.Combine(NUnit.Framework.TestContext.CurrentContext.TestDirectory, "mfcc");
            var dataset   = new FreeSpokenDigitsDataset(datasetPath);
            var extractor = new MelFrequencyCepstrumCoefficient();

            // Digit 0, speaker "jackson", recording 10.
            double[] expectedJackson = new double[]
            {
                10.570020645259348d, 1.3484344242338475d, 0.4861056552885234d, -0.79287993818868352d, -0.64182784362935996d, -0.28079835895392041d, -0.46378109632237779d, 0.072039410871952647d, -0.43971730320461733d, 0.48891921252102533d, -0.22502241185050212d, 0.12478713268421229d, -0.13373400147110801d
            };
            Signal jackson = dataset.GetSignal(0, "jackson", 10);
            MelFrequencyCepstrumCoefficientDescriptor[] jacksonFrames = extractor.Transform(jackson).ToArray();
            Assert.AreEqual(35, jacksonFrames.Length);
            Assert.IsTrue(expectedJackson.IsEqual(jacksonFrames[0].Descriptor, tolerance));

            // Digit 0, speaker "nicolas", recording 10.
            double[] expectedNicolas = new[]
            {
                10.6434445230168, -0.222107787197107, 0.316067614396639, -0.212769536249701, -0.107755264262885, -0.292732772820073, -0.00445205345925395, 0.024397440969199, 0.0213769364471326, -0.0882765552657509, -0.177682484734242, -0.1013307739251, -0.099014915302743
            };
            Signal nicolas = dataset.GetSignal(0, "nicolas", 10);
            MelFrequencyCepstrumCoefficientDescriptor[] nicolasFrames = extractor.Transform(nicolas).ToArray();
            Assert.AreEqual(24, nicolasFrames.Length);
            Assert.IsTrue(expectedNicolas.IsEqual(nicolasFrames[0].Descriptor, tolerance));

            // Digit 5, speaker "theo", recording 23.
            double[] expectedTheo = new[]
            {
                7.24614406589037, -1.16796769512142, -0.134374026111248, -0.192703972718674, 0.112752647291759, -0.118712048308068, -0.0603752892245708, -0.0275002195634854, -0.0830858413953528, -0.0838965948140795, -0.15293502718595, 0.0107796827068413, -0.0491283773795346
            };
            Signal theo = dataset.GetSignal(5, "theo", 23);
            MelFrequencyCepstrumCoefficientDescriptor[] theoFrames = extractor.Transform(theo).ToArray();
            Assert.AreEqual(27, theoFrames.Length);
            Assert.IsTrue(expectedTheo.IsEqual(theoFrames[0].Descriptor, tolerance));
        }
Exemple #3
0
        /// <summary>
        /// Decodes the given wave file, runs a default-configured MFCC extractor over it and
        /// returns the average of the coefficients of each resulting descriptor.
        /// </summary>
        /// <param name="file">Path passed to the <c>WaveDecoder</c> constructor.</param>
        /// <returns>One averaged value per descriptor produced by the transform.</returns>
        public static double[] GetMfcc(string file)
        {
            // NOTE(review): the decoder presumably keeps the file open; confirm whether it
            // should be closed explicitly once decoding has finished.
            WaveDecoder decoder = new WaveDecoder(file);

            // Both the signal and the extractor are disposable; release them once the
            // averages have been materialised into the returned array.
            using (Signal signal = decoder.Decode())
            using (MelFrequencyCepstrumCoefficient mfcc = new MelFrequencyCepstrumCoefficient())
            {
                return mfcc.Transform(signal)
                           .Select(frame => frame.Descriptor.Average())
                           .ToArray();
            }
        }
Exemple #4
0
        /// <summary>
        /// Lazily yields the MFCC descriptors of <paramref name="signal"/>. The extractor is built
        /// with 20 filters and the signal's own sample rate, and is disposed when enumeration ends.
        /// </summary>
        /// <param name="signal">Signal to transform.</param>
        /// <returns>The descriptors produced by the extractor, in the order it returns them.</returns>
        public static IEnumerable <MelFrequencyCepstrumCoefficientDescriptor> ObterMFCCDescriptor(Signal signal)
        {
            using (var extractor = new MelFrequencyCepstrumCoefficient(filterCount: 20, samplingRate: signal.SampleRate))
            {
                foreach (var descriptor in extractor.Transform(signal))
                {
                    yield return descriptor;
                }
            }
        }
        /// <summary>
        /// Lazily yields the MFCC descriptors of the signal returned by <c>ObterSinal</c>, using a
        /// default-configured extractor. Both the signal and the extractor are disposed when
        /// enumeration ends.
        /// </summary>
        /// <returns>The descriptors produced by the extractor, in the order it returns them.</returns>
        public IEnumerable <MelFrequencyCepstrumCoefficientDescriptor> ObterAudioFiltrado()
        {
            using (var source = ObterSinal())
            {
                using (var extractor = new MelFrequencyCepstrumCoefficient())
                {
                    foreach (var descriptor in extractor.Transform(source))
                    {
                        yield return descriptor;
                    }
                }
            }
        }
        /// <summary>
        /// Builds a record from a wave file: reads every 32-bit float sample exposed by a
        /// <c>WaveChannel32</c> over the file, computes MFCC descriptors with a default-configured
        /// extractor, and stores one labelled <c>RecordFrame</c> per descriptor.
        /// </summary>
        /// <param name="path">Path of the wave file to read.</param>
        /// <param name="label">Label assigned to the record and to each of its frames.</param>
        public Record(String path, String label)
        {
            this.path   = path;
            this.label  = label;
            this.frames = new List <RecordFrame>();

            // NOTE(review): the signal is always declared as 16 kHz, whatever the file's
            // own format says — confirm this matches the recordings being loaded.
            const int sampleRate     = 16000;
            const int bytesPerSample = sizeof(float);

            byte[]        buffer      = new byte[16384];
            List <double> listSamples = new List <double>();

            // Dispose both streams so the underlying file handle is released even if reading throws.
            using (WaveFileReader reader = new WaveFileReader(path))
            using (WaveChannel32 wave = new WaveChannel32(reader))
            {
                while (wave.Position < wave.Length)
                {
                    int read = wave.Read(buffer, 0, buffer.Length);
                    if (read <= 0)
                    {
                        // Nothing was delivered: stop rather than spin if the position does not advance.
                        break;
                    }

                    // Each sample occupies four bytes in the buffer.
                    for (int i = 0; i < read / bytesPerSample; i++)
                    {
                        double sample = BitConverter.ToSingle(buffer, i * bytesPerSample);
                        listSamples.Add(sample);
                    }
                }
            }

            using (Signal signal = Signal.FromArray(listSamples.ToArray(), sampleRate))
            using (MelFrequencyCepstrumCoefficient mfcc = new MelFrequencyCepstrumCoefficient())
            {
                IEnumerable <MelFrequencyCepstrumCoefficientDescriptor> features = mfcc.Transform(signal);

                foreach (var t in features)
                {
                    RecordFrame recordFrame = new RecordFrame();
                    // Copy the coefficients so the frame owns its own list.
                    recordFrame.coefficients = new List <double>(t.Descriptor);
                    recordFrame.label        = label;

                    this.frames.Add(recordFrame);
                }
            }
        }
        /// <summary>
        /// Documentation example and regression test: inspects the properties of three Free Spoken
        /// Digits recordings and checks the MFCC descriptors computed for each by an explicitly
        /// configured extractor.
        /// </summary>
        public void sample_test()
        {
            string basePath = NUnit.Framework.TestContext.CurrentContext.TestDirectory;
            string pathWhereTheDatasetShouldBeStored = Path.Combine(basePath, "mfcc");

            #region doc_example1
            // Let's say we would like to analyse an audio sample. To give an example that
            // could be reproduced by anyone without having to give a specific sound file
            // that would need to have been downloaded by every user trying to run this example,
            // we will obtain an example from the Free Spoken Digits Dataset instead:
            var fsdd = new FreeSpokenDigitsDataset(path: pathWhereTheDatasetShouldBeStored);

            // Let's obtain one of the audio signals:
            Signal a = fsdd.GetSignal(0, "jackson", 10);

            // Note: if you would like to load a signal from the
            // disk, you could use the following method directly:
            // Signal a = Signal.FromFile(fileName);

            // First we could extract some characteristics from the audio signal, just
            // for informative purposes. We don't actually need to register them just
            // to compute the MFCC, so please skip those checks if you would like!
            int          numberOfChannels = a.NumberOfChannels; // should be: 1
            int          numberOfFrames   = a.NumberOfFrames;   // should be: 5451
            int          numberOfSamples  = a.NumberOfSamples;  // should be: 5451
            SampleFormat format           = a.SampleFormat;     // should be: Format32BitIeeeFloat
            int          sampleRate       = a.SampleRate;       // should be: 8000 (8khz)
            int          samples          = a.Samples;          // should be: 5451
            int          sampleSize       = a.SampleSize;       // should be: 4
            int          numberOfBytes    = a.NumberOfBytes;    // should be: 21804

            // Now, let's say we would like to compute its MFCC:
            var extractor = new MelFrequencyCepstrumCoefficient(
                filterCount: 40,            // Note: all values are optional, you can
                cepstrumCount: 13,          // specify only the ones you'd like and leave
                lowerFrequency: 133.3333,   // all others at their defaults
                upperFrequency: 6855.4976,
                alpha: 0.97,
                samplingRate: 16000,
                frameRate: 100,
                windowLength: 0.0256,
                numberOfBins: 512);

            // We can call the transform method of the MFCC extractor class:
            IEnumerable <MelFrequencyCepstrumCoefficientDescriptor> mfcc = extractor.Transform(a);

            // or we could also transform them to a matrix directly with:
            double[][] actual = mfcc.Select(x => x.Descriptor).ToArray();

            // This matrix would contain X different MFCC values (due to the length of the signal)
            int numberOfMFCCs = actual.Length; // should be 35 (depends on the MFCC window)

            // Each of those MFCC values would have length 13;
            int descriptorLength = actual[0].Length; // 13 (depends on the MFCC cepstrum count)

            // An example of an MFCC vector would have been:
            double[] row = actual[0]; // should have been: (see vector written below)

            double[] expected = new double[]
            {
                10.570020645259348d, 1.3484344242338475d, 0.4861056552885234d,
                -0.79287993818868352d, -0.64182784362935996d, -0.28079835895392041d,
                -0.46378109632237779d, 0.072039410871952647d, -0.43971730320461733d,
                0.48891921252102533d, -0.22502241185050212d, 0.12478713268421229d, -0.13373400147110801d
            };
            #endregion

            Assert.AreEqual(1, numberOfChannels);
            Assert.AreEqual(5451, numberOfFrames);
            Assert.AreEqual(5451, numberOfSamples);
            Assert.AreEqual(SampleFormat.Format32BitIeeeFloat, format);
            Assert.AreEqual(8000, sampleRate);
            Assert.AreEqual(5451, samples);
            Assert.AreEqual(4, sampleSize);
            Assert.AreEqual(21804, numberOfBytes);
            Assert.AreEqual(sampleSize * numberOfFrames * numberOfChannels, numberOfBytes);
            Assert.AreEqual(35, numberOfMFCCs);
            Assert.IsTrue(expected.IsEqual(row, 1e-8));

            Signal b = fsdd.GetSignal(0, "nicolas", 10);
            Assert.AreEqual(2, b.NumberOfChannels);
            Assert.AreEqual(3755, b.NumberOfFrames);
            Assert.AreEqual(7510, b.NumberOfSamples);
            Assert.AreEqual(SampleFormat.Format32BitIeeeFloat, b.SampleFormat);
            Assert.AreEqual(8000, b.SampleRate);
            Assert.AreEqual(7510, b.Samples);
            Assert.AreEqual(4, b.SampleSize);
            Assert.AreEqual(30040, b.NumberOfBytes);
            Assert.AreEqual(b.SampleSize * b.NumberOfFrames * b.NumberOfChannels, b.NumberOfBytes);
            MelFrequencyCepstrumCoefficientDescriptor[] rb = extractor.Transform(b).ToArray();
            Assert.AreEqual(24, rb.Length);
            Assert.IsTrue(new[] { 10.6434445230168, -0.222107787197107, 0.316067614396639, -0.212769536249701, -0.107755264262885, -0.292732772820073, -0.00445205345925395, 0.024397440969199, 0.0213769364471326, -0.0882765552657509, -0.177682484734242, -0.1013307739251, -0.099014915302743 }.IsEqual(rb[0].Descriptor, 1e-8));

            Signal c = fsdd.GetSignal(5, "theo", 23);
            Assert.AreEqual(1, c.NumberOfChannels);
            Assert.AreEqual(4277, c.NumberOfFrames);
            Assert.AreEqual(4277, c.NumberOfSamples);
            Assert.AreEqual(SampleFormat.Format32BitIeeeFloat, c.SampleFormat);
            Assert.AreEqual(8000, c.SampleRate);
            Assert.AreEqual(4277, c.Samples);
            Assert.AreEqual(4, c.SampleSize);
            Assert.AreEqual(17108, c.NumberOfBytes);
            // The byte count of signal c must be derived from c's own sample size.
            Assert.AreEqual(c.SampleSize * c.NumberOfFrames * c.NumberOfChannels, c.NumberOfBytes);
            MelFrequencyCepstrumCoefficientDescriptor[] rc = extractor.Transform(c).ToArray();
            Assert.AreEqual(27, rc.Length);
            Assert.IsTrue(new[] { 7.24614406589037, -1.16796769512142, -0.134374026111248, -0.192703972718674, 0.112752647291759, -0.118712048308068, -0.0603752892245708, -0.0275002195634854, -0.0830858413953528, -0.0838965948140795, -0.15293502718595, 0.0107796827068413, -0.0491283773795346 }.IsEqual(rc[0].Descriptor, 1e-8));
        }