Пример #1
0
        public void sample_test()
        {
            // The dataset is stored in the "mfcc" folder below the test directory.
            string testDirectory = NUnit.Framework.TestContext.CurrentContext.TestDirectory;
            var dataset = new FreeSpokenDigitsDataset(Path.Combine(testDirectory, "mfcc"));

            // One extractor, built with its default settings, is reused for every signal.
            var extractor = new MelFrequencyCepstrumCoefficient();

            // Expected first descriptor of each recording (13 coefficients each).
            double[] expectedJackson =
            {
                10.570020645259348d, 1.3484344242338475d, 0.4861056552885234d,
                -0.79287993818868352d, -0.64182784362935996d, -0.28079835895392041d,
                -0.46378109632237779d, 0.072039410871952647d, -0.43971730320461733d,
                0.48891921252102533d, -0.22502241185050212d, 0.12478713268421229d,
                -0.13373400147110801d
            };

            double[] expectedNicolas =
            {
                10.6434445230168, -0.222107787197107, 0.316067614396639,
                -0.212769536249701, -0.107755264262885, -0.292732772820073,
                -0.00445205345925395, 0.024397440969199, 0.0213769364471326,
                -0.0882765552657509, -0.177682484734242, -0.1013307739251,
                -0.099014915302743
            };

            double[] expectedTheo =
            {
                7.24614406589037, -1.16796769512142, -0.134374026111248,
                -0.192703972718674, 0.112752647291759, -0.118712048308068,
                -0.0603752892245708, -0.0275002195634854, -0.0830858413953528,
                -0.0838965948140795, -0.15293502718595, 0.0107796827068413,
                -0.0491283773795346
            };

            // Digit 0, speaker "jackson", recording 10: 35 descriptors expected.
            Signal jackson = dataset.GetSignal(0, "jackson", 10);
            MelFrequencyCepstrumCoefficientDescriptor[] jacksonFrames = extractor.Transform(jackson).ToArray();
            Assert.AreEqual(35, jacksonFrames.Length);
            Assert.IsTrue(expectedJackson.IsEqual(jacksonFrames[0].Descriptor, 1e-8));

            // Digit 0, speaker "nicolas", recording 10: 24 descriptors expected.
            Signal nicolas = dataset.GetSignal(0, "nicolas", 10);
            MelFrequencyCepstrumCoefficientDescriptor[] nicolasFrames = extractor.Transform(nicolas).ToArray();
            Assert.AreEqual(24, nicolasFrames.Length);
            Assert.IsTrue(expectedNicolas.IsEqual(nicolasFrames[0].Descriptor, 1e-8));

            // Digit 5, speaker "theo", recording 23: 27 descriptors expected.
            Signal theo = dataset.GetSignal(5, "theo", 23);
            MelFrequencyCepstrumCoefficientDescriptor[] theoFrames = extractor.Transform(theo).ToArray();
            Assert.AreEqual(27, theoFrames.Length);
            Assert.IsTrue(expectedTheo.IsEqual(theoFrames[0].Descriptor, 1e-8));
        }
Пример #2
0
        public void GetEnergyTest_doc()
        {
            // Value the documentation sample below is expected to produce.
            const double expectedEnergy = 19.448728048242629;

            string testDirectory = NUnit.Framework.TestContext.CurrentContext.TestDirectory;
            string basePath = Path.Combine(testDirectory, "energy");

            #region doc_energy
            // Suppose we want to measure the energy of an audio signal. As sample input
            // we pick one recording from the Free Spoken Digits Dataset (FSDD):
            var dataset = new FreeSpokenDigitsDataset(basePath);
            Signal recording = dataset.GetSignal(digit: 3, speaker: "jackson", index: 0);

            // The energy is the sum of the squared values over all channels
            // of the audio signal. For this recording it should be:
            double energy = recording.GetEnergy(); // 19.448728048242629
            #endregion

            Assert.AreEqual(expectedEnergy, energy, 1e-10);
        }
Пример #3
0
        public void sample_test()
        {
            // Frequency counts the documentation sample below is expected to report.
            const double expectedBefore = 0.00028203820434203334;
            const double expectedAfter = 2.9116651158267508E-05;

            string pathWhereTheDatasetShouldBeStored = Path.Combine(
                NUnit.Framework.TestContext.CurrentContext.TestDirectory, "mfcc");

            #region doc_example1
            // Suppose we want to analyse an audio sample. So that anyone can reproduce
            // this example without first downloading a particular sound file, we take
            // a recording from the Free Spoken Digits Dataset instead:
            var dataset = new FreeSpokenDigitsDataset(path: pathWhereTheDatasetShouldBeStored);

            // Pick one of the audio signals:
            Signal original = dataset.GetSignal(0, "jackson", 10);
            int    rate     = original.SampleRate; // 8000

            // Note: to load a signal from disk instead, the
            // following method could be used directly:
            // Signal original = Signal.FromFile(fileName);

            // Build a low-pass filter that keeps only frequencies below 100 Hz
            var lowPass = new LowPassFilter(frequency: 100, sampleRate: rate);

            // Run the signal through the filter
            Signal filtered = lowPass.Apply(original);

            // Spectrogram of the unfiltered signal
            var originalSpectrum = new Spectrogram(original);

            // Spectrogram of the filtered signal
            var filteredSpectrum = new Spectrogram(filtered);

            // Compare the count of a high frequency before and after filtering:
            double before = originalSpectrum.GetFrequencyCount(windowIndex: 0, frequency: 1000); // 0.00028203820434203334
            double after  = filteredSpectrum.GetFrequencyCount(windowIndex: 0, frequency: 1000); // 2.9116651158267508E-05
            #endregion

            Assert.AreEqual(expectedBefore, before, 1e-8);
            Assert.AreEqual(expectedAfter, after, 1e-8);
        }
        public void sample_test()
        {
            // Documentation example for the MFCC extractor: loads three recordings from
            // the Free Spoken Digits Dataset, checks their signal properties and verifies
            // the number of MFCC descriptors and the first descriptor of each recording.
            string basePath = NUnit.Framework.TestContext.CurrentContext.TestDirectory;
            string pathWhereTheDatasetShouldBeStored = Path.Combine(basePath, "mfcc");

            #region doc_example1
            // Let's say we would like to analyse an audio sample. To give an example that
            // could be reproduced by anyone without having to give a specific sound file
            // that would need to have been downloaded by every user trying to run this example,
            // we will obtain an example from the Free Spoken Digits Dataset instead:
            var fsdd = new FreeSpokenDigitsDataset(path: pathWhereTheDatasetShouldBeStored);

            // Let's obtain one of the audio signals:
            Signal a = fsdd.GetSignal(0, "jackson", 10);

            // Note: if you would like to load a signal from the
            // disk, you could use the following method directly:
            // Signal a = Signal.FromFile(fileName);

            // First we could extract some characteristics from the audio signal, just
            // for informative purposes. We don't actually need to register them just
            // to compute the MFCC, so please skip those checks if you would like!
            int          numberOfChannels = a.NumberOfChannels; // should be: 1
            int          numberOfFrames   = a.NumberOfFrames;   // should be: 5451
            int          numberOfSamples  = a.NumberOfSamples;  // should be: 5451
            SampleFormat format           = a.SampleFormat;     // should be: Format32BitIeeeFloat
            int          sampleRate       = a.SampleRate;       // should be: 8000 (8khz)
            int          samples          = a.Samples;          // should be: 5451
            int          sampleSize       = a.SampleSize;       // should be: 4
            int          numberOfBytes    = a.NumberOfBytes;    // should be: 21804

            // Now, let's say we would like to compute its MFCC:
            // NOTE(review): samplingRate below is 16000 while the signal reports 8000;
            // the expected values in this example were produced with these settings.
            var extractor = new MelFrequencyCepstrumCoefficient(
                filterCount: 40,            // Note: all values are optional, you can
                cepstrumCount: 13,          // specify only the ones you'd like and leave
                lowerFrequency: 133.3333,   // all others at their defaults
                upperFrequency: 6855.4976,
                alpha: 0.97,
                samplingRate: 16000,
                frameRate: 100,
                windowLength: 0.0256,
                numberOfBins: 512);

            // We can call the transform method of the MFCC extractor class:
            IEnumerable<MelFrequencyCepstrumCoefficientDescriptor> mfcc = extractor.Transform(a);

            // or we could also transform them to a matrix directly with:
            double[][] actual = mfcc.Select(x => x.Descriptor).ToArray();

            // This matrix would contain X different MFCC values (due to the length of the signal)
            int numberOfMFCCs = actual.Length; // should be 35 (depends on the MFCC window)

            // Each of those MFCC values would have length 13;
            int descriptorLength = actual[0].Length; // 13 (depends on the MFCC cepstrum count)

            // An example of an MFCC vector would have been:
            double[] row = actual[0]; // should have been: (see vector written below)

            double[] expected = new double[]
            {
                10.570020645259348d, 1.3484344242338475d, 0.4861056552885234d,
                -0.79287993818868352d, -0.64182784362935996d, -0.28079835895392041d,
                -0.46378109632237779d, 0.072039410871952647d, -0.43971730320461733d,
                0.48891921252102533d, -0.22502241185050212d, 0.12478713268421229d, -0.13373400147110801d
            };
            #endregion

            // Properties of the first signal (speaker "jackson").
            Assert.AreEqual(1, numberOfChannels);
            Assert.AreEqual(5451, numberOfFrames);
            Assert.AreEqual(5451, numberOfSamples);
            Assert.AreEqual(SampleFormat.Format32BitIeeeFloat, format);
            Assert.AreEqual(8000, sampleRate);
            Assert.AreEqual(5451, samples);
            Assert.AreEqual(4, sampleSize);
            Assert.AreEqual(21804, numberOfBytes);
            // The byte count must equal sample size x frames x channels.
            Assert.AreEqual(sampleSize * numberOfFrames * numberOfChannels, numberOfBytes);
            Assert.AreEqual(35, numberOfMFCCs);
            Assert.IsTrue(expected.IsEqual(row, 1e-8));

            // Second signal (speaker "nicolas"): two channels, so samples = 2 x frames.
            Signal b = fsdd.GetSignal(0, "nicolas", 10);
            Assert.AreEqual(2, b.NumberOfChannels);
            Assert.AreEqual(3755, b.NumberOfFrames);
            Assert.AreEqual(7510, b.NumberOfSamples);
            Assert.AreEqual(SampleFormat.Format32BitIeeeFloat, b.SampleFormat);
            Assert.AreEqual(8000, b.SampleRate);
            Assert.AreEqual(7510, b.Samples);
            Assert.AreEqual(4, b.SampleSize);
            Assert.AreEqual(30040, b.NumberOfBytes);
            Assert.AreEqual(b.SampleSize * b.NumberOfFrames * b.NumberOfChannels, b.NumberOfBytes);
            MelFrequencyCepstrumCoefficientDescriptor[] rb = extractor.Transform(b).ToArray();
            Assert.AreEqual(24, rb.Length);
            Assert.IsTrue(new[] { 10.6434445230168, -0.222107787197107, 0.316067614396639, -0.212769536249701, -0.107755264262885, -0.292732772820073, -0.00445205345925395, 0.024397440969199, 0.0213769364471326, -0.0882765552657509, -0.177682484734242, -0.1013307739251, -0.099014915302743 }.IsEqual(rb[0].Descriptor, 1e-8));

            // Third signal (speaker "theo").
            Signal c = fsdd.GetSignal(5, "theo", 23);
            Assert.AreEqual(1, c.NumberOfChannels);
            Assert.AreEqual(4277, c.NumberOfFrames);
            Assert.AreEqual(4277, c.NumberOfSamples);
            Assert.AreEqual(SampleFormat.Format32BitIeeeFloat, c.SampleFormat);
            Assert.AreEqual(8000, c.SampleRate);
            Assert.AreEqual(4277, c.Samples);
            Assert.AreEqual(4, c.SampleSize);
            Assert.AreEqual(17108, c.NumberOfBytes);
            // Use c's own sample size here: the check is about signal c, not signal b.
            Assert.AreEqual(c.SampleSize * c.NumberOfFrames * c.NumberOfChannels, c.NumberOfBytes);
            MelFrequencyCepstrumCoefficientDescriptor[] rc = extractor.Transform(c).ToArray();
            Assert.AreEqual(27, rc.Length);
            Assert.IsTrue(new[] { 7.24614406589037, -1.16796769512142, -0.134374026111248, -0.192703972718674, 0.112752647291759, -0.118712048308068, -0.0603752892245708, -0.0275002195634854, -0.0830858413953528, -0.0838965948140795, -0.15293502718595, 0.0107796827068413, -0.0491283773795346 }.IsEqual(rc[0].Descriptor, 1e-8));
        }
        public void learn()
        {
            // Documentation example for the Bag-of-Audio-Words model: learns a 32-word
            // codebook from the Free Spoken Digits training set, checks the descriptor
            // statistics, then trains a Chi-Square SVM on the resulting feature vectors
            // and verifies its accuracy on the training and testing sets.
            string basePath = Path.Combine(NUnit.Framework.TestContext.CurrentContext.TestDirectory, "learn");

            #region doc_learn
            // Ensure results are reproducible
            Accord.Math.Random.Generator.Seed = 0;

            // The Bag-of-Audio-Words model converts audio signals of arbitrary
            // size into fixed-length feature vectors. In this example, we
            // will be setting the codebook size to 32. This means all feature
            // vectors that will be generated will have the same length of 32.

            // By default, the BoW object will use the MFCC extractor as the
            // feature extractor and K-means as the clustering algorithm.

            // Create a new Bag-of-Audio-Words (BoW) model
            var bow = BagOfAudioWords.Create(numberOfWords: 32);
            // Note: a simple BoW model can also be created using
            // var bow = new BagOfAudioWords(numberOfWords: 32);

            // Get some training audio recordings
            FreeSpokenDigitsDataset fsdd = new FreeSpokenDigitsDataset(basePath);
            string[] trainFileNames      = fsdd.Training.LocalPaths;
            int[]    trainOutputs        = fsdd.Training.Digits;

            // Compute the model
            bow.Learn(trainFileNames);

            // After this point, we will be able to translate
            // the signals into double[] feature vectors using
            double[][] trainInputs = bow.Transform(trainFileNames);

            // We can also check some statistics about the dataset:
            int numberOfSignals = bow.Statistics.TotalNumberOfInstances; // 1350

            // Statistics about all the descriptors that have been extracted:
            int      totalDescriptors = bow.Statistics.TotalNumberOfDescriptors;                     // 29106
            double   totalMean        = bow.Statistics.TotalNumberOfDescriptorsPerInstance.Mean;     // 21.56
            double   totalVar         = bow.Statistics.TotalNumberOfDescriptorsPerInstance.Variance; // 52.764002965159314
            IntRange totalRange       = bow.Statistics.TotalNumberOfDescriptorsPerInstanceRange;     // [8, 115]

            // Statistics only about the descriptors that have been actually used:
            int      takenDescriptors = bow.Statistics.NumberOfDescriptorsTaken;                     // 29106
            double   takenMean        = bow.Statistics.NumberOfDescriptorsTakenPerInstance.Mean;     // 21.56
            double   takenVar         = bow.Statistics.NumberOfDescriptorsTakenPerInstance.Variance; // 52.764002965159314
            IntRange takenRange       = bow.Statistics.NumberOfDescriptorsTakenPerInstanceRange;     // [8, 115]
            #endregion

            Assert.AreEqual(1350, numberOfSignals);

            // Floating-point statistics are compared with a tolerance rather than exactly.
            Assert.AreEqual(29106, totalDescriptors);
            Assert.AreEqual(21.56, totalMean, 1e-10);
            Assert.AreEqual(52.764002965159314, totalVar, 1e-8);
            Assert.AreEqual(new IntRange(8, 115), totalRange);

            Assert.AreEqual(29106, takenDescriptors);
            Assert.AreEqual(21.56, takenMean, 1e-10);
            Assert.AreEqual(52.764002965159314, takenVar, 1e-8);
            Assert.AreEqual(new IntRange(8, 115), takenRange);


            // Fail with a clear assertion (instead of a NullReferenceException below)
            // if the clustering algorithm is not the expected K-means instance.
            var kmeans = bow.Clustering as KMeans;
            Assert.IsNotNull(kmeans, "bow.Clustering is expected to be a KMeans instance.");
            Assert.AreEqual(13, kmeans.Clusters.NumberOfInputs);
            Assert.AreEqual(32, kmeans.Clusters.NumberOfOutputs);
            Assert.AreEqual(32, kmeans.Clusters.NumberOfClasses);

            #region doc_classification

            // Now, the features can be used to train any classification
            // algorithm as if they were the signals themselves. For example,
            // we can use them to train a Chi-square SVM as shown below:

            // Create the SMO algorithm to learn a Chi-Square kernel SVM
            var teacher = new MulticlassSupportVectorLearning<ChiSquare>()
            {
                Learner = (p) => new SequentialMinimalOptimization<ChiSquare>()
            };

            // Obtain a learned machine
            var svm = teacher.Learn(trainInputs, trainOutputs);

            // Use the machine to classify the features
            int[] output = svm.Decide(trainInputs);

            // Compute the error between the expected and predicted labels for the training set:
            var    trainMetrics = GeneralConfusionMatrix.Estimate(svm, trainInputs, trainOutputs);
            double trainAcc     = trainMetrics.Accuracy; // should be around 0.97259259259259256

            // Now, we can evaluate the performance of the model on the testing set:
            string[] testFileNames = fsdd.Testing.LocalPaths;
            int[]    testOutputs   = fsdd.Testing.Digits;

            // First we transform the testing set to double[]:
            double[][] testInputs = bow.Transform(testFileNames);

            // Then we compute the error between expected and predicted for the testing set:
            var    testMetrics = GeneralConfusionMatrix.Estimate(svm, testInputs, testOutputs);
            double testAcc     = testMetrics.Accuracy; // should be around 0.8666666666666667
            #endregion

            Assert.AreEqual(0.97259259259259256, trainAcc, 1e-8);
            Assert.AreEqual(0.8666666666666667, testAcc, 1e-8);
        }