Example #1
        public void ClassificationForestModel_GetVariableImportance()
        {
            var parser             = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations       = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets            = parser.EnumerateRows("Pass").ToF64Vector();
            var featureNameToIndex = new Dictionary <string, int> {
                { "AptitudeTestScore", 0 },
                { "PreviousExperience_month", 1 }
            };

            var learner = new ClassificationRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var actual   = sut.GetVariableImportance(featureNameToIndex);
            var expected = new Dictionary <string, double> {
                { "PreviousExperience_month", 100 },
                { "AptitudeTestScore", 43.4356891141648 }
            };

            Assert.AreEqual(expected.Count, actual.Count);
            var zip = expected.Zip(actual, (e, a) => new { Expected = e, Actual = a });

            foreach (var item in zip)
            {
                Assert.AreEqual(item.Expected.Key, item.Actual.Key);
                Assert.AreEqual(item.Expected.Value, item.Actual.Value, m_delta);
            }
        }
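
The learner above is constructed positionally. As a reference, here is a minimal sketch of the same call with named arguments, assuming the SharpLearning parameter names (trees, minimumSplitSize, maximumTreeDepth, featuresPrSplit, minimumInformationGain, subSampleRatio, seed, runParallel); verify them against the constructor in your SharpLearning version before relying on them.

    // Equivalent construction with named arguments (the parameter names are
    // an assumption; check your SharpLearning version).
    var learner = new ClassificationRandomForestLearner(
        trees: 100,                     // number of trees in the forest
        minimumSplitSize: 5,            // smallest node size eligible for a split
        maximumTreeDepth: 100,          // depth cap per tree
        featuresPrSplit: 1,             // features sampled at each split
        minimumInformationGain: 0.0001, // stop splitting below this gain
        subSampleRatio: 1.0,            // fraction of rows bagged per tree
        seed: 42,                       // fixed seed for reproducible tests
        runParallel: false);            // the Glass examples below pass true
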
Example #2
    private float GetAUROC(F64Matrix observations, double[] targets)
    {
        // print the raw data being used for classification
        PrintArray(observations);
        PrintVector(targets);

        // split the data into training and test set
        var splitter          = new RandomTrainingTestIndexSplitter <double>(trainingPercentage: 0.5);
        var trainingTestSplit = splitter.SplitSet(observations, targets);
        var trainSet          = trainingTestSplit.TrainingSet;
        var testSet           = trainingTestSplit.TestSet;

        // train the model
        var learner = new ClassificationRandomForestLearner();
        var model   = learner.Learn(trainSet.Observations, trainSet.Targets);

        // make the predictions from the test set
        var testPredictions = model.PredictProbability(testSet.Observations);

        // create the metric and measure the error
        var metric    = new RocAucClassificationProbabilityMetric(1);
        var testError = (float)metric.Error(testSet.Targets, testPredictions);

        if (testError < .5f)
        {
            testError = 1f - testError;
        }

        return testError;
    }
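
SharpLearning metrics report an error where lower is better, and the argument to RocAucClassificationProbabilityMetric (1 here) selects which class label counts as positive. Assuming Error() returns 1 - AUC for this metric (an assumption worth verifying in your version), the flip above is the usual trick of inverting a classifier that ranks worse than chance, and the actual AUC can be recovered as in this sketch:

    // Hedged sketch: recover the AUC from the metric's error, assuming
    // Error() == 1 - AUC for this metric (verify in your SharpLearning version).
    var metric = new RocAucClassificationProbabilityMetric(1); // positive class = 1
    var auc    = 1.0 - metric.Error(testSet.Targets, testPredictions);
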
Example #3
        public void ClassificationForestModel_GetVariableImportance()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var featureNameToIndex = new Dictionary <string, int> {
                { "AptitudeTestScore", 0 },
                { "PreviousExperience_month", 1 }
            };

            var learner = new ClassificationRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var actual   = sut.GetVariableImportance(featureNameToIndex);
            var expected = new Dictionary <string, double> {
                { "PreviousExperience_month", 100 },
                { "AptitudeTestScore", 43.4356891141648 }
            };

            Assert.AreEqual(expected.Count, actual.Count);
            var zip = expected.Zip(actual, (e, a) => new { Expected = e, Actual = a });

            foreach (var item in zip)
            {
                Assert.AreEqual(item.Expected.Key, item.Actual.Key);
                Assert.AreEqual(item.Expected.Value, item.Actual.Value, m_delta);
            }
        }
Example #4
        double ClassificationRandomLearner_Learn_Aptitude(int trees, double subSampleRatio = 1.0)
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var sut   = new ClassificationRandomForestLearner(trees, 5, 100, 1, 0.0001, subSampleRatio, 42, false);
            var model = sut.Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            return error;
        }
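
The helper above is parameterized on the tree count and on subSampleRatio, the fraction of rows bagged for each tree. A minimal usage sketch (the printed comparison is illustrative, not from the test suite):

    // Training error with full bagging versus 50% row subsampling.
    var fullBagError = ClassificationRandomLearner_Learn_Aptitude(100, 1.0);
    var halfBagError = ClassificationRandomLearner_Learn_Aptitude(100, 0.5);
    Console.WriteLine($"full bag: {fullBagError}, half bag: {halfBagError}");
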
Example #5
        public static void LearnModel(int trees = 30)
        {
            var keySignatureTable = Properties.Resources.keySignaturesDataset;

            var parser     = new CsvParser(() => new StringReader(keySignatureTable), ',');
            var targetName = "key";

            var observations = parser.EnumerateRows(c => c != targetName).ToF64Matrix();
            var targets      = parser.EnumerateRows(targetName).ToF64Vector();

            var learner = new ClassificationRandomForestLearner(trees: trees);

            model = learner.Learn(observations, targets);
        }
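
LearnModel stores the trained forest in a class-level model field instead of returning it. A hedged usage sketch; the feature array is a placeholder whose length and order must match the non-"key" columns of the keySignaturesDataset CSV:

    // Train once, then classify a new observation (placeholder feature row).
    LearnModel(trees: 30);
    var features   = new double[] { /* one row of feature values */ };
    var prediction = model.Predict(features);
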
Example #6
        public void ClassificationForestModel_Predict_Multiple()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new ClassificationRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var predictions = sut.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.23076923076923078, error, m_delta);
        }
Example #7
        public void ClassificationRandomForestLearner_Learn_Glass_100_Trees_Parallel()
        {
            var(observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var sut = new ClassificationRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, true);

            var model = sut.Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.018691588785046728, error, m_delta);
        }
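
The final constructor argument (true here, false in the aptitude examples) toggles parallel tree training. A minimal timing sketch using System.Diagnostics.Stopwatch; whether parallel and sequential training produce identical forests for a fixed seed is an assumption to verify:

    // Same learner with parallel training; only wall-clock time should change.
    var sw = System.Diagnostics.Stopwatch.StartNew();
    var parallelModel = new ClassificationRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, true)
        .Learn(observations, targets);
    sw.Stop();
    Console.WriteLine($"parallel training took {sw.ElapsedMilliseconds} ms");
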
Example #8
        public void ClassificationForestModel_Save()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new ClassificationRandomForestLearner(2, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var writer = new StringWriter();

            sut.Save(() => writer);

            var actual = writer.ToString();

            Assert.AreEqual(m_classificationForestModelString, actual);
        }
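
Save streams the serialized forest through the supplied TextWriter. A hedged round-trip sketch, assuming the static ClassificationForestModel.Load(Func&lt;TextReader&gt;) counterpart that SharpLearning models provide:

    // Serialize to a string, then rehydrate a model from it.
    var writer = new StringWriter();
    sut.Save(() => writer);
    var loaded      = ClassificationForestModel.Load(() => new StringReader(writer.ToString()));
    var predictions = loaded.Predict(observations); // behaves like the original model
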
Example #9
        public void ClassificationForestModel_Predict_Multiple()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            var learner = new ClassificationRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var predictions = sut.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.23076923076923078, error, m_delta);
        }
Example #10
        public void ClassificationForestModel_Save()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var learner = new ClassificationRandomForestLearner(2, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var writer = new StringWriter();

            sut.Save(() => writer);

            var actual = writer.ToString();

            Assert.AreEqual(ClassificationForestModelString, actual);
        }
Example #11
        double ClassificationRandomLearner_Learn_Aptitude(int trees, double subSampleRatio = 1.0)
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            var sut   = new ClassificationRandomForestLearner(trees, 5, 100, 1, 0.0001, subSampleRatio, 42, false);
            var model = sut.Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            return error;
        }
Example #12
        public void ClassificationForestModel_GetRawVariableImportance()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new ClassificationRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var actual   = sut.GetRawVariableImportance();
            var expected = new double[] { 5.1708306492004992, 11.904566854251304 };

            Assert.AreEqual(expected.Length, actual.Length);

            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], m_delta);
            }
        }
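
GetRawVariableImportance returns unscaled scores indexed by feature (0: AptitudeTestScore, 1: PreviousExperience_month). Comparing against Example #1 suggests GetVariableImportance is the same vector rescaled so the maximum becomes 100; this sketch shows the inferred relationship (the rescaling rule is an assumption based on the two tests):

    // 5.1708... / 11.9045... * 100 ≈ 43.4357, the scaled value in Example #1.
    var raw    = sut.GetRawVariableImportance();
    var max    = raw.Max();
    var scaled = raw.Select(v => v / max * 100.0).ToArray();
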
Example #13
        public void ClassificationRandomForestLearner_Learn_Glass_100_Trees_Parallel()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var sut = new ClassificationRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, true);

            var model = sut.Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.018691588785046728, error, m_delta);
        }
Example #14
        public void ClassificationForestModel_GetRawVariableImportance()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var learner = new ClassificationRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var actual   = sut.GetRawVariableImportance();
            var expected = new double[] { 5.1708306492004992, 11.904566854251304 };

            Assert.AreEqual(expected.Length, actual.Length);

            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], m_delta);
            }
        }
Example #15
        public void ClassificationForestModel_Predict_Single()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new ClassificationRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var rows        = targets.Length;
            var predictions = new double[rows];

            for (int i = 0; i < rows; i++)
            {
                predictions[i] = sut.Predict(observations.Row(i));
            }

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.23076923076923078, error, m_delta);
        }
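
Row-by-row prediction yields the same values as the batch overload used in Example #6, which is why both examples assert the same error. A minimal equivalence sketch:

    // The batch overload should agree element-wise with per-row prediction.
    var batch = sut.Predict(observations);
    for (int i = 0; i < rows; i++)
    {
        Assert.AreEqual(batch[i], predictions[i]);
    }
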
Example #16
        public void ClassificationRandomForestLearner_Learn_Glass_100_Indices()
        {
            var(observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var sut = new ClassificationRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, false);

            var indices = Enumerable.Range(0, targets.Length).ToArray();

            indices.Shuffle(new Random(42));
            indices = indices.Take((int)(targets.Length * 0.7))
                      .ToArray();

            var model = sut.Learn(observations, targets, indices);

            var predictions = model.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.098130841121495324, error, m_delta);
        }
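
Because Learn(observations, targets, indices) trains on the sampled 70% of rows while Predict then runs over all rows, the asserted error mixes in-sample and out-of-sample predictions. A sketch of scoring only the held-out 30% for a cleaner estimate:

    // Evaluate only the rows excluded from training.
    var heldOut      = Enumerable.Range(0, targets.Length).Except(indices).ToArray();
    var heldOutPreds = heldOut.Select(i => model.Predict(observations.Row(i))).ToArray();
    var heldOutTrue  = heldOut.Select(i => targets[i]).ToArray();
    var heldOutError = evaluator.Error(heldOutTrue, heldOutPreds);
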
Example #17
        public static void Learn(string dataPath, int LEARNING_ROWS_COUNT, int TESTING_ROWS_COUNT, int FEATURES_COUNT = 10)
        {
            F64Matrix     observations     = new F64Matrix(LEARNING_ROWS_COUNT, FEATURES_COUNT);
            F64Matrix     testObservations = new F64Matrix(TESTING_ROWS_COUNT, FEATURES_COUNT);
            List <double> targets          = new List <double>();
            List <double> actualTargets    = new List <double>();

            Parse(dataPath, observations, targets, testObservations, actualTargets, TESTING_ROWS_COUNT, LEARNING_ROWS_COUNT);

            // Create a random forest learner for classification with 100 trees
            var learner = new ClassificationRandomForestLearner(trees: 100);

            // learn the model
            ClassificationForestModel model = learner.Learn(observations, targets.ToArray());

            Console.WriteLine("Finished learning!");

            // use the model for predicting new observations
            var predictions = model.Predict(testObservations);
            var test    = "";   // per-row "prediction error" report written to test.txt
            var testIdx = 0;
            var error   = 0.0;  // accumulated absolute error over the test rows
            var g       = 0;    // largest single absolute error seen

            foreach (var p in predictions)
            {
                var e = Math.Abs(p - actualTargets[testIdx]);
                error += e;
                g      = Math.Max(g, (int)e);

                test += p.ToString() + " " + e + "\n";
                testIdx++;
            }
            File.WriteAllText("test.txt", test);

            // save the model for use with another application
            model.Save(() => new StreamWriter("randomforest.xml"));
        }
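
The loop above scores predictions by absolute label distance, which treats class labels as ordinal. For plain classification, counting mismatches with the TotalErrorClassificationMetric seen in the tests is the more conventional measure; a minimal sketch:

    // Fraction of misclassified test rows, ignoring how "far apart" labels are.
    var evaluator             = new TotalErrorClassificationMetric<double>();
    var misclassificationRate = evaluator.Error(actualTargets.ToArray(), predictions);
    Console.WriteLine($"misclassification rate: {misclassificationRate}");
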
Example #18
        public void ClassificationRandomForestLearner_Learn_Glass_100_Indices()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();
            var rows         = targets.Length;

            var sut = new ClassificationRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, false);

            var indices = Enumerable.Range(0, targets.Length).ToArray();

            indices.Shuffle(new Random(42));
            indices = indices.Take((int)(targets.Length * 0.7))
                      .ToArray();

            var model = sut.Learn(observations, targets, indices);

            var predictions = model.Predict(observations);

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.098130841121495324, error, m_delta);
        }
Example #19
        public void ClassificationForestModel_PredictProbability_Single()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            var learner = new ClassificationRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var actual = new ProbabilityPrediction[rows];

            for (int i = 0; i < rows; i++)
            {
                actual[i] = sut.PredictProbability(observations.Row(i));
            }

            var evaluator = new TotalErrorClassificationMetric <double>();
            var error     = evaluator.Error(targets, actual.Select(p => p.Prediction).ToArray());

            Assert.AreEqual(0.076923076923076927, error, m_delta);

            var expected = new ProbabilityPrediction[] { new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.650149027443145 }, { 1, 0.349850972556855 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.566943847818848 }, { 1, 0.433056152181152 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.726936489980608 }, { 1, 0.273063510019392 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.752781908451026 }, { 1, 0.247218091548974 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.566943847818848 }, { 1, 0.433056152181152 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.792506836300954 }, { 1, 0.207493163699046 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.491736055611056 }, { 1, 0.508263944388944 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.574583315377433 }, { 1, 0.425416684622567 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.838724674018791 }, { 1, 0.161275325981208 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.241480824730825 }, { 1, 0.758519175269175 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.385258186258186 }, { 1, 0.614741813741813 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.726936489980608 }, { 1, 0.273063510019392 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.706733044733045 }, { 1, 0.293266955266955 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.801266011766012 }, { 1, 0.198733988233988 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.294952297702298 }, { 1, 0.705047702297702 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.821706914001031 }, { 1, 0.178293085998968 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.780062391856509 }, { 1, 0.21993760814349 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.554444388944389 }, { 1, 0.445555611055611 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.261349872349872 }, { 1, 0.738650127650127 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.419758186258186 }, { 1, 0.580241813741813 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.71382231249143 }, { 1, 0.28617768750857 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.241480824730825 }, { 1, 0.758519175269175 },
                }), new ProbabilityPrediction(1, new Dictionary <double, double> {
                    { 0, 0.47562148962149 }, { 1, 0.52437851037851 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.821706914001031 }, { 1, 0.178293085998968 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.792506836300954 }, { 1, 0.207493163699046 },
                }), new ProbabilityPrediction(0, new Dictionary <double, double> {
                    { 0, 0.666244987039105 }, { 1, 0.333755012960895 },
                }) };

            CollectionAssert.AreEqual(expected, actual);
        }
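
Each ProbabilityPrediction pairs the predicted class with a class-to-probability dictionary. A minimal sketch of reading the positive-class probability, assuming the Prediction and Probabilities property names used by SharpLearning:

    // Inspect one prediction: the winning class and P(class == 1).
    var first = sut.PredictProbability(observations.Row(0));
    Console.WriteLine($"predicted: {first.Prediction}, P(1): {first.Probabilities[1]}");
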
Example #20
        static void Main(string[] args)
        {
            try
            {
                //Load data
                data_label_train = loadTrainingData("train.csv");
                data_label_test  = loadTestData("test_data.csv", "result.csv");

                //Normalize train data
                foreach (Data reading in data_label_train)
                {
                    reading.Normalize();
                }

                //Scaling test data and Normalize it
                Random rnd = new System.Random();
                foreach (LabeledData reading in data_label_test)
                {
                    double a = rnd.NextDouble() + 1.0;
                    double b = rnd.NextDouble();
                    reading.Scaling(a, b);
                    reading.Normalize();
                }

                double fs = 8000.0;
                //Create bandpass filters
                OnlineFilter bandpass0 = OnlineFilter.CreateLowpass(ImpulseResponse.Finite, fs, 200.0);
                OnlineFilter bandpass1 = OnlineFilter.CreateBandpass(ImpulseResponse.Finite, fs, 200.0, 2000.0);
                OnlineFilter bandpass2 = OnlineFilter.CreateBandpass(ImpulseResponse.Finite, fs, 2000.0, 3000.0);
                OnlineFilter bandpass3 = OnlineFilter.CreateBandpass(ImpulseResponse.Finite, fs, 3000.0, 3400.0);
                OnlineFilter bandpass4 = OnlineFilter.CreateHighpass(ImpulseResponse.Finite, fs, 3400.0);

                //Pre process: extract features from raw data
                var filters = new[] { bandpass0, bandpass1, bandpass2, bandpass3, bandpass4 };
                foreach (var filter in filters)
                {
                    selectedFeat.Add(new FeatureFilterSelection(FeaturesSelc.crestFactor, filter));
                    selectedFeat.Add(new FeatureFilterSelection(FeaturesSelc.skew, filter));
                    selectedFeat.Add(new FeatureFilterSelection(FeaturesSelc.kuri, filter));
                    selectedFeat.Add(new FeatureFilterSelection(FeaturesSelc.std, filter));
                    selectedFeat.Add(new FeatureFilterSelection(FeaturesSelc.var, filter));
                }

                // learn the model
                var learner = new ClassificationRandomForestLearner(trees: 50);
                var model   = learner.Learn(FeatureExtract(data_label_train, selectedFeat), data_label_train.Select(x => x.label).ToArray());

                // Test: use the model for predicting new observations
                List <double> p = new List <double>();
                foreach (LabeledData reading in data_label_test)
                {
                    var prediction = model.Predict(reading.FeatureExtract(selectedFeat));
                    p.Add(prediction);
                }

                double accuracy = 0.0;
                for (int i = 0; i < data_label_test.Count; i++)
                {
                    if (data_label_test[i].label == p[i])
                    {
                        accuracy += 1.0 / data_label_test.Count * 100;
                    }
                    Console.WriteLine("Prediction result: {0} Ideal result: {1}", p[i], data_label_test[i].label);
                }

                Console.WriteLine("Accuracy {0}", accuracy.ToString());

                Console.ReadKey();
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
            }
            Console.ReadKey();
        }