public void RegressionForestModel_GetVariableImportance()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var featureNameToIndex = new Dictionary <string, int> {
                { "AptitudeTestScore", 0 },
                { "PreviousExperience_month", 1 }
            };

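            // The positional constructor arguments are the learner's hyperparameters.
            // From the named usages elsewhere in these examples, the first argument is
            // trees, the sixth is subSampleRatio and the last is runParallel; the
            // remaining positions presumably cover settings such as the minimum split
            // size, maximum tree depth, features per split and minimum information gain.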
            var learner = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var actual   = sut.GetVariableImportance(featureNameToIndex);
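            // GetVariableImportance returns named importances scaled so that the most
            // important feature (here PreviousExperience_month) scores 100.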
            var expected = new Dictionary <string, double> {
                { "PreviousExperience_month", 100 },
                { "AptitudeTestScore", 42.3879919692465 }
            };

            Assert.AreEqual(expected.Count, actual.Count);
            var zip = expected.Zip(actual, (e, a) => new { Expected = e, Actual = a });

            foreach (var item in zip)
            {
                Assert.AreEqual(item.Expected.Key, item.Actual.Key);
                Assert.AreEqual(item.Expected.Value, item.Actual.Value, m_delta);
            }
        }
        public void RegressionRandomForestLearnerTest_Learn_Glass_100_Indices()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();
            var rows         = targets.Length;

            var sut = new RegressionRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, false);

            var indices = Enumerable.Range(0, targets.Length).ToArray();

            indices.Shuffle(new Random(42));
            indices = indices.Take((int)(targets.Length * 0.7))
                      .ToArray();

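            // Learning with an explicit index array trains the forest on the selected
            // 70 % subset only. This overload returns the individual tree models plus
            // the raw variable importance, which are wrapped manually in a
            // RegressionForestModel below.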
            var decisionTreeModels = sut.Learn(observations, targets, indices, out var rawVariableImportance).ToArray();
            var model = new RegressionForestModel(decisionTreeModels, rawVariableImportance);

            var predictions = model.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.49709813080602938, error, m_delta);
        }
        public void RegressionForestModel_PredictCertainty_Single()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            var learner = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

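            // PredictCertainty returns a point prediction together with a certainty
            // estimate (variance) for each observation; the prediction part is scored
            // against the targets below, and the full pairs are compared to the
            // expected values at the end of the test.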
            var actual = new CertaintyPrediction[rows];

            for (int i = 0; i < rows; i++)
            {
                actual[i] = sut.PredictCertainty(observations.Row(i));
            }

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, actual.Select(p => p.Prediction).ToArray());

            Assert.AreEqual(0.15381141277554411, error, 0.0000001);

            var expected = new CertaintyPrediction[]
            {
                new CertaintyPrediction(0.379151515151515, 0.0608255007215007),
                new CertaintyPrediction(0.411071351850763, 0.0831655436577049),
                new CertaintyPrediction(0.243420918950331, 0.0452827034233046),
                new CertaintyPrediction(0.302332251082251, 0.0699917594408057),
                new CertaintyPrediction(0.411071351850763, 0.0831655436577049),
                new CertaintyPrediction(0.175743762773174, 0.0354069437824887),
                new CertaintyPrediction(0.574083361083361, 0.0765858693929188),
                new CertaintyPrediction(0.259063776093188, 0.0491198812971218),
                new CertaintyPrediction(0.163878898878899, 0.0331543420321184),
                new CertaintyPrediction(0.671753996003996, 0.0624466591504497),
                new CertaintyPrediction(0.418472943722944, 0.0607014359023913),
                new CertaintyPrediction(0.243420918950331, 0.0452827034233046),
                new CertaintyPrediction(0.443779942279942, 0.0941961872991865),
                new CertaintyPrediction(0.156999361749362, 0.0435804333960299),
                new CertaintyPrediction(0.591222034501446, 0.0873624628347336),
                new CertaintyPrediction(0.123822406351818, 0.0283119805431255),
                new CertaintyPrediction(0.162873993653405, 0.0333697457759022),
                new CertaintyPrediction(0.596261932511932, 0.0695341060210394),
                new CertaintyPrediction(0.671753996003996, 0.0624466591504497),
                new CertaintyPrediction(0.418472943722944, 0.0607014359023913),
                new CertaintyPrediction(0.329000027750028, 0.0788869852405852),
                new CertaintyPrediction(0.671753996003996, 0.0624466591504497),
                new CertaintyPrediction(0.499770375049787, 0.0913884936411888),
                new CertaintyPrediction(0.140025508804921, 0.0309875116490099),
                new CertaintyPrediction(0.161207326986739, 0.0336321035325246),
                new CertaintyPrediction(0.389553418803419, 0.0744433596104835),
            };

            CollectionAssert.AreEqual(expected, actual);
        }
        public void RegressionForestModel_GetVariableImportance()
        {
            var parser             = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations       = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets            = parser.EnumerateRows("Pass").ToF64Vector();
            var featureNameToIndex = new Dictionary <string, int> {
                { "AptitudeTestScore", 0 },
                { "PreviousExperience_month", 1 }
            };

            var learner = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var actual   = sut.GetVariableImportance(featureNameToIndex);
            var expected = new Dictionary <string, double> {
                { "PreviousExperience_month", 100 },
                { "AptitudeTestScore", 42.3879919692465 }
            };

            Assert.AreEqual(expected.Count, actual.Count);
            var zip = expected.Zip(actual, (e, a) => new { Expected = e, Actual = a });

            foreach (var item in zip)
            {
                Assert.AreEqual(item.Expected.Key, item.Actual.Key);
                Assert.AreEqual(item.Expected.Value, item.Actual.Value, 0.000001);
            }
        }
        public void RandomForest_Default_Parameters_Save_Load_Model_Using_Static_Methods()
        {
            #region read and split data
            // Use new StreamReader(filePath) when reading the data from the file system
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // read feature matrix
            var observations = parser.EnumerateRows(c => c != targetName)
                               .ToF64Matrix();

            // read regression targets
            var targets = parser.EnumerateRows(targetName)
                          .ToF64Vector();

            // Create the training/test splitter.
            // Since this is a regression problem, we use the random training/test set splitter;
            // 30 % of the data is used for the test set.
            var splitter = new RandomTrainingTestIndexSplitter <double>(trainingPercentage: 0.7, seed: 24);

            var trainingTestSplit = splitter.SplitSet(observations, targets);
            var trainSet          = trainingTestSplit.TrainingSet;
            var testSet           = trainingTestSplit.TestSet;
            #endregion

            // create learner with default parameters
            var learner = new RegressionRandomForestLearner(trees: 100);

            // learn the model with the default parameters
            var model = learner.Learn(trainSet.Observations, trainSet.Targets);

            // predict the training and test set.
            var trainPredictions = model.Predict(trainSet.Observations);
            var testPredictions  = model.Predict(testSet.Observations);

            // Since this is a regression problem, we use mean squared error as the metric
            // for evaluating how well the model performs.
            var metric = new MeanSquaredErrorRegressionMetric();

            // measure the error on training and test set.
            var trainError = metric.Error(trainSet.Targets, trainPredictions);
            var testError  = metric.Error(testSet.Targets, testPredictions);

            TraceTrainingAndTestError(trainError, testError);

            // Save the model. When using the file system, use new StreamWriter(filePath);
            // the default format is xml.
            var savedModel = new StringWriter();
            model.Save(() => savedModel);

            // Load the model. When using the file system, use new StreamReader(filePath);
            // the default format is xml.
            var loadedModel = RegressionForestModel.Load(() => new StringReader(savedModel.ToString()));
        }
        private void Train()
        {
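            // Load the training data from a CSV file on disk, train a random forest
            // on all rows, and persist the resulting model as XML so it can be
            // reloaded later without retraining.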
            var parser       = new CsvParser(() => new StreamReader(machineLearningModelsAbsolutePath + "warehouse_data.csv"), separator: ',');
            var targetName   = "NextCGPA";
            var targets      = parser.EnumerateRows(targetName).ToF64Vector();
            var observations = parser.EnumerateRows(x => x != targetName).ToF64Matrix();
            var learner      = new RegressionRandomForestLearner(trees: numberOfTrees);
            var model        = learner.Learn(observations, targets);

            model.Save(() => new StreamWriter(machineLearningModelsAbsolutePath + "random_forest_model.xml"));
        }
        public void RegressionRandomForestLearnerTest_Learn_Glass_100_Trees_Parallel()
        {
            var(observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var sut   = new RegressionRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, true);
            var model = sut.Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.22701441864756075, error, m_delta);
        }
        double RegressionRandomForestLearner_Learn_Aptitude(int trees, double subSampleRatio = 1.0)
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var sut   = new RegressionRandomForestLearner(trees, 5, 100, 1, 0.0001, subSampleRatio, 42, false);
            var model = sut.Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            return(error);
        }
        public void RegressionForestModel_Predict_Multiple()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var predictions = sut.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.15381141277554411, error, m_delta);
        }
        public void RegressionRandomForestLearnerTest_Learn_Glass_100_Trees_Parallel()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var sut   = new RegressionRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, true);
            var model = sut.Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.22701441864756075, error, m_delta);
        }
        public void TargetArrayIsSortedByReference()
        {
            // Build a forest learner with our test substitute classes.
            var forestLearner = new RegressionRandomForestLearner <TestSplitSearcher, TestImpurityCalculator>(runParallel: false);
            // Generate arbitrary training data.
            var matrix  = this.GetFeatureMatrix();
            var targets = Enumerable.Range(0, 10).Select(i => (double)i).ToArray();

            // TestSplitSearcher checks if TestImpurityCalculator.CurrentTargets is always "up to date".
            var model = forestLearner.Learn(matrix, targets);

            // just out of curiosity :)
            var predictions = model.Predict(matrix);
            var metric      = new MeanSquaredErrorRegressionMetric();
            var error       = metric.Error(targets, predictions);
        }
        double RegressionRandomForestLearner_Learn_Aptitude(int trees, double subSampleRatio = 1.0)
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            var sut   = new RegressionRandomForestLearner(trees, 5, 100, 1, 0.0001, subSampleRatio, 42, false);
            var model = sut.Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            return(error);
        }
        public void RegressionForestModel_Predict_Multiple()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            var learner = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var predictions = sut.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.15381141277554411, error, 0.0000001);
        }
        public void RegressionForestModel_GetRawVariableImportance()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

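            // GetRawVariableImportance returns the unnormalized importances in
            // feature-index order; note that 59.21 / 139.67 * 100 ≈ 42.39, which
            // matches the normalized value reported by GetVariableImportance in the
            // examples above.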
            var actual   = sut.GetRawVariableImportance();
            var expected = new double[] { 59.2053755086635, 139.67487667643803 };

            Assert.AreEqual(expected.Length, actual.Length);

            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], m_delta);
            }
        }
        public void RegressionForestModel_GetRawVariableImportance()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var learner = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var actual   = sut.GetRawVariableImportance();
            var expected = new double[] { 59.2053755086635, 139.67487667643803 };

            Assert.AreEqual(expected.Length, actual.Length);

            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], 0.000001);
            }
        }
        public void RegressionForestModel_Predict_Single()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var rows        = targets.Length;
            var predictions = new double[rows];

            for (int i = 0; i < rows; i++)
            {
                predictions[i] = sut.Predict(observations.Row(i));
            }

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.15381141277554411, error, m_delta);
        }
        public void RandomForest_Default_Parameters()
        {
            // Use new StreamReader(filePath) when reading the data from the file system
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // read feature matrix
            var observations = parser.EnumerateRows(c => c != targetName)
                               .ToF64Matrix();

            // read regression targets
            var targets = parser.EnumerateRows(targetName)
                          .ToF64Vector();

            // Create the training/test splitter.
            // Since this is a regression problem, we use the random training/test set splitter;
            // 30 % of the data is used for the test set.
            var splitter = new RandomTrainingTestIndexSplitter <double>(trainingPercentage: 0.7, seed: 24);

            var trainingTestSplit = splitter.SplitSet(observations, targets);
            var trainSet          = trainingTestSplit.TrainingSet;
            var testSet           = trainingTestSplit.TestSet;

            // Create the learner and learn the model.
            var learner = new RegressionRandomForestLearner(trees: 100);
            var model   = learner.Learn(trainSet.Observations, trainSet.Targets);

            // predict the training and test set.
            var trainPredictions = model.Predict(trainSet.Observations);
            var testPredictions  = model.Predict(testSet.Observations);

            // Since this is a regression problem, we use mean squared error as the metric
            // for evaluating how well the model performs.
            var metric = new MeanSquaredErrorRegressionMetric();

            // measure the error on training and test set.
            var trainError = metric.Error(trainSet.Targets, trainPredictions);
            var testError  = metric.Error(testSet.Targets, testPredictions);

            TraceTrainingAndTestError(trainError, testError);
        }
        public void RegressionForestModel_Save()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new RegressionRandomForestLearner(2, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            // save model.
            var writer = new StringWriter();

            sut.Save(() => writer);

            // load model and assert prediction results.
            sut = RegressionForestModel.Load(() => new StringReader(writer.ToString()));
            var predictions = sut.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var actual    = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.14547628738104926, actual, m_delta);
        }
        public void RegressionRandomForestLearnerTest_Learn_Glass_100_Indices()
        {
            var(observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var sut = new RegressionRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, false);

            var indices = Enumerable.Range(0, targets.Length).ToArray();

            indices.Shuffle(new Random(42));
            indices = indices.Take((int)(targets.Length * 0.7))
                      .ToArray();

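            // This overload trains the forest on the selected 70 % of rows only.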
            var model = sut.Learn(observations, targets, indices);

            var predictions = model.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.49709813080602938, error, m_delta);
        }
        public void RegressionForestModel_Save()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var learner = new RegressionRandomForestLearner(2, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            // save model.
            var writer = new StringWriter();

            sut.Save(() => writer);

            // load model and assert prediction results.
            sut = RegressionForestModel.Load(() => new StringReader(writer.ToString()));
            var predictions = sut.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var actual    = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.14547628738104926, actual, 0.0001);
        }
        /// <summary>
        /// Runs Sequential Model-Based Optimization (SMBO) over the parameter space.
        /// Returns the final results ordered from best to worst (lowest error first).
        /// </summary>
        /// <param name="functionToMinimize">The objective function; it receives a parameter set and returns an OptimizerResult with the corresponding error.</param>
        /// <returns>All evaluated results with valid errors, ordered from best to worst.</returns>
        public OptimizerResult[] Optimize(Func <double[], OptimizerResult> functionToMinimize)
        {
            var bestParameterSet      = new double[m_parameters.Length];
            var bestParameterSetScore = double.MaxValue;

            // initialize max and min parameter bounds
            var maxParameters = new double[m_parameters.Length];
            var minParameters = new double[m_parameters.Length];

            for (int i = 0; i < m_parameters.Length; i++)
            {
                maxParameters[i] = m_parameters[i].Max();
                minParameters[i] = m_parameters[i].Min();
            }

            var parameterSets      = new List <double[]>();
            var parameterSetScores = new List <double>();

            var usePreviousResults = m_previousParameterSetScores != null && m_previousParameterSets != null;

            if (usePreviousResults)
            {
                parameterSets.AddRange(m_previousParameterSets);
                parameterSetScores.AddRange(m_previousParameterSetScores);

                for (int i = 0; i < parameterSets.Count; i++)
                {
                    var score = parameterSetScores[i];
                    if (!double.IsNaN(score))
                    {
                        if (score < bestParameterSetScore)
                        {
                            bestParameterSetScore = score;
                            bestParameterSet      = parameterSets[i];
                        }
                    }
                }
            }
            else
            {
                // initialize random starting points for the first iteration
                for (int i = 0; i < m_numberOfStartingPoints; i++)
                {
                    var set   = CreateParameterSet();
                    var score = functionToMinimize(set).Error;

                    if (!double.IsNaN(score))
                    {
                        parameterSets.Add(set);
                        parameterSetScores.Add(score);

                        if (score < bestParameterSetScore)
                        {
                            bestParameterSetScore = score;
                            bestParameterSet      = set;
                        }
                    }
                }
            }

            var lastSet = new double[m_parameters.Length];

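            // Main SMBO loop: each iteration fits a surrogate regression model to all
            // (parameter set, score) pairs seen so far, searches that surrogate for
            // promising candidate parameter sets, evaluates the real objective at those
            // candidates, and feeds the results back into the next iteration's model.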
            for (int iteration = 0; iteration < m_maxIterations; iteration++)
            {
                // fit model
                var observations = parameterSets.ToF64Matrix();
                var targets      = parameterSetScores.ToArray();
                var model        = m_learner.Learn(observations, targets);

                var bestScore  = parameterSetScores.Min();
                var candidates = FindMinimumCandidates(model, bestScore);

                var first = true;

                foreach (var candidate in candidates)
                {
                    var parameterSet = candidate.ParameterSet;

                    if (Equals(lastSet, parameterSet) && !first)
                    {
                        // skip evaluation if parameters have not changed.
                        continue;
                    }

                    if (Equals(bestParameterSet, parameterSet))
                    {
                        // If the best parameter set is sampled again,
                        // add a new random parameter set instead.
                        parameterSet = CreateParameterSet();
                    }

                    var result = functionToMinimize(parameterSet);

                    if (!double.IsNaN(result.Error))
                    {
                        // update best
                        if (result.Error < bestParameterSetScore)
                        {
                            bestParameterSetScore = result.Error;
                            bestParameterSet      = result.ParameterSet;
                            //Console.WriteLine("New Best: " + result.Error + " : " + string.Join(", ", result.ParameterSet));
                        }

                        // add point to parameter set list for next iterations model
                        parameterSets.Add(result.ParameterSet);
                        parameterSetScores.Add(result.Error);
                    }

                    lastSet = parameterSet;
                    first   = false;
                }
            }

            var results = new List <OptimizerResult>();

            for (int i = 0; i < parameterSets.Count; i++)
            {
                results.Add(new OptimizerResult(parameterSets[i], parameterSetScores[i]));
            }

            return(results.Where(v => !double.IsNaN(v.Error)).OrderBy(r => r.Error).ToArray());
        }
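        // Illustrative usage sketch for the Optimize method above. The optimizer class
        // name and its constructor are assumptions made for this example (only the
        // Optimize method itself is shown in the snippet above); the OptimizerResult
        // shape (ParameterSet, Error) and the best-to-worst ordering of the returned
        // array follow the code above.
        public void Optimize_Usage_Sketch()
        {
            // Objective: a simple quadratic with its minimum at (3, -1).
            Func<double[], OptimizerResult> functionToMinimize = p =>
            {
                var error = Math.Pow(p[0] - 3.0, 2) + Math.Pow(p[1] + 1.0, 2);
                return new OptimizerResult(p, error);
            };

            // One value range per parameter (hypothetical bounds).
            var parameters = new double[][]
            {
                new double[] { -10.0, 10.0 }, // parameter 0
                new double[] { -10.0, 10.0 }, // parameter 1
            };

            // Hypothetical optimizer type exposing the Optimize method above.
            var optimizer = new SequentialModelBasedOptimizer(parameters);

            var results = optimizer.Optimize(functionToMinimize);
            var best    = results.First(); // ordered from best (lowest error) to worst
        }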
        public void RandomForest_Default_Parameters_Variable_Importance()
        {
            #region read and split data
            // Use new StreamReader(filePath) when reading the data from the file system
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // read feature matrix
            var observations = parser.EnumerateRows(c => c != targetName)
                               .ToF64Matrix();

            // read regression targets
            var targets = parser.EnumerateRows(targetName)
                          .ToF64Vector();

            // Create the training/test splitter.
            // Since this is a regression problem, we use the random training/test set splitter;
            // 30 % of the data is used for the test set.
            var splitter = new RandomTrainingTestIndexSplitter <double>(trainingPercentage: 0.7, seed: 24);

            var trainingTestSplit = splitter.SplitSet(observations, targets);
            var trainSet          = trainingTestSplit.TrainingSet;
            var testSet           = trainingTestSplit.TestSet;
            #endregion

            // create learner with default parameters
            var learner = new RegressionRandomForestLearner(trees: 100);

            // learn the model with the default parameters
            var model = learner.Learn(trainSet.Observations, trainSet.Targets);

            // predict the training and test set.
            var trainPredictions = model.Predict(trainSet.Observations);
            var testPredictions  = model.Predict(testSet.Observations);

            // Since this is a regression problem, we use mean squared error as the metric
            // for evaluating how well the model performs.
            var metric = new MeanSquaredErrorRegressionMetric();

            // measure the error on training and test set.
            var trainError = metric.Error(trainSet.Targets, trainPredictions);
            var testError  = metric.Error(testSet.Targets, testPredictions);

            TraceTrainingAndTestError(trainError, testError);

            // the variable importance requires the featureNameToIndex
            // from the data set. This mapping describes the relation
            // from column name to index in the feature matrix.
            var featureNameToIndex = parser.EnumerateRows(c => c != targetName)
                                     .First().ColumnNameToIndex;

            // Get the variable importance from the model.
            // Variable importance is a measure, computed by the model,
            // of how important each feature is.
            var importances = model.GetVariableImportance(featureNameToIndex);

            // trace normalized importances as csv.
            var importanceCsv = new StringBuilder();
            importanceCsv.Append("FeatureName;Importance");
            foreach (var feature in importances)
            {
                importanceCsv.AppendLine();
                importanceCsv.Append(string.Format("{0};{1:0.00}",
                                                   feature.Key, feature.Value));
            }

            Trace.WriteLine(importanceCsv);
        }
        /// <summary>
        /// Random forest and neural network prediction.
        /// </summary>
        public void RegressionLearner_Learn_And_Predict()
        {
            #region Random forest training
            var parser     = new CsvParser(() => new StringReader(treinamento));
            var targetName = "T";

            var observations = parser.EnumerateRows(c => c != targetName)
                               .ToF64Matrix();
            var targets = parser.EnumerateRows(targetName)
                          .ToF64Vector();
            // Keep the last observed feature row (columns 0 and 2-5) together with the last target value.
            UltimaObservacao = new double[]
            {
                observations[observations.RowCount - 1, 0],
                observations[observations.RowCount - 1, 2],
                observations[observations.RowCount - 1, 3],
                observations[observations.RowCount - 1, 4],
                observations[observations.RowCount - 1, 5],
                targets[targets.Count() - 1]
            };

            var learner = new RegressionRandomForestLearner(trees: 500);
            model = learner.Learn(observations, targets);
            #endregion

            #region Random forest test
            parser = new CsvParser(() => new StringReader(teste));
            var observationsTeste = parser.EnumerateRows(c => c != targetName)
                                    .ToF64Matrix();
            var targetsTeste = parser.EnumerateRows(targetName)
                               .ToF64Vector();

            // predict the training and test set.
            var trainPredictions = model.Predict(observations);
            var testPredictions  = model.Predict(observationsTeste);

            // create the metric
            var metric = new MeanSquaredErrorRegressionMetric();


            // measure the error on training and test set.
            trainError = metric.Error(targets, trainPredictions);
            testError  = metric.Error(targetsTeste, testPredictions);
            #endregion

            #region Neural network training
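            // Network architecture: 6 input features, two dense ReLU layers of 800
            // units each, dropout for regularization (20 % on the input, 50 % between
            // the dense layers), and a squared-error regression output layer.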
            var net = new NeuralNet();
            net.Add(new InputLayer(6));
            net.Add(new DropoutLayer(0.2));
            net.Add(new DenseLayer(800, Activation.Relu));
            net.Add(new DropoutLayer(0.5));
            net.Add(new DenseLayer(800, Activation.Relu));
            net.Add(new DropoutLayer(0.5));
            net.Add(new SquaredErrorRegressionLayer());

            var learnernet = new RegressionNeuralNetLearner(net, iterations: 500, loss: new SquareLoss());
            modelnet = learnernet.Learn(observations, targets);
            #endregion

            #region Neural network test
            trainPredictions = modelnet.Predict(observations);
            testPredictions  = modelnet.Predict(observationsTeste);

            trainErrorNet = metric.Error(targets, trainPredictions);
            testErrorNet  = metric.Error(targetsTeste, testPredictions);
            #endregion

            #region AdaBoost training
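            // AdaBoost regression with deep trees (maximum depth 35), 2000 boosting
            // iterations and a learning rate of 0.1, trained on the same observations
            // and targets as the forest and the neural network.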
            var learnerada = new RegressionAdaBoostLearner(maximumTreeDepth: 35, iterations: 2000, learningRate: 0.1);
            modelada = learnerada.Learn(observations, targets);
            #endregion

            #region AdaBoost test
            trainPredictions = modelada.Predict(observations);
            testPredictions  = modelada.Predict(observationsTeste);

            trainErrorAda = metric.Error(targets, trainPredictions);
            testErrorAda  = metric.Error(targetsTeste, testPredictions);

            string stargets          = "";
            string strainPredictions = "";
            string stargetsTeste     = "";
            string stestPredictions  = "";

            foreach (var i in targets)
            {
                stargets += i + ";";
            }
            foreach (var i in trainPredictions)
            {
                strainPredictions += i + ";";
            }
            foreach (var i in targetsTeste)
            {
                stargetsTeste += i + ";";
            }
            foreach (var i in testPredictions)
            {
                stestPredictions += i + ";";
            }
            #endregion
        }