public void RegressionForestModel_GetVariableImportance()
        {
            // Aptitude data set: every column except "Pass" is a feature,
            // "Pass" is the regression target.
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            // Column name -> feature matrix index, required by GetVariableImportance.
            var featureNameToIndex = new Dictionary<string, int>
            {
                { "AptitudeTestScore", 0 },
                { "PreviousExperience_month", 1 },
            };

            // Deterministic forest: fixed seed, sequential training.
            var sut = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false)
                .Learn(observations, targets);

            var actual = sut.GetVariableImportance(featureNameToIndex);

            // Importances are scaled so the most important feature reads 100.
            var expected = new Dictionary<string, double>
            {
                { "PreviousExperience_month", 100 },
                { "AptitudeTestScore", 42.3879919692465 },
            };

            Assert.AreEqual(expected.Count, actual.Count);

            // Compare both dictionaries pairwise in enumeration order.
            foreach (var (e, a) in expected.Zip(actual, (e, a) => (e, a)))
            {
                Assert.AreEqual(e.Key, a.Key);
                Assert.AreEqual(e.Value, a.Value, 0.000001);
            }
        }
Example #2
0
        /// <summary>
        /// Sequential Model-based optimization (SMBO). SMBO learns a model based on the initial parameter sets and scores.
        /// This model is used to sample new promising parameter candidates which are evaluated and added to the existing parameter sets.
        /// This process iterates several times. The method is computationally expensive so is most relevant for expensive problems,
        /// where each evaluation of the function to minimize takes a long time, like hyper parameter tuning a machine learning method.
        /// But in that case it can usually reduce the number of iterations required to reach a good solution compared to less sophisticated methods.
        /// Implementation loosely based on:
        /// http://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf
        /// https://papers.nips.cc/paper/4522-practical-bayesian-optimization-of-machine-learning-algorithms.pdf
        /// https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf
        /// </summary>
        /// <param name="parameters">Each row is a series of values for a specific parameter</param>
        /// <param name="maxIterations">Maximum number of iterations. MaxIteration * numberOfCandidatesEvaluatedPrIteration = totalFunctionEvaluations</param>
        /// <param name="numberOfStartingPoints">Number of randomly created starting points to use for the initial model in the first iteration (default is 10)</param>
        /// <param name="numberOfCandidatesEvaluatedPrIteration">How many candidate parameter sets should be sampled from the model in each iteration.
        /// The parameter sets are included in order of most promising outcome (default is 3)</param>
        /// <param name="seed">Seed for the random initialization</param>
        /// <exception cref="ArgumentNullException">parameters is null</exception>
        /// <exception cref="ArgumentOutOfRangeException">maxIterations or numberOfStartingPoints is below 1</exception>
        public SequentialModelBasedOptimizer(double[][] parameters, int maxIterations, int numberOfStartingPoints = 10, int numberOfCandidatesEvaluatedPrIteration = 3, int seed = 42)
        {
            if (parameters == null)
            {
                throw new ArgumentNullException(nameof(parameters));
            }
            // Bug fix: the two range checks below previously threw ArgumentNullException
            // with the whole message passed in the paramName position, and the second
            // one referenced a non-existent parameter ("numberOfParticles").
            if (maxIterations <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(maxIterations), "maxIterations must be at least 1");
            }
            if (numberOfStartingPoints < 1)
            {
                throw new ArgumentOutOfRangeException(nameof(numberOfStartingPoints), "numberOfStartingPoints must be at least 1");
            }

            m_parameters             = parameters;
            m_maxIterations          = maxIterations;
            m_numberOfStartingPoints = numberOfStartingPoints;
            m_numberOfCandidatesEvaluatedPrIteration = numberOfCandidatesEvaluatedPrIteration;

            m_random = new Random(seed);
            // hyper parameters for regression random forest learner
            m_learner = new RegressionRandomForestLearner(20, 1, 2000, parameters.Length, 1e-6, 1.0, 42, false);
            // optimizer for finding maximum expectation (most promising hyper parameters) from random forest model
            m_optimizer = new ParticleSwarmOptimizer(m_parameters, 100, 40);
        }
        public void RegressionForestModel_PredictCertainty_Single()
        {
            // Aptitude data set: features are every column except "Pass" (the target).
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            var rows         = targets.Length;

            // Deterministic forest: fixed seed, sequential training.
            var learner = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            var actual = new CertaintyPrediction[rows];

            // Exercise the single-observation PredictCertainty overload row by row.
            for (int i = 0; i < rows; i++)
            {
                actual[i] = sut.PredictCertainty(observations.Row(i));
            }

            // The point predictions must reproduce the known training-set MSE.
            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, actual.Select(p => p.Prediction).ToArray());

            Assert.AreEqual(0.15381141277554411, error, 0.0000001);

            // Exact regression values (prediction + variance) for every row,
            // pinned from a previous known-good run.
            var expected = new CertaintyPrediction[] { new CertaintyPrediction(0.379151515151515, 0.0608255007215007), new CertaintyPrediction(0.411071351850763, 0.0831655436577049), new CertaintyPrediction(0.243420918950331, 0.0452827034233046), new CertaintyPrediction(0.302332251082251, 0.0699917594408057), new CertaintyPrediction(0.411071351850763, 0.0831655436577049), new CertaintyPrediction(0.175743762773174, 0.0354069437824887), new CertaintyPrediction(0.574083361083361, 0.0765858693929188), new CertaintyPrediction(0.259063776093188, 0.0491198812971218), new CertaintyPrediction(0.163878898878899, 0.0331543420321184), new CertaintyPrediction(0.671753996003996, 0.0624466591504497), new CertaintyPrediction(0.418472943722944, 0.0607014359023913), new CertaintyPrediction(0.243420918950331, 0.0452827034233046), new CertaintyPrediction(0.443779942279942, 0.0941961872991865), new CertaintyPrediction(0.156999361749362, 0.0435804333960299), new CertaintyPrediction(0.591222034501446, 0.0873624628347336), new CertaintyPrediction(0.123822406351818, 0.0283119805431255), new CertaintyPrediction(0.162873993653405, 0.0333697457759022), new CertaintyPrediction(0.596261932511932, 0.0695341060210394), new CertaintyPrediction(0.671753996003996, 0.0624466591504497), new CertaintyPrediction(0.418472943722944, 0.0607014359023913), new CertaintyPrediction(0.329000027750028, 0.0788869852405852), new CertaintyPrediction(0.671753996003996, 0.0624466591504497), new CertaintyPrediction(0.499770375049787, 0.0913884936411888), new CertaintyPrediction(0.140025508804921, 0.0309875116490099), new CertaintyPrediction(0.161207326986739, 0.0336321035325246), new CertaintyPrediction(0.389553418803419, 0.0744433596104835), };

            CollectionAssert.AreEqual(expected, actual);
        }
        public void RegressionForestModel_GetVariableImportance()
        {
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            // Column name -> feature matrix index, required by GetVariableImportance.
            var featureNameToIndex = new Dictionary<string, int>
            {
                { "AptitudeTestScore", 0 },
                { "PreviousExperience_month", 1 },
            };

            // Deterministic forest: fixed seed, sequential training.
            var sut = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false)
                .Learn(observations, targets);

            var actual = sut.GetVariableImportance(featureNameToIndex);

            // Importances are scaled so the most important feature reads 100.
            var expected = new Dictionary<string, double>
            {
                { "PreviousExperience_month", 100 },
                { "AptitudeTestScore", 42.3879919692465 },
            };

            Assert.AreEqual(expected.Count, actual.Count);

            // Compare both dictionaries pairwise in enumeration order.
            foreach (var (e, a) in expected.Zip(actual, (e, a) => (e, a)))
            {
                Assert.AreEqual(e.Key, a.Key);
                Assert.AreEqual(e.Value, a.Value, m_delta);
            }
        }
        public void RegressionRandomForestLearnerTest_Learn_Glass_100_Indices()
        {
            // Glass data set: features are every column except "Target".
            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();
            // Removed unused local `rows` (targets.Length was never read through it).

            var sut = new RegressionRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, false);

            // Reproducibly shuffle the row indices and train on the first 70 %.
            var indices = Enumerable.Range(0, targets.Length).ToArray();
            indices.Shuffle(new Random(42));
            indices = indices.Take((int)(targets.Length * 0.7))
                      .ToArray();

            // Learn the individual trees, then assemble the forest model manually
            // from the trees and the raw variable importances.
            var decisionTreeModels = sut.Learn(observations, targets, indices, out var rawVariableImportance).ToArray();
            var model = new RegressionForestModel(decisionTreeModels, rawVariableImportance);

            // Score on the full data set (including rows not used for training).
            var predictions = model.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.49709813080602938, error, m_delta);
        }
Example #6
0
        public void RandomForest_Default_Parameters_Save_Load_Model_Using_Static_Methods()
        {
            #region read and split data
            // Use StreamReader(filepath) when running from filesystem
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // Feature matrix: every column except the target.
            var observations = parser.EnumerateRows(c => c != targetName).ToF64Matrix();

            // Regression targets.
            var targets = parser.EnumerateRows(targetName).ToF64Vector();

            // Random 70/30 training/test split — appropriate for a regression problem.
            var splitter = new RandomTrainingTestIndexSplitter<double>(trainingPercentage: 0.7, seed: 24);

            var trainingTestSplit = splitter.SplitSet(observations, targets);
            var trainSet          = trainingTestSplit.TrainingSet;
            var testSet           = trainingTestSplit.TestSet;
            #endregion

            // Learn a forest with default hyper parameters (100 trees).
            var model = new RegressionRandomForestLearner(trees: 100)
                .Learn(trainSet.Observations, trainSet.Targets);

            // Evaluate on both sets using mean squared error.
            var metric = new MeanSquaredErrorRegressionMetric();

            var trainError = metric.Error(trainSet.Targets, model.Predict(trainSet.Observations));
            var testError  = metric.Error(testSet.Targets, model.Predict(testSet.Observations));

            TraceTrainingAndTestError(trainError, testError);

            // Round-trip the model through its serialized form (xml by default).
            // In the file system use new StreamWriter(filePath) / new StreamReader(filePath).
            var savedModel = new StringWriter();
            model.Save(() => savedModel);

            var loadedModel = RegressionForestModel.Load(() => new StringReader(savedModel.ToString()));
        }
Example #7
0
        private void Train()
        {
            // Read the warehouse training data; "NextCGPA" is the regression target.
            var parser       = new CsvParser(() => new StreamReader(machineLearningModelsAbsolutePath + "warehouse_data.csv"), separator: ',');
            var targetName   = "NextCGPA";
            var targets      = parser.EnumerateRows(targetName).ToF64Vector();
            var observations = parser.EnumerateRows(x => x != targetName).ToF64Matrix();

            // Fit a random forest and persist it (xml) next to the data file.
            var model = new RegressionRandomForestLearner(trees: numberOfTrees)
                .Learn(observations, targets);

            model.Save(() => new StreamWriter(machineLearningModelsAbsolutePath + "random_forest_model.xml"));
        }
        public void RegressionForestModel_Predict_Multiple()
        {
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            // Deterministic forest: fixed seed, sequential training.
            var sut = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false)
                .Learn(observations, targets);

            // Predict the whole training matrix in one call and score with MSE.
            var predictions = sut.Predict(observations);
            var error       = new MeanSquaredErrorRegressionMetric().Error(targets, predictions);

            Assert.AreEqual(0.15381141277554411, error, m_delta);
        }
        public void RegressionRandomForestLearnerTest_Learn_Glass_100_Trees_Parallel()
        {
            var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

            // Same configuration as the sequential test, but trained with
            // runParallel: true — the resulting error must be identical.
            var model = new RegressionRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, true)
                .Learn(observations, targets);

            var predictions = model.Predict(observations);
            var error       = new MeanSquaredErrorRegressionMetric().Error(targets, predictions);

            Assert.AreEqual(0.22701441864756075, error, m_delta);
        }
        double RegressionRandomForestLearner_Learn_Aptitude(int trees, double subSampleRatio = 1.0)
        {
            // Helper: trains a forest on the aptitude data with the given tree
            // count and sub-sample ratio, and returns the training-set MSE.
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var model = new RegressionRandomForestLearner(trees, 5, 100, 1, 0.0001, subSampleRatio, 42, false)
                .Learn(observations, targets);

            var predictions = model.Predict(observations);

            return new MeanSquaredErrorRegressionMetric().Error(targets, predictions);
        }
Example #11
0
        public void TargetArrayIsSortedByReference()
        {
            // Build a forest learner wired with the test substitute split searcher
            // and impurity calculator.
            var forestLearner = new RegressionRandomForestLearner <TestSplitSearcher, TestImpurityCalculator>(runParallel: false);

            // Arbitrary training data: targets are simply 0..9.
            var matrix  = this.GetFeatureMatrix();
            var targets = new double[10];
            for (var i = 0; i < targets.Length; i++)
            {
                targets[i] = i;
            }

            // TestSplitSearcher checks if TestImpurityCalculator.CurrentTargets is always "up to date".
            var model = forestLearner.Learn(matrix, targets);

            // just out of curiosity :)
            var predictions = model.Predict(matrix);
            var metric      = new MeanSquaredErrorRegressionMetric();
            var error       = metric.Error(targets, predictions);
        }
        public void RegressionRandomForestLearnerTest_Learn_Glass_100_Trees_Parallel()
        {
            // Glass data set: features are every column except "Target".
            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            // runParallel: true — the error must match the sequential learner.
            var model = new RegressionRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, true)
                .Learn(observations, targets);

            var predictions = model.Predict(observations);
            var error       = new MeanSquaredErrorRegressionMetric().Error(targets, predictions);

            Assert.AreEqual(0.22701441864756075, error, m_delta);
        }
        double RegressionRandomForestLearner_Learn_Aptitude(int trees, double subSampleRatio = 1.0)
        {
            // Helper: trains a forest on the aptitude data with the given tree
            // count and sub-sample ratio, and returns the training-set MSE.
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            // Removed unused local `rows` (targets.Length was never read through it).

            var sut   = new RegressionRandomForestLearner(trees, 5, 100, 1, 0.0001, subSampleRatio, 42, false);
            var model = sut.Learn(observations, targets);

            var predictions = model.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            return error;
        }
        public void RegressionForestModel_GetRawVariableImportance()
        {
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            // Deterministic forest: fixed seed, sequential training.
            var sut = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false)
                .Learn(observations, targets);

            // Raw (unnormalized) importance per feature column.
            var actual   = sut.GetRawVariableImportance();
            var expected = new double[] { 59.2053755086635, 139.67487667643803 };

            Assert.AreEqual(expected.Length, actual.Length);

            for (var i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], m_delta);
            }
        }
        public void RegressionForestModel_Predict_Multiple()
        {
            // Aptitude data set: features are every column except "Pass" (the target).
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();
            // Removed unused local `rows` (targets.Length was never read through it).

            // Deterministic forest: fixed seed, sequential training.
            var learner = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            // Predict the whole training matrix in one call and score with MSE.
            var predictions = sut.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.15381141277554411, error, 0.0000001);
        }
        public void RegressionForestModel_GetRawVariableImportance()
        {
            // Aptitude data set: features are every column except "Pass" (the target).
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            // Deterministic forest: fixed seed, sequential training.
            var sut = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false)
                .Learn(observations, targets);

            // Raw (unnormalized) importance per feature column.
            var actual   = sut.GetRawVariableImportance();
            var expected = new double[] { 59.2053755086635, 139.67487667643803 };

            Assert.AreEqual(expected.Length, actual.Length);

            for (var i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], 0.000001);
            }
        }
Example #17
0
        /// <summary>
        /// Sequential Model-based optimization (SMBO). SMBO learns a model based on the initial parameter sets and scores.
        /// This model is used to sample new promising parameter candidates which are evaluated and added to the existing parameter sets.
        /// This process iterates several times. The method is computationally expensive so is most relevant for expensive problems,
        /// where each evaluation of the function to minimize takes a long time, like hyper parameter tuning a machine learning method.
        /// But in that case it can usually reduce the number of iterations required to reach a good solution compared to less sophisticated methods.
        /// Implementation loosely based on:
        /// http://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf
        /// https://papers.nips.cc/paper/4522-practical-bayesian-optimization-of-machine-learning-algorithms.pdf
        /// https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf
        /// </summary>
        /// <param name="parameters">Each row is a series of values for a specific parameter</param>
        /// <param name="maxIterations">Maximum number of iterations. MaxIteration * numberOfCandidatesEvaluatedPrIteration = totalFunctionEvaluations</param>
        /// <param name="previousParameterSets">Parameter sets from previous run</param>
        /// <param name="previousParameterSetScores">Scores from previous run corresponding to each parameter set</param>
        /// <param name="numberOfCandidatesEvaluatedPrIteration">How many candidate parameter sets should be sampled from the model in each iteration.
        /// The parameter sets are included in order of most promising outcome (default is 3)</param>
        /// <param name="seed">Seed for the random initialization</param>
        /// <exception cref="ArgumentNullException">parameters, previousParameterSets or previousParameterSetScores is null</exception>
        /// <exception cref="ArgumentOutOfRangeException">maxIterations is below 1</exception>
        /// <exception cref="ArgumentException">previous sets and scores differ in length, or fewer than 2 are provided</exception>
        public SequentialModelBasedOptimizer(double[][] parameters, int maxIterations, List <double[]> previousParameterSets, List <double> previousParameterSetScores,
                                             int numberOfCandidatesEvaluatedPrIteration = 3, int seed = 42)
        {
            if (parameters == null)
            {
                throw new ArgumentNullException(nameof(parameters));
            }
            // Bug fix: previously threw ArgumentNullException with the message in
            // the paramName position for this range violation.
            if (maxIterations <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(maxIterations), "maxIterations must be at least 1");
            }
            if (previousParameterSets == null)
            {
                throw new ArgumentNullException(nameof(previousParameterSets));
            }
            if (previousParameterSetScores == null)
            {
                // Bug fix: previously reported the non-existent name "previousResults".
                throw new ArgumentNullException(nameof(previousParameterSetScores));
            }
            if (previousParameterSets.Count != previousParameterSetScores.Count)
            {
                throw new ArgumentException("previousParameterSets length: "
                                            + previousParameterSets.Count + " does not correspond with previousParameterSetScores length: " + previousParameterSetScores.Count);
            }
            // The counts are equal at this point, so a single check suffices
            // (the original redundantly tested both).
            if (previousParameterSets.Count < 2)
            {
                throw new ArgumentException("previousParameterSets length and previousParameterSetScores length must be at least 2 and was: " + previousParameterSetScores.Count);
            }

            m_parameters    = parameters;
            m_maxIterations = maxIterations;
            m_numberOfCandidatesEvaluatedPrIteration = numberOfCandidatesEvaluatedPrIteration;

            m_random = new Random(seed);
            // hyper parameters for regression random forest learner
            m_learner = new RegressionRandomForestLearner(20, 1, 2000, parameters.Length, 1e-6, 1.0, 42, false);
            // optimizer for finding maximum expectation (most promising hyper parameters) from random forest model
            m_optimizer = new ParticleSwarmOptimizer(m_parameters, 100, 40);

            m_previousParameterSets      = previousParameterSets;
            m_previousParameterSetScores = previousParameterSetScores;
        }
        public void RegressionForestModel_Predict_Single()
        {
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            // Deterministic forest: fixed seed, sequential training.
            var sut = new RegressionRandomForestLearner(100, 5, 100, 1, 0.0001, 1.0, 42, false)
                .Learn(observations, targets);

            // Exercise the single-observation Predict overload row by row.
            var predictions = new double[targets.Length];
            for (var row = 0; row < predictions.Length; row++)
            {
                predictions[row] = sut.Predict(observations.Row(row));
            }

            var error = new MeanSquaredErrorRegressionMetric().Error(targets, predictions);

            Assert.AreEqual(0.15381141277554411, error, m_delta);
        }
Example #19
0
        public void RandomForest_Default_Parameters()
        {
            // Use StreamReader(filepath) when running from filesystem
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // Feature matrix: every column except the target.
            var observations = parser.EnumerateRows(c => c != targetName).ToF64Matrix();

            // Regression targets.
            var targets = parser.EnumerateRows(targetName).ToF64Vector();

            // Random 70/30 training/test split — appropriate for a regression problem.
            var splitter = new RandomTrainingTestIndexSplitter<double>(trainingPercentage: 0.7, seed: 24);

            var trainingTestSplit = splitter.SplitSet(observations, targets);
            var trainSet          = trainingTestSplit.TrainingSet;
            var testSet           = trainingTestSplit.TestSet;

            // Learn a forest with default hyper parameters (100 trees).
            var model = new RegressionRandomForestLearner(trees: 100)
                .Learn(trainSet.Observations, trainSet.Targets);

            // Evaluate on both sets using mean squared error.
            var metric = new MeanSquaredErrorRegressionMetric();

            var trainError = metric.Error(trainSet.Targets, model.Predict(trainSet.Observations));
            var testError  = metric.Error(testSet.Targets, model.Predict(testSet.Observations));

            TraceTrainingAndTestError(trainError, testError);
        }
        public void RegressionForestModel_Save()
        {
            var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            // A small forest (2 trees) keeps the serialized model compact.
            var sut = new RegressionRandomForestLearner(2, 5, 100, 1, 0.0001, 1.0, 42, false)
                .Learn(observations, targets);

            // Serialize the model to a string.
            var writer = new StringWriter();
            sut.Save(() => writer);

            // Deserialize and verify the round-tripped model still predicts correctly.
            sut = RegressionForestModel.Load(() => new StringReader(writer.ToString()));
            var predictions = sut.Predict(observations);

            var actual = new MeanSquaredErrorRegressionMetric().Error(targets, predictions);

            Assert.AreEqual(0.14547628738104926, actual, m_delta);
        }
        public void RegressionRandomForestLearnerTest_Learn_Glass_100_Indices()
        {
            var (observations, targets) = DataSetUtilities.LoadGlassDataSet();

            var sut = new RegressionRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, false);

            // Reproducibly shuffle the row indices and keep the first 70 %.
            var indices = Enumerable.Range(0, targets.Length).ToArray();
            indices.Shuffle(new Random(42));
            indices = indices.Take((int)(targets.Length * 0.7)).ToArray();

            // Train only on the selected subset, then score on the full set.
            var model = sut.Learn(observations, targets, indices);

            var predictions = model.Predict(observations);
            var error       = new MeanSquaredErrorRegressionMetric().Error(targets, predictions);

            Assert.AreEqual(0.49709813080602938, error, m_delta);
        }
        public void RegressionForestModel_Save()
        {
            // Aptitude data set: features are every column except "Pass" (the target).
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            // A small forest (2 trees) keeps the serialized model compact.
            var sut = new RegressionRandomForestLearner(2, 5, 100, 1, 0.0001, 1.0, 42, false)
                .Learn(observations, targets);

            // Serialize the model to a string.
            var writer = new StringWriter();
            sut.Save(() => writer);

            // Deserialize and verify the round-tripped model still predicts correctly.
            sut = RegressionForestModel.Load(() => new StringReader(writer.ToString()));
            var predictions = sut.Predict(observations);

            var actual = new MeanSquaredErrorRegressionMetric().Error(targets, predictions);

            Assert.AreEqual(0.14547628738104926, actual, 0.0001);
        }
Example #23
0
        public void RandomForest_Default_Parameters_Variable_Importance()
        {
            #region read and split data
            // Use StreamReader(filepath) when running from filesystem
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // Feature matrix: every column except the target.
            var observations = parser.EnumerateRows(c => c != targetName).ToF64Matrix();

            // Regression targets.
            var targets = parser.EnumerateRows(targetName).ToF64Vector();

            // Random 70/30 training/test split — appropriate for a regression problem.
            var splitter = new RandomTrainingTestIndexSplitter<double>(trainingPercentage: 0.7, seed: 24);

            var trainingTestSplit = splitter.SplitSet(observations, targets);
            var trainSet          = trainingTestSplit.TrainingSet;
            var testSet           = trainingTestSplit.TestSet;
            #endregion

            // Learn a forest with default hyper parameters (100 trees).
            var model = new RegressionRandomForestLearner(trees: 100)
                .Learn(trainSet.Observations, trainSet.Targets);

            // Evaluate on both sets using mean squared error.
            var metric = new MeanSquaredErrorRegressionMetric();

            var trainError = metric.Error(trainSet.Targets, model.Predict(trainSet.Observations));
            var testError  = metric.Error(testSet.Targets, model.Predict(testSet.Observations));

            TraceTrainingAndTestError(trainError, testError);

            // The variable importance requires the featureNameToIndex mapping
            // from the data set: column name -> index in the feature matrix.
            var featureNameToIndex = parser.EnumerateRows(c => c != targetName)
                                     .First().ColumnNameToIndex;

            // Variable importance is the model's own measure of how much each
            // feature contributed.
            var importances = model.GetVariableImportance(featureNameToIndex);

            // Trace the normalized importances as csv.
            var importanceCsv = new StringBuilder("FeatureName;Importance");
            foreach (var feature in importances)
            {
                importanceCsv.AppendLine()
                             .AppendFormat("{0};{1:0.00}", feature.Key, feature.Value);
            }

            Trace.WriteLine(importanceCsv);
        }
        /// <summary>
        /// Builds a random forest regression learner with fixed hyper parameters.
        /// </summary>
        /// <returns>A configured, not-yet-trained random forest learner.</returns>
        private ILearner <double> GetRandomForest()
        {
            // 2000 trees with depth capped at 100. featuresPrSplit: 0 lets the
            // learner fall back to its default feature count per split, and the
            // fixed seed keeps runs reproducible.
            return new RegressionRandomForestLearner(
                trees: 2000,
                maximumTreeDepth: 100,
                featuresPrSplit: 0,
                seed: 42);
        }
Пример #25
0
        /// <summary>
        /// Trains and evaluates three regressors on the same training/test data:
        /// a random forest, a neural network and an AdaBoost tree ensemble.
        /// The fitted models are stored in the instance fields
        /// <c>model</c>, <c>modelnet</c> and <c>modelada</c>, and the mean
        /// squared errors in <c>trainError</c>/<c>testError</c>,
        /// <c>trainErrorNet</c>/<c>testErrorNet</c> and
        /// <c>trainErrorAda</c>/<c>testErrorAda</c>.
        /// </summary>
        public void RegressionLearner_Learn_And_Predict()
        {
            #region Random forest training
            var parser     = new CsvParser(() => new StringReader(treinamento));
            var targetName = "T";

            var observations = parser.EnumerateRows(c => c != targetName)
                               .ToF64Matrix();
            var targets = parser.EnumerateRows(targetName)
                          .ToF64Vector();
            // Keep the last observed row (selected feature columns plus the last
            // target), presumably as the input for the next prediction.
            // NOTE(review): column index 1 is skipped — confirm that is the
            // intended feature subset.
            UltimaObservacao = new double[] { observations[observations.RowCount - 1, 0], observations[observations.RowCount - 1, 2], observations[observations.RowCount - 1, 3], observations[observations.RowCount - 1, 4], observations[observations.RowCount - 1, 5], targets[targets.Count() - 1] };

            var learner = new RegressionRandomForestLearner(trees: 500);
            model = learner.Learn(observations, targets);
            #endregion

            #region Random forest test
            parser = new CsvParser(() => new StringReader(teste));
            var observationsTeste = parser.EnumerateRows(c => c != targetName)
                                    .ToF64Matrix();
            var targetsTeste = parser.EnumerateRows(targetName)
                               .ToF64Vector();

            // predict the training and test set.
            var trainPredictions = model.Predict(observations);
            var testPredictions  = model.Predict(observationsTeste);

            // since this is a regression problem, mean squared error is used
            // to evaluate how well each model performs.
            var metric = new MeanSquaredErrorRegressionMetric();

            // measure the error on training and test set.
            trainError = metric.Error(targets, trainPredictions);
            testError  = metric.Error(targetsTeste, testPredictions);
            #endregion

            #region Neural network training
            // 6 inputs (matching UltimaObservacao's length), two dense ReLU
            // layers of 800 units with dropout for regularization, and a
            // squared-error output layer for regression.
            var net = new NeuralNet();
            net.Add(new InputLayer(6));
            net.Add(new DropoutLayer(0.2));
            net.Add(new DenseLayer(800, Activation.Relu));
            net.Add(new DropoutLayer(0.5));
            net.Add(new DenseLayer(800, Activation.Relu));
            net.Add(new DropoutLayer(0.5));
            net.Add(new SquaredErrorRegressionLayer());

            var learnernet = new RegressionNeuralNetLearner(net, iterations: 500, loss: new SquareLoss());
            modelnet = learnernet.Learn(observations, targets);
            #endregion

            #region Neural network test
            trainPredictions = modelnet.Predict(observations);
            testPredictions  = modelnet.Predict(observationsTeste);

            trainErrorNet = metric.Error(targets, trainPredictions);
            testErrorNet  = metric.Error(targetsTeste, testPredictions);
            #endregion

            #region AdaBoost training
            var learnerada = new RegressionAdaBoostLearner(maximumTreeDepth: 35, iterations: 2000, learningRate: 0.1);
            modelada = learnerada.Learn(observations, targets);
            #endregion

            #region AdaBoost test
            trainPredictions = modelada.Predict(observations);
            testPredictions  = modelada.Predict(observationsTeste);

            trainErrorAda = metric.Error(targets, trainPredictions);
            testErrorAda  = metric.Error(targetsTeste, testPredictions);

            // Serialize targets and AdaBoost predictions as semicolon-terminated
            // strings. Replaces the previous O(n^2) `+=` concatenation loops;
            // output is byte-identical (each value's ToString followed by ';').
            // NOTE(review): these locals are never read afterwards — they look
            // like leftovers from debugging/inspection; consider removing or
            // actually using them.
            string stargets          = string.Concat(targets.Select(v => v + ";"));
            string strainPredictions = string.Concat(trainPredictions.Select(v => v + ";"));
            string stargetsTeste     = string.Concat(targetsTeste.Select(v => v + ";"));
            string stestPredictions  = string.Concat(testPredictions.Select(v => v + ";"));
            #endregion
        }