// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.FastTree/">Microsoft.ML.FastTree</a>.
        public static void FastTreeRegression()
        {
            // Download the housing regression dataset from github.com/dotnet/machinelearning;
            // this creates a housing.txt file in the filesystem where this code runs.
            // You can open the file to inspect the data.
            string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();

            // The MLContext is the entry point for all ML.NET operations. It can be used
            // for exception tracking and logging, and is the source of randomness.
            var context = new MLContext();

            // Define a statically-typed loader matching the dataset layout:
            // column 0 is the label, columns 1-6 are the features.
            var textLoader = TextLoaderStatic.CreateLoader(context, c => (
                                                               label: c.LoadFloat(0),
                                                               features: c.LoadFloat(1, 6)
                                                               ),
                                                           separator: '\t', hasHeader: true);

            // Load the full dataset; the cross-validation below handles splitting.
            var data = textLoader.Load(dataFile);

            // Receives the trained predictor via the onFit callback.
            FastTreeRegressionModelParameters trainedModel = null;

            // Build the training pipeline: a FastTree regressor over the loaded columns.
            var pipeline = textLoader.MakeNewEstimator()
                           .Append(r => (r.label, score: context.Regression.Trainers.FastTree(
                                             r.label,
                                             r.features,
                                             numTrees: 100,             // try: (int) 20-2000
                                             numLeaves: 20,             // try: (int) 2-128
                                             minDatapointsInLeaves: 10, // try: (int) 1-100
                                             learningRate: 0.2,         // try: (float) 0.025-0.4
                                             onFit: p => trainedModel = p)
                                         )
                                   );

            // 5-fold cross-validation; average each regression metric across the folds.
            var folds = context.Regression.CrossValidate(data, pipeline, r => r.label, numFolds: 5);
            var avgMetrics = (
                L1: folds.Select(r => r.metrics.MeanAbsoluteError).Average(),
                L2: folds.Select(r => r.metrics.MeanSquaredError).Average(),
                LossFn: folds.Select(r => r.metrics.LossFunction).Average(),
                Rms: folds.Select(r => r.metrics.RootMeanSquaredError).Average(),
                RSquared: folds.Select(r => r.metrics.RSquared).Average()
                );

            Console.WriteLine($"L1 - {avgMetrics.L1}");               // 3.091095
            Console.WriteLine($"L2 - {avgMetrics.L2}");               // 20.351073
            Console.WriteLine($"LossFunction - {avgMetrics.LossFn}"); // 20.351074
            Console.WriteLine($"RMS - {avgMetrics.Rms}");             // 4.478358
            Console.WriteLine($"RSquared - {avgMetrics.RSquared}");   // 0.754977
        }
// Example #2
        public void FastTreeRegression()
        {
            // Fixed seed so the trained model is reproducible across runs.
            var mlContext  = new MLContext(seed: 0);
            var path       = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
            var source     = new MultiFileSource(path);

            var regression = new RegressionContext(mlContext);

            // Column 11 is the label; columns 0-10 form the feature vector.
            var loader = TextLoader.CreateReader(mlContext,
                                                 c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
                                                 separator: ';', hasHeader: true);

            // Filled in by the onFit callback once training finishes.
            FastTreeRegressionModelParameters fitted = null;

            var estimator = loader.MakeNewEstimator()
                            .Append(r => (r.label, score: regression.Trainers.FastTree(r.label, r.features,
                                                                                       numTrees: 10,
                                                                                       numLeaves: 5,
                                                                                       onFit: p => fitted = p)));

            var pipeline = loader.Append(estimator);

            // The callback must not fire before Fit, and must fire during it.
            Assert.Null(fitted);
            var model = pipeline.Fit(source);
            Assert.NotNull(fitted);

            // 11 input features, so we ought to have 11 weights.
            var weights = new VBuffer<float>();
            fitted.GetFeatureWeights(ref weights);
            Assert.Equal(11, weights.Length);

            var scored = model.Read(source);

            var metrics = regression.Evaluate(scored, r => r.label, r => r.score, new PoissonLoss());

            // Run a sanity check against a few of the metrics.
            Assert.InRange(metrics.L1, 0, double.PositiveInfinity);
            Assert.InRange(metrics.L2, 0, double.PositiveInfinity);
            Assert.InRange(metrics.Rms, 0, double.PositiveInfinity);
            Assert.Equal(metrics.Rms * metrics.Rms, metrics.L2, 5);
            Assert.InRange(metrics.LossFn, 0, double.PositiveInfinity);
        }
        /// <summary>
        /// Verifies that a FastTree regression model trained under a fixed seed exposes
        /// its tree ensemble (bias, tree/weight counts, first tree's node structure,
        /// split thresholds and leaf values) with exact expected values.
        /// </summary>
        public void FastTreeRegressionRepresentation()
        {
            // Fixed seed so the trained ensemble (and every expected constant below) is reproducible.
            var env        = new MLContext(seed: 0);
            var dataPath   = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
            var dataSource = new MultiFileSource(dataPath);

            var catalog = new RegressionCatalog(env);

            // Column 11 is the label; columns 0-10 form the feature vector.
            var reader = TextLoaderStatic.CreateLoader(env,
                                                       c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
                                                       separator: ';', hasHeader: true);

            // A small, single-threaded ensemble keeps the expected values stable and the test fast.
            var opts = new FastTreeRegressionTrainer.Options()
            {
                NumTrees   = 10,
                NumLeaves  = 5,
                NumThreads = 1
            };

            // Captured by the onFit callback when training completes.
            FastTreeRegressionModelParameters pred = null;

            var est = reader.MakeNewEstimator()
                      .Append(r => (r.label, score: catalog.Trainers.FastTree(r.label, r.features, null, opts,
                                                                              onFit: (p) => { pred = p; })));

            var pipe = reader.Append(est);

            // onFit must not fire before Fit, and must fire exactly during it.
            Assert.Null(pred);
            var model = pipe.Fit(dataSource);

            Assert.NotNull(pred);

            var treeCollection = pred.TrainedTreeEnsemble;

            Assert.Equal(0, treeCollection.Bias);
            Assert.Equal(10, treeCollection.Trees.Count);
            Assert.Equal(10, treeCollection.TreeWeights.Count);

            var trees = treeCollection.Trees;

            Assert.Equal(4, trees[0].NumNodes);

            // Numerical split. There is no categorical split, so the following vector contains 0 elements.
            var categoricalSplitFeatures = trees[0].GetCategoricalSplitFeaturesAt(0);

            Assert.Equal(0, categoricalSplitFeatures.Count);

            // Numerical split. There is no categorical split, so the following vector contains 0 elements.
            var categoricalSplitFeatureRange = trees[0].GetCategoricalCategoricalSplitFeatureRangeAt(0);

            Assert.Equal(0, categoricalSplitFeatureRange.Count);

            // NOTE(review): negative entries in GtChild/LteChild appear to encode leaf
            // positions — confirm against the RegressionTree API documentation.
            var expectedGtChild = new int[] { 3, 2, -4, -5 };

            Assert.Equal(4, trees[0].GtChild.Count);
            Assert.Equal(expectedGtChild, trees[0].GtChild);

            var expectedLteChild = new int[] { 1, -1, -3, -2 };

            Assert.Equal(4, trees[0].LteChild.Count);
            Assert.Equal(expectedLteChild, trees[0].LteChild);

            // All four internal nodes use numerical (non-categorical) splits.
            var expectedCategoricalSplitFlags = new bool[] { false, false, false, false };

            Assert.Equal(4, trees[0].CategoricalSplitFlags.Count);
            Assert.Equal(expectedCategoricalSplitFlags, trees[0].CategoricalSplitFlags);

            var expectedNumericalSplitFeatureIndexes = new int[] { 0, 10, 2, 10 };

            Assert.Equal(4, trees[0].NumericalSplitFeatureIndexes.Count);
            Assert.Equal(expectedNumericalSplitFeatureIndexes, trees[0].NumericalSplitFeatureIndexes);

            var expectedNumericalSplitThresholds = new float[] { 0.14f, -0.645f, -0.095f, 0.31f };

            Assert.Equal(4, trees[0].NumericalSplitThresholds.Count);
            // Compare thresholds element-wise with 6-digit precision to tolerate float rounding.
            for (int i = 0; i < trees[0].NumericalSplitThresholds.Count; ++i)
            {
                Assert.Equal(expectedNumericalSplitThresholds[i], trees[0].NumericalSplitThresholds[i], 6);
            }

            Assert.Equal(5, trees[0].NumLeaves);

            var expectedLeafValues = new double[] { 40.159015006449692, 80.434805844435061, 57.072130551545513, 82.898710076162757, 104.17547955322266 };

            Assert.Equal(5, trees[0].LeafValues.Count);
            // Leaf values are doubles; again compare with 6-digit precision.
            for (int i = 0; i < trees[0].LeafValues.Count; ++i)
            {
                Assert.Equal(expectedLeafValues[i], trees[0].LeafValues[i], 6);
            }
        }
        /// <summary>
        /// Verifies the tree-ensemble representation of a FastTree regression model trained
        /// with categorical splits enabled: every node of the first tree should be a
        /// categorical split, and the categorical feature ranges, per-node split feature
        /// lists, and leaf values should match exact values produced under a fixed seed.
        /// </summary>
        public void FastTreeRegressionRepresentationWithCategoricalSplit()
        {
            // Fixed seed so the trained ensemble (and every expected constant below) is reproducible.
            var env        = new MLContext(seed: 0);
            var dataPath   = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
            var dataSource = new MultiFileSource(dataPath);

            var catalog = new RegressionCatalog(env);

            // Features are loaded as text here (unlike the numeric test) so they can be
            // one-hot encoded below, which is what makes categorical splits possible.
            var reader = TextLoaderStatic.CreateLoader(env,
                                                       c => (label: c.LoadFloat(11), features: c.LoadText(0, 10)),
                                                       separator: ';', hasHeader: true);

            // Captured by the onFit callback when training completes.
            FastTreeRegressionModelParameters pred = null;

            var opts = new FastTreeRegressionTrainer.Options()
            {
                CategoricalSplit = true,
                NumTrees         = 3,
                NumLeaves        = 5,
                NumThreads       = 1,
                // This is the minimal samples to form a split (i.e., generating two extra nodes/leaves). For a small data set,
                // we should set a small value. Otherwise, the trained trees could be empty.
                MinDocumentsInLeafs = 2
            };

            // One-hot encode the text features, then train the FastTree regressor on the result.
            var est = reader.MakeNewEstimator()
                      .Append(r => (r.label, features: r.features.OneHotEncoding()))
                      .Append(r => (r.label, score: catalog.Trainers.FastTree(r.label, r.features, null, opts,
                                                                              onFit: (p) => { pred = p; })));

            var pipe = reader.Append(est);

            // onFit must not fire before Fit, and must fire exactly during it.
            Assert.Null(pred);
            var model = pipe.Fit(dataSource);

            Assert.NotNull(pred);

            var treeCollection = pred.TrainedTreeEnsemble;

            Assert.Equal(0, treeCollection.Bias);
            Assert.Equal(3, treeCollection.Trees.Count);
            Assert.Equal(3, treeCollection.TreeWeights.Count);

            var trees = treeCollection.Trees;

            Assert.Equal(4, trees[0].NumNodes);

            // NOTE(review): negative entries in GtChild/LteChild appear to encode leaf
            // positions — confirm against the RegressionTree API documentation.
            var expectedGtChild = new int[] { 3, -3, -4, -5 };

            Assert.Equal(4, trees[0].GtChild.Count);
            Assert.Equal(expectedGtChild, trees[0].GtChild);

            var expectedLteChild = new int[] { 1, 2, -1, -2 };

            Assert.Equal(4, trees[0].LteChild.Count);
            Assert.Equal(expectedLteChild, trees[0].LteChild);

            // With CategoricalSplit enabled, all four internal nodes use categorical splits.
            var expectedCategoricalSplitFlags = new bool[] { true, true, true, true };

            Assert.Equal(4, trees[0].CategoricalSplitFlags.Count);
            Assert.Equal(expectedCategoricalSplitFlags, trees[0].CategoricalSplitFlags);

            var expectedNumericalSplitFeatureIndexes = new int[] { 5312, 2, 2126, 533 };

            Assert.Equal(4, trees[0].NumericalSplitFeatureIndexes.Count);
            Assert.Equal(expectedNumericalSplitFeatureIndexes, trees[0].NumericalSplitFeatureIndexes);

            // Categorical splits compare against 0.5 (one-hot indicators are 0 or 1).
            var expectedNumericalSplitThresholds = new float[] { 0.5f, 0.5f, 0.5f, 0.5f };

            Assert.Equal(4, trees[0].NumericalSplitThresholds.Count);
            // Compare thresholds element-wise with 6-digit precision to tolerate float rounding.
            for (int i = 0; i < trees[0].NumericalSplitThresholds.Count; ++i)
            {
                Assert.Equal(expectedNumericalSplitThresholds[i], trees[0].NumericalSplitThresholds[i], 6);
            }

            // Each node's categorical feature range is an inclusive [first, last] index pair
            // into the one-hot-encoded feature space.
            var actualCategoricalRanges0 = trees[0].GetCategoricalCategoricalSplitFeatureRangeAt(0);

            Assert.Equal(actualCategoricalRanges0, new int[] { 5312, 5782 });

            var actualCategoricalRanges1 = trees[0].GetCategoricalCategoricalSplitFeatureRangeAt(1);

            Assert.Equal(actualCategoricalRanges1, new int[] { 2, 417 });

            var actualCategoricalRanges2 = trees[0].GetCategoricalCategoricalSplitFeatureRangeAt(2);

            Assert.Equal(actualCategoricalRanges2, new int[] { 2126, 2593 });

            var actualCategoricalRanges3 = trees[0].GetCategoricalCategoricalSplitFeatureRangeAt(3);

            Assert.Equal(actualCategoricalRanges3, new int[] { 533, 983 });

            // For each node, spot-check the categorical split feature list by its length
            // and its first/last elements rather than the full contents.
            int[] expectedCounts = { 62, 52, 54, 22 };
            int[] expectedStarts = { 5315, 10, 2141, 533 };
            int[] expectedEnds   = { 5782, 401, 2558, 874 };
            for (int i = 0; i < trees[0].NumNodes; ++i)
            {
                // Retrieve i-th node's split features.
                var actualCategoricalSplitFeatures = trees[0].GetCategoricalSplitFeaturesAt(i);
                Assert.Equal(expectedCounts[i], actualCategoricalSplitFeatures.Count);
                Assert.Equal(expectedStarts[i], actualCategoricalSplitFeatures[0]);
                Assert.Equal(expectedEnds[i], actualCategoricalSplitFeatures[expectedCounts[i] - 1]);
            }

            Assert.Equal(5, trees[0].NumLeaves);

            var expectedLeafValues = new double[] { 48.456055413607892, 86.584156799316418, 87.017326642027, 76.381184971185391, 117.68872643673058 };

            Assert.Equal(5, trees[0].LeafValues.Count);
            // Leaf values are doubles; compare with 6-digit precision to tolerate rounding.
            for (int i = 0; i < trees[0].LeafValues.Count; ++i)
            {
                Assert.Equal(expectedLeafValues[i], trees[0].LeafValues[i], 6);
            }
        }