// This example requires installation of the additional nuget package
// <a href="https://www.nuget.org/packages/Microsoft.ML.FastTree/">Microsoft.ML.FastTree</a>.
public static void FastTreeRegression()
{
    // Download a regression dataset from github.com/dotnet/machinelearning.
    // This creates a housing.txt file in the file system where this code runs;
    // you can open the file to inspect the data.
    string housingDataPath = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();

    // Create a new ML context for ML.NET operations. It can be used for
    // exception tracking and logging, as well as the source of randomness.
    var mlContext = new MLContext();

    // Create a data loader matching the format of the data:
    // column 0 is the label, columns 1-6 are the features.
    var textLoader = TextLoaderStatic.CreateLoader(mlContext, c => (
            label: c.LoadFloat(0),
            features: c.LoadFloat(1, 6)
        ),
        separator: '\t', hasHeader: true);

    // Load the data; cross-validation below handles the train/test splitting.
    var housingData = textLoader.Load(housingDataPath);

    // Receives the model parameters produced by training (set via onFit).
    FastTreeRegressionModelParameters trainedModel = null;

    // Create the estimator, appending a FastTree regression scorer.
    var pipeline = textLoader.MakeNewEstimator()
        .Append(r => (r.label, score: mlContext.Regression.Trainers.FastTree(
            r.label,
            r.features,
            numTrees: 100,             // try: (int) 20-2000
            numLeaves: 20,             // try: (int) 2-128
            minDatapointsInLeaves: 10, // try: (int) 1-100
            learningRate: 0.2,         // try: (float) 0.025-0.4
            onFit: p => trainedModel = p)
        )
    );

    // Train and evaluate across 5 folds, then average each metric over the folds.
    var foldResults = mlContext.Regression.CrossValidate(housingData, pipeline, r => r.label, numFolds: 5);
    var avg = (
        L1: foldResults.Select(r => r.metrics.MeanAbsoluteError).Average(),
        L2: foldResults.Select(r => r.metrics.MeanSquaredError).Average(),
        LossFn: foldResults.Select(r => r.metrics.LossFunction).Average(),
        Rms: foldResults.Select(r => r.metrics.RootMeanSquaredError).Average(),
        RSquared: foldResults.Select(r => r.metrics.RSquared).Average()
    );

    Console.WriteLine($"L1 - {avg.L1}");                 // 3.091095
    Console.WriteLine($"L2 - {avg.L2}");                 // 20.351073
    Console.WriteLine($"LossFunction - {avg.LossFn}");   // 20.351074
    Console.WriteLine($"RMS - {avg.Rms}");               // 4.478358
    Console.WriteLine($"RSquared - {avg.RSquared}");     // 0.754977
}
public void FastTreeRegression()
{
    var mlContext = new MLContext(seed: 0);
    var dataPath = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
    var dataSource = new MultiFileSource(dataPath);
    var regression = new RegressionContext(mlContext);

    // Statically-typed loader: column 11 is the label, columns 0-10 are the features.
    var loader = TextLoader.CreateReader(mlContext,
        c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
        separator: ';', hasHeader: true);

    // Captured by onFit once training completes.
    FastTreeRegressionModelParameters trainedParams = null;

    var estimator = loader.MakeNewEstimator()
        .Append(r => (r.label, score: regression.Trainers.FastTree(r.label, r.features,
            numTrees: 10,
            numLeaves: 5,
            onFit: (p) => { trainedParams = p; })));
    var pipeline = loader.Append(estimator);

    // onFit must not fire before Fit, and must have fired by the time Fit returns.
    Assert.Null(trainedParams);
    var fittedModel = pipeline.Fit(dataSource);
    Assert.NotNull(trainedParams);

    // 11 input features, so we ought to have 11 weights.
    VBuffer<float> weights = new VBuffer<float>();
    trainedParams.GetFeatureWeights(ref weights);
    Assert.Equal(11, weights.Length);

    var scoredData = fittedModel.Read(dataSource);
    var metrics = regression.Evaluate(scoredData, r => r.label, r => r.score, new PoissonLoss());

    // Run a sanity check against a few of the metrics.
    Assert.InRange(metrics.L1, 0, double.PositiveInfinity);
    Assert.InRange(metrics.L2, 0, double.PositiveInfinity);
    Assert.InRange(metrics.Rms, 0, double.PositiveInfinity);
    Assert.Equal(metrics.Rms * metrics.Rms, metrics.L2, 5);
    Assert.InRange(metrics.LossFn, 0, double.PositiveInfinity);
}
public void FastTreeRegressionRepresentation()
{
    var mlContext = new MLContext(seed: 0);
    var dataPath = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
    var dataSource = new MultiFileSource(dataPath);
    var catalog = new RegressionCatalog(mlContext);

    // Statically-typed loader: column 11 is the label, columns 0-10 are the features.
    var loader = TextLoaderStatic.CreateLoader(mlContext,
        c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
        separator: ';', hasHeader: true);

    var trainerOptions = new FastTreeRegressionTrainer.Options()
    {
        NumTrees = 10,
        NumLeaves = 5,
        NumThreads = 1
    };

    // Captured by onFit once training completes.
    FastTreeRegressionModelParameters modelParams = null;

    var estimator = loader.MakeNewEstimator()
        .Append(r => (r.label, score: catalog.Trainers.FastTree(r.label, r.features, null, trainerOptions,
            onFit: (p) => { modelParams = p; })));
    var pipeline = loader.Append(estimator);

    Assert.Null(modelParams);
    var model = pipeline.Fit(dataSource);
    Assert.NotNull(modelParams);

    // Inspect the trained ensemble's in-memory representation.
    var ensemble = modelParams.TrainedTreeEnsemble;
    Assert.Equal(0, ensemble.Bias);
    Assert.Equal(10, ensemble.Trees.Count);
    Assert.Equal(10, ensemble.TreeWeights.Count);

    var firstTree = ensemble.Trees[0];
    Assert.Equal(4, firstTree.NumNodes);

    // Numerical split. There is no categorical split, so the following vector contains 0 elements.
    var categoricalSplitFeatures = firstTree.GetCategoricalSplitFeaturesAt(0);
    Assert.Equal(0, categoricalSplitFeatures.Count);

    // Numerical split. There is no categorical split, so the following vector contains 0 elements.
    var categoricalSplitFeatureRange = firstTree.GetCategoricalCategoricalSplitFeatureRangeAt(0);
    Assert.Equal(0, categoricalSplitFeatureRange.Count);

    var expectedGtChild = new int[] { 3, 2, -4, -5 };
    Assert.Equal(4, firstTree.GtChild.Count);
    Assert.Equal(expectedGtChild, firstTree.GtChild);

    var expectedLteChild = new int[] { 1, -1, -3, -2 };
    Assert.Equal(4, firstTree.LteChild.Count);
    Assert.Equal(expectedLteChild, firstTree.LteChild);

    var expectedCategoricalSplitFlags = new bool[] { false, false, false, false };
    Assert.Equal(4, firstTree.CategoricalSplitFlags.Count);
    Assert.Equal(expectedCategoricalSplitFlags, firstTree.CategoricalSplitFlags);

    var expectedNumericalSplitFeatureIndexes = new int[] { 0, 10, 2, 10 };
    Assert.Equal(4, firstTree.NumericalSplitFeatureIndexes.Count);
    Assert.Equal(expectedNumericalSplitFeatureIndexes, firstTree.NumericalSplitFeatureIndexes);

    var expectedNumericalSplitThresholds = new float[] { 0.14f, -0.645f, -0.095f, 0.31f };
    Assert.Equal(4, firstTree.NumericalSplitThresholds.Count);
    for (int i = 0; i < firstTree.NumericalSplitThresholds.Count; i++)
    {
        Assert.Equal(expectedNumericalSplitThresholds[i], firstTree.NumericalSplitThresholds[i], 6);
    }

    Assert.Equal(5, firstTree.NumLeaves);

    var expectedLeafValues = new double[] { 40.159015006449692, 80.434805844435061,
        57.072130551545513, 82.898710076162757, 104.17547955322266 };
    Assert.Equal(5, firstTree.LeafValues.Count);
    for (int i = 0; i < firstTree.LeafValues.Count; i++)
    {
        Assert.Equal(expectedLeafValues[i], firstTree.LeafValues[i], 6);
    }
}
public void FastTreeRegressionRepresentationWithCategoricalSplit()
{
    var mlContext = new MLContext(seed: 0);
    var dataPath = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
    var dataSource = new MultiFileSource(dataPath);
    var catalog = new RegressionCatalog(mlContext);

    // Load the features as text so they can be one-hot encoded below.
    var loader = TextLoaderStatic.CreateLoader(mlContext,
        c => (label: c.LoadFloat(11), features: c.LoadText(0, 10)),
        separator: ';', hasHeader: true);

    // Captured by onFit once training completes.
    FastTreeRegressionModelParameters modelParams = null;

    var trainerOptions = new FastTreeRegressionTrainer.Options()
    {
        CategoricalSplit = true,
        NumTrees = 3,
        NumLeaves = 5,
        NumThreads = 1,
        // This is the minimal samples to form a split (i.e., generating two extra nodes/leaves).
        // For a small data set we should set a small value; otherwise the trained trees could be empty.
        MinDocumentsInLeafs = 2
    };

    var estimator = loader.MakeNewEstimator()
        .Append(r => (r.label, features: r.features.OneHotEncoding()))
        .Append(r => (r.label, score: catalog.Trainers.FastTree(r.label, r.features, null, trainerOptions,
            onFit: (p) => { modelParams = p; })));
    var pipeline = loader.Append(estimator);

    Assert.Null(modelParams);
    var model = pipeline.Fit(dataSource);
    Assert.NotNull(modelParams);

    // Inspect the trained ensemble's in-memory representation.
    var ensemble = modelParams.TrainedTreeEnsemble;
    Assert.Equal(0, ensemble.Bias);
    Assert.Equal(3, ensemble.Trees.Count);
    Assert.Equal(3, ensemble.TreeWeights.Count);

    var tree = ensemble.Trees[0];
    Assert.Equal(4, tree.NumNodes);

    var expectedGtChild = new int[] { 3, -3, -4, -5 };
    Assert.Equal(4, tree.GtChild.Count);
    Assert.Equal(expectedGtChild, tree.GtChild);

    var expectedLteChild = new int[] { 1, 2, -1, -2 };
    Assert.Equal(4, tree.LteChild.Count);
    Assert.Equal(expectedLteChild, tree.LteChild);

    var expectedCategoricalSplitFlags = new bool[] { true, true, true, true };
    Assert.Equal(4, tree.CategoricalSplitFlags.Count);
    Assert.Equal(expectedCategoricalSplitFlags, tree.CategoricalSplitFlags);

    var expectedNumericalSplitFeatureIndexes = new int[] { 5312, 2, 2126, 533 };
    Assert.Equal(4, tree.NumericalSplitFeatureIndexes.Count);
    Assert.Equal(expectedNumericalSplitFeatureIndexes, tree.NumericalSplitFeatureIndexes);

    var expectedNumericalSplitThresholds = new float[] { 0.5f, 0.5f, 0.5f, 0.5f };
    Assert.Equal(4, tree.NumericalSplitThresholds.Count);
    for (int i = 0; i < tree.NumericalSplitThresholds.Count; i++)
    {
        Assert.Equal(expectedNumericalSplitThresholds[i], tree.NumericalSplitThresholds[i], 6);
    }

    // Ranges of the one-hot slots backing each categorical split.
    var actualCategoricalRanges0 = tree.GetCategoricalCategoricalSplitFeatureRangeAt(0);
    Assert.Equal(actualCategoricalRanges0, new int[] { 5312, 5782 });

    var actualCategoricalRanges1 = tree.GetCategoricalCategoricalSplitFeatureRangeAt(1);
    Assert.Equal(actualCategoricalRanges1, new int[] { 2, 417 });

    var actualCategoricalRanges2 = tree.GetCategoricalCategoricalSplitFeatureRangeAt(2);
    Assert.Equal(actualCategoricalRanges2, new int[] { 2126, 2593 });

    var actualCategoricalRanges3 = tree.GetCategoricalCategoricalSplitFeatureRangeAt(3);
    Assert.Equal(actualCategoricalRanges3, new int[] { 533, 983 });

    int[] expectedCounts = { 62, 52, 54, 22 };
    int[] expectedStarts = { 5315, 10, 2141, 533 };
    int[] expectedEnds = { 5782, 401, 2558, 874 };
    for (int i = 0; i < tree.NumNodes; i++)
    {
        // Retrieve the i-th node's split features.
        var actualCategoricalSplitFeatures = tree.GetCategoricalSplitFeaturesAt(i);
        Assert.Equal(expectedCounts[i], actualCategoricalSplitFeatures.Count);
        Assert.Equal(expectedStarts[i], actualCategoricalSplitFeatures[0]);
        Assert.Equal(expectedEnds[i], actualCategoricalSplitFeatures[expectedCounts[i] - 1]);
    }

    Assert.Equal(5, tree.NumLeaves);

    var expectedLeafValues = new double[] { 48.456055413607892, 86.584156799316418,
        87.017326642027, 76.381184971185391, 117.68872643673058 };
    Assert.Equal(5, tree.LeafValues.Count);
    for (int i = 0; i < tree.LeafValues.Count; i++)
    {
        Assert.Equal(expectedLeafValues[i], tree.LeafValues[i], 6);
    }
}