/// <summary>
/// Sample: builds a FastTree regression pipeline over the housing dataset and prints
/// the regression metrics averaged across 5 cross-validation folds.
/// </summary>
public static void FastTreeRegression()
{
    // Download a regression dataset from github.com/dotnet/machinelearning.
    // This creates a housing.txt file in the file system where this code runs;
    // you can open the file to inspect the data.
    string housingPath = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();

    // The ML.NET host environment required by the pipeline (seed fixed to 0).
    var hostEnvironment = new LocalEnvironment(seed: 0);

    // The context for the task being performed (regression).
    var regression = new RegressionContext(hostEnvironment);

    // A reader matching the file format: tab-separated with a header row,
    // column 0 as the label and columns 1 through 6 as the features.
    var reader = TextLoader.CreateReader(
        hostEnvironment,
        ctx => (
            label: ctx.LoadFloat(0),
            features: ctx.LoadFloat(1, 6)),
        separator: '\t',
        hasHeader: true);

    // Read the whole file; the train/test splitting is done by the
    // cross-validation call further down.
    var housingSource = new MultiFileSource(housingPath);
    var data = reader.Read(housingSource);

    // Receives the predictor produced by training, via the onFit callback.
    FastTreeRegressionPredictor trainedPredictor = null;

    // The estimator: FastTree regression producing a score column next to the label.
    var learningPipeline = reader.MakeNewEstimator()
        .Append(row => (
            row.label,
            score: regression.Trainers.FastTree(
                row.label,
                row.features,
                numTrees: 100,            // try: (int) 20-2000
                numLeaves: 20,            // try: (int) 2-128
                minDatapointsInLeafs: 10, // try: (int) 1-100
                learningRate: 0.2,        // try: (float) 0.025-0.4
                onFit: fitted => trainedPredictor = fitted)));

    // 5-fold cross-validation; each fold result exposes its own metrics.
    var folds = regression.CrossValidate(data, learningPipeline, row => row.label, numFolds: 5);

    // Average every metric over the folds.
    var meanL1 = folds.Select(fold => fold.metrics.L1).Average();
    var meanL2 = folds.Select(fold => fold.metrics.L2).Average();
    var meanLossFn = folds.Select(fold => fold.metrics.LossFn).Average();
    var meanRms = folds.Select(fold => fold.metrics.Rms).Average();
    var meanRSquared = folds.Select(fold => fold.metrics.RSquared).Average();

    Console.WriteLine($"L1 - {meanL1}");
    Console.WriteLine($"L2 - {meanL2}");
    Console.WriteLine($"LossFunction - {meanLossFn}");
    Console.WriteLine($"RMS - {meanRms}");
    Console.WriteLine($"RSquared - {meanRSquared}");
}
/// <summary>
/// Sample: builds a FastTree regression pipeline over the housing dataset using
/// <c>MLContext</c> and prints the regression metrics averaged across 5 cross-validation folds.
/// </summary>
public static void FastTreeRegression()
{
    // Download a regression dataset from github.com/dotnet/machinelearning.
    // This creates a housing.txt file in the file system where this code runs;
    // you can open the file to inspect the data.
    string housingPath = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();

    // A new ML context for ML.NET operations. It can be used for exception tracking
    // and logging, and as the source of randomness.
    var mlContext = new MLContext();

    // A reader matching the file format: tab-separated with a header row,
    // column 0 as the label and columns 1 through 6 as the features.
    var reader = TextLoader.CreateReader(
        mlContext,
        ctx => (
            label: ctx.LoadFloat(0),
            features: ctx.LoadFloat(1, 6)),
        separator: '\t',
        hasHeader: true);

    // Read the whole file; the train/test splitting is done by the
    // cross-validation call further down.
    var data = reader.Read(housingPath);

    // Receives the predictor produced by training, via the onFit callback.
    FastTreeRegressionPredictor trainedPredictor = null;

    // The estimator: FastTree regression producing a score column next to the label.
    var learningPipeline = reader.MakeNewEstimator()
        .Append(row => (
            row.label,
            score: mlContext.Regression.Trainers.FastTree(
                row.label,
                row.features,
                numTrees: 100,             // try: (int) 20-2000
                numLeaves: 20,             // try: (int) 2-128
                minDatapointsInLeaves: 10, // try: (int) 1-100
                learningRate: 0.2,         // try: (float) 0.025-0.4
                onFit: fitted => trainedPredictor = fitted)));

    // 5-fold cross-validation; each fold result exposes its own metrics.
    var folds = mlContext.Regression.CrossValidate(data, learningPipeline, row => row.label, numFolds: 5);

    // Average every metric over the folds.
    var meanL1 = folds.Select(fold => fold.metrics.L1).Average();
    var meanL2 = folds.Select(fold => fold.metrics.L2).Average();
    var meanLossFn = folds.Select(fold => fold.metrics.LossFn).Average();
    var meanRms = folds.Select(fold => fold.metrics.Rms).Average();
    var meanRSquared = folds.Select(fold => fold.metrics.RSquared).Average();

    // Trailing values are the sample outputs recorded alongside the original code.
    Console.WriteLine($"L1 - {meanL1}");               // 3.091095
    Console.WriteLine($"L2 - {meanL2}");               // 20.351073
    Console.WriteLine($"LossFunction - {meanLossFn}"); // 20.351074
    Console.WriteLine($"RMS - {meanRms}");             // 4.478358
    Console.WriteLine($"RSquared - {meanRSquared}");   // 0.754977
}
/// <summary>
/// Trains a small FastTree regression model (10 trees, 5 leaves) on the generated
/// regression dataset, checks that the onFit callback delivers the predictor with
/// 11 feature weights, and sanity-checks the evaluation metrics.
/// </summary>
public void FastTreeRegression()
{
    var mlContext = new MLContext(seed: 0);
    var trainPath = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
    var trainSource = new MultiFileSource(trainPath);
    var regression = new RegressionContext(mlContext);

    // Semicolon-separated file with a header: column 11 is the label,
    // columns 0 through 10 are the features.
    var reader = TextLoader.CreateReader(
        mlContext,
        cols => (label: cols.LoadFloat(11), features: cols.LoadFloat(0, 10)),
        separator: ';',
        hasHeader: true);

    // Filled in by the trainer's onFit callback once fitting has happened.
    FastTreeRegressionPredictor fittedPredictor = null;

    var estimator = reader.MakeNewEstimator()
        .Append(row => (
            row.label,
            score: regression.Trainers.FastTree(
                row.label,
                row.features,
                numTrees: 10,
                numLeaves: 5,
                onFit: fitted => fittedPredictor = fitted)));

    var pipeline = reader.Append(estimator);

    // The callback must not have fired before Fit, and must have fired after it.
    Assert.Null(fittedPredictor);
    var model = pipeline.Fit(trainSource);
    Assert.NotNull(fittedPredictor);

    // Columns 0..10 give 11 input features, so 11 feature weights are expected.
    var featureWeights = new VBuffer<float>();
    fittedPredictor.GetFeatureWeights(ref featureWeights);
    Assert.Equal(11, featureWeights.Length);

    var scored = model.Read(trainSource);
    var metrics = regression.Evaluate(scored, row => row.label, row => row.score, new PoissonLoss());

    // Sanity checks on a few metrics: all non-negative, and Rms squared
    // agrees with L2 to 5 decimal places.
    Assert.InRange(metrics.L1, 0, double.PositiveInfinity);
    Assert.InRange(metrics.L2, 0, double.PositiveInfinity);
    Assert.InRange(metrics.Rms, 0, double.PositiveInfinity);
    Assert.Equal(metrics.Rms * metrics.Rms, metrics.L2, 5);
    Assert.InRange(metrics.LossFn, 0, double.PositiveInfinity);
}