/// <summary>
/// Trains a LightGBM regressor through the statically-typed pipeline API on the
/// generated regression dataset, checks that the <c>onFit</c> callback hands back
/// the trained predictor, and sanity-checks the evaluation metrics.
/// </summary>
[ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // LightGBM is 64-bit only
public void LightGbmRegression()
{
    var mlContext = new MLContext(seed: 0);
    var trainingFile = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
    var trainingSource = new MultiFileSource(trainingFile);
    var regression = new RegressionContext(mlContext);

    // Column 11 holds the label; columns 0 through 10 hold the features.
    var textReader = TextLoader.CreateReader(
        mlContext,
        c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
        separator: ';',
        hasHeader: true);

    // Filled in by the onFit callback once the trainer has run.
    LightGbmRegressionModelParameters trainedPredictor = null;

    var trainerStep = textReader.MakeNewEstimator()
        .Append(r => (
            r.label,
            score: regression.Trainers.LightGbm(
                r.label,
                r.features,
                numBoostRound: 10,
                numLeaves: 5,
                onFit: p => trainedPredictor = p)));

    var fullPipeline = textReader.Append(trainerStep);

    // The callback must not fire before the pipeline is fitted...
    Assert.Null(trainedPredictor);

    var fittedModel = fullPipeline.Fit(trainingSource);

    // ...and must have fired by the time fitting completes.
    Assert.NotNull(trainedPredictor);

    // 11 input features, so we ought to have 11 weights.
    var featureWeights = new VBuffer<float>();
    trainedPredictor.GetFeatureWeights(ref featureWeights);
    Assert.Equal(11, featureWeights.Length);

    var scoredData = fittedModel.Read(trainingSource);
    var metrics = regression.Evaluate(
        scoredData,
        r => r.label,
        r => r.score,
        new PoissonLoss());

    // Run a sanity check against a few of the metrics.
    Assert.InRange(metrics.L1, 0, double.PositiveInfinity);
    Assert.InRange(metrics.L2, 0, double.PositiveInfinity);
    Assert.InRange(metrics.Rms, 0, double.PositiveInfinity);

    // RMS squared should match L2 to 5 decimal places.
    Assert.Equal(metrics.Rms * metrics.Rms, metrics.L2, 5);
    Assert.InRange(metrics.LossFn, 0, double.PositiveInfinity);
}
/// <summary>
/// Sample: trains a LightGBM regression model on the housing dataset with the
/// statically-typed API, prints the first two learned feature weights, and
/// prints the evaluation metrics computed on a held-out test split.
/// </summary>
public static void Example()
{
    // Download a regression dataset from github.com/dotnet/machinelearning.
    // This creates a housing.txt file in the filesystem;
    // you can open the file to see the data.
    string housingPath = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();

    // The ML context is the entry point for ML.NET operations. It can be used for
    // exception tracking and logging, and it is the source of randomness.
    var mlContext = new MLContext();

    // Describe the file layout: column 0 is the label, columns 1 through 6 are features.
    var loader = TextLoaderStatic.CreateLoader(
        mlContext,
        c => (label: c.LoadFloat(0), features: c.LoadFloat(1, 6)),
        separator: '\t',
        hasHeader: true);

    // Load everything, then hold back 10% of the rows for testing.
    var fullData = loader.Load(new MultiFileSource(housingPath));
    var (trainSet, testSet) = mlContext.Data.TrainTestSplit(fullData, testFraction: 0.1);

    // Receives the trained predictor through the onFit callback.
    LightGbmRegressionModelParameters trainedPredictor = null;

    // Build the estimator: pass the label through and add a LightGBM score column.
    var estimator = loader.MakeNewEstimator()
        .Append(r => (
            r.label,
            score: mlContext.Regression.Trainers.LightGbm(
                r.label,
                r.features,
                numberOfLeaves: 4,
                minimumExampleCountPerLeaf: 6,
                learningRate: 0.001,
                onFit: p => trainedPredictor = p)));

    // Train on the training split.
    var trainedModel = estimator.Fit(trainSet);

    // Inspect the feature weights the model learned.
    VBuffer<float> featureWeights = default;
    trainedPredictor.GetFeatureWeights(ref featureWeights);
    var weightValues = featureWeights.GetValues();
    Console.WriteLine($"weight 0 - {weightValues[0]}");
    Console.WriteLine($"weight 1 - {weightValues[1]}");

    // Score the held-out rows and evaluate the predictions.
    var scoredTestSet = model_Transform(trainedModel, testSet);
    var metrics = mlContext.Regression.Evaluate(scoredTestSet, r => r.label, r => r.score);

    Console.WriteLine($"L1 - {metrics.MeanAbsoluteError}");    // 4.9669731
    Console.WriteLine($"L2 - {metrics.MeanSquaredError}");    // 51.37296
    Console.WriteLine($"LossFunction - {metrics.LossFunction}");  // 51.37296
    Console.WriteLine($"RMS - {metrics.RootMeanSquaredError}");    // 7.167493
    Console.WriteLine($"RSquared - {metrics.RSquared}");   // 0.079478

    // Local helper: applies the fitted model to a data set.
    DataView<T> model_Transform<T>(dynamic fitted, DataView<T> input) => fitted.Transform(input);
}