Example #1
        public static void LightGbmRegression()
        {
            // Download a regression dataset from github.com/dotnet/machinelearning.
            // This creates a housing.txt file in the filesystem where this code runs;
            // you can open the file to see the data.
            string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();

            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
            // as well as the source of randomness.
            var mlContext = new MLContext();
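            // Note: a fixed seed can also be passed (e.g. new MLContext(seed: 0), as the test
            // further below does) to make runs that depend on this randomness reproducible.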

            // Create a data reader based on the format of the data.
            var reader = TextLoader.CreateReader(mlContext, c => (
                                                     label: c.LoadFloat(0),
                                                     features: c.LoadFloat(1, 6)
                                                     ),
                                                 separator: '\t', hasHeader: true);

            // Read the data, and leave 10% out so we can use it for testing.
            var data = reader.Read(new MultiFileSource(dataFile));

            var (trainData, testData) = mlContext.Regression.TrainTestSplit(data, testFraction: 0.1);

            // The predictor that will be produced by training
            LightGbmRegressionPredictor pred = null;

            // Create the estimator
            var learningPipeline = reader.MakeNewEstimator()
                                   .Append(r => (r.label, score: mlContext.Regression.Trainers.LightGbm(
                                                     r.label,
                                                     r.features,
                                                     numLeaves: 4,
                                                     minDataPerLeaf: 6,
                                                     learningRate: 0.001,
                                                     onFit: p => pred = p)
                                                 )
                                           );
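            // The onFit delegate above captures the trained LightGBM predictor once fitting
            // completes, so its learned feature weights can be inspected below.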

            // Fit this pipeline to the training data
            var model = learningPipeline.Fit(trainData);

            // Check the weights that the model learned
            VBuffer<float> weights = default;

            pred.GetFeatureWeights(ref weights);

            Console.WriteLine($"weight 0 - {weights.Values[0]}");
            Console.WriteLine($"weight 1 - {weights.Values[1]}");

            // Evaluate how the model is doing on the test data
            var dataWithPredictions = model.Transform(testData);
            var metrics             = mlContext.Regression.Evaluate(dataWithPredictions, r => r.label, r => r.score);

            Console.WriteLine($"L1 - {metrics.L1}");               // 4.9669731
            Console.WriteLine($"L2 - {metrics.L2}");               // 51.37296
            Console.WriteLine($"LossFunction - {metrics.LossFn}"); // 51.37296
            Console.WriteLine($"RMS - {metrics.Rms}");             // 7.167493
            Console.WriteLine($"RSquared - {metrics.RSquared}");   // 0.079478
        }
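        // A minimal sanity sketch, not part of the original sample: among these regression metrics,
        // L2 is the mean squared error and RMS is its square root, so sqrt(51.37296) should match
        // the 7.167493 printed above up to rounding (the test further below asserts the same
        // relationship via Rms * Rms == L2). Only plain doubles are assumed here.
        public static void CheckRmsAgainstL2(double l2, double rms)
        {
            // e.g. CheckRmsAgainstL2(51.37296, 7.167493) prints two nearly identical values.
            Console.WriteLine($"sqrt(L2) - {Math.Sqrt(l2)}");
            Console.WriteLine($"RMS      - {rms}");
        }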
Example #2
        [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // LightGBM is 64-bit only
        public void LightGbmRegression()
        {
            var env        = new MLContext(seed: 0);
            var dataPath   = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
            var dataSource = new MultiFileSource(dataPath);

            var ctx = new RegressionContext(env);

            var reader = TextLoader.CreateReader(env,
                                                 c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
                                                 separator: ';', hasHeader: true);

            LightGbmRegressionPredictor pred = null;

            var est = reader.MakeNewEstimator()
                      .Append(r => (r.label, score: ctx.Trainers.LightGbm(r.label, r.features,
                                                                          numBoostRound: 10,
                                                                          numLeaves: 5,
                                                                          onFit: (p) => { pred = p; })));

            var pipe = reader.Append(est);

            Assert.Null(pred);
            var model = pipe.Fit(dataSource);

            Assert.NotNull(pred);
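            // The onFit callback fires during Fit, so the captured predictor is null before
            // fitting and non-null afterwards, as the two asserts above check.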
            // 11 input features, so we ought to have 11 weights.
            VBuffer<float> weights = new VBuffer<float>();

            pred.GetFeatureWeights(ref weights);
            Assert.Equal(11, weights.Length);

            var data = model.Read(dataSource);

            var metrics = ctx.Evaluate(data, r => r.label, r => r.score, new PoissonLoss());
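            // Note: the PoissonLoss passed above supplies a custom loss function; presumably it is
            // what the LossFn metric checked below reflects (an assumption, not stated in this test).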

            // Run a sanity check against a few of the metrics.
            Assert.InRange(metrics.L1, 0, double.PositiveInfinity);
            Assert.InRange(metrics.L2, 0, double.PositiveInfinity);
            Assert.InRange(metrics.Rms, 0, double.PositiveInfinity);
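            // RMS should be the square root of L2 (the mean squared error), here to 5 decimal places.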
            Assert.Equal(metrics.Rms * metrics.Rms, metrics.L2, 5);
            Assert.InRange(metrics.LossFn, 0, double.PositiveInfinity);
        }