Example #1
0
        public static void FastTreeRegression()
        {
            // Download the housing regression dataset from github.com/dotnet/machinelearning.
            // This creates a housing.txt file on the filesystem where this code runs;
            // open the file to inspect the data.
            string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();

            // The ML.Net IHostEnvironment object, needed for the pipeline.
            var environment = new LocalEnvironment(seed: 0);

            // The ML context, chosen based on the task being performed (regression).
            var regressionContext = new RegressionContext(environment);

            // A data reader matching the dataset's format:
            // column 0 is the label, columns 1-6 are the features.
            var reader = TextLoader.CreateReader(environment,
                                                 c => (label: c.LoadFloat(0),
                                                       features: c.LoadFloat(1, 6)),
                                                 separator: '\t', hasHeader: true);

            // Load the data; the cross-validation below handles train/test partitioning.
            var data = reader.Read(new MultiFileSource(dataFile));

            // Captures the predictor produced by training, via the onFit callback.
            FastTreeRegressionPredictor trainedPredictor = null;

            // Build the estimator: a FastTree regression trainer appended to the reader's schema.
            var estimator = reader.MakeNewEstimator()
                .Append(r => (r.label, score: regressionContext.Trainers.FastTree(
                                  r.label,
                                  r.features,
                                  numTrees: 100,            // try: (int) 20-2000
                                  numLeaves: 20,            // try: (int) 2-128
                                  minDatapointsInLeafs: 10, // try: (int) 1-100
                                  learningRate: 0.2,        // try: (float) 0.025-0.4
                                  onFit: p => trainedPredictor = p)));

            // Run 5-fold cross-validation, then average each metric across the folds.
            var foldResults = regressionContext.CrossValidate(data, estimator, r => r.label, numFolds: 5);
            var averagedMetrics = (
                L1: foldResults.Select(r => r.metrics.L1).Average(),
                L2: foldResults.Select(r => r.metrics.L2).Average(),
                LossFn: foldResults.Select(r => r.metrics.LossFn).Average(),
                Rms: foldResults.Select(r => r.metrics.Rms).Average(),
                RSquared: foldResults.Select(r => r.metrics.RSquared).Average());

            Console.WriteLine($"L1 - {averagedMetrics.L1}");
            Console.WriteLine($"L2 - {averagedMetrics.L2}");
            Console.WriteLine($"LossFunction - {averagedMetrics.LossFn}");
            Console.WriteLine($"RMS - {averagedMetrics.Rms}");
            Console.WriteLine($"RSquared - {averagedMetrics.RSquared}");
        }
        public static void FastTreeRegression()
        {
            // Download the housing regression dataset from github.com/dotnet/machinelearning.
            // This creates a housing.txt file on the filesystem where this code runs;
            // open the file to inspect the data.
            string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();

            // Create a new ML context for ML.NET operations. It can be used for exception
            // tracking and logging, as well as the source of randomness.
            var mlContext = new MLContext();

            // A data reader matching the dataset's format:
            // column 0 is the label, columns 1-6 are the features.
            var reader = TextLoader.CreateReader(mlContext,
                                                 c => (label: c.LoadFloat(0),
                                                       features: c.LoadFloat(1, 6)),
                                                 separator: '\t', hasHeader: true);

            // Load the data; the cross-validation below handles train/test partitioning.
            var data = reader.Read(dataFile);

            // Captures the predictor produced by training, via the onFit callback.
            FastTreeRegressionPredictor trainedPredictor = null;

            // Build the estimator: a FastTree regression trainer appended to the reader's schema.
            var estimator = reader.MakeNewEstimator()
                .Append(r => (r.label, score: mlContext.Regression.Trainers.FastTree(
                                  r.label,
                                  r.features,
                                  numTrees: 100,             // try: (int) 20-2000
                                  numLeaves: 20,             // try: (int) 2-128
                                  minDatapointsInLeaves: 10, // try: (int) 1-100
                                  learningRate: 0.2,         // try: (float) 0.025-0.4
                                  onFit: p => trainedPredictor = p)));

            // Run 5-fold cross-validation, then average each metric across the folds.
            var foldResults = mlContext.Regression.CrossValidate(data, estimator, r => r.label, numFolds: 5);
            var averagedMetrics = (
                L1: foldResults.Select(r => r.metrics.L1).Average(),
                L2: foldResults.Select(r => r.metrics.L2).Average(),
                LossFn: foldResults.Select(r => r.metrics.LossFn).Average(),
                Rms: foldResults.Select(r => r.metrics.Rms).Average(),
                RSquared: foldResults.Select(r => r.metrics.RSquared).Average());

            Console.WriteLine($"L1 - {averagedMetrics.L1}");               // 3.091095
            Console.WriteLine($"L2 - {averagedMetrics.L2}");               // 20.351073
            Console.WriteLine($"LossFunction - {averagedMetrics.LossFn}"); // 20.351074
            Console.WriteLine($"RMS - {averagedMetrics.Rms}");             // 4.478358
            Console.WriteLine($"RSquared - {averagedMetrics.RSquared}");   // 0.754977
        }
        public void FastTreeRegression()
        {
            // Seeded context so the test is deterministic.
            var mlContext = new MLContext(seed: 0);
            var dataPath = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
            var dataSource = new MultiFileSource(dataPath);

            var regressionContext = new RegressionContext(mlContext);

            // Column 11 is the label; columns 0-10 are the 11 features.
            var reader = TextLoader.CreateReader(mlContext,
                                                 c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
                                                 separator: ';', hasHeader: true);

            // Populated by the onFit callback once training completes.
            FastTreeRegressionPredictor predictor = null;

            var trainerEstimator = reader.MakeNewEstimator()
                .Append(r => (r.label, score: regressionContext.Trainers.FastTree(r.label, r.features,
                                                                                  numTrees: 10,
                                                                                  numLeaves: 5,
                                                                                  onFit: p => { predictor = p; })));

            var pipeline = reader.Append(trainerEstimator);

            // Before fitting, onFit must not have fired yet.
            Assert.Null(predictor);
            var model = pipeline.Fit(dataSource);

            // After fitting, onFit has captured the trained predictor.
            Assert.NotNull(predictor);

            // 11 input features, so we ought to have 11 weights.
            VBuffer<float> weights = new VBuffer<float>();
            predictor.GetFeatureWeights(ref weights);
            Assert.Equal(11, weights.Length);

            var scoredData = model.Read(dataSource);

            var metrics = regressionContext.Evaluate(scoredData, r => r.label, r => r.score, new PoissonLoss());

            // Sanity-check a few of the metrics: all non-negative, and RMS² == L2 (to 5 decimals).
            Assert.InRange(metrics.L1, 0, double.PositiveInfinity);
            Assert.InRange(metrics.L2, 0, double.PositiveInfinity);
            Assert.InRange(metrics.Rms, 0, double.PositiveInfinity);
            Assert.Equal(metrics.Rms * metrics.Rms, metrics.L2, 5);
            Assert.InRange(metrics.LossFn, 0, double.PositiveInfinity);
        }