Example #1
0
        /// <summary>
        /// Cross-validates a FastTree regression model on the housing sample dataset
        /// and prints the metrics averaged over the folds to the console.
        /// </summary>
        public static void FastTreeRegression()
        {
            // Fetch the housing regression sample through the samples helper; the value
            // returned is what gets handed to the loader below as the file to read.
            string housingPath = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();

            // Host environment (fixed seed) and the regression task context built on top of it.
            var mlEnv = new LocalEnvironment(seed: 0);
            var regression = new RegressionContext(mlEnv);

            // Tab-separated text with a header row: column 0 is loaded as the label,
            // columns 1 through 6 as the feature vector.
            var loader = TextLoader.CreateReader(
                mlEnv,
                c => (label: c.LoadFloat(0), features: c.LoadFloat(1, 6)),
                separator: '\t',
                hasHeader: true);

            // Load the whole file; no hold-out split is made here, evaluation is done
            // by cross-validation further down.
            var housingData = loader.Read(new MultiFileSource(housingPath));

            // Receives the predictor through the trainer's onFit callback.
            // Nothing in this method reads it afterwards.
            FastTreeRegressionPredictor trainedPredictor = null;

            // Estimator: keep the label column and add a score column produced by FastTree.
            var estimator = loader.MakeNewEstimator().Append(
                r => (
                    r.label,
                    score: regression.Trainers.FastTree(
                        r.label,
                        r.features,
                        numTrees: 100,            // try: (int) 20-2000
                        numLeaves: 20,            // try: (int) 2-128
                        minDatapointsInLeafs: 10, // try: (int) 1-100
                        learningRate: 0.2,        // try: (float) 0.025-0.4
                        onFit: p => trainedPredictor = p)));

            // Five-fold cross-validation, then the mean of every metric across the folds.
            var folds = regression.CrossValidate(housingData, estimator, r => r.label, numFolds: 5);

            var meanL1 = folds.Average(fold => fold.metrics.L1);
            var meanL2 = folds.Average(fold => fold.metrics.L2);
            var meanLossFn = folds.Average(fold => fold.metrics.LossFn);
            var meanRms = folds.Average(fold => fold.metrics.Rms);
            var meanRSquared = folds.Average(fold => fold.metrics.RSquared);

            Console.WriteLine($"L1 - {meanL1}");
            Console.WriteLine($"L2 - {meanL2}");
            Console.WriteLine($"LossFunction - {meanLossFn}");
            Console.WriteLine($"RMS - {meanRms}");
            Console.WriteLine($"RSquared - {meanRSquared}");
        }
        /// <summary>
        /// Trains a product unit-sales forecasting model (FastTree Tweedie regression),
        /// prints the metrics averaged over a 5-fold cross-validation, and saves the model
        /// fitted on the full data set.
        /// </summary>
        /// <param name="dataPath">Input training file path (comma-separated, with a header row).</param>
        /// <param name="outputModelPath">Path of the file the trained model is written to; an existing file is overwritten.</param>
        private static void CreateProductModelUsingPipeline(string dataPath, string outputModelPath)
        {
            var env = new LocalEnvironment(seed: 1);  //Seed set to any number so you have a deterministic environment
            var ctx = new RegressionContext(env);

            ConsoleWriteHeader("Training product forecasting");

            // Column layout of the input file; "next" (column 0) is the value to predict.
            var reader = new TextLoader(env, new TextLoader.Arguments
            {
                Column = new[] {
                    new TextLoader.Column("next", DataKind.R4, 0),
                    new TextLoader.Column("productId", DataKind.Text, 1),
                    new TextLoader.Column("year", DataKind.R4, 2),
                    new TextLoader.Column("month", DataKind.R4, 3),
                    new TextLoader.Column("units", DataKind.R4, 4),
                    new TextLoader.Column("avg", DataKind.R4, 5),
                    new TextLoader.Column("count", DataKind.R4, 6),
                    new TextLoader.Column("max", DataKind.R4, 7),
                    new TextLoader.Column("min", DataKind.R4, 8),
                    new TextLoader.Column("prev", DataKind.R4, 9)
                },
                HasHeader = true,
                Separator = ","
            });

            // Numeric columns are concatenated, productId goes through the categorical estimator,
            // both are merged into "Features", and "next" is copied to "Label" for the trainer.
            var pipeline = new ConcatEstimator(env, "NumFeatures", new[] { "year", "month", "units", "avg", "count", "max", "min", "prev" })
                           .Append(new CategoricalEstimator(env, "CatFeatures", "productId"))
                           .Append(new ConcatEstimator(env, "Features", new[] { "NumFeatures", "CatFeatures" }))
                           .Append(new CopyColumnsEstimator(env, "next", "Label"))
                           .Append(new FastTreeTweedieTrainer(env, "Label", "Features"));

            var datasource = reader.Read(new MultiFileSource(dataPath));

            var cvResults = ctx.CrossValidate(datasource, pipeline, labelColumn: "Label", numFolds: 5);

            // Per-fold metric sequences; each is averaged when printed below.
            var l1 = cvResults.Select(r => r.metrics.L1);
            var l2 = cvResults.Select(r => r.metrics.L2);
            // Fixed: this previously selected metrics.L1, so the reported RMS duplicated the L1 loss.
            var rms = cvResults.Select(r => r.metrics.Rms);
            var lossFunction = cvResults.Select(r => r.metrics.LossFn);
            var rSquared = cvResults.Select(r => r.metrics.RSquared);

            // The model that gets saved is trained on the full data set, not on a CV fold.
            var model = pipeline.Fit(datasource);

            Console.WriteLine($"Average L1 Loss: {l1.Average()}");
            Console.WriteLine($"Average L2 Loss: {l2.Average()}");
            Console.WriteLine($"Average RMS: {rms.Average()}");
            Console.WriteLine($"Average Loss Function: {lossFunction.Average()}");
            Console.WriteLine($"Average R-squared: {rSquared.Average()}");

            // File.Create truncates an existing file. File.OpenWrite does not, so overwriting a
            // larger previous model would leave stale bytes after the newly written content.
            using (var file = File.Create(outputModelPath))
            {
                model.SaveTo(env, file);
            }
        }