/// <summary>
/// Loads the test data file, scores it with the trained model, evaluates the
/// predictions as a regression task and prints the resulting metrics to the console.
/// </summary>
/// <param name="mlcontext">Environment used to create the text loader and the regression context.</param>
/// <param name="testDataLocation">Path of the test/evaluation data file.</param>
/// <param name="model">Trained transformer that is applied to the test data.</param>
/// <returns>The metrics computed from the "Label" and "Score" columns of the scored data.</returns>
private static RegressionEvaluator.Result Evaluate(LocalEnvironment mlcontext,
                                                           string testDataLocation,
                                                           ITransformer model
                                                           )
        {
            // Create a TextLoader whose schema matches the columns of the test/evaluation data file.
            TextLoader textLoader = CreateTaxiFareDataFileLoader(mlcontext);

            // Load the evaluation/test data.
            IDataView testDataView = textLoader.Read(new MultiFileSource(testDataLocation));

            Console.WriteLine("=============== Evaluating Model's accuracy with Test data===============");
            var predictions = model.Transform(testDataView);

            var regressionCtx = new RegressionContext(mlcontext);
            var metrics       = regressionCtx.Evaluate(predictions, "Label", "Score");
            var algorithmName = "SdcaRegressionTrainer";

            // All metrics use the "0.##" format: with "#.##" a value below 1 would lose its
            // leading zero (".5") and an exact zero would be printed as an empty string.
            Console.WriteLine($"*************************************************");
            Console.WriteLine($"*       Metrics for {algorithmName}          ");
            Console.WriteLine($"*------------------------------------------------");
            Console.WriteLine($"*       LossFn: {metrics.LossFn:0.##}");
            Console.WriteLine($"*       R2 Score: {metrics.RSquared:0.##}");
            Console.WriteLine($"*       Absolute loss: {metrics.L1:0.##}");
            Console.WriteLine($"*       Squared loss: {metrics.L2:0.##}");
            Console.WriteLine($"*       RMS loss: {metrics.Rms:0.##}");
            Console.WriteLine($"*************************************************");

            return metrics;
        }
        /// <summary>
        /// Trains a FastTree regression model on the salary data file and prints the
        /// prediction for a sample with eight years of experience.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Alternative source: MLNetUtilities.GetDataPathByDatasetName("SalaryData.csv")
            string dataset = "SalaryData.csv";

            // Alternative source: MLNetUtilities.GetDataPathByDatasetName("SalaryData-test.csv")
            // (declared here but not read anywhere below)
            string testDataset = "SalaryData-test.csv";

            var env = new LocalEnvironment();

            // Statically-typed reader: column 0 is loaded as YearsExperience, column 1 as Target.
            var reader = TextLoader.CreateReader(
                env,
                ctx => (
                    YearsExperience: ctx.LoadFloat(0),
                    Target: ctx.LoadFloat(1)),
                hasHeader: true,
                separator: ',');

            var data = reader.Read(new MultiFileSource(dataset));

            var regression = new RegressionContext(env);

            // Single-step pipeline: FastTree on the experience column, turned into a vector.
            var pipeline = reader.MakeNewEstimator()
                .Append(r => (
                    r.Target,
                    Prediction: regression.Trainers.FastTree(
                        label: r.Target,
                        features: r.YearsExperience.AsVector())));

            Microsoft.ML.Core.Data.ITransformer model = pipeline.Fit(data).AsDynamic;

            var predictionFunc = model.MakePredictionFunction<SalaryData, SalaryPrediction>(env);

            var sample     = new SalaryData { YearsExperience = 8 };
            var prediction = predictionFunc.Predict(sample);

            // Format as currency first, then embed the formatted text in the message.
            string formattedSalary = String.Format("{0:C}", prediction.PredictedSalary);
            Console.WriteLine($"Predicted salary - {formattedSalary}");

            // Wait for console input before returning.
            Console.Read();
        }
Example #3
0
        /// <summary>
        /// Trains an SDCA regression model on the housing data set, prints the first two
        /// learned weights and reports regression metrics on a 10% hold-out split.
        /// </summary>
        public static void SdcaRegression()
        {
            // Download a regression data set from github.com/dotnet/machinelearning.
            // This creates a housing.txt file in the file system where this code runs;
            // the file can be opened to inspect the data.
            string housingPath = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();

            // ML.NET IHostEnvironment object required by the pipeline (seed fixed to 0).
            var env = new LocalEnvironment(seed: 0);

            // Context for the task being performed: regression.
            var regressionContext = new RegressionContext(env);

            // Reader describing the file format: column 0 is the label, columns 1-6 the features.
            var reader = TextLoader.CreateReader(
                env,
                c => (label: c.LoadFloat(0), features: c.LoadFloat(1, 6)),
                separator: '\t',
                hasHeader: true);

            // Read the data and keep 10% aside for testing.
            var allData = reader.Read(new MultiFileSource(housingPath));
            var (trainData, testData) = regressionContext.TrainTestSplit(allData, testFraction: 0.1);

            // Filled in by the onFit callback once training has produced a predictor.
            LinearRegressionPredictor trainedPredictor = null;

            // Estimator: SDCA on (label, features), exposing its output as "score".
            var learningPipeline = reader.MakeNewEstimator()
                .Append(r => (
                    r.label,
                    score: regressionContext.Trainers.Sdca(
                        r.label,
                        r.features,
                        l1Threshold: 0f,
                        maxIterations: 100,
                        onFit: p => trainedPredictor = p)));

            // Fit the pipeline to the training portion.
            var model = learningPipeline.Fit(trainData);

            // Inspect the weights the model learned.
            VBuffer<float> weights = default;
            trainedPredictor.GetFeatureWeights(ref weights);

            Console.WriteLine($"weight 0 - {weights.Values[0]}");
            Console.WriteLine($"weight 1 - {weights.Values[1]}");

            // Score the held-out data and evaluate the predictions.
            var scoredTestData = model.Transform(testData);
            var metrics = regressionContext.Evaluate(scoredTestData, r => r.label, r => r.score);

            Console.WriteLine($"L1 - {metrics.L1}");               // 3.7226085
            Console.WriteLine($"L2 - {metrics.L2}");               // 24.250636
            Console.WriteLine($"LossFunction - {metrics.LossFn}"); // 24.25063
            Console.WriteLine($"RMS - {metrics.Rms}");             // 4.924493
            Console.WriteLine($"RSquared - {metrics.RSquared}");   // 0.565467
        }
        // TODO: instead of internal methods, use a static debug class with a singleton logger?
        /// <summary>
        /// Runs AutoFit for a regression task, optimizing R-squared, and wraps every evaluated
        /// pipeline together with the best one in a <see cref="RegressionResult"/>.
        /// </summary>
        /// <remarks>
        /// <paramref name="context"/>, <paramref name="cancellationToken"/> and
        /// <paramref name="iterationCallback"/> are accepted but not read by this method body.
        /// </remarks>
        /// <returns>The best iteration result plus the results of all iterations.</returns>
        internal static RegressionResult AutoFit(
            this RegressionContext context,
            IDataView trainData,
            string label,
            IDataView validationData = null,
            InferredColumn[] inferredColumns = null,
            AutoFitSettings settings = null,
            CancellationToken cancellationToken = default,
            IProgress<RegressionIterationResult> iterationCallback = null,
            IDebugLogger debugLogger = null)
        {
            // Run AutoFit; it hands back every pipeline it ran and the best one among them.
            var (allPipelines, bestPipeline) = AutoFitApi.Fit(
                trainData, validationData, label, inferredColumns,
                settings, TaskKind.Regression, OptimizingMetric.RSquared, debugLogger);

            // Convert each pipeline run into a regression-specific iteration result.
            var iterationResults = new RegressionIterationResult[allPipelines.Length];
            for (var index = 0; index < iterationResults.Length; index++)
            {
                var pipelineRun = allPipelines[index];
                iterationResults[index] = new RegressionIterationResult(
                    pipelineRun.Model,
                    (RegressionMetrics)pipelineRun.EvaluatedMetrics,
                    pipelineRun.ScoredValidationData);
            }

            var best = new RegressionIterationResult(
                bestPipeline.Model,
                (RegressionMetrics)bestPipeline.EvaluatedMetrics,
                bestPipeline.ScoredValidationData);

            return new RegressionResult(best, iterationResults);
        }
Example #5
0
        /// <summary>
        /// Verifies that an input text column named "Score" is still present, and still of
        /// text type, after an SDCA regression trainer has been appended to the pipeline.
        /// </summary>
        public void SdcaRegressionNameCollision()
        {
            var env        = new MLContext(seed: 0);
            var dataSource = new MultiFileSource(
                GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename));
            var ctx        = new RegressionContext(env);

            // Deliberately load an extra column called "Score" so that it collides with the
            // name of the trainer's default output.
            var reader = TextLoader.CreateReader(
                env,
                c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10), Score: c.LoadText(2)),
                separator: ';',
                hasHeader: true);

            var est = reader.MakeNewEstimator()
                .Append(r => (
                    r.label,
                    r.Score,
                    score: ctx.Trainers.Sdca(
                        r.label,
                        r.features,
                        maxIterations: 2,
                        advancedSettings: s => s.NumThreads = 1)));

            var model  = reader.Append(est).Fit(dataSource);
            var scored = model.Read(dataSource);

            // The original column must still be there, and must still be text.
            var schema = scored.AsDynamic.Schema;

            int scoreCol;
            bool hasScoreColumn = schema.TryGetColumnIndex("Score", out scoreCol);
            Assert.True(hasScoreColumn, "Score column not present!");
            Assert.Equal(TextType.Instance, schema[scoreCol].Type);

            // Dump name and type of every column.
            for (int col = 0; col < schema.Count; col++)
            {
                Console.WriteLine($"{schema[col].Name}, {schema[col].Type}");
            }
        }
Example #6
0
        /// <summary>
        /// Trains an SDCA regression model on a generated training file, prints the first two
        /// learned weights and reports regression metrics computed on the separate test file.
        /// </summary>
        public static void SdcaRegression()
        {
            var (trainDataPath, testDataPath) = DatasetCreator.CreateRegressionDataset();

            // ML.NET IHostEnvironment object required by the pipeline (seed fixed to 0).
            var env = new LocalEnvironment(seed: 0);

            // Context for the task being performed: regression.
            var regressionContext = new RegressionContext(env);

            // Reader describing the file format: column 2 is the label, columns 0-1 the features.
            var reader = TextLoader.CreateReader(
                env,
                c => (label: c.LoadFloat(2), features: c.LoadFloat(0, 1)),
                separator: ',',
                hasHeader: true);

            // Read the training data.
            var trainData = reader.Read(new MultiFileSource(trainDataPath));

            // Filled in by the onFit callback once training has produced a predictor.
            LinearRegressionPredictor trainedPredictor = null;

            // Estimator: SDCA on (label, features), exposing its output as "score".
            var learningPipeline = reader.MakeNewEstimator()
                .Append(r => (
                    r.label,
                    score: regressionContext.Trainers.Sdca(
                        r.label,
                        r.features,
                        l1Threshold: 0f,
                        maxIterations: 100,
                        onFit: p => trainedPredictor = p)));

            // Fit the pipeline to the training data.
            var model = learningPipeline.Fit(trainData);

            // Inspect the weights the model learned.
            VBuffer<float> weights = default;
            trainedPredictor.GetFeatureWeights(ref weights);

            Console.WriteLine($"weight 0 - {weights.Values[0]}");
            Console.WriteLine($"weight 1 - {weights.Values[1]}");

            // Score the test file with the model that was just trained.
            var testData   = reader.Read(new MultiFileSource(testDataPath));
            var scoredData = model.Transform(testData);

            // Evaluate how the model performs on the test data.
            var metrics = regressionContext.Evaluate(scoredData, r => r.label, r => r.score);

            Console.WriteLine($"L1 - {metrics.L1}");
            Console.WriteLine($"L2 - {metrics.L2}");
            Console.WriteLine($"LossFunction - {metrics.LossFn}");
            Console.WriteLine($"RMS - {metrics.Rms}");
            Console.WriteLine($"RSquared - {metrics.RSquared}");
        }
Example #7
0
        /// <summary>
        /// Cross-validates a FastTree regression pipeline on the housing data set with five
        /// folds and prints each metric averaged over the folds.
        /// </summary>
        public static void FastTreeRegression()
        {
            // Download a regression data set from github.com/dotnet/machinelearning.
            // This creates a housing.txt file in the file system where this code runs;
            // the file can be opened to inspect the data.
            string housingPath = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();

            // ML.NET IHostEnvironment object required by the pipeline (seed fixed to 0).
            var env = new LocalEnvironment(seed: 0);

            // Context for the task being performed: regression.
            var regressionContext = new RegressionContext(env);

            // Reader describing the file format: column 0 is the label, columns 1-6 the features.
            var reader = TextLoader.CreateReader(
                env,
                c => (label: c.LoadFloat(0), features: c.LoadFloat(1, 6)),
                separator: '\t',
                hasHeader: true);

            // Read the whole file; the folds are produced by CrossValidate below.
            var data = reader.Read(new MultiFileSource(housingPath));

            // Assigned by the onFit callback; not read again in this method.
            FastTreeRegressionPredictor pred = null;

            // Estimator: FastTree on (label, features), exposing its output as "score".
            var learningPipeline = reader.MakeNewEstimator()
                .Append(r => (
                    r.label,
                    score: regressionContext.Trainers.FastTree(
                        r.label,
                        r.features,
                        numTrees: 100,            // try: (int) 20-2000
                        numLeaves: 20,            // try: (int) 2-128
                        minDatapointsInLeafs: 10, // try: (int) 1-100
                        learningRate: 0.2,        // try: (float) 0.025-0.4
                        onFit: p => pred = p)));

            var cvResults = regressionContext.CrossValidate(data, learningPipeline, r => r.label, numFolds: 5);

            // Average every metric over the folds before printing anything.
            var averageL1       = cvResults.Select(fold => fold.metrics.L1).Average();
            var averageL2       = cvResults.Select(fold => fold.metrics.L2).Average();
            var averageLossFn   = cvResults.Select(fold => fold.metrics.LossFn).Average();
            var averageRms      = cvResults.Select(fold => fold.metrics.Rms).Average();
            var averageRSquared = cvResults.Select(fold => fold.metrics.RSquared).Average();

            Console.WriteLine($"L1 - {averageL1}");
            Console.WriteLine($"L2 - {averageL2}");
            Console.WriteLine($"LossFunction - {averageLossFn}");
            Console.WriteLine($"RMS - {averageRms}");
            Console.WriteLine($"RSquared - {averageRSquared}");
        }
        /// <summary>
        /// Builds a FastTree Tweedie regression model that predicts the "next" column from
        /// per-product features, prints 5-fold cross-validation averages and saves the model
        /// fitted on the complete data set.
        /// </summary>
        /// <param name="dataPath">Input training file path</param>
        /// <param name="outputModelPath">Path of the file the trained model is written to.</param>
        private static void CreateProductModelUsingPipeline(string dataPath, string outputModelPath)
        {
            var env = new LocalEnvironment(seed: 1);  //Seed set to any number so you have a deterministic environment
            var ctx = new RegressionContext(env);

            ConsoleWriteHeader("Training product forecasting");

            var reader = new TextLoader(env, new TextLoader.Arguments
            {
                Column = new[] {
                    new TextLoader.Column("next", DataKind.R4, 0),
                    new TextLoader.Column("productId", DataKind.Text, 1),
                    new TextLoader.Column("year", DataKind.R4, 2),
                    new TextLoader.Column("month", DataKind.R4, 3),
                    new TextLoader.Column("units", DataKind.R4, 4),
                    new TextLoader.Column("avg", DataKind.R4, 5),
                    new TextLoader.Column("count", DataKind.R4, 6),
                    new TextLoader.Column("max", DataKind.R4, 7),
                    new TextLoader.Column("min", DataKind.R4, 8),
                    new TextLoader.Column("prev", DataKind.R4, 9)
                },
                HasHeader = true,
                Separator = ","
            });

            // Numeric columns and the categorical transform of productId are concatenated into
            // "Features"; "next" is copied to "Label" for the trainer.
            var pipeline = new ConcatEstimator(env, "NumFeatures", new[] { "year", "month", "units", "avg", "count", "max", "min", "prev" })
                           .Append(new CategoricalEstimator(env, "CatFeatures", "productId"))
                           .Append(new ConcatEstimator(env, "Features", new[] { "NumFeatures", "CatFeatures" }))
                           .Append(new CopyColumnsEstimator(env, "next", "Label"))
                           .Append(new FastTreeTweedieTrainer(env, "Label", "Features"));

            var datasource = reader.Read(new MultiFileSource(dataPath));

            var cvResults = ctx.CrossValidate(datasource, pipeline, labelColumn: "Label", numFolds: 5);

            var l1           = cvResults.Select(r => r.metrics.L1);
            var l2           = cvResults.Select(r => r.metrics.L2);
            // Fixed: this previously selected metrics.L1, so "Average RMS" repeated the L1 loss.
            var rms          = cvResults.Select(r => r.metrics.Rms);
            var lossFunction = cvResults.Select(r => r.metrics.LossFn);
            var r2           = cvResults.Select(r => r.metrics.RSquared);

            var model = pipeline.Fit(datasource);

            Console.WriteLine("Average L1 Loss: " + l1.Average());
            Console.WriteLine("Average L2 Loss: " + l2.Average());
            Console.WriteLine("Average RMS: " + rms.Average());
            Console.WriteLine("Average Loss Function: " + lossFunction.Average());
            Console.WriteLine("Average R-squared: " + r2.Average());

            // File.Create truncates an existing file; File.OpenWrite would keep stale trailing
            // bytes when a previously saved model at the same path was larger.
            using (var file = File.Create(outputModelPath))
            {
                model.SaveTo(env, file);
            }
        }
 /// <summary>
 /// Public AutoFit entry point for regression. Forwards all arguments to the overload that
 /// takes an additional trailing argument, passing <c>null</c> for it.
 /// </summary>
 /// <remarks>
 /// The forwarded call passes <paramref name="inferredColumns"/> before
 /// <paramref name="settings"/>, i.e. in the opposite order to this signature.
 /// </remarks>
 public static RegressionResult AutoFit(
     this RegressionContext context,
     IDataView trainData,
     string label,
     IDataView validationData = null,
     AutoFitSettings settings = null,
     InferredColumn[] inferredColumns = null,
     CancellationToken cancellationToken = default,
     IProgress<RegressionIterationResult> iterationCallback = null)
     => AutoFit(
         context,
         trainData,
         label,
         validationData,
         inferredColumns,
         settings,
         cancellationToken,
         iterationCallback,
         null);
Example #10
0
        /// <summary>
        /// Initializes the ML context: creates the underlying environment, attaches the message
        /// listener and constructs every task context and catalog from that environment.
        /// </summary>
        /// <param name="seed">Random seed. Set to <c>null</c> for a non-deterministic environment.</param>
        /// <param name="conc">Concurrency level. Set to 1 to run single-threaded. Set to 0 to pick automatically.</param>
        public MLContext(int? seed = null, int conc = 0)
        {
            // Environment that everything below is constructed with.
            _env = new LocalEnvironment(seed, conc, MakeCompositionContainer);
            _env.AddListener(ProcessMessage);

            // Task-specific contexts.
            BinaryClassification = new BinaryClassificationContext(_env);
            MulticlassClassification = new MulticlassClassificationContext(_env);
            Regression = new RegressionContext(_env);
            Clustering = new ClusteringContext(_env);
            Ranking = new RankingContext(_env);

            // Catalogs and data operations.
            Transforms = new TransformsCatalog(_env);
            Model = new ModelOperationsCatalog(_env);
            Data = new DataOperations(_env);
        }
Example #11
0
        /// <summary>
        /// Sets up a statically-typed reader for the hourly data files, reads the train and
        /// test sets and creates an empty estimator; no learning pipeline is defined yet.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var environment       = new LocalEnvironment();
            var regressionContext = new RegressionContext(environment);

            // Maps columns of the comma-separated files (by index) to named, typed fields.
            var reader = TextLoader.CreateReader(
                environment,
                c => (
                    Season: c.LoadFloat(2),
                    Year: c.LoadFloat(3),
                    Month: c.LoadFloat(4),
                    Hour: c.LoadFloat(5),
                    Holiday: c.LoadText(6),
                    Weekday: c.LoadBool(7),
                    WorkingDay: c.LoadFloat(8),
                    Weather: c.LoadFloat(9),
                    Temperature: c.LoadFloat(10),
                    NormalizedTemperature: c.LoadFloat(11),
                    Humidity: c.LoadFloat(12),
                    Windspeed: c.LoadFloat(13),
                    Count: c.LoadFloat(16)),
                separator: ',',
                hasHeader: true);

            var trainData = reader.Read(new MultiFileSource(@"Data/hour_train.csv"));
            var testData  = reader.Read(new MultiFileSource(@"Data/hour_test.csv"));

            var estimator = reader.MakeNewEstimator();

            // TODO: the pipeline is unfinished. Sketch of the intended next step:
            //     estimator.Append(r => (Features: r.Season.ConcatWith(r.Month, r.Hour), ...));
        }
Example #12
0
        /// <summary>
        /// Trains an SDCA regression model through the statically-typed API, checks that the
        /// onFit callback delivers 11 weights and sanity-checks the evaluation metrics.
        /// </summary>
        public void SdcaRegression()
        {
            var env        = new MLContext(seed: 0, conc: 1);
            var dataSource = new MultiFileSource(
                GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename));

            var ctx = new RegressionContext(env);

            // Column 11 is the label; columns 0-10 form the feature vector.
            var reader = TextLoader.CreateReader(
                env,
                c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
                separator: ';',
                hasHeader: true);

            // Filled in by the onFit callback during Fit.
            LinearRegressionModelParameters trainedParameters = null;

            var est = reader.MakeNewEstimator()
                .Append(r => (
                    r.label,
                    score: ctx.Trainers.Sdca(
                        r.label,
                        r.features,
                        maxIterations: 2,
                        onFit: p => trainedParameters = p,
                        advancedSettings: s => s.NumThreads = 1)));

            var pipe = reader.Append(est);

            // Nothing has been trained before Fit is called.
            Assert.Null(trainedParameters);
            var model = pipe.Fit(dataSource);
            Assert.NotNull(trainedParameters);

            // 11 input features, so we ought to have 11 weights.
            Assert.Equal(11, trainedParameters.Weights.Count);

            var scored  = model.Read(dataSource);
            var metrics = ctx.Evaluate(scored, r => r.label, r => r.score, new PoissonLoss());

            // Sanity-check a few of the metrics.
            Assert.InRange(metrics.L1, 0, double.PositiveInfinity);
            Assert.InRange(metrics.L2, 0, double.PositiveInfinity);
            Assert.InRange(metrics.Rms, 0, double.PositiveInfinity);
            Assert.Equal(metrics.Rms * metrics.Rms, metrics.L2, 5);
            Assert.InRange(metrics.LossFn, 0, double.PositiveInfinity);

            // Print name and type of every column in the output schema.
            var schema = scored.AsDynamic.Schema;
            for (int col = 0; col < schema.Count; col++)
            {
                Console.WriteLine($"{schema[col].Name}, {schema[col].Type}");
            }
        }
Example #13
0
        /// <summary>
        /// Trains an online-gradient-descent regressor with squared loss through the
        /// statically-typed API, checks that 11 feature weights were learned and
        /// sanity-checks the evaluation metrics.
        /// </summary>
        public void OnlineGradientDescent()
        {
            var env        = new MLContext(seed: 0);
            var dataSource = new MultiFileSource(
                GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename));

            var ctx = new RegressionContext(env);

            // Column 11 is the label; columns 0-10 form the feature vector.
            var reader = TextLoader.CreateReader(
                env,
                c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
                separator: ';',
                hasHeader: true);

            // Filled in by the onFit callback during Fit.
            LinearRegressionModelParameters trainedParameters = null;

            var loss = new SquaredLoss();

            var est = reader.MakeNewEstimator()
                .Append(r => (
                    r.label,
                    score: ctx.Trainers.OnlineGradientDescent(
                        r.label,
                        r.features,
                        lossFunction: loss,
                        onFit: p => trainedParameters = p)));

            var pipe = reader.Append(est);

            // Nothing has been trained before Fit is called.
            Assert.Null(trainedParameters);
            var model = pipe.Fit(dataSource);
            Assert.NotNull(trainedParameters);

            // 11 input features, so we ought to have 11 weights.
            var weights = new VBuffer<float>();
            trainedParameters.GetFeatureWeights(ref weights);
            Assert.Equal(11, weights.Length);

            var scored  = model.Read(dataSource);
            var metrics = ctx.Evaluate(scored, r => r.label, r => r.score, new PoissonLoss());

            // Sanity-check a few of the metrics.
            Assert.InRange(metrics.L1, 0, double.PositiveInfinity);
            Assert.InRange(metrics.L2, 0, double.PositiveInfinity);
            Assert.InRange(metrics.Rms, 0, double.PositiveInfinity);
            Assert.Equal(metrics.Rms * metrics.Rms, metrics.L2, 5);
            Assert.InRange(metrics.LossFn, 0, double.PositiveInfinity);
        }
Example #14
0
        /// <summary>
        /// Trains a Poisson regression model through the statically-typed API, checks that
        /// 11 feature weights were learned and sanity-checks the evaluation metrics.
        /// </summary>
        public void PoissonRegression()
        {
            var env        = new MLContext(seed: 0);
            var dataSource = new MultiFileSource(
                GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename));

            var ctx = new RegressionContext(env);

            // Column 11 is the label; columns 0-10 form the feature vector.
            var reader = TextLoader.CreateReader(
                env,
                c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
                separator: ';',
                hasHeader: true);

            // Filled in by the onFit callback during Fit.
            PoissonRegressionPredictor trainedPredictor = null;

            var est = reader.MakeNewEstimator()
                .Append(r => (
                    r.label,
                    score: ctx.Trainers.PoissonRegression(
                        r.label,
                        r.features,
                        l1Weight: 2,
                        enoforceNoNegativity: true,
                        onFit: p => trainedPredictor = p,
                        advancedSettings: s => s.NumThreads = 1)));

            var pipe = reader.Append(est);

            // Nothing has been trained before Fit is called.
            Assert.Null(trainedPredictor);
            var model = pipe.Fit(dataSource);
            Assert.NotNull(trainedPredictor);

            // 11 input features, so we ought to have 11 weights.
            var weights = new VBuffer<float>();
            trainedPredictor.GetFeatureWeights(ref weights);
            Assert.Equal(11, weights.Length);

            var scored  = model.Read(dataSource);
            var metrics = ctx.Evaluate(scored, r => r.label, r => r.score, new PoissonLoss());

            // Sanity-check a few of the metrics.
            Assert.InRange(metrics.L1, 0, double.PositiveInfinity);
            Assert.InRange(metrics.L2, 0, double.PositiveInfinity);
            Assert.InRange(metrics.Rms, 0, double.PositiveInfinity);
            Assert.Equal(metrics.Rms * metrics.Rms, metrics.L2, 5);
            Assert.InRange(metrics.LossFn, 0, double.PositiveInfinity);
        }
Example #15
0
 PermutationFeatureImportance(
     this RegressionContext ctx,
     IPredictionTransformer <IPredictor> model,
     IDataView data,
     string label                = DefaultColumnNames.Label,
     string features             = DefaultColumnNames.Features,
     bool useFeatureWeightFilter = false,
     int?topExamples             = null)
 {
     // Regression-specific entry point: forwards to the generic
     // PermutationFeatureImportance<RegressionEvaluator.Result> helper and returns its result.
     // NOTE(review): the modifiers and return type of this method are not visible in this
     // excerpt - confirm them against the full source.
     return(PermutationFeatureImportance <RegressionEvaluator.Result> .GetImportanceMetricsMatrix(
                // Environment obtained from the regression context.
                CatalogUtils.GetEnvironment(ctx),
                model,
                data,
                // Evaluation callback: runs the regression evaluator on a data view with the given label column.
                idv => ctx.Evaluate(idv, label),
                // Presumably computes the difference between two metric results - TODO confirm.
                RegressionDelta,
                features,
                useFeatureWeightFilter,
                topExamples));
 }
Example #16
0
        /// <summary>
        /// Builds a FastTree Tweedie regression model that predicts the "next" column from
        /// per-country features and saves the fitted model to a file.
        /// </summary>
        /// <param name="dataPath">Input training file path</param>
        /// <param name="outputModelPath">Path of the file the trained model is written to.</param>
        private static void CreateCountryModel(string dataPath, string outputModelPath)
        {
            var env = new LocalEnvironment(seed: 1);  //Seed set to any number so you have a deterministic environment

            ConsoleWriteHeader("Training country forecasting model");

            var reader = new TextLoader(env, new TextLoader.Arguments
            {
                Column = new[] {
                    new TextLoader.Column("next", DataKind.R4, 0),
                    new TextLoader.Column("country", DataKind.Text, 1),
                    new TextLoader.Column("year", DataKind.R4, 2),
                    new TextLoader.Column("month", DataKind.R4, 3),
                    new TextLoader.Column("max", DataKind.R4, 4),
                    new TextLoader.Column("min", DataKind.R4, 5),
                    new TextLoader.Column("std", DataKind.R4, 6),
                    new TextLoader.Column("count", DataKind.R4, 7),
                    new TextLoader.Column("sales", DataKind.R4, 8),
                    new TextLoader.Column("med", DataKind.R4, 9),
                    new TextLoader.Column("prev", DataKind.R4, 10)
                },
                HasHeader = true,
                Separator = ","
            });

            // Numeric columns and the categorical transform of country are concatenated into
            // "Features"; "next" is copied to "Label" for the trainer.
            var pipeline = new ConcatEstimator(env, "NumFeatures", new[] { "year", "month", "max", "min", "std", "count", "sales", "med", "prev" })
                           .Append(new CategoricalEstimator(env, "CatFeatures", "country"))
                           .Append(new ConcatEstimator(env, "Features", new[] { "NumFeatures", "CatFeatures" }))
                           .Append(new CopyColumnsEstimator(env, "next", "Label"))
                           .Append(new FastTreeTweedieTrainer(env, "Label", "Features"));

            var datasource = reader.Read(new MultiFileSource(dataPath));
            var model      = pipeline.Fit(datasource);

            // File.Create truncates an existing file; File.OpenWrite would keep stale trailing
            // bytes when a previously saved model at the same path was larger.
            using (var file = File.Create(outputModelPath))
            {
                model.SaveTo(env, file);
            }
        }
 /// <summary>
 /// Requests a regression pipeline from <c>PipelineSuggesterApi</c> for the supplied data and label.
 /// </summary>
 /// <param name="context">Regression context this extension method is attached to; the body does not read it.</param>
 /// <param name="dataView">Data view passed through to the pipeline suggester.</param>
 /// <param name="label">Label passed through to the pipeline suggester (presumably the label column name).</param>
 /// <returns>The result of <c>PipelineSuggesterApi.GetPipeline</c> called with <c>TaskKind.Regression</c>.</returns>
 public static Pipeline GetPipeline(this RegressionContext context, IDataView dataView, string label)
     => PipelineSuggesterApi.GetPipeline(TaskKind.Regression, dataView, label);
Example #18
0
        /// <summary>
        /// Trains an SDCA regression model with the statically-typed pipeline API, evaluates it on a
        /// test file, saves the model to disk and loads it back.
        /// </summary>
        /// <param name="trainDataPath">Path of the training data file (semicolon-separated, with a header row).</param>
        /// <param name="testDataPath">Path of the test data file; it is read with the same reader as the training file.</param>
        /// <param name="modelPath">Path the trained model is written to (created or overwritten) and then read back from.</param>
        private void TrainRegression(string trainDataPath, string testDataPath, string modelPath)
        {
            // Create a new environment for ML.NET operations. It can be used for exception tracking and logging,
            // as well as the source of randomness.
            var env = new LocalEnvironment();

            // Step one: read the data as an IDataView.
            // First, we define the reader: specify the data columns and where to find them in the text file.
            var reader = TextLoader.CreateReader(env, ctx => (
                                                     // We read the first 11 values as a single float vector.
                                                     FeatureVector: ctx.LoadFloat(0, 10),
                                                     // Separately, read the target variable.
                                                     Target: ctx.LoadFloat(11)
                                                     ),
                                                 // The data file has header.
                                                 hasHeader: true,
                                                 // Default separator is tab, but we need a semicolon.
                                                 separator: ';');


            // Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
            var trainData = reader.Read(new MultiFileSource(trainDataPath));

            // Step two: define the learning pipeline.
            // We know that this is a regression task, so we create a regression context: it will give us the algorithms
            // we need, as well as the evaluation procedure.
            var regression = new RegressionContext(env);

            // We 'start' the pipeline with the output of the reader.
            var learningPipeline = reader.MakeNewEstimator()
                                   // Now we can add any 'training steps' to it. In our case we want to 'normalize' the data (rescale to be
                                   // between -1 and 1 for all examples), and then train the model.
                                   .Append(r => (
                                               // Retain the 'Target' column for evaluation purposes.
                                               r.Target,
                                               // We choose the SDCA regression trainer. Note that we normalize the 'FeatureVector' right here in
                                               // the same call.
                                               Prediction: regression.Trainers.Sdca(label: r.Target, features: r.FeatureVector.Normalize())));

            // Step three. Train the pipeline.
            var model = learningPipeline.Fit(trainData);

            // Read the test dataset.
            var testData = reader.Read(new MultiFileSource(testDataPath));
            // Calculate metrics of the model on the test data.
            // We are using the 'regression' context object here to perform evaluation.
            // The metrics are not consumed further in this method.
            var metrics = regression.Evaluate(model.Transform(testData), label: r => r.Target, score: r => r.Prediction);

            using (var stream = File.Create(modelPath))
            {
                // Saving and loading happens to 'dynamic' models, so the static typing is lost in the process.
                model.AsDynamic.SaveTo(env, stream);
            }

            // Potentially, the lines below can be in a different process altogether.

            // When you load the model, it's a 'dynamic' transformer.
            // The loaded transformer is not used after this point in this method.
            ITransformer loadedModel;

            using (var stream = File.OpenRead(modelPath))
            {
                loadedModel = TransformerChain.LoadFrom(env, stream);
            }
        }
 public static RegressionResult AutoFit(this RegressionContext context,
                                        IDataView trainData,
                                        string label,
                                        IDataView validationData = null,
                                        AutoFitSettings settings = null,
                                        IEnumerable <(string, ColumnPurpose)> purposeOverrides = null,