Ejemplo n.º 1
0
        /// <summary>
        /// Trains an SDCA regression model through the statically-typed pipeline, verifies that the
        /// <c>onFit</c> callback hands back a predictor with 11 weights, sanity-checks the evaluation
        /// metrics, and finally writes the name and type of every output column to the console.
        /// </summary>
        public void SdcaRegression()
        {
            var environment  = new ConsoleEnvironment(seed: 0);
            var trainingFile = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
            var source       = new MultiFileSource(trainingFile);
            var regression   = new RegressionContext(environment);

            // Columns 0 through 10 are loaded as the feature vector, column 11 as the label.
            var loader = TextLoader.CreateReader(
                environment,
                row => (label: row.LoadFloat(11), features: row.LoadFloat(0, 10)),
                separator: ';',
                hasHeader: true);

            // Assigned by the trainer's onFit callback during fitting.
            LinearRegressionPredictor fitted = null;

            var trainerStep = loader.MakeNewEstimator().Append(
                row => (
                    row.label,
                    score: regression.Trainers.Sdca(
                        row.label,
                        row.features,
                        maxIterations: 2,
                        onFit: predictor => { fitted = predictor; })));
            var pipeline = loader.Append(trainerStep);

            // Building the pipeline alone must not trigger the callback.
            Assert.Null(fitted);
            var trained = pipeline.Fit(source);
            Assert.NotNull(fitted);

            // 11 input features, so we ought to have 11 weights.
            Assert.Equal(11, fitted.Weights2.Count);

            var scored  = trained.Read(source);
            var metrics = regression.Evaluate(scored, row => row.label, row => row.score, new PoissonLoss());

            // Run a sanity check against a few of the metrics.
            Assert.InRange(metrics.L1, 0, double.PositiveInfinity);
            Assert.InRange(metrics.L2, 0, double.PositiveInfinity);
            Assert.InRange(metrics.Rms, 0, double.PositiveInfinity);
            var rmsSquared = metrics.Rms * metrics.Rms;
            Assert.Equal(rmsSquared, metrics.L2, 5);
            Assert.InRange(metrics.LossFn, 0, double.PositiveInfinity);

            // Just output some data on the schema for fun.
            var outputSchema = scored.AsDynamic.Schema;
            for (int column = 0; column < outputSchema.ColumnCount; column++)
            {
                var columnName = outputSchema.GetColumnName(column);
                var columnType = outputSchema.GetColumnType(column);
                Console.WriteLine($"{columnName}, {columnType}");
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Trains an online-gradient-descent regressor with squared loss through the statically-typed
        /// pipeline, verifies that the <c>onFit</c> callback hands back model parameters with 11
        /// feature weights, and sanity-checks the evaluation metrics.
        /// </summary>
        public void OnlineGradientDescent()
        {
            var mlContext    = new MLContext(seed: 0);
            var trainingFile = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
            var source       = new MultiFileSource(trainingFile);
            var regression   = new RegressionContext(mlContext);

            // Columns 0 through 10 are loaded as the feature vector, column 11 as the label.
            var loader = TextLoader.CreateReader(
                mlContext,
                row => (label: row.LoadFloat(11), features: row.LoadFloat(0, 10)),
                separator: ';',
                hasHeader: true);

            // Assigned by the trainer's onFit callback during fitting.
            LinearRegressionModelParameters fitted = null;

            var squaredLoss = new SquaredLoss();

            var trainerStep = loader.MakeNewEstimator().Append(
                row => (
                    row.label,
                    score: regression.Trainers.OnlineGradientDescent(
                        row.label,
                        row.features,
                        lossFunction: squaredLoss,
                        onFit: parameters => fitted = parameters)));
            var pipeline = loader.Append(trainerStep);

            // Building the pipeline alone must not trigger the callback.
            Assert.Null(fitted);
            var trained = pipeline.Fit(source);
            Assert.NotNull(fitted);

            // 11 input features, so we ought to have 11 weights.
            var featureWeights = new VBuffer<float>();
            fitted.GetFeatureWeights(ref featureWeights);
            Assert.Equal(11, featureWeights.Length);

            var scored  = trained.Read(source);
            var metrics = regression.Evaluate(scored, row => row.label, row => row.score, new PoissonLoss());

            // Run a sanity check against a few of the metrics.
            Assert.InRange(metrics.L1, 0, double.PositiveInfinity);
            Assert.InRange(metrics.L2, 0, double.PositiveInfinity);
            Assert.InRange(metrics.Rms, 0, double.PositiveInfinity);
            var rmsSquared = metrics.Rms * metrics.Rms;
            Assert.Equal(rmsSquared, metrics.L2, 5);
            Assert.InRange(metrics.LossFn, 0, double.PositiveInfinity);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Trains a Poisson regression model (L1 weight 2, non-negativity enforced, single thread)
        /// through the statically-typed pipeline, verifies that the <c>onFit</c> callback hands back
        /// a predictor with 11 feature weights, and sanity-checks the evaluation metrics.
        /// </summary>
        public void PoissonRegression()
        {
            var mlContext    = new MLContext(seed: 0);
            var trainingFile = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
            var source       = new MultiFileSource(trainingFile);
            var regression   = new RegressionContext(mlContext);

            // Columns 0 through 10 are loaded as the feature vector, column 11 as the label.
            var loader = TextLoader.CreateReader(
                mlContext,
                row => (label: row.LoadFloat(11), features: row.LoadFloat(0, 10)),
                separator: ';',
                hasHeader: true);

            // Assigned by the trainer's onFit callback during fitting.
            PoissonRegressionPredictor fitted = null;

            var trainerStep = loader.MakeNewEstimator().Append(
                row => (
                    row.label,
                    score: regression.Trainers.PoissonRegression(
                        row.label,
                        row.features,
                        l1Weight: 2,
                        enoforceNoNegativity: true,
                        onFit: predictor => fitted = predictor,
                        advancedSettings: settings => { settings.NumThreads = 1; })));
            var pipeline = loader.Append(trainerStep);

            // Building the pipeline alone must not trigger the callback.
            Assert.Null(fitted);
            var trained = pipeline.Fit(source);
            Assert.NotNull(fitted);

            // 11 input features, so we ought to have 11 weights.
            var featureWeights = new VBuffer<float>();
            fitted.GetFeatureWeights(ref featureWeights);
            Assert.Equal(11, featureWeights.Length);

            var scored  = trained.Read(source);
            var metrics = regression.Evaluate(scored, row => row.label, row => row.score, new PoissonLoss());

            // Run a sanity check against a few of the metrics.
            Assert.InRange(metrics.L1, 0, double.PositiveInfinity);
            Assert.InRange(metrics.L2, 0, double.PositiveInfinity);
            Assert.InRange(metrics.Rms, 0, double.PositiveInfinity);
            var rmsSquared = metrics.Rms * metrics.Rms;
            Assert.Equal(rmsSquared, metrics.L2, 5);
            Assert.InRange(metrics.LossFn, 0, double.PositiveInfinity);
        }
Ejemplo n.º 4
0
 PermutationFeatureImportance(
     this RegressionContext ctx,
     IPredictionTransformer <IPredictor> model,
     IDataView data,
     string label                = DefaultColumnNames.Label,
     string features             = DefaultColumnNames.Features,
     bool useFeatureWeightFilter = false,
     int?topExamples             = null)
 {
     // NOTE(review): the modifiers and return type of this extension method are not visible in this
     // excerpt; the returned value is whatever GetImportanceMetricsMatrix produces.
     //
     // Thin wrapper for the regression case: forwards to the generic helper specialised on
     // RegressionEvaluator.Result. The environment is obtained from the context, each data view the
     // helper supplies is scored with ctx.Evaluate against the given label column, and RegressionDelta
     // is passed as the delta function. The remaining arguments are forwarded unchanged.
     return(PermutationFeatureImportance <RegressionEvaluator.Result> .GetImportanceMetricsMatrix(
                CatalogUtils.GetEnvironment(ctx),
                model,
                data,
                idv => ctx.Evaluate(idv, label),
                RegressionDelta,
                features,
                useFeatureWeightFilter,
                topExamples));
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Walks through a complete regression workflow: read the training file, train an SDCA
        /// regressor on normalized features, evaluate it on the test file, save the model to
        /// <paramref name="modelPath"/> and load it back as a dynamic transformer.
        /// </summary>
        /// <param name="trainDataPath">Path of the semicolon-separated training file (with header).</param>
        /// <param name="testDataPath">Path of the test file, read with the same reader.</param>
        /// <param name="modelPath">Path the trained model is written to and then re-read from.</param>
        private void TrainRegression(string trainDataPath, string testDataPath, string modelPath)
        {
            // The environment backs ML.NET operations: exception tracking, logging and the source
            // of randomness.
            var environment = new LocalEnvironment();

            // Step one: describe how to read the data as an IDataView.
            // The first 11 values form one float vector; the target variable is read separately.
            // The file has a header, and the separator is a semicolon rather than the default tab.
            var loader = TextLoader.CreateReader(
                environment,
                row => (
                    FeatureVector: row.LoadFloat(0, 10),
                    Target: row.LoadFloat(11)),
                hasHeader: true,
                separator: ';');

            // Readers are lazy: the file is only actually read once the data is accessed.
            var trainingSource = new MultiFileSource(trainDataPath);
            var trainData = loader.Read(trainingSource);

            // Step two: define the learning pipeline.
            // A regression context supplies both the trainers and the evaluation procedure.
            var regression = new RegressionContext(environment);

            // The pipeline 'starts' from the reader's output. The single step keeps 'Target' for
            // evaluation and trains SDCA regression, normalizing 'FeatureVector' in the same call.
            var learningPipeline = loader.MakeNewEstimator().Append(
                row => (
                    row.Target,
                    Prediction: regression.Trainers.Sdca(
                        label: row.Target,
                        features: row.FeatureVector.Normalize())));

            var fx = trainData.GetColumn(row => row.FeatureVector);

            // Step three: train the pipeline.
            var model = learningPipeline.Fit(trainData);

            // Read the test dataset and score it with the trained model.
            var testSource = new MultiFileSource(testDataPath);
            var testData = loader.Read(testSource);
            var scoredTestData = model.Transform(testData);

            // The 'regression' context object performs the evaluation.
            var metrics = regression.Evaluate(scoredTestData, label: row => row.Target, score: row => row.Prediction);

            // Saving and loading happens to 'dynamic' models, so the static typing is lost in the process.
            using (var output = File.Create(modelPath))
            {
                model.AsDynamic.SaveTo(environment, output);
            }

            // Potentially, everything below can run in a different process altogether.

            // A loaded model is a 'dynamic' transformer.
            ITransformer loadedModel;

            using (var input = File.OpenRead(modelPath))
            {
                loadedModel = TransformerChain.LoadFrom(environment, input);
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Sample: trains a LightGBM regressor on the downloaded housing dataset, prints the first two
        /// feature weights of the fitted predictor, and then prints the regression metrics computed
        /// on a held-out 10% test split.
        /// </summary>
        public static void LightGbmRegression()
        {
            // Download a regression dataset from github.com/dotnet/machinelearning. This creates a
            // housing.txt file in the file system where this code runs; open it to inspect the data.
            string housingFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();

            // The ML.NET IHostEnvironment object needed by the pipeline.
            var environment = new LocalEnvironment(seed: 0);

            // The ML context for the task being performed (regression).
            var regression = new RegressionContext(environment);

            // A data reader matching the file format: column 0 is the label, columns 1 through 6
            // are the features.
            var loader = TextLoader.CreateReader(
                environment,
                row => (label: row.LoadFloat(0), features: row.LoadFloat(1, 6)),
                separator: '\t',
                hasHeader: true);

            // Read the data and hold 10% of it back for testing.
            var housingSource = new MultiFileSource(housingFile);
            var allData = loader.Read(housingSource);
            var (trainSet, testSet) = regression.TrainTestSplit(allData, testFraction: 0.1);

            // Receives the predictor produced by training, via the onFit callback.
            LightGbmRegressionPredictor fitted = null;

            // Create the estimator.
            var learningPipeline = loader.MakeNewEstimator().Append(
                row => (
                    row.label,
                    score: regression.Trainers.LightGbm(
                        row.label,
                        row.features,
                        numLeaves: 4,
                        minDataPerLeaf: 6,
                        learningRate: 0.001,
                        onFit: predictor => { fitted = predictor; })));

            // Fit the pipeline to the training data.
            var trained = learningPipeline.Fit(trainSet);

            // Inspect the weights the model learned.
            VBuffer<float> featureWeights = default;
            fitted.GetFeatureWeights(ref featureWeights);

            Console.WriteLine($"weight 0 - {featureWeights.Values[0]}");
            Console.WriteLine($"weight 1 - {featureWeights.Values[1]}");

            // Evaluate how the model does on the test data.
            var scoredTestSet = trained.Transform(testSet);
            var metrics = regression.Evaluate(scoredTestSet, row => row.label, row => row.score);

            Console.WriteLine($"L1 - {metrics.L1}");
            Console.WriteLine($"L2 - {metrics.L2}");
            Console.WriteLine($"LossFunction - {metrics.LossFn}");
            Console.WriteLine($"RMS - {metrics.Rms}");
            Console.WriteLine($"RSquared - {metrics.RSquared}");
        }