public void SdcaRegression()
{
    // Fixed seed so the test run is reproducible.
    var environment = new ConsoleEnvironment(seed: 0);
    var path = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
    var source = new MultiFileSource(path);
    var regression = new RegressionContext(environment);

    // Column 11 holds the label; columns 0 through 10 form the feature vector.
    var dataReader = TextLoader.CreateReader(environment,
        c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
        separator: ';', hasHeader: true);

    LinearRegressionPredictor predictor = null;

    var estimator = dataReader.MakeNewEstimator()
        .Append(r => (r.label,
            score: regression.Trainers.Sdca(r.label, r.features, maxIterations: 2, onFit: p => predictor = p)));
    var pipeline = dataReader.Append(estimator);

    // The onFit callback must not fire until Fit actually runs.
    Assert.Null(predictor);
    var fittedModel = pipeline.Fit(source);
    Assert.NotNull(predictor);

    // 11 input features, so we expect exactly 11 learned weights.
    Assert.Equal(11, predictor.Weights2.Count);

    var scored = fittedModel.Read(source);
    var evalMetrics = regression.Evaluate(scored, r => r.label, r => r.score, new PoissonLoss());

    // Sanity-check a few of the reported metrics.
    Assert.InRange(evalMetrics.L1, 0, double.PositiveInfinity);
    Assert.InRange(evalMetrics.L2, 0, double.PositiveInfinity);
    Assert.InRange(evalMetrics.Rms, 0, double.PositiveInfinity);
    Assert.Equal(evalMetrics.Rms * evalMetrics.Rms, evalMetrics.L2, 5);
    Assert.InRange(evalMetrics.LossFn, 0, double.PositiveInfinity);

    // Just output some data on the schema for fun.
    var schema = scored.AsDynamic.Schema;
    for (int col = 0; col < schema.ColumnCount; ++col)
    {
        Console.WriteLine($"{schema.GetColumnName(col)}, {schema.GetColumnType(col)}");
    }
}
public void OnlineGradientDescent()
{
    // Fixed seed so the test run is reproducible.
    var environment = new MLContext(seed: 0);
    var path = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
    var source = new MultiFileSource(path);
    var regression = new RegressionContext(environment);

    // Column 11 holds the label; columns 0 through 10 form the feature vector.
    var dataReader = TextLoader.CreateReader(environment,
        c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
        separator: ';', hasHeader: true);

    LinearRegressionModelParameters predictor = null;
    var squaredLoss = new SquaredLoss();

    var estimator = dataReader.MakeNewEstimator()
        .Append(r => (r.label,
            score: regression.Trainers.OnlineGradientDescent(
                r.label, r.features, lossFunction: squaredLoss, onFit: p => predictor = p)));
    var pipeline = dataReader.Append(estimator);

    // The onFit callback must not fire until Fit actually runs.
    Assert.Null(predictor);
    var fittedModel = pipeline.Fit(source);
    Assert.NotNull(predictor);

    // 11 input features, so we expect exactly 11 learned weights.
    var weightBuffer = new VBuffer<float>();
    predictor.GetFeatureWeights(ref weightBuffer);
    Assert.Equal(11, weightBuffer.Length);

    var scored = fittedModel.Read(source);
    var evalMetrics = regression.Evaluate(scored, r => r.label, r => r.score, new PoissonLoss());

    // Sanity-check a few of the reported metrics.
    Assert.InRange(evalMetrics.L1, 0, double.PositiveInfinity);
    Assert.InRange(evalMetrics.L2, 0, double.PositiveInfinity);
    Assert.InRange(evalMetrics.Rms, 0, double.PositiveInfinity);
    Assert.Equal(evalMetrics.Rms * evalMetrics.Rms, evalMetrics.L2, 5);
    Assert.InRange(evalMetrics.LossFn, 0, double.PositiveInfinity);
}
public void PoissonRegression()
{
    // Fixed seed so the test run is reproducible.
    var environment = new MLContext(seed: 0);
    var path = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
    var source = new MultiFileSource(path);
    var regression = new RegressionContext(environment);

    // Column 11 holds the label; columns 0 through 10 form the feature vector.
    var dataReader = TextLoader.CreateReader(environment,
        c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
        separator: ';', hasHeader: true);

    PoissonRegressionPredictor predictor = null;

    // NOTE(review): "enoforceNoNegativity" looks misspelled but must match the trainer's
    // actual parameter name — confirm against the PoissonRegression extension signature
    // before renaming.
    var estimator = dataReader.MakeNewEstimator()
        .Append(r => (r.label,
            score: regression.Trainers.PoissonRegression(
                r.label, r.features,
                l1Weight: 2,
                enoforceNoNegativity: true,
                onFit: p => predictor = p,
                advancedSettings: s => s.NumThreads = 1)));
    var pipeline = dataReader.Append(estimator);

    // The onFit callback must not fire until Fit actually runs.
    Assert.Null(predictor);
    var fittedModel = pipeline.Fit(source);
    Assert.NotNull(predictor);

    // 11 input features, so we expect exactly 11 learned weights.
    var weightBuffer = new VBuffer<float>();
    predictor.GetFeatureWeights(ref weightBuffer);
    Assert.Equal(11, weightBuffer.Length);

    var scored = fittedModel.Read(source);
    var evalMetrics = regression.Evaluate(scored, r => r.label, r => r.score, new PoissonLoss());

    // Sanity-check a few of the reported metrics.
    Assert.InRange(evalMetrics.L1, 0, double.PositiveInfinity);
    Assert.InRange(evalMetrics.L2, 0, double.PositiveInfinity);
    Assert.InRange(evalMetrics.Rms, 0, double.PositiveInfinity);
    Assert.Equal(evalMetrics.Rms * evalMetrics.Rms, evalMetrics.L2, 5);
    Assert.InRange(evalMetrics.LossFn, 0, double.PositiveInfinity);
}
// Computes permutation feature importance (PFI) for a trained regression model:
// delegates to PermutationFeatureImportance<RegressionEvaluator.Result>.GetImportanceMetricsMatrix,
// evaluating each permuted dataset with the regression evaluator for the given label column.
// NOTE(review): the access modifiers and return type of this declaration are not visible in
// this view of the file — the signature appears truncated; confirm against the full source.
PermutationFeatureImportance(
    this RegressionContext ctx,                      // regression catalog/context supplying the evaluator and environment
    IPredictionTransformer<IPredictor> model,        // trained model whose features are permuted
    IDataView data,                                  // dataset to permute and score
    string label = DefaultColumnNames.Label,
    string features = DefaultColumnNames.Features,
    bool useFeatureWeightFilter = false,             // when true, restrict PFI to features with nonzero weight
    int? topExamples = null)                         // cap on examples used; null means use all
{
    return (PermutationFeatureImportance<RegressionEvaluator.Result>.GetImportanceMetricsMatrix(
        CatalogUtils.GetEnvironment(ctx),
        model,
        data,
        // Scores each permuted view with the regression evaluator against `label`.
        idv => ctx.Evaluate(idv, label),
        RegressionDelta,
        features,
        useFeatureWeightFilter,
        topExamples));
}
/// <summary>
/// Trains an SDCA regression model on <paramref name="trainDataPath"/>, evaluates it on
/// <paramref name="testDataPath"/>, and round-trips the trained model through
/// <paramref name="modelPath"/> (save, then reload as a dynamic transformer).
/// </summary>
private void TrainRegression(string trainDataPath, string testDataPath, string modelPath)
{
    // Create a new environment for ML.NET operations. It can be used for exception tracking and logging,
    // as well as the source of randomness.
    var env = new LocalEnvironment();

    // Step one: read the data as an IDataView.
    // First, we define the reader: specify the data columns and where to find them in the text file.
    var reader = TextLoader.CreateReader(env, ctx => (
            // We read the first 11 values as a single float vector.
            FeatureVector: ctx.LoadFloat(0, 10),
            // Separately, read the target variable.
            Target: ctx.LoadFloat(11)
        ),
        // The data file has header.
        hasHeader: true,
        // Default separator is tab, but we need a semicolon.
        separator: ';');

    // Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
    var trainData = reader.Read(new MultiFileSource(trainDataPath));

    // Step two: define the learning pipeline.
    // We know that this is a regression task, so we create a regression context: it will give us the algorithms
    // we need, as well as the evaluation procedure.
    var regression = new RegressionContext(env);

    // We 'start' the pipeline with the output of the reader, then train the model.
    var learningPipeline = reader.MakeNewEstimator()
        .Append(r => (
            // Retain the 'Target' column for evaluation purposes.
            r.Target,
            // We choose the SDCA regression trainer, normalizing the 'FeatureVector' in the same call.
            Prediction: regression.Trainers.Sdca(label: r.Target, features: r.FeatureVector.Normalize())));

    // FIX: removed the unused local `fx` (trainData.GetColumn(x => x.FeatureVector)) — its value was never read.

    // Step three. Train the pipeline.
    var model = learningPipeline.Fit(trainData);

    // Read the test dataset (also lazy) and calculate metrics of the model on it.
    var testData = reader.Read(new MultiFileSource(testDataPath));
    var metrics = regression.Evaluate(model.Transform(testData), label: r => r.Target, score: r => r.Prediction);

    using (var stream = File.Create(modelPath))
    {
        // Saving and loading happens to 'dynamic' models, so the static typing is lost in the process.
        model.AsDynamic.SaveTo(env, stream);
    }

    // Potentially, the lines below can be in a different process altogether.
    // When you load the model, it's a 'dynamic' transformer.
    ITransformer loadedModel;
    using (var stream = File.OpenRead(modelPath))
    {
        loadedModel = TransformerChain.LoadFrom(env, stream);
    }
}
/// <summary>
/// Sample: trains a LightGBM regression model on the housing dataset, then prints a couple of
/// learned weights and the evaluation metrics for a held-out 10% test split.
/// </summary>
public static void LightGbmRegression()
{
    // Downloading a regression dataset from github.com/dotnet/machinelearning;
    // this will create a housing.txt file in the filesystem that you can open to see the data.
    string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();

    // Creating the ML.Net IHostEnvironment object, needed for the pipeline.
    var env = new LocalEnvironment(seed: 0);

    // Creating the ML context, based on the task performed.
    var regressionContext = new RegressionContext(env);

    // Creating a data reader, based on the format of the data: label in column 0, features in columns 1-6.
    var reader = TextLoader.CreateReader(env, c => (
            label: c.LoadFloat(0),
            features: c.LoadFloat(1, 6)
        ),
        separator: '\t', hasHeader: true);

    // Read the data, and leave 10% out, so we can use them for testing.
    var data = reader.Read(new MultiFileSource(dataFile));
    var (trainData, testData) = regressionContext.TrainTestSplit(data, testFraction: 0.1);

    // The predictor that gets produced out of training (populated by the onFit callback below).
    LightGbmRegressionPredictor pred = null;

    // Create the estimator.
    var learningPipeline = reader.MakeNewEstimator()
        .Append(r => (r.label, score: regressionContext.Trainers.LightGbm(
            r.label,
            r.features,
            numLeaves: 4,
            minDataPerLeaf: 6,
            learningRate: 0.001,
            onFit: p => pred = p)));

    // Fit this pipeline to the training data.
    var model = learningPipeline.Fit(trainData);

    // Check the weights that the model learned.
    VBuffer<float> weights = default;
    pred.GetFeatureWeights(ref weights);
    Console.WriteLine($"weight 0 - {weights.Values[0]}");
    Console.WriteLine($"weight 1 - {weights.Values[1]}");

    // Evaluate how the model is doing on the test data.
    var dataWithPredictions = model.Transform(testData);
    var metrics = regressionContext.Evaluate(dataWithPredictions, r => r.label, r => r.score);

    Console.WriteLine($"L1 - {metrics.L1}");
    Console.WriteLine($"L2 - {metrics.L2}");
    Console.WriteLine($"LossFunction - {metrics.LossFn}");
    Console.WriteLine($"RMS - {metrics.Rms}");
    // FIX: this line previously contained a literal line break inside a non-verbatim
    // interpolated string ($"RSquared - \n{...}"), which does not compile; rejoined onto one line.
    Console.WriteLine($"RSquared - {metrics.RSquared}");
}