/// <summary>
/// Predict a target using a linear regression model trained with the SDCA trainer.
/// </summary>
/// <param name="catalog">The regression catalog trainer object.</param>
/// <param name="label">The label, or dependent variable.</param>
/// <param name="features">The features, or independent variables.</param>
/// <param name="weights">The optional example weights.</param>
/// <param name="options">Advanced arguments to the algorithm. Must not be null. Its label, feature and
/// example weight column names are overwritten with the names resolved for <paramref name="label"/>,
/// <paramref name="features"/> and <paramref name="weights"/> when the trainer is created.</param>
/// <param name="onFit">A delegate that is called every time the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
/// the linear model that was trained. Note that this action cannot change the result in any way; it is only a way for the caller to
/// be informed about what was learnt.</param>
/// <returns>The predicted output.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs)]
/// ]]></format>
/// </example>
public static Scalar<float> Sdca(this RegressionCatalog.RegressionTrainers catalog,
    Scalar<float> label, Vector<float> features, Scalar<float> weights,
    SdcaRegressionTrainer.Options options,
    Action<LinearRegressionModelParameters> onFit = null)
{
    Contracts.CheckValue(label, nameof(label));
    Contracts.CheckValue(features, nameof(features));
    Contracts.CheckValueOrNull(weights);
    // The reconciler below dereferences options unconditionally, so a null value must be
    // rejected here instead of surfacing later as a NullReferenceException.
    Contracts.CheckValue(options, nameof(options));
    Contracts.CheckValueOrNull(onFit);

    var rec = new TrainerEstimatorReconciler.Regression(
        (env, labelName, featuresName, weightsName) =>
        {
            // Bind the options to the column names resolved for this pipeline.
            options.LabelColumnName = labelName;
            options.FeatureColumnName = featuresName;
            // Without this assignment the weights column passed by the caller is ignored.
            options.ExampleWeightColumnName = weightsName;

            var trainer = new SdcaRegressionTrainer(env, options);
            if (onFit != null)
            {
                // Surface the trained model to the caller after each fit.
                return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
            }

            return trainer;
        },
        label,
        features,
        weights);

    return rec.Score;
}
// Creates a sweepable SDCA regression estimator through context.AutoML().CreateSweepableEstimator.
// NOTE(review): the modifiers and return type of this declaration are not visible in this excerpt; the header appears truncated - confirm against the full file.
// When optionSweeper is null, SdcaRegressionTrainerSweepableOptions.Default is used; defaultOption is then handed to the sweeper via SetDefaultOption (it may be null).
// The factory lambda overwrites the swept option's LabelColumnName/FeatureColumnName with the names passed to this method before building the trainer.
// The lambda's `context` parameter has the same name as the outer local `context`; inside the lambda the parameter is the one in scope.
// The two string arrays passed last list the label/feature column names and `Score`; `Score` is defined outside this excerpt.
Sdca( this SweepableRegressionTrainers trainers, string labelColumnName = "Label", string featureColumnName = "Features", SweepableOption <SdcaRegressionTrainer.Options> optionSweeper = null, SdcaRegressionTrainer.Options defaultOption = null) { var context = trainers.Context; if (optionSweeper == null) { optionSweeper = SdcaRegressionTrainerSweepableOptions.Default; } optionSweeper.SetDefaultOption(defaultOption); return(context.AutoML().CreateSweepableEstimator( (context, option) => { option.LabelColumnName = labelColumnName; option.FeatureColumnName = featureColumnName; return context.Regression.Trainers.Sdca(option); }, optionSweeper, new string[] { labelColumnName, featureColumnName }, new string[] { Score }, nameof(SdcaRegressionTrainer))); }
/// <summary>
/// Creates an <see cref="SdcaRegressionTrainer"/>, which predicts a target using a linear
/// regression model trained with the SDCA trainer.
/// </summary>
/// <param name="catalog">The regression catalog trainer object.</param>
/// <param name="options">Advanced arguments to the algorithm.</param>
/// <returns>A trainer built from the catalog's environment and <paramref name="options"/>.</returns>
public static SdcaRegressionTrainer StochasticDualCoordinateAscent(
    this RegressionCatalog.RegressionTrainers catalog,
    SdcaRegressionTrainer.Options options)
{
    // Both arguments are required; validate them before resolving the environment.
    Contracts.CheckValue(catalog, nameof(catalog));
    Contracts.CheckValue(options, nameof(options));

    return new SdcaRegressionTrainer(CatalogUtils.GetEnvironment(catalog), options);
}
/// <summary>
/// Builds an SDCA regression estimator configured from the values carried by <paramref name="param"/>.
/// </summary>
/// <param name="context">The context whose regression trainer catalog creates the estimator.</param>
/// <param name="param">Source of the column names and the L1/L2 regularization values.</param>
/// <returns>The estimator returned by the catalog's <c>Sdca</c> method for the assembled options.</returns>
public override IEstimator<ITransformer> BuildFromOption(MLContext context, SdcaOption param)
{
    var trainerOptions = new SdcaRegressionTrainer.Options();

    // Column bindings are copied straight from the incoming parameter object.
    trainerOptions.LabelColumnName = param.LabelColumnName;
    trainerOptions.FeatureColumnName = param.FeatureColumnName;
    trainerOptions.ExampleWeightColumnName = param.ExampleWeightColumnName;

    // Regularization values are copied straight from the incoming parameter object as well.
    trainerOptions.L1Regularization = param.L1Regularization;
    trainerOptions.L2Regularization = param.L2Regularization;

    // The thread count is not taken from param; it is obtained from AutoMlUtils.
    trainerOptions.NumberOfThreads = AutoMlUtils.GetNumberOfThreadFromEnvrionment();

    return context.Regression.Trainers.Sdca(trainerOptions);
}
/// <summary>
/// Trains an SDCA regression model on generated <c>FormulaData</c> points and prints the
/// R-squared value measured on a held-out test split.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    Console.WriteLine("Hello World 'Microsoft.ML'!");

    // 100 consecutive integers starting at -1, each paired with (value * 2 - 1).
    List<FormulaData> pointsValues = Enumerable
        .Range(-1, 100)
        .Select(value => new FormulaData(value, value * 2 - 1))
        .ToList();

    // Create the ML.NET context.
    var ml = new MLContext(1);

    // Load the generated points and keep 20% of them aside for testing.
    var allData = ml.Data.LoadFromEnumerable(pointsValues);
    var split = ml.Data.TrainTestSplit(allData, testFraction: 0.2);
    var trainData = split.TrainSet;
    var testData = split.TestSet;

    // Configure the trainer.
    var sdcaOptions = new SdcaRegressionTrainer.Options
    {
        LabelColumnName = "Label",      // nameof(FormulaData.Y)
        FeatureColumnName = "Features", // nameof(FormulaData.X)
        // A tighter convergence tolerance effectively leads to more training iterations.
        ConvergenceTolerance = 0.02f,
        // Hard limit on the number of passes over the training data.
        MaximumNumberOfIterations = 30,
        // Larger learning rate for the bias term.
        BiasLearningRate = 0.1f
    };

    // Create the SDCA regression estimator and fit it on the training split.
    var trainer = ml.Regression.Trainers.Sdca(sdcaOptions);
    var model = trainer.Fit(trainData);

    // Score the test split with the trained model.
    var scored = model.Transform(testData);

    // Evaluate the scored data and report R-squared.
    var metrics = ml.Regression.Evaluate(scored);
    double rSquared = metrics.RSquared;
    Console.WriteLine($"rSquared: {rSquared}");
}
/// <summary>
/// Sample: trains an SDCA regression model with custom options on generated data and prints
/// the evaluation metrics computed on a held-out test split.
/// </summary>
public static void Example()
{
    // The context is used for exception tracking and logging, as a catalog of available
    // operations and as the source of randomness. A fixed seed keeps the outputs deterministic.
    var ml = new MLContext(seed: 0);

    // Generate in-memory examples as native C# objects and expose them as an IDataView.
    var samples = SamplesUtils.DatasetUtils.GenerateFloatLabelFloatFeatureVectorSamples(1000);
    var samplesView = ml.Data.LoadFromEnumerable(samples);

    // Hold out 10% for testing: the pipeline is fitted on the training part only and the
    // metrics are computed on the test part.
    // NOTE(review): the split is requested through the MulticlassClassification catalog although
    // this is a regression sample - confirm this is intentional.
    var trainTest = ml.MulticlassClassification.TrainTestSplit(samplesView, testFraction: 0.1);

    // Trainer options.
    var sdcaOptions = new SdcaRegressionTrainer.Options
    {
        // Tighter convergence tolerance.
        ConvergenceTolerance = 0.02f,
        // More passes over the training data.
        MaxIterations = 30,
        // Larger learning rate for the bias.
        BiasLearningRate = 0.1f
    };

    // Fit the trainer on the training set.
    var trainer = ml.Regression.Trainers.StochasticDualCoordinateAscent(sdcaOptions);
    var trainedModel = trainer.Fit(trainTest.TrainSet);

    // Score the test set and evaluate the result.
    var scoredTestSet = trainedModel.Transform(trainTest.TestSet);
    var evaluation = ml.Regression.Evaluate(scoredTestSet);
    SamplesUtils.ConsoleUtils.PrintMetrics(evaluation);

    // Expected output:
    //   L1: 0.26
    //   L2: 0.11
    //   LossFunction: 0.11
    //   RMS: 0.33
    //   RSquared: 0.56
}
/// <summary>
/// Entry point that trains an SDCA regression model from the supplied options.
/// </summary>
/// <param name="env">The host environment; must not be null.</param>
/// <param name="input">The trainer options; must not be null and must pass the entry-point argument checks.</param>
/// <returns>The regression output produced by <c>LearnerEntryPointsUtils.Train</c>.</returns>
public static CommonOutputs.RegressionOutput TrainRegression(
    IHostEnvironment env,
    SdcaRegressionTrainer.Options input)
{
    Contracts.CheckValue(env, nameof(env));

    // All subsequent checks and the training itself run under a host registered for this entry point.
    var trainingHost = env.Register("TrainSDCA");
    trainingHost.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(trainingHost, input);

    return LearnerEntryPointsUtils.Train<SdcaRegressionTrainer.Options, CommonOutputs.RegressionOutput>(
        trainingHost,
        input,
        // Factory for the trainer instance.
        () => new SdcaRegressionTrainer(trainingHost, input),
        // Resolves the label column against the training data schema.
        () => LearnerEntryPointsUtils.FindColumn(trainingHost, input.TrainingData.Schema, input.LabelColumn));
}
/// <summary>
/// Sample: trains an SDCA regression model with custom options on generated data points,
/// prints a handful of predictions next to their labels, and prints the evaluation metrics.
/// </summary>
public static void Example()
{
    // The context is used for exception tracking and logging, as a catalog of
    // available operations and as the source of randomness. A fixed seed
    // keeps the outputs of this example deterministic.
    var ml = new MLContext(seed: 0);

    // Generate the training points and wrap them in an IDataView, the form
    // consumed by the ML.NET API.
    var trainingPoints = GenerateRandomDataPoints(1000);
    var trainingView = ml.Data.LoadFromEnumerable(trainingPoints);

    // Trainer options.
    var sdcaOptions = new SdcaRegressionTrainer.Options
    {
        LabelColumnName = nameof(DataPoint.Label),
        FeatureColumnName = nameof(DataPoint.Features),
        // A tighter convergence tolerance effectively leads to more
        // training iterations.
        ConvergenceTolerance = 0.02f,
        // Hard limit on the number of passes over the training data.
        MaximumNumberOfIterations = 30,
        // Larger learning rate for the bias.
        BiasLearningRate = 0.1f
    };

    // Create the trainer and fit it.
    var trainer = ml.Regression.Trainers.Sdca(sdcaOptions);
    var trainedModel = trainer.Fit(trainingView);

    // Build the test set with a different random seed so that it differs
    // from the training data.
    var testView = ml.Data.LoadFromEnumerable(
        GenerateRandomDataPoints(5, seed: 123));

    // Score the test set.
    var scoredTestView = trainedModel.Transform(testView);

    // Materialize the scored rows as a list.
    var predictions = ml.Data
        .CreateEnumerable<Prediction>(scoredTestView, reuseRowObject: false)
        .ToList();

    // Show each prediction side by side with the actual Label.
    foreach (Prediction p in predictions)
    {
        Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}");
    }

    // Expected output:
    //   Label: 0.985, Prediction: 0.927
    //   Label: 0.155, Prediction: 0.062
    //   Label: 0.515, Prediction: 0.439
    //   Label: 0.566, Prediction: 0.500
    //   Label: 0.096, Prediction: 0.078

    // Evaluate the overall metrics on the scored test set.
    var evaluation = ml.Regression.Evaluate(scoredTestView);
    PrintMetrics(evaluation);

    // Expected output:
    //   Mean Absolute Error: 0.05
    //   Mean Squared Error: 0.00
    //   Root Mean Squared Error: 0.06
    //   RSquared: 0.97 (closer to 1 is better. The worst case is 0)
}