public override IEstimator<ITransformer> BuildFromOption(MLContext context, SdcaOption param)
{
    // Translate the AutoML sweep parameters into the trainer's native options.
    // Thread count is taken from the environment rather than swept.
    var trainerOptions = new SdcaMaximumEntropyMulticlassTrainer.Options()
    {
        LabelColumnName = param.LabelColumnName,
        FeatureColumnName = param.FeatureColumnName,
        ExampleWeightColumnName = param.ExampleWeightColumnName,
        L1Regularization = param.L1Regularization,
        L2Regularization = param.L2Regularization,
        NumberOfThreads = AutoMlUtils.GetNumberOfThreadFromEnvrionment(),
    };

    return context.MulticlassClassification.Trainers.SdcaMaximumEntropy(trainerOptions);
}
public static ITransformer BuildAndTrainModel(this MLContext mlContext, IDataView splitTrainSet)
{
    // Trainer options: a tighter convergence tolerance and more passes
    // over the data than the defaults.
    var trainerOptions = new SdcaMaximumEntropyMulticlassTrainer.Options
    {
        // Make the convergence tolerance tighter.
        ConvergenceTolerance = 0.05f,
        // Increase the maximum number of passes over training data.
        MaximumNumberOfIterations = 30,
    };

    // Key-encode the label column, then append the SDCA maximum-entropy trainer.
    var trainingPipeline = mlContext.Transforms.Conversion
        .MapValueToKey("Label")
        .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(trainerOptions));

    // Fit the pipeline on the training split and hand back the trained model.
    return trainingPipeline.Fit(splitTrainSet);
}
public static CommonOutputs.MulticlassClassificationOutput TrainMulticlass(IHostEnvironment env, SdcaMaximumEntropyMulticlassTrainer.Options input)
{
    // Validate the environment and the entry-point arguments before training.
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register("TrainSDCA");
    host.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(host, input);

    // Delegate to the shared entry-point training helper, supplying factories
    // for the trainer and for resolving the label column from the input schema.
    return TrainerEntryPointsUtils.Train<SdcaMaximumEntropyMulticlassTrainer.Options, CommonOutputs.MulticlassClassificationOutput>(
        host,
        input,
        () => new SdcaMaximumEntropyMulticlassTrainer(host, input),
        () => TrainerEntryPointsUtils.FindColumn(host, input.TrainingData.Schema, input.LabelColumnName));
}
/// <summary>
/// Creates a sweepable SDCA maximum-entropy multiclass estimator for AutoML.
/// </summary>
/// <param name="trainer">The sweepable multiclass trainer catalog this extension hangs off.</param>
/// <param name="labelColumnName">Name of the label column. Defaults to "Label".</param>
/// <param name="featureColumnName">Name of the feature column. Defaults to "Features".</param>
/// <param name="optionBuilder">Optional sweepable option space; when null the default
/// <c>SdcaMaximumEntropyMulticlassTrainerSweepableOptions</c> space is used.</param>
/// <param name="defaultOption">Optional fixed option values layered under the sweep.</param>
/// <returns>A sweepable estimator wrapping <c>SdcaMaximumEntropy</c>.</returns>
// FIX: the original declaration had no access modifiers and no return type
// (`SdcaMaximumEntropy(this ...)`), which is not a valid member declaration —
// an extension method must be `public static` with a return type. Restored the
// conventional `public static SweepableEstimator` signature used by the other
// sweepable trainer extensions; callers are unaffected.
public static SweepableEstimator SdcaMaximumEntropy(this SweepableMultiClassificationTrainers trainer, string labelColumnName = "Label", string featureColumnName = "Features", SweepableOption<SdcaMaximumEntropyMulticlassTrainer.Options> optionBuilder = null, SdcaMaximumEntropyMulticlassTrainer.Options defaultOption = null)
{
    var context = trainer.Context;

    // Fall back to the default sweepable option space when none is supplied.
    if (optionBuilder == null)
    {
        optionBuilder = SdcaMaximumEntropyMulticlassTrainerSweepableOptions.Default;
    }

    // Layer any caller-provided fixed defaults under the swept values.
    optionBuilder.SetDefaultOption(defaultOption);

    return context.AutoML().CreateSweepableEstimator(
        (context, option) =>
        {
            // Column names are fixed by the caller, not swept.
            option.LabelColumnName = labelColumnName;
            option.FeatureColumnName = featureColumnName;
            return context.MulticlassClassification.Trainers.SdcaMaximumEntropy(option);
        },
        optionBuilder,
        new string[] { featureColumnName },
        new string[] { PredictedLabel },
        nameof(SdcaMaximumEntropyMulticlassTrainer));
}
public static void Example()
{
    // Seeded MLContext so this sample produces deterministic output. The
    // context is the catalog of ML.NET operations and the source of randomness.
    var mlContext = new MLContext(seed: 0);

    // Build an IDataView from 1000 in-memory training points.
    var dataPoints = GenerateRandomDataPoints(1000);
    var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

    // ML.NET does not cache by default; SDCA is iterative and re-reads the
    // data many times, so caching in memory avoids repeated featurization
    // and disk reads when the data fits in memory.
    trainingData = mlContext.Data.Cache(trainingData);

    // Trainer options: tighter convergence tolerance, more training passes.
    var options = new SdcaMaximumEntropyMulticlassTrainer.Options
    {
        // Make the convergence tolerance tighter.
        ConvergenceTolerance = 0.05f,
        // Increase the maximum number of passes over training data.
        MaximumNumberOfIterations = 30,
    };

    // Pipeline: key-encode the string labels, then apply the
    // SdcaMaximumEntropy multiclass trainer.
    var pipeline = mlContext.Transforms.Conversion
        .MapValueToKey("Label")
        .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(options));

    // Train.
    var model = pipeline.Fit(trainingData);

    // Score a fresh test set; a different seed keeps it distinct from training.
    var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123));
    var transformedTestData = model.Transform(testData);

    // Materialize the scored rows as a list of Prediction objects.
    var predictions = mlContext.Data
        .CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false)
        .ToList();

    // Print the first five predictions.
    foreach (var p in predictions.Take(5))
    {
        Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
    }

    // Expected output:
    //   Label: 1, Prediction: 1
    //   Label: 2, Prediction: 2
    //   Label: 3, Prediction: 2
    //   Label: 2, Prediction: 2
    //   Label: 3, Prediction: 3

    // Evaluate and print the overall multiclass metrics.
    var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData);
    PrintMetrics(metrics);

    // Expected output:
    //   Micro Accuracy: 0.92
    //   Macro Accuracy: 0.92
    //   Log Loss: 0.31
    //   Log Loss Reduction: 0.72
    //
    //   Confusion table
    //             ||========================
    //   PREDICTED ||     0 |     1 |     2 | Recall
    //   TRUTH     ||========================
    //           0 ||   147 |     0 |    13 | 0.9188
    //           1 ||     0 |   164 |    13 | 0.9266
    //           2 ||    10 |     6 |   147 | 0.9018
    //             ||========================
    //   Precision ||0.9363 |0.9647 |0.8497 |
}