コード例 #1
0
        /// <summary>
        /// Builds an SDCA maximum-entropy multiclass trainer from the supplied option values.
        /// </summary>
        /// <param name="context">ML context whose multiclass trainer catalog creates the estimator.</param>
        /// <param name="param">Source of the column names and L1/L2 regularization values.</param>
        /// <returns>The estimator created by <c>SdcaMaximumEntropy</c> for the assembled options.</returns>
        public override IEstimator <ITransformer> BuildFromOption(MLContext context, SdcaOption param)
        {
            var trainerOptions = new SdcaMaximumEntropyMulticlassTrainer.Options();

            // Column bindings are copied verbatim from the incoming option object.
            trainerOptions.LabelColumnName = param.LabelColumnName;
            trainerOptions.FeatureColumnName = param.FeatureColumnName;
            trainerOptions.ExampleWeightColumnName = param.ExampleWeightColumnName;

            // Regularization strengths are likewise taken from the incoming option object.
            trainerOptions.L1Regularization = param.L1Regularization;
            trainerOptions.L2Regularization = param.L2Regularization;

            // The thread count is not part of 'param'; it is obtained from the AutoML utility helper.
            trainerOptions.NumberOfThreads = AutoMlUtils.GetNumberOfThreadFromEnvrionment();

            var trainers = context.MulticlassClassification.Trainers;
            return trainers.SdcaMaximumEntropy(trainerOptions);
        }
コード例 #2
0
        /// <summary>
        /// Maps the "Label" column to key values, appends an SDCA maximum-entropy multiclass
        /// trainer, and fits the resulting pipeline on the given training set.
        /// </summary>
        /// <param name="mlContext">ML context used to create the transform and the trainer.</param>
        /// <param name="splitTrainSet">Data the pipeline is fitted on.</param>
        /// <returns>The fitted model.</returns>
        public static ITransformer BuildAndTrainModel(this MLContext mlContext, IDataView splitTrainSet)
        {
            // Trainer settings: convergence tolerance of 0.05 and at most 30 passes over the training data.
            var trainerOptions = new SdcaMaximumEntropyMulticlassTrainer.Options();
            trainerOptions.ConvergenceTolerance = 0.05f;
            trainerOptions.MaximumNumberOfIterations = 30;

            // Stage 1: convert the "Label" column values into keys.
            var labelToKey = mlContext.Transforms.Conversion.MapValueToKey("Label");

            // Stage 2: the multiclass trainer itself.
            var trainer = mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(trainerOptions);

            return labelToKey.Append(trainer).Fit(splitTrainSet);
        }
コード例 #3
0
        /// <summary>
        /// Entry point that trains an SDCA maximum-entropy multiclass model from the given options.
        /// </summary>
        /// <param name="env">Host environment; validated and used to register the "TrainSDCA" host.</param>
        /// <param name="input">Trainer options; validated against the registered host before training.</param>
        /// <returns>The output produced by <c>TrainerEntryPointsUtils.Train</c>.</returns>
        public static CommonOutputs.MulticlassClassificationOutput TrainMulticlass(IHostEnvironment env, SdcaMaximumEntropyMulticlassTrainer.Options input)
        {
            Contracts.CheckValue(env, nameof(env));

            // All further validation and training run against a host registered for this entry point.
            var trainHost = env.Register("TrainSDCA");
            trainHost.CheckValue(input, nameof(input));
            EntryPointUtils.CheckInputArgs(trainHost, input);

            return TrainerEntryPointsUtils.Train <SdcaMaximumEntropyMulticlassTrainer.Options, CommonOutputs.MulticlassClassificationOutput>(
                trainHost,
                input,
                // Factory for the trainer instance.
                () => new SdcaMaximumEntropyMulticlassTrainer(trainHost, input),
                // Looks up the label column in the training data schema.
                () => TrainerEntryPointsUtils.FindColumn(trainHost, input.TrainingData.Schema, input.LabelColumnName));
        }
        // Extension method on SweepableMultiClassificationTrainers that wraps the SDCA maximum-entropy
        // multiclass trainer in a sweepable estimator created through context.AutoML().
        //
        // Parameters:
        //   trainer           - catalog object whose Context supplies the ML context.
        //   labelColumnName   - written to option.LabelColumnName each time the factory lambda runs.
        //   featureColumnName - written to option.FeatureColumnName each time the factory lambda runs;
        //                       also passed to CreateSweepableEstimator in a one-element string array.
        //   optionBuilder     - sweepable option builder; when null,
        //                       SdcaMaximumEntropyMulticlassTrainerSweepableOptions.Default is used.
        //   defaultOption     - forwarded to optionBuilder.SetDefaultOption (may be null).
        //
        // NOTE(review): the modifiers and return type of this declaration are not visible in this
        // excerpt; the line below starts at the method name.
        SdcaMaximumEntropy(this SweepableMultiClassificationTrainers trainer, string labelColumnName = "Label", string featureColumnName = "Features", SweepableOption <SdcaMaximumEntropyMulticlassTrainer.Options> optionBuilder = null, SdcaMaximumEntropyMulticlassTrainer.Options defaultOption = null)
        {
            var context = trainer.Context;

            // Fall back to the predefined sweepable options when the caller supplies none.
            if (optionBuilder == null)
            {
                optionBuilder = SdcaMaximumEntropyMulticlassTrainerSweepableOptions.Default;
            }

            // NOTE(review): this runs even when defaultOption is null, and also when optionBuilder is the
            // Default instance assigned above. If Default is a shared object, this call would modify it
            // for every later caller - TODO confirm.
            optionBuilder.SetDefaultOption(defaultOption);

            // NOTE(review): the lambda parameter 'context' has the same name as the local declared above,
            // so inside the lambda 'context' refers to the lambda's argument. This presumably needs a
            // language version that permits such shadowing (C# 8 or later) - confirm.
            return(context.AutoML().CreateSweepableEstimator(
                       (context, option) =>
            {
                // The column names given to this method are applied to the option before the trainer is created.
                option.LabelColumnName = labelColumnName;
                option.FeatureColumnName = featureColumnName;
                return context.MulticlassClassification.Trainers.SdcaMaximumEntropy(option);
            },
                       optionBuilder,
                       // Presumably the estimator's input and output column names, in that order - verify
                       // against CreateSweepableEstimator's signature. PredictedLabel is defined outside this excerpt.
                       new string[] { featureColumnName },
                       new string[] { PredictedLabel },
                       nameof(SdcaMaximumEntropyMulticlassTrainer)));
        }
コード例 #5
0
        /// <summary>
        /// End-to-end sample: trains an SDCA maximum-entropy multiclass model on generated data,
        /// prints the first five test predictions, and prints the evaluation metrics.
        /// </summary>
        public static void Example()
        {
            // The MLContext is the entry point for ML.NET operations (operation catalog, logging,
            // exception tracking, randomness). A fixed seed keeps the outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Generate 1000 training points and expose them as an IDataView, the data type
            // consumed by the ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(1000));

            // ML.NET does not cache data sets by default, so data that is read from a file and
            // accessed many times can be slow because of repeated featurization and disk operations.
            // When the data fits in memory, caching it there helps - particularly for iterative
            // algorithms that make many passes over the data.
            trainingData = mlContext.Data.Cache(trainingData);

            // Trainer options: convergence tolerance of 0.05 and at most 30 passes over the training data.
            var trainerOptions = new SdcaMaximumEntropyMulticlassTrainer.Options();
            trainerOptions.ConvergenceTolerance = 0.05f;
            trainerOptions.MaximumNumberOfIterations = 30;

            // Pipeline stage 1: convert the string labels into key types.
            var labelToKey = mlContext.Transforms.Conversion.MapValueToKey("Label");

            // Pipeline stage 2: the SdcaMaximumEntropy multiclass trainer.
            var trainer = mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(trainerOptions);

            // Train the model.
            var model = labelToKey.Append(trainer).Fit(trainingData);

            // Build the test set with a different random seed so that it differs from the training data.
            var testPoints = GenerateRandomDataPoints(500, seed: 123);
            var testData = mlContext.Data.LoadFromEnumerable(testPoints);

            // Score the test set with the trained model.
            var transformedTestData = model.Transform(testData);

            // Materialize the scored rows as a list of Prediction objects.
            var predictions = mlContext.Data
                .CreateEnumerable <Prediction>(transformedTestData, reuseRowObject: false)
                .ToList();

            // Show the first five predictions.
            var preview = predictions.Take(5);
            foreach (var p in preview)
            {
                Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
            }

            // Expected output:
            //   Label: 1, Prediction: 1
            //   Label: 2, Prediction: 2
            //   Label: 3, Prediction: 2
            //   Label: 2, Prediction: 2
            //   Label: 3, Prediction: 3

            // Compute and print the overall evaluation metrics.
            var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData);
            PrintMetrics(metrics);

            // Expected output:
            //   Micro Accuracy: 0.92
            //   Macro Accuracy: 0.92
            //   Log Loss: 0.31
            //   Log Loss Reduction: 0.72

            //   Confusion table
            //             ||========================
            //   PREDICTED ||     0 |     1 |     2 | Recall
            //   TRUTH     ||========================
            //           0 ||   147 |     0 |    13 | 0.9188
            //           1 ||     0 |   164 |    13 | 0.9266
            //           2 ||    10 |     6 |   147 | 0.9018
            //             ||========================
            //   Precision ||0.9363 |0.9647 |0.8497 |
        }