コード例 #1
0
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
            // as a catalog of available operations and as the source of randomness.
            // Setting the seed to a fixed number in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Create a list of data examples.
            var examples = DatasetUtils.GenerateRandomMulticlassClassificationExamples(1000);

            // Convert the examples list to an IDataView object, which is consumable by ML.NET API.
            var dataView = mlContext.Data.LoadFromEnumerable(examples);

            //////////////////// Data Preview ////////////////////
            // Label    Features
            // AA       0.7262433,0.8173254,0.7680227,0.5581612,0.2060332,0.5588848,0.9060271,0.4421779,0.9775497,0.2737045
            // BB       0.4919063,0.6673147,0.8326591,0.6695119,1.182151,0.230367,1.06237,1.195347,0.8771811,0.5145918
            // CC       1.216908,1.248052,1.391902,0.4326252,1.099942,0.9262842,1.334019,1.08762,0.9468155,0.4811099
            // DD       0.7871246,1.053327,0.8971719,1.588544,1.242697,1.362964,0.6303943,0.9810045,0.9431419,1.557455

            var options = new SdcaNonCalibratedMulticlassTrainer.Options
            {
                // Add custom loss
                Loss = new HingeLoss(),
                // Make the convergence tolerance tighter.
                ConvergenceTolerance = 0.05f,
                // Increase the maximum number of passes over training data.
                MaximumNumberOfIterations = 30,
            };

            // Create a pipeline.
            var pipeline =
                // Convert the string labels into key types.
                mlContext.Transforms.Conversion.MapValueToKey("Label")
                // Apply StochasticDualCoordinateAscent multiclass trainer.
                .Append(mlContext.MulticlassClassification.Trainers.SdcaNonCalibrated(options));

            // Split the data into training and test sets. Only training set is used in fitting
            // the created pipeline. Metrics are computed on the test.
            var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.1);

            // Train the model.
            var model = pipeline.Fit(split.TrainSet);

            // Do prediction on the test set.
            var dataWithPredictions = model.Transform(split.TestSet);

            // Evaluate the trained model using the test set.
            var metrics = mlContext.MulticlassClassification.Evaluate(dataWithPredictions);

            SamplesUtils.ConsoleUtils.PrintMetrics(metrics);

            // Expected output:
            //   Micro Accuracy: 0.82
            //   Macro Accuracy: 0.81
            //   Log Loss: 0.64
            //   Log Loss Reduction: 52.51
        }
コード例 #2
0
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
            // as a catalog of available operations and as the source of randomness.
            // Setting the seed to a fixed number in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Create a list of training data points.
            var dataPoints = GenerateRandomDataPoints(1000);

            // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

            // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times,
            // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory,
            // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms
            // which needs many data passes.
            trainingData = mlContext.Data.Cache(trainingData);

            // Define trainer options.
            var options = new SdcaNonCalibratedMulticlassTrainer.Options
            {
                Loss             = new HingeLoss(),
                L1Regularization = 0.1f,
                BiasLearningRate = 0.01f,
                NumberOfThreads  = 1
            };

            // Define the trainer.
            var pipeline =
                // Convert the string labels into key types.
                mlContext.Transforms.Conversion.MapValueToKey("Label")
                // Apply SdcaNonCalibrated multiclass trainer.
                .Append(mlContext.MulticlassClassification.Trainers.SdcaNonCalibrated(options));


            // Train the model.
            var model = pipeline.Fit(trainingData);

            // Create testing data. Use different random seed to make it different from training data.
            var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123));

            // Run the model on test data set.
            var transformedTestData = model.Transform(testData);

            // Convert IDataView object to a list.
            var predictions = mlContext.Data.CreateEnumerable <Prediction>(transformedTestData, reuseRowObject: false).ToList();

            // Look at 5 predictions
            foreach (var p in predictions.Take(5))
            {
                Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
            }

            // Expected output:
            //   Label: 1, Prediction: 1
            //   Label: 2, Prediction: 2
            //   Label: 3, Prediction: 2
            //   Label: 2, Prediction: 2
            //   Label: 3, Prediction: 3

            // Evaluate the overall metrics
            var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData);

            PrintMetrics(metrics);

            // Expected output:
            //  Micro Accuracy: 0.91
            //  Macro Accuracy: 0.91
            //  Log Loss: 0.22
            //  Log Loss Reduction: 0.80
        }
        SdcaNonCalibreated(this SweepableMultiClassificationTrainers trainer, string labelColumnName = "Label", string featureColumnName = "Features", SweepableOption <SdcaNonCalibratedMulticlassTrainer.Options> optionBuilder = null, SdcaNonCalibratedMulticlassTrainer.Options defaultOption = null)
        {
            var context = trainer.Context;

            if (optionBuilder == null)
            {
                optionBuilder = SdcaNonCalibratedMulticlassTrainerSweepableOptions.Default;
            }

            optionBuilder.SetDefaultOption(defaultOption);

            return(context.AutoML().CreateSweepableEstimator(
                       (context, option) =>
            {
                option.LabelColumnName = labelColumnName;
                option.FeatureColumnName = featureColumnName;
                return context.MulticlassClassification.Trainers.SdcaNonCalibrated(option);
            },
                       optionBuilder,
                       trainerName: nameof(SdcaNonCalibratedMulticlassTrainer),
                       inputs: new string[] { featureColumnName },
                       outputs: new string[] { PredictedLabel }));
        }