Exemplo n.º 1
0
        public void PlattCalibratorEstimator()
        {
            // Build a scored dataset to calibrate against.
            var testData = GetCalibratorTestData();

            // Fit a Platt calibrator on the scored data.
            var estimator   = new PlattCalibratorEstimator(Env);
            var transformer = estimator.Fit(testData.ScoredData);

            // The calibrated output must contain valid probabilities.
            CheckValidCalibratedData(testData.ScoredData, transformer);

            // Run the shared estimator contract checks.
            TestEstimatorCore(estimator, testData.ScoredData);

            Done();
        }
        public void PlattCalibratorEstimator()
        {
            var calibratorTestData = GetCalibratorTestData();

            // plattCalibrator — constructed with an explicit model plus label and
            // feature column names. (Local names fixed from "plat" to "platt" to
            // match the estimator's name and the sibling test above.)
            // NOTE(review): the `.transformer` property access is lower-case while
            // sibling code uses PascalCase properties (e.g. ScoredData) — confirm
            // the actual property name on the test-data type.
            var plattCalibratorEstimator   = new PlattCalibratorEstimator(Env, calibratorTestData.transformer.Model, "Label", "Features");
            var plattCalibratorTransformer = plattCalibratorEstimator.Fit(calibratorTestData.ScoredData);

            // testData — member casing fixed to match the sibling test:
            // CheckValidCalibratedData / ScoredData (PascalCase), not the
            // camelCase variants this copy used.
            CheckValidCalibratedData(calibratorTestData.ScoredData, plattCalibratorTransformer);

            // test estimator
            TestEstimatorCore(plattCalibratorEstimator, calibratorTestData.ScoredData);

            Done();
        }
        public void OVAWithAllConstructorArgs()
        {
            var (pipeline, data) = GetMulticlassPipeline();

            // Binary learner used for each one-versus-all sub-model.
            var perceptronOptions = new AveragedPerceptronTrainer.Options
            {
                Shuffle = true
            };
            var binaryTrainer = ML.BinaryClassification.Trainers.AveragedPerceptron(perceptronOptions);

            // Exercise every OneVersusAll constructor argument explicitly.
            var ovaEstimator = ML.MulticlassClassification.Trainers.OneVersusAll(
                binaryTrainer,
                imputeMissingLabelsAsNegative: true,
                calibrator: new PlattCalibratorEstimator(Env),
                maxCalibrationExamples: 10000,
                useProbabilities: true);

            pipeline = pipeline
                .Append(ovaEstimator)
                .Append(new KeyToValueMappingEstimator(Env, "PredictedLabel"));

            TestEstimatorCore(pipeline, data);
            Done();
        }
Exemplo n.º 4
0
        public void TestCalibratorEstimatorBackwardsCompatibility()
        {
            // The legacy model being loaded below was trained and saved with
            // version as such:

            /*
             * var mlContext = new MLContext(seed: 1);
             * var calibratorTestData = GetCalibratorTestData();
             * var plattCalibratorEstimator = new PlattCalibratorEstimator(Env);
             * var plattCalibratorTransformer = plattCalibratorEstimator.Fit(calibratorTestData.ScoredData);
             * mlContext.Model.Save(plattCalibratorTransformer, calibratorTestData.ScoredData.Schema, "calibrator-model_VerWritten_0x00010001xyz.zip");
             */

            var          modelPath = GetDataPath("backcompat", "Calibrator_Model_VerWritten_0x00010001.zip");
            ITransformer oldPlattCalibratorTransformer;

            using (var fs = File.OpenRead(modelPath))
                oldPlattCalibratorTransformer = ML.Model.Load(fs, out var schema);

            var calibratorTestData            = GetCalibratorTestData();
            var newPlattCalibratorEstimator   = new PlattCalibratorEstimator(Env);
            var newPlattCalibratorTransformer = newPlattCalibratorEstimator.Fit(calibratorTestData.ScoredData);

            // Check that both models produce the same output
            var oldCalibratedData = oldPlattCalibratorTransformer.Transform(calibratorTestData.ScoredData).Preview();
            var newCalibratedData = newPlattCalibratorTransformer.Transform(calibratorTestData.ScoredData).Preview();

            // Check first that the produced schemas and outputs are of the same size.
            // Assert.Equal gives a better failure message (shows both values) than
            // Assert.True on an '==' expression.
            Assert.Equal(oldCalibratedData.RowView.Length, newCalibratedData.RowView.Length);
            Assert.Equal(oldCalibratedData.ColumnView.Length, newCalibratedData.ColumnView.Length);

            // Then check the produced probabilities (5th value corresponds to probabilities)
            // for equality, within rounding error.
            // BUG FIX: the original compared floats with exact '==' despite the comment
            // promising a rounding tolerance; use an epsilon comparison instead.
            // Also guard the hard-coded 10 against datasets with fewer rows.
            int rowsToCheck = System.Math.Min(10, oldCalibratedData.RowView.Length);
            for (int i = 0; i < rowsToCheck; i++)
            {
                var oldProbability = (float)oldCalibratedData.RowView[i].Values[5].Value;
                var newProbability = (float)newCalibratedData.RowView[i].Values[5].Value;
                Assert.True(System.Math.Abs(oldProbability - newProbability) < 1e-6f);
            }

            Done();
        }
Exemplo n.º 5
0
        public static void Calibration()
        {
            // Fetch the sentiment dataset from github.com/dotnet/machinelearning.
            // This writes a sentiment.tsv file to disk and returns its path;
            // open the file if you want to inspect the raw data.
            string dataFilePath = SamplesUtils.DatasetUtils.DownloadSentimentDataset();

            // A preview of the data.
            // Sentiment	SentimentText
            //      0	    " :Erm, thank you. "
            //      1	    ==You're cool==

            // Root object for ML.NET operations: exception tracking and logging,
            // the catalog of available operations, and the source of randomness.
            var mlContext = new MLContext();

            // Describe how the TSV columns map to typed columns.
            var textLoader = mlContext.Data.CreateTextReader(new TextLoader.Arguments()
            {
                Separator = "tab",
                HasHeader = true,
                Column    = new[]
                {
                    new TextLoader.Column("Sentiment", DataKind.BL, 0),
                    new TextLoader.Column("SentimentText", DataKind.Text, 1)
                }
            });

            // Load the dataset.
            var fullData = textLoader.Read(dataFilePath);

            // Split into two parts: one to train the classifier, the other
            // (10%) reserved for training the calibrator afterwards.
            var (trainingSet, calibrationSet) = mlContext.BinaryClassification.TrainTestSplit(fullData, testFraction: 0.1);

            // Featurize the text column, then append an SDCA binary classifier
            // with "Sentiment" as the label and the featurized "Features" column.
            var trainingPipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features")
                .Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(
                    labelColumn: "Sentiment",
                    featureColumn: "Features",
                    l2Const: 0.001f,
                    loss: new HingeLoss())); // By specifying loss: new HingeLoss(), StochasticDualCoordinateAscent will train a support vector machine (SVM).

            // Fit the pipeline and grab the trained model for the calibrator.
            var        fittedPipeline = trainingPipeline.Fit(trainingSet);
            IPredictor svmModel       = fittedPipeline.LastTransformer.Model;

            // Score the calibration split. Raw SVM scores estimate how likely a
            // sample bears positive sentiment, but only relative to each other —
            // they are not probabilities yet.
            var calibrationScored = fittedPipeline.Transform(calibrationSet);

            PrintRowViewValues(calibrationScored.Preview());
            // Preview of the scored rows
            //
            // Score - 0.458968
            // Score - 0.7022135
            // Score 1.138822
            // Score 0.4807112
            // Score 1.112813

            // Train a Platt calibrator on the scored dataset. Its transformer
            // adds a new "Probability" column to the data it transforms.
            var plattEstimator   = new PlattCalibratorEstimator(mlContext, svmModel, "Sentiment", "Features");
            var plattTransformer = plattEstimator.Fit(calibrationScored);

            // "Probability" is a calibrated version of "Score": a valid value in
            // [0, 1] giving the chance the sample bears positive sentiment.
            var calibratedPreview = plattTransformer.Transform(calibrationScored).Preview();

            PrintRowViewValues(calibratedPreview);

            // Preview of the calibrated rows
            //
            // Score - 0.458968    Probability 0.4670409
            // Score - 0.7022135   Probability 0.3912723
            // Score 1.138822      Probability 0.8703266
            // Score 0.4807112    Probability 0.7437012
            // Score 1.112813      Probability 0.8665403
        }
Exemplo n.º 6
0
        public void TestNonStandardCalibratorEstimatorClasses()
        {
            var mlContext = new MLContext(0);

            // Store different possible variations of calibrator data classes:
            // reversed member order, a non-default score column name, and both.
            IDataView[] dataArray = new IDataView[]
            {
                mlContext.Data.LoadFromEnumerable <CalibratorTestInputReversedOrder>(
                    new CalibratorTestInputReversedOrder[]
                {
                    new CalibratorTestInputReversedOrder {
                        Score = 10, Label = true
                    },
                    new CalibratorTestInputReversedOrder {
                        Score = 15, Label = false
                    }
                }),
                mlContext.Data.LoadFromEnumerable <CalibratorTestInputUniqueScoreColumnName>(
                    new CalibratorTestInputUniqueScoreColumnName[]
                {
                    new CalibratorTestInputUniqueScoreColumnName {
                        Label = true, ScoreX = 10
                    },
                    new CalibratorTestInputUniqueScoreColumnName {
                        Label = false, ScoreX = 15
                    }
                }),
                mlContext.Data.LoadFromEnumerable <CalibratorTestInputReversedOrderAndUniqueScoreColumnName>(
                    new CalibratorTestInputReversedOrderAndUniqueScoreColumnName[]
                {
                    new CalibratorTestInputReversedOrderAndUniqueScoreColumnName {
                        ScoreX = 10, Label = true
                    },
                    new CalibratorTestInputReversedOrderAndUniqueScoreColumnName {
                        ScoreX = 15, Label = false
                    }
                })
            };

            // When label and/or score columns are different from their default names ("Label" and "Score", respectively), they
            // need to be manually defined as done below.
            // Successful training of estimators and transforming with transformers indicate correct label and score columns
            // have been found.
            for (int i = 0; i < dataArray.Length; i++)
            {
                // The first data view uses the default "Score" column name; the
                // rest use "ScoreX". Hoisted into one local instead of repeating
                // the same ternary in all four constructor calls below.
                var scoreColumnName = i > 0 ? "ScoreX" : DefaultColumnNames.Score;

                // Test PlattCalibratorEstimator
                var calibratorPlattEstimator   = new PlattCalibratorEstimator(Env, scoreColumnName: scoreColumnName);
                var calibratorPlattTransformer = calibratorPlattEstimator.Fit(dataArray[i]);
                calibratorPlattTransformer.Transform(dataArray[i]);

                // Test FixedPlattCalibratorEstimator
                var calibratorFixedPlattEstimator   = new FixedPlattCalibratorEstimator(Env, scoreColumn: scoreColumnName);
                var calibratorFixedPlattTransformer = calibratorFixedPlattEstimator.Fit(dataArray[i]);
                calibratorFixedPlattTransformer.Transform(dataArray[i]);

                // Test NaiveCalibratorEstimator
                var calibratorNaiveEstimator   = new NaiveCalibratorEstimator(Env, scoreColumn: scoreColumnName);
                var calibratorNaiveTransformer = calibratorNaiveEstimator.Fit(dataArray[i]);
                calibratorNaiveTransformer.Transform(dataArray[i]);

                // Test IsotonicCalibratorEstimator
                var calibratorIsotonicEstimator   = new IsotonicCalibratorEstimator(Env, scoreColumn: scoreColumnName);
                var calibratorIsotonicTransformer = calibratorIsotonicEstimator.Fit(dataArray[i]);
                calibratorIsotonicTransformer.Transform(dataArray[i]);
            }
        }