/// <summary>
/// Fits a Platt calibrator estimator on the scored calibrator test data, hands the scored data and
/// the fitted transformer to CheckValidCalibratedData, and then runs TestEstimatorCore on the estimator.
/// </summary>
public void PlattCalibratorEstimator()
{
    var testData = GetCalibratorTestData();
    var scored = testData.ScoredData;

    // Train the Platt calibrator on the already-scored data.
    var estimator = new PlattCalibratorEstimator(Env);
    var fittedCalibrator = estimator.Fit(scored);

    // Check the data produced by the fitted calibrator.
    CheckValidCalibratedData(scored, fittedCalibrator);

    // Run the shared estimator checks against the same scored data.
    TestEstimatorCore(estimator, scored);

    Done();
}
/// <summary>
/// Builds a Platt calibrator estimator from the test transformer's model (using the "Label" and
/// "Features" columns), fits it on the scored test data, hands the result to
/// checkValidCalibratedData, and then runs TestEstimatorCore on the estimator.
/// </summary>
public void PlattCalibratorEstimator()
{
    var testData = GetCalibratorTestData();

    // Build the Platt calibrator around the model of the already-trained transformer.
    var estimator = new PlattCalibratorEstimator(Env, testData.transformer.Model, "Label", "Features");
    var fittedCalibrator = estimator.Fit(testData.scoredData);

    // Check the data produced by the fitted calibrator.
    checkValidCalibratedData(testData.scoredData, fittedCalibrator);

    // Run the shared estimator checks against the same scored data.
    TestEstimatorCore(estimator, testData.scoredData);

    Done();
}
/// <summary>
/// Loads a Platt calibrator model saved by an earlier version from the "backcompat" test data and
/// verifies that it produces the same output as a Platt calibrator freshly fitted on the current
/// calibrator test data: same number of rows, same number of columns, and matching probability
/// values (within a small tolerance) for the first rows of the preview.
/// </summary>
public void TestCalibratorEstimatorBackwardsCompatibility()
{
    // The legacy model being loaded below was trained and saved with
    // version as such:
    /*
     * var mlContext = new MLContext(seed: 1);
     * var calibratorTestData = GetCalibratorTestData();
     * var plattCalibratorEstimator = new PlattCalibratorEstimator(Env);
     * var plattCalibratorTransformer = plattCalibratorEstimator.Fit(calibratorTestData.ScoredData);
     * mlContext.Model.Save(plattCalibratorTransformer, calibratorTestData.ScoredData.Schema, "calibrator-model_VerWritten_0x00010001xyz.zip");
     */

    // Number of leading preview rows whose probabilities are compared.
    const int rowsToCompare = 10;
    // Position of the probability value inside each previewed row.
    const int probabilityIndex = 5;
    // Absolute tolerance for the comparison. The check is meant to hold "within rounding error",
    // so exact float equality is too strict. NOTE(review): assumes probabilities lie in [0, 1],
    // where 1e-6 is a few float ULPs — confirm if the column can hold larger magnitudes.
    const float tolerance = 1e-6f;

    var modelPath = GetDataPath("backcompat", "Calibrator_Model_VerWritten_0x00010001.zip");
    ITransformer oldPlattCalibratorTransformer;
    using (var fs = File.OpenRead(modelPath))
    {
        // The schema returned alongside the model is not used by this test.
        oldPlattCalibratorTransformer = ML.Model.Load(fs, out _);
    }

    var calibratorTestData = GetCalibratorTestData();
    var newPlattCalibratorEstimator = new PlattCalibratorEstimator(Env);
    var newPlattCalibratorTransformer = newPlattCalibratorEstimator.Fit(calibratorTestData.ScoredData);

    // Check that both models produce the same output
    var oldCalibratedData = oldPlattCalibratorTransformer.Transform(calibratorTestData.ScoredData).Preview();
    var newCalibratedData = newPlattCalibratorTransformer.Transform(calibratorTestData.ScoredData).Preview();

    // Check first that the produced schemas and outputs are of the same size
    Assert.True(
        oldCalibratedData.RowView.Length == newCalibratedData.RowView.Length,
        $"Row count mismatch: old model produced {oldCalibratedData.RowView.Length}, new model produced {newCalibratedData.RowView.Length}.");
    Assert.True(
        oldCalibratedData.ColumnView.Length == newCalibratedData.ColumnView.Length,
        $"Column count mismatch: old model produced {oldCalibratedData.ColumnView.Length}, new model produced {newCalibratedData.ColumnView.Length}.");

    // Fail with a clear message (rather than an index-out-of-range exception) if the preview
    // holds fewer rows than the comparison below reads.
    Assert.True(
        oldCalibratedData.RowView.Length >= rowsToCompare,
        $"Expected at least {rowsToCompare} previewed rows but found {oldCalibratedData.RowView.Length}.");

    // Then check the produced probabilities (5th value corresponds to probabilities) for
    // equality, within rounding error.
    for (int i = 0; i < rowsToCompare; i++)
    {
        var oldProbability = (float)oldCalibratedData.RowView[i].Values[probabilityIndex].Value;
        var newProbability = (float)newCalibratedData.RowView[i].Values[probabilityIndex].Value;

        Assert.True(
            System.Math.Abs(oldProbability - newProbability) <= tolerance,
            $"Probability mismatch at row {i}: old model produced {oldProbability}, new model produced {newProbability}.");
    }

    Done();
}
/// <summary>
/// Sample: trains a binary classifier with hinge loss on the sentiment dataset, scores a held-out
/// split, and then fits a Platt calibrator on those scores so that a "Probability" column is
/// added next to the raw "Score" column. Previews of both stages are printed.
/// </summary>
public static void Calibration()
{
    // Download the dataset from github.com/dotnet/machinelearning.
    // This creates a sentiment.tsv file in the filesystem; the returned string is its path.
    // Open that file if you want to look at the raw data.
    string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset();

    // A preview of the data.
    // Sentiment    SentimentText
    //     0        " :Erm, thank you. "
    //     1        ==You're cool==

    // The MLContext is the entry point for ML.NET operations: it can be used for exception tracking
    // and logging, as a catalog of available operations and as the source of randomness.
    var mlContext = new MLContext();

    // Describe the layout of the tab-separated file: a boolean label followed by the text.
    var columns = new[]
    {
        new TextLoader.Column("Sentiment", DataKind.BL, 0),
        new TextLoader.Column("SentimentText", DataKind.Text, 1)
    };
    var loaderArguments = new TextLoader.Arguments()
    {
        Separator = "tab",
        HasHeader = true,
        Column = columns
    };

    // Create the text loader and read the data.
    var reader = mlContext.Data.CreateTextReader(loaderArguments);
    var data = reader.Read(dataFile);

    // Split the dataset into two parts: one used to train the classifier, the other to train the calibrator.
    var (trainData, calibratorTrainingData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);

    // Featurize the text column through the FeaturizeText API, producing the "Features" column.
    var featurizer = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features");

    // The StochasticDualCoordinateAscent binary classifier uses "Sentiment" as the label column and
    // "Features" as the features column. By specifying loss: new HingeLoss(), it will train a
    // support vector machine (SVM).
    var trainer = mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(
        labelColumn: "Sentiment",
        featureColumn: "Features",
        l2Const: 0.001f,
        loss: new HingeLoss());

    var pipeline = featurizer.Append(trainer);

    // Fit the pipeline, and get a transformer that knows how to score new data.
    var transformer = pipeline.Fit(trainData);

    // Score the held-out data. The score is a numerical estimation of the chance that the particular
    // sample bears positive sentiment. This estimate is relative to the numbers obtained.
    var scoredData = transformer.Transform(calibratorTrainingData);
    PrintRowViewValues(scoredData.Preview());
    // Preview of the scored data's RowView
    //
    // Score - 0.458968
    // Score - 0.7022135
    // Score 1.138822
    // Score 0.4807112
    // Score 1.112813

    // Train a calibrator estimator on the scored dataset. The trained calibrator estimator produces
    // a transformer that can transform the scored data by adding a new column named "Probability".
    IPredictor model = transformer.LastTransformer.Model;
    var calibratorEstimator = new PlattCalibratorEstimator(mlContext, model, "Sentiment", "Features");
    var calibratorTransformer = calibratorEstimator.Fit(scoredData);

    // Transform the scored data with the calibrator transformer, adding the "Probability" column.
    // This column is a calibrated version of the "Score" column, meaning its values are valid
    // probabilities in the [0, 1] interval representing the chance that the respective sample
    // bears positive sentiment.
    var calibratedData = calibratorTransformer.Transform(scoredData);
    var finalData = calibratedData.Preview();
    PrintRowViewValues(finalData);
    // Preview of finalData.RowView
    //
    // Score - 0.458968 Probability 0.4670409
    // Score - 0.7022135 Probability 0.3912723
    // Score 1.138822 Probability 0.8703266
    // Score 0.4807112 Probability 0.7437012
    // Score 1.112813 Probability 0.8665403
}
/// <summary>
/// Fits and applies each calibrator estimator (Platt, fixed Platt, naive, isotonic) on small
/// in-memory datasets whose input classes vary in member order and in the score column name
/// ("ScoreX" instead of the default). The test passes when every Fit and Transform call
/// completes without throwing.
/// </summary>
public void TestNonStandardCalibratorEstimatorClasses()
{
    var mlContext = new MLContext(0);

    // Variation 1: default column names, members listed score-first.
    var reversedOrderData = mlContext.Data.LoadFromEnumerable<CalibratorTestInputReversedOrder>(
        new[]
        {
            new CalibratorTestInputReversedOrder { Score = 10, Label = true },
            new CalibratorTestInputReversedOrder { Score = 15, Label = false }
        });

    // Variation 2: score column named "ScoreX".
    var uniqueScoreNameData = mlContext.Data.LoadFromEnumerable<CalibratorTestInputUniqueScoreColumnName>(
        new[]
        {
            new CalibratorTestInputUniqueScoreColumnName { Label = true, ScoreX = 10 },
            new CalibratorTestInputUniqueScoreColumnName { Label = false, ScoreX = 15 }
        });

    // Variation 3: score column named "ScoreX" and members listed score-first.
    var reversedOrderUniqueScoreNameData = mlContext.Data.LoadFromEnumerable<CalibratorTestInputReversedOrderAndUniqueScoreColumnName>(
        new[]
        {
            new CalibratorTestInputReversedOrderAndUniqueScoreColumnName { ScoreX = 10, Label = true },
            new CalibratorTestInputReversedOrderAndUniqueScoreColumnName { ScoreX = 15, Label = false }
        });

    IDataView[] datasets = { reversedOrderData, uniqueScoreNameData, reversedOrderUniqueScoreNameData };

    // When label and/or score columns differ from their default names ("Label" and "Score",
    // respectively), they need to be passed explicitly, as done below. Successful training of the
    // estimators and transforming with the resulting transformers indicates that the correct label
    // and score columns have been found.
    for (int index = 0; index < datasets.Length; index++)
    {
        var dataset = datasets[index];

        // Only the first dataset keeps the default score column name.
        var scoreColumnName = index > 0 ? "ScoreX" : DefaultColumnNames.Score;

        // PlattCalibratorEstimator
        var plattEstimator = new PlattCalibratorEstimator(Env, scoreColumnName: scoreColumnName);
        plattEstimator.Fit(dataset).Transform(dataset);

        // FixedPlattCalibratorEstimator
        var fixedPlattEstimator = new FixedPlattCalibratorEstimator(Env, scoreColumn: scoreColumnName);
        fixedPlattEstimator.Fit(dataset).Transform(dataset);

        // NaiveCalibratorEstimator
        var naiveEstimator = new NaiveCalibratorEstimator(Env, scoreColumn: scoreColumnName);
        naiveEstimator.Fit(dataset).Transform(dataset);

        // IsotonicCalibratorEstimator
        var isotonicEstimator = new IsotonicCalibratorEstimator(Env, scoreColumn: scoreColumnName);
        isotonicEstimator.Fit(dataset).Transform(dataset);
    }
}