/// <summary>
/// Scenario: trains a linear classifier on the sentiment data, calibrates it with a Platt
/// calibrator, evaluates the scored output, and then re-scores the same data with a
/// <see cref="BinaryClassifierScorer"/> configured with a probability threshold of 0.01
/// before evaluating a second time.
/// </summary>
void ReconfigurablePrediction()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    // Threshold applied to the probability column by the reconfigured scorer and evaluator.
    const float probabilityThreshold = 0.01f;

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline: load the text file and apply the sentiment text transform.
        var loaderArgs = MakeSentimentTextLoaderArgs();
        var loader = new TextLoader(env, loaderArgs, new MultiFileSource(dataPath));
        var featurized = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader);

        // Train on a cached view of the featurized data.
        var trainerArgs = new LinearClassificationTrainer.Arguments { NumThreads = 1 };
        var trainer = new LinearClassificationTrainer(env, trainerArgs);

        var cachedView = new CacheDataView(env, featurized, prefetch: null);
        var trainRoles = new RoleMappedData(cachedView, label: "Label", feature: "Features");
        IPredictor predictor = trainer.Train(new Runtime.TrainContext(trainRoles));

        // Replace the trained predictor with its calibrated counterpart.
        using (var ch = env.Start("Calibrator training"))
        {
            var calibratorTrainer = new PlattCalibratorTrainer(env);
            predictor = CalibratorUtils.TrainCalibrator(
                env, ch, calibratorTrainer, int.MaxValue, predictor, trainRoles);
        }

        // Score the (uncached) featurized data and evaluate with default evaluator arguments.
        var scoreRoles = new RoleMappedData(featurized, label: "Label", feature: "Features");
        IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema);

        var defaultEvalData = new RoleMappedData(scorer, label: "Label", feature: "Features", opt: true);
        var defaultEvaluator = new BinaryClassifierMamlEvaluator(
            env,
            new BinaryClassifierMamlEvaluator.Arguments());
        var defaultMetricsDict = defaultEvaluator.Evaluate(defaultEvalData);
        var metrics = BinaryClassificationMetrics.FromMetrics(
            env,
            defaultMetricsDict["OverallMetrics"],
            defaultMetricsDict["ConfusionMatrix"])[0];

        // Reconfigure: bind the same predictor and score with an explicit threshold on the
        // probability column.
        var bindable = ScoreUtils.GetSchemaBindableMapper(env, predictor, null);
        var boundMapper = bindable.Bind(env, trainRoles.Schema);

        var thresholdScorerArgs = new BinaryClassifierScorer.Arguments
        {
            Threshold = probabilityThreshold,
            ThresholdColumn = DefaultColumnNames.Probability
        };
        var newScorer = new BinaryClassifierScorer(
            env, thresholdScorerArgs, scoreRoles.Data, boundMapper, trainRoles.Schema);

        // Evaluate the re-scored data with the matching (non-raw-score) threshold.
        var thresholdEvalData = new RoleMappedData(newScorer, label: "Label", feature: "Features", opt: true);
        var thresholdEvaluatorArgs = new BinaryClassifierMamlEvaluator.Arguments()
        {
            Threshold = probabilityThreshold,
            UseRawScoreThreshold = false
        };
        var newEvaluator = new BinaryClassifierMamlEvaluator(env, thresholdEvaluatorArgs);
        var thresholdMetricsDict = newEvaluator.Evaluate(thresholdEvalData);
        var newMetrics = BinaryClassificationMetrics.FromMetrics(
            env,
            thresholdMetricsDict["OverallMetrics"],
            thresholdMetricsDict["ConfusionMatrix"])[0];
    }
}
/// <summary>
/// Evaluates already-scored data as a binary classification problem, using the "Label" and
/// "Features" columns, and returns the first metrics entry produced.
/// </summary>
/// <param name="env">Host environment passed to the evaluator and the metrics conversion.</param>
/// <param name="scoredData">Scored data view to evaluate.</param>
/// <returns>The first element of the metrics built from the evaluator output.</returns>
private BinaryClassificationMetrics EvaluateBinary(IHostEnvironment env, IDataView scoredData)
{
    var evalRoles = new RoleMappedData(scoredData, label: "Label", feature: "Features", opt: true);

    // The MAML evaluator is used deliberately: BinaryClassifierEvaluator was reported not to
    // work here, failing with "Failed to find 'Score' column" when Evaluate is called.
    var mamlEvaluator = new BinaryClassifierMamlEvaluator(env, new BinaryClassifierMamlEvaluator.Arguments());
    var results = mamlEvaluator.Evaluate(evalRoles);

    var overall = results["OverallMetrics"];
    var confusion = results["ConfusionMatrix"];
    return BinaryClassificationMetrics.FromMetrics(env, overall, confusion)[0];
}
/// <summary>
/// Evaluates scored data with the wrapped evaluator and returns the resulting binary
/// classification metrics.
/// </summary>
/// <param name="data">Scored data view; its schema is used to look up the score column.</param>
/// <param name="labelColumn">Name of the column passed as the label role.</param>
/// <param name="probabilityColumn">Name of the column mapped to the probability score value kind.</param>
/// <returns>
/// The single metrics entry built from the evaluator output. <c>Single()</c> throws if the
/// conversion does not yield exactly one entry.
/// </returns>
public BinaryClassificationMetrics Evaluate(IDataView data, string labelColumn = DefaultColumnNames.Label, string probabilityColumn = DefaultColumnNames.Probability)
{
    // Look up the score column in the incoming schema.
    var scoreInfo = EvaluateUtils.GetScoreColumnInfo(
        _env,
        data.Schema,
        null,
        DefaultColumnNames.Score,
        MetadataUtils.Const.ScoreColumnKind.BinaryClassification);

    // Custom roles: the probability column first, then the score column that was found.
    KeyValuePair<RoleMappedSchema.ColumnRole, string>[] customRoles =
    {
        RoleMappedSchema.CreatePair(MetadataUtils.Const.ScoreValueKind.Probability, probabilityColumn),
        RoleMappedSchema.CreatePair(MetadataUtils.Const.ScoreValueKind.Score, scoreInfo.Name)
    };

    var evalRoles = new RoleMappedData(data, labelColumn, DefaultColumnNames.Features, opt: true, custom: customRoles);
    var results = _evaluator.Evaluate(evalRoles);

    var overall = results["OverallMetrics"];
    var confusion = results["ConfusionMatrix"];
    return BinaryClassificationMetrics.FromMetrics(_env, overall, confusion).Single();
}
/// <summary>
/// Scenario: trains a linear classifier on the sentiment data, builds a scorer and a
/// prediction engine from it, loads the test file as an enumerable, and evaluates the
/// scorer output with default binary classification evaluator arguments.
/// </summary>
public void Evaluation()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline: load the text file and apply the sentiment text transform.
        var trainLoaderArgs = MakeSentimentTextLoaderArgs();
        var loader = new TextLoader(env, trainLoaderArgs, new MultiFileSource(dataPath));
        var featurized = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader);

        // Train on a cached view of the featurized data.
        var trainerArgs = new LinearClassificationTrainer.Arguments { NumThreads = 1 };
        var trainer = new LinearClassificationTrainer(env, trainerArgs);

        var cachedView = new CacheDataView(env, featurized, prefetch: null);
        var trainRoles = new RoleMappedData(cachedView, label: "Label", feature: "Features");
        var predictor = trainer.Train(new Runtime.TrainContext(trainRoles));

        // Score the (uncached) featurized data with the trained predictor.
        var scoreRoles = new RoleMappedData(featurized, label: "Label", feature: "Features");
        IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema);

        // Create a prediction engine on top of the scorer.
        var model = env.CreatePredictionEngine<SentimentData, SentimentPrediction>(scorer);

        // Expose the test file as an enumerable of typed rows.
        var testLoaderArgs = MakeSentimentTextLoaderArgs();
        var testSource = new MultiFileSource(GetDataPath(SentimentTestPath));
        var testLoader = new TextLoader(env, testLoaderArgs, testSource);
        var testData = testLoader.AsEnumerable<SentimentData>(env, false);

        // Evaluate the scorer output.
        var evalRoles = new RoleMappedData(scorer, label: "Label", feature: "Features", opt: true);
        var evaluator = new BinaryClassifierMamlEvaluator(
            env,
            new BinaryClassifierMamlEvaluator.Arguments());
        var results = evaluator.Evaluate(evalRoles);
        var metrics = BinaryClassificationMetrics.FromMetrics(
            env,
            results["OverallMetrics"],
            results["ConfusionMatrix"])[0];
    }
}
/// <summary>
/// Scenario: manual 5-fold cross-validation over the sentiment data. A generated number
/// column ("StratificationColumn") is range-filtered to split each fold into a training
/// part (the complement of the fold's range) and a test part (the fold's range); a linear
/// classifier is trained per fold and its metrics are collected.
/// </summary>
void CrossValidation()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);
    int numFolds = 5;

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline: loader, text transform, then the generated column used for splitting.
        var loaderArgs = MakeSentimentTextLoaderArgs();
        var loader = new TextLoader(env, loaderArgs, new MultiFileSource(dataPath));
        var text = TextTransform.Create(env, MakeSentimentTextTransformArgs(false), loader);
        IDataView splitSource = new GenerateNumberTransform(env, text, "StratificationColumn");

        // Trainer shared by all folds.
        var trainerArgs = new LinearClassificationTrainer.Arguments
        {
            NumThreads = 1,
            ConvergenceTolerance = 1f
        };
        var trainer = new LinearClassificationTrainer(env, trainerArgs);

        var metrics = new List<BinaryClassificationMetrics>();
        for (int fold = 0; fold < numFolds; fold++)
        {
            // Bounds of this fold's slice of the split column.
            double foldMin = (double)fold / numFolds;
            double foldMax = (double)(fold + 1) / numFolds;

            // Training data: everything outside [foldMin, foldMax].
            var trainFilterArgs = new RangeFilter.Arguments()
            {
                Column = "StratificationColumn",
                Min = foldMin,
                Max = foldMax,
                Complement = true
            };
            IDataView trainFilter = new RangeFilter(env, trainFilterArgs, splitSource);
            IDataView trainPipe = new OpaqueDataView(trainFilter);
            var trainData = new RoleMappedData(trainPipe, label: "Label", feature: "Features");

            // Auto-normalization.
            NormalizeTransform.CreateIfNeeded(env, ref trainData, trainer);
            var preCachedData = trainData;

            // Auto-caching.
            if (trainer.Info.WantCaching)
            {
                var prefetch = trainData.Schema.GetColumnRoles()
                    .Select(role => role.Value.Index)
                    .ToArray();
                var cacheView = new CacheDataView(env, trainData.Data, prefetch);
                // Because the prefetching worked, we know that these are valid columns.
                trainData = new RoleMappedData(cacheView, trainData.Schema.GetColumnRoleNames());
            }

            var predictor = trainer.Train(new Runtime.TrainContext(trainData));

            // Test data: the slice inside [foldMin, foldMax].
            var testFilterArgs = new RangeFilter.Arguments()
            {
                Column = "StratificationColumn",
                Min = foldMin,
                Max = foldMax,
                Complement = false
            };
            IDataView testFilter = new RangeFilter(env, testFilterArgs, splitSource);
            IDataView testPipe = new OpaqueDataView(testFilter);

            // Apply the transforms from the pre-cache training pipeline to the test slice.
            var pipe = ApplyTransformUtils.ApplyAllTransformsToData(env, preCachedData.Data, testPipe, trainPipe);
            var testRoles = new RoleMappedData(pipe, trainData.Schema.GetColumnRoleNames());

            // Score and evaluate this fold.
            IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, testRoles, env, testRoles.Schema);
            BinaryClassifierMamlEvaluator eval = new BinaryClassifierMamlEvaluator(
                env,
                new BinaryClassifierMamlEvaluator.Arguments());
            var evalRoles = new RoleMappedData(scorer, testRoles.Schema.GetColumnRoleNames(), opt: true);
            var results = eval.Evaluate(evalRoles);
            var foldMetrics = BinaryClassificationMetrics.FromMetrics(
                env,
                results["OverallMetrics"],
                results["ConfusionMatrix"]);
            metrics.Add(foldMetrics.Single());
        }
    }
}