public void TwitterSentimentAnalysis()
{
    var env = new LocalEnvironment();
    var dataPath = "wikipedia-detox-250-line-data.tsv";

    // Load the data into the system.
    var dataReader = TextLoader.CreateReader(env, dataPath, ctx => (
        label: ctx.LoadFloat(0),
        text: ctx.LoadText(1)), hasHeader: true);

    var pipeline = dataReader.MakeEstimator()
        .Append(row => (
            label: row.label,
            // Concatenate all features into a vector.
            features: row.text.TextFeaturizer()))
        .Append(row => (
            label: row.label,
            prediction: row.label.TrainLinearClassification(row.features)));

    var (trainData, testData) = dataReader.Read(dataPath).TrainTestSplit(trainFraction: 0.7);
    var model = pipeline.Fit(trainData);
    var predictions = model.Transform(testData);
    var metrics = BinaryClassifierEvaluator.Evaluate(predictions, row => row.label, row => row.prediction);
}
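// Hedged follow-up sketch, not part of the original sample: assuming the metrics object returned by
// BinaryClassifierEvaluator.Evaluate exposes the same summary properties asserted in the SDCA tests
// further below (Accuracy, Auc, Auprc), the results could be reported with a small helper like this.
// `PrintBinaryMetrics` is a hypothetical name, and `dynamic` is used only because the concrete
// metrics type is not pinned down in this excerpt.
private static void PrintBinaryMetrics(dynamic metrics)
{
    Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
    Console.WriteLine($"AUC:      {metrics.Auc:F4}");
    Console.WriteLine($"AUPRC:    {metrics.Auprc:F4}");
}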
/// <summary>
/// Evaluates scored binary classification data, without probability-based metrics.
/// </summary>
/// <param name="data">The scored data.</param>
/// <param name="labelColumnName">The name of the label column in <paramref name="data"/>.</param>
/// <param name="scoreColumnName">The name of the score column in <paramref name="data"/>.</param>
/// <param name="predictedLabelColumnName">The name of the predicted label column in <paramref name="data"/>.</param>
/// <returns>The evaluation results for these uncalibrated outputs.</returns>
public BinaryClassificationMetrics EvaluateNonCalibrated(IDataView data, string labelColumnName = DefaultColumnNames.Label,
    string scoreColumnName = DefaultColumnNames.Score, string predictedLabelColumnName = DefaultColumnNames.PredictedLabel)
{
    Environment.CheckValue(data, nameof(data));
    Environment.CheckNonEmpty(labelColumnName, nameof(labelColumnName));
    Environment.CheckNonEmpty(predictedLabelColumnName, nameof(predictedLabelColumnName));

    var eval = new BinaryClassifierEvaluator(Environment, new BinaryClassifierEvaluator.Arguments() { });
    return eval.Evaluate(data, labelColumnName, scoreColumnName, predictedLabelColumnName);
}
/// <summary>
/// Evaluates scored binary classification data, without probability-based metrics.
/// </summary>
/// <param name="data">The scored data.</param>
/// <param name="label">The name of the label column in <paramref name="data"/>.</param>
/// <param name="score">The name of the score column in <paramref name="data"/>.</param>
/// <param name="predictedLabel">The name of the predicted label column in <paramref name="data"/>.</param>
/// <returns>The evaluation results for these uncalibrated outputs.</returns>
public BinaryClassifierEvaluator.Result EvaluateNonCalibrated(IDataView data, string label, string score = DefaultColumnNames.Score,
    string predictedLabel = DefaultColumnNames.PredictedLabel)
{
    Host.CheckValue(data, nameof(data));
    Host.CheckNonEmpty(label, nameof(label));
    Host.CheckNonEmpty(score, nameof(score));
    Host.CheckNonEmpty(predictedLabel, nameof(predictedLabel));

    var eval = new BinaryClassifierEvaluator(Host, new BinaryClassifierEvaluator.Arguments() { });
    return eval.Evaluate(data, label, score, predictedLabel);
}
public BinaryClassificationMetrics Evaluate(IDataView data, string labelColumn = DefaultColumnNames.Label,
    string probabilityColumn = DefaultColumnNames.Probability)
{
    // Locate the score column produced by the binary classification scorer.
    var ci = EvaluateUtils.GetScoreColumnInfo(_env, data.Schema, null, DefaultColumnNames.Score,
        MetadataUtils.Const.ScoreColumnKind.BinaryClassification);

    // Map the probability and score columns onto the roles the evaluator expects.
    var map = new KeyValuePair<RoleMappedSchema.ColumnRole, string>[]
    {
        RoleMappedSchema.CreatePair(MetadataUtils.Const.ScoreValueKind.Probability, probabilityColumn),
        RoleMappedSchema.CreatePair(MetadataUtils.Const.ScoreValueKind.Score, ci.Name)
    };
    var rmd = new RoleMappedData(data, labelColumn, DefaultColumnNames.Features, opt: true, custom: map);

    // Evaluate and collapse the results into a single metrics object.
    var metricsDict = _evaluator.Evaluate(rmd);
    return BinaryClassificationMetrics.FromMetrics(_env, metricsDict["OverallMetrics"], metricsDict["ConfusionMatrix"]).Single();
}
/// <summary>
/// Evaluates scored binary classification data.
/// </summary>
/// <param name="data">The scored data.</param>
/// <param name="label">The name of the label column in <paramref name="data"/>.</param>
/// <param name="score">The name of the score column in <paramref name="data"/>.</param>
/// <param name="probability">The name of the probability column in <paramref name="data"/>, the calibrated version of <paramref name="score"/>.</param>
/// <param name="predictedLabel">The name of the predicted label column in <paramref name="data"/>.</param>
/// <returns>The evaluation results for these calibrated outputs.</returns>
public CalibratedBinaryClassificationMetrics Evaluate(IDataView data, string label = DefaultColumnNames.Label,
    string score = DefaultColumnNames.Score, string probability = DefaultColumnNames.Probability,
    string predictedLabel = DefaultColumnNames.PredictedLabel)
{
    Host.CheckValue(data, nameof(data));
    Host.CheckNonEmpty(label, nameof(label));
    Host.CheckNonEmpty(score, nameof(score));
    Host.CheckNonEmpty(probability, nameof(probability));
    Host.CheckNonEmpty(predictedLabel, nameof(predictedLabel));

    var eval = new BinaryClassifierEvaluator(Host, new BinaryClassifierEvaluator.Arguments() { });
    return eval.Evaluate(data, label, score, probability, predictedLabel);
}
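// Hedged usage sketch, not part of the original source: a caller holding the context/catalog object that
// defines Evaluate above could pass a scored IDataView and read back the probability-based metrics using
// the default column names. `BinaryClassificationContext`, `EvaluateWithDefaults`, `catalog`, and
// `scoredData` are assumed names for illustration only.
public static void EvaluateWithDefaults(BinaryClassificationContext catalog, IDataView scoredData)
{
    // All column-name parameters fall back to DefaultColumnNames.* as declared in the method above.
    CalibratedBinaryClassificationMetrics metrics = catalog.Evaluate(scoredData);
    Console.WriteLine($"Accuracy: {metrics.Accuracy}");
    Console.WriteLine($"LogLoss:  {metrics.LogLoss}");
}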
public void SdcaBinaryClassificationNoCalibration()
{
    var env = new ConsoleEnvironment(seed: 0);
    var dataPath = GetDataPath("breast-cancer.txt");
    var dataSource = new MultiFileSource(dataPath);

    var reader = TextLoader.CreateReader(env,
        c => (label: c.LoadBool(0), features: c.LoadFloat(1, 9)));

    LinearBinaryPredictor pred = null;

    var loss = new HingeLoss(new HingeLoss.Arguments() { Margin = 1 });

    // With a custom loss function we no longer get calibrated predictions.
    var est = reader.MakeNewEstimator()
        .Append(r => (r.label, preds: r.label.PredictSdcaBinaryClassification(r.features,
            maxIterations: 2, loss: loss, onFit: p => pred = p)));

    var pipe = reader.Append(est);

    Assert.Null(pred);
    var model = pipe.Fit(dataSource);
    Assert.NotNull(pred);
    // 9 input features, so we ought to have 9 weights.
    Assert.Equal(9, pred.Weights2.Count);

    var data = model.Read(dataSource);

    var metrics = BinaryClassifierEvaluator.Evaluate(data, r => r.label, r => r.preds);
    // Run a sanity check against a few of the metrics.
    Assert.InRange(metrics.Accuracy, 0, 1);
    Assert.InRange(metrics.Auc, 0, 1);
    Assert.InRange(metrics.Auprc, 0, 1);

    // Just output some data on the schema for fun.
    var schema = data.AsDynamic.Schema;
    for (int c = 0; c < schema.ColumnCount; ++c)
        Console.WriteLine($"{schema.GetColumnName(c)}, {schema.GetColumnType(c)}");
}
public void SdcaBinaryClassification()
{
    var env = new ConsoleEnvironment(seed: 0);
    var dataPath = GetDataPath("breast-cancer.txt");
    var dataSource = new MultiFileSource(dataPath);

    var reader = TextLoader.CreateReader(env,
        c => (label: c.LoadBool(0), features: c.LoadFloat(1, 9)));

    LinearBinaryPredictor pred = null;
    ParameterMixingCalibratedPredictor cali = null;

    var est = reader.MakeNewEstimator()
        .Append(r => (r.label, preds: r.label.PredictSdcaBinaryClassification(r.features,
            maxIterations: 2, onFit: (p, c) => { pred = p; cali = c; })));

    var pipe = reader.Append(est);

    Assert.Null(pred);
    Assert.Null(cali);
    var model = pipe.Fit(dataSource);
    Assert.NotNull(pred);
    Assert.NotNull(cali);
    // 9 input features, so we ought to have 9 weights.
    Assert.Equal(9, pred.Weights2.Count);

    var data = model.Read(dataSource);

    var metrics = BinaryClassifierEvaluator.Evaluate(data, r => r.label, r => r.preds);
    // Run a sanity check against a few of the metrics.
    Assert.InRange(metrics.Accuracy, 0, 1);
    Assert.InRange(metrics.Auc, 0, 1);
    Assert.InRange(metrics.Auprc, 0, 1);
    Assert.InRange(metrics.LogLoss, 0, double.PositiveInfinity);
    Assert.InRange(metrics.Entropy, 0, double.PositiveInfinity);

    // Just output some data on the schema for fun.
    var schema = data.AsDynamic.Schema;
    for (int c = 0; c < schema.ColumnCount; ++c)
        Console.WriteLine($"{schema.GetColumnName(c)}, {schema.GetColumnType(c)}");
}