/// <summary>
/// Trains a one-versus-all model over the iris data file and checks its metrics twice:
/// once through <c>ClassificationEvaluator</c> on the trained model, and once through
/// <c>TrainTestEvaluator</c> run against the same pipeline and test data.
/// </summary>
public void TrainOneVersusAll()
{
    string irisPath = GetDataPath("iris.txt");

    // Pipeline is created with seed 1 and conc 1.
    var learningPipeline = new Legacy.LearningPipeline(seed: 1, conc: 1);

    // Stage 1: read the iris file without a header row.
    var trainingLoader = new TextLoader(irisPath).CreateFrom<IrisData>(useHeader: false);
    learningPipeline.Add(trainingLoader);

    // Stage 2: merge the four measurement columns into a single "Features" column.
    var featureConcatenator = new ColumnConcatenator(
        outputColumn: "Features",
        "SepalLength",
        "SepalWidth",
        "PetalLength",
        "PetalWidth");
    learningPipeline.Add(featureConcatenator);

    // Stage 3: wrap the binary SDCA learner in a one-versus-all trainer.
    var oneVersusAllTrainer = OneVersusAll.With(new StochasticDualCoordinateAscentBinaryClassifier());
    learningPipeline.Add(oneVersusAllTrainer);

    var trainedModel = learningPipeline.Train<IrisData, IrisPrediction>();

    // The test set is loaded from the same file that was used for training.
    var irisTestData = new TextLoader(irisPath).CreateFrom<IrisData>(useHeader: false);

    // First check: evaluate the trained model directly.
    var classificationEvaluator = new ClassificationEvaluator();
    ClassificationMetrics evaluatedMetrics = classificationEvaluator.Evaluate(trainedModel, irisTestData);
    CheckMetrics(evaluatedMetrics);

    // Second check: let the train/test evaluator drive the pipeline itself.
    var trainTestEvaluator = new TrainTestEvaluator()
    {
        Kind = MacroUtilsTrainerKinds.SignatureMultiClassClassifierTrainer
    };
    var trainTestOutput = trainTestEvaluator.TrainTestEvaluate<IrisData, IrisPrediction>(learningPipeline, irisTestData);
    CheckMetrics(trainTestOutput.ClassificationMetrics);
}
/// <summary>
/// Runs <c>TrainTestEvaluator</c> over the pipeline from <c>PreparePipeline</c> and the
/// test data from <c>PrepareTextLoaderTestData</c>, then verifies that only the binary
/// classification metrics are populated and validates the predictions and metrics.
/// </summary>
public void TrainTestPredictSentimentModelTest()
{
    var sentimentPipeline = PreparePipeline();
    var sentimentTestData = PrepareTextLoaderTestData();

    var trainTestEvaluator = new TrainTestEvaluator();
    var trainTestOutput = trainTestEvaluator.TrainTestEvaluate<SentimentData, SentimentPrediction>(
        sentimentPipeline,
        sentimentTestData);

    // Multiclass and regression metrics are expected to be absent.
    Assert.Null(trainTestOutput.ClassificationMetrics);
    Assert.Null(trainTestOutput.RegressionMetrics);

    // Binary metrics and the predictor model are expected to be present.
    Assert.NotNull(trainTestOutput.BinaryClassificationMetrics);
    Assert.NotNull(trainTestOutput.PredictorModels);

    ValidateExamples(trainTestOutput.PredictorModels);
    ValidateBinaryMetrics(trainTestOutput.BinaryClassificationMetrics);
}
/// <summary>
/// Builds a sentiment pipeline (text loader, <c>TextFeaturizer</c>,
/// <c>FastTreeBinaryClassifier</c>, <c>PredictedLabelColumnOriginalValueConverter</c>),
/// runs <c>TrainTestEvaluator</c> against a separate test file, and asserts the binary
/// metrics, the confusion matrix, and the predictions for two sample sentences.
/// </summary>
public void TrainTestPredictSentimentModelTest()
{
    string trainingPath = GetDataPath(SentimentDataPath);
    var learningPipeline = new LearningPipeline();

    // Training loader: tab-separated, with a header row; column 0 is the numeric
    // label and column 1 is the sentiment text.
    var trainingLoader = new Data.TextLoader(trainingPath);
    var trainingArguments = new TextLoaderArguments();
    trainingArguments.Separator = new[] { '\t' };
    trainingArguments.HasHeader = true;
    var trainingLabelColumn = new TextLoaderColumn();
    trainingLabelColumn.Name = "Label";
    trainingLabelColumn.Source = new[] { new TextLoaderRange(0) };
    trainingLabelColumn.Type = Runtime.Data.DataKind.Num;
    var trainingTextColumn = new TextLoaderColumn();
    trainingTextColumn.Name = "SentimentText";
    trainingTextColumn.Source = new[] { new TextLoaderRange(1) };
    trainingTextColumn.Type = Runtime.Data.DataKind.Text;
    trainingArguments.Column = new[] { trainingLabelColumn, trainingTextColumn };
    trainingLoader.Arguments = trainingArguments;
    learningPipeline.Add(trainingLoader);

    // Text featurization of "SentimentText" into "Features".
    var textFeaturizer = new TextFeaturizer("Features", "SentimentText");
    textFeaturizer.KeepDiacritics = false;
    textFeaturizer.KeepPunctuations = false;
    textFeaturizer.TextCase = TextNormalizerTransformCaseNormalizationMode.Lower;
    textFeaturizer.OutputTokens = true;
    textFeaturizer.StopWordsRemover = new PredefinedStopWordsRemover();
    textFeaturizer.VectorNormalizer = TextTransformTextNormKind.L2;
    textFeaturizer.CharFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = false };
    textFeaturizer.WordFeatureExtractor = new NGramNgramExtractor() { NgramLength = 2, AllLengths = true };
    learningPipeline.Add(textFeaturizer);

    // A deliberately small tree ensemble.
    var fastTree = new FastTreeBinaryClassifier();
    fastTree.NumLeaves = 5;
    fastTree.NumTrees = 5;
    fastTree.MinDocumentsInLeafs = 2;
    learningPipeline.Add(fastTree);

    var labelConverter = new PredictedLabelColumnOriginalValueConverter();
    labelConverter.PredictedLabelColumn = "PredictedLabel";
    learningPipeline.Add(labelConverter);

    // The pipeline is trained here; the returned model is not used again in this
    // method (predictions below go through the train/test output instead).
    PredictionModel<SentimentData, SentimentPrediction> trainedModel =
        learningPipeline.Train<SentimentData, SentimentPrediction>();

    IEnumerable<SentimentData> sampleSentences = new[]
    {
        new SentimentData { SentimentText = "Please refrain from adding nonsense to Wikipedia." },
        new SentimentData { SentimentText = "He is a CHEATER, and the article should say that." }
    };

    // Test loader: same layout as the training loader, pointed at the test file.
    string evaluationPath = GetDataPath(SentimentTestPath);
    var evaluationLoader = new Data.TextLoader(evaluationPath);
    var evaluationArguments = new TextLoaderArguments();
    evaluationArguments.Separator = new[] { '\t' };
    evaluationArguments.HasHeader = true;
    var evaluationLabelColumn = new TextLoaderColumn();
    evaluationLabelColumn.Name = "Label";
    evaluationLabelColumn.Source = new[] { new TextLoaderRange(0) };
    evaluationLabelColumn.Type = Runtime.Data.DataKind.Num;
    var evaluationTextColumn = new TextLoaderColumn();
    evaluationTextColumn.Name = "SentimentText";
    evaluationTextColumn.Source = new[] { new TextLoaderRange(1) };
    evaluationTextColumn.Type = Runtime.Data.DataKind.Text;
    evaluationArguments.Column = new[] { evaluationLabelColumn, evaluationTextColumn };
    evaluationLoader.Arguments = evaluationArguments;

    var trainTestEvaluator = new TrainTestEvaluator();
    var trainTestOutput = trainTestEvaluator.TrainTestEvaluate<SentimentData, SentimentPrediction>(
        learningPipeline,
        evaluationLoader);

    // Only the binary classification metrics should be populated.
    Assert.Null(trainTestOutput.ClassificationMetrics);
    Assert.Null(trainTestOutput.RegressionMetrics);
    Assert.NotNull(trainTestOutput.BinaryClassificationMetrics);
    Assert.NotNull(trainTestOutput.PredictorModels);

    // Scalar metrics, each compared at the precision given as the last argument
    // (PositiveRecall is compared without a precision argument).
    BinaryClassificationMetrics binaryMetrics = trainTestOutput.BinaryClassificationMetrics;
    Assert.Equal(.5556, binaryMetrics.Accuracy, 4);
    Assert.Equal(.8, binaryMetrics.Auc, 1);
    Assert.Equal(.87, binaryMetrics.Auprc, 2);
    Assert.Equal(1, binaryMetrics.Entropy, 3);
    Assert.Equal(.6923, binaryMetrics.F1Score, 4);
    Assert.Equal(.969, binaryMetrics.LogLoss, 3);
    Assert.Equal(3.083, binaryMetrics.LogLossReduction, 3);
    Assert.Equal(1, binaryMetrics.NegativePrecision, 3);
    Assert.Equal(.111, binaryMetrics.NegativeRecall, 3);
    Assert.Equal(.529, binaryMetrics.PositivePrecision, 3);
    Assert.Equal(1, binaryMetrics.PositiveRecall);

    // Confusion matrix: each cell is read both by index and by class name.
    ConfusionMatrix confusion = binaryMetrics.ConfusionMatrix;
    Assert.Equal(2, confusion.Order);
    Assert.Equal(2, confusion.ClassNames.Count);
    Assert.Equal("positive", confusion.ClassNames[0]);
    Assert.Equal("negative", confusion.ClassNames[1]);

    Assert.Equal(9, confusion[0, 0]);
    Assert.Equal(9, confusion["positive", "positive"]);
    Assert.Equal(0, confusion[0, 1]);
    Assert.Equal(0, confusion["positive", "negative"]);

    Assert.Equal(8, confusion[1, 0]);
    Assert.Equal(8, confusion["negative", "positive"]);
    Assert.Equal(1, confusion[1, 1]);
    Assert.Equal(1, confusion["negative", "negative"]);

    // First prediction pass over the two sample sentences.
    IEnumerable<SentimentPrediction> firstPass = trainTestOutput.PredictorModels.Predict(sampleSentences);
    Assert.Equal(2, firstPass.Count());
    Assert.True(firstPass.ElementAt(0).Sentiment.IsFalse);
    Assert.True(firstPass.ElementAt(1).Sentiment.IsTrue);

    // Second pass with the same model and inputs; the same results are expected.
    IEnumerable<SentimentPrediction> secondPass = trainTestOutput.PredictorModels.Predict(sampleSentences);
    Assert.Equal(2, secondPass.Count());
    Assert.True(secondPass.ElementAt(0).Sentiment.IsFalse);
    Assert.True(secondPass.ElementAt(1).Sentiment.IsTrue);
}