/// <summary>
/// Trains a single classifier for the tag named in <paramref name="classifierParams"/>.
/// </summary>
/// <remarks>
/// Steps, in order: select featured words from the training set, convert every
/// document into a training example over those words, oversample the examples,
/// train the classification algorithm, and wrap it in a <see cref="SimpleClassifier"/>.
/// </remarks>
/// <param name="trainingSet">Documents to train on.</param>
/// <param name="classifierParams">
/// Supplies the target tag plus the feature-selection, sampling and
/// classification-algorithm parameters.
/// </param>
/// <returns>
/// A build result created from the trained classifier and the error reported by
/// the algorithm builder.
/// </returns>
public ClassifierBuildResult Build(TextDocument[] trainingSet, ClassifierParams classifierParams)
{
    var tag = classifierParams.TargetTag;

    // Only the words themselves are needed downstream, not the full selection result.
    var selection = featureSelector.Select(trainingSet, classifierParams.FeatureSelectionParams);
    var words = (from featuredWord in selection.FeaturedWords
                 select featuredWord.Word).ToArray();

    var examples = (from document in trainingSet
                    select textDocumentConverter.ConvertToTrainingExample(document, tag, words)).ToArray();
    var resampledExamples = sampler.OverSample(examples, classifierParams.SamplingParams);

    var trained = classificationAlgorithmBuilder.Build(
        resampledExamples,
        classifierParams.ClassificationAlgorithmParams);

    // The classifier keeps the same converter and word list so that documents seen
    // later are turned into examples the same way the training documents were.
    return ClassifierBuildResult.Create(
        new SimpleClassifier(trained.ClassificationAlgorithm, textDocumentConverter, tag, words),
        trained.Error);
}
/// <summary>
/// Builds a weighted ensemble of classifiers by boosting: one classifier is trained
/// per entry of <c>ensembleParams.ClassifiersParams</c>, each on a sample drawn from
/// the currently weighted documents, and document weights are raised for
/// misclassified documents and lowered for correctly classified ones after every round.
/// </summary>
/// <remarks>
/// Progress (iteration number, weighted error, alpha and the F-score on the full
/// training set) is written to the console on every iteration.
/// </remarks>
/// <param name="trainingSet">Documents to train on; every document starts with weight 1/N.</param>
/// <param name="ensembleParams">
/// Supplies the target tag and one set of classifier parameters per boosting iteration.
/// </param>
/// <returns>
/// A build result created from the resulting <see cref="ClassifiersEnsemble"/>,
/// the training set and the target tag.
/// </returns>
public ClassifierBuildResult Build(TextDocument[] trainingSet, EnsembleParams ensembleParams)
{
    // Keeps the weighted error strictly inside (0, 1). Without this, a classifier that is
    // right (or wrong) on every document yields alpha = +/-Infinity, which turns the
    // document weights and the ensemble weight into Infinity/NaN.
    const double errorEpsilon = 1e-10;

    var weightedDocuments = trainingSet
        .Select(x => new WeightedDocument { Document = x, Weight = 1.0 / trainingSet.Length })
        .ToArray();
    var classifiers = new List<WeightedClassifier>();
    var classifierParamses = ensembleParams.ClassifiersParams;
    var targetTag = ensembleParams.TargetTag;

    for (int iteration = 0; iteration < classifierParamses.Length; iteration++)
    {
        Console.WriteLine("Running {0}/{1} boosting iteration", iteration + 1, classifierParamses.Length);

        var sampledTrainingSet = DoSampling(weightedDocuments);
        var binaryClassifierBuildResult = classifierBuilder.Build(sampledTrainingSet, classifierParamses[iteration]);
        var classifier = binaryClassifierBuildResult.Classifier;

        // Classify every document once and reuse the verdicts for both the error and the
        // reweighting step, so the two always agree and no document is classified twice.
        var isWrong = weightedDocuments
            .Select(x => classifier.IsClassifierWrong(x.Document, targetTag))
            .ToArray();

        // Weighted error: total weight of the misclassified documents.
        var error = 0.0;
        for (int i = 0; i < weightedDocuments.Length; i++)
        {
            if (isWrong[i])
            {
                error += weightedDocuments[i].Weight;
            }
        }

        var boundedError = Math.Min(Math.Max(error, errorEpsilon), 1.0 - errorEpsilon);
        var alpha = 0.5 * Math.Log((1.0 - boundedError) / boundedError);

        // Misclassified documents gain weight, correctly classified ones lose weight.
        var increase = Math.Exp(alpha);
        var decrease = Math.Exp(-alpha);
        for (int i = 0; i < weightedDocuments.Length; i++)
        {
            weightedDocuments[i].Weight *= isWrong[i] ? increase : decrease;
        }

        // Renormalize so that the weights sum to 1 again.
        var z = weightedDocuments.Sum(x => x.Weight);
        foreach (var weightedDocument in weightedDocuments)
        {
            weightedDocument.Weight /= z;
        }

        classifiers.Add(new WeightedClassifier { Classifier = classifier, Weight = alpha });

        // The reported error is the raw (unbounded) value; alpha is derived from the bounded one.
        Console.WriteLine("Error (weighted) = {0}", error);
        Console.WriteLine("Alpha = {0}", alpha);
        var evaluationResult = classifierEvaluator.Evaluate(classifier, trainingSet, targetTag);
        Console.WriteLine("FScore = {0}", evaluationResult.FScore);
    }

    var result = new ClassifiersEnsemble(classifiers);
    return ClassifierBuildResult.Create(result, trainingSet, targetTag);
}
/// <summary>
/// Runs <paramref name="classifier"/> over every document of <paramref name="testSet"/>
/// and summarizes how well its predictions match the presence of <paramref name="targetTag"/>.
/// </summary>
/// <remarks>
/// A document's expected class is 1 when its tags contain <paramref name="targetTag"/>
/// and 0 otherwise. Only the F-score is reset to 0 when it is NaN; accuracy, precision
/// and recall are returned exactly as computed, so they can be NaN when their
/// denominator is zero (for example, precision when nothing was predicted as class 1).
/// </remarks>
/// <param name="classifier">Classifier under evaluation.</param>
/// <param name="testSet">Documents to classify.</param>
/// <param name="targetTag">Tag whose presence marks a document as a positive example.</param>
/// <returns>
/// Accuracy, precision, recall and F-score, together with the evaluations that fall
/// into each confusion-matrix cell and the list of misclassified ones.
/// </returns>
public EvaluationResult Evaluate(IClassifier classifier, TextDocument[] testSet, string targetTag)
{
    var evaluations = (from document in testSet
                       select new BusinessObjects.Evaluation
                       {
                           DocumentId = document.Id,
                           Result = classifier.Classify(document),
                           ExpectedClass = document.Tags.Contains(targetTag) ? 1 : 0
                       }).ToArray();

    // Split the evaluations into the misclassified ones and the four confusion-matrix cells.
    var misclassified = Get(evaluations, e => e.Result.PredictedClass != e.ExpectedClass);
    var tp = Get(evaluations, e => e.Result.PredictedClass == 1 && e.ExpectedClass == 1);
    var fp = Get(evaluations, e => e.Result.PredictedClass == 1 && e.ExpectedClass == 0);
    var fn = Get(evaluations, e => e.Result.PredictedClass == 0 && e.ExpectedClass == 1);
    var tn = Get(evaluations, e => e.Result.PredictedClass == 0 && e.ExpectedClass == 0);

    var documentCount = evaluations.Length;
    var correctCount = documentCount - misclassified.Length;
    var predictedPositiveCount = tp.Length + fp.Length;
    var actualPositiveCount = tp.Length + fn.Length;

    // Floating-point division: a zero denominator yields NaN/Infinity rather than throwing.
    var accuracy = correctCount * 1.0 / documentCount;
    var precision = tp.Length * 1.0 / predictedPositiveCount;
    var recall = tp.Length * 1.0 / actualPositiveCount;

    var harmonicMean = 2.0 * precision * recall / (precision + recall);
    var fscore = double.IsNaN(harmonicMean) ? 0.0 : harmonicMean;

    return new EvaluationResult
    {
        Accuracy = accuracy,
        Precision = precision,
        Recall = recall,
        FScore = fscore,
        TruePositives = tp,
        FalsePositives = fp,
        TrueNegatives = tn,
        FalseNegatives = fn,
        Errors = misclassified
    };
}