/// <summary>
/// Handles the Run button click: loads document data from the configured XML files,
/// splits it into a training and a validation part, selects features on the training
/// part, writes the selected features and the reduced dataset to disk, trains the topic
/// predictor and measures its accuracy on the validation part.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void buttonRun_Click(object sender, EventArgs e)
{
    try
    {
        var documentDataList = documentDataBusinessLogic.GetDocumentDataForMultipleXmlFiles(filepathsToUseForDocumentData);

        // NOTE(review): 70 is presumably the percentage of documents that goes to the
        // first (training) list - confirm against SplitListIntoTwoSeparateLists.
        var lists = SplitListIntoTwoSeparateLists(documentDataList, 70);
        var listForTraining = lists.Item1;
        var listForValidation = lists.Item2;

        // Build the training dataset from the training split only. Previously the full
        // document list was used here, so the validation documents were also part of
        // the training data and the accuracy measured below was not a held-out figure.
        var datasetRepresentationTraining = listForTraining.ToDatasetRepresentation();
        datasetRepresentationTraining = datasetRepresentationTraining.ReconstructByEliminatingWordsBelowAndAboveThresholds(5, 95);

        // Time the feature selection step and report it instead of discarding the measurement.
        var stopwatch = Stopwatch.StartNew();
        var features = featureSelector.GetMostImportantWords(datasetRepresentationTraining);
        stopwatch.Stop();
        Trace.TraceInformation($"Feature selection took {stopwatch.ElapsedMilliseconds} ms.");

        var featuresJson = JsonConvert.SerializeObject(features);
        File.WriteAllText("features.json", featuresJson);

        // Reduce the dataset to the selected features and persist it in both formats.
        datasetRepresentationTraining = datasetRepresentationTraining.ReconstructByKeepingOnlyTheseWords(features);
        var datasetJson = JsonConvert.SerializeObject(datasetRepresentationTraining);
        var datasetArff = datasetRepresentationTraining.ToArffFileFormat();
        File.WriteAllText("dataset.json", datasetJson);
        File.WriteAllText("dataset.arff", datasetArff);

        topicPredictor.Train(datasetRepresentationTraining);

        // A prediction counts as correct when it matches any of the document's topics.
        double total = listForValidation.Count;
        var successfullyPredicted = 0;
        foreach (var documentData in listForValidation)
        {
            var predictedTopic = topicPredictor.PredictTopic(documentData);
            if (documentData.Topics.Contains(predictedTopic))
            {
                successfullyPredicted++;
            }
        }

        // Avoid reporting NaN when the validation list is empty.
        var accuracy = total > 0 ? successfullyPredicted / total * 100 : 0;
        Trace.TraceInformation($"Validation accuracy: {accuracy:F2}% ({successfullyPredicted} of {total} documents).");

        SetStatusLabel("Done", Color.GreenYellow);
    }
    catch (Exception exception)
    {
        // Keep the handler best-effort for the UI, but do not lose the failure details.
        Trace.TraceError(exception.ToString());
        SetStatusLabel("Error", Color.Red);
    }
}
/// <summary>
/// Runs <paramref name="topicPredictor"/> over the given documents and accumulates
/// true/false positive/negative counts per topic.
/// </summary>
/// <remarks>
/// One <see cref="ClassEvaluationResult"/> is created for every topic returned by
/// <c>GetAllDistinctTopics()</c> on the dataset representation of
/// <paramref name="documentDataList"/>. Documents without topics are skipped, and only the
/// first entry of a document's <c>Topics</c> list is treated as its expected topic.
/// For each evaluated document: a matching prediction counts as a true positive for that
/// topic; a mismatch counts as a false positive for the predicted topic and a false
/// negative for the expected topic; every other topic receives a true negative.
/// </remarks>
/// <param name="topicPredictor">Predictor under evaluation; checked with <c>ArgumentValidator.ValidateObject</c>.</param>
/// <param name="documentDataList">Labelled documents; checked with <c>ArgumentValidator.ValidateNotEmptyList</c>.</param>
/// <returns>The per-topic evaluation results, one entry per distinct topic.</returns>
public List<ClassEvaluationResult> EvaluateTopicPredictor(ITopicPredictor topicPredictor, List<DocumentData> documentDataList)
{
    ArgumentValidator.ValidateObject(topicPredictor);
    ArgumentValidator.ValidateNotEmptyList(documentDataList);

    var evaluationResults = new Dictionary<string, ClassEvaluationResult>();
    foreach (var topic in documentDataList.ToDatasetRepresentation().GetAllDistinctTopics())
    {
        evaluationResults.Add(topic, new ClassEvaluationResult());
    }

    // Dictionary lookups throw ArgumentNullException for a null key, so a null topic
    // (e.g. a predictor that could not decide) is treated as "no matching class".
    ClassEvaluationResult FindResult(string topic)
    {
        if (topic == null)
        {
            return null;
        }

        evaluationResults.TryGetValue(topic, out var result);
        return result;
    }

    foreach (var documentData in documentDataList.Where(x => x.Topics.Count > 0))
    {
        var predictedTopic = topicPredictor.PredictTopic(documentData);
        var expectedTopic = documentData.Topics[0];

        var resultForPredictedTopic = FindResult(predictedTopic);
        var resultForExpectedTopic = FindResult(expectedTopic);

        // Neither topic is a known class: the document contributes nothing.
        if (resultForPredictedTopic == null && resultForExpectedTopic == null)
        {
            continue;
        }

        if (predictedTopic != expectedTopic)
        {
            if (resultForPredictedTopic != null)
            {
                resultForPredictedTopic.FalsePositives++;
            }

            if (resultForExpectedTopic != null)
            {
                resultForExpectedTopic.FalseNegatives++;
            }
        }
        else if (resultForPredictedTopic != null)
        {
            resultForPredictedTopic.TruePositives++;
        }

        // Every class that is neither the predicted nor the expected one was correctly
        // not chosen. Only the stored objects are mutated, never the dictionary itself,
        // so updating them while enumerating is safe.
        foreach (var entry in evaluationResults)
        {
            if (entry.Key != predictedTopic && entry.Key != expectedTopic)
            {
                entry.Value.TrueNegatives++;
            }
        }
    }

    return evaluationResults.Values.ToList();
}