/// <summary>
        /// Click handler that runs the whole experiment: loads the documents, splits them
        /// into a training and a validation list, selects features, writes the features and
        /// the reduced dataset to disk, trains <c>topicPredictor</c> and measures how many
        /// validation documents get a predicted topic contained in their own topic list.
        /// </summary>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Event arguments (not used).</param>
        private void buttonRun_Click(object sender, EventArgs e)
        {
            try
            {
                var documentDataList = documentDataBusinessLogic.GetDocumentDataForMultipleXmlFiles(filepathsToUseForDocumentData);

                // NOTE(review): 70 is presumably the percentage of documents kept for training - confirm
                // against SplitListIntoTwoSeparateLists.
                var lists             = SplitListIntoTwoSeparateLists(documentDataList, 70);
                var listForTraining   = lists.Item1;
                var listForValidation = lists.Item2;

                // Build the training dataset from the training partition only. Using the full
                // document list here would let the validation documents influence feature
                // selection and training, which inflates the measured accuracy.
                var datasetRepresentationTraining = listForTraining
                    .ToDatasetRepresentation()
                    .ReconstructByEliminatingWordsBelowAndAboveThresholds(5, 95);

                var stopwatch = Stopwatch.StartNew();
                var features  = featureSelector.GetMostImportantWords(datasetRepresentationTraining);
                stopwatch.Stop();
                System.Diagnostics.Trace.WriteLine($"Feature selection took {stopwatch.ElapsedMilliseconds} ms");

                File.WriteAllText("features.json", JsonConvert.SerializeObject(features));

                datasetRepresentationTraining = datasetRepresentationTraining.ReconstructByKeepingOnlyTheseWords(features);
                File.WriteAllText("dataset.json", JsonConvert.SerializeObject(datasetRepresentationTraining));
                File.WriteAllText("dataset.arff", datasetRepresentationTraining.ToArffFileFormat());

                topicPredictor.Train(datasetRepresentationTraining);

                var total                 = listForValidation.Count;
                var successfullyPredicted = 0;

                foreach (var documentData in listForValidation)
                {
                    var predictedTopic = topicPredictor.PredictTopic(documentData);

                    // A prediction counts as correct when it matches any of the document's topics.
                    if (documentData.Topics.Contains(predictedTopic))
                    {
                        successfullyPredicted++;
                    }
                }

                // Guard against an empty validation list, which would otherwise yield NaN (0 / 0.0).
                var accuracy = total > 0 ? successfullyPredicted / (double)total * 100 : 0.0;
                System.Diagnostics.Trace.WriteLine($"Validation accuracy: {accuracy:F2}% ({successfullyPredicted}/{total})");

                SetStatusLabel("Done", Color.GreenYellow);
            }
            catch (Exception exception)
            {
                // Keep the handler from crashing the UI, but do not lose the failure details.
                System.Diagnostics.Trace.WriteLine(exception);
                SetStatusLabel("Error", Color.Red);
            }
        }
Example #2
0
        /// <summary>
        /// Runs <paramref name="topicPredictor"/> over every document that has at least one
        /// topic and tallies, per distinct topic of the dataset, the true/false positives and
        /// true/false negatives.
        /// </summary>
        /// <param name="topicPredictor">Predictor whose <c>PredictTopic</c> is called once per labelled document.</param>
        /// <param name="documentDataList">Documents to evaluate; documents without topics are skipped.</param>
        /// <returns>One <c>ClassEvaluationResult</c> for each distinct topic found in <paramref name="documentDataList"/>.</returns>
        public List <ClassEvaluationResult> EvaluateTopicPredictor(ITopicPredictor topicPredictor, List <DocumentData> documentDataList)
        {
            ArgumentValidator.ValidateObject(topicPredictor);
            ArgumentValidator.ValidateNotEmptyList(documentDataList);

            // One counter object per distinct topic, keyed by topic name.
            var evaluationResults = new Dictionary <string, ClassEvaluationResult>();

            foreach (var topic in documentDataList.ToDatasetRepresentation().GetAllDistinctTopics())
            {
                evaluationResults.Add(topic, new ClassEvaluationResult());
            }

            foreach (var documentData in documentDataList.Where(x => x.Topics.Count > 0))
            {
                var predictedTopic = topicPredictor.PredictTopic(documentData);

                // Only the first listed topic is treated as the expected label.
                var expectedTopic = documentData.Topics[0];

                evaluationResults.TryGetValue(predictedTopic, out var resultForPredictedTopic);
                evaluationResults.TryGetValue(expectedTopic, out var resultForExpectedTopic);

                // Neither topic is tracked: this document contributes nothing to any counter.
                if (resultForPredictedTopic == null && resultForExpectedTopic == null)
                {
                    continue;
                }

                if (predictedTopic == expectedTopic)
                {
                    if (resultForPredictedTopic != null)
                    {
                        resultForPredictedTopic.TruePositives++;
                    }
                }
                else
                {
                    // A miss is a false positive for the predicted topic
                    // and a false negative for the expected one.
                    if (resultForPredictedTopic != null)
                    {
                        resultForPredictedTopic.FalsePositives++;
                    }

                    if (resultForExpectedTopic != null)
                    {
                        resultForExpectedTopic.FalseNegatives++;
                    }
                }

                // Every other tracked topic was correctly not predicted for this document.
                // Walking the dictionary directly avoids building a filtered list and
                // re-looking-up each topic on every iteration.
                foreach (var entry in evaluationResults)
                {
                    if (entry.Key != predictedTopic && entry.Key != expectedTopic)
                    {
                        entry.Value.TrueNegatives++;
                    }
                }
            }

            return evaluationResults.Values.ToList();
        }