/// <summary>
/// Produces the document data for <paramref name="selectedDirectory"/>.
/// If a "document_data_list.json" file already exists in that directory the user is asked
/// whether to load it instead of processing the directory again. Otherwise (or if the user
/// declines) the files in the directory are processed and the result is written back to
/// that JSON file, asking for confirmation before overwriting an existing one.
/// </summary>
/// <param name="selectedDirectory">Directory containing the input files and, optionally, the cache file.</param>
/// <returns>The document data loaded from the cache file or freshly processed.</returns>
private static List<DocumentData> HandleDocumentDataListProcessing(string selectedDirectory)
{
    const string cacheFileName = "document_data_list.json";

    var cachePath = Path.Combine(selectedDirectory, cacheFileName);
    var cacheExists = File.Exists(cachePath);

    if (cacheExists)
    {
        ConsoleWriteLineWithColor($"Found processed documents file at '{cachePath}'");
        ConsoleWriteLineWithColor("Would you like to use that in order to skip the processing step?");
        ConsoleWriteLineWithColor("1. Yes");
        ConsoleWriteLineWithColor("2. No");

        var useCache = UserInputHandler.GetNumberInputFromUser(new List<int> { 1, 2 }) == 1;
        if (useCache)
        {
            var cachedJson = File.ReadAllText(cachePath);
            var cachedDocumentDataList = JsonConvert.DeserializeObject<List<DocumentData>>(cachedJson);
            if (cachedDocumentDataList != null)
            {
                return cachedDocumentDataList;
            }

            // An empty file or a literal "null" deserializes to null; fall through and
            // reprocess rather than handing a null list back to the caller.
            ConsoleWriteLineWithColor("Processed documents file is empty or invalid; processing the documents again.");
        }
    }

    var filePathsToUseForDocumentData = new List<string>(Directory.GetFiles(selectedDirectory));

    // The cache file lives in the same directory as the inputs, so it must not be fed
    // back into the document processing step as if it were one of the source files.
    filePathsToUseForDocumentData.RemoveAll(path =>
        string.Equals(Path.GetFileName(path), cacheFileName, StringComparison.OrdinalIgnoreCase));

    var documentDataList = documentDataBusinessLogic.GetDocumentDataForMultipleXmlFiles(filePathsToUseForDocumentData);

    // A fresh result is always saved when no cache exists; an existing cache is only
    // replaced after explicit confirmation.
    var shouldSave = true;
    if (cacheExists)
    {
        ConsoleWriteLineWithColor("Overwrite existing processed documents file?");
        ConsoleWriteLineWithColor("1. Yes");
        ConsoleWriteLineWithColor("2. No");

        shouldSave = UserInputHandler.GetNumberInputFromUser(new List<int> { 1, 2 }) == 1;
    }

    if (shouldSave)
    {
        var documentDataListJson = JsonConvert.SerializeObject(documentDataList);
        File.WriteAllText(cachePath, documentDataListJson);
    }

    return documentDataList;
}
/// <summary>
/// Click handler for the Run button. Loads the document data for the configured file paths,
/// splits it into a training list and a validation list, builds and filters the training
/// dataset, selects features, writes "features.json", "dataset.json" and "dataset.arff",
/// trains the topic predictor and counts how many validation documents receive a predicted
/// topic that is contained in their own topics. The status label is set to "Done" on
/// success and "Error" on any exception.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void buttonRun_Click(object sender, EventArgs e)
{
    try
    {
        var documentDataList = documentDataBusinessLogic.GetDocumentDataForMultipleXmlFiles(filepathsToUseForDocumentData);

        // NOTE(review): 70 is presumably the percentage of documents assigned to the first
        // (training) list - confirm against SplitListIntoTwoSeparateLists.
        var lists = SplitListIntoTwoSeparateLists(documentDataList, 70);
        var listForTraining = lists.Item1;
        var listForValidation = lists.Item2;

        // Build the training dataset from the training split only. Using the full list
        // here would let the validation documents leak into training and inflate accuracy.
        var datasetRepresentationTraining = listForTraining.ToDatasetRepresentation();
        datasetRepresentationTraining = datasetRepresentationTraining.ReconstructByEliminatingWordsBelowAndAboveThresholds(5, 95);

        var stopwatch = Stopwatch.StartNew();
        var features = featureSelector.GetMostImportantWords(datasetRepresentationTraining);
        stopwatch.Stop();
        System.Diagnostics.Trace.WriteLine($"Feature selection took {stopwatch.Elapsed}");

        var featuresJson = JsonConvert.SerializeObject(features);
        File.WriteAllText("features.json", featuresJson);

        datasetRepresentationTraining = datasetRepresentationTraining.ReconstructByKeepingOnlyTheseWords(features);

        var datasetJson = JsonConvert.SerializeObject(datasetRepresentationTraining);
        var datasetArff = datasetRepresentationTraining.ToArffFileFormat();
        File.WriteAllText("dataset.json", datasetJson);
        File.WriteAllText("dataset.arff", datasetArff);

        topicPredictor.Train(datasetRepresentationTraining);

        double total = listForValidation.Count;
        var successfullyPredicted = 0;
        foreach (var documentData in listForValidation)
        {
            var predictedTopic = topicPredictor.PredictTopic(documentData);
            if (documentData.Topics.Contains(predictedTopic))
            {
                successfullyPredicted++;
            }
        }

        // Guard the empty validation list so the result is 0 instead of NaN (0 / 0.0).
        var accuracy = total > 0 ? successfullyPredicted / total * 100 : 0;
        System.Diagnostics.Trace.WriteLine($"Validation accuracy: {accuracy}% ({successfullyPredicted} of {total})");

        SetStatusLabel("Done", Color.GreenYellow);
    }
    catch (Exception exception)
    {
        // Keep the UI behaviour (status label) but do not lose the failure details.
        System.Diagnostics.Trace.WriteLine(exception);
        SetStatusLabel("Error", Color.Red);
    }
}