/// <summary>
/// Interactive console workflow: prompts for a text file path, passes its
/// contents through the cleaning, stop-word, stemming and word-occurrence
/// services in sequence, and prints the resulting term frequencies.
/// </summary>
/// <remarks>
/// Any exception raised during the workflow is caught and written to the
/// console. The method always ends by printing a completion message and
/// waiting for the user to press Enter.
/// NOTE(review): what each service does is inferred from its method name;
/// confirm against the service implementations.
/// </remarks>
public void Run()
{
    try
    {
        // Get user input
        Console.WriteLine("Please Enter a file with complete path.");
        Console.WriteLine("For example: C:\\Users\\david.browning\\Desktop\\Text1.txt");
        string filePath = Console.ReadLine();

        // Console.ReadLine() returns null when no more input is available, and an
        // empty path can never name a file; reject both together with paths that
        // contain invalid characters. The finally block below still runs on return.
        if (string.IsNullOrWhiteSpace(filePath)
            || filePath.IndexOfAny(Path.GetInvalidPathChars()) != -1)
        {
            Console.WriteLine("Input entered wrong. Please enter a valid file path.");
            return;
        }

        // Read the file
        var dirtyText = fileService.ReadAllTextFromFile(filePath);

        // Clean the text: unicode characters, punctuation/symbols, apostrophes
        var nonUnicodeText = stringCleanerService.RemoveUnicodeCharacters(dirtyText);
        var noPunctuationOrSymbols = stringCleanerService.RemovePunctuationAndSymbols(nonUnicodeText);
        var semiSanitizedArray = noPunctuationOrSymbols.Split(" ", StringSplitOptions.RemoveEmptyEntries);
        var sanitizedList = stringCleanerService.RemoveApostropheExceptConjunctions(semiSanitizedArray);

        // Remove stop words, using a stop-word file chosen by the user
        Console.WriteLine("Please enter a file with complete path for the stopwords.");
        Console.WriteLine("For example: C:\\Users\\david.browning\\Desktop\\Text1.txt");
        var stopWordPath = Console.ReadLine();
        var stopWordRemovedList = stopWordService.RemoveStopWords(sanitizedList, stopWordPath);

        // Remove all non alpha characters
        var alphaOnlyText = stringCleanerService.RemoveNonAlphaCharacters(string.Join(" ", stopWordRemovedList));

        // Stemming algorithm
        var alphaOnlyArray = alphaOnlyText.Split(" ", StringSplitOptions.RemoveEmptyEntries);
        var stemmingArray = porterStemmingService.RunStemmingAlgorithm(alphaOnlyArray);

        // Compute the frequency of each term
        var wordOccurrenceCount = wordOccurrenceService.GetWordOccurences(stemmingArray);
        var sortedOccurrenceCount = wordOccurrenceService.SortWordOccurences(wordOccurrenceCount);

        // Print the most commonly occurring terms (stop words already removed)
        wordOccurrenceService.PrintTopWordOccurences(sortedOccurrenceCount);
    }
    catch (Exception ex)
    {
        Console.WriteLine($"An Error Occurred: {ex}");
    }
    finally
    {
        Console.WriteLine("Program Finished - Press Enter");
        Console.ReadLine();
    }
}
/// <summary>
/// Reads the file at <paramref name="filePath"/>, passes its text through
/// <c>stringCleanerService.RemoveUnicodeCharacters</c>, and splits the result
/// on single spaces into individual stop words.
/// </summary>
/// <param name="filePath">Path of the stop-word text file to read.</param>
/// <returns>
/// The space-separated tokens with empty entries removed, or an empty array
/// if reading or cleaning the file throws.
/// </returns>
/// <remarks>
/// Failures are reported on the console rather than propagated, so callers
/// always receive a non-null array.
/// NOTE(review): tokens are split on the space character only; whether line
/// breaks in the file are handled depends on RemoveUnicodeCharacters - confirm.
/// </remarks>
public string[] GetStopWords(string filePath)
{
    try
    {
        // Pass the path straight through: wrapping it in an interpolated string
        // added nothing and turned a null path into "", hiding the real cause.
        var text = File.ReadAllText(filePath);
        var cleanWords = stringCleanerService.RemoveUnicodeCharacters(text);
        return cleanWords.Split(" ", StringSplitOptions.RemoveEmptyEntries);
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Error when reading StopWords text file: {ex}");

        // Shared empty instance; avoids allocating a new array on every failure.
        return Array.Empty<string>();
    }
}