Example #1
0
        /// <summary>
        /// Interactive entry point: prompts for a text file and a stop-word file on the
        /// console, runs the text through the cleaning, stop-word, stemming and
        /// word-occurrence services in that order, and hands the sorted counts to
        /// <c>wordOccurrenceService.PrintTopWordOccurences</c>.
        /// </summary>
        /// <remarks>
        /// Any exception raised along the way is written to the console rather than
        /// propagated. The method always finishes by waiting for the user to press Enter.
        /// </remarks>
        public void Run()
        {
            try
            {
                // Get User Input
                Console.WriteLine("Please Enter a file with complete path.");
                Console.WriteLine("For example: C:\\Users\\david.browning\\Desktop\\Text1.txt");

                var filePath = Console.ReadLine();

                // Console.ReadLine() yields null once the input stream is exhausted, so the
                // null/blank check must come before any member access on the value.
                if (string.IsNullOrWhiteSpace(filePath) ||
                    filePath.IndexOfAny(Path.GetInvalidPathChars()) != -1)
                {
                    Console.WriteLine("Input entered wrong. Please enter a valid file path.");
                    return; // the finally block below still runs
                }

                // Read a file
                var dirtyText      = fileService.ReadAllTextFromFile(filePath);
                var nonUnicodeText = stringCleanerService.RemoveUnicodeCharacters(dirtyText);

                // Strip punctuation/symbols and tokenize on single spaces before stop-word removal
                var noPunctuationOrSymbols = stringCleanerService.RemovePunctuationAndSymbols(nonUnicodeText);
                var semiSanitizedArray     = noPunctuationOrSymbols.Split(" ", StringSplitOptions.RemoveEmptyEntries);
                var sanitizedList          = stringCleanerService.RemoveApostropheExceptConjunctions(semiSanitizedArray);

                // Remove Stop Words
                Console.WriteLine("Please enter a file with complete path for the stopwords.");
                Console.WriteLine("For example: C:\\Users\\david.browning\\Desktop\\Text1.txt");

                // NOTE(review): this path is forwarded unvalidated; presumably the stop-word
                // service copes with an unreadable file - confirm.
                var stopWordPath = Console.ReadLine();

                var stopWordRemovedList = stopWordService.RemoveStopWords(sanitizedList, stopWordPath);

                // Remove all non alpha characters
                var alphaOnlyText = stringCleanerService.RemoveNonAlphaCharacters(string.Join(" ", stopWordRemovedList));

                // Stemming Algorithm
                var alphaOnlyArray = alphaOnlyText.Split(" ", StringSplitOptions.RemoveEmptyEntries);
                var stemmingArray  = porterStemmingService.RunStemmingAlgorithm(alphaOnlyArray);

                // Computes the frequency of each term
                var wordOccurrenceCount   = wordOccurrenceService.GetWordOccurences(stemmingArray);
                var sortedOccurrenceCount = wordOccurrenceService.SortWordOccurences(wordOccurrenceCount);

                // Prints out the top word occurrences from the sorted counts
                wordOccurrenceService.PrintTopWordOccurences(sortedOccurrenceCount);
            }
            catch (Exception ex)
            {
                // Top-level boundary of the console app: report the failure instead of crashing.
                Console.WriteLine($"An Error Occurred: {ex}");
            }
            finally
            {
                Console.WriteLine("Program Finished - Press Enter");
                Console.ReadLine();
            }
        }
 /// <summary>
 /// Reads the file at <paramref name="filePath"/>, passes its text through
 /// <c>stringCleanerService.RemoveUnicodeCharacters</c>, and splits the result on
 /// single spaces, dropping empty entries.
 /// </summary>
 /// <param name="filePath">Path of the stop-word text file.</param>
 /// <returns>
 /// The space-separated entries of the cleaned text, or an empty array when the path
 /// is missing/blank or reading/cleaning the file throws.
 /// </returns>
 public string[] GetStopWords(string filePath)
 {
     // A missing path can never be read; report it plainly instead of via an exception dump.
     if (string.IsNullOrWhiteSpace(filePath))
     {
         Console.WriteLine("Error when reading StopWords text file: no file path was provided.");
         return Array.Empty<string>();
     }

     try
     {
         var text       = File.ReadAllText(filePath);
         var cleanWords = stringCleanerService.RemoveUnicodeCharacters(text);

         // NOTE(review): only the space character is used as a separator; a file with one
         // word per line relies on RemoveUnicodeCharacters normalizing line breaks - confirm.
         return cleanWords.Split(" ", StringSplitOptions.RemoveEmptyEntries);
     }
     catch (Exception ex)
     {
         // Best effort: callers get an empty stop-word list rather than a failure.
         Console.WriteLine($"Error when reading StopWords text file: {ex}");
         return Array.Empty<string>();
     }
 }