Example #1
0
 /// <summary>
 /// Prints a summary of the "bad date space" findings for the given text:
 /// the number of bad matches out of the combined bad + good total, followed by
 /// every bad match on its own line. The output is framed by blank lines.
 /// </summary>
 /// <param name="str">Text handed unchanged to <c>TxtReader.FindBadDateSpaces</c>.</param>
 private static void WriteBadDateSpaces(string str)
 {
     var (flagged, accepted) = TxtReader.FindBadDateSpaces(str);
     var total = accepted.Count + flagged.Count;

     Console.WriteLine();
     Console.WriteLine("Number of bad date descriptions {0} out of {1}", flagged.Count, total);

     // Only the bad matches are listed; the good ones contribute to the total only.
     foreach (Match flaggedMatch in flagged)
     {
         Console.WriteLine(flaggedMatch.ToString());
     }

     Console.WriteLine();
 }
Example #2
0
        /// <summary>
        /// Prints the number of quotes found in the text together with the share of
        /// quoted words (as a whole-number percentage of the total word count),
        /// then lists every quote with a 1-based index. The output is framed by blank lines.
        /// </summary>
        /// <param name="str">Text handed unchanged to <c>TxtReader.CheckQuotes</c>.</param>
        private static void WriteAllQuotes(string str)
        {
            (MatchCollection matches, int wordCount, int wholeStringWordCount) = TxtReader.CheckQuotes(str);

            // Integer division by zero throws DivideByZeroException, so report 0% when the
            // total word count is zero. The multiplication is widened to long so that
            // wordCount * 100 cannot overflow int for very large texts.
            long quotedPercent = wholeStringWordCount == 0
                ? 0L
                : ((long)wordCount * 100) / wholeStringWordCount;

            Console.WriteLine();
            Console.WriteLine("Number of quotes: {0} -- {1}%", matches.Count, quotedPercent);
            int i = 0;

            foreach (Match match in matches)
            {
                i++;
                Console.WriteLine("Quote {0}: {1}", i, match);
            }
            Console.WriteLine();
        }
Example #3
0
        /// <summary>
        /// Program entry point. When <c>WorkFile.txt</c> exists it is loaded, every source in the
        /// <c>source\</c> directory is processed against it, word/date/quote reports are printed
        /// to the console, and an HTML report is written to <c>Output.html</c>.
        /// Otherwise a "not found" message is printed. In both cases the program waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            if (!File.Exists(@"WorkFile.txt"))
            {
                Console.WriteLine("WorkFile.txt not found");
                Console.ReadKey();
                return;
            }

            (List<string> workSentences, string[] workWords, string wholeWorkString) =
                TxtReader.LoadSentences(@"WorkFile.txt", numberOfWords);

            string sourceFolder = @"source\";
            if (!Directory.Exists(sourceFolder))
            {
                Console.WriteLine("{0} is not a valid path.", sourceFolder);
            }
            else
            {
                // The source folder exists: process the files it contains.
                ProcessDirectory(sourceFolder, workSentences);
            }

            // Console reports about the work text itself.
            WriteWordsCountWithPercents(workWords);
            WriteBadDateSpaces(wholeWorkString);
            WriteAllQuotes(wholeWorkString);

            // Rebuild the work text from its words; each word is followed by a single space.
            var text = new StringBuilder();
            foreach (string workWord in workWords)
            {
                text.Append(workWord).Append(" ");
            }

            var report = new HtmlGenerator();
            report.AddText(text.ToString());
            report.AddMaches(matchedSentences, searchGroups);
            var markup = report.GenerateHtml();

            // append: false, so an existing Output.html is overwritten.
            using (var output = new System.IO.StreamWriter(@"Output.html", false))
            {
                output.Write(markup);
            }

            Console.ReadKey();
        }
Example #4
0
        /// <summary>
        /// Processes every text source in a directory. Each <c>*.pdf</c> file that has no
        /// <c>.txt</c> file with the same path (ignoring the extension) is first passed to
        /// <c>TxtReader.ExtractTextFromPdfToTxt</c>. The directory is then listed again and
        /// every <c>*.txt</c> file is handed to <see cref="ProcessFile"/>.
        /// </summary>
        /// <param name="targetDirectory">Directory to scan for <c>*.txt</c> and <c>*.pdf</c> files.</param>
        /// <param name="sourceSentences">Sentences forwarded to <see cref="ProcessFile"/> for each text file.</param>
        public static void ProcessDirectory(string targetDirectory, List <string> sourceSentences)
        {
            // Removes the extension only when it is the trailing part of the path, ignoring case.
            // string.Replace would strip every occurrence anywhere in the path and would miss
            // upper-case extensions such as ".PDF".
            string StripTrailing(string path, string extension)
            {
                return path.EndsWith(extension, StringComparison.OrdinalIgnoreCase)
                    ? path.Substring(0, path.Length - extension.Length)
                    : path;
            }

            string[] fileEntries = Directory.GetFiles(targetDirectory, "*.txt");
            string[] pdfEntries  = Directory.GetFiles(targetDirectory, "*.pdf");

            // Set of text-file paths without extension, so each PDF needs a single lookup.
            var existingTextStems = new HashSet<string>(
                fileEntries.Select(txtName => StripTrailing(txtName, ".txt")),
                StringComparer.Ordinal);

            foreach (string pdfName in pdfEntries)
            {
                if (!existingTextStems.Contains(StripTrailing(pdfName, ".pdf")))
                {
                    TxtReader.ExtractTextFromPdfToTxt(pdfName, targetDirectory);
                }
            }

            // List the directory again so the loop below sees any .txt files present after extraction.
            fileEntries = Directory.GetFiles(targetDirectory, "*.txt");
            Console.WriteLine("Number of loaded sources: " + fileEntries.Length);
            Console.WriteLine();
            foreach (var fileName in fileEntries)
            {
                ProcessFile(fileName, sourceSentences);
            }
        }
Example #5
0
        /// <summary>
        /// Compares one source file against the work sentences: loads the file's sentences,
        /// prints the file path, intersects the sentences with <paramref name="sourceSentences"/>,
        /// passes the result through <c>TxtReader.CombineFollowingSencences</c>, prints it via
        /// <c>WriteFileMatches</c>, and adds every resulting match to <c>matchedSentences</c>.
        /// </summary>
        /// <param name="path">Path of the source text file to load.</param>
        /// <param name="sourceSentences">Sentences to look for in the loaded file.</param>
        public static void ProcessFile(string path, List <string> sourceSentences)
        {
            // Only the sentence list is needed here; the other two tuple elements are discarded.
            var (sentences, _, _) = TxtReader.LoadSentences(path, numberOfWords);

            // Header for this file's section of the console output.
            Console.WriteLine(path);
            Console.WriteLine();

            // Sentences that occur both in the work text and in this source.
            IEnumerable<string> shared = sourceSentences.Intersect(sentences);

            IEnumerable<string> combined = TxtReader.CombineFollowingSencences(shared.ToList(), numberOfWords);

            WriteFileMatches(combined.ToList());

            foreach (var sentence in combined)
            {
                matchedSentences.Add(sentence);
            }
        }