/// <summary>
/// Prints a console report built from the two collections returned by
/// <c>TxtReader.FindBadDateSpaces</c>: a summary line with the size of the
/// first ("bad") collection against the combined size of both, followed by
/// every entry of the first collection on its own line.
/// </summary>
/// <param name="str">Text handed unchanged to <c>TxtReader.FindBadDateSpaces</c>.</param>
private static void WriteBadDateSpaces(string str)
{
    var (flagged, accepted) = TxtReader.FindBadDateSpaces(str);

    // Blank lines before and after visually separate this report from its neighbours.
    Console.WriteLine();
    Console.WriteLine(
        "Number of bad date descriptions {0} out of {1}",
        flagged.Count,
        accepted.Count + flagged.Count);

    foreach (Match entry in flagged)
    {
        string text = entry.ToString();
        Console.WriteLine(text);
    }

    Console.WriteLine();
}
/// <summary>
/// Prints a console report of the matches returned by <c>TxtReader.CheckQuotes</c>:
/// a summary line with the number of matches and an integer percentage
/// (second returned count relative to the third), followed by each match
/// numbered from 1.
/// </summary>
/// <param name="str">Text handed unchanged to <c>TxtReader.CheckQuotes</c>.</param>
/// <remarks>
/// The percentage is reported as 0 when the total word count is 0, instead of
/// throwing <see cref="DivideByZeroException"/>.
/// NOTE(review): the second value is presumably the number of words inside
/// quotes and the third the word count of the whole text — confirm against
/// <c>TxtReader.CheckQuotes</c>.
/// </remarks>
private static void WriteAllQuotes(string str)
{
    (MatchCollection matches, int wordCount, int wholeStringWordCount) = TxtReader.CheckQuotes(str);

    // Integer percentage, truncated like the original expression. The zero
    // guard avoids a DivideByZeroException for empty input, and multiplying
    // in long avoids int overflow of wordCount * 100 for very large counts.
    long percent = wholeStringWordCount == 0
        ? 0L
        : (wordCount * 100L) / wholeStringWordCount;

    Console.WriteLine();
    Console.WriteLine("Number of quotes: {0} -- {1}%", matches.Count, percent);

    int quoteNumber = 0;
    foreach (Match match in matches)
    {
        quoteNumber++;
        Console.WriteLine("Quote {0}: {1}", quoteNumber, match);
    }

    Console.WriteLine();
}
/// <summary>
/// Program entry point. Loads <c>WorkFile.txt</c>, processes the text sources
/// found in the <c>source\</c> directory against it, prints the word, date and
/// quote reports to the console, and writes the generated HTML to
/// <c>Output.html</c>. Waits for a key press before returning in every case.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
public static void Main(string[] args)
{
    // Nothing can be done without the work file: report it and wait for a key.
    if (!File.Exists(@"WorkFile.txt"))
    {
        Console.WriteLine("WorkFile.txt not found");
        Console.ReadKey();
        return;
    }

    (List<string> workSentences, string[] workWords, string wholeWorkString) =
        TxtReader.LoadSentences(@"WorkFile.txt", numberOfWords);

    string sourceDirectory = @"source\";
    if (!Directory.Exists(sourceDirectory))
    {
        Console.WriteLine("{0} is not a valid path.", sourceDirectory);
    }
    else
    {
        // ProcessDirectory hands each source file to ProcessFile together with the work sentences.
        ProcessDirectory(sourceDirectory, workSentences);
    }

    // Console reports about the work text itself.
    WriteWordsCountWithPercents(workWords);
    WriteBadDateSpaces(wholeWorkString);
    WriteAllQuotes(wholeWorkString);

    // Rebuild the work text from its words; every word is followed by one space.
    var text = new StringBuilder();
    foreach (string word in workWords)
    {
        text.Append(word).Append(" ");
    }

    var htmlGenerator = new HtmlGenerator();
    htmlGenerator.AddText(text.ToString());
    htmlGenerator.AddMaches(matchedSentences, searchGroups);
    var html = htmlGenerator.GenerateHtml();

    // The second constructor argument (append: false) overwrites any existing Output.html.
    using (var output = new System.IO.StreamWriter(@"Output.html", false))
    {
        output.Write(html);
    }

    Console.ReadKey();
}
/// <summary>
/// Prepares and processes every text source in a directory: each <c>*.pdf</c>
/// file that has no <c>*.txt</c> file with the same base name is passed to
/// <c>TxtReader.ExtractTextFromPdfToTxt</c>, then every <c>*.txt</c> file in
/// the directory is handed to <see cref="ProcessFile"/>.
/// </summary>
/// <param name="targetDirectory">Directory that is searched (top level only) for <c>*.txt</c> and <c>*.pdf</c> files.</param>
/// <param name="sourceSentences">Sentences forwarded unchanged to <see cref="ProcessFile"/> for each text file.</param>
public static void ProcessDirectory(string targetDirectory, List<string> sourceSentences)
{
    // Base names (no directory, no extension) of the text files that already exist.
    // Comparing base names instead of Replace(".txt", "") / Replace(".pdf", "") on the
    // full path keeps the pairing correct when the extension casing differs ("A.PDF")
    // or when ".pdf"/".txt" also occurs elsewhere in the path.
    var existingTextNames = new HashSet<string>(
        Directory.GetFiles(targetDirectory, "*.txt")
                 .Select(txtPath => Path.GetFileNameWithoutExtension(txtPath)));

    string[] pdfEntries = Directory.GetFiles(targetDirectory, "*.pdf");
    foreach (string pdfName in pdfEntries)
    {
        if (!existingTextNames.Contains(Path.GetFileNameWithoutExtension(pdfName)))
        {
            TxtReader.ExtractTextFromPdfToTxt(pdfName, targetDirectory);
        }
    }

    // List the text files again so files produced by the extraction step are included.
    string[] fileEntries = Directory.GetFiles(targetDirectory, "*.txt");
    Console.WriteLine("Number of loaded sources: " + fileEntries.Length);
    Console.WriteLine();

    foreach (var fileName in fileEntries)
    {
        ProcessFile(fileName, sourceSentences);
    }
}
/// <summary>
/// Processes one source file: loads its sentences, prints the file path,
/// intersects the sentences with <paramref name="sourceSentences"/>, passes the
/// result through <c>TxtReader.CombineFollowingSencences</c>, prints the
/// combined matches via <c>WriteFileMatches</c>, and adds each of them to
/// <c>matchedSentences</c>.
/// </summary>
/// <param name="path">Path of the source file; passed to <c>TxtReader.LoadSentences</c> and echoed to the console.</param>
/// <param name="sourceSentences">Sentences that the file's sentences are intersected with.</param>
public static void ProcessFile(string path, List<string> sourceSentences)
{
    // Only the sentence list is used here; the other two returned values are discarded.
    (List<string> fileSentences, _, _) = TxtReader.LoadSentences(path, numberOfWords);

    // Header for this file's section of the console output.
    Console.WriteLine(path);
    Console.WriteLine();

    // Sentences present in both the work text and this source file.
    List<string> sharedSentences = sourceSentences.Intersect(fileSentences).ToList();

    IEnumerable<string> combinedMatches =
        TxtReader.CombineFollowingSencences(sharedSentences, numberOfWords);

    // WriteFileMatches receives its own list copy, as before.
    WriteFileMatches(combinedMatches.ToList());

    foreach (var sentence in combinedMatches)
    {
        matchedSentences.Add(sentence);
    }
}