/// <summary>
/// Interactively builds the word-frequency dictionary files.
/// Asks on the console how many sentences to parse, optionally resumes from a
/// previous run (based on the saved "nbOfSentencesParsed.txt" file), counts every
/// token through <c>FrequencyResults.AddOccurence</c>, then writes the frequency
/// file, the excluded-frequency file and the sentence count under the
/// "frequencies" directory.
/// </summary>
/// <remarks>
/// If the requested number of sentences is not a valid integer (or standard input
/// is closed), an error message is printed and the method returns without
/// touching any file.
/// </remarks>
private static void BuildFrequencyDictionary()
{
    var result = new FrequencyResults();

    Console.WriteLine("How many sentences do you want to parse?");
    int nbOfSentencesToParse;
    // Console.ReadLine() returns null when stdin is closed and int.TryParse returns
    // false for null or non-numeric text, so bad input no longer crashes the program.
    if (!int.TryParse(Console.ReadLine(), out nbOfSentencesToParse))
    {
        Console.WriteLine("Invalid number of sentences: an integer was expected. Building of frequency dictionary aborted");
        return;
    }

    // Directory.CreateDirectory does nothing when the directory already exists,
    // so no prior Directory.Exists check is needed.
    var frequencyDirectory = Utilities.PathToDownloadDirectory + "frequencies";
    Directory.CreateDirectory(frequencyDirectory);

    var frequencyFilePath = Path.Combine(frequencyDirectory, "frequencies.txt");
    var excludedFrequencyFilePath = Path.Combine(frequencyDirectory, "excluded-frequencies.txt");
    var nbOfSentencesParsedFilePath = Path.Combine(frequencyDirectory, "nbOfSentencesParsed.txt");

    // Offer to resume only when a previous run left a readable sentence count behind.
    var nbOfAlreadyParsedSentences = 0;
    var parsingResumed = false;
    if (File.Exists(nbOfSentencesParsedFilePath))
    {
        int nbOfSentencesParsed;
        if (int.TryParse(File.ReadAllText(nbOfSentencesParsedFilePath), out nbOfSentencesParsed))
        {
            Console.WriteLine("{0} sentences have already been parsed. Resume parsing? (y/n)", nbOfSentencesParsed);
            // Anything other than "y"/"Y" (including end of input) means "start over".
            if (string.Equals(Console.ReadLine(), "Y", StringComparison.OrdinalIgnoreCase))
            {
                nbOfAlreadyParsedSentences = nbOfSentencesParsed;
                parsingResumed = true;
            }
        }
    }

    var sw = Stopwatch.StartNew();
    Console.WriteLine("Building of frequency dictionary started");

    // Tokenize the sentences and compute the frequencies.
    // The sentence counter is required by the delegate signature but is not used here.
    Func<string[], int, bool> extractTokens = (tokens, sentenceCounter) =>
    {
        for (var i = 0; i < tokens.Length; i++)
        {
            result.AddOccurence(new WordOccurrence
            {
                IsFirstTokenInSentence = i == 0,
                Word = tokens[i]
            });
        }
        return true;
    };
    Utilities.ExtractTokensFromTxtFiles(extractTokens, nbOfSentencesToParse, nbOfAlreadyParsedSentences);

    // Load previous frequency dictionaries that were already computed
    if (parsingResumed)
    {
        result.LoadFrequencyDictionary(frequencyFilePath);
        // NOTE(review): the excluded file is loaded with LoadFrequencyDictionary but saved
        // with SaveExcludedFrequencyDictionary - confirm this asymmetry is intended.
        result.LoadFrequencyDictionary(excludedFrequencyFilePath);
    }

    // Save frequency files on disk
    result.SaveFrequencyDictionary(frequencyFilePath);
    result.SaveExcludedFrequencyDictionary(excludedFrequencyFilePath);

    // Save the nb of sentences parsed (for information and being able to relaunch the parsing at this point)
    // NOTE(review): when resuming, this stores nbOfSentencesToParse rather than a cumulative
    // total; presumably ExtractTokensFromTxtFiles treats it as an absolute position - verify.
    File.WriteAllText(nbOfSentencesParsedFilePath, nbOfSentencesToParse.ToString());

    // Stop timing before reporting so console output is not part of the measurement.
    sw.Stop();

    Console.WriteLine("Building of frequency dictionary done");
    Console.WriteLine("=====================================");
    Console.WriteLine("Ellapsed time: {0}", sw.Elapsed.ToString("g"));
}