/// <summary>
/// Writes the collected statistics to two text files.
/// </summary>
/// <param name="wordFrequenciesFilePath">
/// Target file for the word frequencies; one "key, Separator, value" record per line.
/// </param>
/// <param name="ngramsFrequenciesFilePath">
/// Target file for the n-gram frequencies; the first line is <c>TotalNgramsCounter</c>,
/// every following line is the n-gram key parts joined by <c>Separator</c>, then
/// <c>Separator</c> and the frequency.
/// </param>
public void SaveResults(string wordFrequenciesFilePath, string ngramsFrequenciesFilePath)
{
    // Word frequencies. The writer is closed before the second file is opened.
    using (var wordWriter = new StreamWriter(wordFrequenciesFilePath))
    {
        foreach (var entry in WordFrequencies)
        {
            wordWriter.WriteLine(string.Format("{0}{1}{2}", entry.Key, Separator, entry.Value));
        }
    }

    // N-gram frequencies.
    using (var ngramWriter = new StreamWriter(ngramsFrequenciesFilePath))
    {
        // Header line: the total n-gram counter.
        ngramWriter.WriteLine(TotalNgramsCounter);

        // NGramsFrequencies is deliberately written in its natural enumeration order:
        // ordering a dictionary creates an ordered copy, which caused OutOfMemoryExceptions.
        foreach (var entry in NGramsFrequencies)
        {
            var record = new StringBuilder()
                .Append(string.Join(Separator.ToString(), entry.Key))
                .Append(Separator)
                .Append(entry.Value)
                .ToString();

            ngramWriter.WriteLine(record);
        }
    }
}
/// <summary>
/// Prints the overall counters (unique words, total words, total verses) and then runs an
/// interactive console loop that reports how often each entered word occurs.
/// The loop ends when the user enters a blank line or the input stream is exhausted.
/// </summary>
private void WordFrequencyExplorer()
{
    Console.WriteLine($"Unique word count: {UniqueWords.Count:n0}");
    Console.WriteLine($"Total word count: {TotalWordCount:n0}");
    Console.WriteLine($"Total verse count: {TotalVerseCount:n0}");
    Console.WriteLine();
    Console.WriteLine();

    while (true)
    {
        Console.Write("Enter word: ");

        // Console.ReadLine returns null once no more input is available (EOF or an
        // exhausted redirected stream); treat that like a blank entry instead of
        // dereferencing null.
        var key = Console.ReadLine()?.ToUpperInvariant().Trim();
        if (string.IsNullOrWhiteSpace(key))
        {
            return;
        }

        // Single lookup instead of ContainsKey followed by the indexer.
        if (!WordFrequencies.TryGetValue(key, out var count))
        {
            Console.WriteLine($"{key} was not found in this bible.");
            continue;
        }

        Console.WriteLine($"{key} was found ({count:n0}) times in this bible.");
        Console.WriteLine();
    }
}
/// <summary>
/// Expects <c>FirstTry</c> to return 2 when asked for "sentence" in the sample text.
/// </summary>
public void FirstTry_Basic_Test()
{
    const string text = "This is my sentence and I can write anything one this sentence because this is mine!";
    const string target = "sentence";
    var counter = new WordFrequencies();

    int occurrences = counter.FirstTry(text, target);

    Assert.Equal(2, occurrences);
}
/// <summary>
/// Expects <c>FirstTry</c> to return 0 for a word ("word") that is absent from the sample text.
/// </summary>
public void FirstTry_No_Occurrences()
{
    const string text = "This is my sentence and I can write anything one this sentence because this is mine!";
    const string target = "word";
    var counter = new WordFrequencies();

    int occurrences = counter.FirstTry(text, target);

    Assert.Equal(0, occurrences);
}
/// <summary>
/// Calls <c>GetNthMostOccurring</c> with N = 0 on a single-word list.
/// </summary>
/// <remarks>
/// NOTE(review): there is no assertion in the body; the expected exception is presumably
/// declared by a test attribute that is not visible here - confirm.
/// </remarks>
public void InvalidNThrowsExceptionTest()
{
    // Arrange
    var input = StringList("a");
    var frequencies = new WordFrequencies(input);
    int invalidN = 0;

    // Act
    frequencies.GetNthMostOccurring(invalidN);
}
/// <summary>
/// Expects <c>SecondTry</c> to return 3 for "this"; the sample text contains the word
/// three times when the capitalised "This" is counted as well.
/// </summary>
public void SecondTry_Word_This_With_3_Occurrences()
{
    const string text = "This is my sentence and I can write anything one this sentence because this is mine!";
    const string target = "this";
    var counter = new WordFrequencies(text);

    int occurrences = counter.SecondTry(target);

    Assert.Equal(3, occurrences);
}
/// <summary>
/// Expects <c>SecondTry</c> to return 2 for "sentence" in the sample text.
/// </summary>
public void SecondTry_Basic_Test()
{
    const string text = "This is my sentence and I can write anything one this sentence because this is mine!";
    const string target = "sentence";
    var counter = new WordFrequencies(text);

    int occurrences = counter.SecondTry(target);

    Assert.Equal(2, occurrences);
}
// Methods ------------------

/// <summary>
/// Adds <paramref name="frequency"/> to the running total and to the entry for
/// <paramref name="word"/>, creating the entry when the word has not been seen yet.
/// </summary>
/// <param name="word">Word whose frequency is increased.</param>
/// <param name="frequency">Amount to add; defaults to 1.</param>
private void IncreaseWordFreq(string word, long frequency = 1)
{
    TotalWordCounter += frequency;

    // One read and one write instead of ContainsKey + indexer read + indexer write/Add.
    // When the word is missing, TryGetValue leaves 'current' at the default (0), so the
    // new entry starts at 'frequency' exactly as the former Add branch did.
    WordFrequencies.TryGetValue(word, out var current);
    WordFrequencies[word] = current + frequency;
}
/// <summary>
/// With the words a, a, b, b, c, expects <c>GetNthMostOccurring(3)</c> to yield only "c".
/// </summary>
public void OrderingSkipsValuesTest()
{
    // Arrange
    var input = StringList("a", "a", "b", "b", "c");
    var frequencies = new WordFrequencies(input);
    var expectedWords = StringList("c");
    int n = 3;

    // Act
    var result = frequencies.GetNthMostOccurring(n);

    // Assert
    AssertListsAreEqual(expectedWords, result);
}
/// <summary>
/// With the words a, a, b, b, expects <c>GetNthMostOccurring(1)</c> to yield "a" then "b".
/// </summary>
public void TwoWordsSameOccurrencesAreSortedTest()
{
    // Arrange
    var input = StringList("a", "a", "b", "b");
    var frequencies = new WordFrequencies(input);
    var expectedWords = StringList("a", "b");
    int n = 1;

    // Act
    var result = frequencies.GetNthMostOccurring(n);

    // Assert
    AssertListsAreEqual(expectedWords, result);
}
/// <summary>
/// With the words a, a, expects <c>GetNthMostOccurring(1)</c> to yield a single "a".
/// </summary>
public void SingleWordWithMultipleOccurrencesTest()
{
    // Arrange
    var input = StringList("a", "a");
    var frequencies = new WordFrequencies(input);
    var expectedWords = StringList("a");
    int n = 1;

    // Act
    var result = frequencies.GetNthMostOccurring(n);

    // Assert
    AssertListsAreEqual(expectedWords, result);
}
/// <summary>
/// With the words a, a, b, b, c, expects <c>GetNthMostOccurring(2)</c> to compare equal
/// to a null list.
/// </summary>
public void InBetweenFrequenciesReturnsNothingTest()
{
    // Arrange
    var input = StringList("a", "a", "b", "b", "c");
    var frequencies = new WordFrequencies(input);
    List<string> expectedWords = null;
    int n = 2;

    // Act
    var result = frequencies.GetNthMostOccurring(n);

    // Assert
    AssertListsAreEqual(expectedWords, result);
}
/// <summary>
/// Interactive console loop: for each entered word, prints how often it occurs and then
/// every location returned by <c>WordLocations</c> together with that location's text.
/// The loop ends when the user enters a blank line or the input stream is exhausted.
/// </summary>
private void WordLocationExplorer()
{
    while (true)
    {
        Console.Write("Enter word: ");

        // Console.ReadLine returns null once no more input is available (EOF or an
        // exhausted redirected stream); treat that like a blank entry instead of
        // dereferencing null.
        var key = Console.ReadLine()?.ToUpperInvariant().Trim();
        if (string.IsNullOrWhiteSpace(key))
        {
            return;
        }

        // Single lookup instead of ContainsKey followed by the indexer.
        if (!WordFrequencies.TryGetValue(key, out var count))
        {
            Console.WriteLine($"{key} was not found in this bible.");
            continue;
        }

        Console.WriteLine();
        Console.ForegroundColor = ConsoleColor.DarkBlue;
        Console.WriteLine($"{key} was found ({count:n0}) times in this bible.");
        Console.ResetColor();
        Console.WriteLine();

        foreach (var location in WordLocations(key))
        {
            // Location header in colour, verse text in the default colour.
            Console.ForegroundColor = ConsoleColor.Blue;
            Console.WriteLine(location.ToString());
            Console.ResetColor();
            Console.WriteLine(location.Text);
            Console.WriteLine();
        }
    }
}
/// <summary>
/// Walks the three element levels below the root of <c>BibleXml</c> (named book, chapter
/// and verse by the loops) and updates <c>TotalVerseCount</c>, <c>TotalWordCount</c>,
/// <c>WordFrequencies</c> and <c>UniqueWords</c> from the text of every verse element.
/// </summary>
/// <remarks>
/// Non-element nodes (for example XML comments) are skipped at every level. The "n"
/// attributes are no longer read: their values were never used, and reading them threw a
/// NullReferenceException on any node without attributes.
/// </remarks>
private void LoadWordFrequencies()
{
    var root = BibleXml.DocumentElement;
    if (root == null)
    {
        // Document without a root element: nothing to count.
        return;
    }

    foreach (XmlNode book in root.ChildNodes)
    {
        if (book.NodeType != XmlNodeType.Element)
        {
            continue;
        }

        foreach (XmlNode chapter in book.ChildNodes)
        {
            if (chapter.NodeType != XmlNodeType.Element)
            {
                continue;
            }

            foreach (XmlNode verse in chapter.ChildNodes)
            {
                if (verse.NodeType != XmlNodeType.Element)
                {
                    continue;
                }

                TotalVerseCount++;
                CountVerseWords(verse.InnerText);
            }
        }
    }
}

/// <summary>
/// Splits one verse into normalized words and records each of them in the counters.
/// </summary>
private void CountVerseWords(string verseText)
{
    var words = NormalizeVerseText(verseText).Split(' ', StringSplitOptions.RemoveEmptyEntries);

    foreach (var word in words)
    {
        TotalWordCount++;

        // Single lookup; 'seen' stays at the default (0) for a word not met before.
        WordFrequencies.TryGetValue(word, out var seen);
        WordFrequencies[word] = seen + 1;

        UniqueWords.Add(word);
    }
}

/// <summary>
/// Upper-cases the text and reduces it to letters, word-internal hyphens and single-space
/// separators.
/// </summary>
/// <remarks>
/// Whitespace of any kind and hyphens that do not sit between two letters become a space
/// rather than being deleted, so that "a--b" or a line break inside a verse no longer
/// fuses two words into one. All other characters (digits, punctuation) are removed, as
/// before. Because a hyphen is only kept between two letters, a word can never start or
/// end with one and no trimming is needed afterwards.
/// </remarks>
private static string NormalizeVerseText(string text)
{
    var upper = text.ToUpperInvariant();

    // Every input character yields at most one output character.
    var buffer = new char[upper.Length];
    var length = 0;

    for (var i = 0; i < upper.Length; i++)
    {
        var c = upper[i];

        if (char.IsLetter(c))
        {
            buffer[length++] = c;
        }
        else if (char.IsWhiteSpace(c))
        {
            buffer[length++] = ' ';
        }
        else if (c == '-')
        {
            // Allow hyphenated words: keep the hyphen only when it joins two letters.
            var joinsLetters = i > 0
                && i < upper.Length - 1
                && char.IsLetter(upper[i - 1])
                && char.IsLetter(upper[i + 1]);

            buffer[length++] = joinsLetters ? '-' : ' ';
        }
    }

    return new string(buffer, 0, length);
}