public void SaveResults(string wordFrequenciesFilePath, string ngramsFrequenciesFilePath)
        {
            // Writes both frequency tables to text files.
            // wordFrequenciesFilePath:   receives one "<word><Separator><count>" line per entry.
            // ngramsFrequenciesFilePath: receives the total n-gram counter on the first line,
            //                            followed by one "<key parts><Separator><count>" line per n-gram.

            using (var wordsOutput = new StreamWriter(wordFrequenciesFilePath))
            {
                foreach (var entry in WordFrequencies)
                {
                    var formatted = string.Format("{0}{1}{2}", entry.Key, Separator, entry.Value);
                    wordsOutput.WriteLine(formatted);
                }
            }

            using (var ngramsOutput = new StreamWriter(ngramsFrequenciesFilePath))
            {
                // Header line: the overall n-gram counter.
                ngramsOutput.WriteLine(TotalNgramsCounter);

                // Entries are written in enumeration order on purpose: ordering the dictionary
                // would build an ordered copy, which has caused OutOfMemoryExceptions.
                foreach (var entry in NGramsFrequencies)
                {
                    var lineBuilder = new StringBuilder();
                    lineBuilder.Append(string.Join(Separator.ToString(), entry.Key));
                    lineBuilder.Append(Separator);
                    lineBuilder.Append(entry.Value);
                    ngramsOutput.WriteLine(lineBuilder.ToString());
                }
            }
        }
        /// <summary>
        /// Prints the unique-word, total-word and total-verse counts, then repeatedly prompts
        /// for a word and reports how many times it occurs. The loop ends when a blank line
        /// is entered or when the input stream is exhausted.
        /// </summary>
        private void WordFrequencyExplorer()
        {
            Console.WriteLine($"Unique word count: {UniqueWords.Count:n0}");
            Console.WriteLine($"Total word count: {TotalWordCount:n0}");
            Console.WriteLine($"Total verse count: {TotalVerseCount:n0}");
            Console.WriteLine();
            Console.WriteLine();

            while (true)
            {
                Console.Write("Enter word: ");

                // Console.ReadLine() returns null once no more input is available (for example
                // redirected stdin at EOF); treat that like a blank line instead of crashing.
                var key = Console.ReadLine()?.ToUpperInvariant().Trim();
                if (string.IsNullOrWhiteSpace(key))
                {
                    return;
                }

                // Single lookup: fetches the count and tells us whether the word exists.
                if (!WordFrequencies.TryGetValue(key, out int count))
                {
                    Console.WriteLine($"{key} was not found in this bible.");
                    continue;
                }

                Console.WriteLine($"{key} was found ({count:n0}) times in this bible.");
                Console.WriteLine();
            }
        }
Beispiel #3
0
        public void FirstTry_Basic_Test()
        {
            // Arrange: the target word appears twice in the text.
            const string text   = "This is my sentence and I can write anything one this sentence because this is mine!";
            const string target = "sentence";
            var counter = new WordFrequencies();

            // Act
            int occurrences = counter.FirstTry(text, target);

            // Assert
            Assert.Equal(2, occurrences);
        }
Beispiel #4
0
        public void FirstTry_No_Occurrences()
        {
            // Arrange: the target word does not appear anywhere in the text.
            const string text   = "This is my sentence and I can write anything one this sentence because this is mine!";
            const string target = "word";
            var counter = new WordFrequencies();

            // Act
            int occurrences = counter.FirstTry(text, target);

            // Assert
            Assert.Equal(0, occurrences);
        }
Beispiel #5
0
        public void InvalidNThrowsExceptionTest()
        {
            // Arrange: a single-word corpus and a rank of zero.
            var counter     = new WordFrequencies(StringList("a"));
            var invalidRank = 0;

            // Act
            // NOTE(review): no assertion is visible in this method; presumably the expected
            // exception is declared by a test attribute outside this view — confirm.
            counter.GetNthMostOccurring(invalidRank);
        }
Beispiel #6
0
        public void SecondTry_Word_This_With_3_Occurrences()
        {
            // Arrange: the text contains "This" once and "this" twice; the lookup word is lower-case.
            const string text   = "This is my sentence and I can write anything one this sentence because this is mine!";
            const string target = "this";
            var counter = new WordFrequencies(text);

            // Act
            int occurrences = counter.SecondTry(target);

            // Assert
            Assert.Equal(3, occurrences);
        }
Beispiel #7
0
        public void SecondTry_Basic_Test()
        {
            // Arrange: the target word appears twice in the text.
            const string text   = "This is my sentence and I can write anything one this sentence because this is mine!";
            const string target = "sentence";
            var counter = new WordFrequencies(text);

            // Act
            int occurrences = counter.SecondTry(target);

            // Assert
            Assert.Equal(2, occurrences);
        }
        // Methods ------------------

        /// <summary>
        /// Adds <paramref name="frequency"/> to the running total word counter and to the
        /// tally stored for <paramref name="word"/>, creating the entry if it does not exist yet.
        /// </summary>
        /// <param name="word">The word whose tally is increased.</param>
        /// <param name="frequency">How many occurrences to add; defaults to 1.</param>
        private void IncreaseWordFreq(string word, long frequency = 1)
        {
            TotalWordCounter += frequency;

            // One read lookup instead of ContainsKey followed by an indexer read:
            // when the word is missing, TryGetValue leaves 'current' at its default of 0.
            WordFrequencies.TryGetValue(word, out var current);
            WordFrequencies[word] = current + frequency;
        }
Beispiel #9
0
        public void OrderingSkipsValuesTest()
        {
            // Arrange: "a" and "b" each appear twice, "c" appears once.
            var counter       = new WordFrequencies(StringList("a", "a", "b", "b", "c"));
            var expectedWords = StringList("c");
            var rank          = 3;

            // Act
            var result = counter.GetNthMostOccurring(rank);

            // Assert
            AssertListsAreEqual(expectedWords, result);
        }
Beispiel #10
0
        public void TwoWordsSameOccurrencesAreSortedTest()
        {
            // Arrange: "a" and "b" both appear twice.
            var counter       = new WordFrequencies(StringList("a", "a", "b", "b"));
            var expectedWords = StringList("a", "b");
            var rank          = 1;

            // Act
            var result = counter.GetNthMostOccurring(rank);

            // Assert
            AssertListsAreEqual(expectedWords, result);
        }
Beispiel #11
0
        public void SingleWordWithMultipleOccurrencesTest()
        {
            // Arrange: the only word, "a", appears twice.
            var counter       = new WordFrequencies(StringList("a", "a"));
            var expectedWords = StringList("a");
            var rank          = 1;

            // Act
            var result = counter.GetNthMostOccurring(rank);

            // Assert
            AssertListsAreEqual(expectedWords, result);
        }
Beispiel #12
0
        public void InBetweenFrequenciesReturnsNothingTest()
        {
            // Arrange: "a" and "b" each appear twice, "c" appears once; a null list is expected back.
            var counter = new WordFrequencies(StringList("a", "a", "b", "b", "c"));
            List <string> expectedWords = null;
            var rank = 2;

            // Act
            var result = counter.GetNthMostOccurring(rank);

            // Assert
            AssertListsAreEqual(expectedWords, result);
        }
        /// <summary>
        /// Repeatedly prompts for a word, reports how many times it occurs and then lists
        /// every location returned by <c>WordLocations</c> together with its text. The loop
        /// ends when a blank line is entered or when the input stream is exhausted.
        /// </summary>
        private void WordLocationExplorer()
        {
            while (true)
            {
                Console.Write("Enter word: ");

                // Console.ReadLine() returns null once no more input is available (for example
                // redirected stdin at EOF); treat that like a blank line instead of crashing.
                var key = Console.ReadLine()?.ToUpperInvariant().Trim();
                if (string.IsNullOrWhiteSpace(key))
                {
                    return;
                }

                // Single lookup: fetches the count and tells us whether the word exists.
                if (!WordFrequencies.TryGetValue(key, out int count))
                {
                    Console.WriteLine($"{key} was not found in this bible.");
                    continue;
                }

                // Summary line, highlighted in dark blue.
                Console.WriteLine();
                Console.ForegroundColor = ConsoleColor.DarkBlue;
                Console.WriteLine($"{key} was found ({count:n0}) times in this bible.");
                Console.ResetColor();
                Console.WriteLine();

                // Each hit: the location in blue, then its text in the default colour.
                foreach (var location in WordLocations(key))
                {
                    Console.ForegroundColor = ConsoleColor.Blue;
                    Console.WriteLine(location.ToString());
                    Console.ResetColor();
                    Console.WriteLine(location.Text);
                    Console.WriteLine();
                }
            }
        }
        /// <summary>
        /// Walks the three nesting levels under the root of <c>BibleXml</c> (book, chapter,
        /// verse) and tallies the words of every verse: increments <c>TotalVerseCount</c> per
        /// verse and <c>TotalWordCount</c> per word, updates the per-word count in
        /// <c>WordFrequencies</c> and records each word in <c>UniqueWords</c>.
        /// Words are upper-cased with the invariant culture before counting.
        /// </summary>
        private void LoadWordFrequencies()
        {
            foreach (XmlNode book in BibleXml.DocumentElement.ChildNodes)
            {
                foreach (XmlNode chapter in book.ChildNodes)
                {
                    foreach (XmlNode verse in chapter.ChildNodes)
                    {
                        // Only element nodes are verses. Comments and similar nodes are skipped;
                        // previously they crashed on a null attribute collection.
                        if (verse.NodeType != XmlNodeType.Element)
                        {
                            continue;
                        }

                        TotalVerseCount++;

                        var text = StripNonWordCharacters(verse.InnerText.ToUpperInvariant());

                        foreach (var token in text.Split(' ', StringSplitOptions.RemoveEmptyEntries))
                        {
                            // A hyphen kept at the very start or end of the verse is not part of a word.
                            var word = token.Trim('-');
                            if (word.Length == 0)
                            {
                                continue;
                            }

                            TotalWordCount++;

                            // One read lookup; a missing word leaves 'occurrences' at 0.
                            WordFrequencies.TryGetValue(word, out int occurrences);
                            WordFrequencies[word] = occurrences + 1;

                            UniqueWords.Add(word);
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Returns <paramref name="text"/> with every character removed except letters,
        /// spaces and hyphens. A hyphen that is neither the first nor the last character is
        /// kept only when it sits directly between two letters (a hyphenated word).
        /// </summary>
        private static string StripNonWordCharacters(string text)
        {
            var kept   = new char[text.Length];
            var length = 0;

            for (var i = 0; i < text.Length; i++)
            {
                var c = text[i];

                if (c == '-')
                {
                    var isInterior = i > 0 && i < text.Length - 1;
                    if (isInterior && !(char.IsLetter(text[i - 1]) && char.IsLetter(text[i + 1])))
                    {
                        continue;
                    }
                }
                else if (c != ' ' && !char.IsLetter(c))
                {
                    continue;
                }

                kept[length++] = c;
            }

            return new string(kept, 0, length);
        }