Exemplo n.º 1
0
        /// <summary>
        /// Splits a block of text into words and collects the simplified form of
        /// every word that is long enough into a new <see cref="WordOccurenceCollection"/>.
        /// </summary>
        /// <param name="textBlockText">The raw text to tokenize; must not be <c>null</c>.</param>
        /// <returns>A new collection to which each accepted, simplified word has been added.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="textBlockText"/> is <c>null</c>.
        /// </exception>
        private WordOccurenceCollection CountOccurencesForText(string textBlockText)
        {
            if (textBlockText == null)
            {
                throw new ArgumentNullException(nameof(textBlockText));
            }

            // Characters that terminate a word; empty fragments between them are dropped.
            char[] separators = { '.', '?', '!', ' ', ':', ',' };

            var result = new WordOccurenceCollection();

            foreach (var token in textBlockText.Split(separators, StringSplitOptions.RemoveEmptyEntries))
            {
                // The raw token must already satisfy the minimum length.
                if (token.Length < MinimumWordLength)
                {
                    continue;
                }

                // The length is checked a second time on the simplified form,
                // together with a null/blank check on the result of Simplify().
                var simplified = token.Simplify();
                if (string.IsNullOrWhiteSpace(simplified) || simplified.Length < MinimumWordLength)
                {
                    continue;
                }

                result.Add(simplified);
            }

            return result;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Calls <c>GetWordCount</c> with <c>null</c> on a collection that holds a single word.
        /// </summary>
        /// <remarks>
        /// The body itself asserts nothing; going by the test name, an exception is the expected outcome.
        /// NOTE(review): presumably the expectation is declared by an attribute outside this block - confirm.
        /// </remarks>
        public void GetWordCount_OnExecuteWithNull_ThrowsException()
        {
            // Arrange
            var collection = new WordOccurenceCollection();
            collection.Add("word");

            // Act
            collection.GetWordCount(null);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Checks that <c>GetWordCount</c> yields 0 for a word that was never added to the collection.
        /// </summary>
        public void GetWordCount_OnExecuteWithWordNotInCollection_ReturnsZero()
        {
            // Arrange: the collection only knows "word".
            var collection = new WordOccurenceCollection();
            collection.Add("word");

            // Act
            var countForUnknownWord = collection.GetWordCount("test");

            // Assert
            Assert.AreEqual(0, countForUnknownWord);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Checks that <c>Merge</c> returns a collection that is equal to neither input
        /// and that reports the combined count (2 + 2 = 4) for the shared word.
        /// </summary>
        public void Merge_OnExecute_ReturnsNewMergedCollection()
        {
            // Arrange: two collections, each containing "word" twice.
            var first = new WordOccurenceCollection { "word", "word" };
            var second = new WordOccurenceCollection { "word", "word" };

            // Act
            var merged = first.Merge(second);

            // Assert
            Assert.AreNotEqual(first, merged);
            Assert.AreNotEqual(second, merged);
            Assert.AreEqual(4, merged.GetWordCount("word"));
        }
        /// <summary>
        /// Counts the words found in the text nodes of an HTML result and returns
        /// them ordered from the highest to the lowest <c>Value</c>.
        /// </summary>
        /// <param name="html">The HTML result whose <c>Document</c> is queried; must not be <c>null</c>.</param>
        /// <returns>
        /// The entries of the merged word collection, sorted by descending <c>Value</c>.
        /// Contains no entries when the query selects no text nodes.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="html"/> is <c>null</c>.
        /// </exception>
        public IEnumerable<KeyValuePair<string, int>> GetKeywords(HtmlResult html)
        {
            if (html == null)
            {
                throw new ArgumentNullException(nameof(html));
            }

            var occurences = new WordOccurenceCollection();

            // Select text nodes whose parent element is neither <script> nor <style>.
            // Two fixes compared to the previous expression:
            //  - a stray closing bracket ("]]") made the XPath malformed;
            //  - the descendant step ("//text()") also matched script/style text through any
            //    non-script ancestor (e.g. <html>), so the exclusion had no effect. The child
            //    step ("/text()") ties the predicate to the text node's direct parent.
            var textBlocks = html.Document.SelectNodes("//*[not(self::script) and not(self::style)]/text()");

            // SelectNodes result is null-checked because the query may select nothing.
            if (textBlocks != null)
            {
                foreach (var textBlock in textBlocks)
                {
                    var occurencesInBlock = CountOccurencesForText(textBlock.InnerText);

                    // Merge's result replaces the running total on every iteration.
                    occurences = occurences.Merge(occurencesInBlock);
                }
            }

            return occurences.OrderByDescending(x => x.Value);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Checks that <c>GetWordCount</c> reports how many times each word was added:
        /// 4 for "word" and 3 for "test".
        /// </summary>
        public void GetWordCount_OnExecute_ReturnsTheOccurenceOfTheWords()
        {
            // Arrange: "word" four times followed by "test" three times.
            var collection = new WordOccurenceCollection
            {
                "word", "word", "word", "word",
                "test", "test", "test"
            };

            // Act
            var countForWord = collection.GetWordCount("word");
            var countForTest = collection.GetWordCount("test");

            // Assert
            Assert.AreEqual(4, countForWord);
            Assert.AreEqual(3, countForTest);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Counts the words found in the non-blank text nodes selected from an HTML node.
        /// </summary>
        /// <param name="htmlNode">The node the XPath query is issued on; must not be <c>null</c>.</param>
        /// <returns>
        /// The merged word collection of all selected text nodes; an empty collection
        /// when the query selects nothing.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="htmlNode"/> is <c>null</c>.
        /// </exception>
        public WordOccurenceCollection GetKeywords(HtmlNode htmlNode)
        {
            if (htmlNode == null)
            {
                throw new ArgumentNullException(nameof(htmlNode));
            }

            var occurences = new WordOccurenceCollection();

            // Select text nodes whose parent element is neither <script> nor <style>.
            // The previous descendant step ("//text()") also matched script/style text through
            // any non-script ancestor (e.g. <html>), so the exclusion had no effect; the child
            // step ("/text()") ties the predicate to the text node's direct parent.
            // NOTE(review): the expression starts with "//", i.e. it is an absolute path that is
            // evaluated from the document root rather than from htmlNode - confirm that callers
            // only pass the document node, or make the path relative.
            var textBlocks = htmlNode.SelectNodes("//*[not(self::script) and not(self::style)]/text()");

            // SelectNodes result is null-checked because the query may select nothing.
            if (textBlocks != null)
            {
                // Blank nodes are filtered on InnerText; the text that gets counted is InnerHtml.
                var textBlocksWithText = textBlocks
                    .Where(x => !string.IsNullOrWhiteSpace(x.InnerText))
                    .Select(x => x.InnerHtml);

                foreach (var text in textBlocksWithText)
                {
                    var occurencesInBlock = CountOccurencesForText(text);

                    // Merge's result replaces the running total on every iteration.
                    occurences = occurences.Merge(occurencesInBlock);
                }
            }

            return occurences;
        }