Esempio n. 1
0
        /// <summary>
        /// Verifies that merging two collections yields a result that is not equal to
        /// either input and whose count for "word" is the sum of both inputs' counts.
        /// </summary>
        public void Merge_OnExecute_ReturnsNewMergedCollection()
        {
            // Arrange: two separate collections, each holding "word" twice.
            var first = new WordOccurenceCollection();
            first.Add("word");
            first.Add("word");

            var second = new WordOccurenceCollection();
            second.Add("word");
            second.Add("word");

            // Act
            var merged = first.Merge(second);

            // Assert: the result differs from both inputs and carries the combined count.
            Assert.AreNotEqual(first, merged);
            Assert.AreNotEqual(second, merged);
            Assert.AreEqual(4, merged.GetWordCount("word"));
        }
        /// <summary>
        /// Counts word occurrences over the text nodes selected from the document of
        /// <paramref name="html"/> and returns them ordered by count, highest first.
        /// </summary>
        /// <param name="html">Parsed HTML result whose <c>Document</c> is queried for text nodes.</param>
        /// <returns>
        /// Word/count pairs ordered by descending count; an empty sequence when the
        /// XPath query yields no nodes.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="html"/> is <c>null</c>.</exception>
        public IEnumerable <KeyValuePair <string, int> > GetKeywords(HtmlResult html)
        {
            if (html == null)
            {
                throw new ArgumentNullException(nameof(html));
            }

            var occurences = new WordOccurenceCollection();

            // The previous expression ended its predicate with "]]", which is not valid
            // XPath and made the query fail before any text could be counted.
            // NOTE(review): the "//text()" descendant step looks like it still matches text
            // nested under script/style through their ancestors — confirm whether "/text()"
            // (direct children only) was intended.
            var textBlocks = html.Document.SelectNodes("//*[not(self::script) and not(self::style)]//text()");

            // SelectNodes result is null-checked as in the original: no matches means nothing to count.
            if (textBlocks != null)
            {
                foreach (var textBlock in textBlocks)
                {
                    var occurencesInBlock = CountOccurencesForText(textBlock.InnerText);

                    // Merge returns its result, so the accumulator is reassigned on every pass.
                    occurences = occurences.Merge(occurencesInBlock);
                }
            }

            return occurences.OrderByDescending(x => x.Value);
        }
Esempio n. 3
0
        /// <summary>
        /// Builds a word-occurrence collection from the non-blank text nodes selected
        /// via XPath on <paramref name="htmlNode"/>.
        /// </summary>
        /// <param name="htmlNode">Node on which the XPath query is executed.</param>
        /// <returns>
        /// The merged occurrences of every non-blank text node; an empty collection
        /// when the query yields no nodes.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="htmlNode"/> is <c>null</c>.</exception>
        public WordOccurenceCollection GetKeywords(HtmlNode htmlNode)
        {
            if (htmlNode == null)
            {
                throw new ArgumentNullException(nameof(htmlNode));
            }

            var result = new WordOccurenceCollection();

            // NOTE(review): the "//text()" descendant step looks like it still matches text
            // nested under script/style through their ancestors — confirm the intent.
            var textNodes = htmlNode.SelectNodes("//*[not(self::script) and not(self::style)]//text()");
            if (textNodes == null)
            {
                // Nothing matched: hand back the empty collection.
                return result;
            }

            foreach (var node in textNodes)
            {
                // Blank nodes are skipped based on InnerText; the text that is
                // actually counted comes from InnerHtml.
                if (string.IsNullOrWhiteSpace(node.InnerText))
                {
                    continue;
                }

                var blockOccurences = CountOccurencesForText(node.InnerHtml);
                result = result.Merge(blockOccurences);
            }

            return result;
        }