/// <summary>
/// Verifies that <c>Merge</c> yields a collection that is not equal to either
/// input and whose count for a shared word is the sum of both inputs' counts.
/// </summary>
public void Merge_OnExecute_ReturnsNewMergedCollection()
{
    // Arrange: two collections, each holding the same word twice.
    var first = new WordOccurenceCollection { "word", "word" };
    var second = new WordOccurenceCollection { "word", "word" };

    // Act
    var merged = first.Merge(second);

    // Assert: the result differs from both inputs and carries the combined count (2 + 2).
    Assert.AreNotEqual(first, merged);
    Assert.AreNotEqual(second, merged);
    Assert.AreEqual(4, merged.GetWordCount("word"));
}
/// <summary>
/// Collects the word occurrences found in the text nodes of the given HTML result
/// and returns them ordered by descending <c>Value</c>.
/// </summary>
/// <param name="html">The HTML result whose <c>Document</c> is queried for text nodes.</param>
/// <returns>
/// The merged word/occurrence pairs, sorted from highest to lowest <c>Value</c>.
/// Empty when the query yields no text nodes.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="html"/> is <c>null</c>.</exception>
public IEnumerable<KeyValuePair<string, int>> GetKeywords(HtmlResult html)
{
    if (html == null)
    {
        throw new ArgumentNullException(nameof(html));
    }

    var occurences = new WordOccurenceCollection();

    // The previous expression ended its predicate with "]]", which is not a valid
    // XPath expression. It also used "//text()" after the predicate, which still
    // matched script/style text through any non-script ancestor (html, head, body).
    // Selecting text() as a direct child applies the predicate to the text's parent.
    var textBlocks = html.Document.SelectNodes("//*[not(self::script) and not(self::style)]/text()");

    // SelectNodes may hand back null instead of an empty list, hence the guard.
    if (textBlocks != null)
    {
        foreach (var textBlock in textBlocks)
        {
            var occurencesInBlock = CountOccurencesForText(textBlock.InnerText);

            // Merge returns a new collection, so the result has to be re-assigned.
            occurences = occurences.Merge(occurencesInBlock);
        }
    }

    return occurences.OrderByDescending(x => x.Value);
}
/// <summary>
/// Collects the word occurrences found in the non-blank text nodes located at or
/// below the given node, skipping text that sits directly inside
/// <c>script</c> or <c>style</c> elements.
/// </summary>
/// <param name="htmlNode">The node whose subtree is searched for text nodes.</param>
/// <returns>
/// The merged occurrences of all matching text blocks; an empty collection when
/// no text nodes are found.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="htmlNode"/> is <c>null</c>.</exception>
public WordOccurenceCollection GetKeywords(HtmlNode htmlNode)
{
    if (htmlNode == null)
    {
        throw new ArgumentNullException(nameof(htmlNode));
    }

    var occurences = new WordOccurenceCollection();

    // Two fixes compared to "//*[...]//text()":
    //  * A leading "//" is absolute in XPath, so the query ran against the whole
    //    document regardless of which node was passed in. "descendant-or-self::"
    //    keeps the search inside htmlNode (and includes htmlNode's own text).
    //  * "//text()" after the predicate still matched script/style text through any
    //    non-script ancestor; "/text()" applies the predicate to the text's parent.
    var textBlocks = htmlNode.SelectNodes("descendant-or-self::*[not(self::script) and not(self::style)]/text()");

    // SelectNodes may hand back null instead of an empty list, hence the guard.
    if (textBlocks != null)
    {
        // Whitespace-only text nodes carry no words; drop them before counting.
        var textBlocksWithText = textBlocks
            .Where(x => !string.IsNullOrWhiteSpace(x.InnerText))
            .Select(x => x.InnerHtml);

        foreach (var text in textBlocksWithText)
        {
            var occurencesInBlock = CountOccurencesForText(text);

            // Merge returns a new collection, so the result has to be re-assigned.
            occurences = occurences.Merge(occurencesInBlock);
        }
    }

    return occurences;
}