} // graph

/// <summary>
/// Rebuilds the XPath lookup (<c>CachedIndex</c>), the content graph (<c>ContentGraph</c>)
/// and feeds both tag counters from the current <c>items</c>.
/// </summary>
/// <remarks>
/// Only <c>CachedIndex</c> is cleared by this method. <c>NodeTagCounter</c> and <c>TagCounter</c>
/// are only counted into here - NOTE(review): if they are not reset elsewhere, repeated calls
/// will accumulate counts; confirm against the callers.
/// </remarks>
internal void RebuildIndex()
{
    // 1) XPath -> entry lookup, rebuilt from scratch.
    CachedIndex.Clear();
    foreach (var entry in items)
    {
        CachedIndex.Add(entry.XPath, entry);
    }

    // 2) Graph over the same entries.
    ContentGraph = NodeGraph.Build(items);

    // 3) Count one tag per named graph node.
    if (ContentGraph != null)
    {
        foreach (graphWrapNode<LeafNodeDictionaryEntry> graphNode in ContentGraph.getAllChildren())
        {
            if (graphNode.name.isNullOrEmpty())
            {
                continue;
            }

            // Strip path separator characters from both ends of the node name.
            String nodeTag = graphNode.name.Trim(ContentGraph.pathSeparator.ToCharArray());

            // When the selector pattern matches, keep only its last group as the tag.
            var selectorMatch = TagSelectorRegex.Match(nodeTag);
            if (selectorMatch.Success)
            {
                var groups = selectorMatch.Groups;
                nodeTag = groups[groups.Count - 1].Value;
            }

            NodeTagCounter.Count(nodeTag);
        }
    }

    // 4) Count the HTML node name of every entry.
    foreach (var entry in items)
    {
        TagCounter.Count(entry.node.Name);
    }
}
/// <summary>
/// Walks from <paramref name="node"/> towards the document root, using the tag frequencies held in
/// <c>NodeTagCounter</c>, until a node with a sufficiently rare tag name has been passed.
/// </summary>
/// <param name="node">The node to start from.</param>
/// <param name="significanceLevel">
/// The significance level: rate of occurrence of a tag relative to the top frequency in
/// <c>NodeTagCounter</c>. The lower the value, the rarer (more significant) a tag has to be to stop the walk.
/// </param>
/// <returns>
/// The node at which the walk stopped: a node without a parent, a node whose tag name has zero
/// frequency in <c>NodeTagCounter</c>, or the parent of the first node whose rate is not above
/// <paramref name="significanceLevel"/>. When <paramref name="significanceLevel"/> is 1 or greater,
/// <paramref name="node"/> itself is returned.
/// </returns>
/// <remarks>
/// NOTE(review): a zero-frequency tag returns the node itself, while a rare (but counted) tag returns
/// that node's parent - confirm this asymmetry is intended.
/// </remarks>
public HtmlNode GetFirstSignificantParentByNodeName(HtmlNode node, Double significanceLevel = 0.2)
{
    int highestFrequency = NodeTagCounter.GetTopFrequency();
    HtmlNode current = node;

    // The rate starts at 1 so the first node is always examined for thresholds below 1.
    for (double occurrenceRate = 1; occurrenceRate > significanceLevel;)
    {
        HtmlNode parent = current.ParentNode;
        if (parent == null)
        {
            // Reached a node with no parent: nothing further to climb to.
            break;
        }

        int tagFrequency = NodeTagCounter.GetFrequencyForItem(current.Name);
        if (tagFrequency == 0)
        {
            // Tag name is not present in the counter: stop at this node.
            break;
        }

        // Ratio of this tag's frequency to the top frequency (presumably frequency / top - confirm GetRatio).
        occurrenceRate = tagFrequency.GetRatio(highestFrequency);
        current = parent;
    }

    return current;
}