/// <summary>
/// Records the XPath root derived from <paramref name="input"/> and adds the item of every leaf below it.
/// </summary>
/// <param name="input">Node whose path is registered and whose leaves are collected.</param>
public void Add(graphWrapNode<htmlWrapper> input)
{
    IObjectWithName namedRoot = input.root as IObjectWithName;

    // Presumably strips the root's name from the start of the path (removeStartsWith is defined elsewhere),
    // then switches the separator from "/" to "\".
    string relativePath = input.path.removeStartsWith(namedRoot.name);
    string rootXPath = relativePath.Replace("/", "\\");
    xPathRoots.AddUnique(rootXPath);

    foreach (var leaf in input.getAllLeafs())
    {
        graphWrapNode<htmlWrapper> leafNode = leaf as graphWrapNode<htmlWrapper>;
        Add(leafNode.item);
    }
}
/// <summary>
/// Adds the node, or chain of nodes, addressed by <paramref name="pathWithName"/> and assigns
/// <paramref name="__item"/> to the last node. The argument can be a path like: path1\\path2\\path3
/// </summary>
/// <param name="pathWithName">A single name or a path; it is split on <c>pathSeparator</c>.</param>
/// <param name="__item">The item set on the node at the end of the path.</param>
/// <returns>The node at the end of the path.</returns>
public override graphWrapNode<htmlWrapper> Add(string pathWithName, htmlWrapper __item)
{
    graphWrapNode<htmlWrapper> cursor = this;

    // Walk one segment at a time; Add(string) returns the child for each segment.
    foreach (string segment in pathWithName.SplitSmart(pathSeparator))
    {
        cursor = cursor.Add(segment);
    }

    cursor.SetItem(__item);
    return cursor;
}
/// <summary>
/// Builds the content tree out of <see cref="HtmlDocument"/>: one tree node is added for every
/// accepted element that has a text node and no child elements, and whose processed text is not empty.
/// </summary>
/// <param name="htmlDoc">The HTML document</param>
/// <param name="__name">The name of the root</param>
/// <param name="allowTitle">Passed through to <c>isTagAcceptable</c>.</param>
/// <param name="allowMeta">Passed through to <c>isTagAcceptable</c>.</param>
/// <returns>The tree created for <paramref name="htmlDoc"/>, populated with the accepted text nodes.</returns>
public static nodeTree buildTree(this HtmlDocument htmlDoc, string __name, Boolean allowTitle = false, Boolean allowMeta = false)
{
    nodeTree output = new nodeTree(__name, htmlDoc);

    // XPath: any element that has at least one text node child and zero element children.
    XPathNodeIterator iterator = htmlDoc.CreateNavigator().Select("//*[text()][count(*)=0]");

    while (iterator.MoveNext())
    {
        HtmlNodeNavigator htmlNavigator = iterator.Current as HtmlNodeNavigator;
        if (htmlNavigator == null)
        {
            // Not an HTML navigator: there is no CurrentNode to read, so skip instead of dereferencing null.
            continue;
        }

        var currentNode = htmlNavigator.CurrentNode;
        if (!isTagAcceptable(currentNode, null, allowTitle, allowMeta))
        {
            continue;
        }

        string cn = currentNode.InnerText.htmlContentProcess().Trim();
        if (cn.isNullOrEmptyString())
        {
            continue;
        }

        // The node's XPath, with "\" as separator, is used as the path inside the tree.
        string sp = currentNode.XPath.Replace("/", "\\");

        graphWrapNode<htmlWrapper> nn = output.Add(sp, currentNode.Clone());
        nn.item.content = cn;
        nn.item.xPath = sp;
        nn.item.path = nn.path;
    }

    return output;
}
/// <summary>
/// Builds <c>CompleteGraph</c> from the dictionary items, using each item's <c>XPath</c> as its path,
/// and runs <c>GraphMetrics.Process</c> on the result.
/// </summary>
/// <param name="leafDictionary">Dictionary whose <c>items</c> are turned into graph nodes.</param>
public void Analyze(LeafNodeDictionary leafDictionary)
{
    CompleteGraph = graphTools.BuildGraphFromItems<LeafNodeDictionaryEntry, graphWrapNode<LeafNodeDictionaryEntry>>(
        leafDictionary.items,
        entry => entry.XPath,
        "/");
    CompleteGraph.pathSeparator = "/";

    GraphMetrics.Process(CompleteGraph);

    var frequencyBins = GraphMetrics.JunctionCounter.GetFrequencyBins();
    foreach (var frequencyBin in frequencyBins)
    {
        if (frequencyBin.Key >= JunctionSizeMin)
        {
            // No action is taken for qualifying bins at the moment.
        }
    }
}
/// <summary>
/// Returns the child called <paramref name="__name"/>. A missing child is created first;
/// for an existing child the score of its item is incremented.
/// </summary>
/// <param name="__name">The name of the child node.</param>
/// <returns>The newly created or already existing child node.</returns>
public override graphWrapNode<htmlWrapper> Add(string __name)
{
    if (children.Contains(__name))
    {
        // Already present: count the repeated hit on the existing child's item.
        graphWrapNode<htmlWrapper> existing = this[__name] as graphWrapNode<htmlWrapper>;
        existing.item.score++;

        return (graphWrapNode<htmlWrapper>)this[__name];
    }

    var created = new nodeTree(__name, this);
    children.Add(__name, created);
    return created;
}
/// <summary>
/// Overridable hook for customizing <paramref name="targetNode"/>; this implementation does nothing.
/// </summary>
/// <param name="sourceNode">The graph node passed in by the caller.</param>
/// <param name="targetNode">The node object available for customization.</param>
/// <param name="styleCase">The styling case passed in by the caller.</param>
public virtual void SetNodeCustomization(graphWrapNode<LeafNodeDictionaryEntry> sourceNode, Node targetNode, DirectedGraphStylingCase styleCase)
{
    // Intentionally empty. A disabled draft switched on sourceNode.item.Category and set
    // targetNode.Background to light blue for "Static" and to orange for "Dynamic";
    // "Structure" and all other categories were left untouched.
}
/// <summary>
/// Gets the first branching node, walking from <paramref name="graph"/> towards the leafs along the
/// first child of each node. Returns the first visited node that has more than one child and whose
/// name is not listed in <paramref name="NodeNameToSkip"/>, or the last node reached when there is
/// no further child to follow.
/// </summary>
/// <remarks>
/// The starting node itself is never tested for branching; the test begins with its first child.
/// </remarks>
/// <param name="graph">The graph.</param>
/// <param name="NodeNameToSkip">Node names to skip; defaults to "html" and "body" when <c>null</c>.</param>
/// <returns>The first branching node, or the node at which the walk ran out of children.</returns>
public static graphWrapNode<LeafNodeDictionaryEntry> GetFirstBranchingNode(this graphWrapNode<LeafNodeDictionaryEntry> graph, List<String> NodeNameToSkip = null)
{
    if (NodeNameToSkip == null)
    {
        NodeNameToSkip = new List<String>() { "html", "body" };
    }

    graphWrapNode<LeafNodeDictionaryEntry> head = graph;

    while (true)
    {
        String firstChildName = head.getChildNames().FirstOrDefault();
        if (firstChildName.isNullOrEmpty())
        {
            // Nothing left to descend into: the current node is the result.
            return head;
        }

        head = head[firstChildName] as graphWrapNode<LeafNodeDictionaryEntry>;

        String node_name = head.name.GetFirstNodeNameFromXPath();
        if (!NodeNameToSkip.Contains(node_name) && head.GetChildren().Count > 1)
        {
            return head;
        }
    }
}
/// <summary>
/// Expands the current leaf nodes through their lexicon items, round by round.
/// </summary>
/// <remarks>
/// Each round calls <c>expandOnce</c> for every node on the work list and adds one child per returned
/// lexicon item, weighted by <c>1 / (child.level + 1)</c>. The first <c>TermLemma</c> encountered is
/// stored in <c>lemma</c> when none is set yet. Expansion stops when the work list is empty or as soon
/// as the number of known items exceeds <c>expandReachLimit</c>.
/// </remarks>
/// <param name="steps">The steps: nodes are queued for a further round only while the round number is below this value.</param>
public void expand(int steps)
{
    getReady();

    if (item == null || termNotFoundInLexicon)
    {
        return;
    }

    List<ILexiconItem> known = new List<ILexiconItem>();
    List<graphWrapNode<termSparkArm>> stack = new List<graphWrapNode<termSparkArm>>();
    stack.AddRangeUnique(getLeafs());

    lexiconItemExpandEnum reach = lexiconItemExpandEnum.upBelowLateral | lexiconItemExpandEnum.conceptUp;

    int step_i = 1;
    while (stack.Any())
    {
        List<graphWrapNode<termSparkArm>> n_stack = new List<graphWrapNode<termSparkArm>>();

        foreach (graphWrapNode<termSparkArm> child in stack)
        {
            List<ILexiconItem> exp = expandOnceWithRetry(child, reach, known);
            if (exp == null)
            {
                // Every attempt failed (or expandOnce returned null): nothing to add for this node.
                // Previously this case fell through to the loop below and threw a NullReferenceException.
                continue;
            }

            foreach (ILexiconItem __lexItem in exp)
            {
                if (__lexItem != null)
                {
                    graphWrapNode<termSparkArm> nd = child.Add(new termSparkArm(__lexItem.name, __lexItem, (1 / ((double)child.level + 1))));
                    known.Add(nd.item.lexItem);

                    if (lemma == null)
                    {
                        if (__lexItem is TermLemma)
                        {
                            lemma = (TermLemma)__lexItem;
                        }
                    }
                }

                if (known.Count > expandReachLimit)
                {
                    return;
                }
            }

            if (step_i < steps)
            {
                n_stack.AddRangeUnique(child);
            }
        }

        step_i++;
        stack = n_stack;
    }
}

/// <summary>
/// Calls <c>expandOnce</c> on the node's lexicon item, retrying up to ten times when it throws.
/// </summary>
/// <param name="node">Node whose lexicon item is expanded.</param>
/// <param name="reach">Expansion reach passed to <c>expandOnce</c>.</param>
/// <param name="known">Already known items passed to <c>expandOnce</c>.</param>
/// <returns>The result of the first successful call, or <c>null</c> when every attempt threw.</returns>
private static List<ILexiconItem> expandOnceWithRetry(graphWrapNode<termSparkArm> node, lexiconItemExpandEnum reach, List<ILexiconItem> known)
{
    const int maxAttempts = 10;

    for (int attempt = 1; attempt <= maxAttempts; attempt++)
    {
        try
        {
            return node.item.lexItem.expandOnce(reach, known);
        }
        catch (Exception ex)
        {
            // Back off briefly before the next attempt; the failure is logged, not rethrown.
            Thread.Sleep(100);
            aceLog.log("Expand crashed (" + ex.Message + ") -- retry " + attempt + " / " + maxAttempts);
            Thread.SpinWait(100);
        }
    }

    return null;
}
/// <summary>
/// Overridable hook for customizing <paramref name="link"/>; this implementation does nothing.
/// </summary>
/// <param name="parent">The parent graph node passed in by the caller.</param>
/// <param name="child">The child graph node passed in by the caller.</param>
/// <param name="link">The link object available for customization.</param>
/// <param name="styleCase">The styling case passed in by the caller.</param>
public virtual void SetLinkCustomization(graphWrapNode<LeafNodeDictionaryEntry> parent, graphWrapNode<LeafNodeDictionaryEntry> child, Link link, DirectedGraphStylingCase styleCase)
{
    // Intentionally empty. An unfinished, disabled draft checked a flag on child.item.Category and then
    // set link.StrokeDashArray to "3,3,6,3" and link.Label to child.item.Content.
}
/// <summary>
/// Creates a <c>nodeBlock</c> from <paramref name="input"/> and passes it to <c>Add</c>.
/// </summary>
/// <param name="input">The node the new block is constructed from.</param>
public void AddNewBlock(graphWrapNode<htmlWrapper> input)
{
    Add(new nodeBlock(input));
}
/// <summary>
/// Creates a block from <paramref name="input"/>: the node is added first, then <c>deploy</c> is called.
/// </summary>
/// <param name="input">The node passed to <c>Add</c>.</param>
public nodeBlock(graphWrapNode<htmlWrapper> input)
{
    // Order matters only in that deploy() runs after the node has been added.
    Add(input);

    deploy();
}
/// <summary>
/// Gets the data point pairs: Junction method
/// </summary>
/// <remarks>
/// Steps performed:
/// 1. For every leaf whose dictionary entry is flagged <c>Dynamic</c>, climb towards the root until the
///    current node's parent has at least <c>ChildrenCountTrigger</c> children; that node's path becomes a data point root.
/// 2. For every data point root, merge the dictionary entries whose XPath starts with the root into data points
///    (optionally grouped under one parent entry when <c>AllowMultiColumnDataPoints</c> is set).
/// 3. From every data point root node, climb again using <c>JunctionSizeMin</c> to find block roots.
/// 4. Distribute the data points over blocks by XPath prefix. Data points that match no block root
///    are not part of the result.
/// Both climbs stop when no further climb is possible, so they always terminate.
/// </remarks>
/// <param name="input">The input.</param>
/// <returns>The result holding the detected map blocks; it has no blocks when no block root was found.</returns>
public DataPointMapperResult GetDataPointPairs(LeafNodeDictionaryAnalysis input)
{
    List<DataPointMapEntry> dpList = new List<DataPointMapEntry>();
    List<String> dpRoots = new List<string>();

    // --- 1. data point roots, found from the dynamic leafs
    foreach (graphWrapNode<LeafNodeDictionaryEntry> g in input.CompleteGraph.getAllLeafs())
    {
        var inputLeaf = input.Nodes.GetEntry(g.path);

        if (g.parent == null)
        {
            continue;
        }

        if (!inputLeaf.Category.HasFlag(NodeInTemplateRole.Dynamic))
        {
            continue;
        }

        graphWrapNode<LeafNodeDictionaryEntry> head = g;
        Int32 pC = head.Count();

        while (pC < ChildrenCountTrigger)
        {
            if (head.parent == null)
            {
                // Top reached without finding a junction. Without this exit the loop never ended.
                break;
            }

            head = head.parent as graphWrapNode<LeafNodeDictionaryEntry>;
            if (head == null || head.parent == null)
            {
                // Either the parent is of another node type, or there is no parent left to count.
                break;
            }

            pC = head.parent.Count();
        }

        // Only a non-empty path can serve as an XPath prefix (same rule as for block roots below).
        if (head != null && !head.path.isNullOrEmpty() && !dpRoots.Contains(head.path))
        {
            dpRoots.Add(head.path);
        }
    }

    // --- 2. data points under each root
    foreach (String root in dpRoots)
    {
        var dpItems = input.Nodes.items.Where(x => x.XPath.StartsWith(root, StringComparison.Ordinal)).ToList();

        List<DataPointMapEntry> dp_tmp = MergeEntriesToDataPoints(dpItems, input);

        if (dp_tmp.Count > 1 && flags.HasFlag(DataPointMapBlockDetectionFlags.AllowMultiColumnDataPoints))
        {
            DataPointMapEntry parent_dp = new DataPointMapEntry()
            {
                DataPointXPathRoot = root,
                LabelXPathRelative = "",
                DataXPathRelative = "",
                Properties = dp_tmp
            };

            // The nested entries are addressed through the parent, so their own root is cleared.
            foreach (var d in dp_tmp)
            {
                d.DataPointXPathRoot = "";
            }

            dpList.Add(parent_dp);
        }
        else
        {
            dpList.AddRange(dp_tmp);
        }
    }

    // --- 3. block roots, found from the graph nodes of the data point roots
    List<graphWrapNode<LeafNodeDictionaryEntry>> dpRootGraphNodes = new List<graphWrapNode<LeafNodeDictionaryEntry>>();
    foreach (String root in dpRoots)
    {
        var cp = input.CompleteGraph.GetChildAtPath(root);
        if (cp != null)
        {
            dpRootGraphNodes.AddUnique(cp);
        }
    }

    DataPointMapperResult output = new DataPointMapperResult();
    List<String> BlockRoots = new List<string>();

    foreach (graphWrapNode<LeafNodeDictionaryEntry> g in dpRootGraphNodes)
    {
        if (g.parent == null)
        {
            continue;
        }

        graphWrapNode<LeafNodeDictionaryEntry> head = g;
        Int32 pC = head.Count();

        while (pC < JunctionSizeMin)
        {
            if (head.level <= 1)
            {
                // Climbing is only allowed above level 1. Without this exit the loop never ended.
                break;
            }

            head = head.parent as graphWrapNode<LeafNodeDictionaryEntry>;
            if (head == null || head.parent == null)
            {
                break;
            }

            pC = head.parent.Count();
        }

        if (head != null && !head.path.isNullOrEmpty() && !BlockRoots.Contains(head.path))
        {
            BlockRoots.Add(head.path);
        }
    }

    if (!BlockRoots.Any())
    {
        return output;
    }

    // Shorter roots first lets the widest block claim its data points first; longer roots first does the opposite.
    if (flags.HasFlag(DataPointMapBlockDetectionFlags.maximizeBlockSize))
    {
        BlockRoots = BlockRoots.OrderBy(x => x.Length).ToList();
    }
    else if (flags.HasFlag(DataPointMapBlockDetectionFlags.maximizeDataRelatness))
    {
        BlockRoots = BlockRoots.OrderByDescending(x => x.Length).ToList();
    }

    // --- 4. distribute the data points over the blocks
    foreach (String blockRoot in BlockRoots)
    {
        var b = new DataPointMapBlock(blockRoot);

        // Materialized before the loop because matching entries are removed from dpList while iterating.
        var dpl = dpList.Where(x => x.DataPointXPathRoot.StartsWith(blockRoot, StringComparison.Ordinal)).ToList();
        foreach (DataPointMapEntry e in dpl)
        {
            dpList.Remove(e);
            b.DataPoints.Add(e);
        }

        if (b.DataPoints.Count > 0)
        {
            output.MapBlocks.Add(b);
        }
    }

    return output;
}