Пример #1
0
        /// <summary>
        /// Registers the path of <paramref name="input"/> as an XPath root and adds the item of every leaf found below it.
        /// </summary>
        /// <remarks>
        /// The registered path is <c>input.path</c> with the root's name stripped from its start and every "/" replaced by "\".
        /// </remarks>
        /// <param name="input">The node whose path and leaves are collected.</param>
        public void Add(graphWrapNode<htmlWrapper> input)
        {
            IObjectWithName namedRoot = input.root as IObjectWithName;

            string relativePath = input.path.removeStartsWith(namedRoot.name);
            string rootXPath    = relativePath.Replace("/", "\\");
            xPathRoots.AddUnique(rootXPath);

            foreach (var leaf in input.getAllLeafs())
            {
                var leafNode = leaf as graphWrapNode<htmlWrapper>;

                // Resolves to the overload that accepts the wrapped item, not to this method.
                Add(leafNode.item);
            }
        }
Пример #2
0
        /// <summary>
        /// Walks (and, through <c>Add(string)</c>, creates where needed) one node per segment of <paramref name="pathWithName"/>,
        /// then stores <paramref name="__item"/> on the last node reached. The path is split on <c>pathSeparator</c>,
        /// e.g. path1\\path2\\path3.
        /// </summary>
        /// <param name="pathWithName">Path whose segments name the nodes to descend through, starting at this node.</param>
        /// <param name="__item">The item assigned to the final node of the path.</param>
        /// <returns>The node that corresponds to the last path segment (this node when the path has no segments).</returns>
        public override graphWrapNode<htmlWrapper> Add(string pathWithName, htmlWrapper __item)
        {
            List<string> segments = pathWithName.SplitSmart(pathSeparator);

            graphWrapNode<htmlWrapper> cursor = this;
            segments.ForEach(segment => cursor = cursor.Add(segment));

            cursor.SetItem(__item);
            return cursor;
        }
Пример #3
0
        /// <summary>
        /// Builds the content tree out of <see cref="HtmlDocument"/>. Every element selected by the XPath
        /// <c>//*[text()][count(*)=0]</c> (has a text node, has no child elements) that passes <c>isTagAcceptable</c>
        /// and still has text after <c>htmlContentProcess()</c> and trimming is added to the tree.
        /// </summary>
        /// <param name="htmlDoc">The HTML document</param>
        /// <param name="__name">The name of the root</param>
        /// <param name="allowTitle">Forwarded to <c>isTagAcceptable</c>; presumably lets title elements through - confirm against that helper.</param>
        /// <param name="allowMeta">Forwarded to <c>isTagAcceptable</c>; presumably lets meta elements through - confirm against that helper.</param>
        /// <returns>The tree, holding one node per accepted element, keyed by the element's XPath with "/" replaced by "\".</returns>
        public static nodeTree buildTree(this HtmlDocument htmlDoc, string __name, Boolean allowTitle = false, Boolean allowMeta = false)
        {
            nodeTree output = new nodeTree(__name, htmlDoc);

            XPathNodeIterator iterator = htmlDoc.CreateNavigator().Select("//*[text()][count(*)=0]");

            while (iterator.MoveNext())
            {
                HtmlNodeNavigator htmlNavigator = iterator.Current as HtmlNodeNavigator;
                if (htmlNavigator == null)
                {
                    // Not an HTML node navigator: there is no CurrentNode to read, so skip instead of dereferencing null.
                    continue;
                }

                var node = htmlNavigator.CurrentNode;
                if (!isTagAcceptable(node, null, allowTitle, allowMeta))
                {
                    continue;
                }

                string cn = node.InnerText.htmlContentProcess().Trim();
                if (cn.isNullOrEmptyString())
                {
                    continue;
                }

                // The tree uses "\" as its separator, so the XPath is converted before it is used as the node path.
                string sp = node.XPath.Replace("/", "\\");

                graphWrapNode<htmlWrapper> nn = output.Add(sp, node.Clone());
                nn.item.content = cn;
                nn.item.xPath   = sp;
                nn.item.path    = nn.path;
            }

            return(output);
        }
        /// <summary>
        /// Rebuilds <c>CompleteGraph</c> from the entries of <paramref name="leafDictionary"/> (keyed by their XPath, "/" separated)
        /// and runs <c>GraphMetrics</c> over it.
        /// </summary>
        /// <param name="leafDictionary">The dictionary whose <c>items</c> become the graph nodes.</param>
        public void Analyze(LeafNodeDictionary leafDictionary)
        {
            const string separator = "/";

            CompleteGraph = graphTools.BuildGraphFromItems<LeafNodeDictionaryEntry, graphWrapNode<LeafNodeDictionaryEntry>>(
                leafDictionary.items,
                entry => entry.XPath,
                separator);
            CompleteGraph.pathSeparator = separator;

            GraphMetrics.Process(CompleteGraph);

            // The junction frequency bins are enumerated, but nothing is done yet with the ones at or above JunctionSizeMin.
            foreach (var frequencyBin in GraphMetrics.JunctionCounter.GetFrequencyBins())
            {
                if (frequencyBin.Key >= JunctionSizeMin)
                {
                }
            }
        }
Пример #5
0
        /// <summary>
        /// Returns the child called <paramref name="__name"/>. A missing child is created as a new <c>nodeTree</c>
        /// and registered; an existing child has its item's <c>score</c> incremented instead.
        /// </summary>
        /// <param name="__name">The name of the child node.</param>
        /// <returns>The newly created or the already existing child node.</returns>
        public override graphWrapNode<htmlWrapper> Add(string __name)
        {
            if (children.Contains(__name))
            {
                // Known child: count the repeated hit on its item.
                graphWrapNode<htmlWrapper> existing = this[__name] as graphWrapNode<htmlWrapper>;
                existing.item.score++;

                return (graphWrapNode<htmlWrapper>)this[__name];
            }

            var created = new nodeTree(__name, this);
            children.Add(__name, created);
            return created;
        }
        /// <summary>
        /// Overridable per-node customization point. This implementation has an empty body: the only content is
        /// a commented-out draft that would set <c>targetNode.Background</c> from the source item's <c>Category</c>.
        /// </summary>
        /// <param name="sourceNode">The graph node being rendered (unused here).</param>
        /// <param name="targetNode">The node to customize (unused here).</param>
        /// <param name="styleCase">The styling case (unused here).</param>
        public virtual void SetNodeCustomization(graphWrapNode <LeafNodeDictionaryEntry> sourceNode, Node targetNode, DirectedGraphStylingCase styleCase)
        {
            // Disabled draft: "Static" -> light blue background, "Dynamic" -> orange background, anything else untouched.
            //if (sourceNode.item != null)
            //{
            //    switch (sourceNode.item.Category)
            //    {
            //        case "Structure":

            //            break;
            //        case "Static":
            //            targetNode.Background = Color.LightBlue.ColorToHex();
            //            break;
            //        case "Dynamic":
            //            targetNode.Background = Color.Orange.ColorToHex();
            //            break;
            //        default:
            //            break;
            //    }

            //}
        }
Пример #7
0
        /// <summary>
        /// Gets the first branching node, starting from root to leafs. Descends from <paramref name="graph"/> along the first child
        /// of every node and returns the first descendant that has more than one child and whose node name is not listed in
        /// <paramref name="NodeNameToSkip"/>, or the leaf if the end of that path is reached first.
        /// </summary>
        /// <remarks>
        /// Only descendants are tested: <paramref name="graph"/> itself is returned only when it has no children.
        /// </remarks>
        /// <param name="graph">The graph.</param>
        /// <param name="NodeNameToSkip">Node names that are never returned as the branching node; defaults to "html" and "body" when null.</param>
        /// <returns>The first qualifying branching node, or the last node reached on the first-child path.</returns>
        public static graphWrapNode<LeafNodeDictionaryEntry> GetFirstBranchingNode(this graphWrapNode<LeafNodeDictionaryEntry> graph, List<String> NodeNameToSkip = null)
        {
            if (NodeNameToSkip == null)
            {
                NodeNameToSkip = new List<String>() { "html", "body" };
            }

            graphWrapNode<LeafNodeDictionaryEntry> head = graph;

            while (true)
            {
                String firstChildName = head.getChildNames().FirstOrDefault();
                if (firstChildName.isNullOrEmpty())
                {
                    // No children left: head is a leaf.
                    return(head);
                }

                head = head[firstChildName] as graphWrapNode<LeafNodeDictionaryEntry>;
                String node_name = head.name.GetFirstNodeNameFromXPath();

                if (!NodeNameToSkip.Contains(node_name) && head.GetChildren().Count > 1)
                {
                    return(head);
                }
            }
        }
Пример #8
0
        /// <summary>
        /// Expands the graph from its current leafs, round by round. In each round every node on the stack is expanded once
        /// through its lexicon item, and each non-null result is attached as a child arm weighted <c>1 / (level + 1)</c>.
        /// </summary>
        /// <remarks>
        /// Does nothing when <c>item</c> is null or <c>termNotFoundInLexicon</c> is set. Stops as soon as more than
        /// <c>expandReachLimit</c> lexicon items have been collected. A failing <c>expandOnce</c> call is retried up to
        /// 10 times; a node that still fails is logged and skipped.
        /// </remarks>
        /// <param name="steps">The steps: nodes are queued for a further round only while the round number is below this value.</param>
        public void expand(int steps)
        {
            getReady();

            if (item == null || termNotFoundInLexicon)
            {
                return;
            }

            const int maxAttempts = 10;
            lexiconItemExpandEnum reach = lexiconItemExpandEnum.upBelowLateral | lexiconItemExpandEnum.conceptUp;

            List<graphWrapNode<termSparkArm>> stack = new List<graphWrapNode<termSparkArm>>();
            List<ILexiconItem> known = new List<ILexiconItem>();

            stack.AddRangeUnique(getLeafs());
            int step_i = 1;

            while (stack.Any())
            {
                List<graphWrapNode<termSparkArm>> n_stack = new List<graphWrapNode<termSparkArm>>();

                foreach (graphWrapNode<termSparkArm> child in stack)
                {
                    List<ILexiconItem> exp = null;

                    for (int attempt = 1; attempt <= maxAttempts; attempt++)
                    {
                        try
                        {
                            exp = child.item.lexItem.expandOnce(reach, known);
                            break;
                        }
                        catch (Exception ex)
                        {
                            Thread.Sleep(100);
                            aceLog.log("Expand crashed (" + ex.Message + ") -- retry " + attempt + " / " + maxAttempts);

                            Thread.SpinWait(100);
                        }
                    }

                    if (exp == null)
                    {
                        // Every attempt failed (or nothing was returned): enumerating null would throw, so skip this node.
                        aceLog.log("Expand gave no result after " + maxAttempts + " attempts -- node skipped");
                        continue;
                    }

                    foreach (ILexiconItem __lexItem in exp)
                    {
                        if (__lexItem != null)
                        {
                            graphWrapNode<termSparkArm> nd = child.Add(new termSparkArm(__lexItem.name, __lexItem, (1 / ((double)child.level + 1))));

                            known.Add(nd.item.lexItem);

                            // The first lemma encountered becomes the lemma of this instance.
                            if (lemma == null && __lexItem is TermLemma)
                            {
                                lemma = (TermLemma)__lexItem;
                            }
                        }

                        if (known.Count > expandReachLimit)
                        {
                            return;
                        }
                    }

                    if (step_i < steps)
                    {
                        // NOTE(review): presumably enqueues the children just attached to this node - confirm AddRangeUnique semantics.
                        n_stack.AddRangeUnique(child);
                    }
                }

                step_i++;

                stack = n_stack;
            }
        }
 /// <summary>
 /// Overridable per-link customization point. This implementation has an empty body: the only content is
 /// a commented-out, unfinished draft that would set a dash pattern and a label on <c>link</c> from the child's item.
 /// </summary>
 /// <param name="parent">The parent graph node of the link (unused here).</param>
 /// <param name="child">The child graph node of the link (unused here).</param>
 /// <param name="link">The link to customize (unused here).</param>
 /// <param name="styleCase">The styling case (unused here).</param>
 public virtual void SetLinkCustomization(graphWrapNode <LeafNodeDictionaryEntry> parent, graphWrapNode <LeafNodeDictionaryEntry> child, Link link, DirectedGraphStylingCase styleCase)
 {
     // Disabled draft; note the HasFlag() call below was never given an argument.
     //if (child.item != null)
     //{
     //    if (child.item.Category.HasFlag()
     //    {
     //        link.StrokeDashArray = "3,3,6,3";
     //        link.Label = child.item.Content;
     //    }
     //}
 }
        /// <summary>
        /// Wraps <paramref name="input"/> in a new <c>nodeBlock</c> and adds that block to this collection.
        /// </summary>
        /// <param name="input">The node the new block is constructed from.</param>
        public void AddNewBlock(graphWrapNode<htmlWrapper> input)
        {
            Add(new nodeBlock(input));
        }
Пример #11
0
        /// <summary>
        /// Creates a block from <paramref name="input"/>: the node is added first, then <c>deploy()</c> is run.
        /// </summary>
        /// <param name="input">The node this block is built from.</param>
        public nodeBlock(graphWrapNode<htmlWrapper> input)
        {
            this.Add(input);
            this.deploy();
        }
Пример #12
0
        /// <summary>
        /// Gets the data point pairs: Junction method.
        /// </summary>
        /// <remarks>
        /// Steps:
        /// 1. For every leaf of <c>input.CompleteGraph</c> that has a parent and whose dictionary entry carries the
        ///    <c>Dynamic</c> role, climb towards the root until the parent of the current node has at least
        ///    <c>ChildrenCountTrigger</c> children; the path of that node becomes a data point root.
        /// 2. For every data point root, the entries whose XPath starts with it are merged by <c>MergeEntriesToDataPoints</c>.
        ///    When more than one entry results and <c>AllowMultiColumnDataPoints</c> is set, they are wrapped in one parent
        ///    entry and their own <c>DataPointXPathRoot</c> is cleared.
        /// 3. The data point roots are resolved to graph nodes and climbed again, this time against <c>JunctionSizeMin</c>,
        ///    to find the block roots.
        /// 4. Block roots are ordered according to <c>flags</c>; each data point goes to the first block whose root is a
        ///    prefix of its <c>DataPointXPathRoot</c>. Data points matching no block root are not part of the result.
        /// </remarks>
        /// <param name="input">The input.</param>
        /// <returns>The result with one map block per block root that received at least one data point; empty when no block root was found.</returns>
        public DataPointMapperResult GetDataPointPairs(LeafNodeDictionaryAnalysis input)
        {
            List<DataPointMapEntry> dpList = new List<DataPointMapEntry>();

            var allLeafs = input.CompleteGraph.getAllLeafs();

            List<String> dpRoots = new List<string>();

            foreach (graphWrapNode<LeafNodeDictionaryEntry> g in allLeafs)
            {
                var inputLeaf = input.Nodes.GetEntry(g.path);

                if (g.parent == null)
                {
                    continue;
                }
                if (!inputLeaf.Category.HasFlag(NodeInTemplateRole.Dynamic))
                {
                    continue;
                }

                graphWrapNode<LeafNodeDictionaryEntry> head = g;
                Int32 pC = head.Count();

                // Invariant at the top of each iteration: head.parent is not null.
                while (pC < ChildrenCountTrigger)
                {
                    head = head.parent as graphWrapNode<LeafNodeDictionaryEntry>;
                    if (head == null || head.parent == null)
                    {
                        // Parent of another type, or the root was reached: stop before head.parent is dereferenced.
                        break;
                    }
                    pC = head.parent.Count();
                }

                // Only a non-empty path is usable as a root, matching the block-root check further down.
                if (head != null && !head.path.isNullOrEmpty() && !dpRoots.Contains(head.path))
                {
                    dpRoots.Add(head.path);
                }
            }

            foreach (String root in dpRoots)
            {
                var dpItems = input.Nodes.items.Where(x => x.XPath.StartsWith(root)).ToList();

                List<DataPointMapEntry> dp_tmp = MergeEntriesToDataPoints(dpItems, input);

                if (dp_tmp.Count > 1 && flags.HasFlag(DataPointMapBlockDetectionFlags.AllowMultiColumnDataPoints))
                {
                    DataPointMapEntry parent_dp = new DataPointMapEntry()
                    {
                        DataPointXPathRoot = root,
                        LabelXPathRelative = "",
                        DataXPathRelative  = "",
                        Properties         = dp_tmp
                    };

                    // The root now lives on the parent entry only.
                    foreach (var d in dp_tmp)
                    {
                        d.DataPointXPathRoot = "";
                    }

                    dpList.Add(parent_dp);
                }
                else
                {
                    dpList.AddRange(dp_tmp);
                }
            }

            List<graphWrapNode<LeafNodeDictionaryEntry>> dpRootGraphNodes = new List<graphWrapNode<LeafNodeDictionaryEntry>>();

            foreach (String root in dpRoots)
            {
                var cp = input.CompleteGraph.GetChildAtPath(root);
                if (cp != null)
                {
                    dpRootGraphNodes.AddUnique(cp);
                }
            }

            DataPointMapperResult output = new DataPointMapperResult();

            List<String> BlockRoots = new List<string>();

            foreach (graphWrapNode<LeafNodeDictionaryEntry> g in dpRootGraphNodes)
            {
                if (g.parent == null)
                {
                    continue;
                }

                graphWrapNode<LeafNodeDictionaryEntry> head = g;
                Int32 pC = head.Count();

                while (pC < JunctionSizeMin)
                {
                    if (head.level <= 1)
                    {
                        // Cannot climb any further; without this exit the loop would never end.
                        break;
                    }

                    head = head.parent as graphWrapNode<LeafNodeDictionaryEntry>;
                    if (head == null || head.parent == null)
                    {
                        break;
                    }
                    pC = head.parent.Count();
                }

                if (head != null && !head.path.isNullOrEmpty() && !BlockRoots.Contains(head.path))
                {
                    BlockRoots.Add(head.path);
                }
            }

            if (!BlockRoots.Any())
            {
                return(output);
            }

            // Blocks claim data points in list order, so shortest-first favors large blocks and longest-first favors tight ones.
            if (flags.HasFlag(DataPointMapBlockDetectionFlags.maximizeBlockSize))
            {
                BlockRoots = BlockRoots.OrderBy(x => x.Length).ToList();
            }
            else if (flags.HasFlag(DataPointMapBlockDetectionFlags.maximizeDataRelatness))
            {
                BlockRoots = BlockRoots.OrderByDescending(x => x.Length).ToList();
            }

            foreach (String blockRoot in BlockRoots)
            {
                var b = new DataPointMapBlock(blockRoot);

                // Enumerate a snapshot, because matching entries are removed from dpList inside the loop.
                var dpl = dpList.ToList().Where(x => x.DataPointXPathRoot.StartsWith(blockRoot));

                foreach (DataPointMapEntry e in dpl)
                {
                    dpList.Remove(e);
                    b.DataPoints.Add(e);
                }

                if (b.DataPoints.Count > 0)
                {
                    output.MapBlocks.Add(b);
                }
            }

            return(output);
        }