/// <summary>
        /// Builds one chunk candidate for every junction block reported by
        /// <c>JunctionGraphMetrics</c> for <c>results.CurrentGraph</c>.
        /// </summary>
        /// <remarks>
        /// Junction points are ordered by the number of path parts in their
        /// <c>XPathRoot</c> (largest first); points with the same number of parts are
        /// ordered by <c>JunctionSize</c> (largest first). Each candidate wraps the node
        /// found at the junction's <c>XPathRoot</c> and carries the junction point as
        /// its <c>MetaData</c>.
        /// </remarks>
        /// <param name="results">Detection result whose <c>CurrentGraph</c> is analysed and
        /// whose <c>Candidates</c> collection receives the new candidates.</param>
        /// <returns>The candidates created by this call, in the order described above.</returns>
        public override ChunkContentCandidateCollection GetCandidates(ChunkDetectionResult results)
        {
            var metrics = new JunctionGraphMetrics <NodeGraph>();
            metrics.Process(results.CurrentGraph);

            // NOTE(review): the meaning of the (1, 4, true, true) arguments is defined by
            // GetJunctionBlocks and is not visible from here - confirm before changing them.
            List <JunctionPoint <NodeGraph> > orderedJunctions = metrics
                .GetJunctionBlocks(1, 4, true, true)
                .OrderByDescending(point => point.XPathRoot.getPathParts().Count)
                .ThenByDescending(point => point.JunctionSize)
                .ToList();

            var candidates = new ChunkContentCandidateCollection();

            foreach (JunctionPoint <NodeGraph> point in orderedJunctions)
            {
                NodeGraph nodeAtRoot = results.CurrentGraph.GetChildAtPath(point.XPathRoot, "/", false);

                candidates.Add(new ChunkContentCandidate(this, nodeAtRoot)
                {
                    MetaData = point
                });
            }

            results.Candidates.AddRange(candidates);
            return candidates;
        }
        /// <summary>
        /// Creates one chunk candidate for every node returned by
        /// <c>results.CurrentGraph.getAllChildren(HtmlPathSelectExpression)</c>.
        /// </summary>
        /// <param name="results">Detection result whose <c>CurrentGraph</c> is queried and
        /// whose <c>Candidates</c> collection receives the new candidates.</param>
        /// <returns>The candidates created by this call, in the order the nodes were returned.</returns>
        public override ChunkContentCandidateCollection GetCandidates(ChunkDetectionResult results)
        {
            var candidates = new ChunkContentCandidateCollection();

            foreach (NodeGraph selectedNode in results.CurrentGraph.getAllChildren(HtmlPathSelectExpression))
            {
                candidates.Add(new ChunkContentCandidate(this, selectedNode));
            }

            results.Candidates.AddRange(candidates);
            return candidates;
        }
        // Example #3
        // 0
        /// <summary>
        /// Builds chunk candidates from the junction peaks found for the dynamic nodes
        /// of <c>results.InitialGraph</c>.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Nodes whose item category has the <c>NodeInTemplateRole.Dynamic</c> flag are
        /// visited in list order, once per distinct <c>path</c>. For each visited node a
        /// <c>JunctionPeakSearch</c> is asked for its junction peak, using
        /// <c>MinJunctionSize</c> converted to <see cref="double"/>.
        /// </para>
        /// <para>
        /// A peak produces a candidate only the first time it is encountered, and only
        /// when its <c>level</c> is not below <c>MinPeakNodeLevel</c> and its
        /// <c>Count()</c> is non-zero.
        /// </para>
        /// <para>
        /// As a safety cap, processing stops after 5001 distinct-path nodes.
        /// </para>
        /// </remarks>
        /// <param name="results">Detection result whose <c>InitialGraph</c> is analysed and
        /// whose <c>Candidates</c> collection receives the new candidates.</param>
        /// <returns>The candidates created by this call.</returns>
        public override ChunkContentCandidateCollection GetCandidates(ChunkDetectionResult results)
        {
            // The loop stops as soon as more than this many nodes have been processed,
            // i.e. at most (processedLimit + 1) nodes are examined.
            const Int32 processedLimit = 5000;

            ChunkContentCandidateCollection output = new ChunkContentCandidateCollection();

            var dynamicNodeList = results.InitialGraph
                .GetChildrenWithItemSet()
                .Where(x => x.item.Category.HasFlag(NodeInTemplateRole.Dynamic))
                .ToList();

            // Paths already handled: only the first dynamic node per path is processed.
            HashSet <String> visitedPaths = new HashSet <String>();

            // Peaks already seen: several dynamic nodes may resolve to the same peak.
            HashSet <NodeGraph> peaks = new HashSet <NodeGraph>();

            Int32 processed = 0;

            foreach (NodeGraph dynamicNode in dynamicNodeList)
            {
                if (!visitedPaths.Add(dynamicNode.path))
                {
                    continue;
                }

                if (processed > processedLimit)
                {
                    break;
                }

                processed++;

                var       nodePeakSearch = new JunctionPeakSearch(dynamicNode);
                NodeGraph peakNode       = nodePeakSearch.GetJunctionPeak(Convert.ToDouble(MinJunctionSize));

                // No peak found, or this peak was already reached from another dynamic node.
                if (peakNode == null || !peaks.Add(peakNode))
                {
                    continue;
                }

                // The peak stays registered in `peaks` even when rejected here, so it is
                // not reconsidered when reached again from a different dynamic node.
                if (peakNode.level < MinPeakNodeLevel)
                {
                    continue;
                }

                if (peakNode.Count() != 0)
                {
                    output.Add(new ChunkContentCandidate(this, peakNode));
                }
            }

            results.Candidates.AddRange(output);
            return output;
        }