コード例 #1
0
        /// <summary>
        /// Return all elements matching a selector, starting the search from each element of
        /// <paramref name="list"/>.
        /// </summary>
        /// <remarks>
        /// Three paths exist. A selector whose type is exactly <c>SelectorType.HTML</c> yields the objects the
        /// element factory creates from <c>selector.Html</c>. A result-list position selector is delegated to
        /// <c>GetResultPositionMatches</c>. Otherwise every source element (and, unless the traversal type is
        /// <c>Filter</c>, its element descendants) is tested with <c>Matches</c>; the matches are collected in a
        /// set and yielded only after the whole source list has been processed.
        /// This is an iterator, so none of the work happens until the result is enumerated.
        /// </remarks>
        /// <param name="list">The objects from which the search starts.</param>
        /// <param name="selector">The selector that candidate objects are tested against.</param>
        /// <returns>The matching objects. On the general matching path each object is returned at most once.</returns>
        protected IEnumerable<IDomObject> GetMatches(IEnumerable<IDomObject> list, Selector selector)
        {
            if (selector.SelectorType == SelectorType.HTML)
            {
                HtmlParser.DomElementFactory factory = new HtmlParser.DomElementFactory(Document);

                foreach (var obj in factory.CreateObjects(selector.Html))
                {
                    yield return obj;
                }
                yield break;
            }

            // Result-list position selectors are simple -- skip out of main matching code if so
            if (selector.SelectorType.HasFlag(SelectorType.Position) && selector.IsResultListPosition)
            {
                foreach (var obj in GetResultPositionMatches(list, selector))
                {
                    yield return obj;
                }
                yield break;
            }

            // Working state is only needed for the general matching path, so it is created after the early exits.
            // uniqueElements records nodes already queued for inspection, so that items which are children of
            // other items in the list do not cause the same subtree to be searched repeatedly.
            HashSet<IDomObject> uniqueElements = new HashSet<IDomObject>();
            HashSet<IDomObject> temporaryResults = new HashSet<IDomObject>();
            Stack<MatchElement> stack = new Stack<MatchElement>();

            // Otherwise, try to match each element individually
            foreach (var e in list)
            {
                // We must check everything again when looking for specific depth of children
                // otherwise - no point - skip em
                // NOTE(review): within this method uniqueElements is only added to under Child traversal (see the
                // child loop below), so this non-Child check does not appear able to succeed -- confirm intent.
                if (selector.TraversalType != TraversalType.Child && uniqueElements.Contains(e))
                {
                    continue;
                }

                // Depth-first walk of the subtree rooted at e; depth 0 is the source element itself.
                stack.Push(new MatchElement(e, 0));
                while (stack.Count != 0)
                {
                    var current = stack.Pop();

                    if (Matches(selector, current.Object, current.Depth))
                    {
                        temporaryResults.Add(current.Object);
                    }

                    // Don't keep going to children if the target depth is < the depth. Though the match would still fail,
                    // stuff would end up the unique list which we might need to test later if it appears directly in the source list
                    // causing it to be ignored.
                    if (selector.TraversalType != TraversalType.Filter &&
                        current.Object is IDomElement &&
                        (selector.TraversalType != TraversalType.Child || selector.ChildDepth > current.Depth))
                    {
                        SelectorType selectorType = selector.SelectorType;
                        IDomElement elm = current.Element;

                        // When the children of this element sit exactly at the target child depth and the selector
                        // is a DOM-index position selector, take the position matches directly and clear the
                        // Position flag from the local copy of the selector type.
                        if (selector.TraversalType == TraversalType.Child
                            && selector.ChildDepth == current.Depth + 1
                            && selector.IsDomIndexPosition)
                        {
                            temporaryResults.AddRange(GetDomPositionMatches(elm, selector));
                            selectorType &= ~SelectorType.Position;
                        }

                        // Nothing other than the position part remained, so the children need not be visited.
                        if (selectorType == 0)
                        {
                            continue;
                        }

                        // Add children to stack (in reverse order, so they are processed in the correct order when popped)
                        for (int j = elm.ChildNodes.Count - 1; j >= 0; j--)
                        {
                            IDomObject obj = elm[j];

                            // Under Child traversal a node that was already queued once is not queued again.
                            if (selector.TraversalType == TraversalType.Child && !uniqueElements.Add(obj))
                            {
                                continue;
                            }
                            if (obj.NodeType == NodeType.ELEMENT_NODE)
                            {
                                stack.Push(new MatchElement(obj, current.Depth + 1));
                            }
                        }
                    }
                }
            }

            foreach (var obj in temporaryResults)
            {
                yield return obj;
            }
        }
コード例 #2
0
        /// <summary>
        /// Return all elements matching a selector, searching from each element of <paramref name="list"/>.
        /// </summary>
        /// <remarks>
        /// If the selector type is exactly <c>SelectorType.HTML</c>, the objects created from
        /// <c>selector.Html</c> are returned. If the selector is a result-list position selector, the work is
        /// handed to <c>GetResultPositionMatches</c>. In every other case each source element is walked
        /// depth-first (descending into children unless the traversal type is <c>Filter</c>), each visited
        /// object is tested with <c>Matches</c>, and the collected set of matches is yielded at the end.
        /// Because this is an iterator, execution is deferred until the result is enumerated.
        /// </remarks>
        /// <param name="list">The objects the search begins from.</param>
        /// <param name="selector">The selector used to test candidate objects.</param>
        /// <returns>The matching objects; the general matching path yields each object at most once.</returns>
        protected IEnumerable<IDomObject> GetMatches(IEnumerable<IDomObject> list, Selector selector)
        {
            if (selector.SelectorType == SelectorType.HTML)
            {
                HtmlParser.DomElementFactory factory = new HtmlParser.DomElementFactory(Document);

                foreach (var obj in factory.CreateObjects(selector.Html))
                {
                    yield return obj;
                }
                yield break;
            }

            // Result-list position selectors are simple -- skip out of main matching code if so
            if (selector.SelectorType.HasFlag(SelectorType.Position) && selector.IsResultListPosition)
            {
                foreach (var obj in GetResultPositionMatches(list, selector))
                {
                    yield return obj;
                }
                yield break;
            }

            // Otherwise, try to match each element individually.
            // The collections below are only used on this path, so they are allocated here rather than up front.

            // Nodes already queued for inspection. Result sets frequently contain items which are children of
            // other items in the list; this set is meant to stop the same subtree being searched repeatedly.
            HashSet<IDomObject> uniqueElements = new HashSet<IDomObject>();

            // Distinct matches, yielded once the whole source list has been walked.
            HashSet<IDomObject> temporaryResults = new HashSet<IDomObject>();

            // Pending nodes of the depth-first walk, each paired with its depth below the source element.
            Stack<MatchElement> stack = new Stack<MatchElement>();

            foreach (var e in list)
            {
                // We must check everything again when looking for specific depth of children
                // otherwise - no point - skip em
                // NOTE(review): the only place uniqueElements gains entries is guarded by Child traversal, while
                // this test only runs for non-Child traversal -- it looks like it can never skip; please confirm.
                if (selector.TraversalType != TraversalType.Child && uniqueElements.Contains(e))
                {
                    continue;
                }

                stack.Push(new MatchElement(e, 0));
                while (stack.Count != 0)
                {
                    var current = stack.Pop();

                    if (Matches(selector, current.Object, current.Depth))
                    {
                        temporaryResults.Add(current.Object);
                    }

                    // Don't keep going to children if the target depth is < the depth. Though the match would still fail,
                    // stuff would end up the unique list which we might need to test later if it appears directly in the source list
                    // causing it to be ignored.
                    if (selector.TraversalType != TraversalType.Filter &&
                        current.Object is IDomElement &&
                        (selector.TraversalType != TraversalType.Child || selector.ChildDepth > current.Depth))
                    {
                        SelectorType selectorType = selector.SelectorType;
                        IDomElement elm = current.Element;

                        // Children of elm are at the requested child depth and the selector is a DOM-index
                        // position selector: collect those matches now and drop the Position flag locally.
                        if (selector.TraversalType == TraversalType.Child &&
                            selector.ChildDepth == current.Depth + 1 &&
                            selector.IsDomIndexPosition)
                        {
                            temporaryResults.AddRange(GetDomPositionMatches(elm, selector));
                            selectorType &= ~SelectorType.Position;
                        }

                        // No selector type flags are left, so there is no reason to queue the children.
                        if (selectorType == 0)
                        {
                            continue;
                        }

                        // Add children to stack (in reverse order, so they are processed in the correct order when popped)
                        for (int j = elm.ChildNodes.Count - 1; j >= 0; j--)
                        {
                            IDomObject obj = elm[j];

                            // For Child traversal, skip any node that has already been queued before.
                            if (selector.TraversalType == TraversalType.Child && !uniqueElements.Add(obj))
                            {
                                continue;
                            }
                            if (obj.NodeType == NodeType.ELEMENT_NODE)
                            {
                                stack.Push(new MatchElement(obj, current.Depth + 1));
                            }
                        }
                    }
                }
            }

            foreach (var obj in temporaryResults)
            {
                yield return obj;
            }
        }