/// <summary>
 /// Searches the given nodes for elements that match a set of already-compiled selectors.
 /// </summary>
 /// <param name="nodes">The nodes to be searched.</param>
 /// <param name="selectors">Compiled selectors that describe the nodes to find. The search
 /// itself is delegated to this collection's <c>Find</c> method.</param>
 /// <returns>The matching nodes, as produced by <paramref name="selectors"/>.</returns>
 public static IEnumerable<HtmlElementNode> Find(this IEnumerable<HtmlNode> nodes, SelectorCollection selectors)
 {
     // Extension-method convenience: lets callers write nodes.Find(selectors).
     IEnumerable<HtmlElementNode> matches = selectors.Find(nodes);
     return matches;
 }
Exemple #2
0
        /// <summary>
        /// Finds the nodes, among those supplied, that satisfy a selector given as text.
        /// </summary>
        /// <param name="nodes">The nodes to be searched.</param>
        /// <param name="selector">Selector text that describes the nodes to find. It is
        /// compiled with <c>Selector.ParseSelector</c> before the search runs.</param>
        /// <returns>The matching nodes.</returns>
        public static IEnumerable<HtmlElementNode> Find(IEnumerable<HtmlNode> nodes, string selector) =>
            Selector.ParseSelector(selector).Find(nodes);
        /// <summary>
        /// Parses the given selector text and returns the corresponding data structures.
        /// </summary>
        /// <param name="selectorText">The selector text to be parsed. May be <c>null</c>,
        /// empty or whitespace, in which case no parsing is performed.</param>
        /// <remarks>
        /// <para>Returns multiple <see cref="Selector"/>s when the selector contains commas.</para>
        /// <para>Recognized syntax: tag names and the <c>*</c> wildcard, the shorthand
        /// characters registered in <c>SpecialCharacters</c>, the attribute forms
        /// <c>[name]</c>, <c>[name=value]</c> and <c>[name:=value]</c>, <c>,</c> to start a
        /// new selector, <c>&gt;</c> for an immediate-child selector, and whitespace for a
        /// child selector that is not restricted to immediate children.</para>
        /// <para>Unrecognized characters and unterminated attribute selectors trigger a
        /// debug assertion; parsing then continues with the next character.</para>
        /// </remarks>
        /// <returns>The parsed selector data structures.</returns>
        public static SelectorCollection ParseSelector(string selectorText)
        {
            SelectorCollection selectors = new SelectorCollection();

            if (!string.IsNullOrWhiteSpace(selectorText))
            {
                TextParser parser = new TextParser(selectorText);
                parser.MovePastWhitespace();

                // Every branch below must consume at least one character,
                // otherwise this loop never terminates.
                while (!parser.EndOfText)
                {
                    // Test next character
                    char ch = parser.Peek();
                    if (IsNameCharacter(ch) || ch == '*')
                    {
                        // Parse tag name
                        Selector selector = selectors.GetLast(true);
                        if (ch == '*')
                        {
                            // Consume the wildcard. Peek() does not advance the parser, so
                            // without this the loop would see the same '*' forever.
                            parser.MoveAhead();
                            selector.Tag = null;    // Match all tags
                        }
                        else
                        {
                            selector.Tag = parser.ParseWhile(c => IsNameCharacter(c));
                        }
                    }
                    else if (SpecialCharacters.TryGetValue(ch, out string specialName))
                    {
                        // Parse special attributes: the shorthand character maps to an
                        // attribute name, and the text that follows is the value to look for
                        parser.MoveAhead();
                        string value = parser.ParseWhile(c => IsValueCharacter(c));
                        if (value.Length > 0)
                        {
                            SelectorAttribute attribute = new SelectorAttribute
                            {
                                Name  = specialName,
                                Value = value,
                                Mode  = SelectorAttributeMode.Contains
                            };

                            Selector selector = selectors.GetLast(true);
                            selector.Attributes.Add(attribute);
                        }
                    }
                    else if (ch == '[')
                    {
                        // Parse attribute selector
                        parser.MoveAhead();
                        parser.MovePastWhitespace();
                        string attributeName = parser.ParseWhile(c => IsNameCharacter(c));
                        if (attributeName.Length > 0)
                        {
                            SelectorAttribute attribute = new SelectorAttribute
                            {
                                Name = attributeName
                            };

                            // Parse attribute assignment operator:
                            // "=" is an exact match, ":=" is a regular expression,
                            // and no operator means the attribute only has to exist
                            parser.MovePastWhitespace();
                            if (parser.Peek() == '=')
                            {
                                attribute.Mode = SelectorAttributeMode.Match;
                                parser.MoveAhead();
                            }
                            else if (parser.Peek() == ':' && parser.Peek(1) == '=')
                            {
                                attribute.Mode = SelectorAttributeMode.RegEx;
                                parser.MoveAhead(2);
                            }
                            else
                            {
                                attribute.Mode = SelectorAttributeMode.ExistsOnly;
                            }

                            // Parse attribute value (quoted or bare)
                            if (attribute.Mode != SelectorAttributeMode.ExistsOnly)
                            {
                                parser.MovePastWhitespace();
                                if (HtmlRules.IsQuoteChar(parser.Peek()))
                                {
                                    attribute.Value = parser.ParseQuotedText();
                                }
                                else
                                {
                                    attribute.Value = parser.ParseWhile(c => IsValueCharacter(c));
                                }
                            }

                            Selector selector = selectors.GetLast(true);
                            selector.Attributes.Add(attribute);
                        }

                        // Close out attribute selector. A missing ']' is tolerated: whatever
                        // follows is handled by the next loop iteration.
                        parser.MovePastWhitespace();
                        Debug.Assert(parser.Peek() == ']');
                        if (parser.Peek() == ']')
                        {
                            parser.MoveAhead();
                        }
                    }
                    else if (ch == ',')
                    {
                        // Multiple selectors
                        parser.MoveAhead();
                        parser.MovePastWhitespace();
                        selectors.Add(new Selector());
                    }
                    else if (ch == '>')
                    {
                        // '>' indicates an immediate-child selector
                        parser.MoveAhead();
                        parser.MovePastWhitespace();
                        Debug.Assert(selectors.Any());
                        Selector selector = selectors.AddChildSelector();
                        selector.ImmediateChildOnly = true;
                    }
                    else if (char.IsWhiteSpace(ch))
                    {
                        // Handle whitespace
                        parser.MovePastWhitespace();
                        // ',' and '>' change meaning of whitespace: those branches create
                        // the next selector themselves, so none is added here
                        if (parser.Peek() != ',' && parser.Peek() != '>')
                        {
                            selectors.AddChildSelector();
                        }
                    }
                    else
                    {
                        // Unknown syntax: flag it in debug builds, then skip the character
                        Debug.Assert(false);
                        parser.MoveAhead();
                    }
                }
            }

            selectors.RemoveEmpty();
            return selectors;
        }