/// <summary>
/// Parses <paramref name="selector"/> and applies the resulting selectors to the supplied nodes.
/// </summary>
/// <param name="nodes">The nodes to search.</param>
/// <param name="selector">Selector text describing the nodes to find.</param>
/// <returns>The element nodes produced by running the parsed selectors against <paramref name="nodes"/>.</returns>
public static IEnumerable<HtmlElementNode> Find(IEnumerable<HtmlNode> nodes, string selector)
    => Selector.ParseSelector(selector).Find(nodes);
/// <summary>
/// Parses the given selector text and returns the corresponding data structures.
/// </summary>
/// <param name="selectorText">The selector text to be parsed.</param>
/// <remarks>
/// <para>
/// Returns multiple <see cref="Selector"/>s when the selector contains commas.
/// </para>
/// <para>
/// Recognized syntax: a tag name or <c>*</c> (any tag); the shorthand characters listed in
/// <c>SpecialCharacters</c>, each followed by a value; bracketed attribute selectors of the form
/// <c>[name]</c>, <c>[name=value]</c> and <c>[name:=regex]</c>; <c>,</c> to start another
/// selector; <c>&gt;</c> for an immediate-child selector; and whitespace for a child selector.
/// </para>
/// <para>
/// If <paramref name="selectorText"/> is null, empty or consists only of whitespace, no parsing
/// is performed. Unrecognized characters trigger a debug assertion and are skipped.
/// </para>
/// </remarks>
/// <returns>The parsed selector data structures.</returns>
public static SelectorCollection ParseSelector(string selectorText)
{
    SelectorCollection selectors = new SelectorCollection();

    if (!string.IsNullOrWhiteSpace(selectorText))
    {
        TextParser parser = new TextParser(selectorText);
        parser.MovePastWhitespace();

        // Every branch below must consume at least one character (or hand off to a
        // branch that does); otherwise this loop never terminates.
        while (!parser.EndOfText)
        {
            // Test next character
            char ch = parser.Peek();
            if (IsNameCharacter(ch) || ch == '*')
            {
                // Parse tag name
                Selector selector = selectors.GetLast(true);
                if (ch == '*')
                {
                    // Match all tags
                    selector.Tag = null;
                    // Consume the '*'. Without this the parser never advances and the
                    // loop spins forever on any selector containing '*'.
                    parser.MoveAhead();
                }
                else
                {
                    selector.Tag = parser.ParseWhile(c => IsNameCharacter(c));
                }
            }
            else if (SpecialCharacters.TryGetValue(ch, out string name))
            {
                // Parse special attributes: the character maps to an attribute name and
                // the text that follows is the value to look for.
                parser.MoveAhead();
                string value = parser.ParseWhile(c => IsValueCharacter(c));
                if (value.Length > 0)
                {
                    SelectorAttribute attribute = new SelectorAttribute
                    {
                        Name = name,
                        Value = value,
                        Mode = SelectorAttributeMode.Contains
                    };

                    Selector selector = selectors.GetLast(true);
                    selector.Attributes.Add(attribute);
                }
            }
            else if (ch == '[')
            {
                // Parse attribute selector
                parser.MoveAhead();
                parser.MovePastWhitespace();
                string attributeName = parser.ParseWhile(c => IsNameCharacter(c));
                if (attributeName.Length > 0)
                {
                    SelectorAttribute attribute = new SelectorAttribute
                    {
                        Name = attributeName
                    };

                    // Parse attribute assignment operator:
                    //   '='  -> exact match, ':=' -> regular expression, none -> existence test
                    parser.MovePastWhitespace();
                    if (parser.Peek() == '=')
                    {
                        attribute.Mode = SelectorAttributeMode.Match;
                        parser.MoveAhead();
                    }
                    else if (parser.Peek() == ':' && parser.Peek(1) == '=')
                    {
                        attribute.Mode = SelectorAttributeMode.RegEx;
                        parser.MoveAhead(2);
                    }
                    else
                    {
                        attribute.Mode = SelectorAttributeMode.ExistsOnly;
                    }

                    // Parse attribute value (quoted or bare)
                    if (attribute.Mode != SelectorAttributeMode.ExistsOnly)
                    {
                        parser.MovePastWhitespace();
                        if (HtmlRules.IsQuoteChar(parser.Peek()))
                        {
                            attribute.Value = parser.ParseQuotedText();
                        }
                        else
                        {
                            attribute.Value = parser.ParseWhile(c => IsValueCharacter(c));
                        }
                    }

                    Selector selector = selectors.GetLast(true);
                    selector.Attributes.Add(attribute);
                }

                // Close out attribute selector. If the ']' is missing, nothing is consumed
                // here; the offending character is handled by the next loop iteration.
                parser.MovePastWhitespace();
                Debug.Assert(parser.Peek() == ']');
                if (parser.Peek() == ']')
                {
                    parser.MoveAhead();
                }
            }
            else if (ch == ',')
            {
                // Multiple selectors
                parser.MoveAhead();
                parser.MovePastWhitespace();
                selectors.Add(new Selector());
            }
            else if (ch == '>')
            {
                // '>' indicates an immediate-child selector
                parser.MoveAhead();
                parser.MovePastWhitespace();
                Debug.Assert(selectors.Any());
                Selector selector = selectors.AddChildSelector();
                selector.ImmediateChildOnly = true;
            }
            else if (char.IsWhiteSpace(ch))
            {
                // Handle whitespace: on its own it indicates a child selector
                parser.MovePastWhitespace();
                // ',' and '>' change meaning of whitespace
                if (parser.Peek() != ',' && parser.Peek() != '>')
                {
                    selectors.AddChildSelector();
                }
            }
            else
            {
                // Unknown syntax: flag it in debug builds and skip the character
                Debug.Assert(false);
                parser.MoveAhead();
            }
        }
    }

    // Discard any selectors the collection considers empty before returning
    selectors.RemoveEmpty();
    return selectors;
}