/// <summary>
/// Commits the clause currently being built and resets parser state for the next one.
/// A copy of <c>Current</c> is appended to <c>Selectors</c> only when it reports itself
/// complete; <c>Current</c> is cleared either way.
/// </summary>
protected void FinishSelector()
{
    if (Current.IsComplete)
    {
        // Store a clone so that clearing Current below does not affect the stored clause.
        Selectors.Add(Current.Clone());
    }

    Current.Clear();

    // Defaults applied to whatever clause is started next.
    NextTraversalType = TraversalType.Filter;
    NextCombinatorType = CombinatorType.Chained;
}
/// <summary>
/// Parses a selector string into a <see cref="Selector"/> holding the clauses found in it.
/// </summary>
/// <remarks>
/// Input recognized by <c>IsHtml</c> is returned immediately as a single HTML clause.
/// Otherwise the trimmed text is scanned character by character and each token either
/// starts a new clause, refines the current one, or sets the traversal/combinator type
/// that the next clause will use.
/// </remarks>
/// <param name="selector">The selector text. <c>null</c> is scanned as an empty string.</param>
/// <returns>
/// The populated <see cref="Selector"/>. When scanning produced no clause, it contains one
/// clause with <c>SelectorType.None</c> and <c>TraversalType.Filter</c>.
/// </returns>
/// <exception cref="NotImplementedException">
/// The selector uses a CSS pseudo-element (<c>:before</c>, <c>:after</c>, <c>:first-line</c>,
/// <c>:first-letter</c>) or a browser-state pseudo-class (<c>:hover</c>, <c>:focus</c>, ...).
/// </exception>
/// <exception cref="ArgumentException">
/// The selector contains an unknown pseudo-class, an unknown attribute operator, or text
/// that cannot be read as a tag name anywhere other than the start of the string.
/// </exception>
public Selector Parse(string selector)
{
    Selectors = new Selector();

    string sel = (selector ?? String.Empty).Trim();

    // NOTE(review): IsHtml receives the original argument (untrimmed, possibly null), not
    // the normalized "sel" - confirm it tolerates null.
    if (IsHtml(selector))
    {
        Current.Html = sel;
        Current.SelectorType = SelectorType.HTML;
        Selectors.Add(Current);
        return Selectors;
    }

    scanner = Scanner.Create(sel);

    while (!scanner.Finished)
    {
        switch (scanner.Current)
        {
            case '*':
                StartNewSelector(SelectorType.All);
                scanner.Next();
                break;
            case '<':
                // not selecting - creating html
                Current.Html = sel;
                scanner.End();
                break;
            case ':':
                scanner.Next();
                // Lower-case with the invariant culture: the keyword is compared against
                // fixed ASCII labels below, and culture-sensitive lowering (e.g. Turkish
                // 'I' -> dotless 'i') would make "INPUT", "IMAGE", "LINK", ... fail to match.
                string key = scanner.Get(MatchFunctions.PseudoSelector).ToLowerInvariant();
                switch (key)
                {
                    case "input":
                        AddTagSelector("input");
                        AddTagSelector("textarea", true);
                        AddTagSelector("select", true);
                        AddTagSelector("button", true);
                        break;
                    case "text":
                        // :text is built as two clauses: input[type=text], grouped with
                        // an input clause whose "type" attribute does not exist.
                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.Equals;
                        Current.AttributeName = "type";
                        Current.AttributeValue = "text";

                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag, CombinatorType.Grouped, Current.TraversalType);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName = "type";

                        Current.SelectorType |= SelectorType.Tag;
                        Current.Tag = "input";
                        break;
                    case "checkbox":
                    case "radio":
                    case "button":
                    case "file":
                    case "image":
                    case "password":
                        AddInputSelector(key, "input");
                        break;
                    case "reset":
                    case "submit":
                        AddInputSelector(key);
                        break;
                    case "checked":
                    case "selected":
                    case "disabled":
                        // Matched by the presence of the attribute with the same name.
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                        Current.AttributeName = key;
                        break;
                    case "enabled":
                        // Matched by the absence of a "disabled" attribute.
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName = "disabled";
                        break;
                    case "first-letter":
                    case "first-line":
                    case "before":
                    case "after":
                        throw new NotImplementedException("The CSS pseudoelement selectors are not implemented in CsQuery.");
                    case "target":
                    case "link":
                    case "hover":
                    case "active":
                    case "focus":
                    case "visited":
                        throw new NotImplementedException("Pseudoclasses that require a browser aren't implemented.");
                    default:
                        if (!AddPseudoSelector(key))
                        {
                            throw new ArgumentException("Unknown pseudo-class :\"" + key + "\". If this is a valid CSS or jQuery selector, please let us know.");
                        }
                        break;
                }
                break;
            case '.':
                StartNewSelector(SelectorType.Class);
                scanner.Next();
                Current.Class = scanner.Get(MatchFunctions.CssClassName);
                break;
            case '#':
                scanner.Next();
                // A trailing '#' with nothing after it is consumed without starting a clause.
                if (!scanner.Finished)
                {
                    StartNewSelector(SelectorType.ID);
                    Current.ID = scanner.Get(MatchFunctions.HtmlIDValue());
                }
                break;
            case '[':
                StartNewSelector(SelectorType.AttributeValue);

                // The bracketed text is parsed with its own scanner.
                IStringScanner innerScanner = scanner.ExpectBoundedBy('[', true).ToNewScanner();

                Current.AttributeName = innerScanner.Get(MatchFunctions.HTMLAttribute());
                innerScanner.SkipWhitespace();

                if (innerScanner.Finished)
                {
                    Current.AttributeSelectorType = AttributeSelectorType.Exists;
                }
                else
                {
                    string matchType = innerScanner.Get("=", "^=", "*=", "~=", "$=", "!=", "|=");

                    // CSS allows [attr=] as a synonym for [attr]
                    if (innerScanner.Finished)
                    {
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                    }
                    else
                    {
                        var rawValue = innerScanner.Expect(expectsOptionallyQuotedValue()).ToNewScanner();

                        Current.AttributeValue = rawValue.Finished ?
                            "" :
                            rawValue.Get(new EscapedString());

                        switch (matchType)
                        {
                            case "=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Equals;
                                break;
                            case "^=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWith;
                                // attributevalue starts with "" matches nothing
                                if (Current.AttributeValue == "")
                                {
                                    Current.AttributeValue = "" + (char)0;
                                }
                                break;
                            case "*=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Contains;
                                break;
                            case "~=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.ContainsWord;
                                break;
                            case "$=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.EndsWith;
                                break;
                            case "!=":
                                Current.AttributeSelectorType = AttributeSelectorType.NotEquals;
                                // must matched manually - missing also validates as notEquals
                                break;
                            case "|=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWithOrHyphen;
                                break;
                            default:
                                throw new ArgumentException("Unknown attibute matching operator '" + matchType + "'");
                        }
                    }
                }

                break;
            case ',':
                FinishSelector();
                // FinishSelector resets these to Chained/Filter; a comma overrides both.
                NextCombinatorType = CombinatorType.Root;
                NextTraversalType = TraversalType.All;
                scanner.NextNonWhitespace();
                break;
            case '+':
                StartNewSelector(TraversalType.Adjacent);
                scanner.NextNonWhitespace();
                break;
            case '~':
                StartNewSelector(TraversalType.Sibling);
                scanner.NextNonWhitespace();
                break;
            case '>':
                StartNewSelector(TraversalType.Child);
                // This is a weird thing because if you use the > selector against a set directly, the meaning is "filter"
                // whereas if it is used in a combination selector the meaning is "filter for 1st child"
                //Current.ChildDepth = (Current.CombinatorType == CombinatorType.Root ? 0 : 1);
                Current.ChildDepth = 1;
                scanner.NextNonWhitespace();
                break;
            case ' ':
                // if a ">" or "," is later found, it will be overridden.
                scanner.NextNonWhitespace();
                NextTraversalType = TraversalType.Descendent;
                break;
            default:
                string tag = "";
                if (scanner.TryGet(MatchFunctions.HTMLTagSelectorName(), out tag))
                {
                    AddTagSelector(tag);
                }
                else
                {
                    if (scanner.Index == 0)
                    {
                        // Nothing recognizable at the very start: treat the whole input as HTML.
                        Current.Html = sel;
                        Current.SelectorType = SelectorType.HTML;
                        scanner.End();
                    }
                    else
                    {
                        throw new ArgumentException(scanner.LastError);
                    }
                }

                break;
        }
    }

    // Close any open selectors
    FinishSelector();

    if (Selectors.Count == 0)
    {
        var empty = new SelectorClause
        {
            SelectorType = SelectorType.None,
            TraversalType = TraversalType.Filter
        };
        Selectors.Add(empty);
    }

    return Selectors;
}
/// <summary>
/// Parses a selector string into a <see cref="Selector"/> holding the clauses found in it.
/// </summary>
/// <remarks>
/// Input recognized by <c>IsHtml</c> is returned immediately as a single HTML clause.
/// Otherwise the trimmed text is scanned character by character and each token either
/// starts a new clause, refines the current one, or sets the traversal/combinator type
/// that the next clause will use.
/// </remarks>
/// <param name="selector">The selector text. <c>null</c> is scanned as an empty string.</param>
/// <returns>
/// The populated <see cref="Selector"/>. When scanning produced no clause, it contains one
/// clause with <c>SelectorType.None</c> and <c>TraversalType.Filter</c>.
/// </returns>
/// <exception cref="NotImplementedException">
/// The selector uses a CSS pseudo-element (<c>:before</c>, <c>:after</c>, <c>:first-line</c>,
/// <c>:first-letter</c>) or a browser-state pseudo-class (<c>:hover</c>, <c>:focus</c>, ...).
/// </exception>
/// <exception cref="ArgumentException">
/// The selector contains an unknown pseudo-class, an unknown attribute operator, or text
/// that cannot be read as a tag name anywhere other than the start of the string.
/// </exception>
public Selector Parse(string selector)
{
    Selectors = new Selector();

    string sel = (selector ?? String.Empty).Trim();

    // NOTE(review): IsHtml receives the original argument (untrimmed, possibly null), not
    // the normalized "sel" - confirm it tolerates null.
    if (IsHtml(selector))
    {
        Current.Html = sel;
        Current.SelectorType = SelectorType.HTML;
        Selectors.Add(Current);
        return Selectors;
    }

    scanner = Scanner.Create(sel);

    while (!scanner.Finished)
    {
        switch (scanner.Current)
        {
            case '*':
                StartNewSelector(SelectorType.All);
                scanner.Next();
                break;
            case '<':
                // not selecting - creating html
                Current.Html = sel;
                scanner.End();
                break;
            case ':':
                scanner.Next();
                // Lower-case with the invariant culture: the keyword is compared against
                // fixed ASCII labels below, and culture-sensitive lowering (e.g. Turkish
                // 'I' -> dotless 'i') would make "INPUT", "IMAGE", "LINK", ... fail to match.
                string key = scanner.Get(MatchFunctions.PseudoSelector).ToLowerInvariant();
                switch (key)
                {
                    case "input":
                        AddTagSelector("input");
                        AddTagSelector("textarea", true);
                        AddTagSelector("select", true);
                        AddTagSelector("button", true);
                        break;
                    case "text":
                        // :text is built as two clauses: input[type=text], grouped with
                        // an input clause whose "type" attribute does not exist.
                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.Equals;
                        Current.AttributeName = "type";
                        Current.AttributeValue = "text";

                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag, CombinatorType.Grouped, Current.TraversalType);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName = "type";

                        Current.SelectorType |= SelectorType.Tag;
                        Current.Tag = "input";
                        break;
                    case "checkbox":
                    case "radio":
                    case "button":
                    case "file":
                    case "image":
                    case "password":
                        AddInputSelector(key, "input");
                        break;
                    case "reset":
                    case "submit":
                        AddInputSelector(key);
                        break;
                    case "checked":
                    case "selected":
                    case "disabled":
                        // Matched by the presence of the attribute with the same name.
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                        Current.AttributeName = key;
                        break;
                    case "enabled":
                        // Matched by the absence of a "disabled" attribute.
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName = "disabled";
                        break;
                    case "first-letter":
                    case "first-line":
                    case "before":
                    case "after":
                        throw new NotImplementedException("The CSS pseudoelement selectors are not implemented in CsQuery.");
                    case "target":
                    case "link":
                    case "hover":
                    case "active":
                    case "focus":
                    case "visited":
                        throw new NotImplementedException("Pseudoclasses that require a browser aren't implemented.");
                    default:
                        if (!AddPseudoSelector(key))
                        {
                            throw new ArgumentException("Unknown pseudo-class :\"" + key + "\". If this is a valid CSS or jQuery selector, please let us know.");
                        }
                        break;
                }
                break;
            case '.':
                StartNewSelector(SelectorType.Class);
                scanner.Next();
                Current.Class = scanner.Get(MatchFunctions.CssClassName);
                break;
            case '#':
                scanner.Next();
                // A trailing '#' with nothing after it is consumed without starting a clause.
                if (!scanner.Finished)
                {
                    StartNewSelector(SelectorType.ID);
                    Current.ID = scanner.Get(MatchFunctions.HtmlIDValue());
                }
                break;
            case '[':
                StartNewSelector(SelectorType.AttributeValue);

                // The bracketed text is parsed with its own scanner.
                IStringScanner innerScanner = scanner.ExpectBoundedBy('[', true).ToNewScanner();

                Current.AttributeName = innerScanner.Get(MatchFunctions.HTMLAttribute());
                innerScanner.SkipWhitespace();

                if (innerScanner.Finished)
                {
                    Current.AttributeSelectorType = AttributeSelectorType.Exists;
                }
                else
                {
                    string matchType = innerScanner.Get("=", "^=", "*=", "~=", "$=", "!=", "|=");

                    // CSS allows [attr=] as a synonym for [attr]
                    if (innerScanner.Finished)
                    {
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                    }
                    else
                    {
                        var rawValue = innerScanner.Expect(expectsOptionallyQuotedValue()).ToNewScanner();

                        Current.AttributeValue = rawValue.Finished ?
                            "" :
                            rawValue.Get(new EscapedString());

                        switch (matchType)
                        {
                            case "=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Equals;
                                break;
                            case "^=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWith;
                                // attributevalue starts with "" matches nothing
                                if (Current.AttributeValue == "")
                                {
                                    Current.AttributeValue = "" + (char)0;
                                }
                                break;
                            case "*=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Contains;
                                break;
                            case "~=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.ContainsWord;
                                break;
                            case "$=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.EndsWith;
                                break;
                            case "!=":
                                Current.AttributeSelectorType = AttributeSelectorType.NotEquals;
                                // must matched manually - missing also validates as notEquals
                                break;
                            case "|=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWithOrHyphen;
                                break;
                            default:
                                throw new ArgumentException("Unknown attibute matching operator '" + matchType + "'");
                        }
                    }
                }

                break;
            case ',':
                FinishSelector();
                // FinishSelector resets these to Chained/Filter; a comma overrides both.
                NextCombinatorType = CombinatorType.Root;
                NextTraversalType = TraversalType.All;
                scanner.NextNonWhitespace();
                break;
            case '+':
                StartNewSelector(TraversalType.Adjacent);
                scanner.NextNonWhitespace();
                break;
            case '~':
                StartNewSelector(TraversalType.Sibling);
                scanner.NextNonWhitespace();
                break;
            case '>':
                StartNewSelector(TraversalType.Child);
                // This is a weird thing because if you use the > selector against a set directly, the meaning is "filter"
                // whereas if it is used in a combination selector the meaning is "filter for 1st child"
                //Current.ChildDepth = (Current.CombinatorType == CombinatorType.Root ? 0 : 1);
                Current.ChildDepth = 1;
                scanner.NextNonWhitespace();
                break;
            case ' ':
                // if a ">" or "," is later found, it will be overridden.
                scanner.NextNonWhitespace();
                NextTraversalType = TraversalType.Descendent;
                break;
            default:
                string tag = "";
                if (scanner.TryGet(MatchFunctions.HTMLTagSelectorName(), out tag))
                {
                    AddTagSelector(tag);
                }
                else
                {
                    if (scanner.Index == 0)
                    {
                        // Nothing recognizable at the very start: treat the whole input as HTML.
                        Current.Html = sel;
                        Current.SelectorType = SelectorType.HTML;
                        scanner.End();
                    }
                    else
                    {
                        throw new ArgumentException(scanner.LastError);
                    }
                }

                break;
        }
    }

    // Close any open selectors
    FinishSelector();

    if (Selectors.Count == 0)
    {
        var empty = new SelectorClause
        {
            SelectorType = SelectorType.None,
            TraversalType = TraversalType.Filter
        };
        Selectors.Add(empty);
    }

    return Selectors;
}