/// <summary>
/// Parses a selector string and returns the selectors built from it.
/// </summary>
/// <param name="selector">
/// The selector text. A null value is replaced by an empty string and the text is trimmed
/// before scanning; the raw, untrimmed argument is what gets passed to IsHtml.
/// </param>
/// <returns>The Selectors list, which is re-created at the start of every call.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The text contains a pseudoselector or an attribute matching operator that is not handled here.
/// </exception>
/// <exception cref="InvalidOperationException">
/// Text that is not a tag name was found while scanner.Pos was not 0.
/// </exception>
public IEnumerable<Selector> Parse(string selector)
{
    Selectors = new List<Selector>();
    string sel = (selector ?? String.Empty).Trim();

    // HTML input is not scanned: it is stored whole as a single HTML-type selector.
    if (IsHtml(selector))
    {
        Current.Html = sel;
        Current.SelectorType = SelectorType.HTML;
        Selectors.Add(Current);
        return Selectors;
    }

    scanner = Scanner.Create(sel);
    while (!scanner.Finished)
    {
        switch (scanner.NextChar)
        {
            case '*':
                Current.SelectorType = SelectorType.All;
                scanner.Next();
                break;

            case '<':
                // not selecting - creating html
                // NOTE(review): unlike the fallback in the default branch below, SelectorType is
                // not set to HTML here - confirm that this is intended.
                Current.Html = sel;
                scanner.End();
                break;

            case ':':
                scanner.Next();
                string key = scanner.Get(MatchFunctions.PseudoSelector);
                switch (key)
                {
                    case "checkbox":
                    case "radio":
                    case "button":
                    case "file":
                    case "text":
                    case "password":
                        // Each of these is expressed as an attribute test: type equals <key>.
                        StartNewSelector(SelectorType.Attribute);
                        Current.AttributeSelectorType = AttributeSelectorType.Equals;
                        Current.AttributeName = "type";
                        Current.AttributeValue = key;
                        if (key == "button" && !Current.SelectorType.HasFlag(SelectorType.Tag))
                        {
                            // :button additionally starts a Tag selector for "button",
                            // using the Cumulative combinator and the current traversal type.
                            StartNewSelector(SelectorType.Tag, CombinatorType.Cumulative, Current.TraversalType);
                            Current.Tag = "button";
                        }
                        break;

                    case "checked":
                    case "selected":
                    case "disabled":
                        StartNewSelector(SelectorType.Attribute);
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                        Current.AttributeName = key;
                        break;

                    case "enabled":
                        // :enabled is expressed as "the disabled attribute does not exist".
                        StartNewSelector(SelectorType.Attribute);
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName = "disabled";
                        break;

                    case "contains":
                        StartNewSelector(SelectorType.Contains);
                        IStringScanner inner = scanner.ExpectBoundedBy('(', true).ToNewScanner();
                        Current.Criteria = inner.Get(MatchFunctions.OptionallyQuoted);
                        break;

                    case "eq":
                    case "gt":
                    case "lt":
                        StartNewSelector(SelectorType.Position);
                        switch (key)
                        {
                            case "eq":
                                Current.PositionType = PositionType.IndexEquals;
                                break;
                            case "lt":
                                Current.PositionType = PositionType.IndexLessThan;
                                break;
                            case "gt":
                                Current.PositionType = PositionType.IndexGreaterThan;
                                break;
                        }

                        // The index is read from a parenthesized number, e.g. ":eq(2)".
                        scanner.ExpectChar('(');
                        Current.PositionIndex = Convert.ToInt32(scanner.GetNumber());
                        scanner.ExpectChar(')');
                        break;

                    case "even":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.Even;
                        break;

                    case "odd":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.Odd;
                        break;

                    case "first":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.First;
                        break;

                    case "last":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.Last;
                        break;

                    case "last-child":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.LastChild;
                        break;

                    case "first-child":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.FirstChild;
                        break;

                    case "nth-child":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.NthChild;
                        Current.Criteria = scanner.GetBoundedBy('(');
                        break;

                    case "has":
                    case "not":
                        // The parenthesized text is kept as Criteria and also turned into a
                        // SelectorChain that is added to the current selector's SubSelectors.
                        StartNewSelector(key == "has"
                            ? SelectorType.SubSelectorHas
                            : SelectorType.SubSelectorNot);
                        Current.TraversalType = TraversalType.Descendent;
                        string criteria = Current.Criteria = scanner.GetBoundedBy('(', true);
                        SelectorChain subSelectors = new SelectorChain(criteria);
                        Current.SubSelectors.Add(subSelectors);
                        break;

                    case "visible":
                        StartNewSelector(SelectorType.Other);
                        Current.OtherType = OtherType.Visible;
                        break;

                    default:
                        // The one-argument constructor treats its argument as the parameter
                        // name, so the message has to go through the (paramName, message) overload.
                        throw new ArgumentOutOfRangeException("selector", "Unknown pseudoselector :\"" + key + "\"");
                }
                break;

            case '.':
                StartNewSelector(SelectorType.Class);
                scanner.Next();
                Current.Class = scanner.Get(MatchFunctions.CssClass);
                break;

            case '#':
                scanner.Next();
                // A '#' that ends the input does not start a selector.
                if (!scanner.Finished)
                {
                    StartNewSelector(SelectorType.ID);
                    Current.ID = scanner.Get(MatchFunctions.HtmlIDValue);
                }
                break;

            case '[':
                StartNewSelector(SelectorType.Attribute);
                IStringScanner innerScanner = scanner.ExpectBoundedBy('[', true).ToNewScanner();
                Current.AttributeName = innerScanner.Get(MatchFunctions.HTMLAttribute);
                innerScanner.SkipWhitespace();

                if (innerScanner.Finished)
                {
                    // Only a name between the brackets: test for presence of the attribute.
                    Current.AttributeSelectorType = AttributeSelectorType.Exists;
                }
                else
                {
                    string matchType = innerScanner.Get("=", "^=", "*=", "~=", "$=", "!=");
                    Current.AttributeValue = innerScanner.Get(expectsOptionallyQuotedValue());
                    switch (matchType)
                    {
                        case "=":
                            Current.AttributeSelectorType = AttributeSelectorType.Equals;
                            break;
                        case "^=":
                            Current.AttributeSelectorType = AttributeSelectorType.StartsWith;
                            break;
                        case "*=":
                            Current.AttributeSelectorType = AttributeSelectorType.Contains;
                            break;
                        case "~=":
                            Current.AttributeSelectorType = AttributeSelectorType.ContainsWord;
                            break;
                        case "$=":
                            Current.AttributeSelectorType = AttributeSelectorType.EndsWith;
                            break;
                        case "!=":
                            Current.AttributeSelectorType = AttributeSelectorType.NotEquals;
                            break;
                        default:
                            // Same constructor concern as for the unknown-pseudoselector case.
                            throw new ArgumentOutOfRangeException("selector", "Unknown attibute matching operator '" + matchType + "'");
                    }
                }
                break;

            case ',':
                FinishSelector();
                scanner.NextNonWhitespace();
                break;

            case '>':
                if (Current.IsComplete)
                {
                    StartNewSelector(TraversalType.Child);
                }
                else
                {
                    Current.TraversalType = TraversalType.Child;
                }

                // This is a weird thing because if you use the > selector against a set directly, the meaning is "filter"
                // whereas if it is used in a combination selector the meaning is "filter for 1st child"
                Current.ChildDepth = (Current.CombinatorType == CombinatorType.Root ? 0 : 1);
                scanner.NextNonWhitespace();
                break;

            case ' ':
                // if a ">" or "," is later found, it will be overridden.
                scanner.NextNonWhitespace();
                StartNewSelector(TraversalType.Descendent);
                break;

            default:
                string tag;
                if (scanner.TryGet(MatchFunctions.HTMLTagName, out tag))
                {
                    StartNewSelector(SelectorType.Tag);
                    Current.Tag = tag;
                }
                else if (scanner.Pos == 0)
                {
                    // Nothing recognizable at position 0: treat the whole text as HTML.
                    Current.Html = sel;
                    Current.SelectorType = SelectorType.HTML;
                    scanner.End();
                }
                else
                {
                    throw new InvalidOperationException(scanner.LastError);
                }
                break;
        }
    }

    // Close any open selectors
    FinishSelector();
    return Selectors;
}
/// <summary>
/// Parses a selector string and returns the Selector built from it.
/// </summary>
/// <param name="selector">
/// The selector text. A null value is replaced by an empty string and the text is trimmed
/// before scanning; the raw, untrimmed argument is what gets passed to IsHtml.
/// </param>
/// <returns>
/// The Selectors object, which is re-created at the start of every call. When scanning adds
/// nothing to it, a single clause with SelectorType.None and TraversalType.Filter is added.
/// </returns>
/// <exception cref="NotImplementedException">
/// The text uses a pseudo-element, or a pseudo-class that requires a browser.
/// </exception>
/// <exception cref="ArgumentException">
/// The text uses an unknown pseudo-class or attribute matching operator, or text that is not a
/// tag selector name was found while scanner.Index was not 0.
/// </exception>
public Selector Parse(string selector)
{
    Selectors = new Selector();
    string sel = (selector ?? String.Empty).Trim();

    // HTML input is not scanned: it is stored whole as a single HTML-type clause.
    if (IsHtml(selector))
    {
        Current.Html = sel;
        Current.SelectorType = SelectorType.HTML;
        Selectors.Add(Current);
        return Selectors;
    }

    scanner = Scanner.Create(sel);
    while (!scanner.Finished)
    {
        switch (scanner.Current)
        {
            case '*':
                StartNewSelector(SelectorType.All);
                scanner.Next();
                break;

            case '<':
                // not selecting - creating html
                Current.Html = sel;
                scanner.End();
                break;

            case ':':
                scanner.Next();
                // Invariant lower-casing: a culture-sensitive ToLower() maps 'I' to a dotless
                // 'ı' under Turkish cultures, so ":INPUT" would miss its case label below.
                string key = scanner.Get(MatchFunctions.PseudoSelector).ToLowerInvariant();
                switch (key)
                {
                    case "input":
                        // :input covers four tags; the later calls pass true as second argument.
                        AddTagSelector("input");
                        AddTagSelector("textarea", true);
                        AddTagSelector("select", true);
                        AddTagSelector("button", true);
                        break;

                    case "text":
                        // First clause: input elements whose type attribute equals "text".
                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.Equals;
                        Current.AttributeName = "type";
                        Current.AttributeValue = "text";

                        // Second, Grouped clause: input elements that have no type attribute.
                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag, CombinatorType.Grouped, Current.TraversalType);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName = "type";
                        // The next two statements repeat values already given above; they are
                        // kept as in the original.
                        Current.SelectorType |= SelectorType.Tag;
                        Current.Tag = "input";
                        break;

                    case "checkbox":
                    case "radio":
                    case "button":
                    case "file":
                    case "image":
                    case "password":
                        AddInputSelector(key, "input");
                        break;

                    case "reset":
                    case "submit":
                        AddInputSelector(key);
                        break;

                    case "checked":
                    case "selected":
                    case "disabled":
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                        Current.AttributeName = key;
                        break;

                    case "enabled":
                        // :enabled is expressed as "the disabled attribute does not exist".
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName = "disabled";
                        break;

                    case "first-letter":
                    case "first-line":
                    case "before":
                    case "after":
                        throw new NotImplementedException("The CSS pseudoelement selectors are not implemented in CsQuery.");

                    case "target":
                    case "link":
                    case "hover":
                    case "active":
                    case "focus":
                    case "visited":
                        throw new NotImplementedException("Pseudoclasses that require a browser aren't implemented.");

                    default:
                        // Anything else is delegated; a false result means the key is unknown.
                        if (!AddPseudoSelector(key))
                        {
                            throw new ArgumentException("Unknown pseudo-class :\"" + key + "\". If this is a valid CSS or jQuery selector, please let us know.");
                        }
                        break;
                }
                break;

            case '.':
                StartNewSelector(SelectorType.Class);
                scanner.Next();
                Current.Class = scanner.Get(MatchFunctions.CssClassName);
                break;

            case '#':
                scanner.Next();
                // A '#' that ends the input does not start a clause.
                if (!scanner.Finished)
                {
                    StartNewSelector(SelectorType.ID);
                    Current.ID = scanner.Get(MatchFunctions.HtmlIDValue());
                }
                break;

            case '[':
                StartNewSelector(SelectorType.AttributeValue);
                IStringScanner innerScanner = scanner.ExpectBoundedBy('[', true).ToNewScanner();
                Current.AttributeName = innerScanner.Get(MatchFunctions.HTMLAttribute());
                innerScanner.SkipWhitespace();

                if (innerScanner.Finished)
                {
                    // Only a name between the brackets: test for presence of the attribute.
                    Current.AttributeSelectorType = AttributeSelectorType.Exists;
                }
                else
                {
                    string matchType = innerScanner.Get("=", "^=", "*=", "~=", "$=", "!=", "|=");

                    // CSS allows [attr=] as a synonym for [attr]
                    if (innerScanner.Finished)
                    {
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                    }
                    else
                    {
                        var rawValue = innerScanner.Expect(expectsOptionallyQuotedValue()).ToNewScanner();
                        Current.AttributeValue = rawValue.Finished
                            ? ""
                            : rawValue.Get(new EscapedString());

                        switch (matchType)
                        {
                            case "=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Equals;
                                break;
                            case "^=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWith;
                                // attributevalue starts with "" matches nothing
                                if (Current.AttributeValue == "")
                                {
                                    Current.AttributeValue = "" + (char)0;
                                }
                                break;
                            case "*=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Contains;
                                break;
                            case "~=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.ContainsWord;
                                break;
                            case "$=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.EndsWith;
                                break;
                            case "!=":
                                Current.AttributeSelectorType = AttributeSelectorType.NotEquals;
                                // must be matched manually - missing also validates as notEquals
                                break;
                            case "|=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWithOrHyphen;
                                break;
                            default:
                                throw new ArgumentException("Unknown attibute matching operator '" + matchType + "'");
                        }
                    }
                }
                break;

            case ',':
                // A comma closes the open clause and resets the state used for the next one.
                FinishSelector();
                NextCombinatorType = CombinatorType.Root;
                NextTraversalType = TraversalType.All;
                scanner.NextNonWhitespace();
                break;

            case '+':
                StartNewSelector(TraversalType.Adjacent);
                scanner.NextNonWhitespace();
                break;

            case '~':
                StartNewSelector(TraversalType.Sibling);
                scanner.NextNonWhitespace();
                break;

            case '>':
                StartNewSelector(TraversalType.Child);
                // This is a weird thing because if you use the > selector against a set directly, the meaning is "filter"
                // whereas if it is used in a combination selector the meaning is "filter for 1st child"
                Current.ChildDepth = 1;
                scanner.NextNonWhitespace();
                break;

            case ' ':
                // if a ">" or "," is later found, it will be overridden.
                scanner.NextNonWhitespace();
                NextTraversalType = TraversalType.Descendent;
                break;

            default:
                string tag;
                if (scanner.TryGet(MatchFunctions.HTMLTagSelectorName(), out tag))
                {
                    AddTagSelector(tag);
                }
                else if (scanner.Index == 0)
                {
                    // Nothing recognizable at index 0: treat the whole text as HTML.
                    Current.Html = sel;
                    Current.SelectorType = SelectorType.HTML;
                    scanner.End();
                }
                else
                {
                    throw new ArgumentException(scanner.LastError);
                }
                break;
        }
    }

    // Close any open selectors
    FinishSelector();

    // Never hand back an empty Selector: add a None/Filter placeholder clause instead.
    if (Selectors.Count == 0)
    {
        var empty = new SelectorClause
        {
            SelectorType = SelectorType.None,
            TraversalType = TraversalType.Filter
        };
        Selectors.Add(empty);
    }

    return Selectors;
}
/// <summary>
/// Parses a selector string and returns the Selector built from it.
/// </summary>
/// <param name="selector">
/// The selector text. A null value is replaced by an empty string and the text is trimmed
/// before scanning; the raw, untrimmed argument is what gets passed to IsHtml.
/// </param>
/// <returns>
/// The Selectors object, which is re-created at the start of every call. When scanning adds
/// nothing to it, a single clause with SelectorType.None and TraversalType.Filter is added.
/// </returns>
/// <exception cref="NotImplementedException">
/// The text uses a pseudo-element, or a pseudo-class that requires a browser.
/// </exception>
/// <exception cref="ArgumentException">
/// The text uses an unknown pseudo-class or attribute matching operator, or text that is not a
/// tag selector name was found while scanner.Index was not 0.
/// </exception>
public Selector Parse(string selector)
{
    Selectors = new Selector();
    string sel = (selector ?? String.Empty).Trim();

    // HTML input is not scanned: it is stored whole as a single HTML-type clause.
    if (IsHtml(selector))
    {
        Current.Html = sel;
        Current.SelectorType = SelectorType.HTML;
        Selectors.Add(Current);
        return Selectors;
    }

    scanner = Scanner.Create(sel);
    while (!scanner.Finished)
    {
        switch (scanner.Current)
        {
            case '*':
                StartNewSelector(SelectorType.All);
                scanner.Next();
                break;

            case '<':
                // not selecting - creating html
                Current.Html = sel;
                scanner.End();
                break;

            case ':':
                scanner.Next();
                // Invariant lower-casing: a culture-sensitive ToLower() maps 'I' to a dotless
                // 'ı' under Turkish cultures, so ":INPUT" would miss its case label below.
                string key = scanner.Get(MatchFunctions.PseudoSelector).ToLowerInvariant();
                switch (key)
                {
                    case "input":
                        // :input covers four tags; the later calls pass true as second argument.
                        AddTagSelector("input");
                        AddTagSelector("textarea", true);
                        AddTagSelector("select", true);
                        AddTagSelector("button", true);
                        break;

                    case "text":
                        // First clause: input elements whose type attribute equals "text".
                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.Equals;
                        Current.AttributeName = "type";
                        Current.AttributeValue = "text";

                        // Second, Grouped clause: input elements that have no type attribute.
                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag, CombinatorType.Grouped, Current.TraversalType);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName = "type";
                        // The next two statements repeat values already given above; they are
                        // kept as in the original.
                        Current.SelectorType |= SelectorType.Tag;
                        Current.Tag = "input";
                        break;

                    case "checkbox":
                    case "radio":
                    case "button":
                    case "file":
                    case "image":
                    case "password":
                        AddInputSelector(key, "input");
                        break;

                    case "reset":
                    case "submit":
                        AddInputSelector(key);
                        break;

                    case "checked":
                    case "selected":
                    case "disabled":
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                        Current.AttributeName = key;
                        break;

                    case "enabled":
                        // :enabled is expressed as "the disabled attribute does not exist".
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName = "disabled";
                        break;

                    case "first-letter":
                    case "first-line":
                    case "before":
                    case "after":
                        throw new NotImplementedException("The CSS pseudoelement selectors are not implemented in CsQuery.");

                    case "target":
                    case "link":
                    case "hover":
                    case "active":
                    case "focus":
                    case "visited":
                        throw new NotImplementedException("Pseudoclasses that require a browser aren't implemented.");

                    default:
                        // Anything else is delegated; a false result means the key is unknown.
                        if (!AddPseudoSelector(key))
                        {
                            throw new ArgumentException("Unknown pseudo-class :\"" + key + "\". If this is a valid CSS or jQuery selector, please let us know.");
                        }
                        break;
                }
                break;

            case '.':
                StartNewSelector(SelectorType.Class);
                scanner.Next();
                Current.Class = scanner.Get(MatchFunctions.CssClassName);
                break;

            case '#':
                scanner.Next();
                // A '#' that ends the input does not start a clause.
                if (!scanner.Finished)
                {
                    StartNewSelector(SelectorType.ID);
                    Current.ID = scanner.Get(MatchFunctions.HtmlIDValue());
                }
                break;

            case '[':
                StartNewSelector(SelectorType.AttributeValue);
                IStringScanner innerScanner = scanner.ExpectBoundedBy('[', true).ToNewScanner();
                Current.AttributeName = innerScanner.Get(MatchFunctions.HTMLAttribute());
                innerScanner.SkipWhitespace();

                if (innerScanner.Finished)
                {
                    // Only a name between the brackets: test for presence of the attribute.
                    Current.AttributeSelectorType = AttributeSelectorType.Exists;
                }
                else
                {
                    string matchType = innerScanner.Get("=", "^=", "*=", "~=", "$=", "!=", "|=");

                    // CSS allows [attr=] as a synonym for [attr]
                    if (innerScanner.Finished)
                    {
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                    }
                    else
                    {
                        var rawValue = innerScanner.Expect(expectsOptionallyQuotedValue()).ToNewScanner();
                        Current.AttributeValue = rawValue.Finished
                            ? ""
                            : rawValue.Get(new EscapedString());

                        switch (matchType)
                        {
                            case "=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Equals;
                                break;
                            case "^=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWith;
                                // attributevalue starts with "" matches nothing
                                if (Current.AttributeValue == "")
                                {
                                    Current.AttributeValue = "" + (char)0;
                                }
                                break;
                            case "*=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Contains;
                                break;
                            case "~=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.ContainsWord;
                                break;
                            case "$=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.EndsWith;
                                break;
                            case "!=":
                                Current.AttributeSelectorType = AttributeSelectorType.NotEquals;
                                // must be matched manually - missing also validates as notEquals
                                break;
                            case "|=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWithOrHyphen;
                                break;
                            default:
                                throw new ArgumentException("Unknown attibute matching operator '" + matchType + "'");
                        }
                    }
                }
                break;

            case ',':
                // A comma closes the open clause and resets the state used for the next one.
                FinishSelector();
                NextCombinatorType = CombinatorType.Root;
                NextTraversalType = TraversalType.All;
                scanner.NextNonWhitespace();
                break;

            case '+':
                StartNewSelector(TraversalType.Adjacent);
                scanner.NextNonWhitespace();
                break;

            case '~':
                StartNewSelector(TraversalType.Sibling);
                scanner.NextNonWhitespace();
                break;

            case '>':
                StartNewSelector(TraversalType.Child);
                // This is a weird thing because if you use the > selector against a set directly, the meaning is "filter"
                // whereas if it is used in a combination selector the meaning is "filter for 1st child"
                Current.ChildDepth = 1;
                scanner.NextNonWhitespace();
                break;

            case ' ':
                // if a ">" or "," is later found, it will be overridden.
                scanner.NextNonWhitespace();
                NextTraversalType = TraversalType.Descendent;
                break;

            default:
                string tag;
                if (scanner.TryGet(MatchFunctions.HTMLTagSelectorName(), out tag))
                {
                    AddTagSelector(tag);
                }
                else if (scanner.Index == 0)
                {
                    // Nothing recognizable at index 0: treat the whole text as HTML.
                    Current.Html = sel;
                    Current.SelectorType = SelectorType.HTML;
                    scanner.End();
                }
                else
                {
                    throw new ArgumentException(scanner.LastError);
                }
                break;
        }
    }

    // Close any open selectors
    FinishSelector();

    // Never hand back an empty Selector: add a None/Filter placeholder clause instead.
    if (Selectors.Count == 0)
    {
        var empty = new SelectorClause
        {
            SelectorType = SelectorType.None,
            TraversalType = TraversalType.Filter
        };
        Selectors.Add(empty);
    }

    return Selectors;
}
/// <summary>
/// Parses a selector string and returns the selectors built from it.
/// </summary>
/// <param name="selector">
/// The selector text. A null value is replaced by an empty string and the text is trimmed
/// before scanning; the raw, untrimmed argument is what gets passed to IsHtml.
/// </param>
/// <returns>The Selectors list, which is re-created at the start of every call.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The text contains a pseudoselector or an attribute matching operator that is not handled here.
/// </exception>
/// <exception cref="InvalidOperationException">
/// Text that is not a tag name was found while scanner.Pos was not 0.
/// </exception>
public IEnumerable<Selector> Parse(string selector)
{
    Selectors = new List<Selector>();
    string sel = (selector ?? String.Empty).Trim();

    // HTML input is not scanned: it is stored whole as a single HTML-type selector.
    if (IsHtml(selector))
    {
        Current.Html = sel;
        Current.SelectorType = SelectorType.HTML;
        Selectors.Add(Current);
        return Selectors;
    }

    scanner = Scanner.Create(sel);
    while (!scanner.Finished)
    {
        switch (scanner.NextChar)
        {
            case '*':
                Current.SelectorType = SelectorType.All;
                scanner.Next();
                break;

            case '<':
                // not selecting - creating html
                // NOTE(review): unlike the fallback in the default branch below, SelectorType is
                // not set to HTML here - confirm that this is intended.
                Current.Html = sel;
                scanner.End();
                break;

            case ':':
                scanner.Next();
                string key = scanner.Get(MatchFunctions.PseudoSelector);
                switch (key)
                {
                    case "checkbox":
                    case "radio":
                    case "button":
                    case "file":
                    case "text":
                    case "password":
                        // Each of these is expressed as an attribute test: type equals <key>.
                        StartNewSelector(SelectorType.Attribute);
                        Current.AttributeSelectorType = AttributeSelectorType.Equals;
                        Current.AttributeName = "type";
                        Current.AttributeValue = key;
                        if (key == "button" && !Current.SelectorType.HasFlag(SelectorType.Tag))
                        {
                            // :button additionally starts a Tag selector for "button",
                            // using the Cumulative combinator and the current traversal type.
                            StartNewSelector(SelectorType.Tag, CombinatorType.Cumulative, Current.TraversalType);
                            Current.Tag = "button";
                        }
                        break;

                    case "checked":
                    case "selected":
                    case "disabled":
                        StartNewSelector(SelectorType.Attribute);
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                        Current.AttributeName = key;
                        break;

                    case "enabled":
                        // :enabled is expressed as "the disabled attribute does not exist".
                        StartNewSelector(SelectorType.Attribute);
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName = "disabled";
                        break;

                    case "contains":
                        StartNewSelector(SelectorType.Contains);
                        IStringScanner inner = scanner.ExpectBoundedBy('(', true).ToNewScanner();
                        Current.Criteria = inner.Get(MatchFunctions.OptionallyQuoted);
                        break;

                    case "eq":
                    case "gt":
                    case "lt":
                        StartNewSelector(SelectorType.Position);
                        switch (key)
                        {
                            case "eq":
                                Current.PositionType = PositionType.IndexEquals;
                                break;
                            case "lt":
                                Current.PositionType = PositionType.IndexLessThan;
                                break;
                            case "gt":
                                Current.PositionType = PositionType.IndexGreaterThan;
                                break;
                        }

                        // The index is read from a parenthesized number, e.g. ":eq(2)".
                        scanner.ExpectChar('(');
                        Current.PositionIndex = Convert.ToInt32(scanner.GetNumber());
                        scanner.ExpectChar(')');
                        break;

                    case "even":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.Even;
                        break;

                    case "odd":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.Odd;
                        break;

                    case "first":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.First;
                        break;

                    case "last":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.Last;
                        break;

                    case "last-child":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.LastChild;
                        break;

                    case "first-child":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.FirstChild;
                        break;

                    case "nth-child":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.NthChild;
                        Current.Criteria = scanner.GetBoundedBy('(');
                        break;

                    case "has":
                    case "not":
                        // The parenthesized text is kept as Criteria and also turned into a
                        // SelectorChain that is added to the current selector's SubSelectors.
                        StartNewSelector(key == "has"
                            ? SelectorType.SubSelectorHas
                            : SelectorType.SubSelectorNot);
                        Current.TraversalType = TraversalType.Descendent;
                        string criteria = Current.Criteria = scanner.GetBoundedBy('(', true);
                        SelectorChain subSelectors = new SelectorChain(criteria);
                        Current.SubSelectors.Add(subSelectors);
                        break;

                    case "visible":
                        StartNewSelector(SelectorType.Other);
                        Current.OtherType = OtherType.Visible;
                        break;

                    default:
                        // The one-argument constructor treats its argument as the parameter
                        // name, so the message has to go through the (paramName, message) overload.
                        throw new ArgumentOutOfRangeException("selector", "Unknown pseudoselector :\"" + key + "\"");
                }
                break;

            case '.':
                StartNewSelector(SelectorType.Class);
                scanner.Next();
                Current.Class = scanner.Get(MatchFunctions.CssClass);
                break;

            case '#':
                scanner.Next();
                // A '#' that ends the input does not start a selector.
                if (!scanner.Finished)
                {
                    StartNewSelector(SelectorType.ID);
                    Current.ID = scanner.Get(MatchFunctions.HtmlIDValue);
                }
                break;

            case '[':
                StartNewSelector(SelectorType.Attribute);
                IStringScanner innerScanner = scanner.ExpectBoundedBy('[', true).ToNewScanner();
                Current.AttributeName = innerScanner.Get(MatchFunctions.HTMLAttribute);
                innerScanner.SkipWhitespace();

                if (innerScanner.Finished)
                {
                    // Only a name between the brackets: test for presence of the attribute.
                    Current.AttributeSelectorType = AttributeSelectorType.Exists;
                }
                else
                {
                    string matchType = innerScanner.Get("=", "^=", "*=", "~=", "$=", "!=");
                    Current.AttributeValue = innerScanner.Get(expectsOptionallyQuotedValue());
                    switch (matchType)
                    {
                        case "=":
                            Current.AttributeSelectorType = AttributeSelectorType.Equals;
                            break;
                        case "^=":
                            Current.AttributeSelectorType = AttributeSelectorType.StartsWith;
                            break;
                        case "*=":
                            Current.AttributeSelectorType = AttributeSelectorType.Contains;
                            break;
                        case "~=":
                            Current.AttributeSelectorType = AttributeSelectorType.ContainsWord;
                            break;
                        case "$=":
                            Current.AttributeSelectorType = AttributeSelectorType.EndsWith;
                            break;
                        case "!=":
                            Current.AttributeSelectorType = AttributeSelectorType.NotEquals;
                            break;
                        default:
                            // Same constructor concern as for the unknown-pseudoselector case.
                            throw new ArgumentOutOfRangeException("selector", "Unknown attibute matching operator '" + matchType + "'");
                    }
                }
                break;

            case ',':
                FinishSelector();
                scanner.NextNonWhitespace();
                break;

            case '>':
                if (Current.IsComplete)
                {
                    StartNewSelector(TraversalType.Child);
                }
                else
                {
                    Current.TraversalType = TraversalType.Child;
                }

                // This is a weird thing because if you use the > selector against a set directly, the meaning is "filter"
                // whereas if it is used in a combination selector the meaning is "filter for 1st child"
                Current.ChildDepth = (Current.CombinatorType == CombinatorType.Root ? 0 : 1);
                scanner.NextNonWhitespace();
                break;

            case ' ':
                // if a ">" or "," is later found, it will be overridden.
                scanner.NextNonWhitespace();
                StartNewSelector(TraversalType.Descendent);
                break;

            default:
                string tag;
                if (scanner.TryGet(MatchFunctions.HTMLTagName, out tag))
                {
                    StartNewSelector(SelectorType.Tag);
                    Current.Tag = tag;
                }
                else if (scanner.Pos == 0)
                {
                    // Nothing recognizable at position 0: treat the whole text as HTML.
                    Current.Html = sel;
                    Current.SelectorType = SelectorType.HTML;
                    scanner.End();
                }
                else
                {
                    throw new InvalidOperationException(scanner.LastError);
                }
                break;
        }
    }

    // Close any open selectors
    FinishSelector();
    return Selectors;
}