/// <summary>
/// Walks the scanner across an attribute selector whose value is a single-quoted string
/// containing an escaped quote, checking each token as it is consumed.
/// </summary>
public void StringParsing()
{
    string test = @"someSelect[attr-bute= 'this is \' a quoted value']";
    scanner = test;
    scanner.IgnoreWhitespace = true;

    var text = scanner.GetAlpha();
    Assert.AreEqual("someSelect", text, "Got first word");

    // What follows "someSelect" in the input is '[', not a quote character, so asking
    // for a quoted string at this point is expected to throw.
    Assert.Throws(typeof(ArgumentException), Del(() =>
    {
        scanner.Expect(MatchFunctions.Quoted());
    }), "Bounds don't work with quoted value");

    scanner.Expect(MatchFunctions.BoundChar);
    text = scanner.Get(MatchFunctions.HTMLAttribute());
    Assert.AreEqual("attr-bute", text, "Got attribue");

    scanner.ExpectChar('=');
    text = scanner.Get(MatchFunctions.Quoted());
    Assert.AreEqual("this is \\' a quoted value", text, "Got quoted value");

    // Expected value goes first so that a failure reports "expected ']'" and not the reverse.
    Assert.AreEqual(']', scanner.Current, "At right postiion");
}
/// <summary>
/// Parses the only argument of a pseudoselector, applying the quoting rule that
/// ParameterQuoted reports for parameter index 0.
/// </summary>
///
/// <exception cref="ArgumentException">
/// Thrown when text remains in the argument after the quoted or optionally quoted value
/// has been matched.
/// </exception>
/// <exception cref="NotImplementedException">
/// Thrown when ParameterQuoted returns a rule this method does not handle.
/// </exception>
///
/// <param name="value">
/// The raw argument text.
/// </param>
///
/// <returns>
/// The matched argument, or <paramref name="value"/> unchanged when the rule is NeverQuoted.
/// </returns>
protected string ParseSingleArg(string value)
{
    IStringScanner argScanner = Scanner.Create(value);
    var rule = ParameterQuoted(0);

    // Unquoted arguments are passed through untouched.
    if (rule == QuotingRule.NeverQuoted)
    {
        return value;
    }

    if (rule == QuotingRule.OptionallyQuoted)
    {
        argScanner.Expect(MatchFunctions.OptionallyQuoted());
    }
    else if (rule == QuotingRule.AlwaysQuoted)
    {
        argScanner.Expect(MatchFunctions.Quoted());
    }
    else
    {
        throw new NotImplementedException("Unimplemented quoting rule");
    }

    // A single argument must account for the whole input.
    if (!argScanner.Finished)
    {
        throw new ArgumentException(InvalidArgumentsError());
    }

    return argScanner.Match;
}
/// <summary>
/// Exercises the OptionallyQuoted pattern against a single-quoted value, an unquoted value
/// and a double-quoted value.
/// </summary>
public void OptionallyQuoted()
{
    scanner = @"key[value='this ""is \' a quoted value']";

    // Consume the leading word, then take the bounded section as its own scanner.
    var afterKey = scanner.ExpectAlpha();
    StringScannerEngine bracketed = afterKey.Get(MatchFunctions.Bounded);
    scanner.AssertFinished();

    // Step over the attribute name and the '=' so the value is next.
    var afterName = bracketed.Expect(MatchFunctions.HTMLAttribute());
    afterName.Expect("=");

    var pattern = new OptionallyQuoted { Terminators = "]" };

    string result = bracketed.Get(pattern);
    Assert.AreEqual(@"this ""is \' a quoted value", result, "Got the right text");

    bracketed.Text = @"this ""is \' a quoted value";
    result = bracketed.Get(pattern);
    Assert.AreEqual("this \"is \\' a quoted value", result, "Got the right text without quotes");

    bracketed.Text = @"""this is \"" a quoted value""";
    result = bracketed.Get(pattern);
    Assert.AreEqual("this is \\\" a quoted value", result, "Got the right text with quotes");
}
/// <summary>
/// Checks the built-in match functions: tag selector name, bounded content containing
/// quotes, attribute name, quoted value, and multi-character bounds.
/// </summary>
public void BuiltInSelectors()
{
    scanner = @"someSelect[attr-bute= 'this ""is \' a quoted value']";

    var token = scanner.Get(MatchFunctions.HTMLTagSelectorName());
    Assert.AreEqual("someSelect", token, "Got first word");

    // The bracketed section becomes its own scanner and exhausts the outer one.
    StringScannerEngine attrScanner = scanner.Get(MatchFunctions.BoundedWithQuotedContent);
    Assert.IsTrue(scanner.Finished, "Outer scanner finished");
    Assert.AreEqual(@"attr-bute= 'this ""is \' a quoted value'", attrScanner.Text, "Inner scanner text is right");

    token = attrScanner.Get(MatchFunctions.HTMLAttribute());
    Assert.AreEqual("attr-bute", token, "Got the attribute name");

    attrScanner.Expect("=");
    token = attrScanner.Get(MatchFunctions.Quoted());
    Assert.AreEqual(@"this ""is \' a quoted value", token, "Quotes were dequoted");
    Assert.IsTrue(attrScanner.Finished, "It's finished after we got the last text");

    // Bounds made of whole strings rather than single characters.
    scanner = @"<comment>How's complex bounding working?</comment> the end";
    token = scanner.GetBoundedBy("<comment>", "</comment>");
    Assert.AreEqual(@"How's complex bounding working?", token, "Complex bounding worked");
    Assert.AreEqual(' ', scanner.Current, "At the right place");

    var afterFirstWord = scanner.ExpectAlpha();
    var afterSecondWord = afterFirstWord.ExpectAlpha();
    Assert.IsTrue(afterSecondWord.Finished, "At the end");
}
/// <summary>
/// Splits "div:contains('Product')" into its tag name, pseudo-selector name and bounded
/// argument, checking each piece.
/// </summary>
public void Selectors()
{
    scanner = "div:contains('Product')";

    string tagName = scanner.Get(MatchFunctions.HTMLTagSelectorName());
    Assert.AreEqual("div", tagName, "Got the first part");

    scanner.Expect(":");
    string pseudoName = scanner.Get(MatchFunctions.PseudoSelector);
    Assert.AreEqual("contains", pseudoName, "Got the 2nd part");

    string argument = scanner.Get(MatchFunctions.Bounded);
    Assert.AreEqual("'Product'", argument, "Got the 3rdd part");
}
/// <summary>
/// Splits an argument string into individual arguments, asking ParameterQuoted for the
/// quoting rule that applies at each argument position.
/// </summary>
///
/// <exception cref="NotImplementedException">
/// Thrown when ParameterQuoted returns a rule this method does not handle.
/// </exception>
///
/// <param name="value">
/// The raw argument text.
/// </param>
///
/// <returns>
/// The parsed arguments, in the order they were matched.
/// </returns>
protected string[] ParseArgs(string value)
{
    var parsed = new List<string>();
    IStringScanner argScanner = Scanner.Create(value);

    for (int position = 0; !argScanner.Finished; position++)
    {
        switch (ParameterQuoted(position))
        {
            case QuotingRule.OptionallyQuoted:
                argScanner.Expect(MatchFunctions.OptionallyQuoted(","));
                break;

            case QuotingRule.AlwaysQuoted:
                argScanner.Expect(MatchFunctions.Quoted());
                break;

            case QuotingRule.NeverQuoted:
                argScanner.Seek(',', true);
                break;

            default:
                throw new NotImplementedException("Unimplemented quoting rule");
        }

        parsed.Add(argScanner.Match);

        // Advance one character past where the match stopped (presumably the ','
        // separator) before reading the next argument.
        if (!argScanner.Finished)
        {
            argScanner.Next();
        }
    }

    return parsed.ToArray();
}
/// <summary>
/// Returns the numeric value only of a style, ignoring units.
/// </summary>
///
/// <remarks>
/// The number is parsed with the invariant culture, so "1.5" always means one and a half
/// regardless of the current thread culture.
/// </remarks>
///
/// <param name="style">
/// The name of the style to read.
/// </param>
///
/// <returns>
/// A double, or null if the style did not exist or did not contain a numeric value.
/// </returns>
public double? NumberPart(string style)
{
    string styleValue = GetStyle(style);
    if (styleValue == null)
    {
        return null;
    }

    IStringScanner scanner = Scanner.Create(styleValue);
    string numString;
    if (!scanner.TryGet(MatchFunctions.Number(), out numString))
    {
        return null;
    }

    // Same number styles as the default double.TryParse overload, but with a fixed culture:
    // the current culture may use ',' as the decimal separator and misread style values.
    double num;
    bool parsed = double.TryParse(
        numString,
        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
        System.Globalization.CultureInfo.InvariantCulture,
        out num);

    return parsed ? (double?)num : null;
}
/// <summary>
/// Reads one operand at the scanner's current position: a number literal, a variable, a
/// function call with its arguments, or a parenthesized sub-expression. A leading '+' is
/// skipped; a leading '-' becomes either the sign of the number that follows or the
/// literal -1. On return, ParseEnd records whether the scanner is finished.
/// </summary>
///
/// <typeparam name="T">
/// The type used for typed literals, functions and variables.
/// </typeparam>
///
/// <exception cref="ArgumentException">
/// Thrown when the input ends right after a leading '-', or when the current character
/// cannot start an operand.
/// </exception>
/// <exception cref="InvalidCastException">
/// Thrown when the matched number text cannot be parsed as a double.
/// </exception>
///
/// <returns>
/// The operand that was read.
/// </returns>
protected IOperand GetOperand<T>() where T : IConvertible
{
    string text = "";
    IOperand output = null;
    scanner.SkipWhitespace();

    if (scanner.Current == '-')
    {
        // A leading '-' followed by a number character is kept as the sign of that number;
        // followed by anything else (e.g. a variable) it is returned as the literal -1.
        scanner.Next();
        if (scanner.Finished)
        {
            throw new ArgumentException("Unexpected end of string found, expected an operand (a number or variable name)");
        }
        if (CharacterData.IsType(scanner.Current, CharacterType.Number))
        {
            text += "-";
        }
        else
        {
            output = new Literal<T>(-1);
        }
    }
    else if (scanner.Current == '+')
    {
        // ignore leading +
        scanner.Next();
    }

    if (output == null)
    {
        if (scanner.Info.Numeric)
        {
            text += scanner.Get(MatchFunctions.Number());

            // Equation literals use '.' as the decimal separator; parse with the invariant
            // culture so the result does not depend on the current thread culture.
            double num;
            bool parsed = Double.TryParse(
                text,
                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                System.Globalization.CultureInfo.InvariantCulture,
                out num);
            if (!parsed)
            {
                throw new InvalidCastException("Unable to parse number from '" + text + "'");
            }
            output = IsTyped ? new Literal<T>(num) : new Literal(num);
        }
        else if (scanner.Info.Alpha)
        {
            text += scanner.GetAlpha();
            if (scanner.CurrentOrEmpty == "(")
            {
                // A name followed by '(' is a function call: each comma-terminated
                // argument is parsed as its own equation and added as an operand.
                IFunction func = Utils.GetFunction<T>(text);

                var inner = scanner.ExpectBoundedBy('(', true).ToNewScanner("{0},");

                while (!inner.Finished)
                {
                    string parm = inner.Get(MatchFunctions.BoundedBy(boundEnd: ","));
                    EquationParserEngine innerParser = new EquationParserEngine();

                    IOperand innerOperand = innerParser.Parse<T>(parm);
                    func.AddOperand(innerOperand);
                }
                CacheVariables(func);
                output = func;
            }
            else
            {
                IVariable variable = GetVariable<T>(text);
                output = variable;
            }
        }
        else if (scanner.Current == '(')
        {
            // Parenthesized sub-expression: parse its contents with a fresh parser.
            string inner = scanner.Get(MatchFunctions.BoundedBy("("));
            var parser = new EquationParserEngine();
            parser.Parse<T>(inner);
            output = parser.Clause;
            CacheVariables(output);
        }
        else
        {
            throw new ArgumentException("Unexpected character '" + scanner.Match + "' found, expected an operand (a number or variable name)");
        }
    }

    scanner.SkipWhitespace();
    ParseEnd = scanner.Finished;

    return output;
}
/// <summary>
/// Treats the text beginning at the current character as a number and seeks to the next
/// character that would terminate a valid number.
/// </summary>
///
/// <param name="requireWhitespaceTerminator">
/// (optional) passed through to MatchFunctions.Number; false when omitted.
/// </param>
///
/// <returns>
/// The result of Expect for the number pattern.
/// </returns>
public IStringScanner ExpectNumber(bool requireWhitespaceTerminator = false)
{
    var numberPattern = MatchFunctions.Number(requireWhitespaceTerminator);
    return Expect(numberPattern);
}
/// <summary>
/// Parse the string, and return a sequence of Selector objects.
/// </summary>
///
/// <remarks>
/// Pseudo-selector names are lower-cased with the invariant culture before being matched,
/// so matching does not depend on the current thread culture.
/// </remarks>
///
/// <param name="selector">
/// The selector (or HTML) text to parse.
/// </param>
///
/// <exception cref="NotImplementedException">
/// Thrown for CSS pseudoelement selectors and for pseudoclasses that require a browser.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown for an unknown pseudo-class, an unknown attribute matching operator, or text
/// past the start of the input that cannot be read as a tag name.
/// </exception>
///
/// <returns>
/// The parsed Selectors; when nothing was parsed it contains a single clause of type
/// SelectorType.None with TraversalType.Filter.
/// </returns>
public Selector Parse(string selector)
{
    Selectors = new Selector();

    string sel = (selector ?? String.Empty).Trim();

    if (IsHtml(selector))
    {
        Current.Html = sel;
        Current.SelectorType = SelectorType.HTML;
        Selectors.Add(Current);
        return Selectors;
    }

    scanner = Scanner.Create(sel);

    while (!scanner.Finished)
    {
        switch (scanner.Current)
        {
            case '*':
                StartNewSelector(SelectorType.All);
                scanner.Next();
                break;

            case '<':
                // not selecting - creating html
                Current.Html = sel;
                scanner.End();
                break;

            case ':':
                scanner.Next();

                // Culture-invariant lower-casing: with ToLower() a Turkish culture maps
                // 'I' to a dotless i and ":INPUT" would miss the "input" case below.
                string key = scanner.Get(MatchFunctions.PseudoSelector).ToLowerInvariant();
                switch (key)
                {
                    case "input":
                        AddTagSelector("input");
                        AddTagSelector("textarea", true);
                        AddTagSelector("select", true);
                        AddTagSelector("button", true);
                        break;

                    case "text":
                        // input[type=text] ...
                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.Equals;
                        Current.AttributeName = "type";
                        Current.AttributeValue = "text";

                        // ... grouped with input elements that have no type attribute
                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag, CombinatorType.Grouped, Current.TraversalType);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName = "type";

                        Current.SelectorType |= SelectorType.Tag;
                        Current.Tag = "input";
                        break;

                    case "checkbox":
                    case "radio":
                    case "button":
                    case "file":
                    case "image":
                    case "password":
                        AddInputSelector(key, "input");
                        break;

                    case "reset":
                    case "submit":
                        AddInputSelector(key);
                        break;

                    case "checked":
                    case "selected":
                    case "disabled":
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                        Current.AttributeName = key;
                        break;

                    case "enabled":
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName = "disabled";
                        break;

                    case "first-letter":
                    case "first-line":
                    case "before":
                    case "after":
                        throw new NotImplementedException("The CSS pseudoelement selectors are not implemented in CsQuery.");

                    case "target":
                    case "link":
                    case "hover":
                    case "active":
                    case "focus":
                    case "visited":
                        throw new NotImplementedException("Pseudoclasses that require a browser aren't implemented.");

                    default:
                        if (!AddPseudoSelector(key))
                        {
                            throw new ArgumentException("Unknown pseudo-class :\"" + key + "\". If this is a valid CSS or jQuery selector, please let us know.");
                        }
                        break;
                }
                break;

            case '.':
                StartNewSelector(SelectorType.Class);
                scanner.Next();
                Current.Class = scanner.Get(MatchFunctions.CssClassName);
                break;

            case '#':
                scanner.Next();
                if (!scanner.Finished)
                {
                    StartNewSelector(SelectorType.ID);
                    Current.ID = scanner.Get(MatchFunctions.HtmlIDValue());
                }
                break;

            case '[':
                StartNewSelector(SelectorType.AttributeValue);

                IStringScanner innerScanner = scanner.ExpectBoundedBy('[', true).ToNewScanner();

                Current.AttributeName = innerScanner.Get(MatchFunctions.HTMLAttribute());
                innerScanner.SkipWhitespace();

                if (innerScanner.Finished)
                {
                    Current.AttributeSelectorType = AttributeSelectorType.Exists;
                }
                else
                {
                    string matchType = innerScanner.Get("=", "^=", "*=", "~=", "$=", "!=", "|=");

                    // CSS allows [attr=] as a synonym for [attr]
                    if (innerScanner.Finished)
                    {
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                    }
                    else
                    {
                        var rawValue = innerScanner.Expect(expectsOptionallyQuotedValue()).ToNewScanner();

                        Current.AttributeValue = rawValue.Finished ?
                            "" :
                            rawValue.Get(new EscapedString());

                        switch (matchType)
                        {
                            case "=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Equals;
                                break;

                            case "^=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWith;
                                // attributevalue starts with "" matches nothing
                                if (Current.AttributeValue == "")
                                {
                                    Current.AttributeValue = "" + (char)0;
                                }
                                break;

                            case "*=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Contains;
                                break;

                            case "~=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.ContainsWord;
                                break;

                            case "$=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.EndsWith;
                                break;

                            case "!=":
                                Current.AttributeSelectorType = AttributeSelectorType.NotEquals;
                                // must be matched manually - a missing attribute also validates as notEquals
                                break;

                            case "|=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWithOrHyphen;
                                break;

                            default:
                                throw new ArgumentException("Unknown attibute matching operator '" + matchType + "'");
                        }
                    }
                }
                break;

            case ',':
                FinishSelector();
                NextCombinatorType = CombinatorType.Root;
                NextTraversalType = TraversalType.All;
                scanner.NextNonWhitespace();
                break;

            case '+':
                StartNewSelector(TraversalType.Adjacent);
                scanner.NextNonWhitespace();
                break;

            case '~':
                StartNewSelector(TraversalType.Sibling);
                scanner.NextNonWhitespace();
                break;

            case '>':
                StartNewSelector(TraversalType.Child);
                // This is a weird thing because if you use the > selector against a set directly, the meaning is "filter"
                // whereas if it is used in a combination selector the meaning is "filter for 1st child".
                // The depth is always set to 1 here; it was previously considered to use 0 for a Root combinator.
                Current.ChildDepth = 1;
                scanner.NextNonWhitespace();
                break;

            case ' ':
                // if a ">" or "," is later found, it will be overridden.
                scanner.NextNonWhitespace();
                NextTraversalType = TraversalType.Descendent;
                break;

            default:
                string tag = "";
                if (scanner.TryGet(MatchFunctions.HTMLTagSelectorName(), out tag))
                {
                    AddTagSelector(tag);
                }
                else
                {
                    // Nothing recognizable at the very start: treat the whole input as HTML.
                    if (scanner.Index == 0)
                    {
                        Current.Html = sel;
                        Current.SelectorType = SelectorType.HTML;
                        scanner.End();
                    }
                    else
                    {
                        throw new ArgumentException(scanner.LastError);
                    }
                }
                break;
        }
    }

    // Close any open selectors
    FinishSelector();

    if (Selectors.Count == 0)
    {
        var empty = new SelectorClause
        {
            SelectorType = SelectorType.None,
            TraversalType = TraversalType.Filter
        };
        Selectors.Add(empty);
    }

    return Selectors;
}
/// <summary>
/// Treats the text beginning at the current character as a number and seeks to the next
/// character that would terminate a valid number.
/// </summary>
///
/// <returns>
/// The result of Expect for the number pattern.
/// </returns>
public IStringScanner ExpectNumber()
{
    var numberPattern = MatchFunctions.Number();
    return Expect(numberPattern);
}
/// <summary>
/// Reads one operand at the scanner's current position: a number literal, a variable, a
/// function call with its arguments, or a parenthesized sub-expression. On return,
/// ParseEnd records whether the scanner is finished.
/// </summary>
///
/// <typeparam name="T">
/// The type used for typed literals, functions and variables.
/// </typeparam>
///
/// <exception cref="InvalidCastException">
/// Thrown when the matched number text cannot be parsed as a double.
/// </exception>
/// <exception cref="InvalidOperationException">
/// Thrown when the input at the current position cannot start an operand.
/// </exception>
///
/// <returns>
/// The operand that was read.
/// </returns>
protected IOperand GetOperand<T>() where T : IConvertible
{
    string text;
    IOperand output;
    scanner.SkipWhitespace();

    if (scanner.Info.NumericExtended)
    {
        text = scanner.Get(MatchFunctions.Number());

        // Equation literals use '.' as the decimal separator; parse with the invariant
        // culture so the result does not depend on the current thread culture.
        double num;
        bool parsed = Double.TryParse(
            text,
            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
            System.Globalization.CultureInfo.InvariantCulture,
            out num);
        if (!parsed)
        {
            throw new InvalidCastException("Unable to parse number from '" + text + "'");
        }
        output = IsTyped ? new Literal<T>(num) : new Literal(num);
    }
    else if (scanner.Info.Alpha)
    {
        text = scanner.GetAlpha();
        if (scanner.NextCharOrEmpty == "(")
        {
            // A name followed by '(' is a function call: each comma-terminated argument
            // is parsed as its own equation and added as an operand.
            IFunction func = Utils.GetFunction<T>(text);

            var inner = scanner.ExpectBoundedBy('(', true).ToNewScanner("{0},");

            while (!inner.Finished)
            {
                string parm = inner.Get(MatchFunctions.BoundedBy(boundEnd: ","));
                EquationParser innerParser = new EquationParser();

                IOperand innerOperand = innerParser.Parse<T>(parm);
                func.AddOperand(innerOperand);
            }
            CacheVariables(func);
            output = func;
        }
        else
        {
            IVariable variable = GetVariable<T>(text);
            output = variable;
        }
    }
    else if (scanner.NextChar == '(')
    {
        // Parenthesized sub-expression: parse its contents with a fresh parser.
        string inner = scanner.Get(MatchFunctions.BoundedBy("("));
        var parser = new EquationParser();
        parser.Parse<T>(inner);
        output = parser.Clause;
        CacheVariables(output);
    }
    else
    {
        throw new InvalidOperationException("Unexpected character '" + scanner.Match + "' found, expected an operand (a number or variable name)");
    }

    scanner.SkipWhitespace();
    ParseEnd = scanner.Finished;

    return output;
}