Example #1
0
        public void StringParsing()
        {
            // Walks the scanner across an attribute selector one token at a time and checks
            // what each matcher returns, including a quoted value containing an escaped quote.
            string test = @"someSelect[attr-bute= 'this is \' a quoted value']";

            scanner = test;
            scanner.IgnoreWhitespace = true;

            var text = scanner.GetAlpha();

            Assert.AreEqual("someSelect", text, "Got first word");

            // The next character in the input is '[', so expecting a quoted string here must fail.
            Assert.Throws(typeof(ArgumentException), Del(() =>
            {
                scanner.Expect(MatchFunctions.Quoted());
            }), "Bounds don't work with quoted value");

            scanner.Expect(MatchFunctions.BoundChar);

            text = scanner.Get(MatchFunctions.HTMLAttribute());
            Assert.AreEqual("attr-bute", text, "Got attribue");

            scanner.ExpectChar('=');

            text = scanner.Get(MatchFunctions.Quoted());

            // The message previously repeated "Got first word", which misdescribed a failure here.
            Assert.AreEqual("this is \\' a quoted value", text, "Got quoted value");

            // Expected value goes first so that a failure reports expected/actual the right way round.
            Assert.AreEqual(']', scanner.Current, "At right postiion");
        }
Example #2
0
        /// <summary>
        /// Parses the text passed to a pseudoselector as one single argument, applying the quoting
        /// rule that ParameterQuoted reports for parameter index 0.
        /// </summary>
        ///
        /// <exception cref="ArgumentException">
        /// Thrown when the rule is OptionallyQuoted or AlwaysQuoted and the scanner has not reached
        /// the end of the text once the value has been matched.
        /// </exception>
        /// <exception cref="NotImplementedException">
        /// Thrown when ParameterQuoted returns a rule this method does not handle.
        /// </exception>
        ///
        /// <param name="value">
        /// The raw argument text.
        /// </param>
        ///
        /// <returns>
        /// The scanner's match for the two quoted rules, or <paramref name="value"/> unchanged for
        /// NeverQuoted.
        /// </returns>

        protected string ParseSingleArg(string value)
        {
            IStringScanner argScanner = Scanner.Create(value);
            var            rule       = ParameterQuoted(0);

            // Unquoted arguments are passed through untouched.
            if (rule == QuotingRule.NeverQuoted)
            {
                return value;
            }

            if (rule == QuotingRule.OptionallyQuoted)
            {
                argScanner.Expect(MatchFunctions.OptionallyQuoted());
            }
            else if (rule == QuotingRule.AlwaysQuoted)
            {
                argScanner.Expect(MatchFunctions.Quoted());
            }
            else
            {
                throw new NotImplementedException("Unimplemented quoting rule");
            }

            // Text left over after the matched value means the argument was not a single value.
            if (!argScanner.Finished)
            {
                throw new ArgumentException(InvalidArgumentsError());
            }

            return argScanner.Match;
        }
Example #3
0
        public void OptionallyQuoted()
        {
            // Runs the OptionallyQuoted matcher (terminated by "]") over single-quoted,
            // unquoted and double-quoted input and checks the text it returns each time.
            scanner = @"key[value='this ""is \' a quoted value']";
            StringScannerEngine bracketed = scanner.ExpectAlpha()
                                            .Get(MatchFunctions.Bounded);

            scanner.AssertFinished();

            // Consume the attribute name and the '=' so the value is next.
            bracketed.Expect(MatchFunctions.HTMLAttribute())
            .Expect("=");

            var matcher = new OptionallyQuoted { Terminators = "]" };

            // Case 1: single-quoted value taken from the bracketed content.
            string actual = bracketed.Get(matcher);
            Assert.AreEqual(@"this ""is \' a quoted value", actual, "Got the right text");

            // Case 2: the same text with no surrounding quotes.
            bracketed.Text = @"this ""is \' a quoted value";
            actual         = bracketed.Get(matcher);
            Assert.AreEqual("this \"is \\' a quoted value", actual, "Got the right text without quotes");

            // Case 3: double-quoted text containing an escaped double quote.
            bracketed.Text = @"""this is \"" a quoted value""";
            actual         = bracketed.Get(matcher);
            Assert.AreEqual("this is \\\" a quoted value", actual, "Got the right text with quotes");
        }
Example #4
0
        public void BuiltInSelectors()
        {
            // Part 1: split "tag[attr= 'value']" into tag name, bracketed content, attribute
            // name and quoted value using the built-in matchers.
            scanner = @"someSelect[attr-bute= 'this ""is \' a quoted value']";

            var token = scanner.Get(MatchFunctions.HTMLTagSelectorName());
            Assert.AreEqual("someSelect", token, "Got first word");

            StringScannerEngine attrScanner = scanner.Get(MatchFunctions.BoundedWithQuotedContent);
            Assert.IsTrue(scanner.Finished, "Outer scanner finished");
            Assert.AreEqual(@"attr-bute= 'this ""is \' a quoted value'", attrScanner.Text, "Inner scanner text is right");

            token = attrScanner.Get(MatchFunctions.HTMLAttribute());
            Assert.AreEqual("attr-bute", token, "Got the attribute name");

            attrScanner.Expect("=");
            token = attrScanner.Get(MatchFunctions.Quoted());
            Assert.AreEqual(@"this ""is \' a quoted value", token, "Quotes were dequoted");
            Assert.IsTrue(attrScanner.Finished, "It's finished after we got the last text");

            // Part 2: multi-character start/end bounds.
            scanner = @"<comment>How's complex bounding working?</comment> the end";
            token   = scanner.GetBoundedBy("<comment>", "</comment>");
            Assert.AreEqual(@"How's complex bounding working?", token, "Complex bounding worked");
            Assert.AreEqual(' ', scanner.Current, "At the right place");

            // Two more alpha tokens ("the", "end") should exhaust the input.
            Assert.IsTrue(scanner.ExpectAlpha().ExpectAlpha().Finished, "At the end");
        }
Example #5
0
        public void Selectors()
        {
            // Breaks "div:contains('Product')" into tag name, pseudo-selector name and the
            // parenthesised argument, checking each piece as it is read.
            scanner = "div:contains('Product')";

            string piece = scanner.Get(MatchFunctions.HTMLTagSelectorName());
            Assert.AreEqual("div", piece, "Got the first part");

            scanner.Expect(":");

            piece = scanner.Get(MatchFunctions.PseudoSelector);
            Assert.AreEqual("contains", piece, "Got the 2nd part");

            piece = scanner.Get(MatchFunctions.Bounded);
            Assert.AreEqual("'Product'", piece, "Got the 3rdd part");
        }
Example #6
0
        /// <summary>
        /// Splits an argument string into individual arguments. The argument at each zero-based
        /// position is read according to the quoting rule ParameterQuoted returns for that position.
        /// </summary>
        ///
        /// <exception cref="NotImplementedException">
        /// Thrown when ParameterQuoted returns a rule this method does not handle.
        /// </exception>
        ///
        /// <param name="value">
        /// The raw argument text.
        /// </param>
        ///
        /// <returns>
        /// The scanner matches collected in order; empty when the scanner is finished from the start.
        /// </returns>

        protected string[] ParseArgs(string value)
        {
            var            collected  = new List <string>();
            IStringScanner argScanner = Scanner.Create(value);

            for (int position = 0; !argScanner.Finished; position++)
            {
                switch (ParameterQuoted(position))
                {
                case QuotingRule.NeverQuoted:
                    argScanner.Seek(',', true);
                    break;

                case QuotingRule.AlwaysQuoted:
                    argScanner.Expect(MatchFunctions.Quoted());
                    break;

                case QuotingRule.OptionallyQuoted:
                    argScanner.Expect(MatchFunctions.OptionallyQuoted(","));
                    break;

                default:
                    throw new NotImplementedException("Unimplemented quoting rule");
                }

                collected.Add(argScanner.Match);

                // Step over one character (presumably the separating comma) before the next argument.
                if (!argScanner.Finished)
                {
                    argScanner.Next();
                }
            }

            return collected.ToArray();
        }
Example #7
0
        /// <summary>
        /// Returns the numeric value only of a style, ignoring units.
        /// </summary>
        ///
        /// <remarks>
        /// The number is parsed with the invariant culture, so the result does not depend on the
        /// current thread culture (for example "1.5" is never read as fifteen on a culture that
        /// uses '.' as a group separator).
        /// </remarks>
        ///
        /// <param name="style">
        /// The style name, passed to GetStyle.
        /// </param>
        ///
        /// <returns>
        /// A double, or null if GetStyle returned null or the value did not start with a number
        /// that could be parsed.
        /// </returns>

        public double? NumberPart(string style)
        {
            string st = GetStyle(style);

            if (st == null)
            {
                return(null);
            }

            IStringScanner scanner = Scanner.Create(st);
            string         numString;
            double         num;

            // Same NumberStyles as the default double.TryParse overload, but with a fixed culture.
            if (scanner.TryGet(MatchFunctions.Number(), out numString) &&
                double.TryParse(numString,
                                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                                System.Globalization.CultureInfo.InvariantCulture,
                                out num))
            {
                return(num);
            }

            return(null);
        }
Example #8
0
        /// <summary>
        /// Reads the next operand from the scanner: a numeric literal, a variable name, a function
        /// call with comma-separated arguments, or a parenthesised sub-expression. A leading '+' is
        /// skipped; a leading '-' is either folded into a following number or returned on its own
        /// as the literal -1. Sets ParseEnd to whether the scanner is finished afterwards.
        /// </summary>
        ///
        /// <exception cref="ArgumentException">
        /// Thrown when the text ends right after a leading '-', or when the current character
        /// cannot start an operand.
        /// </exception>
        /// <exception cref="InvalidCastException">
        /// Thrown when the text matched as a number cannot be parsed as a double.
        /// </exception>
        ///
        /// <returns>
        /// The operand that was read.
        /// </returns>
        protected IOperand GetOperand <T>() where T : IConvertible
        {
            string   text   = "";
            IOperand output = null;

            scanner.SkipWhitespace();

            if (scanner.Current == '-')
            {
                // convert leading - to "-1" if it precedes a variable, otherwise
                // just add it to the output stream

                scanner.Next();
                if (scanner.Finished)
                {
                    throw new ArgumentException("Unexpected end of string found, expected an operand (a number or variable name)");
                }
                if (CharacterData.IsType(scanner.Current, CharacterType.Number))
                {
                    text += "-";
                }
                else
                {
                    // The token after the '-' is not consumed here; only the -1 literal is returned.
                    output = new Literal <T>(-1);
                }
            }
            else if (scanner.Current == '+')
            {
                // ignore leading +

                scanner.Next();
            }

            if (output == null)
            {
                if (scanner.Info.Numeric)
                {
                    text += scanner.Get(MatchFunctions.Number());
                    double num;

                    // Parse with the invariant culture so that "1.5" means the same thing regardless
                    // of the current thread culture. The NumberStyles match the default overload.
                    if (Double.TryParse(text,
                                        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                                        System.Globalization.CultureInfo.InvariantCulture,
                                        out num))
                    {
                        output = IsTyped ? new Literal <T>(num) : new Literal(num);
                    }
                    else
                    {
                        throw new InvalidCastException("Unable to parse number from '" + text + "'");
                    }
                }
                else if (scanner.Info.Alpha)
                {
                    text += scanner.GetAlpha();
                    if (scanner.CurrentOrEmpty == "(")
                    {
                        // A name followed by '(' is a function call.
                        IFunction func = Utils.GetFunction <T>(text);

                        // The "{0}," template presumably appends a comma so every argument,
                        // including the last, ends with the "," bound used below.
                        var inner = scanner.ExpectBoundedBy('(', true).ToNewScanner("{0},");

                        while (!inner.Finished)
                        {
                            // Each argument is parsed by its own parser instance.
                            string parm = inner.Get(MatchFunctions.BoundedBy(boundEnd: ","));
                            EquationParserEngine innerParser = new EquationParserEngine();

                            IOperand innerOperand = innerParser.Parse <T>(parm);
                            func.AddOperand(innerOperand);
                        }
                        CacheVariables(func);
                        output = func;
                    }
                    else
                    {
                        IVariable var = GetVariable <T>(text);
                        output = var;
                    }
                }
                else if (scanner.Current == '(')
                {
                    // Parenthesised sub-expression: parse its content with a fresh parser.
                    string inner  = scanner.Get(MatchFunctions.BoundedBy("("));
                    var    parser = new EquationParserEngine();
                    parser.Parse <T>(inner);
                    output = parser.Clause;
                    CacheVariables(output);
                }
                else
                {
                    throw new ArgumentException("Unexpected character '" + scanner.Match + "' found, expected an operand (a number or variable name)");
                }
            }

            scanner.SkipWhitespace();
            ParseEnd = scanner.Finished;

            return(output);
        }
        /// <summary>
        /// Expects a number at the current position by applying the MatchFunctions.Number pattern
        /// through Expect.
        /// </summary>
        ///
        /// <param name="requireWhitespaceTerminator">
        /// (optional) Forwarded unchanged to MatchFunctions.Number; defaults to false.
        /// </param>
        ///
        /// <returns>
        /// The scanner returned by Expect.
        /// </returns>

        public IStringScanner ExpectNumber(bool requireWhitespaceTerminator = false)
        {
            var numberPattern = MatchFunctions.Number(requireWhitespaceTerminator);

            return Expect(numberPattern);
        }
Example #10
0
        /// <summary>
        /// Parse the string, and return a sequence of Selector objects. The text is scanned one
        /// character at a time; each leading character (*, :, ., #, [, a combinator, or the start
        /// of a tag name) decides which kind of selector clause is built.
        /// </summary>
        /// <param name="selector">The selector text; null is treated as an empty string.</param>
        /// <returns>
        /// The Selectors object populated by this call. If no clause was added, it contains one
        /// clause with SelectorType.None and TraversalType.Filter.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Thrown for an unknown pseudo-class, an unknown attribute matching operator, or text that
        /// is not a valid tag name anywhere other than at index 0.
        /// </exception>
        /// <exception cref="NotImplementedException">
        /// Thrown for CSS pseudoelement selectors and for pseudoclasses that require a browser.
        /// </exception>
        public Selector Parse(string selector)
        {
            Selectors = new Selector();

            string sel = (selector ?? String.Empty).Trim();

            // NOTE(review): IsHtml receives the original argument while everything below uses the
            // trimmed copy - confirm this is intended.
            if (IsHtml(selector))
            {
                Current.Html         = sel;
                Current.SelectorType = SelectorType.HTML;
                Selectors.Add(Current);
                return(Selectors);
            }

            scanner = Scanner.Create(sel);

            while (!scanner.Finished)
            {
                switch (scanner.Current)
                {
                case '*':
                    StartNewSelector(SelectorType.All);
                    scanner.Next();
                    break;

                case '<':
                    // not selecting - creating html
                    Current.Html = sel;
                    scanner.End();
                    break;

                case ':':
                    scanner.Next();

                    // Lower-case with the invariant culture: the keys below are ASCII literals, and
                    // a culture-sensitive ToLower() would turn "INPUT" into "ınput" under tr-TR.
                    string key = scanner.Get(MatchFunctions.PseudoSelector).ToLowerInvariant();
                    switch (key)
                    {
                    case "input":
                        AddTagSelector("input");
                        AddTagSelector("textarea", true);
                        AddTagSelector("select", true);
                        AddTagSelector("button", true);
                        break;

                    case "text":
                        // Two grouped clauses: input[type=text], and input with no type attribute.
                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.Equals;
                        Current.AttributeName         = "type";
                        Current.AttributeValue        = "text";

                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag, CombinatorType.Grouped, Current.TraversalType);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName         = "type";

                        // NOTE(review): these two assignments appear to repeat values already set
                        // on this clause just above - confirm before removing.
                        Current.SelectorType |= SelectorType.Tag;
                        Current.Tag           = "input";
                        break;

                    case "checkbox":
                    case "radio":
                    case "button":
                    case "file":
                    case "image":
                    case "password":
                        AddInputSelector(key, "input");
                        break;

                    case "reset":
                    case "submit":
                        AddInputSelector(key);
                        break;

                    case "checked":
                    case "selected":
                    case "disabled":
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                        Current.AttributeName         = key;
                        break;

                    case "enabled":
                        // :enabled is expressed as "has no disabled attribute".
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName         = "disabled";
                        break;

                    case "first-letter":
                    case "first-line":
                    case "before":
                    case "after":
                        throw new NotImplementedException("The CSS pseudoelement selectors are not implemented in CsQuery.");

                    case "target":
                    case "link":
                    case "hover":
                    case "active":
                    case "focus":
                    case "visited":
                        throw new NotImplementedException("Pseudoclasses that require a browser aren't implemented.");

                    default:
                        if (!AddPseudoSelector(key))
                        {
                            throw new ArgumentException("Unknown pseudo-class :\"" + key + "\". If this is a valid CSS or jQuery selector, please let us know.");
                        }
                        break;
                    }
                    break;

                case '.':
                    StartNewSelector(SelectorType.Class);
                    scanner.Next();
                    Current.Class = scanner.Get(MatchFunctions.CssClassName);
                    break;

                case '#':

                    scanner.Next();

                    // A trailing '#' with nothing after it is skipped without starting a clause.
                    if (!scanner.Finished)
                    {
                        StartNewSelector(SelectorType.ID);
                        Current.ID = scanner.Get(MatchFunctions.HtmlIDValue());
                    }

                    break;

                case '[':
                    StartNewSelector(SelectorType.AttributeValue);

                    // The bracketed content is handed to its own scanner.
                    IStringScanner innerScanner = scanner.ExpectBoundedBy('[', true).ToNewScanner();

                    Current.AttributeName = innerScanner.Get(MatchFunctions.HTMLAttribute());
                    innerScanner.SkipWhitespace();

                    if (innerScanner.Finished)
                    {
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                    }
                    else
                    {
                        string matchType = innerScanner.Get("=", "^=", "*=", "~=", "$=", "!=", "|=");

                        // CSS allows [attr=] as a synonym for [attr]
                        if (innerScanner.Finished)
                        {
                            Current.AttributeSelectorType = AttributeSelectorType.Exists;
                        }
                        else
                        {
                            var rawValue = innerScanner.Expect(expectsOptionallyQuotedValue()).ToNewScanner();

                            Current.AttributeValue = rawValue.Finished ?
                                                     "" :
                                                     rawValue.Get(new EscapedString());

                            switch (matchType)
                            {
                            case "=":
                                Current.SelectorType         |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Equals;
                                break;

                            case "^=":
                                Current.SelectorType         |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWith;
                                // attributevalue starts with "" matches nothing
                                if (Current.AttributeValue == "")
                                {
                                    Current.AttributeValue = "" + (char)0;
                                }
                                break;

                            case "*=":
                                Current.SelectorType         |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Contains;
                                break;

                            case "~=":
                                Current.SelectorType         |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.ContainsWord;
                                break;

                            case "$=":
                                Current.SelectorType         |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.EndsWith;
                                break;

                            case "!=":
                                Current.AttributeSelectorType = AttributeSelectorType.NotEquals;
                                // must matched manually - missing also validates as notEquals
                                break;

                            case "|=":
                                Current.SelectorType         |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWithOrHyphen;

                                break;

                            default:
                                throw new ArgumentException("Unknown attibute matching operator '" + matchType + "'");
                            }
                        }
                    }

                    break;

                case ',':
                    // A comma closes the current selector and starts a new root-level one.
                    FinishSelector();
                    NextCombinatorType = CombinatorType.Root;
                    NextTraversalType  = TraversalType.All;
                    scanner.NextNonWhitespace();
                    break;

                case '+':
                    StartNewSelector(TraversalType.Adjacent);
                    scanner.NextNonWhitespace();
                    break;

                case '~':
                    StartNewSelector(TraversalType.Sibling);
                    scanner.NextNonWhitespace();
                    break;

                case '>':
                    StartNewSelector(TraversalType.Child);
                    // This is a wierd thing because if you use the > selector against a set directly, the meaning is "filter"
                    // whereas if it is used in a combination selector the meaning is "filter for 1st child"
                    //Current.ChildDepth = (Current.CombinatorType == CombinatorType.Root ? 0 : 1);
                    Current.ChildDepth = 1;
                    scanner.NextNonWhitespace();
                    break;

                case ' ':
                    // if a ">" or "," is later found, it will be overridden.
                    scanner.NextNonWhitespace();
                    NextTraversalType = TraversalType.Descendent;
                    break;

                default:

                    string tag = "";
                    if (scanner.TryGet(MatchFunctions.HTMLTagSelectorName(), out tag))
                    {
                        AddTagSelector(tag);
                    }
                    else
                    {
                        // Unparseable text at the very start is treated as HTML; anywhere else
                        // it is an error.
                        if (scanner.Index == 0)
                        {
                            Current.Html         = sel;
                            Current.SelectorType = SelectorType.HTML;
                            scanner.End();
                        }
                        else
                        {
                            throw new ArgumentException(scanner.LastError);
                        }
                    }

                    break;
                }
            }
            // Close any open selectors
            FinishSelector();
            if (Selectors.Count == 0)
            {
                var empty = new SelectorClause
                {
                    SelectorType  = SelectorType.None,
                    TraversalType = TraversalType.Filter
                };
                Selectors.Add(empty);
            }
            return(Selectors);
        }
Example #11
0
 /// <summary>
 /// Expects a number at the current position by applying the MatchFunctions.Number pattern through Expect.
 /// </summary>
 /// <returns>The scanner returned by Expect.</returns>
 public IStringScanner ExpectNumber()
 {
     var numberPattern = MatchFunctions.Number();

     return Expect(numberPattern);
 }
Example #12
0
        /// <summary>
        /// Reads the next operand from the scanner: a numeric literal, a variable name, a function
        /// call with comma-separated arguments, or a parenthesised sub-expression. Sets ParseEnd to
        /// whether the scanner is finished afterwards.
        /// </summary>
        ///
        /// <exception cref="InvalidCastException">
        /// Thrown when the text matched as a number cannot be parsed as a double.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the next character cannot start an operand.
        /// </exception>
        ///
        /// <returns>
        /// The operand that was read.
        /// </returns>
        protected IOperand GetOperand <T>() where T : IConvertible
        {
            string   text;
            IOperand output;

            scanner.SkipWhitespace();
            if (scanner.Info.NumericExtended)
            {
                text = scanner.Get(MatchFunctions.Number());
                double num;

                // Parse with the invariant culture so that "1.5" means the same thing regardless
                // of the current thread culture. The NumberStyles match the default overload.
                if (Double.TryParse(text,
                                    System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                                    System.Globalization.CultureInfo.InvariantCulture,
                                    out num))
                {
                    output = IsTyped ? new Literal <T>(num) : new Literal(num);
                }
                else
                {
                    throw new InvalidCastException("Unable to parse number from '" + text + "'");
                }
            }
            else if (scanner.Info.Alpha)
            {
                text = scanner.GetAlpha();
                if (scanner.NextCharOrEmpty == "(")
                {
                    // A name followed by '(' is a function call.
                    IFunction func = Utils.GetFunction <T>(text);

                    // The "{0}," template presumably appends a comma so every argument,
                    // including the last, ends with the "," bound used below.
                    var inner = scanner.ExpectBoundedBy('(', true).ToNewScanner("{0},");

                    while (!inner.Finished)
                    {
                        // Each argument is parsed by its own parser instance.
                        string         parm        = inner.Get(MatchFunctions.BoundedBy(boundEnd: ","));
                        EquationParser innerParser = new EquationParser();

                        IOperand innerOperand = innerParser.Parse <T>(parm);
                        func.AddOperand(innerOperand);
                    }
                    CacheVariables(func);
                    output = func;
                }
                else
                {
                    IVariable var = GetVariable <T>(text);
                    output = var;
                }
            }
            else if (scanner.NextChar == '(')
            {
                // Parenthesised sub-expression: parse its content with a fresh parser.
                string inner  = scanner.Get(MatchFunctions.BoundedBy("("));
                var    parser = new EquationParser();
                parser.Parse <T>(inner);
                output = parser.Clause;
                CacheVariables(output);
            }
            else
            {
                throw new InvalidOperationException("Unexpected character '" + scanner.Match + "' found, expected an operand (a number or variable name)");
            }
            scanner.SkipWhitespace();
            ParseEnd = scanner.Finished;

            return(output);
        }