/// <summary>
/// Parses a selector string into the list of <see cref="Selector"/> objects it describes.
/// The <c>Selectors</c> list is replaced with a fresh one on every call.
/// </summary>
/// <param name="selector">The selector text; <c>null</c> is treated as an empty string.</param>
/// <returns>The <c>Selectors</c> list populated during this call.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// An unknown pseudoselector, or an unknown attribute matching operator, was encountered.
/// </exception>
/// <exception cref="InvalidOperationException">
/// Text that could not be matched as a tag name was found at a scanner position other than 0.
/// </exception>
public IEnumerable <Selector> Parse(string selector)
        {
            Selectors = new List <Selector>();

            string sel = (selector ?? String.Empty).Trim();

            // NOTE(review): IsHtml receives the raw argument (possibly null / untrimmed), not "sel" -
            // confirm that IsHtml tolerates both.
            if (IsHtml(selector))
            {
                Current.Html         = sel;
                Current.SelectorType = SelectorType.HTML;
                Selectors.Add(Current);
                return(Selectors);
            }
            scanner = Scanner.Create(sel);

            while (!scanner.Finished)
            {
                switch (scanner.NextChar)
                {
                case '*':
                    Current.SelectorType = SelectorType.All;
                    scanner.Next();
                    break;

                case '<':
                    // not selecting - creating html: keep the whole text and stop scanning
                    Current.Html = sel;
                    scanner.End();
                    break;

                case ':':
                    scanner.Next();
                    string key = scanner.Get(MatchFunctions.PseudoSelector);
                    switch (key)
                    {
                    case "checkbox":
                    case "radio":
                    case "button":
                    case "file":
                    case "text":
                    case "password":
                        // These pseudoselectors are expressed as [type=<key>]
                        StartNewSelector(SelectorType.Attribute);

                        Current.AttributeSelectorType = AttributeSelectorType.Equals;
                        Current.AttributeName         = "type";
                        Current.AttributeValue        = key;

                        // ":button" additionally gets a cumulative tag selector for <button>
                        if (key == "button" && !Current.SelectorType.HasFlag(SelectorType.Tag))
                        {
                            StartNewSelector(SelectorType.Tag, CombinatorType.Cumulative, Current.TraversalType);
                            Current.Tag = "button";
                        }
                        break;

                    case "checked":
                    case "selected":
                    case "disabled":
                        // Expressed as "attribute named <key> exists"
                        StartNewSelector(SelectorType.Attribute);
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                        Current.AttributeName         = key;
                        break;

                    case "enabled":
                        // Expressed as "attribute 'disabled' does not exist"
                        StartNewSelector(SelectorType.Attribute);
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName         = "disabled";
                        break;

                    case "contains":

                        StartNewSelector(SelectorType.Contains);
                        IStringScanner inner = scanner.ExpectBoundedBy('(', true).ToNewScanner();
                        Current.Criteria = inner.Get(MatchFunctions.OptionallyQuoted);
                        break;

                    case "eq":
                    case "gt":
                    case "lt":
                        StartNewSelector(SelectorType.Position);
                        switch (key)
                        {
                        case "eq": Current.PositionType = PositionType.IndexEquals; break;

                        case "lt": Current.PositionType = PositionType.IndexLessThan; break;

                        case "gt": Current.PositionType = PositionType.IndexGreaterThan; break;
                        }

                        // The index is given as a parenthesized number, e.g. ":eq(2)"
                        scanner.ExpectChar('(');
                        Current.PositionIndex = Convert.ToInt32(scanner.GetNumber());
                        scanner.ExpectChar(')');

                        break;

                    case "even":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.Even;
                        break;

                    case "odd":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.Odd;
                        break;

                    case "first":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.First;
                        break;

                    case "last":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.Last;
                        break;

                    case "last-child":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.LastChild;
                        break;

                    case "first-child":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.FirstChild;
                        break;

                    case "nth-child":
                        StartNewSelector(SelectorType.Position);
                        Current.PositionType = PositionType.NthChild;
                        Current.Criteria     = scanner.GetBoundedBy('(');
                        break;

                    case "has":
                    case "not":
                        StartNewSelector(key == "has" ? SelectorType.SubSelectorHas : SelectorType.SubSelectorNot);
                        Current.TraversalType = TraversalType.Descendent;

                        // The parenthesized text is itself a selector; it is kept both as raw
                        // criteria and as a parsed sub-selector chain.
                        string        criteria     = Current.Criteria = scanner.GetBoundedBy('(', true);
                        SelectorChain subSelectors = new SelectorChain(criteria);
                        Current.SubSelectors.Add(subSelectors);
                        break;

                    case "visible":
                        StartNewSelector(SelectorType.Other);
                        Current.OtherType = OtherType.Visible;
                        break;

                    default:
                        // Use the (paramName, message) overload: the single-string overload would
                        // treat the text as the parameter name rather than the message.
                        throw new ArgumentOutOfRangeException("selector", "Unknown pseudoselector :\"" + key + "\"");
                    }
                    break;

                case '.':
                    StartNewSelector(SelectorType.Class);
                    scanner.Next();
                    Current.Class = scanner.Get(MatchFunctions.CssClass);
                    break;

                case '#':

                    scanner.Next();
                    // A trailing "#" with nothing after it does not start an ID selector
                    if (!scanner.Finished)
                    {
                        StartNewSelector(SelectorType.ID);
                        Current.ID = scanner.Get(MatchFunctions.HtmlIDValue);
                    }

                    break;

                case '[':
                    StartNewSelector(SelectorType.Attribute);

                    // Everything between the brackets is parsed with its own scanner
                    IStringScanner innerScanner = scanner.ExpectBoundedBy('[', true).ToNewScanner();
                    Current.AttributeName = innerScanner.Get(MatchFunctions.HTMLAttribute);
                    innerScanner.SkipWhitespace();

                    if (innerScanner.Finished)
                    {
                        // "[attr]" - only the name was given
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                    }
                    else
                    {
                        string matchType = innerScanner.Get("=", "^=", "*=", "~=", "$=", "!=");
                        Current.AttributeValue = innerScanner.Get(expectsOptionallyQuotedValue());
                        switch (matchType)
                        {
                        case "=":
                            Current.AttributeSelectorType = AttributeSelectorType.Equals;
                            break;

                        case "^=":
                            Current.AttributeSelectorType = AttributeSelectorType.StartsWith;
                            break;

                        case "*=":
                            Current.AttributeSelectorType = AttributeSelectorType.Contains;
                            break;

                        case "~=":
                            Current.AttributeSelectorType = AttributeSelectorType.ContainsWord;
                            break;

                        case "$=":
                            Current.AttributeSelectorType = AttributeSelectorType.EndsWith;
                            break;

                        case "!=":
                            Current.AttributeSelectorType = AttributeSelectorType.NotEquals;
                            break;

                        default:
                            // (paramName, message) overload - see the pseudoselector default case
                            throw new ArgumentOutOfRangeException("selector", "Unknown attibute matching operator '" + matchType + "'");
                        }
                    }

                    break;

                case ',':
                    FinishSelector();
                    scanner.NextNonWhitespace();
                    break;

                case '>':
                    if (Current.IsComplete)
                    {
                        StartNewSelector(TraversalType.Child);
                    }
                    else
                    {
                        Current.TraversalType = TraversalType.Child;
                    }

                    // This is a weird thing because if you use the > selector against a set directly, the meaning is "filter"
                    // whereas if it is used in a combination selector the meaning is "filter for 1st child"
                    Current.ChildDepth = (Current.CombinatorType == CombinatorType.Root ? 0 : 1);
                    scanner.NextNonWhitespace();
                    break;

                case ' ':
                    // if a ">" or "," is later found, it will be overridden.
                    scanner.NextNonWhitespace();
                    StartNewSelector(TraversalType.Descendent);
                    break;

                default:

                    string tag = "";
                    if (scanner.TryGet(MatchFunctions.HTMLTagName, out tag))
                    {
                        StartNewSelector(SelectorType.Tag);
                        Current.Tag = tag;
                    }
                    else
                    {
                        // Unparseable text at the very start is treated as HTML; anywhere else it is an error
                        if (scanner.Pos == 0)
                        {
                            Current.Html         = sel;
                            Current.SelectorType = SelectorType.HTML;
                            scanner.End();
                        }
                        else
                        {
                            throw new InvalidOperationException(scanner.LastError);
                        }
                    }

                    break;
                }
            }
            // Close any open selectors
            FinishSelector();
            return(Selectors);
        }
Exemple #2
0
        /// <summary>
        /// Parse the selector string and return the <see cref="Selector"/> built from the clauses found in it.
        /// The <c>Selectors</c> member is replaced with a fresh instance on every call.
        /// </summary>
        /// <param name="selector">The selector text; <c>null</c> is treated as an empty string.</param>
        /// <returns>
        /// The <c>Selectors</c> instance populated during this call. If parsing added nothing, a single
        /// clause with <c>SelectorType.None</c> and <c>TraversalType.Filter</c> is added before returning.
        /// </returns>
        /// <exception cref="NotImplementedException">
        /// A CSS pseudo-element, or a pseudo-class that requires a browser, was used.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// An unknown pseudo-class or attribute matching operator was found, or text that could not be
        /// matched as a tag name was found at a scanner index other than 0.
        /// </exception>
        public Selector Parse(string selector)
        {
            Selectors = new Selector();

            string sel = (selector ?? String.Empty).Trim();

            // NOTE(review): IsHtml receives the raw argument (possibly null / untrimmed), not "sel" -
            // confirm that IsHtml tolerates both.
            if (IsHtml(selector))
            {
                Current.Html         = sel;
                Current.SelectorType = SelectorType.HTML;
                Selectors.Add(Current);
                return(Selectors);
            }

            scanner = Scanner.Create(sel);

            while (!scanner.Finished)
            {
                switch (scanner.Current)
                {
                case '*':
                    StartNewSelector(SelectorType.All);
                    scanner.Next();
                    break;

                case '<':
                    // not selecting - creating html: keep the whole text and stop scanning
                    Current.Html = sel;
                    scanner.End();
                    break;

                case ':':
                    scanner.Next();
                    // Lower-case with the invariant culture so the case labels below match regardless of
                    // the current culture (e.g. "INPUT" must not become "ınput" under tr-TR).
                    string key = scanner.Get(MatchFunctions.PseudoSelector).ToLowerInvariant();
                    switch (key)
                    {
                    case "input":
                        AddTagSelector("input");
                        AddTagSelector("textarea", true);
                        AddTagSelector("select", true);
                        AddTagSelector("button", true);
                        break;

                    case "text":
                        // First clause: input[type=text]
                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.Equals;
                        Current.AttributeName         = "type";
                        Current.AttributeValue        = "text";

                        // Second, grouped clause: input with no "type" attribute
                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag, CombinatorType.Grouped, Current.TraversalType);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName         = "type";

                        Current.SelectorType |= SelectorType.Tag;
                        Current.Tag           = "input";
                        break;

                    case "checkbox":
                    case "radio":
                    case "button":
                    case "file":
                    case "image":
                    case "password":
                        AddInputSelector(key, "input");
                        break;

                    case "reset":
                    case "submit":
                        AddInputSelector(key);
                        break;

                    case "checked":
                    case "selected":
                    case "disabled":
                        // Expressed as "attribute named <key> exists"
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                        Current.AttributeName         = key;
                        break;

                    case "enabled":
                        // Expressed as "attribute 'disabled' does not exist"
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName         = "disabled";
                        break;

                    case "first-letter":
                    case "first-line":
                    case "before":
                    case "after":
                        throw new NotImplementedException("The CSS pseudoelement selectors are not implemented in CsQuery.");

                    case "target":
                    case "link":
                    case "hover":
                    case "active":
                    case "focus":
                    case "visited":
                        throw new NotImplementedException("Pseudoclasses that require a browser aren't implemented.");

                    default:
                        // Anything else is handed to AddPseudoSelector; a false result means "unknown"
                        if (!AddPseudoSelector(key))
                        {
                            throw new ArgumentException("Unknown pseudo-class :\"" + key + "\". If this is a valid CSS or jQuery selector, please let us know.");
                        }
                        break;
                    }
                    break;

                case '.':
                    StartNewSelector(SelectorType.Class);
                    scanner.Next();
                    Current.Class = scanner.Get(MatchFunctions.CssClassName);
                    break;

                case '#':

                    scanner.Next();
                    // A trailing "#" with nothing after it does not start an ID selector
                    if (!scanner.Finished)
                    {
                        StartNewSelector(SelectorType.ID);
                        Current.ID = scanner.Get(MatchFunctions.HtmlIDValue());
                    }

                    break;

                case '[':
                    StartNewSelector(SelectorType.AttributeValue);

                    // Everything between the brackets is parsed with its own scanner
                    IStringScanner innerScanner = scanner.ExpectBoundedBy('[', true).ToNewScanner();

                    Current.AttributeName = innerScanner.Get(MatchFunctions.HTMLAttribute());
                    innerScanner.SkipWhitespace();

                    if (innerScanner.Finished)
                    {
                        // "[attr]" - only the name was given
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                    }
                    else
                    {
                        string matchType = innerScanner.Get("=", "^=", "*=", "~=", "$=", "!=", "|=");

                        // CSS allows [attr=] as a synonym for [attr]
                        if (innerScanner.Finished)
                        {
                            Current.AttributeSelectorType = AttributeSelectorType.Exists;
                        }
                        else
                        {
                            var rawValue = innerScanner.Expect(expectsOptionallyQuotedValue()).ToNewScanner();

                            // An empty (e.g. quoted-empty) value yields "", otherwise the value is
                            // read through the EscapedString matcher
                            Current.AttributeValue = rawValue.Finished ?
                                                     "" :
                                                     rawValue.Get(new EscapedString());

                            switch (matchType)
                            {
                            case "=":
                                Current.SelectorType         |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Equals;
                                break;

                            case "^=":
                                Current.SelectorType         |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWith;
                                // attributevalue starts with "" matches nothing
                                if (Current.AttributeValue == "")
                                {
                                    Current.AttributeValue = "" + (char)0;
                                }
                                break;

                            case "*=":
                                Current.SelectorType         |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Contains;
                                break;

                            case "~=":
                                Current.SelectorType         |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.ContainsWord;
                                break;

                            case "$=":
                                Current.SelectorType         |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.EndsWith;
                                break;

                            case "!=":
                                Current.AttributeSelectorType = AttributeSelectorType.NotEquals;
                                // must be matched manually - a missing attribute also validates as notEquals
                                break;

                            case "|=":
                                Current.SelectorType         |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWithOrHyphen;

                                break;

                            default:
                                throw new ArgumentException("Unknown attibute matching operator '" + matchType + "'");
                            }
                        }
                    }

                    break;

                case ',':
                    // A comma closes the current selector and resets the pending combinator/traversal
                    FinishSelector();
                    NextCombinatorType = CombinatorType.Root;
                    NextTraversalType  = TraversalType.All;
                    scanner.NextNonWhitespace();
                    break;

                case '+':
                    StartNewSelector(TraversalType.Adjacent);
                    scanner.NextNonWhitespace();
                    break;

                case '~':
                    StartNewSelector(TraversalType.Sibling);
                    scanner.NextNonWhitespace();
                    break;

                case '>':
                    StartNewSelector(TraversalType.Child);
                    // Child depth is always 1 here, whether or not the clause is at the root.
                    Current.ChildDepth = 1;
                    scanner.NextNonWhitespace();
                    break;

                case ' ':
                    // if a ">" or "," is later found, it will be overridden.
                    scanner.NextNonWhitespace();
                    NextTraversalType = TraversalType.Descendent;
                    break;

                default:

                    string tag = "";
                    if (scanner.TryGet(MatchFunctions.HTMLTagSelectorName(), out tag))
                    {
                        AddTagSelector(tag);
                    }
                    else
                    {
                        // Unparseable text at the very start is treated as HTML; anywhere else it is an error
                        if (scanner.Index == 0)
                        {
                            Current.Html         = sel;
                            Current.SelectorType = SelectorType.HTML;
                            scanner.End();
                        }
                        else
                        {
                            throw new ArgumentException(scanner.LastError);
                        }
                    }

                    break;
                }
            }
            // Close any open selectors
            FinishSelector();

            // Never return an empty Selector: fall back to a single "None" filter clause
            if (Selectors.Count == 0)
            {
                var empty = new SelectorClause
                {
                    SelectorType  = SelectorType.None,
                    TraversalType = TraversalType.Filter
                };
                Selectors.Add(empty);
            }
            return(Selectors);
        }
Exemple #3
0
        /// <summary>
        /// Reads the next operand from the scanner: an optionally signed number, a variable name,
        /// a function call with comma-separated arguments, or a parenthesized sub-expression.
        /// Sets <c>ParseEnd</c> to whether the scanner is finished after trailing whitespace is skipped.
        /// </summary>
        /// <typeparam name="T">The convertible type used for typed literals, variables and functions.</typeparam>
        /// <returns>The operand that was read.</returns>
        /// <exception cref="ArgumentException">
        /// The input ended right after a leading '-', or the next character cannot start an operand.
        /// </exception>
        /// <exception cref="InvalidCastException">The scanned numeric text could not be parsed as a number.</exception>
        protected IOperand GetOperand <T>() where T : IConvertible
        {
            string   text   = "";
            IOperand output = null;

            scanner.SkipWhitespace();

            if (scanner.Current == '-')
            {
                // convert leading - to "-1" if it precedes a variable, otherwise
                // just add it to the output stream

                scanner.Next();
                if (scanner.Finished)
                {
                    throw new ArgumentException("Unexpected end of string found, expected an operand (a number or variable name)");
                }
                if (CharacterData.IsType(scanner.Current, CharacterType.Number))
                {
                    text += "-";
                }
                else
                {
                    // The literal -1 is returned as the operand; the text after the '-' is left unread
                    output = new Literal <T>(-1);
                }
            }
            else if (scanner.Current == '+')
            {
                // ignore leading +

                scanner.Next();
            }

            if (output == null)
            {
                if (scanner.Info.Numeric)
                {
                    text += scanner.Get(MatchFunctions.Number());
                    double num;
                    // Parse with the invariant culture (same styles as the default overload) so that
                    // "1.5" is never read as 15 under cultures where '.' is a group separator.
                    if (Double.TryParse(text,
                                        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                                        System.Globalization.CultureInfo.InvariantCulture,
                                        out num))
                    {
                        output = IsTyped ? new Literal <T>(num) : new Literal(num);
                    }
                    else
                    {
                        throw new InvalidCastException("Unable to parse number from '" + text + "'");
                    }
                }
                else if (scanner.Info.Alpha)
                {
                    text += scanner.GetAlpha();
                    if (scanner.CurrentOrEmpty == "(")
                    {
                        // A name followed by "(" is a function call
                        IFunction func = Utils.GetFunction <T>(text);

                        // NOTE(review): the "{0}," template presumably appends a trailing comma so that
                        // every argument is comma-terminated - confirm against ToNewScanner.
                        var inner = scanner.ExpectBoundedBy('(', true).ToNewScanner("{0},");

                        // Each comma-terminated argument is parsed by its own parser instance
                        while (!inner.Finished)
                        {
                            string parm = inner.Get(MatchFunctions.BoundedBy(boundEnd: ","));
                            EquationParserEngine innerParser = new EquationParserEngine();

                            IOperand innerOperand = innerParser.Parse <T>(parm);
                            func.AddOperand(innerOperand);
                        }
                        CacheVariables(func);
                        output = func;
                    }
                    else
                    {
                        // A bare name is a variable
                        IVariable variable = GetVariable <T>(text);
                        output = variable;
                    }
                }
                else if (scanner.Current == '(')
                {
                    // Parenthesized sub-expression: parse its contents with a separate parser
                    string inner  = scanner.Get(MatchFunctions.BoundedBy("("));
                    var    parser = new EquationParserEngine();
                    parser.Parse <T>(inner);
                    output = parser.Clause;
                    CacheVariables(output);
                }
                else
                {
                    throw new ArgumentException("Unexpected character '" + scanner.Match + "' found, expected an operand (a number or variable name)");
                }
            }

            scanner.SkipWhitespace();
            ParseEnd = scanner.Finished;

            return(output);
        }
        /// <summary>
        /// Reads the next operand from the scanner: a number, a variable name, a function call with
        /// comma-separated arguments, or a parenthesized sub-expression.
        /// Sets <c>ParseEnd</c> to whether the scanner is finished after trailing whitespace is skipped.
        /// </summary>
        /// <typeparam name="T">The convertible type used for typed literals, variables and functions.</typeparam>
        /// <returns>The operand that was read.</returns>
        /// <exception cref="InvalidCastException">The scanned numeric text could not be parsed as a number.</exception>
        /// <exception cref="InvalidOperationException">The next character cannot start an operand.</exception>
        protected IOperand GetOperand <T>() where T : IConvertible
        {
            string   text;
            IOperand output;

            scanner.SkipWhitespace();
            if (scanner.Info.NumericExtended)
            {
                text = scanner.Get(MatchFunctions.Number());
                double num;
                // Parse with the invariant culture (same styles as the default overload) so that
                // "1.5" is never read as 15 under cultures where '.' is a group separator.
                if (Double.TryParse(text,
                                    System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                                    System.Globalization.CultureInfo.InvariantCulture,
                                    out num))
                {
                    output = IsTyped ? new Literal <T>(num) : new Literal(num);
                }
                else
                {
                    throw new InvalidCastException("Unable to parse number from '" + text + "'");
                }
            }
            else if (scanner.Info.Alpha)
            {
                text = scanner.GetAlpha();
                if (scanner.NextCharOrEmpty == "(")
                {
                    // A name followed by "(" is a function call
                    IFunction func = Utils.GetFunction <T>(text);

                    // NOTE(review): the "{0}," template presumably appends a trailing comma so that
                    // every argument is comma-terminated - confirm against ToNewScanner.
                    var inner = scanner.ExpectBoundedBy('(', true).ToNewScanner("{0},");

                    // Each comma-terminated argument is parsed by its own parser instance
                    while (!inner.Finished)
                    {
                        string         parm        = inner.Get(MatchFunctions.BoundedBy(boundEnd: ","));
                        EquationParser innerParser = new EquationParser();

                        IOperand innerOperand = innerParser.Parse <T>(parm);
                        func.AddOperand(innerOperand);
                    }
                    CacheVariables(func);
                    output = func;
                }
                else
                {
                    // A bare name is a variable
                    IVariable variable = GetVariable <T>(text);
                    output = variable;
                }
            }
            else if (scanner.NextChar == '(')
            {
                // Parenthesized sub-expression: parse its contents with a separate parser
                string inner  = scanner.Get(MatchFunctions.BoundedBy("("));
                var    parser = new EquationParser();
                parser.Parse <T>(inner);
                output = parser.Clause;
                CacheVariables(output);
            }
            else
            {
                throw new InvalidOperationException("Unexpected character '" + scanner.Match + "' found, expected an operand (a number or variable name)");
            }
            scanner.SkipWhitespace();
            ParseEnd = scanner.Finished;

            return(output);
        }