/// <summary>
/// Chooses the sequence that the current clause should be evaluated against, using the clause's
/// combinator type, the original context, and the prior clause's result.
/// </summary>
///
/// <remarks>
/// A Chained clause starts from <paramref name="lastResult"/>; a Context clause starts from
/// <paramref name="context"/>; any other combinator type yields null. When the clause's traversal
/// type is Adjacent or Sibling, the starting sequence is mapped through GetAdjacentOrSiblings and
/// the clause's TraversalType is then overwritten with Filter, so this method mutates
/// <paramref name="clause"/> in that case.
///
/// Design notes carried over from the 10/14/2012 refactoring: selectors such as
/// ":not(.sel1 .sel2, :first)" have subselector filters that apply only to the context, whereas
/// ":has(.sel1 .sel2, :first)" needs the subselector applied to the results of a selection against
/// the DOM. $('.sel', '.context-sel') applies ".sel" against ".context-sel", like find. Each
/// subselector should therefore be identified as "context" or "root" in addition to its traversal
/// type: a :not subselector should be "root+descendant" for the first part and "context+filter"
/// for the second, and regular context filters should be "context+descendant". For context/find
/// filters chained with a comma, the part after the comma should also be in context, though jQuery
/// seems inconsistent about this. Open ideas from those notes: drop traversaltype.subselect, and
/// make traversaltype.all mean "include the context items" rather than "Descendant".
/// </remarks>
///
/// <param name="clause">
/// The current selector clause. Its TraversalType may be changed to Filter.
/// </param>
/// <param name="context">
/// The context passed initially to this Select operation.
/// </param>
/// <param name="lastResult">
/// The result of the prior clause. Can be null.
/// </param>
///
/// <returns>
/// The sequence that should source the current clause, or null when there is none.
/// </returns>
protected IEnumerable<IDomObject> GetSelectionSource(SelectorClause clause,
    IEnumerable<IDomObject> context,
    IEnumerable<IDomObject> lastResult)
{
    IEnumerable<IDomObject> startingSet;
    switch (clause.CombinatorType)
    {
        case CombinatorType.Chained:
            startingSet = lastResult;
            break;
        case CombinatorType.Context:
            startingSet = context;
            break;
        default:
            startingSet = null;
            break;
    }

    if (startingSet == null)
    {
        return null;
    }

    bool needsSiblingMapping = clause.TraversalType == TraversalType.Adjacent
        || clause.TraversalType == TraversalType.Sibling;
    if (!needsSiblingMapping)
    {
        return startingSet;
    }

    // The adjacent/sibling step is resolved here, so the clause only has to filter afterwards.
    IEnumerable<IDomObject> mapped = GetAdjacentOrSiblings(clause.TraversalType, startingSet);
    clause.TraversalType = TraversalType.Filter;
    return mapped;
}
/// <summary>
/// Lazily yields the children of an element that match a DOM-position pseudo-selector, and, for
/// Descendent or All traversal, repeats the search under every child element.
/// </summary>
///
/// <param name="elm">
/// The element whose children are tested.
/// </param>
/// <param name="selector">
/// The clause whose PseudoSelector is cast to IPseudoSelectorChild to obtain the matches.
/// </param>
///
/// <returns>
/// The direct matches for <paramref name="elm"/>, followed by the matches found recursively
/// beneath each of its child elements when the traversal type asks for it.
/// </returns>
protected IEnumerable<IDomObject> GetPseudoClassMatches(IDomElement elm, SelectorClause selector)
{
    IEnumerable<IDomObject> directMatches =
        ((IPseudoSelectorChild)selector.PseudoSelector).ChildMatches(elm);

    foreach (var match in directMatches)
    {
        yield return match;
    }

    // Only descendant-style traversals look below the immediate children.
    bool descend = selector.TraversalType == TraversalType.Descendent
        || selector.TraversalType == TraversalType.All;
    if (!descend)
    {
        yield break;
    }

    foreach (var child in elm.ChildElements)
    {
        foreach (var nested in GetPseudoClassMatches(child, selector))
        {
            yield return nested;
        }
    }
}
/// <summary>
/// Applies a result-position pseudo-selector, i.e. one keyed to an element's position within the
/// selection set itself rather than within the DOM.
/// </summary>
///
/// <param name="list">
/// The sequence of elements to filter.
/// </param>
/// <param name="selector">
/// The clause; its TraversalType expands <paramref name="list"/> and its PseudoSelector, cast to
/// IPseudoSelectorFilter, performs the filtering.
/// </param>
///
/// <returns>
/// The sequence produced by the pseudo-selector's Filter method.
/// </returns>
protected IEnumerable<IDomObject> GetResultPositionMatches(IEnumerable<IDomObject> list, SelectorClause selector)
{
    // Sibling-type traversals are not handled here; per the original note, that mapping has
    // already been done by the time this method runs.
    var candidates = GetAllChildOrDescendants(selector.TraversalType, list);

    var positionFilter = (IPseudoSelectorFilter)selector.PseudoSelector;
    return positionFilter.Filter(candidates);
}
/// <summary>
/// Compares this clause with another object, property by property.
/// </summary>
///
/// <param name="obj">
/// The object to compare with this clause.
/// </param>
///
/// <returns>
/// true when <paramref name="obj"/> is a SelectorClause whose SelectorType, TraversalType,
/// CombinatorType, AttributeName, AttributeSelectorType, AttributeValue, ChildDepth, Class,
/// Criteria, Html, ID, NoIndex, PositionIndex, SelectElements and Tag all compare equal with
/// the == operator; otherwise false.
/// </returns>
public override bool Equals(object obj)
{
    SelectorClause other = obj as SelectorClause;
    if (other != null)
    {
        // How the clause selects and combines.
        bool sameKind = other.SelectorType == SelectorType
            && other.TraversalType == TraversalType
            && other.CombinatorType == CombinatorType;
        if (!sameKind)
        {
            return false;
        }

        // Attribute-selector details.
        bool sameAttribute = other.AttributeName == AttributeName
            && other.AttributeSelectorType == AttributeSelectorType
            && other.AttributeValue == AttributeValue;
        if (!sameAttribute)
        {
            return false;
        }

        // Everything else that identifies the clause.
        return other.ChildDepth == ChildDepth
            && other.Class == Class
            && other.Criteria == Criteria
            && other.Html == Html
            && other.ID == ID
            && other.NoIndex == NoIndex
            && other.PositionIndex == PositionIndex
            && other.SelectElements == SelectElements
            && other.Tag == Tag;
    }

    return false;
}
/// <summary>
/// Creates a new SelectorClause populated with this clause's property values.
/// </summary>
///
/// <remarks>
/// Each property is assigned directly from this instance, so reference-typed members such as
/// SelectElements and PseudoSelector refer to the same objects as the original unless their
/// setters make their own copy.
/// </remarks>
///
/// <returns>
/// A copy of this object.
/// </returns>
public SelectorClause Clone()
{
    // Assignment order matches the original field-by-field copy.
    return new SelectorClause
    {
        SelectorType = SelectorType,
        TraversalType = TraversalType,
        CombinatorType = CombinatorType,
        AttributeName = AttributeName,
        AttributeSelectorType = AttributeSelectorType,
        AttributeValue = AttributeValue,
        ChildDepth = ChildDepth,
        Class = Class,
        Criteria = Criteria,
        Html = Html,
        ID = ID,
        NoIndex = NoIndex,
        PositionIndex = PositionIndex,
        SelectElements = SelectElements,
        Tag = Tag,
        PseudoSelector = PseudoSelector
    };
}
/// <summary>
/// Return all elements matching a selector, within a list of elements. This function will
/// traverse children, but it is expected that the source list at the current depth (e.g. from an
/// Adjacent or Sibling selector) is already processed.
/// </summary>
///
/// <param name="list">
/// The sequence of elements to filter.
/// </param>
/// <param name="selector">
/// The selector.
/// </param>
///
/// <returns>
/// The sequence of elements matching the selector.
/// </returns>
protected IEnumerable<IDomObject> GetMatches(IEnumerable<IDomObject> list, SelectorClause selector)
{
    // For the jQuery extensions (which are mapped to the position in the output, not the DOM)
    // the results have to be enumerated first, rather than targeting specific child elements.
    if (selector.SelectorType.HasFlag(SelectorType.PseudoClass))
    {
        if (selector.IsResultListPosition)
        {
            return GetResultPositionMatches(list, selector);
        }
    }
    else if (selector.SelectorType.HasFlag(SelectorType.All))
    {
        return GetAllChildOrDescendants(selector.TraversalType, list);
    }

    // Otherwise, try to match each element individually.

    // The matches collected so far.
    var results = new HashSet<IDomObject>();

    // Every child element already queued for searching. Result sets frequently contain items
    // which are children of other items in the list; without this the same subtree would be
    // searched repeatedly.
    var visited = new HashSet<IDomObject>();

    // The processing stack.
    var pending = new Stack<MatchElement>();

    foreach (var obj in list)
    {
        // Everything must be checked again when looking for a specific depth of children;
        // otherwise there is no point re-testing an element that was already visited.
        IDomElement el = obj as IDomElement;
        if (el == null
            || (selector.TraversalType != TraversalType.Child && visited.Contains(el)))
        {
            continue;
        }

        pending.Push(new MatchElement(el, 0));

        while (pending.Count != 0)
        {
            var current = pending.Pop();

            if (Matches(selector, current.Element, current.Depth))
            {
                results.Add(current.Element);
            }

            // Don't keep going to children if the target depth is < the depth. Though the match
            // would still fail, elements would end up in the visited set, and they may need to
            // be tested later if they appear directly in the source list.
            bool mayDescend = selector.TraversalType != TraversalType.Filter
                && (selector.TraversalType != TraversalType.Child || selector.ChildDepth > current.Depth);
            if (!mayDescend)
            {
                continue;
            }

            SelectorType remainingTypes = selector.SelectorType;
            IDomElement elm = current.Element;

            if (selector.IsDomPositionPseudoSelector
                && ((selector.TraversalType == TraversalType.All)
                    || (selector.TraversalType == TraversalType.Child && selector.ChildDepth == current.Depth + 1)
                    || (selector.TraversalType == TraversalType.Descendent && selector.ChildDepth <= current.Depth + 1)))
            {
                results.AddRange(GetPseudoClassMatches(elm, selector));
                remainingTypes &= ~SelectorType.PseudoClass;
            }

            // Nothing but the pseudo-class was requested, so the children need no further testing.
            if (remainingTypes == 0)
            {
                continue;
            }

            // Add children to the stack in reverse order, so they are processed in document
            // order when popped.
            for (int j = elm.ChildNodes.Count - 1; j >= 0; j--)
            {
                IDomElement child = elm[j] as IDomElement;
                if (child == null || !visited.Add(child))
                {
                    continue;
                }
                if (child.NodeType == NodeType.ELEMENT_NODE)
                {
                    pending.Push(new MatchElement(child, current.Depth + 1));
                }
            }
        }
    }

    return results;
}
/// <summary>
/// Test whether a single element matches a specific attribute selector.
/// </summary>
///
/// <remarks>
/// When the attribute is absent, only the NotEquals and NotExists selector types match. For the
/// StartsWithOrHyphen ("|=") type the value matches when it equals the selector value, when the
/// part of the value before its first '-' equals the selector value, or when the value begins
/// with the selector value immediately followed by '-'. The last rule is what makes a selector
/// value that itself contains a dash (e.g. "zh-Hans") match a longer value such as "zh-Hans-CN".
/// </remarks>
///
/// <param name="element">
/// The element to test. It is cast to DomElement.
/// </param>
/// <param name="selector">
/// The selector.
/// </param>
///
/// <returns>
/// true if the element matches, false if not.
/// </returns>
///
/// <exception cref="InvalidOperationException">
/// The attribute exists but the selector's AttributeSelectorType is not one of the handled values.
/// </exception>
public static bool Matches(IDomElement element, SelectorClause selector)
{
    string value;
    bool hasAttribute = ((DomElement)element).TryGetAttributeForMatching(selector.AttributeNameTokenID, out value);

    if (!hasAttribute)
    {
        // A missing attribute can only satisfy the negative selector types.
        switch (selector.AttributeSelectorType)
        {
            case AttributeSelectorType.NotEquals:
            case AttributeSelectorType.NotExists:
                return true;
            default:
                return false;
        }
    }

    switch (selector.AttributeSelectorType)
    {
        case AttributeSelectorType.Exists:
            return true;

        case AttributeSelectorType.Equals:
            return selector.AttributeValue.Equals(value, selector.AttributeValueStringComparison);

        case AttributeSelectorType.StartsWith:
            return value != null
                && value.Length >= selector.AttributeValue.Length
                && value.Substring(0, selector.AttributeValue.Length)
                    .Equals(selector.AttributeValue, selector.AttributeValueStringComparison);

        case AttributeSelectorType.Contains:
            return value != null
                && value.IndexOf(selector.AttributeValue, selector.AttributeValueStringComparison) >= 0;

        case AttributeSelectorType.ContainsWord:
            return value != null
                && ContainsWord(value, selector.AttributeValue, selector.AttributeValueStringComparer);

        case AttributeSelectorType.NotEquals:
            return !selector.AttributeValue.Equals(value, selector.AttributeValueStringComparison);

        case AttributeSelectorType.NotExists:
            return false;

        case AttributeSelectorType.EndsWith:
            int len = selector.AttributeValue.Length;
            return value != null
                && value.Length >= len
                && value.Substring(value.Length - len)
                    .Equals(selector.AttributeValue, selector.AttributeValueStringComparison);

        case AttributeSelectorType.StartsWithOrHyphen:
            if (value == null)
            {
                return false;
            }

            // Char overload: an ordinal search for the hyphen, independent of the current culture.
            int dashPos = value.IndexOf('-');
            string beforeDash = dashPos >= 0
                ? value.Substring(0, dashPos)
                : value;

            return selector.AttributeValue.Equals(beforeDash, selector.AttributeValueStringComparison)
                // match a dash that's included in the match attribute according to common browser behavior
                || selector.AttributeValue.Equals(value, selector.AttributeValueStringComparison)
                // selector value followed by '-', which also covers selector values containing a dash
                || value.StartsWith(selector.AttributeValue + "-", selector.AttributeValueStringComparison);

        default:
            throw new InvalidOperationException("No AttributeSelectorType set");
    }
}
/// <summary>
/// Tests a single element against the clause's pseudo-class selector.
/// </summary>
///
/// <param name="element">
/// The element to test.
/// </param>
/// <param name="selector">
/// A selector clause whose PseudoSelector is cast to IPseudoSelectorChild.
/// </param>
///
/// <returns>
/// true if the element matches the pseudo-class, false if not.
/// </returns>
protected bool MatchesPseudoClass(IDomElement element, SelectorClause selector)
{
    var childSelector = (IPseudoSelectorChild)selector.PseudoSelector;
    return childSelector.Matches(element);
}
/// <summary>
/// Parse the string, and return a sequence of Selector objects.
/// </summary>
///
/// <remarks>
/// The input is trimmed and scanned one token at a time; each token either starts a new clause,
/// changes how the next clause is combined or traversed, or marks the whole input as HTML. The
/// pseudo-selector keyword is lower-cased with the invariant culture so that keyword matching
/// does not depend on the current culture.
/// </remarks>
///
/// <param name="selector">
/// The selector (or HTML) text. A null value is scanned as an empty string.
/// </param>
///
/// <returns>
/// The parsed Selector. If no clause was produced, it contains a single clause of type None with
/// Filter traversal.
/// </returns>
///
/// <exception cref="NotImplementedException">
/// A CSS pseudo-element or a browser-dependent pseudo-class was used.
/// </exception>
/// <exception cref="ArgumentException">
/// An unknown pseudo-class or attribute operator was found, or the text could not be scanned.
/// </exception>
public Selector Parse(string selector)
{
    Selectors = new Selector();

    string sel = (selector ?? String.Empty).Trim();

    if (IsHtml(selector))
    {
        Current.Html = sel;
        Current.SelectorType = SelectorType.HTML;
        Selectors.Add(Current);
        return Selectors;
    }

    scanner = Scanner.Create(sel);

    while (!scanner.Finished)
    {
        switch (scanner.Current)
        {
            case '*':
                StartNewSelector(SelectorType.All);
                scanner.Next();
                break;
            case '<':
                // not selecting - creating html
                Current.Html = sel;
                scanner.End();
                break;
            case ':':
                scanner.Next();
                // Invariant lower-casing: culture-specific casing rules (e.g. the Turkish
                // dotless i) must not stop ":INPUT" from matching "input".
                string key = scanner.Get(MatchFunctions.PseudoSelector).ToLowerInvariant();
                switch (key)
                {
                    case "input":
                        AddTagSelector("input");
                        AddTagSelector("textarea", true);
                        AddTagSelector("select", true);
                        AddTagSelector("button", true);
                        break;
                    case "text":
                        // input[type=text] ...
                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.Equals;
                        Current.AttributeName = "type";
                        Current.AttributeValue = "text";

                        // ... grouped with input elements that have no type attribute at all
                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag, CombinatorType.Grouped, Current.TraversalType);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName = "type";

                        Current.SelectorType |= SelectorType.Tag;
                        Current.Tag = "input";
                        break;
                    case "checkbox":
                    case "radio":
                    case "button":
                    case "file":
                    case "image":
                    case "password":
                        AddInputSelector(key, "input");
                        break;
                    case "reset":
                    case "submit":
                        AddInputSelector(key);
                        break;
                    case "checked":
                    case "selected":
                    case "disabled":
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                        Current.AttributeName = key;
                        break;
                    case "enabled":
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName = "disabled";
                        break;
                    case "first-letter":
                    case "first-line":
                    case "before":
                    case "after":
                        throw new NotImplementedException("The CSS pseudoelement selectors are not implemented in CsQuery.");
                    case "target":
                    case "link":
                    case "hover":
                    case "active":
                    case "focus":
                    case "visited":
                        throw new NotImplementedException("Pseudoclasses that require a browser aren't implemented.");
                    default:
                        if (!AddPseudoSelector(key))
                        {
                            throw new ArgumentException("Unknown pseudo-class :\"" + key + "\". If this is a valid CSS or jQuery selector, please let us know.");
                        }
                        break;
                }
                break;
            case '.':
                StartNewSelector(SelectorType.Class);
                scanner.Next();
                Current.Class = scanner.Get(MatchFunctions.CssClassName);
                break;
            case '#':
                scanner.Next();
                if (!scanner.Finished)
                {
                    StartNewSelector(SelectorType.ID);
                    Current.ID = scanner.Get(MatchFunctions.HtmlIDValue());
                }
                break;
            case '[':
                StartNewSelector(SelectorType.AttributeValue);

                IStringScanner innerScanner = scanner.ExpectBoundedBy('[', true).ToNewScanner();

                Current.AttributeName = innerScanner.Get(MatchFunctions.HTMLAttribute());
                innerScanner.SkipWhitespace();

                if (innerScanner.Finished)
                {
                    Current.AttributeSelectorType = AttributeSelectorType.Exists;
                }
                else
                {
                    string matchType = innerScanner.Get("=", "^=", "*=", "~=", "$=", "!=", "|=");

                    // CSS allows [attr=] as a synonym for [attr]
                    if (innerScanner.Finished)
                    {
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                    }
                    else
                    {
                        var rawValue = innerScanner.Expect(expectsOptionallyQuotedValue()).ToNewScanner();

                        Current.AttributeValue = rawValue.Finished ?
                            "" :
                            rawValue.Get(new EscapedString());

                        switch (matchType)
                        {
                            case "=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Equals;
                                break;
                            case "^=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWith;
                                // attributevalue starts with "" matches nothing
                                if (Current.AttributeValue == "")
                                {
                                    Current.AttributeValue = "" + (char)0;
                                }
                                break;
                            case "*=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Contains;
                                break;
                            case "~=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.ContainsWord;
                                break;
                            case "$=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.EndsWith;
                                break;
                            case "!=":
                                Current.AttributeSelectorType = AttributeSelectorType.NotEquals;
                                // must matched manually - missing also validates as notEquals
                                break;
                            case "|=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWithOrHyphen;
                                break;
                            default:
                                throw new ArgumentException("Unknown attibute matching operator '" + matchType + "'");
                        }
                    }
                }
                break;
            case ',':
                FinishSelector();
                NextCombinatorType = CombinatorType.Root;
                NextTraversalType = TraversalType.All;
                scanner.NextNonWhitespace();
                break;
            case '+':
                StartNewSelector(TraversalType.Adjacent);
                scanner.NextNonWhitespace();
                break;
            case '~':
                StartNewSelector(TraversalType.Sibling);
                scanner.NextNonWhitespace();
                break;
            case '>':
                StartNewSelector(TraversalType.Child);
                // This is a weird thing because if you use the > selector against a set directly, the meaning is "filter"
                // whereas if it is used in a combination selector the meaning is "filter for 1st child"
                //Current.ChildDepth = (Current.CombinatorType == CombinatorType.Root ? 0 : 1);
                Current.ChildDepth = 1;
                scanner.NextNonWhitespace();
                break;
            case ' ':
                // if a ">" or "," is later found, it will be overridden.
                scanner.NextNonWhitespace();
                NextTraversalType = TraversalType.Descendent;
                break;
            default:
                string tag = "";
                if (scanner.TryGet(MatchFunctions.HTMLTagSelectorName(), out tag))
                {
                    AddTagSelector(tag);
                }
                else
                {
                    if (scanner.Index == 0)
                    {
                        // Nothing recognisable at the very start: treat the whole text as HTML.
                        Current.Html = sel;
                        Current.SelectorType = SelectorType.HTML;
                        scanner.End();
                    }
                    else
                    {
                        throw new ArgumentException(scanner.LastError);
                    }
                }
                break;
        }
    }

    // Close any open selectors
    FinishSelector();

    if (Selectors.Count == 0)
    {
        var empty = new SelectorClause
        {
            SelectorType = SelectorType.None,
            TraversalType = TraversalType.Filter
        };
        Selectors.Add(empty);
    }

    return Selectors;
}
/// <summary>
/// Checks whether an element satisfies the pseudo-class carried by a selector clause.
/// </summary>
///
/// <param name="element">
/// The element to test.
/// </param>
/// <param name="selector">
/// A selector clause; its PseudoSelector must be castable to IPseudoSelectorChild.
/// </param>
///
/// <returns>
/// true if the element matches the pseudo-class, false if not.
/// </returns>
protected bool MatchesPseudoClass(IDomElement element, SelectorClause selector)
{
    IPseudoSelectorChild pseudo = (IPseudoSelectorChild)selector.PseudoSelector;
    bool isMatch = pseudo.Matches(element);
    return isMatch;
}
/// <summary>
/// Determines which sequence sources the current clause, based on the clause's combinator type,
/// the prior clause's result, and the original context.
/// </summary>
///
/// <remarks>
/// Chained clauses continue from <paramref name="lastResult"/>, Context clauses start from
/// <paramref name="context"/>, and all other combinator types have no source (null). For
/// Adjacent and Sibling traversal the source is replaced by the output of GetAdjacentOrSiblings
/// and the clause's TraversalType is set to Filter, which modifies <paramref name="clause"/>.
///
/// Notes from the 10/14/2012 refactoring, condensed: ":not(.sel1 .sel2, :first)" has subselector
/// filters that apply to just the context, while ":has(.sel1 .sel2, :first)" needs the
/// subselector applied to the results of a selection against the DOM. $('.sel', '.context-sel')
/// applies ".sel" against ".context-sel", like find. Subselectors should be tagged "context" or
/// "root" in addition to their traversal type ("root+descendant" then "context+filter" for :not;
/// "context+descendant" for regular context filters; parts after a comma should also be in
/// context, though jQuery seems inconsistent). Suggested follow-ups were to remove
/// traversaltype.subselect and to make traversaltype.all mean "include the context items"
/// instead of "Descendant".
/// </remarks>
///
/// <param name="clause">
/// The current selector clause. Its TraversalType may be changed to Filter.
/// </param>
/// <param name="context">
/// The context passed initially to this Select operation.
/// </param>
/// <param name="lastResult">
/// The result of the prior clause. Can be null.
/// </param>
///
/// <returns>
/// The sequence that should source the current clause's context, or null if there is none.
/// </returns>
protected IEnumerable<IDomObject> GetSelectionSource(SelectorClause clause,
    IEnumerable<IDomObject> context,
    IEnumerable<IDomObject> lastResult)
{
    IEnumerable<IDomObject> origin = clause.CombinatorType == CombinatorType.Chained
        ? lastResult
        : (clause.CombinatorType == CombinatorType.Context ? context : null);

    if (origin == null)
    {
        return null;
    }

    switch (clause.TraversalType)
    {
        case TraversalType.Adjacent:
        case TraversalType.Sibling:
            // Resolve the adjacent/sibling hop now; afterwards the clause acts as a plain filter.
            IEnumerable<IDomObject> neighbours = GetAdjacentOrSiblings(clause.TraversalType, origin);
            clause.TraversalType = TraversalType.Filter;
            return neighbours;

        default:
            return origin;
    }
}
/// <summary>
/// Return all elements matching a selector, within a list of elements. This function will
/// traverse children, but it is expected that the source list at the current depth (e.g. from an
/// Adjacent or Sibling selector) is already processed.
/// </summary>
///
/// <param name="source">
/// The sequence of elements to filter.
/// </param>
/// <param name="selector">
/// The selector.
/// </param>
///
/// <returns>
/// The sequence of elements matching the selector.
/// </returns>
protected IEnumerable<IDomObject> GetMatches(IEnumerable<IDomObject> source, SelectorClause selector)
{
    if (selector.SelectorType.HasFlag(SelectorType.Elements))
    {
        // An explicit element set: keep only those reachable from the source for this traversal.
        var set = GetAllChildOrDescendants(selector.TraversalType, source);
        return set.Intersect(selector.SelectElements);
    }
    // For the jQuery extensions (which are mapped to the position in the output, not the DOM)
    // the results have to be enumerated first, rather than targeting specific child elements.
    else if (selector.SelectorType.HasFlag(SelectorType.PseudoClass))
    {
        if (selector.IsResultListPosition)
        {
            return GetResultPositionMatches(source, selector);
        }
    }
    else if (selector.SelectorType.HasFlag(SelectorType.All))
    {
        return GetAllChildOrDescendants(selector.TraversalType, source);
    }

    // Otherwise, try to match each element individually.

    // The matches collected so far.
    var results = new HashSet<IDomObject>();

    // Every child element already queued for searching. Result sets frequently contain items
    // which are children of other items in the list; without this the same subtree would be
    // searched repeatedly.
    var visited = new HashSet<IDomObject>();

    // The processing stack.
    var pending = new Stack<MatchElement>();

    foreach (var obj in source)
    {
        // Everything must be checked again when looking for a specific depth of children;
        // otherwise there is no point re-testing an element that was already visited.
        IDomElement el = obj as IDomElement;
        if (el == null
            || (selector.TraversalType != TraversalType.Child && visited.Contains(el)))
        {
            continue;
        }

        pending.Push(new MatchElement(el, 0));

        while (pending.Count != 0)
        {
            var current = pending.Pop();

            if (Matches(selector, current.Element, current.Depth))
            {
                results.Add(current.Element);
            }

            // Don't keep going to children if the target depth is < the depth. Though the match
            // would still fail, elements would end up in the visited set, and they may need to
            // be tested later if they appear directly in the source list.
            bool mayDescend = selector.TraversalType != TraversalType.Filter
                && (selector.TraversalType != TraversalType.Child || selector.ChildDepth > current.Depth);
            if (!mayDescend)
            {
                continue;
            }

            SelectorType remainingTypes = selector.SelectorType;
            IDomElement elm = current.Element;

            if (selector.IsDomPositionPseudoSelector
                && ((selector.TraversalType == TraversalType.All)
                    || (selector.TraversalType == TraversalType.Child && selector.ChildDepth == current.Depth + 1)
                    || (selector.TraversalType == TraversalType.Descendent && selector.ChildDepth <= current.Depth + 1)))
            {
                results.AddRange(GetPseudoClassMatches(elm, selector));
                remainingTypes &= ~SelectorType.PseudoClass;
            }

            // Nothing but the pseudo-class was requested, so the children need no further testing.
            if (remainingTypes == 0)
            {
                continue;
            }

            // Add children to the stack in reverse order, so they are processed in document
            // order when popped.
            for (int j = elm.ChildNodes.Count - 1; j >= 0; j--)
            {
                IDomElement child = elm[j] as IDomElement;
                if (child == null || !visited.Add(child))
                {
                    continue;
                }
                if (child.NodeType == NodeType.ELEMENT_NODE)
                {
                    pending.Push(new MatchElement(child, current.Depth + 1));
                }
            }
        }
    }

    return results;
}
/// <summary>
/// Parse the string, and return a sequence of Selector objects.
/// </summary>
///
/// <remarks>
/// The trimmed input is consumed token by token. Depending on the token, a new clause is started,
/// the combinator or traversal type for the next clause is set, or the input is recorded as HTML.
/// Pseudo-selector keywords are lower-cased using the invariant culture, so recognising them does
/// not depend on the thread's current culture.
/// </remarks>
///
/// <param name="selector">
/// The selector (or HTML) text. A null value is scanned as an empty string.
/// </param>
///
/// <returns>
/// The parsed Selector. When parsing produced no clauses, a single clause with SelectorType None
/// and TraversalType Filter is added.
/// </returns>
///
/// <exception cref="NotImplementedException">
/// A CSS pseudo-element or a browser-dependent pseudo-class was used.
/// </exception>
/// <exception cref="ArgumentException">
/// An unknown pseudo-class or attribute operator was found, or the text could not be scanned.
/// </exception>
public Selector Parse(string selector)
{
    Selectors = new Selector();

    string sel = (selector ?? String.Empty).Trim();

    if (IsHtml(selector))
    {
        Current.Html = sel;
        Current.SelectorType = SelectorType.HTML;
        Selectors.Add(Current);
        return Selectors;
    }

    scanner = Scanner.Create(sel);

    while (!scanner.Finished)
    {
        switch (scanner.Current)
        {
            case '*':
                StartNewSelector(SelectorType.All);
                scanner.Next();
                break;
            case '<':
                // not selecting - creating html
                Current.Html = sel;
                scanner.End();
                break;
            case ':':
                scanner.Next();
                // Use invariant casing so culture-specific rules (such as the Turkish dotless i)
                // cannot turn a valid keyword like ":INPUT" into an unknown pseudo-class.
                string key = scanner.Get(MatchFunctions.PseudoSelector).ToLowerInvariant();
                switch (key)
                {
                    case "input":
                        AddTagSelector("input");
                        AddTagSelector("textarea", true);
                        AddTagSelector("select", true);
                        AddTagSelector("button", true);
                        break;
                    case "text":
                        // First clause: input elements whose type attribute equals "text".
                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.Equals;
                        Current.AttributeName = "type";
                        Current.AttributeValue = "text";

                        // Second, grouped clause: input elements without a type attribute.
                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag, CombinatorType.Grouped, Current.TraversalType);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName = "type";

                        Current.SelectorType |= SelectorType.Tag;
                        Current.Tag = "input";
                        break;
                    case "checkbox":
                    case "radio":
                    case "button":
                    case "file":
                    case "image":
                    case "password":
                        AddInputSelector(key, "input");
                        break;
                    case "reset":
                    case "submit":
                        AddInputSelector(key);
                        break;
                    case "checked":
                    case "selected":
                    case "disabled":
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                        Current.AttributeName = key;
                        break;
                    case "enabled":
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName = "disabled";
                        break;
                    case "first-letter":
                    case "first-line":
                    case "before":
                    case "after":
                        throw new NotImplementedException("The CSS pseudoelement selectors are not implemented in CsQuery.");
                    case "target":
                    case "link":
                    case "hover":
                    case "active":
                    case "focus":
                    case "visited":
                        throw new NotImplementedException("Pseudoclasses that require a browser aren't implemented.");
                    default:
                        if (!AddPseudoSelector(key))
                        {
                            throw new ArgumentException("Unknown pseudo-class :\"" + key + "\". If this is a valid CSS or jQuery selector, please let us know.");
                        }
                        break;
                }
                break;
            case '.':
                StartNewSelector(SelectorType.Class);
                scanner.Next();
                Current.Class = scanner.Get(MatchFunctions.CssClassName);
                break;
            case '#':
                scanner.Next();
                if (!scanner.Finished)
                {
                    StartNewSelector(SelectorType.ID);
                    Current.ID = scanner.Get(MatchFunctions.HtmlIDValue());
                }
                break;
            case '[':
                StartNewSelector(SelectorType.AttributeValue);

                IStringScanner innerScanner = scanner.ExpectBoundedBy('[', true).ToNewScanner();

                Current.AttributeName = innerScanner.Get(MatchFunctions.HTMLAttribute());
                innerScanner.SkipWhitespace();

                if (innerScanner.Finished)
                {
                    Current.AttributeSelectorType = AttributeSelectorType.Exists;
                }
                else
                {
                    string matchType = innerScanner.Get("=", "^=", "*=", "~=", "$=", "!=", "|=");

                    // CSS allows [attr=] as a synonym for [attr]
                    if (innerScanner.Finished)
                    {
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                    }
                    else
                    {
                        var rawValue = innerScanner.Expect(expectsOptionallyQuotedValue()).ToNewScanner();

                        Current.AttributeValue = rawValue.Finished ?
                            "" :
                            rawValue.Get(new EscapedString());

                        switch (matchType)
                        {
                            case "=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Equals;
                                break;
                            case "^=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWith;
                                // attributevalue starts with "" matches nothing
                                if (Current.AttributeValue == "")
                                {
                                    Current.AttributeValue = "" + (char)0;
                                }
                                break;
                            case "*=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Contains;
                                break;
                            case "~=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.ContainsWord;
                                break;
                            case "$=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.EndsWith;
                                break;
                            case "!=":
                                Current.AttributeSelectorType = AttributeSelectorType.NotEquals;
                                // must matched manually - missing also validates as notEquals
                                break;
                            case "|=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWithOrHyphen;
                                break;
                            default:
                                throw new ArgumentException("Unknown attibute matching operator '" + matchType + "'");
                        }
                    }
                }
                break;
            case ',':
                FinishSelector();
                NextCombinatorType = CombinatorType.Root;
                NextTraversalType = TraversalType.All;
                scanner.NextNonWhitespace();
                break;
            case '+':
                StartNewSelector(TraversalType.Adjacent);
                scanner.NextNonWhitespace();
                break;
            case '~':
                StartNewSelector(TraversalType.Sibling);
                scanner.NextNonWhitespace();
                break;
            case '>':
                StartNewSelector(TraversalType.Child);
                // This is a weird thing because if you use the > selector against a set directly, the meaning is "filter"
                // whereas if it is used in a combination selector the meaning is "filter for 1st child"
                //Current.ChildDepth = (Current.CombinatorType == CombinatorType.Root ? 0 : 1);
                Current.ChildDepth = 1;
                scanner.NextNonWhitespace();
                break;
            case ' ':
                // if a ">" or "," is later found, it will be overridden.
                scanner.NextNonWhitespace();
                NextTraversalType = TraversalType.Descendent;
                break;
            default:
                string tag = "";
                if (scanner.TryGet(MatchFunctions.HTMLTagSelectorName(), out tag))
                {
                    AddTagSelector(tag);
                }
                else
                {
                    if (scanner.Index == 0)
                    {
                        // Unrecognised text at the very beginning is treated as HTML.
                        Current.Html = sel;
                        Current.SelectorType = SelectorType.HTML;
                        scanner.End();
                    }
                    else
                    {
                        throw new ArgumentException(scanner.LastError);
                    }
                }
                break;
        }
    }

    // Close any open selectors
    FinishSelector();

    if (Selectors.Count == 0)
    {
        var empty = new SelectorClause
        {
            SelectorType = SelectorType.None,
            TraversalType = TraversalType.Filter
        };
        Selectors.Add(empty);
    }

    return Selectors;
}
/// <summary>
/// Forget the selector that is currently open by resetting the <c>_Current</c> backing field
/// to null.
/// </summary>
protected void ClearCurrent()
{
    _Current = null;
}
/// <summary>
/// Work out which sequence the given clause should be evaluated against, using the clause's
/// combinator and traversal types, the original context, and the prior clause's results.
/// </summary>
///
/// <remarks>
/// This method mutates <paramref name="selector"/>: a Root clause with a non-null context and
/// a traversal type of All is switched to Descendent, and an Adjacent or Sibling clause is
/// switched to Filter once its adjacent/sibling elements have been resolved here.
/// </remarks>
///
/// <param name="selector">
/// The clause being evaluated. Its TraversalType may be changed (see remarks).
/// </param>
/// <param name="context">
/// The context sequence; used as the source for Root clauses. May be null.
/// </param>
/// <param name="lastResult">
/// The results of the prior clause; copied and used as the source for Chained clauses.
/// ToList() is called on it, so a null value will throw for a Chained clause.
/// </param>
///
/// <returns>
/// The source sequence for the clause, or null when the combinator is neither Root nor
/// Chained, or when it is Root and no context was supplied.
/// </returns>
///
/// <exception cref="InvalidOperationException">
/// A Root clause with a non-null context has a traversal type that is not handled here.
/// </exception>
protected IEnumerable<IDomObject> GetSelectionSource(SelectorClause selector,
    IEnumerable<IDomObject> context,
    IEnumerable<IDomObject> lastResult)
{
    var combinator = selector.CombinatorType;
    if (combinator != CombinatorType.Root && combinator != CombinatorType.Chained)
    {
        return null;
    }

    IEnumerable<IDomObject> candidates;
    if (combinator == CombinatorType.Chained)
    {
        // Snapshot the prior results: the caller keeps appending to lastResult in
        // successive iterations.
        candidates = lastResult.ToList();
    }
    else
    {
        if (context == null)
        {
            return null;
        }

        // Every supported traversal type draws from the context; only "All" needs
        // rewriting, and anything unrecognised is rejected.
        switch (selector.TraversalType)
        {
            case TraversalType.All:
                selector.TraversalType = TraversalType.Descendent;
                break;
            case TraversalType.Adjacent:
            case TraversalType.Sibling:
            case TraversalType.Filter:
            case TraversalType.Descendent:
            case TraversalType.Child:
                break;
            default:
                throw new InvalidOperationException("The selector passed to FindImpl has an invalid traversal type for Find.");
        }
        candidates = context;
    }

    var traversal = selector.TraversalType;
    if (traversal != TraversalType.Adjacent && traversal != TraversalType.Sibling)
    {
        return candidates;
    }

    // Adjacent/sibling combinators: resolve the related elements now, then treat the
    // remainder of the clause as a plain filter over them.
    IEnumerable<IDomObject> related = GetAdjacentOrSiblings(traversal, candidates);
    selector.TraversalType = TraversalType.Filter;
    return related;
}
/// <summary>
/// Decide whether one element satisfies the attribute test described by a selector clause.
/// </summary>
///
/// <param name="element">
/// The element to inspect. It is cast to <c>DomElement</c> in order to read the attribute.
/// </param>
/// <param name="selector">
/// The clause supplying the attribute token, the kind of comparison, the expected value and
/// the string comparison rules.
/// </param>
///
/// <returns>
/// true if the element passes the attribute test, false otherwise.
/// </returns>
///
/// <exception cref="InvalidOperationException">
/// The attribute is present but the clause's AttributeSelectorType is not one of the handled
/// kinds.
/// </exception>
public static bool Matches(IDomElement element, SelectorClause selector)
{
    string actual;
    var domElement = (DomElement)element;

    if (!domElement.TryGetAttributeForMatching(selector.AttributeNameTokenID, out actual))
    {
        // Attribute is missing: only the negative tests can succeed.
        var kind = selector.AttributeSelectorType;
        return kind == AttributeSelectorType.NotEquals
            || kind == AttributeSelectorType.NotExists;
    }

    // Attribute is present; "actual" may still be null, so value-based tests guard for it.
    switch (selector.AttributeSelectorType)
    {
        case AttributeSelectorType.Exists:
            return true;

        case AttributeSelectorType.NotExists:
            return false;

        case AttributeSelectorType.Equals:
            return selector.AttributeValue.Equals(actual, selector.AttributeValueStringComparison);

        case AttributeSelectorType.NotEquals:
            return !selector.AttributeValue.Equals(actual, selector.AttributeValueStringComparison);

        case AttributeSelectorType.StartsWith:
            {
                if (actual == null)
                {
                    return false;
                }
                string prefix = selector.AttributeValue;
                if (actual.Length < prefix.Length)
                {
                    return false;
                }
                return actual.Substring(0, prefix.Length)
                    .Equals(prefix, selector.AttributeValueStringComparison);
            }

        case AttributeSelectorType.Contains:
            if (actual == null)
            {
                return false;
            }
            return actual.IndexOf(selector.AttributeValue, selector.AttributeValueStringComparison) >= 0;

        case AttributeSelectorType.ContainsWord:
            if (actual == null)
            {
                return false;
            }
            return ContainsWord(actual, selector.AttributeValue, selector.AttributeValueStringComparer);

        case AttributeSelectorType.EndsWith:
            {
                // The expected length is read before the null check on the actual value.
                int suffixLength = selector.AttributeValue.Length;
                if (actual == null || actual.Length < suffixLength)
                {
                    return false;
                }
                return actual.Substring(actual.Length - suffixLength)
                    .Equals(selector.AttributeValue, selector.AttributeValueStringComparison);
            }

        case AttributeSelectorType.StartsWithOrHyphen:
            {
                if (actual == null)
                {
                    return false;
                }

                // Accept either the text before the first hyphen or the whole value, so an
                // expected value that itself contains a hyphen can still match.
                int hyphenAt = actual.IndexOf("-");
                string leading = hyphenAt >= 0 ? actual.Substring(0, hyphenAt) : actual;

                return selector.AttributeValue.Equals(leading, selector.AttributeValueStringComparison)
                    || selector.AttributeValue.Equals(actual, selector.AttributeValueStringComparison);
            }

        default:
            throw new InvalidOperationException("No AttributeSelectorType set");
    }
}
/// <summary>
/// Return true if an object matches a specific selector clause. For child traversal the
/// object must also sit at the clause's child depth, and for descendant traversal it must not
/// be at depth zero.
/// </summary>
///
/// <remarks>
/// Checks run in this order: depth constraints; the All flag (immediate match); the
/// PseudoClass flag (result delegated to MatchesPseudoClass); node type (only element nodes can
/// match further); ID; class; tag name; the AttributeValue flag (result delegated to
/// AttributeSelectors.Matches). A clause whose type is exactly None never matches.
/// Tag names are compared with an ordinal, case-insensitive comparison so that the outcome
/// does not depend on the current culture (e.g. "DIV" vs "div" under Turkish casing rules).
/// </remarks>
///
/// <param name="selector">The jQuery/CSS selector clause.</param>
/// <param name="obj">The target object.</param>
/// <param name="depth">
/// The depth at which the target was found, tested against the clause for child and
/// descendant traversal.
/// </param>
///
/// <returns>
/// true if the object matches the clause, false if not.
/// </returns>
protected bool Matches(SelectorClause selector, IDomElement obj, int depth)
{
    switch (selector.TraversalType)
    {
        case TraversalType.Child:
            if (selector.ChildDepth != depth)
            {
                return false;
            }
            break;
        case TraversalType.Descendent:
            // Depth zero is never accepted for descendant traversal.
            // NOTE(review): the original author flagged this depth handling as brittle and
            // suggested reworking the engine so the "And" combinator becomes a subselector.
            if (depth == 0)
            {
                return false;
            }
            break;
    }

    if (selector.SelectorType.HasFlag(SelectorType.All))
    {
        return true;
    }
    if (selector.SelectorType.HasFlag(SelectorType.PseudoClass))
    {
        return MatchesPseudoClass(obj, selector);
    }
    if (obj.NodeType != NodeType.ELEMENT_NODE)
    {
        return false;
    }

    // Check each selector part from cheaper/more specific to more expensive; an ID test
    // eliminates most candidates.
    if (selector.SelectorType.HasFlag(SelectorType.ID) &&
        selector.ID != obj.Id)
    {
        return false;
    }
    if (selector.SelectorType.HasFlag(SelectorType.Class) &&
        !obj.HasClass(selector.Class))
    {
        return false;
    }

    // Ordinal comparison: culture-sensitive case folding breaks tag matching for names
    // containing 'i'/'I' under cultures such as tr-TR.
    if (selector.SelectorType.HasFlag(SelectorType.Tag) &&
        !String.Equals(obj.NodeName, selector.Tag, StringComparison.OrdinalIgnoreCase))
    {
        return false;
    }

    if ((selector.SelectorType & SelectorType.AttributeValue) > 0)
    {
        return AttributeSelectors.Matches(obj, selector);
    }

    if (selector.SelectorType == SelectorType.None)
    {
        return false;
    }

    return true;
}
/// <summary>
/// Test a single object against one selector clause, including the depth constraint that
/// child and descendant traversal types impose.
/// </summary>
///
/// <remarks>
/// The tag-name test uses <see cref="StringComparison.OrdinalIgnoreCase"/>. A culture-sensitive
/// comparison would make matching depend on the thread's culture: under Turkish or Azeri
/// casing rules "I" and "i" are different letters, so tags such as "DIV" and "div" would not
/// be considered equal.
/// </remarks>
///
/// <param name="selector">The jQuery/CSS selector clause to test against.</param>
/// <param name="obj">The candidate object.</param>
/// <param name="depth">
/// The depth at which the candidate was found. Child traversal requires it to equal the
/// clause's ChildDepth; descendant traversal rejects depth zero.
/// </param>
///
/// <returns>
/// true when the candidate satisfies the clause, otherwise false.
/// </returns>
protected bool Matches(SelectorClause selector, IDomElement obj, int depth)
{
    // Depth constraints come first; they can reject a candidate regardless of its content.
    var traversal = selector.TraversalType;
    if (traversal == TraversalType.Child && selector.ChildDepth != depth)
    {
        return false;
    }
    if (traversal == TraversalType.Descendent && depth == 0)
    {
        // NOTE(review): the original author described this special case as brittle and
        // proposed reworking the "And" combinator into a subselector.
        return false;
    }

    var kinds = selector.SelectorType;

    if (kinds.HasFlag(SelectorType.All))
    {
        return true;
    }

    // Pseudo-class clauses are decided entirely by the pseudo-class matcher.
    if (kinds.HasFlag(SelectorType.PseudoClass))
    {
        return MatchesPseudoClass(obj, selector);
    }

    // Everything below applies only to element nodes.
    if (obj.NodeType != NodeType.ELEMENT_NODE)
    {
        return false;
    }

    // Cheapest and most selective tests first.
    if (kinds.HasFlag(SelectorType.ID) && selector.ID != obj.Id)
    {
        return false;
    }

    if (kinds.HasFlag(SelectorType.Class) && !obj.HasClass(selector.Class))
    {
        return false;
    }

    if (kinds.HasFlag(SelectorType.Tag))
    {
        // Culture-independent, case-insensitive comparison of the tag name.
        bool sameTag = String.Equals(obj.NodeName, selector.Tag, StringComparison.OrdinalIgnoreCase);
        if (!sameTag)
        {
            return false;
        }
    }

    // When an attribute value test is present its result is final.
    if ((kinds & SelectorType.AttributeValue) > 0)
    {
        return AttributeSelectors.Matches(obj, selector);
    }

    // A clause with no selector type at all matches nothing.
    return kinds != SelectorType.None;
}
/// <summary>
/// Apply a result-position pseudo-selector, i.e. one whose outcome depends on where an element
/// sits within the selection set itself.
/// </summary>
///
/// <param name="list">
/// The elements to start from. They are first expanded with GetAllChildOrDescendants using the
/// clause's traversal type.
/// </param>
/// <param name="selector">
/// The clause; its PseudoSelector is cast to IPseudoSelectorFilter and used as the filter.
/// </param>
///
/// <returns>
/// The sequence produced by the pseudo-selector's Filter method.
/// </returns>
protected IEnumerable<IDomObject> GetResultPositionMatches(IEnumerable<IDomObject> list,
    SelectorClause selector)
{
    // Per the original author's note, sibling traversal types have already been mapped
    // by the time this runs, so only child/descendant expansion is applied here.
    var candidates = GetAllChildOrDescendants(selector.TraversalType, list);

    var positionFilter = (IPseudoSelectorFilter)selector.PseudoSelector;
    return positionFilter.Filter(candidates);
}
/// <summary>
/// Lazily enumerate the children of an element that a child-type pseudo-selector accepts,
/// optionally continuing down through the element's descendants.
/// </summary>
///
/// <param name="elm">
/// The element whose children are tested.
/// </param>
/// <param name="selector">
/// The clause; its PseudoSelector is cast to IPseudoSelectorChild. When its traversal type is
/// Descendent or All, each child element is processed recursively as well.
/// </param>
///
/// <returns>
/// The matches reported for <paramref name="elm"/> first, followed by the matches for each of
/// its child elements in turn (when recursion applies).
/// </returns>
protected IEnumerable<IDomObject> GetPseudoClassMatches(IDomElement elm, SelectorClause selector)
{
    var childSelector = (IPseudoSelectorChild)selector.PseudoSelector;
    IEnumerable<IDomObject> direct = childSelector.ChildMatches(elm);

    foreach (var match in direct)
    {
        yield return match;
    }

    // The traversal type is consulted only after the direct matches have been produced.
    var traversal = selector.TraversalType;
    if (traversal != TraversalType.Descendent && traversal != TraversalType.All)
    {
        yield break;
    }

    // Walk down the tree, one child subtree at a time.
    foreach (var child in elm.ChildElements)
    {
        foreach (var nested in GetPseudoClassMatches(child, selector))
        {
            yield return nested;
        }
    }
}
/// <summary>
/// Create a new SelectorClause carrying the same property values as this one.
/// </summary>
///
/// <remarks>
/// Each property is copied by plain assignment, in the order listed below. Any value that is a
/// reference type (for example the PseudoSelector) is therefore shared with the original
/// rather than duplicated.
/// </remarks>
///
/// <returns>
/// A new clause populated from this object.
/// </returns>
public SelectorClause Clone()
{
    return new SelectorClause
    {
        SelectorType = SelectorType,
        TraversalType = TraversalType,
        CombinatorType = CombinatorType,

        AttributeName = AttributeName,
        AttributeSelectorType = AttributeSelectorType,
        AttributeValue = AttributeValue,
        ChildDepth = ChildDepth,
        Class = Class,
        Criteria = Criteria,
        Html = Html,
        ID = ID,
        NoIndex = NoIndex,
        PositionIndex = PositionIndex,
        SelectElements = SelectElements,
        Tag = Tag,
        PseudoSelector = PseudoSelector
    };
}
/// <summary>
/// Choose the sequence that the current clause is run against, based on the clause itself, the
/// context, and the results of the prior clause.
/// </summary>
///
/// <remarks>
/// Side effects on <paramref name="selector"/>: for a Root clause with a context, a traversal
/// type of All is replaced by Descendent; for any clause that reaches the adjacent/sibling
/// step, the traversal type is replaced by Filter after the related elements are fetched.
/// </remarks>
///
/// <param name="selector">
/// The clause being processed; its TraversalType can be modified as described in the remarks.
/// </param>
/// <param name="context">
/// The context sequence, used when the clause's combinator is Root. May be null.
/// </param>
/// <param name="lastResult">
/// The prior clause's results, used (as a copied list) when the combinator is Chained. A null
/// value will throw in that case because ToList() is invoked on it.
/// </param>
///
/// <returns>
/// The sequence to select from; null if the combinator is neither Root nor Chained, or if it
/// is Root and <paramref name="context"/> is null.
/// </returns>
///
/// <exception cref="InvalidOperationException">
/// The clause is a Root clause with a context and an unhandled traversal type.
/// </exception>
protected IEnumerable<IDomObject> GetSelectionSource(SelectorClause selector,
    IEnumerable<IDomObject> context,
    IEnumerable<IDomObject> lastResult)
{
    bool isRoot = selector.CombinatorType == CombinatorType.Root;
    bool isChained = selector.CombinatorType == CombinatorType.Chained;

    // Other combinator types have no selection source here.
    if (!(isRoot || isChained))
    {
        return null;
    }

    IEnumerable<IDomObject> source = null;

    if (!isRoot)
    {
        // Chained: take a copy, since lastResult keeps growing in later iterations.
        source = lastResult.ToList();
    }
    else if (context != null)
    {
        // Root: the context is the source for every recognised traversal type.
        var requested = selector.TraversalType;
        bool recognised =
            requested == TraversalType.Adjacent ||
            requested == TraversalType.Sibling ||
            requested == TraversalType.Filter ||
            requested == TraversalType.Descendent ||
            requested == TraversalType.All ||
            requested == TraversalType.Child;

        if (!recognised)
        {
            throw new InvalidOperationException("The selector passed to FindImpl has an invalid traversal type for Find.");
        }

        if (requested == TraversalType.All)
        {
            selector.TraversalType = TraversalType.Descendent;
        }

        source = context;
    }

    if (source == null)
    {
        return null;
    }

    bool needsRelatives =
        selector.TraversalType == TraversalType.Adjacent ||
        selector.TraversalType == TraversalType.Sibling;

    if (needsRelatives)
    {
        // Swap each source element for its adjacent/sibling elements, after which the
        // clause only needs to filter that set.
        source = GetAdjacentOrSiblings(selector.TraversalType, source);
        selector.TraversalType = TraversalType.Filter;
    }

    return source;
}