/// <summary>
/// Remove the elements matching a selector from the set of matched elements.
/// </summary>
///
/// <param name="selector">
/// A CSS selector identifying the elements to exclude.
/// </param>
///
/// <returns>
/// A new CQ object.
/// </returns>
///
/// <url>
/// http://api.jquery.com/not/
/// </url>
public CQ Not(string selector)
{
    // Run the selector as a filter over the current selection, then pass the
    // elements it matched to the Not overload that accepts them directly.
    var excluded = new Selector(selector)
        .ToFilterSelector()
        .Select(Document, Selection);

    return Not(excluded);
}
/// <summary>
/// Select elements and return a new CsQuery object.
/// </summary>
///
/// <remarks>
/// The "Select" method is the default CsQuery method. Its overloads are identical to the
/// overloads of the CQ object's property indexer (the square-bracket notation) and it functions
/// the same way. This is analogous to the default jQuery method, e.g. $(...).
/// </remarks>
///
/// <param name="selector">
/// A string containing a selector expression, or HTML from which a fragment is created.
/// </param>
///
/// <returns>
/// A new CQ object.
/// </returns>
///
/// <url>
/// http://api.jquery.com/jQuery/#jQuery1
/// </url>
public CQ Select(string selector)
{
    var parsed = new Selector(selector);

    if (!parsed.IsHmtl)
    {
        // A true "Select" runs against the DOM rather than the selection set, so use
        // the document of the CsQueryParent (the DOM that sourced this object) when
        // there is one.
        var source = CsQueryParent == null
            ? Document
            : CsQueryParent.Document;

        CQ result = NewCqInDomain();
        result.Selector = parsed;
        result.SetSelection(result.Selector.Select(source), SelectionSetOrder.Ascending);
        return result;
    }

    // The selector is HTML: build a fragment from it. Setting the selection to the
    // fragment's child nodes was removed here because it caused other problems.
    CQ fragment = CQ.CreateFragment(ExpandSelfClosingTags(selector));
    fragment.CsQueryParent = this;
    return fragment;
}
/// <summary>
/// Select elements and return a new CsQuery object.
/// </summary>
///
/// <remarks>
/// The "Select" method is the default CsQuery method. Its overloads are identical to the
/// overloads of the CQ object's property indexer (the square-bracket notation) and it functions
/// the same way. This is analogous to the default jQuery method, e.g. $(...).
/// </remarks>
///
/// <param name="selector">
/// A string containing a selector expression, or HTML from which a new CQ object is created.
/// </param>
///
/// <returns>
/// A new CQ object.
/// </returns>
///
/// <url>
/// http://api.jquery.com/jQuery/#jQuery1
/// </url>
public CQ Select(string selector)
{
    var parsed = new Selector(selector);
    CQ result;

    if (parsed.IsHmtl)
    {
        // HTML input creates new content instead of querying the DOM.
        result = CQ.Create(selector);
        result.CsQueryParent = this;
        return result;
    }

    // A true "Select" runs against the DOM rather than the selection set, so use the
    // document of the CsQueryParent (the DOM that sourced this object) when present.
    var source = CsQueryParent == null
        ? Document
        : CsQueryParent.Document;

    result = NewCqInDomain();
    result.Selector = parsed;
    result.SetSelection(result.Selector.Select(source), SelectionSetOrder.Ascending);
    return result;
}
/// <summary>
/// Select elements using a selector string, reusing the cached selection when the
/// selection cache already holds an entry for the parsed selector.
/// </summary>
///
/// <param name="selector">
/// A string containing a selector expression.
/// </param>
///
/// <returns>
/// A CQ object wrapping the cached selection, or the result of running the selector
/// against CqSource when nothing was cached.
/// </returns>
public CQ Select(string selector)
{
    var key = new Selector(selector);

    IList<IDomObject> cached;
    if (!SelectionCache.TryGetValue(key, out cached))
    {
        // Cache miss: run the query and store a snapshot of its selection.
        var fresh = CqSource.Select(key);
        SelectionCache.Add(key, fresh.Selection.ToList());
        return fresh;
    }

    return new CQ(cached);
}
/// <summary>
/// Select elements and return a new CsQuery object.
/// </summary>
///
/// <remarks>
/// The "Select" method is the default CsQuery method. Its overloads are identical to the
/// overloads of the CQ object's property indexer (the square-bracket notation) and it functions
/// the same way. This is analogous to the default jQuery method, e.g. $(...).
/// </remarks>
///
/// <param name="selector">
/// A Selector object.
/// </param>
///
/// <returns>
/// A new CQ object.
/// </returns>
///
/// <url>
/// http://api.jquery.com/jQuery/#jQuery1
/// </url>
public CQ Select(Selector selector)
{
    CQ result = NewCqInDomain();
    result.Selector = selector;

    // A true "Select" runs against the DOM rather than the selection set, so query the
    // document of the CsQueryParent (the DOM that sourced this object) when present.
    var source = CsQueryParent == null
        ? Document
        : CsQueryParent.Document;

    var matched = result.Selector.Select(source);
    result.SetSelection(matched, SelectionSetOrder.Ascending);

    return result;
}
/// <summary>
/// Adds a new selector for just the attribute value. Used to chain with the indexed
/// attribute-exists selector.
/// </summary>
///
/// <param name="fromSelector">
/// The selector whose attribute name, value and attribute selector type are copied
/// into the new selector.
/// </param>
protected void InsertAttributeValueSelector(Selector fromSelector)
{
    var valueSelector = new Selector
    {
        TraversalType = TraversalType.Filter,
        SelectorType = SelectorType.Attribute,
        AttributeName = fromSelector.AttributeName,
        AttributeValue = fromSelector.AttributeValue,
        AttributeSelectorType = fromSelector.AttributeSelectorType,
        CombinatorType = CombinatorType.Chained,
        NoIndex = true
    };

    // Place the new selector directly after the active one; append when the active
    // selector is the last entry.
    int position = activeSelectorId + 1;
    if (position < ActiveSelectors.Count)
    {
        ActiveSelectors.Insert(position, valueSelector);
    }
    else
    {
        ActiveSelectors.Add(valueSelector);
    }
}
/// <summary>
/// Return all position-type matches. These are selectors that are keyed to the position
/// within the selection set itself.
/// </summary>
///
/// <param name="list">
/// The sequence whose positions are tested. It may be enumerated more than once (a
/// negative IndexEquals index needs the element count first).
/// </param>
/// <param name="selector">
/// The selector supplying PositionType and PositionIndex.
/// </param>
///
/// <returns>
/// The elements of <paramref name="list"/> at the requested position(s); empty when the
/// position does not exist or the position type is not handled here.
/// </returns>
protected IEnumerable<IDomObject> GetResultPositionMatches(IEnumerable<IDomObject> list, Selector selector)
{
    switch (selector.PositionType)
    {
        case PositionType.First:
            IDomObject first = list.FirstOrDefault();
            if (first != null)
            {
                yield return first;
            }
            break;

        case PositionType.Last:
            IDomObject last = list.LastOrDefault();
            if (last != null)
            {
                yield return last;
            }
            break;

        case PositionType.IndexEquals:
            int critIndex = selector.PositionIndex;
            if (critIndex < 0)
            {
                // Negative indexes count back from the end of the sequence.
                critIndex = list.Count() + critIndex;
            }

            // An index that is still negative points before the first element and
            // therefore matches nothing.
            if (critIndex < 0)
            {
                yield break;
            }

            // Skip/Take stops enumerating as soon as the element is found and disposes
            // the underlying enumerator.
            foreach (IDomObject atIndex in list.Skip(critIndex).Take(1))
            {
                yield return atIndex;
            }
            break;

        case PositionType.IndexGreaterThan:
            int index = 0;
            foreach (IDomObject obj in list)
            {
                if (index++ > selector.PositionIndex)
                {
                    yield return obj;
                }
            }
            break;

        case PositionType.IndexLessThan:
            int indexLess = 0;
            foreach (IDomObject obj in list)
            {
                if (indexLess++ < selector.PositionIndex)
                {
                    yield return obj;
                }
                else
                {
                    // Positions only increase, so nothing later can match.
                    break;
                }
            }
            break;
    }
}
/// <summary>
/// Return all elements matching a selector, starting from list.
/// </summary>
///
/// <remarks>
/// HTML selectors create new objects from selector.Html; result-list position selectors are
/// delegated to GetResultPositionMatches; everything else is matched by walking each source
/// element (and, unless the traversal type is Filter, its element children) with an explicit
/// stack and testing each visited node with Matches.
/// </remarks>
///
/// <param name="list">
/// The elements from which matching starts.
/// </param>
/// <param name="selector">
/// The selector to match against.
/// </param>
///
/// <returns>
/// The matched objects. Matches found by the traversal are collected in a HashSet before
/// being returned, so each object is returned at most once.
/// </returns>
protected IEnumerable<IDomObject> GetMatches(IEnumerable<IDomObject> list, Selector selector)
{
    // Maintain a hashset of every element already searched. Since result sets frequently contain items which are
    // children of other items in the list, we would end up searching the tree repeatedly
    HashSet<IDomObject> uniqueElements = null;

    Stack<MatchElement> stack = null;
    IEnumerable<IDomObject> curList = list;
    HashSet<IDomObject> temporaryResults = new HashSet<IDomObject>();

    // The unique list has to be reset for each sub-selector
    uniqueElements = new HashSet<IDomObject>();

    // An HTML selector does not query anything: it creates objects from the markup.
    if (selector.SelectorType == SelectorType.HTML)
    {
        HtmlParser.DomElementFactory factory = new HtmlParser.DomElementFactory(Document);

        foreach (var obj in factory.CreateObjects(selector.Html))
        {
            yield return obj;
        }
        yield break;
    }

    // Result-list position selectors are simple -- skip out of main matching code if so
    if (selector.SelectorType.HasFlag(SelectorType.Position) && selector.IsResultListPosition)
    {
        foreach (var obj in GetResultPositionMatches(curList, selector))
        {
            yield return obj;
        }
        yield break;
    }

    // Otherwise, try to match each element individually
    stack = new Stack<MatchElement>();

    foreach (var e in curList)
    {
        // We must check everything again when looking for specific depth of children
        // otherwise - no point - skip em
        // NOTE(review): within this method uniqueElements is only added to when the traversal
        // type is Child (see the child loop below) but only consulted here when it is NOT
        // Child, so as written this skip cannot trigger -- confirm the intended condition.
        if (selector.TraversalType != TraversalType.Child && uniqueElements.Contains(e))
        {
            continue;
        }

        // Each source element is the root of its own walk, at depth 0.
        stack.Push(new MatchElement(e, 0));

        // NOTE(review): matchIndex is incremented below but never read in this method.
        int matchIndex = 0;
        while (stack.Count != 0)
        {
            var current = stack.Pop();

            if (Matches(selector, current.Object, current.Depth))
            {
                temporaryResults.Add(current.Object);
                matchIndex++;
            }

            // Add children to stack (in reverse order, so they are processed in the correct order when popped)

            // Don't keep going to children if the target depth is < the depth. Though the match would still fail,
            // stuff would end up the unique list which we might need to test later if it appears directly in the source list
            // causing it to be ignored.
            if (selector.TraversalType != TraversalType.Filter &&
                current.Object is IDomElement &&
                (selector.TraversalType != TraversalType.Child || selector.ChildDepth > current.Depth))
            {
                // Local copy of the flags so the Position flag can be cleared for this
                // element without modifying the selector itself.
                SelectorType selectorType = selector.SelectorType;
                IDomElement elm = current.Element;

                // When the children of this element sit at the target child depth and the
                // selector is a DOM-index position selector, take the position matches for
                // the whole child list in one call.
                if (selector.TraversalType == TraversalType.Child
                    && selector.ChildDepth == current.Depth + 1
                    && selector.IsDomIndexPosition)
                {
                    temporaryResults.AddRange(GetDomPositionMatches(elm, selector));
                    selectorType &= ~SelectorType.Position;
                }

                // Nothing but the position flag was set: the children need no further testing.
                if (selectorType == 0)
                {
                    continue;
                }

                for (int j = elm.ChildNodes.Count - 1; j >= 0; j--)
                {
                    IDomObject obj = elm[j];

                    // For Child traversal, skip children that were already recorded.
                    if (selector.TraversalType == TraversalType.Child && !uniqueElements.Add(obj))
                    {
                        continue;
                    }

                    // Only element nodes are pushed for further matching.
                    if (obj.NodeType == NodeType.ELEMENT_NODE)
                    {
                        stack.Push(new MatchElement(obj, current.Depth + 1));
                    }
                }
            }
        }
    }

    foreach (var obj in temporaryResults)
    {
        yield return obj;
    }
    yield break;
}
/// <summary>
/// Return the children of an element that satisfy a position-type selector.
/// </summary>
///
/// <param name="elm">
/// The element whose children are examined.
/// </param>
/// <param name="selector">
/// The selector supplying the position type and, for NthChild, the criteria.
/// </param>
///
/// <returns>
/// The result of NthChildMatcher.GetMatchingChildren for NthChild selectors; otherwise the
/// result of GetSimpleDomPostionMatches for the selector's position type.
/// </returns>
protected IEnumerable<IDomObject> GetDomPositionMatches(IDomElement elm, Selector selector)
{
    if (selector.PositionType != PositionType.NthChild)
    {
        return GetSimpleDomPostionMatches(elm, selector.PositionType);
    }

    return NthChildMatcher.GetMatchingChildren(elm, selector.Criteria);
}
/// <summary>
/// Runs a selector, converted to a context selector, against the document using this
/// object as the context, and returns the matches in a new CQ created by NewCqInDomain.
/// </summary>
///
/// <param name="selector">
/// The selector to run; it is also recorded as the Selector of the returned object.
/// </param>
///
/// <returns>
/// A new CQ object containing the matched elements.
/// </returns>
private CQ FindImpl(Selector selector)
{
    CQ result = NewCqInDomain();

    result.AddSelection(
        selector.ToContextSelector().Select(Document, this));
    result.Selector = selector;

    return result;
}
/// <summary>
/// Creates a selector engine for a document and a selector.
/// </summary>
///
/// <param name="document">
/// The document stored in Document.
/// </param>
/// <param name="selector">
/// The selector stored in Selector.
/// </param>
public SelectorEngine(IDomDocument document, Selector selector)
{
    this.Document = document;
    this.Selector = selector;
}
/// <summary>
/// Test whether a single object satisfies a selector at the given traversal depth.
/// </summary>
///
/// <param name="selector">
/// The selector to test against.
/// </param>
/// <param name="obj">
/// The object being tested.
/// </param>
/// <param name="depth">
/// The depth of <paramref name="obj"/> relative to the element the traversal started from.
/// Child selectors match only at selector.ChildDepth; Descendent selectors never match at
/// depth 0.
/// </param>
///
/// <returns>
/// true when every criterion flagged in selector.SelectorType is satisfied.
/// </returns>
///
/// <exception cref="InvalidOperationException">
/// The selector has the Attribute flag, the attribute is present on the element, and no
/// recognized AttributeSelectorType is set.
/// </exception>
protected bool Matches(Selector selector, IDomObject obj, int depth)
{
    switch (selector.TraversalType)
    {
        case TraversalType.Child:
            if (selector.ChildDepth != depth)
            {
                return false;
            }
            break;
        case TraversalType.Descendent:
            if (depth == 0)
            {
                return false;
            }
            break;
    }

    if (selector.SelectorType.HasFlag(SelectorType.All))
    {
        return true;
    }

    IDomElement elm = obj as IDomElement;
    if (elm == null)
    {
        return false;
    }

    // Check each selector from easier/more specific to harder. e.g. ID is going to eliminate a lot of things.
    if (selector.SelectorType.HasFlag(SelectorType.ID) &&
        selector.ID != elm.Id)
    {
        return false;
    }

    if (selector.SelectorType.HasFlag(SelectorType.Class) &&
        !elm.HasClass(selector.Class))
    {
        return false;
    }

    // Tag names are identifiers, not linguistic text: compare ordinally so the result does
    // not depend on the current culture (e.g. the Turkish dotless i).
    if (selector.SelectorType.HasFlag(SelectorType.Tag) &&
        !String.Equals(elm.NodeName, selector.Tag, StringComparison.OrdinalIgnoreCase))
    {
        return false;
    }

    if (selector.SelectorType.HasFlag(SelectorType.Attribute))
    {
        string value;
        bool hasAttribute = elm.TryGetAttribute(selector.AttributeName, out value);
        bool match;

        if (!hasAttribute)
        {
            // A missing attribute satisfies only the negated forms: "not exists", and
            // "not equals" (an absent attribute cannot equal the value).
            match = selector.AttributeSelectorType.IsOneOf(
                AttributeSelectorType.NotExists,
                AttributeSelectorType.NotEquals);
        }
        else
        {
            switch (selector.AttributeSelectorType)
            {
                case AttributeSelectorType.Exists:
                    match = true;
                    break;
                case AttributeSelectorType.NotExists:
                    match = false;
                    break;
                case AttributeSelectorType.Equals:
                    match = selector.AttributeValue == value;
                    break;
                case AttributeSelectorType.NotEquals:
                    match = selector.AttributeValue != value;
                    break;
                case AttributeSelectorType.StartsWith:
                    match = value.StartsWith(selector.AttributeValue, StringComparison.Ordinal);
                    break;
                case AttributeSelectorType.Contains:
                    match = value.IndexOf(selector.AttributeValue, StringComparison.Ordinal) >= 0;
                    break;
                case AttributeSelectorType.ContainsWord:
                    match = ContainsWord(value, selector.AttributeValue);
                    break;
                case AttributeSelectorType.EndsWith:
                    match = value.EndsWith(selector.AttributeValue, StringComparison.Ordinal);
                    break;
                default:
                    throw new InvalidOperationException("No AttributeSelectorType set");
            }
        }

        if (!match)
        {
            return false;
        }
    }

    // NOTE(review): the Other flag returns the visibility test directly, so the Position
    // and Contains checks below are not evaluated for such selectors -- confirm intended.
    if (selector.SelectorType.HasFlag(SelectorType.Other))
    {
        return IsVisible(elm);
    }

    // DOM position is only tested here for Filter traversal; the criteria argument is
    // supplied only for NthChild.
    if (selector.SelectorType.HasFlag(SelectorType.Position) &&
        selector.TraversalType == TraversalType.Filter &&
        !MatchesDOMPosition(elm, selector.PositionType,
            selector.PositionType == PositionType.NthChild ? selector.Criteria : null))
    {
        return false;
    }

    if (selector.SelectorType.HasFlag(SelectorType.Contains) &&
        !ContainsText(elm, selector.Criteria))
    {
        return false;
    }

    return true;
}
/// <summary>
/// Parse the string, and return a sequence of Selector objects.
/// </summary>
///
/// <remarks>
/// Pseudo-selector names are matched case-insensitively; they are lower-cased with the
/// invariant culture so that recognition does not depend on the current culture.
/// </remarks>
///
/// <param name="selector">
/// The selector text. When it is recognized as HTML, a single HTML selector is returned.
/// </param>
///
/// <returns>
/// The parsed selectors. When nothing was parsed, the result contains one clause with
/// SelectorType.None and TraversalType.Filter.
/// </returns>
///
/// <exception cref="NotImplementedException">
/// A CSS pseudo-element, or a pseudo-class that requires a browser, was used.
/// </exception>
/// <exception cref="ArgumentException">
/// An unknown pseudo-class or attribute matching operator was used, or the text could not
/// be scanned as a selector.
/// </exception>
public Selector Parse(string selector)
{
    Selectors = new Selector();

    string sel = (selector ?? String.Empty).Trim();

    // NOTE(review): IsHtml receives the raw argument (possibly null or untrimmed) rather
    // than the normalized "sel" -- confirm IsHtml tolerates that.
    if (IsHtml(selector))
    {
        Current.Html = sel;
        Current.SelectorType = SelectorType.HTML;
        Selectors.Add(Current);
        return Selectors;
    }

    scanner = Scanner.Create(sel);

    while (!scanner.Finished)
    {
        switch (scanner.Current)
        {
            case '*':
                StartNewSelector(SelectorType.All);
                scanner.Next();
                break;

            case '<':
                // not selecting - creating html
                Current.Html = sel;
                scanner.End();
                break;

            case ':':
                scanner.Next();

                // Invariant lower-casing: with the culture-sensitive ToLower(), a name
                // such as "INPUT" is not recognized under cultures like tr-TR.
                string key = scanner.Get(MatchFunctions.PseudoSelector).ToLowerInvariant();
                switch (key)
                {
                    case "input":
                        AddTagSelector("input");
                        AddTagSelector("textarea", true);
                        AddTagSelector("select", true);
                        AddTagSelector("button", true);
                        break;

                    case "text":
                        // input[type=text] ...
                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.Equals;
                        Current.AttributeName = "type";
                        Current.AttributeValue = "text";

                        // ... grouped with input elements that have no type attribute.
                        StartNewSelector(SelectorType.AttributeValue | SelectorType.Tag, CombinatorType.Grouped, Current.TraversalType);
                        Current.Tag = "input";
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName = "type";

                        Current.SelectorType |= SelectorType.Tag;
                        Current.Tag = "input";
                        break;

                    case "checkbox":
                    case "radio":
                    case "button":
                    case "file":
                    case "image":
                    case "password":
                        AddInputSelector(key, "input");
                        break;

                    case "reset":
                    case "submit":
                        AddInputSelector(key);
                        break;

                    case "checked":
                    case "selected":
                    case "disabled":
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                        Current.AttributeName = key;
                        break;

                    case "enabled":
                        StartNewSelector(SelectorType.AttributeValue);
                        Current.AttributeSelectorType = AttributeSelectorType.NotExists;
                        Current.AttributeName = "disabled";
                        break;

                    case "first-letter":
                    case "first-line":
                    case "before":
                    case "after":
                        throw new NotImplementedException("The CSS pseudoelement selectors are not implemented in CsQuery.");

                    case "target":
                    case "link":
                    case "hover":
                    case "active":
                    case "focus":
                    case "visited":
                        throw new NotImplementedException("Pseudoclasses that require a browser aren't implemented.");

                    default:
                        if (!AddPseudoSelector(key))
                        {
                            throw new ArgumentException("Unknown pseudo-class :\"" + key + "\". If this is a valid CSS or jQuery selector, please let us know.");
                        }
                        break;
                }
                break;

            case '.':
                StartNewSelector(SelectorType.Class);
                scanner.Next();
                Current.Class = scanner.Get(MatchFunctions.CssClassName);
                break;

            case '#':
                scanner.Next();
                // A trailing '#' with nothing after it starts no selector.
                if (!scanner.Finished)
                {
                    StartNewSelector(SelectorType.ID);
                    Current.ID = scanner.Get(MatchFunctions.HtmlIDValue());
                }
                break;

            case '[':
                StartNewSelector(SelectorType.AttributeValue);

                // Scan the bracketed text with its own scanner.
                IStringScanner innerScanner = scanner.ExpectBoundedBy('[', true).ToNewScanner();

                Current.AttributeName = innerScanner.Get(MatchFunctions.HTMLAttribute());
                innerScanner.SkipWhitespace();

                if (innerScanner.Finished)
                {
                    Current.AttributeSelectorType = AttributeSelectorType.Exists;
                }
                else
                {
                    string matchType = innerScanner.Get("=", "^=", "*=", "~=", "$=", "!=", "|=");

                    // CSS allows [attr=] as a synonym for [attr]
                    if (innerScanner.Finished)
                    {
                        Current.AttributeSelectorType = AttributeSelectorType.Exists;
                    }
                    else
                    {
                        var rawValue = innerScanner.Expect(expectsOptionallyQuotedValue()).ToNewScanner();

                        Current.AttributeValue = rawValue.Finished
                            ? ""
                            : rawValue.Get(new EscapedString());

                        switch (matchType)
                        {
                            case "=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Equals;
                                break;

                            case "^=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWith;
                                // attributevalue starts with "" matches nothing
                                if (Current.AttributeValue == "")
                                {
                                    Current.AttributeValue = "" + (char)0;
                                }
                                break;

                            case "*=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.Contains;
                                break;

                            case "~=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.ContainsWord;
                                break;

                            case "$=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.EndsWith;
                                break;

                            case "!=":
                                Current.AttributeSelectorType = AttributeSelectorType.NotEquals;
                                // must matched manually - missing also validates as notEquals
                                break;

                            case "|=":
                                Current.SelectorType |= SelectorType.AttributeValue;
                                Current.AttributeSelectorType = AttributeSelectorType.StartsWithOrHyphen;
                                break;

                            default:
                                throw new ArgumentException("Unknown attibute matching operator '" + matchType + "'");
                        }
                    }
                }
                break;

            case ',':
                FinishSelector();
                NextCombinatorType = CombinatorType.Root;
                NextTraversalType = TraversalType.All;
                scanner.NextNonWhitespace();
                break;

            case '+':
                StartNewSelector(TraversalType.Adjacent);
                scanner.NextNonWhitespace();
                break;

            case '~':
                StartNewSelector(TraversalType.Sibling);
                scanner.NextNonWhitespace();
                break;

            case '>':
                StartNewSelector(TraversalType.Child);
                // This is a weird thing because if you use the > selector against a set directly, the meaning is "filter"
                // whereas if it is used in a combination selector the meaning is "filter for 1st child".
                // The depth is fixed at 1 here (a variant that used 0 for root combinators was disabled).
                Current.ChildDepth = 1;
                scanner.NextNonWhitespace();
                break;

            case ' ':
                // if a ">" or "," is later found, it will be overridden.
                scanner.NextNonWhitespace();
                NextTraversalType = TraversalType.Descendent;
                break;

            default:
                string tag = "";
                if (scanner.TryGet(MatchFunctions.HTMLTagSelectorName(), out tag))
                {
                    AddTagSelector(tag);
                }
                else
                {
                    // Text that cannot be scanned as a tag name is treated as HTML when it
                    // is the very first thing in the input; anywhere else it is an error.
                    if (scanner.Index == 0)
                    {
                        Current.Html = sel;
                        Current.SelectorType = SelectorType.HTML;
                        scanner.End();
                    }
                    else
                    {
                        throw new ArgumentException(scanner.LastError);
                    }
                }
                break;
        }
    }

    // Close any open selectors
    FinishSelector();

    if (Selectors.Count == 0)
    {
        var empty = new SelectorClause
        {
            SelectorType = SelectorType.None,
            TraversalType = TraversalType.Filter
        };
        Selectors.Add(empty);
    }

    return Selectors;
}
/// <summary>
/// Creates a copy of this selector. Each property is copied to the new instance and every
/// sub-selector is cloned in turn.
/// </summary>
///
/// <returns>
/// A new Selector with the same property values and clones of the sub-selectors.
/// </returns>
public Selector Clone()
{
    var copy = new Selector
    {
        SelectorType = SelectorType,
        TraversalType = TraversalType,
        CombinatorType = CombinatorType,
        PositionType = PositionType,
        AttributeName = AttributeName,
        AttributeSelectorType = AttributeSelectorType,
        AttributeValue = AttributeValue,
        ChildDepth = ChildDepth,
        Class = Class,
        Criteria = Criteria,
        Html = Html,
        ID = ID,
        NoIndex = NoIndex,
        PositionIndex = PositionIndex,
        SelectElements = SelectElements,
        Tag = Tag,
        OtherType = OtherType
    };

    // SubSelectors is only touched when this selector actually has some.
    if (HasSubSelectors)
    {
        foreach (var child in SubSelectors)
        {
            copy.SubSelectors.Add(child.Clone());
        }
    }

    return copy;
}
/// <summary>
/// Configures a new instance from a selector and an existing context.
/// </summary>
///
/// <param name="selector">
/// A valid CSS selector. When null or empty, only the parent is assigned.
/// </param>
/// <param name="context">
/// The context, which also becomes the CsQueryParent of this instance.
/// </param>
private void ConfigureNewInstance(string selector, CQ context)
{
    CsQueryParent = context;

    if (String.IsNullOrEmpty(selector))
    {
        return;
    }

    Selector = new Selector(selector);

    var matched = Selector.ToContextSelector().Select(Document, context);

    // HTML selectors use the order in which elements were added; queries use ascending order.
    var order = Selector.IsHtml
        ? SelectionSetOrder.OrderAdded
        : SelectionSetOrder.Ascending;

    SetSelection(matched, order);
}
/// <summary>
/// Runs a selector against the document using this object as the context, and returns
/// the matches in a new CQ created by NewCqInDomain.
/// </summary>
///
/// <param name="selector">
/// The selector to run; it is also recorded as the Selector of the returned object.
/// </param>
///
/// <returns>
/// A new CQ object containing the matched elements.
/// </returns>
private CQ FindImpl(Selector selector)
{
    CQ result = NewCqInDomain();

    var matched = selector.Select(Document, this);
    result.AddSelection(matched);
    result.Selector = selector;

    return result;
}
/// <summary>
/// Select elements from within a context.
/// </summary>
///
/// <param name="selector">
/// A string containing a selector expression.
/// </param>
/// <param name="context">
/// The point in the document at which the selector should begin matching; similar to the
/// context argument of the CQ.Create(selector, context) method.
/// </param>
///
/// <returns>
/// A new CQ object.
/// </returns>
///
/// <url>
/// http://api.jquery.com/jQuery/#jQuery1
/// </url>
public CQ Select(string selector, IDomObject context)
{
    var parsed = new Selector(selector);

    // Wrap the matches in a CQ constructed with this object, and remember the selector
    // that produced them.
    CQ result = new CQ(parsed.Select(Document, context), this);
    result.Selector = parsed;

    return result;
}
/// <summary>
/// Selects all elements that do not match the given selector.
/// </summary>
///
/// <param name="selector">
/// A CSS selector.
/// </param>
///
/// <returns>
/// A new CQ object built from the result of the selector's Except operation over the
/// current selection set.
/// </returns>
///
/// <url>
/// http://api.jquery.com/not/
/// </url>
public CQ Not(string selector)
{
    var remaining = new Selector(selector).Except(Document, SelectionSet);
    return new CQ(remaining);
}
/// <summary>
/// Select elements and return a new CsQuery object.
/// </summary>
///
/// <param name="selector">
/// A string containing a selector expression, or HTML from which a new CQ object is created.
/// </param>
///
/// <returns>
/// A new CQ object.
/// </returns>
public CQ Select(string selector)
{
    var parsed = new Selector(selector);

    // Ordinary selectors are handled by the Selector-based overload.
    if (!parsed.IsHmtl)
    {
        return Select(parsed);
    }

    // HTML input creates new content, parented to this object.
    CQ created = CQ.Create(selector);
    created.CsQueryParent = this;
    return created;
}
/// <summary>
/// Select elements from within a context.
/// </summary>
///
/// <param name="selector">
/// A string containing a selector expression.
/// </param>
/// <param name="context">
/// The point in the document at which the selector should begin matching; similar to the
/// context argument of the CQ.Create(selector, context) method.
/// </param>
///
/// <returns>
/// A new CQ object.
/// </returns>
///
/// <url>
/// http://api.jquery.com/jQuery/#jQuery1
/// </url>
public CQ Select(string selector, IDomObject context)
{
    var parsed = new Selector(selector);

    // The query runs through the context form of the selector, but the returned object
    // records the selector as parsed.
    CQ result = NewInstance(
        parsed.ToContextSelector().Select(Document, context),
        this);
    result.Selector = parsed;

    return result;
}
/// <summary>
/// Select elements from within a context.
/// </summary>
///
/// <param name="selector">
/// A string containing a selector expression.
/// </param>
/// <param name="context">
/// The points in the document at which the selector should begin matching; similar to the
/// context argument of the CQ.Create(selector, context) method. Only elements found below the
/// members of the sequence in the document can be matched.
/// </param>
///
/// <returns>
/// A new CQ object.
/// </returns>
///
/// <url>
/// http://api.jquery.com/jQuery/#jQuery1
/// </url>
public CQ Select(string selector, IEnumerable<IDomObject> context)
{
    // Here the context form of the selector is both executed and recorded on the result.
    var contextSelector = new Selector(selector).ToContextSelector();

    IEnumerable<IDomObject> matched = contextSelector.Select(Document, context);

    CQ result = NewInstance(matched, (CQ)this);
    result.Selector = contextSelector;

    return result;
}
/// <summary>
/// Initializes this instance from a selector and an existing context.
/// </summary>
///
/// <param name="selector">
/// A selector expression. When null or empty, only the parent is assigned.
/// </param>
/// <param name="context">
/// The context, which also becomes the CsQueryParent of this instance.
/// </param>
private void _CQ(string selector, CQ context)
{
    CsQueryParent = context;

    if (String.IsNullOrEmpty(selector))
    {
        return;
    }

    Selector = new Selector(selector);

    var matched = Selector.Select(Document, context);

    // HTML selectors use the order in which elements were added; queries use ascending order.
    var order = Selector.IsHmtl
        ? SelectionSetOrder.OrderAdded
        : SelectionSetOrder.Ascending;

    SetSelection(matched, order);
}
/// <summary>
/// Resets the _Current backing field to null.
/// </summary>
protected void ClearCurrent()
{
    this._Current = null;
}