Ejemplo n.º 1
0
        /// <summary>
        /// Select implementation. Runs each clause of the bound <c>Selector</c> in order, resolving as
        /// much as possible from the document index and matching whatever remains manually. The public
        /// method automatically remaps a selector with the knowledge that the context is external (and
        /// not part of a chain).
        /// </summary>
        ///
        /// <exception cref="ArgumentNullException">
        /// Thrown when the bound <c>Selector</c> is null.
        /// </exception>
        ///
        /// <param name="context">
        /// The context in which the selector applies. If null, the selector is run against the entire
        /// Document. If not, the selector is run against this sequence of elements.
        /// </param>
        ///
        /// <returns>
        /// A de-duplicated list of elements ordered by NodePath. This method returns a list (rather than
        /// a sequence) because the sequence must be enumerated to ensure that end-users don't cause the
        /// selector to be rerun repeatedly, and that the values are not mutable (e.g. if the underlying
        /// source changes).
        /// </returns>

        public IList<IDomObject> Select(IEnumerable<IDomObject> context)
        {
            if (Selector == null)
            {
                // The single-string constructor of ArgumentNullException takes the parameter NAME, not
                // the message, so pass both explicitly.
                throw new ArgumentNullException("Selector", "The selector cannot be null.");
            }

            if (Selector.Count == 0)
            {
                return EmptyEnumerable().ToList();
            }

            // this holds the final output; a set, so an element matched by several groups appears once

            HashSet<IDomObject> output = new HashSet<IDomObject>();

            ActiveSelectors = new List<SelectorClause>(Selector);

            // First just check if we ended up here with an HTML selector; if so, hand it off.

            var firstSelector = ActiveSelectors[0];

            if (firstSelector.SelectorType == SelectorType.HTML)
            {
                return CsQuery.Implementation.
                       DomDocument.Create(firstSelector.Html, HtmlParsingMode.Fragment)
                       .ChildNodes
                       .ToList();
            }

            // this holds any results that carried over from the previous loop for chaining

            IEnumerable<IDomObject> lastResult = null;

            // this is the source from which selections are made in a given iteration; it could be the DOM
            // root, a context, or the previous result set.

            IEnumerable<IDomObject> selectionSource = null;

            // The index is used when there is no context at all (the whole document is searched). With a
            // context it is used only if the first context element is connected, indexed, and belongs to
            // the document this selector is bound to. Which index features are available is then
            // determined by casting the index to the known interfaces.

            bool useIndex;

            if (context.IsNullOrEmpty())
            {
                useIndex = true;
            }
            else
            {
                IDomObject first = context.First();
                useIndex = !first.IsDisconnected && first.IsIndexed && first.Document == Document;
            }

            IDomIndexRanged rangedIndex = null;
            IDomIndexSimple simpleIndex = null;

            if (useIndex)
            {
                rangedIndex = Document.DocumentIndex as IDomIndexRanged;
                simpleIndex = Document.DocumentIndex as IDomIndexSimple;
            }

            for (activeSelectorId = 0; activeSelectorId < ActiveSelectors.Count; activeSelectorId++)
            {
                // we will alter the selector during each iteration to remove the parts that have already
                // been parsed, so use a copy.

                var selector = ActiveSelectors[activeSelectorId].Clone();

                if (lastResult != null &&
                    (selector.CombinatorType == CombinatorType.Root || selector.CombinatorType == CombinatorType.Context))
                {
                    // This is a selector that was chained with the selector grouping combinator "," -- we
                    // always output the results so far when beginning a new group.

                    output.AddRange(lastResult);
                    lastResult = null;
                }

                // For "and" combinator types, we want to leave everything as it was -- the results of this
                // selector should compound with the prior. This is not an actual CSS combinator, this is the
                // equivalent of grouping parenthesis. That is, in CSS there's no way to say "(input[submit],
                // button):visible" - that is group the results on selector part and apply a filter to it. But
                // we need to do exactly this for certain selector types (for example the jQuery :button
                // selector).

                if (selector.CombinatorType != CombinatorType.Grouped)
                {
                    selectionSource = GetSelectionSource(selector, context, lastResult);
                    lastResult = null;
                }

                var key = new List<ulong>();
                SelectorType removeSelectorType = 0;

                // determine the type of traversal & depth for this selector

                int depth = 0;
                bool descendants = true;

                switch (selector.TraversalType)
                {
                case TraversalType.Child:
                    depth = selector.ChildDepth;
                    descendants = false;
                    break;

                case TraversalType.Filter:
                case TraversalType.Adjacent:
                case TraversalType.Sibling:
                    depth = 0;
                    descendants = false;
                    break;

                case TraversalType.Descendent:
                    depth = 1;
                    descendants = true;
                    break;
                    // default: fall through with default values set above.
                }

                // The simple index can only answer "everything matching this key", so it is usable only
                // when there is no selection source and no depth restriction.

                bool canUseBasicIndex = (selectionSource == null) &&
                                        descendants &&
                                        depth == 0;

                // build index keys when possible for the active index type. The parentheses matter: "&&"
                // binds tighter than "||", and selector.NoIndex must disable indexing regardless of which
                // index type is available.

                bool indexAvailable = rangedIndex != null ||
                                      (simpleIndex != null && canUseBasicIndex);

                if (indexAvailable && !selector.NoIndex)
                {
                    // We don't want to use the index for "NotEquals" selectors because a missing attribute
                    // is considered a valid match

                    if (selector.SelectorType.HasFlag(SelectorType.AttributeValue) &&
                        selector.AttributeSelectorType != AttributeSelectorType.NotExists &&
                        selector.AttributeSelectorType != AttributeSelectorType.NotEquals)
                    {
                        key.Add('!');
                        key.Add(HtmlData.Tokenize(selector.AttributeName));

                        // AttributeValue must still be matched manually - so remove this flag only if the
                        // selector is conclusive without further checking

                        if (selector.AttributeSelectorType == AttributeSelectorType.Exists)
                        {
                            removeSelectorType = SelectorType.AttributeValue;
                        }
                    }
                    else if (selector.SelectorType.HasFlag(SelectorType.Tag))
                    {
                        key.Add('+');
                        key.Add(HtmlData.Tokenize(selector.Tag));
                        removeSelectorType = SelectorType.Tag;
                    }
                    else if (selector.SelectorType.HasFlag(SelectorType.ID))
                    {
                        key.Add('#');
                        key.Add(HtmlData.TokenizeCaseSensitive(selector.ID));
                        removeSelectorType = SelectorType.ID;
                    }
                    else if (selector.SelectorType.HasFlag(SelectorType.Class))
                    {
                        key.Add('.');
                        key.Add(HtmlData.TokenizeCaseSensitive(selector.Class));
                        removeSelectorType = SelectorType.Class;
                    }
                }

                // If part of the selector was indexed, key will not be empty. Return initial set from the
                // index. If any selectors remain after this they will be searched the hard way.

                IEnumerable<IDomObject> result = null;

                if (key.Count > 0)
                {
                    // This is the main index access point: if we have an index key, we'll get as much as we
                    // can from the index. Anything else will be handled manually.

                    if (selectionSource == null)
                    {
                        // Always use the simple index if that's all we need because it could be faster.
                        //
                        // NOTE(review): this path does not apply depth/descendants, and it dereferences
                        // simpleIndex even when the key was built only because rangedIndex is available.
                        // It therefore assumes every index exposing IDomIndexRanged also exposes
                        // IDomIndexSimple -- confirm against the index implementations.

                        result = simpleIndex.QueryIndex(key.ToArray());
                    }
                    else
                    {
                        // Query the ranged index once per source element, scoping the key to that
                        // element's node path, and merge the matches into a set.

                        HashSet<IDomObject> elementMatches = new HashSet<IDomObject>();
                        result = elementMatches;

                        foreach (IDomObject obj in selectionSource)
                        {
                            var subKey = key.Concat(HtmlData.indexSeparator).Concat(obj.NodePath).ToArray();

                            var matches = rangedIndex.QueryIndex(subKey, depth, descendants);

                            elementMatches.AddRange(matches);
                        }
                    }

                    // The part answered by the index no longer needs manual matching.

                    selector.SelectorType &= ~removeSelectorType;

                    // Special case for attribute selectors: when Attribute Value attribute selector is present, we
                    // still need to filter for the correct value afterwards. But we need to change the traversal
                    // type because any nodes with the correct attribute type have already been selected.

                    if (selector.SelectorType.HasFlag(SelectorType.AttributeValue))
                    {
                        selector.TraversalType = TraversalType.Filter;
                    }
                }

                // If any selectors were not handled via the index, match them manually

                if (selector.SelectorType != 0)
                {
                    // if there are no temporary results (b/c there was no indexed selector) then use selection
                    // source instead (e.g. start from the same point that the index would have)

                    result = GetMatches(result ?? selectionSource ?? Document.ChildElements, selector);
                }

                // result stays null only when nothing was indexed and nothing was left to match; in that
                // case there is nothing to accumulate (and Concat would reject a null sequence).

                if (result != null)
                {
                    lastResult = lastResult == null ?
                                 result : lastResult.Concat(result);
                }
            }

            // After the loop has finished, output any results from the last iteration.

            if (lastResult != null)
            {
                output.AddRange(lastResult);
            }

            // Return the results as a list so that any user will not cause the selector to be run again

            return output.OrderBy(item => item.NodePath, Implementation.PathKeyComparer.Comparer).ToList();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Select from the bound Document using index. Each clause of the bound <c>Selector</c> is
        /// resolved from the index where an attribute/tag/id/class key can be built; anything that
        /// remains is passed off to GetMatches.
        /// </summary>
        ///
        /// <exception cref="ArgumentNullException">
        /// Thrown when the bound <c>Selector</c> is null.
        /// </exception>
        ///
        /// <param name="context">
        /// The context in which the selector applies. If null, the selector is run against the entire
        /// Document. If not, the selector is run against this sequence of elements.
        /// </param>
        ///
        /// <returns>
        /// A de-duplicated list of elements matching the selector, ordered by Path (ordinal comparison).
        /// </returns>

        public List<IDomObject> Select(IEnumerable<IDomObject> context)
        {
            if (Selector == null)
            {
                // The single-string constructor of ArgumentNullException takes the parameter NAME, not
                // the message, so pass both explicitly.
                throw new ArgumentNullException("Selector", "The selector cannot be null.");
            }

            if (Selector.Count == 0)
            {
                return EmptyEnumerable().ToList();
            }

            // this holds the final output; a set, so an element matched by several groups appears once

            HashSet<IDomObject> output = new HashSet<IDomObject>();

            ActiveSelectors = new List<SelectorClause>(Selector);

            // First just check if we ended up here with an HTML selector; if so, hand it off.

            var firstSelector = ActiveSelectors[0];

            if (firstSelector.SelectorType == SelectorType.HTML)
            {
                HtmlParser.HtmlElementFactory factory =
                    new HtmlParser.HtmlElementFactory(firstSelector.Html);

                // Return the factory output as a list because otherwise the enumerator could end up
                // as the actual source of the selection, meaning it would get re-parsed each time

                return factory.ParseAsFragment();
            }

            // this holds any results that carried over from the previous loop for chaining

            IEnumerable<IDomObject> lastResult = null;

            // this is the source from which selections are made in a given iteration; it could be the DOM
            // root, a context, or the previous result set.

            IEnumerable<IDomObject> selectionSource = null;

            // The index is used when there is no context at all (the whole document is searched). With a
            // context it is used only if the first context element is connected and indexed. The first
            // element is fetched once so the context is not enumerated twice.

            bool useIndex;

            if (context.IsNullOrEmpty())
            {
                useIndex = true;
            }
            else
            {
                IDomObject first = context.First();
                useIndex = !first.IsDisconnected && first.IsIndexed;
            }

            for (activeSelectorId = 0; activeSelectorId < ActiveSelectors.Count; activeSelectorId++)
            {
                // we will alter the selector during each iteration to remove the parts that have already
                // been parsed, so use a copy.

                var selector = ActiveSelectors[activeSelectorId].Clone();

                if (lastResult != null && selector.CombinatorType == CombinatorType.Root)
                {
                    // This is a selector that was chained with the selector grouping combinator "," -- we
                    // always output the results so far when beginning a new group.

                    output.AddRange(lastResult);
                    lastResult = null;
                }

                // For "and" combinator types, we want to leave everything as it was -- the results of this
                // selector should compound with the prior. This is not an actual CSS combinator, this is the
                // equivalent of grouping parenthesis. That is, in CSS there's no way to say "(input[submit],
                // button):visible" - that is group the results on selector part and apply a filter to it. But
                // we need to do exactly this for certain selector types (for example the jQuery :button
                // selector).

                if (selector.CombinatorType != CombinatorType.Grouped)
                {
                    selectionSource = GetSelectionSource(selector, context, lastResult);
                    lastResult = null;
                }

                string key = "";
                SelectorType removeSelectorType = 0;

                if (useIndex && !selector.NoIndex)
                {
#if DEBUG_PATH
                    // Debug builds use human-readable index keys (plain names) instead of tokens.

                    if (selector.SelectorType.HasFlag(SelectorType.AttributeValue) &&
                        selector.AttributeSelectorType != AttributeSelectorType.NotExists &&
                        selector.AttributeSelectorType != AttributeSelectorType.NotEquals)
                    {
                        key = "!" + selector.AttributeName.ToLower();

                        // AttributeValue must still be matched manually - so remove this flag only if the
                        // selector is conclusive without further checking

                        if (selector.AttributeSelectorType == AttributeSelectorType.Exists)
                        {
                            removeSelectorType = SelectorType.AttributeValue;
                        }
                    }
                    else if (selector.SelectorType.HasFlag(SelectorType.Tag))
                    {
                        key = "+" + selector.Tag.ToLower();
                        removeSelectorType = SelectorType.Tag;
                    }
                    else if (selector.SelectorType.HasFlag(SelectorType.ID))
                    {
                        key = "#" + selector.ID;
                        removeSelectorType = SelectorType.ID;
                    }
                    else if (selector.SelectorType.HasFlag(SelectorType.Class))
                    {
                        key = "." + selector.Class;
                        removeSelectorType = SelectorType.Class;
                    }
#else
                    // We don't want to use the index for "NotEquals" selectors because a missing attribute
                    // is considered a valid match

                    if (selector.SelectorType.HasFlag(SelectorType.AttributeValue) &&
                        selector.AttributeSelectorType != AttributeSelectorType.NotExists &&
                        selector.AttributeSelectorType != AttributeSelectorType.NotEquals)
                    {
                        key = "!" + (char)HtmlData.Tokenize(selector.AttributeName);

                        // AttributeValue must still be matched manually - so remove this flag only if the
                        // selector is conclusive without further checking

                        if (selector.AttributeSelectorType == AttributeSelectorType.Exists)
                        {
                            removeSelectorType = SelectorType.AttributeValue;
                        }
                    }
                    else if (selector.SelectorType.HasFlag(SelectorType.Tag))
                    {
                        key = "+" + (char)HtmlData.Tokenize(selector.Tag);
                        removeSelectorType = SelectorType.Tag;
                    }
                    else if (selector.SelectorType.HasFlag(SelectorType.ID))
                    {
                        key = "#" + (char)HtmlData.TokenizeCaseSensitive(selector.ID);
                        removeSelectorType = SelectorType.ID;
                    }
                    else if (selector.SelectorType.HasFlag(SelectorType.Class))
                    {
                        key = "." + (char)HtmlData.TokenizeCaseSensitive(selector.Class);
                        removeSelectorType = SelectorType.Class;
                    }
#endif
                }

                // If part of the selector was indexed, key will not be empty. Return initial set from the
                // index. If any selectors remain after this they will be searched the hard way.

                IEnumerable<IDomObject> result = null;

                if (key != String.Empty)
                {
                    // This is the main index access point: if we have an index key, we'll get as much as we
                    // can from the index. Anything else will be handled manually.

                    // determine the type of traversal & depth for this selector

                    int depth = 0;
                    bool descendants = true;

                    switch (selector.TraversalType)
                    {
                    case TraversalType.Child:
                        depth = selector.ChildDepth;
                        descendants = false;
                        break;

                    case TraversalType.Filter:
                    case TraversalType.Adjacent:
                    case TraversalType.Sibling:
                        depth = 0;
                        descendants = false;
                        break;

                    case TraversalType.Descendent:
                        depth = 1;
                        descendants = true;
                        break;
                        // default: keep the values set above.
                    }

                    if (selectionSource == null)
                    {
                        // No selection source: query the index with the bare key.

                        result = Document.DocumentIndex.QueryIndex(key + HtmlData.indexSeparator, depth, descendants);
                    }
                    else
                    {
                        // Query the index once per source element, scoping the key to that element's
                        // path, and merge the matches into a set.

                        HashSet<IDomObject> elementMatches = new HashSet<IDomObject>();
                        result = elementMatches;

                        foreach (IDomObject obj in selectionSource)
                        {
                            elementMatches.AddRange(Document.DocumentIndex.QueryIndex(key + HtmlData.indexSeparator + obj.Path,
                                                                                      depth, descendants));
                        }
                    }

                    // The part answered by the index no longer needs manual matching.

                    selector.SelectorType &= ~removeSelectorType;

                    // Special case for attribute selectors: when Attribute Value attribute selector is present, we
                    // still need to filter for the correct value afterwards. But we need to change the traversal
                    // type because any nodes with the correct attribute type have already been selected.

                    if (selector.SelectorType.HasFlag(SelectorType.AttributeValue))
                    {
                        selector.TraversalType = TraversalType.Filter;
                    }
                }
                else if (selector.SelectorType.HasFlag(SelectorType.Elements))
                {
                    // Explicit element selector: keep those of selector.SelectElements that the index
                    // returns for a path-only key of some element reachable from the selection source.
                    // NOTE(review): presumably a path-only key returns the element(s) at/under that
                    // path -- confirm against the index implementation.

                    HashSet<IDomObject> elementMatches = new HashSet<IDomObject>();
                    result = elementMatches;

                    foreach (IDomObject obj in GetAllChildOrDescendants(selector.TraversalType, selectionSource))
                    {
                        HashSet<IDomObject> srcKeys = new HashSet<IDomObject>(Document.DocumentIndex.QueryIndex(HtmlData.indexSeparator + obj.Path));

                        foreach (IDomObject match in selector.SelectElements)
                        {
                            if (srcKeys.Contains(match))
                            {
                                elementMatches.Add(match);
                            }
                        }
                    }

                    selector.SelectorType &= ~SelectorType.Elements;
                }

                // If any selectors were not handled via the index, match them manually

                if (selector.SelectorType != 0)
                {
                    // if there are no temporary results (b/c there was no indexed selector) then use selection
                    // source instead (e.g. start from the same point that the index would have)

                    result = GetMatches(result ?? selectionSource ?? Document.ChildElements, selector);
                }

                // result stays null only when nothing was indexed and nothing was left to match; in that
                // case there is nothing to accumulate (and Concat would reject a null sequence).

                if (result != null)
                {
                    lastResult = lastResult == null ?
                                 result : lastResult.Concat(result);
                }
            }

            // After the loop has finished, output any results from the last iteration.

            if (lastResult != null)
            {
                output.AddRange(lastResult);
            }

            // Return the results as a list so that any user will not cause the selector to be run again

            return output.OrderBy(item => item.Path, StringComparer.Ordinal).ToList();
        }