コード例 #1
0
 /// <summary>
 /// Initializes a new instance of the DocumentProducerTreeComparableTask class.
 /// </summary>
 /// <param name="producer">The document producer tree this task is bound to; it is stored on the instance.</param>
 /// <param name="taskPriorityFunction">
 /// Callback that is invoked exactly once with <paramref name="producer"/> during construction.
 /// Its integer result is passed to the base class constructor (presumably as the task's scheduling priority).
 /// </param>
 /// <remarks>
 /// The callback runs as part of the base constructor call, i.e. before the constructor body executes.
 /// No argument validation is performed here, so a null callback fails at that call.
 /// </remarks>
 public DocumentProducerTreeComparableTask(
     DocumentProducerTree producer,
     Func <DocumentProducerTree, int> taskPriorityFunction)
     : base(taskPriorityFunction(producer))
 {
     this.producer = producer;
 }
コード例 #2
0
 /// <summary>
 /// Wraps a document producer tree in a comparable task and attempts to queue that task on the comparable task scheduler.
 /// </summary>
 /// <param name="documentProducerTree">The document producer tree to schedule a fetch for.</param>
 /// <returns>Whatever the scheduler's TryQueueTask call reports, i.e. whether the fetch was queued.</returns>
 private bool TryScheduleFetch(DocumentProducerTree documentProducerTree)
 {
     // The task derives its priority from the tree through the configured priority callback.
     DocumentProducerTreeComparableTask fetchTask = new DocumentProducerTreeComparableTask(
         documentProducerTree,
         this.fetchPrioirtyFunction);

     // The second argument is the default TimeSpan value (TimeSpan.Zero).
     bool wasQueued = this.comparableTaskScheduler.TryQueueTask(fetchTask, default(TimeSpan));
     return wasQueued;
 }
コード例 #3
0
        /// <summary>
        /// Drains up to one page of documents from the current (leftmost) document producer tree.
        /// </summary>
        /// <param name="maxElements">Upper bound on the number of documents to drain in this call.</param>
        /// <param name="token">The cancellation token that is forwarded to every MoveNextAsync call.</param>
        /// <returns>A task that, when awaited, yields a FeedResponse holding the drained documents.</returns>
        public override async Task <FeedResponse <CosmosElement> > DrainAsync(int maxElements, CancellationToken token)
        {
            // To keep the user's continuation token valid, draining obeys two constraints:
            // 1) The leftmost partition is fully drained before the next partition is touched.
            // 2) Only whole pages are taken from a document producer, so no partial page is left behind;
            //    otherwise the continuation token would have to record how many items to skip on that page.

            // Take the leftmost (current) document producer tree; nothing else is drained in this call.
            DocumentProducerTree leftMostTree = this.PopCurrentDocumentProducerTree();

            // A tree that has never been read has no current document yet, so buffer its first one.
            if (leftMostTree.Current == null)
            {
                await leftMostTree.MoveNextAsync(token);
            }

            // Drain the rest of the current page, or fewer documents if the caller asked for less (e.g. a TOP query).
            int drainCount = Math.Min(leftMostTree.ItemsLeftInCurrentPage, maxElements);

            List<CosmosElement> drained = new List<CosmosElement>();
            int drainedSoFar = 0;
            while (drainedSoFar < drainCount)
            {
                drained.Add(leftMostTree.Current);
                await leftMostTree.MoveNextAsync(token);
                drainedSoFar++;
            }

            // Only trees that can still produce documents go back into the queue.
            if (leftMostTree.HasMoreResults)
            {
                this.PushCurrentDocumentProducerTree(leftMostTree);
            }

            // By now the tree is expected to have moved to its next page internally, since a page was fully drained.
            return new FeedResponse<CosmosElement>(
                drained,
                drained.Count,
                this.GetResponseHeaders(),
                false,
                this.GetQueryMetrics(),
                null,
                null,
                this.GetAndResetResponseLengthBytes());
        }
コード例 #4
0
        /// <summary>
        /// After a split you need to maintain the continuation tokens for all the child document producers until a condition is met.
        /// For example, let's say that a document producer is at continuation X and it gets split;
        /// then the children each get continuation X, but since you only drain from one of them at a time you are left with the first child having
        /// continuation X + delta and the second child having continuation X (draw this out if you are following along).
        /// At this point you have to answer the question: "Which continuation token do you return to the user?".
        /// Let's say you return X: then when you come back to the first child you will be repeating work, thus returning some documents more than once.
        /// Let's say you return X + delta: then you are fine when you return to the first child, but when you get to the second child you don't have a continuation token,
        /// meaning that you will be repeating all the documents for the second partition up until X, and again you will be returning some documents more than once.
        /// Thus you have to return the continuation token for both children.
        /// But this means you are returning more than 1 continuation token for the rest of the query.
        /// A naive optimization is to flush the continuation for a child partition once you are done draining from it, which isn't bad for a parallel query,
        /// but if you have an order by query you might not be done with a producer until the end of the query.
        /// The next optimization for a parallel query is to flush the continuation token the moment you start reading from a child partition.
        /// This works for a parallel query, but breaks for an order by query.
        /// The final realization is that for an order by query you are only choosing between multiple child partitions when there is a tie,
        /// so the key is that you can dump the continuation token the moment you come across a new order by item.
        /// For order by queries that is determined by the order by field, and for parallel queries that is the moment you come by a new rid (which is any document, since rids are unique within a partition).
        /// So by passing an equality comparer to the document producers they can determine whether they are still "active".
        /// </summary>
        /// <returns>
        /// Returns all document producers whose continuation token you have to return.
        /// Only during a split will this list contain more than 1 item.
        /// </returns>
        /// <remarks>
        /// Execution is deferred: the forest is inspected when the returned sequence is first enumerated.
        /// The result is collected into a snapshot while the lock on the forest is held, and is only yielded after the lock has been released,
        /// so the lock is never held while the caller's code runs between items.
        /// </remarks>
        public IEnumerable <DocumentProducer> GetActiveDocumentProducers()
        {
            List<DocumentProducer> activeDocumentProducers = new List<DocumentProducer>();

            lock (this.documentProducerForest)
            {
                // NOTE(review): this peeks without checking for an empty forest — confirm callers only ask while trees remain.
                DocumentProducerTree current = this.documentProducerForest.Peek().CurrentDocumentProducerTree;
                if (current.HasMoreResults && !current.IsActive)
                {
                    // The current document producer tree has more results but isn't active.
                    // It still has to be emitted, since the loop below will not pick it up.
                    activeDocumentProducers.Add(current.Root);
                }

                foreach (DocumentProducerTree documentProducerTree in this.documentProducerForest)
                {
                    foreach (DocumentProducer documentProducer in documentProducerTree.GetActiveDocumentProducers())
                    {
                        activeDocumentProducers.Add(documentProducer);
                    }
                }
            }

            // Yield outside of the lock: yielding inside it would keep the monitor held across
            // MoveNext calls, for as long as the caller takes to consume (or abandon) the sequence.
            foreach (DocumentProducer activeDocumentProducer in activeDocumentProducers)
            {
                yield return activeDocumentProducer;
            }
        }
コード例 #5
0
        /// <summary>
        /// Function that is given to all the document producers to call on once they are done fetching.
        /// This is so that the CrossPartitionQueryExecutionContext can aggregate metadata from them.
        /// </summary>
        /// <param name="producer">The document producer tree that just finished fetching.</param>
        /// <param name="itemsBuffered">The number of items that the producer just fetched; added to the total buffered item count.</param>
        /// <param name="resourceUnitUsage">The amount of RUs that the producer just consumed; added to the request charge tracker.</param>
        /// <param name="queryMetrics">The query metrics that the producer just got back from the backend; recorded together with the partition key range id.</param>
        /// <param name="responseLengthBytes">The length of the response the producer just got back in bytes.</param>
        /// <param name="token">The cancellation token. It is not used by this method.</param>
        /// <remarks>
        /// This function is by nature a bit racy.
        /// A query might be fully drained but a background task is still fetching documents so this will get called after the context is done.
        /// </remarks>
        private void OnDocumentProducerTreeCompleteFetching(
            DocumentProducerTree producer,
            int itemsBuffered,
            double resourceUnitUsage,
            QueryMetrics queryMetrics,
            long responseLengthBytes,
            CancellationToken token)
        {
            // Update charge and states
            this.requestChargeTracker.AddCharge(resourceUnitUsage);
            Interlocked.Add(ref this.totalBufferedItems, itemsBuffered);
            this.IncrementResponseLengthBytes(responseLengthBytes);
            this.partitionedQueryMetrics.Add(Tuple.Create(producer.PartitionKeyRange.Id, queryMetrics));

            // Grow the producer page size by the adjustment factor, capped at the maximum page size,
            // so that we reach the optimal page size.
            producer.PageSize = Math.Min((long)(producer.PageSize * DynamicPageSizeAdjustmentFactor), this.actualMaxPageSize);

            // Adjust Max Degree Of Parallelism if necessary
            // (needs to wait for comparable task scheduler refactor).

            // Fetch again if necessary
            if (producer.HasMoreBackendResults)
            {
                // 4mb is the max response size
                long expectedResponseSize = Math.Min(producer.PageSize, 4 * 1024 * 1024);
                if (this.CanPrefetch && this.FreeItemSpace > expectedResponseSize)
                {
                    this.TryScheduleFetch(producer);
                }
            }

            // The format string has exactly four placeholders, so exactly four arguments are supplied.
            this.TraceVerbose(string.Format(
                CultureInfo.InvariantCulture,
                "Id: {0}, size: {1}, resourceUnitUsage: {2}, taskScheduler.CurrentRunningTaskCount: {3}",
                producer.PartitionKeyRange.Id,
                itemsBuffered,
                resourceUnitUsage,
                this.comparableTaskScheduler.CurrentRunningTaskCount));
        }
コード例 #6
0
        /// <summary>
        /// Initializes cross partition query execution context by initializing the necessary document producers.
        /// </summary>
        /// <param name="collectionRid">The collection to drain from.</param>
        /// <param name="partitionKeyRanges">The partitions to target; one document producer tree is created per range.</param>
        /// <param name="initialPageSize">The page size to start the document producers off with.</param>
        /// <param name="querySpecForInit">The query specification for the rewritten query; used to build each document service request.</param>
        /// <param name="targetRangeToContinuationMap">Map from partition key range id to its corresponding continuation token. May be null; ranges without an entry start with a null continuation token.</param>
        /// <param name="deferFirstPage">Whether or not we should defer the fetch of the first page from each partition.</param>
        /// <param name="filter">The filter to inject in the predicate; assigned to every document producer tree.</param>
        /// <param name="filterCallback">The callback used to filter each partition. May be null, in which case no filtering is done.</param>
        /// <param name="token">The cancellation token.</param>
        /// <returns>A task to await on.</returns>
        protected async Task InitializeAsync(
            string collectionRid,
            IReadOnlyList <PartitionKeyRange> partitionKeyRanges,
            int initialPageSize,
            SqlQuerySpec querySpecForInit,
            Dictionary <string, string> targetRangeToContinuationMap,
            bool deferFirstPage,
            string filter,
            Func <DocumentProducerTree, Task> filterCallback,
            CancellationToken token)
        {
            CollectionCache collectionCache = await this.Client.GetCollectionCacheAsync();

            INameValueCollection requestHeaders = await this.CreateCommonHeadersAsync(this.GetFeedOptions(null));

            // NOTE(review): the trace serializes this.querySpec, not querySpecForInit — confirm that is intended.
            this.TraceInformation(string.Format(
                CultureInfo.InvariantCulture,
                "parallel~contextbase.initializeasync, queryspec {0}, maxbuffereditemcount: {1}, target partitionkeyrange count: {2}, maximumconcurrencylevel: {3}, documentproducer initial page size {4}",
                JsonConvert.SerializeObject(this.querySpec, DefaultJsonSerializationSettings.Value),
                this.actualMaxBufferedItemCount,
                partitionKeyRanges.Count,
                this.comparableTaskScheduler.MaximumConcurrencyLevel,
                initialPageSize));

            List <DocumentProducerTree> documentProducerTrees = new List <DocumentProducerTree>();

            foreach (PartitionKeyRange partitionKeyRange in partitionKeyRanges)
            {
                // Single dictionary lookup: TryGetValue leaves the token null when the range has no entry.
                string initialContinuationToken = null;
                if (targetRangeToContinuationMap != null)
                {
                    targetRangeToContinuationMap.TryGetValue(partitionKeyRange.Id, out initialContinuationToken);
                }

                DocumentProducerTree documentProducerTree = new DocumentProducerTree(
                    partitionKeyRange,
                    //// Create Document Service Request callback
                    (pkRange, continuationToken, pageSize) =>
                    {
                        // Every request gets its own copy of the common headers so that the
                        // continuation and page size of one request do not leak into another.
                        INameValueCollection headers = requestHeaders.Clone();
                        headers[HttpConstants.HttpHeaders.Continuation] = continuationToken;
                        headers[HttpConstants.HttpHeaders.PageSize] = pageSize.ToString(CultureInfo.InvariantCulture);
                        return this.CreateDocumentServiceRequest(
                            headers,
                            querySpecForInit,
                            pkRange,
                            collectionRid);
                    },
                    this.ExecuteRequestLazyAsync,
                    //// Retry policy callback
                    () => new NonRetriableInvalidPartitionExceptionRetryPolicy(collectionCache, this.Client.ResetSessionTokenRetryPolicy.GetRequestPolicy()),
                    this.OnDocumentProducerTreeCompleteFetching,
                    this.documentProducerForest.Comparer as IComparer <DocumentProducerTree>,
                    this.equalityComparer,
                    this.Client,
                    deferFirstPage,
                    collectionRid,
                    initialPageSize,
                    initialContinuationToken);

                documentProducerTree.Filter = filter;

                // Prefetch if necessary, and populate consume queue.
                if (this.CanPrefetch)
                {
                    this.TryScheduleFetch(documentProducerTree);
                }

                documentProducerTrees.Add(documentProducerTree);
            }

            // Using loop fission so that we can load the document producers in parallel:
            // all fetches are scheduled in the loop above before any of them is awaited here.
            foreach (DocumentProducerTree documentProducerTree in documentProducerTrees)
            {
                if (!deferFirstPage)
                {
                    await documentProducerTree.MoveNextIfNotSplitAsync(token);
                }

                if (filterCallback != null)
                {
                    await filterCallback(documentProducerTree);
                }

                // Trees that have nothing (left) to return never enter the forest.
                if (documentProducerTree.HasMoreResults)
                {
                    this.documentProducerForest.Enqueue(documentProducerTree);
                }
            }
        }
コード例 #7
0
 /// <summary>
 /// Puts a document producer tree back into the forest by enqueueing it.
 /// </summary>
 /// <param name="documentProducerTree">The document producer tree to enqueue.</param>
 public void PushCurrentDocumentProducerTree(DocumentProducerTree documentProducerTree)
     => this.documentProducerForest.Enqueue(documentProducerTree);
コード例 #8
0
        /// <summary>
        /// When resuming an order by query we need to filter the document producers:
        /// every tree enumerated from <paramref name="producer"/> is advanced until its current document
        /// is at (or past) the position recorded in the continuation token.
        /// </summary>
        /// <param name="producer">The producer to filter down; each tree it enumerates is advanced independently.</param>
        /// <param name="sortOrders">The sort orders, one per order by item; Descending flips the comparison for that item.</param>
        /// <param name="continuationToken">The continuation token supplying the order by items, rid and skip count to resume from.</param>
        /// <param name="cancellationToken">The cancellation token, forwarded to MoveNextAsync.</param>
        /// <returns>A task to await on.</returns>
        /// <exception cref="BadRequestException">
        /// Thrown when the rid of the continuation token or of a matching document cannot be parsed,
        /// or when the two rids belong to different databases or collections.
        /// </exception>
        private async Task FilterAsync(
            DocumentProducerTree producer,
            SortOrder[] sortOrders,
            OrderByContinuationToken continuationToken,
            CancellationToken cancellationToken)
        {
            // When we resume a query on a partition there is a possibility that we only read a partial page from the backend
            // meaning that we will repeat some documents if we didn't do anything about it.
            // The solution is to filter all the documents that come before in the sort order, since we have already emitted them to the client.
            // The key is to seek until we get an order by value that matches the order by value we left off on.
            // Once we do that we need to seek to the correct _rid within the term,
            // since there might be many documents with the same order by value we left off on.

            foreach (DocumentProducerTree tree in producer)
            {
                // The continuation rid is parsed (and validated) once per tree.
                if (!ResourceId.TryParse(continuationToken.Rid, out ResourceId continuationRid))
                {
                    this.TraceWarning(string.Format(
                                          CultureInfo.InvariantCulture,
                                          "Invalid Rid in the continuation token {0} for OrderBy~Context.",
                                          continuationToken.CompositeContinuationToken.Token));
                    throw new BadRequestException(RMResources.InvalidContinuationToken);
                }

                // Per-tree state: a cache of already parsed document rids (keyed by their string form),
                // the number of matching documents still to skip, and whether the continuation rid
                // has been checked against a document of this tree yet.
                Dictionary <string, ResourceId> resourceIds = new Dictionary <string, ResourceId>();
                int  itemToSkip = continuationToken.SkipCount;
                bool continuationRidVerified = false;

                while (true)
                {
                    OrderByQueryResult orderByResult = new OrderByQueryResult(tree.Current);
                    // Throw away documents until it matches the item from the continuation token.
                    // The order by items are compared pairwise; the first pair that differs decides the result,
                    // with the sign flipped when that item is sorted in descending order.
                    int cmp = 0;
                    for (int i = 0; i < sortOrders.Length; ++i)
                    {
                        cmp = ItemComparer.Instance.Compare(
                            continuationToken.OrderByItems[i].Item,
                            orderByResult.OrderByItems[i].Item);

                        if (cmp != 0)
                        {
                            cmp = sortOrders[i] != SortOrder.Descending ? cmp : -cmp;
                            break;
                        }
                    }

                    if (cmp < 0)
                    {
                        // We might have passed the item due to deletions and filters.
                        break;
                    }

                    if (cmp == 0)
                    {
                        // The order by items tie with the continuation token, so the rid decides.
                        ResourceId rid;
                        if (!resourceIds.TryGetValue(orderByResult.Rid, out rid))
                        {
                            // NOTE(review): here it is the document's rid that failed to parse,
                            // although the trace message refers to the continuation token.
                            if (!ResourceId.TryParse(orderByResult.Rid, out rid))
                            {
                                this.TraceWarning(string.Format(
                                                      CultureInfo.InvariantCulture,
                                                      "Invalid Rid in the continuation token {0} for OrderBy~Context.",
                                                      continuationToken.CompositeContinuationToken.Token));
                                throw new BadRequestException(RMResources.InvalidContinuationToken);
                            }

                            resourceIds.Add(orderByResult.Rid, rid);
                        }

                        // Checked only once per tree: the continuation rid must point into the same
                        // database and collection as the documents that are being filtered.
                        if (!continuationRidVerified)
                        {
                            if (continuationRid.Database != rid.Database || continuationRid.DocumentCollection != rid.DocumentCollection)
                            {
                                this.TraceWarning(string.Format(
                                                      CultureInfo.InvariantCulture,
                                                      "Invalid Rid in the continuation token {0} for OrderBy~Context.",
                                                      continuationToken.CompositeContinuationToken.Token));
                                throw new BadRequestException(RMResources.InvalidContinuationToken);
                            }

                            continuationRidVerified = true;
                        }

                        // Once the item matches the order by items from the continuation tokens
                        // We still need to remove all the documents that have a lower rid in the rid sort order.
                        // If there is a tie in the sort order the documents should be in _rid order in the same direction as the first order by field.
                        // So if it's ORDER BY c.age ASC, c.name DESC the _rids are ASC
                        // If it's ORDER BY c.age DESC, c.name DESC the _rids are DESC
                        cmp = continuationRid.Document.CompareTo(rid.Document);
                        if (sortOrders[0] == SortOrder.Descending)
                        {
                            cmp = -cmp;
                        }

                        // We might have passed the item due to deletions and filters.
                        // We also have a skip count for JOINs
                        // The skip count is only decremented when the rids are equal (short-circuit evaluation);
                        // the loop stops at the first equal-rid document seen after the count is used up.
                        if (cmp < 0 || (cmp == 0 && itemToSkip-- <= 0))
                        {
                            break;
                        }
                    }

                    // Discard the current document; stop when the tree reports it could not move any further.
                    if (!await tree.MoveNextAsync(cancellationToken))
                    {
                        break;
                    }
                }
            }
        }
コード例 #9
0
        /// <summary>
        /// Drains a page of documents from this context, always taking the next document
        /// from the document producer tree that currently has the highest priority.
        /// </summary>
        /// <param name="maxElements">The maximum number of elements to put on the page.</param>
        /// <param name="cancellationToken">The cancellation token, forwarded to MoveNextAsync.</param>
        /// <returns>A task that, when awaited on, returns a page of documents.</returns>
        public override async Task <FeedResponse <CosmosElement> > DrainAsync(int maxElements, CancellationToken cancellationToken)
        {
            //// In order to maintain the continuation token for the user we must drain with a few constraints
            //// 1) We always drain from the partition, which has the highest priority item first
            //// 2) If multiple partitions have the same priority item then we drain from the left most first
            ////   otherwise we would need to keep track of how many of each item we drained from each partition
            ////   (just like parallel queries).
            //// Visually that looks like the following case where we have three partitions that are numbered and store letters.
            //// For teaching purposes each item is a tuple of the following form:
            ////      <item stored in partition, partition number>
            //// So that duplicates across partitions are distinct, but duplicates within partitions are indistinguishable.
            ////      |-------|   |-------|   |-------|
            ////      | <a,1> |   | <a,2> |   | <a,3> |
            ////      | <a,1> |   | <b,2> |   | <c,3> |
            ////      | <a,1> |   | <b,2> |   | <c,3> |
            ////      | <d,1> |   | <c,2> |   | <c,3> |
            ////      | <d,1> |   | <e,2> |   | <f,3> |
            ////      | <e,1> |   | <h,2> |   | <j,3> |
            ////      | <f,1> |   | <i,2> |   | <k,3> |
            ////      |-------|   |-------|   |-------|
            //// Now the correct drain order in this case is:
            ////  <a,1>,<a,1>,<a,1>,<a,2>,<a,3>,<b,2>,<b,2>,<c,2>,<c,3>,<c,3>,<c,3>,
            ////  <d,1>,<d,1>,<e,1>,<e,2>,<f,1>,<f,3>,<h,2>,<i,2>,<j,3>,<k,3>
            //// In more mathematical terms
            ////  1) <x, y> always comes before <z, y> where x < z
            ////  2) <i, j> always comes before <i, k> where j < k

            List<CosmosElement> page = new List<CosmosElement>();

            while (true)
            {
                // Stop as soon as the context is exhausted or the page is full.
                if (this.IsDone || page.Count >= maxElements)
                {
                    break;
                }

                // Only the highest priority tree is drained. It is popped here and pushed back below,
                // because its priority changes with the sort order once it has moved to its next document.
                DocumentProducerTree highestPriorityTree = this.PopCurrentDocumentProducerTree();
                OrderByQueryResult head = new OrderByQueryResult(highestPriorityTree.Current);

                // The caller only gets the payload; the rest of the order by result is internal bookkeeping.
                page.Add(head.Payload);

                // A JOIN can make one document show up several times, even across continuations.
                // While we keep seeing the rid of the previous page at the beginning of this page the skip count grows,
                // so that the document is not surfaced more often than needed; any other document resets the count.
                // More information can be found in the continuation token docs.
                if (!this.ShouldIncrementSkipCount(highestPriorityTree.CurrentDocumentProducerTree.Root))
                {
                    this.skipCount = 0;
                }
                else
                {
                    this.skipCount++;
                }

                this.previousRid = head.Rid;

                await highestPriorityTree.MoveNextAsync(cancellationToken);

                this.PushCurrentDocumentProducerTree(highestPriorityTree);
            }

            return new FeedResponse<CosmosElement>(
                page,
                page.Count,
                this.GetResponseHeaders(),
                false,
                this.GetQueryMetrics(),
                null,
                null,
                this.GetAndResetResponseLengthBytes());
        }