/// <summary>
/// Initializes a new instance of the DocumentProducerTreeComparableTask class.
/// </summary>
/// <param name="producer">The producer to fetch from.</param>
/// <param name="taskPriorityFunction">The callback to determine the fetch priority of the document producer.</param>
public DocumentProducerTreeComparableTask(
    DocumentProducerTree producer,
    Func<DocumentProducerTree, int> taskPriorityFunction)
    // The priority callback is invoked eagerly here, before the body runs;
    // a null taskPriorityFunction or a producer it rejects will throw from the base call.
    : base(taskPriorityFunction(producer))
{
    this.producer = producer;
}
/// <summary>
/// Tries to schedule a fetch from the document producer tree.
/// </summary>
/// <param name="documentProducerTree">The document producer tree to schedule a fetch for.</param>
/// <returns>Whether or not the fetch was successfully scheduled.</returns>
private bool TryScheduleFetch(DocumentProducerTree documentProducerTree)
{
    // Wrap the producer in a comparable task so the scheduler can prioritize it.
    // NOTE(review): "fetchPrioirtyFunction" is misspelled at its declaration elsewhere in
    // this file; correcting it requires a file-wide rename, so the name is preserved here.
    DocumentProducerTreeComparableTask fetchTask = new DocumentProducerTreeComparableTask(
        documentProducerTree,
        this.fetchPrioirtyFunction);

    // default(TimeSpan) means no queueing delay.
    return this.comparableTaskScheduler.TryQueueTask(fetchTask, default(TimeSpan));
}
/// <summary>
/// Drains documents from this execution context.
/// </summary>
/// <param name="maxElements">The maximum number of documents to drain.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>A task that when awaited on returns a FeedResponse of results.</returns>
public override async Task<FeedResponse<CosmosElement>> DrainAsync(int maxElements, CancellationToken token)
{
    // In order to maintain the continuation token for the user we must drain with a few constraints
    // 1) We fully drain from the left most partition before moving on to the next partition
    // 2) We drain only full pages from the document producer so we aren't left with a partial page
    //    otherwise we would need to add to the continuation token how many items to skip over on that page.

    // Only drain from the leftmost (current) document producer tree
    DocumentProducerTree currentDocumentProducerTree = this.PopCurrentDocumentProducerTree();

    // This might be the first time we have seen this document producer tree so we need to buffer documents
    if (currentDocumentProducerTree.Current == null)
    {
        await currentDocumentProducerTree.MoveNextAsync(token);
    }

    int itemsLeftInCurrentPage = currentDocumentProducerTree.ItemsLeftInCurrentPage;

    // Only drain full pages or less if this is a top query.
    // (Math.Min caps the drain at whichever is smaller: the rest of this page or the caller's budget.)
    List<CosmosElement> results = new List<CosmosElement>();
    for (int i = 0; i < Math.Min(itemsLeftInCurrentPage, maxElements); i++)
    {
        results.Add(currentDocumentProducerTree.Current);
        await currentDocumentProducerTree.MoveNextAsync(token);
    }

    // Only re-enqueue the tree if it still has results; otherwise it is dropped from the forest.
    if (currentDocumentProducerTree.HasMoreResults)
    {
        this.PushCurrentDocumentProducerTree(currentDocumentProducerTree);
    }

    // At this point the document producer tree should have internally called MoveNextPage, since we fully drained a page.
    return (new FeedResponse<CosmosElement>(
        results,
        results.Count,
        this.GetResponseHeaders(),
        false,
        this.GetQueryMetrics(),
        null,
        null,
        this.GetAndResetResponseLengthBytes()));
}
/// <summary>
/// After a split you need to maintain the continuation tokens for all the child document producers until a condition is met.
/// For example lets say that a document producer is at continuation X and it gets split,
/// then the children each get continuation X, but since you only drain from one of them at a time you are left with the first child having
/// continuation X + delta and the second child having continuation X (draw this out if you are following along).
/// At this point you have to answer the question: "Which continuation token do you return to the user?".
/// Let's say you return X, then when you come back to the first child you will be repeating work, thus returning some documents more than once.
/// Let's say you return X + delta, then you are fine when you return to the first child, but when you get to the second child you don't have a continuation token
/// meaning that you will be repeating all the documents for the second partition up until X and again you will be returning some documents more than once.
/// Thus you have to return the continuation token for both children.
/// But this means you are returning more than 1 continuation token for the rest of the query.
/// Well a naive optimization is to flush the continuation for a child partition once you are done draining from it, which isn't bad for a parallel query,
/// but if you have an order by query you might not be done with a producer until the end of the query.
/// The next optimization for a parallel query is to flush the continuation token the moment you start reading from a child partition.
/// This works for a parallel query, but breaks for an order by query.
/// The final realization is that for an order by query you are only choosing between multiple child partitions when there is a tie,
/// so the key is that you can dump the continuation token the moment you come across a new order by item.
/// For order by queries that is determined by the order by field and for parallel queries that is the moment you come by a new rid (which is any document, since rids are unique within a partition).
/// So by passing an equality comparer to the document producers they can determine whether they are still "active".
/// </summary>
/// <returns>
/// Returns all document producers whose continuation token you have to return.
/// Only during a split will this list contain more than 1 item.
/// </returns>
/// <remarks>
/// NOTE(review): this is an iterator that yields while holding the lock on documentProducerForest.
/// The monitor stays held between MoveNext calls until the enumerator completes or is disposed,
/// so callers should enumerate promptly and to completion (or dispose the enumerator).
/// </remarks>
public IEnumerable<DocumentProducer> GetActiveDocumentProducers()
{
    lock (this.documentProducerForest)
    {
        DocumentProducerTree current = this.documentProducerForest.Peek().CurrentDocumentProducerTree;
        if (current.HasMoreResults && !current.IsActive)
        {
            // If the current document producer tree has more results, but isn't active,
            // then we still want to emit it, since it won't get picked up in the below for loop.
            yield return (current.Root);
        }

        // Emit the active producers from every tree in the forest.
        foreach (DocumentProducerTree documentProducerTree in this.documentProducerForest)
        {
            foreach (DocumentProducer documentProducer in documentProducerTree.GetActiveDocumentProducers())
            {
                yield return (documentProducer);
            }
        }
    }
}
/// <summary>
/// Function that is given to all the document producers to call on once they are done fetching.
/// This is so that the CrossPartitionQueryExecutionContext can aggregate metadata from them.
/// </summary>
/// <param name="producer">The document producer that just finished fetching.</param>
/// <param name="itemsBuffered">The number of items that the producer just fetched.</param>
/// <param name="resourceUnitUsage">The amount of RUs that the producer just consumed.</param>
/// <param name="queryMetrics">The query metrics that the producer just got back from the backend.</param>
/// <param name="responseLengthBytes">The length of the response the producer just got back in bytes.</param>
/// <param name="token">The cancellation token.</param>
/// <remarks>
/// This function is by nature a bit racy.
/// A query might be fully drained but a background task is still fetching documents so this will get called after the context is done.
/// </remarks>
private void OnDocumentProducerTreeCompleteFetching(
    DocumentProducerTree producer,
    int itemsBuffered,
    double resourceUnitUsage,
    QueryMetrics queryMetrics,
    long responseLengthBytes,
    CancellationToken token)
{
    // Update charge and states
    this.requestChargeTracker.AddCharge(resourceUnitUsage);
    Interlocked.Add(ref this.totalBufferedItems, itemsBuffered);
    this.IncrementResponseLengthBytes(responseLengthBytes);
    this.partitionedQueryMetrics.Add(Tuple.Create(producer.PartitionKeyRange.Id, queryMetrics));

    // Adjust the producer page size so that we reach the optimal page size.
    producer.PageSize = Math.Min((long)(producer.PageSize * DynamicPageSizeAdjustmentFactor), this.actualMaxPageSize);

    // Adjust max degree of parallelism if necessary
    // (needs to wait for comparable task scheduler refactor).

    // Fetch again if necessary
    if (producer.HasMoreBackendResults)
    {
        // 4mb is the max response size
        long expectedResponseSize = Math.Min(producer.PageSize, 4 * 1024 * 1024);
        if (this.CanPrefetch && this.FreeItemSpace > expectedResponseSize)
        {
            this.TryScheduleFetch(producer);
        }
    }

    this.TraceVerbose(string.Format(
        CultureInfo.InvariantCulture,
        // BUGFIX: the format string previously stopped at {3}, so the CorrelatedActivityId
        // argument was silently ignored by string.Format (extra arguments beyond the highest
        // format item are dropped) and never appeared in the trace.
        "Id: {0}, size: {1}, resourceUnitUsage: {2}, taskScheduler.CurrentRunningTaskCount: {3}, correlatedActivityId: {4}",
        producer.PartitionKeyRange.Id,
        itemsBuffered,
        resourceUnitUsage,
        this.comparableTaskScheduler.CurrentRunningTaskCount,
        this.CorrelatedActivityId));
}
/// <summary>
/// Initializes cross partition query execution context by initializing the necessary document producers.
/// </summary>
/// <param name="collectionRid">The collection to drain from.</param>
/// <param name="partitionKeyRanges">The partitions to target.</param>
/// <param name="initialPageSize">The page size to start the document producers off with.</param>
/// <param name="querySpecForInit">The query specification for the rewritten query.</param>
/// <param name="targetRangeToContinuationMap">Map from partition to its corresponding continuation token.</param>
/// <param name="deferFirstPage">Whether or not we should defer the fetch of the first page from each partition.</param>
/// <param name="filter">The filter to inject in the predicate.</param>
/// <param name="filterCallback">The callback used to filter each partition.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>A task to await on.</returns>
protected async Task InitializeAsync(
    string collectionRid,
    IReadOnlyList<PartitionKeyRange> partitionKeyRanges,
    int initialPageSize,
    SqlQuerySpec querySpecForInit,
    Dictionary<string, string> targetRangeToContinuationMap,
    bool deferFirstPage,
    string filter,
    Func<DocumentProducerTree, Task> filterCallback,
    CancellationToken token)
{
    CollectionCache collectionCache = await this.Client.GetCollectionCacheAsync();
    INameValueCollection requestHeaders = await this.CreateCommonHeadersAsync(this.GetFeedOptions(null));

    this.TraceInformation(string.Format(
        CultureInfo.InvariantCulture,
        "parallel~contextbase.initializeasync, queryspec {0}, maxbuffereditemcount: {1}, target partitionkeyrange count: {2}, maximumconcurrencylevel: {3}, documentproducer initial page size {4}",
        JsonConvert.SerializeObject(this.querySpec, DefaultJsonSerializationSettings.Value),
        this.actualMaxBufferedItemCount,
        partitionKeyRanges.Count,
        this.comparableTaskScheduler.MaximumConcurrencyLevel,
        initialPageSize));

    // First pass: build one document producer tree per target partition.
    List<DocumentProducerTree> documentProducerTrees = new List<DocumentProducerTree>();
    foreach (PartitionKeyRange partitionKeyRange in partitionKeyRanges)
    {
        // Resume this partition from its own continuation token, if the caller supplied one.
        string initialContinuationToken = (targetRangeToContinuationMap != null && targetRangeToContinuationMap.ContainsKey(partitionKeyRange.Id)) ? targetRangeToContinuationMap[partitionKeyRange.Id] : null;
        DocumentProducerTree documentProducerTree = new DocumentProducerTree(
            partitionKeyRange,
            //// Create Document Service Request callback
            //// (captures requestHeaders, querySpecForInit, and collectionRid from this scope).
            (pkRange, continuationToken, pageSize) =>
            {
                INameValueCollection headers = requestHeaders.Clone();
                headers[HttpConstants.HttpHeaders.Continuation] = continuationToken;
                headers[HttpConstants.HttpHeaders.PageSize] = pageSize.ToString(CultureInfo.InvariantCulture);
                return (this.CreateDocumentServiceRequest(
                    headers,
                    querySpecForInit,
                    pkRange,
                    collectionRid));
            },
            this.ExecuteRequestLazyAsync,
            //// Retry policy callback
            () => new NonRetriableInvalidPartitionExceptionRetryPolicy(collectionCache, this.Client.ResetSessionTokenRetryPolicy.GetRequestPolicy()),
            this.OnDocumentProducerTreeCompleteFetching,
            this.documentProducerForest.Comparer as IComparer<DocumentProducerTree>,
            this.equalityComparer,
            this.Client,
            deferFirstPage,
            collectionRid,
            initialPageSize,
            initialContinuationToken);
        documentProducerTree.Filter = filter;

        // Prefetch if necessary, and populate consume queue.
        if (this.CanPrefetch)
        {
            this.TryScheduleFetch(documentProducerTree);
        }

        documentProducerTrees.Add(documentProducerTree);
    }

    // Second pass (loop fission) so that the prefetches scheduled above can
    // load the document producers in parallel while we await each one here.
    foreach (DocumentProducerTree documentProducerTree in documentProducerTrees)
    {
        if (!deferFirstPage)
        {
            await documentProducerTree.MoveNextIfNotSplitAsync(token);
        }

        if (filterCallback != null)
        {
            await filterCallback(documentProducerTree);
        }

        // Producers that are already exhausted never enter the forest.
        if (documentProducerTree.HasMoreResults)
        {
            this.documentProducerForest.Enqueue(documentProducerTree);
        }
    }
}
/// <summary>
/// Pushes a document producer tree back onto the priority queue so that it can be drained from again.
/// </summary>
/// <param name="documentProducerTree">The document producer tree to enqueue.</param>
public void PushCurrentDocumentProducerTree(DocumentProducerTree documentProducerTree) =>
    this.documentProducerForest.Enqueue(documentProducerTree);
/// <summary>
/// When resuming an order by query we need to filter the document producers.
/// </summary>
/// <param name="producer">The producer to filter down.</param>
/// <param name="sortOrders">The sort orders.</param>
/// <param name="continuationToken">The continuation token.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>A task to await on.</returns>
/// <exception cref="BadRequestException">Thrown when the continuation token contains an unparseable or mismatched rid.</exception>
private async Task FilterAsync(
    DocumentProducerTree producer,
    SortOrder[] sortOrders,
    OrderByContinuationToken continuationToken,
    CancellationToken cancellationToken)
{
    // When we resume a query on a partition there is a possibility that we only read a partial page from the backend
    // meaning that will we repeat some documents if we didn't do anything about it.
    // The solution is to filter all the documents that come before in the sort order, since we have already emitted them to the client.
    // The key is to seek until we get an order by value that matches the order by value we left off on.
    // Once we do that we need to seek to the correct _rid within the term,
    // since there might be many documents with the same order by value we left off on.
    foreach (DocumentProducerTree tree in producer)
    {
        if (!ResourceId.TryParse(continuationToken.Rid, out ResourceId continuationRid))
        {
            this.TraceWarning(string.Format(
                CultureInfo.InvariantCulture,
                "Invalid Rid in the continuation token {0} for OrderBy~Context.",
                continuationToken.CompositeContinuationToken.Token));
            throw new BadRequestException(RMResources.InvalidContinuationToken);
        }

        // Cache of parsed rids so each distinct rid string is parsed at most once.
        Dictionary<string, ResourceId> resourceIds = new Dictionary<string, ResourceId>();
        int itemToSkip = continuationToken.SkipCount;
        bool continuationRidVerified = false;

        while (true)
        {
            OrderByQueryResult orderByResult = new OrderByQueryResult(tree.Current);

            // Throw away documents until it matches the item from the continuation token.
            // cmp < 0 means the current document sorts after the continuation item;
            // cmp == 0 means the order by values tie and we must break the tie by rid.
            int cmp = 0;
            for (int i = 0; i < sortOrders.Length; ++i)
            {
                cmp = ItemComparer.Instance.Compare(
                    continuationToken.OrderByItems[i].Item,
                    orderByResult.OrderByItems[i].Item);

                if (cmp != 0)
                {
                    // Flip the comparison for descending sort orders.
                    cmp = sortOrders[i] != SortOrder.Descending ? cmp : -cmp;
                    break;
                }
            }

            if (cmp < 0)
            {
                // We might have passed the item due to deletions and filters.
                break;
            }

            if (cmp == 0)
            {
                ResourceId rid;
                if (!resourceIds.TryGetValue(orderByResult.Rid, out rid))
                {
                    if (!ResourceId.TryParse(orderByResult.Rid, out rid))
                    {
                        this.TraceWarning(string.Format(
                            CultureInfo.InvariantCulture,
                            "Invalid Rid in the continuation token {0} for OrderBy~Context.",
                            continuationToken.CompositeContinuationToken.Token));
                        throw new BadRequestException(RMResources.InvalidContinuationToken);
                    }

                    resourceIds.Add(orderByResult.Rid, rid);
                }

                // One-time sanity check: the continuation rid must belong to the same
                // database and collection as the documents we are filtering.
                if (!continuationRidVerified)
                {
                    if (continuationRid.Database != rid.Database || continuationRid.DocumentCollection != rid.DocumentCollection)
                    {
                        this.TraceWarning(string.Format(
                            CultureInfo.InvariantCulture,
                            "Invalid Rid in the continuation token {0} for OrderBy~Context.",
                            continuationToken.CompositeContinuationToken.Token));
                        throw new BadRequestException(RMResources.InvalidContinuationToken);
                    }

                    continuationRidVerified = true;
                }

                // Once the item matches the order by items from the continuation tokens
                // We still need to remove all the documents that have a lower rid in the rid sort order.
                // If there is a tie in the sort order the documents should be in _rid order in the same direction as the first order by field.
                // So if it's ORDER BY c.age ASC, c.name DESC the _rids are ASC
                // If it's ORDER BY c.age DESC, c.name DESC the _rids are DESC
                cmp = continuationRid.Document.CompareTo(rid.Document);
                if (sortOrders[0] == SortOrder.Descending)
                {
                    cmp = -cmp;
                }

                // We might have passed the item due to deletions and filters.
                // We also have a skip count for JOINs
                // (itemToSkip is decremented on every tied rid until it goes negative).
                if (cmp < 0 || (cmp == 0 && itemToSkip-- <= 0))
                {
                    break;
                }
            }

            if (!await tree.MoveNextAsync(cancellationToken))
            {
                break;
            }
        }
    }
}
/// <summary>
/// Drains a page of documents from this context.
/// </summary>
/// <param name="maxElements">The maximum number of elements.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>A task that when awaited on returns a page of documents.</returns>
public override async Task<FeedResponse<CosmosElement>> DrainAsync(int maxElements, CancellationToken cancellationToken)
{
    //// In order to maintain the continuation token for the user we must drain with a few constraints
    //// 1) We always drain from the partition, which has the highest priority item first
    //// 2) If multiple partitions have the same priority item then we drain from the left most first
    ////    otherwise we would need to keep track of how many of each item we drained from each partition
    ////    (just like parallel queries).
    //// Visually that would look like the following case where we have three partitions that are numbered and store letters.
    //// For teaching purposes I have made each item a tuple of the following form:
    ////     <item stored in partition, partition number>
    //// So that duplicates across partitions are distinct, but duplicates within partitions are indistinguishable.
    ////     |-------|       |-------|       |-------|
    ////     | <a,1> |       | <a,2> |       | <a,3> |
    ////     | <a,1> |       | <b,2> |       | <c,3> |
    ////     | <a,1> |       | <b,2> |       | <c,3> |
    ////     | <d,1> |       | <c,2> |       | <c,3> |
    ////     | <d,1> |       | <e,2> |       | <f,3> |
    ////     | <e,1> |       | <h,2> |       | <j,3> |
    ////     | <f,1> |       | <i,2> |       | <k,3> |
    ////     |-------|       |-------|       |-------|
    //// Now the correct drain order in this case is:
    //// <a,1>,<a,1>,<a,1>,<a,2>,<a,3>,<b,2>,<b,2>,<c,2>,<c,3>,<c,3>,<c,3>,
    //// <d,1>,<d,1>,<e,1>,<e,2>,<f,1>,<f,3>,<h,2>,<i,2>,<j,3>,<k,3>
    //// In more mathematical terms
    //// 1) <x, y> always comes before <z, y> where x < z
    //// 2) <i, j> always come before <i, k> where j < k
    List<CosmosElement> results = new List<CosmosElement>();
    while (!this.IsDone && results.Count < maxElements)
    {
        // Only drain from the highest priority document producer
        // We need to pop and push back the document producer tree, since the priority changes according to the sort order.
        DocumentProducerTree currentDocumentProducerTree = this.PopCurrentDocumentProducerTree();
        OrderByQueryResult orderByQueryResult = new OrderByQueryResult(currentDocumentProducerTree.Current);

        // Only add the payload, since other stuff is garbage from the caller's perspective.
        results.Add(orderByQueryResult.Payload);

        // If we are at the beginning of the page and seeing an rid from the previous page we should increment the skip count
        // due to the fact that JOINs can make a document appear multiple times and across continuations, so we don't want to
        // surface this more than needed. More information can be found in the continuation token docs.
        if (this.ShouldIncrementSkipCount(currentDocumentProducerTree.CurrentDocumentProducerTree.Root))
        {
            ++this.skipCount;
        }
        else
        {
            this.skipCount = 0;
        }

        this.previousRid = orderByQueryResult.Rid;

        await currentDocumentProducerTree.MoveNextAsync(cancellationToken);

        this.PushCurrentDocumentProducerTree(currentDocumentProducerTree);
    }

    return (new FeedResponse<CosmosElement>(
        results,
        results.Count,
        this.GetResponseHeaders(),
        false,
        this.GetQueryMetrics(),
        null,
        null,
        this.GetAndResetResponseLengthBytes()));
}