async Task <DocumentServiceLease> CheckpointAsync(DocumentServiceLease lease, string continuation, ChangeFeedObserverContext context)
        {
            Debug.Assert(lease != null);
            Debug.Assert(!string.IsNullOrEmpty(continuation));

            DocumentServiceLease result = null;

            try
            {
                result = (DocumentServiceLease)await this.checkpointManager.CheckpointAsync(lease, continuation, lease.SequenceNumber + 1);

                Debug.Assert(result.ContinuationToken == continuation, "ContinuationToken was not updated!");
                TraceLog.Informational(string.Format("Checkpoint: partition {0}, new continuation '{1}'", lease.PartitionId, continuation));
            }
            catch (LeaseLostException)
            {
                TraceLog.Warning(string.Format("Partition {0}: failed to checkpoint due to lost lease", context.PartitionKeyRangeId));
                throw;
            }
            catch (Exception ex)
            {
                TraceLog.Error(string.Format("Partition {0}: failed to checkpoint due to unexpected error: {1}", context.PartitionKeyRangeId, ex.Message));
                throw;
            }

            Debug.Assert(result != null);
            return(await Task.FromResult <DocumentServiceLease>(result));
        }
        async Task InitializeAsync()
        {
            this.documentClient = new DocumentClient(this.collectionLocation.Uri, this.collectionLocation.MasterKey, this.collectionLocation.ConnectionPolicy);

            Uri      databaseUri = UriFactory.CreateDatabaseUri(this.collectionLocation.DatabaseName);
            Database database    = await this.documentClient.ReadDatabaseAsync(databaseUri);

            Uri collectionUri = UriFactory.CreateDocumentCollectionUri(this.collectionLocation.DatabaseName, this.collectionLocation.CollectionName);
            ResourceResponse <DocumentCollection> collectionResponse = await this.documentClient.ReadDocumentCollectionAsync(
                collectionUri,
                new RequestOptions { PopulateQuotaInfo = true });

            DocumentCollection collection = collectionResponse.Resource;

            this.collectionSelfLink = collection.SelfLink;

            // Grab the options-supplied prefix if present otherwise leave it empty.
            string optionsPrefix = this.options.LeasePrefix ?? string.Empty;

            // Beyond this point all access to collection is done via this self link: if collection is removed, we won't access new one using same name by accident.
            this.leasePrefix = string.Format(CultureInfo.InvariantCulture, "{0}{1}_{2}_{3}", optionsPrefix, this.collectionLocation.Uri.Host, database.ResourceId, collection.ResourceId);

            var leaseManager = new DocumentServiceLeaseManager(
                this.auxCollectionLocation,
                this.leasePrefix,
                this.options.LeaseExpirationInterval,
                this.options.LeaseRenewInterval);
            await leaseManager.InitializeAsync();

            this.leaseManager      = leaseManager;
            this.checkpointManager = (ICheckpointManager)leaseManager;

            if (this.options.DiscardExistingLeases)
            {
                TraceLog.Warning(string.Format("Host '{0}': removing all leases, as requested by ChangeFeedHostOptions", this.HostName));
                await this.leaseManager.DeleteAllAsync();
            }

            // Note: lease store is never stale as we use monitored colleciton Rid as id prefix for aux collection.
            // Collection was removed and re-created, the rid would change.
            // If it's not deleted, it's not stale. If it's deleted, it's not stale as it doesn't exist.
            await this.leaseManager.CreateLeaseStoreIfNotExistsAsync();

            var ranges = new Dictionary <string, PartitionKeyRange>();

            foreach (var range in await this.EnumPartitionKeyRangesAsync(this.collectionSelfLink))
            {
                ranges.Add(range.Id, range);
            }

            TraceLog.Informational(string.Format("Source collection: '{0}', {1} partition(s), {2} document(s)", this.collectionLocation.CollectionName, ranges.Count, GetDocumentCount(collectionResponse)));

            await this.CreateLeases(ranges);

            this.partitionManager = new PartitionManager <DocumentServiceLease>(this.HostName, this.leaseManager, this.options);
            await this.partitionManager.SubscribeAsync(this);

            await this.partitionManager.InitializeAsync();
        }
        async Task AddLeaseAsync(T lease)
        {
            if (this.currentlyOwnedPartitions.TryAdd(lease.PartitionId, lease))
            {
                bool failedToInitialize = false;
                try
                {
                    TraceLog.Informational(string.Format("Host '{0}' opening event processor for PartitionId '{1}' and lease token '{2}'", this.workerName, lease.PartitionId, lease.ConcurrencyToken));

                    await this.partitionObserverManager.NotifyPartitionAcquiredAsync(lease);

                    TraceLog.Informational(string.Format("Host '{0}' opened event processor for PartitionId '{1}' and lease token '{2}'", this.workerName, lease.PartitionId, lease.ConcurrencyToken));
                }
                catch (Exception ex)
                {
                    TraceLog.Informational(string.Format("Host '{0}' failed to initialize processor for PartitionId '{1}' and lease token '{2}'", this.workerName, lease.PartitionId, lease.ConcurrencyToken));

                    failedToInitialize = true;

                    // Eat any exceptions during notification of observers
                    TraceLog.Exception(ex);
                }

                // We need to release the lease if we fail to initialize the processor, so some other node can pick up the parition
                if (failedToInitialize)
                {
                    await this.RemoveLeaseAsync(lease, true, ChangeFeedObserverCloseReason.ObserverError);
                }
            }
            else
            {
                // We already acquired lease for this partition but it looks like we previously owned this partition
                // and haven't completed the shutdown process for it yet.  Release lease for possible others hosts to
                // pick it up.
                try
                {
                    TraceLog.Warning(string.Format("Host '{0}' unable to add PartitionId '{1}' with lease token '{2}' to currently owned partitions.", this.workerName, lease.PartitionId, lease.ConcurrencyToken));

                    await this.leaseManager.ReleaseAsync(lease);

                    TraceLog.Informational(string.Format("Host '{0}' successfully released lease on PartitionId '{1}' with lease token '{2}'", this.workerName, lease.PartitionId, lease.ConcurrencyToken));
                }
                catch (LeaseLostException)
                {
                    // We have already shutdown the processor so we can ignore any LeaseLost at this point
                    TraceLog.Informational(string.Format("Host '{0}' failed to release lease for PartitionId '{1}' with lease token '{2}' due to conflict.", this.workerName, lease.PartitionId, lease.ConcurrencyToken));
                }
                catch (Exception ex)
                {
                    TraceLog.Exception(ex);
                }
            }
        }
Example #4
0
        async Task InitializeAsync()
        {
            this.documentClient = new DocumentClient(this.collectionLocation.Uri, this.collectionLocation.MasterKey, this.collectionLocation.ConnectionPolicy);

            Uri      databaseUri = UriFactory.CreateDatabaseUri(this.collectionLocation.DatabaseName);
            Database database    = await this.documentClient.ReadDatabaseAsync(databaseUri);

            Uri collectionUri             = UriFactory.CreateDocumentCollectionUri(this.collectionLocation.DatabaseName, this.collectionLocation.CollectionName);
            DocumentCollection collection = await this.documentClient.ReadDocumentCollectionAsync(collectionUri);

            this.collectionSelfLink = collection.SelfLink;

            // Beyond this point all access to colleciton is done via this self link: if collection is removed, we won't access new one using same name by accident.
            this.leasePrefix = string.Format(CultureInfo.InvariantCulture, "{0}_{1}_{2}", this.collectionLocation.Uri.Host, database.ResourceId, collection.ResourceId);

            var leaseManager = new DocumentServiceLeaseManager(
                this.auxCollectionLocation,
                this.leasePrefix,
                this.options.LeaseExpirationInterval,
                this.options.LeaseRenewInterval);
            await leaseManager.InitializeAsync();

            this.leaseManager      = leaseManager;
            this.checkpointManager = (ICheckpointManager)leaseManager;

            if (this.options.DiscardExistingLeases)
            {
                TraceLog.Warning(string.Format("Host '{0}': removing all leases, as requested by ChangeFeedHostOptions", this.HostName));
                await this.leaseManager.DeleteAllAsync();
            }

            // Note: lease store is never stale as we use monitored colleciton Rid as id prefix for aux collection.
            // Collection was removed and re-created, the rid would change.
            // If it's not deleted, it's not stale. If it's deleted, it's not stale as it doesn't exist.
            await this.leaseManager.CreateLeaseStoreIfNotExistsAsync();

            string[] rangeIds = await this.EnumPartitionKeyRangeIds(this.collectionSelfLink);

            Parallel.ForEach(rangeIds, async rangeId => {
                this.statsSinceLastCheckpoint.AddOrUpdate(
                    rangeId,
                    new CheckpointStats(),
                    (partitionId, existingStats) => existingStats);

                await this.leaseManager.CreateLeaseIfNotExistAsync(rangeId);
            });

            this.partitionManager = new PartitionManager <DocumentServiceLease>(this.HostName, this.leaseManager, this.options);
            await this.partitionManager.SubscribeAsync(this);

            await this.partitionManager.InitializeAsync();
        }
        /// <summary>
        /// Asynchronously checks the current existing leases and calculates an estimate of remaining work per leased partitions.
        /// </summary>
        /// <returns>An estimate amount of remaining documents to be processed</returns>
        public async Task <long> GetEstimatedRemainingWork()
        {
            await this.InitializeAsync();

            long remaining            = 0;
            ChangeFeedOptions options = new ChangeFeedOptions
            {
                MaxItemCount = 1
            };

            foreach (DocumentServiceLease existingLease in await this.leaseManager.ListLeases())
            {
                options.PartitionKeyRangeId = existingLease.PartitionId;
                options.RequestContinuation = existingLease.ContinuationToken;
                IDocumentQuery <Document> query    = this.documentClient.CreateDocumentChangeFeedQuery(this.collectionSelfLink, options);
                FeedResponse <Document>   response = null;

                try
                {
                    response = await query.ExecuteNextAsync <Document>();

                    long parsedLSNFromSessionToken = TryConvertToNumber(ParseAmountFromSessionToken(response.SessionToken));
                    long lastSequenceNumber        = response.Count > 0 ? TryConvertToNumber(response.First().GetPropertyValue <string>(LSNPropertyName)) : parsedLSNFromSessionToken;
                    long partitionRemaining        = parsedLSNFromSessionToken - lastSequenceNumber;
                    remaining += partitionRemaining < 0 ? 0 : partitionRemaining;
                }
                catch (DocumentClientException ex)
                {
                    ExceptionDispatchInfo   exceptionDispatchInfo = ExceptionDispatchInfo.Capture(ex);
                    DocumentClientException dcex = (DocumentClientException)exceptionDispatchInfo.SourceException;
                    if ((StatusCode.NotFound == (StatusCode)dcex.StatusCode && SubStatusCode.ReadSessionNotAvailable != (SubStatusCode)GetSubStatusCode(dcex)) ||
                        StatusCode.Gone == (StatusCode)dcex.StatusCode)
                    {
                        // We are not explicitly handling Splits here to avoid any collision with an Observer that might have picked this up and managing the split
                        TraceLog.Error(string.Format("GetEstimateWork > Partition {0}: resource gone (subStatus={1}).", existingLease.PartitionId, GetSubStatusCode(dcex)));
                    }
                    else if (StatusCode.TooManyRequests == (StatusCode)dcex.StatusCode ||
                             StatusCode.ServiceUnavailable == (StatusCode)dcex.StatusCode)
                    {
                        TraceLog.Warning(string.Format("GetEstimateWork > Partition {0}: retriable exception : {1}", existingLease.PartitionId, dcex.Message));
                    }
                    else
                    {
                        TraceLog.Error(string.Format("GetEstimateWork > Partition {0}: Unhandled exception", ex.Error.Message));
                    }
                }
            }

            return(remaining);
        }
        internal async Task CheckpointAsync(string continuation, ChangeFeedObserverContext context)
        {
            if (string.IsNullOrEmpty(continuation))
            {
                throw new ArgumentException("continuation");
            }
            if (context == null)
            {
                throw new ArgumentNullException("context");
            }
            if (string.IsNullOrEmpty(context.PartitionKeyRangeId))
            {
                throw new ArgumentException("context.PartitionKeyRangeId");
            }

            WorkerData workerData;

            this.partitionKeyRangeIdToWorkerMap.TryGetValue(context.PartitionKeyRangeId, out workerData);

            if (workerData == null)
            {
                TraceLog.Warning(string.Format("CheckpointAsync: called at wrong time, failed to get worker data for partition {0}. Most likely the partition is not longer owned by this host.", context.PartitionKeyRangeId));
                throw new LeaseLostException(string.Format("Failed to find lease for partition {0} in the set of owned leases.", context.PartitionKeyRangeId));
            }

            if (workerData.Lease == null)
            {
                TraceLog.Error(string.Format("CheckpointAsync: found the worker data but lease is null, for partition {0}. This should never happen.", context.PartitionKeyRangeId));
                throw new LeaseLostException(string.Format("Failed to find lease for partition {0}.", context.PartitionKeyRangeId));
            }

            await workerData.CheckpointInProgress.WaitAsync();

            try
            {
                if (workerData.Cancellation.IsCancellationRequested)
                {
                    TraceLog.Warning(string.Format("CheckpointAsync: called at wrong time, partition {0} is shutting down. The ownership of the partition by this host is about to end.", context.PartitionKeyRangeId));
                    throw new LeaseLostException(string.Format("CheckpointAsync: partition {0} is shutting down.", context.PartitionKeyRangeId));
                }

                workerData.Lease = await this.CheckpointAsync(workerData.Lease, continuation, context);
            }
            finally
            {
                workerData.CheckpointInProgress.Release();
            }
        }
        private static long TryConvertToNumber(string number)
        {
            if (string.IsNullOrEmpty(number))
            {
                return(0);
            }

            long parsed = 0;

            if (!long.TryParse(number, NumberStyles.Any, CultureInfo.InvariantCulture, out parsed))
            {
                TraceLog.Warning(string.Format(CultureInfo.InvariantCulture, "Cannot parse number '{0}'.", number));
                return(0);
            }

            return(parsed);
        }
        /// <summary>
        /// Create leases for new partitions and take care of split partitions.
        /// </summary>
        private async Task CreateLeases(IDictionary <string, PartitionKeyRange> ranges)
        {
            Debug.Assert(ranges != null);

            // Get leases after getting ranges, to make sure that no other hosts checked in continuation for split partition after we got leases.
            var existingLeases = new Dictionary <string, DocumentServiceLease>();

            foreach (var lease in await this.leaseManager.ListLeases())
            {
                existingLeases.Add(lease.PartitionId, lease);
            }

            var gonePartitionIds = new HashSet <string>();

            foreach (var partitionId in existingLeases.Keys)
            {
                if (!ranges.ContainsKey(partitionId))
                {
                    gonePartitionIds.Add(partitionId);
                }
            }

            var addedPartitionIds = new List <string>();

            foreach (var range in ranges)
            {
                if (!existingLeases.ContainsKey(range.Key))
                {
                    addedPartitionIds.Add(range.Key);
                }
            }

            // Create leases for new partitions, if there was split, use continuation from parent partition.
            var parentIdToChildLeases = new ConcurrentDictionary <string, ConcurrentQueue <DocumentServiceLease> >();
            await addedPartitionIds.ForEachAsync(
                async addedRangeId =>
            {
                this.statsSinceLastCheckpoint.AddOrUpdate(
                    addedRangeId,
                    new CheckpointStats(),
                    (partitionId, existingStats) => existingStats);

                string continuationToken = null;
                string parentIds         = string.Empty;
                var range = ranges[addedRangeId];
                if (range.Parents != null && range.Parents.Count > 0)       // Check for split.
                {
                    foreach (var parentRangeId in range.Parents)
                    {
                        if (gonePartitionIds.Contains(parentRangeId))
                        {
                            // Transfer continiation from lease for gone parent to lease for its child partition.
                            Debug.Assert(existingLeases[parentRangeId] != null);

                            parentIds += parentIds.Length == 0 ? parentRangeId : "," + parentRangeId;
                            if (continuationToken != null)
                            {
                                TraceLog.Warning(string.Format("Partition {0}: found more than one parent, new continuation '{1}', current '{2}', will use '{3}'", addedRangeId, existingLeases[parentRangeId].ContinuationToken, existingLeases[parentRangeId].ContinuationToken));
                            }

                            continuationToken = existingLeases[parentRangeId].ContinuationToken;
                        }
                    }
                }

                bool wasCreated = await this.leaseManager.CreateLeaseIfNotExistAsync(addedRangeId, continuationToken);

                if (wasCreated)
                {
                    if (parentIds.Length == 0)
                    {
                        TraceLog.Informational(string.Format("Created lease for partition '{0}', continuation '{1}'.", addedRangeId, continuationToken));
                    }
                    else
                    {
                        TraceLog.Informational(string.Format("Created lease for partition '{0}' as child of split partition(s) '{1}', continuation '{2}'.", addedRangeId, parentIds, continuationToken));
                    }
                }
                else
                {
                    TraceLog.Warning(string.Format("Some other host created lease for '{0}' as child of split partition(s) '{1}', continuation '{2}'.", addedRangeId, parentIds, continuationToken));
                }
            },
                this.options.DegreeOfParallelism);

            // Remove leases for splitted (and thus gone partitions) and update continuation token.
            await gonePartitionIds.ForEachAsync(
                async goneRangeId =>
            {
                await this.leaseManager.DeleteAsync(existingLeases[goneRangeId]);
                TraceLog.Informational(string.Format("Deleted lease for gone (splitted) partition '{0}', continuation '{1}'", goneRangeId, existingLeases[goneRangeId].ContinuationToken));

                CheckpointStats removedStatsUnused;
                this.statsSinceLastCheckpoint.TryRemove(goneRangeId, out removedStatsUnused);
            },
                this.options.DegreeOfParallelism);
        }
        async Task IPartitionObserver <DocumentServiceLease> .OnPartitionAcquiredAsync(DocumentServiceLease lease)
        {
            Debug.Assert(lease != null && !string.IsNullOrEmpty(lease.Owner), "lease");
            TraceLog.Informational(string.Format("Host '{0}' partition {1}: acquired!", this.HostName, lease.PartitionId));

#if DEBUG
            Interlocked.Increment(ref this.partitionCount);
#endif

            IChangeFeedObserver       observer = this.observerFactory.CreateObserver();
            ChangeFeedObserverContext context  = new ChangeFeedObserverContext {
                PartitionKeyRangeId = lease.PartitionId
            };
            CancellationTokenSource cancellation = new CancellationTokenSource();

            // Create ChangeFeedOptions to use for this worker.
            ChangeFeedOptions options = new ChangeFeedOptions
            {
                MaxItemCount        = this.changeFeedOptions.MaxItemCount,
                PartitionKeyRangeId = this.changeFeedOptions.PartitionKeyRangeId,
                SessionToken        = this.changeFeedOptions.SessionToken,
                StartFromBeginning  = this.changeFeedOptions.StartFromBeginning,
                RequestContinuation = this.changeFeedOptions.RequestContinuation
            };

            var workerTask = await Task.Factory.StartNew(async() =>
            {
                ChangeFeedObserverCloseReason?closeReason = null;
                try
                {
                    try
                    {
                        await observer.OpenAsync(context);
                    }
                    catch (Exception ex)
                    {
                        TraceLog.Error(string.Format("IChangeFeedObserver.OpenAsync exception: {0}", ex));
                        closeReason = ChangeFeedObserverCloseReason.ObserverError;
                        throw;
                    }

                    options.PartitionKeyRangeId = lease.PartitionId;
                    if (!string.IsNullOrEmpty(lease.ContinuationToken))
                    {
                        options.RequestContinuation = lease.ContinuationToken;
                    }

                    CheckpointStats checkpointStats = null;
                    if (!this.statsSinceLastCheckpoint.TryGetValue(lease.PartitionId, out checkpointStats) || checkpointStats == null)
                    {
                        // It could be that the lease was created by different host and we picked it up.
                        checkpointStats = this.statsSinceLastCheckpoint.AddOrUpdate(
                            lease.PartitionId,
                            new CheckpointStats(),
                            (partitionId, existingStats) => existingStats);
                        Trace.TraceWarning(string.Format("Added stats for partition '{0}' for which the lease was picked up after the host was started.", lease.PartitionId));
                    }

                    IDocumentQuery <Document> query = this.documentClient.CreateDocumentChangeFeedQuery(this.collectionSelfLink, options);

                    TraceLog.Verbose(string.Format("Worker start: partition '{0}', continuation '{1}'", lease.PartitionId, lease.ContinuationToken));

                    string lastContinuation = options.RequestContinuation;

                    try
                    {
                        while (this.isShutdown == 0)
                        {
                            do
                            {
                                ExceptionDispatchInfo exceptionDispatchInfo = null;
                                FeedResponse <Document> response            = null;

                                try
                                {
                                    response         = await query.ExecuteNextAsync <Document>();
                                    lastContinuation = response.ResponseContinuation;
                                }
                                catch (DocumentClientException ex)
                                {
                                    exceptionDispatchInfo = ExceptionDispatchInfo.Capture(ex);
                                }

                                if (exceptionDispatchInfo != null)
                                {
                                    DocumentClientException dcex = (DocumentClientException)exceptionDispatchInfo.SourceException;

                                    if (StatusCode.NotFound == (StatusCode)dcex.StatusCode && SubStatusCode.ReadSessionNotAvailable != (SubStatusCode)GetSubStatusCode(dcex))
                                    {
                                        // Most likely, the database or collection was removed while we were enumerating.
                                        // Shut down. The user will need to start over.
                                        // Note: this has to be a new task, can't await for shutdown here, as shudown awaits for all worker tasks.
                                        TraceLog.Error(string.Format("Partition {0}: resource gone (subStatus={1}). Aborting.", context.PartitionKeyRangeId, GetSubStatusCode(dcex)));
                                        await Task.Factory.StartNew(() => this.StopAsync(ChangeFeedObserverCloseReason.ResourceGone));
                                        break;
                                    }
                                    else if (StatusCode.Gone == (StatusCode)dcex.StatusCode)
                                    {
                                        SubStatusCode subStatusCode = (SubStatusCode)GetSubStatusCode(dcex);
                                        if (SubStatusCode.PartitionKeyRangeGone == subStatusCode)
                                        {
                                            bool isSuccess = await HandleSplitAsync(context.PartitionKeyRangeId, lastContinuation, lease.Id);
                                            if (!isSuccess)
                                            {
                                                TraceLog.Error(string.Format("Partition {0}: HandleSplit failed! Aborting.", context.PartitionKeyRangeId));
                                                await Task.Factory.StartNew(() => this.StopAsync(ChangeFeedObserverCloseReason.ResourceGone));
                                                break;
                                            }

                                            // Throw LeaseLostException so that we take the lease down.
                                            throw new LeaseLostException(lease, exceptionDispatchInfo.SourceException, true);
                                        }
                                        else if (SubStatusCode.Splitting == subStatusCode)
                                        {
                                            TraceLog.Warning(string.Format("Partition {0} is splitting. Will retry to read changes until split finishes. {1}", context.PartitionKeyRangeId, dcex.Message));
                                        }
                                        else
                                        {
                                            exceptionDispatchInfo.Throw();
                                        }
                                    }
                                    else if (StatusCode.TooManyRequests == (StatusCode)dcex.StatusCode ||
                                             StatusCode.ServiceUnavailable == (StatusCode)dcex.StatusCode)
                                    {
                                        TraceLog.Warning(string.Format("Partition {0}: retriable exception : {1}", context.PartitionKeyRangeId, dcex.Message));
                                    }
                                    else
                                    {
                                        exceptionDispatchInfo.Throw();
                                    }

                                    await Task.Delay(dcex.RetryAfter != TimeSpan.Zero ? dcex.RetryAfter : this.options.FeedPollDelay, cancellation.Token);
                                }

                                if (response != null)
                                {
                                    if (response.Count > 0)
                                    {
                                        List <Document> docs = new List <Document>();
                                        docs.AddRange(response);

                                        try
                                        {
                                            context.FeedResponse = response;
                                            await observer.ProcessChangesAsync(context, docs);
                                        }
                                        catch (Exception ex)
                                        {
                                            TraceLog.Error(string.Format("IChangeFeedObserver.ProcessChangesAsync exception: {0}", ex));
                                            closeReason = ChangeFeedObserverCloseReason.ObserverError;
                                            throw;
                                        }
                                        finally
                                        {
                                            context.FeedResponse = null;
                                        }
                                    }

                                    checkpointStats.ProcessedDocCount += (uint)response.Count;

                                    if (IsCheckpointNeeded(lease, checkpointStats))
                                    {
                                        lease = await CheckpointAsync(lease, response.ResponseContinuation, context);
                                        checkpointStats.Reset();
                                    }
                                    else if (response.Count > 0)
                                    {
                                        TraceLog.Informational(string.Format("Checkpoint: not checkpointing for partition {0}, {1} docs, new continuation '{2}' as frequency condition is not met", lease.PartitionId, response.Count, response.ResponseContinuation));
                                    }
                                }
                            }while (query.HasMoreResults && this.isShutdown == 0);

                            if (this.isShutdown == 0)
                            {
                                await Task.Delay(this.options.FeedPollDelay, cancellation.Token);
                            }
                        } // Outer while (this.isShutdown == 0) loop.

                        closeReason = ChangeFeedObserverCloseReason.Shutdown;
                    }
                    catch (TaskCanceledException)
                    {
                        Debug.Assert(cancellation.IsCancellationRequested, "cancellation.IsCancellationRequested");
                        TraceLog.Informational(string.Format("Cancel signal received for partition {0} worker!", context.PartitionKeyRangeId));
                    }
                }
                catch (LeaseLostException ex)
                {
                    closeReason = ex.IsGone ? ChangeFeedObserverCloseReason.LeaseGone : ChangeFeedObserverCloseReason.LeaseLost;
                }
                catch (Exception ex)
                {
                    TraceLog.Error(string.Format("Partition {0} exception: {1}", context.PartitionKeyRangeId, ex));
                    if (!closeReason.HasValue)
                    {
                        closeReason = ChangeFeedObserverCloseReason.Unknown;
                    }
                }

                if (closeReason.HasValue)
                {
                    TraceLog.Informational(string.Format("Releasing lease for partition {0} due to an error, reason: {1}!", context.PartitionKeyRangeId, closeReason.Value));

                    // Note: this has to be a new task, because OnPartitionReleasedAsync awaits for worker task.
                    await Task.Factory.StartNew(async() => await this.partitionManager.TryReleasePartitionAsync(context.PartitionKeyRangeId, true, closeReason.Value));
                }

                TraceLog.Informational(string.Format("Partition {0}: worker finished!", context.PartitionKeyRangeId));
            });

            var newWorkerData = new WorkerData(workerTask, observer, context, cancellation);
            this.partitionKeyRangeIdToWorkerMap.AddOrUpdate(context.PartitionKeyRangeId, newWorkerData, (string id, WorkerData d) => { return(newWorkerData); });
        }
        async Task LeaseRenewer()
        {
            while (this.isStarted == 1 || !this.shutdownComplete)
            {
                try
                {
                    TraceLog.Informational(string.Format("Host '{0}' starting renewal of Leases.", this.workerName));

                    ConcurrentBag <T> renewedLeases       = new ConcurrentBag <T>();
                    ConcurrentBag <T> failedToRenewLeases = new ConcurrentBag <T>();
                    List <Task>       renewTasks          = new List <Task>();

                    // Renew leases for all currently owned partitions in parallel
                    foreach (T lease in this.currentlyOwnedPartitions.Values)
                    {
                        renewTasks.Add(this.RenewLeaseAsync(lease).ContinueWith(renewResult =>
                        {
                            if (renewResult.Result != null)
                            {
                                renewedLeases.Add(renewResult.Result);
                            }
                            else
                            {
                                // Keep track of all failed attempts to renew so we can trigger shutdown for these partitions
                                failedToRenewLeases.Add(lease);
                            }
                        }));
                    }

                    // Renew leases for all partitions currently in shutdown
                    List <T> failedToRenewShutdownLeases = new List <T>();
                    foreach (T shutdownLeases in this.keepRenewingDuringClose.Values)
                    {
                        renewTasks.Add(this.RenewLeaseAsync(shutdownLeases).ContinueWith(renewResult =>
                        {
                            if (renewResult.Result != null)
                            {
                                renewedLeases.Add(renewResult.Result);
                            }
                            else
                            {
                                // Keep track of all failed attempts to renew shutdown leases so we can remove them from further renew attempts
                                failedToRenewShutdownLeases.Add(shutdownLeases);
                            }
                        }));
                    }

                    // Wait for all renews to complete
                    await Task.WhenAll(renewTasks.ToArray());

                    // Update renewed leases.
                    foreach (T lease in renewedLeases)
                    {
                        bool updateResult = this.currentlyOwnedPartitions.TryUpdate(lease.PartitionId, lease, lease);
                        if (!updateResult)
                        {
                            TraceLog.Warning(string.Format("Host '{0}' Renewed lease {1} but failed to update it in the map (ignorable).", this.workerName, lease));
                        }
                    }

                    // Trigger shutdown of all partitions we failed to renew leases
                    await failedToRenewLeases.ForEachAsync(
                        async lease => await this.RemoveLeaseAsync(lease, false, ChangeFeedObserverCloseReason.LeaseLost),
                        this.options.DegreeOfParallelism);

                    // Now remove all failed renewals of shutdown leases from further renewals
                    foreach (T failedToRenewShutdownLease in failedToRenewShutdownLeases)
                    {
                        T removedLease = null;
                        this.keepRenewingDuringClose.TryRemove(failedToRenewShutdownLease.PartitionId, out removedLease);
                    }

                    await Task.Delay(this.options.LeaseRenewInterval, this.leaseRenewerCancellationTokenSource.Token);
                }
                catch (OperationCanceledException)
                {
                    TraceLog.Informational(string.Format("Host '{0}' Renewer task canceled.", this.workerName));
                }
                catch (Exception ex)
                {
                    TraceLog.Exception(ex);
                }
            }

            this.currentlyOwnedPartitions.Clear();
            this.keepRenewingDuringClose.Clear();
            TraceLog.Informational(string.Format("Host '{0}' Renewer task completed.", this.workerName));
        }
        async Task IPartitionObserver <DocumentServiceLease> .OnPartitionAcquiredAsync(DocumentServiceLease lease)
        {
            Debug.Assert(lease != null && !string.IsNullOrEmpty(lease.Owner), "lease");
            TraceLog.Informational(string.Format("Host '{0}' partition {1}: acquired!", this.HostName, lease.PartitionId));

#if DEBUG
            Interlocked.Increment(ref this.partitionCount);
#endif

            IChangeFeedObserver       observer = this.observerFactory.CreateObserver();
            ChangeFeedObserverContext context  = new ChangeFeedObserverContext {
                PartitionKeyRangeId = lease.PartitionId
            };
            CancellationTokenSource cancellation = new CancellationTokenSource();

            // Create ChangeFeedOptions to use for this worker.
            ChangeFeedOptions options = new ChangeFeedOptions
            {
                MaxItemCount        = this.changeFeedOptions.MaxItemCount,
                PartitionKeyRangeId = this.changeFeedOptions.PartitionKeyRangeId,
                SessionToken        = this.changeFeedOptions.SessionToken,
                StartFromBeginning  = this.changeFeedOptions.StartFromBeginning,
                RequestContinuation = this.changeFeedOptions.RequestContinuation
            };

            var workerTask = await Task.Factory.StartNew(async() =>
            {
                ChangeFeedObserverCloseReason?closeReason = null;
                try
                {
                    try
                    {
                        await observer.OpenAsync(context);
                    }
                    catch (Exception ex)
                    {
                        TraceLog.Error(string.Format("IChangeFeedObserver.OpenAsync exception: {0}", ex));
                        closeReason = ChangeFeedObserverCloseReason.ObserverError;
                        throw;
                    }

                    options.PartitionKeyRangeId = lease.PartitionId;
                    if (!string.IsNullOrEmpty(lease.ContinuationToken))
                    {
                        options.RequestContinuation = lease.ContinuationToken;
                    }

                    IDocumentQuery <Document> query = this.documentClient.CreateDocumentChangeFeedQuery(this.collectionSelfLink, options);

                    TraceLog.Verbose(string.Format("Worker start: partition '{0}', continuation '{1}'", lease.PartitionId, lease.ContinuationToken));

                    try
                    {
                        while (this.isShutdown == 0)
                        {
                            do
                            {
                                DocumentClientException dcex     = null;
                                FeedResponse <Document> response = null;

                                try
                                {
                                    response = await query.ExecuteNextAsync <Document>();
                                }
                                catch (DocumentClientException ex)
                                {
                                    if (StatusCode.NotFound != (StatusCode)ex.StatusCode &&
                                        StatusCode.TooManyRequests != (StatusCode)ex.StatusCode &&
                                        StatusCode.ServiceUnavailable != (StatusCode)ex.StatusCode)
                                    {
                                        throw;
                                    }

                                    dcex = ex;
                                }

                                if (dcex != null)
                                {
                                    const int ReadSessionNotAvailable = 1002;
                                    if (StatusCode.NotFound == (StatusCode)dcex.StatusCode && GetSubStatusCode(dcex) != ReadSessionNotAvailable)
                                    {
                                        // Most likely, the database or collection was removed while we were enumerating.
                                        // Shut down. The user will need to start over.
                                        // Note: this has to be a new task, can't await for shutdown here, as shudown awaits for all worker tasks.
                                        await Task.Factory.StartNew(() => this.StopAsync(ChangeFeedObserverCloseReason.ResourceGone));
                                        break;
                                    }
                                    else
                                    {
                                        Debug.Assert(StatusCode.TooManyRequests == (StatusCode)dcex.StatusCode || StatusCode.ServiceUnavailable == (StatusCode)dcex.StatusCode);
                                        TraceLog.Warning(string.Format("Partition {0}: retriable exception : {1}", context.PartitionKeyRangeId, dcex.Message));
                                        await Task.Delay(dcex.RetryAfter != TimeSpan.Zero ? dcex.RetryAfter : this.options.FeedPollDelay, cancellation.Token);
                                    }
                                }

                                if (response != null)
                                {
                                    if (response.Count > 0)
                                    {
                                        List <Document> docs = new List <Document>();
                                        docs.AddRange(response);

                                        try
                                        {
                                            await observer.ProcessChangesAsync(context, docs);
                                        }
                                        catch (Exception ex)
                                        {
                                            TraceLog.Error(string.Format("IChangeFeedObserver.ProcessChangesAsync exception: {0}", ex));
                                            closeReason = ChangeFeedObserverCloseReason.ObserverError;
                                            throw;
                                        }

                                        // Checkpoint after every successful delivery to the client.
                                        lease = await CheckpointAsync(lease, response.ResponseContinuation, context);
                                    }
                                    else if (string.IsNullOrEmpty(lease.ContinuationToken))
                                    {
                                        // Checkpoint if we've never done that for this lease.
                                        lease = await CheckpointAsync(lease, response.ResponseContinuation, context);
                                    }
                                }
                            }while (query.HasMoreResults && this.isShutdown == 0);

                            if (this.isShutdown == 0)
                            {
                                await Task.Delay(this.options.FeedPollDelay, cancellation.Token);
                            }
                        } // Outer while (this.isShutdown == 0) loop.
                    }
                    catch (TaskCanceledException)
                    {
                        Debug.Assert(cancellation.IsCancellationRequested, "cancellation.IsCancellationRequested");
                        TraceLog.Informational(string.Format("Cancel signal received for partition {0} worker!", context.PartitionKeyRangeId));
                    }
                }
                catch (LeaseLostException)
                {
                    closeReason = ChangeFeedObserverCloseReason.LeaseLost;
                }
                catch (Exception ex)
                {
                    TraceLog.Error(string.Format("Partition {0} exception: {1}", context.PartitionKeyRangeId, ex));
                    if (!closeReason.HasValue)
                    {
                        closeReason = ChangeFeedObserverCloseReason.Unknown;
                    }
                }

                if (closeReason.HasValue)
                {
                    TraceLog.Informational(string.Format("Releasing lease for partition {0} due to an error, reason: {1}!", context.PartitionKeyRangeId, closeReason.Value));

                    // Note: this has to be a new task, because OnPartitionReleasedAsync awaits for worker task.
                    await Task.Factory.StartNew(async() => await this.partitionManager.TryReleasePartitionAsync(context.PartitionKeyRangeId, true, closeReason.Value));
                }

                TraceLog.Informational(string.Format("Partition {0}: worker finished!", context.PartitionKeyRangeId));
            });

            var newWorkerData = new WorkerData(workerTask, observer, context, cancellation);
            this.partitionKeyRangeIdToWorkerMap.AddOrUpdate(context.PartitionKeyRangeId, newWorkerData, (string id, WorkerData d) => { return(newWorkerData); });
        }