/// <summary>
        /// Attempts to renew the given lease through the lease manager and traces the outcome.
        /// </summary>
        /// <param name="lease">The lease to renew.</param>
        /// <returns>
        /// Whatever the lease manager returned when the renew call completed; null when the call raised
        /// <see cref="LeaseLostException"/>; the original <paramref name="lease"/> when it raised any other exception.
        /// </returns>
        async Task<T> RenewLeaseAsync(T lease)
        {
            // Stays null unless the renew call completes or fails with an unexpected error.
            T outcome = null;

            try
            {
                TraceLog.Informational(
                    string.Format(
                        "Host '{0}' renewing lease for PartitionId '{1}' with lease token '{2}'",
                        this.workerName,
                        lease.PartitionId,
                        lease.ConcurrencyToken));

                outcome = await this.leaseManager.RenewAsync(lease);
            }
            catch (LeaseLostException)
            {
                // The outcome is left as null so the caller sees the renew as failed.
                TraceLog.Informational(
                    string.Format(
                        "Host '{0}' got LeaseLostException trying to renew lease for  PartitionId '{1}' with lease token '{2}'",
                        this.workerName,
                        lease.PartitionId,
                        lease.ConcurrencyToken));
            }
            catch (Exception unexpected)
            {
                TraceLog.Exception(unexpected);

                // Any other failure is swallowed and the lease is treated as renewed:
                // a lease store outage may be the reason the renew did not go through.
                outcome = lease;
            }
            finally
            {
                bool succeeded = outcome != null;
                TraceLog.Informational(
                    string.Format(
                        "Host '{0}' attempted to renew lease for PartitionId '{1}' and lease token '{2}' with result: '{3}'",
                        this.workerName,
                        lease.PartitionId,
                        lease.ConcurrencyToken,
                        succeeded));
            }

            return outcome;
        }
        /// <summary>
        /// Persists the given continuation token for the lease through the checkpoint manager,
        /// passing the lease's sequence number incremented by one.
        /// </summary>
        /// <param name="lease">The lease to checkpoint; must not be null.</param>
        /// <param name="continuation">The continuation token to store; must not be null or empty.</param>
        /// <param name="context">Observer context; its partition key range id is used in failure traces.</param>
        /// <returns>The lease returned by the checkpoint manager.</returns>
        /// <remarks>
        /// Every exception raised by the checkpoint manager (including <see cref="LeaseLostException"/>) is traced and rethrown.
        /// </remarks>
        async Task <DocumentServiceLease> CheckpointAsync(DocumentServiceLease lease, string continuation, ChangeFeedObserverContext context)
        {
            Debug.Assert(lease != null);
            Debug.Assert(!string.IsNullOrEmpty(continuation));

            DocumentServiceLease result = null;

            try
            {
                result = (DocumentServiceLease)await this.checkpointManager.CheckpointAsync(lease, continuation, lease.SequenceNumber + 1);

                Debug.Assert(result.ContinuationToken == continuation, "ContinuationToken was not updated!");
                TraceLog.Informational(string.Format("Checkpoint: partition {0}, new continuation '{1}'", lease.PartitionId, continuation));
            }
            catch (LeaseLostException)
            {
                TraceLog.Warning(string.Format("Partition {0}: failed to checkpoint due to lost lease", context.PartitionKeyRangeId));
                throw;
            }
            catch (Exception ex)
            {
                TraceLog.Error(string.Format("Partition {0}: failed to checkpoint due to unexpected error: {1}", context.PartitionKeyRangeId, ex.Message));
                throw;
            }

            Debug.Assert(result != null);

            // An async method already wraps its return value in a task; no Task.FromResult round trip is needed.
            return result;
        }
        /// <summary>
        /// Stops the host: asks the partition manager to stop, cancels every registered worker,
        /// waits for all of them to finish, clears the worker map and disposes the lease manager when it is disposable.
        /// </summary>
        /// <param name="reason">The close reason forwarded to the partition manager.</param>
        /// <remarks>Only the call that flips the shutdown flag from 0 to 1 does the work; other calls return immediately.</remarks>
        async Task StopAsync(ChangeFeedObserverCloseReason reason)
        {
            int previousFlag = Interlocked.CompareExchange(ref this.isShutdown, 1, 0);
            if (previousFlag != 0)
            {
                return;
            }

            TraceLog.Informational(string.Format("Host '{0}': STOP signal received!", this.HostName));

            // Stopping the PartitionManager triggers shutdown of the AcquireLease task and starts processor shutdown.
            var pendingShutdowns = new List<Task>
            {
                this.partitionManager.StopAsync(reason)
            };

            // Signal cancellation to each worker and remember its task so it can be awaited below.
            TraceLog.Informational(string.Format("Host '{0}': Cancelling {1} workers.", this.HostName, this.partitionKeyRangeIdToWorkerMap.Count));
            foreach (var worker in this.partitionKeyRangeIdToWorkerMap.Values)
            {
                worker.Cancellation.Cancel();
                pendingShutdowns.Add(worker.Task);
            }

            // Block until the partition manager and all workers are done.
            TraceLog.Informational(string.Format("Host '{0}': Waiting for {1} closing tasks...", this.HostName, pendingShutdowns.Count));
            await Task.WhenAll(pendingShutdowns.ToArray());

            this.partitionKeyRangeIdToWorkerMap.Clear();

            var disposableLeaseManager = this.leaseManager as IDisposable;
            if (disposableLeaseManager != null)
            {
                disposableLeaseManager.Dispose();
            }

            TraceLog.Informational(string.Format("Host '{0}': stopped.", this.HostName));
        }
        /// <summary>
        /// Handles release of a partition: cancels the worker registered for the lease's partition,
        /// closes its observer, waits for the worker task and removes the worker from the map.
        /// </summary>
        /// <param name="l">The lease of the partition being released.</param>
        /// <param name="reason">The close reason passed to the observer's CloseAsync.</param>
        /// <remarks>
        /// Exceptions thrown by the observer's CloseAsync are traced and swallowed.
        /// When no worker is registered for the partition, only the trace messages are written.
        /// </remarks>
        async Task IPartitionObserver <DocumentServiceLease> .OnPartitionReleasedAsync(DocumentServiceLease l, ChangeFeedObserverCloseReason reason)
        {
#if DEBUG
            Interlocked.Decrement(ref this.partitionCount);
#endif

            TraceLog.Informational(string.Format("Host '{0}' releasing partition {1}...", this.HostName, l.PartitionId));

            // Id reported in the final trace. Defaults to the lease's partition id so that the trace
            // does not dereference a null worker when nothing is registered for this partition.
            string releasedPartitionId = l.PartitionId;

            WorkerData workerData = null;
            if (this.partitionKeyRangeIdToWorkerMap.TryGetValue(l.PartitionId, out workerData))
            {
                workerData.Cancellation.Cancel();

                try
                {
                    await workerData.Observer.CloseAsync(workerData.Context, reason);
                }
                catch (Exception ex)
                {
                    // Eat all client exceptions.
                    TraceLog.Error(string.Format("IChangeFeedObserver.CloseAsync: exception: {0}", ex));
                }

                await workerData.Task;

                releasedPartitionId = workerData.Context.PartitionKeyRangeId;

                // Use a separate out variable: a failed TryRemove would otherwise reset workerData to null.
                WorkerData removedWorker;
                this.partitionKeyRangeIdToWorkerMap.TryRemove(l.PartitionId, out removedWorker);
            }

            TraceLog.Informational(string.Format("Host '{0}' partition {1}: released!", this.HostName, releasedPartitionId));
        }
        /// <summary>
        /// Handles a split of the given partition: creates leases for its child partitions and deletes the parent's lease.
        /// </summary>
        /// <param name="partitionKeyRangeId">The id of the partition that was split, aka the parent partition.</param>
        /// <param name="continuationToken">Continuation token of the parent partition before the split; it is passed to every child lease.</param>
        /// <param name="leaseId">The id of the parent's lease. This avoids an extra call to ILeaseManager to get the lease by partitionId.</param>
        /// <returns>True on success; false when fewer than two child partitions were found.</returns>
        private async Task <bool> HandleSplitAsync(string partitionKeyRangeId, string continuationToken, string leaseId)
        {
            Debug.Assert(!string.IsNullOrEmpty(partitionKeyRangeId));
            Debug.Assert(!string.IsNullOrEmpty(leaseId));

            TraceLog.Informational(string.Format("Partition {0} is gone due to split, continuation '{1}'", partitionKeyRangeId, continuationToken));

            List <PartitionKeyRange> allRanges = await this.EnumPartitionKeyRangesAsync(this.collectionSelfLink);

            // A child range lists the split partition among its parents. Ranges without parent information are skipped.
            var childRanges = new List <PartitionKeyRange>(
                allRanges.Where(range => range.Parents != null && range.Parents.Contains(partitionKeyRangeId)));

            if (childRanges.Count < 2)
            {
                // The format string has a {0} placeholder, so the partition id must be supplied.
                TraceLog.Error(string.Format("Partition {0} had split but we failed to find at least 2 child paritions.", partitionKeyRangeId));
                return(false);
            }

            var tasks = new List <Task>();

            // Start lease creation for all children, then wait for all of them together.
            foreach (var childRange in childRanges)
            {
                tasks.Add(this.leaseManager.CreateLeaseIfNotExistAsync(childRange.Id, continuationToken));
                TraceLog.Informational(string.Format("Creating lease for partition '{0}' as child of partition '{1}', continuation '{2}'", childRange.Id, partitionKeyRangeId, continuationToken));
            }

            await Task.WhenAll(tasks);

            // The parent lease is removed only after the child leases exist.
            await this.leaseManager.DeleteAsync(new DocumentServiceLease { Id = leaseId });

            TraceLog.Informational(string.Format("Deleted lease for gone (splitted) partition '{0}' continuation '{1}'", partitionKeyRangeId, continuationToken));

            // Note: the rest is up to lease taker, that after waking up would consume these new leases.
            return(true);
        }
        /// <summary>
        /// Initializes the host: creates the document client, reads the monitored database and collection,
        /// builds the lease prefix, sets up the lease/checkpoint manager and lease store, creates leases
        /// for the current partition key ranges and finally creates, subscribes to and initializes the partition manager.
        /// </summary>
        async Task InitializeAsync()
        {
            var location = this.collectionLocation;

            this.documentClient = new DocumentClient(location.Uri, location.MasterKey, location.ConnectionPolicy);

            Uri databaseLink = UriFactory.CreateDatabaseUri(location.DatabaseName);
            Database database = await this.documentClient.ReadDatabaseAsync(databaseLink);

            Uri collectionLink = UriFactory.CreateDocumentCollectionUri(location.DatabaseName, location.CollectionName);
            var readOptions = new RequestOptions { PopulateQuotaInfo = true };
            ResourceResponse<DocumentCollection> collectionResponse =
                await this.documentClient.ReadDocumentCollectionAsync(collectionLink, readOptions);

            DocumentCollection collection = collectionResponse.Resource;

            // From here on the collection is addressed via its self link: if the collection is removed,
            // a new collection with the same name is not accessed by accident.
            this.collectionSelfLink = collection.SelfLink;

            // Prefix layout: <options prefix or empty><account host>_<database rid>_<collection rid>.
            this.leasePrefix = string.Format(
                CultureInfo.InvariantCulture,
                "{0}{1}_{2}_{3}",
                this.options.LeasePrefix ?? string.Empty,
                location.Uri.Host,
                database.ResourceId,
                collection.ResourceId);

            var documentLeaseManager = new DocumentServiceLeaseManager(
                this.auxCollectionLocation,
                this.leasePrefix,
                this.options.LeaseExpirationInterval,
                this.options.LeaseRenewInterval);
            await documentLeaseManager.InitializeAsync();

            // The same object serves as both lease manager and checkpoint manager.
            this.leaseManager      = documentLeaseManager;
            this.checkpointManager = (ICheckpointManager)documentLeaseManager;

            if (this.options.DiscardExistingLeases)
            {
                TraceLog.Warning(string.Format("Host '{0}': removing all leases, as requested by ChangeFeedHostOptions", this.HostName));
                await this.leaseManager.DeleteAllAsync();
            }

            // Note: the lease store is never stale as the monitored collection Rid is used as id prefix for the aux collection.
            // If the collection was removed and re-created, the rid would change.
            // If it's not deleted, it's not stale. If it's deleted, it's not stale as it doesn't exist.
            await this.leaseManager.CreateLeaseStoreIfNotExistsAsync();

            // Index the current partition key ranges by id.
            var rangesById = new Dictionary<string, PartitionKeyRange>();
            List<PartitionKeyRange> currentRanges = await this.EnumPartitionKeyRangesAsync(this.collectionSelfLink);
            foreach (PartitionKeyRange keyRange in currentRanges)
            {
                rangesById.Add(keyRange.Id, keyRange);
            }

            TraceLog.Informational(
                string.Format(
                    "Source collection: '{0}', {1} partition(s), {2} document(s)",
                    location.CollectionName,
                    rangesById.Count,
                    GetDocumentCount(collectionResponse)));

            await this.CreateLeases(rangesById);

            this.partitionManager = new PartitionManager<DocumentServiceLease>(this.HostName, this.leaseManager, this.options);
            await this.partitionManager.SubscribeAsync(this);

            await this.partitionManager.InitializeAsync();
        }
예제 #7
0
        /// <summary>
        /// Removes the lease's partition from the currently owned partitions, notifies observers that the partition
        /// was released and, when this host still has ownership, releases the lease through the lease manager.
        /// </summary>
        /// <param name="lease">The lease to remove. Nothing happens when it is null or its partition is not currently owned.</param>
        /// <param name="hasOwnership">
        /// True when this host still owns the lease: the lease is kept in the renew-during-close set while observers
        /// are notified, and is released afterwards.
        /// </param>
        /// <param name="closeReason">
        /// The reason reported to observers. When left as Unknown, Shutdown is used if <paramref name="hasOwnership"/>
        /// is true and LeaseLost otherwise.
        /// </param>
        /// <remarks>Exceptions from observer notification and from releasing the lease are traced and swallowed.</remarks>
        async Task RemoveLeaseAsync(T lease, bool hasOwnership, ChangeFeedObserverCloseReason closeReason = ChangeFeedObserverCloseReason.Unknown)
        {
            ChangeFeedObserverCloseReason reason =
                closeReason != ChangeFeedObserverCloseReason.Unknown ? closeReason :
                hasOwnership ? ChangeFeedObserverCloseReason.Shutdown : ChangeFeedObserverCloseReason.LeaseLost;

            // On success TryRemove replaces 'lease' with the instance stored in the owned-partitions map.
            if (lease != null && this.currentlyOwnedPartitions != null && this.currentlyOwnedPartitions.TryRemove(lease.PartitionId, out lease))
            {
                TraceLog.Informational(string.Format("Host '{0}' successfully removed PartitionId '{1}' with lease token '{2}' from currently owned partitions.", this.workerName, lease.PartitionId, lease.ConcurrencyToken));

                try
                {
                    if (hasOwnership)
                    {
                        this.keepRenewingDuringClose.TryAdd(lease.PartitionId, lease);
                    }

                    TraceLog.Informational(string.Format("Host '{0}' closing event processor for PartitionId '{1}' and lease token '{2}' with reason '{3}'", this.workerName, lease.PartitionId, lease.ConcurrencyToken, reason));

                    // Notify the host that we lost partition so shutdown can be triggered on the host
                    await this.partitionObserverManager.NotifyPartitionReleasedAsync(lease, reason);

                    TraceLog.Informational(string.Format("Host '{0}' closed event processor for PartitionId '{1}' and lease token '{2}' with reason '{3}'", this.workerName, lease.PartitionId, lease.ConcurrencyToken, reason));
                }
                catch (Exception ex)
                {
                    // Eat any exceptions during notification of observers
                    TraceLog.Exception(ex);
                }
                finally
                {
                    if (hasOwnership)
                    {
                        // Use a throwaway out variable. The renewer may already have dropped this entry after a
                        // failed renew, in which case TryRemove fails and would reset 'lease' to null, breaking
                        // the release and its trace messages below.
                        T removedFromRenewSet;
                        this.keepRenewingDuringClose.TryRemove(lease.PartitionId, out removedFromRenewSet);
                    }
                }

                if (hasOwnership)
                {
                    try
                    {
                        await this.leaseManager.ReleaseAsync(lease);

                        TraceLog.Informational(string.Format("Host '{0}' successfully released lease on PartitionId '{1}' with lease token '{2}'", this.workerName, lease.PartitionId, lease.ConcurrencyToken));
                    }
                    catch (LeaseLostException)
                    {
                        // We have already shutdown the processor so we can ignore any LeaseLost at this point
                        TraceLog.Informational(string.Format("Host '{0}' failed to release lease for PartitionId '{1}' with lease token '{2}' due to conflict.", this.workerName, lease.PartitionId, lease.ConcurrencyToken));
                    }
                    catch (Exception ex)
                    {
                        TraceLog.Exception(ex);
                    }
                }
            }
        }
        /// <summary>
        /// Registers the lease's partition as owned by this host and notifies observers that it was acquired.
        /// </summary>
        /// <param name="lease">The lease that was acquired.</param>
        /// <remarks>
        /// When the partition is already registered, the lease is released through the lease manager instead.
        /// When observer notification throws, the lease is removed again with close reason ObserverError.
        /// Exceptions from notification and release are traced and swallowed.
        /// </remarks>
        async Task AddLeaseAsync(T lease)
        {
            if (!this.currentlyOwnedPartitions.TryAdd(lease.PartitionId, lease))
            {
                // We already acquired lease for this partition but it looks like we previously owned this partition
                // and haven't completed the shutdown process for it yet.  Release lease for possible others hosts to
                // pick it up.
                try
                {
                    TraceLog.Warning(
                        string.Format(
                            "Host '{0}' unable to add PartitionId '{1}' with lease token '{2}' to currently owned partitions.",
                            this.workerName,
                            lease.PartitionId,
                            lease.ConcurrencyToken));

                    await this.leaseManager.ReleaseAsync(lease);

                    TraceLog.Informational(
                        string.Format(
                            "Host '{0}' successfully released lease on PartitionId '{1}' with lease token '{2}'",
                            this.workerName,
                            lease.PartitionId,
                            lease.ConcurrencyToken));
                }
                catch (LeaseLostException)
                {
                    // We have already shutdown the processor so we can ignore any LeaseLost at this point
                    TraceLog.Informational(
                        string.Format(
                            "Host '{0}' failed to release lease for PartitionId '{1}' with lease token '{2}' due to conflict.",
                            this.workerName,
                            lease.PartitionId,
                            lease.ConcurrencyToken));
                }
                catch (Exception releaseError)
                {
                    TraceLog.Exception(releaseError);
                }

                return;
            }

            // Cleared in the catch block; the removal is awaited outside of it.
            bool processorOpened = true;
            try
            {
                TraceLog.Informational(
                    string.Format(
                        "Host '{0}' opening event processor for PartitionId '{1}' and lease token '{2}'",
                        this.workerName,
                        lease.PartitionId,
                        lease.ConcurrencyToken));

                await this.partitionObserverManager.NotifyPartitionAcquiredAsync(lease);

                TraceLog.Informational(
                    string.Format(
                        "Host '{0}' opened event processor for PartitionId '{1}' and lease token '{2}'",
                        this.workerName,
                        lease.PartitionId,
                        lease.ConcurrencyToken));
            }
            catch (Exception openError)
            {
                TraceLog.Informational(
                    string.Format(
                        "Host '{0}' failed to initialize processor for PartitionId '{1}' and lease token '{2}'",
                        this.workerName,
                        lease.PartitionId,
                        lease.ConcurrencyToken));

                processorOpened = false;

                // Eat any exceptions during notification of observers
                TraceLog.Exception(openError);
            }

            // The lease must be released when the processor failed to initialize, so some other node can pick up the partition.
            if (!processorOpened)
            {
                await this.RemoveLeaseAsync(lease, true, ChangeFeedObserverCloseReason.ObserverError);
            }
        }
        /// <summary>
        /// Tries to acquire the given lease for this worker through the lease manager.
        /// </summary>
        /// <param name="lease">The lease to acquire.</param>
        /// <returns>The result of the lease manager's AcquireAsync call, or null when that call threw.</returns>
        async Task<T> TryAcquireLeaseAsync(T lease)
        {
            T acquired = null;

            try
            {
                acquired = await this.leaseManager.AcquireAsync(lease, this.workerName);
            }
            catch (LeaseLostException)
            {
                TraceLog.Informational(
                    string.Format(
                        "Host '{0}' failed to acquire lease for PartitionId '{1}' due to conflict.",
                        this.workerName,
                        lease.PartitionId));
            }
            catch (Exception acquireError)
            {
                // Eat any exceptions during acquiring lease.
                TraceLog.Exception(acquireError);
            }

            return acquired;
        }
        /// <summary>
        /// Tries to take over the given lease for this worker through the lease manager.
        /// </summary>
        /// <param name="lease">The lease to steal.</param>
        /// <returns>The result of the lease manager's AcquireAsync call, or null when that call threw.</returns>
        async Task<T> TryStealLeaseAsync(T lease)
        {
            T stolen = null;

            try
            {
                stolen = await this.leaseManager.AcquireAsync(lease, this.workerName);
            }
            catch (LeaseLostException)
            {
                // Concurrency issue in stealing the lease, someone else got it before us
                TraceLog.Informational(
                    string.Format(
                        "Host '{0}' failed to steal lease for PartitionId '{1}' due to conflict.",
                        this.workerName,
                        lease.PartitionId));
            }
            catch (Exception stealError)
            {
                // Eat any exceptions during stealing
                TraceLog.Exception(stealError);
            }

            return stolen;
        }
        /// <summary>
        /// Loop that runs while the manager is started: takes available leases, adds each of them,
        /// then waits for the configured lease acquire interval before the next round.
        /// </summary>
        /// <remarks>
        /// Exceptions from a round are traced and swallowed. Cancellation of the delay is traced,
        /// after which the loop condition is evaluated again.
        /// </remarks>
        async Task LeaseTakerAsync()
        {
            while (this.isStarted == 1)
            {
                try
                {
                    TraceLog.Informational(string.Format("Host '{0}' starting to check for available leases.", this.workerName));
                    var taken = await this.TakeLeasesAsync();

                    if (taken.Count > 0)
                    {
                        TraceLog.Informational(string.Format("Host '{0}' adding {1} leases...", this.workerName, taken.Count));
                    }

                    // Start adding every taken lease, then wait for all additions together.
                    var pendingAdds = new List<Task>();
                    foreach (var entry in taken)
                    {
                        Task addTask = this.AddLeaseAsync(entry.Value);
                        pendingAdds.Add(addTask);
                    }

                    await Task.WhenAll(pendingAdds.ToArray());
                }
                catch (Exception roundError)
                {
                    TraceLog.Exception(roundError);
                }

                try
                {
                    var stopToken = this.leaseTakerCancellationTokenSource.Token;
                    await Task.Delay(this.options.LeaseAcquireInterval, stopToken);
                }
                catch (OperationCanceledException)
                {
                    TraceLog.Informational(string.Format("Host '{0}' AcquireLease task canceled.", this.workerName));
                }
            }

            TraceLog.Informational(string.Format("Host '{0}' AcquireLease task completed.", this.workerName));
        }
        /// <summary>
        /// Renews every lease in the lease store whose owner matches this worker (case-insensitive) and
        /// adds the successfully renewed leases to this host.
        /// </summary>
        /// <remarks>
        /// Leases are renewed one after another while listing; the add operations are started afterwards and awaited together.
        /// </remarks>
        public async Task InitializeAsync()
        {
            List <T> leases = new List <T>();

            TraceLog.Verbose(string.Format("Host '{0}' starting renew leases assigned to this host on initialize.", this.workerName));

            foreach (var lease in await this.leaseManager.ListLeases())
            {
                // Only leases already assigned to this worker are of interest here.
                if (!string.Equals(lease.Owner, this.workerName, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                T renewedLease = await this.RenewLeaseAsync(lease);

                if (renewedLease != null)
                {
                    leases.Add(renewedLease);
                }
                else
                {
                    TraceLog.Informational(string.Format("Host '{0}' unable to renew lease '{1}' on startup.", this.workerName, lease.PartitionId));
                }
            }

            var addLeaseTasks = new List <Task>();

            foreach (T lease in leases)
            {
                TraceLog.Informational(string.Format("Host '{0}' acquired lease for PartitionId '{1}' on startup.", this.workerName, lease.PartitionId));
                addLeaseTasks.Add(this.AddLeaseAsync(lease));
            }

            await Task.WhenAll(addLeaseTasks.ToArray());
        }
        /// <summary>
        /// Loop that periodically renews all currently owned leases and all leases of partitions that are being closed.
        /// </summary>
        /// <remarks>
        /// Runs while the manager is started or shutdown has not completed. Leases of owned partitions that fail to renew
        /// are removed with close reason LeaseLost; leases in the renew-during-close set that fail to renew are dropped
        /// from that set. Exceptions from a round (including cancellation of the delay) are traced and swallowed.
        /// Both lease maps are cleared when the loop exits.
        /// </remarks>
        async Task LeaseRenewer()
        {
            while (this.isStarted == 1 || !this.shutdownComplete)
            {
                try
                {
                    TraceLog.Informational(string.Format("Host '{0}' starting renewal of Leases.", this.workerName));

                    // The continuations below belong to renew tasks started in parallel and may run concurrently,
                    // so every collection they write to must be thread-safe.
                    ConcurrentBag <T> renewedLeases               = new ConcurrentBag <T>();
                    ConcurrentBag <T> failedToRenewLeases         = new ConcurrentBag <T>();
                    ConcurrentBag <T> failedToRenewShutdownLeases = new ConcurrentBag <T>();
                    List <Task>       renewTasks                  = new List <Task>();

                    // Renew leases for all currently owned partitions in parallel
                    foreach (T lease in this.currentlyOwnedPartitions.Values)
                    {
                        renewTasks.Add(this.RenewLeaseAsync(lease).ContinueWith(renewResult =>
                        {
                            if (renewResult.Result != null)
                            {
                                renewedLeases.Add(renewResult.Result);
                            }
                            else
                            {
                                // Keep track of all failed attempts to renew so we can trigger shutdown for these partitions
                                failedToRenewLeases.Add(lease);
                            }
                        }));
                    }

                    // Renew leases for all partitions currently in shutdown
                    foreach (T shutdownLeases in this.keepRenewingDuringClose.Values)
                    {
                        renewTasks.Add(this.RenewLeaseAsync(shutdownLeases).ContinueWith(renewResult =>
                        {
                            if (renewResult.Result != null)
                            {
                                renewedLeases.Add(renewResult.Result);
                            }
                            else
                            {
                                // Keep track of all failed attempts to renew shutdown leases so we can remove them from further renew attempts
                                failedToRenewShutdownLeases.Add(shutdownLeases);
                            }
                        }));
                    }

                    // Wait for all renews to complete
                    await Task.WhenAll(renewTasks.ToArray());

                    // Update renewed leases.
                    foreach (T lease in renewedLeases)
                    {
                        bool updateResult = this.currentlyOwnedPartitions.TryUpdate(lease.PartitionId, lease, lease);
                        if (!updateResult)
                        {
                            TraceLog.Warning(string.Format("Host '{0}' Renewed lease {1} but failed to update it in the map (ignorable).", this.workerName, lease));
                        }
                    }

                    // Trigger shutdown of all partitions we failed to renew leases
                    await failedToRenewLeases.ForEachAsync(
                        async lease => await this.RemoveLeaseAsync(lease, false, ChangeFeedObserverCloseReason.LeaseLost),
                        this.options.DegreeOfParallelism);

                    // Now remove all failed renewals of shutdown leases from further renewals
                    foreach (T failedToRenewShutdownLease in failedToRenewShutdownLeases)
                    {
                        T removedLease = null;
                        this.keepRenewingDuringClose.TryRemove(failedToRenewShutdownLease.PartitionId, out removedLease);
                    }

                    await Task.Delay(this.options.LeaseRenewInterval, this.leaseRenewerCancellationTokenSource.Token);
                }
                catch (OperationCanceledException)
                {
                    TraceLog.Informational(string.Format("Host '{0}' Renewer task canceled.", this.workerName));
                }
                catch (Exception ex)
                {
                    TraceLog.Exception(ex);
                }
            }

            this.currentlyOwnedPartitions.Clear();
            this.keepRenewingDuringClose.Clear();
            TraceLog.Informational(string.Format("Host '{0}' Renewer task completed.", this.workerName));
        }
        /// <summary>
        /// Creates leases for partitions that have none yet and removes leases of partitions that no longer exist.
        /// </summary>
        /// <param name="ranges">The current partition key ranges of the monitored collection, keyed by range id; must not be null.</param>
        /// <remarks>
        /// A partition with an existing lease but no entry in <paramref name="ranges"/> is treated as gone (split).
        /// A new partition that lists a gone partition among its parents inherits that parent's continuation token;
        /// when several gone parents are found, the token of the last one enumerated is used.
        /// </remarks>
        private async Task CreateLeases(IDictionary <string, PartitionKeyRange> ranges)
        {
            Debug.Assert(ranges != null);

            // Get leases after getting ranges, to make sure that no other hosts checked in continuation for split partition after we got leases.
            var existingLeases = new Dictionary <string, DocumentServiceLease>();

            foreach (var lease in await this.leaseManager.ListLeases())
            {
                existingLeases.Add(lease.PartitionId, lease);
            }

            // Partitions that have a lease but are no longer among the current ranges.
            var gonePartitionIds = new HashSet <string>();

            foreach (var partitionId in existingLeases.Keys)
            {
                if (!ranges.ContainsKey(partitionId))
                {
                    gonePartitionIds.Add(partitionId);
                }
            }

            // Current ranges that do not have a lease yet.
            var addedPartitionIds = new List <string>();

            foreach (var range in ranges)
            {
                if (!existingLeases.ContainsKey(range.Key))
                {
                    addedPartitionIds.Add(range.Key);
                }
            }

            // Create leases for new partitions, if there was split, use continuation from parent partition.
            var parentIdToChildLeases = new ConcurrentDictionary <string, ConcurrentQueue <DocumentServiceLease> >();
            await addedPartitionIds.ForEachAsync(
                async addedRangeId =>
            {
                // Make sure a stats entry exists for the new partition; an existing entry is kept as is.
                this.statsSinceLastCheckpoint.AddOrUpdate(
                    addedRangeId,
                    new CheckpointStats(),
                    (partitionId, existingStats) => existingStats);

                string continuationToken = null;
                string parentIds         = string.Empty;
                var range = ranges[addedRangeId];
                if (range.Parents != null && range.Parents.Count > 0)       // Check for split.
                {
                    foreach (var parentRangeId in range.Parents)
                    {
                        if (gonePartitionIds.Contains(parentRangeId))
                        {
                            // Transfer continuation from lease for gone parent to lease for its child partition.
                            Debug.Assert(existingLeases[parentRangeId] != null);

                            parentIds += parentIds.Length == 0 ? parentRangeId : "," + parentRangeId;
                            string parentContinuation = existingLeases[parentRangeId].ContinuationToken;
                            if (continuationToken != null)
                            {
                                // The message has four placeholders: partition, new token, current token and the token that wins.
                                TraceLog.Warning(string.Format("Partition {0}: found more than one parent, new continuation '{1}', current '{2}', will use '{3}'", addedRangeId, parentContinuation, continuationToken, parentContinuation));
                            }

                            continuationToken = parentContinuation;
                        }
                    }
                }

                bool wasCreated = await this.leaseManager.CreateLeaseIfNotExistAsync(addedRangeId, continuationToken);

                if (wasCreated)
                {
                    if (parentIds.Length == 0)
                    {
                        TraceLog.Informational(string.Format("Created lease for partition '{0}', continuation '{1}'.", addedRangeId, continuationToken));
                    }
                    else
                    {
                        TraceLog.Informational(string.Format("Created lease for partition '{0}' as child of split partition(s) '{1}', continuation '{2}'.", addedRangeId, parentIds, continuationToken));
                    }
                }
                else
                {
                    TraceLog.Warning(string.Format("Some other host created lease for '{0}' as child of split partition(s) '{1}', continuation '{2}'.", addedRangeId, parentIds, continuationToken));
                }
            },
                this.options.DegreeOfParallelism);

            // Remove leases for split (and thus gone) partitions together with their checkpoint stats.
            await gonePartitionIds.ForEachAsync(
                async goneRangeId =>
            {
                await this.leaseManager.DeleteAsync(existingLeases[goneRangeId]);
                TraceLog.Informational(string.Format("Deleted lease for gone (splitted) partition '{0}', continuation '{1}'", goneRangeId, existingLeases[goneRangeId].ContinuationToken));

                CheckpointStats removedStatsUnused;
                this.statsSinceLastCheckpoint.TryRemove(goneRangeId, out removedStatsUnused);
            },
                this.options.DegreeOfParallelism);
        }
        /// <summary>
        /// Runs one lease-balancing pass: lists every lease, works out how many partitions this
        /// worker should own, and tries to take the shortfall.
        /// </summary>
        /// <remarks>
        /// Leases with no owner, or that the lease manager reports as expired, are acquired first.
        /// Only when no such lease exists does the worker try to steal, and then at most one lease
        /// per pass, from the worker that currently owns the most partitions.
        /// </remarks>
        /// <returns>
        /// The leases acquired or stolen during this pass, keyed by partition id. Empty when nothing was taken.
        /// </returns>
        async Task <IDictionary <string, T> > TakeLeasesAsync()
        {
            IDictionary <string, T>   allPartitions          = new Dictionary <string, T>();
            IDictionary <string, T>   takenLeases            = new Dictionary <string, T>();
            IDictionary <string, int> workerToPartitionCount = new Dictionary <string, int>();
            List <T> expiredLeases = new List <T>();

            // Classify every lease: either it is available (unowned/expired) or it counts towards its owner's load.
            foreach (var lease in await this.leaseManager.ListLeases())
            {
                Debug.Assert(lease.PartitionId != null, "TakeLeasesAsync: lease.PartitionId cannot be null.");

                allPartitions.Add(lease.PartitionId, lease);
                if (string.IsNullOrWhiteSpace(lease.Owner) || await this.leaseManager.IsExpired(lease))
                {
                    TraceLog.Verbose(string.Format("Found unused or expired lease: {0}", lease));
                    expiredLeases.Add(lease);
                }
                else
                {
                    // TryGetValue leaves 'ownedSoFar' at 0 for an owner seen for the first time.
                    int ownedSoFar;
                    workerToPartitionCount.TryGetValue(lease.Owner, out ownedSoFar);
                    workerToPartitionCount[lease.Owner] = ownedSoFar + 1;
                }
            }

            // This worker always takes part in the balancing, even if it owns nothing yet.
            if (!workerToPartitionCount.ContainsKey(this.workerName))
            {
                workerToPartitionCount.Add(this.workerName, 0);
            }

            int partitionCount = allPartitions.Count;
            int workerCount    = workerToPartitionCount.Count;

            if (partitionCount == 0)
            {
                return takenLeases;
            }

            // Fair share: ceil(partitions / workers) when there are more partitions than workers, otherwise 1.
            int target = 1;
            if (partitionCount > workerCount)
            {
                target = (int)Math.Ceiling((double)partitionCount / (double)workerCount);
            }

            Debug.Assert(this.options.MinPartitionCount <= this.options.MaxPartitionCount);

            // A limit of 0 (or less) means "not configured"; max is applied before min.
            if (this.options.MaxPartitionCount > 0 && target > this.options.MaxPartitionCount)
            {
                target = this.options.MaxPartitionCount;
            }

            if (this.options.MinPartitionCount > 0 && target < this.options.MinPartitionCount)
            {
                target = this.options.MinPartitionCount;
            }

            int myCount = workerToPartitionCount[this.workerName];
            int partitionsNeededForMe = target - myCount;
            TraceLog.Informational(
                string.Format(
                    "Host '{0}' {1} partitions, {2} hosts, {3} available leases, target = {4}, min = {5}, max = {6}, mine = {7}, will try to take {8} lease(s) for myself'.",
                    this.workerName,
                    partitionCount,
                    workerCount,
                    expiredLeases.Count,
                    target,
                    this.options.MinPartitionCount,
                    this.options.MaxPartitionCount,
                    myCount,
                    Math.Max(partitionsNeededForMe, 0)));

            if (partitionsNeededForMe <= 0)
            {
                return takenLeases;
            }

            if (expiredLeases.Count > 0)
            {
                // Available leases exist: take those and never steal in the same pass,
                // even if some of the acquisitions fail.
                foreach (T leaseToTake in expiredLeases)
                {
                    if (partitionsNeededForMe == 0)
                    {
                        break;
                    }

                    TraceLog.Informational(string.Format("Host '{0}' attempting to take lease for PartitionId '{1}'.", this.workerName, leaseToTake.PartitionId));
                    T acquiredLease = await this.TryAcquireLeaseAsync(leaseToTake);

                    if (acquiredLease != null)
                    {
                        TraceLog.Informational(string.Format("Host '{0}' successfully acquired lease for PartitionId '{1}': {2}", this.workerName, leaseToTake.PartitionId, acquiredLease));
                        takenLeases.Add(acquiredLease.PartitionId, acquiredLease);

                        partitionsNeededForMe--;
                    }
                }

                return takenLeases;
            }

            // Nothing is available: find the worker that owns the most partitions.
            // The first entry seeds the search; after that only a strictly larger count replaces it,
            // so ties resolve to the entry enumerated first.
            string workerToStealFrom   = null;
            int    mostPartitionsOwned = 0;
            foreach (var kvp in workerToPartitionCount)
            {
                if (workerToStealFrom == null || mostPartitionsOwned < kvp.Value)
                {
                    workerToStealFrom   = kvp.Key;
                    mostPartitionsOwned = kvp.Value;
                }
            }

            // Steal only from a worker that is above the target; when this worker is short by more
            // than one lease, a worker sitting exactly at the target is also eligible.
            int stealThreshold = target - (partitionsNeededForMe > 1 ? 1 : 0);
            if (mostPartitionsOwned <= stealThreshold)
            {
                return takenLeases;
            }

            foreach (var kvp in allPartitions)
            {
                if (!string.Equals(kvp.Value.Owner, workerToStealFrom, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                T leaseToTake = kvp.Value;
                TraceLog.Informational(string.Format("Host '{0}' attempting to steal lease from '{1}' for PartitionId '{2}'.", this.workerName, workerToStealFrom, leaseToTake.PartitionId));
                T stolenLease = await this.TryStealLeaseAsync(leaseToTake);

                if (stolenLease != null)
                {
                    TraceLog.Informational(string.Format("Host '{0}' stole lease from '{1}' for PartitionId '{2}'.", this.workerName, workerToStealFrom, leaseToTake.PartitionId));
                    takenLeases.Add(stolenLease.PartitionId, stolenLease);

                    // Only steal one lease at a time
                    break;
                }
            }

            return takenLeases;
        }
        /// <summary>
        /// Called when this host acquires the lease for a partition. Creates an observer for the
        /// partition, starts a background worker that polls the change feed and delivers documents
        /// to that observer, and registers the worker in <c>partitionKeyRangeIdToWorkerMap</c>.
        /// </summary>
        /// <param name="lease">The acquired lease; asserted to be non-null with a non-empty owner.</param>
        /// <returns>
        /// A task that completes once the worker has been started and registered. It does not wait
        /// for the worker loop itself to finish.
        /// </returns>
        async Task IPartitionObserver <DocumentServiceLease> .OnPartitionAcquiredAsync(DocumentServiceLease lease)
        {
            Debug.Assert(lease != null && !string.IsNullOrEmpty(lease.Owner), "lease");
            TraceLog.Informational(string.Format("Host '{0}' partition {1}: acquired!", this.HostName, lease.PartitionId));

#if DEBUG
            Interlocked.Increment(ref this.partitionCount);
#endif

            IChangeFeedObserver       observer = this.observerFactory.CreateObserver();
            ChangeFeedObserverContext context  = new ChangeFeedObserverContext {
                PartitionKeyRangeId = lease.PartitionId
            };
            CancellationTokenSource cancellation = new CancellationTokenSource();

            // Create ChangeFeedOptions to use for this worker.
            // This is a per-worker copy: the worker overwrites PartitionKeyRangeId and
            // RequestContinuation below without touching the host-wide changeFeedOptions.
            ChangeFeedOptions options = new ChangeFeedOptions
            {
                MaxItemCount        = this.changeFeedOptions.MaxItemCount,
                PartitionKeyRangeId = this.changeFeedOptions.PartitionKeyRangeId,
                SessionToken        = this.changeFeedOptions.SessionToken,
                StartFromBeginning  = this.changeFeedOptions.StartFromBeginning,
                RequestContinuation = this.changeFeedOptions.RequestContinuation
            };

            // StartNew with an async lambda returns Task<Task>; awaiting it yields the inner task,
            // so workerTask represents the worker loop itself rather than its mere scheduling.
            var workerTask = await Task.Factory.StartNew(async() =>
            {
                // Stays null unless the worker ends for a reason that should release the lease.
                ChangeFeedObserverCloseReason?closeReason = null;
                try
                {
                    try
                    {
                        await observer.OpenAsync(context);
                    }
                    catch (Exception ex)
                    {
                        TraceLog.Error(string.Format("IChangeFeedObserver.OpenAsync exception: {0}", ex));
                        // Record the reason before rethrowing so the outer catch does not downgrade it to Unknown.
                        closeReason = ChangeFeedObserverCloseReason.ObserverError;
                        throw;
                    }

                    // Read this partition only, resuming from the lease's continuation token when it has one.
                    options.PartitionKeyRangeId = lease.PartitionId;
                    if (!string.IsNullOrEmpty(lease.ContinuationToken))
                    {
                        options.RequestContinuation = lease.ContinuationToken;
                    }

                    CheckpointStats checkpointStats = null;
                    if (!this.statsSinceLastCheckpoint.TryGetValue(lease.PartitionId, out checkpointStats) || checkpointStats == null)
                    {
                        // It could be that the lease was created by different host and we picked it up.
                        // The update delegate keeps any stats that were added concurrently.
                        checkpointStats = this.statsSinceLastCheckpoint.AddOrUpdate(
                            lease.PartitionId,
                            new CheckpointStats(),
                            (partitionId, existingStats) => existingStats);
                        // NOTE(review): this logs through Trace.TraceWarning while the rest of the method uses TraceLog - confirm this is intentional.
                        Trace.TraceWarning(string.Format("Added stats for partition '{0}' for which the lease was picked up after the host was started.", lease.PartitionId));
                    }

                    IDocumentQuery <Document> query = this.documentClient.CreateDocumentChangeFeedQuery(this.collectionSelfLink, options);

                    TraceLog.Verbose(string.Format("Worker start: partition '{0}', continuation '{1}'", lease.PartitionId, lease.ContinuationToken));

                    // Continuation of the most recent successful read; passed to HandleSplitAsync when the partition is gone.
                    string lastContinuation = options.RequestContinuation;

                    try
                    {
                        // Outer loop: one iteration per polling cycle. Inner do/while: drains the query while it has more results.
                        while (this.isShutdown == 0)
                        {
                            do
                            {
                                ExceptionDispatchInfo exceptionDispatchInfo = null;
                                FeedResponse <Document> response            = null;

                                try
                                {
                                    response         = await query.ExecuteNextAsync <Document>();
                                    lastContinuation = response.ResponseContinuation;
                                }
                                catch (DocumentClientException ex)
                                {
                                    // Captured rather than handled here so the handling below can await
                                    // and can later rethrow via ExceptionDispatchInfo.Throw().
                                    exceptionDispatchInfo = ExceptionDispatchInfo.Capture(ex);
                                }

                                if (exceptionDispatchInfo != null)
                                {
                                    DocumentClientException dcex = (DocumentClientException)exceptionDispatchInfo.SourceException;

                                    if (StatusCode.NotFound == (StatusCode)dcex.StatusCode && SubStatusCode.ReadSessionNotAvailable != (SubStatusCode)GetSubStatusCode(dcex))
                                    {
                                        // Most likely, the database or collection was removed while we were enumerating.
                                        // Shut down. The user will need to start over.
                                        // Note: this has to be a new task, can't await for shutdown here, as shutdown awaits for all worker tasks.
                                        TraceLog.Error(string.Format("Partition {0}: resource gone (subStatus={1}). Aborting.", context.PartitionKeyRangeId, GetSubStatusCode(dcex)));
                                        await Task.Factory.StartNew(() => this.StopAsync(ChangeFeedObserverCloseReason.ResourceGone));
                                        // Leaves only the inner do/while; the outer loop keeps running until isShutdown becomes non-zero.
                                        break;
                                    }
                                    else if (StatusCode.Gone == (StatusCode)dcex.StatusCode)
                                    {
                                        SubStatusCode subStatusCode = (SubStatusCode)GetSubStatusCode(dcex);
                                        if (SubStatusCode.PartitionKeyRangeGone == subStatusCode)
                                        {
                                            bool isSuccess = await HandleSplitAsync(context.PartitionKeyRangeId, lastContinuation, lease.Id);
                                            if (!isSuccess)
                                            {
                                                TraceLog.Error(string.Format("Partition {0}: HandleSplit failed! Aborting.", context.PartitionKeyRangeId));
                                                await Task.Factory.StartNew(() => this.StopAsync(ChangeFeedObserverCloseReason.ResourceGone));
                                                break;
                                            }

                                            // Throw LeaseLostException so that we take the lease down.
                                            // NOTE(review): the trailing 'true' presumably sets IsGone, which the catch below maps to LeaseGone - confirm against LeaseLostException.
                                            throw new LeaseLostException(lease, exceptionDispatchInfo.SourceException, true);
                                        }
                                        else if (SubStatusCode.Splitting == subStatusCode)
                                        {
                                            // Falls through to the delay below and retries on the next iteration.
                                            TraceLog.Warning(string.Format("Partition {0} is splitting. Will retry to read changes until split finishes. {1}", context.PartitionKeyRangeId, dcex.Message));
                                        }
                                        else
                                        {
                                            // Any other 'Gone' sub-status is not handled here: rethrow the captured exception.
                                            exceptionDispatchInfo.Throw();
                                        }
                                    }
                                    else if (StatusCode.TooManyRequests == (StatusCode)dcex.StatusCode ||
                                             StatusCode.ServiceUnavailable == (StatusCode)dcex.StatusCode)
                                    {
                                        // Falls through to the delay below and retries on the next iteration.
                                        TraceLog.Warning(string.Format("Partition {0}: retriable exception : {1}", context.PartitionKeyRangeId, dcex.Message));
                                    }
                                    else
                                    {
                                        // Not a status this worker knows how to recover from: rethrow the captured exception.
                                        exceptionDispatchInfo.Throw();
                                    }

                                    // Reached only on the retriable paths. Use the exception's RetryAfter when it is
                                    // non-zero, otherwise the regular poll delay.
                                    await Task.Delay(dcex.RetryAfter != TimeSpan.Zero ? dcex.RetryAfter : this.options.FeedPollDelay, cancellation.Token);
                                }

                                if (response != null)
                                {
                                    if (response.Count > 0)
                                    {
                                        List <Document> docs = new List <Document>();
                                        docs.AddRange(response);

                                        try
                                        {
                                            // The raw response is exposed on the context only for the duration of the observer call.
                                            context.FeedResponse = response;
                                            await observer.ProcessChangesAsync(context, docs);
                                        }
                                        catch (Exception ex)
                                        {
                                            TraceLog.Error(string.Format("IChangeFeedObserver.ProcessChangesAsync exception: {0}", ex));
                                            closeReason = ChangeFeedObserverCloseReason.ObserverError;
                                            throw;
                                        }
                                        finally
                                        {
                                            context.FeedResponse = null;
                                        }
                                    }

                                    checkpointStats.ProcessedDocCount += (uint)response.Count;

                                    if (IsCheckpointNeeded(lease, checkpointStats))
                                    {
                                        // CheckpointAsync returns the updated lease, which replaces the captured one for later iterations.
                                        lease = await CheckpointAsync(lease, response.ResponseContinuation, context);
                                        checkpointStats.Reset();
                                    }
                                    else if (response.Count > 0)
                                    {
                                        TraceLog.Informational(string.Format("Checkpoint: not checkpointing for partition {0}, {1} docs, new continuation '{2}' as frequency condition is not met", lease.PartitionId, response.Count, response.ResponseContinuation));
                                    }
                                }
                            }while (query.HasMoreResults && this.isShutdown == 0);

                            if (this.isShutdown == 0)
                            {
                                await Task.Delay(this.options.FeedPollDelay, cancellation.Token);
                            }
                        } // Outer while (this.isShutdown == 0) loop.

                        // The loop ended because isShutdown became non-zero.
                        closeReason = ChangeFeedObserverCloseReason.Shutdown;
                    }
                    catch (TaskCanceledException)
                    {
                        // closeReason is not modified here, so a cancelled worker does not trigger the release below
                        // unless a reason was already recorded.
                        Debug.Assert(cancellation.IsCancellationRequested, "cancellation.IsCancellationRequested");
                        TraceLog.Informational(string.Format("Cancel signal received for partition {0} worker!", context.PartitionKeyRangeId));
                    }
                }
                catch (LeaseLostException ex)
                {
                    closeReason = ex.IsGone ? ChangeFeedObserverCloseReason.LeaseGone : ChangeFeedObserverCloseReason.LeaseLost;
                }
                catch (Exception ex)
                {
                    TraceLog.Error(string.Format("Partition {0} exception: {1}", context.PartitionKeyRangeId, ex));
                    // Keep a more specific reason (e.g. ObserverError) if one was recorded before the rethrow.
                    if (!closeReason.HasValue)
                    {
                        closeReason = ChangeFeedObserverCloseReason.Unknown;
                    }
                }

                // NOTE(review): closeReason is also set to Shutdown on an orderly exit, so this branch (and its
                // "due to an error" log text) runs for normal shutdown as well.
                if (closeReason.HasValue)
                {
                    TraceLog.Informational(string.Format("Releasing lease for partition {0} due to an error, reason: {1}!", context.PartitionKeyRangeId, closeReason.Value));

                    // Note: this has to be a new task, because OnPartitionReleasedAsync awaits for worker task.
                    await Task.Factory.StartNew(async() => await this.partitionManager.TryReleasePartitionAsync(context.PartitionKeyRangeId, true, closeReason.Value));
                }

                TraceLog.Informational(string.Format("Partition {0}: worker finished!", context.PartitionKeyRangeId));
            });

            // Register the worker, replacing any previous entry for the same partition key range id.
            var newWorkerData = new WorkerData(workerTask, observer, context, cancellation);
            this.partitionKeyRangeIdToWorkerMap.AddOrUpdate(context.PartitionKeyRangeId, newWorkerData, (string id, WorkerData d) => { return(newWorkerData); });
        }
        /// <summary>
        /// Called when this host acquires the lease for a partition. Creates an observer for the
        /// partition, starts a background worker that polls the change feed, delivers documents to
        /// the observer and checkpoints after each successful delivery, then registers the worker
        /// in <c>partitionKeyRangeIdToWorkerMap</c>.
        /// </summary>
        /// <param name="lease">The acquired lease; asserted to be non-null with a non-empty owner.</param>
        /// <returns>
        /// A task that completes once the worker has been started and registered. It does not wait
        /// for the worker loop itself to finish.
        /// </returns>
        async Task IPartitionObserver <DocumentServiceLease> .OnPartitionAcquiredAsync(DocumentServiceLease lease)
        {
            Debug.Assert(lease != null && !string.IsNullOrEmpty(lease.Owner), "lease");
            TraceLog.Informational(string.Format("Host '{0}' partition {1}: acquired!", this.HostName, lease.PartitionId));

#if DEBUG
            Interlocked.Increment(ref this.partitionCount);
#endif

            IChangeFeedObserver       observer = this.observerFactory.CreateObserver();
            ChangeFeedObserverContext context  = new ChangeFeedObserverContext {
                PartitionKeyRangeId = lease.PartitionId
            };
            CancellationTokenSource cancellation = new CancellationTokenSource();

            // Create ChangeFeedOptions to use for this worker.
            // This is a per-worker copy: the worker overwrites PartitionKeyRangeId and
            // RequestContinuation below without touching the host-wide changeFeedOptions.
            ChangeFeedOptions options = new ChangeFeedOptions
            {
                MaxItemCount        = this.changeFeedOptions.MaxItemCount,
                PartitionKeyRangeId = this.changeFeedOptions.PartitionKeyRangeId,
                SessionToken        = this.changeFeedOptions.SessionToken,
                StartFromBeginning  = this.changeFeedOptions.StartFromBeginning,
                RequestContinuation = this.changeFeedOptions.RequestContinuation
            };

            // StartNew with an async lambda returns Task<Task>; awaiting it yields the inner task,
            // so workerTask represents the worker loop itself rather than its mere scheduling.
            var workerTask = await Task.Factory.StartNew(async() =>
            {
                // Stays null unless the worker ends for a reason that should release the lease.
                ChangeFeedObserverCloseReason?closeReason = null;
                try
                {
                    try
                    {
                        await observer.OpenAsync(context);
                    }
                    catch (Exception ex)
                    {
                        TraceLog.Error(string.Format("IChangeFeedObserver.OpenAsync exception: {0}", ex));
                        // Record the reason before rethrowing so the outer catch does not downgrade it to Unknown.
                        closeReason = ChangeFeedObserverCloseReason.ObserverError;
                        throw;
                    }

                    // Read this partition only, resuming from the lease's continuation token when it has one.
                    options.PartitionKeyRangeId = lease.PartitionId;
                    if (!string.IsNullOrEmpty(lease.ContinuationToken))
                    {
                        options.RequestContinuation = lease.ContinuationToken;
                    }

                    IDocumentQuery <Document> query = this.documentClient.CreateDocumentChangeFeedQuery(this.collectionSelfLink, options);

                    TraceLog.Verbose(string.Format("Worker start: partition '{0}', continuation '{1}'", lease.PartitionId, lease.ContinuationToken));

                    try
                    {
                        // Outer loop: one iteration per polling cycle. Inner do/while: drains the query while it has more results.
                        while (this.isShutdown == 0)
                        {
                            do
                            {
                                DocumentClientException dcex     = null;
                                FeedResponse <Document> response = null;

                                try
                                {
                                    response = await query.ExecuteNextAsync <Document>();
                                }
                                catch (DocumentClientException ex)
                                {
                                    // Only NotFound, TooManyRequests and ServiceUnavailable are handled below;
                                    // everything else propagates to the outer catch blocks.
                                    if (StatusCode.NotFound != (StatusCode)ex.StatusCode &&
                                        StatusCode.TooManyRequests != (StatusCode)ex.StatusCode &&
                                        StatusCode.ServiceUnavailable != (StatusCode)ex.StatusCode)
                                    {
                                        throw;
                                    }

                                    dcex = ex;
                                }

                                if (dcex != null)
                                {
                                    const int ReadSessionNotAvailable = 1002;
                                    if (StatusCode.NotFound == (StatusCode)dcex.StatusCode && GetSubStatusCode(dcex) != ReadSessionNotAvailable)
                                    {
                                        // Most likely, the database or collection was removed while we were enumerating.
                                        // Shut down. The user will need to start over.
                                        // Note: this has to be a new task, can't await for shutdown here, as shutdown awaits for all worker tasks.
                                        await Task.Factory.StartNew(() => this.StopAsync(ChangeFeedObserverCloseReason.ResourceGone));
                                        // Leaves only the inner do/while; the outer loop keeps running until isShutdown becomes non-zero.
                                        break;
                                    }
                                    else
                                    {
                                        // Retriable: throttling, service unavailable, or NotFound with the
                                        // ReadSessionNotAvailable sub-status (the only NotFound that reaches this branch).
                                        Debug.Assert(
                                            StatusCode.NotFound == (StatusCode)dcex.StatusCode ||
                                            StatusCode.TooManyRequests == (StatusCode)dcex.StatusCode ||
                                            StatusCode.ServiceUnavailable == (StatusCode)dcex.StatusCode);
                                        TraceLog.Warning(string.Format("Partition {0}: retriable exception : {1}", context.PartitionKeyRangeId, dcex.Message));
                                        // Use the exception's RetryAfter when it is non-zero, otherwise the regular poll delay.
                                        await Task.Delay(dcex.RetryAfter != TimeSpan.Zero ? dcex.RetryAfter : this.options.FeedPollDelay, cancellation.Token);
                                    }
                                }

                                if (response != null)
                                {
                                    if (response.Count > 0)
                                    {
                                        List <Document> docs = new List <Document>();
                                        docs.AddRange(response);

                                        try
                                        {
                                            await observer.ProcessChangesAsync(context, docs);
                                        }
                                        catch (Exception ex)
                                        {
                                            TraceLog.Error(string.Format("IChangeFeedObserver.ProcessChangesAsync exception: {0}", ex));
                                            closeReason = ChangeFeedObserverCloseReason.ObserverError;
                                            throw;
                                        }

                                        // Checkpoint after every successful delivery to the client.
                                        // CheckpointAsync returns the updated lease, which replaces the captured one.
                                        lease = await CheckpointAsync(lease, response.ResponseContinuation, context);
                                    }
                                    else if (string.IsNullOrEmpty(lease.ContinuationToken))
                                    {
                                        // Checkpoint if we've never done that for this lease.
                                        lease = await CheckpointAsync(lease, response.ResponseContinuation, context);
                                    }
                                }
                            }while (query.HasMoreResults && this.isShutdown == 0);

                            if (this.isShutdown == 0)
                            {
                                await Task.Delay(this.options.FeedPollDelay, cancellation.Token);
                            }
                        } // Outer while (this.isShutdown == 0) loop.
                    }
                    catch (TaskCanceledException)
                    {
                        // closeReason is not modified here, so a cancelled worker does not trigger the release below
                        // unless a reason was already recorded.
                        Debug.Assert(cancellation.IsCancellationRequested, "cancellation.IsCancellationRequested");
                        TraceLog.Informational(string.Format("Cancel signal received for partition {0} worker!", context.PartitionKeyRangeId));
                    }
                }
                catch (LeaseLostException)
                {
                    closeReason = ChangeFeedObserverCloseReason.LeaseLost;
                }
                catch (Exception ex)
                {
                    TraceLog.Error(string.Format("Partition {0} exception: {1}", context.PartitionKeyRangeId, ex));
                    // Keep a more specific reason (e.g. ObserverError) if one was recorded before the rethrow.
                    if (!closeReason.HasValue)
                    {
                        closeReason = ChangeFeedObserverCloseReason.Unknown;
                    }
                }

                if (closeReason.HasValue)
                {
                    TraceLog.Informational(string.Format("Releasing lease for partition {0} due to an error, reason: {1}!", context.PartitionKeyRangeId, closeReason.Value));

                    // Note: this has to be a new task, because OnPartitionReleasedAsync awaits for worker task.
                    await Task.Factory.StartNew(async() => await this.partitionManager.TryReleasePartitionAsync(context.PartitionKeyRangeId, true, closeReason.Value));
                }

                TraceLog.Informational(string.Format("Partition {0}: worker finished!", context.PartitionKeyRangeId));
            });

            // Register the worker, replacing any previous entry for the same partition key range id.
            var newWorkerData = new WorkerData(workerTask, observer, context, cancellation);
            this.partitionKeyRangeIdToWorkerMap.AddOrUpdate(context.PartitionKeyRangeId, newWorkerData, (string id, WorkerData d) => { return(newWorkerData); });
        }