/// <summary>
        /// Persists <paramref name="continuation"/> as the new checkpoint for <paramref name="lease"/>
        /// through the checkpoint manager, using <c>lease.SequenceNumber + 1</c> as the new sequence number.
        /// </summary>
        /// <param name="lease">Lease to checkpoint; expected to be non-null.</param>
        /// <param name="continuation">Continuation token to store; expected to be non-empty.</param>
        /// <param name="context">Observer context; only its PartitionKeyRangeId is read, for failure logging.</param>
        /// <returns>The lease returned by the checkpoint manager.</returns>
        /// <remarks>
        /// Every exception is logged and rethrown unchanged. <see cref="LeaseLostException"/> is logged
        /// as a warning, anything else as an error.
        /// </remarks>
        async Task<DocumentServiceLease> CheckpointAsync(DocumentServiceLease lease, string continuation, ChangeFeedObserverContext context)
        {
            Debug.Assert(lease != null);
            Debug.Assert(!string.IsNullOrEmpty(continuation));

            try
            {
                DocumentServiceLease result = (DocumentServiceLease)await this.checkpointManager.CheckpointAsync(lease, continuation, lease.SequenceNumber + 1);

                Debug.Assert(result.ContinuationToken == continuation, "ContinuationToken was not updated!");
                TraceLog.Verbose(string.Format("Checkpoint: partition {0}, new continuation '{1}'", lease.PartitionId, continuation));

                // Return the value directly; wrapping it in Task.FromResult and awaiting that adds nothing.
                return result;
            }
            catch (LeaseLostException)
            {
                TraceLog.Warning(string.Format("Partition {0}: failed to checkpoint due to lost lease", context.PartitionKeyRangeId));
                throw;
            }
            catch (Exception ex)
            {
                TraceLog.Error(string.Format("Partition {0}: failed to checkpoint due to unexpected error: {1}", context.PartitionKeyRangeId, ex.Message));
                throw;
            }
        }
        /// <summary>
        /// Renews every lease that is already owned by this host and then registers each
        /// successfully renewed lease via <c>AddLeaseAsync</c>.
        /// </summary>
        /// <remarks>
        /// Leases are renewed one at a time while enumerating the lease list. The subsequent
        /// <c>AddLeaseAsync</c> calls are started for all renewed leases and awaited together.
        /// Ownership is matched against <c>this.workerName</c> case-insensitively (ordinal).
        /// </remarks>
        /// <returns>A task that completes when all renewed leases have been added.</returns>
        public async Task InitializeAsync()
        {
            List<T> renewedLeases = new List<T>();

            TraceLog.Verbose(string.Format("Host '{0}' starting renew leases assigned to this host on initialize.", this.workerName));

            foreach (var lease in await this.leaseManager.ListLeases())
            {
                // Only leases already assigned to this host are renewed here.
                if (!string.Equals(lease.Owner, this.workerName, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                T renewedLease = await this.RenewLeaseAsync(lease);

                if (renewedLease != null)
                {
                    renewedLeases.Add(renewedLease);
                }
                else
                {
                    TraceLog.Informational(string.Format("Host '{0}' unable to renew lease '{1}' on startup.", this.workerName, lease.PartitionId));
                }
            }

            var addLeaseTasks = new List<Task>(renewedLeases.Count);

            foreach (T lease in renewedLeases)
            {
                TraceLog.Informational(string.Format("Host '{0}' acquired lease for PartitionId '{1}' on startup.", this.workerName, lease.PartitionId));
                addLeaseTasks.Add(this.AddLeaseAsync(lease));
            }

            await Task.WhenAll(addLeaseTasks);
        }
        /// <summary>
        /// Called when this host acquires the lease for a partition. Creates an observer, a context and a
        /// cancellation source for the partition, starts a worker that reads the change feed for that
        /// partition, and records the worker in <c>partitionKeyRangeIdToWorkerMap</c>.
        /// </summary>
        /// <remarks>
        /// The worker opens the observer, then loops while <c>isShutdown == 0</c>: it reads pages from the
        /// change feed query, hands non-empty pages to the observer, checkpoints when
        /// <c>IsCheckpointNeeded</c> says so, and waits <c>FeedPollDelay</c> between polls.
        /// When the worker ends with a close reason set, it starts <c>TryReleasePartitionAsync</c> on a
        /// separate task. A cancellation (<see cref="TaskCanceledException"/>) leaves the close reason
        /// unset, so no release is started in that case.
        /// </remarks>
        /// <param name="lease">The acquired lease; expected to be non-null with a non-empty Owner.</param>
        async Task IPartitionObserver <DocumentServiceLease> .OnPartitionAcquiredAsync(DocumentServiceLease lease)
        {
            Debug.Assert(lease != null && !string.IsNullOrEmpty(lease.Owner), "lease");
            TraceLog.Informational(string.Format("Host '{0}' partition {1}: acquired!", this.HostName, lease.PartitionId));

#if DEBUG
            Interlocked.Increment(ref this.partitionCount);
#endif

            IChangeFeedObserver       observer = this.observerFactory.CreateObserver();
            ChangeFeedObserverContext context  = new ChangeFeedObserverContext {
                PartitionKeyRangeId = lease.PartitionId
            };
            CancellationTokenSource cancellation = new CancellationTokenSource();

            // Create ChangeFeedOptions to use for this worker.
            // This is a per-worker copy of the host-level options; PartitionKeyRangeId and
            // RequestContinuation are overwritten from the lease inside the worker.
            ChangeFeedOptions options = new ChangeFeedOptions
            {
                MaxItemCount        = this.changeFeedOptions.MaxItemCount,
                PartitionKeyRangeId = this.changeFeedOptions.PartitionKeyRangeId,
                SessionToken        = this.changeFeedOptions.SessionToken,
                StartFromBeginning  = this.changeFeedOptions.StartFromBeginning,
                RequestContinuation = this.changeFeedOptions.RequestContinuation
            };

            // StartNew with an async lambda yields a task-of-task; awaiting it here produces the inner
            // task (the worker itself), which is what gets stored in WorkerData below.
            var workerTask = await Task.Factory.StartNew(async() =>
            {
                // Stays null unless the worker ends for a reason that should release the lease.
                ChangeFeedObserverCloseReason?closeReason = null;
                try
                {
                    try
                    {
                        await observer.OpenAsync(context);
                    }
                    catch (Exception ex)
                    {
                        TraceLog.Error(string.Format("IChangeFeedObserver.OpenAsync exception: {0}", ex));
                        closeReason = ChangeFeedObserverCloseReason.ObserverError;
                        throw;
                    }

                    // Read this lease's partition, resuming from the lease's continuation when it has one.
                    options.PartitionKeyRangeId = lease.PartitionId;
                    if (!string.IsNullOrEmpty(lease.ContinuationToken))
                    {
                        options.RequestContinuation = lease.ContinuationToken;
                    }

                    CheckpointStats checkpointStats = null;
                    if (!this.statsSinceLastCheckpoint.TryGetValue(lease.PartitionId, out checkpointStats) || checkpointStats == null)
                    {
                        // It could be that the lease was created by different host and we picked it up.
                        // The update callback keeps whatever entry is already present.
                        checkpointStats = this.statsSinceLastCheckpoint.AddOrUpdate(
                            lease.PartitionId,
                            new CheckpointStats(),
                            (partitionId, existingStats) => existingStats);
                        // NOTE(review): this is the only trace call in this method that goes through Trace
                        // rather than TraceLog - confirm whether that is intentional.
                        Trace.TraceWarning(string.Format("Added stats for partition '{0}' for which the lease was picked up after the host was started.", lease.PartitionId));
                    }

                    IDocumentQuery <Document> query = this.documentClient.CreateDocumentChangeFeedQuery(this.collectionSelfLink, options);

                    TraceLog.Verbose(string.Format("Worker start: partition '{0}', continuation '{1}'", lease.PartitionId, lease.ContinuationToken));

                    // Continuation of the last successful read; passed to HandleSplitAsync on a split.
                    string lastContinuation = options.RequestContinuation;

                    try
                    {
                        while (this.isShutdown == 0)
                        {
                            // Inner loop: drain all currently available pages.
                            do
                            {
                                ExceptionDispatchInfo exceptionDispatchInfo = null;
                                FeedResponse <Document> response            = null;

                                try
                                {
                                    response         = await query.ExecuteNextAsync <Document>();
                                    lastContinuation = response.ResponseContinuation;
                                }
                                catch (DocumentClientException ex)
                                {
                                    // Captured so it can be rethrown later with its original stack trace.
                                    exceptionDispatchInfo = ExceptionDispatchInfo.Capture(ex);
                                }

                                if (exceptionDispatchInfo != null)
                                {
                                    DocumentClientException dcex = (DocumentClientException)exceptionDispatchInfo.SourceException;

                                    if (StatusCode.NotFound == (StatusCode)dcex.StatusCode && SubStatusCode.ReadSessionNotAvailable != (SubStatusCode)GetSubStatusCode(dcex))
                                    {
                                        // Most likely, the database or collection was removed while we were enumerating.
                                        // Shut down. The user will need to start over.
                                        // Note: this has to be a new task, can't await for shutdown here, as shutdown awaits for all worker tasks.
                                        TraceLog.Error(string.Format("Partition {0}: resource gone (subStatus={1}). Aborting.", context.PartitionKeyRangeId, GetSubStatusCode(dcex)));
                                        await Task.Factory.StartNew(() => this.StopAsync(ChangeFeedObserverCloseReason.ResourceGone));
                                        // Leaves only the inner do-while; the outer loop re-checks isShutdown.
                                        break;
                                    }
                                    else if (StatusCode.Gone == (StatusCode)dcex.StatusCode)
                                    {
                                        SubStatusCode subStatusCode = (SubStatusCode)GetSubStatusCode(dcex);
                                        if (SubStatusCode.PartitionKeyRangeGone == subStatusCode)
                                        {
                                            bool isSuccess = await HandleSplitAsync(context.PartitionKeyRangeId, lastContinuation, lease.Id);
                                            if (!isSuccess)
                                            {
                                                TraceLog.Error(string.Format("Partition {0}: HandleSplit failed! Aborting.", context.PartitionKeyRangeId));
                                                await Task.Factory.StartNew(() => this.StopAsync(ChangeFeedObserverCloseReason.ResourceGone));
                                                break;
                                            }

                                            // Throw LeaseLostException so that we take the lease down.
                                            throw new LeaseLostException(lease, exceptionDispatchInfo.SourceException, true);
                                        }
                                        else if (SubStatusCode.Splitting == subStatusCode)
                                        {
                                            TraceLog.Warning(string.Format("Partition {0} is splitting. Will retry to read changes until split finishes. {1}", context.PartitionKeyRangeId, dcex.Message));
                                        }
                                        else
                                        {
                                            exceptionDispatchInfo.Throw();
                                        }
                                    }
                                    else if (StatusCode.TooManyRequests == (StatusCode)dcex.StatusCode ||
                                             StatusCode.ServiceUnavailable == (StatusCode)dcex.StatusCode)
                                    {
                                        TraceLog.Warning(string.Format("Partition {0}: retriable exception : {1}", context.PartitionKeyRangeId, dcex.Message));
                                    }
                                    else
                                    {
                                        exceptionDispatchInfo.Throw();
                                    }

                                    // Reached only by the branches that neither broke out nor threw (splitting,
                                    // retriable, and NotFound with ReadSessionNotAvailable): back off before retrying.
                                    await Task.Delay(dcex.RetryAfter != TimeSpan.Zero ? dcex.RetryAfter : this.options.FeedPollDelay, cancellation.Token);
                                }

                                if (response != null)
                                {
                                    if (response.Count > 0)
                                    {
                                        List <Document> docs = new List <Document>();
                                        docs.AddRange(response);

                                        try
                                        {
                                            // The response is exposed on the context only for the duration of the callback.
                                            context.FeedResponse = response;
                                            await observer.ProcessChangesAsync(context, docs);
                                        }
                                        catch (Exception ex)
                                        {
                                            TraceLog.Error(string.Format("IChangeFeedObserver.ProcessChangesAsync exception: {0}", ex));
                                            closeReason = ChangeFeedObserverCloseReason.ObserverError;
                                            throw;
                                        }
                                        finally
                                        {
                                            context.FeedResponse = null;
                                        }
                                    }

                                    checkpointStats.ProcessedDocCount += (uint)response.Count;

                                    if (IsCheckpointNeeded(lease, checkpointStats))
                                    {
                                        // CheckpointAsync returns the updated lease, which replaces the local one.
                                        lease = await CheckpointAsync(lease, response.ResponseContinuation, context);
                                        checkpointStats.Reset();
                                    }
                                    else if (response.Count > 0)
                                    {
                                        TraceLog.Informational(string.Format("Checkpoint: not checkpointing for partition {0}, {1} docs, new continuation '{2}' as frequency condition is not met", lease.PartitionId, response.Count, response.ResponseContinuation));
                                    }
                                }
                            }while (query.HasMoreResults && this.isShutdown == 0);

                            // Nothing more to read right now: wait before polling again, unless shutting down.
                            if (this.isShutdown == 0)
                            {
                                await Task.Delay(this.options.FeedPollDelay, cancellation.Token);
                            }
                        } // Outer while (this.isShutdown == 0) loop.

                        // The loop ended because isShutdown became non-zero.
                        closeReason = ChangeFeedObserverCloseReason.Shutdown;
                    }
                    catch (TaskCanceledException)
                    {
                        // Thrown by the Task.Delay calls above when the cancellation token is signalled.
                        // closeReason is deliberately left as-is here.
                        Debug.Assert(cancellation.IsCancellationRequested, "cancellation.IsCancellationRequested");
                        TraceLog.Informational(string.Format("Cancel signal received for partition {0} worker!", context.PartitionKeyRangeId));
                    }
                }
                catch (LeaseLostException ex)
                {
                    closeReason = ex.IsGone ? ChangeFeedObserverCloseReason.LeaseGone : ChangeFeedObserverCloseReason.LeaseLost;
                }
                catch (Exception ex)
                {
                    TraceLog.Error(string.Format("Partition {0} exception: {1}", context.PartitionKeyRangeId, ex));
                    // Keep a more specific reason (e.g. ObserverError) if one was already recorded.
                    if (!closeReason.HasValue)
                    {
                        closeReason = ChangeFeedObserverCloseReason.Unknown;
                    }
                }

                if (closeReason.HasValue)
                {
                    // NOTE(review): this message says "due to an error" but is also logged when the
                    // reason is Shutdown (set after the polling loop exits normally).
                    TraceLog.Informational(string.Format("Releasing lease for partition {0} due to an error, reason: {1}!", context.PartitionKeyRangeId, closeReason.Value));

                    // Note: this has to be a new task, because OnPartitionReleasedAsync awaits for worker task.
                    await Task.Factory.StartNew(async() => await this.partitionManager.TryReleasePartitionAsync(context.PartitionKeyRangeId, true, closeReason.Value));
                }

                TraceLog.Informational(string.Format("Partition {0}: worker finished!", context.PartitionKeyRangeId));
            });

            // Register the worker for this partition, replacing any existing entry for the same id.
            var newWorkerData = new WorkerData(workerTask, observer, context, cancellation);
            this.partitionKeyRangeIdToWorkerMap.AddOrUpdate(context.PartitionKeyRangeId, newWorkerData, (string id, WorkerData d) => { return(newWorkerData); });
        }
        /// <summary>
        /// Tries to bring this host up to its fair share of partitions by acquiring unowned or expired
        /// leases, or, when none are available, by stealing a single lease from the busiest host.
        /// </summary>
        /// <remarks>
        /// The target per host is <c>ceil(partitions / hosts)</c> (at least 1), clamped by
        /// <c>MaxPartitionCount</c> and then <c>MinPartitionCount</c> when those are positive.
        /// Host names are compared case-insensitively (ordinal) throughout, so a host whose leases were
        /// recorded with different casing is counted as a single host.
        /// </remarks>
        /// <returns>
        /// The leases taken during this pass, keyed by partition id. Empty when nothing was taken.
        /// </returns>
        async Task<IDictionary<string, T>> TakeLeasesAsync()
        {
            IDictionary<string, T> allPartitions = new Dictionary<string, T>();
            IDictionary<string, T> takenLeases = new Dictionary<string, T>();

            // Keyed case-insensitively to agree with the OrdinalIgnoreCase owner comparisons used when
            // matching leases to hosts; otherwise one host could be counted as several.
            IDictionary<string, int> workerToPartitionCount = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);
            List<T> expiredLeases = new List<T>();

            foreach (var lease in await this.leaseManager.ListLeases())
            {
                Debug.Assert(lease.PartitionId != null, "TakeLeasesAsync: lease.PartitionId cannot be null.");

                allPartitions.Add(lease.PartitionId, lease);
                if (string.IsNullOrWhiteSpace(lease.Owner) || await this.leaseManager.IsExpired(lease))
                {
                    TraceLog.Verbose(string.Format("Found unused or expired lease: {0}", lease));
                    expiredLeases.Add(lease);
                }
                else
                {
                    // TryGetValue leaves count at 0 when the owner has not been seen yet.
                    int count;
                    workerToPartitionCount.TryGetValue(lease.Owner, out count);
                    workerToPartitionCount[lease.Owner] = count + 1;
                }
            }

            // Make sure this host takes part in the balancing even if it owns nothing yet.
            if (!workerToPartitionCount.ContainsKey(this.workerName))
            {
                workerToPartitionCount.Add(this.workerName, 0);
            }

            int partitionCount = allPartitions.Count;
            int workerCount = workerToPartitionCount.Count;

            if (partitionCount == 0)
            {
                return takenLeases;
            }

            int target = 1;

            if (partitionCount > workerCount)
            {
                target = (int)Math.Ceiling((double)partitionCount / (double)workerCount);
            }

            Debug.Assert(this.options.MinPartitionCount <= this.options.MaxPartitionCount);

            if (this.options.MaxPartitionCount > 0 && target > this.options.MaxPartitionCount)
            {
                target = this.options.MaxPartitionCount;
            }

            if (this.options.MinPartitionCount > 0 && target < this.options.MinPartitionCount)
            {
                target = this.options.MinPartitionCount;
            }

            int myCount = workerToPartitionCount[this.workerName];
            int partitionsNeededForMe = target - myCount;
            TraceLog.Informational(
                string.Format(
                    "Host '{0}' {1} partitions, {2} hosts, {3} available leases, target = {4}, min = {5}, max = {6}, mine = {7}, will try to take {8} lease(s) for myself'.",
                    this.workerName,
                    partitionCount,
                    workerCount,
                    expiredLeases.Count,
                    target,
                    this.options.MinPartitionCount,
                    this.options.MaxPartitionCount,
                    myCount,
                    Math.Max(partitionsNeededForMe, 0)));

            if (partitionsNeededForMe <= 0)
            {
                return takenLeases;
            }

            if (expiredLeases.Count > 0)
            {
                // Prefer free leases: take as many as needed, skipping the ones we fail to acquire.
                foreach (T leaseToTake in expiredLeases)
                {
                    if (partitionsNeededForMe == 0)
                    {
                        break;
                    }

                    TraceLog.Informational(string.Format("Host '{0}' attempting to take lease for PartitionId '{1}'.", this.workerName, leaseToTake.PartitionId));
                    T acquiredLease = await this.TryAcquireLeaseAsync(leaseToTake);

                    if (acquiredLease != null)
                    {
                        TraceLog.Informational(string.Format("Host '{0}' successfully acquired lease for PartitionId '{1}': {2}", this.workerName, leaseToTake.PartitionId, acquiredLease));
                        takenLeases.Add(acquiredLease.PartitionId, acquiredLease);

                        partitionsNeededForMe--;
                    }
                }

                return takenLeases;
            }

            // No free leases: find the host that currently owns the most partitions.
            KeyValuePair<string, int> workerToStealFrom = default(KeyValuePair<string, int>);
            foreach (var kvp in workerToPartitionCount)
            {
                // A null key means no candidate has been picked yet (dictionary keys are never null).
                // The original tested the loop variable against default here, which could never be true.
                if (workerToStealFrom.Key == null || workerToStealFrom.Value < kvp.Value)
                {
                    workerToStealFrom = kvp;
                }
            }

            // Steal only from a host that is above target, or at target when this host is short by
            // more than one partition.
            if (workerToStealFrom.Value > target - (partitionsNeededForMe > 1 ? 1 : 0))
            {
                foreach (var kvp in allPartitions)
                {
                    if (!string.Equals(kvp.Value.Owner, workerToStealFrom.Key, StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    T leaseToTake = kvp.Value;
                    TraceLog.Informational(string.Format("Host '{0}' attempting to steal lease from '{1}' for PartitionId '{2}'.", this.workerName, workerToStealFrom.Key, leaseToTake.PartitionId));
                    T stolenLease = await this.TryStealLeaseAsync(leaseToTake);

                    if (stolenLease != null)
                    {
                        TraceLog.Informational(string.Format("Host '{0}' stole lease from '{1}' for PartitionId '{2}'.", this.workerName, workerToStealFrom.Key, leaseToTake.PartitionId));
                        takenLeases.Add(stolenLease.PartitionId, stolenLease);

                        // Only steal one lease at a time
                        break;
                    }
                }
            }

            return takenLeases;
        }
        /// <summary>
        /// Called when this host acquires the lease for a partition. Creates an observer, a context and a
        /// cancellation source for the partition, starts a worker that reads the change feed for that
        /// partition, and records the worker in <c>partitionKeyRangeIdToWorkerMap</c>.
        /// </summary>
        /// <remarks>
        /// The worker opens the observer, then loops while <c>isShutdown == 0</c>: it reads pages from the
        /// change feed query, hands non-empty pages to the observer and checkpoints after each delivered
        /// page (or once, if the lease has no continuation token yet), waiting <c>FeedPollDelay</c>
        /// between polls. When the worker ends with a close reason set, it starts
        /// <c>TryReleasePartitionAsync</c> on a separate task. A cancellation
        /// (<see cref="TaskCanceledException"/>) leaves the close reason unset.
        /// </remarks>
        /// <param name="lease">The acquired lease; expected to be non-null with a non-empty Owner.</param>
        async Task IPartitionObserver <DocumentServiceLease> .OnPartitionAcquiredAsync(DocumentServiceLease lease)
        {
            Debug.Assert(lease != null && !string.IsNullOrEmpty(lease.Owner), "lease");
            TraceLog.Informational(string.Format("Host '{0}' partition {1}: acquired!", this.HostName, lease.PartitionId));

#if DEBUG
            Interlocked.Increment(ref this.partitionCount);
#endif

            IChangeFeedObserver       observer = this.observerFactory.CreateObserver();
            ChangeFeedObserverContext context  = new ChangeFeedObserverContext {
                PartitionKeyRangeId = lease.PartitionId
            };
            CancellationTokenSource cancellation = new CancellationTokenSource();

            // Create ChangeFeedOptions to use for this worker.
            // This is a per-worker copy of the host-level options; PartitionKeyRangeId and
            // RequestContinuation are overwritten from the lease inside the worker.
            ChangeFeedOptions options = new ChangeFeedOptions
            {
                MaxItemCount        = this.changeFeedOptions.MaxItemCount,
                PartitionKeyRangeId = this.changeFeedOptions.PartitionKeyRangeId,
                SessionToken        = this.changeFeedOptions.SessionToken,
                StartFromBeginning  = this.changeFeedOptions.StartFromBeginning,
                RequestContinuation = this.changeFeedOptions.RequestContinuation
            };

            // StartNew with an async lambda yields a task-of-task; awaiting it here produces the inner
            // task (the worker itself), which is what gets stored in WorkerData below.
            var workerTask = await Task.Factory.StartNew(async() =>
            {
                // Stays null unless the worker ends for a reason that should release the lease.
                ChangeFeedObserverCloseReason?closeReason = null;
                try
                {
                    try
                    {
                        await observer.OpenAsync(context);
                    }
                    catch (Exception ex)
                    {
                        TraceLog.Error(string.Format("IChangeFeedObserver.OpenAsync exception: {0}", ex));
                        closeReason = ChangeFeedObserverCloseReason.ObserverError;
                        throw;
                    }

                    // Read this lease's partition, resuming from the lease's continuation when it has one.
                    options.PartitionKeyRangeId = lease.PartitionId;
                    if (!string.IsNullOrEmpty(lease.ContinuationToken))
                    {
                        options.RequestContinuation = lease.ContinuationToken;
                    }

                    IDocumentQuery <Document> query = this.documentClient.CreateDocumentChangeFeedQuery(this.collectionSelfLink, options);

                    TraceLog.Verbose(string.Format("Worker start: partition '{0}', continuation '{1}'", lease.PartitionId, lease.ContinuationToken));

                    try
                    {
                        while (this.isShutdown == 0)
                        {
                            // Inner loop: drain all currently available pages.
                            do
                            {
                                DocumentClientException dcex     = null;
                                FeedResponse <Document> response = null;

                                try
                                {
                                    response = await query.ExecuteNextAsync <Document>();
                                }
                                catch (DocumentClientException ex)
                                {
                                    // Only NotFound, TooManyRequests and ServiceUnavailable are handled below;
                                    // everything else propagates to the outer handlers.
                                    if (StatusCode.NotFound != (StatusCode)ex.StatusCode &&
                                        StatusCode.TooManyRequests != (StatusCode)ex.StatusCode &&
                                        StatusCode.ServiceUnavailable != (StatusCode)ex.StatusCode)
                                    {
                                        throw;
                                    }

                                    dcex = ex;
                                }

                                if (dcex != null)
                                {
                                    const int ReadSessionNotAvailable = 1002;
                                    if (StatusCode.NotFound == (StatusCode)dcex.StatusCode && GetSubStatusCode(dcex) != ReadSessionNotAvailable)
                                    {
                                        // Most likely, the database or collection was removed while we were enumerating.
                                        // Shut down. The user will need to start over.
                                        // Note: this has to be a new task, can't await for shutdown here, as shutdown awaits for all worker tasks.
                                        await Task.Factory.StartNew(() => this.StopAsync(ChangeFeedObserverCloseReason.ResourceGone));
                                        // Leaves only the inner do-while; the outer loop re-checks isShutdown.
                                        break;
                                    }
                                    else
                                    {
                                        // NotFound with sub-status ReadSessionNotAvailable is intentionally routed here
                                        // and retried as well, so the assertion has to allow NotFound too.
                                        Debug.Assert(
                                            StatusCode.NotFound == (StatusCode)dcex.StatusCode ||
                                            StatusCode.TooManyRequests == (StatusCode)dcex.StatusCode ||
                                            StatusCode.ServiceUnavailable == (StatusCode)dcex.StatusCode);
                                        TraceLog.Warning(string.Format("Partition {0}: retriable exception : {1}", context.PartitionKeyRangeId, dcex.Message));
                                        await Task.Delay(dcex.RetryAfter != TimeSpan.Zero ? dcex.RetryAfter : this.options.FeedPollDelay, cancellation.Token);
                                    }
                                }

                                if (response != null)
                                {
                                    if (response.Count > 0)
                                    {
                                        List <Document> docs = new List <Document>();
                                        docs.AddRange(response);

                                        try
                                        {
                                            await observer.ProcessChangesAsync(context, docs);
                                        }
                                        catch (Exception ex)
                                        {
                                            TraceLog.Error(string.Format("IChangeFeedObserver.ProcessChangesAsync exception: {0}", ex));
                                            closeReason = ChangeFeedObserverCloseReason.ObserverError;
                                            throw;
                                        }

                                        // Checkpoint after every successful delivery to the client.
                                        lease = await CheckpointAsync(lease, response.ResponseContinuation, context);
                                    }
                                    else if (string.IsNullOrEmpty(lease.ContinuationToken))
                                    {
                                        // Checkpoint if we've never done that for this lease.
                                        lease = await CheckpointAsync(lease, response.ResponseContinuation, context);
                                    }
                                }
                            }while (query.HasMoreResults && this.isShutdown == 0);

                            // Nothing more to read right now: wait before polling again, unless shutting down.
                            if (this.isShutdown == 0)
                            {
                                await Task.Delay(this.options.FeedPollDelay, cancellation.Token);
                            }
                        } // Outer while (this.isShutdown == 0) loop.
                    }
                    catch (TaskCanceledException)
                    {
                        // Thrown by the Task.Delay calls above when the cancellation token is signalled.
                        Debug.Assert(cancellation.IsCancellationRequested, "cancellation.IsCancellationRequested");
                        TraceLog.Informational(string.Format("Cancel signal received for partition {0} worker!", context.PartitionKeyRangeId));
                    }
                }
                catch (LeaseLostException)
                {
                    closeReason = ChangeFeedObserverCloseReason.LeaseLost;
                }
                catch (Exception ex)
                {
                    TraceLog.Error(string.Format("Partition {0} exception: {1}", context.PartitionKeyRangeId, ex));
                    // Keep a more specific reason (e.g. ObserverError) if one was already recorded.
                    if (!closeReason.HasValue)
                    {
                        closeReason = ChangeFeedObserverCloseReason.Unknown;
                    }
                }

                if (closeReason.HasValue)
                {
                    TraceLog.Informational(string.Format("Releasing lease for partition {0} due to an error, reason: {1}!", context.PartitionKeyRangeId, closeReason.Value));

                    // Note: this has to be a new task, because OnPartitionReleasedAsync awaits for worker task.
                    await Task.Factory.StartNew(async() => await this.partitionManager.TryReleasePartitionAsync(context.PartitionKeyRangeId, true, closeReason.Value));
                }

                TraceLog.Informational(string.Format("Partition {0}: worker finished!", context.PartitionKeyRangeId));
            });

            // Register the worker for this partition, replacing any existing entry for the same id.
            var newWorkerData = new WorkerData(workerTask, observer, context, cancellation);
            this.partitionKeyRangeIdToWorkerMap.AddOrUpdate(context.PartitionKeyRangeId, newWorkerData, (string id, WorkerData d) => { return(newWorkerData); });
        }