/// <summary>
/// Stores a new continuation token for <paramref name="lease"/> through the checkpoint manager.
/// </summary>
/// <param name="lease">Lease to checkpoint. Asserted non-null in debug builds.</param>
/// <param name="continuation">Continuation token to store. Asserted non-empty in debug builds.</param>
/// <param name="context">Observer context; its PartitionKeyRangeId is used in the failure log messages.</param>
/// <returns>The lease returned by the checkpoint manager, cast to <see cref="DocumentServiceLease"/>.</returns>
/// <remarks>
/// Any exception thrown by the checkpoint manager is logged and rethrown unchanged;
/// <see cref="LeaseLostException"/> is logged as a warning, everything else as an error.
/// </remarks>
async Task<DocumentServiceLease> CheckpointAsync(DocumentServiceLease lease, string continuation, ChangeFeedObserverContext context)
{
    Debug.Assert(lease != null);
    Debug.Assert(!string.IsNullOrEmpty(continuation));

    DocumentServiceLease result = null;
    try
    {
        // The checkpoint is written with the lease's sequence number advanced by one.
        result = (DocumentServiceLease)await this.checkpointManager.CheckpointAsync(lease, continuation, lease.SequenceNumber + 1);

        Debug.Assert(result.ContinuationToken == continuation, "ContinuationToken was not updated!");
        TraceLog.Informational(string.Format("Checkpoint: partition {0}, new continuation '{1}'", lease.PartitionId, continuation));
    }
    catch (LeaseLostException)
    {
        TraceLog.Warning(string.Format("Partition {0}: failed to checkpoint due to lost lease", context.PartitionKeyRangeId));
        throw;
    }
    catch (Exception ex)
    {
        TraceLog.Error(string.Format("Partition {0}: failed to checkpoint due to unexpected error: {1}", context.PartitionKeyRangeId, ex.Message));
        throw;
    }

    Debug.Assert(result != null);

    // The value is already available; wrapping it in Task.FromResult and awaiting it again adds nothing.
    return result;
}
/// <summary>
/// Connects to the monitored collection, builds the lease prefix, prepares the lease store,
/// creates leases for the current partition key ranges and starts the partition manager.
/// </summary>
async Task InitializeAsync()
{
    var source = this.collectionLocation;
    this.documentClient = new DocumentClient(source.Uri, source.MasterKey, source.ConnectionPolicy);

    Database database = await this.documentClient.ReadDatabaseAsync(
        UriFactory.CreateDatabaseUri(source.DatabaseName));

    // Quota info is requested so the response can be passed to GetDocumentCount for the log line below.
    var readOptions = new RequestOptions { PopulateQuotaInfo = true };
    ResourceResponse<DocumentCollection> collectionResponse = await this.documentClient.ReadDocumentCollectionAsync(
        UriFactory.CreateDocumentCollectionUri(source.DatabaseName, source.CollectionName),
        readOptions);
    DocumentCollection collection = collectionResponse.Resource;

    // From here on the collection is addressed by self link only: if it is removed and
    // re-created under the same name, we will not touch the new one by accident.
    this.collectionSelfLink = collection.SelfLink;

    // Lease ids are prefixed with the optional user-supplied prefix (empty when none was given),
    // followed by account host, database rid and collection rid.
    this.leasePrefix = string.Format(
        CultureInfo.InvariantCulture,
        "{0}{1}_{2}_{3}",
        this.options.LeasePrefix ?? string.Empty,
        source.Uri.Host,
        database.ResourceId,
        collection.ResourceId);

    var manager = new DocumentServiceLeaseManager(
        this.auxCollectionLocation,
        this.leasePrefix,
        this.options.LeaseExpirationInterval,
        this.options.LeaseRenewInterval);
    await manager.InitializeAsync();

    // The same object serves as both the lease manager and the checkpoint manager.
    this.leaseManager = manager;
    this.checkpointManager = (ICheckpointManager)manager;

    if (this.options.DiscardExistingLeases)
    {
        TraceLog.Warning(string.Format("Host '{0}': removing all leases, as requested by ChangeFeedHostOptions", this.HostName));
        await this.leaseManager.DeleteAllAsync();
    }

    // Note: the lease store is never stale because the monitored collection's rid is part of the id prefix
    // in the aux collection. If the collection was removed and re-created, the rid would change.
    // If it is not deleted, it is not stale. If it is deleted, it is not stale as it does not exist.
    await this.leaseManager.CreateLeaseStoreIfNotExistsAsync();

    // Index the current partition key ranges by id.
    var rangesById = new Dictionary<string, PartitionKeyRange>();
    foreach (var keyRange in await this.EnumPartitionKeyRangesAsync(this.collectionSelfLink))
    {
        rangesById.Add(keyRange.Id, keyRange);
    }

    TraceLog.Informational(string.Format(
        "Source collection: '{0}', {1} partition(s), {2} document(s)",
        source.CollectionName,
        rangesById.Count,
        GetDocumentCount(collectionResponse)));

    await this.CreateLeases(rangesById);

    this.partitionManager = new PartitionManager<DocumentServiceLease>(this.HostName, this.leaseManager, this.options);
    await this.partitionManager.SubscribeAsync(this);
    await this.partitionManager.InitializeAsync();
}
/// <summary>
/// Registers a newly acquired lease as owned by this host and notifies the partition observers.
/// If the partition is already registered, or the observers fail, the lease is given up again.
/// </summary>
/// <param name="lease">The lease that was just acquired.</param>
async Task AddLeaseAsync(T lease)
{
    if (!this.currentlyOwnedPartitions.TryAdd(lease.PartitionId, lease))
    {
        // We acquired the lease, but the partition is still in the owned map: it looks like we owned it
        // before and have not finished shutting it down. Release the lease so another host can pick it up.
        try
        {
            TraceLog.Warning(string.Format("Host '{0}' unable to add PartitionId '{1}' with lease token '{2}' to currently owned partitions.", this.workerName, lease.PartitionId, lease.ConcurrencyToken));
            await this.leaseManager.ReleaseAsync(lease);
            TraceLog.Informational(string.Format("Host '{0}' successfully released lease on PartitionId '{1}' with lease token '{2}'", this.workerName, lease.PartitionId, lease.ConcurrencyToken));
        }
        catch (LeaseLostException)
        {
            // The processor is already shut down, so a lost lease can be ignored at this point.
            TraceLog.Informational(string.Format("Host '{0}' failed to release lease for PartitionId '{1}' with lease token '{2}' due to conflict.", this.workerName, lease.PartitionId, lease.ConcurrencyToken));
        }
        catch (Exception ex)
        {
            TraceLog.Exception(ex);
        }

        return;
    }

    // The failure is recorded in a flag and handled after the try/catch rather than inside the catch block.
    bool observerFailed = false;
    try
    {
        TraceLog.Informational(string.Format("Host '{0}' opening event processor for PartitionId '{1}' and lease token '{2}'", this.workerName, lease.PartitionId, lease.ConcurrencyToken));
        await this.partitionObserverManager.NotifyPartitionAcquiredAsync(lease);
        TraceLog.Informational(string.Format("Host '{0}' opened event processor for PartitionId '{1}' and lease token '{2}'", this.workerName, lease.PartitionId, lease.ConcurrencyToken));
    }
    catch (Exception ex)
    {
        // Exceptions raised while notifying observers are swallowed here and only logged.
        TraceLog.Informational(string.Format("Host '{0}' failed to initialize processor for PartitionId '{1}' and lease token '{2}'", this.workerName, lease.PartitionId, lease.ConcurrencyToken));
        observerFailed = true;
        TraceLog.Exception(ex);
    }

    if (observerFailed)
    {
        // The processor could not be initialized: release the lease so some other node can pick up the partition.
        await this.RemoveLeaseAsync(lease, true, ChangeFeedObserverCloseReason.ObserverError);
    }
}
/// <summary>
/// Connects to the monitored collection, builds the lease prefix, prepares the lease store,
/// makes sure a lease exists for every partition key range and starts the partition manager.
/// </summary>
/// <remarks>
/// Lease creation for all ranges is awaited before the partition manager is created, and a failure
/// while creating any lease propagates to the caller of this method.
/// </remarks>
async Task InitializeAsync()
{
    this.documentClient = new DocumentClient(this.collectionLocation.Uri, this.collectionLocation.MasterKey, this.collectionLocation.ConnectionPolicy);

    Uri databaseUri = UriFactory.CreateDatabaseUri(this.collectionLocation.DatabaseName);
    Database database = await this.documentClient.ReadDatabaseAsync(databaseUri);

    Uri collectionUri = UriFactory.CreateDocumentCollectionUri(this.collectionLocation.DatabaseName, this.collectionLocation.CollectionName);
    DocumentCollection collection = await this.documentClient.ReadDocumentCollectionAsync(collectionUri);
    this.collectionSelfLink = collection.SelfLink;

    // Beyond this point all access to the collection is done via this self link: if the collection is
    // removed, we won't access a new one using the same name by accident.
    this.leasePrefix = string.Format(CultureInfo.InvariantCulture, "{0}_{1}_{2}", this.collectionLocation.Uri.Host, database.ResourceId, collection.ResourceId);

    var leaseManager = new DocumentServiceLeaseManager(
        this.auxCollectionLocation,
        this.leasePrefix,
        this.options.LeaseExpirationInterval,
        this.options.LeaseRenewInterval);
    await leaseManager.InitializeAsync();

    this.leaseManager = leaseManager;
    this.checkpointManager = (ICheckpointManager)leaseManager;

    if (this.options.DiscardExistingLeases)
    {
        TraceLog.Warning(string.Format("Host '{0}': removing all leases, as requested by ChangeFeedHostOptions", this.HostName));
        await this.leaseManager.DeleteAllAsync();
    }

    // Note: the lease store is never stale as we use the monitored collection Rid as id prefix for the aux collection.
    // If the collection was removed and re-created, the rid would change.
    // If it's not deleted, it's not stale. If it's deleted, it's not stale as it doesn't exist.
    await this.leaseManager.CreateLeaseStoreIfNotExistsAsync();

    string[] rangeIds = await this.EnumPartitionKeyRangeIds(this.collectionSelfLink);

    // Parallel.ForEach with an async lambda creates async-void delegates: the loop returns before the
    // leases exist and their exceptions cannot be observed here. Start all creations, then await them together.
    var createLeaseTasks = new List<Task>();
    foreach (string rangeId in rangeIds)
    {
        // Keep existing stats for a range if there are any; otherwise start with fresh ones.
        this.statsSinceLastCheckpoint.AddOrUpdate(
            rangeId,
            new CheckpointStats(),
            (partitionId, existingStats) => existingStats);

        createLeaseTasks.Add(this.leaseManager.CreateLeaseIfNotExistAsync(rangeId));
    }

    await Task.WhenAll(createLeaseTasks);

    this.partitionManager = new PartitionManager<DocumentServiceLease>(this.HostName, this.leaseManager, this.options);
    await this.partitionManager.SubscribeAsync(this);
    await this.partitionManager.InitializeAsync();
}
/// <summary>
/// Asynchronously checks the existing leases and calculates an estimate of the remaining work
/// across the leased partitions.
/// </summary>
/// <remarks>
/// For every lease a change feed query with MaxItemCount = 1 is issued from the lease's continuation token.
/// The per-partition estimate is the number parsed from the response session token minus the LSN of the first
/// returned document (or zero when no document is returned); negative values are counted as zero.
/// <see cref="DocumentClientException"/>s are logged and the affected partition contributes nothing to the total.
/// </remarks>
/// <returns>An estimated amount of remaining documents to be processed.</returns>
public async Task<long> GetEstimatedRemainingWork()
{
    await this.InitializeAsync();

    long remaining = 0;
    ChangeFeedOptions options = new ChangeFeedOptions { MaxItemCount = 1 };

    foreach (DocumentServiceLease existingLease in await this.leaseManager.ListLeases())
    {
        // The same options instance is reused; only the partition and continuation change per lease.
        options.PartitionKeyRangeId = existingLease.PartitionId;
        options.RequestContinuation = existingLease.ContinuationToken;
        IDocumentQuery<Document> query = this.documentClient.CreateDocumentChangeFeedQuery(this.collectionSelfLink, options);

        try
        {
            FeedResponse<Document> response = await query.ExecuteNextAsync<Document>();
            long parsedLSNFromSessionToken = TryConvertToNumber(ParseAmountFromSessionToken(response.SessionToken));
            long lastSequenceNumber = response.Count > 0
                ? TryConvertToNumber(response.First().GetPropertyValue<string>(LSNPropertyName))
                : parsedLSNFromSessionToken;
            long partitionRemaining = parsedLSNFromSessionToken - lastSequenceNumber;
            remaining += partitionRemaining < 0 ? 0 : partitionRemaining;
        }
        catch (DocumentClientException dcex)
        {
            if ((StatusCode.NotFound == (StatusCode)dcex.StatusCode && SubStatusCode.ReadSessionNotAvailable != (SubStatusCode)GetSubStatusCode(dcex))
                || StatusCode.Gone == (StatusCode)dcex.StatusCode)
            {
                // We are not explicitly handling splits here, to avoid any collision with an observer
                // that might have picked this up and is managing the split.
                TraceLog.Error(string.Format("GetEstimateWork > Partition {0}: resource gone (subStatus={1}).", existingLease.PartitionId, GetSubStatusCode(dcex)));
            }
            else if (StatusCode.TooManyRequests == (StatusCode)dcex.StatusCode || StatusCode.ServiceUnavailable == (StatusCode)dcex.StatusCode)
            {
                TraceLog.Warning(string.Format("GetEstimateWork > Partition {0}: retriable exception : {1}", existingLease.PartitionId, dcex.Message));
            }
            else
            {
                // Log the partition id in the partition placeholder and the exception message after it.
                // dcex.Message is used instead of dcex.Error.Message so a null Error cannot throw from this handler.
                TraceLog.Error(string.Format("GetEstimateWork > Partition {0}: Unhandled exception: {1}", existingLease.PartitionId, dcex.Message));
            }
        }
    }

    return remaining;
}
/// <summary>
/// Checkpoints the given continuation token for the partition identified by <paramref name="context"/>.
/// </summary>
/// <param name="continuation">Continuation token to store; must not be null or empty.</param>
/// <param name="context">Observer context; its PartitionKeyRangeId selects the worker and lease.</param>
/// <exception cref="ArgumentException">
/// <paramref name="continuation"/> or the context's PartitionKeyRangeId is null or empty.
/// </exception>
/// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
/// <exception cref="LeaseLostException">
/// No worker data or lease is registered for the partition, or the partition's worker has been cancelled.
/// </exception>
internal async Task CheckpointAsync(string continuation, ChangeFeedObserverContext context)
{
    // ArgumentException(string) takes a message, not a parameter name; use the (message, paramName) overload
    // so that ParamName is populated.
    if (string.IsNullOrEmpty(continuation))
    {
        throw new ArgumentException("The continuation token must not be null or empty.", "continuation");
    }

    if (context == null)
    {
        throw new ArgumentNullException("context");
    }

    if (string.IsNullOrEmpty(context.PartitionKeyRangeId))
    {
        throw new ArgumentException("context.PartitionKeyRangeId must not be null or empty.", "context");
    }

    WorkerData workerData;
    this.partitionKeyRangeIdToWorkerMap.TryGetValue(context.PartitionKeyRangeId, out workerData);

    if (workerData == null)
    {
        TraceLog.Warning(string.Format("CheckpointAsync: called at wrong time, failed to get worker data for partition {0}. Most likely the partition is not longer owned by this host.", context.PartitionKeyRangeId));
        throw new LeaseLostException(string.Format("Failed to find lease for partition {0} in the set of owned leases.", context.PartitionKeyRangeId));
    }

    if (workerData.Lease == null)
    {
        TraceLog.Error(string.Format("CheckpointAsync: found the worker data but lease is null, for partition {0}. This should never happen.", context.PartitionKeyRangeId));
        throw new LeaseLostException(string.Format("Failed to find lease for partition {0}.", context.PartitionKeyRangeId));
    }

    // CheckpointInProgress is acquired before checkpointing and always released in the finally block.
    await workerData.CheckpointInProgress.WaitAsync();
    try
    {
        if (workerData.Cancellation.IsCancellationRequested)
        {
            TraceLog.Warning(string.Format("CheckpointAsync: called at wrong time, partition {0} is shutting down. The ownership of the partition by this host is about to end.", context.PartitionKeyRangeId));
            throw new LeaseLostException(string.Format("CheckpointAsync: partition {0} is shutting down.", context.PartitionKeyRangeId));
        }

        // Store the lease returned by the checkpoint so the next checkpoint starts from it.
        workerData.Lease = await this.CheckpointAsync(workerData.Lease, continuation, context);
    }
    finally
    {
        workerData.CheckpointInProgress.Release();
    }
}
/// <summary>
/// Parses <paramref name="number"/> as a 64-bit integer using the invariant culture and NumberStyles.Any.
/// </summary>
/// <param name="number">Text to parse; may be null or empty.</param>
/// <returns>
/// The parsed value, or 0 when the text is null, empty or cannot be parsed
/// (an unparsable, non-empty value is also logged as a warning).
/// </returns>
private static long TryConvertToNumber(string number)
{
    if (!string.IsNullOrEmpty(number))
    {
        long value;
        if (long.TryParse(number, NumberStyles.Any, CultureInfo.InvariantCulture, out value))
        {
            return value;
        }

        TraceLog.Warning(string.Format(CultureInfo.InvariantCulture, "Cannot parse number '{0}'.", number));
    }

    return 0;
}
/// <summary>
/// Create leases for new partitions and take care of split partitions.
/// </summary>
/// <param name="ranges">Current partition key ranges of the monitored collection, keyed by range id.</param>
/// <remarks>
/// A range without a lease gets a new lease. If one of its parents has a lease but is no longer among
/// <paramref name="ranges"/>, the parent's continuation token is passed on to the new lease.
/// Afterwards the leases of all ranges that no longer exist are deleted.
/// </remarks>
private async Task CreateLeases(IDictionary<string, PartitionKeyRange> ranges)
{
    Debug.Assert(ranges != null);

    // Get leases after getting ranges, to make sure that no other hosts checked in continuation
    // for split partition after we got leases.
    var existingLeases = new Dictionary<string, DocumentServiceLease>();
    foreach (var lease in await this.leaseManager.ListLeases())
    {
        existingLeases.Add(lease.PartitionId, lease);
    }

    // Leases whose partition is no longer among the current ranges.
    var gonePartitionIds = new HashSet<string>();
    foreach (var partitionId in existingLeases.Keys)
    {
        if (!ranges.ContainsKey(partitionId))
        {
            gonePartitionIds.Add(partitionId);
        }
    }

    // Current ranges that have no lease yet.
    var addedPartitionIds = new List<string>();
    foreach (var range in ranges)
    {
        if (!existingLeases.ContainsKey(range.Key))
        {
            addedPartitionIds.Add(range.Key);
        }
    }

    // Create leases for new partitions; if there was a split, use the continuation from the parent partition.
    await addedPartitionIds.ForEachAsync(
        async addedRangeId =>
        {
            this.statsSinceLastCheckpoint.AddOrUpdate(
                addedRangeId,
                new CheckpointStats(),
                (partitionId, existingStats) => existingStats);

            string continuationToken = null;
            string parentIds = string.Empty;
            var addedRange = ranges[addedRangeId];

            if (addedRange.Parents != null && addedRange.Parents.Count > 0) // Check for split.
            {
                foreach (var parentRangeId in addedRange.Parents)
                {
                    if (gonePartitionIds.Contains(parentRangeId))
                    {
                        // Transfer the continuation from the lease of the gone parent to the lease of its child partition.
                        Debug.Assert(existingLeases[parentRangeId] != null);
                        string parentContinuation = existingLeases[parentRangeId].ContinuationToken;

                        parentIds += parentIds.Length == 0 ? parentRangeId : "," + parentRangeId;

                        if (continuationToken != null)
                        {
                            // The format string has four placeholders: partition, new token, current token, chosen token.
                            // The token of the parent seen last wins.
                            TraceLog.Warning(string.Format(
                                "Partition {0}: found more than one parent, new continuation '{1}', current '{2}', will use '{3}'",
                                addedRangeId,
                                parentContinuation,
                                continuationToken,
                                parentContinuation));
                        }

                        continuationToken = parentContinuation;
                    }
                }
            }

            bool wasCreated = await this.leaseManager.CreateLeaseIfNotExistAsync(addedRangeId, continuationToken);
            if (wasCreated)
            {
                if (parentIds.Length == 0)
                {
                    TraceLog.Informational(string.Format("Created lease for partition '{0}', continuation '{1}'.", addedRangeId, continuationToken));
                }
                else
                {
                    TraceLog.Informational(string.Format("Created lease for partition '{0}' as child of split partition(s) '{1}', continuation '{2}'.", addedRangeId, parentIds, continuationToken));
                }
            }
            else
            {
                TraceLog.Warning(string.Format("Some other host created lease for '{0}' as child of split partition(s) '{1}', continuation '{2}'.", addedRangeId, parentIds, continuationToken));
            }
        },
        this.options.DegreeOfParallelism);

    // Remove leases for split (and thus gone) partitions, together with their checkpoint stats.
    await gonePartitionIds.ForEachAsync(
        async goneRangeId =>
        {
            await this.leaseManager.DeleteAsync(existingLeases[goneRangeId]);
            TraceLog.Informational(string.Format("Deleted lease for gone (splitted) partition '{0}', continuation '{1}'", goneRangeId, existingLeases[goneRangeId].ContinuationToken));

            CheckpointStats removedStatsUnused;
            this.statsSinceLastCheckpoint.TryRemove(goneRangeId, out removedStatsUnused);
        },
        this.options.DegreeOfParallelism);
}
/// <summary>
/// Called when this host acquires the lease for a partition: creates an observer and starts a worker
/// that reads the partition's change feed, hands changes to the observer and checkpoints the lease.
/// </summary>
/// <param name="lease">The acquired lease; asserted to be non-null with a non-empty owner in debug builds.</param>
/// <remarks>
/// The worker runs until the host is shutting down, its cancellation token fires, or an error occurs.
/// Whenever a close reason was recorded, release of the partition is requested from a separate task.
/// The worker is registered in partitionKeyRangeIdToWorkerMap under the partition key range id.
/// </remarks>
async Task IPartitionObserver<DocumentServiceLease>.OnPartitionAcquiredAsync(DocumentServiceLease lease)
{
    Debug.Assert(lease != null && !string.IsNullOrEmpty(lease.Owner), "lease");
    TraceLog.Informational(string.Format("Host '{0}' partition {1}: acquired!", this.HostName, lease.PartitionId));

#if DEBUG
    Interlocked.Increment(ref this.partitionCount);
#endif

    IChangeFeedObserver observer = this.observerFactory.CreateObserver();
    ChangeFeedObserverContext context = new ChangeFeedObserverContext { PartitionKeyRangeId = lease.PartitionId };
    CancellationTokenSource cancellation = new CancellationTokenSource();

    // Create ChangeFeedOptions to use for this worker (a copy, so the host-level options are not modified).
    ChangeFeedOptions options = new ChangeFeedOptions
    {
        MaxItemCount = this.changeFeedOptions.MaxItemCount,
        PartitionKeyRangeId = this.changeFeedOptions.PartitionKeyRangeId,
        SessionToken = this.changeFeedOptions.SessionToken,
        StartFromBeginning = this.changeFeedOptions.StartFromBeginning,
        RequestContinuation = this.changeFeedOptions.RequestContinuation
    };

    // StartNew with an async lambda yields Task<Task>; awaiting it gives the inner worker task,
    // which keeps running and is stored in the worker data below.
    var workerTask = await Task.Factory.StartNew(async () =>
    {
        ChangeFeedObserverCloseReason? closeReason = null;
        try
        {
            try
            {
                await observer.OpenAsync(context);
            }
            catch (Exception ex)
            {
                TraceLog.Error(string.Format("IChangeFeedObserver.OpenAsync exception: {0}", ex));
                closeReason = ChangeFeedObserverCloseReason.ObserverError;
                throw;
            }

            // A continuation stored in the lease takes precedence over the one from the host-level options.
            options.PartitionKeyRangeId = lease.PartitionId;
            if (!string.IsNullOrEmpty(lease.ContinuationToken))
            {
                options.RequestContinuation = lease.ContinuationToken;
            }

            CheckpointStats checkpointStats = null;
            if (!this.statsSinceLastCheckpoint.TryGetValue(lease.PartitionId, out checkpointStats) || checkpointStats == null)
            {
                // It could be that the lease was created by a different host and we picked it up.
                checkpointStats = this.statsSinceLastCheckpoint.AddOrUpdate(
                    lease.PartitionId,
                    new CheckpointStats(),
                    (partitionId, existingStats) => existingStats);

                // Logged through TraceLog like every other message in this method.
                TraceLog.Warning(string.Format("Added stats for partition '{0}' for which the lease was picked up after the host was started.", lease.PartitionId));
            }

            IDocumentQuery<Document> query = this.documentClient.CreateDocumentChangeFeedQuery(this.collectionSelfLink, options);
            TraceLog.Verbose(string.Format("Worker start: partition '{0}', continuation '{1}'", lease.PartitionId, lease.ContinuationToken));

            // Continuation of the last successful read; passed to HandleSplitAsync if the partition is gone.
            string lastContinuation = options.RequestContinuation;

            try
            {
                while (this.isShutdown == 0)
                {
                    do
                    {
                        // The exception is captured and handled after the catch block, where awaiting is done.
                        ExceptionDispatchInfo exceptionDispatchInfo = null;
                        FeedResponse<Document> response = null;

                        try
                        {
                            response = await query.ExecuteNextAsync<Document>();
                            lastContinuation = response.ResponseContinuation;
                        }
                        catch (DocumentClientException ex)
                        {
                            exceptionDispatchInfo = ExceptionDispatchInfo.Capture(ex);
                        }

                        if (exceptionDispatchInfo != null)
                        {
                            DocumentClientException dcex = (DocumentClientException)exceptionDispatchInfo.SourceException;

                            if (StatusCode.NotFound == (StatusCode)dcex.StatusCode && SubStatusCode.ReadSessionNotAvailable != (SubStatusCode)GetSubStatusCode(dcex))
                            {
                                // Most likely, the database or collection was removed while we were enumerating.
                                // Shut down. The user will need to start over.
                                // Note: this has to be a new task, can't await for shutdown here, as shutdown awaits for all worker tasks.
                                TraceLog.Error(string.Format("Partition {0}: resource gone (subStatus={1}). Aborting.", context.PartitionKeyRangeId, GetSubStatusCode(dcex)));
                                await Task.Factory.StartNew(() => this.StopAsync(ChangeFeedObserverCloseReason.ResourceGone));
                                break;
                            }
                            else if (StatusCode.Gone == (StatusCode)dcex.StatusCode)
                            {
                                SubStatusCode subStatusCode = (SubStatusCode)GetSubStatusCode(dcex);
                                if (SubStatusCode.PartitionKeyRangeGone == subStatusCode)
                                {
                                    bool isSuccess = await HandleSplitAsync(context.PartitionKeyRangeId, lastContinuation, lease.Id);
                                    if (!isSuccess)
                                    {
                                        TraceLog.Error(string.Format("Partition {0}: HandleSplit failed! Aborting.", context.PartitionKeyRangeId));
                                        await Task.Factory.StartNew(() => this.StopAsync(ChangeFeedObserverCloseReason.ResourceGone));
                                        break;
                                    }

                                    // Throw LeaseLostException so that we take the lease down.
                                    throw new LeaseLostException(lease, exceptionDispatchInfo.SourceException, true);
                                }
                                else if (SubStatusCode.Splitting == subStatusCode)
                                {
                                    TraceLog.Warning(string.Format("Partition {0} is splitting. Will retry to read changes until split finishes. {1}", context.PartitionKeyRangeId, dcex.Message));
                                }
                                else
                                {
                                    exceptionDispatchInfo.Throw();
                                }
                            }
                            else if (StatusCode.TooManyRequests == (StatusCode)dcex.StatusCode || StatusCode.ServiceUnavailable == (StatusCode)dcex.StatusCode)
                            {
                                TraceLog.Warning(string.Format("Partition {0}: retriable exception : {1}", context.PartitionKeyRangeId, dcex.Message));
                            }
                            else
                            {
                                exceptionDispatchInfo.Throw();
                            }

                            // Reached only for the retriable cases: wait for the server-provided retry interval
                            // if there is one, otherwise for the regular poll delay.
                            await Task.Delay(dcex.RetryAfter != TimeSpan.Zero ? dcex.RetryAfter : this.options.FeedPollDelay, cancellation.Token);
                        }

                        if (response != null)
                        {
                            if (response.Count > 0)
                            {
                                List<Document> docs = new List<Document>();
                                docs.AddRange(response);

                                try
                                {
                                    // The raw response is exposed on the context only for the duration of the callback.
                                    context.FeedResponse = response;
                                    await observer.ProcessChangesAsync(context, docs);
                                }
                                catch (Exception ex)
                                {
                                    TraceLog.Error(string.Format("IChangeFeedObserver.ProcessChangesAsync exception: {0}", ex));
                                    closeReason = ChangeFeedObserverCloseReason.ObserverError;
                                    throw;
                                }
                                finally
                                {
                                    context.FeedResponse = null;
                                }
                            }

                            checkpointStats.ProcessedDocCount += (uint)response.Count;

                            if (IsCheckpointNeeded(lease, checkpointStats))
                            {
                                lease = await CheckpointAsync(lease, response.ResponseContinuation, context);
                                checkpointStats.Reset();
                            }
                            else if (response.Count > 0)
                            {
                                TraceLog.Informational(string.Format("Checkpoint: not checkpointing for partition {0}, {1} docs, new continuation '{2}' as frequency condition is not met", lease.PartitionId, response.Count, response.ResponseContinuation));
                            }
                        }
                    }
                    while (query.HasMoreResults && this.isShutdown == 0);

                    if (this.isShutdown == 0)
                    {
                        await Task.Delay(this.options.FeedPollDelay, cancellation.Token);
                    }
                } // Outer while (this.isShutdown == 0) loop.

                closeReason = ChangeFeedObserverCloseReason.Shutdown;
            }
            catch (TaskCanceledException)
            {
                Debug.Assert(cancellation.IsCancellationRequested, "cancellation.IsCancellationRequested");
                TraceLog.Informational(string.Format("Cancel signal received for partition {0} worker!", context.PartitionKeyRangeId));
            }
        }
        catch (LeaseLostException ex)
        {
            closeReason = ex.IsGone ? ChangeFeedObserverCloseReason.LeaseGone : ChangeFeedObserverCloseReason.LeaseLost;
        }
        catch (Exception ex)
        {
            TraceLog.Error(string.Format("Partition {0} exception: {1}", context.PartitionKeyRangeId, ex));

            // Keep a more specific reason (e.g. ObserverError) if one was already recorded.
            if (!closeReason.HasValue)
            {
                closeReason = ChangeFeedObserverCloseReason.Unknown;
            }
        }

        if (closeReason.HasValue)
        {
            TraceLog.Informational(string.Format("Releasing lease for partition {0} due to an error, reason: {1}!", context.PartitionKeyRangeId, closeReason.Value));

            // Note: this has to be a new task, because OnPartitionReleasedAsync awaits for worker task.
            await Task.Factory.StartNew(async () => await this.partitionManager.TryReleasePartitionAsync(context.PartitionKeyRangeId, true, closeReason.Value));
        }

        TraceLog.Informational(string.Format("Partition {0}: worker finished!", context.PartitionKeyRangeId));
    });

    // Register the worker, replacing any previous entry for the same partition key range id.
    var newWorkerData = new WorkerData(workerTask, observer, context, cancellation);
    this.partitionKeyRangeIdToWorkerMap.AddOrUpdate(context.PartitionKeyRangeId, newWorkerData, (string id, WorkerData d) => { return newWorkerData; });
}
/// <summary>
/// Renewal loop: while the host is started or shutdown has not completed, renews the leases of all owned
/// partitions and of partitions that are being closed, then waits for the configured renew interval.
/// </summary>
/// <remarks>
/// Owned leases that fail to renew are removed with close reason LeaseLost. Closing leases that fail to
/// renew are dropped from further renewal. Exceptions inside an iteration are logged and the loop continues.
/// When the loop ends, both lease maps are cleared.
/// </remarks>
async Task LeaseRenewer()
{
    while (this.isStarted == 1 || !this.shutdownComplete)
    {
        try
        {
            TraceLog.Informational(string.Format("Host '{0}' starting renewal of Leases.", this.workerName));

            // The continuations below run concurrently, so every collection they add to is a concurrent one.
            ConcurrentBag<T> renewedLeases = new ConcurrentBag<T>();
            ConcurrentBag<T> failedToRenewLeases = new ConcurrentBag<T>();
            ConcurrentBag<T> failedToRenewShutdownLeases = new ConcurrentBag<T>();
            List<Task> renewTasks = new List<Task>();

            // Renew leases for all currently owned partitions in parallel.
            foreach (T lease in this.currentlyOwnedPartitions.Values)
            {
                renewTasks.Add(this.RenewLeaseAsync(lease).ContinueWith(renewResult =>
                {
                    if (renewResult.Result != null)
                    {
                        renewedLeases.Add(renewResult.Result);
                    }
                    else
                    {
                        // Keep track of all failed attempts to renew so we can trigger shutdown for these partitions.
                        failedToRenewLeases.Add(lease);
                    }
                }));
            }

            // Renew leases for all partitions currently in shutdown.
            foreach (T shutdownLease in this.keepRenewingDuringClose.Values)
            {
                renewTasks.Add(this.RenewLeaseAsync(shutdownLease).ContinueWith(renewResult =>
                {
                    if (renewResult.Result != null)
                    {
                        renewedLeases.Add(renewResult.Result);
                    }
                    else
                    {
                        // Keep track of all failed attempts to renew shutdown leases so we can remove them
                        // from further renew attempts.
                        failedToRenewShutdownLeases.Add(shutdownLease);
                    }
                }));
            }

            // Wait for all renews to complete.
            await Task.WhenAll(renewTasks.ToArray());

            // Update renewed leases.
            foreach (T lease in renewedLeases)
            {
                bool updateResult = this.currentlyOwnedPartitions.TryUpdate(lease.PartitionId, lease, lease);
                if (!updateResult)
                {
                    TraceLog.Warning(string.Format("Host '{0}' Renewed lease {1} but failed to update it in the map (ignorable).", this.workerName, lease));
                }
            }

            // Trigger shutdown of all partitions we failed to renew leases for.
            await failedToRenewLeases.ForEachAsync(
                async lease => await this.RemoveLeaseAsync(lease, false, ChangeFeedObserverCloseReason.LeaseLost),
                this.options.DegreeOfParallelism);

            // Now remove all failed renewals of shutdown leases from further renewals.
            foreach (T failedToRenewShutdownLease in failedToRenewShutdownLeases)
            {
                T removedLease = null;
                this.keepRenewingDuringClose.TryRemove(failedToRenewShutdownLease.PartitionId, out removedLease);
            }

            await Task.Delay(this.options.LeaseRenewInterval, this.leaseRenewerCancellationTokenSource.Token);
        }
        catch (OperationCanceledException)
        {
            TraceLog.Informational(string.Format("Host '{0}' Renewer task canceled.", this.workerName));
        }
        catch (Exception ex)
        {
            TraceLog.Exception(ex);
        }
    }

    this.currentlyOwnedPartitions.Clear();
    this.keepRenewingDuringClose.Clear();
    TraceLog.Informational(string.Format("Host '{0}' Renewer task completed.", this.workerName));
}
/// <summary>
/// Called when this host acquires the lease for a partition: creates an observer and starts a worker
/// that reads the partition's change feed, hands changes to the observer and checkpoints the lease.
/// </summary>
/// <param name="lease">The acquired lease; asserted to be non-null with a non-empty owner in debug builds.</param>
/// <remarks>
/// The lease is checkpointed after every successfully delivered non-empty batch, and once for an empty
/// response when the lease has no continuation token yet. If a close reason was recorded (lease lost,
/// observer error or unknown error), release of the partition is requested from a separate task.
/// The worker is registered in partitionKeyRangeIdToWorkerMap under the partition key range id.
/// </remarks>
async Task IPartitionObserver<DocumentServiceLease>.OnPartitionAcquiredAsync(DocumentServiceLease lease)
{
    Debug.Assert(lease != null && !string.IsNullOrEmpty(lease.Owner), "lease");
    TraceLog.Informational(string.Format("Host '{0}' partition {1}: acquired!", this.HostName, lease.PartitionId));

#if DEBUG
    Interlocked.Increment(ref this.partitionCount);
#endif

    IChangeFeedObserver observer = this.observerFactory.CreateObserver();
    ChangeFeedObserverContext context = new ChangeFeedObserverContext { PartitionKeyRangeId = lease.PartitionId };
    CancellationTokenSource cancellation = new CancellationTokenSource();

    // Create ChangeFeedOptions to use for this worker (a copy, so the host-level options are not modified).
    ChangeFeedOptions options = new ChangeFeedOptions
    {
        MaxItemCount = this.changeFeedOptions.MaxItemCount,
        PartitionKeyRangeId = this.changeFeedOptions.PartitionKeyRangeId,
        SessionToken = this.changeFeedOptions.SessionToken,
        StartFromBeginning = this.changeFeedOptions.StartFromBeginning,
        RequestContinuation = this.changeFeedOptions.RequestContinuation
    };

    // StartNew with an async lambda yields Task<Task>; awaiting it gives the inner worker task,
    // which keeps running and is stored in the worker data below.
    var workerTask = await Task.Factory.StartNew(async () =>
    {
        ChangeFeedObserverCloseReason? closeReason = null;
        try
        {
            try
            {
                await observer.OpenAsync(context);
            }
            catch (Exception ex)
            {
                TraceLog.Error(string.Format("IChangeFeedObserver.OpenAsync exception: {0}", ex));
                closeReason = ChangeFeedObserverCloseReason.ObserverError;
                throw;
            }

            // A continuation stored in the lease takes precedence over the one from the host-level options.
            options.PartitionKeyRangeId = lease.PartitionId;
            if (!string.IsNullOrEmpty(lease.ContinuationToken))
            {
                options.RequestContinuation = lease.ContinuationToken;
            }

            IDocumentQuery<Document> query = this.documentClient.CreateDocumentChangeFeedQuery(this.collectionSelfLink, options);
            TraceLog.Verbose(string.Format("Worker start: partition '{0}', continuation '{1}'", lease.PartitionId, lease.ContinuationToken));

            try
            {
                while (this.isShutdown == 0)
                {
                    do
                    {
                        // The exception is kept and handled after the catch block, where awaiting is done.
                        DocumentClientException dcex = null;
                        FeedResponse<Document> response = null;

                        try
                        {
                            response = await query.ExecuteNextAsync<Document>();
                        }
                        catch (DocumentClientException ex)
                        {
                            // Only NotFound, TooManyRequests and ServiceUnavailable are handled below.
                            if (StatusCode.NotFound != (StatusCode)ex.StatusCode &&
                                StatusCode.TooManyRequests != (StatusCode)ex.StatusCode &&
                                StatusCode.ServiceUnavailable != (StatusCode)ex.StatusCode)
                            {
                                throw;
                            }

                            dcex = ex;
                        }

                        if (dcex != null)
                        {
                            const int ReadSessionNotAvailable = 1002;
                            if (StatusCode.NotFound == (StatusCode)dcex.StatusCode && GetSubStatusCode(dcex) != ReadSessionNotAvailable)
                            {
                                // Most likely, the database or collection was removed while we were enumerating.
                                // Shut down. The user will need to start over.
                                // Note: this has to be a new task, can't await for shutdown here, as shutdown awaits for all worker tasks.
                                await Task.Factory.StartNew(() => this.StopAsync(ChangeFeedObserverCloseReason.ResourceGone));
                                break;
                            }
                            else
                            {
                                // Retriable: throttling, service unavailable, or NotFound with sub-status
                                // ReadSessionNotAvailable (the only NotFound that can reach this branch).
                                Debug.Assert(
                                    StatusCode.NotFound == (StatusCode)dcex.StatusCode ||
                                    StatusCode.TooManyRequests == (StatusCode)dcex.StatusCode ||
                                    StatusCode.ServiceUnavailable == (StatusCode)dcex.StatusCode);
                                TraceLog.Warning(string.Format("Partition {0}: retriable exception : {1}", context.PartitionKeyRangeId, dcex.Message));
                                await Task.Delay(dcex.RetryAfter != TimeSpan.Zero ? dcex.RetryAfter : this.options.FeedPollDelay, cancellation.Token);
                            }
                        }

                        if (response != null)
                        {
                            if (response.Count > 0)
                            {
                                List<Document> docs = new List<Document>();
                                docs.AddRange(response);

                                try
                                {
                                    await observer.ProcessChangesAsync(context, docs);
                                }
                                catch (Exception ex)
                                {
                                    TraceLog.Error(string.Format("IChangeFeedObserver.ProcessChangesAsync exception: {0}", ex));
                                    closeReason = ChangeFeedObserverCloseReason.ObserverError;
                                    throw;
                                }

                                // Checkpoint after every successful delivery to the client.
                                lease = await CheckpointAsync(lease, response.ResponseContinuation, context);
                            }
                            else if (string.IsNullOrEmpty(lease.ContinuationToken))
                            {
                                // Checkpoint if we've never done that for this lease.
                                lease = await CheckpointAsync(lease, response.ResponseContinuation, context);
                            }
                        }
                    }
                    while (query.HasMoreResults && this.isShutdown == 0);

                    if (this.isShutdown == 0)
                    {
                        await Task.Delay(this.options.FeedPollDelay, cancellation.Token);
                    }
                } // Outer while (this.isShutdown == 0) loop.
            }
            catch (TaskCanceledException)
            {
                Debug.Assert(cancellation.IsCancellationRequested, "cancellation.IsCancellationRequested");
                TraceLog.Informational(string.Format("Cancel signal received for partition {0} worker!", context.PartitionKeyRangeId));
            }
        }
        catch (LeaseLostException)
        {
            closeReason = ChangeFeedObserverCloseReason.LeaseLost;
        }
        catch (Exception ex)
        {
            TraceLog.Error(string.Format("Partition {0} exception: {1}", context.PartitionKeyRangeId, ex));

            // Keep a more specific reason (e.g. ObserverError) if one was already recorded.
            if (!closeReason.HasValue)
            {
                closeReason = ChangeFeedObserverCloseReason.Unknown;
            }
        }

        if (closeReason.HasValue)
        {
            TraceLog.Informational(string.Format("Releasing lease for partition {0} due to an error, reason: {1}!", context.PartitionKeyRangeId, closeReason.Value));

            // Note: this has to be a new task, because OnPartitionReleasedAsync awaits for worker task.
            await Task.Factory.StartNew(async () => await this.partitionManager.TryReleasePartitionAsync(context.PartitionKeyRangeId, true, closeReason.Value));
        }

        TraceLog.Informational(string.Format("Partition {0}: worker finished!", context.PartitionKeyRangeId));
    });

    // Register the worker, replacing any previous entry for the same partition key range id.
    var newWorkerData = new WorkerData(workerTask, observer, context, cancellation);
    this.partitionKeyRangeIdToWorkerMap.AddOrUpdate(context.PartitionKeyRangeId, newWorkerData, (string id, WorkerData d) => { return newWorkerData; });
}