public WorkerData(Task task, IChangeFeedObserver observer, ChangeFeedObserverContext context, CancellationTokenSource cancellation) { this.Task = task; this.Observer = observer; this.Context = context; this.Cancellation = cancellation; }
public WorkerData(Task task, IChangeFeedObserver observer, ChangeFeedObserverContext context, CancellationTokenSource cancellation, DocumentServiceLease lease) { Debug.Assert(task != null); Debug.Assert(observer != null); Debug.Assert(context != null); Debug.Assert(cancellation != null); this.Task = task; this.Observer = observer; this.Context = context; this.Cancellation = cancellation; this.Lease = lease; }
internal async Task CheckpointAsync(string continuation, ChangeFeedObserverContext context) { if (string.IsNullOrEmpty(continuation)) { throw new ArgumentException("continuation"); } if (context == null) { throw new ArgumentNullException("context"); } if (string.IsNullOrEmpty(context.PartitionKeyRangeId)) { throw new ArgumentException("context.PartitionKeyRangeId"); } WorkerData workerData; this.partitionKeyRangeIdToWorkerMap.TryGetValue(context.PartitionKeyRangeId, out workerData); if (workerData == null) { TraceLog.Warning(string.Format("CheckpointAsync: called at wrong time, failed to get worker data for partition {0}. Most likely the partition is not longer owned by this host.", context.PartitionKeyRangeId)); throw new LeaseLostException(string.Format("Failed to find lease for partition {0} in the set of owned leases.", context.PartitionKeyRangeId)); } if (workerData.Lease == null) { TraceLog.Error(string.Format("CheckpointAsync: found the worker data but lease is null, for partition {0}. This should never happen.", context.PartitionKeyRangeId)); throw new LeaseLostException(string.Format("Failed to find lease for partition {0}.", context.PartitionKeyRangeId)); } await workerData.CheckpointInProgress.WaitAsync(); try { if (workerData.Cancellation.IsCancellationRequested) { TraceLog.Warning(string.Format("CheckpointAsync: called at wrong time, partition {0} is shutting down. The ownership of the partition by this host is about to end.", context.PartitionKeyRangeId)); throw new LeaseLostException(string.Format("CheckpointAsync: partition {0} is shutting down.", context.PartitionKeyRangeId)); } workerData.Lease = await this.CheckpointAsync(workerData.Lease, continuation, context); } finally { workerData.CheckpointInProgress.Release(); } }
async Task <DocumentServiceLease> CheckpointAsync(DocumentServiceLease lease, string continuation, ChangeFeedObserverContext context) { Debug.Assert(lease != null); Debug.Assert(!string.IsNullOrEmpty(continuation)); DocumentServiceLease result = null; try { result = (DocumentServiceLease)await this.checkpointManager.CheckpointAsync(lease, continuation, lease.SequenceNumber + 1); Debug.Assert(result.ContinuationToken == continuation, "ContinuationToken was not updated!"); TraceLog.Informational(string.Format("Checkpoint: partition {0}, new continuation '{1}'", lease.PartitionId, continuation)); } catch (LeaseLostException) { TraceLog.Warning(string.Format("Partition {0}: failed to checkpoint due to lost lease", context.PartitionKeyRangeId)); throw; } catch (Exception ex) { TraceLog.Error(string.Format("Partition {0}: failed to checkpoint due to unexpected error: {1}", context.PartitionKeyRangeId, ex.Message)); throw; } Debug.Assert(result != null); return(await Task.FromResult <DocumentServiceLease>(result)); }
async Task IPartitionObserver <DocumentServiceLease> .OnPartitionAcquiredAsync(DocumentServiceLease lease) { Debug.Assert(lease != null && !string.IsNullOrEmpty(lease.Owner), "lease"); TraceLog.Informational(string.Format("Host '{0}' partition {1}: acquired!", this.HostName, lease.PartitionId)); #if DEBUG Interlocked.Increment(ref this.partitionCount); #endif IChangeFeedObserver observer = this.observerFactory.CreateObserver(); ChangeFeedObserverContext context = new ChangeFeedObserverContext { PartitionKeyRangeId = lease.PartitionId }; CancellationTokenSource cancellation = new CancellationTokenSource(); // Create ChangeFeedOptions to use for this worker. ChangeFeedOptions options = new ChangeFeedOptions { MaxItemCount = this.changeFeedOptions.MaxItemCount, PartitionKeyRangeId = this.changeFeedOptions.PartitionKeyRangeId, SessionToken = this.changeFeedOptions.SessionToken, StartFromBeginning = this.changeFeedOptions.StartFromBeginning, RequestContinuation = this.changeFeedOptions.RequestContinuation }; var workerTask = await Task.Factory.StartNew(async() => { ChangeFeedObserverCloseReason?closeReason = null; try { try { await observer.OpenAsync(context); } catch (Exception ex) { TraceLog.Error(string.Format("IChangeFeedObserver.OpenAsync exception: {0}", ex)); closeReason = ChangeFeedObserverCloseReason.ObserverError; throw; } options.PartitionKeyRangeId = lease.PartitionId; if (!string.IsNullOrEmpty(lease.ContinuationToken)) { options.RequestContinuation = lease.ContinuationToken; } CheckpointStats checkpointStats = null; if (!this.statsSinceLastCheckpoint.TryGetValue(lease.PartitionId, out checkpointStats) || checkpointStats == null) { // It could be that the lease was created by different host and we picked it up. checkpointStats = this.statsSinceLastCheckpoint.AddOrUpdate( lease.PartitionId, new CheckpointStats(), (partitionId, existingStats) => existingStats); Trace.TraceWarning(string.Format("Added stats for partition '{0}' for which the lease was picked up after the host was started.", lease.PartitionId)); } IDocumentQuery <Document> query = this.documentClient.CreateDocumentChangeFeedQuery(this.collectionSelfLink, options); TraceLog.Verbose(string.Format("Worker start: partition '{0}', continuation '{1}'", lease.PartitionId, lease.ContinuationToken)); string lastContinuation = options.RequestContinuation; try { while (this.isShutdown == 0) { do { ExceptionDispatchInfo exceptionDispatchInfo = null; FeedResponse <Document> response = null; try { response = await query.ExecuteNextAsync <Document>(); lastContinuation = response.ResponseContinuation; } catch (DocumentClientException ex) { exceptionDispatchInfo = ExceptionDispatchInfo.Capture(ex); } if (exceptionDispatchInfo != null) { DocumentClientException dcex = (DocumentClientException)exceptionDispatchInfo.SourceException; if (StatusCode.NotFound == (StatusCode)dcex.StatusCode && SubStatusCode.ReadSessionNotAvailable != (SubStatusCode)GetSubStatusCode(dcex)) { // Most likely, the database or collection was removed while we were enumerating. // Shut down. The user will need to start over. // Note: this has to be a new task, can't await for shutdown here, as shudown awaits for all worker tasks. TraceLog.Error(string.Format("Partition {0}: resource gone (subStatus={1}). Aborting.", context.PartitionKeyRangeId, GetSubStatusCode(dcex))); await Task.Factory.StartNew(() => this.StopAsync(ChangeFeedObserverCloseReason.ResourceGone)); break; } else if (StatusCode.Gone == (StatusCode)dcex.StatusCode) { SubStatusCode subStatusCode = (SubStatusCode)GetSubStatusCode(dcex); if (SubStatusCode.PartitionKeyRangeGone == subStatusCode) { bool isSuccess = await HandleSplitAsync(context.PartitionKeyRangeId, lastContinuation, lease.Id); if (!isSuccess) { TraceLog.Error(string.Format("Partition {0}: HandleSplit failed! Aborting.", context.PartitionKeyRangeId)); await Task.Factory.StartNew(() => this.StopAsync(ChangeFeedObserverCloseReason.ResourceGone)); break; } // Throw LeaseLostException so that we take the lease down. throw new LeaseLostException(lease, exceptionDispatchInfo.SourceException, true); } else if (SubStatusCode.Splitting == subStatusCode) { TraceLog.Warning(string.Format("Partition {0} is splitting. Will retry to read changes until split finishes. {1}", context.PartitionKeyRangeId, dcex.Message)); } else { exceptionDispatchInfo.Throw(); } } else if (StatusCode.TooManyRequests == (StatusCode)dcex.StatusCode || StatusCode.ServiceUnavailable == (StatusCode)dcex.StatusCode) { TraceLog.Warning(string.Format("Partition {0}: retriable exception : {1}", context.PartitionKeyRangeId, dcex.Message)); } else { exceptionDispatchInfo.Throw(); } await Task.Delay(dcex.RetryAfter != TimeSpan.Zero ? dcex.RetryAfter : this.options.FeedPollDelay, cancellation.Token); } if (response != null) { if (response.Count > 0) { List <Document> docs = new List <Document>(); docs.AddRange(response); try { context.FeedResponse = response; await observer.ProcessChangesAsync(context, docs); } catch (Exception ex) { TraceLog.Error(string.Format("IChangeFeedObserver.ProcessChangesAsync exception: {0}", ex)); closeReason = ChangeFeedObserverCloseReason.ObserverError; throw; } finally { context.FeedResponse = null; } } checkpointStats.ProcessedDocCount += (uint)response.Count; if (IsCheckpointNeeded(lease, checkpointStats)) { lease = await CheckpointAsync(lease, response.ResponseContinuation, context); checkpointStats.Reset(); } else if (response.Count > 0) { TraceLog.Informational(string.Format("Checkpoint: not checkpointing for partition {0}, {1} docs, new continuation '{2}' as frequency condition is not met", lease.PartitionId, response.Count, response.ResponseContinuation)); } } }while (query.HasMoreResults && this.isShutdown == 0); if (this.isShutdown == 0) { await Task.Delay(this.options.FeedPollDelay, cancellation.Token); } } // Outer while (this.isShutdown == 0) loop. closeReason = ChangeFeedObserverCloseReason.Shutdown; } catch (TaskCanceledException) { Debug.Assert(cancellation.IsCancellationRequested, "cancellation.IsCancellationRequested"); TraceLog.Informational(string.Format("Cancel signal received for partition {0} worker!", context.PartitionKeyRangeId)); } } catch (LeaseLostException ex) { closeReason = ex.IsGone ? ChangeFeedObserverCloseReason.LeaseGone : ChangeFeedObserverCloseReason.LeaseLost; } catch (Exception ex) { TraceLog.Error(string.Format("Partition {0} exception: {1}", context.PartitionKeyRangeId, ex)); if (!closeReason.HasValue) { closeReason = ChangeFeedObserverCloseReason.Unknown; } } if (closeReason.HasValue) { TraceLog.Informational(string.Format("Releasing lease for partition {0} due to an error, reason: {1}!", context.PartitionKeyRangeId, closeReason.Value)); // Note: this has to be a new task, because OnPartitionReleasedAsync awaits for worker task. await Task.Factory.StartNew(async() => await this.partitionManager.TryReleasePartitionAsync(context.PartitionKeyRangeId, true, closeReason.Value)); } TraceLog.Informational(string.Format("Partition {0}: worker finished!", context.PartitionKeyRangeId)); }); var newWorkerData = new WorkerData(workerTask, observer, context, cancellation); this.partitionKeyRangeIdToWorkerMap.AddOrUpdate(context.PartitionKeyRangeId, newWorkerData, (string id, WorkerData d) => { return(newWorkerData); }); }
async Task IPartitionObserver <DocumentServiceLease> .OnPartitionAcquiredAsync(DocumentServiceLease lease) { Debug.Assert(lease != null && !string.IsNullOrEmpty(lease.Owner), "lease"); TraceLog.Informational(string.Format("Host '{0}' partition {1}: acquired!", this.HostName, lease.PartitionId)); #if DEBUG Interlocked.Increment(ref this.partitionCount); #endif IChangeFeedObserver observer = this.observerFactory.CreateObserver(); ChangeFeedObserverContext context = new ChangeFeedObserverContext { PartitionKeyRangeId = lease.PartitionId }; CancellationTokenSource cancellation = new CancellationTokenSource(); // Create ChangeFeedOptions to use for this worker. ChangeFeedOptions options = new ChangeFeedOptions { MaxItemCount = this.changeFeedOptions.MaxItemCount, PartitionKeyRangeId = this.changeFeedOptions.PartitionKeyRangeId, SessionToken = this.changeFeedOptions.SessionToken, StartFromBeginning = this.changeFeedOptions.StartFromBeginning, RequestContinuation = this.changeFeedOptions.RequestContinuation }; var workerTask = await Task.Factory.StartNew(async() => { ChangeFeedObserverCloseReason?closeReason = null; try { try { await observer.OpenAsync(context); } catch (Exception ex) { TraceLog.Error(string.Format("IChangeFeedObserver.OpenAsync exception: {0}", ex)); closeReason = ChangeFeedObserverCloseReason.ObserverError; throw; } options.PartitionKeyRangeId = lease.PartitionId; if (!string.IsNullOrEmpty(lease.ContinuationToken)) { options.RequestContinuation = lease.ContinuationToken; } IDocumentQuery <Document> query = this.documentClient.CreateDocumentChangeFeedQuery(this.collectionSelfLink, options); TraceLog.Verbose(string.Format("Worker start: partition '{0}', continuation '{1}'", lease.PartitionId, lease.ContinuationToken)); try { while (this.isShutdown == 0) { do { DocumentClientException dcex = null; FeedResponse <Document> response = null; try { response = await query.ExecuteNextAsync <Document>(); } catch (DocumentClientException ex) { if (StatusCode.NotFound != (StatusCode)ex.StatusCode && StatusCode.TooManyRequests != (StatusCode)ex.StatusCode && StatusCode.ServiceUnavailable != (StatusCode)ex.StatusCode) { throw; } dcex = ex; } if (dcex != null) { const int ReadSessionNotAvailable = 1002; if (StatusCode.NotFound == (StatusCode)dcex.StatusCode && GetSubStatusCode(dcex) != ReadSessionNotAvailable) { // Most likely, the database or collection was removed while we were enumerating. // Shut down. The user will need to start over. // Note: this has to be a new task, can't await for shutdown here, as shudown awaits for all worker tasks. await Task.Factory.StartNew(() => this.StopAsync(ChangeFeedObserverCloseReason.ResourceGone)); break; } else { Debug.Assert(StatusCode.TooManyRequests == (StatusCode)dcex.StatusCode || StatusCode.ServiceUnavailable == (StatusCode)dcex.StatusCode); TraceLog.Warning(string.Format("Partition {0}: retriable exception : {1}", context.PartitionKeyRangeId, dcex.Message)); await Task.Delay(dcex.RetryAfter != TimeSpan.Zero ? dcex.RetryAfter : this.options.FeedPollDelay, cancellation.Token); } } if (response != null) { if (response.Count > 0) { List <Document> docs = new List <Document>(); docs.AddRange(response); try { await observer.ProcessChangesAsync(context, docs); } catch (Exception ex) { TraceLog.Error(string.Format("IChangeFeedObserver.ProcessChangesAsync exception: {0}", ex)); closeReason = ChangeFeedObserverCloseReason.ObserverError; throw; } // Checkpoint after every successful delivery to the client. lease = await CheckpointAsync(lease, response.ResponseContinuation, context); } else if (string.IsNullOrEmpty(lease.ContinuationToken)) { // Checkpoint if we've never done that for this lease. lease = await CheckpointAsync(lease, response.ResponseContinuation, context); } } }while (query.HasMoreResults && this.isShutdown == 0); if (this.isShutdown == 0) { await Task.Delay(this.options.FeedPollDelay, cancellation.Token); } } // Outer while (this.isShutdown == 0) loop. } catch (TaskCanceledException) { Debug.Assert(cancellation.IsCancellationRequested, "cancellation.IsCancellationRequested"); TraceLog.Informational(string.Format("Cancel signal received for partition {0} worker!", context.PartitionKeyRangeId)); } } catch (LeaseLostException) { closeReason = ChangeFeedObserverCloseReason.LeaseLost; } catch (Exception ex) { TraceLog.Error(string.Format("Partition {0} exception: {1}", context.PartitionKeyRangeId, ex)); if (!closeReason.HasValue) { closeReason = ChangeFeedObserverCloseReason.Unknown; } } if (closeReason.HasValue) { TraceLog.Informational(string.Format("Releasing lease for partition {0} due to an error, reason: {1}!", context.PartitionKeyRangeId, closeReason.Value)); // Note: this has to be a new task, because OnPartitionReleasedAsync awaits for worker task. await Task.Factory.StartNew(async() => await this.partitionManager.TryReleasePartitionAsync(context.PartitionKeyRangeId, true, closeReason.Value)); } TraceLog.Informational(string.Format("Partition {0}: worker finished!", context.PartitionKeyRangeId)); }); var newWorkerData = new WorkerData(workerTask, observer, context, cancellation); this.partitionKeyRangeIdToWorkerMap.AddOrUpdate(context.PartitionKeyRangeId, newWorkerData, (string id, WorkerData d) => { return(newWorkerData); }); }