/// <summary>
/// Attempts to renew the given lease through the lease manager, tracing the attempt and its outcome.
/// </summary>
/// <param name="lease">The lease to renew.</param>
/// <returns>
/// The lease returned by the lease manager on success; <c>null</c> when a
/// <see cref="LeaseLostException"/> is raised; the input <paramref name="lease"/> when any other
/// exception is raised (the failure is traced and the lease is treated as still held).
/// </returns>
async Task<T> RenewLeaseAsync(T lease)
{
    T outcome = null;

    try
    {
        string startMessage = string.Format(
            "Host '{0}' renewing lease for PartitionId '{1}' with lease token '{2}'",
            this.workerName,
            lease.PartitionId,
            lease.ConcurrencyToken);
        TraceLog.Informational(startMessage);

        outcome = await this.leaseManager.RenewAsync(lease);
    }
    catch (LeaseLostException)
    {
        // The outcome stays null so the caller can see the lease is no longer ours.
        string lostMessage = string.Format(
            "Host '{0}' got LeaseLostException trying to renew lease for PartitionId '{1}' with lease token '{2}'",
            this.workerName,
            lease.PartitionId,
            lease.ConcurrencyToken);
        TraceLog.Informational(lostMessage);
    }
    catch (Exception unexpected)
    {
        TraceLog.Exception(unexpected);

        // Swallow the failure and report the lease as renewed: the lease store itself
        // may be unavailable, which is not a reason to give the lease up.
        outcome = lease;
    }
    finally
    {
        bool wasRenewed = outcome != null;
        string resultMessage = string.Format(
            "Host '{0}' attempted to renew lease for PartitionId '{1}' and lease token '{2}' with result: '{3}'",
            this.workerName,
            lease.PartitionId,
            lease.ConcurrencyToken,
            wasRenewed);
        TraceLog.Informational(resultMessage);
    }

    return outcome;
}
/// <summary>
/// Checkpoints the given continuation token for the lease via the checkpoint manager,
/// passing the lease's sequence number incremented by one.
/// </summary>
/// <param name="lease">The lease to checkpoint; must not be null.</param>
/// <param name="continuation">The continuation token to persist; must not be null or empty.</param>
/// <param name="context">Observer context; its PartitionKeyRangeId is used in failure traces.</param>
/// <returns>The lease returned by the checkpoint manager.</returns>
/// <remarks>
/// Any exception from the checkpoint manager is traced and rethrown unchanged
/// (a <see cref="LeaseLostException"/> is traced as a warning, everything else as an error).
/// </remarks>
async Task<DocumentServiceLease> CheckpointAsync(DocumentServiceLease lease, string continuation, ChangeFeedObserverContext context)
{
    Debug.Assert(lease != null);
    Debug.Assert(!string.IsNullOrEmpty(continuation));

    DocumentServiceLease result = null;
    try
    {
        result = (DocumentServiceLease)await this.checkpointManager.CheckpointAsync(lease, continuation, lease.SequenceNumber + 1);

        Debug.Assert(result.ContinuationToken == continuation, "ContinuationToken was not updated!");
        TraceLog.Informational(string.Format("Checkpoint: partition {0}, new continuation '{1}'", lease.PartitionId, continuation));
    }
    catch (LeaseLostException)
    {
        TraceLog.Warning(string.Format("Partition {0}: failed to checkpoint due to lost lease", context.PartitionKeyRangeId));
        throw;
    }
    catch (Exception ex)
    {
        TraceLog.Error(string.Format("Partition {0}: failed to checkpoint due to unexpected error: {1}", context.PartitionKeyRangeId, ex.Message));
        throw;
    }

    Debug.Assert(result != null);

    // The method is already async; returning the value directly avoids wrapping it in a
    // completed task only to await it again.
    return result;
}
/// <summary>
/// Stops the host: stops the partition manager, cancels every worker and waits for all of them to finish.
/// </summary>
/// <param name="reason">The close reason passed on to the partition manager.</param>
/// <remarks>
/// Only the first call does any work; later calls return immediately because the shutdown flag
/// is flipped atomically. The worker map is cleared and the lease manager is disposed (when it
/// implements <see cref="IDisposable"/>) even if one of the awaited closing tasks fails; in that
/// case the failure still propagates to the caller.
/// </remarks>
async Task StopAsync(ChangeFeedObserverCloseReason reason)
{
    if (Interlocked.CompareExchange(ref this.isShutdown, 1, 0) != 0)
    {
        return;
    }

    TraceLog.Informational(string.Format("Host '{0}': STOP signal received!", this.HostName));

    List<Task> closingTasks = new List<Task>();

    // Trigger stop for PartitionManager so it triggers shutdown of AcquireLease task and starts processor shutdown
    closingTasks.Add(this.partitionManager.StopAsync(reason));

    // Stop all workers.
    TraceLog.Informational(string.Format("Host '{0}': Cancelling {1} workers.", this.HostName, this.partitionKeyRangeIdToWorkerMap.Count));
    foreach (var item in this.partitionKeyRangeIdToWorkerMap.Values)
    {
        item.Cancellation.Cancel();
        closingTasks.Add(item.Task);
    }

    // wait for everything to shutdown
    TraceLog.Informational(string.Format("Host '{0}': Waiting for {1} closing tasks...", this.HostName, closingTasks.Count));
    try
    {
        await Task.WhenAll(closingTasks.ToArray());
    }
    finally
    {
        // Task.WhenAll only completes once every task has finished, so cleanup is safe here
        // whether or not one of them faulted. Shutdown cannot be retried (the flag is already
        // set), so skipping cleanup on failure would leave the lease manager undisposed.
        this.partitionKeyRangeIdToWorkerMap.Clear();

        IDisposable disposableLeaseManager = this.leaseManager as IDisposable;
        if (disposableLeaseManager != null)
        {
            disposableLeaseManager.Dispose();
        }
    }

    TraceLog.Informational(string.Format("Host '{0}': stopped.", this.HostName));
}
/// <summary>
/// Called when a partition lease is released: cancels the partition's worker, closes its
/// observer, waits for the worker task to finish and removes the worker from the map.
/// </summary>
/// <param name="l">The lease of the partition being released.</param>
/// <param name="reason">The reason passed to the observer's CloseAsync.</param>
/// <remarks>
/// Exceptions thrown by the observer's CloseAsync are traced and swallowed. When no worker is
/// registered for the partition, only the trace messages are written.
/// </remarks>
async Task IPartitionObserver<DocumentServiceLease>.OnPartitionReleasedAsync(DocumentServiceLease l, ChangeFeedObserverCloseReason reason)
{
#if DEBUG
    Interlocked.Decrement(ref this.partitionCount);
#endif

    TraceLog.Informational(string.Format("Host '{0}' releasing partition {1}...", this.HostName, l.PartitionId));

    WorkerData workerData = null;
    if (this.partitionKeyRangeIdToWorkerMap.TryGetValue(l.PartitionId, out workerData))
    {
        workerData.Cancellation.Cancel();

        try
        {
            await workerData.Observer.CloseAsync(workerData.Context, reason);
        }
        catch (Exception ex)
        {
            // Eat all client exceptions.
            TraceLog.Error(string.Format("IChangeFeedObserver.CloseAsync: exception: {0}", ex));
        }

        await workerData.Task;

        // Use a separate out variable: a failed TryRemove resets its out argument to null.
        WorkerData removedWorkerData;
        this.partitionKeyRangeIdToWorkerMap.TryRemove(l.PartitionId, out removedWorkerData);
    }

    // Log the id from the lease rather than from workerData, which is null when no worker
    // was registered for this partition.
    TraceLog.Informational(string.Format("Host '{0}' partition {1}: released!", this.HostName, l.PartitionId));
}
/// <summary>
/// Handle split for given partition: create leases for the child partitions, carrying over the
/// parent's continuation token, then delete the parent's lease.
/// </summary>
/// <param name="partitionKeyRangeId">The id of the partition that was split, aka parent partition.</param>
/// <param name="continuationToken">Continuation token on split partition before split.</param>
/// <param name="leaseId">The id of the lease. This is needed to avoid extra call to ILeaseManager to get the lease by partitionId.</param>
/// <returns>True on success, false when fewer than two child partitions were found.</returns>
private async Task<bool> HandleSplitAsync(string partitionKeyRangeId, string continuationToken, string leaseId)
{
    Debug.Assert(!string.IsNullOrEmpty(partitionKeyRangeId));
    Debug.Assert(!string.IsNullOrEmpty(leaseId));

    TraceLog.Informational(string.Format("Partition {0} is gone due to split, continuation '{1}'", partitionKeyRangeId, continuationToken));

    List<PartitionKeyRange> allRanges = await this.EnumPartitionKeyRangesAsync(this.collectionSelfLink);

    // A range may have no parent list at all, so guard before searching it.
    var childRanges = new List<PartitionKeyRange>(
        allRanges.Where(range => range.Parents != null && range.Parents.Contains(partitionKeyRangeId)));

    if (childRanges.Count < 2)
    {
        // The format argument is required: the message contains a {0} placeholder.
        TraceLog.Error(string.Format("Partition {0} had split but we failed to find at least 2 child paritions.", partitionKeyRangeId));
        return false;
    }

    var tasks = new List<Task>();
    foreach (var childRange in childRanges)
    {
        tasks.Add(this.leaseManager.CreateLeaseIfNotExistAsync(childRange.Id, continuationToken));
        TraceLog.Informational(string.Format("Creating lease for partition '{0}' as child of partition '{1}', continuation '{2}'", childRange.Id, partitionKeyRangeId, continuationToken));
    }

    await Task.WhenAll(tasks);

    // Child leases exist now, so the parent's lease can go.
    await this.leaseManager.DeleteAsync(new DocumentServiceLease { Id = leaseId });
    TraceLog.Informational(string.Format("Deleted lease for gone (splitted) partition '{0}' continuation '{1}'", partitionKeyRangeId, continuationToken));

    // Note: the rest is up to lease taker, that after waking up would consume these new leases.
    return true;
}
/// <summary>
/// Sets up the host: creates the document client, reads the monitored database and collection,
/// builds the lease prefix, initializes the lease manager and lease store, creates leases for
/// the current partition key ranges and starts the partition manager.
/// </summary>
async Task InitializeAsync()
{
    var monitored = this.collectionLocation;

    this.documentClient = new DocumentClient(monitored.Uri, monitored.MasterKey, monitored.ConnectionPolicy);

    Database database = await this.documentClient.ReadDatabaseAsync(UriFactory.CreateDatabaseUri(monitored.DatabaseName));

    var readOptions = new RequestOptions { PopulateQuotaInfo = true };
    ResourceResponse<DocumentCollection> collectionResponse = await this.documentClient.ReadDocumentCollectionAsync(
        UriFactory.CreateDocumentCollectionUri(monitored.DatabaseName, monitored.CollectionName),
        readOptions);
    DocumentCollection collection = collectionResponse.Resource;

    // From here on the collection is addressed only through its self link, so a collection
    // that is dropped and re-created under the same name is never picked up by accident.
    this.collectionSelfLink = collection.SelfLink;

    // Use the prefix from the options when one was supplied, otherwise an empty one.
    string configuredPrefix = this.options.LeasePrefix ?? string.Empty;
    this.leasePrefix = string.Format(
        CultureInfo.InvariantCulture,
        "{0}{1}_{2}_{3}",
        configuredPrefix,
        monitored.Uri.Host,
        database.ResourceId,
        collection.ResourceId);

    var documentLeaseManager = new DocumentServiceLeaseManager(
        this.auxCollectionLocation,
        this.leasePrefix,
        this.options.LeaseExpirationInterval,
        this.options.LeaseRenewInterval);
    await documentLeaseManager.InitializeAsync();

    // The same object serves as both lease manager and checkpoint manager.
    this.leaseManager = documentLeaseManager;
    this.checkpointManager = (ICheckpointManager)documentLeaseManager;

    if (this.options.DiscardExistingLeases)
    {
        TraceLog.Warning(string.Format("Host '{0}': removing all leases, as requested by ChangeFeedHostOptions", this.HostName));
        await this.leaseManager.DeleteAllAsync();
    }

    // The lease store cannot be stale: the monitored collection's resource id is part of the
    // lease id prefix, and that id changes when the collection is removed and re-created.
    await this.leaseManager.CreateLeaseStoreIfNotExistsAsync();

    var rangesById = new Dictionary<string, PartitionKeyRange>();
    var currentRanges = await this.EnumPartitionKeyRangesAsync(this.collectionSelfLink);
    foreach (var partitionRange in currentRanges)
    {
        rangesById.Add(partitionRange.Id, partitionRange);
    }

    TraceLog.Informational(string.Format(
        "Source collection: '{0}', {1} partition(s), {2} document(s)",
        monitored.CollectionName,
        rangesById.Count,
        GetDocumentCount(collectionResponse)));

    await this.CreateLeases(rangesById);

    this.partitionManager = new PartitionManager<DocumentServiceLease>(this.HostName, this.leaseManager, this.options);
    await this.partitionManager.SubscribeAsync(this);
    await this.partitionManager.InitializeAsync();
}
/// <summary>
/// Removes a lease from the set of currently owned partitions, notifies observers that the
/// partition was released and, when this host still owns the lease, releases it in the lease store.
/// </summary>
/// <param name="lease">The lease to remove. Nothing happens when it is null or not currently owned.</param>
/// <param name="hasOwnership">Whether this host still owns the lease; controls the release call and the default close reason.</param>
/// <param name="closeReason">
/// Explicit close reason. When left as Unknown, Shutdown is used if <paramref name="hasOwnership"/>
/// is true and LeaseLost otherwise.
/// </param>
/// <remarks>
/// While observers are being notified, an owned lease is kept in the keep-renewing-during-close
/// map. Exceptions from observer notification and from the release call are traced and swallowed.
/// </remarks>
async Task RemoveLeaseAsync(T lease, bool hasOwnership, ChangeFeedObserverCloseReason closeReason = ChangeFeedObserverCloseReason.Unknown)
{
    ChangeFeedObserverCloseReason reason = closeReason != ChangeFeedObserverCloseReason.Unknown
        ? closeReason
        : hasOwnership ? ChangeFeedObserverCloseReason.Shutdown : ChangeFeedObserverCloseReason.LeaseLost;

    // On success TryRemove hands back the lease instance stored in the map, which is used from here on.
    if (lease != null && this.currentlyOwnedPartitions != null && this.currentlyOwnedPartitions.TryRemove(lease.PartitionId, out lease))
    {
        TraceLog.Informational(string.Format("Host '{0}' successfully removed PartitionId '{1}' with lease token '{2}' from currently owned partitions.", this.workerName, lease.PartitionId, lease.ConcurrencyToken));

        try
        {
            if (hasOwnership)
            {
                this.keepRenewingDuringClose.TryAdd(lease.PartitionId, lease);
            }

            TraceLog.Informational(string.Format("Host '{0}' closing event processor for PartitionId '{1}' and lease token '{2}' with reason '{3}'", this.workerName, lease.PartitionId, lease.ConcurrencyToken, reason));

            // Notify the host that we lost partition so shutdown can be triggered on the host
            await this.partitionObserverManager.NotifyPartitionReleasedAsync(lease, reason);

            TraceLog.Informational(string.Format("Host '{0}' closed event processor for PartitionId '{1}' and lease token '{2}' with reason '{3}'", this.workerName, lease.PartitionId, lease.ConcurrencyToken, reason));
        }
        catch (Exception ex)
        {
            // Eat any exceptions during notification of observers
            TraceLog.Exception(ex);
        }
        finally
        {
            if (hasOwnership)
            {
                // Do not reuse 'lease' as the out argument: if the entry is already gone,
                // TryRemove would reset it to null and the release below would run on a null lease.
                T removedFromRenewal;
                this.keepRenewingDuringClose.TryRemove(lease.PartitionId, out removedFromRenewal);
            }
        }

        if (hasOwnership)
        {
            try
            {
                await this.leaseManager.ReleaseAsync(lease);
                TraceLog.Informational(string.Format("Host '{0}' successfully released lease on PartitionId '{1}' with lease token '{2}'", this.workerName, lease.PartitionId, lease.ConcurrencyToken));
            }
            catch (LeaseLostException)
            {
                // We have already shutdown the processor so we can ignore any LeaseLost at this point
                TraceLog.Informational(string.Format("Host '{0}' failed to release lease for PartitionId '{1}' with lease token '{2}' due to conflict.", this.workerName, lease.PartitionId, lease.ConcurrencyToken));
            }
            catch (Exception ex)
            {
                TraceLog.Exception(ex);
            }
        }
    }
}
/// <summary>
/// Registers a newly acquired lease as owned by this host and notifies observers so that
/// processing can start for its partition.
/// </summary>
/// <param name="lease">The acquired lease.</param>
/// <remarks>
/// When the partition is already registered as owned, the lease is released instead. When
/// observer notification throws, the exception is traced and the lease is removed again with
/// close reason ObserverError.
/// </remarks>
async Task AddLeaseAsync(T lease)
{
    bool registered = this.currentlyOwnedPartitions.TryAdd(lease.PartitionId, lease);

    if (!registered)
    {
        // The partition is still tracked as owned (a previous shutdown for it has not
        // finished yet), so give the lease back and let another host pick it up.
        try
        {
            TraceLog.Warning(string.Format("Host '{0}' unable to add PartitionId '{1}' with lease token '{2}' to currently owned partitions.", this.workerName, lease.PartitionId, lease.ConcurrencyToken));

            await this.leaseManager.ReleaseAsync(lease);

            TraceLog.Informational(string.Format("Host '{0}' successfully released lease on PartitionId '{1}' with lease token '{2}'", this.workerName, lease.PartitionId, lease.ConcurrencyToken));
        }
        catch (LeaseLostException)
        {
            // The lease is no longer ours anyway; nothing further to do.
            TraceLog.Informational(string.Format("Host '{0}' failed to release lease for PartitionId '{1}' with lease token '{2}' due to conflict.", this.workerName, lease.PartitionId, lease.ConcurrencyToken));
        }
        catch (Exception releaseError)
        {
            TraceLog.Exception(releaseError);
        }

        return;
    }

    bool processorOpened = true;
    try
    {
        TraceLog.Informational(string.Format("Host '{0}' opening event processor for PartitionId '{1}' and lease token '{2}'", this.workerName, lease.PartitionId, lease.ConcurrencyToken));

        await this.partitionObserverManager.NotifyPartitionAcquiredAsync(lease);

        TraceLog.Informational(string.Format("Host '{0}' opened event processor for PartitionId '{1}' and lease token '{2}'", this.workerName, lease.PartitionId, lease.ConcurrencyToken));
    }
    catch (Exception openError)
    {
        TraceLog.Informational(string.Format("Host '{0}' failed to initialize processor for PartitionId '{1}' and lease token '{2}'", this.workerName, lease.PartitionId, lease.ConcurrencyToken));
        processorOpened = false;

        // Observer failures are traced, never propagated.
        TraceLog.Exception(openError);
    }

    if (!processorOpened)
    {
        // The processor could not be initialized: release the lease so another node can take the partition.
        await this.RemoveLeaseAsync(lease, true, ChangeFeedObserverCloseReason.ObserverError);
    }
}
/// <summary>
/// Tries to acquire the given lease for this host.
/// </summary>
/// <param name="lease">The lease to acquire.</param>
/// <returns>The lease returned by the lease manager, or <c>null</c> when acquisition threw.</returns>
async Task<T> TryAcquireLeaseAsync(T lease)
{
    T acquired = null;

    try
    {
        acquired = await this.leaseManager.AcquireAsync(lease, this.workerName);
    }
    catch (LeaseLostException)
    {
        TraceLog.Informational(string.Format("Host '{0}' failed to acquire lease for PartitionId '{1}' due to conflict.", this.workerName, lease.PartitionId));
    }
    catch (Exception acquireError)
    {
        // Any other failure is traced and reported to the caller as "not acquired".
        TraceLog.Exception(acquireError);
    }

    return acquired;
}
/// <summary>
/// Tries to take over a lease that is currently assigned to another host.
/// </summary>
/// <param name="lease">The lease to steal.</param>
/// <returns>The lease returned by the lease manager, or <c>null</c> when the attempt threw.</returns>
async Task<T> TryStealLeaseAsync(T lease)
{
    T stolen = null;

    try
    {
        stolen = await this.leaseManager.AcquireAsync(lease, this.workerName);
    }
    catch (LeaseLostException)
    {
        // Someone else changed the lease before we could take it.
        TraceLog.Informational(string.Format("Host '{0}' failed to steal lease for PartitionId '{1}' due to conflict.", this.workerName, lease.PartitionId));
    }
    catch (Exception stealError)
    {
        // Any other failure is traced and reported to the caller as "not stolen".
        TraceLog.Exception(stealError);
    }

    return stolen;
}
/// <summary>
/// Background loop that, while the manager is started, periodically takes available leases and
/// registers each of them via AddLeaseAsync.
/// </summary>
/// <remarks>
/// Exceptions from a single pass are traced and the loop continues. Between passes the loop
/// waits for the configured lease acquire interval; cancellation of that wait is traced and
/// the loop condition is re-evaluated.
/// </remarks>
async Task LeaseTakerAsync()
{
    while (this.isStarted == 1)
    {
        try
        {
            TraceLog.Informational(string.Format("Host '{0}' starting to check for available leases.", this.workerName));

            var taken = await this.TakeLeasesAsync();
            int takenCount = taken.Count;
            if (takenCount > 0)
            {
                TraceLog.Informational(string.Format("Host '{0}' adding {1} leases...", this.workerName, takenCount));
            }

            // Start all additions first, then wait for them together.
            var pendingAdds = new List<Task>();
            foreach (T takenLease in taken.Values)
            {
                pendingAdds.Add(this.AddLeaseAsync(takenLease));
            }

            await Task.WhenAll(pendingAdds.ToArray());
        }
        catch (Exception passError)
        {
            TraceLog.Exception(passError);
        }

        try
        {
            await Task.Delay(this.options.LeaseAcquireInterval, this.leaseTakerCancellationTokenSource.Token);
        }
        catch (OperationCanceledException)
        {
            TraceLog.Informational(string.Format("Host '{0}' AcquireLease task canceled.", this.workerName));
        }
    }

    TraceLog.Informational(string.Format("Host '{0}' AcquireLease task completed.", this.workerName));
}
/// <summary>
/// On startup, renews every lease whose owner matches this host (case-insensitively) and
/// registers each successfully renewed lease via AddLeaseAsync.
/// </summary>
/// <remarks>
/// Renewals are awaited one at a time; the AddLeaseAsync calls are started together and
/// awaited as a group. Leases that could not be renewed are traced and skipped.
/// </remarks>
public async Task InitializeAsync()
{
    List<T> leases = new List<T>();

    TraceLog.Verbose(string.Format("Host '{0}' starting renew leases assigned to this host on initialize.", this.workerName));

    foreach (var lease in await this.leaseManager.ListLeases())
    {
        // Only leases already assigned to this host are renewed here.
        if (!string.Equals(lease.Owner, this.workerName, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        T renewedLease = await this.RenewLeaseAsync(lease);
        if (renewedLease != null)
        {
            leases.Add(renewedLease);
        }
        else
        {
            TraceLog.Informational(string.Format("Host '{0}' unable to renew lease '{1}' on startup.", this.workerName, lease.PartitionId));
        }
    }

    var addLeaseTasks = new List<Task>();
    foreach (T lease in leases)
    {
        TraceLog.Informational(string.Format("Host '{0}' acquired lease for PartitionId '{1}' on startup.", this.workerName, lease.PartitionId));
        addLeaseTasks.Add(this.AddLeaseAsync(lease));
    }

    await Task.WhenAll(addLeaseTasks.ToArray());
}
/// <summary>
/// Background loop that renews, in parallel, the leases of all currently owned partitions and
/// of all partitions that are in the middle of being closed.
/// </summary>
/// <remarks>
/// The loop runs while the manager is started or shutdown has not completed. After each pass:
/// renewed leases are written back to the owned-partitions map; owned leases that failed to
/// renew are removed via RemoveLeaseAsync with reason LeaseLost; closing leases that failed to
/// renew are dropped from further renewal. Both maps are cleared when the loop ends.
/// </remarks>
async Task LeaseRenewer()
{
    while (this.isStarted == 1 || !this.shutdownComplete)
    {
        try
        {
            TraceLog.Informational(string.Format("Host '{0}' starting renewal of Leases.", this.workerName));

            // All three collections are filled from continuations that may run concurrently,
            // so each of them has to be a thread-safe collection.
            ConcurrentBag<T> renewedLeases = new ConcurrentBag<T>();
            ConcurrentBag<T> failedToRenewLeases = new ConcurrentBag<T>();
            ConcurrentBag<T> failedToRenewShutdownLeases = new ConcurrentBag<T>();
            List<Task> renewTasks = new List<Task>();

            // Renew leases for all currently owned partitions in parallel
            foreach (T lease in this.currentlyOwnedPartitions.Values)
            {
                renewTasks.Add(this.RenewLeaseAsync(lease).ContinueWith(renewResult =>
                {
                    if (renewResult.Result != null)
                    {
                        renewedLeases.Add(renewResult.Result);
                    }
                    else
                    {
                        // Keep track of all failed attempts to renew so we can trigger shutdown for these partitions
                        failedToRenewLeases.Add(lease);
                    }
                }));
            }

            // Renew leases for all partitions currently in shutdown
            foreach (T shutdownLeases in this.keepRenewingDuringClose.Values)
            {
                renewTasks.Add(this.RenewLeaseAsync(shutdownLeases).ContinueWith(renewResult =>
                {
                    if (renewResult.Result != null)
                    {
                        renewedLeases.Add(renewResult.Result);
                    }
                    else
                    {
                        // Keep track of all failed attempts to renew shutdown leases so we can remove them from further renew attempts
                        failedToRenewShutdownLeases.Add(shutdownLeases);
                    }
                }));
            }

            // Wait for all renews to complete
            await Task.WhenAll(renewTasks.ToArray());

            // Update renewed leases.
            foreach (T lease in renewedLeases)
            {
                bool updateResult = this.currentlyOwnedPartitions.TryUpdate(lease.PartitionId, lease, lease);
                if (!updateResult)
                {
                    TraceLog.Warning(string.Format("Host '{0}' Renewed lease {1} but failed to update it in the map (ignorable).", this.workerName, lease));
                }
            }

            // Trigger shutdown of all partitions we failed to renew leases
            await failedToRenewLeases.ForEachAsync(
                async lease => await this.RemoveLeaseAsync(lease, false, ChangeFeedObserverCloseReason.LeaseLost),
                this.options.DegreeOfParallelism);

            // Now remove all failed renewals of shutdown leases from further renewals
            foreach (T failedToRenewShutdownLease in failedToRenewShutdownLeases)
            {
                T removedLease = null;
                this.keepRenewingDuringClose.TryRemove(failedToRenewShutdownLease.PartitionId, out removedLease);
            }

            await Task.Delay(this.options.LeaseRenewInterval, this.leaseRenewerCancellationTokenSource.Token);
        }
        catch (OperationCanceledException)
        {
            TraceLog.Informational(string.Format("Host '{0}' Renewer task canceled.", this.workerName));
        }
        catch (Exception ex)
        {
            TraceLog.Exception(ex);
        }
    }

    this.currentlyOwnedPartitions.Clear();
    this.keepRenewingDuringClose.Clear();

    TraceLog.Informational(string.Format("Host '{0}' Renewer task completed.", this.workerName));
}
/// <summary>
/// Create leases for new partitions and take care of split partitions.
/// </summary>
/// <param name="ranges">Current partition key ranges of the monitored collection, keyed by range id.</param>
/// <remarks>
/// A range without a lease gets a new one. If any of its parents has a lease but is no longer
/// among <paramref name="ranges"/>, the new lease starts from that parent's continuation token
/// (the last such parent wins when there are several). Leases of partitions that are no longer
/// among <paramref name="ranges"/> are deleted afterwards.
/// </remarks>
private async Task CreateLeases(IDictionary<string, PartitionKeyRange> ranges)
{
    Debug.Assert(ranges != null);

    // Get leases after getting ranges, to make sure that no other hosts checked in continuation for split partition after we got leases.
    var existingLeases = new Dictionary<string, DocumentServiceLease>();
    foreach (var lease in await this.leaseManager.ListLeases())
    {
        existingLeases.Add(lease.PartitionId, lease);
    }

    // Partitions that have a lease but are no longer present in the collection.
    var gonePartitionIds = new HashSet<string>();
    foreach (var partitionId in existingLeases.Keys)
    {
        if (!ranges.ContainsKey(partitionId))
        {
            gonePartitionIds.Add(partitionId);
        }
    }

    // Partitions that are present in the collection but have no lease yet.
    var addedPartitionIds = new List<string>();
    foreach (var range in ranges)
    {
        if (!existingLeases.ContainsKey(range.Key))
        {
            addedPartitionIds.Add(range.Key);
        }
    }

    // Create leases for new partitions, if there was split, use continuation from parent partition.
    var parentIdToChildLeases = new ConcurrentDictionary<string, ConcurrentQueue<DocumentServiceLease>>();
    await addedPartitionIds.ForEachAsync(
        async addedRangeId =>
        {
            this.statsSinceLastCheckpoint.AddOrUpdate(
                addedRangeId,
                new CheckpointStats(),
                (partitionId, existingStats) => existingStats);

            string continuationToken = null;
            string parentIds = string.Empty;
            var range = ranges[addedRangeId];

            // Check for split.
            if (range.Parents != null && range.Parents.Count > 0)
            {
                foreach (var parentRangeId in range.Parents)
                {
                    if (gonePartitionIds.Contains(parentRangeId))
                    {
                        // Transfer continuation from lease for gone parent to lease for its child partition.
                        Debug.Assert(existingLeases[parentRangeId] != null);

                        string parentContinuation = existingLeases[parentRangeId].ContinuationToken;
                        parentIds += parentIds.Length == 0 ? parentRangeId : "," + parentRangeId;

                        if (continuationToken != null)
                        {
                            // Four placeholders, four arguments: new, current, and the value that will be used.
                            TraceLog.Warning(string.Format(
                                "Partition {0}: found more than one parent, new continuation '{1}', current '{2}', will use '{3}'",
                                addedRangeId,
                                parentContinuation,
                                continuationToken,
                                parentContinuation));
                        }

                        continuationToken = parentContinuation;
                    }
                }
            }

            bool wasCreated = await this.leaseManager.CreateLeaseIfNotExistAsync(addedRangeId, continuationToken);
            if (wasCreated)
            {
                if (parentIds.Length == 0)
                {
                    TraceLog.Informational(string.Format("Created lease for partition '{0}', continuation '{1}'.", addedRangeId, continuationToken));
                }
                else
                {
                    TraceLog.Informational(string.Format("Created lease for partition '{0}' as child of split partition(s) '{1}', continuation '{2}'.", addedRangeId, parentIds, continuationToken));
                }
            }
            else
            {
                TraceLog.Warning(string.Format("Some other host created lease for '{0}' as child of split partition(s) '{1}', continuation '{2}'.", addedRangeId, parentIds, continuationToken));
            }
        },
        this.options.DegreeOfParallelism);

    // Remove leases for splitted (and thus gone partitions) and update continuation token.
    await gonePartitionIds.ForEachAsync(
        async goneRangeId =>
        {
            await this.leaseManager.DeleteAsync(existingLeases[goneRangeId]);
            TraceLog.Informational(string.Format("Deleted lease for gone (splitted) partition '{0}', continuation '{1}'", goneRangeId, existingLeases[goneRangeId].ContinuationToken));

            CheckpointStats removedStatsUnused;
            this.statsSinceLastCheckpoint.TryRemove(goneRangeId, out removedStatsUnused);
        },
        this.options.DegreeOfParallelism);
}
/// <summary>
/// Works out how many partitions this host should own and tries to take the missing ones,
/// either from unowned/expired leases or, when there are none, by stealing a single lease from
/// the host that currently owns the most.
/// </summary>
/// <returns>The leases taken during this pass, keyed by partition id; empty when none were taken.</returns>
/// <remarks>
/// The target is ceil(partitions / hosts) when there are more partitions than hosts and 1
/// otherwise, then clamped by the configured max and min partition counts (each only when
/// greater than zero). At most one lease is stolen per pass.
/// </remarks>
async Task<IDictionary<string, T>> TakeLeasesAsync()
{
    IDictionary<string, T> allPartitions = new Dictionary<string, T>();
    IDictionary<string, T> takenLeases = new Dictionary<string, T>();
    IDictionary<string, int> workerToPartitionCount = new Dictionary<string, int>();
    List<T> expiredLeases = new List<T>();

    foreach (var lease in await this.leaseManager.ListLeases())
    {
        Debug.Assert(lease.PartitionId != null, "TakeLeasesAsync: lease.PartitionId cannot be null.");

        allPartitions.Add(lease.PartitionId, lease);
        if (string.IsNullOrWhiteSpace(lease.Owner) || await this.leaseManager.IsExpired(lease))
        {
            TraceLog.Verbose(string.Format("Found unused or expired lease: {0}", lease));
            expiredLeases.Add(lease);
        }
        else
        {
            // Count the lease against its current owner (count stays 0 when the owner is new).
            int count;
            string assignedTo = lease.Owner;
            workerToPartitionCount.TryGetValue(assignedTo, out count);
            workerToPartitionCount[assignedTo] = count + 1;
        }
    }

    // This host takes part in the balancing even when it owns nothing yet.
    if (!workerToPartitionCount.ContainsKey(this.workerName))
    {
        workerToPartitionCount.Add(this.workerName, 0);
    }

    int partitionCount = allPartitions.Count;
    int workerCount = workerToPartitionCount.Count;

    if (partitionCount <= 0)
    {
        return takenLeases;
    }

    int target = 1;
    if (partitionCount > workerCount)
    {
        target = (int)Math.Ceiling((double)partitionCount / (double)workerCount);
    }

    Debug.Assert(this.options.MinPartitionCount <= this.options.MaxPartitionCount);

    if (this.options.MaxPartitionCount > 0 && target > this.options.MaxPartitionCount)
    {
        target = this.options.MaxPartitionCount;
    }

    if (this.options.MinPartitionCount > 0 && target < this.options.MinPartitionCount)
    {
        target = this.options.MinPartitionCount;
    }

    int myCount = workerToPartitionCount[this.workerName];
    int partitionsNeededForMe = target - myCount;

    TraceLog.Informational(
        string.Format(
            "Host '{0}' {1} partitions, {2} hosts, {3} available leases, target = {4}, min = {5}, max = {6}, mine = {7}, will try to take {8} lease(s) for myself'.",
            this.workerName,
            partitionCount,
            workerCount,
            expiredLeases.Count,
            target,
            this.options.MinPartitionCount,
            this.options.MaxPartitionCount,
            myCount,
            Math.Max(partitionsNeededForMe, 0)));

    if (partitionsNeededForMe <= 0)
    {
        return takenLeases;
    }

    if (expiredLeases.Count > 0)
    {
        // Prefer leases nobody holds: take them until the target is reached.
        foreach (T leaseToTake in expiredLeases)
        {
            if (partitionsNeededForMe == 0)
            {
                break;
            }

            TraceLog.Informational(string.Format("Host '{0}' attempting to take lease for PartitionId '{1}'.", this.workerName, leaseToTake.PartitionId));
            T acquiredLease = await this.TryAcquireLeaseAsync(leaseToTake);
            if (acquiredLease != null)
            {
                TraceLog.Informational(string.Format("Host '{0}' successfully acquired lease for PartitionId '{1}': {2}", this.workerName, leaseToTake.PartitionId, acquiredLease));
                takenLeases.Add(acquiredLease.PartitionId, acquiredLease);
                partitionsNeededForMe--;
            }
        }

        return takenLeases;
    }

    // Nothing is free: find the host owning the most partitions. The first entry seeds the
    // search (Key is null only while nothing has been selected); ties keep the earlier entry.
    KeyValuePair<string, int> workerToStealFrom = default(KeyValuePair<string, int>);
    foreach (var candidate in workerToPartitionCount)
    {
        if (workerToStealFrom.Key == null || workerToStealFrom.Value < candidate.Value)
        {
            workerToStealFrom = candidate;
        }
    }

    if (workerToStealFrom.Value > target - (partitionsNeededForMe > 1 ? 1 : 0))
    {
        foreach (var kvp in allPartitions)
        {
            if (string.Equals(kvp.Value.Owner, workerToStealFrom.Key, StringComparison.OrdinalIgnoreCase))
            {
                T leaseToTake = kvp.Value;
                TraceLog.Informational(string.Format("Host '{0}' attempting to steal lease from '{1}' for PartitionId '{2}'.", this.workerName, workerToStealFrom.Key, leaseToTake.PartitionId));
                T stolenLease = await this.TryStealLeaseAsync(leaseToTake);
                if (stolenLease != null)
                {
                    TraceLog.Informational(string.Format("Host '{0}' stole lease from '{1}' for PartitionId '{2}'.", this.workerName, workerToStealFrom.Key, leaseToTake.PartitionId));
                    takenLeases.Add(stolenLease.PartitionId, stolenLease);

                    // Only steal one lease at a time
                    break;
                }
            }
        }
    }

    return takenLeases;
}
/// <summary>
/// Called when a partition lease is acquired: creates an observer for the partition and starts
/// a worker task that reads the partition's change feed, delivers changes to the observer and
/// checkpoints when IsCheckpointNeeded says so. The worker is then recorded in the worker map.
/// </summary>
/// <param name="lease">The acquired lease; must be non-null and have an owner.</param>
/// <remarks>
/// Inside the worker:
/// - NotFound (other than ReadSessionNotAvailable) and a failed split handling start host shutdown.
/// - Gone/PartitionKeyRangeGone runs HandleSplitAsync and then throws LeaseLostException.
/// - Gone/Splitting, TooManyRequests and ServiceUnavailable are retried after a delay.
/// - Any other DocumentClientException is rethrown with its original stack.
/// When the worker ends with a close reason set, partition release is started on a separate task.
/// </remarks>
async Task IPartitionObserver<DocumentServiceLease>.OnPartitionAcquiredAsync(DocumentServiceLease lease)
{
    Debug.Assert(lease != null && !string.IsNullOrEmpty(lease.Owner), "lease");
    TraceLog.Informational(string.Format("Host '{0}' partition {1}: acquired!", this.HostName, lease.PartitionId));

#if DEBUG
    Interlocked.Increment(ref this.partitionCount);
#endif

    IChangeFeedObserver observer = this.observerFactory.CreateObserver();
    ChangeFeedObserverContext context = new ChangeFeedObserverContext { PartitionKeyRangeId = lease.PartitionId };
    CancellationTokenSource cancellation = new CancellationTokenSource();

    // Create ChangeFeedOptions to use for this worker.
    ChangeFeedOptions options = new ChangeFeedOptions
    {
        MaxItemCount = this.changeFeedOptions.MaxItemCount,
        PartitionKeyRangeId = this.changeFeedOptions.PartitionKeyRangeId,
        SessionToken = this.changeFeedOptions.SessionToken,
        StartFromBeginning = this.changeFeedOptions.StartFromBeginning,
        RequestContinuation = this.changeFeedOptions.RequestContinuation
    };

    var workerTask = await Task.Factory.StartNew(async () =>
    {
        // Stays null only when the worker was cancelled; any other exit sets a reason.
        ChangeFeedObserverCloseReason? closeReason = null;
        try
        {
            try
            {
                await observer.OpenAsync(context);
            }
            catch (Exception ex)
            {
                TraceLog.Error(string.Format("IChangeFeedObserver.OpenAsync exception: {0}", ex));
                closeReason = ChangeFeedObserverCloseReason.ObserverError;
                throw;
            }

            // Read this partition only, resuming from the lease's continuation when it has one.
            options.PartitionKeyRangeId = lease.PartitionId;
            if (!string.IsNullOrEmpty(lease.ContinuationToken))
            {
                options.RequestContinuation = lease.ContinuationToken;
            }

            CheckpointStats checkpointStats = null;
            if (!this.statsSinceLastCheckpoint.TryGetValue(lease.PartitionId, out checkpointStats) || checkpointStats == null)
            {
                // It could be that the lease was created by different host and we picked it up.
                checkpointStats = this.statsSinceLastCheckpoint.AddOrUpdate(
                    lease.PartitionId,
                    new CheckpointStats(),
                    (partitionId, existingStats) => existingStats);

                // Logged through TraceLog like every other message in this method.
                TraceLog.Warning(string.Format("Added stats for partition '{0}' for which the lease was picked up after the host was started.", lease.PartitionId));
            }

            IDocumentQuery<Document> query = this.documentClient.CreateDocumentChangeFeedQuery(this.collectionSelfLink, options);

            TraceLog.Verbose(string.Format("Worker start: partition '{0}', continuation '{1}'", lease.PartitionId, lease.ContinuationToken));

            // Last continuation seen from the service; handed to split handling.
            string lastContinuation = options.RequestContinuation;

            try
            {
                while (this.isShutdown == 0)
                {
                    do
                    {
                        // The exception is captured rather than handled in the catch block so that
                        // the handling below can await.
                        ExceptionDispatchInfo exceptionDispatchInfo = null;
                        FeedResponse<Document> response = null;

                        try
                        {
                            response = await query.ExecuteNextAsync<Document>();
                            lastContinuation = response.ResponseContinuation;
                        }
                        catch (DocumentClientException ex)
                        {
                            exceptionDispatchInfo = ExceptionDispatchInfo.Capture(ex);
                        }

                        if (exceptionDispatchInfo != null)
                        {
                            DocumentClientException dcex = (DocumentClientException)exceptionDispatchInfo.SourceException;

                            if (StatusCode.NotFound == (StatusCode)dcex.StatusCode && SubStatusCode.ReadSessionNotAvailable != (SubStatusCode)GetSubStatusCode(dcex))
                            {
                                // Most likely, the database or collection was removed while we were enumerating.
                                // Shut down. The user will need to start over.
                                // Note: this has to be a new task, can't await for shutdown here, as shutdown awaits for all worker tasks.
                                TraceLog.Error(string.Format("Partition {0}: resource gone (subStatus={1}). Aborting.", context.PartitionKeyRangeId, GetSubStatusCode(dcex)));
                                await Task.Factory.StartNew(() => this.StopAsync(ChangeFeedObserverCloseReason.ResourceGone));
                                break;
                            }
                            else if (StatusCode.Gone == (StatusCode)dcex.StatusCode)
                            {
                                SubStatusCode subStatusCode = (SubStatusCode)GetSubStatusCode(dcex);
                                if (SubStatusCode.PartitionKeyRangeGone == subStatusCode)
                                {
                                    bool isSuccess = await HandleSplitAsync(context.PartitionKeyRangeId, lastContinuation, lease.Id);
                                    if (!isSuccess)
                                    {
                                        TraceLog.Error(string.Format("Partition {0}: HandleSplit failed! Aborting.", context.PartitionKeyRangeId));
                                        await Task.Factory.StartNew(() => this.StopAsync(ChangeFeedObserverCloseReason.ResourceGone));
                                        break;
                                    }

                                    // Throw LeaseLostException so that we take the lease down.
                                    throw new LeaseLostException(lease, exceptionDispatchInfo.SourceException, true);
                                }
                                else if (SubStatusCode.Splitting == subStatusCode)
                                {
                                    TraceLog.Warning(string.Format("Partition {0} is splitting. Will retry to read changes until split finishes. {1}", context.PartitionKeyRangeId, dcex.Message));
                                }
                                else
                                {
                                    exceptionDispatchInfo.Throw();
                                }
                            }
                            else if (StatusCode.TooManyRequests == (StatusCode)dcex.StatusCode || StatusCode.ServiceUnavailable == (StatusCode)dcex.StatusCode)
                            {
                                TraceLog.Warning(string.Format("Partition {0}: retriable exception : {1}", context.PartitionKeyRangeId, dcex.Message));
                            }
                            else
                            {
                                exceptionDispatchInfo.Throw();
                            }

                            // Retriable case: wait for the service-suggested delay, or the poll delay when none is given.
                            await Task.Delay(dcex.RetryAfter != TimeSpan.Zero ? dcex.RetryAfter : this.options.FeedPollDelay, cancellation.Token);
                        }

                        if (response != null)
                        {
                            if (response.Count > 0)
                            {
                                List<Document> docs = new List<Document>();
                                docs.AddRange(response);

                                try
                                {
                                    // The response is exposed on the context only for the duration of the callback.
                                    context.FeedResponse = response;
                                    await observer.ProcessChangesAsync(context, docs);
                                }
                                catch (Exception ex)
                                {
                                    TraceLog.Error(string.Format("IChangeFeedObserver.ProcessChangesAsync exception: {0}", ex));
                                    closeReason = ChangeFeedObserverCloseReason.ObserverError;
                                    throw;
                                }
                                finally
                                {
                                    context.FeedResponse = null;
                                }
                            }

                            checkpointStats.ProcessedDocCount += (uint)response.Count;

                            if (IsCheckpointNeeded(lease, checkpointStats))
                            {
                                lease = await CheckpointAsync(lease, response.ResponseContinuation, context);
                                checkpointStats.Reset();
                            }
                            else if (response.Count > 0)
                            {
                                TraceLog.Informational(string.Format("Checkpoint: not checkpointing for partition {0}, {1} docs, new continuation '{2}' as frequency condition is not met", lease.PartitionId, response.Count, response.ResponseContinuation));
                            }
                        }
                    }
                    while (query.HasMoreResults && this.isShutdown == 0);

                    if (this.isShutdown == 0)
                    {
                        await Task.Delay(this.options.FeedPollDelay, cancellation.Token);
                    }
                } // Outer while (this.isShutdown == 0) loop.

                closeReason = ChangeFeedObserverCloseReason.Shutdown;
            }
            catch (TaskCanceledException)
            {
                Debug.Assert(cancellation.IsCancellationRequested, "cancellation.IsCancellationRequested");
                TraceLog.Informational(string.Format("Cancel signal received for partition {0} worker!", context.PartitionKeyRangeId));
            }
        }
        catch (LeaseLostException ex)
        {
            closeReason = ex.IsGone ? ChangeFeedObserverCloseReason.LeaseGone : ChangeFeedObserverCloseReason.LeaseLost;
        }
        catch (Exception ex)
        {
            TraceLog.Error(string.Format("Partition {0} exception: {1}", context.PartitionKeyRangeId, ex));

            // Keep a reason that was already set (e.g. ObserverError) instead of overwriting it.
            if (!closeReason.HasValue)
            {
                closeReason = ChangeFeedObserverCloseReason.Unknown;
            }
        }

        if (closeReason.HasValue)
        {
            TraceLog.Informational(string.Format("Releasing lease for partition {0} due to an error, reason: {1}!", context.PartitionKeyRangeId, closeReason.Value));

            // Note: this has to be a new task, because OnPartitionReleasedAsync awaits for worker task.
            await Task.Factory.StartNew(async () => await this.partitionManager.TryReleasePartitionAsync(context.PartitionKeyRangeId, true, closeReason.Value));
        }

        TraceLog.Informational(string.Format("Partition {0}: worker finished!", context.PartitionKeyRangeId));
    });

    // Record the worker so it can be cancelled and awaited on release or host stop;
    // an existing entry for the partition is replaced.
    var newWorkerData = new WorkerData(workerTask, observer, context, cancellation);
    this.partitionKeyRangeIdToWorkerMap.AddOrUpdate(context.PartitionKeyRangeId, newWorkerData, (string id, WorkerData d) => { return newWorkerData; });
}
/// <summary>
/// Called when this host acquires a partition lease. Creates an observer for the partition,
/// starts a background worker that polls the partition's change feed, delivers each non-empty
/// batch to the observer and checkpoints the lease, then registers the worker in
/// <c>partitionKeyRangeIdToWorkerMap</c>.
/// </summary>
/// <param name="lease">
/// The acquired lease. Asserted (debug builds only) to be non-null with a non-empty owner.
/// </param>
/// <returns>
/// A task that completes once the worker has been started and registered. It does not await
/// completion of the worker itself.
/// </returns>
async Task IPartitionObserver<DocumentServiceLease>.OnPartitionAcquiredAsync(DocumentServiceLease lease)
{
    Debug.Assert(lease != null && !string.IsNullOrEmpty(lease.Owner), "lease");
    TraceLog.Informational(string.Format("Host '{0}' partition {1}: acquired!", this.HostName, lease.PartitionId));

#if DEBUG
    Interlocked.Increment(ref this.partitionCount);
#endif

    IChangeFeedObserver observer = this.observerFactory.CreateObserver();
    ChangeFeedObserverContext context = new ChangeFeedObserverContext { PartitionKeyRangeId = lease.PartitionId };
    CancellationTokenSource cancellation = new CancellationTokenSource();

    // Create ChangeFeedOptions to use for this worker (a per-worker copy of the host-level options).
    ChangeFeedOptions options = new ChangeFeedOptions
    {
        MaxItemCount = this.changeFeedOptions.MaxItemCount,
        PartitionKeyRangeId = this.changeFeedOptions.PartitionKeyRangeId,
        SessionToken = this.changeFeedOptions.SessionToken,
        StartFromBeginning = this.changeFeedOptions.StartFromBeginning,
        RequestContinuation = this.changeFeedOptions.RequestContinuation
    };

    // StartNew with an async lambda yields Task<Task>; awaiting it here returns the inner task
    // (the worker itself) without waiting for the worker to finish.
    var workerTask = await Task.Factory.StartNew(async () =>
    {
        // Stays null on a clean exit; set when the worker stops because of an error.
        ChangeFeedObserverCloseReason? closeReason = null;

        try
        {
            try
            {
                await observer.OpenAsync(context);
            }
            catch (Exception ex)
            {
                TraceLog.Error(string.Format("IChangeFeedObserver.OpenAsync exception: {0}", ex));
                closeReason = ChangeFeedObserverCloseReason.ObserverError;
                throw;
            }

            // Scope the query to this lease's partition and resume from its checkpoint, if any.
            options.PartitionKeyRangeId = lease.PartitionId;
            if (!string.IsNullOrEmpty(lease.ContinuationToken))
            {
                options.RequestContinuation = lease.ContinuationToken;
            }

            IDocumentQuery<Document> query = this.documentClient.CreateDocumentChangeFeedQuery(this.collectionSelfLink, options);

            TraceLog.Verbose(string.Format("Worker start: partition '{0}', continuation '{1}'", lease.PartitionId, lease.ContinuationToken));

            try
            {
                while (this.isShutdown == 0)
                {
                    do
                    {
                        DocumentClientException dcex = null;
                        FeedResponse<Document> response = null;

                        try
                        {
                            response = await query.ExecuteNextAsync<Document>();
                        }
                        catch (DocumentClientException ex)
                        {
                            // Only NotFound, TooManyRequests and ServiceUnavailable are handled below;
                            // anything else propagates to the outer handlers.
                            if (StatusCode.NotFound != (StatusCode)ex.StatusCode &&
                                StatusCode.TooManyRequests != (StatusCode)ex.StatusCode &&
                                StatusCode.ServiceUnavailable != (StatusCode)ex.StatusCode)
                            {
                                throw;
                            }

                            dcex = ex;
                        }

                        if (dcex != null)
                        {
                            const int ReadSessionNotAvailable = 1002;
                            if (StatusCode.NotFound == (StatusCode)dcex.StatusCode && GetSubStatusCode(dcex) != ReadSessionNotAvailable)
                            {
                                // Most likely, the database or collection was removed while we were enumerating.
                                // Shut down. The user will need to start over.
                                // Note: this has to be a new task, can't await for shutdown here, as shutdown awaits for all worker tasks.
                                await Task.Factory.StartNew(() => this.StopAsync(ChangeFeedObserverCloseReason.ResourceGone));
                                break;
                            }
                            else
                            {
                                // Everything reaching this branch is retried after a delay: throttling,
                                // service unavailable, or NotFound with the ReadSessionNotAvailable
                                // sub-status (any other NotFound was handled by the branch above).
                                Debug.Assert(
                                    StatusCode.TooManyRequests == (StatusCode)dcex.StatusCode ||
                                    StatusCode.ServiceUnavailable == (StatusCode)dcex.StatusCode ||
                                    StatusCode.NotFound == (StatusCode)dcex.StatusCode);

                                TraceLog.Warning(string.Format("Partition {0}: retriable exception : {1}", context.PartitionKeyRangeId, dcex.Message));

                                // Use the exception's RetryAfter when it is non-zero; otherwise fall back to the poll delay.
                                await Task.Delay(dcex.RetryAfter != TimeSpan.Zero ? dcex.RetryAfter : this.options.FeedPollDelay, cancellation.Token);
                            }
                        }

                        if (response != null)
                        {
                            if (response.Count > 0)
                            {
                                List<Document> docs = new List<Document>();
                                docs.AddRange(response);

                                try
                                {
                                    await observer.ProcessChangesAsync(context, docs);
                                }
                                catch (Exception ex)
                                {
                                    TraceLog.Error(string.Format("IChangeFeedObserver.ProcessChangesAsync exception: {0}", ex));
                                    closeReason = ChangeFeedObserverCloseReason.ObserverError;
                                    throw;
                                }

                                // Checkpoint after every successful delivery to the client.
                                lease = await CheckpointAsync(lease, response.ResponseContinuation, context);
                            }
                            else if (string.IsNullOrEmpty(lease.ContinuationToken))
                            {
                                // Checkpoint if we've never done that for this lease.
                                lease = await CheckpointAsync(lease, response.ResponseContinuation, context);
                            }
                        }
                    }
                    while (query.HasMoreResults && this.isShutdown == 0);

                    // The feed is drained for now; wait before polling again unless shutting down.
                    if (this.isShutdown == 0)
                    {
                        await Task.Delay(this.options.FeedPollDelay, cancellation.Token);
                    }
                } // Outer while (this.isShutdown == 0) loop.
            }
            catch (TaskCanceledException)
            {
                // Raised by the Task.Delay calls above when the worker's token is cancelled.
                // closeReason is left untouched, so no lease release is triggered from here.
                Debug.Assert(cancellation.IsCancellationRequested, "cancellation.IsCancellationRequested");
                TraceLog.Informational(string.Format("Cancel signal received for partition {0} worker!", context.PartitionKeyRangeId));
            }
        }
        catch (LeaseLostException)
        {
            closeReason = ChangeFeedObserverCloseReason.LeaseLost;
        }
        catch (Exception ex)
        {
            TraceLog.Error(string.Format("Partition {0} exception: {1}", context.PartitionKeyRangeId, ex));

            // Keep a more specific reason (e.g. ObserverError) if one was recorded before the rethrow.
            if (!closeReason.HasValue)
            {
                closeReason = ChangeFeedObserverCloseReason.Unknown;
            }
        }

        if (closeReason.HasValue)
        {
            TraceLog.Informational(string.Format("Releasing lease for partition {0} due to an error, reason: {1}!", context.PartitionKeyRangeId, closeReason.Value));

            // Note: this has to be a new task, because OnPartitionReleasedAsync awaits for worker task.
            await Task.Factory.StartNew(async () => await this.partitionManager.TryReleasePartitionAsync(context.PartitionKeyRangeId, true, closeReason.Value));
        }

        TraceLog.Informational(string.Format("Partition {0}: worker finished!", context.PartitionKeyRangeId));
    });

    // Register the worker for this partition, replacing any existing entry for the same id.
    var newWorkerData = new WorkerData(workerTask, observer, context, cancellation);
    this.partitionKeyRangeIdToWorkerMap.AddOrUpdate(context.PartitionKeyRangeId, newWorkerData, (string id, WorkerData d) => { return(newWorkerData); });
}