/// <summary>
///   Stops an owned partition processing task in case it is running.  It is also removed from the tasks dictionary
///   along with its corresponding token source.
/// </summary>
///
/// <param name="partitionId">The identifier of the Event Hub partition whose processing is being stopped.</param>
/// <param name="reason">The reason why the processing for the specified partition is being stopped.</param>
///
/// <returns>A task to be resolved on when the operation has completed.</returns>
///
/// <remarks>
///   The partition context is removed and <c>ProcessingForPartitionStoppedAsync</c> is invoked on every call,
///   regardless of whether a running processing task was found for the partition.  Exceptions observed while
///   awaiting the processing task are not propagated to the caller.
/// </remarks>
///
private async Task StopPartitionProcessingIfRunningAsync(string partitionId, ProcessingStoppedReason reason)
{
    // The token source is only looked up (and removed) when a processing task was registered for the partition.

    if (ActivePartitionProcessors.TryRemove(partitionId, out var processingTask)
        && ActivePartitionProcessorTokenSources.TryRemove(partitionId, out var tokenSource))
    {
        try
        {
            // Signal the processing task to stop and wait until it has actually finished.

            tokenSource.Cancel();
            await processingTask.ConfigureAwait(false);
        }
        catch (OperationCanceledException)
        {
            // Nothing to do here.  Cancellation is expected, since it was just requested.  TaskCanceledException
            // derives from OperationCanceledException, so this clause covers both.
        }
        catch (Exception)
        {
            // TODO: delegate the exception handling to an Exception Callback.
        }
        finally
        {
            tokenSource.Dispose();
        }
    }

    // TODO: if reason = Shutdown or OwnershipLost and we got an exception when closing, what should the final reason be?

    // NOTE(review): when no context is registered for the partition, the removal fails and the handler presumably
    // receives the default (null) context - confirm that the handler tolerates it.

    PartitionContexts.TryRemove(partitionId, out var context);

    // Library code does not need to resume on the captured context; this matches every other await in this type.

    await ProcessingForPartitionStoppedAsync(reason, context).ConfigureAwait(false);
}
/// <summary>
///   Performs load balancing between multiple <see cref="EventProcessorClient" /> instances, claiming others' partitions to enforce
///   a more equal distribution when necessary.  It also manages its own partition processing tasks and ownership.
/// </summary>
///
/// <param name="cancellationToken">A <see cref="CancellationToken"/> instance to signal the request to cancel the operation.</param>
///
/// <returns>A task to be resolved on when the operation has completed.</returns>
///
/// <remarks>
///   Each cycle renews the ownership held by this instance, refreshes the ownership list from storage, stops the
///   processing of partitions that are no longer owned, (re)starts processing for owned partitions whose task is
///   missing or completed, attempts to claim one additional partition and then waits for the remainder of the
///   cycle.  The method only returns by throwing: once cancellation is requested, a cancellation exception is
///   raised either by the delay or by the final check after the loop.
/// </remarks>
///
private async Task RunAsync(CancellationToken cancellationToken)
{
    // We'll use this connection to retrieve an updated list of partition ids from the service.

    await using var connection = CreateConnection();

    while (!cancellationToken.IsCancellationRequested)
    {
        Stopwatch cycleDuration = Stopwatch.StartNew();

        // Renew this instance's ownership so they don't expire.
        // TODO: renew only after retrieving updated ownership so we use updated checkpoints.

        await RenewOwnershipAsync().ConfigureAwait(false);

        // From the storage service provided by the user, obtain a complete list of ownership, including expired ones.  We may still need
        // their eTags to claim orphan partitions.

        var completeOwnershipList = (await ListOwnershipAsync(FullyQualifiedNamespace, EventHubName, ConsumerGroup)
            .ConfigureAwait(false))
            .ToList();

        // Filter the complete ownership list to obtain only the ones that are still active.  The expiration time defaults to 30 seconds,
        // but it may be overridden by a derived class.
        //
        // An ownership without a last modified time cannot be shown to be fresh, so it is treated as expired instead of
        // letting the access to its value throw and terminate the load balancing loop.  The result is materialized once
        // because it is consumed several times during the cycle; a deferred query would re-run the filter on each use.

        var utcNow = DateTimeOffset.UtcNow;

        IEnumerable<PartitionOwnership> activeOwnership = completeOwnershipList
            .Where(ownership => ownership.LastModifiedTime.HasValue
                && utcNow.Subtract(ownership.LastModifiedTime.Value) < OwnershipExpiration)
            .ToList();

        // Dispose of all previous partition ownership instances and get a whole new dictionary.

        InstanceOwnership = activeOwnership
            .Where(ownership => ownership.OwnerIdentifier == Identifier)
            .ToDictionary(ownership => ownership.PartitionId);

        // Some previously owned partitions might have had their ownership expired or might have been stolen, so we need to stop
        // the processing tasks we don't need anymore.

        await Task.WhenAll(ActivePartitionProcessors.Keys
            .Except(InstanceOwnership.Keys)
            .Select(partitionId => StopPartitionProcessingIfRunningAsync(partitionId, ProcessingStoppedReason.OwnershipLost)))
            .ConfigureAwait(false);

        // Now that we are left with processing tasks that should be running, check their status.  If any has stopped, it
        // means a failure has happened, so try closing it and starting a new one.  In case we don't have a task that should
        // exist, create it.  This might happen if the user hasn't updated ActivePartitionProcessors when initializing processing
        // in the previous cycle.

        await Task.WhenAll(InstanceOwnership
            .Select(async kvp =>
            {
                if (!ActivePartitionProcessors.TryGetValue(kvp.Key, out Task processingTask) || processingTask.IsCompleted)
                {
                    // TODO: if the task fails, what's the expected reason?

                    await StopPartitionProcessingIfRunningAsync(kvp.Key, ProcessingStoppedReason.Shutdown).ConfigureAwait(false);

                    var context = CreateContext(kvp.Key);
                    PartitionContexts[kvp.Key] = context;

                    await InitializeProcessingForPartitionAsync(context).ConfigureAwait(false);
                }
            }))
            .ConfigureAwait(false);

        // Get a complete list of the partition ids present in the Event Hub.  This should be immutable for the time being, but
        // it may change in the future.

        var partitionIds = await connection.GetPartitionIdsAsync(RetryPolicy).ConfigureAwait(false);

        // Find an ownership to claim and try to claim it.  The method will return null if this instance was not eligible to
        // increase its ownership list, if no claimable ownership could be found or if a claim attempt has failed.

        var claimedOwnership = await FindAndClaimOwnershipAsync(partitionIds, completeOwnershipList, activeOwnership).ConfigureAwait(false);

        if (claimedOwnership != null)
        {
            InstanceOwnership[claimedOwnership.PartitionId] = claimedOwnership;

            var context = CreateContext(claimedOwnership.PartitionId);
            PartitionContexts[claimedOwnership.PartitionId] = context;

            await InitializeProcessingForPartitionAsync(context).ConfigureAwait(false);
        }

        // Wait the remaining time, if any, to start the next cycle.  The total time of a cycle defaults to 10 seconds,
        // but it may be overridden by a derived class.

        var remainingTimeUntilNextCycle = LoadBalanceUpdate.CalculateRemaining(cycleDuration.Elapsed);

        // If a stop request has been issued, Task.Delay will throw a TaskCanceledException.  This is expected and it
        // will be caught by the StopAsync method.

        await Task.Delay(remainingTimeUntilNextCycle, cancellationToken).ConfigureAwait(false);
    }

    // If cancellation has been requested, throw an exception so we can keep a consistent behavior.

    cancellationToken.ThrowIfCancellationRequested<TaskCanceledException>();
}