// Summary:
//
// 1. Process one catalog page at a time.
// 2. Within a given catalog page, batch catalog commit entries by lower-cased package ID.
// 3. Process up to `n` batches in parallel. Note that the batches may span multiple catalog commits.
// 4. Cease processing new batches if a failure has been observed. This job will eventually retry
//    batches on its next outermost job loop.
// 5. If a failure has been observed, wait for all existing tasks to complete. Avoid task cancellation
//    as that could lead to the entirety of a package registration being in an inconsistent state.
//    To be fair, a well-timed exception could have the same result, but registration updates have never
//    been transactional. Actively cancelling tasks would make an inconsistent registration more likely.
// 6. Update the cursor if and only if all preceding commits and the current (oldest) commit have been
//    fully and successfully processed.
//
// Parameters:
//   client            - HTTP client used to download each catalog page as a JObject.
//   front             - read/write cursor; its Value is set to a commit's timestamp and saved each time
//                       the oldest outstanding commit completes successfully.
//   back              - read cursor; only forwarded to CreateBatchesAsync here.
//   cancellationToken - forwarded to the page download, the cursor save and the enqueue helper.
//
// Returns: true if the batch-processing loop ran at least one iteration for any page; otherwise false.
//
// Throws: BatchProcessingException once every in-flight batch of the current page has been drained, if any
// batch task of the oldest outstanding commit did not run to completion. Later pages are not processed.
protected override async Task <bool> FetchAsync(
    CollectorHttpClient client,
    ReadWriteCursor front,
    ReadCursor back,
    CancellationToken cancellationToken)
{
    IEnumerable <CatalogItem> catalogItems = await FetchCatalogItemsAsync(client, front, cancellationToken);

    // Both flags are shared across pages; a failure throws before the next page is fetched.
    var hasAnyBatchFailed = false;
    var hasAnyBatchBeenProcessed = false;

    foreach (CatalogItem catalogItem in catalogItems)
    {
        JObject page = await client.GetJObjectAsync(catalogItem.Uri, cancellationToken);
        JToken context = page["@context"];
        CatalogItemBatch[] batches = await CreateBatchesAsync(front, back, page);

        // Sorted by DateTime key, so the first entry is always the earliest one still outstanding.
        // Entries are removed below by commit timestamp.
        SortedDictionary <DateTime, CommitBatchTasks> commitBatchTasksMap = CreateCommitBatchTasksMap(batches);

        var unprocessedBatches = new Queue <CatalogItemBatch>(batches);
        var processingBatches = new Queue <BatchTask>();

        // Captured before anything is dequeued; null when the page produced no batches.
        CatalogItemBatch lastBatch = unprocessedBatches.LastOrDefault();

        // Failures collected for this page only.
        var exceptions = new List <Exception>();

        // Initial fill of the processing queue.
        // NOTE(review): helper is defined outside this view; presumably it starts batches up to the
        // parallelism limit and does nothing once a failure is detected -- confirm.
        EnqueueBatchesIfNoFailures(
            client,
            context,
            commitBatchTasksMap,
            unprocessedBatches,
            processingBatches,
            lastBatch,
            cancellationToken);

        while (processingBatches.Any())
        {
            // Wait for any still-running batch task to finish. DefaultIfEmpty guarantees that
            // Task.WhenAny never receives an empty sequence (which would throw) when every queued
            // task has already completed.
            // NOTE(review): CompletedTask is declared elsewhere; presumably an already-completed Task.
            var activeTasks = processingBatches.Where(batch => !batch.Task.IsCompleted)
                .Select(batch => batch.Task)
                .DefaultIfEmpty(CompletedTask);

            await Task.WhenAny(activeTasks);

            // Advance the cursor over every leading commit whose batch tasks have all finished.
            while (!hasAnyBatchFailed && commitBatchTasksMap.Any())
            {
                var commitBatchTasks = commitBatchTasksMap.First().Value;

                // A null Task is treated as "not finished".
                // NOTE(review): presumably a null Task means the batch has not been started yet.
                var isCommitFullyProcessed = commitBatchTasks.BatchTasks.All(batch => batch.Task != null && batch.Task.IsCompleted);

                if (!isCommitFullyProcessed)
                {
                    // The oldest commit is still in flight; later commits must not move the cursor.
                    break;
                }

                var isCommitSuccessfullyProcessed = commitBatchTasks.BatchTasks.All(batch => batch.Task.Status == TaskStatus.RanToCompletion);

                if (isCommitSuccessfullyProcessed)
                {
                    var commitTimeStamp = commitBatchTasks.CommitTimeStamp;

                    front.Value = commitTimeStamp;

                    await front.SaveAsync(cancellationToken);

                    // "{{0}}" is an escaped brace pair: the interpolated string yields "{0}", which
                    // TraceInformation then fills with the `front` argument.
                    Trace.TraceInformation($"{nameof(RegistrationCollector)}.{nameof(FetchAsync)} {nameof(front)}.{nameof(front.Value)} saved since timestamp changed from previous: {{0}}", front);

                    // NOTE(review): helper is defined outside this view; presumably it removes leading
                    // queue entries for as long as the predicate holds -- confirm.
                    DequeueBatchesWhileMatches(processingBatches, batch => batch.CommitTimeStamp == commitTimeStamp);

                    commitBatchTasksMap.Remove(commitTimeStamp);
                }
                else // Canceled or Failed
                {
                    // Setting the flag ends this inner loop, so only this commit's exceptions are collected.
                    hasAnyBatchFailed = true;

                    // NOTE(review): Task.Exception is null for a cancelled task, so a commit that failed
                    // only through cancellation adds nothing here, and the exception thrown below would
                    // then wrap an AggregateException with no inner exceptions -- confirm this is intended.
                    exceptions.AddRange(
                        commitBatchTasks.BatchTasks
                            .Select(batch => batch.Task)
                            .Where(task => (task.IsFaulted || task.IsCanceled) && task.Exception != null)
                            .Select(task => task.Exception));
                }
            }

            if (hasAnyBatchFailed)
            {
                // After a failure, drop finished batches so the outer loop ends once in-flight work
                // has completed; running tasks are awaited, not cancelled (see item 5 above).
                DequeueBatchesWhileMatches(processingBatches, batch => batch.Task.IsCompleted);
            }

            hasAnyBatchBeenProcessed = true;

            // Top the processing queue back up (same helper and arguments as the initial fill).
            EnqueueBatchesIfNoFailures(
                client,
                context,
                commitBatchTasksMap,
                unprocessedBatches,
                processingBatches,
                lastBatch,
                cancellationToken);
        }

        if (hasAnyBatchFailed)
        {
            // Exactly one collected exception is passed through unwrapped; any other count
            // (including zero) is wrapped in an AggregateException.
            var innerException = exceptions.Count == 1 ? exceptions.Single() : new AggregateException(exceptions);

            throw new BatchProcessingException(innerException);
        }
    }

    return(hasAnyBatchBeenProcessed);
}
// Assigns `value` to the wrapped cursor and begins saving it.
// The task produced by the cursor's SaveAsync call is handed back to the caller as-is; it is not
// awaited here, so the caller observes its completion and any failure.
private Task SetAsync(DateTime value, CancellationToken cancellationToken)
{
    _cursor.Value = value;

    var pendingSave = _cursor.SaveAsync(cancellationToken);

    return pendingSave;
}