Пример #1
0
        // Summary:
        //
        //      1.  Process one catalog page at a time.
        //      2.  Within a given catalog page, batch catalog commit entries by lower-cased package ID.
        //      3.  Process up to `n` batches in parallel.  Note that the batches may span multiple catalog commits.
        //      4.  Cease processing new batches if a failure has been observed.  This job will eventually retry
        //          batches on its next outermost job loop.
        //      5.  If a failure has been observed, wait for all existing tasks to complete.  Avoid task cancellation
        //          as that could lead to the entirety of a package registration being in an inconsistent state.
        //          To be fair, a well-timed exception could have the same result, but registration updates have never
        //          been transactional.  Actively cancelling tasks would make an inconsistent registration more likely.
        //      6.  Update the cursor if and only if all preceding commits and the current (oldest) commit have been
        //          fully and successfully processed.
        //
        // Parameters:
        //
        //      client:             HTTP client used to download each catalog page; also forwarded to the helpers
        //                          that fetch the catalog items and enqueue batches.
        //      front:              Read/write cursor.  Its value is set to a commit's timestamp and saved each time
        //                          that commit is found to be fully and successfully processed.
        //      back:               Read-only cursor.  In this method it is only forwarded to CreateBatchesAsync.
        //      cancellationToken:  Forwarded to the catalog/page fetches, to batch enqueueing, and to cursor saves.
        //
        // Returns:
        //
        //      true if the batch-processing loop ran at least one iteration for any catalog page; otherwise false.
        //
        // Throws:
        //
        //      BatchProcessingException if a fully processed commit contains a batch task that did not run to
        //      completion.  It wraps the single collected exception, or an AggregateException otherwise.
        protected override async Task <bool> FetchAsync(
            CollectorHttpClient client,
            ReadWriteCursor front,
            ReadCursor back,
            CancellationToken cancellationToken)
        {
            IEnumerable <CatalogItem> catalogItems = await FetchCatalogItemsAsync(client, front, cancellationToken);

            // Both flags are shared across all catalog pages handled by this call.
            var hasAnyBatchFailed        = false;
            var hasAnyBatchBeenProcessed = false;

            foreach (CatalogItem catalogItem in catalogItems)
            {
                JObject page = await client.GetJObjectAsync(catalogItem.Uri, cancellationToken);

                // The page's "@context" token is handed to every batch that gets enqueued for this page.
                JToken             context = page["@context"];
                CatalogItemBatch[] batches = await CreateBatchesAsync(front, back, page);

                // Keyed by commit timestamp.  A SortedDictionary enumerates in ascending key order, so First()
                // below always yields the oldest commit that has not yet been removed from the map.
                SortedDictionary <DateTime, CommitBatchTasks> commitBatchTasksMap = CreateCommitBatchTasksMap(batches);

                var unprocessedBatches = new Queue <CatalogItemBatch>(batches);
                var processingBatches  = new Queue <BatchTask>();

                // Captured before anything is dequeued; null when the page produced no batches.
                // NOTE(review): how EnqueueBatchesIfNoFailures uses lastBatch is not visible here -- confirm.
                CatalogItemBatch lastBatch = unprocessedBatches.LastOrDefault();
                var exceptions             = new List <Exception>();

                // Initial fill of the processing queue.
                EnqueueBatchesIfNoFailures(
                    client,
                    context,
                    commitBatchTasksMap,
                    unprocessedBatches,
                    processingBatches,
                    lastBatch,
                    cancellationToken);

                while (processingBatches.Any())
                {
                    // Task.WhenAny rejects an empty sequence, so fall back to CompletedTask when every queued
                    // task has already finished (CompletedTask is presumably an already-completed task -- confirm).
                    var activeTasks = processingBatches.Where(batch => !batch.Task.IsCompleted)
                                      .Select(batch => batch.Task)
                                      .DefaultIfEmpty(CompletedTask);

                    await Task.WhenAny(activeTasks);

                    // Walk commits from oldest to newest, advancing the cursor past each commit whose batch tasks
                    // have all succeeded.  Stop at the first commit that is unfinished or has failed.
                    while (!hasAnyBatchFailed && commitBatchTasksMap.Any())
                    {
                        var commitBatchTasks       = commitBatchTasksMap.First().Value;
                        // A null Task means the batch has not been started yet, so the commit cannot be complete.
                        var isCommitFullyProcessed = commitBatchTasks.BatchTasks.All(batch => batch.Task != null && batch.Task.IsCompleted);

                        if (!isCommitFullyProcessed)
                        {
                            // The oldest commit is still in flight; newer commits must not move the cursor ahead of it.
                            break;
                        }

                        var isCommitSuccessfullyProcessed = commitBatchTasks.BatchTasks.All(batch => batch.Task.Status == TaskStatus.RanToCompletion);

                        if (isCommitSuccessfullyProcessed)
                        {
                            var commitTimeStamp = commitBatchTasks.CommitTimeStamp;

                            front.Value = commitTimeStamp;

                            await front.SaveAsync(cancellationToken);

                            // "{{0}}" is an escaped brace pair in the interpolated string; it yields a literal "{0}"
                            // placeholder that TraceInformation then fills with the front cursor argument.
                            Trace.TraceInformation($"{nameof(RegistrationCollector)}.{nameof(FetchAsync)} {nameof(front)}.{nameof(front.Value)} saved since timestamp changed from previous: {{0}}", front);

                            // Drop this commit's batch tasks from the processing queue and forget the commit.
                            // NOTE(review): presumably this dequeues from the head only while the predicate holds -- confirm.
                            DequeueBatchesWhileMatches(processingBatches, batch => batch.CommitTimeStamp == commitTimeStamp);

                            commitBatchTasksMap.Remove(commitTimeStamp);
                        }
                        else // Canceled or Failed
                        {
                            hasAnyBatchFailed = true;

                            // Only tasks with a non-null Exception contribute.  A canceled task normally has a null
                            // Exception, so cancellations alone leave this list empty.
                            exceptions.AddRange(
                                commitBatchTasks.BatchTasks
                                .Select(batch => batch.Task)
                                .Where(task => (task.IsFaulted || task.IsCanceled) && task.Exception != null)
                                .Select(task => task.Exception));
                        }
                    }

                    if (hasAnyBatchFailed)
                    {
                        // After a failure, drain finished tasks so the outer loop ends once the remaining in-flight
                        // tasks complete.  Exceptions from these drained tasks are not added to the exceptions list.
                        DequeueBatchesWhileMatches(processingBatches, batch => batch.Task.IsCompleted);
                    }

                    hasAnyBatchBeenProcessed = true;

                    // Top the processing queue back up.
                    // NOTE(review): hasAnyBatchFailed is not passed in, so the helper presumably detects failures
                    // itself (e.g. by inspecting the queued tasks) -- confirm.
                    EnqueueBatchesIfNoFailures(
                        client,
                        context,
                        commitBatchTasksMap,
                        unprocessedBatches,
                        processingBatches,
                        lastBatch,
                        cancellationToken);
                }

                if (hasAnyBatchFailed)
                {
                    // Exactly one exception is wrapped directly; zero or several are wrapped in an AggregateException.
                    var innerException = exceptions.Count == 1 ? exceptions.Single() : new AggregateException(exceptions);

                    throw new BatchProcessingException(innerException);
                }
            }

            return(hasAnyBatchBeenProcessed);
        }
Пример #2
0
 /// <summary>
 /// Assigns <paramref name="value"/> to the underlying cursor and then starts saving it.
 /// </summary>
 /// <param name="value">The value to store in the cursor's <c>Value</c> property.</param>
 /// <param name="cancellationToken">Token forwarded to the cursor's <c>SaveAsync</c> call.</param>
 /// <returns>The task returned by the cursor's <c>SaveAsync</c> call; it is not awaited here.</returns>
 private Task SetAsync(DateTime value, CancellationToken cancellationToken)
 {
     // Update the cursor's value first so the save that follows sees the new value.
     _cursor.Value = value;

     // Hand the save task straight back to the caller instead of awaiting it here.
     Task pendingSave = _cursor.SaveAsync(cancellationToken);

     return pendingSave;
 }