Example #1
0
        /// <summary>
        /// Fetches catalog commits between the two cursors and, one page at a time, turns the page's items
        /// into batches, runs those batches as tasks, and saves the front cursor once every batch of the page
        /// has completed without failure.
        /// </summary>
        /// <param name="client">HTTP client used to download each commit page; also forwarded to the delegates.</param>
        /// <param name="front">Cursor that is set to the page's maximum commit timestamp and saved after each successful page.</param>
        /// <param name="back">Cursor forwarded to commit fetching and batch creation.</param>
        /// <param name="fetchCatalogCommitsAsync">Delegate that returns the catalog commits (pages) to walk.</param>
        /// <param name="createCommitItemBatchesAsync">Delegate forwarded to batch creation for each page.</param>
        /// <param name="processCommitItemBatchAsync">Delegate forwarded to the batch starter to process a single batch.</param>
        /// <param name="maxConcurrentBatches">
        /// Forwarded to <c>StartProcessingBatchesIfNoFailures</c>; presumably the cap on simultaneously running
        /// batches (confirm against that helper).
        /// </param>
        /// <param name="logger">Receives one error entry per captured batch exception before the method throws.</param>
        /// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
        /// <returns><c>true</c> if at least one page produced batches; otherwise <c>false</c>.</returns>
        /// <exception cref="BatchProcessingException">
        /// Thrown after all in-flight batches of a page have finished when at least one of them faulted or was
        /// canceled. The front cursor is not advanced for that page.
        /// </exception>
        internal static async Task<bool> ProcessCatalogCommitsAsync(
            CollectorHttpClient client,
            ReadWriteCursor front,
            ReadCursor back,
            FetchCatalogCommitsAsync fetchCatalogCommitsAsync,
            CreateCommitItemBatchesAsync createCommitItemBatchesAsync,
            ProcessCommitItemBatchAsync processCommitItemBatchAsync,
            int maxConcurrentBatches,
            ILogger logger,
            CancellationToken cancellationToken)
        {
            var rootItems = await fetchCatalogCommitsAsync(client, front, back, cancellationToken);

            var hasAnyBatchFailed = false;
            var hasAnyBatchBeenProcessed = false;

            foreach (CatalogCommit rootItem in rootItems)
            {
                JObject page = await client.GetJObjectAsync(rootItem.Uri, cancellationToken);

                var context = (JObject)page["@context"];
                CatalogCommitItemBatch[] batches = await CreateBatchesForAllAvailableItemsInPageAsync(
                    front,
                    back,
                    page,
                    context,
                    createCommitItemBatchesAsync);

                if (!batches.Any())
                {
                    continue;
                }

                hasAnyBatchBeenProcessed = true;

                DateTime maxCommitTimeStamp = GetMaxCommitTimeStamp(batches);
                var unprocessedBatches = batches.ToList();
                var processingBatches = new List<CatalogCommitItemBatchTask>();
                var exceptions = new List<Exception>();

                StartProcessingBatchesIfNoFailures(
                    client,
                    context,
                    unprocessedBatches,
                    processingBatches,
                    maxConcurrentBatches,
                    processCommitItemBatchAsync,
                    cancellationToken);

                while (processingBatches.Any())
                {
                    // DefaultIfEmpty guards against every task having completed between the loop condition
                    // and this query; Task.WhenAny rejects an empty sequence.
                    var activeTasks = processingBatches.Where(batch => !batch.Task.IsCompleted)
                                      .Select(batch => batch.Task)
                                      .DefaultIfEmpty(Task.CompletedTask);

                    await Task.WhenAny(activeTasks);

                    for (var i = 0; i < processingBatches.Count; ++i)
                    {
                        var batch = processingBatches[i];

                        // Check completion FIRST. A task's status is final once it is completed, so the
                        // faulted/canceled inspection below cannot miss a failure. Inspecting the outcome
                        // before checking completion would allow a task to fault between the two reads and be
                        // removed without its failure ever being recorded.
                        if (!batch.Task.IsCompleted)
                        {
                            continue;
                        }

                        if (batch.Task.IsFaulted || batch.Task.IsCanceled)
                        {
                            hasAnyBatchFailed = true;

                            // Canceled tasks carry no exception; only faulted ones contribute to the list.
                            if (batch.Task.Exception != null)
                            {
                                var exception = ExceptionUtilities.Unwrap(batch.Task.Exception);

                                exceptions.Add(exception);
                            }
                        }

                        // Removing shifts the following elements left, so revisit the same index.
                        processingBatches.RemoveAt(i);
                        --i;
                    }

                    // After a failure no new batches are started; the loop only drains what is in flight.
                    if (!hasAnyBatchFailed)
                    {
                        StartProcessingBatchesIfNoFailures(
                            client,
                            context,
                            unprocessedBatches,
                            processingBatches,
                            maxConcurrentBatches,
                            processCommitItemBatchAsync,
                            cancellationToken);
                    }
                }

                if (hasAnyBatchFailed)
                {
                    foreach (var exception in exceptions)
                    {
                        logger.LogError(_eventId, exception, Strings.BatchProcessingFailure);
                    }

                    var innerException = exceptions.Count == 1 ? exceptions.Single() : new AggregateException(exceptions);

                    throw new BatchProcessingException(innerException);
                }

                // Every batch of this page succeeded, so it is safe to move the cursor past the page.
                front.Value = maxCommitTimeStamp;

                await front.SaveAsync(cancellationToken);

                Trace.TraceInformation($"{nameof(CatalogCommitUtilities)}.{nameof(ProcessCatalogCommitsAsync)} " +
                                       $"{nameof(front)}.{nameof(front.Value)} saved since timestamp changed from previous: {{0}}", front);
            }

            return hasAnyBatchBeenProcessed;
        }
        /// <summary>
        /// Walks the catalog commits newer than the front cursor, and for each commit page builds batches from
        /// the items whose commit timestamp lies in (front, back], processes them in ascending commit-timestamp
        /// order, and persists the front cursor as processing advances.
        /// </summary>
        /// <param name="client">HTTP client used to download each commit page and passed on to batch processing.</param>
        /// <param name="front">Cursor that is updated and saved as batches are processed.</param>
        /// <param name="back">Cursor supplying the inclusive upper bound for item commit timestamps.</param>
        /// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
        /// <returns>
        /// The result of the most recent <c>OnProcessBatchAsync</c> call, or <c>false</c> when no batch was
        /// processed at all.
        /// </returns>
        protected override async Task<bool> FetchAsync(
            CollectorHttpClient client,
            ReadWriteCursor front,
            ReadCursor back,
            CancellationToken cancellationToken)
        {
            IEnumerable<CatalogCommit> catalogCommits = await FetchCatalogCommitsAsync(client, front, cancellationToken);

            bool shouldContinue = false;

            foreach (CatalogCommit catalogCommit in catalogCommits)
            {
                JObject page = await client.GetJObjectAsync(catalogCommit.Uri, cancellationToken);

                // A page without "@context" leaves this null; the casts below tolerate that.
                JToken context;
                page.TryGetValue("@context", out context);

                // Both stages stay lazy: the cursor values are read when CreateBatchesAsync enumerates.
                var pageItems = page["items"]
                    .Select(item => CatalogCommitItem.Create((JObject)context, (JObject)item));
                var itemsInRange = pageItems
                    .Where(item => item.CommitTimeStamp > front.Value && item.CommitTimeStamp <= back.Value);

                var createdBatches = await CreateBatchesAsync(itemsInRange);

                var batchesByTimeStamp = createdBatches
                    .OrderBy(candidate => candidate.CommitTimeStamp)
                    .ToList();

                var finalBatch = batchesByTimeStamp.LastOrDefault();
                DateTime? lastSeenTimeStamp = null;

                foreach (var current in batchesByTimeStamp)
                {
                    // Only save the cursor when the commit timestamp moves on from the previous batch. Two
                    // batches may share a commit timestamp; if the second one fails, the cursor must not
                    // already have been advanced past that timestamp.
                    bool timeStampChanged = lastSeenTimeStamp.HasValue && lastSeenTimeStamp != current.CommitTimeStamp;

                    if (timeStampChanged)
                    {
                        front.Value = lastSeenTimeStamp.Value;
                        await front.SaveAsync(cancellationToken);

                        Trace.TraceInformation("CommitCatalog.Fetch front.Value saved since timestamp changed from previous: {0}", front);
                    }

                    var telemetryProperties = new Dictionary<string, string>()
                    {
                        { TelemetryConstants.BatchItemCount, current.Items.Count.ToString() }
                    };

                    using (_telemetryService.TrackDuration(TelemetryConstants.ProcessBatchSeconds, telemetryProperties))
                    {
                        bool sharesFinalTimeStamp = current.CommitTimeStamp == finalBatch.CommitTimeStamp;

                        shouldContinue = await OnProcessBatchAsync(
                            client,
                            current.Items,
                            context,
                            current.CommitTimeStamp,
                            sharesFinalTimeStamp,
                            cancellationToken);
                    }

                    // The very last batch of the page always commits the cursor.
                    if (ReferenceEquals(current, finalBatch))
                    {
                        front.Value = current.CommitTimeStamp;
                        await front.SaveAsync(cancellationToken);

                        Trace.TraceInformation("CommitCatalog.Fetch front.Value saved due to last batch: {0}", front);
                    }

                    lastSeenTimeStamp = current.CommitTimeStamp;

                    Trace.TraceInformation("CommitCatalog.Fetch front.Value is: {0}", front);

                    if (!shouldContinue)
                    {
                        break;
                    }
                }

                // NOTE: the flag is still false if no batch has been processed yet, so a first page that
                // yields no in-range batches also ends the walk here.
                if (!shouldContinue)
                {
                    break;
                }
            }

            return shouldContinue;
        }