Example #1
        /// <summary>
        /// Processes the provided list of catalog items while handling known retriable errors. It is this method's
        /// responsibility to throw an exception if the operation is unsuccessful and <paramref name="allowRetry"/> is
        /// <c>false</c>, even if the failure is generally retriable. Failure to do so could lead to an infinite loop
        /// in the caller.
        /// </summary>
        /// <param name="itemList">The item list to use for the Azure Search updates.</param>
        /// <param name="allowRetry">
        /// False to make any error encountered bubble out as an exception. True if retriable errors should be returned as a
        /// result with <see cref="ProcessItemsResult.Success"/> set to <c>false</c>.
        /// </param>
        /// <returns>The result, including the success boolean and the next item list to use for a retry.</returns>
        private async Task<ProcessItemsResult> ProcessItemsAsync(List<CatalogCommitItem> itemList, bool allowRetry)
        {
            var latestItems = _utility.GetLatestPerIdentity(itemList);
            var allWork     = _utility.GroupById(latestItems);

            using (_telemetryService.TrackCatalog2AzureSearchProcessBatch(itemList.Count, latestItems.Count, allWork.Count))
            {
                // In parallel, generate all index actions required to handle this batch.
                var allIndexActions = await ProcessWorkAsync(latestItems, allWork);

                // In sequence, push batches of index actions to Azure Search. We do this because the maximum set of catalog
                // items that can be processed here is a single catalog page, which has around 550 items. The maximum batch
                // size for pushing to Azure Search is 1000 documents so there is no benefit to parallelizing this part.
                // Azure Search indexing on their side is more efficient with fewer, larger batches.
                var batchPusher = _batchPusherFactory();
                foreach (var indexAction in allIndexActions)
                {
                    batchPusher.EnqueueIndexActions(indexAction.Id, indexAction.Value);
                }

                try
                {
                    var finishResult = await batchPusher.TryFinishAsync();

                    if (allowRetry && !finishResult.Success)
                    {
                        _logger.LogWarning("Retrying catalog batch due to access condition failures on package IDs: {Ids}", finishResult.FailedPackageIds);
                        return new ProcessItemsResult(success: false, items: itemList);
                    }

                    finishResult.EnsureSuccess();
                    return new ProcessItemsResult(success: true, items: itemList);
                }
                catch (InvalidOperationException ex) when (allowRetry)
                {
                    var result = await _fixUpEvaluator.TryFixUpAsync(itemList, allIndexActions, ex);

                    if (!result.Applicable)
                    {
                        throw;
                    }

                    _logger.LogWarning("Retrying catalog batch due to Azure Search bug fix-up.");
                    return new ProcessItemsResult(success: false, items: result.ItemList);
                }
            }
        }
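
The retry contract described in the <summary> above is easiest to see from the caller's side. The loop below is an illustrative sketch only, not part of the original source; the maxAttempts value and the ItemList property name on ProcessItemsResult are assumptions based on how the result is constructed and documented here.

        // Hypothetical caller: retry until success or attempts run out. On the
        // last attempt allowRetry is false, so a retriable failure throws
        // instead of coming back as another "retry" result. Without that
        // contract, a loop like this one would spin forever.
        const int maxAttempts = 3; // illustrative value
        var currentItems = itemList;
        var success = false;
        for (var attempt = 1; attempt <= maxAttempts && !success; attempt++)
        {
            var result = await ProcessItemsAsync(currentItems, allowRetry: attempt < maxAttempts);
            success = result.Success;
            currentItems = result.ItemList; // assumed property carrying the next item list
        }
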
        private async Task ProcessItemsAsync(IEnumerable<CatalogCommitItem> items, bool allowFixUp)
        {
            var itemList    = items.ToList();
            var latestItems = _utility.GetLatestPerIdentity(itemList);
            var allWork     = _utility.GroupById(latestItems);

            using (_telemetryService.TrackCatalog2AzureSearchProcessBatch(itemList.Count, latestItems.Count, allWork.Count))
            {
                // In parallel, generate all index actions required to handle this batch.
                var allIndexActions = await ProcessWorkAsync(latestItems, allWork);

                // In sequence, push batches of index actions to Azure Search. We do this because the maximum set of catalog
                // items that can be processed here is a single catalog page, which has around 550 items. The maximum batch
                // size for pushing to Azure Search is 1000 documents so there is no benefit to parallelizing this part.
                // Azure Search indexing on their side is more efficient with fewer, larger batches.
                var batchPusher = _batchPusherFactory();
                foreach (var indexAction in allIndexActions)
                {
                    batchPusher.EnqueueIndexActions(indexAction.Id, indexAction.Value);
                }

                try
                {
                    await batchPusher.FinishAsync();
                }
                catch (InvalidOperationException ex) when (allowFixUp)
                {
                    var result = await _fixUpEvaluator.TryFixUpAsync(itemList, allIndexActions, ex);

                    if (!result.Applicable)
                    {
                        throw;
                    }

                    await ProcessItemsAsync(result.ItemList, allowFixUp: false);
                }
            }
        }
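
Note that the recursive retry above passes allowFixUp: false, so the catch filter cannot match a second time and a repeated failure bubbles out as an exception rather than recursing without bound. Neither overload shows the shape of the _fixUpEvaluator result; the sketch below is inferred purely from how TryFixUpAsync is consumed here, and the type name and constructor are assumptions.

        // Hypothetical sketch of the fix-up evaluator's result type, inferred
        // from usage: Applicable reports whether the known Azure Search issue
        // was detected, and ItemList carries the corrected items to retry with.
        public class FixUpResult
        {
            public FixUpResult(bool applicable, List<CatalogCommitItem> itemList)
            {
                Applicable = applicable;
                ItemList = itemList;
            }

            public bool Applicable { get; }
            public List<CatalogCommitItem> ItemList { get; }
        }
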
        public async Task OnProcessBatchAsync(IEnumerable<CatalogCommitItem> items)
        {
            var itemList = items.ToList();

            _logger.LogInformation("Got {Count} catalog commit items to process.", itemList.Count);

            var latestItems = _utility.GetLatestPerIdentity(itemList);

            _logger.LogInformation("Got {Count} unique package identities.", latestItems.Count);

            var allWork = _utility.GroupById(latestItems);

            _logger.LogInformation("Got {Count} unique IDs.", allWork.Count);

            var allEntryToLeaf = await _utility.GetEntryToDetailsLeafAsync(latestItems);

            _logger.LogInformation("Fetched {Count} package details leaves.", allEntryToLeaf.Count);

            _logger.LogInformation("Starting {Count} workers processing each package ID batch.", _options.Value.MaxConcurrentIds);
            await ParallelAsync.Repeat(
                async () =>
                {
                    await Task.Yield();
                    while (allWork.TryTake(out var work))
                    {
                        var entryToLeaf = work
                            .Value
                            .Where(CommitCollectorUtility.IsOnlyPackageDetails)
                            .ToDictionary(e => e, e => allEntryToLeaf[e], ReferenceEqualityComparer<CatalogCommitItem>.Default);

                        await _updater.UpdateAsync(work.Id, work.Value, entryToLeaf);
                    }
                },
                _options.Value.MaxConcurrentIds);

            _logger.LogInformation("All workers have completed successfully.");
        }
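
ParallelAsync.Repeat is a helper from the surrounding codebase rather than the base class library. The sketch below captures only the semantics the call above relies on, assuming it simply starts the same async delegate the requested number of times and awaits them all; combined with a thread-safe allWork collection exposing TryTake, this yields a simple worker-pool pattern.

        // Hypothetical sketch: run taskFactory 'degreeOfParallelism' times
        // concurrently and complete when every instance has completed.
        public static class ParallelAsync
        {
            public static Task Repeat(Func<Task> taskFactory, int degreeOfParallelism)
            {
                var tasks = new Task[degreeOfParallelism];
                for (var i = 0; i < degreeOfParallelism; i++)
                {
                    tasks[i] = taskFactory();
                }

                return Task.WhenAll(tasks);
            }
        }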