/// <summary>
/// Processes the provided list of catalog items while handling known retriable errors. It is this method's
/// responsibility to throw an exception if the operation is unsuccessful and <paramref name="allowRetry"/> is
/// <c>false</c>, even if the failure is generally retriable. Failure to do so could lead to an infinite loop
/// in the caller.
/// </summary>
/// <param name="itemList">The item list to use for the Azure Search updates.</param>
/// <param name="allowRetry">
/// False to make any error encountered bubble out as an exception. True if retriable errors should return a
/// result with <see cref="ProcessItemsResult.Success"/> set to <c>false</c>.
/// </param>
/// <returns>The result, including a success boolean and the next item list to use for a retry.</returns>
private async Task<ProcessItemsResult> ProcessItemsAsync(List<CatalogCommitItem> itemList, bool allowRetry)
{
    var latestItems = _utility.GetLatestPerIdentity(itemList);
    var allWork = _utility.GroupById(latestItems);

    using (_telemetryService.TrackCatalog2AzureSearchProcessBatch(itemList.Count, latestItems.Count, allWork.Count))
    {
        // In parallel, generate all index actions required to handle this batch.
        var allIndexActions = await ProcessWorkAsync(latestItems, allWork);

        // In sequence, push batches of index actions to Azure Search. We do this because the maximum set of
        // catalog items that can be processed here is a single catalog page, which has around 550 items. The
        // maximum batch size for pushing to Azure Search is 1000 documents so there is no benefit to
        // parallelizing this part. Azure Search indexing on their side is more efficient with fewer, larger
        // batches.
        var batchPusher = _batchPusherFactory();
        foreach (var indexAction in allIndexActions)
        {
            batchPusher.EnqueueIndexActions(indexAction.Id, indexAction.Value);
        }

        try
        {
            var finishResult = await batchPusher.TryFinishAsync();
            if (allowRetry && !finishResult.Success)
            {
                _logger.LogWarning(
                    "Retrying catalog batch due to access condition failures on package IDs: {Ids}",
                    finishResult.FailedPackageIds);
                return new ProcessItemsResult(success: false, items: itemList);
            }

            finishResult.EnsureSuccess();
            return new ProcessItemsResult(success: true, items: itemList);
        }
        catch (InvalidOperationException ex) when (allowRetry)
        {
            var result = await _fixUpEvaluator.TryFixUpAsync(itemList, allIndexActions, ex);
            if (!result.Applicable)
            {
                throw;
            }

            _logger.LogWarning("Retrying catalog batch due to Azure Search bug fix-up.");
            return new ProcessItemsResult(success: false, items: result.ItemList);
        }
    }
}
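For context on the allowRetry contract described in the summary, the caller is expected to loop on the returned ProcessItemsResult. The sketch below is a hypothetical caller, not the production code: the method name ProcessBatchWithRetriesAsync, the maxAttempts parameter, and the Items property name are all assumptions. It illustrates why the method must throw rather than return success: false when allowRetry is false, since otherwise a loop like this would never terminate.

// Hypothetical caller sketch: retries while the batch reports a retriable failure.
// If ProcessItemsAsync returned success: false even with allowRetry: false, this loop could spin forever,
// which is the infinite loop the XML doc comment warns about.
private async Task ProcessBatchWithRetriesAsync(List<CatalogCommitItem> itemList, int maxAttempts)
{
    var currentItems = itemList;
    for (var attempt = 1; attempt <= maxAttempts; attempt++)
    {
        // Only allow a retriable result while attempts remain; on the last attempt, let exceptions bubble out.
        var allowRetry = attempt < maxAttempts;
        var result = await ProcessItemsAsync(currentItems, allowRetry);
        if (result.Success)
        {
            return;
        }

        // Use the item list returned by the failed attempt (it may have been rewritten by the fix-up
        // evaluator) as the input for the next attempt.
        currentItems = result.Items;
    }
}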
private async Task ProcessItemsAsync(IEnumerable<CatalogCommitItem> items, bool allowFixUp)
{
    var itemList = items.ToList();
    var latestItems = _utility.GetLatestPerIdentity(items);
    var allWork = _utility.GroupById(latestItems);

    using (_telemetryService.TrackCatalog2AzureSearchProcessBatch(itemList.Count, latestItems.Count, allWork.Count))
    {
        // In parallel, generate all index actions required to handle this batch.
        var allIndexActions = await ProcessWorkAsync(latestItems, allWork);

        // In sequence, push batches of index actions to Azure Search. We do this because the maximum set of
        // catalog items that can be processed here is a single catalog page, which has around 550 items. The
        // maximum batch size for pushing to Azure Search is 1000 documents so there is no benefit to
        // parallelizing this part. Azure Search indexing on their side is more efficient with fewer, larger
        // batches.
        var batchPusher = _batchPusherFactory();
        foreach (var indexAction in allIndexActions)
        {
            batchPusher.EnqueueIndexActions(indexAction.Id, indexAction.Value);
        }

        try
        {
            await batchPusher.FinishAsync();
        }
        catch (InvalidOperationException ex) when (allowFixUp)
        {
            var result = await _fixUpEvaluator.TryFixUpAsync(itemList, allIndexActions, ex);
            if (!result.Applicable)
            {
                throw;
            }

            // Retry exactly once with the fixed-up item list; allowFixUp: false prevents a second fix-up
            // from triggering another recursive retry.
            await ProcessItemsAsync(result.ItemList, allowFixUp: false);
        }
    }
}
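Both versions consume the object returned by _fixUpEvaluator.TryFixUpAsync only through its Applicable and ItemList members. The sketch below shows one possible shape for that result, inferred purely from that usage; the type name FixUpResult and anything beyond those two members are assumptions, not the real definition.

// Hypothetical shape of the fix-up evaluator result, inferred only from how it is used above.
public class FixUpResult
{
    public FixUpResult(bool applicable, List<CatalogCommitItem> itemList)
    {
        Applicable = applicable;
        ItemList = itemList;
    }

    // True when the thrown InvalidOperationException matches the known Azure Search bug and a rewritten
    // item list is available; false means the caller should rethrow the original exception.
    public bool Applicable { get; }

    // The catalog items to process on the retry, adjusted to work around the bug.
    public List<CatalogCommitItem> ItemList { get; }
}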
public async Task OnProcessBatchAsync(IEnumerable<CatalogCommitItem> items)
{
    var itemList = items.ToList();
    _logger.LogInformation("Got {Count} catalog commit items to process.", itemList.Count);

    var latestItems = _utility.GetLatestPerIdentity(itemList);
    _logger.LogInformation("Got {Count} unique package identities.", latestItems.Count);

    var allWork = _utility.GroupById(latestItems);
    _logger.LogInformation("Got {Count} unique IDs.", allWork.Count);

    var allEntryToLeaf = await _utility.GetEntryToDetailsLeafAsync(latestItems);
    _logger.LogInformation("Fetched {Count} package details leaves.", allEntryToLeaf.Count);

    _logger.LogInformation("Starting {Count} workers processing each package ID batch.", _options.Value.MaxConcurrentIds);
    await ParallelAsync.Repeat(
        async () =>
        {
            await Task.Yield();
            while (allWork.TryTake(out var work))
            {
                var entryToLeaf = work
                    .Value
                    .Where(CommitCollectorUtility.IsOnlyPackageDetails)
                    .ToDictionary(e => e, e => allEntryToLeaf[e], ReferenceEqualityComparer<CatalogCommitItem>.Default);
                await _updater.UpdateAsync(work.Id, work.Value, entryToLeaf);
            }
        },
        _options.Value.MaxConcurrentIds);

    _logger.LogInformation("All workers have completed successfully.");
}
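OnProcessBatchAsync relies on ParallelAsync.Repeat to run the same async delegate MaxConcurrentIds times concurrently; the workers coordinate only by draining the shared allWork collection with TryTake. The sketch below is a minimal helper with that assumed behavior, not the real ParallelAsync utility, which may be implemented differently.

using System;
using System.Collections.Generic;
using System.Threading.Tasks;

// Minimal sketch of a "run this delegate N times concurrently" helper, matching how
// ParallelAsync.Repeat is used above.
public static class ParallelAsyncSketch
{
    public static async Task Repeat(Func<Task> taskFactory, int degreeOfParallelism)
    {
        var workers = new List<Task>(degreeOfParallelism);
        for (var i = 0; i < degreeOfParallelism; i++)
        {
            // Each worker runs the same delegate; in the method above, the await Task.Yield() at the top
            // of the delegate keeps a synchronous start from blocking the loop that launches the workers.
            workers.Add(taskFactory());
        }

        // Complete when every worker has drained the shared work collection and returned.
        await Task.WhenAll(workers);
    }
}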