/// <summary>
/// Fetches one catalog page and processes its in-bounds leaves sequentially, in the order
/// returned by <c>GetLeavesInBounds</c>, advancing the cursor as commit timestamps are completed.
/// </summary>
/// <param name="minCommitTimestamp">Lower commit timestamp bound handed to <c>GetLeavesInBounds</c>.</param>
/// <param name="pageItem">The page item whose <c>Url</c> is fetched.</param>
/// <param name="cancellationToken">
/// Polled after each successfully processed leaf. It is not forwarded to the page fetch,
/// the leaf processor or the cursor.
/// </param>
/// <returns>
/// <c>true</c> when no leaf failed and no cancellation request was observed; otherwise <c>false</c>.
/// </returns>
private async Task<bool> ProcessPageAsync(
    DateTimeOffset minCommitTimestamp,
    CatalogPageItem pageItem,
    CancellationToken cancellationToken)
{
    var page = await _client.GetPageAsync(pageItem.Url);
    var leafItems = page.GetLeavesInBounds(
        minCommitTimestamp,
        _settings.MaxCommitTimestamp,
        _settings.ExcludeRedundantLeaves);

    _logger.LogInformation(
        "On page {page}, {leaves} out of {totalLeaves} were in the time bounds.",
        pageItem.Url,
        leafItems.Count,
        page.Items.Count);

    DateTimeOffset? newCursor = null;
    var success = true;
    for (var i = 0; i < leafItems.Count; i++)
    {
        var leafItem = leafItems[i];

        // The pending timestamp is only persisted once a leaf with a different timestamp is
        // reached, i.e. after every earlier leaf carrying the pending timestamp succeeded.
        if (newCursor.HasValue && newCursor.Value != leafItem.CommitTimestamp)
        {
            await _cursor.SetAsync(newCursor.Value);
        }

        newCursor = leafItem.CommitTimestamp;

        success = await ProcessLeafAsync(leafItem);
        if (!success)
        {
            // Leaf i itself failed, so it is counted among the incomplete leaves.
            _logger.LogWarning(
                "{unprocessedLeaves} out of {leaves} leaves were left incomplete due to a processing failure.",
                leafItems.Count - i,
                leafItems.Count);
            break;
        }

        if (cancellationToken.IsCancellationRequested)
        {
            // Leaf i has just completed successfully, so only the leaves after it are incomplete.
            _logger.LogWarning(
                "{unprocessedLeaves} out of {leaves} leaves were left incomplete due to a cancellation request.",
                leafItems.Count - i - 1,
                leafItems.Count);

            // Cancellation is reported as an unsuccessful page, so the final cursor write below is skipped.
            success = false;
            break;
        }
    }

    if (newCursor.HasValue && success)
    {
        await _cursor.SetAsync(newCursor.Value);
    }

    return success;
}
/// <summary>
/// Gathers the leaf items committed after <paramref name="cursor"/> from every catalog page
/// that was itself committed after <paramref name="cursor"/>.
/// </summary>
/// <param name="catalogClient">Client used to download each catalog page.</param>
/// <param name="catalogIndex">Index whose <c>Items</c> list the candidate pages.</param>
/// <param name="cursor">Exclusive lower bound applied to both page and leaf commit timestamps.</param>
/// <param name="cancellationToken">Passed to <c>ParallelHelper.ProcessInParallel</c>.</param>
/// <returns>The matching leaf items, backed by a <see cref="ConcurrentBag{T}"/> (unordered).</returns>
private async Task<IEnumerable<CatalogLeafItem>> GetCatalogLeafItems(
    ICatalogClient catalogClient,
    CatalogIndex catalogIndex,
    DateTimeOffset cursor,
    CancellationToken cancellationToken)
{
    var collectedLeaves = new ConcurrentBag<CatalogLeafItem>();

    // Only pages committed after the cursor can contain leaves of interest.
    var newerPageUrls =
        from pageItem in catalogIndex.Items
        where pageItem.CommitTimestamp > cursor
        select pageItem.CatalogPageUrl;
    var pendingPageUrls = new ConcurrentBag<string>(newerPageUrls);

    await ParallelHelper.ProcessInParallel(
        pendingPageUrls,
        CollectPageLeavesAsync,
        cancellationToken);

    return collectedLeaves;

    // Downloads one page and records its leaves that are newer than the cursor.
    async Task CollectPageLeavesAsync(string pageUrl, CancellationToken pageToken)
    {
        _logger.LogInformation("Processing catalog page {CatalogPageUrl}...", pageUrl);

        var page = await catalogClient.GetPageAsync(pageUrl, pageToken);
        foreach (var leaf in page.Items)
        {
            if (leaf.CommitTimestamp > cursor)
            {
                collectedLeaves.Add(leaf);
            }
        }

        _logger.LogInformation("Processed catalog page {CatalogPageUrl}", pageUrl);
    }
}
/// <summary>
/// Fetches one catalog page, loads its in-bounds leaves in batches of 25, hands each loaded
/// leaf to <c>_leafProcessor</c>, and finally moves the cursor to the commit timestamp of the
/// last in-bounds leaf.
/// </summary>
/// <param name="minCommitTimestamp">Lower commit timestamp bound handed to <c>GetLeavesInBounds</c>.</param>
/// <param name="pageItem">The page item whose <c>Url</c> is fetched.</param>
/// <param name="token">Forwarded to the page fetch, the leaf loads, the leaf processor and the cursor.</param>
private async Task ProcessPageAsync(
    DateTimeOffset minCommitTimestamp,
    CatalogPageItem pageItem,
    CancellationToken token)
{
    // Number of leaf loads started before their results are awaited and processed.
    const int batchSize = 25;

    var page = await _client.GetPageAsync(pageItem.Url, token);
    var leafItems = page.GetLeavesInBounds(
        minCommitTimestamp,
        _settings.MaxCommitTimestamp,
        _settings.ExcludeRedundantLeaves);

    _logger.LogInformation(
        "On page {page}, {leaves} out of {totalLeaves} were in the time bounds.",
        pageItem.Url,
        leafItems.Count,
        page.Items.Count);

    DateTimeOffset? newCursor = null;
    var pending = new List<Task<CatalogLeaf>>(batchSize);

    foreach (var leafItem in leafItems)
    {
        newCursor = leafItem.CommitTimestamp;
        pending.Add(ProcessLeafAsync(leafItem, token));

        if (pending.Count == batchSize)
        {
            await DrainPendingAsync();
        }
    }

    // Final, possibly partial, batch.
    if (pending.Count > 0)
    {
        await DrainPendingAsync();
    }

    // The cursor is written once, after every batch has been handled.
    if (newCursor.HasValue)
    {
        await _cursor.SetAsync(newCursor.Value, token);
    }

    // Awaits every load in the current batch, then dispatches the results one at a time
    // in the order the loads were started.
    async Task DrainPendingAsync()
    {
        var loadedLeaves = await Task.WhenAll(pending);
        foreach (var leaf in loadedLeaves)
        {
            switch (leaf)
            {
                case PackageDeleteCatalogLeaf del:
                    await _leafProcessor.ProcessPackageDeleteAsync(del, token);
                    break;

                case PackageDetailsCatalogLeaf detail:
                    await _leafProcessor.ProcessPackageDetailsAsync(detail, token);
                    break;

                default:
                    _logger.LogError("Unsupported leaf type: {type}.", leaf?.GetType());
                    break;
            }
        }

        pending.Clear();
    }
}
/// <summary>
/// Downloads a catalog page while holding one slot of <c>_throttle</c>.
/// </summary>
/// <param name="pageUrl">URL of the page to download.</param>
/// <param name="cancellationToken">
/// Observed while waiting for a throttle slot and checked once more after the slot is obtained.
/// It is not forwarded to the underlying client call.
/// </param>
/// <returns>
/// The downloaded page, or <c>null</c> when cancellation had been requested by the time the
/// throttle slot was obtained.
/// </returns>
private async Task<CatalogPage> GetPageAsync(string pageUrl, CancellationToken cancellationToken)
{
    // Acquire the slot before entering the try block. If the wait throws (for example because
    // the token was cancelled) no slot was taken, so the finally block must not release one.
    // NOTE(review): _throttle is presumably a SemaphoreSlim; an unmatched Release() would then
    // inflate the permitted concurrency or throw SemaphoreFullException.
    await _throttle.WaitAsync(cancellationToken);
    try
    {
        if (cancellationToken.IsCancellationRequested)
        {
            return null;
        }

        return await _client.GetPageAsync(pageUrl);
    }
    finally
    {
        _throttle.Release();
    }
}
/// <summary>
/// Downloads the catalog index and collects the leaf items between <paramref name="minCursor"/>
/// and <paramref name="maxCursor"/> from every page that <c>GetPagesInBounds</c> selects for
/// those bounds.
/// </summary>
/// <param name="catalogClient">Client used to download the index and its pages.</param>
/// <param name="minCursor">Lower bound passed to <c>GetPagesInBounds</c> and <c>GetLeavesInBounds</c>.</param>
/// <param name="maxCursor">Upper bound passed to <c>GetPagesInBounds</c> and <c>GetLeavesInBounds</c>.</param>
/// <param name="logger">Receives a message before and after each page is processed.</param>
/// <param name="cancellationToken">Forwarded to the index download and to <c>ParallelAsync.RunAsync</c>.</param>
/// <returns>
/// The catalog index together with the collected leaf items, backed by a
/// <see cref="ConcurrentBag{T}"/> (unordered).
/// </returns>
public static async Task<(CatalogIndex, IEnumerable<CatalogLeafItem>)> LoadCatalogAsync(
    this ICatalogClient catalogClient,
    DateTimeOffset minCursor,
    DateTimeOffset maxCursor,
    ILogger logger,
    CancellationToken cancellationToken)
{
    var index = await catalogClient.GetIndexAsync(cancellationToken);

    var pagesInBounds = index.GetPagesInBounds(minCursor, maxCursor);
    var pendingPages = new ConcurrentBag<CatalogPageItem>(pagesInBounds);
    var collectedLeaves = new ConcurrentBag<CatalogLeafItem>();

    await ParallelAsync.RunAsync(
        pendingPages,
        CollectLeavesAsync,
        cancellationToken);

    return (index, collectedLeaves);

    // Downloads one page and records its in-bounds leaves, with redundant leaves excluded.
    async Task CollectLeavesAsync(CatalogPageItem pageItem, CancellationToken pageToken)
    {
        var pageUrl = pageItem.CatalogPageUrl;
        logger.LogInformation("Processing catalog page {CatalogPageUrl}...", pageUrl);

        var page = await catalogClient.GetPageAsync(pageUrl, pageToken);
        var leavesInBounds = page.GetLeavesInBounds(minCursor, maxCursor, excludeRedundantLeaves: true);
        foreach (var leaf in leavesInBounds)
        {
            collectedLeaves.Add(leaf);
        }

        logger.LogInformation("Processed catalog page {CatalogPageUrl}", pageUrl);
    }
}