/// <summary>
/// Downloads the catalog index at <paramref name="catalogIndexUrl"/> and processes its pages and
/// leaves down to the depth selected by <c>_config.Depth</c>, honoring the optional
/// <c>_config.MaxPages</c> and <c>_config.MaxCommits</c> limits.
/// </summary>
/// <param name="catalogIndexUrl">URL of the catalog index document to download.</param>
/// <returns>
/// A task that completes when every selected page has been processed, or earlier when the
/// configured depth is <c>DownloadDepth.CatalogIndex</c> or the max commits limit is reached.
/// </returns>
async Task ProcessCatalogAsync(string catalogIndexUrl)
{
    _logger.LogInformation("Downloading catalog index: {Url}", catalogIndexUrl);
    var catalogIndex = await DownloadAndParseAsync<CatalogIndex>(catalogIndexUrl);

    var cursor = _cursorProvider.GetCursor(catalogIndex.Path);
    cursor.Read();

    // NOTE(review): FilterItems appears to trim Items in place using the cursor and the given
    // upper timestamp bound (the counts logged afterwards rely on that) - confirm.
    FilterItems<CatalogIndex, CatalogPageItem>(catalogIndex, cursor, DateTimeOffset.MaxValue);
    _logger.LogDebug("Found {Count} pages with new data.", catalogIndex.Value.Items.Count);

    if (_config.MaxPages.HasValue && _config.MaxPages.Value < catalogIndex.Value.Items.Count)
    {
        catalogIndex.Value.Items = catalogIndex
            .Value
            .Items
            .Take(_config.MaxPages.Value)
            .ToList();
        _logger.LogInformation(
            "Only processing {Count} new pages, due to max pages limit.",
            catalogIndex.Value.Items.Count);
    }

    // Index-only depth: no page is downloaded, so the cursor is updated straight from the
    // (possibly truncated) list of page items.
    if (_config.Depth == DownloadDepth.CatalogIndex)
    {
        UpdateCursorFromItems<CatalogIndex, CatalogPageItem>(cursor, catalogIndex);
        return;
    }

    using (_logger.Indent())
    {
        var maxCommits = _config.MaxCommits;

        // A non-positive commit limit leaves nothing to process. Without this guard the limit
        // handling below would index pageCommits with a negative value and throw
        // ArgumentOutOfRangeException.
        if (maxCommits.HasValue && maxCommits.Value <= 0)
        {
            _logger.LogInformation("Completed {CompletedCommits} commits. Terminating.", 0);
            return;
        }

        var completedCommits = 0;
        foreach (var pageItem in catalogIndex.Value.Items)
        {
            _logger.LogInformation("Downloading catalog page: {Url}", pageItem.Url);
            var page = await DownloadAndParseAsync<CatalogPage>(pageItem.Url);

            FilterItems<CatalogPage, CatalogLeafItem>(page, cursor, pageItem.CommitTimestamp);
            _logger.LogDebug("Found {Count} new leaves in this page.", page.Value.Items.Count);

            // Distinct commit timestamps in ascending order, so that a limit of N commits can be
            // expressed as "everything up to the N-th timestamp".
            var pageCommits = page
                .Value
                .Items
                .Select(x => x.CommitTimestamp)
                .Distinct()
                .OrderBy(x => x)
                .ToList();

            var commitCount = pageCommits.Count;
            if (maxCommits.HasValue)
            {
                // Always >= 1 here: the guard above rejects non-positive limits and the loop
                // returns as soon as completedCommits reaches the limit.
                var remainingCommits = maxCommits.Value - completedCommits;
                if (pageCommits.Count > remainingCommits)
                {
                    commitCount = remainingCommits;

                    // Re-filter the page with the last allowed commit timestamp as the bound.
                    FilterItems<CatalogPage, CatalogLeafItem>(page, cursor, pageCommits[remainingCommits - 1]);
                    _logger.LogDebug(
                        "Only processing {Count} new leaves, due to max commits limit.",
                        page.Value.Items.Count);
                }
            }

            await _visitor.OnCatalogPageAsync(page.Value);

            if (_config.Depth == DownloadDepth.CatalogPage)
            {
                UpdateCursorFromItems<CatalogPage, CatalogLeafItem>(cursor, page);
            }
            else if (page.Value.Items.Any())
            {
                using (_logger.Indent())
                {
                    // Number of leaves per commit timestamp, handed to DownloadLeafAsync.
                    // NOTE(review): presumably used there to detect when a commit is fully
                    // processed so the cursor can advance - confirm against DownloadLeafAsync.
                    var commitTimestampCount = page
                        .Value
                        .Items
                        .GroupBy(x => x.CommitTimestamp)
                        .ToDictionary(x => x.Key, x => x.Count());

                    // _config.ParallelDownloads workers drain one shared queue of leaf items.
                    var work = new ConcurrentQueue<BaseCatalogItem>(page.Value.Items);
                    var tasks = Enumerable
                        .Range(0, _config.ParallelDownloads)
                        .Select(async _ =>
                        {
                            while (work.TryDequeue(out var leafItem))
                            {
                                await DownloadLeafAsync(
                                    cursor,
                                    commitTimestampCount,
                                    leafItem);
                            }
                        })
                        .ToList();

                    await Task.WhenAll(tasks);
                }
            }

            completedCommits += commitCount;
            if (maxCommits.HasValue && completedCommits >= maxCommits.Value)
            {
                _logger.LogInformation("Completed {CompletedCommits} commits. Terminating.", completedCommits);
                return;
            }
        }
    }
}