Esempio n. 1
0
        /// <summary>
        /// Downloads the catalog index at <paramref name="catalogIndexUrl"/> and processes the pages it
        /// lists, going as deep as <c>_config.Depth</c> allows: the index only, the index and its pages,
        /// or (for any other depth) every leaf item of every page.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Items are narrowed with <c>FilterItems</c> against the cursor obtained for the index path.
        /// NOTE(review): <c>FilterItems</c> is defined elsewhere; it presumably keeps only items newer
        /// than the cursor and not newer than the supplied upper bound — confirm.
        /// </para>
        /// <para>
        /// <c>_config.MaxPages</c> caps the number of pages taken from the index. <c>_config.MaxCommits</c>
        /// caps the number of distinct commit timestamps handled across all pages; once that budget is
        /// used up the method returns. A budget of zero or less returns before any page is downloaded.
        /// </para>
        /// </remarks>
        /// <param name="catalogIndexUrl">URL of the catalog index document to download.</param>
        /// <returns>A task that completes when processing has finished or a configured limit is hit.</returns>
        async Task ProcessCatalogAsync(string catalogIndexUrl)
        {
            _logger.LogInformation("Downloading catalog index: {Url}", catalogIndexUrl);
            var catalogIndex = await DownloadAndParseAsync<CatalogIndex>(catalogIndexUrl);

            var cursor = _cursorProvider.GetCursor(catalogIndex.Path);

            cursor.Read();
            FilterItems<CatalogIndex, CatalogPageItem>(catalogIndex, cursor, DateTimeOffset.MaxValue);
            _logger.LogDebug("Found {Count} pages with new data.", catalogIndex.Value.Items.Count);

            // Honour the optional page limit by keeping only the first MaxPages entries.
            if (_config.MaxPages.HasValue &&
                _config.MaxPages.Value < catalogIndex.Value.Items.Count)
            {
                catalogIndex.Value.Items = catalogIndex
                    .Value
                    .Items
                    .Take(_config.MaxPages.Value)
                    .ToList();
                _logger.LogInformation("Only processing {Count} new pages, due to max pages limit.", catalogIndex.Value.Items.Count);
            }

            // Index-only depth: record progress from the page items and stop without downloading pages.
            if (_config.Depth == DownloadDepth.CatalogIndex)
            {
                UpdateCursorFromItems<CatalogIndex, CatalogPageItem>(cursor, catalogIndex);
                return;
            }

            // A non-positive commit budget means no commit may be processed. Bail out before touching
            // any page: the limiting logic in the loop indexes pageCommits[remainingCommits - 1], which
            // would throw ArgumentOutOfRangeException for a budget of zero or less.
            if (_config.MaxCommits.HasValue && _config.MaxCommits.Value <= 0)
            {
                _logger.LogInformation("Completed {CompletedCommits} commits. Terminating.", 0);
                return;
            }

            using (_logger.Indent())
            {
                var completedCommits = 0;
                foreach (var pageItem in catalogIndex.Value.Items)
                {
                    _logger.LogInformation("Downloading catalog page: {Url}", pageItem.Url);
                    var page = await DownloadAndParseAsync<CatalogPage>(pageItem.Url);

                    FilterItems<CatalogPage, CatalogLeafItem>(page, cursor, pageItem.CommitTimestamp);
                    _logger.LogDebug("Found {Count} new leaves in this page.", page.Value.Items.Count);

                    // Distinct commit timestamps of the remaining leaves, oldest first.
                    var pageCommits = page
                        .Value
                        .Items
                        .Select(x => x.CommitTimestamp)
                        .Distinct()
                        .OrderBy(x => x)
                        .ToList();
                    var commitCount = pageCommits.Count;
                    if (_config.MaxCommits.HasValue)
                    {
                        // Always at least 1 here: non-positive budgets were rejected before the loop and
                        // the check at the end of each iteration returns once the budget is used up.
                        var remainingCommits = _config.MaxCommits.Value - completedCommits;
                        if (pageCommits.Count > remainingCommits)
                        {
                            // Re-filter the page using the last commit that still fits in the budget
                            // as the upper bound.
                            commitCount = remainingCommits;
                            FilterItems<CatalogPage, CatalogLeafItem>(page, cursor, pageCommits[remainingCommits - 1]);
                            _logger.LogDebug("Only processing {Count} new leaves, due to max commits limit.", page.Value.Items.Count);
                        }
                    }

                    await _visitor.OnCatalogPageAsync(page.Value);

                    if (_config.Depth == DownloadDepth.CatalogPage)
                    {
                        UpdateCursorFromItems<CatalogPage, CatalogLeafItem>(cursor, page);
                    }
                    else if (page.Value.Items.Any())
                    {
                        using (_logger.Indent())
                        {
                            // Number of leaves per commit timestamp, handed to DownloadLeafAsync.
                            // NOTE(review): presumably used to tell when a whole commit has been
                            // processed so the cursor can advance — confirm in DownloadLeafAsync.
                            var commitTimestampCount = page
                                .Value
                                .Items
                                .GroupBy(x => x.CommitTimestamp)
                                .ToDictionary(x => x.Key, x => x.Count());

                            var work = new ConcurrentQueue<BaseCatalogItem>(page.Value.Items);

                            // Start ParallelDownloads workers that drain the shared queue until it is empty.
                            var tasks = Enumerable
                                .Range(0, _config.ParallelDownloads)
                                .Select(async _ =>
                                {
                                    while (work.TryDequeue(out var leafItem))
                                    {
                                        await DownloadLeafAsync(
                                            cursor,
                                            commitTimestampCount,
                                            leafItem);
                                    }
                                })
                                .ToList();
                            await Task.WhenAll(tasks);
                        }
                    }

                    completedCommits += commitCount;

                    if (_config.MaxCommits.HasValue && completedCommits >= _config.MaxCommits.Value)
                    {
                        _logger.LogInformation("Completed {CompletedCommits} commits. Terminating.", completedCommits);
                        return;
                    }
                }
            }
        }