Пример #1
0
        /// <summary>
        /// Fetches a single catalog page and processes, one at a time and in list order, the leaves
        /// returned by <c>GetLeavesInBounds</c>, persisting the cursor as progress is made.
        /// </summary>
        /// <param name="minCommitTimestamp">
        /// Passed to <c>GetLeavesInBounds</c> together with <c>_settings.MaxCommitTimestamp</c> to select leaves.
        /// </param>
        /// <param name="pageItem">The page item whose <c>Url</c> is fetched.</param>
        /// <param name="cancellationToken">Polled after every successfully processed leaf.</param>
        /// <returns>
        /// <c>true</c> when every selected leaf was processed and no cancellation request was observed;
        /// <c>false</c> when a leaf failed or cancellation was observed (even after the final leaf).
        /// </returns>
        private async Task<bool> ProcessPageAsync(DateTimeOffset minCommitTimestamp, CatalogPageItem pageItem, CancellationToken cancellationToken)
        {
            var page = await _client.GetPageAsync(pageItem.Url);

            var leafItems = page.GetLeavesInBounds(
                minCommitTimestamp,
                _settings.MaxCommitTimestamp,
                _settings.ExcludeRedundantLeaves);

            _logger.LogInformation(
                "On page {page}, {leaves} out of {totalLeaves} were in the time bounds.",
                pageItem.Url,
                leafItems.Count,
                page.Items.Count);

            DateTimeOffset? newCursor = null;
            var success = true;

            for (var i = 0; i < leafItems.Count; i++)
            {
                var leafItem = leafItems[i];

                // Reaching a leaf with a different commit timestamp means every leaf carrying the
                // previous timestamp succeeded, so that timestamp can be persisted as progress.
                if (newCursor.HasValue && newCursor.Value != leafItem.CommitTimestamp)
                {
                    await _cursor.SetAsync(newCursor.Value);
                }

                newCursor = leafItem.CommitTimestamp;

                success = await ProcessLeafAsync(leafItem);

                if (!success)
                {
                    // Leaf i itself failed, so leaves i..Count-1 are incomplete.
                    _logger.LogWarning(
                        "{unprocessedLeaves} out of {leaves} leaves were left incomplete due to a processing failure.",
                        leafItems.Count - i,
                        leafItems.Count);
                    break;
                }

                if (cancellationToken.IsCancellationRequested)
                {
                    // Leaf i completed successfully; only the leaves after it remain unprocessed.
                    _logger.LogWarning(
                        "{unprocessedLeaves} out of {leaves} leaves were left incomplete due to a cancellation request.",
                        leafItems.Count - i - 1,
                        leafItems.Count);

                    // Reported as unsuccessful so the final cursor write below is skipped.
                    success = false;

                    break;
                }
            }

            // Only commit the last seen timestamp when the whole page was handled without interruption.
            if (newCursor.HasValue && success)
            {
                await _cursor.SetAsync(newCursor.Value);
            }

            return success;
        }
Пример #2
0
        /// <summary>
        /// Gathers the leaf items whose commit timestamp is later than <paramref name="cursor"/>,
        /// looking only at index pages whose own commit timestamp is later than <paramref name="cursor"/>.
        /// </summary>
        /// <param name="catalogClient">Client used to download each catalog page.</param>
        /// <param name="catalogIndex">Index whose <c>Items</c> list the available pages.</param>
        /// <param name="cursor">Pages and leaves at or before this timestamp are skipped.</param>
        /// <param name="cancellationToken">Forwarded to <c>ParallelHelper.ProcessInParallel</c>.</param>
        /// <returns>The collected leaf items; the bag imposes no ordering guarantee.</returns>
        private async Task<IEnumerable<CatalogLeafItem>> GetCatalogLeafItems(
            ICatalogClient catalogClient,
            CatalogIndex catalogIndex,
            DateTimeOffset cursor,
            CancellationToken cancellationToken)
        {
            var pendingPageUrls = new ConcurrentBag<string>(
                from pageEntry in catalogIndex.Items
                where pageEntry.CommitTimestamp > cursor
                select pageEntry.CatalogPageUrl);

            // Shared sink for the page workers.
            // NOTE(review): presumably the workers run concurrently, hence the concurrent collection.
            var collectedLeaves = new ConcurrentBag<CatalogLeafItem>();

            // Downloads one page and copies its newer-than-cursor leaves into the shared bag.
            async Task CollectPageLeavesAsync(string pageUrl, CancellationToken pageToken)
            {
                _logger.LogInformation("Processing catalog page {CatalogPageUrl}...", pageUrl);

                var catalogPage = await catalogClient.GetPageAsync(pageUrl, pageToken);
                var newerLeaves = catalogPage.Items.Where(leaf => leaf.CommitTimestamp > cursor);

                foreach (var leaf in newerLeaves)
                {
                    collectedLeaves.Add(leaf);
                }

                _logger.LogInformation("Processed catalog page {CatalogPageUrl}", pageUrl);
            }

            await ParallelHelper.ProcessInParallel(
                pendingPageUrls,
                CollectPageLeavesAsync,
                cancellationToken);

            return collectedLeaves;
        }
Пример #3
0
        /// <summary>
        /// Fetches a catalog page, downloads its in-bounds leaves in batches, hands each downloaded
        /// leaf to the leaf processor, and finally stores the last seen commit timestamp in the cursor.
        /// </summary>
        /// <param name="minCommitTimestamp">
        /// Passed to <c>GetLeavesInBounds</c> together with <c>_settings.MaxCommitTimestamp</c> to select leaves.
        /// </param>
        /// <param name="pageItem">The page item whose <c>Url</c> is fetched.</param>
        /// <param name="token">Forwarded to every asynchronous call made here.</param>
        private async Task ProcessPageAsync(DateTimeOffset minCommitTimestamp, CatalogPageItem pageItem, CancellationToken token)
        {
            // Number of leaf fetches started before waiting for all of them to finish.
            const int batchSize = 25;

            var page = await _client.GetPageAsync(pageItem.Url, token);

            var leafItems = page.GetLeavesInBounds(
                minCommitTimestamp,
                _settings.MaxCommitTimestamp,
                _settings.ExcludeRedundantLeaves);

            _logger.LogInformation(
                "On page {page}, {leaves} out of {totalLeaves} were in the time bounds.",
                pageItem.Url,
                leafItems.Count,
                page.Items.Count);

            DateTimeOffset? latestCommit = null;
            var pending = new List<Task<CatalogLeaf>>(batchSize);

            foreach (var leafItem in leafItems)
            {
                latestCommit = leafItem.CommitTimestamp;
                pending.Add(ProcessLeafAsync(leafItem, token));

                if (pending.Count == batchSize)
                {
                    await DrainPendingAsync();
                }
            }

            // Flush the final, partially filled batch.
            if (pending.Count > 0)
            {
                await DrainPendingAsync();
            }

            if (latestCommit.HasValue)
            {
                await _cursor.SetAsync(latestCommit.Value, token);
            }

            // Waits for the whole batch, then dispatches the leaves one by one in the order they were queued.
            async Task DrainPendingAsync()
            {
                var fetchedLeaves = await Task.WhenAll(pending);

                foreach (var leaf in fetchedLeaves)
                {
                    switch (leaf)
                    {
                        case PackageDeleteCatalogLeaf deleted:
                            await _leafProcessor.ProcessPackageDeleteAsync(deleted, token);
                            break;

                        case PackageDetailsCatalogLeaf details:
                            await _leafProcessor.ProcessPackageDetailsAsync(details, token);
                            break;

                        default:
                            // Also reached for a null leaf; it is logged and skipped rather than failing the page.
                            _logger.LogError("Unsupported leaf type: {type}.", leaf?.GetType());
                            break;
                    }
                }

                pending.Clear();
            }
        }
Пример #4
0
        /// <summary>
        /// Fetches a catalog page while holding one slot of <c>_throttle</c>, limiting how many
        /// fetches run at the same time.
        /// </summary>
        /// <param name="pageUrl">URL of the catalog page to download.</param>
        /// <param name="cancellationToken">
        /// Cancels the wait for a throttle slot; also checked once after the slot is acquired.
        /// </param>
        /// <returns>
        /// The downloaded page, or <c>null</c> when cancellation was requested after the slot was acquired.
        /// </returns>
        private async Task<CatalogPage> GetPageAsync(string pageUrl, CancellationToken cancellationToken)
        {
            // Acquire OUTSIDE the try block: if the wait is cancelled or throws, no slot was taken,
            // so the finally block must not release one (doing so would raise the available count
            // or throw). NOTE(review): assumes _throttle is a SemaphoreSlim - confirm.
            await _throttle.WaitAsync(cancellationToken);

            try
            {
                if (cancellationToken.IsCancellationRequested)
                {
                    return null;
                }

                return await _client.GetPageAsync(pageUrl);
            }
            finally
            {
                _throttle.Release();
            }
        }
        /// <summary>
        /// Downloads the catalog index and gathers the leaf items from every page that
        /// <c>GetPagesInBounds</c> selects for the given cursor range.
        /// </summary>
        /// <param name="catalogClient">Client used to download the index and its pages.</param>
        /// <param name="minCursor">First bound passed to <c>GetPagesInBounds</c> and <c>GetLeavesInBounds</c>.</param>
        /// <param name="maxCursor">Second bound passed to <c>GetPagesInBounds</c> and <c>GetLeavesInBounds</c>.</param>
        /// <param name="logger">Receives a message before and after each page is handled.</param>
        /// <param name="cancellationToken">Forwarded to the index download and to <c>ParallelAsync.RunAsync</c>.</param>
        /// <returns>The catalog index paired with the collected leaf items (no ordering guarantee).</returns>
        public static async Task<(CatalogIndex, IEnumerable<CatalogLeafItem>)> LoadCatalogAsync(
            this ICatalogClient catalogClient,
            DateTimeOffset minCursor,
            DateTimeOffset maxCursor,
            ILogger logger,
            CancellationToken cancellationToken)
        {
            var index = await catalogClient.GetIndexAsync(cancellationToken);

            var pagesToVisit = new ConcurrentBag<CatalogPageItem>(
                index.GetPagesInBounds(minCursor, maxCursor));

            // Shared sink for the page workers.
            // NOTE(review): presumably the workers run concurrently, hence the concurrent collection.
            var gatheredLeaves = new ConcurrentBag<CatalogLeafItem>();

            // Downloads one page and copies its in-bounds, non-redundant leaves into the shared bag.
            async Task GatherPageLeavesAsync(CatalogPageItem pageItem, CancellationToken pageToken)
            {
                var pageUrl = pageItem.CatalogPageUrl;

                logger.LogInformation("Processing catalog page {CatalogPageUrl}...", pageUrl);

                var catalogPage = await catalogClient.GetPageAsync(pageUrl, pageToken);

                foreach (var leaf in catalogPage.GetLeavesInBounds(minCursor, maxCursor, excludeRedundantLeaves: true))
                {
                    gatheredLeaves.Add(leaf);
                }

                logger.LogInformation("Processed catalog page {CatalogPageUrl}", pageUrl);
            }

            await ParallelAsync.RunAsync(
                pagesToVisit,
                GatherPageLeavesAsync,
                cancellationToken);

            return (index, gatheredLeaves);
        }