示例#1
0
        /// <summary>
        /// Loads the catalog between <see cref="DateTimeOffset.MinValue"/> and the stored cursor,
        /// then sends one queue message per distinct (lower-cased) package ID found in it.
        /// </summary>
        /// <param name="cancellationToken">Passed to the cursor read, the catalog load and the queue send.</param>
        public async Task RunAsync(CancellationToken cancellationToken = default)
        {
            // Only the enqueue mode is implemented; any other mode is reported and skipped.
            var enqueueRequested = _options.Value.Enqueue;
            if (!enqueueRequested)
            {
                _logger.LogError("V3 rebuild does not support direct processing at this time, please use --enqueue");
                return;
            }

            // A missing cursor falls back to the earliest representable time.
            var storedCursor = await _cursor.GetAsync(cancellationToken);
            var upperBound = storedCursor ?? DateTimeOffset.MinValue;
            var lowerBound = DateTimeOffset.MinValue;

            _logger.LogInformation("Finding catalog leafs committed before time {Cursor}...", upperBound);

            var client = _clientFactory.CreateCatalogClient();

            // The catalog index half of the result is not needed here.
            var (_, leafItems) = await client.LoadCatalogAsync(
                lowerBound,
                upperBound,
                _logger,
                cancellationToken);

            // Collapse the leaves to one entry per package ID, compared in lower case.
            var distinctIds = leafItems
                .Select(leaf => leaf.PackageId.ToLowerInvariant())
                .Distinct();

            var messages = distinctIds
                .Select(ToMessage)
                .ToList();

            await _queue.SendAsync(messages, cancellationToken);
        }
示例#2
0
        /// <summary>
        /// Fetches the package details leaf that <paramref name="catalogLeafItem"/> points to
        /// and hands it to <c>IndexPackageAsync</c>.
        /// </summary>
        /// <param name="catalogLeafItem">Catalog item whose <c>CatalogLeafUrl</c> is fetched.</param>
        /// <param name="cancellationToken">Passed to both the fetch and the indexing call.</param>
        private async Task ProcessPackageDetailsAsync(
            CatalogLeafItem catalogLeafItem,
            CancellationToken cancellationToken)
        {
            var packageDetails = await _clientFactory
                .CreateCatalogClient()
                .GetPackageDetailsLeafAsync(catalogLeafItem.CatalogLeafUrl, cancellationToken);

            await IndexPackageAsync(packageDetails, cancellationToken);
        }
        /// <summary>
        /// Loads the catalog from the stored cursor onwards, keeps the most recently committed
        /// leaf per package identity (only when that leaf is a package details leaf), passes the
        /// result to the leaf processor and then stores the catalog index's commit timestamp as
        /// the new cursor.
        /// </summary>
        /// <param name="cancellationToken">Passed to every asynchronous call made by this method.</param>
        public async Task RunAsync(CancellationToken cancellationToken = default)
        {
            // Targeting a single package is not implemented; report it and stop.
            var singlePackageRequested = _options.Value.PackageId != null;
            if (singlePackageRequested)
            {
                _logger.LogError("Adding individual packages is not supported at this time");
                return;
            }

            // A missing cursor falls back to the earliest representable time.
            var storedCursor = await _cursor.GetAsync(cancellationToken);
            var lowerBound = storedCursor ?? DateTimeOffset.MinValue;
            var upperBound = DateTimeOffset.MaxValue;

            _logger.LogInformation("Finding catalog leafs committed after time {Cursor}...", lowerBound);

            var client = _clientFactory.CreateCatalogClient();

            var (catalogIndex, catalogLeafItems) = await client.LoadCatalogAsync(
                lowerBound,
                upperBound,
                _logger,
                cancellationToken);

            _logger.LogInformation("Removing duplicate catalog leafs...");

            // Step 1: for every (id, version) pair keep only the newest leaf.
            var newestPerIdentity = catalogLeafItems
                .GroupBy(leaf => new PackageIdentity(leaf.PackageId, leaf.ParsePackageVersion()))
                .Select(sameIdentity => sameIdentity
                    .OrderByDescending(leaf => leaf.CommitTimestamp)
                    .First());

            // Step 2: drop identities whose newest leaf is not a package details leaf.
            // The filter runs after step 1 on purpose, so an older details leaf is not revived.
            catalogLeafItems = newestPerIdentity
                .Where(leaf => leaf.IsPackageDetails())
                .ToList();

            _logger.LogInformation("Processing {CatalogLeafs} catalog leafs...", catalogLeafItems.Count());

            await _leafProcessor.ProcessAsync(catalogLeafItems, cancellationToken);

            // The cursor is only moved once processing has completed without throwing.
            await _cursor.SetAsync(catalogIndex.CommitTimestamp, cancellationToken);

            _logger.LogInformation("Finished processing catalog leafs");
        }
        /// <summary>
        /// Loads the catalog between <see cref="DateTimeOffset.MinValue"/> and the stored cursor,
        /// reduces it to the distinct package IDs whose newest leaf is a package details leaf,
        /// and then runs one producer and three consumers over a bounded channel of index actions.
        /// </summary>
        /// <param name="cancellationToken">Passed to every asynchronous call made by this method.</param>
        public async Task RunAsync(CancellationToken cancellationToken)
        {
            // Number of concurrent readers draining the channel.
            const int consumerCount = 3;

            // A missing cursor falls back to the earliest representable time.
            var storedCursor = await _cursor.GetAsync(cancellationToken);
            var upperBound = storedCursor ?? DateTimeOffset.MinValue;
            var lowerBound = DateTimeOffset.MinValue;

            _logger.LogInformation("Finding catalog leafs committed before time {Cursor}...", upperBound);

            var client = _clientFactory.CreateCatalogClient();

            // The catalog index half of the result is not needed here.
            var (_, leafItems) = await client.LoadCatalogAsync(
                lowerBound,
                upperBound,
                _logger,
                cancellationToken);

            _logger.LogInformation("Removing duplicate catalog leafs...");

            // Newest leaf per (id, version); identities whose newest leaf is not a details leaf are dropped.
            var newestDetailsLeaves = leafItems
                .GroupBy(leaf => new PackageIdentity(leaf.PackageId, leaf.ParsePackageVersion()))
                .Select(sameIdentity => sameIdentity
                    .OrderByDescending(leaf => leaf.CommitTimestamp)
                    .First())
                .Where(leaf => leaf.IsPackageDetails());

            // Package IDs are compared case-insensitively when de-duplicating.
            var packageIds = newestDetailsLeaves
                .Select(leaf => leaf.PackageId)
                .Distinct(StringComparer.OrdinalIgnoreCase)
                .ToList();

            _logger.LogInformation("Processing {PackageCount} packages", packageIds.Count);

            // Bounded channel: writers wait once 5000 actions are buffered.
            var channelOptions = new BoundedChannelOptions(5000)
            {
                FullMode     = BoundedChannelFullMode.Wait,
                SingleWriter = false,
                SingleReader = false,
            };
            var channel = Channel.CreateBounded<IndexAction<KeyedDocument>>(channelOptions);

            // Slot 0 holds the producer; the remaining slots hold the consumers.
            // The producer is started first, then each consumer in turn.
            var pipeline = new Task[consumerCount + 1];

            pipeline[0] = ProduceIndexActionsAsync(
                channel.Writer,
                new ConcurrentBag<string>(packageIds),
                cancellationToken);

            for (var slot = 1; slot < pipeline.Length; slot++)
            {
                pipeline[slot] = ConsumeIndexActionsAsync(
                    channel.Reader,
                    cancellationToken);
            }

            await Task.WhenAll(pipeline);

            _logger.LogInformation("Finished rebuilding search");
        }
        /// <summary>
        /// Fetches the catalog pages selected by the two cursors and writes every leaf item whose
        /// commit timestamp is greater than <paramref name="minCursor"/> and not greater than
        /// <paramref name="maxCursor"/> to <paramref name="channel"/>.
        /// </summary>
        /// <param name="channel">
        /// Writer that receives the leaf items. It is completed before this method returns
        /// normally; it is NOT completed when the method exits with an exception.
        /// </param>
        /// <param name="minCursor">Exclusive lower bound on leaf commit timestamps.</param>
        /// <param name="maxCursor">Inclusive upper bound on leaf commit timestamps.</param>
        /// <param name="cancellationToken">
        /// Passed to the index fetch, the page fetches, the channel writes and the retry delay.
        /// </param>
        /// <returns>
        /// <paramref name="minCursor"/> when no page is selected or both cursors are equal;
        /// otherwise the commit timestamp of the last selected page.
        /// </returns>
        public async Task<DateTimeOffset> ProduceAsync(
            ChannelWriter<CatalogLeafItem> channel,
            DateTimeOffset minCursor,
            DateTimeOffset maxCursor,
            CancellationToken cancellationToken)
        {
            _logger.LogInformation("Fetching catalog index...");
            var client       = _factory.CreateCatalogClient();
            var catalogIndex = await client.GetIndexAsync(cancellationToken);

            var pages = catalogIndex.GetPagesInBounds(minCursor, maxCursor);

            // Optional cap on how many pages a single run may fetch.
            var maxPages = _options.Value.MaxPages;

            if (maxPages.HasValue)
            {
                pages = pages.Take(maxPages.Value).ToList();
            }

            if (pages.Count == 0 || minCursor == maxCursor)
            {
                _logger.LogInformation("No pending leaf items on the catalog.");
                channel.Complete();
                return minCursor;
            }

            var work = new ConcurrentBag<CatalogPageItem>(pages);

            // Never run with fewer than one worker. With a configured value of 0 no page would
            // be fetched while the last page's timestamp is still returned as the new cursor,
            // and a negative value would make Enumerable.Repeat throw.
            // pages.Count is at least 1 here, so the upper bound is always >= 1.
            var workers = Math.Max(1, Math.Min(_options.Value.ProducerWorkers, pages.Count));

            _logger.LogInformation(
                "Fetching {Pages} catalog pages using {ProducerWorkers} workers...",
                pages.Count,
                workers);

            // One lazily-created task per worker; Task.WhenAll below enumerates the sequence
            // and thereby starts them.
            var tasks = Enumerable
                        .Repeat(0, workers)
                        .Select(async _ =>
            {
                // Yield so the lambda hands its task back right away instead of running the
                // first page fetch inline while the sequence is still being enumerated.
                await Task.Yield();

                // Each worker pulls pages from the shared bag until it is empty.
                while (work.TryTake(out var pageItem))
                {
                    // A page is retried until it succeeds or cancellation is requested.
                    var done = false;
                    while (!done)
                    {
                        try
                        {
                            _logger.LogDebug("Processing catalog page {PageUrl}...", pageItem.CatalogPageUrl);
                            var page = await client.GetPageAsync(pageItem.CatalogPageUrl, cancellationToken);

                            foreach (var leaf in page.Items)
                            {
                                // Don't process leaves that are not within the cursors.
                                if (leaf.CommitTimestamp <= minCursor)
                                {
                                    continue;
                                }
                                if (leaf.CommitTimestamp > maxCursor)
                                {
                                    continue;
                                }

                                // Try the synchronous write first; fall back to the awaiting
                                // write only when the synchronous attempt is refused.
                                if (!channel.TryWrite(leaf))
                                {
                                    await channel.WriteAsync(leaf, cancellationToken);
                                }
                            }

                            _logger.LogDebug("Processed catalog page {PageUrl}.", pageItem.CatalogPageUrl);
                            done = true;
                        }
                        // Once cancellation is requested the filter is false, so the exception
                        // propagates out of the worker instead of being retried.
                        catch (Exception e) when (!cancellationToken.IsCancellationRequested)
                        {
                            // NOTE(review): a retry re-fetches the page and writes all of its
                            // in-bounds leaves again, so leaves written before the failure are
                            // written a second time.
                            _logger.LogError(e, "Retrying catalog page {PageUrl} in 5 seconds...", pageItem.CatalogPageUrl);
                            await Task.Delay(TimeSpan.FromSeconds(5), cancellationToken);
                        }
                    }
                }
            });

            await Task.WhenAll(tasks);

            // NOTE(review): this is the last selected page's timestamp, independent of
            // maxCursor; whether it can exceed maxCursor depends on GetPagesInBounds - confirm.
            var cursor = pages.Last().CommitTimestamp;

            _logger.LogInformation("Fetched catalog pages up to cursor {Cursor}", cursor);
            channel.Complete();

            return cursor;
        }