/// <summary>
/// Rebuilds the V3 resources by enqueueing one message per known package ID.
/// Direct (non-queued) processing is not supported; logs an error and returns
/// when <c>--enqueue</c> was not specified.
/// </summary>
/// <param name="cancellationToken">Token to cancel the operation.</param>
public async Task RunAsync(CancellationToken cancellationToken = default)
{
    if (!_options.Value.Enqueue)
    {
        _logger.LogError("V3 rebuild does not support direct processing at this time, please use --enqueue");
        return;
    }

    // Rebuild everything committed up to the stored cursor. A missing cursor
    // yields an empty (MinValue, MinValue] window, i.e. nothing to process.
    var minCursor = DateTimeOffset.MinValue;
    var maxCursor = await _cursor.GetAsync(cancellationToken) ?? DateTimeOffset.MinValue;

    _logger.LogInformation("Finding catalog leafs committed before time {Cursor}...", maxCursor);

    var catalogClient = _clientFactory.CreateCatalogClient();

    // The catalog index itself is not needed here, only the leaf items.
    var (_, catalogLeafItems) = await catalogClient.LoadCatalogAsync(
        minCursor,
        maxCursor,
        _logger,
        cancellationToken);

    // Enqueue one message per unique (lowercased) package ID.
    var messages = catalogLeafItems
        .Select(l => l.PackageId.ToLowerInvariant())
        .Distinct()
        .Select(ToMessage)
        .ToList();

    await _queue.SendAsync(messages, cancellationToken);
}
/// <summary>
/// Resolves the full "package details" leaf for the given catalog item and
/// forwards it to the indexing pipeline.
/// </summary>
/// <param name="catalogLeafItem">The catalog leaf item to process.</param>
/// <param name="cancellationToken">Token to cancel the operation.</param>
private async Task ProcessPackageDetailsAsync(
    CatalogLeafItem catalogLeafItem,
    CancellationToken cancellationToken)
{
    var client = _clientFactory.CreateCatalogClient();

    var detailsLeaf = await client.GetPackageDetailsLeafAsync(
        catalogLeafItem.CatalogLeafUrl,
        cancellationToken);

    await IndexPackageAsync(detailsLeaf, cancellationToken);
}
/// <summary>
/// Imports all catalog leafs committed after the stored cursor, de-duplicates
/// them per package identity, processes the survivors, and advances the cursor.
/// Adding an individual package by ID is not supported.
/// </summary>
/// <param name="cancellationToken">Token to cancel the operation.</param>
public async Task RunAsync(CancellationToken cancellationToken = default)
{
    if (_options.Value.PackageId != null)
    {
        _logger.LogError("Adding individual packages is not supported at this time");
        return;
    }

    // Import everything committed after the stored cursor; a missing cursor
    // means a fresh import from the beginning of the catalog.
    var maxCursor = DateTimeOffset.MaxValue;
    var minCursor = await _cursor.GetAsync(cancellationToken) ?? DateTimeOffset.MinValue;

    _logger.LogInformation("Finding catalog leafs committed after time {Cursor}...", minCursor);

    var catalogClient = _clientFactory.CreateCatalogClient();
    var (catalogIndex, catalogLeafItems) = await catalogClient.LoadCatalogAsync(
        minCursor,
        maxCursor,
        _logger,
        cancellationToken);

    _logger.LogInformation("Removing duplicate catalog leafs...");

    // Keep only the latest leaf per package identity, and skip identities whose
    // latest leaf is not a "package details" leaf (e.g. the package was deleted).
    var latestLeafs = catalogLeafItems
        .GroupBy(l => new PackageIdentity(l.PackageId, l.ParsePackageVersion()))
        .Select(g => g.OrderByDescending(l => l.CommitTimestamp).First())
        .Where(l => l.IsPackageDetails())
        .ToList();

    // Materialized list: use the Count property rather than the Count() extension.
    _logger.LogInformation("Processing {CatalogLeafs} catalog leafs...", latestLeafs.Count);

    await _leafProcessor.ProcessAsync(latestLeafs, cancellationToken);

    // Advance the cursor so the next run resumes from this catalog snapshot.
    await _cursor.SetAsync(catalogIndex.CommitTimestamp, cancellationToken);

    _logger.LogInformation("Finished processing catalog leafs");
}
/// <summary>
/// Rebuilds the search index: collects the distinct package IDs committed up to
/// the stored cursor, then pipes index actions from one producer to several
/// consumers through a bounded channel.
/// </summary>
/// <param name="cancellationToken">Token to cancel the operation.</param>
public async Task RunAsync(CancellationToken cancellationToken)
{
    // Cap on buffered index actions; producers block when the channel is full.
    const int maxQueuedIndexActions = 5000;

    // Number of parallel consumers draining the channel.
    const int consumerCount = 3;

    // Rebuild everything committed up to the stored cursor. A missing cursor
    // yields an empty (MinValue, MinValue] window, i.e. nothing to process.
    var minCursor = DateTimeOffset.MinValue;
    var maxCursor = await _cursor.GetAsync(cancellationToken) ?? DateTimeOffset.MinValue;

    _logger.LogInformation("Finding catalog leafs committed before time {Cursor}...", maxCursor);

    var catalogClient = _clientFactory.CreateCatalogClient();

    // The catalog index itself is not needed here, only the leaf items.
    var (_, catalogLeafItems) = await catalogClient.LoadCatalogAsync(
        minCursor,
        maxCursor,
        _logger,
        cancellationToken);

    _logger.LogInformation("Removing duplicate catalog leafs...");

    // Keep the latest leaf per package identity, drop non-details leafs, then
    // reduce to the distinct set of package IDs (case-insensitively).
    var packageIds = catalogLeafItems
        .GroupBy(l => new PackageIdentity(l.PackageId, l.ParsePackageVersion()))
        .Select(g => g.OrderByDescending(l => l.CommitTimestamp).First())
        .Where(l => l.IsPackageDetails())
        .Select(l => l.PackageId)
        .Distinct(StringComparer.OrdinalIgnoreCase)
        .ToList();

    _logger.LogInformation("Processing {PackageCount} packages", packageIds.Count);

    var channel = Channel.CreateBounded<IndexAction<KeyedDocument>>(
        new BoundedChannelOptions(maxQueuedIndexActions)
        {
            FullMode = BoundedChannelFullMode.Wait,
            SingleWriter = false,
            SingleReader = false,
        });

    // One producer feeds the channel; several consumers drain it in parallel.
    var tasks = new List<Task>
    {
        ProduceIndexActionsAsync(
            channel.Writer,
            new ConcurrentBag<string>(packageIds),
            cancellationToken),
    };

    for (var i = 0; i < consumerCount; i++)
    {
        tasks.Add(ConsumeIndexActionsAsync(channel.Reader, cancellationToken));
    }

    await Task.WhenAll(tasks);

    _logger.LogInformation("Finished rebuilding search");
}
/// <summary>
/// Fetches the catalog pages whose commit timestamps fall within
/// (<paramref name="minCursor"/>, <paramref name="maxCursor"/>] and writes each
/// in-bounds leaf to <paramref name="channel"/>, using multiple parallel workers.
/// A failed page fetch is retried every 5 seconds until it succeeds or
/// cancellation is requested. The channel is completed before returning.
/// </summary>
/// <param name="channel">Destination for the catalog leaf items.</param>
/// <param name="minCursor">Exclusive lower commit-timestamp bound.</param>
/// <param name="maxCursor">Inclusive upper commit-timestamp bound.</param>
/// <param name="cancellationToken">Token to cancel the operation.</param>
/// <returns>
/// The commit timestamp of the last fetched page, or <paramref name="minCursor"/>
/// when there was nothing to do.
/// </returns>
public async Task<DateTimeOffset> ProduceAsync(
    ChannelWriter<CatalogLeafItem> channel,
    DateTimeOffset minCursor,
    DateTimeOffset maxCursor,
    CancellationToken cancellationToken)
{
    _logger.LogInformation("Fetching catalog index...");

    var client = _factory.CreateCatalogClient();
    var catalogIndex = await client.GetIndexAsync(cancellationToken);

    // Only pages whose commit timestamps fall inside the cursor window matter.
    var pages = catalogIndex.GetPagesInBounds(minCursor, maxCursor);

    // Optionally cap the amount of work performed in a single run.
    var maxPages = _options.Value.MaxPages;
    if (maxPages.HasValue)
    {
        pages = pages.Take(maxPages.Value).ToList();
    }

    if (!pages.Any() || minCursor == maxCursor)
    {
        _logger.LogInformation("No pending leaf items on the catalog.");
        channel.Complete();
        return minCursor;
    }

    var pendingPages = new ConcurrentBag<CatalogPageItem>(pages);
    var workerCount = Math.Min(_options.Value.ProducerWorkers, pages.Count);

    _logger.LogInformation(
        "Fetching {Pages} catalog pages using {ProducerWorkers} workers...",
        pages.Count,
        workerCount);

    // Each worker drains the shared bag of pages; see FetchPagesAsync below.
    var workers = new List<Task>();
    for (var i = 0; i < workerCount; i++)
    {
        workers.Add(FetchPagesAsync());
    }

    await Task.WhenAll(workers);

    // NOTE(review): assumes GetPagesInBounds returns pages in commit order, so
    // the last page carries the newest commit timestamp — confirm with the client.
    var cursor = pages.Last().CommitTimestamp;

    _logger.LogInformation("Fetched catalog pages up to cursor {Cursor}", cursor);

    channel.Complete();
    return cursor;

    // Worker: take pages from the shared bag and retry each page until it is
    // processed successfully (or the token is cancelled).
    async Task FetchPagesAsync()
    {
        await Task.Yield();

        while (pendingPages.TryTake(out var pageItem))
        {
            while (true)
            {
                try
                {
                    _logger.LogDebug("Processing catalog page {PageUrl}...", pageItem.CatalogPageUrl);

                    var page = await client.GetPageAsync(pageItem.CatalogPageUrl, cancellationToken);

                    foreach (var leaf in page.Items)
                    {
                        // Don't process leaves that are not within the cursors.
                        if (leaf.CommitTimestamp <= minCursor)
                        {
                            continue;
                        }

                        if (leaf.CommitTimestamp > maxCursor)
                        {
                            continue;
                        }

                        // Fast synchronous path; fall back to the async write
                        // when the channel is full.
                        if (!channel.TryWrite(leaf))
                        {
                            await channel.WriteAsync(leaf, cancellationToken);
                        }
                    }

                    _logger.LogDebug("Processed catalog page {PageUrl}.", pageItem.CatalogPageUrl);
                    break;
                }
                catch (Exception e) when (!cancellationToken.IsCancellationRequested)
                {
                    _logger.LogError(e, "Retrying catalog page {PageUrl} in 5 seconds...", pageItem.CatalogPageUrl);

                    await Task.Delay(TimeSpan.FromSeconds(5), cancellationToken);
                }
            }
        }
    }
}