/// <summary>
/// Gathers outdated-status checks from every source, processes them with parallel
/// workers, then marks each source as checked.
/// </summary>
/// <param name="sources">The sources that produce packages whose status may be outdated.</param>
/// <param name="cancellationToken">Observed while loading the cursor, fetching, and processing packages.</param>
/// <returns><c>true</c> if any packages needed a status check; otherwise <c>false</c>.</returns>
private async Task<bool> CheckPackages(
    IReadOnlyCollection<IPackageStatusOutdatedCheckSource> sources,
    CancellationToken cancellationToken)
{
    Logger.LogInformation("Fetching packages to check status of.");
    var packagesToCheck = new List<PackageStatusOutdatedCheck>();

    // The monitoring cursor bounds how far back each source looks; ReprocessRange
    // widens the window so recently processed packages are re-checked.
    await _monitoringCursor.LoadAsync(cancellationToken);
    foreach (var source in sources)
    {
        packagesToCheck.AddRange(await source.GetPackagesToCheckAsync(
            _monitoringCursor.Value - ReprocessRange,
            Top,
            cancellationToken));
    }

    // A ConcurrentBag lets the parallel workers below drain the checks safely.
    var packagesToCheckBag = new ConcurrentBag<PackageStatusOutdatedCheck>(packagesToCheck);

    // Use the List<T>.Count property, not LINQ's Count() extension (CA1829).
    Logger.LogInformation("Found {PackagesToCheckCount} packages to check status of.", packagesToCheck.Count);

    await ParallelAsync.Repeat(() => ProcessPackagesAsync(packagesToCheckBag, cancellationToken));

    Logger.LogInformation("Finished checking status of packages.");

    // Only after every package has been processed is it safe to advance each
    // source's bookkeeping.
    foreach (var source in sources)
    {
        await source.MarkPackagesCheckedAsync(cancellationToken);
    }

    // Count > 0 on the materialized list avoids a LINQ re-enumeration (Any()).
    return packagesToCheck.Count > 0;
}
/// <summary>
/// Compares the last-indexed download counts against the latest statistics data
/// (with overrides applied), pushes any per-package changes to Azure Search, and
/// persists the new (non-overridden) counts back to blob storage.
/// </summary>
/// <returns><c>true</c> if any download count changes were pushed; otherwise <c>false</c>.</returns>
private async Task<bool> PushIndexChangesAsync()
{
    // The "old" data in this case is the download count data that was last indexed by this job (or
    // initialized by Db2AzureSearch).
    _logger.LogInformation("Fetching old download count data from blob storage.");
    var indexedResult = await _downloadDataClient.ReadLatestIndexedAsync(
        AccessConditionWrapper.GenerateEmptyCondition(),
        _stringCache);

    // The "new" data in this case is from the statistics pipeline.
    _logger.LogInformation("Fetching new download count data from blob storage.");
    var latestDownloads = await _auxiliaryFileClient.LoadDownloadDataAsync();

    _logger.LogInformation("Removing invalid IDs and versions from the old data.");
    CleanDownloadData(indexedResult.Data);

    _logger.LogInformation("Removing invalid IDs and versions from the new data.");
    CleanDownloadData(latestDownloads);

    // Fetch the download overrides from the auxiliary file. Note that the overridden downloads are
    // kept separate from the downloads data: the original data is what gets persisted back to
    // auxiliary storage, whereas the overridden data is what gets pushed to Azure Search.
    _logger.LogInformation("Overriding download count data.");
    var overrides = await _auxiliaryFileClient.LoadDownloadOverridesAsync();
    var overriddenDownloads = latestDownloads.ApplyDownloadOverrides(overrides, _logger);

    _logger.LogInformation("Detecting download count changes.");
    var downloadChanges = _downloadSetComparer.Compare(indexedResult.Data, overriddenDownloads);

    // Seed a thread-safe queue of package IDs for the parallel workers below.
    var workQueue = new ConcurrentBag<string>(downloadChanges.Keys);
    _logger.LogInformation("{Count} package IDs have download count changes.", workQueue.Count);

    if (!downloadChanges.Any())
    {
        return false;
    }

    _logger.LogInformation(
        "Starting {Count} workers pushing download count changes to Azure Search.",
        _options.Value.MaxConcurrentBatches);
    await ParallelAsync.Repeat(
        () => WorkAsync(workQueue, downloadChanges),
        _options.Value.MaxConcurrentBatches);
    _logger.LogInformation("All of the download count changes have been pushed to Azure Search.");

    // Replace the indexed blob using the if-match condition captured when it was
    // read, so a concurrent writer causes this upload to fail rather than clobber.
    _logger.LogInformation("Uploading the new download count data to blob storage.");
    await _downloadDataClient.ReplaceLatestIndexedAsync(
        latestDownloads,
        indexedResult.Metadata.GetIfMatchCondition());

    return true;
}
/// <summary>
/// Diffs the last-indexed owner data against the database, pushes owner changes to
/// Azure Search, and persists both the change history and the new owner snapshot.
/// Always reports the job outcome and elapsed time to telemetry, even on failure.
/// </summary>
public async Task ExecuteAsync()
{
    var timer = Stopwatch.StartNew();
    var jobOutcome = JobOutcome.Failure;
    try
    {
        _logger.LogInformation("Fetching old owner data from blob storage.");
        var storageResult = await _ownerDataClient.ReadLatestIndexedAsync();

        _logger.LogInformation("Fetching new owner data from the database.");
        var databaseResult = await _databaseFetcher.GetPackageIdToOwnersAsync();

        _logger.LogInformation("Detecting owner changes.");
        var changes = _ownerSetComparer.CompareOwners(storageResult.Result, databaseResult);

        // Materialize the changes into a thread-safe bag for the parallel workers.
        var changeItems = changes.Select(x => new IdAndValue<string[]>(x.Key, x.Value));
        var changesBag = new ConcurrentBag<IdAndValue<string[]>>(changeItems);
        _logger.LogInformation("{Count} package IDs have owner changes.", changesBag.Count);

        if (!changes.Any())
        {
            jobOutcome = JobOutcome.NoOp;
            return;
        }

        _logger.LogInformation(
            "Starting {Count} workers pushing owners changes to Azure Search.",
            _options.Value.MaxConcurrentBatches);
        await ParallelAsync.Repeat(() => WorkAndRetryAsync(changesBag), _options.Value.MaxConcurrentBatches);
        _logger.LogInformation("All of the owner changes have been pushed to Azure Search.");

        // Persist the list of package IDs with owner changes. This enables debugging
        // and future analytics on how frequently ownership changes.
        _logger.LogInformation("Uploading the package IDs that have owner changes to blob storage.");
        await _ownerDataClient.UploadChangeHistoryAsync(changes.Keys.ToList());

        // The access condition from the earlier read guards against a concurrent
        // writer having replaced the blob in the meantime.
        _logger.LogInformation("Uploading the new owner data to blob storage.");
        await _ownerDataClient.ReplaceLatestIndexedAsync(databaseResult, storageResult.AccessCondition);

        jobOutcome = JobOutcome.Success;
    }
    finally
    {
        // Telemetry fires on every path: success, no-op, and exception.
        timer.Stop();
        _telemetryService.TrackUpdateOwnersCompleted(jobOutcome, timer.Elapsed);
    }
}
/// <summary>
/// Runs a bounded queue-processing loop: starts <c>_workerCount</c> parallel workers
/// that dequeue and process packages for at most <c>_queueLoopDuration</c>, then waits
/// up to <c>MaxShutdownTime</c> past that for the workers to stop gracefully.
/// </summary>
/// <param name="cancellationToken">Token from the job runner; propagated to message processing.</param>
protected override async Task RunInternalAsync(CancellationToken cancellationToken)
{
    // We should stop processing messages if the job runner cancels us.
    var queueMessageCancellationToken = cancellationToken;

    // We should stop dequeuing more messages if too much time elapses.
    Logger.LogInformation("Processing messages for {Duration} before restarting the job loop.", _queueLoopDuration);
    // The first token source auto-cancels after _queueLoopDuration, telling workers
    // to stop dequeuing; the second exists only to cancel the shutdown-timeout delay
    // below once all workers have finished.
    using (var queueLoopCancellationTokenSource = new CancellationTokenSource(_queueLoopDuration))
    using (var timeoutCancellationTokenSource = new CancellationTokenSource())
    {
        var queueLoopCancellationToken = queueLoopCancellationTokenSource.Token;

        // Each worker receives a unique 1-based ID via Interlocked.Increment, which
        // is safe even though ParallelAsync.Repeat invokes the factory concurrently.
        var workerId = 0;
        var allWorkersTask = ParallelAsync.Repeat(
            () => ProcessPackagesAsync(
                Interlocked.Increment(ref workerId),
                queueLoopCancellationToken,
                queueMessageCancellationToken),
            _workerCount);

        // Wait for a specific amount of time past the loop duration. If a worker task is hanging for whatever
        // reason we don't want the shutdown to be blocked indefinitely.
        //
        // Imagine one worker is stuck and all of the rest of the workers have successfully stopped consuming
        // messages. This would mean that this process is stuck in a seemingly "healthy" state (no exceptions,
        // the process is still alive) but it will never terminate and no queue messages will be processed. By
        // design all jobs must be resilient to unexpected termination (machine shutdown, etc) so not waiting
        // for a slow worker task to gracefully finish is acceptable.
        var loopDurationPlusShutdownTask = Task.Delay(
            _queueLoopDuration.Add(MaxShutdownTime),
            timeoutCancellationTokenSource.Token);

        // Race the workers against the shutdown deadline.
        var firstTask = await Task.WhenAny(allWorkersTask, loopDurationPlusShutdownTask);
        if (firstTask == loopDurationPlusShutdownTask)
        {
            // Deadline won: at least one worker did not shut down in time. We proceed
            // anyway (see the comment above) rather than block indefinitely.
            Logger.LogWarning("Not all workers shut down gracefully after {Duration}.", MaxShutdownTime);
        }
        else
        {
            // Workers won: cancel the pending delay so its timer does not linger.
            timeoutCancellationTokenSource.Cancel();
            Logger.LogInformation("All workers gracefully shut down.");
        }
    }
}
/// <summary>
/// Computes download count changes between the last-indexed data and the latest
/// statistics, applies popularity (download) transfers on top of those changes,
/// pushes the results to Azure Search, and persists both the new download data and
/// the new popularity transfer data, each guarded by its own if-match condition.
/// </summary>
/// <returns><c>true</c> if any package updates were pushed; otherwise <c>false</c>.</returns>
private async Task<bool> PushIndexChangesAsync()
{
    // The "old" data in this case is the download count data that was last indexed by this job (or
    // initialized by Db2AzureSearch).
    _logger.LogInformation("Fetching old download count data from blob storage.");
    var oldResult = await _downloadDataClient.ReadLatestIndexedAsync(
        AccessConditionWrapper.GenerateEmptyCondition(),
        _stringCache);

    // The "new" data in this case is from the statistics pipeline.
    _logger.LogInformation("Fetching new download count data from blob storage.");
    var newData = await _auxiliaryFileClient.LoadDownloadDataAsync();

    _logger.LogInformation("Removing invalid IDs and versions from the old downloads data.");
    CleanDownloadData(oldResult.Data);

    _logger.LogInformation("Removing invalid IDs and versions from the new downloads data.");
    CleanDownloadData(newData);

    // Both sides must be cleaned before comparing, or spurious changes appear.
    _logger.LogInformation("Detecting download count changes.");
    var changes = _downloadSetComparer.Compare(oldResult.Data, newData);
    _logger.LogInformation("{Count} package IDs have download count changes.", changes.Count);

    // The "old" data is the popularity transfers data that was last indexed by this job (or
    // initialized by Db2AzureSearch).
    _logger.LogInformation("Fetching old popularity transfer data from blob storage.");
    var oldTransfers = await _popularityTransferDataClient.ReadLatestIndexedAsync(
        AccessConditionWrapper.GenerateEmptyCondition(),
        _stringCache);

    // The "new" data is the latest popularity transfers data from the database.
    _logger.LogInformation("Fetching new popularity transfer data from database.");
    var newTransfers = await GetPopularityTransfersAsync();

    // Mutates "changes" in place: packages affected by a new, removed, or changed
    // transfer are added to the change set even if their raw counts did not move.
    _logger.LogInformation("Applying download transfers to download changes.");
    ApplyDownloadTransfers(
        newData,
        oldTransfers.Data,
        newTransfers,
        changes);

    // Snapshot the (post-transfer) change keys into a thread-safe bag for the workers.
    var idBag = new ConcurrentBag<string>(changes.Keys);
    _logger.LogInformation("{Count} package IDs need to be updated.", idBag.Count);

    if (!changes.Any())
    {
        return false;
    }

    _logger.LogInformation(
        "Starting {Count} workers pushing download count changes to Azure Search.",
        _options.Value.MaxConcurrentBatches);
    await ParallelAsync.Repeat(
        () => WorkAndRetryAsync(idBag, changes),
        _options.Value.MaxConcurrentBatches);
    _logger.LogInformation("All of the download count changes have been pushed to Azure Search.");

    // Each blob is replaced under the if-match condition captured when it was read,
    // so a concurrent writer fails this upload instead of being silently overwritten.
    _logger.LogInformation("Uploading the new download count data to blob storage.");
    await _downloadDataClient.ReplaceLatestIndexedAsync(newData, oldResult.Metadata.GetIfMatchCondition());

    _logger.LogInformation("Uploading the new popularity transfer data to blob storage.");
    await _popularityTransferDataClient.ReplaceLatestIndexedAsync(
        newTransfers,
        oldTransfers.Metadata.GetIfMatchCondition());

    return true;
}
/// <summary>
/// Runs the package-processing loop by repeating <c>ProcessPackagesAsync</c> via
/// <c>ParallelAsync.Repeat</c> until it completes.
/// </summary>
/// <param name="cancellationToken">Propagated to each package-processing invocation.</param>
protected override async Task RunInternalAsync(CancellationToken cancellationToken)
{
    Func<Task> processOnce = () => ProcessPackagesAsync(cancellationToken);
    await ParallelAsync.Repeat(processOnce);
}