        private async Task<bool> CheckPackages(
            IReadOnlyCollection<IPackageStatusOutdatedCheckSource> sources,
            CancellationToken cancellationToken)
        {
            Logger.LogInformation("Fetching packages to check status of.");
            var packagesToCheck = new List<PackageStatusOutdatedCheck>();
            await _monitoringCursor.LoadAsync(cancellationToken);

            foreach (var source in sources)
            {
                packagesToCheck.AddRange(await source.GetPackagesToCheckAsync(
                                             _monitoringCursor.Value - ReprocessRange, Top, cancellationToken));
            }

            var packagesToCheckBag = new ConcurrentBag<PackageStatusOutdatedCheck>(packagesToCheck);

            Logger.LogInformation("Found {PackagesToCheckCount} packages to check status of.", packagesToCheck.Count);
            await ParallelAsync.Repeat(() => ProcessPackagesAsync(packagesToCheckBag, cancellationToken));

            Logger.LogInformation("Finished checking status of packages.");

            foreach (var source in sources)
            {
                await source.MarkPackagesCheckedAsync(cancellationToken);
            }

            return packagesToCheck.Any();
        }
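
Every example on this page funnels its work through ParallelAsync.Repeat, but the helper itself never appears here. A minimal sketch consistent with the call sites above and below (a task factory plus an optional degree of parallelism) could look like the following; the DefaultDegreeOfParallelism constant and its value of 32 are assumptions for illustration, not part of the original source.

using System;
using System.Linq;
using System.Threading.Tasks;

// Minimal sketch of a ParallelAsync.Repeat helper, inferred from the call sites on this page.
// The default worker count below is an assumption; the real helper may pick it differently.
public static class ParallelAsync
{
    private const int DefaultDegreeOfParallelism = 32; // assumed default

    public static Task Repeat(Func<Task> taskFactory, int? degreeOfParallelism = null)
    {
        // Start N copies of the worker and complete when all of them have finished.
        var workers = Enumerable
            .Range(0, degreeOfParallelism ?? DefaultDegreeOfParallelism)
            .Select(_ => taskFactory());

        return Task.WhenAll(workers);
    }
}
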
Example #2
        private async Task<bool> PushIndexChangesAsync()
        {
            // The "old" data in this case is the download count data that was last indexed by this job (or
            // initialized by Db2AzureSearch).
            _logger.LogInformation("Fetching old download count data from blob storage.");
            var oldResult = await _downloadDataClient.ReadLatestIndexedAsync(
                AccessConditionWrapper.GenerateEmptyCondition(),
                _stringCache);

            // The "new" data in this case is from the statistics pipeline.
            _logger.LogInformation("Fetching new download count data from blob storage.");
            var newData = await _auxiliaryFileClient.LoadDownloadDataAsync();

            _logger.LogInformation("Removing invalid IDs and versions from the old data.");
            CleanDownloadData(oldResult.Data);

            _logger.LogInformation("Removing invalid IDs and versions from the new data.");
            CleanDownloadData(newData);

            // Fetch the download overrides from the auxiliary file. Note that the overridden downloads are kept
            // separate from the downloads data, as the original data will be persisted to auxiliary data, whereas the
            // overridden data will be persisted to Azure Search.
            _logger.LogInformation("Overriding download count data.");
            var downloadOverrides = await _auxiliaryFileClient.LoadDownloadOverridesAsync();

            var overridenDownloads = newData.ApplyDownloadOverrides(downloadOverrides, _logger);

            _logger.LogInformation("Detecting download count changes.");
            var changes = _downloadSetComparer.Compare(oldResult.Data, overridenDownloads);
            var idBag   = new ConcurrentBag<string>(changes.Keys);

            _logger.LogInformation("{Count} package IDs have download count changes.", idBag.Count);

            if (!changes.Any())
            {
                return false;
            }

            _logger.LogInformation(
                "Starting {Count} workers pushing download count changes to Azure Search.",
                _options.Value.MaxConcurrentBatches);
            await ParallelAsync.Repeat(
                () => WorkAsync(idBag, changes),
                _options.Value.MaxConcurrentBatches);

            _logger.LogInformation("All of the download count changes have been pushed to Azure Search.");

            _logger.LogInformation("Uploading the new download count data to blob storage.");
            await _downloadDataClient.ReplaceLatestIndexedAsync(newData, oldResult.Metadata.GetIfMatchCondition());

            return true;
        }
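
WorkAsync is not shown in this example. The recurring pattern is that each of the MaxConcurrentBatches workers drains the shared ConcurrentBag<string> until it is empty, so a fixed number of workers load-balance over however many package IDs changed. A hypothetical worker body is sketched below; the dictionary type of changes and the PushBatchAsync helper are assumptions, not the actual implementation.

        // Hypothetical worker loop; the type of "changes" and PushBatchAsync are placeholders,
        // not the real implementation.
        private async Task WorkAsync(
            ConcurrentBag<string> idBag,
            IReadOnlyDictionary<string, long> changes)
        {
            // Keep taking package IDs from the shared bag until it is empty.
            while (idBag.TryTake(out var packageId))
            {
                // Push the new download count for this package ID to Azure Search.
                await PushBatchAsync(packageId, changes[packageId]);
            }
        }
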
Example #3
        public async Task ExecuteAsync()
        {
            var stopwatch = Stopwatch.StartNew();
            var outcome   = JobOutcome.Failure;

            try
            {
                _logger.LogInformation("Fetching old owner data from blob storage.");
                var storageResult = await _ownerDataClient.ReadLatestIndexedAsync();

                _logger.LogInformation("Fetching new owner data from the database.");
                var databaseResult = await _databaseFetcher.GetPackageIdToOwnersAsync();

                _logger.LogInformation("Detecting owner changes.");
                var changes    = _ownerSetComparer.CompareOwners(storageResult.Result, databaseResult);
                var changesBag = new ConcurrentBag<IdAndValue<string[]>>(changes.Select(x => new IdAndValue<string[]>(x.Key, x.Value)));
                _logger.LogInformation("{Count} package IDs have owner changes.", changesBag.Count);

                if (!changes.Any())
                {
                    outcome = JobOutcome.NoOp;
                    return;
                }

                _logger.LogInformation(
                    "Starting {Count} workers pushing owners changes to Azure Search.",
                    _options.Value.MaxConcurrentBatches);
                await ParallelAsync.Repeat(() => WorkAndRetryAsync(changesBag), _options.Value.MaxConcurrentBatches);

                _logger.LogInformation("All of the owner changes have been pushed to Azure Search.");

                // Persist in storage the list of all package IDs that have owner changes. This allows debugging and future
                // analytics on frequency of ownership changes.
                _logger.LogInformation("Uploading the package IDs that have owner changes to blob storage.");
                await _ownerDataClient.UploadChangeHistoryAsync(changes.Keys.ToList());

                _logger.LogInformation("Uploading the new owner data to blob storage.");
                await _ownerDataClient.ReplaceLatestIndexedAsync(databaseResult, storageResult.AccessCondition);

                outcome = JobOutcome.Success;
            }
            finally
            {
                stopwatch.Stop();
                _telemetryService.TrackUpdateOwnersCompleted(outcome, stopwatch.Elapsed);
            }
        }
Example #4
        protected override async Task RunInternalAsync(CancellationToken cancellationToken)
        {
            // We should stop processing messages if the job runner cancels us.
            var queueMessageCancellationToken = cancellationToken;

            // We should stop dequeuing more messages if too much time elapses.
            Logger.LogInformation("Processing messages for {Duration} before restarting the job loop.", _queueLoopDuration);
            using (var queueLoopCancellationTokenSource = new CancellationTokenSource(_queueLoopDuration))
                using (var timeoutCancellationTokenSource = new CancellationTokenSource())
                {
                    var queueLoopCancellationToken = queueLoopCancellationTokenSource.Token;

                    var workerId       = 0;
                    var allWorkersTask = ParallelAsync.Repeat(
                        () => ProcessPackagesAsync(
                            Interlocked.Increment(ref workerId),
                            queueLoopCancellationToken,
                            queueMessageCancellationToken),
                        _workerCount);

                    // Wait for a specific amount of time past the loop duration. If a worker task is hanging for whatever
                    // reason we don't want the shutdown to be blocked indefinitely.
                    //
                    // Imagine one worker is stuck and all of the rest of the workers have successfully stopped consuming
                    // messages. This would mean that this process is stuck in a seemingly "healthy" state (no exceptions,
                    // the process is still alive) but it will never terminate and no queue messages will be processed. By
                    // design all jobs must be resilient to unexpected termination (machine shutdown, etc.) so not waiting
                    // for a slow worker task to gracefully finish is acceptable.
                    var loopDurationPlusShutdownTask = Task.Delay(_queueLoopDuration.Add(MaxShutdownTime), timeoutCancellationTokenSource.Token);

                    var firstTask = await Task.WhenAny(allWorkersTask, loopDurationPlusShutdownTask);

                    if (firstTask == loopDurationPlusShutdownTask)
                    {
                        Logger.LogWarning("Not all workers shut down gracefully after {Duration}.", MaxShutdownTime);
                    }
                    else
                    {
                        timeoutCancellationTokenSource.Cancel();
                        Logger.LogInformation("All workers gracefully shut down.");
                    }
                }
        }
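
ProcessPackagesAsync is not part of this example. Judging from the two tokens it receives, the intent is that queueLoopCancellationToken only ends the dequeue loop when the loop duration elapses, while queueMessageCancellationToken is reserved for abandoning an in-flight message if the whole job is cancelled. A hypothetical worker following that split is sketched below; DequeueAsync and HandleMessageAsync stand in for the real queue client calls, which are not shown on this page.

        // Hypothetical worker; DequeueAsync and HandleMessageAsync are placeholders for the
        // real queue client calls.
        private async Task ProcessPackagesAsync(
            int workerId,
            CancellationToken queueLoopCancellationToken,
            CancellationToken queueMessageCancellationToken)
        {
            // Stop dequeuing when the loop duration elapses, but let a message that is already
            // being processed finish unless the whole job is cancelled.
            while (!queueLoopCancellationToken.IsCancellationRequested)
            {
                var message = await DequeueAsync();
                if (message == null)
                {
                    // The queue is empty right now; back off briefly before polling again.
                    await Task.Delay(TimeSpan.FromSeconds(5));
                    continue;
                }

                await HandleMessageAsync(message, queueMessageCancellationToken);
            }

            Logger.LogInformation("Worker {WorkerId} stopped dequeuing messages.", workerId);
        }
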
        private async Task<bool> PushIndexChangesAsync()
        {
            // The "old" data in this case is the download count data that was last indexed by this job (or
            // initialized by Db2AzureSearch).
            _logger.LogInformation("Fetching old download count data from blob storage.");
            var oldResult = await _downloadDataClient.ReadLatestIndexedAsync(
                AccessConditionWrapper.GenerateEmptyCondition(),
                _stringCache);

            // The "new" data in this case is from the statistics pipeline.
            _logger.LogInformation("Fetching new download count data from blob storage.");
            var newData = await _auxiliaryFileClient.LoadDownloadDataAsync();

            _logger.LogInformation("Removing invalid IDs and versions from the old downloads data.");
            CleanDownloadData(oldResult.Data);

            _logger.LogInformation("Removing invalid IDs and versions from the new downloads data.");
            CleanDownloadData(newData);

            _logger.LogInformation("Detecting download count changes.");
            var changes = _downloadSetComparer.Compare(oldResult.Data, newData);

            _logger.LogInformation("{Count} package IDs have download count changes.", changes.Count);

            // The "old" data is the popularity transfers data that was last indexed by this job (or
            // initialized by Db2AzureSearch).
            _logger.LogInformation("Fetching old popularity transfer data from blob storage.");
            var oldTransfers = await _popularityTransferDataClient.ReadLatestIndexedAsync(
                AccessConditionWrapper.GenerateEmptyCondition(),
                _stringCache);

            // The "new" data is the latest popularity transfers data from the database.
            _logger.LogInformation("Fetching new popularity transfer data from database.");
            var newTransfers = await GetPopularityTransfersAsync();

            _logger.LogInformation("Applying download transfers to download changes.");
            ApplyDownloadTransfers(
                newData,
                oldTransfers.Data,
                newTransfers,
                changes);

            var idBag = new ConcurrentBag<string>(changes.Keys);

            _logger.LogInformation("{Count} package IDs need to be updated.", idBag.Count);

            if (!changes.Any())
            {
                return false;
            }

            _logger.LogInformation(
                "Starting {Count} workers pushing download count changes to Azure Search.",
                _options.Value.MaxConcurrentBatches);
            await ParallelAsync.Repeat(
                () => WorkAndRetryAsync(idBag, changes),
                _options.Value.MaxConcurrentBatches);

            _logger.LogInformation("All of the download count changes have been pushed to Azure Search.");

            _logger.LogInformation("Uploading the new download count data to blob storage.");
            await _downloadDataClient.ReplaceLatestIndexedAsync(newData, oldResult.Metadata.GetIfMatchCondition());

            _logger.LogInformation("Uploading the new popularity transfer data to blob storage.");
            await _popularityTransferDataClient.ReplaceLatestIndexedAsync(
                newTransfers,
                oldTransfers.Metadata.GetIfMatchCondition());

            return true;
        }
        protected override async Task RunInternalAsync(CancellationToken cancellationToken)
        {
            await ParallelAsync.Repeat(() => ProcessPackagesAsync(cancellationToken));
        }