Ejemplo n.º 1
0
        /// <summary>
        /// Compares the download counts that were last indexed against the latest download counts (with
        /// download overrides applied), hands any differences to the Azure Search workers, and finally
        /// stores the latest (non-overridden) download counts as the new "last indexed" data.
        /// </summary>
        /// <returns>
        /// <c>false</c> when the comparison yields no changes, in which case nothing is pushed or uploaded;
        /// otherwise <c>true</c>, after the workers have finished and the new data has been uploaded.
        /// </returns>
        private async Task<bool> PushIndexChangesAsync()
        {
            // Baseline ("old") data: the download counts most recently indexed by this job, or the ones
            // initialized by Db2AzureSearch.
            _logger.LogInformation("Fetching old download count data from blob storage.");
            var readCondition = AccessConditionWrapper.GenerateEmptyCondition();
            var indexedDownloads = await _downloadDataClient.ReadLatestIndexedAsync(readCondition, _stringCache);

            // Latest ("new") data: produced by the statistics pipeline.
            _logger.LogInformation("Fetching new download count data from blob storage.");
            var latestDownloads = await _auxiliaryFileClient.LoadDownloadDataAsync();

            _logger.LogInformation("Removing invalid IDs and versions from the old data.");
            CleanDownloadData(indexedDownloads.Data);

            _logger.LogInformation("Removing invalid IDs and versions from the new data.");
            CleanDownloadData(latestDownloads);

            // The overridden counts are held in a separate object from the original counts: the original
            // data is what gets persisted as auxiliary data below, while the overridden data is what gets
            // compared and sent to Azure Search.
            _logger.LogInformation("Overriding download count data.");
            var overrides = await _auxiliaryFileClient.LoadDownloadOverridesAsync();
            var effectiveDownloads = latestDownloads.ApplyDownloadOverrides(overrides, _logger);

            _logger.LogInformation("Detecting download count changes.");
            var downloadChanges = _downloadSetComparer.Compare(indexedDownloads.Data, effectiveDownloads);

            // The workers draw package IDs from this shared bag.
            var pendingIds = new ConcurrentBag<string>(downloadChanges.Keys);
            _logger.LogInformation("{Count} package IDs have download count changes.", pendingIds.Count);

            // Nothing changed, so there is nothing to push or upload.
            if (!downloadChanges.Any())
            {
                return false;
            }

            _logger.LogInformation(
                "Starting {Count} workers pushing download count changes to Azure Search.",
                _options.Value.MaxConcurrentBatches);
            await ParallelAsync.Repeat(
                () => WorkAsync(pendingIds, downloadChanges),
                _options.Value.MaxConcurrentBatches);
            _logger.LogInformation("All of the download count changes have been pushed to Azure Search.");

            // Persist the original (non-overridden) new data, conditioned on the metadata captured when
            // the old data was read at the start of this method.
            _logger.LogInformation("Uploading the new download count data to blob storage.");
            var replaceCondition = indexedDownloads.Metadata.GetIfMatchCondition();
            await _downloadDataClient.ReplaceLatestIndexedAsync(latestDownloads, replaceCondition);

            return true;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Compares the download counts that were last indexed against the latest download counts, applies
        /// popularity transfers to the detected changes, hands the resulting changes to the Azure Search
        /// workers, and finally stores both the latest download counts and the latest popularity transfers
        /// as the new "last indexed" data.
        /// </summary>
        /// <returns>
        /// <c>false</c> when there are no changes after download transfers have been applied, in which case
        /// nothing is pushed or uploaded; otherwise <c>true</c>, after the workers have finished and both
        /// data sets have been uploaded.
        /// </returns>
        private async Task<bool> PushIndexChangesAsync()
        {
            // Baseline ("old") downloads: the download counts most recently indexed by this job, or the
            // ones initialized by Db2AzureSearch.
            _logger.LogInformation("Fetching old download count data from blob storage.");
            var downloadReadCondition = AccessConditionWrapper.GenerateEmptyCondition();
            var indexedDownloads = await _downloadDataClient.ReadLatestIndexedAsync(
                downloadReadCondition,
                _stringCache);

            // Latest ("new") downloads: produced by the statistics pipeline.
            _logger.LogInformation("Fetching new download count data from blob storage.");
            var latestDownloads = await _auxiliaryFileClient.LoadDownloadDataAsync();

            _logger.LogInformation("Removing invalid IDs and versions from the old downloads data.");
            CleanDownloadData(indexedDownloads.Data);

            _logger.LogInformation("Removing invalid IDs and versions from the new downloads data.");
            CleanDownloadData(latestDownloads);

            _logger.LogInformation("Detecting download count changes.");
            var downloadChanges = _downloadSetComparer.Compare(indexedDownloads.Data, latestDownloads);
            _logger.LogInformation("{Count} package IDs have download count changes.", downloadChanges.Count);

            // Baseline ("old") transfers: the popularity transfers most recently indexed by this job, or
            // the ones initialized by Db2AzureSearch.
            _logger.LogInformation("Fetching old popularity transfer data from blob storage.");
            var transferReadCondition = AccessConditionWrapper.GenerateEmptyCondition();
            var indexedTransfers = await _popularityTransferDataClient.ReadLatestIndexedAsync(
                transferReadCondition,
                _stringCache);

            // Latest ("new") transfers: read from the database.
            _logger.LogInformation("Fetching new popularity transfer data from database.");
            var latestTransfers = await GetPopularityTransfersAsync();

            // NOTE(review): ApplyDownloadTransfers receives the change set and presumably updates it in
            // place, which is why the IDs are only collected afterwards - confirm against the helper.
            _logger.LogInformation("Applying download transfers to download changes.");
            ApplyDownloadTransfers(latestDownloads, indexedTransfers.Data, latestTransfers, downloadChanges);

            // The workers draw package IDs from this shared bag.
            var pendingIds = new ConcurrentBag<string>(downloadChanges.Keys);
            _logger.LogInformation("{Count} package IDs need to be updated.", pendingIds.Count);

            // Nothing to update, so there is nothing to push or upload.
            if (!downloadChanges.Any())
            {
                return false;
            }

            _logger.LogInformation(
                "Starting {Count} workers pushing download count changes to Azure Search.",
                _options.Value.MaxConcurrentBatches);
            await ParallelAsync.Repeat(
                () => WorkAndRetryAsync(pendingIds, downloadChanges),
                _options.Value.MaxConcurrentBatches);
            _logger.LogInformation("All of the download count changes have been pushed to Azure Search.");

            // Persist the new downloads, conditioned on the metadata captured when the old downloads were
            // read at the start of this method.
            _logger.LogInformation("Uploading the new download count data to blob storage.");
            var downloadReplaceCondition = indexedDownloads.Metadata.GetIfMatchCondition();
            await _downloadDataClient.ReplaceLatestIndexedAsync(latestDownloads, downloadReplaceCondition);

            // Persist the new transfers the same way, conditioned on the old transfers' metadata.
            _logger.LogInformation("Uploading the new popularity transfer data to blob storage.");
            var transferReplaceCondition = indexedTransfers.Metadata.GetIfMatchCondition();
            await _popularityTransferDataClient.ReplaceLatestIndexedAsync(
                latestTransfers,
                transferReplaceCondition);

            return true;
        }