/// <summary>
/// Compares the last-indexed download counts against the latest download counts (with download
/// overrides applied) and, when they differ, runs the configured number of workers over the changed
/// package IDs before replacing the last-indexed download data in blob storage.
/// </summary>
/// <returns>
/// <c>false</c> when no package ID has a download count change (nothing is pushed or uploaded);
/// otherwise <c>true</c> once the workers have completed and the new download data has been uploaded.
/// </returns>
private async Task<bool> PushIndexChangesAsync()
{
    // Baseline: the download counts this job indexed last time (or that Db2AzureSearch initialized).
    _logger.LogInformation("Fetching old download count data from blob storage.");
    var emptyCondition = AccessConditionWrapper.GenerateEmptyCondition();
    var indexedDownloads = await _downloadDataClient.ReadLatestIndexedAsync(emptyCondition, _stringCache);

    // Candidate: the download counts produced by the statistics pipeline.
    _logger.LogInformation("Fetching new download count data from blob storage.");
    var latestDownloads = await _auxiliaryFileClient.LoadDownloadDataAsync();

    _logger.LogInformation("Removing invalid IDs and versions from the old data.");
    CleanDownloadData(indexedDownloads.Data);

    _logger.LogInformation("Removing invalid IDs and versions from the new data.");
    CleanDownloadData(latestDownloads);

    // The overridden counts are kept in a separate object on purpose: the untouched data is what gets
    // persisted back to the auxiliary blob, while the overridden data is what is compared and indexed.
    _logger.LogInformation("Overriding download count data.");
    var overrides = await _auxiliaryFileClient.LoadDownloadOverridesAsync();
    var downloadsWithOverrides = latestDownloads.ApplyDownloadOverrides(overrides, _logger);

    _logger.LogInformation("Detecting download count changes.");
    var changes = _downloadSetComparer.Compare(indexedDownloads.Data, downloadsWithOverrides);

    // The bag is the shared work queue of package IDs handed to the workers below.
    var idBag = new ConcurrentBag<string>(changes.Keys);
    _logger.LogInformation("{Count} package IDs have download count changes.", idBag.Count);

    // The bag holds exactly the keys of the change set, so an empty bag means there is nothing to do.
    if (idBag.IsEmpty)
    {
        return false;
    }

    var workerCount = _options.Value.MaxConcurrentBatches;
    _logger.LogInformation(
        "Starting {Count} workers pushing download count changes to Azure Search.",
        workerCount);
    await ParallelAsync.Repeat(() => WorkAsync(idBag, changes), workerCount);
    _logger.LogInformation("All of the download count changes have been pushed to Azure Search.");

    // Upload the data without overrides, conditioned on the metadata returned by the initial read.
    _logger.LogInformation("Uploading the new download count data to blob storage.");
    var ifMatchCondition = indexedDownloads.Metadata.GetIfMatchCondition();
    await _downloadDataClient.ReplaceLatestIndexedAsync(latestDownloads, ifMatchCondition);

    return true;
}
/// <summary>
/// Compares the last-indexed download counts against the latest download counts, folds popularity
/// transfer data into the resulting change set, and, when any package ID needs updating, runs the
/// configured number of workers over those IDs before replacing the last-indexed download data and
/// popularity transfer data in blob storage.
/// </summary>
/// <returns>
/// <c>false</c> when the change set is empty after transfers are applied (nothing is pushed or
/// uploaded); otherwise <c>true</c> once the workers have completed and both blobs have been replaced.
/// </returns>
private async Task<bool> PushIndexChangesAsync()
{
    // Baseline: the download counts this job indexed last time (or that Db2AzureSearch initialized).
    _logger.LogInformation("Fetching old download count data from blob storage.");
    var downloadsReadCondition = AccessConditionWrapper.GenerateEmptyCondition();
    var indexedDownloads = await _downloadDataClient.ReadLatestIndexedAsync(
        downloadsReadCondition,
        _stringCache);

    // Candidate: the download counts produced by the statistics pipeline.
    _logger.LogInformation("Fetching new download count data from blob storage.");
    var latestDownloads = await _auxiliaryFileClient.LoadDownloadDataAsync();

    _logger.LogInformation("Removing invalid IDs and versions from the old downloads data.");
    CleanDownloadData(indexedDownloads.Data);

    _logger.LogInformation("Removing invalid IDs and versions from the new downloads data.");
    CleanDownloadData(latestDownloads);

    _logger.LogInformation("Detecting download count changes.");
    var changes = _downloadSetComparer.Compare(indexedDownloads.Data, latestDownloads);
    _logger.LogInformation("{Count} package IDs have download count changes.", changes.Count);

    // Baseline transfers: the popularity transfers this job indexed last time (or that
    // Db2AzureSearch initialized).
    _logger.LogInformation("Fetching old popularity transfer data from blob storage.");
    var transfersReadCondition = AccessConditionWrapper.GenerateEmptyCondition();
    var indexedTransfers = await _popularityTransferDataClient.ReadLatestIndexedAsync(
        transfersReadCondition,
        _stringCache);

    // Candidate transfers: the latest popularity transfers from the database.
    _logger.LogInformation("Fetching new popularity transfer data from database.");
    var latestTransfers = await GetPopularityTransfersAsync();

    // This call is handed the change set, so the set of IDs to update is only read after it returns.
    _logger.LogInformation("Applying download transfers to download changes.");
    ApplyDownloadTransfers(latestDownloads, indexedTransfers.Data, latestTransfers, changes);

    // The bag is the shared work queue of package IDs handed to the workers below.
    var idBag = new ConcurrentBag<string>(changes.Keys);
    _logger.LogInformation("{Count} package IDs need to be updated.", idBag.Count);

    if (changes.Count == 0)
    {
        return false;
    }

    var workerCount = _options.Value.MaxConcurrentBatches;
    _logger.LogInformation(
        "Starting {Count} workers pushing download count changes to Azure Search.",
        workerCount);
    await ParallelAsync.Repeat(() => WorkAndRetryAsync(idBag, changes), workerCount);
    _logger.LogInformation("All of the download count changes have been pushed to Azure Search.");

    // Each upload is conditioned on the metadata returned by the corresponding initial read.
    _logger.LogInformation("Uploading the new download count data to blob storage.");
    var downloadsIfMatch = indexedDownloads.Metadata.GetIfMatchCondition();
    await _downloadDataClient.ReplaceLatestIndexedAsync(latestDownloads, downloadsIfMatch);

    _logger.LogInformation("Uploading the new popularity transfer data to blob storage.");
    var transfersIfMatch = indexedTransfers.Metadata.GetIfMatchCondition();
    await _popularityTransferDataClient.ReplaceLatestIndexedAsync(latestTransfers, transfersIfMatch);

    return true;
}