/// <summary>
/// Downloads every catalog page in <paramref name="pageUris"/> and converts each element of the page's
/// "items" array into a <see cref="CatalogIndexEntry"/>.
/// </summary>
/// <param name="pageUris">URIs of the catalog pages to download.</param>
/// <returns>
/// A bag holding one entry per page item. Pages are processed in parallel, so the order of the entries
/// is unspecified.
/// </returns>
private ConcurrentBag<CatalogIndexEntry> GetEntries(IEnumerable<Uri> pageUris)
{
    var entries = new ConcurrentBag<CatalogIndexEntry>();

    // At most 8 pages are processed concurrently.
    var options = new ParallelOptions { MaxDegreeOfParallelism = 8 };

    Parallel.ForEach(pageUris.ToArray(), options, uri =>
    {
        // Parallel.ForEach bodies are synchronous, so block on the download here.
        // Task.Result already waits for completion; a separate Wait() call is not needed.
        JObject json = _httpClient.GetJObjectAsync(uri).Result;

        foreach (var item in json["items"])
        {
            var entry = new CatalogIndexEntry(
                new Uri(item["@id"].ToString()),
                item["@type"].ToString(),
                item["commitId"].ToString(),
                // Read the timestamp as a typed value, the same way ProcessPageUris does, instead of
                // round-tripping it through ToString() and the culture-sensitive DateTime.Parse(string).
                item["commitTimeStamp"].ToObject<DateTime>(),
                item["nuget:id"].ToString(),
                NuGetVersion.Parse(item["nuget:version"].ToString()));

            entries.Add(entry);
        }
    });

    return entries;
}
/// <summary>
/// Builds the file name of a package: the lower-cased id, a dot, the lower-cased normalized version
/// and the ".nupkg" extension.
/// </summary>
/// <param name="packageEntry">Catalog entry whose id and version are used.</param>
/// <returns>A name of the form "{id}.{version}.nupkg", entirely lower case.</returns>
private string BuildPackageFileName(CatalogIndexEntry packageEntry)
{
    var lowerCaseId = packageEntry.Id.ToLowerInvariant();

    var normalizedVersion = packageEntry.Version.ToNormalizedString();
    var lowerCaseVersion = normalizedVersion.ToLowerInvariant();

    return string.Concat(lowerCaseId, ".", lowerCaseVersion, ".nupkg");
}
/// <summary>
/// Runs the handler against the blob that belongs to <paramref name="catalogEntry"/>, retrying up to
/// <c>MaximumPackageProcessingAttempts</c> times when the failure is classified as retryable.
/// </summary>
/// <remarks>
/// Every exception raised inside the method body is caught and logged here; failures other than a
/// missing package blob are also reported to telemetry.
/// </remarks>
/// <param name="catalogEntry">The catalog entry identifying the package to process.</param>
public async Task ProcessCatalogIndexEntryAsync(CatalogIndexEntry catalogEntry)
{
    try
    {
        var blobName = BuildPackageFileName(catalogEntry);
        var blob = new AzureCloudBlockBlob(_container.GetBlockBlobReference(blobName));

        var attempt = 0;
        while (attempt < MaximumPackageProcessingAttempts)
        {
            attempt++;

            try
            {
                await _handler.ProcessPackageAsync(catalogEntry, blob);

                // Success: nothing left to do for this entry.
                return;
            }
            catch (Exception e) when (IsRetryableException(e))
            {
                // Only retryable failures are handled here; anything else escapes to the outer handlers.
                _logger.LogWarning(
                    0,
                    e,
                    "Processing package {PackageId} {PackageVersion} failed due to an uncaught exception. " +
                    $"Attempt {{Attempt}} of {MaximumPackageProcessingAttempts}",
                    catalogEntry.Id,
                    catalogEntry.Version,
                    attempt);
            }
        }

        // All attempts were used up without a successful run.
        _telemetryService.TrackHandlerFailedToProcessPackage(_handler, catalogEntry.Id, catalogEntry.Version);

        _logger.LogError(
            $"Failed to process package {{PackageId}} {{PackageVersion}} after {MaximumPackageProcessingAttempts} attempts",
            catalogEntry.Id,
            catalogEntry.Version);
    }
    catch (StorageException e) when (IsPackageDoesNotExistException(e))
    {
        // A missing blob points at a mismatch between the v2 and v3 APIs, which the monitoring job is
        // expected to catch. It is deliberately not tracked as a handler failure.
        _logger.LogError(
            "Package {PackageId} {PackageVersion} is missing from the packages container!",
            catalogEntry.Id,
            catalogEntry.Version);
    }
    catch (Exception e)
    {
        _telemetryService.TrackHandlerFailedToProcessPackage(_handler, catalogEntry.Id, catalogEntry.Version);

        _logger.LogError(
            0,
            e,
            "Could not process package {PackageId} {PackageVersion}",
            catalogEntry.Id,
            catalogEntry.Version);
    }
}
/// <summary>
/// Sets the Content MD5 property of a package blob that does not have one yet. The hash is computed
/// from the downloaded blob content and written back with an If-Match condition on the blob's ETag.
/// </summary>
/// <param name="packageEntry">Catalog entry of the package; used for telemetry and logging.</param>
/// <param name="blob">The package blob to inspect and, if needed, update.</param>
public async Task ProcessPackageAsync(CatalogIndexEntry packageEntry, ICloudBlockBlob blob)
{
    await blob.FetchAttributesAsync(CancellationToken.None);

    // Skip the package if it already has a Content MD5 hash.
    if (blob.ContentMD5 != null)
    {
        _telemetryService.TrackPackageAlreadyHasHash(packageEntry.Id, packageEntry.Version);
        return;
    }

    // Download the blob and calculate its hash. We use HttpClient to download blobs as the Azure Blob
    // Storage SDK occasionally hangs. See: https://github.com/Azure/azure-storage-net/issues/470
    string hash;
    using (var hashAlgorithm = MD5.Create())
    using (var packageStream = await _httpClient.GetStreamAsync(blob.Uri))
    {
        var hashBytes = hashAlgorithm.ComputeHash(packageStream);
        hash = Convert.ToBase64String(hashBytes);
    }

    blob.ContentMD5 = hash;

    // Capture the ETag before the update. The storage SDK refreshes a blob's ETag after a successful
    // properties update (presumably surfaced through ICloudBlockBlob.ETag - confirm against the wrapper),
    // so reading it afterwards would log the new ETag instead of the one the condition was built from.
    var conditionETag = blob.ETag;
    var condition = AccessCondition.GenerateIfMatchCondition(conditionETag);

    await blob.SetPropertiesAsync(
        condition,
        options: null,
        operationContext: null);

    _telemetryService.TrackPackageHashFixed(packageEntry.Id, packageEntry.Version);

    _logger.LogWarning(
        "Updated package {PackageId} {PackageVersion}, set hash to '{Hash}' using ETag {ETag}",
        packageEntry.Id,
        packageEntry.Version,
        hash,
        conditionETag);
}
/// <summary>
/// Checks the Content MD5 property of a package blob against the MD5 hash of its downloaded content.
/// A missing or mismatching hash is reported through telemetry and an error log; the blob is not modified.
/// </summary>
/// <param name="packageEntry">Catalog entry of the package; used for telemetry and logging.</param>
/// <param name="blob">The package blob to validate.</param>
public async Task ProcessPackageAsync(CatalogIndexEntry packageEntry, ICloudBlockBlob blob)
{
    await blob.FetchAttributesAsync(CancellationToken.None);

    // Without a stored hash there is nothing to compare against: report it and stop.
    if (blob.ContentMD5 == null)
    {
        _telemetryService.TrackPackageMissingHash(packageEntry.Id, packageEntry.Version);

        _logger.LogError(
            "Package {PackageId} {PackageVersion} has a null Content MD5 hash!",
            packageEntry.Id,
            packageEntry.Version);

        return;
    }

    // The content is fetched with HttpClient instead of the Azure Blob Storage SDK because the SDK
    // occasionally hangs. See: https://github.com/Azure/azure-storage-net/issues/470
    string hash;
    using (var md5 = MD5.Create())
    {
        using (var content = await _httpClient.GetStreamAsync(blob.Uri))
        {
            hash = Convert.ToBase64String(md5.ComputeHash(content));
        }
    }

    // Matching hashes mean the blob is fine.
    if (blob.ContentMD5 == hash)
    {
        return;
    }

    _telemetryService.TrackPackageHasIncorrectHash(packageEntry.Id, packageEntry.Version);

    _logger.LogError(
        "Package {PackageId} {PackageVersion} has an incorrect Content MD5 hash! Expected: '{ExpectedHash}', actual: '{ActualHash}'",
        packageEntry.Id,
        packageEntry.Version,
        hash,
        blob.ContentMD5);
}
/// <summary>
/// Drains <paramref name="pageUriBag"/>: each page URI taken from the bag is downloaded and every element
/// of the page's "items" array is added to <paramref name="entries"/> as a <see cref="CatalogIndexEntry"/>.
/// </summary>
/// <param name="pageUriBag">Bag of page URIs to process; URIs are removed as they are taken.</param>
/// <param name="entries">Bag that receives the parsed entries.</param>
/// <param name="interner">Interner applied to the string values that repeat across entries.</param>
private async Task ProcessPageUris(ConcurrentBag<Uri> pageUriBag, ConcurrentBag<CatalogIndexEntry> entries, StringInterner interner)
{
    // Yield right away so the rest of the method runs asynchronously from the caller's point of view.
    await Task.Yield();

    for (;;)
    {
        Uri currentPageUri;
        if (!pageUriBag.TryTake(out currentPageUri))
        {
            // The bag is empty: no more pages for this worker.
            break;
        }

        var page = await _httpClient.GetJObjectAsync(currentPageUri);

        foreach (var pageItem in page["items"])
        {
            // The entry URI differs per item, so it is not interned.
            var entryUri = pageItem["@id"].ToString();

            // These values repeat across items, so they go through the interner.
            var entryType = interner.Intern(pageItem["@type"].ToString());
            var entryCommitId = interner.Intern(pageItem["commitId"].ToString());
            var packageId = interner.Intern(pageItem["nuget:id"].ToString());
            var packageVersion = interner.Intern(pageItem["nuget:version"].ToString());

            // The timestamp is read as a typed value; no string is handled for it here.
            var commitTime = pageItem["commitTimeStamp"].ToObject<DateTime>();

            entries.Add(
                new CatalogIndexEntry(
                    new Uri(entryUri),
                    entryType,
                    entryCommitId,
                    commitTime,
                    packageId,
                    NuGetVersion.Parse(packageVersion)));
        }
    }
}