/// <summary>
/// Calls <c>SaveChangesAsync</c> on <paramref name="context"/> and then, when a cursor time
/// is supplied, writes that time to <paramref name="cursor"/>.
/// </summary>
/// <param name="context">Entities context whose changes are saved.</param>
/// <param name="cursor">Cursor that receives <paramref name="cursorTime"/> once the save has completed.</param>
/// <param name="logger">Logger that receives the progress messages.</param>
/// <param name="cursorTime">Time to write to the cursor; nothing is written when it is <c>null</c>.</param>
private async Task CommitBatch(EntitiesContext context, FileCursor cursor, Logger logger, DateTime? cursorTime)
{
    logger.Log("Committing batch...");

    var savedCount = await context.SaveChangesAsync();

    // The cursor is only touched after the save has completed.
    if (cursorTime != null)
    {
        await cursor.Write(cursorTime.Value);
    }

    logger.Log($"{savedCount} packages saved.");
}
/// <summary>
/// Walks the available packages created after the stored cursor time and before
/// <paramref name="lastCreateTime"/>, in ascending creation order, reads metadata for each
/// one from its nuspec (or nupkg, depending on <c>SourceType</c>) and appends a CSV record
/// for every package accepted by <c>ShouldWriteMetadata</c>.
/// </summary>
/// <remarks>
/// A failure while processing a single package is passed to <c>logger.LogPackageError</c> and
/// the loop continues with the next package. The cursor is written after every
/// <c>CollectBatchSize</c> packages and once more at the end for a partial batch.
/// </remarks>
/// <param name="connection">Connection used to create the read-only entities context.</param>
/// <param name="serviceDiscoveryUri">Passed to <c>GetFlatContainerUri</c> to obtain the base URI for package content.</param>
/// <param name="lastCreateTime">Exclusive upper bound on the package creation time.</param>
/// <param name="fileName">Passed to <c>CreateCsvWriter</c> to create the output writer.</param>
public async Task Collect(SqlConnection connection, Uri serviceDiscoveryUri, DateTime? lastCreateTime, string fileName)
{
    using (var entities = new EntitiesContext(connection, readOnly: true))
    using (var fileCursor = new FileCursor(CursorFileName))
    using (var log = new Logger(ErrorsFileName))
    {
        // The package query below is large, so the command timeout is raised.
        entities.SetCommandTimeout(300);

        var cursorTime = await fileCursor.Read();
        log.Log($"Starting metadata collection - Cursor time: {cursorTime:u}");

        var packageRepository = new EntityRepository<Package>(entities);
        var query = packageRepository
            .GetAll()
            .Include(pkg => pkg.PackageRegistration);

        if (QueryIncludes != null)
        {
            query = query.Include(QueryIncludes);
        }

        // Only available packages strictly between the cursor time and the upper bound, oldest first.
        query = query
            .Where(pkg => pkg.Created < lastCreateTime && pkg.Created > cursorTime)
            .Where(pkg => pkg.PackageStatusKey == PackageStatus.Available)
            .OrderBy(pkg => pkg.Created);

        if (LimitTo > 0)
        {
            query = query.Take(LimitTo);
        }

        var flatContainerUri = await GetFlatContainerUri(serviceDiscoveryUri);

        using (var writer = CreateCsvWriter(fileName))
        using (var client = new HttpClient())
        {
            // These downloads should be ignored by the stats pipelines; this user agent is skipped automatically.
            // See https://github.com/NuGet/NuGet.Jobs/blob/262da48ed05d0366613bbf1c54f47879aad96dcd/src/Stats.ImportAzureCdnStatistics/StatisticsParser.cs#L41
            client.DefaultRequestHeaders.TryAddWithoutValidation(
                "User-Agent",
                "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; AppInsights) Backfill Job: NuGet.Gallery GalleryTools");

            var processedSinceCursorWrite = 0;
            DateTime? newestCreated = null;

            foreach (var package in query)
            {
                var packageId = package.PackageRegistration.Id;
                var packageVersion = package.NormalizedVersion;
                var lowerId = packageId.ToLowerInvariant();
                var lowerVersion = packageVersion.ToLowerInvariant();

                try
                {
                    var metadata = default(TMetadata);

                    // Both the nuspec and the nupkg live under the same per-version path.
                    var versionBaseUri = $"{flatContainerUri}/{lowerId}/{lowerVersion}";
                    var nuspecUri = $"{versionBaseUri}/{lowerId}.nuspec";

                    using (var nuspecStream = await client.GetStreamAsync(nuspecUri))
                    {
                        var nuspecDocument = LoadDocument(nuspecStream);
                        var nuspecReader = new NuspecReader(nuspecDocument);

                        if (SourceType == MetadataSourceType.NuspecOnly)
                        {
                            metadata = ReadMetadata(nuspecReader);
                        }
                        else if (SourceType == MetadataSourceType.Nupkg)
                        {
                            var nupkgUri = $"{versionBaseUri}/{lowerId}.{lowerVersion}.nupkg";
                            metadata = await FetchMetadataAsync(client, nupkgUri, nuspecReader, packageId, packageVersion, log);
                        }
                    }

                    if (ShouldWriteMetadata(metadata))
                    {
                        writer.WriteRecord(new PackageMetadata(packageId, packageVersion, metadata, package.Created));
                        await writer.NextRecordAsync();

                        log.LogPackage(packageId, packageVersion, "Metadata saved");
                    }
                }
                catch (Exception e)
                {
                    // A single failing package is logged and skipped; the loop keeps going.
                    await log.LogPackageError(packageId, packageVersion, e);
                }

                processedSinceCursorWrite++;

                // Remember the latest creation time seen so the final cursor write can use it.
                if (newestCreated == null || newestCreated < package.Created)
                {
                    newestCreated = package.Created;
                }

                if (processedSinceCursorWrite >= CollectBatchSize)
                {
                    log.Log($"Writing {package.Created:u} to cursor...");
                    await fileCursor.Write(package.Created);
                    processedSinceCursorWrite = 0;
                }
            }

            // Write the cursor for a trailing partial batch.
            if (processedSinceCursorWrite > 0 && newestCreated.HasValue)
            {
                await fileCursor.Write(newestCreated.Value);
            }
        }
    }
}
/// <summary>
/// Walks the packages created after the stored cursor time and before
/// <paramref name="lastCreateTime"/> whose status is <c>Available</c> or <c>Validating</c>,
/// in ascending creation order, fetches metadata for each one from its nuspec URI and appends
/// a CSV record for every package accepted by <c>ShouldWriteMetadata</c>.
/// </summary>
/// <remarks>
/// A failure while fetching or writing a single package is passed to
/// <c>logger.LogPackageError</c> and the loop continues with the next package. The cursor is
/// written after every <c>CollectBatchSize</c> packages and once more at the end for a
/// partial batch.
/// </remarks>
/// <param name="connectionString">Connection string used to create the read-only entities context.</param>
/// <param name="serviceDiscoveryUri">Passed to <c>GetFlatContainerUri</c> to obtain the base URI for package content.</param>
/// <param name="lastCreateTime">Exclusive upper bound on the package creation time.</param>
/// <param name="fileName">Passed to <c>CreateCsvWriter</c> to create the output writer.</param>
public async Task Collect(string connectionString, Uri serviceDiscoveryUri, DateTime? lastCreateTime, string fileName)
{
    using (var entities = new EntitiesContext(connectionString, readOnly: true))
    using (var fileCursor = new FileCursor(CursorFileName))
    using (var log = new Logger(ErrorsFileName))
    {
        var cursorTime = await fileCursor.Read();
        log.Log($"Starting metadata collection - Cursor time: {cursorTime:u}");

        var packageRepository = new EntityRepository<Package>(entities);

        // Packages strictly between the cursor time and the upper bound, oldest first.
        var orderedPackages = packageRepository
            .GetAll()
            .Include(pkg => pkg.PackageRegistration)
            .Where(pkg => pkg.Created < lastCreateTime && pkg.Created > cursorTime)
            .Where(pkg => pkg.PackageStatusKey == PackageStatus.Available || pkg.PackageStatusKey == PackageStatus.Validating)
            .OrderBy(pkg => pkg.Created);

        var flatContainerUri = await GetFlatContainerUri(serviceDiscoveryUri);

        using (var writer = CreateCsvWriter(fileName))
        using (var client = new HttpClient())
        {
            var processedSinceCursorWrite = 0;
            DateTime? newestCreated = null;

            foreach (var package in orderedPackages)
            {
                var packageId = package.PackageRegistration.Id;
                var packageVersion = package.NormalizedVersion;

                // The nuspec path uses the lower-cased id and version.
                var lowerId = packageId.ToLowerInvariant();
                var lowerVersion = packageVersion.ToLowerInvariant();
                var nuspecUri = $"{flatContainerUri}/{lowerId}/{lowerVersion}/{lowerId}.nuspec";

                try
                {
                    var metadata = await FetchMetadata(client, nuspecUri);

                    if (ShouldWriteMetadata(metadata))
                    {
                        writer.WriteRecord(new PackageMetadata(packageId, packageVersion, metadata, package.Created));
                        await writer.NextRecordAsync();

                        log.LogPackage(packageId, packageVersion, "Metadata saved.");
                    }
                }
                catch (Exception e)
                {
                    // A single failing package is logged and skipped; the loop keeps going.
                    await log.LogPackageError(packageId, packageVersion, e);
                }

                processedSinceCursorWrite++;

                // Remember the latest creation time seen so the final cursor write can use it.
                if (newestCreated == null || newestCreated < package.Created)
                {
                    newestCreated = package.Created;
                }

                if (processedSinceCursorWrite >= CollectBatchSize)
                {
                    log.Log($"Writing {package.Created:u} to cursor...");
                    await fileCursor.Write(package.Created);
                    processedSinceCursorWrite = 0;
                }
            }

            // Write the cursor for a trailing partial batch.
            if (processedSinceCursorWrite > 0 && newestCreated.HasValue)
            {
                await fileCursor.Write(newestCreated.Value);
            }
        }
    }
}