/// <summary>
/// Releases and clears every static resource owned by this type.
/// Safe to call repeatedly: each field is set to null once disposed.
/// </summary>
private static void Dispose()
{
    Log.LogMessage("Cleaning up...");

    // NOTE(review): _csvReader (assigned by the metadata-file Initialize overload) is not
    // disposed here — confirm it is cleaned up elsewhere, otherwise its StreamReader leaks.
    _context?.Dispose();
    _context = null;

    _csvWriter?.Dispose();
    _csvWriter = null;

    _log?.Dispose();
    _log = null;

    _cursor?.Dispose();
    _cursor = null;

    _httpClient?.Dispose();
    _httpClient = null;
}
/// <summary>
/// Persists all pending entity changes and, when a cursor timestamp is supplied,
/// advances the file cursor so the committed batch is not reprocessed on restart.
/// </summary>
/// <param name="context">Writable entities context holding the pending changes.</param>
/// <param name="cursor">File cursor tracking resume progress.</param>
/// <param name="logger">Destination for progress messages.</param>
/// <param name="cursorTime">New cursor position; null skips the cursor write.</param>
private async Task CommitBatch(EntitiesContext context, FileCursor cursor, Logger logger, DateTime? cursorTime)
{
    logger.Log("Committing batch...");

    var savedCount = await context.SaveChangesAsync();

    if (cursorTime.HasValue)
    {
        await cursor.Write(cursorTime.Value);
    }

    logger.Log($"{savedCount} packages saved.");
}
/// <summary>
/// Wires up the static state for a metadata update run: a CSV reader over the
/// headerless metadata file, a writable entities context, the resume cursor,
/// and the error log.
/// </summary>
private static void Initialize(string metadataFileName, string connectionString)
{
    // The metadata file has no header row; records map via RepositoryMetadataLogMap.
    var csvConfiguration = new CsvHelper.Configuration.Configuration
    {
        HasHeaderRecord = false,
    };
    csvConfiguration.RegisterClassMap<RepositoryMetadataLogMap>();

    _csvReader = new CsvReader(new StreamReader(metadataFileName), csvConfiguration);
    _context = new EntitiesContext(connectionString, readOnly: false);
    _cursor = new FileCursor(CursorFileName);
    _log = new Log(ErrorsFileName);
}
/// <summary>
/// Prepares the static state for a metadata collection run: a read-only entities
/// context, an auto-flushing CSV writer appending to the end of the metadata file,
/// the error log, the resume cursor, an HTTP client, and the resolved flat-container
/// base URI.
/// </summary>
private static async Task Initialize(string connectionString, Uri serviceDiscoveryUri)
{
    Log.LogMessage("Initializing");

    _context = new EntitiesContext(connectionString, readOnly: true);

    // Append mode plus an explicit seek to the end so a resumed run never
    // overwrites previously collected records.
    var metadataWriter = new StreamWriter(RepositoryMetadataFileName, append: true)
    {
        AutoFlush = true,
    };
    metadataWriter.BaseStream.Seek(0, SeekOrigin.End);
    _csvWriter = new CsvWriter(metadataWriter);

    _log = new Log(ErrorsFileName);
    _cursor = new FileCursor(CursorFileName);
    _httpClient = new HttpClient();
    _flatContainerUri = await GetFlatContainerUriAsync(serviceDiscoveryUri);
}
/// <summary>
/// Entry point for the package-lag monitoring job. Determines the newest commit
/// timestamp reported by any search service instance, then processes the catalog
/// starting just past that commit, retrying failed passes up to
/// MAX_CATALOG_RETRY_COUNT times.
/// </summary>
public async override Task Run()
{
    var token = new CancellationToken();

    try
    {
        // Gather search endpoints from every configured region.
        var regionInformations = _configuration.RegionInformations;
        var instances = new List<Instance>();

        foreach (var regionInformation in regionInformations)
        {
            instances.AddRange(await _searchServiceClient.GetSearchEndpointsAsync(regionInformation, token));
        }

        // Find the newest commit timestamp across all instances; unreachable
        // instances are logged and skipped.
        var maxCommit = DateTimeOffset.MinValue;
        foreach (Instance instance in instances)
        {
            try
            {
                var commitDateTime = await _searchServiceClient.GetCommitDateTimeAsync(instance, token);
                maxCommit = commitDateTime > maxCommit ? commitDateTime : maxCommit;
            }
            catch (Exception e)
            {
                // Fix: use the exception-first LogError overload so the logging provider
                // captures the full exception and stack trace as structured data instead
                // of formatting it into the message.
                Logger.LogError(e, "An exception was encountered so no HTTP response was returned.");
            }
        }

        if (maxCommit == DateTimeOffset.MinValue)
        {
            Logger.LogError("Failed to retrieve a proper starting commit. Abandoning the current run.");
            return;
        }

        var catalogLeafProcessor = new PackageLagCatalogLeafProcessor(
            instances,
            _searchServiceClient,
            _telemetryService,
            LoggerFactory.CreateLogger<PackageLagCatalogLeafProcessor>());

        if (_configuration.RetryLimit > 0)
        {
            catalogLeafProcessor.RetryLimit = _configuration.RetryLimit;
        }

        if (_configuration.WaitBetweenRetrySeconds > 0)
        {
            catalogLeafProcessor.WaitBetweenPolls = TimeSpan.FromSeconds(_configuration.WaitBetweenRetrySeconds);
        }

        var settings = new CatalogProcessorSettings
        {
            ServiceIndexUrl = _configuration.ServiceIndexUrl,
            DefaultMinCommitTimestamp = maxCommit,
            ExcludeRedundantLeaves = false
        };

        // Start one tick past the newest commit so already-indexed leaves are skipped.
        var start = new FileCursor("cursor.json", LoggerFactory.CreateLogger<FileCursor>());
        await start.SetAsync(maxCommit.AddTicks(1));

        var catalogProcessor = new CatalogProcessor(
            start,
            _catalogClient,
            catalogLeafProcessor,
            settings,
            LoggerFactory.CreateLogger<CatalogProcessor>());

        bool success;
        int retryCount = 0;

        do
        {
            success = await catalogProcessor.ProcessAsync();

            if (!success || !await catalogLeafProcessor.WaitForProcessing())
            {
                retryCount++;
                // Fix: the original message literal was broken across physical lines,
                // which is not a valid C# string literal; reconstructed as one literal.
                Logger.LogError("Processing the catalog leafs failed. Retry Count {CatalogProcessRetryCount}", retryCount);
            }
        }
        while (!success && retryCount < MAX_CATALOG_RETRY_COUNT);

        return;
    }
    catch (Exception e)
    {
        // Fix: exception-first overload + corrected "Occured" -> "Occurred".
        Logger.LogError(e, "Exception Occurred.");
        return;
    }
}
/// <summary>
/// Replays collected metadata from a CSV file into the gallery database in batches,
/// advancing the file cursor after each committed batch so the job can resume from
/// where it left off.
/// </summary>
/// <param name="connection">Open SQL connection to the gallery database.</param>
/// <param name="fileName">Path of the CSV metadata file produced by the collect phase.</param>
/// <exception cref="ArgumentException">Thrown when <paramref name="fileName"/> does not exist.</exception>
public async Task Update(SqlConnection connection, string fileName)
{
    if (!File.Exists(fileName))
    {
        throw new ArgumentException($"File '{fileName}' doesn't exist");
    }

    using (var context = new EntitiesContext(connection, readOnly: false))
    using (var cursor = new FileCursor(CursorFileName))
    using (var logger = new Logger(ErrorsFileName))
    {
        var startTime = await cursor.Read();

        logger.Log($"Starting database update - Cursor time: {startTime:u}");

        var repository = new EntityRepository<Package>(context);
        var packages = repository.GetAll().Include(p => p.PackageRegistration);

        using (var csv = CreateCsvReader(fileName))
        {
            var counter = 0;
            var lastCreatedDate = default(DateTime?);

            var result = await TryReadMetadata(csv);

            while (result.Success)
            {
                var metadata = result.Metadata;

                // Records created before the cursor were applied on a previous run.
                if (metadata.Created >= startTime)
                {
                    var package = packages.FirstOrDefault(p => p.PackageRegistration.Id == metadata.Id && p.NormalizedVersion == metadata.Version);

                    if (package != null)
                    {
                        UpdatePackage(package, metadata.Metadata, context);
                        logger.LogPackage(metadata.Id, metadata.Version, "Metadata updated.");

                        counter++;

                        // Track the newest applied created-timestamp as the batch high-water mark.
                        // Fix: compare against metadata.Created — the value actually assigned —
                        // instead of package.Created, which the original mixed in here.
                        if (!lastCreatedDate.HasValue || lastCreatedDate < metadata.Created)
                        {
                            lastCreatedDate = metadata.Created;
                        }
                    }
                    else
                    {
                        await logger.LogPackageError(metadata.Id, metadata.Version, "Could not find package in the database.");
                    }
                }

                if (counter >= UpdateBatchSize)
                {
                    await CommitBatch(context, cursor, logger, metadata.Created);
                    counter = 0;
                }

                result = await TryReadMetadata(csv);
            }

            // Commit any remainder that did not fill a full batch.
            if (counter > 0)
            {
                await CommitBatch(context, cursor, logger, lastCreatedDate);
            }
        }
    }
}
/// <summary>
/// Scans available packages created between the cursor time and
/// <paramref name="lastCreateTime"/>, downloads each package's nuspec (and, depending
/// on SourceType, its nupkg) from the flat container, extracts metadata, and appends
/// it to a CSV file — periodically advancing the cursor so the job can resume.
/// </summary>
public async Task Collect(SqlConnection connection, Uri serviceDiscoveryUri, DateTime? lastCreateTime, string fileName)
{
    using (var context = new EntitiesContext(connection, readOnly: true))
    using (var cursor = new FileCursor(CursorFileName))
    using (var logger = new Logger(ErrorsFileName))
    {
        context.SetCommandTimeout(300); // large query

        var startTime = await cursor.Read();

        logger.Log($"Starting metadata collection - Cursor time: {startTime:u}");

        var repository = new EntityRepository<Package>(context);

        var packages = repository.GetAll()
            .Include(p => p.PackageRegistration);

        if (QueryIncludes != null)
        {
            packages = packages.Include(QueryIncludes);
        }

        packages = packages
            .Where(p => p.Created < lastCreateTime && p.Created > startTime)
            .Where(p => p.PackageStatusKey == PackageStatus.Available)
            .OrderBy(p => p.Created);

        if (LimitTo > 0)
        {
            packages = packages.Take(LimitTo);
        }

        var flatContainerUri = await GetFlatContainerUri(serviceDiscoveryUri);

        using (var csv = CreateCsvWriter(fileName))
        using (var http = new HttpClient())
        {
            // We want these downloads ignored by stats pipelines - this user agent is automatically skipped.
            // See https://github.com/NuGet/NuGet.Jobs/blob/262da48ed05d0366613bbf1c54f47879aad96dcd/src/Stats.ImportAzureCdnStatistics/StatisticsParser.cs#L41
            http.DefaultRequestHeaders.TryAddWithoutValidation("User-Agent", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; AppInsights) Backfill Job: NuGet.Gallery GalleryTools");

            var batchCounter = 0;
            var lastCreatedDate = default(DateTime?);

            foreach (var package in packages)
            {
                var id = package.PackageRegistration.Id;
                var version = package.NormalizedVersion;
                var idLowered = id.ToLowerInvariant();
                var versionLowered = version.ToLowerInvariant();

                try
                {
                    var metadata = default(TMetadata);

                    var nuspecUri = $"{flatContainerUri}/{idLowered}/{versionLowered}/{idLowered}.nuspec";
                    using (var nuspecStream = await http.GetStreamAsync(nuspecUri))
                    {
                        var document = LoadDocument(nuspecStream);
                        var nuspecReader = new NuspecReader(document);

                        if (SourceType == MetadataSourceType.NuspecOnly)
                        {
                            metadata = ReadMetadata(nuspecReader);
                        }
                        else if (SourceType == MetadataSourceType.Nupkg)
                        {
                            var nupkgUri = $"{flatContainerUri}/{idLowered}/{versionLowered}/{idLowered}.{versionLowered}.nupkg";
                            metadata = await FetchMetadataAsync(http, nupkgUri, nuspecReader, id, version, logger);
                        }
                    }

                    if (ShouldWriteMetadata(metadata))
                    {
                        var record = new PackageMetadata(id, version, metadata, package.Created);
                        csv.WriteRecord(record);
                        await csv.NextRecordAsync();
                        logger.LogPackage(id, version, $"Metadata saved");
                    }
                }
                catch (Exception e)
                {
                    // Per-package failures are logged and skipped so one bad package
                    // does not abort the whole collection run.
                    await logger.LogPackageError(id, version, e);
                }

                batchCounter++;

                // Track the newest created-timestamp seen so far for the final cursor write.
                if (!lastCreatedDate.HasValue || lastCreatedDate < package.Created)
                {
                    lastCreatedDate = package.Created;
                }

                if (batchCounter >= CollectBatchSize)
                {
                    logger.Log($"Writing {package.Created:u} to cursor...");
                    await cursor.Write(package.Created);
                    batchCounter = 0;
                }
            }

            // Flush the cursor for any trailing partial batch.
            if (batchCounter > 0 && lastCreatedDate.HasValue)
            {
                await cursor.Write(lastCreatedDate.Value);
            }
        }
    }
}
/// <summary>
/// Scans packages (Available or Validating) created between the cursor time and
/// <paramref name="lastCreateTime"/>, fetches each package's nuspec from the flat
/// container, extracts metadata, and appends it to a CSV file — periodically
/// advancing the cursor so the job can resume.
/// </summary>
public async Task Collect(string connectionString, Uri serviceDiscoveryUri, DateTime? lastCreateTime, string fileName)
{
    using (var context = new EntitiesContext(connectionString, readOnly: true))
    using (var cursor = new FileCursor(CursorFileName))
    using (var logger = new Logger(ErrorsFileName))
    {
        var startTime = await cursor.Read();

        logger.Log($"Starting metadata collection - Cursor time: {startTime:u}");

        var repository = new EntityRepository<Package>(context);

        var packages = repository.GetAll()
            .Include(p => p.PackageRegistration)
            .Where(p => p.Created < lastCreateTime && p.Created > startTime)
            .Where(p => p.PackageStatusKey == PackageStatus.Available || p.PackageStatusKey == PackageStatus.Validating)
            .OrderBy(p => p.Created);

        var flatContainerUri = await GetFlatContainerUri(serviceDiscoveryUri);

        using (var csv = CreateCsvWriter(fileName))
        using (var http = new HttpClient())
        {
            var batchCounter = 0;
            var lastCreatedDate = default(DateTime?);

            foreach (var package in packages)
            {
                var id = package.PackageRegistration.Id;
                var version = package.NormalizedVersion;
                var nuspecUri = $"{flatContainerUri}/{id.ToLowerInvariant()}/{version.ToLowerInvariant()}/{id.ToLowerInvariant()}.nuspec";

                try
                {
                    var metadata = await FetchMetadata(http, nuspecUri);

                    if (ShouldWriteMetadata(metadata))
                    {
                        var record = new PackageMetadata(id, version, metadata, package.Created);
                        csv.WriteRecord(record);
                        await csv.NextRecordAsync();
                        logger.LogPackage(id, version, "Metadata saved.");
                    }
                }
                catch (Exception e)
                {
                    // Log and continue: a single failed fetch should not abort the run.
                    await logger.LogPackageError(id, version, e);
                }

                batchCounter++;

                // Track the newest created-timestamp seen so far for the final cursor write.
                if (!lastCreatedDate.HasValue || lastCreatedDate < package.Created)
                {
                    lastCreatedDate = package.Created;
                }

                if (batchCounter >= CollectBatchSize)
                {
                    logger.Log($"Writing {package.Created:u} to cursor...");
                    await cursor.Write(package.Created);
                    batchCounter = 0;
                }
            }

            // Flush the cursor for any trailing partial batch.
            if (batchCounter > 0 && lastCreatedDate.HasValue)
            {
                await cursor.Write(lastCreatedDate.Value);
            }
        }
    }
}