예제 #1
0
        /// <summary>
        /// Collects metadata for packages created after the stored cursor time and before
        /// <paramref name="lastCreateTime"/>, writing one CSV record per package whose metadata
        /// passes <c>ShouldWriteMetadata</c>.
        /// </summary>
        /// <remarks>
        /// Only packages with status <c>PackageStatus.Available</c> are considered, ordered by
        /// creation time and optionally capped by <c>LimitTo</c>. A failure while processing a single
        /// package is logged and does not stop the run. The cursor is written every
        /// <c>CollectBatchSize</c> packages and once more at the end if packages are still pending.
        /// </remarks>
        /// <param name="connection">SQL connection used to open a read-only <c>EntitiesContext</c>.</param>
        /// <param name="serviceDiscoveryUri">Passed to <c>GetFlatContainerUri</c> to resolve the base URI for nuspec/nupkg downloads.</param>
        /// <param name="lastCreateTime">Exclusive upper bound on the package creation time.</param>
        /// <param name="fileName">Passed to <c>CreateCsvWriter</c> to open the CSV output.</param>
        public async Task Collect(SqlConnection connection, Uri serviceDiscoveryUri, DateTime? lastCreateTime, string fileName)
        {
            using (var context = new EntitiesContext(connection, readOnly: true))
            using (var cursor = new FileCursor(CursorFileName))
            using (var logger = new Logger(ErrorsFileName))
            {
                // The package query below is large, so give it a generous command timeout.
                context.SetCommandTimeout(300);

                var cursorTime = await cursor.Read();
                logger.Log($"Starting metadata collection - Cursor time: {cursorTime:u}");

                var query = new EntityRepository<Package>(context)
                    .GetAll()
                    .Include(p => p.PackageRegistration);
                if (QueryIncludes != null)
                {
                    query = query.Include(QueryIncludes);
                }

                // Both bounds are exclusive; ordering by creation time lets the cursor advance monotonically.
                query = query
                    .Where(p => p.Created > cursorTime && p.Created < lastCreateTime)
                    .Where(p => p.PackageStatusKey == PackageStatus.Available)
                    .OrderBy(p => p.Created);
                if (LimitTo > 0)
                {
                    query = query.Take(LimitTo);
                }

                var flatContainerUri = await GetFlatContainerUri(serviceDiscoveryUri);

                using (var csv = CreateCsvWriter(fileName))
                using (var http = new HttpClient())
                {
                    // These downloads should be ignored by the stats pipelines; this user agent is skipped automatically.
                    // See https://github.com/NuGet/NuGet.Jobs/blob/262da48ed05d0366613bbf1c54f47879aad96dcd/src/Stats.ImportAzureCdnStatistics/StatisticsParser.cs#L41
                    const string userAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; AppInsights)   Backfill Job: NuGet.Gallery GalleryTools";
                    http.DefaultRequestHeaders.TryAddWithoutValidation("User-Agent", userAgent);

                    var pendingCount = 0;
                    DateTime? latestCreated = null;

                    foreach (var package in query)
                    {
                        var packageId = package.PackageRegistration.Id;
                        var packageVersion = package.NormalizedVersion;
                        var lowerId = packageId.ToLowerInvariant();
                        var lowerVersion = packageVersion.ToLowerInvariant();

                        try
                        {
                            var metadata = default(TMetadata);

                            // Nuspec and nupkg both live under the same {id}/{version} folder.
                            var versionRoot = $"{flatContainerUri}/{lowerId}/{lowerVersion}";
                            var nuspecUri = $"{versionRoot}/{lowerId}.nuspec";

                            using (var nuspecStream = await http.GetStreamAsync(nuspecUri))
                            {
                                var nuspecReader = new NuspecReader(LoadDocument(nuspecStream));

                                if (SourceType == MetadataSourceType.NuspecOnly)
                                {
                                    metadata = ReadMetadata(nuspecReader);
                                }
                                else if (SourceType == MetadataSourceType.Nupkg)
                                {
                                    var nupkgUri = $"{versionRoot}/{lowerId}.{lowerVersion}.nupkg";
                                    metadata = await FetchMetadataAsync(http, nupkgUri, nuspecReader, packageId, packageVersion, logger);
                                }
                            }

                            if (ShouldWriteMetadata(metadata))
                            {
                                csv.WriteRecord(new PackageMetadata(packageId, packageVersion, metadata, package.Created));
                                await csv.NextRecordAsync();

                                logger.LogPackage(packageId, packageVersion, "Metadata saved");
                            }
                        }
                        catch (Exception e)
                        {
                            // Best effort: record the failure and keep going with the next package.
                            await logger.LogPackageError(packageId, packageVersion, e);
                        }

                        if (latestCreated == null || package.Created > latestCreated)
                        {
                            latestCreated = package.Created;
                        }

                        pendingCount++;
                        if (pendingCount < CollectBatchSize)
                        {
                            continue;
                        }

                        // A full batch has been processed: persist progress.
                        logger.Log($"Writing {package.Created:u} to cursor...");
                        await cursor.Write(package.Created);
                        pendingCount = 0;
                    }

                    // Persist progress for a trailing partial batch.
                    if (pendingCount > 0 && latestCreated.HasValue)
                    {
                        await cursor.Write(latestCreated.Value);
                    }
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Reads package metadata records from a CSV file and applies each one to the matching
        /// package in the database via <c>UpdatePackage</c>.
        /// </summary>
        /// <remarks>
        /// Records whose <c>Created</c> value is earlier than the stored cursor time are skipped.
        /// <c>CommitBatch</c> is called every <c>UpdateBatchSize</c> updated packages and once more
        /// at the end for any remainder (presumably it saves the context and advances the cursor;
        /// confirm against its definition). Records with no matching package are logged as errors.
        /// </remarks>
        /// <param name="connection">SQL connection used to open a writable <c>EntitiesContext</c>.</param>
        /// <param name="fileName">Path of the CSV file to read; it must exist.</param>
        /// <exception cref="ArgumentException">Thrown when <paramref name="fileName"/> does not exist.</exception>
        public async Task Update(SqlConnection connection, string fileName)
        {
            if (!File.Exists(fileName))
            {
                throw new ArgumentException($"File '{fileName}' doesn't exist");
            }

            using (var context = new EntitiesContext(connection, readOnly: false))
            using (var cursor = new FileCursor(CursorFileName))
            using (var logger = new Logger(ErrorsFileName))
            {
                var startTime = await cursor.Read();

                logger.Log($"Starting database update - Cursor time: {startTime:u}");

                var repository = new EntityRepository<Package>(context);

                var packages = repository.GetAll().Include(p => p.PackageRegistration);

                using (var csv = CreateCsvReader(fileName))
                {
                    var counter         = 0;
                    var lastCreatedDate = default(DateTime?);

                    var result = await TryReadMetadata(csv);

                    while (result.Success)
                    {
                        var metadata = result.Metadata;

                        // Anything older than the cursor was handled by a previous run.
                        if (metadata.Created >= startTime)
                        {
                            var package = packages.FirstOrDefault(p => p.PackageRegistration.Id == metadata.Id && p.NormalizedVersion == metadata.Version);

                            if (package != null)
                            {
                                UpdatePackage(package, metadata.Metadata, context);
                                logger.LogPackage(metadata.Id, metadata.Version, "Metadata updated.");

                                counter++;

                                // Track the high-water mark using the CSV timestamp throughout: the cursor
                                // is compared against metadata.Created above, so the comparison and the
                                // stored value must come from the same source.
                                if (!lastCreatedDate.HasValue || lastCreatedDate < metadata.Created)
                                {
                                    lastCreatedDate = metadata.Created;
                                }
                            }
                            else
                            {
                                await logger.LogPackageError(metadata.Id, metadata.Version, "Could not find package in the database.");
                            }
                        }

                        // The counter only reaches the batch size on an iteration that just updated a package.
                        if (counter >= UpdateBatchSize)
                        {
                            await CommitBatch(context, cursor, logger, metadata.Created);

                            counter = 0;
                        }

                        result = await TryReadMetadata(csv);
                    }

                    // Commit whatever is left over from the last partial batch.
                    if (counter > 0)
                    {
                        await CommitBatch(context, cursor, logger, lastCreatedDate);
                    }
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Collects metadata for packages created after the stored cursor time and before
        /// <paramref name="lastCreateTime"/>, writing one CSV record per package whose metadata
        /// passes <c>ShouldWriteMetadata</c>.
        /// </summary>
        /// <remarks>
        /// Packages with status <c>PackageStatus.Available</c> or <c>PackageStatus.Validating</c> are
        /// processed in order of creation time. A failure while processing a single package is logged
        /// and does not stop the run. The cursor is written every <c>CollectBatchSize</c> packages and
        /// once more at the end if packages are still pending.
        /// </remarks>
        /// <param name="connectionString">Connection string used to open a read-only <c>EntitiesContext</c>.</param>
        /// <param name="serviceDiscoveryUri">Passed to <c>GetFlatContainerUri</c> to resolve the base URI for nuspec downloads.</param>
        /// <param name="lastCreateTime">Exclusive upper bound on the package creation time.</param>
        /// <param name="fileName">Passed to <c>CreateCsvWriter</c> to open the CSV output.</param>
        public async Task Collect(string connectionString, Uri serviceDiscoveryUri, DateTime? lastCreateTime, string fileName)
        {
            using (var context = new EntitiesContext(connectionString, readOnly: true))
            using (var cursor = new FileCursor(CursorFileName))
            using (var logger = new Logger(ErrorsFileName))
            {
                var cursorTime = await cursor.Read();
                logger.Log($"Starting metadata collection - Cursor time: {cursorTime:u}");

                // Both bounds are exclusive; ordering by creation time lets the cursor advance monotonically.
                var query = new EntityRepository<Package>(context)
                    .GetAll()
                    .Include(p => p.PackageRegistration)
                    .Where(p => p.Created > cursorTime && p.Created < lastCreateTime)
                    .Where(p => p.PackageStatusKey == PackageStatus.Available || p.PackageStatusKey == PackageStatus.Validating)
                    .OrderBy(p => p.Created);

                var flatContainerUri = await GetFlatContainerUri(serviceDiscoveryUri);

                using (var csv = CreateCsvWriter(fileName))
                using (var http = new HttpClient())
                {
                    var pendingCount = 0;
                    DateTime? latestCreated = null;

                    foreach (var package in query)
                    {
                        var packageId = package.PackageRegistration.Id;
                        var packageVersion = package.NormalizedVersion;

                        // The flat container path uses the lower-cased id and version.
                        var lowerId = packageId.ToLowerInvariant();
                        var nuspecUri = $"{flatContainerUri}/{lowerId}/{packageVersion.ToLowerInvariant()}/{lowerId}.nuspec";

                        try
                        {
                            var metadata = await FetchMetadata(http, nuspecUri);

                            if (ShouldWriteMetadata(metadata))
                            {
                                csv.WriteRecord(new PackageMetadata(packageId, packageVersion, metadata, package.Created));
                                await csv.NextRecordAsync();

                                logger.LogPackage(packageId, packageVersion, "Metadata saved.");
                            }
                        }
                        catch (Exception e)
                        {
                            // Best effort: record the failure and keep going with the next package.
                            await logger.LogPackageError(packageId, packageVersion, e);
                        }

                        if (latestCreated == null || package.Created > latestCreated)
                        {
                            latestCreated = package.Created;
                        }

                        pendingCount++;
                        if (pendingCount < CollectBatchSize)
                        {
                            continue;
                        }

                        // A full batch has been processed: persist progress.
                        logger.Log($"Writing {package.Created:u} to cursor...");
                        await cursor.Write(package.Created);
                        pendingCount = 0;
                    }

                    // Persist progress for a trailing partial batch.
                    if (pendingCount > 0 && latestCreated.HasValue)
                    {
                        await cursor.Write(latestCreated.Value);
                    }
                }
            }
        }