Ejemplo n.º 1
0
        /// <summary>
        /// Collects metadata for packages created after the time read from the cursor file and
        /// before <paramref name="lastCreateTime"/>, and writes one CSV record for every package
        /// whose metadata passes <c>ShouldWriteMetadata</c>.
        /// </summary>
        /// <remarks>
        /// Only packages whose status key equals <c>PackageStatus.Available</c> are queried, in
        /// ascending <c>Created</c> order, optionally capped by <c>LimitTo</c> when it is positive.
        /// For each package the nuspec is downloaded from the flat container; when
        /// <c>SourceType</c> is <c>Nupkg</c> the metadata is obtained through
        /// <c>FetchMetadataAsync</c> instead of <c>ReadMetadata</c>.
        /// An exception raised while processing a single package is logged and the loop continues.
        /// The cursor is written after every <c>CollectBatchSize</c> packages and once more at the
        /// end when packages were processed since the last write.
        /// </remarks>
        /// <param name="connection">SQL connection used to create the read-only entities context.</param>
        /// <param name="serviceDiscoveryUri">Passed to <c>GetFlatContainerUri</c> to obtain the flat container base URI.</param>
        /// <param name="lastCreateTime">Exclusive upper bound applied to the package <c>Created</c> time.</param>
        /// <param name="fileName">Passed to <c>CreateCsvWriter</c> to create the CSV output writer.</param>
        public async Task Collect(SqlConnection connection, Uri serviceDiscoveryUri, DateTime? lastCreateTime, string fileName)
        {
            // We want these downloads ignored by stats pipelines - this user agent is automatically skipped.
            // See https://github.com/NuGet/NuGet.Jobs/blob/262da48ed05d0366613bbf1c54f47879aad96dcd/src/Stats.ImportAzureCdnStatistics/StatisticsParser.cs#L41
            const string backfillUserAgent =
                "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; AppInsights)   Backfill Job: NuGet.Gallery GalleryTools";

            using (var context = new EntitiesContext(connection, readOnly: true))
            using (var cursor = new FileCursor(CursorFileName))
            using (var logger = new Logger(ErrorsFileName))
            {
                // The package query below can be large, so allow it more time.
                context.SetCommandTimeout(300);

                var cursorTime = await cursor.Read();
                logger.Log($"Starting metadata collection - Cursor time: {cursorTime:u}");

                // Build the query step by step; nothing is executed until it is enumerated.
                var packages = new EntityRepository<Package>(context)
                    .GetAll()
                    .Include(pkg => pkg.PackageRegistration);

                if (QueryIncludes != null)
                {
                    packages = packages.Include(QueryIncludes);
                }

                packages = packages
                    .Where(pkg => pkg.Created < lastCreateTime && pkg.Created > cursorTime)
                    .Where(pkg => pkg.PackageStatusKey == PackageStatus.Available)
                    .OrderBy(pkg => pkg.Created);

                if (LimitTo > 0)
                {
                    packages = packages.Take(LimitTo);
                }

                var flatContainerUri = await GetFlatContainerUri(serviceDiscoveryUri);

                using (var csv = CreateCsvWriter(fileName))
                using (var http = new HttpClient())
                {
                    http.DefaultRequestHeaders.TryAddWithoutValidation("User-Agent", backfillUserAgent);

                    // Number of packages handled since the cursor was last written.
                    var packagesSinceCursorWrite = 0;
                    // Largest Created value seen so far; null until the first package is handled.
                    DateTime? newestCreated = null;

                    foreach (var package in packages)
                    {
                        var id = package.PackageRegistration.Id;
                        var version = package.NormalizedVersion;
                        var lowerId = id.ToLowerInvariant();
                        var lowerVersion = version.ToLowerInvariant();

                        try
                        {
                            var metadata = default(TMetadata);
                            var nuspecUri = $"{flatContainerUri}/{lowerId}/{lowerVersion}/{lowerId}.nuspec";

                            using (var nuspecStream = await http.GetStreamAsync(nuspecUri))
                            {
                                var nuspecReader = new NuspecReader(LoadDocument(nuspecStream));

                                if (SourceType == MetadataSourceType.NuspecOnly)
                                {
                                    metadata = ReadMetadata(nuspecReader);
                                }
                                else if (SourceType == MetadataSourceType.Nupkg)
                                {
                                    var nupkgUri = $"{flatContainerUri}/{lowerId}/{lowerVersion}/{lowerId}.{lowerVersion}.nupkg";
                                    metadata = await FetchMetadataAsync(http, nupkgUri, nuspecReader, id, version, logger);
                                }
                            }

                            if (ShouldWriteMetadata(metadata))
                            {
                                csv.WriteRecord(new PackageMetadata(id, version, metadata, package.Created));
                                await csv.NextRecordAsync();

                                logger.LogPackage(id, version, $"Metadata saved");
                            }
                        }
                        catch (Exception e)
                        {
                            // A failing package is recorded and skipped; the run carries on.
                            await logger.LogPackageError(id, version, e);
                        }

                        var isNewest = !newestCreated.HasValue || newestCreated < package.Created;
                        if (isNewest)
                        {
                            newestCreated = package.Created;
                        }

                        packagesSinceCursorWrite++;
                        if (packagesSinceCursorWrite >= CollectBatchSize)
                        {
                            logger.Log($"Writing {package.Created:u} to cursor...");
                            await cursor.Write(package.Created);

                            packagesSinceCursorWrite = 0;
                        }
                    }

                    // Persist progress for a trailing partial batch.
                    if (packagesSinceCursorWrite > 0 && newestCreated.HasValue)
                    {
                        await cursor.Write(newestCreated.Value);
                    }
                }
            }
        }