/// <summary>
/// Creates a catalog writer that only ever appends new pages to the catalog.
/// </summary>
public AppendOnlyCatalogWriter(Storage storage, int maxPageSize = 1000, bool append = true, ICatalogGraphPersistence catalogGraphPersistence = null, CatalogContext context = null)
    : base(storage, catalogGraphPersistence, context)
{
    MaxPageSize = maxPageSize;
    _append = append;
    // No commit has happened yet on this writer instance.
    _first = true;
}
/// <summary>
/// Creates a collector that processes package deletions against resolver blobs.
/// </summary>
public ResolverDeleteCollector(Storage storage, int batchSize = 200)
    : base(batchSize)
{
    _storage = storage;
    Options.InternUris = false;

    // Load the embedded JSON-LD frame used to shape output documents.
    _resolverFrame = JObject.Parse(Utils.GetResource("context.PackageRegistration.json"));
    _resolverFrame["@type"] = "PackageRegistration";
}
/// <summary>
/// Creates a resolver collector with the given commit batch size.
/// </summary>
public ResolverCollector(Storage storage, int batchSize)
    : base(batchSize)
{
    _storage = storage;
    Options.InternUris = false;

    // JSON-LD frame used when compacting resolver output graphs.
    _resolverFrame = JObject.Parse(Utils.GetResource("context.Resolver.json"));
    _resolverFrame["@type"] = "Resolver";
}
/// <summary>
/// Creates a resolver collector that is restricted to Package-typed catalog items.
/// </summary>
public ResolverCollector(Storage storage, int batchSize)
    : base(batchSize, new Uri[] { Schema.DataTypes.Package })
{
    _storage = storage;

    // NOTE(review): the frame comes from context.Resolver.json but the @type is set to
    // "PackageRegistration" — presumably intentional for the output document shape; verify.
    _resolverFrame = JObject.Parse(Utils.GetResource("context.Resolver.json"));
    _resolverFrame["@type"] = "PackageRegistration";

    // Placeholder addresses; callers are expected to overwrite these.
    GalleryBaseAddress = "http://tempuri.org";
    ContentBaseAddress = "http://tempuri.org";
}
/// <summary>
/// Storage facade that writes to a primary store and mirrors to secondary stores,
/// optionally transforming secondary content through an interceptor.
/// </summary>
public AggregateStorage(Uri baseAddress, Storage primaryStorage, Storage[] secondaryStorage, WriteSecondaryStorageContentInterceptor writeSecondaryStorageContentInterceptor)
    : base(baseAddress)
{
    _primaryStorage = primaryStorage;
    _secondaryStorage = secondaryStorage;
    _writeSecondaryStorageContentInterceptor = writeSecondaryStorageContentInterceptor;

    // NOTE(review): the base address passed to base(...) is immediately replaced by
    // the primary storage's address — confirm the baseAddress parameter is still needed.
    BaseAddress = _primaryStorage.BaseAddress;
}
/// <summary>
/// Creates a catalog writer over the given storage and JSON-LD context.
/// </summary>
public CatalogWriter(Storage storage, CatalogContext context, int maxPageSize = 1000, bool append = true)
{
    Options.InternUris = false;

    _storage = storage;
    _context = context;
    _append = append;
    MaxPageSize = maxPageSize;

    // Start with an empty pending batch; the writer is open until disposed.
    _batch = new List<CatalogItem>();
    _first = true;
    _open = true;
}
// Writes a "small" registration as one combined JSON document: commit the items through
// the shared SaveRegistration path while capturing the merged graph, then frame that
// graph and persist it as a single index resource.
static async Task SaveSmallRegistration(Storage storage, Uri registrationBaseAddress, IDictionary<string, IGraph> items, Uri contentBaseAddress, int partitionSize, CancellationToken cancellationToken)
{
    var graphPersistence = new SingleGraphPersistence(storage);
    await graphPersistence.Initialize(cancellationToken);

    await SaveRegistration(storage, registrationBaseAddress, items, null, graphPersistence, contentBaseAddress, partitionSize, cancellationToken);

    // now the commit has happened the graphPersistence.Graph should contain all the data
    var frame = new CatalogContext().GetJsonLdContext("context.Registration.json", graphPersistence.TypeUri);
    var json = Utils.CreateJson(graphPersistence.Graph, frame);
    var content = new StringStorageContent(json, "application/json", "no-store");

    await storage.Save(graphPersistence.ResourceUri, content, cancellationToken);
}
/// <summary>
/// Reads a top-level property from the catalog's index.json and returns it as a UTC
/// timestamp, or null when the index or the property is missing.
/// </summary>
public static async Task<DateTime?> GetCatalogProperty(Storage storage, string propertyName, CancellationToken cancellationToken)
{
    var indexUri = storage.ResolveUri("index.json");
    var json = await storage.LoadString(indexUri, cancellationToken);
    if (json == null)
    {
        return null;
    }

    JToken propertyValue;
    if (!JObject.Parse(json).TryGetValue(propertyName, out propertyValue))
    {
        return null;
    }

    return propertyValue.ToObject<DateTime>().ToUniversalTime();
}
// Reads job configuration from the argument dictionary and constructs the catalog
// and auditing storage instances used by the feed processor.
protected override void Init(IDictionary<string, string> arguments, CancellationToken cancellationToken)
{
    Gallery = arguments.GetOrThrow<string>(Arguments.Gallery);
    Verbose = arguments.GetOrDefault(Arguments.Verbose, false);
    StartDate = arguments.GetOrDefault(Arguments.StartDate, DateTimeMinValueUtc);

    var catalogFactory = CommandHelpers.CreateStorageFactory(arguments, Verbose);
    var auditingFactory = CommandHelpers.CreateSuffixedStorageFactory("Auditing", arguments, Verbose);

    Logger.LogInformation("CONFIG source: \"{ConfigSource}\" storage: \"{Storage}\"", Gallery, catalogFactory);

    CatalogStorage = catalogFactory.Create();
    AuditingStorage = auditingFactory.Create();

    // Fixed page size and HTTP timeout for feed requests.
    Top = 20;
    Timeout = TimeSpan.FromSeconds(300);
}
/// <summary>
/// Loads and deserializes the package monitoring status stored at <paramref name="packageUri"/>.
/// On deserialization failure, falls back to a status wrapping the error; returns null only
/// when even the package identity cannot be recovered from the URI.
/// </summary>
private async Task<PackageMonitoringStatus> GetPackageAsync(CatalogStorage storage, Uri packageUri, CancellationToken token)
{
    try
    {
        var content = await storage.LoadAsync(packageUri, token);

        string statusString;
        using (var stream = content.GetContentStream())
        using (var reader = new StreamReader(stream))
        {
            statusString = await reader.ReadToEndAsync();
        }

        return JsonConvert.DeserializeObject<PackageMonitoringStatus>(statusString, JsonSerializerUtility.SerializerSettings);
    }
    catch (Exception deserializationException)
    {
        _logger.LogWarning(
            LogEvents.StatusDeserializationFailure,
            deserializationException,
            "Unable to deserialize package status from {PackageUri}!",
            packageUri);

        try
        {
            // Fall back to a status built from the URI with the failure attached as its exception.
            var identity = ParsePackageUri(packageUri);
            return new PackageMonitoringStatus(identity, new StatusDeserializationException(deserializationException));
        }
        catch (Exception uriParsingException)
        {
            _logger.LogError(
                LogEvents.StatusDeserializationFatalFailure,
                new AggregateException(deserializationException, uriParsingException),
                "Unable to get package id and version from {PackageUri}!",
                packageUri);

            return null;
        }
    }
}
// Persists the collector cursor as a small JSON-LD document, unless it is the
// sentinel "no cursor" value.
private async Task StoreCursor(NuGet.Services.Metadata.Catalog.Persistence.Storage storage, Uri cursorUri, CollectorCursor value)
{
    if (Equals(value, CollectorCursor.None))
    {
        return;
    }

    Log.StoringCursor(value.Value);

    var cursorDocument = new JObject
    {
        { "http://schema.nuget.org/collectors/resolver#cursor", new JObject
            {
                { "@value", value.Value },
                { "@type", "http://www.w3.org/2001/XMLSchema#dateTime" }
            } },
        { "http://schema.nuget.org/collectors/resolver#source", CatalogIndexUrl }
    };

    var content = new StringStorageContent(
        cursorDocument.ToString(),
        contentType: "application/json",
        cacheControl: "no-store");

    await storage.Save(cursorUri, content);

    Log.StoredCursor();
}
/// <summary>
/// Creates a job whose work items are drained from the named queue.
/// </summary>
public QueueFedJob(Config config, Storage storage, string queueName)
    : base(config, storage)
{
    _queueName = queueName;
}
/// <summary>
/// Scans auditing storage for package-delete audit records newer than <paramref name="since"/>
/// and returns the deleted package identities grouped by deletion timestamp.
/// </summary>
/// <remarks>
/// NOTE(review): <paramref name="client"/>, <paramref name="source"/> and <paramref name="top"/>
/// are not used by this implementation; they are kept for caller compatibility.
/// FIX: malformed records are now detected with explicit null checks instead of catching
/// NullReferenceException as control flow (same warning + skip behavior as before).
/// </remarks>
public static async Task<SortedList<DateTime, IList<PackageIdentity>>> GetDeletedPackages(Storage auditingStorage, HttpClient client, string source, DateTime since, int top = 100)
{
    var result = new SortedList<DateTime, IList<PackageIdentity>>();

    // Get all audit blobs (based on their filename which starts with a date that can be parsed)
    // NOTE we're getting more files than needed (to account for a time difference between servers)
    var minimumFileTime = since.AddMinutes(-15);
    var auditRecordUris = (await auditingStorage.List(CancellationToken.None))
        .Where(recordUri => FilterDeletedPackage(minimumFileTime, recordUri));

    foreach (var auditRecordUri in auditRecordUris)
    {
        var contents = await auditingStorage.LoadString(auditRecordUri, CancellationToken.None);
        if (string.IsNullOrEmpty(contents))
        {
            continue;
        }

        string packageId;
        string packageVersion;
        DateTime? deletedOn;
        try
        {
            var auditRecord = JObject.Parse(contents);

            var recordPart = auditRecord.GetValue("Record", StringComparison.OrdinalIgnoreCase) as JObject;
            var actorPart = auditRecord.GetValue("Actor", StringComparison.OrdinalIgnoreCase) as JObject;
            var idToken = recordPart == null ? null : recordPart.GetValue("Id", StringComparison.OrdinalIgnoreCase);
            var versionToken = recordPart == null ? null : recordPart.GetValue("Version", StringComparison.OrdinalIgnoreCase);
            var timestampToken = actorPart == null ? null : actorPart.GetValue("TimestampUtc", StringComparison.OrdinalIgnoreCase);

            if (idToken == null || versionToken == null || timestampToken == null)
            {
                Trace.TraceWarning("Audit record at {0} does not contain required JSON properties to perform a package delete.", auditRecordUri);
                continue;
            }

            packageId = idToken.ToString();
            packageVersion = versionToken.ToString();
            deletedOn = timestampToken.Value<DateTime>();
        }
        catch (JsonReaderException)
        {
            Trace.TraceWarning("Audit record at {0} contains invalid JSON.", auditRecordUri);
            continue;
        }

        // Only records strictly after the cursor produce catalog deletes.
        if (!string.IsNullOrEmpty(packageId) && !string.IsNullOrEmpty(packageVersion) && deletedOn > since)
        {
            // Mark the package "deleted", grouping identities by their deletion timestamp.
            IList<PackageIdentity> packages;
            if (!result.TryGetValue(deletedOn.Value, out packages))
            {
                packages = new List<PackageIdentity>();
                result.Add(deletedOn.Value, packages);
            }

            packages.Add(new PackageIdentity(packageId, packageVersion));
        }
    }

    return result;
}
// Streams download-statistics batches out of the database and writes them into a
// statistics catalog, committing after every 100 batches and once more at the end.
public static async Task CreateStatisticsCatalogAsync(Storage storage, string connectionString)
{
    const int BatchSize = 100;

    using (var writer = new CatalogWriter(storage, new CatalogContext(), 500))
    {
        var lastKey = 0;
        var iterations = 0;
        var batchesAdded = 0;

        while (true)
        {
            iterations++;

            DateTime minDownloadTimeStamp;
            DateTime maxDownloadTimeStamp;
            var batch = GetNextBatch(connectionString, ref lastKey, out minDownloadTimeStamp, out maxDownloadTimeStamp);
            if (batch == null)
            {
                break;
            }

            writer.Add(new StatisticsCatalogItem(batch, lastKey.ToString(), minDownloadTimeStamp, maxDownloadTimeStamp));

            if (++batchesAdded % BatchSize == 0)
            {
                await writer.Commit();
            }
        }

        // Flush whatever remains after the final partial group of batches.
        await writer.Commit();
    }
}
/// <summary>
/// Creates a writer for registration pages; URIs of superseded pages are collected
/// into <paramref name="cleanUpList"/> for later deletion.
/// </summary>
public RegistrationCatalogWriter(Storage storage, int partitionSize = 100, IList<Uri> cleanUpList = null, ICatalogGraphPersistence graphPersistence = null, CatalogContext context = null)
    : base(storage, graphPersistence, context)
{
    PartitionSize = partitionSize;
    _cleanUpList = cleanUpList;
}
/// <summary>
/// Downloads each package listed in <paramref name="packages"/>, extracts catalog metadata
/// from the package stream, and commits the resulting items to the catalog.
/// </summary>
/// <returns>The timestamp of the last processed feed entry, used by the caller to advance its cursor.</returns>
/// <exception cref="Exception">Thrown on any download failure other than 404 so the caller restarts without moving the cursor forward.</exception>
public static async Task<DateTime> DownloadMetadata2Catalog(HttpClient client, SortedList<DateTime, IList<PackageDetails>> packages, Storage storage, DateTime lastCreated, DateTime lastEdited, DateTime lastDeleted, bool? createdPackages, CancellationToken cancellationToken, ILogger logger)
{
    var writer = new AppendOnlyCatalogWriter(storage, maxPageSize: 550);

    // Baseline: lastCreated when processing created packages, lastEdited when processing edits.
    var lastDate = DetermineLastDate(lastCreated, lastEdited, createdPackages);

    if (packages == null || packages.Count == 0)
    {
        return lastDate;
    }

    foreach (var entry in packages)
    {
        foreach (var packageItem in entry.Value)
        {
            // When downloading the package binary, add a query string parameter
            // that corresponds to the operation's timestamp.
            // This query string will ensure the package is not cached
            // (e.g. on the CDN) and returns the "latest and greatest" package metadata.
            var packageUri = Utilities.GetNugetCacheBustingUri(packageItem.ContentUri, entry.Key.ToString("O"));
            var response = await client.GetAsync(packageUri, cancellationToken);
            if (response.IsSuccessStatusCode)
            {
                using (var stream = await response.Content.ReadAsStreamAsync())
                {
                    CatalogItem item = Utils.CreateCatalogItem(
                        packageItem.ContentUri.ToString(),
                        stream,
                        packageItem.CreatedDate,
                        packageItem.LastEditedDate,
                        packageItem.PublishedDate);

                    if (item != null)
                    {
                        writer.Add(item);

                        logger?.LogInformation("Add metadata from: {PackageDetailsContentUri}", packageItem.ContentUri);
                    }
                    else
                    {
                        logger?.LogWarning("Unable to extract metadata from: {PackageDetailsContentUri}", packageItem.ContentUri);
                    }
                }
            }
            else
            {
                if (response.StatusCode == System.Net.HttpStatusCode.NotFound)
                {
                    // the feed is out of sync with the actual package storage - if we don't have the package there is nothing to be done we might as well move onto the next package
                    // FIX: this message literal was previously split across a physical line break,
                    // which is invalid for a non-verbatim string literal; it is now a single line
                    // matching the error-path message below.
                    logger?.LogWarning("Unable to download: {PackageDetailsContentUri}. Http status: {HttpStatusCode}", packageItem.ContentUri, response.StatusCode);
                }
                else
                {
                    // this should trigger a restart - of this program - and not move the cursor forward
                    logger?.LogError("Unable to download: {PackageDetailsContentUri}. Http status: {HttpStatusCode}", packageItem.ContentUri, response.StatusCode);

                    throw new Exception(
                        $"Unable to download: {packageItem.ContentUri} http status: {response.StatusCode}");
                }
            }
        }

        lastDate = entry.Key;
    }

    // Advance the appropriate cursor to the last processed timestamp before committing.
    if (createdPackages.HasValue)
    {
        lastCreated = createdPackages.Value ? lastDate : lastCreated;
        lastEdited = !createdPackages.Value ? lastDate : lastEdited;
    }

    var commitMetadata = PackageCatalog.CreateCommitMetadata(writer.RootUri, new CommitMetadata(lastCreated, lastEdited, lastDeleted));

    await writer.Commit(commitMetadata, cancellationToken);

    logger?.LogInformation("COMMIT metadata to catalog.");

    return lastDate;
}
/// <summary>
/// Creates a page creator that applies the given graph addons and invokes
/// <paramref name="itemComplete"/> for each finished item.
/// </summary>
public CatalogPageCreator(Storage storage, Action<Uri> itemComplete, IEnumerable<GraphAddon> addons)
    : base(storage, addons)
{
    _itemComplete = itemComplete;
    // Degree of parallelism for page creation.
    _threads = 8;
}
// Downloads each package binary referenced in the feed results, extracts its catalog
// metadata, and appends the resulting items to the catalog in commit-timestamp order.
// Returns the timestamp of the last processed entry (or the baseline from
// DetermineLastDate when there was nothing to process) so the caller can advance its cursor.
private static async Task<DateTime> DownloadMetadata2Catalog(HttpClient client, SortedList<DateTime, IList<PackageDetails>> packages, Storage storage, DateTime lastCreated, DateTime lastEdited, DateTime lastDeleted, bool? createdPackages, CancellationToken cancellationToken)
{
    var writer = new AppendOnlyCatalogWriter(storage, maxPageSize: 550);

    // Baseline: lastCreated when processing created packages, lastEdited when processing edits.
    var lastDate = DetermineLastDate(lastCreated, lastEdited, createdPackages);

    if (packages == null || packages.Count == 0)
    {
        return lastDate;
    }

    foreach (var entry in packages)
    {
        foreach (var packageItem in entry.Value)
        {
            var response = await client.GetAsync(packageItem.ContentUri, cancellationToken);
            if (response.IsSuccessStatusCode)
            {
                using (var stream = await response.Content.ReadAsStreamAsync())
                {
                    // CreateCatalogItem returns null when no metadata could be extracted.
                    var item = Utils.CreateCatalogItem(stream, entry.Key, null, packageItem.ContentUri.ToString(), packageItem.CreatedDate, packageItem.LastEditedDate, packageItem.PublishedDate, packageItem.LicenseNames, packageItem.LicenseReportUrl);

                    if (item != null)
                    {
                        writer.Add(item);

                        Trace.TraceInformation("Add: {0}", packageItem.ContentUri);
                    }
                    else
                    {
                        Trace.TraceWarning("Unable to extract metadata from: {0}", packageItem.ContentUri);
                    }
                }
            }
            else
            {
                if (response.StatusCode == System.Net.HttpStatusCode.NotFound)
                {
                    // the feed is out of sync with the actual package storage - if we don't have the package there is nothing to be done we might as well move onto the next package
                    Trace.TraceWarning("Unable to download: {0} http status: {1}", packageItem.ContentUri, response.StatusCode);
                }
                else
                {
                    // this should trigger a restart - of this program - and not move the cursor forward
                    Trace.TraceError(string.Format("Unable to download: {0} http status: {1}", packageItem.ContentUri, response.StatusCode));

                    throw new Exception(string.Format("Unable to download: {0} http status: {1}", packageItem.ContentUri, response.StatusCode));
                }
            }
        }

        lastDate = entry.Key;
    }

    // Advance the appropriate cursor to the last processed timestamp before committing.
    if (createdPackages.HasValue)
    {
        lastCreated = createdPackages.Value ? lastDate : lastCreated;
        lastEdited = !createdPackages.Value ? lastDate : lastEdited;
    }

    var commitMetadata = PackageCatalog.CreateCommitMetadata(writer.RootUri, new CommitMetadata(lastCreated, lastEdited, lastDeleted));

    await writer.Commit(commitMetadata, cancellationToken);

    Trace.TraceInformation("COMMIT");

    return lastDate;
}
// Builds the URI of the catalog root index: {storage base}/{container}/catalog/index.json.
static Uri GetRootResourceUri(Storage storage)
{
    var containerRoot = string.Format("{0}{1}/", storage.BaseAddress, storage.Container);
    return new Uri(containerRoot + "catalog/index.json");
}
/// <summary>
/// Loads the catalog root and returns its item count.
/// </summary>
public static async Task<int> GetCount(Storage storage)
{
    var rootResourceUri = GetRootResourceUri(storage);
    var content = await storage.Load(rootResourceUri);
    return CatalogRoot.GetCount(rootResourceUri, content);
}
/// <summary>
/// Loads the catalog root and returns the timestamp of its most recent commit.
/// </summary>
public static async Task<DateTime> GetLastCommitTimeStamp(Storage storage)
{
    var rootResourceUri = GetRootResourceUri(storage);
    var content = await storage.Load(rootResourceUri);
    return CatalogRoot.GetLastCommitTimeStamp(rootResourceUri, content);
}
/// <summary>
/// Loads the catalog root and returns the user data recorded with its latest commit.
/// </summary>
public static async Task<IDictionary<string, string>> GetCommitUserData(Storage storage)
{
    var rootResourceUri = GetRootResourceUri(storage);
    var content = await storage.Load(rootResourceUri);
    return CatalogRoot.GetCommitUserData(rootResourceUri, content);
}
// Appends a DeleteCatalogItem to the catalog for every deleted package identity,
// then commits with updated cursor metadata. Returns the timestamp of the last
// processed delete (or the incoming lastDeleted when there was nothing to do).
private static async Task<DateTime> Deletes2Catalog(SortedList<DateTime, IList<PackageIdentity>> packages, Storage storage, DateTime lastCreated, DateTime lastEdited, DateTime lastDeleted, CancellationToken cancellationToken)
{
    var writer = new AppendOnlyCatalogWriter(storage, maxPageSize: 550);

    if (packages == null || packages.Count == 0)
    {
        return lastDeleted;
    }

    foreach (var entry in packages)
    {
        var deletedTimestamp = entry.Key;
        foreach (var identity in entry.Value)
        {
            writer.Add(new DeleteCatalogItem(identity.Id, identity.Version, deletedTimestamp));
            Trace.TraceInformation("Delete: {0} {1}", identity.Id, identity.Version);
        }

        lastDeleted = deletedTimestamp;
    }

    var commitMetadata = PackageCatalog.CreateCommitMetadata(writer.RootUri, new CommitMetadata(lastCreated, lastEdited, lastDeleted));
    await writer.Commit(commitMetadata, cancellationToken);
    Trace.TraceInformation("COMMIT");

    return lastDeleted;
}
/// <summary>
/// Public entry point that forwards directly to <see cref="ProcessFeed"/>.
/// </summary>
public async Task InvokeProcessFeed(string gallery, Storage catalogStorage, Storage auditingStorage, DateTime? startDate, TimeSpan timeout, int top, bool verbose, CancellationToken cancellationToken)
{
    await ProcessFeed(
        gallery,
        catalogStorage,
        auditingStorage,
        startDate,
        timeout,
        top,
        verbose,
        cancellationToken);
}
/// <summary>
/// Convenience overload: creates a page creator with no graph addons.
/// </summary>
public CatalogPageCreator(Storage storage, Action<Uri> itemComplete)
    : this(storage, itemComplete, Enumerable.Empty<GraphAddon>())
{
}
// Resolves the storage URI of the status blob for the given package identity.
private Uri GetPackageUri(CatalogStorage storage, FeedPackageIdentity package)
{
    var fileName = GetPackageFileName(package);
    return storage.ResolveUri(fileName);
}
// Writes every item graph into a registration catalog and commits once at the end.
// cleanUpList and graphPersistence may be null depending on the small/large save path.
static async Task SaveRegistration(Storage storage, Uri registrationBaseAddress, IDictionary<string, IGraph> items, IList<Uri> cleanUpList, SingleGraphPersistence graphPersistence, Uri contentBaseAddress, int partitionSize, CancellationToken cancellationToken)
{
    using (var writer = new RegistrationCatalogWriter(storage, partitionSize, cleanUpList, graphPersistence))
    {
        foreach (var entry in items)
        {
            var itemUri = new Uri(entry.Key);
            writer.Add(new RegistrationCatalogItem(itemUri, entry.Value, contentBaseAddress, registrationBaseAddress));
        }

        await writer.Commit(DateTime.UtcNow, null, cancellationToken);
    }
}
// Overload that resolves the status file name for the package identity and delegates.
private Task<PackageMonitoringStatus> GetPackageAsync(CatalogStorage storage, FeedPackageIdentity package, CancellationToken token)
{
    var fileName = GetPackageFileName(package);
    return GetPackageAsync(storage, fileName, token);
}
// Writes a "large" registration as multiple partitioned pages. Items from the existing
// root document are merged in first, and any pages made irrelevant by the rewrite are
// deleted afterwards (the root index itself is never deleted).
static async Task SaveLargeRegistration(Storage storage, Uri registrationBaseAddress, IDictionary<string, IGraph> items, string existingRoot, Uri contentBaseAddress, int partitionSize, CancellationToken cancellationToken)
{
    if (existingRoot != null)
    {
        var compacted = JToken.Parse(existingRoot);
        AddExistingItems(Utils.CreateGraph(compacted), items);
    }

    IList<Uri> cleanUpList = new List<Uri>();

    await SaveRegistration(storage, registrationBaseAddress, items, cleanUpList, null, contentBaseAddress, partitionSize, cancellationToken);

    // because there were multiple files some might now be irrelevant
    foreach (var uri in cleanUpList)
    {
        if (uri == storage.ResolveUri("index.json"))
        {
            continue;
        }

        Console.WriteLine("DELETE: {0}", uri);
        await storage.Delete(uri, cancellationToken);
    }
}
/// <summary>
/// Creates a message handler that serves HTTP responses from the given storage.
/// </summary>
public StorageHttpMessageHandler(Storage storage)
{
    _storage = storage;
}
/// <summary>
/// Creates a gallery page creator that reports each completed item via the callback.
/// </summary>
public GalleryPageCreator(Storage storage, Action<Uri> itemComplete)
    : base(storage)
{
    _itemComplete = itemComplete;
    // Degree of parallelism for gallery page creation.
    _threads = 64;
}
/// <summary>
/// Creates a cursor persisted at <paramref name="address"/> in the given storage;
/// <paramref name="defaultValue"/> is used when no stored value exists yet.
/// </summary>
public DurableCursor(Uri address, Storage storage, DateTime defaultValue)
{
    _address = address;
    _storage = storage;
    _defaultValue = defaultValue;
}
/// <summary>
/// Creates a collector job that tracks its progress under the named cursor.
/// </summary>
public CollectorJob(Config config, Storage storage, string cursorName)
    : base(config, storage)
{
    _cursorName = cursorName;
}
// Drives one full sync pass from the gallery feed into the catalog:
// first package DELETES (discovered from auditing storage), then newly CREATED
// packages, then EDITED packages — each phase looping in timestamp order until
// its cursor stops advancing.
protected async Task ProcessFeed(string gallery, Storage catalogStorage, Storage auditingStorage, DateTime? startDate, TimeSpan timeout, int top, bool verbose, CancellationToken cancellationToken)
{
    using (var client = CreateHttpClient(verbose))
    {
        client.Timeout = timeout;

        // baseline timestamps
        var lastCreated = await GetCatalogProperty(catalogStorage, "nuget:lastCreated", cancellationToken) ?? (startDate ?? DateTime.MinValue.ToUniversalTime());
        var lastEdited = await GetCatalogProperty(catalogStorage, "nuget:lastEdited", cancellationToken) ?? lastCreated;
        var lastDeleted = await GetCatalogProperty(catalogStorage, "nuget:lastDeleted", cancellationToken) ?? lastCreated;
        if (lastDeleted == DateTime.MinValue.ToUniversalTime())
        {
            lastDeleted = lastCreated;
        }

        // fetch and add all DELETED packages
        if (lastDeleted > DateTime.MinValue.ToUniversalTime())
        {
            SortedList<DateTime, IList<PackageIdentity>> deletedPackages;
            var previousLastDeleted = DateTime.MinValue;
            do
            {
                // Get deleted packages
                Trace.TraceInformation("CATALOG LastDeleted: {0}", lastDeleted.ToString("O"));

                deletedPackages = await GetDeletedPackages(auditingStorage, client, gallery, lastDeleted, top);

                Trace.TraceInformation("FEED DeletedPackages: {0}", deletedPackages.Count);

                // We want to ensure a commit only contains each package once at most.
                // Therefore we segment by package id + version.
                var deletedPackagesSegments = SegmentPackageDeletes(deletedPackages);
                foreach (var deletedPackagesSegment in deletedPackagesSegments)
                {
                    lastDeleted = await Deletes2Catalog(deletedPackagesSegment, catalogStorage, lastCreated, lastEdited, lastDeleted, cancellationToken);

                    // Wait for one second to ensure the next catalog commit gets a new timestamp
                    Thread.Sleep(TimeSpan.FromSeconds(1));
                }

                // Stop when the delete cursor no longer advances (guards against looping forever).
                if (previousLastDeleted == lastDeleted)
                {
                    break;
                }
                previousLastDeleted = lastDeleted;
            } while (deletedPackages.Count > 0);
        }

        // THEN fetch and add all newly CREATED packages - in order
        SortedList<DateTime, IList<PackageDetails>> createdPackages;
        var previousLastCreated = DateTime.MinValue;
        do
        {
            Trace.TraceInformation("CATALOG LastCreated: {0}", lastCreated.ToString("O"));

            createdPackages = await GetCreatedPackages(client, gallery, lastCreated, top);
            Trace.TraceInformation("FEED CreatedPackages: {0}", createdPackages.Count);

            lastCreated = await DownloadMetadata2Catalog(client, createdPackages, catalogStorage, lastCreated, lastEdited, lastDeleted, true, cancellationToken);
            // Stop when the created cursor no longer advances.
            if (previousLastCreated == lastCreated)
            {
                break;
            }
            previousLastCreated = lastCreated;
        } while (createdPackages.Count > 0);

        // THEN fetch and add all EDITED packages - in order
        SortedList<DateTime, IList<PackageDetails>> editedPackages;
        var previousLastEdited = DateTime.MinValue;
        do
        {
            Trace.TraceInformation("CATALOG LastEdited: {0}", lastEdited.ToString("O"));

            editedPackages = await GetEditedPackages(client, gallery, lastEdited, top);

            Trace.TraceInformation("FEED EditedPackages: {0}", editedPackages.Count);

            lastEdited = await DownloadMetadata2Catalog(client, editedPackages, catalogStorage, lastCreated, lastEdited, lastDeleted, false, cancellationToken);
            // Stop when the edited cursor no longer advances.
            if (previousLastEdited == lastEdited)
            {
                break;
            }
            previousLastEdited = lastEdited;
        } while (editedPackages.Count > 0);
    }
}