/// <summary>
/// Adds a batch of Canton catalog items to the writer, commits them to the catalog and,
/// only after the commit has succeeded, persists the cursor.
/// </summary>
/// <param name="writer">The catalog writer that receives the items and performs the commit.</param>
/// <param name="batchItems">The items to commit; they are added in the order produced by
/// <c>CantonCatalogItem.Compare</c>.</param>
/// <returns>A task that completes once the commit and, when applicable, the cursor save have finished.</returns>
private async Task Commit(AppendOnlyCatalogWriter writer, CantonCatalogItem[] batchItems)
{
    var orderedBatch = batchItems.ToList();
    orderedBatch.Sort(CantonCatalogItem.Compare);

    int lastHighestCommit = 0;
    DateTime? latestPublished = null;

    // add the items to the writer
    foreach (var orderedItem in orderedBatch)
    {
        // add one here so the stored id points past the item that was just added
        lastHighestCommit = orderedItem.CantonCommitId + 1;
        writer.Add(orderedItem);
    }

    // only save the cursor if we did something
    bool advanceCursor = lastHighestCommit > 0;
    JObject cursorMetadata = null;
    DateTime cursorPosition = default(DateTime);

    if (advanceCursor)
    {
        // find the most recent package
        latestPublished = batchItems.Select(c => c.Published).OrderByDescending(d => d).FirstOrDefault();

        // prepare the cursor update; it is applied only after the commit below succeeds
        cursorMetadata = new JObject();
        cursorMetadata.Add("cantonCommitId", lastHighestCommit);

        Log("Cursor cantonCommitId: " + lastHighestCommit);

        cursorPosition = DateTime.UtcNow;
    }

    if (writer.Count > 0)
    {
        // perform the commit
        Stopwatch timer = Stopwatch.StartNew();

        IGraph commitData = PackageCatalog.CreateCommitMetadata(writer.RootUri, latestPublished, latestPublished);

        // commit
        await writer.Commit(DateTime.UtcNow, commitData);

        timer.Stop();

        Console.WriteLine("Commit duration: " + timer.Elapsed);
    }

    if (advanceCursor)
    {
        // Saving the cursor before the commit completes would move it past items that were
        // never written if the commit throws, so the save happens strictly afterwards.
        Cursor.Position = cursorPosition;
        Cursor.Metadata = cursorMetadata;
        await Cursor.Save();
    }
}
/// <summary>
/// Writes one delete catalog item per package identity in <paramref name="packages"/> and
/// commits them to the catalog in a single commit.
/// </summary>
/// <param name="packages">Deleted package identities, keyed by timestamp.</param>
/// <param name="storage">The storage passed to the catalog writer.</param>
/// <param name="lastCreated">The last created datetime recorded in the commit metadata.</param>
/// <param name="lastEdited">The last edited datetime recorded in the commit metadata.</param>
/// <param name="lastDeleted">The last deleted datetime; returned unchanged when there is nothing to process.</param>
/// <param name="cancellationToken">A cancellation token passed to the commit.</param>
/// <returns>The key of the last processed entry, or <paramref name="lastDeleted"/> when
/// <paramref name="packages"/> is <c>null</c> or empty.</returns>
private async Task<DateTime> Deletes2Catalog(
    SortedList<DateTime, IList<FeedPackageIdentity>> packages,
    IStorage storage,
    DateTime lastCreated,
    DateTime lastEdited,
    DateTime lastDeleted,
    CancellationToken cancellationToken)
{
    var catalogWriter = new AppendOnlyCatalogWriter(storage, TelemetryService, Constants.MaxPageSize);

    var hasPackages = packages != null && packages.Count > 0;
    if (!hasPackages)
    {
        return lastDeleted;
    }

    foreach (var pair in packages)
    {
        var deletedOn = pair.Key;

        foreach (var identity in pair.Value)
        {
            catalogWriter.Add(new DeleteCatalogItem(identity.Id, identity.Version, deletedOn));

            Logger.LogInformation("Delete: {PackageId} {PackageVersion}", identity.Id, identity.Version);
        }

        // Entries are enumerated in key order, so this ends up as the last key.
        lastDeleted = deletedOn;
    }

    var cursors = new CommitMetadata(lastCreated, lastEdited, lastDeleted);
    var commitGraph = PackageCatalog.CreateCommitMetadata(catalogWriter.RootUri, cursors);

    await catalogWriter.Commit(commitGraph, cancellationToken);

    Logger.LogInformation("COMMIT package deletes to catalog.");

    return lastDeleted;
}
/// <summary>
/// Asynchronously creates catalog items for the given packages and commits them to the catalog.
/// </summary>
/// <param name="packageCatalogItemCreator">Creates a catalog item for each package.</param>
/// <param name="packages">The packages to process, keyed by timestamp.</param>
/// <param name="storage">The storage the catalog writer commits to.</param>
/// <param name="lastCreated">The catalog's last created datetime.</param>
/// <param name="lastEdited">The catalog's last edited datetime.</param>
/// <param name="lastDeleted">The catalog's last deleted datetime.</param>
/// <param name="maxDegreeOfParallelism">The maximum degree of parallelism used while creating catalog items;
/// must be at least 1.</param>
/// <param name="createdPackages"><c>true</c> when processing created packages; <c>false</c> when processing
/// edited packages; <c>null</c> to leave both cursors untouched.</param>
/// <param name="updateCreatedFromEdited"><c>true</c> to set the created cursor from the edited cursor;
/// otherwise, <c>false</c>.</param>
/// <param name="cancellationToken">A cancellation token.</param>
/// <param name="telemetryService">A telemetry service.</param>
/// <param name="logger">A logger.</param>
/// <returns>A task that represents the asynchronous operation.
/// The task result (<see cref="Task{TResult}.Result" />) is the latest
/// <see cref="DateTime" /> that was processed.</returns>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="packageCatalogItemCreator" />,
/// <paramref name="packages" />, <paramref name="storage" />, <paramref name="telemetryService" />
/// or <paramref name="logger" /> is <c>null</c>.</exception>
/// <exception cref="ArgumentOutOfRangeException">Thrown if <paramref name="maxDegreeOfParallelism" />
/// is less than 1.</exception>
public static async Task<DateTime> DownloadMetadata2CatalogAsync(
    IPackageCatalogItemCreator packageCatalogItemCreator,
    SortedList<DateTime, IList<FeedPackageDetails>> packages,
    IStorage storage,
    DateTime lastCreated,
    DateTime lastEdited,
    DateTime lastDeleted,
    int maxDegreeOfParallelism,
    bool? createdPackages,
    bool updateCreatedFromEdited,
    CancellationToken cancellationToken,
    ITelemetryService telemetryService,
    ILogger logger)
{
    // Argument validation; the order determines which exception wins when several are invalid.
    if (packageCatalogItemCreator == null)
    {
        throw new ArgumentNullException(nameof(packageCatalogItemCreator));
    }

    if (packages == null)
    {
        throw new ArgumentNullException(nameof(packages));
    }

    if (storage == null)
    {
        throw new ArgumentNullException(nameof(storage));
    }

    if (maxDegreeOfParallelism < 1)
    {
        var rangeMessage = string.Format(Strings.ArgumentOutOfRange, 1, int.MaxValue);

        throw new ArgumentOutOfRangeException(nameof(maxDegreeOfParallelism), rangeMessage);
    }

    if (telemetryService == null)
    {
        throw new ArgumentNullException(nameof(telemetryService));
    }

    if (logger == null)
    {
        throw new ArgumentNullException(nameof(logger));
    }

    cancellationToken.ThrowIfCancellationRequested();

    var catalogWriter = new AppendOnlyCatalogWriter(storage, telemetryService, Constants.MaxPageSize);
    var latestProcessed = DetermineLastDate(lastCreated, lastEdited, createdPackages);

    if (packages.Count == 0)
    {
        return latestProcessed;
    }

    // One work item per package, in the sorted list's order.
    var pending = (from pair in packages
                   from details in pair.Value
                   select new PackageWorkItem(pair.Key, details)).ToArray();

    await pending.ForEachAsync(maxDegreeOfParallelism, async item =>
    {
        item.PackageCatalogItem = await packageCatalogItemCreator.CreateAsync(
            item.FeedPackageDetails,
            item.Timestamp,
            cancellationToken);
    });

    latestProcessed = packages.Last().Key;

    // AppendOnlyCatalogWriter.Add(...) is not thread-safe, so everything is added here on one thread.
    foreach (var item in pending)
    {
        if (item.PackageCatalogItem == null)
        {
            continue;
        }

        catalogWriter.Add(item.PackageCatalogItem);

        logger.LogInformation("Add metadata from: {PackageDetailsContentUri}", item.FeedPackageDetails.ContentUri);
    }

    if (createdPackages.HasValue)
    {
        var processedCreated = createdPackages.Value;

        if (!processedCreated)
        {
            lastEdited = latestProcessed;
        }

        if (updateCreatedFromEdited)
        {
            lastCreated = lastEdited;
        }
        else if (processedCreated)
        {
            lastCreated = latestProcessed;
        }
    }

    var cursors = new CommitMetadata(lastCreated, lastEdited, lastDeleted);
    var commitGraph = PackageCatalog.CreateCommitMetadata(catalogWriter.RootUri, cursors);

    await catalogWriter.Commit(commitGraph, cancellationToken);

    logger.LogInformation("COMMIT metadata to catalog.");

    return latestProcessed;
}
/// <summary>
/// Manual test driver: builds a catalog from .nupkg files on local disk. Packages newer than the
/// catalog's recorded last-created date are processed in creation-date order in batches of 64;
/// each batch's commit runs in the background while the next batch is being parsed.
/// </summary>
/// <returns>A task that completes once the final commit has finished.</returns>
public static async Task Test3Async()
{
    System.Net.ServicePointManager.DefaultConnectionLimit = 1024;

    IDictionary<string, string> packageHashLookup = LoadPackageHashLookup();
    HashSet<string> packageExceptionLookup = LoadPackageExceptionLookup();

    string nupkgs = @"c:\data\nuget\gallery\";

    // NOTE(review): the FileStorage arguments below appear redacted/garbled in this copy of the
    // file (the base-address literal runs into the path literal). Restore the original
    // base address and path arguments before building.
    Storage storage = new FileStorage("http://*****:*****@"c:\data\site\ordered");

    //StorageCredentials credentials = new StorageCredentials("", "");
    //CloudStorageAccount account = new CloudStorageAccount(credentials, true);
    //string storageContainer = "test1";
    //string storagePath = "";
    //string storageBaseAddress = "http://nugetjohtaylo.blob.core.windows.net/test1";
    //StorageFactory storageFactory = new AzureStorageFactory(account, storageContainer, storagePath, new Uri(storageBaseAddress)) { Verbose = true };
    //Storage storage = storageFactory.Create();

    AppendOnlyCatalogWriter writer = new AppendOnlyCatalogWriter(storage, 550);

    const int BatchSize = 64;

    int commitCount = 0;

    IDictionary<string, DateTime> packageCreated = LoadPackageCreatedLookup();

    DateTime lastCreated = (await PackageCatalog.ReadCommitMetadata(writer)).Item1 ?? DateTime.MinValue;

    ParallelOptions options = new ParallelOptions();
    options.MaxDegreeOfParallelism = 8;

    // filter by lastCreated here
    Queue<KeyValuePair<string, DateTime>> packageCreatedQueue = new Queue<KeyValuePair<string, DateTime>>(
        packageCreated
            .Where(p => p.Value > lastCreated && !packageExceptionLookup.Contains(p.Key))
            .OrderBy(p => p.Value));

    int completed = 0;
    Stopwatch runtime = Stopwatch.StartNew();

    Task commitTask = null;

    while (packageCreatedQueue.Count > 0)
    {
        List<KeyValuePair<string, DateTime>> batch = new List<KeyValuePair<string, DateTime>>();
        ConcurrentBag<CatalogItem> batchItems = new ConcurrentBag<CatalogItem>();

        while (batch.Count < BatchSize && packageCreatedQueue.Count > 0)
        {
            completed++;
            var packagePair = packageCreatedQueue.Dequeue();
            lastCreated = packagePair.Value;
            batch.Add(packagePair);
        }

        var commitTime = DateTime.UtcNow;

        Parallel.ForEach(batch, options, entry =>
        {
            FileInfo fileInfo = new FileInfo(nupkgs + entry.Key);

            if (fileInfo.Exists)
            {
                // The package is only read, so open it read-only and allow other readers.
                using (Stream stream = new FileStream(fileInfo.FullName, FileMode.Open, FileAccess.Read, FileShare.Read))
                {
                    string packageHash = null;
                    packageHashLookup.TryGetValue(fileInfo.Name, out packageHash);

                    CatalogItem item = Utils.CreateCatalogItem(stream, entry.Value, packageHash, fileInfo.FullName);
                    batchItems.Add(item);
                }
            }
        });

        // The previous commit must finish before the writer is touched again.
        if (commitTask != null)
        {
            await commitTask;
        }

        foreach (var item in batchItems)
        {
            writer.Add(item);
        }

        // Snapshot the value for this batch: lastCreated is overwritten while the next batch is
        // being built, which can happen before the background commit has read it.
        DateTime batchLastCreated = lastCreated;

        commitTask = Task.Run(async () => await writer.Commit(commitTime, PackageCatalog.CreateCommitMetadata(writer.RootUri, batchLastCreated, null)));

        // stats
        double perPackage = runtime.Elapsed.TotalSeconds / (double)completed;
        DateTime finish = DateTime.Now.AddSeconds(perPackage * packageCreatedQueue.Count);

        Console.WriteLine("commit number {0} Completed: {1} Remaining: {2} Estimated Finish: {3}",
            commitCount++,
            completed,
            packageCreatedQueue.Count,
            finish.ToString("O"));
    }

    // wait for the final commit
    if (commitTask != null)
    {
        await commitTask;
    }

    Console.WriteLine("Finished in: " + runtime.Elapsed);
}
/// <summary>
/// Downloads each package, extracts its metadata into a catalog item and commits all items
/// to the catalog in a single commit.
/// </summary>
/// <param name="client">The HTTP client used to download the packages.</param>
/// <param name="packages">Package URIs and their dates, keyed by timestamp.</param>
/// <param name="storage">The storage the catalog writer commits to.</param>
/// <param name="lastCreated">The last created datetime recorded in the commit metadata.</param>
/// <param name="lastEdited">The last edited datetime recorded in the commit metadata.</param>
/// <param name="createdPackages"><c>true</c> to advance the created date, <c>false</c> to advance the
/// edited date, <c>null</c> to leave both unchanged.</param>
/// <returns>The key of the last processed entry. When <paramref name="packages"/> is <c>null</c> or empty,
/// the corresponding incoming date, or <see cref="DateTime.MinValue"/> if
/// <paramref name="createdPackages"/> is <c>null</c>.</returns>
/// <exception cref="Exception">Thrown when a download fails with a status other than 404 Not Found.</exception>
static async Task<DateTime> DownloadMetadata2Catalog(
    HttpClient client,
    SortedList<DateTime, IList<Tuple<Uri, PackageDates>>> packages,
    Storage storage,
    DateTime lastCreated,
    DateTime lastEdited,
    bool? createdPackages = null)
{
    AppendOnlyCatalogWriter writer = new AppendOnlyCatalogWriter(storage, 550);

    DateTime lastDate = createdPackages.HasValue ? (createdPackages.Value ? lastCreated : lastEdited) : DateTime.MinValue;

    if (packages == null || packages.Count == 0)
    {
        return lastDate;
    }

    foreach (KeyValuePair<DateTime, IList<Tuple<Uri, PackageDates>>> entry in packages)
    {
        foreach (Tuple<Uri, PackageDates> packageItem in entry.Value)
        {
            Uri uri = packageItem.Item1;
            PackageDates pDates = packageItem.Item2;

            // Dispose the response on every path (success, 404 and failure).
            using (HttpResponseMessage response = await client.GetAsync(uri))
            {
                if (response.IsSuccessStatusCode)
                {
                    using (Stream stream = await response.Content.ReadAsStreamAsync())
                    {
                        CatalogItem item = Utils.CreateCatalogItem(
                            stream,
                            entry.Key,
                            null,
                            uri.ToString(),
                            pDates.packageCreatedDate,
                            pDates.packageLastEditedDate,
                            pDates.packagePublishedDate);

                        if (item != null)
                        {
                            writer.Add(item);

                            Trace.TraceInformation("Add: {0}", uri);
                        }
                        else
                        {
                            Trace.TraceWarning("Unable to extract metadata from: {0}", uri);
                        }
                    }
                }
                else
                {
                    string failureMessage = string.Format("Unable to download: {0} http status: {1}", uri, response.StatusCode);

                    if (response.StatusCode == System.Net.HttpStatusCode.NotFound)
                    {
                        // the feed is out of sync with the actual package storage - if we don't have the package
                        // there is nothing to be done, so move on to the next package
                        Trace.TraceWarning(failureMessage);
                    }
                    else
                    {
                        // this should trigger a restart - of this program - and not move the cursor forward
                        Trace.TraceError(failureMessage);
                        throw new Exception(failureMessage);
                    }
                }
            }
        }

        lastDate = entry.Key;
    }

    if (createdPackages.HasValue)
    {
        lastCreated = createdPackages.Value ? lastDate : lastCreated;
        lastEdited = !createdPackages.Value ? lastDate : lastEdited;
    }

    IGraph commitMetadata = PackageCatalog.CreateCommitMetadata(writer.RootUri, lastCreated, lastEdited);

    await writer.Commit(commitMetadata);

    Trace.TraceInformation("COMMIT");

    return lastDate;
}