public static async Task<Uri> AddToCatalog(CatalogItem catalogItem, string connectionString, string container, string catalogBaseAddress)
{
    StorageWriteLock writeLock = new StorageWriteLock(connectionString, container);

    await writeLock.AquireAsync();

    Uri rootUri = null;

    Exception exception = null;
    try
    {
        Storage storage = CreateStorage(connectionString, container, catalogBaseAddress);

        AppendOnlyCatalogWriter writer = new AppendOnlyCatalogWriter(storage);
        writer.Add(catalogItem);
        await writer.Commit();

        rootUri = writer.RootUri;
    }
    catch (Exception e)
    {
        exception = e;
    }

    await writeLock.ReleaseAsync();

    if (exception != null)
    {
        throw exception;
    }

    return rootUri;
}
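The example above captures any exception so the write lock is always released before the failure is rethrown. The same guarantee can be expressed more directly with try/finally, which also preserves the original stack trace. The sketch below is illustrative only; SemaphoreSlim stands in for the storage-backed StorageWriteLock, and the writeAsync delegate stands in for the catalog write:

using System;
using System.Threading;
using System.Threading.Tasks;

public static class CatalogLockSketch
{
    // Run a catalog write while holding a lock; the finally block
    // guarantees release on both success and failure.
    public static async Task<Uri> AddToCatalogLocked(SemaphoreSlim writeLock, Func<Task<Uri>> writeAsync)
    {
        await writeLock.WaitAsync();
        try
        {
            return await writeAsync();
        }
        finally
        {
            writeLock.Release();
        }
    }
}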
public static async Task BuildCatalogAsync(string path, Storage storage, IEnumerable<string> ids)
{
    AppendOnlyCatalogWriter writer = new AppendOnlyCatalogWriter(storage, 600);

    const int BatchSize = 200;
    int i = 0;
    int commitCount = 0;

    IEnumerable<string> files = GetFileList(path, ids);

    Console.WriteLine("initial build files = {0}", files.Count());

    foreach (string fullName in files)
    {
        writer.Add(new NuspecPackageCatalogItem(fullName));

        if (++i % BatchSize == 0)
        {
            await writer.Commit(DateTime.UtcNow);
            Console.WriteLine("commit number {0}", commitCount++);
        }
    }

    await writer.Commit(DateTime.UtcNow);
    Console.WriteLine("commit number {0}", commitCount++);
}
static async Task MoreTestCatalog()
{
    string baseAddress = "http://*****:*****";
    string path = @"c:\data\site\cursor";

    Storage storage = new FileStorage(baseAddress, path);

    AppendOnlyCatalogWriter writer = new AppendOnlyCatalogWriter(storage, 550);

    writer.Add(new TestCatalogItem(8));
    await writer.Commit(new DateTime(2014, 1, 11, 0, 0, 0, DateTimeKind.Utc));
    writer.Add(new TestCatalogItem(9));
    await writer.Commit(new DateTime(2014, 1, 13, 0, 0, 0, DateTimeKind.Utc));
    writer.Add(new TestCatalogItem(10));
    await writer.Commit(new DateTime(2014, 1, 14, 0, 0, 0, DateTimeKind.Utc));
    writer.Add(new TestCatalogItem(11));
    await writer.Commit(new DateTime(2014, 1, 15, 0, 0, 0, DateTimeKind.Utc));
    writer.Add(new TestCatalogItem(12));
    await writer.Commit(new DateTime(2014, 1, 17, 0, 0, 0, DateTimeKind.Utc));
    writer.Add(new TestCatalogItem(13));
    await writer.Commit(new DateTime(2014, 1, 18, 0, 0, 0, DateTimeKind.Utc));
    writer.Add(new TestCatalogItem(14));
    await writer.Commit(new DateTime(2014, 1, 20, 0, 0, 0, DateTimeKind.Utc));

    Console.WriteLine("test catalog created");
}
public static async Task CreateStatisticsCatalogAsync(Storage storage, string connectionString)
{
    const int BatchSize = 100;
    int i = 0;

    using (AppendOnlyCatalogWriter writer = new AppendOnlyCatalogWriter(storage, 500))
    {
        int lastKey = 0;
        int iterations = 0;

        while (true)
        {
            iterations++;

            DateTime minDownloadTimeStamp;
            DateTime maxDownloadTimeStamp;
            JArray batch = GetNextBatch(connectionString, ref lastKey, out minDownloadTimeStamp, out maxDownloadTimeStamp);

            if (batch == null)
            {
                break;
            }

            writer.Add(new StatisticsCatalogItem(batch, minDownloadTimeStamp, maxDownloadTimeStamp));

            if (++i % BatchSize == 0)
            {
                await writer.Commit();
            }
        }

        await writer.Commit();
    }
}
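Several of these examples share the same commit cadence: commit after every BatchSize-th Add, then commit once more at the end to flush a final partial batch. A minimal, generic skeleton of that loop, where the add and commitAsync delegates are placeholders for writer.Add and writer.Commit:

using System;
using System.Collections.Generic;
using System.Threading.Tasks;

public static class BatchedCommitSketch
{
    public static async Task RunAsync<T>(IEnumerable<T> items, int batchSize, Action<T> add, Func<Task> commitAsync)
    {
        int i = 0;
        foreach (T item in items)
        {
            add(item);

            // commit a full page worth of items
            if (++i % batchSize == 0)
            {
                await commitAsync();
            }
        }

        // flush whatever is left in the final partial batch
        await commitAsync();
    }
}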
public static async Task Test1Async()
{
    string nuspecs = @"c:\data\nuget\nuspecs";

    Storage storage = new FileStorage("http://*****:*****", @"c:\data\site\full");
    AppendOnlyCatalogWriter writer = new AppendOnlyCatalogWriter(storage, 20);

    int total = 0;

    //int[] commitSize = { 50, 40, 25, 50, 10, 30, 40, 5, 400, 30, 10, 20, 40, 50, 90, 70, 50, 50, 50, 50, 60, 70 };
    int[] commitSize =
    {
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
        //200, 200, 200, 200, 200,
        //200, 200, 200, 200, 200,
        //200, 200, 200, 200, 200,
        //200, 200, 200, 200, 200,
        //200, 200, 200, 200, 200
    };

    int i = 0;
    int commitCount = 0;

    DirectoryInfo directoryInfo = new DirectoryInfo(nuspecs);
    foreach (FileInfo fileInfo in directoryInfo.EnumerateFiles("dotnetrdf.*.xml"))
    {
        if (commitCount == commitSize.Length)
        {
            break;
        }

        writer.Add(new NuspecPackageCatalogItem(fileInfo.FullName));
        total++;

        if (++i == commitSize[commitCount])
        {
            await writer.Commit(DateTime.UtcNow);
            Console.WriteLine("commit number {0}", commitCount);
            commitCount++;
            i = 0;
        }
    }

    if (i > 0)
    {
        await writer.Commit(DateTime.UtcNow);
    }

    Console.WriteLine("total: {0}", total);
}
private async Task Commit(AppendOnlyCatalogWriter writer, CantonCatalogItem[] batchItems)
{
    var orderedBatch = batchItems.ToList();
    orderedBatch.Sort(CantonCatalogItem.Compare);

    int lastHighestCommit = 0;
    DateTime? latestPublished = null;

    // add the items to the writer
    foreach (var orderedItem in orderedBatch)
    {
        lastHighestCommit = orderedItem.CantonCommitId + 1;
        writer.Add(orderedItem);
    }

    Task cursorTask = null;

    // only save the cursor if we did something
    if (lastHighestCommit > 0)
    {
        // find the most recent package
        latestPublished = batchItems.Select(c => c.Published).OrderByDescending(d => d).FirstOrDefault();

        // update the cursor
        JObject obj = new JObject();

        // add one here since we have already added the current number
        obj.Add("cantonCommitId", lastHighestCommit);
        Log("Cursor cantonCommitId: " + lastHighestCommit);

        Cursor.Position = DateTime.UtcNow;
        Cursor.Metadata = obj;
        cursorTask = Cursor.Save();
    }

    if (writer.Count > 0)
    {
        // perform the commit
        Stopwatch timer = new Stopwatch();
        timer.Start();

        IGraph commitData = PackageCatalog.CreateCommitMetadata(writer.RootUri, latestPublished, latestPublished);

        // commit
        await writer.Commit(DateTime.UtcNow, commitData);

        timer.Stop();
        Console.WriteLine("Commit duration: " + timer.Elapsed);
    }

    if (cursorTask != null)
    {
        await cursorTask;
    }
}
private void CommitToCatalog()
{
    // When CatalogWriterGate is 0, the catalog is open for writing. When it is 1, it is closed for writing.
    // Using Interlocked.Exchange, set the value to 1 to close the gate, and check whether the value returned is 0 to see if the gate was open.
    // If the value returned is 1, that is, if the gate was already closed, do nothing.
    // When 2 or more threads reach this point while the gate is open, only 1 thread will enter. The rest will find that the gate is already closed and leave.
    if (Interlocked.Exchange(ref CatalogWriterGate, 1) == 0)
    {
        try
        {
            using (AppendOnlyCatalogWriter writer = new AppendOnlyCatalogWriter(CatalogStorage, new CatalogContext(), CatalogPageSize))
            {
                ConcurrentQueue<JToken> headStatsQueue;
                while (StatsQueueOfQueues.TryDequeue(out headStatsQueue))
                {
                    if (headStatsQueue.Count == 0)
                    {
                        // An empty StatsQueue; ignore this one and go to the next one
                        continue;
                    }

                    JArray statsCatalogItem = new JArray();
                    foreach (JToken packageStats in headStatsQueue)
                    {
                        statsCatalogItem.Add(packageStats);
                    }

                    // Note that at this point, DateTime is already in UTC
                    string minDownloadTimestampString = statsCatalogItem[0][CatalogDownloadTimestamp].ToString();
                    DateTime minDownloadTimestamp = DateTime.Parse(minDownloadTimestampString, null, System.Globalization.DateTimeStyles.RoundtripKind);

                    string maxDownloadTimestampString = statsCatalogItem[statsCatalogItem.Count - 1][CatalogDownloadTimestamp].ToString();
                    DateTime maxDownloadTimestamp = DateTime.Parse(maxDownloadTimestampString, null, System.Globalization.DateTimeStyles.RoundtripKind);

                    writer.Add(new StatisticsCatalogItem(statsCatalogItem, minDownloadTimestamp, maxDownloadTimestamp));
                    writer.Commit().Wait();
                }
            }
        }
        catch (Exception ex)
        {
            Trace.TraceError(ex.ToString());
        }

        Interlocked.Exchange(ref CatalogWriterGate, 0);
    }
    else
    {
        Trace.WriteLine("Another thread is committing to catalog. Skipping");
    }
}
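The gate in the example above is non-blocking mutual exclusion: threads that lose the race skip the work entirely instead of waiting. Reduced to its core, with illustrative field and method names, and with the gate reopened in a finally block rather than after a catch:

using System;
using System.Threading;

public static class GateSketch
{
    // 0 = open, 1 = closed
    private static int _gate;

    public static bool TryRunExclusive(Action work)
    {
        // Atomically close the gate; only the caller that observed 0 proceeds.
        if (Interlocked.Exchange(ref _gate, 1) != 0)
        {
            return false; // another thread is already inside
        }

        try
        {
            work();
        }
        finally
        {
            // Reopen the gate even if the work threw.
            Interlocked.Exchange(ref _gate, 0);
        }

        return true;
    }
}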
static async Task<Uri> AddToCatalog(Stream nupkgStream)
{
    string storagePrimary = _configurationService.Get("Storage.Primary");

    CloudStorageAccount account = CloudStorageAccount.Parse(storagePrimary);

    Storage storage = new AzureStorage(account, "catalog");

    AppendOnlyCatalogWriter writer = new AppendOnlyCatalogWriter(storage);
    writer.Add(Utils.CreateCatalogItem(nupkgStream, null, null, ""));
    await writer.Commit();

    return writer.RootUri;
}
static async Task MakeTestCatalog()
{
    string baseAddress = "http://*****:*****";
    string path = @"c:\data\site\cursor";

    DirectoryInfo folder = new DirectoryInfo(path);
    if (folder.Exists)
    {
        Console.WriteLine("test catalog already created");
        return;
    }

    Storage storage = new FileStorage(baseAddress, path);

    AppendOnlyCatalogWriter writer = new AppendOnlyCatalogWriter(storage, 550);

    writer.Add(new TestCatalogItem(1));
    await writer.Commit(new DateTime(2014, 1, 1, 0, 0, 0, DateTimeKind.Utc));
    writer.Add(new TestCatalogItem(2));
    await writer.Commit(new DateTime(2014, 1, 3, 0, 0, 0, DateTimeKind.Utc));
    writer.Add(new TestCatalogItem(3));
    await writer.Commit(new DateTime(2014, 1, 4, 0, 0, 0, DateTimeKind.Utc));
    writer.Add(new TestCatalogItem(4));
    await writer.Commit(new DateTime(2014, 1, 5, 0, 0, 0, DateTimeKind.Utc));
    writer.Add(new TestCatalogItem(5));
    await writer.Commit(new DateTime(2014, 1, 7, 0, 0, 0, DateTimeKind.Utc));
    writer.Add(new TestCatalogItem(6));
    await writer.Commit(new DateTime(2014, 1, 8, 0, 0, 0, DateTimeKind.Utc));
    writer.Add(new TestCatalogItem(7));
    await writer.Commit(new DateTime(2014, 1, 10, 0, 0, 0, DateTimeKind.Utc));

    Console.WriteLine("test catalog created");
}
private async Task<DateTime> Deletes2Catalog(
    SortedList<DateTime, IList<FeedPackageIdentity>> packages,
    IStorage storage,
    DateTime lastCreated,
    DateTime lastEdited,
    DateTime lastDeleted,
    CancellationToken cancellationToken)
{
    var writer = new AppendOnlyCatalogWriter(
        storage,
        TelemetryService,
        Constants.MaxPageSize);

    if (packages == null || packages.Count == 0)
    {
        return lastDeleted;
    }

    foreach (var entry in packages)
    {
        foreach (var packageIdentity in entry.Value)
        {
            var catalogItem = new DeleteCatalogItem(packageIdentity.Id, packageIdentity.Version, entry.Key);
            writer.Add(catalogItem);

            Logger.LogInformation("Delete: {PackageId} {PackageVersion}", packageIdentity.Id, packageIdentity.Version);
        }

        lastDeleted = entry.Key;
    }

    var commitMetadata = PackageCatalog.CreateCommitMetadata(writer.RootUri, new CommitMetadata(lastCreated, lastEdited, lastDeleted));

    await writer.Commit(commitMetadata, cancellationToken);

    Logger.LogInformation("COMMIT package deletes to catalog.");

    return lastDeleted;
}
public async Task Load(string path, CancellationToken cancellationToken)
{
    var directoryInfo = new DirectoryInfo(path);
    foreach (var fileInfo in directoryInfo.EnumerateFiles("*.nuspec"))
    {
        AddNuspec(fileInfo);
    }

    // Catalog
    var factory = new MemoryStorageFactory(new Uri(_baseAddress, Catalog), _store);
    var storage = factory.Create();
    var catalog = new AppendOnlyCatalogWriter(storage);

    foreach (var registration in Data.Values)
    {
        foreach (var package in registration.Values)
        {
            var metadata = new NupkgMetadata { Nuspec = package };
            catalog.Add(new PackageCatalogItem(metadata));
        }
    }

    await catalog.Commit(null, cancellationToken);

    Uri catalogIndex = new Uri(storage.BaseAddress, "index.json");

    Func<StorageHttpMessageHandler> handlerFunc = () => new StorageHttpMessageHandler(storage);

    await CreateRegistrationBlobs(catalogIndex, handlerFunc, cancellationToken);
    await CreateFlatContainer(catalogIndex, handlerFunc, cancellationToken);
    await CreateLuceneIndex(catalogIndex, handlerFunc, cancellationToken);
    await CreateIndex(cancellationToken);
}
public static async Task Test0Async()
{
    //string nuspecs = @"c:\data\nuget\nuspecs";
    string nuspecs = @"c:\data\nuget\nuspecs";

    //Storage storage = new FileStorage("http://*****:*****", @"c:\data\site\full");
    //Storage storage = new FileStorage("http://*****:*****", @"c:\data\site\dotnetrdf");
    Storage storage = new FileStorage("http://*****:*****", @"c:\data\site\ordered");

    AppendOnlyCatalogWriter writer = new AppendOnlyCatalogWriter(storage, 15);

    const int BatchSize = 10;
    int i = 0;
    int commitCount = 0;

    DirectoryInfo directoryInfo = new DirectoryInfo(nuspecs);
    //foreach (FileInfo fileInfo in directoryInfo.EnumerateFiles("*.xml"))
    //foreach (FileInfo fileInfo in directoryInfo.EnumerateFiles("dotnetrdf.*.xml"))
    foreach (FileInfo fileInfo in directoryInfo.EnumerateFiles("entityframework.*.xml"))
    {
        writer.Add(new NuspecPackageCatalogItem(fileInfo.FullName));

        if (++i % BatchSize == 0)
        {
            await writer.Commit(DateTime.UtcNow);
            Console.WriteLine("commit number {0}", commitCount++);
        }
    }

    await writer.Commit(DateTime.UtcNow);
    Console.WriteLine("commit number {0}", commitCount++);
}
public static async Task CreateStatisticsCatalogAsync(Storage storage, string connectionString, CancellationToken cancellationToken)
{
    const int BatchSize = 100;
    int i = 0;

    using (AppendOnlyCatalogWriter writer = new AppendOnlyCatalogWriter(storage, 500))
    {
        int lastKey = 0;
        int iterations = 0;

        while (true)
        {
            iterations++;

            DateTime minDownloadTimeStamp;
            DateTime maxDownloadTimeStamp;
            JArray batch = GetNextBatch(connectionString, ref lastKey, out minDownloadTimeStamp, out maxDownloadTimeStamp);

            if (batch == null)
            {
                break;
            }

            writer.Add(new StatisticsCatalogItem(batch, minDownloadTimeStamp, maxDownloadTimeStamp));

            if (++i % BatchSize == 0)
            {
                await writer.Commit(null, cancellationToken);
            }
        }

        await writer.Commit(null, cancellationToken);
    }
}
static async Task<DateTime> DownloadMetadata2Catalog(HttpClient client, SortedList<DateTime, IList<Tuple<Uri, PackageDates>>> packages, Storage storage, DateTime lastCreated, DateTime lastEdited, bool? createdPackages = null)
{
    AppendOnlyCatalogWriter writer = new AppendOnlyCatalogWriter(storage, 550);

    DateTime lastDate = createdPackages.HasValue ? (createdPackages.Value ? lastCreated : lastEdited) : DateTime.MinValue;

    if (packages == null || packages.Count == 0)
    {
        return lastDate;
    }

    foreach (KeyValuePair<DateTime, IList<Tuple<Uri, PackageDates>>> entry in packages)
    {
        foreach (Tuple<Uri, PackageDates> packageItem in entry.Value)
        {
            Uri uri = packageItem.Item1;
            PackageDates pDates = packageItem.Item2;

            HttpResponseMessage response = await client.GetAsync(uri);
            if (response.IsSuccessStatusCode)
            {
                using (Stream stream = await response.Content.ReadAsStreamAsync())
                {
                    CatalogItem item = Utils.CreateCatalogItem(stream, entry.Key, null, uri.ToString(), pDates.packageCreatedDate, pDates.packageLastEditedDate, pDates.packagePublishedDate);

                    if (item != null)
                    {
                        writer.Add(item);
                        Trace.TraceInformation("Add: {0}", uri);
                    }
                    else
                    {
                        Trace.TraceWarning("Unable to extract metadata from: {0}", uri);
                    }
                }
            }
            else
            {
                if (response.StatusCode == System.Net.HttpStatusCode.NotFound)
                {
                    // The feed is out of sync with the actual package storage. If we don't have the
                    // package, there is nothing to be done; we might as well move on to the next package.
                    Trace.TraceWarning(string.Format("Unable to download: {0} http status: {1}", uri, response.StatusCode));
                }
                else
                {
                    // This should trigger a restart of this program and not move the cursor forward.
                    Trace.TraceError(string.Format("Unable to download: {0} http status: {1}", uri, response.StatusCode));
                    throw new Exception(string.Format("Unable to download: {0} http status: {1}", uri, response.StatusCode));
                }
            }
        }

        lastDate = entry.Key;
    }

    if (createdPackages.HasValue)
    {
        lastCreated = createdPackages.Value ? lastDate : lastCreated;
        lastEdited = !createdPackages.Value ? lastDate : lastEdited;
    }

    IGraph commitMetadata = PackageCatalog.CreateCommitMetadata(writer.RootUri, lastCreated, lastEdited);

    await writer.Commit(commitMetadata);

    Trace.TraceInformation("COMMIT");

    return lastDate;
}
protected override void RunCore()
{
    Config.Catalog.LocalFolder.Create();

    int total = _nupkgs.Count;
    Log("Processing " + total + " nupkgs");

    ParallelOptions options = new ParallelOptions();
    options.MaxDegreeOfParallelism = 8;

    Task commitTask = null;

    using (var writer = new AppendOnlyCatalogWriter(Config.Catalog.Storage))
    {
        while (_nupkgs.Count > 0)
        {
            Queue<PackageCatalogItem> currentBatch = new Queue<PackageCatalogItem>(_batchSize);

            // create the batch
            while (currentBatch.Count < _batchSize && _nupkgs.Count > 0)
            {
                string file = _nupkgs.Dequeue();

                if (file.EndsWith(".nupkg", StringComparison.OrdinalIgnoreCase))
                {
                    currentBatch.Enqueue(new NupkgCatalogItem(file));
                }
                else
                {
                    currentBatch.Enqueue(new NuspecPackageCatalogItem(file));
                }
            }

            // process the nupkgs and nuspec files in parallel
            Parallel.ForEach(currentBatch, options, nupkg =>
            {
                nupkg.Load();
            });

            // wait for the previous commit to finish before adding more
            if (commitTask != null)
            {
                commitTask.Wait();
            }

            // add everything from the queue
            foreach (PackageCatalogItem item in currentBatch)
            {
                writer.Add(item);
            }

            // commit
            commitTask = Task.Run(async () => await writer.Commit(DateTime.UtcNow));

            ProgressUpdate(total - _nupkgs.Count, total);
        }

        // wait for the final commit
        if (commitTask != null)
        {
            commitTask.Wait();
        }
    }
}
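RunCore above pipelines the work: while batch N commits on a background task, batch N+1 is being loaded in parallel, and only the Add calls are serialized because the writer is not thread-safe. The shape of that pipeline, as a standalone sketch with hypothetical loadBatch/add/commitAsync delegates standing in for the item loading and the writer:

using System;
using System.Collections.Generic;
using System.Threading.Tasks;

public static class PipelineSketch
{
    public static async Task RunAsync<T>(IEnumerable<IReadOnlyList<T>> batches, Action<IReadOnlyList<T>> loadBatch, Action<T> add, Func<Task> commitAsync)
    {
        Task commitTask = null;

        foreach (var batch in batches)
        {
            loadBatch(batch);            // parallel-friendly preparation

            if (commitTask != null)      // don't touch the writer mid-commit
            {
                await commitTask;
            }

            foreach (var item in batch)  // Add is single-threaded by design
            {
                add(item);
            }

            commitTask = Task.Run(commitAsync);
        }

        if (commitTask != null)          // flush the final commit
        {
            await commitTask;
        }
    }
}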
/// <summary>
/// Asynchronously writes package metadata to the catalog.
/// </summary>
/// <param name="packageCatalogItemCreator">A package catalog item creator.</param>
/// <param name="packages">Packages to download metadata for.</param>
/// <param name="storage">Storage.</param>
/// <param name="lastCreated">The catalog's last created datetime.</param>
/// <param name="lastEdited">The catalog's last edited datetime.</param>
/// <param name="lastDeleted">The catalog's last deleted datetime.</param>
/// <param name="maxDegreeOfParallelism">The maximum degree of parallelism for package processing.</param>
/// <param name="createdPackages"><c>true</c> to include created packages; otherwise, <c>false</c>.</param>
/// <param name="updateCreatedFromEdited"><c>true</c> to update the created cursor from the last edited cursor;
/// otherwise, <c>false</c>.</param>
/// <param name="cancellationToken">A cancellation token.</param>
/// <param name="telemetryService">A telemetry service.</param>
/// <param name="logger">A logger.</param>
/// <returns>A task that represents the asynchronous operation.
/// The task result (<see cref="Task{TResult}.Result" />) returns the latest
/// <see cref="DateTime" /> that was processed.</returns>
public static async Task<DateTime> DownloadMetadata2CatalogAsync(
    IPackageCatalogItemCreator packageCatalogItemCreator,
    SortedList<DateTime, IList<FeedPackageDetails>> packages,
    IStorage storage,
    DateTime lastCreated,
    DateTime lastEdited,
    DateTime lastDeleted,
    int maxDegreeOfParallelism,
    bool? createdPackages,
    bool updateCreatedFromEdited,
    CancellationToken cancellationToken,
    ITelemetryService telemetryService,
    ILogger logger)
{
    if (packageCatalogItemCreator == null)
    {
        throw new ArgumentNullException(nameof(packageCatalogItemCreator));
    }

    if (packages == null)
    {
        throw new ArgumentNullException(nameof(packages));
    }

    if (storage == null)
    {
        throw new ArgumentNullException(nameof(storage));
    }

    if (maxDegreeOfParallelism < 1)
    {
        throw new ArgumentOutOfRangeException(
            nameof(maxDegreeOfParallelism),
            string.Format(Strings.ArgumentOutOfRange, 1, int.MaxValue));
    }

    if (telemetryService == null)
    {
        throw new ArgumentNullException(nameof(telemetryService));
    }

    if (logger == null)
    {
        throw new ArgumentNullException(nameof(logger));
    }

    cancellationToken.ThrowIfCancellationRequested();

    var writer = new AppendOnlyCatalogWriter(storage, telemetryService, Constants.MaxPageSize);
    var lastDate = DetermineLastDate(lastCreated, lastEdited, createdPackages);

    if (packages.Count == 0)
    {
        return lastDate;
    }

    // Flatten the sorted list.
    var workItems = packages.SelectMany(
        pair => pair.Value.Select(
            details => new PackageWorkItem(pair.Key, details)))
        .ToArray();

    await workItems.ForEachAsync(maxDegreeOfParallelism, async workItem =>
    {
        workItem.PackageCatalogItem = await packageCatalogItemCreator.CreateAsync(
            workItem.FeedPackageDetails,
            workItem.Timestamp,
            cancellationToken);
    });

    lastDate = packages.Last().Key;

    // AppendOnlyCatalogWriter.Add(...) is not thread-safe, so add them all at once on one thread.
    foreach (var workItem in workItems.Where(workItem => workItem.PackageCatalogItem != null))
    {
        writer.Add(workItem.PackageCatalogItem);
        logger?.LogInformation("Add metadata from: {PackageDetailsContentUri}", workItem.FeedPackageDetails.ContentUri);
    }

    if (createdPackages.HasValue)
    {
        lastEdited = !createdPackages.Value ? lastDate : lastEdited;

        if (updateCreatedFromEdited)
        {
            lastCreated = lastEdited;
        }
        else
        {
            lastCreated = createdPackages.Value ? lastDate : lastCreated;
        }
    }

    var commitMetadata = PackageCatalog.CreateCommitMetadata(writer.RootUri, new CommitMetadata(lastCreated, lastEdited, lastDeleted));

    await writer.Commit(commitMetadata, cancellationToken);

    logger?.LogInformation("COMMIT metadata to catalog.");

    return lastDate;
}
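DownloadMetadata2CatalogAsync relies on a ForEachAsync extension to create catalog items with bounded parallelism before the single-threaded Add loop. A common way to implement such a helper is the partitioner pattern shown below; this is a sketch of one possible implementation, and the helper actually used by the codebase may differ:

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;

public static class ForEachAsyncSketch
{
    public static Task ForEachAsync<T>(this IEnumerable<T> source, int maxDegreeOfParallelism, Func<T, Task> body)
    {
        // Split the source into maxDegreeOfParallelism partitions and drain
        // each partition on its own task, bounding overall concurrency.
        return Task.WhenAll(
            Partitioner.Create(source)
                .GetPartitions(maxDegreeOfParallelism)
                .Select(partition => Task.Run(async () =>
                {
                    using (partition)
                    {
                        while (partition.MoveNext())
                        {
                            await body(partition.Current);
                        }
                    }
                })));
    }
}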
public static async Task Test3Async()
{
    System.Net.ServicePointManager.DefaultConnectionLimit = 1024;

    IDictionary<string, string> packageHashLookup = LoadPackageHashLookup();
    HashSet<string> packageExceptionLookup = LoadPackageExceptionLookup();

    string nupkgs = @"c:\data\nuget\gallery\";

    Storage storage = new FileStorage("http://*****:*****", @"c:\data\site\ordered");

    //StorageCredentials credentials = new StorageCredentials("", "");
    //CloudStorageAccount account = new CloudStorageAccount(credentials, true);
    //string storageContainer = "test1";
    //string storagePath = "";
    //string storageBaseAddress = "http://nugetjohtaylo.blob.core.windows.net/test1";
    //StorageFactory storageFactory = new AzureStorageFactory(account, storageContainer, storagePath, new Uri(storageBaseAddress)) { Verbose = true };
    //Storage storage = storageFactory.Create();

    AppendOnlyCatalogWriter writer = new AppendOnlyCatalogWriter(storage, 550);

    const int BatchSize = 64;
    int commitCount = 0;

    IDictionary<string, DateTime> packageCreated = LoadPackageCreatedLookup();

    DateTime lastCreated = (await PackageCatalog.ReadCommitMetadata(writer)).Item1 ?? DateTime.MinValue;

    ParallelOptions options = new ParallelOptions();
    options.MaxDegreeOfParallelism = 8;

    // filter by lastCreated here
    Queue<KeyValuePair<string, DateTime>> packageCreatedQueue = new Queue<KeyValuePair<string, DateTime>>(
        packageCreated.Where(p => p.Value > lastCreated && !packageExceptionLookup.Contains(p.Key)).OrderBy(p => p.Value));

    int completed = 0;
    Stopwatch runtime = new Stopwatch();
    runtime.Start();

    Task commitTask = null;

    var context = writer.Context;
    Uri rootUri = writer.RootUri;

    while (packageCreatedQueue.Count > 0)
    {
        List<KeyValuePair<string, DateTime>> batch = new List<KeyValuePair<string, DateTime>>();
        ConcurrentBag<CatalogItem> batchItems = new ConcurrentBag<CatalogItem>();

        while (batch.Count < BatchSize && packageCreatedQueue.Count > 0)
        {
            completed++;
            var packagePair = packageCreatedQueue.Dequeue();
            lastCreated = packagePair.Value;
            batch.Add(packagePair);
        }

        var commitTime = DateTime.UtcNow;

        Parallel.ForEach(batch, options, entry =>
        {
            FileInfo fileInfo = new FileInfo(nupkgs + entry.Key);
            if (fileInfo.Exists)
            {
                using (Stream stream = new FileStream(fileInfo.FullName, FileMode.Open))
                {
                    string packageHash = null;
                    packageHashLookup.TryGetValue(fileInfo.Name, out packageHash);

                    CatalogItem item = Utils.CreateCatalogItem(stream, entry.Value, packageHash, fileInfo.FullName);
                    batchItems.Add(item);
                }
            }
        });

        if (commitTask != null)
        {
            commitTask.Wait();
        }

        foreach (var item in batchItems)
        {
            writer.Add(item);
        }

        commitTask = Task.Run(async () => await writer.Commit(commitTime, PackageCatalog.CreateCommitMetadata(writer.RootUri, lastCreated, null)));

        // stats
        double perPackage = runtime.Elapsed.TotalSeconds / (double)completed;
        DateTime finish = DateTime.Now.AddSeconds(perPackage * packageCreatedQueue.Count);
        Console.WriteLine("commit number {0} Completed: {1} Remaining: {2} Estimated Finish: {3}", commitCount++, completed, packageCreatedQueue.Count, finish.ToString("O"));
    }

    // wait for the final commit
    if (commitTask != null)
    {
        commitTask.Wait();
    }

    Console.WriteLine("Finished in: " + runtime.Elapsed);
}
public override async Task RunCore()
{
    TimeSpan hold = TimeSpan.FromMinutes(90);

    int cantonCommitId = 0;

    JToken cantonCommitIdToken = null;
    if (Cursor.Metadata.TryGetValue("cantonCommitId", out cantonCommitIdToken))
    {
        cantonCommitId = cantonCommitIdToken.ToObject<int>();
    }

    Queue<JObject> orderedMessages = new Queue<JObject>();

    var blobClient = Account.CreateCloudBlobClient();

    Stopwatch giveup = new Stopwatch();
    giveup.Start();

    Dictionary<int, string> unQueuedMessages = new Dictionary<int, string>();

    ParallelOptions options = new ParallelOptions();
    options.MaxDegreeOfParallelism = 8;

    ConcurrentBag<CantonCatalogItem> batchItems = new ConcurrentBag<CantonCatalogItem>();

    Task commitTask = null;

    try
    {
        using (AppendOnlyCatalogWriter writer = new AppendOnlyCatalogWriter(Storage, 600))
        {
            try
            {
                // get everything in the queue
                Log("Getting work");
                var newWork = GetWork();
                Log("Done getting work");

                // everything must run in canton commit order!
                while (_run && (newWork.Count > 0 || unQueuedMessages.Count > 0 || orderedMessages.Count > 0))
                {
                    Log(String.Format("New: {0} Waiting: {1} Ordered: {2}", newWork.Count, unQueuedMessages.Count, orderedMessages.Count));

                    int[] newIds = newWork.Keys.ToArray();
                    foreach (int curId in newIds)
                    {
                        string s = newWork[curId];

                        JObject json = JObject.Parse(s);
                        int id = json["cantonCommitId"].ToObject<int>();

                        if (id >= cantonCommitId && !unQueuedMessages.ContainsKey(id))
                        {
                            unQueuedMessages.Add(id, s);
                        }
                        else
                        {
                            LogError("Ignoring old cantonCommitId: " + id + " We are on: " + cantonCommitId);
                        }
                    }

                    // load up the next 4096 work items we need
                    while (unQueuedMessages.ContainsKey(cantonCommitId) && orderedMessages.Count < 4096)
                    {
                        JObject json = JObject.Parse(unQueuedMessages[cantonCommitId]);
                        orderedMessages.Enqueue(json);
                        unQueuedMessages.Remove(cantonCommitId);
                        cantonCommitId++;
                        giveup.Restart();
                    }

                    // take up to the batch size
                    Queue<JObject> currentBatch = new Queue<JObject>();
                    while (currentBatch.Count < BatchSize && orderedMessages.Count > 0 && (currentBatch.Count + batchItems.Count) < BatchSize)
                    {
                        currentBatch.Enqueue(orderedMessages.Dequeue());
                    }

                    if (currentBatch.Count > 0)
                    {
                        Stopwatch timer = new Stopwatch();
                        timer.Start();

                        int before = batchItems.Count;

                        Parallel.ForEach(currentBatch, options, workJson =>
                        {
                            try
                            {
                                int curId = workJson["cantonCommitId"].ToObject<int>();
                                string resourceUriString = workJson["uri"].ToString();

                                if (!StringComparer.OrdinalIgnoreCase.Equals(resourceUriString, "https://failed/"))
                                {
                                    Uri resourceUri = new Uri(resourceUriString);

                                    // the page is loaded from storage in the background
                                    CantonCatalogItem item = new CantonCatalogItem(Account, resourceUri, curId);

                                    // download the graph; this is a blocking call
                                    item.LoadGraph();

                                    // add the item to the batch to be committed in order later
                                    batchItems.Add(item);
                                }
                                else
                                {
                                    Log("Skipping failed page: " + curId);
                                }
                            }
                            catch (Exception ex)
                            {
                                LogError("Unable to create page: " + ex.ToString());
                            }
                        });

                        timer.Stop();
                        Console.WriteLine(String.Format(CultureInfo.InvariantCulture, "Loaded {0} pre-built pages in {1}", (batchItems.Count - before), timer.Elapsed));
                    }

                    // commit the items
                    if (batchItems.Count >= BatchSize)
                    {
                        CantonCatalogItem[] curItems = batchItems.ToArray();
                        batchItems = new ConcurrentBag<CantonCatalogItem>();

                        if (commitTask != null)
                        {
                            await commitTask;
                        }

                        // make certain this ALL runs on another thread
                        commitTask = Task.Run(async () => await Commit(writer, curItems));
                    }

                    // get the next work item
                    if (_run)
                    {
                        newWork = GetWork();
                    }
                    else
                    {
                        newWork = new Dictionary<int, string>();
                    }

                    if (newWork.Count < 1 && _run)
                    {
                        // give up after 30 minutes, or when the backlog gets too large
                        // TODO: handle this better
                        if (giveup.Elapsed > TimeSpan.FromMinutes(30) || unQueuedMessages.Count > 20000)
                        {
                            while (!unQueuedMessages.ContainsKey(cantonCommitId))
                            {
                                LogError("Giving up on: " + cantonCommitId);
                                cantonCommitId++;
                            }
                        }
                        else
                        {
                            // avoid getting out of control when the pages aren't ready yet
                            Log("PageCommitJob Waiting for: " + cantonCommitId);
                            Thread.Sleep(TimeSpan.FromSeconds(15));
                        }
                    }
                }
            }
            finally
            {
                // commit anything that was waiting
                if (commitTask != null)
                {
                    commitTask.Wait();
                }

                Commit(writer, batchItems.ToArray()).Wait();
            }
        }
    }
    finally
    {
        Log("returning work to the queue");

        // put everything back into the queue
        ParallelOptions qOpts = new ParallelOptions();
        qOpts.MaxDegreeOfParallelism = 128;

        Parallel.ForEach(orderedMessages, qOpts, json =>
        {
            Queue.AddMessage(new CloudQueueMessage(json.ToString()));
        });

        Parallel.ForEach(unQueuedMessages.Values, qOpts, s =>
        {
            Queue.AddMessage(new CloudQueueMessage(s));
        });

        Log("returning work to the queue done");
    }
}