/// <summary>
/// Compares the package count reported by a v2 gallery feed with the number of distinct
/// package id/version pairs found in a catalog, and prints both counts and their difference.
/// </summary>
/// <param name="args">
/// Exactly two values: the v2 gallery feed URL followed by the catalog index.json URL.
/// When anything else is supplied a usage message is printed and nothing is fetched.
/// </param>
/// <returns>A task that completes once the comparison has been printed.</returns>
public static async Task Test0Async(string[] args)
{
    const string V2FeedCountQuery = "/Packages/$count";

    Console.WriteLine("Simple count test for distinct package ids and version between v2 feed and catalog");

    if (args == null || args.Length != 2)
    {
        Console.WriteLine("Please enter only 2 arguments. First v2gallery feed url, and second catalog index.json url");
        return;
    }

    // Trim once so the count query is appended to a URL without a trailing slash.
    string v2FeedUrl = args[0].TrimEnd('/');
    string v2FeedCountUrl = v2FeedUrl + V2FeedCountQuery;

    int v2FeedCount;
    using (var client = new HttpClient())
    using (var response = await client.GetAsync(v2FeedCountUrl))
    {
        // Fail with the HTTP error instead of trying to parse an error page as a number.
        response.EnsureSuccessStatusCode();

        string v2FeedCountString = await response.Content.ReadAsStringAsync();
        v2FeedCount = int.Parse(v2FeedCountString, System.Globalization.CultureInfo.InvariantCulture);
    }

    string catalog = args[1];
    Uri catalogIndex = new Uri(catalog);
    CatalogIndexReader reader = new CatalogIndexReader(catalogIndex);

    // Await rather than block on the task, and materialise once because the
    // entries are counted twice below (total, then distinct).
    var entries = (await reader.GetEntries()).ToList();
    Console.WriteLine("Total packages count from catalog is " + entries.Count);

    int v3CatalogPackagesCount = entries.Distinct(new CatalogIndexEntryIdVersionComparer()).Count();
    Console.WriteLine("Distinct packages count from catalog is " + v3CatalogPackagesCount);
    Console.WriteLine("Distinct packages count from " + v2FeedUrl + " is " + v2FeedCount);
    Console.WriteLine("Current difference between v2Feed and v3 catalog is " + (v2FeedCount - v3CatalogPackagesCount));
}
/// <summary>
/// Prepares a lightning reindex run in three steps:
/// writes an index file listing every package id and the catalog pages of its versions,
/// saves a cursor holding the newest commit timestamp seen in the catalog, and
/// generates one cursor*.cmd / cursor*.txt pair per batch of package ids.
/// </summary>
/// <returns>A task that completes when all files and the cursor have been written.</returns>
/// <exception cref="FormatException">The configured batch size is not an integer.</exception>
/// <exception cref="ArgumentOutOfRangeException">The configured batch size is zero or negative.</exception>
private async Task PrepareAsync()
{
    // Validate the batch size before touching disk or storage: a value of zero would make
    // the batching loop below spin forever, and a negative one would emit bogus batches.
    int batchSizeValue = int.Parse(_batchSize, System.Globalization.CultureInfo.InvariantCulture);
    if (batchSizeValue <= 0)
    {
        throw new ArgumentOutOfRangeException("batchSize", batchSizeValue, "The batch size must be a positive integer.");
    }

    _log.WriteLine("Making sure folder {0} exists.", _outputFolder);
    if (!Directory.Exists(_outputFolder))
    {
        Directory.CreateDirectory(_outputFolder);
    }

    // Create reindex file
    _log.WriteLine("Start preparing lightning reindex file...");

    var latestCommit = DateTime.MinValue;
    int numberOfEntries = 0;
    string indexFile = Path.Combine(_outputFolder, "index.txt");

    using (var streamWriter = new StreamWriter(indexFile, false))
    {
        var collectorHttpClient = new CollectorHttpClient();
        var catalogIndexReader = new CatalogIndexReader(new Uri(_catalogIndex), collectorHttpClient);
        var catalogIndexEntries = await catalogIndexReader.GetEntries();

        // Entries are ordered by commit time before grouping, so inside each group
        // a later commit for the same version is visited after an earlier one.
        var packageRegistrationGroups = catalogIndexEntries
            .OrderBy(x => x.CommitTimeStamp)
            .ThenBy(x => x.Id)
            .ThenBy(x => x.Version)
            .GroupBy(x => x.Id);

        foreach (var packageRegistrationGroup in packageRegistrationGroups)
        {
            streamWriter.WriteLine("Element@{0}. {1}", numberOfEntries++, packageRegistrationGroup.Key);

            var latestCatalogPages = new Dictionary<string, Uri>();
            foreach (CatalogIndexEntry catalogIndexEntry in packageRegistrationGroup)
            {
                // The indexer adds or overwrites, so the last page seen for a version wins.
                string key = catalogIndexEntry.Version.ToNormalizedString();
                latestCatalogPages[key] = catalogIndexEntry.Uri;

                if (latestCommit < catalogIndexEntry.CommitTimeStamp)
                {
                    latestCommit = catalogIndexEntry.CommitTimeStamp;
                }
            }

            foreach (var latestCatalogPage in latestCatalogPages)
            {
                streamWriter.WriteLine("{0}", latestCatalogPage.Value);
            }
        }
    }

    _log.WriteLine("Finished preparing lightning reindex file. Output file: {0}", indexFile);

    // Write cursor to storage
    _log.WriteLine("Start writing new cursor...");

    var account = CloudStorageAccount.Parse(_storageAccount);
    var storageFactory = (StorageFactory)new AzureStorageFactory(account, _storageContainer);
    var storage = storageFactory.Create();
    var cursor = new DurableCursor(storage.ResolveUri("cursor.json"), storage, latestCommit);
    cursor.Value = latestCommit;
    await cursor.Save(CancellationToken.None);

    _log.WriteLine("Finished writing new cursor.");

    // Write command files
    _log.WriteLine("Start preparing lightning reindex command files...");

    string templateFileContents;
    using (var templateStreamReader = new StreamReader(_templateFile))
    {
        templateFileContents = await templateStreamReader.ReadToEndAsync();
    }

    int batchNumber = 0;
    for (int batchStart = 0; batchStart < numberOfEntries; batchStart += batchSizeValue)
    {
        // Inclusive end of the batch, clamped to the last element written to the index file.
        var batchEnd = batchStart + batchSizeValue - 1;
        if (batchEnd >= numberOfEntries)
        {
            batchEnd = numberOfEntries - 1;
        }

        var cursorCommandFileName = "cursor" + batchNumber + ".cmd";
        var cursorTextFileName = "cursor" + batchNumber + ".txt";

        using (var cursorCommandStreamWriter = new StreamWriter(Path.Combine(_outputFolder, cursorCommandFileName)))
        using (var cursorTextStreamWriter = new StreamWriter(Path.Combine(_outputFolder, cursorTextFileName)))
        {
            // Fill the template placeholders for this batch.
            var commandStreamContents = templateFileContents
                .Replace("[index]", indexFile)
                .Replace("[cursor]", cursorTextFileName)
                .Replace("[contentbaseaddress]", _contentBaseAddress)
                .Replace("[storageaccount]", _storageAccount)
                .Replace("[storagecontainer]", _storageContainer)
                .Replace("[storagebaseaddress]", _storageBaseAddress)
                .Replace("[compress]", _compress.ToString().ToLowerInvariant());

            await cursorCommandStreamWriter.WriteLineAsync(commandStreamContents);
            await cursorTextStreamWriter.WriteLineAsync(batchStart + "," + batchEnd);
        }

        batchNumber++;
    }

    _log.WriteLine("Finished preparing lightning reindex command files.");
    _log.WriteLine("You can now copy the {0} file and all cursor*.cmd, cursor*.txt", indexFile);
    _log.WriteLine("to multiple machines and run the cursor*.cmd files in parallel.");
}
/// <summary>
/// Rebuilds registrations for every package id that has at least one catalog entry committed
/// after the cursor position, then moves the cursor to the newest commit timestamp in the index.
/// </summary>
/// <param name="cancellationToken">Passed through to the registration collector.</param>
/// <returns>A task that completes when the run has finished and the cursor has been saved.</returns>
/// <remarks>
/// Ids are processed in up to three passes: the first with a parallelism of 4, the following
/// ones single-threaded over whatever ids are still left. Nothing is processed, and the cursor
/// is left untouched, when no id has changed.
/// </remarks>
public override async Task RunCore(CancellationToken cancellationToken)
{
    const int MaxAttempts = 3;

    DateTime position = Cursor.Position;

    // Get the catalog index
    Uri catalogIndexUri = new Uri(Config.GetProperty("CatalogIndex"));

    Log("Reading index entries");
    var indexReader = new CatalogIndexReader(catalogIndexUri);
    var indexEntries = await indexReader.GetRolledUpEntries();
    var context = indexReader.GetContext();

    Log("Finding new or editted entries");

    // The set compares case-insensitively, so ids do not need to be lower-cased first.
    var changedEntries = new HashSet<string>(
        indexEntries.Where(e => e.CommitTimeStamp.CompareTo(position) > 0).Select(e => e.Id),
        StringComparer.OrdinalIgnoreCase);

    // Newest commit in the index; default(DateTime) when the index is empty.
    DateTime newPosition = indexEntries.Select(e => e.CommitTimeStamp).DefaultIfEmpty().Max();

    var batches = new ConcurrentDictionary<string, ConcurrentBag<Uri>>(StringComparer.OrdinalIgnoreCase);

    ParallelOptions options = new ParallelOptions();
    options.MaxDegreeOfParallelism = 8;

    // Collect every entry uri of each changed id, not only the entries that changed.
    Parallel.ForEach(indexEntries, options, entry =>
    {
        if (changedEntries.Contains(entry.Id))
        {
            // GetOrAdd only creates a bag when the id is new; the add goes to the stored bag.
            batches.GetOrAdd(entry.Id, key => new ConcurrentBag<Uri>()).Add(entry.Uri);
        }
    });

    Uri contentBaseAddress = new Uri(Config.GetProperty("ContentBaseAddress"));

    if (batches.Count > 0)
    {
        Log("Building registrations from: " + position.ToString("O"));

        options.MaxDegreeOfParallelism = 4;

        for (int attempt = 0; attempt < MaxAttempts && batches.Count > 0; attempt++)
        {
            if (attempt != 0)
            {
                // Retries of ids that failed earlier run one at a time.
                options.MaxDegreeOfParallelism = 1;
                Console.WriteLine("Single batch run.");
            }

            var ids = batches.Keys.OrderBy(s => s).ToArray();
            int startingCount = ids.Length;
            Stopwatch buildTimer = Stopwatch.StartNew();

            Parallel.ForEach(ids, options, id =>
            {
                try
                {
                    BatchRegistrationCollector regCollector = new BatchRegistrationCollector(null, _factory);
                    regCollector.ContentBaseAddress = contentBaseAddress;

                    Stopwatch timer = Stopwatch.StartNew();

                    var uriGroup = batches[id].ToArray();

                    // Parallel.ForEach bodies are synchronous, so the task is waited on here.
                    regCollector.ProcessGraphs(_client, id, uriGroup, context, cancellationToken).Wait();

                    int rem = batches.Count;
                    timer.Stop();

                    string log = String.Format("Completed: {0} Duration: {1} Uris: {2} Remaining Ids: {3} Loop: {4}", id, timer.Elapsed, uriGroup.Length, rem, attempt);
                    Console.WriteLine(log);

                    // stats: average time per finished id so far, projected over the ids left
                    double perPackage = buildTimer.Elapsed.TotalSeconds / (double)(startingCount - rem + 1);
                    DateTime finish = DateTime.Now.AddSeconds(Math.Ceiling(perPackage * rem));
                    Console.WriteLine("Estimated Finish: " + finish.ToString("O"));

                    // Only ids that completed are removed; failed ids stay for the next pass.
                    ConcurrentBag<Uri> vals;
                    if (!batches.TryRemove(id, out vals))
                    {
                        Console.WriteLine("Unable to remove!");
                    }
                }
                catch (Exception ex)
                {
                    LogError("Registration failed: " + id + " " + ex.ToString());
                }
            });
        }

        if (batches.Count > 0)
        {
            // NOTE(review): the cursor is still advanced below, as it always was, so these ids
            // are not retried on the next run unless they change again - confirm this is intended.
            LogError("Registrations still failing after " + MaxAttempts + " attempts: " + batches.Count);
        }

        // mark this with the last commit we included
        Cursor.Position = newPosition;
        await Cursor.Save();

        Log("Finished registrations: " + newPosition.ToString("O"));
    }
}