private async Task ExecuteAsync(CancellationToken token)
{
    using (var cancelledTokenSource = new CancellationTokenSource())
    using (var produceWorkTokenSource = new CancellationTokenSource())
    {
        // Set up the indexes, container, and excluded-packages data before any work begins.
        await InitializeAsync();

        // Capture the current catalog commit timestamp up front to seed the catalog2azuresearch
        // cursor. The database is always at least as up-to-date as the catalog, so anything read
        // from the database after this point is newer than this timestamp. When catalog2azuresearch
        // first runs against the index produced here it may replay some duplicate packages, which
        // is harmless.
        //
        // Capturing dependency cursors (e.g. catalog2registration) instead would not help, because
        // database rows cannot be reliably filtered by a catalog-based cursor value. If a dependency
        // such as catalog2registration lagged behind, the index built by this job could expose
        // packages that are not yet restorable (absent from the registration hives), letting users
        // find packages they cannot restore. We mitigate that risk by trusting the end-to-end tests
        // to fail when catalog2registration (or any other V3 component) is broken, thus blocking
        // deployment of new Azure Search indexes.
        var latestCatalogIndex = await _catalogClient.GetIndexAsync(_options.Value.CatalogIndexUrl);
        var initialCursor = latestCatalogIndex.CommitTimestamp;
        _logger.LogInformation("The initial cursor value will be {CursorValue:O}.", initialCursor);

        var auxiliaryData = await PushAllPackageRegistrationsAsync(cancelledTokenSource, produceWorkTokenSource);

        // Persist each auxiliary data file produced by the initial push.
        await WriteOwnerDataAsync(auxiliaryData.Owners);
        await WriteDownloadDataAsync(auxiliaryData.Downloads);
        await WriteVerifiedPackagesDataAsync(auxiliaryData.VerifiedPackages);
        await WritePopularityTransfersDataAsync(auxiliaryData.PopularityTransfers);

        // Persist the front cursor so catalog2azuresearch resumes from the timestamp captured above.
        _logger.LogInformation("Writing the initial cursor value to be {CursorValue:O}.", initialCursor);
        var cursorStorage = _storageFactory.Create();
        var cursor = new DurableCursor(
            cursorStorage.ResolveUri(Catalog2AzureSearchCommand.CursorRelativeUri),
            cursorStorage,
            DateTime.MinValue);
        cursor.Value = initialCursor.UtcDateTime;
        await cursor.SaveAsync(token);
    }
}
public async Task ExecuteAsync(bool restart)
{
    // Cursor state is persisted through file-system storage rooted at the current working directory.
    var storageFactory = new FileStorageFactory(
        new Uri("http://localhost/"),
        Directory.GetCurrentDirectory(),
        verbose: false);

    var frontCursor = new DurableCursor(
        new Uri("http://localhost/cursor.json"),
        storageFactory.Create(),
        DateTime.MinValue);

    // A restart rewinds the persisted front cursor so the collector reprocesses from the beginning.
    if (restart)
    {
        await frontCursor.LoadAsync(CancellationToken.None);
        frontCursor.Value = DateTime.MinValue;
        await frontCursor.SaveAsync(CancellationToken.None);
    }

    // Run the collector against an unbounded (max) back cursor.
    var backCursor = MemoryCursor.CreateMax();
    await _collector.RunAsync(frontCursor, backCursor, CancellationToken.None);
}
private async Task PrepareAsync()
{
    // Ensure the output folder exists before any files are written into it.
    _log.WriteLine("Making sure folder {0} exists.", _outputFolder);
    if (!Directory.Exists(_outputFolder))
    {
        Directory.CreateDirectory(_outputFolder);
    }

    // Create the reindex file: one "Element@N. <id>" header per package registration, followed by
    // the catalog page URI of the latest catalog entry for each normalized version of that package.
    _log.WriteLine("Start preparing lightning reindex file...");

    var latestCommit = DateTime.MinValue;
    int numberOfEntries = 0;
    string indexFile = Path.Combine(_outputFolder, "index.txt");
    string optionalArgumentsTemplate = "optionalArguments";

    using (var streamWriter = new StreamWriter(indexFile, false))
    {
        var httpMessageHandlerFactory = CommandHelpers.GetHttpMessageHandlerFactory(TelemetryService, _verbose);
        var collectorHttpClient = new CollectorHttpClient(httpMessageHandlerFactory());
        var catalogIndexReader = new CatalogIndexReader(new Uri(_catalogIndex), collectorHttpClient, TelemetryService);

        var catalogIndexEntries = await catalogIndexReader.GetEntries();

        foreach (var packageRegistrationGroup in catalogIndexEntries
            .OrderBy(x => x.CommitTimeStamp)
            .ThenBy(x => x.Id, StringComparer.OrdinalIgnoreCase)
            .ThenBy(x => x.Version)
            .GroupBy(x => x.Id, StringComparer.OrdinalIgnoreCase))
        {
            streamWriter.WriteLine("Element@{0}. {1}", numberOfEntries++, packageRegistrationGroup.Key);

            var latestCatalogPages = new Dictionary<string, Uri>();

            foreach (CatalogIndexEntry catalogIndexEntry in packageRegistrationGroup)
            {
                // Entries arrive in commit-timestamp order, so a single indexer assignment keeps
                // the latest page per version. (Replaces the previous ContainsKey + Add/indexer
                // branching, which performed a redundant double lookup for the same effect.)
                string key = catalogIndexEntry.Version.ToNormalizedString();
                latestCatalogPages[key] = catalogIndexEntry.Uri;

                if (latestCommit < catalogIndexEntry.CommitTimeStamp)
                {
                    latestCommit = catalogIndexEntry.CommitTimeStamp;
                }
            }

            foreach (var latestCatalogPage in latestCatalogPages)
            {
                streamWriter.WriteLine("{0}", latestCatalogPage.Value);
            }
        }
    }

    _log.WriteLine("Finished preparing lightning reindex file. Output file: {0}", indexFile);

    // Write the cursor to storage so catalog processing resumes from the latest commit seen above.
    _log.WriteLine("Start writing new cursor...");
    var storage = _storageFactories.LegacyStorageFactory.Create();
    var cursor = new DurableCursor(storage.ResolveUri("cursor.json"), storage, latestCommit)
    {
        Value = latestCommit
    };

    await cursor.SaveAsync(CancellationToken.None);
    _log.WriteLine("Finished writing new cursor.");

    // Ensure the SemVer 2.0.0 storage container is created, if applicable. The gzipped storage
    // account is created above when we write the cursor.
    _storageFactories.SemVer2StorageFactory?.Create();

    // Write the command files: one cursor<N>.cmd / cursor<N>.txt pair per batch of index entries,
    // produced by substituting [placeholders] in the template file.
    _log.WriteLine("Start preparing lightning reindex command files...");

    string templateFileContents;
    using (var templateStreamReader = new StreamReader(_templateFile))
    {
        templateFileContents = await templateStreamReader.ReadToEndAsync();
    }

    int batchNumber = 0;
    int batchSizeValue = int.Parse(_batchSize);
    for (int batchStart = 0; batchStart < numberOfEntries; batchStart += batchSizeValue)
    {
        // Clamp the inclusive batch end to the last entry index.
        var batchEnd = Math.Min(batchStart + batchSizeValue - 1, numberOfEntries - 1);

        var cursorCommandFileName = "cursor" + batchNumber + ".cmd";
        var cursorTextFileName = "cursor" + batchNumber + ".txt";

        using (var cursorCommandStreamWriter = new StreamWriter(Path.Combine(_outputFolder, cursorCommandFileName)))
        using (var cursorTextStreamWriter = new StreamWriter(Path.Combine(_outputFolder, cursorTextFileName)))
        {
            var commandStreamContents = templateFileContents;

            var replacements = _arguments
                .Concat(new[]
                {
                    new KeyValuePair<string, string>("indexFile", indexFile),
                    new KeyValuePair<string, string>("cursorFile", cursorTextFileName)
                });

            foreach (var replacement in replacements)
            {
                commandStreamContents = commandStreamContents
                    .Replace($"[{replacement.Key}]", replacement.Value);
            }

            // The non-required arguments need to be added only if they were passed in;
            // they cannot be hardcoded in the template.
            var optionalArguments = new StringBuilder();
            AppendOptionalArgument(optionalArguments, Arguments.ContentIsFlatContainer);
            AppendOptionalArgument(optionalArguments, Arguments.FlatContainerName);
            AppendOptionalArgument(optionalArguments, Arguments.StorageSuffix);
            AppendOptionalArgument(optionalArguments, Arguments.AllIconsInFlatContainer);
            AppendOptionalArgument(optionalArguments, Arguments.Driver);
            AppendOptionalArgument(optionalArguments, Arguments.Verbose);

            commandStreamContents = commandStreamContents
                .Replace($"[{optionalArgumentsTemplate}]", optionalArguments.ToString());

            await cursorCommandStreamWriter.WriteLineAsync(commandStreamContents);
            await cursorTextStreamWriter.WriteLineAsync(batchStart + "," + batchEnd);
        }

        batchNumber++;
    }

    _log.WriteLine("Finished preparing lightning reindex command files.");
    _log.WriteLine("You can now copy the {0} file and all cursor*.cmd, cursor*.txt", indexFile);
    _log.WriteLine("to multiple machines and run the cursor*.cmd files in parallel.");
}
private async Task PrepareAsync()
{
    // Ensure the output folder exists before any files are written into it.
    _log.WriteLine("Making sure folder {0} exists.", _outputFolder);
    if (!Directory.Exists(_outputFolder))
    {
        Directory.CreateDirectory(_outputFolder);
    }

    // Create the reindex file: one "Element@N. <id>" header per package registration, followed by
    // the catalog page URI of the latest catalog entry for each normalized version of that package.
    _log.WriteLine("Start preparing lightning reindex file...");

    var latestCommit = DateTime.MinValue;
    int numberOfEntries = 0;
    string indexFile = Path.Combine(_outputFolder, "index.txt");
    string storageCredentialArgumentsTemplate = "storageCredentialArguments";
    string optionalArgumentsTemplate = "optionalArguments";

    using (var streamWriter = new StreamWriter(indexFile, false))
    {
        var httpMessageHandlerFactory = CommandHelpers.GetHttpMessageHandlerFactory(TelemetryService, _verbose);
        var collectorHttpClient = new CollectorHttpClient(httpMessageHandlerFactory());
        var catalogIndexReader = new CatalogIndexReader(new Uri(_catalogIndex), collectorHttpClient, TelemetryService);

        var catalogIndexEntries = await catalogIndexReader.GetEntries();

        foreach (var packageRegistrationGroup in catalogIndexEntries
            .OrderBy(x => x.CommitTimeStamp)
            .ThenBy(x => x.Id, StringComparer.OrdinalIgnoreCase)
            .ThenBy(x => x.Version)
            .GroupBy(x => x.Id, StringComparer.OrdinalIgnoreCase))
        {
            streamWriter.WriteLine("Element@{0}. {1}", numberOfEntries++, packageRegistrationGroup.Key);

            var latestCatalogPages = new Dictionary<string, Uri>();

            foreach (CatalogIndexEntry catalogIndexEntry in packageRegistrationGroup)
            {
                // Entries arrive in commit-timestamp order, so a single indexer assignment keeps
                // the latest page per version. (Replaces the previous ContainsKey + Add/indexer
                // branching, which performed a redundant double lookup for the same effect.)
                string key = catalogIndexEntry.Version.ToNormalizedString();
                latestCatalogPages[key] = catalogIndexEntry.Uri;

                if (latestCommit < catalogIndexEntry.CommitTimeStamp)
                {
                    latestCommit = catalogIndexEntry.CommitTimeStamp;
                }
            }

            foreach (var latestCatalogPage in latestCatalogPages)
            {
                streamWriter.WriteLine("{0}", latestCatalogPage.Value);
            }
        }
    }

    _log.WriteLine("Finished preparing lightning reindex file. Output file: {0}", indexFile);

    // Create the registration storage containers with public blob access so hives are readable.
    _log.WriteLine("Creating the containers...");
    var container = GetAutofacContainer();
    var blobClient = container.Resolve<ICloudBlobClient>();
    var config = container.Resolve<IOptionsSnapshot<Catalog2RegistrationConfiguration>>().Value;
    foreach (var name in new[] { config.LegacyStorageContainer, config.GzippedStorageContainer, config.SemVer2StorageContainer })
    {
        var reference = blobClient.GetContainerReference(name);
        var permissions = new BlobContainerPermissions
        {
            PublicAccess = BlobContainerPublicAccessType.Blob
        };
        await reference.CreateIfNotExistAsync(permissions);
    }

    // Write the cursor to storage so catalog processing resumes from the latest commit seen above.
    _log.WriteLine("Start writing new cursor...");
    var storageFactory = container.ResolveKeyed<IStorageFactory>(DependencyInjectionExtensions.CursorBindingKey);
    var storage = storageFactory.Create();
    var cursor = new DurableCursor(storage.ResolveUri("cursor.json"), storage, latestCommit)
    {
        Value = latestCommit
    };

    await cursor.SaveAsync(CancellationToken.None);
    _log.WriteLine("Finished writing new cursor.");

    // Write the command files: one cursor<N>.cmd / cursor<N>.txt pair per batch of index entries,
    // produced by substituting [placeholders] in the template file.
    _log.WriteLine("Start preparing lightning reindex command files...");

    string templateFileContents;
    using (var templateStreamReader = new StreamReader(_templateFile))
    {
        templateFileContents = await templateStreamReader.ReadToEndAsync();
    }

    int batchNumber = 0;
    int batchSizeValue = int.Parse(_batchSize);
    for (int batchStart = 0; batchStart < numberOfEntries; batchStart += batchSizeValue)
    {
        // Clamp the inclusive batch end to the last entry index.
        var batchEnd = Math.Min(batchStart + batchSizeValue - 1, numberOfEntries - 1);

        var cursorCommandFileName = "cursor" + batchNumber + ".cmd";
        var cursorTextFileName = "cursor" + batchNumber + ".txt";

        using (var cursorCommandStreamWriter = new StreamWriter(Path.Combine(_outputFolder, cursorCommandFileName)))
        using (var cursorTextStreamWriter = new StreamWriter(Path.Combine(_outputFolder, cursorTextFileName)))
        {
            var commandStreamContents = templateFileContents;

            var replacements = _arguments
                .Concat(new[]
                {
                    new KeyValuePair<string, string>("indexFile", indexFile),
                    new KeyValuePair<string, string>("cursorFile", cursorTextFileName)
                });

            foreach (var replacement in replacements)
            {
                commandStreamContents = commandStreamContents
                    .Replace($"[{replacement.Key}]", replacement.Value);
            }

            // Since we only need to set the storage key or the storage SAS token, only one will be
            // added to the template.
            var storageCredentialArguments = new StringBuilder();
            AddStorageCredentialArgument(storageCredentialArguments, Arguments.StorageSasValue, Arguments.StorageKeyValue);
            AddStorageCredentialArgument(storageCredentialArguments, Arguments.CompressedStorageSasValue, Arguments.CompressedStorageKeyValue);
            AddStorageCredentialArgument(storageCredentialArguments, Arguments.SemVer2StorageSasValue, Arguments.SemVer2StorageKeyValue);

            commandStreamContents = commandStreamContents
                .Replace($"[{storageCredentialArgumentsTemplate}]", storageCredentialArguments.ToString());

            // The non-required arguments need to be added only if they were passed in;
            // they cannot be hardcoded in the template.
            var optionalArguments = new StringBuilder();
            AppendArgument(optionalArguments, Arguments.FlatContainerName);
            AppendArgument(optionalArguments, Arguments.StorageSuffix);
            AppendArgument(optionalArguments, Arguments.Verbose);

            commandStreamContents = commandStreamContents
                .Replace($"[{optionalArgumentsTemplate}]", optionalArguments.ToString());

            await cursorCommandStreamWriter.WriteLineAsync(commandStreamContents);
            await cursorTextStreamWriter.WriteLineAsync(batchStart + "," + batchEnd);
        }

        batchNumber++;
    }

    _log.WriteLine("Finished preparing lightning reindex command files.");
    _log.WriteLine("You can now copy the {0} file and all cursor*.cmd, cursor*.txt", indexFile);
    _log.WriteLine("to multiple machines and run the cursor*.cmd files in parallel.");
}