Example #1
0
        /// <summary>
        /// Runs the initial population: initializes the job, captures the current catalog commit timestamp,
        /// pushes all package registrations, writes the auxiliary data files and finally saves the captured
        /// timestamp as the starting cursor.
        /// </summary>
        /// <param name="token">Cancellation token handed to the final cursor save.</param>
        private async Task ExecuteAsync(CancellationToken token)
        {
            using (var cancelledCts = new CancellationTokenSource())
            using (var produceWorkCts = new CancellationTokenSource())
            {
                // Set up the indexes, the container and the excluded packages data.
                await InitializeAsync();

                // Capture the catalog timestamp BEFORE reading the database. The database is always at least as
                // up-to-date as the catalog, so anything queried from the database after this point is guaranteed
                // to be more recent than the data represented by this timestamp. When catalog2azuresearch first
                // runs against the index produced by this job it will probably see some duplicate packages,
                // which is acceptable.
                //
                // A dependency cursor (for example catalog2registration) could be captured instead of the catalog
                // cursor, but that would be pointless: there is no reliable way to filter database results by a
                // catalog-based cursor value. If catalog2registration were far behind, the index produced here
                // would contain packages that are not yet restorable (not yet in the registration hives), so a
                // user could find a package they cannot restore. That risk is mitigated by trusting the
                // end-to-end tests to fail when catalog2registration (or any other V3 component) is broken,
                // thus blocking the deployment of new Azure Search indexes.
                var index = await _catalogClient.GetIndexAsync(_options.Value.CatalogIndexUrl);
                var cursorStart = index.CommitTimestamp;
                _logger.LogInformation("The initial cursor value will be {CursorValue:O}.", cursorStart);

                var auxiliaryData = await PushAllPackageRegistrationsAsync(cancelledCts, produceWorkCts);

                // Persist the auxiliary data files in order: owners, downloads, verified packages and
                // popularity transfers.
                await WriteOwnerDataAsync(auxiliaryData.Owners);
                await WriteDownloadDataAsync(auxiliaryData.Downloads);
                await WriteVerifiedPackagesDataAsync(auxiliaryData.VerifiedPackages);
                await WritePopularityTransfersDataAsync(auxiliaryData.PopularityTransfers);

                // Finally, persist the cursor so that catalog2azuresearch starts from the captured timestamp.
                _logger.LogInformation("Writing the initial cursor value to be {CursorValue:O}.", cursorStart);
                var cursorStorage = _storageFactory.Create();
                var cursor = new DurableCursor(
                    cursorStorage.ResolveUri(Catalog2AzureSearchCommand.CursorRelativeUri),
                    cursorStorage,
                    DateTime.MinValue)
                {
                    Value = cursorStart.UtcDateTime
                };

                await cursor.SaveAsync(token);
            }
        }
        /// <summary>
        /// Runs the collector between a durable front cursor, persisted through a
        /// <c>FileStorageFactory</c> created over the current directory, and a maximum-valued in-memory
        /// back cursor.
        /// </summary>
        /// <param name="restart">
        /// When <c>true</c>, the front cursor is loaded, reset to <see cref="DateTime.MinValue"/> and saved
        /// before the collector runs.
        /// </param>
        public async Task ExecuteAsync(bool restart)
        {
            var storageFactory = new FileStorageFactory(
                new Uri("http://localhost/"),
                Directory.GetCurrentDirectory(),
                verbose: false);

            var cursorAddress = new Uri("http://localhost/cursor.json");
            var cursorStorage = storageFactory.Create();
            var frontCursor = new DurableCursor(cursorAddress, cursorStorage, DateTime.MinValue);

            if (restart)
            {
                // Load the stored cursor first, then overwrite its value and persist the reset.
                await frontCursor.LoadAsync(CancellationToken.None);
                frontCursor.Value = DateTime.MinValue;
                await frontCursor.SaveAsync(CancellationToken.None);
            }

            await _collector.RunAsync(frontCursor, MemoryCursor.CreateMax(), CancellationToken.None);
        }
Example #3
0
        /// <summary>
        /// Prepares a lightning reindex run. Writes <c>index.txt</c> to the output folder (one
        /// <c>Element@N. id</c> header per package ID followed by the latest catalog page URI for each
        /// version), saves a cursor set to the latest commit timestamp seen, and generates one
        /// <c>cursorN.cmd</c> / <c>cursorN.txt</c> pair per batch from the template file.
        /// </summary>
        /// <exception cref="FormatException">The configured batch size is not a valid integer.</exception>
        /// <exception cref="InvalidOperationException">The configured batch size is zero or negative.</exception>
        private async Task PrepareAsync()
        {
            // Parse and validate the batch size before doing any work. A zero or negative batch size would keep
            // the batching loop below from ever reaching numberOfEntries, so it would write command files
            // forever; failing here also avoids overwriting the index file and the cursor for a run that
            // cannot complete.
            int batchSizeValue = int.Parse(_batchSize);
            if (batchSizeValue <= 0)
            {
                throw new InvalidOperationException(
                    $"The batch size must be a positive integer, but was {batchSizeValue}.");
            }

            _log.WriteLine("Making sure folder {0} exists.", _outputFolder);

            // Directory.CreateDirectory does nothing when the folder already exists, so no Exists check is needed.
            Directory.CreateDirectory(_outputFolder);

            // Create reindex file
            _log.WriteLine("Start preparing lightning reindex file...");

            var latestCommit = DateTime.MinValue;
            int numberOfEntries = 0;
            string indexFile = Path.Combine(_outputFolder, "index.txt");
            string optionalArgumentsTemplate = "optionalArguments";

            using (var streamWriter = new StreamWriter(indexFile, false))
            {
                var httpMessageHandlerFactory = CommandHelpers.GetHttpMessageHandlerFactory(TelemetryService, _verbose);
                var collectorHttpClient = new CollectorHttpClient(httpMessageHandlerFactory());
                var catalogIndexReader = new CatalogIndexReader(new Uri(_catalogIndex), collectorHttpClient, TelemetryService);

                var catalogIndexEntries = await catalogIndexReader.GetEntries();

                // Entries are ordered by commit timestamp before grouping, so within each package ID group a
                // later entry for the same version comes from a later (or equal) commit.
                var packageRegistrationGroups = catalogIndexEntries
                    .OrderBy(x => x.CommitTimeStamp)
                    .ThenBy(x => x.Id, StringComparer.OrdinalIgnoreCase)
                    .ThenBy(x => x.Version)
                    .GroupBy(x => x.Id, StringComparer.OrdinalIgnoreCase);

                foreach (var packageRegistrationGroup in packageRegistrationGroups)
                {
                    streamWriter.WriteLine("Element@{0}. {1}", numberOfEntries++, packageRegistrationGroup.Key);

                    var latestCatalogPages = new Dictionary<string, Uri>();

                    foreach (CatalogIndexEntry catalogIndexEntry in packageRegistrationGroup)
                    {
                        // The indexer adds the key or overwrites the existing value, so the last entry
                        // enumerated for a given normalized version wins.
                        latestCatalogPages[catalogIndexEntry.Version.ToNormalizedString()] = catalogIndexEntry.Uri;

                        if (latestCommit < catalogIndexEntry.CommitTimeStamp)
                        {
                            latestCommit = catalogIndexEntry.CommitTimeStamp;
                        }
                    }

                    foreach (var latestCatalogPage in latestCatalogPages)
                    {
                        streamWriter.WriteLine("{0}", latestCatalogPage.Value);
                    }
                }
            }

            _log.WriteLine("Finished preparing lightning reindex file. Output file: {0}", indexFile);

            // Write cursor to storage
            _log.WriteLine("Start writing new cursor...");
            var storage = _storageFactories.LegacyStorageFactory.Create();
            var cursor = new DurableCursor(storage.ResolveUri("cursor.json"), storage, latestCommit)
            {
                Value = latestCommit
            };

            await cursor.SaveAsync(CancellationToken.None);

            _log.WriteLine("Finished writing new cursor.");

            // Ensure the SemVer 2.0.0 storage containers is created, if applicable. The gzipped storage account is
            // created above when we write the cursor.
            _storageFactories.SemVer2StorageFactory?.Create();

            // Write command files
            _log.WriteLine("Start preparing lightning reindex command files...");

            string templateFileContents;

            using (var templateStreamReader = new StreamReader(_templateFile))
            {
                templateFileContents = await templateStreamReader.ReadToEndAsync();
            }

            int batchNumber = 0;

            for (int batchStart = 0; batchStart < numberOfEntries; batchStart += batchSizeValue)
            {
                // Inclusive end index, clamped to the last entry for the final (possibly short) batch.
                var batchEnd = Math.Min(batchStart + batchSizeValue, numberOfEntries) - 1;

                var cursorCommandFileName = $"cursor{batchNumber}.cmd";
                var cursorTextFileName = $"cursor{batchNumber}.txt";

                using (var cursorCommandStreamWriter = new StreamWriter(Path.Combine(_outputFolder, cursorCommandFileName)))
                using (var cursorTextStreamWriter = new StreamWriter(Path.Combine(_outputFolder, cursorTextFileName)))
                {
                    var commandStreamContents = templateFileContents;

                    // Every "[key]" placeholder in the template is replaced by the matching argument value,
                    // plus the two values that are computed here.
                    var replacements = _arguments.Concat(new[]
                    {
                        new KeyValuePair<string, string>("indexFile", indexFile),
                        new KeyValuePair<string, string>("cursorFile", cursorTextFileName)
                    });

                    foreach (var replacement in replacements)
                    {
                        commandStreamContents = commandStreamContents
                            .Replace($"[{replacement.Key}]", replacement.Value);
                    }

                    // The non-required arguments need to be added only if they were passed in;
                    // they cannot be hardcoded in the template.
                    var optionalArguments = new StringBuilder();
                    AppendOptionalArgument(optionalArguments, Arguments.ContentIsFlatContainer);
                    AppendOptionalArgument(optionalArguments, Arguments.FlatContainerName);
                    AppendOptionalArgument(optionalArguments, Arguments.StorageSuffix);
                    AppendOptionalArgument(optionalArguments, Arguments.AllIconsInFlatContainer);
                    AppendOptionalArgument(optionalArguments, Arguments.Driver);
                    AppendOptionalArgument(optionalArguments, Arguments.Verbose);

                    commandStreamContents = commandStreamContents
                        .Replace($"[{optionalArgumentsTemplate}]", optionalArguments.ToString());

                    await cursorCommandStreamWriter.WriteLineAsync(commandStreamContents);

                    // The cursor text file holds the inclusive "start,end" element range of this batch.
                    await cursorTextStreamWriter.WriteLineAsync($"{batchStart},{batchEnd}");
                }

                batchNumber++;
            }

            _log.WriteLine("Finished preparing lightning reindex command files.");

            _log.WriteLine("You can now copy the {0} file and all cursor*.cmd, cursor*.txt", indexFile);
            _log.WriteLine("to multiple machines and run the cursor*.cmd files in parallel.");
        }
Example #4
0
        /// <summary>
        /// Prepares a lightning reindex run. Writes <c>index.txt</c> to the output folder (one
        /// <c>Element@N. id</c> header per package ID followed by the latest catalog page URI for each
        /// version), creates the legacy, gzipped and SemVer 2.0.0 storage containers if they do not exist,
        /// saves a cursor set to the latest commit timestamp seen, and generates one
        /// <c>cursorN.cmd</c> / <c>cursorN.txt</c> pair per batch from the template file.
        /// </summary>
        /// <exception cref="FormatException">The configured batch size is not a valid integer.</exception>
        /// <exception cref="InvalidOperationException">The configured batch size is zero or negative.</exception>
        private async Task PrepareAsync()
        {
            // Parse and validate the batch size before doing any work. A zero or negative batch size would keep
            // the batching loop below from ever reaching numberOfEntries, so it would write command files
            // forever; failing here also avoids creating containers and overwriting the cursor for a run that
            // cannot complete.
            int batchSizeValue = int.Parse(_batchSize);
            if (batchSizeValue <= 0)
            {
                throw new InvalidOperationException(
                    $"The batch size must be a positive integer, but was {batchSizeValue}.");
            }

            _log.WriteLine("Making sure folder {0} exists.", _outputFolder);

            // Directory.CreateDirectory does nothing when the folder already exists, so no Exists check is needed.
            Directory.CreateDirectory(_outputFolder);

            // Create reindex file
            _log.WriteLine("Start preparing lightning reindex file...");

            var latestCommit = DateTime.MinValue;
            int numberOfEntries = 0;
            string indexFile = Path.Combine(_outputFolder, "index.txt");
            string storageCredentialArgumentsTemplate = "storageCredentialArguments";
            string optionalArgumentsTemplate = "optionalArguments";

            using (var streamWriter = new StreamWriter(indexFile, false))
            {
                var httpMessageHandlerFactory = CommandHelpers.GetHttpMessageHandlerFactory(TelemetryService, _verbose);
                var collectorHttpClient = new CollectorHttpClient(httpMessageHandlerFactory());
                var catalogIndexReader = new CatalogIndexReader(new Uri(_catalogIndex), collectorHttpClient, TelemetryService);

                var catalogIndexEntries = await catalogIndexReader.GetEntries();

                // Entries are ordered by commit timestamp before grouping, so within each package ID group a
                // later entry for the same version comes from a later (or equal) commit.
                var packageRegistrationGroups = catalogIndexEntries
                    .OrderBy(x => x.CommitTimeStamp)
                    .ThenBy(x => x.Id, StringComparer.OrdinalIgnoreCase)
                    .ThenBy(x => x.Version)
                    .GroupBy(x => x.Id, StringComparer.OrdinalIgnoreCase);

                foreach (var packageRegistrationGroup in packageRegistrationGroups)
                {
                    streamWriter.WriteLine("Element@{0}. {1}", numberOfEntries++, packageRegistrationGroup.Key);

                    var latestCatalogPages = new Dictionary<string, Uri>();

                    foreach (CatalogIndexEntry catalogIndexEntry in packageRegistrationGroup)
                    {
                        // The indexer adds the key or overwrites the existing value, so the last entry
                        // enumerated for a given normalized version wins.
                        latestCatalogPages[catalogIndexEntry.Version.ToNormalizedString()] = catalogIndexEntry.Uri;

                        if (latestCommit < catalogIndexEntry.CommitTimeStamp)
                        {
                            latestCommit = catalogIndexEntry.CommitTimeStamp;
                        }
                    }

                    foreach (var latestCatalogPage in latestCatalogPages)
                    {
                        streamWriter.WriteLine("{0}", latestCatalogPage.Value);
                    }
                }
            }

            _log.WriteLine("Finished preparing lightning reindex file. Output file: {0}", indexFile);

            // Create the containers
            _log.WriteLine("Creating the containers...");
            var container = GetAutofacContainer();
            var blobClient = container.Resolve<ICloudBlobClient>();
            var config = container.Resolve<IOptionsSnapshot<Catalog2RegistrationConfiguration>>().Value;

            var containerNames = new[]
            {
                config.LegacyStorageContainer,
                config.GzippedStorageContainer,
                config.SemVer2StorageContainer
            };

            foreach (var name in containerNames)
            {
                var reference = blobClient.GetContainerReference(name);
                var permissions = new BlobContainerPermissions
                {
                    PublicAccess = BlobContainerPublicAccessType.Blob
                };

                await reference.CreateIfNotExistAsync(permissions);
            }

            // Write cursor to storage
            _log.WriteLine("Start writing new cursor...");
            var storageFactory = container.ResolveKeyed<IStorageFactory>(DependencyInjectionExtensions.CursorBindingKey);
            var storage = storageFactory.Create();
            var cursor = new DurableCursor(storage.ResolveUri("cursor.json"), storage, latestCommit)
            {
                Value = latestCommit
            };

            await cursor.SaveAsync(CancellationToken.None);

            _log.WriteLine("Finished writing new cursor.");

            // Write command files
            _log.WriteLine("Start preparing lightning reindex command files...");

            string templateFileContents;

            using (var templateStreamReader = new StreamReader(_templateFile))
            {
                templateFileContents = await templateStreamReader.ReadToEndAsync();
            }

            int batchNumber = 0;

            for (int batchStart = 0; batchStart < numberOfEntries; batchStart += batchSizeValue)
            {
                // Inclusive end index, clamped to the last entry for the final (possibly short) batch.
                var batchEnd = Math.Min(batchStart + batchSizeValue, numberOfEntries) - 1;

                var cursorCommandFileName = $"cursor{batchNumber}.cmd";
                var cursorTextFileName = $"cursor{batchNumber}.txt";

                using (var cursorCommandStreamWriter = new StreamWriter(Path.Combine(_outputFolder, cursorCommandFileName)))
                using (var cursorTextStreamWriter = new StreamWriter(Path.Combine(_outputFolder, cursorTextFileName)))
                {
                    var commandStreamContents = templateFileContents;

                    // Every "[key]" placeholder in the template is replaced by the matching argument value,
                    // plus the two values that are computed here.
                    var replacements = _arguments.Concat(new[]
                    {
                        new KeyValuePair<string, string>("indexFile", indexFile),
                        new KeyValuePair<string, string>("cursorFile", cursorTextFileName)
                    });

                    foreach (var replacement in replacements)
                    {
                        commandStreamContents = commandStreamContents
                            .Replace($"[{replacement.Key}]", replacement.Value);
                    }

                    // Since we only need to set the storage key or the storage sas token, only one will be added to the template.
                    var storageCredentialArguments = new StringBuilder();
                    AddStorageCredentialArgument(storageCredentialArguments, Arguments.StorageSasValue, Arguments.StorageKeyValue);
                    AddStorageCredentialArgument(storageCredentialArguments, Arguments.CompressedStorageSasValue, Arguments.CompressedStorageKeyValue);
                    AddStorageCredentialArgument(storageCredentialArguments, Arguments.SemVer2StorageSasValue, Arguments.SemVer2StorageKeyValue);

                    commandStreamContents = commandStreamContents
                        .Replace($"[{storageCredentialArgumentsTemplate}]", storageCredentialArguments.ToString());

                    // The non-required arguments need to be added only if they were passed in;
                    // they cannot be hardcoded in the template.
                    var optionalArguments = new StringBuilder();
                    AppendArgument(optionalArguments, Arguments.FlatContainerName);
                    AppendArgument(optionalArguments, Arguments.StorageSuffix);
                    AppendArgument(optionalArguments, Arguments.Verbose);

                    commandStreamContents = commandStreamContents
                        .Replace($"[{optionalArgumentsTemplate}]", optionalArguments.ToString());

                    await cursorCommandStreamWriter.WriteLineAsync(commandStreamContents);

                    // The cursor text file holds the inclusive "start,end" element range of this batch.
                    await cursorTextStreamWriter.WriteLineAsync($"{batchStart},{batchEnd}");
                }

                batchNumber++;
            }

            _log.WriteLine("Finished preparing lightning reindex command files.");

            _log.WriteLine("You can now copy the {0} file and all cursor*.cmd, cursor*.txt", indexFile);
            _log.WriteLine("to multiple machines and run the cursor*.cmd files in parallel.");
        }