// Verifies that a stream whose first eight bytes are not the expected
// "APICATFB" magic number is rejected with InvalidDataException.
public async Task Read_MagicNumber_Invalid_Throws()
{
    // "APIC_TFB" has the right length but the wrong content.
    var bogusHeader = Encoding.UTF8.GetBytes("APIC_TFB");
    await using var source = new MemoryStream(bogusHeader);

    await Assert.ThrowsAsync<InvalidDataException>(() => ApiCatalogModel.LoadAsync(source));
}
/// <summary>
/// Builds the suffix-tree file from an already-generated catalog model.
/// Skips all work when the output file already exists; APIs whose kind is an
/// accessor are excluded from the tree.
/// </summary>
private static async Task GenerateSuffixTreeAsync(string catalogModelPath, string suffixTreePath)
{
    // Incremental build: nothing to do when the output is already present.
    if (File.Exists(suffixTreePath))
        return;

    Console.WriteLine($"Generating {Path.GetFileName(suffixTreePath)}...");

    var catalog = await ApiCatalogModel.LoadAsync(catalogModelPath);
    var treeBuilder = new SuffixTreeBuilder();

    foreach (var api in catalog.GetAllApis())
    {
        if (api.Kind.IsAccessor())
            continue;

        treeBuilder.Add(api.ToString(), api.Id);
    }

    await using var outputStream = File.Create(suffixTreePath);
    treeBuilder.WriteSuffixTree(outputStream);
}
// Verifies that a catalog blob carrying the correct magic number but a format
// version far beyond anything supported is rejected with InvalidDataException.
public async Task Read_Version_TooNew_Throws()
{
    await using var stream = new MemoryStream();

    // Write the valid magic number followed by an absurdly high version,
    // leaving the stream open so it can be rewound and re-read below.
    await using (var writer = new BinaryWriter(stream, Encoding.UTF8, leaveOpen: true))
    {
        writer.Write(Encoding.UTF8.GetBytes("APICATFB"));
        writer.Write(999_999_999);
    }

    stream.Position = 0;

    await Assert.ThrowsAsync<InvalidDataException>(() => ApiCatalogModel.LoadAsync(stream));
}
// Rebuilds all in-memory catalog state (catalog, availability context,
// statistics, GUID lookup, suffix tree, job info) from blob storage,
// downloading any file that is not already cached on disk.
//
// NOTE(review): outside the Development environment the cached files are
// deleted first, forcing a fresh download on every invalidation.
public async Task InvalidateAsync()
{
    if (!_environment.IsDevelopment())
    {
        File.Delete(GetDatabasePath());
        File.Delete(GetSuffixTreePath());
    }

    var azureConnectionString = _configuration["AzureStorageConnectionString"];

    // Download the catalog blob only when no cached copy exists.
    var databasePath = GetDatabasePath();
    if (!File.Exists(databasePath))
    {
        var blobClient = new BlobClient(azureConnectionString, "catalog", "apicatalog.dat");
        await blobClient.DownloadToAsync(databasePath);
    }

    var catalog = await ApiCatalogModel.LoadAsync(databasePath);
    var availabilityContext = ApiAvailabilityContext.Create(catalog);
    var apiByGuid = catalog.GetAllApis().ToDictionary(a => a.Guid);

    // The suffix tree blob is stored deflate-compressed; decompress it while
    // streaming it to the local cache file.
    var suffixTreePath = GetSuffixTreePath();
    if (!File.Exists(suffixTreePath))
    {
        // TODO: Ideally the underlying file format uses compression. This seems weird.
        var blobClient = new BlobClient(azureConnectionString, "catalog", "suffixtree.dat.deflate");
        using var blobStream = await blobClient.OpenReadAsync();
        using var deflateStream = new DeflateStream(blobStream, CompressionMode.Decompress);
        using var fileStream = File.Create(suffixTreePath);
        await deflateStream.CopyToAsync(fileStream);
    }

    var suffixTree = SuffixTree.Load(suffixTreePath);

    // Job metadata is small and always re-read straight from blob storage.
    var jobBlobClient = new BlobClient(azureConnectionString, "catalog", "job.json");
    using var jobStream = await jobBlobClient.OpenReadAsync();
    var jobInfo = await JsonSerializer.DeserializeAsync<CatalogJobInfo>(jobStream);

    // Publish the freshly built state to the fields read by the rest of the service.
    _catalog = catalog;
    _availabilityContext = availabilityContext;
    _statistics = catalog.GetStatistics();
    _apiByGuid = apiByGuid;
    _suffixTree = suffixTree;
    _jobInfo = jobInfo;
}
/// <summary>
/// Converts the raw database into the binary catalog model, then re-loads the
/// result and writes its statistics both to the console and to a sibling
/// <c>.txt</c> file. Skips all work when the model file already exists.
/// </summary>
/// <remarks>
/// NOTE(review): unlike its sibling <c>GenerateSuffixTreeAsync</c>, this
/// method lacks the <c>Async</c> suffix; renaming would break callers.
/// </remarks>
private static async Task GenerateCatalogModel(string databasePath, string catalogModelPath)
{
    // Incremental build: the model is only produced once.
    if (File.Exists(catalogModelPath))
        return;

    Console.WriteLine($"Generating {Path.GetFileName(catalogModelPath)}...");

    await ApiCatalogModel.ConvertAsync(databasePath, catalogModelPath);

    // Re-load the freshly written model to report on its contents.
    var model = await ApiCatalogModel.LoadAsync(catalogModelPath);
    var stats = model.GetStatistics().ToString();

    Console.WriteLine("Catalog stats:");
    Console.Write(stats);

    var statsPath = Path.ChangeExtension(catalogModelPath, ".txt");
    await File.WriteAllTextAsync(statsPath, stats);
}
/// <summary>
/// Loads the API catalog from the local cache, downloading it first when the
/// file is missing. Exits the process with code 1 when the catalog cannot be
/// opened.
/// </summary>
public async Task<ApiCatalogModel> LoadCatalogAsync()
{
    var catalogPath = GetCatalogPath();

    if (!File.Exists(catalogPath))
        DownloadCatalog();

    // Consistency fix: the static sibling overload of this method suppresses
    // CA1031 and uses ConfigureAwait(false); mirror that pattern here. The
    // broad catch is deliberate — any failure to open the catalog is fatal.
#pragma warning disable CA1031 // Do not catch general exception types
    try
    {
        return await ApiCatalogModel.LoadAsync(catalogPath).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        Console.Error.WriteLine($"error: can't open catalog: {ex.Message}");
        Environment.Exit(1);
        return null; // Unreachable: Environment.Exit does not return.
    }
#pragma warning restore CA1031 // Do not catch general exception types
}
/// <summary>
/// Loads the API catalog from the local cache, downloading it first when the
/// file is missing. Terminates the process with exit code 1 on failure.
/// </summary>
public static async Task<ApiCatalogModel> LoadCatalogAsync()
{
    var path = GetCatalogPath();

    if (!File.Exists(path))
        DownloadCatalog();

#pragma warning disable CA1031 // Do not catch general exception types
    try
    {
        return await ApiCatalogModel.LoadAsync(path).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        // Opening the catalog is essential; report the error and terminate.
        Console.Error.WriteLine($"error: can't open catalog: {ex.Message}");
        Environment.Exit(1);
        return null; // Not reached: Environment.Exit terminates the process.
    }
#pragma warning restore CA1031 // Do not catch general exception types
}
private static async Task CrawlAsync(PackageListCrawler packageListCrawler, CrawlerStore crawlerStore) { var apiCatalogPath = GetScratchFilePath("apicatalog.dat"); var databasePath = GetScratchFilePath("usage.db"); var usagesPath = GetScratchFilePath("usages.tsv"); Console.WriteLine("Downloading API catalog..."); await crawlerStore.DownloadApiCatalogAsync(apiCatalogPath); Console.WriteLine("Loading API catalog..."); var apiCatalog = await ApiCatalogModel.LoadAsync(apiCatalogPath); Console.WriteLine("Downloading previously indexed usages..."); await crawlerStore.DownloadDatabaseAsync(databasePath); using var usageDatabase = await UsageDatabase.OpenOrCreateAsync(databasePath); Console.WriteLine("Creating temporary indexes..."); await usageDatabase.CreateTempIndexesAsync(); Console.WriteLine("Discovering existing APIs..."); var apiMap = await usageDatabase.ReadApisAsync(); Console.WriteLine("Discovering existing packages..."); var packageIdMap = await usageDatabase.ReadPackagesAsync(); Console.WriteLine("Discovering latest packages..."); var stopwatch = Stopwatch.StartNew(); var packages = await packageListCrawler.GetPackagesAsync(); Console.WriteLine($"Finished package discovery. 
Took {stopwatch.Elapsed}"); Console.WriteLine($"Found {packages.Count:N0} package(s) in total."); packages = CollapseToLatestStableAndLatestPreview(packages); Console.WriteLine($"Found {packages.Count:N0} package(s) after collapsing to latest stable & latest preview."); var indexedPackages = new HashSet <PackageIdentity>(packageIdMap.Values); var currentPackages = new HashSet <PackageIdentity>(packages); var packagesToBeDeleted = indexedPackages.Where(p => !currentPackages.Contains(p)).ToArray(); var packagesToBeIndexed = currentPackages.Where(p => !indexedPackages.Contains(p)).ToArray(); Console.WriteLine($"Found {indexedPackages.Count:N0} package(s) in the index."); Console.WriteLine($"Found {packagesToBeDeleted.Length:N0} package(s) to remove from the index."); Console.WriteLine($"Found {packagesToBeIndexed.Length:N0} package(s) to add to the index."); Console.WriteLine($"Deleting packages..."); stopwatch.Restart(); await usageDatabase.DeletePackagesAsync(packagesToBeDeleted.Select(p => packageIdMap.GetId(p))); Console.WriteLine($"Finished deleting packages. Took {stopwatch.Elapsed}"); Console.WriteLine($"Inserting new packages..."); stopwatch.Restart(); using (var packageWriter = usageDatabase.CreatePackageWriter()) { foreach (var packageIdentity in packagesToBeIndexed) { var packageId = packageIdMap.Add(packageIdentity); await packageWriter.WriteAsync(packageId, packageIdentity); } await packageWriter.SaveAsync(); } Console.WriteLine($"Finished inserting new packages. 
Took {stopwatch.Elapsed}"); stopwatch.Restart(); var numberOfWorkers = Environment.ProcessorCount; Console.WriteLine($"Crawling using {numberOfWorkers} workers."); var inputQueue = new ConcurrentQueue <PackageIdentity>(packagesToBeIndexed); var outputQueue = new BlockingCollection <PackageResults>(); var workers = Enumerable.Range(0, numberOfWorkers) .Select(i => Task.Run(() => CrawlWorker(i, inputQueue, outputQueue))) .ToArray(); var outputWorker = Task.Run(() => OutputWorker(usageDatabase, apiMap, packageIdMap, outputQueue)); await Task.WhenAll(workers); outputQueue.CompleteAdding(); await outputWorker; Console.WriteLine($"Finished crawling. Took {stopwatch.Elapsed}"); Console.WriteLine("Inserting missing APIs..."); stopwatch.Restart(); await usageDatabase.InsertMissingApisAsync(apiMap); Console.WriteLine($"Finished inserting missing APIs. Took {stopwatch.Elapsed}"); Console.WriteLine($"Aggregating results..."); stopwatch.Restart(); var ancestors = apiCatalog.GetAllApis() .SelectMany(a => a.AncestorsAndSelf(), (api, ancestor) => (api.Guid, ancestor.Guid)); await usageDatabase.ExportUsagesAsync(apiMap, ancestors, usagesPath); Console.WriteLine($"Finished aggregating results. Took {stopwatch.Elapsed}"); Console.WriteLine($"Vacuuming database..."); stopwatch.Restart(); await usageDatabase.VacuumAsync(); Console.WriteLine($"Finished vacuuming database. Took {stopwatch.Elapsed}"); usageDatabase.Dispose(); Console.WriteLine($"Uploading usages..."); await crawlerStore.UploadResultsAsync(usagesPath); Console.WriteLine($"Uploading database..."); await crawlerStore.UploadDatabaseAsync(databasePath);
// Verifies that loading from a completely empty stream fails with
// InvalidDataException.
public async Task Read_Empty_Throws()
{
    await using var emptyStream = new MemoryStream();

    await Assert.ThrowsAsync<InvalidDataException>(() => ApiCatalogModel.LoadAsync(emptyStream));
}