public async Task Read_MagicNumber_Invalid_Throws()
    {
        // Arrange: a stream holding only the bytes of "APIC_TFB", which this test
        // treats as an invalid magic number.
        byte[] bogusHeader = Encoding.UTF8.GetBytes("APIC_TFB");
        await using var input = new MemoryStream(bogusHeader);

        // Act + Assert: loading from that stream must fail with InvalidDataException.
        await Assert.ThrowsAsync<InvalidDataException>(
            () => ApiCatalogModel.LoadAsync(input));
    }
Example #2
0
    /// <summary>
    /// Writes a suffix tree file to <paramref name="suffixTreePath"/>, built from the APIs of the
    /// catalog loaded from <paramref name="catalogModelPath"/>. Returns immediately when the
    /// suffix tree file already exists.
    /// </summary>
    /// <param name="catalogModelPath">Path of the catalog model file to load.</param>
    /// <param name="suffixTreePath">Path of the suffix tree file to create.</param>
    private static async Task GenerateSuffixTreeAsync(string catalogModelPath, string suffixTreePath)
    {
        var alreadyGenerated = File.Exists(suffixTreePath);
        if (alreadyGenerated)
        {
            return;
        }

        var outputName = Path.GetFileName(suffixTreePath);
        Console.WriteLine($"Generating {outputName}...");

        var model = await ApiCatalogModel.LoadAsync(catalogModelPath);
        var treeBuilder = new SuffixTreeBuilder();

        foreach (var entry in model.GetAllApis())
        {
            // APIs whose kind is an accessor are not added to the tree.
            if (!entry.Kind.IsAccessor())
            {
                treeBuilder.Add(entry.ToString(), entry.Id);
            }
        }

        await using var output = File.Create(suffixTreePath);
        treeBuilder.WriteSuffixTree(output);
    }
    public async Task Read_Version_TooNew_Throws()
    {
        await using var payload = new MemoryStream();

        // leaveOpen keeps the MemoryStream usable after the writer has been disposed.
        await using (var headerWriter = new BinaryWriter(payload, Encoding.UTF8, leaveOpen: true))
        {
            // The bytes of "APICATFB" followed by the 32-bit value 999,999,999
            // (per the test name, a version that is too new).
            var magic = Encoding.UTF8.GetBytes("APICATFB");
            headerWriter.Write(magic);
            headerWriter.Write(999_999_999);
        }

        // Rewind so that loading starts at the first byte written above.
        payload.Seek(0, SeekOrigin.Begin);

        await Assert.ThrowsAsync<InvalidDataException>(
            () => ApiCatalogModel.LoadAsync(payload));
    }
Example #4
0
    /// <summary>
    /// Reloads the catalog, its derived lookup data, the suffix tree and the job information,
    /// then stores all of them in this instance's fields.
    /// </summary>
    /// <remarks>
    /// When the environment is not a development environment, the local database and suffix tree
    /// files are deleted first, so both are downloaded again. Otherwise files that already exist
    /// locally are reused and only missing ones are downloaded.
    /// </remarks>
    public async Task InvalidateAsync()
    {
        var isDevelopment = _environment.IsDevelopment();
        if (!isDevelopment)
        {
            File.Delete(GetDatabasePath());
            File.Delete(GetSuffixTreePath());
        }

        var connectionString = _configuration["AzureStorageConnectionString"];

        // Catalog: download only when there is no local copy.
        var catalogFile = GetDatabasePath();
        var catalogIsCached = File.Exists(catalogFile);
        if (!catalogIsCached)
        {
            var catalogBlob = new BlobClient(connectionString, "catalog", "apicatalog.dat");
            await catalogBlob.DownloadToAsync(catalogFile);
        }

        var loadedCatalog = await ApiCatalogModel.LoadAsync(catalogFile);
        var context = ApiAvailabilityContext.Create(loadedCatalog);
        var apisByGuid = loadedCatalog.GetAllApis().ToDictionary(api => api.Guid);

        // Suffix tree: the blob is deflate-compressed, so it is inflated while being written to disk.
        var treeFile = GetSuffixTreePath();
        var treeIsCached = File.Exists(treeFile);
        if (!treeIsCached)
        {
            // TODO: Ideally the underlying file format uses compression. This seems weird.
            var treeBlob = new BlobClient(connectionString, "catalog", "suffixtree.dat.deflate");
            using var compressed = await treeBlob.OpenReadAsync();
            using var inflater = new DeflateStream(compressed, CompressionMode.Decompress);
            using var target = File.Create(treeFile);
            await inflater.CopyToAsync(target);
        }

        var loadedTree = SuffixTree.Load(treeFile);

        // Job information is always read from the blob; it is never cached on disk here.
        var jobBlob = new BlobClient(connectionString, "catalog", "job.json");
        using var jobJson = await jobBlob.OpenReadAsync();
        var job = await JsonSerializer.DeserializeAsync<CatalogJobInfo>(jobJson);

        // Fields are assigned only after every load above has succeeded.
        _catalog = loadedCatalog;
        _availabilityContext = context;
        _statistics = loadedCatalog.GetStatistics();
        _apiByGuid = apisByGuid;
        _suffixTree = loadedTree;
        _jobInfo = job;
    }
Example #5
0
    /// <summary>
    /// Converts the database at <paramref name="databasePath"/> into a catalog model file at
    /// <paramref name="catalogModelPath"/>, then prints the model's statistics and writes them to
    /// a ".txt" file next to the model. Returns immediately when the model file already exists.
    /// </summary>
    /// <param name="databasePath">Path of the source database passed to the conversion.</param>
    /// <param name="catalogModelPath">Path of the catalog model file to produce.</param>
    private static async Task GenerateCatalogModel(string databasePath, string catalogModelPath)
    {
        var alreadyGenerated = File.Exists(catalogModelPath);
        if (alreadyGenerated)
        {
            return;
        }

        var modelFileName = Path.GetFileName(catalogModelPath);
        Console.WriteLine($"Generating {modelFileName}...");
        await ApiCatalogModel.ConvertAsync(databasePath, catalogModelPath);

        // Load the file that was just written and report its statistics.
        var catalog = await ApiCatalogModel.LoadAsync(catalogModelPath);
        var statisticsText = catalog.GetStatistics().ToString();

        Console.WriteLine("Catalog stats:");
        Console.Write(statisticsText);

        // Same path as the model, with the extension replaced by ".txt".
        var statisticsPath = Path.ChangeExtension(catalogModelPath, ".txt");
        await File.WriteAllTextAsync(statisticsPath, statisticsText);
    }
Example #6
0
    /// <summary>
    /// Loads the catalog from the path returned by <c>GetCatalogPath</c>, calling
    /// <c>DownloadCatalog</c> first when no file exists at that path.
    /// </summary>
    /// <returns>The loaded catalog.</returns>
    /// <remarks>
    /// If loading throws, the error message is written to standard error and the process
    /// exits with code 1.
    /// </remarks>
    public async Task<ApiCatalogModel> LoadCatalogAsync()
    {
        var path = GetCatalogPath();

        var isCached = File.Exists(path);
        if (!isCached)
        {
            DownloadCatalog();
        }

        try
        {
            var catalog = await ApiCatalogModel.LoadAsync(path);
            return catalog;
        }
        catch (Exception error)
        {
            Console.Error.WriteLine($"error: can't open catalog: {error.Message}");
            Environment.Exit(1);

            // Environment.Exit ends the process; this return only satisfies the compiler.
            return null;
        }
    }
        /// <summary>
        /// Loads the catalog from the path returned by <c>GetCatalogPath</c>, calling
        /// <c>DownloadCatalog</c> first when no file exists at that path.
        /// </summary>
        /// <returns>The loaded catalog.</returns>
        /// <remarks>
        /// If loading throws, the error message is written to standard error and the process
        /// exits with code 1.
        /// </remarks>
        public static async Task<ApiCatalogModel> LoadCatalogAsync()
        {
            var path = GetCatalogPath();

            var isCached = File.Exists(path);
            if (!isCached)
            {
                DownloadCatalog();
            }

#pragma warning disable CA1031 // Do not catch general exception types
            try
            {
                var catalog = await ApiCatalogModel.LoadAsync(path).ConfigureAwait(false);
                return catalog;
            }
            catch (Exception error)
            {
                Console.Error.WriteLine($"error: can't open catalog: {error.Message}");
                Environment.Exit(1);

                // Environment.Exit ends the process; this return only satisfies the compiler.
                return null;
            }
#pragma warning restore CA1031 // Do not catch general exception types
        }
Example #8
0
    /// <summary>
    /// Runs one crawl: downloads the API catalog and the usage database into scratch files,
    /// reconciles the indexed packages with the latest package list, crawls the packages that
    /// are new, exports the aggregated usages, and uploads the usages file and the database.
    /// </summary>
    /// <param name="packageListCrawler">Supplies the current list of packages.</param>
    /// <param name="crawlerStore">Used to download the inputs and upload the results.</param>
    private static async Task CrawlAsync(PackageListCrawler packageListCrawler, CrawlerStore crawlerStore)
    {
        // All working files live at paths produced by GetScratchFilePath.
        var apiCatalogPath = GetScratchFilePath("apicatalog.dat");
        var databasePath   = GetScratchFilePath("usage.db");
        var usagesPath     = GetScratchFilePath("usages.tsv");

        Console.WriteLine("Downloading API catalog...");

        await crawlerStore.DownloadApiCatalogAsync(apiCatalogPath);

        Console.WriteLine("Loading API catalog...");

        var apiCatalog = await ApiCatalogModel.LoadAsync(apiCatalogPath);

        Console.WriteLine("Downloading previously indexed usages...");

        await crawlerStore.DownloadDatabaseAsync(databasePath);

        using var usageDatabase = await UsageDatabase.OpenOrCreateAsync(databasePath);

        Console.WriteLine("Creating temporary indexes...");

        await usageDatabase.CreateTempIndexesAsync();

        Console.WriteLine("Discovering existing APIs...");

        var apiMap = await usageDatabase.ReadApisAsync();

        Console.WriteLine("Discovering existing packages...");

        var packageIdMap = await usageDatabase.ReadPackagesAsync();

        Console.WriteLine("Discovering latest packages...");

        var stopwatch = Stopwatch.StartNew();
        var packages  = await packageListCrawler.GetPackagesAsync();

        Console.WriteLine($"Finished package discovery. Took {stopwatch.Elapsed}");
        Console.WriteLine($"Found {packages.Count:N0} package(s) in total.");

        packages = CollapseToLatestStableAndLatestPreview(packages);

        Console.WriteLine($"Found {packages.Count:N0} package(s) after collapsing to latest stable & latest preview.");

        // Set difference in both directions: packages that are indexed but no longer current
        // get deleted, packages that are current but not yet indexed get crawled.
        var indexedPackages = new HashSet <PackageIdentity>(packageIdMap.Values);
        var currentPackages = new HashSet <PackageIdentity>(packages);

        var packagesToBeDeleted = indexedPackages.Where(p => !currentPackages.Contains(p)).ToArray();
        var packagesToBeIndexed = currentPackages.Where(p => !indexedPackages.Contains(p)).ToArray();

        Console.WriteLine($"Found {indexedPackages.Count:N0} package(s) in the index.");
        Console.WriteLine($"Found {packagesToBeDeleted.Length:N0} package(s) to remove from the index.");
        Console.WriteLine($"Found {packagesToBeIndexed.Length:N0} package(s) to add to the index.");

        Console.WriteLine($"Deleting packages...");

        stopwatch.Restart();
        await usageDatabase.DeletePackagesAsync(packagesToBeDeleted.Select(p => packageIdMap.GetId(p)));

        Console.WriteLine($"Finished deleting packages. Took {stopwatch.Elapsed}");

        Console.WriteLine($"Inserting new packages...");

        stopwatch.Restart();

        // Register every new package in the ID map and write it to the database before crawling.
        using (var packageWriter = usageDatabase.CreatePackageWriter())
        {
            foreach (var packageIdentity in packagesToBeIndexed)
            {
                var packageId = packageIdMap.Add(packageIdentity);
                await packageWriter.WriteAsync(packageId, packageIdentity);
            }

            await packageWriter.SaveAsync();
        }

        Console.WriteLine($"Finished inserting new packages. Took {stopwatch.Elapsed}");

        stopwatch.Restart();

        // One crawl worker per processor.
        var numberOfWorkers = Environment.ProcessorCount;

        Console.WriteLine($"Crawling using {numberOfWorkers} workers.");

        // Crawl workers share inputQueue and outputQueue; a single output worker is handed
        // outputQueue together with the database and both maps.
        var inputQueue = new ConcurrentQueue <PackageIdentity>(packagesToBeIndexed);

        var outputQueue = new BlockingCollection <PackageResults>();

        var workers = Enumerable.Range(0, numberOfWorkers)
                      .Select(i => Task.Run(() => CrawlWorker(i, inputQueue, outputQueue)))
                      .ToArray();

        var outputWorker = Task.Run(() => OutputWorker(usageDatabase, apiMap, packageIdMap, outputQueue));

        await Task.WhenAll(workers);

        // Adding is completed only after every crawl worker has finished; presumably this lets
        // the output worker end once the queue is drained (OutputWorker is not shown here).
        outputQueue.CompleteAdding();
        await outputWorker;

        Console.WriteLine($"Finished crawling. Took {stopwatch.Elapsed}");

        Console.WriteLine("Inserting missing APIs...");

        stopwatch.Restart();
        await usageDatabase.InsertMissingApisAsync(apiMap);

        Console.WriteLine($"Finished inserting missing APIs. Took {stopwatch.Elapsed}");

        Console.WriteLine($"Aggregating results...");

        stopwatch.Restart();

        // For every API in the catalog, one (API GUID, ancestor GUID) pair per element of
        // AncestorsAndSelf(). The sequence is lazy and is consumed by ExportUsagesAsync.
        var ancestors = apiCatalog.GetAllApis()
                        .SelectMany(a => a.AncestorsAndSelf(), (api, ancestor) => (api.Guid, ancestor.Guid));
        await usageDatabase.ExportUsagesAsync(apiMap, ancestors, usagesPath);

        Console.WriteLine($"Finished aggregating results. Took {stopwatch.Elapsed}");

        Console.WriteLine($"Vacuuming database...");

        stopwatch.Restart();
        await usageDatabase.VacuumAsync();

        Console.WriteLine($"Finished vacuuming database. Took {stopwatch.Elapsed}");

        // Explicit dispose before the uploads, in addition to the `using var` declaration above,
        // which disposes again at scope exit.
        // NOTE(review): presumably this releases the database file before it is uploaded;
        // confirm that UsageDatabase.Dispose is safe to call twice.
        usageDatabase.Dispose();

        Console.WriteLine($"Uploading usages...");

        await crawlerStore.UploadResultsAsync(usagesPath);

        Console.WriteLine($"Uploading database...");

        await crawlerStore.UploadDatabaseAsync(databasePath);
 public async Task Read_Empty_Throws()
 {
     // Arrange: a MemoryStream created without any data.
     await using var emptyInput = new MemoryStream();

     // Act + Assert: loading from it must fail with InvalidDataException.
     await Assert.ThrowsAsync<InvalidDataException>(
         () => ApiCatalogModel.LoadAsync(emptyInput));
 }