/// <summary>
/// Ensures every page in the index is stored inline in the index blob, then refreshes
/// each page item's metadata and commit information.
/// </summary>
private async Task UpdateInlinedPagesAsync(
    HiveType hive,
    string id,
    IndexInfo indexInfo,
    CatalogCommit registrationCommit)
{
    // RemoveAt + Insert keeps the item count constant, so it is safe to hoist.
    var pageCount = indexInfo.Items.Count;
    for (var i = 0; i < pageCount; i++)
    {
        var page = indexInfo.Items[i];

        if (!page.IsInlined)
        {
            _logger.LogInformation(
                "Moving page {PageNumber}/{PageCount} [{Lower}, {Upper}] from having its own blob to being inlined.",
                i + 1,
                pageCount,
                page.Lower.ToNormalizedString(),
                page.Upper.ToNormalizedString());

            // Replace the externally-stored page with an inlined clone at the same position.
            page = await page.CloneToInlinedAsync();
            indexInfo.RemoveAt(i);
            indexInfo.Insert(i, page);
        }

        Guard.Assert(page.IsInlined, "The page should be inlined at this point.");

        _entityBuilder.UpdateInlinedPageItem(page.PageItem, hive, id, page.Count, page.Lower, page.Upper);
        _entityBuilder.UpdateCommit(page.PageItem, registrationCommit);
    }
}
/// <summary>
/// Collects the blob URLs of all pages stored outside the index. Inlined pages have no
/// standalone blob, so they contribute no URL.
/// </summary>
private HashSet<string> GetPageUrls(IndexInfo indexInfo)
{
    var urls = new HashSet<string>();
    foreach (var pageInfo in indexInfo.Items)
    {
        if (!pageInfo.IsInlined)
        {
            urls.Add(pageInfo.PageItem.Url);
        }
    }

    return urls;
}
/// <summary>
/// Captures the inputs of a merge operation plus the mutable sets that track what the
/// merge changed.
/// </summary>
public Context(IndexInfo indexInfo, IReadOnlyList<CatalogCommitItem> sortedCatalog)
{
    IndexInfo = indexInfo;
    SortedCatalog = sortedCatalog;

    // Each change-tracking set starts empty; the merge populates them.
    ModifiedPages = new HashSet<PageInfo>();
    ModifiedLeaves = new HashSet<LeafInfo>();
    DeletedLeaves = new HashSet<LeafInfo>();
}
public async Task DeleteAgainstEmpty()
{
    // Arrange: an empty index and a catalog containing only a delete.
    var index = IndexInfo.New();
    var catalog = MakeSortedCatalog(Delete("1.0.0"));

    // Act
    var result = await Target.MergeAsync(index, catalog);

    // Assert: deleting a version that was never present is a no-op.
    Assert.Empty(index.Items);
    Assert.Empty(result.ModifiedPages);
    Assert.Empty(result.ModifiedLeaves);
    Assert.Empty(result.DeletedLeaves);
}
public async Task AddManyVersions()
{
    // Arrange: four details entries merged into an empty index.
    var index = IndexInfo.New();
    var catalog = MakeSortedCatalog(
        Details("1.0.0"),
        Details("2.0.0"),
        Details("3.0.0"),
        Details("4.0.0"));

    // Act
    var result = await Target.MergeAsync(index, catalog);

    // Assert: the versions span two pages and all four leaves are reported modified.
    var expectedVersions = new[] { "1.0.0", "2.0.0", "3.0.0", "4.0.0" };
    Assert.Equal(2, index.Items.Count);
    Assert.Equal(expectedVersions, await GetVersionArrayAsync(index));
    Assert.Equal(2, result.ModifiedPages.Count);
    Assert.Equal(expectedVersions, GetVersionArray(result.ModifiedLeaves));
    Assert.Empty(result.DeletedLeaves);
}
/// <summary>
/// Flattens the index into a single list of all leaf versions, in page order.
/// </summary>
private static async Task<List<NuGetVersion>> GetVersionsAsync(IndexInfo indexInfo)
{
    var versions = new List<NuGetVersion>();
    foreach (var pageInfo in indexInfo.Items)
    {
        var leafInfos = await pageInfo.GetLeafInfosAsync();
        versions.AddRange(leafInfos.Select(x => x.Version));
    }

    return versions;
}
/// <summary>
/// Deletes blobs that are no longer referenced after an update: pages that existed before
/// the update but are gone from the index now, and leaves for deleted versions.
/// </summary>
/// <param name="hive">The hive being updated.</param>
/// <param name="replicaHives">Replica hives that mirror every delete.</param>
/// <param name="existingPageUrls">Page URLs present in the index before the update.</param>
/// <param name="indexInfo">The index after the update.</param>
/// <param name="mergeResult">The set of modified pages/leaves and deleted leaves.</param>
private async Task DeleteOrphansAsync(
    HiveType hive,
    IReadOnlyList<HiveType> replicaHives,
    IEnumerable<string> existingPageUrls,
    IndexInfo indexInfo,
    HiveMergeResult mergeResult)
{
    // Start with all of the page URLs found in the index prior to the update process.
    var orphanUrls = new HashSet<string>(existingPageUrls);

    // Add all of the deleted leaf URLs.
    orphanUrls.UnionWith(mergeResult.DeletedLeaves.Select(x => x.LeafItem.Url));

    // Leave the new page URLs alone.
    foreach (var pageInfo in indexInfo.Items)
    {
        orphanUrls.Remove(pageInfo.PageItem.Url);
    }

    // Leave the modified leaf URLs alone. This should not be necessary since deleted leaves and modified
    // leaves are disjoint sets but is a reasonable precaution.
    foreach (var leafInfo in mergeResult.ModifiedLeaves)
    {
        orphanUrls.Remove(leafInfo.LeafItem.Url);
    }

    if (orphanUrls.Count == 0)
    {
        _logger.LogInformation("There are no orphan blobs to delete.");
        return;
    }

    _logger.LogInformation("About to delete {Count} orphan blobs.", orphanUrls.Count);

    // Drain the orphan URLs with a bounded number of concurrent delete workers.
    var work = new ConcurrentBag<string>(orphanUrls);
    await ParallelAsync.Repeat(
        async () =>
        {
            while (work.TryTake(out var url))
            {
                await _storage.DeleteUrlAsync(hive, replicaHives, url);
            }
        },
        _options.Value.MaxConcurrentOperationsPerHive);

    // FIX: the original template "Done deleting orphan blobs." had no placeholder but was
    // passed orphanUrls.Count — a message-template/argument mismatch (CA2017). The count
    // placeholder is now part of the template.
    _logger.LogInformation("Done deleting {Count} orphan blobs.", orphanUrls.Count);
}
/// <summary>
/// Merges a sorted batch of catalog commit items into the index, returning which pages
/// and leaves were modified and which leaves were deleted.
/// </summary>
public async Task<HiveMergeResult> MergeAsync(IndexInfo indexInfo, IReadOnlyList<CatalogCommitItem> sortedCatalog)
{
    // Precondition: strictly ascending versions, which also implies no duplicates.
    for (var i = 1; i < sortedCatalog.Count; i++)
    {
        var previous = sortedCatalog[i - 1].PackageIdentity.Version;
        var current = sortedCatalog[i].PackageIdentity.Version;
        Guard.Assert(
            previous < current,
            "The catalog commit items must be in ascending order by version.");
    }

    var context = new Context(indexInfo, sortedCatalog);
    await MergeAsync(context);

    return new HiveMergeResult(
        context.ModifiedPages,
        context.ModifiedLeaves,
        context.DeletedLeaves);
}
/// <summary>
/// Builds an <see cref="IndexInfo"/> from a sorted list of versions, chunked into pages
/// of at most <paramref name="maxLeavesPerPage"/> leaves each.
/// </summary>
private static IndexInfo MakeIndexInfo(
    List<NuGetVersion> sortedVersions,
    int maxLeavesPerPage,
    Dictionary<NuGetVersion, string> versionToNormalized)
{
    var index = new RegistrationIndex
    {
        Items = new List<RegistrationPage>(),
    };

    // Fill the pages, starting a fresh page every maxLeavesPerPage leaves.
    RegistrationPage currentPage = null;
    var position = 0;
    foreach (var version in sortedVersions)
    {
        if (position % maxLeavesPerPage == 0)
        {
            currentPage = new RegistrationPage
            {
                Items = new List<RegistrationLeafItem>(),
            };
            index.Items.Add(currentPage);
        }

        currentPage.Items.Add(new RegistrationLeafItem
        {
            CatalogEntry = new RegistrationCatalogEntry
            {
                Version = versionToNormalized[version],
            },
        });

        position++;
    }

    // Derive each page's count and version bounds from its leaf items.
    foreach (var page in index.Items)
    {
        page.Count = page.Items.Count;
        page.Lower = page.Items.First().CatalogEntry.Version;
        page.Upper = page.Items.Last().CatalogEntry.Version;
    }

    return IndexInfo.Existing(storage: null, hive: HiveType.SemVer2, index: index);
}
public async Task AddSingleFirstVersion(string version)
{
    // Arrange: a single details entry merged into an empty index.
    var index = IndexInfo.New();
    var catalog = MakeSortedCatalog(Details(version));

    // Act
    var result = await Target.MergeAsync(index, catalog);

    // Assert: exactly one fetched page with exactly one leaf carrying the version.
    var pageInfo = Assert.Single(index.Items);
    Assert.True(pageInfo.IsPageFetched);
    var page = await pageInfo.GetPageAsync();
    var leafInfo = Assert.Single(await pageInfo.GetLeafInfosAsync());
    var leafItem = Assert.Single(page.Items);
    Assert.Same(leafItem, leafInfo.LeafItem);
    Assert.Equal(version, leafItem.CatalogEntry.Version);
    Assert.Same(pageInfo, Assert.Single(result.ModifiedPages));
    Assert.Same(leafInfo, Assert.Single(result.ModifiedLeaves));
    Assert.Empty(result.DeletedLeaves);
}
/// <summary>
/// Returns every version in the index as a normalized version string, in page order.
/// </summary>
private static async Task<string[]> GetVersionArrayAsync(IndexInfo indexInfo)
{
    var versions = await GetVersionsAsync(indexInfo);

    var normalized = new string[versions.Count];
    for (var i = 0; i < versions.Count; i++)
    {
        normalized[i] = versions[i].ToNormalizedString();
    }

    return normalized;
}
/// <summary>
/// Applies a batch of catalog entries (package details and deletes) to the registration
/// index of a single package ID in one hive and its replica hives. Orchestrates the full
/// cycle: read the existing index, merge the batch in memory, write leaves, write pages
/// (inlined or standalone depending on total leaf count), write or delete the index, and
/// finally delete orphaned blobs.
/// </summary>
/// <param name="hive">The hive to update.</param>
/// <param name="replicaHives">Replica hives that mirror every write and delete.</param>
/// <param name="id">The package ID whose registration index is updated.</param>
/// <param name="entries">The catalog commit items to apply (details and deletes).</param>
/// <param name="entryToCatalogLeaf">Catalog leaf data for each package-details entry; delete entries are absent, so its count is the upsert count.</param>
/// <param name="registrationCommit">The commit metadata stamped onto updated entities.</param>
public async Task UpdateAsync(
    HiveType hive,
    IReadOnlyList<HiveType> replicaHives,
    string id,
    IReadOnlyList<CatalogCommitItem> entries,
    IReadOnlyDictionary<CatalogCommitItem, PackageDetailsCatalogLeaf> entryToCatalogLeaf,
    CatalogCommit registrationCommit)
{
    // Validate the input and put it in more convenient forms.
    if (!entries.Any())
    {
        // Nothing to apply; avoid touching storage at all.
        return;
    }

    GuardInput(entries, entryToCatalogLeaf);
    var sortedCatalog = entries.OrderBy(x => x.PackageIdentity.Version).ToList();
    var versionToCatalogLeaf = entryToCatalogLeaf.ToDictionary(x => x.Key.PackageIdentity.Version, x => x.Value);

    // Remove SemVer 2.0.0 versions if this hive should only have SemVer 1.0.0 versions.
    if (ShouldExcludeSemVer2(hive))
    {
        Guard.Assert(
            replicaHives.All(ShouldExcludeSemVer2),
            "A replica hive of a non-SemVer 2.0.0 hive must also exclude SemVer 2.0.0.");

        ExcludeSemVer2(hive, sortedCatalog, versionToCatalogLeaf);
    }
    else
    {
        Guard.Assert(
            replicaHives.All(x => !ShouldExcludeSemVer2(x)),
            "A replica hive of a SemVer 2.0.0 hive must also include SemVer 2.0.0.");
    }

    _logger.LogInformation(
        "Starting to update the {PackageId} registration index in the {Hive} hive and {ReplicaHives} replica hives with {UpsertCount} " +
        "package details and {DeleteCount} package deletes.",
        id,
        hive,
        replicaHives,
        entryToCatalogLeaf.Count,
        entries.Count - entryToCatalogLeaf.Count);

    // Read the existing registration index if it exists. If it does not exist, initialize a new index.
    var index = await _storage.ReadIndexOrNullAsync(hive, id);
    IndexInfo indexInfo;
    if (index == null)
    {
        indexInfo = IndexInfo.New();
    }
    else
    {
        indexInfo = IndexInfo.Existing(_storage, hive, index);
    }

    // Find all of the existing page URLs. This will be used later to find orphan pages.
    var existingPageUrls = GetPageUrls(indexInfo);

    // Read all of the obviously relevant pages in parallel. This simply evaluates some work that would
    // otherwise be done lazily.
    await LoadRelevantPagesAsync(sortedCatalog, indexInfo);

    // Merge the incoming catalog entries in memory.
    var mergeResult = await _merger.MergeAsync(indexInfo, sortedCatalog);

    // Write the modified leaves.
    await UpdateLeavesAsync(hive, replicaHives, id, versionToCatalogLeaf, registrationCommit, mergeResult);

    // Write the pages and handle the inline vs. non-inlined cases.
    if (indexInfo.Items.Count == 0)
    {
        _logger.LogInformation("There are no pages to update.");
    }
    else
    {
        // The total leaf count across all pages decides whether pages live inside the
        // index blob (inlined) or in their own blobs.
        var itemCount = indexInfo.Items.Sum(x => x.Count);
        if (itemCount <= _options.Value.MaxInlinedLeafItems)
        {
            _logger.LogInformation(
                "There are {Count} total leaf items so the leaf items will be inlined.",
                itemCount);
            await UpdateInlinedPagesAsync(hive, id, indexInfo, registrationCommit);
        }
        else
        {
            _logger.LogInformation(
                "There are {Count} total leaf items so the leaf items will not be inlined.",
                itemCount);
            await UpdateNonInlinedPagesAsync(hive, replicaHives, id, indexInfo, registrationCommit, mergeResult);
        }
    }

    // Write the index, if there were any changes.
    if (mergeResult.ModifiedPages.Any() || mergeResult.ModifiedLeaves.Any())
    {
        _logger.LogInformation("Updating the index.");
        _entityBuilder.UpdateIndex(indexInfo.Index, hive, id, indexInfo.Items.Count);
        _entityBuilder.UpdateCommit(indexInfo.Index, registrationCommit);
        await _storage.WriteIndexAsync(hive, replicaHives, id, indexInfo.Index);
    }

    // When every version of the package has been deleted, the index itself is removed.
    if (!indexInfo.Items.Any())
    {
        _logger.LogInformation("Deleting the index since there are no more page items.");
        await _storage.DeleteIndexAsync(hive, replicaHives, id);
    }

    // Delete orphan blobs.
    await DeleteOrphansAsync(hive, replicaHives, existingPageUrls, indexInfo, mergeResult);

    _logger.LogInformation(
        "Done updating the {PackageId} registration index in the {Hive} hive and replica hives {ReplicaHives}. {ModifiedPages} pages were " +
        "updated, {ModifiedLeaves} leaves were upserted, and {DeletedLeaves} leaves were deleted.",
        id,
        hive,
        replicaHives,
        mergeResult.ModifiedPages.Count,
        mergeResult.ModifiedLeaves.Count,
        mergeResult.DeletedLeaves.Count);
}
/// <summary>
/// Ensures every page in the index is stored as its own blob (not inlined), refreshes
/// each modified page's metadata and commit info, and writes the modified page blobs
/// with bounded concurrency. Unmodified, already-non-inlined pages are skipped.
/// </summary>
private async Task UpdateNonInlinedPagesAsync(
    HiveType hive,
    IReadOnlyList<HiveType> replicaHives,
    string id,
    IndexInfo indexInfo,
    CatalogCommit registrationCommit,
    HiveMergeResult mergeResult)
{
    // Page writes are prepared sequentially but executed in parallel below.
    var taskFactories = new ConcurrentBag<Func<Task>>();
    for (var pageIndex = 0; pageIndex < indexInfo.Items.Count; pageIndex++)
    {
        var pageInfo = indexInfo.Items[pageIndex];

        if (pageInfo.IsInlined)
        {
            _logger.LogInformation(
                "Moving page {PageNumber}/{PageCount} [{Lower}, {Upper}] from being inlined to having its own blob.",
                pageIndex + 1,
                indexInfo.Items.Count,
                pageInfo.Lower.ToNormalizedString(),
                pageInfo.Upper.ToNormalizedString());

            // Replace the inlined page with a non-inlined clone at the same index
            // position; the item count is unchanged so the loop bound stays valid.
            pageInfo = await pageInfo.CloneToNonInlinedAsync();
            indexInfo.RemoveAt(pageIndex);
            indexInfo.Insert(pageIndex, pageInfo);
        }
        else if (!mergeResult.ModifiedPages.Contains(pageInfo))
        {
            // Already non-inlined and untouched by the merge: no write needed.
            _logger.LogInformation(
                "Skipping unmodified page {PageNumber}/{PageCount} [{Lower}, {Upper}].",
                pageIndex + 1,
                indexInfo.Items.Count,
                pageInfo.Lower.ToNormalizedString(),
                pageInfo.Upper.ToNormalizedString());
            continue;
        }

        Guard.Assert(!pageInfo.IsInlined, "The page should not be inlined at this point.");

        var page = await pageInfo.GetPageAsync();
        _entityBuilder.UpdateNonInlinedPageItem(pageInfo.PageItem, hive, id, pageInfo.Count, pageInfo.Lower, pageInfo.Upper);
        _entityBuilder.UpdateCommit(pageInfo.PageItem, registrationCommit);
        _entityBuilder.UpdatePage(page, hive, id, pageInfo.Count, pageInfo.Lower, pageInfo.Upper);
        _entityBuilder.UpdateCommit(page, registrationCommit);

        // Capture the page number in a local so the closure below does not observe the
        // loop variable's final value.
        var pageNumber = pageIndex + 1;
        taskFactories.Add(async () =>
        {
            _logger.LogInformation(
                "Updating page {PageNumber}/{PageCount} [{Lower}, {Upper}].",
                pageNumber,
                indexInfo.Items.Count,
                pageInfo.Lower.ToNormalizedString(),
                pageInfo.Upper.ToNormalizedString());

            await _storage.WritePageAsync(hive, replicaHives, id, pageInfo.Lower, pageInfo.Upper, page);
        });
    }

    // Execute the queued page writes with a bounded number of concurrent workers.
    await ParallelAsync.Repeat(
        async () =>
        {
            await Task.Yield();
            while (taskFactories.TryTake(out var taskFactory))
            {
                await taskFactory();
            }
        },
        _options.Value.MaxConcurrentOperationsPerHive);
}
/// <summary>
/// Eagerly downloads the non-inlined pages whose version bounds contain at least one
/// incoming catalog item, using a two-pointer sweep over the sorted catalog items and
/// the sorted pages. This only front-loads work that would otherwise happen lazily.
/// </summary>
private async Task LoadRelevantPagesAsync(List<CatalogCommitItem> sortedCatalog, IndexInfo indexInfo)
{
    // If there are no page items at all, there is no work to do.
    if (indexInfo.Items.Count == 0)
    {
        return;
    }

    var catalogIndex = 0;
    var pageIndex = 0;
    var relevantPages = new ConcurrentBag<PageInfo>();

    // Load pages where at least one catalog item falls in the bounds of the page.
    // Both sequences are sorted by version, so a single forward pass suffices.
    while (catalogIndex < sortedCatalog.Count && pageIndex < indexInfo.Items.Count)
    {
        var currentCatalog = sortedCatalog[catalogIndex];
        var currentPage = indexInfo.Items[pageIndex];
        if (currentCatalog.PackageIdentity.Version < currentPage.Lower)
        {
            // The current catalog item is lower than the current page's bounds. Move on to the next catalog item.
            catalogIndex++;
        }
        else if (currentCatalog.PackageIdentity.Version <= currentPage.Upper)
        {
            // The current catalog item is inside the current page's bounds. This page should be downloaded.
            if (!currentPage.IsInlined)
            {
                _logger.LogInformation(
                    "Preemptively loading page {PageNumber}/{PageCount} [{Lower}, {Upper}] since catalog version {Version} is in its bounds.",
                    pageIndex + 1,
                    indexInfo.Items.Count,
                    currentPage.Lower.ToNormalizedString(),
                    currentPage.Upper.ToNormalizedString(),
                    currentCatalog.PackageIdentity.Version.ToNormalizedString());

                // Inlined pages need no fetch — their leaves already live in the index blob.
                relevantPages.Add(currentPage);
            }

            // This page is now included in the set of relevant pages. No need to consider it any more.
            pageIndex++;
        }
        else
        {
            // The current catalog item is higher than the current page's bounds. This page is not relevant.
            pageIndex++;
        }
    }

    // Fetch the relevant pages with a bounded number of concurrent workers; GetLeafInfosAsync
    // caches its result, so this pre-warms later lazy access.
    await ParallelAsync.Repeat(
        async () =>
        {
            await Task.Yield();
            while (relevantPages.TryTake(out var pageInfo))
            {
                await pageInfo.GetLeafInfosAsync();
            }
        },
        _options.Value.MaxConcurrentOperationsPerHive);
}