private static Task<BoolResult> PrintCheckpointsAsync(OperationContext context, BlobCentralStorage centralStorage)
{
    return context.PerformOperationAsync(Tracer, async () =>
    {
        await foreach (var entry in centralStorage.ListBlobsWithNameMatchingAsync(context, CheckpointNameRegex))
        {
            Tracer.Always(context, $"StorageId=[{entry.StorageId}] CreationTimeUtc=[{entry.CreationTime}] LastAccessTimeUtc=[{entry.LastAccessTime}]");
        }

        return BoolResult.Success;
    });
}
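// A minimal standalone sketch of the await-foreach pattern used above. ListBlobsWithNameMatchingAsync
// presumably returns an IAsyncEnumerable<T>, which is consumed lazily one item at a time instead of
// buffering the full listing. All names below are hypothetical and exist only for this illustration.
using System;
using System.Collections.Generic;
using System.Threading.Tasks;

public static class AsyncEnumerationSketch
{
    // Produces items asynchronously, standing in for a paged storage listing.
    private static async IAsyncEnumerable<string> ListItemsAsync()
    {
        for (int i = 0; i < 3; i++)
        {
            await Task.Delay(10); // stand-in for an async storage call
            yield return $"checkpoint-{i}";
        }
    }

    public static async Task Main()
    {
        // Each iteration awaits the next item as it becomes available.
        await foreach (var item in ListItemsAsync())
        {
            Console.WriteLine(item);
        }
    }
}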
private async Task CreatePrepopulatedIfMissing()
{
    if (FileSystem.DirectoryExists(_prePopulatedRootPath))
    {
        return;
    }

    _tracer.Always(_context, $"Creating prepopulated content store at root=[{_prePopulatedRootPath}]");
    FileSystem.CreateDirectory(_prePopulatedRootPath);

    // Fill the new store with random content so that subsequent test runs can reuse it.
    await RunStore(
        _prePopulatedRootPath,
        CacheName,
        session => session.PutRandomAsync(_context, ContentHashType, false, MaxSize, 100, LargeFileSize, RandomFileSizes));
}
private async Task<Result<SelfCheckResult>> SelfCheckContentDirectoryCoreAsync(
    OperationContext context,
    SelfCheckState selfCheckState,
    SelfCheckStatus status)
{
    _tracer.Always(context, "Starting self check.");

    // The self-check procedure validates that the in-memory content directory
    // is consistent with the state on disk.
    // Namely, it checks that the hashes and sizes of all the files are correct.
    var stopwatch = Stopwatch.StartNew();

    // Enumerating files from disk instead of looking them up in the content directory.
    // This is done for simplicity (we don't have to worry about replicas) and because
    // the additional IO cost is negligible compared to the cost of rehashing.
    var contentHashes = _contentStoreInternal.ReadSnapshotFromDisk(context).ListOrderedByHash();
    _tracer.Debug(context, $"SelfCheck: Enumerated {contentHashes.Count} entries from disk in {stopwatch.ElapsedMilliseconds}ms.");

    stopwatch.Restart();

    // Trying to restore the index of the hash that was processed on a previous run.
    int index = 0;
    if (status == SelfCheckStatus.InProgress && selfCheckState.LastPosition != null)
    {
        index = findNextIndexToProcess(selfCheckState.LastPosition.Value);
        _tracer.Debug(context, $"SelfCheck: skipping {index} elements based on previous state '{selfCheckState.ToParseableString()}'.");
    }
    else
    {
        string statusAsString = status == SelfCheckStatus.Force ? "the epoch has changed" : "a self check is out of date";
        _tracer.Debug(
            context,
            $"SelfCheck: starting self check for the entire content directory because {statusAsString}. Previous state '{selfCheckState.ToParseableString()}'.");
    }

    // Time span for tracking progress.
    TimeSpan progressTracker = TimeSpan.FromSeconds(0);
    long processedBytes = 0;

    int invalidEntries = 0;
    int processedEntries = 0;

    for (; index < contentHashes.Count; index++)
    {
        if (context.Token.IsCancellationRequested)
        {
            _tracer.Debug(context, "SelfCheck: Exiting self check because cancellation was requested.");
            break;
        }

        var hashInfo = contentHashes[index];
        processedEntries++;

        var (isValid, error) = await ValidateFileAsync(context, hashInfo.Hash, hashInfo.Payload);

        if (!isValid)
        {
            _tracer.Warning(context, $"SelfCheck: Found invalid entry in cache. Hash={hashInfo.Hash.ToShortString()}. {error}. Evicting the file...");

            await _contentStoreInternal.RemoveInvalidContentAsync(context, hashInfo.Hash);
            invalidEntries++;
        }

        // Tracking the progress if needed.
        traceProgressIfNeeded(hashInfo.Hash, index);

        // If the current entry is not the last one, and we reached the limit of invalid files,
        // then exit the loop.
        if (invalidEntries == _settings.InvalidFilesLimit && index != contentHashes.Count - 1)
        {
            _tracer.Debug(context, $"SelfCheck: Exiting self check because the invalid file limit of {_settings.InvalidFilesLimit} was reached.");
            break;
        }

        if (_settings.HashAnalysisDelay != null)
        {
            await Task.Delay(_settings.HashAnalysisDelay.Value, context.Token);
        }
    }

    if (index == contentHashes.Count)
    {
        // All the items were processed. Saving a new stable checkpoint.
        UpdateSelfCheckStateOnDisk(context, SelfCheckState.SelfCheckComplete(_settings.Epoch, _clock.UtcNow));
    }
    else
    {
        // The loop was interrupted. Saving an incremental state.
        var newStatus = selfCheckState.WithEpochAndPosition(_settings.Epoch, contentHashes[index].Hash);
        UpdateSelfCheckStateOnDisk(context, newStatus);
    }

    return Result.Success(new SelfCheckResult(invalidHashes: invalidEntries, totalProcessedFiles: processedEntries));

    int findNextIndexToProcess(ContentHash lastProcessedHash)
    {
        var binarySearchResult = contentHashes.BinarySearch(
            new PayloadFromDisk<FileInfo>(lastProcessedHash, default),
            new ByHashPayloadFromDiskComparer<FileInfo>());

        int targetIndex = 0;
        if (binarySearchResult >= 0)
        {
            // An exact match was found: resume from the element right after it. The clamp handles
            // a match on the last element: the result equals Count, the main loop is skipped, and
            // a stable checkpoint is saved (everything was already processed).
            targetIndex = Math.Min(binarySearchResult + 1, contentHashes.Count);
        }
        else
        {
            // The exact match was not found (which is fine, because the state on disk may have changed
            // between app invocations). BinarySearch returns a negative value that is the bitwise
            // complement of the index of the next larger element in the sorted list.
            binarySearchResult = ~binarySearchResult;
            if (binarySearchResult < contentHashes.Count)
            {
                targetIndex = binarySearchResult;
            }
        }

        return targetIndex;
    }

    void traceProgressIfNeeded(ContentHash currentHash, int currentHashIndex)
    {
        processedBytes += contentHashes[currentHashIndex].Payload.Length;
        var swTime = stopwatch.Elapsed;
        if (swTime - progressTracker > _settings.ProgressReportingInterval)
        {
            // It is possible to have multiple replicas with the same hash.
            // We need to save the state only when *all* the replicas are processed,
            // so we check that the next item has a different hash.
            // No check is performed for the last element because the state is saved immediately after the loop.
            if (currentHashIndex + 1 < contentHashes.Count && currentHash != contentHashes[currentHashIndex + 1].Hash)
            {
                // Approximation: assumes one report per reporting interval.
                var speed = ((double)processedBytes / (1024 * 1024)) / _settings.ProgressReportingInterval.TotalSeconds;
                _tracer.Always(context, $"SelfCheck: processed {currentHashIndex}/{contentHashes.Count}: {new SelfCheckResult(invalidEntries, processedEntries)}. Hashing speed {speed:0.##}MB/s.");
                processedBytes = 0;

                // Saving incremental state
                var newStatus = selfCheckState.WithEpochAndPosition(_settings.Epoch, currentHash);
                UpdateSelfCheckStateOnDisk(context, newStatus);

                progressTracker = swTime;
            }
        }
    }
}
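// A minimal standalone sketch (not part of the store above) illustrating the resume logic in
// findNextIndexToProcess. List<T>.BinarySearch returns the index of an exact match, or the bitwise
// complement of the index of the next larger element when no match exists. All names below are
// hypothetical and exist only for this illustration.
using System;
using System.Collections.Generic;

public static class ResumeIndexSketch
{
    public static int FindNextIndexToProcess(List<int> sortedItems, int lastProcessed)
    {
        int result = sortedItems.BinarySearch(lastProcessed);
        if (result >= 0)
        {
            // Exact match: resume from the element right after it
            // (clamped to Count, which means "everything was processed").
            return Math.Min(result + 1, sortedItems.Count);
        }

        // No exact match: ~result is the index of the first element larger than
        // lastProcessed (or Count when no such element exists), i.e. the resume point.
        int insertionPoint = ~result;
        return insertionPoint < sortedItems.Count ? insertionPoint : 0;
    }

    public static void Main()
    {
        var items = new List<int> { 10, 20, 30, 40 };
        Console.WriteLine(FindNextIndexToProcess(items, 20)); // 2: resume right after the match
        Console.WriteLine(FindNextIndexToProcess(items, 25)); // 2: first element larger than 25
        Console.WriteLine(FindNextIndexToProcess(items, 40)); // 4: the whole list was processed
        Console.WriteLine(FindNextIndexToProcess(items, 50)); // 0: state is stale, start over
    }
}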
protected MemoizationPerformanceTests(
    ILogger logger,
    PerformanceResultsFixture resultsFixture,
    InitialDatabaseSize initialDatabaseSize,
    string databaseFileName,
    Func<DisposableDirectory, IMemoizationStore> createStoreFunc)
    : base(() => new PassThroughFileSystem(logger), logger)
{
    _context = new Context(Logger);

    var itemCountEnvironmentVariable = Environment.GetEnvironmentVariable(ItemCountEnvironmentVariableName);
    _itemCount = itemCountEnvironmentVariable == null ? ItemCountDefault : int.Parse(itemCountEnvironmentVariable);
    _tracer.Debug(_context, $"Using itemCount=[{_itemCount}] (MaxRowCount=[{MaxRowCount}])");

    ResultsFixture = resultsFixture;
    _initialDatabaseSize = initialDatabaseSize;
    CreateStoreFunc = createStoreFunc;

    _prePopulatedRootPath = FileSystem.GetTempPath() / "CloudStore" / "MemoizationPerformanceTestsPrePopulated";
    if (!FileSystem.DirectoryExists(_prePopulatedRootPath))
    {
        FileSystem.CreateDirectory(_prePopulatedRootPath);
    }

    AbsolutePath databaseFilePath = _prePopulatedRootPath / databaseFileName;
    if (FileSystem.FileExists(databaseFilePath))
    {
        return;
    }

    _tracer.Always(_context, $"Creating prepopulated database at path={databaseFilePath}");

    using (var disposableDirectory = new DisposableDirectory(FileSystem))
    {
        using (var store = createStoreFunc(disposableDirectory))
        {
            try
            {
                var startupStoreResult = store.StartupAsync(_context).Result;
                startupStoreResult.ShouldBeSuccess();

                var createSessionResult = store.CreateSession(_context, Name);
                createSessionResult.ShouldBeSuccess();

                using (var session = createSessionResult.Session)
                {
                    try
                    {
                        var startupSessionResult = session.StartupAsync(_context).Result;
                        startupSessionResult.ShouldBeSuccess();

                        // Populate the database with random fingerprint -> content hash list pairs.
                        for (var i = 0; i < MaxRowCount; i++)
                        {
                            var strongFingerprint = StrongFingerprint.Random();
                            var contentHashList = ContentHashList.Random();
                            var r = session.AddOrGetContentHashListAsync(
                                _context,
                                strongFingerprint,
                                new ContentHashListWithDeterminism(contentHashList, CacheDeterminism.None),
                                Token).Result;
                            r.Succeeded.Should().BeTrue();

                            // A null ContentHashList indicates the new value was added rather than
                            // an existing value returned for the fingerprint.
                            r.ContentHashListWithDeterminism.ContentHashList.Should().BeNull();
                        }
                    }
                    finally
                    {
                        var shutdownSessionResult = session.ShutdownAsync(_context).Result;
                        shutdownSessionResult.ShouldBeSuccess();
                    }
                }
            }
            finally
            {
                var shutdownStoreResult = store.ShutdownAsync(_context).Result;
                shutdownStoreResult.ShouldBeSuccess();
            }
        }

        FileSystem.CopyFileAsync(disposableDirectory.Path / databaseFileName, databaseFilePath, false).Wait();
    }
}
private async Task<bool> ValidateNameHashesMatchContentHashesAsync(Context context)
{
    int mismatchedParentDirectoryCount = 0;
    int mismatchedContentHashCount = 0;
    _tracer.Always(context, "Validating local CAS content hashes...");

    await TaskSafetyHelpers.WhenAll(_enumerateBlobPathsFromDisk().Select(
        async blobPath =>
        {
            var contentFile = blobPath.FullPath;

            if (!contentFile.FileName.StartsWith(contentFile.GetParent().FileName, StringComparison.OrdinalIgnoreCase))
            {
                // The counters are shared across concurrent lambdas, so increment them atomically.
                Interlocked.Increment(ref mismatchedParentDirectoryCount);

                _tracer.Debug(
                    context,
                    $"The first {FileSystemContentStoreInternal.HashDirectoryNameLength} characters of the name of content file at {contentFile}"
                    + $" do not match the name of its parent directory {contentFile.GetParent().FileName}.");
            }

            if (!FileSystemContentStoreInternal.TryGetHashFromPath(contentFile, out var hashFromPath))
            {
                _tracer.Debug(
                    context,
                    $"The path '{contentFile}' does not contain a well-known hash name.");
                return;
            }

            var hasher = ContentHashers.Get(hashFromPath.HashType);
            ContentHash hashFromContents;

            using (var contentStream = await _fileSystem.OpenSafeAsync(
                contentFile, FileAccess.Read, FileMode.Open, FileShare.Read | FileShare.Delete, FileOptions.SequentialScan, HashingExtensions.HashStreamBufferSize))
            {
                hashFromContents = await hasher.GetContentHashAsync(contentStream);
            }

            if (hashFromContents != hashFromPath)
            {
                Interlocked.Increment(ref mismatchedContentHashCount);

                _tracer.Debug(
                    context,
                    $"Content at {contentFile} has hash {hashFromContents.ToShortString()}, which does not match the expected value {hashFromPath.ToShortString()} from its path.");
            }
        }));

    _tracer.Always(context, $"{mismatchedParentDirectoryCount} mismatches between content file name and parent directory.");
    _tracer.Always(context, $"{mismatchedContentHashCount} mismatches between content file name and file contents.");

    return mismatchedContentHashCount == 0 && mismatchedParentDirectoryCount == 0;
}
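// A minimal standalone sketch of the same validation idea using only the BCL (this is not the
// BuildXL API; the layout convention and names below are hypothetical). In a content-addressable
// store where each file is named by the hex digest of its contents, re-hashing a file and
// comparing against its name detects on-disk corruption.
using System;
using System.IO;
using System.Security.Cryptography;

public static class CasValidationSketch
{
    // Returns true when the file's SHA-256 digest matches the hash encoded in its file name,
    // assuming a store that names blobs by hex digest, e.g. ".../a1b2c3....blob".
    public static bool ValidateFile(string path)
    {
        string hashFromName = Path.GetFileNameWithoutExtension(path);

        using (var stream = File.OpenRead(path))
        using (var sha256 = SHA256.Create())
        {
            string hashFromContents = BitConverter.ToString(sha256.ComputeHash(stream)).Replace("-", string.Empty);
            return string.Equals(hashFromName, hashFromContents, StringComparison.OrdinalIgnoreCase);
        }
    }
}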