Esempio n. 1
0
        /// <summary>
        /// Traces one line for every blob in <paramref name="centralStorage"/> whose name matches <c>CheckpointNameRegex</c>.
        /// </summary>
        /// <param name="context">Operation context used for tracing and passed to the blob enumeration.</param>
        /// <param name="centralStorage">Central storage whose blobs are listed.</param>
        /// <returns>
        /// The task produced by <c>PerformOperationAsync</c>; the wrapped operation itself yields
        /// <c>BoolResult.Success</c> once the enumeration has finished.
        /// </returns>
        private static Task<BoolResult> PrintCheckpointsAsync(OperationContext context, BlobCentralStorage centralStorage)
        {
            return context.PerformOperationAsync(Tracer, async () =>
            {
                // Each matching blob is reported with its storage id and its creation / last access timestamps.
                await foreach (var entry in centralStorage.ListBlobsWithNameMatchingAsync(context, CheckpointNameRegex))
                {
                    Tracer.Always(context, $"StorageId=[{entry.StorageId}] CreationTimeUtc=[{entry.CreationTime}] LastAccessTimeUtc=[{entry.LastAccessTime}]");
                }

                return BoolResult.Success;
            });
        }
        /// <summary>
        /// Creates the prepopulated content store directory and fills it with random content.
        /// Does nothing when the directory already exists.
        /// </summary>
        private async Task CreatePrepopulatedIfMissing()
        {
            var alreadyPrepopulated = FileSystem.DirectoryExists(_prePopulatedRootPath);

            if (!alreadyPrepopulated)
            {
                _tracer.Always(_context, $"Create prepopulated content store at root=[{_prePopulatedRootPath}]");
                FileSystem.CreateDirectory(_prePopulatedRootPath);

                // Run a store rooted at the new directory and put random content into it through a session.
                await RunStore(
                    _prePopulatedRootPath,
                    CacheName,
                    storeSession => storeSession.PutRandomAsync(_context, ContentHashType, false, MaxSize, 100, LargeFileSize, RandomFileSizes));
            }
        }
        /// <summary>
        /// Validates the content files found on disk (via <c>ValidateFileAsync</c>), removes the entries that fail validation,
        /// and persists the self-check progress so that an interrupted run can be resumed later.
        /// </summary>
        /// <param name="context">Operation context used for tracing; its token is polled to stop the check early.</param>
        /// <param name="selfCheckState">State left by the previous self check; its <c>LastPosition</c> is used as the resume point.</param>
        /// <param name="status">
        /// When equal to <c>SelfCheckStatus.InProgress</c> (and a last position is available) the check resumes after that position;
        /// otherwise the entire content directory is checked.
        /// </param>
        /// <returns>A successful result carrying the number of invalid and of processed entries for this run.</returns>
        private async Task<Result<SelfCheckResult>> SelfCheckContentDirectoryCoreAsync(
            OperationContext context,
            SelfCheckState selfCheckState,
            SelfCheckStatus status)
        {
            _tracer.Always(context, "Starting self check.");
            // Self checking procedure validates that in-memory content directory
            // is valid in respect to the state on disk.
            // Namely, it checks that the hashes for all the files and their size are correct.
            var stopwatch = Stopwatch.StartNew();

            // Enumerating files from disk instead of looking them up from content directory.
            // This is done due to simplicity (we don't have to worry about replicas) and because an additional IO cost is negligible compared to the cost of rehashing.
            var contentHashes = _contentStoreInternal.ReadSnapshotFromDisk(context).ListOrderedByHash();

            _tracer.Debug(context, $"SelfCheck: Enumerated {contentHashes.Count} entries from disk by {stopwatch.ElapsedMilliseconds}ms.");

            stopwatch.Restart();

            // Trying to restore the index of a hash that we processed before.
            int index = 0;

            if (status == SelfCheckStatus.InProgress && selfCheckState.LastPosition != null)
            {
                index = findNextIndexToProcess(selfCheckState.LastPosition.Value);
                _tracer.Debug(context, $"SelfCheck: skipping {index} elements based on previous state '{selfCheckState.ToParseableString()}'.");
            }
            else
            {
                string statusAsString = status == SelfCheckStatus.Force ? "the epoch has changed" : "a self check is out of date";
                _tracer.Debug(
                    context,
                    $"SelfCheck: starting self check for the entire content directory because {statusAsString}. Previous state '{selfCheckState.ToParseableString()}'.");
            }

            // Time span for tracking progress.
            TimeSpan progressTracker = TimeSpan.FromSeconds(0);
            long processedBytes = 0;

            int invalidEntries = 0;
            int processedEntries = 0;

            for (; index < contentHashes.Count; index++)
            {
                if (context.Token.IsCancellationRequested)
                {
                    // NOTE(review): at this point the entry at 'index' has not been validated yet, but it is saved below as the
                    // resume position, and a resumed run starts right after the saved hash. That entry appears to be skipped
                    // by the next run - confirm whether this is intended.
                    _tracer.Debug(context, "SelfCheck: Exiting self check because cancellation was requested.");
                    break;
                }

                var hashInfo = contentHashes[index];
                processedEntries++;

                var (isValid, error) = await ValidateFileAsync(context, hashInfo.Hash, hashInfo.Payload);

                if (!isValid)
                {
                    _tracer.Warning(context, $"SelfCheck: Found invalid entry in cache. Hash={hashInfo.Hash.ToShortString()}. {error}. Evicting the file...");
                    await _contentStoreInternal.RemoveInvalidContentAsync(context, hashInfo.Hash);

                    invalidEntries++;
                }

                // Tracking the progress if needed.
                traceProgressIfNeeded(hashInfo.Hash, index);

                // If the current entry is not the last one, and we reached the number of invalid files,
                // then exiting the loop.
                if (invalidEntries == _settings.InvalidFilesLimit && index != contentHashes.Count - 1)
                {
                    _tracer.Debug(context, $"SelfCheck: Exiting self check because invalid file limit of {_settings.InvalidFilesLimit} is reached.");
                    break;
                }

                if (_settings.HashAnalysisDelay != null)
                {
                    // Optional throttling between two entries; the delay observes the same cancellation token.
                    await Task.Delay(_settings.HashAnalysisDelay.Value, context.Token);
                }
            }

            if (index == contentHashes.Count)
            {
                // All the items are processed. Saving new stable checkpoint
                UpdateSelfCheckStateOnDisk(context, SelfCheckState.SelfCheckComplete(_settings.Epoch, _clock.UtcNow));
            }
            else
            {
                // The loop was interrupted. Saving an incremental state.
                var newStatus = selfCheckState.WithEpochAndPosition(_settings.Epoch, contentHashes[index].Hash);
                UpdateSelfCheckStateOnDisk(context, newStatus);
            }

            return Result.Success(new SelfCheckResult(invalidHashes: invalidEntries, totalProcessedFiles: processedEntries));

            // Maps the hash saved by a previous run to the index of the first entry that still has to be processed.
            int findNextIndexToProcess(ContentHash lastProcessedHash)
            {
                var binarySearchResult = contentHashes.BinarySearch(new PayloadFromDisk<FileInfo>(lastProcessedHash, default), new ByHashPayloadFromDiskComparer<FileInfo>());

                if (binarySearchResult >= 0)
                {
                    // Exact match: resume with the entry right after it.
                    // When the match is the very last entry nothing follows it, so the check starts over from the beginning.
                    // (The result must not be complemented in this case: complementing a non-negative index gives a negative one.)
                    return binarySearchResult < contentHashes.Count - 1 ? binarySearchResult + 1 : 0;
                }

                // The exact match is not found (which is fine, because the state on disk may changed between app invocations).
                // BinarySearch returns a negative value that bitwise complement of the closest element in the sorted array.
                int closestIndex = ~binarySearchResult;

                // A complement pointing past the end means there is nothing after the saved position: start over from the beginning.
                return closestIndex < contentHashes.Count ? closestIndex : 0;
            }

            // Accumulates the processed bytes and, once per reporting interval, traces the progress and saves an incremental state.
            void traceProgressIfNeeded(ContentHash currentHash, int currentHashIndex)
            {
                processedBytes += contentHashes[currentHashIndex].Payload.Length;

                var swTime = stopwatch.Elapsed;

                if (swTime - progressTracker > _settings.ProgressReportingInterval)
                {
                    // It is possible to have multiple replicas with the same hash.
                    // We need to save the state only when *all* the replicas are processed.
                    // So we check if the next item has a different hash.
                    // No check is performed on the last element because the state will be saved immediately after
                    if (currentHashIndex + 1 < contentHashes.Count && currentHash != contentHashes[currentHashIndex + 1].Hash)
                    {
                        // NOTE(review): the speed is computed against the configured interval, not against the time that
                        // actually elapsed since the previous report - confirm this approximation is acceptable.
                        var speed = ((double)processedBytes / (1024 * 1024)) / _settings.ProgressReportingInterval.TotalSeconds;
                        _tracer.Always(context, $"SelfCheck: processed {index}/{contentHashes.Count}: {new SelfCheckResult(invalidEntries, processedEntries)}. Hashing speed {speed:0.##}Mb/s.");
                        processedBytes = 0;

                        // Saving incremental state
                        var newStatus = selfCheckState.WithEpochAndPosition(_settings.Epoch, currentHash);
                        UpdateSelfCheckStateOnDisk(context, newStatus);

                        progressTracker = swTime;
                    }
                }
            }
        }
        /// <summary>
        /// Initializes the performance test base and makes sure a prepopulated database file exists.
        /// When the file is missing, a store is created in a temporary directory, filled with <c>MaxRowCount</c>
        /// random entries, shut down, and its database file is copied into the prepopulated root directory.
        /// </summary>
        /// <param name="logger">Logger handed to the base class and to the pass-through file system.</param>
        /// <param name="resultsFixture">Fixture stored in <c>ResultsFixture</c>.</param>
        /// <param name="initialDatabaseSize">Value stored in <c>_initialDatabaseSize</c>.</param>
        /// <param name="databaseFileName">Name of the database file, both inside the temporary store directory and inside the prepopulated root.</param>
        /// <param name="createStoreFunc">Factory creating the memoization store inside a given directory.</param>
        protected MemoizationPerformanceTests(
            ILogger logger,
            PerformanceResultsFixture resultsFixture,
            InitialDatabaseSize initialDatabaseSize,
            string databaseFileName,
            Func<DisposableDirectory, IMemoizationStore> createStoreFunc)
            : base(() => new PassThroughFileSystem(logger), logger)
        {
            _context = new Context(Logger);

            // The item count may be overridden through an environment variable.
            var configuredItemCount = Environment.GetEnvironmentVariable(ItemCountEnvironmentVariableName);
            if (configuredItemCount == null)
            {
                _itemCount = ItemCountDefault;
            }
            else
            {
                _itemCount = int.Parse(configuredItemCount);
            }

            _tracer.Debug(_context, $"Using itemCount=[{_itemCount}] (MaxRowCount=[{MaxRowCount}])");

            ResultsFixture = resultsFixture;
            _initialDatabaseSize = initialDatabaseSize;
            CreateStoreFunc = createStoreFunc;

            _prePopulatedRootPath = FileSystem.GetTempPath() / "CloudStore" / "MemoizationPerformanceTestsPrePopulated";
            var rootAlreadyExists = FileSystem.DirectoryExists(_prePopulatedRootPath);
            if (!rootAlreadyExists)
            {
                FileSystem.CreateDirectory(_prePopulatedRootPath);
            }

            AbsolutePath prePopulatedDatabasePath = _prePopulatedRootPath / databaseFileName;

            // Nothing more to do when a previous run already produced the database.
            if (FileSystem.FileExists(prePopulatedDatabasePath))
            {
                return;
            }

            _tracer.Always(_context, $"Creating prepopulated database at path={prePopulatedDatabasePath}");

            using (var scratchDirectory = new DisposableDirectory(FileSystem))
            {
                using (var store = createStoreFunc(scratchDirectory))
                {
                    populateStore(store);
                }

                // The store is disposed at this point, while the scratch directory is still alive.
                FileSystem.CopyFileAsync(scratchDirectory.Path / databaseFileName, prePopulatedDatabasePath, false).Wait();
            }

            // Starts the store, adds MaxRowCount random entries through a session and shuts everything down again.
            void populateStore(IMemoizationStore store)
            {
                try
                {
                    store.StartupAsync(_context).Result.ShouldBeSuccess();

                    var sessionCreation = store.CreateSession(_context, Name);
                    sessionCreation.ShouldBeSuccess();

                    using (var session = sessionCreation.Session)
                    {
                        try
                        {
                            session.StartupAsync(_context).Result.ShouldBeSuccess();

                            for (var row = 0; row < MaxRowCount; row++)
                            {
                                var fingerprint = StrongFingerprint.Random();
                                var hashList = ContentHashList.Random();
                                var valueToAdd = new ContentHashListWithDeterminism(hashList, CacheDeterminism.None);

                                var addResult = session.AddOrGetContentHashListAsync(_context, fingerprint, valueToAdd, Token).Result;

                                addResult.Succeeded.Should().BeTrue();
                                addResult.ContentHashListWithDeterminism.ContentHashList.Should().BeNull();
                            }
                        }
                        finally
                        {
                            session.ShutdownAsync(_context).Result.ShouldBeSuccess();
                        }
                    }
                }
                finally
                {
                    store.ShutdownAsync(_context).Result.ShouldBeSuccess();
                }
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Checks every blob path enumerated from disk: the file name must start with the name of its parent directory,
        /// and the hash encoded in the path must equal the hash computed from the file contents.
        /// </summary>
        /// <remarks>
        /// Paths from which no hash can be extracted are traced and skipped; they are not counted as content mismatches.
        /// All files are validated concurrently, so the mismatch counters are updated atomically.
        /// </remarks>
        /// <param name="context">Context used for tracing.</param>
        /// <returns><c>true</c> when no mismatch of either kind was found; otherwise <c>false</c>.</returns>
        private async Task<bool> ValidateNameHashesMatchContentHashesAsync(Context context)
        {
            int mismatchedParentDirectoryCount = 0;
            int mismatchedContentHashCount = 0;

            _tracer.Always(context, "Validating local CAS content hashes...");
            await TaskSafetyHelpers.WhenAll(_enumerateBlobPathsFromDisk().Select(
                async blobPath =>
                {
                    var contentFile = blobPath.FullPath;
                    var parentDirectoryName = contentFile.GetParent().FileName;

                    if (!contentFile.FileName.StartsWith(parentDirectoryName, StringComparison.OrdinalIgnoreCase))
                    {
                        System.Threading.Interlocked.Increment(ref mismatchedParentDirectoryCount);

                        _tracer.Debug(
                            context,
                            $"The first {FileSystemContentStoreInternal.HashDirectoryNameLength} characters of the name of content file at {contentFile}" +
                            $" do not match the name of its parent directory {parentDirectoryName}.");
                    }

                    if (!FileSystemContentStoreInternal.TryGetHashFromPath(contentFile, out var hashFromPath))
                    {
                        _tracer.Debug(
                            context,
                            $"The path '{contentFile}' does not contain a well-known hash name.");
                        return;
                    }

                    var hasher = ContentHashers.Get(hashFromPath.HashType);
                    ContentHash hashFromContents;
                    using (var contentStream = await _fileSystem.OpenSafeAsync(
                               contentFile, FileAccess.Read, FileMode.Open, FileShare.Read | FileShare.Delete, FileOptions.SequentialScan, HashingExtensions.HashStreamBufferSize))
                    {
                        hashFromContents = await hasher.GetContentHashAsync(contentStream);
                    }

                    if (hashFromContents != hashFromPath)
                    {
                        // This code runs after awaits, so several continuations may reach it at the same time:
                        // a plain '++' could lose updates.
                        System.Threading.Interlocked.Increment(ref mismatchedContentHashCount);

                        _tracer.Debug(
                            context,
                            $"Content at {contentFile} content hash {hashFromContents.ToShortString()} did not match expected value of {hashFromPath.ToShortString()}.");
                    }
                }));

            _tracer.Always(context, $"{mismatchedParentDirectoryCount} mismatches between content file name and parent directory.");
            _tracer.Always(context, $"{mismatchedContentHashCount} mismatches between content file name and file contents.");

            return mismatchedContentHashCount == 0 && mismatchedParentDirectoryCount == 0;
        }