/// <summary>
/// Writes the given self check state to the self check file.
/// </summary>
/// <remarks>
/// The write is best effort: an <see cref="IOException"/> is traced as a warning and not rethrown.
/// </remarks>
/// <param name="context">Context used for tracing.</param>
/// <param name="selfCheckState">State to serialize and store.</param>
private void UpdateSelfCheckStateOnDisk(Context context, SelfCheckState selfCheckState)
{
    try
    {
        // The same serialized form is used for both the trace message and the file content.
        var serializedState = selfCheckState.ToParseableString();

        _tracer.Debug(context, $"SelfCheck: Updating self check status on disk with new state: {serializedState}");
        _fileSystem.WriteAllText(_selfCheckFilePath, serializedState);
    }
    catch (IOException ioError)
    {
        _tracer.Warning(context, $"SelfCheck: Failed updating self check status on disk: {ioError}");
    }
}
/// <summary>
/// Checks that a <see cref="SelfCheckState"/> survives a ToParseableString/TryParse round trip,
/// both with and without a content hash.
/// </summary>
public void SelfCheckStateTests()
{
    // Round-trip the timestamp through its readable form first, so it loses the extra precision
    // and the equality checks below can succeed.
    var timestamp = DateTimeUtilities.FromReadableTimestamp(DateTime.UtcNow.ToReadableString()).Value;

    var stateWithHash = new SelfCheckState("Epoch1", timestamp, ContentHash.Random());
    assertRoundTrip(stateWithHash);

    var stateWithoutHash = new SelfCheckState("Epoch1", timestamp);
    assertRoundTrip(stateWithoutHash);

    // Serializes the state, parses it back and expects a non-null result equal to the input.
    void assertRoundTrip(SelfCheckState expected)
    {
        var reparsed = SelfCheckState.TryParse(expected.ToParseableString());
        reparsed.Should().NotBeNull();
        reparsed.Value.Should().Be(expected);
    }
}
/// <summary>
/// Validates the content found on disk entry by entry, evicts invalid files and saves the progress
/// so that an interrupted self check can be resumed later.
/// </summary>
/// <param name="context">Operation context used for tracing and cancellation.</param>
/// <param name="selfCheckState">State of the previous self check; used to resume and as the base for the new state.</param>
/// <param name="status">Status of the previous self check; decides between resuming and starting over.</param>
/// <returns>
/// A successful result with the number of invalid entries found and the number of entries processed by this run.
/// </returns>
private async Task<Result<SelfCheckResult>> SelfCheckContentDirectoryCoreAsync(
    OperationContext context,
    SelfCheckState selfCheckState,
    SelfCheckStatus status)
{
    _tracer.Always(context, "Starting self check.");

    // Self checking procedure validates that in-memory content directory
    // is valid in respect to the state on disk.
    // Namely, it checks that the hashes for all the files and their size are correct.
    var stopwatch = Stopwatch.StartNew();

    // Enumerating files from disk instead of looking them up from content directory.
    // This is done due to simplicity (we don't have to worry about replicas) and because
    // an additional IO cost is negligible compared to the cost of rehashing.
    var contentHashes = _contentStoreInternal.ReadSnapshotFromDisk(context).ListOrderedByHash();

    _tracer.Debug(context, $"SelfCheck: Enumerated {contentHashes.Count} entries from disk by {stopwatch.ElapsedMilliseconds}ms.");

    stopwatch.Restart();

    // Trying to restore the index of a hash that we processed before.
    int index = 0;
    if (status == SelfCheckStatus.InProgress && selfCheckState.LastPosition != null)
    {
        index = findNextIndexToProcess(selfCheckState.LastPosition.Value);
        _tracer.Debug(context, $"SelfCheck: skipping {index} elements based on previous state '{selfCheckState.ToParseableString()}'.");
    }
    else
    {
        string statusAsString = status == SelfCheckStatus.Force ? "the epoch has changed" : "a self check is out of date";
        _tracer.Debug(
            context,
            $"SelfCheck: starting self check for the entire content directory because {statusAsString}. Previous state '{selfCheckState.ToParseableString()}'.");
    }

    // Time span for tracking progress.
    TimeSpan progressTracker = TimeSpan.FromSeconds(0);
    long processedBytes = 0;

    int invalidEntries = 0;
    int processedEntries = 0;

    // Index of the last entry validated by this run, or -1 when nothing was validated yet.
    // It differs from 'index' when the loop is cancelled before the entry at 'index' is touched.
    int lastProcessedIndex = -1;

    for (; index < contentHashes.Count; index++)
    {
        if (context.Token.IsCancellationRequested)
        {
            _tracer.Debug(context, "SelfCheck: Exiting self check because cancellation was requested.");
            break;
        }

        var hashInfo = contentHashes[index];
        processedEntries++;

        var (isValid, error) = await ValidateFileAsync(context, hashInfo.Hash, hashInfo.Payload);
        if (!isValid)
        {
            _tracer.Warning(context, $"SelfCheck: Found invalid entry in cache. Hash={hashInfo.Hash.ToShortString()}. {error}. Evicting the file...");
            await _contentStoreInternal.RemoveInvalidContentAsync(context, hashInfo.Hash);
            invalidEntries++;
        }

        lastProcessedIndex = index;

        // Tracking the progress if needed.
        traceProgressIfNeeded(hashInfo.Hash, index);

        // If the current entry is not the last one, and we reached the number of invalid files,
        // then exiting the loop.
        if (invalidEntries == _settings.InvalidFilesLimit && index != contentHashes.Count - 1)
        {
            _tracer.Debug(context, $"SelfCheck: Exiting self check because invalid file limit of {_settings.InvalidFilesLimit} is reached.");
            break;
        }

        if (_settings.HashAnalysisDelay != null)
        {
            // NOTE(review): a cancellation during this delay surfaces as an exception from the await,
            // so the incremental state below is not saved in that case.
            await Task.Delay(_settings.HashAnalysisDelay.Value, context.Token);
        }
    }

    if (index == contentHashes.Count)
    {
        // All the items are processed. Saving new stable checkpoint.
        UpdateSelfCheckStateOnDisk(context, SelfCheckState.SelfCheckComplete(_settings.Epoch, _clock.UtcNow));
    }
    else
    {
        // The loop was interrupted. Saving an incremental state that points to an entry
        // that was really processed: the next run resumes right after the saved hash,
        // so saving the hash of a not yet validated entry would skip that entry.
        int checkpointIndex = findResumableCheckpointIndex();
        if (checkpointIndex >= 0)
        {
            var interruptedState = selfCheckState.WithEpochAndPosition(_settings.Epoch, contentHashes[checkpointIndex].Hash);
            UpdateSelfCheckStateOnDisk(context, interruptedState);
        }
        else
        {
            // Nothing can be checkpointed, so the state on disk is left untouched
            // and the next run makes the same resume/restart decision again.
            _tracer.Debug(context, "SelfCheck: No entry was fully processed before the interruption. Keeping the previous state on disk.");
        }
    }

    return Result.Success(new SelfCheckResult(invalidHashes: invalidEntries, totalProcessedFiles: processedEntries));

    // Returns the index of the first entry to validate given the hash processed last by a previous run.
    int findNextIndexToProcess(ContentHash lastProcessedHash)
    {
        var binarySearchResult = contentHashes.BinarySearch(
            new PayloadFromDisk<FileInfo>(lastProcessedHash, default),
            new ByHashPayloadFromDiskComparer<FileInfo>());

        if (binarySearchResult >= 0)
        {
            // Exact match: resuming right after it. When the match is the last entry the result is
            // contentHashes.Count, which means that nothing is left and the main loop is skipped.
            return binarySearchResult + 1;
        }

        // The exact match is not found (which is fine, because the state on disk may have changed between app invocations).
        // BinarySearch returns a negative value that is the bitwise complement of the index of the next larger element,
        // or of contentHashes.Count when there is no larger element.
        int insertionIndex = ~binarySearchResult;

        // When every entry sorts before the saved hash, the check starts over from the first entry.
        return insertionIndex < contentHashes.Count ? insertionIndex : 0;
    }

    // Returns the index of the latest processed entry that is safe to save as a resume position, or -1 if there is none.
    // An entry followed by a replica with the same hash is not safe, because the state must be saved
    // only when all the replicas of a hash are processed.
    int findResumableCheckpointIndex()
    {
        int candidate = lastProcessedIndex;
        while (candidate >= 0
               && candidate + 1 < contentHashes.Count
               && contentHashes[candidate].Hash == contentHashes[candidate + 1].Hash)
        {
            candidate--;
        }

        return candidate;
    }

    // Traces the progress and saves an incremental state once the reporting interval has elapsed.
    void traceProgressIfNeeded(ContentHash currentHash, int currentHashIndex)
    {
        processedBytes += contentHashes[currentHashIndex].Payload.Length;

        var swTime = stopwatch.Elapsed;
        var elapsedSinceLastReport = swTime - progressTracker;
        if (elapsedSinceLastReport > _settings.ProgressReportingInterval)
        {
            // It is possible to have multiple replicas with the same hash.
            // We need to save the state only when *all* the replicas are processed.
            // So we check if the next item has a different hash.
            // No check is performed on the last element because the state will be saved immediately after.
            if (currentHashIndex + 1 < contentHashes.Count && currentHash != contentHashes[currentHashIndex + 1].Hash)
            {
                // The bytes were accumulated since the last report, so the speed is computed
                // over the time that has really elapsed and not over the nominal interval.
                var speed = ((double)processedBytes / (1024 * 1024)) / elapsedSinceLastReport.TotalSeconds;
                _tracer.Always(context, $"SelfCheck: processed {currentHashIndex}/{contentHashes.Count}: {new SelfCheckResult(invalidEntries, processedEntries)}. Hashing speed {speed:0.##}Mb/s.");

                processedBytes = 0;

                // Saving incremental state
                var incrementalState = selfCheckState.WithEpochAndPosition(_settings.Epoch, currentHash);
                UpdateSelfCheckStateOnDisk(context, incrementalState);

                progressTracker = swTime;
            }
        }
    }
}