private void UpdateSelfCheckStateOnDisk(Context context, SelfCheckState selfCheckState)
 {
     try
     {
         _tracer.Debug(context, $"SelfCheck: Updating self check status on disk with new state: {selfCheckState.ToParseableString()}");
         _fileSystem.WriteAllText(_selfCheckFilePath, selfCheckState.ToParseableString());
     }
     catch (IOException e)
     {
         _tracer.Warning(context, $"SelfCheck: Failed updating self check status on disk: {e}");
     }
 }
Пример #2
0
        public void SelfCheckStateTests()
        {
            // Reparsing date time to loose precision in order for equality to work.
            var now = DateTimeUtilities.FromReadableTimestamp(DateTime.UtcNow.ToReadableString()).Value;

            var state1         = new SelfCheckState("Epoch1", now, ContentHash.Random());
            var reparsedState1 = SelfCheckState.TryParse(state1.ToParseableString());

            reparsedState1.Should().NotBeNull();
            reparsedState1.Value.Should().Be(state1);

            var state2         = new SelfCheckState("Epoch1", now);
            var reparsedState2 = SelfCheckState.TryParse(state2.ToParseableString());

            reparsedState2.Should().NotBeNull();
            reparsedState2.Value.Should().Be(state2);
        }
        private async Task <Result <SelfCheckResult> > SelfCheckContentDirectoryCoreAsync(
            OperationContext context,
            SelfCheckState selfCheckState,
            SelfCheckStatus status)
        {
            _tracer.Always(context, "Starting self check.");
            // Self checking procedure validates that in-memory content directory
            // is valid in respect to the state on disk.
            // Namely, it checks that the hashes for all the files and their size are correct.
            var stopwatch = Stopwatch.StartNew();

            // Enumerating files from disk instead of looking them up from content directory.
            // This is done due to simplicity (we don't have to worry about replicas) and because an additional IO cost is negligible compared to the cost of rehashing.
            var contentHashes = _contentStoreInternal.ReadSnapshotFromDisk(context).ListOrderedByHash();

            _tracer.Debug(context, $"SelfCheck: Enumerated {contentHashes.Count} entries from disk by {stopwatch.ElapsedMilliseconds}ms.");

            stopwatch.Restart();

            // Trying to restore the index of a hash that we processed before.
            int index = 0;

            if (status == SelfCheckStatus.InProgress && selfCheckState.LastPosition != null)
            {
                index = findNextIndexToProcess(selfCheckState.LastPosition.Value);
                _tracer.Debug(context, $"SelfCheck: skipping {index} elements based on previous state '{selfCheckState.ToParseableString()}'.");
            }
            else
            {
                string statusAsString = status == SelfCheckStatus.Force ? "the epoch has changed" : "a self check is out of date";
                _tracer.Debug(
                    context,
                    $"SelfCheck: starting self check for the entire content directory because {statusAsString}. Previous state '{selfCheckState.ToParseableString()}'.");
            }

            // Time span for tracking progress.
            TimeSpan progressTracker = TimeSpan.FromSeconds(0);
            long     processedBytes  = 0;

            int invalidEntries   = 0;
            int processedEntries = 0;

            for (; index < contentHashes.Count; index++)
            {
                if (context.Token.IsCancellationRequested)
                {
                    _tracer.Debug(context, "SelfCheck: Exiting self check because cancellation was requested.");
                    break;
                }

                var hashInfo = contentHashes[index];
                processedEntries++;

                var(isValid, error) = await ValidateFileAsync(context, hashInfo.Hash, hashInfo.Payload);

                if (!isValid)
                {
                    _tracer.Warning(context, $"SelfCheck: Found invalid entry in cache. Hash={hashInfo.Hash.ToShortString()}. {error}. Evicting the file...");
                    await _contentStoreInternal.RemoveInvalidContentAsync(context, hashInfo.Hash);

                    invalidEntries++;
                }

                // Tracking the progress if needed.
                traceProgressIfNeeded(hashInfo.Hash, index);

                // If the current entry is not the last one, and we reached the number of invalid files,
                // then exiting the loop.
                if (invalidEntries == _settings.InvalidFilesLimit && index != contentHashes.Count - 1)
                {
                    _tracer.Debug(context, $"SelfCheck: Exiting self check because invalid file limit of {_settings.InvalidFilesLimit} is reached.");
                    break;
                }

                if (_settings.HashAnalysisDelay != null)
                {
                    await Task.Delay(_settings.HashAnalysisDelay.Value, context.Token);
                }
            }

            if (index == contentHashes.Count)
            {
                // All the items are processed. Saving new stable checkpoint
                UpdateSelfCheckStateOnDisk(context, SelfCheckState.SelfCheckComplete(_settings.Epoch, _clock.UtcNow));
            }
            else
            {
                // The loop was interrupted. Saving an incremental state.
                var newStatus = selfCheckState.WithEpochAndPosition(_settings.Epoch, contentHashes[index].Hash);
                UpdateSelfCheckStateOnDisk(context, newStatus);
            }

            return(Result.Success(new SelfCheckResult(invalidHashes: invalidEntries, totalProcessedFiles: processedEntries)));

            int findNextIndexToProcess(ContentHash lastProcessedHash)
            {
                var binarySearchResult = contentHashes.BinarySearch(new PayloadFromDisk <FileInfo>(lastProcessedHash, default), new ByHashPayloadFromDiskComparer <FileInfo>());

                int targetIndex = 0;

                if (binarySearchResult >= 0 && binarySearchResult < contentHashes.Count - 1)
                {
                    targetIndex = binarySearchResult + 1;
                }
                else
                {
                    // The exact match is not found (which is fine, because the state on disk may changed between app invocations).
                    // BinarySearch returns a negative value that bitwise complement of the closest element in the sorted array.

                    binarySearchResult = ~binarySearchResult;
                    if (binarySearchResult < contentHashes.Count)
                    {
                        targetIndex = binarySearchResult;
                    }
                }

                return(targetIndex);
            }

            void traceProgressIfNeeded(ContentHash currentHash, int currentHashIndex)
            {
                processedBytes += contentHashes[currentHashIndex].Payload.Length;

                var swTime = stopwatch.Elapsed;

                if (swTime - progressTracker > _settings.ProgressReportingInterval)
                {
                    // It is possible to have multiple replicas with the same hash.
                    // We need to save the state only when *all* the replicas are processed.
                    // So we check if the next item has a different hash.
                    // No check is performed on the last element because the state will be saved immediately after
                    if (currentHashIndex + 1 < contentHashes.Count && currentHash != contentHashes[currentHashIndex + 1].Hash)
                    {
                        var speed = ((double)processedBytes / (1024 * 1024)) / _settings.ProgressReportingInterval.TotalSeconds;
                        _tracer.Always(context, $"SelfCheck: processed {index}/{contentHashes.Count}: {new SelfCheckResult(invalidEntries, processedEntries)}. Hashing speed {speed:0.##}Mb/s.");
                        processedBytes = 0;

                        // Saving incremental state
                        var newStatus = selfCheckState.WithEpochAndPosition(_settings.Epoch, currentHash);
                        UpdateSelfCheckStateOnDisk(context, newStatus);

                        progressTracker = swTime;
                    }
                }
            }
        }