/// <summary>
/// Gets the file corresponding to the given CasHash and checks
/// to see if the file contents hash to the same CasHash value
/// </summary>
/// <param name="originalCasHash">CasHash value to check</param>
/// <param name="errors">Where any cache errors found get stored</param>
private async Task RehashContentsAsync(CasHash originalCasHash, ConcurrentDictionary<CacheError, int> errors)
{
    if (originalCasHash.Equals(CasHash.NoItem))
    {
        // No need to rehash the NoItem cas hash
        return;
    }

    Possible<StreamWithLength, Failure> possibleStream = await m_readOnlySession.GetStreamAsync(originalCasHash);
    if (!possibleStream.Succeeded)
    {
        errors.TryAdd(new CacheError(CacheErrorType.CasHashError, "CasHash " + originalCasHash + " not found in CAS"), 0);
        return;
    }

    using (StreamWithLength stream = possibleStream.Result)
    {
        ContentHash contentHash = await ContentHashingUtilities.HashContentStreamAsync(stream);
        Hash newHash = new Hash(contentHash);
        CasHash newCasHash = new CasHash(newHash);

        if (!originalCasHash.Equals(newCasHash))
        {
            errors.TryAdd(new CacheError(CacheErrorType.CasHashError, "The data of CasHash " + originalCasHash + " has been altered in the CAS"), 0);
        }
    }
}
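// Usage sketch (not part of the original code): rehash a batch of CasHash values concurrently
// and report anything that failed to verify. The 'RehashAllAsync' name, the 'casHashes'
// parameter, and the Console.Error reporting are illustrative assumptions; System.Linq and
// System.Threading.Tasks are assumed to be in scope.
private async Task RehashAllAsync(IEnumerable<CasHash> casHashes)
{
    var errors = new ConcurrentDictionary<CacheError, int>();

    // One rehash per CasHash, all awaited together.
    await Task.WhenAll(casHashes.Select(casHash => RehashContentsAsync(casHash, errors)));

    foreach (CacheError error in errors.Keys)
    {
        Console.Error.WriteLine(error);
    }
}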
/// <summary>
/// Attempts to pin the specified CasHash. Returns true if the
/// pinning succeeds.
/// </summary>
/// <param name="casHash">CasHash value to attempt to pin</param>
private async Task<bool> AttemptToPinAsync(CasHash casHash)
{
    if (casHash.Equals(CasHash.NoItem))
    {
        return true;
    }

    Possible<string, Failure> pinAttempt = await m_readOnlySession.PinToCasAsync(casHash);
    return pinAttempt.Succeeded;
}
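// Usage sketch (illustrative, not from the original source): pin a hash before rehashing it so
// the content cannot be evicted mid-check. 'casHash' and 'errors' are assumed to be in scope.
if (await AttemptToPinAsync(casHash))
{
    await RehashContentsAsync(casHash, errors);
}
else
{
    errors.TryAdd(new CacheError(CacheErrorType.CasHashError, "CasHash " + casHash + " could not be pinned"), 0);
}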
public async Task<Possible<ValidateContentStatus, Failure>> ValidateContentAsync(CasHash hash, UrgencyHint urgencyHint, Guid activityId)
{
    Contract.Requires(!IsClosed);

    using (var counter = m_counters.ValidateSessionCounter())
    {
        using (var eventing = new ValidateContentActivity(BasicFilesystemCache.EventSource, activityId, this))
        {
            eventing.Start(hash, urgencyHint);

            if (CasHash.NoItem.Equals(hash))
            {
                return eventing.Returns(counter.Ok());
            }

            string path = m_cache.ToPath(hash);

            try
            {
                // We don't use ProduceStream as this operation does not pin or cause pinning,
                // and we want to have FileShare.Delete in case we need to delete this entry
                // due to it being corrupt. This way there is no race as to which file is
                // being deleted - it will be the one that was just determined to be corrupt.
                using (Stream fileData = await m_cache.ContendedOpenStreamAsync(path, FileMode.Open, FileAccess.Read, FileShare.Read | FileShare.Delete, useAsync: true, handlePendingDelete: true))
                {
                    // Size of the file
                    counter.FileSize(fileData.Length);

                    CasHash contentHash = new CasHash(await ContentHashingUtilities.HashContentStreamAsync(fileData));
                    if (contentHash.Equals(hash))
                    {
                        return eventing.Returns(counter.Ok());
                    }

                    // Remove it from pinned as it is being removed
                    int junk;
                    m_pinnedToCas.TryRemove(hash, out junk);

                    // Now, we try to remediate - this is a simple delete attempt, with any error
                    // meaning that we could not delete it
                    try
                    {
                        File.Delete(path);
                        eventing.Write(CacheActivity.CriticalDataOptions, new { RemovedCorruptedEntry = path });
                        return eventing.Returns(counter.Remediated());
                    }
                    catch (Exception e)
                    {
                        // Could not delete it (for whatever reason)
                        eventing.Write(CacheActivity.CriticalDataOptions, new { FailedToRemovedCorruptedEntry = path, Reason = e.Message });

                        // The file failed to be deleted, so we need to say that it is still there
                        return eventing.Returns(counter.Invalid());
                    }
                }
            }
            catch (FileNotFoundException)
            {
                // Not found (either type) is the same as Remediated
                return eventing.Returns(counter.Remediated());
            }
            catch (DirectoryNotFoundException)
            {
                // Not found (either type) is the same as Remediated
                return eventing.Returns(counter.Remediated());
            }
            catch (Exception e)
            {
                // Other errors are reported as a failure to produce a stream of the data
                return eventing.Returns(new ProduceStreamFailure(CacheId, hash, e));
            }
        }
    }
}
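// Usage sketch (hypothetical caller, not part of the original file): validate one hash and log
// the outcome. 'session' (an instance exposing ValidateContentAsync) and 'hash' are assumed to
// be in scope; UrgencyHint.Nominal is assumed to be an acceptable default hint.
Possible<ValidateContentStatus, Failure> validation = await session.ValidateContentAsync(hash, UrgencyHint.Nominal, Guid.NewGuid());
if (!validation.Succeeded)
{
    Console.Error.WriteLine("Validation of {0} failed: {1}", hash, validation.Failure.DescribeIncludingInnerFailures());
}
else
{
    Console.WriteLine("Validation of {0} returned {1}", hash, validation.Result);
}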
/// <summary>
/// Check the input list against the regex
/// </summary>
/// <param name="weak">The weak fingerprint (for logging on failure)</param>
/// <param name="casElement">The CasElement of the strong fingerprint</param>
/// <param name="hashElement">The hashElement of the strong fingerprint (for logging on failure)</param>
/// <param name="urgencyHint">Pass-through</param>
/// <param name="activityId">Pass-through activityId</param>
/// <returns>false if the check was not performed, true if the checks were performed, failure if the regex checks failed</returns>
/// <remarks>
/// This will attempt to validate the CAS-stored input list against the regex rules
/// </remarks>
private async Task<Possible<bool, Failure>> CheckInputList(WeakFingerprintHash weak, CasHash casElement, Hash hashElement, UrgencyHint urgencyHint, Guid activityId)
{
    // If we either have no CasHash item or we have no regex to check, just return false
    // (that we did nothing)
    if (casElement.Equals(CasHash.NoItem) || ((Cache.MustIncludeRegex == null) && (Cache.MustNotIncludeRegex == null)))
    {
        return false;
    }

    // mustInclude starts out false if we need to check for MustInclude.
    // Once we get a MustInclude match we no longer need to check.
    // If we have no MustInclude regex, we set it to true so that
    // we don't bother checking it.
    bool mustInclude = Cache.MustIncludeRegex == null;

    // This is just to make a faster check for the MustNotInclude
    // case. If we have the regex then we must check each entry,
    // but in many cases we don't have the regex, so let this be a quick out.
    bool checkMustNot = Cache.MustNotIncludeRegex != null;

    // Try to get the observed inputs from the CasHash given
    var possibleStream = await GetStreamAsync(casElement, urgencyHint, activityId);
    if (!possibleStream.Succeeded)
    {
        // We could not get a stream to the CasElement in the fingerprint.
        return new InputListFilterFailure(Cache.CacheId, weak, casElement, hashElement, "Failed to get stream of CasElement");
    }

    // Deserialize the contents of the path set.
    using (possibleStream.Result)
    {
        PathTable pathTable = new PathTable();
        BuildXLReader reader = new BuildXLReader(false, possibleStream.Result, true);
        var maybePathSet = ObservedPathSet.TryDeserialize(pathTable, reader);
        if (maybePathSet.Succeeded)
        {
            // Deserialization was successful
            foreach (ObservedPathEntry entry in maybePathSet.Result.Paths)
            {
                string filepath = entry.Path.ToString(pathTable);

                // Have we seen a must-include entry yet? If not, check whether this is one;
                // once we have found a match we stop checking this regex.
                if (!mustInclude)
                {
                    mustInclude = Cache.MustIncludeRegex.IsMatch(filepath);
                }

                // Now, if we are looking for a must-not-include, check for that,
                // and if it matches we fail.
                if (checkMustNot)
                {
                    if (Cache.MustNotIncludeRegex.IsMatch(filepath))
                    {
                        return new InputListFilterFailure(Cache.CacheId, weak, casElement, hashElement, string.Format(CultureInfo.InvariantCulture, "Failed due to a MustNotInclude file: {0}", filepath));
                    }
                }
            }
        }
        else
        {
            return new InputListFilterFailure(Cache.CacheId, weak, casElement, hashElement, "Failed to deserialize observed inputs");
        }
    }

    if (!mustInclude)
    {
        return new InputListFilterFailure(Cache.CacheId, weak, casElement, hashElement, "Failed due to not including at least one MustInclude file");
    }

    return true;
}
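// Usage sketch (hypothetical call site, not in the original source): run the regex filter over a
// strong fingerprint before trusting its input list. 'strong' (a StrongFingerprint) and
// 'activityId' are assumed to be in scope, and StrongFingerprint is assumed to expose the same
// HashElement value that is passed through for failure logging above.
Possible<bool, Failure> inputListCheck = await CheckInputList(strong.WeakFingerprint, strong.CasElement, strong.HashElement, UrgencyHint.Nominal, activityId);
if (!inputListCheck.Succeeded)
{
    // The observed input list violated the MustInclude/MustNotInclude rules.
    Console.Error.WriteLine("Rejected entry: {0}", inputListCheck.Failure.DescribeIncludingInnerFailures());
}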
/// <summary>
/// Analyzes counts and churn of fingerprints, input lists, and optionally
/// content sizes of sessions in the cache.
/// </summary>
/// <param name="sessionNameRegex">Acts as a filter for which sessions to include in the analysis.</param>
/// <param name="analyzeContent">When true, analysis will include content sizing for each session.</param>
/// <returns>SessionChurnInfo object for every session analyzed</returns>
public IEnumerable<SessionChurnInfo> Analyze(Regex sessionNameRegex, bool analyzeContent)
{
    Contract.Assume(sessionNameRegex != null);

    m_numSessions = 0;
    m_numSessionsAnalyzed = 0;

    // Used to store every unique strong fingerprint for all sessions
    HashSet<StrongFingerprint> allStrongFingerprints = new HashSet<StrongFingerprint>();

    // Used to store strong fingerprints for a given session
    HashSet<StrongFingerprint> sessionStrongFingerprints = new HashSet<StrongFingerprint>();

    HashSet<WeakFingerprintHash> allWeakFingerprints = new HashSet<WeakFingerprintHash>();

    // Used to store every unique cas element for all sessions
    HashSet<CasHash> allCasHashes = new HashSet<CasHash>();

    // Used to store every unique cas entry for all sessions
    HashSet<CasHash> allCasEntries = new HashSet<CasHash>();

    IEnumerable<Task<string>> unorderedSessionNames = m_cache.EnumerateCompletedSessions();
    IOrderedEnumerable<string> orderedSessionNames = unorderedSessionNames.Select(stringTask => stringTask.Result)
        .Where((sessionName) =>
        {
            m_numSessions++;
            if (sessionNameRegex.IsMatch(sessionName))
            {
                m_numSessionsAnalyzed++;
                return true;
            }
            else
            {
                return false;
            }
        })
        .OrderBy(sessionName => sessionName);

    // Dictionary of CAS entry sizes
    Dictionary<CasHash, long> contentSizeTable = new Dictionary<CasHash, long>();

    // Analyze each session in order
    foreach (string sessionName in orderedSessionNames)
    {
        Console.Error.WriteLine("Analyzing session {0}", sessionName);

        // Initialize counters for the current session
        int totalNumberStrongFingerprints = 0;
        int numberUniqueWeakFingerprints = 0;
        int numberUniqueStrongFingerprints = 0;
        int numberUniqueCasHashesOverTime = 0;
        int numberCasHashNoItemsForSession = 0;
        int contentErrors = 0;

        // Clear the set of strong fingerprints for the next session.
        sessionStrongFingerprints.Clear();

        // Contains every unique cas hash for the current session
        HashSet<CasHash> sessionCasHashes = new HashSet<CasHash>();

        IEnumerable<Task<StrongFingerprint>> strongFingerprints = m_cache.EnumerateSessionStrongFingerprints(sessionName).Result;

        // Analyze each strong fingerprint
        foreach (Task<StrongFingerprint> strongFingerprintTask in strongFingerprints.OutOfOrderTasks())
        {
            totalNumberStrongFingerprints++;
            StrongFingerprint strongFingerprint = strongFingerprintTask.Result;
            sessionStrongFingerprints.Add(strongFingerprint);

            // Check if the strong fingerprint has never been seen before
            if (allStrongFingerprints.Add(strongFingerprint))
            {
                // New strong fingerprint, so increment the counter
                numberUniqueStrongFingerprints++;
            }

            if (allWeakFingerprints.Add(strongFingerprint.WeakFingerprint))
            {
                numberUniqueWeakFingerprints++;
            }

            CasHash casElement = strongFingerprint.CasElement;

            // Check if the cas hash is the special no-item value
            if (casElement.Equals(CasHash.NoItem))
            {
                numberCasHashNoItemsForSession++;
            }

            // Collect unique CAS elements in the session
            sessionCasHashes.Add(casElement);

            // Check if the cas hash has never been seen before for the whole cache
            if (allCasHashes.Add(casElement))
            {
                numberUniqueCasHashesOverTime++;
            }
        }

        SessionContentInfo sessionContentInfo = null;

        if (analyzeContent)
        {
            // Contains every unique cas entry hash for the current session
            var sessionCasEntries = new HashSet<CasHash>();

            // Accumulate all the CasEntries for the session
            foreach (var task in m_session.GetCacheEntries(sessionStrongFingerprints).OutOfOrderTasks(32))
            {
                var possibleCasEntries = task.Result;
                if (possibleCasEntries.Succeeded)
                {
                    var casEntries = possibleCasEntries.Result;
                    sessionCasEntries.UnionWith(casEntries);
                }
                else
                {
                    Console.Error.WriteLine("Unable to get CasEntries: {0}", possibleCasEntries.Failure.DescribeIncludingInnerFailures());
                    ++contentErrors;
                }
            }

            // Retrieve the content size for each new CasEntry.
            foreach (var task in m_session.GetContentSizes(sessionCasEntries.Except(allCasEntries)).OutOfOrderTasks(32))
            {
                var tuple = task.Result;
                contentSizeTable[tuple.Item1] = tuple.Item2;
            }

            // Accumulate content sizes for the session, tracking new vs. previously seen entries.
            long totalContentSize = 0;
            long newContentSize = 0;
            var newContentCount = 0;

            foreach (var e in sessionCasEntries)
            {
                long length;
                if (contentSizeTable.TryGetValue(e, out length))
                {
                    if (ValidContentSize(length))
                    {
                        totalContentSize += length;
                        if (!allCasEntries.Contains(e))
                        {
                            newContentSize += length;
                            newContentCount += 1;
                            allCasEntries.Add(e);
                        }
                    }
                    else
                    {
                        Console.Error.WriteLine("Unable to find content length ({0}) for {1}", (ContentError)length, e);
                        ++contentErrors;
                    }
                }
                else
                {
#pragma warning disable CA2201 // Do not raise reserved exception types
                    throw new ApplicationException($"No content length for {e}");
#pragma warning restore CA2201 // Do not raise reserved exception types
                }
            }

            sessionContentInfo = new SessionContentInfo(
                sessionCasEntries.Count,
                totalContentSize,
                newContentCount,
                newContentSize,
                contentErrors);
        }

        // Aggregate the counters and return the session's data
        SessionStrongFingerprintChurnInfo sessionStrongFingerprintChurnInfo =
            new SessionStrongFingerprintChurnInfo(totalNumberStrongFingerprints, numberUniqueStrongFingerprints, numberUniqueWeakFingerprints);

        SessionInputListChurnInfo sessionInputListChurnInfo =
            new SessionInputListChurnInfo(totalNumberStrongFingerprints, sessionCasHashes.Count, numberUniqueCasHashesOverTime, numberCasHashNoItemsForSession);

        yield return new SessionChurnInfo(sessionName, sessionStrongFingerprintChurnInfo, sessionInputListChurnInfo, sessionContentInfo);
    }
}
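// Usage sketch (hypothetical driver, not part of the original class): analyze every session whose
// name matches a pattern and print the per-session churn. 'analyzer' is assumed to be an instance
// of this analyzer type; System.Text.RegularExpressions is assumed to be in scope.
foreach (SessionChurnInfo sessionInfo in analyzer.Analyze(new Regex(".*"), analyzeContent: true))
{
    Console.WriteLine(sessionInfo);
}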
private ContentBreakdownInfo AnalyzeSession(string name)
{
    var casEntries = new HashSet<CasHash>();
    var casElements = new HashSet<CasHash>();
    var sfpSet = new HashSet<StrongFingerprint>();
    var casElementSizeTable = new Dictionary<CasHash, long>();
    var casEntrySizeTable = new Dictionary<CasHash, long>();
    int countSFP = 0;
    int contentErrors = 0;

    // Enumerate strong fingerprints for the session...
    IEnumerable<Task<StrongFingerprint>> strongFingerprints = m_cache.EnumerateSessionStrongFingerprints(name).Result;

    // ...and for each, accumulate the input lists as part of the info we'll report.
    foreach (Task<StrongFingerprint> strongFingerprintTask in strongFingerprints.OutOfOrderTasks())
    {
        ++countSFP;
        StrongFingerprint sfp = strongFingerprintTask.Result;

        // Grab the observed input list.
        CasHash casElement = sfp.CasElement;
        if (!casElement.Equals(CasHash.NoItem))
        {
            casElements.Add(casElement);
        }

        // Remember the SFP for the content scan below, so we're not doing this enumeration twice.
        sfpSet.Add(sfp);
    }

    // Now query the content for the SFPs in the session
    foreach (var task in m_session.GetCacheEntries(sfpSet).OutOfOrderTasks())
    {
        var possibleCasEntries = task.Result;
        if (possibleCasEntries.Succeeded)
        {
            casEntries.UnionWith(possibleCasEntries.Result);
        }
        else
        {
            Console.Error.WriteLine("Unable to get CasEntries: {0}", possibleCasEntries.Failure.DescribeIncludingInnerFailures());
            ++contentErrors;
        }
    }

    // With all the CAS entries in hand, get the sizes...
    foreach (var task in m_session.GetContentSizes(casEntries.Except(casEntrySizeTable.Keys)).OutOfOrderTasks())
    {
        var tuple = task.Result;
        casEntrySizeTable[tuple.Item1] = tuple.Item2;
    }

    // Get the sizes of the input lists.
    foreach (var task in m_session.GetContentSizes(casElements.Except(casElementSizeTable.Keys)).OutOfOrderTasks())
    {
        var tuple = task.Result;
        casElementSizeTable[tuple.Item1] = tuple.Item2;
    }

    return new ContentBreakdownInfo(name, casElementSizeTable, casEntrySizeTable);
}
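// Usage sketch (hypothetical helper, not in the original source): produce a content breakdown for
// every completed session, reusing the same enumeration pattern as Analyze above. The 'breakdowns'
// list and the synchronous .Result waits are illustrative simplifications; System.Collections.Generic
// is assumed to be in scope.
private List<ContentBreakdownInfo> AnalyzeAllSessions()
{
    var breakdowns = new List<ContentBreakdownInfo>();
    foreach (Task<string> sessionNameTask in m_cache.EnumerateCompletedSessions())
    {
        breakdowns.Add(AnalyzeSession(sessionNameTask.Result));
    }

    return breakdowns;
}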