/// <summary>
        /// Reads back the content stored under a CasHash and verifies that
        /// hashing that content yields the same CasHash again.
        /// </summary>
        /// <param name="originalCasHash">CasHash whose stored content is verified</param>
        /// <param name="errors">Receives a CasHashError entry when the content cannot be fetched or no longer matches its hash</param>
        private async Task RehashContentsAsync(CasHash originalCasHash, ConcurrentDictionary <CacheError, int> errors)
        {
            // CasHash.NoItem is never rehashed
            if (originalCasHash.Equals(CasHash.NoItem))
            {
                return;
            }

            Possible <StreamWithLength, Failure> lookup = await m_readOnlySession.GetStreamAsync(originalCasHash);

            if (lookup.Succeeded)
            {
                using (StreamWithLength content = lookup.Result)
                {
                    // Hash the stored bytes and wrap the result so it can be compared as a CasHash
                    ContentHash rehashedContent = await ContentHashingUtilities.HashContentStreamAsync(content);
                    CasHash     recomputed      = new CasHash(new Hash(rehashedContent));

                    bool contentUnchanged = originalCasHash.Equals(recomputed);
                    if (!contentUnchanged)
                    {
                        string alteredMessage = "The data of CasHash " + originalCasHash + " has been altered in the CAS";
                        errors.TryAdd(new CacheError(CacheErrorType.CasHashError, alteredMessage), 0);
                    }
                }
            }
            else
            {
                // The session could not produce a stream for this hash
                string missingMessage = "CasHash " + originalCasHash + " not found in CAS";
                errors.TryAdd(new CacheError(CacheErrorType.CasHashError, missingMessage), 0);
            }
        }
        /// <summary>
        /// Tries to pin the given CasHash through the read-only session.
        /// </summary>
        /// <param name="casHash">CasHash value to pin</param>
        /// <returns>
        /// True when the hash is CasHash.NoItem (no pin call is made) or when
        /// the session's pin call succeeded; false otherwise.
        /// </returns>
        private async Task <bool> AttemptToPinAsync(CasHash casHash)
        {
            // Short-circuit: the session is only asked to pin when the hash is not NoItem
            return casHash.Equals(CasHash.NoItem)
                   || (await m_readOnlySession.PinToCasAsync(casHash)).Succeeded;
        }
        /// <summary>
        /// Validates the content stored for a CasHash by re-hashing its backing
        /// file, and tries to delete the file when the hash no longer matches.
        /// </summary>
        /// <param name="hash">CasHash whose stored content is validated</param>
        /// <param name="urgencyHint">Passed to the activity's Start call</param>
        /// <param name="activityId">Activity id used for the eventing activity</param>
        /// <returns>
        /// The counter's Ok result when the hash is NoItem or the content matches,
        /// Remediated when the file is missing or a mismatching file was deleted,
        /// Invalid when a mismatching file could not be deleted, and a
        /// ProduceStreamFailure for any other exception.
        /// </returns>
        public async Task <Possible <ValidateContentStatus, Failure> > ValidateContentAsync(CasHash hash, UrgencyHint urgencyHint, Guid activityId)
        {
            Contract.Requires(!IsClosed);

            using (var sessionCounter = m_counters.ValidateSessionCounter())
            using (var activity = new ValidateContentActivity(BasicFilesystemCache.EventSource, activityId, this))
            {
                activity.Start(hash, urgencyHint);

                // NoItem is reported as Ok without touching the file system
                if (CasHash.NoItem.Equals(hash))
                {
                    return activity.Returns(sessionCounter.Ok());
                }

                string contentPath = m_cache.ToPath(hash);

                try
                {
                    // ProduceStream is deliberately avoided: this operation must not pin, and the
                    // file is opened with FileShare.Delete so that a corrupt entry can be deleted
                    // while it is still open.  That removes any race about which file gets
                    // deleted - it is exactly the one that was just found to be corrupt.
                    using (Stream contentStream = await m_cache.ContendedOpenStreamAsync(contentPath, FileMode.Open, FileAccess.Read, FileShare.Read | FileShare.Delete, useAsync: true, handlePendingDelete: true))
                    {
                        // Record the size of the file
                        sessionCounter.FileSize(contentStream.Length);

                        CasHash actualHash = new CasHash(await ContentHashingUtilities.HashContentStreamAsync(contentStream));
                        bool    isIntact   = actualHash.Equals(hash);
                        if (isIntact)
                        {
                            return activity.Returns(sessionCounter.Ok());
                        }

                        // The entry is about to be removed, so drop it from the pinned set first
                        int ignoredPinValue;
                        m_pinnedToCas.TryRemove(hash, out ignoredPinValue);

                        // Remediation is a plain delete attempt; any error means the
                        // corrupt entry could not be removed
                        try
                        {
                            File.Delete(contentPath);
                            activity.Write(CacheActivity.CriticalDataOptions, new { RemovedCorruptedEntry = contentPath });

                            return activity.Returns(sessionCounter.Remediated());
                        }
                        catch (Exception deleteError)
                        {
                            // Deletion failed (for whatever reason), so the corrupt file is still there
                            activity.Write(CacheActivity.CriticalDataOptions, new { FailedToRemovedCorruptedEntry = contentPath, Reason = deleteError.Message });

                            return activity.Returns(sessionCounter.Invalid());
                        }
                    }
                }
                catch (Exception notFound) when (notFound is FileNotFoundException || notFound is DirectoryNotFoundException)
                {
                    // A missing file or directory counts the same as Remediated
                    return activity.Returns(sessionCounter.Remediated());
                }
                catch (Exception unexpected)
                {
                    // Anything else is reported as a failure to produce a stream of the data
                    return activity.Returns(new ProduceStreamFailure(CacheId, hash, unexpected));
                }
            }
        }
        /// <summary>
        /// Applies the cache's MustInclude / MustNotInclude regex rules to the
        /// input list stored in the CAS for a strong fingerprint.
        /// </summary>
        /// <param name="weak">The weak fingerprint (only used when building a failure)</param>
        /// <param name="casElement">The CasElement of the strong fingerprint; its content is the input list</param>
        /// <param name="hashElement">The hashElement of the strong fingerprint (only used when building a failure)</param>
        /// <param name="urgencyHint">Passed through to GetStreamAsync</param>
        /// <param name="activityId">Passed through to GetStreamAsync</param>
        /// <returns>false if no check was performed, true if the input list passed the checks, a failure if it did not</returns>
        /// <remarks>
        /// The input list is fetched from the CAS, deserialized as an observed
        /// path set, and each path is matched against the configured regexes.
        /// </remarks>
        private async Task <Possible <bool, Failure> > CheckInputList(WeakFingerprintHash weak, CasHash casElement, Hash hashElement, UrgencyHint urgencyHint, Guid activityId)
        {
            // Nothing to check without a CasHash item...
            if (casElement.Equals(CasHash.NoItem))
            {
                return false;
            }

            // ...or without any regex configured
            if ((Cache.MustIncludeRegex == null) && (Cache.MustNotIncludeRegex == null))
            {
                return false;
            }

            // Starts out satisfied when there is no MustInclude regex; otherwise it
            // flips to true on the first matching path and is not re-checked after that.
            bool includeSatisfied = Cache.MustIncludeRegex == null;

            // Cached so the per-path loop can skip the MustNotInclude test cheaply
            // in the common case where that regex is not configured.
            bool mustNotConfigured = Cache.MustNotIncludeRegex != null;

            // Fetch the observed inputs referenced by the CasHash
            var streamResult = await GetStreamAsync(casElement, urgencyHint, activityId);

            if (!streamResult.Succeeded)
            {
                // No stream could be obtained for the CasEntry in the fingerprint
                return new InputListFilterFailure(Cache.CacheId, weak, casElement, hashElement, "Failed to get stream of CasElement");
            }

            using (streamResult.Result)
            {
                // Deserialize the contents of the path set
                var paths         = new PathTable();
                var pathSetReader = new BuildXLReader(false, streamResult.Result, true);
                var pathSetResult = ObservedPathSet.TryDeserialize(paths, pathSetReader);

                if (!pathSetResult.Succeeded)
                {
                    return new InputListFilterFailure(Cache.CacheId, weak, casElement, hashElement, "Failed to deserialize observed inputs");
                }

                foreach (ObservedPathEntry observed in pathSetResult.Result.Paths)
                {
                    string expandedPath = observed.Path.ToString(paths);

                    // Only consult the MustInclude regex until the first match is found
                    includeSatisfied = includeSatisfied || Cache.MustIncludeRegex.IsMatch(expandedPath);

                    // A single MustNotInclude match fails the whole input list
                    if (mustNotConfigured && Cache.MustNotIncludeRegex.IsMatch(expandedPath))
                    {
                        return new InputListFilterFailure(Cache.CacheId, weak, casElement, hashElement, string.Format(CultureInfo.InvariantCulture, "Failed due to a MustNotInclude file: {0}", expandedPath));
                    }
                }
            }

            if (includeSatisfied)
            {
                return true;
            }

            return new InputListFilterFailure(Cache.CacheId, weak, casElement, hashElement, "Failed due to not including at least one MustInclude file");
        }
Example #5
0
        /// <summary>
        /// Analyzes counts and churn of fingerprints, input lists, and optionally
        /// content sizes of sessions in the cache.
        /// </summary>
        /// <param name="sessionNameRegex">Acts as a filter for which sessions to include in the analysis.</param>
        /// <param name="analyzeContent">When true, analysis will include content sizing for each session.</param>
        /// <returns>SessionChurnInfo object for every session analyzed</returns>
        /// <remarks>
        /// This is an iterator method (it uses yield return), so nothing in the body,
        /// including the reset of m_numSessions / m_numSessionsAnalyzed, runs until the
        /// returned sequence is enumerated.  Sessions are processed in ascending order of
        /// session name, and every "unique" or "new" count for a session is relative to
        /// the sessions processed before it in that order.
        /// </remarks>
        public IEnumerable <SessionChurnInfo> Analyze(Regex sessionNameRegex, bool analyzeContent)
        {
            Contract.Assume(sessionNameRegex != null);

            m_numSessions         = 0;
            m_numSessionsAnalyzed = 0;

            // Used to store every unique strong fingerprint for all sessions
            HashSet <StrongFingerprint> allStrongFingerprints = new HashSet <StrongFingerprint>();

            // Used to store strong fingerprints for a given session
            // (one instance, cleared at the start of each session)
            HashSet <StrongFingerprint> sessionStrongFingerprints = new HashSet <StrongFingerprint>();

            // Used to store every unique weak fingerprint for all sessions
            HashSet <WeakFingerprintHash> allWeakFingerprints = new HashSet <WeakFingerprintHash>();

            // Used to store every unique cas element for all sessions
            HashSet <CasHash> allCasHashes = new HashSet <CasHash>();

            // Used to store every unique cas entry for all sessions
            // (only entries with a valid content size are ever added, see below)
            HashSet <CasHash> allCasEntries = new HashSet <CasHash>();

            IEnumerable <Task <string> > unorderedSessionNames = m_cache.EnumerateCompletedSessions();

            // Deferred query: each session-name task is blocked on via .Result, and the
            // Where predicate counts every session in m_numSessions and every regex match
            // in m_numSessionsAnalyzed as a side effect.  Those counters are therefore only
            // populated once the foreach below starts enumerating the ordered sequence.
            IOrderedEnumerable <string> orderedSessionNames = unorderedSessionNames.Select(stringTask => stringTask.Result)
                                                              .Where((sessionName) =>
            {
                m_numSessions++;
                if (sessionNameRegex.IsMatch(sessionName))
                {
                    m_numSessionsAnalyzed++;
                    return(true);
                }
                else
                {
                    return(false);
                }
            })
                                                              .OrderBy(sessionName => sessionName);

            // Dictionary of CAS entry sizes (kept across sessions)
            Dictionary <CasHash, long> contentSizeTable = new Dictionary <CasHash, long>();

            // Analyze each session in order
            foreach (string sessionName in orderedSessionNames)
            {
                Console.Error.WriteLine("Analyzing session {0}", sessionName);

                // Initialize counters for the current session
                int totalNumberStrongFingerprints  = 0;
                int numberUniqueWeakFingerprints   = 0;
                int numberUniqueStrongFingerprints = 0;
                int numberUniqueCasHashesOverTime  = 0;
                int numberCasHashNoItemsForSession = 0;
                int contentErrors = 0;

                // Clear the set of strong fingerprints in this next session.
                sessionStrongFingerprints.Clear();

                // Contains every unique cas hash for the current session
                HashSet <CasHash> sessionCasHashes = new HashSet <CasHash>();

                // Blocks until the session's strong fingerprint enumeration is available
                IEnumerable <Task <StrongFingerprint> > strongFingerprints = m_cache.EnumerateSessionStrongFingerprints(sessionName).Result;

                // Analyze each strong fingerprint
                foreach (Task <StrongFingerprint> strongFingerprintTask in strongFingerprints.OutOfOrderTasks())
                {
                    totalNumberStrongFingerprints++;

                    StrongFingerprint strongFingerprint = strongFingerprintTask.Result;

                    sessionStrongFingerprints.Add(strongFingerprint);

                    // Check if strong fingerprint has never been seen before
                    if (allStrongFingerprints.Add(strongFingerprint))
                    {
                        // New strong fingerprint so increment counter and add to collection
                        numberUniqueStrongFingerprints++;
                    }

                    // Same check for the weak fingerprint part
                    if (allWeakFingerprints.Add(strongFingerprint.WeakFingerprint))
                    {
                        numberUniqueWeakFingerprints++;
                    }

                    CasHash casElement = strongFingerprint.CasElement;

                    // Check if the cas hash is the special no item value
                    if (casElement.Equals(CasHash.NoItem))
                    {
                        numberCasHashNoItemsForSession++;
                    }

                    // Collect unique CAS elements in the session
                    // (NoItem is added here as well; it is only counted separately above)
                    sessionCasHashes.Add(casElement);

                    // Check if cas hash has never been seen before for the whole cache
                    if (allCasHashes.Add(casElement))
                    {
                        numberUniqueCasHashesOverTime++;
                    }
                }

                // Stays null when content analysis was not requested
                SessionContentInfo sessionContentInfo = null;

                if (analyzeContent)
                {
                    // Contains every unique cas entry hash for the current session
                    var sessionCasEntries = new HashSet <CasHash>();

                    // Accumulate all the CasEntries for the session
                    // NOTE(review): the 32 passed to OutOfOrderTasks is presumably a limit on
                    // in-flight tasks - confirm against the OutOfOrderTasks implementation.
                    foreach (var task in m_session.GetCacheEntries(sessionStrongFingerprints).OutOfOrderTasks(32))
                    {
                        var possibleCasEntries = task.Result;
                        if (possibleCasEntries.Succeeded)
                        {
                            var casEntries = possibleCasEntries.Result;
                            sessionCasEntries.UnionWith(casEntries);
                        }
                        else
                        {
                            // A failed lookup is logged and counted, but does not stop the analysis
                            Console.Error.WriteLine("Unable to get CasEntries: {0}", possibleCasEntries.Failure.DescribeIncludingInnerFailures());
                            ++contentErrors;
                        }
                    }

                    // Retrieve the content size for each new CasEntry.
                    // "New" means not yet in allCasEntries; entries whose size was invalid in an
                    // earlier session were never added there, so they are queried again here.
                    foreach (var task in m_session.GetContentSizes(sessionCasEntries.Except(allCasEntries)).OutOfOrderTasks(32))
                    {
                        var tuple = task.Result;
                        contentSizeTable[tuple.Item1] = tuple.Item2;
                    }

                    // Accumulate the size of content we've already seen.
                    long totalContentSize = 0;
                    long newContentSize   = 0;
                    var  newContentCount  = 0;

                    foreach (var e in sessionCasEntries)
                    {
                        long length;

                        if (contentSizeTable.TryGetValue(e, out length))
                        {
                            if (ValidContentSize(length))
                            {
                                totalContentSize += length;

                                // First time this entry is seen across all sessions: count it as new
                                if (!allCasEntries.Contains(e))
                                {
                                    newContentSize  += length;
                                    newContentCount += 1;
                                    allCasEntries.Add(e);
                                }
                            }
                            else
                            {
                                // The stored length is not a valid size; it is reported as a ContentError value
                                Console.Error.WriteLine("Unable to find content length ({0}) for {1}", (ContentError)length, e);
                                ++contentErrors;
                            }
                        }
                        else
                        {
                            // No size was recorded for this entry at all, neither by this session's
                            // query nor by an earlier one; this is treated as fatal.
#pragma warning disable CA2201 // Do not raise reserved exception types
                            throw new ApplicationException($"No content length for {e}");
#pragma warning restore CA2201 // Do not raise reserved exception types
                        }
                    }

                    sessionContentInfo = new SessionContentInfo(
                        sessionCasEntries.Count,
                        totalContentSize,
                        newContentCount,
                        newContentSize,
                        contentErrors);
                }

                // Aggregate the counters and return the session's data
                SessionStrongFingerprintChurnInfo sessionStrongFingerprintChurnInfo =
                    new SessionStrongFingerprintChurnInfo(totalNumberStrongFingerprints, numberUniqueStrongFingerprints, numberUniqueWeakFingerprints);

                SessionInputListChurnInfo sessionInputListChurnInfo =
                    new SessionInputListChurnInfo(totalNumberStrongFingerprints, sessionCasHashes.Count, numberUniqueCasHashesOverTime, numberCasHashNoItemsForSession);

                yield return(new SessionChurnInfo(sessionName, sessionStrongFingerprintChurnInfo, sessionInputListChurnInfo, sessionContentInfo));
            }
        }
Example #6
0
        /// <summary>
        /// Collects the sizes of the input lists (CAS elements) and of the content
        /// (CAS entries) referenced by the strong fingerprints of one session.
        /// </summary>
        /// <param name="name">Name of the session to analyze</param>
        /// <returns>ContentBreakdownInfo built from the session name and the two size tables</returns>
        /// <remarks>
        /// Failures to retrieve the CAS entries of a strong fingerprint are written to
        /// Console.Error only; they are not reflected in the returned info.
        /// </remarks>
        private ContentBreakdownInfo AnalyzeSession(string name)
        {
            var casEntries          = new HashSet <CasHash>();
            var casElements         = new HashSet <CasHash>();
            var sfpSet              = new HashSet <StrongFingerprint>();
            var casElementSizeTable = new Dictionary <CasHash, long>();
            var casEntrySizeTable   = new Dictionary <CasHash, long>();

            // Enumerate strong fingerprints for the session...
            IEnumerable <Task <StrongFingerprint> > strongFingerprints = m_cache.EnumerateSessionStrongFingerprints(name).Result;

            // ...and for each, accumulate the input lists as part of the info we'll report.
            foreach (Task <StrongFingerprint> strongFingerprintTask in strongFingerprints.OutOfOrderTasks())
            {
                StrongFingerprint sfp = strongFingerprintTask.Result;

                // Grab the observed input list, skipping the special NoItem value.
                CasHash casElement = sfp.CasElement;
                if (!casElement.Equals(CasHash.NoItem))
                {
                    casElements.Add(casElement);
                }

                // Remember the SFP for the content scan below, so we're not doing this enumeration twice.
                sfpSet.Add(sfp);
            }

            // Now query the content for the SFPs in the session
            foreach (var task in m_session.GetCacheEntries(sfpSet).OutOfOrderTasks())
            {
                var possibleCasEntries = task.Result;
                if (possibleCasEntries.Succeeded)
                {
                    casEntries.UnionWith(possibleCasEntries.Result);
                }
                else
                {
                    // Best effort: log the failure and keep going with the entries we did get
                    Console.Error.WriteLine("Unable to get CasEntries: {0}", possibleCasEntries.Failure.DescribeIncludingInnerFailures());
                }
            }

            // With all the CAS entries in hand, get the sizes.  The size tables are local and
            // still empty at this point, and the sets already hold unique hashes, so the sets
            // are passed as they are.
            foreach (var task in m_session.GetContentSizes(casEntries).OutOfOrderTasks())
            {
                var tuple = task.Result;
                casEntrySizeTable[tuple.Item1] = tuple.Item2;
            }

            // Get the sizes of the input lists.
            foreach (var task in m_session.GetContentSizes(casElements).OutOfOrderTasks())
            {
                var tuple = task.Result;
                casElementSizeTable[tuple.Item1] = tuple.Item2;
            }

            return new ContentBreakdownInfo(name, casElementSizeTable, casEntrySizeTable);
        }