Exemple #1
0
        /// <summary>
        /// Queries the versioned identity of the file behind <paramref name="handle"/> by delegating to
        /// <see cref="VersionedFileIdentity.TryQuery"/>.
        /// </summary>
        /// <remarks>
        /// When <see cref="IsStub"/> is set, no query is issued at all and a
        /// <see cref="VersionedFileIdentity.IdentityUnavailabilityReason.NotSupported"/> failure is returned instead.
        /// </remarks>
        /// <param name="handle">Handle of the file whose identity is queried.</param>
        /// <returns>The queried identity, or a failure carrying the reason the identity is unavailable.</returns>
        private Possible<VersionedFileIdentity, Failure<VersionedFileIdentity.IdentityUnavailabilityReason>> TryQueryWeakIdentity(SafeFileHandle handle)
        {
            if (!IsStub)
            {
                return VersionedFileIdentity.TryQuery(handle);
            }

            // Stub tables never touch the file system; they report identities as unsupported.
            return new Failure<VersionedFileIdentity.IdentityUnavailabilityReason>(
                VersionedFileIdentity.IdentityUnavailabilityReason.NotSupported);
        }
Exemple #2
0
        /// <summary>
        /// Establishes a strong versioned identity for the file behind <paramref name="handle"/> by delegating to
        /// <see cref="VersionedFileIdentity.TryEstablishStrong"/>.
        /// </summary>
        /// <remarks>
        /// When <see cref="IsStub"/> is set, nothing is established and a
        /// <see cref="VersionedFileIdentity.IdentityUnavailabilityReason.NotSupported"/> failure is returned instead.
        /// </remarks>
        /// <param name="handle">Handle of the file whose identity is established.</param>
        /// <param name="flush">Forwarded as the <c>flush</c> argument of <see cref="VersionedFileIdentity.TryEstablishStrong"/>.</param>
        /// <returns>The established identity, or a failure carrying the reason the identity is unavailable.</returns>
        private Possible<VersionedFileIdentity, Failure<VersionedFileIdentity.IdentityUnavailabilityReason>> TryEstablishStrongIdentity(
            SafeFileHandle handle,
            bool flush)
        {
            if (!IsStub)
            {
                return VersionedFileIdentity.TryEstablishStrong(handle, flush: flush);
            }

            // Stub tables never touch the file system; they report identities as unsupported.
            return new Failure<VersionedFileIdentity.IdentityUnavailabilityReason>(
                VersionedFileIdentity.IdentityUnavailabilityReason.NotSupported);
        }
        /// <summary>
        /// Writes <paramref name="contents" /> to <paramref name="destinationPath" /> unless the destination is already known
        /// to hold the same content (its known hash equals <paramref name="contentsHash" />), in which case the write is skipped.
        /// An existing destination whose content is unknown or different may be replaced.
        /// </summary>
        /// <remarks>
        /// <paramref name="contentsHash" /> must be faithful to <paramref name="contents" />: when a write is performed, that hash
        /// (together with the length of <paramref name="contents" />) is recorded for <paramref name="destinationPath" />.
        /// </remarks>
        /// <param name="fileContentTable">Table consulted for the destination's known hash and updated after a write.</param>
        /// <param name="destinationPath">Path of the file to write.</param>
        /// <param name="contents">Bytes to write.</param>
        /// <param name="contentsHash">Hash of <paramref name="contents" />.</param>
        /// <returns>
        /// A <see cref="ConditionalUpdateResult" /> built from whether the write was skipped and from the identity and
        /// content info of the destination.
        /// </returns>
        public static async Task<ConditionalUpdateResult> WriteBytesIfContentMismatchedAsync(
            this FileContentTable fileContentTable,
            string destinationPath,
            byte[] contents,
            ContentHash contentsHash)
        {
            Contract.Requires(fileContentTable != null);
            Contract.Requires(!string.IsNullOrEmpty(destinationPath));
            Contract.Requires(contents != null);

            // Filled in by the predicate when the write is skipped, or by onCompletion when it is performed.
            VersionedFileIdentityAndContentInfo? resultingInfo = null;

            bool wasWritten = await FileUtilities.WriteAllBytesAsync(
                destinationPath,
                contents,
                predicate: handle =>
                {
                    // A null handle means there is no destination yet, so the write must proceed.
                    if (handle == null)
                    {
                        return true;
                    }

                    VersionedFileIdentityAndContentInfo? existing = fileContentTable.TryGetKnownContentHash(destinationPath, handle);
                    bool mismatched = !existing.HasValue || existing.Value.FileContentInfo.Hash != contentsHash;

                    if (!mismatched)
                    {
                        // Destination is up to date; remember what is already there and veto the write.
                        resultingInfo = existing.Value;
                    }

                    return mismatched;
                },
                onCompletion: handle =>
                {
                    Contract.Assume(resultingInfo == null);
                    resultingInfo = new VersionedFileIdentityAndContentInfo(
                        fileContentTable.RecordContentHash(
                            destinationPath,
                            handle,
                            contentsHash,
                            contents.Length,
                            strict: true),
                        new FileContentInfo(contentsHash, contents.Length));
                });

            Contract.Assume(resultingInfo != null);
            return new ConditionalUpdateResult(!wasWritten, resultingInfo.Value);
        }
        /// <summary>
        /// Performs a smart copy in which no writes are performed if the destination already has the same content as the source
        /// (as provided in <paramref name="sourceContentInfo" />).
        /// Note that the destination may be replaced if it exists (otherwise there's no use in comparing hashes).
        /// </summary>
        /// <remarks>
        /// Note that <paramref name="sourceContentInfo" /> should be faithful to <paramref name="sourcePath" />, since its hash
        /// and length are recorded for <paramref name="destinationPath" /> when a copy is performed.
        /// </remarks>
        /// <param name="fileContentTable">Table consulted for the destination's known hash and updated after a copy. Must not be null.</param>
        /// <param name="sourcePath">Path of the file to copy from.</param>
        /// <param name="destinationPath">Path of the file to copy to.</param>
        /// <param name="sourceContentInfo">Hash and length of the source file.</param>
        /// <returns>Indicates if the copy was elided (up-to-date) or actually performed.</returns>
        public static async Task<ConditionalUpdateResult> CopyIfContentMismatchedAsync(
            this FileContentTable fileContentTable,
            string sourcePath,
            string destinationPath,
            FileContentInfo sourceContentInfo)
        {
            // Same precondition as WriteBytesIfContentMismatchedAsync: without it a null table only surfaces as a
            // NullReferenceException inside one of the callbacks, possibly after the copy has already been performed.
            Contract.Requires(fileContentTable != null);
            Contract.Requires(!string.IsNullOrEmpty(sourcePath));
            Contract.Requires(!string.IsNullOrEmpty(destinationPath));

            // Filled in by the predicate when the copy is elided, or by onCompletion when it is performed.
            VersionedFileIdentityAndContentInfo? destinationInfo = null;

            bool copied = await FileUtilities.CopyFileAsync(
                sourcePath,
                destinationPath,
                predicate: (source, dest) =>
                {
                    // Nonexistent destination?
                    if (dest == null)
                    {
                        return true;
                    }

                    VersionedFileIdentityAndContentInfo? knownDestinationInfo = fileContentTable.TryGetKnownContentHash(destinationPath, dest);

                    // We return true (proceed) if the destination's hash is unknown or differs from the source's.
                    if (!knownDestinationInfo.HasValue || knownDestinationInfo.Value.FileContentInfo.Hash != sourceContentInfo.Hash)
                    {
                        return true;
                    }

                    destinationInfo = knownDestinationInfo.Value;
                    return false;
                },
                onCompletion: (source, dest) =>
                {
                    Contract.Assume(
                        destinationInfo == null,
                        "onCompletion should only happen when we committed to a copy (and then, we shouldn't have a destination version yet).");
                    VersionedFileIdentity identity =
                        fileContentTable.RecordContentHash(
                            destinationPath,
                            dest,
                            sourceContentInfo.Hash,
                            sourceContentInfo.Length,
                            strict: true);
                    destinationInfo = new VersionedFileIdentityAndContentInfo(identity, sourceContentInfo);
                });

            Contract.Assume(destinationInfo != null);
            return new ConditionalUpdateResult(!copied, destinationInfo.Value);
        }
Exemple #5
0
        /// <summary>
        /// Records <paramref name="hash"/> for the file behind <paramref name="handle"/> and returns the resulting identity
        /// together with a <see cref="FileContentInfo"/> built from <paramref name="hash"/> and <paramref name="length"/>.
        /// </summary>
        /// <remarks>
        /// This is a convenience wrapper over the overload that returns only the <see cref="VersionedFileIdentity"/>.
        /// Note that this overload takes the handle first and the path second, the opposite of that overload.
        /// </remarks>
        /// <param name="handle">Handle of the file the hash belongs to.</param>
        /// <param name="path">Path of the file; forwarded to the wrapped overload.</param>
        /// <param name="hash">Content hash to record.</param>
        /// <param name="length">Content length paired with <paramref name="hash"/>.</param>
        /// <param name="strict">Forwarded unchanged to the wrapped overload.</param>
        public VersionedFileIdentityAndContentInfo RecordContentHash(
            SafeFileHandle handle,
            string path,
            ContentHash hash,
            long length,
            bool? strict = default)
        {
            return new VersionedFileIdentityAndContentInfo(
                RecordContentHash(path, handle, hash, length, strict: strict),
                new FileContentInfo(hash, length));
        }
Exemple #6
0
        /// <summary>
        /// Records a <see cref="ContentHash" /> for the file backing <paramref name="stream"/>, using the stream's name,
        /// file handle and current length. This hash mapping will be persisted to disk if the table is saved with
        /// <see cref="SaveAsync" />.
        /// This returns a <see cref="VersionedFileIdentityAndContentInfo"/>:
        /// - The identity is whatever the handle-based overload returns for the stream's handle
        ///   (<see cref="VersionedFileIdentity.Anonymous"/> when no strong identity could be established).
        /// - Regardless, the contained <see cref="FileContentInfo"/> holds <paramref name="hash"/> and the length of the stream.
        /// </summary>
        /// <remarks>
        /// This should be called after hashing or writing a file, but before closing the stream. This way, there is no race
        /// between establishing the file's hash, some unrelated writer, and recording its file version (e.g., USN) to hash mapping.
        /// When <paramref name="strict"/> is not given, it defaults to whether the stream is writable.
        /// The <paramref name="strict"/> corresponds to the <c>flush</c> parameter of <see cref="VersionedFileIdentity.TryEstablishStrong"/>
        /// </remarks>
        /// <param name="stream">Open stream over the file the hash belongs to. Must not be null.</param>
        /// <param name="hash">Content hash to record.</param>
        /// <param name="strict">Whether to flush when establishing the identity; null means "use <c>stream.CanWrite</c>".</param>
        public VersionedFileIdentityAndContentInfo RecordContentHash(
            FileStream stream,
            ContentHash hash,
            bool? strict = default)
        {
            Contract.Requires(stream != null);

            // An unspecified strictness falls back to the stream's writability.
            bool effectiveStrict = strict ?? stream.CanWrite;

            Contract.AssertDebug(stream.SafeFileHandle != null && stream.Name != null);

            long streamLength = stream.Length;
            VersionedFileIdentity recordedIdentity = RecordContentHash(
                stream.Name,
                stream.SafeFileHandle,
                hash,
                streamLength,
                strict: effectiveStrict);

            var contentInfo = new FileContentInfo(hash, streamLength);
            return new VersionedFileIdentityAndContentInfo(recordedIdentity, contentInfo);
        }
Exemple #7
0
        /// <summary>
        /// Visits each entry in this table that is up-to-date (i.e., <see cref="TryGetKnownContentHash(string, SafeFileHandle)"/>
        /// would return a known content hash). The visitor <c>(file id and volume id, file handle, path, known usn, known hash) => bool</c>
        /// returns a bool indicating if visitation should continue. The file handle given to the visitor is opened for
        /// <c>GENERIC_READ</c> access.
        /// </summary>
        /// <remarks>
        /// This is intended as a diagnostic function rather than a course of normal operation. One can use this to e.g. validate that all content
        /// hashes are accurate, a known set of entries are contained in the table, etc.
        /// Entries whose file cannot be opened through <paramref name="accessor"/>, whose identity cannot be queried, or whose
        /// current USN differs from the recorded one are skipped (the latter two cases are logged).
        /// </remarks>
        /// <param name="accessor">Opens a handle and resolves a path for each entry's file id and volume id.</param>
        /// <param name="fileShare">Sharing mode passed to <paramref name="accessor"/> when opening each file.</param>
        /// <param name="visitor">Callback invoked for each up-to-date entry; returning false stops the visitation.</param>
        /// <returns>False if the visitor stopped the visitation early; true if all entries were processed.</returns>
        public bool VisitKnownFiles(
            IFileContentTableAccessor accessor,
            FileShare fileShare,
            Func<FileIdAndVolumeId, SafeFileHandle, string, Usn, ContentHash, bool> visitor)
        {
            Contract.Requires(accessor != null);
            Contract.Requires(visitor != null);

            foreach (KeyValuePair<FileIdAndVolumeId, Entry> pair in m_entries)
            {
                FileIdAndVolumeId key = pair.Key;
                Entry recorded = pair.Value;

                if (!accessor.TryGetFileHandleAndPathFromFileIdAndVolumeId(key, fileShare, out SafeFileHandle handle, out string path))
                {
                    // The file for this entry could not be opened; nothing to visit.
                    continue;
                }

                using (handle)
                {
                    Possible<VersionedFileIdentity, Failure<VersionedFileIdentity.IdentityUnavailabilityReason>> identityQuery =
                        TryQueryWeakIdentity(handle);

                    // These cases should mirror parts of TryGetKnownContentHashAsync (but note that here we already have a known file ID, since
                    // we found it via a table entry rather than via some arbitrary handle).
                    if (!identityQuery.Succeeded)
                    {
                        Contract.Assume(
                            identityQuery.Failure.Content == VersionedFileIdentity.IdentityUnavailabilityReason.NotSupported);
                        Tracing.Logger.Log.StorageVersionedFileIdentityNotSupportedMiss(Events.StaticContext, path);
                        continue;
                    }

                    VersionedFileIdentity currentIdentity = identityQuery.Result;
                    if (currentIdentity.Usn != recorded.Usn)
                    {
                        // The file changed since the entry was recorded; the entry is stale.
                        Tracing.Logger.Log.StorageUnknownUsnMiss(
                            Events.StaticContext,
                            path,
                            key.FileId.High,
                            key.FileId.Low,
                            key.VolumeSerialNumber,
                            readUsn: currentIdentity.Usn.Value,
                            knownUsn: recorded.Usn.Value,
                            knownContentHash: recorded.Hash.ToHex());
                        continue;
                    }

                    Tracing.Logger.Log.StorageKnownUsnHit(
                        Events.StaticContext,
                        path,
                        key.FileId.High,
                        key.FileId.Low,
                        key.VolumeSerialNumber,
                        usn: recorded.Usn.Value,
                        contentHash: recorded.Hash.ToHex());

                    if (!visitor(key, handle, path, recorded.Usn, recorded.Hash))
                    {
                        return false;
                    }
                }
            }

            return true;
        }
Exemple #8
0
        /// <summary>
        /// Records a <see cref="ContentHash" /> for the given file handle. This hash mapping will be persisted to disk if the
        /// table is saved with <see cref="SaveAsync" />. The given file handle should be opened with at most Read sharing
        /// (having the handle should ensure the file is not being written).
        /// This returns the <see cref="VersionedFileIdentity"/> under which the hash was recorded:
        /// - The strong identity established for the handle, if one could be established;
        /// - <see cref="VersionedFileIdentity.Anonymous"/> if such an identity was unavailable, in which case nothing is recorded.
        /// </summary>
        /// <remarks>
        /// An overload taking only a file path is intentionally not provided; <paramref name="path"/> is used here for logging only.
        /// This should be called after hashing or writing a file, but before closing the handle. This way, there is no race
        /// between establishing the file's hash, some unrelated writer, and recording its file version (e.g., USN) to hash mapping.
        /// Note that this results in a small amount of I/O (e.g., on Windows, a file open and USN query), but never hashes the file or reads its contents.
        /// The <paramref name="strict"/> corresponds to the <c>flush</c> parameter of <see cref="VersionedFileIdentity.TryEstablishStrong"/>
        /// </remarks>
        /// <param name="path">Path of the file, used in log messages. Must not be null or whitespace.</param>
        /// <param name="handle">Handle of the file the hash belongs to. Must not be null.</param>
        /// <param name="hash">Content hash to record.</param>
        /// <param name="length">Content length stored alongside <paramref name="hash"/>.</param>
        /// <param name="strict">Flushing is requested only when this is exactly <c>true</c>; null and false both mean no flush.</param>
        public VersionedFileIdentity RecordContentHash(
            string path,
            SafeFileHandle handle,
            ContentHash hash,
            long length,
            bool? strict = default)
        {
            Contract.Requires(handle != null);
            Contract.Requires(!string.IsNullOrWhiteSpace(path));

            using (Counters.StartStopwatch(FileContentTableCounters.RecordContentHashDuration))
            {
                // TODO: The contract below looks very nice but breaks tons of UT
                // Fix the tests and enable the contract.
                // Contract.Requires(FileContentInfo.IsValidLength(length, hash));
                // Here we write a new change journal record for this file to get a 'strong' identity. This means that the USN -> hash table
                // only ever contains USNs whose records have the 'close' reason set. Recording USNs without that
                // reason set would not be correct; it would be possible that multiple separate changes (e.g. writes)
                // were represented with the same USN, and so intermediate USNs do not necessarily correspond to exactly
                // one snapshot of a file. See http://msdn.microsoft.com/en-us/library/windows/desktop/aa363803(v=vs.85).aspx
                Possible<VersionedFileIdentity, Failure<VersionedFileIdentity.IdentityUnavailabilityReason>> possibleVersionedIdentity =
                    TryEstablishStrongIdentity(handle, flush: strict == true);

                if (!possibleVersionedIdentity.Succeeded)
                {
                    // Log the warning only once per table; the compare-exchange lets exactly one caller win.
                    if (Interlocked.CompareExchange(ref m_changeJournalWarningLogged, 1, 0) == 0)
                    {
                        Tracing.Logger.Log.StorageFileContentTableIgnoringFileSinceVersionedFileIdentityIsNotSupported(
                            Events.StaticContext,
                            path,
                            possibleVersionedIdentity.Failure.DescribeIncludingInnerFailures());
                    }

                    return VersionedFileIdentity.Anonymous;
                }

                VersionedFileIdentity identity = possibleVersionedIdentity.Result;

                var newEntry = new Entry(identity.Usn, hash, length, EntryTimeToLive);

                // We allow concurrent update attempts with different observed USNs.
                // This is useful and relevant for two reasons:
                // - Querying a 'strong' identity (TryEstablishStrongIdentity) generates a new CLOSE record every time.
                // - Creating hardlinks generates 'hardlink change' records.
                // So, concurrently creating and recording (or even just recording) different links is possible, and
                // keeping the last stored entry (rather than highest-USN entry) can introduce false positives.
                var fileIdAndVolumeId = new FileIdAndVolumeId(identity.VolumeSerialNumber, identity.FileId);

                m_entries.AddOrUpdate(
                    fileIdAndVolumeId,
                    newEntry,
                    updateValueFactory: (key, existingEntry) =>
                    {
                        // An existing entry with a higher USN is newer than ours; keep it.
                        if (existingEntry.Usn > newEntry.Usn)
                        {
                            return existingEntry;
                        }

                        if (newEntry.Hash == existingEntry.Hash)
                        {
                            Counters.IncrementCounter(FileContentTableCounters.NumUsnMismatch);
                            Tracing.Logger.Log.StorageUsnMismatchButContentMatch(
                                Events.StaticContext,
                                path,
                                existingEntry.Usn.Value,
                                newEntry.Usn.Value,
                                existingEntry.Hash.ToHex());
                        }
                        else
                        {
                            // Stale USN.
                            Counters.IncrementCounter(FileContentTableCounters.NumContentMismatch);
                        }

                        return newEntry;
                    });

                Tracing.Logger.Log.StorageRecordNewKnownUsn(
                    Events.StaticContext,
                    path,
                    identity.FileId.High,
                    identity.FileId.Low,
                    identity.VolumeSerialNumber,
                    identity.Usn.Value,
                    hash.ToHex());

                return identity;
            }
        }
Exemple #9
0
        /// <summary>
        /// Looks up the <see cref="ContentHash" /> already recorded for the file behind <paramref name="handle"/>. Null is
        /// returned when no usable hash is available: the file's identity cannot be queried, the file has no entry in the
        /// table, or its current USN differs from the recorded one (e.g. the file was modified since the hash was recorded).
        /// </summary>
        /// <remarks>
        /// Note that this results in a small amount of I/O (e.g., on Windows, a file open and USN query), but never hashes the file or reads its contents.
        /// </remarks>
        /// <param name="path">Path of the file, used in log messages. Must not be null or whitespace.</param>
        /// <param name="handle">Handle of the file to look up. Must not be null.</param>
        /// <returns>The file's identity (marked as strong) and recorded content info on a hit; otherwise null.</returns>
        public VersionedFileIdentityAndContentInfo? TryGetKnownContentHash(string path, SafeFileHandle handle)
        {
            Contract.Requires(!string.IsNullOrWhiteSpace(path));
            Contract.Requires(handle != null);

            using (Counters.StartStopwatch(FileContentTableCounters.GetContentHashDuration))
            {
                Possible<VersionedFileIdentity, Failure<VersionedFileIdentity.IdentityUnavailabilityReason>> identityQuery =
                    TryQueryWeakIdentity(handle);

                if (!identityQuery.Succeeded)
                {
                    // We fail quietly for disabled journals on the query side; instead attempting to record a hash will fail.
                    Contract.Assume(
                        identityQuery.Failure.Content == VersionedFileIdentity.IdentityUnavailabilityReason.NotSupported);
                    Tracing.Logger.Log.StorageVersionedFileIdentityNotSupportedMiss(Events.StaticContext, path);
                    return null;
                }

                VersionedFileIdentity weakIdentity = identityQuery.Result;
                var tableKey = new FileIdAndVolumeId(weakIdentity.VolumeSerialNumber, weakIdentity.FileId);

                // We have a valid identity, but that identity is 'weak' and may correspond to an intermediate record (one without 'close' set).
                // We cannot discard such records here since we can't obtain a real 'Reason' field for a file's current USN record.
                // But we do know that any intermediate record will be a miss below, since we only record 'close' records (strong identities)
                // (see RecordContentHashAsync).
                if (!m_entries.TryGetValue(tableKey, out Entry recorded))
                {
                    Counters.IncrementCounter(FileContentTableCounters.NumFileIdMismatch);
                    Tracing.Logger.Log.StorageUnknownFileMiss(
                        Events.StaticContext,
                        path,
                        weakIdentity.FileId.High,
                        weakIdentity.FileId.Low,
                        weakIdentity.VolumeSerialNumber,
                        weakIdentity.Usn.Value);

                    return null;
                }

                if (weakIdentity.Usn != recorded.Usn)
                {
                    // The file's USN moved on since the entry was recorded, so the recorded hash is stale.
                    Tracing.Logger.Log.StorageUnknownUsnMiss(
                        Events.StaticContext,
                        path,
                        weakIdentity.FileId.High,
                        weakIdentity.FileId.Low,
                        weakIdentity.VolumeSerialNumber,
                        readUsn: weakIdentity.Usn.Value,
                        knownUsn: recorded.Usn.Value,
                        knownContentHash: recorded.Hash.ToHex());
                    return null;
                }

                MarkEntryAccessed(tableKey, recorded);
                Counters.IncrementCounter(FileContentTableCounters.NumHit);
                Tracing.Logger.Log.StorageKnownUsnHit(
                    Events.StaticContext,
                    path,
                    weakIdentity.FileId.High,
                    weakIdentity.FileId.Low,
                    weakIdentity.VolumeSerialNumber,
                    usn: recorded.Usn.Value,
                    contentHash: recorded.Hash.ToHex());

                // Note that we return a 'strong' version of the weak identity; since we matched an entry in the table, we know that the USN
                // actually corresponds to a strong identity (see RecordContentHashAsync).
                var strongIdentity = new VersionedFileIdentity(
                    weakIdentity.VolumeSerialNumber,
                    weakIdentity.FileId,
                    weakIdentity.Usn,
                    VersionedFileIdentity.IdentityKind.StrongUsn);
                var knownContent = new FileContentInfo(recorded.Hash, recorded.Length);

                return new VersionedFileIdentityAndContentInfo(strongIdentity, knownContent);
            }
        }
Exemple #10
0
 /// <summary>
 /// Creates a pair of a versioned file identity and the content info associated with it.
 /// </summary>
 /// <param name="identity">Value stored in <c>Identity</c>.</param>
 /// <param name="fileContentInfo">Value stored in <c>FileContentInfo</c>.</param>
 public VersionedFileIdentityAndContentInfo(VersionedFileIdentity identity, FileContentInfo fileContentInfo)
 {
     this.Identity = identity;
     this.FileContentInfo = fileContentInfo;
 }