/// <summary>
/// Queries the identity of <paramref name="handle" /> through <c>VersionedFileIdentity.TryQuery</c>.
/// When this table is a stub (<c>IsStub</c>), no query is attempted and a
/// <see cref="VersionedFileIdentity.IdentityUnavailabilityReason.NotSupported" /> failure is returned instead.
/// </summary>
private Possible<VersionedFileIdentity, Failure<VersionedFileIdentity.IdentityUnavailabilityReason>> TryQueryWeakIdentity(SafeFileHandle handle)
{
    if (!IsStub)
    {
        return VersionedFileIdentity.TryQuery(handle);
    }

    // Stub tables never touch the file system for identities.
    return new Failure<VersionedFileIdentity.IdentityUnavailabilityReason>(
        VersionedFileIdentity.IdentityUnavailabilityReason.NotSupported);
}
/// <summary>
/// Establishes a strong identity for <paramref name="handle" /> through <c>VersionedFileIdentity.TryEstablishStrong</c>,
/// forwarding <paramref name="flush" /> unchanged.
/// When this table is a stub (<c>IsStub</c>), nothing is attempted and a
/// <see cref="VersionedFileIdentity.IdentityUnavailabilityReason.NotSupported" /> failure is returned instead.
/// </summary>
private Possible<VersionedFileIdentity, Failure<VersionedFileIdentity.IdentityUnavailabilityReason>> TryEstablishStrongIdentity(
    SafeFileHandle handle,
    bool flush)
{
    if (!IsStub)
    {
        return VersionedFileIdentity.TryEstablishStrong(handle, flush: flush);
    }

    // Stub tables never touch the file system for identities.
    return new Failure<VersionedFileIdentity.IdentityUnavailabilityReason>(
        VersionedFileIdentity.IdentityUnavailabilityReason.NotSupported);
}
/// <summary>
/// Writes <paramref name="contents" /> to <paramref name="destinationPath" /> unless the table already knows that the
/// destination holds content whose hash equals <paramref name="contentsHash" />; in that case the write is skipped.
/// Note that an existing destination may be replaced (otherwise there would be no point in comparing hashes).
/// </summary>
/// <remarks>
/// <paramref name="contentsHash" /> must be faithful to <paramref name="contents" />: when a write is performed, that
/// hash is recorded for <paramref name="destinationPath" /> as-is, without being recomputed here.
/// </remarks>
/// <returns>
/// A <see cref="ConditionalUpdateResult" /> constructed from the negation of the value returned by
/// <c>FileUtilities.WriteAllBytesAsync</c> together with the destination's identity and content info
/// (either the already-known info, or the info recorded after the write).
/// </returns>
public static async Task<ConditionalUpdateResult> WriteBytesIfContentMismatchedAsync(
    this FileContentTable fileContentTable,
    string destinationPath,
    byte[] contents,
    ContentHash contentsHash)
{
    Contract.Requires(fileContentTable != null);
    Contract.Requires(!string.IsNullOrEmpty(destinationPath));
    Contract.Requires(contents != null);

    // Filled in by exactly one of the two callbacks below.
    VersionedFileIdentityAndContentInfo? resultingInfo = null;

    bool wasWritten = await FileUtilities.WriteAllBytesAsync(
        destinationPath,
        contents,
        predicate: existingHandle =>
        {
            // A null handle means there is no destination to compare against, so proceed with the write.
            if (existingHandle == null)
            {
                return true;
            }

            VersionedFileIdentityAndContentInfo? recorded =
                fileContentTable.TryGetKnownContentHash(destinationPath, existingHandle);

            // Proceed (true) when nothing is known about the destination or its known hash differs.
            bool mismatch = !recorded.HasValue || recorded.Value.FileContentInfo.Hash != contentsHash;
            if (!mismatch)
            {
                resultingInfo = recorded.Value;
            }

            return mismatch;
        },
        onCompletion: writtenHandle =>
        {
            // Reaching completion implies the predicate did not capture up-to-date destination info.
            Contract.Assume(resultingInfo == null);

            VersionedFileIdentity recordedIdentity = fileContentTable.RecordContentHash(
                destinationPath,
                writtenHandle,
                contentsHash,
                contents.Length,
                strict: true);

            resultingInfo = new VersionedFileIdentityAndContentInfo(
                recordedIdentity,
                new FileContentInfo(contentsHash, contents.Length));
        });

    Contract.Assume(resultingInfo != null);
    return new ConditionalUpdateResult(!wasWritten, resultingInfo.Value);
}
/// <summary>
/// Performs a smart copy in which no writes are performed if the destination already has the same content as the source
/// (as provided in <paramref name="sourceContentInfo" />).
/// Note that the destination may be replaced if it exists (otherwise there's no use in comparing hashes).
/// </summary>
/// <remarks>
/// Note that <paramref name="sourceContentInfo" /> should be faithful to <paramref name="sourcePath" />, since that hash
/// (and length) is recorded for <paramref name="destinationPath" /> when a copy is performed; it is not recomputed here.
/// </remarks>
/// <returns>
/// A <see cref="ConditionalUpdateResult" /> indicating if the copy was elided (up-to-date) or actually performed,
/// together with the destination's identity and content info.
/// </returns>
public static async Task<ConditionalUpdateResult> CopyIfContentMismatchedAsync(
    this FileContentTable fileContentTable,
    string sourcePath,
    string destinationPath,
    FileContentInfo sourceContentInfo)
{
    // Same precondition as WriteBytesIfContentMismatchedAsync: the table is dereferenced inside both callbacks,
    // so reject a null receiver up front rather than failing in the middle of a copy.
    Contract.Requires(fileContentTable != null);
    Contract.Requires(!string.IsNullOrEmpty(sourcePath));
    Contract.Requires(!string.IsNullOrEmpty(destinationPath));

    // Filled in by exactly one of the two callbacks below.
    VersionedFileIdentityAndContentInfo? destinationInfo = null;

    bool copied = await FileUtilities.CopyFileAsync(
        sourcePath,
        destinationPath,
        predicate: (source, dest) =>
        {
            // Nonexistent destination?
            if (dest == null)
            {
                return true;
            }

            VersionedFileIdentityAndContentInfo? knownDestinationInfo =
                fileContentTable.TryGetKnownContentHash(destinationPath, dest);

            // We return true (proceed) if nothing is known about the destination or there's a hash mismatch.
            if (!knownDestinationInfo.HasValue ||
                knownDestinationInfo.Value.FileContentInfo.Hash != sourceContentInfo.Hash)
            {
                return true;
            }

            destinationInfo = knownDestinationInfo.Value;
            return false;
        },
        onCompletion: (source, dest) =>
        {
            Contract.Assume(
                destinationInfo == null,
                "onCompletion should only happen when we committed to a copy (and then, we shouldn't have a destination version yet).");

            VersionedFileIdentity identity = fileContentTable.RecordContentHash(
                destinationPath,
                dest,
                sourceContentInfo.Hash,
                sourceContentInfo.Length,
                strict: true);

            destinationInfo = new VersionedFileIdentityAndContentInfo(identity, sourceContentInfo);
        });

    Contract.Assume(destinationInfo != null);
    return new ConditionalUpdateResult(!copied, destinationInfo.Value);
}
/// <summary>
/// Records <paramref name="hash" /> for the file behind <paramref name="handle" /> by delegating to the
/// path-first <c>RecordContentHash</c> overload, and pairs the resulting identity with a
/// <see cref="FileContentInfo" /> built from <paramref name="hash" /> and <paramref name="length" />.
/// </summary>
public VersionedFileIdentityAndContentInfo RecordContentHash(
    SafeFileHandle handle,
    string path,
    ContentHash hash,
    long length,
    bool? strict = default)
{
    VersionedFileIdentity recordedIdentity = RecordContentHash(path, handle, hash, length, strict: strict);
    var contentInfo = new FileContentInfo(hash, length);

    return new VersionedFileIdentityAndContentInfo(recordedIdentity, contentInfo);
}
/// <summary>
/// Records a <see cref="ContentHash" /> for the file behind the given stream. This hash mapping will be persisted to disk if the
/// table is saved with <see cref="SaveAsync" />. The underlying file handle should be opened with at most Read sharing
/// (having the handle should ensure the file is not being written).
/// This returns a <see cref="VersionedFileIdentityAndContentInfo"/>:
/// - The identity has the kind <see cref="VersionedFileIdentity.IdentityKind.StrongUsn"/> if a USN-based identity was successfully established;
///   the identity may have kind <see cref="VersionedFileIdentity.IdentityKind.Anonymous"/> if such an identity was unavailable.
/// - Regardless, the contained <see cref="FileContentInfo"/> carries <paramref name="hash"/> and the length read from <paramref name="stream"/>.
/// </summary>
/// <remarks>
/// An overload taking only a file path is intentionally not provided. This should be called after hashing or writing a file,
/// but before closing the handle. This way, there is no race between establishing the file's hash, some unrelated writer,
/// and recording its file version (e.g., USN) to hash mapping.
/// Note that this results in a small amount of I/O (e.g., on Windows, a file open and USN query), but never hashes the file or reads its contents.
/// The <paramref name="strict"/> corresponds to the <c>flush</c> parameter of <see cref="VersionedFileIdentity.TryEstablishStrong"/>;
/// when it is not specified, it defaults to whether <paramref name="stream"/> is writable.
/// </remarks>
public VersionedFileIdentityAndContentInfo RecordContentHash(
    FileStream stream,
    ContentHash hash,
    bool? strict = default)
{
    Contract.Requires(stream != null);

    // Unspecified strictness follows the stream's writability.
    if (!strict.HasValue)
    {
        strict = stream.CanWrite;
    }

    Contract.AssertDebug(stream.SafeFileHandle != null && stream.Name != null);

    long streamLength = stream.Length;
    VersionedFileIdentity recordedIdentity = RecordContentHash(
        stream.Name,
        stream.SafeFileHandle,
        hash,
        streamLength,
        strict: strict);

    var contentInfo = new FileContentInfo(hash, streamLength);
    return new VersionedFileIdentityAndContentInfo(recordedIdentity, contentInfo);
}
/// <summary>
/// Visits each entry in this table that is up-to-date (i.e., <see cref="TryGetKnownContentHash(string)"/>
/// would return a known content hash). The visitor <c>(file id and volume id, file handle, path, known usn, known hash) => bool</c> returns a bool indicating if visitation
/// should continue. The file handle given to the visitor is opened for <c>GENERIC_READ</c> access.
/// </summary>
/// <remarks>
/// This is intended as a diagnostic function rather than a course of normal operation. One can use this to e.g. validate that all content
/// hashes are accurate, a known set of entries are contained in the table, etc.
/// Entries whose file cannot be opened through <paramref name="accessor"/>, whose identity cannot be queried, or whose current USN
/// differs from the recorded one are skipped (the latter two are logged).
/// </remarks>
/// <returns>
/// <c>false</c> if the visitor asked to stop; <c>true</c> if all entries were processed.
/// </returns>
public bool VisitKnownFiles(
    IFileContentTableAccessor accessor,
    FileShare fileShare,
    Func<FileIdAndVolumeId, SafeFileHandle, string, Usn, ContentHash, bool> visitor)
{
    Contract.Requires(accessor != null);
    Contract.Requires(visitor != null);

    foreach (KeyValuePair<FileIdAndVolumeId, Entry> pair in m_entries)
    {
        FileIdAndVolumeId fileKey = pair.Key;
        Entry recorded = pair.Value;

        if (!accessor.TryGetFileHandleAndPathFromFileIdAndVolumeId(fileKey, fileShare, out SafeFileHandle handle, out string path))
        {
            // The file behind this entry could not be opened; nothing to visit.
            continue;
        }

        using (handle)
        {
            var possibleActualIdentity = TryQueryWeakIdentity(handle);

            // These cases should mirror parts of TryGetKnownContentHash (but note that here we already have a known file ID,
            // since we found it via a table entry rather than via some arbitrary handle).
            if (!possibleActualIdentity.Succeeded)
            {
                Contract.Assume(
                    possibleActualIdentity.Failure.Content == VersionedFileIdentity.IdentityUnavailabilityReason.NotSupported);
                Tracing.Logger.Log.StorageVersionedFileIdentityNotSupportedMiss(Events.StaticContext, path);
                continue;
            }

            VersionedFileIdentity actualIdentity = possibleActualIdentity.Result;

            if (actualIdentity.Usn != recorded.Usn)
            {
                // The file's current USN differs from the recorded one, so the entry is not up-to-date.
                Tracing.Logger.Log.StorageUnknownUsnMiss(
                    Events.StaticContext,
                    path,
                    fileKey.FileId.High,
                    fileKey.FileId.Low,
                    fileKey.VolumeSerialNumber,
                    readUsn: actualIdentity.Usn.Value,
                    knownUsn: recorded.Usn.Value,
                    knownContentHash: recorded.Hash.ToHex());
                continue;
            }

            Tracing.Logger.Log.StorageKnownUsnHit(
                Events.StaticContext,
                path,
                fileKey.FileId.High,
                fileKey.FileId.Low,
                fileKey.VolumeSerialNumber,
                usn: recorded.Usn.Value,
                contentHash: recorded.Hash.ToHex());

            if (!visitor(fileKey, handle, path, recorded.Usn, recorded.Hash))
            {
                return false;
            }
        }
    }

    return true;
}
/// <summary>
/// Records a <see cref="ContentHash" /> for the given file handle. This hash mapping will be persisted to disk if the
/// table is saved with <see cref="SaveAsync" />. The given file handle should be opened with at most Read sharing
/// (having the handle should ensure the file is not being written).
/// </summary>
/// <remarks>
/// This should be called after hashing or writing a file, but before closing the handle. This way, there is no race
/// between establishing the file's hash, some unrelated writer, and recording its file version (e.g., USN) to hash mapping.
/// Note that this results in a small amount of I/O (e.g., on Windows, a file open and USN query), but never hashes the file or reads its contents.
/// <paramref name="path"/> is used for logging only; the table entry is keyed by the volume serial number and file ID of the
/// established identity.
/// The <paramref name="strict"/> corresponds to the <c>flush</c> parameter of <see cref="VersionedFileIdentity.TryEstablishStrong"/>;
/// flushing is requested only when it is explicitly <c>true</c>.
/// </remarks>
/// <returns>
/// The identity established for <paramref name="handle"/>, or <see cref="VersionedFileIdentity.Anonymous"/> if no
/// strong identity could be established (in which case nothing is recorded).
/// </returns>
public VersionedFileIdentity RecordContentHash(
    string path,
    SafeFileHandle handle,
    ContentHash hash,
    long length,
    bool? strict = default)
{
    Contract.Requires(handle != null);
    Contract.Requires(!string.IsNullOrWhiteSpace(path));

    using (Counters.StartStopwatch(FileContentTableCounters.RecordContentHashDuration))
    {
        // TODO: The contract below looks very nice but breaks tons of UT
        // Fix the tests and enable the contract.
        // Contract.Requires(FileContentInfo.IsValidLength(length, hash));

        // Here we write a new change journal record for this file to get a 'strong' identity. This means that the USN -> hash table
        // only ever contains USNs whose records have the 'close' reason set. Recording USNs without that
        // reason set would not be correct; it would be possible that multiple separate changes (e.g. writes)
        // were represented with the same USN, and so intermediate USNs do not necessarily correspond to exactly
        // one snapshot of a file. See http://msdn.microsoft.com/en-us/library/windows/desktop/aa363803(v=vs.85).aspx
        Possible<VersionedFileIdentity, Failure<VersionedFileIdentity.IdentityUnavailabilityReason>> possibleVersionedIdentity =
            TryEstablishStrongIdentity(handle, flush: strict == true);

        if (!possibleVersionedIdentity.Succeeded)
        {
            // Log the warning at most once per table instance; later failures are silent.
            if (Interlocked.CompareExchange(ref m_changeJournalWarningLogged, 1, 0) == 0)
            {
                Tracing.Logger.Log.StorageFileContentTableIgnoringFileSinceVersionedFileIdentityIsNotSupported(
                    Events.StaticContext,
                    path,
                    possibleVersionedIdentity.Failure.DescribeIncludingInnerFailures());
            }

            return VersionedFileIdentity.Anonymous;
        }

        VersionedFileIdentity identity = possibleVersionedIdentity.Result;

        var newEntry = new Entry(identity.Usn, hash, length, EntryTimeToLive);

        // We allow concurrent update attempts with different observed USNs.
        // This is useful and relevant for two reasons:
        // - Querying a 'strong' identity (TryEstablishStrongIdentity) generates a new CLOSE record every time.
        // - Creating hardlinks generates 'hardlink change' records.
        // So, concurrently creating and recording (or even just recording) different links is possible, and
        // keeping the last stored entry (rather than highest-USN entry) can introduce false positives.
        var fileIdAndVolumeId = new FileIdAndVolumeId(identity.VolumeSerialNumber, identity.FileId);

        m_entries.AddOrUpdate(
            fileIdAndVolumeId,
            newEntry,
            updateValueFactory: (key, existingEntry) =>
            {
                // Keep an existing entry that carries a higher USN than the one just observed.
                if (existingEntry.Usn > newEntry.Usn)
                {
                    return existingEntry;
                }

                if (newEntry.Hash == existingEntry.Hash)
                {
                    Counters.IncrementCounter(FileContentTableCounters.NumUsnMismatch);
                    Tracing.Logger.Log.StorageUsnMismatchButContentMatch(
                        Events.StaticContext,
                        path,
                        existingEntry.Usn.Value,
                        newEntry.Usn.Value,
                        existingEntry.Hash.ToHex());
                }
                else
                {
                    // Stale USN.
                    Counters.IncrementCounter(FileContentTableCounters.NumContentMismatch);
                }

                return newEntry;
            });

        Tracing.Logger.Log.StorageRecordNewKnownUsn(
            Events.StaticContext,
            path,
            identity.FileId.High,
            identity.FileId.Low,
            identity.VolumeSerialNumber,
            identity.Usn.Value,
            hash.ToHex());

        return identity;
    }
}
/// <summary>
/// Looks up the <see cref="ContentHash" /> already recorded for the file behind the given handle. Returns null when no
/// usable hash is available: the identity cannot be queried, the file is unknown to the table, or the file's current USN
/// differs from the recorded one (such as when the file has been modified since a hash was last recorded).
/// </summary>
/// <remarks>
/// Note that this results in a small amount of I/O (e.g., on Windows, a file open and USN query), but never hashes the file or reads its contents.
/// <paramref name="path"/> is used for logging only.
/// </remarks>
public VersionedFileIdentityAndContentInfo? TryGetKnownContentHash(string path, SafeFileHandle handle)
{
    Contract.Requires(!string.IsNullOrWhiteSpace(path));
    Contract.Requires(handle != null);

    using (Counters.StartStopwatch(FileContentTableCounters.GetContentHashDuration))
    {
        var possibleIdentity = TryQueryWeakIdentity(handle);

        if (!possibleIdentity.Succeeded)
        {
            // We fail quietly for disabled journals on the query side; instead attempting to record a hash will fail.
            Contract.Assume(
                possibleIdentity.Failure.Content == VersionedFileIdentity.IdentityUnavailabilityReason.NotSupported);
            Tracing.Logger.Log.StorageVersionedFileIdentityNotSupportedMiss(Events.StaticContext, path);
            return null;
        }

        VersionedFileIdentity weakIdentity = possibleIdentity.Result;
        var tableKey = new FileIdAndVolumeId(weakIdentity.VolumeSerialNumber, weakIdentity.FileId);

        // We have a valid identity, but that identity is 'weak' and may correspond to an intermediate record (one without 'close' set).
        // We cannot discard such records here since we can't obtain a real 'Reason' field for a file's current USN record.
        // But we do know that any intermediate record will be a miss below, since we only record 'close' records (strong identities)
        // (see RecordContentHash).
        if (!m_entries.TryGetValue(tableKey, out Entry recorded))
        {
            Counters.IncrementCounter(FileContentTableCounters.NumFileIdMismatch);
            Tracing.Logger.Log.StorageUnknownFileMiss(
                Events.StaticContext,
                path,
                weakIdentity.FileId.High,
                weakIdentity.FileId.Low,
                weakIdentity.VolumeSerialNumber,
                weakIdentity.Usn.Value);

            return null;
        }

        // The file is known, but its current USN differs from the one the hash was recorded for.
        if (weakIdentity.Usn != recorded.Usn)
        {
            Tracing.Logger.Log.StorageUnknownUsnMiss(
                Events.StaticContext,
                path,
                weakIdentity.FileId.High,
                weakIdentity.FileId.Low,
                weakIdentity.VolumeSerialNumber,
                readUsn: weakIdentity.Usn.Value,
                knownUsn: recorded.Usn.Value,
                knownContentHash: recorded.Hash.ToHex());

            return null;
        }

        MarkEntryAccessed(tableKey, recorded);
        Counters.IncrementCounter(FileContentTableCounters.NumHit);
        Tracing.Logger.Log.StorageKnownUsnHit(
            Events.StaticContext,
            path,
            weakIdentity.FileId.High,
            weakIdentity.FileId.Low,
            weakIdentity.VolumeSerialNumber,
            usn: recorded.Usn.Value,
            contentHash: recorded.Hash.ToHex());

        // Note that we return a 'strong' version of the weak identity; since we matched an entry in the table, we know that the USN
        // actually corresponds to a strong identity (see RecordContentHash).
        var strongIdentity = new VersionedFileIdentity(
            weakIdentity.VolumeSerialNumber,
            weakIdentity.FileId,
            weakIdentity.Usn,
            VersionedFileIdentity.IdentityKind.StrongUsn);
        var knownContentInfo = new FileContentInfo(recorded.Hash, recorded.Length);

        return new VersionedFileIdentityAndContentInfo(strongIdentity, knownContentInfo);
    }
}
/// <summary>
/// Creates a pair of a file identity and the content info associated with it.
/// </summary>
/// <param name="identity">Stored as <c>Identity</c>.</param>
/// <param name="fileContentInfo">Stored as <c>FileContentInfo</c>.</param>
public VersionedFileIdentityAndContentInfo(VersionedFileIdentity identity, FileContentInfo fileContentInfo)
{
    FileContentInfo = fileContentInfo;
    Identity = identity;
}