/// <summary>
/// Creates a directory entry populated from an AlphaFS <see cref="FileSystemEntryInfo"/>.
/// </summary>
/// <param name="fs">The enumerated entry whose name, timestamp, flags and size are copied.</param>
public DirEntry(FileSystemEntryInfo fs)
    : this()
{
    Path = fs.FileName;

    // TODO: only LastModified is guarded against bad dates here; Created and LastAccessed are not read at all.
    // Reading LastModified can throw when the stored timestamp cannot be converted to a DateTime
    // (e.g. 1/1/1601, or a file copied to a NAS with a date of 2098). Flag the entry instead of failing.
    try
    {
        Modified = fs.LastModified;
    }
    catch (ArgumentOutOfRangeException)
    {
        IsModifiedBad = true;
    }

    IsDirectory = fs.IsDirectory;
    IsSymbolicLink = fs.IsSymbolicLink;
    IsReparsePoint = fs.IsReparsePoint;

    // Files carry a size; directories carry an (initially empty) child list.
    if (!IsDirectory)
    {
        Size = fs.FileSize;
        return;
    }

    Children = new List<DirEntry>();
}
/// <summary>
/// Builds a backup item from an AlphaFS entry that has already been enumerated.
/// </summary>
/// <param name="fsi">The file system entry supplying path, size, timestamps and attributes.</param>
public NTBackupFile(Alphaleonis.Win32.Filesystem.FileSystemEntryInfo fsi)
{
    fileName = fsi.FullPath;
    this.Name = fsi.FileName;
    fseInfo = fsi;
    this.Kind = GetKind(fsi);

    // The size is taken straight from the enumeration entry; no separate size computation is done.
    this.FileSize = fsi.FileSize;

    if (this.Kind == FileType.Symlink)
    {
        this.TargetName = fsi.VirtualFullPath;
    }

    // NOTE(review): this assignment used to sit behind
    //   `Kind != FileType.Directory || Kind != FileType.Unsupported`
    // which is always true, so the alternative branch (DateTime.MaxValue.ToFileTimeUtc()) never ran.
    // The effective behavior is kept: every kind of item gets its real last-modified time.
    // If directories/unsupported items were meant to get a sentinel, that needs a deliberate change.
    this.LastModifiedTime = Utilities.Utils.GetUtcUnixTime(fsi.LastModified);

    this.LastMetadataModifiedTime = 0; // dummy value for correctness of incrementals using filecompare
    this.CreateTime = Utilities.Utils.GetUtcUnixTime(fsi.Created);

    GetHandleInfos();

    if (fseInfo.IsMountPoint || fseInfo.IsReparsePoint || fseInfo.IsSymbolicLink)
    {
        Console.WriteLine("** Item " + fileName + " is a " + this.Kind + ", target=" + TargetName);
    }

    if (this.Kind == FileType.Unsupported)
    {
        Console.WriteLine("unsupported file " + fileName + " with attributes " + fseInfo.Attributes.ToString());
    }

    this.Attributes = (int)fsi.Attributes;

    // Security info is not read here: it is unneeded as we save using BackupRead(), which includes it.
    BlockMetadata = new FileBlockMetadata();
}
/// <summary>
/// Copies name, last-modified time, type flags and (for non-directories) size from an AlphaFS entry.
/// </summary>
/// <param name="fs">The enumerated file system entry to copy from.</param>
public void Set(FileSystemEntryInfo fs)
{
    Name = fs.FileName;

    // Some files carry an unconvertible modified date (e.g. 1/1/1601) which makes AlphaFS throw;
    // remember that the date is bad rather than failing the whole entry.
    try
    {
        Modified = fs.LastModified;
    }
    catch (ArgumentOutOfRangeException)
    {
        IsModifiedBad = true;
    }

    IsDirectory = fs.IsDirectory;
    IsSymbolicLink = fs.IsSymbolicLink;
    IsReparsePoint = fs.IsReparsePoint;

    // Size is only recorded for non-directory entries.
    if (fs.IsDirectory)
    {
        return;
    }

    Size = (ulong)fs.FileSize;
}
/// <summary>
/// Allocates a new entry, fills it from <paramref name="fs"/> and optionally links it to a parent and a sibling.
/// </summary>
/// <remarks>
/// Useful, but RecurseTree has this inline (a bit more efficient, though arguably not worth it).
/// </remarks>
/// <param name="fs">The file system entry the new entry is populated from.</param>
/// <param name="parentIndex">Index of the parent entry; values that are not greater than 0 mean "no parent link".</param>
/// <param name="siblingIndex">Index of the sibling entry; values that are not greater than 0 mean "no sibling link".</param>
/// <returns>The index of the newly added entry.</returns>
public int AddEntry(FileSystemEntryInfo fs, int parentIndex = 0, int siblingIndex = 0)
{
    var newIndex = AddEntry();

    // Resolve the new index to its backing block and the slot inside that block.
    Entry[] ownBlock;
    var ownSlot = EntryIndex(newIndex, out ownBlock);
    ownBlock[ownSlot].Set(fs);

    if (parentIndex > 0)
    {
        ownBlock[ownSlot].Parent = parentIndex;

        // Point the parent's Child link at the entry just added.
        Entry[] parentBlock;
        var parentSlot = EntryIndex(parentIndex, out parentBlock);
        parentBlock[parentSlot].Child = newIndex;
    }

    if (siblingIndex > 0)
    {
        ownBlock[ownSlot].Sibling = siblingIndex;
    }

    return newIndex;
}
/// <summary>
/// Builds a backup item by looking up the file system entry for a full path.
/// </summary>
/// <param name="fullName">Full path of the item; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="fullName"/> is null.</exception>
public NTBackupFile(string fullName)
{
    if (fullName == null)
    {
        // A specific exception type instead of a bare Exception; callers catching Exception still work.
        throw new ArgumentNullException("fullName", "NTBackupFileXP(byname): NULL name");
    }

    Alphaleonis.Win32.Filesystem.FileInfo fsi = new Alphaleonis.Win32.Filesystem.FileInfo(fullName);
    fsi.Refresh();
    fseInfo = fsi.SystemInfo;
    fseInfo.FullPath = fullName;
    this.Name = fsi.Name;
    fileName = fullName;

    GetHandleInfos(); // gets ID and sparse attribute.

    this.FileStartPos = 0;
    this.ChunkStartPos = 0;
    this.Kind = GetKind(fseInfo);

    // GetSize is more precise (though not yet 100%) but slower. As we will generally work on snapshot,
    // don't be so obsessed with getting real size and reporting sizes changes during backup on NT.
    this.FileSize = fseInfo.FileSize;

    if (this.Kind == FileType.Symlink)
    {
        this.TargetName = fsi.SystemInfo.VirtualFullPath;
    }

    // NOTE(review): this assignment used to sit behind
    //   `Kind != FileType.Directory || Kind != FileType.Unsupported`
    // which is always true, so the alternative branch (LastModifiedTime = 0) never ran.
    // The effective behavior is kept: every kind of item gets its real last-write time.
    // If directories/unsupported items were meant to get a sentinel, that needs a deliberate change.
    this.LastModifiedTime = Utilities.Utils.GetUtcUnixTime(fsi.LastWriteTime);

    this.LastMetadataModifiedTime = 0; // dummy value for correctness of incrementals using filecompare
    this.CreateTime = Utilities.Utils.GetUtcUnixTime(fsi.CreationTime);

    if (fsi.Attributes.HasFlag(Alphaleonis.Win32.Filesystem.FileAttributes.SparseFile))
    {
        this.ChangeStatus |= DataLayoutInfos.SparseFile;
    }

    if (fseInfo.IsMountPoint || fseInfo.IsReparsePoint || fseInfo.IsSymbolicLink)
    {
        this.TargetName = fseInfo.VirtualFullPath;
        Console.WriteLine("** Item " + fileName + " is a " + this.Kind);
        Console.WriteLine("reparsepoint tag(s)=" + fsi.SystemInfo.ReparsePointTag.ToString());
    }

    if (this.Kind == FileType.Unsupported)
    {
        Console.WriteLine("unsupported file " + fileName + " with attributes " + fseInfo.Attributes.ToString());
    }

    this.Attributes = (int)fsi.Attributes;

    // Security info is not read here: it is unneeded as we save using BackupRead(), which includes it.
    BlockMetadata = new FileBlockMetadata();
}
/// <summary>
/// Filter callback deciding whether the directory scan should descend into <paramref name="obj"/>.
/// Logs the directory being scanned and always allows recursion.
/// </summary>
/// <param name="obj">The directory entry reported by the enumeration.</param>
/// <returns>Always <c>true</c>.</returns>
internal bool DecideIfRecurseDuringDirScan(FileSystemEntryInfo obj)
{
    string directoryPath = obj.FullPath;
    Logging.Debug("FolderWatcher: scanning directory: {0}", directoryPath);

    return true;
}
/// <summary>
/// Filter callback deciding whether a single enumerated entry should be included in the
/// set of files handed to the PDF importer.
/// </summary>
/// <remarks>
/// An entry is accepted only when it is a regular file with a <c>.pdf</c> extension that has not been
/// processed before, is larger than 66 bytes, can be fingerprinted, and whose fingerprint is neither
/// known to the library nor already part of the current import set. Counters in <c>watch_stats</c>
/// are updated along the way, and the scan may sleep here when it has been running for too long.
/// </remarks>
/// <param name="obj">The entry reported by the directory enumeration.</param>
/// <returns><c>true</c> when the entry must be imported; otherwise <c>false</c>.</returns>
/// <exception cref="OperationCanceledException">
/// Thrown when the application is shutting down, background tasks are disabled, or the library /
/// watch manager is gone.
/// </exception>
internal bool DecideIfIncludeDuringDirScan(FileSystemEntryInfo obj)
{
    bool isRegularFile = !(obj.IsDevice || obj.IsDirectory || obj.IsMountPoint || /* obj.IsReparsePoint (hardlink!) || */ obj.IsOffline || obj.IsSystem || obj.IsTemporary);

    Logging.Debug("FolderWatcher: testing {1} '{0}' for inclusion in the Qiqqa library.", obj.FullPath, isRegularFile ? "regular File" : obj.IsDirectory ? "directory" : "node");

    if (ShutdownableManager.Instance.IsShuttingDown)
    {
        Logging.Info("FolderWatcher: Breaking out of inner processing loop due to daemon termination");
        throw new OperationCanceledException("FolderWatcher: Breaking out of inner processing loop due to daemon termination");
    }

    if (Qiqqa.Common.Configuration.ConfigurationManager.Instance.ConfigurationRecord.DisableAllBackgroundTasks)
    {
        Logging.Info("FolderWatcher: Breaking out of inner processing loop due to DisableAllBackgroundTasks");
        throw new OperationCanceledException("FolderWatcher: Breaking out of inner processing loop due to DisableAllBackgroundTasks");
    }

    if (LibraryRef == null || folder_watcher_manager?.TypedTarget == null)
    {
        Logging.Info("FolderWatcher: Breaking out of inner processing loop due to disposed library and/or watch manager");
        throw new OperationCanceledException("FolderWatcher: Breaking out of inner processing loop due to disposed library and/or watch manager");
    }

    global_watch_stats.Inc(0.1);

    bool have_we_slept = false;

    if (watch_stats.index_processing_clock.ElapsedMilliseconds > MAX_SECONDS_PER_ITERATION)
    {
        watch_stats.daemon.Sleep(SECONDS_TO_RELAX_PER_ITERATION);

        // reset:
        watch_stats.index_processing_clock.Restart();
        have_we_slept = true;
    }

    // Only include *.pdf files. The comparison is ordinal and case-insensitive so it does not depend
    // on the current culture and tolerates a null extension.
    if (!isRegularFile || !string.Equals(obj.Extension, ".pdf", StringComparison.OrdinalIgnoreCase))
    {
        return false;
    }

    // check if the given file isn't already present in the library:
    watch_stats.scanned_file_count++;

    // If we already have this file in the "cache since we started", skip it
    if (folder_watcher_manager.TypedTarget.HaveProcessedFile(obj.FullPath))
    {
        Logging.Debug("FolderWatcher is skipping {0} as it has already been processed", obj.FullPath);
        watch_stats.skipped_file_count++;
        return false;
    }

    // As we have slept a while, it's quite unsure whether that file still exists.
    // Include it only when it still exists and otherwise be sure to retrigger a scan to follow up
    // any other directory changes.
    if (have_we_slept && !File.Exists(obj.FullPath))
    {
        Logging.Info("FolderWatcher is skipping {0} as it has disappeared while we were sleeping", obj.FullPath);
        FolderContentsHaveChanged = true;
        return false;
    }

    // ignore zero-length and tiny sized files as those sure are buggy/illegal PDFs:
    //
    // https://stackoverflow.com/questions/17279712/what-is-the-smallest-possible-valid-pdf
    if (obj.FileSize <= 66)
    {
        Logging.Warn("FolderWatcher is skipping {0} as it is too small to be a valid PDF file @ {1} bytes", obj.FullPath, obj.FileSize);
        return false;
    }

    // Check that the file is not still locked - if it is, mark that the folder is still "changed" and come back later.
    //
    // We do this at the same time as calculating the file fingerprint as both actions require (costly) File I/O
    // and can be folded together: if the fingerprint fails, that's 99.9% sure a failure in the File I/O, hence
    // a locked or otherwise inaccessible file.
    string fingerprint;
    try
    {
        fingerprint = StreamFingerprint.FromFile(obj.FullPath);
    }
    catch (Exception ex)
    {
        Logging.Error(ex, "Watched folder contains file '{0}' which is locked, so coming back later...", obj.FullPath);
        FolderContentsHaveChanged = true;
        return false;
    }

    // check if the PDF is already known:
    PDFDocument doc = LibraryRef.Xlibrary.GetDocumentByFingerprint(fingerprint);
    if (doc != null)
    {
        // Add this file to the list of processed files...
        Logging.Info("FolderWatcher is skipping {0} as it already exists in the library as fingerprint {1}, title: {2}", obj.FullPath, fingerprint, doc.TitleCombined);
        folder_watcher_manager.TypedTarget.RememberProcessedFile(obj.FullPath);
        watch_stats.skipped_file_count++;
        return false;
    }

    // A different path with identical content was already accepted during this scan.
    if (watch_stats.file_hashes_added.TryGetValue(fingerprint, out var dupe_file_path))
    {
        Logging.Info("FolderWatcher is skipping {0} as it has already been included in the import set as file {1} which has the same fingerprint {2}", obj.FullPath, dupe_file_path, fingerprint);
        watch_stats.skipped_file_count++;
        return false;
    }

    watch_stats.file_hashes_added.Add(fingerprint, obj.FullPath);
    watch_stats.files_added_since_last_sleep++;

    return true;
}
/// <summary>
/// The daemon code calls this occasionally to poke it into action to do work.
/// Scans the configured watch folder and imports the PDF files accepted by the scan filters,
/// repeating for as long as the folder is flagged as changed.
/// </summary>
/// <param name="daemon">The daemon driving this background task; handed to <c>watch_stats.Reset</c>.</param>
public void ExecuteBackgroundProcess(Daemon daemon)
{
    // We don't want to start watching files until the library is loaded...
    if (!(LibraryRef?.Xlibrary.LibraryIsLoaded ?? false))
    {
        Logging.Info("Library is not yet loaded, so waiting before watching...");

        // Keep the "changed" flag set so a later call still gets past the "nothing changed" check below.
        FolderContentsHaveChanged = true;
        return;
    }

    // Update our folder system watcher if necessary
    CheckIfFolderNameHasChanged();

    // If the current folder is blank, do nothing
    if (String.IsNullOrEmpty(configured_folder_to_watch))
    {
        return;
    }

    // If the folder does not exist, do nothing
    if (!Directory.Exists(configured_folder_to_watch))
    {
        Logging.Info("Watched folder {0} does not exist: watching this directory has been disabled.", configured_folder_to_watch);
        return;
    }

    // If the folder or its contents has not changed since the last time, do nothing
    if (!FolderContentsHaveChanged)
    {
        return;
    }

    if (!ConfigurationManager.IsEnabled(nameof(FolderWatcher)))
    {
        Logging.Info("Watched folder {0} will not be watched/scanned due to Developer Override setting {1}=false", configured_folder_to_watch, nameof(FolderWatcher));
        return;
    }

    Logging.Debug("FolderWatcher BEGIN");

    // To recover from a fatal library failure and re-indexing attempt for very large libraries,
    // we're better off processing a limited number of source files as we'll be able to see
    // *some* results more quickly and we'll have a working, though yet incomplete,
    // index in *reasonable time*.
    //
    // To reconstruct the entire index will take a *long* time. We grow the index and other meta
    // stores a bunch-of-files at a time and then repeat the entire maintenance process until
    // we'll be sure to have run out of files to process for sure...

    // Mark that we are now processing the folder
    while (TestAndReset_FolderContentsHaveChanged())
    {
        // If this library is busy, skip it for now
        if (Library.IsBusyAddingPDFs || Library.IsBusyRegeneratingTags)
        {
            Logging.Debug特("FolderWatcher: Not daemon processing any library that is busy with adds...");
            FolderContentsHaveChanged = true;
            break;
        }

        if (ShutdownableManager.Instance.IsShuttingDown)
        {
            Logging.Debug特("FolderWatcher: Breaking out of outer processing loop due to daemon termination");
            FolderContentsHaveChanged = true;
            break;
        }

        if (Qiqqa.Common.Configuration.ConfigurationManager.Instance.ConfigurationRecord.DisableAllBackgroundTasks)
        {
            Logging.Debug特("FolderWatcher: Breaking out of outer processing loop due to DisableAllBackgroundTasks");
            FolderContentsHaveChanged = true;
            break;
        }

        if (LibraryRef == null || folder_watcher_manager?.TypedTarget == null)
        {
            Logging.Debug特("FolderWatcher: Breaking out of outer processing loop due to disposed library and/or watch manager");
            FolderContentsHaveChanged = true;
            break;
        }

        if (!ConfigurationManager.IsEnabled(nameof(FolderWatcher)))
        {
            Logging.Info("Watched folder {0} will not be watched/scanned due to Developer Override setting {1}=false", configured_folder_to_watch, nameof(FolderWatcher));
            break;
        }

        // reset counters for logging/reporting:
        watch_stats.Reset(daemon);

        // If we get this far then there might be some work to do in the folder...
        Stopwatch clk = Stopwatch.StartNew();

        // [AlphaFS] DirectoryEnumerationFilters specifies custom filters for the enumeration methods of
        // Alphaleonis.Win32.Filesystem.Directory (EnumerateDirectories, EnumerateFiles, EnumerateFileSystemEntries):
        //
        // - InclusionFilter: entries are accepted only if they match the search pattern, the attributes and
        //   the custom criteria tested by this delegate (e.g. file size, pathname pattern).
        // - RecursionFilter: when the enumeration is recursive, a directory is traversed only if it matches
        //   the criteria of this delegate (allows e.g. custom handling of junctions/symbolic links and
        //   detection of cycles).
        // - ErrorFilter: when an error occurs and ContinueOnException is not set, an exception is thrown
        //   unless this delegate handles (filters out) the error, e.g. by logging it so that the enumeration
        //   continues while the user is still informed about errors.
        //
        global_watch_stats.Inc();

        DirectoryEnumerationFilters filter = new DirectoryEnumerationFilters();
        filter.ErrorFilter = DecideIfErrorDuringDirScan;
        filter.InclusionFilter = DecideIfIncludeDuringDirScan;
        filter.RecursionFilter = DecideIfRecurseDuringDirScan;

        // Note: don't use the CancellationToken, just throw an exception in the InclusionFilter when it's time to abort the scan.
        //filter.CancellationToken = null;

        IEnumerable<string> filenames_in_folder = Directory.EnumerateFiles(
            configured_folder_to_watch,
            DirectoryEnumerationOptions.Files | DirectoryEnumerationOptions.BasicSearch | //DirectoryEnumerationOptions.ContinueOnException |
            DirectoryEnumerationOptions.LargeCache | DirectoryEnumerationOptions.Recursive,
            filter); // SearchOption.AllDirectories);

        // NOTE(review): the result is an IEnumerable, so the actual directory walk presumably only happens
        // in the foreach below; the second timing log after that loop would then be the meaningful one.
        Logging.Debug特("Directory.EnumerateFiles took {0} ms", clk.ElapsedMilliseconds);

        // Do NOT count files which are already present in our library/DB,
        // despite the fact that those also *do* take time and effort to check
        // in the code above.
        //
        // The issue here is that when we would import files A,B,C,D,E,F,G,H,I,J,K,
        // we would do so in tiny batches, resulting in a rescan after each batch
        // where the already processed files will be included in the set, but must
        // be filtered out as 'already in there' in the code above.
        // Iff we had counted *all* files we inspect from the Watch Directory,
        // we would never make it past the first batch as then our count limit
        // would trigger already for every round through here!

        List<string> filenames_that_are_new = new List<string>();
        foreach (string filename in filenames_in_folder)
        {
            Logging.Info("FolderWatcher: {0} of {1} files have been processed/inspected (total {2} scanned, {3} skipped, {4} ignored)",
                watch_stats.processed_file_count,
                watch_stats.processing_file_count,
                watch_stats.scanned_file_count,
                watch_stats.skipped_file_count,
                watch_stats.scanned_file_count - watch_stats.skipped_file_count - watch_stats.processing_file_count);

            try
            {
                // check the file once again: it MAY have disappeared while we were slowly scanning the remainder of the dirtree.
                // The returned info is not needed; the call is made so that a failure lands in the catch below.
                File.GetFileSystemEntryInfo(filename);

                watch_stats.processing_file_count++;

                Logging.Info("FolderWatcher is importing {0}", filename);
                filenames_that_are_new.Add(filename);
            }
            catch (Exception ex)
            {
                Logging.Error(ex, "Folder Watcher: skipping file {0} due to file I/O error {1}", filename, ex.Message);
            }
        }

        Logging.Debug特("Directory.EnumerateFiles took {0} ms", clk.ElapsedMilliseconds);

        // Create the import records
        List<FilenameWithMetadataImport> filename_with_metadata_imports = new List<FilenameWithMetadataImport>();
        foreach (var filename in filenames_that_are_new)
        {
            filename_with_metadata_imports.Add(new FilenameWithMetadataImport
            {
                filename = filename,
                tags = new HashSet<string>(tags)
            });

#if false
            // delay until the PDF has actually been processed completely!
            //
            // Add this file to the list of processed files...
            folder_watcher_manager.RememberProcessedFile(filename);
#endif
        }

        // Get the library to import all these new files
        if (filename_with_metadata_imports.Count > 0)
        {
            ImportingIntoLibrary.AddNewPDFDocumentsToLibraryWithMetadata_SYNCHRONOUS(LibraryRef, true, filename_with_metadata_imports.ToArray());

            // TODO: refactor the ImportingIntoLibrary class
        }

        watch_stats.processed_file_count = watch_stats.processing_file_count;

        Logging.Info("FolderWatcher: {0} of {1} files have been processed/inspected (total {2} scanned, {3} skipped, {4} ignored)",
            watch_stats.processed_file_count,
            watch_stats.processing_file_count,
            watch_stats.scanned_file_count,
            watch_stats.skipped_file_count,
            watch_stats.scanned_file_count - watch_stats.skipped_file_count - watch_stats.processing_file_count);

        if (watch_stats.index_processing_clock.ElapsedMilliseconds >= FolderWatcher.MAX_SECONDS_PER_ITERATION)
        {
            Logging.Info("FolderWatcher: Taking a nap due to MAX_SECONDS_PER_ITERATION: {0} seconds consumed, {1} threads pending", watch_stats.index_processing_clock.ElapsedMilliseconds / 1E3, SafeThreadPool.QueuedThreadCount);

            watch_stats.daemon.Sleep(SECONDS_TO_RELAX_PER_ITERATION);

            watch_stats.index_processing_clock.Restart();
        }

        Logging.Debug("FolderWatcher End-Of-Round ({0} ms)", clk.ElapsedMilliseconds);
    }

    Logging.Debug("FolderWatcher END");
}
/// <summary>
/// Filter callback deciding whether a single enumerated entry should be included in the
/// set of files handed to the PDF importer.
/// </summary>
/// <remarks>
/// An entry is accepted only when it is a regular file with a <c>.pdf</c> extension that has not been
/// processed before, is larger than 66 bytes, can be fingerprinted, and whose fingerprint is neither
/// known to the library nor already part of the current import set. Counters in <c>watch_stats</c>
/// are updated along the way, and the scan may sleep here (scaled by the pending background work)
/// when it has been running for too long.
/// </remarks>
/// <param name="obj">The entry reported by the directory enumeration.</param>
/// <returns><c>true</c> when the entry must be imported; otherwise <c>false</c>.</returns>
/// <exception cref="OperationCanceledException">
/// Thrown when the application is shutting down, background tasks are disabled, or the library /
/// watch manager is gone.
/// </exception>
internal bool DecideIfIncludeDuringDirScan(FileSystemEntryInfo obj)
{
    bool isRegularFile = !(obj.IsDevice || obj.IsDirectory || obj.IsMountPoint || /* obj.IsReparsePoint (hardlink!) || */ obj.IsOffline || obj.IsSystem || obj.IsTemporary);

    Logging.Debug("FolderWatcher: testing {1} '{0}' for inclusion in the Qiqqa library.", obj.FullPath, isRegularFile ? "regular File" : obj.IsDirectory ? "directory" : "node");

    if (Utilities.Shutdownable.ShutdownableManager.Instance.IsShuttingDown)
    {
        Logging.Info("FolderWatcher: Breaking out of inner processing loop due to daemon termination");
        throw new OperationCanceledException("FolderWatcher: Breaking out of inner processing loop due to daemon termination");
    }

    if (Qiqqa.Common.Configuration.ConfigurationManager.Instance.ConfigurationRecord.DisableAllBackgroundTasks)
    {
        Logging.Info("FolderWatcher: Breaking out of inner processing loop due to DisableAllBackgroundTasks");
        throw new OperationCanceledException("FolderWatcher: Breaking out of inner processing loop due to DisableAllBackgroundTasks");
    }

    if (library?.TypedTarget == null || folder_watcher_manager?.TypedTarget == null)
    {
        Logging.Info("FolderWatcher: Breaking out of inner processing loop due to disposed library and/or watch manager");
        throw new OperationCanceledException("FolderWatcher: Breaking out of inner processing loop due to disposed library and/or watch manager");
    }

    bool have_we_slept = false;

    if (watch_stats.index_processing_clock.ElapsedMilliseconds > MAX_SECONDS_PER_ITERATION)
    {
        Logging.Info("FolderWatcher: Taking a nap due to MAX_SECONDS_PER_ITERATION: {0} seconds consumed, {1} threads pending", watch_stats.index_processing_clock.ElapsedMilliseconds / 1E3, SafeThreadPool.QueuedThreadCount);

        // Collect various 'pending' counts to help produce a stretched sleep/delay period
        // in order to allow the other background tasks to keep up with the PDF series being
        // fed into them by this task.
        int thr_cnt = Math.Max(0, SafeThreadPool.QueuedThreadCount - 2);
        int queued_cnt = Qiqqa.Documents.Common.DocumentQueuedStorer.Instance.PendingQueueCount;
        Qiqqa.Documents.PDF.PDFRendering.PDFTextExtractor.Instance.GetJobCounts(out var textify_count, out var ocr_count);

        // The computed delay is capped at 60 * 1000 before being passed to Sleep.
        int duration = 1 * 1000 + thr_cnt * 250 + queued_cnt * 20 + textify_count * 50 + ocr_count * 500;

        watch_stats.daemon.Sleep(Math.Min(60 * 1000, duration));

        // Relinquish control to the UI thread to make sure responsiveness remains tolerable at 100% CPU load.
        WPFDoEvents.WaitForUIThreadActivityDone();

        // reset:
        watch_stats.index_processing_clock.Restart();
        have_we_slept = true;
    }

    // Only include *.pdf files. The comparison is ordinal and case-insensitive so it does not depend
    // on the current culture and tolerates a null extension.
    if (!isRegularFile || !string.Equals(obj.Extension, ".pdf", StringComparison.OrdinalIgnoreCase))
    {
        return false;
    }

    // check if the given file isn't already present in the library:
    watch_stats.scanned_file_count++;

    // If we already have this file in the "cache since we started", skip it
    if (folder_watcher_manager.TypedTarget.HaveProcessedFile(obj.FullPath))
    {
        Logging.Debug("FolderWatcher is skipping {0} as it has already been processed", obj.FullPath);
        watch_stats.skipped_file_count++;
        return false;
    }

    // As we have slept a while, it's quite unsure whether that file still exists.
    // Include it only when it still exists and otherwise be sure to retrigger a scan to follow up
    // any other directory changes.
    if (have_we_slept && !File.Exists(obj.FullPath))
    {
        Logging.Info("FolderWatcher is skipping {0} as it has disappeared while we were sleeping", obj.FullPath);
        FolderContentsHaveChanged = true;
        return false;
    }

    // ignore zero-length and tiny sized files as those sure are buggy/illegal PDFs:
    //
    // https://stackoverflow.com/questions/17279712/what-is-the-smallest-possible-valid-pdf
    if (obj.FileSize <= 66)
    {
        Logging.Warn("FolderWatcher is skipping {0} as it is too small to be a valid PDF file @ {1} bytes", obj.FullPath, obj.FileSize);
        return false;
    }

    // Check that the file is not still locked - if it is, mark that the folder is still "changed" and come back later.
    //
    // We do this at the same time as calculating the file fingerprint as both actions require (costly) File I/O
    // and can be folded together: if the fingerprint fails, that's 99.9% sure a failure in the File I/O, hence
    // a locked or otherwise inaccessible file.
    string fingerprint;
    try
    {
        fingerprint = StreamFingerprint.FromFile(obj.FullPath);
    }
    catch (Exception ex)
    {
        Logging.Error(ex, "Watched folder contains file '{0}' which is locked, so coming back later...", obj.FullPath);
        FolderContentsHaveChanged = true;
        return false;
    }

    // check if the PDF is already known:
    PDFDocument doc = Library.GetDocumentByFingerprint(fingerprint);
    if (doc != null)
    {
        // Add this file to the list of processed files...
        Logging.Info("FolderWatcher is skipping {0} as it already exists in the library as fingerprint {1}, title: {2}", obj.FullPath, fingerprint, doc.TitleCombined);
        folder_watcher_manager.TypedTarget.RememberProcessedFile(obj.FullPath);
        watch_stats.skipped_file_count++;
        return false;
    }

    // A different path with identical content was already accepted during this scan.
    if (watch_stats.file_hashes_added.TryGetValue(fingerprint, out var dupe_file_path))
    {
        Logging.Info("FolderWatcher is skipping {0} as it has already been included in the import set as file {1} which has the same fingerprint {2}", obj.FullPath, dupe_file_path, fingerprint);
        watch_stats.skipped_file_count++;
        return false;
    }

    watch_stats.file_hashes_added.Add(fingerprint, obj.FullPath);

    return true;
}