/// <summary>
/// Accounts for a single file in the statistics and, when its size lies within the configured limits,
/// processes it together with the other files already seen with the same length.
/// </summary>
/// <param name="item">The file to handle.</param>
/// <param name="configuration">The configuration; supplies the minimum and maximum file size.</param>
/// <param name="stats">The statistics to update.</param>
/// <param name="seenItems">The files seen so far, grouped by file length and then by key.</param>
private static void _HandleFile(
    FileInfo item,
    Configuration configuration,
    RuntimeStats stats,
    ConcurrentDictionary<long, ConcurrentDictionary<string, FileEntry>> seenItems)
{
    stats.IncrementFiles();

    var sizeInBytes = item.Length;
    stats.IncrementBytes(sizeInBytes);

    // files outside the configured size window are only counted, never processed
    if (sizeInBytes < configuration.MinimumFileSizeInBytes
        || sizeInBytes > configuration.MaximumFileSizeInBytes)
    {
        return;
    }

    var sameSizeGroup = seenItems.GetOrAdd(
        sizeInBytes,
        size => new ConcurrentDictionary<string, FileEntry>());

    // Only one thread at a time may work on files of a given size. Otherwise two threads
    // could remove all known links to the same content at once and the data would be lost.
    lock (sameSizeGroup)
    {
        try
        {
            _HandleFileWithGivenSize(item, configuration, stats, sameSizeGroup);
        }
        catch (Exception e)
        {
            Console.WriteLine($"[Error] Could not process file {item.FullName}: {e.Message}");
        }
    }
}
/// <summary>
/// Handles a file that already is a hardlink. Depending on the configuration the link is deleted,
/// replaced by the file content, flagged read-only, or merely reported.
/// </summary>
/// <param name="item">The file which is a hardlink.</param>
/// <param name="configuration">The configuration selecting the action to take.</param>
/// <param name="stats">The statistics to update.</param>
/// <param name="knownWithThisLength">The files already seen with the same length as <paramref name="item"/>.</param>
private static void _HandleExistingHardLink(
    FileInfo item,
    Configuration configuration,
    RuntimeStats stats,
    ConcurrentDictionary<string, FileEntry> knownWithThisLength)
{
    if (configuration.DeleteHardLinkedFiles)
    {
        // forget the entry first, then delete the link itself
        _RemoveFileEntry(item, knownWithThisLength);
        _DeleteLink(item);
        stats.HardLinkStats.IncreaseDeleted();
        Console.WriteLine($"[Info] Deleted Hardlink {item.FullName}");
    }
    else if (configuration.RemoveHardLinks)
    {
        _RemoveFileEntry(item, knownWithThisLength);
        try
        {
            _ReplaceFileLinkWithFileContent(item);
            stats.HardLinkStats.IncreaseRemoved();
            Console.WriteLine($"[Info] Removed Hardlink {item.FullName}");
        }
        catch (Exception e)
        {
            // a failed replacement is reported only; it must not abort the caller
            Console.WriteLine($"[Error] Could not remove Hardlink {item.FullName}: {e.Message}");
        }
    }
    else if (configuration.SetReadOnlyAttributeOnExistingHardLinks
             && !item.Attributes.HasFlag(FileAttributes.ReadOnly))
    {
        Console.WriteLine($"[Info] Setting read-only attribute on Hardlink {item.FullName}");
        item.Attributes |= FileAttributes.ReadOnly;
    }
    else
    {
        // nothing to change, just report
        Console.WriteLine($"[Info] {item.FullName} is alrady a Hardlink");
    }
}
/// <summary>
/// Handles a specific file along with the group of files already seen with the same size.
/// Existing hardlinks and symbolic links are registered and passed to their dedicated handlers;
/// any other file is compared with the known files and, on the first match that can be linked,
/// replaced by a hardlink (or optionally a symbolic link) to that match.
/// </summary>
/// <param name="item">The file to handle.</param>
/// <param name="configuration">The configuration.</param>
/// <param name="stats">The statistics to update.</param>
/// <param name="knownWithThisLength">The files already seen with the same length as <paramref name="item"/>, by key.</param>
private static void _HandleFileWithGivenSize(
    FileInfo item,
    Configuration configuration,
    RuntimeStats stats,
    ConcurrentDictionary<string, FileEntry> knownWithThisLength)
{
    var myKey = _GenerateKey(item);
    var checksum = knownWithThisLength.GetOrAdd(myKey, new FileEntry(item));

    var isHardLink = false;
    IEnumerable<FileInfo> hardlinks;
    try
    {
        hardlinks = item.GetHardLinkTargets();
    }
    catch (Exception e)
    {
        _RemoveFileEntry(item, knownWithThisLength);
        Console.WriteLine($"[Error] Could not enumerate HardLinks {item.FullName}: {e.Message}");
        return;
    }

    // any reported target means this file is a hardlink; remember every target as seen
    foreach (var target in hardlinks)
    {
        isHardLink = true;
        knownWithThisLength.TryAdd(_GenerateKey(target), new FileEntry(target));
    }

    if (isHardLink)
    {
        stats.HardLinkStats.IncreaseSeen();
        if (configuration.ShowInfoOnly)
        {
            return;
        }

        _HandleExistingHardLink(item, configuration, stats, knownWithThisLength);
        return;
    }

    string symlink;
    try
    {
        symlink = item.GetSymbolicLinkTarget();
    }
    catch (Exception e)
    {
        _RemoveFileEntry(item, knownWithThisLength);
        Console.WriteLine($"[Error] Could not enumerate SymLink {item.FullName}: {e.Message}");
        return;
    }

    if (symlink != null)
    {
        knownWithThisLength.TryAdd(symlink, new FileEntry(new FileInfo(symlink)));
        stats.SymbolicLinkStats.IncreaseSeen();
        if (configuration.ShowInfoOnly)
        {
            return;
        }

        _HandleExistingSymbolicLink(item, configuration, stats, knownWithThisLength);
        return;
    }

    if (configuration.ShowInfoOnly)
    {
        return;
    }

    // find matching files in the seen list and try to hard- or symlink to one of them;
    // the keys of the matches are handed to the link helpers as the link source
    var sameFiles = knownWithThisLength
        .Where(kvp => kvp.Key != myKey)
        .Where(kvp => kvp.Value.Equals(checksum))
        .Select(kvp => kvp.Key);

    foreach (var sameFile in sameFiles)
    {
        // reserve a unique name next to the file, then free it so the link can be created there
        var temporaryFile = _CreateTemporaryFileInSameDirectory(item);
        temporaryFile.Delete();

        var isSymlink = false;
        try
        {
            LinkExtensions.CreateHardLinkFrom((FileInfo)temporaryFile, (string)sameFile);
        }
        catch (Exception e1)
        {
            if (configuration.AlsoTrySymbolicLinks)
            {
                isSymlink = true;
                try
                {
                    LinkExtensions.CreateSymbolicLinkFrom((FileInfo)temporaryFile, (string)sameFile);
                }
                catch (Exception e2)
                {
                    Console.WriteLine(
                        $"[Warning] Could not Symlink {item.FullName}({FilesizeFormatter.FormatUnit(item.Length, true)}) --> {sameFile}: {e2.Message}");
                    continue;
                }
            }
            else
            {
                Console.WriteLine(
                    $"[Warning] Could not Hardlink {item.FullName}({FilesizeFormatter.FormatUnit(item.Length, true)}) --> {sameFile}: {e1.Message}");
                continue;
            }
        }

        // swap the original file for the freshly created link
        var isAlreadyDeleted = false;
        try
        {
            item.Attributes &= ~FileAttributes.ReadOnly;
            item.Delete();
            isAlreadyDeleted = true;
            File.Move(temporaryFile.FullName, item.FullName);
        }
        catch
        {
            if (isAlreadyDeleted)
            {
                // undo file deletion by copying the content back through the link
                temporaryFile.CopyTo(item.FullName, true);

                // The content is restored, so the temporary link is obsolete. Remove it instead
                // of leaving it behind. Best effort only, so the original error is still rethrown.
                try
                {
                    temporaryFile.Delete();
                }
                catch (Exception cleanupError)
                {
                    Console.WriteLine(
                        $"[Warning] Could not delete temporary file {temporaryFile.FullName}: {cleanupError.Message}");
                }
            }
            else
            {
                // undo temp file creation
                temporaryFile.Delete();
            }

            throw;
        }

        if (isSymlink)
        {
            stats.SymbolicLinkStats.IncreaseCreated();
            if (configuration.SetReadOnlyAttributeOnNewSymbolicLinks)
            {
                item.Attributes |= FileAttributes.ReadOnly;
            }
        }
        else
        {
            stats.HardLinkStats.IncreaseCreated();
            if (configuration.SetReadOnlyAttributeOnNewHardLinks)
            {
                item.Attributes |= FileAttributes.ReadOnly;
            }
        }

        Console.WriteLine($"[Info] Created {(isSymlink ? "Symlink" : "Hardlink")} for {item.FullName}({FilesizeFormatter.FormatUnit(item.Length, true)}) --> {sameFile}");

        // one successful link is enough
        return;
    }
}
/// <summary>
/// A worker thread, pulling directories from the stack, queueing their sub-directories and handling
/// their files until the stack is empty and every worker has signalled that it is idle.
/// </summary>
/// <param name="stack">The stack of directories still to be processed.</param>
/// <param name="configuration">The configuration.</param>
/// <param name="stats">The statistics to update.</param>
/// <param name="seenItems">The files seen so far, grouped by file length and then by key.</param>
/// <param name="waiter">The event used to wake up workers waiting for work or for termination.</param>
/// <param name="state">A one-element array; element 0 is decremented while this worker is idle and incremented again when it resumes.</param>
private static void _ThreadWorker(
    ConcurrentStack<DirectoryInfo> stack,
    Configuration configuration,
    RuntimeStats stats,
    ConcurrentDictionary<long, ConcurrentDictionary<string, FileEntry>> seenItems,
    AutoResetEvent waiter,
    int[] state)
{
    while (true)
    {
        if (!stack.TryPop(out var current))
        {
            // when stack is empty, signal we're idle and if all other threads are also, end thread
            if (Interlocked.Decrement(ref state[0]) == 0)
            {
                // signal another thread to continue exiting
                waiter.Set();
                Console.WriteLine($"Ending Thread #{Thread.CurrentThread.ManagedThreadId}");
                return;
            }

            waiter.WaitOne();
            Interlocked.Increment(ref state[0]);
            continue;
        }

        // Enumerating a directory can fail (e.g. access denied, or the directory vanished).
        // The failure must stay inside the loop: if it escaped, this worker would never
        // decrement the counter and the waiting workers would never be released.
        try
        {
            // push directories and wake up any sleeping threads
            foreach (var directory in current.EnumerateDirectories())
            {
                stack.Push(directory);
                stats.IncrementFolders();

                // notify other threads which may be waiting for work
                waiter.Set();
            }

            foreach (var item in current.EnumerateFiles())
            {
                _HandleFile(item, configuration, stats, seenItems);
            }
        }
        catch (Exception e)
        {
            Console.WriteLine($"[Error] Could not process directory {current.FullName}: {e.Message}");
        }
    }
}
/// <summary>
/// Crawls the given folders with a set of worker threads and blocks until all of them have finished.
/// </summary>
/// <param name="directories">The root directories to start from.</param>
/// <param name="configuration">The configuration; supplies the maximum number of crawler threads.</param>
/// <param name="stats">The statistics to update.</param>
public static void ProcessFolders(IList<DirectoryInfo> directories, Configuration configuration, RuntimeStats stats)
{
    var seenFiles = new ConcurrentDictionary<long, ConcurrentDictionary<string, FileEntry>>();

    // seed the work stack with the root directories
    var stack = new ConcurrentStack<DirectoryInfo>();
    stack.PushRange(directories);
    stats.IncrementFolders(directories.Count);

    // always run at least one worker, whatever the configuration says
    var workerCount = Math.Max(1, configuration.MaximumCrawlerThreads);
    var workers = new Thread[workerCount];

    using (var idleSignal = new AutoResetEvent(false))
    {
        // single shared counter, initialised with the number of workers
        int[] activeWorkers = { workers.Length };

        for (var index = 0; index < workers.Length; index++)
        {
            var worker = new Thread(_ThreadWorker);
            workers[index] = worker;
            worker.Start(Tuple.Create(stack, configuration, stats, seenFiles, idleSignal, activeWorkers));
        }

        // keep the event alive until every worker has ended
        for (var index = 0; index < workers.Length; index++)
        {
            workers[index].Join();
        }
    }
}