/// <summary>
/// Comparing two databases that contain no descriptors must yield no duplicates.
/// </summary>
public void EmptyDatabasesReturnsEmpty()
{
    // Arrange: both sides of the comparison are freshly created and left empty.
    var left = new MemoryFileDatabase();
    var right = new MemoryFileDatabase();
    var finder = new FileDuplicateFinder(new StatDuplicateComparer());

    // Act
    var duplicates = finder.Find(left, right);

    // Assert
    Assert.That(duplicates, Is.Empty);
}
/// <summary>
/// After scanning the test resources directory, the descriptor paths stored in the
/// database must be equivalent to <c>TestResources.AllPaths</c>.
/// </summary>
public void ScanDirectoryAddsAllFilesAndFoldersToDatabase()
{
    // Arrange: wire the crawler to an in-memory database with stat-based signatures.
    var store = new MemoryFileDatabase();
    var descriptorProvider = new SystemFileDescriptorProvider();
    var signatureGenerator = new StatSignatureGenerator(new SHA1HashGenerator());
    var progress = new NullProgressTracker();
    var crawler = new FileCrawler(store, descriptorProvider, signatureGenerator, progress);

    // Act
    crawler.ScanDirectory(TestResources.ResourcesPath);

    // Assert: order is irrelevant, only the set of paths matters.
    var expectedPaths = TestResources.AllPaths;
    var storedPaths = store.GetAllDescriptors().Select(descriptor => descriptor.Path);
    Assert.That(storedPaths, Is.EquivalentTo(expectedPaths));
}
/// <summary>
/// Scanning the test resources directory must return a root descriptor named after the
/// scanned path, with child descriptors for the two files and the sub folder.
/// </summary>
public void ScanDirectoryReturnsRootFileDescriptor()
{
    // Arrange
    var store = new MemoryFileDatabase();
    var descriptorProvider = new SystemFileDescriptorProvider();
    var signatureGenerator = new StatSignatureGenerator(new SHA1HashGenerator());
    var progress = new NullProgressTracker();
    var crawler = new FileCrawler(store, descriptorProvider, signatureGenerator, progress);

    // Act
    var root = crawler.ScanDirectory(TestResources.ResourcesPath);

    // Assert: the root carries the scanned path as its name ...
    Assert.That(root.Name, Is.EqualTo(TestResources.ResourcesPath));

    // ... and exposes its direct children (checked in any order).
    Assert.That(root.Children, Is.Not.Null, "Expected root descriptor to have child descriptors");
    var childNames = root.Children.Select(child => child.Name);
    var expectedChildNames = new[] { "albert-einstein.jpg", "TextFile.txt", "Sub" };
    Assert.That(childNames, Is.EquivalentTo(expectedChildNames));
}
/// <summary>
/// Two databases whose descriptors share no stat hash must yield no duplicates.
/// </summary>
public void DifferentDatabasesReturnsEmpty()
{
    // Arrange: the first database holds stat hashes 1 and 2 ...
    var left = new MemoryFileDatabase();
    var fileA = new FileDescriptor("A.txt") { StatHash = new byte[] { 1 } };
    left.UpdateDescriptor(fileA);
    var fileB = new FileDescriptor("B.txt") { StatHash = new byte[] { 2 } };
    left.UpdateDescriptor(fileB);

    // ... the second one holds stat hashes 3 and 4, so nothing overlaps.
    var right = new MemoryFileDatabase();
    var fileC = new FileDescriptor("C.txt") { StatHash = new byte[] { 3 } };
    right.UpdateDescriptor(fileC);
    var fileD = new FileDescriptor("D.txt") { StatHash = new byte[] { 4 } };
    right.UpdateDescriptor(fileD);

    var finder = new FileDuplicateFinder(new StatDuplicateComparer());

    // Act
    var duplicates = finder.Find(left, right);

    // Assert
    Assert.That(duplicates, Is.Empty);
}
/// <summary>
/// Every descriptor in the first database must be paired with every descriptor in the
/// second database that has the same stat hash: A/C (hash 1), B/E and B/F (hash 2).
/// D.txt (hash 4) has no counterpart and must not appear.
/// </summary>
public void ReturnsAllDuplicatePairs()
{
    // Arrange: first database.
    var left = new MemoryFileDatabase();
    var fileA = new FileDescriptor("A.txt") { StatHash = new byte[] { 1 } };
    left.UpdateDescriptor(fileA);
    var fileB = new FileDescriptor("B.txt") { StatHash = new byte[] { 2 } };
    left.UpdateDescriptor(fileB);

    // Arrange: second database, with two entries sharing hash 2.
    var right = new MemoryFileDatabase();
    var fileC = new FileDescriptor("C.txt") { StatHash = new byte[] { 1 } };
    right.UpdateDescriptor(fileC);
    var fileD = new FileDescriptor("D.txt") { StatHash = new byte[] { 4 } };
    right.UpdateDescriptor(fileD);
    var fileE = new FileDescriptor("E.txt") { StatHash = new byte[] { 2 } };
    right.UpdateDescriptor(fileE);
    var fileF = new FileDescriptor("F.txt") { StatHash = new byte[] { 2 } };
    right.UpdateDescriptor(fileF);

    var finder = new FileDuplicateFinder(new StatDuplicateComparer());

    // Act
    var pairs = finder.Find(left, right).ToList();

    // Assert: three pairs, in this exact order, each listing its members in this exact order.
    Assert.That(pairs, Has.Count.EqualTo(3));

    var firstPairNames = pairs[0].Descriptors.Select(d => d.Name);
    Assert.That(firstPairNames, Is.EqualTo(new[] { "A.txt", "C.txt" }));

    var secondPairNames = pairs[1].Descriptors.Select(d => d.Name);
    Assert.That(secondPairNames, Is.EqualTo(new[] { "B.txt", "E.txt" }));

    var thirdPairNames = pairs[2].Descriptors.Select(d => d.Name);
    Assert.That(thirdPairNames, Is.EqualTo(new[] { "B.txt", "F.txt" }));
}
/// <summary>
/// Command-line entry point. Parses the options, then either stat-scans the given
/// directory or loads a previously saved database from the given file, and finally runs
/// the optional steps selected by the options (content signatures, saving, verbose tree
/// output, file duplicates, top duplicates).
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
static void Main(string[] args)
{
    var options = new Options();
    var parsedOk = Parser.Default.ParseArguments(args, options);
    if (!parsedOk)
    {
        Environment.Exit(1);
    }

    var scanTarget = options.ScanFolder;
    var database = new MemoryFileDatabase();

    // Without a scan target there is nothing to do.
    if (string.IsNullOrEmpty(scanTarget))
    {
        return;
    }

    // The target is either a directory to crawl or a saved database file to load.
    FileDescriptor rootDescriptor = null;
    if (Directory.Exists(scanTarget))
    {
        rootDescriptor = StatScanFolder(database, scanTarget);
    }
    else if (File.Exists(scanTarget))
    {
        database = MemoryFileDatabase.Load(scanTarget);
        rootDescriptor = database.RootDescriptor;
    }
    else
    {
        Console.WriteLine($"Could not open {scanTarget}");
        Console.WriteLine(options.GetUsage());
        Environment.Exit(1);
    }

    if (options.ReadContent)
    {
        UpdateContentSignatures(database, rootDescriptor);
    }

    if (options.SaveDatabase)
    {
        database.SaveDefault();
    }

    if (options.Verbose)
    {
        PrintDescriptorTree(rootDescriptor, descriptor => descriptor.StatHash);
    }

    // Pick the comparison strategy once; both duplicate reports share it.
    IDuplicateComparer duplicateComparer;
    if (options.DuplicateMode == DuplicateMode.Stat)
    {
        duplicateComparer = new StatDuplicateComparer();
    }
    else
    {
        duplicateComparer = new ContentDuplicateComparer();
    }

    if (options.ShowFileDuplicates)
    {
        Console.WriteLine("FILE DUPLICATES");
        Console.WriteLine("---------------");
        var fileFinder = new FileDuplicateFinder(duplicateComparer);
        // The database is compared against itself.
        var fileDuplicates = fileFinder.Find(database, database);
        PrintDuplicates(fileDuplicates);
    }

    if (options.ShowTopDuplicates)
    {
        Console.WriteLine("TOP DUPLICATES");
        Console.WriteLine("--------------");
        var topFinder = new TopDescriptorDuplicateFinder(duplicateComparer);
        var topDuplicates = topFinder.Find(database, database);
        PrintDuplicates(topDuplicates);
    }
}
/// <summary>
/// Re-crawls the descriptor tree starting at <paramref name="rootDescriptor"/> using a
/// <c>SampleSignatureGenerator</c>, then prints the number of entries, the elapsed time
/// and the resulting throughput.
/// </summary>
/// <param name="database">Database whose descriptor count drives the progress tracker and the summary line.</param>
/// <param name="rootDescriptor">Root of the descriptor tree handed to the crawler.</param>
private static void UpdateContentSignatures(MemoryFileDatabase database, FileDescriptor rootDescriptor)
{
    Console.WriteLine("Updating content signatures");

    var progressTracker = new ConsoleProgressTracker(database.GetAllDescriptors().Count());
    var contentCrawler = new FileCrawler(
        new NullFileDatabase(),
        new RevisitDescriptorProvider(),
        new SampleSignatureGenerator(new SHA1HashGenerator()),
        progressTracker);

    var contentTimer = Stopwatch.StartNew();
    contentCrawler.ScanDirectory(rootDescriptor);
    contentTimer.Stop();

    var descriptorCount = database.GetAllDescriptors().Count();

    // A run that finishes in under one millisecond reports ElapsedMilliseconds == 0, which
    // would make the integer division below throw DivideByZeroException. Treat it as 1 ms.
    long elapsedMilliseconds = Math.Max(1L, contentTimer.ElapsedMilliseconds);

    // Multiply in 64-bit: 1000 * descriptorCount overflows Int32 beyond ~2.1 million entries.
    long filesPerSecond = 1000L * descriptorCount / elapsedMilliseconds;

    Console.WriteLine("Calculated content signature for {0} entries in {1}. {2} files per second", descriptorCount, contentTimer.Elapsed, filesPerSecond);
}
/// <summary>
/// Records the volume information of <paramref name="baseFolder"/> on the database,
/// crawls the folder with a <c>StatSignatureGenerator</c>, and prints the number of
/// entries, the elapsed time and the resulting throughput.
/// </summary>
/// <param name="database">Database that receives the root info and is passed to the crawler.</param>
/// <param name="baseFolder">Folder to scan.</param>
/// <returns>The descriptor returned by the crawler for <paramref name="baseFolder"/>.</returns>
private static FileDescriptor StatScanFolder(MemoryFileDatabase database, string baseFolder)
{
    var volumeInfo = new VolumeInfo(baseFolder);
    Console.WriteLine($"Name = {volumeInfo.VolumeName}, Serial = {volumeInfo.SerialNumber}");

    database.RootInfo = new RootInfo
    {
        RootPath = baseFolder,
        VolumeId = volumeInfo.SerialNumber,
        VolumeLabel = volumeInfo.VolumeName
    };

    var progressTracker = new ConsoleProgressTracker();
    var signatureGenerator = new StatSignatureGenerator(new SHA1HashGenerator());
    var crawler = new FileCrawler(database, new SystemFileDescriptorProvider(), signatureGenerator, progressTracker);

    var scanTimer = Stopwatch.StartNew();
    var rootDescriptor = crawler.ScanDirectory(baseFolder);
    scanTimer.Stop();

    var descriptorCount = database.GetAllDescriptors().Count();

    // A scan that finishes in under one millisecond reports ElapsedMilliseconds == 0, which
    // would make the integer division below throw DivideByZeroException. Treat it as 1 ms.
    long elapsedMilliseconds = Math.Max(1L, scanTimer.ElapsedMilliseconds);

    // Multiply in 64-bit: 1000 * descriptorCount overflows Int32 beyond ~2.1 million entries.
    long scansPerSecond = 1000L * descriptorCount / elapsedMilliseconds;

    Console.WriteLine("Scanned {0} entries in {1}. {2} stat scans per second", descriptorCount, scanTimer.Elapsed, scansPerSecond);

    return rootDescriptor;
}
/// <summary>
/// When a database is compared against itself, the finder must report exactly one
/// duplicate made up of the two folders C1 and C2.
/// </summary>
/// <remarks>
/// Tree under test (name, value passed to the builder):
/// <code>
/// A(1)
///   B1(2)
///   C1(3) -> D1(4) -> E1(5)
///   X(10)
///     B2(11)
///     C2(3) -> D2(4) -> E2(5)
/// </code>
/// </remarks>
public void FolderDuplicateReturnedForSelfFinding()
{
    // Arrange: first branch, directly under A.
    var b1 = new FileDescriptorBuilder("B1", 2);
    var e1 = new FileDescriptorBuilder("E1", 5);
    var d1 = new FileDescriptorBuilder("D1", 4, e1);
    var c1 = new FileDescriptorBuilder("C1", 3, d1);

    // Second branch, nested under X; C2/D2/E2 reuse the values of C1/D1/E1.
    var b2 = new FileDescriptorBuilder("B2", 11);
    var e2 = new FileDescriptorBuilder("E2", 5);
    var d2 = new FileDescriptorBuilder("D2", 4, e2);
    var c2 = new FileDescriptorBuilder("C2", 3, d2);
    var x = new FileDescriptorBuilder("X", 10, b2, c2);

    var rootBuilder = new FileDescriptorBuilder("A", 1, b1, c1, x);
    var database = new MemoryFileDatabase(rootBuilder.Build());
    var finder = new TopDescriptorDuplicateFinder(new StatDuplicateComparer());

    // Act: the same database is used on both sides.
    var duplicates = finder.Find(database, database).ToList();

    // Assert: only the top-most matching folders are reported, not their descendants.
    Assert.That(duplicates, Has.Count.EqualTo(1));
    var duplicateNames = duplicates.Single().Descriptors.Select(d => d.Name);
    Assert.That(duplicateNames, Is.EquivalentTo(new[] { "C1", "C2" }));
}