/// <summary>
/// Verifies that running the file duplicate finder over two freshly constructed
/// databases, to which no descriptors were added, yields an empty result.
/// </summary>
public void EmptyDatabasesReturnsEmpty()
        {
            // Arrange: a stat-based finder and two databases with nothing in them.
            var finder = new FileDuplicateFinder(new StatDuplicateComparer());
            var left = new MemoryFileDatabase();
            var right = new MemoryFileDatabase();

            // Act
            var duplicates = finder.Find(left, right);

            // Assert
            Assert.That(duplicates, Is.Empty);
        }
示例#2
0
 /// <summary>
 /// Verifies that after scanning the test resources directory, the paths of all
 /// descriptors stored in the database are equivalent to <c>TestResources.AllPaths</c>.
 /// </summary>
 public void ScanDirectoryAddsAllFilesAndFoldersToDatabase()
 {
     // Arrange: crawler wired to an in-memory database with stat-based signatures.
     var store = new MemoryFileDatabase();
     var descriptorProvider = new SystemFileDescriptorProvider();
     var signatureGenerator = new StatSignatureGenerator(new SHA1HashGenerator());
     var crawler = new FileCrawler(store, descriptorProvider, signatureGenerator, new NullProgressTracker());

     // Act
     crawler.ScanDirectory(TestResources.ResourcesPath);

     // Assert: compare the stored descriptor paths against the known resource paths.
     var expectedPaths = TestResources.AllPaths;
     var scannedPaths = store.GetAllDescriptors().Select(descriptor => descriptor.Path);
     Assert.That(scannedPaths, Is.EquivalentTo(expectedPaths));
 }
示例#3
0
        /// <summary>
        /// Verifies that <c>ScanDirectory</c> returns a descriptor whose name equals the
        /// scanned path and whose children carry the expected test-resource names.
        /// </summary>
        public void ScanDirectoryReturnsRootFileDescriptor()
        {
            // Arrange
            var store = new MemoryFileDatabase();
            var descriptorProvider = new SystemFileDescriptorProvider();
            var signatureGenerator = new StatSignatureGenerator(new SHA1HashGenerator());
            var crawler = new FileCrawler(store, descriptorProvider, signatureGenerator, new NullProgressTracker());
            var expectedChildNames = new[] { "albert-einstein.jpg", "TextFile.txt", "Sub" };

            // Act
            var root = crawler.ScanDirectory(TestResources.ResourcesPath);

            // Assert: the root is named after the scanned path and exposes its direct children.
            Assert.That(root.Name, Is.EqualTo(TestResources.ResourcesPath));
            Assert.That(root.Children, Is.Not.Null, "Expected root descriptor to have child descriptors");
            var childNames = root.Children.Select(child => child.Name);
            Assert.That(childNames, Is.EquivalentTo(expectedChildNames));
        }
        /// <summary>
        /// Verifies that no duplicates are reported when the two databases hold
        /// descriptors whose stat hashes are all different from one another.
        /// </summary>
        public void DifferentDatabasesReturnsEmpty()
        {
            // Arrange: first database holds stat hashes 1 and 2.
            var first = new MemoryFileDatabase();
            first.UpdateDescriptor(new FileDescriptor("A.txt")
            {
                StatHash = new byte[] { 1 }
            });
            first.UpdateDescriptor(new FileDescriptor("B.txt")
            {
                StatHash = new byte[] { 2 }
            });

            // Second database holds stat hashes 3 and 4, none shared with the first.
            var second = new MemoryFileDatabase();
            second.UpdateDescriptor(new FileDescriptor("C.txt")
            {
                StatHash = new byte[] { 3 }
            });
            second.UpdateDescriptor(new FileDescriptor("D.txt")
            {
                StatHash = new byte[] { 4 }
            });

            var finder = new FileDuplicateFinder(new StatDuplicateComparer());

            // Act
            var duplicates = finder.Find(first, second);

            // Assert
            Assert.That(duplicates, Is.Empty);
        }
        /// <summary>
        /// Verifies that every pairing of descriptors with equal stat hashes across the two
        /// databases is reported, in the order A/C, B/E, B/F, and that the unmatched
        /// descriptor D.txt does not appear.
        /// </summary>
        public void ReturnsAllDuplicatePairs()
        {
            // Arrange: first database holds stat hashes 1 and 2.
            var first = new MemoryFileDatabase();
            first.UpdateDescriptor(new FileDescriptor("A.txt") { StatHash = new byte[] { 1 } });
            first.UpdateDescriptor(new FileDescriptor("B.txt") { StatHash = new byte[] { 2 } });

            // Second database: C matches A, E and F both match B, D matches nothing.
            var second = new MemoryFileDatabase();
            second.UpdateDescriptor(new FileDescriptor("C.txt") { StatHash = new byte[] { 1 } });
            second.UpdateDescriptor(new FileDescriptor("D.txt") { StatHash = new byte[] { 4 } });
            second.UpdateDescriptor(new FileDescriptor("E.txt") { StatHash = new byte[] { 2 } });
            second.UpdateDescriptor(new FileDescriptor("F.txt") { StatHash = new byte[] { 2 } });

            var finder = new FileDuplicateFinder(new StatDuplicateComparer());
            var expectedPairs = new[]
            {
                new[] { "A.txt", "C.txt" },
                new[] { "B.txt", "E.txt" },
                new[] { "B.txt", "F.txt" }
            };

            // Act
            var duplicates = finder.Find(first, second).ToList();

            // Assert: exactly three results, each with the expected names in the expected order.
            Assert.That(duplicates, Has.Count.EqualTo(3));
            for (var index = 0; index < expectedPairs.Length; index++)
            {
                var names = duplicates[index].Descriptors.Select(descriptor => descriptor.Name);
                Assert.That(names, Is.EqualTo(expectedPairs[index]));
            }
        }
示例#6
0
        /// <summary>
        /// Program entry point. Parses the command-line options, then either scans the given
        /// directory or loads a saved database from the given file, and finally performs the
        /// optional steps selected by the options: content signature update, saving the
        /// database, printing the descriptor tree, and printing file and/or top duplicates.
        /// </summary>
        /// <param name="args">Command-line arguments handed to the option parser.</param>
        static void Main(string[] args)
        {
            var options = new Options();
            if (!Parser.Default.ParseArguments(args, options))
            {
                Environment.Exit(1);
            }

            var scanTarget = options.ScanFolder;
            var database = new MemoryFileDatabase();

            // Nothing to do when no scan target was supplied.
            if (string.IsNullOrEmpty(scanTarget))
            {
                return;
            }

            // The target is either a directory to scan or a previously saved database file.
            FileDescriptor rootDescriptor = null;
            if (Directory.Exists(scanTarget))
            {
                rootDescriptor = StatScanFolder(database, scanTarget);
            }
            else if (File.Exists(scanTarget))
            {
                database = MemoryFileDatabase.Load(scanTarget);
                rootDescriptor = database.RootDescriptor;
            }
            else
            {
                Console.WriteLine($"Could not open {scanTarget}");
                Console.WriteLine(options.GetUsage());
                Environment.Exit(1);
            }

            if (options.ReadContent)
            {
                UpdateContentSignatures(database, rootDescriptor);
            }

            if (options.SaveDatabase)
            {
                database.SaveDefault();
            }

            if (options.Verbose)
            {
                PrintDescriptorTree(rootDescriptor, descriptor => descriptor.StatHash);
            }

            // Pick the comparison strategy requested on the command line.
            IDuplicateComparer duplicateComparer;
            if (options.DuplicateMode == DuplicateMode.Stat)
            {
                duplicateComparer = new StatDuplicateComparer();
            }
            else
            {
                duplicateComparer = new ContentDuplicateComparer();
            }

            if (options.ShowFileDuplicates)
            {
                Console.WriteLine("FILE DUPLICATES");
                Console.WriteLine("---------------");
                var fileFinder = new FileDuplicateFinder(duplicateComparer);
                // The database is compared against itself.
                PrintDuplicates(fileFinder.Find(database, database));
            }

            if (options.ShowTopDuplicates)
            {
                Console.WriteLine("TOP DUPLICATES");
                Console.WriteLine("--------------");
                var topFinder = new TopDescriptorDuplicateFinder(duplicateComparer);
                PrintDuplicates(topFinder.Find(database, database));
            }
        }
示例#7
0
 /// <summary>
 /// Crawls the descriptor tree starting at <paramref name="rootDescriptor"/> using a
 /// <c>SampleSignatureGenerator</c> and prints the elapsed time and throughput to the console.
 /// </summary>
 /// <param name="database">Database whose descriptor count sizes the progress tracker and the statistics.</param>
 /// <param name="rootDescriptor">Descriptor at which the crawl starts.</param>
 private static void UpdateContentSignatures(MemoryFileDatabase database, FileDescriptor rootDescriptor)
 {
     Console.WriteLine("Updating content signatures");
     var progressTracker = new ConsoleProgressTracker(database.GetAllDescriptors().Count());
     // The crawler is handed a NullFileDatabase and a RevisitDescriptorProvider rather than
     // the real database and the system provider.
     var contentCrawler = new FileCrawler(new NullFileDatabase(), new RevisitDescriptorProvider(),
                                          new SampleSignatureGenerator(new SHA1HashGenerator()), progressTracker);
     var contentTimer = Stopwatch.StartNew();
     contentCrawler.ScanDirectory(rootDescriptor);
     contentTimer.Stop();
     var descriptorCount = database.GetAllDescriptors().Count();

     // ElapsedMilliseconds is 0 for runs shorter than one millisecond; clamp it so the
     // rate calculation cannot throw DivideByZeroException. Multiply as long so that
     // 1000 * count cannot overflow int for very large databases.
     var elapsedMilliseconds = Math.Max(1L, contentTimer.ElapsedMilliseconds);
     var filesPerSecond = 1000L * descriptorCount / elapsedMilliseconds;
     Console.WriteLine("Calculated content signature for {0} entries in {1}. {2} files per second", descriptorCount,
                       contentTimer.Elapsed, filesPerSecond);
 }
示例#8
0
 /// <summary>
 /// Records volume information for <paramref name="baseFolder"/> on the database, crawls the
 /// folder with a stat-based signature generator, and prints timing statistics to the console.
 /// </summary>
 /// <param name="database">Database that receives the root info and is passed to the crawler.</param>
 /// <param name="baseFolder">Path of the folder to scan.</param>
 /// <returns>The descriptor returned by the crawler for <paramref name="baseFolder"/>.</returns>
 private static FileDescriptor StatScanFolder(MemoryFileDatabase database, string baseFolder)
 {
     var volumeInfo = new VolumeInfo(baseFolder);
     Console.WriteLine($"Name = {volumeInfo.VolumeName}, Serial = {volumeInfo.SerialNumber}");
     database.RootInfo = new RootInfo
     {
         RootPath = baseFolder,
         VolumeId = volumeInfo.SerialNumber,
         VolumeLabel = volumeInfo.VolumeName
     };
     var progressTracker = new ConsoleProgressTracker();
     var signatureGenerator = new StatSignatureGenerator(new SHA1HashGenerator());
     var crawler = new FileCrawler(database, new SystemFileDescriptorProvider(), signatureGenerator, progressTracker);
     var scanTimer = Stopwatch.StartNew();
     var rootDescriptor = crawler.ScanDirectory(baseFolder);
     scanTimer.Stop();
     var descriptorCount = database.GetAllDescriptors().Count();

     // ElapsedMilliseconds is 0 for scans shorter than one millisecond; clamp it so the
     // rate calculation cannot throw DivideByZeroException. Multiply as long so that
     // 1000 * count cannot overflow int for very large scans.
     var elapsedMilliseconds = Math.Max(1L, scanTimer.ElapsedMilliseconds);
     var scansPerSecond = 1000L * descriptorCount / elapsedMilliseconds;
     Console.WriteLine("Scanned {0} entries in {1}. {2} stat scans per second", descriptorCount, scanTimer.Elapsed,
         scansPerSecond);
     return rootDescriptor;
 }
        /// <summary>
        /// Verifies that comparing a database against itself with the top-descriptor finder
        /// reports exactly one result, containing C1 and C2. Their descendants (D1/D2, E1/E2)
        /// are built with matching values too, but are not expected as separate results.
        /// </summary>
        public void FolderDuplicateReturnedForSelfFinding()
        {
            // Arrange: subtree C1 -> D1 -> E1, placed directly under the root A.
            var firstLeaf = new FileDescriptorBuilder("E1", 5);
            var firstMiddle = new FileDescriptorBuilder("D1", 4, firstLeaf);
            var firstFolder = new FileDescriptorBuilder("C1", 3, firstMiddle);

            // Subtree C2 -> D2 -> E2 built with the same values, nested one level deeper under X.
            var secondLeaf = new FileDescriptorBuilder("E2", 5);
            var secondMiddle = new FileDescriptorBuilder("D2", 4, secondLeaf);
            var secondFolder = new FileDescriptorBuilder("C2", 3, secondMiddle);
            var nestedParent = new FileDescriptorBuilder("X", 10,
                new FileDescriptorBuilder("B2", 11),
                secondFolder);

            // Root A has three children: B1, C1 and X.
            var root = new FileDescriptorBuilder("A", 1,
                new FileDescriptorBuilder("B1", 2),
                firstFolder,
                nestedParent);

            var database = new MemoryFileDatabase(root.Build());
            var finder = new TopDescriptorDuplicateFinder(new StatDuplicateComparer());

            // Act
            var duplicates = finder.Find(database, database).ToList();

            // Assert
            Assert.That(duplicates, Has.Count.EqualTo(1));
            var duplicateNames = duplicates.Single().Descriptors.Select(descriptor => descriptor.Name);
            Assert.That(duplicateNames, Is.EquivalentTo(new[] { "C1", "C2" }));
        }