Ejemplo n.º 1
0
        /// <summary>
        /// Computes the MD5 content hash of <paramref name="zfile"/>, records how long the
        /// hashing took, and writes the updated entry back to the shared ZapperFiles dictionary.
        /// Files that already carry a content hash are left untouched. On any error the file
        /// is tagged with the content hash "INVALID" instead.
        /// </summary>
        /// <param name="zfile">The file whose content hash should be calculated.</param>
        /// <remarks>
        /// NOTE(review): this method is <c>async void</c>, so callers cannot await it or
        /// observe exceptions raised after the first await.
        /// </remarks>
        public async void Hashify(ZapperFile zfile)
        {
            bool alreadyHashed = !string.IsNullOrWhiteSpace(zfile.ContentHash);
            if (alreadyHashed)
            {
                return;
            }

            try
            {
                zfile.LoadFileSystemInfo();

                // TODO Do timing differences between Crc32 and MD5
                var stopwatch = new Stopwatch();
                stopwatch.Start();
                var computedHash = await CalculateMD5Hash(zfile.FullPath);
                zfile.ContentHash = computedHash;
                stopwatch.Stop();

                zfile.HashTime = stopwatch.ElapsedMilliseconds;

                bool stored = ZapperProcessor.ZapperFiles.TryUpdate(zfile.FullPath, zfile, zfile);
                if (!stored)
                {
                    // Thrown inside the try on purpose: the catch below then tags the file as INVALID.
                    throw new FileZapperUpdateDictionaryFailureException("ZapperFiles", zfile.FullPath);
                }
            }
            catch (Exception ex)
            {
                Exceptioneer.Log(_log, ex, "Due to error, file tagged with INVALID content hash: " + zfile.FullPath);
                zfile.ContentHash = "INVALID";

                bool storedInvalid = ZapperProcessor.ZapperFiles.TryUpdate(zfile.FullPath, zfile, zfile);
                if (!storedInvalid)
                {
                    throw new FileZapperUpdateDictionaryFailureException("ZapperFiles", zfile.FullPath);
                }
            }
        }
Ejemplo n.º 2
0
 // Creates a small text file under the test root folder and verifies that a ZapperFile
 // constructed from its path exposes the expected name, size, system flag, directory
 // and extension.
 public void load_small_file()
 {
     string sFilePath = Path.Combine(_rootFolder.FullPath, SmallFileName);
     ZapperFileTestHelper.CreateTextFile(sFilePath, 5);
     var zfile = new ZapperFile(sFilePath);
     Assert.IsNotNull(zfile);
     // Expected value goes first so that failure messages report "expected" and "actual" correctly.
     Assert.AreEqual(SmallFileName, zfile.Name);
     Assert.AreNotEqual(0, zfile.Size);
     Assert.IsFalse(zfile.IsSystem);
     Assert.AreEqual(_rootFolder.FullPath, zfile.Directory);
     Assert.AreEqual(".txt", zfile.Extension);
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Assigns a rough score to a file so that a "winner" can be picked among duplicates.
 /// The score is the sum of four parts: the priority of the first configured root folder
 /// whose path prefixes the file's directory, a bonus for not living under a "misc" or
 /// "unfiltered" folder, a bonus per directory nesting level, and the file's age in years.
 /// Scores are not guaranteed to be unique.
 /// </summary>
 /// <param name="zfile">The file to score; its Score property is overwritten.</param>
 public void CalculateScore(ZapperFile zfile)
 {
     // Root part: "core" folders are assumed to carry 6 figure priorities.
     var matchingRoot = ZapperProcessor.Settings.RootFolders
         .FirstOrDefault(folder => zfile.Directory.StartsWith(folder.FullPath, StringComparison.OrdinalIgnoreCase));
     int rootPart = 0;
     if (matchingRoot != null)
     {
         rootPart = matchingRoot.Priority;
     }

     // Named folders should take priority over misc folders.
     int namedFolderPart = 10000;
     if (zfile.Directory.Contains("misc") || zfile.Directory.Contains("unfiltered"))
     {
         namedFolderPart = 0;
     }

     // Deeply nested is assumed to be better than shallow: count the backslashes in the directory.
     int separators = 0;
     foreach (char c in zfile.Directory)
     {
         if (c == '\\')
         {
             separators++;
         }
     }
     int nestingPart = (separators + 1) * 1000;

     // Older is assumed to be better; a missing modification date counts as "now".
     DateTime now = DateTime.Now;
     DateTime modified = zfile.FileModified ?? DateTime.Now;
     int agePart = Convert.ToInt32((now - modified).TotalDays / 365);

     zfile.Score = rootPart + namedFolderPart + nestingPart + agePart;
 }
Ejemplo n.º 4
0
 /// <summary>
 /// Creates a deletion record from an existing file entry: stamps it with the current
 /// local time and the given session id, then copies the file's properties across.
 /// </summary>
 /// <param name="zfile">The file entry that was deleted.</param>
 /// <param name="sessionId">Identifier of the session in which the deletion happened.</param>
 public ZapperFileDeleted(ZapperFile zfile, Guid sessionId)
 {
     // Deletion metadata.
     this.DeletedDate = DateTime.Now;
     this.DeletedSessionId = sessionId;

     // Snapshot of the source file's properties.
     this.ContentHash = zfile.ContentHash;
     this.Directory = zfile.Directory;
     this.Extension = zfile.Extension;
     this.FileModified = zfile.FileModified;
     this.FullPath = zfile.FullPath;
     this.HashTime = zfile.HashTime;
     this.IsSystem = zfile.IsSystem;
     this.Name = zfile.Name;
     this.Score = zfile.Score;
     this.Size = zfile.Size;
 }
        // Runs PhaseCalculateSamples over three files in a single root folder: alpha and bravo
        // are created with the default filler text, charlie with the alternate filler text.
        // Expects alpha and bravo to end up with the same non-empty sample hash and charlie
        // to have none.
        public void process()
        {
            var rootFolder = ZapperFileTestHelper.GetTestFileSubfolder("PhaseCalculateSamplesTester");
            System.Diagnostics.Trace.WriteLine(rootFolder.FullPath);

            // Create the three files on disk and wrap each one in a ZapperFile.
            string alphaPath = Path.Combine(rootFolder.FullPath, "alpha.txt");
            ZapperFileTestHelper.CreateTextFile(alphaPath, 50);
            var alpha = new ZapperFile(alphaPath);

            string bravoPath = Path.Combine(rootFolder.FullPath, "bravo.txt");
            ZapperFileTestHelper.CreateTextFile(bravoPath, 50);
            var bravo = new ZapperFile(bravoPath);

            string charliePath = Path.Combine(rootFolder.FullPath, "charlie.txt");
            ZapperFileTestHelper.CreateTextFile(charliePath, 50, ZapperFileTestHelper.AltFillerText);
            var charlie = new ZapperFile(charliePath);

            var files = new List<ZapperFile> { alpha, bravo, charlie };

            // Wire up settings, the phase under test and the processor.
            var settings = new FileZapperSettings
            {
                RootFolders = new List<ZapperFolder> { rootFolder }
            };
            var phase = new PhaseCalculateSamples { PhaseOrder = 1, IsInitialPhase = true };
            var allphases = new List<IZapperPhase> { phase };
            var processor = new ZapperProcessor(settings, allphases);

            for (int i = 0; i < files.Count; i++)
            {
                bool added = processor.ZapperFiles.TryAdd(files[i].FullPath, files[i]);
                Assert.IsTrue(added);
            }

            phase.Process();

            Assert.AreEqual(3, processor.ZapperFiles.Count);
            Assert.That(alpha.SampleHash, Is.Not.Null.And.Not.Empty);
            Assert.AreEqual(alpha.SampleHash, bravo.SampleHash);
            Assert.That(charlie.SampleHash, Is.Null.Or.Empty);
        }
        // Verifies PhaseRemoveDuplicates.CalculateScore for a file that sits directly in a
        // root folder with priority 100000 and was modified "now": the expected score is
        // the root priority plus 10000 plus 1000 per directory level (backslash count + 1).
        public void calculate_score()
        {
            var rootFolder = ZapperFileTestHelper.GetTestFileSubfolder("PhaseRemoveDuplicatesTester");
            rootFolder.Priority = 100000;
            System.Diagnostics.Trace.WriteLine(rootFolder.FullPath);

            var settings = new FileZapperSettings
            {
                RootFolders = new List<ZapperFolder> { rootFolder }
            };
            var phase = new PhaseRemoveDuplicates { PhaseOrder = 1, IsInitialPhase = true };
            var allphases = new List<IZapperPhase> { phase };

            // The processor instance is not used directly, but it is still constructed
            // with the settings and phase list as in the other phase tests.
            var processor = new ZapperProcessor(settings, allphases);

            var zfile = new ZapperFile
            {
                Directory = rootFolder.FullPath,
                FullPath = Path.Combine(rootFolder.FullPath, "test.txt"),
                FileModified = DateTime.Now
            };

            phase.CalculateScore(zfile);

            int directoryLevels = zfile.Directory.Count(ch => ch == '\\') + 1;
            int expectedScore = 110000 + (directoryLevels * 1000);
            Assert.AreEqual(expectedScore, zfile.Score);
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Computes the sample hash of <paramref name="zfile"/>: reads up to SampleBytesSize bytes
 /// starting at SampleBytesOffset, hashes the whole sample buffer with MD5, stores the
 /// hyphen-separated hex string in SampleHash and writes the entry back to the shared
 /// ZapperFiles dictionary. On any error the file is tagged with the sample hash "INVALID".
 /// </summary>
 /// <param name="zfile">The file whose sample hash should be calculated.</param>
 /// <exception cref="FileZapperUpdateDictionaryFailureException">
 /// Thrown when the "INVALID" marker itself cannot be written back to the dictionary.
 /// </exception>
 public void Hashify(ZapperFile zfile)
 {
     // NOTE(review): the guard checks ContentHash although this method fills SampleHash —
     // confirm this is intentional and not a copy/paste slip.
     if (!string.IsNullOrWhiteSpace(zfile.ContentHash))
     {
         return;
     }
     try
     {
         zfile.LoadFileSystemInfo();
         var buffer = new byte[zfile.SampleBytesSize];
         using (MD5CryptoServiceProvider hasher = new MD5CryptoServiceProvider())
         {
             byte[] hashvalue;
             using (var stream = File.OpenRead(zfile.FullPath))
             {
                 stream.Seek(zfile.SampleBytesOffset, SeekOrigin.Begin);

                 // Stream.Read may return fewer bytes than requested, so keep reading until
                 // the sample buffer is full or the end of the file is reached.
                 int filled = 0;
                 while (filled < buffer.Length)
                 {
                     int read = stream.Read(buffer, filled, buffer.Length - filled);
                     if (read == 0)
                     {
                         break;
                     }
                     filled += read;
                 }

                 // The whole buffer is hashed (any unread tail stays zero), so hash values
                 // match those produced when a single Read call filled the buffer.
                 hashvalue = hasher.ComputeHash(buffer);
             }
             zfile.SampleHash = BitConverter.ToString(hashvalue);
         }
         if (!ZapperProcessor.ZapperFiles.TryUpdate(zfile.FullPath, zfile, zfile))
         {
             // Thrown inside the try on purpose: the catch below then tags the file as INVALID.
             throw new FileZapperUpdateDictionaryFailureException("ZapperFiles", zfile.FullPath);
         }
     }
     catch (Exception ex)
     {
         Exceptioneer.Log(_log, ex, "Due to error, file tagged with INVALID sample hash: " + zfile.FullPath);
         zfile.SampleHash = "INVALID";
         if (!ZapperProcessor.ZapperFiles.TryUpdate(zfile.FullPath, zfile, zfile))
         {
             throw new FileZapperUpdateDictionaryFailureException("ZapperFiles", zfile.FullPath);
         }
     }
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Walks every configured root folder recursively and, in parallel, classifies each file:
 /// paths of 260 or more characters are reported and skipped, system files are ignored,
 /// files with an unwanted extension are sent to the recycle bin and recorded in
 /// ZapperFilesDeleted, and files that are neither skipped by extension nor outside the
 /// configured size limits are registered in ZapperFiles. Exceptions raised while a
 /// folder is processed in parallel are logged and do not stop the remaining folders.
 /// </summary>
 public void Process()
 {
     _log.Info(Name);
     foreach (var root in ZapperProcessor.Settings.RootFolders)
     {
         Console.WriteLine("{0}: Parsing folder {1}", DateTime.Now.ToString("HH:mm:ss.fff"), root.FullPath);
         var filepaths = Directory.EnumerateFiles(root.FullPath, "*.*", System.IO.SearchOption.AllDirectories);
         try
         {
             Parallel.ForEach(filepaths, path =>
             {
                 if (path.Length >= 260)
                 {
                     Console.WriteLine("{0}: Path too long - {1}", DateTime.Now.ToString("HH:mm:ss.fff"), path);
                     return;
                 }

                 var candidate = new ZapperFile(path);
                 if (candidate.IsSystem)
                 {
                     return;
                 }

                 if (ZapperProcessor.Settings.UnwantedExtensions.Contains(candidate.Extension))
                 {
                     // Unwanted file: recycle it first, then record the deletion.
                     FileSystem.DeleteFile(path, UIOption.OnlyErrorDialogs, RecycleOption.SendToRecycleBin);
                     ZapperFileDeleted deletedRecord = new ZapperFileDeleted(candidate, ZapperProcessor.ZapperSession.Id);
                     bool recorded = ZapperProcessor.ZapperFilesDeleted.TryAdd(deletedRecord.FullPath, deletedRecord);
                     if (!recorded)
                     {
                         throw new FileZapperAddToDictionaryFailureException("ZapperFilesDeleted", deletedRecord.FullPath);
                     }
                     return;
                 }

                 // Track the file only when its extension is not skipped and its size lies
                 // within the configured bounds (an upper bound <= 0 means "no upper limit").
                 bool shouldTrack = !ZapperProcessor.Settings.SkippedExtensions.Contains(candidate.Extension)
                     && candidate.Size > ZapperProcessor.Settings.IgnoreFilesBelowBytes
                     && (ZapperProcessor.Settings.IgnoreFilesOverBytes <= 0 || candidate.Size < ZapperProcessor.Settings.IgnoreFilesOverBytes);
                 if (!shouldTrack)
                 {
                     return;
                 }

                 bool tracked = ZapperProcessor.ZapperFiles.TryAdd(candidate.FullPath, candidate);
                 if (!tracked)
                 {
                     throw new FileZapperAddToDictionaryFailureException("ZapperFiles", candidate.FullPath);
                 }
             });
         }
         catch (AggregateException ae)
         {
             // Every inner exception is logged and treated as handled.
             foreach (var inner in ae.InnerExceptions)
             {
                 Exceptioneer.Log(_log, inner);
             }
         }
     }
 }
        // Runs PhaseCalculateSamples with two separate root folders (Alpha and Bravo), one
        // file in each, and DupeCheckIgnoresHierarchy switched off. Expects both files to
        // stay registered and neither of them to receive a sample hash.
        public void process_folders_hierarchyonly_multiroot()
        {
            var rootFolder = ZapperFileTestHelper.GetTestFileSubfolder("PhaseCalculateSamplesTester");
            System.Diagnostics.Trace.WriteLine(rootFolder.FullPath);

            // Two sibling root folders with different priorities.
            var alphaFolder = ZapperFileTestHelper.GetTestFileSubfolder(rootFolder.FullPath, "Alpha");
            alphaFolder.Priority = 300000;
            var bravoFolder = ZapperFileTestHelper.GetTestFileSubfolder(rootFolder.FullPath, "Bravo");
            bravoFolder.Priority = 100000;

            // One file per root folder.
            string alphaPath = Path.Combine(alphaFolder.FullPath, "alpha.txt");
            ZapperFileTestHelper.CreateTextFile(alphaPath, 50);
            var alpha = new ZapperFile(alphaPath);

            string bravoPath = Path.Combine(bravoFolder.FullPath, "bravo.txt");
            ZapperFileTestHelper.CreateTextFile(bravoPath, 50);
            var bravo = new ZapperFile(bravoPath);

            var files = new List<ZapperFile> { alpha, bravo };

            var settings = new FileZapperSettings
            {
                RootFolders = new List<ZapperFolder> { alphaFolder, bravoFolder }
            };
            var phase = new PhaseCalculateSamples { PhaseOrder = 1, IsInitialPhase = true };
            var allphases = new List<IZapperPhase> { phase };
            var processor = new ZapperProcessor(settings, allphases);

            for (int i = 0; i < files.Count; i++)
            {
                bool added = processor.ZapperFiles.TryAdd(files[i].FullPath, files[i]);
                Assert.IsTrue(added);
            }

            settings.DupeCheckIgnoresHierarchy = false;
            phase.Process();

            Assert.AreEqual(2, processor.ZapperFiles.Count);
            Assert.That(alpha.SampleHash, Is.Null.Or.Empty);
            Assert.That(bravo.SampleHash, Is.Null.Or.Empty);
        }