Example 1
        public static List<EntryStore> LoadCurrentDirCache()
        {
            var roots = new List<EntryStore>();
            var files = AlphaFSHelper.GetFilesWithExtension(".", "cde");

            foreach (var file in files)
            {
                var re = Read(file);
                if (re != null)
                {
                    roots.Add(re);
                }
            }
            return roots;
        }
Example 2
        public void GetFilesWithExtension_FileContainingPatternUseToReturn()
        {
            const string name1 = "G-SN750B_02_S13UJ1NQ221583.cde";
            const string name2 = "G-SN750B_02_S13UJ1NQ221583.cde-backup-with-hash";
            var          f1    = File.Create(name1);
            var          f2    = File.Create(name2);

            f1.Close();
            f2.Close();
            // Materialise the result before the test files are deleted, in case the helper returns a lazy enumerable.
            var files = AlphaFSHelper.GetFilesWithExtension(".", "cde").ToList();

            foreach (var file in files)
            {
                Console.WriteLine($"file {file}");
            }

            //System.Threading.Thread.Sleep(1000); // delay 1 second

            File.Delete(name1);
            File.Delete(name2);

            Assert.That(files.Count, Is.EqualTo(1), "Oops, somehow we got a file not ending in \"cde\" in our result set.");
        }
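
The test above guards against a well-known quirk of .NET directory searches: a three-character wildcard pattern such as *.cde also matches longer extensions that merely begin with "cde" (for example ".cde-backup-with-hash"). AlphaFSHelper.GetFilesWithExtension itself is not shown on this page; the following is only a minimal sketch of the extra filtering such a helper needs, assuming plain System.IO rather than whatever the real AlphaFSHelper wraps, and the class name FileExtensionFilterSketch is hypothetical:

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;

public static class FileExtensionFilterSketch
{
    // Hypothetical sketch, not the real AlphaFSHelper implementation.
    // The wildcard search alone is not enough: "*.cde" also matches names like
    // "x.cde-backup-with-hash", so an explicit EndsWith check is applied on top.
    public static IEnumerable<string> GetFilesWithExtension(string path, string extension)
    {
        var suffix = "." + extension;
        return Directory.EnumerateFiles(path, "*" + suffix)
                        .Where(f => f.EndsWith(suffix, StringComparison.OrdinalIgnoreCase));
    }
}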
Example 3
        /// <summary>
        /// Apply an MD5 Checksum to all rootEntries
        /// </summary>
        /// <param name="rootEntries">Collection of rootEntries</param>
        public void ApplyMd5Checksum(IList<RootEntry> rootEntries)
        {
            _logger.LogDebug("PrePairSize Memory: {0}", _applicationDiagnostics.GetMemoryAllocated().FormatAsBytes());
            var newMatches = GetSizePairs(rootEntries);

            _logger.LogDebug("PostPairSize Memory: {0}", _applicationDiagnostics.GetMemoryAllocated().FormatAsBytes());

            var totalFilesInRootEntries = rootEntries.Sum(x => x.FileEntryCount);
            var totalEntriesInSizeDupes = newMatches.Sum(x => x.Value.Count);
            var longestListLength       = newMatches.Count > 0 ? newMatches.Max(x => x.Value.Count) : -1;
            var longestListSize         = newMatches.Count == 0 ? 0
                : newMatches.First(x => x.Value.Count == longestListLength).Key;

            _logger.LogInfo("Found {0} sets of files matched by file size", newMatches.Count);
            _logger.LogInfo("Total files processed for the file size matches is {0}", totalFilesInRootEntries);
            _logger.LogInfo("Total files found with at least 1 other file of same length {0}", totalEntriesInSizeDupes);
            _logger.LogInfo("Longest list of same sized files is {0} for size {1} ", longestListLength, longestListSize);

            //flatten
            _logger.LogDebug("Flatten List..");
            var flatList = newMatches.SelectMany(dirlist => dirlist.Value).ToList();

            _logger.LogDebug("Memory: {0}", _applicationDiagnostics.GetMemoryAllocated().FormatAsBytes());

            //group by volume/network share
            _logger.LogDebug("GroupBy Volume/Share..");

            // QUOTE
            // The IGrouping<TKey, TElement> objects are yielded in an order based on
            // the order of the elements in source that produced the first key of each
            // IGrouping<TKey, TElement>. Elements in a grouping are yielded in the
            // order they appear in source.
            //
            // By ordering from largest to smallest, the larger files are hashed first,
            // so breaking the process and then running --dupes still pays off for the larger files.
            var descendingFlatList = flatList.OrderByDescending(
                pde => pde.ChildDE.IsDirectory ? 0 : pde.ChildDE.Size); // directories last

            //var some = descendingFlatList.Take(20);
            //foreach (var pairDirEntry in some)
            //{
            //    Console.WriteLine("{0}", pairDirEntry.ChildDE.Size);
            //}

            var groupedByDirectoryRoot = descendingFlatList
                                         .GroupBy(x => AlphaFSHelper.GetDirectoryRoot(x.FullPath));

            _logger.LogDebug("Memory: {0}", _applicationDiagnostics.GetMemoryAllocated().FormatAsBytes());

            // Parallel at the grouping level; hopefully this is one group per disk.
            _logger.LogDebug("Beginning Hashing...");
            _logger.LogDebug("Memory: {0}", _applicationDiagnostics.GetMemoryAllocated().FormatAsBytes());

            var timer = new Stopwatch();

            timer.Start();

            var cts          = new CancellationTokenSource();
            var token        = cts.Token;
            var outerOptions = new ParallelOptions {
                CancellationToken = token
            };

            _duplicationStatistics.FilesToCheckForDuplicatesCount = totalEntriesInSizeDupes;
            try
            {
                Parallel.ForEach(groupedByDirectoryRoot, outerOptions, (grp, loopState) => {
                    var parallelOptions = new ParallelOptions
                    {
                        CancellationToken      = token,
                        MaxDegreeOfParallelism = 2
                    };
                    // This now tries to hash files in approximate order of largest to smallest file.
                    // Hitting break once the smallest size shown in the log gets down to a size you don't care about is viable.
                    // Then the full hash phase will start and you can hit break again to stop it after a while,
                    // so that --dupes can then be run on the largest hashed files.
                    grp.AsParallel()
                    .ForEachInApproximateOrder(parallelOptions, (flatFile, innerLoopState) => {
                        _duplicationStatistics.SeenFileSize(flatFile.ChildDE.Size);
                        CalculatePartialMD5Hash(flatFile.FullPath, flatFile.ChildDE);
                        if (Hack.BreakConsoleFlag)
                        {
                            Console.WriteLine("\n * Break key detected, exiting inner hashing phase.");
                            cts.Cancel();
                        }
                    });
                });
            }
            catch (OperationCanceledException) {}
            catch (Exception ex)
            {
                // Parallel cancellation: will be OperationCanceledException or AggregateException.
                Console.WriteLine($"Exception Type {ex.GetType()}");
                Console.WriteLine(ex.Message);
                return;
            }

            _logger.LogInfo("After initial partial hashing phase.");
            var perf =
                $"{((_duplicationStatistics.BytesProcessed*(1000.0/timer.ElapsedMilliseconds)))/(1024.0*1024.0):F2} MB/s";
            var statsMessage =
                $"FullHash: {_duplicationStatistics.FullHashes}  PartialHash: {_duplicationStatistics.PartialHashes}  Processed: {_duplicationStatistics.BytesProcessed/(1024.0*1024.0):F2} MB  NotProcessed: {_duplicationStatistics.BytesNotProcessed/(1024.0*1024.0):F2} MB  Perf: {perf}\nTotal Data Encountered: {_duplicationStatistics.TotalFileBytes/(1024.0*1024.0):F2} MB\nFailedHash: {_duplicationStatistics.FailedToHash} (almost always because the file could not be opened for reading)";

            _logger.LogInfo(statsMessage);

            Hack.BreakConsoleFlag = false; // requires pressing break again to stop the full-hash phase.
            CheckDupesAndCompleteFullHash(rootEntries);

            _logger.LogInfo(string.Empty);
            _logger.LogInfo("After hashing completed.");
            timer.Stop();
            perf =
                $"{((_duplicationStatistics.BytesProcessed*(1000.0/timer.ElapsedMilliseconds)))/(1024.0*1024.0):F2} MB/s";
            statsMessage =
                $"FullHash: {_duplicationStatistics.FullHashes}  PartialHash: {_duplicationStatistics.PartialHashes}  Processed: {_duplicationStatistics.BytesProcessed/(1024.0*1024.0):F2} MB  Perf: {perf}\nFailedHash: {_duplicationStatistics.FailedToHash} (almost always because the file could not be opened for reading)";
            _logger.LogInfo(statsMessage);
        }
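
ForEachInApproximateOrder is an extension method from the cde code base and is not reproduced on this page. The sketch below only illustrates the shape such a helper could take, assuming all it has to do is feed the already size-ordered query to Parallel.ForEach while honouring the caller's CancellationToken and MaxDegreeOfParallelism; the real implementation may partition or buffer differently, and the class name ParallelQueryExtensionsSketch is hypothetical:

using System;
using System.Collections.Concurrent;
using System.Linq;
using System.Threading.Tasks;

public static class ParallelQueryExtensionsSketch
{
    // Hypothetical sketch only; the real cde extension method may differ.
    // A NoBuffering partitioner hands items to workers one at a time in source order,
    // so with the largest-first ordering built above, hashing starts with the biggest
    // files and only deviates from that order by roughly the degree of parallelism.
    public static void ForEachInApproximateOrder<T>(
        this ParallelQuery<T> source,
        ParallelOptions options,
        Action<T, ParallelLoopState> body)
    {
        var oneAtATime = Partitioner.Create(
            source.AsSequential(), EnumerablePartitionerOptions.NoBuffering);
        Parallel.ForEach(oneAtATime, options, body);
    }
}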