public static List<EntryStore> LoadCurrentDirCache()
{
    var roots = new List<EntryStore>();
    var files = AlphaFSHelper.GetFilesWithExtension(".", "cde");
    foreach (var file in files)
    {
        var re = Read(file);
        if (re != null)
        {
            roots.Add(re);
        }
    }
    return roots;
}
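// A minimal usage sketch (hypothetical caller, not part of the library): load every
// .cde catalog found in the working directory and report how many were readable.
// PrintLoadedCatalogCount is an illustrative name, not an existing method.
public static void PrintLoadedCatalogCount()
{
    var stores = LoadCurrentDirCache();
    Console.WriteLine($"Loaded {stores.Count} catalog(s) from the current directory.");
}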
public void GetFilesWithExtension_FileContainingPatternUseToReturn()
{
    const string name1 = "G-SN750B_02_S13UJ1NQ221583.cde";
    const string name2 = "G-SN750B_02_S13UJ1NQ221583.cde-backup-with-hash";
    var f1 = File.Create(name1);
    var f2 = File.Create(name2);
    f1.Close();
    f2.Close();

    var files = AlphaFSHelper.GetFilesWithExtension(".", "cde");
    foreach (var file in files)
    {
        Console.WriteLine($"file {file}");
    }
    //System.Threading.Thread.Sleep(1000); // delay 1 second

    File.Delete(name1);
    File.Delete(name2);

    Assert.That(files.Count(), Is.EqualTo(1),
        "Oops somehow we got a file not ending in \"cde\" in our result set.");
}
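// The test above likely guards against the documented .NET search-pattern quirk:
// for a pattern with an exactly three-character extension such as "*.cde",
// Directory.GetFiles also returns files whose extension merely *begins* with
// ".cde" (e.g. ".cde-backup-with-hash"). A minimal sketch of a helper that
// filters those out; the real AlphaFSHelper implementation may differ.
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;

public static class FileHelperSketch
{
    public static IList<string> GetFilesWithExtension(string path, string extension)
    {
        var suffix = "." + extension;
        // Ask the file system with the wildcard pattern, then reject any hit
        // whose name does not literally end in the requested suffix.
        return Directory.GetFiles(path, "*" + suffix)
            .Where(f => f.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
            .ToList();
    }
}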
/// <summary>
/// Apply an MD5 checksum to all rootEntries.
/// </summary>
/// <param name="rootEntries">Collection of rootEntries</param>
public void ApplyMd5Checksum(IList<RootEntry> rootEntries)
{
    _logger.LogDebug("PrePairSize Memory: {0}", _applicationDiagnostics.GetMemoryAllocated().FormatAsBytes());
    var newMatches = GetSizePairs(rootEntries);
    _logger.LogDebug("PostPairSize Memory: {0}", _applicationDiagnostics.GetMemoryAllocated().FormatAsBytes());

    var totalFilesInRootEntries = rootEntries.Sum(x => x.FileEntryCount);
    var totalEntriesInSizeDupes = newMatches.Sum(x => x.Value.Count);
    var longestListLength = newMatches.Count > 0 ? newMatches.Max(x => x.Value.Count) : -1;
    var longestListSize = newMatches.Count == 0
        ? 0
        : newMatches.First(x => x.Value.Count == longestListLength).Key;
    _logger.LogInfo("Found {0} sets of files matched by file size", newMatches.Count);
    _logger.LogInfo("Total files processed for the file size matches is {0}", totalFilesInRootEntries);
    _logger.LogInfo("Total files found with at least 1 other file of same length {0}", totalEntriesInSizeDupes);
    _logger.LogInfo("Longest list of same sized files is {0} for size {1}", longestListLength, longestListSize);

    // flatten
    _logger.LogDebug("Flatten List..");
    var flatList = newMatches.SelectMany(dirlist => dirlist.Value).ToList();
    _logger.LogDebug("Memory: {0}", _applicationDiagnostics.GetMemoryAllocated().FormatAsBytes());

    // group by volume/network share
    _logger.LogDebug("GroupBy Volume/Share..");

    // QUOTE
    // The IGrouping<TKey, TElement> objects are yielded in an order based on
    // the order of the elements in source that produced the first key of each
    // IGrouping<TKey, TElement>. Elements in a grouping are yielded in the
    // order they appear in source.
    //
    // By ordering from largest to smallest, the larger files are hashed first,
    // so breaking the process and then running --dupes is a win for larger files.
    var descendingFlatList = flatList.OrderByDescending(
        pde => pde.ChildDE.IsDirectory ? 0 : pde.ChildDE.Size); // directories last

    //var some = descendingFlatList.Take(20);
    //foreach (var pairDirEntry in some)
    //{
    //    Console.WriteLine("{0}", pairDirEntry.ChildDE.Size);
    //}

    var groupedByDirectoryRoot = descendingFlatList
        .GroupBy(x => AlphaFSHelper.GetDirectoryRoot(x.FullPath));
    _logger.LogDebug("Memory: {0}", _applicationDiagnostics.GetMemoryAllocated().FormatAsBytes());

    // Parallel at the grouping level; hopefully this is one group per disk.
    _logger.LogDebug("Beginning Hashing...");
    _logger.LogDebug("Memory: {0}", _applicationDiagnostics.GetMemoryAllocated().FormatAsBytes());

    var timer = new Stopwatch();
    timer.Start();

    var cts = new CancellationTokenSource();
    var token = cts.Token;
    var outerOptions = new ParallelOptions { CancellationToken = token };
    _duplicationStatistics.FilesToCheckForDuplicatesCount = totalEntriesInSizeDupes;

    try
    {
        Parallel.ForEach(groupedByDirectoryRoot, outerOptions, (grp, loopState) =>
        {
            var parallelOptions = new ParallelOptions
            {
                CancellationToken = token,
                MaxDegreeOfParallelism = 2
            };
            // This now tries to hash files in approximate order of largest to smallest.
            // Hitting break when the smallest size shown in the log gets down to a size
            // you don't care about is viable. The full hash phase will then start, and
            // you can hit break again to stop it after a while, to then run --dupes on
            // the largest hashed files.
            grp.AsParallel()
                .ForEachInApproximateOrder(parallelOptions, (flatFile, innerLoopState) =>
                {
                    _duplicationStatistics.SeenFileSize(flatFile.ChildDE.Size);
                    CalculatePartialMD5Hash(flatFile.FullPath, flatFile.ChildDE);
                    if (Hack.BreakConsoleFlag)
                    {
                        Console.WriteLine("\n * Break key detected exiting hashing phase inner.");
                        cts.Cancel();
                    }
                });
        });
    }
    catch (OperationCanceledException) { }
    catch (Exception ex)
    {
        // Parallel cancellation surfaces as OperationCanceledException or AggregateException.
        Console.WriteLine($"Exception Type {ex.GetType()}");
        Console.WriteLine(ex.Message);
        return;
    }

    _logger.LogInfo("After initial partial hashing phase.");
    var perf = $"{_duplicationStatistics.BytesProcessed*(1000.0/timer.ElapsedMilliseconds)/(1024.0*1024.0):F2} MB/s";
    var statsMessage =
        $"FullHash: {_duplicationStatistics.FullHashes} PartialHash: {_duplicationStatistics.PartialHashes} " +
        $"Processed: {_duplicationStatistics.BytesProcessed/(1024.0*1024.0):F2} MB " +
        $"NotProcessed: {_duplicationStatistics.BytesNotProcessed/(1024.0*1024.0):F2} MB Perf: {perf}\n" +
        $"Total Data Encountered: {_duplicationStatistics.TotalFileBytes/(1024.0*1024.0):F2} MB\n" +
        $"FailedHash: {_duplicationStatistics.FailedToHash} (almost always because the file cannot be opened for reading)";
    _logger.LogInfo(statsMessage);

    Hack.BreakConsoleFlag = false; // reset so break must be pressed again to stop the full hash phase.
    CheckDupesAndCompleteFullHash(rootEntries);
    _logger.LogInfo(string.Empty);
    _logger.LogInfo("After hashing completed.");

    timer.Stop();
    perf = $"{_duplicationStatistics.BytesProcessed*(1000.0/timer.ElapsedMilliseconds)/(1024.0*1024.0):F2} MB/s";
    statsMessage =
        $"FullHash: {_duplicationStatistics.FullHashes} PartialHash: {_duplicationStatistics.PartialHashes} " +
        $"Processed: {_duplicationStatistics.BytesProcessed/(1024.0*1024.0):F2} MB Perf: {perf}\n" +
        $"FailedHash: {_duplicationStatistics.FailedToHash} (almost always because the file cannot be opened for reading)";
    _logger.LogInfo(statsMessage);
}
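// A minimal sketch of the partial-first hashing idea used above, under the
// assumption that CalculatePartialMD5Hash reads only a leading chunk of each file.
// PartialHashSize and ComputePartialMd5 are hypothetical names; the real constant,
// buffer size, and method signature may differ. Most same-sized-but-different files
// separate cheaply on this partial hash, so only files whose partial hashes still
// collide need the more expensive full hash in CheckDupesAndCompleteFullHash.
using System.IO;
using System.Security.Cryptography;

public static class PartialHashSketch
{
    private const int PartialHashSize = 4096; // hypothetical leading-chunk size

    public static byte[] ComputePartialMd5(string path)
    {
        using (var md5 = MD5.Create())
        using (var stream = File.OpenRead(path))
        {
            // Hash only the first chunk; files shorter than the chunk
            // are hashed in full, since Read returns the bytes available.
            var buffer = new byte[PartialHashSize];
            var read = stream.Read(buffer, 0, buffer.Length);
            return md5.ComputeHash(buffer, 0, read);
        }
    }
}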