/// <summary>
/// Combines a parent entry's full path with a child entry's path segment.
/// Null segments are replaced with sentinel strings ("pnull"/"dnull") so a
/// bad combined path still reveals which side was missing when debugging.
/// </summary>
public static string MakeFullPath(CommonEntry parentEntry, DirEntry dirEntry)
{
    var parentPath = parentEntry.FullPath ?? "pnull";
    var childPath = dirEntry.Path ?? "dnull";
    return Filesystem.Path.Combine(parentPath, childPath);
}
/// <summary>
/// Runs the configured find over the given root entries, reporting progress
/// via ProgressFunc and visiting matches via VisitorFunc. No-op when no
/// visitor is configured.
/// </summary>
/// <param name="rootEntries">Root entries to search; enumerated exactly once.</param>
public void Find(IEnumerable<RootEntry> rootEntries)
{
    if (VisitorFunc == null)
    {
        return;
    }

    // Materialize once: the sequence was previously enumerated twice
    // (TotalFileEntries and TraverseTreePair), which the original code
    // flagged with ReSharper PossibleMultipleEnumeration suppressions.
    var rootEntryList = rootEntries.ToList();

    int[] limitCount = { LimitResultCount };
    if (ProgressFunc == null || ProgressModifier == 0)
    {
        // dummy func and huge progressModifier so wont call progressFunc anyway.
        ProgressFunc = delegate { };
        ProgressModifier = int.MaxValue;
    }

    ProgressEnd = rootEntryList.TotalFileEntries();
    ProgressFunc(_progressCount[0], ProgressEnd); // Start of process Progress report.

    PatternMatcher = GetPatternMatcher();
    var findFunc = GetFindFunc(_progressCount, limitCount);
    CommonEntry.TraverseTreePair(rootEntryList, findFunc);
    ProgressFunc(_progressCount[0], ProgressEnd); // end of Progress
}
/// <summary>
/// Advances to the next (parent, child) pair in the depth-first traversal.
/// </summary>
/// <returns>True when a new current pair is available; false when exhausted.</returns>
public bool MoveNext()
{
    _current = null;
    // Iterative loop instead of the original self-recursion: a long run of
    // exhausted child enumerators previously recursed once per empty
    // directory and could exhaust the call stack on pathological trees.
    while (_current == null)
    {
        if (_childEnumerator == null)
        {
            if (_entries.Count == 0)
            {
                return false; // traversal complete
            }
            var popped = _entries.Pop();
            _parentDirEntry = popped;
            _childEnumerator = popped.Children.GetEnumerator();
        }

        if (_childEnumerator.MoveNext())
        {
            var de = _childEnumerator.Current;
            _current = new PairDirEntry(_parentDirEntry, de);
            // Non-empty directories are stacked so their children are
            // visited after the current level finishes.
            if (de.IsDirectory && de.Children != null && de.Children.Count > 0)
            {
                _entries.Push(de);
            }
        }
        else
        {
            // Current level exhausted; pull the next stacked directory.
            _childEnumerator = null;
        }
    }
    return true;
}
/// <summary>
/// Traversal callback that collects entries into the duplicate dictionary,
/// skipping entries that only carry a partial hash. Always returns true so
/// traversal continues.
/// </summary>
private bool BuildDuplicateList(CommonEntry parentEntry, DirEntry dirEntry)
{
    if (dirEntry.IsPartialHash)
    {
        return true; // partial hashes are not comparable as duplicates here
    }
    BuildDuplicateListIncludePartialHash(parentEntry, dirEntry);
    return true;
}
/// <summary>
/// Builds the size -> files map for the given roots, then prunes sizes that
/// only one file has (a lone file of a size cannot be a duplicate).
/// </summary>
/// <returns>The pruned size-to-file-list dictionary.</returns>
public IDictionary<long, List<PairDirEntry>> GetSizePairs(IEnumerable<RootEntry> rootEntries)
{
    CommonEntry.TraverseTreePair(rootEntries, FindMatchesOnFileSize2);
    _logger.LogDebug("Post TraverseMatchOnFileSize: {0}, dupeDictCount {1}", _applicationDiagnostics.GetMemoryAllocated().FormatAsBytes(), _duplicateFileSize.Count);

    //Remove the single values from the dictionary. DOESNT SEEM TO CLEAR MEMORY ??? GC Force?
    // Snapshot the singleton keys first so removal does not mutate the
    // dictionary while it is being enumerated.
    var singletonSizes = _duplicateFileSize
        .Where(kvp => kvp.Value.Count == 1)
        .Select(kvp => kvp.Key)
        .ToList();
    foreach (var size in singletonSizes)
    {
        _duplicateFileSize.Remove(size);
    }

    _logger.LogDebug("Deleted entries from dictionary: {0}, dupeDictCount {1}", _applicationDiagnostics.GetMemoryAllocated().FormatAsBytes(), _duplicateFileSize.Count);
    return _duplicateFileSize;
}
/// <summary>
/// Collects the fully-hashed entries for the given roots and returns only the
/// hash groups containing more than one file — i.e. the actual duplicates.
/// </summary>
public IList<KeyValuePair<DirEntry, List<PairDirEntry>>> GetDupePairs(IEnumerable<RootEntry> rootEntries)
{
    CommonEntry.TraverseTreePair(rootEntries, BuildDuplicateList);

    // Only hashes shared by two or more files represent real duplicates.
    var duplicates = _duplicateFile
        .Where(entry => entry.Value.Count > 1)
        .ToList();

    _logger.LogInfo("Count of list of all hashes of files with same sizes {0}", _duplicateFile.Count);
    _logger.LogInfo("Count of list of all hashes of files with same sizes where more than 1 of that hash {0}", duplicates.Count);
    return duplicates;
}
/// <summary>
/// Traversal callback that groups files by size into _duplicateFileSize.
/// Directories and zero-length files are skipped. Always returns true so
/// traversal continues.
/// </summary>
private bool FindMatchesOnFileSize2(CommonEntry ce, DirEntry de)
{
    if (de.IsDirectory || de.Size == 0) // || dirEntry.Size < 4096)
    {
        return true;
    }

    var flatDirEntry = new PairDirEntry(ce, de);
    // Single lookup via TryGetValue; the original did ContainsKey followed
    // by an indexer read (two hash lookups per file).
    List<PairDirEntry> sameSizeList;
    if (_duplicateFileSize.TryGetValue(de.Size, out sameSizeList))
    {
        sameSizeList.Add(flatDirEntry);
    }
    else
    {
        _duplicateFileSize[de.Size] = new List<PairDirEntry> { flatDirEntry };
    }
    return true;
}
/// <summary>
/// Traversal callback that groups hashed files (partial or full) by their
/// DirEntry hash key into _duplicateFile. Directories, unhashed entries and
/// zero-length files are skipped. Always returns true so traversal continues.
/// </summary>
private bool BuildDuplicateListIncludePartialHash(CommonEntry parentEntry, DirEntry dirEntry)
{
    if (dirEntry.IsDirectory || !dirEntry.IsHashDone || dirEntry.Size == 0)
    {
        //TODO: how to deal with uncalculated files?
        return true;
    }

    var info = new PairDirEntry(parentEntry, dirEntry);
    // Single lookup via TryGetValue; the original did ContainsKey followed
    // by an indexer read (two hash lookups per file).
    List<PairDirEntry> sameHashList;
    if (_duplicateFile.TryGetValue(dirEntry, out sameHashList))
    {
        sameHashList.Add(info);
    }
    else
    {
        _duplicateFile[dirEntry] = new List<PairDirEntry> { info };
    }
    return true;
}
/// <summary>
/// Traversal callback that upgrades a partial hash to a full MD5 hash, but
/// only for entries previously flagged in _dirEntriesRequiringFullHashing.
/// Returns false to abort traversal when the console break flag is set.
/// </summary>
private bool CalculateFullMD5Hash(CommonEntry parentEntry, DirEntry dirEntry)
{
    // Only entries that already hold a *partial* hash are upgrade
    // candidates; unhashed and fully-hashed entries are left untouched.
    if (!dirEntry.IsHashDone || !dirEntry.IsPartialHash)
    {
        return true;
    }
    if (!_dirEntriesRequiringFullHashing.Contains(dirEntry))
    {
        return true;
    }

    var fullPath = CommonEntry.MakeFullPath(parentEntry, dirEntry);
    var longFullPath = Path.GetFullPath(fullPath);
    CalculateMD5Hash(longFullPath, dirEntry, false);
    if (Hack.BreakConsoleFlag)
    {
        Console.WriteLine("\n * Break key detected exiting full hashing phase outer.");
        return false; // stop the traversal
    }
    return true;
}
/// <summary>
/// Builds the hash-keyed duplicate groups, flags every member of a group with
/// two or more files as requiring a full hash, then re-traverses to compute
/// those full hashes.
/// </summary>
private void CheckDupesAndCompleteFullHash(IEnumerable<RootEntry> rootEntries)
{
    _logger.LogDebug(string.Empty);
    _logger.LogDebug("Checking duplicates and completing full hash.");

    // Materialize: the sequence is traversed twice (group build + full hash).
    var rootEntryList = rootEntries.ToList();
    CommonEntry.TraverseTreePair(rootEntryList, BuildDuplicateListIncludePartialHash);

    // Materialize the query: the original deferred Where() was re-executed
    // five times (Sum, Any, Max, Count() and the foreach below).
    var foundDupes = _duplicateFile.Where(d => d.Value.Count > 1).ToList();
    var totalEntriesInDupes = foundDupes.Sum(x => x.Value.Count);
    var longestListLength = foundDupes.Count > 0 ? foundDupes.Max(x => x.Value.Count) : 0;
    _logger.LogInfo("Found {0} duplication collections.", foundDupes.Count);
    _logger.LogInfo("Total files found with at least 1 other file duplicate {0}", totalEntriesInDupes);
    _logger.LogInfo("Longest list of duplicate files is {0}", longestListLength);

    foreach (var keyValuePair in foundDupes)
    {
        foreach (var pairDirEntry in keyValuePair.Value)
        {
            _dirEntriesRequiringFullHashing.Add(pairDirEntry.ChildDE);
        }
    }
    CommonEntry.TraverseTreePair(rootEntryList, CalculateFullMD5Hash);
}
/// <summary>
/// Copies hash metadata from this tree onto a destination tree with the same
/// root path. A hash is copied when the destination entry has none, or is
/// upgraded when the destination only has a partial hash and the source has a
/// full one. Matching is by entry name; source entries with no destination
/// counterpart are skipped.
/// </summary>
/// <param name="destination">Destination tree root; must share this root's path.</param>
/// <exception cref="ArgumentException">
/// Thrown when destination is null or the root paths differ.
/// </exception>
public void TraverseTreesCopyHash(CommonEntry destination)
{
    var source = this; // 'this' cannot be null; only destination needs checking.
    if (destination == null)
    {
        throw new ArgumentException("source and destination must be not null.");
    }

    var sourcePath = source.Path;
    var destinationPath = destination.Path;

    // Case-insensitive ordinal comparison: the original used
    // string.Compare(a, b, true), which is culture-sensitive and can
    // misbehave on path strings (e.g. the Turkish-I problem).
    if (!string.Equals(sourcePath, destinationPath, StringComparison.OrdinalIgnoreCase))
    {
        throw new ArgumentException("source and destination must have same root path.");
    }

    // traverse every source entry copy across the meta data that matches on destination entry
    // if it adds value to destination.
    // if destination is not there source not processed.
    var dirs = new Stack<Tuple<string, CommonEntry, CommonEntry>>();
    dirs.Push(Tuple.Create(sourcePath, source, destination));
    while (dirs.Count > 0)
    {
        var t = dirs.Pop();
        var workPath = t.Item1;
        var baseSourceEntry = t.Item2;
        var baseDestinationEntry = t.Item3;
        if (baseSourceEntry.Children == null)
        {
            continue;
        }
        foreach (var sourceDirEntry in baseSourceEntry.Children)
        {
            var fullPath = Filesystem.Path.Combine(workPath, sourceDirEntry.Path);

            // find if theres a destination entry available.
            // size of dir is irrelevant. date of dir we don't care about.
            var sourceEntry = sourceDirEntry;
            var destinationDirEntry = baseDestinationEntry.Children
                .FirstOrDefault(x => x.Path == sourceEntry.Path);
            if (destinationDirEntry == null)
            {
                continue;
            }

            if (!sourceDirEntry.IsDirectory
                && sourceDirEntry.Modified == destinationDirEntry.Modified
                && sourceDirEntry.Size == destinationDirEntry.Size)
            {
                // copy MD5 if none in destination.
                // copy MD5 as upgrade to full if dest currently partial.
                if ((sourceDirEntry.IsHashDone && !destinationDirEntry.IsHashDone)
                    || (sourceDirEntry.IsHashDone && destinationDirEntry.IsHashDone
                        && !sourceDirEntry.IsPartialHash && destinationDirEntry.IsPartialHash))
                {
                    destinationDirEntry.IsPartialHash = sourceDirEntry.IsPartialHash;
                    destinationDirEntry.Hash = sourceDirEntry.Hash;
                }
            }
            else
            {
                if (destinationDirEntry.IsDirectory)
                {
                    dirs.Push(Tuple.Create(fullPath,
                        (CommonEntry)sourceDirEntry, (CommonEntry)destinationDirEntry));
                }
            }
        }
    }
}