/// <summary>
/// Replaces the logger and diagnostics helper with fresh instances, then
/// builds the size-keyed dictionary via <see cref="GetSizePairs"/>, writing a
/// debug log line with the allocated-memory figure before and after that call.
/// </summary>
/// <remarks>
/// The dictionary returned by <see cref="GetSizePairs"/> is held in a local
/// and not used further in this method; no checksum work is performed here.
/// </remarks>
public void ApplyMd5Checksum()
{
    // Both helpers are re-created on every call.
    _logger = new Logger();
    _applicationDiagnostics = new ApplicationDiagnostics();

    _logger.LogDebug(
        "PrePairSize Memory: {0}",
        _applicationDiagnostics.GetMemoryAllocated().FormatAsBytes());

    var entriesBySize = GetSizePairs();

    _logger.LogDebug(
        "PostPairSize Memory: {0}",
        _applicationDiagnostics.GetMemoryAllocated().FormatAsBytes());
}
/// <summary>
/// Groups entry indices by entry size, keeping only sizes that occur more
/// than once.
/// </summary>
/// <remarks>
/// <para>
/// The entries are scanned repeatedly, one full pass per size window
/// <c>[min, max)</c>. During a pass, every entry that is not a directory, has
/// a non-zero size and falls inside the current window has its index appended
/// to the list stored under its size. After each pass, every size that has
/// exactly one index is removed from the dictionary, so the dictionary never
/// holds more singletons than one window's worth.
/// </para>
/// <para>
/// Another pass is made only if some entry (directory or not) had a size at or
/// above the window's upper bound. The window then advances by the current
/// step; once the lower bound exceeds 2,000,000 the step is doubled after
/// every advance. The loop also stops early when
/// <c>Hack.BreakConsoleFlag</c> is set.
/// </para>
/// <para>
/// Side effects: replaces <c>_duplicateFileSize</c> with a new dictionary and
/// writes two summary lines to the console. The console summary reads
/// <c>_applicationDiagnostics</c>, which this method does not initialise.
/// </para>
/// </remarks>
/// <returns>
/// The <c>_duplicateFileSize</c> dictionary (the same instance stored in the
/// field), mapping a size to the indices of the entries having that size.
/// </returns>
public IDictionary<ulong, List<int>> GetSizePairs()
{
    // Idea being tried: process all entries in size slices, removing the
    // single-occurrence sizes after each slice.
    ulong bumpSize = 20000;
    ulong min = 0;
    ulong max = bumpSize;
    bool goNext;
    int passCount = 0;

    _duplicateFileSize = new Dictionary<ulong, List<int>>();

    do
    {
        goNext = false;

        var entryEnumerator = new EntryEnumerator(this);
        foreach (var entryKey in entryEnumerator)
        {
            Entry[] block;
            var index = entryKey.Index;
            var entryIndex = EntryIndex(index, out block);
            var size = block[entryIndex].Size;

            if (size >= min && size < max)
            {
                if (!block[entryIndex].IsDirectory && size != 0)
                {
                    // Single lookup instead of ContainsKey + indexer.
                    List<int> indices;
                    if (_duplicateFileSize.TryGetValue(size, out indices))
                    {
                        indices.Add(index);
                    }
                    else
                    {
                        _duplicateFileSize[size] = new List<int> { index };
                    }
                }
            }
            else if (size >= max)
            {
                // Something lies beyond this window, so another pass is needed.
                goNext = true;
            }
        }

        // Remove the single values from the dictionary. The matches are
        // materialised first so the dictionary is not modified while it is
        // being enumerated.
        var pruneList = _duplicateFileSize
            .Where(kvp => kvp.Value.Count == 1)
            .ToList();
        foreach (var single in pruneList)
        {
            _duplicateFileSize.Remove(single.Key);
        }

        if (goNext)
        {
            min = max;
            max += bumpSize;

            // Past 2,000,000 the step doubles after every advance.
            if (min > 2000000)
            {
                bumpSize = bumpSize + bumpSize;
            }
        }

        ++passCount;

        if (Hack.BreakConsoleFlag)
        {
            break;
        }
    } while (goNext);

    Console.WriteLine($"loopy {passCount}");
    Console.WriteLine(
        $"Deleted entries from dictionary: {_applicationDiagnostics.GetMemoryAllocated().FormatAsBytes()}, dupeDictCount {_duplicateFileSize.Count}");

    return _duplicateFileSize;
}