/// <summary>
/// Reads the next SizeHashEntry record from the current stream position.
/// </summary>
/// <param name="record">Receives the deserialized record; always assigned.</param>
/// <returns>
/// The stream position where the record began, or -1 if there is no next record
/// (the stream position is at or past the end).
/// </returns>
/// <exception cref="EndOfStreamException">The file ends in the middle of a record.</exception>
public long GetNextRecord(out SizeHashEntry record)
{
    record = new SizeHashEntry();
    long beginPosition = this.Stream.Position;

    // Indicate that there are no next record
    if (this.Stream.Position >= this.Stream.Length)
        return -1;

    // On-disk layout (mirrors WriteRecord):
    //   bool Decided (1), int pointer count (4), then per pointer:
    //   32-byte UTF-8 hash, int file-entry count (4), that many long offsets (8 each).
    using (BinaryReader br = new BinaryReader(this.Stream, Encoding.UTF8, true))
    {
        record.Decided = br.ReadBoolean();                     // 1
        int ptrCount = br.ReadInt32();                         // 4
        record.Pointers = new List<HashPointers>(ptrCount);
        for (int i = 0; i < ptrCount; ++i)
        {
            HashPointers hp = new HashPointers();

            // ReadBytes may return FEWER than 32 bytes if the file is truncated;
            // treat that as corruption instead of silently producing a bogus hash.
            byte[] hashBytes = br.ReadBytes(32);               // 32
            if (hashBytes.Length != 32)
                throw new EndOfStreamException("Truncated hash record in the hash datafile.");
            hp.Hash = Encoding.UTF8.GetString(hashBytes);

            int entryCount = br.ReadInt32();                   // 4
            hp.FileEntries = new List<long>(entryCount);
            for (int j = 0; j < entryCount; ++j)
                hp.FileEntries.Add(br.ReadInt64());            // 8

            record.Pointers.Add(hp);
        }
    }

    return beginPosition;
}
/// <summary>
/// Appends a SizeHashEntry record to the end of the stream.
/// </summary>
/// <param name="entry">The record to serialize.</param>
/// <returns>The stream position where the record was written.</returns>
/// <exception cref="InvalidOperationException">
/// A hash does not encode to exactly 32 UTF-8 bytes (the reader assumes a fixed-size field).
/// </exception>
public long WriteRecord(SizeHashEntry entry)
{
    this.Stream.Seek(0, SeekOrigin.End);
    long writePosition = this.Stream.Position;

    // The layout in the file is as follows:
    // Bool indicating whether the hash entry is decided, number of HashPointers and then the block of hash pointers
    // Each block begins with a Hash string, then a number of file entry pointers, then those pointers itself
    using (BinaryWriter bw = new BinaryWriter(this.Stream, Encoding.UTF8, true))
    {
        bw.Write(entry.Decided);                // bool 1
        bw.Write(entry.Pointers.Count);         // int  4
        foreach (HashPointers hp in entry.Pointers)
        {
            // GetNextRecord reads this back as a FIXED 32-byte field. A hash of any
            // other encoded length would silently shift every following field and
            // corrupt the datafile, so fail loudly here instead.
            byte[] hashBytes = Encoding.UTF8.GetBytes(hp.Hash);
            if (hashBytes.Length != 32)
                throw new InvalidOperationException(
                    "Hash must encode to exactly 32 bytes, got " + hashBytes.Length + ".");
            bw.Write(hashBytes);                // 32

            bw.Write(hp.FileEntries.Count);     // int  4
            foreach (long fileEntry in hp.FileEntries)
                bw.Write(fileEntry);            // 8
        }
    }

    return writePosition;
}
/// <summary>
/// Entry point. Scans the current directory tree for duplicate files (first by
/// size, then by MD5 content hash) and removes the duplicates selected by the
/// user, or automatically with -o (keep oldest) / -n (keep newest).
/// Exit codes: 0 = success or help shown, 1 = datafile creation failed,
/// 2 = some files could not be deleted, 3 = conflicting arguments.
/// </summary>
static void Main(string[] args)
{
    Console.WriteLine("Duplicate Destroyer");
    Console.WriteLine("'Devastating Desert'");
    Console.WriteLine("Licenced under Tiny Driplet Licence (can be found at cloudchiller.net)");
    Console.WriteLine("Copyright, Copydrunk, Copypone (c) 2012-2014, Cloud Chiller");
    Console.WriteLine();

    if (args.Contains("-h"))
    {
        Console.WriteLine("HELP:");
        Console.WriteLine("-h Show this help text");
        Console.WriteLine("-v Verbose mode");
        Console.WriteLine("-d Dry run/discovery - Only check for duplicates, but don't actually remove them");
        Console.WriteLine("-o Automatically keep the OLDEST of the files");
        Console.WriteLine("-n Automatically keep the NEWEST of the files");
        Console.WriteLine();
        Console.WriteLine("Omitting both -o and -n results in the user being queried about which file to keep.");
        Console.WriteLine("Using both -o and -n throws an error.");
        Console.WriteLine();
        Environment.Exit(0);
    }

    Verbose = args.Contains("-v");
    DryRun = args.Contains("-d");
    AutoOldest = args.Contains("-o");
    AutoNewest = args.Contains("-n");
    SizeCount = 0;
    FileCount = 0;

    if (AutoOldest && AutoNewest)
    {
        Console.WriteLine("ERROR: Conflicting arguments.");
        Console.WriteLine("Please use either -o or -n, not both.");
        Console.WriteLine();
        Environment.Exit(3);
    }

    FileStream SizesFileStream = null;
    FileStream PathsFileStream = null;
    FileStream HashesFileStream = null;
    FileStream DuplicateLogFileStream = null;
    try
    {
        SizesFileStream = new FileStream(".dd_sizes", FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.None);
        SizesFileStream.SetLength(0);
        PathsFileStream = new FileStream(".dd_files", FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.None);
        PathsFileStream.SetLength(0);
        HashesFileStream = new FileStream(".dd_hashes", FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.None);
        HashesFileStream.SetLength(0);
        FilesToRemove = new FileStream(".dd_remove", FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.None);
        FilesToRemove.SetLength(0);

        // FIX: DateTime.Now.ToString() is culture-dependent and in many cultures
        // contains '/', which is invalid in filenames — the old code then crashed
        // into the catch below and exited. Use an explicit filename-safe format.
        DuplicateLogFileStream = new FileStream(
            "duplicates_" + DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss") + ".log",
            FileMode.OpenOrCreate, FileAccess.Write, FileShare.None);
        DuplicateLogFileStream.SetLength(0);
    }
    catch (Exception ex)
    {
        Console.ForegroundColor = ConsoleColor.Red;
        Console.WriteLine("Was unable to create the program's datafiles.");
        Console.ResetColor();
        Console.WriteLine("Please make sure the folder " + Directory.GetCurrentDirectory() + " is writable.");
        Console.WriteLine("The following error happened: " + ex.Message);
        Environment.Exit(1);
    }

    SizesFile = new SizeFile(SizesFileStream);
    PathsFile = new PathFile(PathsFileStream);
    HashesFile = new HashFile(HashesFileStream);
    DuplicateFileLog = new StreamWriter(DuplicateLogFileStream);
    FileRemoveException = false;
    TargetDirectory = Directory.GetCurrentDirectory();

    // Phase 1: walk the directory tree and record every file's size.
    {
        Console.Write("Counting files and measuring sizes... " + (Verbose ? "\n" : String.Empty));
        List<string> Subfolders = new List<string>();
        Subfolders.Add(TargetDirectory);
        while (Subfolders.Count != 0)
        {
            // Read the files in the subfolders.
            // The on-the-fly detected subfolders are added to the list while reading.
            ReadFileSizes(Subfolders[0], ref Subfolders);
        }
        SizesFile.Stream.Flush(true);
        PathsFile.Stream.Flush(true);
        Console.WriteLine((!Verbose ? "\n" : String.Empty) + FileCount + " files found.");
        Console.WriteLine();
    }

    // Phase 2: discard sizes that occur only once — they cannot have duplicates.
    {
        Console.Write("Analysing sizes... " + (Verbose ? "\n" : String.Empty));
        AnalyseSizes();
        SizesFile.DeleteRecord(0); // 0-byte files are ALWAYS duplicates of each other...
        SizesFile.Stream.Flush(true);
        PathsFile.Stream.Flush(true);
        Console.WriteLine((!Verbose ? "\n" : String.Empty) + SizeCount + " unique file size found for " + FileCount + " files.");
        Console.WriteLine();
    }

    //{
    //    // Remove entries from the PathsFile physically which were logically removed (marked deleted) in the previous step
    //    if (Verbose)
    //    {
    //        Console.WriteLine("Removing knowledge about files I don't need to check.");
    //        Console.WriteLine("(This is an internal maintenance run to speed up further operations.)");
    //    }
    //    PathsFile.Consolidate(new SizeFileAligner(Program.AlignSizeFilePointers));
    //    PathsFile.Stream.Flush(true);
    //    if (Verbose)
    //        Console.WriteLine();
    //}

    // Phase 3: hash every file that shares its size with another file.
    {
        Console.Write("Reading file contents... " + (Verbose ? "\n" : String.Empty));
        MD5CryptoServiceProvider mcsp = new MD5CryptoServiceProvider();
        ulong _hashesReadCount = 0;
        foreach (SizeEntry duplicated_size in SizesFile.GetRecords())
        {
            if (Verbose)
            {
                Console.ForegroundColor = ConsoleColor.Cyan;
                Console.WriteLine("Reading files of " + duplicated_size.Size + " size");
                Console.ResetColor();
            }
            // For each size entry, iterate the path list
            PathEntry entry;
            long position = duplicated_size.FirstPath;
            while (position != -1)
            {
                if (PathsFile.GetRecordAt(position, out entry))
                {
                    string hash = String.Empty;
                    try
                    {
                        hash = CalculateHash(ref mcsp, entry.Path);
                        ++_hashesReadCount;
                    }
                    catch (Exception ex)
                    {
                        Console.ForegroundColor = ConsoleColor.Yellow;
                        Console.WriteLine("The file " + entry.Path + " could not be checked, because:");
                        Console.ResetColor();
                        Console.WriteLine(ex.Message);
                    }
                    if (!String.IsNullOrEmpty(hash))
                        entry.Hash = hash;
                    else
                        // Mark this record "deleted" so it won't be checked for hash duplication
                        entry.Deleted = true;
                    PathsFile.WriteRecordAt(entry, position);
                    VisualGlyph(_hashesReadCount);
                    position = entry.NextRecord; // Jump to the next record in the chain
                }
                else
                {
                    // FIX: previously a failed read left 'position' unchanged,
                    // spinning this loop forever. Bail out of the chain instead.
                    break;
                }
            }
        }
        mcsp.Dispose(); // FIX: the hash provider was never disposed.
        PathsFile.Stream.Flush(true);
        Console.WriteLine((!Verbose ? "\n" : String.Empty) + _hashesReadCount + " files read.");
    }

    // Phase 4: group files by hash and let the user (or auto mode) pick keepers.
    {
        Console.Write("Searching for true duplication... " + (Verbose ? "\n" : String.Empty));
        long UniqueHashCount, DuplicatedFileCount;
        AnalyseFilelist(out UniqueHashCount, out DuplicatedFileCount);
        HashesFile.Stream.Flush(true);
        Console.WriteLine((!Verbose ? "\n" : String.Empty) + UniqueHashCount + " unique content duplicated across " + DuplicatedFileCount + " files.");
        Console.WriteLine();
        Console.WriteLine();
        Console.WriteLine("Please select which files you wish to remove.");
        long dealtWithCount = 0;
        // Keep re-asking until every duplicated hash has a decision.
        while (dealtWithCount < UniqueHashCount)
        {
            // We go through every hash entry and prompt the user to decide which file to remove
            HashesFile.Stream.Seek(0, SeekOrigin.Begin);
            SizeHashEntry she = new SizeHashEntry();
            PathEntry etr = new PathEntry();
            long pos = 0;
            while (pos != -1)
            {
                // Get the next duplicated hash
                pos = HashesFile.GetNextRecord(out she);
                if (pos != -1)
                {
                    // Iterate the hash pointers...
                    foreach (HashPointers ptr in she.Pointers)
                    {
                        if (ptr.FileEntries.Count == 0)
                            continue;

                        // Select which file the user wants to keep
                        List<int> fileIDsToKeep;
                        bool userDecided = SelectFilesToKeep(ptr, out fileIDsToKeep);
                        if (!DryRun)
                        {
                            if (!userDecided)
                                Console.WriteLine("Didn't make a decision. You will be asked later on.");
                            else
                            {
                                ++dealtWithCount;
                                if (fileIDsToKeep.Count == ptr.FileEntries.Count)
                                    Console.WriteLine("Selected to keep all files.");
                                else if (fileIDsToKeep.Count > 0)
                                {
                                    // NOTE(review): in -o/-n auto mode this branch intentionally
                                    // prints nothing and queues nothing — presumably
                                    // SelectFilesToKeep handles the auto case; confirm.
                                    if (!AutoOldest && !AutoNewest)
                                    {
                                        foreach (int id in fileIDsToKeep)
                                        {
                                            Console.Write("Selected to ");
                                            Console.ForegroundColor = ConsoleColor.White;
                                            Console.Write("KEEP");
                                            Console.ResetColor();
                                            Console.Write(" ");
                                            // IDs are 1-based in the user prompt.
                                            PathsFile.GetRecordAt(ptr.FileEntries[id - 1], out etr);
                                            Console.WriteLine(etr.Path);
                                        }
                                        foreach (int id in Enumerable.Range(1, ptr.FileEntries.Count).Except(fileIDsToKeep))
                                        {
                                            Console.Write("Selected to ");
                                            Console.ForegroundColor = ConsoleColor.Red;
                                            Console.Write("DELETE");
                                            Console.ResetColor();
                                            Console.Write(" ");
                                            PathsFile.GetRecordAt(ptr.FileEntries[id - 1], out etr);
                                            Console.WriteLine(etr.Path);
                                            // Queue the path for the removal phase, one path per line.
                                            byte[] pathLine = Encoding.UTF8.GetBytes(etr.Path + Environment.NewLine);
                                            FilesToRemove.Write(pathLine, 0, pathLine.Length);
                                        }
                                    }
                                }
                                else if (fileIDsToKeep.Count == 0)
                                {
                                    Console.WriteLine("All files will be deleted:");
                                    foreach (long offset in ptr.FileEntries)
                                    {
                                        PathsFile.GetRecordAt(offset, out etr);
                                        Console.WriteLine(etr.Path);
                                        byte[] pathLine = Encoding.UTF8.GetBytes(etr.Path + Environment.NewLine);
                                        FilesToRemove.Write(pathLine, 0, pathLine.Length);
                                    }
                                }
                                FilesToRemove.Flush();
                            }
                        }
                        else
                            ++dealtWithCount;
                    }
                }
            }
        }
        Console.WriteLine();
    }

    // Phase 5: delete everything queued in the removal file.
    {
        Console.Write("Removing all scheduled files... " + (Verbose ? "\n" : String.Empty));
        uint _filesRemoved = 0;
        if (DryRun)
            Console.WriteLine("Won't remove files in dry-run/discovery mode.");
        else
        {
            FilesToRemove.Seek(0, SeekOrigin.Begin);
            string path;
            if (FilesToRemove.Length > 0) // Only if there are files to be removed
            {
                using (StreamReader sr = new StreamReader(FilesToRemove))
                {
                    // FIX: the old code called ReadLine() exactly once, so at most
                    // ONE scheduled file was ever deleted. Read every queued path.
                    while ((path = sr.ReadLine()) != null)
                    {
                        if (path.Length > 0 && RemoveFile(path))
                            ++_filesRemoved;
                    }
                }
            }
        }
        Console.WriteLine((!Verbose ? "\n" : String.Empty) + _filesRemoved + " files deleted successfully.");
    }

    SizesFileStream.Dispose();
    PathsFileStream.Dispose();
    HashesFileStream.Dispose();
    //FilesToRemove.Dispose(); // already disposed by the StreamReader above
    DuplicateFileLog.Dispose();

    // Cleanup
    //File.Delete(".dd_sizes");
    //File.Delete(".dd_files");
    //File.Delete(".dd_hashes");
    //File.Delete(".dd_remove");

    if (FileRemoveException)
    {
        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("One or more files could not be deleted.");
        Console.ResetColor();
    }

    Console.WriteLine("Press ENTER to exit...");
    Console.ReadLine();
    if (FileRemoveException)
        Environment.Exit(2);
    else
        Environment.Exit(0);
}
/// <summary>
/// Builds, for every size entry, the list of file hashes and which path records
/// carry each hash, then writes groups with more than one file to the hash datafile.
/// </summary>
/// <param name="UniqueHashCount">Receives the number of hashes shared by 2+ files.</param>
/// <param name="DuplicatedFileCount">Receives the number of files participating in those groups.</param>
// NOTE(review): this method relies on HashPointers being a mutable STRUCT:
// FirstOrDefault() returns a default-valued copy (FileEntries == null) when no
// match exists, and mutating curHash.FileEntries still affects the copy stored in
// she.Pointers because the List<long> reference is shared. If HashPointers were a
// class, the null check below would instead throw a NullReferenceException — confirm.
static void AnalyseFilelist(out long UniqueHashCount, out long DuplicatedFileCount)
{
    // Go through every size entry and build the hash lists
    UniqueHashCount = 0;
    DuplicatedFileCount = 0;
    for (long i = 0; i < SizesFile.RecordCount; ++i)
    {
        SizeEntry se = SizesFile.GetRecordByIndex(i);
        SizeHashEntry she = new SizeHashEntry() { Pointers = new List<HashPointers>() };
        // Get the files with the current size by walking the linked chain of
        // path records that starts at se.FirstPath (-1 terminates the chain).
        PathEntry entry;
        long pos = se.FirstPath;
        while (pos != -1)
        {
            if (!PathsFile.GetRecordAt(pos, out entry))
                break;
            if (!entry.Deleted)
            {
                // Get the file pointer list for the current hash
                HashPointers curHash = she.Pointers.Where(p => p.Hash == entry.Hash).FirstOrDefault();
                if (curHash.FileEntries == null)
                {
                    // This indicates that this is a new hash, allocate the List for it to prevent a null reference
                    curHash.Hash = entry.Hash;
                    curHash.FileEntries = new List<long>();
                    she.Pointers.Add(curHash);
                    ++UniqueHashCount;
                }
                curHash.FileEntries.Add(pos); // A file with this hash is found at this position
                ++DuplicatedFileCount;
            }
            else if (Verbose)
                // Deleted == true marks files whose hash could not be computed (see Main).
                Console.WriteLine("Skipping file " + entry.Path + ", I was unable to check it.");
            VisualGlyph((ulong)DuplicatedFileCount);
            pos = entry.NextRecord;
        }
        // Remove hashes which is had by only one file.
        // Each removed hash contributed exactly 1 to both counters, so subtracting
        // the removal count from both keeps them consistent.
        int hashesRemoved = she.Pointers.RemoveAll(hp => hp.FileEntries.Count == 1);
        UniqueHashCount -= hashesRemoved;
        DuplicatedFileCount -= hashesRemoved;
        // Write the current hash's data to the datafile
        if (she.Pointers.Count > 0)
        {
            long shePosition = HashesFile.WriteRecord(she);
            // Update the size table to save where the hash map begins
            se.HashEntry = shePosition;
            SizesFile.WriteRecordAt(se, i * SizeEntry.RecordSize);
        }
    }
}