Ejemplo n.º 1
0
        /// <summary>
        /// Reads one <c>SizeHashEntry</c> from the current position of the underlying stream.
        /// </summary>
        /// <remarks>
        /// On-disk layout, in read order: a 1-byte "decided" flag, a 4-byte pointer-block count,
        /// then for every block a 32-byte UTF-8 hash, a 4-byte entry count and that many
        /// 8-byte file-entry offsets. The stream is left open and positioned after the record.
        /// </remarks>
        /// <param name="record">Receives the decoded record; an empty entry when nothing was read.</param>
        /// <returns>
        /// The stream offset at which the record started, or -1 when the stream position
        /// is already at or past the end of the stream.
        /// </returns>
        public long GetNextRecord(out SizeHashEntry record)
        {
            record = new SizeHashEntry();
            long recordStart = this.Stream.Position;

            // Nothing left to read: signal "no more records" to the caller
            if (recordStart >= this.Stream.Length)
                return -1;

            // leaveOpen = true, the reader must not close the shared stream
            using (BinaryReader reader = new BinaryReader(this.Stream, Encoding.UTF8, true))
            {
                record.Decided = reader.ReadBoolean(); // 1 byte

                int blockCount = reader.ReadInt32(); // 4 bytes
                List<HashPointers> blocks = new List<HashPointers>(blockCount);
                record.Pointers = blocks;

                for (int blockIndex = 0; blockIndex < blockCount; ++blockIndex)
                {
                    string hash = Encoding.UTF8.GetString(reader.ReadBytes(32)); // 32 bytes

                    int offsetCount = reader.ReadInt32(); // 4 bytes
                    List<long> offsets = new List<long>(offsetCount);
                    for (int remaining = offsetCount; remaining > 0; --remaining)
                        offsets.Add(reader.ReadInt64()); // 8 bytes each

                    HashPointers block = new HashPointers();
                    block.Hash = hash;
                    block.FileEntries = offsets;
                    blocks.Add(block);
                }
            }

            return recordStart;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Appends a <c>SizeHashEntry</c> to the end of the underlying stream.
        /// </summary>
        /// <remarks>
        /// Layout written, in order: the "decided" flag (1 byte), the number of hash pointer
        /// blocks (4 bytes), then each block as the UTF-8 bytes of its hash, the number of
        /// file entry pointers (4 bytes) and the pointers themselves (8 bytes each).
        /// The stream is left open after writing.
        /// </remarks>
        /// <param name="entry">The record to serialise.</param>
        /// <returns>The stream offset at which the record begins.</returns>
        public long WriteRecord(SizeHashEntry entry)
        {
            // Records are always appended; Seek reports the resulting position
            long recordStart = this.Stream.Seek(0, SeekOrigin.End);

            // leaveOpen = true, the writer must not close the shared stream
            using (BinaryWriter writer = new BinaryWriter(this.Stream, Encoding.UTF8, true))
            {
                writer.Write(entry.Decided); // bool, 1 byte
                writer.Write(entry.Pointers.Count); // int, 4 bytes

                for (int blockIndex = 0; blockIndex < entry.Pointers.Count; ++blockIndex)
                {
                    HashPointers block = entry.Pointers[blockIndex];

                    byte[] hashBytes = Encoding.UTF8.GetBytes(block.Hash); // expected to be 32 bytes
                    writer.Write(hashBytes);

                    writer.Write(block.FileEntries.Count); // int, 4 bytes
                    for (int entryIndex = 0; entryIndex < block.FileEntries.Count; ++entryIndex)
                        writer.Write(block.FileEntries[entryIndex]); // long, 8 bytes
                }
            }

            return recordStart;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Program entry point. Scans the current directory for duplicated files in several phases:
        /// measure file sizes, analyse sizes, hash the candidate files, group them by hash,
        /// let the user pick which copies to keep, and finally remove the scheduled files.
        /// </summary>
        /// <remarks>
        /// Process exit codes: 0 on success (or after printing help), 1 when the working
        /// datafiles cannot be created, 2 when one or more files could not be deleted,
        /// 3 when both -o and -n were given.
        /// </remarks>
        /// <param name="args">Command-line switches: -h, -v, -d, -o, -n.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Duplicate Destroyer");
            Console.WriteLine("'Devastating Desert'");
            Console.WriteLine("Licenced under Tiny Driplet Licence (can be found at cloudchiller.net)");
            Console.WriteLine("Copyright, Copydrunk, Copypone (c) 2012-2014, Cloud Chiller");
            Console.WriteLine();

            if (args.Contains("-h"))
            {
                Console.WriteLine("HELP:");
                Console.WriteLine("-h       Show this help text");
                Console.WriteLine("-v       Verbose mode");
                Console.WriteLine("-d       Dry run/discovery - Only check for duplicates, but don't actually remove them");
                Console.WriteLine("-o       Automatically keep the OLDEST of the files");
                Console.WriteLine("-n       Automatically keep the NEWEST of the files");
                Console.WriteLine();
                Console.WriteLine("Omitting both -o and -n results in the user being queried about which file to keep.");
                Console.WriteLine("Using both -o and -n throws an error.");
                Console.WriteLine();

                Environment.Exit(0);
            }

            Verbose = args.Contains("-v");
            DryRun = args.Contains("-d");
            AutoOldest = args.Contains("-o");
            AutoNewest = args.Contains("-n");
            SizeCount = 0;
            FileCount = 0;

            if (AutoOldest && AutoNewest)
            {
                Console.WriteLine("ERROR: Conflicting arguments.");
                Console.WriteLine("Please use either -o or -n, not both.");
                Console.WriteLine();

                Environment.Exit(3);
            }

            // Open (and truncate) the working datafiles and the duplicate log
            FileStream SizesFileStream = null;
            FileStream PathsFileStream = null;
            FileStream HashesFileStream = null;
            FileStream DuplicateLogFileStream = null;
            try
            {
                SizesFileStream = new FileStream(".dd_sizes", FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.None);
                SizesFileStream.SetLength(0);

                PathsFileStream = new FileStream(".dd_files", FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.None);
                PathsFileStream.SetLength(0);

                HashesFileStream = new FileStream(".dd_hashes", FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.None);
                HashesFileStream.SetLength(0);

                FilesToRemove = new FileStream(".dd_remove", FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.None);
                FilesToRemove.SetLength(0);

                // Use a fixed, culture-independent timestamp: the default DateTime.ToString() output
                // can contain characters such as '/' that are not valid in a file name.
                string logStamp = DateTime.Now.ToString("yyyy-MM-dd_HH_mm_ss", System.Globalization.CultureInfo.InvariantCulture);
                DuplicateLogFileStream = new FileStream("duplicates_" + logStamp + ".log", FileMode.OpenOrCreate,
                    FileAccess.Write, FileShare.None);
                DuplicateLogFileStream.SetLength(0);
            }
            catch (Exception ex)
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine("Was unable to create the program's datafiles.");
                Console.ResetColor();
                Console.WriteLine("Please make sure the folder " + Directory.GetCurrentDirectory() + " is writable.");
                Console.WriteLine("The following error happened: " + ex.Message);

                Environment.Exit(1);
            }

            SizesFile = new SizeFile(SizesFileStream);
            PathsFile = new PathFile(PathsFileStream);
            HashesFile = new HashFile(HashesFileStream);
            DuplicateFileLog = new StreamWriter(DuplicateLogFileStream);

            FileRemoveException = false;
            TargetDirectory = Directory.GetCurrentDirectory();

            // Phase 1: walk the directory tree and record every file's size
            {
                Console.Write("Counting files and measuring sizes... " + (Verbose ? "\n" : String.Empty));
                List<string> Subfolders = new List<string>();
                Subfolders.Add(TargetDirectory);
                while (Subfolders.Count != 0)
                {
                    // Read the files in the subfolders.
                    ReadFileSizes(Subfolders[0], ref Subfolders);
                    // The on-the-fly detected subfolders are added to the list while reading.
                }
                SizesFile.Stream.Flush(true);
                PathsFile.Stream.Flush(true);
                Console.WriteLine((!Verbose ? "\n" : String.Empty) + FileCount + " files found.");
                Console.WriteLine();
            }

            // Phase 2: analyse the collected sizes
            {
                Console.Write("Analysing sizes... " + (Verbose ? "\n" : String.Empty));
                AnalyseSizes();
                SizesFile.DeleteRecord(0); // 0-byte files are ALWAYS duplicates of each other...
                SizesFile.Stream.Flush(true);
                PathsFile.Stream.Flush(true);
                Console.WriteLine((!Verbose ? "\n" : String.Empty) + SizeCount + " unique file size found for " + FileCount + " files.");
                Console.WriteLine();
            }

            // Disabled maintenance step, kept for reference:
            //{
            //    // Remove entries from the PathsFile physically which were logically removed (marked deleted) in the previous step
            //    if (Verbose)
            //    {
            //        Console.WriteLine("Removing knowledge about files I don't need to check.");
            //        Console.WriteLine("(This is an internal maintenance run to speed up further operations.)");
            //    }
            //    PathsFile.Consolidate(new SizeFileAligner(Program.AlignSizeFilePointers));
            //    PathsFile.Stream.Flush(true);
            //    if (Verbose)
            //        Console.WriteLine();
            //}

            // Phase 3: hash every file reachable from the remaining size records
            {
                Console.Write("Reading file contents... " + (Verbose ? "\n" : String.Empty));
                MD5CryptoServiceProvider mcsp = new MD5CryptoServiceProvider();
                ulong _hashesReadCount = 0;
                foreach (SizeEntry duplicated_size in SizesFile.GetRecords())
                {
                    if (Verbose)
                    {
                        Console.ForegroundColor = ConsoleColor.Cyan;
                        Console.WriteLine("Reading files of " + duplicated_size.Size + " size");
                        Console.ResetColor();
                    }
                    // For each size entry, iterate the path list
                    PathEntry entry;
                    long position = duplicated_size.FirstPath;

                    while (position != -1)
                    {
                        // An unreadable record ends the chain; without this the loop would
                        // never advance 'position' and spin forever.
                        if (!PathsFile.GetRecordAt(position, out entry))
                            break;

                        string hash = String.Empty;
                        try
                        {
                            hash = CalculateHash(ref mcsp, entry.Path);
                            ++_hashesReadCount;
                        }
                        catch (Exception ex)
                        {
                            // Best effort: report the failure and carry on with the next file
                            Console.ForegroundColor = ConsoleColor.Yellow;
                            Console.WriteLine("The file " + entry.Path + " could not be checked, because:");
                            Console.ResetColor();
                            Console.WriteLine(ex.Message);
                        }

                        if (!String.IsNullOrEmpty(hash))
                            entry.Hash = hash;
                        else
                            // Mark this record "deleted" so it won't be checked for hash duplication
                            entry.Deleted = true;

                        PathsFile.WriteRecordAt(entry, position);
                        VisualGlyph(_hashesReadCount);
                        position = entry.NextRecord; // Jump to the next record in the chain
                    }
                }
                PathsFile.Stream.Flush(true);
                Console.WriteLine((!Verbose ? "\n" : String.Empty) + _hashesReadCount + " files read.");
            }

            // Phase 4: group files by hash and let the user decide what to keep
            {
                Console.Write("Searching for true duplication... " + (Verbose ? "\n" : String.Empty));
                long UniqueHashCount, DuplicatedFileCount;
                AnalyseFilelist(out UniqueHashCount, out DuplicatedFileCount);
                HashesFile.Stream.Flush(true);
                Console.WriteLine((!Verbose ? "\n" : String.Empty) + UniqueHashCount + " unique content duplicated across " + DuplicatedFileCount + " files.");
                Console.WriteLine();

                Console.WriteLine();
                Console.WriteLine("Please select which files you wish to remove.");
                long dealtWithCount = 0;
                // NOTE(review): every pass restarts from the first hash record and does not skip
                // groups that were already decided, so they appear to be prompted (and counted)
                // again on a later pass - confirm whether the Decided flag was meant to be used here.
                while (dealtWithCount < UniqueHashCount)
                {
                    // We go through every hash entry and prompt the user to decide which file to remove
                    HashesFile.Stream.Seek(0, SeekOrigin.Begin);
                    SizeHashEntry she = new SizeHashEntry();
                    PathEntry etr = new PathEntry();
                    long pos = 0;

                    while (pos != -1)
                    {
                        // Get the next duplicated hash
                        pos = HashesFile.GetNextRecord(out she);
                        if (pos != -1)
                        {
                            // Iterate the hash pointers...
                            foreach (HashPointers ptr in she.Pointers)
                            {
                                if (ptr.FileEntries.Count == 0)
                                    continue;

                                // Select which file the user wants to keep
                                List<int> fileIDsToKeep;
                                bool userDecided = SelectFilesToKeep(ptr, out fileIDsToKeep);

                                if (!DryRun)
                                {
                                    if (!userDecided)
                                        Console.WriteLine("Didn't make a decision. You will be asked later on.");
                                    else
                                    {
                                        ++dealtWithCount;

                                        if (fileIDsToKeep.Count == ptr.FileEntries.Count)
                                            Console.WriteLine("Selected to keep all files.");
                                        else if (fileIDsToKeep.Count > 0)
                                        {
                                            // NOTE(review): in automatic mode (-o / -n) nothing is scheduled
                                            // for removal in this branch; presumably SelectFilesToKeep
                                            // handles that case itself - verify.
                                            if (!AutoOldest && !AutoNewest)
                                            {
                                                // File IDs are 1-based indices into ptr.FileEntries
                                                foreach (int id in fileIDsToKeep)
                                                {
                                                    Console.Write("Selected to  ");
                                                    Console.ForegroundColor = ConsoleColor.White;
                                                    Console.Write("KEEP");
                                                    Console.ResetColor();
                                                    Console.Write("  ");

                                                    PathsFile.GetRecordAt(ptr.FileEntries[id - 1], out etr);
                                                    Console.WriteLine(etr.Path);
                                                }

                                                foreach (int id in Enumerable.Range(1, ptr.FileEntries.Count).Except(fileIDsToKeep))
                                                {
                                                    Console.Write("Selected to ");
                                                    Console.ForegroundColor = ConsoleColor.Red;
                                                    Console.Write("DELETE");
                                                    Console.ResetColor();
                                                    Console.Write(" ");

                                                    PathsFile.GetRecordAt(ptr.FileEntries[id - 1], out etr);
                                                    Console.WriteLine(etr.Path);

                                                    // Schedule the file: one path per line in the removal list
                                                    byte[] pathLine = Encoding.UTF8.GetBytes(etr.Path + StreamWriter.Null.NewLine);
                                                    FilesToRemove.Write(pathLine, 0, pathLine.Length);
                                                }
                                            }
                                        }
                                        else if (fileIDsToKeep.Count == 0)
                                        {
                                            Console.WriteLine("All files will be deleted:");

                                            foreach (long offset in ptr.FileEntries)
                                            {
                                                PathsFile.GetRecordAt(offset, out etr);
                                                Console.WriteLine(etr.Path);

                                                byte[] pathLine = Encoding.UTF8.GetBytes(etr.Path + StreamWriter.Null.NewLine);
                                                FilesToRemove.Write(pathLine, 0, pathLine.Length);
                                            }
                                        }

                                        FilesToRemove.Flush();
                                    }
                                }
                                else
                                    ++dealtWithCount;
                            }
                        }
                    }
                }
                Console.WriteLine();
            }

            // Phase 5: delete everything that was scheduled for removal
            {
                Console.Write("Removing all scheduled files... " + (Verbose ? "\n" : String.Empty));
                uint _filesRemoved = 0;
                if (DryRun)
                    Console.WriteLine("Won't remove files in dry-run/discovery mode.");
                else
                {
                    FilesToRemove.Seek(0, SeekOrigin.Begin);
                    string path;

                    if (FilesToRemove.Length > 0) // Only if there are files to be removed
                    {
                        // Disposing the reader also closes the FilesToRemove stream
                        using (StreamReader sr = new StreamReader(FilesToRemove))
                        {
                            // Process every scheduled path, not just the first line
                            while ((path = sr.ReadLine()) != null)
                            {
                                if (RemoveFile(path))
                                    ++_filesRemoved;
                            }
                        }
                    }
                }
                Console.WriteLine((!Verbose ? "\n" : String.Empty) + _filesRemoved + " files deleted successfully.");
            }

            SizesFileStream.Dispose();
            PathsFileStream.Dispose();
            HashesFileStream.Dispose();
            //FilesToRemove.Dispose();
            DuplicateFileLog.Dispose();

            // Cleanup
            //File.Delete(".dd_sizes");
            //File.Delete(".dd_files");
            //File.Delete(".dd_hashes");
            //File.Delete(".dd_remove");

            if (FileRemoveException)
            {
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("One or more files could not be deleted.");
                Console.ResetColor();
            }

            Console.WriteLine("Press ENTER to exit...");
            Console.ReadLine();

            if (FileRemoveException)
                Environment.Exit(2);
            else
                Environment.Exit(0);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Walks every size record, groups the non-deleted files of that size by their hash and
        /// stores each group that has hashes shared by more than one file in the hashes datafile.
        /// </summary>
        /// <param name="UniqueHashCount">Receives the number of hashes that remain after single-file hashes are dropped.</param>
        /// <param name="DuplicatedFileCount">Receives the number of files counted under the remaining hashes.</param>
        static void AnalyseFilelist(out long UniqueHashCount, out long DuplicatedFileCount)
        {
            UniqueHashCount = 0;
            DuplicatedFileCount = 0;

            for (long sizeIndex = 0; sizeIndex < SizesFile.RecordCount; ++sizeIndex)
            {
                SizeEntry sizeRecord = SizesFile.GetRecordByIndex(sizeIndex);

                SizeHashEntry hashGroup = new SizeHashEntry();
                hashGroup.Pointers = new List<HashPointers>();

                // Follow the chain of path records that share this size
                long offset = sizeRecord.FirstPath;
                while (offset != -1)
                {
                    PathEntry pathRecord;
                    if (!PathsFile.GetRecordAt(offset, out pathRecord))
                        break;

                    if (pathRecord.Deleted)
                    {
                        if (Verbose)
                            Console.WriteLine("Skipping file " + pathRecord.Path + ", I was unable to check it.");
                    }
                    else
                    {
                        // Look up the pointer list that belongs to this file's hash
                        HashPointers bucket = hashGroup.Pointers.FirstOrDefault(p => p.Hash == pathRecord.Hash);
                        if (bucket.FileEntries == null)
                        {
                            // No list yet means the hash is new for this size: create and register it
                            bucket.Hash = pathRecord.Hash;
                            bucket.FileEntries = new List<long>();

                            hashGroup.Pointers.Add(bucket);
                            ++UniqueHashCount;
                        }

                        bucket.FileEntries.Add(offset); // a file with this hash lives at this offset
                        ++DuplicatedFileCount;
                    }

                    VisualGlyph((ulong)DuplicatedFileCount);
                    offset = pathRecord.NextRecord;
                }

                // Hashes owned by exactly one file are not duplicates: drop them and fix up the counters
                int singletons = hashGroup.Pointers.RemoveAll(candidate => candidate.FileEntries.Count == 1);
                UniqueHashCount -= singletons;
                DuplicatedFileCount -= singletons;

                // Nothing duplicated for this size, nothing to persist
                if (hashGroup.Pointers.Count == 0)
                    continue;

                // Store the group and remember in the size table where it begins
                sizeRecord.HashEntry = HashesFile.WriteRecord(hashGroup);
                SizesFile.WriteRecordAt(sizeRecord, sizeIndex * SizeEntry.RecordSize);
            }
        }