/// <summary>
        /// Removes the record whose size equals <paramref name="size"/>, if one is stored.
        /// </summary>
        /// <param name="size">The size value identifying the record to remove.</param>
        public void DeleteRecord(ulong size)
        {
            SizeEntry existing;
            long offset;

            // No record with this size: there is nothing to delete.
            if (!GetRecord(size, out existing, out offset))
                return;

            // Deleting means overwriting the record with everything stored behind it:
            // the tail of the stream is shifted one record width towards the start.
            long tailStart = offset + SizeEntry.RecordSize;
            Program.MoveEndPart(this.Stream, tailStart, -SizeEntry.RecordSize);

            // One record fewer is stored now.
            this.LastPosition -= SizeEntry.RecordSize;
        }
        /// <summary>
        /// Binary-searches the size records (stored in DESCENDING size order) for <paramref name="size"/>.
        /// </summary>
        /// <param name="size">The size to look for.</param>
        /// <param name="record">
        /// On success, the matching record. On failure, the last record that was probed
        /// (or a fresh <c>SizeEntry</c> when there are no records) -- callers must not rely on its contents.
        /// </param>
        /// <param name="position">
        /// On success, the stream offset of the matching record. On failure, the offset at which
        /// a record with this size has to be inserted to keep the ordering.
        /// </param>
        /// <returns><c>true</c> when a record with exactly this size exists; otherwise <c>false</c>.</returns>
        public bool GetRecord(ulong size, out SizeEntry record, out long position)
        {
            record = new SizeEntry();
            position = 0;

            // Nothing stored, nothing to find.
            if (this.RecordCount == 0)
                return false;

            long first = 0;
            long last = this.RecordCount - 1;

            while (first <= last)
            {
                // Probe the record in the middle of the remaining interval.
                long middle = (first + last) / 2;
                position = middle * SizeEntry.RecordSize;
                record = GetRecordAt(position);

                if (record.Size == size)
                    return true;

                if (record.Size > size)
                {
                    // Descending order: smaller sizes live in the RIGHT half.
                    first = middle + 1;
                }
                else
                {
                    // Descending order: bigger sizes live in the LEFT half.
                    last = middle - 1;
                }
            }

            // Not found: report the slot where the record would have to be inserted.
            position = first * SizeEntry.RecordSize;
            return false;
        }
        /// <summary>
        /// Reads one size record (five consecutive 8-byte fields) starting at the given stream offset.
        /// </summary>
        /// <param name="position">Byte offset of the record; must be a multiple of <c>SizeEntry.RecordSize</c>.</param>
        /// <returns>The record read from the stream.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="position"/> is negative or not inside the stream.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="position"/> does not point to the start of a record.
        /// </exception>
        private SizeEntry GetRecordAt(long position)
        {
            // A position equal to the stream length has no bytes behind it, so it is rejected here
            // instead of failing later inside the reader. The two-argument constructor is used
            // because the single-string overload treats its argument as the parameter name.
            if (position < 0 || position >= this.Stream.Length)
                throw new ArgumentOutOfRangeException("position", "Position out of stream bounds.");
            else if (position % SizeEntry.RecordSize != 0)
                throw new ArgumentException("Invalid position pointing inside record.");

            SizeEntry rec = new SizeEntry();

            // leaveOpen = true: disposing the reader must not close the shared stream.
            using (BinaryReader br = new BinaryReader(this.Stream, System.Text.Encoding.UTF8, true))
            {
                this.Stream.Seek(position, SeekOrigin.Begin);
                rec.Size = br.ReadUInt64(); // 8
                rec.Count = br.ReadUInt64(); // 8
                rec.FirstPath = br.ReadInt64(); // 8
                rec.LastPath = br.ReadInt64(); // 8
                rec.HashEntry = br.ReadInt64(); // 8
            }

            return rec;
        }
        /// <summary>
        /// Writes the five 8-byte fields of <paramref name="rec"/> at the given stream offset and flushes the stream.
        /// </summary>
        /// <param name="rec">The record to store.</param>
        /// <param name="position">Byte offset to write at; must be a multiple of <c>SizeEntry.RecordSize</c>.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="position"/> is negative or lies beyond the writable end of the stream.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="position"/> does not point to the start of a record.
        /// </exception>
        internal void WriteRecordAt(SizeEntry rec, long position)
        {
            // Writing at the very end is allowed because that is how the stream grows.
            // NOTE(review): appending happens at position == Length; the historical "+ 1" slack is kept
            // unchanged here so the accepted range stays exactly as before -- confirm whether it is needed.
            // The two-argument constructor is used because the single-string overload of
            // ArgumentOutOfRangeException treats its argument as the parameter name, not the message.
            if (position < 0 || position > (this.Stream.Length + 1))
                throw new ArgumentOutOfRangeException("position", "Position out of stream bounds.");
            else if (position % SizeEntry.RecordSize != 0)
                throw new ArgumentException("Invalid position pointing inside record.");

            // leaveOpen = true: disposing the writer must not close the shared stream.
            using (BinaryWriter bw = new BinaryWriter(this.Stream, Encoding.UTF8, true))
            {
                this.Stream.Seek(position, SeekOrigin.Begin);
                bw.Write(rec.Size); // 8
                bw.Write(rec.Count); // 8
                bw.Write(rec.FirstPath); // 8
                bw.Write(rec.LastPath); // 8
                bw.Write(rec.HashEntry); // 8
                this.Stream.Flush();
            }
        }
        /// <summary>
        /// Stores <paramref name="rec"/>: overwrites the record that has the same size,
        /// or inserts a new record at the position that keeps the records ordered.
        /// </summary>
        /// <param name="rec">The record to store; its <c>Size</c> is the lookup key.</param>
        public void WriteRecord(SizeEntry rec)
        {
            SizeEntry existing;
            long target;

            // GetRecord reports either the offset of the existing record or, when there is
            // none, the offset where the new record belongs.
            bool alreadyStored = GetRecord(rec.Size, out existing, out target);

            if (!alreadyStored)
            {
                // Make room: shift everything from the insertion point one record to the right.
                Program.MoveEndPart(this.Stream, target, SizeEntry.RecordSize);

                // The stream now holds one record more.
                this.LastPosition += SizeEntry.RecordSize;
            }

            // Either fills the freshly opened gap or overwrites the existing record.
            WriteRecordAt(rec, target);
        }
        /// <summary>
        /// Lazily enumerates the records from the start of the stream up to the stream's length.
        /// </summary>
        /// <returns>The records in the order they are stored.</returns>
        public IEnumerable<SizeEntry> GetRecords()
        {
            this.Stream.Seek(0, SeekOrigin.Begin);

            // leaveOpen = true: disposing the reader must not close the shared stream.
            using (BinaryReader reader = new BinaryReader(this.Stream, Encoding.UTF8, true))
            {
                for (long nextOffset = 0; nextOffset < this.Stream.Length; )
                {
                    // Re-seek on every round: the consumer may have moved the stream
                    // between two yielded records.
                    this.Stream.Seek(nextOffset, SeekOrigin.Begin);

                    SizeEntry current = new SizeEntry
                    {
                        Size = reader.ReadUInt64(),      // 8
                        Count = reader.ReadUInt64(),     // 8
                        FirstPath = reader.ReadInt64(),  // 8
                        LastPath = reader.ReadInt64(),   // 8
                        HashEntry = reader.ReadInt64()   // 8
                    };

                    // Remember where the next record starts BEFORE handing control to the consumer.
                    nextOffset = this.Stream.Position;
                    yield return current;
                }
            }
        }
Example #7
0
        /// <summary>
        /// Scans the top level of <paramref name="directory"/>: subfolders are queued in
        /// <paramref name="subfolderList"/>, and every file is registered in the sizes file
        /// (size and count) and in the paths file (its relative path).
        /// </summary>
        /// <param name="directory">The directory whose direct children are examined.</param>
        /// <param name="subfolderList">
        /// Work list of folders still to be scanned. Found subfolders are inserted starting at index 1,
        /// and <paramref name="directory"/> itself is removed from the list when the scan ends.
        /// </param>
        /// <remarks>
        /// A failure while registering a file in the databank is treated as fatal and terminates the process.
        /// </remarks>
        static void ReadFileSizes(string directory, ref List<string> subfolderList)
        {
            if (Verbose)
            {
                Console.ForegroundColor = ConsoleColor.Cyan;
                Console.WriteLine("Reading contents of " + directory);
                Console.ResetColor();
            }

            try
            {
                // Counts the subfolders queued from this directory; it is incremented before each insert,
                // so the first subfolder lands at index 1 and later ones follow in their natural order.
                // NOTE(review): index 0 is presumably the directory being scanned -- confirm against the caller.
                int queuedSubfolders = 0;

                foreach (string entryPath in Directory.EnumerateFileSystemEntries(directory, "*", SearchOption.TopDirectoryOnly))
                {
                    string fullPath = Path.GetFullPath(entryPath);
                    string relativePath = fullPath.Replace(Directory.GetCurrentDirectory(), String.Empty).TrimStart('\\');

                    // The program's own working files must never be examined.
                    bool isOwnWorkingFile = fullPath == SizesFile.Stream.Name
                        || fullPath == PathsFile.Stream.Name
                        || fullPath == HashesFile.Stream.Name
                        || fullPath == FilesToRemove.Name
                        || fullPath == ((FileStream)DuplicateFileLog.BaseStream).Name;
                    if (isOwnWorkingFile)
                        continue;

                    // Subversion keeps a "pristine" copy of every file, which would make
                    // every versioned file show up as a duplicate.
                    bool isSubversionInternal = relativePath.Contains(".svn\\pristine")
                        || relativePath.Contains(".svn\\entries")
                        || relativePath.Contains(".svn\\format");
                    if (isSubversionInternal)
                        continue;

                    try
                    {
                        if (Directory.Exists(relativePath))
                        {
                            if (Verbose)
                                Console.WriteLine(relativePath + " is a subfolder.");

                            // Queue the subfolder for a later scan.
                            queuedSubfolders++;
                            subfolderList.Insert(queuedSubfolders, relativePath);
                            continue;
                        }

                        // Neither a directory nor a file: nothing to register.
                        if (!File.Exists(relativePath))
                            continue;

                        if (Verbose)
                            Console.Write("Measuring " + relativePath + "...");

                        FileInfo fi = new FileInfo(relativePath);

                        try
                        {
                            ulong fileSize = (ulong)fi.Length;

                            // Fetch the record of this size, or the blank to be filled for a new size.
                            SizeEntry entry;
                            long position;
                            bool known = SizesFile.GetRecord(fileSize, out entry, out position);
                            entry.Size = fileSize;

                            if (known)
                            {
                                entry.Count++;
                            }
                            else
                            {
                                // GetRecord leaves the entry undefined when nothing was found,
                                // so every field is set explicitly for the first file of this size.
                                ++SizeCount;
                                entry.Count = 1;

                                // No PathEntry records are linked to a brand-new size yet.
                                entry.FirstPath = -1;
                                entry.LastPath = -1;
                                entry.HashEntry = -1;
                            }

                            // Register the path and link it to the size record.
                            PathEntry pathRec = new PathEntry(relativePath);
                            long pathWrittenPosition;
                            if (entry.LastPath == -1)
                            {
                                // First path of this size: it is both the first and the last one.
                                pathWrittenPosition = PathsFile.WriteRecord(pathRec);
                                entry.FirstPath = pathWrittenPosition;
                            }
                            else
                            {
                                // Chain the new path behind the current last path of this size.
                                PathEntry previousLastEntry;
                                PathsFile.GetRecordAt(entry.LastPath, out previousLastEntry);
                                pathWrittenPosition = PathsFile.AddAfter(previousLastEntry, pathRec, entry.LastPath);
                            }

                            entry.LastPath = pathWrittenPosition;
                            SizesFile.WriteRecord(entry);

                            if (Verbose)
                                Console.WriteLine(" Size: " + fi.Length + " bytes.");

                            ++FileCount;
                            VisualGlyph(FileCount);
                        }
                        catch (Exception ex)
                        {
                            // A failed registration means the databank can no longer be trusted.
                            Console.ForegroundColor = ConsoleColor.Red;
                            Console.WriteLine("There was an error registering " + relativePath + " in the databank.");
                            Console.ResetColor();
                            Console.WriteLine(ex.Message);

                            Console.ForegroundColor = ConsoleColor.Red;
                            Console.WriteLine("This indicates an error with the databank. Execution cannot continue.");
                            Console.ResetColor();
                            Console.ReadLine();
                            Environment.Exit(1);
                        }
                    }
                    catch (Exception ex)
                    {
                        // A single inaccessible entry only produces a warning; the scan goes on.
                        Console.ForegroundColor = ConsoleColor.Yellow;
                        Console.WriteLine("The path " + relativePath + " could not be accessed, because:");
                        Console.ResetColor();
                        Console.WriteLine(ex.Message);
                    }
                }
            }
            catch (Exception ex)
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine("The directory " + directory + " could not be accessed, because:");
                Console.ResetColor();
                Console.WriteLine(ex.Message);
            }

            // This directory is done, whether the scan succeeded or not.
            subfolderList.Remove(directory);
        }
Example #8
0
        /// <summary>
        /// Drops every size record that refers to at most one file, together with the single path
        /// record linked to it: a size that occurs only once cannot have duplicates.
        /// </summary>
        /// <remarks>
        /// A record with a count of 0 or 1 that links to more than one path is treated as databank
        /// corruption and terminates the process. Both databank streams are flushed to disk at the end.
        /// </remarks>
        static void AnalyseSizes()
        {
            // Walk from the back so that deleting a record has the least data behind it to move.
            ulong _analysedSizes = 0;
            for (long i = SizesFile.RecordCount - 1; i >= 0; --i)
            {
                SizeEntry rec;
                try
                {
                    rec = SizesFile.GetRecordByIndex(i);
                }
                catch (Exception)
                {
                    // Deliberate best-effort: a record that cannot be read is skipped silently.
                    continue;
                }

                if (rec.Count == 1 || rec.Count == 0)
                {
                    if (Verbose)
                    {
                        if (rec.Count == 0)
                            Console.Write("No files with " + rec.Size + " size.");
                        else
                            Console.Write("There's only 1 file with " + rec.Size + " size.");
                    }

                    SizesFile.DeleteRecord(rec.Size);
                    --SizeCount;

                    // Only a record that actually counted a file reduces the file total;
                    // a record with a count of 0 never contributed to it.
                    if (rec.Count == 1)
                        --FileCount;

                    // Remove the path record (there should be exactly one) linked to this size.
                    if (rec.FirstPath != -1 && rec.LastPath != -1)
                    {
                        if (rec.FirstPath != rec.LastPath)
                        {
                            Console.ForegroundColor = ConsoleColor.Red;
                            Console.WriteLine("An error happened while analysing sizes:");
                            Console.ResetColor();
                            // Report the real count: this branch is reached for a count of 0 as well as 1.
                            Console.WriteLine("Count for size " + rec.Size + " is " + rec.Count + ", but there appears to be multiple associated files to exist.");

                            Console.ForegroundColor = ConsoleColor.Red;
                            Console.WriteLine("This indicates an error with the databank. Execution cannot continue.");
                            Console.ResetColor();
                            Console.ReadLine();
                            Environment.Exit(1);
                        }
                        else
                        {
                            PathEntry entry;
                            PathsFile.GetRecordAt(rec.FirstPath, out entry);
                            PathsFile.DeleteRecord(entry, rec.FirstPath);

                            if (Verbose)
                                Console.Write(" Ignoring " + entry.Path);
                        }
                    }

                    if (Verbose)
                        Console.WriteLine();
                }

                VisualGlyph(++_analysedSizes);
            }

            SizesFile.Stream.Flush(true);
            PathsFile.Stream.Flush(true);
        }