/// <summary>
/// Removes the record stored for the given size, if one exists.
/// </summary>
/// <param name="size">The size value identifying the record to remove.</param>
public void DeleteRecord(ulong size)
{
    SizeEntry existing = new SizeEntry();
    long offset = 0;

    // No record for this size: there is nothing to delete.
    if (!GetRecord(size, out existing, out offset))
        return;

    // The record is removed by overwriting it with everything that follows:
    // the part starting right behind it is moved one record size towards the start.
    long tailStart = offset + SizeEntry.RecordSize;
    Program.MoveEndPart(this.Stream, tailStart, -SizeEntry.RecordSize);

    // The stream now holds one record less.
    this.LastPosition -= SizeEntry.RecordSize;
}
/// <summary>
/// Looks up the record with the given size using binary search.
/// The search logic relies on the records being ordered by size, DESCENDING.
/// </summary>
/// <param name="size">The size to search for.</param>
/// <param name="record">
/// On success, the matching record. On failure, the last record that was probed
/// (or a fresh <c>SizeEntry</c> when there are no records) - callers must not rely on its content.
/// </param>
/// <param name="position">
/// On success, the byte offset of the matching record. On failure, the byte offset
/// at which a record with this size should be inserted to keep the order.
/// </param>
/// <returns><c>true</c> if a record with the given size exists; otherwise <c>false</c>.</returns>
public bool GetRecord(ulong size, out SizeEntry record, out long position)
{
    record = new SizeEntry();
    position = 0;

    // If there is nothing, there is nothing to find.
    if (this.RecordCount == 0)
        return false;

    long lower = 0;
    long upper = this.RecordCount - 1;

    while (lower <= upper)
    {
        // Probe the record in the middle of the remaining interval.
        long middle = (lower + upper) / 2;
        position = middle * SizeEntry.RecordSize;
        record = GetRecordAt(position);

        if (size < record.Size)
        {
            // Descending order: smaller sizes live in the RIGHT half.
            lower = middle + 1;
        }
        else if (size > record.Size)
        {
            // Descending order: bigger sizes live in the LEFT half.
            upper = middle - 1;
        }
        else
        {
            return true;
        }
    }

    // Not found: report the place where the record should be inserted.
    position = lower * SizeEntry.RecordSize;
    return false;
}
/// <summary>
/// Reads the record that starts at the given byte offset of the stream.
/// </summary>
/// <param name="position">Byte offset of the record; must be a multiple of <c>SizeEntry.RecordSize</c>.</param>
/// <returns>The record read from the stream.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The position is negative or there is no complete record between it and the end of the stream.
/// </exception>
/// <exception cref="ArgumentException">The position points inside a record.</exception>
private SizeEntry GetRecordAt(long position)
{
    // A whole record has to fit between position and the end of the stream. Checking only
    // "position > Length" would let position == Length through and fail later inside the reader.
    // Negative values are rejected here as well, because a negative multiple of the record size
    // would otherwise pass the alignment check below.
    if (position < 0 || position > this.Stream.Length - SizeEntry.RecordSize)
    {
        // The one-string constructor of ArgumentOutOfRangeException takes the PARAMETER NAME,
        // so the message has to be passed through the (paramName, message) overload.
        throw new ArgumentOutOfRangeException("position", "Position out of stream bounds.");
    }

    if (position % SizeEntry.RecordSize != 0)
        throw new ArgumentException("Invalid position pointing inside record.");

    SizeEntry rec = new SizeEntry();

    // leaveOpen = true: disposing the reader must not close the underlying stream.
    using (BinaryReader br = new BinaryReader(this.Stream, System.Text.Encoding.UTF8, true))
    {
        this.Stream.Seek(position, SeekOrigin.Begin);

        // Five 8-byte fields, read in the order they are stored.
        rec.Size = br.ReadUInt64();      // 8
        rec.Count = br.ReadUInt64();     // 8
        rec.FirstPath = br.ReadInt64();  // 8
        rec.LastPath = br.ReadInt64();   // 8
        rec.HashEntry = br.ReadInt64();  // 8
    }

    return rec;
}
/// <summary>
/// Writes the given record at the given byte offset of the stream and flushes the stream.
/// </summary>
/// <param name="rec">The record to write.</param>
/// <param name="position">Byte offset to write at; must be a multiple of <c>SizeEntry.RecordSize</c>.</param>
/// <exception cref="ArgumentOutOfRangeException">The position is negative or too far behind the end of the stream.</exception>
/// <exception cref="ArgumentException">The position points inside a record.</exception>
internal void WriteRecordAt(SizeEntry rec, long position)
{
    // Writing at the end of the stream is allowed: that is how the stream grows by a record.
    // The check tolerates positions up to Length + 1; negative positions are rejected because
    // a negative multiple of the record size would otherwise pass the alignment check below.
    if (position < 0 || position > (this.Stream.Length + 1))
    {
        // The one-string constructor of ArgumentOutOfRangeException takes the PARAMETER NAME,
        // so the message has to be passed through the (paramName, message) overload.
        throw new ArgumentOutOfRangeException("position", "Position out of stream bounds.");
    }

    if (position % SizeEntry.RecordSize != 0)
        throw new ArgumentException("Invalid position pointing inside record.");

    // leaveOpen = true: disposing the writer must not close the underlying stream.
    using (BinaryWriter bw = new BinaryWriter(this.Stream, Encoding.UTF8, true))
    {
        this.Stream.Seek(position, SeekOrigin.Begin);

        // Five 8-byte fields, written in the order they are read back.
        bw.Write(rec.Size);       // 8
        bw.Write(rec.Count);      // 8
        bw.Write(rec.FirstPath);  // 8
        bw.Write(rec.LastPath);   // 8
        bw.Write(rec.HashEntry);  // 8

        this.Stream.Flush();
    }
}
/// <summary>
/// Stores the given record. If a record with the same size already exists it is overwritten;
/// otherwise the record is inserted at the place that keeps the records ordered.
/// </summary>
/// <param name="rec">The record to store.</param>
public void WriteRecord(SizeEntry rec)
{
    SizeEntry existing = new SizeEntry();
    long target = 0;

    // GetRecord yields either the position of the existing record or, when there is none,
    // the position where the record SHOULD be written to keep the order.
    bool alreadyStored = GetRecord(rec.Size, out existing, out target);

    if (!alreadyStored)
    {
        // Make room first: move everything from the insertion point one record to the right.
        Program.MoveEndPart(this.Stream, target, SizeEntry.RecordSize);

        // A record was added, so the stream contains one record more.
        this.LastPosition += SizeEntry.RecordSize;
    }

    // Now the slot can be written safely (an overwrite if the record already existed).
    WriteRecordAt(rec, target);
}
/// <summary>
/// Lazily enumerates every record of the stream, from the first byte to the end.
/// </summary>
/// <returns>The records in the order they are stored.</returns>
public IEnumerable<SizeEntry> GetRecords()
{
    this.Stream.Seek(0, SeekOrigin.Begin);

    // leaveOpen = true: disposing the reader must not close the underlying stream.
    using (BinaryReader reader = new BinaryReader(this.Stream, Encoding.UTF8, true))
    {
        // resumeAt remembers where the previous record ended. The stream is re-positioned
        // there before every read - presumably because the consumer may move the stream
        // between two iterations.
        for (long resumeAt = 0; resumeAt < this.Stream.Length; )
        {
            this.Stream.Seek(resumeAt, SeekOrigin.Begin);

            SizeEntry current = new SizeEntry
            {
                Size = reader.ReadUInt64(),      // 8
                Count = reader.ReadUInt64(),     // 8
                FirstPath = reader.ReadInt64(),  // 8
                LastPath = reader.ReadInt64(),   // 8
                HashEntry = reader.ReadInt64()   // 8
            };

            resumeAt = this.Stream.Position;
            yield return current;
        }
    }
}
/// <summary>
/// Scans the top level of a directory: subfolders are queued in <paramref name="subfolderList"/>
/// and every file is registered in the sizes file and the paths file.
/// </summary>
/// <param name="directory">The directory to scan. It is removed from the list when the scan is done.</param>
/// <param name="subfolderList">
/// The list of folders still to be scanned. Found subfolders are inserted starting at index 1,
/// in the order they are enumerated. NOTE(review): this assumes the folder being scanned sits at index 0 - confirm against the caller.
/// </param>
static void ReadFileSizes(string directory, ref List<string> subfolderList)
{
    if (Verbose)
    {
        Console.ForegroundColor = ConsoleColor.Cyan;
        Console.WriteLine("Reading contents of " + directory);
        Console.ResetColor();
    }

    try
    {
        int insertIndex = 0;

        foreach (string path in Directory.EnumerateFileSystemEntries(directory, "*", SearchOption.TopDirectoryOnly))
        {
            // Resolve the full path once; it is needed for the relative path and for every comparison below.
            string fullPath = Path.GetFullPath(path);
            string relativePath = fullPath.Replace(Directory.GetCurrentDirectory(), String.Empty).TrimStart('\\');

            // Skip the files the program itself works with.
            if (fullPath == SizesFile.Stream.Name
                || fullPath == PathsFile.Stream.Name
                || fullPath == HashesFile.Stream.Name
                || fullPath == FilesToRemove.Name
                || fullPath == ((FileStream)DuplicateFileLog.BaseStream).Name)
            {
                continue;
            }

            // Skip files if they are in a Subversion structure.
            // SVN saves a "pristine" copy of every file, which would mark every SVNd file as a duplicate.
            if (relativePath.Contains(".svn\\pristine")
                || relativePath.Contains(".svn\\entries")
                || relativePath.Contains(".svn\\format"))
            {
                continue;
            }

            try
            {
                if (Directory.Exists(relativePath))
                {
                    // A directory: remember it so it is checked later on.
                    if (Verbose)
                        Console.WriteLine(relativePath + " is a subfolder.");

                    // Add the found subfolders to the beginning of the list, but keep their natural order.
                    subfolderList.Insert(++insertIndex, relativePath);
                }
                else if (File.Exists(relativePath))
                {
                    if (Verbose)
                        Console.Write("Measuring " + relativePath + "...");

                    // Query the length BEFORE entering the databank section. If the file vanished or
                    // became unreadable in the meantime, that is a problem with this path (handled by
                    // the warning below), not a corrupted databank that has to end the program.
                    FileInfo fi = new FileInfo(relativePath);
                    long length = fi.Length;

                    try
                    {
                        RegisterFileSize(relativePath, length);

                        if (Verbose)
                            Console.WriteLine(" Size: " + length + " bytes.");

                        ++FileCount;
                        VisualGlyph(FileCount);
                    }
                    catch (Exception ex)
                    {
                        Console.ForegroundColor = ConsoleColor.Red;
                        Console.WriteLine("There was an error registering " + relativePath + " in the databank.");
                        Console.ResetColor();
                        Console.WriteLine(ex.Message);

                        Console.ForegroundColor = ConsoleColor.Red;
                        Console.WriteLine("This indicates an error with the databank. Execution cannot continue.");
                        Console.ResetColor();
                        Console.ReadLine();
                        Environment.Exit(1);
                    }
                }
            }
            catch (Exception ex)
            {
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("The path " + relativePath + " could not be accessed, because:");
                Console.ResetColor();
                Console.WriteLine(ex.Message);
            }
        }
    }
    catch (Exception ex)
    {
        Console.ForegroundColor = ConsoleColor.Red;
        Console.WriteLine("The directory " + directory + " could not be accessed, because:");
        Console.ResetColor();
        Console.WriteLine(ex.Message);
    }

    // This directory has been handled.
    subfolderList.Remove(directory);
}

// Counts one more file for the given length in the sizes file and appends the file's path
// to the chain of paths recorded for that size.
static void RegisterFileSize(string relativePath, long length)
{
    SizeEntry entry = new SizeEntry();
    long position = 0;
    bool known = SizesFile.GetRecord((ulong)length, out entry, out position);
    entry.Size = (ulong)length;

    if (!known)
    {
        // GetRecord gives no usable record if the size is not found, so start from a clean one.
        entry.Count = 0;
        ++SizeCount;

        // The new size record currently has no associated PathEntry records in the path file.
        entry.FirstPath = -1;
        entry.LastPath = -1;
        entry.HashEntry = -1;
    }

    entry.Count++;

    // Also register the file's path.
    PathEntry pathRec = new PathEntry(relativePath);
    long pathWrittenPosition;

    if (entry.LastPath != -1)
    {
        // There are paths for this size already: add the new one after the current last one.
        PathEntry previousLastEntry = new PathEntry();
        PathsFile.GetRecordAt(entry.LastPath, out previousLastEntry);
        pathWrittenPosition = PathsFile.AddAfter(previousLastEntry, pathRec, entry.LastPath);
    }
    else
    {
        // First path for this size.
        pathWrittenPosition = PathsFile.WriteRecord(pathRec);
        entry.FirstPath = pathWrittenPosition;
    }

    entry.LastPath = pathWrittenPosition;
    SizesFile.WriteRecord(entry);
}
/// <summary>
/// Drops every size record that refers to at most one file, together with the path record
/// belonging to it: a size that occurs only once cannot have duplicates.
/// </summary>
static void AnalyseSizes()
{
    ulong analysedSizes = 0;

    // Go from the back to make the least write overhead when a record is deleted.
    for (long i = SizesFile.RecordCount - 1; i >= 0; --i)
    {
        SizeEntry rec = new SizeEntry();
        try
        {
            rec = SizesFile.GetRecordByIndex(i);
        }
        catch (Exception)
        {
            // Best effort: a record that cannot be read is silently left alone.
            continue;
        }

        if (rec.Count == 1 || rec.Count == 0)
        {
            if (Verbose)
            {
                if (rec.Count == 0)
                    Console.Write("No files with " + rec.Size + " size.");
                else
                    Console.Write("There's only 1 file with " + rec.Size + " size.");
            }

            SizesFile.DeleteRecord(rec.Size);
            --SizeCount;

            // Only a record that actually counted a file takes one away from the number of files;
            // a record with Count 0 never contributed to FileCount.
            if (rec.Count == 1)
                --FileCount;

            // Delete the path record (there should be at most one) associated with this size; it is no longer needed.
            if (rec.FirstPath != -1 && rec.LastPath != -1)
            {
                if (rec.FirstPath != rec.LastPath)
                {
                    // Different first and last path mean more than one path is chained to this size,
                    // which contradicts the count.
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine("An error happened while analysing sizes:");
                    Console.ResetColor();

                    // Report the count that was really read (it can be 0 as well as 1).
                    Console.WriteLine("Count for size " + rec.Size + " is " + rec.Count + ", but there appears to be multiple associated files to exist.");

                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine("This indicates an error with the databank. Execution cannot continue.");
                    Console.ResetColor();
                    Console.ReadLine();
                    Environment.Exit(1);
                }
                else
                {
                    PathEntry entry;
                    PathsFile.GetRecordAt(rec.FirstPath, out entry);
                    PathsFile.DeleteRecord(entry, rec.FirstPath);

                    if (Verbose)
                        Console.Write(" Ignoring " + entry.Path);
                }
            }

            if (Verbose)
                Console.WriteLine();
        }

        VisualGlyph(++analysedSizes);
    }

    SizesFile.Stream.Flush(true);
    PathsFile.Stream.Flush(true);
}