/// <summary>
/// Samples up to <paramref name="quickHashSize"/> bytes of <paramref name="file"/>, stores the
/// sampled bytes in <c>file.Tags</c> and the text returned by <c>HashTool.HashBytesText</c>
/// for them in <c>file.QuickHash</c>.
/// </summary>
/// <remarks>
/// When the whole file fits in the sample buffer every byte is read, which is what justifies
/// setting <c>file.Status</c> to <c>CompareStatus.Matched</c>. Larger files are sampled in
/// three equally sized sections taken from the start, the middle and the end of the file.
/// </remarks>
/// <param name="file">Item to hash; <c>Size</c> and <c>FileName</c> are read, <c>Tags</c>,
/// <c>QuickHash</c> and possibly <c>Status</c> are written.</param>
/// <param name="quickHashSize">Upper bound, in bytes, of the sample buffer.</param>
/// <param name="totalFileBytes">Counter atomically increased by the file size.</param>
/// <param name="totalReadBytes">Counter atomically increased by the sample buffer size.</param>
private static void QuickHashFile(DupItem file, int quickHashSize, ref long totalFileBytes, ref long totalReadBytes)
{
    Interlocked.Add(ref totalFileBytes, file.Size);

    var hashSize = (int)Math.Min(file.Size, quickHashSize);
    var coversWholeFile = file.Size <= hashSize;

    using (var stream = File.Open(file.FileName, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        var buffer = new byte[hashSize];
        file.Tags = buffer;

        // Stream.Read may return fewer bytes than requested, so keep reading until the
        // section is full or the end of the stream is reached.
        void ReadSection(long position, int offset, int count)
        {
            stream.Seek(position, SeekOrigin.Begin);
            var filled = 0;
            while (filled < count)
            {
                var read = stream.Read(buffer, offset + filled, count - filled);
                if (read == 0)
                {
                    break;
                }

                filled += read;
            }
        }

        if (coversWholeFile)
        {
            // Three sections of hashSize / 3 bytes skip bytes whenever the size is not a
            // multiple of 3; read everything so that "Matched" really means all bytes were hashed.
            ReadSection(0, 0, hashSize);
        }
        else
        {
            // Integer division: up to two trailing bytes of the buffer stay zero.
            var sectionSize = hashSize / 3;
            var positions = new[]
            {
                0L,
                file.Size / 2 - sectionSize / 2,
                file.Size - sectionSize
            };

            for (var i = 0; i < positions.Length; i++)
            {
                ReadSection(positions[i], i * sectionSize, sectionSize);
            }
        }

        file.QuickHash = HashTool.HashBytesText(buffer);

        if (coversWholeFile)
        {
            file.Status = CompareStatus.Matched;
        }

        Interlocked.Add(ref totalReadBytes, hashSize);
    }
}
/// <summary>
/// Hashes one section of the file behind <paramref name="dupItem"/> via
/// <c>HashTool.HashFile</c> and appends the result to <c>dupItem.HashSections</c>,
/// creating that list on first use.
/// </summary>
/// <param name="position">Value forwarded to <c>HashTool.HashFile</c> as the section position.</param>
/// <param name="dupItem">Item whose <c>FileName</c> is hashed and whose <c>HashSections</c> grows by one entry.</param>
/// <param name="bufferSize">Value forwarded twice to <c>HashTool.HashFile</c>
/// (presumably section length and read-buffer size; confirm against HashTool).</param>
/// <param name="totalReadBytes">Counter atomically increased by the size reported by <c>HashTool.HashFile</c>.</param>
private static void ProgressiveHashSection(long position, DupItem dupItem, int bufferSize, ref long totalReadBytes)
{
    // Make sure the list exists before hashing, so it is present even if hashing throws.
    var sections = dupItem.HashSections;
    if (sections == null)
    {
        sections = new List<string>();
        dupItem.HashSections = sections;
    }

    var sectionHash = HashTool.HashFile(dupItem.FileName, position, bufferSize, bufferSize, out var bytesRead);
    sections.Add(sectionHash);

    Interlocked.Add(ref totalReadBytes, bytesRead);
}
/// <summary>
/// Compares <paramref name="targetFile"/> against an already hashed
/// <paramref name="sourceDupItem"/>: first by size, then by quick hash, then section by
/// section against <c>sourceDupItem.HashSections</c>.
/// </summary>
/// <param name="sourceDupItem">Reference item; its <c>Size</c>, <c>QuickHash</c> and
/// <c>HashSections</c> are read.</param>
/// <param name="targetFile">Path passed to <c>GetDupFileItem</c> to build the item to compare.</param>
/// <param name="quickHashSize">Sample size forwarded to <c>QuickHashFile</c>.</param>
/// <param name="bufferSize">Section size, in bytes, used for the progressive comparison.</param>
/// <returns>
/// <c>true</c> when every compared stage matches; <c>false</c> on the first mismatch or when
/// the source item does not provide enough hash sections to compare against.
/// </returns>
public static bool ProgressiveCompareFile(DupItem sourceDupItem, string targetFile, int quickHashSize, int bufferSize)
{
    // Local counters only: the helpers require them, the totals are not reported from here.
    var totalFileBytes = 0L;
    var totalReadBytes = 0L;

    var targetDupFileItem = GetDupFileItem(targetFile);
    if (targetDupFileItem.Size != sourceDupItem.Size)
    {
        return false;
    }

    QuickHashFile(targetDupFileItem, quickHashSize, ref totalFileBytes, ref totalReadBytes);
    if (targetDupFileItem.QuickHash != sourceDupItem.QuickHash)
    {
        return false;
    }

    // A source without any hash sections cannot confirm a match. Treat it like the
    // "source has too few sections" case below instead of failing with a
    // NullReferenceException after the first section has already been read from disk.
    var sourceSections = sourceDupItem.HashSections;
    if (sourceSections == null)
    {
        return false;
    }

    var length = targetDupFileItem.Size / bufferSize;
    if (length == 0)
    {
        length = 1;
    }

    // NOTE(review): the integer division above drops a trailing partial section, so bytes past
    // length * bufferSize are not compared here. Confirm how the source sections are produced
    // before changing the section count.
    var position = 0L;
    for (var i = 0; i < length; i++)
    {
        ProgressiveHashSection(position, targetDupFileItem, bufferSize, ref totalReadBytes);

        if (sourceSections.Count < i + 1 || targetDupFileItem.HashSections[i] != sourceSections[i])
        {
            return false;
        }

        position += bufferSize;
    }

    return true;
}
/// <summary>
/// Reads the file at <c>filePath</c>, removes everything matched by the "IgnoreSymbol" app
/// setting from each line, and records runs of consecutive lines that occur more than once
/// in a dictionary of <c>DupItem</c> values.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Reference System.Configuration
    Regex rgx = new Regex(ConfigurationManager.AppSettings["IgnoreSymbol"]);
    List<string> rgxContent = new List<string>();

    #region -- Read Line --
    // Dispose the reader so the file handle is released as soon as reading is done.
    using (StreamReader file = new StreamReader(filePath, Encoding.UTF8))
    {
        string line;
        while ((line = file.ReadLine()) != null)
        {
            rgxContent.Add(rgx.Replace(line, ""));
        }
    }
    #endregion

    Dictionary<int, DupItem> dicDup = new Dictionary<int, DupItem>();

    // Groups of identical (normalized) lines that occur more than once, ordered by the index
    // of their first occurrence. Materialized once: the loops below index into the result
    // repeatedly, and a deferred query would be re-evaluated on every access.
    var duplicates = rgxContent.Select((t, i) => new { Index = i, Text = t })
                               .GroupBy(g => g.Text)
                               .Where(g => g.Count() > 1)
                               .OrderBy(g => g.FirstOrDefault().Index)
                               .ToList();
    int duplicateCount = duplicates.Count;

    // Most recently written entry, tracked explicitly instead of calling dicDup.Last():
    // Dictionary enumeration order is not guaranteed.
    bool hasLastDup = false;
    DupItem lastDup = default(DupItem);

    for (int mainIndex = 0; mainIndex < duplicateCount; ++mainIndex)
    {
        // NOTE(review): this compares a group index with a line index (oglEnd) - confirm intent.
        if (hasLastDup && mainIndex < lastDup.oglEnd)
        {
            continue;
        }

        var item = duplicates[mainIndex];
        int fstIndex = item.FirstOrDefault().Index;
        int prvIndex = fstIndex;

        for (int subIndex = mainIndex + 1; subIndex < duplicateCount; ++subIndex)
        {
            var candidate = duplicates[subIndex];

            // Only a group whose first line directly follows the previous one extends the run.
            if (prvIndex != candidate.FirstOrDefault().Index - 1)
            {
                continue;
            }

            DupItem dup;
            if (!dicDup.TryGetValue(fstIndex, out dup))
            {
                // First extension of this run: record where the original and the repeat start.
                dup = new DupItem();
                dup.oglTop = item.ElementAt(0).Index;
                dup.rptTop = item.ElementAt(1).Index;
            }

            dup.oglEnd = candidate.ElementAt(0).Index;
            dup.rptEnd = candidate.ElementAt(1).Index;

            // The run is keyed by its first line index (prvIndex still equals fstIndex when
            // the entry is created). Written back explicitly in case DupItem is a value type.
            dicDup[fstIndex] = dup;
            lastDup = dup;
            hasLastDup = true;

            ++prvIndex;
        }
    }

    #region -- Output --
    //StreamWriter newfile = new StreamWriter(CompleteOutputPath(OutputChoice.CurrentFolder, filePath));
    //newfile.WriteLine("\n");
    #endregion
}
/// <summary>
/// Reads the file at <c>filePath</c>, removes everything matched by the "IgnoreSymbol" app
/// setting from each line, and records runs of consecutive lines that occur more than once
/// in a dictionary of <c>DupItem</c> values.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Reference System.Configuration
    Regex rgx = new Regex(ConfigurationManager.AppSettings["IgnoreSymbol"]);
    List<string> rgxContent = new List<string>();

    #region -- Read Line --
    // Dispose the reader so the file handle is released as soon as reading is done.
    using (StreamReader file = new StreamReader(filePath, Encoding.UTF8))
    {
        string line;
        while ((line = file.ReadLine()) != null)
        {
            rgxContent.Add(rgx.Replace(line, ""));
        }
    }
    #endregion

    Dictionary<int, DupItem> dicDup = new Dictionary<int, DupItem>();

    // Groups of identical (normalized) lines that occur more than once, ordered by the index
    // of their first occurrence. Materialized once: the loops below index into the result
    // repeatedly, and a deferred query would be re-evaluated on every access.
    var duplicates = rgxContent.Select((t, i) => new { Index = i, Text = t })
                               .GroupBy(g => g.Text)
                               .Where(g => g.Count() > 1)
                               .OrderBy(g => g.FirstOrDefault().Index)
                               .ToList();
    int duplicateCount = duplicates.Count;

    // Most recently written entry, tracked explicitly instead of calling dicDup.Last():
    // Dictionary enumeration order is not guaranteed.
    bool hasLastDup = false;
    DupItem lastDup = default(DupItem);

    for (int mainIndex = 0; mainIndex < duplicateCount; ++mainIndex)
    {
        // NOTE(review): this compares a group index with a line index (oglEnd) - confirm intent.
        if (hasLastDup && mainIndex < lastDup.oglEnd)
        {
            continue;
        }

        var item = duplicates[mainIndex];
        int fstIndex = item.FirstOrDefault().Index;
        int prvIndex = fstIndex;

        for (int subIndex = mainIndex + 1; subIndex < duplicateCount; ++subIndex)
        {
            var candidate = duplicates[subIndex];

            // Only a group whose first line directly follows the previous one extends the run.
            if (prvIndex != candidate.FirstOrDefault().Index - 1)
            {
                continue;
            }

            DupItem dup;
            if (!dicDup.TryGetValue(fstIndex, out dup))
            {
                // First extension of this run: record where the original and the repeat start.
                dup = new DupItem();
                dup.oglTop = item.ElementAt(0).Index;
                dup.rptTop = item.ElementAt(1).Index;
            }

            dup.oglEnd = candidate.ElementAt(0).Index;
            dup.rptEnd = candidate.ElementAt(1).Index;

            // The run is keyed by its first line index (prvIndex still equals fstIndex when
            // the entry is created). Written back explicitly in case DupItem is a value type.
            dicDup[fstIndex] = dup;
            lastDup = dup;
            hasLastDup = true;

            ++prvIndex;
        }
    }

    #region -- Output --
    //StreamWriter newfile = new StreamWriter(CompleteOutputPath(OutputChoice.CurrentFolder, filePath));
    //newfile.WriteLine("\n");
    #endregion
}