private void IncrementalHash(IGrouping<string, DupItem> quickHashGroup)
{
    var groups = quickHashGroup.ToArray();
    var first = groups.First();

    // Number of buffer-sized sections to hash; always hash at least one section.
    var length = first.Size / BufferSize;
    if (length == 0)
    {
        length = 1;
    }

    // Use a long offset so files larger than 2 GB do not overflow.
    var position = 0L;
    for (var i = 0; i < length; i++)
    {
        // Walk the files section by section, sub-grouped by the hash of the previous section.
        foreach (var group in groups.GroupBy(g => i == 0 ? string.Empty : g.HashSections[i - 1]))
        {
            foreach (var groupFile in group)
            {
                if (groupFile.HashSections == null)
                {
                    groupFile.HashSections = new List<string>();
                }

                // Hash the current section starting at the current offset.
                groupFile.HashSections.Add(HashTool.HashFile(groupFile.FileName, position, BufferSize));
            }
        }

        // Advance after hashing so the first section starts at offset 0.
        position += BufferSize;
    }

    // The full hash is the concatenation of all section hashes.
    foreach (var groupFile in groups)
    {
        groupFile.FullHash = string.Join(string.Empty, groupFile.HashSections);
    }
}
private static void ProgressiveHashSection(long position, DupItem dupItem, int bufferSize, ref long totalReadBytes)
{
    if (dupItem.HashSections == null)
    {
        dupItem.HashSections = new List<string>();
    }

    // Hash one buffer-sized section starting at the given offset and record how many bytes were actually read.
    dupItem.HashSections.Add(HashTool.HashFile(dupItem.FileName, position, bufferSize, bufferSize, out var readSize));

    // Keep a thread-safe running total of bytes read across workers.
    Interlocked.Add(ref totalReadBytes, readSize);
}
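// A minimal sketch of how ProgressiveHashSection might be driven for one same-size group: hash one
// section per pass and stop as soon as every remaining file has a unique chain of section hashes.
// It assumes it lives in the same class as ProgressiveHashSection and the DupItem fields shown above;
// the method name ProgressiveHash and the totalReadBytes counter are illustrative assumptions.
private void ProgressiveHash(DupItem[] sameSizeGroup, int bufferSize, ref long totalReadBytes)
{
    var fileSize = sameSizeGroup[0].Size;
    var sections = Math.Max(1, (fileSize + bufferSize - 1) / bufferSize); // ceiling division

    var candidates = sameSizeGroup.ToList();
    for (long section = 0; section < sections && candidates.Count > 1; section++)
    {
        var position = section * bufferSize;

        // Hash the current section of every file that still has a potential duplicate.
        foreach (var item in candidates)
        {
            ProgressiveHashSection(position, item, bufferSize, ref totalReadBytes);
        }

        // Keep only files whose section-hash chain is still shared with at least one other file.
        candidates = candidates
            .GroupBy(c => string.Join(string.Empty, c.HashSections))
            .Where(g => g.Count() > 1)
            .SelectMany(g => g)
            .ToList();
    }

    // Build the full hash for the files that are still potential duplicates.
    foreach (var item in candidates)
    {
        item.FullHash = string.Join(string.Empty, item.HashSections);
    }
}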
/// <summary>
/// Attempts a rapid (instant) upload of a file; the file size must be greater than 256 KB.
/// </summary>
/// <param name="FilePath">Path of the file</param>
/// <param name="FileName">Name of the file</param>
/// <param name="UploadPath">Target upload path</param>
/// <returns>Whether the upload succeeded</returns>
public static bool RapidUpload(string FilePath, string FileName, string UploadPath)
{
    FileInfo info = new FileInfo(FilePath);
    if (info.Length < (256 * 1024))
    {
        return false;
    }

    if (!SliceFile(FilePath, FileName))
    {
        return false;
    }

    // Hash of the whole file, hash of the temporary slice produced by SliceFile, and the file's CRC32.
    var MD5 = HashTool.HashFile(FilePath);
    var SliceMD5 = HashTool.HashFile($"{Program.config.TempPath}\\{FileName},Tmp");
    var CRC32 = HashTool.GetFileCRC32(FilePath);

    return RapidUpload(MD5, SliceMD5, CRC32, info.Length, UploadPath);
}
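// A minimal usage sketch for RapidUpload, assumed to live in the same class as the method above.
// The local path, file name and remote path are hypothetical examples, and the surrounding program
// is assumed to have configured Program.config.TempPath already.
public static void RapidUploadExample()
{
    var filePath = @"D:\Videos\movie.mp4";     // local file, must be larger than 256 KB
    var uploadPath = "/apps/backup/movie.mp4"; // target path on the remote side

    if (RapidUpload(filePath, "movie.mp4", uploadPath))
    {
        Console.WriteLine("Rapid upload succeeded: the server already had this content.");
    }
    else
    {
        Console.WriteLine("Rapid upload failed; fall back to a normal upload.");
    }
}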
public List<Duplicate> Find(IEnumerable<string> files, int workers)
{
    var result = new List<Duplicate>();

    // Group files that have the same size; a unique size cannot have duplicates.
    var sameSizeGroups = files.Select(f =>
    {
        var info = new FileInfo(f);
        return new DupItem { FileName = f, ModifiedTime = info.LastWriteTime, Size = info.Length };
    }).GroupBy(f => f.Size).Where(g => g.Count() > 1);

    foreach (var group in sameSizeGroups)
    {
        foreach (var file in group)
        {
            if (file.Size > 0)
            {
                // Fast random byte checking: sample the first, middle and last bytes.
                using (var stream = File.OpenRead(file.FileName))
                {
                    var length = stream.Length;
                    file.Tags = new byte[3];

                    // First byte
                    stream.Seek(0, SeekOrigin.Begin);
                    file.Tags[0] = (byte)stream.ReadByte();

                    // Middle byte; especially useful for XML-like files that share a common prefix.
                    if (length > 1)
                    {
                        stream.Seek(stream.Length / 2, SeekOrigin.Begin);
                        file.Tags[1] = (byte)stream.ReadByte();
                    }

                    // Last byte (seek to one byte before the end so ReadByte returns real data).
                    if (length > 2)
                    {
                        stream.Seek(-1, SeekOrigin.End);
                        file.Tags[2] = (byte)stream.ReadByte();
                    }

                    file.QuickHash = HashTool.GetHashText(file.Tags);
                }
            }
        }

        // Groups with the same quick hash value.
        var sameQuickHashGroups = group.GroupBy(f => f.QuickHash).Where(g => g.Count() > 1);
        foreach (var quickHashGroup in sameQuickHashGroups)
        {
            foreach (var groupFile in quickHashGroup)
            {
                groupFile.FullHash = HashTool.HashFile(groupFile.FileName);
            }

            // Phew, finally: group by the full file hash.
            var sameFullHashGroups = quickHashGroup.GroupBy(g => g.FullHash).Where(g => g.Count() > 1);
            result.AddRange(sameFullHashGroups.Select(fullHashGroup => new Duplicate
            {
                Items = fullHashGroup.Select(f => new FileItem { FileName = f.FileName, ModifiedTime = f.ModifiedTime, Size = f.Size })
            }));
        }
    }

    return result;
}
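// A minimal usage sketch for the List<Duplicate>-returning Find overload. DuplicateFinder is an
// assumed name for the containing class, and the scanned directory is a hypothetical example.
public static void FindExample()
{
    var files = Directory.EnumerateFiles(@"C:\Photos", "*", SearchOption.AllDirectories);

    var finder = new DuplicateFinder();
    var duplicates = finder.Find(files, workers: 4);

    foreach (var duplicate in duplicates)
    {
        Console.WriteLine("Duplicate group:");
        foreach (var item in duplicate.Items)
        {
            Console.WriteLine($"  {item.FileName} ({item.Size} bytes, modified {item.ModifiedTime})");
        }
    }
}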
public DupResult Find(IEnumerable<string> files, int workers, int quickHashSize = 3, int bufferSize = 0)
{
    var result = new DupResult { Duplicates = new List<Duplicate>(), FailedToProcessFiles = new List<string>() };

    _workers = workers;
    if (_workers <= 0)
    {
        _workers = 5;
    }

    // Fall back to the default buffer size when no sensible value is supplied.
    if (bufferSize <= 3)
    {
        bufferSize = DefaultBufferSize;
    }

    // Group files that have the same size; a unique size cannot have duplicates.
    var sameSizeGroups = files.Select(f =>
    {
        var info = new FileInfo(f);
        return new DupItem { FileName = f, ModifiedTime = info.LastWriteTime, Size = info.Length };
    }).GroupBy(f => f.Size).Where(g => g.Count() > 1);

    var mappedSameSizeGroupList = new ConcurrentBag<IGrouping<string, DupItem>>();

    Parallel.ForEach(MapFileSizeGroups(sameSizeGroups), mappedSameSizeGroups =>
    {
        foreach (var group in mappedSameSizeGroups)
        {
            foreach (var file in group)
            {
                if (file.Size > 0)
                {
                    // Fast random byte checking: sample the first, middle and last bytes.
                    try
                    {
                        using (var stream = File.Open(file.FileName, FileMode.Open, FileAccess.Read, FileShare.Read))
                        {
                            var length = stream.Length;
                            file.Tags = new byte[3];

                            // First byte
                            stream.Seek(0, SeekOrigin.Begin);
                            file.Tags[0] = (byte)stream.ReadByte();

                            // Middle byte; especially useful for XML-like files that share a common prefix.
                            if (length > 1)
                            {
                                stream.Seek(stream.Length / 2, SeekOrigin.Begin);
                                file.Tags[1] = (byte)stream.ReadByte();
                            }

                            // Last byte (seek to one byte before the end so ReadByte returns real data).
                            if (length > 2)
                            {
                                stream.Seek(-1, SeekOrigin.End);
                                file.Tags[2] = (byte)stream.ReadByte();
                            }

                            file.QuickHash = HashTool.GetHashText(file.Tags);
                        }
                    }
                    catch (Exception)
                    {
                        file.Status = CompareStatus.Failed;
                        result.FailedToProcessFiles.Add(file.FileName);
                    }
                }
            }

            // Groups with the same quick hash value (skip files that failed to open).
            var sameQuickHashGroups = group.Where(f => f.Status != CompareStatus.Failed)
                .GroupBy(f => f.QuickHash).Where(g => g.Count() > 1);
            foreach (var sameQuickHashGroup in sameQuickHashGroups)
            {
                mappedSameSizeGroupList.Add(sameQuickHashGroup);
            }
        }
    });

    Parallel.ForEach(MapFileHashGroups(mappedSameSizeGroupList), mappedSameHashGroups =>
    {
        foreach (var quickHashGroup in mappedSameHashGroups)
        {
            foreach (var groupFile in quickHashGroup)
            {
                try
                {
                    groupFile.FullHash = HashTool.HashFile(groupFile.FileName, bufferSize);
                }
                catch (Exception)
                {
                    result.FailedToProcessFiles.Add(groupFile.FileName);
                }
            }

            // Phew, finally: group by the full file hash.
            var sameFullHashGroups = quickHashGroup.GroupBy(g => g.FullHash).Where(g => g.Count() > 1);
            result.Duplicates.AddRange(sameFullHashGroups.Select(fullHashGroup => new Duplicate
            {
                Items = fullHashGroup.Select(f => new FileItem { FileName = f.FileName, ModifiedTime = f.ModifiedTime, Size = f.Size })
            }));
        }
    });

    return result;
}
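// A minimal usage sketch for the DupResult-returning Find overload, including the files that could
// not be read. DuplicateFinder is an assumed name for the containing class; the directory and the
// 64 KB buffer size are hypothetical examples.
public static void FindWithResultExample()
{
    var files = Directory.EnumerateFiles(@"C:\Photos", "*", SearchOption.AllDirectories);

    var finder = new DuplicateFinder();
    var result = finder.Find(files, workers: 4, bufferSize: 64 * 1024);

    foreach (var duplicate in result.Duplicates)
    {
        Console.WriteLine("Duplicate group:");
        foreach (var item in duplicate.Items)
        {
            Console.WriteLine($"  {item.FileName} ({item.Size} bytes)");
        }
    }

    foreach (var failed in result.FailedToProcessFiles)
    {
        Console.WriteLine($"Could not process: {failed}");
    }
}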