// Computes a cheap "quick hash" for the file by sampling up to three bytes
// (first, middle, last) and hashing them; used to pre-filter duplicate
// candidates before a full-content hash. Empty files are left untouched.
public static void SetQuickHash(FileItem file)
{
    if (file.Size > 0)
    {
        // fast random byte checking
        using (var stream = File.OpenRead(file.FileName))
        {
            var length = stream.Length;
            var tags = new byte[3];

            // first byte
            stream.Seek(0, SeekOrigin.Begin);
            tags[0] = (byte)stream.ReadByte();

            // middle byte, we need it especially for xml like files
            if (length > 1)
            {
                stream.Seek(length / 2, SeekOrigin.Begin);
                tags[1] = (byte)stream.ReadByte();
            }

            // last byte
            if (length > 2)
            {
                // Seek to the last byte, not past it: Seek(0, SeekOrigin.End)
                // lands AT end-of-stream, where ReadByte() returns -1 and the
                // byte cast silently stored 0xFF for every file.
                stream.Seek(-1, SeekOrigin.End);
                tags[2] = (byte)stream.ReadByte();
            }

            file.QuickHash = HashTool.GetHashText(tags);
        }
    }
}
// Registers fileItem in the quick-hash groups and cascades into the full-hash
// check. Returns the full hash when the file is a confirmed duplicate, or
// null when it is the first file seen with its quick hash.
private string AddToQuickHash(FileItem fileItem)
{
    // for empty files, do not hash
    fileItem.QuickHash = fileItem.Size == 0 ? "" : HashTool.QuickHashFile(fileItem.FileName);

    // Single lookup instead of repeated ContainsKey + indexer on the same key.
    SameQuickHashGroup group;
    if (_sameQuickHashGroups.TryGetValue(fileItem.QuickHash, out group))
    {
        if (group.Files.Count == 1 && string.IsNullOrEmpty(group.Files[0].FullHash))
        {
            // quickhash in group, so no fullhash comparison done. Create one now
            var prevFileItem = group.Files[0];
            AddToFullHash(prevFileItem);
            _duplicates++;
            //Log(string.Format("{0}, {1}, {2}", _fileNo, _duplicates, _sameFullHashGroups.Count));
        }

        group.Files.Add(fileItem);

        // This fileSize exists, so check on existing full hash
        return AddToFullHash(fileItem);
    }

    // quickhash did not exist, so no duplicate file
    group = new SameQuickHashGroup();
    group.Files.Add(fileItem);
    _sameQuickHashGroups.Add(fileItem.QuickHash, group);
    return null;
}
// Determines whether two files have identical content by comparing, in order
// of increasing cost: file size, quick hash, then full hash. Any mismatch
// short-circuits with false.
public static bool IsDuplicate(string fileName1, string fileName2)
{
    var length1 = new FileInfo(fileName1).Length;
    var length2 = new FileInfo(fileName2).Length;
    if (length1 != length2)
    {
        return false;
    }

    var quickHash1 = HashTool.QuickHashFile(fileName1);
    var quickHash2 = HashTool.QuickHashFile(fileName2);
    if (quickHash1 != quickHash2)
    {
        return false;
    }

    // Sizes and quick hashes agree; only a full-content hash can decide.
    return HashTool.HashFile(fileName1) == HashTool.HashFile(fileName2);
}
// Registers fileItem in the full-hash groups. Returns the full hash when a
// file with the same full hash was already seen (i.e. a duplicate), or null
// when this is the first file with that hash.
private string AddToFullHash(FileItem fileItem)
{
    // for small files, the full hash is same as quickhash
    fileItem.FullHash = fileItem.Size < 4 ? fileItem.QuickHash : HashTool.HashFile(fileItem.FileName);

    // Single lookup instead of ContainsKey followed by three indexer hits.
    SameFullHashGroup group;
    if (_sameFullHashGroups.TryGetValue(fileItem.FullHash, out group))
    {
        // fullhash exists, so duplicate file
        group.Files.Add(fileItem);
        return fileItem.FullHash;
    }

    // fullhash did not exist, so no duplicate file
    group = new SameFullHashGroup();
    group.Files.Add(fileItem);
    _sameFullHashGroups.Add(fileItem.FullHash, group);
    return null;
}
// ******** Private functions ********

// Resolves fileName through the size -> quickhash -> fullhash cascade,
// lazily creating the quick/full hash of a group's single previous member
// when that comparison has not been done yet. Returns the full hash when a
// matching full-hash group exists, otherwise null.
private string GetHash(string fileName)
{
    var fileInfo = new FileInfo(fileName);

    // Add all files in groups based on size
    if (!_sameSizeGroups.ContainsKey(fileInfo.Length))
    {
        return null;
    }

    // Hoist the group reference; the original re-indexed the dictionary on
    // every access.
    var sizeGroup = _sameSizeGroups[fileInfo.Length];
    if (sizeGroup.Files.Count == 1 && string.IsNullOrEmpty(sizeGroup.Files[0].QuickHash))
    {
        // Single file in group, so no quickhash comparison done. Create one now
        AddToQuickHash(sizeGroup.Files[0]);
    }

    var quickHash = HashTool.QuickHashFile(fileName);
    if (!_sameQuickHashGroups.ContainsKey(quickHash))
    {
        return null;
    }

    var quickGroup = _sameQuickHashGroups[quickHash];
    if (quickGroup.Files.Count == 1 && string.IsNullOrEmpty(quickGroup.Files[0].FullHash))
    {
        // quickhash in group, but no fullhash comparison done. Create one now
        AddToFullHash(quickGroup.Files[0]);
    }

    var fullHash = HashTool.HashFile(fileName);
    return _sameFullHashGroups.ContainsKey(fullHash) ? fullHash : null;
}