Example #1
        private void IncrementalHash(IGrouping<string, DupItem> quickHashGroup)
        {
            var groups = quickHashGroup.ToArray();
            var first  = groups.First();

            // every file in the group has the same size, so the chunk count can be taken from the first item
            var length = first.Size / BufferSize;

            if (length == 0)
            {
                length = 1;
            }
            var position = 0;

            for (var i = 0; i < length; i++)
            {
                foreach (var group in groups.GroupBy(g => i == 0 ? string.Empty : g.HashSections[i - 1]))
                {
                    foreach (var groupFile in group)
                    {
                        if (groupFile.HashSections == null)
                        {
                            groupFile.HashSections = new List<string>();
                        }
                        groupFile.HashSections.Add(HashTool.HashFile(groupFile.FileName, position, BufferSize));
                    }
                }
                // advance after hashing so the first chunk (offset 0) is included
                position += BufferSize;
            }

            foreach (var groupFile in groups)
            {
                groupFile.FullHash = string.Join(string.Empty, groupFile.HashSections);
            }
        }
        // Hashes one section of the file starting at 'position' and accumulates the number of bytes actually read.
        private static void ProgressiveHashSection(long position, DupItem dupItem, int bufferSize, ref long totalReadBytes)
        {
            if (dupItem.HashSections == null)
            {
                dupItem.HashSections = new List<string>();
            }
            dupItem.HashSections.Add(HashTool.HashFile(dupItem.FileName, position, bufferSize, bufferSize, out var readSize));
            Interlocked.Add(ref totalReadBytes, readSize);
        }
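Both helpers above delegate the per-section hashing to HashTool.HashFile, whose implementation is not shown on this page. Below is a minimal sketch of what the section-hashing overloads could look like, assuming MD5 over a single byte range and a hex-string result; the class name HashToolSketch, the exact parameter meanings, and the choice of MD5 are assumptions inferred from the call sites, not the library's confirmed API.

    using System;
    using System.IO;
    using System.Security.Cryptography;

    public static class HashToolSketch
    {
        // Hash one section of a file, starting at 'position' and reading at most 'bufferSize' bytes.
        public static string HashFile(string fileName, long position, int bufferSize)
        {
            return HashFile(fileName, position, bufferSize, bufferSize, out _);
        }

        // Overload matching ProgressiveHashSection: also reports how many bytes were actually read.
        public static string HashFile(string fileName, long position, int bufferSize, int readLength, out int readSize)
        {
            using (var stream = File.Open(fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (var md5 = MD5.Create())
            {
                stream.Seek(position, SeekOrigin.Begin);
                var buffer = new byte[bufferSize];
                readSize   = stream.Read(buffer, 0, Math.Min(bufferSize, readLength));
                var hash   = md5.ComputeHash(buffer, 0, readSize);
                return BitConverter.ToString(hash).Replace("-", string.Empty);
            }
        }
    }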
        /// <summary>
        /// Attempts a rapid (hash-only) upload; the file size must be greater than 256 KB.
        /// </summary>
        /// <param name="FilePath">Path of the local file</param>
        /// <param name="FileName">File name</param>
        /// <param name="UploadPath">Destination upload path</param>
        /// <returns>Whether the rapid upload succeeded</returns>
        public static bool RapidUpload(string FilePath, string FileName, string UploadPath)
        {
            FileInfo info = new FileInfo(FilePath);

            // rapid upload is only attempted for files larger than 256 KB
            if (info.Length < (256 * 1024))
            {
                return false;
            }
            // write the temporary slice that SliceMD5 is computed from
            if (!SliceFile(FilePath, FileName))
            {
                return false;
            }
            var MD5      = HashTool.HashFile(FilePath);
            var SliceMD5 = HashTool.HashFile($"{Program.config.TempPath}\\{FileName},Tmp");
            var CRC32    = HashTool.GetFileCRC32(FilePath);

            return RapidUpload(MD5, SliceMD5, CRC32, info.Length, UploadPath);
        }
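The slice hash above is computed from a temporary file written by SliceFile, which is not shown here. Given the 256 KB minimum size check, the slice is presumably the leading 256 KB of the file. The following is a hypothetical sketch of such a helper, written against an explicit destination path rather than the (FilePath, FileName) pair used above purely so it is self-contained; the real SliceFile may behave differently.

    // Hypothetical helper: copy the first 256 KB of a file to a temp location so its
    // MD5 (the "slice" hash) can be computed separately from the full-file hash.
    using System.IO;

    public static class SliceSketch
    {
        private const int SliceSize = 256 * 1024;

        public static bool SliceFile(string sourcePath, string slicePath)
        {
            try
            {
                using (var source = File.OpenRead(sourcePath))
                using (var slice = File.Create(slicePath))
                {
                    var buffer = new byte[SliceSize];
                    var read   = source.Read(buffer, 0, SliceSize);
                    slice.Write(buffer, 0, read);
                }
                return true;
            }
            catch (IOException)
            {
                return false;
            }
        }
    }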
Example #4
        public List<Duplicate> Find(IEnumerable<string> files, int workers)
        {
            var result = new List <Duplicate>();

            //groups with same file size
            var sameSizeGroups = files.Select(f =>
            {
                var info = new FileInfo(f);
                return new DupItem {
                    FileName = f, ModifiedTime = info.LastWriteTime, Size = info.Length
                };
            }).GroupBy(f => f.Size).Where(g => g.Count() > 1);

            foreach (var group in sameSizeGroups)
            {
                foreach (var file in group)
                {
                    if (file.Size > 0)
                    {
                        //fast random byte checking
                        using (var stream = File.OpenRead(file.FileName))
                        {
                            var length = stream.Length;
                            file.Tags = new byte[3];
                            //first byte
                            stream.Seek(0, SeekOrigin.Begin);
                            file.Tags[0] = (byte)stream.ReadByte();

                            //middle byte, we need it especially for xml like files
                            if (length > 1)
                            {
                                stream.Seek(stream.Length / 2, SeekOrigin.Begin);
                                file.Tags[1] = (byte)stream.ReadByte();
                            }

                            //last byte
                            if (length > 2)
                            {
                                // read the actual final byte (seeking to End would make ReadByte return -1)
                                stream.Seek(-1, SeekOrigin.End);
                                file.Tags[2] = (byte)stream.ReadByte();
                            }

                            file.QuickHash = HashTool.GetHashText(file.Tags);
                        }
                    }
                }

                //groups with same quick hash value
                var sameQuickHashGroups = group.GroupBy(f => f.QuickHash).Where(g => g.Count() > 1);
                foreach (var quickHashGroup in sameQuickHashGroups)
                {
                    foreach (var groupFile in quickHashGroup)
                    {
                        groupFile.FullHash = HashTool.HashFile(groupFile.FileName);
                    }

                    //phew, finally.....
                    //group by same file hash
                    var sameFullHashGroups = quickHashGroup.GroupBy(g => g.FullHash).Where(g => g.Count() > 1);
                    result.AddRange(sameFullHashGroups.Select(fullHashGroup => new Duplicate {
                        Items = fullHashGroup.Select(f => new FileItem {
                            FileName = f.FileName, ModifiedTime = f.ModifiedTime, Size = f.Size
                        })
                    }));
                }
            }

            return result;
        }
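Both Find variants rely on a handful of small data types that are not included on this page. The definitions below are reconstructed purely from the properties the examples read and write (DupItem, FileItem, Duplicate, plus DupResult and CompareStatus used by the parallel variant that follows); the real types may carry additional members.

    using System;
    using System.Collections.Generic;

    // Reconstructed from usage only; the original definitions may differ.
    public enum CompareStatus { None, Failed }

    public class DupItem
    {
        public string FileName { get; set; }
        public DateTime ModifiedTime { get; set; }
        public long Size { get; set; }
        public byte[] Tags { get; set; }
        public string QuickHash { get; set; }
        public string FullHash { get; set; }
        public List<string> HashSections { get; set; }
        public CompareStatus Status { get; set; }
    }

    public class FileItem
    {
        public string FileName { get; set; }
        public DateTime ModifiedTime { get; set; }
        public long Size { get; set; }
    }

    public class Duplicate
    {
        public IEnumerable<FileItem> Items { get; set; }
    }

    public class DupResult
    {
        public List<Duplicate> Duplicates { get; set; }
        public List<string> FailedToProcessFiles { get; set; }
    }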
        public DupResult Find(IEnumerable<string> files, int workers, int quickHashSize = 3, int bufferSize = 0)
        {
            var result = new DupResult {
                Duplicates = new List <Duplicate>(), FailedToProcessFiles = new List <string>()
            };

            _workers = workers;

            if (_workers <= 0)
            {
                _workers = 5;
            }

            if (bufferSize <= 3)
            {
                bufferSize = DefaultBufferSize;
            }

            //groups with same file size
            var sameSizeGroups = files.Select(f =>
            {
                var info = new FileInfo(f);
                return new DupItem {
                    FileName = f, ModifiedTime = info.LastWriteTime, Size = info.Length
                };
            }).GroupBy(f => f.Size).Where(g => g.Count() > 1);

            var mappedSameSizeGroupList = new ConcurrentBag<IGrouping<string, DupItem>>();

            Parallel.ForEach(MapFileSizeGroups(sameSizeGroups), mappedSameSizeGroups =>
            {
                foreach (var group in mappedSameSizeGroups)
                {
                    foreach (var file in group)
                    {
                        if (file.Size > 0)
                        {
                            //fast random byte checking
                            try
                            {
                                using (var stream = File.Open(file.FileName, FileMode.Open, FileAccess.Read, FileShare.Read))
                                {
                                    var length = stream.Length;
                                    file.Tags  = new byte[3];
                                    //first byte
                                    stream.Seek(0, SeekOrigin.Begin);
                                    file.Tags[0] = (byte)stream.ReadByte();

                                    //middle byte, we need it especially for xml like files
                                    if (length > 1)
                                    {
                                        stream.Seek(stream.Length / 2, SeekOrigin.Begin);
                                        file.Tags[1] = (byte)stream.ReadByte();
                                    }

                                    //last byte
                                    if (length > 2)
                                    {
                                        // read the actual final byte (seeking to End would make ReadByte return -1)
                                        stream.Seek(-1, SeekOrigin.End);
                                        file.Tags[2] = (byte)stream.ReadByte();
                                    }

                                    file.QuickHash = HashTool.GetHashText(file.Tags);
                                }
                            }
                            catch (Exception)
                            {
                                file.Status = CompareStatus.Failed;
                                // List<T> is not thread-safe; serialize writes from the parallel workers
                                lock (result.FailedToProcessFiles)
                                {
                                    result.FailedToProcessFiles.Add(file.FileName);
                                }
                            }
                        }
                    }

                    //groups with same quick hash value
                    var sameQuickHashGroups = group.Where(f => f.Status != CompareStatus.Failed).GroupBy(f => f.QuickHash).Where(g => g.Count() > 1);
                    foreach (var sameQuickHashGroup in sameQuickHashGroups)
                    {
                        mappedSameSizeGroupList.Add(sameQuickHashGroup);
                    }
                }
            });

            Parallel.ForEach(MapFileHashGroups(mappedSameSizeGroupList), mappedSameHashGroups =>
            {
                foreach (var quickHashGroup in mappedSameHashGroups)
                {
                    foreach (var groupFile in quickHashGroup)
                    {
                        try
                        {
                            groupFile.FullHash = HashTool.HashFile(groupFile.FileName, bufferSize);
                        }
                        catch (Exception)
                        {
                            // guard the shared list against concurrent writes
                            lock (result.FailedToProcessFiles)
                            {
                                result.FailedToProcessFiles.Add(groupFile.FileName);
                            }
                        }
                    }

                    //phew, finally.....
                    //group by same file hash
                    var sameFullHashGroups = quickHashGroup.GroupBy(g => g.FullHash).Where(g => g.Count() > 1);
                    // result.Duplicates is shared across the parallel workers, so guard the AddRange
                    lock (result.Duplicates)
                    {
                        result.Duplicates.AddRange(sameFullHashGroups.Select(fullHashGroup => new Duplicate {
                            Items = fullHashGroup.Select(f => new FileItem {
                                FileName = f.FileName, ModifiedTime = f.ModifiedTime, Size = f.Size
                            })
                        }));
                    }
                }
            });

            return result;
        }
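A hypothetical call to the parallel overload above, assuming it lives on a class named DupFinder (the host class is not shown on this page); the directory path and worker count are illustrative only.

    using System;
    using System.IO;

    public static class Demo
    {
        public static void Main()
        {
            // enumerate candidate files; the path is an example only
            var files = Directory.EnumerateFiles(@"C:\photos", "*", SearchOption.AllDirectories);

            var finder = new DupFinder();   // assumed host class name
            var result = finder.Find(files, workers: 8, bufferSize: 64 * 1024);

            foreach (var duplicate in result.Duplicates)
            {
                Console.WriteLine("Duplicate set:");
                foreach (var item in duplicate.Items)
                {
                    Console.WriteLine($"  {item.FileName} ({item.Size} bytes, modified {item.ModifiedTime})");
                }
            }

            Console.WriteLine($"Failed to process {result.FailedToProcessFiles.Count} file(s).");
        }
    }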