/// <summary>
/// Computes the quick hash of <paramref name="file"/> from at most
/// <paramref name="quickHashSize"/> bytes of its content and stores the sampled bytes
/// in <c>file.Tags</c> and the resulting hash in <c>file.QuickHash</c>.
/// </summary>
/// <remarks>
/// When the whole file fits into the sample it is read completely and the item is
/// flagged <c>CompareStatus.Matched</c>. Otherwise three equally sized sections are
/// sampled: the start, the middle and the end of the file.
/// </remarks>
/// <param name="file">Item to hash; its <c>FileName</c> and <c>Size</c> are read, its <c>Tags</c>, <c>QuickHash</c> and possibly <c>Status</c> are written.</param>
/// <param name="quickHashSize">Upper bound, in bytes, of the sample that is hashed.</param>
/// <param name="totalFileBytes">Running counter that is atomically increased by <c>file.Size</c>.</param>
/// <param name="totalReadBytes">Running counter that is atomically increased by the sample size.</param>
private static void QuickHashFile(DupItem file, int quickHashSize, ref long totalFileBytes, ref long totalReadBytes)
        {
            Interlocked.Add(ref totalFileBytes, file.Size);
            var hashSize = (int)Math.Min(file.Size, quickHashSize);
            var coversWholeFile = file.Size <= hashSize;

            using (var stream = File.Open(file.FileName, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                file.Tags = new byte[hashSize];
                if (coversWholeFile)
                {
                    // The item is about to be flagged as fully compared, so every byte must
                    // take part in the hash. Splitting into three sections would skip up to
                    // two bytes whenever the size is not a multiple of three.
                    FillFrom(stream, 0, file.Tags, 0, hashSize);
                }
                else
                {
                    // Integer division: up to two trailing bytes of Tags stay zero.
                    var sectionSize = hashSize / 3;

                    FillFrom(stream, 0, file.Tags, 0, sectionSize);
                    FillFrom(stream, file.Size / 2 - sectionSize / 2, file.Tags, sectionSize, sectionSize);
                    FillFrom(stream, file.Size - sectionSize, file.Tags, 2 * sectionSize, sectionSize);
                }

                file.QuickHash = HashTool.HashBytesText(file.Tags);
                if (coversWholeFile)
                {
                    file.Status = CompareStatus.Matched;
                }
                Interlocked.Add(ref totalReadBytes, hashSize);
            }

            // Seeks to 'position' and reads until 'count' bytes are stored or the stream ends.
            // Stream.Read may legitimately return fewer bytes than requested, hence the loop.
            void FillFrom(Stream source, long position, byte[] buffer, int offset, int count)
            {
                source.Seek(position, SeekOrigin.Begin);

                var filled = 0;
                while (filled < count)
                {
                    var read = source.Read(buffer, offset + filled, count - filled);
                    if (read == 0)
                    {
                        // End of stream reached early; the remaining bytes stay zero.
                        break;
                    }
                    filled += read;
                }
            }
        }
 /// <summary>
 /// Hashes one section of the file behind <paramref name="dupItem"/> and appends the
 /// hash to <c>dupItem.HashSections</c>, creating that list on first use.
 /// </summary>
 /// <param name="position">Offset passed to <c>HashTool.HashFile</c> as the section start.</param>
 /// <param name="dupItem">Item whose <c>FileName</c> is hashed and whose section list grows by one entry.</param>
 /// <param name="bufferSize">Value passed to <c>HashTool.HashFile</c> for both of its size arguments.</param>
 /// <param name="totalReadBytes">Running counter that is atomically increased by the reported read size.</param>
 private static void ProgressiveHashSection(long position, DupItem dupItem, int bufferSize, ref long totalReadBytes)
 {
     // Lazily create the section list the first time this item is hashed.
     if (dupItem.HashSections == null)
     {
         dupItem.HashSections = new List<string>();
     }

     var sections = dupItem.HashSections;
     var sectionHash = HashTool.HashFile(dupItem.FileName, position, bufferSize, bufferSize, out var bytesRead);

     sections.Add(sectionHash);
     Interlocked.Add(ref totalReadBytes, bytesRead);
 }
        /// <summary>
        /// Checks whether <paramref name="targetFile"/> matches an already hashed source item
        /// by comparing, in order, the file size, the quick hash and the per-section hashes.
        /// </summary>
        /// <param name="sourceDupItem">Reference item; its <c>Size</c>, <c>QuickHash</c> and <c>HashSections</c> are read.</param>
        /// <param name="targetFile">Path of the file to compare against the source item.</param>
        /// <param name="quickHashSize">Sample size forwarded to <c>QuickHashFile</c>.</param>
        /// <param name="bufferSize">Section size in bytes used for the progressive hashes; must be positive.</param>
        /// <returns>
        /// <c>true</c> when size, quick hash and every compared section hash are equal;
        /// otherwise <c>false</c>, including when the source item holds too few section hashes.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="bufferSize"/> is zero or negative and the comparison got past the size and quick-hash checks.
        /// </exception>
        public static bool ProgressiveCompareFile(DupItem sourceDupItem, string targetFile, int quickHashSize, int bufferSize)
        {
            var totalFileBytes    = 0L;
            var totalReadBytes    = 0L;
            var targetDupFileItem = GetDupFileItem(targetFile);

            if (targetDupFileItem.Size != sourceDupItem.Size)
            {
                return false;
            }

            QuickHashFile(targetDupFileItem, quickHashSize, ref totalFileBytes, ref totalReadBytes);
            if (targetDupFileItem.QuickHash != sourceDupItem.QuickHash)
            {
                return false;
            }

            // Validated only here so the cheap mismatch exits above keep returning false.
            // A negative size would skip the loop below and report a match without comparing anything.
            if (bufferSize <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(bufferSize), bufferSize, "Buffer size must be positive.");
            }

            // NOTE(review): integer division ignores a trailing partial buffer, so the last
            // Size % bufferSize bytes are not section-hashed. Left unchanged because the source
            // item's sections must be produced with the same count - confirm against the code
            // that fills sourceDupItem.HashSections.
            var length = targetDupFileItem.Size / bufferSize;

            if (length == 0)
            {
                length = 1;
            }

            // HashSections is created lazily, so it is null for an item that was never
            // section-hashed. Treat that like "too few sections" instead of dereferencing null.
            var sourceSections = sourceDupItem.HashSections;
            if (sourceSections == null || sourceSections.Count < length)
            {
                return false;
            }

            var position = 0L;

            for (var i = 0; i < length; i++)
            {
                ProgressiveHashSection(position, targetDupFileItem, bufferSize, ref totalReadBytes);
                if (targetDupFileItem.HashSections[i] != sourceSections[i])
                {
                    return false;
                }
                position += bufferSize;
            }
            return true;
        }
Exemple #4
0
        /// <summary>
        /// Loads the file at <c>filePath</c>, removes every match of the "IgnoreSymbol"
        /// app-setting pattern from each line, and collects in a dictionary the runs of
        /// repeated lines whose first occurrences sit on consecutive line indexes.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Reference System.Configuration
            Regex rgx = new Regex(ConfigurationManager.AppSettings["IgnoreSymbol"]);

            List<string> rgxContent = new List<string>();

            #region -- Read Line --
            // The using block releases the file handle as soon as all lines are loaded.
            using (StreamReader file = new StreamReader(filePath, Encoding.UTF8))
            {
                string line;

                while ((line = file.ReadLine()) != null)
                {
                    rgxContent.Add(rgx.Replace(line, ""));
                }
            }
            #endregion

            Dictionary<int, DupItem> dicDup = new Dictionary<int, DupItem>();

            // Group the cleaned lines by text, keep the texts that occur more than once and
            // order the groups by the line index of their first occurrence. The query is
            // materialised once; leaving it deferred would re-group and re-sort the whole
            // file on every lookup inside the nested loops below.
            var duplicates = rgxContent.Select((t, i) => new { Index = i, Text = t })
                                       .GroupBy(g => g.Text)
                                       .Where(g => g.Count() > 1)
                                       .OrderBy(g => g.First().Index)
                                       .Select(g => g.ToList())
                                       .ToList();

            int duplicateCount = duplicates.Count;

            for (int mainIndex = 0; mainIndex < duplicateCount; ++mainIndex)
            {
                // NOTE(review): Dictionary enumeration order is not guaranteed, and this compares
                // a group ordinal (mainIndex) with a line index (oglEnd) - confirm the intent.
                if (dicDup.Count > 0 && mainIndex < dicDup.Last().Value.oglEnd)
                {
                    continue;
                }

                var mainGroup = duplicates[mainIndex];

                int fstIndex = mainGroup[0].Index;
                int prvIndex = fstIndex;

                for (int subIndex = mainIndex + 1; subIndex < duplicateCount; ++subIndex)
                {
                    var subGroup = duplicates[subIndex];

                    // Only a group whose first occurrence directly follows the previous line extends the run.
                    if (prvIndex != subGroup[0].Index - 1)
                    {
                        continue;
                    }

                    DupItem dup;

                    if (!dicDup.TryGetValue(fstIndex, out dup))
                    {
                        // First extension of this run: record where the original and its repeat start.
                        dup = new DupItem();

                        dup.oglTop = mainGroup[0].Index;
                        dup.rptTop = mainGroup[1].Index;
                    }

                    dup.oglEnd = subGroup[0].Index;
                    dup.rptEnd = subGroup[1].Index;

                    // Written back explicitly so the update is stored even if DupItem is a value type.
                    dicDup[fstIndex] = dup;

                    ++prvIndex;
                }
            }

            #region -- Output --
            //StreamWriter newfile = new StreamWriter(CompleteOutputPath(OutputChoice.CurrentFolder, filePath));

            //newfile.WriteLine("\n");
            #endregion
        }
Exemple #5
0
        /// <summary>
        /// Loads the file at <c>filePath</c>, removes every match of the "IgnoreSymbol"
        /// app-setting pattern from each line, and collects in a dictionary the runs of
        /// repeated lines whose first occurrences sit on consecutive line indexes.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Reference System.Configuration
            Regex rgx = new Regex(ConfigurationManager.AppSettings["IgnoreSymbol"]);

            List<string> rgxContent = new List<string>();

            #region -- Read Line --
            // The using block releases the file handle as soon as all lines are loaded.
            using (StreamReader file = new StreamReader(filePath, Encoding.UTF8))
            {
                string line;

                while ((line = file.ReadLine()) != null)
                {
                    rgxContent.Add(rgx.Replace(line, ""));
                }
            }
            #endregion

            Dictionary<int, DupItem> dicDup = new Dictionary<int, DupItem>();

            // Group the cleaned lines by text, keep the texts that occur more than once and
            // order the groups by the line index of their first occurrence. The query is
            // materialised once; leaving it deferred would re-group and re-sort the whole
            // file on every lookup inside the nested loops below.
            var duplicates = rgxContent.Select((t, i) => new { Index = i, Text = t })
                                       .GroupBy(g => g.Text)
                                       .Where(g => g.Count() > 1)
                                       .OrderBy(g => g.First().Index)
                                       .Select(g => g.ToList())
                                       .ToList();

            int duplicateCount = duplicates.Count;

            for (int mainIndex = 0; mainIndex < duplicateCount; ++mainIndex)
            {
                // NOTE(review): Dictionary enumeration order is not guaranteed, and this compares
                // a group ordinal (mainIndex) with a line index (oglEnd) - confirm the intent.
                if (dicDup.Count > 0 && mainIndex < dicDup.Last().Value.oglEnd)
                {
                    continue;
                }

                var mainGroup = duplicates[mainIndex];

                int fstIndex = mainGroup[0].Index;
                int prvIndex = fstIndex;

                for (int subIndex = mainIndex + 1; subIndex < duplicateCount; ++subIndex)
                {
                    var subGroup = duplicates[subIndex];

                    // Only a group whose first occurrence directly follows the previous line extends the run.
                    if (prvIndex != subGroup[0].Index - 1)
                    {
                        continue;
                    }

                    DupItem dup;

                    if (!dicDup.TryGetValue(fstIndex, out dup))
                    {
                        // First extension of this run: record where the original and its repeat start.
                        dup = new DupItem();

                        dup.oglTop = mainGroup[0].Index;
                        dup.rptTop = mainGroup[1].Index;
                    }

                    dup.oglEnd = subGroup[0].Index;
                    dup.rptEnd = subGroup[1].Index;

                    // Written back explicitly so the update is stored even if DupItem is a value type.
                    dicDup[fstIndex] = dup;

                    ++prvIndex;
                }
            }

            #region -- Output --
            //StreamWriter newfile = new StreamWriter(CompleteOutputPath(OutputChoice.CurrentFolder, filePath));

            //newfile.WriteLine("\n");
            #endregion
        }