Beispiel #1
0
        static void Main(string[] args)
        {
            log4net.Config.XmlConfigurator.Configure();
            Logger.Debug("Hello World");


            ArchiveDuplicateDetector worker = new ArchiveDuplicateDetector();

            worker.Notify += new ArchiveDuplicateDetector.NotifyEventHandler(worker_Notify);

            List <string> paths = new List <string>();

            paths.Add(@"D:\New Folder");
            DuplicateSearchOption option = new DuplicateSearchOption()
            {
                Paths = paths
            };
            List <DuplicateArchiveInfoList> list = worker.Search(option);

            foreach (var item in list)
            {
                System.Console.WriteLine(item.Original.ToString());
                foreach (var dup in item.Duplicates)
                {
                    System.Console.WriteLine(" - " + dup.ToString());
                }
            }

            System.Console.ReadLine();
        }
Beispiel #2
0
        private void btnSearch_Click(object sender, EventArgs e)
        {
            dgvResult.Rows.Clear();
            DuplicateSearchOption option = new DuplicateSearchOption()
            {
                Paths                    = GetPathList(),
                Limit                    = Convert.ToInt32(txtLimitPercentage.Text),
                IgnoreLimit              = Convert.ToInt32(txtIgnoreLimit.Text),
                FilePattern              = txtFilePattern.Text,
                BlacklistPattern         = txtBlackList.Text,
                FileCaseInsensitive      = chkFileCI.Checked,
                BlacklistCaseInsensitive = chkBlacklistCI.Checked,
                SevenZipPath             = txt7zDllPath.Text,
                OnlyPerfectMatch         = chkOnlyPerfectMatch.Checked,
                Priority                 = (ThreadPriority)cbxPriority.SelectedIndex,
                PreventStanby            = chkPreventStanby.Checked,
                IgnoreSmallFile          = chkIgnoreSmallFileSize.Checked,
                SmallFileSizeLimit       = ulong.Parse(txtSmallFileSizeLimit.Text),
                TaskLimit                = int.Parse(txtThreadCount.Text)
            };

            detector.SearchThreading(option);
            btnPause.Enabled  = true;
            btnStop.Enabled   = true;
            btnSearch.Enabled = false;
        }
 public void SearchThreading(DuplicateSearchOption option)
 {
     ParameterizedThreadStart ts = new ParameterizedThreadStart(SearchThreadingImpl);
     if (_thread == null || _thread.ThreadState == ThreadState.Stopped)
     {
         _thread = new Thread(ts);
         _thread.Priority = ThreadPriority.Lowest;
         _thread.Start(option);
     }
 }
        public List<DuplicateArchiveInfoList> Search(DuplicateSearchOption option)
        {
            NotifyCaller("Target Count: " + option.Paths.Count, OperationStatus.READY);

            if (option.PreventStanby)
            {
                NotifyCaller("Disabling Sleep", OperationStatus.READY);
                Util.PreventSleep();
            }

            List<FileInfo> fileList =  BuildFileList(option);
            List<DuplicateArchiveInfo> list = CalculateCRC(fileList, option);
            List<DuplicateArchiveInfoList> dupList = BuildDuplicateList(list, option);
            dupList = CleanUpDuplicate(dupList);

            Util.AllowStanby();

            return dupList;
        }
Beispiel #5
0
        static void Main(string[] args)
        {
            log4net.Config.XmlConfigurator.Configure();
            Logger.Debug("Hello World");

            ArchiveDuplicateDetector worker = new ArchiveDuplicateDetector();
            worker.Notify +=new ArchiveDuplicateDetector.NotifyEventHandler(worker_Notify);

            List<string> paths = new List<string>();
            paths.Add(@"D:\New Folder");
            DuplicateSearchOption option = new DuplicateSearchOption() { Paths = paths };
            List<DuplicateArchiveInfoList> list = worker.Search(option);

            foreach (var item in list)
            {
                System.Console.WriteLine(item.Original.ToString());
                foreach (var dup in item.Duplicates)
                {
                    System.Console.WriteLine(" - " + dup.ToString());
                }
            }

            System.Console.ReadLine();
        }
        /// <summary>
        /// Check if file is duplicated
        /// </summary>
        /// <param name="Origin"></param>
        /// <param name="Duplicate"></param>
        /// <param name="option"></param>
        /// <returns></returns>
        private bool Compare(ref DuplicateArchiveInfo Origin, ref DuplicateArchiveInfo Duplicate, DuplicateSearchOption option)
        {
            NotifyCaller("Comparing: " + Origin.Filename + " to " + Duplicate.Filename, OperationStatus.COMPARING);

            // if item count is equal, try to check from crc strings.
            Origin.MatchType = MatchType.ORIGINAL;
            Origin.Percentage = 0.0;
            if (Origin.NoMatches != null) Origin.NoMatches.Clear();

            if (Origin.Items.Count == Duplicate.Items.Count)
            {
                Duplicate.MatchType = MatchType.EQUALCOUNT;

                if (Origin.ToCRCString() == Duplicate.ToCRCString())
                {
                    NotifyCaller("CRC Strings are equal.", OperationStatus.COMPARING);
                    Duplicate.Percentage = 100.0;
                    return true;
                }
                else if (option.OnlyPerfectMatch)
                {
                    return false;
                }
            }

            Duplicate.MatchType = MatchType.SUBSET;

            // Check each files in duplicate
            int limitCount;

            // if only have 'IgnoreLimit' files, then all must match
            if (option.IgnoreLimit > Duplicate.Items.Count) limitCount = 0;
            else limitCount = Duplicate.Items.Count - (Duplicate.Items.Count * option.Limit / 100);

            int skippedCount = 0;
            int i = 0;
            int j = 0;
            while (i < Origin.Items.Count && j < Duplicate.Items.Count && skippedCount <= limitCount)
            {
                // compare the from the biggest crc.
                int result = string.Compare(Origin.Items[i].Crc, Duplicate.Items[j].Crc, true, System.Globalization.CultureInfo.InvariantCulture);
                if (result == 0)
                {
                    ++i; ++j;
                }
                else if (result > 0)
                {
                    // Origin file skipped
                    ++i;
                }
                else
                {
                    // Duplicate file skipped, no match in Origin
                    ++skippedCount;
                    if (Duplicate.NoMatches == null) Duplicate.NoMatches = new List<ArchiveFileInfoSmall>();
                    Duplicate.NoMatches.Add(Duplicate.Items[j]);
                    ++j;
                }
            }

            if (j < Duplicate.Items.Count)
            {
                if (Duplicate.NoMatches == null) Duplicate.NoMatches = new List<ArchiveFileInfoSmall>();
                Duplicate.NoMatches.AddRange(Duplicate.Items.GetRange(j, Duplicate.Items.Count - j));
                skippedCount = Duplicate.NoMatches.Count;
            }

            double percent = (double)(Duplicate.Items.Count - skippedCount) / Duplicate.Items.Count * 100;
            if (percent >= option.Limit && skippedCount < limitCount)
            {
                NotifyCaller("Match: " + percent + "%", OperationStatus.COMPARING);
                Duplicate.Percentage = percent;
                return true;
            }

            NotifyCaller("Not Match", OperationStatus.COMPARING);
            if (Duplicate.NoMatches != null) Duplicate.NoMatches.Clear();
            return false;
        }
        /// <summary>
        /// Step 2: calculate crc
        /// </summary>
        /// <param name="fileList"></param>
        /// <param name="option"></param>
        /// <returns>List DuplicateArchiveInfo</returns>
        private List<DuplicateArchiveInfo> CalculateCRC(List<FileInfo> fileList, DuplicateSearchOption option)
        {
            List<DuplicateArchiveInfo> list = new List<DuplicateArchiveInfo>();

            int i = 0;
            foreach (FileInfo f in fileList)
            {
                _pauseEvent.WaitOne(Timeout.Infinite);
                if (_shutdownEvent.WaitOne(0))
                    break;

                NotifyCaller(f.FullName, OperationStatus.CALCULATING_CRC, curr:i, total:fileList.Count);
                try
                {
                    DuplicateArchiveInfo item = Util.GetArchiveInfo(f.FullName, option);
                    item.FileSize = f.Length;
                    item.CreationTime = f.CreationTime;

                    list.Add(item);
                }
                catch (Exception ex)
                {
                    string message = ex.Message + " (" + f.FullName + ")";
                    NotifyCaller(message, OperationStatus.ERROR);
                }
                ++i;
            }

            NotifyCaller("Complete calculating CRC, total: " + list.Count, OperationStatus.CALCULATING_CRC, total:list.Count);

            return list;
        }
        /// <summary>
        /// Step 1 - build file list from given paths
        /// </summary>
        /// <param name="option"></param>
        /// <returns>List of FileInfo</returns>
        private List<FileInfo> BuildFileList(DuplicateSearchOption option)
        {
            NotifyCaller("Start building file list.", OperationStatus.BUILDING_FILE_LIST);

            List<FileInfo> fileList = new List<FileInfo>();
            Regex re = new Regex(option.FilePattern, option.FileCaseInsensitive ? RegexOptions.IgnoreCase : RegexOptions.None);

            int i = 1;
            int t = option.Paths.Count;
            foreach (var path in option.Paths)
            {
                try
                {
                    NotifyCaller("Building file list: " + path, OperationStatus.BUILDING_FILE_LIST, curr: i, total: t);
                    DirectoryInfo dirList = new DirectoryInfo(path);
                    FileInfo[] tempList = dirList.GetFiles("*", SearchOption.AllDirectories);

                    // filter based on filepattern
                    var filteredList = from f in tempList
                                       where re.IsMatch(f.Name)
                                       select f;

                    fileList.AddRange(filteredList);
                    ++i;
                }
                catch (Exception ex)
                {
                    NotifyCaller(ex.Message + " (" + path + ")", OperationStatus.ERROR);
                }
            }

            NotifyCaller("Total File: " + fileList.Count, OperationStatus.BUILDING_FILE_LIST, total: fileList.Count);

            return fileList;
        }
        /// <summary>
        /// Step 3: Build duplicate list
        /// </summary>
        /// <param name="list"></param>
        /// <param name="limit"></param>
        /// <param name="ignoreLimit"></param>
        private List<DuplicateArchiveInfoList> BuildDuplicateList(List<DuplicateArchiveInfo> list, DuplicateSearchOption option)
        {
            NotifyCaller("Start building duplicate list.", OperationStatus.BUILDING_DUPLICATE_LIST);

            List<DuplicateArchiveInfoList> dupList = new List<DuplicateArchiveInfoList>();

            list.Sort(new DuplicateArchiveInfoItemCountComparer());

            int totalCount = list.Count;
            int i = 0;
            while (list.Count > 0)
            {
                _pauseEvent.WaitOne(Timeout.Infinite);
                if (_shutdownEvent.WaitOne(0))
                {
                    NotifyCaller("Stopping...", OperationStatus.BUILDING_DUPLICATE_LIST);
                    break;
                }

                ++i;
                DuplicateArchiveInfoList dup = new DuplicateArchiveInfoList();
                DuplicateArchiveInfo temp = list[0];
                list.RemoveAt(0);
                dup.Original = temp;

                string message = "Checking: " + temp.Filename + " ( Duplicate group found: " + i + " File to check left: " + list.Count + ")";
                NotifyCaller(message, OperationStatus.BUILDING_DUPLICATE_LIST, curr:i, total:totalCount);

                // check for other possible dups.
                int index = 0;
                while (list.Count > index)
                {
                    DuplicateArchiveInfo curr = list[index];

                    if (Compare(ref temp, ref curr, option))
                    {
                        if (dup.Duplicates == null) dup.Duplicates = new List<DuplicateArchiveInfo>();
                        dup.Duplicates.Add(curr);
                        // remove from the source list.
                        list.Remove(curr);
                        --totalCount;
                    }
                    else
                    {
                        ++index;
                    }
                }

                dupList.Add(dup);
            }

            foreach (DuplicateArchiveInfoList dup in dupList)
            {
                if (dup.Duplicates != null)
                {
                    dup.Duplicates.Sort(new DuplicateArchiveInfoPercentageComparer());
                }
            }
            NotifyCaller("Building Duplicate List Complete.", OperationStatus.BUILDING_DUPLICATE_LIST);

            return dupList;
        }