Esempio n. 1
        /// <summary>
        /// Console entry point: searches one hard-coded folder for duplicate archives and
        /// writes every reported duplicate to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by the visible code.</param>
        static void Main(string[] args)
            // NOTE(review): this listing carries no braces and the object initializer below is
            // never closed, so it does not compile as shown; restore them before building.
            Logger.Debug("Hello World");

            ArchiveDuplicateDetector worker = new ArchiveDuplicateDetector();

            // Attach the notification handler before Search is called.
            worker.Notify += new ArchiveDuplicateDetector.NotifyEventHandler(worker_Notify);

            List <string> paths = new List <string>();

            // The only search root is this fixed, machine-specific folder.
            paths.Add(@"D:\New Folder");
            // Only Paths is set; every other option keeps whatever default DuplicateSearchOption gives it.
            DuplicateSearchOption option = new DuplicateSearchOption()
                Paths = paths
            List <DuplicateArchiveInfoList> list = worker.Search(option);

            // One output line per duplicate, across all returned groups.
            // NOTE(review): assumes item.Duplicates is never null for a returned group - confirm.
            foreach (var item in list)
                foreach (var dup in item.Duplicates)
                    System.Console.WriteLine(" - " + dup.ToString());

Esempio n. 2
        /// <summary>
        /// Click handler for the Search button: copies the search settings from the form's
        /// controls into a DuplicateSearchOption, then enables Pause/Stop and disables Search.
        /// </summary>
        /// <param name="sender">Event source; not read by the visible code.</param>
        /// <param name="e">Event data; not read by the visible code.</param>
        private void btnSearch_Click(object sender, EventArgs e)
            // NOTE(review): the braces of this initializer and the statement that hands `option`
            // to the search worker are not present in this listing.
            DuplicateSearchOption option = new DuplicateSearchOption()
                Paths                    = GetPathList(),
                // Convert.ToInt32 / ulong.Parse / int.Parse below throw when the text is not a
                // valid number; nothing visible here validates the input first.
                Limit                    = Convert.ToInt32(txtLimitPercentage.Text),
                IgnoreLimit              = Convert.ToInt32(txtIgnoreLimit.Text),
                FilePattern              = txtFilePattern.Text,
                BlacklistPattern         = txtBlackList.Text,
                FileCaseInsensitive      = chkFileCI.Checked,
                BlacklistCaseInsensitive = chkBlacklistCI.Checked,
                SevenZipPath             = txt7zDllPath.Text,
                OnlyPerfectMatch         = chkOnlyPerfectMatch.Checked,
                // The selected index is cast straight to ThreadPriority.
                // NOTE(review): presumably the list entries are in enum order - confirm, and
                // check what happens when nothing is selected.
                Priority                 = (ThreadPriority)cbxPriority.SelectedIndex,
                PreventStanby            = chkPreventStanby.Checked,
                IgnoreSmallFile          = chkIgnoreSmallFileSize.Checked,
                SmallFileSizeLimit       = ulong.Parse(txtSmallFileSizeLimit.Text),
                TaskLimit                = int.Parse(txtThreadCount.Text)

            // Button state while a search is in progress: Pause and Stop usable, Search locked.
            btnPause.Enabled  = true;
            btnStop.Enabled   = true;
            btnSearch.Enabled = false;
 /// <summary>
 /// Prepares a worker thread that runs SearchThreadingImpl, unless a previously created
 /// thread exists and has not reached the Stopped state.
 /// </summary>
 /// <param name="option">Search settings. NOTE(review): not read in the visible lines;
 /// presumably passed to the thread when it is started - confirm.</param>
 public void SearchThreading(DuplicateSearchOption option)
     ParameterizedThreadStart ts = new ParameterizedThreadStart(SearchThreadingImpl);
     // Create a new thread only when there is none yet or the previous one has stopped.
     if (_thread == null || _thread.ThreadState == ThreadState.Stopped)
         _thread = new Thread(ts);
         // Priority is fixed to Lowest here.
         // NOTE(review): the statement that starts the thread is not part of this listing.
         _thread.Priority = ThreadPriority.Lowest;
        /// <summary>
        /// Runs the whole duplicate search on the calling thread: builds the file list,
        /// calculates CRC information, groups duplicates and cleans up the resulting groups.
        /// </summary>
        /// <param name="option">Search settings; Paths and PreventStanby are read here and the
        /// object is handed on to each step.</param>
        /// <returns>The duplicate groups returned by CleanUpDuplicate.</returns>
        public List<DuplicateArchiveInfoList> Search(DuplicateSearchOption option)
            NotifyCaller("Target Count: " + option.Paths.Count, OperationStatus.READY);

            // NOTE(review): only the notification is visible in this branch; the call that
            // actually prevents standby, if any, is not part of this listing.
            if (option.PreventStanby)
                NotifyCaller("Disabling Sleep", OperationStatus.READY);

            // Each step consumes the previous step's output.
            List<FileInfo> fileList =  BuildFileList(option);
            List<DuplicateArchiveInfo> list = CalculateCRC(fileList, option);
            List<DuplicateArchiveInfoList> dupList = BuildDuplicateList(list, option);
            dupList = CleanUpDuplicate(dupList);


            return dupList;
Esempio n. 5
        /// <summary>
        /// Console entry point: searches one hard-coded folder for duplicate archives and
        /// writes every reported duplicate to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by the visible code.</param>
        static void Main(string[] args)
            // NOTE(review): the method braces are missing from this listing.
            Logger.Debug("Hello World");

            ArchiveDuplicateDetector worker = new ArchiveDuplicateDetector();
            // Attach the notification handler before Search is called.
            worker.Notify +=new ArchiveDuplicateDetector.NotifyEventHandler(worker_Notify);

            List<string> paths = new List<string>();
            // The only search root is this fixed, machine-specific folder.
            paths.Add(@"D:\New Folder");
            // Only Paths is set; every other option keeps whatever default DuplicateSearchOption gives it.
            DuplicateSearchOption option = new DuplicateSearchOption() { Paths = paths };
            List<DuplicateArchiveInfoList> list = worker.Search(option);

            // One output line per duplicate, across all returned groups.
            // NOTE(review): assumes item.Duplicates is never null for a returned group - confirm.
            foreach (var item in list)
                foreach (var dup in item.Duplicates)
                    System.Console.WriteLine(" - " + dup.ToString());

        /// <summary>
        /// Decides whether <paramref name="Duplicate"/> counts as a duplicate of
        /// <paramref name="Origin"/>, first by comparing their CRC strings and otherwise by
        /// walking both item lists and counting entries of Duplicate that have no match.
        /// Match type, percentage and no-match list are written onto the two arguments.
        /// </summary>
        /// <param name="Origin">Reference archive; its MatchType, Percentage and NoMatches are reset here.</param>
        /// <param name="Duplicate">Candidate archive; receives MatchType, Percentage and NoMatches.</param>
        /// <param name="option">Search settings; OnlyPerfectMatch, IgnoreLimit and Limit are read.</param>
        /// <returns>true when the candidate is accepted as a duplicate, otherwise false.</returns>
        private bool Compare(ref DuplicateArchiveInfo Origin, ref DuplicateArchiveInfo Duplicate, DuplicateSearchOption option)
            NotifyCaller("Comparing: " + Origin.Filename + " to " + Duplicate.Filename, OperationStatus.COMPARING);

            // Reset the origin's match state before every comparison.
            Origin.MatchType = MatchType.ORIGINAL;
            Origin.Percentage = 0.0;
            if (Origin.NoMatches != null) Origin.NoMatches.Clear();

            // Fast path: when both archives hold the same number of items, compare the CRC strings.
            if (Origin.Items.Count == Duplicate.Items.Count)
                Duplicate.MatchType = MatchType.EQUALCOUNT;

                if (Origin.ToCRCString() == Duplicate.ToCRCString())
                    NotifyCaller("CRC Strings are equal.", OperationStatus.COMPARING);
                    Duplicate.Percentage = 100.0;
                    return true;
                // Equal count but different CRC strings: give up here when only perfect matches are wanted.
                else if (option.OnlyPerfectMatch)
                    return false;

            Duplicate.MatchType = MatchType.SUBSET;

            // Number of Duplicate items that may go unmatched before the comparison is abandoned.
            int limitCount;

            // Fewer items than IgnoreLimit: no unmatched item is tolerated.
            // Otherwise: item count minus Limit percent of it (integer arithmetic).
            if (option.IgnoreLimit > Duplicate.Items.Count) limitCount = 0;
            else limitCount = Duplicate.Items.Count - (Duplicate.Items.Count * option.Limit / 100);

            int skippedCount = 0;
            int i = 0;
            int j = 0;
            // Walk both item lists in parallel: i indexes Origin.Items, j indexes Duplicate.Items.
            // NOTE(review): presumably both lists are sorted by Crc (largest first) - confirm.
            while (i < Origin.Items.Count && j < Duplicate.Items.Count && skippedCount <= limitCount)
                // Case-insensitive, invariant-culture comparison of the two current CRC values.
                int result = string.Compare(Origin.Items[i].Crc, Duplicate.Items[j].Crc, true, System.Globalization.CultureInfo.InvariantCulture);
                // Same CRC: advance both sides.
                if (result == 0)
                    ++i; ++j;
                else if (result > 0)
                    // Origin file skipped
                    // NOTE(review): the statements of this branch and the following else branch
                    // (advancing i / j, recording the unmatched item, increasing skippedCount)
                    // are missing from this listing; only the lazy list creation below survives.
                    // Duplicate file skipped, no match in Origin
                    if (Duplicate.NoMatches == null) Duplicate.NoMatches = new List<ArchiveFileInfoSmall>();

            // Items of Duplicate left over after the walk are all recorded as unmatched, and
            // the skipped count is then taken from the no-match list.
            if (j < Duplicate.Items.Count)
                if (Duplicate.NoMatches == null) Duplicate.NoMatches = new List<ArchiveFileInfoSmall>();
                Duplicate.NoMatches.AddRange(Duplicate.Items.GetRange(j, Duplicate.Items.Count - j));
                skippedCount = Duplicate.NoMatches.Count;

            // Share of Duplicate items that were matched, in percent.
            double percent = (double)(Duplicate.Items.Count - skippedCount) / Duplicate.Items.Count * 100;
            // NOTE(review): the loop tolerates skippedCount <= limitCount but this test requires
            // skippedCount < limitCount, so with limitCount == 0 it can never succeed, even when
            // nothing was skipped - confirm this is intended.
            if (percent >= option.Limit && skippedCount < limitCount)
                NotifyCaller("Match: " + percent + "%", OperationStatus.COMPARING);
                Duplicate.Percentage = percent;
                return true;

            // Rejected: discard the no-match entries collected for this candidate.
            NotifyCaller("Not Match", OperationStatus.COMPARING);
            if (Duplicate.NoMatches != null) Duplicate.NoMatches.Clear();
            return false;
        /// <summary>
        /// Step 2: reads archive information (via Util.GetArchiveInfo) for every file in
        /// <paramref name="fileList"/> and records each file's size and creation time on it.
        /// Failures for a single file are reported through NotifyCaller with ERROR status.
        /// </summary>
        /// <param name="fileList">Files to process, as produced by step 1.</param>
        /// <param name="option">Search settings, passed through to Util.GetArchiveInfo.</param>
        /// <returns>List DuplicateArchiveInfo</returns>
        private List<DuplicateArchiveInfo> CalculateCRC(List<FileInfo> fileList, DuplicateSearchOption option)
            List<DuplicateArchiveInfo> list = new List<DuplicateArchiveInfo>();

            // Progress counter reported to the caller.
            // NOTE(review): no increment of i is visible in this listing.
            int i = 0;
            foreach (FileInfo f in fileList)
                // Non-blocking check of the shutdown event.
                // NOTE(review): the statement executed when it is set is missing from this listing.
                if (_shutdownEvent.WaitOne(0))

                NotifyCaller(f.FullName, OperationStatus.CALCULATING_CRC, curr:i, total:fileList.Count);
                    // NOTE(review): the `try` opening this block and the statement that adds
                    // `item` to `list` are not present in this listing.
                    DuplicateArchiveInfo item = Util.GetArchiveInfo(f.FullName, option);
                    item.FileSize = f.Length;
                    item.CreationTime = f.CreationTime;

                // Report the failing file through the notification channel.
                catch (Exception ex)
                    string message = ex.Message + " (" + f.FullName + ")";
                    NotifyCaller(message, OperationStatus.ERROR);

            NotifyCaller("Complete calculating CRC, total: " + list.Count, OperationStatus.CALCULATING_CRC, total:list.Count);

            return list;
        /// <summary>
        /// Step 1: enumerates all files below each path in option.Paths (including
        /// subdirectories) and filters them by file name against option.FilePattern.
        /// Failures for a single path are reported through NotifyCaller with ERROR status.
        /// </summary>
        /// <param name="option">Search settings; Paths, FilePattern and FileCaseInsensitive are read.</param>
        /// <returns>List of FileInfo</returns>
        private List<FileInfo> BuildFileList(DuplicateSearchOption option)
            NotifyCaller("Start building file list.", OperationStatus.BUILDING_FILE_LIST);

            List<FileInfo> fileList = new List<FileInfo>();
            // FilePattern is used as a regular expression; the Regex constructor throws if it is invalid.
            Regex re = new Regex(option.FilePattern, option.FileCaseInsensitive ? RegexOptions.IgnoreCase : RegexOptions.None);

            // Progress counter for the notifications, starting at 1.
            // NOTE(review): no increment of i is visible in this listing.
            int i = 1;
            int t = option.Paths.Count;
            foreach (var path in option.Paths)
                    // NOTE(review): the `try` opening this block is not present in this listing.
                    NotifyCaller("Building file list: " + path, OperationStatus.BUILDING_FILE_LIST, curr: i, total: t);
                    DirectoryInfo dirList = new DirectoryInfo(path);
                    // Every file under the path, at any depth.
                    FileInfo[] tempList = dirList.GetFiles("*", SearchOption.AllDirectories);

                    // Keep only files whose name (not the full path) matches the pattern.
                    // NOTE(review): the statement that adds filteredList to fileList is not
                    // present in this listing.
                    var filteredList = from f in tempList
                                       where re.IsMatch(f.Name)
                                       select f;

                // Report the failing path through the notification channel.
                catch (Exception ex)
                    NotifyCaller(ex.Message + " (" + path + ")", OperationStatus.ERROR);

            NotifyCaller("Total File: " + fileList.Count, OperationStatus.BUILDING_FILE_LIST, total: fileList.Count);

            return fileList;
        /// <summary>
        /// Step 3: groups the archives into duplicate groups. The list is sorted, then its first
        /// element is repeatedly taken as a group's original and compared against the remaining
        /// entries with Compare. Each group's duplicates are finally sorted with
        /// DuplicateArchiveInfoPercentageComparer.
        /// </summary>
        /// <param name="list">Archive information from step 2; sorted in place by this method.</param>
        /// <param name="option">Search settings, passed through to Compare.</param>
        /// <returns>The duplicate groups that were built.</returns>
        private List<DuplicateArchiveInfoList> BuildDuplicateList(List<DuplicateArchiveInfo> list, DuplicateSearchOption option)
            NotifyCaller("Start building duplicate list.", OperationStatus.BUILDING_DUPLICATE_LIST);

            List<DuplicateArchiveInfoList> dupList = new List<DuplicateArchiveInfoList>();

            // Sort order is defined by DuplicateArchiveInfoItemCountComparer (not visible here).
            list.Sort(new DuplicateArchiveInfoItemCountComparer());

            // Initial size, kept for progress reporting.
            int totalCount = list.Count;
            int i = 0;
            // NOTE(review): the loop runs until list is empty, but the statements that remove
            // entries from list, advance i / index and add dup to dupList are not present in
            // this listing.
            while (list.Count > 0)
                // Non-blocking check of the shutdown event.
                // NOTE(review): the statement that leaves the loop after this notification is missing.
                if (_shutdownEvent.WaitOne(0))
                    NotifyCaller("Stopping...", OperationStatus.BUILDING_DUPLICATE_LIST);

                // The first remaining entry becomes the original of a new group.
                DuplicateArchiveInfoList dup = new DuplicateArchiveInfoList();
                DuplicateArchiveInfo temp = list[0];
                dup.Original = temp;

                string message = "Checking: " + temp.Filename + " ( Duplicate group found: " + i + " File to check left: " + list.Count + ")";
                NotifyCaller(message, OperationStatus.BUILDING_DUPLICATE_LIST, curr:i, total:totalCount);

                // Compare the original against the other candidates.
                // NOTE(review): index starts at 0; presumably the original was already removed
                // from list by a statement not shown - otherwise it is compared with itself.
                int index = 0;
                while (list.Count > index)
                    DuplicateArchiveInfo curr = list[index];

                    if (Compare(ref temp, ref curr, option))
                        // Duplicates is created lazily, so it stays null for a group without matches.
                        if (dup.Duplicates == null) dup.Duplicates = new List<DuplicateArchiveInfo>();
                        // remove from the source list.


            // Order the duplicates inside each group; groups without duplicates are skipped.
            foreach (DuplicateArchiveInfoList dup in dupList)
                if (dup.Duplicates != null)
                    dup.Duplicates.Sort(new DuplicateArchiveInfoPercentageComparer());
            NotifyCaller("Building Duplicate List Complete.", OperationStatus.BUILDING_DUPLICATE_LIST);

            return dupList;