Example #1
0
        /// <summary>
        /// Gathers every file referenced by <paramref name="dup"/> and returns the groups of files
        /// that look like duplicates of each other.
        /// </summary>
        /// <remarks>
        /// Files that do not exist or have zero length are ignored. The remaining files are bucketed
        /// by <c>Length</c>; each bucket with more than one file is then split by the value returned
        /// from <c>Stuff.CalcPartMD5(file, 1024 * 1024)</c>. Only sub-groups containing at least two
        /// files are returned. No byte-by-byte comparison is performed, so the result is a list of
        /// candidates.
        /// </remarks>
        /// <param name="dup">Context supplying the directories (<c>Dirs</c>) and extra files (<c>Files</c>) to scan.</param>
        /// <param name="reportProgress">Optional callback invoked as (current, maximum, caption).</param>
        /// <returns>One array per group of suspected duplicates; an empty array when none were found.</returns>
        public static IFileInfo[][] FindDuplicates(DedupContext dup, Action <int, int, string> reportProgress = null)
        {
            List <IFileInfo> files = new List <IFileInfo>();

            reportProgress?.Invoke(0, 100, "building list..");
            foreach (var d in dup.Dirs)
            {
                Stuff.GetAllFiles(d, files);
            }
            files.AddRange(dup.Files);
            reportProgress?.Invoke(25, 100, "filtering");
            files = files.Where(z => z.Exist && z.Length > 0).ToList();
            reportProgress?.Invoke(50, 100, "grouping 1");

            // Files of different size can never be duplicates, so only same-size buckets are hashed.
            var sameSize = files.GroupBy(z => z.Length).Where(z => z.Count() > 1).ToArray();
            List <IFileInfo[]> groups = new List <IFileInfo[]>();

            foreach (var bucket in sameSize)
            {
                reportProgress?.Invoke(75, 100, "grouping 2");

                // Keep only hash groups with two or more members: a file whose hash is unique inside
                // its size bucket is not a duplicate and must not be reported as a group.
                var sameHash = bucket.GroupBy(z => Stuff.CalcPartMD5(z, 1024 * 1024))
                                     .Select(z => z.ToArray())
                                     .Where(z => z.Length > 1);
                groups.AddRange(sameHash);
            }

            //todo: binary compare candidates
            return groups.ToArray();
        }
        /// <summary>
        /// Stores the context and fills the list view with one row per duplicate group,
        /// then refreshes the two summary labels.
        /// </summary>
        /// <remarks>
        /// Rows are ordered descending by (first file's <c>Length</c> × number of files in the group).
        /// Each row shows the first file's name, its <c>Stuff.CalcPartMD5</c> value, the number of
        /// files in the group and the formatted size of all copies except one. The group array itself
        /// is kept in the row's <c>Tag</c>.
        /// </remarks>
        /// <param name="ctx">Context the groups were computed from; assigned to <c>Context</c>.</param>
        /// <param name="groups">Duplicate groups to display; every group is expected to be non-empty.</param>
        public void SetGroups(DedupContext ctx, IFileInfo[][] groups)
        {
            Context = ctx;
            listView1.Items.Clear();

            var ordered = groups.OrderByDescending(g => g.First().Length * g.Length);
            foreach (var dupes in ordered)
            {
                var head = dupes.First();
                var columns = new string[]
                {
                    head.Name,
                    Stuff.CalcPartMD5(head, 1024 * 1024),
                    dupes.Length.ToString(),
                    Stuff.GetUserFriendlyFileSize(head.Length * (dupes.Length - 1))
                };

                var row = new ListViewItem(columns);
                row.Tag = dupes;
                listView1.Items.Add(row);
            }

            // Space occupied by the redundant copies: every file in a group except one.
            var overhead = groups.Sum(g => g.First().Length * (g.Length - 1));
            label1.Text = $"Total repeats groups: {groups.Length}";
            label2.Text = "Total memory overhead: " + Stuff.GetUserFriendlyFileSize(overhead);

            // Fit to content first, then to header.
            listView1.AutoResizeColumns(ColumnHeaderAutoResizeStyle.ColumnContent);
            listView1.AutoResizeColumns(ColumnHeaderAutoResizeStyle.HeaderSize);
        }
        /// <summary>
        /// Groups the files of <paramref name="dup"/> whose image hashes are close to each other.
        /// </summary>
        /// <remarks>
        /// Pass 1 computes <c>GetImageHash</c> for every file; files for which hashing throws are
        /// skipped (best effort). Pass 2 assigns each hashed file to the existing group containing the
        /// member with the smallest <c>Dist</c> below the threshold (800), or opens a new group when no
        /// member is close enough. Only groups with more than one file are returned.
        /// </remarks>
        /// <param name="dup">Context whose <c>GetAllFiles()</c> supplies the candidates.</param>
        /// <param name="reportProgress">Optional callback invoked as (current, maximum, caption); maximum is twice the file count because there are two passes.</param>
        /// <returns>One array per group of similar images; an empty array when none were found.</returns>
        public static IFileInfo[][] FindDuplicates(DedupContext dup, Action <int, int, string> reportProgress = null)
        {
            const int threshold = 800;

            var files = dup.GetAllFiles();
            int total = files.Length * 2;

            List <List <IFileInfo> >   groups = new List <List <IFileInfo> >();
            Dictionary <string, int[]> hashes = new Dictionary <string, int[]>();
            int cnt = 0;

            foreach (var item in files)
            {
                reportProgress?.Invoke(cnt, total, "calc hash of " + item.Name);
                try
                {
                    hashes.Add(item.FullName, GetImageHash(item.FullName));
                }
                catch (Exception ex)
                {
                    // Deliberate best effort: a file that cannot be hashed is simply left out of
                    // the comparison. Leave a trace in debug builds instead of failing silently.
                    System.Diagnostics.Debug.WriteLine("Image hash skipped: " + ex);
                }
                cnt++;
            }

            foreach (var item in files)
            {
                reportProgress?.Invoke(cnt, total, "grouping " + item.Name);
                cnt++;

                int[] h;
                if (!hashes.TryGetValue(item.FullName, out h))
                {
                    continue;
                }

                // Find the group holding the closest already-grouped image below the threshold.
                List <IFileInfo> grp = null;
                int best             = threshold;
                foreach (var gitem in groups)
                {
                    foreach (var hitem in gitem)
                    {
                        // Every grouped file passed the lookup above, so its hash is present.
                        int dist = Dist(h, hashes[hitem.FullName]);
                        if (dist < best)
                        {
                            best = dist;
                            grp  = gitem;
                        }
                    }
                }

                if (grp == null)
                {
                    groups.Add(new List <IFileInfo> { item });
                }
                else
                {
                    grp.Add(item);
                }
            }

            return groups.Where(z => z.Count > 1).Select(z => z.ToArray()).ToArray();
        }
Example #4
0
        /// <summary>
        /// Menu handler: runs the image de-duplication over the current list selection and opens
        /// a result window when similar images are found.
        /// </summary>
        /// <remarks>
        /// If the first selected item carries a <c>TagInfo</c>, the files of that tag are scanned.
        /// Otherwise every selected item tagged with an <c>IFileInfo</c> or <c>IDirectoryInfo</c>
        /// contributes to the scan. The search runs inside a <c>ProgressBarOperationDialog</c>;
        /// nothing is shown when the dialog result is <c>Abort</c>.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void ImgDedupToolStripMenuItem_Click(object sender, EventArgs e)
        {
            if (listView1.SelectedItems.Count == 0)
            {
                return;
            }

            // The two selection kinds differ only in how the context is built.
            DedupContext ctx;
            var tagInfo = listView1.SelectedItems[0].Tag as TagInfo;
            if (tagInfo != null)
            {
                ctx = new DedupContext(new IDirectoryInfo[0], tagInfo.Files.OfType <IFileInfo>().ToArray());
            }
            else
            {
                List <IFileInfo>      ff = new List <IFileInfo>();
                List <IDirectoryInfo> dd = new List <IDirectoryInfo>();
                for (int i = 0; i < listView1.SelectedItems.Count; i++)
                {
                    var tag0 = listView1.SelectedItems[i].Tag;

                    var file = tag0 as IFileInfo;
                    if (file != null)
                    {
                        ff.Add(file);
                    }

                    var dir = tag0 as IDirectoryInfo;
                    if (dir != null)
                    {
                        dd.Add(dir);
                    }
                }
                ctx = new DedupContext(dd.ToArray(), ff.ToArray());
            }

            ProgressBarOperationDialog pd = new ProgressBarOperationDialog();
            IFileInfo[][] groups          = null;
            pd.Init(() =>
            {
                groups = ImagesDeduplicationWindow.FindDuplicates(ctx, (p, max, title) => pd.SetProgress(title, p, max));
                pd.Complete();
            });
            pd.ShowDialog();
            if (pd.DialogResult == DialogResult.Abort)
            {
                return;
            }

            // The worker never stored a result (e.g. the dialog closed before the search
            // finished): there is nothing to report, and dereferencing would throw.
            if (groups == null)
            {
                return;
            }

            if (groups.Length == 0)
            {
                Stuff.Info("No duplicates found.");
                return;
            }

            ImagesDeduplicationWindow rp = new ImagesDeduplicationWindow();
            rp.MdiParent = mdi.MainForm;
            rp.SetGroups(ctx, groups);
            rp.Show();
        }