/// <summary>
/// Collects every file reachable from <paramref name="dup"/> and returns groups of
/// files that are duplicate candidates: same length and same value of
/// <c>Stuff.CalcPartMD5(file, 1024 * 1024)</c>.
/// </summary>
/// <param name="dup">Context supplying the directories (<c>Dirs</c>) and individual files (<c>Files</c>) to scan.</param>
/// <param name="reportProgress">
/// Optional callback invoked as (current, maximum, stage title). Only coarse stage
/// markers (0, 25, 50, 75 out of 100) are reported.
/// </param>
/// <returns>
/// One array per candidate group; every returned group contains at least two files.
/// Returns an empty array when no candidates are found.
/// </returns>
/// <remarks>
/// Candidates are not compared byte-by-byte (see the todo below), so a group is only as
/// reliable as the length + CalcPartMD5 key. NOTE(review): CalcPartMD5 presumably hashes
/// a leading part of the file limited by its second argument - confirm against Stuff.
/// </remarks>
public static IFileInfo[][] FindDuplicates(DedupContext dup, Action<int, int, string> reportProgress = null)
{
    reportProgress?.Invoke(0, 100, "building list..");
    List<IFileInfo> files = new List<IFileInfo>();
    foreach (var d in dup.Dirs)
    {
        Stuff.GetAllFiles(d, files);
    }
    files.AddRange(dup.Files);

    reportProgress?.Invoke(25, 100, "filtering");
    // Missing and zero-length files are never reported as duplicates.
    files = files.Where(z => z.Exist && z.Length > 0).ToList();

    reportProgress?.Invoke(50, 100, "grouping 1");
    // Cheap first pass: only files sharing a length can be duplicates.
    var sameLength = files.GroupBy(z => z.Length).Where(z => z.Count() > 1).ToArray();

    List<IFileInfo[]> groups = new List<IFileInfo[]>();
    foreach (var bucket in sameLength)
    {
        reportProgress?.Invoke(75, 100, "grouping 2");

        // Second pass inside a same-length bucket. Keep only hash groups with more than
        // one member: a bucket can hold both real duplicates and unrelated files that
        // merely share a length, and the latter must not be reported as groups.
        var duplicates = bucket
            .GroupBy(z => Stuff.CalcPartMD5(z, 1024 * 1024))
            .Select(z => z.ToArray())
            .Where(z => z.Length > 1);

        groups.AddRange(duplicates);
    }

    //todo: binary compare candidates
    return groups.ToArray();
}
/// <summary>
/// Stores <paramref name="ctx"/> as the current context and repopulates the list view
/// with one row per group, then refreshes the summary labels and column widths.
/// </summary>
/// <param name="ctx">Deduplication context the groups were produced from.</param>
/// <param name="groups">
/// Groups to display. Each group is expected to be non-empty: the first element is
/// used as the group's representative.
/// </param>
public void SetGroups(DedupContext ctx, IFileInfo[][] groups)
{
    Context = ctx;
    listView1.Items.Clear();

    // Largest total footprint (representative length x member count) first.
    var ordered = groups.OrderByDescending(g => g.First().Length * g.Length);
    foreach (var group in ordered)
    {
        var head = group.First();

        // Columns: name, partial hash, member count, space taken by the extra copies.
        string[] cells =
        {
            head.Name,
            Stuff.CalcPartMD5(head, 1024 * 1024),
            group.Length + "",
            Stuff.GetUserFriendlyFileSize(head.Length * (group.Length - 1))
        };

        var row = new ListViewItem(cells);
        row.Tag = group; // the whole group travels with its row
        listView1.Items.Add(row);
    }

    label1.Text = "Total repeats groups: " + groups.Length;

    // Overhead counts every member except one per group.
    var overhead = groups.Sum(g => g.First().Length * (g.Length - 1));
    label2.Text = "Total memory overhead: " + Stuff.GetUserFriendlyFileSize(overhead);

    // Fit to content first, then make sure headers are not truncated.
    listView1.AutoResizeColumns(ColumnHeaderAutoResizeStyle.ColumnContent);
    listView1.AutoResizeColumns(ColumnHeaderAutoResizeStyle.HeaderSize);
}
/// <summary>
/// Groups the files of <paramref name="dup"/> by image similarity and returns the groups
/// that contain more than one file.
/// </summary>
/// <param name="dup">Context whose <c>GetAllFiles()</c> supplies the files to examine.</param>
/// <param name="reportProgress">
/// Optional callback invoked as (current, maximum, title). The maximum is twice the file
/// count because every file is visited once for hashing and once for grouping.
/// </param>
/// <returns>Arrays of files judged similar; each array has at least two entries.</returns>
/// <remarks>
/// Grouping is greedy and depends on input order: a file joins the existing group that
/// holds the closest already-grouped file, provided that distance is below the
/// threshold; otherwise it starts a new group. Every file is compared against every
/// previously grouped file.
/// </remarks>
public static IFileInfo[][] FindDuplicates(DedupContext dup, Action<int, int, string> reportProgress = null)
{
    // Maximum Dist() value (exclusive) for two hashes to count as the same image.
    const int threshold = 800;

    var files = dup.GetAllFiles();
    int total = files.Length * 2;
    int cnt = 0;

    // Pass 1: hash every file, keyed by full path.
    var hashes = new Dictionary<string, int[]>();
    foreach (var item in files)
    {
        reportProgress?.Invoke(cnt, total, "calc hash of " + item.Name);
        cnt++;

        if (hashes.ContainsKey(item.FullName))
        {
            // Same path listed more than once: already hashed.
            continue;
        }

        try
        {
            hashes[item.FullName] = GetImageHash(item.FullName);
        }
        catch (Exception)
        {
            // Deliberate best effort: a file that cannot be hashed is left out of
            // `hashes` and is skipped by the grouping pass below.
        }
    }

    // Pass 2: assign each hashed file to the closest existing group, or start a new one.
    var groups = new List<List<IFileInfo>>();
    var groupedPaths = new HashSet<string>();
    foreach (var item in files)
    {
        reportProgress?.Invoke(cnt, total, "grouping " + item.Name);
        cnt++;

        int[] h;
        if (!hashes.TryGetValue(item.FullName, out h))
        {
            continue;
        }

        if (!groupedPaths.Add(item.FullName))
        {
            // The same path appearing twice would otherwise match itself at distance 0
            // and be reported as its own duplicate.
            continue;
        }

        List<IFileInfo> grp = null;
        int best = threshold;
        foreach (var candidateGroup in groups)
        {
            foreach (var member in candidateGroup)
            {
                // Every grouped member has a hash: only hashed files reach this loop.
                int distance = Dist(h, hashes[member.FullName]);
                if (distance < best)
                {
                    best = distance;
                    grp = candidateGroup;
                }
            }
        }

        if (grp == null)
        {
            groups.Add(new List<IFileInfo> { item });
        }
        else
        {
            grp.Add(item);
        }
    }

    // Single-member groups are files with no similar counterpart.
    return groups.Where(z => z.Count > 1).Select(z => z.ToArray()).ToArray();
}
/// <summary>
/// Menu handler: builds a deduplication context from the current list-view selection
/// and runs the image duplicate search on it.
/// </summary>
/// <remarks>
/// If the first selected item's Tag is a <c>TagInfo</c>, the context is built from that
/// tag's files only. Otherwise it is built from every selected item whose Tag is an
/// <c>IFileInfo</c> and/or an <c>IDirectoryInfo</c>.
/// </remarks>
private void ImgDedupToolStripMenuItem_Click(object sender, EventArgs e)
{
    if (listView1.SelectedItems.Count == 0)
    {
        return;
    }

    DedupContext ctx;
    var tagInfo = listView1.SelectedItems[0].Tag as TagInfo;
    if (tagInfo != null)
    {
        ctx = new DedupContext(new IDirectoryInfo[] { }, tagInfo.Files.OfType<IFileInfo>().ToArray());
    }
    else
    {
        List<IFileInfo> selectedFiles = new List<IFileInfo>();
        List<IDirectoryInfo> selectedDirs = new List<IDirectoryInfo>();
        for (int i = 0; i < listView1.SelectedItems.Count; i++)
        {
            var itemTag = listView1.SelectedItems[i].Tag;

            // Two independent checks (not else-if): a tag implementing both interfaces
            // is added to both lists.
            var asFile = itemTag as IFileInfo;
            if (asFile != null)
            {
                selectedFiles.Add(asFile);
            }

            var asDir = itemTag as IDirectoryInfo;
            if (asDir != null)
            {
                selectedDirs.Add(asDir);
            }
        }

        ctx = new DedupContext(selectedDirs.ToArray(), selectedFiles.ToArray());
    }

    RunImageDedupAndShowResult(ctx);
}

/// <summary>
/// Runs <c>ImagesDeduplicationWindow.FindDuplicates</c> for <paramref name="ctx"/> behind
/// a progress dialog, then either reports that nothing was found or opens a results window.
/// </summary>
/// <param name="ctx">Context describing the files and directories to examine.</param>
private void RunImageDedupAndShowResult(DedupContext ctx)
{
    ProgressBarOperationDialog pd = new ProgressBarOperationDialog();
    IFileInfo[][] groups = null;
    pd.Init(() =>
    {
        groups = ImagesDeduplicationWindow.FindDuplicates(ctx, (p, max, title) => pd.SetProgress(title, p, max));
        pd.Complete();
    });
    pd.ShowDialog();

    if (pd.DialogResult == DialogResult.Abort)
    {
        return;
    }

    if (groups == null)
    {
        // The dialog closed without the operation assigning a result; there is nothing
        // to show. NOTE(review): presumably happens when the search fails or the dialog
        // is dismissed early - confirm against ProgressBarOperationDialog.
        return;
    }

    if (groups.Length == 0)
    {
        Stuff.Info("No duplicates found.");
        return;
    }

    ImagesDeduplicationWindow rp = new ImagesDeduplicationWindow();
    rp.MdiParent = mdi.MainForm;
    rp.SetGroups(ctx, groups);
    rp.Show();
}