/// <summary>
/// Builds the per-file detail list for two archives.
/// Every entry of <paramref name="ziplist1"/> is paired with its lowest-scoring
/// counterpart in <paramref name="ziplist2"/> (as measured by CalcScoreP), provided the
/// score is below both MAX_SCORE2 and the initial cut-off of 99. Entries on either side
/// that end up without a partner are emitted alone with the sentinel score 999 * 2.
/// </summary>
/// <param name="ziplist1">Files of the first ("left") archive.</param>
/// <param name="ziplist2">Files of the second ("right") archive.</param>
/// <returns>
/// One entry per left file (paired or alone), followed by one entry per right file that
/// was not chosen as the best partner of any left file.
/// </returns>
private List<ScoreEntry2> MakeDetailList(List<HashZipEntry> ziplist1, List<HashZipEntry> ziplist2)
{
    // Sentinel score given to entries that have no counterpart.
    const int UnmatchedScore = 999 * 2;

    List<ScoreEntry2> retlist = new List<ScoreEntry2>();
    bool[] rightMatch = new bool[ziplist2.Count];

    // Make a list of matching files in zip1 vs zip2
    for (int dex1 = 0; dex1 < ziplist1.Count; dex1++)
    {
        var hze1 = ziplist1[dex1];
        int bestscore = 99;
        int bestdex = -1;

        for (int dex2 = 0; dex2 < ziplist2.Count; dex2++)
        {
            var ascore = CalcScoreP(hze1, ziplist2[dex2]);

            // TODO would a VP-tree be a faster solution? [create a VPtree for ziplist1 and ziplist2 ONLY]
            if (ascore < MAX_SCORE2 && ascore < bestscore)
            {
                bestscore = ascore;
                bestdex = dex2;
            }
        }

        ScoreEntry2 se = new ScoreEntry2();
        se.F1 = hze1;
        if (bestdex < 0)
        {
            se.score = UnmatchedScore;
        }
        else
        {
            se.F2 = ziplist2[bestdex];
            se.score = bestscore;

            // Flag only the final winner. Flagging every intermediate candidate made
            // superseded right-hand files vanish from the result: they were neither
            // paired nor reported as unmatched.
            rightMatch[bestdex] = true;
        }
        retlist.Add(se);
    }

    // Right-hand files that were never selected are reported on their own.
    for (int i = 0; i < rightMatch.Length; i++)
    {
        if (!rightMatch[i])
        {
            ScoreEntry2 se = new ScoreEntry2();
            se.F2 = ziplist2[i];
            se.score = UnmatchedScore;
            retlist.Add(se);
        }
    }

    return retlist;
}
/// <summary>
/// Shows one side of a score entry in a picture box and writes the image's
/// dimensions and file size to the accompanying label.
/// </summary>
/// <param name="sel">Entry to display; nothing happens when null.</param>
/// <param name="first">True to show <c>sel.F1</c>, false to show <c>sel.F2</c>.</param>
/// <param name="pbox">Picture box that receives the image.</param>
/// <param name="plab">Label that receives "width,height [size K]".</param>
internal void setPix(ScoreEntry2 sel, bool first, PictureBox pbox, Label plab)
{
    if (sel == null)
    {
        return;
    }

    var entry = first ? sel.F1 : sel.F2;
    string zipF = entry == null ? "" : entry.ZipFile;
    string fi = entry == null ? "" : entry.InnerPath;

    if (string.IsNullOrEmpty(zipF) || string.IsNullOrEmpty(fi))
    {
        pbox.Image = null;
        plab.Text = "";
        return;
    }

    var imgF = _loader.Extract(zipF, fi);
    if (string.IsNullOrEmpty(imgF))
    {
        pbox.Image = null;
        plab.Text = "";
        return;
    }

    // clean up all created temp files on close
    _toCleanup.Add(imgF);

    Image loaded = null;
    try
    {
        // Load through an in-memory copy so the file on disk is not locked.
        // The MemoryStream is intentionally not disposed: GDI+ requires the
        // stream to stay open for the lifetime of the Image.
        loaded = Image.FromStream(new MemoryStream(File.ReadAllBytes(imgF)));

        var info1 = new FileInfo(imgF);
        var size1 = loaded.Size;
        string caption = string.Format("{0},{1} [{2:0.00}K]", size1.Width, size1.Height, (double)info1.Length / 1024.0);

        // Assign both together so picture and label never disagree.
        pbox.Image = loaded;
        plab.Text = caption;
    }
    catch
    {
        // Best effort: an unreadable or corrupt image must not break the UI,
        // but it must not leave the previous selection's picture/label on
        // screen either, so reset both explicitly.
        pbox.Image = null;
        plab.Text = "";
        if (loaded != null)
        {
            loaded.Dispose();
        }
    }
}
/// <summary>
/// Orders score entries by ascending score, then by 'natural' name order.
/// The name used for tie-breaking is <c>F1.InnerPath</c> when F1 is present,
/// otherwise <c>F2.InnerPath</c>; entries with no name at all sort last.
/// </summary>
/// <param name="x">Left operand (must not be null).</param>
/// <param name="y">Right operand (must not be null).</param>
/// <returns>Negative, zero or positive, as required by <c>Comparison&lt;T&gt;</c>.</returns>
public static int Comparer(ScoreEntry2 x, ScoreEntry2 y)
{
    // Explicit comparison instead of subtraction: cannot overflow.
    if (x.score != y.score)
    {
        return x.score < y.score ? -1 : 1;
    }

    // Falling back to F2 keeps the ordering total. Returning 0 whenever one
    // side lacked F1 made the comparison non-transitive (A==B, B==C, A<C),
    // which leaves the result of List.Sort undefined.
    string nameX = x.F1 != null ? x.F1.InnerPath : (x.F2 != null ? x.F2.InnerPath : null);
    string nameY = y.F1 != null ? y.F1.InnerPath : (y.F2 != null ? y.F2.InnerPath : null);

    if (nameX == null || nameY == null)
    {
        if (nameX == null && nameY == null)
        {
            return 0;
        }
        return nameX == null ? 1 : -1; // nameless entries go last
    }

    return StrCmpLogicalW(nameX, nameY); // 'natural' sorting
}
/// <summary>
/// Value equality on the inner paths of both sides, ignoring orientation:
/// "A vs B" is considered equal to "B vs A". A missing side (null F1 or F2)
/// is treated as a null path instead of throwing.
/// </summary>
/// <param name="obj">Object to compare with.</param>
/// <returns>True when <paramref name="obj"/> is a ScoreEntry2 describing the same pair of inner paths.</returns>
public override bool Equals(object obj)
{
    ScoreEntry2 obj2 = obj as ScoreEntry2;
    if (obj2 == null)
    {
        return false;
    }

    // Entries with only one side populated exist (unmatched files), so F1/F2
    // must not be dereferenced unconditionally - Equals must never throw.
    string thisP1 = this.F1 == null ? null : this.F1.InnerPath;
    string thisP2 = this.F2 == null ? null : this.F2.InnerPath;
    string thatP1 = obj2.F1 == null ? null : obj2.F1.InnerPath;
    string thatP2 = obj2.F2 == null ? null : obj2.F2.InnerPath;

    // NOTE(review): only InnerPath is compared, not ZipFile. GetHashCode is
    // not visible here - confirm it is symmetric and based on the same fields.

    // This considers "A vs B" to be equivalent to "B vs A"
    return (thisP1 == thatP1 && thisP2 == thatP2)
        || (thisP2 == thatP1 && thisP1 == thatP2);
}
/// <summary>
/// Opens the (lazily created, reused) diff dialog modally for the given entry.
/// </summary>
/// <param name="sel">Entry to show; the call is ignored when null.</param>
/// <param name="showonly">When true the dialog's Diff flag is switched off.</param>
internal void DoDiff(ScoreEntry2 sel, bool showonly = false)
{
    if (sel != null)
    {
        // Create the dialog on first use; later calls reuse the same instance.
        if (_diffDlg == null)
        {
            var dlg = new ShowDiff(_loader, _log);
            dlg.Owner = this;
            _diffDlg = dlg;
        }

        _diffDlg.Stretch = true; // stretch
        _diffDlg.Diff = (showonly == false);
        _diffDlg.Group = sel;
        _diffDlg.ShowDialog();
    }
}
/// <summary>
/// Compares all archives against each other using a VP-tree built over
/// <c>_toCompare</c>. For every file the tree is queried; hits from other
/// archives whose distance does not exceed MAX_SCORE are collected as file
/// pairs. The pairs are then counted per (archive, archive) combination and
/// passed to MakeScore. Finally the sorted score list is published and the
/// zip list is reloaded.
/// </summary>
private void CompareVPTree()
{
    var ziplist = _zipDict.Keys.ToArray();
    int zipCount = ziplist.Length;
    if (zipCount < 1)
    {
        return;
    }

    SetStatus(string.Format("Hashes: {0} Archives: {1}", _hashSource, zipCount));

    _scores = new HashSet<ScoreEntry>(); // use a set so that AxB and BxA are not duplicated

    var tree = new VPTree<HashZipEntry>(CalcScoreP);
    var root = tree.make_vp(_toCompare);
    var ret = new List<HashZipEntry>();

    updateProgress(0);
    int doneCount = 0;

    var pairset = new HashSet<ScoreEntry2>();

    foreach (var azip in ziplist)
    {
        var filelist = _zipDict[azip];
        foreach (var afile in filelist)
        {
            tree.query_vp(root, afile, 1, ret);
            foreach (var aret in ret)
            {
                if (aret == afile) // skip self
                {
                    continue;
                }
                if (aret.ZipFile == afile.ZipFile) // skip self-zip matches
                {
                    continue;
                }

                int dist = CalcScoreP(afile, aret);

                // reduce 'noise' by tossing too-distant matches
                if (dist > MAX_SCORE)
                {
                    continue;
                }

                ScoreEntry2 se2 = new ScoreEntry2();
                se2.F1 = afile;
                se2.F2 = aret;
                se2.score = dist;
                pairset.Add(se2);
            }
            ret.Clear();
        }

        doneCount++;
        if (doneCount % 5 == 0)
        {
            int perc = (int)(100.0 * doneCount / zipCount);
            updateProgress(perc);
        }
    }

    // Turn pairset into _scores
    var pairlist = pairset.ToList();
    _log.log(string.Format(" pair candidates:{0}", pairlist.Count));

    // Count the matches per (zip1, zip2) combination. Pairs of one archive are
    // produced file by file, so hits against different target archives are
    // interleaved (A-B, A-C, A-B, ...). Counting consecutive runs therefore
    // split one archive pair into several partial counts. Aggregating by key
    // yields a single, complete count; groups keep first-seen order.
    // NOTE(review): MakeScore now receives the F1/F2 of the first pair of each
    // group - presumably it only reads their ZipFile; confirm.
    var groupSlot = new Dictionary<string, int>();
    var groupFirst = new List<ScoreEntry2>();
    var groupCount = new List<int>();
    foreach (var apair in pairlist)
    {
        // '|' cannot occur in a Windows file name, so the key is unambiguous there.
        string key = apair.F1.ZipFile + "|" + apair.F2.ZipFile;
        int slot;
        if (groupSlot.TryGetValue(key, out slot))
        {
            groupCount[slot]++;
        }
        else
        {
            groupSlot.Add(key, groupFirst.Count);
            groupFirst.Add(apair);
            groupCount.Add(1);
        }
    }
    for (int i = 0; i < groupFirst.Count; i++)
    {
        MakeScore(groupCount[i], groupFirst[i].F1, groupFirst[i].F2);
    }

    updateProgress(0);

    _scoreList = _scores.ToList();
    _log.log(string.Format(" zip matches:{0}", _scoreList.Count));
    _scores = null;
    _scoreList.Sort(ScoreEntry.Comparer);
    LoadZipList();
}