/// <summary>
/// Hashes two four-line texts that differ only in their last line and
/// checks that the common count and the score are the same in both
/// directions: a common count of 6 and a score of 75 when scored
/// against a maximum of 100.
/// </summary>
public virtual void TestCommonScore_SimiliarBy75()
{
    const int expectedCommon = 6;
    const int expectedScore = 75;

    SimilarityIndex left = Hash("A\nB\nC\nD\n");
    SimilarityIndex right = Hash("A\nB\nC\nQ\n");

    // The common count must not depend on which side is the receiver.
    NUnit.Framework.Assert.AreEqual(expectedCommon, left.Common(right));
    NUnit.Framework.Assert.AreEqual(expectedCommon, right.Common(left));

    // Likewise the score must be the same in both directions.
    NUnit.Framework.Assert.AreEqual(expectedScore, left.Score(right, 100));
    NUnit.Framework.Assert.AreEqual(expectedScore, right.Score(left, 100));
}
/// <summary>
/// Hashes the same four-line text twice and checks that the two indexes
/// report a common count of 8 and a score of 100 (out of a maximum of
/// 100), regardless of which index is the receiver.
/// </summary>
public virtual void TestCommonScore_SameFiles()
{
    // Four two-character lines; the "B" line appears twice.
    string content = "A\nB\nD\nB\n";

    SimilarityIndex first = Hash(content);
    SimilarityIndex second = Hash(content);

    NUnit.Framework.Assert.AreEqual(8, first.Common(second));
    NUnit.Framework.Assert.AreEqual(8, second.Common(first));

    NUnit.Framework.Assert.AreEqual(100, first.Score(second, 100));
    NUnit.Framework.Assert.AreEqual(100, second.Score(first, 100));
}
/// <summary>
/// Scores how similar the old and new sides of a single entry are, on a
/// scale whose maximum is 100.
/// </summary>
/// <param name="reader">Source used to open the OLD and NEW content of the entry.</param>
/// <param name="d">The entry whose two sides are compared.</param>
/// <returns>
/// The similarity score of the old side against the new side, or
/// <c>breakScore + 1</c> when either index overflowed while being built.
/// </returns>
/// <exception cref="System.IO.IOException"></exception>
private int CalculateModifyScore(ContentSource.Pair reader, DiffEntry d)
{
    try
    {
        // Index the old side first, then the new side; each index is
        // sorted before it is used for scoring.
        SimilarityIndex oldIndex = new SimilarityIndex();
        oldIndex.Hash(reader.Open(DiffEntry.Side.OLD, d));
        oldIndex.Sort();

        SimilarityIndex newIndex = new SimilarityIndex();
        newIndex.Hash(reader.Open(DiffEntry.Side.NEW, d));
        newIndex.Sort();

        int similarity = oldIndex.Score(newIndex, 100);
        return similarity;
    }
    catch (SimilarityIndex.TableFullException)
    {
        // An index table overflowed during construction, so the pair must
        // not be broken. One above breakScore keeps it from being treated
        // as similar while still not being dissimilar enough to break.
        overRenameLimit = true;
        return breakScore + 1;
    }
}
/// <summary>
/// Scores every eligible (source, destination) pair and records each pair
/// whose score reaches <c>renameScore</c> in <c>matrix</c>, then sorts the
/// populated prefix of the matrix.
/// </summary>
/// <param name="pm">
/// Progress monitor; updated by one unit per destination considered, or by
/// <c>dsts.Count</c> at once when a source is not a file.
/// </param>
/// <returns>The number of entries written to <c>matrix</c>.</returns>
/// <exception cref="System.IO.IOException"></exception>
private int BuildMatrix(ProgressMonitor pm)
{
    // Size the matrix for the worst case in which every pair produces a
    // score worth keeping; usually far fewer slots end up being used.
    matrix = new long[srcs.Count * dsts.Count];

    // Cached sizes are stored as (size + 1) so that 0 means "not computed".
    long[] srcSizes = new long[srcs.Count];
    long[] dstSizes = new long[dsts.Count];

    // Lazily created; marks destinations whose index table overflowed.
    BitSet oversizedDsts = null;

    int used = 0;
    for (int srcIdx = 0; srcIdx < srcs.Count; srcIdx++)
    {
        DiffEntry srcEnt = srcs[srcIdx];
        if (!IsFile(srcEnt.oldMode))
        {
            // Nothing to compare for this source; account for the whole row.
            pm.Update(dsts.Count);
            continue;
        }

        // Hashed on demand, the first time a destination survives the
        // cheap filters below.
        SimilarityIndex srcIndex = null;

        for (int dstIdx = 0; dstIdx < dsts.Count; dstIdx++)
        {
            DiffEntry dstEnt = dsts[dstIdx];

            // Cheap rejections: not a file, different type than the source,
            // or already known to overflow the index table.
            bool skipDst = !IsFile(dstEnt.newMode)
                || !RenameDetector.SameType(srcEnt.oldMode, dstEnt.newMode)
                || (oversizedDsts != null && oversizedDsts.Get(dstIdx));
            if (skipDst)
            {
                pm.Update(1);
                continue;
            }

            if (srcSizes[srcIdx] == 0)
            {
                srcSizes[srcIdx] = Size(DiffEntry.Side.OLD, srcEnt) + 1;
            }
            long srcSize = srcSizes[srcIdx];

            if (dstSizes[dstIdx] == 0)
            {
                dstSizes[dstIdx] = Size(DiffEntry.Side.NEW, dstEnt) + 1;
            }
            long dstSize = dstSizes[dstIdx];

            long larger = Math.Max(srcSize, dstSize);
            long smaller = Math.Min(srcSize, dstSize);
            if (smaller * 100 / larger < renameScore)
            {
                // The sizes differ too much for the pair to reach the
                // required score.
                pm.Update(1);
                continue;
            }

            if (srcIndex == null)
            {
                try
                {
                    srcIndex = Hash(DiffEntry.Side.OLD, srcEnt);
                }
                catch (SimilarityIndex.TableFullException)
                {
                    // The source cannot be indexed: give up on the rest of
                    // this row and move on to the next source.
                    tableOverflow = true;
                    break;
                }
            }

            SimilarityIndex dstIndex;
            try
            {
                dstIndex = Hash(DiffEntry.Side.NEW, dstEnt);
            }
            catch (SimilarityIndex.TableFullException)
            {
                // Remember the overflow so later sources skip this
                // destination without hashing it again.
                if (oversizedDsts == null)
                {
                    oversizedDsts = new BitSet(dsts.Count);
                }
                oversizedDsts.Set(dstIdx);
                tableOverflow = true;
                pm.Update(1);
                continue;
            }

            int contentScore = srcIndex.Score(dstIndex, 10000);

            // NameScore yields a value between 0 and 100; scale it to the
            // range of the content score so both fit the same weighted
            // formula (99 parts content, 1 part name).
            int nameScore = NameScore(srcEnt.oldPath, dstEnt.newPath) * 100;
            int score = (contentScore * 99 + nameScore) / 10000;

            if (score >= renameScore)
            {
                matrix[used++] = Encode(score, srcIdx, dstIdx);
            }
            pm.Update(1);
        }
    }

    // Only the populated prefix is sorted; that may be the whole matrix or
    // a smaller slice when some pairs scored too low to be recorded.
    Arrays.Sort(matrix, 0, used);
    return used;
}