/// <summary>
/// Builds an index from <paramref name="line"/> via <c>Hash(line)</c>, asserts that the
/// index holds exactly one entry, and returns the key stored at position 0.
/// </summary>
/// <param name="line">Text passed to <c>Hash</c>.</param>
/// <returns>The result of <c>Key(0)</c> on the resulting index.</returns>
/// <exception cref="NGit.Diff.SimilarityIndex.TableFullException"></exception>
private static int KeyFor(string line)
{
    SimilarityIndex index = Hash(line);
    var entryCount = index.Size();
    NUnit.Framework.Assert.AreEqual(1, entryCount, "single line scored");
    return index.Key(0);
}
/// <summary>
/// Opens one side of a diff entry through <c>reader</c>, hashes it into a new
/// <see cref="SimilarityIndex"/>, sorts the index and returns it.
/// </summary>
/// <param name="side">Which side of <paramref name="ent"/> to open.</param>
/// <param name="ent">The diff entry whose content is hashed.</param>
/// <returns>The populated and sorted index.</returns>
/// <exception cref="System.IO.IOException"></exception>
/// <exception cref="NGit.Diff.SimilarityIndex.TableFullException"></exception>
private SimilarityIndex Hash(DiffEntry.Side side, DiffEntry ent)
{
    var index = new SimilarityIndex();
    var content = reader.Open(side, ent);
    index.Hash(content);
    index.Sort();
    return index;
}
/// <summary>
/// Indexes "A\n" and "D\n" and expects <c>Common</c> to be 0 in both directions.
/// </summary>
public virtual void TestCommonScore_TotallyDifferentFiles()
{
    SimilarityIndex first = Hash("A\n");
    SimilarityIndex second = Hash("D\n");

    // The check is made from each side so an asymmetric result is caught.
    var firstVsSecond = first.Common(second);
    NUnit.Framework.Assert.AreEqual(0, firstVsSecond);

    var secondVsFirst = second.Common(first);
    NUnit.Framework.Assert.AreEqual(0, secondVsFirst);
}
/// <summary>
/// Indexes two empty strings and expects <c>Common</c> to be 0 in both directions.
/// </summary>
public virtual void TestCommonScore_EmptyFiles()
{
    SimilarityIndex emptyA = Hash(string.Empty);
    SimilarityIndex emptyB = Hash(string.Empty);

    var aVsB = emptyA.Common(emptyB);
    NUnit.Framework.Assert.AreEqual(0, aVsB);

    var bVsA = emptyB.Common(emptyA);
    NUnit.Framework.Assert.AreEqual(0, bVsA);
}
/// <summary>
/// Looks up the packed index of position 0 in each similarity index and delegates
/// to the array-based <c>Common</c> overload with both <c>idHash</c> tables.
/// </summary>
/// <param name="src">First index.</param>
/// <param name="dst">Second index.</param>
/// <returns>Whatever the array-based overload returns for the two tables.</returns>
private static long Common(NGit.Diff.SimilarityIndex src, NGit.Diff.SimilarityIndex dst)
{
    int srcStart = src.PackedIndex(0);
    int dstStart = dst.PackedIndex(0);
    return Common(src.idHash, srcStart, dst.idHash, dstStart);
}
/// <summary>
/// Scores this index against <paramref name="dst"/> on a scale of 0 to
/// <paramref name="maxScore"/>: <c>Common(dst) * maxScore</c> divided by the larger of
/// the two <c>fileSize</c> values.
/// </summary>
/// <param name="dst">The index to compare against.</param>
/// <param name="maxScore">The value returned for the best possible match.</param>
/// <returns>
/// <paramref name="maxScore"/> when the larger file size is 0; otherwise the scaled
/// quotient truncated to <c>int</c>.
/// </returns>
internal virtual int Score(NGit.Diff.SimilarityIndex dst, int maxScore)
{
    long largest = Math.Max(fileSize, dst.fileSize);
    if (largest != 0)
    {
        long scaled = Common(dst) * maxScore;
        return (int)(scaled / largest);
    }

    // A zero divisor is answered with the maximum score.
    return maxScore;
}
/// <summary>
/// Indexes two four-line inputs that differ only in their last line and expects
/// <c>Common</c> to be 6 and <c>Score(…, 100)</c> to be 75, in both directions.
/// </summary>
public virtual void TestCommonScore_SimiliarBy75()
{
    SimilarityIndex original = Hash("A\nB\nC\nD\n");
    SimilarityIndex modified = Hash("A\nB\nC\nQ\n");

    var commonForward = original.Common(modified);
    NUnit.Framework.Assert.AreEqual(6, commonForward);
    var commonBackward = modified.Common(original);
    NUnit.Framework.Assert.AreEqual(6, commonBackward);

    var scoreForward = original.Score(modified, 100);
    NUnit.Framework.Assert.AreEqual(75, scoreForward);
    var scoreBackward = modified.Score(original, 100);
    NUnit.Framework.Assert.AreEqual(75, scoreBackward);
}
/// <summary>
/// Hashes a four-line UTF-8 buffer ("A", then "B" three times) through the
/// stream-based <c>Hash</c> overload and expects the index to report a size of 2.
/// </summary>
public virtual void TestIndexingLargeObject()
{
    string text = string.Empty + "A\n" + "B\n" + "B\n" + "B\n";
    byte[] raw = Sharpen.Runtime.GetBytesForString(text, "UTF-8");

    SimilarityIndex index = new SimilarityIndex();
    index.Hash(new ByteArrayInputStream(raw), raw.Length);

    var entryCount = index.Size();
    NUnit.Framework.Assert.AreEqual(2, entryCount);
}
/// <summary>
/// Feeds the UTF-8 bytes of "A\nB\nB\nB\n" to <c>SimilarityIndex.Hash</c> as a
/// stream plus length, then checks that the index size is 2.
/// </summary>
public virtual void TestIndexingLargeObject()
{
    byte[] payload = Sharpen.Runtime.GetBytesForString(
        (string.Empty + "A\n" + "B\n" + "B\n" + "B\n"),
        "UTF-8");
    var stream = new ByteArrayInputStream(payload);

    var similarity = new SimilarityIndex();
    similarity.Hash(stream, payload.Length);

    NUnit.Framework.Assert.AreEqual(2, similarity.Size());
}
/// <summary>
/// Indexes the same four-line text twice and expects <c>Common</c> to be 8 and
/// <c>Score(…, 100)</c> to be 100, in both directions.
/// </summary>
public virtual void TestCommonScore_SameFiles()
{
    string content = string.Empty + "A\n" + "B\n" + "D\n" + "B\n";
    SimilarityIndex left = Hash(content);
    SimilarityIndex right = Hash(content);

    var commonLeftRight = left.Common(right);
    NUnit.Framework.Assert.AreEqual(8, commonLeftRight);
    var commonRightLeft = right.Common(left);
    NUnit.Framework.Assert.AreEqual(8, commonRightLeft);

    var scoreLeftRight = left.Score(right, 100);
    NUnit.Framework.Assert.AreEqual(100, scoreLeftRight);
    var scoreRightLeft = right.Score(left, 100);
    NUnit.Framework.Assert.AreEqual(100, scoreRightLeft);
}
/// <summary>
/// Indexes "A\nB\nD\nB\n" and checks that the three distinct lines get distinct keys,
/// that the index size is 3, and that the counts are 2 for "A", 4 for "B" and 2 for "D".
/// </summary>
public virtual void TestIndexingSmallObject()
{
    SimilarityIndex index = Hash(string.Empty + "A\n" + "B\n" + "D\n" + "B\n");

    int keyA = KeyFor("A\n");
    int keyB = KeyFor("B\n");
    int keyD = KeyFor("D\n");

    // All three keys must be pairwise different for the count checks to be meaningful.
    bool allDistinct = keyA != keyB && keyA != keyD && keyB != keyD;
    NUnit.Framework.Assert.IsTrue(allDistinct);

    NUnit.Framework.Assert.AreEqual(3, index.Size());

    var slotA = index.FindIndex(keyA);
    NUnit.Framework.Assert.AreEqual(2, index.Count(slotA));
    var slotB = index.FindIndex(keyB);
    NUnit.Framework.Assert.AreEqual(4, index.Count(slotB));
    var slotD = index.FindIndex(keyD);
    NUnit.Framework.Assert.AreEqual(2, index.Count(slotD));
}
/// <summary>
/// Scores the OLD side of <paramref name="d"/> against its NEW side on a 0-100 scale.
/// </summary>
/// <param name="reader">Source used to open both sides of the entry.</param>
/// <param name="d">The diff entry whose two sides are compared.</param>
/// <returns>
/// <c>Score(…, 100)</c> of the old index against the new one, or
/// <c>breakScore + 1</c> when either index overflows while being built.
/// </returns>
/// <exception cref="System.IO.IOException"></exception>
private int CalculateModifyScore(ContentSource.Pair reader, DiffEntry d)
{
    try
    {
        var before = new SimilarityIndex();
        before.Hash(reader.Open(DiffEntry.Side.OLD, d));
        before.Sort();

        var after = new SimilarityIndex();
        after.Hash(reader.Open(DiffEntry.Side.NEW, d));
        after.Sort();

        return before.Score(after, 100);
    }
    catch (SimilarityIndex.TableFullException)
    {
        // A table overflowed during construction, so the pair must not be
        // broken. One above breakScore marks it as not similar, yet not
        // dissimilar enough to break. The overflow is also recorded.
        overRenameLimit = true;
        return breakScore + 1;
    }
}
/// <summary>
/// Instance form of the two-argument <c>Common</c>: compares this index with
/// <paramref name="dst"/>.
/// </summary>
/// <param name="dst">The index to compare against.</param>
/// <returns>The value returned by <c>Common(this, dst)</c>.</returns>
internal virtual long Common(NGit.Diff.SimilarityIndex dst)
{
    long shared = Common(this, dst);
    return shared;
}
/// <summary>
/// Creates a <see cref="SimilarityIndex"/> for one side of a diff entry: opens the
/// content through <c>reader</c>, hashes it, sorts the index and returns it.
/// </summary>
/// <param name="side">Side of <paramref name="ent"/> to read.</param>
/// <param name="ent">Entry providing the content.</param>
/// <returns>The hashed and sorted index.</returns>
/// <exception cref="System.IO.IOException"></exception>
/// <exception cref="NGit.Diff.SimilarityIndex.TableFullException"></exception>
private SimilarityIndex Hash(DiffEntry.Side side, DiffEntry ent)
{
    SimilarityIndex result = new SimilarityIndex();
    var opened = reader.Open(side, ent);
    result.Hash(opened);
    result.Sort();
    return result;
}
/// <summary>
/// Scores every (source, destination) pair and records each pair whose score reaches
/// <c>renameScore</c> as an encoded entry in <c>matrix</c>, then sorts the populated
/// range of the matrix.
/// </summary>
/// <param name="pm">Progress monitor updated as pairs are processed or skipped.</param>
/// <returns>The number of entries written to <c>matrix</c>.</returns>
/// <exception cref="System.IO.IOException"></exception>
private int BuildMatrix(ProgressMonitor pm) {
    // Allocate for the worst-case scenario where every pair has a
    // score that we need to consider. We might not need that many.
    //
    matrix = new long[srcs.Count * dsts.Count];
    // Per-entry size caches; a value of 0 means "not computed yet".
    long[] srcSizes = new long[srcs.Count];
    long[] dstSizes = new long[dsts.Count];
    // Allocated lazily; marks destinations whose hash table overflowed.
    BitSet dstTooLarge = null;
    // Consider each pair of files. If the score is above the minimum
    // threshold we need to record that score in the matrix so we can
    // later find the best matches.
    //
    int mNext = 0;
    for (int srcIdx = 0; srcIdx < srcs.Count; srcIdx++) {
        DiffEntry srcEnt = srcs[srcIdx];
        if (!IsFile(srcEnt.oldMode)) {
            // The whole row is skipped, so account for every destination at once.
            pm.Update(dsts.Count);
            continue;
        }
        // Source index, built on first use and reused for the rest of the row.
        SimilarityIndex s = null;
        for (int dstIdx = 0; dstIdx < dsts.Count; dstIdx++) {
            DiffEntry dstEnt = dsts[dstIdx];
            if (!IsFile(dstEnt.newMode)) {
                pm.Update(1);
                continue;
            }
            if (!RenameDetector.SameType(srcEnt.oldMode, dstEnt.newMode)) {
                pm.Update(1);
                continue;
            }
            // Skip destinations that already overflowed for an earlier source.
            if (dstTooLarge != null && dstTooLarge.Get(dstIdx)) {
                pm.Update(1);
                continue;
            }
            // Sizes are cached as size + 1 so a cached value is never 0,
            // which is the "not computed yet" marker (and max below is never 0).
            long srcSize = srcSizes[srcIdx];
            if (srcSize == 0) {
                srcSize = Size(DiffEntry.Side.OLD, srcEnt) + 1;
                srcSizes[srcIdx] = srcSize;
            }
            long dstSize = dstSizes[dstIdx];
            if (dstSize == 0) {
                dstSize = Size(DiffEntry.Side.NEW, dstEnt) + 1;
                dstSizes[dstIdx] = dstSize;
            }
            long max = Math.Max(srcSize, dstSize);
            long min = Math.Min(srcSize, dstSize);
            if (min * 100 / max < renameScore) {
                // Cannot possibly match, as the file sizes are so different
                pm.Update(1);
                continue;
            }
            if (s == null) {
                try {
                    s = Hash(DiffEntry.Side.OLD, srcEnt);
                } catch (SimilarityIndex.TableFullException) {
                    // The source cannot be indexed: record the overflow and
                    // abandon the remaining destinations for this source.
                    // NOTE(review): no pm.Update is issued for the destinations
                    // skipped by this jump - confirm that is intended.
                    tableOverflow = true;
                    goto SRC_continue;
                }
            }
            SimilarityIndex d;
            try {
                d = Hash(DiffEntry.Side.NEW, dstEnt);
            } catch (SimilarityIndex.TableFullException) {
                // Remember the overflowing destination so later sources skip it.
                if (dstTooLarge == null) {
                    dstTooLarge = new BitSet(dsts.Count);
                }
                dstTooLarge.Set(dstIdx);
                tableOverflow = true;
                pm.Update(1);
                continue;
            }
            // Content score is requested on a 0..10000 scale.
            int contentScore = s.Score(d, 10000);
            // nameScore returns a value between 0 and 100, but we want it
            // to be in the same range as the content score. This allows it
            // to be dropped into the pretty formula for the final score.
            int nameScore = NameScore(srcEnt.oldPath, dstEnt.newPath) * 100;
            // Weighted 99:1 in favor of content, then scaled back down by 10000.
            int score = (contentScore * 99 + nameScore * 1) / 10000;
            if (score < renameScore) {
                pm.Update(1);
                continue;
            }
            matrix[mNext++] = Encode(score, srcIdx, dstIdx);
            pm.Update(1);
        }
        // Jump target for abandoning the inner loop and moving to the next source.
        SRC_continue :;
    }
    // Label with no goto targeting it inside this method.
    SRC_break :;
    // Sort everything in the range we populated, which might be the
    // entire matrix, or just a smaller slice if we had some bad low
    // scoring pairs.
    //
    Arrays.Sort(matrix, 0, mNext);
    return(mNext);
}