/// <summary>
        /// Hashes a single line of text and returns the key of the one entry
        /// the resulting index holds. Asserts that exactly one entry exists.
        /// </summary>
        /// <param name="line">The text to hash.</param>
        /// <returns>The key stored at position 0 of the index.</returns>
        /// <exception cref="NGit.Diff.SimilarityIndex.TableFullException"></exception>
        private static int KeyFor(string line)
        {
            SimilarityIndex index = Hash(line);
            NUnit.Framework.Assert.AreEqual(1, index.Size(), "single line scored");

            int onlyKey = index.Key(0);
            return onlyKey;
        }
        /// <summary>
        /// Builds a sorted <see cref="SimilarityIndex"/> from whatever
        /// <c>reader</c> opens for the given side of the entry.
        /// </summary>
        /// <param name="side">Which side of the entry to open.</param>
        /// <param name="ent">The diff entry whose content is hashed.</param>
        /// <returns>The hashed and sorted index.</returns>
        /// <exception cref="System.IO.IOException"></exception>
        /// <exception cref="NGit.Diff.SimilarityIndex.TableFullException"></exception>
        private SimilarityIndex Hash(DiffEntry.Side side, DiffEntry ent)
        {
            SimilarityIndex index = new SimilarityIndex();
            var content = reader.Open(side, ent);
            index.Hash(content);
            index.Sort();
            return index;
        }
        /// <summary>
        /// Two inputs with no line in common must report a common score of
        /// zero, regardless of which one is treated as the source.
        /// </summary>
        public virtual void TestCommonScore_TotallyDifferentFiles()
        {
            SimilarityIndex first = Hash("A\n");
            SimilarityIndex second = Hash("D\n");

            // Check both directions.
            NUnit.Framework.Assert.AreEqual(0, first.Common(second));
            NUnit.Framework.Assert.AreEqual(0, second.Common(first));
        }
        /// <summary>
        /// Two empty inputs must report a common score of zero in both
        /// directions.
        /// </summary>
        public virtual void TestCommonScore_EmptyFiles()
        {
            SimilarityIndex first = Hash(string.Empty);
            SimilarityIndex second = Hash(string.Empty);

            // Check both directions.
            NUnit.Framework.Assert.AreEqual(0, first.Common(second));
            NUnit.Framework.Assert.AreEqual(0, second.Common(first));
        }
Beispiel #5
0
        /// <summary>
        /// Computes the common score of two indexes by delegating to the
        /// array-based overload, starting each side at the position given
        /// by <c>PackedIndex(0)</c>.
        /// </summary>
        /// <param name="src">The first index.</param>
        /// <param name="dst">The second index.</param>
        /// <returns>The value returned by the array-based overload.</returns>
        private static long Common(NGit.Diff.SimilarityIndex src, NGit.Diff.SimilarityIndex dst)
        {
            // Resolve both starting positions before touching the hash arrays.
            int srcStart = src.PackedIndex(0);
            int dstStart = dst.PackedIndex(0);

            return Common(src.idHash, srcStart, dst.idHash, dstStart);
        }
Beispiel #6
0
        /// <summary>
        /// Scores this index against <paramref name="dst"/> on a scale whose
        /// top value is <paramref name="maxScore"/>.
        /// </summary>
        /// <param name="dst">The index to compare against.</param>
        /// <param name="maxScore">The score to report for a perfect match.</param>
        /// <returns>
        /// <paramref name="maxScore"/> when the larger of the two
        /// <c>fileSize</c> values is zero; otherwise
        /// <c>Common(dst) * maxScore / larger</c>, truncated to an int.
        /// </returns>
        internal virtual int Score(NGit.Diff.SimilarityIndex dst, int maxScore)
        {
            long larger = Math.Max(fileSize, dst.fileSize);

            // A zero denominator is reported as a perfect score rather than
            // attempting the division.
            return larger == 0
                ? maxScore
                : (int)((Common(dst) * maxScore) / larger);
        }
        /// <summary>
        /// Two four-line inputs that differ only in their last line must
        /// report a common score of 6 and a similarity of 75 out of 100, in
        /// both directions.
        /// </summary>
        public virtual void TestCommonScore_SimiliarBy75()
        {
            SimilarityIndex original = Hash("A\nB\nC\nD\n");
            SimilarityIndex modified = Hash("A\nB\nC\nQ\n");

            // Raw common score, both directions.
            NUnit.Framework.Assert.AreEqual(6, original.Common(modified));
            NUnit.Framework.Assert.AreEqual(6, modified.Common(original));

            // Score on a 0..100 scale, both directions.
            NUnit.Framework.Assert.AreEqual(75, original.Score(modified, 100));
            NUnit.Framework.Assert.AreEqual(75, modified.Score(original, 100));
        }
		/// <summary>
		/// Hashes four lines, three of them identical, through the
		/// stream-based <c>Hash</c> overload and expects the index to hold
		/// two entries.
		/// </summary>
		public virtual void TestIndexingLargeObject()
		{
			string content = string.Empty
				+ "A\n"
				+ "B\n"
				+ "B\n"
				+ "B\n";
			byte[] raw = Sharpen.Runtime.GetBytesForString(content, "UTF-8");

			SimilarityIndex index = new SimilarityIndex();
			ByteArrayInputStream stream = new ByteArrayInputStream(raw);
			index.Hash(stream, raw.Length);

			NUnit.Framework.Assert.AreEqual(2, index.Size());
		}
        /// <summary>
        /// Feeds four lines (one "A" and three "B") to the stream-based
        /// <c>Hash</c> overload and checks that the index ends up with two
        /// entries.
        /// </summary>
        public virtual void TestIndexingLargeObject()
        {
            string text = string.Empty
                + "A\n"
                + "B\n"
                + "B\n"
                + "B\n";
            byte[] encoded = Sharpen.Runtime.GetBytesForString(text, "UTF-8");

            SimilarityIndex index = new SimilarityIndex();
            ByteArrayInputStream input = new ByteArrayInputStream(encoded);
            index.Hash(input, encoded.Length);

            NUnit.Framework.Assert.AreEqual(2, index.Size());
        }
        /// <summary>
        /// Two indexes built from the same four-line text must report a
        /// common score of 8 and a similarity of 100 out of 100, in both
        /// directions.
        /// </summary>
        public virtual void TestCommonScore_SameFiles()
        {
            string content = string.Empty
                + "A\n"
                + "B\n"
                + "D\n"
                + "B\n";
            SimilarityIndex first = Hash(content);
            SimilarityIndex second = Hash(content);

            // Raw common score, both directions.
            NUnit.Framework.Assert.AreEqual(8, first.Common(second));
            NUnit.Framework.Assert.AreEqual(8, second.Common(first));

            // Score on a 0..100 scale, both directions.
            NUnit.Framework.Assert.AreEqual(100, first.Score(second, 100));
            NUnit.Framework.Assert.AreEqual(100, second.Score(first, 100));
        }
        /// <summary>
        /// Hashes four lines ("A", "B", "D", "B") and checks that the index
        /// holds three entries with counts 2, 4 and 2 for the keys of "A",
        /// "B" and "D" respectively.
        /// </summary>
        public virtual void TestIndexingSmallObject()
        {
            string content = string.Empty
                + "A\n"
                + "B\n"
                + "D\n"
                + "B\n";
            SimilarityIndex index = Hash(content);

            int keyOfA = KeyFor("A\n");
            int keyOfB = KeyFor("B\n");
            int keyOfD = KeyFor("D\n");

            // The three lines must hash to pairwise different keys for the
            // per-key assertions below to be meaningful.
            bool allDistinct = keyOfA != keyOfB && keyOfA != keyOfD && keyOfB != keyOfD;
            NUnit.Framework.Assert.IsTrue(allDistinct);

            NUnit.Framework.Assert.AreEqual(3, index.Size());
            NUnit.Framework.Assert.AreEqual(2, index.Count(index.FindIndex(keyOfA)));
            NUnit.Framework.Assert.AreEqual(4, index.Count(index.FindIndex(keyOfB)));
            NUnit.Framework.Assert.AreEqual(2, index.Count(index.FindIndex(keyOfD)));
        }
 /// <summary>
 /// Scores the OLD side of an entry against its NEW side on a 0..100 scale.
 /// </summary>
 /// <param name="reader">Opens the content of either side of the entry.</param>
 /// <param name="d">The entry whose two sides are compared.</param>
 /// <returns>
 /// The score of the OLD index against the NEW index, or
 /// <c>breakScore + 1</c> when either index overflows while being built.
 /// </returns>
 /// <exception cref="System.IO.IOException"></exception>
 private int CalculateModifyScore(ContentSource.Pair reader, DiffEntry d)
 {
     try
     {
         SimilarityIndex oldIndex = new SimilarityIndex();
         oldIndex.Hash(reader.Open(DiffEntry.Side.OLD, d));
         oldIndex.Sort();

         SimilarityIndex newIndex = new SimilarityIndex();
         newIndex.Hash(reader.Open(DiffEntry.Side.NEW, d));
         newIndex.Sort();

         return oldIndex.Score(newIndex, 100);
     }
     catch (SimilarityIndex.TableFullException)
     {
         // One of the tables overflowed, so no real score is available.
         // Record the overflow and answer with a value one above
         // breakScore so that the pair is not broken apart.
         overRenameLimit = true;
         return breakScore + 1;
     }
 }
Beispiel #13
0
 /// <summary>
 /// Computes the common score between this index and <paramref name="dst"/>
 /// by delegating to the two-argument overload with this instance as the
 /// first argument.
 /// </summary>
 /// <param name="dst">The index to compare against.</param>
 /// <returns>The value returned by the two-argument overload.</returns>
 internal virtual long Common(NGit.Diff.SimilarityIndex dst)
 {
     long shared = Common(this, dst);
     return shared;
 }
		/// <summary>
		/// Creates a <see cref="SimilarityIndex"/>, hashes into it whatever
		/// <c>reader</c> opens for the requested side of the entry, and
		/// sorts it.
		/// </summary>
		/// <param name="side">Which side of the entry to open.</param>
		/// <param name="ent">The diff entry whose content is hashed.</param>
		/// <returns>The hashed and sorted index.</returns>
		/// <exception cref="System.IO.IOException"></exception>
		/// <exception cref="NGit.Diff.SimilarityIndex.TableFullException"></exception>
		private SimilarityIndex Hash(DiffEntry.Side side, DiffEntry ent)
		{
			SimilarityIndex result = new SimilarityIndex();
			var opened = reader.Open(side, ent);
			result.Hash(opened);
			result.Sort();
			return (result);
		}
        /// <summary>
        /// Scores every eligible (source, destination) pair and stores the
        /// encoded result of each pair whose score reaches
        /// <c>renameScore</c> into <c>matrix</c>, then sorts the populated
        /// prefix of <c>matrix</c>.
        /// </summary>
        /// <param name="pm">
        /// Monitor that is advanced via <c>Update</c> as pairs are processed
        /// or skipped.
        /// </param>
        /// <returns>The number of entries written to <c>matrix</c>.</returns>
        /// <exception cref="System.IO.IOException"></exception>
        private int BuildMatrix(ProgressMonitor pm)
        {
            // Allocate for the worst-case scenario where every pair has a
            // score that we need to consider. We might not need that many.
            //
            matrix = new long[srcs.Count * dsts.Count];
            // Per-entry size caches. A slot holding 0 means "not computed yet";
            // stored values are Size(...) + 1 (see below).
            long[] srcSizes    = new long[srcs.Count];
            long[] dstSizes    = new long[dsts.Count];
            // Created lazily; marks destinations whose hashing threw
            // TableFullException so later sources skip them.
            BitSet dstTooLarge = null;
            // Consider each pair of files; if the score is above the minimum
            // threshold we need to record that score in the matrix so we can
            // later find the best matches.
            //
            int mNext = 0;

            for (int srcIdx = 0; srcIdx < srcs.Count; srcIdx++)
            {
                DiffEntry srcEnt = srcs[srcIdx];
                if (!IsFile(srcEnt.oldMode))
                {
                    // Source is not a file: skip the whole row, advancing the
                    // monitor by one unit per destination.
                    pm.Update(dsts.Count);
                    continue;
                }
                // Index of the source content; hashed at most once per source,
                // on the first destination that passes all the cheap filters.
                SimilarityIndex s = null;
                for (int dstIdx = 0; dstIdx < dsts.Count; dstIdx++)
                {
                    DiffEntry dstEnt = dsts[dstIdx];
                    if (!IsFile(dstEnt.newMode))
                    {
                        pm.Update(1);
                        continue;
                    }
                    if (!RenameDetector.SameType(srcEnt.oldMode, dstEnt.newMode))
                    {
                        pm.Update(1);
                        continue;
                    }
                    if (dstTooLarge != null && dstTooLarge.Get(dstIdx))
                    {
                        // This destination already overflowed for an earlier source.
                        pm.Update(1);
                        continue;
                    }
                    long srcSize = srcSizes[srcIdx];
                    if (srcSize == 0)
                    {
                        // The +1 keeps the cached value distinct from the
                        // "not computed" marker 0 (assuming Size is never
                        // negative - TODO confirm) and keeps max below non-zero.
                        srcSize          = Size(DiffEntry.Side.OLD, srcEnt) + 1;
                        srcSizes[srcIdx] = srcSize;
                    }
                    long dstSize = dstSizes[dstIdx];
                    if (dstSize == 0)
                    {
                        dstSize          = Size(DiffEntry.Side.NEW, dstEnt) + 1;
                        dstSizes[dstIdx] = dstSize;
                    }
                    long max = Math.Max(srcSize, dstSize);
                    long min = Math.Min(srcSize, dstSize);
                    if (min * 100 / max < renameScore)
                    {
                        // Cannot possibly match, as the file sizes are so different
                        pm.Update(1);
                        continue;
                    }
                    if (s == null)
                    {
                        try
                        {
                            s = Hash(DiffEntry.Side.OLD, srcEnt);
                        }
                        catch (SimilarityIndex.TableFullException)
                        {
                            // The source cannot be indexed: abandon the rest of
                            // this row and move on to the next source.
                            // NOTE(review): this path does not call pm.Update for
                            // the current or remaining destinations of this row.
                            tableOverflow = true;
                            goto SRC_continue;
                        }
                    }
                    SimilarityIndex d;
                    try
                    {
                        d = Hash(DiffEntry.Side.NEW, dstEnt);
                    }
                    catch (SimilarityIndex.TableFullException)
                    {
                        // Remember the overflowing destination so it is not
                        // hashed again for subsequent sources.
                        if (dstTooLarge == null)
                        {
                            dstTooLarge = new BitSet(dsts.Count);
                        }
                        dstTooLarge.Set(dstIdx);
                        tableOverflow = true;
                        pm.Update(1);
                        continue;
                    }
                    // Content similarity on a 0..10000 scale.
                    int contentScore = s.Score(d, 10000);
                    // nameScore returns a value between 0 and 100, but we want it
                    // to be in the same range as the content score. This allows it
                    // to be dropped into the pretty formula for the final score.
                    int nameScore = NameScore(srcEnt.oldPath, dstEnt.newPath) * 100;
                    // Weighted blend: 99 parts content, 1 part name, scaled back
                    // down by 10000.
                    int score     = (contentScore * 99 + nameScore * 1) / 10000;
                    if (score < renameScore)
                    {
                        pm.Update(1);
                        continue;
                    }
                    matrix[mNext++] = Encode(score, srcIdx, dstIdx);
                    pm.Update(1);
                }
                // Jump target used to continue the outer loop from inside the
                // inner loop.
                SRC_continue :;
            }
            // NOTE(review): no goto in this method references SRC_break.
            SRC_break :;
            // Sort everything in the range we populated, which might be the
            // entire matrix, or just a smaller slice if we had some bad low
            // scoring pairs.
            //
            Arrays.Sort(matrix, 0, mNext);
            return(mNext);
        }