public virtual void TestCommonScore_SimiliarBy75()
        {
            // Two inputs of four two-byte lines each; only the last line differs.
            SimilarityIndex original = Hash("A\nB\nC\nD\n");
            SimilarityIndex modified = Hash("A\nB\nC\nQ\n");

            const int expectedCommon = 6;
            const int expectedScore  = 75;
            const int maxScore       = 100;

            // Both measurements are checked in each direction.
            NUnit.Framework.Assert.AreEqual(expectedCommon, original.Common(modified));
            NUnit.Framework.Assert.AreEqual(expectedCommon, modified.Common(original));
            NUnit.Framework.Assert.AreEqual(expectedScore, original.Score(modified, maxScore));
            NUnit.Framework.Assert.AreEqual(expectedScore, modified.Score(original, maxScore));
        }
        public virtual void TestCommonScore_SameFiles()
        {
            // The same content is hashed twice, so both indexes describe
            // identical input.
            string content = "A\n" + "B\n" + "D\n" + "B\n";
            SimilarityIndex first  = Hash(content);
            SimilarityIndex second = Hash(content);

            const int expectedCommon = 8;
            const int expectedScore  = 100;
            const int maxScore       = 100;

            // Both measurements are checked in each direction.
            NUnit.Framework.Assert.AreEqual(expectedCommon, first.Common(second));
            NUnit.Framework.Assert.AreEqual(expectedCommon, second.Common(first));
            NUnit.Framework.Assert.AreEqual(expectedScore, first.Score(second, maxScore));
            NUnit.Framework.Assert.AreEqual(expectedScore, second.Score(first, maxScore));
        }
 /// <summary>
 /// Scores how similar the old and new sides of a single entry are, on a
 /// scale whose maximum is 100.
 /// </summary>
 /// <param name="reader">Source used to open the OLD and NEW content of the entry.</param>
 /// <param name="d">The entry whose two sides are compared.</param>
 /// <returns>
 /// The similarity score of the two sides, or <c>breakScore + 1</c> when a
 /// similarity table overflowed.
 /// </returns>
 /// <exception cref="System.IO.IOException"></exception>
 private int CalculateModifyScore(ContentSource.Pair reader, DiffEntry d)
 {
     try
     {
         // Index the old side first, then the new side; each index is
         // sorted before the two are compared.
         SimilarityIndex before = new SimilarityIndex();
         before.Hash(reader.Open(DiffEntry.Side.OLD, d));
         before.Sort();

         SimilarityIndex after = new SimilarityIndex();
         after.Hash(reader.Open(DiffEntry.Side.NEW, d));
         after.Sort();

         return before.Score(after, 100);
     }
     catch (SimilarityIndex.TableFullException)
     {
         // A table overflowed while being built, so the pair must not be
         // broken. One above breakScore keeps the pair from being treated
         // as dissimilar enough to break, without claiming it is similar.
         overRenameLimit = true;
         return breakScore + 1;
     }
 }
        /// <summary>
        /// Scores each (source, destination) pair and stores the encoded
        /// result of every pair whose score reaches <c>renameScore</c> at the
        /// front of <c>matrix</c>, then sorts that populated range.
        /// </summary>
        /// <param name="pm">
        /// Progress monitor; updated by one unit per examined pair, or by
        /// <c>dsts.Count</c> when a whole source entry is skipped.
        /// </param>
        /// <returns>The number of populated entries at the start of <c>matrix</c>.</returns>
        /// <exception cref="System.IO.IOException"></exception>
        private int BuildMatrix(ProgressMonitor pm)
        {
            // Worst case: every pair produces a score worth keeping. Usually
            // only a prefix of this array ends up being used.
            matrix = new long[srcs.Count * dsts.Count];

            // Per-entry cache of (size + 1); a zero slot means "not measured yet".
            long[] cachedSrcSizes = new long[srcs.Count];
            long[] cachedDstSizes = new long[dsts.Count];

            // Created on first use; a set bit marks a destination whose
            // similarity table overflowed, so it is never hashed again.
            BitSet oversizedDsts = null;

            // Next free slot in matrix.
            int used = 0;

            for (int i = 0; i < srcs.Count; i++)
            {
                DiffEntry from = srcs[i];
                if (!IsFile(from.oldMode))
                {
                    // Account for every destination this source would have
                    // been paired with.
                    pm.Update(dsts.Count);
                    continue;
                }

                // Hashed lazily, at most once per source entry.
                SimilarityIndex fromIndex = null;

                for (int j = 0; j < dsts.Count; j++)
                {
                    DiffEntry to = dsts[j];

                    // Cheap rejections: not a file, different type than the
                    // source, or already known to overflow the table.
                    bool skip = !IsFile(to.newMode)
                                || !RenameDetector.SameType(from.oldMode, to.newMode)
                                || (oversizedDsts != null && oversizedDsts.Get(j));
                    if (skip)
                    {
                        pm.Update(1);
                        continue;
                    }

                    long fromSize = cachedSrcSizes[i];
                    if (fromSize == 0)
                    {
                        fromSize          = Size(DiffEntry.Side.OLD, from) + 1;
                        cachedSrcSizes[i] = fromSize;
                    }

                    long toSize = cachedDstSizes[j];
                    if (toSize == 0)
                    {
                        toSize            = Size(DiffEntry.Side.NEW, to) + 1;
                        cachedDstSizes[j] = toSize;
                    }

                    long larger  = fromSize >= toSize ? fromSize : toSize;
                    long smaller = fromSize <= toSize ? fromSize : toSize;
                    if (smaller * 100 / larger < renameScore)
                    {
                        // The sizes differ too much for the pair to match.
                        pm.Update(1);
                        continue;
                    }

                    if (fromIndex == null)
                    {
                        try
                        {
                            fromIndex = Hash(DiffEntry.Side.OLD, from);
                        }
                        catch (SimilarityIndex.TableFullException)
                        {
                            // The source cannot be indexed: abandon its
                            // remaining destinations and move to the next source.
                            tableOverflow = true;
                            break;
                        }
                    }

                    SimilarityIndex toIndex;
                    try
                    {
                        toIndex = Hash(DiffEntry.Side.NEW, to);
                    }
                    catch (SimilarityIndex.TableFullException)
                    {
                        if (oversizedDsts == null)
                        {
                            oversizedDsts = new BitSet(dsts.Count);
                        }
                        oversizedDsts.Set(j);
                        tableOverflow = true;
                        pm.Update(1);
                        continue;
                    }

                    int contentScore = fromIndex.Score(toIndex, 10000);

                    // The name score is said to range from 0 to 100; scaling it
                    // by 100 puts it in the same range as the content score so
                    // the two can be blended 99:1 below.
                    int nameScore = NameScore(from.oldPath, to.newPath) * 100;
                    int combined  = (contentScore * 99 + nameScore) / 10000;

                    if (combined >= renameScore)
                    {
                        matrix[used++] = Encode(combined, i, j);
                    }
                    pm.Update(1);
                }
            }

            // Sort only the populated range, which may be the whole matrix or
            // a smaller slice when some pairs scored too low.
            Arrays.Sort(matrix, 0, used);
            return used;
        }