예제 #1
0
            // Recursively works out the longest common subsequence of two sequences, emitting the
            // diffs implied by each common run as soon as it is identified.  Every step locates a
            // middle "snake" (a common run) and then either finishes off the problem directly
            // (edit script of length 0 or 1) or recurses on the parts before and after the snake.
            private void ComputeLCS(Substring left, Substring right)
            {
                // With an empty side there is no common sequence to report.
                if (left.Length == 0 || right.Length == 0)
                {
                    return;
                }

                int snakeLeftStart, snakeRightStart, snakeLength;
                int editScriptLength = FindMiddleSnake(left, right, out snakeLeftStart, out snakeRightStart, out snakeLength);

                // The snake offsets are relative to the substrings; these bases turn them into
                // the indices passed to EmitDiffsFromCommonSequence.
                int leftBase  = left.Range.StartIndex;
                int rightBase = right.Range.StartIndex;

                if (editScriptLength <= 1)
                {
                    // Length 0: both sequences are equal.
                    // Length 1: exactly one symbol was added to or removed from one of the sequences.
                    if (editScriptLength != 0)
                    {
                        // The snake covers the common part after the single change, so the common
                        // part before the change has to be emitted separately.
                        EmitDiffsFromCommonSequence(
                            leftBase,
                            rightBase,
                            Math.Min(snakeLeftStart, snakeRightStart));
                    }

                    EmitDiffsFromCommonSequence(
                        leftBase + snakeLeftStart,
                        rightBase + snakeRightStart,
                        snakeLength);
                    return;
                }

                // Two or more differences remain: divide around the snake and conquer each half.
                ComputeLCS(
                    left.Extract(0, snakeLeftStart),
                    right.Extract(0, snakeRightStart));

                EmitDiffsFromCommonSequence(
                    leftBase + snakeLeftStart,
                    rightBase + snakeRightStart,
                    snakeLength);

                ComputeLCS(
                    left.Extract(snakeLeftStart + snakeLength),
                    right.Extract(snakeRightStart + snakeLength));
            }
예제 #2
0
            // Runs the diff algorithm over two substrings, adding the resulting diffs to the
            // supplied list.
            //
            // The search effort limit starts at CeilNPlusMOverTwo(left.Length, right.Length).  When
            // boundRuntime is set and the product of the two lengths exceeds TooLong, the limit is
            // replaced by that value raised to the power (PowLimit - 1.0), truncated to an int.
            public static void PopulateDiffs(IList <Diff> diffs, Substring left, Substring right, bool boundRuntime)
            {
                int leftLength  = left.Length;
                int rightLength = right.Length;

                int effortLimit = CeilNPlusMOverTwo(leftLength, rightLength);

                if (boundRuntime)
                {
                    // Multiply as 64-bit values so the product of two large lengths cannot overflow.
                    long problemSize = ((long)leftLength) * ((long)rightLength);
                    if (problemSize > TooLong)
                    {
                        effortLimit = (int)Math.Pow(effortLimit, PowLimit - 1.0);
                    }
                }

                DiffAlgorithm differ = new DiffAlgorithm(diffs, left.Range.StartIndex, right.Range.StartIndex, effortLimit);

                differ.ComputeLCS(left, right);

                // Finish up at the end of both ranges once the LCS pass is complete.
                differ.FlushDiffs(left.Range.EndIndex, right.Range.EndIndex);
            }
예제 #3
0
 // Writes a context string to the writer, abbreviating it when it is longer than
 // maxContextLength.
 //
 // A context of at most maxContextLength characters is written verbatim.  A longer context is
 // written as its first and last (maxContextLength / 2) characters separated by an ellipsis.
 // Nothing is written when the context needs abbreviating but maxContextLength is below 2,
 // because there is then no room for either half.
 private static void WriteContext(MarkupStreamWriter writer, Substring context, int maxContextLength)
 {
     // A context of exactly maxContextLength characters still fits, so it is written in full
     // rather than being split around an ellipsis that would wrongly suggest elided text.
     if (context.Length <= maxContextLength)
     {
         writer.Write(context.ToString());
         return;
     }

     int split = maxContextLength / 2;
     if (split > 0)
     {
         // Each piece is converted with ToString() so that all three writes are handled
         // the same way.
         writer.Write(context.Extract(0, split).ToString());
         writer.WriteEllipsis();
         writer.Write(context.Extract(context.Length - split).ToString());
     }
 }
예제 #4
0
        // Diffs two substrings, shrinking the problem first by stripping whatever prefix and
        // suffix they have in common.  Only the remaining middle parts are handed to SlowDiff.
        // Diffs are appended in order: common prefix, middle diffs, common suffix.
        private static void FastDiff(IList <Diff> diffs, Substring left, Substring right, bool boundRuntime)
        {
            int leftLength  = left.Length;
            int rightLength = right.Length;

            // An empty document on either side means the change spans both ranges entirely.
            if (leftLength == 0 || rightLength == 0)
            {
                diffs.Add(new Diff(DiffKind.Change, left.Range, right.Range));
                return;
            }

            int prefixLength = left.FindCommonPrefixLength(right);

            // Both lengths are non-zero here, so this can only hold for a non-empty prefix that
            // covers both documents completely, i.e. the documents are identical.
            if (prefixLength == leftLength && prefixLength == rightLength)
            {
                diffs.Add(new Diff(DiffKind.NoChange, left.Range, right.Range));
                return;
            }

            if (prefixLength != 0)
            {
                Range leftPrefix  = new Range(left.Range.StartIndex, prefixLength);
                Range rightPrefix = new Range(right.Range.StartIndex, prefixLength);
                diffs.Add(new Diff(DiffKind.NoChange, leftPrefix, rightPrefix));
            }

            // Look for the common suffix only in what follows the prefix, so the two never overlap.
            Substring leftAfterPrefix  = left.Extract(prefixLength);
            Substring rightAfterPrefix = right.Extract(prefixLength);
            int suffixLength = leftAfterPrefix.FindCommonSuffixLength(rightAfterPrefix);

            // The middle is whatever lies between the common prefix and the common suffix.
            Substring leftMiddle  = left.Extract(prefixLength, leftLength - prefixLength - suffixLength);
            Substring rightMiddle = right.Extract(prefixLength, rightLength - prefixLength - suffixLength);

            SlowDiff(diffs, leftMiddle, rightMiddle, boundRuntime);

            if (suffixLength == 0)
            {
                return;
            }

            // The common suffix starts where each middle part ends.
            Range leftSuffix  = new Range(leftMiddle.Range.EndIndex, suffixLength);
            Range rightSuffix = new Range(rightMiddle.Range.EndIndex, suffixLength);
            diffs.Add(new Diff(DiffKind.NoChange, leftSuffix, rightSuffix));
        }
예제 #5
0
            // Finds a middle "snake", which is a (possibly empty) sequence of diagonal edges in the edit
            // graph.  Thus it directly represents a common sequence.
            //
            // In essence, this function searches D-paths forward and backward in the sequence until it
            // finds the middle snake.  The middle snake informs us about a common sequence sandwiched
            // between two other sequences that may contain changes.  By definition, the left and right
            // middle snakes must be of equal length.
            //
            // Parameters:
            //   left, right                - the two sequences being compared.
            //   middleSnakeLeftStartIndex  - receives the offset within left at which the snake starts.
            //   middleSnakeRightStartIndex - receives the offset within right at which the snake starts.
            //   middleSnakeLength          - receives the number of elements in the snake (0 when the
            //                                search limit was reached).
            //
            // Returns (2 * d - 1) when the overlap is detected during the forward search at step d,
            // (2 * d) when it is detected during the reverse search at step d, and int.MaxValue when
            // the loop ends without detecting an overlap and an approximate split point is chosen.
            //
            // leftVector, rightVector and max are declared outside this method.  Every diagonal k is
            // stored at index (max + k), so the vectors must be able to hold indices from (max - end - 1)
            // to (max + end + 1) -- NOTE(review): their allocation is not visible here; confirm.
            private int FindMiddleSnake(Substring left, Substring right, out int middleSnakeLeftStartIndex, out int middleSnakeRightStartIndex, out int middleSnakeLength)
            {
                int n = left.Length;
                int m = right.Length;

                int  delta      = n - m;
                bool isDeltaOdd = (delta & 1) != 0;

                // Seed the entries read by the d = 0 iteration: the forward search on k = 0 reads
                // leftVector[max + 1] (start at x = 0) and the reverse search on k = 0 reads
                // rightVector[max - 1] (start at u = n).
                leftVector[max + 1]  = 0;
                rightVector[max - 1] = n;

                // Never search further than the configured limit.
                int end = Math.Min(CeilNPlusMOverTwo(n, m), max);

                for (int d = 0; d <= end; d++)
                {
                    // Search forward D-paths.
                    for (int k = -d; k <= d; k += 2)
                    {
                        // Find the end of the furthest reaching forward D-path in diagonal k.
                        // Either continue from diagonal k + 1 keeping x (a step that advances in right),
                        // or from diagonal k - 1 with x + 1 (a step that advances in left).
                        int x = k == -d || k != d && leftVector[max + k - 1] < leftVector[max + k + 1]
                            ? leftVector[max + k + 1]
                            : leftVector[max + k - 1] + 1;

                        // Remember where the snake starts, then follow matching elements along the diagonal.
                        int origX = x;
                        for (int y = x - k; x < n && y < m && left[x] == right[y];)
                        {
                            x += 1;
                            y += 1;
                        }

                        leftVector[max + k] = x;

                        // If the D-path is feasible and overlaps the furthest reaching reverse (D-1)-Path in diagonal k
                        // then we have found the middle snake.
                        if (isDeltaOdd && k >= delta - d + 1 && k <= delta + d - 1)
                        {
                            // The reverse vector is indexed relative to delta, hence k - delta.
                            int u = rightVector[max + k - delta];
                            if (x >= u)
                            {
                                middleSnakeLeftStartIndex  = origX;
                                middleSnakeRightStartIndex = origX - k;
                                middleSnakeLength          = x - origX;
                                return(d * 2 - 1);
                            }
                        }
                    }

                    // Search reverse D-paths.
                    for (int k = -d; k <= d; k += 2)
                    {
                        // Find the end of the furthest reaching reverse D-path in diagonal k + delta.
                        // Either continue from index k - 1 keeping u, or from index k + 1 with u - 1.
                        int u = k == d || k != -d && rightVector[max + k - 1] < rightVector[max + k + 1]
                            ? rightVector[max + k - 1]
                            : rightVector[max + k + 1] - 1;

                        int kPlusDelta = k + delta;
                        int origU      = u;
                        int v;
                        // Follow matching elements backwards; u and v end up at the start of the snake.
                        for (v = u - kPlusDelta; u > 0 && v > 0 && left[u - 1] == right[v - 1];)
                        {
                            u -= 1;
                            v -= 1;
                        }

                        rightVector[max + k] = u;

                        // If the D-path is feasible and overlaps the furthest reaching forward D-Path in diagonal k
                        // then we have found the middle snake.
                        if (!isDeltaOdd && kPlusDelta >= -d && kPlusDelta <= d)
                        {
                            int x = leftVector[max + kPlusDelta];
                            if (u <= x)
                            {
                                middleSnakeLeftStartIndex  = u;
                                middleSnakeRightStartIndex = v;
                                middleSnakeLength          = origU - u;
                                return(d * 2);
                            }
                        }
                    }
                }

                // We have exceeded the maximum effort we are willing to expend finding a diff.
                //
                // So we artificially divide the problem by finding the snakes in the forward / reverse
                // direction that have the most progress toward (N, M) / (0, 0).  These are the
                // ones that maximize x + y / minimize u + v.  The snake we return will not actually
                // be the middle snake (since we haven't found it yet) but it will be good enough
                // to reduce the problem.
                //
                // These snakes all begin on the same diagonal as the others of equal
                // progress in the same direction.  As there may be several of them, we need a way
                // to decide which one to pursue.
                //
                // The Eclipse LCS implementation chooses the median of these snakes with respect to k.
                // Intuitively this is the one that is nearer the direct line between (0, 0) and (N, M)
                // so it has a good chance of forming a path with more balanced changes between A and B
                // than the snakes that consist of significantly more changes to A than B or vice-versa.
                // Consequently the median of these snakes should yield a pretty good approximation. -- Jeff.

                int bestProgress = 0;
                Dictionary <int, bool> bestKs = new Dictionary <int, bool>(); // with the forward direction indicated by value true

                // Only the diagonals written during the final iteration (d == end) are examined.
                for (int k = -end; k <= end; k += 2)
                {
                    // Forward direction.
                    int x = leftVector[max + k];
                    int y = x - k;
                    if (x < n && y < m)
                    {
                        int progress = x + y;
                        if (progress >= bestProgress)
                        {
                            // A strictly better candidate discards all the earlier ones; a tie is added to them.
                            if (progress > bestProgress)
                            {
                                bestProgress = progress;
                                bestKs.Clear();
                            }
                            bestKs[k] = true;
                        }
                    }

                    // Reverse direction.
                    int u = rightVector[max + k];
                    int v = u - k - delta;
                    if (u >= 0 && v >= 0)
                    {
                        // Progress is measured as the distance travelled back from (n, m).
                        int progress = n + m - u - v;
                        if (progress >= bestProgress)
                        {
                            if (progress > bestProgress)
                            {
                                bestProgress = progress;
                                bestKs.Clear();
                            }
                            // Forward and reverse candidates share the key k, so this replaces a forward
                            // entry recorded for the same k just above.
                            bestKs[k] = false;
                        }
                    }
                }

                int[] sortedKs = new int[bestKs.Count];
                bestKs.Keys.CopyTo(sortedKs, 0);
                Array.Sort(sortedKs);

                // For an even number of candidates this picks the upper of the two middle values.
                int medianK = sortedKs[sortedKs.Length / 2];

                if (bestKs[medianK])
                {
                    // Forward candidate: split at the end of its furthest reaching path.
                    int x = leftVector[max + medianK];
                    int y = x - medianK;
                    middleSnakeLeftStartIndex  = x;
                    middleSnakeRightStartIndex = y;
                }
                else
                {
                    // Reverse candidate: split at the end of its furthest reaching path.
                    int u = rightVector[max + medianK];
                    int v = u - medianK - delta;
                    middleSnakeLeftStartIndex  = u;
                    middleSnakeRightStartIndex = v;
                }

                // The split point is reported as an empty snake.
                middleSnakeLength = 0;

                // We need to return the length of the shortest edit script but we don't actually know
                // what it is.  Fortunately the caller does not care as long as it's greater than 2, which
                // it must be since d > end >= max > 2.
                //
                // NOTE(review): ComputeLCS only tests whether the result is greater than 1, and "end" is
                // computed above as Math.Min(..., max), i.e. end <= max, so the inequality chain in the
                // comment above does not match the code -- confirm the intended invariant.
                return(int.MaxValue);
            }
예제 #6
0
 // Computes the diffs between left and right by delegating directly to
 // DiffAlgorithm.PopulateDiffs; all four arguments are passed through unchanged.
 private static void SlowDiff(IList <Diff> diffs, Substring left, Substring right, bool boundRuntime)
 {
     DiffAlgorithm.PopulateDiffs(diffs, left, right, boundRuntime);
 }