/// <summary>
        /// Find the differences between two texts.  Assumes that the texts do not
        /// have any common prefix or suffix.
        /// </summary>
        /// <param name="text1">Old string to be diffed.</param>
        /// <param name="text2">New string to be diffed.</param>
        /// <param name="checklines">Speedup flag.  If false, then don't run a line-level diff first to identify the changed areas. If true, then run a faster slightly less optimal diff.</param>
        /// <param name="token">Cancellation token for cooperative cancellation</param>
        /// <param name="optimizeForSpeed">Should optimizations be enabled?</param>
        /// <returns></returns>
        private static List <Diff> ComputeImpl(
            string text1,
            string text2,
            bool checklines, CancellationToken token, bool optimizeForSpeed)
        {
            var diffs = new List <Diff>();

            if (text1.Length == 0)
            {
                // Just add some text (speedup).
                diffs.Add(Diff.Insert(text2));
                return(diffs);
            }

            if (text2.Length == 0)
            {
                // Just delete some text (speedup).
                diffs.Add(Diff.Delete(text1));
                return(diffs);
            }

            var longtext  = text1.Length > text2.Length ? text1 : text2;
            var shorttext = text1.Length > text2.Length ? text2 : text1;
            var i         = longtext.IndexOf(shorttext, StringComparison.Ordinal);

            if (i != -1)
            {
                // Shorter text is inside the longer text (speedup).
                var op = text1.Length > text2.Length ? Operation.Delete : Operation.Insert;
                diffs.Add(Diff.Create(op, longtext.Substring(0, i)));
                diffs.Add(Diff.Equal(shorttext));
                diffs.Add(Diff.Create(op, longtext.Substring(i + shorttext.Length)));
                return(diffs);
            }

            if (shorttext.Length == 1)
            {
                // Single character string.
                // After the previous speedup, the character can't be an equality.
                diffs.Add(Diff.Delete(text1));
                diffs.Add(Diff.Insert(text2));
                return(diffs);
            }

            // Don't risk returning a non-optimal diff if we have unlimited time.
            if (optimizeForSpeed)
            {
                // Check to see if the problem can be split in two.
                var result = TextUtil.HalfMatch(text1, text2);
                if (!result.IsEmpty)
                {
                    // A half-match was found, sort out the return data.
                    // Send both pairs off for separate processing.
                    var diffsA = Compute(result.Prefix1, result.Prefix2, checklines, token, optimizeForSpeed);
                    var diffsB = Compute(result.Suffix1, result.Suffix2, checklines, token, optimizeForSpeed);

                    // Merge the results.
                    diffs = diffsA;
                    diffs.Add(Diff.Equal(result.CommonMiddle));
                    diffs.AddRange(diffsB);
                    return(diffs);
                }
            }
            if (checklines && text1.Length > 100 && text2.Length > 100)
            {
                return(LineDiff(text1, text2, token, optimizeForSpeed));
            }

            return(MyersDiffBisect(text1, text2, token, optimizeForSpeed));
        }
        /// <summary>
        /// Find the 'middle snake' of a diff, split the problem in two
        /// and return the recursively constructed diff.
        /// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
        /// </summary>
        /// <param name="text1"></param>
        /// <param name="text2"></param>
        /// <param name="token"></param>
        /// <param name="optimizeForSpeed"></param>
        /// <returns></returns>
        internal static List <Diff> MyersDiffBisect(string text1, string text2, CancellationToken token, bool optimizeForSpeed)
        {
            // Cache the text lengths to prevent multiple calls.
            var text1Length = text1.Length;
            var text2Length = text2.Length;
            var maxD        = (text1Length + text2Length + 1) / 2;
            var vOffset     = maxD;
            var vLength     = 2 * maxD;
            var v1          = new int[vLength];
            var v2          = new int[vLength];

            for (var x = 0; x < vLength; x++)
            {
                v1[x] = -1;
            }
            for (var x = 0; x < vLength; x++)
            {
                v2[x] = -1;
            }
            v1[vOffset + 1] = 0;
            v2[vOffset + 1] = 0;
            var delta = text1Length - text2Length;
            // If the total number of characters is odd, then the front path will
            // collide with the reverse path.
            var front = delta % 2 != 0;
            // Offsets for start and end of k loop.
            // Prevents mapping of space beyond the grid.
            var k1Start = 0;
            var k1End   = 0;
            var k2Start = 0;
            var k2End   = 0;

            for (var d = 0; d < maxD; d++)
            {
                // Bail out if cancelled.
                if (token.IsCancellationRequested)
                {
                    break;
                }

                // Walk the front path one step.
                for (var k1 = -d + k1Start; k1 <= d - k1End; k1 += 2)
                {
                    var k1Offset = vOffset + k1;
                    int x1;
                    if (k1 == -d || k1 != d && v1[k1Offset - 1] < v1[k1Offset + 1])
                    {
                        x1 = v1[k1Offset + 1];
                    }
                    else
                    {
                        x1 = v1[k1Offset - 1] + 1;
                    }
                    var y1 = x1 - k1;
                    while (x1 < text1Length && y1 < text2Length &&
                           text1[x1] == text2[y1])
                    {
                        x1++;
                        y1++;
                    }
                    v1[k1Offset] = x1;
                    if (x1 > text1Length)
                    {
                        // Ran off the right of the graph.
                        k1End += 2;
                    }
                    else if (y1 > text2Length)
                    {
                        // Ran off the bottom of the graph.
                        k1Start += 2;
                    }
                    else if (front)
                    {
                        var k2Offset = vOffset + delta - k1;
                        if (k2Offset >= 0 && k2Offset < vLength && v2[k2Offset] != -1)
                        {
                            // Mirror x2 onto top-left coordinate system.
                            var x2 = text1Length - v2[k2Offset];
                            if (x1 >= x2)
                            {
                                // Overlap detected.
                                return(BisectSplit(text1, text2, x1, y1, token, optimizeForSpeed));
                            }
                        }
                    }
                }

                // Walk the reverse path one step.
                for (var k2 = -d + k2Start; k2 <= d - k2End; k2 += 2)
                {
                    var k2Offset = vOffset + k2;
                    int x2;
                    if (k2 == -d || k2 != d && v2[k2Offset - 1] < v2[k2Offset + 1])
                    {
                        x2 = v2[k2Offset + 1];
                    }
                    else
                    {
                        x2 = v2[k2Offset - 1] + 1;
                    }
                    var y2 = x2 - k2;
                    while (x2 < text1Length && y2 < text2Length &&
                           text1[text1Length - x2 - 1]
                           == text2[text2Length - y2 - 1])
                    {
                        x2++;
                        y2++;
                    }
                    v2[k2Offset] = x2;
                    if (x2 > text1Length)
                    {
                        // Ran off the left of the graph.
                        k2End += 2;
                    }
                    else if (y2 > text2Length)
                    {
                        // Ran off the top of the graph.
                        k2Start += 2;
                    }
                    else if (!front)
                    {
                        var k1Offset = vOffset + delta - k2;
                        if (k1Offset >= 0 && k1Offset < vLength && v1[k1Offset] != -1)
                        {
                            var x1 = v1[k1Offset];
                            var y1 = vOffset + x1 - k1Offset;
                            // Mirror x2 onto top-left coordinate system.
                            x2 = text1Length - v2[k2Offset];
                            if (x1 >= x2)
                            {
                                // Overlap detected.
                                return(BisectSplit(text1, text2, x1, y1, token, optimizeForSpeed));
                            }
                        }
                    }
                }
            }
            // Diff took too long and hit the deadline or
            // number of Diffs equals number of characters, no commonality at all.
            var diffs = new List <Diff> {
                Diff.Delete(text1), Diff.Insert(text2)
            };

            return(diffs);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Reduce the number of edits by eliminating operationally trivial equalities.
        /// </summary>
        /// <param name="diffs"></param>
        /// <param name="diffEditCost"></param>
        public static void CleanupEfficiency(this List <Diff> diffs, short diffEditCost = 4)
        {
            var changes = false;
            // Stack of indices where equalities are found.
            var equalities = new Stack <int>();
            // Always equal to equalities[equalitiesLength-1][1]
            var lastEquality = string.Empty;
            // Is there an insertion operation before the last equality.
            var preIns = false;
            // Is there a deletion operation before the last equality.
            var preDel = false;
            // Is there an insertion operation after the last equality.
            var postIns = false;
            // Is there a deletion operation after the last equality.
            var postDel = false;

            for (var i = 0; i < diffs.Count; i++)
            {
                if (diffs[i].Operation == Operation.Equal)
                {  // Equality found.
                    if (diffs[i].Text.Length < diffEditCost && (postIns || postDel))
                    {
                        // Candidate found.
                        equalities.Push(i);
                        preIns       = postIns;
                        preDel       = postDel;
                        lastEquality = diffs[i].Text;
                    }
                    else
                    {
                        // Not a candidate, and can never become one.
                        equalities.Clear();
                        lastEquality = string.Empty;
                    }
                    postIns = postDel = false;
                }
                else
                {  // An insertion or deletion.
                    if (diffs[i].Operation == Operation.Delete)
                    {
                        postDel = true;
                    }
                    else
                    {
                        postIns = true;
                    }

                    /*
                     * Five types to be split:
                     * <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
                     * <ins>A</ins>X<ins>C</ins><del>D</del>
                     * <ins>A</ins><del>B</del>X<ins>C</ins>
                     * <ins>A</del>X<ins>C</ins><del>D</del>
                     * <ins>A</ins><del>B</del>X<del>C</del>
                     */
                    if ((lastEquality.Length != 0) &&
                        ((preIns && preDel && postIns && postDel) ||
                         ((lastEquality.Length < diffEditCost / 2) &&
                          (preIns ? 1 : 0) + (preDel ? 1 : 0) + (postIns ? 1 : 0)
                          + (postDel ? 1 : 0) == 3)))
                    {
                        diffs.Splice(equalities.Peek(), 1, Diff.Delete(lastEquality), Diff.Insert(lastEquality));
                        equalities.Pop();  // Throw away the equality we just deleted.
                        lastEquality = string.Empty;
                        if (preIns && preDel)
                        {
                            // No changes made which could affect previous entry, keep going.
                            postIns = postDel = true;
                            equalities.Clear();
                        }
                        else
                        {
                            if (equalities.Count > 0)
                            {
                                equalities.Pop();
                            }

                            i       = equalities.Count > 0 ? equalities.Peek() : -1;
                            postIns = postDel = false;
                        }
                        changes = true;
                    }
                }
            }

            if (changes)
            {
                diffs.CleanupMerge();
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Reduce the number of edits by eliminating semantically trivial equalities.
        /// </summary>
        /// <param name="diffs"></param>
        public static void CleanupSemantic(this List <Diff> diffs)
        {
            // Stack of indices where equalities are found.
            var equalities = new Stack <int>();
            // Always equal to equalities[equalitiesLength-1][1]
            string lastEquality = null;
            var    pointer      = 0; // Index of current position.
            // Number of characters that changed prior to the equality.
            var lengthInsertions1 = 0;
            var lengthDeletions1  = 0;
            // Number of characters that changed after the equality.
            var lengthInsertions2 = 0;
            var lengthDeletions2  = 0;

            while (pointer < diffs.Count)
            {
                if (diffs[pointer].Operation == Operation.Equal)
                {  // Equality found.
                    equalities.Push(pointer);
                    lengthInsertions1 = lengthInsertions2;
                    lengthDeletions1  = lengthDeletions2;
                    lengthInsertions2 = 0;
                    lengthDeletions2  = 0;
                    lastEquality      = diffs[pointer].Text;
                }
                else
                {  // an insertion or deletion
                    if (diffs[pointer].Operation == Operation.Insert)
                    {
                        lengthInsertions2 += diffs[pointer].Text.Length;
                    }
                    else
                    {
                        lengthDeletions2 += diffs[pointer].Text.Length;
                    }
                    // Eliminate an equality that is smaller or equal to the edits on both
                    // sides of it.
                    if (lastEquality != null && (lastEquality.Length
                                                 <= Math.Max(lengthInsertions1, lengthDeletions1)) &&
                        (lastEquality.Length
                         <= Math.Max(lengthInsertions2, lengthDeletions2)))
                    {
                        // Duplicate record.

                        diffs.Splice(equalities.Peek(), 1, Diff.Delete(lastEquality), Diff.Insert(lastEquality));

                        // Throw away the equality we just deleted.
                        equalities.Pop();
                        if (equalities.Count > 0)
                        {
                            equalities.Pop();
                        }
                        pointer           = equalities.Count > 0 ? equalities.Peek() : -1;
                        lengthInsertions1 = 0;  // Reset the counters.
                        lengthDeletions1  = 0;
                        lengthInsertions2 = 0;
                        lengthDeletions2  = 0;
                        lastEquality      = null;
                    }
                }
                pointer++;
            }

            diffs.CleanupMerge();
            diffs.CleanupSemanticLossless();

            // Find any overlaps between deletions and insertions.
            // e.g: <del>abcxxx</del><ins>xxxdef</ins>
            //   -> <del>abc</del>xxx<ins>def</ins>
            // e.g: <del>xxxabc</del><ins>defxxx</ins>
            //   -> <ins>def</ins>xxx<del>abc</del>
            // Only extract an overlap if it is as big as the edit ahead or behind it.
            pointer = 1;
            while (pointer < diffs.Count)
            {
                if (diffs[pointer - 1].Operation == Operation.Delete &&
                    diffs[pointer].Operation == Operation.Insert)
                {
                    var deletion       = diffs[pointer - 1].Text;
                    var insertion      = diffs[pointer].Text;
                    var overlapLength1 = TextUtil.CommonOverlap(deletion, insertion);
                    var overlapLength2 = TextUtil.CommonOverlap(insertion, deletion);
                    if (overlapLength1 >= overlapLength2)
                    {
                        if (overlapLength1 >= deletion.Length / 2.0 ||
                            overlapLength1 >= insertion.Length / 2.0)
                        {
                            // Overlap found.
                            // Insert an equality and trim the surrounding edits.
                            var newDiffs = new[]
                            {
                                Diff.Delete(deletion.Substring(0, deletion.Length - overlapLength1)),
                                Diff.Equal(insertion.Substring(0, overlapLength1)),
                                Diff.Insert(insertion.Substring(overlapLength1))
                            };

                            diffs.Splice(pointer - 1, 2, newDiffs);
                            pointer++;
                        }
                    }
                    else
                    {
                        if (overlapLength2 >= deletion.Length / 2.0 ||
                            overlapLength2 >= insertion.Length / 2.0)
                        {
                            // Reverse overlap found.
                            // Insert an equality and swap and trim the surrounding edits.

                            diffs.Splice(pointer - 1, 2,
                                         Diff.Insert(insertion.Substring(0, insertion.Length - overlapLength2)),
                                         Diff.Equal(deletion.Substring(0, overlapLength2)),
                                         Diff.Delete(deletion.Substring(overlapLength2)
                                                     ));
                            pointer++;
                        }
                    }
                    pointer++;
                }
                pointer++;
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Reorder and merge like edit sections.  Merge equalities.
        /// Any edit section can move as long as it doesn't cross an equality.
        /// </summary>
        /// <param name="diffs">list of Diffs</param>
        internal static void CleanupMerge(this List <Diff> diffs)
        {
            // Add a dummy entry at the end.
            diffs.Add(Diff.Equal(string.Empty));
            var nofdiffs = 0;
            var sbDelete = new StringBuilder();
            var sbInsert = new StringBuilder();
            var pointer  = 0;

            while (pointer < diffs.Count)
            {
                switch (diffs[pointer].Operation)
                {
                case Operation.Insert:
                    nofdiffs++;
                    sbInsert.Append(diffs[pointer].Text);
                    pointer++;
                    break;

                case Operation.Delete:
                    nofdiffs++;
                    sbDelete.Append(diffs[pointer].Text);
                    pointer++;
                    break;

                case Operation.Equal:
                    // Upon reaching an equality, check for prior redundancies.
                    if (nofdiffs > 1)
                    {
                        if (sbDelete.Length > 0 && sbInsert.Length > 0)
                        {
                            // Factor out any common prefixies.
                            var commonlength = TextUtil.CommonPrefix(sbInsert, sbDelete);
                            if (commonlength != 0)
                            {
                                var commonprefix = sbInsert.ToString(0, commonlength);
                                sbInsert.Remove(0, commonlength);
                                sbDelete.Remove(0, commonlength);
                                var index = pointer - nofdiffs - 1;
                                if (index >= 0 && diffs[index].Operation == Operation.Equal)
                                {
                                    diffs[index] = diffs[index].Replace(diffs[index].Text + commonprefix);
                                }
                                else
                                {
                                    diffs.Insert(0, Diff.Equal(commonprefix));
                                    pointer++;
                                }
                            }
                            // Factor out any common suffixies.
                            commonlength = TextUtil.CommonSuffix(sbInsert, sbDelete);
                            if (commonlength != 0)
                            {
                                var commonsuffix = sbInsert.ToString(sbInsert.Length - commonlength, commonlength);
                                sbInsert.Remove(sbInsert.Length - commonlength, commonlength);
                                sbDelete.Remove(sbDelete.Length - commonlength, commonlength);
                                diffs[pointer] = diffs[pointer].Replace(commonsuffix + diffs[pointer].Text);
                            }
                        }

                        // Delete the offending records and add the merged ones.
                        IEnumerable <Diff> Replacements()
                        {
                            if (sbDelete.Length > 0)
                            {
                                yield return(Diff.Delete(sbDelete.ToString()));
                            }
                            if (sbInsert.Length > 0)
                            {
                                yield return(Diff.Insert(sbInsert.ToString()));
                            }
                        }

                        var replacements = Replacements().ToList();
                        diffs.Splice(pointer - nofdiffs, nofdiffs, replacements);

                        pointer = pointer - nofdiffs + replacements.Count + 1;
                    }
                    else if (pointer > 0 && diffs[pointer - 1].Operation == Operation.Equal)
                    {
                        // Merge this equality with the previous one.
                        diffs[pointer - 1] = diffs[pointer - 1].Replace(diffs[pointer - 1].Text + diffs[pointer].Text);
                        diffs.RemoveAt(pointer);
                    }
                    else
                    {
                        pointer++;
                    }
                    nofdiffs = 0;
                    sbDelete.Clear();
                    sbInsert.Clear();
                    break;
                }
            }
            if (diffs.Last().Text.Length == 0)
            {
                diffs.RemoveAt(diffs.Count - 1);  // Remove the dummy entry at the end.
            }

            // Second pass: look for single edits surrounded on both sides by
            // equalities which can be shifted sideways to eliminate an equality.
            // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
            var changes = false;

            // Intentionally ignore the first and last element (don't need checking).
            for (var i = 1; i < diffs.Count - 1; i++)
            {
                var previous = diffs[i - 1];
                var current  = diffs[i];
                var next     = diffs[i + 1];
                if (previous.Operation == Operation.Equal && next.Operation == Operation.Equal)
                {
                    // This is a single edit surrounded by equalities.
                    if (current.Text.EndsWith(previous.Text, StringComparison.Ordinal))
                    {
                        // Shift the edit over the previous equality.
                        var text = previous.Text + current.Text.Substring(0, current.Text.Length - previous.Text.Length);
                        diffs[i]     = current.Replace(text);
                        diffs[i + 1] = next.Replace(previous.Text + next.Text);
                        diffs.Splice(i - 1, 1);
                        changes = true;
                    }
                    else if (current.Text.StartsWith(next.Text, StringComparison.Ordinal))
                    {
                        // Shift the edit over the next equality.
                        diffs[i - 1] = previous.Replace(previous.Text + next.Text);
                        diffs[i]     = current.Replace(current.Text.Substring(next.Text.Length) + next.Text);
                        diffs.Splice(i + 1, 1);
                        changes = true;
                    }
                }
            }
            // If shifts were made, the diff needs reordering and another shift sweep.
            if (changes)
            {
                diffs.CleanupMerge();
            }
        }