/// <summary>
        /// Computes the differences between two texts.  Any prefix and suffix
        /// shared by both texts is set aside first, so that only the differing
        /// middle section is handed to the diff implementation.
        /// </summary>
        /// <param name="text1">Old string to be diffed.</param>
        /// <param name="text2">New string to be diffed.</param>
        /// <param name="checklines">Speedup flag, forwarded to the diff implementation.  If false, no line-level diff is run first to identify the changed areas. If true, a faster, slightly less optimal diff is run.</param>
        /// <param name="token">Cancellation token for cooperative cancellation, forwarded to the diff implementation.</param>
        /// <param name="optimizeForSpeed">Whether optimizations should be enabled; forwarded to the diff implementation.</param>
        /// <returns>
        /// An empty list when both texts are empty; a single equality when the common
        /// prefix spans both texts (they are identical); otherwise the diff of the
        /// differing middle section with the shared prefix and suffix restored as
        /// equalities, after a final merge cleanup.
        /// </returns>
        public static List <Diff> Compute(string text1, string text2, bool checklines, CancellationToken token, bool optimizeForSpeed)
        {
            var length1 = text1.Length;
            var length2 = text2.Length;

            // Nothing to compare at all.
            if (length1 == 0 && length2 == 0)
            {
                return new List<Diff>();
            }

            var prefixLength = TextUtil.CommonPrefix(text1, text2);

            // The shared prefix covers both texts entirely, so they are equal.
            if (prefixLength == length1 && prefixLength == length2)
            {
                return new List<Diff> { Diff.Equal(text1) };
            }

            // Set the shared prefix aside (speedup).
            var sharedHead = text1.Substring(0, prefixLength);
            var oldRest    = text1.Substring(prefixLength);
            var newRest    = text2.Substring(prefixLength);

            // Set the shared suffix aside (speedup).
            var suffixLength = TextUtil.CommonSuffix(oldRest, newRest);
            var sharedTail   = oldRest.Substring(oldRest.Length - suffixLength);
            var oldMiddle    = oldRest.Substring(0, oldRest.Length - suffixLength);
            var newMiddle    = newRest.Substring(0, newRest.Length - suffixLength);

            // Only the differing middle section needs the real diff.
            var result = ComputeImpl(oldMiddle, newMiddle, checklines, token, optimizeForSpeed);

            // Put the shared prefix and suffix back as equalities.
            if (sharedHead.Length > 0)
            {
                result.Insert(0, Diff.Equal(sharedHead));
            }
            if (sharedTail.Length > 0)
            {
                result.Add(Diff.Equal(sharedTail));
            }

            result.CleanupMerge();
            return result;
        }
示例#2
0
        /// <summary>
        /// Finds single edits that sit between two equalities and slides them
        /// sideways so that the edit lines up with a word boundary.
        /// e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
        /// </summary>
        /// <param name="diffs">The diff list to adjust; it is modified in place.</param>
        internal static void CleanupSemanticLossless(this List <Diff> diffs)
        {
            // The first and last entries can never be surrounded on both sides,
            // so the scan covers interior positions only.
            for (var index = 1; index < diffs.Count - 1; index++)
            {
                var before = diffs[index - 1];
                var middle = diffs[index];
                var after  = diffs[index + 1];

                if (before.Operation != Operation.Equal || after.Operation != Operation.Equal)
                {
                    continue;
                }

                // A single edit with an equality on either side.
                var leftText  = before.Text;
                var editText  = middle.Text;
                var rightText = after.Text;

                // Step 1: slide the edit as far to the left as it will go.
                var overlap = TextUtil.CommonSuffix(leftText, editText);
                if (overlap > 0)
                {
                    var moved = editText.Substring(editText.Length - overlap);
                    leftText  = leftText.Substring(0, leftText.Length - overlap);
                    editText  = moved + editText.Substring(0, editText.Length - overlap);
                    rightText = moved + rightText;
                }

                // Step 2: walk to the right one character at a time and
                // remember the best-scoring position.
                var bestLeft  = leftText;
                var bestEdit  = editText;
                var bestRight = rightText;
                var bestScore = DiffCleanupSemanticScore(leftText, editText) + DiffCleanupSemanticScore(editText, rightText);

                while (editText.Length != 0 && rightText.Length != 0 && editText[0] == rightText[0])
                {
                    var leading  = editText[0];
                    var trailing = rightText[0];

                    leftText += leading;
                    editText  = editText.Substring(1) + trailing;
                    rightText = rightText.Substring(1);

                    var candidate = DiffCleanupSemanticScore(leftText, editText) + DiffCleanupSemanticScore(editText, rightText);

                    // Ties are accepted (>=) so that edits prefer trailing
                    // over leading whitespace.
                    if (candidate >= bestScore)
                    {
                        bestScore = candidate;
                        bestLeft  = leftText;
                        bestEdit  = editText;
                        bestRight = rightText;
                    }
                }

                if (before.Text == bestLeft)
                {
                    // The edit is already in its best position.
                    continue;
                }

                // Write the improved triple back, dropping any part that became empty.
                var rebuilt = new[]
                {
                    Diff.Equal(bestLeft),
                    middle.Replace(bestEdit),
                    Diff.Equal(bestRight)
                }.Where(d => !string.IsNullOrEmpty(d.Text))
                .ToArray();

                diffs.Splice(index - 1, 3, rebuilt);

                // Compensate for every entry that was dropped from the triple.
                index -= 3 - rebuilt.Length;
            }
        }
示例#3
0
        /// <summary>
        /// Normalizes a diff list in place: each run of consecutive insertions
        /// and deletions is collapsed into at most one deletion followed by one
        /// insertion, and neighbouring equalities are joined together.
        /// Edits are only regrouped between equalities; they never cross one.
        /// </summary>
        /// <param name="diffs">The diff list to normalize; it is modified in place.</param>
        internal static void CleanupMerge(this List <Diff> diffs)
        {
            // A trailing empty equality works as a sentinel, so the final run
            // of edits is flushed by the same branch as every other run.
            diffs.Add(Diff.Equal(string.Empty));

            var pendingEdits = 0;
            var deletedText  = new StringBuilder();
            var insertedText = new StringBuilder();
            var cursor       = 0;

            while (cursor < diffs.Count)
            {
                var entry = diffs[cursor];

                switch (entry.Operation)
                {
                case Operation.Insert:
                case Operation.Delete:
                {
                    // Accumulate the edit text; it is written back once the
                    // next equality is reached.
                    pendingEdits++;
                    var buffer = entry.Operation == Operation.Insert ? insertedText : deletedText;
                    buffer.Append(entry.Text);
                    cursor++;
                    break;
                }

                case Operation.Equal:
                {
                    if (pendingEdits > 1)
                    {
                        if (deletedText.Length > 0 && insertedText.Length > 0)
                        {
                            // Text shared at the start of both edits is really an equality.
                            var sharedLength = TextUtil.CommonPrefix(insertedText, deletedText);
                            if (sharedLength != 0)
                            {
                                var head = insertedText.ToString(0, sharedLength);
                                insertedText.Remove(0, sharedLength);
                                deletedText.Remove(0, sharedLength);

                                var precedingIndex       = cursor - pendingEdits - 1;
                                var hasPrecedingEquality = precedingIndex >= 0 && diffs[precedingIndex].Operation == Operation.Equal;
                                if (hasPrecedingEquality)
                                {
                                    // Grow the equality in front of the run.
                                    var preceding = diffs[precedingIndex];
                                    diffs[precedingIndex] = preceding.Replace(preceding.Text + head);
                                }
                                else
                                {
                                    // No equality in front: create one at the head of
                                    // the list, which shifts every index by one.
                                    diffs.Insert(0, Diff.Equal(head));
                                    cursor++;
                                }
                            }

                            // Text shared at the end of both edits moves into the
                            // equality that follows the run.
                            sharedLength = TextUtil.CommonSuffix(insertedText, deletedText);
                            if (sharedLength != 0)
                            {
                                var tailStart = insertedText.Length - sharedLength;
                                var tail      = insertedText.ToString(tailStart, sharedLength);
                                insertedText.Remove(tailStart, sharedLength);
                                deletedText.Remove(deletedText.Length - sharedLength, sharedLength);

                                var following = diffs[cursor];
                                diffs[cursor] = following.Replace(tail + following.Text);
                            }
                        }

                        // Swap the whole run for the merged edits: deletion first,
                        // then insertion, skipping whichever ended up empty.
                        var merged = new List<Diff>(2);
                        if (deletedText.Length > 0)
                        {
                            merged.Add(Diff.Delete(deletedText.ToString()));
                        }
                        if (insertedText.Length > 0)
                        {
                            merged.Add(Diff.Insert(insertedText.ToString()));
                        }

                        var runStart = cursor - pendingEdits;
                        diffs.Splice(runStart, pendingEdits, merged);

                        // Continue just past the equality that ended the run.
                        cursor = runStart + merged.Count + 1;
                    }
                    else if (cursor > 0 && diffs[cursor - 1].Operation == Operation.Equal)
                    {
                        // Two equalities in a row: fold this one into its predecessor.
                        var earlier = diffs[cursor - 1];
                        diffs[cursor - 1] = earlier.Replace(earlier.Text + diffs[cursor].Text);
                        diffs.RemoveAt(cursor);
                    }
                    else
                    {
                        cursor++;
                    }

                    // Start collecting the next run from scratch.
                    pendingEdits = 0;
                    deletedText.Clear();
                    insertedText.Clear();
                    break;
                }
                }
            }

            // Drop the sentinel, unless it is no longer empty (it may have
            // absorbed a factored-out common suffix).
            if (diffs.Last().Text.Length == 0)
            {
                diffs.RemoveAt(diffs.Count - 1);
            }

            // Second pass: a single edit with equalities on both sides can
            // sometimes be slid sideways so that one of the equalities disappears.
            // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
            var shifted  = false;
            var position = 1;

            // The first and last entries are skipped on purpose (nothing to check).
            while (position < diffs.Count - 1)
            {
                var before = diffs[position - 1];
                var edit   = diffs[position];
                var after  = diffs[position + 1];

                if (before.Operation == Operation.Equal && after.Operation == Operation.Equal)
                {
                    if (edit.Text.EndsWith(before.Text, StringComparison.Ordinal))
                    {
                        // Slide the edit left, over the preceding equality.
                        var keptLength = edit.Text.Length - before.Text.Length;
                        diffs[position]     = edit.Replace(before.Text + edit.Text.Substring(0, keptLength));
                        diffs[position + 1] = after.Replace(before.Text + after.Text);
                        diffs.Splice(position - 1, 1);
                        shifted = true;
                    }
                    else if (edit.Text.StartsWith(after.Text, StringComparison.Ordinal))
                    {
                        // Slide the edit right, over the following equality.
                        diffs[position - 1] = before.Replace(before.Text + after.Text);
                        diffs[position]     = edit.Replace(edit.Text.Substring(after.Text.Length) + after.Text);
                        diffs.Splice(position + 1, 1);
                        shifted = true;
                    }
                }

                position++;
            }

            // A shift can leave edits or equalities adjacent again, so the
            // list needs another full merge-and-shift sweep.
            if (shifted)
            {
                diffs.CleanupMerge();
            }
        }