/// <summary>
/// Computes the diff of two texts, trying a series of cheap shortcuts before
/// falling back to a full diff. Assumes that the texts do not have any common
/// prefix or suffix.
/// </summary>
/// <param name="text1">Old string to be diffed.</param>
/// <param name="text2">New string to be diffed.</param>
/// <param name="checklines">When true and both texts are longer than 100 characters, the work is handed to <c>LineDiff</c>; otherwise <c>MyersDiffBisect</c> is used.</param>
/// <param name="token">Cancellation token, forwarded to the nested diff routines.</param>
/// <param name="optimizeForSpeed">When true, a half-match split is attempted before the full diff. The flag is also forwarded to the nested routines.</param>
/// <returns>The list of diffs describing the change from <paramref name="text1"/> to <paramref name="text2"/>.</returns>
private static List<Diff> ComputeImpl(
    string text1,
    string text2,
    bool checklines,
    CancellationToken token,
    bool optimizeForSpeed)
{
    // Shortcut: nothing on the old side, so everything is an insertion.
    if (text1.Length == 0)
    {
        return new List<Diff> { Diff.Insert(text2) };
    }

    // Shortcut: nothing on the new side, so everything is a deletion.
    if (text2.Length == 0)
    {
        return new List<Diff> { Diff.Delete(text1) };
    }

    var oldIsLonger = text1.Length > text2.Length;
    var longer = oldIsLonger ? text1 : text2;
    var shorter = oldIsLonger ? text2 : text1;

    // Shortcut: the shorter text occurs verbatim inside the longer one.
    var offset = longer.IndexOf(shorter, StringComparison.Ordinal);
    if (offset != -1)
    {
        var surroundingOp = oldIsLonger ? Operation.Delete : Operation.Insert;
        return new List<Diff>
        {
            Diff.Create(surroundingOp, longer.Substring(0, offset)),
            Diff.Equal(shorter),
            Diff.Create(surroundingOp, longer.Substring(offset + shorter.Length))
        };
    }

    // Shortcut: a single character that was not found above cannot be an
    // equality, so the change is a plain replace.
    if (shorter.Length == 1)
    {
        return new List<Diff> { Diff.Delete(text1), Diff.Insert(text2) };
    }

    // Only accept a possibly non-optimal split when speed was asked for.
    if (optimizeForSpeed)
    {
        var halfMatch = TextUtil.HalfMatch(text1, text2);
        if (!halfMatch.IsEmpty)
        {
            // Diff the two halves independently, then stitch them together
            // around the shared middle.
            var head = Compute(halfMatch.Prefix1, halfMatch.Prefix2, checklines, token, optimizeForSpeed);
            var tail = Compute(halfMatch.Suffix1, halfMatch.Suffix2, checklines, token, optimizeForSpeed);
            head.Add(Diff.Equal(halfMatch.CommonMiddle));
            head.AddRange(tail);
            return head;
        }
    }

    var useLineMode = checklines && text1.Length > 100 && text2.Length > 100;
    if (useLineMode)
    {
        return LineDiff(text1, text2, token, optimizeForSpeed);
    }

    return MyersDiffBisect(text1, text2, token, optimizeForSpeed);
}
/// <summary>
/// Slides single edits that sit between two equalities sideways so that the
/// edit lines up with the best-scoring boundary.
/// e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
/// </summary>
/// <param name="diffs">List of diffs, modified in place.</param>
public static void CleanupSemanticLossless(this List<Diff> diffs)
{
    // The first and last elements can never be surrounded on both sides,
    // so the scan runs over the interior only.
    for (var index = 1; index < diffs.Count - 1; index++)
    {
        if (diffs[index - 1].Operation != Operation.Equal
            || diffs[index + 1].Operation != Operation.Equal)
        {
            continue;
        }

        // A single edit with an equality on each side.
        var left = diffs[index - 1].Text;
        var middle = diffs[index].Text;
        var right = diffs[index + 1].Text;

        // Step 1: push the edit as far to the left as it will go.
        var shared = TextUtil.CommonSuffix(left, middle);
        if (shared > 0)
        {
            var moved = middle.Substring(middle.Length - shared);
            left = left.Substring(0, left.Length - shared);
            middle = moved + middle.Substring(0, middle.Length - shared);
            right = moved + right;
        }

        // Step 2: walk right one character at a time, remembering the
        // highest-scoring arrangement seen.
        var bestLeft = left;
        var bestMiddle = middle;
        var bestRight = right;
        var bestScore = DiffCleanupSemanticScore(left, middle)
                        + DiffCleanupSemanticScore(middle, right);
        while (middle.Length != 0 && right.Length != 0 && middle[0] == right[0])
        {
            left += middle[0];
            middle = middle.Substring(1) + right[0];
            right = right.Substring(1);

            var candidateScore = DiffCleanupSemanticScore(left, middle)
                                 + DiffCleanupSemanticScore(middle, right);

            // Ties go to the later position, which encourages trailing
            // rather than leading whitespace on edits.
            if (candidateScore >= bestScore)
            {
                bestScore = candidateScore;
                bestLeft = left;
                bestMiddle = middle;
                bestRight = right;
            }
        }

        if (diffs[index - 1].Text == bestLeft)
        {
            // Nothing moved; leave this triple untouched.
            continue;
        }

        // Write the improved triple back, dropping any part that became empty.
        var candidates = new[]
        {
            Diff.Equal(bestLeft),
            diffs[index].Replace(bestMiddle),
            Diff.Equal(bestRight)
        };
        var kept = new List<Diff>(candidates.Length);
        foreach (var candidate in candidates)
        {
            if (!string.IsNullOrEmpty(candidate.Text))
            {
                kept.Add(candidate);
            }
        }

        var replacement = kept.ToArray();
        diffs.Splice(index - 1, 3, replacement);

        // Step back by the number of entries that were dropped.
        index -= 3 - replacement.Length;
    }
}
/// <summary>
/// Reduce the number of edits by eliminating semantically trivial equalities,
/// then extract overlaps between adjacent deletions and insertions.
/// </summary>
/// <param name="diffs">List of diffs, modified in place.</param>
public static void CleanupSemantic(this List<Diff> diffs)
{
    // Indices of the equalities seen so far, most recent on top.
    var equalityIndexes = new Stack<int>();

    // Text of the most recent equality, or null when there is no candidate.
    string candidate = null;

    // Characters changed before the candidate equality.
    var insertedBefore = 0;
    var deletedBefore = 0;

    // Characters changed after the candidate equality.
    var insertedAfter = 0;
    var deletedAfter = 0;

    for (var index = 0; index < diffs.Count; index++)
    {
        var current = diffs[index];
        if (current.Operation == Operation.Equal)
        {
            // New candidate: what was "after" the old one is now "before".
            equalityIndexes.Push(index);
            insertedBefore = insertedAfter;
            deletedBefore = deletedAfter;
            insertedAfter = 0;
            deletedAfter = 0;
            candidate = current.Text;
            continue;
        }

        // An insertion or a deletion.
        if (current.Operation == Operation.Insert)
        {
            insertedAfter += current.Text.Length;
        }
        else
        {
            deletedAfter += current.Text.Length;
        }

        // An equality is dropped when it is no longer than the edits on
        // both of its sides.
        var isTrivial = candidate != null
                        && candidate.Length <= Math.Max(insertedBefore, deletedBefore)
                        && candidate.Length <= Math.Max(insertedAfter, deletedAfter);
        if (!isTrivial)
        {
            continue;
        }

        // Replace the equality with a delete + insert of the same text and
        // forget its index.
        diffs.Splice(equalityIndexes.Pop(), 1, Diff.Delete(candidate), Diff.Insert(candidate));

        // Also discard the equality before it.
        if (equalityIndexes.Count > 0)
        {
            equalityIndexes.Pop();
        }

        // Rewind to the last remaining equality, or to the very start;
        // the loop increment then moves to the following element.
        index = equalityIndexes.Count > 0 ? equalityIndexes.Peek() : -1;

        insertedBefore = 0;
        deletedBefore = 0;
        insertedAfter = 0;
        deletedAfter = 0;
        candidate = null;
    }

    diffs.CleanupMerge();
    diffs.CleanupSemanticLossless();

    // Find any overlaps between deletions and insertions.
    // e.g: <del>abcxxx</del><ins>xxxdef</ins>
    //   -> <del>abc</del>xxx<ins>def</ins>
    // e.g: <del>xxxabc</del><ins>defxxx</ins>
    //   -> <ins>def</ins>xxx<del>abc</del>
    // An overlap is only extracted when it covers at least half of the
    // deletion or half of the insertion.
    for (var index = 1; index < diffs.Count; index++)
    {
        if (diffs[index - 1].Operation != Operation.Delete
            || diffs[index].Operation != Operation.Insert)
        {
            continue;
        }

        var removed = diffs[index - 1].Text;
        var added = diffs[index].Text;
        var forwardOverlap = TextUtil.CommonOverlap(removed, added);
        var backwardOverlap = TextUtil.CommonOverlap(added, removed);
        var useForward = forwardOverlap >= backwardOverlap;
        var overlap = useForward ? forwardOverlap : backwardOverlap;

        if (overlap >= removed.Length / 2.0 || overlap >= added.Length / 2.0)
        {
            if (useForward)
            {
                // End of the deletion matches the start of the insertion:
                // insert an equality and trim the surrounding edits.
                diffs.Splice(index - 1, 2,
                    Diff.Delete(removed.Substring(0, removed.Length - forwardOverlap)),
                    Diff.Equal(added.Substring(0, forwardOverlap)),
                    Diff.Insert(added.Substring(forwardOverlap)));
            }
            else
            {
                // End of the insertion matches the start of the deletion:
                // insert an equality, then swap and trim the edits.
                diffs.Splice(index - 1, 2,
                    Diff.Insert(added.Substring(0, added.Length - backwardOverlap)),
                    Diff.Equal(removed.Substring(0, backwardOverlap)),
                    Diff.Delete(removed.Substring(backwardOverlap)));
            }

            // Account for the extra equality that was just inserted.
            index++;
        }

        // Skip past the insertion of the pair just examined.
        index++;
    }
}
/// <summary>
/// Reorder and merge like edit sections. Merge equalities.
/// Any edit section can move as long as it doesn't cross an equality.
/// </summary>
/// <remarks>
/// Text shared by the start or end of an adjacent delete/insert run is moved
/// into the neighbouring equality. If that leaves the merged deletion or
/// insertion empty, no entry is emitted for it, so this method does not
/// introduce empty edits.
/// </remarks>
/// <param name="diffs">List of diffs, modified in place.</param>
public static void CleanupMerge(this List<Diff> diffs)
{
    // Add a dummy entry at the end so that a trailing run of edits is
    // processed by the equality branch below.
    diffs.Add(Diff.Equal(string.Empty));
    var countDelete = 0;
    var countInsert = 0;
    var sbDelete = new StringBuilder();
    var sbInsert = new StringBuilder();
    var pointer = 0;
    while (pointer < diffs.Count)
    {
        switch (diffs[pointer].Operation)
        {
            case Operation.Insert:
                countInsert++;
                sbInsert.Append(diffs[pointer].Text);
                pointer++;
                break;
            case Operation.Delete:
                countDelete++;
                sbDelete.Append(diffs[pointer].Text);
                pointer++;
                break;
            case Operation.Equal:
                // Upon reaching an equality, check for prior redundancies.
                if (countDelete + countInsert > 1)
                {
                    if (countDelete != 0 && countInsert != 0)
                    {
                        // Factor out any common prefix.
                        var commonlength = TextUtil.CommonPrefix(sbInsert, sbDelete);
                        if (commonlength != 0)
                        {
                            var commonprefix = sbInsert.ToString(0, commonlength);
                            sbInsert.Remove(0, commonlength);
                            sbDelete.Remove(0, commonlength);
                            var index = pointer - countDelete - countInsert - 1;
                            if (index >= 0 && diffs[index].Operation == Operation.Equal)
                            {
                                // Append the prefix to the equality before the run.
                                diffs[index] = diffs[index].Replace(diffs[index].Text + commonprefix);
                            }
                            else
                            {
                                // No preceding equality: create one at the front
                                // and account for the shifted indices.
                                diffs.Insert(0, Diff.Equal(commonprefix));
                                pointer++;
                            }
                        }

                        // Factor out any common suffix into the current equality.
                        commonlength = TextUtil.CommonSuffix(sbInsert, sbDelete);
                        if (commonlength != 0)
                        {
                            var commonsuffix = sbInsert.ToString(sbInsert.Length - commonlength, commonlength);
                            sbInsert.Remove(sbInsert.Length - commonlength, commonlength);
                            sbDelete.Remove(sbDelete.Length - commonlength, commonlength);
                            diffs[pointer] = diffs[pointer].Replace(commonsuffix + diffs[pointer].Text);
                        }
                    }

                    // Replace the run with the merged edits. A buffer can be
                    // empty here (for example after the prefix/suffix
                    // factoring above); such an edit is dropped rather than
                    // written back as an empty diff.
                    var runStart = pointer - countDelete - countInsert;
                    var merged = new List<Diff>(2);
                    if (sbDelete.Length != 0)
                    {
                        merged.Add(Diff.Delete(sbDelete.ToString()));
                    }

                    if (sbInsert.Length != 0)
                    {
                        merged.Add(Diff.Insert(sbInsert.ToString()));
                    }

                    diffs.Splice(runStart, countDelete + countInsert, merged.ToArray());

                    // After the splice the current equality sits right behind
                    // the merged edits.
                    pointer = runStart + merged.Count;
                    if (merged.Count != 0)
                    {
                        // Step past the current equality.
                        pointer++;
                    }

                    // Otherwise the whole run vanished: stay on the current
                    // equality so the next iteration can merge it with the
                    // equality that now precedes it.
                }
                else if (pointer != 0 && diffs[pointer - 1].Operation == Operation.Equal)
                {
                    // Merge this equality with the previous one.
                    diffs[pointer - 1] = diffs[pointer - 1].Replace(diffs[pointer - 1].Text + diffs[pointer].Text);
                    diffs.RemoveAt(pointer);
                }
                else
                {
                    pointer++;
                }

                countInsert = 0;
                countDelete = 0;
                sbDelete.Clear();
                sbInsert.Clear();
                break;
        }
    }

    if (diffs[diffs.Count - 1].Text.Length == 0)
    {
        diffs.RemoveAt(diffs.Count - 1); // Remove the dummy entry at the end.
    }

    // Second pass: look for single edits surrounded on both sides by
    // equalities which can be shifted sideways to eliminate an equality.
    // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
    var changes = false;

    // Intentionally ignore the first and last element (don't need checking).
    for (var i = 1; i < diffs.Count - 1; i++)
    {
        var previous = diffs[i - 1];
        var current = diffs[i];
        var next = diffs[i + 1];
        if (previous.Operation == Operation.Equal && next.Operation == Operation.Equal)
        {
            // This is a single edit surrounded by equalities.
            if (current.Text.EndsWith(previous.Text, StringComparison.Ordinal))
            {
                // Shift the edit over the previous equality.
                var text = previous.Text + current.Text.Substring(0, current.Text.Length - previous.Text.Length);
                diffs[i] = current.Replace(text);
                diffs[i + 1] = next.Replace(previous.Text + next.Text);
                diffs.Splice(i - 1, 1);
                changes = true;
            }
            else if (current.Text.StartsWith(next.Text, StringComparison.Ordinal))
            {
                // Shift the edit over the next equality.
                diffs[i - 1] = previous.Replace(previous.Text + next.Text);
                diffs[i] = current.Replace(current.Text.Substring(next.Text.Length) + next.Text);
                diffs.Splice(i + 1, 1);
                changes = true;
            }
        }
    }

    // If shifts were made, the diff needs reordering and another shift sweep.
    if (changes)
    {
        diffs.CleanupMerge();
    }
}