/// <summary>
/// Compares this diff with another one by operation and text.
/// </summary>
/// <param name="obj">The diff to compare against; may be null.</param>
/// <returns>
/// true when <paramref name="obj"/> has the same operation and the same text
/// as this instance; false otherwise, including when <paramref name="obj"/>
/// is null.
/// </returns>
public bool Equals(Diff obj)
{
    // Equality with null must answer false rather than throw.
    // ReferenceEquals is used so that the check does not depend on any
    // overloaded == operator of the type.
    if (object.ReferenceEquals(obj, null))
    {
        return false;
    }

    return obj.operation == operation && obj.text == text;
}
/// <summary>
/// Builds a copy of a list of patches in which every patch, and every diff
/// inside each patch, is a newly created object.
/// </summary>
/// <param name="patches">List of patch objects to copy.</param>
/// <returns>A new list containing the copied patch objects, in the same order.</returns>
public List<Patch> patch_deepCopy(List<Patch> patches)
{
    var copies = new List<Patch>();

    for (var index = 0; index < patches.Count; index++)
    {
        Patch source = patches[index];
        var clone = new Patch();

        // Re-create each diff from its operation and text.
        foreach (Diff sourceDiff in source.diffs)
        {
            clone.diffs.Add(new Diff(sourceDiff.operation, sourceDiff.text));
        }

        // Carry over the position/length bookkeeping unchanged.
        clone.start1 = source.start1;
        clone.start2 = source.start2;
        clone.length1 = source.length1;
        clone.length2 = source.length2;

        copies.Add(clone);
    }

    return copies;
}
/// <summary>
/// Computes the differences between two texts. The texts are assumed to
/// share no common prefix or suffix.
/// </summary>
/// <param name="text1">Old string to be diffed.</param>
/// <param name="text2">New string to be diffed.</param>
/// <param name="checklines">
/// Speedup flag. When true and both texts are longer than 100 characters,
/// the line-level diff is used; when false, the line-level diff is never used.
/// </param>
/// <param name="token">Cancellation token passed on to the nested diff routines.</param>
/// <param name="optimizeForSpeed">
/// When true, a half-match split of the problem is attempted before running a full diff.
/// </param>
/// <returns>The computed list of diffs.</returns>
private static List<Diff> ComputeImpl(
    string text1,
    string text2,
    bool checklines,
    CancellationToken token,
    bool optimizeForSpeed)
{
    // Nothing on the old side: the whole new text is one insertion.
    if (text1.Length == 0)
    {
        return new List<Diff> { Diff.Insert(text2) };
    }

    // Nothing on the new side: the whole old text is one deletion.
    if (text2.Length == 0)
    {
        return new List<Diff> { Diff.Delete(text1) };
    }

    var firstIsLonger = text1.Length > text2.Length;
    var longer = firstIsLonger ? text1 : text2;
    var shorter = firstIsLonger ? text2 : text1;

    var position = longer.IndexOf(shorter, StringComparison.Ordinal);
    if (position != -1)
    {
        // The shorter text sits inside the longer one: what surrounds it is
        // either deleted (text1 is longer) or inserted (text2 is longer).
        var edit = firstIsLonger ? Operation.Delete : Operation.Insert;
        return new List<Diff>
        {
            Diff.Create(edit, longer.Substring(0, position)),
            Diff.Equal(shorter),
            Diff.Create(edit, longer.Substring(position + shorter.Length))
        };
    }

    if (shorter.Length == 1)
    {
        // A single character that was not found above cannot be an equality.
        return new List<Diff> { Diff.Delete(text1), Diff.Insert(text2) };
    }

    // The half-match shortcut may give a non-optimal diff, so it is only
    // tried when the caller asked for speed.
    if (optimizeForSpeed)
    {
        var halfMatch = TextUtil.HalfMatch(text1, text2);
        if (!halfMatch.IsEmpty)
        {
            // Diff the parts before and after the common middle separately,
            // then stitch: head diffs, common middle, tail diffs.
            var merged = Compute(halfMatch.Prefix1, halfMatch.Prefix2, checklines, token, optimizeForSpeed);
            var tail = Compute(halfMatch.Suffix1, halfMatch.Suffix2, checklines, token, optimizeForSpeed);
            merged.Add(Diff.Equal(halfMatch.CommonMiddle));
            merged.AddRange(tail);
            return merged;
        }
    }

    if (checklines && text1.Length > 100 && text2.Length > 100)
    {
        return LineDiff(text1, text2, token, optimizeForSpeed);
    }

    return MyersDiffBisect(text1, text2, token, optimizeForSpeed);
}
/// <summary>
/// Compares this diff with another one by operation and text.
/// </summary>
/// <param name="obj">The diff to compare against.</param>
/// <returns>true when both the operation and the text are equal; otherwise false.</returns>
public bool Equals(Diff obj)
{
    if (obj.Operation != Operation)
    {
        return false;
    }

    return obj.Text == Text;
}
/// <summary>
/// Resets the diff-related members of this instance: the classification is
/// set to <see cref="DiffClassification.Unchanged"/> and the stored patch
/// and diff are set to null.
/// </summary>
public void ClearDiff()
{
    this.Classification = DiffClassification.Unchanged;
    this.Patch = null;
    this.Diff = null;
}
/// <summary>
/// Locates the 'middle snake' of a diff, splits the problem in two at that
/// point and returns the diff built by <c>BisectSplit</c>.
/// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
/// </summary>
/// <param name="text1">Old string to be diffed.</param>
/// <param name="text2">New string to be diffed.</param>
/// <param name="token">Cancellation token; checked once per outer step and passed on to <c>BisectSplit</c>.</param>
/// <param name="optimizeForSpeed">Passed through unchanged to <c>BisectSplit</c>.</param>
/// <returns>
/// The result of <c>BisectSplit</c> when the two walks overlap; otherwise
/// (cancellation, or no overlap found) a deletion of <paramref name="text1"/>
/// followed by an insertion of <paramref name="text2"/>.
/// </returns>
internal static List<Diff> MyersDiffBisect(string text1, string text2, CancellationToken token, bool optimizeForSpeed)
{
    // Read the lengths once; they are used in every inner iteration.
    var length1 = text1.Length;
    var length2 = text2.Length;
    var maxSteps = (length1 + length2 + 1) / 2;
    var origin = maxSteps;
    var slots = 2 * maxSteps;

    // One x value per diagonal for the forward and the backward walk;
    // -1 marks a diagonal that has not been reached yet.
    var forward = new int[slots];
    var backward = new int[slots];
    for (var slot = 0; slot < slots; slot++)
    {
        forward[slot] = -1;
        backward[slot] = -1;
    }

    forward[origin + 1] = 0;
    backward[origin + 1] = 0;

    var delta = length1 - length2;

    // If the total number of characters is odd, the forward walk is the one
    // that collides with the backward walk; otherwise the backward walk checks.
    var forwardDetectsOverlap = delta % 2 != 0;

    // Trims applied to the start and end of each k loop so that the walks
    // do not map space beyond the grid.
    var forwardSkipLow = 0;
    var forwardSkipHigh = 0;
    var backwardSkipLow = 0;
    var backwardSkipHigh = 0;

    for (var d = 0; d < maxSteps; d++)
    {
        // Stop searching once cancellation has been requested.
        if (token.IsCancellationRequested)
        {
            break;
        }

        // Advance the forward walk by one step.
        for (var k1 = forwardSkipLow - d; k1 <= d - forwardSkipHigh; k1 += 2)
        {
            var slot1 = origin + k1;
            var fromUpperDiagonal = k1 == -d || (k1 != d && forward[slot1 - 1] < forward[slot1 + 1]);
            var x1 = fromUpperDiagonal ? forward[slot1 + 1] : forward[slot1 - 1] + 1;
            var y1 = x1 - k1;

            // Follow the run of matching characters.
            while (x1 < length1 && y1 < length2 && text1[x1] == text2[y1])
            {
                x1++;
                y1++;
            }

            forward[slot1] = x1;

            if (x1 > length1)
            {
                // Ran off the right of the graph.
                forwardSkipHigh += 2;
                continue;
            }

            if (y1 > length2)
            {
                // Ran off the bottom of the graph.
                forwardSkipLow += 2;
                continue;
            }

            if (!forwardDetectsOverlap)
            {
                continue;
            }

            var mirrorSlot = origin + delta - k1;
            if (mirrorSlot < 0 || mirrorSlot >= slots || backward[mirrorSlot] == -1)
            {
                continue;
            }

            // Mirror the backward x onto the top-left coordinate system.
            var mirroredX = length1 - backward[mirrorSlot];
            if (x1 >= mirroredX)
            {
                // Overlap detected.
                return BisectSplit(text1, text2, x1, y1, token, optimizeForSpeed);
            }
        }

        // Advance the backward walk by one step.
        for (var k2 = backwardSkipLow - d; k2 <= d - backwardSkipHigh; k2 += 2)
        {
            var slot2 = origin + k2;
            var fromUpperDiagonal = k2 == -d || (k2 != d && backward[slot2 - 1] < backward[slot2 + 1]);
            var x2 = fromUpperDiagonal ? backward[slot2 + 1] : backward[slot2 - 1] + 1;
            var y2 = x2 - k2;

            // Follow the run of matching characters, reading both texts from their ends.
            while (x2 < length1 && y2 < length2 && text1[length1 - x2 - 1] == text2[length2 - y2 - 1])
            {
                x2++;
                y2++;
            }

            backward[slot2] = x2;

            if (x2 > length1)
            {
                // Ran off the left of the graph.
                backwardSkipHigh += 2;
                continue;
            }

            if (y2 > length2)
            {
                // Ran off the top of the graph.
                backwardSkipLow += 2;
                continue;
            }

            if (forwardDetectsOverlap)
            {
                continue;
            }

            var mirrorSlot = origin + delta - k2;
            if (mirrorSlot < 0 || mirrorSlot >= slots || forward[mirrorSlot] == -1)
            {
                continue;
            }

            var x1 = forward[mirrorSlot];
            var y1 = origin + x1 - mirrorSlot;

            // Mirror the backward x onto the top-left coordinate system.
            var mirroredX = length1 - backward[slot2];
            if (x1 >= mirroredX)
            {
                // Overlap detected.
                return BisectSplit(text1, text2, x1, y1, token, optimizeForSpeed);
            }
        }
    }

    // Either the search was cancelled, or the number of diffs equals the
    // number of characters: no commonality at all.
    return new List<Diff> { Diff.Delete(text1), Diff.Insert(text2) };
}
/// <summary>
/// Stores the supplied classification, patch and diff on this instance.
/// </summary>
/// <param name="classification">Value written to <c>Classification</c>.</param>
/// <param name="patch">Value written to <c>Patch</c>.</param>
/// <param name="diff">Value written to <c>Diff</c>.</param>
public void AssignDiff(DiffClassification classification, Patch patch, DiffMatchPatch.Diff diff)
{
    this.Classification = classification;
    this.Patch = patch;
    this.Diff = diff;
}
/// <summary>
/// Reduces the number of edits by eliminating semantically trivial
/// equalities, then extracts overlaps between adjacent deletions and
/// insertions.
/// </summary>
/// <param name="diffs">List of diffs; it is modified in place.</param>
public static void CleanupSemantic(this List<Diff> diffs)
{
    // Indices of the equalities seen so far; the top is the latest one.
    var equalityIndices = new Stack<int>();

    // Text of the latest equality, or null when there is no candidate.
    string candidate = null;

    // Characters changed before the candidate equality ...
    var insertedBefore = 0;
    var deletedBefore = 0;

    // ... and after it.
    var insertedAfter = 0;
    var deletedAfter = 0;

    for (var index = 0; index < diffs.Count; index++)
    {
        var current = diffs[index];

        if (current.Operation == Operation.Equal)
        {
            // Equality found: what used to be "after" is now "before".
            equalityIndices.Push(index);
            insertedBefore = insertedAfter;
            deletedBefore = deletedAfter;
            insertedAfter = 0;
            deletedAfter = 0;
            candidate = current.Text;
            continue;
        }

        // An insertion or a deletion.
        if (current.Operation == Operation.Insert)
        {
            insertedAfter += current.Text.Length;
        }
        else
        {
            deletedAfter += current.Text.Length;
        }

        // An equality is dropped when it is no longer than the edits on
        // both sides of it.
        var removable = candidate != null
                        && candidate.Length <= Math.Max(insertedBefore, deletedBefore)
                        && candidate.Length <= Math.Max(insertedAfter, deletedAfter);
        if (!removable)
        {
            continue;
        }

        // Turn the equality into a deletion followed by an insertion.
        diffs.Splice(equalityIndices.Peek(), 1, Diff.Delete(candidate), Diff.Insert(candidate));

        // Forget the equality that was just replaced ...
        equalityIndices.Pop();

        // ... and the one before it, which has to be looked at again.
        if (equalityIndices.Count > 0)
        {
            equalityIndices.Pop();
        }

        // Resume right after the latest remaining equality, or from the
        // start of the list; the loop increment adds the final +1.
        index = equalityIndices.Count > 0 ? equalityIndices.Peek() : -1;

        insertedBefore = 0;
        deletedBefore = 0;
        insertedAfter = 0;
        deletedAfter = 0;
        candidate = null;
    }

    diffs.CleanupMerge();
    diffs.CleanupSemanticLossless();

    // Find any overlaps between deletions and insertions.
    // e.g: <del>abcxxx</del><ins>xxxdef</ins>
    //   -> <del>abc</del>xxx<ins>def</ins>
    // e.g: <del>xxxabc</del><ins>defxxx</ins>
    //   -> <ins>def</ins>xxx<del>abc</del>
    // An overlap is only extracted if it is at least half as long as the
    // deletion or the insertion.
    var cursor = 1;
    while (cursor < diffs.Count)
    {
        var deleteThenInsert = diffs[cursor - 1].Operation == Operation.Delete
                               && diffs[cursor].Operation == Operation.Insert;
        if (!deleteThenInsert)
        {
            cursor++;
            continue;
        }

        var deletion = diffs[cursor - 1].Text;
        var insertion = diffs[cursor].Text;
        var forwardOverlap = TextUtil.CommonOverlap(deletion, insertion);
        var backwardOverlap = TextUtil.CommonOverlap(insertion, deletion);

        // Skip the pair (2 entries); one more when an equality is inserted.
        var advance = 2;

        if (forwardOverlap >= backwardOverlap)
        {
            if (forwardOverlap >= deletion.Length / 2.0 || forwardOverlap >= insertion.Length / 2.0)
            {
                // Overlap found: insert an equality and trim the surrounding edits.
                var replacement = new[]
                {
                    Diff.Delete(deletion.Substring(0, deletion.Length - forwardOverlap)),
                    Diff.Equal(insertion.Substring(0, forwardOverlap)),
                    Diff.Insert(insertion.Substring(forwardOverlap))
                };
                diffs.Splice(cursor - 1, 2, replacement);
                advance = 3;
            }
        }
        else if (backwardOverlap >= deletion.Length / 2.0 || backwardOverlap >= insertion.Length / 2.0)
        {
            // Reverse overlap found: insert an equality, and swap and trim
            // the surrounding edits.
            diffs.Splice(
                cursor - 1,
                2,
                Diff.Insert(insertion.Substring(0, insertion.Length - backwardOverlap)),
                Diff.Equal(deletion.Substring(0, backwardOverlap)),
                Diff.Delete(deletion.Substring(backwardOverlap)));
            advance = 3;
        }

        cursor += advance;
    }
}
/// <summary>
/// Reduces the number of edits by eliminating operationally trivial equalities.
/// </summary>
/// <param name="diffs">List of diffs; it is modified in place.</param>
/// <param name="diffEditCost">
/// Threshold for equality length: only equalities shorter than this value
/// are considered for elimination. Defaults to 4.
/// </param>
public static void CleanupEfficiency(this List<Diff> diffs, short diffEditCost = 4)
{
    var modified = false;

    // Indices of the candidate equalities; the top is the latest one.
    var equalityIndices = new Stack<int>();

    // Text of the latest candidate equality; empty when there is none.
    var candidate = string.Empty;

    // Whether an insertion / a deletion occurs before the candidate equality.
    var insertBefore = false;
    var deleteBefore = false;

    // Whether an insertion / a deletion occurs after the candidate equality.
    var insertAfter = false;
    var deleteAfter = false;

    for (var index = 0; index < diffs.Count; index++)
    {
        var current = diffs[index];

        if (current.Operation == Operation.Equal)
        {
            if (current.Text.Length < diffEditCost && (insertAfter || deleteAfter))
            {
                // Candidate found.
                equalityIndices.Push(index);
                insertBefore = insertAfter;
                deleteBefore = deleteAfter;
                candidate = current.Text;
            }
            else
            {
                // Not a candidate, and can never become one.
                equalityIndices.Clear();
                candidate = string.Empty;
            }

            insertAfter = false;
            deleteAfter = false;
            continue;
        }

        // An insertion or a deletion.
        if (current.Operation == Operation.Delete)
        {
            deleteAfter = true;
        }
        else
        {
            insertAfter = true;
        }

        // Five types to be split:
        //   <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
        //   <ins>A</ins>X<ins>C</ins><del>D</del>
        //   <ins>A</ins><del>B</del>X<ins>C</ins>
        //   <del>A</del>X<ins>C</ins><del>D</del>
        //   <ins>A</ins><del>B</del>X<del>C</del>
        var editKinds = (insertBefore ? 1 : 0)
                        + (deleteBefore ? 1 : 0)
                        + (insertAfter ? 1 : 0)
                        + (deleteAfter ? 1 : 0);
        var shouldSplit = candidate.Length != 0
                          && (editKinds == 4
                              || (candidate.Length < diffEditCost / 2 && editKinds == 3));
        if (!shouldSplit)
        {
            continue;
        }

        // Turn the equality into a deletion followed by an insertion.
        diffs.Splice(equalityIndices.Peek(), 1, Diff.Delete(candidate), Diff.Insert(candidate));

        // Forget the equality that was just replaced.
        equalityIndices.Pop();
        candidate = string.Empty;

        if (insertBefore && deleteBefore)
        {
            // No changes made which could affect the previous entry, keep going.
            insertAfter = true;
            deleteAfter = true;
            equalityIndices.Clear();
        }
        else
        {
            if (equalityIndices.Count > 0)
            {
                equalityIndices.Pop();
            }

            // Resume right after the latest remaining candidate, or from the
            // start of the list; the loop increment adds the final +1.
            index = equalityIndices.Count > 0 ? equalityIndices.Peek() : -1;
            insertAfter = false;
            deleteAfter = false;
        }

        modified = true;
    }

    if (modified)
    {
        diffs.CleanupMerge();
    }
}
/// <summary>
/// Looks for single edits surrounded on both sides by equalities and shifts
/// them sideways to the position with the best semantic score, so that the
/// edit lines up with a word boundary.
/// e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
/// </summary>
/// <param name="diffs">List of diffs; it is modified in place.</param>
public static void CleanupSemanticLossless(this List<Diff> diffs)
{
    // The first and the last element are intentionally skipped: neither can
    // be an edit with an equality on both sides.
    for (var index = 1; index < diffs.Count - 1; index++)
    {
        var surrounded = diffs[index - 1].Operation == Operation.Equal
                         && diffs[index + 1].Operation == Operation.Equal;
        if (!surrounded)
        {
            continue;
        }

        var left = diffs[index - 1].Text;
        var middle = diffs[index].Text;
        var right = diffs[index + 1].Text;

        // First, shift the edit as far left as possible.
        var shared = TextUtil.CommonSuffix(left, middle);
        if (shared > 0)
        {
            var moved = middle.Substring(middle.Length - shared);
            left = left.Substring(0, left.Length - shared);
            middle = moved + middle.Substring(0, middle.Length - shared);
            right = moved + right;
        }

        // Second, step right one character at a time, keeping the best fit.
        var bestLeft = left;
        var bestMiddle = middle;
        var bestRight = right;
        var bestScore = DiffCleanupSemanticScore(left, middle) + DiffCleanupSemanticScore(middle, right);

        while (middle.Length != 0 && right.Length != 0 && middle[0] == right[0])
        {
            left += middle[0];
            middle = middle.Substring(1) + right[0];
            right = right.Substring(1);

            var score = DiffCleanupSemanticScore(left, middle) + DiffCleanupSemanticScore(middle, right);

            // The >= encourages trailing rather than leading whitespace on edits.
            if (score >= bestScore)
            {
                bestScore = score;
                bestLeft = left;
                bestMiddle = middle;
                bestRight = right;
            }
        }

        if (diffs[index - 1].Text == bestLeft)
        {
            // The edit is already at its best position.
            continue;
        }

        // We have an improvement: write it back, leaving out any part that
        // became empty.
        var replacement = new[]
            {
                Diff.Equal(bestLeft),
                diffs[index].Replace(bestMiddle),
                Diff.Equal(bestRight)
            }
            .Where(part => !string.IsNullOrEmpty(part.Text))
            .ToArray();
        diffs.Splice(index - 1, 3, replacement);

        // Step back by the number of entries that were left out.
        index -= 3 - replacement.Length;
    }
}
/// <summary>
/// Reorders and merges like edit sections, and merges adjacent equalities.
/// Any edit section can move as long as it doesn't cross an equality.
/// </summary>
/// <param name="diffs">List of diffs; it is modified in place.</param>
public static void CleanupMerge(this List<Diff> diffs)
{
    // Dummy equality at the end, so that a trailing run of edits is
    // processed by the Equal case as well.
    diffs.Add(Diff.Equal(string.Empty));

    var deletes = 0;
    var inserts = 0;
    var deletedText = new StringBuilder();
    var insertedText = new StringBuilder();

    var cursor = 0;
    while (cursor < diffs.Count)
    {
        switch (diffs[cursor].Operation)
        {
            case Operation.Insert:
                inserts++;
                insertedText.Append(diffs[cursor].Text);
                cursor++;
                break;

            case Operation.Delete:
                deletes++;
                deletedText.Append(diffs[cursor].Text);
                cursor++;
                break;

            case Operation.Equal:
                // Upon reaching an equality, check the run of edits before it.
                var runLength = deletes + inserts;
                if (runLength > 1)
                {
                    if (deletes != 0 && inserts != 0)
                    {
                        // Factor out any common prefix.
                        var prefixLength = TextUtil.CommonPrefix(insertedText, deletedText);
                        if (prefixLength != 0)
                        {
                            var prefix = insertedText.ToString(0, prefixLength);
                            insertedText.Remove(0, prefixLength);
                            deletedText.Remove(0, prefixLength);

                            var before = cursor - runLength - 1;
                            if (before >= 0 && diffs[before].Operation == Operation.Equal)
                            {
                                // Append the prefix to the equality preceding the run.
                                diffs[before] = diffs[before].Replace(diffs[before].Text + prefix);
                            }
                            else
                            {
                                // No preceding equality: put a new one at the
                                // front, which moves every later entry by one.
                                diffs.Insert(0, Diff.Equal(prefix));
                                cursor++;
                            }
                        }

                        // Factor out any common suffix into the current equality.
                        var suffixLength = TextUtil.CommonSuffix(insertedText, deletedText);
                        if (suffixLength != 0)
                        {
                            var suffix = insertedText.ToString(insertedText.Length - suffixLength, suffixLength);
                            insertedText.Remove(insertedText.Length - suffixLength, suffixLength);
                            deletedText.Remove(deletedText.Length - suffixLength, suffixLength);
                            diffs[cursor] = diffs[cursor].Replace(suffix + diffs[cursor].Text);
                        }
                    }

                    // Replace the whole run with the merged records.
                    var runStart = cursor - runLength;
                    if (deletes == 0)
                    {
                        diffs.Splice(runStart, runLength, Diff.Insert(insertedText.ToString()));
                    }
                    else if (inserts == 0)
                    {
                        diffs.Splice(runStart, runLength, Diff.Delete(deletedText.ToString()));
                    }
                    else
                    {
                        diffs.Splice(
                            runStart,
                            runLength,
                            Diff.Delete(deletedText.ToString()),
                            Diff.Insert(insertedText.ToString()));
                    }

                    // Continue with the entry after the current equality.
                    cursor = runStart + (deletes != 0 ? 1 : 0) + (inserts != 0 ? 1 : 0) + 1;
                }
                else if (cursor != 0 && diffs[cursor - 1].Operation == Operation.Equal)
                {
                    // Merge this equality with the previous one.
                    diffs[cursor - 1] = diffs[cursor - 1].Replace(diffs[cursor - 1].Text + diffs[cursor].Text);
                    diffs.RemoveAt(cursor);
                }
                else
                {
                    cursor++;
                }

                inserts = 0;
                deletes = 0;
                deletedText.Clear();
                insertedText.Clear();
                break;
        }
    }

    if (diffs[diffs.Count - 1].Text.Length == 0)
    {
        // Remove the dummy entry at the end.
        diffs.RemoveAt(diffs.Count - 1);
    }

    // Second pass: look for single edits surrounded on both sides by
    // equalities which can be shifted sideways to eliminate an equality.
    // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
    var shifted = false;

    // The first and the last element are intentionally skipped.
    for (var i = 1; i < diffs.Count - 1; i++)
    {
        var previous = diffs[i - 1];
        var next = diffs[i + 1];
        if (previous.Operation != Operation.Equal || next.Operation != Operation.Equal)
        {
            continue;
        }

        // This is a single edit surrounded by equalities.
        var current = diffs[i];
        if (current.Text.EndsWith(previous.Text, StringComparison.Ordinal))
        {
            // Shift the edit over the previous equality.
            var shiftedText = previous.Text
                              + current.Text.Substring(0, current.Text.Length - previous.Text.Length);
            diffs[i] = current.Replace(shiftedText);
            diffs[i + 1] = next.Replace(previous.Text + next.Text);
            diffs.Splice(i - 1, 1);
            shifted = true;
        }
        else if (current.Text.StartsWith(next.Text, StringComparison.Ordinal))
        {
            // Shift the edit over the next equality.
            diffs[i - 1] = previous.Replace(previous.Text + next.Text);
            diffs[i] = current.Replace(current.Text.Substring(next.Text.Length) + next.Text);
            diffs.Splice(i + 1, 1);
            shifted = true;
        }
    }

    // If shifts were made, the diff needs reordering and another shift sweep.
    if (shifted)
    {
        diffs.CleanupMerge();
    }
}