/// <summary>
/// Returns the <c>dat</c> array of a <c>VArray</c> that was created with <c>Size()</c>
/// and handed to <c>LVR</c> together with <c>root</c>.
/// </summary>
/// <returns>The <c>dat</c> member of the <c>VArray</c> after the <c>LVR</c> call.</returns>
public int[] ToArray()
{
    var capacity = Size();
    VArray collector = new VArray(capacity);

    // NOTE(review): LVR presumably performs a left-visit-right (in-order) walk
    // that fills the collector - confirm against its definition.
    LVR(root, collector);

    return collector.dat;
}
/// <summary>
/// Returns the <c>array</c> member of a <c>VArray</c> that was created with <c>Size()</c>
/// and passed to <c>Visit</c> together with <c>_root</c>.
/// </summary>
/// <returns>The <c>array</c> member of the <c>VArray</c> after the <c>Visit</c> call.</returns>
public int[] ToArray()
{
    var capacity = Size();
    VArray collector = new VArray(capacity);

    // NOTE(review): Visit presumably walks the structure under _root and lets
    // the collector record each element - confirm against its definition.
    Visit(_root, collector);

    return collector.array;
}
/// <summary>
/// Enumerates the edits between the two sequences by walking the V arrays produced by
/// <c>ComputeEditPaths</c> backwards, from (oldLength, newLength) towards (0, 0).
/// </summary>
/// <remarks>
/// Every popped V array contributes one "snake": a single insert or delete followed by zero or
/// more diagonal (matching) steps. Because the walk runs backwards, the diagonal part of a snake
/// is yielded first (last match first), followed by its insert or delete.
/// Matches are yielded as <c>SequenceEdit(x, y)</c>, inserts as <c>SequenceEdit(-1, y)</c> and
/// deletes as <c>SequenceEdit(x, -1)</c>, where x indexes the old sequence and y the new one.
/// </remarks>
protected IEnumerable<SequenceEdit> GetEdits(TSequence oldSequence, int oldLength, TSequence newSequence, int newLength)
{
    Stack<VArray> paths = ComputeEditPaths(oldSequence, oldLength, newSequence, newLength);

    int oldPos = oldLength;
    int newPos = newLength;

    while (oldPos > 0 || newPos > 0)
    {
        VArray v = paths.Pop();

        // After the pop, the number of remaining V arrays equals the edit depth of the popped one.
        int depth = paths.Count;
        int diagonal = oldPos - newPos;

        // The V array stores the y coordinate of the snake's end point; x follows from x = y + k.
        int newEnd = v[diagonal];
        int oldEnd = newEnd + diagonal;

        // Did the snake start with a move to the right (delete) or down (insert)?
        bool movedRight;
        if (diagonal == depth)
        {
            movedRight = true;
        }
        else if (diagonal == -depth)
        {
            movedRight = false;
        }
        else
        {
            movedRight = v[diagonal - 1] > v[diagonal + 1];
        }

        int previousDiagonal;
        if (movedRight)
        {
            previousDiagonal = diagonal - 1;
        }
        else
        {
            previousDiagonal = diagonal + 1;
        }

        // Start point of the snake.
        int newStart = v[previousDiagonal];
        int oldStart = newStart + previousDiagonal;

        // Point reached after the single insert/delete step, where the diagonal run begins.
        int newMid = newStart;
        if (!movedRight)
        {
            newMid = newStart + 1;
        }

        int oldMid = newMid + diagonal;

        // Matching pairs along the diagonal, from the end point back to the mid point.
        while (oldMid < oldEnd)
        {
            Debug.Assert(yEndAbove(newEnd, newMid));
            oldEnd -= 1;
            newEnd -= 1;
            yield return new SequenceEdit(oldEnd, newEnd);
        }

        // The single insert/delete step; skipped when the snake starts at the origin.
        if (oldMid > 0 || newMid > 0)
        {
            if (oldStart == oldMid)
            {
                // x did not change => vertical move => insert
                yield return new SequenceEdit(-1, newMid - 1);
            }
            else
            {
                // horizontal move => delete
                yield return new SequenceEdit(oldMid - 1, -1);
            }
        }

        oldPos = oldStart;
        newPos = newStart;
    }

    // Local predicate mirroring the original assertion "yEnd > yMid".
    bool yEndAbove(int end, int mid) => end > mid;
}
/// <summary>
/// Copies elements of <paramref name="otherVArray"/> into this instance, shifting positions by
/// the difference between the two <c>Offset</c> values.
/// </summary>
/// <param name="otherVArray">The array to copy elements from.</param>
/// <remarks>
/// NOTE(review): the shift presumably keeps equal k indices aligned between the two arrays -
/// confirm against the indexer.
/// </remarks>
public void CopyFrom(VArray otherVArray)
{
    int shift = Offset - otherVArray.Offset;

    if (shift < 0)
    {
        // This instance has the smaller offset: skip the first -shift source elements
        // and fill this instance's backing array completely.
        Array.Copy(otherVArray._array, -shift, _array, 0, _array.Length);
        return;
    }

    // This instance has the same or a larger offset: copy the whole source array,
    // placing it shift positions into this instance's backing array.
    Array.Copy(otherVArray._array, 0, _array, shift, otherVArray._array.Length);
}
/// <summary>
/// Calculates a list of "V arrays" using Eugene W. Myers' O(ND) Difference Algorithm.
/// </summary>
/// <param name="oldSequence">The original sequence (sequence A).</param>
/// <param name="oldLength">Number of items of <paramref name="oldSequence"/> to compare.</param>
/// <param name="newSequence">The updated sequence (sequence B).</param>
/// <param name="newLength">Number of items of <paramref name="newSequence"/> to compare.</param>
/// <returns>
/// A stack of V arrays, one for each d in [0, D], with the array for d = D on top.
/// </returns>
/// <remarks>
///
/// The algorithm was inspired by Myers' Diff Algorithm described in an article by Nicolas Butler:
/// https://www.codeproject.com/articles/42279/investigating-myers-diff-algorithm-part-of
/// The author has approved the use of his code from the article under the Apache 2.0 license.
///
/// The algorithm works on an imaginary edit graph for A and B which has a vertex at each point in the grid (i, j), i in [0, lengthA] and j in [0, lengthB].
/// The vertices of the edit graph are connected by horizontal, vertical, and diagonal directed edges to form a directed acyclic graph.
/// Horizontal edges connect each vertex to its right neighbor.
/// Vertical edges connect each vertex to the neighbor below it.
/// Diagonal edges connect vertex (i,j) to vertex (i-1,j-1) if <see cref="ItemsEqual"/>(sequenceA[i-1],sequenceB[j-1]) is true.
///
/// A move right along horizontal edge (i-1,j)-(i,j) represents a delete of sequenceA[i-1].
/// A move down along vertical edge (i,j-1)-(i,j) represents an insert of sequenceB[j-1].
/// A move along diagonal edge (i-1,j-1)-(i,j) represents a match of sequenceA[i-1] to sequenceB[j-1].
/// The number of diagonal edges on the path from (0,0) to (lengthA, lengthB) is the length of the longest common subsequence.
///
/// The function does not actually allocate this graph. Instead it uses Eugene W. Myers' O(ND) Difference Algorithm to calculate a list of "V arrays" and returns it in a Stack.
/// A "V array" is a list of end points of so called "snakes".
/// A "snake" is a path with a single horizontal (delete) or vertical (insert) move followed by 0 or more diagonals (matching pairs).
///
/// Unlike the algorithm in the article this implementation stores 'y' indexes and prefers 'right' moves instead of 'down' moves in ambiguous situations
/// to preserve the behavior of the original diff algorithm (deletes first, inserts after).
///
/// One V array is pushed for every d in [0, D], where D is the length of the shortest edit script = the number of inserts/deletes between the two sequences;
/// the stack therefore holds D + 1 arrays.
/// The list can be used to determine the matching pairs in the sequences (GetMatchingPairs method) or the full editing script (GetEdits method).
///
/// The algorithm uses O(ND) time and memory where D is the number of deletes/inserts and N is the sum of lengths of the two sequences.
///
/// VArrays store just the y index because x can be calculated: x = y + k.
/// </remarks>
private Stack<VArray> ComputeEditPaths(TSequence oldSequence, int oldLength, TSequence newSequence, int newLength)
{
    Stack<VArray> stackOfVs = new Stack<VArray>();

    // special-case: the first "snake" to start at (-1, 0)
    VArray previousV = new VArray(1);
    VArray currentV;
    bool reachedEnd = false;

    for (int d = 0; d <= oldLength + newLength && !reachedEnd; d++)
    {
        // V is in range [-d...d] => use d to offset the k-based array indices to non-negative values
        if (d == 0)
        {
            currentV = previousV;
        }
        else
        {
            currentV = new VArray(d);
            currentV.CopyFrom(previousV);
        }

        // Diagonals reachable with exactly d edits share the parity of d, hence the step of 2.
        for (int k = -d; k <= d; k += 2)
        {
            // down or right?
            bool right = (k == d || (k != -d && currentV[k - 1] > currentV[k + 1]));
            int kPrev = right ? k - 1 : k + 1;

            // start point (only y is needed here; x would be yStart + kPrev)
            int yStart = currentV[kPrev];

            // mid point
            int yMid = right ? yStart : yStart + 1;
            int xMid = yMid + k;

            // end point
            int xEnd = xMid;
            int yEnd = yMid;

            // follow diagonal
            while (xEnd < oldLength && yEnd < newLength && ItemsEqual(oldSequence, xEnd, newSequence, yEnd))
            {
                xEnd++;
                yEnd++;
            }

            // save end point
            currentV[k] = yEnd;
            Debug.Assert(xEnd == yEnd + k);

            // check for solution; the remaining diagonals of this d are still filled in
            if (xEnd >= oldLength && yEnd >= newLength)
            {
                reachedEnd = true;
            }
        }

        stackOfVs.Push(currentV);
        previousV = currentV;
    }

    return stackOfVs;
}
/// <summary>
/// Builds the "V arrays" of Eugene W. Myers' O(ND) Difference Algorithm for the two sequences.
/// </summary>
/// <remarks>
///
/// The algorithm was inspired by Myers' Diff Algorithm described in an article by Nicolas Butler:
/// https://www.codeproject.com/articles/42279/investigating-myers-diff-algorithm-part-of
/// The author has approved the use of his code from the article under the Apache 2.0 license.
///
/// Conceptually the algorithm walks an edit graph for A and B with a vertex at every grid point (i, j),
/// i in [0, lengthA] and j in [0, lengthB]. The graph is never allocated.
/// Horizontal edges connect each vertex to its right neighbor.
/// Vertical edges connect each vertex to the neighbor below it.
/// Diagonal edges connect vertex (i,j) to vertex (i-1,j-1) if <see cref="ItemsEqual"/>(sequenceA[i-1],sequenceB[j-1]) is true.
///
/// Moving right along (i-1,j)-(i,j) is a delete of sequenceA[i-1].
/// Moving down along (i,j-1)-(i,j) is an insert of sequenceB[j-1].
/// Moving along the diagonal (i-1,j-1)-(i,j) is a match of sequenceA[i-1] to sequenceB[j-1].
/// The number of diagonal edges on the path from (0,0) to (lengthA, lengthB) is the length of the longest common subsequence.
///
/// A "V array" holds the end points of so called "snakes".
/// A "snake" is a path with a single horizontal (delete) or vertical (insert) move followed by 0 or more diagonals (matching pairs).
/// Only the y index of an end point is stored because x can be calculated: x = y + k.
///
/// Unlike the algorithm in the article this implementation stores 'y' indexes and prefers 'right' moves instead of 'down' moves in ambiguous situations
/// to preserve the behavior of the original diff algorithm (deletes first, inserts after).
///
/// One V array is pushed per value of d until the end point (oldLength, newLength) is reached.
/// The result can be used to determine the matching pairs in the sequences (GetMatchingPairs method) or the full editing script (GetEdits method).
///
/// The algorithm uses O(ND) time and memory where D is the number of deletes/inserts and N is the sum of lengths of the two sequences.
/// </remarks>
private VStack ComputeEditPaths(TSequence oldSequence, int oldLength, TSequence newSequence, int newLength)
{
    var paths = CreateStack();
    VArray v = default;
    var done = false;

    for (var d = 0; d <= oldLength + newLength && !done; d++)
    {
        // Every round pushes exactly one new V array. The first one holds the special
        // "snake" starting at (-1, 0); later ones are seeded from the previous round.
        // V is in range [-d...d] => d offsets the k-based indices to non-negative values.
        var previous = v;
        v = paths.Push();

        if (d == 0)
        {
            v.Initialize();
        }
        else
        {
            v.InitializeFrom(previous);
        }

        for (var k = -d; k <= d; k += 2)
        {
            // Decide whether diagonal k is reached by a move right (from k - 1) or down (from k + 1).
            bool right;
            if (k == d)
            {
                right = true;
            }
            else if (k == -d)
            {
                right = false;
            }
            else
            {
                right = v[k - 1] > v[k + 1];
            }

            var kPrev = right ? k - 1 : k + 1;

            // start point
            var yStart = v[kPrev];

            // mid point: a move down advances y by one, a move right leaves y unchanged
            var yMid = yStart + (right ? 0 : 1);
            var xMid = yMid + k;

            // end point: extend along the diagonal while the items match
            var xEnd = xMid;
            var yEnd = yMid;

            while (true)
            {
                if (xEnd >= oldLength || yEnd >= newLength)
                {
                    break;
                }

                if (!ItemsEqual(oldSequence, xEnd, newSequence, yEnd))
                {
                    break;
                }

                xEnd++;
                yEnd++;
            }

            // save end point
            v[k] = yEnd;
            Debug.Assert(xEnd == yEnd + k);

            // check for solution; the remaining diagonals of this round are still filled in
            if (xEnd >= oldLength && yEnd >= newLength)
            {
                done = true;
            }
        }
    }

    return stack(paths);

    // Identity helper kept local so the return reads as a single expression.
    VStack stack(VStack s) => s;
}