/// <summary>
/// In the edit graph for the sequences src and des, search for the
/// optimal (shortest) path from (src.StartIndex, des.StartIndex) to
/// (src.EndIndex, des.EndIndex).
///
/// The search advances from both ends of the graph, one edit step per
/// iteration of the outer loop. When the furthest forward-reaching
/// point overlaps with the furthest backward-reaching point on the
/// same diagonal, the snake that produced the overlap is reported as
/// the middle snake of the shortest path.
///
/// See the listed reference for the detailed description of the
/// algorithm.
/// </summary>
/// <param name="src">
/// Represents a (sub)sequence of _original
/// </param>
/// <param name="des">
/// Represents a (sub)sequence of _modified
/// </param>
/// <returns>
/// The found middle snake. Its SES_Length is set to 2*d - 1 when the
/// overlap is detected by the forward pass and to 2*d when it is
/// detected by the backward pass, where d is the outer loop counter.
/// </returns>
private MiddleSnake findMiddleSnake(Sequence src, Sequence des)
{
    int d, k;
    int x, y;
    MiddleSnake midSnake = new MiddleSnake();

    // the range of diagonal values; a diagonal k satisfies k = x - y
    // (see "y = x - k" below)
    int minDiag = src.StartIndex - des.EndIndex;
    int maxDiag = src.EndIndex - des.StartIndex;

    // middle point (starting diagonal) of forward searching
    int fwdMid = src.StartIndex - des.StartIndex;
    // middle point (starting diagonal) of backward searching
    int bwdMid = src.EndIndex - des.EndIndex;

    // forward searching range
    int fwdMin = fwdMid;
    int fwdMax = fwdMid;
    // backward searching range
    int bwdMin = bwdMid;
    int bwdMax = bwdMid;

    // true when the two starting diagonals differ by an odd amount; it
    // selects which pass (forward or backward) performs the overlap test
    bool odd = ((fwdMin - bwdMid) & 1) == 1;

    // seed both vectors with their starting x positions.
    // NOTE(review): the vectors are indexed by diagonal number, which can
    // be negative here - presumably they are offset-indexed containers;
    // confirm against their declaration.
    fwdVector[fwdMid] = src.StartIndex;
    bwdVector[bwdMid] = src.EndIndex;

#if (MyDEBUG)
    Debug.WriteLine("-- Entering Function findMiddleSnake(src, des) --");
#endif

    // no loop condition: the only exits are the two return statements below
    for (d = 1; ; d++)
    {
        // extend or shrink the forward search range; a newly exposed
        // neighbour slot is filled with the sentinel -1 so it always
        // loses the "<" comparison in the search below
        if (fwdMin > minDiag)
            fwdVector[--fwdMin -1] = -1;
        else
            ++fwdMin;

        if(fwdMax < maxDiag)
            fwdVector[++fwdMax +1] = -1;
        else
            --fwdMax;
#if (MyDEBUG)
        Debug.WriteLine(d, " D path");
#endif
        // top-down search
        for (k = fwdMax; k >= fwdMin; k -= 2)
        {
            // choose the neighbouring diagonal that reaches further:
            // k+1 keeps its x, k-1 advances x by one
            if (fwdVector[k-1] < fwdVector[k+1])
            {
                x = fwdVector[k+1];
            }
            else
            {
                x = fwdVector[k-1] + 1;
            }
            y = x - k;
            // tentatively record where the snake on this diagonal starts
            midSnake.Source.StartIndex = x;
            midSnake.Destination.StartIndex = y;

            // follow the run of equal elements along the diagonal
            while (x < src.EndIndex && y < des.EndIndex
                && _original[x].CompareTo(_modified[y]) == 0)
            {
                x++;
                y++;
            }
            // update forward vector
            fwdVector[k] = x;
#if (MyDEBUG)
            Debug.WriteLine(" Inside forward loop");
            Debug.WriteLine(k, " Diagonal value");
            Debug.WriteLine(x, " X value");
            Debug.WriteLine(y, " Y value");
#endif
            // overlap test: diagonal k is inside the backward range and the
            // forward x has reached or passed the backward x on it
            if (odd && k >= bwdMin && k <= bwdMax && x >= bwdVector[k])
            {
                // this is the snake we are looking for
                // and set the end indexes of the snake
                midSnake.Source.EndIndex = x;
                midSnake.Destination.EndIndex = y;
                midSnake.SES_Length = 2 * d -1;
#if (MyDEBUG)
                Debug.WriteLine("!!!Report snake from forward search");
                Debug.WriteLine(midSnake.Source.StartIndex, " middle snake source start index");
                Debug.WriteLine(midSnake.Source.EndIndex, " middle snake source end index");
                Debug.WriteLine(midSnake.Destination.StartIndex, " middle snake destination start index");
                Debug.WriteLine(midSnake.Destination.EndIndex, " middle snake destination end index");
#endif
                return midSnake;
            }
        }

        // extend or shrink the backward search range; a newly exposed
        // neighbour slot is filled with the sentinel int.MaxValue so it
        // always loses the "<" comparison in the search below.
        // NOTE(review): the upper bound here is (maxDiag - 1) whereas the
        // forward range above uses maxDiag - confirm the asymmetry is
        // intentional.
        if (bwdMin > minDiag)
            bwdVector[--bwdMin -1] = int.MaxValue;
        else
            ++bwdMin;

        if(bwdMax < (maxDiag - 1))
            bwdVector[++bwdMax +1] = int.MaxValue;
        else
            --bwdMax;

        // bottom-up search
        for (k = bwdMax; k >= bwdMin; k -= 2)
        {
            // choose the neighbouring diagonal with the smaller x:
            // k-1 keeps its x, k+1 steps x back by one
            if (bwdVector[k - 1] < bwdVector[k + 1])
            {
                x = bwdVector[k - 1];
            }
            else
            {
                x = bwdVector[k + 1] - 1;
            }
            y = x - k;
            // tentatively record where the snake on this diagonal ends
            midSnake.Source.EndIndex = x;
            midSnake.Destination.EndIndex = y;

            // follow the run of equal elements backwards along the diagonal
            while (x > src.StartIndex && y > des.StartIndex
                && _original[x-1].CompareTo(_modified[y-1]) == 0)
            {
                x--;
                y--;
            }
            // update backward Vector
            bwdVector[k] = x;
#if (MyDEBUG)
            Debug.WriteLine(" Inside backward loop");
            Debug.WriteLine(k, " Diagonal value");
            Debug.WriteLine(x, " X value");
            Debug.WriteLine(y, " Y value");
#endif
            // overlap test: diagonal k is inside the forward range and the
            // backward x has reached or passed the forward x on it
            if (!odd && k >= fwdMin && k <= fwdMax && x <= fwdVector[k])
            {
                // this is the snake we are looking for
                // and set the start indexes of the snake
                midSnake.Source.StartIndex = x;
                midSnake.Destination.StartIndex = y;
                midSnake.SES_Length = 2 * d;
#if (MyDEBUG)
                Debug.WriteLine("!!!Report snake from backward search");
                Debug.WriteLine(midSnake.Source.StartIndex, " middle snake source start index");
                Debug.WriteLine(midSnake.Source.EndIndex, " middle snake source end index");
                Debug.WriteLine(midSnake.Destination.StartIndex, " middle snake destination start index");
                Debug.WriteLine(midSnake.Destination.EndIndex, " middle snake destination end index");
#endif
                return midSnake;
            }
        }
    }
}
/// <summary>
/// Merges the range of _original marked by src with the range of
/// _modified marked by des and returns the resulting html text.
/// Words found only in the source range are rendered with
/// SequenceStatus.Deleted, words found only in the destination range
/// with SequenceStatus.Inserted, and shared words with
/// SequenceStatus.NoChange (see constructText for the decoration).
///
/// The start and end indexes of src and des are advanced in place
/// while the common leading and trailing words are trimmed.
/// </summary>
/// <param name="src">
/// The source sequence
/// </param>
/// <param name="des">
/// The destination sequence
/// </param>
/// <returns>
/// The merged html string
/// </returns>
private string doMerge(Sequence src, Sequence des)
{
    StringBuilder merged = new StringBuilder();
    string trailingText = string.Empty;

    // Skip the words both ranges share at the front and emit them
    // unchanged.
    int desMark = des.StartIndex;
    while (src.StartIndex < src.EndIndex
        && des.StartIndex < des.EndIndex
        && _original[src.StartIndex].CompareTo(_modified[des.StartIndex]) == 0)
    {
        src.StartIndex++;
        des.StartIndex++;
    }
    if (desMark < des.StartIndex)
    {
        Sequence commonHead = new Sequence(desMark, des.StartIndex);
        merged.Append(constructText(commonHead, SequenceStatus.NoChange));
    }

    // Skip the words both ranges share at the back; their text is kept
    // aside and appended last.
    desMark = des.EndIndex;
    while (src.StartIndex < src.EndIndex
        && des.StartIndex < des.EndIndex
        && _original[src.EndIndex - 1].CompareTo(_modified[des.EndIndex - 1]) == 0)
    {
        src.EndIndex--;
        des.EndIndex--;
    }
    if (desMark > des.EndIndex)
    {
        Sequence commonTail = new Sequence(des.EndIndex, desMark);
        trailingText = constructText(commonTail, SequenceStatus.NoChange);
    }

    // What is left to compare on each side.
    int srcCount = src.EndIndex - src.StartIndex;
    int desCount = des.EndIndex - des.StartIndex;

    // At least one side is exhausted: whatever remains on the other
    // side is a pure deletion or a pure insertion.
    if (srcCount < 1 || desCount < 1)
    {
        if (desCount >= 1)
        {
            merged.Append(constructText(des, SequenceStatus.Inserted));
        }
        else if (srcCount >= 1)
        {
            merged.Append(constructText(src, SequenceStatus.Deleted));
        }
        merged.Append(trailingText);
        return merged.ToString();
    }

    // One word left on each side; the trimming above guarantees they
    // differ, so report a deletion followed by an insertion.
    if (srcCount == 1 && desCount == 1)
    {
        merged.Append(constructText(src, SequenceStatus.Deleted));
        merged.Append(constructText(des, SequenceStatus.Inserted));
        merged.Append(trailingText);
        return merged.ToString();
    }

    MiddleSnake mid = findMiddleSnake(src, des);

    if (mid.SES_Length > 1)
    {
        // Divide around the middle snake and recurse on both halves.
        Sequence beforeSrc = new Sequence(src.StartIndex, mid.Source.StartIndex);
        Sequence beforeDes = new Sequence(des.StartIndex, mid.Destination.StartIndex);
        Sequence afterSrc = new Sequence(mid.Source.EndIndex, src.EndIndex);
        Sequence afterDes = new Sequence(mid.Destination.EndIndex, des.EndIndex);

        merged.Append(doMerge(beforeSrc, beforeDes));
        if (mid.Source.StartIndex < mid.Source.EndIndex)
        {
            // A non-empty snake is a run of words common to both sides.
            merged.Append(constructText(mid.Destination, SequenceStatus.NoChange));
        }
        merged.Append(doMerge(afterSrc, afterDes));
        merged.Append(trailingText);
        return merged.ToString();
    }

    // A single edit remains. Handling it here saves one level of
    // recursion. The edit is either on the source side (more source
    // words than destination words) or on the destination side, and
    // it sits either before the snake or after it:
    //
    //   srcCount > desCount:   -          or   \
    //                           \ (case 1)      \ (case 2)
    //                            \               -
    //   otherwise:             |          or   \
    //                           \ (case 3)      \ (case 4)
    //                            \               |
    //
    // srcCount and desCount cannot be equal here.
    Sequence edited;
    SequenceStatus editStatus;
    if (srcCount > desCount)
    {
        editStatus = SequenceStatus.Deleted;
        edited = (src.StartIndex != mid.Source.StartIndex)
            ? new Sequence(src.StartIndex, mid.Source.StartIndex)    // case 1
            : new Sequence(mid.Source.StartIndex, src.EndIndex);     // case 2
    }
    else
    {
        editStatus = SequenceStatus.Inserted;
        edited = (des.StartIndex != mid.Destination.StartIndex)
            ? new Sequence(des.StartIndex, mid.Destination.StartIndex)   // case 3
            : new Sequence(mid.Destination.EndIndex, des.EndIndex);      // case 4
    }

    merged.Append(constructText(edited, editStatus));
    merged.Append(constructText(mid.Destination, SequenceStatus.NoChange));
    merged.Append(trailingText);
    return merged.ToString();
}
/// <summary>
/// Public entry point: merges the complete _original collection with
/// the complete _modified collection by handing doMerge one sequence
/// spanning each of them from index 0 to its Count.
/// </summary>
/// <returns>
/// The merged file, as returned by doMerge
/// </returns>
public string merge()
{
    return doMerge(
        new Sequence(0, _original.Count),
        new Sequence(0, _modified.Count));
}
/// <summary>
/// Rebuilds the html text for the words whose indexes lie in
/// [seq.StartIndex, seq.EndIndex). For SequenceStatus.Deleted the
/// words are taken from _original and reconstructed with the
/// CommentOff begin/end tags; for SequenceStatus.Inserted they are
/// taken from _modified and reconstructed with the Added begin/end
/// tags; for SequenceStatus.NoChange they are taken from _modified
/// and reconstructed without extra tags.
/// </summary>
/// <param name="seq">
/// Sequence object that marks the start index and end
/// index of the sub sequence
/// </param>
/// <param name="status">
/// Denoting the status of the sequence; it selects both the source
/// collection and the decoration placed around each word.
/// </param>
/// <returns>
/// The html text string constructed; empty when status is none of
/// the three handled values.
/// </returns>
private string constructText(Sequence seq, SequenceStatus status)
{
    StringBuilder html = new StringBuilder();

    if (status == SequenceStatus.Deleted)
    {
        // Present only in _original: wrap each word as deleted.
        int pos = seq.StartIndex;
        while (pos < seq.EndIndex)
        {
            html.Append(_original[pos].reconstruct(CommentOff.BeginTag, CommentOff.EndTag));
            pos++;
        }
    }
    else if (status == SequenceStatus.Inserted)
    {
        // Present only in _modified: wrap each word as added.
        int pos = seq.StartIndex;
        while (pos < seq.EndIndex)
        {
            html.Append(_modified[pos].reconstruct(Added.BeginTag, Added.EndTag));
            pos++;
        }
    }
    else if (status == SequenceStatus.NoChange)
    {
        // Present in both collections: emit the _modified copy as is.
        int pos = seq.StartIndex;
        while (pos < seq.EndIndex)
        {
            html.Append(_modified[pos].reconstruct());
            pos++;
        }
    }
    // Any other status value contributes nothing.

    return html.ToString();
}
/// <summary>
/// Creates a snake whose Source and Destination are each set to a
/// newly default-constructed Sequence.
/// </summary>
public MiddleSnake()
{
    this.Source = new Sequence();
    this.Destination = new Sequence();
}