Beispiel #1
0
        /// <summary>
        /// In the edit graph for the sequences src and des, search for the
        /// optimal(shortest) path from (src.StartIndex, des.StartIndex) to
        /// (src.EndIndex, des.EndIndex).
        ///
        /// The searching starts from both ends of the graph and when the
        /// furthest forward reaching overlaps with the furthest backward
        /// reaching, the overlapped point is reported as the middle point
        /// of the shortest path.
        ///
        /// See the listed reference for the detailed description of the
        /// algorithm
        /// </summary>
        /// <param name="src">
        /// Represents a (sub)sequence of _original
        /// </param>
        /// <param name="des">
        /// Represents a (sub)sequence of _modified
        /// </param>
        /// <returns>
        /// The found middle snake
        /// </returns>
        private MiddleSnake findMiddleSnake(Sequence src, Sequence des)
        {
            // Searches forward from (src.StartIndex, des.StartIndex) and
            // backward from (src.EndIndex, des.EndIndex) in alternation, one
            // edit step per direction per iteration of d, until the two
            // searches meet on a common diagonal. A diagonal k is x - y, where
            // x indexes _original and y indexes _modified.
            //
            // NOTE(review): fwdVector and bwdVector are declared outside this
            // view. They are indexed by diagonal value, which can be negative
            // here (e.g. minDiag - 1), so they presumably accept negative
            // indexes -- confirm against their declaration.
            int d, k;
            int x, y;
            MiddleSnake midSnake = new MiddleSnake();

            // the range of diagonal values
            int minDiag = src.StartIndex - des.EndIndex;
            int maxDiag = src.EndIndex - des.StartIndex;

            // middle point of forward searching (diagonal of the start corner)
            int fwdMid = src.StartIndex - des.StartIndex;
            // middle point of backward searching (diagonal of the end corner)
            int bwdMid = src.EndIndex - des.EndIndex;

            // forward searching range
            int fwdMin = fwdMid;
            int fwdMax = fwdMid;

            // backward searching range
            int bwdMin = bwdMid;
            int bwdMax = bwdMid;

            // Parity of the distance between the start and end diagonals
            // (fwdMin still equals fwdMid here). When it is odd the overlap is
            // tested in the forward loop, otherwise in the backward loop.
            bool odd = ((fwdMin - bwdMid) & 1) == 1;

            // Seed both searches: each vector stores, per diagonal, the x
            // value reached so far by that search direction.
            fwdVector[fwdMid] = src.StartIndex;
            bwdVector[bwdMid] = src.EndIndex;

            #if (MyDEBUG)
            Debug.WriteLine("-- Entering Function findMiddleSnake(src, des) --");
            #endif
            // The loop has no exit condition of its own; it ends only through
            // one of the two return statements below.
            for (d = 1; ; d++)
            {
                // extend or shrink the search range; when extending, the slot
                // just outside the new bound is set to -1 so the neighbour
                // comparison below has a defined low value to read
                if (fwdMin > minDiag)
                    fwdVector[--fwdMin -1] = -1;
                else
                    ++fwdMin;

                if(fwdMax < maxDiag)
                    fwdVector[++fwdMax +1] = -1;
                else
                    --fwdMax;
            #if (MyDEBUG)
                Debug.WriteLine(d, "  D path");
            #endif
                // top-down search, visiting every second diagonal
                for (k = fwdMax; k >= fwdMin; k -= 2)
                {
                    // Take the neighbouring diagonal that has reached further:
                    // coming from k+1 keeps x, coming from k-1 advances x by
                    // one.
                    if (fwdVector[k-1] < fwdVector[k+1])
                    {
                        x = fwdVector[k+1];
                    }
                    else
                    {
                        x = fwdVector[k-1] + 1;
                    }
                    y = x - k;
                    // tentatively record where the snake on this diagonal starts
                    midSnake.Source.StartIndex = x;
                    midSnake.Destination.StartIndex = y;

                    // follow the diagonal while the elements compare equal
                    while (x < src.EndIndex &&
                        y < des.EndIndex &&
                        _original[x].CompareTo(_modified[y]) == 0)
                    {
                        x++;
                        y++;
                    }

                    // update forward vector
                    fwdVector[k] = x;
            #if (MyDEBUG)
                    Debug.WriteLine("    Inside forward loop");
                    Debug.WriteLine(k, "    Diagonal value");
                    Debug.WriteLine(x, "    X value");
                    Debug.WriteLine(y, "    Y value");
            #endif
                    // overlap test: diagonal k is inside the backward range and
                    // the forward x has reached the backward x on it
                    if (odd && k >= bwdMin && k <= bwdMax && x >= bwdVector[k])
                    {
                        // this is the snake we are looking for
                        // and set the end indexes of the snake
                        midSnake.Source.EndIndex = x;
                        midSnake.Destination.EndIndex = y;
                        // d forward steps plus d - 1 backward steps so far
                        midSnake.SES_Length = 2 * d -1;
            #if (MyDEBUG)
                        Debug.WriteLine("!!!Report snake from forward search");
                        Debug.WriteLine(midSnake.Source.StartIndex, "  middle snake source start index");
                        Debug.WriteLine(midSnake.Source.EndIndex, "  middle snake source end index");
                        Debug.WriteLine(midSnake.Destination.StartIndex, "  middle snake destination start index");
                        Debug.WriteLine(midSnake.Destination.EndIndex, "  middle snake destination end index");
            #endif
                        return midSnake;
                    }
                }

                // extend or shrink the backward search range; when extending,
                // the slot just outside the new bound is set to int.MaxValue
                // so the neighbour comparison below has a defined high value
                // NOTE(review): the upper-bound test below uses (maxDiag - 1)
                // while the forward search uses maxDiag -- confirm that this
                // asymmetry is intentional.
                if (bwdMin > minDiag)
                    bwdVector[--bwdMin -1] = int.MaxValue;
                else
                    ++bwdMin;

                if(bwdMax < (maxDiag - 1))
                    bwdVector[++bwdMax +1] = int.MaxValue;
                else
                    --bwdMax;

                // bottom-up search, visiting every second diagonal
                for (k = bwdMax; k >= bwdMin; k -= 2)
                {
                    // Take the neighbouring diagonal with the smaller x:
                    // coming from k-1 keeps x, coming from k+1 moves x back by
                    // one.
                    if (bwdVector[k - 1] < bwdVector[k + 1])
                    {
                        x = bwdVector[k - 1];
                    }
                    else
                    {
                        x = bwdVector[k + 1] - 1;
                    }
                    y = x - k;
                    // tentatively record where the snake on this diagonal ends
                    midSnake.Source.EndIndex = x;
                    midSnake.Destination.EndIndex = y;

                    // follow the diagonal backwards while the preceding
                    // elements compare equal
                    while (x > src.StartIndex &&
                        y > des.StartIndex &&
                        _original[x-1].CompareTo(_modified[y-1]) == 0)
                    {
                        x--;
                        y--;
                    }
                    // update backward Vector
                    bwdVector[k] = x;

            #if (MyDEBUG)
                    Debug.WriteLine("     Inside backward loop");
                    Debug.WriteLine(k, "    Diagonal value");
                    Debug.WriteLine(x, "    X value");
                    Debug.WriteLine(y, "    Y value");
            #endif
                    // overlap test: diagonal k is inside the forward range and
                    // the backward x has come down to the forward x on it
                    if (!odd && k >= fwdMin && k <= fwdMax && x <= fwdVector[k])
                    {
                        // this is the snake we are looking for
                        // and set the start indexes of the snake
                        midSnake.Source.StartIndex = x;
                        midSnake.Destination.StartIndex = y;
                        // d forward steps plus d backward steps so far
                        midSnake.SES_Length = 2 * d;
            #if (MyDEBUG)
                        Debug.WriteLine("!!!Report snake from backward search");
                        Debug.WriteLine(midSnake.Source.StartIndex, "  middle snake source start index");
                        Debug.WriteLine(midSnake.Source.EndIndex, "  middle snake source end index");
                        Debug.WriteLine(midSnake.Destination.StartIndex, "  middle snake destination start index");
                        Debug.WriteLine(midSnake.Destination.EndIndex, "  middle snake destination end index");
            #endif
                        return midSnake;
                    }
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// The function merges the two sequences and returns the merged
        /// html text string with deleted(exists in source sequence but
        /// not in destination sequence) and added(exists in destination
        /// but not in source) decorated extra html tags defined in class
        /// commentoff and class added.
        /// </summary>
        /// <param name="src">
        /// The source sequence
        /// </param>
        /// <param name="des">
        /// The destination sequence
        /// </param>
        /// <returns>
        /// The merged html string
        /// </returns>
        private string doMerge(Sequence src, Sequence des)
        {
            // Overview: the common head and tail of the two ranges are trimmed
            // (by moving the indexes stored in src and des) and emitted as
            // unchanged text. Trivial leftovers are handled directly; anything
            // else is split around the middle snake and merged recursively.
            // Ranges are half-open: [StartIndex, EndIndex).
            MiddleSnake snake;
            Sequence s;
            StringBuilder result = new StringBuilder();
            string tail = string.Empty;

            int y = des.StartIndex;

            // strip off the leading common sequence
            while (src.StartIndex < src.EndIndex &&
                des.StartIndex < des.EndIndex &&
                _original[src.StartIndex].CompareTo(_modified[des.StartIndex]) == 0)
            {
                src.StartIndex++;
                des.StartIndex++;
            }

            if (des.StartIndex > y)
            {
                s = new Sequence(y, des.StartIndex);
                result.Append(constructText(s, SequenceStatus.NoChange));
            }

            y = des.EndIndex;

            // strip off the trailing common sequence; its text is kept in
            // 'tail' and appended after everything else
            while (src.StartIndex < src.EndIndex &&
                des.StartIndex < des.EndIndex &&
                _original[src.EndIndex-1].CompareTo(_modified[des.EndIndex-1]) == 0)
            {
                src.EndIndex--;
                des.EndIndex--;
            }

            if (des.EndIndex < y)
            {
                s = new Sequence(des.EndIndex, y);
                tail = constructText(s, SequenceStatus.NoChange);
            }

            // length of the remaining sequences
            int N = src.EndIndex - src.StartIndex;
            int M = des.EndIndex - des.StartIndex;

            // Special cases
            if (N < 1 && M < 1)
            {
                // both source and destination are empty
                return (result.Append(tail)).ToString();
            }

            if (N < 1)
            {
                // source is already empty, report destination as added
                result.Append(constructText(des, SequenceStatus.Inserted));
                result.Append(tail);
                return result.ToString();
            }

            if (M < 1)
            {
                // destination is empty, report source as deleted
                result.Append(constructText(src, SequenceStatus.Deleted));
                result.Append(tail);
                return result.ToString();
            }

            if (M == 1 && N == 1)
            {
                // each of source and destination has only one word left.
                // At this point, we are sure that they are not equal.
                result.Append(constructText(src, SequenceStatus.Deleted));
                result.Append(constructText(des, SequenceStatus.Inserted));
                result.Append(tail);
                return result.ToString();
            }

            // find the middle snake
            snake = findMiddleSnake(src, des);

            if (snake.SES_Length > 1)
            {
                // prepare the parameters for recursion: everything before the
                // snake and everything after it
                Sequence leftSrc = new Sequence(src.StartIndex, snake.Source.StartIndex);
                Sequence leftDes = new Sequence(des.StartIndex, snake.Destination.StartIndex);
                Sequence rightSrc = new Sequence(snake.Source.EndIndex, src.EndIndex);
                Sequence rightDes = new Sequence(snake.Destination.EndIndex, des.EndIndex);

                result.Append(doMerge(leftSrc, leftDes));
                if (snake.Source.StartIndex < snake.Source.EndIndex)
                {
                    // the snake is not empty, report it as common sequence
                    result.Append(constructText(snake.Destination, SequenceStatus.NoChange));
                }
                result.Append(doMerge(rightSrc, rightDes));
                result.Append(tail);
                return result.ToString();
            }

            // Separating this case out can at least save one
            // level of recursion.
            //
            // Only one edit edge suggests the 4 possible cases.
            // if N > M, it will be either:
            //    -              or    \
            //      \   (case 1)        \   (case 2)
            //       \                   -
            // if N < M, it will be either:
            //    |              or    \
            //     \    (case 3)        \   (case 4)
            //      \                    |
            // N and M can't be equal!
            //
            // The pieces are emitted in path order: in cases 1 and 3 the edit
            // precedes the snake, in cases 2 and 4 the snake precedes the edit.
            if (N > M)
            {
                if (src.StartIndex != snake.Source.StartIndex)
                {
                    // case 1: deleted words first, then the common snake
                    Sequence leftSrc = new Sequence(src.StartIndex, snake.Source.StartIndex);
                    result.Append(constructText(leftSrc, SequenceStatus.Deleted));
                    result.Append(constructText(snake.Destination, SequenceStatus.NoChange));
                }
                else
                {
                    // case 2: the common snake first, then the source words
                    // that follow the snake are reported as deleted
                    Sequence rightSrc = new Sequence(snake.Source.EndIndex, src.EndIndex);
                    result.Append(constructText(snake.Destination, SequenceStatus.NoChange));
                    result.Append(constructText(rightSrc, SequenceStatus.Deleted));
                }
            }
            else
            {
                if (des.StartIndex != snake.Destination.StartIndex)
                {
                    // case 3: inserted words first, then the common snake
                    Sequence upDes = new Sequence(des.StartIndex, snake.Destination.StartIndex);
                    result.Append(constructText(upDes, SequenceStatus.Inserted));
                    result.Append(constructText(snake.Destination, SequenceStatus.NoChange));
                }
                else
                {
                    // case 4: the common snake first, then the destination
                    // words that follow the snake are reported as inserted
                    Sequence bottomDes = new Sequence(snake.Destination.EndIndex, des.EndIndex);
                    result.Append(constructText(snake.Destination, SequenceStatus.NoChange));
                    result.Append(constructText(bottomDes, SequenceStatus.Inserted));
                }
            }
            result.Append(tail);
            return result.ToString();
        }
Beispiel #3
0
        /// <summary>
        /// The public function merges the two copies of
        /// files stored inside this class. The html tags
        /// of the destination file is used in the merged
        /// file.
        /// </summary>
        /// <returns>
        /// The merged file
        /// </returns>
        public string merge()
        {
            // Merge the complete collections: each range starts at index 0
            // and ends at the element count of its collection.
            return doMerge(
                new Sequence(0, _original.Count),
                new Sequence(0, _modified.Count));
        }
Beispiel #4
0
        /// <summary>
        /// The function returns a html text string reconstructed
        /// from the sub collection of words its starting and ending
        /// indexes are marked by parameter seq and its collection is
        /// denoted by parameter status. If the status is "deleted",
        /// then the _original collection is used, otherwise, _modified
        /// is used.
        /// </summary>
        /// <param name="seq">
        /// Sequence object that marks the start index and end
        /// index of the sub sequence
        /// </param>
        /// <param name="status">
        /// Denotes the status of the sequence. When its value is
        /// Deleted or Inserted, some extra decoration will be added
        /// around each word.
        /// </param>
        /// <returns>
        /// The html text string constructed
        /// </returns>
        private string constructText(Sequence seq, SequenceStatus status)
        {
            // Concatenates the reconstructed text of every word in the
            // half-open range [seq.StartIndex, seq.EndIndex). The status
            // selects both the collection that is read and the decoration
            // tags passed to reconstruct.
            StringBuilder html = new StringBuilder();

            if (status == SequenceStatus.Deleted)
            {
                // words taken from _original, wrapped in the CommentOff tags
                int pos = seq.StartIndex;
                while (pos < seq.EndIndex)
                {
                    html.Append(_original[pos].reconstruct(CommentOff.BeginTag, CommentOff.EndTag));
                    pos++;
                }
            }
            else if (status == SequenceStatus.Inserted)
            {
                // words taken from _modified, wrapped in the Added tags
                int pos = seq.StartIndex;
                while (pos < seq.EndIndex)
                {
                    html.Append(_modified[pos].reconstruct(Added.BeginTag, Added.EndTag));
                    pos++;
                }
            }
            else if (status == SequenceStatus.NoChange)
            {
                // words present in both collections; the _modified copy is
                // used and reconstructed without extra tags
                int pos = seq.StartIndex;
                while (pos < seq.EndIndex)
                {
                    html.Append(_modified[pos].reconstruct());
                    pos++;
                }
            }
            // any other status value leaves the builder empty

            return html.ToString();
        }
Beispiel #5
0
 /// <summary>
 /// Creates a snake whose Source and Destination members each refer to a
 /// newly default-constructed Sequence.
 /// </summary>
 public MiddleSnake()
 {
     this.Source = new Sequence();
     this.Destination = new Sequence();
 }