Esempio n. 1
0
        /// <summary>
        /// Compares two arrays and returns the differences between them as a sequence of hunks.
        /// </summary>
        /// <param name="baseArray">the original (A) version of the data</param>
        /// <param name="changedArray">the modified (B) version of the data</param>
        /// <returns>the hunks produced by <c>CreateDiffs</c> once <c>LCS</c> has flagged the modified entries</returns>
        public static IEnumerable <Hunk> GetDiff <T> (T[] baseArray, T[] changedArray)
        {
            // Wrap both inputs: "original" is the A side, "modified" the B side of the comparison.
            DiffData <T> original = new DiffData <T> (baseArray);
            DiffData <T> modified = new DiffData <T> (changedArray);

            // Both work vectors share one size: 2 * (lengthA + lengthB + 1) + 2 slots.
            int vectorLength = 2 * (original.Length + modified.Length + 1) + 2;

            // Work vector for the (0,0) to (x,y) search.
            int[] forwardVector = new int[vectorLength];
            // Work vector for the (u,v) to (N,M) search.
            int[] reverseVector = new int[vectorLength];

            // Flag the modified entries over the complete range of both sequences.
            LCS(original, 0, original.Length, modified, 0, modified.Length, forwardVector, reverseVector);

            return CreateDiffs(original, modified);
        }
Esempio n. 2
0
        // SMS

        /// <summary>
        /// This is the divide-and-conquer implementation of the longest common-subsequence (LCS)
        /// algorithm.
        /// The published algorithm recursively passes parts of the A and B sequences.
        /// To avoid copying these arrays, only the lower and upper bounds are passed while the sequences stay constant.
        /// Entries are compared with <c>EqualityComparer&lt;T&gt;.Default</c>, so a null entry in either
        /// sequence is compared safely instead of raising a NullReferenceException.
        /// The result is recorded in the <c>Modified</c> flags of <paramref name="dataA"/> and <paramref name="dataB"/>.
        /// </summary>
        /// <param name="dataA">sequence A</param>
        /// <param name="lowerA">lower bound of the actual range in dataA</param>
        /// <param name="upperA">upper bound of the actual range in dataA (exclusive)</param>
        /// <param name="dataB">sequence B</param>
        /// <param name="lowerB">lower bound of the actual range in dataB</param>
        /// <param name="upperB">upper bound of the actual range in dataB (exclusive)</param>
        /// <param name="downVector">a vector for the (0,0) to (x,y) search. Passed as a parameter for speed reasons.</param>
        /// <param name="upVector">a vector for the (u,v) to (N,M) search. Passed as a parameter for speed reasons.</param>
        static void LCS <T> (DiffData <T> dataA, int lowerA, int upperA, DiffData <T> dataB, int lowerB, int upperB, int[] downVector, int[] upVector)
        {
            // Null-safe equality that also avoids boxing value-type entries.
            var comparer = System.Collections.Generic.EqualityComparer<T>.Default;

            // Fast walkthrough of equal entries at the start of the range.
            while (lowerA < upperA && lowerB < upperB && comparer.Equals(dataA.Data[lowerA], dataB.Data[lowerB]))
            {
                lowerA++;
                lowerB++;
            }

            // Fast walkthrough of equal entries at the end of the range.
            while (lowerA < upperA && lowerB < upperB && comparer.Equals(dataA.Data[upperA - 1], dataB.Data[upperB - 1]))
            {
                --upperA;
                --upperB;
            }

            if (lowerA == upperA)
            {
                // Nothing left in A: everything remaining in B is marked as inserted.
                while (lowerB < upperB)
                {
                    dataB.Modified[lowerB++] = true;
                }
            }
            else if (lowerB == upperB)
            {
                // Nothing left in B: everything remaining in A is marked as deleted.
                while (lowerA < upperA)
                {
                    dataA.Modified[lowerA++] = true;
                }
            }
            else
            {
                // Find the middle snake of an optimal path for A and B.
                SMSRD smsrd = SMS(dataA, lowerA, upperA, dataB, lowerB, upperB, downVector, upVector);

                // The path runs from (lowerA, lowerB) to (x,y) and from (x,y) to (upperA, upperB);
                // a single split point is sufficient, so each half is solved recursively.
                LCS(dataA, lowerA, smsrd.x, dataB, lowerB, smsrd.y, downVector, upVector);
                LCS(dataA, smsrd.x, upperA, dataB, smsrd.y, upperB, downVector, upVector);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Scans the <c>Modified</c> flags of both data sets in parallel and produces the
        /// edit script in forward order. The sequence is generated lazily, one hunk per
        /// contiguous run of deleted and/or inserted entries.
        /// </summary>
        /// <param name="baseData">the original (A) data with its modification flags</param>
        /// <param name="changedData">the modified (B) data with its modification flags</param>
        /// <returns>
        /// hunks constructed from the 1-based start positions in A and B followed by the
        /// number of entries consumed from A and from B
        /// </returns>
        static IEnumerable <Hunk> CreateDiffs <T>(DiffData <T> baseData, DiffData <T> changedData)
        {
            int posA = 0;
            int posB = 0;

            while (posA < baseData.Length || posB < changedData.Length)
            {
                // Both sides still have an entry and neither is flagged: the entries are equal.
                bool bothUnchanged = posA < baseData.Length && !baseData.Modified[posA] &&
                                     posB < changedData.Length && !changedData.Modified[posB];
                if (bothUnchanged)
                {
                    ++posA;
                    ++posB;
                    continue;
                }

                // Start of a run of deleted and/or inserted entries.
                int firstA = posA;
                int firstB = posB;

                // Consume entries of A that are flagged, or all remaining ones once B is exhausted.
                while (posA < baseData.Length && (posB >= changedData.Length || baseData.Modified[posA]))
                {
                    ++posA;
                }

                // Consume entries of B that are flagged, or all remaining ones once A is exhausted.
                while (posB < changedData.Length && (posA >= baseData.Length || changedData.Modified[posB]))
                {
                    ++posB;
                }

                int consumedA = posA - firstA;
                int consumedB = posB - firstB;
                if (consumedA > 0 || consumedB > 0)
                {
                    // Report the run; positions handed to Hunk are 1-based.
                    yield return new Hunk(firstA + 1, firstB + 1, consumedA, consumedB);
                }
            }
        }
Esempio n. 4
0
        /// <summary>
        /// This is the algorithm to find the Shortest Middle Snake (SMS).
        /// A forward search and a reverse search are extended alternately, one step (D) at a time,
        /// until their furthest-reaching points overlap on a common diagonal.
        /// Entries are compared with <c>EqualityComparer&lt;T&gt;.Default</c>, so a null entry in either
        /// sequence is compared safely instead of raising a NullReferenceException.
        /// </summary>
        /// <param name="dataA">sequence A</param>
        /// <param name="lowerA">lower bound of the actual range in dataA</param>
        /// <param name="upperA">upper bound of the actual range in dataA (exclusive)</param>
        /// <param name="dataB">sequence B</param>
        /// <param name="lowerB">lower bound of the actual range in dataB</param>
        /// <param name="upperB">upper bound of the actual range in dataB (exclusive)</param>
        /// <param name="downVector">a vector for the (0,0) to (x,y) search. Passed as a parameter for speed reasons.</param>
        /// <param name="upVector">a vector for the (u,v) to (N,M) search. Passed as a parameter for speed reasons.</param>
        /// <returns>a record whose x and y hold the point where the forward and reverse paths overlap</returns>
        /// <exception cref="ApplicationException">no overlap was found within the maximum number of steps</exception>
        static SMSRD SMS <T> (DiffData <T> dataA, int lowerA, int upperA, DiffData <T> dataB, int lowerB, int upperB, int[] downVector, int[] upVector)
        {
            // Null-safe equality that also avoids boxing value-type entries.
            var comparer = System.Collections.Generic.EqualityComparer<T>.Default;

            SMSRD ret;
            int   MAX = dataA.Length + dataB.Length + 1;

            // the k-line to start the forward search
            int downK = lowerA - lowerB;
            // the k-line to start the reverse search
            int upK = upperA - upperB;

            // Difference between the lengths of the two ranges; its parity decides which
            // of the two searches is the one that checks for an overlap.
            int  delta    = (upperA - lowerA) - (upperB - lowerB);
            bool oddDelta = (delta & 1) != 0;

            // The vectors in the publication accept negative indexes. The vectors implemented here are 0-based
            // and are accessed using a specific offset: upOffset for upVector and downOffset for downVector.
            int downOffset = MAX - downK;
            int upOffset   = MAX - upK;

            // Upper bound for the number of search steps.
            int MaxD = ((upperA - lowerA + upperB - lowerB) / 2) + 1;

            // init vectors
            downVector[downOffset + downK + 1] = lowerA;
            upVector[upOffset + upK - 1]       = upperA;

            for (int D = 0; D <= MaxD; D++)
            {
                // Extend the forward path.
                for (int k = downK - D; k <= downK + D; k += 2)
                {
                    // find the only or better starting point
                    int x, y;
                    if (k == downK - D)
                    {
                        // down
                        x = downVector[downOffset + k + 1];
                    }
                    else
                    {
                        // a step to the right
                        x = downVector[downOffset + k - 1] + 1;
                        if (k < downK + D && downVector[downOffset + k + 1] >= x)
                        {
                            // down
                            x = downVector[downOffset + k + 1];
                        }
                    }
                    y = x - k;

                    // find the end of the furthest reaching forward D-path in diagonal k.
                    while (x < upperA && y < upperB && comparer.Equals(dataA.Data[x], dataB.Data[y]))
                    {
                        x++;
                        y++;
                    }
                    downVector[downOffset + k] = x;

                    // overlap ?
                    if (oddDelta && upK - D < k && k < upK + D)
                    {
                        if (upVector[upOffset + k] <= downVector[downOffset + k])
                        {
                            // Only the single point (x,y) is reported.
                            ret.x = downVector[downOffset + k];
                            ret.y = downVector[downOffset + k] - k;
                            return(ret);
                        }
                    }
                }

                // Extend the reverse path.
                for (int k = upK - D; k <= upK + D; k += 2)
                {
                    // find the only or better starting point
                    int x, y;
                    if (k == upK + D)
                    {
                        // up
                        x = upVector[upOffset + k - 1];
                    }
                    else
                    {
                        // left
                        x = upVector[upOffset + k + 1] - 1;
                        if (k > upK - D && upVector[upOffset + k - 1] < x)
                        {
                            // up
                            x = upVector[upOffset + k - 1];
                        }
                    }
                    y = x - k;

                    // follow the diagonal backwards while the entries are equal
                    while (x > lowerA && y > lowerB && comparer.Equals(dataA.Data[x - 1], dataB.Data[y - 1]))
                    {
                        x--;
                        y--;
                    }
                    upVector[upOffset + k] = x;

                    // overlap ?
                    if (!oddDelta && downK - D <= k && k <= downK + D)
                    {
                        if (upVector[upOffset + k] <= downVector[downOffset + k])
                        {
                            // Only the single point (x,y) is reported.
                            ret.x = downVector[downOffset + k];
                            ret.y = downVector[downOffset + k] - k;
                            return(ret);
                        }
                    }
                }
            }

            throw new ApplicationException("the algorithm should never come here.");
        }