예제 #1
0
 /// <summary>
 /// Creates a SubArray that views part of another SubArray, reusing
 /// the other instance's m_arData.
 /// </summary>
 /// <param name="arData">The SubArray to take the view from.</param>
 /// <param name="iOffset">1-based start position, relative to arData.</param>
 /// <param name="iLength">Length recorded for the new view.</param>
 public SubArray(SubArray arData, int iOffset, int iLength)
 {
     m_iLength = iLength;

     //iOffset is 1-based, so one is taken off after combining it
     //with the offset arData already carries.
     m_iOffset = arData.m_iOffset + iOffset - 1;

     m_arData = arData.m_arData;
 }
예제 #2
0
        /// <summary>
        /// Seeds the forward and reverse diagonal vectors with the
        /// "fictitious" endpoints from which forward and reverse path
        /// navigation start.
        /// </summary>
        /// <param name="A">First sequence (or portion of it).</param>
        /// <param name="B">Second sequence (or portion of it).</param>
        /// <returns>
        /// true when both A and B are non-empty and the vectors were seeded;
        /// false otherwise, in which case neither vector is touched.
        /// </returns>
        private bool SetupFictitiousPoints(SubArray A, SubArray B)
        {
            //Nothing to set up unless both sides have at least one element.
            if (A.Length <= 0 || B.Length <= 0)
            {
                return false;
            }

            //Forward navigation starts from x = 0 on diagonal 1.
            m_vecForward[1] = 0;

            //Reverse navigation starts from x = N + 1 on diagonal (N - M) + 1.
            int iLengthA = A.Length;
            int iLengthB = B.Length;
            int iStartDiagonal = iLengthA - iLengthB + 1;
            m_vecReverse[iStartDiagonal] = iLengthA + 1;

            return true;
        }
예제 #3
0
        /// <summary>
        /// Builds the EditScript containing every Edit needed
        /// to turn A into B.
        /// </summary>
        /// <returns>
        /// The EditScript produced by ConvertMatchPointsToEditScript from
        /// the match points found between A and B.
        /// </returns>
        public EditScript Execute()
        {
            SubArray WholeA = new SubArray(m_arA);
            SubArray WholeB = new SubArray(m_arB);

            //Gather every point where the two sequences match.
            ArrayList FoundMatches = new ArrayList();
            GetMatchPoints(WholeA, WholeB, FoundMatches);

            Debug.Assert(FoundMatches.Count == GetLCSLength(), "The number of match points must equal the LCS length.");

            //Turn the matches into edits and sanity-check the total edit length.
            EditScript Result = ConvertMatchPointsToEditScript(WholeA.Length, WholeB.Length, FoundMatches);
            Debug.Assert(Result.TotalEditLength == GetSESLength());

            return Result;
        }
예제 #4
0
        /// <summary>
        /// Recursively collects the points at which A and B match and
        /// appends them to MatchPoints.  Coordinates are made absolute by
        /// adding each sub-array's Offset, so they are not relative to
        /// the current sub-arrays.
        /// </summary>
        /// <param name="A">Portion of the first sequence to examine.</param>
        /// <param name="B">Portion of the second sequence to examine.</param>
        /// <param name="MatchPoints">List that receives one Point per matching pair.</param>
        private void GetMatchPoints(SubArray A, SubArray B, ArrayList MatchPoints)
        {
            //Nothing can match when either side is empty.
            if (A.Length <= 0 || B.Length <= 0)
            {
                return;
            }

            //Locate the middle snake, which runs from
            //(iSnakeStartX, iSnakeStartY) to (iSnakeEndX, iSnakeEndY).
            int iSnakeStartX, iSnakeEndX, iDiagonal;
            int iEditCount = FindMiddleSnake(A, B, out iSnakeStartX, out iSnakeEndX, out iDiagonal);
            int iSnakeStartY = iSnakeStartX - iDiagonal;
            int iSnakeEndY = iSnakeEndX - iDiagonal;

            if (iEditCount <= 1)
            {
                //With no differences every point is a match.  With exactly one
                //difference every point is a match except the differing one,
                //which has to be stepped over.
                Debug.Assert(iEditCount == 0 || Math.Abs(A.Length - B.Length) == 1, "A and B's lengths must differ by 1 if D == 1");

                //The shorter length bounds how many points can match.
                int iLengthA = A.Length;
                int iLengthB = B.Length;
                if (iLengthB > iLengthA)
                {
                    //A is shorter: pair every element of A[1..N] with one from B.
                    int iPosB = 1;
                    int iPosA = 1;
                    while (iPosA <= iLengthA)
                    {
                        //Step over the single differing element when we reach it.
                        if (A[iPosA] != B[iPosB])
                        {
                            iPosB++;
                        }

                        MatchPoints.Add(new Point(iPosA + A.Offset, iPosB + B.Offset));
                        iPosB++;
                        iPosA++;
                    }
                }
                else
                {
                    //B is not longer: pair every element of B[1..M] with one from A.
                    int iPosA = 1;
                    int iPosB = 1;
                    while (iPosB <= iLengthB)
                    {
                        //Step over the single differing element when we reach it.
                        if (A[iPosA] != B[iPosB])
                        {
                            iPosA++;
                        }

                        MatchPoints.Add(new Point(iPosA + A.Offset, iPosB + B.Offset));
                        iPosA++;
                        iPosB++;
                    }
                }

                return;
            }

            //More than one difference: handle what precedes the snake,
            //then the snake itself, then what follows it.
            GetMatchPoints(new SubArray(A, 1, iSnakeStartX), new SubArray(B, 1, iSnakeStartY), MatchPoints);

            int iSnakeX = iSnakeStartX + 1;
            while (iSnakeX <= iSnakeEndX)
            {
                //Absolute X and Y, not relative to the current sub-arrays.
                MatchPoints.Add(new Point(iSnakeX + A.Offset, iSnakeX - iDiagonal + B.Offset));
                iSnakeX++;
            }

            GetMatchPoints(new SubArray(A, iSnakeEndX + 1, A.Length - iSnakeEndX), new SubArray(B, iSnakeEndY + 1, B.Length - iSnakeEndY), MatchPoints);
        }
예제 #5
0
        /// <summary>
        /// Computes the end of the reverse D-path on diagonal k + iDelta,
        /// records it in the reverse vector and returns it.
        /// </summary>
        /// <param name="A">First sequence (or portion of it).</param>
        /// <param name="B">Second sequence (or portion of it).</param>
        /// <param name="D">Number of differences allowed on the path.</param>
        /// <param name="k">Diagonal index before the iDelta shift; callers pass values in -D..D.</param>
        /// <param name="iDelta">Shift added to k to get the diagonal actually used.</param>
        /// <returns>The x coordinate stored for diagonal k + iDelta.</returns>
        private int GetReverseDPaths(SubArray A, SubArray B, int D, int k, int iDelta)
        {
            DiagonalVector vecReverse = m_vecReverse;
            int iDiagonal = k + iDelta;

            //Pick the neighbouring diagonal to continue from.  The short-circuit
            //order matters: at k == -D only diagonal + 1 is read, and at
            //k == D only diagonal - 1 is read.
            bool bStepFromHigherDiagonal = (k == -D) || (k != D && vecReverse[iDiagonal + 1] <= vecReverse[iDiagonal - 1]);
            int iX = bStepFromHigherDiagonal ? vecReverse[iDiagonal + 1] - 1 : vecReverse[iDiagonal - 1];
            int iY = iX - iDiagonal;

            //Walk backwards along the diagonal while the elements are equal.
            while (iX > 0 && iY > 0 && A[iX] == B[iY])
            {
                --iX;
                --iY;
            }

            vecReverse[iDiagonal] = iX;
            return iX;
        }
예제 #6
0
        /// <summary>
        /// Recursively appends the elements of a common subsequence of A and B
        /// to Output, splitting the problem around the middle snake.
        /// </summary>
        /// <param name="A">Portion of the first sequence to examine.</param>
        /// <param name="B">Portion of the second sequence to examine.</param>
        /// <param name="Output">List that receives the common elements in order.</param>
        private void GetLCS(SubArray A, SubArray B, ArrayList Output)
        {
            //An empty side contributes nothing.
            if (A.Length <= 0 || B.Length <= 0)
            {
                return;
            }

            //Find the length D and the middle snake, which runs from
            //(iSnakeStartX, iSnakeStartY) to (iSnakeEndX, iSnakeEndY).
            int iSnakeStartX, iSnakeEndX, iDiagonal;
            int iEditCount = FindMiddleSnake(A, B, out iSnakeStartX, out iSnakeEndX, out iDiagonal);
            int iSnakeStartY = iSnakeStartX - iDiagonal;
            int iSnakeEndY = iSnakeEndX - iDiagonal;

            if (iEditCount <= 1)
            {
                //With at most one difference, the output is the shorter of
                //the two sequences (B when neither is shorter).
                SubArray Shorter = (B.Length > A.Length) ? A : B;
                for (int iPos = 1; iPos <= Shorter.Length; iPos++)
                {
                    Output.Add(Shorter[iPos]);
                }

                return;
            }

            //Part before the snake.
            GetLCS(new SubArray(A, 1, iSnakeStartX), new SubArray(B, 1, iSnakeStartY), Output);

            //The snake itself: these elements of A are output directly.
            int iSnakeX = iSnakeStartX + 1;
            while (iSnakeX <= iSnakeEndX)
            {
                Output.Add(A[iSnakeX]);
                iSnakeX++;
            }

            //Part after the snake.
            GetLCS(new SubArray(A, iSnakeEndX + 1, A.Length - iSnakeEndX), new SubArray(B, iSnakeEndY + 1, B.Length - iSnakeEndY), Output);
        }
예제 #7
0
        /// <summary>
        /// Computes the end of the forward D-path on diagonal k,
        /// records it in the forward vector and returns it.
        /// </summary>
        /// <param name="A">First sequence (or portion of it).</param>
        /// <param name="B">Second sequence (or portion of it).</param>
        /// <param name="D">Number of differences allowed on the path.</param>
        /// <param name="k">Diagonal index; callers pass values in -D..D.</param>
        /// <returns>The x coordinate stored for diagonal k.</returns>
        private int GetForwardDPaths(SubArray A, SubArray B, int D, int k)
        {
            DiagonalVector vecForward = m_vecForward;

            //Pick the neighbouring diagonal to continue from.  The short-circuit
            //order matters: at k == -D only diagonal k + 1 is read, and at
            //k == D only diagonal k - 1 is read.
            bool bStepFromHigherDiagonal = (k == -D) || (k != D && vecForward[k - 1] < vecForward[k + 1]);
            int iX = bStepFromHigherDiagonal ? vecForward[k + 1] : vecForward[k - 1] + 1;
            int iY = iX - k;

            //Walk forwards along the diagonal while the next elements are equal.
            while (iX < A.Length && iY < B.Length && A[iX + 1] == B[iY + 1])
            {
                ++iX;
                ++iY;
            }

            vecForward[k] = iX;
            return iX;
        }
예제 #8
0
        /// <summary>
        /// Searches for the middle snake of A and B by extending forward
        /// D-paths and reverse D-paths in turn, for increasing D, until a
        /// forward and a reverse path overlap on the same diagonal.
        /// </summary>
        /// <param name="A">First sequence (or portion of it).</param>
        /// <param name="B">Second sequence (or portion of it).</param>
        /// <param name="iPathStartX">Receives the x coordinate where the snake starts (-1 if none was found).</param>
        /// <param name="iPathEndX">Receives the x coordinate where the snake ends (-1 if none was found).</param>
        /// <param name="iPathK">Receives the snake's diagonal; y is x minus this value.</param>
        /// <returns>
        /// 2 * D - 1 when the overlap is detected while extending forward paths
        /// (only checked when the length difference is odd), 2 * D when it is
        /// detected while extending reverse paths (only checked when the length
        /// difference is even), or -1 if the loop ends without an overlap.
        /// </returns>
        private int FindMiddleSnake(SubArray A, SubArray B, out int iPathStartX, out int iPathEndX, out int iPathK)
        {
            //We don't have to check the result of this because the calling procedure
            //has already checked the length preconditions.
            SetupFictitiousPoints(A, B);

            //Defaults reported when no snake is found.
            iPathStartX = -1;
            iPathEndX = -1;
            iPathK = 0;

            //iDelta is the diagonal offset between the forward and reverse searches;
            //iCeiling is half the combined length, rounded up, and bounds D.
            int iDelta = A.Length - B.Length;
            int iCeiling = (int)Math.Ceiling((A.Length + B.Length) / 2.0);

            for (int D = 0; D <= iCeiling; D++)
            {
                for (int k = -D; k <= D; k += 2)
                {
                    //Find the end of the furthest reaching forward D-path in diagonal k.
                    GetForwardDPaths(A, B, D, k);
                    //If iDelta is odd (i.e. remainder == 1 or -1) and k lies within the
                    //range of diagonals covered by the reverse (D-1)-paths ...
                    if ((iDelta % 2 != 0) && (k >= (iDelta - (D - 1)) && k <= (iDelta + (D - 1))))
                    {
                        //If the path overlaps the furthest reaching reverse (D-1)-path in diagonal k.
                        if (m_vecForward[k] >= m_vecReverse[k])
                        {
                            //The last snake of the forward path is the middle snake.
                            iPathK = k;
                            iPathEndX = m_vecForward[k];
                            iPathStartX = iPathEndX;
                            int iPathStartY = iPathStartX - iPathK;
                            //Recover the start of the snake by walking back from its end
                            //along the diagonal while the elements are equal.
                            while (iPathStartX > 0 && iPathStartY > 0 && A[iPathStartX] == B[iPathStartY])
                            {
                                iPathStartX--;
                                iPathStartY--;
                            }
                            //Length of an SES is 2D-1.
                            return 2 * D - 1;
                        }
                    }
                }

                for (int k = -D; k <= D; k += 2)
                {
                    //Find the end of the furthest reaching reverse D-path in diagonal k+iDelta
                    GetReverseDPaths(A, B, D, k, iDelta);
                    //If iDelta is even and k+iDelta lies within the range of diagonals
                    //covered by the forward D-paths ...
                    if ((iDelta % 2 == 0) && ((k + iDelta) >= -D && (k + iDelta) <= D))
                    {
                        //If the path overlaps the furthest reaching forward D-path in diagonal k+iDelta.
                        if (m_vecReverse[k + iDelta] <= m_vecForward[k + iDelta])
                        {
                            //The last snake of the reverse path is the middle snake.
                            iPathK = k + iDelta;
                            iPathStartX = m_vecReverse[iPathK];
                            iPathEndX = iPathStartX;
                            int iPathEndY = iPathEndX - iPathK;
                            //Recover the end of the snake by walking forward from its start
                            //along the diagonal while the next elements are equal.
                            while (iPathEndX < A.Length && iPathEndY < B.Length && A[iPathEndX + 1] == B[iPathEndY + 1])
                            {
                                iPathEndX++;
                                iPathEndY++;
                            }
                            //Length of an SES is 2D.
                            return 2 * D;
                        }
                    }
                }
            }

            //We should never get here if the algorithm is coded correctly.
            Debug.Assert(false);
            return -1;
        }
예제 #9
0
        /// <summary>
        /// Computes the length of the "shortest edit script" as
        /// defined in Myers's paper.
        /// 
        /// Note: The result can differ from the Count property of the
        /// EditScript returned by Execute().  Any Edit whose Length is
        /// greater than 1 groups several single edits together, which
        /// makes EditScript.Count smaller than GetSESLength().  Likewise,
        /// an Edit of EditType Change should be read as a Delete plus an
        /// Insert for the specified Length.
        /// </summary>
        /// <returns>
        /// The number of differences D at which a forward path first reaches
        /// the end of both sequences; the length of the non-empty sequence
        /// when the other one is empty; or -1 if no path is found.
        /// </returns>
        public int GetSESLength()
        {
            SubArray WholeA = new SubArray(m_arA);
            SubArray WholeB = new SubArray(m_arB);

            if (!SetupFictitiousPoints(WholeA, WholeB))
            {
                //At least one side is empty, so the answer is the length of the other.
                return (m_arA.Length == 0) ? m_arB.Length : m_arA.Length;
            }

            int iLengthA = m_arA.Length;
            int iLengthB = m_arB.Length;
            int iMaxDifferences = iLengthA + iLengthB;

            for (int D = 0; D <= iMaxDifferences; D++)
            {
                for (int k = -D; k <= D; k += 2)
                {
                    //Stop as soon as a forward D-path reaches the end of both sequences.
                    int iEndX = GetForwardDPaths(WholeA, WholeB, D, k);
                    int iEndY = iEndX - k;
                    if (iEndX >= iLengthA && iEndY >= iLengthB)
                    {
                        return D;
                    }
                }
            }

            //We should never get here if the algorithm is coded correctly.
            Debug.Assert(false);
            return -1;
        }
예제 #10
0
        /// <summary>
        /// Computes the length of the "shortest edit script" by
        /// running the algorithm in reverse.  The result should
        /// always satisfy GetSESLength() == GetReverseSESLength().
        /// </summary>
        /// <returns>
        /// The number of differences D at which a reverse path first reaches
        /// the start of both sequences; the length of the non-empty sequence
        /// when the other one is empty; or -1 if no path is found.
        /// </returns>
        public int GetReverseSESLength()
        {
            SubArray WholeA = new SubArray(m_arA);
            SubArray WholeB = new SubArray(m_arB);

            if (!SetupFictitiousPoints(WholeA, WholeB))
            {
                //At least one side is empty, so the answer is the length of the other.
                return (m_arA.Length == 0) ? m_arB.Length : m_arA.Length;
            }

            int iLengthA = m_arA.Length;
            int iLengthB = m_arB.Length;
            int iDelta = iLengthA - iLengthB;
            int iMaxDifferences = iLengthA + iLengthB;

            for (int D = 0; D <= iMaxDifferences; D++)
            {
                for (int k = -D; k <= D; k += 2)
                {
                    //Stop as soon as a reverse D-path reaches the start of both sequences.
                    int iStartX = GetReverseDPaths(WholeA, WholeB, D, k, iDelta);
                    int iStartY = iStartX - (k + iDelta);
                    if (iStartX <= 0 && iStartY <= 0)
                    {
                        return D;
                    }
                }
            }

            //We should never get here if the algorithm is coded correctly.
            Debug.Assert(false);
            return -1;
        }