예제 #1
0
        /*
         * Recursive driver of the induced-sorting construction over T[0..n-1].
         *
         *   T     - input sequence; read at indices 0..n-1.
         *   SA    - work/output array. Entries 0..n-1 are zeroed and then filled;
         *           entries from n up to n + fs are used as scratch space
         *           (see the views created at offsets n + fs - k and n + fs - 2k).
         *   fs    - presumably the number of spare entries in SA past index n
         *           (it is only ever used as n + fs) -- confirm against callers.
         *   n     - length of T. T[n - 1] is read unconditionally, so n must be >= 1.
         *   k     - size of the bucket arrays C and B; presumably every symbol of T
         *           lies in [0, k) since B is indexed by symbol.
         *   isbwt - false: finish with induceSA; true: finish with computeBWT.
         *
         * Returns SA.
         * NOTE(review): pidx receives the result of computeBWT when isbwt is true but
         * is never returned or read afterwards -- confirm this is intended.
         */
        sais_main(BaseArray T, LongArray SA, long fs, long n, long k, bool isbwt)
        {
            BaseArray C, B, RA;
            long      i, j, b, m, p, q, name, pidx = 0, newfs;
            long      c0, c1;
            ulong     flags = 0;

            /* Choose storage for the bucket arrays C and B (k entries each).
             * Where the spare room allows, they are views into the tail of SA;
             * otherwise they are newly allocated. flags records the choice:
             *   1 - small-k branch with C freshly allocated (only tested together
             *       with 4 and 8 in stage 2)
             *   2 - B is a separate allocation: dropped before the recursion and
             *       re-created after it
             *   4 - C and B share one separate allocation: both dropped before the
             *       recursion and re-created after it
             *   8 - C must be refilled with getCounts at the start of stage 3 */
            if (k <= MINBUCKETSIZE)
            {
                C = new LongArray(new MemoryEfficientByteAlignedBigULongArray(k), 0);
                if (k <= fs)
                {
                    B = new LongArray(SA, n + fs - k); flags = 1;
                }
                else
                {
                    B = new LongArray(new MemoryEfficientByteAlignedBigULongArray(k), 0); flags = 3;
                }
            }
            else if (k <= fs)
            {
                C = new LongArray(SA, n + fs - k);
                if (k <= (fs - k))
                {
                    B = new LongArray(SA, n + fs - k * 2); flags = 0;
                }
                else if (k <= (MINBUCKETSIZE * 4))
                {
                    B = new LongArray(new MemoryEfficientByteAlignedBigULongArray(k), 0); flags = 2;
                }
                else
                {
                    /* B aliases C, so filling B overwrites the counts. */
                    B = C; flags = 8;
                }
            }
            else
            {
                C     = B = new LongArray(new MemoryEfficientByteAlignedBigULongArray(k), 0);
                flags = 4 | 8;
            }

            /* stage 1: reduce the problem by at least 1/2
             * sort all the LMS-substrings */
            getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */
            for (i = 0; i < n; ++i)
            {
                SA[i] = 0;
            }
            /* Scan T from right to left. b is the SA slot reserved for the most
             * recently found position (-1 = none yet), j the value still to be
             * written there, m the number of positions found so far. */
            b = -1; i = n - 1; j = n; m = 0; c0 = T[n - 1];
            /* Skip the trailing run in which symbols do not decrease leftwards. */
            do
            {
                c1 = c0;
            } while ((0 <= --i) && ((c0 = T[i]) >= c1));
            for (; 0 <= i;)
            {
                /* Skip the run in which symbols do not increase leftwards. */
                do
                {
                    c1 = c0;
                } while ((0 <= --i) && ((c0 = T[i]) <= c1));
                if (0 <= i)
                {
                    /* T[i] > T[i + 1]: position i + 1 starts an LMS-substring.
                     * Fill the slot reserved in the previous round, then reserve
                     * the next free slot at the end of bucket c1 for this one. */
                    if (0 <= b)
                    {
                        SA[b] = j;
                    }
                    b = --B[c1]; j = i; ++m;
                    do
                    {
                        c1 = c0;
                    } while ((0 <= --i) && ((c0 = T[i]) >= c1));
                }
            }
            if (1 < m)
            {
                LMSsort(T, SA, C, B, n, k);
                name = LMSpostproc(T, SA, n, m);
            }
            else if (m == 1)
            {
                /* Single LMS position: nothing to sort, exactly one name. */
                SA[b] = j + 1;
                name  = 1;
            }
            else
            {
                name = 0;
            }

            /* stage 2: solve the reduced problem
             * recurse if names are not yet unique */
            if (name < m)
            {
                /* Drop separately allocated bucket arrays for the duration of the
                 * recursive call; they are re-created after it returns. */
                if ((flags & 4) != 0)
                {
                    C = null; B = null;
                }
                if ((flags & 2) != 0)
                {
                    B = null;
                }
                newfs = (n + fs) - (m * 2);
                if ((flags & (1 | 4 | 8)) == 0)
                {
                    /* Either hand k fewer spare entries to the recursion, or set
                     * bit 8 so the counts are recomputed in stage 3 -- presumably
                     * because C lives in the tail of SA here and would otherwise
                     * be overwritten by the recursive call. */
                    if ((k + name) <= newfs)
                    {
                        newfs -= k;
                    }
                    else
                    {
                        flags |= 8;
                    }
                }
                /* Pack the non-zero entries of SA[m .. m + n/2 - 1], each reduced
                 * by one, so that they end at index 2m + newfs - 1. The reduced
                 * string RA starts at m + newfs, so this yields RA[0..m-1] provided
                 * exactly m entries are non-zero (expected after LMSpostproc). */
                for (i = m + (n >> 1) - 1, j = m * 2 + newfs - 1; m <= i; --i)
                {
                    if (SA[i] != 0)
                    {
                        SA[j--] = SA[i] - 1;
                    }
                }
                RA = new LongArray(SA, m + newfs);
                sais_main(RA, SA, newfs, m, name, false);
                RA = null;

                /* Repeat the stage-1 scan, this time writing each LMS position
                 * (i + 1) downwards from SA[2m - 1]. */
                i = n - 1; j = m * 2 - 1; c0 = T[n - 1];
                do
                {
                    c1 = c0;
                } while ((0 <= --i) && ((c0 = T[i]) >= c1));
                for (; 0 <= i;)
                {
                    do
                    {
                        c1 = c0;
                    } while ((0 <= --i) && ((c0 = T[i]) <= c1));
                    if (0 <= i)
                    {
                        SA[j--] = i + 1;
                        do
                        {
                            c1 = c0;
                        } while ((0 <= --i) && ((c0 = T[i]) >= c1));
                    }
                }

                /* Translate each entry of SA[0..m-1] (an index into the reduced
                 * problem) into the text position stored at SA[m + index]. */
                for (i = 0; i < m; ++i)
                {
                    SA[i] = SA[m + SA[i]];
                }
                if ((flags & 4) != 0)
                {
                    C = B = new LongArray(new MemoryEfficientByteAlignedBigULongArray(k), 0);
                }
                if ((flags & 2) != 0)
                {
                    B = new LongArray(new MemoryEfficientByteAlignedBigULongArray(k), 0);
                }
            }

            /* stage 3: induce the result for the original problem */
            if ((flags & 8) != 0)
            {
                getCounts(T, C, n, k);
            }
            /* put all left-most S characters into their buckets */
            if (1 < m)
            {
                getBuckets(C, B, k, true); /* find ends of buckets */
                /* Walk SA[0..m-1] from the last entry to the first, moving each
                 * position p to the top of the not-yet-filled part of SA (index j
                 * counts down from n). Before each group of entries sharing first
                 * symbol c0, slots down to B[c0] are zeroed. */
                i = m - 1; j = n; p = SA[m - 1]; c1 = T[p];
                do
                {
                    q = B[c0 = c1];
                    while (q < j)
                    {
                        SA[--j] = 0;
                    }
                    do
                    {
                        SA[--j] = p;
                        if (--i < 0)
                        {
                            break;
                        }
                        p = SA[i];
                    } while ((c1 = T[p]) == c0);
                } while (0 <= i);
                /* Clear everything below the lowest placed entry. */
                while (0 < j)
                {
                    SA[--j] = 0;
                }
            }
            if (isbwt == false)
            {
                induceSA(T, SA, C, B, n, k);
            }
            else
            {
                /* NOTE(review): the value stored in pidx is not used after this. */
                pidx = computeBWT(T, SA, C, B, n, k);
            }
            C = null; B = null;
            return(SA);
        }
예제 #2
0
 /// <summary>
 /// Creates a view onto the same backing store as <paramref name="array"/>,
 /// with its origin moved <paramref name="pos"/> entries past that view's origin.
 /// No elements are copied.
 /// </summary>
 /// <param name="array">Existing view whose backing store is shared.</param>
 /// <param name="pos">Offset added to the origin of <paramref name="array"/>.</param>
 public LongArray(LongArray array, long pos)
 {
     // Offsets accumulate, so a view of a view still indexes the shared store directly.
     this.m_pos   = array.m_pos + pos;
     this.m_array = array.m_array;
 }
예제 #3
0
        /*
         * Post-processes the sorted LMS-substrings held in SA and assigns each a
         * lexicographic name.
         *
         *   T  - input sequence; read at indices 0..n-1.
         *   SA - on entry, the sorted substring start positions are expected to be
         *        stored as negative values (bitwise complement of the position).
         *        On exit SA[0..m-1] holds those positions in the same order, and
         *        SA[m + (p >> 1)] holds the name of the substring starting at p.
         *   n  - length of T.
         *   m  - number of LMS-substrings.
         *
         * Returns the number of distinct names assigned (the last value of name).
         *
         * NOTE(review): the marker scheme needs SA's indexer to round-trip negative
         * long values -- confirm for the LongArray backing store in use.
         */
        LMSpostproc(BaseArray T, LongArray SA, long n, long m)
        {
            long i, j, p, q, plen, qlen, name;
            long c0, c1;
            bool diff;

            /* compact all the sorted substrings into the first m items of SA
             *  2*m must be not larger than n (provable) */
            /* The leading run of negative entries is already in place: just decode it. */
            for (i = 0; (p = SA[i]) < 0; ++i)
            {
                SA[i] = ~p;
            }
            if (i < m)
            {
                /* Move every further negative entry, decoded, down to slot j and
                 * zero its old slot, until m entries have been collected. The loop
                 * has no bound other than j == m. */
                for (j = i, ++i; ; ++i)
                {
                    if ((p = SA[i]) < 0)
                    {
                        SA[j++] = ~p; SA[i] = 0;
                        if (j == m)
                        {
                            break;
                        }
                    }
                }
            }

            /* store the length of all substrings */
            /* Right-to-left scan of T; j is the last index of the substring that
             * is currently being measured. */
            i = n - 1; j = n - 1; c0 = T[n - 1];
            do
            {
                c1 = c0;
            } while ((0 <= --i) && ((c0 = T[i]) >= c1));
            for (; 0 <= i;)
            {
                do
                {
                    c1 = c0;
                } while ((0 <= --i) && ((c0 = T[i]) <= c1));
                if (0 <= i)
                {
                    /* The substring starts at i + 1 and spans j - i symbols. Its
                     * length is stored at m + (start >> 1); halving the position
                     * presumably relies on two starts never being adjacent. */
                    SA[m + ((i + 1) >> 1)] = j - i; j = i + 1;
                    do
                    {
                        c1 = c0;
                    } while ((0 <= --i) && ((c0 = T[i]) >= c1));
                }
            }

            /* find the lexicographic names of all substrings */
            /* q/qlen describe the substring that most recently received a new name;
             * q = n with qlen = 0 initially, so the first entry gets a new name
             * unless its stored length is 0 (the q + plen < n test then fails). */
            for (i = 0, name = 0, q = n, qlen = 0; i < m; ++i)
            {
                p = SA[i]; plen = SA[m + (p >> 1)]; diff = true;
                if ((plen == qlen) && ((q + plen) < n))
                {
                    /* Same length: compare symbol by symbol. */
                    for (j = 0; (j < plen) && (T[p + j] == T[q + j]); ++j)
                    {
                    }
                    if (j == plen)
                    {
                        diff = false;
                    }
                }
                if (diff != false)
                {
                    ++name; q = p; qlen = plen;
                }
                /* Overwrite the stored length with the (1-based) name. */
                SA[m + (p >> 1)] = name;
            }

            return(name);
        }