Пример #1
0
        /* compute SA and BWT */
        private static void InduceSA(IBaseArray T, int[] SA, IBaseArray C, IBaseArray B, int n, int k)
        {
            int b, i, j;
            int c0, c1;

            /* compute SAl */
            if (C == B)
            {
                GetCounts(T, C, n, k);
            }

            GetBuckets(C, B, k, false); /* find starts of buckets */

            j       = n - 1;
            b       = B[c1 = T[j]];
            SA[b++] = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;

            for (i = 0; i < n; ++i)
            {
                j     = SA[i];
                SA[i] = ~j;

                if (0 < j)
                {
                    if ((c0 = T[--j]) != c1)
                    {
                        B[c1] = b;
                        b     = B[c1 = c0];
                    }

                    SA[b++] = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
                }
            }

            /* compute SAs */
            if (C == B)
            {
                GetCounts(T, C, n, k);
            }

            GetBuckets(C, B, k, true); /* find ends of buckets */

            for (i = n - 1, b = B[c1 = 0]; 0 <= i; --i)
            {
                if (0 < (j = SA[i]))
                {
                    if ((c0 = T[--j]) != c1)
                    {
                        B[c1] = b;
                        b     = B[c1 = c0];
                    }

                    SA[--b] = ((j == 0) || (T[j - 1] > c1)) ? ~j : j;
                }
                else
                {
                    SA[i] = ~j;
                }
            }
        }
Пример #2
0
        private static void GetCounts(IBaseArray T, IBaseArray C, int n, int k)
        {
            for (int i = 0; i < k; ++i)
            {
                C[i] = 0;
            }

            for (int i = 0; i < n; ++i)
            {
                C[T[i]] = C[T[i]] + 1;
            }
        }
        LmSsort(IBaseArray T, IBaseArray sa, IBaseArray c, IBaseArray baseArray, long n, long k)
        {
            long i;
            long c0, c1;

            /* compute SAl */
            if (c == baseArray)
            {
                GetCounts(T, c, n, k);
            }
            GetBuckets(c, baseArray, k, false); /* find starts of buckets */
            var j = n - 1;
            var b = baseArray[c1 = T[j]];

            --j;
            sa[b++] = (T[j] < c1) ? ~j : j;
            for (i = 0; i < n; ++i)
            {
                if (0 < (j = sa[i]))
                {
                    if ((c0 = T[j]) != c1)
                    {
                        baseArray[c1] = b; b = baseArray[c1 = c0];
                    }
                    --j;
                    sa[b++] = (T[j] < c1) ? ~j : j;
                    sa[i]   = 0;
                }
                else if (j < 0)
                {
                    sa[i] = ~j;
                }
            }
            /* compute SAs */
            if (c == baseArray)
            {
                GetCounts(T, c, n, k);
            }
            GetBuckets(c, baseArray, k, true); /* find ends of buckets */
            for (i = n - 1, b = baseArray[c1 = 0]; 0 <= i; --i)
            {
                if (0 < (j = sa[i]))
                {
                    if ((c0 = T[j]) != c1)
                    {
                        baseArray[c1] = b; b = baseArray[c1 = c0];
                    }
                    --j;
                    sa[--b] = (T[j] > c1) ? ~(j + 1) : j;
                    sa[i]   = 0;
                }
            }
        }
        GetCounts(IBaseArray T, IBaseArray c, long n, long k)
        {
            long i;

            for (i = 0; i < k; ++i)
            {
                c[i] = 0;
            }
            for (i = 0; i < n; ++i)
            {
                c[T[i]] = c[T[i]] + 1;
            }
        }
        InduceSa(IBaseArray T, IBaseArray sa, IBaseArray c, IBaseArray B, long n, long k)
        {
            long i;
            long c0, c1;

            /* compute SAl */
            if (c == B)
            {
                GetCounts(T, c, n, k);
            }
            GetBuckets(c, B, k, false); /* find starts of buckets */
            var j = n - 1;
            var b = B[c1 = T[j]];

            sa[b++] = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
            for (i = 0; i < n; ++i)
            {
                j = sa[i]; sa[i] = ~j;
                if (0 < j)
                {
                    if ((c0 = T[--j]) != c1)
                    {
                        B[c1] = b; b = B[c1 = c0];
                    }
                    sa[b++] = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
                }
            }
            /* compute SAs */
            if (c == B)
            {
                GetCounts(T, c, n, k);
            }
            GetBuckets(c, B, k, true); /* find ends of buckets */
            for (i = n - 1, b = B[c1 = 0]; 0 <= i; --i)
            {
                if (0 < (j = sa[i]))
                {
                    if ((c0 = T[--j]) != c1)
                    {
                        B[c1] = b; b = B[c1 = c0];
                    }
                    sa[--b] = ((j == 0) || (T[j - 1] > c1)) ? ~j : j;
                }
                else
                {
                    sa[i] = ~j;
                }
            }
        }
        GetBuckets(IBaseArray c, IBaseArray baseArray, long k, bool end)
        {
            long i, sum = 0;

            if (end)
            {
                for (i = 0; i < k; ++i)
                {
                    sum += c[i]; baseArray[i] = sum;
                }
            }
            else
            {
                for (i = 0; i < k; ++i)
                {
                    sum += c[i]; baseArray[i] = sum - c[i];
                }
            }
        }
Пример #7
0
        private static void GetBuckets(IBaseArray C, IBaseArray B, int k, bool end)
        {
            int sum = 0;

            if (end)
            {
                for (int i = 0; i < k; ++i)
                {
                    sum += C[i];
                    B[i] = sum;
                }
            }
            else
            {
                for (int i = 0; i < k; ++i)
                {
                    sum += C[i];
                    B[i] = sum - C[i];
                }
            }
        }
Пример #8
0
        private static void LMSsort(IBaseArray T, int[] SA, IBaseArray C, IBaseArray B, int n, int k)
        {
            int i, j, b;
            int c0, c1;

            if (C == B)
            {
                GetCounts(T, C, n, k);
            }

            GetBuckets(C, B, k, false);

            j = n - 1;
            b = B[c1 = T[j]];
            --j;
            SA[b++] = (T[j] < c1) ? ~j : j;

            for (i = 0; i < n; ++i)
            {
                if (0 < (j = SA[i]))
                {
                    if ((c0 = T[j]) != c1)
                    {
                        B[c1] = b;
                        b     = B[c1 = c0];
                    }

                    --j;

                    SA[b++] = (T[j] < c1) ? ~j : j;
                    SA[i]   = 0;
                }
                else if (j < 0)
                {
                    SA[i] = ~j;
                }
            }

            /* compute SAs */
            if (C == B)
            {
                GetCounts(T, C, n, k);
            }

            GetBuckets(C, B, k, true);

            for (i = n - 1, b = B[c1 = 0]; 0 <= i; --i)
            {
                if (0 < (j = SA[i]))
                {
                    if ((c0 = T[j]) != c1)
                    {
                        B[c1] = b;
                        b     = B[c1 = c0];
                    }

                    --j;

                    SA[--b] = (T[j] > c1) ? ~(j + 1) : j;
                    SA[i]   = 0;
                }
            }
        }
Пример #9
0
        private static int SaisMain(IBaseArray T, int[] SA, int fs, int n, int k)
        {
            IBaseArray C, B, RA;
            int        i, j, b, m, p, q, name, pidx = 0, newfs;
            int        c0, c1;
            uint       flags;

            if (k <= MINBUCKETSIZE)
            {
                C = new IntArray(new int[k], 0);

                if (k <= fs)
                {
                    B     = new IntArray(SA, n + fs - k);
                    flags = 1;
                }
                else
                {
                    B     = new IntArray(new int[k], 0);
                    flags = 3;
                }
            }
            else if (k <= fs)
            {
                C = new IntArray(SA, n + fs - k);

                if (k <= (fs - k))
                {
                    B     = new IntArray(SA, n + fs - k * 2);
                    flags = 0;
                }
                else if (k <= (MINBUCKETSIZE * 4))
                {
                    B     = new IntArray(new int[k], 0);
                    flags = 2;
                }
                else
                {
                    B     = C;
                    flags = 8;
                }
            }
            else
            {
                C     = B = new IntArray(new int[k], 0);
                flags = 4 | 8;
            }

            GetCounts(T, C, n, k);
            GetBuckets(C, B, k, true);

            for (i = 0; i < n; ++i)
            {
                SA[i] = 0;
            }

            b  = -1;
            i  = n - 1;
            j  = n;
            m  = 0;
            c0 = T[n - 1];

            do
            {
                c1 = c0;
            } while ((0 <= --i) && ((c0 = T[i]) >= c1));

            for (; 0 <= i;)
            {
                do
                {
                    c1 = c0;
                } while ((0 <= --i) && ((c0 = T[i]) <= c1));

                if (0 <= i)
                {
                    if (0 <= b)
                    {
                        SA[b] = j;
                    }

                    b = --B[c1];
                    j = i;
                    ++m;

                    do
                    {
                        c1 = c0;
                    } while ((0 <= --i) && ((c0 = T[i]) >= c1));
                }
            }

            if (1 < m)
            {
                LMSsort(T, SA, C, B, n, k);
                name = LMSpostproc(T, SA, n, m);
            }
            else if (m == 1)
            {
                SA[b] = j + 1;
                name  = 1;
            }
            else
            {
                name = 0;
            }

            if (name < m)
            {
                if ((flags & 4) != 0)
                {
                    C = null;
                    B = null;
                }

                if ((flags & 2) != 0)
                {
                    B = null;
                }

                newfs = (n + fs) - (m * 2);

                if ((flags & (1 | 4 | 8)) == 0)
                {
                    if ((k + name) <= newfs)
                    {
                        newfs -= k;
                    }
                    else
                    {
                        flags |= 8;
                    }
                }

                for (i = m + (n >> 1) - 1, j = m * 2 + newfs - 1; m <= i; --i)
                {
                    if (SA[i] != 0)
                    {
                        SA[j--] = SA[i] - 1;
                    }
                }


                RA = new IntArray(SA, m + newfs);
                SaisMain(RA, SA, newfs, m, name);

                i  = n - 1;
                j  = m * 2 - 1;
                c0 = T[n - 1];

                do
                {
                    c1 = c0;
                }while ((0 <= --i) && ((c0 = T[i]) >= c1));

                for (; 0 <= i;)
                {
                    do
                    {
                        c1 = c0;
                    }while ((0 <= --i) && ((c0 = T[i]) <= c1));

                    if (0 <= i)
                    {
                        SA[j--] = i + 1;

                        do
                        {
                            c1 = c0;
                        }while ((0 <= --i) && ((c0 = T[i]) >= c1));
                    }
                }

                for (i = 0; i < m; ++i)
                {
                    SA[i] = SA[m + SA[i]];
                }

                if ((flags & 4) != 0)
                {
                    C = B = new IntArray(new int[k], 0);
                }

                if ((flags & 2) != 0)
                {
                    B = new IntArray(new int[k], 0);
                }
            }

            if ((flags & 8) != 0)
            {
                GetCounts(T, C, n, k);
            }

            if (1 < m)
            {
                GetBuckets(C, B, k, true);

                i  = m - 1;
                j  = n;
                p  = SA[m - 1];
                c1 = T[p];

                do
                {
                    q = B[c0 = c1];

                    while (q < j)
                    {
                        SA[--j] = 0;
                    }

                    do
                    {
                        SA[--j] = p;

                        if (--i < 0)
                        {
                            break;
                        }

                        p = SA[i];
                    }while ((c1 = T[p]) == c0);
                }while (0 <= i);

                while (0 < j)
                {
                    SA[--j] = 0;
                }
            }

            InduceSA(T, SA, C, B, n, k);

            return(pidx);
        }
Пример #10
0
        private static int LMSpostproc(IBaseArray T, int[] SA, int n, int m)
        {
            int  i, j, p, q, plen, qlen, name;
            int  c0, c1;
            bool diff;

            for (i = 0; (p = SA[i]) < 0; ++i)
            {
                SA[i] = ~p;
            }

            if (i < m)
            {
                for (j = i, ++i; ; ++i)
                {
                    if ((p = SA[i]) < 0)
                    {
                        SA[j++] = ~p;
                        SA[i]   = 0;

                        if (j == m)
                        {
                            break;
                        }
                    }
                }
            }

            i  = n - 1;
            j  = n - 1;
            c0 = T[n - 1];

            do
            {
                c1 = c0;
            }while ((0 <= --i) && ((c0 = T[i]) >= c1));

            for (; 0 <= i;)
            {
                do
                {
                    c1 = c0;
                }while ((0 <= --i) && ((c0 = T[i]) <= c1));

                if (0 <= i)
                {
                    SA[m + ((i + 1) >> 1)] = j - i;
                    j = i + 1;

                    do
                    {
                        c1 = c0;
                    }while ((0 <= --i) && ((c0 = T[i]) >= c1));
                }
            }

            for (i = 0, name = 0, q = n, qlen = 0; i < m; ++i)
            {
                p    = SA[i];
                plen = SA[m + (p >> 1)];
                diff = true;

                if ((plen == qlen) && ((q + plen) < n))
                {
                    for (j = 0; (j < plen) && (T[p + j] == T[q + j]); ++j)
                    {
                        ;
                    }

                    if (j == plen)
                    {
                        diff = false;
                    }
                }

                if (diff != false)
                {
                    ++name;
                    q    = p;
                    qlen = plen;
                }

                SA[m + (p >> 1)] = name;
            }

            return(name);
        }
        sais_main(IBaseArray T, LongArray SA, long fs, long n, long k, bool isbwt)
        {
            IBaseArray C, B;
            long       i, j, b, m, p, q, name;
            long       c0, c1;
            ulong      flags = 0;

            if (k <= Minbucketsize)
            {
                C = new LongArray(new MemoryEfficientByteAlignedBigULongArray(k), 0);
                if (k <= fs)
                {
                    B = new LongArray(SA, n + fs - k); flags = 1;
                }
                else
                {
                    B = new LongArray(new MemoryEfficientByteAlignedBigULongArray(k), 0); flags = 3;
                }
            }
            else if (k <= fs)
            {
                C = new LongArray(SA, n + fs - k);
                if (k <= (fs - k))
                {
                    B = new LongArray(SA, n + fs - k * 2); flags = 0;
                }
                else if (k <= (Minbucketsize * 4))
                {
                    B = new LongArray(new MemoryEfficientByteAlignedBigULongArray(k), 0); flags = 2;
                }
                else
                {
                    B = C; flags = 8;
                }
            }
            else
            {
                C     = B = new LongArray(new MemoryEfficientByteAlignedBigULongArray(k), 0);
                flags = 4 | 8;
            }

            /* stage 1: reduce the problem by at least 1/2
             * sort all the LMS-substrings */
            GetCounts(T, C, n, k); GetBuckets(C, B, k, true); /* find ends of buckets */
            for (i = 0; i < n; ++i)
            {
                SA[i] = 0;
            }
            b = -1; i = n - 1; j = n; m = 0; c0 = T[n - 1];
            do
            {
                c1 = c0;
            } while ((0 <= --i) && ((c0 = T[i]) >= c1));
            for (; 0 <= i;)
            {
                do
                {
                    c1 = c0;
                } while ((0 <= --i) && ((c0 = T[i]) <= c1));
                if (0 <= i)
                {
                    if (0 <= b)
                    {
                        SA[b] = j;
                    }
                    b = --B[c1]; j = i; ++m;
                    do
                    {
                        c1 = c0;
                    } while ((0 <= --i) && ((c0 = T[i]) >= c1));
                }
            }
            if (1 < m)
            {
                LmSsort(T, SA, C, B, n, k);
                name = LmSpostproc(T, SA, n, m);
            }
            else if (m == 1)
            {
                SA[b] = j + 1;
                name  = 1;
            }
            else
            {
                name = 0;
            }

            /* stage 2: solve the reduced problem
             * recurse if names are not yet unique */
            if (name < m)
            {
                if ((flags & 4) != 0)
                {
                    C = null; B = null;
                }
                if ((flags & 2) != 0)
                {
                    B = null;
                }
                var newfs = (n + fs) - (m * 2);
                if ((flags & (1 | 4 | 8)) == 0)
                {
                    if ((k + name) <= newfs)
                    {
                        newfs -= k;
                    }
                    else
                    {
                        flags |= 8;
                    }
                }
                for (i = m + (n >> 1) - 1, j = m * 2 + newfs - 1; m <= i; --i)
                {
                    if (SA[i] != 0)
                    {
                        SA[j--] = SA[i] - 1;
                    }
                }
                IBaseArray RA = new LongArray(SA, m + newfs);
                sais_main(RA, SA, newfs, m, name, false);
                RA = null;

                i = n - 1; j = m * 2 - 1; c0 = T[n - 1];
                do
                {
                    c1 = c0;
                } while ((0 <= --i) && ((c0 = T[i]) >= c1));
                for (; 0 <= i;)
                {
                    do
                    {
                        c1 = c0;
                    } while ((0 <= --i) && ((c0 = T[i]) <= c1));
                    if (0 <= i)
                    {
                        SA[j--] = i + 1;
                        do
                        {
                            c1 = c0;
                        } while ((0 <= --i) && ((c0 = T[i]) >= c1));
                    }
                }

                for (i = 0; i < m; ++i)
                {
                    SA[i] = SA[m + SA[i]];
                }
                if ((flags & 4) != 0)
                {
                    C = B = new LongArray(new MemoryEfficientByteAlignedBigULongArray(k), 0);
                }
                if ((flags & 2) != 0)
                {
                    B = new LongArray(new MemoryEfficientByteAlignedBigULongArray(k), 0);
                }
            }

            /* stage 3: induce the result for the original problem */
            if ((flags & 8) != 0)
            {
                GetCounts(T, C, n, k);
            }
            /* put all left-most S characters into their buckets */
            if (1 < m)
            {
                GetBuckets(C, B, k, true); /* find ends of buckets */
                i = m - 1; j = n; p = SA[m - 1]; c1 = T[p];
                do
                {
                    q = B[c0 = c1];
                    while (q < j)
                    {
                        SA[--j] = 0;
                    }
                    do
                    {
                        SA[--j] = p;
                        if (--i < 0)
                        {
                            break;
                        }
                        p = SA[i];
                    } while ((c1 = T[p]) == c0);
                } while (0 <= i);
                while (0 < j)
                {
                    SA[--j] = 0;
                }
            }
            if (isbwt == false)
            {
                InduceSa(T, SA, C, B, n, k);
            }
            else
            {
                ComputeBwt(T, SA, C, B, n, k);
            }
            C = null; B = null;
            return(SA);
        }
        ComputeBwt(IBaseArray T, IBaseArray sa, IBaseArray c, IBaseArray B, long n, long k)
        {
            if (B == null)
            {
                throw new ArgumentNullException(nameof(B));
            }
            long i, pidx = -1;
            long c0, c1;

            /* compute SAl */
            if (c == B)
            {
                GetCounts(T, c, n, k);
            }
            GetBuckets(c, B, k, false); /* find starts of buckets */
            var j = n - 1;
            var b = B[c1 = T[j]];

            sa[b++] = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
            for (i = 0; i < n; ++i)
            {
                if (0 < (j = sa[i]))
                {
                    sa[i] = ~(c0 = T[--j]);
                    if (c0 != c1)
                    {
                        B[c1] = b; b = B[c1 = c0];
                    }
                    sa[b++] = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
                }
                else if (j != 0)
                {
                    sa[i] = ~j;
                }
            }
            /* compute SAs */
            if (c == B)
            {
                GetCounts(T, c, n, k);
            }
            GetBuckets(c, B, k, true); /* find ends of buckets */
            for (i = n - 1, b = B[c1 = 0]; 0 <= i; --i)
            {
                if (0 < (j = sa[i]))
                {
                    sa[i] = (c0 = T[--j]);
                    if (c0 != c1)
                    {
                        B[c1] = b; b = B[c1 = c0];
                    }
                    sa[--b] = ((0 < j) && (T[j - 1] > c1)) ? ~((int)T[j - 1]) : j;
                }
                else if (j != 0)
                {
                    sa[i] = ~j;
                }
                else
                {
                    pidx = i;
                }
            }
            return(pidx);
        }
        LmSpostproc(IBaseArray T, LongArray sa, long n, long m)
        {
            if (sa == null)
            {
                throw new ArgumentNullException(nameof(sa));
            }
            long i, j, p, q;
            long qlen, name;
            long c1;

            /* compact all the sorted substrings into the first m items of SA
             *  2*m must be not larger than n (proveable) */
            for (i = 0; (p = sa[i]) < 0; ++i)
            {
                sa[i] = ~p;
            }
            if (i < m)
            {
                for (j = i, ++i; ; ++i)
                {
                    if ((p = sa[i]) < 0)
                    {
                        sa[j++] = ~p; sa[i] = 0;
                        if (j == m)
                        {
                            break;
                        }
                    }
                }
            }

            /* store the length of all substrings */
            i = n - 1; j = n - 1; var c0 = T[n - 1];
            do
            {
                c1 = c0;
            } while ((0 <= --i) && ((c0 = T[i]) >= c1));
            for (; 0 <= i;)
            {
                do
                {
                    c1 = c0;
                } while ((0 <= --i) && ((c0 = T[i]) <= c1));
                if (0 <= i)
                {
                    sa[m + ((i + 1) >> 1)] = j - i; j = i + 1;
                    do
                    {
                        c1 = c0;
                    } while ((0 <= --i) && ((c0 = T[i]) >= c1));
                }
            }

            /* find the lexicographic names of all substrings */
            for (i = 0, name = 0, q = n, qlen = 0; i < m; ++i)
            {
                p = sa[i]; var plen = sa[m + (p >> 1)]; var diff = true;
                if ((plen == qlen) && ((q + plen) < n))
                {
                    for (j = 0; (j < plen) && (T[p + j] == T[q + j]); ++j)
                    {
                    }
                    if (j == plen)
                    {
                        diff = false;
                    }
                }
                if (diff)
                {
                    ++name; q = p; qlen = plen;
                }
                sa[m + (p >> 1)] = name;
            }

            return(name);
        }
Пример #14
0
        /* find the suffix array SA of T[0..n-1] in {0..k-1}^n
         * use a working space (excluding T and SA) of at most 2n+O(1) for a constant alphabet */
        private static int SaisMain(IBaseArray T, int[] SA, int fs, int n, int k)
        {
            IBaseArray C, B, RA;
            int        i, j, b, m, p, q, name, pidx = 0, newfs;
            int        c0, c1;
            uint       flags;

            if (k <= MINBUCKETSIZE)
            {
                C = new IntArray(new int[k], 0);

                if (k <= fs)
                {
                    B     = new IntArray(SA, n + fs - k);
                    flags = 1;
                }
                else
                {
                    B     = new IntArray(new int[k], 0);
                    flags = 3;
                }
            }
            else if (k <= fs)
            {
                C = new IntArray(SA, n + fs - k);

                if (k <= (fs - k))
                {
                    B     = new IntArray(SA, n + fs - k * 2);
                    flags = 0;
                }
                else if (k <= (MINBUCKETSIZE * 4))
                {
                    B     = new IntArray(new int[k], 0);
                    flags = 2;
                }
                else
                {
                    B     = C;
                    flags = 8;
                }
            }
            else
            {
                C     = B = new IntArray(new int[k], 0);
                flags = 4 | 8;
            }

            /* stage 1: reduce the problem by at least 1/2
             * sort all the LMS-substrings */
            GetCounts(T, C, n, k);
            GetBuckets(C, B, k, true); /* find ends of buckets */

            for (i = 0; i < n; ++i)
            {
                SA[i] = 0;
            }

            b  = -1;
            i  = n - 1;
            j  = n;
            m  = 0;
            c0 = T[n - 1];

            do
            {
                c1 = c0;
            } while ((0 <= --i) && ((c0 = T[i]) >= c1));

            for (; 0 <= i;)
            {
                do
                {
                    c1 = c0;
                } while ((0 <= --i) && ((c0 = T[i]) <= c1));

                if (0 <= i)
                {
                    if (0 <= b)
                    {
                        SA[b] = j;
                    }

                    b = --B[c1];
                    j = i;
                    ++m;

                    do
                    {
                        c1 = c0;
                    } while ((0 <= --i) && ((c0 = T[i]) >= c1));
                }
            }

            if (1 < m)
            {
                LMSsort(T, SA, C, B, n, k);
                name = LMSpostproc(T, SA, n, m);
            }
            else if (m == 1)
            {
                SA[b] = j + 1;
                name  = 1;
            }
            else
            {
                name = 0;
            }

            /* stage 2: solve the reduced problem
             * recurse if names are not yet unique */
            if (name < m)
            {
                if ((flags & 4) != 0)
                {
                    C = null;
                    B = null;
                }

                if ((flags & 2) != 0)
                {
                    B = null;
                }

                newfs = (n + fs) - (m * 2);

                if ((flags & (1 | 4 | 8)) == 0)
                {
                    if ((k + name) <= newfs)
                    {
                        newfs -= k;
                    }
                    else
                    {
                        flags |= 8;
                    }
                }

                for (i = m + (n >> 1) - 1, j = m * 2 + newfs - 1; m <= i; --i)
                {
                    if (SA[i] != 0)
                    {
                        SA[j--] = SA[i] - 1;
                    }
                }


                RA = new IntArray(SA, m + newfs);
                SaisMain(RA, SA, newfs, m, name);
                //RA = null;

                i  = n - 1;
                j  = m * 2 - 1;
                c0 = T[n - 1];

                do
                {
                    c1 = c0;
                }while ((0 <= --i) && ((c0 = T[i]) >= c1));

                for (; 0 <= i;)
                {
                    do
                    {
                        c1 = c0;
                    }while ((0 <= --i) && ((c0 = T[i]) <= c1));

                    if (0 <= i)
                    {
                        SA[j--] = i + 1;

                        do
                        {
                            c1 = c0;
                        }while ((0 <= --i) && ((c0 = T[i]) >= c1));
                    }
                }

                for (i = 0; i < m; ++i)
                {
                    SA[i] = SA[m + SA[i]];
                }

                if ((flags & 4) != 0)
                {
                    C = B = new IntArray(new int[k], 0);
                }

                if ((flags & 2) != 0)
                {
                    B = new IntArray(new int[k], 0);
                }
            }

            /* stage 3: induce the result for the original problem */
            if ((flags & 8) != 0)
            {
                GetCounts(T, C, n, k);
            }

            /* put all left-most S characters into their buckets */
            if (1 < m)
            {
                GetBuckets(C, B, k, true); /* find ends of buckets */

                i  = m - 1;
                j  = n;
                p  = SA[m - 1];
                c1 = T[p];

                do
                {
                    q = B[c0 = c1];

                    while (q < j)
                    {
                        SA[--j] = 0;
                    }

                    do
                    {
                        SA[--j] = p;

                        if (--i < 0)
                        {
                            break;
                        }

                        p = SA[i];
                    }while ((c1 = T[p]) == c0);
                }while (0 <= i);

                while (0 < j)
                {
                    SA[--j] = 0;
                }
            }

            InduceSA(T, SA, C, B, n, k);

            return(pidx);
        }
Пример #15
0
        private static int LMSpostproc(IBaseArray T, int[] SA, int n, int m)
        {
            int  i, j, p, q, plen, qlen, name;
            int  c0, c1;
            bool diff;

            /* compact all the sorted substrings into the first m items of SA
             *  2*m must be not larger than n (proveable) */
            for (i = 0; (p = SA[i]) < 0; ++i)
            {
                SA[i] = ~p;
            }

            if (i < m)
            {
                for (j = i, ++i; ; ++i)
                {
                    if ((p = SA[i]) < 0)
                    {
                        SA[j++] = ~p;
                        SA[i]   = 0;

                        if (j == m)
                        {
                            break;
                        }
                    }
                }
            }

            /* store the length of all substrings */
            i  = n - 1;
            j  = n - 1;
            c0 = T[n - 1];

            do
            {
                c1 = c0;
            }while ((0 <= --i) && ((c0 = T[i]) >= c1));

            for (; 0 <= i;)
            {
                do
                {
                    c1 = c0;
                }while ((0 <= --i) && ((c0 = T[i]) <= c1));

                if (0 <= i)
                {
                    SA[m + ((i + 1) >> 1)] = j - i;
                    j = i + 1;

                    do
                    {
                        c1 = c0;
                    }while ((0 <= --i) && ((c0 = T[i]) >= c1));
                }
            }

            /* find the lexicographic names of all substrings */
            for (i = 0, name = 0, q = n, qlen = 0; i < m; ++i)
            {
                p    = SA[i];
                plen = SA[m + (p >> 1)];
                diff = true;

                if ((plen == qlen) && ((q + plen) < n))
                {
                    for (j = 0; (j < plen) && (T[p + j] == T[q + j]); ++j)
                    {
                        ;
                    }

                    if (j == plen)
                    {
                        diff = false;
                    }
                }

                if (diff != false)
                {
                    ++name;
                    q    = p;
                    qlen = plen;
                }

                SA[m + (p >> 1)] = name;
            }

            return(name);
        }