/*
 * Core recursive routine: fills SA for the sequence T[0..n-1] in three
 * stages (sort the LMS-substrings, recurse on the reduced problem if needed,
 * induce the final order).
 *
 * T     - input sequence, read through its indexer. Its values are used
 *         directly as indices into the bucket array B (see B[c1], B[c0]),
 *         so they are presumably in [0, k) - confirm against callers.
 * SA    - output / work array. Entries [0, n) are cleared and filled here;
 *         the fs entries after them are borrowed as scratch space whenever
 *         they are large enough to hold the k-entry arrays C and/or B.
 * fs    - number of spare entries available in SA after the first n.
 * n     - length of T.
 * k     - number of entries in the count/bucket arrays (alphabet size).
 * isbwt - false: finish with induceSA; true: finish with computeBWT.
 *
 * Returns SA.
 * NOTE(review): pidx receives the result of computeBWT but is never read or
 * returned - confirm whether callers of the BWT path need that value.
 */
sais_main(BaseArray T, LongArray SA, long fs, long n, long k, bool isbwt) {
  BaseArray C, B, RA;
  long i, j, b, m, p, q, name, pidx = 0, newfs;
  long c0, c1;
  ulong flags = 0;

  /*
   * Choose storage for C (counts) and B (buckets), k entries each, and
   * record the choice in flags. As used further down:
   *   bit 1 - only consulted in the (1 | 4 | 8) test before the recursion
   *   bit 2 - B is a fresh allocation: dropped before the recursion and
   *           allocated again afterwards
   *   bit 4 - C and B share one fresh allocation: dropped before the
   *           recursion and allocated again afterwards
   *   bit 8 - getCounts is run again at the start of stage 3
   * "new LongArray(SA, off)" is a view that starts off entries into SA.
   */
  if (k <= MINBUCKETSIZE) {
    C = new LongArray(new MemoryEfficientByteAlignedBigULongArray(k), 0);
    if (k <= fs) { B = new LongArray(SA, n + fs - k); flags = 1; }
    else { B = new LongArray(new MemoryEfficientByteAlignedBigULongArray(k), 0); flags = 3; }
  } else if (k <= fs) {
    /* C occupies the last k spare entries of SA. */
    C = new LongArray(SA, n + fs - k);
    /* Spare room for a second k-entry array: B sits immediately before C. */
    if (k <= (fs - k)) { B = new LongArray(SA, n + fs - k * 2); flags = 0; }
    else if (k <= (MINBUCKETSIZE * 4)) { B = new LongArray(new MemoryEfficientByteAlignedBigULongArray(k), 0); flags = 2; }
    /* Otherwise B aliases C, which is why the counts must be rebuilt later. */
    else { B = C; flags = 8; }
  } else {
    C = B = new LongArray(new MemoryEfficientByteAlignedBigULongArray(k), 0);
    flags = 4 | 8;
  }

  /* stage 1: reduce the problem by at least 1/2
     sort all the LMS-substrings */
  getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */
  for (i = 0; i < n; ++i) { SA[i] = 0; }
  /*
   * Right-to-left scan of T. c1 holds the symbol at i + 1 and c0 the symbol
   * at i. The first loop walks left while T[i] >= T[i + 1]; inside the outer
   * loop the next one walks left while T[i] <= T[i + 1]. Each time that
   * second walk stops with i still >= 0, one LMS position has been found and
   * m is incremented.
   */
  b = -1; i = n - 1; j = n; m = 0; c0 = T[n - 1];
  do { c1 = c0; } while ((0 <= --i) && ((c0 = T[i]) >= c1));
  for (; 0 <= i;) {
    do { c1 = c0; } while ((0 <= --i) && ((c0 = T[i]) <= c1));
    if (0 <= i) {
      /*
       * The write is delayed by one find: the slot b reserved for the
       * previous find receives the previous j, then a new slot is taken
       * from the end of bucket c1 and j is set to the current i.
       */
      if (0 <= b) { SA[b] = j; } b = --B[c1]; j = i; ++m;
      do { c1 = c0; } while ((0 <= --i) && ((c0 = T[i]) >= c1));
    }
  }
  if (1 < m) {
    LMSsort(T, SA, C, B, n, k);
    name = LMSpostproc(T, SA, n, m);
  } else if (m == 1) {
    /* A single find: store j + 1 in its reserved slot; nothing to sort. */
    SA[b] = j + 1;
    name = 1;
  } else {
    name = 0;
  }

  /* stage 2: solve the reduced problem
     recurse if names are not yet unique */
  if (name < m) {
    /* Drop freshly allocated bucket arrays for the duration of the recursion. */
    if ((flags & 4) != 0) { C = null; B = null; }
    if ((flags & 2) != 0) { B = null; }
    newfs = (n + fs) - (m * 2);
    if ((flags & (1 | 4 | 8)) == 0) {
      /*
       * Here C lives in the tail of SA. If the spare space still fits k
       * more entries on top of the recursive call's alphabet size (name),
       * k entries are withheld from it - presumably so C survives the call
       * (TODO confirm); otherwise bit 8 forces getCounts to run again.
       */
      if ((k + name) <= newfs) { newfs -= k; }
      else { flags |= 8; }
    }
    /*
     * Pack the non-zero entries of SA[m .. m + n/2 - 1] downwards from index
     * m * 2 + newfs - 1, subtracting 1 from each. (LMSpostproc in this file
     * stores names starting at 1 at SA[m + (p >> 1)].)
     */
    for (i = m + (n >> 1) - 1, j = m * 2 + newfs - 1; m <= i; --i) {
      if (SA[i] != 0) { SA[j--] = SA[i] - 1; }
    }
    /* RA views SA from m + newfs, where the packed run ends; it is passed as
       T of the recursive call with length m and alphabet size name. */
    RA = new LongArray(SA, m + newfs);
    sais_main(RA, SA, newfs, m, name, false);
    RA = null;

    /* Repeat the stage-1 scan, this time writing each found position (i + 1)
       into SA[m .. 2m - 1], filled from the back. */
    i = n - 1; j = m * 2 - 1; c0 = T[n - 1];
    do { c1 = c0; } while ((0 <= --i) && ((c0 = T[i]) >= c1));
    for (; 0 <= i;) {
      do { c1 = c0; } while ((0 <= --i) && ((c0 = T[i]) <= c1));
      if (0 <= i) {
        SA[j--] = i + 1;
        do { c1 = c0; } while ((0 <= --i) && ((c0 = T[i]) >= c1));
      }
    }

    /* Replace each of the first m entries (an index into the reduced
       problem) by the position in T recorded for it just above. */
    for (i = 0; i < m; ++i) { SA[i] = SA[m + SA[i]]; }
    /* Allocate again whatever was dropped before the recursion. */
    if ((flags & 4) != 0) { C = B = new LongArray(new MemoryEfficientByteAlignedBigULongArray(k), 0); }
    if ((flags & 2) != 0) { B = new LongArray(new MemoryEfficientByteAlignedBigULongArray(k), 0); }
  }

  /* stage 3: induce the result for the original problem */
  if ((flags & 8) != 0) { getCounts(T, C, n, k); }
  /* put all left-most S characters into their buckets */
  if (1 < m) {
    getBuckets(C, B, k, true); /* find ends of buckets */
    /*
     * Walk SA[m - 1] down to SA[0] while j walks down from n. For each run
     * of positions p whose symbol T[p] equals c0: zero SA from j - 1 down to
     * B[c0], then copy the run into the slots just below. The inner loop is
     * left via break once i drops below 0.
     */
    i = m - 1; j = n; p = SA[m - 1]; c1 = T[p];
    do {
      q = B[c0 = c1];
      while (q < j) { SA[--j] = 0; }
      do {
        SA[--j] = p;
        if (--i < 0) { break; }
        p = SA[i];
      } while ((c1 = T[p]) == c0);
    } while (0 <= i);
    /* Zero every slot below the last one written. */
    while (0 < j) { SA[--j] = 0; }
  }
  if (isbwt == false) { induceSA(T, SA, C, B, n, k); }
  else { pidx = computeBWT(T, SA, C, B, n, k); }
  C = null; B = null;
  return(SA);
}
/*
 * Creates a view onto an existing LongArray: the new instance refers to the
 * same underlying m_array as the given array, and its start offset is that
 * array's own offset advanced by pos. No elements are copied.
 */
public LongArray(LongArray array, long pos)
{
    this.m_array = array.m_array;
    this.m_pos = array.m_pos + pos;
}
/*
 * Post-processes the sorted LMS-substrings held in SA and gives each one a
 * lexicographic name.
 *
 * T  - input sequence of length n, read through its indexer.
 * SA - work array. On entry the sorted entries are the negative values
 *      (stored as ~position). On exit SA[0..m-1] holds those positions in
 *      order, and SA[m + (p >> 1)] holds the name (starting at 1) of the
 *      substring that begins at position p.
 * n  - length of T.
 * m  - number of LMS-substrings.
 *
 * Returns the number of distinct names assigned.
 */
LMSpostproc(BaseArray T, LongArray SA, long n, long m) {
    long i, p;
    long c0, c1;

    /* compact all the sorted substrings into the first m items of SA
       (2*m must not be larger than n).
       The leading run of negative entries is already in place and only
       needs decoding. */
    i = 0;
    while ((p = SA[i]) < 0) {
        SA[i] = ~p;
        ++i;
    }
    if (i < m) {
        /* Move each remaining negative entry down to the next free slot and
           zero the slot it came from, until m entries have been collected. */
        long dst = i;
        ++i;
        while (true) {
            p = SA[i];
            if (p < 0) {
                SA[dst] = ~p;
                ++dst;
                SA[i] = 0;
                if (dst == m) {
                    break;
                }
            }
            ++i;
        }
    }

    /* store the length of all substrings:
       scan T right-to-left; every time an LMS position (i + 1) is found,
       write the distance up to the previously found boundary into
       SA[m + ((i + 1) >> 1)]. */
    i = n - 1;
    long end = n - 1;
    c0 = T[n - 1];
    do { c1 = c0; } while ((--i >= 0) && ((c0 = T[i]) >= c1));
    while (i >= 0) {
        do { c1 = c0; } while ((--i >= 0) && ((c0 = T[i]) <= c1));
        if (i < 0) {
            break;
        }
        SA[m + ((i + 1) >> 1)] = end - i;
        end = i + 1;
        do { c1 = c0; } while ((--i >= 0) && ((c0 = T[i]) >= c1));
    }

    /* find the lexicographic names of all substrings:
       walk the sorted positions; a substring gets a new name unless it has
       the same length as its predecessor and matches it symbol for symbol. */
    long name = 0;
    long q = n;      /* start of the previously named substring */
    long qlen = 0;   /* its length */
    for (long rank = 0; rank < m; ++rank) {
        p = SA[rank];
        long slot = m + (p >> 1);
        long plen = SA[slot];
        bool same = false;
        if ((plen == qlen) && ((q + plen) < n)) {
            long matched = 0;
            while ((matched < plen) && (T[p + matched] == T[q + matched])) {
                ++matched;
            }
            same = (matched == plen);
        }
        if (!same) {
            ++name;
            q = p;
            qlen = plen;
        }
        /* The length stored in this slot is overwritten by the name. */
        SA[slot] = name;
    }
    return name;
}