Example #1
0
        public DnaBwt(ISaisString text, LongSuffixArray suffixArray)
        {
            Length = text.Length;
            _bwt = new byte[Length / 4 + (Length % 4 != 0 ? 1 : 0)];

            for (long i = 1; i < suffixArray.Length; i++)
            {
                if (((long)suffixArray[i]) - 1 >= 0) continue;

                _sentinelIndex = i;
                break;
            }
            for (long i = 0; i < suffixArray.Length; i++)
            {
                var textIndex = ((long)suffixArray[i]) - 1;
                if (textIndex < 0) continue;
                var character = text[textIndex] - 1;
                this[i] = (int)character; // ISaisString values range from 0 to 4, 0 being sentinel
            }
        }
        private static void SA_IS(ISaisString s, LongSuffixArray SA, long n, long alphabetSize)
        {
            DateTime typeStart = DateTime.Now;
            TypeArray T = new TypeArray(n);

            T[n - 2] = SaisType.L;
            T[n - 1] = SaisType.S;
            for (long textIndex = n - 3; textIndex >= 0; textIndex--)
            {
                T[textIndex] = s[textIndex] < s[textIndex + 1] ||
                       (s[textIndex] == s[textIndex + 1] && T[textIndex + 1] == SaisType.S)
                           ? SaisType.S
                           : SaisType.L;
            }
            Console.WriteLine("Type assigning took {0} seconds", DateTime.Now.Subtract(typeStart).TotalSeconds);

            uint[] bucket = new uint[alphabetSize + 1];

            DateTime lmsSortStart = DateTime.Now;
            // Set bucket pointers to ends
            GetBucketHeads(s, bucket, alphabetSize, true);
            for (long saIndex = 0; saIndex < n; saIndex++)
                SA[saIndex] = -1;

            for (long textIndex = 1; textIndex < n; textIndex++)
                if (IsLmsCharacter(T, textIndex))
                {
                    var textChar = s[textIndex];
                    bucket[textChar] = bucket[textChar] - 1;
                    var saIndex = bucket[textChar];
                    SA[saIndex] = textIndex;
                }
            InduceSaL(T, SA, s, bucket, n, alphabetSize, false);
            InduceSaS(T, SA, s, bucket, n, alphabetSize, true);
            Console.WriteLine("LMS-substring sorting took {0} seconds", DateTime.Now.Subtract(lmsSortStart).TotalSeconds);

            // Move LMS characters into first half of SA array
            long n1 = 0;
            for (long saIndex = 0; saIndex < n; saIndex++)
                if (IsLmsCharacter(T, SA[saIndex]))
                    SA[n1++] = SA[saIndex];

            // Name the LMS-substrings according to their order
            DateTime namingStart = DateTime.Now;
            for (long saIndex = n1; saIndex < n; saIndex++)
                SA[saIndex] = -1;
            long name = 0, prev = -1; // TODO: change?
            for (long saIndex = 0; saIndex < n1; saIndex++)
            {
                long pos = SA[saIndex];
                bool diff = false;
                for (long d = 0; d < n; d++)
                    if (prev == -1 || s[pos + d] != s[prev + d]
                        || T[pos + d] != T[prev + d])
                    {
                        diff = true;
                        break;
                    }
                    else if (d > 0 && (IsLmsCharacter(T, pos + d) || IsLmsCharacter(T, prev + d)))
                        break;
                if (diff)
                {
                    name++;
                    prev = pos;
                }
                pos = (pos % 2 == 0) ? pos / 2 : (pos - 1) / 2;
                SA[n1 + pos] = name - 1;
            }
            for (long i = n - 1, j = n - 1; i >= n1; i--)
                if (SA[i] >= 0)
                    SA[j--] = SA[i];
            Console.WriteLine("Naming took {0} seconds", DateTime.Now.Subtract(namingStart).TotalSeconds);

            // Recursive call if names are not unique
            LongSuffixArray SA1 = new LongSuffixArray(SA.ParentArray, 0, n1);
            LongLevelNString s1 = new LongLevelNString(SA.ParentArray, SA.Offset + n - n1, n1);
            if (name < n1)
                SA_IS(s1, SA1, n1, name - 1);
            else // Otherwise SA1 can be solved directly
                for (long i = 0; i < n1; i++)
                    SA1[s1[i]] = i;

            // Set bucket pointers to end of buckets
            bucket = new uint[alphabetSize + 1];
            GetBucketHeads(s, bucket, alphabetSize, true);

            // Replace s1 with P
            for (long i = 1, j = 0; i < n; i++)
                if (IsLmsCharacter(T, i))
                    s1[j++] = i;

            // Replace SA1 with sorted P
            for (int i = 0; i < n1; i++)
                SA1[i] = s1[SA1[i]];

            // Place sorted LMS characters
            DateTime step1Start = DateTime.Now;
            for (long i = n1; i < n; i++)
                SA[i] = -1;

            for (long i = n1 - 1; i >= 0; i--)
            {
                var textPos = SA[i];
                SA[i] = -1;
                var character = s[textPos];
                bucket[character] = bucket[character] - 1;
                var saIndex = bucket[character];
                SA[saIndex] = textPos;
            }
            Console.WriteLine("Step 1 took {0} seconds", DateTime.Now.Subtract(step1Start).TotalSeconds);

            // Induce sort L characters from LMS characters
            DateTime step2Start = DateTime.Now;
            InduceSaL(T, SA, s, bucket, n, alphabetSize, false);
            Console.WriteLine("Step 2 took {0} seconds", DateTime.Now.Subtract(step2Start).TotalSeconds);

            // Induce sort remaining S characters
            DateTime step3Start = DateTime.Now;
            InduceSaS(T, SA, s, bucket, n, alphabetSize, true);
            Console.WriteLine("Step 3 took {0} seconds", DateTime.Now.Subtract(step3Start).TotalSeconds);
        }
 public static LongSuffixArray CreateSuffixArray(ISaisString text)
 {
     LongSuffixArray suffixArray = new LongSuffixArray(new ulong[text.Length / 2 + (text.Length % 2 == 0 ? 0 : 1)], 0, text.Length);
     SA_IS(text, suffixArray, suffixArray.Length, 4);
     return suffixArray;
 }
 private static void InduceSaS(TypeArray T, LongSuffixArray SA, ISaisString s, uint[] bucket, long n, long alphabetSize, bool end)
 {
     GetBucketHeads(s, bucket, alphabetSize, end);
     for (long i = n - 1; i >= 0; i--)
     {
         long textPos = SA[i] - 1;
         if (textPos >= 0 && T[textPos] == SaisType.S)
         {
             var character = s[textPos];
             bucket[character] = bucket[character] - 1;
             var saIndex = bucket[character];
             SA[saIndex] = textPos;
         }
     }
 }