示例#1
0
        //Searches a fixed digit string for several hand-picked search strings and checks the sorted
        //  match positions against manually worked out answers
        public void SearchSuffixArrayManualTest()
        {
            const string DIGITS = "1234567899912340";

            //Search string => positions in DIGITS at which it occurs, in ascending order
            Dictionary <string, long[]> expectedPositions = new Dictionary <string, long[]>();
            expectedPositions.Add("1", new long[] { 0, 11 });
            expectedPositions.Add("2", new long[] { 1, 12 });
            expectedPositions.Add("12", new long[] { 0, 11 });
            expectedPositions.Add("5", new long[] { 4 });

            IBigArray <ulong> sa = buildSuffixArray(DIGITS);
            FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(DIGITS);

            foreach (KeyValuePair <string, long[]> entry in expectedPositions)
            {
                SuffixArrayRange range = SearchString.Search(sa, digits, entry.Key);
                long[] found = range.SortedValues;

                CollectionAssert.AreEqual(entry.Value, found);
            }
        }
示例#2
0
        /// <summary>
        /// Compares the digits in <paramref name="findPrefix"/> against the digits of
        /// <paramref name="digitArray"/> that begin at <paramref name="startIdx"/>.
        /// </summary>
        /// <param name="digitArray">The digits being searched.</param>
        /// <param name="findPrefix">The digits being searched for.</param>
        /// <param name="startIdx">Index in the digit array at which the comparison starts.</param>
        /// <returns>
        /// 0 when every digit of findPrefix matches; 1 when findPrefix sorts before the digits at startIdx
        /// (searching too high); -1 when findPrefix sorts after them, or when the digit array ran out before
        /// all of findPrefix could be compared (searching too low).
        /// </returns>
        internal static int doesStartWithSuffix(FourBitDigitBigArray digitArray, byte[] findPrefix, long startIdx)
        {
            //How many digits exist from startIdx (inclusive) to the end of the digit array
            long remaining = digitArray.Length - startIdx;

            int pos = 0;

            while (pos < findPrefix.Length && pos < remaining)
            {
                byte wanted = findPrefix[pos];
                byte found  = digitArray[startIdx + pos];

                //The first differing digit decides the ordering
                if (wanted != found)
                {
                    return(wanted < found ? 1 : -1);
                }

                pos++;
            }

            //Every compared digit matched. If the digit array ended early, the suffix is a strict prefix of
            //  what we're looking for and so sorts before it (i.e. 95 < 954): we're too low.
            //  Otherwise all of findPrefix was matched.
            return(remaining < findPrefix.Length ? -1 : 0);
        }
示例#3
0
        //Searching with a 3 entry suffix array against a 5 digit array is expected to throw ArgumentException
        public void TestSuffixArrayWrongSize()
        {
            int[] threeEntries = new int[] { 1, 2, 3 };

            IBigArray <ulong> mismatchedSuffixArray = Program.convertIntArrayToBigUlongArray(threeEntries);
            FourBitDigitBigArray fiveDigits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray("12345");

            Assert.Throws <ArgumentException>(() => SearchString.Search(mismatchedSuffixArray, fiveDigits, "23"));
        }
        /// <summary>
        /// Builds a FourBitDigitBigArray over a newly created in-memory stream of length / 2 bytes
        /// (after rounding an odd length up to the next even number).
        /// </summary>
        /// <param name="length">Number of digits the array should hold.</param>
        private static FourBitDigitBigArray makeNew(long length)
        {
            //The stream is sized as length / 2 bytes, so round an odd digit count up first
            bool wasOdd = (length % 2 == 1);

            if (wasOdd)
            {
                length++;
            }

            long byteCount = length / 2;

            //Note the choice is made on the (rounded) digit count, not on the byte count
            Stream backing = (length > int.MaxValue)
                ? (Stream) new BigMemoryStream(byteCount)
                : new MemoryStream((int)byteCount);

            backing.SetLength(byteCount);

            //For an odd digit count the final byte is written as 15 (last 4 bits are all 1's)
            if (wasOdd)
            {
                backing.Position = byteCount - 1;
                backing.WriteByte(15);
            }

            return(new FourBitDigitBigArray(backing));
        }
示例#5
0
        /// <summary>
        /// Binary searches the suffix array between <paramref name="min"/> and <paramref name="max"/>
        /// (both inclusive) for a suffix that starts with <paramref name="findPrefix"/>.
        /// </summary>
        /// <returns>
        /// The suffix array index of a match (whichever one the search lands on), or -1 if the range
        /// is exhausted without finding one.
        /// </returns>
        internal static long binarySearchForPrefix(IBigArray <ulong> suffixArray, FourBitDigitBigArray digitArray,
                                                   byte[] findPrefix, long min, long max)
        {
            long lo = min;
            long hi = max;

            while (true)
            {
                long candidates = hi - lo + 1;

                //Nothing left to look at: the prefix isn't there
                if (candidates <= 0)
                {
                    return(-1);
                }

                //Probe the middle (lower middle for an even number of candidates)
                long mid = lo + ((candidates - 1) / 2);

                int hit = doesStartWithSuffix(digitArray, findPrefix, (long)suffixArray[mid]);

                if (hit == 0)
                {
                    return(mid);
                }

                if (hit == 1)
                {
                    //Too high in the array, carry on below the probe
                    hi = mid - 1;
                }
                else
                {
                    //Too low in the array, carry on above the probe
                    lo = mid + 1;
                }
            }
        }
        //Writes a digit at an index larger than int.MaxValue and reads it back
        public void TestGetSetBig()
        {
            const long NUM_DIGITS = 3000000000;
            const long BIG_INDEX  = 2500000000;

            FourBitDigitBigArray bigArray = makeNew(NUM_DIGITS);

            bigArray[BIG_INDEX] = 5;

            Assert.AreEqual(5, bigArray[BIG_INDEX]);
        }
        //An array created for an even digit count larger than int.MaxValue must report that count as its Length
        public void TestLengthBigEven()
        {
            const long NUM_DIGITS = 3000000000;

            FourBitDigitBigArray bigArray = makeNew(NUM_DIGITS);

            Assert.AreEqual(NUM_DIGITS, bigArray.Length);
        }
        /// <summary>
        /// Walks the suffix array once from the start and records, for each number from 0 up to (but excluding)
        /// NumPrecomputedResults(stringLength), zero padded to stringLength digits, the range of suffix array
        /// indexes whose suffixes start with that number.
        /// </summary>
        /// <returns>
        /// Two entries per number i: entry 2i is the suffix array index where the matches begin and entry
        /// 2i + 1 is the index one past the last match. Equal entries mean the number was not found.
        /// </returns>
        public static MemoryEfficientBigULongArray GenerateSearchResults(FourBitDigitBigArray fourBitDigitArray,
                                                                         IBigArray <ulong> suffixArray, int stringLength)
        {
            int    numSearches = NumPrecomputedResults(stringLength);
            string padFormat   = "D" + stringLength;

            MemoryEfficientBigULongArray results = new MemoryEfficientBigULongArray(
                numSearches * 2, (ulong)fourBitDigitArray.Length);

            //Current position in the suffix array. It carries over from one number to the next and only ever
            //  moves forwards
            long saIdx = 0;

            for (int number = 0; number < numSearches; number++)
            {
                int minSlot = number * 2;
                int maxSlot = minSlot + 1;

                //Ran off the end of the suffix array: record an empty range (min == max)
                if (saIdx >= suffixArray.Length)
                {
                    results[minSlot] = (ulong)suffixArray.Length;
                    results[maxSlot] = (ulong)suffixArray.Length;
                    continue;
                }

                //The digits of this number, padded with leading zeros to stringLength
                byte[] wanted = SearchString.StrToByteArr(number.ToString(padFormat));

                long digitIdx = (long)suffixArray[saIdx];

                //Move past the suffixes for which the comparison reports "too low"
                while (digitIdx < fourBitDigitArray.Length &&
                       saIdx < suffixArray.Length &&
                       SearchString.doesStartWithSuffix(fourBitDigitArray, wanted, digitIdx) == -1)
                {
                    saIdx++;
                    if (saIdx < suffixArray.Length)
                    {
                        digitIdx = (long)suffixArray[saIdx];
                    }
                }

                results[minSlot] = (ulong)saIdx;

                //Move past the suffixes that start with this number
                while (digitIdx < fourBitDigitArray.Length &&
                       saIdx < suffixArray.Length &&
                       SearchString.doesStartWithSuffix(fourBitDigitArray, wanted, digitIdx) == 0)
                {
                    saIdx++;
                    if (saIdx < suffixArray.Length)
                    {
                        digitIdx = (long)suffixArray[saIdx];
                    }
                }

                //The maximum is stored exclusive, so min == max means the number wasn't found
                results[maxSlot] = (ulong)saIdx;
            }

            return(results);
        }
        //An array built from a 3 digit stream must report a Length of 3
        public void TestOddNumberOfDigits()
        {
            const string DIGITS = "123";

            Stream digitStream = convertStringTo4BitDigitStream(DIGITS);
            FourBitDigitBigArray digits = new FourBitDigitBigArray(digitStream);

            Assert.AreEqual(3, digits.Length);
        }
        //An array built from the stream of an empty string must report a Length of 0
        public void TestEmpty()
        {
            const string NO_DIGITS = "";

            Stream digitStream = convertStringTo4BitDigitStream(NO_DIGITS);
            FourBitDigitBigArray digits = new FourBitDigitBigArray(digitStream);

            Assert.AreEqual(0, digits.Length);
        }
        //Smoke test: constructing the array from a 4 digit stream must not throw. Nothing else is asserted
        public void TestConstructor()
        {
            const string DIGITS = "1234";

            Stream digitStream = convertStringTo4BitDigitStream(DIGITS);

            //The constructed object itself is deliberately not used
            new FourBitDigitBigArray(digitStream);
        }
示例#12
0
 // Constructors
 /// <summary>
 /// Creates a range that has results, storing the given bounds together with the suffix array and
 /// digits they refer to.
 /// </summary>
 /// <param name="min">Stored as Min.</param>
 /// <param name="max">Stored as Max.</param>
 /// <param name="suffixArray">Stored as SuffixArray.</param>
 /// <param name="digits">Stored as Digits.</param>
 public SuffixArrayRange(long min, long max, IBigArray <ulong> suffixArray, FourBitDigitBigArray digits)
 {
     //This constructor always marks the range as having results
     this.HasResults = true;

     this.Min = min;
     this.Max = max;

     this.SuffixArray = suffixArray;
     this.Digits = digits;
 }
        //Reading index 3 of a 3 digit array is expected to throw IndexOutOfRangeException
        public void TestAccessOutOfRange()
        {
            FourBitDigitBigArray threeDigits = convertStringTo4BitDigitArray("123");

            Assert.Throws <IndexOutOfRangeException>(() =>
            {
                //Valid indexes are 0 to 2
                byte unused = threeDigits[3];
            });
        }
        //Storing 16 in a digit is expected to throw OverflowException
        public void TestSetOverflow()
        {
            FourBitDigitBigArray threeDigits = convertStringTo4BitDigitArray("123");

            Assert.Throws <OverflowException>(() =>
            {
                //16 needs 5 bits
                threeDigits[0] = 16;
            });
        }
示例#15
0
        //Searching for the empty string is expected to throw ArgumentException
        public void SearchSuffixArrayForEmptyString()
        {
            const string DIGITS = "123456789";

            IBigArray <ulong> sa = buildSuffixArray(DIGITS);
            FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(DIGITS);

            Assert.Throws <ArgumentException>(() => SearchString.Search(sa, digits, ""));
        }
        //Writing to index 3 of a 3 digit array is expected to throw IndexOutOfRangeException
        public void TestSetOutOfRange()
        {
            FourBitDigitBigArray threeDigits = convertStringTo4BitDigitArray("123");

            Assert.Throws <IndexOutOfRangeException>(() =>
            {
                //Valid indexes are 0 to 2
                threeDigits[3] = 3;
            });
        }
        //Validates the arguments and then passes T and SA, each wrapped in an adapter, on to sais_main.
        //NOTE(review): the modifiers and return type of this declaration are not visible here; presumably
        //  this builds the suffix array of the first n digits of T into SA - TODO confirm
        sufsort(FourBitDigitBigArray T, IBigArray <ulong> SA, long n)
        {
            //Bail out if either array is missing or holds fewer than n entries
            if ((T == null) || (SA == null) ||
                (SA.Length < n) || (T.Length < n))
            {
                //sais_main is not called on this path; SA is only wrapped and handed back.
                //NOTE(review): SA may be null here - confirm that LongArray accepts that
                return(new LongArray(SA, 0));
            }

            //k is passed as 10: that is not the largest value this datatype could hold, but the datatype is
            //  designed for digits and that is the only reasonable use of it
            return(sais_main(new FourBitDigitStreamArray(T, 0), new LongArray(SA, 0), 0, n, 10, false));
        }
        //Storing 15 in a digit is expected to throw OverflowException
        public void TestSetReservedOverflow()
        {
            FourBitDigitBigArray threeDigits = convertStringTo4BitDigitArray("123");

            Assert.Throws <OverflowException>(() =>
            {
                //15 is the highest value that fits in 4 bits, but it is reserved for marking that half of
                //  the byte as not in use, so it counts as overflow
                threeDigits[0] = 15;
            });
        }
示例#19
0
        //Looking for the final two digits, starting two digits from the end, must be reported as a match (0)
        public void TestDoesStartWithSuffixLastDigitsInDigitArray()
        {
            const string DIGITS = "1234567890";
            const string TAIL   = "90";

            FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(DIGITS);
            byte[] wanted = stringToByteArr(TAIL);

            int result = SearchString.doesStartWithSuffix(digits, wanted, DIGITS.Length - 2);

            Assert.AreEqual(0, result);
        }
示例#20
0
        //A search string whose first digit is larger than the first digit of the array must compare as -1
        public void TestDoesStartWithSuffixTooHigh()
        {
            const string DIGITS = "12345678901234";

            FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(DIGITS);

            //Same as DIGITS except that the leading 1 is replaced by a 2
            byte[] wanted = stringToByteArr("2" + DIGITS.Substring(1));

            int result = SearchString.doesStartWithSuffix(digits, wanted, 0);

            Assert.AreEqual(-1, result);
        }
        //Every digit read back from the array must equal the corresponding character of the source string
        public void TestGet()
        {
            const string DIGITS = "391";

            FourBitDigitBigArray digits = convertStringTo4BitDigitArray(DIGITS);

            int idx = 0;

            foreach (char expectedChar in DIGITS)
            {
                byte actualDigit = digits[idx];

                //Compare as text: the character '3' against the number 3
                Assert.AreEqual(expectedChar.ToString(), actualDigit.ToString());

                idx++;
            }
        }
        //Overwriting the digit at index 0 must change that digit and leave the remaining digits as they were
        public void TestSetEven()
        {
            const string DIGITS = "391";

            FourBitDigitBigArray digits = convertStringTo4BitDigitArray(DIGITS);

            digits[0] = 7;
            Assert.AreEqual(7, digits[0]);

            //Indexes 1 onwards must still match the source string
            int idx = 1;

            while (idx < DIGITS.Length)
            {
                Assert.AreEqual(DIGITS[idx].ToString(), digits[idx].ToString());
                idx++;
            }
        }
示例#23
0
        //Searching for the whole digit string must give exactly one match, at position 0
        public void SearchSuffixArrayAllDigits()
        {
            const string DIGITS = "1234567899912340";

            IBigArray <ulong> sa = buildSuffixArray(DIGITS);
            FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(DIGITS);

            long[] onlyAtStart = new long[] { 0 };

            SuffixArrayRange range = SearchString.Search(sa, digits, DIGITS);
            long[] found = range.SortedValues;

            CollectionAssert.AreEqual(onlyAtStart, found);
        }
示例#24
0
        //Searches for each digit of the test string on its own and checks that the suffix the search lands
        //  on starts with that digit
        public void TestBinarySearchForPrefixSingleChars()
        {
            const string STR = "2734981324";

            IBigArray <ulong>    suffixArray       = buildSuffixArray(STR);
            FourBitDigitBigArray fourBitDigitArray = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

            for (int i = 0; i < STR.Length; i++)
            {
                //Single digit to look for, converted from its character to its numeric value
                byte[] find = new byte[] { (byte)(STR[i] - '0') };

                long answer = SearchString.binarySearchForPrefix(suffixArray, fourBitDigitArray, find, 0, STR.Length - 1);

                //binarySearchForPrefix returns -1 when nothing matches. Fail with a clear message here rather
                //  than letting -1 be used as a suffix array index below
                Assert.IsTrue(answer >= 0, "Digit '" + STR[i] + "' was not found by binarySearchForPrefix");

                //The search may land on any occurrence of the digit, so compare digit values and not positions
                Assert.AreEqual(fourBitDigitArray[i], fourBitDigitArray[(long)suffixArray[answer]]);
            }
        }
示例#25
0
        //Searching within an empty digit string must give no matches
        public void SearchSuffixArraySearchEmptyString()
        {
            const string NO_DIGITS = "";
            const string WANTED    = "1";

            IBigArray <ulong> sa = buildSuffixArray(NO_DIGITS);
            FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(NO_DIGITS);

            long[] nothing = new long[0];

            SuffixArrayRange range = SearchString.Search(sa, digits, WANTED);
            long[] found = range.SortedValues;

            CollectionAssert.AreEqual(nothing, found);
        }
示例#26
0
        //Searching for a digit that doesn't occur in the digit string must report no results and give no
        //  match positions
        public void TestSuffixArraySearchDigitNotInString()
        {
            //Contains no 8
            const string DIGITS = "1234567912340";
            const string WANTED = "8";

            IBigArray <ulong> sa = buildSuffixArray(DIGITS);
            FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(DIGITS);

            long[] nothing = new long[] {  };

            SuffixArrayRange range = SearchString.Search(sa, digits, WANTED);
            long[] found = range.SortedValues;

            Assert.AreEqual(false, range.HasResults);
            CollectionAssert.AreEqual(nothing, found);
        }
示例#27
0
        //binarySearchForPrefix must return -1 for strings that don't occur anywhere in the digit string
        public void TestBinarySearchForPrefixDontExist()
        {
            const string DIGITS = "8651287431284472619471";

            IBigArray <ulong> sa = buildSuffixArray(DIGITS);
            FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(DIGITS);

            //None of these occur in DIGITS
            string[] absent = new string[] { "1234", "0", "0987654321", "5676", "10", "111", "33" };

            for (int n = 0; n < absent.Length; n++)
            {
                byte[] wanted = stringToByteArr(absent[n]);

                //Search the whole suffix array
                long foundAt = SearchString.binarySearchForPrefix(sa, digits, wanted, 0, digits.Length - 1);

                Assert.AreEqual(-1, foundAt);
            }
        }
示例#28
0
        //Compares 3 digit search strings against the last 2 digits of the array ("90"), where a mismatch is
        //  hit before the array runs out of digits
        public void TestDoesStartWithSuffixDigitArrayDigitArrayTooSmallNotMatchUntilEnd()
        {
            const string DIGITS = "1234567890";

            FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(DIGITS);

            //"911" vs "90": the first digits match and the second digit searched for (1) is greater than
            //  the one in the array (0)
            byte[] sortsAfter = stringToByteArr("911");

            Assert.AreEqual(-1, SearchString.doesStartWithSuffix(digits, sortsAfter, DIGITS.Length - 2));

            //"871" vs "90": the first digit searched for (8) is less than the one in the array (9)
            byte[] sortsBefore = stringToByteArr("871");

            Assert.AreEqual(1, SearchString.doesStartWithSuffix(digits, sortsBefore, DIGITS.Length - 2));
        }
示例#29
0
        //Checks that every non-empty substring of the digit string is reported as a match (0) when compared
        //  at the position it was taken from
        public void TestDoesStartWithSuffix()
        {
            const string STR = "12345678901234";

            FourBitDigitBigArray fourBitDigitArray = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

            //Start index (inclusive). Runs up to the last digit so that the final single digit is covered too
            for (int i = 0; i < STR.Length; i++)
            {
                //End index (exclusive). It has to reach STR.Length, otherwise no substring that ends on the
                //  last digit would ever be tested
                for (int j = i + 1; j <= STR.Length; j++)
                {
                    string strFind = STR.Substring(i, j - i);
                    byte[] find    = stringToByteArr(strFind);

                    Assert.AreEqual(0, SearchString.doesStartWithSuffix(fourBitDigitArray, find, i));
                }
            }
        }
示例#30
0
 /// <summary>
 /// Creates a range from a precomputed search result. When the precomputed minimum and maximum are
 /// equal the range is marked as having no results and nothing else is set.
 /// </summary>
 /// <param name="precomputedResult">Precomputed bounds; its maximum is exclusive.</param>
 /// <param name="suffixArray">Stored as SuffixArray when there are results.</param>
 /// <param name="digits">Stored as Digits when there are results.</param>
 public SuffixArrayRange(PrecomputedSearchResult precomputedResult, IBigArray <ulong> suffixArray,
                         FourBitDigitBigArray digits)
 {
     //Equal bounds are how the precomputed results encode "not found"
     if (precomputedResult.MinSuffixArrayIdx == precomputedResult.MaxSuffixArrayIdx)
     {
         this.HasResults = false;
         return;
     }

     this.HasResults = true;
     this.Min = precomputedResult.MinSuffixArrayIdx;

     //The precomputed maximum is exclusive whereas this class uses an inclusive maximum, so take 1 off
     this.Max = precomputedResult.MaxSuffixArrayIdx - 1;

     this.SuffixArray = suffixArray;
     this.Digits = digits;
 }