Пример #1
0
        /// <summary>
        /// Searches a fixed digit string for a handful of substrings and compares the positions
        /// reported by the suffix array search against hand-written expectations.
        /// </summary>
        public void SearchSuffixArrayManualTest()
        {
            const string STR = "1234567899912340";

            //Search string -> the start positions within STR that the search is expected to report
            var expectedPositionsBySearch = new Dictionary <string, long[]>
            {
                { "1", new long[] { 0, 11 } },
                { "2", new long[] { 1, 12 } },
                { "12", new long[] { 0, 11 } },
                { "5", new long[] { 4 } }
            };

            IBigArray <ulong>    sa     = buildSuffixArray(STR);
            FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

            foreach (var testCase in expectedPositionsBySearch)
            {
                SuffixArrayRange range = SearchString.Search(sa, digits, testCase.Key);

                CollectionAssert.AreEqual(testCase.Value, range.SortedValues);
            }
        }
Пример #2
0
        /// <summary>
        /// Binary searches the inclusive suffix array index range [<paramref name="min"/>, <paramref name="max"/>]
        /// for a suffix for which doesStartWithSuffix reports 0 against <paramref name="findPrefix"/>.
        /// </summary>
        /// <param name="suffixArray">Suffix array whose entries are positions into <paramref name="digitArray"/></param>
        /// <param name="digitArray">The digits the suffix array was built over</param>
        /// <param name="findPrefix">The digits to look for</param>
        /// <param name="min">Lowest suffix array index to consider (inclusive)</param>
        /// <param name="max">Highest suffix array index to consider (inclusive)</param>
        /// <returns>A suffix array index whose comparison result is 0, or -1 once the range is empty</returns>
        internal static long binarySearchForPrefix(IBigArray <ulong> suffixArray, FourBitDigitBigArray digitArray,
                                                   byte[] findPrefix, long min, long max)
        {
            long lo = min;
            long hi = max;

            //Keep halving the inclusive window [lo, hi] until a hit is found or nothing is left
            while (true)
            {
                long remaining = hi - lo + 1;

                //Nothing left to search
                if (remaining <= 0)
                {
                    return(-1);
                }

                long mid = lo + ((remaining - 1) / 2);
                int  cmp = doesStartWithSuffix(digitArray, findPrefix, (long)suffixArray[mid]);

                if (cmp == 0)
                {
                    //This is the answer
                    return(mid);
                }

                if (cmp == 1)
                {
                    //Too high in the array, continue in the lower half
                    hi = mid - 1;
                }
                else
                {
                    //Too low in the array, continue in the upper half
                    lo = mid + 1;
                }
            }
        }
        /// <summary>
        /// Creates the array on top of caller-supplied backing stores for the values and the complement flags.
        /// </summary>
        /// <param name="length">Number of elements exposed through <c>Length</c>; must not be negative</param>
        /// <param name="maxValue">Value stored in <c>MaxValue</c></param>
        /// <param name="values">Backing store for the values; must hold at least <paramref name="length"/> elements</param>
        /// <param name="complements">Backing store for the complement flags; must hold at least <paramref name="length"/> elements</param>
        /// <exception cref="ArgumentNullException"><paramref name="values"/> or <paramref name="complements"/> is null</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is negative</exception>
        /// <exception cref="ArgumentException">Either backing store is shorter than <paramref name="length"/></exception>
        public MemoryEfficientComplementBigULongArray(long length, ulong maxValue,
                                                      IBigArray <ulong> values, IBigArray <bool> complements)
        {
            //Validation
            if (values == null)
            {
                throw new ArgumentNullException(nameof(values));
            }

            if (complements == null)
            {
                throw new ArgumentNullException(nameof(complements));
            }

            //A negative length would slip through the size comparisons below (any store length is >= a negative
            //  number) and leave the instance with a negative Length. Zero is still accepted so that existing
            //  callers creating empty arrays keep working.
            if (length < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(length), length, "length must not be negative");
            }

            if (values.Length < length)
            {
                throw new ArgumentException("values length must be >= length", nameof(values));
            }

            if (complements.Length < length)
            {
                throw new ArgumentException("complements length must be >= length", nameof(complements));
            }

            Length   = length;
            MaxValue = maxValue;

            this.values      = values;
            this.complements = complements;
        }
Пример #4
0
        /// <summary>
        /// Searching with a suffix array whose length differs from the digit array's must throw.
        /// </summary>
        public void TestSuffixArrayWrongSize()
        {
            //Three suffix entries against five digits: the two lengths deliberately disagree
            int[] tooFewSuffixes = { 1, 2, 3 };

            IBigArray <ulong>    mismatchedSuffixArray = Program.convertIntArrayToBigUlongArray(tooFewSuffixes);
            FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray("12345");

            Assert.Throws <ArgumentException>(() => SearchString.Search(mismatchedSuffixArray, digits, "23"));
        }
Пример #5
0
        /// <summary>
        /// Walks the suffix array once and records, for every zero-padded number of <paramref name="stringLength"/>
        /// digits below NumPrecomputedResults(stringLength), where the run of suffixes starting with it lies.
        /// </summary>
        /// <param name="fourBitDigitArray">The digits the suffix array was built over</param>
        /// <param name="suffixArray">Suffix array whose entries are positions into <paramref name="fourBitDigitArray"/></param>
        /// <param name="stringLength">Number of digits in each searched-for string</param>
        /// <returns>
        /// An array holding two entries per searched-for number i: slot 2i is the suffix array index at which the
        /// run begins and slot 2i + 1 is the index just past the run (exclusive), so equal entries mean "not found"
        /// </returns>
        public static MemoryEfficientBigULongArray GenerateSearchResults(FourBitDigitBigArray fourBitDigitArray,
                                                                         IBigArray <ulong> suffixArray, int stringLength)
        {
            int    numSearches = NumPrecomputedResults(stringLength);
            string padFormat   = "D" + stringLength;

            var results = new MemoryEfficientBigULongArray(numSearches * 2, (ulong)fourBitDigitArray.Length);

            //Position in the suffix array; only ever moves forward across all searches
            long saIdx = 0;

            for (int search = 0; search < numSearches; search++)
            {
                int minSlot = search * 2;
                int maxSlot = minSlot + 1;

                //The suffix array has been used up, so store the array length as both bounds
                if (saIdx >= suffixArray.Length)
                {
                    results[minSlot] = (ulong)suffixArray.Length;
                    results[maxSlot] = (ulong)suffixArray.Length;
                    continue;
                }

                //Zero-pad the number to the requested width and turn it into the digits to look for
                byte[] wanted = SearchString.StrToByteArr(search.ToString(padFormat));

                long saVal = (long)suffixArray[saIdx];

                //Skip forward while the comparison reports -1, i.e. until the run for this string can begin
                while (saVal < fourBitDigitArray.Length &&
                       saIdx < suffixArray.Length &&
                       SearchString.doesStartWithSuffix(fourBitDigitArray, wanted, saVal) == -1)
                {
                    saIdx++;
                    if (saIdx < suffixArray.Length)
                    {
                        saVal = (long)suffixArray[saIdx];
                    }
                }

                results[minSlot] = (ulong)saIdx;

                //Step over every suffix that starts with this string
                while (saVal < fourBitDigitArray.Length &&
                       saIdx < suffixArray.Length &&
                       SearchString.doesStartWithSuffix(fourBitDigitArray, wanted, saVal) == 0)
                {
                    saIdx++;
                    if (saIdx < suffixArray.Length)
                    {
                        saVal = (long)suffixArray[saIdx];
                    }
                }

                //Note that the exclusive maximum is stored here, so min == max means the string wasn't found
                results[maxSlot] = (ulong)saIdx;
            }

            return(results);
        }
Пример #6
0
 /// <summary>
 /// Creates a range that has results, covering suffix array indexes <paramref name="min"/> to
 /// <paramref name="max"/>.
 /// </summary>
 /// <param name="min">Value stored in <c>Min</c></param>
 /// <param name="max">Value stored in <c>Max</c></param>
 /// <param name="suffixArray">Suffix array the indexes refer to</param>
 /// <param name="digits">Digits the suffix array was built over</param>
 public SuffixArrayRange(long min, long max, IBigArray <ulong> suffixArray, FourBitDigitBigArray digits)
 {
     //A range built from explicit bounds always reports results
     this.HasResults = true;

     this.Min = min;
     this.Max = max;

     this.SuffixArray = suffixArray;
     this.Digits      = digits;
 }
Пример #7
0
        /// <summary>
        /// Searching for an empty string must be rejected with an ArgumentException.
        /// </summary>
        public void SearchSuffixArrayForEmptyString()
        {
            const string STR = "123456789";

            IBigArray <ulong>    sa     = buildSuffixArray(STR);
            FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

            //There is nothing to look for
            string emptySearch = string.Empty;

            Assert.Throws <ArgumentException>(() => SearchString.Search(sa, digits, emptySearch));
        }
        sufsort(FourBitDigitBigArray T, IBigArray <ulong> SA, long n)
        {
            //Bail out without running the construction when an input is missing or holds fewer than n elements.
            //NOTE(review): when SA itself is null, the LongArray returned below wraps that null reference -
            //  confirm callers can cope with that
            if ((T == null) || (SA == null) ||
                (SA.Length < n) || (T.Length < n))
            {
                //Hand SA back wrapped at position 0, without having written to it
                return(new LongArray(SA, 0));
            }

            //Run sais_main over T and SA, each wrapped at position 0, passing n and k = 10.
            //k => 10, not the maximum of this datatype but the only reasonable reason to use it (that it's designed for) is for digits
            return(sais_main(new FourBitDigitStreamArray(T, 0), new LongArray(SA, 0), 0, n, 10, false));
        }
        /// <summary>
        /// Collects every element of the backing store of <paramref name="array"/> into a set of longs.
        /// </summary>
        /// <remarks>
        /// As a side effect this instance's own backing store is replaced by that of <paramref name="array"/>.
        /// </remarks>
        /// <param name="array">The array whose backing store is read</param>
        /// <returns>The distinct element values, each converted with Convert.ToInt64</returns>
        public HashSet <long> ToList(LongArray array)
        {
            //Adopt the argument's backing store; the enumeration below runs over this instance's field
            m_array = array.m_array;

            var distinctValues = new HashSet <long>();

            foreach (var element in m_array)
            {
                long asLong = Convert.ToInt64(element);
                distinctValues.Add(asLong);
            }

            return(distinctValues);
        }
Пример #10
0
        /// <summary>
        /// Searching for the complete string is expected to report a single hit at position 0.
        /// </summary>
        public void SearchSuffixArrayAllDigits()
        {
            const string STR = "1234567899912340";

            IBigArray <ulong>    sa     = buildSuffixArray(STR);
            FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

            //The search string is the whole input
            SuffixArrayRange wholeStringRange = SearchString.Search(sa, digits, STR);

            CollectionAssert.AreEqual(new long[] { 0 }, wholeStringRange.SortedValues);
        }
Пример #11
0
        /// <summary>
        /// For every digit of the input, a single-digit prefix search must land on a suffix that
        /// starts with that same digit.
        /// </summary>
        public void TestBinarySearchForPrefixSingleChars()
        {
            const string STR = "2734981324";

            IBigArray <ulong>    sa     = buildSuffixArray(STR);
            FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

            for (int pos = 0; pos < STR.Length; pos++)
            {
                //Turn the character at this position into its numeric digit value
                byte   digit  = (byte)(STR[pos] - '0');
                byte[] prefix = new byte[] { digit };

                long hitIdx = SearchString.binarySearchForPrefix(sa, digits, prefix, 0, STR.Length - 1);

                //The suffix that was found has to begin with the digit that was searched for
                long hitPos = (long)sa[hitIdx];
                Assert.AreEqual(digits[pos], digits[hitPos]);
            }
        }
Пример #12
0
        /// <summary>
        /// Searching inside an empty input string is expected to report no positions at all.
        /// </summary>
        public void SearchSuffixArraySearchEmptyString()
        {
            const string STR  = "";
            const string FIND = "1";

            IBigArray <ulong>    sa     = buildSuffixArray(STR);
            FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

            SuffixArrayRange range = SearchString.Search(sa, digits, FIND);

            //Nothing can be found in an empty input
            long[] noPositions = new long[0];

            CollectionAssert.AreEqual(noPositions, range.SortedValues);
        }
Пример #13
0
 sufsort(string T, IBigArray <ulong> SA, int n)
 {
     //Invalid input (a missing argument, or one holding fewer than n elements) is reported as -1
     if ((T == null) || (SA == null) ||
         (T.Length < n) || (SA.Length < n))
     {
         return(-1);
     }
     //With at most one character there is nothing to sort
     if (n <= 1)
     {
         if (n == 1)
         {
             //The only suffix starts at position 0
             SA[0] = 0;
         }
         return(0);
     }
     //Hand over to sais_main with T and SA each wrapped at position 0.
     //NOTE(review): 65536 is presumably the alphabet size (number of distinct char values) - confirm against sais_main
     return(sais_main(new StringArray(T, 0), new LongArray(SA, 0), 0, n, 65536, false));
 }
Пример #14
0
        /// <summary>
        /// Searching for a digit that does not occur in the input must report no results.
        /// </summary>
        public void TestSuffixArraySearchDigitNotInString()
        {
            //The input contains no 8
            const string STR  = "1234567912340";
            const string FIND = "8";

            IBigArray <ulong>    sa     = buildSuffixArray(STR);
            FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

            SuffixArrayRange range = SearchString.Search(sa, digits, FIND);

            //Read the positions before asserting anything
            long[] foundPositions = range.SortedValues;

            Assert.AreEqual(false, range.HasResults);
            CollectionAssert.AreEqual(new long[] {  }, foundPositions);
        }
Пример #15
0
        /// <summary>
        /// Prefixes that are expected not to occur in the input must all make the binary search return -1.
        /// </summary>
        public void TestBinarySearchForPrefixDontExist()
        {
            const string STR = "8651287431284472619471";

            IBigArray <ulong>    sa     = buildSuffixArray(STR);
            FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

            string[] absentPrefixes = { "1234", "0", "0987654321", "5676", "10", "111", "33" };

            foreach (string absent in absentPrefixes)
            {
                byte[] prefix = stringToByteArr(absent);

                //Search the entire suffix array
                long hitIdx = SearchString.binarySearchForPrefix(sa, digits, prefix, 0, digits.Length - 1);

                Assert.AreEqual(-1, hitIdx);
            }
        }
Пример #16
0
 /// <summary>
 /// Creates a range from a precomputed search result.
 /// </summary>
 /// <remarks>
 /// A precomputed result whose minimum and maximum are equal means "nothing found"; in that case only
 /// <c>HasResults</c> is assigned.
 /// </remarks>
 /// <param name="precomputedResult">Precomputed bounds, with the maximum stored exclusive</param>
 /// <param name="suffixArray">Suffix array the indexes refer to</param>
 /// <param name="digits">Digits the suffix array was built over</param>
 public SuffixArrayRange(PrecomputedSearchResult precomputedResult, IBigArray <ulong> suffixArray,
                         FourBitDigitBigArray digits)
 {
     bool found = precomputedResult.MinSuffixArrayIdx != precomputedResult.MaxSuffixArrayIdx;

     HasResults = found;

     //No results: leave everything else unassigned
     if (!found)
     {
         return;
     }

     Min = precomputedResult.MinSuffixArrayIdx;

     //The precomputed maximum is exclusive (which is what lets it double as the "no results" marker),
     //  whereas this class works with an inclusive maximum, hence the - 1
     Max = precomputedResult.MaxSuffixArrayIdx - 1;

     SuffixArray = suffixArray;
     Digits      = digits;
 }
        /* string */
        /// <summary>
        /// Constructs the suffix array of a given string in linear time and returns its cleaned-up values.
        /// </summary>
        /// <param name="T">input string</param>
        /// <param name="SA">output suffix array</param>
        /// <param name="n">length of the given string</param>
        /// <returns>
        /// An empty set when an argument is null, when <paramref name="T"/> or <paramref name="SA"/> holds fewer
        /// than <paramref name="n"/> elements, or when <paramref name="n"/> is at most 1; otherwise the result of
        /// CleanSuffixArray applied to the values of the computed suffix array
        /// </returns>
        public static HashSet <long> Sufsort(string T, IBigArray <ulong> SA, int n)
        {
            //Missing or undersized inputs produce an empty result
            bool missingInput = (T == null) || (SA == null);
            if (missingInput || (T.Length < n) || (SA.Length < n))
            {
                return(new HashSet <long>());
            }

            //A single character has exactly one suffix, which starts at position 0
            if (n == 1)
            {
                SA[0] = 0;
            }

            //With at most one character there is nothing to sort; an empty set is returned
            if (n <= 1)
            {
                return(new HashSet <long>());
            }

            var computed = sais_main(new StringArray(T, 0), new LongArray(SA, 0), 0, n, 65536, false);
            var values   = computed.ToList(computed);

            return(CleanSuffixArray(values, T));
        }
Пример #18
0
        /// <summary>
        /// Checks, for every non-empty substring of the input, that the suffix array search reports the
        /// same positions as the sequential string search.
        /// </summary>
        public void SearchSuffixArray()
        {
            const string STR = "123456789";

            IBigArray <ulong>    sa     = buildSuffixArray(STR);
            FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

            //Enumerate substrings by start position and then by increasing length
            for (int start = 0; start < STR.Length; start++)
            {
                for (int len = 1; start + len <= STR.Length; len++)
                {
                    string find = STR.Substring(start, len);

                    long[]           sequentialHits = SearchString.Search(STR, find).ToLongArr();
                    SuffixArrayRange range          = SearchString.Search(sa, digits, find);

                    CollectionAssert.AreEqual(sequentialHits, range.SortedValues);
                }
            }
        }
Пример #19
0
        /// <summary>
        /// Finds the range of suffix array indexes whose suffixes start with <paramref name="lookFor"/>.
        /// </summary>
        /// <remarks>
        /// When <paramref name="precomputedResults"/> has an entry for strings of this length, the answer is read
        /// from it (entry [length - 1], indexed by ByteArrToLong of the digits). Otherwise one match is located by
        /// binary search and the range is widened one step at a time in both directions.
        /// </remarks>
        /// <param name="suffixArray">Suffix array; must be as long as <paramref name="digitArray"/></param>
        /// <param name="digitArray">The digits to search in</param>
        /// <param name="lookFor">The digits to look for; must not be empty</param>
        /// <param name="precomputedResults">Optional precomputed results, one entry per search string length</param>
        /// <returns>The matching range, or a range without results if nothing matches or the digit array is empty</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="lookFor"/> is empty, or the suffix array and digit array lengths differ
        /// </exception>
        public static SuffixArrayRange Search(IBigArray <ulong> suffixArray, FourBitDigitBigArray digitArray, byte[] lookFor,
                                              IBigArray <PrecomputedSearchResult>[] precomputedResults = null)
        {
            //Validation
            if (lookFor.Length == 0)
            {
                throw new ArgumentException("lookFor must contain at least 1 digit");
            }

            //Nothing can be found in an empty digit array
            if (digitArray.Length == 0)
            {
                return(new SuffixArrayRange(false));
            }

            if (suffixArray.Length != digitArray.Length)
            {
                throw new ArgumentException(
                          "Suffix Array must be the same length as the Digit Array. This is not the correct suffix array for this digit array");
            }

            //Treat a missing set of precomputed results the same as an empty one
            IBigArray <PrecomputedSearchResult>[] tables =
                precomputedResults ?? new IBigArray <PrecomputedSearchResult> [0];

            //Precomputed results exist for strings of this length, so just look the answer up
            if (tables.Length >= lookFor.Length)
            {
                IBigArray <PrecomputedSearchResult> table = tables[lookFor.Length - 1];

                //The digits, converted to a long, are the index into the table
                long tableIdx = ByteArrToLong(lookFor);

                return(new SuffixArrayRange(table[tableIdx], suffixArray, digitArray));
            }

            //No precomputed results for this length, so run the suffix array search
            long hit = binarySearchForPrefix(suffixArray, digitArray, lookFor, 0,
                                             suffixArray.Length - 1);

            //There were no matches
            if (hit == -1)
            {
                return(new SuffixArrayRange(false));
            }

            //Widen downwards while the preceding suffix also matches
            long first = hit;
            while (first > 0 && doesStartWithSuffix(digitArray, lookFor, (long)suffixArray[first - 1]) == 0)
            {
                first--;
            }

            //Widen upwards while the following suffix also matches
            long last = hit;
            while (last < digitArray.Length - 1 &&
                   doesStartWithSuffix(digitArray, lookFor, (long)suffixArray[last + 1]) == 0)
            {
                last++;
            }

            return(new SuffixArrayRange(first, last, suffixArray, digitArray));
        }
Пример #20
0
 /// <summary>
 /// Convenience overload that converts <paramref name="lookFor"/> with StrToByteArr and forwards
 /// to the byte array overload of Search.
 /// </summary>
 /// <param name="suffixArray">Suffix array to search with</param>
 /// <param name="digitArray">The digits to search in</param>
 /// <param name="lookFor">The string to look for</param>
 /// <param name="precomputedResults">Optional precomputed results, passed through unchanged</param>
 /// <returns>Whatever the byte array overload returns for the converted string</returns>
 public static SuffixArrayRange Search(IBigArray <ulong> suffixArray, FourBitDigitBigArray digitArray, string lookFor,
                                       IBigArray <PrecomputedSearchResult>[] precomputedResults = null)
 {
     byte[] lookForDigits = StrToByteArr(lookFor);

     return(Search(suffixArray, digitArray, lookForDigits, precomputedResults));
 }
 /// <summary>
 /// Wraps <paramref name="array"/>, remembering <paramref name="pos"/> as this view's position.
 /// </summary>
 /// <param name="array">Backing store to wrap; stored by reference</param>
 /// <param name="pos">Position stored alongside the backing store</param>
 public LongArray(IBigArray <ulong> array, long pos)
 {
     this._mArray = array;
     this._mPos   = pos;
 }
 /// <summary>
 /// Creates the array on top of the given underlying array, which is stored by reference.
 /// </summary>
 /// <param name="underlyingArray">The array this instance keeps in its underlyingArray field</param>
 public BigPrecomputedSearchResultsArray(IBigArray <ulong> underlyingArray)
 {
     this.underlyingArray = underlyingArray;
 }
 /// <summary>
 /// Creates a view that shares the backing store of <paramref name="array"/> and whose position is
 /// that array's position advanced by <paramref name="pos"/>.
 /// </summary>
 /// <param name="array">Existing view whose backing store is shared</param>
 /// <param name="pos">Amount added to the position of <paramref name="array"/></param>
 public LongArray(LongArray array, long pos)
 {
     this.m_array = array.m_array;

     //The new position is relative to the source view's position
     this.m_pos = array.m_pos + pos;
 }
 /// <summary>
 /// Wraps <paramref name="array"/>, remembering <paramref name="pos"/> as this view's position.
 /// </summary>
 /// <param name="array">Backing store to wrap; stored by reference</param>
 /// <param name="pos">Position stored alongside the backing store</param>
 public LongArray(IBigArray <ulong> array, long pos)
 {
     this.m_array = array;
     this.m_pos   = pos;
 }
 /// <summary>
 /// Creates a view that shares the backing store of <paramref name="array"/> and whose position is
 /// that array's position advanced by <paramref name="pos"/>.
 /// </summary>
 /// <param name="array">Existing view whose backing store is shared</param>
 /// <param name="pos">Amount added to the position of <paramref name="array"/></param>
 public LongArray(LongArray array, long pos)
 {
     this._mArray = array._mArray;

     //The new position is relative to the source view's position
     this._mPos = array._mPos + pos;
 }
 /// <summary>
 /// Creates the array over the given values store, delegating to the four-argument constructor with a
 /// newly created BigBoolArray (constructed from <paramref name="length"/>) as the complements store.
 /// </summary>
 /// <param name="length">Passed on as the length, and used to construct the BigBoolArray</param>
 /// <param name="maxValue">Passed on unchanged</param>
 /// <param name="values">Passed on unchanged as the values store</param>
 public MemoryEfficientComplementBigULongArray(long length, ulong maxValue,
                                               IBigArray <ulong> values)
     : this(length, maxValue, values, new BigBoolArray(length))
 {
     //Nothing further to do; all initialisation happens in the chained constructor
 }