// Searches a small, hand-checked digit string for several substrings and verifies that
// the SortedValues reported for each search equal the positions worked out by hand.
public void SearchSuffixArrayManualTest()
{
    const string text = "1234567899912340";

    // Substring to search for -> positions in text at which it starts
    Dictionary<string, long[]> expectedPositions = new Dictionary<string, long[]>();
    expectedPositions.Add("1", new long[] { 0, 11 });
    expectedPositions.Add("2", new long[] { 1, 12 });
    expectedPositions.Add("12", new long[] { 0, 11 });
    expectedPositions.Add("5", new long[] { 4 });

    IBigArray<ulong> suffixes = buildSuffixArray(text);
    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(text);

    foreach (string needle in expectedPositions.Keys)
    {
        SuffixArrayRange range = SearchString.Search(suffixes, digits, needle);

        CollectionAssert.AreEqual(expectedPositions[needle], range.SortedValues);
    }
}
// Compares findPrefix, digit by digit, with the digits of digitArray starting at startIdx.
//
// Returns:
//    0  every digit of findPrefix matched
//    1  at the first differing digit findPrefix holds the smaller value
//       (the caller is searching too high in the array)
//   -1  at the first differing digit findPrefix holds the larger value, or the digit array
//       ran out before findPrefix was fully matched (the caller is searching too low)
internal static int doesStartWithSuffix(FourBitDigitBigArray digitArray, byte[] findPrefix, long startIdx)
{
    // How many digits exist from startIdx (inclusive) to the end of the digit array
    long available = digitArray.Length - startIdx;

    int pos = 0;
    while (pos < findPrefix.Length && pos < available)
    {
        byte wanted = findPrefix[pos];
        byte present = digitArray[startIdx + pos];

        if (wanted != present)
        {
            // The first mismatch decides the ordering
            return wanted < present ? 1 : -1;
        }

        pos++;
    }

    // No mismatch was seen. If the digits ran out first, the suffix is a strict prefix of what
    // we are looking for, and a string starting with t sorts after t itself (954 > 95), so the
    // caller is too low. Otherwise everything asked for was matched.
    return available < findPrefix.Length ? -1 : 0;
}
// Searching with a suffix array built from three values against a five-digit string
// is expected to be rejected with an ArgumentException.
public void TestSuffixArrayWrongSize()
{
    int[] tooFewEntries = new int[] { 1, 2, 3 };
    IBigArray<ulong> mismatchedSuffixArray = Program.convertIntArrayToBigUlongArray(tooFewEntries);
    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray("12345");

    Assert.Throws<ArgumentException>(() => SearchString.Search(mismatchedSuffixArray, digits, "23"));
}
// Creates a FourBitDigitBigArray over a freshly allocated stream sized for `length` digits,
// two digits per byte.
private static FourBitDigitBigArray makeNew(long length)
{
    // An odd digit count is rounded up to the next even number so it occupies whole bytes
    bool isOdd = (length % 2 == 1);
    if (isOdd)
    {
        length++;
    }

    long byteCount = length / 2;

    // Use the big stream implementation when the (padded) digit count exceeds int.MaxValue
    Stream backing = (length > int.MaxValue)
        ? (Stream)new BigMemoryStream(byteCount)
        : new MemoryStream((int)byteCount);
    backing.SetLength(byteCount);

    if (isOdd)
    {
        // Write 15 (low 4 bits all 1's) as the final byte for the padded half
        backing.Position = byteCount - 1;
        backing.WriteByte(15);
    }

    return new FourBitDigitBigArray(backing);
}
// Binary-searches suffixArray between indices min and max (both inclusive) for an entry whose
// suffix in digitArray starts with findPrefix.
// Returns the index in suffixArray of one such entry, or -1 if the range is exhausted without a hit.
internal static long binarySearchForPrefix(IBigArray<ulong> suffixArray, FourBitDigitBigArray digitArray, byte[] findPrefix, long min, long max)
{
    long lo = min;
    long hi = max;

    while (true)
    {
        long remaining = hi - lo + 1;

        // Nothing left to look at: the prefix is not present
        if (remaining <= 0)
        {
            return -1;
        }

        // Probe the middle of the remaining range
        long mid = lo + ((remaining - 1) / 2);
        int comparison = doesStartWithSuffix(digitArray, findPrefix, (long)suffixArray[mid]);

        if (comparison == 0)
        {
            // Found an entry that starts with the prefix
            return mid;
        }

        if (comparison == 1)
        {
            // Too high in the array: continue in the lower half
            hi = mid - 1;
        }
        else
        {
            // comparison == -1, too low in the array: continue in the upper half
            lo = mid + 1;
        }
    }
}
// Writes and reads back a digit at an index that does not fit in a 32-bit int.
public void TestGetSetBig()
{
    const long digitCount = 3000000000;
    const long farIndex = 2500000000;

    FourBitDigitBigArray bigArray = makeNew(digitCount);
    bigArray[farIndex] = 5;

    Assert.AreEqual(5, bigArray[farIndex]);
}
// An array created for an even digit count larger than int.MaxValue must report that exact Length.
public void TestLengthBigEven()
{
    const long requestedDigits = 3000000000;

    FourBitDigitBigArray bigArray = makeNew(requestedDigits);
    long reportedDigits = bigArray.Length;

    Assert.AreEqual(requestedDigits, reportedDigits);
}
// Walks the suffix array once and records, for every zero-padded number of stringLength digits
// below NumPrecomputedResults(stringLength), the range of suffix array indices whose suffixes
// start with that number.
//
// For number i the result holds two entries:
//   [i * 2]       the first suffix array index at which the number starts
//   [(i * 2) + 1] the exclusive end of that range; equal to the start when the number was not found
public static MemoryEfficientBigULongArray GenerateSearchResults(FourBitDigitBigArray fourBitDigitArray, IBigArray<ulong> suffixArray, int stringLength)
{
    int searchCount = NumPrecomputedResults(stringLength);
    string paddedFormat = "D" + stringLength;

    MemoryEfficientBigULongArray results = new MemoryEfficientBigULongArray(
        searchCount * 2, (ulong)fourBitDigitArray.Length);

    // Position in the suffix array; it only ever moves forward across all numbers
    long cursor = 0;

    for (int number = 0; number < searchCount; number++)
    {
        int startSlot = number * 2;
        int endSlot = startSlot + 1;

        // The suffix array is already exhausted: record an empty range at its end
        if (cursor >= suffixArray.Length)
        {
            results[startSlot] = (ulong)suffixArray.Length;
            results[endSlot] = (ulong)suffixArray.Length;
            continue;
        }

        // The digits of this number, zero-padded to stringLength
        byte[] wanted = SearchString.StrToByteArr(number.ToString(paddedFormat));

        long textPos = (long)suffixArray[cursor];

        // Skip suffixes that sort before the number to find where its range begins
        while (textPos < fourBitDigitArray.Length
               && cursor < suffixArray.Length
               && SearchString.doesStartWithSuffix(fourBitDigitArray, wanted, textPos) == -1)
        {
            if (++cursor < suffixArray.Length)
            {
                textPos = (long)suffixArray[cursor];
            }
        }
        results[startSlot] = (ulong)cursor;

        // Consume suffixes that start with the number to find where its range ends
        while (textPos < fourBitDigitArray.Length
               && cursor < suffixArray.Length
               && SearchString.doesStartWithSuffix(fourBitDigitArray, wanted, textPos) == 0)
        {
            if (++cursor < suffixArray.Length)
            {
                textPos = (long)suffixArray[cursor];
            }
        }

        // The end is stored exclusive, so start == end encodes "not found"
        results[endSlot] = (ulong)cursor;
    }

    return results;
}
// A stream holding an odd number of digits must still report the true digit count as Length.
public void TestOddNumberOfDigits()
{
    const int expectedLength = 3;

    Stream threeDigitStream = convertStringTo4BitDigitStream("123");
    FourBitDigitBigArray digits = new FourBitDigitBigArray(threeDigitStream);

    Assert.AreEqual(expectedLength, digits.Length);
}
// An array built over the stream for an empty string must have a Length of zero.
public void TestEmpty()
{
    const int expectedLength = 0;

    Stream emptyStream = convertStringTo4BitDigitStream("");
    FourBitDigitBigArray digits = new FourBitDigitBigArray(emptyStream);

    Assert.AreEqual(expectedLength, digits.Length);
}
// Smoke test: constructing the array over a four-digit stream must complete without throwing.
// There is deliberately no assertion; an exception from the constructor fails the test.
public void TestConstructor()
{
    Stream fourDigitStream = convertStringTo4BitDigitStream("1234");

    FourBitDigitBigArray constructed = new FourBitDigitBigArray(fourDigitStream);
}
// Constructors

// Creates a range that has results, spanning suffix array indices min to max,
// and keeps the suffix array and digit array it refers to.
public SuffixArrayRange(long min, long max, IBigArray<ulong> suffixArray, FourBitDigitBigArray digits)
{
    // The data the range points into
    SuffixArray = suffixArray;
    Digits = digits;

    // The bounds of the range
    Min = min;
    Max = max;

    // A range built from explicit bounds is always treated as having results
    HasResults = true;
}
// Reading the index one past the last digit must throw IndexOutOfRangeException.
public void TestAccessOutOfRange()
{
    FourBitDigitBigArray threeDigits = convertStringTo4BitDigitArray("123");

    Assert.Throws<IndexOutOfRangeException>(() =>
    {
        // Index 3 is the first index beyond the three stored digits
        byte neverAssigned = threeDigits[3];
    });
}
// Storing 16, which does not fit in four bits, must throw OverflowException.
public void TestSetOverflow()
{
    const byte tooLargeForFourBits = 16;
    FourBitDigitBigArray threeDigits = convertStringTo4BitDigitArray("123");

    Assert.Throws<OverflowException>(() =>
    {
        threeDigits[0] = tooLargeForFourBits;
    });
}
// Searching for the empty string is expected to be rejected with an ArgumentException.
public void SearchSuffixArrayForEmptyString()
{
    const string text = "123456789";

    IBigArray<ulong> suffixes = buildSuffixArray(text);
    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(text);

    Assert.Throws<ArgumentException>(() =>
    {
        SearchString.Search(suffixes, digits, "");
    });
}
// Writing to the index one past the last digit must throw IndexOutOfRangeException.
public void TestSetOutOfRange()
{
    FourBitDigitBigArray threeDigits = convertStringTo4BitDigitArray("123");

    Assert.Throws<IndexOutOfRangeException>(() =>
    {
        // Index 3 is the first index beyond the three stored digits
        threeDigits[3] = 3;
    });
}
sufsort(FourBitDigitBigArray T, IBigArray <ulong> SA, long n)
{
    // Entry point that hands the digit array T and the output array SA to sais_main for the
    // first n elements.
    // Guard: if either array is missing, or either holds fewer than n elements, sais_main is
    // not run and SA is returned wrapped in a LongArray (offset 0) without being filled here.
    // NOTE(review): when SA itself is null this still passes null into LongArray - confirm
    // that LongArray tolerates it.
    if ((T == null) || (SA == null) || (SA.Length < n) || (T.Length < n))
    {
        return(new LongArray(SA, 0));
    }
    // Wrap both arrays at offset 0 and delegate to sais_main with k = 10.
    // NOTE(review): the meaning of the literal 0 and false arguments is defined by sais_main,
    // which is not visible here.
    return(sais_main(new FourBitDigitStreamArray(T, 0), new LongArray(SA, 0), 0, n, 10, false)); //k => 10, not the maximum of this datatype but the only reasonable reason to use it (that it's designed for) is for digits
}
// 15 is the largest value four bits can hold, but it is reserved to mark a half-byte as unused,
// so attempting to store it must be treated as an overflow.
public void TestSetReservedOverflow()
{
    const byte reservedMarker = 15;
    FourBitDigitBigArray threeDigits = convertStringTo4BitDigitArray("123");

    Assert.Throws<OverflowException>(() =>
    {
        threeDigits[0] = reservedMarker;
    });
}
// The final two digits of the text must match when the comparison starts two digits from the end.
public void TestDoesStartWithSuffixLastDigitsInDigitArray()
{
    const string text = "1234567890";
    const string tail = "90";

    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(text);
    byte[] tailDigits = stringToByteArr(tail);

    int result = SearchString.doesStartWithSuffix(digits, tailDigits, text.Length - 2);

    Assert.AreEqual(0, result);
}
// Looking for the text with its first digit raised from 1 to 2 must report -1 at index 0:
// the very first digit searched for is larger than the one stored.
public void TestDoesStartWithSuffixTooHigh()
{
    const string text = "12345678901234";

    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(text);

    // Same digits as text except for a larger leading digit
    string largerThanText = "2" + text.Substring(1);
    byte[] largerDigits = stringToByteArr(largerThanText);

    int result = SearchString.doesStartWithSuffix(digits, largerDigits, 0);

    Assert.AreEqual(-1, result);
}
// Every digit read back from the array must print the same as the character it was built from.
public void TestGet()
{
    const string source = "391";
    FourBitDigitBigArray digits = convertStringTo4BitDigitArray(source);

    int index = 0;
    foreach (char expectedChar in source)
    {
        byte storedDigit = digits[index];

        Assert.AreEqual(expectedChar.ToString(), storedDigit.ToString());
        index++;
    }
}
// Overwriting the digit at index 0 must change that digit only; the other digits keep their values.
public void TestSetEven()
{
    const string original = "391";
    FourBitDigitBigArray digits = convertStringTo4BitDigitArray(original);

    digits[0] = 7;

    // The written digit reads back
    Assert.AreEqual(7, digits[0]);

    // Every later digit is untouched
    int index = 1;
    while (index < original.Length)
    {
        string expected = original[index].ToString();
        string stored = digits[index].ToString();

        Assert.AreEqual(expected, stored);
        index++;
    }
}
// Searching for the whole text must yield exactly one position: index 0.
public void SearchSuffixArrayAllDigits()
{
    const string text = "1234567899912340";

    IBigArray<ulong> suffixes = buildSuffixArray(text);
    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(text);

    SuffixArrayRange range = SearchString.Search(suffixes, digits, text);

    long[] onlyTheStart = new long[] { 0 };
    CollectionAssert.AreEqual(onlyTheStart, range.SortedValues);
}
// For every digit of the text, a binary search for that single digit must land on a suffix
// array entry whose suffix begins with the same digit.
public void TestBinarySearchForPrefixSingleChars()
{
    const string text = "2734981324";

    IBigArray<ulong> suffixes = buildSuffixArray(text);
    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(text);

    long lastIndex = text.Length - 1;

    for (int pos = 0; pos < text.Length; pos++)
    {
        // The digit at this position, as a one-element search prefix
        byte digitValue = (byte)(text[pos] - '0');
        byte[] singleDigit = new byte[] { digitValue };

        long hitIdx = SearchString.binarySearchForPrefix(suffixes, digits, singleDigit, 0, lastIndex);

        // Any occurrence is acceptable, so compare digits rather than positions
        byte expectedDigit = digits[pos];
        byte foundDigit = digits[(long)suffixes[hitIdx]];
        Assert.AreEqual(expectedDigit, foundDigit);
    }
}
// Searching an empty text for a digit must produce no positions at all.
public void SearchSuffixArraySearchEmptyString()
{
    const string emptyText = "";
    const string needle = "1";

    IBigArray<ulong> suffixes = buildSuffixArray(emptyText);
    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(emptyText);

    SuffixArrayRange range = SearchString.Search(suffixes, digits, needle);

    long[] nothing = new long[0];
    CollectionAssert.AreEqual(nothing, range.SortedValues);
}
// Searching for a digit that never occurs in the text must report HasResults == false
// and an empty set of positions.
public void TestSuffixArraySearchDigitNotInString()
{
    const string textWithoutEight = "1234567912340";
    const string missingDigit = "8";

    IBigArray<ulong> suffixes = buildSuffixArray(textWithoutEight);
    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(textWithoutEight);

    SuffixArrayRange range = SearchString.Search(suffixes, digits, missingDigit);
    long[] positions = range.SortedValues;

    Assert.AreEqual(false, range.HasResults);
    CollectionAssert.AreEqual(new long[] { }, positions);
}
// A binary search for a digit sequence that does not occur in the text must return -1.
public void TestBinarySearchForPrefixDontExist()
{
    const string text = "8651287431284472619471";

    IBigArray<ulong> suffixes = buildSuffixArray(text);
    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(text);

    // None of these occur anywhere in text
    string[] absentSequences = { "1234", "0", "0987654321", "5676", "10", "111", "33" };

    for (int n = 0; n < absentSequences.Length; n++)
    {
        byte[] wanted = stringToByteArr(absentSequences[n]);

        long hitIdx = SearchString.binarySearchForPrefix(suffixes, digits, wanted, 0, digits.Length - 1);

        Assert.AreEqual(-1, hitIdx);
    }
}
// When the search string is longer than the digits remaining, a mismatch found before the
// digits run out must still decide the result in either direction.
public void TestDoesStartWithSuffixDigitArrayDigitArrayTooSmallNotMatchUntilEnd()
{
    const string text = "1234567890";
    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(text);

    // Only the two digits "90" remain from here, while both searches are three digits long
    long twoFromEnd = text.Length - 2;

    // "911" differs from "90" at its second digit, where it is larger: expect -1
    byte[] largerSearch = stringToByteArr("911");
    Assert.AreEqual(-1, SearchString.doesStartWithSuffix(digits, largerSearch, twoFromEnd));

    // "871" differs from "90" at its first digit, where it is smaller: expect 1
    byte[] smallerSearch = stringToByteArr("871");
    Assert.AreEqual(1, SearchString.doesStartWithSuffix(digits, smallerSearch, twoFromEnd));
}
// Every non-empty substring of the text must be reported as a match (0) when compared against
// the digits beginning at the substring's own start index.
public void TestDoesStartWithSuffix()
{
    const string STR = "12345678901234";
    FourBitDigitBigArray fourBitDigitArray = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

    // Start index (inclusive). Runs to the last digit so the final one-digit substring is covered.
    for (int start = 0; start < STR.Length; start++)
    {
        // End index (exclusive). It has to be allowed to reach STR.Length; stopping one short
        // would leave every substring that ends on the final digit untested.
        for (int end = start + 1; end <= STR.Length; end++)
        {
            string strFind = STR.Substring(start, end - start);
            byte[] find = stringToByteArr(strFind);

            Assert.AreEqual(0, SearchString.doesStartWithSuffix(fourBitDigitArray, find, start));
        }
    }
}
// Creates a range from a precomputed search result.
// A precomputed result whose min and max indices are equal means "no results"; in that case
// only HasResults is set and the remaining members are left unassigned.
public SuffixArrayRange(PrecomputedSearchResult precomputedResult, IBigArray<ulong> suffixArray, FourBitDigitBigArray digits)
{
    bool found = !(precomputedResult.MinSuffixArrayIdx == precomputedResult.MaxSuffixArrayIdx);
    HasResults = found;

    // Nothing more to record when the search found nothing
    if (!found)
    {
        return;
    }

    Min = precomputedResult.MinSuffixArrayIdx;

    // Precomputed results store the maximum exclusive (so that min == max can encode "no results"),
    // whereas this class stores it inclusive; subtract 1 to convert.
    Max = precomputedResult.MaxSuffixArrayIdx - 1;

    SuffixArray = suffixArray;
    Digits = digits;
}