/// <summary>
/// Looks up a few hand-picked substrings of a fixed digit string and checks that the suffix
/// array search reports exactly the expected positions for each one.
/// </summary>
public void SearchSuffixArrayManualTest()
{
    const string STR = "1234567899912340";

    // Search term -> positions in STR where it occurs, ascending
    var expectedPositions = new Dictionary<string, long[]>();
    expectedPositions.Add("1", new long[] { 0, 11 });
    expectedPositions.Add("2", new long[] { 1, 12 });
    expectedPositions.Add("12", new long[] { 0, 11 });
    expectedPositions.Add("5", new long[] { 4 });

    IBigArray<ulong> suffixArray = buildSuffixArray(STR);
    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

    foreach (KeyValuePair<string, long[]> entry in expectedPositions)
    {
        SuffixArrayRange range = SearchString.Search(suffixArray, digits, entry.Key);

        CollectionAssert.AreEqual(entry.Value, range.SortedValues);
    }
}
/// <summary>
/// Binary-searches the inclusive index range [min, max] of the suffix array for any suffix
/// that <c>doesStartWithSuffix</c> reports as a hit (0) for <paramref name="findPrefix"/>.
/// </summary>
/// <param name="suffixArray">Suffix array whose entries are positions into <paramref name="digitArray"/>.</param>
/// <param name="digitArray">The digits the suffix array was built over.</param>
/// <param name="findPrefix">The digits being looked for.</param>
/// <param name="min">Lowest suffix array index still to be considered.</param>
/// <param name="max">Highest suffix array index still to be considered.</param>
/// <returns>The suffix array index of one hit (not necessarily the first), or -1 when the range is exhausted.</returns>
internal static long binarySearchForPrefix(IBigArray<ulong> suffixArray, FourBitDigitBigArray digitArray, byte[] findPrefix, long min, long max)
{
    long low = min;
    long high = max;

    while (true)
    {
        long remaining = high - low + 1;

        //Nothing left to look at: the prefix isn't present
        if (remaining <= 0)
        {
            return -1;
        }

        //Probe the middle (lower middle for an even count) of what is left
        long idx = low + ((remaining - 1) / 2);
        int hit = doesStartWithSuffix(digitArray, findPrefix, (long)suffixArray[idx]);

        if (hit == 0)
        {
            //Found a suffix starting with the prefix
            return idx;
        }

        if (hit == 1)
        {
            //We're too high in the array, keep only the lower half
            high = idx - 1;
        }
        else
        {
            //Any other result is treated as too low, keep only the upper half
            low = idx + 1;
        }
    }
}
//Constructor
/// <summary>
/// Creates the array on top of caller-supplied backing storage for the values and their
/// complement flags.
/// </summary>
/// <param name="length">Number of elements this array exposes. Must not be negative.</param>
/// <param name="maxValue">Value stored in <c>MaxValue</c>.</param>
/// <param name="values">Backing storage for the values; must hold at least <paramref name="length"/> elements.</param>
/// <param name="complements">Backing storage for the complement flags; must hold at least <paramref name="length"/> elements.</param>
/// <exception cref="ArgumentNullException"><paramref name="values"/> or <paramref name="complements"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is negative.</exception>
/// <exception cref="ArgumentException">A backing array is shorter than <paramref name="length"/>.</exception>
public MemoryEfficientComplementBigULongArray(long length, ulong maxValue, IBigArray<ulong> values, IBigArray<bool> complements)
{
    //Validation
    if (values == null)
    {
        throw new ArgumentNullException(nameof(values));
    }
    if (complements == null)
    {
        throw new ArgumentNullException(nameof(complements));
    }

    //A negative length would slip past both "backing array is long enough" checks below and
    // leave the object reporting a negative Length, so reject it up front. Zero is allowed.
    if (length < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(length), length, "length must not be negative");
    }

    if (values.Length < length)
    {
        throw new ArgumentException("values length must be >= length", nameof(values));
    }
    if (complements.Length < length)
    {
        throw new ArgumentException("complements length must be >= length", nameof(complements));
    }

    Length = length;
    MaxValue = maxValue;
    this.values = values;
    this.complements = complements;
}
/// <summary>
/// Searching with a suffix array whose length differs from the digit array's must be
/// rejected with an <see cref="ArgumentException"/>.
/// </summary>
public void TestSuffixArrayWrongSize()
{
    // Three suffix array entries against five digits: deliberately mismatched
    int[] tooFewEntries = new int[] { 1, 2, 3 };
    IBigArray<ulong> suffixArray = Program.convertIntArrayToBigUlongArray(tooFewEntries);
    FourBitDigitBigArray a = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray("12345");

    Assert.Throws<ArgumentException>(
        () => SearchString.Search(suffixArray, a, "23"));
}
/// <summary>
/// Precomputes suffix array ranges for fixed-length search strings. For each value <c>i</c>
/// below <c>NumPrecomputedResults(stringLength)</c>, <c>i</c> is formatted as a zero-padded
/// decimal string of <paramref name="stringLength"/> digits and the run of suffix array
/// indexes whose suffixes compare equal to it is recorded.
/// </summary>
/// <remarks>
/// Slot <c>2 * i</c> of the result holds the suffix array index at which the run for string
/// <c>i</c> starts and slot <c>2 * i + 1</c> holds the exclusive end of that run, so two equal
/// values mean the string was not found. The suffix array is walked once from front to back;
/// the index reached for one string is where the scan for the next string begins.
/// </remarks>
/// <param name="fourBitDigitArray">The digits the suffix array refers to.</param>
/// <param name="suffixArray">Suffix array over <paramref name="fourBitDigitArray"/>.</param>
/// <param name="stringLength">Number of digits in each precomputed search string.</param>
/// <returns>The array of (start, exclusive end) pairs, two slots per search string.</returns>
public static MemoryEfficientBigULongArray GenerateSearchResults(FourBitDigitBigArray fourBitDigitArray, IBigArray<ulong> suffixArray, int stringLength)
{
    int lessThan = NumPrecomputedResults(stringLength);
    string toStringFormatter = "D" + stringLength;

    //Two slots per search string. Computed in 64-bit arithmetic so that a large lessThan
    // cannot wrap around to a negative size
    MemoryEfficientBigULongArray precomputedResults = new MemoryEfficientBigULongArray(
        (long)lessThan * 2, (ulong)fourBitDigitArray.Length);

    long suffixArrayIdx = 0;
    for (int i = 0; i < lessThan; i++)
    {
        //Slot indexes are longs as well: i * 2 overflows an int once i passes int.MaxValue / 2
        long minSlot = (long)i * 2;
        long maxSlot = minSlot + 1;

        if (suffixArrayIdx < suffixArray.Length)
        {
            //Convert what we're searching for to the digits to be searched for
            string sSearch = i.ToString(toStringFormatter);
            byte[] bArrSearch = SearchString.StrToByteArr(sSearch);

            long suffixArrayVal = (long)suffixArray[suffixArrayIdx];

            //Find when this string starts: skip every suffix that compares as -1
            while (suffixArrayVal < fourBitDigitArray.Length && suffixArrayIdx < suffixArray.Length &&
                   SearchString.doesStartWithSuffix(fourBitDigitArray, bArrSearch, suffixArrayVal) == -1)
            {
                suffixArrayIdx++;
                if (suffixArrayIdx < suffixArray.Length)
                {
                    suffixArrayVal = (long)suffixArray[suffixArrayIdx];
                }
            }

            precomputedResults[minSlot] = (ulong)suffixArrayIdx;

            //Find when this string ends: step over every suffix that compares as 0 (a match)
            while (suffixArrayVal < fourBitDigitArray.Length && suffixArrayIdx < suffixArray.Length &&
                   SearchString.doesStartWithSuffix(fourBitDigitArray, bArrSearch, suffixArrayVal) == 0)
            {
                suffixArrayIdx++;
                if (suffixArrayIdx < suffixArray.Length)
                {
                    suffixArrayVal = (long)suffixArray[suffixArrayIdx];
                }
            }

            //Note that here the exclusive maximum is stored, so if min == max the string wasn't found
            precomputedResults[maxSlot] = (ulong)suffixArrayIdx;
        }
        else
        {
            //The suffix array is exhausted: every remaining string gets an empty range at the end
            precomputedResults[minSlot] = (ulong)suffixArray.Length;
            precomputedResults[maxSlot] = (ulong)suffixArray.Length;
        }
    }

    return precomputedResults;
}
// Constructors

/// <summary>
/// Creates a range that is flagged as having results, recording the given bounds together
/// with the suffix array and digits they refer to.
/// </summary>
/// <param name="min">Value stored in <c>Min</c>.</param>
/// <param name="max">Value stored in <c>Max</c>.</param>
/// <param name="suffixArray">Value stored in <c>SuffixArray</c>.</param>
/// <param name="digits">Value stored in <c>Digits</c>.</param>
public SuffixArrayRange(long min, long max, IBigArray<ulong> suffixArray, FourBitDigitBigArray digits)
{
    SuffixArray = suffixArray;
    Digits = digits;

    Min = min;
    Max = max;

    // This overload always describes a non-empty result
    HasResults = true;
}
/// <summary>
/// Searching for the empty string must be rejected with an <see cref="ArgumentException"/>.
/// </summary>
public void SearchSuffixArrayForEmptyString()
{
    const string STR = "123456789";
    const string EMPTY_SEARCH = "";

    IBigArray<ulong> suffixArray = buildSuffixArray(STR);
    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

    Assert.Throws<ArgumentException>(
        () => SearchString.Search(suffixArray, digits, EMPTY_SEARCH));
}
// Suffix-sort entry point for 4-bit digit input. (The return type and modifiers of this
// method are declared outside this excerpt.)
//   T  - the digits to sort
//   SA - presumably the array that receives the suffix array - confirm against sais_main
//   n  - number of digits of T to process
// If either array is null or shorter than n, sais_main is not called and a LongArray wrapping
// SA (which may itself be null) at offset 0 is returned. Otherwise the result of sais_main is
// returned, with the alphabet-size argument (k) fixed at 10.
sufsort(FourBitDigitBigArray T, IBigArray <ulong> SA, long n) { if ((T == null) || (SA == null) || (SA.Length < n) || (T.Length < n)) { return(new LongArray(SA, 0)); } return(sais_main(new FourBitDigitStreamArray(T, 0), new LongArray(SA, 0), 0, n, 10, false)); //k => 10, not the maximum of this datatype but the only reasonable reason to use it (that it's designed for) is for digits }
/// <summary>
/// Collects every element of the given array's backing store into a set of signed 64-bit
/// values.
/// </summary>
/// <remarks>
/// Despite the name the result is a <see cref="HashSet{T}"/>, so duplicate values collapse into
/// one entry. As a side effect this instance's <c>m_array</c> is re-pointed at
/// <paramref name="array"/>'s backing store.
/// </remarks>
/// <param name="array">The array whose backing store is read.</param>
/// <returns>The distinct values found, each converted with <see cref="Convert.ToInt64(object)"/> or a matching overload.</returns>
public HashSet<long> ToList(LongArray array)
{
    // Side effect preserved: adopt the argument's backing store before enumerating it
    m_array = array.m_array;

    var distinctValues = new HashSet<long>();
    foreach (var element in m_array)
    {
        long converted = Convert.ToInt64(element);
        distinctValues.Add(converted);
    }

    return distinctValues;
}
/// <summary>
/// Searching for the complete digit string must yield exactly one result, at position 0.
/// </summary>
public void SearchSuffixArrayAllDigits()
{
    const string STR = "1234567899912340";

    IBigArray<ulong> suffixArray = buildSuffixArray(STR);
    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

    // The search term is the whole string, so the only possible match starts at 0
    SuffixArrayRange range = SearchString.Search(suffixArray, digits, STR);

    CollectionAssert.AreEqual(new long[] { 0 }, range.SortedValues);
}
/// <summary>
/// For every digit of the test string, binary-searches for that single digit and checks that
/// the suffix found starts with the same digit.
/// </summary>
public void TestBinarySearchForPrefixSingleChars()
{
    const string STR = "2734981324";

    IBigArray<ulong> suffixArray = buildSuffixArray(STR);
    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

    for (int pos = 0; pos < STR.Length; pos++)
    {
        // Turn the character at this position into its numeric digit value
        byte digit = (byte)(STR[pos] - '0');
        byte[] find = new byte[] { digit };

        long answer = SearchString.binarySearchForPrefix(suffixArray, digits, find, 0, STR.Length - 1);

        // Any occurrence is acceptable, so compare digits rather than positions
        var expectedDigit = digits[pos];
        var foundDigit = digits[(long)suffixArray[answer]];
        Assert.AreEqual(expectedDigit, foundDigit);
    }
}
/// <summary>
/// Searching inside an empty digit string must produce no positions.
/// </summary>
public void SearchSuffixArraySearchEmptyString()
{
    const string STR = "";
    const string FIND = "1";

    IBigArray<ulong> suffixArray = buildSuffixArray(STR);
    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

    SuffixArrayRange range = SearchString.Search(suffixArray, digits, FIND);

    // Nothing to search in, so nothing can be found
    CollectionAssert.AreEqual(new long[0], range.SortedValues);
}
// Suffix-sort entry point for string input. (The return type and modifiers of this method are
// declared outside this excerpt.)
//   T  - the string to sort
//   SA - presumably the array that receives the suffix array - confirm against sais_main
//   n  - number of characters of T to process
// Returns -1 when either argument is null or shorter than n. For n <= 1 the answer is trivial:
// SA[0] is set to 0 when n == 1 and 0 is returned without calling sais_main. Otherwise the
// result of sais_main is returned, called with 65536 as its alphabet-size argument
// (presumably one bucket per UTF-16 code unit - TODO confirm).
sufsort(string T, IBigArray <ulong> SA, int n) { if ((T == null) || (SA == null) || (T.Length < n) || (SA.Length < n)) { return(-1); } if (n <= 1) { if (n == 1) { SA[0] = 0; } return(0); } return(sais_main(new StringArray(T, 0), new LongArray(SA, 0), 0, n, 65536, false)); }
/// <summary>
/// Searching for a digit that never occurs must report no results and an empty position list.
/// </summary>
public void TestSuffixArraySearchDigitNotInString()
{
    // STR deliberately contains no '8'
    const string STR = "1234567912340";
    const string FIND = "8";

    IBigArray<ulong> suffixArray = buildSuffixArray(STR);
    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

    SuffixArrayRange range = SearchString.Search(suffixArray, digits, FIND);
    long[] foundPositions = range.SortedValues;

    Assert.AreEqual(false, range.HasResults);
    CollectionAssert.AreEqual(new long[] { }, foundPositions);
}
/// <summary>
/// Binary-searching for strings that do not occur in the digits must return -1 every time.
/// </summary>
public void TestBinarySearchForPrefixDontExist()
{
    const string STR = "8651287431284472619471";

    IBigArray<ulong> suffixArray = buildSuffixArray(STR);
    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

    // None of these appear anywhere in STR
    string[] absentStrings = { "1234", "0", "0987654321", "5676", "10", "111", "33" };

    foreach (string absent in absentStrings)
    {
        byte[] find = stringToByteArr(absent);

        long answer = SearchString.binarySearchForPrefix(suffixArray, digits, find, 0, digits.Length - 1);

        Assert.AreEqual(-1, answer);
    }
}
/// <summary>
/// Creates a range from a precomputed search result.
/// </summary>
/// <remarks>
/// A precomputed result whose minimum and maximum indexes are equal encodes "not found"; in
/// that case only <c>HasResults</c> is set (to false) and the other members are left untouched.
/// </remarks>
/// <param name="precomputedResult">The precomputed minimum and (exclusive) maximum suffix array indexes.</param>
/// <param name="suffixArray">Value stored in <c>SuffixArray</c> when there are results.</param>
/// <param name="digits">Value stored in <c>Digits</c> when there are results.</param>
public SuffixArrayRange(PrecomputedSearchResult precomputedResult, IBigArray<ulong> suffixArray, FourBitDigitBigArray digits)
{
    bool nothingFound = precomputedResult.MinSuffixArrayIdx == precomputedResult.MaxSuffixArrayIdx;
    if (nothingFound)
    {
        HasResults = false;
        return;
    }

    //Otherwise there are search results
    HasResults = true;
    Min = precomputedResult.MinSuffixArrayIdx;

    //The precomputed maximum is exclusive (which is what lets min == max encode "no results"),
    // whereas this class stores an inclusive maximum, hence the - 1
    Max = precomputedResult.MaxSuffixArrayIdx - 1;

    SuffixArray = suffixArray;
    Digits = digits;
}
/* string */
/// <summary>
/// Builds the suffix array of a string into <paramref name="SA"/> and returns the resulting
/// entries, after they have been passed through <c>CleanSuffixArray</c>, as a set.
/// </summary>
/// <param name="T">input string</param>
/// <param name="SA">output suffix array</param>
/// <param name="n">length of the given string</param>
/// <returns>
/// An empty set when an argument is null or shorter than <paramref name="n"/>, or when
/// <paramref name="n"/> is 1 or less; otherwise the set produced by <c>CleanSuffixArray</c>.
/// </returns>
public static HashSet<long> Sufsort(string T, IBigArray<ulong> SA, int n)
{
    bool argumentsUnusable = (T == null) || (SA == null) || (T.Length < n) || (SA.Length < n);
    if (argumentsUnusable)
    {
        return new HashSet<long>();
    }

    // A single character has exactly one suffix, starting at position 0
    if (n == 1)
    {
        SA[0] = 0;
    }

    // Nothing to sort for 0 or 1 characters
    if (n <= 1)
    {
        return new HashSet<long>();
    }

    var sorted = sais_main(new StringArray(T, 0), new LongArray(SA, 0), 0, n, 65536, false);
    var entries = sorted.ToList(sorted);

    return CleanSuffixArray(entries, T);
}
/// <summary>
/// For every non-empty substring of the test string, checks that the suffix array search
/// returns the same positions as the plain sequential string search.
/// </summary>
public void SearchSuffixArray()
{
    const string STR = "123456789";

    IBigArray<ulong> suffixArray = buildSuffixArray(STR);
    FourBitDigitBigArray digits = FourBitDigitBigArrayTests.convertStringTo4BitDigitArray(STR);

    // Enumerate substrings by start position, then by increasing length
    for (int start = 0; start < STR.Length; start++)
    {
        for (int length = 1; start + length <= STR.Length; length++)
        {
            string find = STR.Substring(start, length);

            // Reference answer from the sequential search
            long[] sequentialResult = SearchString.Search(STR, find).ToLongArr();

            SuffixArrayRange range = SearchString.Search(suffixArray, digits, find);

            CollectionAssert.AreEqual(sequentialResult, range.SortedValues);
        }
    }
}
/// <summary>
/// Finds the range of suffix array indexes whose suffixes start with the given digits.
/// </summary>
/// <remarks>
/// When <paramref name="precomputedResults"/> has an entry for strings of
/// <paramref name="lookFor"/>'s length, the answer is read from it (indexed by
/// <c>ByteArrToLong(lookFor)</c>). Otherwise one match is located by binary search and the
/// range is widened one step at a time in both directions while neighbouring suffixes still match.
/// </remarks>
/// <param name="suffixArray">Suffix array for <paramref name="digitArray"/>; must have the same length.</param>
/// <param name="digitArray">The digits being searched.</param>
/// <param name="lookFor">The digits to look for; at least one.</param>
/// <param name="precomputedResults">
/// Optional precomputed results, where element <c>k - 1</c> holds the results for search strings of length <c>k</c>.
/// </param>
/// <returns>The matching range, or a range constructed with <c>false</c> when there is no match or the digit array is empty.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="lookFor"/> or <paramref name="digitArray"/> is null, or <paramref name="suffixArray"/> is null
/// while the digit array is non-empty.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="lookFor"/> is empty, or the suffix array and digit array lengths differ.
/// </exception>
public static SuffixArrayRange Search(IBigArray<ulong> suffixArray, FourBitDigitBigArray digitArray, byte[] lookFor, IBigArray<PrecomputedSearchResult>[] precomputedResults = null)
{
    //Validation. The null checks sit immediately before the first use of each argument so
    // that the order in which problems are reported is unchanged
    if (lookFor == null)
    {
        throw new ArgumentNullException(nameof(lookFor));
    }
    if (lookFor.Length == 0)
    {
        throw new ArgumentException("lookFor must contain at least 1 digit");
    }
    if (digitArray == null)
    {
        throw new ArgumentNullException(nameof(digitArray));
    }
    if (digitArray.Length == 0)
    {
        return new SuffixArrayRange(false);
    }
    if (suffixArray == null)
    {
        throw new ArgumentNullException(nameof(suffixArray));
    }
    if (suffixArray.Length != digitArray.Length)
    {
        throw new ArgumentException(
            "Suffix Array must be the same length as the Digit Array. This is not the correct suffix array for this digit array");
    }

    //If we have been given the precomputed results for strings of the length we're looking for
    // (a null precomputedResults simply means there are none)
    if (precomputedResults != null && precomputedResults.Length >= lookFor.Length)
    {
        IBigArray<PrecomputedSearchResult> precomputedResultsOfRequiredLength = precomputedResults[lookFor.Length - 1];

        //Convert the string of bytes we're looking for to a long to use as the array index
        long precomputedResultIdx = ByteArrToLong(lookFor);
        PrecomputedSearchResult precomputedResult = precomputedResultsOfRequiredLength[precomputedResultIdx];

        //Convert this precomputed result into a SuffixArrayRange before returning it
        return new SuffixArrayRange(precomputedResult, suffixArray, digitArray);
    }

    //Otherwise we don't have the precomputed results for this search, run the suffix array search
    long matchingPosition = binarySearchForPrefix(suffixArray, digitArray, lookFor, 0, suffixArray.Length - 1);

    //If there were no matches
    if (matchingPosition == -1)
    {
        return new SuffixArrayRange(false);
    }

    //Otherwise a match was found. Matching suffixes are adjacent in the suffix array, so widen
    // the range in both directions until the neighbours stop matching
    long min = matchingPosition;
    long max = matchingPosition;

    while (min > 0 && doesStartWithSuffix(digitArray, lookFor, (long)suffixArray[min - 1]) == 0)
    {
        min--;
    }

    while (max < digitArray.Length - 1 && doesStartWithSuffix(digitArray, lookFor, (long)suffixArray[max + 1]) == 0)
    {
        max++;
    }

    return new SuffixArrayRange(min, max, suffixArray, digitArray);
}
/// <summary>
/// String convenience overload: converts <paramref name="lookFor"/> with <c>StrToByteArr</c>
/// and forwards to the byte-array search.
/// </summary>
/// <param name="suffixArray">Suffix array for <paramref name="digitArray"/>.</param>
/// <param name="digitArray">The digits being searched.</param>
/// <param name="lookFor">The digits to look for, as a string.</param>
/// <param name="precomputedResults">Optional precomputed results, passed through unchanged.</param>
/// <returns>Whatever the byte-array overload returns for the converted search string.</returns>
public static SuffixArrayRange Search(IBigArray<ulong> suffixArray, FourBitDigitBigArray digitArray, string lookFor, IBigArray<PrecomputedSearchResult>[] precomputedResults = null)
{
    byte[] lookForDigits = StrToByteArr(lookFor);

    return Search(suffixArray, digitArray, lookForDigits, precomputedResults);
}
/// <summary>
/// Wraps a big array, remembering the given position alongside it.
/// </summary>
/// <param name="array">The array to wrap; stored by reference, not copied.</param>
/// <param name="pos">Position stored with the array.</param>
public LongArray(IBigArray<ulong> array, long pos)
{
    _mPos = pos;
    _mArray = array;
}
//Constructor
/// <summary>
/// Creates the array as a wrapper around an existing array of unsigned 64-bit values.
/// </summary>
/// <param name="underlyingArray">The array to wrap; stored by reference, not copied.</param>
public BigPrecomputedSearchResultsArray(IBigArray<ulong> underlyingArray)
{
    // No validation is done here; the reference is kept exactly as given
    this.underlyingArray = underlyingArray;
}
/// <summary>
/// Creates a view over the same backing array as another <c>LongArray</c>, positioned
/// <paramref name="pos"/> further along than that one.
/// </summary>
/// <param name="array">The existing view whose backing array is shared.</param>
/// <param name="pos">Offset added to <paramref name="array"/>'s own position.</param>
public LongArray(LongArray array, long pos)
{
    // The new position is relative to the source view, not to the start of the backing array
    m_pos = array.m_pos + pos;
    m_array = array.m_array;
}
/// <summary>
/// Wraps a big array, remembering the given position alongside it.
/// </summary>
/// <param name="array">The array to wrap; stored by reference, not copied.</param>
/// <param name="pos">Position stored with the array.</param>
public LongArray(IBigArray<ulong> array, long pos)
{
    m_pos = pos;
    m_array = array;
}
/// <summary>
/// Creates a view over the same backing array as another <c>LongArray</c>, positioned
/// <paramref name="pos"/> further along than that one.
/// </summary>
/// <param name="array">The existing view whose backing array is shared.</param>
/// <param name="pos">Offset added to <paramref name="array"/>'s own position.</param>
public LongArray(LongArray array, long pos)
{
    // The new position is relative to the source view, not to the start of the backing array
    _mPos = array._mPos + pos;
    _mArray = array._mArray;
}
/// <summary>
/// Creates the array over caller-supplied value storage, allocating a fresh
/// <c>BigBoolArray</c> of <paramref name="length"/> elements for the complement flags.
/// </summary>
/// <param name="length">Number of elements this array exposes.</param>
/// <param name="maxValue">Forwarded to the four-argument constructor.</param>
/// <param name="values">Backing storage for the values, forwarded to the four-argument constructor.</param>
public MemoryEfficientComplementBigULongArray(long length, ulong maxValue, IBigArray<ulong> values)
    : this(length, maxValue, values, new BigBoolArray(length))
{
    // All of the work happens in the four-argument constructor
}