/// <summary>
/// Constructs the suffix arrays for both fixture inputs and checks each one
/// against its expected output fixture.
/// </summary>
public void ConstructTest()
{
    // Both arrays are built before either assertion runs.
    int[] firstActual = SuffixArray.Construct(StringToShortArr(input1));
    int[] secondActual = SuffixArray.Construct(StringToShortArr(input2));

    CollectionAssert.AreEqual(output1, firstActual);
    CollectionAssert.AreEqual(output2, secondActual);
}
/// <summary>
/// Keyword-in-context client. Reads the text through <c>In</c> from the source
/// named by <c>strarr[0]</c> and the context width from <c>strarr[1]</c>; then,
/// for every line read from <c>StdIn</c>, prints each occurrence of that line
/// in the text with up to that many characters of context on each side,
/// followed by an empty line.
/// </summary>
/// <param name="strarr">The command-line arguments.</param>
public static void main(string[] strarr)
{
    In source = new In(strarr[0]);
    int contextWidth = Integer.parseInt(strarr[1]);

    // Every run of whitespace in the text is collapsed to a single space.
    string corpus = java.lang.String.instancehelper_replaceAll(source.readAll(), "\\s+", " ");
    int corpusLength = java.lang.String.instancehelper_length(corpus);
    SuffixArray suffixes = new SuffixArray(corpus);

    while (StdIn.hasNextLine())
    {
        string query = StdIn.readLine();
        int row = suffixes.rank(query);
        while (row < corpusLength)
        {
            int start = suffixes.index(row);
            int stop = java.lang.Math.min(corpusLength, start + java.lang.String.instancehelper_length(query));
            string candidate = java.lang.String.instancehelper_substring(corpus, start, stop);

            // Stop at the first suffix that no longer begins with the query.
            if (!java.lang.String.instancehelper_equals(query, candidate))
            {
                break;
            }

            int contextStart = java.lang.Math.max(0, suffixes.index(row) - contextWidth);
            int contextStop = java.lang.Math.min(corpusLength, suffixes.index(row) + contextWidth + java.lang.String.instancehelper_length(query));
            StdOut.println(java.lang.String.instancehelper_substring(corpus, contextStart, contextStop));
            row++;
        }
        StdOut.println();
    }
}
/** * Reads a string from a file specified as the first * command-line argument; read an integer k specified as the * second command line argument; then repeatedly processes * use queries, printing all occurrences of the given query * string in the text string with k characters of surrounding * context on either side. * * @param args the command-line arguments */ public static void main(String[] args) { In in = new In(args[0]); int context = Integer.parseInt(args[1]); // read in text String text = in.readAll().replaceAll("\\s+", " "); int n = text.length(); // build suffix array SuffixArray sa = new SuffixArray(text); // find all occurrences of queries and give context while (StdIn.hasNextLine()) { String query = StdIn.readLine(); for (int i = sa.rank(query); i < n; i++) { int from1 = sa.index(i); int to1 = Math.min(n, from1 + query.length()); if (!query.equals(text.substring(from1, to1))) break; int from2 = Math.max(0, sa.index(i) - context); int to2 = Math.min(n, sa.index(i) + context + query.length()); StdOut.println(text.substring(from2, to2)); } StdOut.println(); } }
/// <summary>
/// Creates a match finder backed by a newly constructed <c>SuffixArray</c>
/// and stores the supplied limits and options.
/// </summary>
/// <param name="limits">Value assigned to <c>FindLimitations</c>.</param>
/// <param name="options">Value assigned to <c>FindOptions</c>.</param>
public SuffixArrayMatchFinder(FindLimitations limits, FindOptions options)
{
    this._array = new SuffixArray();
    this.FindLimitations = limits;
    this.FindOptions = options;
}
/** * Unit tests the {@code SuffixArrayx} data type. * * @param args the command-line arguments */ public static void main(String[] args) { String s = StdIn.readAll().replaceAll("\n", " ").trim(); SuffixArrayX suffix1 = new SuffixArrayX(s); SuffixArray suffix2 = new SuffixArray(s); boolean check = true; for (int i = 0; check && i < s.length(); i++) { if (suffix1.index(i) != suffix2.index(i)) { StdOut.println("suffix1(" + i + ") = " + suffix1.index(i)); StdOut.println("suffix2(" + i + ") = " + suffix2.index(i)); String ith = "\"" + s.substring(suffix1.index(i), Math.min(suffix1.index(i) + 50, s.length())) + "\""; String jth = "\"" + s.substring(suffix2.index(i), Math.min(suffix2.index(i) + 50, s.length())) + "\""; StdOut.println(ith); StdOut.println(jth); check = false; } } StdOut.println(" i ind lcp rnk select"); StdOut.println("---------------------------"); for (int i = 0; i < s.length(); i++) { int index = suffix2.index(i); String ith = "\"" + s.substring(index, Math.min(index + 50, s.length())) + "\""; int rank = suffix2.rank(s.substring(index)); assert s.substring(index).equals(suffix2.select(i)); if (i == 0) { StdOut.printf("%3d %3d %3s %3d %s\n", i, index, "-", rank, ith); } else { // int lcp = suffix.lcp(suffix2.index(i), suffix2.index(i-1)); int lcp = suffix2.lcp(i); StdOut.printf("%3d %3d %3d %3d %s\n", i, index, lcp, rank, ith); } } }
/// <summary>
/// Longest-common-substring client. Reads two texts through <c>In</c> from the
/// sources named by <c>strarr[0]</c> and <c>strarr[1]</c>, trims them and
/// collapses whitespace runs, joins them with a <c>'\u0001'</c> separator,
/// builds a suffix array over the joined text, and prints the length and the
/// (single-quoted) text of the longest <c>lcp</c> found between adjacent
/// suffixes that pass the two position filters below.
/// </summary>
/// <param name="strarr">The command-line arguments.</param>
public static void main(string[] strarr)
{
    In firstInput = new In(strarr[0]);
    In secondInput = new In(strarr[1]);
    string first = java.lang.String.instancehelper_replaceAll(java.lang.String.instancehelper_trim(firstInput.readAll()), "\\s+", " ");
    string second = java.lang.String.instancehelper_replaceAll(java.lang.String.instancehelper_trim(secondInput.readAll()), "\\s+", " ");
    int firstLength = java.lang.String.instancehelper_length(first);
    // The result of this call is not used; it is kept as in the original.
    java.lang.String.instancehelper_length(second);

    string joined = new StringBuilder().append(first).append('\u0001').append(second).toString();
    int joinedLength = java.lang.String.instancehelper_length(joined);
    SuffixArray suffixes = new SuffixArray(joined);

    string best = "";
    for (int row = 1; row < joinedLength; row++)
    {
        // Skip pairs where both suffixes start before the separator position.
        if (suffixes.index(row) < firstLength && suffixes.index(row - 1) < firstLength)
        {
            continue;
        }

        // Skip pairs where both suffixes start after the separator position.
        if (suffixes.index(row) > firstLength && suffixes.index(row - 1) > firstLength)
        {
            continue;
        }

        int shared = suffixes.lcp(row);
        if (shared > java.lang.String.instancehelper_length(best))
        {
            best = java.lang.String.instancehelper_substring(joined, suffixes.index(row), suffixes.index(row) + shared);
        }
    }

    StdOut.println(java.lang.String.instancehelper_length(best));
    StdOut.println(new StringBuilder().append("'").append(best).append("'").toString());
}
/// <summary>
/// Produces a permuted copy of <paramref name="input"/> driven by the suffix
/// array of the input (a Burrows-Wheeler-style transform, judging by its shape).
/// Each byte is shifted up by one so that 0 can serve as a terminating symbol;
/// for every suffix in suffix-array order, the symbol preceding that suffix is
/// emitted (shifted back down). The suffix that starts at position 0 emits
/// nothing; its rank is recorded and stored after the data bytes.
/// </summary>
/// <param name="input">The bytes to transform.</param>
/// <returns>
/// An array of <c>input.Length + 4</c> bytes: the permuted data followed by the
/// bytes returned by <c>IntToByteArr</c> for the recorded rank
/// (presumably 4 bytes; confirm against <c>IntToByteArr</c>).
/// </returns>
public static byte[] Transform(byte[] input)
{
    int length = input.Length;
    byte[] output = new byte[length + 4];

    short[] shifted = new short[length + 1];
    for (int pos = 0; pos < length; pos++)
    {
        shifted[pos] = (short)(input[pos] + 1);
    }
    shifted[length] = 0;

    int[] suffixArray = SuffixArray.Construct(shifted);

    int end = 0;
    int written = 0;
    for (int rank = 0; rank < suffixArray.Length; rank++)
    {
        if (suffixArray[rank] != 0)
        {
            output[written] = (byte)(shifted[suffixArray[rank] - 1] - 1);
            written++;
        }
        else
        {
            // The whole-input suffix has no preceding symbol; remember its rank.
            end = rank;
        }
    }

    byte[] endBytes = IntToByteArr(end);
    endBytes.CopyTo(output, length);
    return output;
}
/// <summary>
/// Checks that <c>FindAllOccurrences</c> reports the right number of hits for
/// substrings that are present in the text. Only the hit counts are asserted;
/// the individual offsets are listed in comments for reference.
/// </summary>
public void testsuccessfulFindAllStringsArrays()
{
    string theString = "abracadabraababbra";
    SuffixArray theArray = new SuffixArray(theString);

    // "bra" starts at offsets 1, 8 and 15 of the text.
    List<int> result = new List<int>(theArray.FindAllOccurrences("bra"));
    Assert.AreEqual(3, result.Count);

    // "brac" starts at offset 1 only.
    result = new List<int>(theArray.FindAllOccurrences("brac"));
    Assert.AreEqual(1, result.Count);

    // "ab" starts at offsets 0, 7, 11 and 13.
    result = new List<int>(theArray.FindAllOccurrences("ab"));
    Assert.AreEqual(4, result.Count);
}
/// <summary>
/// Builds a suffix array over "papapa", prints it, and checks the occurrences
/// reported for the overlapping pattern "papa".
/// </summary>
public void Occurences2()
{
    var sa = SuffixArray.Create("papapa");
    PrintSortedArray(sa);

    var expected = new[] { 0, 2 };
    var actual = sa.FindOccurences("papa".ToCharArray()).ToArray();
    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Checks <c>LongestCommonPrefix</c> on integer sequences: for every pair of
/// positions the result must be symmetric in its arguments and equal to the
/// value from the reference object returned by <c>GetNative</c>. Covers random
/// sequences of length 1..99, a constant sequence, a strictly increasing
/// sequence, and a hand-written sequence with two spot checks.
/// </summary>
public void LCPInt()
{
    var random = new Random();

    // Random sequences of every length from 1 to 99.
    for (int length = 1; length < 100; length++)
    {
        var values = random.NextIntArray(length, -5, 5);
        var suffixes = SuffixArray.Create(values);
        var reference = GetNative((ReadOnlySpan<int>)values);
        for (int left = 0; left < values.Length; left++)
        {
            for (int right = left; right < values.Length; right++)
            {
                suffixes.LongestCommonPrefix(left, right).Should().Be(suffixes.LongestCommonPrefix(right, left)).And.Be(reference.GetLCP(left, right));
            }
        }
    }

    // One hundred copies of the same value.
    {
        var values = Enumerable.Repeat(1, 100).ToArray();
        var suffixes = SuffixArray.Create(values);
        var reference = GetNative((ReadOnlySpan<int>)values);
        for (int left = 0; left < values.Length; left++)
        {
            for (int right = left; right < values.Length; right++)
            {
                suffixes.LongestCommonPrefix(left, right).Should().Be(suffixes.LongestCommonPrefix(right, left)).And.Be(reference.GetLCP(left, right));
            }
        }
    }

    // The values 0..99 in increasing order.
    {
        var values = Enumerable.Range(0, 100).ToArray();
        var suffixes = SuffixArray.Create(values);
        var reference = GetNative((ReadOnlySpan<int>)values);
        for (int left = 0; left < values.Length; left++)
        {
            for (int right = left; right < values.Length; right++)
            {
                suffixes.LongestCommonPrefix(left, right).Should().Be(suffixes.LongestCommonPrefix(right, left)).And.Be(reference.GetLCP(left, right));
            }
        }
    }

    // Hand-written sequence with two spot checks before the exhaustive comparison.
    {
        var values = new[] { -4210, 4219014, -5, -4210, -4210, 4219014, -5, -4210 };
        var suffixes = SuffixArray.Create(values);
        var reference = GetNative((ReadOnlySpan<int>)values);
        suffixes.LongestCommonPrefix(0, 3).Should().Be(1);
        suffixes.LongestCommonPrefix(0, 4).Should().Be(4);
        for (int left = 0; left < values.Length; left++)
        {
            for (int right = left; right < values.Length; right++)
            {
                suffixes.LongestCommonPrefix(left, right).Should().Be(suffixes.LongestCommonPrefix(right, left)).And.Be(reference.GetLCP(left, right));
            }
        }
    }
}
/// <summary>
/// Computes the character classes of a fixed nucleotide string (after sorting
/// its characters with the nucleotide alphabet) and writes them to the console
/// as a comma-separated list. Nothing is asserted.
/// </summary>
public void Week4_Q2()
{
    const string text = "AACGATAGCGGTAGA$";

    var order = SuffixArray.SortCharacters(text, SuffixArray.NucleotideAlphabet);
    var classes = SuffixArray.ComputeCharClasses(text, order);

    var rendered = string.Join(",", classes.Select(value => value.ToString()));
    Console.WriteLine(rendered);
}
/// <summary>
/// Checks the result of <c>SortCharacters</c> for "AAA$" with the nucleotide
/// alphabet against the expected values 3, 2, 1, 0.
/// </summary>
public void TestSortCharacters2()
{
    var actual = SuffixArray.SortCharacters("AAA$", SuffixArray.NucleotideAlphabet);

    actual.ShouldBeEquivalentTo(new[] { 3, 2, 1, 0 });
}
/// <summary>
/// Checks the result of <c>SortCharacters</c> for "ababaa$" with the alphabet
/// "$ab" against the expected values 6, 5, 4, 2, 0, 3, 1.
/// </summary>
public void TestSortCharacters1()
{
    var actual = SuffixArray.SortCharacters("ababaa$", "$ab");

    actual.ShouldBeEquivalentTo(new[] { 6, 5, 4, 2, 0, 3, 1 });
}
/// <summary>
/// Builds a suffix array over "abracadabra", prints its string form to the
/// debug output, and checks that the occurrences of "a", once sorted
/// ascending, are 0, 3, 5, 7 and 10.
/// </summary>
public void Sufa_Abracadabra()
{
    var sa = SuffixArray.Create("abracadabra");
    Debug.Print(sa.ToString());

    var expected = new[] { 0, 3, 5, 7, 10 };
    var actual = sa.FindOccurences("a".ToCharArray())
        .OrderBy(position => position)
        .ToArray();
    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Self-test client. Reads all of <c>StdIn</c> (newlines replaced by spaces,
/// then trimmed), builds a <c>SuffixArrayX</c> and a <c>SuffixArray</c> over it,
/// reports the first position where their <c>index</c> values differ, and then
/// prints a table of index, lcp, rank and suffix text taken from the
/// <c>SuffixArrayX</c> instance.
/// </summary>
/// <param name="strarr">The command-line arguments (not read by this method).</param>
public static void main(string[] strarr)
{
    string text = java.lang.String.instancehelper_trim(java.lang.String.instancehelper_replaceAll(StdIn.readAll(), "\n", " "));
    SuffixArrayX candidate = new SuffixArrayX(text);
    SuffixArray reference = new SuffixArray(text);

    // Report only the first disagreement between the two implementations.
    for (int pos = 0; pos < java.lang.String.instancehelper_length(text); pos++)
    {
        if (reference.index(pos) == candidate.index(pos))
        {
            continue;
        }

        StdOut.println(new StringBuilder().append("suffixReference(").append(pos).append(") = ").append(reference.index(pos)).toString());
        StdOut.println(new StringBuilder().append("suffix(").append(pos).append(") = ").append(candidate.index(pos)).toString());
        string candidateText = new StringBuilder().append("\"").append(java.lang.String.instancehelper_substring(text, candidate.index(pos), java.lang.Math.min(candidate.index(pos) + 50, java.lang.String.instancehelper_length(text)))).append("\"").toString();
        string referenceText = new StringBuilder().append("\"").append(java.lang.String.instancehelper_substring(text, reference.index(pos), java.lang.Math.min(reference.index(pos) + 50, java.lang.String.instancehelper_length(text)))).append("\"").toString();
        StdOut.println(candidateText);
        StdOut.println(referenceText);
        break;
    }

    StdOut.println(" i ind lcp rnk select");
    StdOut.println("---------------------------");

    for (int row = 0; row < java.lang.String.instancehelper_length(text); row++)
    {
        int start = candidate.index(row);
        // Show at most the first 50 characters of each suffix.
        string shown = new StringBuilder().append("\"").append(java.lang.String.instancehelper_substring(text, start, java.lang.Math.min(start + 50, java.lang.String.instancehelper_length(text)))).append("\"").toString();
        int rank = candidate.rank(java.lang.String.instancehelper_substring(text, start));

        // Decompiled form of a Java assert: select(row) must equal the suffix text.
        if (!SuffixArrayX.s_assertionsDisabled && !java.lang.String.instancehelper_equals(java.lang.String.instancehelper_substring(text, start), candidate.select(row)))
        {
            throw new AssertionError();
        }

        if (row != 0)
        {
            int lcp = candidate.lcp(row);
            StdOut.printf("%3d %3d %3d %3d %s\n", new object[]
            {
                Integer.valueOf(row),
                Integer.valueOf(start),
                Integer.valueOf(lcp),
                Integer.valueOf(rank),
                shown
            });
        }
        else
        {
            // The first row has no predecessor, so no lcp value is printed.
            StdOut.printf("%3d %3d %3s %3d %s\n", new object[]
            {
                Integer.valueOf(row),
                Integer.valueOf(start),
                "-",
                Integer.valueOf(rank),
                shown
            });
        }
    }
}
/// <summary>
/// Creates an inverted index that uses the given tokenizer. Initialises the
/// suffix array, the ordinal match comparer, the prefix comparer, the lock
/// object and the indexed-files dictionary with fresh instances.
/// </summary>
/// <param name="tokenizer">The tokenizer stored on this instance.</param>
public InvertedIndex(ITokenizer tokenizer)
{
    this.tokenizer = tokenizer;

    suffixArray = new SuffixArray<string, List<DocumentPosition>>();
    matchComparer = StringComparer.Ordinal;
    prefixComparer = new PrefixStringComparer();
    syncObj = new object();
    indexedFiles = new Dictionary<string, string[]>();
}
/// <summary>
/// Reads "tale.txt" through <c>ReadFile</c>, converts the result to a single
/// string via a <c>StringBuilder</c>, and constructs a <c>SuffixArray</c> over
/// it. The suffix array is not used afterwards.
/// </summary>
/// <param name="args">The command-line arguments (not read by this method).</param>
static void Main(string[] args)
{
    var contents = ReadFile("tale.txt");
    var text = new StringBuilder().Append(contents).ToString();

    var sa = new SuffixArray(text);
}
/// <summary>
/// Checks that a suffix array over "items" reports none of the decimal
/// strings "0" through "19" as contained.
/// </summary>
public void SuffixArray_NotItems()
{
    SuffixArray sa = new SuffixArray("items");

    int number = 0;
    while (number < 20)
    {
        Assert.IsFalse(sa.Contains(number.ToString()));
        number++;
    }
}
/// <summary>
/// Checks that <c>FindAllOccurrences</c> yields no hits for words that do not
/// appear in the text.
/// </summary>
public void testfailedFindAllOccurrencesArrays()
{
    string theString = "abracadabraababbra";
    SuffixArray theArray = new SuffixArray(theString);

    // Expected value goes first so a failure message reports the right roles.
    List<int> result = new List<int>(theArray.FindAllOccurrences("jason"));
    Assert.AreEqual(0, result.Count);

    result = new List<int>(theArray.FindAllOccurrences("testword"));
    Assert.AreEqual(0, result.Count);
}
/// <summary>
/// Checks, entry by entry, that the suffix array built for "banana" is
/// 5, 3, 1, 0, 4, 2.
/// </summary>
public void SortSuffixes()
{
    int[] suffixArray = SuffixArray.Build("banana");

    int[] expected = { 5, 3, 1, 0, 4, 2 };
    for (int rank = 0; rank < expected.Length; rank++)
    {
        Assert.AreEqual(expected[rank], suffixArray[rank]);
    }
}
/// <summary>
/// Builds a suffix array over an eight-byte sequence that repeats a four-byte
/// group twice and checks that the two-byte pattern 4E 75 is found at offsets
/// 2 and 6 once the results are sorted ascending.
/// </summary>
public void Sufa_ByteArray()
{
    var data = new byte[]
    {
        0x01, 0x02, 0x4E, 0x75,
        0x01, 0x02, 0x4E, 0x75,
    };
    var sa = SuffixArray.Create(data);

    var expected = new[] { 2, 6 };
    var actual = sa.FindOccurences(new byte[] { 0x4E, 0x75 })
        .OrderBy(position => position)
        .ToArray();
    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Usage example: builds a suffix array over a fixed string, looks up the
/// substring "rac" with <c>IndexOf</c>, and reads the <c>Lcp</c> entry at the
/// returned index. Neither value is used afterwards.
/// </summary>
/// <param name="args">The command-line arguments (not read by this method).</param>
static void Main(string[] args)
{
    string text = "abcracadabra";

    // Build the suffix array for the sample text.
    SuffixArray sa = new SuffixArray(text);

    // Look up a substring.
    int position = sa.IndexOf("rac");

    // Read the LCP entry at the index returned above.
    int lcp = sa.Lcp[position];
}
/// <summary>
/// Checks that, for a suffix array over "items", <c>Find</c> returns the
/// starting offset of each suffix: "items" gives 0, "tems" gives 1, "ems"
/// gives 2, "ms" gives 3 and "s" gives 4.
/// </summary>
public void SuffixArray_Index()
{
    const string text = "items";
    SuffixArray sa = new SuffixArray(text);

    // text.Substring(offset) is exactly the suffix that starts at that offset.
    for (int offset = 0; offset < text.Length; offset++)
    {
        Assert.AreEqual(offset, sa.Find(text.Substring(offset)));
    }
}
/// <summary>
/// Checks that, for a suffix array over "itemsitems", <c>Find</c> on each
/// suffix of "items" returns a value no smaller than that suffix's offset
/// within "items".
/// </summary>
public void SuffixArray_MultiIndex()
{
    const string word = "items";
    SuffixArray sa = new SuffixArray("itemsitems");

    // word.Substring(offset) walks "items", "tems", "ems", "ms", "s".
    for (int offset = 0; offset < word.Length; offset++)
    {
        Assert.IsTrue(offset <= sa.Find(word.Substring(offset)));
    }
}
/// <summary>
/// Adds "test" and then "tes" to a suffix array through a mocked comparer and
/// verifies that the comparer was asked to compare ("test", "tes") exactly once.
/// </summary>
public void Compare_Via_Insert_Comparer_On_Add()
{
    var insertComparerMock = new Mock<IComparer<string>>();
    var comparer = insertComparerMock.Object;
    var suffixArray = new SuffixArray<string, int>();

    suffixArray.TryAdd("test", 1, comparer);
    suffixArray.TryAdd("tes", 2, comparer);

    insertComparerMock.Verify(c => c.Compare("test", "tes"), Times.Once);
}
/// <summary>
/// Checks that <c>FindSubstring</c> returns false for a null pattern and for
/// an empty pattern.
/// </summary>
public void nullTestingArrays()
{
    string nonNullString = "abracadabra";
    SuffixArray theSuffixArray = new SuffixArray(nonNullString);

    bool foundForNull = theSuffixArray.FindSubstring(null);
    Assert.AreEqual(false, foundForNull);

    bool foundForEmpty = theSuffixArray.FindSubstring("");
    Assert.AreEqual(false, foundForEmpty);
}
/// <summary>
/// Checks <c>LongestCommonPrefix</c> on strings: for every pair of positions
/// the result must be symmetric in its arguments and equal to the value from
/// the reference object returned by <c>GetNative</c>. Covers random strings of
/// length 1..99 and the fixed string "abcaabca", for which the suffix array,
/// LCP array and rank array are also checked against literal values.
/// </summary>
public void LCPString()
{
    var random = new Random();

    // Random strings of every length from 1 to 99.
    for (int length = 1; length < 100; length++)
    {
        var text = random.NextString(length);
        var suffixes = SuffixArray.Create(text);
        var reference = GetNative(text.AsSpan());
        for (int left = 0; left < text.Length; left++)
        {
            for (int right = left; right < text.Length; right++)
            {
                suffixes.LongestCommonPrefix(left, right).Should().Be(suffixes.LongestCommonPrefix(right, left)).And.Be(reference.GetLCP(left, right));
            }
        }
    }

    // Fixed string with fully spelled-out expectations.
    {
        var text = "abcaabca";
        var suffixes = SuffixArray.Create(text);
        var reference = GetNative(text.AsSpan());

        suffixes.LongestCommonPrefix(0, 3).Should().Be(1);
        suffixes.LongestCommonPrefix(0, 4).Should().Be(4);

        suffixes.SA.Should().Equal(
            7, // a
            3, // aabca
            4, // abca
            0, // abcaabca
            5, // bca
            1, // bcaabca
            6, // ca
            2  // caabca
        );
        suffixes.LcpArray.Should().Equal(
            1, // a - aabca
            1, // aabca - abca
            4, // abca - abcaabca
            0, // abcaabca - bca
            3, // bca - bcaabca
            0, // bcaabca - ca
            2  // ca - caabca
        );
        suffixes.Rank.Should().Equal(3, 5, 7, 1, 2, 4, 6, 0);

        for (int left = 0; left < text.Length; left++)
        {
            for (int right = left; right < text.Length; right++)
            {
                suffixes.LongestCommonPrefix(left, right).Should().Be(suffixes.LongestCommonPrefix(right, left)).And.Be(reference.GetLCP(left, right));
            }
        }
    }
}
/// <summary>
/// Checks that the suffix array built for "camel" is 1, 0, 3, 4, 2.
/// </summary>
public void Create()
{
    // Arrange
    const string text = "camel";

    // Act
    var sa = new SuffixArray(text);

    // Assert
    var expected = new[] { 1, 0, 3, 4, 2 };
    CollectionAssert.AreEqual(expected, sa.SA);
}
/// <summary>
/// Writes one debug-output line per suffix-array entry: the entry's rank, its
/// value, the matching <c>Lcp</c> entry, and the text of <c>Str</c> from that
/// value onward.
/// </summary>
/// <param name="sa">The suffix array to dump.</param>
public void DumpSortedArray(SuffixArray<char> sa)
{
    int rank = 0;
    while (rank < sa.Length)
    {
        Debug.Print(
            "{0,4} {1,4} lcp = {2,4} {3}",
            rank,
            sa[rank],
            sa.Lcp[rank],
            string.Join("", sa.Str.Skip(sa[rank])));
        rank++;
    }
}
/// <summary>
/// Checks that <c>UpperBound("B")</c> on a suffix array over "ABABBAB"
/// returns 3.
/// </summary>
public void UpperBound2()
{
    // Arrange
    const string text = "ABABBAB";
    var sa = new SuffixArray(text);

    // Act
    var upperBound = sa.UpperBound("B");

    // Assert
    Assert.AreEqual(3, upperBound);
}
/// <summary>
/// Checks that <c>FindAllOccurrences</c> yields no hits for words that do not
/// appear in the text.
/// </summary>
public void testfailedFindAllOccurrencesArrays()
{
    string theString = "abracadabraababbra";
    SuffixArray theArray = new SuffixArray(theString);

    // Expected value goes first so a failure message reports the right roles.
    List<int> result = new List<int>(theArray.FindAllOccurrences("jason"));
    Assert.AreEqual(0, result.Count);

    result = new List<int>(theArray.FindAllOccurrences("testword"));
    Assert.AreEqual(0, result.Count);
}
/// <summary>
/// Writes one console line per suffix-array entry: the entry's rank, its
/// value, the matching <c>Lcp</c> entry, and the text of <c>Str</c> from that
/// value onward.
/// </summary>
/// <param name="sa">The suffix array to print.</param>
public void PrintSortedArray(SuffixArray<char> sa)
{
    for (int i = 0; i < sa.Length; i++)
    {
        // WriteLine rather than Write: Write emits no line terminator, which
        // made every row run together on a single console line.
        Console.WriteLine("{0,4} {1,4} lcp = {2,4} {3}", i, sa[i], sa.Lcp[i], string.Join("", sa.Str.Skip(sa[i])));
    }
}
/// <summary>
/// Checks that <c>FindSubstring</c> returns true for several substrings that
/// are present in "abracadabra".
/// </summary>
public void successfulStringsArrays()
{
    string testString = "abracadabra";
    SuffixArray theSuffixArray = new SuffixArray(testString);

    string[] present = { "bra", "ab", "cada", "dab" };
    foreach (string needle in present)
    {
        bool found = theSuffixArray.FindSubstring(needle);
        Assert.AreEqual(true, found);
    }
}
/// <summary>
/// Checks that <c>FindSubstring</c> returns false for several words that do
/// not occur in "abracadabra".
/// </summary>
public void failedStringsArrays()
{
    string testString = "abracadabra";
    SuffixArray theSuffixArray = new SuffixArray(testString);

    string[] absent = { "jason", "masud", "testing", "notastring" };
    foreach (string needle in absent)
    {
        bool found = theSuffixArray.FindSubstring(needle);
        Assert.AreEqual(false, found);
    }
}
/// <summary>
/// Checks that <c>FindAllOccurrences</c> reports the right number of hits for
/// substrings that are present in the text. Only the hit counts are asserted;
/// the individual offsets are listed in comments for reference.
/// </summary>
public void testsuccessfulFindAllStringsArrays()
{
    string theString = "abracadabraababbra";
    SuffixArray theArray = new SuffixArray(theString);

    // "bra" starts at offsets 1, 8 and 15 of the text.
    List<int> result = new List<int>(theArray.FindAllOccurrences("bra"));
    Assert.AreEqual(3, result.Count);

    // "brac" starts at offset 1 only.
    result = new List<int>(theArray.FindAllOccurrences("brac"));
    Assert.AreEqual(1, result.Count);

    // "ab" starts at offsets 0, 7, 11 and 13.
    result = new List<int>(theArray.FindAllOccurrences("ab"));
    Assert.AreEqual(4, result.Count);
}
/// <summary>
/// Writes one debug-output line per suffix-array entry: the entry's rank, its
/// value, the matching <c>Lcp</c> entry, and the text of <c>Str</c> from that
/// value onward.
/// </summary>
/// <param name="sa">The suffix array to dump.</param>
public void DumpSortedArray(SuffixArray<char> sa)
{
    int rank = 0;
    while (rank < sa.Length)
    {
        Debug.Print(
            "{0,4} {1,4} lcp = {2,4} {3}",
            rank,
            sa[rank],
            sa.Lcp[rank],
            string.Join("", sa.Str.Skip(sa[rank])));
        rank++;
    }
}