/// <summary>
/// Verifies, for several integer sequences, that <c>LongestCommonPrefix</c> gives the same
/// answer regardless of argument order and matches the value reported by the
/// <c>GetNative(...)</c> reference object's <c>GetLCP</c>.
/// </summary>
public void LCPInt()
{
    var random = new Random();

    // Randomly generated sequences of every length from 1 to 99.
    for (int size = 1; size < 100; size++)
    {
        var values = random.NextIntArray(size, -5, 5);
        var suffixes = SuffixArray.Create(values);
        var reference = GetNative((ReadOnlySpan<int>)values);

        for (int first = 0; first < values.Length; first++)
        {
            for (int second = first; second < values.Length; second++)
            {
                suffixes.LongestCommonPrefix(first, second)
                    .Should().Be(suffixes.LongestCommonPrefix(second, first))
                    .And.Be(reference.GetLCP(first, second));
            }
        }
    }

    // A sequence made of one hundred identical values.
    {
        var values = Enumerable.Repeat(1, 100).ToArray();
        var suffixes = SuffixArray.Create(values);
        var reference = GetNative((ReadOnlySpan<int>)values);

        for (int first = 0; first < values.Length; first++)
        {
            for (int second = first; second < values.Length; second++)
            {
                suffixes.LongestCommonPrefix(first, second)
                    .Should().Be(suffixes.LongestCommonPrefix(second, first))
                    .And.Be(reference.GetLCP(first, second));
            }
        }
    }

    // A sequence of one hundred distinct, increasing values.
    {
        var values = Enumerable.Range(0, 100).ToArray();
        var suffixes = SuffixArray.Create(values);
        var reference = GetNative((ReadOnlySpan<int>)values);

        for (int first = 0; first < values.Length; first++)
        {
            for (int second = first; second < values.Length; second++)
            {
                suffixes.LongestCommonPrefix(first, second)
                    .Should().Be(suffixes.LongestCommonPrefix(second, first))
                    .And.Be(reference.GetLCP(first, second));
            }
        }
    }

    // A hand-written sequence with large magnitudes and a repeated four-element pattern.
    {
        var values = new[] { -4210, 4219014, -5, -4210, -4210, 4219014, -5, -4210 };
        var suffixes = SuffixArray.Create(values);
        var reference = GetNative((ReadOnlySpan<int>)values);

        // Spot checks with known answers before the exhaustive comparison.
        suffixes.LongestCommonPrefix(0, 3).Should().Be(1);
        suffixes.LongestCommonPrefix(0, 4).Should().Be(4);

        for (int first = 0; first < values.Length; first++)
        {
            for (int second = first; second < values.Length; second++)
            {
                suffixes.LongestCommonPrefix(first, second)
                    .Should().Be(suffixes.LongestCommonPrefix(second, first))
                    .And.Be(reference.GetLCP(first, second));
            }
        }
    }
}
/// <summary>
/// Searches "papapa" for "papa" and expects the overlapping matches at offsets 0 and 2,
/// in exactly that order (the result is not sorted before comparing).
/// </summary>
public void Occurences2()
{
    var suffixes = SuffixArray.Create("papapa");
    PrintSortedArray(suffixes);

    var expectedOffsets = new[] { 0, 2 };
    var foundOffsets = suffixes.FindOccurences("papa".ToCharArray()).ToArray();

    Assert.AreEqual(expectedOffsets, foundOffsets);
}
/// <summary>
/// Searches "abracadabra" for the single character 'a' and expects offsets
/// 0, 3, 5, 7 and 10 once the matches have been sorted ascending.
/// </summary>
public void Sufa_Abracadabra()
{
    var suffixes = SuffixArray.Create("abracadabra");
    //DumpSortedArray(sa);
    Debug.Print(suffixes.ToString());

    var expectedOffsets = new[] { 0, 3, 5, 7, 10 };
    var foundOffsets = suffixes
        .FindOccurences("a".ToCharArray())
        .OrderBy(offset => offset)
        .ToArray();

    Assert.AreEqual(expectedOffsets, foundOffsets);
}
/// <summary>
/// Builds a suffix array over a byte sequence and expects the two-byte pattern
/// 0x4E 0x75 to be found at offsets 2 and 6 (after sorting the matches).
/// </summary>
public void Sufa_ByteArray()
{
    var data = new byte[]
    {
        0x01, 0x02, 0x4E, 0x75,
        0x01, 0x02, 0x4E, 0x75,
    };
    var suffixes = SuffixArray.Create(data);

    var expectedOffsets = new[] { 2, 6 };
    var foundOffsets = suffixes
        .FindOccurences(new byte[] { 0x4E, 0x75 })
        .OrderBy(offset => offset)
        .ToArray();

    Assert.AreEqual(expectedOffsets, foundOffsets);
}
/// <summary>
/// Verifies, for strings, that <c>LongestCommonPrefix</c> is independent of argument order
/// and matches the <c>GetNative(...)</c> reference object's <c>GetLCP</c>; also pins the
/// exact <c>SA</c>, <c>LcpArray</c> and <c>Rank</c> contents for "abcaabca".
/// </summary>
public void LCPString()
{
    var random = new Random();

    // Randomly generated strings of every length from 1 to 99.
    for (int size = 1; size < 100; size++)
    {
        var text = random.NextString(size);
        var suffixes = SuffixArray.Create(text);
        var reference = GetNative(text.AsSpan());

        for (int first = 0; first < text.Length; first++)
        {
            for (int second = first; second < text.Length; second++)
            {
                suffixes.LongestCommonPrefix(first, second)
                    .Should().Be(suffixes.LongestCommonPrefix(second, first))
                    .And.Be(reference.GetLCP(first, second));
            }
        }
    }

    // A fixed string whose suffix array, LCP array and rank array are known.
    {
        var text = "abcaabca";
        var suffixes = SuffixArray.Create(text);
        var reference = GetNative(text.AsSpan());

        suffixes.LongestCommonPrefix(0, 3).Should().Be(1);
        suffixes.LongestCommonPrefix(0, 4).Should().Be(4);

        // Start offsets of the suffixes in sorted order.
        suffixes.SA.Should().Equal(
            7, // a
            3, // aabca
            4, // abca
            0, // abcaabca
            5, // bca
            1, // bcaabca
            6, // ca
            2  // caabca
        );

        // Common-prefix length of each pair of neighbouring sorted suffixes.
        suffixes.LcpArray.Should().Equal(
            1, // a - aabca
            1, // aabca - abca
            4, // abca - abcaabca
            0, // abcaabca - bca
            3, // bca - bcaabca
            0, // bcaabca - ca
            2  // ca - caabca
        );

        suffixes.Rank.Should().Equal(3, 5, 7, 1, 2, 4, 6, 0);

        for (int first = 0; first < text.Length; first++)
        {
            for (int second = first; second < text.Length; second++)
            {
                suffixes.LongestCommonPrefix(first, second)
                    .Should().Be(suffixes.LongestCommonPrefix(second, first))
                    .And.Be(reference.GetLCP(first, second));
            }
        }
    }
}
/// <summary>
/// Manual timing harness: for input lengths growing by a factor of 3/2 from 2 up to
/// (but not including) 100,000,000, times suffix array construction against an empty
/// callback and prints both durations together with the managed heap size before and
/// after a forced collection.
/// </summary>
//[Test]
public void Timing()
{
    for (int length = 2; length < 100000000; length = length * 3 / 2)
    {
        // Same seed on every iteration, so each input is reproducible.
        var random = new Random(0x4711);
        var buffer = new byte[length];
        random.NextBytes(buffer);

        // Widen each byte to a char to obtain the text under test.
        char[] chars = buffer.Select(b => (char)b).ToArray();
        var text = new string(chars);

        var buildTimer = Time(text, s => SuffixArray.Create(s));
        var baselineTimer = Time(text, s => { });

        var heapBeforeGc = GC.GetTotalMemory(false) / 1024.0 / 1024.0;
        GC.Collect();
        var heapAfterGc = GC.GetTotalMemory(false) / 1024.0 / 1024.0;

        Debug.Print(
            "{0,15} {1,15} msec {2,15} msec {3} / {4} MiB",
            length,
            buildTimer.ElapsedMilliseconds,
            baselineTimer.ElapsedMilliseconds,
            heapBeforeGc,
            heapAfterGc);
    }
}
/// <summary>
/// Builds the suffix array of "abracadabra" and checks its length, the length of its
/// LCP array, and every sorted suffix and LCP value against hand-computed expectations.
/// </summary>
public void Test3()
{
    string str = "abracadabra";
    string[] expectedSubstrs =
    {
        "a", "abra", "abracadabra", "acadabra", "adabra", "bra",
        "bracadabra", "cadabra", "dabra", "ra", "racadabra"
    };
    int[] expectedLcps = { 0, 1, 4, 1, 1, 0, 3, 0, 0, 0, 2 };

    var sa = SuffixArray.Create(str);
    //DumpSortedArray(sa);

    // Assert.AreEqual takes (expected, actual); passing them in that order keeps the
    // "Expected ... But was ..." failure output truthful.
    Assert.AreEqual(str.Length, sa.Length, "Wrong SA length");
    Assert.AreEqual(str.Length + 1, sa.Lcp.Length, "Wrong LCP length");

    for (int i = 0; i < str.Length; ++i)
    {
        Assert.AreEqual(expectedSubstrs[i], str.Substring(sa[i]), String.Format("Wrong entry {0}", i));
        Assert.AreEqual(expectedLcps[i], sa.Lcp[i], String.Format("Wrong LCP {0}", i));
    }
}
/// <summary>
/// Creates the LCP (longest common prefix) array for the given text by a single
/// left-to-right scan over its suffix array.
/// The last character in argument 'text' is expected to be the unique $ char.
/// Refer to SuffixArray class remarks.
/// </summary>
/// <param name="text">The text.</param>
/// <returns>
/// An array in which entry <c>r</c> (for <c>r &gt; 0</c>) is the length of the common prefix
/// of the suffixes at sorted positions <c>r - 1</c> and <c>r</c>, and entry 0 is -1;
/// an empty array when the suffix array is empty; or <c>null</c> when
/// <c>SuffixArray.Create</c> returns <c>null</c>.
/// </returns>
public static int[] Build(string text)
{
    var suffixArray = SuffixArray.Create(text);
    if (suffixArray == null)
    {
        return null;
    }

    var count = suffixArray.Length;
    var lcpArray = new int[count];
    if (count == 0)
    {
        // Nothing to compare; writing the -1 marker would index past the end.
        return lcpArray;
    }

    // Inverse permutation: rank[p] is the sorted position of the suffix starting at p.
    var rank = new int[count];
    for (int k = 0; k < count; ++k)
    {
        rank[suffixArray[k]] = k;
    }

    lcpArray[0] = -1;
    int m = 0;
    for (int i = 0; i < count; i++)
    {
        int r = rank[i];
        if (r == 0)
        {
            // The smallest suffix has no predecessor to compare with; keep the -1
            // marker and restart the running match length.
            m = 0;
            continue;
        }

        int j = suffixArray[r - 1];

        // Extend the match, stopping at the end of the text so that input without a
        // unique terminator cannot run off the string.
        while (i + m < text.Length && j + m < text.Length && text[i + m] == text[j + m])
        {
            ++m;
        }

        lcpArray[r] = m;

        // The next suffix (starting at i + 1) shares at least m - 1 characters with
        // its own predecessor, so the scan can resume from there.
        if (m > 0)
        {
            --m;
        }
    }

    return lcpArray;
}
/// <summary>
/// Returns a suffix array for <paramref name="image"/>, loading it from
/// <paramref name="filename"/> when that file exists and can be read, and otherwise
/// building it and writing it to <paramref name="filename"/>.
/// </summary>
/// <param name="filename">Path of the file used to store the serialized suffix array.</param>
/// <param name="image">The bytes the suffix array is built over.</param>
/// <returns>The loaded or newly created suffix array.</returns>
private object EnsureSuffixArray(string filename, byte[] image)
{
    var fileSystem = Services.RequireService<IFileSystemService>();
    var diagnostics = Services.RequireService<IDiagnosticsService>();

    if (fileSystem.FileExists(filename))
    {
        using (var input = fileSystem.CreateFileStream(filename, FileMode.Open, FileAccess.Read))
        {
            try
            {
                var storedSuffixes = (int[])new UbjsonReader(input).Read()!;
                return SuffixArray.Load(image, storedSuffixes);
            }
            catch (Exception ex)
            {
                // A failed load is only reported; execution falls through to a rebuild.
                diagnostics.Warn("Unable to load suffix array {0}. {1}", filename, ex.Message);
            }
        }
    }

    var rebuilt = SuffixArray.Create(image);
    using (var output = fileSystem.CreateFileStream(filename, FileMode.Create, FileAccess.Write))
    {
        new UbjsonWriter(output).Write(rebuilt.Save());
    }
    return rebuilt;
}
/// <summary>
/// Searching "yakawow" for a character it does not contain must return -1.
/// </summary>
public void TestSearch3()
{
    var sa = SuffixArray.Create("yakawow");

    // Expected value first, actual second, so a failure report labels them correctly.
    Assert.AreEqual(-1, sa.IndexOf(new char[] { 'z' }), "Found wrong substring.");
}
/// <summary>
/// Searching "yakawow" for the whole text must return index 0.
/// </summary>
public void TestSearch2()
{
    var sa = SuffixArray.Create("yakawow");

    // Expected value first, actual second, so a failure report labels them correctly.
    Assert.AreEqual(0, sa.IndexOf("yakawow".ToCharArray()), "Wrong index");
}
/// <summary>
/// Searching "yakawow" for the character 'a' must return index 1.
/// </summary>
public void TestSearch1()
{
    var sa = SuffixArray.Create("yakawow");

    // Expected value first, actual second, so a failure report labels them correctly.
    Assert.AreEqual(1, sa.IndexOf(new char[] { 'a' }), "Substring not found/Wrong index");
}
/// <summary>
/// A suffix array created from a null string must report -1 for any search.
/// </summary>
public void Test2()
{
    SuffixArray<char> sa = SuffixArray.Create((string)null);

    // Expected value first, actual second, so a failure report labels them correctly.
    Assert.AreEqual(-1, sa.IndexOf(new[] { 'a' }), "Found wrong substring");
}
/// <summary>
/// Searches "zappa" for the character 'a' and expects offsets 1 and 4,
/// in exactly that order (the result is not sorted before comparing).
/// </summary>
public void Occurences()
{
    var suffixes = SuffixArray.Create("zappa");

    var expectedOffsets = new[] { 1, 4 };
    var foundOffsets = suffixes.FindOccurences("a".ToCharArray()).ToArray();

    Assert.AreEqual(expectedOffsets, foundOffsets);
}
/// <summary>
/// Searching "yakawow" for an empty pattern must return -1.
/// </summary>
public void TestSearch5()
{
    var sa = SuffixArray.Create("yakawow");

    // Expected value first, actual second, so a failure report labels them correctly.
    Assert.AreEqual(-1, sa.IndexOf(new char[0]), "Wrong index");
}