示例#1
0
 // Builds a suffix array for each of the two sample inputs and compares
 // every result with its expected array.
 public void ConstructTest()
 {
     var firstSymbols = StringToShortArr(input1);
     int[] actualFirst = SuffixArray.Construct(firstSymbols);

     var secondSymbols = StringToShortArr(input2);
     int[] actualSecond = SuffixArray.Construct(secondSymbols);

     CollectionAssert.AreEqual(output1, actualFirst);
     CollectionAssert.AreEqual(output2, actualSecond);
 }
    // Keyword-in-context client: strarr[0] names the text file, strarr[1] is the
    // number of context characters printed on each side of every match.
    // One query is read per line of standard input.
    public static void main(string[] strarr)
    {
        In  source       = new In(strarr[0]);
        int contextWidth = Integer.parseInt(strarr[1]);

        // Every run of whitespace is collapsed to a single space before indexing.
        string      corpus       = java.lang.String.instancehelper_replaceAll(source.readAll(), "\\s+", " ");
        int         corpusLength = java.lang.String.instancehelper_length(corpus);
        SuffixArray suffixes     = new SuffixArray(corpus);

        while (StdIn.hasNextLine())
        {
            string query = StdIn.readLine();
            int    rank  = suffixes.rank(query);

            // Walk the sorted suffixes from the query's rank for as long as they start with the query.
            while (rank < corpusLength)
            {
                int    matchStart = suffixes.index(rank);
                int    matchEnd   = java.lang.Math.min(corpusLength, matchStart + java.lang.String.instancehelper_length(query));
                string candidate  = java.lang.String.instancehelper_substring(corpus, matchStart, matchEnd);
                if (!java.lang.String.instancehelper_equals(query, candidate))
                {
                    break;
                }

                // Clamp the context window to the bounds of the text.
                int windowStart = java.lang.Math.max(0, suffixes.index(rank) - contextWidth);
                int windowEnd   = java.lang.Math.min(corpusLength, suffixes.index(rank) + contextWidth + java.lang.String.instancehelper_length(query));
                StdOut.println(java.lang.String.instancehelper_substring(corpus, windowStart, windowEnd));
                rank++;
            }
            StdOut.println();
        }
    }
示例#3
0
    /**
     * Keyword-in-context client. Loads the text from the file named by the
     * first command-line argument and takes the context width from the
     * second. Each line of standard input is then treated as a query: every
     * occurrence of the query is printed together with up to that many
     * characters of text on either side, followed by a blank line.
     *
     * @param args the command-line arguments: text file name, context width
     */
    public static void main(String[] args) {
        In input = new In(args[0]);
        final int width = Integer.parseInt(args[1]);

        // collapse every run of whitespace to a single space
        final String corpus = input.readAll().replaceAll("\\s+", " ");
        final int length = corpus.length();

        final SuffixArray suffixes = new SuffixArray(corpus);

        while (StdIn.hasNextLine()) {
            final String query = StdIn.readLine();

            // start at the query's rank and stop at the first suffix that no longer begins with it
            int i = suffixes.rank(query);
            while (i < length) {
                int start = suffixes.index(i);
                int end = Math.min(length, start + query.length());
                String candidate = corpus.substring(start, end);
                if (!query.equals(candidate)) {
                    break;
                }
                int lo = Math.max(0, suffixes.index(i) - width);
                int hi = Math.min(length, suffixes.index(i) + width + query.length());
                StdOut.println(corpus.substring(lo, hi));
                i++;
            }
            StdOut.println();
        }
    }
示例#4
0
        // Creates a match finder with a newly constructed SuffixArray and
        // stores the supplied limits and options.
        public SuffixArrayMatchFinder(FindLimitations limits, FindOptions options)
        {
            this._array = new SuffixArray();

            this.FindLimitations = limits;
            this.FindOptions = options;
        }
示例#5
0
    /**
     * Test client. Reads all of standard input (newlines replaced by spaces,
     * then trimmed), builds both a {@code SuffixArrayX} and a
     * {@code SuffixArray} over it, and reports the first rank at which their
     * {@code index} values differ. It then prints a table of index, lcp,
     * rank and a suffix preview of at most 50 characters, all taken from the
     * {@code SuffixArray}.
     *
     * @param args the command-line arguments (not used)
     */
    public static void main(String[] args) {
        final String text = StdIn.readAll().replaceAll("\n", " ").trim();
        final SuffixArrayX optimized = new SuffixArrayX(text);
        final SuffixArray reference = new SuffixArray(text);
        final int n = text.length();

        // stop at the first disagreement between the two implementations
        for (int i = 0; i < n; i++) {
            if (optimized.index(i) == reference.index(i)) {
                continue;
            }
            StdOut.println("suffix1(" + i + ") = " + optimized.index(i));
            StdOut.println("suffix2(" + i + ") = " + reference.index(i));
            String first = "\"" + text.substring(optimized.index(i), Math.min(optimized.index(i) + 50, n)) + "\"";
            String second = "\"" + text.substring(reference.index(i), Math.min(reference.index(i) + 50, n)) + "\"";
            StdOut.println(first);
            StdOut.println(second);
            break;
        }

        StdOut.println("  i ind lcp rnk  select");
        StdOut.println("---------------------------");

        for (int i = 0; i < n; i++) {
            int start = reference.index(i);
            String preview = "\"" + text.substring(start, Math.min(start + 50, n)) + "\"";
            int rank = reference.rank(text.substring(start));
            assert text.substring(start).equals(reference.select(i));
            if (i > 0) {
                int lcp = reference.lcp(i);
                StdOut.printf("%3d %3d %3d %3d  %s\n", i, start, lcp, rank, preview);
            }
            else {
                // the first row has no predecessor, so the lcp column shows "-"
                StdOut.printf("%3d %3d %3s %3d  %s\n", i, start, "-", rank, preview);
            }
        }
    }
    // Longest-common-substring client. Reads the two files named by strarr[0]
    // and strarr[1], trims them and collapses whitespace runs to single spaces,
    // joins them with a '\u0001' separator and builds one suffix array over the
    // result. It then prints the length of the longest prefix shared by two
    // adjacent suffixes that are not both from the same text, followed by that
    // substring in single quotes.
    public static void main(string[] strarr)
    {
        In @in = new In(strarr[0]);
        In in2 = new In(strarr[1]);

        string first       = java.lang.String.instancehelper_replaceAll(java.lang.String.instancehelper_trim(@in.readAll()), "\\s+", " ");
        string second      = java.lang.String.instancehelper_replaceAll(java.lang.String.instancehelper_trim(in2.readAll()), "\\s+", " ");
        int    firstLength = java.lang.String.instancehelper_length(first);

        // The separator sits at position firstLength of the combined text.
        string      combined       = new StringBuilder().append(first).append('\u0001').append(second).toString();
        int         combinedLength = java.lang.String.instancehelper_length(combined);
        SuffixArray suffixArray    = new SuffixArray(combined);
        string      longest        = "";

        for (int i = 1; i < combinedLength; i++)
        {
            int current  = suffixArray.index(i);
            int previous = suffixArray.index(i - 1);

            // Both adjacent suffixes start inside the first text: skip.
            if (current < firstLength && previous < firstLength)
            {
                continue;
            }

            // Both adjacent suffixes start after the separator: skip.
            if (current > firstLength && previous > firstLength)
            {
                continue;
            }

            int shared = suffixArray.lcp(i);
            if (shared > java.lang.String.instancehelper_length(longest))
            {
                longest = java.lang.String.instancehelper_substring(combined, current, current + shared);
            }
        }
        StdOut.println(java.lang.String.instancehelper_length(longest));
        StdOut.println(new StringBuilder().append("'").append(longest).append("'").toString());
    }
        // Reorders the bytes of input by suffix rank: for every suffix in the order
        // returned by SuffixArray.Construct, the byte immediately before that suffix
        // is emitted. The rank at which the whole input (suffix 0) appears is
        // appended after the data via IntToByteArr. This has the shape of a
        // Burrows-Wheeler transform.
        //
        // Returns an array of input.Length + 4 bytes.
        // NOTE(review): assumes IntToByteArr yields at most 4 bytes - confirm.
        public static byte[] Transform(byte[] input)
        {
            int     length  = input.Length;
            byte[]  result  = new byte[length + 4];
            short[] shifted = new short[length + 1];

            // Shift every byte up by one so that 0 is free to act as the terminator.
            int position = 0;
            foreach (byte symbol in input)
            {
                shifted[position] = (short)(symbol + 1);
                position++;
            }
            shifted[length] = 0;

            int[] sorted     = SuffixArray.Construct(shifted);
            int   originRank = 0;
            int   written    = 0;

            for (int rank = 0; rank < sorted.Length; rank++)
            {
                int start = sorted[rank];
                if (start != 0)
                {
                    // Undo the +1 shift when emitting the preceding symbol.
                    result[written] = (byte)(shifted[start - 1] - 1);
                    written++;
                }
                else
                {
                    // Suffix 0 has no preceding symbol; remember where it ranked.
                    originRank = rank;
                }
            }

            byte[] rankBytes = IntToByteArr(originRank);
            rankBytes.CopyTo(result, length);
            return result;
        }
示例#8
0
        // Checks how many matches FindAllOccurrences reports for patterns that occur
        // in the text. Only the counts are asserted; the start offsets are listed in
        // comments because the order in which FindAllOccurrences yields them is not
        // visible from this test.
        public void testsuccessfulFindAllStringsArrays()
        {
            string      theString = "abracadabraababbra";
            SuffixArray theArray  = new SuffixArray(theString);

            // "bra" starts at offsets 1, 8 and 15.
            List <int> result = new List <int>(theArray.FindAllOccurrences("bra"));
            Assert.AreEqual(3, result.Count());

            // "brac" starts at offset 1 only.
            result = new List <int>(theArray.FindAllOccurrences("brac"));
            Assert.AreEqual(1, result.Count());

            // "ab" starts at offsets 0, 7, 11 and 13.
            result = new List <int>(theArray.FindAllOccurrences("ab"));
            Assert.AreEqual(4, result.Count());
        }
示例#9
0
        // "papa" occurs in "papapa" at offsets 0 and 2 (overlapping matches).
        // The offsets are sorted before comparing so the assertion does not depend
        // on the order in which FindOccurences enumerates them.
        public void Occurences2()
        {
            var sa = SuffixArray.Create("papapa");

            PrintSortedArray(sa);
            Assert.AreEqual(new[] { 0, 2 }, sa.FindOccurences("papa".ToCharArray()).OrderBy(i => i).ToArray());
        }
        // Cross-checks LongestCommonPrefix against the reference returned by GetNative
        // for every pair of positions (left <= right), in both argument orders.
        // Inputs: arrays from NextIntArray(n, -5, 5) for n = 1..99, a constant array,
        // a strictly increasing array and one hand-written array.
        public void LCPInt()
        {
            var random = new Random();

            for (int length = 1; length < 100; length++)
            {
                var values    = random.NextIntArray(length, -5, 5);
                var suffixes  = SuffixArray.Create(values);
                var reference = GetNative((ReadOnlySpan <int>)values);

                for (int left = 0; left < values.Length; left++)
                {
                    for (int right = left; right < values.Length; right++)
                    {
                        suffixes.LongestCommonPrefix(left, right).Should().Be(suffixes.LongestCommonPrefix(right, left)).And.Be(reference.GetLCP(left, right));
                    }
                }
            }

            // One hundred copies of the same value.
            {
                var values    = Enumerable.Repeat(1, 100).ToArray();
                var suffixes  = SuffixArray.Create(values);
                var reference = GetNative((ReadOnlySpan <int>)values);

                for (int left = 0; left < values.Length; left++)
                {
                    for (int right = left; right < values.Length; right++)
                    {
                        suffixes.LongestCommonPrefix(left, right).Should().Be(suffixes.LongestCommonPrefix(right, left)).And.Be(reference.GetLCP(left, right));
                    }
                }
            }

            // The values 0..99, all distinct.
            {
                var values    = Enumerable.Range(0, 100).ToArray();
                var suffixes  = SuffixArray.Create(values);
                var reference = GetNative((ReadOnlySpan <int>)values);

                for (int left = 0; left < values.Length; left++)
                {
                    for (int right = left; right < values.Length; right++)
                    {
                        suffixes.LongestCommonPrefix(left, right).Should().Be(suffixes.LongestCommonPrefix(right, left)).And.Be(reference.GetLCP(left, right));
                    }
                }
            }

            // A four-element pattern repeated twice, with two spot checks first.
            {
                var values    = new[] { -4210, 4219014, -5, -4210, -4210, 4219014, -5, -4210 };
                var suffixes  = SuffixArray.Create(values);
                var reference = GetNative((ReadOnlySpan <int>)values);
                suffixes.LongestCommonPrefix(0, 3).Should().Be(1);
                suffixes.LongestCommonPrefix(0, 4).Should().Be(4);

                for (int left = 0; left < values.Length; left++)
                {
                    for (int right = left; right < values.Length; right++)
                    {
                        suffixes.LongestCommonPrefix(left, right).Should().Be(suffixes.LongestCommonPrefix(right, left)).And.Be(reference.GetLCP(left, right));
                    }
                }
            }
        }
示例#11
0
        // Prints, as a comma-separated list, the character classes computed for a
        // sample nucleotide string. Nothing is asserted.
        public void Week4_Q2()
        {
            const string text = "AACGATAGCGGTAGA$";

            var sortedOrder = SuffixArray.SortCharacters(text, SuffixArray.NucleotideAlphabet);
            var classes     = SuffixArray.ComputeCharClasses(text, sortedOrder);
            var rendered    = classes.Select(value => value.ToString());

            Console.WriteLine(string.Join(",", rendered));
        }
        // Sorts the characters of "AAA$" over the nucleotide alphabet and compares the
        // resulting positions with { 3, 2, 1, 0 }.
        // NOTE(review): if ShouldBeEquivalentTo is the FluentAssertions extension, it
        // ignores element order by default - confirm that is intended for a sort test.
        public void TestSortCharacters2()
        {
            const string text = "AAA$";

            var wanted = new[] { 3, 2, 1, 0 };
            var sorted = SuffixArray.SortCharacters(text, SuffixArray.NucleotideAlphabet);

            sorted.ShouldBeEquivalentTo(wanted);
        }
        // Sorts the characters of "ababaa$" over the alphabet "$ab" and compares the
        // resulting positions with { 6, 5, 4, 2, 0, 3, 1 }.
        // NOTE(review): if ShouldBeEquivalentTo is the FluentAssertions extension, it
        // ignores element order by default - confirm that is intended for a sort test.
        public void TestSortCharacters1()
        {
            const string text = "ababaa$";

            var wanted = new[] { 6, 5, 4, 2, 0, 3, 1 };
            var sorted = SuffixArray.SortCharacters(text, "$ab");

            sorted.ShouldBeEquivalentTo(wanted);
        }
示例#14
0
        // Finds every occurrence of "a" in "abracadabra" and compares the offsets,
        // sorted ascending, with 0, 3, 5, 7 and 10.
        public void Sufa_Abracadabra()
        {
            var suffixes = SuffixArray.Create("abracadabra");

            //DumpSortedArray(suffixes);
            Debug.Print(suffixes.ToString());

            var expected = new[] { 0, 3, 5, 7, 10 };
            var found    = suffixes.FindOccurences("a".ToCharArray()).OrderBy(offset => offset).ToArray();
            Assert.AreEqual(expected, found);
        }
示例#15
0
        // Test client. Reads all of standard input (newlines replaced by spaces, then
        // trimmed), builds a SuffixArrayX and a SuffixArray over it, reports the first
        // rank at which their index values differ, and then prints a table of index,
        // lcp, rank and a suffix preview of at most 50 characters from the SuffixArrayX.
        public static void main(string[] strarr)
        {
            string       input     = java.lang.String.instancehelper_trim(java.lang.String.instancehelper_replaceAll(StdIn.readAll(), "\n", " "));
            SuffixArrayX optimized = new SuffixArrayX(input);
            SuffixArray  reference = new SuffixArray(input);

            // Stop at the first rank where the two implementations disagree.
            for (int rank = 0; rank < java.lang.String.instancehelper_length(input); rank++)
            {
                if (reference.index(rank) == optimized.index(rank))
                {
                    continue;
                }
                StdOut.println(new StringBuilder().append("suffixReference(").append(rank).append(") = ").append(reference.index(rank)).toString());
                StdOut.println(new StringBuilder().append("suffix(").append(rank).append(") = ").append(optimized.index(rank)).toString());
                string optimizedPreview = new StringBuilder().append("\"").append(java.lang.String.instancehelper_substring(input, optimized.index(rank), java.lang.Math.min(optimized.index(rank) + 50, java.lang.String.instancehelper_length(input)))).append("\"").toString();
                string referencePreview = new StringBuilder().append("\"").append(java.lang.String.instancehelper_substring(input, reference.index(rank), java.lang.Math.min(reference.index(rank) + 50, java.lang.String.instancehelper_length(input)))).append("\"").toString();
                StdOut.println(optimizedPreview);
                StdOut.println(referencePreview);
                break;
            }

            StdOut.println("  i ind lcp rnk  select");
            StdOut.println("---------------------------");

            for (int row = 0; row < java.lang.String.instancehelper_length(input); row++)
            {
                int    start     = optimized.index(row);
                string preview   = new StringBuilder().append("\"").append(java.lang.String.instancehelper_substring(input, start, java.lang.Math.min(start + 50, java.lang.String.instancehelper_length(input)))).append("\"").toString();
                int    suffixRank = optimized.rank(java.lang.String.instancehelper_substring(input, start));

                // Decompiled form of an assertion that select(row) equals the suffix starting at index(row).
                if (!SuffixArrayX.s_assertionsDisabled && !java.lang.String.instancehelper_equals(java.lang.String.instancehelper_substring(input, start), optimized.select(row)))
                {
                    throw new AssertionError();
                }

                if (row != 0)
                {
                    int shared = optimized.lcp(row);
                    StdOut.printf("%3d %3d %3d %3d  %s\n", new object[]
                    {
                        Integer.valueOf(row),
                        Integer.valueOf(start),
                        Integer.valueOf(shared),
                        Integer.valueOf(suffixRank),
                        preview
                    });
                }
                else
                {
                    // The first row has no predecessor, so the lcp column shows "-".
                    StdOut.printf("%3d %3d %3s %3d  %s\n", new object[]
                    {
                        Integer.valueOf(row),
                        Integer.valueOf(start),
                        "-",
                        Integer.valueOf(suffixRank),
                        preview
                    });
                }
            }
        }
 // Stores the supplied tokenizer and creates the index's initial state:
 // a new suffix array, an ordinal match comparer, a PrefixStringComparer,
 // a synchronisation object and an empty dictionary of indexed files.
 public InvertedIndex(ITokenizer tokenizer)
 {
     this.tokenizer = tokenizer;

     suffixArray    = new SuffixArray <string, List <DocumentPosition> >();
     matchComparer  = StringComparer.Ordinal;
     prefixComparer = new PrefixStringComparer();
     syncObj        = new object();
     indexedFiles   = new Dictionary <string, string[]>();
 }
示例#17
0
        // Loads "tale.txt" through ReadFile, turns the result into a single string
        // with a StringBuilder and builds a SuffixArray over it.
        static void Main(string[] args)
        {
            var contents = ReadFile("tale.txt");
            var builder  = new StringBuilder();

            builder.Append(contents);
            var corpus   = builder.ToString();
            var suffixes = new SuffixArray(corpus);
        }
示例#18
0
        // None of the decimal strings "0" .. "19" may be reported as contained in "items".
        public void SuffixArray_NotItems()
        {
            SuffixArray suffixes = new SuffixArray("items");

            int candidate = 0;
            while (candidate < 20)
            {
                string needle = candidate.ToString();
                Assert.IsFalse(suffixes.Contains(needle));
                candidate++;
            }
        }
示例#19
0
        // Patterns that do not occur in the text must yield no occurrences.
        // The expected value is passed first so that a failure message reports
        // expected and actual the right way round.
        public void testfailedFindAllOccurrencesArrays()
        {
            string theString = "abracadabraababbra";
            SuffixArray theArray = new SuffixArray(theString);

            List<int> result = new List<int>(theArray.FindAllOccurrences("jason"));
            Assert.AreEqual(0, result.Count());

            result = new List<int>(theArray.FindAllOccurrences("testword"));
            Assert.AreEqual(0, result.Count());
        }
 // The suffixes of "banana" in sorted order start at offsets 5, 3, 1, 0, 4, 2.
 public void SortSuffixes()
 {
     int[] expected = { 5, 3, 1, 0, 4, 2 };
     int[] actual   = SuffixArray.Build("banana");

     for (int rank = 0; rank < expected.Length; rank++)
     {
         Assert.AreEqual(expected[rank], actual[rank]);
     }
 }
示例#21
0
        // The byte pair 4E 75 occurs at offsets 2 and 6 of a four-byte pattern
        // repeated twice; the offsets are sorted before comparing.
        public void Sufa_ByteArray()
        {
            var haystack = new byte[]
            {
                0x01, 0x02, 0x4E, 0x75,
                0x01, 0x02, 0x4E, 0x75,
            };
            var suffixes = SuffixArray.Create(haystack);

            var expected = new[] { 2, 6 };
            var found    = suffixes.FindOccurences(new byte[] { 0x4E, 0x75 }).OrderBy(offset => offset).ToArray();
            Assert.AreEqual(expected, found);
        }
 // Usage demo: builds a suffix array over "abcracadabra", looks up "rac" with
 // IndexOf and reads the Lcp entry at the returned index.
 static void Main(string[] args)
 {
     const string text = "abcracadabra";

     var suffixes = new SuffixArray(text);
     int position = suffixes.IndexOf("rac");
     int lcpValue = suffixes.Lcp[position];
 }
示例#23
0
        // Find must return, for every suffix of "items", the offset at which
        // that suffix starts (0 for "items" through 4 for "s").
        public void SuffixArray_Index()
        {
            const string text = "items";
            SuffixArray  sa   = new SuffixArray(text);

            for (int offset = 0; offset < text.Length; offset++)
            {
                Assert.AreEqual(offset, sa.Find(text.Substring(offset)));
            }
        }
示例#24
0
        // With "items" repeated twice, each suffix of "items" has two occurrences;
        // Find must return at least the offset of the first one.
        public void SuffixArray_MultiIndex()
        {
            const string word = "items";
            SuffixArray  sa   = new SuffixArray("itemsitems");

            for (int offset = 0; offset < word.Length; offset++)
            {
                Assert.IsTrue(offset <= sa.Find(word.Substring(offset)));
            }
        }
        // Adding a second key must call the supplied comparer exactly once
        // with ("test", "tes").
        public void Compare_Via_Insert_Comparer_On_Add()
        {
            var comparer = new Mock <IComparer <string> >();
            var target   = new SuffixArray <string, int>();

            target.TryAdd("test", 1, comparer.Object);
            target.TryAdd("tes", 2, comparer.Object);

            comparer.Verify(c => c.Compare("test", "tes"), Times.Once);
        }
示例#26
0
        // FindSubstring must report false for both a null and an empty pattern.
        public void nullTestingArrays()
        {
            SuffixArray suffixes = new SuffixArray("abracadabra");

            bool foundNull = suffixes.FindSubstring(null);
            Assert.AreEqual(false, foundNull);

            bool foundEmpty = suffixes.FindSubstring("");
            Assert.AreEqual(false, foundEmpty);
        }
        // Cross-checks LongestCommonPrefix against the reference returned by GetNative
        // for every pair of positions (left <= right), in both argument orders.
        // Inputs: strings from NextString(n) for n = 1..99, plus "abcaabca", for which
        // the suffix array, LCP array and rank array are also checked literally.
        public void LCPString()
        {
            var random = new Random();

            for (int length = 1; length < 100; length++)
            {
                var text      = random.NextString(length);
                var suffixes  = SuffixArray.Create(text);
                var reference = GetNative(text.AsSpan());

                for (int left = 0; left < text.Length; left++)
                {
                    for (int right = left; right < text.Length; right++)
                    {
                        suffixes.LongestCommonPrefix(left, right).Should().Be(suffixes.LongestCommonPrefix(right, left)).And.Be(reference.GetLCP(left, right));
                    }
                }
            }

            {
                var text      = "abcaabca";
                var suffixes  = SuffixArray.Create(text);
                var reference = GetNative(text.AsSpan());
                suffixes.LongestCommonPrefix(0, 3).Should().Be(1);
                suffixes.LongestCommonPrefix(0, 4).Should().Be(4);

                // Start offsets of the suffixes in sorted order.
                suffixes.SA.Should().Equal(
                    7, // a
                    3, // aabca
                    4, // abca
                    0, // abcaabca
                    5, // bca
                    1, // bcaabca
                    6, // ca
                    2  // caabca
                    );
                // Shared prefix length of each pair of neighbouring sorted suffixes.
                suffixes.LcpArray.Should().Equal(
                    1, // a - aabca
                    1, // aabca - abca
                    4, // abca - abcaabca
                    0, // abcaabca - bca
                    3, // bca - bcaabca
                    0, // bcaabca - ca
                    2  // ca - caabca
                    );
                // Position of each suffix within the sorted order.
                suffixes.Rank.Should().Equal(3, 5, 7, 1, 2, 4, 6, 0);

                for (int left = 0; left < text.Length; left++)
                {
                    for (int right = left; right < text.Length; right++)
                    {
                        suffixes.LongestCommonPrefix(left, right).Should().Be(suffixes.LongestCommonPrefix(right, left)).And.Be(reference.GetLCP(left, right));
                    }
                }
            }
        }
        // The sorted suffixes of "camel" start at offsets 1, 0, 3, 4, 2.
        public void Create()
        {
            var suffixArray = new SuffixArray("camel");

            int[] expected = { 1, 0, 3, 4, 2 };
            CollectionAssert.AreEqual(expected, suffixArray.SA);
        }
示例#29
0
 // Writes one debug line per suffix-array entry: the row number, the entry
 // itself, its Lcp value and the text of the suffix starting at that entry.
 public void DumpSortedArray(SuffixArray <char> sa)
 {
     int row = 0;

     while (row < sa.Length)
     {
         var start = sa[row];
         var lcp   = sa.Lcp[row];
         var tail  = string.Join("", sa.Str.Skip(start));

         Debug.Print("{0,4} {1,4} lcp = {2,4} {3}", row, start, lcp, tail);
         row++;
     }
 }
        // UpperBound("B") on the suffix array of "ABABBAB" is expected to return 3.
        public void UpperBound2()
        {
            var suffixes = new SuffixArray("ABABBAB");

            var bound = suffixes.UpperBound("B");

            Assert.AreEqual(3, bound);
        }
示例#31
0
        // Patterns that do not occur in the text must yield no occurrences.
        // The expected value is passed first so that a failure message reports
        // expected and actual the right way round.
        public void testfailedFindAllOccurrencesArrays()
        {
            string      theString = "abracadabraababbra";
            SuffixArray theArray  = new SuffixArray(theString);
            List <int>  result    = new List <int>(theArray.FindAllOccurrences("jason"));

            Assert.AreEqual(0, result.Count());

            result = new List <int>(theArray.FindAllOccurrences("testword"));
            Assert.AreEqual(0, result.Count());
        }
示例#32
0
 // Prints one console line per suffix-array entry: the row number, the entry
 // itself, its Lcp value and the text of the suffix starting at that entry.
 // WriteLine is used so that each entry ends with a line terminator instead
 // of all rows running together on a single line.
 public void PrintSortedArray(SuffixArray <char> sa)
 {
     for (int i = 0; i < sa.Length; i++)
     {
         Console.WriteLine("{0,4} {1,4} lcp = {2,4} {3}",
                           i,
                           sa[i],
                           sa.Lcp[i],
                           string.Join("", sa.Str.Skip(sa[i])));
     }
 }
示例#33
0
        // Neither a null pattern nor an empty pattern may be reported as found.
        public void nullTestingArrays()
        {
            SuffixArray suffixes = new SuffixArray("abracadabra");

            bool nullFound = suffixes.FindSubstring(null);
            Assert.AreEqual(false, nullFound);

            bool emptyFound = suffixes.FindSubstring("");
            Assert.AreEqual(false, emptyFound);
        }
示例#34
0
        // Every listed pattern occurs in "abracadabra", so FindSubstring must
        // return true for each of them.
        public void successfulStringsArrays()
        {
            SuffixArray theSuffixArray = new SuffixArray("abracadabra");
            string[] present = { "bra", "ab", "cada", "dab" };

            foreach (string pattern in present)
            {
                bool found = theSuffixArray.FindSubstring(pattern);
                Assert.AreEqual(true, found);
            }
        }
示例#35
0
        // None of the listed patterns occurs in "abracadabra", so FindSubstring
        // must return false for each of them.
        public void failedStringsArrays()
        {
            SuffixArray theSuffixArray = new SuffixArray("abracadabra");
            string[] absent = { "jason", "masud", "testing", "notastring" };

            foreach (string pattern in absent)
            {
                bool found = theSuffixArray.FindSubstring(pattern);
                Assert.AreEqual(false, found);
            }
        }
示例#36
0
        // Checks how many matches FindAllOccurrences reports for patterns that occur
        // in the text. Only the counts are asserted; the start offsets are listed in
        // comments because the order in which FindAllOccurrences yields them is not
        // visible from this test.
        public void testsuccessfulFindAllStringsArrays()
        {
            string theString = "abracadabraababbra";
            SuffixArray theArray = new SuffixArray(theString);

            // "bra" starts at offsets 1, 8 and 15.
            List<int> result = new List<int>(theArray.FindAllOccurrences("bra"));
            Assert.AreEqual(3, result.Count());

            // "brac" starts at offset 1 only.
            result = new List<int>(theArray.FindAllOccurrences("brac"));
            Assert.AreEqual(1, result.Count());

            // "ab" starts at offsets 0, 7, 11 and 13.
            result = new List<int>(theArray.FindAllOccurrences("ab"));
            Assert.AreEqual(4, result.Count());
        }
示例#37
0
 // Emits one Debug.Print line per suffix-array entry showing the row number,
 // the entry, its Lcp value and the suffix text that starts at that entry.
 public void DumpSortedArray(SuffixArray<char> sa)
 {
     for (int row = 0; row < sa.Length; row++)
     {
         var entry  = sa[row];
         var shared = sa.Lcp[row];
         var suffix = string.Join("", sa.Str.Skip(entry));

         Debug.Print("{0,4} {1,4} lcp = {2,4} {3}", row, entry, shared, suffix);
     }
 }