Inheritance: Lucene.Net.Analysis.Tokenizer
Example #1
0
        public virtual void TestReset()
        {
            // Unigram (1,1) tokenizer over the shared test input.
            NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1);

            string[] letters = new string[] { "a", "b", "c", "d", "e" };
            int[] startOffsets = new int[] { 0, 1, 2, 3, 4 };
            int[] endOffsets = new int[] { 1, 2, 3, 4, 5 };

            // Consume the stream once, then re-feed the same text via SetReader
            // to verify the tokenizer produces identical output after a reset.
            AssertTokenStreamContents(tokenizer, letters, startOffsets, endOffsets, 5); // abcde
            tokenizer.SetReader(new StringReader("abcde"));
            AssertTokenStreamContents(tokenizer, letters, startOffsets, endOffsets, 5); // abcde
        }
Example #2
0
 public virtual void TestRandomStrings()
 {
     // Fuzz the tokenizer: 10 rounds, each with random gram bounds and random text.
     for (int round = 0; round < 10; round++)
     {
         int minGram = TestUtil.NextInt32(Random, 2, 10);
         int maxGram = TestUtil.NextInt32(Random, minGram, 20);
         Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
         {
             // The tokenizer acts as both source and sink of the chain.
             Tokenizer source = new NGramTokenizer(TEST_VERSION_CURRENT, reader, minGram, maxGram);
             return new TokenStreamComponents(source, source);
         });
         // Many short strings first, then a few long (1027-char) ones.
         CheckRandomData(Random, analyzer, 200 * RandomMultiplier, 20);
         CheckRandomData(Random, analyzer, 10 * RandomMultiplier, 1027);
     }
 }
Example #3
0
 public void TestOversizedNgrams()
 {
     // Gram sizes 6-7 both exceed the 5-char input, so the stream must be empty.
     NGramTokenizer tokenizer = new NGramTokenizer(input, 6, 7);
     String[] noTokens = new String[0];
     int[] noStarts = new int[0];
     int[] noEnds = new int[0];
     AssertTokenStreamContents(tokenizer, noTokens, noStarts, noEnds, 5 /* abcde */);
 }
Example #4
0
 public void TestNgrams()
 {
     // All 1- to 3-grams of "abcde": unigrams first, then bigrams, then trigrams
     // (this older tokenizer groups output by gram length).
     NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 3);
     String[] grams = { "a", "b", "c", "d", "e", "ab", "bc", "cd", "de", "abc", "bcd", "cde" };
     int[] starts = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 0, 1, 2 };
     int[] ends = { 1, 2, 3, 4, 5, 2, 3, 4, 5, 3, 4, 5 };
     AssertTokenStreamContents(tokenizer, grams, starts, ends, 5 /* abcde */);
 }
Example #5
0
 public void TestBigrams()
 {
     // min == max == 2: exactly the four bigrams of "abcde".
     NGramTokenizer tokenizer = new NGramTokenizer(input, 2, 2);
     String[] bigrams = { "ab", "bc", "cd", "de" };
     int[] starts = { 0, 1, 2, 3 };
     int[] ends = { 2, 3, 4, 5 };
     AssertTokenStreamContents(tokenizer, bigrams, starts, ends, 5 /* abcde */);
 }
Example #6
0
 public void TestUnigrams()
 {
     // min == max == 1: each character of "abcde" becomes its own token.
     NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 1);
     String[] letters = { "a", "b", "c", "d", "e" };
     int[] starts = { 0, 1, 2, 3, 4 };
     int[] ends = { 1, 2, 3, 4, 5 };
     AssertTokenStreamContents(tokenizer, letters, starts, ends, 5 /* abcde */);
 }
Example #7
0
 public void TestReset()
 {
     // Consume the unigram stream once, Reset() onto fresh input, and check
     // that the second pass yields exactly the same tokens and offsets.
     NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 1);
     String[] letters = { "a", "b", "c", "d", "e" };
     int[] starts = { 0, 1, 2, 3, 4 };
     int[] ends = { 1, 2, 3, 4, 5 };
     AssertTokenStreamContents(tokenizer, letters, starts, ends, 5 /* abcde */);
     tokenizer.Reset(new StringReader("abcde"));
     AssertTokenStreamContents(tokenizer, letters, starts, ends, 5 /* abcde */);
 }
Example #8
0
        public virtual void TestOversizedNgrams()
        {
            // Gram range 6-7 is longer than the 5-char input, so no tokens come out.
            NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 6, 7);

            string[] noTokens = new string[0];
            int[] noStarts = new int[0];
            int[] noEnds = new int[0];
            AssertTokenStreamContents(tokenizer, noTokens, noStarts, noEnds, 5); // abcde
        }
Example #9
0
        public virtual void TestNgrams()
        {
            // All 1- to 3-grams of "abcde". This tokenizer emits grams ordered by
            // start position first, then by length — hence the interleaved order.
            NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3);

            string[] grams = { "a", "ab", "abc", "b", "bc", "bcd", "c", "cd", "cde", "d", "de", "e" };
            int[] starts = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4 };
            int[] ends = { 1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 5 };
            // NOTE(review): the trailing 'false' presumably relaxes the offset-order
            // check for overlapping grams — confirm against AssertTokenStreamContents.
            AssertTokenStreamContents(tokenizer, grams, starts, ends, null, null, null, 5, false); // abcde
        }
Example #10
0
        public virtual void TestBigrams()
        {
            // min == max == 2: exactly the four bigrams of "abcde".
            NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 2, 2);

            string[] bigrams = { "ab", "bc", "cd", "de" };
            int[] starts = { 0, 1, 2, 3 };
            int[] ends = { 2, 3, 4, 5 };
            AssertTokenStreamContents(tokenizer, bigrams, starts, ends, 5); // abcde
        }
Example #11
0
            protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                // Build the analysis chain: a single NGramTokenizer serves as both
                // the source and the sink. 'min'/'max' are captured from the
                // enclosing scope.
                Tokenizer source = new NGramTokenizer(TEST_VERSION_CURRENT, reader, min, max);
                return new TokenStreamComponents(source, source);
            }