Esempio n. 1
0
        /// <summary>
        /// Runs a <c>ShingleFilter</c> (constructed with argument 2) over a
        /// whitespace-tokenized sentence, then resets the underlying tokenizer with a
        /// fresh reader over the same text and checks that the very same filter
        /// instance yields identical output a second time.
        /// </summary>
        public void TestReset()
        {
            const string sentence = "please divide this sentence";

            // Expected values are declared once; both passes must match them exactly.
            var expectedTerms = new[]
                {
                    "please", "please divide",
                    "divide", "divide this",
                    "this", "this sentence",
                    "sentence"
                };
            var expectedStartOffsets = new[] {0, 0, 7, 7, 14, 14, 19};
            var expectedEndOffsets = new[] {6, 13, 13, 18, 18, 27, 27};
            var expectedTypes = new[]
                {
                    TypeAttribute.DEFAULT_TYPE, "shingle",
                    TypeAttribute.DEFAULT_TYPE, "shingle",
                    TypeAttribute.DEFAULT_TYPE, "shingle",
                    TypeAttribute.DEFAULT_TYPE
                };
            // NOTE(review): presumably position increments, going by the helper's
            // argument order - confirm against AssertTokenStreamContents.
            var expectedIncrements = new[] {1, 0, 1, 0, 1, 0, 1};

            Tokenizer source = new WhitespaceTokenizer(new StringReader(sentence));
            TokenStream shingles = new ShingleFilter(source, 2);

            // First pass over the freshly constructed chain.
            AssertTokenStreamContents(shingles, expectedTerms, expectedStartOffsets, expectedEndOffsets,
                                      expectedTypes, expectedIncrements);

            // Feed the tokenizer a new reader; the filter object itself is reused as-is.
            source.Reset(new StringReader(sentence));

            // Second pass must reproduce the first one.
            AssertTokenStreamContents(shingles, expectedTerms, expectedStartOffsets, expectedEndOffsets,
                                      expectedTypes, expectedIncrements);
        }
Esempio n. 2
0
 /// <summary>
 /// Runs an <c>NGramTokenFilter</c> (constructed with arguments 1 and 1) over the
 /// single token "abcde", then resets the tokenizer with a new reader over the same
 /// text and checks that the same filter instance produces the same output again.
 /// </summary>
 public void TestReset()
 {
     const string text = "abcde";

     // Expected values shared by both passes.
     string[] expectedTerms = { "a", "b", "c", "d", "e" };
     int[] expectedStartOffsets = { 0, 1, 2, 3, 4 };
     int[] expectedEndOffsets = { 1, 2, 3, 4, 5 };

     WhitespaceTokenizer source = new WhitespaceTokenizer(new StringReader(text));
     NGramTokenFilter grams = new NGramTokenFilter(source, 1, 1);

     // First pass over the freshly constructed chain.
     AssertTokenStreamContents(grams, expectedTerms, expectedStartOffsets, expectedEndOffsets);

     // Re-point the tokenizer at a new reader and consume the same filter again.
     source.Reset(new StringReader(text));
     AssertTokenStreamContents(grams, expectedTerms, expectedStartOffsets, expectedEndOffsets);
 }
 /// <summary>
 /// Runs an <c>EdgeNGramTokenFilter</c> (<c>Side.FRONT</c>, arguments 1 and 3) over
 /// the single token "abcde", then resets the tokenizer with a new reader over the
 /// same text and checks that the same filter instance produces the same output again.
 /// </summary>
 public void TestReset()
 {
     const string text = "abcde";

     // Expected values shared by both passes.
     string[] expectedTerms = { "a", "ab", "abc" };
     int[] expectedStartOffsets = { 0, 0, 0 };
     int[] expectedEndOffsets = { 1, 2, 3 };

     WhitespaceTokenizer source = new WhitespaceTokenizer(new StringReader(text));
     EdgeNGramTokenFilter edgeGrams =
         new EdgeNGramTokenFilter(source, EdgeNGramTokenFilter.Side.FRONT, 1, 3);

     // First pass over the freshly constructed chain.
     AssertTokenStreamContents(edgeGrams, expectedTerms, expectedStartOffsets, expectedEndOffsets);

     // Re-point the tokenizer at a new reader and consume the same filter again.
     source.Reset(new StringReader(text));
     AssertTokenStreamContents(edgeGrams, expectedTerms, expectedStartOffsets, expectedEndOffsets);
 }