Example #1
0
        public void TestReset()
        {
            // Verifies that resetting the upstream tokenizer with a fresh reader
            // lets the n-gram filter replay the exact same unigram stream.
            var source = new WhitespaceTokenizer(new StringReader("abcde"));
            var unigrams = new NGramTokenFilter(source, 1, 1);

            String[] expectedTerms = { "a", "b", "c", "d", "e" };
            int[]    expectedStarts = { 0, 1, 2, 3, 4 };
            int[]    expectedEnds   = { 1, 2, 3, 4, 5 };

            AssertTokenStreamContents(unigrams, expectedTerms, expectedStarts, expectedEnds);

            // Feed the same text again through a reset tokenizer; output must match.
            source.Reset(new StringReader("abcde"));
            AssertTokenStreamContents(unigrams, expectedTerms, expectedStarts, expectedEnds);
        }
Example #2
0
        public void TestReset()
        {
            // Verifies that the edge n-gram filter produces an identical
            // front-anchored gram stream after its tokenizer source is reset.
            var source = new WhitespaceTokenizer(new StringReader("abcde"));
            var edgeGrams = new EdgeNGramTokenFilter(source, Side.FRONT, 1, 3);

            String[] expectedTerms = { "a", "ab", "abc" };
            int[]    expectedStarts = { 0, 0, 0 };
            int[]    expectedEnds   = { 1, 2, 3 };

            AssertTokenStreamContents(edgeGrams, expectedTerms, expectedStarts, expectedEnds);

            // Reset with a fresh reader over the same text; the chain must replay.
            source.Reset(new StringReader("abcde"));
            AssertTokenStreamContents(edgeGrams, expectedTerms, expectedStarts, expectedEnds);
        }
Example #3
0
        public void TestReset()
        {
            // Verifies that a ShingleFilter yields an identical token stream
            // (terms, offsets, types, and position increments) after the
            // upstream tokenizer is reset with a fresh reader.
            const String text = "please divide this sentence";

            Tokenizer   source   = new WhitespaceTokenizer(new StringReader(text));
            TokenStream shingles = new ShingleFilter(source, 2);

            // Expected interleaving of unigrams and 2-gram shingles.
            String[] terms =
            {
                "please", "please divide", "divide", "divide this",
                "this", "this sentence", "sentence"
            };
            int[] starts = { 0, 0, 7, 7, 14, 14, 19 };
            int[] ends   = { 6, 13, 13, 18, 18, 27, 27 };
            String[] types =
            {
                TypeAttribute.DEFAULT_TYPE, "shingle",
                TypeAttribute.DEFAULT_TYPE, "shingle",
                TypeAttribute.DEFAULT_TYPE, "shingle",
                TypeAttribute.DEFAULT_TYPE
            };
            // Shingles occupy the same position as the unigram they start at.
            int[] increments = { 1, 0, 1, 0, 1, 0, 1 };

            AssertTokenStreamContents(shingles, terms, starts, ends, types, increments);

            // Replay the same input through the reset tokenizer.
            source.Reset(new StringReader(text));

            AssertTokenStreamContents(shingles, terms, starts, ends, types, increments);
        }