public void TestReset() { WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(new StringReader("abcde")); NGramTokenFilter filter = new NGramTokenFilter(tokenizer, 1, 1); AssertTokenStreamContents(filter, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }); tokenizer.Reset(new StringReader("abcde")); AssertTokenStreamContents(filter, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }); }
public void TestReset() { WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(new StringReader("abcde")); EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(tokenizer, Side.FRONT, 1, 3); AssertTokenStreamContents(filter, new String[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }); tokenizer.Reset(new StringReader("abcde")); AssertTokenStreamContents(filter, new String[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }); }
public void TestReset()
{
    Tokenizer wsTokenizer = new WhitespaceTokenizer(new StringReader("please divide this sentence"));
    TokenStream filter = new ShingleFilter(wsTokenizer, 2);
    // Unigrams interleaved with bigram shingles. Shingles carry the "shingle"
    // type and a position increment of 0, so each stacks on the token it starts at.
    AssertTokenStreamContents(filter,
        new[] { "please", "please divide", "divide", "divide this", "this", "this sentence", "sentence" },
        new[] { 0, 0, 7, 7, 14, 14, 19 },    // start offsets
        new[] { 6, 13, 13, 18, 18, 27, 27 }, // end offsets
        new[] { TypeAttribute.DEFAULT_TYPE, "shingle", TypeAttribute.DEFAULT_TYPE, "shingle", TypeAttribute.DEFAULT_TYPE, "shingle", TypeAttribute.DEFAULT_TYPE },
        new[] { 1, 0, 1, 0, 1, 0, 1 });      // position increments
    // After resetting the tokenizer with a fresh reader, the shingle filter
    // must produce the identical token stream.
    wsTokenizer.Reset(new StringReader("please divide this sentence"));
    AssertTokenStreamContents(filter,
        new[] { "please", "please divide", "divide", "divide this", "this", "this sentence", "sentence" },
        new[] { 0, 0, 7, 7, 14, 14, 19 },
        new[] { 6, 13, 13, 18, 18, 27, 27 },
        new[] { TypeAttribute.DEFAULT_TYPE, "shingle", TypeAttribute.DEFAULT_TYPE, "shingle", TypeAttribute.DEFAULT_TYPE, "shingle", TypeAttribute.DEFAULT_TYPE },
        new[] { 1, 0, 1, 0, 1, 0, 1 });
}