public void TestNgrams()
{
    // All n-grams of sizes 1..3 over the 5-char input, emitted grouped by gram length:
    // first the unigrams, then bigrams, then trigrams.
    NGramTokenFilter filter = new NGramTokenFilter(input, 1, 3);

    String[] expectedTokens = new String[] { "a", "b", "c", "d", "e", "ab", "bc", "cd", "de", "abc", "bcd", "cde" };
    int[] expectedStartOffsets = new int[] { 0, 1, 2, 3, 4, 0, 1, 2, 3, 0, 1, 2 };
    int[] expectedEndOffsets = new int[] { 1, 2, 3, 4, 5, 2, 3, 4, 5, 3, 4, 5 };

    AssertTokenStreamContents(filter, expectedTokens, expectedStartOffsets, expectedEndOffsets);
}
public void TestOversizedNgrams()
{
    // Minimum gram size (6) exceeds the input token length, so the
    // filter must produce an empty token stream.
    NGramTokenFilter filter = new NGramTokenFilter(input, 6, 7);

    String[] noTokens = new String[0];
    int[] noStartOffsets = new int[0];
    int[] noEndOffsets = new int[0];

    AssertTokenStreamContents(filter, noTokens, noStartOffsets, noEndOffsets);
}
public void TestBigrams()
{
    // min == max == 2: only the four overlapping bigrams of "abcde".
    NGramTokenFilter filter = new NGramTokenFilter(input, 2, 2);

    String[] expectedTokens = new String[] { "ab", "bc", "cd", "de" };
    int[] expectedStartOffsets = new int[] { 0, 1, 2, 3 };
    int[] expectedEndOffsets = new int[] { 2, 3, 4, 5 };

    AssertTokenStreamContents(filter, expectedTokens, expectedStartOffsets, expectedEndOffsets);
}
public void TestUnigrams()
{
    // min == max == 1: one single-character token per input character.
    NGramTokenFilter filter = new NGramTokenFilter(input, 1, 1);

    String[] expectedTokens = new String[] { "a", "b", "c", "d", "e" };
    int[] expectedStartOffsets = new int[] { 0, 1, 2, 3, 4 };
    int[] expectedEndOffsets = new int[] { 1, 2, 3, 4, 5 };

    AssertTokenStreamContents(filter, expectedTokens, expectedStartOffsets, expectedEndOffsets);
}
public void TestReset()
{
    // After resetting the underlying tokenizer with a fresh reader,
    // the filter must replay exactly the same unigram stream.
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(new StringReader("abcde"));
    NGramTokenFilter filter = new NGramTokenFilter(tokenizer, 1, 1);

    String[] expectedTokens = new String[] { "a", "b", "c", "d", "e" };
    int[] expectedStartOffsets = new int[] { 0, 1, 2, 3, 4 };
    int[] expectedEndOffsets = new int[] { 1, 2, 3, 4, 5 };

    // First pass over the stream.
    AssertTokenStreamContents(filter, expectedTokens, expectedStartOffsets, expectedEndOffsets);

    // Second pass after Reset: output must be identical.
    tokenizer.Reset(new StringReader("abcde"));
    AssertTokenStreamContents(filter, expectedTokens, expectedStartOffsets, expectedEndOffsets);
}
public void TestSmallTokenInStream()
{
    // The middle token "de" is shorter than the fixed gram size (3),
    // so it yields no grams; only "abc" and "fgh" survive, with their
    // original offsets into "abc de fgh" preserved.
    input = new WhitespaceTokenizer(new StringReader("abc de fgh"));
    NGramTokenFilter filter = new NGramTokenFilter(input, 3, 3);

    String[] expectedTokens = new String[] { "abc", "fgh" };
    int[] expectedStartOffsets = new int[] { 0, 7 };
    int[] expectedEndOffsets = new int[] { 3, 10 };

    AssertTokenStreamContents(filter, expectedTokens, expectedStartOffsets, expectedEndOffsets);
}
// Stores a reference to the enclosing NGramTokenFilter so this helper can
// reach the outer instance's state — presumably part of the Java-to-C# port's
// emulation of an anonymous inner class; confirm against the enclosing type.
public PositionLengthAttributeAnonymousInnerClassHelper(NGramTokenFilter outerInstance) { this.outerInstance = outerInstance; }