This Tokenizer create n-grams from the beginning edge or ending edge of a input token.
As of Lucene 4.4, this tokenizer
maxGram
Although highly discouraged, it is still possible to use the old behavior through Lucene43EdgeNGramTokenizer.
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void testReset() throws Exception public virtual void testReset() { EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3); assertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, 5); // abcde tokenizer.Reader = new StringReader("abcde"); assertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, 5); // abcde }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void testTokenizerPositions() throws Exception public virtual void testTokenizerPositions() { Tokenizer tokenizer = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, input, Lucene43EdgeNGramTokenizer.Side.FRONT, 1, 3); assertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, null, new int[] { 1, 0, 0 }, null, null, false); tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"), 1, 3); assertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, null, new int[] { 1, 1, 1 }, null, null, false); }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void testFrontUnigram() throws Exception public virtual void testFrontUnigram() { EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1); assertTokenStreamContents(tokenizer, new string[] { "a" }, new int[] { 0 }, new int[] { 1 }, 5); // abcde }
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader) { Tokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, reader, min, max); return(new TokenStreamComponents(tokenizer, tokenizer)); }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void testFrontRangeOfNgrams() throws Exception public virtual void testFrontRangeOfNgrams() { EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3); assertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, 5); // abcde }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void testFrontUnigram() throws Exception public virtual void testFrontUnigram() { EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1); assertTokenStreamContents(tokenizer, new string[]{"a"}, new int[]{0}, new int[]{1}, 5); // abcde }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void testOversizedNgrams() throws Exception public virtual void testOversizedNgrams() { EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 6, 6); assertTokenStreamContents(tokenizer, new string[0], new int[0], new int[0], 5); // abcde }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void testFrontRangeOfNgrams() throws Exception public virtual void testFrontRangeOfNgrams() { EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3); assertTokenStreamContents(tokenizer, new string[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3}, 5); // abcde }
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader) { Tokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, reader, min, max); return new TokenStreamComponents(tokenizer, tokenizer); }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void testTokenizerPositions() throws Exception public virtual void testTokenizerPositions() { Tokenizer tokenizer = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, input, Lucene43EdgeNGramTokenizer.Side.FRONT, 1, 3); assertTokenStreamContents(tokenizer, new string[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3}, null, new int[] {1,0,0}, null, null, false); tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"), 1, 3); assertTokenStreamContents(tokenizer, new string[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3}, null, new int[]{1,1,1}, null, null, false); }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void testReset() throws Exception public virtual void testReset() { EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3); assertTokenStreamContents(tokenizer, new string[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3}, 5); // abcde tokenizer.Reader = new StringReader("abcde"); assertTokenStreamContents(tokenizer, new string[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3}, 5); // abcde }