This TokenFilter limits the number of tokens while indexing. It is a replacement for the maximum field length setting inside org.apache.lucene.index.IndexWriter.

By default, this filter ignores any tokens in the wrapped TokenStream once the limit has been reached, which can result in reset() being called prior to incrementToken() returning false. For most TokenStream implementations this should be acceptable, and faster than consuming the full stream. If you are wrapping a TokenStream which requires that the full stream of tokens be exhausted in order to function properly, use the consumeAllTokens option of the LimitTokenCountFilter(TokenStream, int, boolean) constructor.

Inheritance: TokenFilter
public virtual void test()
{
    // Exercise the filter both with and without exhausting the wrapped stream.
    foreach (bool consumeAll in new bool[] { true, false })
    {
        MockTokenizer tokenizer = new MockTokenizer(new StringReader("A1 B2 C3 D4 E5 F6"), MockTokenizer.WHITESPACE, false);
        // MockTokenizer only verifies the end-of-stream contract when the
        // wrapped stream is expected to be fully consumed.
        tokenizer.EnableChecks = consumeAll;
        // Emit at most the first 3 tokens; consumeAll controls whether the
        // remaining tokens are still read from the tokenizer.
        TokenStream stream = new LimitTokenCountFilter(tokenizer, 3, consumeAll);
        assertTokenStreamContents(stream, new string[] { "A1", "B2", "C3" });
    }
}
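Beyond the test above, a common place to apply the filter is inside a custom Analyzer. The following is a minimal sketch, not part of the original examples, assuming Lucene.NET 4.8-style APIs; the LimitedAnalyzer class name, the maxTokenCount/consumeAllTokens fields, and the use of StandardTokenizer are illustrative assumptions.

// Minimal sketch, assuming Lucene.NET 4.8-style APIs; LimitedAnalyzer and its
// fields are hypothetical names used for illustration only.
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Miscellaneous;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Util;

public sealed class LimitedAnalyzer : Analyzer
{
    private readonly int maxTokenCount;
    private readonly bool consumeAllTokens;

    public LimitedAnalyzer(int maxTokenCount, bool consumeAllTokens = false)
    {
        this.maxTokenCount = maxTokenCount;
        this.consumeAllTokens = consumeAllTokens;
    }

    protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
    {
        // Tokenize the field, then cap the number of tokens that reach the index.
        Tokenizer source = new StandardTokenizer(LuceneVersion.LUCENE_48, reader);
        TokenStream result = new LimitTokenCountFilter(source, maxTokenCount, consumeAllTokens);
        return new TokenStreamComponents(source, result);
    }
}

With consumeAllTokens left at its default of false, the analyzer stops pulling tokens from the tokenizer once the limit is reached, matching the default behavior described above.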