This TokenFilter limits the number of tokens while indexing. It is a replacement for the maximum field length setting inside org.apache.lucene.index.IndexWriter.

By default, this filter ignores any tokens in the wrapped TokenStream once the limit has been reached, which can result in reset() being called prior to incrementToken() returning false. For most TokenStream implementations this should be acceptable, and faster than consuming the full stream. If you are wrapping a TokenStream which requires that the full stream of tokens be exhausted in order to function properly, use the LimitTokenCountFilter(TokenStream, int, boolean) constructor with the consumeAllTokens option set to true.

Inheritance: TokenFilter
Code example #1
 public virtual void Test()
 {
     // Run the same assertion with and without the consumeAllTokens option.
     foreach (bool consumeAll in new bool[] { true, false })
     {
         MockTokenizer tokenizer = new MockTokenizer(new StringReader("A1 B2 C3 D4 E5 F6"), MockTokenizer.WHITESPACE, false);
         // Enable MockTokenizer's stream-consumption checks only when the full stream will be drained.
         tokenizer.EnableChecks = consumeAll;
         TokenStream stream = new LimitTokenCountFilter(tokenizer, 3, consumeAll);
         // Only the first 3 of the 6 input tokens are emitted, regardless of consumeAll.
         AssertTokenStreamContents(stream, new string[] { "A1", "B2", "C3" });
     }
 }
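In practice the filter is usually installed in an Analyzer's token stream chain, which is how it stands in for the old IndexWriter maximum field length setting mentioned above. The sketch below assumes the Lucene.NET 4.8 API surface; the LimitedAnalyzer class, the MaxTokens constant, and the choice of WhitespaceTokenizer are illustrative only, not part of this API.

 using System.IO;
 using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.Core;
 using Lucene.Net.Analysis.Miscellaneous;
 using Lucene.Net.Util;

 // Hypothetical analyzer that indexes at most the first 10,000 tokens of each field.
 public sealed class LimitedAnalyzer : Analyzer
 {
     private const int MaxTokens = 10000;

     protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
     {
         Tokenizer source = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, reader);
         // consumeAllTokens = false: stop pulling from the tokenizer once the limit is reached.
         TokenStream result = new LimitTokenCountFilter(source, MaxTokens, false);
         return new TokenStreamComponents(source, result);
     }
 }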