This TokenFilter limits its emitted tokens to those with positions that are not greater than the configured limit.

By default, this filter ignores any tokens in the wrapped {@code TokenStream} once the limit has been exceeded, which can result in {@code reset()} being called prior to {@code incrementToken()} returning {@code false}. For most {@code TokenStream} implementations this should be acceptable, and faster than consuming the full stream. If you are wrapping a {@code TokenStream} which requires that the full stream of tokens be exhausted in order to function properly, use the {@link #LimitTokenPositionFilter(TokenStream,int,boolean) consumeAllTokens} option.

Inheritance: TokenFilter
コード例 #1
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testMaxPosition3WithSynomyms() throws java.io.IOException
        /// <summary>
        /// Runs a synonym-expanded token stream through a
        /// <c>LimitTokenPositionFilter</c> configured with a limit of 3 and checks
        /// the emitted tokens, once with <c>consumeAll</c> set to true and once
        /// with it set to false.
        /// </summary>
        public virtual void testMaxPosition3WithSynomyms()
        {
            bool[] consumeAllChoices = { true, false };
            foreach (bool consumeAll in consumeAllChoices)
            {
                StringReader input = new StringReader("one two three four five");
                MockTokenizer source = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
                // The tokenizer checks are only usable when the whole stream is consumed.
                source.EnableChecks = consumeAll;

                SynonymMap.Builder synonyms = new SynonymMap.Builder(true);

                // Single-word synonyms for "one", registered in this exact order.
                foreach (string alias in new string[] { "first", "alpha", "beguine" })
                {
                    synonyms.add(new CharsRef("one"), new CharsRef(alias), true);
                }

                // Multi-word synonyms: the same scratch buffer is filled by join()
                // and then handed to add(), first for "one" and then for "two".
                CharsRef scratch = new CharsRef();
                SynonymMap.Builder.join(new string[] { "and", "indubitably", "single", "only" }, scratch);
                synonyms.add(new CharsRef("one"), scratch, true);
                SynonymMap.Builder.join(new string[] { "dopple", "ganger" }, scratch);
                synonyms.add(new CharsRef("two"), scratch, true);

                SynonymMap map = synonyms.build();
                TokenStream expanded = new SynonymFilter(source, map, true);
                TokenStream limited = new LimitTokenPositionFilter(expanded, 3, consumeAll);

                // "only", the 4th word of the multi-word synonym
                // "and indubitably single only", is not emitted because its
                // position is greater than 3.
                string[] expectedTerms =
                {
                    "one", "first", "alpha", "beguine", "and",
                    "two", "indubitably", "dopple",
                    "three", "single", "ganger"
                };
                // One entry per expected term (presumably position increments - confirm
                // against assertTokenStreamContents).
                int[] expectedSteps =
                {
                    1, 0, 0, 0, 0,
                    1, 0, 0,
                    1, 0, 0
                };
                assertTokenStreamContents(limited, expectedTerms, expectedSteps);
            }
        }
コード例 #2
0
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testMaxPosition3WithSynomyms() throws java.io.IOException
        /// <summary>
        /// Builds a whitespace tokenizer over "one two three four five", adds
        /// single- and multi-word synonyms for "one" and "two", wraps the result in
        /// a <c>LimitTokenPositionFilter</c> with a limit of 3, and asserts the
        /// resulting token stream. The scenario is repeated for both values of
        /// the filter's <c>consumeAll</c> argument.
        /// </summary>
        public virtual void testMaxPosition3WithSynomyms()
        {
            foreach (bool consumeAll in new bool[] { true, false })
            {
                MockTokenizer words = new MockTokenizer(
                    new StringReader("one two three four five"),
                    MockTokenizer.WHITESPACE,
                    false);
                // Checks can only stay enabled when every token is consumed.
                words.EnableChecks = consumeAll;

                SynonymMap.Builder mapBuilder = new SynonymMap.Builder(true);
                mapBuilder.add(new CharsRef("one"), new CharsRef("first"), true);
                mapBuilder.add(new CharsRef("one"), new CharsRef("alpha"), true);
                mapBuilder.add(new CharsRef("one"), new CharsRef("beguine"), true);

                // A single buffer is reused for both multi-word synonyms; each join()
                // is immediately followed by the add() that uses its result.
                string[] phraseForOne = { "and", "indubitably", "single", "only" };
                string[] phraseForTwo = { "dopple", "ganger" };
                CharsRef joined = new CharsRef();
                SynonymMap.Builder.join(phraseForOne, joined);
                mapBuilder.add(new CharsRef("one"), joined, true);
                SynonymMap.Builder.join(phraseForTwo, joined);
                mapBuilder.add(new CharsRef("two"), joined, true);

                SynonymMap builtMap = mapBuilder.build();
                TokenStream pipeline = new SynonymFilter(words, builtMap, true);
                pipeline = new LimitTokenPositionFilter(pipeline, 3, consumeAll);

                // "only" (4th word of "and indubitably single only") must be absent:
                // its position is greater than 3.
                string[] wantTerms = new string[]
                {
                    "one", "first", "alpha", "beguine", "and", "two",
                    "indubitably", "dopple", "three", "single", "ganger"
                };
                int[] wantInts = new int[] { 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0 };
                assertTokenStreamContents(pipeline, wantTerms, wantInts);
            }
        }