Trims leading and trailing whitespace from Tokens in the stream.

As of Lucene 4.4, this filter no longer supports updateOffsets=true, because updating offsets can lead to broken token streams.

Inheritance: TokenFilter
        /// <summary>
        /// Wraps <paramref name="input"/> in a new <see cref="TrimFilter"/> built from
        /// this instance's <c>luceneMatchVersion</c> and <c>updateOffsets</c> values.
        /// </summary>
        /// <param name="input">The token stream to wrap.</param>
        /// <returns>The newly constructed <see cref="TrimFilter"/>.</returns>
        public override TokenStream Create(TokenStream input)
        {
            // CS0612/CS0618 are the compiler's obsolete-member warnings; they are
            // silenced only around this constructor call.
#pragma warning disable 612, 618
            return new TrimFilter(luceneMatchVersion, input, updateOffsets);
#pragma warning restore 612, 618
        }
        /// <summary>
        /// Runs two fixed token sequences through <see cref="TrimFilter"/> and checks the
        /// resulting stream contents: first with the offset-update flag off (terms only),
        /// then with <c>LuceneVersion.LUCENE_43</c> and the flag on (terms plus offsets).
        /// </summary>
        public virtual void TestTrim()
        {
            // --- Scenario 1: flag = false; only the emitted term text is asserted. ---
            char[] paddedBothSides = " a ".ToCharArray();
            char[] paddedRight = "b   ".ToCharArray();
            char[] unpadded = "cCc".ToCharArray();
            char[] blanksOnly = "   ".ToCharArray();
            char[] noChars = "".ToCharArray();

            TokenStream plainSource = new IterTokenStream(
                new Token(paddedBothSides, 0, paddedBothSides.Length, 1, 5),
                new Token(paddedRight, 0, paddedRight.Length, 6, 10),
                new Token(unpadded, 0, unpadded.Length, 11, 15),
                new Token(blanksOnly, 0, blanksOnly.Length, 16, 20),
                new Token(noChars, 0, noChars.Length, 21, 21));
            TokenStream plainTrimmed = new TrimFilter(TEST_VERSION_CURRENT, plainSource, false);

            string[] expectedPlainTerms = new string[] { "a", "b", "cCc", "", "" };
            AssertTokenStreamContents(plainTrimmed, expectedPlainTerms);

            // --- Scenario 2: LUCENE_43 with flag = true; offsets are asserted as well. ---
            char[] leadingBlank = " a".ToCharArray();
            char[] trailingBlank = "b ".ToCharArray();
            char[] wrappedInBlanks = " c ".ToCharArray();
            char[] threeBlanks = "   ".ToCharArray();

            TokenStream offsetSource = new IterTokenStream(
                new Token(leadingBlank, 0, leadingBlank.Length, 0, 2),
                new Token(trailingBlank, 0, trailingBlank.Length, 0, 2),
                new Token(wrappedInBlanks, 0, wrappedInBlanks.Length, 0, 3),
                new Token(threeBlanks, 0, threeBlanks.Length, 0, 3));
            TokenStream offsetTrimmed = new TrimFilter(LuceneVersion.LUCENE_43, offsetSource, true);

            // NOTE(review): the local names below assume the long overload takes
            // (output, startOffsets, endOffsets, types, posIncrements, posLengths,
            // finalOffset, offsetsAreCorrect) in that order -- confirm against the
            // declaration of AssertTokenStreamContents.
            string[] expectedOffsetTerms = new string[] { "a", "b", "c", "" };
            int[] expectedStarts = new int[] { 1, 0, 1, 3 };
            int[] expectedEnds = new int[] { 2, 1, 2, 3 };
            int[] expectedIncrements = new int[] { 1, 1, 1, 1 };

            AssertTokenStreamContents(
                offsetTrimmed,
                expectedOffsetTerms,
                expectedStarts,
                expectedEnds,
                null,
                expectedIncrements,
                null,
                null,
                false);
        }
Example 3
0
 /// <summary>
 /// Wraps <paramref name="input"/> in a new <see cref="TrimFilter"/> built from
 /// this instance's <c>luceneMatchVersion</c> and <c>updateOffsets</c> values.
 /// </summary>
 /// <param name="input">The token stream to wrap.</param>
 /// <returns>The newly constructed <see cref="TrimFilter"/>.</returns>
 public override TokenStream Create(TokenStream input)
 {
     return new TrimFilter(luceneMatchVersion, input, updateOffsets);
 }