Trims leading and trailing whitespace from Tokens in the stream.

As of Lucene 4.4, this filter no longer supports updateOffsets=true, because updating offsets can lead to broken token streams.

Inheritance: TokenFilter
コード例 #1
0
        /// <summary>
        /// Wraps <paramref name="input"/> in a <see cref="TrimFilter"/> built from this
        /// factory's <c>luceneMatchVersion</c> and <c>updateOffsets</c> settings.
        /// </summary>
        /// <param name="input">Token stream handed to the filter.</param>
        /// <returns>The newly constructed <see cref="TrimFilter"/>.</returns>
        public override TokenStream Create(TokenStream input)
        {
            // Warnings 612/618 (use of obsolete members) are silenced only around
            // the constructor call and restored immediately afterwards.
#pragma warning disable 612, 618
            return new TrimFilter(luceneMatchVersion, input, updateOffsets);
#pragma warning restore 612, 618
        }
コード例 #2
0
        /// <summary>
        /// Runs <see cref="TrimFilter"/> over two hand-built token streams and checks
        /// the emitted terms; the second scenario (LUCENE_43 with updateOffsets = true)
        /// also checks start/end offsets and position increments.
        /// </summary>
        public virtual void TestTrim()
        {
            // Scenario 1: updateOffsets = false, only the resulting terms are checked.
            char[] paddedBothSides = " a ".ToCharArray();
            char[] paddedRight = "b   ".ToCharArray();
            char[] unpadded = "cCc".ToCharArray();
            char[] blanksOnly = "   ".ToCharArray();
            char[] noChars = "".ToCharArray();

            TokenStream plainInput = new IterTokenStream(
                new Token(paddedBothSides, 0, paddedBothSides.Length, 1, 5),
                new Token(paddedRight, 0, paddedRight.Length, 6, 10),
                new Token(unpadded, 0, unpadded.Length, 11, 15),
                new Token(blanksOnly, 0, blanksOnly.Length, 16, 20),
                new Token(noChars, 0, noChars.Length, 21, 21));
            TokenStream plainTrimmed = new TrimFilter(TEST_VERSION_CURRENT, plainInput, false);

            string[] plainExpectedTerms = new string[] { "a", "b", "cCc", "", "" };
            AssertTokenStreamContents(plainTrimmed, plainExpectedTerms);

            // Scenario 2: LUCENE_43 with updateOffsets = true, so offsets are asserted as well.
            char[] leadingBlank = " a".ToCharArray();
            char[] trailingBlank = "b ".ToCharArray();
            char[] blankEitherSide = " c ".ToCharArray();
            char[] threeBlanks = "   ".ToCharArray();

            TokenStream offsetInput = new IterTokenStream(
                new Token(leadingBlank, 0, leadingBlank.Length, 0, 2),
                new Token(trailingBlank, 0, trailingBlank.Length, 0, 2),
                new Token(blankEitherSide, 0, blankEitherSide.Length, 0, 3),
                new Token(threeBlanks, 0, threeBlanks.Length, 0, 3));
            TokenStream offsetTrimmed = new TrimFilter(LuceneVersion.LUCENE_43, offsetInput, true);

            string[] offsetExpectedTerms = new string[] { "a", "b", "c", "" };
            int[] expectedStartOffsets = new int[] { 1, 0, 1, 3 };
            int[] expectedEndOffsets = new int[] { 2, 1, 2, 3 };
            int[] expectedPosIncrements = new int[] { 1, 1, 1, 1 };

            AssertTokenStreamContents(
                offsetTrimmed,
                offsetExpectedTerms,
                expectedStartOffsets,
                expectedEndOffsets,
                null,
                expectedPosIncrements,
                null,
                null,
                false);
        }
コード例 #3
0
 /// <summary>
 /// Wraps <paramref name="input"/> in a <see cref="TrimFilter"/> built from this
 /// factory's <c>luceneMatchVersion</c> and <c>updateOffsets</c> settings.
 /// </summary>
 /// <param name="input">Token stream handed to the filter.</param>
 /// <returns>The newly constructed <see cref="TrimFilter"/>.</returns>
 public override TokenStream Create(TokenStream input)
 {
     // The constructor taking updateOffsets is a legacy code path (updateOffsets=true
     // is unsupported as of Lucene 4.4), so the obsolete-member warnings 612/618 are
     // suppressed around this call only and restored right after.
#pragma warning disable 612, 618
     return new TrimFilter(luceneMatchVersion, input, updateOffsets);
#pragma warning restore 612, 618
 }