As of Lucene 4.4, this filter no longer supports updateOffsets=true, because adjusting offsets after trimming can produce broken (non-monotonic) token streams.
/// <summary>
/// Wraps <paramref name="input"/> in a <c>TrimFilter</c> configured with this
/// factory's match version and <c>updateOffsets</c> flag.
/// </summary>
/// <param name="input">The upstream token stream to trim.</param>
/// <returns>A <c>TrimFilter</c> over <paramref name="input"/>.</returns>
public override TokenStream Create(TokenStream input)
{
    // The three-argument TrimFilter constructor is marked obsolete (offset
    // updating was deprecated in Lucene 4.4), so silence CS0612/CS0618 for
    // this single call; the factory must still honor updateOffsets.
#pragma warning disable 612, 618
    return new TrimFilter(luceneMatchVersion, input, updateOffsets);
#pragma warning restore 612, 618
}
/// <summary>
/// Exercises <c>TrimFilter</c> in the two modes visible here:
/// (1) under <c>TEST_VERSION_CURRENT</c> with <c>updateOffsets == false</c> —
/// leading/trailing whitespace is removed from each token's text while the
/// original offsets are left alone (whitespace-only and empty tokens trim to "");
/// (2) under <c>LuceneVersion.LUCENE_43</c> with <c>updateOffsets == true</c> —
/// the deprecated pre-4.4 behavior, where trimming also shifts the start/end
/// offsets, as pinned by the expected offset arrays in the second assertion.
/// NOTE(review): the final <c>false</c> argument presumably disables the
/// offsets-are-consistent check, which the offset-shifting mode would violate —
/// confirm against AssertTokenStreamContents' signature.
/// </summary>
public virtual void TestTrim() { char[] a = " a ".ToCharArray(); char[] b = "b ".ToCharArray(); char[] ccc = "cCc".ToCharArray(); char[] whitespace = " ".ToCharArray(); char[] empty = "".ToCharArray(); TokenStream ts = new IterTokenStream(new Token(a, 0, a.Length, 1, 5), new Token(b, 0, b.Length, 6, 10), new Token(ccc, 0, ccc.Length, 11, 15), new Token(whitespace, 0, whitespace.Length, 16, 20), new Token(empty, 0, empty.Length, 21, 21)); ts = new TrimFilter(TEST_VERSION_CURRENT, ts, false); AssertTokenStreamContents(ts, new string[] { "a", "b", "cCc", "", "" }); a = " a".ToCharArray(); b = "b ".ToCharArray(); ccc = " c ".ToCharArray(); whitespace = " ".ToCharArray(); ts = new IterTokenStream(new Token(a, 0, a.Length, 0, 2), new Token(b, 0, b.Length, 0, 2), new Token(ccc, 0, ccc.Length, 0, 3), new Token(whitespace, 0, whitespace.Length, 0, 3)); ts = new TrimFilter(LuceneVersion.LUCENE_43, ts, true); AssertTokenStreamContents(ts, new string[] { "a", "b", "c", "" }, new int[] { 1, 0, 1, 3 }, new int[] { 2, 1, 2, 3 }, null, new int[] { 1, 1, 1, 1 }, null, null, false); }
/// <summary>
/// Wraps <paramref name="input"/> in a <c>TrimFilter</c> configured with this
/// factory's match version and <c>updateOffsets</c> flag.
/// </summary>
/// <param name="input">The upstream token stream to trim.</param>
/// <returns>A <c>TrimFilter</c> over <paramref name="input"/>.</returns>
public override TokenStream Create(TokenStream input)
{
    // FIX: the three-argument TrimFilter constructor is obsolete (offset
    // updating was deprecated in Lucene 4.4), so this call emitted
    // CS0612/CS0618 — breaking builds with warnings-as-errors. Suppress the
    // warnings around the single call, matching the sibling factory override.
#pragma warning disable 612, 618
    var filter = new TrimFilter(luceneMatchVersion, input, updateOffsets);
#pragma warning restore 612, 618
    return filter;
}