/// <summary>
/// Runs a random Unicode string through a <c>KeywordTokenizer</c> wrapped in an
/// <c>EdgeNGramTokenFilter</c> and checks that every emitted gram is a prefix of
/// the input measured in code points rather than UTF-16 chars.
/// </summary>
/// <remarks>
/// For each gram size from <c>minGram</c> up to the smaller of <c>maxGram</c> and the
/// input's code point count, the test expects one token whose term is the prefix
/// ending at <c>Character.OffsetByCodePoints(s, 0, size)</c>, with start offset 0 and
/// end offset <c>s.Length</c>. After those tokens the stream must be exhausted.
/// </remarks>
public virtual void TestSupplementaryCharacters()
{
    // Random draws happen in a fixed order: input string, then minGram, then maxGram.
    string s = TestUtil.RandomUnicodeString(Random(), 10);
    int totalCodePoints = Character.CodePointCount(s, 0, s.Length);
    int minGram = TestUtil.NextInt(Random(), 1, 3);
    int maxGram = TestUtil.NextInt(Random(), minGram, 10);

    TokenStream source = new KeywordTokenizer(new StringReader(s));
    TokenStream tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, source, minGram, maxGram);
    ICharTermAttribute termAtt = tk.AddAttribute <ICharTermAttribute>();
    IOffsetAttribute offsetAtt = tk.AddAttribute <IOffsetAttribute>();
    tk.Reset();

    // No gram can contain more code points than the input itself has.
    int largestGram = Math.Min(totalCodePoints, maxGram);
    for (int gramSize = minGram; gramSize <= largestGram; ++gramSize)
    {
        assertTrue(tk.IncrementToken());

        // Offsets are expected to span the whole input for every gram.
        assertEquals(0, offsetAtt.StartOffset());
        assertEquals(s.Length, offsetAtt.EndOffset());

        // Translate the code point count into a UTF-16 char index before slicing.
        int prefixEnd = Character.OffsetByCodePoints(s, 0, gramSize);
        assertEquals(s.Substring(0, prefixEnd), termAtt.ToString());
    }

    assertFalse(tk.IncrementToken());
}
/// <summary>
/// Verifies that <c>EdgeNGramTokenFilter</c> counts gram length in code points:
/// a random Unicode string is tokenized as a single keyword, and each edge n-gram
/// produced must equal the input prefix containing exactly that many code points.
/// </summary>
/// <remarks>
/// Every token is also expected to report start offset 0 and end offset
/// <c>s.Length</c>. Once all gram sizes in
/// <c>[minGram, min(codePointCount, maxGram)]</c> have been seen, no further token
/// may be returned.
/// </remarks>
public virtual void TestSupplementaryCharacters()
{
    string s = TestUtil.RandomUnicodeString(Random(), 10);
    int codePoints = Character.CodePointCount(s, 0, s.Length);

    // Gram bounds are drawn after the string so the random sequence is consumed in the same order.
    int minGram = TestUtil.NextInt(Random(), 1, 3);
    int maxGram = TestUtil.NextInt(Random(), minGram, 10);

    StringReader reader = new StringReader(s);
    TokenStream tk = new KeywordTokenizer(reader);
    tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);

    ICharTermAttribute termAtt = tk.AddAttribute<ICharTermAttribute>();
    IOffsetAttribute offsetAtt = tk.AddAttribute<IOffsetAttribute>();

    tk.Reset();

    // Grams longer than the input (in code points) are never expected.
    int upperBound = Math.Min(codePoints, maxGram);
    int n = minGram;
    while (n <= upperBound)
    {
        bool hasToken = tk.IncrementToken();
        assertTrue(hasToken);

        assertEquals(0, offsetAtt.StartOffset());
        assertEquals(s.Length, offsetAtt.EndOffset());

        // Char index just past the n-th code point; a surrogate pair advances it by two.
        int charEnd = Character.OffsetByCodePoints(s, 0, n);
        string expectedTerm = s.Substring(0, charEnd);
        assertEquals(expectedTerm, termAtt.ToString());

        ++n;
    }

    bool hasExtraToken = tk.IncrementToken();
    assertFalse(hasExtraToken);
}