Note: Token length is measured as the number of Unicode code points, not the number of UTF-16 `char` units.
/// <summary>
/// Runs a whitespace-tokenized sentence through a <see cref="CodepointCountFilter"/>
/// constructed with the bounds 2 and 6, then checks both the terms that survive and
/// the position increments reported for them.
/// </summary>
public virtual void TestFilterWithPosIncr()
{
    const string input = "short toolong evenmuchlongertext a ab toolong foo";

    // "short" (5), "ab" (2) and "foo" (3) are the only terms expected to pass the filter.
    string[] expectedTerms = { "short", "ab", "foo" };

    // Expected increments: three tokens sit between "short" and "ab" (hence 4),
    // and one token sits between "ab" and "foo" (hence 2).
    int[] expectedPosIncrements = { 1, 4, 2 };

    TokenStream tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
    CodepointCountFilter filter = new CodepointCountFilter(TEST_VERSION_CURRENT, tokenizer, 2, 6);

    AssertTokenStreamContents(filter, expectedTerms, expectedPosIncrements);
}
/// <summary>
/// Randomized check of <see cref="CodepointCountFilter"/>. For each of 10000 iterations
/// a random Unicode string and a random [min, max] range are generated, and the filter
/// is expected to produce a token exactly when the string's code point count lies
/// within that range (both bounds inclusive).
/// </summary>
public virtual void TestRandomStrings()
{
    for (int i = 0; i < 10000; i++)
    {
        string text = TestUtil.RandomUnicodeString(Random, 100);
        int min = TestUtil.NextInt32(Random, 0, 100);
        int max = TestUtil.NextInt32(Random, 0, 100);
        int count = text.CodePointCount(0, text.Length);

        // The two bounds are drawn independently, so put them in order before use.
        if (min > max)
        {
            int temp = min;
            min = max;
            max = temp;
        }

        bool expected = count >= min && count <= max;

        // NOTE(review): relies on KeywordTokenizer emitting the whole input as one token,
        // so that IncrementToken() reflects the filter's decision for `text` - confirm.
        TokenStream stream = new KeywordTokenizer(new StringReader(text));
        stream = new CodepointCountFilter(TEST_VERSION_CURRENT, stream, min, max);
        try
        {
            stream.Reset();
            assertEquals(expected, stream.IncrementToken());
            stream.End();
        }
        finally
        {
            // Dispose even when the assertion (or the stream itself) throws, so a
            // failing iteration does not leave the stream and its reader open.
            stream.Dispose();
        }
    }
}
/// <summary>
/// Randomized check of <see cref="CodepointCountFilter"/>. For each of 10000 iterations
/// a random Unicode string and a random [min, max] range are generated, and the filter
/// is expected to produce a token exactly when the string's code point count lies
/// within that range (both bounds inclusive).
/// </summary>
public virtual void TestRandomStrings()
{
    for (int i = 0; i < 10000; i++)
    {
        string text = TestUtil.RandomUnicodeString(Random(), 100);
        int min = TestUtil.NextInt(Random(), 0, 100);
        int max = TestUtil.NextInt(Random(), 0, 100);
        int count = Character.CodePointCount(text, 0, text.Length);

        // The two bounds are drawn independently, so put them in order before use.
        if (min > max)
        {
            int temp = min;
            min = max;
            max = temp;
        }

        bool expected = count >= min && count <= max;

        // NOTE(review): relies on KeywordTokenizer emitting the whole input as one token,
        // so that IncrementToken() reflects the filter's decision for `text` - confirm.
        TokenStream stream = new KeywordTokenizer(new StringReader(text));
        stream = new CodepointCountFilter(TEST_VERSION_CURRENT, stream, min, max);
        try
        {
            stream.Reset();
            assertEquals(expected, stream.IncrementToken());
            stream.End();
        }
        finally
        {
            // Dispose even when the assertion (or the stream itself) throws, so a
            // failing iteration does not leave the stream and its reader open.
            stream.Dispose();
        }
    }
}