Removes words that are too long or too short from the stream.

Note: Length is calculated as the number of Unicode codepoints.

Inheritance: Lucene.Net.Analysis.Util.FilteringTokenFilter
Example #1
0
        /// <summary>
        /// Runs a whitespace-tokenized sentence through a <see cref="CodepointCountFilter"/>
        /// configured with the bounds 2 and 6 and checks which tokens survive.
        /// </summary>
        /// <remarks>
        /// Only "short", "ab" and "foo" are expected to remain. The expected integers
        /// (1, 4, 2) are presumably position increments that account for the removed
        /// tokens in between — confirm against AssertTokenStreamContents.
        /// </remarks>
        public virtual void TestFilterWithPosIncr()
        {
            const string input = "short toolong evenmuchlongertext a ab toolong foo";
            string[] expectedTerms = new string[] { "short", "ab", "foo" };
            int[] expectedIncrements = new int[] { 1, 4, 2 };

            TokenStream tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
            CodepointCountFilter filter = new CodepointCountFilter(TEST_VERSION_CURRENT, tokenizer, 2, 6);

            AssertTokenStreamContents(filter, expectedTerms, expectedIncrements);
        }
 /// <summary>
 /// Randomized check of <see cref="CodepointCountFilter"/>: for 10000 random Unicode
 /// strings and random bound pairs, the filter must let the token through exactly
 /// when the string's code point count lies within the (ordered) bounds.
 /// </summary>
 public virtual void TestRandomStrings()
 {
     const int iterations = 10000;
     for (int round = 0; round < iterations; round++)
     {
         string text = TestUtil.RandomUnicodeString(Random, 100);
         int first = TestUtil.NextInt32(Random, 0, 100);
         int second = TestUtil.NextInt32(Random, 0, 100);

         // The two random bounds may arrive in either order; sort them so min <= max.
         int min = first > second ? second : first;
         int max = first > second ? first : second;

         int count = text.CodePointCount(0, text.Length);
         bool expected = !(count < min || count > max);

         // The whole string goes through a KeywordTokenizer (presumably emitted as a
         // single token — confirm), so IncrementToken() reports whether it was kept.
         TokenStream tokenizer = new KeywordTokenizer(new StringReader(text));
         TokenStream filter = new CodepointCountFilter(TEST_VERSION_CURRENT, tokenizer, min, max);
         filter.Reset();
         assertEquals(expected, filter.IncrementToken());
         filter.End();
         filter.Dispose();
     }
 }
 /// <summary>
 /// Feeds 10000 random Unicode strings through a <see cref="CodepointCountFilter"/>
 /// with randomly chosen bounds and asserts that a token is produced only when the
 /// string's code point count falls between the lower and upper bound (inclusive).
 /// </summary>
 public virtual void TestRandomStrings()
 {
     for (int remaining = 10000; remaining > 0; remaining--)
     {
         string text = TestUtil.RandomUnicodeString(Random(), 100);
         int boundA = TestUtil.NextInt(Random(), 0, 100);
         int boundB = TestUtil.NextInt(Random(), 0, 100);
         int count = Character.CodePointCount(text, 0, text.Length);

         // Whichever random bound is smaller becomes the minimum.
         int min;
         int max;
         if (boundA > boundB)
         {
             min = boundB;
             max = boundA;
         }
         else
         {
             min = boundA;
             max = boundB;
         }

         bool expected = min <= count && max >= count;

         TokenStream source = new KeywordTokenizer(new StringReader(text));
         TokenStream filtered = new CodepointCountFilter(TEST_VERSION_CURRENT, source, min, max);
         filtered.Reset();
         assertEquals(expected, filtered.IncrementToken());
         filtered.End();
         filtered.Dispose();
     }
 }
 /// <summary>
 /// Verifies that a <see cref="CodepointCountFilter"/> built with the bounds 2 and 6
 /// drops "toolong", "evenmuchlongertext" and "a" from a whitespace-split input,
 /// leaving "short", "ab" and "foo".
 /// </summary>
 public virtual void TestFilterWithPosIncr()
 {
     StringReader reader = new StringReader("short toolong evenmuchlongertext a ab toolong foo");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     CodepointCountFilter filter = new CodepointCountFilter(TEST_VERSION_CURRENT, stream, 2, 6);

     string[] survivors = new string[] { "short", "ab", "foo" };
     // Presumably the position increments expected for each surviving token,
     // with gaps left by the removed tokens — confirm against the assertion helper.
     int[] increments = new int[] { 1, 4, 2 };
     AssertTokenStreamContents(filter, survivors, increments);
 }