/// <summary>
        /// Tokenizes two whitespace-separated inputs and verifies the emitted terms,
        /// their start/end offsets and the final offset. The expectations below have
        /// "toolong" come out as the two terms "toolo" and "ng".
        /// </summary>
        public virtual void TestTooLongToken()
        {
            // NOTE(review): the helper analyzer is defined elsewhere; the expected terms
            // suggest it caps token length at 5 characters - confirm against its definition.
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);

            // Case 1: an over-long word in the middle of the input, plus a trailing space.
            var splitStream = analyzer.TokenStream("bogus", new StringReader("test 123 toolong ok "));
            string[] splitTerms = { "test", "123", "toolo", "ng", "ok" };
            int[] splitStarts = { 0, 5, 9, 14, 17 };
            int[] splitEnds = { 4, 8, 14, 16, 19 };
            int? splitFinalOffset = 20;
            AssertTokenStreamContents(splitStream, splitTerms, splitStarts, splitEnds, splitFinalOffset);

            // Case 2: the input ends right after "toolo", so no leftover term is expected.
            var exactStream = analyzer.TokenStream("bogus", new StringReader("test 123 toolo"));
            string[] exactTerms = { "test", "123", "toolo" };
            int[] exactStarts = { 0, 5, 9 };
            int[] exactEnds = { 4, 8, 14 };
            int? exactFinalOffset = 14;
            AssertTokenStreamContents(exactStream, exactTerms, exactStarts, exactEnds, exactFinalOffset);
        }
Exemple #2
0
        /// <summary>
        /// For a batch of random Unicode strings, tokenizes each one and asserts that
        /// every code point inside each token's offset range of the original string
        /// is a letter. Finishes with a randomized data check on the same analyzer.
        /// </summary>
        public virtual void TestCrossPlaneNormalization()
        {
            var analyzer = new AnalyzerAnonymousInnerClassHelper();
            var iterations = 1000 * RANDOM_MULTIPLIER;

            for (var round = 0; round < iterations; round++)
            {
                var input = TestUtil.RandomUnicodeString(Random());
                var stream = analyzer.TokenStream("foo", input);
                try
                {
                    stream.Reset();
                    var offsets = stream.AddAttribute<IOffsetAttribute>();
                    while (stream.IncrementToken())
                    {
                        // Slice the original text covered by the current token's offsets.
                        var start = offsets.StartOffset();
                        var length = offsets.EndOffset() - start;
                        var tokenText = input.Substring(start, length);

                        // Step through the slice one code point at a time; a code point
                        // may occupy more than one char, hence the CharCount stride.
                        var index = 0;
                        while (index < tokenText.Length)
                        {
                            var codePoint = char.ConvertToUtf32(tokenText, index);
                            assertTrue("non-letter:" + codePoint.ToString("x"), Character.IsLetter(codePoint));
                            index += Character.CharCount(codePoint);
                        }
                    }
                    stream.End();
                }
                finally
                {
                    // Always release the stream, even when an assertion above failed.
                    IOUtils.CloseWhileHandlingException(stream);
                }
            }

            // just for fun
            CheckRandomData(Random(), analyzer, iterations);
        }
 /// <summary>
 /// Tokenizes many random Unicode strings and, for each produced token, checks that
 /// the span of the source string delimited by the token's offsets consists solely
 /// of letter code points. A randomized data check on the analyzer runs afterwards.
 /// </summary>
 public virtual void TestCrossPlaneNormalization()
 {
     var analyzer = new AnalyzerAnonymousInnerClassHelper();
     var rounds = 1000 * RANDOM_MULTIPLIER;

     for (var n = 0; n < rounds; n++)
     {
         var text = TestUtil.RandomUnicodeString(Random());
         var tokens = analyzer.TokenStream("foo", text);
         try
         {
             tokens.Reset();
             var offsetAttribute = tokens.AddAttribute<IOffsetAttribute>();
             while (tokens.IncrementToken())
             {
                 // Recover the source text that the current token's offsets point at.
                 var begin = offsetAttribute.StartOffset();
                 var span = offsetAttribute.EndOffset() - begin;
                 var covered = text.Substring(begin, span);

                 // Advance by whole code points rather than by single chars.
                 var pos = 0;
                 while (pos < covered.Length)
                 {
                     var cp = char.ConvertToUtf32(covered, pos);
                     assertTrue("non-letter:" + cp.ToString("x"), Character.IsLetter(cp));
                     pos += Character.CharCount(cp);
                 }
             }
             tokens.End();
         }
         finally
         {
             // Close unconditionally so a failed assertion does not leave the stream open.
             IOUtils.CloseWhileHandlingException(tokens);
         }
     }

     // just for fun
     CheckRandomData(Random(), analyzer, rounds);
 }
Exemple #4
0
        /// <summary>
        /// Runs the helper analyzer with consumeAll set to true and then false, and
        /// verifies the emitted terms and offsets for inputs holding more tokens than,
        /// fewer tokens than, and exactly as many tokens as the expected output size.
        /// A final offset is asserted only when consumeAll is true.
        /// </summary>
        public virtual void TestMaxPosition2()
        {
            bool[] consumeAllModes = { true, false };
            foreach (bool consumeAll in consumeAllModes)
            {
                // NOTE(review): the helper analyzer is defined elsewhere; the expectations
                // suggest it stops emitting after two tokens - confirm.
                Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(consumeAll);

                // assertAnalyzesTo is deliberately not used here: the end offset is the end
                // of the string only when consumeAll is true, so otherwise null is passed.
                int? wideFinalOffset = consumeAll ? (int?)16 : null;
                AssertTokenStreamContents(
                    analyzer.TokenStream("dummy", "1  2     3  4  5"),
                    new string[] { "1", "2" },
                    new int[] { 0, 3 },
                    new int[] { 1, 4 },
                    wideFinalOffset);

                int? readerFinalOffset = consumeAll ? (int?)9 : null;
                AssertTokenStreamContents(
                    analyzer.TokenStream("dummy", new StringReader("1 2 3 4 5")),
                    new string[] { "1", "2" },
                    new int[] { 0, 2 },
                    new int[] { 1, 3 },
                    readerFinalOffset);

                // less than the limit, ensure we behave correctly
                int? shortFinalOffset = consumeAll ? (int?)3 : null;
                AssertTokenStreamContents(
                    analyzer.TokenStream("dummy", "1  "),
                    new string[] { "1" },
                    new int[] { 0 },
                    new int[] { 1 },
                    shortFinalOffset);

                // equal to limit
                int? exactFinalOffset = consumeAll ? (int?)6 : null;
                AssertTokenStreamContents(
                    analyzer.TokenStream("dummy", "1  2  "),
                    new string[] { "1", "2" },
                    new int[] { 0, 3 },
                    new int[] { 1, 4 },
                    exactFinalOffset);
            }
        }
        /// <summary>
        /// Exercises the helper analyzer once per consumeAll setting (true, then false)
        /// and checks terms plus start/end offsets for four inputs. The final-offset
        /// expectation is supplied only when consumeAll is true; otherwise it is null.
        /// </summary>
        public virtual void TestMaxPosition2()
        {
            bool[] settings = { true, false };
            foreach (bool consumeAll in settings)
            {
                Analyzer limited = new AnalyzerAnonymousInnerClassHelper(consumeAll);

                // Not using assertAnalyzesTo: unless consumeAll is true, the end offset
                // is not the end of the string, so no final offset can be asserted.
                var spacedStream = limited.TokenStream("dummy", "1  2     3  4  5");
                int? spacedFinal = consumeAll ? (int?)16 : null;
                AssertTokenStreamContents(spacedStream, new string[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, spacedFinal);

                var readerStream = limited.TokenStream("dummy", new StringReader("1 2 3 4 5"));
                int? readerFinal = consumeAll ? (int?)9 : null;
                AssertTokenStreamContents(readerStream, new string[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, readerFinal);

                // less than the limit, ensure we behave correctly
                var belowStream = limited.TokenStream("dummy", "1  ");
                int? belowFinal = consumeAll ? (int?)3 : null;
                AssertTokenStreamContents(belowStream, new string[] { "1" }, new int[] { 0 }, new int[] { 1 }, belowFinal);

                // equal to limit
                var atLimitStream = limited.TokenStream("dummy", "1  2  ");
                int? atLimitFinal = consumeAll ? (int?)6 : null;
                AssertTokenStreamContents(atLimitStream, new string[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, atLimitFinal);
            }
        }
 /// <summary>
 /// Compares a MockAnalyzer built with jvmLetter against the helper analyzer:
 /// for 1000 random simple strings, the token streams produced by both for the
 /// same text are handed to assertEquals.
 /// </summary>
 public virtual void TestLetterAscii()
 {
     const int rounds = 1000;

     Random rng = Random();
     Analyzer reference = new MockAnalyzer(rng, jvmLetter, false);
     Analyzer candidate = new AnalyzerAnonymousInnerClassHelper(this);

     for (int round = 0; round < rounds; round++)
     {
         string text = TestUtil.RandomSimpleString(rng);

         // Each analyzer gets its own reader over the same text.
         var referenceStream = reference.TokenStream("foo", newStringReader(text));
         var candidateStream = candidate.TokenStream("foo", newStringReader(text));
         assertEquals(text, referenceStream, candidateStream);
     }
 }
        /// <summary>
        /// Duels two analyzers - a MockAnalyzer configured with jvmLetter and the
        /// helper analyzer - over 1000 random simple strings, passing both resulting
        /// token streams for each string to assertEquals.
        /// </summary>
        public virtual void TestLetterAscii()
        {
            const int iterations = 1000;

            Random source = Random();
            Analyzer lhs = new MockAnalyzer(source, jvmLetter, false);
            Analyzer rhs = new AnalyzerAnonymousInnerClassHelper(this);

            int remaining = iterations;
            while (remaining > 0)
            {
                string sample = TestUtil.RandomSimpleString(source);

                // A separate reader per analyzer, both wrapping the same sample text.
                var lhsTokens = lhs.TokenStream("foo", newStringReader(sample));
                var rhsTokens = rhs.TokenStream("foo", newStringReader(sample));
                assertEquals(sample, lhsTokens, rhsTokens);

                remaining--;
            }
        }
 /// <summary>
 /// Checks terms, start offsets, end offsets and final offset for two inputs run
 /// through the helper analyzer. In the first input the word "toolong" is expected
 /// to surface as "toolo" plus "ng"; the second input stops right after "toolo".
 /// </summary>
 public virtual void TestTooLongToken()
 {
     // NOTE(review): helper analyzer is declared elsewhere; expectations imply a
     // 5-character token cap - confirm against its definition.
     Analyzer tokenizer = new AnalyzerAnonymousInnerClassHelper(this);

     // Over-long word in the middle, trailing space at the end.
     var withOverflow = tokenizer.TokenStream("bogus", new StringReader("test 123 toolong ok "));
     string[] overflowTerms = { "test", "123", "toolo", "ng", "ok" };
     int[] overflowStarts = { 0, 5, 9, 14, 17 };
     int[] overflowEnds = { 4, 8, 14, 16, 19 };
     int? overflowFinal = 20;
     AssertTokenStreamContents(withOverflow, overflowTerms, overflowStarts, overflowEnds, overflowFinal);

     // Last word has exactly the length of the truncated piece above.
     var withoutOverflow = tokenizer.TokenStream("bogus", new StringReader("test 123 toolo"));
     string[] plainTerms = { "test", "123", "toolo" };
     int[] plainStarts = { 0, 5, 9 };
     int[] plainEnds = { 4, 8, 14 };
     int? plainFinal = 14;
     AssertTokenStreamContents(withoutOverflow, plainTerms, plainStarts, plainEnds, plainFinal);
 }