Example #1
0
        // Runs every break iterator kind from KIND_CHARACTER through KIND_TITLE over a
        // string that contains a four-code-point "word" for each code point, and checks
        // that the reported boundaries are strictly increasing.
        public void TestBreakAllChars()
        {
            // Make a "word" from each code point, separated by spaces.
            // For dictionary based breaking, runs the start-of-range
            // logic with all possible dictionary characters.
            StringBuilder sb = new StringBuilder();

            for (int c = 0; c < 0x110000; ++c)
            {
                sb.AppendCodePoint(c);
                sb.AppendCodePoint(c);
                sb.AppendCodePoint(c);
                sb.AppendCodePoint(c);
                sb.Append(' ');
            }
            String s = sb.ToString();

            for (int breakKind = BreakIterator.KIND_CHARACTER; breakKind <= BreakIterator.KIND_TITLE; ++breakKind)
            {
                RuleBasedBreakIterator bi =
                    (RuleBasedBreakIterator)BreakIterator.GetBreakInstance(ULocale.ENGLISH, breakKind);
                bi.SetText(s);
                int lastb = -1;
                for (int b = bi.First(); b != BreakIterator.Done; b = bi.Next())
                {
                    assertTrue("(lastb < b) : (" + lastb + " < " + b + ")", lastb < b);

                    // Remember this boundary so the next iteration really compares against
                    // the previous one; without this the assertion is always "-1 < b".
                    lastb = b;
                }
            }
        }
Example #2
0
        // Checks that iterators which should be equal (same kind, same text, or a clone)
        // report the same hash code, and that a character iterator and a word iterator
        // over the same text report different hash codes.
        public void TestHashCode()
        {
            var charIterA = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance(CultureInfo.CurrentCulture);
            var charIterB = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance(CultureInfo.CurrentCulture);
            var wordIter  = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(CultureInfo.CurrentCulture);

            Logln("Testing hashCode()");

            const string sharedText = "Hash code";
            charIterA.SetText(sharedText);
            wordIter.SetText(sharedText);
            charIterB.SetText(sharedText);

            var charIterAClone = (RuleBasedBreakIterator)charIterA.Clone();
            var wordIterClone  = (RuleBasedBreakIterator)wordIter.Clone();

            // Equal iterators must agree on their hash code.
            bool equalIteratorsDisagree =
                charIterA.GetHashCode() != charIterAClone.GetHashCode()
                || charIterA.GetHashCode() != charIterB.GetHashCode()
                || charIterAClone.GetHashCode() != charIterB.GetHashCode()
                || wordIter.GetHashCode() != wordIterClone.GetHashCode();
            if (equalIteratorsDisagree)
            {
                Errln("ERROR: identical objects have different hashcodes");
            }

            // Iterators of different kinds are expected to differ.
            bool differentIteratorsCollide =
                charIterA.GetHashCode() == wordIter.GetHashCode()
                || wordIter.GetHashCode() == charIterB.GetHashCode()
                || charIterAClone.GetHashCode() == wordIterClone.GetHashCode()
                || charIterAClone.GetHashCode() == wordIter.GetHashCode();
            if (differentIteratorsCollide)
            {
                Errln("ERROR: different objects have same hashcodes");
            }
        }
Example #3
0
 // Splits `text` into segments using a rule-based break iterator of the given type and
 // locale, and returns the segments as a fully materialized sequence.
 public IEnumerable <string> Split(BreakIterator.UBreakIteratorType type, string locale,
                                   string text)
 {
     using (var breakIterator = new RuleBasedBreakIterator(type, locale))
     {
         breakIterator.SetText(text);

         // Copy the segments out while the iterator is still alive. Returning the
         // iterator itself would hand the caller an object that the enclosing
         // `using` has already disposed by the time it is enumerated.
         var segments = new List<string>();
         foreach (var segment in breakIterator)
         {
             segments.Add(segment);
         }

         return segments;
     }
 }
Example #4
0
        // Dumps the character break iterator's rules into an in-memory stream and checks
        // that more than 100 bytes were produced.
        public void TestRuledump()
        {
            RuleBasedBreakIterator bi = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance();

            using (MemoryStream bos = new MemoryStream())
            using (TextWriter @out = new StreamWriter(bos))
            {
                bi.Dump(@out);

                // StreamWriter buffers its output; flush so that everything Dump() wrote
                // has reached the MemoryStream before its length is inspected.
                @out.Flush();

                // Checked inside the using scope: Length is not readable once the
                // stream has been closed.
                assertTrue(null, bos.Length > 100);
            }
        }
Example #5
0
        // Regression test for bug 12797: with the chained rule set below, the first
        // Next() after First() on "abc" is expected to land on offset 3.
        public void TestBug12797()
        {
            const string chainedRules = "!!chain; !!forward; $v=b c; a b; $v; !!reverse; .*;";
            var iterator = new RuleBasedBreakIterator(chainedRules);

            iterator.SetText("abc");
            iterator.First();
            int firstBreak = iterator.Next();

            assertEquals("Rule chaining test", 3, firstBreak);
        }
Example #6
0
        // Exercises First(): with no text it should report BreakIterator.Done; with
        // "abc" it should report 0 and the following Next() should report 1.
        public void TestFirst()
        {
            var iterator = new RuleBasedBreakIterator(".;");

            // Tests when "if (fText == null)" is true
            CharacterIterator noText = null;
            iterator.SetText(noText);
            int firstWithoutText = iterator.First();
            assertEquals("RuleBasedBreakIterator.First()", BreakIterator.Done, firstWithoutText);

            iterator.SetText("abc");
            int firstWithText = iterator.First();
            assertEquals("RuleBasedBreakIterator.First()", 0, firstWithText);
            int secondBoundary = iterator.Next();
            assertEquals("RuleBasedBreakIterator.Next()", 1, secondBoundary);
        }
Example #7
0
        // Exercises Last() on an iterator whose text has been set to null; the result
        // is expected to be BreakIterator.Done.
        public void TestLast()
        {
            var iterator = new RuleBasedBreakIterator(".;");

            // Tests when "if (fText == null)" is true
            CharacterIterator noText = null;
            iterator.SetText(noText);

            int lastWithoutText = iterator.Last();
            if (lastWithoutText == BreakIterator.Done)
            {
                return;
            }

            Errln("RuleBasedBreakIterator.Last() was supposed to return "
                  + "BreakIterator.Done when the object has a null fText.");
        }
Example #8
0
        // Exercises Following() with an offset before the start of the text; the
        // result is expected to be 0.
        public void TestFollowing()
        {
            var iterator = new RuleBasedBreakIterator(".;");

            // Tests when "else if (offset < fText.getBeginIndex())" is true
            iterator.SetText("dummy");

            int boundary = iterator.Following(-1);
            if (boundary == 0)
            {
                return;
            }

            Errln("RuleBasedBreakIterator.following(-1) was suppose to return "
                  + "0 when the object has a fText of dummy.");
        }
Example #9
0
 // Loads the compiled sentence and word break rules from the embedded resources
 // "jdksent.brk" and "jdkword.brk" and stores the resulting iterators in the static
 // SentenceInstance and WordInstance members.
 static JdkBreakIterator()
 {
     var resourceOwner = typeof(JdkBreakIterator);

     using (Stream sentenceRules = resourceOwner.FindAndGetManifestResourceStream("jdksent.brk"))
     {
         SentenceInstance = RuleBasedBreakIterator.GetInstanceFromCompiledRules(sentenceRules);
     }

     using (Stream wordRules = resourceOwner.FindAndGetManifestResourceStream("jdkword.brk"))
     {
         WordInstance = RuleBasedBreakIterator.GetInstanceFromCompiledRules(wordRules);
     }
 }
Example #10
0
        // Checks Equals() and Clone() on rule-based break iterators: iterators of the
        // same kind over the same text compare equal, iterators of a different kind or
        // over different text do not, and a clone compares equal to its source.
        public void TestCloneEquals()
        {
            var charIter          = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance(CultureInfo.CurrentCulture);
            var charIterTwin      = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance(CultureInfo.CurrentCulture);
            var charIterOtherText = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance(CultureInfo.CurrentCulture);
            var wordIter          = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(CultureInfo.CurrentCulture);

            const string sharedText = "Testing word break iterators's clone() and equals()";

            charIter.SetText(sharedText);
            wordIter.SetText(sharedText);
            charIterTwin.SetText(sharedText);

            charIterOtherText.SetText("hello");

            Logln("Testing equals()");
            Logln("Testing == and !=");

            bool charEqualityBroken =
                !charIter.Equals(charIterTwin)
                || charIter.Equals(wordIter)
                || charIter.Equals(charIterOtherText);
            if (charEqualityBroken)
            {
                Errln("ERROR:1 RBBI's == and !- operator failed.");
            }

            bool wordEqualityBroken =
                wordIter.Equals(charIterTwin)
                || wordIter.Equals(charIter)
                || charIterTwin.Equals(charIterOtherText);
            if (wordEqualityBroken)
            {
                Errln("ERROR:2 RBBI's == and != operator  failed.");
            }

            Logln("Testing clone()");
            var charIterClone = (RuleBasedBreakIterator)charIter.Clone();
            var wordIterClone = (RuleBasedBreakIterator)wordIter.Clone();

            // The character clone must equal its source and the twin, and nothing else.
            bool charCloneBroken =
                !charIterClone.Equals(charIter)
                || !charIterClone.Equals(charIterTwin)
                || charIterClone.Equals(charIterOtherText)
                || charIterClone.Equals(wordIter);
            if (charCloneBroken)
            {
                Errln("ERROR:1 RBBI's clone() method failed");
            }

            // The word clone must equal only the word iterator it was cloned from.
            bool wordCloneBroken =
                wordIterClone.Equals(charIter)
                || wordIterClone.Equals(charIterTwin)
                || wordIterClone.Equals(charIterOtherText)
                || !wordIterClone.Equals(wordIter);
            if (wordCloneBroken)
            {
                Errln("ERROR:2 RBBI's clone() method failed");
            }

            // Clones must carry the same text as their source, and the two clones must differ.
            bool cloneTextBroken =
                !charIter.Text.Equals(charIterClone.Text)
                || !wordIterClone.Text.Equals(wordIter.Text)
                || wordIterClone.Equals(charIterClone);
            if (cloneTextBroken)
            {
                Errln("ERROR: RBBI's clone() method failed");
            }
        }
Example #11
0
        // Runs the shared boundary check (doBoundaryTest) for the English character
        // and word iterators over a mixed Latin/Devanagari sample, using the expected
        // boundary offsets listed below.
        public void TestIsBoundary()
        {
            const string sample = "Write here. \u092d\u0301\u0930\u0924 \u0938\u0941\u0902\u0926\u0930 a\u0301u";

            // Character boundaries.
            var characterIterator = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance(new CultureInfo("en"));
            characterIterator.SetText(sample);
            int[] characterBounds = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 20, 21, 22, 23, 25, 26 };
            doBoundaryTest(characterIterator, sample, characterBounds);

            // Word boundaries.
            var wordIterator = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(new CultureInfo("en"));
            wordIterator.SetText(sample);
            int[] wordBounds = { 0, 5, 6, 10, 11, 12, 16, 17, 22, 23, 26 };
            doBoundaryTest(wordIterator, sample, wordBounds);
        }
Example #12
0
        // Enumerates an "en-US" break iterator of the given type over a fixed sample
        // sentence and returns the produced segments as a list.
        public List <string> GetEnumerator(BreakIterator.UBreakIteratorType type)
        {
            var segments = new List<string>();

            using (var iterator = new RuleBasedBreakIterator(type, "en-US"))
            {
                iterator.SetText("Aa bb. Cc 3.5 x? Y?x! Z");

                // Collect every segment before the iterator is disposed.
                foreach (var segment in iterator)
                {
                    segments.Add(segment);
                }
            }

            return segments;
        }
Example #13
0
 /// <summary>
 /// Builds a <see cref="RuleBasedBreakIterator"/> from the compiled rules stored in the
 /// embedded resource named <paramref name="filename"/>.
 /// </summary>
 /// <param name="filename">Name of the embedded resource that holds the compiled rules.</param>
 /// <returns>The iterator created from the compiled rules.</returns>
 /// <exception cref="Exception">Wraps any <see cref="IOException"/> raised while reading the rules.</exception>
 private static RuleBasedBreakIterator ReadBreakIterator(string filename)
 {
     using (Stream compiledRules = typeof(DefaultICUTokenizerConfig).FindAndGetManifestResourceStream(filename))
     {
         try
         {
             return RuleBasedBreakIterator.GetInstanceFromCompiledRules(compiledRules);
         }
         catch (IOException ioError)
         {
             // Surface I/O failures as a general exception, keeping the original as the cause.
             throw new Exception(ioError.ToString(), ioError);
         }
     }
 }
Example #14
0
        // Regression test for bug 12873: clones of one line break iterator are used
        // from four worker threads at the same time, and any assertion failure raised
        // inside a worker is rethrown here.
        public void TestBug12873()
        {
            // Bug with RuleBasedBreakIterator's internal structure for recording potential look-ahead
            // matches not being cloned when a break iterator is cloned. This resulted in usage
            // collisions if the original break iterator and its clone were used concurrently.

            // The Line Break rules for Regional Indicators make use of look-ahead rules, and
            // show the bug. 1F1E6 = \uD83C\uDDE6 = REGIONAL INDICATOR SYMBOL LETTER A
            // Regional indicators group into pairs, expect breaks after two code points, which
            // is after four 16 bit code units.
            const string regionalIndicators = "\uD83C\uDDE6\uD83C\uDDE6\uD83C\uDDE6\uD83C\uDDE6\uD83C\uDDE6\uD83C\uDDE6";
            var sharedIterator = (RuleBasedBreakIterator)BreakIterator.GetLineInstance();

            // Single slot used by the workers to hand an assertion failure back to this thread.
            var workerFailure = new AssertionException[1];

            var workers = new List<ThreadJob>();
            while (workers.Count < 4)
            {
                workers.Add(new WorkerThread(regionalIndicators, sharedIterator, workerFailure));
            }

            // Start all workers first, then wait for all of them.
            foreach (var worker in workers)
            {
                worker.Start();
            }

            foreach (var worker in workers)
            {
#if FEATURE_THREADINTERRUPT
                try
                {
                    worker.Join();
                }
                catch (ThreadInterruptedException e)
                {
                    fail(e.ToString());
                }
#else
                worker.Join();
#endif
            }

            // The test framework won't see failures from within the worker threads, so
            // check again if one occurred.
            AssertionException failure = workerFailure[0];
            if (failure != null)
            {
                throw failure;
            }
        }
Example #15
0
        // Checks ToString(): a character iterator and its clone must render the same
        // string, and both must differ from the word iterator's string.
        public void TestToString()
        {
            var charIter = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance(CultureInfo.CurrentCulture);
            var wordIter = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(CultureInfo.CurrentCulture);

            Logln("Testing toString()");
            charIter.SetText("Hello there");
            var charIterClone = (RuleBasedBreakIterator)charIter.Clone();

            string charDescription  = charIter.ToString();
            string wordDescription  = wordIter.ToString();
            string cloneDescription = charIterClone.ToString();

            bool descriptionsWrong =
                wordDescription.Equals(cloneDescription)
                || charDescription.Equals(wordDescription)
                || !charDescription.Equals(cloneDescription);
            if (descriptionsWrong)
            {
                Errln("ERROR: error in toString() method");
            }
        }
Example #16
0
        // Exercises the Current property: BreakIterator.Done is expected when the text
        // is null, and 0 is expected right after text has been set.
        public void TestCurrent()
        {
            var iterator = new RuleBasedBreakIterator(".;");

            // Tests when "(fText != null) ? fText.getIndex() : BreakIterator.Done" is true and false
            CharacterIterator noText = null;
            iterator.SetText(noText);
            int positionWithoutText = iterator.Current;
            if (positionWithoutText != BreakIterator.Done)
            {
                Errln("RuleBasedBreakIterator.Current was suppose to return "
                      + "BreakIterator.Done when the object has a fText of null.");
            }

            iterator.SetText("dummy");
            int positionAfterSetText = iterator.Current;
            if (positionAfterSetText != 0)
            {
                Errln("RuleBasedBreakIterator.Current was suppose to return "
                      + "0 when the object has a fText of dummy.");
            }
        }
Example #17
0
        // Exercises Clone() on an iterator whose text is null: the clone's Text is
        // expected to be null as well, and no exception should be raised.
        public void TestClone()
        {
            var iterator = new RuleBasedBreakIterator(".;");

            try
            {
                CharacterIterator noText = null;
                iterator.SetText(noText);

                var copy = (RuleBasedBreakIterator)iterator.Clone();
                if (copy.Text != null)
                {
                    Errln("RuleBasedBreakIterator.clone() was suppose to return "
                          + "the same object because fText is set to null.");
                }
            }
            catch (Exception)
            {
                Errln("RuleBasedBreakIterator.clone() was not suppose to return " + "an exception.");
            }
        }
Example #18
0
        // Exercises SetText()/Text: the text read back must equal what was set, the
        // position must reset to 0 on every SetText(), and the ICharSequence overload
        // must produce the expected line breaks for a simple sentence.
        public void TestGetSetText()
        {
            Logln("Testing getText setText ");
            const string firstText  = "first string.";
            const string secondText = "Second string.";

            var wordIterator = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(CultureInfo.CurrentCulture);
            CharacterIterator expectedText = new StringCharacterIterator(firstText);

            wordIterator.SetText(firstText);
            bool textMatches = wordIterator.Text.Equals(expectedText);
            if (!textMatches)
            {
                Errln("ERROR:1 error in setText or getText ");
            }
            if (wordIterator.Current != 0)
            {
                Errln("ERROR:1 setText did not set the iteration position to the beginning of the text, it is"
                      + wordIterator.Current + "\n");
            }

            // Move away from the start, then make sure a new text resets the position.
            wordIterator.Next(2);
            wordIterator.SetText(secondText);
            if (wordIterator.Current != 0)
            {
                Errln("ERROR:2 setText did not reset the iteration position to the beginning of the text, it is"
                      + wordIterator.Current + "\n");
            }

            // Test the CharSequence overload of setText() for a simple case.
            BreakIterator lineIterator = BreakIterator.GetLineInstance(new CultureInfo("en"));
            ICharSequence sentence     = "Hello, World. ".ToCharSequence();
            // Expected Line Brks  ^      ^      ^
            //                     0123456789012345
            var expectedBreaks = new List<int> { 0, 7, 14 };

            lineIterator.SetText(sentence);
            int position = lineIterator.First();
            while (position != BreakIterator.Done)
            {
                assertTrue("", expectedBreaks.Contains(position));
                position = lineIterator.Next();
            }
            assertEquals("", sentence.Length, lineIterator.Current);
        }
Example #19
0
        // Regression test for bug 12519: the valid locale is reported per iterator and
        // preserved by Clone(), while locales do not take part in equality.
        public void TestBug12519()
        {
            var englishIterator = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(ULocale.ENGLISH);
            var frenchIterator  = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(ULocale.FRANCE);

            assertEquals("", ULocale.ENGLISH, englishIterator.GetLocale(ULocale.VALID_LOCALE));
            assertEquals("", ULocale.FRENCH, frenchIterator.GetLocale(ULocale.VALID_LOCALE));
            assertEquals("Locales do not participate in BreakIterator equality.", englishIterator, frenchIterator);

            // A clone equals its source and keeps the source's valid locale.
            var englishClone = (RuleBasedBreakIterator)englishIterator.Clone();
            assertEquals("", englishIterator, englishClone);
            assertEquals("", ULocale.ENGLISH, englishClone.GetLocale(ULocale.VALID_LOCALE));

            var frenchClone = (RuleBasedBreakIterator)frenchIterator.Clone();
            assertEquals("", frenchIterator, frenchClone);
            assertEquals("", ULocale.FRENCH, frenchClone.GetLocale(ULocale.VALID_LOCALE));
        }
Example #20
0
        // Regression test for bug 12519: ValidCulture is reported per iterator and
        // preserved by Clone(), while cultures do not take part in equality.
        public void TestBug12519()
        {
            var englishIterator = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(new UCultureInfo("en"));
            var frenchIterator  = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(new UCultureInfo("fr_FR"));

            assertEquals("", new UCultureInfo("en"), englishIterator.ValidCulture);
            assertEquals("", new UCultureInfo("fr"), frenchIterator.ValidCulture);
            assertEquals("Locales do not participate in BreakIterator equality.", englishIterator, frenchIterator);

            // A clone equals its source and keeps the source's valid culture.
            var englishClone = (RuleBasedBreakIterator)englishIterator.Clone();
            assertEquals("", englishIterator, englishClone);
            assertEquals("", new UCultureInfo("en"), englishClone.ValidCulture);

            var frenchClone = (RuleBasedBreakIterator)frenchIterator.Clone();
            assertEquals("", frenchIterator, frenchClone);
            assertEquals("", new UCultureInfo("fr"), frenchClone.ValidCulture);
        }
Example #21
0
        // Exercises Preceding(-1): BreakIterator.Done is expected when the text is
        // null, and 0 is expected when the offset lies before the start of the text.
        public void TestPreceding()
        {
            RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator(".;");

            // Tests when "if (fText == null || offset > fText.getEndIndex())" is true
            rbbi.SetText((CharacterIterator)null);
            if (rbbi.Preceding(-1) != BreakIterator.Done)
            {
                // The message now names the value actually checked (Done, not 0).
                Errln("RuleBasedBreakIterator.Preceding(-1) was suppose to return "
                      + "BreakIterator.Done when the object has a fText of null.");
            }

            // Tests when "else if (offset < fText.getBeginIndex())" is true
            rbbi.SetText("dummy");
            if (rbbi.Preceding(-1) != 0)
            {
                Errln("RuleBasedBreakIterator.Preceding(-1) was suppose to return "
                      + "0 when the object has a fText of dummy.");
            }
        }
Example #22
0
 /// <summary>
 /// Clones the shared iterator and walks it over the test data 100 times, asserting that
 /// boundaries occur every four code units. An assertion failure is stored in the shared
 /// error slot instead of escaping the thread.
 /// </summary>
 public override void Run()
 {
     try
     {
         var privateIterator = (RuleBasedBreakIterator)bi.Clone();
         privateIterator.SetText(dataToBreak);

         for (int pass = 0; pass < 100; pass++)
         {
             int expectedBreak = 0;
             int actualBreak = privateIterator.First();
             while (actualBreak != BreakIterator.Done)
             {
                 assertEquals("", expectedBreak, actualBreak);
                 actualBreak = privateIterator.Next();
                 expectedBreak += 4;
             }

             // The counter has stepped once past the final boundary at the end of the data.
             assertEquals("", dataToBreak.Length + 4, expectedBreak);
         }
     }
     catch (AssertionException failure)
     {
         assertErr[0] = failure;
     }
 }
Example #23
0
        // Exercises Equals(): iterators differ when only one has null text, are equal
        // when both have null text, and never equal objects of unrelated types.
        public void TestEquals()
        {
            var first  = new RuleBasedBreakIterator(".;");
            var second = new RuleBasedBreakIterator(".;");

            // TODO: Tests when "if (fRData != other.fRData && (fRData == null || other.fRData == null))" is true

            // Tests when "if (fText == null || other.fText == null)" is true
            first.SetText((CharacterIterator)null);
            bool equalWithOneNullText = first.Equals(second);
            if (equalWithOneNullText)
            {
                Errln("RuleBasedBreakIterator.equals(Object) was not suppose to return "
                      + "true when the other object has a null fText.");
            }

            // Tests when "if (fText == null && other.fText == null)" is true
            second.SetText((CharacterIterator)null);
            bool equalWithBothNullText = first.Equals(second);
            if (!equalWithBothNullText)
            {
                Errln("RuleBasedBreakIterator.equals(Object) was not suppose to return "
                      + "false when both objects has a null fText.");
            }

            // Tests when an exception occurs
            bool equalsInteger = first.Equals(0);
            if (equalsInteger)
            {
                Errln("RuleBasedBreakIterator.equals(Object) was suppose to return " + "false when comparing to integer 0.");
            }

            bool equalsFloat = first.Equals(0.0);
            if (equalsFloat)
            {
                Errln("RuleBasedBreakIterator.equals(Object) was suppose to return " + "false when comparing to float 0.0.");
            }

            bool equalsString = first.Equals("0");
            if (equalsString)
            {
                Errln("RuleBasedBreakIterator.equals(Object) was suppose to return "
                      + "false when comparing to string '0'.");
            }
        }
Example #24
0
        private static readonly RuleBasedBreakIterator WordInstance     = LoadBreakRules("jdkword.brk"); // LUCENENET: CA1810: Initialize reference type static fields inline

        /// <summary>
        /// Creates a <see cref="RuleBasedBreakIterator"/> from the compiled rules in the
        /// embedded resource named <paramref name="fileName"/>.
        /// </summary>
        /// <param name="fileName">Name of the embedded resource that holds the compiled rules.</param>
        /// <returns>The iterator created from the compiled rules.</returns>
        private static RuleBasedBreakIterator LoadBreakRules(string fileName)
        {
            using (Stream compiledRules = typeof(JdkBreakIterator).FindAndGetManifestResourceStream(fileName))
            {
                return RuleBasedBreakIterator.GetInstanceFromCompiledRules(compiledRules);
            }
        }
Example #25
0
 /// <summary>
 /// Stores the inputs the worker uses when it runs.
 /// </summary>
 /// <param name="dataToBreak">Text to run the break iterator over.</param>
 /// <param name="bi">Break iterator supplied by the creator of the worker.</param>
 /// <param name="assertErr">Array used to hand an assertion failure back to the creator.</param>
 public WorkerThread(string dataToBreak, RuleBasedBreakIterator bi, AssertionException[] assertErr)
 {
     this.assertErr = assertErr;
     this.bi = bi;
     this.dataToBreak = dataToBreak;
 }
Example #26
0
        // Exercises First(), Next(), Next(n) and Following(offset) on the word, character,
        // sentence and line break iterators. Throughout, p holds the previous boundary and
        // q the boundary just returned; each pair is handed to doTest together with the
        // expected offset and the expected text between the two positions (doTest is
        // defined elsewhere -- presumably it reports a mismatch; verify against its definition).
        // Every step depends on the iterator position left by the previous call, so the
        // statement order must not be changed.
        public void TestFirstNextFollowing()
        {
            int    p, q;
            String testString = "This is a word break. Isn't it? 2.25";

            Logln("Testing first() and next(), following() with custom rules");
            Logln("testing word iterator - string :- \"" + testString + "\"\n");

            // ---- Word iterator ----
            RuleBasedBreakIterator wordIter1 = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(CultureInfo.CurrentCulture);

            wordIter1.SetText(testString);
            p = wordIter1.First();
            if (p != 0)
            {
                Errln("ERROR: first() returned" + p + "instead of 0");
            }
            // Next(n) is expected to advance n boundaries from the current position.
            q = wordIter1.Next(9);
            doTest(testString, p, q, 20, "This is a word break");
            p = q;
            q = wordIter1.Next();
            doTest(testString, p, q, 21, ".");
            p = q;
            q = wordIter1.Next(3);
            doTest(testString, p, q, 28, " Isn't ");
            p = q;
            q = wordIter1.Next(2);
            doTest(testString, p, q, 31, "it?");
            // Following(offset) is expected to return the first boundary after the offset.
            q = wordIter1.Following(2);
            doTest(testString, 2, q, 4, "is");
            q = wordIter1.Following(22);
            doTest(testString, 22, q, 27, "Isn't");
            // At the last boundary both Next() and Following() are expected to return Done.
            wordIter1.Last();
            p = wordIter1.Next();
            q = wordIter1.Following(wordIter1.Last());
            if (p != BreakIterator.Done || q != BreakIterator.Done)
            {
                Errln("ERROR: next()/following() at last position returned #"
                      + p + " and " + q + " instead of" + testString.Length + "\n");
            }

            // ---- Character iterator ----
            RuleBasedBreakIterator charIter1 = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance(CultureInfo.CurrentCulture);

            testString = "Write hindi here. ";
            Logln("testing char iter - string:- \"" + testString + "\"");
            charIter1.SetText(testString);
            p = charIter1.First();
            if (p != 0)
            {
                Errln("ERROR: first() returned" + p + "instead of 0");
            }
            q = charIter1.Next();
            doTest(testString, p, q, 1, "W");
            p = q;
            q = charIter1.Next(4);
            doTest(testString, p, q, 5, "rite");
            p = q;
            q = charIter1.Next(12);
            doTest(testString, p, q, 17, " hindi here.");
            p = q;
            // A negative count is expected to move backward.
            q = charIter1.Next(-6);
            doTest(testString, p, q, 11, " here.");
            p = q;
            q = charIter1.Next(6);
            doTest(testString, p, q, 17, " here.");
            p = charIter1.Following(charIter1.Last());
            q = charIter1.Next(charIter1.Last());
            if (p != BreakIterator.Done || q != BreakIterator.Done)
            {
                Errln("ERROR: following()/next() at last position returned #"
                      + p + " and " + q + " instead of" + testString.Length);
            }

            // ---- Sentence iterator ----
            testString = "Hello! how are you? I'am fine. Thankyou. How are you doing? This  costs $20,00,000.";
            RuleBasedBreakIterator sentIter1 = (RuleBasedBreakIterator)BreakIterator.GetSentenceInstance(CultureInfo.CurrentCulture);

            Logln("testing sentence iter - String:- \"" + testString + "\"");
            sentIter1.SetText(testString);
            p = sentIter1.First();
            if (p != 0)
            {
                Errln("ERROR: first() returned" + p + "instead of 0");
            }
            q = sentIter1.Next();
            doTest(testString, p, q, 7, "Hello! ");
            p = q;
            q = sentIter1.Next(2);
            doTest(testString, p, q, 31, "how are you? I'am fine. ");
            p = q;
            q = sentIter1.Next(-2);
            doTest(testString, p, q, 7, "how are you? I'am fine. ");
            p = q;
            q = sentIter1.Next(4);
            doTest(testString, p, q, 60, "how are you? I'am fine. Thankyou. How are you doing? ");
            p = q;
            q = sentIter1.Next();
            doTest(testString, p, q, 83, "This  costs $20,00,000.");
            q = sentIter1.Following(1);
            doTest(testString, 1, q, 7, "ello! ");
            q = sentIter1.Following(10);
            doTest(testString, 10, q, 20, " are you? ");
            q = sentIter1.Following(20);
            doTest(testString, 20, q, 31, "I'am fine. ");
            p = sentIter1.Following(sentIter1.Last());
            q = sentIter1.Next(sentIter1.Last());
            if (p != BreakIterator.Done || q != BreakIterator.Done)
            {
                Errln("ERROR: following()/next() at last position returned #"
                      + p + " and " + q + " instead of" + testString.Length);
            }

            // ---- Line iterator ----
            testString = "Hello! how\r\n (are)\r you? I'am fine- Thankyou. foo\u00a0bar How, are, you? This, costs $20,00,000.";
            Logln("(UnicodeString)testing line iter - String:- \"" + testString + "\"");
            RuleBasedBreakIterator lineIter1 = (RuleBasedBreakIterator)BreakIterator.GetLineInstance(CultureInfo.CurrentCulture);

            lineIter1.SetText(testString);
            p = lineIter1.First();
            if (p != 0)
            {
                Errln("ERROR: first() returned" + p + "instead of 0");
            }
            q = lineIter1.Next();
            doTest(testString, p, q, 7, "Hello! ");
            // NOTE(review): the assignment below appears twice in a row; the second one is redundant.
            p = q;
            p = q;
            q = lineIter1.Next(4);
            doTest(testString, p, q, 20, "how\r\n (are)\r ");
            p = q;
            q = lineIter1.Next(-4);
            doTest(testString, p, q, 7, "how\r\n (are)\r ");
            p = q;
            q = lineIter1.Next(6);
            doTest(testString, p, q, 30, "how\r\n (are)\r you? I'am ");
            p = q;
            q = lineIter1.Next();
            doTest(testString, p, q, 36, "fine- ");
            p = q;
            q = lineIter1.Next(2);
            doTest(testString, p, q, 54, "Thankyou. foo\u00a0bar ");
            q = lineIter1.Following(60);
            doTest(testString, 60, q, 64, "re, ");
            q = lineIter1.Following(1);
            doTest(testString, 1, q, 7, "ello! ");
            q = lineIter1.Following(10);
            doTest(testString, 10, q, 12, "\r\n");
            q = lineIter1.Following(20);
            doTest(testString, 20, q, 25, "you? ");
            p = lineIter1.Following(lineIter1.Last());
            q = lineIter1.Next(lineIter1.Last());
            if (p != BreakIterator.Done || q != BreakIterator.Done)
            {
                Errln("ERROR: following()/next() at last position returned #"
                      + p + " and " + q + " instead of" + testString.Length);
            }
        }
Example #27
0
        // Exercises Last(), Previous() and Preceding(offset) on the word, sentence and
        // line break iterators. Throughout, p holds the previous boundary and q the
        // boundary just returned; each pair is handed to doTest together with the
        // expected offset and the expected text between the two positions (doTest is
        // defined elsewhere). Every step depends on the iterator position left by the
        // previous call, so the statement order matters.
        public void TestLastPreviousPreceding()
        {
            int    p, q;
            String testString = "This is a word break. Isn't it? 2.25 dollars";

            Logln("Testing last(),previous(), preceding() with custom rules");
            Logln("testing word iteration for string \"" + testString + "\"");

            // ---- Word iterator ----
            RuleBasedBreakIterator wordIter1 = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(new CultureInfo("en"));

            wordIter1.SetText(testString);
            p = wordIter1.Last();
            if (p != testString.Length)
            {
                Errln("ERROR: last() returned" + p + "instead of" + testString.Length);
            }
            q = wordIter1.Previous();
            doTest(testString, p, q, 37, "dollars");
            p = q;
            q = wordIter1.Previous();
            doTest(testString, p, q, 36, " ");
            // Preceding(offset) is expected to return the last boundary before the offset.
            q = wordIter1.Preceding(25);
            doTest(testString, 25, q, 22, "Isn");
            p = q;
            q = wordIter1.Previous();
            doTest(testString, p, q, 21, " ");
            q = wordIter1.Preceding(20);
            doTest(testString, 20, q, 15, "break");
            // There is no boundary before the first one, so Done is expected.
            p = wordIter1.Preceding(wordIter1.First());
            if (p != BreakIterator.Done)
            {
                Errln("ERROR: preceding()  at starting position returned #" + p + " instead of 0");
            }

            // ---- Sentence iterator ----
            testString = "Hello! how are you? I'am fine. Thankyou. How are you doing? This  costs $20,00,000.";
            Logln("testing sentence iter - String:- \"" + testString + "\"");
            RuleBasedBreakIterator sentIter1 = (RuleBasedBreakIterator)BreakIterator.GetSentenceInstance(CultureInfo.CurrentCulture);

            sentIter1.SetText(testString);
            p = sentIter1.Last();
            if (p != testString.Length)
            {
                Errln("ERROR: last() returned" + p + "instead of " + testString.Length);
            }
            q = sentIter1.Previous();
            doTest(testString, p, q, 60, "This  costs $20,00,000.");
            p = q;
            q = sentIter1.Previous();
            doTest(testString, p, q, 41, "How are you doing? ");
            q = sentIter1.Preceding(40);
            doTest(testString, 40, q, 31, "Thankyou.");
            q = sentIter1.Preceding(25);
            doTest(testString, 25, q, 20, "I'am ");
            sentIter1.First();
            p = sentIter1.Previous();
            q = sentIter1.Preceding(sentIter1.First());
            if (p != BreakIterator.Done || q != BreakIterator.Done)
            {
                Errln("ERROR: previous()/preceding() at starting position returned #"
                      + p + " and " + q + " instead of 0\n");
            }

            // ---- Line iterator ----
            testString = "Hello! how are you? I'am fine. Thankyou. How are you doing? This\n costs $20,00,000.";
            Logln("testing line iter - String:- \"" + testString + "\"");
            RuleBasedBreakIterator lineIter1 = (RuleBasedBreakIterator)BreakIterator.GetLineInstance(CultureInfo.CurrentCulture);

            lineIter1.SetText(testString);
            p = lineIter1.Last();
            if (p != testString.Length)
            {
                Errln("ERROR: last() returned" + p + "instead of " + testString.Length);
            }
            q = lineIter1.Previous();
            doTest(testString, p, q, 72, "$20,00,000.");
            p = q;
            q = lineIter1.Previous();
            doTest(testString, p, q, 66, "costs ");
            q = lineIter1.Preceding(40);
            doTest(testString, 40, q, 31, "Thankyou.");
            q = lineIter1.Preceding(25);
            doTest(testString, 25, q, 20, "I'am ");
            lineIter1.First();
            p = lineIter1.Previous();
            // Use the line iterator's own first boundary here; the sentence iterator
            // belongs to the previous section and must not be touched by this check.
            q = lineIter1.Preceding(lineIter1.First());
            if (p != BreakIterator.Done || q != BreakIterator.Done)
            {
                Errln("ERROR: previous()/preceding() at starting position returned #"
                      + p + " and " + q + " instead of 0\n");
            }
        }
Example #28
0
        /// <summary>
        /// Creates a break iterator of the requested <paramref name="kind"/> for
        /// <paramref name="locale"/> from the compiled rules named in the break-iterator
        /// resource bundle.
        /// </summary>
        /// <param name="locale">Locale whose bundle and keywords ("lb", "ss") select the rules.</param>
        /// <param name="kind">One of the <c>BreakIterator.KIND_*</c> values; also used to index <c>KIND_NAMES</c>.</param>
        /// <returns>
        /// The rule-based iterator, or that iterator wrapped by a filtered break iterator when
        /// <paramref name="kind"/> is <c>KIND_SENTENCE</c> and the "ss" keyword equals "standard".
        /// </returns>
        /// <exception cref="MissingManifestResourceException">
        /// Wraps any exception raised while looking up or loading the binary rules.
        /// </exception>
        private static BreakIterator CreateBreakInstance(ULocale locale, int kind)
        {
            ICUResourceBundle bundle = ICUResourceBundle.GetBundleInstance(
                ICUData.ICU_BRKITR_BASE_NAME, locale, ICUResourceBundle.OpenType.LOCALE_ROOT);

            // For line breaking, a recognized "lb" keyword value selects a rule variant.
            string variantSuffix = null;
            if (kind == BreakIterator.KIND_LINE)
            {
                string lineBreakStyle = locale.GetKeywordValue("lb");
                bool isKnownStyle = lineBreakStyle != null
                    && (lineBreakStyle.Equals("strict") || lineBreakStyle.Equals("normal") || lineBreakStyle.Equals("loose"));
                if (isKnownStyle)
                {
                    variantSuffix = "_" + lineBreakStyle;
                }
            }

            //
            //  Get the binary rules.
            //
            ByteBuffer compiledRules;
            try
            {
                string boundaryKey = KIND_NAMES[kind];
                if (variantSuffix != null)
                {
                    boundaryKey += variantSuffix;
                }

                string ruleFile = bundle.GetStringWithFallback("boundaries/" + boundaryKey);
                compiledRules = ICUBinary.GetData(ICUData.ICU_BRKITR_NAME + '/' + ruleFile);
            }
            catch (Exception e)
            {
                throw new MissingManifestResourceException(e.ToString(), e /*, "", ""*/);
            }

            //
            // Create a normal RuleBasedBreakIterator.
            //
            RuleBasedBreakIterator ruleIterator = null;
            try
            {
#pragma warning disable 612, 618
                ruleIterator = RuleBasedBreakIterator.GetInstanceFromCompiledRules(compiledRules);
#pragma warning restore 612, 618
            }
            catch (IOException e)
            {
                // Shouldn't be possible to get here.
                // If it happens, the compiled rules are probably corrupted in some way.
                Assert.Fail(e);
            }

            // TODO: Determine valid and actual locale correctly.
            ULocale bundleLocale = ULocale.ForLocale(bundle.GetLocale());
            ruleIterator.SetLocale(bundleLocale, bundleLocale);
            ruleIterator.BreakType = kind;

            // Only sentence iterators can be wrapped with the filtered break iterator.
            if (kind != BreakIterator.KIND_SENTENCE)
            {
                return ruleIterator;
            }

            string suppressionKeyword = locale.GetKeywordValue("ss");
            if (suppressionKeyword == null || !suppressionKeyword.Equals("standard"))
            {
                return ruleIterator;
            }

            ULocale baseLocale = new ULocale(locale.GetBaseName());
            return FilteredBreakIteratorBuilder.GetInstance(baseLocale).WrapIteratorWithFilter(ruleIterator);
        }
 /// <summary>
 /// Wraps the given rule-based break iterator.
 /// </summary>
 /// <param name="rbbi">Iterator stored in the wrapper's <c>rbbi</c> field.</param>
 internal RBBIWrapper(RuleBasedBreakIterator rbbi) => this.rbbi = rbbi;