/// <summary>
/// Builds a text in which every code point from 0 up to (but excluding) 0x110000
/// appears four times in a row followed by a space, then walks that text with a
/// break iterator of every kind from <c>KIND_CHARACTER</c> through <c>KIND_TITLE</c>
/// and asserts that each reported boundary is strictly greater than the previous one.
/// </summary>
public void TestBreakAllChars()
{
    // Make a "word" from each code point, separated by spaces.
    // For dictionary based breaking, runs the start-of-range
    // logic with all possible dictionary characters.
    StringBuilder sb = new StringBuilder();
    for (int c = 0; c < 0x110000; ++c)
    {
        sb.AppendCodePoint(c);
        sb.AppendCodePoint(c);
        sb.AppendCodePoint(c);
        sb.AppendCodePoint(c);
        sb.Append(' ');
    }
    string s = sb.ToString();

    for (int breakKind = BreakIterator.KIND_CHARACTER; breakKind <= BreakIterator.KIND_TITLE; ++breakKind)
    {
        RuleBasedBreakIterator bi =
            (RuleBasedBreakIterator)BreakIterator.GetBreakInstance(ULocale.ENGLISH, breakKind);
        bi.SetText(s);

        int lastb = -1;
        for (int b = bi.First(); b != BreakIterator.Done; b = bi.Next())
        {
            assertTrue("(lastb < b) : (" + lastb + " < " + b + ")", lastb < b);

            // Remember this boundary so the next iteration is compared against it;
            // without this update the assertion only ever checked -1 < b.
            lastb = b;
        }
    }
}
/// <summary>
/// Compares hash codes of character and word break iterators (and clones of them)
/// that were all given the same text: the character iterators and their clone must
/// agree with each other, the word iterator must agree with its clone, and the
/// character and word families must differ.
/// </summary>
public void TestHashCode()
{
    CultureInfo culture = CultureInfo.CurrentCulture;
    RuleBasedBreakIterator bi1 = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance(culture);
    RuleBasedBreakIterator bi3 = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance(culture);
    RuleBasedBreakIterator bi2 = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(culture);

    Logln("Testing hashCode()");

    bi1.SetText("Hash code");
    bi2.SetText("Hash code");
    bi3.SetText("Hash code");

    RuleBasedBreakIterator bi1clone = (RuleBasedBreakIterator)bi1.Clone();
    RuleBasedBreakIterator bi2clone = (RuleBasedBreakIterator)bi2.Clone();

    int hash1 = bi1.GetHashCode();
    int hash2 = bi2.GetHashCode();
    int hash3 = bi3.GetHashCode();
    int hash1Clone = bi1clone.GetHashCode();
    int hash2Clone = bi2clone.GetHashCode();

    // Iterators built the same way (or cloned) are expected to hash identically.
    bool identicalAgree = hash1 == hash1Clone
        && hash1 == hash3
        && hash1Clone == hash3
        && hash2 == hash2Clone;
    if (!identicalAgree)
    {
        Errln("ERROR: identical objects have different hashcodes");
    }

    // Character and word iterators are expected to hash differently.
    bool differentDisagree = hash1 != hash2
        && hash2 != hash3
        && hash1Clone != hash2Clone
        && hash1Clone != hash2;
    if (!differentDisagree)
    {
        Errln("ERROR: different objects have same hashcodes");
    }
}
/// <summary>
/// Creates a <c>RuleBasedBreakIterator</c> for the given type and locale, applies it
/// to <paramref name="text"/> and returns the strings the iterator enumerates.
/// </summary>
/// <param name="type">Kind of break iterator to create.</param>
/// <param name="locale">Locale identifier passed to the break iterator constructor.</param>
/// <param name="text">Text handed to the iterator through <c>SetText</c>.</param>
/// <returns>The enumerated segments, copied into a list before the iterator is disposed.</returns>
public IEnumerable<string> Split(BreakIterator.UBreakIteratorType type, string locale, string text)
{
    using (var breakIterator = new RuleBasedBreakIterator(type, locale))
    {
        breakIterator.SetText(text);

        // Copy the segments out while the iterator is still alive. Returning the
        // iterator itself would hand the caller an object that has already been
        // disposed by the time it is enumerated.
        var segments = new List<string>();
        foreach (var segment in breakIterator)
        {
            segments.Add(segment);
        }
        return segments;
    }
}
/// <summary>
/// Calls <c>Dump</c> on the default character break iterator with a writer backed
/// by an in-memory stream and asserts that more than 100 bytes reached the stream.
/// </summary>
public void TestRuledump()
{
    RuleBasedBreakIterator bi = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance();

    using (MemoryStream bos = new MemoryStream())
    using (TextWriter @out = new StreamWriter(bos))
    {
        bi.Dump(@out);

        // StreamWriter buffers its output; push it into the MemoryStream before
        // measuring, otherwise the length may not reflect what was written.
        @out.Flush();

        // Checked inside the using scope: disposing the writer closes the stream,
        // after which Length can no longer be read.
        assertTrue(null, bos.Length > 100);
    }
}
/// <summary>
/// Builds an iterator from a chained rule set, positions it at the start of "abc"
/// and asserts that the first call to <c>Next()</c> returns 3.
/// </summary>
public void TestBug12797()
{
    const string ruleSource = "!!chain; !!forward; $v=b c; a b; $v; !!reverse; .*;";

    RuleBasedBreakIterator iterator = new RuleBasedBreakIterator(ruleSource);
    iterator.SetText("abc");
    iterator.First();

    int boundaryAfterFirst = iterator.Next();
    assertEquals("Rule chaining test", 3, boundaryAfterFirst);
}
/// <summary>
/// Checks <c>First()</c> with a null text (expects <c>BreakIterator.Done</c>) and then
/// <c>First()</c> / <c>Next()</c> on the text "abc" (expects 0 and 1).
/// </summary>
public void TestFirst()
{
    const string firstLabel = "RuleBasedBreakIterator.First()";
    var rbbi = new RuleBasedBreakIterator(".;");

    // Tests when "if (fText == null)" is true
    rbbi.SetText((CharacterIterator)null);
    int boundaryWithoutText = rbbi.First();
    assertEquals(firstLabel, BreakIterator.Done, boundaryWithoutText);

    rbbi.SetText("abc");
    int firstBoundary = rbbi.First();
    assertEquals(firstLabel, 0, firstBoundary);
    int secondBoundary = rbbi.Next();
    assertEquals("RuleBasedBreakIterator.Next()", 1, secondBoundary);
}
/// <summary>
/// Checks that <c>Last()</c> returns <c>BreakIterator.Done</c> when the iterator's
/// text has been set to null.
/// </summary>
public void TestLast()
{
    var rbbi = new RuleBasedBreakIterator(".;");

    // Tests when "if (fText == null)" is true
    rbbi.SetText((CharacterIterator)null);

    int lastBoundary = rbbi.Last();
    if (lastBoundary == BreakIterator.Done)
    {
        return;
    }

    Errln("RuleBasedBreakIterator.Last() was supposed to return "
        + "BreakIterator.Done when the object has a null fText.");
}
/// <summary>
/// Checks that <c>Following(-1)</c> returns 0 when the iterator's text is "dummy".
/// </summary>
public void TestFollowing()
{
    var rbbi = new RuleBasedBreakIterator(".;");

    // Tests when "else if (offset < fText.getBeginIndex())" is true
    rbbi.SetText("dummy");

    int boundary = rbbi.Following(-1);
    if (boundary == 0)
    {
        return;
    }

    Errln("RuleBasedBreakIterator.following(-1) was suppose to return "
        + "0 when the object has a fText of dummy.");
}
/// <summary>
/// Initializes <c>SentenceInstance</c> and <c>WordInstance</c> from the compiled rule
/// resources "jdksent.brk" and "jdkword.brk", in that order.
/// </summary>
static JdkBreakIterator()
{
    var resourceOwner = typeof(JdkBreakIterator);

    using (Stream sentenceRules = resourceOwner.FindAndGetManifestResourceStream("jdksent.brk"))
    {
        SentenceInstance = RuleBasedBreakIterator.GetInstanceFromCompiledRules(sentenceRules);
    }

    using (Stream wordRules = resourceOwner.FindAndGetManifestResourceStream("jdkword.brk"))
    {
        WordInstance = RuleBasedBreakIterator.GetInstanceFromCompiledRules(wordRules);
    }
}
/// <summary>
/// Exercises <c>Equals</c> and <c>Clone</c> across three character iterators (two
/// with the same text, one with different text) and one word iterator, reporting
/// any unexpected equality result through <c>Errln</c>.
/// </summary>
public void TestCloneEquals()
{
    CultureInfo culture = CultureInfo.CurrentCulture;
    RuleBasedBreakIterator bi1 = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance(culture);
    RuleBasedBreakIterator biequal = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance(culture);
    RuleBasedBreakIterator bi3 = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance(culture);
    RuleBasedBreakIterator bi2 = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(culture);

    string testString = "Testing word break iterators's clone() and equals()";
    bi1.SetText(testString);
    bi2.SetText(testString);
    biequal.SetText(testString);
    bi3.SetText("hello");

    Logln("Testing equals()");
    Logln("Testing == and !=");

    // bi1 must equal only the character iterator holding the same text.
    bool bi1EqualityOk = bi1.Equals(biequal) && !bi1.Equals(bi2) && !bi1.Equals(bi3);
    if (!bi1EqualityOk)
    {
        Errln("ERROR:1 RBBI's == and !- operator failed.");
    }

    // The word iterator and the different-text iterator must not match the others.
    bool crossEqualityOk = !bi2.Equals(biequal) && !bi2.Equals(bi1) && !biequal.Equals(bi3);
    if (!crossEqualityOk)
    {
        Errln("ERROR:2 RBBI's == and != operator failed.");
    }

    Logln("Testing clone()");
    RuleBasedBreakIterator bi1clone = (RuleBasedBreakIterator)bi1.Clone();
    RuleBasedBreakIterator bi2clone = (RuleBasedBreakIterator)bi2.Clone();

    bool charCloneOk = bi1clone.Equals(bi1)
        && bi1clone.Equals(biequal)
        && !bi1clone.Equals(bi3)
        && !bi1clone.Equals(bi2);
    if (!charCloneOk)
    {
        Errln("ERROR:1 RBBI's clone() method failed");
    }

    bool wordCloneOk = !bi2clone.Equals(bi1)
        && !bi2clone.Equals(biequal)
        && !bi2clone.Equals(bi3)
        && bi2clone.Equals(bi2);
    if (!wordCloneOk)
    {
        Errln("ERROR:2 RBBI's clone() method failed");
    }

    // Clones must carry equal text, yet clones of different iterators stay unequal.
    bool cloneTextOk = bi1.Text.Equals(bi1clone.Text)
        && bi2clone.Text.Equals(bi2.Text)
        && !bi2clone.Equals(bi1clone);
    if (!cloneTextOk)
    {
        Errln("ERROR: RBBI's clone() method failed");
    }
}
/// <summary>
/// Runs <c>doBoundaryTest</c> over one mixed Latin/Devanagari string, first with an
/// English character iterator and then with an English word iterator, each against
/// its own list of expected boundary offsets.
/// </summary>
public void TestIsBoundary()
{
    string testString1 = "Write here. \u092d\u0301\u0930\u0924 \u0938\u0941\u0902\u0926\u0930 a\u0301u";
    CultureInfo english = new CultureInfo("en");

    // Character boundaries.
    int[] bounds1 = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 20, 21, 22, 23, 25, 26 };
    RuleBasedBreakIterator charIter1 = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance(english);
    charIter1.SetText(testString1);
    doBoundaryTest(charIter1, testString1, bounds1);

    // Word boundaries.
    int[] bounds2 = { 0, 5, 6, 10, 11, 12, 16, 17, 22, 23, 26 };
    RuleBasedBreakIterator wordIter2 = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(english);
    wordIter2.SetText(testString1);
    doBoundaryTest(wordIter2, testString1, bounds2);
}
/// <summary>
/// Creates an "en-US" break iterator of the requested type over a fixed sample
/// sentence and returns every string the iterator enumerates.
/// </summary>
/// <param name="type">Kind of break iterator to create.</param>
/// <returns>The enumerated strings, in enumeration order.</returns>
public List<string> GetEnumerator(BreakIterator.UBreakIteratorType type)
{
    const string sampleText = "Aa bb. Cc 3.5 x? Y?x! Z";

    using (var iterator = new RuleBasedBreakIterator(type, "en-US"))
    {
        iterator.SetText(sampleText);

        var segments = new List<string>();
        foreach (var segment in iterator)
        {
            segments.Add(segment);
        }

        return segments;
    }
}
/// <summary>
/// Opens the named manifest resource and builds a <c>RuleBasedBreakIterator</c>
/// from the compiled rules it contains.
/// </summary>
/// <param name="filename">Resource name passed to <c>FindAndGetManifestResourceStream</c>.</param>
/// <returns>The iterator created from the compiled rules.</returns>
/// <exception cref="Exception">Wraps any <see cref="IOException"/> raised while reading the rules.</exception>
private static RuleBasedBreakIterator ReadBreakIterator(string filename)
{
    using (Stream rules = typeof(DefaultICUTokenizerConfig).FindAndGetManifestResourceStream(filename))
    {
        try
        {
            return RuleBasedBreakIterator.GetInstanceFromCompiledRules(rules);
        }
        catch (IOException ioe)
        {
            throw new Exception(ioe.ToString(), ioe);
        }
    }
}
/// <summary>
/// Starts four <c>WorkerThread</c> jobs that share one line break iterator and one
/// error slot, waits for all of them, and rethrows the assertion failure recorded
/// in the slot, if any.
/// </summary>
public void TestBug12873()
{
    // Bug with RuleBasedBreakIterator's internal structure for recording potential look-ahead
    // matches not being cloned when a break iterator is cloned. This resulted in usage
    // collisions if the original break iterator and its clone were used concurrently.

    // The Line Break rules for Regional Indicators make use of look-ahead rules, and
    // show the bug. 1F1E6 = \uD83C\uDDE6 = REGIONAL INDICATOR SYMBOL LETTER A
    // Regional indicators group into pairs, expect breaks after two code points, which
    // is after four 16 bit code units.
    string dataToBreak = "\uD83C\uDDE6\uD83C\uDDE6\uD83C\uDDE6\uD83C\uDDE6\uD83C\uDDE6\uD83C\uDDE6";
    RuleBasedBreakIterator bi = (RuleBasedBreakIterator)BreakIterator.GetLineInstance();

    // Single-element array: saves an error found from within a thread.
    AssertionException[] assertErr = new AssertionException[1];

    const int workerCount = 4;
    List<ThreadJob> workers = new List<ThreadJob>();
    while (workers.Count < workerCount)
    {
        workers.Add(new WorkerThread(dataToBreak, bi, assertErr));
    }

    for (int i = 0; i < workers.Count; i++)
    {
        workers[i].Start();
    }

    for (int i = 0; i < workers.Count; i++)
    {
#if FEATURE_THREADINTERRUPT
        try
        {
#endif
            workers[i].Join();
#if FEATURE_THREADINTERRUPT
        }
        catch (ThreadInterruptedException e)
        {
            fail(e.ToString());
        }
#endif
    }

    // JUnit wont see failures from within the worker threads, so
    // check again if one occurred.
    if (assertErr[0] != null)
    {
        throw assertErr[0];
    }
}
/// <summary>
/// Compares the <c>ToString()</c> output of a character iterator, a clone of it,
/// and a word iterator: the clone must match its source and the word iterator
/// must match neither.
/// </summary>
public void TestToString()
{
    RuleBasedBreakIterator bi1 = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance(CultureInfo.CurrentCulture);
    RuleBasedBreakIterator bi2 = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(CultureInfo.CurrentCulture);
    Logln("Testing toString()");

    bi1.SetText("Hello there");
    RuleBasedBreakIterator bi3 = (RuleBasedBreakIterator)bi1.Clone();

    string charDescription = bi1.ToString();
    string wordDescription = bi2.ToString();
    string cloneDescription = bi3.ToString();

    bool descriptionsConsistent = !wordDescription.Equals(cloneDescription)
        && !charDescription.Equals(wordDescription)
        && charDescription.Equals(cloneDescription);
    if (!descriptionsConsistent)
    {
        Errln("ERROR: error in toString() method");
    }
}
/// <summary>
/// Checks <c>Current</c> for a null text (expects <c>BreakIterator.Done</c>) and for
/// the freshly set text "dummy" (expects 0).
/// </summary>
public void TestCurrent()
{
    var rbbi = new RuleBasedBreakIterator(".;");

    // Tests when "(fText != null) ? fText.getIndex() : BreakIterator.Done" is true and false
    rbbi.SetText((CharacterIterator)null);
    int positionWithoutText = rbbi.Current;
    if (positionWithoutText != BreakIterator.Done)
    {
        Errln("RuleBasedBreakIterator.Current was suppose to return "
            + "BreakIterator.Done when the object has a fText of null.");
    }

    rbbi.SetText("dummy");
    int positionWithText = rbbi.Current;
    if (positionWithText != 0)
    {
        Errln("RuleBasedBreakIterator.Current was suppose to return "
            + "0 when the object has a fText of dummy.");
    }
}
/// <summary>
/// Clones an iterator whose text was set to null and reports an error if the
/// clone's <c>Text</c> is non-null or if any exception is thrown along the way.
/// </summary>
public void TestClone()
{
    var rbbi = new RuleBasedBreakIterator(".;");
    try
    {
        rbbi.SetText((CharacterIterator)null);

        var copy = (RuleBasedBreakIterator)rbbi.Clone();
        bool cloneHasText = copy.Text != null;
        if (cloneHasText)
        {
            Errln("RuleBasedBreakIterator.clone() was suppose to return "
                + "the same object because fText is set to null.");
        }
    }
    catch (Exception)
    {
        Errln("RuleBasedBreakIterator.clone() was not suppose to return "
            + "an exception.");
    }
}
/// <summary>
/// Checks that <c>SetText</c> on a word iterator exposes the text through
/// <c>Text</c> and resets <c>Current</c> to 0, then checks the
/// <c>ICharSequence</c> overload of <c>SetText</c> on an English line iterator.
/// </summary>
public void TestGetSetText()
{
    Logln("Testing getText setText ");
    string str1 = "first string.";
    string str2 = "Second string.";

    RuleBasedBreakIterator wordIter1 = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(CultureInfo.CurrentCulture);
    CharacterIterator text1 = new StringCharacterIterator(str1);

    wordIter1.SetText(str1);
    bool textRoundTrips = wordIter1.Text.Equals(text1);
    if (!textRoundTrips)
    {
        Errln("ERROR:1 error in setText or getText ");
    }
    if (wordIter1.Current != 0)
    {
        Errln("ERROR:1 setText did not set the iteration position to the beginning of the text, it is"
            + wordIter1.Current + "\n");
    }

    // Move away from the start, then confirm a second SetText resets the position.
    wordIter1.Next(2);
    wordIter1.SetText(str2);
    if (wordIter1.Current != 0)
    {
        Errln("ERROR:2 setText did not reset the iteration position to the beginning of the text, it is"
            + wordIter1.Current + "\n");
    }

    // Test the CharSequence overload of setText() for a simple case.
    BreakIterator lineIter = BreakIterator.GetLineInstance(new CultureInfo("en"));
    ICharSequence csText = "Hello, World. ".ToCharSequence();

    // Every reported line break must be one of these offsets.
    List<int> expected = new List<int> { 0, 7, 14 };

    lineIter.SetText(csText);
    int pos = lineIter.First();
    while (pos != BreakIterator.Done)
    {
        assertTrue("", expected.Contains(pos));
        pos = lineIter.Next();
    }
    assertEquals("", csText.Length, lineIter.Current);
}
/// <summary>
/// Checks the valid locale reported by English and French word iterators, that
/// the two iterators nevertheless compare equal, and that clones keep both the
/// equality and the valid locale of their source.
/// </summary>
public void TestBug12519()
{
    var englishIter = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(ULocale.ENGLISH);
    var frenchIter = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(ULocale.FRANCE);

    assertEquals("", ULocale.ENGLISH, englishIter.GetLocale(ULocale.VALID_LOCALE));
    assertEquals("", ULocale.FRENCH, frenchIter.GetLocale(ULocale.VALID_LOCALE));
    assertEquals("Locales do not participate in BreakIterator equality.", englishIter, frenchIter);

    // A clone must equal its source and report the same valid locale.
    var englishClone = (RuleBasedBreakIterator)englishIter.Clone();
    assertEquals("", englishIter, englishClone);
    assertEquals("", ULocale.ENGLISH, englishClone.GetLocale(ULocale.VALID_LOCALE));

    var frenchClone = (RuleBasedBreakIterator)frenchIter.Clone();
    assertEquals("", frenchIter, frenchClone);
    assertEquals("", ULocale.FRENCH, frenchClone.GetLocale(ULocale.VALID_LOCALE));
}
/// <summary>
/// Checks the <c>ValidCulture</c> reported by "en" and "fr_FR" word iterators, that
/// the two iterators nevertheless compare equal, and that clones keep both the
/// equality and the <c>ValidCulture</c> of their source.
/// </summary>
public void TestBug12519()
{
    var englishIter = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(new UCultureInfo("en"));
    var frenchIter = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(new UCultureInfo("fr_FR"));

    assertEquals("", new UCultureInfo("en"), englishIter.ValidCulture);
    assertEquals("", new UCultureInfo("fr"), frenchIter.ValidCulture);
    assertEquals("Locales do not participate in BreakIterator equality.", englishIter, frenchIter);

    // A clone must equal its source and report the same valid culture.
    var englishClone = (RuleBasedBreakIterator)englishIter.Clone();
    assertEquals("", englishIter, englishClone);
    assertEquals("", new UCultureInfo("en"), englishClone.ValidCulture);

    var frenchClone = (RuleBasedBreakIterator)frenchIter.Clone();
    assertEquals("", frenchIter, frenchClone);
    assertEquals("", new UCultureInfo("fr"), frenchClone.ValidCulture);
}
/// <summary>
/// Checks <c>Preceding(-1)</c> for a null text (expects <c>BreakIterator.Done</c>)
/// and for the text "dummy" (expects 0).
/// </summary>
public void TestPreceding()
{
    RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator(".;");

    // Tests when "if (fText == null || offset > fText.getEndIndex())" is true
    rbbi.SetText((CharacterIterator)null);
    if (rbbi.Preceding(-1) != BreakIterator.Done)
    {
        // The message names BreakIterator.Done because that is what the check
        // above expects; it previously claimed the expected value was 0.
        Errln("RuleBasedBreakIterator.Preceding(-1) was suppose to return "
            + "BreakIterator.Done when the object has a fText of null.");
    }

    // Tests when "else if (offset < fText.getBeginIndex())" is true
    rbbi.SetText("dummy");
    if (rbbi.Preceding(-1) != 0)
    {
        Errln("RuleBasedBreakIterator.Preceding(-1) was suppose to return "
            + "0 when the object has a fText of dummy.");
    }
}
/// <summary>
/// Clones the shared break iterator, then walks <c>dataToBreak</c> 100 times,
/// asserting that boundaries arrive at 0, 4, 8, ... Any <c>AssertionException</c>
/// is stored in <c>assertErr[0]</c> instead of propagating.
/// </summary>
public override void Run()
{
    try
    {
        var localBI = (RuleBasedBreakIterator)bi.Clone();
        localBI.SetText(dataToBreak);

        for (int pass = 0; pass < 100; pass++)
        {
            int nextExpectedBreak = 0;
            int actualBreak = localBI.First();
            while (actualBreak != BreakIterator.Done)
            {
                assertEquals("", nextExpectedBreak, actualBreak);
                actualBreak = localBI.Next();
                nextExpectedBreak += 4;
            }

            // The counter is advanced once more after the final boundary is seen.
            assertEquals("", dataToBreak.Length + 4, nextExpectedBreak);
        }
    }
    catch (AssertionException failure)
    {
        assertErr[0] = failure;
    }
}
/// <summary>
/// Checks <c>Equals</c> when one iterator has a null text, when both do, and when
/// the argument is an integer, a floating-point number or a string.
/// </summary>
public void TestEquals()
{
    var rbbi = new RuleBasedBreakIterator(".;");
    var rbbi1 = new RuleBasedBreakIterator(".;");

    // TODO: Tests when "if (fRData != other.fRData && (fRData == null || other.fRData == null))" is true

    // Tests when "if (fText == null || other.fText == null)" is true
    rbbi.SetText((CharacterIterator)null);
    bool equalWithOneNullText = rbbi.Equals(rbbi1);
    if (equalWithOneNullText)
    {
        Errln("RuleBasedBreakIterator.equals(Object) was not suppose to return "
            + "true when the other object has a null fText.");
    }

    // Tests when "if (fText == null && other.fText == null)" is true
    rbbi1.SetText((CharacterIterator)null);
    bool equalWithBothNullText = rbbi.Equals(rbbi1);
    if (!equalWithBothNullText)
    {
        Errln("RuleBasedBreakIterator.equals(Object) was not suppose to return "
            + "false when both objects has a null fText.");
    }

    // Tests when an exception occurs
    bool equalToInteger = rbbi.Equals(0);
    if (equalToInteger)
    {
        Errln("RuleBasedBreakIterator.equals(Object) was suppose to return "
            + "false when comparing to integer 0.");
    }

    bool equalToFloat = rbbi.Equals(0.0);
    if (equalToFloat)
    {
        Errln("RuleBasedBreakIterator.equals(Object) was suppose to return "
            + "false when comparing to float 0.0.");
    }

    bool equalToString = rbbi.Equals("0");
    if (equalToString)
    {
        Errln("RuleBasedBreakIterator.equals(Object) was suppose to return "
            + "false when comparing to string '0'.");
    }
}
// LUCENENET: CA1810: Initialize reference type static fields inline
private static readonly RuleBasedBreakIterator WordInstance = LoadBreakRules("jdkword.brk");

/// <summary>
/// Opens the named manifest resource and creates a break iterator from the
/// compiled rules it contains.
/// </summary>
/// <param name="fileName">Resource name passed to <c>FindAndGetManifestResourceStream</c>.</param>
/// <returns>The iterator created from the compiled rules.</returns>
private static RuleBasedBreakIterator LoadBreakRules(string fileName)
{
    using (Stream rules = typeof(JdkBreakIterator).FindAndGetManifestResourceStream(fileName))
    {
        RuleBasedBreakIterator loaded = RuleBasedBreakIterator.GetInstanceFromCompiledRules(rules);
        return loaded;
    }
}
/// <summary>
/// Stores the text, the break iterator and the error slot this worker operates on.
/// </summary>
/// <param name="dataToBreak">Text stored in the <c>dataToBreak</c> field.</param>
/// <param name="bi">Break iterator stored in the <c>bi</c> field.</param>
/// <param name="assertErr">Array stored in the <c>assertErr</c> field.</param>
public WorkerThread(string dataToBreak, RuleBasedBreakIterator bi, AssertionException[] assertErr)
{
    this.bi = bi;
    this.dataToBreak = dataToBreak;
    this.assertErr = assertErr;
}
/// <summary>
/// Exercises <c>First()</c>, <c>Next()</c>, <c>Next(int)</c> and <c>Following(int)</c> on
/// word, character, sentence and line break iterators obtained for the current
/// culture. Each returned boundary is handed to <c>doTest</c> together with the
/// expected offset and the expected substring; wrong start positions and wrong
/// results at the last position are reported through <c>Errln</c>.
/// </summary>
/// <remarks>
/// The calls are order-dependent: every step relies on the position left behind by
/// the previous one, with <c>p</c> holding the previous boundary and <c>q</c> the new one.
/// NOTE(review): two string literals in this method ("Write hindi here. ..." and
/// "I'am fine. ...") are split across physical lines in this view; presumably the
/// original holds a line or paragraph separator character there - confirm before
/// reformatting.
/// NOTE(review): the line-iterator section contains a duplicated "p = q;" statement;
/// it looks redundant - confirm.
/// </remarks>
public void TestFirstNextFollowing() { int p, q; String testString = "This is a word break. Isn't it? 2.25"; Logln("Testing first() and next(), following() with custom rules"); Logln("testing word iterator - string :- \"" + testString + "\"\n"); RuleBasedBreakIterator wordIter1 = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(CultureInfo.CurrentCulture); wordIter1.SetText(testString); p = wordIter1.First(); if (p != 0) { Errln("ERROR: first() returned" + p + "instead of 0"); } q = wordIter1.Next(9); doTest(testString, p, q, 20, "This is a word break"); p = q; q = wordIter1.Next(); doTest(testString, p, q, 21, "."); p = q; q = wordIter1.Next(3); doTest(testString, p, q, 28, " Isn't "); p = q; q = wordIter1.Next(2); doTest(testString, p, q, 31, "it?"); q = wordIter1.Following(2); doTest(testString, 2, q, 4, "is"); q = wordIter1.Following(22); doTest(testString, 22, q, 27, "Isn't"); wordIter1.Last(); p = wordIter1.Next(); q = wordIter1.Following(wordIter1.Last()); if (p != BreakIterator.Done || q != BreakIterator.Done) { Errln("ERROR: next()/following() at last position returned #" + p + " and " + q + " instead of" + testString.Length + "\n"); } RuleBasedBreakIterator charIter1 = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance(CultureInfo.CurrentCulture); testString = "Write hindi here. 
"; Logln("testing char iter - string:- \"" + testString + "\""); charIter1.SetText(testString); p = charIter1.First(); if (p != 0) { Errln("ERROR: first() returned" + p + "instead of 0"); } q = charIter1.Next(); doTest(testString, p, q, 1, "W"); p = q; q = charIter1.Next(4); doTest(testString, p, q, 5, "rite"); p = q; q = charIter1.Next(12); doTest(testString, p, q, 17, " hindi here."); p = q; q = charIter1.Next(-6); doTest(testString, p, q, 11, " here."); p = q; q = charIter1.Next(6); doTest(testString, p, q, 17, " here."); p = charIter1.Following(charIter1.Last()); q = charIter1.Next(charIter1.Last()); if (p != BreakIterator.Done || q != BreakIterator.Done) { Errln("ERROR: following()/next() at last position returned #" + p + " and " + q + " instead of" + testString.Length); } testString = "Hello! how are you? I'am fine. Thankyou. How are you doing? This costs $20,00,000."; RuleBasedBreakIterator sentIter1 = (RuleBasedBreakIterator)BreakIterator.GetSentenceInstance(CultureInfo.CurrentCulture); Logln("testing sentence iter - String:- \"" + testString + "\""); sentIter1.SetText(testString); p = sentIter1.First(); if (p != 0) { Errln("ERROR: first() returned" + p + "instead of 0"); } q = sentIter1.Next(); doTest(testString, p, q, 7, "Hello! "); p = q; q = sentIter1.Next(2); doTest(testString, p, q, 31, "how are you? I'am fine. "); p = q; q = sentIter1.Next(-2); doTest(testString, p, q, 7, "how are you? I'am fine. "); p = q; q = sentIter1.Next(4); doTest(testString, p, q, 60, "how are you? I'am fine. Thankyou. How are you doing? "); p = q; q = sentIter1.Next(); doTest(testString, p, q, 83, "This costs $20,00,000."); q = sentIter1.Following(1); doTest(testString, 1, q, 7, "ello! "); q = sentIter1.Following(10); doTest(testString, 10, q, 20, " are you? "); q = sentIter1.Following(20); doTest(testString, 20, q, 31, "I'am fine. 
"); p = sentIter1.Following(sentIter1.Last()); q = sentIter1.Next(sentIter1.Last()); if (p != BreakIterator.Done || q != BreakIterator.Done) { Errln("ERROR: following()/next() at last position returned #" + p + " and " + q + " instead of" + testString.Length); } testString = "Hello! how\r\n (are)\r you? I'am fine- Thankyou. foo\u00a0bar How, are, you? This, costs $20,00,000."; Logln("(UnicodeString)testing line iter - String:- \"" + testString + "\""); RuleBasedBreakIterator lineIter1 = (RuleBasedBreakIterator)BreakIterator.GetLineInstance(CultureInfo.CurrentCulture); lineIter1.SetText(testString); p = lineIter1.First(); if (p != 0) { Errln("ERROR: first() returned" + p + "instead of 0"); } q = lineIter1.Next(); doTest(testString, p, q, 7, "Hello! "); p = q; p = q; q = lineIter1.Next(4); doTest(testString, p, q, 20, "how\r\n (are)\r "); p = q; q = lineIter1.Next(-4); doTest(testString, p, q, 7, "how\r\n (are)\r "); p = q; q = lineIter1.Next(6); doTest(testString, p, q, 30, "how\r\n (are)\r you? I'am "); p = q; q = lineIter1.Next(); doTest(testString, p, q, 36, "fine- "); p = q; q = lineIter1.Next(2); doTest(testString, p, q, 54, "Thankyou. foo\u00a0bar "); q = lineIter1.Following(60); doTest(testString, 60, q, 64, "re, "); q = lineIter1.Following(1); doTest(testString, 1, q, 7, "ello! "); q = lineIter1.Following(10); doTest(testString, 10, q, 12, "\r\n"); q = lineIter1.Following(20); doTest(testString, 20, q, 25, "you? "); p = lineIter1.Following(lineIter1.Last()); q = lineIter1.Next(lineIter1.Last()); if (p != BreakIterator.Done || q != BreakIterator.Done) { Errln("ERROR: following()/next() at last position returned #" + p + " and " + q + " instead of" + testString.Length); } }
/// <summary>
/// Exercises <c>Last()</c>, <c>Previous()</c> and <c>Preceding(int)</c> on an English
/// word iterator and on sentence and line iterators obtained for the current
/// culture. Each returned boundary is handed to <c>doTest</c> together with the
/// expected offset and the expected substring; a wrong <c>Last()</c> result and wrong
/// results at the starting position are reported through <c>Errln</c>.
/// </summary>
/// <remarks>
/// The calls are order-dependent: every step relies on the position left behind by
/// the previous one, with <c>p</c> holding the previous boundary and <c>q</c> the new one.
/// NOTE(review): the string literal "How are you doing? ..." is split across
/// physical lines in this view; presumably the original holds a line or paragraph
/// separator character there - confirm before reformatting.
/// NOTE(review): the final line-iterator check calls
/// <c>lineIter1.Preceding(sentIter1.First())</c>; presumably <c>lineIter1.First()</c> was
/// intended - confirm.
/// </remarks>
public void TestLastPreviousPreceding() { int p, q; String testString = "This is a word break. Isn't it? 2.25 dollars"; Logln("Testing last(),previous(), preceding() with custom rules"); Logln("testing word iteration for string \"" + testString + "\""); RuleBasedBreakIterator wordIter1 = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(new CultureInfo("en")); wordIter1.SetText(testString); p = wordIter1.Last(); if (p != testString.Length) { Errln("ERROR: last() returned" + p + "instead of" + testString.Length); } q = wordIter1.Previous(); doTest(testString, p, q, 37, "dollars"); p = q; q = wordIter1.Previous(); doTest(testString, p, q, 36, " "); q = wordIter1.Preceding(25); doTest(testString, 25, q, 22, "Isn"); p = q; q = wordIter1.Previous(); doTest(testString, p, q, 21, " "); q = wordIter1.Preceding(20); doTest(testString, 20, q, 15, "break"); p = wordIter1.Preceding(wordIter1.First()); if (p != BreakIterator.Done) { Errln("ERROR: preceding() at starting position returned #" + p + " instead of 0"); } testString = "Hello! how are you? I'am fine. Thankyou. How are you doing? This costs $20,00,000."; Logln("testing sentence iter - String:- \"" + testString + "\""); RuleBasedBreakIterator sentIter1 = (RuleBasedBreakIterator)BreakIterator.GetSentenceInstance(CultureInfo.CurrentCulture); sentIter1.SetText(testString); p = sentIter1.Last(); if (p != testString.Length) { Errln("ERROR: last() returned" + p + "instead of " + testString.Length); } q = sentIter1.Previous(); doTest(testString, p, q, 60, "This costs $20,00,000."); p = q; q = sentIter1.Previous(); doTest(testString, p, q, 41, "How are you doing? 
"); q = sentIter1.Preceding(40); doTest(testString, 40, q, 31, "Thankyou."); q = sentIter1.Preceding(25); doTest(testString, 25, q, 20, "I'am "); sentIter1.First(); p = sentIter1.Previous(); q = sentIter1.Preceding(sentIter1.First()); if (p != BreakIterator.Done || q != BreakIterator.Done) { Errln("ERROR: previous()/preceding() at starting position returned #" + p + " and " + q + " instead of 0\n"); } testString = "Hello! how are you? I'am fine. Thankyou. How are you doing? This\n costs $20,00,000."; Logln("testing line iter - String:- \"" + testString + "\""); RuleBasedBreakIterator lineIter1 = (RuleBasedBreakIterator)BreakIterator.GetLineInstance(CultureInfo.CurrentCulture); lineIter1.SetText(testString); p = lineIter1.Last(); if (p != testString.Length) { Errln("ERROR: last() returned" + p + "instead of " + testString.Length); } q = lineIter1.Previous(); doTest(testString, p, q, 72, "$20,00,000."); p = q; q = lineIter1.Previous(); doTest(testString, p, q, 66, "costs "); q = lineIter1.Preceding(40); doTest(testString, 40, q, 31, "Thankyou."); q = lineIter1.Preceding(25); doTest(testString, 25, q, 20, "I'am "); lineIter1.First(); p = lineIter1.Previous(); q = lineIter1.Preceding(sentIter1.First()); if (p != BreakIterator.Done || q != BreakIterator.Done) { Errln("ERROR: previous()/preceding() at starting position returned #" + p + " and " + q + " instead of 0\n"); } }
/// <summary>
/// Creates a break iterator of the given kind for <paramref name="locale"/> by
/// looking up the rules file name in the break-iterator resource bundle, loading
/// the compiled rules and building a <c>RuleBasedBreakIterator</c> from them.
/// </summary>
/// <param name="locale">
/// Locale to create the iterator for. Its "lb" keyword (strict/normal/loose) selects
/// a line-break variant; its "ss" keyword set to "standard" wraps a sentence
/// iterator in a filtered break iterator.
/// </param>
/// <param name="kind">One of the <c>BreakIterator.KIND_*</c> values; used as an index into <c>KIND_NAMES</c>.</param>
/// <returns>The rule-based iterator, or the filtered wrapper around it.</returns>
/// <exception cref="MissingManifestResourceException">
/// Thrown when resolving or loading the rules data raises any exception.
/// </exception>
private static BreakIterator CreateBreakInstance(ULocale locale, int kind)
{
    ICUResourceBundle rb = ICUResourceBundle.GetBundleInstance(
        ICUData.ICU_BRKITR_BASE_NAME, locale, ICUResourceBundle.OpenType.LOCALE_ROOT);

    // A recognised "lb" keyword adds a suffix to the resource key for line breaking.
    string typeKeyExt = null;
    if (kind == BreakIterator.KIND_LINE)
    {
        string lbKeyValue = locale.GetKeywordValue("lb");
        bool isKnownLineBreakStyle = lbKeyValue != null
            && (lbKeyValue.Equals("strict") || lbKeyValue.Equals("normal") || lbKeyValue.Equals("loose"));
        if (isKnownLineBreakStyle)
        {
            typeKeyExt = "_" + lbKeyValue;
        }
    }

    //
    // Get the binary rules.
    //
    ByteBuffer bytes = null;
    try
    {
        string typeKey = KIND_NAMES[kind];
        if (typeKeyExt != null)
        {
            typeKey = typeKey + typeKeyExt;
        }

        string brkfname = rb.GetStringWithFallback("boundaries/" + typeKey);
        string rulesFileName = ICUData.ICU_BRKITR_NAME + '/' + brkfname;
        bytes = ICUBinary.GetData(rulesFileName);
    }
    catch (Exception e)
    {
        throw new MissingManifestResourceException(e.ToString(), e /*, "", ""*/);
    }

    //
    // Create a normal RuleBasedBreakIterator.
    //
    RuleBasedBreakIterator iter = null;
    try
    {
#pragma warning disable 612, 618
        iter = RuleBasedBreakIterator.GetInstanceFromCompiledRules(bytes);
#pragma warning restore 612, 618
    }
    catch (IOException e)
    {
        // Shouldn't be possible to get here.
        // If it happens, the compiled rules are probably corrupted in some way.
        Assert.Fail(e);
    }

    // TODO: Determine valid and actual locale correctly.
    ULocale uloc = ULocale.ForLocale(rb.GetLocale());
    iter.SetLocale(uloc, uloc);
    iter.BreakType = kind;

    // filtered break
    if (kind != BreakIterator.KIND_SENTENCE)
    {
        return iter;
    }

    string ssKeyword = locale.GetKeywordValue("ss");
    if (ssKeyword == null || !ssKeyword.Equals("standard"))
    {
        return iter;
    }

    ULocale @base = new ULocale(locale.GetBaseName());
    return FilteredBreakIteratorBuilder.GetInstance(@base).WrapIteratorWithFilter(iter);
}
/// <summary>
/// Creates a wrapper that stores the given break iterator in its <c>rbbi</c> field.
/// </summary>
/// <param name="rbbi">The break iterator to store.</param>
internal RBBIWrapper(RuleBasedBreakIterator rbbi) => this.rbbi = rbbi;