/// <summary>
        /// Verifies that <paramref name="bi"/> treats its whole text as exactly one sentence:
        /// the only boundaries are the begin and end index, and every attempt to move
        /// past either of them yields <c>BreakIterator.Done</c>.
        /// </summary>
        /// <param name="bi">Iterator under test, already positioned on its text.</param>
        /// <param name="text">The expected single sentence; its length must equal the iterated range.</param>
        private void Test1Sentence(BreakIterator bi, String text)
        {
            // Iterator offsets are absolute; offsets into "text" are relative to this begin index.
            int start = bi.Text.BeginIndex;

            assertEquals(start, bi.First());
            int current = bi.Current;

            assertEquals(bi.Text.EndIndex, bi.Next());
            int end = bi.Current;

            // .NET Substring takes (startIndex, length). The length is the distance between the
            // two absolute boundaries; subtracting "start" from "end" twice (as before) produced
            // a too-short length whenever the begin index was not 0.
            assertEquals(text, text.Substring(current - start, end - current));

            assertEquals(text.Length, bi.Last() - start);
            end = bi.Current;
            bi.Previous();
            assertEquals(BreakIterator.Done, bi.Previous());
            int previous = bi.Current;

            assertEquals(text, text.Substring(previous - start, end - previous));
            assertEquals(start, bi.Current);

            // There is no boundary strictly after or strictly before an interior offset.
            assertEquals(BreakIterator.Done, bi.Following(bi.Last() / 2 + start));

            assertEquals(BreakIterator.Done, bi.Preceding(bi.Last() / 2 + start));

            // Multi-step moves that overshoot either end also report Done.
            assertEquals(start, bi.First());
            assertEquals(BreakIterator.Done, bi.Next(13));
            assertEquals(BreakIterator.Done, bi.Next(-8));
        }
 /// <summary>
 /// Verifies an iterator that has no sentences at all: the current, first and last
 /// positions are all 0, and every move or lookup reports <c>BreakIterator.Done</c>.
 /// </summary>
 /// <param name="bi">Iterator under test.</param>
 private void Test0Sentences(BreakIterator bi)
 {
     int done = BreakIterator.Done;

     // The only position is offset 0.
     assertEquals(0, bi.Current);
     assertEquals(0, bi.First());

     // Stepping forward from the start, or backward from the end, goes nowhere.
     assertEquals(done, bi.Next());
     assertEquals(0, bi.Last());
     assertEquals(done, bi.Previous());

     // Offset-based lookups find nothing either.
     assertEquals(done, bi.Following(0));
     assertEquals(done, bi.Preceding(0));

     // Multi-step moves in either direction report Done as well.
     assertEquals(0, bi.First());
     assertEquals(done, bi.Next(13));
     assertEquals(done, bi.Next(-8));
 }
示例#3
0
        /// <summary>
        /// Asserts that two break iterators break the text the same way.
        /// Each iterator is given its own character iterator, then both are driven in
        /// lock-step through Next, First, Last, Previous, Following and Preceding, comparing
        /// the returned boundary and the resulting current position after every call.
        /// </summary>
        /// <param name="one">Text handed to <paramref name="expected"/>; its index range drives the offset loops.</param>
        /// <param name="two">Text handed to <paramref name="actual"/>.</param>
        /// <param name="expected">Reference iterator.</param>
        /// <param name="actual">Iterator under test.</param>
        public void assertSameBreaks(CharacterIterator one, CharacterIterator two, BreakIterator expected, BreakIterator actual)
        {
            expected.SetText(one);
            actual.SetText(two);

            assertEquals(expected.Current, actual.Current);

            // Forward walk until the reference iterator runs out of boundaries.
            int boundary = expected.Current;
            while (boundary != BreakIterator.DONE)
            {
                boundary = expected.Next();
                assertEquals(boundary, actual.Next());
                assertEquals(expected.Current, actual.Current);
            }

            // Jump to the first boundary.
            int firstBoundary = expected.First();
            assertEquals(firstBoundary, actual.First());
            assertEquals(expected.Current, actual.Current);

            // Jump to the last boundary.
            int lastBoundary = expected.Last();
            assertEquals(lastBoundary, actual.Last());
            assertEquals(expected.Current, actual.Current);

            // Backward walk until the reference iterator runs out of boundaries.
            boundary = expected.Current;
            while (boundary != BreakIterator.DONE)
            {
                boundary = expected.Previous();
                assertEquals(boundary, actual.Previous());
                assertEquals(expected.Current, actual.Current);
            }

            // Following(offset) for every offset in the range, always starting from First().
            for (int offset = one.BeginIndex; offset <= one.EndIndex; offset++)
            {
                expected.First();
                actual.First();
                int expectedFollowing = expected.Following(offset);
                assertEquals(expectedFollowing, actual.Following(offset));
                assertEquals(expected.Current, actual.Current);
            }

            // Preceding(offset) for every offset in the range, always starting from Last().
            for (int offset = one.BeginIndex; offset <= one.EndIndex; offset++)
            {
                expected.Last();
                actual.Last();
                int expectedPreceding = expected.Preceding(offset);
                assertEquals(expectedPreceding, actual.Preceding(offset));
                assertEquals(expected.Current, actual.Current);
            }
        }
示例#4
0
        //=========================================================================
        // general test subroutines
        //=========================================================================

        /// <summary>
        /// Walks <paramref name="bi"/> forward from First() with repeated Next() calls and
        /// returns the pieces of <paramref name="text"/> between consecutive boundaries.
        /// Reports an error if First() is not 0, if Next() fails to advance, or if the
        /// iterator reports Done before reaching the end of the text.
        /// </summary>
        /// <param name="bi">Iterator to walk.</param>
        /// <param name="text">Text that the boundary offsets are applied to.</param>
        /// <returns>The substrings between consecutive boundaries, in order.</returns>
        private List <String> _testFirstAndNext(BreakIterator bi, String text)
        {
            int position = bi.First();
            int previous = position;
            List <String> pieces = new List <String>();

            if (position != 0)
            {
                Errln("first() returned " + position + " instead of 0");
            }

            while (position != BreakIterator.Done)
            {
                position = bi.Next();

                if (position == BreakIterator.Done)
                {
                    // The walk must end exactly at the end of the text.
                    if (previous != text.Length)
                    {
                        Errln("next() returned DONE prematurely: offset was "
                              + previous + " instead of " + text.Length);
                    }
                }
                else
                {
                    if (position <= previous)
                    {
                        Errln("next() failed to move forward: next() on position "
                              + previous + " yielded " + position);
                    }

                    // ICU4N: .NET Substring takes a length, not an end offset.
                    int length = position - previous;
                    pieces.Add(text.Substring(previous, length));
                }

                previous = position;
            }

            return pieces;
        }
示例#5
0
 /// <summary>
 /// Sets <paramref name="text"/> on <paramref name="filteredBI"/> and asserts that the
 /// first two Next() calls return offsets 20 and 84, then rewinds the iterator.
 /// </summary>
 /// <param name="filteredBI">Break iterator under test; its text is replaced.</param>
 /// <param name="text">Text to iterate over.</param>
 private void assertFrenchBreakBehavior(BreakIterator filteredBI, String text)
 {
     Logln("Testing French behavior:");
     filteredBI.SetText(text);

     // Both checks carry the same "6th next" label.
     int[] expectedBreaks = { 20, 84 };
     foreach (int expectedBreak in expectedBreaks)
     {
         assertEquals("6th next", expectedBreak, filteredBI.Next());
     }

     // Rewind to the first boundary.
     filteredBI.First();
 }
示例#6
0
        /// <summary>
        /// Sets <paramref name="text"/> on <paramref name="filteredBI"/> and asserts that the
        /// first two Next() calls return offsets 84 and 278, then rewinds the iterator.
        /// </summary>
        /// <param name="filteredBI">Break iterator under test; its text is replaced.</param>
        /// <param name="text">Text to iterate over.</param>
        private void assertEnglishBreakBehavior(BreakIterator filteredBI, String text)
        {
            Logln("Testing English filtered behavior:");
            filteredBI.SetText(text);

            // Both checks carry the same "5th next" label.
            int[] expectedBreaks = { 84, 278 };
            foreach (int expectedBreak in expectedBreaks)
            {
                assertEquals("5th next", expectedBreak, filteredBI.Next());
            }

            // Rewind to the first boundary.
            filteredBI.First();
        }
        /// <summary>
        /// Walks <paramref name="bi"/> over TEXT forwards and then backwards, checking that
        /// the three spans between boundaries equal SENTENCES[0..2], and then checks a fixed
        /// set of Following/Preceding/Next(n) results.
        /// </summary>
        /// <remarks>
        /// LUCENENET NOTE: refactored relative to the Java original because .NET Substring
        /// takes (startIndex, length), so each length is computed as end minus start.
        /// </remarks>
        /// <param name="bi">Iterator under test, already set to TEXT.</param>
        private void Do3SentenceTest(BreakIterator bi)
        {
            assertEquals(0, bi.Current);
            assertEquals(0, bi.First());

            // Forward: sentences 0 and 1 are delimited by plain Next() calls.
            int from = bi.Current;
            int to = bi.Next();
            assertEquals(SENTENCES[0], TEXT.Substring(from, to - from));

            from = bi.Current;
            to = bi.Next();
            assertEquals(SENTENCES[1], TEXT.Substring(from, to - from));

            // The third Next() must land on the end index of the text.
            from = bi.Current;
            assertEquals(bi.Text.EndIndex, bi.Next());
            to = bi.Current;
            assertEquals(SENTENCES[2], TEXT.Substring(from, to - from));
            assertEquals(BreakIterator.Done, bi.Next());

            // Backward: from the last boundary, Previous() yields sentences 2, 1, 0.
            assertEquals(TEXT.Length, bi.Last());
            for (int sentence = 2; sentence >= 0; sentence--)
            {
                to = bi.Current;
                from = bi.Previous();
                assertEquals(SENTENCES[sentence], TEXT.Substring(from, to - from));
            }
            assertEquals(BreakIterator.Done, bi.Previous());
            assertEquals(0, bi.Current);

            // Offset lookups.
            assertEquals(59, bi.Following(39));
            assertEquals(59, bi.Following(31));
            assertEquals(31, bi.Following(30));

            assertEquals(0, bi.Preceding(57));
            assertEquals(0, bi.Preceding(58));
            assertEquals(31, bi.Preceding(59));

            // Multi-step moves.
            assertEquals(0, bi.First());
            assertEquals(59, bi.Next(2));
            assertEquals(0, bi.Next(-2));
        }
示例#8
0
 /// <summary>
 /// Sets <paramref name="text"/> on <paramref name="filteredBI"/> and asserts that the
 /// first five Next() calls return offsets 20, 84, 90, 181 and 278, then rewinds the iterator.
 /// </summary>
 /// <param name="filteredBI">Break iterator under test; its text is replaced.</param>
 /// <param name="text">Text to iterate over.</param>
 private void assertDefaultBreakBehavior(BreakIterator filteredBI, String text)
 {
     Logln("Testing Default Behavior:");
     filteredBI.SetText(text);

     // All five checks carry the same "1st next" label.
     int[] expectedBreaks = { 20, 84, 90, 181, 278 };
     foreach (int expectedBreak in expectedBreaks)
     {
         assertEquals("1st next", expectedBreak, filteredBI.Next());
     }

     // Rewind to the first boundary.
     filteredBI.First();
 }
示例#9
0
        /// <summary>
        /// Lazily enumerates the spans between consecutive boundaries of <paramref name="bi"/>.
        /// Each span is yielded as a <c>Token</c> built from its start offset, end offset,
        /// the corresponding substring and the iterator's rule status at that point.
        /// </summary>
        /// <param name="bi">Iterator to walk; it is repositioned by the enumeration.</param>
        /// <returns>One token per span, in text order.</returns>
        public static IEnumerable <Token> EnumerateTokens(this BreakIterator bi)
        {
            string source = bi.GetCLRText();
            int tokenStart = bi.First();

            for (int tokenEnd = bi.Next(); tokenEnd != BreakIterator.DONE; tokenEnd = bi.Next())
            {
                string tokenText = source.Substring(tokenStart, tokenEnd - tokenStart);
                yield return new Token(tokenStart, tokenEnd, tokenText, bi.GetRuleStatus());

                // The end of this span is the start of the next one.
                tokenStart = tokenEnd;
            }
        }
示例#10
0
        /// <summary>
        /// Lazily enumerates the substrings between consecutive boundaries of <paramref name="bi"/>.
        /// </summary>
        /// <param name="bi">Iterator to walk; it is repositioned by the enumeration.</param>
        /// <returns>One substring per span between boundaries, in text order.</returns>
        public static IEnumerable <string> Enumerate(this BreakIterator bi)
        {
            // The unused StringBuilder local that used to be allocated here was removed.
            string text = bi.GetCLRText();
            int    start = bi.First(), end = bi.Next();

            while (end != BreakIterator.DONE)
            {
                yield return text.Substring(start, end - start);

                // The end of this span is the start of the next one.
                start = end;
                end   = bi.Next();
            }
        }
示例#11
0
        /// <summary>
        /// Checks that SetText stores the text (as seen through the Text property), that it
        /// resets the position to 0 both initially and after the iterator has been advanced,
        /// and that the ICharSequence overload of SetText only produces the expected line breaks.
        /// </summary>
        public void TestGetSetText()
        {
            Logln("Testing getText setText ");
            String firstText  = "first string.";
            String secondText = "Second string.";
            RuleBasedBreakIterator wordIterator =
                (RuleBasedBreakIterator)BreakIterator.GetWordInstance(CultureInfo.CurrentCulture);
            CharacterIterator expectedText = new StringCharacterIterator(firstText);

            // The text reported back must equal a fresh iterator over the same string.
            wordIterator.SetText(firstText);
            bool textMatches = wordIterator.Text.Equals(expectedText);
            if (!textMatches)
            {
                Errln("ERROR:1 error in setText or getText ");
            }
            if (wordIterator.Current != 0)
            {
                Errln("ERROR:1 setText did not set the iteration position to the beginning of the text, it is"
                      + wordIterator.Current + "\n");
            }

            // Advance, then replace the text: the position must go back to 0.
            wordIterator.Next(2);
            wordIterator.SetText(secondText);
            if (wordIterator.Current != 0)
            {
                Errln("ERROR:2 setText did not reset the iteration position to the beginning of the text, it is"
                      + wordIterator.Current + "\n");
            }

            // Test the CharSequence overload of setText() for a simple case.
            BreakIterator lineIter = BreakIterator.GetLineInstance(new CultureInfo("en"));
            ICharSequence csText   = "Hello, World. ".ToCharSequence();
            // Expected Line Brks  ^      ^      ^
            //                     0123456789012345
            List <int> expected = new List <int> { 0, 7, 14 };

            lineIter.SetText(csText);
            int pos = lineIter.First();
            while (pos != BreakIterator.Done)
            {
                assertTrue("", expected.Contains(pos));
                pos = lineIter.Next();
            }

            // After running off the end, the iterator rests on the end of the text.
            assertEquals("", csText.Length, lineIter.Current);
        }
示例#12
0
        /// <summary>
        /// Splits <paramref name="text"/> at word boundaries and returns only the spans
        /// whose first character is a letter.
        /// </summary>
        /// <param name="text">Text to split.</param>
        /// <returns>The letter-initial spans, in text order.</returns>
        List <String> ParseText(String text)
        {
            List <String> words    = new List <String>();
            BreakIterator boundary = BreakIterator.GetWordInstance();

            boundary.Text = text;

            int wordStart = boundary.First();
            int wordEnd   = boundary.Next();

            while (wordEnd != BreakIterator.DONE)
            {
                // Spans that start with a non-letter are skipped.
                if (Char.IsLetter(text[wordStart]))
                {
                    words.Add(text.Substring(wordStart, wordEnd - wordStart));
                }

                wordStart = wordEnd;
                wordEnd   = boundary.Next();
            }

            return words;
        }
示例#13
0
        /// <summary>
        /// Checks the word boundaries of "boo.": one at 0, one at 3 (before the period),
        /// and one at 4 (end of string).
        /// </summary>
        public void TestEndBehavior()
        {
            String        testString = "boo.";
            BreakIterator wb         = BreakIterator.GetWordInstance();

            wb.SetText(testString);

            int firstBreak = wb.First();
            if (firstBreak != 0)
            {
                Errln("Didn't get break at beginning of string.");
            }

            int secondBreak = wb.Next();
            if (secondBreak != 3)
            {
                Errln("Didn't get break before period in \"boo.\"");
            }

            // Only advance again when the iterator is not already sitting on offset 4.
            if (wb.Current != 4)
            {
                if (wb.Next() != 4)
                {
                    Errln("Didn't get break at end of string.");
                }
            }
        }
示例#14
0
        /// <summary>
        /// Regression test: iterating word boundaries over "\u3325\u4a16" must complete, and
        /// every boundary returned by Next() must be strictly greater than the previous one.
        /// </summary>
        public void TestBug12918()
        {
            // This test triggered an assertion failure in ICU4C, in dictbe.cpp
            // The equivalent code in ICU4J is structured slightly differently,
            // and does not appear vulnerable to the same issue.
            //
            // \u3325 decomposes with normalization, then the CJK dictionary
            // finds a break within the decomposition.

            String        crasherString = "\u3325\u4a16";
            BreakIterator iter          = BreakIterator.GetWordInstance(ULocale.ENGLISH);

            iter.SetText(crasherString);
            iter.First();
            int pos;
            int lastPos = -1;

            while ((pos = iter.Next()) != BreakIterator.Done)
            {
                assertTrue("", pos > lastPos);

                // Remember this boundary; without the update the check above only ever
                // compared against -1 and could never detect a non-advancing iterator.
                lastPos = pos;
            }
        }
示例#15
0
        /// <summary>
        /// Checks Following, Preceding and IsBoundary on the word boundaries of "aaa bbb ccc",
        /// using the first four boundaries returned by Next() as reference points.
        /// </summary>
        public void TestPreceding()
        {
            String        text  = "aaa bbb ccc";
            BreakIterator words = BreakIterator.GetWordInstance(CultureInfo.CurrentCulture);

            words.SetText(text);
            words.First();

            // The four boundaries after the start of the text.
            int b1 = words.Next();
            int b2 = words.Next();
            int b3 = words.Next();
            int b4 = words.Next();

            // Probe one past the second boundary.
            int probe     = b2 + 1;
            int following = words.Following(probe);
            int preceding = words.Preceding(probe);

            if (following != b3)
            {
                Errln("IntlTestTextBoundary::TestPreceding: f!=p3");
            }
            if (preceding != b2)
            {
                Errln("IntlTestTextBoundary::TestPreceding: p!=p2");
            }

            // Boundaries 1/2 and 3/4 must be exactly one position apart.
            if (b2 != b1 + 1)
            {
                Errln("IntlTestTextBoundary::TestPreceding: p1+1!=p2");
            }

            if (b4 != b3 + 1)
            {
                Errln("IntlTestTextBoundary::TestPreceding: p3+1!=p4");
            }

            // b2 and b3 are boundaries; the position right after b2 is not.
            bool boundariesOk = words.IsBoundary(b2) && !words.IsBoundary(b2 + 1) && words.IsBoundary(b3);
            if (!boundariesOk)
            {
                Errln("IntlTestTextBoundary::TestPreceding: isBoundary err");
            }
        }
示例#16
0
        /// <summary>
        /// Checks two invariants of <paramref name="tb"/> over every combination of
        /// characters in <paramref name="testChars"/>: no break between CR and LF, and no
        /// break before a non-spacing or enclosing mark. Stops after 75 reported errors.
        /// </summary>
        /// <param name="tb">Break iterator under test; its text is replaced repeatedly.</param>
        /// <param name="testChars">Characters substituted into the probe strings.</param>
        private void doOtherInvariantTest(BreakIterator tb, String testChars)
        {
            StringBuffer probe    = new StringBuffer("a\r\na");
            int          failures = 0;

            // a break should never occur between CR and LF
            for (int lead = 0; lead < testChars.Length; lead++)
            {
                probe[0] = testChars[lead];
                for (int trail = 0; trail < testChars.Length; trail++)
                {
                    probe[3] = testChars[trail];
                    tb.SetText(probe.ToString());

                    int pos = tb.First();
                    while (pos != BreakIterator.Done)
                    {
                        // Offset 2 is the position between '\r' and '\n'.
                        if (pos == 2)
                        {
                            Errln("Break between CR and LF in string U+" +
                                  probe[0].ToHexString() + ", U+d U+a U+" +
                                  probe[3].ToHexString());
                            failures++;
                            if (failures >= 75)
                            {
                                return;
                            }
                        }
                        pos = tb.Next();
                    }
                }
            }

            // a break should never occur before a non-spacing mark, unless it's preceded
            // by a line terminator
            probe.Length = 0;
            probe.Append("aaaa");
            for (int baseIndex = 0; baseIndex < testChars.Length; baseIndex++)
            {
                char baseChar = testChars[baseIndex];
                bool skipBase = baseChar == '\n' || baseChar == '\r' || baseChar == '\u2029'
                                || baseChar == '\u2028' || baseChar == '\u0003';
                if (skipBase)
                {
                    continue;
                }
                probe[1] = baseChar;

                for (int markIndex = 0; markIndex < testChars.Length; markIndex++)
                {
                    char mark     = testChars[markIndex];
                    var  category = Character.GetType(mark);

                    // Only non-spacing and enclosing marks are tried in the mark slot.
                    if (category != UnicodeCategory.NonSpacingMark && category != UnicodeCategory.EnclosingMark)
                    {
                        continue;
                    }
                    probe[2] = mark;
                    tb.SetText(probe.ToString());

                    int pos = tb.First();
                    while (pos != BreakIterator.Done)
                    {
                        // Offset 2 is the position between the base character and the mark.
                        if (pos == 2)
                        {
                            Errln("Break between U+" + probe[1].ToHexString()
                                  + " and U+" + probe[2].ToHexString());
                            failures++;
                            if (failures >= 75)
                            {
                                return;
                            }
                        }
                        pos = tb.Next();
                    }
                }
            }
        }
        /// <summary>
        /// Exercises a line break iterator for the invariant culture: first with no text
        /// set, then over LINE_TEXT, checking Next/Current/Last/Previous/First results and
        /// that moving past either end returns Done without changing the position.
        /// </summary>
        public void TestLineIteration()
        {
            BreakIterator bi = GetLineInstance(System.Globalization.CultureInfo.InvariantCulture);

            // Before any text is set: position 0 and nothing to advance to.
            Assert.AreEqual(0, bi.Current);
            Assert.AreEqual(BreakIterator.Done, bi.Next());
            Assert.AreEqual(0, bi.Current);

            bi.SetText(LINE_TEXT);

            // Position starts at 0 once text is set.
            Assert.AreEqual(0, bi.Current);

            // First three boundaries, each of which must also become the current position:
            //   7  (Apache\t^Lucene) - break on \t
            //   13 (Lucene^(TM))
            //   18 (Lucene(TM) ^is a)
            int[] leadingBreaks = { 7, 13, 18 };
            foreach (int expectedBreak in leadingBreaks)
            {
                Assert.AreEqual(expectedBreak, bi.Next());
                Assert.AreEqual(expectedBreak, bi.Current);
            }

            // Skip ahead to the start of "high-performance".
            bi.Next();
            bi.Next();

            // Next boundary (high-\n^performance)
            Assert.AreEqual(29, bi.Next());

            // Last boundary (in Java.^)
            Assert.AreEqual(108, bi.Last());

            // Moving past the last boundary reports Done and stays put.
            Assert.AreEqual(BreakIterator.Done, bi.Next());
            Assert.AreEqual(108, bi.Current);

            // Previous() and Current agree.
            Assert.AreEqual(103, bi.Previous());
            Assert.AreEqual(103, bi.Current);

            // First() and Current agree.
            Assert.AreEqual(0, bi.First());
            Assert.AreEqual(0, bi.Current);

            // Moving before the first boundary reports Done and stays put.
            Assert.AreEqual(BreakIterator.Done, bi.Previous());
            Assert.AreEqual(0, bi.Current);

            // Last() from the start.
            Assert.AreEqual(108, bi.Last());
        }
        /// <summary>
        /// Exercises a word break iterator for the invariant culture: first over empty
        /// text, then over TEXT, checking a fixed sequence of boundaries and that moving
        /// past either end returns Done without changing the position.
        /// </summary>
        public void TestWordIteration()
        {
            BreakIterator bi = GetWordInstance(System.Globalization.CultureInfo.InvariantCulture);

            // Empty text: position 0 and nothing to advance to.
            bi.SetText("");
            Assert.AreEqual(0, bi.Current);
            Assert.AreEqual(BreakIterator.Done, bi.Next());
            Assert.AreEqual(0, bi.Current);

            bi.SetText(TEXT);

            // Position starts at 0 once text is set.
            Assert.AreEqual(0, bi.Current);

            // First three boundaries, each of which must also become the current position:
            //   6  (Apache^)
            //   7  (^Lucene)
            //   13 (Lucene^)
            int[] trackedBreaks = { 6, 7, 13 };
            foreach (int expectedBreak in trackedBreaks)
            {
                Assert.AreEqual(expectedBreak, bi.Next());
                Assert.AreEqual(expectedBreak, bi.Current);
            }

            // Fourth to seventh boundaries:
            //   14 (^TM), 16 (TM^), 17 (TM)^, 18 (^is)
            int[] followingBreaks = { 14, 16, 17, 18 };
            foreach (int expectedBreak in followingBreaks)
            {
                Assert.AreEqual(expectedBreak, bi.Next());
            }

            // Skip ahead towards "high-performance".
            bi.Next();
            bi.Next();
            bi.Next();

            // Next boundary (^high-performance)
            Assert.AreEqual(23, bi.Next());

            // No break on the hyphen (high-performance^)
            Assert.AreEqual(39, bi.Next());

            // Last boundary; moving past it reports Done and stays put.
            Assert.AreEqual(107, bi.Last());
            Assert.AreEqual(BreakIterator.Done, bi.Next());
            Assert.AreEqual(107, bi.Current);

            // First boundary; moving before it reports Done and stays put.
            Assert.AreEqual(0, bi.First());
            Assert.AreEqual(BreakIterator.Done, bi.Previous());
            Assert.AreEqual(0, bi.Current);
        }
        /// <summary>
        /// Exercises a sentence break iterator for the invariant culture: first over empty
        /// text, then over SENTENCE_TEXT, checking every sentence boundary and that moving
        /// past either end returns Done without changing the position.
        /// </summary>
        public void TestSentenceIteration()
        {
            BreakIterator bi = GetSentenceInstance(System.Globalization.CultureInfo.InvariantCulture);

            // Empty text: position 0 and nothing to advance to.
            bi.SetText("");
            Assert.AreEqual(0, bi.Current);
            Assert.AreEqual(BreakIterator.Done, bi.Next());
            Assert.AreEqual(0, bi.Current);

            bi.SetText(SENTENCE_TEXT);

            // Position starts at 0 once text is set.
            Assert.AreEqual(0, bi.Current);

            // First boundary (in Java.^) - no break on \n; it also becomes the current position.
            Assert.AreEqual(108, bi.Next());
            Assert.AreEqual(108, bi.Current);

            // Remaining boundaries, in order:
            //   221 (especially cross-platform.^)
            //   290 (free download.^)
            //   324 (things easy.^)
            //   344 (is powerful.^)
            //   364 (is exciting.^)
            //   380 (is cool.^)
            //   400 (Lucene now?^) - last boundary
            int[] remainingBreaks = { 221, 290, 324, 344, 364, 380, 400 };
            foreach (int expectedBreak in remainingBreaks)
            {
                Assert.AreEqual(expectedBreak, bi.Next());
            }

            // Moving past the last boundary reports Done and stays put.
            Assert.AreEqual(BreakIterator.Done, bi.Next());
            Assert.AreEqual(400, bi.Current);

            // Previous() and Current agree.
            Assert.AreEqual(380, bi.Previous());
            Assert.AreEqual(380, bi.Current);

            // First() and Current agree.
            Assert.AreEqual(0, bi.First());
            Assert.AreEqual(0, bi.Current);

            // Moving before the first boundary reports Done and stays put.
            Assert.AreEqual(BreakIterator.Done, bi.Previous());
            Assert.AreEqual(0, bi.Current);

            // Last() from the start.
            Assert.AreEqual(400, bi.Last());
        }
        /// <summary>
        /// Exercises a word break iterator for the "th" culture: first over empty text,
        /// then over a mixed Thai/Latin string, and finally over a run of mixed Thai and
        /// ASCII digits that is expected to form a single word.
        /// </summary>
        public void TestWordIterationThai()
        {
            BreakIterator bi = GetWordInstance(new System.Globalization.CultureInfo("th"));

            // Empty text: position 0 and nothing to advance to.
            bi.SetText("");
            Assert.AreEqual(0, bi.Current);
            Assert.AreEqual(BreakIterator.Done, bi.Next());
            Assert.AreEqual(0, bi.Current);

            bi.SetText("บริษัทMicrosoftบริการดีที่สุด");

            // Position starts at 0 once text is set.
            Assert.AreEqual(0, bi.Current);

            // First three boundaries, each of which must also become the current position:
            //   6  (end of the first Thai word, before "Microsoft")
            //   15 (end of "Microsoft")
            //   21 (end of the second Thai word)
            int[] trackedBreaks = { 6, 15, 21 };
            foreach (int expectedBreak in trackedBreaks)
            {
                Assert.AreEqual(expectedBreak, bi.Next());
                Assert.AreEqual(expectedBreak, bi.Current);
            }

            // Fourth and fifth boundaries (the fifth is the end of the text).
            Assert.AreEqual(23, bi.Next());
            Assert.AreEqual(29, bi.Next());

            // Moving past the last boundary reports Done and stays put.
            Assert.AreEqual(BreakIterator.Done, bi.Next());
            Assert.AreEqual(29, bi.Current);

            // One step back lands on the fourth boundary.
            Assert.AreEqual(23, bi.Previous());

            // First boundary; moving before it reports Done and stays put.
            Assert.AreEqual(0, bi.First());
            Assert.AreEqual(BreakIterator.Done, bi.Previous());
            Assert.AreEqual(0, bi.Current);

            // Numerals: Thai and ASCII digits must stay in one group.
            bi.SetText("๑23๔๕๖7");
            Assert.AreEqual(0, bi.Current);
            Assert.AreEqual(7, bi.Next());
        }
示例#21
0
        // ICU4N specific: Removed clone, as the cast to SimpleFilteredSentenceBreakIterator when we return object is completely pointless


        /// <summary>
        /// Moves to the first boundary by forwarding the call to the wrapped iterator.
        /// </summary>
        /// <returns>The value returned by the wrapped iterator's First().</returns>
        public override int First()
        {
            // Don't suppress a break opportunity at the beginning of text.
            int firstBoundary = @delegate.First();
            return firstBoundary;
        }
示例#22
0
        /// <summary>
        /// Creates a title break iterator for "en-CA" and checks that First() is not null
        /// before text is set and is 0 after text is set.
        /// </summary>
        public void TestGetTitleInstance()
        {
            const string failureMessage = "Title instance break iterator not correctly instantiated";
            BreakIterator bi = BreakIterator.GetTitleInstance(new CultureInfo("en-CA"));

            // Before any text is set.
            TestFmwk.assertNotEquals(failureMessage, bi.First(), null);

            // After text is set, the first boundary is offset 0.
            bi.SetText("Here is some Text");
            TestFmwk.assertEquals(failureMessage, bi.First(), 0);
        }
示例#23
0
        /// <summary>
        /// Inserts the <c>insertion</c> string into <paramref name="text"/> at every break
        /// iterator boundary inside [pos.Start, pos.Limit) whose neighbouring code points
        /// both pass the LETTER_OR_MARK_MASK category check, then advances
        /// <paramref name="pos"/> to account for the inserted characters.
        /// </summary>
        /// <param name="text">Text that is modified in place.</param>
        /// <param name="pos">Range to process; Start, Limit and ContextLimit are updated on return.</param>
        /// <param name="incremental">
        /// When true, Start is left just after the last insertion point (or at 0 + delta if
        /// nothing was inserted); when false, Start is moved to the new Limit.
        /// </param>
        protected override void HandleTransliterate(IReplaceable text, TransliterationPosition pos, bool incremental)
        {
            // The whole operation runs under a lock on this instance because it mutates the
            // shared boundaryCount / boundaries fields and the shared break iterator.
            lock (this)
            {
                boundaryCount = 0;
                int boundary = 0;
                GetBreakIterator(); // Lazy-create it if necessary
                bi.SetText(new ReplaceableCharacterIterator(text, pos.Start, pos.Limit, pos.Start));
                // TODO: fix clumsy workaround used below.

                /*
                 * char[] tempBuffer = new char[text.length()];
                 * text.getChars(0, text.length(), tempBuffer, 0);
                 * bi.setText(new StringCharacterIterator(new String(tempBuffer), pos.start, pos.limit, pos.start));
                 */
                // end debugging

                // To make things much easier, we will stack the boundaries, and then insert at the end.
                // generally, we won't need too many, since we will be filtered.

                // Pass 1: collect qualifying boundaries without touching the text.
                for (boundary = bi.First(); boundary != BreakIterator.Done && boundary < pos.Limit; boundary = bi.Next())
                {
                    // Offset 0 has no preceding code point to inspect.
                    if (boundary == 0)
                    {
                        continue;
                    }
                    // HACK: Check to see that preceding item was a letter

                    int cp   = UTF16.CharAt(text, boundary - 1);
                    int type = UChar.GetUnicodeCategory(cp).ToInt32();
                    //System.out.println(Integer.toString(cp,16) + " (before): " + type);
                    // NOTE(review): LETTER_OR_MARK_MASK is presumably a bit set of letter/mark
                    // general categories, indexed by category value - confirm at its declaration.
                    if (((1 << type) & LETTER_OR_MARK_MASK) == 0)
                    {
                        continue;
                    }

                    // Same category check for the code point right after the boundary.
                    cp   = UTF16.CharAt(text, boundary);
                    type = UChar.GetUnicodeCategory(cp).ToInt32();
                    //System.out.println(Integer.toString(cp,16) + " (after): " + type);
                    if (((1 << type) & LETTER_OR_MARK_MASK) == 0)
                    {
                        continue;
                    }

                    if (boundaryCount >= boundaries.Length)
                    {       // realloc if necessary
                        // Double the capacity and carry the collected boundaries over.
                        int[] temp = new int[boundaries.Length * 2];
                        System.Array.Copy(boundaries, 0, temp, 0, boundaries.Length);
                        boundaries = temp;
                    }

                    boundaries[boundaryCount++] = boundary;
                    //System.out.println(boundary);
                }

                int delta        = 0;
                int lastBoundary = 0;

                // Pass 2: perform the insertions.
                if (boundaryCount != 0)
                { // if we found something, adjust
                    // Total growth of the text: one insertion per collected boundary.
                    delta        = boundaryCount * insertion.Length;
                    lastBoundary = boundaries[boundaryCount - 1];

                    // we do this from the end backwards, so that we don't have to keep updating.

                    while (boundaryCount > 0)
                    {
                        boundary = boundaries[--boundaryCount];
                        // Empty range [boundary, boundary): a pure insertion.
                        text.Replace(boundary, boundary, insertion);
                    }
                }

                // Now fix up the return values
                pos.ContextLimit += delta;
                pos.Limit        += delta;
                pos.Start         = incremental ? lastBoundary + delta : pos.Limit;
            }
        }