/// <summary>
/// Walks a string with the trie's char-sequence enumerator, first forwards from index 0
/// and then backwards from index 5, checking the code point, index and trie value
/// reported at every step.
/// </summary>
public void TestCharSequenceIterator()
{
    // The surrogate pair \ud800\udc01 encodes the supplementary code point U+10001.
    String text = "abc123\ud800\udc01 ";
    // Expected value for each UTF-16 index of text. The '?' at index 7 sits under the
    // trailing surrogate, an index the loops below never compare.
    String vals = "LLLNNNX?S";

    Trie2Writable tw = new Trie2Writable(0, 666);
    tw.SetRange('a', 'z', 'L', false);
    tw.SetRange('1', '9', 'N', false);
    tw.Set(' ', 'S');
    tw.Set(0x10001, 'X');

    using (Trie2CharSequenceEnumerator it = tw.GetCharSequenceEnumerator(text, 0))
    {
        Trie2CharSequenceValues current;

        // Forward pass: pos tracks the UTF-16 index the enumerator should report next.
        int pos = 0;
        while (it.MoveNext())
        {
            current = it.Current;
            int expectedCP = Character.CodePointAt(text, pos);
            String label = " i=" + pos;

            assertEquals(label, expectedCP, current.CodePoint);
            assertEquals(label, pos, current.Index);
            assertEquals(label, vals[pos], current.Value);

            // A supplementary code point occupies two UTF-16 units.
            pos += (expectedCP >= 0x10000) ? 2 : 1;
        }
        assertEquals("", text.Length, pos);

        // Reverse pass, starting from an intermediate position.
        it.Set(5);
        pos = 5;
        while (it.MovePrevious())
        {
            current = it.Current;
            int expectedCP = Character.CodePointBefore(text, pos);

            // Step back over the code point first so pos is its starting index.
            if (expectedCP < 0x10000)
            {
                pos -= 1;
            }
            else
            {
                pos -= 2;
            }

            String label = " i=" + pos;
            assertEquals(label, expectedCP, current.CodePoint);
            assertEquals(label, pos, current.Index);
            assertEquals(label, vals[pos], current.Value);
        }
        assertEquals("", 0, pos);
    }
}
/// <summary>
/// Checks two break-iterator invariants over every ordered pair of characters drawn from
/// <paramref name="testChars"/>: no boundary may fall between CR and LF, and no boundary
/// may fall immediately before a non-spacing or enclosing mark. Each violation is reported
/// through Errln; the method returns once 75 violations have been counted in total.
/// </summary>
/// <param name="tb">Break iterator whose reported boundaries are examined.</param>
/// <param name="testChars">Characters substituted into the probe strings.</param>
private void doOtherInvariantTest(BreakIterator tb, String testChars)
{
    StringBuffer work = new StringBuffer("a\r\na");
    int errorCount = 0;
    int charCount = testChars.Length;

    // Invariant 1: a break should never occur between CR and LF.
    // The probe string is <x> CR LF <y>, so offset 2 is the CR/LF seam.
    for (int lead = 0; lead < charCount; lead++)
    {
        work[0] = testChars[lead];
        for (int trail = 0; trail < charCount; trail++)
        {
            work[3] = testChars[trail];
            tb.SetText(work.ToString());

            int boundary = tb.First();
            while (boundary != BreakIterator.Done)
            {
                if (boundary == 2)
                {
                    String message = "Break between CR and LF in string U+" + (work[0]).ToHexString()
                        + ", U+d U+a U+" + (work[3]).ToHexString();
                    Errln(message);
                    if (++errorCount >= 75)
                    {
                        return;
                    }
                }
                boundary = tb.Next();
            }
        }
    }

    // Invariant 2: a break should never occur before a non-spacing mark, unless it's
    // preceded by a line terminator. The probe string is a <x> <mark> a, so offset 2 is
    // the position immediately before the mark.
    work.Length = 0;
    work.Append("aaaa");
    for (int lead = 0; lead < charCount; lead++)
    {
        char before = testChars[lead];
        switch (before)
        {
            // These preceding characters are exempt from the invariant.
            case '\n':
            case '\r':
            case '\u2029':
            case '\u2028':
            case '\u0003':
                continue;
        }
        work[1] = before;

        for (int trail = 0; trail < charCount; trail++)
        {
            char mark = testChars[trail];
            var category = Character.GetType(mark);
            bool isMark = category == UnicodeCategory.NonSpacingMark
                || category == UnicodeCategory.EnclosingMark;
            if (!isMark)
            {
                continue;
            }
            work[2] = mark;
            tb.SetText(work.ToString());

            int boundary = tb.First();
            while (boundary != BreakIterator.Done)
            {
                if (boundary == 2)
                {
                    String message = "Break between U+" + ((work[1])).ToHexString()
                        + " and U+" + ((work[2])).ToHexString();
                    Errln(message);
                    if (++errorCount >= 75)
                    {
                        return;
                    }
                }
                boundary = tb.Next();
            }
        }
    }
}