/// <summary>
/// Regression test: code point iteration over text containing unpaired
/// surrogates must terminate and must return the lone surrogate itself.
/// </summary>
public void TestJitterbug1952()
{
    // Layout: lone trail, two well-formed pairs, lone trail.
    char[] text =
    {
        '\uDC00',
        '\uD800', '\uDC01',
        '\uD802', '\uDC02',
        '\uDC03'
    };
    UCharacterIterator it = UCharacterIterator.GetInstance(text);

    // Walk backwards from index 1: only the lone trail surrogate at index 0
    // precedes it. If this loop never ends, the iterator is broken.
    it.Index = 1;
    for (int cp = it.PreviousCodePoint();
         cp != UCharacterIterator.DONE;
         cp = it.PreviousCodePoint())
    {
        if (cp != 0xDc00)
        {
            Errln("iter.PreviousCodePoint() failed");
        }
    }

    // Walk forwards from index 5: only the lone trail surrogate at the end
    // of the text follows it.
    it.Index = 5;
    for (int cp = it.NextCodePoint();
         cp != UCharacterIterator.DONE;
         cp = it.NextCodePoint())
    {
        if (cp != 0xDC03)
        {
            Errln("iter.NextCodePoint() failed");
        }
    }
}
/// <summary>
/// Checks basic positioning (start, limit, length) and then verifies that
/// code unit iteration (<c>Next</c>/<c>Previous</c>) and code point iteration
/// (<c>NextCodePoint</c>/<c>PreviousCodePoint</c>) over
/// <c>ITERATION_STRING_</c> agree with each other in both directions.
/// </summary>
public void TestIteration()
{
    UCharacterIterator iterator = UCharacterIterator.GetInstance(ITERATION_STRING_);
    UCharacterIterator iterator2 = UCharacterIterator.GetInstance(ITERATION_STRING_);

    iterator.SetToStart();
    if (iterator.Current != ITERATION_STRING_[0])
    {
        Errln("Iterator failed retrieving first character");
    }

    iterator.SetToLimit();
    if (iterator.Previous() != ITERATION_STRING_[ITERATION_STRING_.Length - 1])
    {
        Errln("Iterator failed retrieving last character");
    }

    if (iterator.Length != ITERATION_STRING_.Length)
    {
        Errln("Iterator failed determining begin and end index");
    }

    // Forward pass: iterator2 advances by code point, iterator by code unit.
    iterator2.Index = 0;
    iterator.Index = 0;
    int ch = 0;
    while (ch != UCharacterIterator.DONE)
    {
        int index = iterator2.Index;
        ch = iterator2.NextCodePoint();
        if (index != ITERATION_SUPPLEMENTARY_INDEX)
        {
            if (ch != iterator.Next() && ch != UCharacterIterator.DONE)
            {
                Errln("Error mismatch in next() and nextCodePoint()");
            }
        }
        else
        {
            // The supplementary code point spans two code units: lead, then trail.
            if (UTF16.GetLeadSurrogate(ch) != iterator.Next()
                || UTF16.GetTrailSurrogate(ch) != iterator.Next())
            {
                Errln("Error mismatch in next and nextCodePoint for "
                      + "supplementary characters");
            }
        }
    }

    // Backward pass.
    iterator.Index = ITERATION_STRING_.Length;
    iterator2.Index = ITERATION_STRING_.Length;

    // The forward loop exits with ch == DONE; without this reset the
    // backward loop below would never execute.
    ch = 0;
    while (ch != UCharacterIterator.DONE)
    {
        ch = iterator2.PreviousCodePoint();

        // Read the index AFTER stepping back: it is then the start index of
        // the code point just returned, which is what
        // ITERATION_SUPPLEMENTARY_INDEX identifies in the forward pass.
        int index = iterator2.Index;
        if (ch == UCharacterIterator.DONE || index != ITERATION_SUPPLEMENTARY_INDEX)
        {
            if (ch != iterator.Previous() && ch != UCharacterIterator.DONE)
            {
                Errln("Error mismatch in previous() and "
                      + "previousCodePoint()");
            }
        }
        else
        {
            // Going backwards the code units come out trail first, then lead.
            if (UTF16.GetTrailSurrogate(ch) != iterator.Previous()
                || UTF16.GetLeadSurrogate(ch) != iterator.Previous())
            {
                Errln("Error mismatch in previous and "
                      + "previousCodePoint for supplementary characters");
            }
        }
    }
}
/// <summary>
/// Drives <paramref name="iter"/> through a scripted sequence of
/// previous/current/next code point moves and compares every returned code
/// point, and the resulting position, against a reference iterator over the
/// expected code points.
/// </summary>
/// <param name="iter">
/// The iterator under test. Its index is set to 4 before the moves start, and
/// every index it reports is used to look up <c>expectIndex</c> (7 entries).
/// </param>
public void previousNext(UCharacterIterator iter)
{
    int[] expect = { 0x2f999, 0x1d15f, 0xc4, 0x1ed0 };

    // Indexed by iter's index; each entry is the index the reference
    // iterator over 'expect' should report at that position.
    int[] expectIndex = {
        0, 0,
        1, 1,
        2,
        3,
        4 //needed
    };

    // initial indexes into the src and expect strings
    int SRC_MIDDLE = 4;
    int EXPECT_MIDDLE = 2;

    // movement vector
    // - for previous(), 0 for current(), + for next()
    string moves = "0+0+0--0-0-+++0--+++++++0--------";

    UCharIterator iter32 = new UCharIterator(expect, expect.Length, EXPECT_MIDDLE);

    int c1, c2;
    char m;

    // initially set the indexes into the middle of the strings
    iter.Index = SRC_MIDDLE;

    // move around and compare the iteration code points with
    // the expected ones
    int movesIndex = 0;
    while (movesIndex < moves.Length)
    {
        m = moves[movesIndex++];
        if (m == '-')
        {
            c1 = iter.PreviousCodePoint();
            c2 = iter32.Previous();
        }
        else if (m == '0')
        {
            c1 = iter.CurrentCodePoint;
            c2 = iter32.Current;
        }
        else
        {
            // m == '+'
            c1 = iter.NextCodePoint();
            c2 = iter32.Next();
        }

        // compare results
        if (c1 != c2)
        {
            // the moves performed so far, including the current one (m)
            string history = moves.Substring(0, movesIndex); // ICU4N: Checked 2nd parameter
            Errln("error: mismatch in Normalizer iteration at " + history + ": "
                  + "got c1= " + Hex(c1) + " != expected c2= " + Hex(c2));
            break;
        }

        // compare indexes
        if (expectIndex[iter.Index] != iter32.Index)
        {
            // the moves performed so far, including the current one (m)
            string history = moves.Substring(0, movesIndex); // ICU4N: Checked 2nd parameter

            // Report the two values that were actually compared: the mapped
            // index of the iterator under test and the reference index.
            Errln("error: index mismatch in Normalizer iteration at " + history + " : "
                  + "Normalizer index " + iter.Index
                  + " (maps to " + expectIndex[iter.Index] + ")"
                  + " expected " + iter32.Index);
            break;
        }
    }
}
/// <summary>
/// Is there an exception at this point?
/// </summary>
/// <remarks>
/// Walks backwards from <paramref name="n"/> through the backwards trie,
/// remembering the best position/value seen. A <c>Match</c> value is an
/// exception outright; a <c>Partial</c> value is an exception only if the
/// text from the best position also matches in the forwards trie (when one
/// exists). This method moves the index of the underlying text iterator.
/// </remarks>
/// <param name="n">The location of the possible break.</param>
/// <returns><c>true</c> if an exception applies at <paramref name="n"/>; otherwise <c>false</c>.</returns>
private bool BreakExceptionAt(int n)
{
    // Note: the C++ version of this function is SimpleFilteredSentenceBreakIterator::breakExceptionAt()

    int bestPosn = -1;
    int bestValue = -1;

    // loops while 'n' points to an exception
    text.Index = n;
    backwardsTrie.Reset();
    int uch;

    // Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
    if (text.PreviousCodePoint() != ' ')
    {
        // Not a space: step forward again to undo the peek.
        text.NextCodePoint();
    }
    // else: the space has been skipped.
    // TODO: skip a class of chars here??
    // TODO only do this the 1st time?

    Result r = Result.IntermediateValue;

    while ((uch = text.PreviousCodePoint()) != UCharacterIterator.Done // more to consume backwards and..
           && (r = backwardsTrie.NextForCodePoint(uch)).HasNext()) // more in the trie
    {
        if (r.HasValue())
        {
            // remember the best match so far
            bestPosn = text.Index;
            bestValue = backwardsTrie.GetValue();
        }
    }

    if (r.Matches())
    {
        // exact match?
        bestValue = backwardsTrie.GetValue();
        bestPosn = text.Index;
    }

    if (bestPosn < 0)
    {
        return false; // No exception here.
    }

    if (bestValue == SimpleFilteredSentenceBreakIteratorBuilder.Match)
    {
        // exact match!
        return true; // Exception here.
    }

    if (bestValue == SimpleFilteredSentenceBreakIteratorBuilder.Partial
        && forwardsPartialTrie != null) // make sure there's a forward trie
    {
        // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
        // to see if it matches something going forward.
        forwardsPartialTrie.Reset();

        Result rfwd = Result.IntermediateValue;
        text.Index = bestPosn; // hope that's close ..

        // 'text' is a UCharacterIterator, so its end-of-text sentinel is
        // UCharacterIterator.Done (the same one the backwards loop tests),
        // not the BreakIterator boundary sentinel.
        while ((uch = text.NextCodePoint()) != UCharacterIterator.Done
               && (rfwd = forwardsPartialTrie.NextForCodePoint(uch)).HasNext())
        {
            // keep feeding code points to the forwards trie
        }

        if (rfwd.Matches())
        {
            // Exception here
            return true;
        }
    }

    return false; // No exception here.
}