public void Test32NextForCodePoint()
{
    // Exercises CharsTrie.FirstForCodePoint()/NextForCodePoint() on strings that mix BMP
    // characters, supplementary code points (stored as surrogate pairs) and an unpaired
    // trail surrogate. After every step the returned Result must also equal trie.Current.
    StringAndValue[] entries =
    {
        // "\u4dff\\U00010000\u9999\\U00020000\udfff\\U0010ffff"
        new StringAndValue("\u4dff\ud800\udc00\u9999\ud840\udc00\udfff\udbff\udfff", 2000000000),
        // "\u4dff\\U00010000\u9999\\U00020002"
        new StringAndValue("\u4dff\ud800\udc00\u9999\ud840\udc02", 44444),
        // "\u4dff\\U000103ff"
        new StringAndValue("\u4dff\ud800\udfff", 99999)
    };
    CharsTrie trie = buildTrie(entries, entries.Length, TrieBuilderOption.Fast);
    Result step;

    // Entry 0: walk from the freshly built trie; only the last code point carries a value.
    // The && chain stops at the first failing step, so no further trie calls are made after it.
    bool firstWalkOk =
        (step = trie.NextForCodePoint(0x4dff)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x10000)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x9999)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x20000)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0xdfff)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x10ffff)) == Result.FinalValue && step == trie.Current &&
        trie.GetValue() == 2000000000;
    if (!firstWalkOk)
    {
        Errln("CharsTrie.NextForCodePoint() fails for " + entries[0].s);
    }

    // Entry 1: start over via FirstForCodePoint() instead of an explicit Reset().
    bool secondWalkOk =
        (step = trie.FirstForCodePoint(0x4dff)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x10000)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x9999)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x20002)) == Result.FinalValue && step == trie.Current &&
        trie.GetValue() == 44444;
    if (!secondWalkOk)
    {
        Errln("CharsTrie.NextForCodePoint() fails for " + entries[1].s);
    }

    // A code point that is in none of the strings must end in NoMatch.
    bool mismatchOk =
        (step = trie.Reset().NextForCodePoint(0x4dff)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x10000)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x9999)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x20222)) == Result.NoMatch && step == trie.Current; // no match for trail surrogate
    if (!mismatchOk)
    {
        Errln("CharsTrie.NextForCodePoint() fails for \u4dff\\U00010000\u9999\\U00020222");
    }

    // Entry 2: shares only the first character and the lead surrogate with the other strings.
    bool thirdWalkOk =
        (step = trie.Reset().NextForCodePoint(0x4dff)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x103ff)) == Result.FinalValue && step == trie.Current &&
        trie.GetValue() == 99999;
    if (!thirdWalkOk)
    {
        Errln("CharsTrie.NextForCodePoint() fails for " + entries[2].s);
    }
}
// For every non-empty test string, verifies that First(c) / FirstForCodePoint(c) gives the
// same Result, the same Current, the same value and the same follow-up Result as
// Reset() followed by Next(c) / NextForCodePoint(c). The trie is reset before returning.
private void checkFirst(CharsTrie trie, StringAndValue[] data, int dataLength)
{
    for (int index = 0; index < dataLength; ++index)
    {
        string text = data[index].s;
        if (text.Length == 0)
        {
            continue; // skip empty string
        }

        // --- UTF-16 code unit variant: First(unit) vs. Reset().Next(unit) ---
        int unit = text[0];
        int followingUnit = 0;
        if (text.Length > 1)
        {
            followingUnit = text[1];
        }
        Result viaFirst = trie.First(unit);
        int valueViaFirst = viaFirst.HasValue() ? trie.GetValue() : -1;
        Result afterFirst = trie.Next(followingUnit);
        // Evaluated left to right with short-circuiting, so the trie calls happen in this order.
        bool unitsAgree =
            viaFirst == trie.Reset().Next(unit) &&
            viaFirst == trie.Current &&
            valueViaFirst == (viaFirst.HasValue() ? trie.GetValue() : -1) &&
            afterFirst == trie.Next(followingUnit);
        if (!unitsAgree)
        {
            Errln(String.Format("trie.first(U+{0:X4})!=trie.Reset().Next(same) for {1}", unit, text));
        }

        // --- Code point variant: FirstForCodePoint(cp) vs. Reset().NextForCodePoint(cp) ---
        int codePoint = text.CodePointAt(0);
        int codePointLength = Character.CharCount(codePoint);
        int followingCodePoint = 0;
        if (text.Length > codePointLength)
        {
            followingCodePoint = text.CodePointAt(codePointLength);
        }
        viaFirst = trie.FirstForCodePoint(codePoint);
        valueViaFirst = viaFirst.HasValue() ? trie.GetValue() : -1;
        afterFirst = trie.NextForCodePoint(followingCodePoint);
        bool codePointsAgree =
            viaFirst == trie.Reset().NextForCodePoint(codePoint) &&
            viaFirst == trie.Current &&
            valueViaFirst == (viaFirst.HasValue() ? trie.GetValue() : -1) &&
            afterFirst == trie.NextForCodePoint(followingCodePoint);
        if (!codePointsAgree)
        {
            Errln(String.Format("trie.firstForCodePoint(U+{0:X4})!=trie.Reset().NextForCodePoint(same) for {1}", codePoint, text));
        }
    }
    trie.Reset();
}
// Next(string, start, limit) is covered by other checks as well; this one feeds each
// test string in two halves and then tries to step past its end with a NUL character.
// The trie is reset after each string that matched up to its middle; it is left
// untouched when the first half already fails.
private void checkNextString(CharsTrie trie, StringAndValue[] data, int dataLength)
{
    for (int index = 0; index < dataLength; ++index)
    {
        string text = data[index].s;
        int length = text.Length;

        if (trie.Next(text, 0, length / 2).Matches())
        {
            // Consume the second half; we must stop properly at the end of the string.
            trie.Next(text, length / 2, length);
            if (trie.Next(0).Matches())
            {
                Errln("trie.Next(string+NUL)!=BytesTrie.Result.NO_MATCH for " + text);
            }
            trie.Reset();
        }
        else
        {
            Errln("trie.Next(up to middle of string)=BytesTrie.Result.NO_MATCH for " + text);
        }
    }
}
/// <summary>
/// Checks whether the text just before position <paramref name="n"/> matches an entry of
/// the backwards trie, i.e. whether there is a break exception at this point.
/// </summary>
/// <param name="n">The location of the possible break.</param>
/// <returns>
/// <c>true</c> if the best backwards match is a full match, or if it is a partial match and
/// the forwards partial trie also matches starting at the recorded position;
/// otherwise <c>false</c>.
/// </returns>
private bool BreakExceptionAt(int n)
{
    // Note: the C++ version of this function is SimpleFilteredSentenceBreakIterator::breakExceptionAt()

    text.Index = n;
    backwardsTrie.Reset();

    // Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
    // TODO: skip a class of chars here??
    // TODO only do this the 1st time?
    int preceding = text.PreviousCodePoint();
    if (preceding != ' ')
    {
        // Not a space: undo the step back so the character is fed to the trie below.
        text.NextCodePoint();
    }

    // Walk backwards through the text while 'n' points to an exception.
    int matchPosition = -1;
    int matchValue = -1;
    Result backward = Result.IntermediateValue;
    while (true)
    {
        int codePoint = text.PreviousCodePoint();
        if (codePoint == UCharacterIterator.Done)
        {
            break; // nothing more to consume backwards
        }
        backward = backwardsTrie.NextForCodePoint(codePoint);
        if (!backward.HasNext())
        {
            break; // nothing more in the trie
        }
        if (backward.HasValue())
        {
            // remember the best match so far
            matchPosition = text.Index;
            matchValue = backwardsTrie.GetValue();
        }
    }

    if (backward.Matches())
    {
        // exact match?
        matchValue = backwardsTrie.GetValue();
        matchPosition = text.Index;
    }

    if (matchPosition < 0)
    {
        return false; // No exception here.
    }

    if (matchValue == SimpleFilteredSentenceBreakIteratorBuilder.Match)
    {
        return true; // exact match: exception here.
    }

    if (matchValue == SimpleFilteredSentenceBreakIteratorBuilder.Partial
        && forwardsPartialTrie != null) // make sure there's a forward trie
    {
        // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
        // to see if it matches something going forward.
        forwardsPartialTrie.Reset();
        Result forward = Result.IntermediateValue;
        text.Index = matchPosition; // hope that's close ..
        while (true)
        {
            int codePoint = text.NextCodePoint();
            if (codePoint == BreakIterator.Done)
            {
                break;
            }
            forward = forwardsPartialTrie.NextForCodePoint(codePoint);
            if (!forward.HasNext())
            {
                break;
            }
        }
        if (forward.Matches())
        {
            return true; // Exception here
        }
    }

    return false; // No exception here.
}
// Verifies SaveState()/ResetToState(): for each test string, consumes the first third,
// saves the state, forces a mismatch, restores the state and checks that the rest of the
// string can still be matched (twice) with the expected value.
// For every even index it also checks that ResetToState() rejects a state object that
// was never filled by SaveState() by throwing an ArgumentException.
// Returns early (without resetting the trie) if a prefix of a test string does not match.
private void checkNextWithState(CharsTrie trie, StringAndValue[] data, int dataLength)
{
    CharsTrieState noState = new CharsTrieState(), state = new CharsTrieState();
    for (int i = 0; i < dataLength; ++i)
    {
        if ((i & 1) == 0)
        {
            try
            {
                trie.ResetToState(noState);
                Errln("trie.resetToState(noState) should throw an ArgumentException");
            }
            catch (ArgumentException)
            {
                // good: the exception object itself is not needed, so no variable is declared
            }
        }

        String expectedString = data[i].s;
        int stringLength = expectedString.Length;
        int partialLength = stringLength / 3;

        // Consume the first third of the string one code unit at a time.
        for (int j = 0; j < partialLength; ++j)
        {
            if (!trie.Next(expectedString[j]).Matches())
            {
                Errln("trie.Next()=BytesTrie.Result.NO_MATCH for a prefix of " + data[i].s);
                return;
            }
        }

        // Remember the state (and the value, if there is one) at this point.
        trie.SaveState(state);
        Result resultAtState = trie.Current;
        Result result;
        int valueAtState = -99;
        if (resultAtState.HasValue())
        {
            valueAtState = trie.GetValue();
        }

        result = trie.Next(0);  // mismatch
        if (result != Result.NoMatch || result != trie.Current)
        {
            Errln("trie.Next(0) matched after part of " + data[i].s);
        }

        // Restoring the state must bring back the same Current/value, and the rest of the
        // string must then match; restoring a second time must work just as well.
        if (resultAtState != trie.ResetToState(state).Current ||
            (resultAtState.HasValue() && valueAtState != trie.GetValue()))
        {
            Errln("trie.Next(part of " + data[i].s + ") changes current()/getValue() after " +
                "saveState/next(0)/resetToState");
        }
        else if (!(result = trie.Next(expectedString, partialLength, stringLength)).HasValue() ||
            result != trie.Current)
        {
            Errln("trie.Next(rest of " + data[i].s + ") does not seem to contain " + data[i].s + " after " +
                "saveState/next(0)/resetToState");
        }
        else if (!(result = trie.ResetToState(state).
            Next(expectedString, partialLength, stringLength)).HasValue() ||
            result != trie.Current)
        {
            Errln("trie does not seem to contain " + data[i].s +
                " after saveState/next(rest)/resetToState");
        }
        else if (trie.GetValue() != data[i].value)
        {
            Errln(String.Format("trie value for {0} is {1}=0x{2:x} instead of expected {3}=0x{4:x}",
                data[i].s,
                trie.GetValue(), trie.GetValue(),
                data[i].value, data[i].value));
        }
        trie.Reset();
    }
}
// Verifies each test string three ways:
//  1. Next(string, 0, length) in one call must end on a value equal to the expected one,
//     and repeating Current/GetValue() must not change anything.
//  2. Stepping one code unit at a time must keep matching, with Current agreeing with
//     the Result returned by each Next() call.
//  3. At the end of the string, Current == IntermediateValue must agree with whether
//     Next() can continue with some ASCII or surrogate code unit.
// When the string does not end on a value, the entry is abandoned without resetting the trie.
private void checkNext(CharsTrie trie, StringAndValue[] data, int dataLength)
{
    CharsTrieState saved = new CharsTrieState();
    for (int index = 0; index < dataLength; ++index)
    {
        StringAndValue entry = data[index];
        string text = entry.s;
        int length = text.Length;

        // 1. Whole string in a single call.
        Result outcome = trie.Next(text, 0, length);
        if (!outcome.HasValue() || outcome != trie.Current)
        {
            Errln("trie does not seem to contain " + entry.s);
        }
        else if (trie.GetValue() != entry.value)
        {
            Errln(String.Format("trie value for {0} is {1}=0x{2:x} instead of expected {3}=0x{4:x}",
                entry.s,
                trie.GetValue(), trie.GetValue(),
                entry.value, entry.value));
        }
        else if (outcome != trie.Current || trie.GetValue() != entry.value)
        {
            Errln("trie value for " + entry.s + " changes when repeating current()/getValue()");
        }

        // 2. One code unit at a time, starting from the root.
        trie.Reset();
        outcome = trie.Current;
        int pos = 0;
        while (pos < length)
        {
            if (!outcome.HasNext())
            {
                Errln(String.Format("trie.Current!=hasNext before end of {0} (at index {1})", entry.s, pos));
                break;
            }
            if (outcome == Result.IntermediateValue)
            {
                // Reading the value must not disturb the current state.
                trie.GetValue();
                if (trie.Current != Result.IntermediateValue)
                {
                    Errln(String.Format("trie.getValue().Current!=Result.INTERMEDIATE_VALUE " +
                        "before end of {0} (at index {1})", entry.s, pos));
                    break;
                }
            }
            outcome = trie.Next(text[pos]);
            if (!outcome.Matches())
            {
                Errln(String.Format("trie.Next()=Result.NO_MATCH " +
                    "before end of {0} (at index {1})", entry.s, pos));
                break;
            }
            if (outcome != trie.Current)
            {
                Errln(String.Format("trie.Next()!=following current() " +
                    "before end of {0} (at index {1})", entry.s, pos));
                break;
            }
            ++pos;
        }
        if (!outcome.HasValue())
        {
            Errln("trie.Next()!=hasValue at the end of " + entry.s);
            continue;
        }
        trie.GetValue();
        if (outcome != trie.Current)
        {
            Errln("trie.Current != current()+getValue()+current() after end of " + entry.s);
        }

        // 3. Compare the final current() with whether next() can actually continue.
        trie.SaveState(saved);
        bool nextContinues = false;
        int probe = 0x20;
        while (!nextContinues && probe < 0xe000)
        {
            if (probe == 0x80)
            {
                probe = 0xd800;  // Check for ASCII and surrogates but not all of the BMP.
            }
            nextContinues = trie.ResetToState(saved).Next(probe).Matches();
            ++probe;
        }
        if ((outcome == Result.IntermediateValue) != nextContinues)
        {
            Errln("(trie.Current==BytesTrie.Result.INTERMEDIATE_VALUE) contradicts " +
                "(trie.Next(some char)!=BytesTrie.Result.NO_MATCH) after end of " + entry.s);
        }
        trie.Reset();
    }
}