/// <summary>
/// Exercises <c>NextForCodePoint</c> / <c>FirstForCodePoint</c> on a trie whose
/// keys mix BMP characters with surrogate pairs, stepping by whole code points.
/// After every step the returned result must equal <c>trie.Current</c>.
/// </summary>
public void Test32NextForCodePoint()
{
    StringAndValue[] data =
    {
        // U+4DFF U+10000 U+9999 U+20000 U+DFFF U+10FFFF
        new StringAndValue("\u4dff\ud800\udc00\u9999\ud840\udc00\udfff\udbff\udfff", 2000000000),
        // U+4DFF U+10000 U+9999 U+20002
        new StringAndValue("\u4dff\ud800\udc00\u9999\ud840\udc02", 44444),
        // U+4DFF U+103FF
        new StringAndValue("\u4dff\ud800\udfff", 99999)
    };
    CharsTrie trie = buildTrie(data, data.Length, TrieBuilderOption.Fast);
    Result step;

    // First key: walk every code point; only the last one yields a final value.
    bool firstKeyOk =
        (step = trie.NextForCodePoint(0x4dff)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x10000)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x9999)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x20000)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0xdfff)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x10ffff)) == Result.FinalValue && step == trie.Current &&
        trie.GetValue() == 2000000000;
    if (!firstKeyOk)
    {
        Errln("CharsTrie.NextForCodePoint() fails for " + data[0].s);
    }

    // Second key: restart via FirstForCodePoint instead of an explicit Reset().
    bool secondKeyOk =
        (step = trie.FirstForCodePoint(0x4dff)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x10000)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x9999)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x20002)) == Result.FinalValue && step == trie.Current &&
        trie.GetValue() == 44444;
    if (!secondKeyOk)
    {
        Errln("CharsTrie.NextForCodePoint() fails for " + data[1].s);
    }

    // Shared prefix followed by a code point that is in no key.
    bool mismatchOk =
        (step = trie.Reset().NextForCodePoint(0x4dff)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x10000)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x9999)) == Result.NoValue && step == trie.Current &&
        // no match for trail surrogate
        (step = trie.NextForCodePoint(0x20222)) == Result.NoMatch && step == trie.Current;
    if (!mismatchOk)
    {
        Errln("CharsTrie.NextForCodePoint() fails for \u4dff\\U00010000\u9999\\U00020222");
    }

    // Third key: two code points, the second a supplementary one.
    bool thirdKeyOk =
        (step = trie.Reset().NextForCodePoint(0x4dff)) == Result.NoValue && step == trie.Current &&
        (step = trie.NextForCodePoint(0x103ff)) == Result.FinalValue && step == trie.Current &&
        trie.GetValue() == 99999;
    if (!thirdKeyOk)
    {
        Errln("CharsTrie.NextForCodePoint() fails for " + data[2].s);
    }
}
/// <summary>
/// Looks up generated strings in the trie returned by <c>buildLargeTrie(1111)</c>
/// until the generator reports 1111 unique first characters. Each string must
/// end on a result that has a value, agree with <c>trie.Current</c>, and carry
/// the value the generator reports for it.
/// </summary>
/// <remarks>
/// NOTE(review): presumably <c>Generator</c> replays the same sequence that
/// <c>buildLargeTrie</c> inserted; confirm against those definitions.
/// </remarks>
public void Test37LargeTrie()
{
    CharsTrie trie = buildLargeTrie(1111);
    Generator gen = new Generator();
    while (gen.countUniqueFirstChars() < 1111)
    {
        ICharSequence x = gen.GetString();
        int value = gen.GetValue();
        int index;
        if (x.Length == 0)
        {
            index = 0;
        }
        else
        {
            if (trie.First(x[0]) == Result.NoMatch)
            {
                // Format the UTF-16 code unit itself as an integer. The previous
                // char.GetNumericValue() call returned a double, and the hex
                // format specifier throws FormatException for non-integral types,
                // so the failure report crashed instead of being logged.
                Errln(String.Format("first(first char U+{0:x4})=BytesTrie.Result.NO_MATCH for string {1}\n",
                    (int)x[0], gen.GetIndex()));
                break;
            }
            index = 1;
        }

        Result result = trie.Next(x, index, x.Length);
        if (!result.HasValue() || result != trie.Current || value != trie.GetValue())
        {
            // Plain concatenation: the message has no placeholders, and routing it
            // through String.Format would throw if Prettify(x) contained '{' or '}'.
            Errln("next(" + Prettify(x) + ")!=hasValue or " +
                "next()!=current() or getValue() wrong " +
                "for string " + gen.GetIndex());
            break;
        }
        gen.Next();
    }
}
/// <summary>
/// Verifies that <c>First</c> / <c>FirstForCodePoint</c> behave exactly like
/// <c>Reset()</c> followed by <c>Next</c> / <c>NextForCodePoint</c>, for the
/// first unit of each non-empty test string, and that the following step
/// agrees as well.
/// </summary>
/// <param name="trie">The trie under test; it is reset before returning.</param>
/// <param name="data">Test strings with their values.</param>
/// <param name="dataLength">Number of leading entries of <paramref name="data"/> to check.</param>
private void checkFirst(CharsTrie trie, StringAndValue[] data, int dataLength)
{
    for (int i = 0; i < dataLength; ++i)
    {
        String expected = data[i].s;
        if (expected.Length == 0)
        {
            // skip empty string
            continue;
        }

        // --- UTF-16 code unit flavour ---
        int unit = expected[0];
        int followingUnit = expected.Length > 1 ? expected[1] : 0;
        Result viaFirst = trie.First(unit);
        int valueViaFirst = viaFirst.HasValue() ? trie.GetValue() : -1;
        Result afterFirst = trie.Next(followingUnit);
        bool unitsAgree =
            viaFirst == trie.Reset().Next(unit) &&
            viaFirst == trie.Current &&
            valueViaFirst == (viaFirst.HasValue() ? trie.GetValue() : -1) &&
            afterFirst == trie.Next(followingUnit);
        if (!unitsAgree)
        {
            Errln(String.Format("trie.first(U+{0:X4})!=trie.Reset().Next(same) for {1}",
                unit, data[i].s));
        }

        // --- code point flavour ---
        int codePoint = expected.CodePointAt(0);
        int codePointLength = Character.CharCount(codePoint);
        int followingCodePoint = expected.Length > codePointLength
            ? expected.CodePointAt(codePointLength)
            : 0;
        Result viaFirstCp = trie.FirstForCodePoint(codePoint);
        int valueViaFirstCp = viaFirstCp.HasValue() ? trie.GetValue() : -1;
        Result afterFirstCp = trie.NextForCodePoint(followingCodePoint);
        bool codePointsAgree =
            viaFirstCp == trie.Reset().NextForCodePoint(codePoint) &&
            viaFirstCp == trie.Current &&
            valueViaFirstCp == (viaFirstCp.HasValue() ? trie.GetValue() : -1) &&
            afterFirstCp == trie.NextForCodePoint(followingCodePoint);
        if (!codePointsAgree)
        {
            Errln(String.Format("trie.firstForCodePoint(U+{0:X4})!=trie.Reset().NextForCodePoint(same) for {1}",
                codePoint, data[i].s));
        }
    }
    trie.Reset();
}
/// <summary>
/// Feeds code points from <paramref name="text_"/> into a trie built over the
/// <c>characters</c> field and records every position at which the trie
/// reports a value.
/// </summary>
/// <param name="text_">Source iterator; it is wrapped in a <c>UCharacterIterator</c> and advanced.</param>
/// <param name="maxLength">Stop once this many code points have been consumed.</param>
/// <param name="lengths">Receives, per recorded match, the number of code points consumed so far.</param>
/// <param name="count_">Element 0 receives the number of matches recorded.</param>
/// <param name="limit">Maximum number of matches to record.</param>
/// <param name="values">If non-null, receives the trie value of each recorded match.</param>
/// <returns>
/// The number of code points consumed, or 0 if the iterator was already
/// exhausted (in which case <paramref name="count_"/> is left untouched).
/// </returns>
public override int Matches(CharacterIterator text_, int maxLength, int[] lengths, int[] count_, int limit, int[] values)
{
    UCharacterIterator text = UCharacterIterator.GetInstance(text_);
    CharsTrie trie = new CharsTrie(characters, 0);

    int codePoint = text.NextCodePoint();
    if (codePoint == UCharacterIterator.Done)
    {
        return 0;
    }

    Result state = trie.FirstForCodePoint(codePoint);
    // TODO: should numChars count Character.charCount?
    int consumed = 1;
    int found = 0;
    while (true)
    {
        if (state.HasValue())
        {
            if (found < limit)
            {
                if (values != null)
                {
                    values[found] = trie.GetValue();
                }
                lengths[found] = consumed;
                found++;
            }
            if (state == Result.FinalValue)
            {
                // Nothing can follow a final value.
                break;
            }
        }
        else if (state == Result.NoMatch)
        {
            break;
        }

        if (consumed >= maxLength)
        {
            break;
        }

        codePoint = text.NextCodePoint();
        if (codePoint == UCharacterIterator.Done)
        {
            break;
        }
        ++consumed;
        state = trie.NextForCodePoint(codePoint);
    }

    count_[0] = found;
    return consumed;
}
/// <summary>
/// Checks <c>GetNextChars</c> on the small-built months trie at several
/// positions: the root, after "jan" (before and after <c>GetValue</c>),
/// inside "janu" / "janua", and after the complete word "january".
/// </summary>
public void Test41GetNextChars()
{
    CharsTrie trie = buildMonthsTrie(TrieBuilderOption.Small);
    StringBuilder buffer = new StringBuilder();

    int n = trie.GetNextChars(buffer);
    if (n != 2 || !"aj".ContentEquals(buffer))
    {
        Errln("months getNextChars()!=[aj] at root");
    }

    trie.Next('j');
    trie.Next('a');
    trie.Next('n');

    // getNextChars() directly after next()
    buffer.Length = 0;
    n = trie.GetNextChars(buffer);
    if (n != 20 || !".abcdefghijklmnopqru".ContentEquals(buffer))
    {
        Errln("months getNextChars()!=[.abcdefghijklmnopqru] after \"jan\"");
    }

    // getNextChars() after getValue()
    trie.GetValue(); // next() had returned BytesTrie.Result.INTERMEDIATE_VALUE.
    buffer.Length = 0;
    n = trie.GetNextChars(buffer);
    if (n != 20 || !".abcdefghijklmnopqru".ContentEquals(buffer))
    {
        Errln("months getNextChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
    }

    // getNextChars() from a linear-match node
    trie.Next('u');
    buffer.Length = 0;
    n = trie.GetNextChars(buffer);
    if (n != 1 || !"a".ContentEquals(buffer))
    {
        Errln("months getNextChars()!=[a] after \"janu\"");
    }

    trie.Next('a');
    buffer.Length = 0;
    n = trie.GetNextChars(buffer);
    if (n != 1 || !"r".ContentEquals(buffer))
    {
        Errln("months getNextChars()!=[r] after \"janua\"");
    }

    trie.Next('r');
    trie.Next('y');

    // getNextChars() after a final match
    buffer.Length = 0;
    n = trie.GetNextChars(buffer);
    if (n != 0 || buffer.Length != 0)
    {
        Errln("months getNextChars()!=[] after \"january\"");
    }
}
/// <summary>
/// Checks <c>GetUniqueValue</c> on the fast-built months trie. The test expects
/// 0 where no unique value exists, and <c>(value &lt;&lt; 1) | 1</c> where every
/// continuation leads to the same value.
/// </summary>
public void Test40GetUniqueValue()
{
    CharsTrie trie = buildMonthsTrie(TrieBuilderOption.Fast);
    long unique;

    unique = trie.GetUniqueValue();
    if (unique != 0)
    {
        Errln("unique value at root");
    }

    trie.Next('j');
    trie.Next('a');
    trie.Next('n');

    // getUniqueValue() directly after next()
    unique = trie.GetUniqueValue();
    if (unique != ((1 << 1) | 1))
    {
        Errln("not unique value 1 after \"jan\": instead " + unique);
    }

    trie.First('j');
    trie.Next('u');
    unique = trie.GetUniqueValue();
    if (unique != 0)
    {
        Errln("unique value after \"ju\"");
    }

    if (trie.Next('n') != Result.IntermediateValue || 6 != trie.GetValue())
    {
        Errln("not normal value 6 after \"jun\"");
    }

    // getUniqueValue() after getValue()
    unique = trie.GetUniqueValue();
    if (unique != ((6 << 1) | 1))
    {
        Errln("not unique value 6 after \"jun\"");
    }

    // getUniqueValue() from within a linear-match node
    trie.First('a');
    trie.Next('u');
    unique = trie.GetUniqueValue();
    if (unique != ((8 << 1) | 1))
    {
        Errln("not unique value 8 after \"au\"");
    }
}
/// <summary>
/// Decides whether the text ending at position <paramref name="n"/> matches a
/// break exception: the text is walked backwards through <c>backwardsTrie</c>,
/// and a partial match is additionally confirmed forwards through
/// <c>forwardsPartialTrie</c>.
/// </summary>
/// <param name="n">The location of the possible break.</param>
/// <returns>
/// <c>true</c> if the best backward match is a full match, or a partial match
/// that the forward trie also matches; otherwise <c>false</c>.
/// </returns>
private bool BreakExceptionAt(int n)
{
    // Note: the C++ version of this function is SimpleFilteredSentenceBreakIterator::breakExceptionAt()
    int bestPosn = -1;
    int bestValue = -1;

    // loops while 'n' points to an exception
    text.Index = n;
    backwardsTrie.Reset();

    // Assume a space is following the '.' (so we handle the case: "Mr. /Brown").
    // If the preceding code point is not a space, step forward again to undo the look-behind.
    // TODO: skip a class of chars here??
    // TODO only do this the 1st time?
    if (text.PreviousCodePoint() != ' ')
    {
        text.NextCodePoint();
    }

    int codePoint;
    Result backward = Result.IntermediateValue;
    while (true)
    {
        codePoint = text.PreviousCodePoint();
        if (codePoint == UCharacterIterator.Done)
        {
            // nothing more to consume backwards
            break;
        }
        backward = backwardsTrie.NextForCodePoint(codePoint);
        if (!backward.HasNext())
        {
            // nothing more in the trie
            break;
        }
        if (backward.HasValue())
        {
            // remember the best match so far
            bestPosn = text.Index;
            bestValue = backwardsTrie.GetValue();
        }
    }

    if (backward.Matches())
    {
        // exact match?
        bestValue = backwardsTrie.GetValue();
        bestPosn = text.Index;
    }

    if (bestPosn < 0)
    {
        return false; // No exception here.
    }

    if (bestValue == SimpleFilteredSentenceBreakIteratorBuilder.Match)
    {
        return true; // exact match: exception here.
    }

    if (bestValue == SimpleFilteredSentenceBreakIteratorBuilder.Partial
        && forwardsPartialTrie != null) // make sure there's a forward trie
    {
        // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
        // to see if it matches something going forward.
        forwardsPartialTrie.Reset();
        Result forward = Result.IntermediateValue;
        text.Index = bestPosn; // hope that's close ..
        while (true)
        {
            codePoint = text.NextCodePoint();
            if (codePoint == BreakIterator.Done)
            {
                break;
            }
            forward = forwardsPartialTrie.NextForCodePoint(codePoint);
            if (!forward.HasNext())
            {
                break;
            }
        }
        if (forward.Matches())
        {
            return true; // Exception here
        }
    }

    return false; // No exception here.
}
/// <summary>
/// Verifies <c>SaveState</c> / <c>ResetToState</c>: for each test string, a third
/// of it is consumed, the state is saved, and the trie must return to that
/// state after a deliberate mismatch and after consuming the rest.
/// </summary>
/// <remarks>
/// On every even index the method also checks that resetting to a state that
/// was never saved throws <c>ArgumentException</c>. If a prefix fails to
/// match, the method reports it and returns at once without resetting the trie.
/// </remarks>
/// <param name="trie">The trie under test.</param>
/// <param name="data">Test strings with their expected values.</param>
/// <param name="dataLength">Number of leading entries of <paramref name="data"/> to check.</param>
private void checkNextWithState(CharsTrie trie, StringAndValue[] data, int dataLength)
{
    CharsTrieState noState = new CharsTrieState();
    CharsTrieState state = new CharsTrieState();

    for (int i = 0; i < dataLength; ++i)
    {
        if (i % 2 == 0)
        {
            try
            {
                trie.ResetToState(noState);
                Errln("trie.resetToState(noState) should throw an ArgumentException");
            }
            catch (ArgumentException)
            {
                // good
            }
        }

        String expected = data[i].s;
        int fullLength = expected.Length;
        int prefixLength = fullLength / 3;
        for (int j = 0; j < prefixLength; ++j)
        {
            if (!trie.Next(expected[j]).Matches())
            {
                Errln("trie.Next()=BytesTrie.Result.NO_MATCH for a prefix of " + data[i].s);
                return;
            }
        }

        trie.SaveState(state);
        Result savedResult = trie.Current;
        int savedValue = -99;
        if (savedResult.HasValue())
        {
            savedValue = trie.GetValue();
        }

        Result outcome = trie.Next(0); // mismatch
        if (outcome != Result.NoMatch || outcome != trie.Current)
        {
            Errln("trie.Next(0) matched after part of " + data[i].s);
        }

        // Each branch is evaluated only if the previous check passed.
        if (savedResult != trie.ResetToState(state).Current
            || (savedResult.HasValue() && savedValue != trie.GetValue()))
        {
            Errln("trie.Next(part of " + data[i].s + ") changes current()/getValue() after " +
                "saveState/next(0)/resetToState");
        }
        else if (!(outcome = trie.Next(expected, prefixLength, fullLength)).HasValue()
            || outcome != trie.Current)
        {
            Errln("trie.Next(rest of " + data[i].s + ") does not seem to contain " + data[i].s + " after " +
                "saveState/next(0)/resetToState");
        }
        else if (!(outcome = trie.ResetToState(state).Next(expected, prefixLength, fullLength)).HasValue()
            || outcome != trie.Current)
        {
            Errln("trie does not seem to contain " + data[i].s +
                " after saveState/next(rest)/resetToState");
        }
        else if (trie.GetValue() != data[i].value)
        {
            Errln(String.Format("trie value for {0} is {1}=0x{2:x} instead of expected {3}=0x{4:x}",
                data[i].s,
                trie.GetValue(), trie.GetValue(),
                data[i].value, data[i].value));
        }

        trie.Reset();
    }
}
/// <summary>
/// Verifies <c>Next</c> for every test string in three ways: the whole string
/// in one call, one UTF-16 code unit at a time, and finally by checking that
/// the result at the end of the string agrees with whether any further
/// character can actually continue the match.
/// </summary>
/// <param name="trie">The trie under test.</param>
/// <param name="data">Test strings with their expected values.</param>
/// <param name="dataLength">Number of leading entries of <paramref name="data"/> to check.</param>
private void checkNext(CharsTrie trie, StringAndValue[] data, int dataLength)
{
    CharsTrieState state = new CharsTrieState();
    for (int i = 0; i < dataLength; ++i)
    {
        String expected = data[i].s;
        int length = expected.Length;

        // Pass 1: the whole string in a single Next() call.
        Result outcome = trie.Next(expected, 0, length);
        if (!outcome.HasValue() || outcome != trie.Current)
        {
            Errln("trie does not seem to contain " + data[i].s);
        }
        else if (trie.GetValue() != data[i].value)
        {
            Errln(String.Format("trie value for {0} is {1}=0x{2:x} instead of expected {3}=0x{4:x}",
                data[i].s,
                trie.GetValue(), trie.GetValue(),
                data[i].value, data[i].value));
        }
        else if (outcome != trie.Current || trie.GetValue() != data[i].value)
        {
            Errln("trie value for " + data[i].s + " changes when repeating current()/getValue()");
        }

        // Pass 2: one code unit at a time.
        trie.Reset();
        outcome = trie.Current;
        for (int j = 0; j < length; ++j)
        {
            if (!outcome.HasNext())
            {
                Errln(String.Format("trie.Current!=hasNext before end of {0} (at index {1})",
                    data[i].s, j));
                break;
            }
            if (outcome == Result.IntermediateValue)
            {
                // Reading the value must not disturb the current result.
                trie.GetValue();
                if (trie.Current != Result.IntermediateValue)
                {
                    Errln(String.Format("trie.getValue().Current!=Result.INTERMEDIATE_VALUE " +
                        "before end of {0} (at index {1})", data[i].s, j));
                    break;
                }
            }
            outcome = trie.Next(expected[j]);
            if (!outcome.Matches())
            {
                Errln(String.Format("trie.Next()=Result.NO_MATCH " +
                    "before end of {0} (at index {1})", data[i].s, j));
                break;
            }
            if (outcome != trie.Current)
            {
                Errln(String.Format("trie.Next()!=following current() " +
                    "before end of {0} (at index {1})", data[i].s, j));
                break;
            }
        }
        if (!outcome.HasValue())
        {
            Errln("trie.Next()!=hasValue at the end of " + data[i].s);
            continue;
        }
        trie.GetValue();
        if (outcome != trie.Current)
        {
            Errln("trie.Current != current()+getValue()+current() after end of " + data[i].s);
        }

        // Compare the final current() with whether next() can actually continue.
        trie.SaveState(state);
        bool canContinue = false;
        for (int probe = 0x20; probe < 0xe000; ++probe)
        {
            if (probe == 0x80)
            {
                probe = 0xd800; // Check for ASCII and surrogates but not all of the BMP.
            }
            if (trie.ResetToState(state).Next(probe).Matches())
            {
                canContinue = true;
                break;
            }
        }
        if ((outcome == Result.IntermediateValue) != canContinue)
        {
            Errln("(trie.Current==BytesTrie.Result.INTERMEDIATE_VALUE) contradicts " +
                "(trie.Next(some char)!=BytesTrie.Result.NO_MATCH) after end of " + data[i].s);
        }

        trie.Reset();
    }
}