Exemplo n.º 1
        /// <summary>
        /// Exercises <c>FirstForCodePoint</c>/<c>NextForCodePoint</c> against keys that contain
        /// supplementary code points (written as surrogate pairs) and an unpaired trail surrogate.
        /// </summary>
        public void Test32NextForCodePoint()
        {
            StringAndValue[] data =
            {
                // "\u4dff\\U00010000\u9999\\U00020000\udfff\\U0010ffff"
                new StringAndValue("\u4dff\ud800\udc00\u9999\ud840\udc00\udfff\udbff\udfff", 2000000000),
                // "\u4dff\\U00010000\u9999\\U00020002"
                new StringAndValue("\u4dff\ud800\udc00\u9999\ud840\udc02",                        44444),
                // "\u4dff\\U000103ff"
                new StringAndValue("\u4dff\ud800\udfff", 99999)
            };
            CharsTrie trie = buildTrie(data, data.Length, TrieBuilderOption.Fast);
            Result    result;

            // data[0]: every step except the last must report NoValue, the last one FinalValue,
            // and each returned result must agree with trie.Current.
            if ((result = trie.NextForCodePoint(0x4dff)) != Result.NoValue || result != trie.Current ||
                (result = trie.NextForCodePoint(0x10000)) != Result.NoValue || result != trie.Current ||
                (result = trie.NextForCodePoint(0x9999)) != Result.NoValue || result != trie.Current ||
                (result = trie.NextForCodePoint(0x20000)) != Result.NoValue || result != trie.Current ||
                (result = trie.NextForCodePoint(0xdfff)) != Result.NoValue || result != trie.Current ||
                (result = trie.NextForCodePoint(0x10ffff)) != Result.FinalValue || result != trie.Current ||
                trie.GetValue() != 2000000000
                )
            {
                Errln("CharsTrie.NextForCodePoint() fails for " + data[0].s);
            }

            // data[1]: restart the walk with FirstForCodePoint instead of an explicit Reset().
            if ((result = trie.FirstForCodePoint(0x4dff)) != Result.NoValue || result != trie.Current ||
                (result = trie.NextForCodePoint(0x10000)) != Result.NoValue || result != trie.Current ||
                (result = trie.NextForCodePoint(0x9999)) != Result.NoValue || result != trie.Current ||
                (result = trie.NextForCodePoint(0x20002)) != Result.FinalValue || result != trie.Current ||
                trie.GetValue() != 44444
                )
            {
                Errln("CharsTrie.NextForCodePoint() fails for " + data[1].s);
            }

            // U+20222 has the same lead surrogate (D840) as U+20000/U+20002 but a different
            // trail surrogate, so the walk must end in NoMatch.
            if ((result = trie.Reset().NextForCodePoint(0x4dff)) != Result.NoValue || result != trie.Current ||
                (result = trie.NextForCodePoint(0x10000)) != Result.NoValue || result != trie.Current ||
                (result = trie.NextForCodePoint(0x9999)) != Result.NoValue || result != trie.Current ||
                (result = trie.NextForCodePoint(0x20222)) != Result.NoMatch || result != trie.Current  // no match for trail surrogate
                )
            {
                Errln("CharsTrie.NextForCodePoint() fails for \u4dff\\U00010000\u9999\\U00020222");
            }

            // data[2]: the shortest key, reached after a fresh Reset().
            if ((result = trie.Reset().NextForCodePoint(0x4dff)) != Result.NoValue || result != trie.Current ||
                (result = trie.NextForCodePoint(0x103ff)) != Result.FinalValue || result != trie.Current ||
                trie.GetValue() != 99999
                )
            {
                Errln("CharsTrie.NextForCodePoint() fails for " + data[2].s);
            }
        }
Exemplo n.º 2
        /// <summary>
        /// For every non-empty key in <paramref name="data"/>, checks that starting a walk with
        /// <c>First</c> (or <c>FirstForCodePoint</c>) yields the same results as calling
        /// <c>Reset()</c> and then <c>Next</c> (or <c>NextForCodePoint</c>) with the same input.
        /// </summary>
        /// <param name="trie">The trie under test.</param>
        /// <param name="data">The keys and values the trie was built from.</param>
        /// <param name="dataLength">Number of leading entries of <paramref name="data"/> to check.</param>
        private void checkFirst(CharsTrie trie, StringAndValue[] data, int dataLength)
        {
            // NOTE(review): nothing here resets the trie after the last key, so it is left wherever
            // the final NextForCodePoint call ended; callers that reuse it presumably need to
            // Reset() it themselves - confirm against the callers.
            for (int i = 0; i < dataLength; ++i)
            {
                if (data[i].s.Length == 0)
                {
                    continue;  // skip empty string
                }
                String expectedString = data[i].s;

                // Pass 1: UTF-16 code units.
                int    c           = expectedString[0];
                int    nextCp      = expectedString.Length > 1 ? expectedString[1] : 0;
                Result firstResult = trie.First(c);
                int    firstValue  = firstResult.HasValue() ? trie.GetValue() : -1;
                Result nextResult  = trie.Next(nextCp);
                if (firstResult != trie.Reset().Next(c) ||
                    firstResult != trie.Current ||
                    firstValue != (firstResult.HasValue() ? trie.GetValue() : -1) ||
                    nextResult != trie.Next(nextCp)
                    )
                {
                    Errln(String.Format("trie.first(U+{0:X4})!=trie.Reset().Next(same) for {1}",
                                        c, data[i].s));
                }

                // Pass 2: whole code points; the second code point starts after however many
                // code units the first one occupies.
                c = expectedString.CodePointAt(0);
                int cLength = Character.CharCount(c);
                nextCp = expectedString.Length > cLength ? expectedString.CodePointAt(cLength) : 0;

                firstResult = trie.FirstForCodePoint(c);
                firstValue  = firstResult.HasValue() ? trie.GetValue() : -1;
                nextResult  = trie.NextForCodePoint(nextCp);
                if (firstResult != trie.Reset().NextForCodePoint(c) ||
                    firstResult != trie.Current ||
                    firstValue != (firstResult.HasValue() ? trie.GetValue() : -1) ||
                    nextResult != trie.NextForCodePoint(nextCp)
                    )
                {
                    Errln(String.Format("trie.firstForCodePoint(U+{0:X4})!=trie.Reset().NextForCodePoint(same) for {1}",
                                        c, data[i].s));
                }
            }
        }
Exemplo n.º 3
 // next(string) is also tested in other functions,
 // but here we try to go partway through the string, and then beyond it.
 // For each key: feed the first half, then the remainder, then one extra NUL unit
 // which must not match.
 private void checkNextString(CharsTrie trie, StringAndValue[] data, int dataLength)
 {
     for (int i = 0; i < dataLength; ++i)
     {
         String expectedString = data[i].s;
         int    stringLength   = expectedString.Length;
         if (!trie.Next(expectedString, 0, stringLength / 2).Matches())
         {
             Errln("trie.Next(up to middle of string)=BytesTrie.Result.NO_MATCH for " + data[i].s);
         }
         // Test that we stop properly at the end of the string.
         trie.Next(expectedString, stringLength / 2, stringLength);
         if (trie.Next(0).Matches())
         {
             Errln("trie.Next(string+NUL)!=BytesTrie.Result.NO_MATCH for " + data[i].s);
         }
         // The trie keeps its position between calls, so return to the root before the next
         // key; otherwise the next key would be matched from where this one ended.
         trie.Reset();
     }
 }
Exemplo n.º 4
        /// <summary>
        /// Is there an exception at this point?
        /// Positions <c>text</c> at <paramref name="n"/>, feeds the preceding code points (walking
        /// backwards) to <c>backwardsTrie</c>, and remembers the best position/value seen.
        /// </summary>
        /// <param name="n">The location of the possible break.</param>
        /// <returns>
        /// <c>true</c> on the visible path where the best backward match carries the
        /// <c>Match</c> value; <c>false</c> when the final statement is reached.
        /// </returns>
        private bool BreakExceptionAt(int n)
            // NOTE(review): brace-only lines, and apparently a few statements, are missing from
            // this excerpt; the notes below mark the places where that affects the reading.
            // Note: the C++ version of this function is SimpleFilteredSentenceBreakIterator::breakExceptionAt()

            // Position and trie value of the best backward match found so far (-1 = none).
            int bestPosn  = -1;
            int bestValue = -1;

            // loops while 'n' points to an exception
            text.Index = n;
            int uch;

            // Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
            if ((uch = text.PreviousCodePoint()) == ' ')
            { // TODO: skip a class of chars here??
              // TODO only do this the 1st time?
                // NOTE(review): the assignment below looks like it belongs to an 'else' branch
                // (undo the step back when the previous code point is not a space) whose
                // 'else' and braces are not visible here - confirm against the full file.
                uch = text.NextCodePoint();

            Result r = Result.IntermediateValue;

            while ((uch = text.PreviousCodePoint()) != UCharacterIterator.Done && // more to consume backwards and..
                   ((r = backwardsTrie.NextForCodePoint(uch)).HasNext()))
            {                                                                     // more in the trie
                if (r.HasValue())
                {                                                                 // remember the best match so far
                    bestPosn  = text.Index;
                    bestValue = backwardsTrie.GetValue();

            // The loop can also stop on a result that matches but has no continuation;
            // record that one as the best match too.
            if (r.Matches())
            { // exact match?
                bestValue = backwardsTrie.GetValue();
                bestPosn  = text.Index;

            if (bestPosn >= 0)
                if (bestValue == SimpleFilteredSentenceBreakIteratorBuilder.Match)
                {                 // exact match!
                    return(true); // Exception here.
                else if (bestValue == SimpleFilteredSentenceBreakIteratorBuilder.Partial && forwardsPartialTrie != null)
                    // make sure there's a forward trie
                    // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
                    // to see if it matches something going forward.

                    Result rfwd = Result.IntermediateValue;
                    text.Index = bestPosn; // hope that's close ..
                    // Feed code points forward from bestPosn until the text or the trie path ends.
                    while ((uch = text.NextCodePoint()) != BreakIterator.Done &&
                           ((rfwd = forwardsPartialTrie.NextForCodePoint(uch)).HasNext()))
                    // NOTE(review): the statement guarded by the next 'if' is not visible in this
                    // excerpt; presumably it returns true - confirm against the full file.
                    if (rfwd.Matches())
                        // Exception here
                    } // else fall through
                }     // else fall through
            }         // else fall through
            return(false); // No exception here.
Exemplo n.º 5
        // For each key: feeds the first third of the key one code unit at a time, records
        // Current (and the value, if any) at that point, forces a mismatch with Next(0), then
        // checks that ResetToState(state) restores Current/GetValue() and that the rest of the
        // key can still be matched from there with the expected value.
        // NOTE(review): brace-only lines and some statements are missing from this excerpt;
        // in particular nothing visible here ever stores a position into 'state' before it is
        // passed to ResetToState - presumably a save call was lost; confirm against the full file.
        private void checkNextWithState(CharsTrie trie, StringAndValue[] data, int dataLength)
            CharsTrieState noState = new CharsTrieState(), state = new CharsTrieState();

            for (int i = 0; i < dataLength; ++i)
                // Only on even-numbered entries.
                if ((i & 1) == 0)
                    // NOTE(review): the 'try' and the call that is expected to throw are not
                    // visible; judging by the message it is a ResetToState(noState) call - confirm.
                        Errln("trie.resetToState(noState) should throw an ArgumentException");
                    catch (ArgumentException e)
                        // good
                String expectedString = data[i].s;
                int    stringLength   = expectedString.Length;
                int    partialLength  = stringLength / 3;
                // Walk the first third of the key; every step has to match.
                for (int j = 0; j < partialLength; ++j)
                    if (!trie.Next(expectedString[j]).Matches())
                        Errln("trie.Next()=BytesTrie.Result.NO_MATCH for a prefix of " + data[i].s);
                Result resultAtState = trie.Current;
                Result result;
                int    valueAtState = -99;  // placeholder, only compared when resultAtState has a value
                if (resultAtState.HasValue())
                    valueAtState = trie.GetValue();
                result = trie.Next(0);  // mismatch
                if (result != Result.NoMatch || result != trie.Current)
                    Errln("trie.Next(0) matched after part of " + data[i].s);
                // Going back to the stored state must reproduce the earlier result and value.
                if (resultAtState != trie.ResetToState(state).Current ||
                    (resultAtState.HasValue() && valueAtState != trie.GetValue())
                    // NOTE(review): the message below ends on a dangling '+'; its continuation
                    // line is missing from this excerpt.
                    Errln("trie.Next(part of " + data[i].s + ") changes current()/getValue() after " +
                else if (!(result = trie.Next(expectedString, partialLength, stringLength)).HasValue() ||
                         result != trie.Current)
                    // NOTE(review): dangling '+' here as well; continuation line missing.
                    Errln("trie.Next(rest of " + data[i].s + ") does not seem to contain " + data[i].s + " after " +
                else if (!(result = trie.ResetToState(state).
                                    Next(expectedString, partialLength, stringLength)).HasValue() ||
                         result != trie.Current)
                    Errln("trie does not seem to contain " + data[i].s +
                          " after saveState/next(rest)/resetToState");
                else if (trie.GetValue() != data[i].value)
                    // NOTE(review): the format string has five placeholders ({0}..{4}) but only
                    // four arguments are visible; the {0} argument (presumably data[i].s) appears
                    // to be missing - confirm.
                    Errln(String.Format("trie value for {0} is {1}=0x{2:x} instead of expected {3}=0x{4:x}",
                                        trie.GetValue(), trie.GetValue(),
                                        data[i].value, data[i].value));
Exemplo n.º 6
        // For each key: matches the whole key in one Next(string) call and checks its value,
        // then walks the key one code unit at a time checking every intermediate result against
        // trie.Current, and finally compares the last result with whether any further code unit
        // can actually continue the match.
        // NOTE(review): brace-only lines and some statements are missing from this excerpt;
        // nothing visible stores a position into 'state' before ResetToState(state) is called,
        // and nothing visible returns the trie to its start between the whole-string match and
        // the per-unit walk - presumably those calls were lost; confirm against the full file.
        private void checkNext(CharsTrie trie, StringAndValue[] data, int dataLength)
            CharsTrieState state = new CharsTrieState();

            for (int i = 0; i < dataLength; ++i)
                String expectedString = data[i].s;
                int    stringLength   = expectedString.Length;
                Result result;
                // Whole key in one call: must end on a value that agrees with trie.Current.
                if (!(result = trie.Next(expectedString, 0, stringLength)).HasValue() ||
                    result != trie.Current
                    Errln("trie does not seem to contain " + data[i].s);
                else if (trie.GetValue() != data[i].value)
                    // NOTE(review): five placeholders ({0}..{4}) but only four visible arguments;
                    // the {0} argument (presumably data[i].s) appears to be missing - confirm.
                    Errln(String.Format("trie value for {0} is {1}=0x{2:x} instead of expected {3}=0x{4:x}",
                                        trie.GetValue(), trie.GetValue(),
                                        data[i].value, data[i].value));
                // Asking again must give the same result and value.
                else if (result != trie.Current || trie.GetValue() != data[i].value)
                    Errln("trie value for " + data[i].s + " changes when repeating current()/getValue()");
                result = trie.Current;
                // Same key again, one code unit per step.
                for (int j = 0; j < stringLength; ++j)
                    if (!result.HasNext())
                        Errln(String.Format("trie.Current!=hasNext before end of {0} (at index {1})",
                                            data[i].s, j));
                    if (result == Result.IntermediateValue)
                        if (trie.Current != Result.IntermediateValue)
                            Errln(String.Format("trie.getValue().Current!=Result.INTERMEDIATE_VALUE " +
                                                "before end of {0} (at index {1})", data[i].s, j));
                    result = trie.Next(expectedString[j]);
                    if (!result.Matches())
                        Errln(String.Format("trie.Next()=Result.NO_MATCH " +
                                            "before end of {0} (at index {1})", data[i].s, j));
                    if (result != trie.Current)
                        Errln(String.Format("trie.Next()!=following current() " +
                                            "before end of {0} (at index {1})", data[i].s, j));
                if (!result.HasValue())
                    Errln("trie.Next()!=hasValue at the end of " + data[i].s);
                if (result != trie.Current)
                    // NOTE(review): the message below ends on a dangling '+'; its continuation
                    // line is missing from this excerpt.
                    Errln("trie.Current != current()+getValue()+current() after end of " +
                // Compare the final current() with whether next() can actually continue.
                bool nextContinues = false;
                // Probes 0x20..0x7f and 0xd800..0xdfff, each from the position held in 'state'.
                for (int c = 0x20; c < 0xe000; ++c)
                    if (c == 0x80)
                        c = 0xd800;  // Check for ASCII and surrogates but not all of the BMP.
                    if (trie.ResetToState(state).Next(c).Matches())
                        nextContinues = true;
                // IntermediateValue should be reported exactly when some probed unit continues.
                if ((result == Result.IntermediateValue) != nextContinues)
                    Errln("(trie.Current==BytesTrie.Result.INTERMEDIATE_VALUE) contradicts " +
                          "(trie.Next(some char)!=BytesTrie.Result.NO_MATCH) after end of " + data[i].s);