Esempio n. 1
0
        // Builds a trie from three strings that contain supplementary code points and walks
        // it one code point at a time. Every step checks the returned Result, that
        // trie.Current reports the same Result, and (at the end of a stored string) the value.
        public void Test32NextForCodePoint()
        {
            StringAndValue[] entries =
            {
                // "\u4dff\\U00010000\u9999\\U00020000\udfff\\U0010ffff"
                new StringAndValue("\u4dff\ud800\udc00\u9999\ud840\udc00\udfff\udbff\udfff", 2000000000),
                // "\u4dff\\U00010000\u9999\\U00020002"
                new StringAndValue("\u4dff\ud800\udc00\u9999\ud840\udc02", 44444),
                // "\u4dff\\U000103ff"
                new StringAndValue("\u4dff\ud800\udfff", 99999)
            };
            CharsTrie trie = buildTrie(entries, entries.Length, TrieBuilderOption.Fast);
            Result step;

            // Each chain is evaluated left to right and stops at the first step that fails,
            // so no further trie calls are made once a walk has gone wrong.

            // Walk all of entries[0], starting from the freshly built trie.
            bool firstWalkOk =
                (step = trie.NextForCodePoint(0x4dff)) == Result.NoValue && step == trie.Current &&
                (step = trie.NextForCodePoint(0x10000)) == Result.NoValue && step == trie.Current &&
                (step = trie.NextForCodePoint(0x9999)) == Result.NoValue && step == trie.Current &&
                (step = trie.NextForCodePoint(0x20000)) == Result.NoValue && step == trie.Current &&
                (step = trie.NextForCodePoint(0xdfff)) == Result.NoValue && step == trie.Current &&
                (step = trie.NextForCodePoint(0x10ffff)) == Result.FinalValue && step == trie.Current &&
                trie.GetValue() == 2000000000;
            if (!firstWalkOk)
            {
                Errln("CharsTrie.NextForCodePoint() fails for " + entries[0].s);
            }

            // Walk entries[1]; FirstForCodePoint() is used for the first step instead of a reset.
            bool secondWalkOk =
                (step = trie.FirstForCodePoint(0x4dff)) == Result.NoValue && step == trie.Current &&
                (step = trie.NextForCodePoint(0x10000)) == Result.NoValue && step == trie.Current &&
                (step = trie.NextForCodePoint(0x9999)) == Result.NoValue && step == trie.Current &&
                (step = trie.NextForCodePoint(0x20002)) == Result.FinalValue && step == trie.Current &&
                trie.GetValue() == 44444;
            if (!secondWalkOk)
            {
                Errln("CharsTrie.NextForCodePoint() fails for " + entries[1].s);
            }

            // Same prefix, then a code point that is not in the trie.
            bool mismatchWalkOk =
                (step = trie.Reset().NextForCodePoint(0x4dff)) == Result.NoValue && step == trie.Current &&
                (step = trie.NextForCodePoint(0x10000)) == Result.NoValue && step == trie.Current &&
                (step = trie.NextForCodePoint(0x9999)) == Result.NoValue && step == trie.Current &&
                (step = trie.NextForCodePoint(0x20222)) == Result.NoMatch && step == trie.Current;  // no match for trail surrogate
            if (!mismatchWalkOk)
            {
                Errln("CharsTrie.NextForCodePoint() fails for \u4dff\\U00010000\u9999\\U00020222");
            }

            // Walk entries[2] after an explicit reset.
            bool thirdWalkOk =
                (step = trie.Reset().NextForCodePoint(0x4dff)) == Result.NoValue && step == trie.Current &&
                (step = trie.NextForCodePoint(0x103ff)) == Result.FinalValue && step == trie.Current &&
                trie.GetValue() == 99999;
            if (!thirdWalkOk)
            {
                Errln("CharsTrie.NextForCodePoint() fails for " + entries[2].s);
            }
        }
Esempio n. 2
0
        // Looks up every string produced by a fresh Generator in the trie returned by
        // buildLargeTrie(1111), until the generator reports 1111 unique first characters.
        // Each string must end on a trie position that has a value, trie.Current must agree
        // with the Result returned by Next(), and the stored value must equal the generator's.
        // NOTE(review): presumably buildLargeTrie() fills the trie from an identically seeded
        // Generator - confirm against its definition.
        public void Test37LargeTrie()
        {
            CharsTrie trie = buildLargeTrie(1111);
            Generator gen  = new Generator();

            while (gen.countUniqueFirstChars() < 1111)
            {
                ICharSequence x     = gen.GetString();
                int           value = gen.GetValue();
                int           index;
                if (x.Length == 0)
                {
                    index = 0;
                }
                else
                {
                    if (trie.First(x[0]) == Result.NoMatch)
                    {
                        // Report the UTF-16 code unit itself. char.GetNumericValue() would
                        // yield the character's digit value as a double (-1 for non-digits),
                        // and a double cannot be formatted with the hexadecimal specifier:
                        // String.Format would throw FormatException instead of reporting.
                        Errln(String.Format("first(first char U+{0:x4})=BytesTrie.Result.NO_MATCH for string {1}\n",
                                            (int)x[0], gen.GetIndex()));
                        break;
                    }
                    index = 1;  // the first unit was already consumed by First()
                }
                Result result = trie.Next(x, index, x.Length);
                if (!result.HasValue() || result != trie.Current || value != trie.GetValue())
                {
                    // The message is already fully concatenated. It is not passed through
                    // String.Format, so braces in the prettified string cannot be mistaken
                    // for format items.
                    Errln("next(" + Prettify(x) + ")!=hasValue or " +
                          "next()!=current() or getValue() wrong " +
                          "for string " + gen.GetIndex());
                    break;
                }
                gen.Next();
            }
        }
Esempio n. 3
0
        /// <summary>
        /// For every non-empty string in <paramref name="data"/>, checks that starting a walk
        /// with First()/FirstForCodePoint() behaves like Reset() followed by
        /// Next()/NextForCodePoint(): same result, same trie.Current, same value (when there
        /// is one), and the same result for the following unit or code point.
        /// The trie is reset before returning.
        /// </summary>
        /// <param name="trie">The trie under test.</param>
        /// <param name="data">The strings (and values) to check.</param>
        /// <param name="dataLength">The number of leading entries of <paramref name="data"/> to check.</param>
        private void checkFirst(CharsTrie trie, StringAndValue[] data, int dataLength)
        {
            for (int i = 0; i < dataLength; ++i)
            {
                String text = data[i].s;
                if (text.Length == 0)
                {
                    continue;  // skip empty string
                }

                // Pass 1: UTF-16 code units.
                int unit = text[0];
                int followingUnit = 0;  // 0 stands in when the string has no second unit
                if (text.Length > 1)
                {
                    followingUnit = text[1];
                }
                Result firstResult = trie.First(unit);
                int    firstValue  = firstResult.HasValue() ? trie.GetValue() : -1;
                Result nextResult  = trie.Next(followingUnit);
                // Evaluated left to right, stopping at the first disagreement.
                bool unitsAgree =
                    firstResult == trie.Reset().Next(unit) &&
                    firstResult == trie.Current &&
                    firstValue == (firstResult.HasValue() ? trie.GetValue() : -1) &&
                    nextResult == trie.Next(followingUnit);
                if (!unitsAgree)
                {
                    Errln(String.Format("trie.first(U+{0:X4})!=trie.Reset().Next(same) for {1}",
                                        unit, data[i].s));
                }

                // Pass 2: whole code points.
                int cp       = text.CodePointAt(0);
                int cpLength = Character.CharCount(cp);
                int followingCp = 0;  // 0 stands in when nothing follows the first code point
                if (text.Length > cpLength)
                {
                    followingCp = text.CodePointAt(cpLength);
                }

                firstResult = trie.FirstForCodePoint(cp);
                firstValue  = firstResult.HasValue() ? trie.GetValue() : -1;
                nextResult  = trie.NextForCodePoint(followingCp);
                bool codePointsAgree =
                    firstResult == trie.Reset().NextForCodePoint(cp) &&
                    firstResult == trie.Current &&
                    firstValue == (firstResult.HasValue() ? trie.GetValue() : -1) &&
                    nextResult == trie.NextForCodePoint(followingCp);
                if (!codePointsAgree)
                {
                    Errln(String.Format("trie.firstForCodePoint(U+{0:X4})!=trie.Reset().NextForCodePoint(same) for {1}",
                                        cp, data[i].s));
                }
            }
            trie.Reset();
        }
Esempio n. 4
0
        /// <summary>
        /// Feeds code points from <paramref name="text_"/> into a trie over
        /// <c>characters</c> and records every position at which the trie reports a value.
        /// </summary>
        /// <param name="text_">The text to match; it is read forward from its current position.</param>
        /// <param name="maxLength">Matching stops once this many code points have been consumed.</param>
        /// <param name="lengths">Output: for each recorded match, the number of code points consumed when it was found.</param>
        /// <param name="count_">Output: element 0 receives the number of recorded matches.</param>
        /// <param name="limit">The maximum number of matches to record.</param>
        /// <param name="values">Optional output (may be <c>null</c>): the trie value of each recorded match.</param>
        /// <returns>
        /// The number of code points consumed from the text, including one that ended the
        /// walk with no match; 0 if the text was already exhausted.
        /// </returns>
        public override int Matches(CharacterIterator text_, int maxLength, int[] lengths, int[] count_, int limit, int[] values)
        {
            UCharacterIterator text = UCharacterIterator.GetInstance(text_);
            CharsTrie          uct  = new CharsTrie(characters, 0);
            int c = text.NextCodePoint();

            if (c == UCharacterIterator.Done)
            {
                // Nothing to match. Still report a count, as every other exit path does,
                // so a caller that reuses the output array never sees a stale value.
                count_[0] = 0;
                return(0);
            }
            Result result = uct.FirstForCodePoint(c);
            // TODO: should numChars count Character.charCount?
            int numChars = 1;
            int count    = 0;

            for (; ;)
            {
                if (result.HasValue())
                {
                    // Matches beyond 'limit' are found but not recorded.
                    if (count < limit)
                    {
                        if (values != null)
                        {
                            values[count] = uct.GetValue();
                        }
                        lengths[count] = numChars;
                        count++;
                    }

                    if (result == Result.FinalValue)
                    {
                        break;  // the trie cannot continue past a final value
                    }
                }
                else if (result == Result.NoMatch)
                {
                    break;
                }

                if (numChars >= maxLength)
                {
                    break;
                }
                c = text.NextCodePoint();
                if (c == UCharacterIterator.Done)
                {
                    break;
                }
                ++numChars;
                result = uct.NextForCodePoint(c);
            }
            count_[0] = count;
            return(numChars);
        }
Esempio n. 5
0
        // Checks GetNextChars() on the small-built months trie at several positions: the root,
        // right after Next(), after GetValue(), inside the "janu"/"janua" run, and after the
        // complete string "january". Both the returned count and the appended characters
        // are compared.
        public void Test41GetNextChars()
        {
            CharsTrie     trie = buildMonthsTrie(TrieBuilderOption.Small);
            StringBuilder sb   = new StringBuilder();

            int n = trie.GetNextChars(sb);
            if (!(n == 2 && "aj".ContentEquals(sb)))
            {
                Errln("months getNextChars()!=[aj] at root");
            }

            trie.Next('j');
            trie.Next('a');
            trie.Next('n');

            // GetNextChars() directly after Next()
            sb.Length = 0;
            n = trie.GetNextChars(sb);
            if (!(n == 20 && ".abcdefghijklmnopqru".ContentEquals(sb)))
            {
                Errln("months getNextChars()!=[.abcdefghijklmnopqru] after \"jan\"");
            }

            // GetNextChars() after GetValue(); the preceding Next() had returned an
            // intermediate value.
            trie.GetValue();
            sb.Length = 0;
            n = trie.GetNextChars(sb);
            if (!(n == 20 && ".abcdefghijklmnopqru".ContentEquals(sb)))
            {
                Errln("months getNextChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
            }

            // GetNextChars() from a linear-match node
            trie.Next('u');
            sb.Length = 0;
            n = trie.GetNextChars(sb);
            if (!(n == 1 && "a".ContentEquals(sb)))
            {
                Errln("months getNextChars()!=[a] after \"janu\"");
            }

            trie.Next('a');
            sb.Length = 0;
            n = trie.GetNextChars(sb);
            if (!(n == 1 && "r".ContentEquals(sb)))
            {
                Errln("months getNextChars()!=[r] after \"janua\"");
            }

            trie.Next('r');
            trie.Next('y');

            // GetNextChars() after a final match
            sb.Length = 0;
            n = trie.GetNextChars(sb);
            if (!(n == 0 && sb.Length == 0))
            {
                Errln("months getNextChars()!=[] after \"january\"");
            }
        }
Esempio n. 6
0
        // Checks GetUniqueValue() on the fast-built months trie at the root, after "jan",
        // after "ju", after "jun" (following GetValue()), and after "au".
        // The test expects 0 where no unique value is reachable and (value << 1) | 1 where
        // one is.
        public void Test40GetUniqueValue()
        {
            CharsTrie trie = buildMonthsTrie(TrieBuilderOption.Fast);

            long unique = trie.GetUniqueValue();
            if (unique != 0)
            {
                Errln("unique value at root");
            }

            trie.Next('j');
            trie.Next('a');
            trie.Next('n');
            // GetUniqueValue() directly after Next()
            unique = trie.GetUniqueValue();
            if (unique != ((1 << 1) | 1))
            {
                Errln("not unique value 1 after \"jan\": instead " + unique);
            }

            trie.First('j');
            trie.Next('u');
            unique = trie.GetUniqueValue();
            if (unique != 0)
            {
                Errln("unique value after \"ju\"");
            }

            // GetValue() is only consulted when Next('n') reported an intermediate value.
            if (!(trie.Next('n') == Result.IntermediateValue && trie.GetValue() == 6))
            {
                Errln("not normal value 6 after \"jun\"");
            }

            // GetUniqueValue() after GetValue()
            unique = trie.GetUniqueValue();
            if (unique != ((6 << 1) | 1))
            {
                Errln("not unique value 6 after \"jun\"");
            }

            // GetUniqueValue() from within a linear-match node
            trie.First('a');
            trie.Next('u');
            unique = trie.GetUniqueValue();
            if (unique != ((8 << 1) | 1))
            {
                Errln("not unique value 8 after \"au\"");
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Is there an exception at this point?
        /// </summary>
        /// <remarks>
        /// Reads the text backwards from <paramref name="n"/> through the backwards trie and
        /// remembers the latest position at which the trie reported a value. A remembered
        /// value of <c>Match</c> is an exception outright. A remembered value of
        /// <c>Partial</c> is confirmed by reading the text forwards from the remembered
        /// position through the forwards partial trie, when there is one.
        /// This method moves the index of the shared text iterator.
        /// </remarks>
        /// <param name="n">The location of the possible break.</param>
        /// <returns>
        /// <c>true</c> if an exception applies at <paramref name="n"/>; otherwise, <c>false</c>.
        /// </returns>
        private bool BreakExceptionAt(int n)
        {
            // Note: the C++ version of this function is SimpleFilteredSentenceBreakIterator::breakExceptionAt()

            int bestPosn  = -1;
            int bestValue = -1;

            // loops while 'n' points to an exception
            text.Index = n;
            backwardsTrie.Reset();
            int uch;

            // Assume a space is following the '.' (so we handle the case: "Mr. /Brown").
            // Step back over it; when the previous code point is not a space, step forward
            // again so the backwards scan starts where it otherwise would have.
            // TODO: skip a class of chars here??
            // TODO only do this the 1st time?
            if (text.PreviousCodePoint() != ' ')
            {
                text.NextCodePoint();
            }

            Result r = Result.IntermediateValue;

            while ((uch = text.PreviousCodePoint()) != UCharacterIterator.Done && // more to consume backwards and..
                   ((r = backwardsTrie.NextForCodePoint(uch)).HasNext()))
            {                                                                     // more in the trie
                if (r.HasValue())
                {                                                                 // remember the best match so far
                    bestPosn  = text.Index;
                    bestValue = backwardsTrie.GetValue();
                }
            }

            if (r.Matches())
            { // exact match?
                bestValue = backwardsTrie.GetValue();
                bestPosn  = text.Index;
            }

            if (bestPosn >= 0)
            {
                if (bestValue == SimpleFilteredSentenceBreakIteratorBuilder.Match)
                {                 // exact match!
                    return(true); // Exception here.
                }
                else if (bestValue == SimpleFilteredSentenceBreakIteratorBuilder.Partial && forwardsPartialTrie != null)
                {
                    // make sure there's a forward trie
                    // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
                    // to see if it matches something going forward.
                    forwardsPartialTrie.Reset();

                    Result rfwd = Result.IntermediateValue;
                    text.Index = bestPosn; // hope that's close ..
                    // Compare against the text iterator's own end sentinel, as the backwards
                    // scan above does, rather than a constant borrowed from BreakIterator.
                    while ((uch = text.NextCodePoint()) != UCharacterIterator.Done &&
                           ((rfwd = forwardsPartialTrie.NextForCodePoint(uch)).HasNext()))
                    {
                        // keep consuming while the forwards trie can continue
                    }
                    if (rfwd.Matches())
                    {
                        // Exception here
                        return(true);
                    } // else fall through
                }     // else fall through
            }         // else fall through
            return(false); // No exception here.
        }
Esempio n. 8
0
        /// <summary>
        /// For each string in <paramref name="data"/>, matches the first third of it, saves
        /// the trie state, and then checks that ResetToState() restores that position: after
        /// a deliberate mismatch, and again after matching the rest of the string.
        /// On every other entry it also checks that resetting to a state that was never
        /// saved throws <see cref="ArgumentException"/>.
        /// </summary>
        /// <param name="trie">The trie under test.</param>
        /// <param name="data">The strings and their expected values.</param>
        /// <param name="dataLength">The number of leading entries of <paramref name="data"/> to check.</param>
        private void checkNextWithState(CharsTrie trie, StringAndValue[] data, int dataLength)
        {
            CharsTrieState neverSaved = new CharsTrieState();
            CharsTrieState saved      = new CharsTrieState();

            for (int i = 0; i < dataLength; ++i)
            {
                if (i % 2 == 0)
                {
                    try
                    {
                        trie.ResetToState(neverSaved);
                        Errln("trie.resetToState(noState) should throw an ArgumentException");
                    }
                    catch (ArgumentException)
                    {
                        // good
                    }
                }

                String expectedString = data[i].s;
                int    stringLength   = expectedString.Length;
                int    prefixLength   = stringLength / 3;

                // Consume the prefix one code unit at a time.
                for (int k = 0; k < prefixLength; ++k)
                {
                    if (!trie.Next(expectedString[k]).Matches())
                    {
                        Errln("trie.Next()=BytesTrie.Result.NO_MATCH for a prefix of " + data[i].s);
                        return;
                    }
                }

                trie.SaveState(saved);
                Result resultAtState = trie.Current;
                // -99 is a placeholder that is never compared when there is no value.
                int valueAtState = resultAtState.HasValue() ? trie.GetValue() : -99;

                Result result = trie.Next(0);  // mismatch
                if (!(result == Result.NoMatch && result == trie.Current))
                {
                    Errln("trie.Next(0) matched after part of " + data[i].s);
                }

                // The conditions below run left to right and short-circuit, so each trie
                // call happens only if everything before it succeeded.
                if (!(resultAtState == trie.ResetToState(saved).Current &&
                      (!resultAtState.HasValue() || valueAtState == trie.GetValue())))
                {
                    Errln("trie.Next(part of " + data[i].s + ") changes current()/getValue() after " +
                          "saveState/next(0)/resetToState");
                }
                else if (!((result = trie.Next(expectedString, prefixLength, stringLength)).HasValue() &&
                           result == trie.Current))
                {
                    Errln("trie.Next(rest of " + data[i].s + ") does not seem to contain " + data[i].s + " after " +
                          "saveState/next(0)/resetToState");
                }
                else if (!((result = trie.ResetToState(saved).
                                     Next(expectedString, prefixLength, stringLength)).HasValue() &&
                           result == trie.Current))
                {
                    Errln("trie does not seem to contain " + data[i].s +
                          " after saveState/next(rest)/resetToState");
                }
                else if (trie.GetValue() != data[i].value)
                {
                    Errln(String.Format("trie value for {0} is {1}=0x{2:x} instead of expected {3}=0x{4:x}",
                                        data[i].s,
                                        trie.GetValue(), trie.GetValue(),
                                        data[i].value, data[i].value));
                }
                trie.Reset();
            }
        }
Esempio n. 9
0
        /// <summary>
        /// For each string in <paramref name="data"/>, checks that the trie contains it with
        /// the expected value, first by matching the whole string in one call and then by
        /// walking it one code unit at a time. Finally checks that the result at the end of
        /// the string is consistent with whether the trie can actually continue from there.
        /// </summary>
        /// <param name="trie">The trie under test; it is reset after each string.</param>
        /// <param name="data">The strings and their expected values.</param>
        /// <param name="dataLength">The number of leading entries of <paramref name="data"/> to check.</param>
        private void checkNext(CharsTrie trie, StringAndValue[] data, int dataLength)
        {
            CharsTrieState state = new CharsTrieState();

            for (int i = 0; i < dataLength; ++i)
            {
                String expectedString = data[i].s;
                int    stringLength   = expectedString.Length;
                Result result;
                // Whole-string match: must end on a value, and Current must agree with Next().
                if (!(result = trie.Next(expectedString, 0, stringLength)).HasValue() ||
                    result != trie.Current
                    )
                {
                    Errln("trie does not seem to contain " + data[i].s);
                }
                else if (trie.GetValue() != data[i].value)
                {
                    Errln(String.Format("trie value for {0} is {1}=0x{2:x} instead of expected {3}=0x{4:x}",
                                        data[i].s,
                                        trie.GetValue(), trie.GetValue(),
                                        data[i].value, data[i].value));
                }
                // Asking again must give the same answers: Current/GetValue must not move the trie.
                else if (result != trie.Current || trie.GetValue() != data[i].value)
                {
                    Errln("trie value for " + data[i].s + " changes when repeating current()/getValue()");
                }
                // Second pass: walk the same string one code unit at a time from the root.
                trie.Reset();
                result = trie.Current;
                for (int j = 0; j < stringLength; ++j)
                {
                    // Before the end of the string the trie must still be able to continue.
                    if (!result.HasNext())
                    {
                        Errln(String.Format("trie.Current!=hasNext before end of {0} (at index {1})",
                                            data[i].s, j));
                        break;
                    }
                    // Reading an intermediate value must leave Current unchanged.
                    if (result == Result.IntermediateValue)
                    {
                        trie.GetValue();
                        if (trie.Current != Result.IntermediateValue)
                        {
                            Errln(String.Format("trie.getValue().Current!=Result.INTERMEDIATE_VALUE " +
                                                "before end of {0} (at index {1})", data[i].s, j));
                            break;
                        }
                    }
                    result = trie.Next(expectedString[j]);
                    if (!result.Matches())
                    {
                        Errln(String.Format("trie.Next()=Result.NO_MATCH " +
                                            "before end of {0} (at index {1})", data[i].s, j));
                        break;
                    }
                    if (result != trie.Current)
                    {
                        Errln(String.Format("trie.Next()!=following current() " +
                                            "before end of {0} (at index {1})", data[i].s, j));
                        break;
                    }
                }
                // After the last unit (or an early break above) the result must carry a value;
                // otherwise the remaining checks for this string are skipped. Note that the
                // trie is not reset on this path.
                if (!result.HasValue())
                {
                    Errln("trie.Next()!=hasValue at the end of " + data[i].s);
                    continue;
                }
                // Reading the final value must leave Current unchanged.
                trie.GetValue();
                if (result != trie.Current)
                {
                    Errln("trie.Current != current()+getValue()+current() after end of " +
                          data[i].s);
                }
                // Compare the final current() with whether next() can actually continue.
                trie.SaveState(state);
                bool nextContinues = false;
                // Probes code units 0x20..0x7f and 0xd800..0xdfff, each from the saved state.
                for (int c = 0x20; c < 0xe000; ++c)
                {
                    if (c == 0x80)
                    {
                        c = 0xd800;  // Check for ASCII and surrogates but not all of the BMP.
                    }
                    if (trie.ResetToState(state).Next(c).Matches())
                    {
                        nextContinues = true;
                        break;
                    }
                }
                // An intermediate value claims more units can follow; a final value claims none can.
                if ((result == Result.IntermediateValue) != nextContinues)
                {
                    Errln("(trie.Current==BytesTrie.Result.INTERMEDIATE_VALUE) contradicts " +
                          "(trie.Next(some char)!=BytesTrie.Result.NO_MATCH) after end of " + data[i].s);
                }
                trie.Reset();
            }
        }