コード例 #1
0
        public void TestJitterbug1952()
        {
            // Code point iteration over text made of unpaired surrogates must terminate,
            // and must hand back the lone surrogate it stepped over.
            char[] units = { '\uDC00', '\uD800', '\uDC01', '\uD802', '\uDC02', '\uDC03' };
            UCharacterIterator it = UCharacterIterator.GetInstance(units);

            // Backwards from index 1: the only code unit before it is the trail surrogate U+DC00.
            // This loop should never become an infinite loop; if it does, we have a problem.
            it.Index = 1;
            for (int cp = it.PreviousCodePoint();
                 cp != UCharacterIterator.DONE;
                 cp = it.PreviousCodePoint())
            {
                if (cp == 0xDC00)
                {
                    continue;
                }
                Errln("iter.PreviousCodePoint() failed");
            }

            // Forwards from index 5: the only code unit left is the trail surrogate U+DC03.
            it.Index = 5;
            for (int cp = it.NextCodePoint();
                 cp != UCharacterIterator.DONE;
                 cp = it.NextCodePoint())
            {
                if (cp == 0xDC03)
                {
                    continue;
                }
                Errln("iter.NextCodePoint() failed");
            }
        }
コード例 #2
0
        public void TestIteration()
        {
            // Checks that code unit iteration (Next/Previous) and code point iteration
            // (NextCodePoint/PreviousCodePoint) agree over ITERATION_STRING_ in both
            // directions, using two independent iterators over the same text.
            UCharacterIterator iterator = UCharacterIterator.GetInstance(
                ITERATION_STRING_);
            UCharacterIterator iterator2 = UCharacterIterator.GetInstance(
                ITERATION_STRING_);

            iterator.SetToStart();
            if (iterator.Current != ITERATION_STRING_[0])
            {
                Errln("Iterator failed retrieving first character");
            }
            iterator.SetToLimit();
            if (iterator.Previous() != ITERATION_STRING_[
                    ITERATION_STRING_.Length - 1])
            {
                Errln("Iterator failed retrieving last character");
            }
            if (iterator.Length != ITERATION_STRING_.Length)
            {
                Errln("Iterator failed determining begin and end index");
            }

            // Forward pass: iterator2 walks by code point, iterator by code unit.
            iterator2.Index = 0;
            iterator.Index  = 0;
            int ch = 0;

            while (ch != UCharacterIterator.DONE)
            {
                // Index before the read; the supplementary code point starts at
                // ITERATION_SUPPLEMENTARY_INDEX.
                int index = iterator2.Index;
                ch = iterator2.NextCodePoint();
                if (index != ITERATION_SUPPLEMENTARY_INDEX)
                {
                    if (ch != iterator.Next() &&
                        ch != UCharacterIterator.DONE)
                    {
                        Errln("Error mismatch in next() and nextCodePoint()");
                    }
                }
                else
                {
                    // One code point corresponds to two code units: lead, then trail.
                    if (UTF16.GetLeadSurrogate(ch) != iterator.Next() ||
                        UTF16.GetTrailSurrogate(ch) != iterator.Next())
                    {
                        Errln("Error mismatch in next and nextCodePoint for " +
                              "supplementary characters");
                    }
                }
            }

            // Backward pass.
            iterator.Index  = ITERATION_STRING_.Length;
            iterator2.Index = ITERATION_STRING_.Length;

            // The forward pass exits with ch == DONE; without this reset the loop below
            // would never execute and the backward direction would go untested.
            ch = 0;
            while (ch != UCharacterIterator.DONE)
            {
                ch = iterator2.PreviousCodePoint();

                // Going backwards, the supplementary code point has just been read when
                // iterator2 lands on the index of its lead surrogate, so the index has
                // to be sampled after the move rather than before it.
                int index = iterator2.Index;

                // The DONE guard keeps the start-of-text result out of the surrogate
                // branch even if the supplementary character were to sit at index 0.
                if (ch != UCharacterIterator.DONE &&
                    index == ITERATION_SUPPLEMENTARY_INDEX)
                {
                    // Moving backwards, the trail surrogate is seen before the lead.
                    if (UTF16.GetTrailSurrogate(ch) != iterator.Previous() ||
                        UTF16.GetLeadSurrogate(ch) != iterator.Previous())
                    {
                        Errln("Error mismatch in previous and " +
                              "previousCodePoint for supplementary characters");
                    }
                }
                else
                {
                    // iterator.Previous() is evaluated first so that both iterators
                    // stay in step even on the final DONE result.
                    if (ch != iterator.Previous() &&
                        ch != UCharacterIterator.DONE)
                    {
                        Errln("Error mismatch in previous() and " +
                              "previousCodePoint()");
                    }
                }
            }
        }
コード例 #3
0
        public void previousNext(UCharacterIterator iter)
        {
            // Drives iter and a reference code point iterator through the same scripted
            // sequence of previous/current/next moves, and reports the first step at which
            // either the code points or the positions disagree.
            // NOTE(review): assumes iter wraps the UTF-16 form of the code points in
            // `expect` (6 code units); the caller builds that text, so confirm there.
            int[] expect =
            {
                0x2f999,
                0x1d15f,
                0xc4,
                0x1ed0
            };

            // expected index into expect for each index into src
            // (this table is looked up by iter.Index and compared with iter32.Index)
            int[] expectIndex =
            {
                0, 0,
                1, 1,
                2,
                3,
                4 // needed: the position behind the last character
            };

            // initial indexes into the src and expect strings
            // (consistent with the table above: expectIndex[SRC_MIDDLE] == EXPECT_MIDDLE)
            int SRC_MIDDLE    = 4;
            int EXPECT_MIDDLE = 2;

            // movement vector
            // - for previous(), 0 for current(), + for next()
            String moves = "0+0+0--0-0-+++0--+++++++0--------";

            UCharIterator iter32 = new UCharIterator(expect, expect.Length,
                                                     EXPECT_MIDDLE);

            int  c1, c2;
            char m;

            // initially set the indexes into the middle of the strings
            iter.Index = SRC_MIDDLE;

            // move around and compare the iteration code points with
            // the expected ones
            int movesIndex = 0;

            while (movesIndex < moves.Length)
            {
                m = moves[movesIndex++];
                if (m == '-')
                {
                    c1 = iter.PreviousCodePoint();
                    c2 = iter32.Previous();
                }
                else if (m == '0')
                {
                    c1 = iter.CurrentCodePoint;
                    c2 = iter32.Current;
                }
                else
                {// m=='+'
                    c1 = iter.NextCodePoint();
                    c2 = iter32.Next();
                }

                // compare results
                if (c1 != c2)
                {
                    // the moves up to and including the current (m) move
                    String history = moves.Substring(0, movesIndex);
                    Errln("error: mismatch in Normalizer iteration at " + history + ": "
                          + "got c1= " + Hex(c1) + " != expected c2= " + Hex(c2));
                    break;
                }

                // compare indexes
                if (expectIndex[iter.Index] != iter32.Index)
                {
                    // the moves up to and including the current (m) move
                    String history = moves.Substring(0, movesIndex);

                    // Report the two values that were actually compared. expectIndex is
                    // keyed by the src index, so it must not be looked up with
                    // iter32.Index (which is already an index into expect).
                    Errln("error: index mismatch in Normalizer iteration at "
                          + history + " : " + "Normalizer index " + iter.Index
                          + " maps to " + expectIndex[iter.Index]
                          + " expected " + iter32.Index);
                    break;
                }
            }
        }
コード例 #4
0
        /// <summary>
        /// Is there an exception at this point?
        /// </summary>
        /// <remarks>
        /// Repositions <c>text</c> and resets <c>backwardsTrie</c> (and, when it is consulted,
        /// <c>forwardsPartialTrie</c>) as a side effect.
        /// </remarks>
        /// <param name="n">The location of the possible break.</param>
        /// <returns>
        /// <c>true</c> if the text read backwards from <paramref name="n"/> yields a value of
        /// <c>SimpleFilteredSentenceBreakIteratorBuilder.Match</c> in the backwards trie, or a
        /// value of <c>Partial</c> that is then confirmed by a match in the forwards trie;
        /// otherwise <c>false</c>.
        /// </returns>
        private bool BreakExceptionAt(int n)
        {
            // Note: the C++ version of this function is SimpleFilteredSentenceBreakIterator::breakExceptionAt()

            int bestPosn  = -1;
            int bestValue = -1;

            text.Index = n;
            backwardsTrie.Reset();
            int uch;

            // Assume a space is following the '.' (so we handle the case: "Mr. /Brown"):
            // step back over it, and undo the step if the previous code point is anything else.
            // TODO: skip a class of chars here??
            // TODO only do this the 1st time?
            // NOTE(review): if PreviousCodePoint() returns Done (presumably at the start of the
            // text, without moving), NextCodePoint() still advances - confirm n == 0 never gets here.
            if (text.PreviousCodePoint() != ' ')
            {
                text.NextCodePoint();
            }

            Result r = Result.IntermediateValue;

            // Feed code points to the trie backwards while there is more text to consume
            // and the trie can still continue.
            while ((uch = text.PreviousCodePoint()) != UCharacterIterator.Done &&
                   (r = backwardsTrie.NextForCodePoint(uch)).HasNext())
            {
                if (r.HasValue())
                {
                    // remember the best match so far
                    bestPosn  = text.Index;
                    bestValue = backwardsTrie.GetValue();
                }
            }

            if (r.Matches())
            {
                // exact match?
                bestValue = backwardsTrie.GetValue();
                bestPosn  = text.Index;
            }

            if (bestPosn >= 0)
            {
                if (bestValue == SimpleFilteredSentenceBreakIteratorBuilder.Match)
                {
                    // exact match! Exception here.
                    return true;
                }

                if (bestValue == SimpleFilteredSentenceBreakIteratorBuilder.Partial && forwardsPartialTrie != null)
                {
                    // make sure there's a forward trie
                    // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
                    // to see if it matches something going forward.
                    forwardsPartialTrie.Reset();

                    Result rfwd = Result.IntermediateValue;
                    text.Index = bestPosn; // hope that's close ..

                    // Same end-of-iteration sentinel as the backwards loop above: the value
                    // comes from the same iterator, so it is compared with UCharacterIterator.Done.
                    while ((uch = text.NextCodePoint()) != UCharacterIterator.Done &&
                           (rfwd = forwardsPartialTrie.NextForCodePoint(uch)).HasNext())
                    {
                        // all the work happens in the loop condition
                    }

                    if (rfwd.Matches())
                    {
                        // Exception here
                        return true;
                    }
                }
            }

            return false; // No exception here.
        }