// private methods ------------------------------------------------

        /// <summary>
        /// Copies one computed range into the caller-supplied result object.
        /// </summary>
        ///
        /// <param name="element">result object that receives the range</param>
        /// <param name="start">first code point of the range</param>
        /// <param name="limit">code point just past the end of the range (end + 1)</param>
        /// <param name="value_ren">value shared by every code point of the range</param>
        private void SetResult(RangeValueIterator_Constants.Element element, int start, int limit,
                               int value_ren)
        {
            // The three fields are independent of each other; only their final
            // contents matter to the caller.
            element.value_ren = value_ren;
            element.limit     = limit;
            element.start     = start;
        }
Exemple #2
0
        // set of property starts for UnicodeSet ------------------------------- ***

        /// <summary>
        /// Adds to <paramref name="set"/> the code points at which a value read
        /// from the trie, the mirroring table or the Joining_Group array starts
        /// or changes.
        /// </summary>
        ///
        /// <param name="set">set that receives the code points</param>
        public void AddPropertyStarts(UnicodeSet set)
        {
            // 1. The start code point of every same-value range of the trie.
            TrieIterator trieRanges = new TrieIterator(trie);
            RangeValueIterator_Constants.Element range = new RangeValueIterator_Constants.Element();
            while (trieRanges.Next(range))
            {
                set.Add(range.start);
            }

            // 2. Every code point of the bidi mirroring table, passed to the
            //    two-argument Add together with its successor.
            int mirrorCount = indexes[IX_MIRROR_LENGTH];
            for (int m = 0; m < mirrorCount; ++m)
            {
                int mirrored = GetMirrorCodePoint(mirrors[m]);
                set.Add(mirrored, mirrored + 1);
            }

            // 3. Every code point at which the Joining_Group value differs from
            //    the value of the preceding array entry (0 before the first one).
            int jgStart = indexes[IX_JG_START];
            int jgLimit = indexes[IX_JG_LIMIT];
            int jgCount = jgLimit - jgStart;
            sbyte previousGroup = 0;
            for (int offset = 0; offset < jgCount; ++offset)
            {
                sbyte joiningGroup = jgArray[offset];
                if (joiningGroup != previousGroup)
                {
                    set.Add(jgStart + offset);
                    previousGroup = joiningGroup;
                }
            }

            // A non-zero value at the end of the array changes back to 0 at
            // the limit, so the limit itself is a start as well.
            if (previousGroup != 0)
            {
                set.Add(jgLimit);
            }

            // 4. Code points with hardcoded properties, plus the ones following
            //    them: none right now.
        }
        // public methods -------------------------------------------------

        /// <summary>
        /// Advances the iteration by one range. The next run of code points
        /// sharing one value is calculated during this call and written into
        /// <paramref name="element"/>.
        /// </summary>
        ///
        /// <param name="element">object that receives the range</param>
        /// <returns>false once the next code point lies beyond
        /// UCharacter.MAX_VALUE (element is then left untouched), true
        /// otherwise.</returns>
        /// <remarks>Marked "@draft 2.1" in the original sources.</remarks>
        public bool Next(RangeValueIterator_Constants.Element element)
        {
            if (m_nextCodepoint_ > IBM.ICU.Lang.UCharacter.MAX_VALUE)
            {
                return false;
            }

            // The BMP scan is only attempted while the next code point is below
            // the supplementary range. When it is skipped, or when it reports
            // that the range continues past the BMP, the supplementary scan
            // produces the result.
            bool stillInBmp = m_nextCodepoint_ < IBM.ICU.Lang.UCharacter.SUPPLEMENTARY_MIN_VALUE;
            if (!stillInBmp || !CalculateNextBMPElement(element))
            {
                CalculateNextSupplementaryElement(element);
            }
            return true;
        }
        /// <summary>
        /// Scans forward from the stored next code point for the end of the
        /// current same-value range, staying below the supplementary code
        /// points. The next element is always stored before it is requested.
        /// </summary>
        ///
        /// <param name="element">return result object</param>
        /// <returns>true if the range ended and was written to
        /// <paramref name="element"/>; false if the scan reached the
        /// supplementary range and has to be continued there. In that case
        /// m_nextCodepoint_ and m_nextBlockIndex_ are each stepped back by
        /// one.</returns>
        private bool CalculateNextBMPElement(RangeValueIterator_Constants.Element element)
        {
            int rangeValue = m_nextValue_;
            int rangeBlock = m_nextBlock_;

            // The pending code point opens the range; move on to its successor.
            m_currentCodepoint_ = m_nextCodepoint_;
            ++m_nextCodepoint_;
            ++m_nextBlockIndex_;

            // A false result from either check helper means the range is
            // complete. First the remainder of the current block is checked,
            // then whole data blocks, one per loop iteration.
            bool rangeComplete = !CheckBlockDetail(rangeValue);

            // synwee check that next block index == 0 here
            while (!rangeComplete
                   && m_nextCodepoint_ < IBM.ICU.Lang.UCharacter.SUPPLEMENTARY_MIN_VALUE)
            {
                m_nextIndex_++;
                // Because of the way a character is split to form the index,
                // the lead and trail surrogates cannot be in the middle of a
                // block.
                if (m_nextCodepoint_ == LEAD_SURROGATE_MIN_VALUE_)
                {
                    // skip the lead surrogate code units and go to the lead
                    // surrogate code points
                    m_nextIndex_ = BMP_INDEX_LENGTH_;
                }
                else if (m_nextCodepoint_ == TRAIL_SURROGATE_MIN_VALUE_)
                {
                    // back to regular BMP code points
                    m_nextIndex_ = m_nextCodepoint_ >> IBM.ICU.Impl.Trie.INDEX_STAGE_1_SHIFT_;
                }

                m_nextBlockIndex_ = 0;
                rangeComplete = !CheckBlock(rangeBlock, rangeValue);
            }

            if (rangeComplete)
            {
                SetResult(element, m_currentCodepoint_, m_nextCodepoint_, rangeValue);
                return true;
            }

            // The BMP is exhausted without the range ending: step one back,
            // since this value has not been retrieved yet.
            --m_nextCodepoint_;
            --m_nextBlockIndex_;
            return false;
        }
Exemple #5
0
        /// <summary>
        /// Builds a trie from <paramref name="setRanges"/>, serializes it, and
        /// compares the builder, the serialized trie and the range enumeration
        /// against <paramref name="checkRanges"/>. Each mismatch is reported
        /// through Errln. Finishes by calling _testTrieIteration.
        /// </summary>
        ///
        /// <param name="setRanges">ranges to write into the trie builder</param>
        /// <param name="countSetRanges">number of leading entries of setRanges to use</param>
        /// <param name="checkRanges">expected contents: each entry holds the limit
        /// and value of a range that begins where the previous entry ended; the
        /// value of entry 0 is also handed to the IntTrieBuilder constructor</param>
        /// <param name="countCheckRanges">number of leading entries of checkRanges to use</param>
        /// <param name="latin1Linear">passed to the builder, and expected to be
        /// reported back by the serialized trie</param>
        private void _testTrieRanges(TrieTest.SetRange[] setRanges, int countSetRanges,
                                     TrieTest.CheckRange[] checkRanges, int countCheckRanges, bool latin1Linear)
        {
            IntTrieBuilder newTrie = new IntTrieBuilder(null, 2000,
                                                        checkRanges[0].value_ren, checkRanges[0].value_ren, latin1Linear);

            // ---- 1. write the values from setRanges[] into the builder ----
            bool allStored = true;
            for (int r = 0; r < countSetRanges; r++)
            {
                int  first      = setRanges[r].start;
                int  onePast    = setRanges[r].limit;
                int  rangeValue = setRanges[r].value_ren;
                bool replace    = setRanges[r].overwrite;

                // A single overwriting code point goes through SetValue,
                // everything else through SetRange.
                bool stored = ((onePast - first) == 1 && replace)
                              ? newTrie.SetValue(first, rangeValue)
                              : newTrie.SetRange(first, onePast, rangeValue, replace);
                allStored &= stored;
            }
            if (!allStored)
            {
                Errln("setting values into a trie failed");
                return;
            }

            // ---- 2. the builder must return every expected value ----
            int builderCp = 0;
            for (int r = 0; r < countCheckRanges; r++)
            {
                int builderEnd      = checkRanges[r].limit;
                int builderExpected = checkRanges[r].value_ren;

                for (; builderCp < builderEnd; ++builderCp)
                {
                    if (builderExpected != newTrie.GetValue(builderCp))
                    {
                        Errln("newTrie [U+" + ILOG.J2CsMapping.Util.IlNumber.ToString(builderCp, 16) + "]==0x"
                              + ILOG.J2CsMapping.Util.IlNumber.ToString(newTrie.GetValue(builderCp), 16)
                              + " instead of 0x" + ILOG.J2CsMapping.Util.IlNumber.ToString(builderExpected, 16));
                    }
                }
            }

            IntTrie trie = newTrie.Serialize(new TrieTest._testFoldedValue(newTrie),
                                             new TrieTest._testFoldingOffset());

            // ---- 3. linear Latin-1 range from utrie_getData() ----
            if (latin1Linear)
            {
                int latin1Cp = 0;
                for (int r = 0; r < countCheckRanges && latin1Cp <= 0xff; r++)
                {
                    int latin1End      = checkRanges[r].limit;
                    int latin1Expected = checkRanges[r].value_ren;

                    for (; latin1Cp < latin1End && latin1Cp <= 0xff; ++latin1Cp)
                    {
                        if (latin1Expected != trie.GetLatin1LinearValue((char)latin1Cp))
                        {
                            Errln("IntTrie.getLatin1LinearValue[U+"
                                  + ILOG.J2CsMapping.Util.IlNumber.ToString(latin1Cp, 16)
                                  + "]==0x"
                                  + ILOG.J2CsMapping.Util.IlNumber.ToString(
                                      trie.GetLatin1LinearValue((char)latin1Cp), 16)
                                  + " instead of 0x"
                                  + ILOG.J2CsMapping.Util.IlNumber.ToString(latin1Expected, 16));
                        }
                    }
                }
            }

            if (latin1Linear != trie.IsLatin1Linear())
            {
                Errln("trie serialization did not preserve " + "Latin-1-linearity");
            }

            // ---- 4. the serialized trie must return every expected value ----
            int serializedCp = 0;
            for (int r = 0; r < countCheckRanges; r++)
            {
                int serializedEnd      = checkRanges[r].limit;
                int serializedExpected = checkRanges[r].value_ren;

                if (serializedCp == 0xd800)
                {
                    // skip surrogates
                    serializedCp = serializedEnd;
                    continue;
                }

                for (; serializedCp < serializedEnd; ++serializedCp)
                {
                    if (serializedCp <= 0xffff)
                    {
                        char unit = (char)serializedCp;

                        int bmpValue = trie.GetBMPValue(unit);
                        if (serializedExpected != bmpValue)
                        {
                            Errln("serialized trie.getBMPValue(U+"
                                  + ILOG.J2CsMapping.Util.IlNumber.ToString(serializedCp, 16) + " == 0x"
                                  + ILOG.J2CsMapping.Util.IlNumber.ToString(bmpValue, 16)
                                  + " instead of 0x"
                                  + ILOG.J2CsMapping.Util.IlNumber.ToString(serializedExpected, 16));
                        }

                        // GetLeadValue is only compared for units that are
                        // not lead surrogates.
                        if (!IBM.ICU.Text.UTF16.IsLeadSurrogate(unit))
                        {
                            int leadValue = trie.GetLeadValue(unit);
                            if (serializedExpected != leadValue)
                            {
                                Errln("serialized trie.getLeadValue(U+"
                                      + ILOG.J2CsMapping.Util.IlNumber.ToString(serializedCp, 16) + " == 0x"
                                      + ILOG.J2CsMapping.Util.IlNumber.ToString(leadValue, 16)
                                      + " instead of 0x"
                                      + ILOG.J2CsMapping.Util.IlNumber.ToString(serializedExpected, 16));
                            }
                        }
                    }

                    int codePointValue = trie.GetCodePointValue(serializedCp);
                    if (serializedExpected != codePointValue)
                    {
                        Errln("serialized trie.getCodePointValue(U+"
                              + ILOG.J2CsMapping.Util.IlNumber.ToString(serializedCp, 16) + ")==0x"
                              + ILOG.J2CsMapping.Util.IlNumber.ToString(codePointValue, 16) + " instead of 0x"
                              + ILOG.J2CsMapping.Util.IlNumber.ToString(serializedExpected, 16));
                    }
                }
            }

            // ---- 5. enumerate and verify all ranges ----
            TrieIterator iter = new TrieTest._testEnumValue(trie);
            RangeValueIterator_Constants.Element result = new RangeValueIterator_Constants.Element();

            // The n-th delivered range is compared with checkRanges[n],
            // counting from 1; its start is the limit of checkRanges[n - 1].
            // The delivered value is XOR-ed with 0x5555 before comparison.
            for (int expectedIndex = 1; iter.Next(result); expectedIndex++)
            {
                int expectedStart = checkRanges[expectedIndex - 1].limit;
                int expectedLimit = checkRanges[expectedIndex].limit;
                int expectedValue = checkRanges[expectedIndex].value_ren;

                bool matches = result.start == expectedStart
                               && result.limit == expectedLimit
                               && (result.value_ren ^ 0x5555) == expectedValue;
                if (!matches)
                {
                    Errln("utrie_enum() delivers wrong range [U+"
                          + ILOG.J2CsMapping.Util.IlNumber.ToString(result.start, 16)
                          + "..U+"
                          + ILOG.J2CsMapping.Util.IlNumber.ToString(result.limit, 16)
                          + "].0x"
                          + ILOG.J2CsMapping.Util.IlNumber.ToString(result.value_ren ^ 0x5555, 16)
                          + " instead of [U+"
                          + ILOG.J2CsMapping.Util.IlNumber.ToString(expectedStart, 16)
                          + "..U+"
                          + ILOG.J2CsMapping.Util.IlNumber.ToString(expectedLimit, 16)
                          + "].0x"
                          + ILOG.J2CsMapping.Util.IlNumber.ToString(expectedValue, 16));
                }
            }

            // ---- 6. linear Latin-1 values must agree with GetLeadValue ----
            if (trie.IsLatin1Linear())
            {
                for (int cp = 0; cp < 0x100; ++cp)
                {
                    if (trie.GetLatin1LinearValue((char)cp) != trie.GetLeadValue((char)cp))
                    {
                        Errln("trie.getLatin1LinearValue[U+"
                              + ILOG.J2CsMapping.Util.IlNumber.ToString(cp, 16)
                              + "]=0x"
                              + ILOG.J2CsMapping.Util.IlNumber.ToString(
                                  trie.GetLatin1LinearValue((char)cp), 16)
                              + " instead of 0x"
                              + ILOG.J2CsMapping.Util.IlNumber.ToString(
                                  trie.GetLeadValue((char)cp), 16));
                    }
                }
            }

            _testTrieIteration(trie, checkRanges, countCheckRanges);
        }
        /// <summary>
        /// Finds the next element in the supplementary range and writes it to
        /// <paramref name="element"/>. We always store the next element before it
        /// is requested. Called after CalculateNextBMPElement() has completed its
        /// round of BMP characters; if there are no more iterations, Next() never
        /// lets control get here.
        /// </summary>
        ///
        /// <remarks>
        /// m_currentCodepoint_ is used slightly differently here than in
        /// CalculateNextBMPElement(). In both methods it is the lower bound of the
        /// element being delivered, but CalculateNextBMPElement() sets it on
        /// entry, whereas this method relies on the value already stored (passed
        /// down from CalculateNextBMPElement() or from a previous call) and only
        /// resets it to the new lower bound after a result has been written.
        /// Marked "@draft 2.1" in the original sources.
        /// </remarks>
        ///
        /// <param name="element">return result object</param>
        /// <exception cref="NullReferenceException">thrown explicitly when
        /// m_trie_.m_dataManipulate_ is null at the point where a folding offset
        /// is needed.</exception>
        private void CalculateNextSupplementaryElement(RangeValueIterator_Constants.Element element)
        {
            // Snapshot of the look-ahead state: the value (and block) of the
            // range that is about to be delivered.
            int currentValue = m_nextValue_;
            int currentBlock = m_nextBlock_;

            // Step past the pending code point.
            m_nextCodepoint_++;
            m_nextBlockIndex_++;

            if (IBM.ICU.Text.UTF16.GetTrailSurrogate(m_nextCodepoint_) != IBM.ICU.Text.UTF16.TRAIL_SURROGATE_MIN_VALUE)
            {
                // this piece is only called when we are in the middle of a lead
                // surrogate block
                // NOTE(review): CheckNullNextTrailIndex and CheckBlockDetail are
                // defined elsewhere; when both return false the range is
                // treated as finished here.
                if (!CheckNullNextTrailIndex() && !CheckBlockDetail(currentValue))
                {
                    // Deliver [m_currentCodepoint_, m_nextCodepoint_) and let the
                    // following range start where this one ended.
                    SetResult(element, m_currentCodepoint_, m_nextCodepoint_,
                              currentValue);
                    m_currentCodepoint_ = m_nextCodepoint_;
                    return;
                }
                // we have cleared one block
                m_nextIndex_++;
                m_nextTrailIndexOffset_++;
                // A false result from CheckTrailBlock is likewise treated as
                // the end of the range.
                if (!CheckTrailBlock(currentBlock, currentValue))
                {
                    SetResult(element, m_currentCodepoint_, m_nextCodepoint_,
                              currentValue);
                    m_currentCodepoint_ = m_nextCodepoint_;
                    return;
                }
            }
            int nextLead = IBM.ICU.Text.UTF16.GetLeadSurrogate(m_nextCodepoint_);

            // enumerate supplementary code points
            // (one lead surrogate at a time, until nextLead reaches
            // TRAIL_SURROGATE_MIN_VALUE_)
            while (nextLead < TRAIL_SURROGATE_MIN_VALUE_)
            {
                // lead surrogate access
                int leadBlock = m_trie_.m_index_[nextLead >> IBM.ICU.Impl.Trie.INDEX_STAGE_1_SHIFT_] << IBM.ICU.Impl.Trie.INDEX_STAGE_2_SHIFT_;
                if (leadBlock == m_trie_.m_dataOffset_)
                {
                    // no entries for a whole block of lead surrogates
                    if (currentValue != m_initialValue_)
                    {
                        // The range being delivered has a different value, so it
                        // ends here; reset the look-ahead state to the initial
                        // value before returning it.
                        m_nextValue_      = m_initialValue_;
                        m_nextBlock_      = 0;
                        m_nextBlockIndex_ = 0;
                        SetResult(element, m_currentCodepoint_, m_nextCodepoint_,
                                  currentValue);
                        m_currentCodepoint_ = m_nextCodepoint_;
                        return;
                    }

                    // Same value as the range in progress: skip the whole block
                    // of lead surrogates and keep scanning.
                    nextLead += DATA_BLOCK_LENGTH_;
                    // number of total affected supplementary codepoints in one
                    // block
                    // this is not a simple addition of
                    // DATA_BLOCK_SUPPLEMENTARY_LENGTH since we need to consider
                    // that we might have moved some of the codepoints
                    m_nextCodepoint_ = IBM.ICU.Impl.UCharacterProperty
                                       .GetRawSupplementary((char)nextLead,
                                                            (char)IBM.ICU.Text.UTF16.TRAIL_SURROGATE_MIN_VALUE);
                    continue;
                }
                // The folding offset below is obtained through the trie's
                // DataManipulate object, so it must be present.
                if (m_trie_.m_dataManipulate_ == null)
                {
                    throw new NullReferenceException(
                              "The field DataManipulate in this Trie is null");
                }
                // enumerate trail surrogates for this lead surrogate
                m_nextIndex_ = m_trie_.m_dataManipulate_
                               .GetFoldingOffset(m_trie_.GetValue(leadBlock
                                                                  + (nextLead & IBM.ICU.Impl.Trie.INDEX_STAGE_3_MASK_)));
                if (m_nextIndex_ <= 0)
                {
                    // no data for this lead surrogate
                    if (currentValue != m_initialValue_)
                    {
                        // Same handling as for an empty block of lead
                        // surrogates: the range in progress ends here.
                        m_nextValue_      = m_initialValue_;
                        m_nextBlock_      = 0;
                        m_nextBlockIndex_ = 0;
                        SetResult(element, m_currentCodepoint_, m_nextCodepoint_,
                                  currentValue);
                        m_currentCodepoint_ = m_nextCodepoint_;
                        return;
                    }
                    // Skip all code points of this lead surrogate at once.
                    m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_;
                }
                else
                {
                    // Restart the trail index offset for this lead surrogate
                    // before checking its trail block.
                    m_nextTrailIndexOffset_ = 0;
                    if (!CheckTrailBlock(currentBlock, currentValue))
                    {
                        SetResult(element, m_currentCodepoint_, m_nextCodepoint_,
                                  currentValue);
                        m_currentCodepoint_ = m_nextCodepoint_;
                        return;
                    }
                }
                nextLead++;
            }

            // deliver last range
            // (it runs up to and including UCharacter.MAX_VALUE, so the limit
            // is MAX_VALUE + 1)
            SetResult(element, m_currentCodepoint_, IBM.ICU.Lang.UCharacter.MAX_VALUE + 1,
                      currentValue);
        }