Exemple #1
0
        // protected constructor -------------------------------------------

        /// <summary>
        /// Initializes the shared trie state from serialized ICU data.
        /// Intended for subclasses such as <see cref="CharTrie"/>.
        /// </summary>
        /// <remarks>
        /// Four 32-bit values are consumed from <paramref name="bytes"/>, in this order:
        /// the signature, the options word, the data offset and the data length.
        /// The remaining content is then handed to <c>Unserialize</c>.
        /// </remarks>
        /// <param name="bytes">Data of an ICU data file, containing the trie.</param>
        /// <param name="dataManipulate">Object containing the information to parse the trie data.
        /// When <c>null</c>, a <c>DefaultGetFoldingOffset</c> instance is used instead.</param>
        /// <exception cref="ArgumentException">The header check on the signature failed.</exception>
        protected Trie(ByteBuffer bytes, IDataManipulate dataManipulate)
        {
            // Magic number used to authenticate the data, followed by the options word.
            int magic = bytes.GetInt32();
            m_options_ = bytes.GetInt32();

            // NOTE(review): the options are stored before the header check on purpose;
            // CheckHeader presumably consults m_options_ as well - keep this order.
            if (!CheckHeader(magic))
            {
                throw new ArgumentException("ICU data file error: Trie header authentication failed, please check if you have the most updated ICU data file");
            }

            // Fall back to the default folding-offset implementation when none is supplied.
            m_dataManipulate_ = dataManipulate ?? new DefaultGetFoldingOffset();

            int latin1Bits = m_options_ & HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_;
            m_isLatin1Linear_ = latin1Bits != 0;

            m_dataOffset_ = bytes.GetInt32();
            m_dataLength_ = bytes.GetInt32();

            Unserialize(bytes);
        }
Exemple #2
0
        // package private methods -----------------------------------------

        /// <summary>
        /// Builder-facing constructor that wraps already-built index and data arrays.
        /// </summary>
        /// <remarks>
        /// The arrays are stored by reference (no copy is made); the data length is
        /// taken from <paramref name="data"/>.
        /// </remarks>
        /// <param name="index">The index array to be slotted into this trie.</param>
        /// <param name="data">The data array to be slotted into this trie.</param>
        /// <param name="initialvalue">The initial value for this trie.</param>
        /// <param name="options">Trie options to use.</param>
        /// <param name="datamanipulate">Folding implementation.</param>
        internal Int32Trie(char[] index, int[] data, int initialvalue, int options,
                           IDataManipulate datamanipulate)
            : base(index, options, datamanipulate)
        {
            m_initialValue_ = initialvalue;
            m_data_ = data;
            m_dataLength_ = data.Length;
        }
Exemple #3
0
 /// <summary>
 /// Returns the cached <see cref="IDataManipulate"/> instance, creating it through
 /// <c>CreateDataManipulate</c> on first use.
 /// </summary>
 /// <returns>The cached instance; created on the first call if none exists yet.</returns>
 public IDataManipulate DataManipulate()
 {
     // Create on demand, remember the result, and hand back the cached value afterwards.
     return _dataManipulate ?? (_dataManipulate = CreateDataManipulate());
 }
Exemple #4
0
        // public constructors ---------------------------------------------

        /// <summary>
        /// Creates a char trie from serialized trie data.
        /// <para/>
        /// The 32-bit-aligned input buffer is unserialized by the base constructor and
        /// the result is then verified to be a char trie.
        /// </summary>
        /// <param name="bytes">Data of an ICU data file, containing the trie.</param>
        /// <param name="dataManipulate">Object which provides methods to parse the char data.</param>
        /// <exception cref="ArgumentException">The loaded data does not describe a char trie.</exception>
        public CharTrie(ByteBuffer bytes, IDataManipulate dataManipulate) // ICU4N TODO: API - make internal and make overload that accepts byte[]
            : base(bytes, dataManipulate)
        {
            if (IsCharTrie)
            {
                return;
            }

            // The base constructor accepted the data, but it is not a char trie.
            throw new ArgumentException("Data given does not belong to a char trie.");
        }
Exemple #5
0
        // public constructors ---------------------------------------------

        /// <summary>
        /// Creates an int trie from serialized trie data.
        /// <para/>
        /// The 32-bit-aligned input buffer is unserialized by the base constructor and
        /// the result is then verified to be an int trie.
        /// </summary>
        /// <param name="bytes">File buffer to a ICU data file, containing the trie.</param>
        /// <param name="dataManipulate"><see cref="Trie.IDataManipulate"/> object which provides methods to parse the char data.</param>
        /// <exception cref="ArgumentException">The loaded data does not describe an int trie.</exception>
        public Int32Trie(ByteBuffer bytes, IDataManipulate dataManipulate)
            : base(bytes, dataManipulate)
        {
            if (IsInt32Trie)
            {
                return;
            }

            // The base constructor accepted the data, but it is not an int trie.
            throw new ArgumentException("Data given does not belong to a int trie.");
        }
Exemple #6
0
        /// <summary>
        /// Builds a dummy (empty) CharTrie.
        /// </summary>
        /// <remarks>
        /// A dummy trie is an empty runtime trie, used when a real data trie cannot
        /// be loaded.
        /// <para/>
        /// Every lookup yields <paramref name="initialValue"/>, except lead surrogate
        /// code units, which yield <paramref name="leadUnitValue"/>.
        /// The Latin-1 part is always set up to be linear.
        /// <para/>
        /// Both values are narrowed to <see cref="char"/> before being stored.
        /// </remarks>
        /// <param name="initialValue">The initial value that is set for all code points.</param>
        /// <param name="leadUnitValue">The value for lead surrogate code _units_ that do not have associated supplementary data.</param>
        /// <param name="dataManipulate">Object which provides methods to parse the char data.</param>
        public CharTrie(int initialValue, int leadUnitValue, IDataManipulate dataManipulate)
            : base(new char[BMP_INDEX_LENGTH + SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate)
        {
            // Size of the leading data area: max(Latin-1, block 0).
            int latin1Count = INDEX_STAGE_1_SHIFT_ <= 8 ? 256 : DATA_BLOCK_LENGTH;

            // A separate data block is only needed when lead surrogates carry their own value.
            bool needsLeadBlock = leadUnitValue != initialValue;
            int totalCount = needsLeadBlock ? latin1Count + DATA_BLOCK_LENGTH : latin1Count;

            m_data_ = new char[totalCount];
            m_dataLength_ = totalCount;

            char initialUnit = (char)initialValue;
            m_initialValue_ = initialUnit;

            // The index array is freshly allocated, so every entry already points at block 0;
            // only the Latin-1 data needs to be filled in.
            for (int pos = 0; pos < latin1Count; pos++)
            {
                m_data_[pos] = initialUnit;
            }

            if (!needsLeadBlock)
            {
                return;
            }

            // Point the index entries for lead surrogate code units at the block after Latin-1.
            char leadBlock = (char)(latin1Count >> INDEX_STAGE_2_SHIFT_);
            int indexStop = 0xdc00 >> INDEX_STAGE_1_SHIFT_;
            for (int slot = 0xd800 >> INDEX_STAGE_1_SHIFT_; slot < indexStop; slot++)
            {
                m_index_[slot] = leadBlock;
            }

            // Fill that extra block with the lead surrogate value.
            char leadUnit = (char)leadUnitValue;
            for (int pos = latin1Count; pos < totalCount; pos++)
            {
                m_data_[pos] = leadUnit;
            }
        }
Exemple #7
0
 /// <summary>
 /// Initializes the shared trie state from an already-built index array.
 /// </summary>
 /// <remarks>
 /// The index array is stored by reference, and its length becomes the data offset.
 /// </remarks>
 /// <param name="index">Array to be used for index.</param>
 /// <param name="options">Options used by the trie.</param>
 /// <param name="dataManipulate">Object containing the information to parse the trie data.
 /// When <c>null</c>, a <c>DefaultGetFoldingOffset</c> instance is used instead.</param>
 protected Trie(char[] index, int options, IDataManipulate dataManipulate) // ICU4N TODO: API - change to use [Flags] enum for options ?
 {
     m_options_ = options;

     // Fall back to the default folding-offset implementation when none is supplied.
     m_dataManipulate_ = dataManipulate ?? new DefaultGetFoldingOffset();

     int latin1Bits = m_options_ & HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_;
     m_isLatin1Linear_ = latin1Bits != 0;

     m_index_ = index;
     m_dataOffset_ = index.Length;
 }
Exemple #8
0
        /// <summary>
        /// Fold the normalization data for supplementary code points into
        /// a compact area on top of the BMP-part of the trie index,
        /// with the lead surrogates indexing this compact area.
        /// <para/>
        /// Duplicate the index values for lead surrogates:
        /// From inside the BMP area, where some may be overridden with folded values,
        /// to just after the BMP area, where they can be retrieved for
        /// code point lookups.
        /// </summary>
        /// <remarks>
        /// The method works directly on <c>m_index_</c> and stores the resulting
        /// index length in <c>m_indexLength_</c> when it completes.
        /// </remarks>
        /// <param name="manipulate">Fold implementation.</param>
        /// <exception cref="InvalidOperationException">
        /// <c>AllocDataBlock</c> returned a negative block number (data table overflow).
        /// </exception>
        /// <exception cref="IndexOutOfRangeException">
        /// <c>SetValue</c> reported failure for a lead surrogate (data table overflow), or the
        /// folded index length reached <c>MAX_INDEX_LENGTH_</c> (index table overflow).
        /// </exception>
        private void Fold(IDataManipulate manipulate)
        {
            int[] leadIndexes = new int[SURROGATE_BLOCK_COUNT_];
            // NOTE: index is an alias of m_index_ (same array instance), not a copy;
            // writes through either name are visible through the other.
            int[] index       = m_index_;
            // copy the lead surrogate indexes into a temporary array
            System.Array.Copy(index, 0xd800 >> SHIFT_, leadIndexes, 0,
                              SURROGATE_BLOCK_COUNT_);

            // set all values for lead surrogate code *units* to leadUnitValue
            // so that by default runtime lookups will find no data for associated
            // supplementary code points, unless there is data for such code points
            // which will result in a non-zero folding value below that is set for
            // the respective lead units
            // the above saved the indexes for surrogate code *points*
            // fill the indexes with simplified code from utrie_setRange32()
            int block = 0;

            if (m_leadUnitValue_ == m_initialValue_)
            {
                // leadUnitValue == initialValue, use the all-initial-value block:
                // block stays 0, so this branch is intentionally left empty
            }
            else
            {
                // create and fill the repeatBlock
                block = AllocDataBlock();
                if (block < 0)
                {
                    // data table overflow
                    throw new InvalidOperationException("Internal error: Out of memory space");
                }
                FillBlock(block, 0, DATA_BLOCK_LENGTH, m_leadUnitValue_, true);
                // negative block number to indicate that it is a repeat block
                block = -block;
            }
            // point every index entry in the lead surrogate range [0xd800, 0xdc00)
            // at the block chosen above
            for (int c = (0xd800 >> SHIFT_); c < (0xdc00 >> SHIFT_); ++c)
            {
                m_index_[c] = block;
            }

            // Fold significant index values into the area just after the BMP
            // indexes.
            // In case the first lead surrogate has significant data,
            // its index block must be used first (in which case the folding is a
            // no-op).
            // Later all folded index blocks are moved up one to insert the copied
            // lead surrogate indexes.
            int indexLength = BMP_INDEX_LENGTH_;

            // search for any index (stage 1) entries for supplementary code points
            // (c is advanced inside the loop body, by a different step per branch)
            for (int c = 0x10000; c < 0x110000;)
            {
                if (index[c >> SHIFT_] != 0)
                {
                    // there is data, treat the full block for a lead surrogate
                    // (round c down to a multiple of 0x400)
                    c &= ~0x3ff;
                    // is there an identical index block?
                    block = FindSameIndexBlock(index, indexLength, c >> SHIFT_);

                    // get a folded value for [c..c+0x400[ and,
                    // if different from the value for the lead surrogate code
                    // point, set it for the lead surrogate code unit

                    int value = manipulate.GetFoldedValue(c,
                                                          block + SURROGATE_BLOCK_COUNT_);
                    if (value != GetValue(UTF16.GetLeadSurrogate(c)))
                    {
                        if (!SetValue(UTF16.GetLeadSurrogate(c), value))
                        {
                            // data table overflow
                            throw new IndexOutOfRangeException(
                                      "Data table overflow");
                        }
                        // if we did not find an identical index block...
                        if (block == indexLength)
                        {
                            // move the actual index (stage 1) entries from the
                            // supplementary position to the new one
                            System.Array.Copy(index, c >> SHIFT_, index, indexLength,
                                              SURROGATE_BLOCK_COUNT_);
                            indexLength += SURROGATE_BLOCK_COUNT_;
                        }
                    }
                    // continue with the range of the next lead surrogate
                    c += 0x400;
                }
                else
                {
                    // no data for this index entry; skip ahead by one data block
                    c += DATA_BLOCK_LENGTH;
                }
            }

            // index array overflow?
            // This is to guarantee that a folding offset is of the form
            // UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
            // If the index is too large, then n>=1024 and more than 10 bits are
            // necessary.
            // In fact, it can only ever become n==1024 with completely unfoldable
            // data and the additional block of duplicated values for lead
            // surrogates.
            if (indexLength >= MAX_INDEX_LENGTH_)
            {
                throw new IndexOutOfRangeException("Index table overflow");
            }
            // make space for the lead surrogate index block and insert it between
            // the BMP indexes and the folded ones
            // (source and destination ranges of this copy overlap within the same array)
            System.Array.Copy(index, BMP_INDEX_LENGTH_, index,
                              BMP_INDEX_LENGTH_ + SURROGATE_BLOCK_COUNT_,
                              indexLength - BMP_INDEX_LENGTH_);
            System.Array.Copy(leadIndexes, 0, index, BMP_INDEX_LENGTH_,
                              SURROGATE_BLOCK_COUNT_);
            // account for the inserted lead surrogate block and publish the final length
            indexLength   += SURROGATE_BLOCK_COUNT_;
            m_indexLength_ = indexLength;
        }