// protected constructor -------------------------------------------

/// <summary>
/// Initializes the common trie state from serialized ICU data. Intended for
/// subclasses such as <see cref="CharTrie"/>.
/// </summary>
/// <remarks>
/// Four 32-bit values are consumed from <paramref name="bytes"/> in this order:
/// signature, options, data offset and data length. The buffer is then handed
/// to <c>Unserialize</c> to read the remainder.
/// </remarks>
/// <param name="bytes">Data of an ICU data file, containing the trie.</param>
/// <param name="dataManipulate">Object containing the information to parse the
/// trie data. When <c>null</c>, a new <c>DefaultGetFoldingOffset</c> is used.</param>
/// <exception cref="ArgumentException">Thrown when <c>CheckHeader</c> rejects the
/// signature read from <paramref name="bytes"/>.</exception>
protected Trie(ByteBuffer bytes, IDataManipulate dataManipulate)
{
    // First header word: magic number used to authenticate the data.
    int magic = bytes.GetInt32();

    // The options word is stored before the header check runs.
    // NOTE(review): CheckHeader presumably inspects m_options_ as well - confirm
    // before reordering these statements.
    m_options_ = bytes.GetInt32();

    bool headerAccepted = CheckHeader(magic);
    if (!headerAccepted)
    {
        throw new ArgumentException("ICU data file error: Trie header authentication failed, please check if you have the most updated ICU data file");
    }

    // Fall back to the default folding-offset implementation when none is supplied.
    m_dataManipulate_ = dataManipulate ?? new DefaultGetFoldingOffset();

    int latin1Bits = m_options_ & HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_;
    m_isLatin1Linear_ = latin1Bits != 0;

    m_dataOffset_ = bytes.GetInt32();
    m_dataLength_ = bytes.GetInt32();

    Unserialize(bytes);
}
// package private methods -----------------------------------------

/// <summary>
/// Builder-facing constructor: wraps an already-built index array and data
/// array in a trie without copying them.
/// </summary>
/// <param name="index">The index array to be slotted into this trie.</param>
/// <param name="data">The data array to be slotted into this trie; its length
/// becomes the trie's data length.</param>
/// <param name="initialvalue">The initial value for this trie.</param>
/// <param name="options">Trie options to use.</param>
/// <param name="datamanipulate">Folding implementation.</param>
internal Int32Trie(
    char[] index,
    int[] data,
    int initialvalue,
    int options,
    IDataManipulate datamanipulate)
    : base(index, options, datamanipulate)
{
    // The array is stored by reference, not cloned.
    m_data_ = data;
    m_dataLength_ = data.Length;

    m_initialValue_ = initialvalue;
}
/// <summary>
/// Returns the cached data manipulator, obtaining it from
/// <c>CreateDataManipulate</c> the first time it is needed.
/// </summary>
/// <returns>The value held in <c>_dataManipulate</c> once it has been populated.</returns>
public IDataManipulate DataManipulate()
{
    if (_dataManipulate != null)
    {
        return _dataManipulate;
    }

    // Nothing cached yet: create it, remember it, and hand it back.
    _dataManipulate = CreateDataManipulate();
    return _dataManipulate;
}
// public constructors ---------------------------------------------

/// <summary>
/// Builds a char trie from serialized trie data.
/// <para/>
/// The 32-bit-aligned input buffer is unserialized by the base constructor and
/// the result is then verified to be a char trie.
/// </summary>
/// <param name="bytes">Data of an ICU data file, containing the trie.</param>
/// <param name="dataManipulate">Object which provides methods to parse the char data.</param>
/// <exception cref="ArgumentException">Thrown when the loaded data is not a char
/// trie (<c>IsCharTrie</c> is <c>false</c>).</exception>
public CharTrie(ByteBuffer bytes, IDataManipulate dataManipulate) // ICU4N TODO: API - make internal and make overload that accepts byte[]
    : base(bytes, dataManipulate)
{
    if (IsCharTrie)
    {
        return;
    }

    throw new ArgumentException("Data given does not belong to a char trie.");
}
// public constructors ---------------------------------------------

/// <summary>
/// Builds an int trie from serialized trie data.
/// <para/>
/// The 32-bit-aligned input buffer is unserialized by the base constructor and
/// the result is then verified to be an int trie.
/// </summary>
/// <param name="bytes">File buffer to a ICU data file, containing the trie.</param>
/// <param name="dataManipulate"><see cref="Trie.IDataManipulate"/> object which
/// provides methods to parse the char data.</param>
/// <exception cref="ArgumentException">Thrown when the loaded data is not an int
/// trie (<c>IsInt32Trie</c> is <c>false</c>).</exception>
public Int32Trie(ByteBuffer bytes, IDataManipulate dataManipulate)
    : base(bytes, dataManipulate)
{
    if (IsInt32Trie)
    {
        return;
    }

    throw new ArgumentException("Data given does not belong to a int trie.");
}
/// <summary>
/// Creates a dummy <see cref="CharTrie"/>.
/// </summary>
/// <remarks>
/// A dummy trie is an empty runtime trie that stands in when a real data trie
/// cannot be loaded.
/// <para/>
/// Every lookup yields <paramref name="initialValue"/>, except lead surrogate
/// code units, which yield <paramref name="leadUnitValue"/>.
/// The Latin-1 part is always set up to be linear.
/// </remarks>
/// <param name="initialValue">The initial value that is set for all code points.</param>
/// <param name="leadUnitValue">The value for lead surrogate code _units_ that do
/// not have associated supplementary data.</param>
/// <param name="dataManipulate">Object which provides methods to parse the char data.</param>
public CharTrie(int initialValue, int leadUnitValue, IDataManipulate dataManipulate)
    : base(new char[BMP_INDEX_LENGTH + SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate)
{
    // Work out how much data the dummy trie needs.
    // The first run is max(Latin-1, block 0).
    int latin1Length = INDEX_STAGE_1_SHIFT_ <= 8 ? 256 : DATA_BLOCK_LENGTH;

    // A second block is only required when lead surrogates map to a value of their own.
    bool hasLeadBlock = leadUnitValue != initialValue;
    int totalLength = latin1Length;
    if (hasLeadBlock)
    {
        totalLength += DATA_BLOCK_LENGTH;
    }

    m_data_ = new char[totalLength];
    m_dataLength_ = totalLength;

    char initialUnit = (char)initialValue;
    m_initialValue_ = initialUnit;

    // All index entries start out as 0 and therefore already point at block 0,
    // so only the data needs filling: the Latin-1 run first.
    for (int pos = 0; pos < latin1Length; ++pos)
    {
        m_data_[pos] = initialUnit;
    }

    if (!hasLeadBlock)
    {
        return;
    }

    // Point the index entries for lead surrogate code units at the block
    // that directly follows the Latin-1 run.
    char leadBlock = (char)(latin1Length >> INDEX_STAGE_2_SHIFT_);
    int firstLeadSlot = 0xd800 >> INDEX_STAGE_1_SHIFT_;
    int endLeadSlot = 0xdc00 >> INDEX_STAGE_1_SHIFT_;
    for (int slot = firstLeadSlot; slot < endLeadSlot; ++slot)
    {
        m_index_[slot] = leadBlock;
    }

    // Fill that block with the lead surrogate value.
    char leadUnit = (char)leadUnitValue;
    int leadBlockEnd = latin1Length + DATA_BLOCK_LENGTH;
    for (int pos = latin1Length; pos < leadBlockEnd; ++pos)
    {
        m_data_[pos] = leadUnit;
    }
}
/// <summary>
/// Initializes the common trie state from an in-memory index array.
/// </summary>
/// <param name="index">Array to be used for index. It is stored by reference, and
/// its length becomes the data offset.</param>
/// <param name="options">Options used by the trie.</param>
/// <param name="dataManipulate">Object containing the information to parse the
/// trie data. When <c>null</c>, a new <c>DefaultGetFoldingOffset</c> is used.</param>
protected Trie(char[] index, int options, IDataManipulate dataManipulate) // ICU4N TODO: API - change to use [Flags] enum for options ?
{
    m_options_ = options;

    // Fall back to the default folding-offset implementation when none is supplied.
    m_dataManipulate_ = dataManipulate ?? new DefaultGetFoldingOffset();

    int latin1Bits = options & HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_;
    m_isLatin1Linear_ = latin1Bits != 0;

    m_index_ = index;
    m_dataOffset_ = index.Length;
}
/// <summary>
/// Fold the normalization data for supplementary code points into
/// a compact area on top of the BMP-part of the trie index,
/// with the lead surrogates indexing this compact area.
/// <para/>
/// Duplicate the index values for lead surrogates:
/// From inside the BMP area, where some may be overridden with folded values,
/// to just after the BMP area, where they can be retrieved for
/// code point lookups.
/// </summary>
/// <remarks>
/// The method works in place on <c>m_index_</c> and finishes by storing the new
/// index length in <c>m_indexLength_</c>. The steps are:
/// <list type="number">
/// <item><description>Save the index entries of the lead surrogate range (0xd800..0xdbff).</description></item>
/// <item><description>Overwrite those entries with either block 0 or a negated, newly
/// allocated block filled with <c>m_leadUnitValue_</c>.</description></item>
/// <item><description>Scan the supplementary range and, for each 0x400-wide range that has
/// data, obtain a folded value from <paramref name="manipulate"/> and move its
/// index block next to the BMP indexes when no identical block exists yet.</description></item>
/// <item><description>Shift the folded blocks up by one block and insert the saved
/// lead surrogate entries directly after the BMP indexes.</description></item>
/// </list>
/// </remarks>
/// <param name="manipulate">Fold implementation.</param>
/// <exception cref="InvalidOperationException">Thrown when <c>AllocDataBlock</c>
/// returns a negative block number.</exception>
/// <exception cref="IndexOutOfRangeException">Thrown when <c>SetValue</c> reports
/// failure ("Data table overflow"), or when the folded index length reaches
/// <c>MAX_INDEX_LENGTH_</c> ("Index table overflow").</exception>
private void Fold(IDataManipulate manipulate)
{
    int[] leadIndexes = new int[SURROGATE_BLOCK_COUNT_];
    // NOTE: index is an alias of m_index_, not a copy; every write through
    // either name below changes the same array.
    int[] index = m_index_;
    // copy the lead surrogate indexes into a temporary array
    System.Array.Copy(index, 0xd800 >> SHIFT_, leadIndexes, 0, SURROGATE_BLOCK_COUNT_);

    // set all values for lead surrogate code *units* to leadUnitValue
    // so that by default runtime lookups will find no data for associated
    // supplementary code points, unless there is data for such code points
    // which will result in a non-zero folding value below that is set for
    // the respective lead units
    // the above saved the indexes for surrogate code *points*
    // fill the indexes with simplified code from utrie_setRange32()
    int block = 0;
    if (m_leadUnitValue_ == m_initialValue_)
    {
        // leadUnitValue == initialValue, use all-initial-value block
        // block = 0; if block here left empty
    }
    else
    {
        // create and fill the repeatBlock
        block = AllocDataBlock();
        if (block < 0)
        {
            // data table overflow
            throw new InvalidOperationException("Internal error: Out of memory space");
        }
        FillBlock(block, 0, DATA_BLOCK_LENGTH, m_leadUnitValue_, true);
        // negative block number to indicate that it is a repeat block
        block = -block;
    }
    // point every index entry of the lead surrogate range at the block chosen above
    for (int c = (0xd800 >> SHIFT_); c < (0xdc00 >> SHIFT_); ++c)
    {
        m_index_[c] = block;
    }

    // Fold significant index values into the area just after the BMP
    // indexes.
    // In case the first lead surrogate has significant data,
    // its index block must be used first (in which case the folding is a
    // no-op).
    // Later all folded index blocks are moved up one to insert the copied
    // lead surrogate indexes.
    int indexLength = BMP_INDEX_LENGTH_;
    // search for any index (stage 1) entries for supplementary code points
    // (the loop has no increment clause: c advances by a different step in each branch)
    for (int c = 0x10000; c < 0x110000;)
    {
        if (index[c >> SHIFT_] != 0)
        {
            // there is data, treat the full block for a lead surrogate
            // (clearing the low 10 bits rounds c down to a multiple of 0x400)
            c &= ~0x3ff;
            // is there an identical index block?
            // (from here on, block holds an index position rather than a data block number)
            block = FindSameIndexBlock(index, indexLength, c >> SHIFT_);
            // get a folded value for [c..c+0x400[ and,
            // if different from the value for the lead surrogate code
            // point, set it for the lead surrogate code unit
            int value = manipulate.GetFoldedValue(c, block + SURROGATE_BLOCK_COUNT_);
            if (value != GetValue(UTF16.GetLeadSurrogate(c)))
            {
                if (!SetValue(UTF16.GetLeadSurrogate(c), value))
                {
                    // data table overflow
                    throw new IndexOutOfRangeException(
                        "Data table overflow");
                }
                // if we did not find an identical index block...
                if (block == indexLength)
                {
                    // move the actual index (stage 1) entries from the
                    // supplementary position to the new one
                    System.Array.Copy(index, c >> SHIFT_, index, indexLength, SURROGATE_BLOCK_COUNT_);
                    indexLength += SURROGATE_BLOCK_COUNT_;
                }
            }
            // continue with the next 0x400-wide range
            c += 0x400;
        }
        else
        {
            // no data behind this index entry: skip one data block's worth of code points
            c += DATA_BLOCK_LENGTH;
        }
    }

    // index array overflow?
    // This is to guarantee that a folding offset is of the form
    // UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
    // If the index is too large, then n>=1024 and more than 10 bits are
    // necessary.
    // In fact, it can only ever become n==1024 with completely unfoldable
    // data and the additional block of duplicated values for lead
    // surrogates.
    if (indexLength >= MAX_INDEX_LENGTH_)
    {
        throw new IndexOutOfRangeException("Index table overflow");
    }
    // make space for the lead surrogate index block and insert it between
    // the BMP indexes and the folded ones
    // (source and destination overlap within the same array; this relies on
    // Array.Copy copying overlapping ranges as if through a temporary buffer)
    System.Array.Copy(index, BMP_INDEX_LENGTH_, index, BMP_INDEX_LENGTH_ + SURROGATE_BLOCK_COUNT_, indexLength - BMP_INDEX_LENGTH_);
    System.Array.Copy(leadIndexes, 0, index, BMP_INDEX_LENGTH_, SURROGATE_BLOCK_COUNT_);
    indexLength += SURROGATE_BLOCK_COUNT_;
    m_indexLength_ = indexLength;
}