/// <summary>
/// Serializes the build table with 32 bit data.
/// If the builder has not been compacted yet, it is compacted, folded and
/// compacted again before the index and data arrays are copied into a new
/// <c>IntTrie</c>.
/// </summary>
///
/// <param name="datamanipulate">builder raw fold method implementation; must not be null</param>
/// <param name="triedatamanipulate">result trie fold method, handed unchanged to the IntTrie constructor</param>
/// <returns>a new trie</returns>
/// <exception cref="System.ArgumentNullException">if <paramref name="datamanipulate"/> is null</exception>
/// <exception cref="System.IndexOutOfRangeException">if the data length reaches MAX_DATA_LENGTH_</exception>
public IntTrie Serialize(TrieBuilder.DataManipulate datamanipulate,
        Trie.DataManipulate triedatamanipulate)
{
    if (datamanipulate == null)
    {
        // ArgumentNullException derives from ArgumentException, so existing
        // catch blocks keep working while the offending parameter is named.
        throw new ArgumentNullException("datamanipulate", "Parameters can not be null");
    }

    // fold and compact if necessary, also checks that indexLength is
    // within limits
    if (!m_isCompacted_)
    {
        // compact once without overlap to improve folding
        Compact(false);
        // fold the supplementary part of the index array
        Fold(datamanipulate);
        // compact again with overlap for minimum data array length
        Compact(true);
        m_isCompacted_ = true;
    }

    // is dataLength within limits?
    if (m_dataLength_ >= IBM.ICU.Impl.TrieBuilder.MAX_DATA_LENGTH_)
    {
        // the guard fires when the table is too big, so say so
        throw new IndexOutOfRangeException("Data length too large");
    }

    char[] index = new char[m_indexLength_];
    int[] data = new int[m_dataLength_];

    // write the index (stage 1) array and the 32-bit data (stage 2) array

    // write 16-bit index values shifted right by INDEX_SHIFT_
    for (int i = 0; i < m_indexLength_; i++)
    {
        index[i] = (char)((int)(((uint)m_index_[i]) >> IBM.ICU.Impl.TrieBuilder.INDEX_SHIFT_));
    }

    // write 32-bit data values
    System.Array.Copy(m_data_, 0, data, 0, m_dataLength_);

    // option bits: stage shifts, 32-bit data flag, optional Latin-1 linear flag
    int options = IBM.ICU.Impl.TrieBuilder.SHIFT_
            | (IBM.ICU.Impl.TrieBuilder.INDEX_SHIFT_ << IBM.ICU.Impl.TrieBuilder.OPTIONS_INDEX_SHIFT_);
    options |= IBM.ICU.Impl.TrieBuilder.OPTIONS_DATA_IS_32_BIT_;
    if (m_isLatin1Linear_)
    {
        options |= IBM.ICU.Impl.TrieBuilder.OPTIONS_LATIN1_IS_LINEAR_;
    }

    return new IntTrie(index, data, m_initialValue_, options, triedatamanipulate);
}
/// <summary>
/// Folds the stage 1 (index) entries for supplementary code points into a
/// compact area placed on top of the BMP part of the trie index, so that
/// the lead surrogates index this compact area.
/// The index values for lead surrogates are duplicated: they are saved from
/// inside the BMP area (where some may be overridden with folded values)
/// and re-inserted just after the BMP area, where they can be retrieved
/// for code point lookups.
/// </summary>
///
/// <param name="manipulate">fold implementation</param>
private void Fold(TrieBuilder.DataManipulate manipulate)
{
    int surrogateBlockCount = IBM.ICU.Impl.TrieBuilder.SURROGATE_BLOCK_COUNT_;
    int bmpIndexLength = IBM.ICU.Impl.TrieBuilder.BMP_INDEX_LENGTH_;
    int shift = IBM.ICU.Impl.TrieBuilder.SHIFT_;
    int leadStart = 0xd800 >> shift;
    int leadLimit = 0xdc00 >> shift;

    int[] savedLeadEntries = new int[surrogateBlockCount];
    int[] stage1 = m_index_;

    // Keep a copy of the index entries for the lead surrogate code *points*.
    System.Array.Copy(stage1, leadStart, savedLeadEntries, 0, surrogateBlockCount);

    // Point every lead surrogate code *unit* at a block holding
    // leadUnitValue, so that by default runtime lookups find no data for
    // the associated supplementary code points; a folded value set further
    // down overrides this for lead units that do have data.
    // (Simplified from utrie_setRange32().)
    int leadBlock = 0; // 0 == the all-initial-value block
    if (m_leadUnitValue_ != m_initialValue_)
    {
        // a dedicated repeat block is needed for the lead unit value
        int repeatBlock = AllocDataBlock();
        if (repeatBlock < 0)
        {
            // data table overflow
            throw new InvalidOperationException(
                    "Internal error: Out of memory space");
        }
        FillBlock(repeatBlock, 0, IBM.ICU.Impl.TrieBuilder.DATA_BLOCK_LENGTH,
                m_leadUnitValue_, true);
        // a negative block number marks a repeat block
        leadBlock = -repeatBlock;
    }
    for (int entry = leadStart; entry < leadLimit; ++entry)
    {
        m_index_[entry] = leadBlock;
    }

    // Fold significant index values into the area just after the BMP
    // indexes. If the first lead surrogate has significant data, its index
    // block is used first (the fold is then a no-op). Afterwards all folded
    // index blocks are moved up by one block to insert the saved lead
    // surrogate indexes.
    int foldedLength = bmpIndexLength;

    // scan the stage 1 entries of all supplementary code points
    int codePoint = 0x10000;
    while (codePoint < 0x110000)
    {
        if (stage1[codePoint >> shift] == 0)
        {
            // nothing stored here, advance by one data block
            codePoint += IBM.ICU.Impl.TrieBuilder.DATA_BLOCK_LENGTH;
            continue;
        }

        // there is data: handle the whole 0x400 range of one lead surrogate
        codePoint &= ~0x3ff;

        // look for an identical index block that was already folded
        int sameBlock = IBM.ICU.Impl.TrieBuilder.FindSameIndexBlock(
                stage1, foldedLength, codePoint >> shift);

        // get the folded value for [codePoint..codePoint+0x400[ and, if it
        // differs from the value stored for the lead surrogate code point,
        // store it for the lead surrogate code unit
        int foldedValue = manipulate.GetFoldedValue(codePoint,
                sameBlock + surrogateBlockCount);
        if (foldedValue != GetValue(IBM.ICU.Text.UTF16.GetLeadSurrogate(codePoint)))
        {
            if (!SetValue(IBM.ICU.Text.UTF16.GetLeadSurrogate(codePoint), foldedValue))
            {
                // data table overflow
                throw new IndexOutOfRangeException("Data table overflow");
            }

            // no identical block found: move the stage 1 entries from the
            // supplementary position to the end of the folded area
            if (sameBlock == foldedLength)
            {
                System.Array.Copy(stage1, codePoint >> shift, stage1,
                        foldedLength, surrogateBlockCount);
                foldedLength += surrogateBlockCount;
            }
        }
        codePoint += 0x400;
    }

    // Index array overflow?
    // This guarantees that a folding offset has the form
    // UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
    // If the index is too large, then n>=1024 and more than 10 bits would
    // be necessary. In fact it can only ever become n==1024 with completely
    // unfoldable data plus the additional block of duplicated values for
    // lead surrogates.
    if (foldedLength >= IBM.ICU.Impl.TrieBuilder.MAX_INDEX_LENGTH_)
    {
        throw new IndexOutOfRangeException("Index table overflow");
    }

    // Shift the folded blocks up by one block and drop the saved lead
    // surrogate index block in between the BMP indexes and the folded ones.
    System.Array.Copy(stage1, bmpIndexLength, stage1,
            bmpIndexLength + surrogateBlockCount,
            foldedLength - bmpIndexLength);
    System.Array.Copy(savedLeadEntries, 0, stage1, bmpIndexLength,
            surrogateBlockCount);

    m_indexLength_ = foldedLength + surrogateBlockCount;
}
/// <summary>
/// Serializes the build table to an output stream.
/// Compacts the build-time trie after all values are set, and then writes
/// the serialized form onto an output stream.
/// After this, this build-time Trie can only be serialized again and/or
/// closed; no further values can be added.
/// This function is the rough equivalent of utrie_serialize() in ICU4C.
/// </summary>
///
/// <param name="os">the output stream to which the serialized trie will be written. If null, nothing is written and the function still returns the size of the serialized trie.</param>
/// <param name="reduceTo16Bits">If true, reduce the data size to 16 bits (only the low 16 bits of each data value are written). The resulting serialized form can then be used to create a CharTrie.</param>
/// <param name="datamanipulate">builder raw fold method implementation; must not be null</param>
/// <returns>the size of the serialized trie in bytes (header plus index plus data), whether or not a stream was supplied.</returns>
/// <exception cref="System.ArgumentNullException">if <paramref name="datamanipulate"/> is null</exception>
/// <exception cref="System.IndexOutOfRangeException">if the data length (plus the index length when reducing to 16 bits) reaches MAX_DATA_LENGTH_</exception>
public int Serialize(Stream os, bool reduceTo16Bits,
        TrieBuilder.DataManipulate datamanipulate)
{
    if (datamanipulate == null)
    {
        // ArgumentNullException derives from ArgumentException, so existing
        // catch blocks keep working while the offending parameter is named.
        throw new ArgumentNullException("datamanipulate", "Parameters can not be null");
    }

    // fold and compact if necessary, also checks that indexLength is
    // within limits
    if (!m_isCompacted_)
    {
        // compact once without overlap to improve folding
        Compact(false);
        // fold the supplementary part of the index array
        Fold(datamanipulate);
        // compact again with overlap for minimum data array length
        Compact(true);
        m_isCompacted_ = true;
    }

    // is dataLength within limits?
    // With 16-bit data the index length is added to every index value
    // (see below), so the limit applies to the sum of both lengths.
    int length;
    if (reduceTo16Bits)
    {
        length = m_dataLength_ + m_indexLength_;
    }
    else
    {
        length = m_dataLength_;
    }
    if (length >= IBM.ICU.Impl.TrieBuilder.MAX_DATA_LENGTH_)
    {
        // the guard fires when the table is too big, so say so
        throw new IndexOutOfRangeException("Data length too large");
    }

    // struct UTrieHeader {
    //     int32_t signature;
    //     int32_t options;      (a bit field)
    //     int32_t indexLength;
    //     int32_t dataLength;
    // }
    // serialized size: header + 2 bytes per index entry
    //                  + 2 or 4 bytes per data entry
    length = IBM.ICU.Impl.Trie.HEADER_LENGTH_ + 2 * m_indexLength_;
    if (reduceTo16Bits)
    {
        length += 2 * m_dataLength_;
    }
    else
    {
        length += 4 * m_dataLength_;
    }

    if (os == null)
    {
        // No output stream. Just return the length of the serialized Trie,
        // in bytes.
        return length;
    }

    DataOutputStream dos = new DataOutputStream(os);

    // header: signature, options, indexLength, dataLength
    dos.WriteInt(IBM.ICU.Impl.Trie.HEADER_SIGNATURE_);

    int options = IBM.ICU.Impl.Trie.INDEX_STAGE_1_SHIFT_
            | (IBM.ICU.Impl.Trie.INDEX_STAGE_2_SHIFT_ << IBM.ICU.Impl.Trie.HEADER_OPTIONS_INDEX_SHIFT_);
    if (!reduceTo16Bits)
    {
        options |= IBM.ICU.Impl.Trie.HEADER_OPTIONS_DATA_IS_32_BIT_;
    }
    if (m_isLatin1Linear_)
    {
        options |= IBM.ICU.Impl.Trie.HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_;
    }
    dos.WriteInt(options);

    dos.WriteInt(m_indexLength_);
    dos.WriteInt(m_dataLength_);

    // write the index (stage 1) array and the 16/32-bit data (stage 2) array

    // write 16-bit index values shifted right by UTRIE_INDEX_SHIFT;
    // for 16-bit data the index length is added to each value first,
    // for 32-bit data nothing is added (offset 0)
    int indexOffset = reduceTo16Bits ? m_indexLength_ : 0;
    for (int i = 0; i < m_indexLength_; i++)
    {
        int v = (int)(((uint)(m_index_[i] + indexOffset)) >> IBM.ICU.Impl.Trie.INDEX_STAGE_2_SHIFT_);
        dos.WriteChar((char)v);
    }

    if (reduceTo16Bits)
    {
        // write 16-bit data values (low 16 bits of each entry)
        for (int i = 0; i < m_dataLength_; i++)
        {
            int v = m_data_[i] & 0x0000ffff;
            dos.WriteChar((char)v);
        }
    }
    else
    {
        // write 32-bit data values
        for (int i = 0; i < m_dataLength_; i++)
        {
            dos.WriteInt(m_data_[i]);
        }
    }

    return length;
}