/// <summary>
/// Serializes the build table into a new <see cref="Int32Trie"/> holding 32-bit data.
/// <para/>
/// If the builder has not been compacted yet, it is compacted, folded and
/// compacted again before the index and data arrays are copied out.
/// </summary>
/// <param name="datamanipulate">Builder raw fold method implementation. Must not be <c>null</c>.</param>
/// <param name="triedatamanipulate">Result trie fold method; passed through unchanged to the
/// <see cref="Int32Trie"/> constructor.</param>
/// <returns>A new trie.</returns>
/// <exception cref="ArgumentNullException"><paramref name="datamanipulate"/> is <c>null</c>.</exception>
/// <exception cref="IndexOutOfRangeException">The compacted data length has reached
/// <c>MaxDataLength</c>.</exception>
public virtual Int32Trie Serialize(ITrieBuilderDataManipulate datamanipulate,
    ITrieDataManipulate triedatamanipulate)
{
    if (datamanipulate == null)
    {
        // ArgumentNullException derives from ArgumentException, so callers that
        // catch the base type keep working while getting the parameter name.
        throw new ArgumentNullException(nameof(datamanipulate), "Parameters can not be null");
    }

    // fold and compact if necessary, also checks that indexLength is
    // within limits
    if (!m_isCompacted_)
    {
        // compact once without overlap to improve folding
        Compact(false);
        // fold the supplementary part of the index array
        Fold(datamanipulate);
        // compact again with overlap for minimum data array length
        Compact(true);
        m_isCompacted_ = true;
    }

    // is dataLength within limits?
    if (m_dataLength_ >= MaxDataLength)
    {
        // The check fires when the data array is too long, not too short.
        throw new IndexOutOfRangeException("Data length too large");
    }

    char[] index = new char[m_indexLength_];
    int[] data = new int[m_dataLength_];

    // write the index (stage 1) array:
    // 16-bit index values shifted right by IndexShift
    for (int i = 0; i < m_indexLength_; i++)
    {
        index[i] = (char)(m_index_[i].TripleShift(IndexShift));
    }

    // write the 32-bit data (stage 2) values; only the first m_dataLength_
    // entries of the build-time array are in use
    System.Array.Copy(m_data_, 0, data, 0, m_dataLength_);

    int options = Shift | (IndexShift << OptionsIndexShift);
    options |= OptionsDataIs32Bit;
    if (m_isLatin1Linear_)
    {
        options |= OptionsLatin1IsLinear;
    }

    return new Int32Trie(index, data, m_initialValue_, options, triedatamanipulate);
}
/// <summary>
/// Folds the index entries for supplementary code points into a compact
/// region placed on top of the BMP part of the trie index, so that the
/// lead surrogates index this compact region.
/// <para/>
/// The index values of the lead surrogates are duplicated: they are saved
/// from inside the BMP area (where they may be overridden with folded
/// values) and re-inserted just after the BMP area, where they can be
/// retrieved for code point lookups.
/// </summary>
/// <param name="manipulate">Fold implementation.</param>
private void Fold(ITrieBuilderDataManipulate manipulate)
{
    int[] stage1 = m_index_;
    int leadStart = 0xd800 >> Shift;

    // Keep a copy of the index entries for the lead surrogate code *points*
    // before they get replaced below.
    int[] savedLeadIndexes = new int[SurrogateBlockCount];
    System.Array.Copy(stage1, leadStart, savedLeadIndexes, 0, SurrogateBlockCount);

    // All lead surrogate code *units* default to leadUnitValue, so runtime
    // lookups find no data for the associated supplementary code points
    // unless a folded value is set for the lead unit further down.
    // Block 0 is the all-initial-value block and is used as-is when
    // leadUnitValue equals the initial value.
    int leadUnitBlock = 0;
    if (m_leadUnitValue_ != m_initialValue_)
    {
        // Otherwise a dedicated repeat block is created and filled.
        int allocated = AllocDataBlock();
        if (allocated < 0)
        {
            // data table overflow
            throw new InvalidOperationException("Internal error: Out of memory space");
        }
        FillBlock(allocated, 0, DataBlockLength, m_leadUnitValue_, true);
        // a negative block number marks a repeat block
        leadUnitBlock = -allocated;
    }

    int leadLimit = 0xdc00 >> Shift;
    for (int i = leadStart; i < leadLimit; i++)
    {
        m_index_[i] = leadUnitBlock;
    }

    // Significant index values are folded into the area right behind the
    // BMP indexes. If the first lead surrogate has significant data, its
    // index block has to come first (the folding is then a no-op). The
    // folded blocks are shifted up by one block at the end to make room
    // for the saved lead surrogate indexes.
    int foldedLength = BMPIndexLength;

    // Scan the stage 1 entries of all supplementary code points.
    int c = 0x10000;
    while (c < 0x110000)
    {
        if (stage1[c >> Shift] == 0)
        {
            // nothing stored here, advance by one data block
            c += DataBlockLength;
            continue;
        }

        // There is data: handle the whole range covered by one lead surrogate.
        c &= ~0x3ff;

        // Look for an identical index block among those already folded.
        int matchedBlock = FindSameIndexBlock(stage1, foldedLength, c >> Shift);

        // Obtain the folded value for [c..c+0x400[ and, when it differs from
        // the value of the lead surrogate code point, store it for the
        // lead surrogate code unit.
        int foldedValue = manipulate.GetFoldedValue(c, matchedBlock + SurrogateBlockCount);
        var lead = UTF16.GetLeadSurrogate(c);
        if (foldedValue != GetValue(lead))
        {
            if (!SetValue(lead, foldedValue))
            {
                // data table overflow
                throw new IndexOutOfRangeException("Data table overflow");
            }

            // No identical block was found: move the stage 1 entries from
            // their supplementary position into the folded area.
            if (matchedBlock == foldedLength)
            {
                System.Array.Copy(stage1, c >> Shift, stage1, foldedLength, SurrogateBlockCount);
                foldedLength += SurrogateBlockCount;
            }
        }

        c += 0x400;
    }

    // Index array overflow?
    // A folding offset must have the form
    // UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
    // With a larger index, n>=1024 would need more than 10 bits. In
    // practice n can only reach exactly 1024, with completely unfoldable
    // data plus the extra block of duplicated lead surrogate values.
    if (foldedLength >= MaxIndexLength)
    {
        throw new IndexOutOfRangeException("Index table overflow");
    }

    // Shift the folded blocks up by one surrogate block and place the saved
    // lead surrogate indexes between the BMP indexes and the folded ones.
    System.Array.Copy(stage1, BMPIndexLength, stage1,
        BMPIndexLength + SurrogateBlockCount, foldedLength - BMPIndexLength);
    System.Array.Copy(savedLeadIndexes, 0, stage1, BMPIndexLength, SurrogateBlockCount);

    m_indexLength_ = foldedLength + SurrogateBlockCount;
}
/// <summary>
/// Serializes the build table to an output stream.
/// <para/>
/// Compacts the build-time trie after all values are set, and then
/// writes the serialized form onto an output stream.
/// <para/>
/// After this, this build-time Trie can only be serialized again and/or closed;
/// no further values can be added.
/// <para/>
/// This function is the rough equivalent of utrie_serialize() in ICU4C.
/// </summary>
/// <param name="os">The output stream to which the serialized trie will be written.
/// If <c>null</c>, nothing is written and the function still returns the size of the
/// serialized Trie.</param>
/// <param name="reduceTo16Bits">If true, reduce the data size to 16 bits. The resulting
/// serialized form can then be used to create a <see cref="CharTrie"/>.</param>
/// <param name="datamanipulate">Builder raw fold method implementation. Must not be <c>null</c>.</param>
/// <returns>The size of the serialized trie in bytes, which is the number of bytes
/// written when <paramref name="os"/> is not <c>null</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="datamanipulate"/> is <c>null</c>.</exception>
/// <exception cref="IndexOutOfRangeException">The data length (plus the index length when
/// reducing to 16 bits) has reached <c>MaxDataLength</c>.</exception>
public virtual int Serialize(Stream os, bool reduceTo16Bits,
    ITrieBuilderDataManipulate datamanipulate)
{
    if (datamanipulate == null)
    {
        // ArgumentNullException derives from ArgumentException, so callers that
        // catch the base type keep working while getting the parameter name.
        throw new ArgumentNullException(nameof(datamanipulate), "Parameters can not be null");
    }

    // fold and compact if necessary, also checks that indexLength is
    // within limits
    if (!m_isCompacted_)
    {
        // compact once without overlap to improve folding
        Compact(false);
        // fold the supplementary part of the index array
        Fold(datamanipulate);
        // compact again with overlap for minimum data array length
        Compact(true);
        m_isCompacted_ = true;
    }

    // is dataLength within limits?
    // In the 16-bit form the index length counts against the limit as well.
    int limitedLength = reduceTo16Bits ? m_dataLength_ + m_indexLength_ : m_dataLength_;
    if (limitedLength >= MaxDataLength)
    {
        // The check fires when the length is too large, not too small.
        throw new IndexOutOfRangeException("Data length too large");
    }

    // struct UTrieHeader {
    //     int32_t signature;
    //     int32_t options (a bit field)
    //     int32_t indexLength
    //     int32_t dataLength
    // }
    // followed by 16-bit index values and 16- or 32-bit data values
    int bytesPerDataValue = reduceTo16Bits ? 2 : 4;
    int length = Trie.HeaderLength + 2 * m_indexLength_ + bytesPerDataValue * m_dataLength_;

    if (os == null)
    {
        // No output stream. Just return the length of the serialized Trie, in bytes.
        return length;
    }

    DataOutputStream dos = new DataOutputStream(os);
    dos.WriteInt32(Trie.HeaderSignature);

    int options = Trie.IndexStage1Shift | (Trie.IndexStage2Shift << Trie.HeaderOptionsIndexShift);
    if (!reduceTo16Bits)
    {
        options |= Trie.HeaderOptionsDataIs32Bit;
    }
    if (m_isLatin1Linear_)
    {
        options |= Trie.HeaderOptionsLatin1IsLinearMask;
    }
    dos.WriteInt32(options);

    dos.WriteInt32(m_indexLength_);
    dos.WriteInt32(m_dataLength_);

    // write the index (stage 1) array: 16-bit index values shifted right by
    // UTRIE_INDEX_SHIFT. In the 16-bit form indexLength is added first
    // (NOTE(review): presumably because the 16-bit data is addressed relative
    // to the start of the index array - confirm against the reader).
    int indexOffset = reduceTo16Bits ? m_indexLength_ : 0;
    for (int i = 0; i < m_indexLength_; i++)
    {
        int v = (m_index_[i] + indexOffset).TripleShift(Trie.IndexStage2Shift);
        dos.WriteChar(v);
    }

    // write the data (stage 2) array
    if (reduceTo16Bits)
    {
        // 16-bit data values: only the low 16 bits of each entry are kept
        for (int i = 0; i < m_dataLength_; i++)
        {
            int v = m_data_[i] & 0x0000ffff;
            dos.WriteChar(v);
        }
    }
    else
    {
        // 32-bit data values
        for (int i = 0; i < m_dataLength_; i++)
        {
            dos.WriteInt32(m_data_[i]);
        }
    }

    return length;
}