コード例 #1
0
ファイル: IntTrieBuilder.cs プロジェクト: NightOwl888/ICU4N
        /// <summary>
        /// Serializes the build table with 32 bit data.
        /// <para/>
        /// If the builder has not been compacted yet, the data is compacted, the
        /// supplementary part of the index is folded, and the data is compacted
        /// again before the result is built; the builder is then marked as compacted.
        /// </summary>
        /// <param name="datamanipulate">Builder raw fold method implementation.</param>
        /// <param name="triedatamanipulate">Result trie fold method; passed through
        /// unchanged to the <see cref="Int32Trie"/> constructor.</param>
        /// <returns>A new trie.</returns>
        /// <exception cref="ArgumentException">If <paramref name="datamanipulate"/> is <c>null</c>.</exception>
        /// <exception cref="IndexOutOfRangeException">If the data length is greater than
        /// or equal to the maximum data length.</exception>
        public virtual Int32Trie Serialize(ITrieBuilderDataManipulate datamanipulate,
                                           ITrieDataManipulate triedatamanipulate)
        {
            if (datamanipulate == null)
            {
                throw new ArgumentException("Parameters can not be null");
            }

            // fold and compact if necessary, also checks that indexLength is
            // within limits
            if (!m_isCompacted_)
            {
                // compact once without overlap to improve folding
                Compact(false);
                // fold the supplementary part of the index array
                Fold(datamanipulate);
                // compact again with overlap for minimum data array length
                Compact(true);
                m_isCompacted_ = true;
            }

            // is dataLength within limits? The check is an upper bound, so the
            // failure means the data array is too large, not too small.
            if (m_dataLength_ >= MaxDataLength)
            {
                throw new IndexOutOfRangeException("Data length too large");
            }

            char[] index = new char[m_indexLength_];
            int[] data = new int[m_dataLength_];

            // write the index (stage 1) array: 16-bit index values shifted
            // right (unsigned) by IndexShift
            for (int i = 0; i < m_indexLength_; i++)
            {
                index[i] = (char)(m_index_[i].TripleShift(IndexShift));
            }

            // write the 32-bit data (stage 2) array
            System.Array.Copy(m_data_, 0, data, 0, m_dataLength_);

            // assemble the options bit field for the resulting trie
            int options = Shift | (IndexShift << OptionsIndexShift);
            options |= OptionsDataIs32Bit;
            if (m_isLatin1Linear_)
            {
                options |= OptionsLatin1IsLinear;
            }

            return new Int32Trie(index, data, m_initialValue_, options,
                                 triedatamanipulate);
        }
コード例 #2
0
ファイル: IntTrieBuilder.cs プロジェクト: NightOwl888/ICU4N
        /// <summary>
        /// Fold the normalization data for supplementary code points into
        /// a compact area on top of the BMP-part of the trie index,
        /// with the lead surrogates indexing this compact area.
        /// <para/>
        /// Duplicate the index values for lead surrogates:
        /// From inside the BMP area, where some may be overridden with folded values,
        /// to just after the BMP area, where they can be retrieved for
        /// code point lookups.
        /// <para/>
        /// On completion the index length field is updated to cover the BMP
        /// indexes, the copied lead surrogate index block, and the folded blocks.
        /// </summary>
        /// <param name="manipulate">Fold implementation; its <c>GetFoldedValue</c> is called
        /// once for every supplementary 0x400-code-point range that has index data.</param>
        /// <exception cref="InvalidOperationException">If no data block could be allocated
        /// for the lead unit value.</exception>
        /// <exception cref="IndexOutOfRangeException">If setting a folded value fails
        /// (data table overflow) or the folded index reaches the maximum index length.</exception>
        private void Fold(ITrieBuilderDataManipulate manipulate)
        {
            int[] leadIndexes = new int[SurrogateBlockCount];
            // NOTE: 'index' is the same array instance as m_index_, not a copy;
            // writes through either name are visible through the other.
            int[] index       = m_index_;
            // copy the lead surrogate indexes into a temporary array
            System.Array.Copy(index, 0xd800 >> Shift, leadIndexes, 0,
                              SurrogateBlockCount);

            // set all values for lead surrogate code *units* to leadUnitValue
            // so that by default runtime lookups will find no data for associated
            // supplementary code points, unless there is data for such code points
            // which will result in a non-zero folding value below that is set for
            // the respective lead units
            // the above saved the indexes for surrogate code *points*
            // fill the indexes with simplified code from utrie_setRange32()
            int block = 0;

            if (m_leadUnitValue_ == m_initialValue_)
            {
                // leadUnitValue == initialValue, use all-initial-value block:
                // block stays 0, so this branch is intentionally empty
            }
            else
            {
                // create and fill the repeatBlock
                block = AllocDataBlock();
                if (block < 0)
                {
                    // data table overflow
                    throw new InvalidOperationException("Internal error: Out of memory space");
                }
                FillBlock(block, 0, DataBlockLength, m_leadUnitValue_, true);
                // negative block number to indicate that it is a repeat block
                block = -block;
            }
            // point every lead surrogate index entry (0xd800..0xdbff) at the
            // block chosen above
            for (int c = (0xd800 >> Shift); c < (0xdc00 >> Shift); ++c)
            {
                m_index_[c] = block;
            }

            // Fold significant index values into the area just after the BMP
            // indexes.
            // In case the first lead surrogate has significant data,
            // its index block must be used first (in which case the folding is a
            // no-op).
            // Later all folded index blocks are moved up one to insert the copied
            // lead surrogate indexes.
            int indexLength = BMPIndexLength;

            // search for any index (stage 1) entries for supplementary code points
            // (c is advanced inside the loop body, by a different step per branch)
            for (int c = 0x10000; c < 0x110000;)
            {
                if (index[c >> Shift] != 0)
                {
                    // there is data, treat the full block for a lead surrogate:
                    // round c down to the start of its 0x400-code-point range
                    c &= ~0x3ff;
                    // is there an identical index block?
                    // ('block' is reused here; its earlier lead-unit value is no longer needed)
                    block = FindSameIndexBlock(index, indexLength, c >> Shift);

                    // get a folded value for [c..c+0x400[ and,
                    // if different from the value for the lead surrogate code
                    // point, set it for the lead surrogate code unit

                    int value = manipulate.GetFoldedValue(c,
                                                          block + SurrogateBlockCount);
                    if (value != GetValue(UTF16.GetLeadSurrogate(c)))
                    {
                        if (!SetValue(UTF16.GetLeadSurrogate(c), value))
                        {
                            // data table overflow
                            throw new IndexOutOfRangeException(
                                      "Data table overflow");
                        }
                        // if we did not find an identical index block...
                        if (block == indexLength)
                        {
                            // move the actual index (stage 1) entries from the
                            // supplementary position to the new one
                            System.Array.Copy(index, c >> Shift, index, indexLength,
                                              SurrogateBlockCount);
                            indexLength += SurrogateBlockCount;
                        }
                    }
                    // continue with the range of the next lead surrogate
                    c += 0x400;
                }
                else
                {
                    // no data for this index entry; skip one data block of code points
                    c += DataBlockLength;
                }
            }

            // index array overflow?
            // This is to guarantee that a folding offset is of the form
            // UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
            // If the index is too large, then n>=1024 and more than 10 bits are
            // necessary.
            // In fact, it can only ever become n==1024 with completely unfoldable
            // data and the additional block of duplicated values for lead
            // surrogates.
            if (indexLength >= MaxIndexLength)
            {
                throw new IndexOutOfRangeException("Index table overflow");
            }
            // make space for the lead surrogate index block and insert it between
            // the BMP indexes and the folded ones
            // (source and destination ranges of this copy are in the same array)
            System.Array.Copy(index, BMPIndexLength, index,
                              BMPIndexLength + SurrogateBlockCount,
                              indexLength - BMPIndexLength);
            System.Array.Copy(leadIndexes, 0, index, BMPIndexLength,
                              SurrogateBlockCount);
            // account for the inserted lead surrogate block and publish the new length
            indexLength   += SurrogateBlockCount;
            m_indexLength_ = indexLength;
        }
コード例 #3
0
ファイル: IntTrieBuilder.cs プロジェクト: NightOwl888/ICU4N
        /// <summary>
        /// Serializes the build table to an output stream.
        /// <para/>
        /// Compacts the build-time trie after all values are set, and then
        /// writes the serialized form onto an output stream.
        /// <para/>
        /// After this, this build-time Trie can only be serialized again and/or closed;
        /// no further values can be added.
        /// <para/>
        /// This function is the rough equivalent of utrie_serialize() in ICU4C.
        /// </summary>
        /// <param name="os">The output stream to which the serialized trie will be written.
        /// If <c>null</c>, nothing is written and the function still returns the size of
        /// the serialized Trie.</param>
        /// <param name="reduceTo16Bits">If true, reduce the data size to 16 bits.  The resulting
        /// serialized form can then be used to create a <see cref="CharTrie"/>.</param>
        /// <param name="datamanipulate">Builder raw fold method implementation.</param>
        /// <returns>The size of the serialized trie in bytes, which is the number of bytes
        /// written when <paramref name="os"/> is not <c>null</c>.</returns>
        /// <exception cref="ArgumentException">If <paramref name="datamanipulate"/> is <c>null</c>.</exception>
        /// <exception cref="IndexOutOfRangeException">If the data length (plus the index
        /// length when reducing to 16 bits) is greater than or equal to the maximum
        /// data length.</exception>
        public virtual int Serialize(Stream os, bool reduceTo16Bits,
                                     ITrieBuilderDataManipulate datamanipulate)
        {
            if (datamanipulate == null)
            {
                throw new ArgumentException("Parameters can not be null");
            }

            // fold and compact if necessary, also checks that indexLength is
            // within limits
            if (!m_isCompacted_)
            {
                // compact once without overlap to improve folding
                Compact(false);
                // fold the supplementary part of the index array
                Fold(datamanipulate);
                // compact again with overlap for minimum data array length
                Compact(true);
                m_isCompacted_ = true;
            }

            // is dataLength within limits?
            // For 16-bit output the index length is added to every stored index
            // value (see below), so it counts toward the limit as well.
            int length;

            if (reduceTo16Bits)
            {
                length = m_dataLength_ + m_indexLength_;
            }
            else
            {
                length = m_dataLength_;
            }
            // The check is an upper bound, so the failure means the data is too
            // large, not too small.
            if (length >= MaxDataLength)
            {
                throw new IndexOutOfRangeException("Data length too large");
            }

            //  struct UTrieHeader {
            //      int32_t   signature;
            //      int32_t   options  (a bit field)
            //      int32_t   indexLength
            //      int32_t   dataLength
            //  }
            // total size: header + 2 bytes per index entry + 2 or 4 bytes per data entry
            length = Trie.HeaderLength + 2 * m_indexLength_;
            if (reduceTo16Bits)
            {
                length += 2 * m_dataLength_;
            }
            else
            {
                length += 4 * m_dataLength_;
            }

            if (os == null)
            {
                // No output stream.  Just return the length of the serialized Trie, in bytes.
                return length;
            }

            DataOutputStream dos = new DataOutputStream(os);

            // header: signature, options, indexLength, dataLength
            dos.WriteInt32(Trie.HeaderSignature);

            int options = Trie.IndexStage1Shift | (Trie.IndexStage2Shift << Trie.HeaderOptionsIndexShift);

            if (!reduceTo16Bits)
            {
                options |= Trie.HeaderOptionsDataIs32Bit;
            }
            if (m_isLatin1Linear_)
            {
                options |= Trie.HeaderOptionsLatin1IsLinearMask;
            }
            dos.WriteInt32(options);

            dos.WriteInt32(m_indexLength_);
            dos.WriteInt32(m_dataLength_);

            /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */
            if (reduceTo16Bits)
            {
                /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */
                for (int i = 0; i < m_indexLength_; i++)
                {
                    int v = (m_index_[i] + m_indexLength_).TripleShift(Trie.IndexStage2Shift);
                    dos.WriteChar(v);
                }

                /* write 16-bit data values (only the low 16 bits of each value are kept) */
                for (int i = 0; i < m_dataLength_; i++)
                {
                    int v = m_data_[i] & 0x0000ffff;
                    dos.WriteChar(v);
                }
            }
            else
            {
                /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */
                for (int i = 0; i < m_indexLength_; i++)
                {
                    int v = (m_index_[i]).TripleShift(Trie.IndexStage2Shift);
                    dos.WriteChar(v);
                }

                /* write 32-bit data values */
                for (int i = 0; i < m_dataLength_; i++)
                {
                    dos.WriteInt32(m_data_[i]);
                }
            }

            return length;
        }