Example #1
0
        //CLOVER:ON

        // protected methods -----------------------------------------------

        /// <summary>
        /// Reads the trie content from the byte buffer into one char array that is
        /// used as both the index array and the data array.
        /// </summary>
        /// <param name="bytes">Buffer containing trie data.</param>
        protected override sealed void Unserialize(ByteBuffer bytes) // ICU4N TODO: API - make internal and add overload that accepts byte[]
        {
            // A single read covers (data offset + data length) chars.
            char[] combined = ICUBinary.GetChars(bytes, m_dataOffset_ + m_dataLength_, 0);

            // Index and data share the same backing array.
            m_index_ = combined;
            m_data_  = combined;

            // The initial value is the element stored at the data offset.
            m_initialValue_ = combined[m_dataOffset_];
        }
Example #2
0
        /// <summary>
        /// Reads, in order, the header, the indexes[] array, the trie, and the
        /// optional exceptions and unfold sections from the given buffer.
        /// </summary>
        /// <param name="bytes">Buffer positioned at the start of the data.</param>
        /// <exception cref="IOException">
        /// Thrown when indexes[0] is smaller than <c>IX_TOP</c>, or when the trie is
        /// longer than the size recorded in indexes[].
        /// </exception>
        private void ReadData(ByteBuffer bytes)
        {
            // Header first.
            ICUBinary.ReadHeader(bytes, FMT, new IsAcceptable());

            // indexes[]: the first int is the number of entries, itself included.
            int indexCount = bytes.GetInt32();
            if (indexCount < IX_TOP)
            {
                throw new IOException("indexes[0] too small in " + DATA_FILE_NAME);
            }

            indexes    = new int[indexCount];
            indexes[0] = indexCount;

            int slot = 1;
            while (slot < indexCount)
            {
                indexes[slot] = bytes.GetInt32();
                slot++;
            }

            // The trie, followed by whatever padding brings it up to the recorded size.
            trie = Trie2_16.CreateFromSerialized(bytes);
            int recordedTrieSize = indexes[IX_TRIE_SIZE];
            int actualTrieSize   = trie.GetSerializedLength();
            if (actualTrieSize > recordedTrieSize)
            {
                throw new IOException(DATA_FILE_NAME + ": not enough bytes for the trie");
            }
            ICUBinary.SkipBytes(bytes, recordedTrieSize - actualTrieSize);

            // exceptions[] is only read when its recorded length is positive.
            int exceptionsLength = indexes[IX_EXC_LENGTH];
            if (exceptionsLength > 0)
            {
                exceptions = ICUBinary.GetString(bytes, exceptionsLength, 0);
            }

            // unfold[] is only read when its recorded length is positive.
            int unfoldLength = indexes[IX_UNFOLD_LENGTH];
            if (unfoldLength > 0)
            {
                unfold = ICUBinary.GetChars(bytes, unfoldLength, 0);
            }
        }
Example #3
0
        // protected methods -------------------------------------------------

        /// <summary>
        /// Reads the name data from the underlying byte buffer, splits it into its
        /// sections, and stores them in the given <see cref="UCharacterName"/>.
        /// </summary>
        /// <param name="data">Instance of datablock.</param>
        /// <exception cref="IOException">Thrown when there's a data error.</exception>
        internal void Read(UCharacterName data)
        {
            // Four section offsets open the data.
            m_tokenstringindex_ = m_byteBuffer_.GetInt32();
            m_groupindex_       = m_byteBuffer_.GetInt32();
            m_groupstringindex_ = m_byteBuffer_.GetInt32();
            m_algnamesindex_    = m_byteBuffer_.GetInt32();

            // Token table, then the token string bytes that run up to the group index.
            int    tokenCount  = m_byteBuffer_.GetChar();
            char[] tokenTable  = ICUBinary.GetChars(m_byteBuffer_, tokenCount, 0);
            byte[] tokenString = new byte[m_groupindex_ - m_tokenstringindex_];
            m_byteBuffer_.Get(tokenString);
            data.SetToken(tokenTable, tokenString);

            // Group records: GROUP_INFO_SIZE_ chars for each group.
            int groupCount = m_byteBuffer_.GetChar();
            data.SetGroupCountSize(groupCount, GROUP_INFO_SIZE_);
            char[] groupTable = ICUBinary.GetChars(m_byteBuffer_, groupCount * GROUP_INFO_SIZE_, 0);

            // Group string bytes run up to the algorithmic names index.
            byte[] groupString = new byte[m_algnamesindex_ - m_groupstringindex_];
            m_byteBuffer_.Get(groupString);
            data.SetGroup(groupTable, groupString);

            // Algorithmic name records; a null record is reported as a read error.
            int algorithmCount = m_byteBuffer_.GetInt32();
            UCharacterName.AlgorithmName[] algorithms =
                new UCharacterName.AlgorithmName[algorithmCount];

            int index = 0;
            while (index < algorithmCount)
            {
                UCharacterName.AlgorithmName record = ReadAlg();
                if (record == null)
                {
                    throw new IOException("unames.icu read error: Algorithmic names creation error");
                }
                algorithms[index] = record;
                index++;
            }
            data.SetAlgorithm(algorithms);
        }
Example #4
0
        // private methods ---------------------------------------------------

        /// <summary>
        /// Reads a single <see cref="UCharacterName.AlgorithmName"/> record from the
        /// underlying byte buffer.
        /// </summary>
        /// <returns>
        /// The populated <see cref="UCharacterName.AlgorithmName"/>, or null when
        /// <c>SetInfo</c> rejects the range, type and variant that were read.
        /// </returns>
        /// <exception cref="IOException">Thrown when file read error occurs or data is corrupted.</exception>
        private UCharacterName.AlgorithmName ReadAlg()
        {
            UCharacterName.AlgorithmName record = new UCharacterName.AlgorithmName();

            int  first   = m_byteBuffer_.GetInt32();
            int  last    = m_byteBuffer_.GetInt32();
            byte type    = m_byteBuffer_.Get();
            byte variant = m_byteBuffer_.Get();

            if (!record.SetInfo(first, last, type, variant))
            {
                return null;
            }

            // Size of the record; reduced below by each part as it is consumed.
            int remaining = m_byteBuffer_.GetChar();

            if (type == UCharacterName.AlgorithmName.TYPE_1_)
            {
                // For this type, `variant` is the number of factor chars (two bytes each).
                record.SetFactor(ICUBinary.GetChars(m_byteBuffer_, variant, 0));
                remaining -= (variant << 1);
            }

            // The prefix is a run of single bytes ended by a zero byte.
            StringBuilder prefix = new StringBuilder();
            for (char c = (char)(m_byteBuffer_.Get() & 0x00FF);
                 c != 0;
                 c = (char)(m_byteBuffer_.Get() & 0x00FF))
            {
                prefix.Append(c);
            }
            record.SetPrefix(prefix.ToString());

            // Subtract the fixed part, the prefix, and its terminating zero byte.
            remaining -= (ALG_INFO_SIZE_ + prefix.Length + 1);

            if (remaining <= 0)
            {
                return record;
            }

            // Anything still left is the factor string.
            byte[] factorString = new byte[remaining];
            m_byteBuffer_.Get(factorString);
            record.SetFactorString(factorString);
            return record;
        }
Example #5
0
 public char[] Read(int length)
 {
     // Fetch `length` chars of extra data from the byte buffer.
     char[] extraData = ICUBinary.GetChars(byteBuffer, length, 0);

     return extraData;
 }
Example #6
0
 /// <summary>
 /// Reads the trie index out of the byte buffer.
 /// <para/>
 /// The position of the input <see cref="ByteBuffer"/> must be right after the trie header.
 /// <para/>
 /// This is overridden by the child classes.
 /// </summary>
 /// <param name="bytes">Buffer containing trie data.</param>
 protected virtual void Unserialize(ByteBuffer bytes)
 {
     // The index holds m_dataOffset chars.
     char[] index = ICUBinary.GetChars(bytes, m_dataOffset, 0);

     m_index = index;
 }
Example #7
0
        /// <summary>
        /// Builds a <see cref="Trie2"/> from its serialized form; the inverse of utrie2_serialize().
        /// </summary>
        /// <remarks>
        /// Reading starts at the buffer's current position and leaves the buffer just past the
        /// end of the trie. The buffer's byte order is restored to its original value on exit.
        /// <para/>
        /// ICU4C, ICU4J, and ICU4N share the same serialized format, so a serialized
        /// <see cref="Trie2"/> from any of them can be read here.
        /// <para/>
        /// The returned object is a <see cref="Trie2_16"/> or a <see cref="Trie2_32"/>, chosen by the
        /// value width recorded in the header. Check the runtime type of the result to learn the
        /// width, or call <see cref="Trie2_16.CreateFromSerialized(ByteBuffer)"/> or
        /// <see cref="Trie2_32.CreateFromSerialized(ByteBuffer)"/> to get one specific width only.
        /// <para/>
        /// The serialized data may be little or big endian. A byte-swapped signature makes this
        /// method flip the buffer's byte order while reading, so data produced by ICU4C can be
        /// used regardless of the byte order of the system that created it.
        /// </remarks>
        /// <param name="bytes">A byte buffer to the serialized form of a UTrie2.</param>
        /// <returns>An unserialized <see cref="Trie2"/>, ready for use.</returns>
        /// <exception cref="ArgumentException">If the stream does not contain a serialized <see cref="Trie2"/>.</exception>
        /// <exception cref="IOException">If a read error occurs in the buffer.</exception>
        public static Trie2 CreateFromSerialized(ByteBuffer bytes) // ICU4N TODO: API Create overload that accepts byte[]
        {
            // Serialized layout (from ICU4C utrie2_impl.h):
            //
            //   UTrie2Header header;
            //   uint16_t index[header.index2Length];
            //   uint16_t data[header.shiftedDataLength<<2];   -- or uint32_t data[...]
            //
            // UTrie2Header fields, in order:
            //   uint32_t signature          "Tri2" in big-endian US-ASCII (0x54726932)
            //   uint16_t options            bits 15..4 reserved (0), bits 3..0 UTrie2ValueBits valueBits
            //   uint16_t indexLength        UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH
            //   uint16_t shiftedDataLength  (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT
            //   uint16_t index2NullOffset   null index block, not shifted
            //   uint16_t dataNullOffset     null data block, not shifted
            //   uint16_t shiftedHighStart   first code point of the single-value range ending with
            //                               U+10ffff, rounded up and then shifted right by UTRIE2_SHIFT_1

            // Remember the caller's byte order so the finally block can put it back.
            ByteOrder callerByteOrder = bytes.Order;

            try
            {
                UTrie2Header header = new UTrie2Header();

                // Signature check: as-is, byte-swapped, or not a UTrie2 at all.
                header.signature = bytes.GetInt32();
                if (header.signature == 0x32697254)
                {
                    // Byte-swapped signature: read the rest in the opposite byte order.
                    bytes.Order = (callerByteOrder == ByteOrder.BigEndian)
                        ? ByteOrder.LittleEndian
                        : ByteOrder.BigEndian;
                    header.signature = 0x54726932;
                }
                else if (header.signature != 0x54726932)
                {
                    throw new ArgumentException("Buffer does not contain a serialized UTrie2");
                }

                header.options           = bytes.GetChar();
                header.indexLength       = bytes.GetChar();
                header.shiftedDataLength = bytes.GetChar();
                header.index2NullOffset  = bytes.GetChar();
                header.dataNullOffset    = bytes.GetChar();
                header.shiftedHighStart  = bytes.GetChar();

                // Value bits: 0 means 16-bit data, 1 means 32-bit data; anything else is invalid.
                if ((header.options & UTRIE2_OPTIONS_VALUE_BITS_MASK) > 1)
                {
                    throw new ArgumentException("UTrie2 serialized format error.");
                }
                bool is16Bit = (header.options & UTRIE2_OPTIONS_VALUE_BITS_MASK) == 0;

                Trie2 result = is16Bit ? (Trie2)new Trie2_16() : (Trie2)new Trie2_32();
                result.header = header;

                // Lengths and offsets derived from the header.
                result.indexLength      = header.indexLength;
                result.dataLength       = header.shiftedDataLength << UTRIE2_INDEX_SHIFT;
                result.index2NullOffset = header.index2NullOffset;
                result.dataNullOffset   = header.dataNullOffset;
                result.highStart        = header.shiftedHighStart << UTRIE2_SHIFT_1;
                result.highValueIndex   = result.dataLength - UTRIE2_DATA_GRANULARITY;

                if (is16Bit)
                {
                    // 16-bit data sits in the same array as the index, directly after it,
                    // so data positions are offset by the index length.
                    result.highValueIndex += result.indexLength;

                    result.index  = ICUBinary.GetChars(bytes, result.indexLength + result.dataLength, 0);
                    result.data16 = result.indexLength;
                    result.data32 = null;

                    result.initialValue = result.index[result.dataNullOffset];
                    result.errorValue   = result.index[result.data16 + UTRIE2_BAD_UTF8_DATA_OFFSET];
                }
                else
                {
                    // 32-bit data is read into its own array after the index.
                    result.index  = ICUBinary.GetChars(bytes, result.indexLength, 0);
                    result.data32 = ICUBinary.GetInt32s(bytes, result.dataLength, 0);
                    result.data16 = 0;

                    result.initialValue = result.data32[result.dataNullOffset];
                    result.errorValue   = result.data32[UTRIE2_BAD_UTF8_DATA_OFFSET];
                }

                return result;
            }
            finally
            {
                bytes.Order = callerByteOrder;
            }
        }