Esempio n. 1
0
        // public methods --------------------------------------------------------

        /// <summary>
        /// <p>
        /// ICU data header reader method. Takes an ICU-generated big-endian input
        /// stream, parses the ICU standard file header and authenticates it.
        /// </p>
        /// <p>
        /// Header format:
        /// <ul>
        /// <li>Header size (char)
        /// <li>Magic number 1 (byte)
        /// <li>Magic number 2 (byte)
        /// <li>Rest of the header size (char)
        /// <li>Reserved word (char)
        /// <li>Big endian indicator (byte)
        /// <li>Character set family indicator (byte)
        /// <li>Size of a char (byte) for c++ and c use
        /// <li>Reserved byte (byte)
        /// <li>Data format identifier (4 bytes), each ICU data has its own
        /// identifier to distinguish them. [0] major [1] minor [2] milli [3] micro
        /// <li>Data version (4 bytes), the change version of the ICU data [0] major
        /// [1] minor [2] milli [3] micro
        /// <li>Unicode version (4 bytes) this ICU is based on.
        /// </ul>
        /// </p>
        /// <p>
        /// Example of use:<br>
        /// <pre>
        /// try {
        /// FileInputStream input = new FileInputStream(filename);
        /// If (Utility.readICUDataHeader(input, dataformat, dataversion,
        /// unicode) {
        /// System.out.println("Verified file header, this is a ICU data file");
        /// }
        /// } catch (IOException e) {
        /// System.out.println("This is not a ICU data file");
        /// }
        /// </pre>
        /// </p>
        /// </summary>
        ///
        /// <param name="inputStream">input stream that contains the ICU data header</param>
        /// <param name="dataFormatIDExpected">Data format expected. An array of 4 bytes of information about the data format. E.g. data format ID 1.2.3.4 will become an array of {1, 2, 3, 4}</param>
        /// <param name="authenticate">user defined extra data authentication. This value can be null, if no extra authentication is needed.</param>
        /// <exception cref="IOException">thrown if there is a read error or when header authentication fails.</exception>
        /// @draft 2.1
        public static byte[] ReadHeader(Stream inputStream,
                                        byte[] dataFormatIDExpected, ICUBinary.Authenticate authenticate)
        {
            DataInputStream reader = new DataInputStream(inputStream);

            // Number of header bytes consumed so far; needed at the end to skip
            // whatever part of the declared header has not been parsed.
            int consumed = 0;

            // Declared size of the whole header (one char).
            int declaredSize = reader.ReadChar();
            consumed += 2;

            // The two magic bytes must match before anything else is trusted.
            byte firstMagic = (byte)reader.ReadByte();
            byte secondMagic = (byte)reader.ReadByte();
            consumed += 2;
            if (!(firstMagic == MAGIC1 && secondMagic == MAGIC2))
            {
                throw new IOException(MAGIC_NUMBER_AUTHENTICATION_FAILED_);
            }

            // Two chars whose values are not used here: the size field and the reserved word.
            reader.ReadChar();
            reader.ReadChar();
            consumed += 4;

            // Three indicator bytes, followed by one reserved byte that is discarded.
            sbyte endianFlag = reader.ReadByte();
            sbyte charsetFamily = reader.ReadByte();
            sbyte charWidth = reader.ReadByte();
            reader.ReadByte();
            consumed += 4;

            // Three consecutive 4-byte groups: data format ID, data version, Unicode version.
            byte[] formatId = new byte[4];
            reader.ReadFully(formatId);
            byte[] dataVersion = new byte[4];
            reader.ReadFully(dataVersion);
            byte[] unicodeVersion = new byte[4];
            reader.ReadFully(unicodeVersion);
            consumed += 12;

            // A declared size smaller than what was just parsed cannot be valid.
            if (declaredSize < consumed)
            {
                throw new IOException("Internal Error: Header size error");
            }
            reader.SkipBytes(declaredSize - consumed);

            // Every check must pass; the optional authenticator is consulted last,
            // and only when all the preceding checks succeeded.
            bool accepted = endianFlag == BIG_ENDIAN_ &&
                            charsetFamily == CHAR_SET_ &&
                            charWidth == CHAR_SIZE_ &&
                            ILOG.J2CsMapping.Collections.Arrays.Equals(dataFormatIDExpected, formatId) &&
                            (authenticate == null || authenticate.IsDataVersionAcceptable(dataVersion));
            if (!accepted)
            {
                throw new IOException(HEADER_AUTHENTICATION_FAILED_);
            }
            return unicodeVersion;
        }
Esempio n. 2
0
        // private methods ---------------------------------------------------

        /// <summary>
        /// Reads an individual record of AlgorithmNames
        /// </summary>
        ///
        /// <returns>an instance of AlgorithmName if read is successful otherwise null</returns>
        /// <exception cref="IOException">thrown when file read error occurs or data is corrupted</exception>
        private UCharacterName.AlgorithmName ReadAlg()
        {
            UCharacterName.AlgorithmName record = new UCharacterName.AlgorithmName();

            // Leading fields of the record, in stream order: two ints, then two bytes.
            int   start     = m_dataInputStream_.ReadInt();
            int   end       = m_dataInputStream_.ReadInt();
            sbyte kind      = m_dataInputStream_.ReadByte();
            sbyte variation = m_dataInputStream_.ReadByte();

            // SetInfo rejects the record by returning false; in that case nothing more is read.
            if (!record.SetInfo(start, end, kind, variation))
            {
                return null;
            }

            // Size value stored in the record; reduced below by each part that gets consumed.
            int remaining = m_dataInputStream_.ReadChar();

            if (kind == IBM.ICU.Impl.UCharacterName.AlgorithmName.TYPE_1_)
            {
                // For this type the variant byte gives the number of factor chars that follow.
                char[] factors = new char[variation];
                for (int k = 0; k < factors.Length; k++)
                {
                    factors[k] = m_dataInputStream_.ReadChar();
                }

                record.SetFactor(factors);
                remaining -= 2 * variation; // each factor took two bytes
            }

            // The prefix is a zero-terminated run of single bytes, widened to chars.
            StringBuilder prefix = new StringBuilder();
            for (;;)
            {
                char next = (char)(m_dataInputStream_.ReadByte() & 0x00FF);
                if (next == 0)
                {
                    break;
                }
                prefix.Append(next);
            }

            record.SetPrefix(prefix.ToString());

            // Subtract the fixed info size, the prefix and its terminator.
            remaining -= (ALG_INFO_SIZE_ + prefix.Length + 1);

            // Anything left over is handed to the record as the factor string bytes.
            if (remaining > 0)
            {
                byte[] tail = new byte[remaining];
                m_dataInputStream_.ReadFully(tail);
                record.SetFactorString(tail);
            }
            return record;
        }
Esempio n. 3
0
        /// <summary>
        /// <p>
        /// Parses the inputstream and creates the trie index with it.
        /// </p>
        /// <p>
        /// This is overridden by the child classes.
        /// </p>
        /// </summary>
        ///
        /// <param name="inputStream">input stream containing the trie information</param>
        /// <exception cref="IOException">thrown when data reading fails.</exception>
        /// @draft 2.1
        protected internal virtual void Unserialize(DataInputStream inputStream)
        {
            // The index holds m_dataOffset_ chars
            // (indexLength is a multiple of 1024 >> INDEX_STAGE_2_SHIFT_).
            char[] index = new char[m_dataOffset_];

            // Publish the array before filling it, so the field is set even if a read throws.
            m_index_ = index;

            int slot = 0;
            while (slot < m_dataOffset_)
            {
                index[slot] = inputStream.ReadChar();
                slot++;
            }
        }
Esempio n. 4
0
        // /CLOVER:ON

        // protected methods -----------------------------------------------

        /// <summary>
        /// <p>
        /// Parses the input stream and stores its trie content into a index and data
        /// array
        /// </p>
        /// </summary>
        ///
        /// <param name="inputStream">data input stream containing trie data</param>
        /// <exception cref="IOException">thrown when data reading fails</exception>
        protected internal override void Unserialize(DataInputStream inputStream)
        {
            // Index and data are read as one contiguous run of chars into a single array.
            int totalLength = m_dataOffset_ + m_dataLength_;

            m_index_ = new char[totalLength];

            int slot = 0;
            while (slot < totalLength)
            {
                m_index_[slot] = inputStream.ReadChar();
                slot++;
            }

            // The data array is the same array as the index; the initial value is
            // the element at m_dataOffset_.
            m_data_         = m_index_;
            m_initialValue_ = m_data_[m_dataOffset_];
        }
Esempio n. 5
0
        /// <summary>
        /// Reads in the inverse uca data
        /// </summary>
        ///
        /// <param name="inputStream">input stream with the inverse uca data</param>
        /// <returns>an object containing the inverse uca data</returns>
        /// <exception cref="IOException">thrown when error occurs while reading the inverse uca</exception>
        private static CollationParsedRuleBuilder.InverseUCA ReadInverseUCA(
            Stream inputStream)
        {
            // Parse and authenticate the ICU header; it returns the Unicode version stored in the file.
            byte[] fileUnicodeVersion = IBM.ICU.Impl.ICUBinary.ReadHeader(
                inputStream, INVERSE_UCA_DATA_FORMAT_ID_, INVERSE_UCA_AUTHENTICATE_);

            // weiv: check that we have the correct Unicode version in
            // binary files (only major and minor are compared)
            VersionInfo runtimeVersion = IBM.ICU.Lang.UCharacter.GetUnicodeVersion();

            if (!(fileUnicodeVersion[0] == runtimeVersion.GetMajor() &&
                  fileUnicodeVersion[1] == runtimeVersion.GetMinor()))
            {
                throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
            }

            CollationParsedRuleBuilder.InverseUCA inverse = new CollationParsedRuleBuilder.InverseUCA();
            DataInputStream reader = new DataInputStream(inputStream);

            reader.ReadInt();                              // bytesize (ignored)
            int tableRows         = reader.ReadInt();      // in int size
            int continuationCount = reader.ReadInt();      // in char size

            reader.ReadInt();                              // table in bytes (ignored)
            reader.ReadInt();                              // conts in bytes (ignored)
            inverse.m_UCA_version_ = ReadVersion(reader);
            reader.SkipBytes(8);                           // skip padding

            // one column for each strength, hence three ints per table row
            int[]  table         = new int[tableRows * 3];
            char[] continuations = new char[continuationCount];

            inverse.m_table_         = table;
            inverse.m_continuations_ = continuations;

            int filled = 0;
            while (filled < table.Length)
            {
                table[filled] = reader.ReadInt();
                filled++;
            }

            filled = 0;
            while (filled < continuations.Length)
            {
                continuations[filled] = reader.ReadChar();
                filled++;
            }

            // The stream is intentionally left open (the original close call was disabled).
            return inverse;
        }
        /// <summary>
        /// Reads characters from <paramref name="inStream"/> until the END marker is read
        /// or cancellation has been requested on StopToken, and returns the characters
        /// received before the marker.
        /// </summary>
        /// <param name="inStream">stream the characters are read from</param>
        /// <returns>the characters read, excluding the END marker; an empty string if none were read</returns>
        public virtual string ReadString(DataInputStream inStream)
        {
            // Accumulate in a StringBuilder: appending to a string inside the loop
            // re-copies the whole message for every received character.
            var received = new System.Text.StringBuilder();
            var res      = START;

            // NOTE(review): the read loop runs on a pool thread while this method
            // blocks in Wait(); a failure inside the loop therefore surfaces as an
            // AggregateException. Presumably this keeps the blocking read off the
            // calling thread - confirm before simplifying.
            Task.Run(() =>
            {
                // Cancellation is only observed between reads; a pending ReadChar is not interrupted.
                while (res != END && !StopToken.IsCancellationRequested)
                {
                    res = inStream.ReadChar();
                    if (res != END)
                    {
                        received.Append((char)res);
                    }
                }
            }).Wait();
            return received.ToString();
        }
Esempio n. 7
0
        /// <summary>
        /// Reads characters from <paramref name="inStream"/> until the END marker is read
        /// or cancellation has been requested on StopToken, and returns the characters
        /// received before the marker with any leading START characters removed.
        /// </summary>
        /// <param name="inStream">stream the characters are read from</param>
        /// <param name="outStream">currently unused; the acknowledgement write that used it is disabled</param>
        /// <returns>the characters read, excluding the END marker and leading START characters</returns>
        public virtual string ReadString(DataInputStream inStream, DataOutputStream outStream)
        {
            // Accumulate in a StringBuilder: appending to a string inside the loop
            // re-copies the whole message for every received character.
            var received = new System.Text.StringBuilder();
            var res      = START;

            // NOTE(review): the read loop runs on a pool thread while this method
            // blocks in Wait(); a failure inside the loop therefore surfaces as an
            // AggregateException. Presumably this keeps the blocking read off the
            // calling thread - confirm before simplifying.
            Task.Run(() =>
            {
                // Cancellation is only observed between reads; a pending ReadChar is not interrupted.
                while (res != END && !StopToken.IsCancellationRequested)
                {
                    res = inStream.ReadChar();
                    if (res != END)
                    {
                        received.Append((char)res);
                    }
                }
                //outStream.WriteChar(ACKchar);
            }).Wait();

            // START serves mostly as a sacrificial leading character: first-character
            // drops seem to be common, so when one happens it only costs the START
            // char, and TrimStart then simply finds nothing to remove.
            return received.ToString().TrimStart(START);
        }
Esempio n. 8
0
        /*
         * Get an RBBIDataWrapper from an InputStream onto a pre-compiled set of
         * RBBI rules.
         */
        /// <summary>
        /// Builds an RBBIDataWrapper by reading pre-compiled break-iterator rule data from
        /// a stream: the RBBI data header, the forward, reverse and optional safe state
        /// tables, the character-category trie, the rule status table and the rule source.
        /// </summary>
        /// <param name="mask0">stream to read from; its first 0x80 bytes are skipped as the ICU data header</param>
        /// <returns>the populated wrapper</returns>
        /// <exception cref="IOException">thrown when the magic number or version is not accepted, or when one of the checked section offsets is inconsistent</exception>
        static internal RBBIDataWrapper Get(Stream mask0)
        {
            int i;

            DataInputStream dis  = new DataInputStream(new BufferedStream(mask0));
            RBBIDataWrapper This = new RBBIDataWrapper();

            // Seek past the ICU data header.
            // TODO: verify that the header looks good.
            dis.SkipBytes(0x80);

            // Read in the RBBI data header: 18 ints followed by 6 reserved ints.
            This.fHeader                   = new RBBIDataWrapper.RBBIDataHeader();
            This.fHeader.fMagic            = dis.ReadInt();
            This.fHeader.fVersion          = dis.ReadInt();
            // The version int is also exposed as four bytes, most significant byte first.
            This.fHeader.fFormatVersion[0] = (byte)(This.fHeader.fVersion >> 24);
            This.fHeader.fFormatVersion[1] = (byte)(This.fHeader.fVersion >> 16);
            This.fHeader.fFormatVersion[2] = (byte)(This.fHeader.fVersion >> 8);
            This.fHeader.fFormatVersion[3] = (byte)(This.fHeader.fVersion);
            This.fHeader.fLength           = dis.ReadInt();
            This.fHeader.fCatCount         = dis.ReadInt();
            // Each section below is described by an (offset, length) pair.
            This.fHeader.fFTable           = dis.ReadInt();
            This.fHeader.fFTableLen        = dis.ReadInt();
            This.fHeader.fRTable           = dis.ReadInt();
            This.fHeader.fRTableLen        = dis.ReadInt();
            This.fHeader.fSFTable          = dis.ReadInt();
            This.fHeader.fSFTableLen       = dis.ReadInt();
            This.fHeader.fSRTable          = dis.ReadInt();
            This.fHeader.fSRTableLen       = dis.ReadInt();
            This.fHeader.fTrie             = dis.ReadInt();
            This.fHeader.fTrieLen          = dis.ReadInt();
            This.fHeader.fRuleSource       = dis.ReadInt();
            This.fHeader.fRuleSourceLen    = dis.ReadInt();
            This.fHeader.fStatusTable      = dis.ReadInt();
            This.fHeader.fStatusTableLen   = dis.ReadInt();
            dis.SkipBytes(6 * 4);                                                       // uint32_t fReserved[6];

            // Accept only the expected magic number together with either version 1
            // or a format version whose first byte is 3.
            if (This.fHeader.fMagic != 0xb1a0 || !(This.fHeader.fVersion == 1 ||        // ICU 3.2 and earlier
                                                   This.fHeader.fFormatVersion[0] == 3) // ICU 3.4
                )
            {
                throw new IOException(
                          "Break Iterator Rule Data Magic Number Incorrect, or unsupported data version.");
            }

            // Current position in input stream, counted from the start of the RBBI
            // header: offset of the end of the header, which has 24 fields, all
            // int32_t (4 bytes).
            int pos = 24 * 4;

            //
            // Read in the Forward state transition table as an array of shorts.
            //

            // Quick Sanity Check
            if (This.fHeader.fFTable < pos ||
                This.fHeader.fFTable > This.fHeader.fLength)
            {
                throw new IOException("Break iterator Rule data corrupt");
            }

            // Skip over any padding preceding this table
            dis.SkipBytes(This.fHeader.fFTable - pos);
            pos = This.fHeader.fFTable;

            // The length is in bytes; each table entry is a 2-byte short.
            This.fFTable = new short[This.fHeader.fFTableLen / 2];
            for (i = 0; i < This.fFTable.Length; i++)
            {
                This.fFTable[i] = dis.ReadShort();
                pos            += 2;
            }

            //
            // Read in the Reverse state table
            //
            // NOTE(review): unlike fFTable, the fRTable, fSFTable, fSRTable and fTrie
            // offsets are not compared against pos before being passed to SkipBytes.

            // Skip over any padding in the file
            dis.SkipBytes(This.fHeader.fRTable - pos);
            pos = This.fHeader.fRTable;

            // Create & fill the table itself.
            This.fRTable = new short[This.fHeader.fRTableLen / 2];
            for (i = 0; i < This.fRTable.Length; i++)
            {
                This.fRTable[i] = dis.ReadShort();
                pos            += 2;
            }

            //
            // Read in the Safe Forward state table (only when its length is non-zero)
            //
            if (This.fHeader.fSFTableLen > 0)
            {
                // Skip over any padding in the file
                dis.SkipBytes(This.fHeader.fSFTable - pos);
                pos = This.fHeader.fSFTable;

                // Create & fill the table itself.
                This.fSFTable = new short[This.fHeader.fSFTableLen / 2];
                for (i = 0; i < This.fSFTable.Length; i++)
                {
                    This.fSFTable[i] = dis.ReadShort();
                    pos += 2;
                }
            }

            //
            // Read in the Safe Reverse state table (only when its length is non-zero)
            //
            if (This.fHeader.fSRTableLen > 0)
            {
                // Skip over any padding in the file
                dis.SkipBytes(This.fHeader.fSRTable - pos);
                pos = This.fHeader.fSRTable;

                // Create & fill the table itself.
                This.fSRTable = new short[This.fHeader.fSRTableLen / 2];
                for (i = 0; i < This.fSRTable.Length; i++)
                {
                    This.fSRTable[i] = dis.ReadShort();
                    pos += 2;
                }
            }

            //
            // Unserialize the Character categories TRIE.
            // Because we can't be absolutely certain where the Trie deserialize
            // will leave the input stream, leave position unchanged.
            // The seek to the start of the next item following the TRIE will get us
            // back in sync.
            //

            // Seek the input stream from the end of the previous section to the
            // start of the trie.
            dis.SkipBytes(This.fHeader.fTrie - pos);
            pos = This.fHeader.fTrie;

            // Mark the position of the start of the TRIE in the input, and ask for
            // the mark to stay valid as long as we don't go more than 100 bytes
            // past the end of the TRIE.
            dis.Mark(This.fHeader.fTrieLen + 100);

            // Deserialize the TRIE, leaving the input stream at an unknown position,
            // preceding the padding between the TRIE and the following section.
            This.fTrie = new CharTrie(dis, fTrieFoldingFunc);

            // Move the input stream back to the marked position at the start of the
            // serialized TRIE. Now our "pos" variable and the input stream are in
            // agreement.
            dis.Reset();

            //
            // Read the Rule Status Table
            //
            // The table must not start before the current position (the start of the trie).
            if (pos > This.fHeader.fStatusTable)
            {
                throw new IOException("Break iterator Rule data corrupt");
            }
            dis.SkipBytes(This.fHeader.fStatusTable - pos);
            pos = This.fHeader.fStatusTable;
            // The length is in bytes; each entry is a 4-byte int.
            This.fStatusTable = new int[This.fHeader.fStatusTableLen / 4];
            for (i = 0; i < This.fStatusTable.Length; i++)
            {
                This.fStatusTable[i] = dis.ReadInt();
                pos += 4;
            }

            //
            // Put the break rule source into a String
            //
            // The rule source must not start before the end of the status table.
            if (pos > This.fHeader.fRuleSource)
            {
                throw new IOException("Break iterator Rule data corrupt");
            }
            dis.SkipBytes(This.fHeader.fRuleSource - pos);
            pos = This.fHeader.fRuleSource;
            StringBuilder sb = new StringBuilder(This.fHeader.fRuleSourceLen / 2);

            // fRuleSourceLen is in bytes; one char is read for every two bytes.
            for (i = 0; i < This.fHeader.fRuleSourceLen; i += 2)
            {
                sb.Append(dis.ReadChar());
                pos += 2;
            }
            This.fRuleSource = sb.ToString();

            // Dump the loaded data when the debug setting contains "data".
            if (IBM.ICU.Text.RuleBasedBreakIterator.fDebugEnv != null &&
                IBM.ICU.Text.RuleBasedBreakIterator.fDebugEnv.IndexOf("data") >= 0)
            {
                This.Dump();
            }
            return(This);
        }