// public methods --------------------------------------------------------

/// <summary>
/// <para>
/// Reads the standard header of an ICU data file from a big-endian input
/// stream and authenticates it.
/// </para>
/// <para>
/// Header layout, in the order it is consumed:
/// <list type="bullet">
/// <item><description>Header size (char)</description></item>
/// <item><description>Magic number 1 (byte)</description></item>
/// <item><description>Magic number 2 (byte)</description></item>
/// <item><description>Rest of the header size (char)</description></item>
/// <item><description>Reserved word (char)</description></item>
/// <item><description>Big endian indicator (byte)</description></item>
/// <item><description>Character set family indicator (byte)</description></item>
/// <item><description>Size of a char (byte), for C++ and C use</description></item>
/// <item><description>Reserved byte (byte)</description></item>
/// <item><description>Data format identifier (4 bytes); each ICU data file has its own
/// identifier: [0] major [1] minor [2] milli [3] micro</description></item>
/// <item><description>Data version (4 bytes), the change version of the ICU data:
/// [0] major [1] minor [2] milli [3] micro</description></item>
/// <item><description>Unicode version (4 bytes) this ICU data is based on</description></item>
/// </list>
/// Any header bytes beyond these fields are skipped.
/// </para>
/// </summary>
///
/// <param name="inputStream">input stream that contains the ICU data header</param>
/// <param name="dataFormatIDExpected">Data format expected: an array of 4 bytes describing the data format. E.g. data format ID 1.2.3.4 becomes the array {1, 2, 3, 4}</param>
/// <param name="authenticate">user defined extra data version authentication. This value can be null if no extra authentication is needed.</param>
/// <returns>the 4-byte Unicode version read from the header</returns>
/// <exception cref="IOException">thrown if there is a read error or when header authentication fails.</exception>
/// @draft 2.1
public static byte[] ReadHeader(Stream inputStream,
        byte[] dataFormatIDExpected, ICUBinary.Authenticate authenticate)
{
    DataInputStream reader = new DataInputStream(inputStream);

    // Total header length as declared by the file itself.
    char declaredSize = reader.ReadChar();
    int consumed = 2;

    // The two magic bytes must match before anything else is trusted.
    byte firstMagic = (byte)reader.ReadByte();
    consumed++;
    byte secondMagic = (byte)reader.ReadByte();
    consumed++;
    if (!(firstMagic == MAGIC1 && secondMagic == MAGIC2))
    {
        throw new IOException(MAGIC_NUMBER_AUTHENTICATION_FAILED_);
    }

    reader.ReadChar(); // size of the rest of the header, not used here
    consumed += 2;
    reader.ReadChar(); // reserved word
    consumed += 2;

    sbyte endianFlag = reader.ReadByte();
    consumed++;
    sbyte charsetFamily = reader.ReadByte();
    consumed++;
    sbyte charWidth = reader.ReadByte();
    consumed++;
    reader.ReadByte(); // reserved byte
    consumed++;

    byte[] dataFormatID = new byte[4];
    reader.ReadFully(dataFormatID);
    consumed += 4;

    byte[] dataVersion = new byte[4];
    reader.ReadFully(dataVersion);
    consumed += 4;

    byte[] unicodeVersion = new byte[4];
    reader.ReadFully(unicodeVersion);
    consumed += 4;

    // The declared size must at least cover the fixed fields read above.
    if (declaredSize < consumed)
    {
        throw new IOException("Internal Error: Header size error");
    }
    reader.SkipBytes(declaredSize - consumed);

    bool accepted = endianFlag == BIG_ENDIAN_
            && charsetFamily == CHAR_SET_
            && charWidth == CHAR_SIZE_
            && ILOG.J2CsMapping.Collections.Arrays.Equals(dataFormatIDExpected, dataFormatID);

    // The caller's version check is consulted only once every built-in check has passed.
    if (accepted && authenticate != null)
    {
        accepted = authenticate.IsDataVersionAcceptable(dataVersion);
    }

    if (!accepted)
    {
        throw new IOException(HEADER_AUTHENTICATION_FAILED_);
    }

    return unicodeVersion;
}
// private methods ---------------------------------------------------

/// <summary>
/// Reads a single AlgorithmName record from the data input stream.
/// </summary>
///
/// <returns>the populated AlgorithmName, or null when SetInfo rejects the values read for the record</returns>
/// <exception cref="IOException">thrown when file read error occurs or data is corrupted</exception>
private UCharacterName.AlgorithmName ReadAlg()
{
    UCharacterName.AlgorithmName record = new UCharacterName.AlgorithmName();

    int firstCodePoint = m_dataInputStream_.ReadInt();
    int lastCodePoint = m_dataInputStream_.ReadInt();
    sbyte type = m_dataInputStream_.ReadByte();
    sbyte variant = m_dataInputStream_.ReadByte();
    if (!record.SetInfo(firstCodePoint, lastCodePoint, type, variant))
    {
        return null;
    }

    // Record size; whatever is left after the fixed part, the factors and
    // the prefix is the trailing factor string.
    int remaining = m_dataInputStream_.ReadChar();

    if (type == IBM.ICU.Impl.UCharacterName.AlgorithmName.TYPE_1_)
    {
        char[] factors = new char[variant];
        for (int k = 0; k < factors.Length; k++)
        {
            factors[k] = m_dataInputStream_.ReadChar();
        }
        record.SetFactor(factors);
        remaining -= factors.Length * 2; // each factor occupies two bytes
    }

    // The prefix is stored as single bytes terminated by a zero byte.
    StringBuilder prefix = new StringBuilder();
    while (true)
    {
        char next = (char)(m_dataInputStream_.ReadByte() & 0x00FF);
        if (next == '\0')
        {
            break;
        }
        prefix.Append(next);
    }
    record.SetPrefix(prefix.ToString());

    // Account for the fixed info, the prefix and its terminator.
    remaining -= (ALG_INFO_SIZE_ + prefix.Length + 1);

    if (remaining > 0)
    {
        byte[] factorString = new byte[remaining];
        m_dataInputStream_.ReadFully(factorString);
        record.SetFactorString(factorString);
    }

    return record;
}
/// <summary>
/// <para>
/// Reads the trie index from the input stream: m_dataOffset_ chars are read
/// into a newly allocated m_index_ array.
/// </para>
/// <para>
/// This is overridden by the child classes.
/// </para>
/// </summary>
///
/// <param name="inputStream">input stream containing the trie information</param>
/// <exception cref="IOException">thrown when data reading fails.</exception>
/// @draft 2.1
protected internal virtual void Unserialize(DataInputStream inputStream)
{
    // indexLength is a multiple of 1024 >> INDEX_STAGE_2_SHIFT_
    int indexLength = m_dataOffset_;
    m_index_ = new char[indexLength];

    int slot = 0;
    while (slot < indexLength)
    {
        m_index_[slot] = inputStream.ReadChar();
        slot++;
    }
}
// /CLOVER:ON
// protected methods -----------------------------------------------

/// <summary>
/// <para>
/// Reads the trie content from the input stream. Index and data are read as
/// one run of m_dataOffset_ + m_dataLength_ chars into a single array that
/// serves as both m_index_ and m_data_; the initial value is the element at
/// m_dataOffset_.
/// </para>
/// </summary>
///
/// <param name="inputStream">data input stream containing trie data</param>
/// <exception cref="IOException">thrown when data reading fails</exception>
protected internal override void Unserialize(DataInputStream inputStream)
{
    int totalChars = m_dataOffset_ + m_dataLength_;
    m_index_ = new char[totalChars];

    int slot = 0;
    while (slot < totalChars)
    {
        m_index_[slot] = inputStream.ReadChar();
        slot++;
    }

    // Index and data share the same backing array.
    m_data_ = m_index_;
    m_initialValue_ = m_data_[m_dataOffset_];
}
/// <summary>
/// Reads the inverse UCA data: validates the ICU header, checks the Unicode
/// version against the one reported by UCharacter, then loads the table and
/// the continuations.
/// </summary>
///
/// <param name="inputStream">input stream with the inverse uca data</param>
/// <returns>an object containing the inverse uca data</returns>
/// <exception cref="IOException">thrown when error occurs while reading the inverse uca, or when the Unicode version in the file does not match</exception>
private static CollationParsedRuleBuilder.InverseUCA ReadInverseUCA(
        Stream inputStream)
{
    byte[] fileUnicodeVersion = IBM.ICU.Impl.ICUBinary.ReadHeader(inputStream,
            INVERSE_UCA_DATA_FORMAT_ID_, INVERSE_UCA_AUTHENTICATE_);

    // weiv: check that we have the correct Unicode version in
    // binary files
    VersionInfo runtimeUnicodeVersion = IBM.ICU.Lang.UCharacter.GetUnicodeVersion();
    bool sameVersion = fileUnicodeVersion[0] == runtimeUnicodeVersion.GetMajor()
            && fileUnicodeVersion[1] == runtimeUnicodeVersion.GetMinor();
    if (!sameVersion)
    {
        throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
    }

    CollationParsedRuleBuilder.InverseUCA inverse = new CollationParsedRuleBuilder.InverseUCA();
    DataInputStream reader = new DataInputStream(inputStream);

    reader.ReadInt();                      // bytesize
    int tableRows = reader.ReadInt();      // in int size
    int continuationCount = reader.ReadInt(); // in char size
    reader.ReadInt();                      // table in bytes
    reader.ReadInt();                      // conts in bytes
    inverse.m_UCA_version_ = ReadVersion(reader);
    reader.SkipBytes(8);                   // skip padding

    // one column for each strength
    int tableLength = tableRows * 3;
    inverse.m_table_ = new int[tableLength];
    inverse.m_continuations_ = new char[continuationCount];

    for (int t = 0; t < tableLength; t++)
    {
        inverse.m_table_[t] = reader.ReadInt();
    }
    for (int c = 0; c < continuationCount; c++)
    {
        inverse.m_continuations_[c] = reader.ReadChar();
    }

    // The stream is left open (input.Close() is intentionally not called here).
    return inverse;
}
/// <summary>
/// Reads characters from <paramref name="inStream"/> until the END marker is
/// read or cancellation is requested through StopToken, and returns the
/// characters received before the marker.
/// </summary>
/// <param name="inStream">stream the characters are read from</param>
/// <returns>the characters read, excluding the END marker; empty if nothing was read</returns>
public virtual string ReadString(DataInputStream inStream)
{
    // Accumulate in a StringBuilder: concatenating onto a string inside the
    // loop re-copies the whole message for every character received.
    var received = new System.Text.StringBuilder();
    var res = START;

    // The read loop runs on a pool thread while this call blocks until it
    // completes; failures in the loop therefore surface from Wait().
    Task.Run(() =>
    {
        while (res != END && !StopToken.IsCancellationRequested)
        {
            res = inStream.ReadChar();
            if (res != END)
            {
                received.Append((char)res);
            }
        }
    }).Wait();

    return received.ToString();
}
/// <summary>
/// Reads characters from <paramref name="inStream"/> until the END marker is
/// read or cancellation is requested through StopToken, and returns the
/// characters received before the marker with any leading START characters
/// removed.
/// </summary>
/// <param name="inStream">stream the characters are read from</param>
/// <param name="outStream">currently unused: the acknowledgement write to this stream is disabled</param>
/// <returns>the characters read, excluding the END marker and leading START characters</returns>
public virtual string ReadString(DataInputStream inStream, DataOutputStream outStream)
{
    // Accumulate in a StringBuilder: concatenating onto a string inside the
    // loop re-copies the whole message for every character received.
    var received = new System.Text.StringBuilder();
    var res = START;

    // The read loop runs on a pool thread while this call blocks until it
    // completes; failures in the loop therefore surface from Wait().
    Task.Run(() =>
    {
        while (res != END && !StopToken.IsCancellationRequested)
        {
            res = inStream.ReadChar();
            if (res != END)
            {
                received.Append((char)res);
            }
        }
        // Acknowledgement (outStream.WriteChar(ACKchar)) is intentionally disabled.
    }).Wait();

    // Using START here is mostly as a buffer - first-character drops seem to
    // be common - so if they happen all they cost us is a START char (which
    // we then silently fail to trim).
    return received.ToString().TrimStart(START);
}
/// <summary>
/// Get an RBBIDataWrapper from an input stream onto a pre-compiled set of
/// RBBI rules. The sections are consumed strictly in stream order: RBBI
/// header, forward table, reverse table, optional safe forward and safe
/// reverse tables, trie, status table, rule source.
/// </summary>
/// <param name="mask0">stream holding the pre-compiled rule data, starting at the ICU data header</param>
/// <returns>the populated wrapper</returns>
/// <exception cref="IOException">thrown when the magic number or data version is not accepted, or when a checked section offset is inconsistent</exception>
static internal RBBIDataWrapper Get(Stream mask0)
{
    DataInputStream input = new DataInputStream(new BufferedStream(mask0));
    RBBIDataWrapper wrapper = new RBBIDataWrapper();

    // Seek past the ICU data header.
    // TODO: verify that the header looks good.
    input.SkipBytes(0x80);

    // Read in the RBBI data header...
    RBBIDataWrapper.RBBIDataHeader header = new RBBIDataWrapper.RBBIDataHeader();
    wrapper.fHeader = header;
    header.fMagic = input.ReadInt();
    header.fVersion = input.ReadInt();
    // Split the packed version word into its four bytes, most significant first.
    header.fFormatVersion[0] = (byte)(header.fVersion >> 24);
    header.fFormatVersion[1] = (byte)(header.fVersion >> 16);
    header.fFormatVersion[2] = (byte)(header.fVersion >> 8);
    header.fFormatVersion[3] = (byte)(header.fVersion);
    header.fLength = input.ReadInt();
    header.fCatCount = input.ReadInt();
    header.fFTable = input.ReadInt();
    header.fFTableLen = input.ReadInt();
    header.fRTable = input.ReadInt();
    header.fRTableLen = input.ReadInt();
    header.fSFTable = input.ReadInt();
    header.fSFTableLen = input.ReadInt();
    header.fSRTable = input.ReadInt();
    header.fSRTableLen = input.ReadInt();
    header.fTrie = input.ReadInt();
    header.fTrieLen = input.ReadInt();
    header.fRuleSource = input.ReadInt();
    header.fRuleSourceLen = input.ReadInt();
    header.fStatusTable = input.ReadInt();
    header.fStatusTableLen = input.ReadInt();
    input.SkipBytes(6 * 4); // uint32_t fReserved[6];

    bool magicMatches = header.fMagic == 0xb1a0;
    bool versionSupported = header.fVersion == 1   // ICU 3.2 and earlier
            || header.fFormatVersion[0] == 3;      // ICU 3.4
    if (!(magicMatches && versionSupported))
    {
        throw new IOException(
                "Break Iterator Rule Data Magic Number Incorrect, or unsupported data version.");
    }

    // Current position in input stream: offset of end of header, which has
    // 24 fields, all int32_t (4 bytes).
    int pos = 24 * 4;

    //
    // Read in the Forward state transition table as an array of shorts.
    //

    // Quick Sanity Check
    if (header.fFTable < pos || header.fFTable > header.fLength)
    {
        throw new IOException("Break iterator Rule data corrupt");
    }

    // Skip over any padding preceding this table
    input.SkipBytes(header.fFTable - pos);
    pos = header.fFTable;

    wrapper.fFTable = new short[header.fFTableLen / 2];
    for (int k = 0; k < wrapper.fFTable.Length; k++)
    {
        wrapper.fFTable[k] = input.ReadShort();
        pos += 2;
    }

    //
    // Read in the Reverse state table
    //

    // Skip over any padding in the file
    input.SkipBytes(header.fRTable - pos);
    pos = header.fRTable;

    // Create & fill the table itself.
    wrapper.fRTable = new short[header.fRTableLen / 2];
    for (int k = 0; k < wrapper.fRTable.Length; k++)
    {
        wrapper.fRTable[k] = input.ReadShort();
        pos += 2;
    }

    //
    // Read in the Safe Forward state table (only present when its length is positive)
    //
    if (header.fSFTableLen > 0)
    {
        // Skip over any padding in the file
        input.SkipBytes(header.fSFTable - pos);
        pos = header.fSFTable;

        // Create & fill the table itself.
        wrapper.fSFTable = new short[header.fSFTableLen / 2];
        for (int k = 0; k < wrapper.fSFTable.Length; k++)
        {
            wrapper.fSFTable[k] = input.ReadShort();
            pos += 2;
        }
    }

    //
    // Read in the Safe Reverse state table (only present when its length is positive)
    //
    if (header.fSRTableLen > 0)
    {
        // Skip over any padding in the file
        input.SkipBytes(header.fSRTable - pos);
        pos = header.fSRTable;

        // Create & fill the table itself.
        wrapper.fSRTable = new short[header.fSRTableLen / 2];
        for (int k = 0; k < wrapper.fSRTable.Length; k++)
        {
            wrapper.fSRTable[k] = input.ReadShort();
            pos += 2;
        }
    }

    //
    // Unserialize the Character categories TRIE.
    // Because we can't be absolutely certain where the Trie deserialize will
    // leave the input stream, leave position unchanged.
    // The seek to the start of the next item following the TRIE will get us
    // back in sync.
    //

    // Seek from the end of the previous section to the start of the trie.
    input.SkipBytes(header.fTrie - pos);
    pos = header.fTrie;

    // Mark the start of the TRIE and keep the mark valid so long as we don't
    // go more than 100 bytes past the end of the TRIE.
    input.Mark(header.fTrieLen + 100);

    // Deserialize the TRIE, leaving the input stream at an unknown position,
    // preceding the padding between the TRIE and the following section.
    wrapper.fTrie = new CharTrie(input, fTrieFoldingFunc);

    // Move the input stream back to the marked position at the start of the
    // serialized TRIE. Now "pos" and the input stream are in agreement.
    input.Reset();

    //
    // Read the Rule Status Table
    //
    if (pos > header.fStatusTable)
    {
        throw new IOException("Break iterator Rule data corrupt");
    }
    input.SkipBytes(header.fStatusTable - pos);
    pos = header.fStatusTable;

    wrapper.fStatusTable = new int[header.fStatusTableLen / 4];
    for (int k = 0; k < wrapper.fStatusTable.Length; k++)
    {
        wrapper.fStatusTable[k] = input.ReadInt();
        pos += 4;
    }

    //
    // Put the break rule source into a String
    //
    if (pos > header.fRuleSource)
    {
        throw new IOException("Break iterator Rule data corrupt");
    }
    input.SkipBytes(header.fRuleSource - pos);
    pos = header.fRuleSource;

    // fRuleSourceLen is counted in bytes; one char is read per two bytes.
    StringBuilder ruleText = new StringBuilder(header.fRuleSourceLen / 2);
    for (int consumed = 0; consumed < header.fRuleSourceLen; consumed += 2)
    {
        ruleText.Append(input.ReadChar());
        pos += 2;
    }
    wrapper.fRuleSource = ruleText.ToString();

    // Optional debug dump, enabled when the debug environment string mentions "data".
    if (IBM.ICU.Text.RuleBasedBreakIterator.fDebugEnv != null
            && IBM.ICU.Text.RuleBasedBreakIterator.fDebugEnv.IndexOf("data") >= 0)
    {
        wrapper.Dump();
    }

    return wrapper;
}