/// <summary>
/// Sets up the reader's state: stores the stream and encoding, creates the decoder,
/// allocates the byte and char buffers, and records the encoding's preamble.
/// </summary>
/// <param name="stream">Stream the reader pulls bytes from.</param>
/// <param name="encoding">Encoding used to create the decoder and size the char buffer.</param>
/// <param name="detectEncodingFromByteOrderMarks">Stored as the encoding-detection flag.</param>
/// <param name="bufferSize">Requested byte buffer size; raised to <c>MinBufferSize</c> when smaller.</param>
/// <param name="leaveOpen">When true the reader is marked as not closable.</param>
private void Init(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize, bool leaveOpen)
{
    _stream = stream;
    _encoding = encoding;
    _decoder = encoding.GetDecoder();

    // Never work with a byte buffer below the minimum size.
    int effectiveSize = (bufferSize < MinBufferSize) ? MinBufferSize : bufferSize;

    _byteBuffer = new byte[effectiveSize];
    _maxCharsPerBuffer = encoding.GetMaxCharCount(effectiveSize);
    _charBuffer = new char[_maxCharsPerBuffer];
    _byteLen = 0;
    _bytePos = 0;
    _detectEncoding = detectEncodingFromByteOrderMarks;

    // Encoding.GetPreamble() returns a freshly allocated array on every call for encodings
    // that have a preamble. For the shared Encoding.UTF8 instance (known to have one) a
    // privately cached copy is reused instead. Other UTF8Encoding instances are left out of
    // this shortcut because the only way to learn whether their preamble is enabled is to
    // call GetPreamble(), which is exactly the call being avoided.
    if (object.ReferenceEquals(encoding, Encoding.UTF8))
    {
        byte[] cachedPreamble = s_utf8Preamble;
        if (cachedPreamble == null)
        {
            cachedPreamble = encoding.GetPreamble();
            s_utf8Preamble = cachedPreamble;
        }
        _preamble = cachedPreamble;
    }
    else
    {
        _preamble = encoding.GetPreamble();
    }

    _checkPreamble = (_preamble.Length > 0);
    _isBlocked = false;
    _closable = !leaveOpen;
}
/// <summary>
/// Decodes <paramref name="bt"/> to text, guessing the encoding.
/// A UTF-16 (little endian) or UTF-8 byte order mark wins; otherwise each candidate encoding is
/// tried in turn and the first one whose decode/encode round trip reproduces the input exactly
/// is used. If nothing matches, the system default encoding is used.
/// </summary>
/// <param name="bt">Raw bytes to decode. Must not be null.</param>
/// <param name="enc">Receives the encoding that was used for the returned text.</param>
/// <returns>The decoded text, without the byte order mark when one was recognized.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="bt"/> is null.</exception>
static public string AutoEncoding(byte[] bt, ref System.Text.Encoding enc)
{
    if (bt == null)
    {
        throw new System.ArgumentNullException("bt");
    }

    // BOM checks come first and use >= so that an input consisting of nothing but the BOM
    // is still recognized (and decodes to an empty string).
    enc = System.Text.Encoding.Unicode;
    byte[] bom = enc.GetPreamble();
    if (AutoEncodingStartsWith(bt, bom))
    {
        return enc.GetString(bt, bom.Length, bt.Length - bom.Length);
    }

    enc = System.Text.Encoding.UTF8;
    bom = enc.GetPreamble();
    if (AutoEncodingStartsWith(bt, bom))
    {
        return enc.GetString(bt, bom.Length, bt.Length - bom.Length);
    }

    // The candidates are only resolved once the BOM checks have failed, so an encoding that
    // is unavailable on this platform cannot break BOM-marked input.
    System.Text.Encoding[] candidates =
    {
        System.Text.Encoding.Default,
        System.Text.Encoding.GetEncoding("ascii"),
        System.Text.Encoding.GetEncoding("UTF-8"),
        System.Text.Encoding.GetEncoding("gb2312"), // cannot tell gb2312 and big5 apart
        System.Text.Encoding.GetEncoding("big5"),
        System.Text.Encoding.GetEncoding("shift_jis"),
        System.Text.Encoding.GetEncoding("euc-jp"),
        System.Text.Encoding.GetEncoding("iso-2022-jp"),
        System.Text.Encoding.GetEncoding("utf-16")
    };

    foreach (System.Text.Encoding candidate in candidates)
    {
        enc = candidate;
        string decoded = enc.GetString(bt);
        byte[] reencoded = enc.GetBytes(decoded);

        // Accept the candidate only if re-encoding gives back the identical bytes.
        // Empty input never matches here and falls through to the default below.
        bool roundTrips = bt.Length > 0 && bt.Length == reencoded.Length;
        for (int j = 0; roundTrips && j < bt.Length; j++)
        {
            roundTrips = bt[j] == reencoded[j];
        }

        if (roundTrips)
        {
            return decoded;
        }
    }

    enc = System.Text.Encoding.Default;
    return enc.GetString(bt);
}

// True when data begins with every byte of a non-empty prefix.
private static bool AutoEncodingStartsWith(byte[] data, byte[] prefix)
{
    if (prefix.Length == 0 || data.Length < prefix.Length)
    {
        return false;
    }

    for (int i = 0; i < prefix.Length; i++)
    {
        if (data[i] != prefix[i])
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Initializes the parser: selects UTF-16 (<see cref="Encoding.Unicode"/>) as the encoding of the
/// string result and caches that encoding's byte order mark in string form.
/// </summary>
public ClassificationResultParser()
{
    // The string result is expected to be UTF-16 encoded.
    _encoding = Encoding.Unicode;

    // The byte order mark as text; it should be removed from the result before deserializing it.
    byte[] preambleBytes = _encoding.GetPreamble();
    _byteOrderMark = _encoding.GetString(preambleBytes);
}
/// <summary>
/// Wraps <paramref name="innerStream"/> so that the data handed out starts with the preamble of
/// <paramref name="encoding"/> exactly once, whether or not the inner stream already begins with it.
/// </summary>
/// <param name="innerStream">Readable stream to wrap.</param>
/// <param name="encoding">Encoding whose preamble is expected at the start of the data.</param>
public ReadOnlyStreamWithEncodingPreamble(Stream innerStream, Encoding encoding)
{
    Contract.Assert(innerStream != null);
    Contract.Assert(innerStream.CanRead);
    Contract.Assert(encoding != null);

    _innerStream = innerStream;

    // Encodings without a preamble need no special handling.
    byte[] bom = encoding.GetPreamble();
    int bomLength = bom.Length;
    if (bomLength <= 0)
    {
        return;
    }

    // Buffer layout: [expected preamble][first bomLength bytes read from the stream].
    byte[] head = new byte[bomLength * 2];
    bom.CopyTo(head, 0);

    // Pull up to bomLength bytes from the stream; stop early at end of stream.
    int filled = bomLength;
    while (filled < head.Length)
    {
        int next = innerStream.ReadByte();
        if (next == -1)
        {
            break;
        }

        head[filled] = (byte)next;
        filled++;
    }

    // The comparison is only possible when enough bytes were available.
    if (filled == head.Length)
    {
        bool streamStartsWithBom = true;
        for (int i = 0; i < bomLength && streamStartsWithBom; i++)
        {
            streamStartsWithBom = head[i] == head[i + bomLength];
        }

        // The stream already carried the preamble: hand out one copy only.
        if (streamStartsWithBom)
        {
            filled = bomLength;
        }
    }

    _remainingBytes = new ArraySegment<byte>(head, 0, filled);
}
/// <summary>
/// Reads from the wrapped stream. On the first read (stream position 0) the output is prefixed
/// with the source encoding's byte order mark and, when enabled, the XML declaration; a BOM
/// already present in the source is not duplicated. Later reads pass straight through.
/// </summary>
/// <param name="buffer">Destination buffer.</param>
/// <param name="offset">Index in <paramref name="buffer"/> at which writing starts.</param>
/// <param name="count">Maximum number of bytes to write into <paramref name="buffer"/>.</param>
/// <returns>The number of bytes written to <paramref name="buffer"/>; 0 at end of stream.</returns>
/// <exception cref="System.ArgumentException">
/// On the first read, <paramref name="count"/> is too small to hold the BOM and declaration plus
/// at least one byte of content.
/// </exception>
public override int Read(byte[] buffer, int offset, int count)
{
    // Only the very first read needs the BOM/declaration treatment.
    if (_stream.Position != 0)
    {
        return _stream.Read(buffer, offset, count);
    }

    if (count == 0)
    {
        return 0;
    }

    byte[] bom = _sourceEncoding.GetPreamble();

    // <?xml version="1.0" encoding="{0}" standalone="no"?>
    byte[] declaration = null;
    if (_addXmlDeclaration == true)
    {
        declaration = _sourceEncoding.GetBytes(xmlDeclaration);
    }

    int declarationLength = (declaration != null) ? declaration.Length : 0;
    int headerLength = bom.Length + declarationLength;
    if (count <= headerLength)
    {
        throw new System.ArgumentException(
            "The first read needs room for the byte order mark, the XML declaration and at least one byte of content.",
            "count");
    }

    // Scratch buffer is always filled from index 0; 'offset' applies to the caller's buffer only.
    byte[] sourceBytes = new byte[count - headerLength];
    int read = _stream.Read(sourceBytes, 0, sourceBytes.Length);
    if (read <= 0)
    {
        // Empty source: report end of stream instead of emitting a header forever.
        return 0;
    }

    // Compare the whole preamble, not just its first byte. A read shorter than the preamble
    // is treated as "no BOM present".
    bool sourceHasBom = bom.Length > 0 && read >= bom.Length;
    for (int i = 0; sourceHasBom && i < bom.Length; i++)
    {
        sourceHasBom = sourceBytes[i] == bom[i];
    }

    int writeIndex = offset;

    // The BOM always goes first, followed by the declaration, followed by the content.
    bom.CopyTo(buffer, writeIndex);
    writeIndex += bom.Length;

    if (declaration != null)
    {
        declaration.CopyTo(buffer, writeIndex);
        writeIndex += declaration.Length;
    }

    // Skip the source's own BOM (already emitted above) and copy only the bytes actually read.
    int contentStart = sourceHasBom ? bom.Length : 0;
    int contentLength = read - contentStart;
    System.Array.Copy(sourceBytes, contentStart, buffer, writeIndex, contentLength);
    writeIndex += contentLength;

    return writeIndex - offset;
}
/// <summary>
/// Encodes <paramref name="source"/> and prefixes the result with the encoding's preamble.
/// </summary>
/// <param name="encoding">Encoding that supplies both the preamble and the encoded text.</param>
/// <param name="source">Text to encode.</param>
/// <returns>A new array holding the preamble bytes followed by the encoded text.</returns>
private byte[] GetBytes(Encoding encoding, string source)
{
    byte[] bom = encoding.GetPreamble();
    byte[] payload = encoding.GetBytes(source);

    int payloadStart = bom.Length;
    byte[] combined = new byte[payloadStart + payload.Length];
    bom.CopyTo(combined, 0);
    payload.CopyTo(combined, payloadStart);

    return combined;
}
/// <summary>
/// Removes the preamble (byte order mark) of <paramref name="encoding"/> from the start of
/// <paramref name="str"/> when the encoded form of the string begins with it.
/// </summary>
/// <param name="str">Text that may start with a byte order mark.</param>
/// <param name="encoding">Encoding whose preamble is looked for.</param>
/// <returns>
/// The text without the leading preamble, or <paramref name="str"/> unchanged when the encoding
/// has no preamble or the string does not start with it.
/// </returns>
public static string RemovePreamble(this string str, Encoding encoding)
{
    var bytes = encoding.GetBytes(str);
    var preamble = encoding.GetPreamble();

    // Nothing to strip for preamble-less encodings. Returning the input untouched also avoids
    // a lossy encode/decode round trip (e.g. ASCII turning non-ASCII characters into '?').
    if (preamble.Length == 0 || bytes.Length < preamble.Length)
    {
        return str;
    }

    for (int i = 0; i < preamble.Length; i++)
    {
        if (bytes[i] != preamble[i])
        {
            return str;
        }
    }

    // Decode everything after the preamble directly, without copying the tail into a new array.
    return encoding.GetString(bytes, preamble.Length, bytes.Length - preamble.Length);
}
/// <summary>
/// Writes the text produced by <c>SaveToText()</c> to <paramref name="stream"/>, preceded by the
/// encoding's preamble when it has one.
/// </summary>
/// <param name="stream">Destination stream.</param>
/// <param name="encoding">Encoding used for the text and for the preamble.</param>
public void SaveToTextStream(Stream stream, System.Text.Encoding encoding)
{
    byte[] payload = encoding.GetBytes(SaveToText());
    byte[] bom = encoding.GetPreamble();

    // Encodings without a preamble return an empty array; skip the pointless write.
    if (bom.Length != 0)
    {
        stream.Write(bom, 0, bom.Length);
    }

    stream.Write(payload, 0, payload.Length);
}
/// <summary>
/// Records <paramref name="encoding"/> as the current encoding and returns
/// <paramref name="source"/> encoded with it, prefixed by the encoding's preamble.
/// </summary>
/// <param name="encoding">Encoding to remember and to encode with.</param>
/// <param name="source">Text to encode.</param>
/// <returns>A new array holding the preamble bytes followed by the encoded text.</returns>
protected byte[] GetBytes(Encoding encoding, string source)
{
    currentEncoding = encoding;

    byte[] bom = encoding.GetPreamble();
    byte[] payload = encoding.GetBytes(source);

    int payloadStart = bom.Length;
    var combined = new byte[payloadStart + payload.Length];
    bom.CopyTo(combined, 0);
    payload.CopyTo(combined, payloadStart);

    return combined;
}
/// <summary>
/// Binding wrapper around <see cref="System.Text.Encoding.GetPreamble"/>: resolves the encoding
/// from <paramref name="l"/>, then pushes <c>true</c> followed by the preamble bytes.
/// </summary>
/// <param name="l">Handle passed to <c>checkSelf</c>, <c>pushValue</c> and <c>error</c>.</param>
/// <returns>
/// 2 on success (presumably the number of pushed values - confirm against the binding runtime);
/// otherwise whatever <c>error</c> returns for the caught exception.
/// </returns>
static public int GetPreamble(IntPtr l)
{
    try
    {
        var target = (System.Text.Encoding)checkSelf(l);
        byte[] preamble = target.GetPreamble();

        pushValue(l, true);
        pushValue(l, preamble);
        return 2;
    }
    catch (Exception ex)
    {
        return error(l, ex);
    }
}
/// <summary>
/// Writes the encoded text preamble (byte order mark, BOM) to the stream.
/// Only encodings with code page 1200 get a preamble written; for every other encoding
/// (the original note names big-endian Unicode and UTF-8) nothing is written.
/// The preamble should be written only once per frame, at the beginning of the encoded text.
/// </summary>
/// <param name="encoding">The encoding.</param>
public void WritePreamble(Encoding encoding)
{
    const int UnicodeCodePage = 1200; // Unicode

    if (encoding.CodePage != UnicodeCodePage)
    {
        return;
    }

    byte[] preamble = encoding.GetPreamble();
    if (preamble.Length == 0)
    {
        throw new ID3TagException("Unicode encoding must provide byte order mark (BOM).");
    }

    _stream.Write(preamble, 0, preamble.Length);
}
/// <summary>
/// Builds an in-memory stream holding the encoding's preamble followed by the encoded
/// <paramref name="inputContents"/>, positioned at the start.
/// </summary>
/// <param name="inputContents">Text to place in the stream.</param>
/// <param name="encoding">Encoding used for the preamble and the text.</param>
/// <returns>A <see cref="MemoryStream"/> rewound to position 0.</returns>
public static Stream StringToStream(string inputContents, Encoding encoding)
{
    // MDF serialization requires the preamble.
    byte[] bom = encoding.GetPreamble();
    byte[] payload = encoding.GetBytes(inputContents);

    MemoryStream result = new MemoryStream(bom.Length + payload.Length);
    result.Write(bom, 0, bom.Length);
    result.Write(payload, 0, payload.Length);
    result.Seek(0, SeekOrigin.Begin);

    return result;
}
/// <summary>
/// Computes the number of bytes required to encode the specified string.
/// </summary>
/// <remarks>
/// Use this method to calculate the number of bytes that <paramref name="s" /> will occupy when
/// being encoded. The result is the encoded byte count plus the length of the encoding's preamble
/// plus the optional terminator.
/// <b>Note:</b> No error checking is performed. Make sure <paramref name="s" /> and
/// <paramref name="e" /> are not null.
/// </remarks>
/// <param name="s">The <see cref="String" /> to be encoded.</param>
/// <param name="e">The <see cref="Encoding" /> with which the string will be encoded.</param>
/// <param name="terminateString">
/// When <b>true</b>, room for a terminator is included: two bytes when <paramref name="e" /> is
/// one of the UTF-16 encodings, one byte otherwise. When <b>false</b>, nothing is added.
/// </param>
/// <returns>The number of bytes required to encode <paramref name="s" />.</returns>
internal static int ByteCount(string s, Encoding e, bool terminateString)
{
    int terminatorSize;
    if (!terminateString)
    {
        terminatorSize = 0;
    }
    else if (e == utf16LE || e == utf16BE)
    {
        terminatorSize = 2;
    }
    else
    {
        terminatorSize = 1;
    }

    return terminatorSize + e.GetByteCount(s) + e.GetPreamble().Length;
}
/// <summary>
/// Verifies that <c>ReadOnlyStreamWithEncodingPreamble</c> yields the preamble followed by the
/// encoded message exactly once, regardless of whether the input stream already contained it.
/// </summary>
/// <param name="encoding">Encoding under test.</param>
/// <param name="includePreambleInInputStream">Whether the input stream starts with the preamble.</param>
public void StreamWithoutPreamble(Encoding encoding, bool includePreambleInInputStream)
{
    using (MemoryStream inputStream = new MemoryStream())
    {
        // Arrange
        string message =
            "Hello, world" + Environment.NewLine // English
            + "こんにちは、世界" + Environment.NewLine // Japanese
            + "مرحبا، العالم"; // Arabic

        byte[] bom = encoding.GetPreamble();
        byte[] body = encoding.GetBytes(message);

        if (includePreambleInInputStream)
        {
            inputStream.Write(bom, 0, bom.Length);
        }
        inputStream.Write(body, 0, body.Length);

        // Whatever the input looked like, the wrapper must produce preamble + message.
        byte[] expectedBytes = new byte[bom.Length + body.Length];
        bom.CopyTo(expectedBytes, 0);
        body.CopyTo(expectedBytes, bom.Length);

        inputStream.Position = 0;

        using (ReadOnlyStreamWithEncodingPreamble wrapperStream = new ReadOnlyStreamWithEncodingPreamble(inputStream, encoding))
        {
            // Act
            byte[] readBuffer = new byte[expectedBytes.Length];
            int totalRead = 0;
            while (totalRead < readBuffer.Length)
            {
                int chunk = wrapperStream.Read(readBuffer, totalRead, readBuffer.Length - totalRead);
                if (chunk == 0)
                {
                    break;
                }
                totalRead += chunk;
            }

            // Assert
            Assert.Equal(expectedBytes.Length, totalRead);
            Assert.Equal(expectedBytes, readBuffer);

            // Make sure there are no stray bytes left in the stream
            Assert.Equal(0, wrapperStream.Read(readBuffer, 0, 1));
        }
    }
}
/// <summary>
/// Converts <paramref name="value"/> into a rewound in-memory stream containing the encoding's
/// preamble followed by the encoded text.
/// </summary>
/// <param name="value">Text to encode.</param>
/// <param name="encoding">Encoding used for the preamble and the text.</param>
/// <returns>A <see cref="MemoryStream"/> positioned at its beginning.</returns>
public static Stream AsStream(this string value, Encoding encoding)
{
    MemoryStream result = new MemoryStream();
    try
    {
        byte[] bom = encoding.GetPreamble();
        result.Write(bom, 0, bom.Length);

        byte[] payload = encoding.GetBytes(value);
        result.Write(payload, 0, payload.Length);

        result.Position = 0;
    }
    catch
    {
        // Do not leak the stream when encoding or writing fails.
        result.Dispose();
        throw;
    }

    return result;
}
/// <summary>
/// Encodes <paramref name="text"/> as UTF-16 (<see cref="System.Text.Encoding.Unicode"/>) and
/// returns it as a memory stream, prefixed with the encoding's preamble when it has one.
/// </summary>
/// <param name="text">Text to encode.</param>
/// <returns>A <see cref="MemoryStream"/> over the preamble and encoded text.</returns>
static public Stream ConvertTextToStream(string text)
{
    System.Text.Encoding utf16 = System.Text.Encoding.Unicode;
    byte[] body = utf16.GetBytes(text);
    byte[] bom = utf16.GetPreamble();

    // Without a preamble the encoded bytes can back the stream directly.
    if (bom.Length == 0)
    {
        return new MemoryStream(body);
    }

    byte[] combined = new byte[bom.Length + body.Length];
    bom.CopyTo(combined, 0);
    body.CopyTo(combined, bom.Length);
    return new MemoryStream(combined);
}
/// <summary>
/// Asynchronously writes <paramref name="text"/> to the temporary path returned by
/// <c>WriteTextInit</c>, optionally preceded by the encoding's byte order mark, and then hands
/// the temporary path and <paramref name="fileName"/> to <c>WriteTextFinal</c>.
/// </summary>
/// <param name="fileName">Target file name.</param>
/// <param name="text">Text to write.</param>
/// <param name="encoding">Encoding used for the BOM and the text.</param>
/// <param name="hadBom">When true, the encoding's preamble (if any) is written first.</param>
public static async Task WriteTextAsync (string fileName, string text, Encoding encoding, bool hadBom)
{
	var tmpPath = WriteTextInit (fileName, text, encoding);

	using (var output = new FileStream (tmpPath, FileMode.OpenOrCreate, FileAccess.Write, FileShare.Write, bufferSize: DefaultBufferSize, options: FileOptions.Asynchronous)) {
		// The preamble is only looked up when a BOM was requested.
		byte[] preamble = hadBom ? encoding.GetPreamble () : null;
		if (preamble != null && preamble.Length > 0)
			await output.WriteAsync (preamble, 0, preamble.Length).ConfigureAwait (false);

		byte[] payload = encoding.GetBytes (text);
		await output.WriteAsync (payload, 0, payload.Length).ConfigureAwait (false);
	}

	WriteTextFinal (tmpPath, fileName);
}
/// <summary>
/// Writes <paramref name="text"/> to the temporary path returned by <c>WriteTextInit</c>,
/// optionally preceded by the encoding's byte order mark, and then hands the temporary path and
/// <paramref name="fileName"/> to <c>WriteTextFinal</c>.
/// </summary>
/// <param name="fileName">Target file name.</param>
/// <param name="text">Text to write.</param>
/// <param name="encoding">Encoding used for the BOM and the text.</param>
/// <param name="hadBom">When true, the encoding's preamble (if any) is written first.</param>
public static void WriteText (string fileName, string text, Encoding encoding, bool hadBom)
{
	var tmpPath = WriteTextInit (fileName, text, encoding);

	using (var output = new FileStream (tmpPath, FileMode.OpenOrCreate, FileAccess.Write, FileShare.Write)) {
		// The preamble is only looked up when a BOM was requested.
		byte[] preamble = hadBom ? encoding.GetPreamble () : null;
		if (preamble != null && preamble.Length > 0)
			output.Write (preamble, 0, preamble.Length);

		byte[] payload = encoding.GetBytes (text);
		output.Write (payload, 0, payload.Length);
	}

	WriteTextFinal (tmpPath, fileName);
}
/// <summary>
/// Decodes <paramref name="bytes"/> with <paramref name="encoding"/>, skipping the encoding's
/// byte order mark when the data starts with it.
/// </summary>
/// <param name="bytes">Raw data to decode.</param>
/// <param name="encoding">Encoding used for decoding and for the BOM comparison.</param>
/// <param name="hadBom">Set to true when the data started with the encoding's preamble.</param>
/// <returns>The decoded text, without the byte order mark.</returns>
public static string GetText (byte[] bytes, Encoding encoding, out bool hadBom)
{
	byte[] preamble = encoding.GetPreamble ();

	// A BOM can only be present if the encoding has one and the data is long enough to hold it.
	bool comparable = preamble != null && preamble.Length > 0 && preamble.Length <= bytes.Length;

	int matched = 0;
	if (comparable) {
		while (matched < preamble.Length && bytes [matched] == preamble [matched])
			matched++;
	}
	hadBom = comparable && matched == preamble.Length;

	return hadBom
		? encoding.GetString (bytes, preamble.Length, bytes.Length - preamble.Length)
		: encoding.GetString (bytes);
}
// The writer's private instance fields are handed in as parameters (the writer type derives
// from MarshalByRefObject) so that the state machine generated for this async method can
// access them efficiently.
/// <summary>
/// Writes the encoding preamble if it has not been written yet, encodes the buffered chars and
/// writes the resulting bytes, then optionally flushes the underlying stream.
/// </summary>
private static async Task FlushAsyncInternal(StreamWriter _this, bool flushStream, bool flushEncoder,
                                             char[] charBuffer, int charPos, bool haveWrittenPreamble,
                                             Encoding encoding, Encoder encoder, Byte[] byteBuffer, Stream stream)
{
    if (!haveWrittenPreamble)
    {
        // Mark the preamble as written before the (possibly asynchronous) write starts.
        _this.HaveWrittenPreamble_Prop = true;

        byte[] bom = encoding.GetPreamble();
        if (bom.Length != 0)
        {
            await stream.WriteAsync(bom, 0, bom.Length).ConfigureAwait(false);
        }
    }

    int encodedLength = encoder.GetBytes(charBuffer, 0, charPos, byteBuffer, 0, flushEncoder);
    if (encodedLength > 0)
    {
        await stream.WriteAsync(byteBuffer, 0, encodedLength).ConfigureAwait(false);
    }

    // By definition a Flush should flush the stream, but that is only done when the caller
    // asked for it via flushStream: some performance tests showed that flushing needlessly hurts.
    if (flushStream)
    {
        await stream.FlushAsync().ConfigureAwait(false);
    }
}
/// <summary>
/// Encodes <paramref name="str"/> and prefixes the result with the encoding's byte order mark.
/// </summary>
/// <param name="encoding">Encoding that supplies the BOM and encodes the text.</param>
/// <param name="str">Text to encode.</param>
/// <returns>A new array holding the BOM bytes followed by the encoded text.</returns>
private byte[] EncodeStringWithBOM(Encoding encoding, string str)
{
    byte[] payload = encoding.GetBytes(str);
    byte[] bom = encoding.GetPreamble(); // the BOM bytes that belong to this encoding

    byte[] result = new byte[bom.Length + payload.Length];
    bom.CopyTo(result, 0);
    payload.CopyTo(result, bom.Length);

    return result;
}
/// <summary>
/// Reads a whole file and decodes it with <paramref name="encoding"/>, reporting whether the
/// file started with the encoding's byte order mark. The byte order mark itself is not part of
/// the returned text.
/// </summary>
/// <param name="fileName">Path of the file to read.</param>
/// <param name="encoding">Encoding used for decoding and for the BOM comparison.</param>
/// <param name="hadBom">Set to true when the file started with the encoding's preamble.</param>
/// <returns>The decoded file content without a leading byte order mark.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="fileName"/> or <paramref name="encoding"/> is null.
/// </exception>
public static string ReadAllText (string fileName, Encoding encoding, out bool hadBom)
{
	if (fileName == null)
		throw new ArgumentNullException ("fileName");
	if (encoding == null)
		throw new ArgumentNullException ("encoding");

	byte[] content = File.ReadAllBytes (fileName);
	byte[] bom = encoding.GetPreamble ();

	hadBom = bom != null && bom.Length > 0 && bom.Length <= content.Length;
	for (int i = 0; hadBom && i < bom.Length; i++)
		hadBom = content [i] == bom [i];

	// Skip the BOM bytes: decoding them would leave U+FEFF as the first character of the text.
	int start = hadBom ? bom.Length : 0;
	return encoding.GetString (content, start, content.Length - start);
}
/// <summary>
/// Validates the arguments and prepares the reader: picks up recycled buffers when available
/// (otherwise allocates new ones), stores stream, buffer size and encoding, creates the decoder
/// and computes the BOM-check flags.
/// </summary>
/// <param name="stream">Readable stream to read from.</param>
/// <param name="encoding">Encoding used for decoding.</param>
/// <param name="detectEncodingFromByteOrderMarks">Adds 1 to <c>do_checks</c> when true.</param>
/// <param name="bufferSize">Requested buffer size; must be positive and is raised to <c>MinimumBufferSize</c> when smaller.</param>
internal void Initialize (Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize)
{
	if (stream == null)
		throw new ArgumentNullException ("stream");
	if (encoding == null)
		throw new ArgumentNullException ("encoding");
	if (!stream.CanRead)
		throw new ArgumentException ("Cannot read stream");
	if (bufferSize <= 0)
		throw new ArgumentOutOfRangeException ("bufferSize", "The minimum size of the buffer must be positive");

	if (bufferSize < MinimumBufferSize)
		bufferSize = MinimumBufferSize;

	// GetChars() might add a flushed character, so the char buffer gets one extra slot
	// (1 is probably enough, but the original author was not sure).
	int charCapacity = encoding.GetMaxCharCount (bufferSize) + 1;

	// Reuse the last released buffers, if any, instead of allocating new default-sized ones.
	// The unlocked test is repeated under the lock before a buffer is taken.
	if (bufferSize <= DefaultBufferSize && input_buffer_recycle != null) {
		lock (input_buffer_recycle_lock) {
			if (input_buffer_recycle != null) {
				input_buffer = input_buffer_recycle;
				input_buffer_recycle = null;
			}

			if (decoded_buffer_recycle != null && charCapacity <= decoded_buffer_recycle.Length) {
				decoded_buffer = decoded_buffer_recycle;
				decoded_buffer_recycle = null;
			}
		}
	}

	// An existing buffer is wiped; otherwise a fresh one is allocated.
	if (input_buffer != null)
		Array.Clear (input_buffer, 0, bufferSize);
	else
		input_buffer = new byte [bufferSize];

	if (decoded_buffer != null)
		Array.Clear (decoded_buffer, 0, charCapacity);
	else
		decoded_buffer = new char [charCapacity];

	base_stream = stream;
	this.buffer_size = bufferSize;
	this.encoding = encoding;
	decoder = encoding.GetDecoder ();

	// 1 = detect encoding from byte order marks, 2 = the encoding has a preamble.
	byte [] preamble = encoding.GetPreamble ();
	do_checks = detectEncodingFromByteOrderMarks ? 1 : 0;
	if (preamble.Length != 0)
		do_checks += 2;

	decoded_count = 0;
	pos = 0;
}
/// <summary>
/// Compares the BOM read from a text file with the preamble of an encoding.
/// </summary>
/// <param name="bom">BOM bytes read from the text file; may be shorter than the preamble or null.</param>
/// <param name="encoding">Encoding whose preamble is compared against.</param>
/// <returns>
/// true when the encoding has a preamble and <paramref name="bom"/> starts with it;
/// false otherwise (including when <paramref name="bom"/> is too short to contain it).
/// </returns>
private Boolean bomComparing(byte[] bom, Encoding encoding)
{
    byte[] preamble = encoding.GetPreamble();
    if (preamble.Length == 0)
    {
        return false;
    }

    // A buffer shorter than the preamble cannot contain it; bail out instead of indexing past the end.
    if (bom == null || bom.Length < preamble.Length)
    {
        return false;
    }

    for (int i = 0; i < preamble.Length; i++)
    {
        if (bom[i] != preamble[i])
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Writes an <c>#ExternalSource</c> directive into the existing file at <paramref name="path"/>,
/// starting right after the position where the encoding's preamble would end, and appends the
/// closing <c>#End ExternalSource</c> line at the end of the file.
/// </summary>
/// <param name="path">File to modify; it is opened with <c>FileMode.Open</c>, so it must exist.</param>
/// <param name="filename">Source file name placed in the directive.</param>
/// <param name="checksum">Not used by this implementation.</param>
/// <param name="enc">Encoding used for the written text and for the preamble length.</param>
public void DecorateFile (string path, string filename, MD5 checksum, Encoding enc)
{
	string eol = Environment.NewLine;

	string header = String.Format ("#ExternalSource(\"{0}\",1){1}", filename, eol);
	byte[] headerBytes = enc.GetBytes (header);

	using (FileStream fs = new FileStream (path, FileMode.Open, FileAccess.Write)) {
		// The header overwrites the bytes that follow the preamble.
		fs.Seek (enc.GetPreamble ().Length, SeekOrigin.Begin);
		fs.Write (headerBytes, 0, headerBytes.Length);

		string trailer = String.Format ("{0}#End ExternalSource{0}", eol);
		byte[] trailerBytes = enc.GetBytes (trailer);
		fs.Seek (0, SeekOrigin.End);
		fs.Write (trailerBytes, 0, trailerBytes.Length);
	}
}
/// <summary>
/// Writes a <c>#pragma checksum</c> and <c>#line 1</c> header into the existing file at
/// <paramref name="path"/>, starting right after the position where the encoding's preamble
/// would end, and appends <c>#line default</c> / <c>#line hidden</c> at the end of the file.
/// </summary>
/// <param name="path">File to modify; it is opened with <c>FileMode.Open</c>, so it must exist.</param>
/// <param name="filename">Source file name placed in the directives.</param>
/// <param name="checksum">Checksum rendered into the <c>#pragma checksum</c> line.</param>
/// <param name="enc">Encoding used for everything written and for the preamble length.</param>
public void DecorateFile (string path, string filename, MD5 checksum, Encoding enc)
{
	string newline = Environment.NewLine;
	var sb = new StringBuilder ();

	sb.AppendFormat ("#pragma checksum {0} \"{1}\" \"{2}\"{3}{3}",
			 QuoteSnippetString (filename),
			 BaseCompiler.HashMD5.ToString ("B"),
			 ChecksumToHex (checksum),
			 newline);
	sb.AppendFormat ("#line 1 {0}{1}", QuoteSnippetString (filename), newline);

	byte[] bytes = enc.GetBytes (sb.ToString ());
	using (FileStream fs = new FileStream (path, FileMode.Open, FileAccess.Write)) {
		// The header overwrites the bytes that follow the preamble.
		fs.Seek (enc.GetPreamble ().Length, SeekOrigin.Begin);
		fs.Write (bytes, 0, bytes.Length);

		sb.Length = 0;
		sb.AppendFormat ("{0}#line default{0}#line hidden{0}", newline);

		// The trailer must use the same encoding as the header and the rest of the file;
		// hard-coding UTF-8 here corrupts files written in any other encoding (e.g. UTF-16).
		bytes = enc.GetBytes (sb.ToString ());

		fs.Seek (0, SeekOrigin.End);
		fs.Write (bytes, 0, bytes.Length);
	}
}
/// <summary>
/// Decodes the raw frame bytes into <c>Data</c> according to the frame name:
/// text frames (T***, except TXXX/TXX) become a trimmed string, URL frames (W***, WXXX/WXX)
/// become a string, and picture frames (APIC/PIC) become an <c>ID3v2APICFrame</c>.
/// Truncated or malformed frames are decoded as far as possible instead of throwing
/// index/argument exceptions.
/// </summary>
public void ProcessFrame()
{
    byte[] NewFrameData;
    if (FF_Unsynchronisation)
    {
        NewFrameData = ID3Base.UnSync(FrameData);
    }
    else
    {
        NewFrameData = FrameData;
    }

    // First character of the frame name selects the frame family; '\0' when there is no name.
    char frameKind = string.IsNullOrEmpty(FrameName) ? '\0' : FrameName[0];

    // T-Type Frame (Text)
    if (frameKind == 'T' && FrameName != "TXXX" && FrameName != "TXX")
    {
        if (NewFrameData.Length > 1)
        {
            System.Text.Encoding enc = null;
            bool useBOM = GetEncodingType(ref enc, 1);

            // Byte 0 is the text-encoding marker; the BOM (when present) follows it.
            int textStart = 1 + (useBOM ? enc.GetPreamble().Length : 0);
            int textLength = NewFrameData.Length - textStart;

            // A frame too short to hold anything after the BOM yields an empty string.
            string text = textLength > 0 ? enc.GetString(NewFrameData, textStart, textLength) : "";
            Data = text.Trim('\0').Trim();
        }
        else
        {
            // No Frame Data, so blank.
            Console.WriteLine("The Frame Above Had No Data");
            Data = "";
        }
    }
    else if (FrameName == "TXXX" || FrameName == "TXX")
    {
        // TXXX, TXX Frame Goes Here (TODO)
    }

    // W-Type Frame (TODO: Needs more testing)
    if (frameKind == 'W' && FrameName != "WXXX" && FrameName != "WXX")
    {
        Encoding enc = Encoding.ASCII;
        Data = enc.GetString(NewFrameData, 0, NewFrameData.Length);
    }
    else if (FrameName == "WXXX" || FrameName == "WXX")
    {
        // WXXX, WXX is always ISO-8859-1 Encoded
        Encoding enc = Encoding.GetEncoding("ISO-8859-1");
        Data = enc.GetString(NewFrameData, 0, NewFrameData.Length);
    }

    if ((FrameName == "APIC" || FrameName == "PIC") && NewFrameData.Length > 1)
    {
        Data = ParsePictureFrame(NewFrameData);
    }
}

// Parses an attached-picture frame body; stops early (leaving later fields unset) when the data is truncated.
private ID3v2APICFrame ParsePictureFrame(byte[] data)
{
    ID3v2APICFrame apic = new ID3v2APICFrame();

    // Skip just the text-encoding byte.
    int position = 1;

    // Get MimeType (as generic text)
    Encoding enc = Encoding.ASCII;
    if (MajorVersion > 2)
    {
        int mimeStart = position;
        position = IndexOfTerminator(data, position, 1);
        apic.MIMEType = enc.GetString(data, mimeStart, position - mimeStart);
    }
    else
    {
        // Older tags store a fixed three-character image format.
        if (position + 3 > data.Length)
        {
            return apic;
        }

        apic.MIMEType = enc.GetString(data, position, 3);
        position += 2; // should be 3, but the next instruction increments by 1
    }

    // Get ImageType
    position++;
    if (position >= data.Length)
    {
        return apic;
    }
    apic.ImageType = data[position];

    // Get Description
    position++;
    if (position >= data.Length)
    {
        return apic;
    }

    bool useBOM = GetEncodingType(ref enc, position); // determine what encoding style we need
    int descriptionStart = position;

    // UTF-16 little endian text is terminated by $00 00 and scanned two bytes at a time;
    // everything else is terminated by a single $00.
    bool wideTerminator = position + 1 < data.Length && enc == Encoding.Unicode;
    int terminatorWidth = wideTerminator ? 2 : 1;

    // Move past the terminator, but never past the end of the data.
    position = IndexOfTerminator(data, position, terminatorWidth) + terminatorWidth;
    if (position > data.Length)
    {
        position = data.Length;
    }

    int textStart = descriptionStart + (useBOM ? enc.GetPreamble().Length : 0);
    int textLength = position - textStart;
    string description = textLength > 0 ? enc.GetString(data, textStart, textLength) : "";
    apic.Description = description.Trim('\0').Trim();

    // Get Binary Data. The stream is intentionally not disposed: an Image created by
    // Image.FromStream needs its stream to stay open for the lifetime of the image.
    MemoryStream ms = new MemoryStream(data, position, data.Length - position);
    try
    {
        apic.Picture = Image.FromStream(ms);
    }
    catch (System.ArgumentException ex)
    {
        apic.Picture = null;
        Console.WriteLine(ex.Message);
    }

    return apic;
}

// Returns the index of the first all-zero terminator of the given width (stepping by that width), or data.Length when none is found.
private static int IndexOfTerminator(byte[] data, int start, int width)
{
    for (int i = start; i + width <= data.Length; i += width)
    {
        bool allZero = true;
        for (int k = 0; k < width && allZero; k++)
        {
            allZero = data[i + k] == 0x00;
        }

        if (allZero)
        {
            return i;
        }
    }

    return data.Length;
}
/// <summary>
/// Static constructor that fills the default preferred code page lists:
/// <c>PreferedEncodingsForStream</c>, <c>PreferedEncodings</c> and <c>AllEncodings</c>.
/// </summary>
static c_EncodingTools()
{
    var forStreams = new List<int>();
    var everything = new List<int>();
    var forMime = new List<int>();

    // ASCII is the simplest encoding, so it comes first everywhere.
    int asciiCodePage = System.Text.Encoding.ASCII.CodePage;
    forStreams.Add(asciiCodePage);
    forMime.Add(asciiCodePage);
    everything.Add(asciiCodePage);

    // The system default is second in the complete list ...
    everything.Add(System.Text.Encoding.Default.CodePage);

    // ... and second in the other lists too, but only when it is a single-byte encoding.
    if (System.Text.Encoding.Default.IsSingleByte)
    {
        forStreams.Add(System.Text.Encoding.Default.CodePage);
        forMime.Add(System.Text.Encoding.Default.CodePage);
    }

    // Prefer JIS over JIS-SHIFT (JIS is detected better than JIS-SHIFT).
    // This one does include Cyrillic (strange but true).
    everything.Add(50220);
    forMime.Add(50220);

    // Always allow the Unicode flavours for streams (they all have a preamble).
    forStreams.Add(System.Text.Encoding.Unicode.CodePage);
    foreach (EncodingInfo info in System.Text.Encoding.GetEncodings())
    {
        if (forStreams.Contains(info.CodePage))
        {
            continue;
        }

        System.Text.Encoding candidate = System.Text.Encoding.GetEncoding(info.CodePage);
        if (candidate.GetPreamble().Length > 0)
        {
            forStreams.Add(info.CodePage);
        }
    }

    // The stream list is complete here.
    PreferedEncodingsForStream = forStreams.ToArray();

    // Two passes over the installed encodings: all single-byte ones first, then the rest (multi-byte).
    foreach (bool singleBytePass in new[] { true, false })
    {
        foreach (EncodingInfo info in System.Text.Encoding.GetEncodings())
        {
            if (info.GetEncoding().IsSingleByte != singleBytePass)
            {
                continue;
            }

            if (!everything.Contains(info.CodePage))
            {
                everything.Add(info.CodePage);
            }

            // Only add ISO and IBM encodings to the MIME encodings.
            if (info.CodePage <= 1258)
            {
                forMime.Add(info.CodePage);
            }
        }
    }

    // Add Unicode last.
    forMime.Add(System.Text.Encoding.Unicode.CodePage);

    PreferedEncodings = forMime.ToArray();
    AllEncodings = everything.ToArray();
}
/// <summary>
/// Copies <paramref name="input"/> to <paramref name="output"/>, passing an offset of three bytes
/// (the size of the UTF-8 byte order mark) to <c>CopyWithOffset</c> when the requested BOM
/// setting disagrees with what the encoding itself emits, and an offset of zero otherwise.
/// </summary>
/// <param name="input">Source stream.</param>
/// <param name="output">Destination stream.</param>
/// <param name="writeUtf8BOM">Requested BOM behaviour; null means "leave as is".</param>
/// <param name="encoding">Encoding the input was produced with.</param>
private static void WriteStreamAndFixPreamble(Stream input, Stream output, bool? writeUtf8BOM, Encoding encoding)
{
    const int preambleSize = 3;
    int offset = 0;

    // The preamble is only optional for UTF-8; nothing to fix for other encodings
    // or when the caller expressed no preference.
    if (writeUtf8BOM != null && encoding is UTF8Encoding)
    {
        bool encodingEmitsBom = encoding.GetPreamble().Length == preambleSize;

        // Request and encoding agree (BOM wanted and present, or unwanted and absent): nothing to do.
        // NOTE(review): how the offset is applied depends on CopyWithOffset, which is not shown here.
        if (writeUtf8BOM.Value != encodingEmitsBom)
        {
            offset = preambleSize;
        }
    }

    input.CopyWithOffset(output, offset);
}
/// <summary>
/// Returns a byte array containing the text encoded by a specified encoding and BOM.
/// </summary>
/// <param name="text">The text to encode.</param>
/// <param name="encoding">The encoding.</param>
/// <param name="hadBom">If set to <c>true</c> a BOM will be prepended (when the encoding has one).</param>
/// <returns>An array of exactly the written length: optional BOM followed by the encoded text.</returns>
public static byte[] GetBuffer (string text, Encoding encoding, bool hadBom)
{
	using (var stream = new MemoryStream ()) {
		if (hadBom) {
			var bom = encoding.GetPreamble ();
			if (bom != null && bom.Length > 0)
				stream.Write (bom, 0, bom.Length);
		}
		byte[] bytes = encoding.GetBytes (text);
		stream.Write (bytes, 0, bytes.Length);

		// ToArray copies only the bytes written. GetBuffer would hand out the stream's
		// internal buffer, including its unused (zero-filled) capacity.
		return stream.ToArray ();
	}
}
/// <summary>
/// Validates the arguments and prepares the reader: stores stream, buffer size and encoding,
/// allocates the input and decoded buffers, creates the decoder and computes the BOM-check flags.
/// </summary>
/// <param name="stream">Readable stream to read from.</param>
/// <param name="encoding">Encoding used for decoding.</param>
/// <param name="detectEncodingFromByteOrderMarks">Adds 1 to <c>do_checks</c> when true.</param>
/// <param name="bufferSize">Requested buffer size; must be positive and is raised to <c>MinimumBufferSize</c> when smaller.</param>
internal void Initialize (Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize)
{
	if (stream == null)
		throw new ArgumentNullException ("stream");
	if (encoding == null)
		throw new ArgumentNullException ("encoding");
	if (!stream.CanRead)
		throw new ArgumentException ("Cannot read stream");
	if (bufferSize <= 0)
		throw new ArgumentOutOfRangeException ("bufferSize", "The minimum size of the buffer must be positive");

	if (bufferSize < MinimumBufferSize)
		bufferSize = MinimumBufferSize;

	base_stream = stream;
	input_buffer = new byte [bufferSize];
	this.buffer_size = bufferSize;
	this.encoding = encoding;
	decoder = encoding.GetDecoder ();

	// 1 = detect encoding from byte order marks, 2 = the encoding has a preamble.
	byte [] preamble = encoding.GetPreamble ();
	do_checks = detectEncodingFromByteOrderMarks ? 1 : 0;
	if (preamble.Length != 0)
		do_checks += 2;

	// GetChars() might add a flushed character, so the char buffer gets one extra slot
	// (1 is probably enough, but the original author was not sure).
	int charCapacity = encoding.GetMaxCharCount (bufferSize) + 1;
	decoded_buffer = new char [charCapacity];
	decoded_count = 0;
	pos = 0;
}
/// <summary>
/// Chooses the encoding for the input based on <c>AutoDetectEncoding</c>, creates the matching
/// decoder, and reports how many leading bytes belong to a byte order mark.
/// </summary>
/// <param name="srcBuffer">Buffer holding the start of the input.</param>
/// <param name="srcOffset">Index in <paramref name="srcBuffer"/> where the input starts.</param>
/// <returns>
/// The length of the chosen encoding's preamble when the data at <paramref name="srcOffset"/>
/// starts with it; otherwise 0.
/// </returns>
private int DetermineEncoding(byte[] srcBuffer, int srcOffset)
{
    _Encoding = null;
    switch (AutoDetectEncoding(srcBuffer, srcOffset))
    {
        case UniCodeBE: // 2
            _Encoding = Encoding.BigEndianUnicode;
            break;
        case UniCode: // 3
            _Encoding = Encoding.Unicode;
            break;
        case UCS4BE:  // 4
        case UCS4BEB: // 5
            _Encoding = Ucs4Encoding.UCS4_Bigendian;
            break;
        case UCS4:  // 6
        case UCS4B: // 7
            _Encoding = Ucs4Encoding.UCS4_Littleendian;
            break;
        case UCS434:  // 8
        case UCS434B: // 9
            _Encoding = Ucs4Encoding.UCS4_3412;
            break;
        case UCS421:  // 10
        case UCS421B: // 11
            _Encoding = Ucs4Encoding.UCS4_2143;
            break;
        case EBCDIC: // 12
            throw new XmlException(Res.Xml_UnknownEncoding, "ebcdic", LineNum, LinePos);
        case UTF8: // 13
            _EncodingSetByDefault = true;
            _Encoding = new UTF8Encoding(true);
            break;
        default:
            _Encoding = new UTF8Encoding(true, true);
            break;
    }

    // Every non-throwing branch above assigns an encoding, so no null check is needed here.
    _Decoder = _Encoding.GetDecoder();
    _PermitEncodingChange = true;
    _nCodePage = _Encoding.CodePage;

    byte[] preamble = _Encoding.GetPreamble();
    int preambleLength = preamble.Length;

    // Explicit bounds check instead of catching (and swallowing) every exception:
    // a buffer too short to hold the preamble simply has no byte order mark.
    bool hasByteOrderMark = srcBuffer != null
        && srcOffset >= 0
        && srcOffset + preambleLength <= srcBuffer.Length;

    for (int i = 0; hasByteOrderMark && i < preambleLength; i++)
    {
        hasByteOrderMark = srcBuffer[srcOffset + i] == preamble[i];
    }

    return hasByteOrderMark ? preambleLength : 0;
}
/// <summary>
/// Copies the XML nodes of <paramref name="feed"/> to a size-constrained writer over a
/// <c>TdsOutputStream</c>, skipping any XML declaration, and flushes the XML writer at the end.
/// Writes and the final flush are asynchronous when <c>_asyncWrite</c> is set.
/// </summary>
/// <param name="feed">Feed whose reader supplies the nodes.</param>
/// <param name="stateObj">State object handed to the output stream.</param>
/// <param name="needBom">When false, the encoding's preamble is passed to the output stream as bytes to skip.</param>
/// <param name="encoding">Encoding used by the text writer.</param>
/// <param name="size">Size limit handed to the constrained writer.</param>
private async Task WriteXmlFeed(XmlDataFeed feed, TdsParserStateObject stateObj, bool needBom, Encoding encoding, int size)
{
    byte[] preambleToSkip = needBom ? null : encoding.GetPreamble();

    var output = new TdsOutputStream(this, stateObj, preambleToSkip);
    ConstrainedTextWriter writer = new ConstrainedTextWriter(new StreamWriter(output, encoding), size);

    XmlWriterSettings writerSettings = new XmlWriterSettings
    {
        CloseOutput = false, // don't close the memory stream
        ConformanceLevel = ConformanceLevel.Fragment
    };
    if (_asyncWrite)
    {
        writerSettings.Async = true;
    }

    XmlWriter xmlWriter = XmlWriter.Create(writer, writerSettings);

    if (feed._source.ReadState == ReadState.Initial)
    {
        feed._source.Read();
    }

    while (!feed._source.EOF && !writer.IsComplete)
    {
        // Copying nodes from a reader to a writer would emit the XmlDeclaration despite
        // ConformanceLevel.Fragment above, so the declaration is filtered out while copying.
        if (feed._source.NodeType == XmlNodeType.XmlDeclaration)
        {
            feed._source.Read();
        }
        else if (_asyncWrite)
        {
            await xmlWriter.WriteNodeAsync(feed._source, true).ConfigureAwait(false);
        }
        else
        {
            xmlWriter.WriteNode(feed._source, true);
        }
    }

    if (_asyncWrite)
    {
        await xmlWriter.FlushAsync().ConfigureAwait(false);
    }
    else
    {
        xmlWriter.Flush();
    }
}
/// <summary>
/// Changes the file encoding. Forces a reload when the single-byte property or the preamble
/// length differs from the previously used encoding; otherwise only the view is refreshed.
/// </summary>
/// <param name="encoding">The new encoding.</param>
public void ChangeEncoding(Encoding encoding)
{
    CurrentLogFileReader.ChangeEncoding(encoding);
    EncodingOptions.Encoding = encoding;

    // Byte positions stay valid only if both character width class and preamble length are unchanged.
    bool mustReload =
        _guiStateArgs.CurrentEncoding.IsSingleByte != encoding.IsSingleByte
        || _guiStateArgs.CurrentEncoding.GetPreamble().Length != encoding.GetPreamble().Length;

    if (mustReload)
    {
        Reload();
    }
    else
    {
        dataGridView.Refresh();
        SendGuiStateUpdate();
    }

    _guiStateArgs.CurrentEncoding = CurrentLogFileReader.CurrentEncoding;
}
/// <summary>
/// Creates (or overwrites) <paramref name="fileName"/> and writes <paramref name="text"/> to it,
/// optionally preceded by the encoding's byte order mark.
/// </summary>
/// <param name="fileName">File to create.</param>
/// <param name="text">Text to write.</param>
/// <param name="encoding">Encoding used for the BOM and the text.</param>
/// <param name="hadBom">When true, the encoding's preamble (if any) is written first.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="fileName"/>, <paramref name="text"/> or <paramref name="encoding"/> is null.
/// </exception>
public static void WriteText (string fileName, string text, Encoding encoding, bool hadBom)
{
	if (fileName == null)
		throw new ArgumentNullException ("fileName");
	if (text == null)
		throw new ArgumentNullException ("text");
	if (encoding == null)
		throw new ArgumentNullException ("encoding");

	using (var output = new FileStream (fileName, FileMode.Create, FileAccess.Write, FileShare.Write)) {
		// The preamble is only looked up when a BOM was requested.
		byte[] preamble = hadBom ? encoding.GetPreamble () : null;
		if (preamble != null && preamble.Length > 0)
			output.Write (preamble, 0, preamble.Length);

		byte[] payload = encoding.GetBytes (text);
		output.Write (payload, 0, payload.Length);
	}
}
/// <summary>
/// Sets up the reader's state: stores the stream and encoding, creates the decoder,
/// allocates the byte and char buffers, and records the encoding's preamble.
/// </summary>
/// <param name="stream">Stream the reader pulls bytes from.</param>
/// <param name="encoding">Encoding used to create the decoder and size the char buffer.</param>
/// <param name="detectEncodingFromByteOrderMarks">Stored as the encoding-detection flag.</param>
/// <param name="bufferSize">Requested byte buffer size; raised to <c>MinBufferSize</c> when smaller.</param>
/// <param name="leaveOpen">When true the reader is marked as not closable.</param>
private void Init(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize, bool leaveOpen)
{
    _stream = stream;
    _encoding = encoding;
    _decoder = encoding.GetDecoder();

    // Never work with a byte buffer below the minimum size.
    int effectiveSize = (bufferSize < MinBufferSize) ? MinBufferSize : bufferSize;

    _byteBuffer = new byte[effectiveSize];
    _maxCharsPerBuffer = encoding.GetMaxCharCount(effectiveSize);
    _charBuffer = new char[_maxCharsPerBuffer];
    _byteLen = 0;
    _bytePos = 0;
    _detectEncoding = detectEncodingFromByteOrderMarks;

    // The preamble only needs checking when the encoding actually has one.
    _preamble = encoding.GetPreamble();
    _checkPreamble = _preamble.Length > 0;

    _isBlocked = false;
    _closable = !leaveOpen;
}
/// <summary>
/// Returns the index just past the preamble of <paramref name="e"/> when the data at
/// <paramref name="offset"/> starts with it; otherwise returns <paramref name="offset"/> unchanged.
/// </summary>
/// <param name="buffer">Buffer holding the data.</param>
/// <param name="offset">Index in <paramref name="buffer"/> where the data starts.</param>
/// <param name="count">Number of data bytes available from <paramref name="offset"/>.</param>
/// <param name="e">Encoding whose preamble is looked for.</param>
/// <returns>The offset at which the actual content begins.</returns>
private static int SeekPreamble(byte[] buffer, int offset, int count, Encoding e)
{
    byte[] bom = e.GetPreamble();

    // The preamble has to fit both into the buffer and into the announced byte count.
    bool fits = bom.Length + offset <= buffer.Length && bom.Length <= count;
    if (!fits)
    {
        return offset;
    }

    for (int i = 0; i < bom.Length; i++)
    {
        if (bom[i] != buffer[offset + i])
        {
            return offset;
        }
    }

    return offset + bom.Length;
}