/// <summary>
/// Match evaluator that decodes a single encoded-word captured by a regular expression.
/// The match is expected to expose the named groups "charset", "encoding" and "value".
/// </summary>
/// <param name="match">The regular expression match to decode.</param>
/// <returns>
/// The decoded text, or the original matched text when the match was not successful
/// or decoding failed for any reason.
/// </returns>
private static string Evalute(Match match)
{
    try
    {
        if (match.Success)
        {
            string charSet = match.Groups["charset"].Value;
            string encoding = match.Groups["encoding"].Value;
            string value = match.Groups["value"].Value;

            // Fall back on the charset name (not the transfer encoding, which is only "B" or "Q")
            // when the lookup does not know the charset.
            Encoding enc = EncodingTools.GetEncodingByCodepageName(charSet)
                           ?? ("utf-8".Equals(charSet, StringComparison.OrdinalIgnoreCase)
                               ? Encoding.UTF8
                               : Encoding.Default);

            // "B" means base64; everything else is handed to the Q decoder.
            if ("b".Equals(encoding, StringComparison.OrdinalIgnoreCase))
            {
                return enc.GetString(Convert.FromBase64String(value));
            }

            return Core.QDecode(enc, value);
        }
    }
    catch (Exception)
    {
        // Deliberate best effort: a word that cannot be decoded is returned unchanged below.
    }

    return match.Value;
}
public void RunLengthDecodeShouldConvertAnRLEStringToItsFullFormat()
{
    // Each entry is { expected decoded text, run-length encoded input }.
    var cases = new[]
    {
        new[] { "", "" },
        new[] { "aaaaac", "5a1c" },
        new[] { "aaaaaccccccccc", "5a9c" },
        new[] { "ZZZnnnnn", "3Z5n" }
    };

    foreach (var pair in cases)
    {
        Assert.Equal(pair[0], EncodingTools.RunLengthDecode(pair[1]));
    }
}
/// <summary>
/// Tries to shrink a large UTF-8 subtitle file containing Farsi text by re-saving it
/// as windows-1256 next to the original (with a "sub-" file name prefix).
/// </summary>
/// <param name="fileName">Path of the subtitle file.</param>
/// <returns>
/// The path of the newly written windows-1256 file, or <paramref name="fileName"/> unchanged when the
/// file is smaller than 100KB, contains no Farsi text, or is not detected as UTF-8.
/// </returns>
public string TryReduceRtlLargeFileContent(string fileName)
{
    // OSDB's PHP server can't accept subtitle files larger than 100KB!
    // LTR languages are fine (most of the times), but RTL languages with utf-8 encoding have problems,
    // because utf-8 means larger files than original windows-1256 files.
    const long maxUploadSizeInBytes = 102400;
    const int utf8CodePage = 65001;

    if (new FileInfo(fileName).Length < maxUploadSizeInBytes)
    {
        // It's fine for OSDB upload.
        return fileName;
    }

    var content = File.ReadAllText(fileName);
    if (!content.ContainsFarsi())
    {
        // It's not RTL.
        return fileName;
    }

    var mostEfficientEncoding = EncodingTools.DetectInputCodepage(File.ReadAllBytes(fileName));
    if (mostEfficientEncoding.CodePage != utf8CodePage)
    {
        // Don't corrupt it.
        return fileName;
    }

    // Path.Combine keeps the result inside the source directory: with a bare file name the
    // directory part is empty, and a hand-built "\sub-..." would point at the drive root.
    var newFilePath = Path.Combine(
        Path.GetDirectoryName(fileName) ?? string.Empty,
        "sub-" + Path.GetFileName(fileName));

    File.WriteAllText(newFilePath, content, Encoding.GetEncoding("windows-1256"));
    LogWindow.AddMessage(LogType.Info, "Saved a new file with windows-1256 encoding to reduce the size of the sub file @ " + newFilePath);
    return newFilePath;
}
/// <summary>
/// Reads one or more consecutive RFC 2047 'encoded-word's from the current position and decodes them.
/// </summary>
/// <returns>
/// The concatenated decoded text. An encoded-word that cannot be parsed or decoded is
/// appended unchanged (RFC 2047 6.3).
/// </returns>
/// <exception cref="InvalidOperationException">Is raised when source stream has no encoded-word at current position.</exception>
public string EncodedWord()
{
    /* RFC 2047 2.
     *  encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
     *
     *  An 'encoded-word' may not be more than 75 characters long, including
     *  'charset', 'encoding', 'encoded-text', and delimiters.  If it is
     *  desirable to encode more text than will fit in an 'encoded-word' of
     *  75 characters, multiple 'encoded-word's (separated by CRLF SPACE) may
     *  be used.
     */

    ToFirstChar();
    if (Peek(false) != '=')
    {
        throw new InvalidOperationException("No encoded-word available.");
    }

    StringBuilder retVal = new StringBuilder();
    while (true)
    {
        string encodedWord = Atom();
        string decodedWord = null;
        bool decoded = false;

        try
        {
            // Expected shape after the split: "=", charset, encoding, encoded-text, "=".
            string[] parts = encodedWord.Split('?');
            if (parts.Length > 3)
            {
                if (string.Equals(parts[2], "Q", StringComparison.OrdinalIgnoreCase))
                {
                    decodedWord = Core.QDecode(EncodingTools.GetEncodingByCodepageName_Throws(parts[1]), parts[3]);
                    decoded = true;
                }
                else if (string.Equals(parts[2], "B", StringComparison.OrdinalIgnoreCase))
                {
                    decodedWord = EncodingTools.GetEncodingByCodepageName_Throws(parts[1]).GetString(
                        Core.Base64Decode(Encoding.Default.GetBytes(parts[3])));
                    decoded = true;
                }
            }
        }
        catch
        {
            // Failed to parse encoded-word, leave it as is. RFC 2047 6.3.
            decoded = false;
        }

        retVal.Append(decoded ? decodedWord : encodedWord);

        ToFirstChar();
        // encoded-word does not continue.
        if (Peek(false) != '=')
        {
            break;
        }
    }

    return retVal.ToString();
}
/// <summary>
/// Writes the given text to a file, using the encoding that
/// <c>EncodingTools.GetMostEfficientEncodingForStream</c> selects for it.
/// </summary>
/// <param name="text">Text to write.</param>
/// <param name="path">Path of the file to create or overwrite.</param>
private void SaveToStream(string text, string path)
{
    // Step 1: let the helper pick the encoding for this text.
    Encoding chosenEncoding = EncodingTools.GetMostEfficientEncodingForStream(text);

    // Step 2: write the text with that encoding (append = false, so the file is replaced).
    using (var writer = new StreamWriter(path, false, chosenEncoding))
    {
        writer.Write(text);
    }
}
/// <summary>
/// Determines the text encoding of an open file, first from its byte-order mark and then,
/// when no known BOM is present, by running detection over the first 8192 bytes of the file.
/// The stream position is restored before returning.
/// </summary>
/// <param name="file">An open, seekable file stream.</param>
/// <returns>
/// The encoding indicated by the BOM or by detection; <c>null</c> when the file is shorter
/// than 4 bytes or any error occurs.
/// </returns>
public static Encoding file_encoding(FileStream file)
{
    try
    {
        long pos = file.Position;
        try
        {
            // Read the BOM
            var bom = new byte[4];
            file.Seek(0, SeekOrigin.Begin);
            int read = file.Read(bom, 0, 4);
            if (read < 4)
            {
                return null;
            }

            // Analyze the BOM
            if (bom[0] == 0x2b && bom[1] == 0x2f && bom[2] == 0x76)
            {
                return Encoding.UTF7;
            }
            if (bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf)
            {
                return Encoding.UTF8;
            }
            // UTF-32LE (FF FE 00 00) must be tested before UTF-16LE (FF FE), which is its prefix.
            if (bom[0] == 0xff && bom[1] == 0xfe && bom[2] == 0 && bom[3] == 0)
            {
                return Encoding.UTF32; //UTF-32LE
            }
            if (bom[0] == 0xff && bom[1] == 0xfe)
            {
                return Encoding.Unicode; //UTF-16LE
            }
            if (bom[0] == 0xfe && bom[1] == 0xff)
            {
                return Encoding.BigEndianUnicode; //UTF-16BE
            }
            if (bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff)
            {
                // 00 00 FE FF is the big-endian BOM; Encoding.UTF32 is little-endian.
                return new UTF32Encoding(true, true); //UTF-32BE
            }

            // No BOM: sample the beginning of the file, looping because Read may return fewer bytes.
            file.Seek(0, SeekOrigin.Begin);
            int len = (int)Math.Min(8192, file.Length);
            byte[] buff = new byte[len];
            int total = 0;
            while (total < len)
            {
                int n = file.Read(buff, total, len - total);
                if (n <= 0)
                {
                    break;
                }
                total += n;
            }
            if (total < len)
            {
                Array.Resize(ref buff, total);
            }

            var detected = EncodingTools.DetectInputCodepage(buff);

            // use user's default when detection yields nothing more specific
            return detected.Equals(Encoding.Default) ? Encoding.Default : detected;
        }
        finally
        {
            // Leave the stream where the caller had it.
            file.Seek(pos, SeekOrigin.Begin);
        }
    }
    catch
    {
        return null;
    }
}
/// <summary>
/// Sets a query-string parameter to the encoded form of <paramref name="inhalt"/>
/// (via <c>EncodingTools.UrlISO885915Encode</c>), or removes the parameter when the
/// content is null or empty.
/// </summary>
/// <param name="name">Name of the query-string parameter.</param>
/// <param name="inhalt">Raw content of the parameter.</param>
public void SetzeParameterEncodedEin(string name, string inhalt)
{
    bool hasContent = !string.IsNullOrEmpty(inhalt);

    if (hasContent)
    {
        this.QueryString[name] = EncodingTools.UrlISO885915Encode(inhalt);
        return;
    }

    // Nothing to store: drop the parameter entirely.
    this.QueryString.Remove(name);
}
/// <summary>
/// Processes the received data: decodes packets from the cache one at a time and
/// enqueues each one as a deserialized NetMsg.
/// </summary>
private void ProcessReceive()
{
    byte[] packet;

    // Decode returns null when the cache holds no further packet.
    while ((packet = EncodingTools.Decode(ref cache)) != null)
    {
        msgQueue.Enqueue(NetMsg.Deserialize(packet));
    }
}
/// <summary>
/// Serializes and encodes a message, then sends it over the client socket.
/// A <see cref="SocketException"/> raised by the send is logged and not rethrown.
/// </summary>
/// <param name="msg">Message to send.</param>
private void SendMsg(NetMsg msg)
{
    // Serialization and encoding happen outside the try: only the send itself is guarded.
    byte[] serialized = msg.Serialize();
    byte[] packet = EncodingTools.Encode(serialized);

    try
    {
        clientSocket.Send(packet);
    }
    catch (SocketException)
    {
        Debug.LogError("发送数据失败,请检查网络连接");
    }
}
/// <summary>
/// Processes the cached TCP data and converts it into NetMsg instances; ReceiveCompleted is
/// invoked once for every NetMsg that is parsed.
/// </summary>
/// <remarks>
/// One TCP segment may contain several packets or only part of one, so the data has to be split here.
/// TCP segment: the data carried by one TCP transmission.
/// Packet: a wrapped piece of data consisting of a header and a data section
/// (simple enough that it is not wrapped in a type of its own).
/// </remarks>
private void ProcessData()
{
    byte[] packet;

    // Decode returns null when no further packet can be taken from the cache.
    while ((packet = EncodingTools.Decode(ref cache)) != null)
    {
        NetMsg parsed = NetMsg.Deserialize(packet);
        ReceiveCompleted(this, parsed);
    }
}
/// <summary>
/// Gets the character set declared by Content-Type. ASCII is returned when no charset
/// parameter is specified, and also when the charset lookup yields no encoding.
/// </summary>
/// <returns>Returns content charset.</returns>
private Encoding GetCharset()
{
    string declaredCharset = ContentType.Param_Charset;

    // RFC 2046 4.1.2. The default character set, US-ASCII.
    if (string.IsNullOrEmpty(declaredCharset))
    {
        return Encoding.ASCII;
    }

    Encoding resolved = EncodingTools.GetEncodingByCodepageName(declaredCharset);
    return resolved ?? Encoding.ASCII;
}
/// <summary>
/// Reads the whole input file and asks <c>EncodingTools.DetectInputCodepages</c> for the
/// encodings it may be written in.
/// </summary>
/// <param name="inputPath">Path of the file to inspect.</param>
/// <param name="maxEncodingsCount">Maximum number of encodings to request from the detector; must be at least 1.</param>
/// <returns>
/// The detected encodings, or <c>null</c> when the file could not be read or detection failed
/// (the failure is written to the trace).
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">If <paramref name="maxEncodingsCount"/> is less than 1.</exception>
/// <exception cref="FileNotFoundException">If <paramref name="inputPath"/> does not exists</exception>
private static Encoding[] DetectInputEncodings(string inputPath, int maxEncodingsCount)
{
    if (maxEncodingsCount < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(maxEncodingsCount), $"{nameof(maxEncodingsCount)} must be larger that 0!");
    }

    if (!File.Exists(inputPath))
    {
        TraceError(nameof(DetectInputEncodings), nameof(inputPath) + " does not exist!");
        throw new FileNotFoundException(Properties.Resources.Message_InputFileDoesNotExsist, inputPath);
    }

    // Read the complete file. File.ReadAllBytes loops until everything is read and always
    // closes the handle; a single Stream.Read call is allowed to return fewer bytes.
    byte[] buf;
    try
    {
        buf = File.ReadAllBytes(inputPath);
    }
    catch (Exception ex)
    {
        TraceWarning(nameof(DetectInputEncodings), "Exception while reading input file '" + inputPath + $"': {Environment.NewLine}");
        ex.WriteToTrace();

        // Without content there is nothing to detect.
        return null;
    }

    // Do the auto-detection.
    Encoding[] encodings = null;
    try
    {
        encodings = EncodingTools.DetectInputCodepages(buf, maxEncodingsCount);
    }
    catch (Exception ex)
    {
        TraceWarning($"Error while detecting the encoding of the file '{inputPath}'.");
        ex.WriteToTrace();
    }

    return encodings;
}
/// <summary>
/// Fills the reusable message object with the given codes and payload, then serializes,
/// encodes and sends it over the client socket. A failed send is reported on the console
/// and the exception is rethrown.
/// </summary>
/// <param name="opCode">Operation code of the message.</param>
/// <param name="subOpCode">Sub-operation code of the message.</param>
/// <param name="message">Payload object.</param>
public void SendNetMsg(int opCode, int subOpCode, object message)
{
    msg.Reset(opCode, subOpCode, message);

    byte[] serialized = msg.Serialize();
    byte[] packet = EncodingTools.Encode(serialized);

    try
    {
        // Send does not need to be asynchronous here.
        clientSocket.Send(packet);
    }
    catch (Exception)
    {
        Console.WriteLine($"消息发送失败(client ip={this.ip},OpCode={opCode},SubOpCode={subOpCode})");
        throw;
    }
}
/// <summary>
/// Creates a dummy email file: a few ASCII header lines followed by the body text,
/// base64-encoded in the encoding chosen by <c>EncodingTools.GetMostEfficientEncoding</c>.
/// </summary>
/// <param name="text">Body text of the email.</param>
/// <param name="path">Path to the new email (should have the extension .eml).</param>
private void SaveToAsEmail(string text, string path)
{
    // Pick the encoding for the body.
    Encoding bodyEncoding = EncodingTools.GetMostEfficientEncoding(text);

    // The file itself is written as ASCII; only the base64 payload carries the body encoding.
    using (var writer = new StreamWriter(path, false, Encoding.ASCII))
    {
        writer.WriteLine("Subject: test");
        writer.WriteLine("Transfer-Encoding: 7bit");
        writer.WriteLine("Content-Type: text/plain;\r\n\tcharset=\"{0}\"", bodyEncoding.BodyName);
        writer.WriteLine("Content-Transfer-Encoding: base64"); // should be QP
        writer.WriteLine();

        byte[] bodyBytes = bodyEncoding.GetBytes(text);
        string payload = Convert.ToBase64String(bodyBytes, Base64FormattingOptions.InsertLineBreaks);
        writer.Write(payload);
    }
}
/// <summary>
/// Asks <c>EncodingTools.DetectInputCodepages</c> for the single most likely encoding of the data.
/// </summary>
/// <param name="bytes">Raw data to inspect.</param>
/// <returns>The first detected encoding, or <c>null</c> when nothing was detected or detection threw.</returns>
private static Encoding DetectEncodingUsingMLang(Byte[] bytes)
{
    Encoding best = null;

    try
    {
        Encoding[] candidates = EncodingTools.DetectInputCodepages(bytes, 1);
        if (candidates.Length != 0)
        {
            best = candidates[0];
        }
    }
    catch
    {
        // Any detection error leaves the result at null.
    }

    return best;
}
/// <summary>
/// Encodes the test text with the selected encoding (prefixed by its preamble when it has one
/// of at least two bytes), then decodes it twice: once with a BOM-detecting StreamReader and once
/// with <c>EncodingTools.DetectInputCodepage</c>, showing both results and encoding names in the UI.
/// </summary>
private void DoTest()
{
    if (this.m_TestText == null || this.m_TestText.Length == 0)
    {
        return;
    }

    using (System.IO.MemoryStream buffer = new System.IO.MemoryStream())
    {
        byte[] payload = this.m_Encoding.GetBytes(this.m_TestText);
        byte[] bom = this.m_Encoding.GetPreamble();

        // Only write the preamble when one was returned and it is large enough to contain a BOM.
        if (bom.Length >= 2)
        {
            buffer.Write(bom, 0, bom.Length);
        }
        buffer.Write(payload, 0, payload.Length);

        // First pass: the standard text reader with BOM detection enabled.
        buffer.Position = 0;
        System.IO.StreamReader reader = new System.IO.StreamReader(buffer, true);
        this.streamReader.Text = reader.ReadToEnd();
        this.label1.Text = String.Format(
            "StreamReader: {0} / {1}",
            reader.CurrentEncoding.EncodingName,
            reader.CurrentEncoding.BodyName);

        // Second pass: the improved detection over the raw bytes.
        buffer.Position = 0;
        byte[] rawData = buffer.ToArray();

        Encoding targetEncoding;
        try
        {
            targetEncoding = EncodingTools.DetectInputCodepage(rawData);
        }
        catch (System.Runtime.InteropServices.COMException)
        {
            targetEncoding = Encoding.Default;
        }

        this.detected.Text = targetEncoding.GetString(rawData);
        this.label2.Text = String.Format(
            "EncodingTools.DetectInputCodepage: {0} / {1}",
            targetEncoding.EncodingName,
            targetEncoding.BodyName);
    }
}
/// <summary>Reads a CSV file and returns its contents as a list of string arrays.</summary>
/// <param name="path">The path to the file.</param>
/// <param name="hasHeaders">
///   Passed to the CSV reader: <c>true</c> when the first row of the file holds headers.
/// </param>
/// <param name="useCultureSeparator">
///   If <c>true</c>, <c>ListSeparator</c> is used as the delimiter. If <c>false</c>, the delimiter
///   detected from the file is used.
/// </param>
/// <returns>A list of string arrays containing the CSV data.</returns>
public static List<string[]> ArrayListFromCSVFile(string path, bool hasHeaders = true, bool useCultureSeparator = false)
{
    // The separator is always probed first, even when the culture separator ends up being used.
    var sniffedSeparator = detectSeparator(path);

    using (var csv = new CsvReader(
        EncodingTools.OpenTextFile(path),
        hasHeaders,
        useCultureSeparator ? ListSeparator : sniffedSeparator,
        '\"',
        '\"',
        '\0',
        ValueTrimmingOptions.UnquotedOnly))
    {
        return arrayListFromCSV(csv);
    }
}
/// <summary>Reads a CSV file with a header row and returns its records as dictionaries.</summary>
/// <param name="path">The path of the CSV file.</param>
/// <param name="useCultureSeparator">
///   If <c>true</c>, <c>ListSeparator</c> is used as the delimiter. If <c>false</c>, the delimiter
///   detected from the file is used.
/// </param>
/// <returns>
///   A list of dictionaries, where each dictionary is a record, and the key-value pairs are the column header and
///   corresponding value.
/// </returns>
public static List<Dictionary<string, string>> DictionaryListFromCSVFile(string path, bool useCultureSeparator = false)
{
    // The separator is always probed first, even when the culture separator ends up being used.
    var sniffedSeparator = detectSeparator(path);

    using (var textReader = EncodingTools.OpenTextFile(path))
    {
        var delimiter = useCultureSeparator ? ListSeparator : sniffedSeparator;

        // Headers are always expected (second argument).
        var csv = new CsvReader(
            textReader,
            true,
            delimiter,
            '\"',
            '\"',
            '\0',
            ValueTrimmingOptions.UnquotedOnly);

        return dictionaryListFromCSV(csv);
    }
}
/// <summary>
/// Usage sample for the EncodingTools text helpers: reads a file into a string, opens a file
/// through a reader, and opens a reader over an in-memory windows-1252 stream.
/// </summary>
private void OpenTextFileTest()
{
    // read the complete file into a string
    string content = EncodingTools.ReadTextFile(@"C:\test\txt");

    // create a StreamReader with the guessed best encoding
    using (StreamReader sr = EncodingTools.OpenTextFile(@"C:\test\txt"))
    {
        string fileContent = sr.ReadToEnd();
    }

    // create a streamReader from a stream
    // The sample characters are written as escapes (o-umlaut, a-umlaut, u-umlaut, sharp s) so they
    // survive re-encoding of this source file. The previous literal had been corrupted into
    // characters that windows-1252 cannot represent; the last two characters are a best guess.
    using (MemoryStream ms = new MemoryStream(
               Encoding.GetEncoding("windows-1252").GetBytes("Some umlauts: \u00F6\u00E4\u00FC\u00DF")))
    {
        using (StreamReader sr = EncodingTools.OpenTextStream(ms))
        {
            string fileContent = sr.ReadToEnd();
        }
    }
}
/// <summary>
/// Loads a file, detects its encoding with <c>EncodingTools.DetectInputCodepage</c> and returns
/// the decoded contents together with the encoding and file/directory information.
/// </summary>
/// <param name="file">Path of the file to load.</param>
/// <param name="includeBom">
/// When <c>false</c>, a byte-order mark at the start of the file is skipped before decoding.
/// When <c>true</c>, all bytes are decoded.
/// </param>
/// <returns>The populated <c>EncodedFile</c>.</returns>
private static EncodedFile GetFileWithEncodingCore(string file, bool includeBom = true)
{
    // ReadAllBytes opens the file read-only and reads it completely
    // (a single Stream.Read call may return fewer bytes than requested).
    byte[] rawData = File.ReadAllBytes(file);
    var encoding = EncodingTools.DetectInputCodepage(rawData);

    // Only skip the preamble when the data really starts with it. The detected encoding having a
    // preamble does not mean the file carries one, and skipping blindly would drop real content.
    int bomLength = 0;
    if (!includeBom)
    {
        byte[] preamble = encoding.GetPreamble();
        if (preamble.Length > 0 && rawData.Length >= preamble.Length)
        {
            bool startsWithPreamble = true;
            for (int i = 0; i < preamble.Length; i++)
            {
                if (rawData[i] != preamble[i])
                {
                    startsWithPreamble = false;
                    break;
                }
            }

            if (startsWithPreamble)
            {
                bomLength = preamble.Length;
            }
        }
    }

    var contents = encoding.GetString(rawData, bomLength, rawData.Length - bomLength);

    return new EncodedFile
    {
        Contents = contents,
        Encoding = encoding,
        Directory = new DirectoryInfo(new DirectoryInfo(file).Parent.FullName),
        File = new FileInfo(file)
    };
}
/// <summary>
/// Refreshes the list view with every encoding that <c>EncodingTools.DetectOutgoingEncodings</c>
/// reports for the test text, marking the entries that match the most efficient encoding
/// for stream use and for MIME use.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    this.listView1.Items.Clear();

    Encoding encForStream = EncodingTools.GetMostEfficientEncodingForStream(testText.Text);
    Encoding encForMime = EncodingTools.GetMostEfficientEncoding(testText.Text);

    Encoding[] encodings = EncodingTools.DetectOutgoingEncodings(testText.Text, EncodingTools.AllEncodings, true);
    foreach (Encoding encoding in encodings)
    {
        ListViewItem encItem = GetListItemForEncoding(encoding);

        // Encoding does not overload ==, so compare by value: two instances describing the same
        // encoding are not guaranteed to be the same object.
        if (object.Equals(encoding, encForStream))
        {
            encItem.StateImageIndex = 2;
        }
        if (object.Equals(encoding, encForMime))
        {
            encItem.ImageKey = "text_ok";
        }

        this.listView1.Items.Add(encItem);
    }
}
/// <summary>
/// Checks whether a file is UTF-8: either <c>EncodingTools.DetectInputCodepage</c> reports
/// code page 65001 for its bytes, or the file starts with the UTF-8 byte-order mark (EF BB BF).
/// </summary>
/// <param name="filePath">Path of the file to inspect.</param>
/// <returns><c>true</c> when the file is considered UTF-8; otherwise <c>false</c>.</returns>
public bool IsUTF8(string filePath)
{
    const int utf8CodePage = 65001;

    // The file is read once; the BOM check below reuses these bytes instead of reopening the file.
    byte[] bytes = File.ReadAllBytes(filePath);

    var mostEfficientEncoding = EncodingTools.DetectInputCodepage(bytes);
    if (mostEfficientEncoding.CodePage == utf8CodePage)
    {
        return true;
    }

    // Fall back to the byte-order mark, if there is one.
    return bytes.Length >= 3
           && bytes[0] == 0xef
           && bytes[1] == 0xbb
           && bytes[2] == 0xbf;
}
/// <summary>
/// Re-decodes a header whose raw bytes appear to be in a charset other than the one used for its
/// current value, and stores the corrected value. Headers that contain "?q?" or "?b?" markers
/// are left untouched. Any failure is logged as a warning and otherwise ignored.
/// </summary>
/// <param name="header">Header to fix.</param>
/// <param name="logger">Optional logger; a <c>NullLogger</c> is used when omitted.</param>
public static void FixEncodingIssues(this Header header, ILogger logger = null)
{
    if (logger == null)
    {
        logger = new NullLogger();
    }

    try
    {
        var rawValueString = Encoding.UTF8.GetString(header.RawValue).Trim();

        // Values carrying "?q?"/"?b?" markers are skipped. The markers are ASCII tokens,
        // so an ordinal comparison is used.
        if (rawValueString.IndexOf("?q?", StringComparison.OrdinalIgnoreCase) > -1 ||
            rawValueString.IndexOf("?b?", StringComparison.OrdinalIgnoreCase) > -1)
        {
            return;
        }

        var charset = EncodingTools.DetectCharset(header.RawValue);
        if (string.IsNullOrEmpty(charset))
        {
            return;
        }

        // Same guard as the MimeMessage overload: without a usable encoding there is nothing to set.
        var encoding = EncodingTools.GetEncodingByCodepageName(charset);
        if (encoding == null)
        {
            return;
        }

        var newValue = header.GetValue(charset);

        // Static Equals tolerates a null current value.
        if (string.Equals(header.Value, newValue, StringComparison.InvariantCultureIgnoreCase))
        {
            return;
        }

        header.SetValue(encoding, newValue);
    }
    catch (Exception ex)
    {
        logger.Warn("Header.FixEncodingIssues: {0}", ex.Message);
    }
}
// Public Methods (1)

/// <summary>
/// Builds the list of encodings a file is probably written in, using up to ten candidates
/// from <c>EncodingTools.DetectInputCodepages</c>, ordered by encoding name.
/// </summary>
/// <param name="filePath">Path of the file to inspect.</param>
/// <returns>
/// An empty list for an empty file; the full encoding list when nothing is detected or an error
/// occurs (errors are logged to file); otherwise the detected candidates. windows-1256 is ensured
/// on the detected-candidates and error paths.
/// </returns>
public static EncodingsInf DetectProbableFileCodepages(string filePath)
{
    var probable = new EncodingsInf();

    try
    {
        var data = File.ReadAllBytes(filePath);
        if (data.Length == 0)
        {
            return probable;
        }

        var candidates = EncodingTools.DetectInputCodepages(data, maxEncodings: 10);
        var nothingDetected = candidates == null || candidates.Length == 0;
        if (nothingDetected)
        {
            addAllEncodings(probable);
            return probable;
        }

        foreach (var candidate in candidates.OrderBy(c => c.EncodingName))
        {
            var entry = new EncodingInf
            {
                Name = candidate.EncodingName,
                BodyName = candidate.BodyName
            };
            probable.Add(entry);
        }
    }
    catch (Exception ex)
    {
        ExceptionLogger.LogExceptionToFile(ex);
        addAllEncodings(probable);
    }

    addWindows1256IfNotExists(probable);
    return probable;
}
public void WhenSubtituteSettingsCalled_SubstitutionsMaintainEncoding()
{
    // Arrange: clean output and a command with two substitution settings.
    CleanOutput();

    var settings = new Hashtable();
    settings.Add("Key A", new[] { "Value A" });
    settings.Add("Key B", new[] { "Value B" });

    var command = new CopySubstitutedSettingFiles();
    command.DeploymentEnvironment = "Test";
    command.TemplatesDirectory = @"Integration\_templates";
    command.TargetDirectory = @"Integration\_environments";
    command.Settings = settings;

    // Act: advance the enumerator once so the command runs.
    var enumerator = command.Invoke().GetEnumerator();
    enumerator.MoveNext();

    // Assert: the substituted file is detected as UTF-16 LE.
    var detectedEncoding = EncodingTools.DetectInputCodepage(File.ReadAllBytes(file2));
    Assert.That(detectedEncoding, Is.EqualTo(Encoding.Unicode));
}
/// <summary>
/// Repairs charset problems in a message: every text body part with default content encoding
/// whose detected charset differs from (or is missing in) its Content-Type is re-read with the
/// detected charset and rewritten; afterwards the From header (when it parses to a named address)
/// and the Subject header are fixed. Failures are logged as warnings and do not propagate.
/// </summary>
/// <param name="mimeMessage">Message to fix.</param>
/// <param name="logger">Optional logger; a <c>NullLogger</c> is used when omitted.</param>
public static void FixEncodingIssues(this MimeMessage mimeMessage, ILogger logger = null)
{
    logger = logger ?? new NullLogger();

    try
    {
        foreach (var part in mimeMessage.BodyParts)
        {
            var textPart = part as TextPart;
            var isCandidate = textPart != null
                              && textPart.ContentObject != null
                              && textPart.ContentObject.Encoding == ContentEncoding.Default;
            if (!isCandidate)
            {
                continue;
            }

            try
            {
                byte[] decodedBytes;
                using (var buffer = new MemoryStream())
                {
                    textPart.ContentObject.DecodeTo(buffer);
                    decodedBytes = buffer.ToArray();
                }

                var charset = EncodingTools.DetectCharset(decodedBytes);
                if (string.IsNullOrEmpty(charset))
                {
                    continue;
                }

                // Nothing to do when the part already declares exactly the detected charset.
                var contentType = textPart.ContentType;
                var alreadyDeclared = contentType != null
                                      && !string.IsNullOrEmpty(contentType.Charset)
                                      && contentType.Charset == charset;
                if (alreadyDeclared)
                {
                    continue;
                }

                var encoding = EncodingTools.GetEncodingByCodepageName(charset);
                if (encoding == null)
                {
                    continue;
                }

                var newText = textPart.GetText(charset);
                textPart.SetText(encoding, newText);
            }
            catch (Exception ex)
            {
                logger.Warn("MimeMessage.FixEncodingIssues->ImproveBodyEncoding: {0}", ex.Message);
            }
        }

        if (mimeMessage.Headers.Contains(HeaderId.From))
        {
            var firstSender = mimeMessage.From.FirstOrDefault();
            if (firstSender != null && !string.IsNullOrEmpty(firstSender.Name))
            {
                var fromHeader = mimeMessage.Headers.FirstOrDefault(h => h.Id == HeaderId.From);
                fromHeader.FixEncodingIssues(logger);
            }
        }

        if (mimeMessage.Headers.Contains(HeaderId.Subject))
        {
            var subjectHeader = mimeMessage.Headers.FirstOrDefault(h => h.Id == HeaderId.Subject);
            subjectHeader.FixEncodingIssues(logger);
        }
    }
    catch (Exception ex)
    {
        logger.Warn("MimeMessage.FixEncodingIssues: {0}", ex.Message);
    }
}
/// <summary>
/// Uploads an iCalendar payload to the given calendar when the receiving address is the organizer
/// or one of the attendees of the first event in the payload. Nothing is uploaded for other
/// ("alien") events, for missing input, or for content types other than "text/calendar".
/// All exceptions are logged and swallowed.
/// </summary>
/// <param name="mailBoxData">Mailbox whose tenant and user are used for the upload.</param>
/// <param name="calendarId">Target calendar identifier.</param>
/// <param name="calendarEventUid">UID of the calendar event; the upload is skipped when empty.</param>
/// <param name="calendarIcs">The iCalendar text.</param>
/// <param name="calendarCharset">Charset name used to turn the text into bytes; UTF-8 is used when it is empty or unknown.</param>
/// <param name="calendarContentType">Content type; must be "text/calendar".</param>
/// <param name="calendarEventReceiveEmail">Address that received the event.</param>
/// <param name="httpContextScheme">Scheme passed to the API helper.</param>
public void UploadIcsToCalendar(MailBoxData mailBoxData, int calendarId, string calendarEventUid, string calendarIcs,
    string calendarCharset, string calendarContentType, string calendarEventReceiveEmail, string httpContextScheme)
{
    try
    {
        if (string.IsNullOrEmpty(calendarEventUid) ||
            string.IsNullOrEmpty(calendarIcs) ||
            calendarContentType != "text/calendar")
        {
            return;
        }

        var calendar = MailUtil.ParseValidCalendar(calendarIcs, Log);
        if (calendar == null)
        {
            return;
        }

        var organizer = calendar.Events[0].Organizer;
        if (organizer == null)
        {
            // Caught and logged by the handler below.
            throw new ArgumentException("calendarIcs.organizer is null");
        }

        // The addresses taken from the calendar are lower-cased, so the comparison with the
        // receiving address must ignore case as well.
        var orgEmail = organizer.Value.ToString()
            .ToLowerInvariant()
            .Replace("mailto:", "");

        var alienEvent = !orgEmail.Equals(calendarEventReceiveEmail, StringComparison.OrdinalIgnoreCase);

        if (alienEvent)
        {
            alienEvent = !calendar.Events[0].Attendees.Any(
                a => a.Value.ToString()
                    .ToLowerInvariant()
                    .Replace("mailto:", "")
                    .Equals(calendarEventReceiveEmail, StringComparison.OrdinalIgnoreCase));
        }

        if (alienEvent)
        {
            return;
        }

        CoreContext.TenantManager.SetCurrentTenant(mailBoxData.TenantId);
        SecurityContext.AuthenticateMe(new Guid(mailBoxData.UserId));

        // Fall back to UTF-8 when the charset is missing or unknown, rather than failing on a null encoding.
        var encoding = (string.IsNullOrEmpty(calendarCharset)
                           ? null
                           : EncodingTools.GetEncodingByCodepageName(calendarCharset))
                       ?? System.Text.Encoding.UTF8;

        using (var ms = new MemoryStream(encoding.GetBytes(calendarIcs)))
        {
            var apiHelper = new ApiHelper(httpContextScheme, Log);
            apiHelper.UploadIcsToCalendar(calendarId, ms, "calendar.ics", calendarContentType);
        }

        Log.Info("CalendarEngine->UploadIcsToCalendar() has been succeeded");
    }
    catch (Exception ex)
    {
        Log.ErrorFormat("CalendarEngine->UploadIcsToCalendar with \r\n" +
                        "calendarId: {0}\r\n" +
                        "calendarEventUid: '{1}'\r\n" +
                        "calendarIcs: '{2}'\r\n" +
                        "calendarCharset: '{3}'\r\n" +
                        "calendarContentType: '{4}'\r\n" +
                        "calendarEventReceiveEmail: '{5}'\r\n" +
                        "Exception:\r\n{6}\r\n",
            calendarId, calendarEventUid, calendarIcs, calendarCharset, calendarContentType,
            calendarEventReceiveEmail, ex.ToString());
    }
}
/// <summary>
/// Parses MIME header from the specified stream.
/// </summary>
/// <remarks>
/// Header fields are first collected as raw bytes (unfolding continuation lines and From/To fields
/// that are broken across lines inside a quoted display name). They are then decoded with the
/// charset from Content-Type, with an encoding detected from the raw Subject bytes when
/// Content-Type has no charset, or with UTF-8 otherwise.
/// </remarks>
/// <param name="stream">MIME header stream.</param>
/// <exception cref="ArgumentNullException">Is raised when <b>stream</b> is null.</exception>
public void Parse(SmartStream stream)
{
    // TODO: the original note at this position was corrupted by a character-set conversion and could
    // not be recovered; judging by the surviving fragment it concerned headers arriving as raw UTF-8.
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    var headers = new List<KeyValuePair<string, byte[]>>();
    var currentMemStream = new MemoryStream();
    SmartStream.ReadLineAsyncOP readLineOP = new SmartStream.ReadLineAsyncOP(
        new byte[Workaround.Definitions.MaxStreamLineLength],
        SizeExceededAction.ThrowException);

    while (true)
    {
        stream.ReadLine(readLineOP, false);
        if (readLineOP.Error != null)
        {
            throw readLineOP.Error;
        }

        // We reached end of stream, or we got the blank header terminator line.
        if (readLineOP.BytesInBuffer == 0 || readLineOP.LineBytesInBuffer == 0)
        {
            break;
        }

        string line = Encoding.UTF8.GetString(readLineOP.Buffer, 0, readLineOP.BytesInBuffer);
        var realBuffer = new List<byte>();
        bool headerEnded = false;

        // A From/To field that opens a quoted display name but does not yet end with ">":
        // keep reading lines and glue them together until the address is closed.
        if ((line.StartsWith("From: \"") || line.StartsWith("To: \"")) && !line.EndsWith(">\r\n"))
        {
            var tmpArr = new byte[readLineOP.BytesInBuffer];
            Array.Copy(readLineOP.Buffer, 0, tmpArr, 0, readLineOP.BytesInBuffer);
            realBuffer.AddRange(tmpArr);

            do
            {
                stream.ReadLine(readLineOP, false);
                if (readLineOP.Error != null)
                {
                    throw readLineOP.Error;
                }

                if (readLineOP.LineBytesInBuffer == 0)
                {
                    // The terminator (or end of stream) has been consumed here, so the outer loop
                    // must not read on, otherwise body lines would be parsed as headers.
                    headerEnded = true;
                    break;
                }

                line = Encoding.UTF8.GetString(readLineOP.Buffer, 0, readLineOP.BytesInBuffer);
                tmpArr = new byte[readLineOP.BytesInBuffer];
                Array.Copy(readLineOP.Buffer, 0, tmpArr, 0, readLineOP.BytesInBuffer);
                realBuffer.AddRange(tmpArr);
            } while (!line.EndsWith(">\r\n"));

            if (realBuffer.Count > 0)
            {
                line = Encoding.UTF8.GetString(realBuffer.ToArray());
            }
        }

        // Bytes of the current (possibly glued) line.
        byte[] fieldBytes;
        int fieldLength;
        if (realBuffer.Count > 0)
        {
            fieldBytes = realBuffer.ToArray();
            fieldLength = fieldBytes.Length;
        }
        else
        {
            fieldBytes = readLineOP.Buffer;
            fieldLength = readLineOP.BytesInBuffer;
        }

        // A new header field starts when nothing is pending; a line starting with white space
        // continues the pending field. Anything else closes the pending field and starts a new one.
        bool appendToPending = currentMemStream.Length == 0 || char.IsWhiteSpace(line[0]);
        if (!appendToPending)
        {
            AddToBinaryDict(headers, currentMemStream);
            currentMemStream = new MemoryStream();
        }

        // Always write the glued bytes when there are any, also for the very first field.
        currentMemStream.Write(fieldBytes, 0, fieldLength);

        if (headerEnded)
        {
            break;
        }
    }

    if (currentMemStream.Length > 0)
    {
        AddToBinaryDict(headers, currentMemStream);
    }
    m_IsModified = false;

    // Process dictionary
    // Find content type. FirstOrDefault tolerates a duplicated header instead of throwing.
    var contentTypeHeader = headers
        .Where(x => x.Value != null)
        .Where(x => "content-type".Equals(x.Key, StringComparison.OrdinalIgnoreCase))
        .Select(x => Encoding.UTF8.GetString(x.Value))
        .FirstOrDefault();

    var encoding = Encoding.UTF8;
    if (contentTypeHeader != null)
    {
        var mime = MIME_h_ContentType.Parse(contentTypeHeader);
        if (!string.IsNullOrEmpty(mime.Param_Charset))
        {
            encoding = EncodingTools.GetEncodingByCodepageName(mime.Param_Charset) ?? Encoding.UTF8;
        }
        else
        {
            var subjectRaw = headers
                .Where(x => x.Value != null)
                .Where(x => "subject".Equals(x.Key, StringComparison.OrdinalIgnoreCase))
                .Select(x => x.Value)
                .FirstOrDefault();

            // Try to detect the encoding heuristically from the raw subject bytes.
            encoding = subjectRaw != null
                ? (EncodingTools.DetectInputCodepage(subjectRaw) ?? Encoding.UTF8)
                : Encoding.UTF8;
        }
    }

    foreach (var keyValuePair in headers)
    {
        // The lookups above already allow for entries without a value; skip them here too.
        if (keyValuePair.Value == null)
        {
            continue;
        }

        Add(encoding.GetString(keyValuePair.Value));
    }
}
/// <summary>
/// Decodes "encoded-word"'s from the specified text. For more information see RFC 2047.
/// </summary>
/// <param name="text">Text to decode.</param>
/// <returns>Returns decoded text, or null when <b>text</b> is null.</returns>
public static string DecodeWords(string text)
{
    if (text == null)
    {
        return null;
    }

    /* RFC 2047 2. Syntax of encoded-words.
        An 'encoded-word' is defined by the following ABNF grammar.  The
        notation of RFC 822 is used, with the exception that white space
        characters MUST NOT appear between components of an 'encoded-word'.

        encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
        charset      = token    ; see section 3
        encoding     = token    ; see section 4
        token        = 1*<Any CHAR except SPACE, CTLs, and especials>
        especials    = "(" / ")" / "<" / ">" / "@" / "," / ";" / ":" / "
                       <"> / "/" / "[" / "]" / "?" / "." / "="
        encoded-text = 1*<Any printable ASCII character other than "?" or SPACE>
                       ; (but see "Use of encoded-words in message headers", section 5)

        Both 'encoding' and 'charset' names are case-independent.  Thus the
        charset name "ISO-8859-1" is equivalent to "iso-8859-1", and the
        encoding named "Q" may be spelled either "Q" or "q".

        An 'encoded-word' may not be more than 75 characters long, including
        'charset', 'encoding', 'encoded-text', and delimiters.  If it is
        desirable to encode more text than will fit in an 'encoded-word' of
        75 characters, multiple 'encoded-word's (separated by CRLF SPACE) may
        be used.

        IMPORTANT: 'encoded-word's are designed to be recognized as 'atom's
        by an RFC 822 parser.  As a consequence, unencoded white space
        characters (such as SPACE and HTAB) are FORBIDDEN within an
        'encoded-word'.  For example, the character sequence

            =?iso-8859-1?q?this is some text?=

        would be parsed as four 'atom's, rather than as a single 'atom' (by
        an RFC 822 parser) or 'encoded-word' (by a parser which understands
        'encoded-words').  The correct way to encode the string "this is some
        text" is to encode the SPACE characters as well, e.g.

            =?iso-8859-1?q?this=20is=20some=20text?=
    */

    StringReader r = new StringReader(text);
    StringBuilder retVal = new StringBuilder();

    // We need to loop all words, if encoded word, decode it, otherwise just append to return value.
    bool lastIsEncodedWord = false;
    while (r.Available > 0)
    {
        string whiteSpaces = r.ReadToFirstChar();

        // Probably is encoded-word, we try to parse it.
        if (r.StartsWith("=?") && r.SourceString.IndexOf("?=") > -1)
        {
            StringBuilder encodedWord = new StringBuilder();
            string decodedWord = null;

            try
            {
                // NOTE: We can't read encoded word and then split !!!, we need to read each part.

                // Remove =?
                encodedWord.Append(r.ReadSpecifiedLength(2));

                // Read charset
                string charset = r.QuotedReadToDelimiter('?');
                encodedWord.Append(charset + "?");

                // Read encoding
                string encoding = r.QuotedReadToDelimiter('?');
                encodedWord.Append(encoding + "?");

                // Read text
                string encodedText = r.QuotedReadToDelimiter('?');
                encodedWord.Append(encodedText + "?");

                // We must have remaining '=' here
                if (r.StartsWith("="))
                {
                    encodedWord.Append(r.ReadSpecifiedLength(1));

                    Encoding c = EncodingTools.GetEncodingByCodepageName(charset);
                    if (c != null)
                    {
                        // Encoding names are case-independent ASCII tokens: compare ordinally.
                        if (string.Equals(encoding, "q", StringComparison.OrdinalIgnoreCase))
                        {
                            decodedWord = Core.QDecode(c, encodedText);
                        }
                        else if (string.Equals(encoding, "b", StringComparison.OrdinalIgnoreCase))
                        {
                            decodedWord = c.GetString(Core.Base64Decode(Encoding.Default.GetBytes(encodedText)));
                        }
                    }
                }
            }
            catch
            {
                // Not encoded-word or contains unknown charset/encoding, so leave
                // encoded-word as is.
            }

            /* RFC 2047 6.2.
                When displaying a particular header field that contains multiple
                'encoded-word's, any 'linear-white-space' that separates a pair of
                adjacent 'encoded-word's is ignored.  (This is to allow the use of
                multiple 'encoded-word's to represent long strings of unencoded text,
                without having to separate 'encoded-word's where spaces occur in the
                unencoded text.)
            */
            if (!lastIsEncodedWord)
            {
                retVal.Append(whiteSpaces);
            }

            // Decoding failed for that encoded-word, leave encoded-word as is.
            if (decodedWord == null)
            {
                retVal.Append(encodedWord.ToString());
            }
            // We decoded encoded-word successfully.
            else
            {
                retVal.Append(decodedWord);
            }

            lastIsEncodedWord = true;
        }
        // Normal word.
        else if (r.StartsWithWord())
        {
            retVal.Append(whiteSpaces + r.ReadWord(false));
            lastIsEncodedWord = false;
        }
        // We have some separator or parenthesize.
        else
        {
            retVal.Append(whiteSpaces + r.ReadSpecifiedLength(1));

            // A separator ends the run of adjacent encoded-words, so white space in front of a
            // following encoded-word must be kept (only white space BETWEEN adjacent
            // encoded-words is dropped, RFC 2047 6.2).
            lastIsEncodedWord = false;
        }
    }

    return retVal.ToString();
}
/// <summary>
/// Parses parameters from the specified reader.
/// </summary>
/// <remarks>
/// Regular parameters are stored as read. Parameters whose name contains '*' are treated as
/// RFC 2231 encoded and/or split parameters: their segments are concatenated and the result is
/// decoded with <c>DecodeExtOctet</c>, using the charset given in the first segment
/// (ASCII when none is given or the charset is unknown).
/// </remarks>
/// <param name="reader">MIME reader.</param>
/// <exception cref="ArgumentNullException">Is raised when <b>reader</b> is null reference.</exception>
public void Parse(MIME_Reader reader)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    /* RFC 2231.
    */

    while (true)
    {
        // End of stream reached.
        if (reader.Peek(true) == -1)
        {
            break;
        }
        // Next parameter start, just eat that char.
        else if (reader.Peek(true) == ';')
        {
            reader.Char(false);
        }
        else
        {
            string name = reader.Token();
            if (name == null)
            {
                break;
            }

            string value = "";
            // Parameter value specified.
            if (reader.Peek(true) == '=')
            {
                reader.Char(false);

                string v = reader.Word();
                // Normally value may not be null, but following case: paramName=EOS.
                if (v != null)
                {
                    value = v;
                }
            }

            // RFC 2231 encoded/splitted parameter.
            if (name.IndexOf('*') > -1)
            {
                string[] name_x_no_x = name.Split('*');
                name = name_x_no_x[0];

                Encoding charset = Encoding.ASCII;
                StringBuilder valueBuffer = new StringBuilder();

                // We must have charset'language'value.
                // Examples:
                //      URL*=utf-8''test;
                //      URL*0*=utf-8''"test";
                if ((name_x_no_x.Length == 2 && name_x_no_x[1] == "") || name_x_no_x.Length == 3)
                {
                    // Split into at most three parts so an apostrophe inside the value itself
                    // does not truncate it.
                    string[] charset_language_value = value.Split(new[] { '\'' }, 3);
                    if (charset_language_value.Length == 3)
                    {
                        charset = EncodingTools.GetEncodingByCodepageName(charset_language_value[0]) ?? Encoding.ASCII;
                        valueBuffer.Append(charset_language_value[2]);
                    }
                    else
                    {
                        // Malformed: the charset'language' prefix is missing. Keep the raw value
                        // instead of failing the whole parse.
                        valueBuffer.Append(value);
                    }
                }
                // No encoding, probably just splitted ASCII value.
                // Example:
                //     URL*0="value1";
                //     URL*1="value2";
                else
                {
                    valueBuffer.Append(value);
                }

                // Read while value continues.
                while (true)
                {
                    // End of stream reached.
                    if (reader.Peek(true) == -1)
                    {
                        break;
                    }
                    // Next parameter start, just eat that char.
                    else if (reader.Peek(true) == ';')
                    {
                        reader.Char(false);
                    }
                    else
                    {
                        // A different parameter follows: the split value is complete.
                        if (!reader.StartsWith(name + "*"))
                        {
                            break;
                        }

                        // Skip the segment name; only its value is of interest.
                        reader.Token();

                        // Parameter value specified.
                        if (reader.Peek(true) == '=')
                        {
                            reader.Char(false);

                            string v = reader.Word();
                            // Normally value may not be null, but following case: paramName=EOS.
                            if (v != null)
                            {
                                valueBuffer.Append(v);
                            }
                        }
                    }
                }

                this[name] = DecodeExtOctet(valueBuffer.ToString(), charset);
            }
            // Regular parameter.
            else
            {
                this[name] = value;
            }
        }
    }

    m_IsModified = false;
}