/// <summary>
/// Decodes a single line of body text according to its content-transfer-encoding
/// and appends the result to <paramref name="builder"/>.
/// </summary>
/// <param name="builder">Destination buffer. When null the call does nothing.</param>
/// <param name="buff">The raw (still encoded) line.</param>
/// <param name="transferEncoding">
/// The content-transfer-encoding token. "quoted-printable" and "base64" (compared
/// case-insensitively) are decoded; any other value, including null, causes the line
/// to be appended unchanged followed by CRLF.
/// </param>
/// <param name="charset">Charset name handed to MimeUtility.TryGetEncoding to select the text encoding.</param>
private static void DecodeAndAppendLine(StringBuilder builder, string buff, string transferEncoding, string charset)
{
    if (null == builder)
    {
        return;
    }

    //Transfer-encoding tokens are protocol identifiers, not linguistic text, so they are
    //compared ordinally. A culture-sensitive comparison fails to match "QUOTED-PRINTABLE"
    //under cultures with special I/i casing rules (e.g. tr-TR).
    if (string.Equals(transferEncoding, "quoted-printable", StringComparison.OrdinalIgnoreCase))
    {
        builder.Append(MimeUtility.DecodeQuotedPrintableLine(buff, MimeUtility.TryGetEncoding(charset)));
    }
    else if (string.Equals(transferEncoding, "base64", StringComparison.OrdinalIgnoreCase))
    {
        builder.Append(MimeUtility.DecodeBase64String(buff, MimeUtility.TryGetEncoding(charset)));
    }
    else
    {
        //not an encoding we decode: keep the text as-is and restore the line terminator
        builder.Append(buff + "\r\n");
    }
}
/// <summary>
/// Decodes RFC 2047 message header extensions into unicode strings consistent with the System.Net.MailMessage object model.
/// Every header value that contains at least one encoded-word is replaced, in place, by its decoded form;
/// values without encoded-words are left untouched.
/// </summary>
/// <param name="headers">
/// Header values keyed by header name. The string arrays are modified in place.
/// A null dictionary, a null value array and null or empty entries are skipped.
/// </param>
private static void InPlaceDecodeExtendedHeaders(Dictionary<string, string[]> headers)
{
    /*********************************************************
     * http://tools.ietf.org/search/rfc2047
     *
     * RFC 2047 "Message Header Extensions"
     *
     * 2. Syntax of encoded-words
     *
     *    An 'encoded-word' is defined by the following ABNF grammar.  The
     *    notation of RFC 822 is used, with the exception that white space
     *    characters MUST NOT appear between components of an 'encoded-word'.
     *
     *    encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
     *
     *    ...
     *
     * 3. Character sets
     *
     *    The 'charset' portion of an 'encoded-word' specifies the character
     *    set associated with the unencoded text.  A 'charset' can be any of
     *    the character set names allowed in an MIME "charset" parameter of a
     *    "text/plain" body part, or any character set name registered with
     *    IANA for use with the MIME text/plain content-type.
     *
     *    ...
     *
     *    When there is a possibility of using more than one character set to
     *    represent the text in an 'encoded-word', and in the absence of
     *    private agreements between sender and recipients of a message, it is
     *    recommended that members of the ISO-8859-* series be used in
     *    preference to other character sets.
     *
     * 4. Encodings
     *
     *    Initially, the legal values for "encoding" are "Q" and "B".  These
     *    encodings are described below.  The "Q" encoding is recommended for
     *    use when most of the characters to be encoded are in the ASCII
     *    character set; otherwise, the "B" encoding should be used.
     *    Nevertheless, a mail reader which claims to recognize 'encoded-word's
     *    MUST be able to accept either encoding for any character set which it
     *    supports.
     *
     *    ...
     *
     * 4.1. The "B" encoding
     *
     *    The "B" encoding is identical to the "BASE64" encoding defined by RFC
     *    2045.
     *
     * 4.2. The "Q" encoding
     *
     *    The "Q" encoding is similar to the "Quoted-Printable" content-
     *    transfer-encoding defined in RFC 2045.  It is designed to allow text
     *    containing mostly ASCII characters to be decipherable on an ASCII
     *    terminal without decoding.
     *
     * 6.2. Display of 'encoded-word's
     *
     *    Linear white space that separates a pair of adjacent 'encoded-word's
     *    is ignored; white space between an 'encoded-word' and ordinary text
     *    is part of the header value and is kept.
     *
     *********************************************************/

    //convert the raw header into decoded strings.
    Debug.WriteLine("InPlaceDecodeExtendedHeaders()");

    if (null == headers)
    {
        return;
    }

    //the charset field goes into capture group 1
    //the transfer encoding field goes into capture group 2
    //the raw text value goes into capture group 3.
    const string p_encodedWord = @"=\?([^\?]+)\?([^\?]+)\?([^\?]+)\?=";

    foreach (string[] hlist in headers.Values)
    {
        if (null == hlist)
        {
            continue;
        }

        for (int i = 0; i < hlist.Length; i++)
        {
            string raw = hlist[i];
            if (string.IsNullOrEmpty(raw))
            {
                continue;
            }

            MatchCollection words = Regex.Matches(raw, p_encodedWord);
            if (0 == words.Count)
            {
                continue; //nothing is RFC 2047 encoded, leave the value alone
            }

            StringBuilder b = new StringBuilder();
            int cursor = 0;        //index of the first character not yet copied or decoded
            bool isFirstWord = true;

            foreach (Match word in words)
            {
                //text sitting between the previous encoded-word (or the start) and this one
                string gap = raw.Substring(cursor, word.Index - cursor);

                //Only whitespace that separates two adjacent encoded-words is dropped.
                //Leading text and any gap containing real characters is not RFC 2047
                //encoded and is copied through unchanged.
                if (isFirstWord || gap.Trim().Length > 0)
                {
                    b.Append(gap);
                }

                string charset = word.Groups[1].Value;
                string transfer = word.Groups[2].Value;
                string rawtext = word.Groups[3].Value;
                Encoding encoding = MimeUtility.TryGetEncoding(charset);

                //"B" selects base64; every other token is handled as the "Q" encoding
                if (string.Equals(transfer, "b", StringComparison.OrdinalIgnoreCase))
                {
                    b.Append(MimeUtility.DecodeBase64String(rawtext, encoding));
                }
                else
                {
                    MimeUtility.DecodeAndAppendQuotedPrintableSegment(b, rawtext, encoding);
                }

                cursor = word.Index + word.Length;
                isFirstWord = false;
            }

            //trailing text after the last encoded-word, including its leading whitespace
            b.Append(raw.Substring(cursor));

            hlist[i] = b.ToString();
        }
    }
}
/// <summary>
/// Builds a <see cref="ContentType"/> from the "content-type" header, filling in the
/// name, charset and boundary parameters, and falling back to the "filename" parameter
/// of the "content-disposition" header for the name of a multipart component.
/// </summary>
/// <param name="headers">Headers to read; "content-type" and "content-disposition" are consulted.</param>
/// <param name="boundary">
/// Boundary supplied by the caller. When non-empty it is assigned to the result (if a
/// content-type header is present) instead of the header's own boundary parameter, and the
/// headers are treated as those of a multipart component.
/// </param>
/// <returns>
/// The parsed content type. A default <see cref="ContentType"/> is used when the header is
/// missing or its media type cannot be parsed.
/// </returns>
private static ContentType GetContentType(NameValueCollection headers, string boundary)
{
    /****************************************************************
     * http://tools.ietf.org/html/rfc1341#page-5
     * http://tools.ietf.org/html/rfc1521#page-9
     ****************************************************************/
    const string CONTENT_TYPE = "content-type";
    const string P_CONTENT_TYPE = @"^([^; ]+)";

    //each parameter pattern captures a quoted value to group 1 or an unquoted value to group 2
    const string P_NAME = "name=(?:\"([^\"]+)\"|([^;]+))(?:;|$)";
    const string P_CHARSET = "charset=(?:\"([^\"]+)\"|([^;]+))(?:;|$)";
    const string P_BOUNDARY = "boundary=(?:\"([^\"]+)\"|([^;]+))(?:;|$)";
    const string P_FILENAME = "filename=(?:\"([^\"]+)\"|([^;]+))(?:;|$)";

    string ct = headers[CONTENT_TYPE];
    ContentType contentType = new ContentType();

    if (!string.IsNullOrEmpty(ct))
    {
        //surrounding whitespace can mess up the matching; trim before the media type is
        //matched as well, so that a leading blank does not discard the type
        ct = ct.Trim();

        //decode content-type
        Match match = Regex.Match(ct, P_CONTENT_TYPE, RegexOptions.IgnoreCase);
        if (match.Success)
        {
            try
            {
                contentType = new ContentType(match.Groups[1].Value);
            }
            catch (ArgumentException)
            {
                //unusable media type: fall back to the default content type
                contentType = new ContentType();
            }
            catch (FormatException)
            {
                //malformed media type: fall back to the default content type
                contentType = new ContentType();
            }
        }

        //decode name
        string name = ExtractHeaderParameter(ct, P_NAME);
        if (null != name)
        {
            contentType.Name = name;
        }

        //decode charset
        string charset = ExtractHeaderParameter(ct, P_CHARSET);
        if (null != charset)
        {
            //some charset encodings defined in incoming mail are not supported by System.Text.
            //We need to test it here to make sure that we won't crash later.

            //"cp1252" is an alias for "windows-1252" but is not a codepage identifier supported by Encoding.GetEncoding(string)
            if (string.Equals("cp1252", charset, StringComparison.OrdinalIgnoreCase))
            {
                charset = "windows-1252";
            }

            //MimeUtility.TryGetEncoding(string) will return a valid Encoding object. If we use the BodyName
            //from an Encoding object we can be sure that we won't crash converting back to an Encoding object.
            contentType.CharSet = MimeUtility.TryGetEncoding(charset).BodyName;
        }

        //a boundary supplied by the caller wins over the one declared in the header
        contentType.Boundary = boundary;
        if (string.IsNullOrEmpty(contentType.Boundary))
        {
            //decode boundary
            string declaredBoundary = ExtractHeaderParameter(ct, P_BOUNDARY);
            if (null != declaredBoundary)
            {
                contentType.Boundary = declaredBoundary;
            }
        }
    }

    //if boundary exists, these are sub-headers, i.e. the headers for a multipart component,
    //and we need to check for a filename in the content-disposition header.
    if (!string.IsNullOrEmpty(boundary) && string.IsNullOrEmpty(contentType.Name))
    {
        string disposition = headers["content-disposition"];
        if (!string.IsNullOrEmpty(disposition))
        {
            //parameter names are case-insensitive, so "FileName=" must match like the other parameters do
            string filename = ExtractHeaderParameter(disposition, P_FILENAME);
            if (null != filename)
            {
                contentType.Name = filename;
            }
        }
    }

    return contentType;
}

/// <summary>
/// Applies a parameter pattern (case-insensitively) to a header value and returns the
/// captured parameter value: group 1 when it participated in the match, otherwise group 2.
/// </summary>
/// <param name="input">The header value to search.</param>
/// <param name="pattern">A regular expression with the quoted value in group 1 and the unquoted value in group 2.</param>
/// <returns>The captured value, or null when the pattern does not match.</returns>
private static string ExtractHeaderParameter(string input, string pattern)
{
    Match match = Regex.Match(input, pattern, RegexOptions.IgnoreCase);
    if (!match.Success)
    {
        return null;
    }

    return match.Groups[1].Success ? match.Groups[1].Value : match.Groups[2].Value;
}