示例#1
0
        /// <summary>
        /// Decodes a single line according to its content-transfer-encoding and appends the result to <paramref name="builder"/>.
        /// </summary>
        /// <param name="builder">Destination buffer. When null the call does nothing.</param>
        /// <param name="buff">The raw (still encoded) line.</param>
        /// <param name="transferEncoding">
        /// Transfer-encoding name, matched case-insensitively. "quoted-printable" and "base64" are decoded;
        /// any other value (including null) causes the line to be appended unchanged followed by CRLF.
        /// </param>
        /// <param name="charset">Charset name handed to MimeUtility.TryGetEncoding to select the text encoding used while decoding.</param>
        private static void DecodeAndAppendLine(StringBuilder builder, string buff, string transferEncoding, string charset)
        {
            if (builder == null)
            {
                return;
            }

            //Encoding names are protocol tokens, not linguistic text, so they are compared ordinally.
            //A culture-sensitive comparison fails to match "QUOTED-PRINTABLE" under a Turkish culture (I/i casing rules).
            if (string.Equals(transferEncoding, "quoted-printable", System.StringComparison.OrdinalIgnoreCase))
            {
                builder.Append(MimeUtility.DecodeQuotedPrintableLine(buff, MimeUtility.TryGetEncoding(charset)));
                return;
            }

            if (string.Equals(transferEncoding, "base64", System.StringComparison.OrdinalIgnoreCase))
            {
                builder.Append(MimeUtility.DecodeBase64String(buff, MimeUtility.TryGetEncoding(charset)));
                return;
            }

            //no decoding required: keep the text as-is and restore the line break
            builder.Append(buff).Append("\r\n");
        }
示例#2
0
        /// <summary>
        /// Replaces RFC 2047 "encoded-word" sequences found in header values with their decoded text.
        /// The arrays stored in <paramref name="headers"/> are modified in place.
        /// </summary>
        /// <param name="headers">Header values keyed by header name; every array element containing an encoded-word is overwritten.</param>
        private static void InPlaceDecodeExtendedHeaders(Dictionary <string, string[]> headers)
        {
            // RFC 2047 "Message Header Extensions" (http://tools.ietf.org/search/rfc2047)
            //
            //   section 2:  encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
            //               White space MUST NOT appear between the components of an encoded-word.
            //   section 3:  'charset' is any character set name allowed in a MIME "charset"
            //               parameter or registered with IANA for text/plain.
            //   section 4:  the legal values for 'encoding' are "Q" and "B"; a mail reader must
            //               accept either one for any character set it supports.
            //   section 4.1: "B" is identical to the BASE64 encoding of RFC 2045.
            //   section 4.2: "Q" is similar to the Quoted-Printable encoding of RFC 2045.

            Debug.WriteLine("InPlaceDecodeExtendedHeaders()");

            //One encoded-word plus at most one trailing whitespace character. The whole expression is a
            //capture group so that Regex.Split returns the encoded-words together with the text between them.
            const string encodedWordSplitter = @"(=\?[^\?]+\?[^\?]+\?[^\?]+\?=\s?)";

            //Anchored form used to take a single encoded-word apart:
            //group 1 = charset, group 2 = transfer encoding, group 3 = raw encoded text.
            const string encodedWordParser = @"^=\?([^\?]+)\?([^\?]+)\?([^\?]+)\?=\s?$";

            foreach (string[] values in headers.Values)
            {
                for (int index = 0; index < values.Length; index++)
                {
                    string raw = values[index];
                    if (!Regex.IsMatch(raw, encodedWordSplitter))
                    {
                        continue;   //nothing to decode in this value
                    }

                    StringBuilder decoded = new StringBuilder();
                    foreach (string segment in Regex.Split(raw, encodedWordSplitter))
                    {
                        Match word = Regex.Match(segment, encodedWordParser);
                        if (!word.Success)
                        {
                            //plain text between encoded-words is copied through unchanged
                            if (!string.IsNullOrEmpty(segment))
                            {
                                decoded.Append(segment);
                            }
                            continue;
                        }

                        Encoding textEncoding = MimeUtility.TryGetEncoding(word.Groups[1].Value);
                        string   encodingFlag = word.Groups[2].Value;
                        string   payload      = word.Groups[3].Value;

                        if (string.Compare(encodingFlag, "b", true) == 0)
                        {
                            decoded.Append(MimeUtility.DecodeBase64String(payload, textEncoding));
                        }
                        else
                        {
                            //every encoding flag other than "B" is handled as "Q"
                            MimeUtility.DecodeAndAppendQuotedPrintableSegment(decoded, payload, textEncoding);
                        }
                    }

                    values[index] = decoded.ToString();
                }
            }
        }
示例#3
0
        /// <summary>
        /// Builds a <see cref="ContentType"/> from the "content-type" header, filling in the name, charset and
        /// boundary parameters when they are present.
        /// </summary>
        /// <param name="headers">Headers to inspect; the "content-type" and "content-disposition" entries are read.</param>
        /// <param name="boundary">
        /// Boundary of the enclosing multipart section, or null/empty when there is none. When a content-type header
        /// exists, a non-empty value here is used instead of any boundary parameter found in that header. A non-empty
        /// value also enables the content-disposition filename fallback for the name.
        /// </param>
        /// <returns>
        /// The parsed content type. A default-constructed <see cref="ContentType"/> is used as the starting point when
        /// the header is missing or its media type cannot be parsed.
        /// </returns>
        private static ContentType GetContentType(NameValueCollection headers, string boundary)
        {
            /****************************************************************
             * http://tools.ietf.org/html/rfc1341#page-5
             * http://tools.ietf.org/html/rfc1521#page-9
             *
             * /****************************************************************/

            const string CONTENT_TYPE = "content-type";
            string       ct           = headers[CONTENT_TYPE];
            bool         hasHeader    = !string.IsNullOrEmpty(ct);

            ContentType contentType = null;
            Match       match;

            if (hasHeader)
            {
                //surrounding whitespace can mess up the matching; trimming first also lets the
                //anchored media type pattern below cope with leading whitespace
                ct = ct.Trim();

                //decode content-type
                const string P_CONTENT_TYPE = @"^([^; ]+)";
                match = Regex.Match(ct, P_CONTENT_TYPE, RegexOptions.IgnoreCase);
                if (match.Success)
                {
                    try
                    {
                        contentType = new ContentType(match.Groups[1].Value);
                    }
                    catch (ArgumentException)
                    {
                        //unusable media type: fall back to the default content type below
                    }
                    catch (FormatException)
                    {
                        //malformed media type: fall back to the default content type below
                    }
                }
            }

            if (contentType == null)
            {
                contentType = new ContentType();
            }

            if (hasHeader)
            {
                //decode name
                const string P_NAME = "name=(?:\"([^\"]+)\"|([^;]+))(?:;|$)"; //captures value to group 1 or 2
                match = Regex.Match(ct, P_NAME, RegexOptions.IgnoreCase);
                if (match.Success)
                {
                    contentType.Name = match.Groups[1].Success ? match.Groups[1].Value : match.Groups[2].Value;
                }

                //decode charset
                const string P_CHARSET = "charset=(?:\"([^\"]+)\"|([^;]+))(?:;|$)"; //captures value to group 1 or 2
                match = Regex.Match(ct, P_CHARSET, RegexOptions.IgnoreCase);
                if (match.Success)
                {
                    //some charset encodings defined in incoming are not supported by System.Text.
                    //We need to test it here to make sure that we won't crash later.

                    string charset = match.Groups[1].Success ? match.Groups[1].Value : match.Groups[2].Value;
                    //"cp1252" is an alias for "windows-1252" but is not a codepage identifier supported by Encoding.GetEncoding(string)
                    if (string.Equals("cp1252", charset, StringComparison.OrdinalIgnoreCase))
                    {
                        charset = "windows-1252";
                    }
                    //MimeUtility.TryGetEncoding(string) is expected to return a valid Encoding object. If we use the BodyName
                    //from an Encoding object we can be sure that we won't crash converting back to an Encoding object.
                    contentType.CharSet = MimeUtility.TryGetEncoding(charset).BodyName;
                }

                //a boundary supplied by the caller wins over the one declared in the header
                contentType.Boundary = boundary;
                if (string.IsNullOrEmpty(contentType.Boundary))
                {
                    //decode boundary
                    const string P_BOUNDARY = "boundary=(?:\"([^\"]+)\"|([^;]+))(?:;|$)"; //captures value to group 1 or 2
                    match = Regex.Match(ct, P_BOUNDARY, RegexOptions.IgnoreCase);
                    if (match.Success)
                    {
                        contentType.Boundary = match.Groups[1].Success ? match.Groups[1].Value : match.Groups[2].Value;
                    }
                }
            }

            //if boundary exists, these are sub-headers and then these are the headers for a
            //multipart component and we need to check for a filename in the content-disposition header.
            if (!string.IsNullOrEmpty(boundary) && string.IsNullOrEmpty(contentType.Name))
            {
                const string P_FILENAME = "filename=(?:\"([^\"]+)\"|([^;]+))(?:;|$)"; //captures value to group 1 or 2

                string disposition = headers["content-disposition"];
                if (!string.IsNullOrEmpty(disposition))
                {
                    //parameter names are case-insensitive, so match the same way as the content-type parameters
                    match = Regex.Match(disposition, P_FILENAME, RegexOptions.IgnoreCase);
                    if (match.Success)
                    {
                        contentType.Name = match.Groups[1].Success ? match.Groups[1].Value : match.Groups[2].Value;
                    }
                }
            }

            return contentType;
        }