Exemplo n.º 1
0
        /// <summary>
        /// Decodes "encoded-word"'s from the specified text. For more information see RFC 2047.
        /// </summary>
        /// <remarks>
        /// The text is processed token by token. Tokens that parse as
        /// <c>=?charset?encoding?encoded-text?=</c> with a known charset and a "Q" or "B"
        /// encoding are replaced by their decoded value. Anything that fails to parse or decode
        /// is copied to the output unchanged. White space that separates two directly adjacent
        /// encoded-words is dropped (RFC 2047 6.2); all other white space is preserved.
        /// </remarks>
        /// <param name="text">Text to decode. May be <c>null</c>.</param>
        /// <returns>Returns decoded text, or <c>null</c> if <paramref name="text"/> is <c>null</c>.</returns>
        public static string DecodeWords(string text)
        {
            if(text == null){
                return null;
            }

            /* RFC 2047 2. Syntax of encoded-words.
                An 'encoded-word' is defined by the following ABNF grammar.  The
                notation of RFC 822 is used, with the exception that white space
                characters MUST NOT appear between components of an 'encoded-word'.

                encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
                charset      = token    ; see section 3
                encoding     = token    ; see section 4
                token        = 1*<Any CHAR except SPACE, CTLs, and especials>
                especials    = "(" / ")" / "<" / ">" / "@" / "," / ";" / ":" / "
                               <"> / "/" / "[" / "]" / "?" / "." / "="
                encoded-text = 1*<Any printable ASCII character other than "?" or SPACE>
                                       ; (but see "Use of encoded-words in message headers", section 5)

                Both 'encoding' and 'charset' names are case-independent.  Thus the
                charset name "ISO-8859-1" is equivalent to "iso-8859-1", and the
                encoding named "Q" may be spelled either "Q" or "q".

                An 'encoded-word' may not be more than 75 characters long, including
                'charset', 'encoding', 'encoded-text', and delimiters.  If it is
                desirable to encode more text than will fit in an 'encoded-word' of
                75 characters, multiple 'encoded-word's (separated by CRLF SPACE) may
                be used.
              
                IMPORTANT: 'encoded-word's are designed to be recognized as 'atom's
                by an RFC 822 parser.  As a consequence, unencoded white space
                characters (such as SPACE and HTAB) are FORBIDDEN within an
                'encoded-word'.  For example, the character sequence

                =?iso-8859-1?q?this is some text?=

                would be parsed as four 'atom's, rather than as a single 'atom' (by
                an RFC 822 parser) or 'encoded-word' (by a parser which understands
                'encoded-words').  The correct way to encode the string "this is some
                text" is to encode the SPACE characters as well, e.g.

                =?iso-8859-1?q?this=20is=20some=20text?=
            */

            StringReader  r      = new StringReader(text);
            StringBuilder retVal = new StringBuilder();

            // We need to loop all words: if it is an encoded word, decode it, otherwise just append it to the return value.
            // lastIsEncodedWord tracks whether the token emitted immediately before the current one
            // was an encoded-word, so that white space between two adjacent encoded-words can be dropped.
            bool lastIsEncodedWord = false;
            while(r.Available > 0){
                // Presumably consumes and returns the white space preceding the next token - confirm against StringReader.
                string whiteSpaces = r.ReadToFirstChar();

                // Probably is encoded-word, we try to parse it.
                if(r.StartsWith("=?") && r.SourceString.IndexOf("?=") > -1){
                    // encodedWord collects the raw characters consumed so far, so the original
                    // text can be emitted unchanged if decoding fails part-way through.
                    StringBuilder encodedWord = new StringBuilder();
                    string        decodedWord = null;

                    try{
                        // NOTE: We can't read encoded word and then split !!!, we need to read each part.
                    
                        // Remove =?
                        encodedWord.Append(r.ReadSpecifiedLength(2));

                        // Read charset
                        string charset = r.QuotedReadToDelimiter('?');
                        encodedWord.Append(charset + "?");

                        // Read encoding
                        string encoding = r.QuotedReadToDelimiter('?');
                        encodedWord.Append(encoding + "?");

                        // Read text
                        string encodedText = r.QuotedReadToDelimiter('?');
                        encodedWord.Append(encodedText + "?");

                        // We must have remaining '=' here
                        if(r.StartsWith("=")){
                            encodedWord.Append(r.ReadSpecifiedLength(1));

                            Encoding c = Encoding.GetEncoding(charset);

                            // Encoding names are case-independent (RFC 2047 2.); compare ordinally so the
                            // result does not depend on the current culture.
                            if(string.Equals(encoding,"q",System.StringComparison.OrdinalIgnoreCase)){
                                decodedWord = Core.QDecode(c,encodedText);
                            }
                            else if(string.Equals(encoding,"b",System.StringComparison.OrdinalIgnoreCase)){
                                decodedWord = c.GetString(Core.Base64Decode(Encoding.Default.GetBytes(encodedText)));
                            }
                            // Any other encoding leaves decodedWord null, so the raw text is kept below.
                        }
                    }
                    catch{
                        // Not encoded-word or contains unknown charset/encoding, so leave
                        // encoded-word as is. Swallowing is deliberate: decoding is best-effort.
                    }

                    /* RFC 2047 6.2.
                        When displaying a particular header field that contains multiple
                        'encoded-word's, any 'linear-white-space' that separates a pair of
                        adjacent 'encoded-word's is ignored.  (This is to allow the use of
                        multiple 'encoded-word's to represent long strings of unencoded text,
                        without having to separate 'encoded-word's where spaces occur in the
                        unencoded text.)
                    */
                    if(!lastIsEncodedWord){
                        retVal.Append(whiteSpaces);
                    }

                    // Decoding failed for that encoded-word, leave encoded-word as is.
                    if(decodedWord == null){
                        retVal.Append(encodedWord.ToString());
                    }
                    // We decoded encoded-word successfully.
                    else{
                        retVal.Append(decodedWord);
                    }

                    lastIsEncodedWord = true;
                }
                // Normal word.
                else if(r.StartsWithWord()){
                    retVal.Append(whiteSpaces + r.ReadWord(false));
                    lastIsEncodedWord = false;
                }
                // We have some separator or parenthesis.
                else{
                    retVal.Append(whiteSpaces + r.ReadSpecifiedLength(1));

                    // A separator breaks encoded-word adjacency: white space that follows it must be
                    // preserved even if the next token is an encoded-word.
                    lastIsEncodedWord = false;
                }
            }

            return retVal.ToString();
        }