/// <summary> /// Decodes "encoded-word"'s from the specified text. For more information see RFC 2047. /// </summary> /// <param name="text">Text to decode.</param> /// <returns>Returns decoded text.</returns> public static string DecodeWords(string text) { if(text == null){ return null; } /* RFC 2047 2. Syntax of encoded-words. An 'encoded-word' is defined by the following ABNF grammar. The notation of RFC 822 is used, with the exception that white space characters MUST NOT appear between components of an 'encoded-word'. encoded-word = "=?" charset "?" encoding "?" encoded-text "?=" charset = token ; see section 3 encoding = token ; see section 4 token = 1*<Any CHAR except SPACE, CTLs, and especials> especials = "(" / ")" / "<" / ">" / "@" / "," / ";" / ":" / " <"> / "/" / "[" / "]" / "?" / "." / "=" encoded-text = 1*<Any printable ASCII character other than "?" or SPACE> ; (but see "Use of encoded-words in message headers", section 5) Both 'encoding' and 'charset' names are case-independent. Thus the charset name "ISO-8859-1" is equivalent to "iso-8859-1", and the encoding named "Q" may be spelled either "Q" or "q". An 'encoded-word' may not be more than 75 characters long, including 'charset', 'encoding', 'encoded-text', and delimiters. If it is desirable to encode more text than will fit in an 'encoded-word' of 75 characters, multiple 'encoded-word's (separated by CRLF SPACE) may be used. IMPORTANT: 'encoded-word's are designed to be recognized as 'atom's by an RFC 822 parser. As a consequence, unencoded white space characters (such as SPACE and HTAB) are FORBIDDEN within an 'encoded-word'. For example, the character sequence =?iso-8859-1?q?this is some text?= would be parsed as four 'atom's, rather than as a single 'atom' (by an RFC 822 parser) or 'encoded-word' (by a parser which understands 'encoded-words'). The correct way to encode the string "this is some text" is to encode the SPACE characters as well, e.g. =?iso-8859-1?q?this=20is=20some=20text?= */ StringReader r = new StringReader(text); StringBuilder retVal = new StringBuilder(); // We need to loop all words, if encoded word, decode it, othwerwise just append to return value. bool lastIsEncodedWord = false; while(r.Available > 0){ string whiteSpaces = r.ReadToFirstChar(); // Probably is encoded-word, we try to parse it. if(r.StartsWith("=?") && r.SourceString.IndexOf("?=") > -1){ StringBuilder encodedWord = new StringBuilder(); string decodedWord = null; try{ // NOTE: We can't read encoded word and then split !!!, we need to read each part. // Remove =? encodedWord.Append(r.ReadSpecifiedLength(2)); // Read charset string charset = r.QuotedReadToDelimiter('?'); encodedWord.Append(charset + "?"); // Read encoding string encoding = r.QuotedReadToDelimiter('?'); encodedWord.Append(encoding + "?"); // Read text string encodedText = r.QuotedReadToDelimiter('?'); encodedWord.Append(encodedText + "?"); // We must have remaining '=' here if(r.StartsWith("=")){ encodedWord.Append(r.ReadSpecifiedLength(1)); Encoding c = Encoding.GetEncoding(charset); if(encoding.ToLower() == "q"){ decodedWord = Core.QDecode(c,encodedText); } else if(encoding.ToLower() == "b"){ decodedWord = c.GetString(Core.Base64Decode(Encoding.Default.GetBytes(encodedText))); } } } catch{ // Not encoded-word or contains unknwon charset/encoding, so leave // encoded-word as is. } /* RFC 2047 6.2. When displaying a particular header field that contains multiple 'encoded-word's, any 'linear-white-space' that separates a pair of adjacent 'encoded-word's is ignored. (This is to allow the use of multiple 'encoded-word's to represent long strings of unencoded text, without having to separate 'encoded-word's where spaces occur in the unencoded text.) */ if(!lastIsEncodedWord){ retVal.Append(whiteSpaces); } // Decoding failed for that encoded-word, leave encoded-word as is. if(decodedWord == null){ retVal.Append(encodedWord.ToString()); } // We deocded encoded-word successfully. else{ retVal.Append(decodedWord); } lastIsEncodedWord = true; } // Normal word. else if(r.StartsWithWord()){ retVal.Append(whiteSpaces + r.ReadWord(false)); lastIsEncodedWord = false; } // We have some separator or parenthesize. else{ retVal.Append(whiteSpaces + r.ReadSpecifiedLength(1)); } } return retVal.ToString(); }