/// <summary>
/// Decodes "encoded-word"'s from the specified text. For more information see RFC 2047.
/// </summary>
/// <remarks>
/// Text that only looks like an encoded-word but can't be decoded (malformed syntax, unknown
/// charset or unsupported encoding) is copied to the result unchanged, together with the
/// white space that surrounds it.
/// </remarks>
/// <param name="text">Text to decode.</param>
/// <returns>Returns decoded text, or null if <paramref name="text"/> is null.</returns>
public static string DecodeWords(string text)
{
    if (text == null)
    {
        return null;
    }

    /* RFC 2047 2. Syntax of encoded-words.
     *  An 'encoded-word' is defined by the following ABNF grammar. The
     *  notation of RFC 822 is used, with the exception that white space
     *  characters MUST NOT appear between components of an 'encoded-word'.
     *
     *  encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
     *  charset      = token    ; see section 3
     *  encoding     = token    ; see section 4
     *  token        = 1*<Any CHAR except SPACE, CTLs, and especials>
     *  especials    = "(" / ")" / "<" / ">" / "@" / "," / ";" / ":" / "
     *                 <"> / "/" / "[" / "]" / "?" / "." / "="
     *  encoded-text = 1*<Any printable ASCII character other than "?" or SPACE>
     *                 ; (but see "Use of encoded-words in message headers", section 5)
     *
     *  Both 'encoding' and 'charset' names are case-independent. Thus the
     *  charset name "ISO-8859-1" is equivalent to "iso-8859-1", and the
     *  encoding named "Q" may be spelled either "Q" or "q".
     *
     *  An 'encoded-word' may not be more than 75 characters long, including
     *  'charset', 'encoding', 'encoded-text', and delimiters. If it is
     *  desirable to encode more text than will fit in an 'encoded-word' of
     *  75 characters, multiple 'encoded-word's (separated by CRLF SPACE) may
     *  be used.
     *
     *  IMPORTANT: 'encoded-word's are designed to be recognized as 'atom's
     *  by an RFC 822 parser. As a consequence, unencoded white space
     *  characters (such as SPACE and HTAB) are FORBIDDEN within an
     *  'encoded-word'. For example, the character sequence
     *
     *      =?iso-8859-1?q?this is some text?=
     *
     *  would be parsed as four 'atom's, rather than as a single 'atom' (by
     *  an RFC 822 parser) or 'encoded-word' (by a parser which understands
     *  'encoded-words'). The correct way to encode the string "this is some
     *  text" is to encode the SPACE characters as well, e.g.
     *
     *      =?iso-8859-1?q?this=20is=20some=20text?=
     */

    StringReader r = new StringReader(text);
    StringBuilder retVal = new StringBuilder();

    // We need to loop all words: if it is an encoded-word, decode it, otherwise just append it to the return value.
    // True only while the previously appended item was a successfully decoded encoded-word.
    bool lastIsEncodedWord = false;
    while (r.Available > 0)
    {
        string whiteSpaces = r.ReadToFirstChar();

        // Probably is encoded-word, we try to parse it.
        if (r.StartsWith("=?") && r.SourceString.IndexOf("?=", StringComparison.Ordinal) > -1)
        {
            // Collects the raw characters consumed so far, so they can be emitted as is if decoding fails.
            StringBuilder encodedWord = new StringBuilder();
            string decodedWord = null;

            try
            {
                // NOTE: We can't read encoded word and then split !!!, we need to read each part.

                // Remove =?
                encodedWord.Append(r.ReadSpecifiedLength(2));

                // Read charset
                string charset = r.QuotedReadToDelimiter('?');
                encodedWord.Append(charset + "?");

                // Read encoding
                string encoding = r.QuotedReadToDelimiter('?');
                encodedWord.Append(encoding + "?");

                // Read text
                string encodedText = r.QuotedReadToDelimiter('?');
                encodedWord.Append(encodedText + "?");

                // We must have remaining '=' here
                if (r.StartsWith("="))
                {
                    encodedWord.Append(r.ReadSpecifiedLength(1));

                    Encoding c = EncodingTools.GetEncodingByCodepageName(charset);
                    if (c != null)
                    {
                        // Encoding names are case-independent (RFC 2047 2.); compare ordinally so the
                        // result does not depend on the current culture.
                        if (string.Equals(encoding, "q", StringComparison.OrdinalIgnoreCase))
                        {
                            decodedWord = Core.QDecode(c, encodedText);
                        }
                        else if (string.Equals(encoding, "b", StringComparison.OrdinalIgnoreCase))
                        {
                            decodedWord = c.GetString(Core.Base64Decode(Encoding.Default.GetBytes(encodedText)));
                        }
                    }
                }
            }
            catch
            {
                // Deliberate best effort: not an encoded-word or contains unknown charset/encoding,
                // so leave the encoded-word as is.
            }

            /* RFC 2047 6.2.
             *  When displaying a particular header field that contains multiple
             *  'encoded-word's, any 'linear-white-space' that separates a pair of
             *  adjacent 'encoded-word's is ignored. (This is to allow the use of
             *  multiple 'encoded-word's to represent long strings of unencoded text,
             *  without having to separate 'encoded-word's where spaces occur in the
             *  unencoded text.)
             */
            // White space is dropped only between two successfully decoded encoded-words.
            // If this word is left as is, it is ordinary text and keeps its leading white space.
            if (!lastIsEncodedWord || decodedWord == null)
            {
                retVal.Append(whiteSpaces);
            }

            if (decodedWord == null)
            {
                // Decoding failed for that encoded-word, leave encoded-word as is.
                retVal.Append(encodedWord.ToString());
            }
            else
            {
                // We decoded encoded-word successfully.
                retVal.Append(decodedWord);
            }

            lastIsEncodedWord = decodedWord != null;
        }
        // Normal word.
        else if (r.StartsWithWord())
        {
            retVal.Append(whiteSpaces + r.ReadWord(false));
            lastIsEncodedWord = false;
        }
        // We have some separator or parenthesis.
        else
        {
            retVal.Append(whiteSpaces + r.ReadSpecifiedLength(1));

            // A separator breaks adjacency, so white space before a following encoded-word must be kept.
            lastIsEncodedWord = false;
        }
    }

    return retVal.ToString();
}
/// <summary>
/// Decodes "encoded-word"'s from the specified text. For more information see RFC 2047.
/// </summary>
/// <remarks>
/// Text that only looks like an encoded-word but can't be decoded (malformed syntax, unknown
/// charset or unsupported encoding) is copied to the result unchanged, together with the
/// white space that surrounds it.
/// </remarks>
/// <param name="text">Text to decode.</param>
/// <returns>Returns decoded text, or null if <paramref name="text"/> is null.</returns>
public static string DecodeWords(string text)
{
    if (text == null)
    {
        return null;
    }

    /* RFC 2047 2. Syntax of encoded-words.
        An 'encoded-word' is defined by the following ABNF grammar. The
        notation of RFC 822 is used, with the exception that white space
        characters MUST NOT appear between components of an 'encoded-word'.

        encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
        charset      = token    ; see section 3
        encoding     = token    ; see section 4
        token        = 1*<Any CHAR except SPACE, CTLs, and especials>
        especials    = "(" / ")" / "<" / ">" / "@" / "," / ";" / ":" / "
                       <"> / "/" / "[" / "]" / "?" / "." / "="
        encoded-text = 1*<Any printable ASCII character other than "?" or SPACE>
                       ; (but see "Use of encoded-words in message headers", section 5)

        Both 'encoding' and 'charset' names are case-independent. Thus the
        charset name "ISO-8859-1" is equivalent to "iso-8859-1", and the
        encoding named "Q" may be spelled either "Q" or "q".

        An 'encoded-word' may not be more than 75 characters long, including
        'charset', 'encoding', 'encoded-text', and delimiters. If it is
        desirable to encode more text than will fit in an 'encoded-word' of
        75 characters, multiple 'encoded-word's (separated by CRLF SPACE) may
        be used.

        IMPORTANT: 'encoded-word's are designed to be recognized as 'atom's
        by an RFC 822 parser. As a consequence, unencoded white space
        characters (such as SPACE and HTAB) are FORBIDDEN within an
        'encoded-word'. For example, the character sequence

            =?iso-8859-1?q?this is some text?=

        would be parsed as four 'atom's, rather than as a single 'atom' (by
        an RFC 822 parser) or 'encoded-word' (by a parser which understands
        'encoded-words'). The correct way to encode the string "this is some
        text" is to encode the SPACE characters as well, e.g.

            =?iso-8859-1?q?this=20is=20some=20text?=
    */

    StringReader r = new StringReader(text);
    StringBuilder retVal = new StringBuilder();

    // We need to loop all words: if it is an encoded-word, decode it, otherwise just append it to the return value.
    // True only while the previously appended item was a successfully decoded encoded-word.
    bool lastIsEncodedWord = false;
    while (r.Available > 0)
    {
        string whiteSpaces = r.ReadToFirstChar();

        // Probably is encoded-word, we try to parse it.
        if (r.StartsWith("=?") && r.SourceString.IndexOf("?=", StringComparison.Ordinal) > -1)
        {
            // Collects the raw characters consumed so far, so they can be emitted as is if decoding fails.
            StringBuilder encodedWord = new StringBuilder();
            string decodedWord = null;

            try
            {
                // NOTE: We can't read encoded word and then split !!!, we need to read each part.

                // Remove =?
                encodedWord.Append(r.ReadSpecifiedLength(2));

                // Read charset
                string charset = r.QuotedReadToDelimiter('?');
                encodedWord.Append(charset + "?");

                // Read encoding
                string encoding = r.QuotedReadToDelimiter('?');
                encodedWord.Append(encoding + "?");

                // Read text
                string encodedText = r.QuotedReadToDelimiter('?');
                encodedWord.Append(encodedText + "?");

                // We must have remaining '=' here
                if (r.StartsWith("="))
                {
                    encodedWord.Append(r.ReadSpecifiedLength(1));

                    Encoding c = EncodingTools.GetEncodingByCodepageName(charset);
                    if (c != null)
                    {
                        // Encoding names are case-independent (RFC 2047 2.); compare ordinally so the
                        // result does not depend on the current culture.
                        if (string.Equals(encoding, "q", StringComparison.OrdinalIgnoreCase))
                        {
                            decodedWord = Core.QDecode(c, encodedText);
                        }
                        else if (string.Equals(encoding, "b", StringComparison.OrdinalIgnoreCase))
                        {
                            decodedWord = c.GetString(Core.Base64Decode(Encoding.Default.GetBytes(encodedText)));
                        }
                    }
                }
            }
            catch
            {
                // Deliberate best effort: not an encoded-word or contains unknown charset/encoding,
                // so leave the encoded-word as is.
            }

            /* RFC 2047 6.2.
                When displaying a particular header field that contains multiple
                'encoded-word's, any 'linear-white-space' that separates a pair of
                adjacent 'encoded-word's is ignored. (This is to allow the use of
                multiple 'encoded-word's to represent long strings of unencoded text,
                without having to separate 'encoded-word's where spaces occur in the
                unencoded text.)
            */
            // White space is dropped only between two successfully decoded encoded-words.
            // If this word is left as is, it is ordinary text and keeps its leading white space.
            if (!lastIsEncodedWord || decodedWord == null)
            {
                retVal.Append(whiteSpaces);
            }

            if (decodedWord == null)
            {
                // Decoding failed for that encoded-word, leave encoded-word as is.
                retVal.Append(encodedWord.ToString());
            }
            else
            {
                // We decoded encoded-word successfully.
                retVal.Append(decodedWord);
            }

            lastIsEncodedWord = decodedWord != null;
        }
        // Normal word.
        else if (r.StartsWithWord())
        {
            retVal.Append(whiteSpaces + r.ReadWord(false));
            lastIsEncodedWord = false;
        }
        // We have some separator or parenthesis.
        else
        {
            retVal.Append(whiteSpaces + r.ReadSpecifiedLength(1));

            // A separator breaks adjacency, so white space before a following encoded-word must be kept.
            lastIsEncodedWord = false;
        }
    }

    return retVal.ToString();
}
// TODO get rid of this method, only IMAP uses it

#region Methods

/// <summary>
/// Parses rfc 2822 datetime.
/// </summary>
/// <remarks>
/// Also accepts the IMAP "day-month-year" form. An optional leading "day-of-week," and a
/// trailing " (comment)" are ignored. When no time zone can be parsed, the time is treated as UTC.
/// </remarks>
/// <param name="date">Date string.</param>
/// <returns>
/// If the value contains a time, that time converted from UTC to local time; otherwise the
/// date only, with no time zone conversion applied.
/// </returns>
/// <exception cref="ArgumentNullException">Is raised when <paramref name="date"/> is null.</exception>
/// <exception cref="FormatException">
/// Is raised when the day, month, year, hour, minute or second part is not a valid number.
/// </exception>
public static DateTime ParseDate(string date)
{
    if (date == null)
    {
        throw new ArgumentNullException("date");
    }

    /* Rfc 2822 3.3. Date and Time Specification.
     *  date-time = [ day-of-week "," ] date FWS time [CFWS]
     *  date      = day month year
     *  time      = hour ":" minute [ ":" second ] FWS zone
     */

    /* IMAP date format.
     *  date-time = date FWS time [CFWS]
     *  date      = day-month-year
     *  time      = hour ":" minute [ ":" second ] FWS zone
     */

    // zone = (( "+" / "-" ) 4DIGIT)

    // Invariant lower-casing keeps the constant matching below independent of the current culture.
    date = date.ToLowerInvariant();

    //--- Replace timezone constants -------//
    // BST = British Summer Time.
    string[][] zoneConstants =
    {
        new[] { "ut", "-0000" },
        new[] { "gmt", "-0000" },
        new[] { "edt", "-0400" },
        new[] { "est", "-0500" },
        new[] { "cdt", "-0500" },
        new[] { "cst", "-0600" },
        new[] { "mdt", "-0600" },
        new[] { "mst", "-0700" },
        new[] { "pdt", "-0700" },
        new[] { "pst", "-0800" },
        new[] { "bst", "+0100" }
    };
    foreach (string[] zone in zoneConstants)
    {
        date = date.Replace(zone[0], zone[1]);
    }
    //----------------------------------------//

    //--- Replace month constants ---//
    string[][] monthConstants =
    {
        new[] { "jan", "01" },
        new[] { "feb", "02" },
        new[] { "mar", "03" },
        new[] { "apr", "04" },
        new[] { "may", "05" },
        new[] { "jun", "06" },
        new[] { "jul", "07" },
        new[] { "aug", "08" },
        new[] { "sep", "09" },
        new[] { "oct", "10" },
        new[] { "nov", "11" },
        new[] { "dec", "12" }
    };
    foreach (string[] monthConstant in monthConstants)
    {
        date = date.Replace(monthConstant[0], monthConstant[1]);
    }
    //-------------------------------//

    // If date contains optional "day-of-week,", remove it
    int commaIndex = date.IndexOf(',');
    if (commaIndex > -1)
    {
        date = date.Substring(commaIndex + 1);
    }

    // Remove () from date. "Mon, 13 Oct 2003 20:50:57 +0300 (EEST)"
    int commentIndex = date.IndexOf(" (", StringComparison.Ordinal);
    if (commentIndex > -1)
    {
        date = date.Substring(0, commentIndex);
    }

    int year = 1900;
    int month = 1;
    int day = 1;
    int hour = -1;   // -1 = no time part was parsed
    int minute = -1;
    int second = -1;
    int zoneMinutes = 0; // minutes to add to reach UTC; 0 when the zone is missing or unparsable

    StringReader s = new StringReader(date);

    //--- Parse date -------------------------------------------------------------------//
    char[] dateTerminators = { '.', '-', ' ' };
    try
    {
        day = Convert.ToInt32(s.ReadWord(true, dateTerminators, true));
    }
    catch (Exception x)
    {
        throw new FormatException("Invalid date value '" + date + "', invalid day value !", x);
    }

    try
    {
        month = Convert.ToInt32(s.ReadWord(true, dateTerminators, true));
    }
    catch (Exception x)
    {
        throw new FormatException("Invalid date value '" + date + "', invalid month value !", x);
    }

    try
    {
        year = Convert.ToInt32(s.ReadWord(true, dateTerminators, true));
    }
    catch (Exception x)
    {
        throw new FormatException("Invalid date value '" + date + "', invalid year value !", x);
    }
    //----------------------------------------------------------------------------------//

    //--- Parse time -------------------------------------------------------------------//
    // Time is optional, so parse it if its included.
    if (s.Available > 0)
    {
        try
        {
            hour = Convert.ToInt32(s.ReadWord(true, new[] { ':' }, true));
        }
        catch (Exception x)
        {
            throw new FormatException("Invalid date value '" + date + "', invalid hour value !", x);
        }

        try
        {
            minute = Convert.ToInt32(s.ReadWord(true, new[] { ':' }, false));
        }
        catch (Exception x)
        {
            throw new FormatException("Invalid date value '" + date + "', invalid minute value !", x);
        }

        // Seconds are optional (time = hour ":" minute [ ":" second ]); default to 0 so that
        // a time without seconds is not discarded.
        second = 0;

        s.ReadToFirstChar();
        if (s.StartsWith(":"))
        {
            s.ReadSpecifiedLength(1);
            try
            {
                string secondString = s.ReadWord(true, new[] { ' ' }, true);

                // Milli seconds specified, remove them.
                int fractionIndex = secondString.IndexOf('.');
                if (fractionIndex > -1)
                {
                    secondString = secondString.Substring(0, fractionIndex);
                }

                second = Convert.ToInt32(secondString);
            }
            catch (Exception x)
            {
                throw new FormatException("Invalid date value '" + date + "', invalid second value !", x);
            }
        }

        s.ReadToFirstChar();
        if (s.Available > 3)
        {
            string timezone = s.SourceString.Replace(":", "");
            if (timezone.StartsWith("+", StringComparison.Ordinal) || timezone.StartsWith("-", StringComparison.Ordinal))
            {
                bool utc_add_time = timezone.StartsWith("+", StringComparison.Ordinal);

                // Remove +/- sign
                timezone = timezone.Substring(1);

                // Pad time zone to 4 symbols. For example 200, will be 0200.
                while (timezone.Length < 4)
                {
                    timezone = "0" + timezone;
                }

                try
                {
                    // time zone format hours|minutes
                    int h = Convert.ToInt32(timezone.Substring(0, 2));
                    int m = Convert.ToInt32(timezone.Substring(2));

                    // A "+hhmm" zone is ahead of UTC, so it is subtracted to get UTC; "-hhmm" is added.
                    if (utc_add_time)
                    {
                        zoneMinutes = 0 - ((h * 60) + m);
                    }
                    else
                    {
                        zoneMinutes = (h * 60) + m;
                    }
                }
                catch
                {
                    // Just skip time zone, if can't parse; the time is then treated as UTC.
                }
            }
        }
    }
    //---------------------------------------------------------------------------------//

    // Convert time to UTC
    if (hour != -1 && minute != -1 && second != -1)
    {
        DateTime utc = new DateTime(year, month, day, hour, minute, second).AddMinutes(zoneMinutes);
        return DateTime.SpecifyKind(utc, DateTimeKind.Utc).ToLocalTime();
    }

    return new DateTime(year, month, day);
}
// TODO get rid of this method, only IMAP uses it

#region Methods

/// <summary>
/// Parses rfc 2822 datetime.
/// </summary>
/// <remarks>
/// Also accepts the IMAP "day-month-year" form. An optional leading "day-of-week," and a
/// trailing " (comment)" are ignored. When no time zone can be parsed, the time is treated as UTC.
/// </remarks>
/// <param name="date">Date string.</param>
/// <returns>
/// If the value contains a time, that time converted from UTC to local time; otherwise the
/// date only, with no time zone conversion applied.
/// </returns>
/// <exception cref="ArgumentNullException">Is raised when <paramref name="date"/> is null.</exception>
/// <exception cref="FormatException">
/// Is raised when the day, month, year, hour, minute or second part is not a valid number.
/// </exception>
public static DateTime ParseDate(string date)
{
    if (date == null)
    {
        throw new ArgumentNullException("date");
    }

    /* Rfc 2822 3.3. Date and Time Specification.
        date-time = [ day-of-week "," ] date FWS time [CFWS]
        date      = day month year
        time      = hour ":" minute [ ":" second ] FWS zone
    */

    /* IMAP date format.
        date-time = date FWS time [CFWS]
        date      = day-month-year
        time      = hour ":" minute [ ":" second ] FWS zone
    */

    // zone = (( "+" / "-" ) 4DIGIT)

    // Invariant lower-casing keeps the constant matching below independent of the current culture.
    date = date.ToLowerInvariant();

    //--- Replace timezone constants -------//
    // BST = British Summer Time.
    string[][] zoneConstants =
    {
        new[] {"ut", "-0000"},
        new[] {"gmt", "-0000"},
        new[] {"edt", "-0400"},
        new[] {"est", "-0500"},
        new[] {"cdt", "-0500"},
        new[] {"cst", "-0600"},
        new[] {"mdt", "-0600"},
        new[] {"mst", "-0700"},
        new[] {"pdt", "-0700"},
        new[] {"pst", "-0800"},
        new[] {"bst", "+0100"}
    };
    foreach (string[] zone in zoneConstants)
    {
        date = date.Replace(zone[0], zone[1]);
    }
    //----------------------------------------//

    //--- Replace month constants ---//
    string[][] monthConstants =
    {
        new[] {"jan", "01"},
        new[] {"feb", "02"},
        new[] {"mar", "03"},
        new[] {"apr", "04"},
        new[] {"may", "05"},
        new[] {"jun", "06"},
        new[] {"jul", "07"},
        new[] {"aug", "08"},
        new[] {"sep", "09"},
        new[] {"oct", "10"},
        new[] {"nov", "11"},
        new[] {"dec", "12"}
    };
    foreach (string[] monthConstant in monthConstants)
    {
        date = date.Replace(monthConstant[0], monthConstant[1]);
    }
    //-------------------------------//

    // If date contains optional "day-of-week,", remove it
    int commaIndex = date.IndexOf(',');
    if (commaIndex > -1)
    {
        date = date.Substring(commaIndex + 1);
    }

    // Remove () from date. "Mon, 13 Oct 2003 20:50:57 +0300 (EEST)"
    int commentIndex = date.IndexOf(" (", StringComparison.Ordinal);
    if (commentIndex > -1)
    {
        date = date.Substring(0, commentIndex);
    }

    int year = 1900;
    int month = 1;
    int day = 1;
    int hour = -1;   // -1 = no time part was parsed
    int minute = -1;
    int second = -1;
    int zoneMinutes = 0; // minutes to add to reach UTC; 0 when the zone is missing or unparsable

    StringReader s = new StringReader(date);

    //--- Parse date -------------------------------------------------------------------//
    char[] dateTerminators = {'.', '-', ' '};
    try
    {
        day = Convert.ToInt32(s.ReadWord(true, dateTerminators, true));
    }
    catch (Exception x)
    {
        throw new FormatException("Invalid date value '" + date + "', invalid day value !", x);
    }

    try
    {
        month = Convert.ToInt32(s.ReadWord(true, dateTerminators, true));
    }
    catch (Exception x)
    {
        throw new FormatException("Invalid date value '" + date + "', invalid month value !", x);
    }

    try
    {
        year = Convert.ToInt32(s.ReadWord(true, dateTerminators, true));
    }
    catch (Exception x)
    {
        throw new FormatException("Invalid date value '" + date + "', invalid year value !", x);
    }
    //----------------------------------------------------------------------------------//

    //--- Parse time -------------------------------------------------------------------//
    // Time is optional, so parse it if its included.
    if (s.Available > 0)
    {
        try
        {
            hour = Convert.ToInt32(s.ReadWord(true, new[] {':'}, true));
        }
        catch (Exception x)
        {
            throw new FormatException("Invalid date value '" + date + "', invalid hour value !", x);
        }

        try
        {
            minute = Convert.ToInt32(s.ReadWord(true, new[] {':'}, false));
        }
        catch (Exception x)
        {
            throw new FormatException("Invalid date value '" + date + "', invalid minute value !", x);
        }

        // Seconds are optional (time = hour ":" minute [ ":" second ]); default to 0 so that
        // a time without seconds is not discarded.
        second = 0;

        s.ReadToFirstChar();
        if (s.StartsWith(":"))
        {
            s.ReadSpecifiedLength(1);
            try
            {
                string secondString = s.ReadWord(true, new[] {' '}, true);

                // Milli seconds specified, remove them.
                int fractionIndex = secondString.IndexOf('.');
                if (fractionIndex > -1)
                {
                    secondString = secondString.Substring(0, fractionIndex);
                }

                second = Convert.ToInt32(secondString);
            }
            catch (Exception x)
            {
                throw new FormatException("Invalid date value '" + date + "', invalid second value !", x);
            }
        }

        s.ReadToFirstChar();
        if (s.Available > 3)
        {
            string timezone = s.SourceString.Replace(":", "");
            if (timezone.StartsWith("+", StringComparison.Ordinal) || timezone.StartsWith("-", StringComparison.Ordinal))
            {
                bool utc_add_time = timezone.StartsWith("+", StringComparison.Ordinal);

                // Remove +/- sign
                timezone = timezone.Substring(1);

                // Pad time zone to 4 symbols. For example 200, will be 0200.
                while (timezone.Length < 4)
                {
                    timezone = "0" + timezone;
                }

                try
                {
                    // time zone format hours|minutes
                    int h = Convert.ToInt32(timezone.Substring(0, 2));
                    int m = Convert.ToInt32(timezone.Substring(2));

                    // A "+hhmm" zone is ahead of UTC, so it is subtracted to get UTC; "-hhmm" is added.
                    if (utc_add_time)
                    {
                        zoneMinutes = 0 - ((h*60) + m);
                    }
                    else
                    {
                        zoneMinutes = (h*60) + m;
                    }
                }
                catch
                {
                    // Just skip time zone, if can't parse; the time is then treated as UTC.
                }
            }
        }
    }
    //---------------------------------------------------------------------------------//

    // Convert time to UTC
    if (hour != -1 && minute != -1 && second != -1)
    {
        DateTime utc = new DateTime(year, month, day, hour, minute, second).AddMinutes(zoneMinutes);
        return DateTime.SpecifyKind(utc, DateTimeKind.Utc).ToLocalTime();
    }

    return new DateTime(year, month, day);
}