/// <summary>
/// Reads the next RFC 2047 (section 5) 'phrase' element from the source stream.
/// </summary>
/// <returns>
/// Returns the element that was read: a quoted-string (re-wrapped in double quotes), the result of
/// <b>EncodedWord()</b>, or an atom with any embedded encoded-words decoded.
/// Returns null if <b>Peek(true)</b> reports -1 or if <b>Atom()</b> returns null.
/// </returns>
public string Phrase()
{
    /* RFC 2047 5.
     *   phrase = 1*( encoded-word / word )
     *   word   = atom / quoted-string
     */

    int next = Peek(true);

    // Nothing available.
    if (next == -1) {
        return null;
    }

    // quoted-string: QuotedString() result is wrapped in quotes again before it is returned.
    if (next == '"') {
        return "\"" + QuotedString() + "\"";
    }

    // Anything starting with '=' is handed to the encoded-word reader.
    if (next == '=') {
        return EncodedWord();
    }

    string atom = Atom();
    if (atom == null) {
        return null;
    }

    // An atom may have encoded-words glued into it; decode every one the regex finds.
    return encodedword_regex.Replace(atom, delegate(Match m) {
        string raw = m.Value;

        try {
            string encoding = m.Groups["encoding"].Value;

            if (string.Equals(encoding, "Q", StringComparison.InvariantCultureIgnoreCase)) {
                Encoding charset = Encoding.GetEncoding(m.Groups["charset"].Value);

                return MIME_Utils.QDecode(charset, m.Groups["value"].Value);
            }

            if (string.Equals(encoding, "B", StringComparison.InvariantCultureIgnoreCase)) {
                Encoding charset = Encoding.GetEncoding(m.Groups["charset"].Value);
                byte[] data = Net_Utils.FromBase64(Encoding.Default.GetBytes(m.Groups["value"].Value));

                return charset.GetString(data);
            }

            // Unknown encoding, leave the encoded-word as is. RFC 2047 6.3.
            return raw;
        }
        catch {
            // Decoding failed, leave the encoded-word as is. RFC 2047 6.3.
            return raw;
        }
    });
}
/// <summary>
/// Parses an unstructured header field from its textual form.
/// </summary>
/// <param name="value">Header field text, including the field name. For example: 'Content-Type: text/plain'.</param>
/// <returns>Returns the parsed header field.</returns>
/// <exception cref="ArgumentNullException">Is raised when <b>value</b> is null reference.</exception>
/// <exception cref="ParseException">Is raised when the field name (the text before the first ':') is empty or whitespace.</exception>
public static MIME_h_Unstructured Parse(string value)
{
    if (value == null) {
        throw new ArgumentNullException("value");
    }

    MIME_h_Unstructured field = new MIME_h_Unstructured();

    // Split at the first ':' only, the field body may contain further colons.
    string[] fieldParts = value.Split(new char[] { ':' }, 2);
    if (fieldParts[0].Trim().Length == 0) {
        throw new ParseException("Invalid header field '" + value + "' syntax.");
    }
    field.m_Name = fieldParts[0];

    // Field body with leading whitespace removed; empty when the field had no ':' at all.
    string rawBody = fieldParts.Length == 2 ? fieldParts[1].TrimStart() : "";

    // The body may hold several encoded-words mixed with atoms / quoted-strings, so it is read
    // element by element and the pieces are glued back together.
    try {
        StringBuilder decoded = new StringBuilder();
        MIME_Reader reader = new MIME_Reader(MIME_Utils.UnfoldHeader(rawBody));

        bool finished = false;
        while (!finished) {
            // Keep whatever ToFirstChar() hands back so spacing between elements survives.
            string skipped = reader.ToFirstChar();
            if (!string.IsNullOrEmpty(skipped)) {
                decoded.Append(skipped);
            }

            string phrase = reader.Phrase();
            if (phrase != null) {
                decoded.Append(phrase);
            }
            else if (reader.Available == 0) {
                // Everything consumed.
                field.m_Value = decoded.ToString().TrimStart();
                finished = true;
            }
            else {
                // Some special char (like ':', '{' ...) that is not part of a phrase, copy it through.
                decoded.Append((char)reader.Char(false));
            }
        }
    }
    catch {
        // Parsing failed, fall back to the unfolded but otherwise undecoded body.
        field.m_Value = MIME_Utils.UnfoldHeader(rawBody);
    }

    field.m_ParseValue = value;

    return field;
}
/// <summary>
/// Decodes a single MIME <b>encoded-word</b>. Defined in RFC 2047 2.
/// </summary>
/// <param name="word">MIME encoded-word value.</param>
/// <returns>Returns the decoded word, or <b>word</b> unchanged when it could not be decoded.</returns>
/// <remarks>
/// If <b>word</b> is not an encoded-word (it is not wrapped in "=?" ... "?=" or does not have exactly
/// four '?' separators), uses an unknown encoding, or fails to decode, <b>word</b> is left as is.
/// Whitespace around the encoded-word is tolerated and is not part of the decoded result.
/// </remarks>
/// <exception cref="ArgumentNullException">Is raised when <b>word</b> is null reference.</exception>
public static string DecodeS(string word)
{
    if (word == null) {
        throw new ArgumentNullException("word");
    }

    /* RFC 2047 2.
     *   encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
     *
     * RFC 2231 5.
     *   The charset may carry a language suffix: charset ["*" language]
     */

    try {
        string[] parts = word.Split('?');

        // Not an encoded-word: wrong number of '?' separators.
        if (parts.Length != 5) {
            return word;
        }

        // Not an encoded-word: the "=?" prefix or "?=" suffix is missing. Without this check any text
        // with four '?' in it would be "decoded" and everything around the third segment dropped.
        if (parts[0].Trim() != "=" || parts[4].Trim() != "=") {
            return word;
        }

        // Drop the optional RFC 2231 "*language" suffix.
        string charset = parts[1].Split('*')[0];

        if (string.Equals(parts[2], "Q", StringComparison.OrdinalIgnoreCase)) {
            return MIME_Utils.QDecode(Encoding.GetEncoding(charset), parts[3]);
        }

        if (string.Equals(parts[2], "B", StringComparison.OrdinalIgnoreCase)) {
            byte[] data = Net_Utils.FromBase64(Encoding.Default.GetBytes(parts[3]));

            return Encoding.GetEncoding(charset).GetString(data);
        }

        // Unknown encoding.
        return word;
    }
    catch {
        // Failed to parse encoded-word, leave it as is. RFC 2047 6.3.
        return word;
    }
}
/// <summary>
/// Parses an unstructured header field from its textual form.
/// </summary>
/// <param name="value">Header field text, including the field name. For example: 'Content-Type: text/plain'.</param>
/// <returns>Returns the parsed header field.</returns>
/// <exception cref="ArgumentNullException">Is raised when <b>value</b> is null reference.</exception>
/// <exception cref="ParseException">Is raised when the field name (the text before the first ':') is empty or whitespace.</exception>
public static MIME_h_Unstructured Parse(string value)
{
    if (value == null) {
        throw new ArgumentNullException("value");
    }

    MIME_h_Unstructured field = new MIME_h_Unstructured();

    // Split at the first ':' only, the field body may contain further colons.
    string[] fieldParts = value.Split(new char[] { ':' }, 2);
    if (fieldParts[0].Trim().Length == 0) {
        throw new ParseException("Invalid header field '" + value + "' syntax.");
    }

    // Field body with leading whitespace removed; empty when the field had no ':' at all.
    string rawBody = "";
    if (fieldParts.Length == 2) {
        rawBody = fieldParts[1].TrimStart();
    }

    field.m_Name = fieldParts[0];
    // The whole unfolded body is passed to DecodeS as one word.
    field.m_Value = MIME_Encoding_EncodedWord.DecodeS(MIME_Utils.UnfoldHeader(rawBody));
    field.m_ParseValue = value;

    return field;
}
/// <summary>
/// Reads RFC 2047 'encoded-word' (and any directly following encoded-words) from source stream.
/// </summary>
/// <returns>
/// Returns the decoded text. Encoded-words that use an unknown encoding or fail to decode are
/// returned undecoded (RFC 2047 6.3). If no terminating "?=" exists, the rest of the source is returned.
/// </returns>
/// <exception cref="InvalidOperationException">Is raised when source stream has no encoded-word at current position.</exception>
public string EncodedWord()
{
    /* RFC 2047 2.
     *   encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
     *
     *   encoded-text = 1*<Any printable ASCII character other than "?" or SPACE>
     *                  ; (but see "Use of encoded-words in message
     *                  ; headers", section 5)
     *
     *   An 'encoded-word' may not be more than 75 characters long, including
     *   'charset', 'encoding', 'encoded-text', and delimiters. If it is
     *   desirable to encode more text than will fit in an 'encoded-word' of
     *   75 characters, multiple 'encoded-word's (separated by CRLF SPACE) may
     *   be used.
     */

    ToFirstChar();
    if (Peek(false) != '=') {
        throw new InvalidOperationException("No encoded-word available.");
    }

    StringBuilder retVal = new StringBuilder();

    while (true) {
        int index = m_Source.IndexOf("?=", m_Offset, StringComparison.Ordinal);

        // Q-encoded text may begin with '=' (for example "=?utf-8?Q?=E4bc?="). In that case the first
        // "?=" found is the '?' that closes the encoding token followed by the first character of the
        // encoded-text, not the terminator. That hit is the third '?' of the word, so when exactly
        // three '?' were seen up to it, look for the real terminator further on. If there is none,
        // the first hit is kept.
        if (index != -1 && string.CompareOrdinal(m_Source, m_Offset, "=?", 0, 2) == 0) {
            int separators = 0;
            for (int i = m_Offset; i <= index; i++) {
                if (m_Source[i] == '?') {
                    separators++;
                }
            }

            if (separators == 3) {
                int terminator = m_Source.IndexOf("?=", index + 1, StringComparison.Ordinal);
                if (terminator != -1) {
                    index = terminator;
                }
            }
        }

        // Invalid or not encoded-word.
        if (index == -1) {
            retVal.Append(ToEnd());
        }
        else {
            string encodedWord = m_Source.Substring(m_Offset, index - m_Offset + 2);

            // Move offset over encoded-word.
            m_Offset += encodedWord.Length;

            try {
                string[] encodedWordParts = encodedWord.Split('?');

                // Drop the optional RFC 2231 "*language" suffix from the charset.
                string charset = encodedWordParts[1].Split('*')[0];

                if (string.Equals(encodedWordParts[2], "Q", StringComparison.OrdinalIgnoreCase)) {
                    retVal.Append(MIME_Utils.QDecode(Encoding.GetEncoding(charset), encodedWordParts[3]));
                }
                else if (string.Equals(encodedWordParts[2], "B", StringComparison.OrdinalIgnoreCase)) {
                    byte[] data = Net_Utils.FromBase64(Encoding.Default.GetBytes(encodedWordParts[3]));

                    retVal.Append(Encoding.GetEncoding(charset).GetString(data));
                }
                // Unknown encoding, leave encoded-word as is. RFC 2047 6.3.
                else {
                    retVal.Append(encodedWord);
                }
            }
            catch {
                // Failed to parse encoded-word (including too few '?' parts), leave it as is. RFC 2047 6.3.
                retVal.Append(encodedWord);
            }
        }

        // Another encoded-word follows (only whitespace in between): move to it and continue.
        if (m_Source.Substring(m_Offset).TrimStart().StartsWith("=?", StringComparison.Ordinal)) {
            ToFirstChar();
        }
        // encoded-word does not continue.
        else {
            break;
        }
    }

    return retVal.ToString();
}
/// <summary>
/// Decodes all MIME <b>encoded-word</b>s found in the specified text. Defined in RFC 2047 2.
/// </summary>
/// <param name="text">Text which may contain encoded-words.</param>
/// <returns>Returns <b>text</b> with every encoded-word matched by the encoded-word regex replaced by its decoded value.</returns>
/// <exception cref="ArgumentNullException">Is raised when <b>text</b> is null reference.</exception>
public static string DecodeTextS(string text)
{
    if (text == null) {
        // The parameter is named 'text'; the exception must report that name.
        throw new ArgumentNullException("text");
    }

    /* RFC 2047 2.
     *   encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
     *
     *   encoded-text = 1*<Any printable ASCII character other than "?" or SPACE>
     *                  ; (but see "Use of encoded-words in message
     *                  ; headers", section 5)
     *
     *   An 'encoded-word' may not be more than 75 characters long, including
     *   'charset', 'encoding', 'encoded-text', and delimiters. If it is
     *   desirable to encode more text than will fit in an 'encoded-word' of
     *   75 characters, multiple 'encoded-word's (separated by CRLF SPACE) may
     *   be used.
     *
     * RFC 2231 updates.
     *   encoded-word := "=?" charset ["*" language] "?" encoded-text "?="
     */

    return encodedword_regex.Replace(text, delegate(Match m) {
        // We have an encoded-word, try to decode it.
        string encodedWord = m.Value;

        try {
            if (string.Equals(m.Groups["encoding"].Value, "Q", StringComparison.InvariantCultureIgnoreCase)) {
                encodedWord = MIME_Utils.QDecode(Encoding.GetEncoding(m.Groups["charset"].Value), m.Groups["value"].Value);
            }
            else if (string.Equals(m.Groups["encoding"].Value, "B", StringComparison.InvariantCultureIgnoreCase)) {
                encodedWord = Encoding.GetEncoding(m.Groups["charset"].Value).GetString(Net_Utils.FromBase64(Encoding.Default.GetBytes(m.Groups["value"].Value)));
            }
            // Any other encoding: encodedWord still holds the matched text.

            // Whitespace between two adjacent encoded-words is dropped; otherwise the captured
            // 'whitespaces' group is appended to the result. "Adjacent" means the next match starts
            // exactly where this match ends.
            Match mNext = encodedword_regex.Match(text, m.Index + m.Length);
            if (!(mNext.Success && mNext.Index == (m.Index + m.Length))) {
                encodedWord += m.Groups["whitespaces"].Value;
            }

            return encodedWord;
        }
        catch {
            // Failed to parse encoded-word, leave it as is. RFC 2047 6.3.
            return encodedWord;
        }
    });
}
/// <summary>
/// Parses parameters from the specified reader.
/// </summary>
/// <param name="reader">MIME reader. Everything up to the end of the reader is consumed and split on ';' into parameters.</param>
/// <exception cref="ArgumentNullException">Is raised when <b>reader</b> is null reference.</exception>
/// <remarks>
/// Handles plain parameters (<c>name=value</c>), RFC 2231 continuations (<c>name*N=value</c>),
/// encoded continuations (<c>name*N*=value</c>) and the encoded single-value form (<c>name*=charset'language'value</c>).
/// When a plain parameter name is repeated, only the first occurrence is kept.
/// </remarks>
public void Parse(MIME_Reader reader)
{
    if (reader == null) {
        throw new ArgumentNullException("reader");
    }

    /* RFC 2231.
     *   Asterisks ("*") are reused to provide the indicator that language and
     *   character set information is present and encoding is being used. A
     *   single quote ("'") is used to delimit the character set and language
     *   information at the beginning of the parameter value. Percent signs
     *   ("%") are used as the encoding flag, which agrees with RFC 2047.
     *
     *   Character set and language information may be combined with the
     *   parameter continuation mechanism. For example:
     *
     *     Content-Type: application/x-stuff
     *       title*0*=us-ascii'en'This%20is%20even%20more%20
     *       title*1*=%2A%2A%2Afun%2A%2A%2A%20
     *       title*2="isn't it!"
     *
     *   Note that:
     *
     *   (1) Language and character set information only appear at
     *       the beginning of a given parameter value.
     *
     *   (2) Continuations do not provide a facility for using more
     *       than one character set or language in the same
     *       parameter value.
     *
     *   (3) A value presented using multiple continuations may
     *       contain a mixture of encoded and unencoded segments.
     *
     *   (4) The first segment of a continuation MUST be encoded if
     *       language and character set information are given.
     *
     *   (5) If the first segment of a continued parameter value is
     *       encoded the language and character set field delimiters
     *       MUST be present even when the fields are left blank.
     */

    KeyValueCollection<string, _ParameterBuilder> parameters = new KeyValueCollection<string, _ParameterBuilder>();

    // Parse all parameter parts.
    string[] parameterParts = TextUtils.SplitQuotedString(reader.ToEnd(), ';');
    foreach (string part in parameterParts) {
        if (string.IsNullOrEmpty(part)) {
            continue;
        }

        string[] name_value = part.Trim().Split(new char[] { '=' }, 2);
        string paramName = name_value[0].Trim();

        // Stays null for a valueless parameter.
        string paramValue = null;
        if (name_value.Length == 2) {
            paramValue = TextUtils.UnQuoteString(MIME_Utils.UnfoldHeader(name_value[1].Trim()));
        }

        // name      -> { name }
        // name*N    -> { name, N }
        // name*N*   -> { name, N, "" }
        // name*     -> { name, "" }
        string[] nameParts = paramName.Split('*');
        string name = nameParts[0];

        // "name*N*" is an encoded continuation segment.
        bool encoded = nameParts.Length == 3;

        // "name*=charset'language'value" (RFC 2231 4) is an encoded single value. It is treated as
        // encoded only when the value really carries both ' delimiters, so a malformed value keeps
        // being stored literally as before.
        if (!encoded && nameParts.Length == 2 && nameParts[1].Length == 0 && paramValue != null) {
            encoded = paramValue.Split('\'').Length >= 3;
        }

        // Get multi value parameter index. A missing or non-numeric index counts as 0.
        int index = 0;
        if (nameParts.Length >= 2) {
            int.TryParse(nameParts[1], out index);
        }

        bool known = parameters.ContainsKey(name);

        // Single value parameter and we already have parameter with such name, skip it.
        if (nameParts.Length < 2 && known) {
            continue;
        }

        // Parameter builder doesn't exist for the specified parameter, create it.
        if (!known) {
            parameters.Add(name, new _ParameterBuilder(name));
        }

        parameters[name].AddPart(index, encoded, paramValue);
    }

    // Build parameters from parts.
    foreach (_ParameterBuilder b in parameters) {
        m_pParameters.Add(b.Name, b.GetParamter());
    }

    m_IsModified = false;
}
/// <summary>
/// Reads RFC 2047 'encoded-word' (and any encoded-words that follow it directly) from source stream.
/// </summary>
/// <returns>
/// Returns the decoded text. Encoded-words that use an unknown encoding or fail to decode are
/// returned undecoded (RFC 2047 6.3). If the text at the current position does not match the
/// encoded-word regex, the result of <b>Atom()</b> is returned instead.
/// </returns>
/// <exception cref="InvalidOperationException">Is raised when source stream has no encoded-word at current position.</exception>
public string EncodedWord()
{
    /* RFC 2047 2.
     *   encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
     *
     *   encoded-text = 1*<Any printable ASCII character other than "?" or SPACE>
     *                  ; (but see "Use of encoded-words in message
     *                  ; headers", section 5)
     *
     *   An 'encoded-word' may not be more than 75 characters long, including
     *   'charset', 'encoding', 'encoded-text', and delimiters. If it is
     *   desirable to encode more text than will fit in an 'encoded-word' of
     *   75 characters, multiple 'encoded-word's (separated by CRLF SPACE) may
     *   be used.
     */

    ToFirstChar();
    if (Peek(false) != '=') {
        throw new InvalidOperationException("No encoded-word available.");
    }

    StringBuilder result = new StringBuilder();

    bool more = true;
    while (more) {
        Match current = encodedword_regex.Match(m_Source, m_Offset);

        if (!(current.Success && current.Index == m_Offset)) {
            // No encoded-word starts exactly at the current offset, read it as an atom.
            result.Append(Atom());
        }
        else {
            string raw = m_Source.Substring(m_Offset, current.Length);

            // Move offset over the matched text.
            m_Offset += current.Length;

            string piece;
            try {
                string encoding = current.Groups["encoding"].Value;

                if (string.Equals(encoding, "Q", StringComparison.InvariantCultureIgnoreCase)) {
                    piece = MIME_Utils.QDecode(Encoding.GetEncoding(current.Groups["charset"].Value), current.Groups["value"].Value);
                }
                else if (string.Equals(encoding, "B", StringComparison.InvariantCultureIgnoreCase)) {
                    byte[] data = Net_Utils.FromBase64(Encoding.Default.GetBytes(current.Groups["value"].Value));
                    piece = Encoding.GetEncoding(current.Groups["charset"].Value).GetString(data);
                }
                else {
                    // Unknown encoding, leave encoded-word as is. RFC 2047 6.3.
                    piece = raw;
                }
            }
            catch {
                // Failed to parse encoded-word, leave it as is. RFC 2047 6.3.
                piece = raw;
            }

            result.Append(piece);
        }

        // Keep going only while another encoded-word starts exactly at the new offset.
        Match following = encodedword_regex.Match(m_Source, m_Offset);
        more = following.Success && following.Index == m_Offset;
        if (more) {
            ToFirstChar();
        }
    }

    return result.ToString();
}