/// <summary>
        /// Returns the decoded body of a response whose Content-Type is "text/plain" or "text/html".
        /// </summary>
        /// <param name="resp">The response whose Content-Type header is inspected and whose body is decoded.</param>
        /// <returns>
        /// The result of <c>DecodeBody</c> for the two supported types; an empty string when the
        /// Content-Type header is missing, empty, or any other type.
        /// </returns>
        private static string GetBodyText(HttpWebResponse resp)
        {
            string contentTypeHeader = resp.ContentType;
            if (string.IsNullOrEmpty(contentTypeHeader))
            {
                // Without a Content-Type there is nothing to dispatch on.
                return "";
            }

            // The parsed header exposes its main value under the empty-string key
            // (presumably the bare media type without parameters -- confirm against MimeHelper).
            IDictionary parsedHeader = MimeHelper.ParseContentType(contentTypeHeader, true);
            string mediaType = (string)parsedHeader[""];

            switch (mediaType)
            {
                // Both textual types are handled identically here: decode and return as-is.
                case "text/plain":
                case "text/html":
                    return DecodeBody(resp);

                default:
                    return "";
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Attempts to read an explicitly declared character set from the response's Content-Type header.
        /// </summary>
        /// <param name="response">The response whose Content-Type header is parsed.</param>
        /// <param name="characterSet">
        /// On return, the value of the header's "charset" entry when one is present and non-empty;
        /// otherwise null. This parameter is passed uninitialized.
        /// </param>
        /// <returns>true if a non-empty charset was found in the Content-Type header; otherwise, false.</returns>
        private bool TryGetCharacterSet(HttpWebResponse response, out string characterSet)
        {
            // Why HttpWebResponse.CharacterSet is not used directly (summarising a community comment on MSDN):
            // http://msdn.microsoft.com/en-us/library/system.net.httpwebresponse.characterset(v=VS.80).aspx
            //
            // Many servers omit the charset from their Content-Type header and send just "text/html".
            // In that case the CharacterSet property always reports ISO-8859-1 (the W3C-recommended
            // default), even though the content frequently uses a different encoding. Callers therefore
            // cannot tell "explicitly ISO-8859-1" apart from "not specified". Real user agents fall back
            // to looking inside the markup (e.g. a meta tag declaring "utf-8"), so what is needed here is
            // a way to know the charset was NOT specified -- hence the header is parsed by hand.

            const string charsetParameter = "charset";

            string headerValue = response.ContentType;
            string declaredCharset = null;

            if (!String.IsNullOrEmpty(headerValue))
            {
                IDictionary headerParts = MimeHelper.ParseContentType(headerValue, true);
                if (headerParts.Contains(charsetParameter))
                {
                    // "as" leaves this null if the stored value is not a string.
                    declaredCharset = headerParts[charsetParameter] as string;
                }
            }

            if (String.IsNullOrEmpty(declaredCharset))
            {
                // Missing header, missing charset entry, or an empty charset value.
                characterSet = null;
                return false;
            }

            characterSet = declaredCharset;

            // Debug-only cross-check: the hand-parsed value should agree (ignoring case) with
            // what the framework reports for the same response.
            Debug.Assert(characterSet.Equals(response.CharacterSet, StringComparison.OrdinalIgnoreCase),
                         "CharacterSet was parsed incorrectly!");
            return true;
        }
        /// <summary>
        /// Produces a text rendering of the response body for "text/plain" and "text/html" responses.
        /// </summary>
        /// <param name="resp">The response whose Content-Type header is inspected and whose body is decoded.</param>
        /// <returns>
        /// For "text/plain", the result of <c>DecodeBody</c>. For "text/html", the decoded body passed
        /// through <c>LightWeightHTMLThinner2.Thin</c>, <c>HTMLDocumentHelper.HTMLToPlainText</c> and
        /// <c>StringHelper.CompressExcessWhitespace</c>, in that order. An empty string when the
        /// Content-Type header is missing, empty, or any other type.
        /// </returns>
        private static string GetBodyText(HttpWebResponse resp)
        {
            string contentTypeHeader = resp.ContentType;
            if (string.IsNullOrEmpty(contentTypeHeader))
            {
                // Without a Content-Type there is nothing to dispatch on.
                return "";
            }

            // The parsed header exposes its main value under the empty-string key
            // (presumably the bare media type without parameters -- confirm against MimeHelper).
            IDictionary parsedHeader = MimeHelper.ParseContentType(contentTypeHeader, true);
            string mediaType = (string)parsedHeader[""];

            if (mediaType == "text/plain")
            {
                return DecodeBody(resp);
            }

            if (mediaType == "text/html")
            {
                // Pipeline order matters: thin the markup, convert it to plain text,
                // then compress whitespace in the result.
                string html = DecodeBody(resp);
                return StringHelper.CompressExcessWhitespace(
                    HTMLDocumentHelper.HTMLToPlainText(
                        LightWeightHTMLThinner2.Thin(html, true)));
            }

            return "";
        }