예제 #1
0
        /// <summary>
        /// Fetches <paramref name="url"/> through <c>GetResponse</c> and returns the body as an
        /// HTML-decoded string.
        /// </summary>
        /// <param name="url">Address handed unchanged to <c>GetResponse</c>.</param>
        /// <returns>
        /// The HTML-decoded body; the literal <c>[Not a html page.]</c> when a Content-Type header is
        /// present and its media type is neither <c>text/xml</c> nor <c>text/html</c>; or <c>null</c>
        /// when an <see cref="InvalidOperationException"/> is caught.
        /// </returns>
        public override string GetHtml(string url)
        {
            try
            {
                // NOTE(review): the response is not disposed here because ownership of the object
                // returned by GetResponse is not visible from this method - confirm.
                var response = GetResponse(url);
                var contentType = response.Content.Headers.ContentType;

                // Media types are case-insensitive, so compare without regard to case.
                if (contentType != null
                    && !string.Equals(contentType.MediaType, "text/xml", StringComparison.OrdinalIgnoreCase)
                    && !string.Equals(contentType.MediaType, "text/html", StringComparison.OrdinalIgnoreCase))
                {
                    return "[Not a html page.]";
                }

                var html = response.Content.ReadAsStringAsync().Result;

                // The header named no charset: look for one declared inside the markup itself.
                if (contentType != null && contentType.CharSet == null)
                {
                    var match = CharSetRegex.Match(html);
                    if (match.Success)
                    {
                        string charset = match.Groups[1].Value;

                        // Any charset name containing "GB" is decoded as GBK.
                        contentType.CharSet = charset.IndexOf("GB", StringComparison.OrdinalIgnoreCase) > -1 ? "GBK" : charset;

                        // Read the body again so that it is decoded with the charset just assigned.
                        html = response.Content.ReadAsStringAsync().Result;
                    }
                }

                return HttpUtility.HtmlDecode(html);
            }
            catch (InvalidOperationException)
            {
                // Failures surfacing through Task.Result arrive as AggregateException and are
                // deliberately not handled here; only a direct InvalidOperationException maps to null.
                return null;
            }
        }
예제 #2
0
        /// <summary>
        /// Fetches <paramref name="url"/> through <c>GetResponse</c> and returns the body as an
        /// HTML-decoded string, decompressing it first when the response is gzip-encoded.
        /// </summary>
        /// <param name="url">Address handed unchanged to <c>GetResponse</c>.</param>
        /// <returns>
        /// The HTML-decoded body; the literal <c>[Not a html page.]</c> when a Content-Type header is
        /// present and its media type is neither <c>text/xml</c> nor <c>text/html</c>; or <c>null</c>
        /// when an <see cref="InvalidOperationException"/> is caught.
        /// </returns>
        public override string GetHtml(string url)
        {
            try
            {
                // NOTE(review): the response is not disposed here because ownership of the object
                // returned by GetResponse is not visible from this method - confirm.
                var response = GetResponse(url);
                var contentType = response.Content.Headers.ContentType;

                // Media types are case-insensitive, so compare without regard to case.
                if (contentType != null
                    && !string.Equals(contentType.MediaType, "text/xml", StringComparison.OrdinalIgnoreCase)
                    && !string.Equals(contentType.MediaType, "text/html", StringComparison.OrdinalIgnoreCase))
                {
                    return "[Not a html page.]";
                }

                // Ordinal comparison: a culture-sensitive ToLower() can miss "GZIP" (e.g. Turkish cultures).
                bool isGzip = response.Content.Headers.ContentEncoding.ToString()
                    .IndexOf("gzip", StringComparison.OrdinalIgnoreCase) > -1;

                string html;

                if (isGzip)
                {
                    // Decompress BEFORE any text decoding; sniffing a charset in compressed bytes is meaningless.
                    byte[] raw;
                    using (var compressed = response.Content.ReadAsStreamAsync().Result)
                    using (var gzip = new GZipStream(compressed, CompressionMode.Decompress))
                    using (var buffer = new MemoryStream())
                    {
                        gzip.CopyTo(buffer);
                        raw = buffer.ToArray();
                    }

                    string charsetName = contentType != null ? contentType.CharSet : null;
                    string utf8Text = null;

                    if (string.IsNullOrEmpty(charsetName))
                    {
                        // No charset in the header: decode as UTF-8 and look for one declared in the markup.
                        using (var reader = new StreamReader(new MemoryStream(raw), Encoding.UTF8))
                        {
                            utf8Text = reader.ReadToEnd();
                        }

                        var sniffed = CharSetRegex.Match(utf8Text);
                        if (sniffed.Success)
                        {
                            string declared = sniffed.Groups[1].Value;

                            // Any charset name containing "GB" is decoded as GBK.
                            charsetName = declared.IndexOf("GB", StringComparison.OrdinalIgnoreCase) > -1 ? "GBK" : declared;
                        }
                    }

                    Encoding encoding = Encoding.UTF8;
                    if (!string.IsNullOrEmpty(charsetName))
                    {
                        try
                        {
                            // Header values may arrive quoted, e.g. charset="utf-8".
                            encoding = Encoding.GetEncoding(charsetName.Trim('"', '\'', ' '));
                        }
                        catch (ArgumentException)
                        {
                            // Unknown or unavailable charset: keep the UTF-8 default.
                            encoding = Encoding.UTF8;
                        }
                    }

                    if (utf8Text != null && encoding.CodePage == Encoding.UTF8.CodePage)
                    {
                        // The UTF-8 text decoded for sniffing is already the final result.
                        html = utf8Text;
                    }
                    else
                    {
                        using (var reader = new StreamReader(new MemoryStream(raw), encoding))
                        {
                            html = reader.ReadToEnd();
                        }
                    }
                }
                else
                {
                    html = response.Content.ReadAsStringAsync().Result;

                    // The header named no charset: look for one declared inside the markup itself.
                    if (contentType != null && contentType.CharSet == null)
                    {
                        var match = CharSetRegex.Match(html);
                        if (match.Success)
                        {
                            string metaCharset = match.Groups[1].Value;
                            contentType.CharSet = metaCharset.IndexOf("GB", StringComparison.OrdinalIgnoreCase) > -1 ? "GBK" : metaCharset;

                            // Read the body again so that it is decoded with the charset just assigned.
                            html = response.Content.ReadAsStringAsync().Result;
                        }
                    }
                }

                return HttpUtility.HtmlDecode(html);
            }
            catch (InvalidOperationException)
            {
                // Failures surfacing through Task.Result arrive as AggregateException and are
                // deliberately not handled here; only a direct InvalidOperationException maps to null.
                return null;
            }
        }