/// <summary>
/// Fetches <paramref name="url"/> through <c>GetResponse</c> and returns the response body
/// as HTML-decoded text.
/// </summary>
/// <param name="url">Address passed unchanged to <c>GetResponse</c>.</param>
/// <returns>
/// The HTML-decoded body; the literal <c>"[Not a html page.]"</c> when the response declares a
/// media type other than <c>text/html</c> or <c>text/xml</c>; or <c>null</c> when an
/// <see cref="InvalidOperationException"/> is raised while fetching or decoding.
/// </returns>
public override string GetHtml(string url)
{
    try
    {
        var response = GetResponse(url);
        var content = response.Content;
        var contentType = content.Headers.ContentType;

        // Media types are case-insensitive, so compare without regard to case.
        if (contentType != null
            && !string.Equals(contentType.MediaType, "text/xml", StringComparison.OrdinalIgnoreCase)
            && !string.Equals(contentType.MediaType, "text/html", StringComparison.OrdinalIgnoreCase))
        {
            return "[Not a html page.]";
        }

        var html = content.ReadAsStringAsync().Result;

        // The header carries no charset: look for one declared inside the document itself.
        if (contentType != null && contentType.CharSet == null)
        {
            var declared = CharSetRegex.Match(html);
            if (declared.Success)
            {
                string charset = declared.Groups[1].Value;

                // Any GB-family name (e.g. gb2312) is decoded as GBK.
                contentType.CharSet = charset.IndexOf("GB", StringComparison.OrdinalIgnoreCase) > -1
                    ? "GBK"
                    : charset;

                // Read again so the body is decoded with the charset that was just set on the header.
                html = content.ReadAsStringAsync().Result;
            }
        }

        return HttpUtility.HtmlDecode(html);
    }
    catch (InvalidOperationException)
    {
        return null;
    }
    catch (AggregateException ex)
    {
        // Task.Result wraps failures in AggregateException; treat a wrapped
        // InvalidOperationException exactly like a direct one.
        if (ex.InnerException is InvalidOperationException)
        {
            return null;
        }

        throw;
    }
}
/// <summary>
/// Fetches <paramref name="url"/> through <c>GetResponse</c> and returns the response body
/// as HTML-decoded text, decompressing it first when the response is gzip-encoded.
/// </summary>
/// <param name="url">Address passed unchanged to <c>GetResponse</c>.</param>
/// <returns>
/// The HTML-decoded body; the literal <c>"[Not a html page.]"</c> when the response declares a
/// media type other than <c>text/html</c> or <c>text/xml</c>; or <c>null</c> when an
/// <see cref="InvalidOperationException"/> is raised while fetching or decoding.
/// </returns>
public override string GetHtml(string url)
{
    try
    {
        var response = GetResponse(url);
        var content = response.Content;
        var contentType = content.Headers.ContentType;

        // Media types are case-insensitive, so compare without regard to case.
        if (contentType != null
            && !string.Equals(contentType.MediaType, "text/xml", StringComparison.OrdinalIgnoreCase)
            && !string.Equals(contentType.MediaType, "text/html", StringComparison.OrdinalIgnoreCase))
        {
            return "[Not a html page.]";
        }

        // Ordinal comparison: a culture-sensitive ToLower() breaks "GZIP" under e.g. the Turkish culture.
        bool gzipped = content.Headers.ContentEncoding.ToString()
            .IndexOf("gzip", StringComparison.OrdinalIgnoreCase) > -1;

        string html;
        if (gzipped)
        {
            // Decompress BEFORE any text decoding or charset sniffing; the raw bytes are not text yet.
            byte[] body = InflateGzipBody(content.ReadAsByteArrayAsync().Result);
            string headerCharset = contentType != null ? contentType.CharSet : null;
            html = DecodeBody(body, headerCharset);

            // The header carries no charset: look for one declared inside the document itself.
            if (contentType != null && headerCharset == null)
            {
                var declared = CharSetRegex.Match(html);
                if (declared.Success)
                {
                    string charset = declared.Groups[1].Value;

                    // Any GB-family name (e.g. gb2312) is decoded as GBK.
                    html = DecodeBody(
                        body,
                        charset.IndexOf("GB", StringComparison.OrdinalIgnoreCase) > -1 ? "GBK" : charset);
                }
            }
        }
        else
        {
            html = content.ReadAsStringAsync().Result;

            // The header carries no charset: look for one declared inside the document itself.
            if (contentType != null && contentType.CharSet == null)
            {
                var declared = CharSetRegex.Match(html);
                if (declared.Success)
                {
                    string charset = declared.Groups[1].Value;

                    // Any GB-family name (e.g. gb2312) is decoded as GBK.
                    contentType.CharSet = charset.IndexOf("GB", StringComparison.OrdinalIgnoreCase) > -1
                        ? "GBK"
                        : charset;

                    // Read again so the body is decoded with the charset that was just set on the header.
                    html = content.ReadAsStringAsync().Result;
                }
            }
        }

        return HttpUtility.HtmlDecode(html);
    }
    catch (InvalidOperationException)
    {
        return null;
    }
    catch (AggregateException ex)
    {
        // Task.Result wraps failures in AggregateException; treat a wrapped
        // InvalidOperationException exactly like a direct one.
        if (ex.InnerException is InvalidOperationException)
        {
            return null;
        }

        throw;
    }
}

/// <summary>Decompresses a gzip-encoded payload into its raw bytes.</summary>
private static byte[] InflateGzipBody(byte[] compressed)
{
    using (var input = new MemoryStream(compressed))
    using (var gzip = new GZipStream(input, CompressionMode.Decompress))
    using (var output = new MemoryStream())
    {
        gzip.CopyTo(output);
        return output.ToArray();
    }
}

/// <summary>
/// Decodes <paramref name="bytes"/> with the named charset, falling back to UTF-8 when the
/// name is missing or not recognised by <see cref="Encoding.GetEncoding(string)"/>.
/// </summary>
private static string DecodeBody(byte[] bytes, string charset)
{
    Encoding encoding = Encoding.UTF8;
    if (!string.IsNullOrEmpty(charset))
    {
        try
        {
            // Header values may arrive quoted (charset="utf-8").
            encoding = Encoding.GetEncoding(charset.Trim('"'));
        }
        catch (ArgumentException)
        {
            // Unknown charset name: keep the UTF-8 default.
        }
    }

    // StreamReader (rather than Encoding.GetString) keeps byte-order-mark detection and stripping.
    using (var reader = new StreamReader(new MemoryStream(bytes), encoding))
    {
        return reader.ReadToEnd();
    }
}