/// <summary>
/// Detects the character encoding of a response. The Content-Type header of
/// <paramref name="response"/> is inspected first; if it does not yield a usable
/// charset, the body bytes are decoded as ASCII and searched for a
/// "text/html|text/xml ... charset=NAME" declaration (for example an HTML META tag).
/// </summary>
/// <param name="response">Response whose <c>ContentTypeStr</c> is examined; may be null.</param>
/// <param name="bytes">Raw response body to scan when the header gives no answer; may be null.</param>
/// <returns>
/// The detected encoding, or null when no charset is declared or the declared
/// name is not recognised by <see cref="System.Text.Encoding.GetEncoding(string)"/>.
/// </returns>
public static Encoding DetectCharset(QuickWebResponse response, byte[] bytes)
{
    // Charset names may contain several hyphen-separated parts ("ISO-8859-1", "EUC-KR"),
    // so capture any run of word characters and hyphens instead of "word, hyphens, digits".
    string regexstr = "(text/html|text/xml).*charset=(?<charset>[\\w\\-]+)";
    Regex regex = new Regex(regexstr, RegexOptions.IgnoreCase);

    string contentType = response != null ? response.ContentTypeStr : null;
    if (contentType != null)
    {
        Match headerMatch = regex.Match(contentType);
        if (headerMatch.Success)
        {
            // Invariant upper-casing avoids culture-specific mappings (e.g. Turkish 'i').
            string headerCharset = headerMatch.Groups["charset"].Value.ToUpperInvariant();
            try
            {
                return System.Text.Encoding.GetEncoding(headerCharset);
            }
            catch (System.ArgumentException)
            {
                // GetEncoding throws (it never returns null) for unknown names;
                // fall through and try the declaration inside the body instead.
            }
        }
    }

    if (bytes == null)
    {
        return null;
    }

    // Example of what is being searched for:
    //   <META http-equiv="Content-Type" content="text/html; charset=GB2312">
    string ascii = System.Text.Encoding.ASCII.GetString(bytes);
    Match bodyMatch = regex.Match(ascii);
    if (!bodyMatch.Success)
    {
        return null;
    }

    string bodyCharset = bodyMatch.Groups["charset"].Value.ToUpperInvariant();
    try
    {
        return System.Text.Encoding.GetEncoding(bodyCharset);
    }
    catch (System.ArgumentException)
    {
        // Unknown charset name: honour the documented "null when undetectable" contract.
        return null;
    }
}
/// <summary>
/// Returns the response object for this request, creating a fresh
/// <c>QuickWebResponse</c> when none exists yet or the existing one reports that
/// its socket is no longer connected, and then calls <c>ConnectAndGetHeader</c> on it.
/// </summary>
/// <param name="Timeout">
/// Timeout passed to the <c>QuickWebResponse</c> constructor; only used when a new
/// response is created. NOTE(review): unit is not visible here - presumably seconds, confirm.
/// </param>
/// <returns>The (possibly reused) response stored in the <c>response</c> member.</returns>
public QuickWebResponse GetResponse(int Timeout)
{
    bool needsNewResponse = response == null || !response.SocketConnected;
    if (needsNewResponse)
    {
        response = new QuickWebResponse(Timeout);
    }

    // Both the fresh and the reused response go through the same connect/header step.
    response.ConnectAndGetHeader(this);
    return response;
}
/// <summary>
/// Detects the character encoding of a response. The Content-Type header of
/// <paramref name="response"/> is inspected first; if it does not yield a usable
/// charset, the body bytes are decoded as ASCII and searched for a
/// "text/html|text/xml ... charset=NAME" declaration (for example an HTML META tag).
/// </summary>
/// <param name="response">Response whose <c>ContentTypeStr</c> is examined; may be null.</param>
/// <param name="bytes">Raw response body to scan when the header gives no answer; may be null.</param>
/// <returns>
/// The detected encoding, or null when no charset is declared or the declared
/// name is not recognised by <see cref="System.Text.Encoding.GetEncoding(string)"/>.
/// </returns>
public static Encoding DetectCharset(QuickWebResponse response, byte[] bytes)
{
    // Charset names may contain several hyphen-separated parts ("ISO-8859-1", "EUC-KR"),
    // so capture any run of word characters and hyphens instead of "word, hyphens, digits".
    string regexstr = "(text/html|text/xml).*charset=(?<charset>[\\w\\-]+)";
    Regex regex = new Regex(regexstr, RegexOptions.IgnoreCase);

    string contentType = response != null ? response.ContentTypeStr : null;
    if (contentType != null)
    {
        Match headerMatch = regex.Match(contentType);
        if (headerMatch.Success)
        {
            // Invariant upper-casing avoids culture-specific mappings (e.g. Turkish 'i').
            string headerCharset = headerMatch.Groups["charset"].Value.ToUpperInvariant();
            try
            {
                return System.Text.Encoding.GetEncoding(headerCharset);
            }
            catch (System.ArgumentException)
            {
                // GetEncoding throws (it never returns null) for unknown names;
                // fall through and try the declaration inside the body instead.
            }
        }
    }

    if (bytes == null)
    {
        return null;
    }

    // Example of what is being searched for:
    //   <META http-equiv="Content-Type" content="text/html; charset=GB2312">
    string ascii = System.Text.Encoding.ASCII.GetString(bytes);
    Match bodyMatch = regex.Match(ascii);
    if (!bodyMatch.Success)
    {
        return null;
    }

    string bodyCharset = bodyMatch.Groups["charset"].Value.ToUpperInvariant();
    try
    {
        return System.Text.Encoding.GetEncoding(bodyCharset);
    }
    catch (System.ArgumentException)
    {
        // Unknown charset name: honour the documented "null when undetectable" contract.
        return null;
    }
}
/// <summary>
/// Returns the response object for this request, creating a fresh
/// <c>QuickWebResponse</c> when none exists yet or the existing one reports that
/// its socket is no longer connected, and then calls <c>ConnectAndGetHeader</c> on it.
/// </summary>
/// <param name="Timeout">
/// Timeout passed to the <c>QuickWebResponse</c> constructor; only used when a new
/// response is created. NOTE(review): unit is not visible here - presumably seconds, confirm.
/// </param>
/// <returns>The (possibly reused) response stored in the <c>response</c> member.</returns>
public QuickWebResponse GetResponse(int Timeout)
{
    bool needsNewResponse = response == null || !response.SocketConnected;
    if (needsNewResponse)
    {
        response = new QuickWebResponse(Timeout);
    }

    // Both the fresh and the reused response go through the same connect/header step.
    response.ConnectAndGetHeader(this);
    return response;
}
/// <summary>
/// Fetches <paramref name="url"/> and returns the response body decoded to a string,
/// optionally following a redirect reported by the response and/or one HTML
/// meta-refresh redirect found in the body.
/// </summary>
/// <param name="url">
/// Address to fetch. Overwritten with the target address when an HTML meta-refresh
/// redirect is followed.
/// </param>
/// <param name="checkredirect">
/// When true, follow <c>gResponse.RedirectUri</c> if it is set and differs from <paramref name="url"/>.
/// </param>
/// <param name="checkHtmlRedurect">
/// When true, scan the decoded body for a META Refresh tag and follow its URL once.
/// </param>
/// <param name="timeoutsecond">
/// Timeout handed to <c>GetResponse</c>. NOTE(review): presumably seconds - confirm.
/// </param>
/// <returns>
/// The decoded body; null when the response yields no bytes; an empty string when
/// any exception occurs (failures are best-effort and not propagated).
/// </returns>
private string ReadUrlContent(ref string url, bool checkredirect, bool checkHtmlRedurect, int timeoutsecond)
{
    string content = String.Empty;
    try
    {
        Uri uri = new Uri(url);
        gRequest = QuickWebRequest.Create(uri, null, gRequest, false);
        gResponse = gRequest.GetResponse(timeoutsecond);

        if (checkredirect
            && gResponse.RedirectUri != null
            && gResponse.RedirectUri.AbsoluteUri != url)
        {
            // NOTE(review): this resolves to the three-argument overload, so
            // timeoutsecond is not forwarded and url is not updated - confirm intended.
            return ReadUrlContent(gResponse.RedirectUri.AbsoluteUri, false, checkHtmlRedurect);
        }

        byte[] bytes = gResponse.ReadResponse();
        if (bytes == null)
        {
            return null;
        }

        System.Text.Encoding charset = HttpUtils.DetectCharset(gResponse, bytes);
        if (charset == null)
        {
            // Default to GB2312 when nothing could be detected.
            charset = System.Text.Encoding.GetEncoding("GB2312");
        }

        // An explicitly configured encoding overrides detection.
        if (!string.IsNullOrEmpty(Encoding))
        {
            charset = System.Text.Encoding.GetEncoding(Encoding);
        }

        // Decode straight from the source encoding; this yields the same string as
        // transcoding the bytes to UTF-8 first and decoding those, without the extra copy.
        content = charset.GetString(bytes);

        // Check for an HTML (meta refresh) redirect, once.
        if (checkHtmlRedurect)
        {
            System.Text.RegularExpressions.Regex redirectRegex = new System.Text.RegularExpressions.Regex(
                "<META\\s+HTTP-EQUIV\\s*=\\s*[\"]*Refresh[\"]*\\s+CONTENT=[\"\\s]*\\d+\\s*[;]\\s*URL=(?<match>.*?)[\\s\"]*>",
                System.Text.RegularExpressions.RegexOptions.Singleline | System.Text.RegularExpressions.RegexOptions.IgnoreCase);
            System.Text.RegularExpressions.Match match = redirectRegex.Match(content);
            if (match.Success)
            {
                string target = match.Groups["match"].Value;
                if (!string.IsNullOrEmpty(target) && target != url)
                {
                    url = target;
                    // Pass false so the HTML redirect is followed at most once.
                    return ReadUrlContent(url, checkredirect, false);
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Deliberately best-effort: the failure is not propagated, but leave a
        // trace in debug builds instead of swallowing it without any record.
        System.Diagnostics.Debug.WriteLine("ReadUrlContent failed for " + url + ": " + ex);
    }
    finally
    {
        // gResponse can still be null when the failure happened before it was
        // assigned (e.g. malformed url); an unguarded Close() would then throw
        // NullReferenceException out of this method.
        if (gResponse != null)
        {
            gResponse.Close();
        }
    }

    return content;
}
/// <summary>
/// Fetches <paramref name="url"/> and returns the response body decoded to a string,
/// optionally following a redirect reported by the response and/or one HTML
/// meta-refresh redirect found in the body.
/// </summary>
/// <param name="url">
/// Address to fetch. Overwritten with the target address when an HTML meta-refresh
/// redirect is followed.
/// </param>
/// <param name="checkredirect">
/// When true, follow <c>gResponse.RedirectUri</c> if it is set and differs from <paramref name="url"/>.
/// </param>
/// <param name="checkHtmlRedurect">
/// When true, scan the decoded body for a META Refresh tag and follow its URL once.
/// </param>
/// <param name="timeoutsecond">
/// Timeout handed to <c>GetResponse</c>. NOTE(review): presumably seconds - confirm.
/// </param>
/// <returns>
/// The decoded body; null when the response yields no bytes; an empty string when
/// any exception occurs (failures are best-effort and not propagated).
/// </returns>
private string ReadUrlContent(ref string url, bool checkredirect, bool checkHtmlRedurect, int timeoutsecond)
{
    string content = String.Empty;
    try
    {
        Uri uri = new Uri(url);
        gRequest = QuickWebRequest.Create(uri, null, gRequest, false);
        gResponse = gRequest.GetResponse(timeoutsecond);

        if (checkredirect
            && gResponse.RedirectUri != null
            && gResponse.RedirectUri.AbsoluteUri != url)
        {
            // NOTE(review): this resolves to the three-argument overload, so
            // timeoutsecond is not forwarded and url is not updated - confirm intended.
            return ReadUrlContent(gResponse.RedirectUri.AbsoluteUri, false, checkHtmlRedurect);
        }

        byte[] bytes = gResponse.ReadResponse();
        if (bytes == null)
        {
            return null;
        }

        System.Text.Encoding charset = HttpUtils.DetectCharset(gResponse, bytes);
        if (charset == null)
        {
            // Default to GB2312 when nothing could be detected.
            charset = System.Text.Encoding.GetEncoding("GB2312");
        }

        // An explicitly configured encoding overrides detection.
        if (!string.IsNullOrEmpty(Encoding))
        {
            charset = System.Text.Encoding.GetEncoding(Encoding);
        }

        // Decode straight from the source encoding; this yields the same string as
        // transcoding the bytes to UTF-8 first and decoding those, without the extra copy.
        content = charset.GetString(bytes);

        // Check for an HTML (meta refresh) redirect, once.
        if (checkHtmlRedurect)
        {
            System.Text.RegularExpressions.Regex redirectRegex = new System.Text.RegularExpressions.Regex(
                "<META\\s+HTTP-EQUIV\\s*=\\s*[\"]*Refresh[\"]*\\s+CONTENT=[\"\\s]*\\d+\\s*[;]\\s*URL=(?<match>.*?)[\\s\"]*>",
                System.Text.RegularExpressions.RegexOptions.Singleline | System.Text.RegularExpressions.RegexOptions.IgnoreCase);
            System.Text.RegularExpressions.Match match = redirectRegex.Match(content);
            if (match.Success)
            {
                string target = match.Groups["match"].Value;
                if (!string.IsNullOrEmpty(target) && target != url)
                {
                    url = target;
                    // Pass false so the HTML redirect is followed at most once.
                    return ReadUrlContent(url, checkredirect, false);
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Deliberately best-effort: the failure is not propagated, but leave a
        // trace in debug builds instead of swallowing it without any record.
        System.Diagnostics.Debug.WriteLine("ReadUrlContent failed for " + url + ": " + ex);
    }
    finally
    {
        // gResponse can still be null when the failure happened before it was
        // assigned (e.g. malformed url); an unguarded Close() would then throw
        // NullReferenceException out of this method.
        if (gResponse != null)
        {
            gResponse.Close();
        }
    }

    return content;
}