/// <summary>
/// Downloads <paramref name="url"/> over HTTP and stores the decoded body in <c>m_html</c>.
/// </summary>
/// <remarks>
/// Resets <c>m_html</c>, <c>m_outstr</c> and <c>m_title</c> and sets <c>m_good</c> to true before the
/// request. <c>m_good</c> becomes false when the URL ends in .rar/.dat/.msi, when the response
/// content type is not <c>text/*</c>, when the declared content length exceeds 4 MiB (1 &lt;&lt; 22 bytes),
/// or when any exception is thrown. On success <c>m_pagesize</c> receives the character count of
/// <c>m_html</c> and <c>m_uri</c> is replaced by the response URI.
/// </remarks>
/// <param name="url">Absolute URL of the page to download.</param>
private void download(string url)
{
    try
    {
        m_uri = new Uri(url);
        m_html = "";
        m_outstr = "";
        m_title = "";
        m_good = true;

        // Skip obvious binary downloads without issuing a request.
        if (url.EndsWith(".rar") || url.EndsWith(".dat") || url.EndsWith(".msi"))
        {
            m_good = false;
            return;
        }

        HttpWebRequest rqst = (HttpWebRequest)WebRequest.Create(m_uri);
        rqst.Timeout = 30000;
        // Optional request settings, left disabled as in the original code:
        //rqst.AllowAutoRedirect = true;
        //rqst.MaximumAutomaticRedirections = 3;
        //rqst.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.0.3705;)";
        //rqst.KeepAlive = true;
        //rqst.Method = "GET";
        //rqst.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
        //rqst.Headers.Add("Accept-Language: en-us,en;q=0.5");
        //rqst.Headers.Add("Accept-Encoding: gzip,deflate");
        //rqst.Headers.Add("Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7");
        //rqst.Referer = m_uri.Host;

        // One cookie container per host, shared through DownloadHelper.webcookies.
        lock (DownloadHelper.webcookies)
        {
            if (DownloadHelper.webcookies.ContainsKey(m_uri.Host))
            {
                rqst.CookieContainer = DownloadHelper.webcookies[m_uri.Host];
            }
            else
            {
                CookieContainer cc = new CookieContainer();
                DownloadHelper.webcookies[m_uri.Host] = cc;
                rqst.CookieContainer = cc;
            }
        }

        // 'using' releases the connection on every exit path, including exceptions.
        using (HttpWebResponse rsps = (HttpWebResponse)rqst.GetResponse())
        {
            string contentType = rsps.ContentType ?? "";
            if (!contentType.StartsWith("text/", StringComparison.OrdinalIgnoreCase) || rsps.ContentLength > 1 << 22)
            {
                m_good = false;
                return;
            }

            // Keep the raw bytes so the body can be decoded again once the charset is known,
            // instead of round-tripping already-decoded text through Encoding.Default.
            byte[] raw;
            using (Stream sm = rsps.GetResponseStream())
            using (MemoryStream buffer = new MemoryStream())
            {
                byte[] chunk = new byte[8192];
                int read;
                while ((read = sm.Read(chunk, 0, chunk.Length)) > 0)
                {
                    buffer.Write(chunk, 0, read);
                }
                raw = buffer.ToArray();
            }

            int ix = contentType.IndexOf("charset=", StringComparison.OrdinalIgnoreCase);
            if (ix != -1)
            {
                // Charset announced in the Content-Type header; drop trailing parameters and quotes.
                string headerCharset = contentType.Substring(ix + "charset=".Length);
                int paramEnd = headerCharset.IndexOf(';');
                if (paramEnd != -1)
                {
                    headerCharset = headerCharset.Substring(0, paramEnd);
                }
                headerCharset = headerCharset.Trim(' ', '"', '\'');

                Encoding cding;
                try
                {
                    cding = Encoding.GetEncoding(headerCharset);
                }
                catch (Exception)
                {
                    // Unknown or unsupported charset name: fall back to the system default.
                    cding = Encoding.Default;
                }
                m_html = DecodeDownloadBytes(raw, cding);
            }
            else
            {
                // No header charset: decode with the default encoding, then look for a charset
                // declaration inside the markup (e.g. a <meta> tag).
                string sniffed = DecodeDownloadBytes(raw, Encoding.Default);
                m_html = sniffed;

                string strcding = Regex.Match(
                    sniffed,
                    "charset\\s*=\\s*[\"']?(?<cding>[A-Za-z0-9_.:\\-]+)",
                    RegexOptions.IgnoreCase).Groups["cding"].Value;

                Encoding declared = null;
                if (strcding.Length > 0)
                {
                    try
                    {
                        declared = Encoding.GetEncoding(strcding);
                    }
                    catch (Exception)
                    {
                        declared = null; // unknown name: keep the default decoding
                    }
                }

                if (declared != null)
                {
                    string redecoded = DecodeDownloadBytes(raw, declared);
                    // Heuristic kept from the original: 100 or more '?' characters is treated as
                    // a failed decode, in which case the default decoding is kept.
                    if (redecoded.Split('?').Length <= 100)
                    {
                        m_html = redecoded;
                    }
                }
            }

            m_pagesize = m_html.Length;
            m_uri = rsps.ResponseUri;
        }
    }
    catch (Exception)
    {
        // Any failure (bad URL, network error, timeout, ...) marks the page as unusable
        // instead of silently leaving m_good set to true.
        m_good = false;
    }
}

/// <summary>
/// Decodes <paramref name="raw"/> with <paramref name="encoding"/>; a byte-order mark at the start
/// of the data, if present, takes precedence (StreamReader's default detection).
/// </summary>
private static string DecodeDownloadBytes(byte[] raw, Encoding encoding)
{
    using (StreamReader reader = new StreamReader(new MemoryStream(raw), encoding))
    {
        return reader.ReadToEnd();
    }
}
/// <summary>
/// Initialises this page from <paramref name="_url"/>: resets the per-page fields, downloads the
/// document over HTTP and stores the decoded body in <c>m_html</c>.
/// </summary>
/// <remarks>
/// Resets <c>m_links</c>, <c>m_html</c>, <c>m_outstr</c> and <c>m_title</c> and sets <c>m_good</c> to
/// true before the request. <c>m_good</c> becomes false when the URL ends in .rar/.dat/.msi, when the
/// response content type is not <c>text/*</c>, when the declared content length exceeds 4 MiB
/// (1 &lt;&lt; 22 bytes), or when any exception is thrown (the exception message and URL are written to
/// the console). On success <c>m_pagesize</c> receives the character count of <c>m_html</c> and
/// <c>m_uri</c> is replaced by the response URI.
/// </remarks>
/// <param name="_url">Absolute URL of the page to load.</param>
private void Init(string _url)
{
    try
    {
        m_uri = new Uri(_url);
        m_links = new List<Link>();
        m_html = "";
        m_outstr = "";
        m_title = "";
        m_good = true;

        // Skip obvious binary downloads without issuing a request.
        if (_url.EndsWith(".rar") || _url.EndsWith(".dat") || _url.EndsWith(".msi"))
        {
            m_good = false;
            return;
        }

        HttpWebRequest rqst = (HttpWebRequest)WebRequest.Create(m_uri);
        rqst.AllowAutoRedirect = true;
        rqst.MaximumAutomaticRedirections = 3;
        rqst.UserAgent = "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)";
        rqst.KeepAlive = true;
        rqst.Timeout = 30000;

        // One cookie container per host, shared through WebPage.webcookies.
        lock (WebPage.webcookies)
        {
            if (WebPage.webcookies.ContainsKey(m_uri.Host))
            {
                rqst.CookieContainer = WebPage.webcookies[m_uri.Host];
            }
            else
            {
                CookieContainer cc = new CookieContainer();
                WebPage.webcookies[m_uri.Host] = cc;
                rqst.CookieContainer = cc;
            }
        }

        // 'using' releases the connection on every exit path, including exceptions.
        using (HttpWebResponse rsps = (HttpWebResponse)rqst.GetResponse())
        {
            string contentType = rsps.ContentType ?? "";
            if (!contentType.StartsWith("text/", StringComparison.OrdinalIgnoreCase) || rsps.ContentLength > 1 << 22)
            {
                m_good = false;
                return;
            }

            // Keep the raw bytes so the body can be decoded again once the charset is known,
            // instead of round-tripping already-decoded text through Encoding.Default.
            byte[] raw;
            using (Stream sm = rsps.GetResponseStream())
            using (MemoryStream buffer = new MemoryStream())
            {
                byte[] chunk = new byte[8192];
                int read;
                while ((read = sm.Read(chunk, 0, chunk.Length)) > 0)
                {
                    buffer.Write(chunk, 0, read);
                }
                raw = buffer.ToArray();
            }

            int ix = contentType.IndexOf("charset=", StringComparison.OrdinalIgnoreCase);
            if (ix != -1)
            {
                // Charset announced in the Content-Type header; drop trailing parameters and quotes.
                string headerCharset = contentType.Substring(ix + "charset=".Length);
                int paramEnd = headerCharset.IndexOf(';');
                if (paramEnd != -1)
                {
                    headerCharset = headerCharset.Substring(0, paramEnd);
                }
                headerCharset = headerCharset.Trim(' ', '"', '\'');

                Encoding cding;
                try
                {
                    cding = Encoding.GetEncoding(headerCharset);
                }
                catch (Exception)
                {
                    // Unknown or unsupported charset name: fall back to the system default.
                    cding = Encoding.Default;
                }
                m_html = DecodeInitBytes(raw, cding);
            }
            else
            {
                // No header charset: decode with the default encoding, then look for a charset
                // declaration inside the markup (e.g. a <meta> tag).
                string sniffed = DecodeInitBytes(raw, Encoding.Default);
                m_html = sniffed;

                string strcding = Regex.Match(
                    sniffed,
                    "charset\\s*=\\s*[\"']?(?<cding>[A-Za-z0-9_.:\\-]+)",
                    RegexOptions.IgnoreCase).Groups["cding"].Value;

                Encoding declared = null;
                if (strcding.Length > 0)
                {
                    try
                    {
                        declared = Encoding.GetEncoding(strcding);
                    }
                    catch (Exception)
                    {
                        declared = null; // unknown name: keep the default decoding
                    }
                }

                if (declared != null)
                {
                    string redecoded = DecodeInitBytes(raw, declared);
                    // Heuristic kept from the original: 100 or more '?' characters is treated as
                    // a failed decode, in which case the default decoding is kept.
                    if (redecoded.Split('?').Length <= 100)
                    {
                        m_html = redecoded;
                    }
                }
            }

            m_pagesize = m_html.Length;
            m_uri = rsps.ResponseUri;
        }
    }
    catch (Exception ex)
    {
        // Log the requested URL rather than m_uri: m_uri is null (or stale from an earlier call)
        // when the Uri constructor itself failed, which previously threw from inside this handler.
        Console.WriteLine(ex.Message + _url);
        m_good = false;
    }
}

/// <summary>
/// Decodes <paramref name="raw"/> with <paramref name="encoding"/>; a byte-order mark at the start
/// of the data, if present, takes precedence (StreamReader's default detection).
/// </summary>
private static string DecodeInitBytes(byte[] raw, Encoding encoding)
{
    using (StreamReader reader = new StreamReader(new MemoryStream(raw), encoding))
    {
        return reader.ReadToEnd();
    }
}