Example #1
0
        private void download(string url)
        {
            try
            {
                m_uri = new Uri(url);
                m_html = "";
                m_outstr = "";
                m_title = "";
                m_good = true;

                if (url.EndsWith(".rar") || url.EndsWith(".dat") || url.EndsWith(".msi"))
                {
                    m_good = false;
                    return;
                }

                HttpWebRequest rqst = (HttpWebRequest)WebRequest.Create(m_uri);
                rqst.Timeout = 30000;

                //rqst.AllowAutoRedirect = true;
                //rqst.MaximumAutomaticRedirections = 3;
                //rqst.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.0.3705;)";
                //rqst.KeepAlive = true;
                //rqst.Method = "GET";
                //rqst.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
                //rqst.Headers.Add("Accept-Language: en-us,en;q=0.5");
                //rqst.Headers.Add("Accept-Encoding: gzip,deflate");
                //rqst.Headers.Add("Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7");
                //rqst.Referer = m_uri.Host;

                lock (DownloadHelper.webcookies)
                {
                    if (DownloadHelper.webcookies.ContainsKey(m_uri.Host))
                        rqst.CookieContainer = DownloadHelper.webcookies[m_uri.Host];
                    else
                    {
                        CookieContainer cc = new CookieContainer();
                        DownloadHelper.webcookies[m_uri.Host] = cc;

                        rqst.CookieContainer = cc;
                    }
                }

                HttpWebResponse rsps = (HttpWebResponse)rqst.GetResponse();

                Stream sm = rsps.GetResponseStream();
                if (!rsps.ContentType.ToLower().StartsWith("text/") || rsps.ContentLength > 1 << 22)
                {
                    rsps.Close();
                    m_good = false;
                    return;
                }

                Encoding cding = System.Text.Encoding.Default;
                string contenttype = rsps.ContentType.ToLower();
                int ix = contenttype.IndexOf("charset=");

                if (ix != -1)
                {
                    try
                    {
                        cding = System.Text.Encoding.GetEncoding(rsps.ContentType.Substring(ix + "charset".Length + 1));
                    }
                    catch
                    {
                        cding = Encoding.Default;
                    }

                    //m_html = HttpUtility.HtmlDecode(new StreamReader(sm, cding).ReadToEnd());
                    m_html = new StreamReader(sm, cding).ReadToEnd();

                }
                else
                {
                    //m_html = HttpUtility.HtmlDecode(new StreamReader(sm, cding).ReadToEnd());

                    m_html = new StreamReader(sm, cding).ReadToEnd();
                    Regex regex = new Regex("charset=(?<cding>[^=]+)?\"", RegexOptions.IgnoreCase);
                    string strcding = regex.Match(m_html).Groups["cding"].Value;

                    try
                    {
                        cding = Encoding.GetEncoding(strcding);
                    }
                    catch
                    {
                        cding = Encoding.Default;
                    }

                    byte[] bytes = Encoding.Default.GetBytes(m_html.ToCharArray());
                    m_html = cding.GetString(bytes);

                    if (m_html.Split('?').Length > 100)
                    {
                        m_html = Encoding.Default.GetString(bytes);
                    }
                }

                m_pagesize = m_html.Length;
                m_uri = rsps.ResponseUri;
                rsps.Close();
            }
            catch (Exception ex)
            {

            }
        }
Example #2
0
        private void Init(string _url)
        {
            try
            {
                m_uri = new Uri(_url);
                m_links = new List<Link>();
                m_html = "";
                m_outstr = "";
                m_title = "";
                m_good = true;
                if (_url.EndsWith(".rar") || _url.EndsWith(".dat") || _url.EndsWith(".msi"))
                {
                    m_good = false;
                    return;
                }
                HttpWebRequest rqst = (HttpWebRequest)WebRequest.Create(m_uri);
                rqst.AllowAutoRedirect = true;
                rqst.MaximumAutomaticRedirections = 3;
                rqst.UserAgent = "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)";
                rqst.KeepAlive = true;
                rqst.Timeout = 30000;
                lock (WebPage.webcookies)
                {
                    if (WebPage.webcookies.ContainsKey(m_uri.Host))
                        rqst.CookieContainer = WebPage.webcookies[m_uri.Host];
                    else
                    {
                        CookieContainer cc = new CookieContainer();
                        WebPage.webcookies[m_uri.Host] = cc;
                        rqst.CookieContainer = cc;
                    }
                }

                HttpWebResponse rsps = (HttpWebResponse)rqst.GetResponse();

                Stream sm = rsps.GetResponseStream();
                if (!rsps.ContentType.ToLower().StartsWith("text/") || rsps.ContentLength > 1 << 22)
                {
                    rsps.Close();
                    m_good = false;
                    return;
                }
                Encoding cding = System.Text.Encoding.Default;
                string contenttype = rsps.ContentType.ToLower();
                int ix = contenttype.IndexOf("charset=");
                if (ix != -1)
                {

                    try
                    {
                        cding = System.Text.Encoding.GetEncoding(rsps.ContentType.Substring(ix + "charset".Length + 1));
                    }
                    catch
                    {
                        cding = Encoding.Default;
                    }
                    m_html = new StreamReader(sm, cding).ReadToEnd();
                }
                else
                {
                    m_html = new StreamReader(sm, cding).ReadToEnd();
                    Regex regex = new Regex("charset=(?<cding>[^=]+)?\"", RegexOptions.IgnoreCase);
                    string strcding = regex.Match(m_html).Groups["cding"].Value;
                    try
                    {
                        cding = Encoding.GetEncoding(strcding);
                    }
                    catch
                    {
                        cding = Encoding.Default;
                    }
                    byte[] bytes = Encoding.Default.GetBytes(m_html.ToCharArray());
                    m_html = cding.GetString(bytes);
                    if (m_html.Split('?').Length > 100)
                    {
                        m_html = Encoding.Default.GetString(bytes);
                    }
                }

                m_pagesize = m_html.Length;
                m_uri = rsps.ResponseUri;
                rsps.Close();
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message + m_uri.ToString());
                m_good = false;

            }
        }