/// <summary>
/// Downloads the document at <paramref name="site"/>, prefixing "http://" when the
/// address does not already start with an http or https scheme.
/// </summary>
/// <param name="site">Address to fetch, with or without a leading http:// or https://.</param>
/// <param name="userAgent">User agent forwarded to the <see cref="Uri"/>-based overload.</param>
/// <param name="ignoreCookies">Forwarded unchanged to the <see cref="Uri"/>-based overload.</param>
/// <param name="useDefaultCookiesParser">Forwarded unchanged to the <see cref="Uri"/>-based overload.</param>
/// <returns>Whatever the <see cref="Uri"/>-based overload returns for the normalized address.</returns>
/// <exception cref="ArgumentNullException"><paramref name="site"/> is null.</exception>
public static string GetPageDocument(string site,
                                      FakeUserAgent userAgent,
                                      bool ignoreCookies           = true,
                                      bool useDefaultCookiesParser = false)
 {
     if (site == null)
     {
         throw new ArgumentNullException(nameof(site));
     }

     // Trim first so surrounding whitespace cannot hide an existing scheme.
     site = site.Trim();

     // Only a *leading* scheme counts. A substring test would miss "https://" (producing
     // "http://https://...") and would be fooled by an "http://" inside a query string.
     var hasHttpScheme = site.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
                         site.StartsWith("https://", StringComparison.OrdinalIgnoreCase);

     if (!hasHttpScheme)
     {
         site = $"http://{site}";
     }

     return GetPageDocument(new Uri(site), null, userAgent, ignoreCookies, useDefaultCookiesParser);
 }
        //System.Net.CookieException

        /// <summary>
        /// Convenience overload that forwards to the overload taking cookie options,
        /// passing <c>ignoreCookies = true</c> and <c>useDefaultCookiesParser = false</c>.
        /// </summary>
        /// <param name="site">Address to download.</param>
        /// <param name="webProxy">Proxy forwarded unchanged; may be null.</param>
        /// <param name="userAgent">User agent forwarded unchanged; may be null.</param>
        /// <param name="headers">Header pairs forwarded unchanged.</param>
        /// <returns>The string returned by the overload this call forwards to.</returns>
        public static string GetPageDocument(Uri site,
                                             IWebProxy webProxy      = null,
                                             FakeUserAgent userAgent = null,
                                             params KeyValuePair <string, string>[] headers)
            => GetPageDocument(site,
                               webProxy,
                               userAgent,
                               ignoreCookies: true,
                               useDefaultCookiesParser: false,
                               headers: headers);
        /// <summary>
        /// Overload for callers that do not need the redirected URL: forwards every argument
        /// to the <c>ref redirectedUrl</c> overload and discards the value written there.
        /// </summary>
        /// <param name="site">Address to download.</param>
        /// <param name="webProxy">Proxy forwarded unchanged; may be null.</param>
        /// <param name="userAgent">User agent forwarded unchanged; may be null.</param>
        /// <param name="ignoreCookies">Forwarded unchanged.</param>
        /// <param name="useDefaultCookiesParser">Forwarded unchanged.</param>
        /// <param name="headers">Header pairs forwarded unchanged.</param>
        /// <returns>The string returned by the overload this call forwards to.</returns>
        public static string GetPageDocument(Uri site,
                                             IWebProxy webProxy           = null,
                                             FakeUserAgent userAgent      = null,
                                             bool?ignoreCookies           = true,
                                             bool?useDefaultCookiesParser = false,
                                             params KeyValuePair <string, string>[] headers)
        {
            // The ref parameter needs a variable to write into; its value is dropped here.
            string discardedRedirect = string.Empty;

            return GetPageDocument(site,
                                   ref discardedRedirect,
                                   webProxy,
                                   userAgent,
                                   ignoreCookies,
                                   useDefaultCookiesParser,
                                   headers);
        }
// Example no. 4 (separator left over from the code listing this file was copied from)
 /// <summary>
 /// Creates a <see cref="UserAgents"/> instance from a name and its user agent value.
 /// Private, so instances can only be created from inside this class.
 /// </summary>
 /// <param name="name">Stored in the <c>_name</c> field.</param>
 /// <param name="value">Stored in the <c>_value</c> field.</param>
 private UserAgents(string name, FakeUserAgent value)
 {
     this._name  = name;
     this._value = value;
 }
        /// <summary>
        /// Downloads <paramref name="site"/> as a string through a <c>ScrapingBrowser</c> and
        /// reports the browser's referer URI through <paramref name="redirectedUrl"/>.
        /// </summary>
        /// <param name="site">Address to download.</param>
        /// <param name="redirectedUrl">
        /// On success, set to <c>browser.Referer.AbsoluteUri</c> as it stands after the download
        /// (presumably the final address after redirects - confirm against ScrapingBrowser).
        /// Left untouched when the call throws or returns an empty string.
        /// </param>
        /// <param name="webProxy">Proxy assigned to the browser; when null the browser's own proxy setting is left as is.</param>
        /// <param name="userAgent">User agent to use; null selects <c>UserAgents.Mozilla22</c>.</param>
        /// <param name="ignoreCookies">Copied to <c>browser.IgnoreCookies</c>; null is treated as false.</param>
        /// <param name="useDefaultCookiesParser">Accepted for signature compatibility; this method does not currently read it.</param>
        /// <param name="headers">Accepted for signature compatibility; this method does not currently apply them to the request.</param>
        /// <returns>
        /// The downloaded string, or an empty string when a protocol error carries no response object.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="site"/> is null.</exception>
        /// <exception cref="WebException">
        /// Rethrown unchanged for non-protocol failures. For protocol errors a new instance is thrown
        /// whose message contains the status code and description, with the original exception,
        /// status and response attached.
        /// </exception>
        /// <exception cref="Exception">
        /// Any other failure, wrapped with the original exception available as <c>InnerException</c>.
        /// </exception>
        public static string GetPageDocument(Uri site,
                                             ref string redirectedUrl,
                                             IWebProxy webProxy           = null,
                                             FakeUserAgent userAgent      = null,
                                             bool?ignoreCookies           = true,
                                             bool?useDefaultCookiesParser = false,
                                             params KeyValuePair <string, string>[] headers)
        {
            if (site == null)
            {
                throw new ArgumentNullException(nameof(site));
            }

            if (userAgent == null)
            {
                userAgent = UserAgents.GetFakeUserAgent(UserAgents.Mozilla22);
            }

            try
            {
                var browser = new ScrapingBrowser
                {
                    UserAgent         = userAgent,
                    IgnoreCookies     = ignoreCookies.GetValueOrDefault(),
                    AllowMetaRedirect = true,
                    Timeout           = TimeSpan.FromSeconds(30),
                };

                // Only override the proxy when one was supplied, so the browser keeps
                // whatever proxy setting it was constructed with otherwise.
                if (webProxy != null)
                {
                    browser.Proxy = webProxy;
                }

                // NOTE(review): `headers` and `useDefaultCookiesParser` were never applied by this
                // method (the header dictionary it used to build was discarded). Wiring them up
                // needs the matching ScrapingBrowser members - confirm they exist in the
                // referenced ScrapySharp version before doing so.

                var html = browser.DownloadString(site);

                redirectedUrl = browser.Referer.AbsoluteUri;
                return html;
            }
            catch (WebException webEx)
            {
                // Only protocol errors carry an HTTP response worth describing;
                // everything else is passed through untouched.
                if (webEx.Status != WebExceptionStatus.ProtocolError)
                {
                    throw;
                }

                var response = (HttpWebResponse)webEx.Response;
                if (response == null)
                {
                    return string.Empty;
                }

                var error = $"Error occurred: {response.StatusCode}; Description : {response.StatusDescription}";
                if (webEx.InnerException != null)
                {
                    error += $"; {webEx.InnerException.Message}";
                }

                // Keep the original exception, status and response attached so callers can
                // still inspect the HTTP status code and the original stack trace.
                throw new WebException(error, webEx, webEx.Status, webEx.Response);
            }
            catch (AggregateException aEx)
            {
                // Message format is unchanged: the messages of every inner exception except
                // UnauthorizedAccessException, concatenated without a separator.
                var messages = aEx.InnerExceptions
                                  .Where(inner => !(inner is UnauthorizedAccessException))
                                  .Select(inner => inner.Message);

                throw new Exception(string.Concat(messages), aEx);
            }
            catch (Exception ex)
            {
                // Same type and message as before, but the original exception (and its
                // stack trace) now travels along as InnerException.
                throw new Exception(ex.Message, ex);
            }
        }