/// <summary>
/// Downloads the HTML document at <paramref name="site"/>, prepending an
/// "http://" scheme when the address carries none, then delegates to the
/// <see cref="Uri"/>-based overload.
/// </summary>
/// <param name="site">Host name or full URL of the page to fetch.</param>
/// <param name="userAgent">Fake user agent to present to the server.</param>
/// <param name="ignoreCookies">When true (default), cookies returned by the server are discarded.</param>
/// <param name="useDefaultCookiesParser">Forwarded to the Uri-based overload.</param>
/// <returns>The raw HTML of the page.</returns>
public static string GetPageDocument(string site, FakeUserAgent userAgent, bool ignoreCookies = true, bool useDefaultCookiesParser = false)
{
    // BUG FIX: the original used site.Contains("http://"), which (a) matched the
    // scheme anywhere in the string, not just at the start, and (b) failed to
    // recognise "https://", so secure URLs were mangled into "http://https://...".
    if (!site.StartsWith("http://", StringComparison.OrdinalIgnoreCase) &&
        !site.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
    {
        site = $"http://{site}";
    }

    return GetPageDocument(new Uri(site), null, userAgent, ignoreCookies, useDefaultCookiesParser);
}
// May surface System.Net.CookieException from the underlying browser.
/// <summary>
/// Downloads the HTML document at <paramref name="site"/> using the default
/// cookie behaviour (cookies ignored, default cookie parser disabled).
/// </summary>
/// <param name="site">URI of the page to fetch.</param>
/// <param name="webProxy">Optional proxy to route the request through.</param>
/// <param name="userAgent">Optional fake user agent; a default is chosen downstream when null.</param>
/// <param name="headers">Extra HTTP header pairs forwarded to the full overload.</param>
/// <returns>The raw HTML of the page.</returns>
public static string GetPageDocument(Uri site, IWebProxy webProxy = null, FakeUserAgent userAgent = null, params KeyValuePair<string, string>[] headers)
    => GetPageDocument(site, webProxy, userAgent, true, false, headers);
/// <summary>
/// Downloads the HTML document at <paramref name="site"/>, discarding the
/// redirect target that the <c>ref</c>-parameter overload reports.
/// </summary>
/// <param name="site">URI of the page to fetch.</param>
/// <param name="webProxy">Optional proxy to route the request through.</param>
/// <param name="userAgent">Optional fake user agent; a default is chosen downstream when null.</param>
/// <param name="ignoreCookies">When true (default), server cookies are discarded.</param>
/// <param name="useDefaultCookiesParser">Forwarded to the full overload.</param>
/// <param name="headers">Extra HTTP header pairs forwarded to the full overload.</param>
/// <returns>The raw HTML of the page.</returns>
public static string GetPageDocument(Uri site, IWebProxy webProxy = null, FakeUserAgent userAgent = null, bool?ignoreCookies = true, bool?useDefaultCookiesParser = false, params KeyValuePair <string, string>[] headers)
{
    // The ref overload reports the final (possibly redirected) URL; this
    // caller does not need it, so the value is simply thrown away.
    var discardedRedirect = string.Empty;
    return GetPageDocument(site, ref discardedRedirect, webProxy, userAgent, ignoreCookies, useDefaultCookiesParser, headers);
}
/// <summary>
/// Creates a named user-agent entry, pairing <paramref name="name"/> with its
/// <see cref="FakeUserAgent"/> definition.
/// </summary>
/// <param name="name">Display name identifying the user agent.</param>
/// <param name="value">The fake user-agent definition associated with the name.</param>
private UserAgents(string name, FakeUserAgent value)
{
    _value = value;
    _name = name;
}
/// <summary>
/// Downloads the HTML document at <paramref name="site"/> with a scraping
/// browser and reports the final URL the browser ended up on after redirects.
/// </summary>
/// <param name="site">URI of the page to fetch.</param>
/// <param name="redirectedUrl">Receives the browser's final referer URL; empty when unavailable.</param>
/// <param name="webProxy">Optional proxy to route the request through.</param>
/// <param name="userAgent">Fake user agent; defaults to the Mozilla22 agent when null.</param>
/// <param name="ignoreCookies">When true (default), server cookies are discarded.</param>
/// <param name="useDefaultCookiesParser">Accepted but never consumed (the original assignment was commented out); retained for interface compatibility.</param>
/// <param name="headers">Extra header pairs. NOTE(review): the original built these into a dictionary but never attached them to the request, so headers have never been sent — the dead code is removed below; wire them into the browser before relying on this parameter.</param>
/// <returns>The page HTML, or an empty string when a protocol error carried no response object.</returns>
/// <exception cref="WebException">Thrown with a descriptive message on HTTP protocol errors, or rethrown untouched for other network failures.</exception>
public static string GetPageDocument(Uri site, ref string redirectedUrl, IWebProxy webProxy = null, FakeUserAgent userAgent = null, bool?ignoreCookies = true, bool?useDefaultCookiesParser = false, params KeyValuePair <string, string>[] headers)
{
    if (userAgent == null)
    {
        userAgent = UserAgents.GetFakeUserAgent(UserAgents.Mozilla22);
    }

    try
    {
        var browser = webProxy != null
            ? new ScrapingBrowser { UserAgent = userAgent, Proxy = webProxy }
            : new ScrapingBrowser { UserAgent = userAgent };

        browser.IgnoreCookies = ignoreCookies.GetValueOrDefault();
        browser.AllowMetaRedirect = true;
        browser.Timeout = TimeSpan.FromSeconds(30);

        var html = browser.DownloadString(site);

        // BUG FIX: Referer can be null (e.g. no redirect occurred); the original
        // dereferenced it unconditionally and could throw NullReferenceException.
        redirectedUrl = browser.Referer != null ? browser.Referer.AbsoluteUri : string.Empty;
        return html;
    }
    catch (WebException webEx)
    {
        // Non-protocol failures (DNS, timeout, connection reset, ...) are
        // propagated untouched so the caller sees the real error.
        if (webEx.Status != WebExceptionStatus.ProtocolError)
        {
            throw;
        }

        var response = webEx.Response as HttpWebResponse;
        if (response == null)
        {
            return string.Empty;
        }

        // BUG FIX: the original split this interpolated string literal across a
        // line break (invalid C#); the message is rebuilt as one literal.
        var error = $"Error occurred: {response.StatusCode}; Description : {response.StatusDescription}";
        if (webEx.InnerException != null)
        {
            error += $"; {webEx.InnerException.Message}";
        }

        throw new WebException(error, webEx.InnerException);
    }
    catch (AggregateException aEx)
    {
        // Collapse the aggregate into one message, skipping
        // UnauthorizedAccessException as the original intended. The original
        // mutated a captured string inside Handle() and threw a bare Exception
        // with no inner exception; aEx is preserved here as the inner cause.
        var combined = string.Concat(
            aEx.InnerExceptions
               .Where(x => !(x is UnauthorizedAccessException))
               .Select(x => x.Message));
        throw new Exception(combined, aEx);
    }
    // BUG FIX: the original's trailing "catch (Exception ex) { throw new
    // Exception(ex.Message); }" destroyed both the exception type and the stack
    // trace; unexpected exceptions now propagate unchanged.
}