/// <summary>
/// Worker loop that walks the hidemyass.com proxy-list pages in ascending page order and
/// passes the proxy-table markup of each page to <c>ExtractProxies</c>.
/// </summary>
/// <remarks>
/// The whole loop runs while holding <c>_locker</c> and never returns.
/// Paging stops at the first page that raises any exception (download failure, missing
/// table/pagination markers, extraction error); the exception itself is discarded.
/// NOTE(review): the stop flag is never cleared afterwards, so once paging has stopped the
/// thread only wakes up every ten minutes and fetches nothing more - confirm whether a
/// periodic refresh was intended.
/// </remarks>
private void RunIPProviderThread()
{
    lock (_locker)
    {
        const string urlTemplate = "http://hidemyass.com/proxy-list/{0}";
        var pageNumber = 0;
        var pagingStopped = false;

        for (;;)
        {
            while (!pagingStopped)
            {
                // Lazily create the shared queue before touching a page.
                if (FreeProxies == null)
                {
                    FreeProxies = new Queue<FreeProxy>();
                }

                pageNumber++;
                var pageUrl = string.Format(urlTemplate, pageNumber);

                try
                {
                    var markup = HtmlUtil
                        .GetPageDocument(pageUrl, UserAgents.GetFakeUserAgent(UserAgents.UserAgent.Chrome))
                        .Replace(System.Environment.NewLine, "");

                    // Keep only the span from the proxy table up to the pagination block.
                    // A missing marker makes Substring throw, which ends paging below.
                    var tableStart = markup.IndexOf("<table id=\"listtable\"");
                    var paginationStart = markup.IndexOf("<div id=\"pagination\">");
                    var tableMarkup = markup.Substring(tableStart, paginationStart - tableStart);

                    this.ExtractProxies(tableMarkup);
                }
                catch (Exception)
                {
                    // Any failure is treated as "no more pages".
                    pagingStopped = true;
                }
            }

            Thread.Sleep(TimeSpan.FromMinutes(10));
        }
    }
}
/// <summary>
/// Worker loop that downloads the proxy-list pages built from <c>HMAProxListUrl</c>, one page
/// at a time, and passes the proxy-table markup of each page to <c>ExtractProxies</c>.
/// </summary>
/// <remarks>
/// <para>The loop runs while holding <c>_proxyProviderLocker</c> and never returns.</para>
/// <para>
/// Control flags read by the loop: <c>_resume</c> (pages are only fetched while it is true),
/// <c>_stopExtraction</c> (suspends paging), <c>_reset</c> and <c>_reachedLastPage</c>
/// (either one restarts paging from page 1).
/// </para>
/// <para>
/// Failures while fetching or parsing a page are not thrown; they are wrapped and reported
/// through <c>InvokeEventFreeIPGeneratorException</c>, and the same page is retried after
/// the ten second pause.
/// </para>
/// </remarks>
private void RunProxyProviderThread()
{
    lock (_proxyProviderLocker)
    {
        var pageIndex = 1;
        var pagingEnded = false;

        do
        {
            if (_reachedLastPage || _reset)
            {
                pageIndex = 1;
                pagingEnded = false;
                _reset = false;
            }

            while (!pagingEnded && !_stopExtraction)
            {
                if (_resume)
                {
                    if (pageIndex == 1)
                    {
                        OnBeginningOfProxyListPage?.Invoke(this, EventArgs.Empty);
                    }

                    var pageUrl = string.Format(HMAProxListUrl, pageIndex);
                    try
                    {
                        // Route the request through a previously harvested proxy when one is available.
                        var proxy = GetFreeProxy(true);
                        var pageHtml = (proxy != null)
                            ? HtmlUtil.GetPageDocument(new Uri(pageUrl), proxy.ToWebProxy(), UserAgents.GetFakeUserAgent(UserAgents.Chrome41022280))
                            : HtmlUtil.GetPageDocument(pageUrl, UserAgents.GetFakeUserAgent(UserAgents.Chrome41022280));
                        pageHtml = pageHtml.Replace(Environment.NewLine, "");

                        // Keep only the span from the proxy table up to the pagination block.
                        // A missing marker makes Substring throw; that is reported by the catch below.
                        var tableStart = pageHtml.IndexOf("<table id=\"listtable\"", StringComparison.Ordinal);
                        var paginationStart = pageHtml.IndexOf("<div id=\"pagination\">", StringComparison.Ordinal);
                        var tableMarkup = pageHtml.Substring(tableStart, paginationStart - tableStart);

                        var results = ExtractProxies(tableMarkup);

                        // _reachedLastPage is re-read here because it is a field that can change
                        // outside this method (presumably inside ExtractProxies - confirm).
                        var alreadyFlagged = _reachedLastPage;
                        if (alreadyFlagged)
                        {
                            pageIndex = 1;
                        }
                        else
                        {
                            pageIndex++;
                        }

                        // A result of 1 is treated as the end-of-list signal.
                        if (results == 1 || alreadyFlagged)
                        {
                            if (!alreadyFlagged)
                            {
                                // Go back one page in index. The original "hmaPgIdx = hmaPgIdx--"
                                // assigned the old value back and therefore did nothing.
                                pageIndex--;
                            }

                            pagingEnded = true;
                            _reachedLastPage = true;

                            // Raise the event only when this method is the one that detected the end.
                            // The original tested !_reachedLastPage after setting it to true, so the
                            // event could never fire.
                            if (!alreadyFlagged)
                            {
                                OnEndOfProxyListPage?.Invoke(this, EventArgs.Empty);
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        var errMsgPrefx = string.Format("Error@{0}/pgIdx:[{1}]/errmsg:[{2}]", nameof(RunProxyProviderThread), pageIndex, ex.Message);
                        var exception = new Exception(errMsgPrefx, ex);
                        InvokeEventFreeIPGeneratorException(new EventHandlers.FreeIPGeneratorExceptionEventArgs(exception));
                    }
                }

                // Pause between pages (and between polls of _resume while paused).
                Thread.Sleep(1000 * 10);
            }

            // Short idle pause so the outer loop does not spin while waiting for a reset.
            Thread.Sleep(10);
        } while (true);
    }
}
/// <summary>
/// Worker loop that downloads the hidemyass.com proxy-list pages, one page at a time, and
/// passes the proxy-table markup of each page to <c>ExtractProxies</c>.
/// </summary>
/// <remarks>
/// <para>The loop runs while holding <c>_locker</c> and never returns.</para>
/// <para>
/// Pages are only fetched while <c>m_resume</c> is true. Paging ends when
/// <c>ExtractProxies</c> returns 1, at which point <c>OnEndOfProxyListPage</c> is raised;
/// setting <c>m_reset</c> restarts paging from page 1.
/// </para>
/// <para>
/// Failures while fetching or parsing a page are not thrown; they are wrapped and reported
/// through <c>InvokeEventFreeIPGeneratorException</c>, and the same page is retried after
/// the ten second pause.
/// </para>
/// </remarks>
private void RunIPProviderThread()
{
    lock (_locker)
    {
        const string proxyListUrlTemplate = "http://hidemyass.com/proxy-list/{0}";
        var pageIndex = 1;
        var pagingEnded = false;

        do
        {
            if (m_reset)
            {
                pageIndex = 1;
                pagingEnded = false;
                m_reset = false;
            }

            while (!pagingEnded)
            {
                if (m_resume)
                {
                    if (pageIndex == 1)
                    {
                        OnBeginningOfProxyListPage?.Invoke(this, EventArgs.Empty);
                    }

                    var pageUrl = string.Format(proxyListUrlTemplate, pageIndex);
                    try
                    {
                        var pageHtml = HtmlUtil.GetPageDocument(pageUrl, UserAgents.GetFakeUserAgent(UserAgents.UserAgent.Chrome));
                        pageHtml = pageHtml.Replace(System.Environment.NewLine, "");

                        // Keep only the span from the proxy table up to the pagination block.
                        // A missing marker makes Substring throw; that is reported by the catch below.
                        var tableStart = pageHtml.IndexOf("<table id=\"listtable\"");
                        var paginationStart = pageHtml.IndexOf("<div id=\"pagination\">");
                        var tableMarkup = pageHtml.Substring(tableStart, paginationStart - tableStart);

                        var results = this.ExtractProxies(tableMarkup);

                        // A result of 1 is treated as the end-of-list signal.
                        if (results == 1)
                        {
                            // Stay on the last page. The original incremented and then executed
                            // "_hmaPgIdx = _hmaPgIdx--", which assigns the old value back and
                            // therefore never undid the increment.
                            pagingEnded = true;
                            OnEndOfProxyListPage?.Invoke(this, EventArgs.Empty);
                        }
                        else
                        {
                            pageIndex++;
                        }
                    }
                    catch (Exception ex)
                    {
                        var errMsgPrefx = string.Format("Error@{0}/pgIdx:[{1}]/errmsg:[{2}]", nameof(RunIPProviderThread), pageIndex, ex.Message);
                        var wrapped = new Exception(errMsgPrefx, ex);
                        this.InvokeEventFreeIPGeneratorException(new EventHandlers.FreeIPGeneratorExceptionEventArgs(wrapped));
                    }
                }

                // Pause between pages (and between polls of m_resume while paused).
                Thread.Sleep(1000 * 10);
            }

            // Once paging has ended this outer loop only waits for m_reset. Without a pause it
            // would spin at full speed and keep one core busy.
            Thread.Sleep(10);
        } while (true);
    }
}
/// <summary>
/// Downloads <paramref name="site"/> with a <c>ScrapingBrowser</c> and returns the response body.
/// </summary>
/// <param name="site">Address of the page to download.</param>
/// <param name="redirectedUrl">
/// On success, receives <c>browser.Referer.AbsoluteUri</c> as reported by the browser after the
/// download (presumably the final address after redirects - confirm against ScrapingBrowser).
/// Left untouched when the method returns <see cref="string.Empty"/> or throws.
/// </param>
/// <param name="webProxy">Optional proxy to route the request through; <c>null</c> for a direct request.</param>
/// <param name="userAgent">User agent to present; defaults to <c>UserAgents.Mozilla22</c> when <c>null</c>.</param>
/// <param name="ignoreCookies">Value for the browser's <c>IgnoreCookies</c> setting; <c>null</c> is treated as <c>false</c>.</param>
/// <param name="useDefaultCookiesParser">Accepted for compatibility; not used by this method.</param>
/// <param name="headers">Accepted for compatibility; the values are not applied to the request by this method.</param>
/// <returns>
/// The downloaded markup, or <see cref="string.Empty"/> when the server answered with a protocol
/// error but no HTTP response object is available.
/// </returns>
/// <exception cref="WebException">
/// Rethrown unchanged for non-protocol failures. For protocol errors a new instance is thrown whose
/// message contains the HTTP status code and description and which carries the original status,
/// response and exception.
/// </exception>
/// <exception cref="Exception">
/// Any other failure, wrapped with the original message and the original exception as
/// <see cref="Exception.InnerException"/>.
/// </exception>
public static string GetPageDocument(Uri site, ref string redirectedUrl, IWebProxy webProxy = null, FakeUserAgent userAgent = null, bool? ignoreCookies = true, bool? useDefaultCookiesParser = false, params KeyValuePair<string, string>[] headers)
{
    if (userAgent == null)
    {
        userAgent = UserAgents.GetFakeUserAgent(UserAgents.Mozilla22);
    }

    try
    {
        var browser = new ScrapingBrowser
        {
            UserAgent = userAgent,
        };

        if (webProxy != null)
        {
            browser.Proxy = webProxy;
        }

        browser.IgnoreCookies = ignoreCookies.GetValueOrDefault();
        browser.AllowMetaRedirect = true;
        browser.Timeout = new TimeSpan(0, 0, 30);

        var html = browser.DownloadString(site);
        redirectedUrl = browser.Referer.AbsoluteUri;
        return html;
    }
    catch (WebException webEx)
    {
        // Only protocol errors (an HTTP status was received) are translated; everything else
        // is passed on untouched.
        if (webEx.Status != WebExceptionStatus.ProtocolError)
        {
            throw;
        }

        // "as" instead of a hard cast: a non-HTTP response must not raise InvalidCastException
        // from inside this handler.
        var response = webEx.Response as HttpWebResponse;
        if (response == null)
        {
            return string.Empty;
        }

        var error = $"Error occurred: {response.StatusCode}; Description : {response.StatusDescription}";
        if (webEx.InnerException != null)
        {
            error += $"; {webEx.InnerException.Message}";
        }

        // Keep status, response and the original exception so callers can still inspect them.
        throw new WebException(error, webEx, webEx.Status, webEx.Response);
    }
    catch (AggregateException aEx)
    {
        // Concatenate the messages of all inner exceptions except UnauthorizedAccessException,
        // which is deliberately left out of the message.
        var messages = aEx.InnerExceptions
            .Where(inner => !(inner is UnauthorizedAccessException))
            .Select(inner => inner.Message);

        throw new Exception(string.Concat(messages), aEx);
    }
    catch (Exception ex)
    {
        // Same exception type and message as before, but the cause is preserved.
        throw new Exception(ex.Message, ex);
    }
}