Ejemplo n.º 1
0
        /// <summary>
        /// Worker loop that crawls the paginated proxy list, hands the proxy table of
        /// each page to <c>ExtractProxies</c>, then sleeps ten minutes and crawls again
        /// from the first page.
        /// </summary>
        /// <remarks>
        /// The method never returns and keeps <c>_locker</c> held for its whole
        /// lifetime, including while it sleeps.
        /// A crawl cycle ends when a page no longer contains the list-table and
        /// pagination markers, or when fetching/parsing a page fails.
        /// </remarks>
        private void RunIPProviderThread()
        {
            lock (_locker)
            {
                const string proxyListUrlFormat = "http://hidemyass.com/proxy-list/{0}";

                while (true)
                {
                    // Per-cycle state. Previously these lived outside this loop and were
                    // never reset, so after the first crawl the thread only slept forever.
                    int  pageIndex  = 0;
                    bool reachedEnd = false;

                    while (!reachedEnd)
                    {
                        if (FreeProxies == null)
                        {
                            FreeProxies = new Queue <FreeProxy>();
                        }

                        pageIndex++;
                        string pageUrl = string.Format(proxyListUrlFormat, pageIndex);

                        try
                        {
                            string pageDoc = HtmlUtil.GetPageDocument(pageUrl, UserAgents.GetFakeUserAgent(UserAgents.UserAgent.Chrome));
                            pageDoc = pageDoc.Replace(System.Environment.NewLine, "");

                            int start = pageDoc.IndexOf("<table id=\"listtable\"", StringComparison.Ordinal);
                            int end   = pageDoc.IndexOf("<div id=\"pagination\">", StringComparison.Ordinal);

                            // A page without the expected markers means we ran past the last
                            // page; detect that explicitly instead of letting Substring throw.
                            if (start < 0 || end < start)
                            {
                                reachedEnd = true;
                            }
                            else
                            {
                                this.ExtractProxies(pageDoc.Substring(start, end - start));
                            }
                        }
                        catch (Exception)
                        {
                            // Best effort: any download/parsing failure ends the current
                            // cycle; the next cycle starts again after the sleep below.
                            reachedEnd = true;
                        }
                    }

                    // Wait ten minutes before refreshing the list.
                    Thread.Sleep(1000 * 60 * 10);
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Worker loop that walks the paginated proxy list (URL built from
        /// <c>HMAProxListUrl</c>), passes the proxy table of each page to
        /// <c>ExtractProxies</c> and raises the begin/end-of-list events.
        /// </summary>
        /// <remarks>
        /// The method never returns and keeps <c>_proxyProviderLocker</c> held for its
        /// whole lifetime. Pages are only fetched while <c>_resume</c> is set; the walk
        /// pauses while <c>_stopExtraction</c> is set and restarts from page 1 when
        /// <c>_reset</c> or <c>_reachedLastPage</c> is set. Failures while processing a
        /// page are reported through <c>InvokeEventFreeIPGeneratorException</c> and the
        /// same page is retried on the next iteration.
        /// </remarks>
        private void RunProxyProviderThread()
        {
            lock (_proxyProviderLocker)
            {
                var hmaPgIdx = 1;
                var pgEnding = false;

                //this.RotationId = Guid.NewGuid();

                do
                {
                    // Start over from the first page after the end of the list was hit
                    // or a reset was requested.
                    if (_reachedLastPage || _reset)
                    {
                        hmaPgIdx = 1;
                        pgEnding = false;
                        _reset   = false;
                    }

                    while (!pgEnding && !_stopExtraction)
                    {
                        if (_resume)
                        {
                            var beginHandler = OnBeginningOfProxyListPage;
                            if (hmaPgIdx == 1 && beginHandler != null)
                            {
                                beginHandler(this, new EventArgs());
                            }

                            var hmaProxListUrl = string.Format(HMAProxListUrl, hmaPgIdx);

                            try
                            {
                                // Fetch through a free proxy when one is available,
                                // otherwise connect directly.
                                var fPxy     = GetFreeProxy(true);
                                var hmaPgDoc = (fPxy != null)
                                    ? HtmlUtil.GetPageDocument(new Uri(hmaProxListUrl), fPxy.ToWebProxy(), UserAgents.GetFakeUserAgent(UserAgents.Chrome41022280))
                                    : HtmlUtil.GetPageDocument(hmaProxListUrl, UserAgents.GetFakeUserAgent(UserAgents.Chrome41022280));

                                hmaPgDoc = hmaPgDoc.Replace(Environment.NewLine, "");

                                var start = hmaPgDoc.IndexOf("<table id=\"listtable\"", StringComparison.Ordinal);
                                var end   = hmaPgDoc.IndexOf("<div id=\"pagination\">", StringComparison.Ordinal);
                                hmaPgDoc = hmaPgDoc.Substring(start, end - start);

                                var results = ExtractProxies(hmaPgDoc);

                                // Snapshot the flag once: it is overwritten below and the
                                // end-of-list event must fire only on the transition.
                                var wasLastPage = _reachedLastPage;
                                var isLastPage  = results == 1 || wasLastPage;

                                if (wasLastPage)
                                {
                                    hmaPgIdx = 1;
                                }
                                else if (!isLastPage)
                                {
                                    hmaPgIdx++;
                                }
                                // else: stay on the current page index. The original
                                // "hmaPgIdx = hmaPgIdx--" was a no-op that left the index
                                // one past the last page.

                                if (isLastPage)
                                {
                                    pgEnding         = true;
                                    _reachedLastPage = true;

                                    // Previously the flag was set right before being tested
                                    // with "!_reachedLastPage", so this event never fired.
                                    var endHandler = OnEndOfProxyListPage;
                                    if (endHandler != null && !wasLastPage)
                                    {
                                        endHandler(this, new EventArgs());
                                    }
                                }
                            }
                            catch (Exception ex)
                            {
                                var errMsgPrefx = string.Format("Error@{0}/pgIdx:[{1}]/errmsg:[{2}]", "RunProxyProviderThread", hmaPgIdx, ex.Message);
                                var exception   = new Exception(errMsgPrefx, ex);
                                InvokeEventFreeIPGeneratorException(new EventHandlers.FreeIPGeneratorExceptionEventArgs(exception));
                            }
                        }
                        // Throttle: one page (or one paused poll) every ten seconds.
                        Thread.Sleep(1000 * 10);
                    }
                    // Short idle wait so the outer loop does not spin while stopped/ended.
                    Thread.Sleep(10);
                }while (true);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Worker loop that walks the paginated proxy list, passes the proxy table of
        /// each page to <c>ExtractProxies</c> and raises the begin/end-of-list events.
        /// </summary>
        /// <remarks>
        /// The method never returns and keeps <c>_locker</c> held for its whole
        /// lifetime. Pages are only fetched while <c>m_resume</c> is set. Once the end
        /// of the list is reached the loop idles until <c>m_reset</c> is set, which
        /// restarts the walk from page 1. Failures while processing a page are reported
        /// through <c>InvokeEventFreeIPGeneratorException</c> and the same page is
        /// retried on the next iteration.
        /// </remarks>
        private void RunIPProviderThread()
        {
            lock (_locker)
            {
                string _hmaProxListUrl = "http://hidemyass.com/proxy-list/{0}";
                int    _hmaPgIdx       = 1;
                bool   _pgEnding       = false;

                do
                {
                    if (m_reset)
                    {
                        _hmaPgIdx = 1;
                        _pgEnding = false;
                        m_reset   = false;
                    }

                    while (!_pgEnding)
                    {
                        if (m_resume)
                        {
                            if (_hmaPgIdx == 1 && OnBeginningOfProxyListPage != null)
                            {
                                OnBeginningOfProxyListPage(this, new EventArgs());
                            }

                            string __hmaProxListUrl = string.Format(_hmaProxListUrl, _hmaPgIdx);
                            string _hmaPgDoc        = string.Empty;

                            try
                            {
                                _hmaPgDoc = HtmlUtil.GetPageDocument(__hmaProxListUrl, UserAgents.GetFakeUserAgent(UserAgents.UserAgent.Chrome));
                                _hmaPgDoc = _hmaPgDoc.Replace(System.Environment.NewLine, "");

                                int _start = _hmaPgDoc.IndexOf("<table id=\"listtable\"");
                                int _end   = _hmaPgDoc.IndexOf("<div id=\"pagination\">");
                                _hmaPgDoc = _hmaPgDoc.Substring(_start, _end - _start);

                                int _results = this.ExtractProxies(_hmaPgDoc);

                                if (_results == 1)
                                {
                                    // Last page: keep the index on this page. The original
                                    // incremented and then ran "_hmaPgIdx = _hmaPgIdx--",
                                    // a no-op that left the index one past the last page.
                                    _pgEnding = true;

                                    if (OnEndOfProxyListPage != null)
                                    {
                                        OnEndOfProxyListPage(this, new EventArgs());
                                    }
                                }
                                else
                                {
                                    _hmaPgIdx++;
                                }
                            }
                            catch (Exception ex)
                            {
                                string    _errMsgPrefx = string.Format("Error@{0}/pgIdx:[{1}]/errmsg:[{2}]", "RunIPProviderThread", _hmaPgIdx, ex.Message);
                                Exception _ex          = new Exception(_errMsgPrefx, ex);
                                this.InvokeEventFreeIPGeneratorException(new EventHandlers.FreeIPGeneratorExceptionEventArgs(_ex));
                            }
                        }
                        // Throttle: one page (or one paused poll) every ten seconds.
                        Thread.Sleep(1000 * 10);
                    }

                    // Idle briefly while waiting for m_reset; without this the outer loop
                    // busy-spins at full CPU once the end of the list has been reached.
                    Thread.Sleep(10);
                }while(true);
            }
        }
        /// <summary>
        /// Downloads the document at <paramref name="site"/> as a string using a
        /// <c>ScrapingBrowser</c> with meta-redirects enabled and a 30 second timeout.
        /// </summary>
        /// <param name="site">Address of the page to download.</param>
        /// <param name="redirectedUrl">
        /// Set to the browser's <c>Referer</c> absolute URI after a successful download;
        /// left untouched when the download fails.
        /// </param>
        /// <param name="webProxy">Proxy to route the request through, or <c>null</c> for none.</param>
        /// <param name="userAgent">
        /// User agent to send; when <c>null</c>,
        /// <c>UserAgents.GetFakeUserAgent(UserAgents.Mozilla22)</c> is used.
        /// </param>
        /// <param name="ignoreCookies">Value for the browser's <c>IgnoreCookies</c> setting (<c>null</c> counts as <c>false</c>).</param>
        /// <param name="useDefaultCookiesParser">Currently unused.</param>
        /// <param name="headers">
        /// Accepted for API compatibility but currently NOT applied to the request.
        /// </param>
        /// <returns>
        /// The downloaded document, or an empty string when the server reported a
        /// protocol error without providing a response object.
        /// </returns>
        /// <exception cref="WebException">
        /// A non-protocol web failure (rethrown as is), or a protocol error with a
        /// response (rethrown with the status code and description in the message).
        /// </exception>
        /// <exception cref="Exception">Any other failure, wrapped with the original as inner exception.</exception>
        public static string GetPageDocument(Uri site,
                                             ref string redirectedUrl,
                                             IWebProxy webProxy           = null,
                                             FakeUserAgent userAgent      = null,
                                             bool?ignoreCookies           = true,
                                             bool?useDefaultCookiesParser = false,
                                             params KeyValuePair <string, string>[] headers)
        {
            if (userAgent == null)
            {
                userAgent = UserAgents.GetFakeUserAgent(UserAgents.Mozilla22);
            }

            try
            {
                // TODO(review): `headers` is not forwarded to the browser. The previous
                // code copied it into a dictionary that was never used, so that dead
                // copy was removed rather than guessing how headers should be applied.
                var browser = new ScrapingBrowser
                {
                    UserAgent = userAgent,
                };

                if (webProxy != null)
                {
                    browser.Proxy = webProxy;
                }

                browser.IgnoreCookies     = ignoreCookies.GetValueOrDefault();
                browser.AllowMetaRedirect = true;
                browser.Timeout           = new TimeSpan(0, 0, 30);

                var html = browser.DownloadString(site);

                redirectedUrl = browser.Referer.AbsoluteUri;
                return(html);
            }
            catch (WebException webEx)
            {
                // Only protocol errors carry a server response worth describing;
                // everything else (DNS, timeout, ...) is passed through untouched.
                if (webEx.Status != WebExceptionStatus.ProtocolError)
                {
                    throw;
                }

                var response = ((HttpWebResponse)webEx.Response);

                if (response == null)
                {
                    return(string.Empty);
                }

                var error = $"Error occurred: {response.StatusCode}; Description : {response.StatusDescription}";

                if (webEx.InnerException != null)
                {
                    error += $"; {webEx.InnerException.Message}";
                }

                // Keep the original exception, status and response reachable for callers
                // instead of discarding them with the rewritten message.
                throw new WebException(error, webEx, webEx.Status, webEx.Response);
            }
            catch (AggregateException aEx)
            {
                // Concatenate the messages of all inner exceptions except
                // UnauthorizedAccessException, which is deliberately left out.
                var sErr = string.Empty;
                foreach (var inner in aEx.InnerExceptions)
                {
                    if (!(inner is UnauthorizedAccessException))
                    {
                        sErr += inner.Message;
                    }
                }

                throw new Exception(sErr, aEx);
            }
            catch (Exception ex)
            {
                // Same type and message as before, but the original exception (and its
                // stack trace) is preserved as InnerException.
                throw new Exception(ex.Message, ex);
            }
        }