Beispiel #1
0
        /// <summary>
        /// Worker loop that walks the numbered proxy-list pages, cuts the listing table
        /// out of each page and passes it to <c>ExtractProxies</c>.
        /// </summary>
        /// <remarks>
        /// A pass ends at the first page that cannot be downloaded or that does not contain
        /// both expected markers. The loop then waits ten minutes and starts a new pass from
        /// page 1. The method never returns and holds <c>_locker</c> for its whole lifetime.
        /// </remarks>
        private void RunIPProviderThread()
        {
            const string proxyListUrlFormat = "http://hidemyass.com/proxy-list/{0}";
            const string tableMarker        = "<table id=\"listtable\"";
            const string paginationMarker   = "<div id=\"pagination\">";

            lock (_locker)
            {
                while (true)
                {
                    // Both values are re-initialised for every pass; previously the end flag was
                    // never cleared, so the loop only slept after the first failure.
                    int  pageIndex    = 0;
                    bool passFinished = false;

                    while (!passFinished)
                    {
                        if (FreeProxies == null)
                        {
                            FreeProxies = new Queue<FreeProxy>();
                        }

                        pageIndex++;
                        string pageUrl = string.Format(proxyListUrlFormat, pageIndex);

                        try
                        {
                            string pageDoc = HtmlUtil.GetPageDocument(pageUrl, UserAgents.GetFakeUserAgent(UserAgents.UserAgent.Chrome));

                            int start = -1;
                            int end   = -1;

                            if (pageDoc != null)
                            {
                                pageDoc = pageDoc.Replace(System.Environment.NewLine, "");
                                start   = pageDoc.IndexOf(tableMarker, StringComparison.Ordinal);
                                end     = pageDoc.IndexOf(paginationMarker, StringComparison.Ordinal);
                            }

                            if (start < 0 || end < start)
                            {
                                // No usable listing on this page: treat it as the end of the list
                                // instead of relying on Substring to throw.
                                passFinished = true;
                            }
                            else
                            {
                                this.ExtractProxies(pageDoc.Substring(start, end - start));
                            }
                        }
                        catch (Exception)
                        {
                            // Download or extraction failed; stop this pass and retry after the pause.
                            passFinished = true;
                        }
                    }

                    Thread.Sleep(1000 * 60 * 10);
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Builds the search URL for the current search item (with <c>1</c> as the last
        /// argument, presumably the page number), downloads that page and runs the
        /// provider's error check on it.
        /// </summary>
        /// <param name="pgIsError">
        /// Receives the result of the page error check. It is left unchanged when the
        /// download or the check throws.
        /// </param>
        /// <returns>
        /// The error message when the page is flagged as an error, the downloaded markup
        /// otherwise, or an empty string when an exception occurred.
        /// </returns>
        private string StartGenerateSearchResultPageCallback(ref bool pgIsError)
        {
            string firstPageUrl = Models.DirectoryProviderSetting.BuildSearchUrl(
                base.DirectoryProviderSetting,
                base.SearchItem.searchItem,
                base.SearchItem.searchLocation,
                1);

            try
            {
                string pageMarkup   = HtmlUtil.GetPageDocument(firstPageUrl);
                string errorMessage = string.Empty;

                pgIsError = this.ypTool.CheckPageIfError(pageMarkup, ref errorMessage);

                return pgIsError ? errorMessage : pageMarkup;
            }
            catch
            {
                // Any failure while downloading or checking is reported as an empty result.
                return string.Empty;
            }
        }
Beispiel #3
0
        /// <summary>
        /// Worker loop that pages through the proxy list, cuts the listing table out of each
        /// page and passes it to <c>ExtractProxies</c>. Any failure while processing a page is
        /// wrapped and reported through <c>InvokeEventFreeIPGeneratorException</c>; the same
        /// page is then retried on the next iteration.
        /// </summary>
        /// <remarks>
        /// The method never returns and holds <c>_proxyProviderLocker</c> for its whole
        /// lifetime. It is steered through the fields <c>_resume</c>, <c>_reset</c>,
        /// <c>_stopExtraction</c> and <c>_reachedLastPage</c>, which are presumably changed by
        /// other members of the class.
        /// </remarks>
        private void RunProxyProviderThread()
        {
            lock (_proxyProviderLocker)
            {
                var hmaPgIdx = 1;
                var pgEnding = false;

                do
                {
                    if (_reachedLastPage || _reset)
                    {
                        // Start over from the first page.
                        hmaPgIdx = 1;
                        pgEnding = false;
                        _reset   = false;
                    }

                    while (!pgEnding && !_stopExtraction)
                    {
                        if (_resume)
                        {
                            if (hmaPgIdx == 1 && OnBeginningOfProxyListPage != null)
                            {
                                OnBeginningOfProxyListPage(this, new EventArgs());
                            }

                            var hmaProxListUrl = string.Format(HMAProxListUrl, hmaPgIdx);

                            try
                            {
                                // Download through an already known proxy when one is available.
                                var fPxy     = GetFreeProxy(true);
                                var hmaPgDoc = (fPxy != null)
                                    ? HtmlUtil.GetPageDocument(new Uri(hmaProxListUrl), fPxy.ToWebProxy(), UserAgents.GetFakeUserAgent(UserAgents.Chrome41022280))
                                    : HtmlUtil.GetPageDocument(hmaProxListUrl, UserAgents.GetFakeUserAgent(UserAgents.Chrome41022280));

                                hmaPgDoc = hmaPgDoc.Replace(Environment.NewLine, "");

                                // A missing marker makes Substring throw; that is reported by the catch below.
                                var start = hmaPgDoc.IndexOf("<table id=\"listtable\"", StringComparison.Ordinal);
                                var end   = hmaPgDoc.IndexOf("<div id=\"pagination\">", StringComparison.Ordinal);
                                hmaPgDoc = hmaPgDoc.Substring(start, end - start);

                                var results = ExtractProxies(hmaPgDoc);

                                if (_reachedLastPage)
                                {
                                    hmaPgIdx = 1;
                                }
                                else
                                {
                                    hmaPgIdx++;
                                }

                                // A result of 1 is treated as the end of the list.
                                if (results == 1 || _reachedLastPage)
                                {
                                    // Go back one page in the index. The former "hmaPgIdx = hmaPgIdx--"
                                    // assigned the old value back and therefore changed nothing.
                                    if (hmaPgIdx > 1)
                                    {
                                        hmaPgIdx--;
                                    }

                                    pgEnding = true;

                                    // Remember the previous state before flagging the last page. Testing
                                    // the flag right after setting it could never raise the event.
                                    var alreadyAtLastPage = _reachedLastPage;
                                    _reachedLastPage = true;

                                    if (!alreadyAtLastPage && OnEndOfProxyListPage != null)
                                    {
                                        OnEndOfProxyListPage(this, new EventArgs());
                                    }
                                }
                            }
                            catch (Exception ex)
                            {
                                var errMsgPrefx = string.Format("Error@{0}/pgIdx:[{1}]/errmsg:[{2}]", "RunProxyProviderThread", hmaPgIdx, ex.Message);
                                var exception   = new Exception(errMsgPrefx, ex);
                                InvokeEventFreeIPGeneratorException(new EventHandlers.FreeIPGeneratorExceptionEventArgs(exception));
                            }
                        }

                        // Pause between pages (and while extraction is not resumed).
                        Thread.Sleep(1000 * 10);
                    }

                    // Short pause so the outer loop does not spin while extraction is stopped.
                    Thread.Sleep(10);
                }while (true);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Worker loop that pages through the proxy list, cuts the listing table out of each
        /// page and passes it to <c>ExtractProxies</c>. Any failure while processing a page is
        /// wrapped and reported through <c>InvokeEventFreeIPGeneratorException</c>; the same
        /// page is then retried on the next iteration.
        /// </summary>
        /// <remarks>
        /// The method never returns and holds <c>_locker</c> for its whole lifetime. Once the
        /// end of the list has been detected it idles until <c>m_reset</c> becomes true;
        /// pages are only fetched while <c>m_resume</c> is true.
        /// </remarks>
        private void RunIPProviderThread()
        {
            lock (_locker)
            {
                string _hmaProxListUrl = "http://hidemyass.com/proxy-list/{0}";
                int    _hmaPgIdx       = 1;
                bool   _pgEnding       = false;

                do
                {
                    if (m_reset)
                    {
                        // Start over from the first page.
                        _hmaPgIdx = 1;
                        _pgEnding = false;
                        m_reset   = false;
                    }

                    while (!_pgEnding)
                    {
                        if (m_resume)
                        {
                            if (_hmaPgIdx == 1 && OnBeginningOfProxyListPage != null)
                            {
                                OnBeginningOfProxyListPage(this, new EventArgs());
                            }

                            string __hmaProxListUrl = string.Format(_hmaProxListUrl, _hmaPgIdx);

                            try
                            {
                                string _hmaPgDoc = HtmlUtil.GetPageDocument(__hmaProxListUrl, UserAgents.GetFakeUserAgent(UserAgents.UserAgent.Chrome));
                                _hmaPgDoc = _hmaPgDoc.Replace(System.Environment.NewLine, "");

                                // A missing marker makes Substring throw; that is reported by the catch below.
                                int _start = _hmaPgDoc.IndexOf("<table id=\"listtable\"", StringComparison.Ordinal);
                                int _end   = _hmaPgDoc.IndexOf("<div id=\"pagination\">", StringComparison.Ordinal);
                                _hmaPgDoc = _hmaPgDoc.Substring(_start, _end - _start);

                                int _results = this.ExtractProxies(_hmaPgDoc);

                                // A result of 1 is treated as the end of the list.
                                if (_results == 1)
                                {
                                    // Stay on the current page index. The former increment followed by
                                    // "_hmaPgIdx = _hmaPgIdx--" left the index one page too far, because
                                    // that assignment writes the old value back.
                                    _pgEnding = true;

                                    if (OnEndOfProxyListPage != null)
                                    {
                                        OnEndOfProxyListPage(this, new EventArgs());
                                    }
                                }
                                else
                                {
                                    _hmaPgIdx++;
                                }
                            }
                            catch (Exception ex)
                            {
                                string    _errMsgPrefx = string.Format("Error@{0}/pgIdx:[{1}]/errmsg:[{2}]", "RunIPProviderThread", _hmaPgIdx, ex.Message);
                                Exception _ex          = new Exception(_errMsgPrefx, ex);
                                this.InvokeEventFreeIPGeneratorException(new EventHandlers.FreeIPGeneratorExceptionEventArgs(_ex));
                            }
                        }

                        // Pause between pages (and while extraction is not resumed).
                        Thread.Sleep(1000 * 10);
                    }

                    // Short pause while waiting for a reset, so the outer loop does not spin at full CPU.
                    Thread.Sleep(10);
                }while(true);
            }
        }
Beispiel #5
0
        /// <summary>
        /// Downloads every search-result page referenced by the current search item's URL
        /// pointers, extracts the advertisements from each valid page and collects them.
        /// </summary>
        /// <remarks>
        /// Processing stops early when the configured number of pages has been processed, when
        /// a single page yields as many advertisements as <c>TotalResults</c>, or when a page
        /// is flagged as an error page. In the last case all pointers that are not marked
        /// valid are removed from the pointer list. Exceptions are wrapped and reported through
        /// <c>InvokeEventFrameworkException</c>; the advertisements collected so far are still
        /// returned.
        /// </remarks>
        /// <returns>
        /// The collected advertisements, or <c>null</c> when the search result has no pointers.
        /// </returns>
        public override Models.Advertisement.Advertisements StartSearchResultProcessCallback()
        {
            Models.Advertisement.Advertisements _ads = null;

            if (base.SearchItem.searchResult.pointers != null && base.SearchItem.searchResult.pointers.Count > 0)
            {
                int _pgNo = 0;

                _ads = new Models.Advertisement.Advertisements();

                try
                {
                    foreach (Models.SearchItem.SearchResult.UrlPointer _p in base.SearchItem.searchResult.pointers)
                    {
                        // Pause before every request.
                        Thread.Sleep(5000);

                        _pgNo++;
                        string _htmlDoc = HtmlUtil.GetPageDocument(_p.SearchUrl);

                        if (string.IsNullOrEmpty(_htmlDoc))
                        {
                            continue;
                        }

                        string _errMsg = string.Empty;

                        bool _pgInError = this.ypTool.CheckPageIfError(_htmlDoc, ref _errMsg);
                        _p.IsValid = !_pgInError;

                        if (!_p.IsValid)
                        {
                            // Strip all invalid pointers and stop. The list being enumerated has just
                            // been modified, so continuing the foreach would throw
                            // InvalidOperationException and report a spurious framework exception.
                            base.SearchItem.searchResult.pointers.RemoveAll(p => !p.IsValid);
                            break;
                        }

                        _p.SearchHtml = _htmlDoc;

                        Stopwatch _stopwatch = Stopwatch.StartNew();

                        Models.Advertisement.Advertisements __ads = this.ypTool.ExtractAds(_htmlDoc, base.InvokeEventAdExtracted);

                        if (__ads != null && __ads.Count > 0)
                        {
                            _ads.AddRange(__ads.ToArray());
                        }

                        _stopwatch.Stop();

                        base.InvokeEventUrlPointerProcessed(new Handlers.EventHandlers.UrlPointerProcessedEventArgs(_p, _stopwatch.Elapsed));

                        if (base.SearchItem.pagesToProcess.HasValue && _pgNo == base.SearchItem.pagesToProcess)
                        {
                            break;
                        }

                        if (__ads != null && __ads.Count == base.SearchItem.searchResult.TotalResults)
                        {
                            break;
                        }
                    }
                }
                catch (Exception ex)
                {
                    Exception _ex = new Exception(string.Format("Exception in {0}.{1}(?)", this.DirectoryProviderSetting.ServicedCountry.ToString(), "StartSearchResultProcessCallback"), ex);
                    base.InvokeEventFrameworkException(new Handlers.EventHandlers.FrameworkExceptionEventArgs(_ex));
                }
            }
            return(_ads);
        }
Beispiel #6
0
            /// <summary>
            /// Downloads the advertiser page referenced by <c>adInf.AdvertiserLink</c> and fills
            /// the keywords, description and e-mail address of the advertisement from it.
            /// </summary>
            /// <remarks>
            /// Every value that cannot be found on the page is set to <c>"[NA]"</c>; the Google
            /// map and locations values are always set to <c>"[NA]"</c>. Exceptions are wrapped
            /// and passed to <c>frameworkExceptionInvoke</c> when that callback is set; in that
            /// case <paramref name="adInf"/> is left unchanged.
            /// </remarks>
            /// <param name="adInf">The advertisement to enrich; may be <c>null</c>.</param>
            /// <returns>
            /// The same <paramref name="adInf"/> instance. It is returned untouched when it is
            /// <c>null</c> or has no advertiser link.
            /// </returns>
            public Models.Advertisement EscrapeAdInfoExtend(Models.Advertisement adInf)
            {
                if (adInf == null || string.IsNullOrEmpty(adInf.AdvertiserLink))
                {
                    return adInf;
                }

                try
                {
                    string _htmlDoc = HtmlUtil.GetPageDocument(adInf.AdvertiserLink);

                    HtmlAgilityPack.HtmlDocument htmDocAg = new HtmlAgilityPack.HtmlDocument();
                    htmDocAg.LoadHtml(_htmlDoc);

                    // A meta tag without a "content" attribute keeps the placeholder instead of
                    // throwing and thereby discarding every other extracted value.
                    string _keywords = "[NA]";
                    HtmlAgilityPack.HtmlNode _metaKeywords = HtmlUtil.GetNode(htmDocAg.DocumentNode, "meta", "name", "keywords");
                    if (_metaKeywords != null && _metaKeywords.Attributes["content"] != null)
                    {
                        _keywords = _metaKeywords.Attributes["content"].Value;
                    }

                    string _description = "[NA]";
                    HtmlAgilityPack.HtmlNode _metaDesc = HtmlUtil.GetNode(htmDocAg.DocumentNode, "meta", "name", "description");
                    if (_metaDesc != null && _metaDesc.Attributes["content"] != null)
                    {
                        _description = _metaDesc.Attributes["content"].Value;
                    }

                    // The address is read from the first <span> inside
                    // <a id="mainEmailAddressLink" ...>; a link without a span keeps the placeholder.
                    string _emailAdd = "[NA]";
                    HtmlAgilityPack.HtmlNode _emailAdNode = HtmlUtil.GetNode(htmDocAg.DocumentNode, "a", "id", "mainEmailAddressLink");
                    if (_emailAdNode != null)
                    {
                        HtmlAgilityPack.HtmlNode _emailSpan = _emailAdNode.Descendants("span").FirstOrDefault();
                        if (_emailSpan != null)
                        {
                            _emailAdd = _emailSpan.InnerText;
                        }
                    }

                    adInf.GoogleMap    = "[NA]";
                    adInf.Keywords     = _keywords;
                    adInf.Description  = _description;
                    adInf.EmailAddress = _emailAdd;
                    adInf.Locations    = "[NA]";
                }
                catch (Exception ex)
                {
                    if (this.frameworkExceptionInvoke != null)
                    {
                        Exception _ex = new Exception(string.Format("Exception in {0}.{1}(?)", this.directoryProviderSetting.ServicedCountry.ToString(), "EscrapeAdInfoExtend"), ex);
                        this.frameworkExceptionInvoke(new Handlers.EventHandlers.FrameworkExceptionEventArgs(_ex));
                    }
                }

                return adInf;
            }
            /// <summary>
            /// Downloads the advertiser page referenced by <c>adInf.AdvertiserLink</c> and fills
            /// the coordinates, keywords, description and e-mail address of the advertisement
            /// from it.
            /// </summary>
            /// <remarks>
            /// Every value that cannot be found on the page is set to <c>"[NA]"</c>; the Google
            /// map, rating and locations values are always set to <c>"[NA]"</c>. Exceptions are
            /// wrapped and passed to <c>frameworkExceptionInvoke</c> when that callback is set;
            /// in that case <paramref name="adInf"/> is left unchanged.
            /// </remarks>
            /// <param name="adInf">The advertisement to enrich; may be <c>null</c>.</param>
            /// <returns>
            /// The same <paramref name="adInf"/> instance. It is returned untouched when it is
            /// <c>null</c> or has no advertiser link.
            /// </returns>
            public Models.Advertisement EscrapeAdInfoExtend(Models.Advertisement adInf)
            {
                if (adInf == null || string.IsNullOrEmpty(adInf.AdvertiserLink))
                {
                    return adInf;
                }

                try
                {
                    string _htmlDoc = HtmlUtil.GetPageDocument(adInf.AdvertiserLink);

                    HtmlAgilityPack.HtmlDocument htmDocAg = new HtmlAgilityPack.HtmlDocument();
                    htmDocAg.LoadHtml(_htmlDoc);

                    string _latitude  = "[NA]";
                    string _longitude = "[NA]";
                    HtmlAgilityPack.HtmlNode _mapDatNode = HtmlUtil.GetNode(htmDocAg.DocumentNode, "div", "id", "ypgMapContainer");
                    if (_mapDatNode != null)
                    {
                        // Expected script text:
                        //   latLong = new VELatLong(43.8087172232, -79.5469648855); map.CreateAndLoadMap
                        string _mapDat    = _mapDatNode.InnerText;
                        int    _mrkrStart = _mapDat.IndexOf("VELatLong", StringComparison.Ordinal);
                        int    _mrkrEnd   = _mapDat.IndexOf("map.CreateAndLoadMap", StringComparison.Ordinal);

                        // Only parse when both markers are present and in order; otherwise keep the
                        // placeholders instead of aborting the whole enrichment with an exception.
                        if (_mrkrStart >= 0 && _mrkrEnd > _mrkrStart)
                        {
                            string[] _coords = _mapDat
                                               .Substring(_mrkrStart, _mrkrEnd - _mrkrStart)
                                               .Replace("VELatLong(", "")
                                               .Replace(");", "")
                                               .Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

                            if (_coords.Length >= 2)
                            {
                                // Trim each part: the longitude follows ", " and would otherwise keep
                                // its leading space.
                                _latitude  = _coords[0].Trim();
                                _longitude = _coords[1].Trim();
                            }
                        }
                    }

                    string _keywords = "[NA]";
                    HtmlAgilityPack.HtmlNode _metaKeywords = HtmlUtil.GetNode(htmDocAg.DocumentNode, "meta", "name", "keywords");
                    if (_metaKeywords != null && _metaKeywords.Attributes["content"] != null)
                    {
                        _keywords = _metaKeywords.Attributes["content"].Value;
                    }

                    string _description = "[NA]";
                    HtmlAgilityPack.HtmlNode _metaDesc = HtmlUtil.GetNode(htmDocAg.DocumentNode, "meta", "name", "description");
                    if (_metaDesc != null && _metaDesc.Attributes["content"] != null)
                    {
                        _description = _metaDesc.Attributes["content"].Value;
                    }

                    // The address is read from the "content" attribute of the first link inside the
                    // "busCardLeftLinks" container. A missing link or attribute keeps the placeholder;
                    // this replaces the former empty catch block.
                    string _emailAdd = "[NA]";
                    HtmlAgilityPack.HtmlNode _emailAdNode = HtmlUtil.GetNode(htmDocAg.DocumentNode, "div", "class", "busCardLeftLinks");
                    if (_emailAdNode != null)
                    {
                        HtmlAgilityPack.HtmlNode _firstLink = _emailAdNode.Descendants("a").FirstOrDefault();
                        if (_firstLink != null && _firstLink.Attributes["content"] != null)
                        {
                            _emailAdd = _firstLink.Attributes["content"].Value;
                        }
                    }

                    adInf.Latitude     = _latitude;
                    adInf.Longtitude   = _longitude;
                    adInf.GoogleMap    = "[NA]";
                    adInf.Keywords     = _keywords;
                    adInf.Description  = _description;
                    adInf.Rating       = "[NA]";
                    adInf.EmailAddress = _emailAdd;
                    adInf.Locations    = "[NA]";
                }
                catch (Exception ex)
                {
                    if (this.frameworkExceptionInvoke != null)
                    {
                        Exception _ex = new Exception(string.Format("Exception in {0}.{1}(?)", this.directoryProviderSetting.ServicedCountry.ToString(), "EscrapeAdInfoExtend"), ex);
                        this.frameworkExceptionInvoke(new Handlers.EventHandlers.FrameworkExceptionEventArgs(_ex));
                    }
                }

                return adInf;
            }