/// <summary>
/// Worker loop that downloads the hidemyass.com proxy-list pages one after another
/// (page 1, 2, 3, ...) and passes the proxy-table markup of each page to
/// <c>ExtractProxies</c>.
/// </summary>
/// <remarks>
/// Paging stops at the first page that cannot be downloaded or that does not contain
/// the <c>listtable</c> marker followed by the <c>pagination</c> marker.
/// The method never returns and keeps <c>_locker</c> held for its whole lifetime.
/// </remarks>
private void RunIPProviderThread()
{
    lock (_locker)
    {
        const string proxyListUrlFormat = "http://hidemyass.com/proxy-list/{0}";
        int pageIndex = 0;
        bool pagingFinished = false;

        while (true)
        {
            while (!pagingFinished)
            {
                if (FreeProxies == null)
                {
                    FreeProxies = new Queue<FreeProxy>();
                }

                pageIndex++;
                string pageUrl = string.Format(proxyListUrlFormat, pageIndex);

                try
                {
                    string pageDoc = HtmlUtil.GetPageDocument(pageUrl, UserAgents.GetFakeUserAgent(UserAgents.UserAgent.Chrome));
                    pageDoc = pageDoc.Replace(System.Environment.NewLine, "");

                    int tableStart = pageDoc.IndexOf("<table id=\"listtable\"");
                    int tableEnd = pageDoc.IndexOf("<div id=\"pagination\">");

                    // A page without the expected markers ends the paging. This is checked
                    // explicitly instead of letting Substring throw on a negative index/length.
                    if (tableStart < 0 || tableEnd < tableStart)
                    {
                        pagingFinished = true;
                    }
                    else
                    {
                        this.ExtractProxies(pageDoc.Substring(tableStart, tableEnd - tableStart));
                    }
                }
                catch (Exception)
                {
                    // Any download/parsing failure is treated as the end of the list.
                    pagingFinished = true;
                }
            }

            // NOTE(review): nothing resets pagingFinished or pageIndex, so after the first pass
            // this loop only sleeps in 10-minute steps. Confirm whether a periodic re-scrape
            // was intended before adding a reset here.
            Thread.Sleep(1000 * 60 * 10);
        }
    }
}
/// <summary>
/// Builds the search URL for the current search item (passing 1 as the last argument of
/// <c>BuildSearchUrl</c>), downloads that page and runs the provider error check on it.
/// </summary>
/// <param name="pgIsError">
/// Receives the result of <c>CheckPageIfError</c>. It is left untouched when the download
/// or the check throws.
/// </param>
/// <returns>
/// The downloaded HTML when the page is not in error; the error message produced by the
/// check when it is; an empty string when an exception occurred.
/// </returns>
private string StartGenerateSearchResultPageCallback(ref bool pgIsError)
{
    // Built outside the try block on purpose: a failure here propagates to the caller.
    string searchUrl = Models.DirectoryProviderSetting.BuildSearchUrl(
        base.DirectoryProviderSetting,
        base.SearchItem.searchItem,
        base.SearchItem.searchLocation,
        1);

    try
    {
        string pageHtml = HtmlUtil.GetPageDocument(searchUrl);
        string errorMessage = string.Empty;

        pgIsError = this.ypTool.CheckPageIfError(pageHtml, ref errorMessage);

        return pgIsError ? errorMessage : pageHtml;
    }
    catch
    {
        return string.Empty;
    }
}
/// <summary>
/// Worker loop that walks the proxy-list pages, passes the proxy-table markup of each page
/// to <c>ExtractProxies</c>, and starts again from page 1 once the last page was reached or
/// a reset was requested.
/// </summary>
/// <remarks>
/// <para>Pages are only fetched while <c>_resume</c> is set; the inner loop ends when
/// <c>_stopExtraction</c> is set or the end of the list was detected. The loop waits
/// 10 seconds between page iterations.</para>
/// <para>When <c>GetFreeProxy</c> returns a proxy the page is downloaded through it,
/// otherwise it is downloaded directly.</para>
/// <para>Failures are wrapped and reported through
/// <c>InvokeEventFreeIPGeneratorException</c>; the same page is then retried.</para>
/// <para>The method never returns and keeps <c>_proxyProviderLocker</c> held for its
/// whole lifetime.</para>
/// </remarks>
private void RunProxyProviderThread()
{
    lock (_proxyProviderLocker)
    {
        var hmaPgIdx = 1;
        var pgEnding = false;

        do
        {
            if (_reachedLastPage || _reset)
            {
                hmaPgIdx = 1;
                pgEnding = false;
                _reset = false;
            }

            while (!pgEnding && !_stopExtraction)
            {
                if (_resume)
                {
                    if (hmaPgIdx == 1 && OnBeginningOfProxyListPage != null)
                    {
                        OnBeginningOfProxyListPage(this, new EventArgs());
                    }

                    var hmaProxListUrl = string.Format(HMAProxListUrl, hmaPgIdx);
                    try
                    {
                        var fPxy = GetFreeProxy(true);
                        var hmaPgDoc = (fPxy != null)
                            ? HtmlUtil.GetPageDocument(new Uri(hmaProxListUrl), fPxy.ToWebProxy(), UserAgents.GetFakeUserAgent(UserAgents.Chrome41022280))
                            : HtmlUtil.GetPageDocument(hmaProxListUrl, UserAgents.GetFakeUserAgent(UserAgents.Chrome41022280));

                        hmaPgDoc = hmaPgDoc.Replace(Environment.NewLine, "");
                        var start = hmaPgDoc.IndexOf("<table id=\"listtable\"", StringComparison.Ordinal);
                        var end = hmaPgDoc.IndexOf("<div id=\"pagination\">", StringComparison.Ordinal);
                        hmaPgDoc = hmaPgDoc.Substring(start, end - start);

                        var results = ExtractProxies(hmaPgDoc);

                        if (_reachedLastPage)
                        {
                            hmaPgIdx = 1;
                        }
                        else
                        {
                            hmaPgIdx++;
                        }

                        // A result of 1 from ExtractProxies, or the flag already being set,
                        // is treated as the end of the list.
                        if (results == 1 || _reachedLastPage)
                        {
                            // The former "hmaPgIdx = hmaPgIdx--" was a no-op (post-decrement
                            // assigned back to itself). It is dropped: the outer loop resets
                            // the index to 1 because _reachedLastPage is set below.

                            // Remember the flag BEFORE setting it; testing it afterwards made
                            // the end-of-list notification unreachable.
                            var endAlreadyFlagged = _reachedLastPage;

                            pgEnding = true;
                            _reachedLastPage = true;

                            if (OnEndOfProxyListPage != null && !endAlreadyFlagged)
                            {
                                OnEndOfProxyListPage(this, new EventArgs());
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // The message now names this method (it used to say "RunIPProviderThread").
                        var errMsgPrefx = string.Format("Error@{0}/pgIdx:[{1}]/errmsg:[{2}]", "RunProxyProviderThread", hmaPgIdx, ex.Message);
                        var exception = new Exception(errMsgPrefx, ex);
                        InvokeEventFreeIPGeneratorException(new EventHandlers.FreeIPGeneratorExceptionEventArgs(exception));
                    }
                }

                Thread.Sleep(1000 * 10);
            }

            // Short pause so the outer loop does not spin while extraction is stopped.
            Thread.Sleep(10);
        } while (true);
    }
}
/// <summary>
/// Worker loop that downloads the hidemyass.com proxy-list pages one after another and
/// passes the proxy-table markup of each page to <c>ExtractProxies</c>.
/// </summary>
/// <remarks>
/// <para>Pages are only fetched while <c>m_resume</c> is set; the loop waits 10 seconds
/// between page iterations. A result of 1 from <c>ExtractProxies</c> is treated as the end
/// of the list, after which paging stays idle until <c>m_reset</c> is set.</para>
/// <para><c>OnBeginningOfProxyListPage</c> is raised before page 1 is fetched and
/// <c>OnEndOfProxyListPage</c> when the end of the list is detected.</para>
/// <para>Failures are wrapped and reported through
/// <c>InvokeEventFreeIPGeneratorException</c>; the same page is then retried.</para>
/// <para>The method never returns and keeps <c>_locker</c> held for its whole lifetime.</para>
/// </remarks>
private void RunIPProviderThread()
{
    lock (_locker)
    {
        string _hmaProxListUrl = "http://hidemyass.com/proxy-list/{0}";
        int _hmaPgIdx = 1;
        bool _pgEnding = false;

        do
        {
            if (m_reset)
            {
                _hmaPgIdx = 1;
                _pgEnding = false;
                m_reset = false;
            }

            while (!_pgEnding)
            {
                if (m_resume)
                {
                    if (_hmaPgIdx == 1 && OnBeginningOfProxyListPage != null)
                    {
                        OnBeginningOfProxyListPage(this, new EventArgs());
                    }

                    string _pageUrl = string.Format(_hmaProxListUrl, _hmaPgIdx);
                    try
                    {
                        string _hmaPgDoc = HtmlUtil.GetPageDocument(_pageUrl, UserAgents.GetFakeUserAgent(UserAgents.UserAgent.Chrome));
                        _hmaPgDoc = _hmaPgDoc.Replace(System.Environment.NewLine, "");

                        int _start = _hmaPgDoc.IndexOf("<table id=\"listtable\"");
                        int _end = _hmaPgDoc.IndexOf("<div id=\"pagination\">");
                        _hmaPgDoc = _hmaPgDoc.Substring(_start, _end - _start);

                        int _results = this.ExtractProxies(_hmaPgDoc);

                        if (_results == 1)
                        {
                            // End of the list: keep the index on this page. The original
                            // incremented and then tried to step back with
                            // "_hmaPgIdx = _hmaPgIdx--", which is a no-op.
                            _pgEnding = true;
                            if (OnEndOfProxyListPage != null)
                            {
                                OnEndOfProxyListPage(this, new EventArgs());
                            }
                        }
                        else
                        {
                            _hmaPgIdx++;
                        }
                    }
                    catch (Exception ex)
                    {
                        string _errMsgPrefx = string.Format("Error@{0}/pgIdx:[{1}]/errmsg:[{2}]", "RunIPProviderThread", _hmaPgIdx, ex.Message);
                        Exception _ex = new Exception(_errMsgPrefx, ex);
                        this.InvokeEventFreeIPGeneratorException(new EventHandlers.FreeIPGeneratorExceptionEventArgs(_ex));
                    }
                }

                Thread.Sleep(1000 * 10);
            }

            // Once paging has ended the outer loop only polls m_reset; pause briefly so it
            // does not spin at full CPU while waiting.
            Thread.Sleep(10);
        } while (true);
    }
}
/// <summary>
/// Downloads every search-result page referenced by the current search item's URL pointers
/// and collects the advertisements extracted from the valid pages.
/// </summary>
/// <remarks>
/// <para>The method sleeps five seconds before each page download.</para>
/// <para>Processing stops early when the configured <c>pagesToProcess</c> count is reached,
/// when a single page yields as many ads as <c>TotalResults</c>, or when a page fails the
/// provider error check. In the last case all pointers whose <c>IsValid</c> is false are
/// removed from the pointer list.</para>
/// <para>Exceptions are wrapped and reported through <c>InvokeEventFrameworkException</c>;
/// the ads collected so far are still returned.</para>
/// </remarks>
/// <returns>
/// The collected advertisements, or <c>null</c> when the search result has no pointers.
/// </returns>
public override Models.Advertisement.Advertisements StartSearchResultProcessCallback()
{
    if (base.SearchItem.searchResult.pointers == null || base.SearchItem.searchResult.pointers.Count == 0)
    {
        return null;
    }

    Models.Advertisement.Advertisements _ads = new Models.Advertisement.Advertisements();
    int _pgNo = 0;

    try
    {
        foreach (Models.SearchItem.SearchResult.UrlPointer _p in base.SearchItem.searchResult.pointers)
        {
            Thread.Sleep(5000);
            _pgNo++;

            string _htmlDoc = HtmlUtil.GetPageDocument(_p.SearchUrl);
            if (string.IsNullOrEmpty(_htmlDoc))
            {
                continue;
            }

            string _errMsg = string.Empty;
            bool _pgInError = this.ypTool.CheckPageIfError(_htmlDoc, ref _errMsg);
            _p.IsValid = !_pgInError;

            if (!_p.IsValid)
            {
                // Strip this pointer and every other pointer not marked valid.
                base.SearchItem.searchResult.pointers.RemoveAll(p => !p.IsValid);

                // The list being enumerated was just modified; continuing the foreach would
                // throw InvalidOperationException and report a spurious framework exception.
                break;
            }

            _p.SearchHtml = _htmlDoc;

            Stopwatch _stopwatch = Stopwatch.StartNew();
            Models.Advertisement.Advertisements _pageAds = this.ypTool.ExtractAds(_htmlDoc, base.InvokeEventAdExtracted);
            if (_pageAds != null && _pageAds.Count > 0)
            {
                _ads.AddRange(_pageAds.ToArray());
            }
            _stopwatch.Stop();

            base.InvokeEventUrlPointerProcessed(new Handlers.EventHandlers.UrlPointerProcessedEventArgs(_p, _stopwatch.Elapsed));

            if (base.SearchItem.pagesToProcess.HasValue && _pgNo == base.SearchItem.pagesToProcess)
            {
                break;
            }

            // NOTE(review): compares the ads of this single page (not the running total)
            // with TotalResults - confirm this is the intended stop condition.
            if (_pageAds != null && _pageAds.Count == base.SearchItem.searchResult.TotalResults)
            {
                break;
            }
        }
    }
    catch (Exception ex)
    {
        Exception _ex = new Exception(string.Format("Exception in {0}.{1}(?)", this.DirectoryProviderSetting.ServicedCountry.ToString(), "StartSearchResultProcessCallback"), ex);
        base.InvokeEventFrameworkException(new Handlers.EventHandlers.FrameworkExceptionEventArgs(_ex));
    }

    return _ads;
}
/// <summary>
/// Downloads the advertiser's detail page and fills the extended fields of the
/// advertisement (keywords, description, e-mail address) from it.
/// </summary>
/// <remarks>
/// Fields that cannot be found on the page are set to <c>"[NA]"</c>; <c>GoogleMap</c> and
/// <c>Locations</c> are always set to <c>"[NA]"</c>. When the download or parsing throws,
/// the exception is wrapped and passed to <c>frameworkExceptionInvoke</c> (if set) and the
/// advertisement is returned without the extended fields being assigned.
/// </remarks>
/// <param name="adInf">Advertisement to enrich; its <c>AdvertiserLink</c> is the page that is downloaded.</param>
/// <returns>
/// The same <paramref name="adInf"/> instance (unchanged when it is <c>null</c> or has no
/// advertiser link).
/// </returns>
public Models.Advertisement EscrapeAdInfoExtend(Models.Advertisement adInf)
{
    if (adInf == null || string.IsNullOrEmpty(adInf.AdvertiserLink))
    {
        return adInf;
    }

    const string notAvailable = "[NA]";

    try
    {
        string _htmlDoc = HtmlUtil.GetPageDocument(adInf.AdvertiserLink);
        HtmlAgilityPack.HtmlDocument htmDocAg = new HtmlAgilityPack.HtmlDocument();
        htmDocAg.LoadHtml(_htmlDoc);

        // <meta name="keywords" content="...">; a missing content attribute yields "[NA]"
        // instead of a NullReferenceException that would abort the whole extraction.
        string _keywords = notAvailable;
        HtmlAgilityPack.HtmlNode _metaKeywords = HtmlUtil.GetNode(htmDocAg.DocumentNode, "meta", "name", "keywords");
        if (_metaKeywords != null)
        {
            _keywords = _metaKeywords.GetAttributeValue("content", notAvailable);
        }

        // <meta name="description" content="...">
        string _description = notAvailable;
        HtmlAgilityPack.HtmlNode _metaDesc = HtmlUtil.GetNode(htmDocAg.DocumentNode, "meta", "name", "description");
        if (_metaDesc != null)
        {
            _description = _metaDesc.GetAttributeValue("content", notAvailable);
        }

        // The address is the text of the first <span> inside
        //   <a id="mainEmailAddressLink" class="emailBusinessLink" ...>
        //     <img class="emailAddressIcon" ...>
        //     <span>...e-mail address...</span>
        //   </a>
        string _emailAdd = notAvailable;
        HtmlAgilityPack.HtmlNode _emailAdNode = HtmlUtil.GetNode(htmDocAg.DocumentNode, "a", "id", "mainEmailAddressLink");
        if (_emailAdNode != null)
        {
            // FirstOrDefault instead of ToArray()[0]: a link without a span no longer throws.
            HtmlAgilityPack.HtmlNode _emailSpan = _emailAdNode.Descendants("span").FirstOrDefault();
            if (_emailSpan != null)
            {
                _emailAdd = _emailSpan.InnerText;
            }
        }

        adInf.GoogleMap = notAvailable;
        adInf.Keywords = _keywords;
        adInf.Description = _description;
        adInf.EmailAddress = _emailAdd;
        adInf.Locations = notAvailable;
    }
    catch (Exception ex)
    {
        if (this.frameworkExceptionInvoke != null)
        {
            Exception _ex = new Exception(string.Format("Exception in {0}.{1}(?)", this.directoryProviderSetting.ServicedCountry.ToString(), "EscrapeAdInfoExtend"), ex);
            this.frameworkExceptionInvoke(new Handlers.EventHandlers.FrameworkExceptionEventArgs(_ex));
        }
    }

    return adInf;
}
/// <summary>
/// Downloads the advertiser's detail page and fills the extended fields of the
/// advertisement (map coordinates, keywords, description, e-mail address) from it.
/// </summary>
/// <remarks>
/// Fields that cannot be found on the page are set to <c>"[NA]"</c>; <c>GoogleMap</c>,
/// <c>Rating</c> and <c>Locations</c> are always set to <c>"[NA]"</c>. When the download
/// or parsing throws, the exception is wrapped and passed to
/// <c>frameworkExceptionInvoke</c> (if set) and the advertisement is returned without the
/// extended fields being assigned.
/// </remarks>
/// <param name="adInf">Advertisement to enrich; its <c>AdvertiserLink</c> is the page that is downloaded.</param>
/// <returns>
/// The same <paramref name="adInf"/> instance (unchanged when it is <c>null</c> or has no
/// advertiser link).
/// </returns>
public Models.Advertisement EscrapeAdInfoExtend(Models.Advertisement adInf)
{
    if (adInf == null || string.IsNullOrEmpty(adInf.AdvertiserLink))
    {
        return adInf;
    }

    const string notAvailable = "[NA]";

    try
    {
        string _htmlDoc = HtmlUtil.GetPageDocument(adInf.AdvertiserLink);
        HtmlAgilityPack.HtmlDocument htmDocAg = new HtmlAgilityPack.HtmlDocument();
        htmDocAg.LoadHtml(_htmlDoc);

        string _latitude = notAvailable;
        string _longitude = notAvailable;
        HtmlAgilityPack.HtmlNode _mapDatNode = HtmlUtil.GetNode(htmDocAg.DocumentNode, "div", "id", "ypgMapContainer");
        if (_mapDatNode != null)
        {
            // Expected text inside the container:
            //   latLong = new VELatLong(43.8087172232, -79.5469648855); map.CreateAndLoadMap
            string _mapDat = _mapDatNode.InnerText;
            int _mrkrStart = _mapDat.IndexOf("VELatLong", StringComparison.Ordinal);
            int _mrkrEnd = _mapDat.IndexOf("map.CreateAndLoadMap", StringComparison.Ordinal);

            // Only parse when both markers are present and in order; a page without them
            // must not abort the extraction of the remaining fields.
            if (_mrkrStart >= 0 && _mrkrEnd > _mrkrStart)
            {
                string[] _coords = _mapDat
                    .Substring(_mrkrStart, _mrkrEnd - _mrkrStart)
                    .Replace("VELatLong(", "")
                    .Replace("map.CreateAndLoadMap", "")
                    .Replace(");", "")
                    .Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

                if (_coords.Length >= 2)
                {
                    // Trim each part: the longitude follows ", " and kept a leading space.
                    _latitude = _coords[0].Trim();
                    _longitude = _coords[1].Trim();
                }
            }
        }

        // <meta name="keywords" content="...">
        string _keywords = notAvailable;
        HtmlAgilityPack.HtmlNode _metaKeywords = HtmlUtil.GetNode(htmDocAg.DocumentNode, "meta", "name", "keywords");
        if (_metaKeywords != null)
        {
            _keywords = _metaKeywords.GetAttributeValue("content", notAvailable);
        }

        // <meta name="description" content="...">
        string _description = notAvailable;
        HtmlAgilityPack.HtmlNode _metaDesc = HtmlUtil.GetNode(htmDocAg.DocumentNode, "meta", "name", "description");
        if (_metaDesc != null)
        {
            _description = _metaDesc.GetAttributeValue("content", notAvailable);
        }

        // The address is read from the "content" attribute of the first <a> inside
        // <div class="busCardLeftLinks">. Null-safe lookups replace the former empty
        // catch block; a missing link or attribute still leaves "[NA]".
        string _emailAdd = notAvailable;
        HtmlAgilityPack.HtmlNode _emailAdNode = HtmlUtil.GetNode(htmDocAg.DocumentNode, "div", "class", "busCardLeftLinks");
        if (_emailAdNode != null)
        {
            HtmlAgilityPack.HtmlNode _firstLink = _emailAdNode.Descendants("a").FirstOrDefault();
            if (_firstLink != null)
            {
                _emailAdd = _firstLink.GetAttributeValue("content", notAvailable);
            }
        }

        adInf.Latitude = _latitude;
        adInf.Longtitude = _longitude;
        adInf.GoogleMap = notAvailable;
        adInf.Keywords = _keywords;
        adInf.Description = _description;
        adInf.Rating = notAvailable;
        adInf.EmailAddress = _emailAdd;
        adInf.Locations = notAvailable;
    }
    catch (Exception ex)
    {
        if (this.frameworkExceptionInvoke != null)
        {
            Exception _ex = new Exception(string.Format("Exception in {0}.{1}(?)", this.directoryProviderSetting.ServicedCountry.ToString(), "EscrapeAdInfoExtend"), ex);
            this.frameworkExceptionInvoke(new Handlers.EventHandlers.FrameworkExceptionEventArgs(_ex));
        }
    }

    return adInf;
}