/// <summary>
/// Parses a results page and extracts one advertisement per listing node
/// (<c>div</c> elements whose <c>class</c> attribute is <c>ypgListing clearfix</c>).
/// </summary>
/// <param name="htmlDocument">Raw HTML of the results page.</param>
/// <param name="actionAdExtracted">
/// Optional callback invoked after each listing has been processed. It receives the
/// extracted ad, the time spent scraping it and the 1-based position of the listing
/// on the page. May be <c>null</c>.
/// </param>
/// <returns>
/// The ads collected so far, or <c>null</c> when the page contains no listing nodes
/// or a failure occurred before the collection was created.
/// </returns>
/// <remarks>
/// Exceptions are not propagated to the caller: they are wrapped and handed to
/// <c>frameworkExceptionInvoke</c> when that delegate is set, otherwise they are dropped.
/// </remarks>
public Models.Advertisement.Advertisements ExtractAds(string htmlDocument, Action<Handlers.EventHandlers.AdExtractedEventArgs> actionAdExtracted)
{
    Models.Advertisement.Advertisements ads = null;

    try
    {
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(htmlDocument);

        var listingNodes = HtmlUtil.GetNodeCollection(doc.DocumentNode, "div", "class", "ypgListing clearfix");
        if (listingNodes != null && listingNodes.Any())
        {
            ads = new Models.Advertisement.Advertisements();
            int pageItemIndex = 0;

            foreach (HtmlAgilityPack.HtmlNode listingNode in listingNodes)
            {
                pageItemIndex++;

                // Time only the scraping work for this listing.
                Stopwatch stopwatch = Stopwatch.StartNew();

                Models.Advertisement ad = this.EscrapeAdInfo(listingNode);
                if (ad != null)
                {
                    ad = this.EscrapeAdInfoExtend(ad);
                }

                stopwatch.Stop();

                // NOTE(review): a null ad (scrape returned nothing) is still added and
                // reported, exactly as before, so page positions stay aligned with the
                // listing nodes. Confirm consumers tolerate null entries.
                ads.Add(ad);

                if (actionAdExtracted != null)
                {
                    actionAdExtracted(new Handlers.EventHandlers.AdExtractedEventArgs(ad, stopwatch.Elapsed, pageItemIndex));
                }
            }
        }
    }
    catch (Exception ex)
    {
        if (this.frameworkExceptionInvoke != null)
        {
            // Wrap the failure with the provider's serviced country for context.
            Exception wrapped = new Exception(
                string.Format("Exception in {0}.{1}(?)", this.directoryProviderSetting.ServicedCountry.ToString(), "ExtractAds"),
                ex);
            this.frameworkExceptionInvoke(new Handlers.EventHandlers.FrameworkExceptionEventArgs(wrapped));
        }
    }

    return ads;
}
/// <summary>
/// Counts the listing entries on a results page.
/// </summary>
/// <param name="htmlDocument">Raw HTML of the results page.</param>
/// <returns>
/// The number of <c>li</c> nodes whose <c>class</c> attribute is
/// <c>gold mappableListing listingContainer omnitureListing</c>; 0 when none are found.
/// </returns>
public int GetResultsPerPage(string htmlDocument)
{
    var parsedPage = new HtmlAgilityPack.HtmlDocument();
    parsedPage.LoadHtml(htmlDocument);

    var listings = HtmlUtil.GetNodeCollection(
        parsedPage.DocumentNode,
        "li",
        "class",
        "gold mappableListing listingContainer omnitureListing");

    // A missing collection and an empty one both mean "no results".
    if (listings == null)
    {
        return 0;
    }

    return listings.Count();
}
/// <summary>
/// Counts the listing entries on a results page.
/// </summary>
/// <param name="htmlDocument">Raw HTML of the results page.</param>
/// <returns>
/// The number of <c>div</c> nodes whose <c>class</c> attribute is
/// <c>ypgListing clearfix</c>; 0 when none are found.
/// </returns>
public int GetResultsPerPage(string htmlDocument)
{
    var parsedPage = new HtmlAgilityPack.HtmlDocument();
    parsedPage.LoadHtml(htmlDocument);

    var listings = HtmlUtil.GetNodeCollection(
        parsedPage.DocumentNode,
        "div",
        "class",
        "ypgListing clearfix");

    // A missing collection and an empty one both mean "no results".
    if (listings == null)
    {
        return 0;
    }

    return listings.Count();
}
/// <summary>
/// Parses the proxy table (<c>table id="proxylist"</c>) of a provider page into
/// <see cref="FreeProxy"/> entries.
/// </summary>
/// <param name="pageContentDocument">Raw HTML of the provider page.</param>
/// <param name="freeProxyFetched">
/// Optional callback invoked once for every proxy that is enqueued. May be <c>null</c>.
/// </param>
/// <returns>
/// A queue with one entry per data row, or <c>null</c> when the table is missing or
/// contains no rows at all.
/// </returns>
/// <exception cref="FormatException">A row's port is not a valid integer.</exception>
/// <exception cref="IndexOutOfRangeException">
/// A row's address cell is not in <c>host:port</c> form, or its country cell is empty.
/// </exception>
public override Queue<FreeProxy> LoadUpIPProxies(string pageContentDocument, Action<FreeProxy> freeProxyFetched)
{
    // Columns read below: 0 = "ip:port", 1 = anonymity, 2 = last check (unused), 3 = country.
    const int RequiredCellCount = 4;

    var doc = new HtmlDocument();
    doc.LoadHtml(pageContentDocument);

    var table = HtmlUtil.GetNodeByAttribute(doc.DocumentNode, "table", "id", "proxylist");
    if (table == null)
    {
        return null;
    }

    var rows = HtmlUtil.GetNodeCollection(table, "tr");
    if (rows == null)
    {
        // No row collection is treated the same as a missing table.
        return null;
    }

    Queue<FreeProxy> freeProxies = null;

    // Snapshot the rows first: script nodes are removed from the DOM while iterating.
    foreach (var row in rows.ToList())
    {
        // The queue exists as soon as the table has at least one row, even a header row.
        if (freeProxies == null)
        {
            freeProxies = new Queue<FreeProxy>();
        }

        var cells = row.Descendants("td").ToArray();
        if (cells.Length < RequiredCellCount)
        {
            // Header, spacer or otherwise incomplete row: not a proxy entry.
            continue;
        }

        // Drop the first embedded script so its source does not leak into InnerText.
        var embeddedScript = cells[0].Descendants("script").FirstOrDefault();
        if (embeddedScript != null)
        {
            embeddedScript.Remove();
        }

        var proxy = new FreeProxy();

        var addressParts = cells[0].InnerText.Split(new[] { ':' }, StringSplitOptions.RemoveEmptyEntries);
        proxy.IPAddress = addressParts[0];
        proxy.PortNo = int.Parse(addressParts[1], System.Globalization.CultureInfo.InvariantCulture);

        proxy.AnonymityLevel = cells[1].InnerText;

        // TODO: cells[2] holds the last-check time; it is not mapped yet.

        // Build the lookup key with invariant casing so it does not vary with the
        // current culture (e.g. Turkish dotless i).
        var countryPartial = cells[3].InnerText.ToLowerInvariant().Replace(" ", "_");
        countryPartial = countryPartial.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries)[0];
        proxy.Country = FindProxyCountryFromPartial(countryPartial);

        freeProxies.Enqueue(proxy);

        if (freeProxyFetched != null)
        {
            freeProxyFetched(proxy);
        }
    }

    return freeProxies;
}
/// <summary>
/// Parses the proxy table (<c>table class="proxytbl"</c>) of a provider page into
/// <see cref="FreeProxy"/> entries.
/// </summary>
/// <param name="pageContentDocument">Raw HTML of the provider page.</param>
/// <param name="freeProxyFetched">
/// Optional callback invoked once for every proxy that is enqueued. May be <c>null</c>.
/// </param>
/// <returns>
/// A queue with one populated entry per valid data row, or <c>null</c> when the table
/// is missing or contains no rows at all.
/// </returns>
public override Queue<FreeProxy> LoadUpIPProxies(string pageContentDocument, Action<FreeProxy> freeProxyFetched)
{
    // Columns mapped below: 0 = ip, 1 = port, 2 = country, 3 = anonymity.
    // Columns 4 (protocol marker) and 5 (last check) are not mapped because no
    // matching FreeProxy member is visible from here.
    const int RequiredCellCount = 4;

    var doc = new HtmlDocument();
    doc.LoadHtml(pageContentDocument);

    var table = HtmlUtil.GetNodeByAttribute(doc.DocumentNode, "table", "class", "proxytbl");
    if (table == null)
    {
        return null;
    }

    var rows = HtmlUtil.GetNodeCollection(table, "tr");
    if (rows == null)
    {
        // No row collection is treated the same as a missing table.
        return null;
    }

    Queue<FreeProxy> freeProxies = null;

    // Snapshot the rows first: script nodes are removed from the DOM while iterating.
    foreach (var row in rows.ToList())
    {
        // The queue exists as soon as the table has at least one row, even a header row.
        if (freeProxies == null)
        {
            freeProxies = new Queue<FreeProxy>();
        }

        var cells = row.Descendants("td").ToArray();
        if (cells.Length < RequiredCellCount)
        {
            // Header, spacer or otherwise incomplete row: not a proxy entry.
            continue;
        }

        // Drop the first embedded script so its source does not leak into the port text.
        var embeddedScript = cells[1].Descendants("script").FirstOrDefault();
        if (embeddedScript != null)
        {
            embeddedScript.Remove();
        }

        // A row without a usable port cannot describe a proxy, so it is skipped
        // instead of being enqueued empty.
        var portText = HtmlUtil.Resolve(cells[1].InnerText);
        if (portText != null)
        {
            portText = portText.Replace("\r\n", "").Trim();
        }

        int portNo;
        if (!int.TryParse(
                portText,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out portNo))
        {
            continue;
        }

        var proxy = new FreeProxy();
        proxy.IPAddress = cells[0].InnerText.Trim();
        proxy.PortNo = portNo;
        proxy.AnonymityLevel = HtmlUtil.Resolve(cells[3].InnerText);

        // Country lookup key: text before the first comma, lower-cased with invariant
        // casing and with spaces replaced by underscores.
        // NOTE(review): assumes FindProxyCountryFromPartial is available to this class
        // and expects this key format — confirm.
        var countryText = HtmlUtil.Resolve(cells[2].InnerText) ?? string.Empty;
        var countryParts = countryText
            .ToLowerInvariant()
            .Replace(" ", "_")
            .Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
        if (countryParts.Length > 0)
        {
            proxy.Country = FindProxyCountryFromPartial(countryParts[0]);
        }

        freeProxies.Enqueue(proxy);

        if (freeProxyFetched != null)
        {
            freeProxyFetched(proxy);
        }
    }

    return freeProxies;
}
/// <summary>
/// Parses the proxy table (<c>table class="proxytbl"</c>) of a provider page and
/// registers one <see cref="IPProxy"/> per data row via <c>RegisterProxy</c>, then
/// delegates to the base implementation.
/// </summary>
/// <param name="content">Raw HTML of the provider page.</param>
/// <exception cref="FormatException">
/// A row's port or last-check time cannot be parsed.
/// </exception>
/// <exception cref="IndexOutOfRangeException">A row's country cell is empty.</exception>
protected override void ParseProxyPage(string content)
{
    // Columns: 0 = ip, 1 = port, 2 = country, 3 = anonymity, 4 = protocol marker, 5 = last check.
    const int RequiredCellCount = 6;

    var doc = new HtmlDocument();
    doc.LoadHtml(content);

    var table = HtmlUtil.GetNodeByAttribute(doc.DocumentNode, "table", "class", "proxytbl");
    if (table == null)
    {
        // Without the table the base implementation is intentionally not invoked
        // (unchanged from the previous behaviour).
        return;
    }

    var rows = HtmlUtil.GetNodeCollection(table, "tr");
    if (rows != null)
    {
        var providerId = GetType().Name.Replace("Cartridge", "");

        // Read the date once: assembling it from separate DateTime.Now reads can mix
        // components from two different days around midnight.
        var today = DateTime.Today;
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        // Snapshot the rows first: script nodes are removed from the DOM while iterating.
        foreach (var row in rows.ToList())
        {
            var cells = row.Descendants("td").ToArray();
            if (cells.Length < RequiredCellCount)
            {
                // Header, spacer or otherwise incomplete row: not a proxy entry.
                continue;
            }

            // Drop the first embedded script so its source does not leak into the port text.
            var embeddedScript = cells[1].Descendants("script").FirstOrDefault();
            if (embeddedScript != null)
            {
                embeddedScript.Remove();
            }

            var proxy = new IPProxy()
            {
                ProviderId = providerId,
            };

            proxy.IPAddress = cells[0].InnerText.Trim();

            var portText = HtmlUtil.Resolve(cells[1].InnerText.Trim()).Replace("\r\n", "").Trim();
            proxy.PortNo = int.Parse(portText, invariant);

            // Country lookup key: text before the first comma, spaces replaced by underscores.
            var country = HtmlUtil.Resolve(cells[2].InnerText.Trim());
            var countryParts = country.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
            proxy.Country = Helper.FindProxyCountryFromPartial(countryParts[0].Replace(" ", "_"));

            // Any text containing "anonymous" maps to Anonymous; everything else to Elite.
            var anonymityText = cells[3].InnerText.Replace("\r\n", "");
            proxy.AnonymityLevel = anonymityText.Contains("anonymous")
                ? ProxyAnonymityLevelsEnum.Anonymous
                : ProxyAnonymityLevelsEnum.Elite;

            // The protocol is encoded in the cell's CSS class; a cell without a class
            // attribute is treated as plain HTTP.
            var classAttribute = cells[4].Attributes["class"];
            var protocolMarker = classAttribute == null ? string.Empty : HtmlUtil.Resolve(classAttribute.Value);
            proxy.Protocol = protocolMarker.IndexOf("https", StringComparison.OrdinalIgnoreCase) >= 0
                ? ProxyProtocolsEnum.HTTPS
                : ProxyProtocolsEnum.HTTP;

            // The page lists only a time of day; combine it with today's local date.
            var lastCheckTime = TimeSpan.Parse(HtmlUtil.Resolve(cells[5].InnerText.Trim()), invariant);
            proxy.LastChecked = today.Add(lastCheckTime);

            RegisterProxy(proxy);
        }
    }

    base.ParseProxyPage(content);
}