/// <summary>
            /// Extracts the advertisements listed in an HTML results page.
            /// Listings are the <c>div</c> nodes selected with the class value "ypgListing clearfix".
            /// </summary>
            /// <param name="htmlDocument">Raw HTML of the results page.</param>
            /// <param name="actionAdExtracted">
            /// Optional callback invoked once per listing with the extracted ad, the time spent
            /// extracting it and the 1-based position of the listing on the page. May be null.
            /// </param>
            /// <returns>
            /// The extracted ads, or null when the page contains no listings. When an exception is
            /// raised it is forwarded to <c>frameworkExceptionInvoke</c> (if set) and whatever was
            /// collected up to that point is returned (null if nothing had been collected yet).
            /// </returns>
            public Models.Advertisement.Advertisements ExtractAds(string htmlDocument, Action <Handlers.EventHandlers.AdExtractedEventArgs> actionAdExtracted)
            {
                Models.Advertisement.Advertisements _ads = null;
                try
                {
                    HtmlAgilityPack.HtmlDocument _doc = new HtmlAgilityPack.HtmlDocument();
                    _doc.LoadHtml(htmlDocument);

                    var _nodes = HtmlUtil.GetNodeCollection(_doc.DocumentNode, "div", "class", "ypgListing clearfix");

                    // Any() stops at the first element instead of counting the whole collection.
                    if (_nodes != null && _nodes.Any())
                    {
                        _ads = new Models.Advertisement.Advertisements();
                        int _pgItemIdx = 0;

                        foreach (HtmlAgilityPack.HtmlNode _n in _nodes)
                        {
                            _pgItemIdx++;

                            // Time the extraction of this single listing only.
                            Stopwatch _stopwatch = Stopwatch.StartNew();

                            Models.Advertisement _ad = this.EscrapeAdInfo(_n);
                            if (_ad != null)
                            {
                                _ad = this.EscrapeAdInfoExtend(_ad);
                            }

                            _stopwatch.Stop();

                            // NOTE(review): a null ad is still added and reported, exactly as before —
                            // confirm that the collection and the callback consumers expect null entries.
                            _ads.Add(_ad);

                            if (actionAdExtracted != null)
                            {
                                actionAdExtracted(new Handlers.EventHandlers.AdExtractedEventArgs(_ad, _stopwatch.Elapsed, _pgItemIdx));
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Failures are reported through the framework hook rather than thrown to the caller.
                    if (this.frameworkExceptionInvoke != null)
                    {
                        Exception _ex = new Exception(string.Format("Exception in {0}.{1}(?)", this.directoryProviderSetting.ServicedCountry.ToString(), "ExtractAds"), ex);
                        this.frameworkExceptionInvoke(new Handlers.EventHandlers.FrameworkExceptionEventArgs(_ex));
                    }
                }

                return _ads;
            }
// Example #2
// 0
            /// <summary>
            /// Counts the listing nodes in an HTML results page. Listings are the <c>li</c> nodes
            /// selected with the class value "gold mappableListing listingContainer omnitureListing".
            /// </summary>
            /// <param name="htmlDocument">Raw HTML of the results page. Null or empty yields 0.</param>
            /// <returns>The number of listing nodes found, or 0 when there are none.</returns>
            public int GetResultsPerPage(string htmlDocument)
            {
                // A missing page has no results; this also avoids handing null to LoadHtml.
                if (string.IsNullOrEmpty(htmlDocument))
                {
                    return 0;
                }

                HtmlAgilityPack.HtmlDocument _doc = new HtmlAgilityPack.HtmlDocument();
                _doc.LoadHtml(htmlDocument);

                var _nodes = HtmlUtil.GetNodeCollection(_doc.DocumentNode, "li", "class", "gold mappableListing listingContainer omnitureListing");

                // Count exactly once; an empty collection already counts as 0.
                return _nodes == null ? 0 : _nodes.Count();
            }
            /// <summary>
            /// Counts the listing nodes in an HTML results page. Listings are the <c>div</c> nodes
            /// selected with the class value "ypgListing clearfix".
            /// </summary>
            /// <param name="htmlDocument">Raw HTML of the results page. Null or empty yields 0.</param>
            /// <returns>The number of listing nodes found, or 0 when there are none.</returns>
            public int GetResultsPerPage(string htmlDocument)
            {
                // A missing page has no results; this also avoids handing null to LoadHtml.
                if (string.IsNullOrEmpty(htmlDocument))
                {
                    return 0;
                }

                HtmlAgilityPack.HtmlDocument _doc = new HtmlAgilityPack.HtmlDocument();
                _doc.LoadHtml(htmlDocument);

                var _nodes = HtmlUtil.GetNodeCollection(_doc.DocumentNode, "div", "class", "ypgListing clearfix");

                // Count exactly once; an empty collection already counts as 0.
                return _nodes == null ? 0 : _nodes.Count();
            }
// Example #4
// 0
        /// <summary>
        /// Parses the proxy table (the <c>table</c> node with id "proxylist") of a page into a queue
        /// of <c>FreeProxy</c> entries. Rows with at most one <c>td</c> cell are skipped.
        /// </summary>
        /// <param name="pageContentDocument">Raw HTML of the proxy list page.</param>
        /// <param name="freeProxyFetched">Optional callback invoked for every proxy that is enqueued. May be null.</param>
        /// <returns>
        /// Null when the table is missing or has no rows; otherwise the queue of parsed proxies
        /// (possibly empty when no row carried data cells).
        /// </returns>
        /// <remarks>
        /// Exceptions raised while parsing a row (for example a non-numeric port or a row with
        /// fewer cells than expected) propagate to the caller.
        /// </remarks>
        public override Queue <FreeProxy> LoadUpIPProxies(string pageContentDocument, Action <FreeProxy> freeProxyFetched)
        {
            var doc = new HtmlDocument();
            doc.LoadHtml(pageContentDocument);

            var target = HtmlUtil.GetNodeByAttribute(doc.DocumentNode, "table", "id", "proxylist");
            if (target == null)
            {
                return null;
            }

            var lines = HtmlUtil.GetNodeCollection(target, "tr");
            if (lines == null)
            {
                return null;
            }

            Queue <FreeProxy> freeProxies = null;

            // Iterate over a snapshot: script nodes are removed from the document while rows are processed.
            foreach (HtmlNode row in lines.ToList())
            {
                // The queue exists as soon as the table has at least one row, even a skipped one.
                if (freeProxies == null)
                {
                    freeProxies = new Queue <FreeProxy>();
                }

                var cells = row.Descendants("td").ToArray();
                if (cells.Length <= 1)
                {
                    continue;
                }

                // Drop the first script node of the address cell so it does not end up in InnerText.
                var script = cells[0].Descendants("script").FirstOrDefault();
                if (script != null)
                {
                    script.Remove();
                }

                // The address cell is split on ':' into host and port.
                var addressParts = cells[0].InnerText.Split(new char[] { ':' }, StringSplitOptions.RemoveEmptyEntries);

                var proxy = new FreeProxy();
                proxy.IPAddress      = addressParts[0];
                proxy.PortNo         = int.Parse(addressParts[1]);
                proxy.AnonymityLevel = cells[1].InnerText;

                // TODO: cells[2] holds the last-checked text; it is not mapped onto the proxy yet.

                // Country lookup key: lower-cased, spaces replaced by '_', text before the first comma.
                // ToLowerInvariant keeps the key independent of the current culture.
                var countryPartial = cells[3].InnerText.ToLowerInvariant().Replace(" ", "_");
                countryPartial     = countryPartial.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries)[0];
                proxy.Country      = FindProxyCountryFromPartial(countryPartial);

                freeProxies.Enqueue(proxy);

                if (freeProxyFetched != null)
                {
                    freeProxyFetched(proxy);
                }
            }

            return freeProxies;
        }
        /// <summary>
        /// Parses the proxy table (the <c>table</c> node with class "proxytbl") of a page into a queue
        /// of <c>FreeProxy</c> entries. Rows with at most one <c>td</c> cell are skipped, as are rows
        /// whose port cell does not contain an integer.
        /// </summary>
        /// <param name="pageContentDocument">Raw HTML of the proxy list page.</param>
        /// <param name="freeProxyFetched">Optional callback invoked for every proxy that is enqueued. May be null.</param>
        /// <returns>
        /// Null when the table is missing or has no rows; otherwise the queue of parsed proxies
        /// (possibly empty when no row carried usable data).
        /// </returns>
        public override Queue <FreeProxy> LoadUpIPProxies(string pageContentDocument, Action <FreeProxy> freeProxyFetched)
        {
            var doc = new HtmlDocument();
            doc.LoadHtml(pageContentDocument);

            var target = HtmlUtil.GetNodeByAttribute(doc.DocumentNode, "table", "class", "proxytbl");
            if (target == null)
            {
                return null;
            }

            var lines = HtmlUtil.GetNodeCollection(target, "tr");
            if (lines == null)
            {
                return null;
            }

            Queue <FreeProxy> freeProxies = null;

            // Iterate over a snapshot: script nodes are removed from the document while rows are processed.
            foreach (HtmlNode row in lines.ToList())
            {
                // The queue exists as soon as the table has at least one row, even a skipped one.
                if (freeProxies == null)
                {
                    freeProxies = new Queue <FreeProxy>();
                }

                var cells = row.Descendants("td").ToArray();
                if (cells.Length <= 1)
                {
                    continue;
                }

                // Drop the first script node of the port cell so it does not end up in InnerText.
                var script = cells[1].Descendants("script").FirstOrDefault();
                if (script != null)
                {
                    script.Remove();
                }

                // A row without a numeric port cannot describe a usable proxy.
                int portNo;
                if (!int.TryParse(HtmlUtil.Resolve(cells[1].InnerText), out portNo))
                {
                    continue;
                }

                // Store the parsed values on the proxy; previously they were read into locals and
                // discarded, so every enqueued proxy was empty.
                var proxy = new FreeProxy();
                proxy.IPAddress      = cells[0].InnerText.Trim();
                proxy.PortNo         = portNo;
                proxy.AnonymityLevel = HtmlUtil.Resolve(cells[3].InnerText);

                // TODO: cells[2] (country), the class attribute of cells[4] (protocol marker) and
                // cells[5] (last check) are not mapped onto the proxy yet.

                freeProxies.Enqueue(proxy);

                if (freeProxyFetched != null)
                {
                    freeProxyFetched(proxy);
                }
            }

            return freeProxies;
        }
// Example #6
// 0
        /// <summary>
        /// Parses the proxy table (the <c>table</c> node with class "proxytbl") of a page and
        /// registers one <c>IPProxy</c> per data row through <c>RegisterProxy</c>, then hands the
        /// page to the base implementation. Rows with at most one <c>td</c> cell are skipped.
        /// </summary>
        /// <param name="content">Raw HTML of the proxy list page.</param>
        /// <remarks>
        /// When the table is missing the method returns without calling the base implementation.
        /// Exceptions raised while parsing a row (non-numeric port, unparsable time, missing
        /// cells or class attribute) propagate to the caller.
        /// </remarks>
        protected override void ParseProxyPage(string content)
        {
            var doc = new HtmlDocument();
            doc.LoadHtml(content);

            var target = HtmlUtil.GetNodeByAttribute(doc.DocumentNode, "table", "class", "proxytbl");
            if (target == null)
            {
                return;
            }

            var lines = HtmlUtil.GetNodeCollection(target, "tr");
            if (lines != null)
            {
                // The provider id is this type's name without the "Cartridge" part; same for every row.
                var providerId = GetType().Name.Replace("Cartridge", "");

                // Iterate over a snapshot: script nodes are removed from the document while rows are processed.
                foreach (HtmlNode row in lines.ToList())
                {
                    var cells = row.Descendants("td").ToArray();
                    if (cells.Length <= 1)
                    {
                        continue;
                    }

                    // Drop the first script node of the port cell so it does not end up in InnerText.
                    var script = cells[1].Descendants("script").FirstOrDefault();
                    if (script != null)
                    {
                        script.Remove();
                    }

                    var proxy = new IPProxy()
                    {
                        ProviderId = providerId,
                    };

                    // ip
                    proxy.IPAddress = cells[0].InnerText.Trim();

                    // port
                    proxy.PortNo = int.Parse(HtmlUtil.Resolve(cells[1].InnerText.Trim()).Replace("\r\n", "").Trim());

                    // country: text before the first comma, spaces replaced by '_'
                    var country      = HtmlUtil.Resolve(cells[2].InnerText.Trim());
                    var countryParts = country.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
                    proxy.Country    = Helper.FindProxyCountryFromPartial(countryParts[0].Replace(" ", "_"));

                    // anonymity: any text containing "anonymous" maps to Anonymous, everything else to Elite
                    var anonymity = cells[3].InnerText.Trim().Replace("\r\n", "").Trim();
                    proxy.AnonymityLevel = anonymity.Contains("anonymous")
                        ? ProxyAnonymityLevelsEnum.Anonymous
                        : ProxyAnonymityLevelsEnum.Elite;

                    // protocol: taken from the class attribute of the fifth cell
                    var protocolClass = HtmlUtil.Resolve(cells[4].Attributes["class"].Value);
                    proxy.Protocol = protocolClass.ToLowerInvariant().Contains("https")
                        ? ProxyProtocolsEnum.HTTPS
                        : ProxyProtocolsEnum.HTTP;

                    // last check: the cell holds a time of day, combined with today's date.
                    // Read the clock once so year, month and day cannot straddle midnight.
                    var now      = DateTime.Now;
                    var midnight = new DateTime(now.Year, now.Month, now.Day);
                    proxy.LastChecked = midnight.Add(TimeSpan.Parse(HtmlUtil.Resolve(cells[5].InnerText.Trim())));

                    RegisterProxy(proxy);
                }
            }

            base.ParseProxyPage(content);
        }