Beispiel #1
0
            /// <summary>
            /// Parses a search-result page and extracts one advertisement per listing node found in it.
            /// </summary>
            /// <remarks>
            /// Listing nodes are the <c>li</c> descendants of the node returned by
            /// <c>HtmlUtil.GetNode(..., "ul", "id", "searchResultListings")</c> whose <c>class</c>
            /// attribute contains <c>listingContainer</c>. Exceptions are not rethrown: they are wrapped
            /// and forwarded to <c>frameworkExceptionInvoke</c> when that delegate is set.
            /// </remarks>
            /// <param name="htmlDocument">HTML source of the search-result page.</param>
            /// <param name="actionAdExtracted">
            /// Optional callback invoked once per listing with the extracted ad, the time spent
            /// extracting it and the 1-based position of the listing on the page.
            /// </param>
            /// <returns>
            /// The extracted advertisements, or <c>null</c> when the page has no listings. If an exception
            /// interrupts processing, whatever was collected up to that point (possibly <c>null</c>) is returned.
            /// </returns>
            public Models.Advertisement.Advertisements ExtractAds(string htmlDocument, Action <Handlers.EventHandlers.AdExtractedEventArgs> actionAdExtracted)
            {
                Models.Advertisement.Advertisements _ads = null;
                try
                {
                    HtmlAgilityPack.HtmlDocument _doc = new HtmlAgilityPack.HtmlDocument();
                    _doc.LoadHtml(htmlDocument);

                    var _listRoot = HtmlUtil.GetNode(_doc.DocumentNode, "ul", "id", "searchResultListings");
                    if (_listRoot == null)
                    {
                        // No result container on the page: there is nothing to enumerate, so report
                        // "no ads" instead of failing with a NullReferenceException.
                        return _ads;
                    }

                    HtmlAgilityPack.HtmlNode[] _nodes = _listRoot.Descendants("li")
                                                        .Where(li => li.Attributes.Contains("class") &&
                                                               li.Attributes[@"class"].Value.Contains("listingContainer"))
                                                        .ToArray();

                    if (_nodes.Length > 0)
                    {
                        _ads = new Models.Advertisement.Advertisements();
                        int _pgItemIdx = 0;

                        foreach (HtmlAgilityPack.HtmlNode _n in _nodes)
                        {
                            _pgItemIdx++;

                            // Time the extraction of this single listing (basic + extended info).
                            Stopwatch _stopwatch = Stopwatch.StartNew();

                            Models.Advertisement _ad = this.EscrapeAdInfo(_n);

                            if (_ad != null)
                            {
                                _ad = this.EscrapeAdInfoExtend(_ad);
                            }

                            _stopwatch.Stop();

                            // NOTE(review): _ad may be null here; it is still added and reported so that
                            // existing callers keep seeing one entry/event per listing — confirm intent.
                            _ads.Add(_ad);

                            if (actionAdExtracted != null)
                            {
                                actionAdExtracted(new Handlers.EventHandlers.AdExtractedEventArgs(_ad, _stopwatch.Elapsed, _pgItemIdx));
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    if (this.frameworkExceptionInvoke != null)
                    {
                        Exception _ex = new Exception(string.Format("Exception in {0}.{1}(?)", this.directoryProviderSetting.ServicedCountry.ToString(), "ExtractAds"), ex);
                        this.frameworkExceptionInvoke(new Handlers.EventHandlers.FrameworkExceptionEventArgs(_ex));
                    }
                }
                return _ads;
            }
            /// <summary>
            /// Parses a search-result page and extracts one advertisement per listing node found in it.
            /// </summary>
            /// <remarks>
            /// Listing nodes are whatever <c>HtmlUtil.GetNodeCollection(..., "div", "class", "ypgListing clearfix")</c>
            /// returns. Exceptions are not rethrown: they are wrapped and forwarded to
            /// <c>frameworkExceptionInvoke</c> when that delegate is set.
            /// </remarks>
            /// <param name="htmlDocument">HTML source of the search-result page.</param>
            /// <param name="actionAdExtracted">
            /// Optional callback invoked once per listing with the extracted ad, the time spent
            /// extracting it and the 1-based position of the listing on the page.
            /// </param>
            /// <returns>
            /// The extracted advertisements, or <c>null</c> when the page has no listings. If an exception
            /// interrupts processing, whatever was collected up to that point (possibly <c>null</c>) is returned.
            /// </returns>
            public Models.Advertisement.Advertisements ExtractAds(string htmlDocument, Action <Handlers.EventHandlers.AdExtractedEventArgs> actionAdExtracted)
            {
                Models.Advertisement.Advertisements _ads = null;
                try
                {
                    HtmlAgilityPack.HtmlDocument _doc = new HtmlAgilityPack.HtmlDocument();
                    _doc.LoadHtml(htmlDocument);

                    var _nodes = HtmlUtil.GetNodeCollection(_doc.DocumentNode, "div", "class", "ypgListing clearfix");
                    if (_nodes != null && _nodes.Any())
                    {
                        _ads = new Models.Advertisement.Advertisements();
                        int _pgItemIdx = 0;

                        foreach (HtmlAgilityPack.HtmlNode _n in _nodes)
                        {
                            _pgItemIdx++;

                            // Time the extraction of this single listing (basic + extended info).
                            Stopwatch _stopwatch = Stopwatch.StartNew();

                            Models.Advertisement _ad = this.EscrapeAdInfo(_n);

                            if (_ad != null)
                            {
                                _ad = this.EscrapeAdInfoExtend(_ad);
                            }

                            _stopwatch.Stop();

                            // NOTE(review): _ad may be null here; it is still added and reported so that
                            // existing callers keep seeing one entry/event per listing — confirm intent.
                            _ads.Add(_ad);

                            if (actionAdExtracted != null)
                            {
                                actionAdExtracted(new Handlers.EventHandlers.AdExtractedEventArgs(_ad, _stopwatch.Elapsed, _pgItemIdx));
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    if (this.frameworkExceptionInvoke != null)
                    {
                        Exception _ex = new Exception(string.Format("Exception in {0}.{1}(?)", this.directoryProviderSetting.ServicedCountry.ToString(), "ExtractAds"), ex);
                        this.frameworkExceptionInvoke(new Handlers.EventHandlers.FrameworkExceptionEventArgs(_ex));
                    }
                }
                return _ads;
            }
Beispiel #3
0
        /// <summary>
        /// Downloads each search-result page referenced by the current search item's URL pointers and
        /// accumulates the advertisements extracted from every valid page.
        /// </summary>
        /// <remarks>
        /// Processing stops early when the configured <c>pagesToProcess</c> limit is reached, when a page's
        /// ad count equals <c>TotalResults</c>, or when a page is reported as an error page (in which case
        /// all invalid pointers are removed from the pointer list). Exceptions are not rethrown: they are
        /// wrapped and passed to <c>InvokeEventFrameworkException</c>.
        /// </remarks>
        /// <returns>
        /// <c>null</c> when the search item has no URL pointers; otherwise the (possibly empty) collection
        /// of advertisements gathered before the loop ended.
        /// </returns>
        public override Models.Advertisement.Advertisements StartSearchResultProcessCallback()
        {
            Models.Advertisement.Advertisements _ads = null;

            if (base.SearchItem.searchResult.pointers != null && base.SearchItem.searchResult.pointers.Count > 0)
            {
                int _pgNo = 0;

                _ads = new Models.Advertisement.Advertisements();

                try
                {
                    foreach (Models.SearchItem.SearchResult.UrlPointer _p in base.SearchItem.searchResult.pointers)
                    {
                        // Fixed 5 s pause before every page download (presumably request throttling — confirm).
                        Thread.Sleep(5000);

                        _pgNo++;
                        string _htmlDoc = HtmlUtil.GetPageDocument(_p.SearchUrl);

                        if (string.IsNullOrEmpty(_htmlDoc))
                        {
                            // Nothing was downloaded for this pointer; move on to the next one.
                            continue;
                        }

                        string _errMsg = string.Empty;

                        bool _pgInError = this.ypTool.CheckPageIfError(_htmlDoc, ref _errMsg);
                        _p.IsValid = !_pgInError;

                        if (!_p.IsValid)
                        {
                            //correct Search result values here...
                            //base.SearchItem.searchResult.ResultsPerPage=base.SearchItem.searchResult.

                            //Strip the rest forward + all invalid objects
                            base.SearchItem.searchResult.pointers.RemoveAll(p => !p.IsValid);

                            // The collection being enumerated has just been modified; continuing the
                            // foreach would throw InvalidOperationException, so leave the loop explicitly.
                            break;
                        }

                        _p.SearchHtml = _htmlDoc;

                        // Time the extraction of all ads on this page.
                        Stopwatch _stopwatch = Stopwatch.StartNew();

                        Models.Advertisement.Advertisements __ads = this.ypTool.ExtractAds(_htmlDoc, base.InvokeEventAdExtracted);

                        if (__ads != null && __ads.Count > 0)
                        {
                            _ads.AddRange(__ads.ToArray());
                        }

                        _stopwatch.Stop();

                        base.InvokeEventUrlPointerProcessed(new Handlers.EventHandlers.UrlPointerProcessedEventArgs(_p, _stopwatch.Elapsed));

                        if (base.SearchItem.pagesToProcess.HasValue && _pgNo == base.SearchItem.pagesToProcess)
                        {
                            break;
                        }

                        // NOTE(review): this compares the ad count of the current page (not the accumulated
                        // total) with TotalResults — confirm whether the accumulated count was intended.
                        if (__ads != null && __ads.Count == base.SearchItem.searchResult.TotalResults)
                        {
                            break;
                        }
                    }
                }
                catch (Exception ex)
                {
                    Exception _ex = new Exception(string.Format("Exception in {0}.{1}(?)", this.DirectoryProviderSetting.ServicedCountry.ToString(), "StartSearchResultProcessCallback"), ex);
                    base.InvokeEventFrameworkException(new Handlers.EventHandlers.FrameworkExceptionEventArgs(_ex));
                }
            }
            return _ads;
        }
            /// <summary>
            /// Extracts the advertisements contained in a search-result page, one per listing node.
            /// </summary>
            /// <remarks>
            /// The listing nodes are obtained from
            /// <c>HtmlUtil.GetNodeCollection(..., "div", "class", "ypgListing clearfix")</c>. Any exception is
            /// swallowed here and, when <c>frameworkExceptionInvoke</c> is set, forwarded to it wrapped in a
            /// new exception naming this method.
            /// </remarks>
            /// <param name="htmlDocument">HTML source of the search-result page.</param>
            /// <param name="actionAdExtracted">
            /// Optional callback raised after each listing with the extracted ad, the elapsed extraction
            /// time and the listing's 1-based index on the page.
            /// </param>
            /// <returns>
            /// The collected advertisements; <c>null</c> if no listing nodes were found. When an exception
            /// aborts the loop, the ads collected so far (possibly <c>null</c>) are returned.
            /// </returns>
            public Models.Advertisement.Advertisements ExtractAds(string htmlDocument, Action<Handlers.EventHandlers.AdExtractedEventArgs> actionAdExtracted)
            {
                Models.Advertisement.Advertisements _ads = null;
                try
                {
                    HtmlAgilityPack.HtmlDocument _doc = new HtmlAgilityPack.HtmlDocument();
                    _doc.LoadHtml(htmlDocument);

                    var _nodes = HtmlUtil.GetNodeCollection(_doc.DocumentNode, "div", "class", "ypgListing clearfix");
                    if(_nodes != null && _nodes.Any())
                    {
                        _ads = new Models.Advertisement.Advertisements();
                        int _pgItemIdx = 0;

                        foreach(HtmlAgilityPack.HtmlNode _n in _nodes)
                        {
                            _pgItemIdx++;

                            // Measure how long basic + extended extraction takes for this listing.
                            Stopwatch _stopwatch = Stopwatch.StartNew();

                            Models.Advertisement _ad = this.EscrapeAdInfo(_n);

                            if(_ad != null)
                            {
                                _ad = this.EscrapeAdInfoExtend(_ad);
                            }

                            _stopwatch.Stop();

                            // NOTE(review): a null _ad is still added and reported, matching the existing
                            // one-entry-per-listing behaviour — confirm that callers expect null entries.
                            _ads.Add(_ad);

                            if(actionAdExtracted != null)
                            {
                                actionAdExtracted(new Handlers.EventHandlers.AdExtractedEventArgs(_ad, _stopwatch.Elapsed, _pgItemIdx));
                            }
                        }
                    }
                }
                catch(Exception ex)
                {
                    if(this.frameworkExceptionInvoke != null)
                    {
                        Exception _ex = new Exception(string.Format("Exception in {0}.{1}(?)", this.directoryProviderSetting.ServicedCountry.ToString(), "ExtractAds"), ex);
                        this.frameworkExceptionInvoke(new Handlers.EventHandlers.FrameworkExceptionEventArgs(_ex));
                    }
                }
                return _ads;
            }
        /// <summary>
        /// Walks the URL pointers of the current search item, downloads each result page and gathers
        /// the advertisements extracted from the pages that are not error pages.
        /// </summary>
        /// <remarks>
        /// The loop ends early when <c>pagesToProcess</c> pages have been handled, when a page yields as
        /// many ads as <c>TotalResults</c>, or when an error page is met (all invalid pointers are then
        /// removed from the pointer list). Exceptions are wrapped and handed to
        /// <c>InvokeEventFrameworkException</c> instead of being rethrown.
        /// </remarks>
        /// <returns>
        /// <c>null</c> if there are no URL pointers; otherwise the advertisements accumulated so far,
        /// which may be an empty collection.
        /// </returns>
        public override Models.Advertisement.Advertisements StartSearchResultProcessCallback()
        {
            Models.Advertisement.Advertisements _ads = null;

            if(base.SearchItem.searchResult.pointers != null && base.SearchItem.searchResult.pointers.Count > 0)
            {
                int _pgNo = 0;

                _ads = new Models.Advertisement.Advertisements();

                try
                {
                    foreach(Models.SearchItem.SearchResult.UrlPointer _p in base.SearchItem.searchResult.pointers)
                    {
                        // Wait 5 s before each download (presumably to throttle requests — confirm).
                        Thread.Sleep(5000);

                        _pgNo++;
                        string _htmlDoc = HtmlUtil.GetPageDocument(_p.SearchUrl);

                        if(string.IsNullOrEmpty(_htmlDoc))
                        {
                            // No document for this pointer; skip it.
                            continue;
                        }

                        string _errMsg = string.Empty;

                        bool _pgInError = this.ypTool.CheckPageIfError(_htmlDoc, ref _errMsg);
                        _p.IsValid = !_pgInError;

                        if(!_p.IsValid)
                        {
                            //correct Search result values here...
                            //base.SearchItem.searchResult.ResultsPerPage=base.SearchItem.searchResult.

                            //Strip the rest forward + all invalid objects
                            base.SearchItem.searchResult.pointers.RemoveAll(p => !p.IsValid);

                            // The list under enumeration was just changed; advancing the foreach would
                            // throw InvalidOperationException, so stop iterating here.
                            break;
                        }

                        _p.SearchHtml = _htmlDoc;

                        // Measure the time needed to extract every ad of this page.
                        Stopwatch _stopwatch = Stopwatch.StartNew();

                        Models.Advertisement.Advertisements __ads = this.ypTool.ExtractAds(_htmlDoc, base.InvokeEventAdExtracted);

                        if(__ads != null && __ads.Count > 0)
                        {
                            _ads.AddRange(__ads.ToArray());
                        }

                        _stopwatch.Stop();

                        base.InvokeEventUrlPointerProcessed(new Handlers.EventHandlers.UrlPointerProcessedEventArgs(_p, _stopwatch.Elapsed));

                        if(base.SearchItem.pagesToProcess.HasValue && _pgNo == base.SearchItem.pagesToProcess)
                        {
                            break;
                        }

                        // NOTE(review): the per-page count (__ads), not the running total (_ads), is compared
                        // with TotalResults — verify which one was meant.
                        if(__ads != null && __ads.Count == base.SearchItem.searchResult.TotalResults)
                        {
                            break;
                        }
                    }
                }
                catch(Exception ex)
                {
                    Exception _ex = new Exception(string.Format("Exception in {0}.{1}(?)", this.DirectoryProviderSetting.ServicedCountry.ToString(), "StartSearchResultProcessCallback"), ex);
                    base.InvokeEventFrameworkException(new Handlers.EventHandlers.FrameworkExceptionEventArgs(_ex));
                }
            }
            return _ads;
        }
            /// <summary>
            /// Extracts the advertisements contained in a search-result page, one per listing node.
            /// </summary>
            /// <remarks>
            /// Listing nodes are the <c>li</c> descendants, whose <c>class</c> attribute contains
            /// <c>listingContainer</c>, of the node returned by
            /// <c>HtmlUtil.GetNode(..., "ul", "id", "searchResultListings")</c>. Any exception is swallowed
            /// here and, when <c>frameworkExceptionInvoke</c> is set, forwarded to it wrapped in a new
            /// exception naming this method.
            /// </remarks>
            /// <param name="htmlDocument">HTML source of the search-result page.</param>
            /// <param name="actionAdExtracted">
            /// Optional callback raised after each listing with the extracted ad, the elapsed extraction
            /// time and the listing's 1-based index on the page.
            /// </param>
            /// <returns>
            /// The collected advertisements; <c>null</c> if the page has no listing nodes. When an exception
            /// aborts the loop, the ads collected so far (possibly <c>null</c>) are returned.
            /// </returns>
            public Models.Advertisement.Advertisements ExtractAds(string htmlDocument, Action<Handlers.EventHandlers.AdExtractedEventArgs> actionAdExtracted)
            {
                Models.Advertisement.Advertisements _ads = null;
                try
                {
                    HtmlAgilityPack.HtmlDocument _doc = new HtmlAgilityPack.HtmlDocument();
                    _doc.LoadHtml(htmlDocument);

                    var _listRoot = HtmlUtil.GetNode(_doc.DocumentNode, "ul", "id", "searchResultListings");
                    if(_listRoot == null)
                    {
                        // Without the result container there are no listings to read; return "no ads"
                        // rather than dereferencing null and reporting a NullReferenceException.
                        return _ads;
                    }

                    HtmlAgilityPack.HtmlNode[] _nodes = _listRoot.Descendants("li")
                                                        .Where(li => li.Attributes.Contains("class")
                                                            && li.Attributes[@"class"].Value.Contains("listingContainer"))
                                                        .ToArray();

                    if(_nodes.Length > 0)
                    {
                        _ads = new Models.Advertisement.Advertisements();
                        int _pgItemIdx = 0;

                        foreach(HtmlAgilityPack.HtmlNode _n in _nodes)
                        {
                            _pgItemIdx++;

                            // Measure how long basic + extended extraction takes for this listing.
                            Stopwatch _stopwatch = Stopwatch.StartNew();

                            Models.Advertisement _ad = this.EscrapeAdInfo(_n);

                            if(_ad != null)
                            {
                                _ad = this.EscrapeAdInfoExtend(_ad);
                            }

                            _stopwatch.Stop();

                            // NOTE(review): a null _ad is still added and reported, matching the existing
                            // one-entry-per-listing behaviour — confirm that callers expect null entries.
                            _ads.Add(_ad);

                            if(actionAdExtracted != null)
                            {
                                actionAdExtracted(new Handlers.EventHandlers.AdExtractedEventArgs(_ad, _stopwatch.Elapsed, _pgItemIdx));
                            }
                        }
                    }
                }
                catch(Exception ex)
                {
                    if(this.frameworkExceptionInvoke != null)
                    {
                        Exception _ex = new Exception(string.Format("Exception in {0}.{1}(?)", this.directoryProviderSetting.ServicedCountry.ToString(), "ExtractAds"), ex);
                        this.frameworkExceptionInvoke(new Handlers.EventHandlers.FrameworkExceptionEventArgs(_ex));
                    }
                }
                return _ads;
            }