Beispiel #1
0
        /// <summary>
        /// Loads every URL in <paramref name="pagingInfo"/>, extracts the car rows from each
        /// page and returns the parsed, cleaned-up cars of all pages in one list.
        /// </summary>
        /// <param name="pagingInfo">Supplies the page URLs to load via its <c>PagedUrls</c>.</param>
        /// <param name="dealer">Dealer whose <c>Url</c> and <c>Name</c> are stamped onto every car.</param>
        /// <param name="selector">
        /// Supplies the row XPath selectors, the row parser and the optional cleanup / regex maps.
        /// </param>
        /// <returns>
        /// All cars parsed from all pages, in page and row order. Pages that could not be loaded
        /// or that match none of the row selectors contribute nothing.
        /// </returns>
        /// <remarks>
        /// Map entries address <c>CarInfo</c> properties by name through reflection. A name that
        /// does not resolve to a public property makes <c>GetProperty</c> return <c>null</c> and
        /// the subsequent access throws, exactly as before.
        /// </remarks>
        private List<CarInfo> ScrapMultiple(PagingInfo pagingInfo, DealerInfo dealer, ISelector selector)
        {
            var result = new List<CarInfo>();

            foreach (var pagedUrl in pagingInfo.PagedUrls)
            {
#if DEBUG
                // Stopwatch is monotonic; DateTime.Now can jump when the system clock is adjusted.
                var timer = System.Diagnostics.Stopwatch.StartNew();
#endif
                HtmlDocument doc = LoadWebsite(pagedUrl);

                // Try the row selectors in order and keep the first one that matches anything.
                HtmlNodeCollection rows = null;
                if (doc != null)
                {
                    foreach (var rowSelector in selector.GetRowSelectors())
                    {
                        rows = doc.DocumentNode.SelectNodes(rowSelector);
                        if (rows != null)
                        {
                            break;
                        }
                    }
                }

                if (rows != null)
                {
                    // The maps do not depend on the row, so fetch them once per page.
                    var cleanupMap = selector.GetCleanupMap();
                    var regexMap   = selector.GetRegexMap();

                    foreach (var row in rows)
                    {
                        var carInfo        = selector.ParseHtmlIntoCarInfo(row, dealer);
                        carInfo.WebSite    = dealer.Url;
                        carInfo.DealerName = dealer.Name;

                        var carType = carInfo.GetType();

                        // Cleanup map: (property name, literal text to strip from its value).
                        if (cleanupMap != null)
                        {
                            foreach (var entry in cleanupMap)
                            {
                                var property = carType.GetProperty(entry.Item1);

                                // A null value stays null; otherwise strip the text and trim.
                                var cleaned = property.GetValue(carInfo)?.ToString().Replace(entry.Item2, "").Trim();
                                property.SetValue(carInfo, cleaned);
                            }
                        }

                        // Regex map: (property name, regex whose matches become a single space).
                        if (regexMap != null)
                        {
                            foreach (var entry in regexMap)
                            {
                                var property = carType.GetProperty(entry.Item1);
                                var current  = property.GetValue(carInfo);

                                // Properties without a value are left untouched.
                                if (current != null)
                                {
                                    property.SetValue(carInfo, entry.Item2.Replace(current.ToString(), " ").Trim());
                                }
                            }
                        }

                        result.Add(carInfo);
                    }
                }

#if DEBUG
                // NOTE: the car count is cumulative (distinct VINs over all pages processed so far),
                // not the number of cars found on this page alone.
                NLogger.Instance.Info($"Finished scrape for URL {pagedUrl}, {result.Select(car => car.VIN).Distinct().Count()} cars. ({timer.Elapsed.TotalMilliseconds} ms)");
#endif
            }

            return result;
        }