Ejemplo n.º 1
0
        /// <summary>
        /// Builds a <see cref="CarInfo"/> from the HTML node of a single vehicle listing.
        /// </summary>
        /// <param name="node">HTML node that contains one vehicle listing.</param>
        /// <param name="dealer">Dealer the listing belongs to; forwarded to the stock-URL helper.</param>
        /// <returns>A new <see cref="CarInfo"/> populated from the Get* helpers.</returns>
        public override CarInfo ParseHtmlIntoCarInfo(HtmlNode node, DealerInfo dealer)
        {
            // Inner text of the first <div class="description">, split on the configured separators.
            // 'entries' stays null when no such div exists; the helpers receive it as-is.
            var description = node.Descendants("div").FirstOrDefault(a => a.Attributes.Contains("class") && a.Attributes["class"].Value == "description");
            var entries     = description?.InnerText.Split(GetInfoSeparators(), StringSplitOptions.RemoveEmptyEntries);

            var carInfo = new CarInfo();

            carInfo.Make          = Make;
            carInfo.Model         = GetModel(node);
            carInfo.Engine        = GetEngine(entries, node);
            carInfo.Transmission  = GetTransmission(entries, node);
            carInfo.DriveType     = GetDriveType(entries, node);
            carInfo.ExteriorColor = GetExtColor(entries, node);
            carInfo.InteriorColor = GetIntColor(entries, node);
            carInfo.StockNumber   = GetStockNumber(entries, node);
            carInfo.MSRP          = GetMSRP(node);

            // Parse the VIN once; it is both a field of its own and an input to the IPacket lookup.
            var vin = GetVIN(node);
            carInfo.VIN           = vin;

            carInfo.BodyStyle     = GetBodyStyle(node);
            carInfo.URL           = GetStockUrl(node, dealer);
            carInfo.IsLoaner      = IsThisLoaner(node);
            carInfo.IPacket       = GetIPacket(node, vin);
            carInfo.Packages      = GetPackages(node);

            return(carInfo);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Scrapes every URL in <paramref name="pagingInfo"/> and returns the cars found on all of them.
        /// </summary>
        /// <param name="pagingInfo">Supplies the list of page URLs to load.</param>
        /// <param name="dealer">Dealer being scraped; its Url and Name are copied onto every car.</param>
        /// <param name="selector">Provides the row XPath selectors, the row parser and the optional cleanup/regex maps.</param>
        /// <returns>All parsed cars, in page order. The list is not de-duplicated.</returns>
        private List <CarInfo> ScrapMultiple(PagingInfo pagingInfo, DealerInfo dealer, ISelector selector)
        {
            var result = new List <CarInfo>();

            foreach (var pagedUrl in pagingInfo.PagedUrls)
            {
#if DEBUG
                var timer = System.Diagnostics.Stopwatch.StartNew();
#endif
                HtmlDocument doc = LoadWebsite(pagedUrl);

                // Try the row selectors in order and keep the first one that matches anything.
                // 'rows' stays null when the document is null or no selector matches.
                HtmlNodeCollection rows = null;
                foreach (var rowSelector in selector.GetRowSelectors())
                {
                    rows = doc?.DocumentNode.SelectNodes(rowSelector);
                    if (rows != null)
                    {
                        break;
                    }
                }

                if (rows != null)
                {
                    // The maps do not depend on the row, so fetch them once per page.
                    var cleanupMap = selector.GetCleanupMap();
                    var regexMap   = selector.GetRegexMap();

                    foreach (var row in rows)
                    {
                        var carInfo        = selector.ParseHtmlIntoCarInfo(row, dealer);
                        carInfo.WebSite    = dealer.Url;
                        carInfo.DealerName = dealer.Name;

                        var carType = carInfo.GetType();

                        // Cleanup map: Item1 is a property name, Item2 a literal to strip from its value.
                        if (cleanupMap != null)
                        {
                            foreach (var entry in cleanupMap)
                            {
                                var property = carType.GetProperty(entry.Item1);
                                if (property == null)
                                {
                                    // Unknown property name: skip the entry instead of failing the whole scrape.
                                    continue;
                                }

                                property.SetValue(carInfo, property.GetValue(carInfo)?.ToString().Replace(entry.Item2, "").Trim());
                            }
                        }

                        // Regex map: Item1 is a property name, Item2 replaces its matches with a space.
                        // Properties that are missing or currently null are left untouched.
                        if (regexMap != null)
                        {
                            foreach (var entry in regexMap)
                            {
                                var property = carType.GetProperty(entry.Item1);
                                var current  = property?.GetValue(carInfo);
                                if (current != null)
                                {
                                    property.SetValue(carInfo, entry.Item2.Replace(current.ToString(), " ").Trim());
                                }
                            }
                        }

                        result.Add(carInfo);
                    }
                }

#if DEBUG
                // The car count is the running number of distinct VINs collected so far, not a per-page figure.
                NLogger.Instance.Info(string.Format("Finished scrape for URL {0}, {1} cars. ({2} ms)", pagedUrl, result.GroupBy(a => a.VIN).Select(a => a.First()).Count(), timer.Elapsed.TotalMilliseconds));
#endif
            }

            return(result);
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Returns paging info that contains only the URL from <c>GetUrlDetails</c>; no real paging is done yet.
 /// </summary>
 /// <param name="htmlDocument">Not used by this implementation.</param>
 /// <param name="dealer">Dealer whose listing URL is requested.</param>
 /// <returns>An enabled <see cref="PagingInfo"/> with a single URL.</returns>
 public override PagingInfo GetPagingInfo(HtmlDocument htmlDocument, DealerInfo dealer)
 {
     var paging = new PagingInfo();
     paging.IsEnabled = true;

     // Paging is not implemented: the only page to scrape is the original listing URL.
     var singlePage = new List <string>();
     singlePage.Add(GetUrlDetails(dealer));
     paging.PagedUrls = singlePage;

     return(paging);
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Resolves the detail-page URL of a vehicle listing.
        /// </summary>
        /// <param name="node">HTML node of one vehicle listing.</param>
        /// <param name="dealer">Dealer whose base URL is used to qualify relative links.</param>
        /// <returns>
        /// The value of the first input whose value contains '/new-inventory', when present.
        /// Otherwise the first matching anchor href: returned unchanged when it is already an
        /// absolute http(s) URL, or appended to <c>dealer.Url</c> with exactly one '/' between them.
        /// When no anchor matches, the result is <c>dealer.Url</c> followed by '/'.
        /// </returns>
        private static string GetStockUrl(HtmlNode node, DealerInfo dealer)
        {
            var url = node.SelectNodes(".//input[contains(@value,'/new-inventory')]")?.FirstOrDefault()?.Attributes["value"]?.Value;

            if (!IsEmpty(url))
            {
                return(url);
            }

            var href = node.SelectNodes(".//a[contains(@href,'/new/') or contains(@href, '-new')]")?.FirstOrDefault()?.Attributes["href"]?.Value;

            // An absolute link must not be prefixed with the dealer URL.
            if (href != null &&
                (href.StartsWith("http://", System.StringComparison.OrdinalIgnoreCase) ||
                 href.StartsWith("https://", System.StringComparison.OrdinalIgnoreCase)))
            {
                return(href);
            }

            // Trim the joining slashes so a root-relative href does not produce "dealer//path".
            return(string.Format("{0}/{1}", dealer.Url?.TrimEnd('/'), href?.TrimStart('/')));
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Produces a single-page <see cref="PagingInfo"/> for the dealer.
 /// </summary>
 /// <param name="htmlDocument">Not used by this implementation.</param>
 /// <param name="dealer">Dealer passed on to <c>GetUrlDetails</c>.</param>
 /// <returns>An enabled <see cref="PagingInfo"/> whose only URL is the result of <c>GetUrlDetails</c>.</returns>
 public override PagingInfo GetPagingInfo(HtmlDocument htmlDocument, DealerInfo dealer)
 {
     //TODO: look into need for paging later
     var info = new PagingInfo();

     info.IsEnabled = true;
     info.PagedUrls = new List <string>() { GetUrlDetails(dealer) };

     return(info);
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Builds the inventory listing URL for a dealer.
        /// </summary>
        /// <param name="dealer">Dealer providing the base URL and an optional custom URL template.</param>
        /// <returns>
        /// <c>dealer.CustomUrl</c> formatted with the model identifier when it is set;
        /// otherwise the standard '/new-inventory/index.htm' URL under <c>dealer.Url</c>.
        /// </returns>
        public override string GetUrlDetails(DealerInfo dealer)
        {
            var standardUrl = string.Format("{0}/new-inventory/index.htm?model={1}", dealer.Url, GetModelIdentifier());

            // A custom template, when configured, takes precedence over the standard URL.
            return(string.IsNullOrEmpty(dealer.CustomUrl)
                       ? standardUrl
                       : string.Format(dealer.CustomUrl, GetModelIdentifier()));
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Builds the listing URL for a dealer, honouring custom and loaner URL templates.
        /// </summary>
        /// <param name="dealer">Dealer providing the base URL and the optional CustomUrl / LoanerUrl templates.</param>
        /// <returns>
        /// The loaner template when the current inventory type is Loaner and a loaner template is set;
        /// otherwise the custom template when set; otherwise the standard '/new-vehicles/' URL.
        /// Templates are formatted with the model identifier.
        /// </returns>
        public override string GetUrlDetails(DealerInfo dealer)
        {
            var standardUrl = string.Format("{0}/new-vehicles/{1}/#action=im_ajax_call&perform=get_results&vrp_view=listview&page=1", dealer.Url, GetModelIdentifier());

            var listingUrl = string.IsNullOrEmpty(dealer.CustomUrl)
                                 ? standardUrl
                                 : string.Format(dealer.CustomUrl, GetModelIdentifier());

            // The loaner template wins over both the standard and the custom URL.
            var loanerRequested = GetCurrentInventoryType() == InventoryType.Loaner;
            if (loanerRequested && !string.IsNullOrEmpty(dealer.LoanerUrl))
            {
                return(string.Format(dealer.LoanerUrl, GetModelIdentifier()));
            }

            return(listingUrl);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Builds the list of page URLs to scrape for a dealer.
        /// </summary>
        /// <param name="htmlDocument">
        /// Listing page searched for a span whose text contains 'Page'. May be null, in which case
        /// only the first URL is returned.
        /// </param>
        /// <param name="dealer">Dealer providing the listing URL; pagination is added only when it has no CustomUrl.</param>
        /// <returns>
        /// An enabled <see cref="PagingInfo"/>. For a custom URL it holds that URL only. For a standard URL
        /// it holds '&amp;start=0' plus one '&amp;start=N0' URL per number N in the range read from the page
        /// indicator, with duplicates removed.
        /// </returns>
        public override PagingInfo GetPagingInfo(HtmlDocument htmlDocument, DealerInfo dealer)
        {
            var baseUrl       = GetUrlDetails(dealer) ?? string.Empty;
            var isStandardUrl = string.IsNullOrEmpty(dealer.CustomUrl);
            var urls          = new List <string>();

            //add pagination only to standard URLs
            urls.Add(isStandardUrl ? baseUrl + "&start=0" : baseUrl);

            if (isStandardUrl)
            {
                var entry   = htmlDocument?.DocumentNode.SelectSingleNode(".//span[contains(text(), 'Page')]")?.InnerText;
                var matches = entry == null ? null : Regex.Matches(entry, "\\d+");

                //Rather than disabling paging outright when the indicator is not "<first> <last>",
                //fall through with just the 1st paged URL so at least that can be scraped.
                int firstPage;
                int lastPage;
                if (matches != null && matches.Count == 2 &&
                    int.TryParse(matches[0].Value, out firstPage) &&
                    int.TryParse(matches[1].Value, out lastPage))
                {
                    // NOTE(review): the offset is page * 10, so "Page 1 of N" also yields start=N0 in
                    // addition to start=0 - confirm whether that last page is intended.
                    for (int page = firstPage; page <= lastPage; page++)
                    {
                        urls.Add(string.Format("{0}&start={1}", baseUrl, page * 10L));
                    }
                }
            }

            return(new PagingInfo
            {
                IsEnabled = true,
                PagedUrls = urls.GroupBy(a => a.Trim()).Select(a => a.First()).ToList() //remove duplicate entries
            });
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Builds a <see cref="CarInfo"/> from the HTML node of a single vehicle listing.
        /// </summary>
        /// <param name="node">HTML node that contains one vehicle listing.</param>
        /// <param name="dealer">Dealer the listing belongs to; not used by this implementation.</param>
        /// <returns>A new <see cref="CarInfo"/>; fields whose source element is missing are left null.</returns>
        public override CarInfo ParseHtmlIntoCarInfo(HtmlNode node, DealerInfo dealer)
        {
            var entries = node.InnerText?.Split(GetInfoSeparators(), StringSplitOptions.RemoveEmptyEntries);

            // Parse the VIN once; it is both a field of its own and an input to the IPacket lookup.
            var vin = GetVin(entries, node);

            // First node under the transmission selector whose text mentions "trans" (case-insensitive).
            // Taking the first match avoids an exception when a listing has more than one such node.
            var transmissionLabel = node.SelectNodes(GetTransmissionIdentifier())
                                    ?.FirstOrDefault(a => a.InnerText.IndexOf("trans", StringComparison.OrdinalIgnoreCase) >= 0);

            // First anchor whose markup contains "http" and is not a javascript link.
            var link = node.Descendants().FirstOrDefault(a => a.Name == "a" && a.OuterHtml.Contains("http") && !a.OuterHtml.Contains("javascript"));

            return(new CarInfo
            {
                Make = GetMakeIdentifier(),
                Model = GetModel(node),
                MSRP = GetMSRP(node),
                InteriorColor = GetIntColor(node),
                ExteriorColor = GetExtColor(node),
                DriveType = GetDriveType(node),
                Transmission = transmissionLabel?.ParentNode.InnerText.Trim(),
                StockNumber = GetStock(node),
                VIN = vin,
                URL = link?.Attributes.FirstOrDefault(a => a.Name == "href")?.Value,
                IPacket = GetIPacket(node, vin),
                BodyStyle = GetBodyStyle(node)
            });
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Builds a <see cref="CarInfo"/> by splitting the node's inner text on the configured
        /// separators and picking, for each field, the first entry that contains the field's identifier.
        /// </summary>
        /// <param name="node">HTML node that contains one vehicle listing.</param>
        /// <param name="dealer">Dealer the listing belongs to; not used by this implementation.</param>
        /// <returns>A new <see cref="CarInfo"/>; fields with no matching entry are left null.</returns>
        public override CarInfo ParseHtmlIntoCarInfo(HtmlNode node, DealerInfo dealer)
        {
            // Fall back to an empty array so a node without text yields empty fields instead of throwing.
            var entries = node.InnerText?.Split(GetInfoSeparators(), StringSplitOptions.RemoveEmptyEntries) ?? new string[0];
            var make    = GetMakeIdentifier();

            return(new CarInfo
            {
                Make = make,
                // The model is the first entry that mentions the make (case-insensitive), kept whole.
                Model = entries.FirstOrDefault(a => a.IndexOf(make, StringComparison.OrdinalIgnoreCase) >= 0)?.Trim(),
                MSRP = ExtractEntryValue(entries, GetMsrpIdentifier()),
                InteriorColor = ExtractEntryValue(entries, GetIntColorIdentifier()),
                ExteriorColor = ExtractEntryValue(entries, GetExtColorIdentifier()),
                DriveType = ExtractEntryValue(entries, GetDriveTypeIdentifier()),
                Engine = ExtractEntryValue(entries, GetEngineIdentifier()),
                StockNumber = ExtractEntryValue(entries, GetStockNumberIdentifier()),
                VIN = ExtractEntryValue(entries, GetVinIdentifier()),
                // SelectNodes returns null when nothing matches, so the lookup has to be null-safe.
                URL = node.SelectNodes(GetCarUrlIdentifier())?.FirstOrDefault()?.Attributes.FirstOrDefault(a => a.Name == "href")?.Value,
                //WebSite = URL, do it on a higher level
                BodyStyle = ExtractEntryValue(entries, GetBodyStyleIdentifier()),
                ModelCode = ExtractEntryValue(entries, GetModelCodeIdentifier()),
                Transmission = ExtractEntryValue(entries, GetTransmissionIdentifier())
            });
        }

        /// <summary>
        /// Returns the first entry containing <paramref name="identifier"/>, with the identifier removed
        /// and surrounding whitespace trimmed.
        /// </summary>
        /// <param name="entries">Text entries to search; must not be null.</param>
        /// <param name="identifier">Label text that marks the wanted entry.</param>
        /// <returns>The cleaned entry, or null when the identifier is null/empty or no entry contains it.</returns>
        private static string ExtractEntryValue(string[] entries, string identifier)
        {
            // An empty identifier would match every entry and make string.Replace throw.
            if (string.IsNullOrEmpty(identifier))
            {
                return(null);
            }

            return(entries.FirstOrDefault(a => a.Contains(identifier))?.Replace(identifier, "").Trim());
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Builds a <see cref="CarInfo"/> from a listing node, reading most fields from the
        /// ';'-separated 'data-params' attribute of the section that has the same id as the node.
        /// </summary>
        /// <param name="node">HTML node that contains one vehicle listing.</param>
        /// <param name="dealer">Dealer the listing belongs to; not used by this implementation.</param>
        /// <returns>
        /// A new <see cref="CarInfo"/>. When the section or its 'data-params' attribute is missing,
        /// the data-driven fields are null and Model is an empty string.
        /// </returns>
        /// <exception cref="System.InvalidOperationException">More than one 'data-params' entry contains the same key.</exception>
        public override CarInfo ParseHtmlIntoCarInfo(HtmlNode node, DealerInfo dealer)
        {
            var sectionID = node.Attributes["id"]?.Value;

            // Any link of the chain may be missing; fall back to an empty array rather than dereferencing null.
            var data = node.OwnerDocument.DocumentNode.SelectNodes(string.Format(".//section[@id='{0}']", sectionID))?.FirstOrDefault()?.Attributes["data-params"]?.Value?.Split(';')
                       ?? new string[0];

            var car = new CarInfo
            {
                BodyStyle     = ReadDataParam(data, "bodyType:", ""),
                ExteriorColor = ReadDataParam(data, "exteriorColor:", "")?.Replace("%20", " "),
                Make          = base.Make,
                // "<year> <model> <trim>": the model and trim keys are replaced by a space to act as separators.
                Model         = (ReadDataParam(data, "year:", "") +
                                 ReadDataParam(data, "model:", " ") +
                                 ReadDataParam(data, "trim:", " ")).Replace("%20", " "),
                StockNumber = ReadDataParam(data, "stockNumber:", ""),
                VIN         = ReadDataParam(data, "vin:", ""),
                URL         = node.SelectNodes(".//a")?.FirstOrDefault(a => string.Equals(a.Attributes["itemprop"]?.Value, "url", System.StringComparison.OrdinalIgnoreCase))?.Attributes["href"]?.Value,
                IsLoaner    = node.SelectNodes(".//img[contains(@title,'Courtesy')]")?.Count > 0,
                MSRP        = node.SelectNodes(".//span")?.FirstOrDefault(a => a.Attributes["itemprop"]?.Value == "price")?.InnerText
            };

            return(car);
        }

        /// <summary>
        /// Returns the single entry of <paramref name="data"/> that contains <paramref name="key"/>,
        /// with the key replaced by <paramref name="replacement"/>.
        /// </summary>
        /// <param name="data">Entries of the 'data-params' attribute; must not be null.</param>
        /// <param name="key">Key text including the trailing colon, e.g. "vin:".</param>
        /// <param name="replacement">Text substituted for the key.</param>
        /// <returns>The entry with the key replaced, or null when no entry contains the key.</returns>
        private static string ReadDataParam(string[] data, string key, string replacement)
        {
            return(data.SingleOrDefault(a => a.Contains(key))?.Replace(key, replacement));
        }
Ejemplo n.º 12
0
 /// <summary>
 /// Returns the paging information for a dealer's listing.
 /// </summary>
 /// <param name="htmlDocument">HTML document made available to the implementation. NOTE(review): presumably the already-loaded listing page - confirm against callers.</param>
 /// <param name="dealer">Dealer the paging information is requested for.</param>
 /// <returns>A <see cref="PagingInfo"/> produced by the concrete implementation.</returns>
 public abstract PagingInfo GetPagingInfo(HtmlDocument htmlDocument, DealerInfo dealer);
Ejemplo n.º 13
0
 /// <summary>
 /// Converts an HTML node into a <see cref="CarInfo"/>.
 /// </summary>
 /// <param name="node">HTML node to parse. NOTE(review): presumably one vehicle row of a listing page - confirm against callers.</param>
 /// <param name="dealer">Dealer the node was scraped from.</param>
 /// <returns>The <see cref="CarInfo"/> produced by the concrete implementation.</returns>
 public abstract CarInfo ParseHtmlIntoCarInfo(HtmlNode node, DealerInfo dealer);
Ejemplo n.º 14
0
 /// <summary>
 /// Returns the URL details for the given dealer.
 /// </summary>
 /// <param name="dealer">Dealer the URL is built for.</param>
 /// <returns>A URL string produced by the concrete implementation. NOTE(review): presumably the listing URL to scrape - confirm against callers.</returns>
 public abstract string GetUrlDetails(DealerInfo dealer);
Ejemplo n.º 15
0
 /// <summary>
 /// Builds the '/new-vehicles/' list-view URL for the dealer and the current model identifier.
 /// </summary>
 /// <param name="dealer">Dealer whose <c>Url</c> is used as the URL prefix.</param>
 /// <returns>The formatted listing URL.</returns>
 public override string GetUrlDetails(DealerInfo dealer)
 {
     const string template = "{0}/new-vehicles/{1}/#action=im_ajax_call&perform=get_results&vrp_view=listview&page=1";

     var dealerRoot = dealer.Url;
     var modelId    = GetModelIdentifier();

     return(string.Format(template, dealerRoot, modelId));
 }
Ejemplo n.º 16
0
 /// <summary>
 /// Builds the 'VehicleSearchResults' URL for new vehicles of the base class's model, limited to 100 results.
 /// </summary>
 /// <param name="dealer">Dealer whose <c>Url</c> is used as the URL prefix.</param>
 /// <returns>The formatted search URL.</returns>
 public override string GetUrlDetails(DealerInfo dealer)
 {
     const string template = "{0}/VehicleSearchResults?search=new&model={1}&limit=100";

     var dealerRoot = dealer.Url;
     var model      = base.Model;

     return(string.Format(template, dealerRoot, model));
 }
Ejemplo n.º 17
0
 /// <summary>
 /// Builds the 'searchnew.aspx' URL for the current model identifier, using the query values pn=100 and st=Price+desc.
 /// </summary>
 /// <param name="dealer">Dealer whose <c>Url</c> is used as the URL prefix.</param>
 /// <returns>The formatted search URL.</returns>
 public override string GetUrlDetails(DealerInfo dealer)
 {
     const string template = "{0}/searchnew.aspx?Model={1}&pn=100&st=Price+desc";

     var dealerRoot = dealer.Url;
     var modelId    = GetModelIdentifier();

     return(string.Format(template, dealerRoot, modelId));
 }