/// <summary>
/// Builds a <see cref="CarInfo"/> from a single inventory listing node.
/// </summary>
/// <param name="node">Listing node the vehicle details are read from.</param>
/// <param name="dealer">Dealer the listing belongs to; passed on when resolving the stock URL.</param>
/// <returns>The populated <see cref="CarInfo"/>.</returns>
public override CarInfo ParseHtmlIntoCarInfo(HtmlNode node, DealerInfo dealer)
{
    // The free-text attributes come from the first <div class="description">. When no such
    // div exists, "entries" stays null and is handed to the helpers unchanged.
    var descriptionDiv = node.Descendants("div")
        .FirstOrDefault(div => div.Attributes.Contains("class") && div.Attributes["class"].Value == "description");
    var entries = descriptionDiv?.InnerText.Split(GetInfoSeparators(), StringSplitOptions.RemoveEmptyEntries);

    return new CarInfo
    {
        // Right-hand side is this selector's Make member, not the CarInfo property being set.
        Make = Make,
        Model = GetModel(node),
        Engine = GetEngine(entries, node),
        Transmission = GetTransmission(entries, node),
        DriveType = GetDriveType(entries, node),
        ExteriorColor = GetExtColor(entries, node),
        InteriorColor = GetIntColor(entries, node),
        StockNumber = GetStockNumber(entries, node),
        MSRP = GetMSRP(node),
        VIN = GetVIN(node),
        BodyStyle = GetBodyStyle(node),
        URL = GetStockUrl(node, dealer),
        IsLoaner = IsThisLoaner(node),
        IPacket = GetIPacket(node, GetVIN(node)),
        Packages = GetPackages(node)
    };
}
/// <summary>
/// Loads every paged URL, locates the listing rows on each page and converts each row into a
/// <see cref="CarInfo"/>, applying the selector's cleanup and regex maps to the parsed values.
/// </summary>
/// <param name="pagingInfo">Supplies the page URLs to load (<c>PagedUrls</c>).</param>
/// <param name="dealer">Dealer being scraped; its <c>Url</c> and <c>Name</c> are stamped on every car.</param>
/// <param name="selector">Provides the row selectors, the row parser and the post-processing maps.</param>
/// <returns>All cars parsed from all pages, in page order; empty when no rows were found.</returns>
/// <exception cref="InvalidOperationException">
/// A cleanup or regex map entry names a property that does not exist on the parsed car object.
/// </exception>
private List<CarInfo> ScrapMultiple(PagingInfo pagingInfo, DealerInfo dealer, ISelector selector)
{
    var result = new List<CarInfo>();

    foreach (var pagedUrl in pagingInfo.PagedUrls)
    {
#if DEBUG
        // Stopwatch is monotonic; DateTime.Now differences jump with clock or DST adjustments.
        var timer = System.Diagnostics.Stopwatch.StartNew();
#endif
        HtmlDocument doc = LoadWebsite(pagedUrl);

        // Use the first row selector that matches anything on this page.
        HtmlNodeCollection rows = null;
        foreach (var rowSelector in selector.GetRowSelectors())
        {
            rows = doc?.DocumentNode.SelectNodes(rowSelector);
            if (rows != null)
            {
                break;
            }
        }

        if (rows != null)
        {
            foreach (var row in rows)
            {
                var carInfo = selector.ParseHtmlIntoCarInfo(row, dealer);
                carInfo.WebSite = dealer.Url;
                carInfo.DealerName = dealer.Name;

                // Resolve the runtime type once per car instead of three times per map entry.
                var carType = carInfo.GetType();

                // Cleanup map: Item1 = property name, Item2 = literal text to strip from its value.
                var cleanupMap = selector.GetCleanupMap();
                if (cleanupMap != null)
                {
                    foreach (var entry in cleanupMap)
                    {
                        var property = carType.GetProperty(entry.Item1);
                        if (property == null)
                        {
                            throw new InvalidOperationException(
                                string.Format("Cleanup map references unknown property '{0}' on {1}.", entry.Item1, carType.Name));
                        }

                        // A null value stays null; otherwise the literal is removed and the result trimmed.
                        var current = property.GetValue(carInfo)?.ToString();
                        property.SetValue(carInfo, current?.Replace(entry.Item2, "").Trim());
                    }
                }

                // Regex map: Item1 = property name, Item2 = regex whose matches are replaced by a space.
                var regexMap = selector.GetRegexMap();
                if (regexMap != null)
                {
                    foreach (var entry in regexMap)
                    {
                        var property = carType.GetProperty(entry.Item1);
                        if (property == null)
                        {
                            throw new InvalidOperationException(
                                string.Format("Regex map references unknown property '{0}' on {1}.", entry.Item1, carType.Name));
                        }

                        // Null values are left untouched.
                        var current = property.GetValue(carInfo);
                        if (current != null)
                        {
                            property.SetValue(carInfo, entry.Item2.Replace(current.ToString(), " ").Trim());
                        }
                    }
                }

                result.Add(carInfo);
            }
        }
#if DEBUG
        // The car count is the number of distinct VINs accumulated so far across all pages.
        NLogger.Instance.Info(string.Format("Finished scrape for URL {0}, {1} cars. ({2} ms)", pagedUrl, result.GroupBy(a => a.VIN).Count(), timer.Elapsed.TotalMilliseconds));
#endif
    }

    return result;
}
/// <summary>
/// Returns paging information that contains exactly one URL: the one produced by
/// <c>GetUrlDetails</c> for the dealer.
/// </summary>
/// <param name="htmlDocument">Not used by this implementation.</param>
/// <param name="dealer">Dealer whose listing URL is returned.</param>
/// <returns>An enabled <see cref="PagingInfo"/> with a single paged URL.</returns>
public override PagingInfo GetPagingInfo(HtmlDocument htmlDocument, DealerInfo dealer)
{
    // No paging logic yet: the only page to scrape is the original listing URL.
    var pagedUrls = new List<string>();
    pagedUrls.Add(GetUrlDetails(dealer));

    var paging = new PagingInfo();
    paging.IsEnabled = true;
    paging.PagedUrls = pagedUrls;
    return paging;
}
/// <summary>
/// Resolves the detail-page URL of a listing.
/// </summary>
/// <param name="node">Listing node to search.</param>
/// <param name="dealer">Dealer whose <c>Url</c> prefixes the anchor-based fallback.</param>
/// <returns>
/// The <c>value</c> of the first input whose value contains "/new-inventory" when that is not
/// empty; otherwise "{dealer.Url}/{href}" built from the first matching anchor (the href part
/// is blank when no anchor matches).
/// </returns>
private static string GetStockUrl(HtmlNode node, DealerInfo dealer)
{
    // First choice: an input element whose value is returned as-is.
    var inventoryInput = node.SelectNodes(".//input[contains(@value,'/new-inventory')]")?.FirstOrDefault();
    var inputUrl = inventoryInput?.Attributes["value"]?.Value;
    if (!IsEmpty(inputUrl))
    {
        return inputUrl;
    }

    // Fallback: combine the dealer URL with the href of the first "new vehicle" anchor.
    var anchor = node.SelectNodes(".//a[contains(@href,'/new/') or contains(@href, '-new')]")?.FirstOrDefault();
    var href = anchor?.Attributes["href"].Value;
    return string.Format("{0}/{1}", dealer.Url, href);
}
/// <summary>
/// Returns paging information holding only the dealer's listing URL from <c>GetUrlDetails</c>.
/// </summary>
/// <param name="htmlDocument">Not used by this implementation.</param>
/// <param name="dealer">Dealer whose listing URL is returned.</param>
/// <returns>An enabled <see cref="PagingInfo"/> with a single paged URL.</returns>
public override PagingInfo GetPagingInfo(HtmlDocument htmlDocument, DealerInfo dealer)
{
    // TODO: look into need for paging later
    var singlePage = new List<string> { GetUrlDetails(dealer) };
    return new PagingInfo
    {
        IsEnabled = true,
        PagedUrls = singlePage
    };
}
/// <summary>
/// Builds the inventory listing URL for the current model.
/// </summary>
/// <param name="dealer">Dealer supplying the base <c>Url</c> and the optional <c>CustomUrl</c> template.</param>
/// <returns>
/// <c>CustomUrl</c> formatted with the model identifier when it is set; otherwise the standard
/// "/new-inventory/index.htm" URL under the dealer's base URL.
/// </returns>
public override string GetUrlDetails(DealerInfo dealer)
{
    // A dealer-specific template, when configured, replaces the standard URL entirely.
    if (!string.IsNullOrEmpty(dealer.CustomUrl))
    {
        return string.Format(dealer.CustomUrl, GetModelIdentifier());
    }

    return string.Format("{0}/new-inventory/index.htm?model={1}", dealer.Url, GetModelIdentifier());
}
/// <summary>
/// Builds the inventory listing URL for the current model.
/// </summary>
/// <param name="dealer">
/// Dealer supplying the base <c>Url</c> and the optional <c>CustomUrl</c> / <c>LoanerUrl</c> templates.
/// </param>
/// <returns>
/// In order of precedence: <c>LoanerUrl</c> (only when the current inventory type is Loaner and
/// the template is set), then <c>CustomUrl</c>, then the standard "/new-vehicles/" URL. Templates
/// are formatted with the model identifier.
/// </returns>
public override string GetUrlDetails(DealerInfo dealer)
{
    string url;
    if (string.IsNullOrEmpty(dealer.CustomUrl))
    {
        url = string.Format("{0}/new-vehicles/{1}/#action=im_ajax_call&perform=get_results&vrp_view=listview&page=1", dealer.Url, GetModelIdentifier());
    }
    else
    {
        url = string.Format(dealer.CustomUrl, GetModelIdentifier());
    }

    // The loaner template wins over both of the above, but only while scraping loaner inventory.
    var useLoanerTemplate = GetCurrentInventoryType() == InventoryType.Loaner
        && !string.IsNullOrEmpty(dealer.LoanerUrl);

    return useLoanerTemplate
        ? string.Format(dealer.LoanerUrl, GetModelIdentifier())
        : url;
}
/// <summary>
/// Builds the list of page URLs to scrape for a dealer.
/// </summary>
/// <remarks>
/// Pagination is only added to standard URLs (no <c>CustomUrl</c> configured). The page range is
/// read from the first span whose text contains "Page"; it must contain exactly two numbers.
/// Whenever the range cannot be determined (no document, no span, unexpected or unparsable
/// numbers) only the first URL is returned, so at least that page can be scraped.
/// </remarks>
/// <param name="htmlDocument">First listing page; may be null, in which case no extra pages are added.</param>
/// <param name="dealer">Dealer whose listing URL is paged.</param>
/// <returns>An enabled <see cref="PagingInfo"/> whose URLs are de-duplicated (compared after trimming).</returns>
public override PagingInfo GetPagingInfo(HtmlDocument htmlDocument, DealerInfo dealer)
{
    // Computed once; it was previously rebuilt for every generated page URL.
    var baseUrl = GetUrlDetails(dealer) ?? string.Empty;
    var isStandardUrl = string.IsNullOrEmpty(dealer.CustomUrl);

    var urls = new List<string>();
    urls.Add(isStandardUrl ? baseUrl + "&start=0" : baseUrl);

    if (isStandardUrl)
    {
        // Null-safe on the document so a failed page load degrades to "first page only".
        var pageLabel = htmlDocument?.DocumentNode.SelectSingleNode(".//span[contains(text(), 'Page')]")?.InnerText;
        if (pageLabel != null)
        {
            var matches = Regex.Matches(pageLabel, "\\d+");
            int firstPage;
            int lastPage;

            // TryParse: "\d+" can match digit runs that overflow int or are not ASCII digits.
            if (matches.Count == 2
                && int.TryParse(matches[0].Value, out firstPage)
                && int.TryParse(matches[1].Value, out lastPage))
            {
                // NOTE(review): the offset is the page number times 10 (formerly int.Parse(i + "0"));
                // confirm this matches the site's page size and that the last offset is intended.
                for (int page = firstPage; page <= lastPage; page++)
                {
                    urls.Add(string.Format("{0}&start={1}", baseUrl, page * 10L));
                }
            }
        }
    }

    return new PagingInfo
    {
        IsEnabled = true,
        // Remove duplicate entries, keeping the first occurrence of each trimmed URL.
        PagedUrls = urls.GroupBy(url => url.Trim()).Select(group => group.First()).ToList()
    };
}
/// <summary>
/// Builds a <see cref="CarInfo"/> from a listing node, mostly through node-based helper lookups.
/// </summary>
/// <param name="node">Listing node to read from.</param>
/// <param name="dealer">Not used by this implementation.</param>
/// <returns>The populated <see cref="CarInfo"/>.</returns>
public override CarInfo ParseHtmlIntoCarInfo(HtmlNode node, DealerInfo dealer)
{
    var entries = node.InnerText?.Split(GetInfoSeparators(), StringSplitOptions.RemoveEmptyEntries);

    var car = new CarInfo();
    car.Make = GetMakeIdentifier();
    car.Model = GetModel(node);
    car.MSRP = GetMSRP(node);
    car.InteriorColor = GetIntColor(node);
    car.ExteriorColor = GetExtColor(node);
    car.DriveType = GetDriveType(node);

    // The transmission text is the parent's text of the one node mentioning "trans";
    // SingleOrDefault throws when several candidates match.
    var transmissionLabel = node.SelectNodes(GetTransmissionIdentifier())
        ?.SingleOrDefault(candidate => candidate.InnerText.ToLower().Contains("trans"));
    car.Transmission = transmissionLabel?.ParentNode.InnerText.Trim();

    car.StockNumber = GetStock(node);
    car.VIN = GetVin(entries, node);

    // First anchor whose markup contains "http" and no "javascript"; its href is the car URL.
    var link = node.Descendants()
        .FirstOrDefault(element => element.Name == "a"
            && element.OuterHtml.Contains("http")
            && !element.OuterHtml.Contains("javascript"));
    car.URL = link?.Attributes.FirstOrDefault(attribute => attribute.Name == "href")?.Value;

    car.IPacket = GetIPacket(node, GetVin(entries, node));
    car.BodyStyle = GetBodyStyle(node);
    return car;
}
/// <summary>
/// Builds a <see cref="CarInfo"/> from the listing's text, which is split into entries that are
/// each recognised by a label (MSRP, colours, VIN, ...) supplied by the identifier getters.
/// </summary>
/// <remarks>
/// Missing data yields null properties instead of exceptions: a listing without text produces no
/// entries, and a listing without a matching URL node produces a null <c>URL</c>.
/// </remarks>
/// <param name="node">Listing node to read from.</param>
/// <param name="dealer">Not used by this implementation.</param>
/// <returns>The populated <see cref="CarInfo"/>.</returns>
public override CarInfo ParseHtmlIntoCarInfo(HtmlNode node, DealerInfo dealer)
{
    // The split is skipped when InnerText is null; treat that as "no entries" rather than
    // letting the lookups below throw on a null sequence.
    var entries = node.InnerText?.Split(GetInfoSeparators(), StringSplitOptions.RemoveEmptyEntries) ?? new string[0];

    // Value of the first entry containing the label, with the label removed and the rest trimmed;
    // null when no entry carries the label.
    Func<string, string> valueOf = label =>
        entries.FirstOrDefault(entry => entry.Contains(label))?.Replace(label, "").Trim();

    var make = GetMakeIdentifier();

    return new CarInfo
    {
        Make = make,
        // Ordinal, case-insensitive match: culture-sensitive ToLower() mis-compares makes
        // containing 'I' under Turkish cultures.
        Model = entries.FirstOrDefault(entry => entry.IndexOf(make, StringComparison.OrdinalIgnoreCase) >= 0)?.Trim(),
        MSRP = valueOf(GetMsrpIdentifier()),
        InteriorColor = valueOf(GetIntColorIdentifier()),
        ExteriorColor = valueOf(GetExtColorIdentifier()),
        DriveType = valueOf(GetDriveTypeIdentifier()),
        Engine = valueOf(GetEngineIdentifier()),
        StockNumber = valueOf(GetStockNumberIdentifier()),
        VIN = valueOf(GetVinIdentifier()),
        // SelectNodes returns null when nothing matches, hence the null-conditional.
        URL = node.SelectNodes(GetCarUrlIdentifier())?.FirstOrDefault()?.Attributes.FirstOrDefault(a => a.Name == "href")?.Value,
        // WebSite is assigned on a higher level.
        BodyStyle = valueOf(GetBodyStyleIdentifier()),
        ModelCode = valueOf(GetModelCodeIdentifier()),
        Transmission = valueOf(GetTransmissionIdentifier())
    };
}
/// <summary>
/// Builds a <see cref="CarInfo"/> from a listing section, reading most fields from the
/// semicolon-separated "key:value" pairs in the section's <c>data-params</c> attribute.
/// </summary>
/// <remarks>
/// When the section or its <c>data-params</c> attribute cannot be found, the data-driven fields
/// are left null (<c>Model</c> becomes an empty string) instead of throwing.
/// </remarks>
/// <param name="node">Listing node; its <c>id</c> is used to locate the section in the owning document.</param>
/// <param name="dealer">Not used by this implementation.</param>
/// <returns>The populated <see cref="CarInfo"/>.</returns>
/// <exception cref="InvalidOperationException">More than one pair contains the same key.</exception>
public override CarInfo ParseHtmlIntoCarInfo(HtmlNode node, DealerInfo dealer)
{
    var sectionID = node.Attributes["id"]?.Value;

    // Any link of this chain may be missing; fall back to "no pairs" so the lookups below are safe.
    var data = node.OwnerDocument.DocumentNode
        .SelectNodes(string.Format(".//section[@id='{0}']", sectionID))
        ?.FirstOrDefault()
        ?.Attributes["data-params"]?.Value
        ?.Split(';')
        ?? new string[0];

    // The single pair containing the key, with the key text replaced; null when the key is absent.
    Func<string, string, string> field = (key, replacement) =>
        data.SingleOrDefault(pair => pair.Contains(key))?.Replace(key, replacement);

    var urlAnchor = node.SelectNodes(".//a")
        ?.FirstOrDefault(a => string.Equals(a.Attributes["itemprop"]?.Value, "url", StringComparison.OrdinalIgnoreCase));
    var priceSpan = node.SelectNodes(".//span")
        ?.FirstOrDefault(span => span.Attributes["itemprop"]?.Value == "price");

    var car = new CarInfo
    {
        BodyStyle = field("bodyType:", ""),
        ExteriorColor = field("exteriorColor:", "")?.Replace("%20", " "),
        Make = base.Make,
        // "year model trim": model and trim keys are replaced by a space to separate the parts.
        Model = (field("year:", "") + field("model:", " ") + field("trim:", " ")).Replace("%20", " "),
        StockNumber = field("stockNumber:", ""),
        VIN = field("vin:", ""),
        // Guard the href too: an itemprop="url" anchor is not guaranteed to carry one.
        URL = urlAnchor?.Attributes["href"]?.Value,
        IsLoaner = node.SelectNodes(".//img[contains(@title,'Courtesy')]")?.Count > 0,
        MSRP = priceSpan?.InnerText
    };

    return car;
}
/// <summary>
/// Produces the paging information for a dealer's inventory listing.
/// </summary>
/// <param name="htmlDocument">Listing document made available to the implementation.</param>
/// <param name="dealer">Dealer the paging information is built for.</param>
/// <returns>A <see cref="PagingInfo"/> for the listing.</returns>
public abstract PagingInfo GetPagingInfo(HtmlDocument htmlDocument, DealerInfo dealer);
/// <summary>
/// Converts one HTML node into a <see cref="CarInfo"/>.
/// </summary>
/// <param name="node">Node to parse.</param>
/// <param name="dealer">Dealer the node was obtained for.</param>
/// <returns>The <see cref="CarInfo"/> parsed from <paramref name="node"/>.</returns>
public abstract CarInfo ParseHtmlIntoCarInfo(HtmlNode node, DealerInfo dealer);
/// <summary>
/// Builds the URL string used for the given dealer.
/// </summary>
/// <param name="dealer">Dealer the URL is built for.</param>
/// <returns>The URL as a string.</returns>
public abstract string GetUrlDetails(DealerInfo dealer);
/// <summary>
/// Builds the "/new-vehicles/" listing URL for the current model under the dealer's base URL.
/// </summary>
/// <param name="dealer">Dealer supplying the base <c>Url</c>.</param>
/// <returns>The listing URL for the current model identifier.</returns>
public override string GetUrlDetails(DealerInfo dealer) =>
    string.Format(
        "{0}/new-vehicles/{1}/#action=im_ajax_call&perform=get_results&vrp_view=listview&page=1",
        dealer.Url,
        GetModelIdentifier());
/// <summary>
/// Builds the "VehicleSearchResults" URL for new vehicles of the base class's model, with the
/// query's <c>limit</c> set to 100.
/// </summary>
/// <param name="dealer">Dealer supplying the base <c>Url</c>.</param>
/// <returns>The search URL.</returns>
public override string GetUrlDetails(DealerInfo dealer)
{
    const string searchTemplate = "{0}/VehicleSearchResults?search=new&model={1}&limit=100";
    return string.Format(searchTemplate, dealer.Url, base.Model);
}
/// <summary>
/// Builds the "searchnew.aspx" URL for the current model under the dealer's base URL.
/// </summary>
/// <param name="dealer">Dealer supplying the base <c>Url</c>.</param>
/// <returns>The search URL for the current model identifier.</returns>
public override string GetUrlDetails(DealerInfo dealer)
{
    // NOTE(review): "pn=100&st=Price+desc" presumably asks for 100 results sorted by price,
    // descending; confirm against the target site.
    var baseUrl = dealer.Url;
    var model = GetModelIdentifier();
    return string.Format("{0}/searchnew.aspx?Model={1}&pn=100&st=Price+desc", baseUrl, model);
}