/// <summary>
/// Writes an informational log entry naming this type and the requested id, then
/// returns a freshly constructed <c>SlurpResult</c> for that id.
/// </summary>
/// <param name="manufacturerId">Identifier that is logged and passed to the <c>SlurpResult</c> constructor.</param>
/// <returns>A new <c>SlurpResult</c>; this method sets nothing further on it.</returns>
public SlurpResult Process(string manufacturerId)
{
    string slurperName = GetType().Name;
    log.InfoFormat("Running {0} for {1}", slurperName, manufacturerId);

    return new SlurpResult(manufacturerId);
}
/// <summary>
/// Searches the tweakers.net pricewatch for <paramref name="manufacturerId"/>, follows the
/// first product link in the search results and collects one <c>ShopResult</c> per row of
/// the product page's price table.
/// </summary>
/// <param name="manufacturerId">
/// Keyword sent to the pricewatch search. It is URL-escaped before being placed in the
/// query string; <c>null</c> is treated as an empty keyword.
/// </param>
/// <returns>
/// A <c>SlurpResult</c> whose <c>SiteName</c> is this type's name. <c>Shops</c> stays as
/// constructed when the search reports no products, when no product link can be located,
/// or when the product page has no price table. <c>ProductStatus</c> is set to
/// <c>ProductStatus.Obsolete</c> when the product page states that prices are no longer shown.
/// </returns>
/// <remarks>
/// Download failures raised by <see cref="System.Net.WebClient"/> are not caught here.
/// </remarks>
public SlurpResult Process(string manufacturerId)
{
    using (WebClient client = new WebClient())
    {
        SlurpResult result = new SlurpResult(manufacturerId);
        result.SiteName = this.GetType().Name;

        // Escape the keyword so characters such as ' ', '&', '#' or '+' cannot corrupt the query string.
        var tweakerUrl = String.Format(
            "http://tweakers.net/pricewatch/zoeken/?keyword={0}",
            Uri.EscapeDataString(manufacturerId ?? String.Empty));
        string searchResults = client.DownloadString(tweakerUrl);
        if (searchResults.Contains("Er werden geen producten gevonden."))
        {
            return result;
        }

        log.InfoFormat("Processing product {0}", manufacturerId);

        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(searchResults);

        // SelectSingleNode yields null when the markup does not match; treat that as "not found"
        // instead of dereferencing it.
        var productLink = doc.DocumentNode.SelectSingleNode("//table[@class=\"priceTable\"]/tbody/tr[1]/td[2]/p/a");
        if (productLink == null || productLink.Attributes["href"] == null)
        {
            log.InfoFormat("Product {0} cannot be found", manufacturerId);
            return result;
        }

        string productPage = client.DownloadString(productLink.Attributes["href"].Value);
        if (productPage.Contains("Van dit product worden geen prijzen meer getoond.")
            || productPage.Contains("Geen actuele prijzen bekend."))
        {
            result.ProductStatus = ProductStatus.Obsolete;
            return result;
        }

        // Run the HTML through SgmlReader so it can be queried with LINQ to XML.
        XDocument xdoc;
        using (var reader = new StringReader(productPage))
        using (Sgml.SgmlReader sgmlReader = new Sgml.SgmlReader())
        {
            sgmlReader.DocType = "HTML";
            sgmlReader.WhitespaceHandling = WhitespaceHandling.All;
            sgmlReader.CaseFolding = Sgml.CaseFolding.ToLower;
            sgmlReader.InputStream = reader;
            xdoc = XDocument.Load(sgmlReader);
        }

        // The (string) conversion on XAttribute returns null for a missing attribute, so tables
        // without a class attribute are skipped rather than causing a NullReferenceException.
        var priceTable = xdoc.Root.Descendants("table")
            .FirstOrDefault(t => (string)t.Attribute("class") == "priceTable");
        var tableBody = priceTable == null ? null : priceTable.Element("tbody");
        if (tableBody == null)
        {
            log.InfoFormat("No price table found for product {0}", manufacturerId);
            return result;
        }

        foreach (var row in tableBody.Elements("tr"))
        {
            // Materialize once; the cells are inspected several times below.
            var cells = row.Elements("td").ToList();
            if (cells.Count == 0)
            {
                continue;
            }

            // Rows whose first cell spans several columns are not shop rows.
            var firstCell = cells[0];
            int colspan;
            if (Int32.TryParse(
                    (string)firstCell.Attribute("colspan"),
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out colspan)
                && colspan > 1)
            {
                continue;
            }

            var shop = firstCell.Value;

            decimal? price = null;
            var priceCell = cells.FirstOrDefault(x => (string)x.Attribute("class") == "price");
            if (priceCell != null)
            {
                price = StripPrice2(priceCell.Value);
            }

            // As before, StripPrice2 receives null when no total price text is present.
            // NOTE(review): this assumes StripPrice2 tolerates null - confirm.
            string totalText = null;
            var totalPriceCell = cells.FirstOrDefault(x => (string)x.Attribute("class") == "totalPrice");
            if (totalPriceCell != null && totalPriceCell.Element("div") != null)
            {
                var totalLink = totalPriceCell.Element("div").Element("a");
                if (totalLink != null)
                {
                    totalText = totalLink.Value;
                }
            }
            var total = StripPrice2(totalText);

            // The delivery status is derived from the icon's src; an absent cell or icon maps to
            // the empty string, the same value the no-icon case has always passed to ParseIcon.
            string delivery = String.Empty;
            var deliveryCell = cells.FirstOrDefault(x => (string)x.Attribute("class") == "delivery");
            var img = deliveryCell == null ? null : deliveryCell.Element("img");
            if (img != null && img.Attribute("src") != null)
            {
                delivery = img.Attribute("src").Value;
            }
            var deliveryStatus = ParseIcon(delivery);

            result.Shops.Add(new ShopResult()
            {
                Name = shop,
                Price = price,
                TotalPrice = total,
                Delivery = deliveryStatus
            });
        }

        return result;
    }
}
/// <summary>
/// Searches kieskeurig.nl for <paramref name="manufacturerId"/> and collects one
/// <c>ShopResult</c> per usable row of the product's price table.
/// </summary>
/// <param name="manufacturerId">
/// Search term sent to the site. It is URL-escaped before being placed in the query string;
/// <c>null</c> is treated as an empty term.
/// </param>
/// <returns>
/// A <c>SlurpResult</c> whose <c>SiteName</c> is this type's name. <c>Shops</c> stays as
/// constructed when the search reports no results, when no product link can be located, or
/// when no price table is present. Every added shop has <c>Price</c> set to <c>null</c>;
/// only <c>TotalPrice</c> and, for the primary table layout, <c>Delivery</c> are extracted.
/// </returns>
/// <remarks>
/// Download failures raised by <see cref="System.Net.WebClient"/> are not caught here.
/// </remarks>
public SlurpResult Process(string manufacturerId)
{
    using (WebClient client = new WebClient())
    {
        SlurpResult result = new SlurpResult(manufacturerId);
        result.SiteName = this.GetType().Name;

        // Escape the term so characters such as ' ', '&', '#' or '+' cannot corrupt the query string.
        var searchUrl = String.Format(
            "http://www.kieskeurig.nl/zoeken/index.html?q={0}",
            Uri.EscapeDataString(manufacturerId ?? String.Empty));
        string searchResults = client.DownloadString(searchUrl);
        if (searchResults.Contains("Helaas zijn er geen resultaten gevonden voor je zoekopdracht"))
        {
            return result;
        }

        log.InfoFormat("Processing product {0}", manufacturerId);

        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(searchResults);

        var formNode = doc.DocumentNode.SelectSingleNode("//form[@class=\"productList\"]");
        string formAction = null;
        if (formNode != null && formNode.Attributes["action"] != null)
        {
            formAction = formNode.Attributes["action"].Value;
        }

        // When the form already posts to "/winkelvergelijk" the downloaded page is parsed as the
        // price page directly; otherwise it is a result list from which a product must be picked.
        if (formAction != "/winkelvergelijk")
        {
            string productUrl = null;

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var resultRows = doc.DocumentNode.SelectNodes("//table[starts-with(@class, \"resultTable\")]/tbody/tr");
            if (resultRows != null)
            {
                foreach (HtmlNode resultRow in resultRows)
                {
                    if (resultRow.InnerHtml.Contains("type=\"checkbox\""))
                    {
                        continue;
                    }

                    // Query the link relative to the row itself instead of re-selecting the row by
                    // position, which goes wrong when more than one table matches.
                    var link = resultRow.SelectSingleNode("td[1]//a");
                    if (link == null || link.Attributes["href"] == null)
                    {
                        continue;
                    }

                    productUrl = "http://www.kieskeurig.nl/" + link.Attributes["href"].Value;
                    break;
                }
            }

            if (String.IsNullOrEmpty(productUrl))
            {
                log.InfoFormat("Product {0} cannot be found", manufacturerId);
                return result;
            }

            string productPage = client.DownloadString(productUrl);
            doc.LoadHtml(productPage);
        }

        // Two table layouts are handled: the primary one is found by id, the alternate one by class.
        bool alternateLayout = false;
        var tableNode = doc.DocumentNode.SelectSingleNode("//table[@id=\"priceTable\"]/tbody");
        if (tableNode == null)
        {
            tableNode = doc.DocumentNode.SelectSingleNode("//table[contains(@class, \"priceTable\")]/tbody");
            alternateLayout = true;
        }

        var priceRows = tableNode == null ? null : tableNode.SelectNodes("tr");
        if (priceRows == null)
        {
            log.InfoFormat("No price table found for product {0}", manufacturerId);
            return result;
        }

        foreach (HtmlNode row in priceRows)
        {
            HtmlNodeCollection cellNodes = row.SelectNodes("td");
            if (cellNodes == null)
            {
                // Row without <td> cells (for example a header row).
                continue;
            }

            if (!alternateLayout)
            {
                var rowClass = row.Attributes["class"];
                if (rowClass != null
                    && (rowClass.Value.Contains("moreOffers") || rowClass.Value.Contains("otherprice")))
                {
                    continue;
                }
            }
            else
            {
                bool hasDeliveryPrice = cellNodes.Cast<HtmlNode>().Any(
                    x => x.Attributes["class"] != null && x.Attributes["class"].Value.Contains("deliverYPrice"));
                if (!hasDeliveryPrice)
                {
                    continue;
                }
            }

            // The shop name is read from the onclick script found in the third cell.
            string shop = null;
            if (cellNodes.Count > 2
                && cellNodes[2].ChildNodes.Count > 1
                && cellNodes[2].ChildNodes[1].ChildNodes.Count > 1)
            {
                var onclick = cellNodes[2].ChildNodes[1].ChildNodes[1].Attributes["onclick"];
                if (onclick != null)
                {
                    shop = ExtractShopNameFromOnClick(onclick.Value);
                }
            }
            if (shop == null)
            {
                log.InfoFormat("Skipping a price row without a recognizable shop for product {0}", manufacturerId);
                continue;
            }

            // The list price is not extracted for this site, so Price is always null.
            decimal? price = null;

            // The filter above matches the class by substring while this XPath needs an exact
            // match, so the node can still be missing; TotalPrice then stays null.
            decimal? total = null;
            var totalNode = alternateLayout
                ? row.SelectSingleNode("td[@class=\"deliverYPrice\"]/div[@class=\"il\"]")
                : row.SelectSingleNode("td[@class=\"deliveryPrice\"]");
            if (totalNode != null)
            {
                total = StripPrice2(totalNode.InnerHtml);
            }

            // Delivery status is only available in the primary layout, taken from the css class
            // that accompanies "icon" on the delivery-time div.
            DeliveryStatus deliveryStatus = DeliveryStatus.Unknown;
            if (!alternateLayout)
            {
                var deliveryNode = row.SelectSingleNode("td[@class=\"deliveryTime\"]/div");
                if (deliveryNode != null && deliveryNode.Attributes["class"] != null)
                {
                    var parts = deliveryNode.Attributes["class"].Value.Split(
                        new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                    var deliveryIcon = parts.FirstOrDefault(x => x != "icon");
                    deliveryStatus = ParseIcon(deliveryIcon);
                }
            }

            result.Shops.Add(new ShopResult()
            {
                Name = shop,
                Delivery = deliveryStatus,
                TotalPrice = total,
                Price = price
            });
        }

        return result;
    }
}

/// <summary>
/// Pulls the company name out of an onclick script: the seventh comma-separated argument of
/// the first call in the script, with surrounding whitespace and single quotes removed.
/// </summary>
/// <param name="javascript">Value of the onclick attribute.</param>
/// <returns>The extracted name, or <c>null</c> when the script does not have that shape.</returns>
private static string ExtractShopNameFromOnClick(string javascript)
{
    if (String.IsNullOrEmpty(javascript))
    {
        return null;
    }

    // Only the first statement is inspected; its argument list sits between the first '('
    // and the next parenthesis.
    var firstStatement = javascript.Split(';')[0];
    var callParts = firstStatement.Split('(', ')');
    if (callParts.Length < 2)
    {
        return null;
    }

    // Strip '\r' and '\n' individually: removing "\n" first and then Environment.NewLine
    // would leave stray carriage returns behind.
    var arguments = callParts[1]
        .Replace("\r", String.Empty)
        .Replace("\n", String.Empty)
        .Split(',');
    if (arguments.Length < 7)
    {
        return null;
    }

    return arguments[6].Trim().Trim('\'');
}