/// <summary>
/// Parses the HTML carried by <paramref name="linkStruct"/> and runs <c>ExtractProduct</c>
/// for every <c>div</c> whose <c>class</c> attribute passes <c>EqualsTrim("product")</c>.
/// </summary>
/// <param name="linkStruct">Crawled page; its <c>Html</c> is parsed and the struct is forwarded to <c>ExtractProduct</c>.</param>
/// <returns>The extracted products, without null entries and without entries whose <c>Name</c> is null or empty.</returns>
public override List<Product> GetResultsForSingleUrl(LinkStruct linkStruct)
{
    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(linkStruct.Html);

    var divs = htmlDocument.DocumentNode.Descendants("div")
        .Where(node => node.GetAttributeValue("class", "").EqualsTrim("product"))
        .ToList();

    // Every task hands its product back through Task<Product>.Result instead of
    // appending to one shared List<Product>: List<T>.Add is not safe for concurrent
    // writers and can drop items or corrupt the list.
    var tasks = divs
        .Select(div => Task.Run(() => ExtractProduct(div, linkStruct)))
        .ToArray();
    Task.WaitAll(tasks);

    var result = tasks
        .Select(task => task.Result)
        .Where(product => product != null && !string.IsNullOrEmpty(product.Name))
        .ToList();
    result.TrimExcess();
    return result;
}
/// <summary>
/// Parses the HTML carried by <paramref name="linkStruct"/> and runs <c>ExtractProduct</c>
/// for every <c>div</c> whose <c>data-t-name</c> attribute contains "ArticleTile".
/// </summary>
/// <param name="linkStruct">Crawled page; its <c>Html</c> is parsed and the struct is forwarded to <c>ExtractProduct</c>.</param>
/// <returns>
/// The extracted products, without null entries and without entries whose <c>Name</c> is
/// null or empty; an empty list when the XPath query yields no nodes.
/// </returns>
public override List<Product> GetResultsForSingleUrl(LinkStruct linkStruct)
{
    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(linkStruct.Html);

    var divs = htmlDocument.DocumentNode
        .SelectNodes("//div[contains(@data-t-name, 'ArticleTile')]")?
        .ToList();
    if (divs == null)
    {
        return new List<Product>();
    }

    // Every task hands its product back through Task<Product>.Result instead of
    // appending to one shared List<Product>: List<T>.Add is not safe for concurrent
    // writers and can drop items or corrupt the list.
    var tasks = divs
        .Select(div => Task.Run(() => ExtractProduct(div, linkStruct)))
        .ToArray();
    Task.WaitAll(tasks);

    var result = tasks
        .Select(task => task.Result)
        .Where(product => product != null && !string.IsNullOrEmpty(product.Name))
        .ToList();
    result.TrimExcess();
    return result;
}
/// <summary>
/// Reads the JSON embedded in the page's <c>script</c> element with id <c>__NEXT_DATA__</c>,
/// deserializes it into <c>CarrefourJsonModel</c> and runs <c>ExtractProduct</c> for every
/// entry of <c>props.initialState.products.data.content</c>.
/// </summary>
/// <param name="linkStruct">Crawled page; its <c>Html</c> is parsed and the struct is forwarded to <c>ExtractProduct</c>.</param>
/// <returns>
/// The extracted products, without null entries and without entries whose <c>Name</c> is
/// null or empty; an empty list when the script is missing/empty or the JSON lacks the
/// product content path.
/// </returns>
public override List<SearchResultsModels.Product> GetResultsForSingleUrl(LinkStruct linkStruct)
{
    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(linkStruct.Html);

    var json = htmlDocument.DocumentNode.Descendants("script")
        .FirstOrDefault(x => x.Id == "__NEXT_DATA__")?
        .InnerText;
    if (string.IsNullOrEmpty(json))
    {
        return new List<SearchResultsModels.Product>();
    }

    var dataSource = JsonConvert.DeserializeObject<CarrefourJsonModel>(json);

    // Any link of this chain can be absent in the payload; treat that as "no products"
    // rather than failing with a NullReferenceException.
    var content = dataSource?.props?.initialState?.products?.data?.content;
    if (content == null)
    {
        return new List<SearchResultsModels.Product>();
    }

    // Tasks return their product instead of appending to a shared list, because
    // List<T>.Add is not safe for concurrent writers.
    var tasks = new List<Task<SearchResultsModels.Product>>();
    foreach (var productData in content)
    {
        var dataToPass = productData;
        tasks.Add(Task.Run(() => ExtractProduct(dataToPass, linkStruct)));
    }
    Task.WaitAll(tasks.ToArray());

    var result = tasks
        .Select(task => task.Result)
        .Where(product => product != null && !string.IsNullOrEmpty(product.Name))
        .ToList();
    result.TrimExcess();
    return result;
}
/// <summary>
/// Placeholder extractor: builds a <c>Product</c> whose only populated field is
/// <c>Category</c>. Name, prices, seller, timestamp and URL are not filled in here.
/// </summary>
/// <param name="productNode">Product node; only probed for an old-price <c>div</c>.</param>
/// <param name="linkStruct">Not used by the current body.</param>
/// <returns>A product with <c>Category</c> set to "Markety Elektroniczne".</returns>
private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
{
    var product = new Product();

    // The old-price probe is still evaluated, but nothing acts on its outcome yet.
    _ = productNode
        .Descendants("div")
        .Any(div => div.Attributes.Any(attr => attr.Name == "class" && attr.Value == "m-priceBox_old"));

    product.Category = "Markety Elektroniczne";
    return product;
}
/// <summary>
/// Parses the HTML carried by <paramref name="linkStruct"/> and runs
/// <c>ExtractListProduct</c> for every <c>li</c> whose <c>class</c> attribute passes
/// <c>ContainsAny("product large")</c>.
/// </summary>
/// <remarks>
/// The earlier lookup of <c>div</c> teasers ("teaser-hochkant border-orange single-prod")
/// is omitted: those nodes were never extracted, and the returned list was identical
/// whether or not any were found.
/// </remarks>
/// <param name="linkStruct">Crawled page; its <c>Html</c> is parsed and the struct is forwarded to <c>ExtractListProduct</c>.</param>
/// <returns>The extracted products, without null entries and without entries whose <c>Name</c> is null or empty.</returns>
public override List<Product> GetResultsForSingleUrl(LinkStruct linkStruct)
{
    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(linkStruct.Html);

    var listItems = htmlDocument.DocumentNode.Descendants("li")
        .AsParallel()
        .Where(node => node.GetAttributeValue("class", "").ContainsAny("product large"))
        .ToList();
    if (listItems.Count == 0)
    {
        return new List<Product>();
    }

    // Tasks return their product instead of appending to a shared list, because
    // List<T>.Add is not safe for concurrent writers.
    var tasks = listItems
        .Select(container => Task.Run(() => ExtractListProduct(container, linkStruct)))
        .ToArray();
    Task.WaitAll(tasks);

    // Null entries are filtered as well, matching the sibling crawlers.
    var result = tasks
        .Select(task => task.Result)
        .Where(product => product != null && !string.IsNullOrEmpty(product.Name))
        .ToList();
    result.TrimExcess();
    return result;
}
/// <summary>
/// Builds a <c>Product</c> from a teaser node that carries an old-price box.
/// </summary>
/// <param name="productNode">Teaser node to read name and prices from.</param>
/// <param name="linkStruct">Crawled page; its <c>Link</c> becomes the product's <c>SourceUrl</c>.</param>
/// <returns>
/// The populated product, or an empty <c>Product</c> when the node has no
/// "price price--prev oldPriceMarker trans" <c>div</c>.
/// </returns>
private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
{
    // Concatenates the "price-int" and "price-dec" span texts of a price box.
    // Returns null when the box or either span is missing, so that decimal.TryParse
    // simply fails instead of the lookup throwing a NullReferenceException.
    string ReadPriceDigits(HtmlNode priceBox)
    {
        if (priceBox == null)
        {
            return null;
        }

        string SpanDigits(string cssClass) => priceBox.Descendants("span")
            .FirstOrDefault(x => x.GetAttributeValue("class", "") == cssClass)?
            .InnerText
            .RemoveNonNumeric();

        var wholePart = SpanDigits("price-int");
        var fractionPart = SpanDigits("price-dec");
        return wholePart == null || fractionPart == null ? null : wholePart + fractionPart;
    }

    // Only teasers that show a previous price are of interest.
    var oldPriceBox = productNode.Descendants("div")
        .FirstOrDefault(x => x.GetAttributeValue("class", "") == "price price--prev oldPriceMarker trans");
    if (oldPriceBox == null)
    {
        return new Product();
    }

    var result = new Product();

    result.Name = productNode.Descendants("p")
        .FirstOrDefault(x => x.GetAttributeValue("class", "") == "product-teaser__content__head__title")?
        .InnerText
        .Trim();

    result.Category = "Markety Spożywcze";

    var currentPriceBox = productNode.Descendants("div")
        .FirstOrDefault(x => x.GetAttributeValue("class", "") == "price"
                             && x.ParentNode.GetAttributeValue("class", "") == "product-teaser__content__details change");

    // The digit strings are parsed as an integer amount and divided by 100.
    // Value receives the current (promo) price, SaleValue the previous one.
    if (decimal.TryParse(ReadPriceDigits(currentPriceBox), out decimal promoPriceDecimal))
    {
        result.Value = promoPriceDecimal / 100;
    }
    if (decimal.TryParse(ReadPriceDigits(oldPriceBox), out decimal regularPriceDecimal))
    {
        result.SaleValue = regularPriceDecimal / 100;
    }
    result.OnSale = true;

    result.Seller = this.GetType().Name.Replace("Crawler", "");
    result.TimeStamp = DateTime.Now;
    result.SourceUrl = linkStruct.Link;

    return result;
}
/// <summary>
/// Builds a <c>Product</c> from a tile node, reading its name and either a discounted
/// or a standard price.
/// </summary>
/// <param name="productNode">Tile node to read from.</param>
/// <param name="linkStruct">Crawled page; its <c>Link</c> becomes <c>SourceUrl</c> and decides whether the "direct" description is set.</param>
/// <returns>
/// The populated product, or an empty <c>Product</c> when the "content" node, the name,
/// the "prices" node or a non-zero <c>Value</c> is missing.
/// </returns>
private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
{
    // First descendant of root having a class attribute accepted by
    // CrawlerRegex.StandardMatch; null when root is null or nothing matches.
    HtmlNode FindByClass(HtmlNode root, string pattern, MatchDireciton direction)
    {
        return root?.Descendants()
            .FirstOrDefault(x => x.Attributes.Any(y => y.Name == "class"
                                                       && CrawlerRegex.StandardMatch(y.Value, pattern, direction)));
    }

    if (FindByClass(productNode, "content", MatchDireciton.Equals) == null)
    {
        return new Product();
    }

    var result = new Product();

    // A tile without an "about" node has no usable name; bail out instead of handing
    // null to RemoveMetaCharacters(...).Trim().
    var name = FindByClass(productNode, "about", MatchDireciton.InputContainsMatch)?.InnerText;
    if (string.IsNullOrEmpty(name))
    {
        return new Product();
    }
    result.Name = CrawlerRegex.RemoveMetaCharacters(name).Trim();

    if (CrawlerRegex.StandardMatch(linkStruct.Link, "direct", MatchDireciton.InputContainsMatch))
    {
        result.Description = "Znalezione na Auchan Direct!";
    }

    result.Category = "Markety Budowlane";

    var priceNode = FindByClass(productNode, "prices", MatchDireciton.Equals);
    if (priceNode == null)
    {
        return new Product();
    }

    var discountNode = FindByClass(priceNode, "discount", MatchDireciton.Equals);
    if (discountNode != null)
    {
        var regularPrice = discountNode.InnerText.RemoveNonNumeric();

        // As before, the "normal" node is searched under the whole tile, not only under
        // the price node. If it is missing, the promo parts stay null and the parse
        // below simply fails (previously this was a NullReferenceException).
        var promoPriceNode = FindByClass(productNode, "normal", MatchDireciton.Equals);
        var promoPricePLN = FindByClass(promoPriceNode, "p-nb", MatchDireciton.Equals)?.InnerText.RemoveNonNumeric();
        var promoPriceGR = FindByClass(promoPriceNode, "p-cents", MatchDireciton.Equals)?.InnerText.RemoveNonNumeric();

        if (decimal.TryParse(regularPrice, out decimal priceDecimal))
        {
            result.Value = priceDecimal / 100;
        }
        if (decimal.TryParse(promoPricePLN + promoPriceGR, out decimal promoPriceDecimal))
        {
            result.SaleValue = promoPriceDecimal / 100;
        }
        result.OnSale = true;
    }
    else
    {
        // A missing "standard" node leaves Value at 0, which is rejected below.
        var regularPriceNode = FindByClass(priceNode, "standard", MatchDireciton.Equals);
        var regularPricePLN = FindByClass(regularPriceNode, "p-nb", MatchDireciton.Equals)?.InnerText.RemoveNonNumeric();
        var regularPriceGR = FindByClass(regularPriceNode, "p-cents", MatchDireciton.Equals)?.InnerText.RemoveNonNumeric();

        if (decimal.TryParse(regularPricePLN + regularPriceGR, out decimal priceDecimal))
        {
            result.Value = priceDecimal / 100;
        }
        result.OnSale = false;
    }

    if (result.Value == 0)
    {
        return new Product();
    }

    result.Seller = this.GetType().Name.Replace("Crawler", "");
    result.TimeStamp = DateTime.Now;
    result.SourceUrl = linkStruct.Link;

    return result;
}
/// <summary>
/// Builds a <c>Product</c> from a list-style tile that shows both an old and a current price.
/// </summary>
/// <param name="productNode">List item node to read from.</param>
/// <param name="linkStruct">Crawled page; its <c>Link</c> becomes the product's <c>SourceUrl</c>.</param>
/// <returns>
/// The populated product, or an empty <c>Product</c> when the description/name, the
/// price node, the old-price node or a non-zero <c>Value</c> is missing.
/// </returns>
private Product ExtractListProduct(HtmlNode productNode, LinkStruct linkStruct)
{
    // First descendant of root having a class attribute accepted by
    // CrawlerRegex.StandardMatch; null when nothing matches.
    HtmlNode FindByClass(HtmlNode root, string pattern, MatchDireciton direction)
    {
        return root.Descendants()
            .FirstOrDefault(x => x.Attributes.Any(y => y.Name == "class"
                                                       && CrawlerRegex.StandardMatch(y.Value, pattern, direction)));
    }

    var nameNode = FindByClass(productNode, "description", MatchDireciton.Equals);
    if (nameNode == null)
    {
        return new Product();
    }

    var name = nameNode.Descendants("p").FirstOrDefault()?.InnerText;
    if (string.IsNullOrEmpty(name))
    {
        return new Product();
    }

    var result = new Product();
    result.Name = CrawlerRegex.RemoveMetaCharacters(name).Trim();
    result.Category = "Markety Budowlane";

    // A tile without a "price" node previously failed with a NullReferenceException;
    // it is now rejected like any other tile lacking price data.
    var priceNode = FindByClass(productNode, "price", MatchDireciton.Equals);
    if (priceNode == null)
    {
        return new Product();
    }

    var oldPriceNode = FindByClass(priceNode, "price-old", MatchDireciton.InputContainsMatch);
    if (oldPriceNode == null)
    {
        return new Product();
    }

    var regularPrice = oldPriceNode
        .Descendants("del")
        .FirstOrDefault()?
        .Descendants("span")
        .FirstOrDefault()?
        .InnerText
        .RemoveNonNumeric();

    // Current price: the first span that neither contains a <del> nor sits directly
    // inside one.
    var promoPrice = priceNode
        .Descendants("span")
        .FirstOrDefault(d => !d.Descendants().Any(c => c.Name == "del") && d.ParentNode.Name != "del")?
        .InnerText
        .RemoveNonNumeric();

    // The digit strings are parsed as an integer amount and divided by 100.
    if (decimal.TryParse(promoPrice, out decimal promoPriceDecimal))
    {
        result.Value = promoPriceDecimal / 100;
    }
    if (decimal.TryParse(regularPrice, out decimal regularPriceDecimal))
    {
        result.SaleValue = regularPriceDecimal / 100;
    }
    result.OnSale = true;

    if (result.Value == 0)
    {
        return new Product();
    }

    result.Seller = this.GetType().Name.Replace("Crawler", "");
    result.TimeStamp = DateTime.Now;
    result.SourceUrl = linkStruct.Link;

    return result;
}
public static void Main() { Random random = new(); int test = 10; Console.WriteLine("You are runnning the Data Structures example."); Console.WriteLine("======================================================"); Console.WriteLine(); #region Link (aka Tuple) { Console.WriteLine(" Link------------------------------------"); Console.WriteLine(); Console.WriteLine(" A \"Link\" is like a System.Tuple that implements"); Console.WriteLine(" Towel.DataStructures.IDataStructure<T>. A Link/Tuple is"); Console.WriteLine(" used when you have a small, known-sized set of objects"); Console.WriteLine(" that you want to bundle together without making a custom"); Console.WriteLine(" custom class."); Console.WriteLine(); Link link = new Link <int, string, char, float, decimal>(1, "2", '3', 4f, 5m); Console.Write(" Traversal: "); link.Stepper(Console.Write); Console.WriteLine(); Link linkStruct = new LinkStruct <int, string, char, float, decimal>(1, "2", '3', 4f, 5m); Console.Write(" Traversal: "); link.Stepper(Console.Write); Console.WriteLine(); Pause(); } #endregion #region Array { Console.WriteLine(" Array---------------------------------"); Console.WriteLine(); Console.WriteLine(@$ " An Array<T> is just a wrapper for arrays that implements"); Console.WriteLine(@$ " Towel.DataStructures.IDataStructure<T>. An array is used when"); Console.WriteLine(@$ " dealing with static-sized, known-sized sets of data. 
Arrays"); Console.WriteLine(@$ " can be sorted along 1 dimensions for binary searching algorithms."); Console.WriteLine(); IArray <int> array = new Array <int>(test); Console.Write($" Filling in (0-{test - 1})..."); for (int i = 0; i < test; i++) { array[i] = i; } Console.WriteLine(); Console.Write(" Traversal: "); array.Stepper(Console.Write); Console.WriteLine(); Console.WriteLine($" Length: {array.Length}"); Pause(); } #endregion #region List { Console.WriteLine(" List---------------------------------"); Console.WriteLine(); Console.WriteLine(" An List is like an IList that implements"); Console.WriteLine(" Towel.DataStructures.IDataStructure<T>. \"ListArray\" is"); Console.WriteLine(" the array implementation while \"ListLinked\" is the"); Console.WriteLine(" the linked-list implementation. An List is used"); Console.WriteLine(" when dealing with an unknown quantity of data that you"); Console.WriteLine(" will likely have to enumerate/step through everything. The"); Console.WriteLine(" ListArray shares the properties of an Array in"); Console.WriteLine(" that it can be relateively quickly sorted along 1 dimensions"); Console.WriteLine(" for binary search algorithms."); Console.WriteLine(); // ListArray --------------------------------------- IList <int> listArray = new ListArray <int>(test); Console.Write($" [ListArray] Adding (0-{test - 1})..."); for (int i = 0; i < test; i++) { listArray.Add(i); } Console.WriteLine(); Console.Write(" [ListArray] Traversal: "); listArray.Stepper(Console.Write); Console.WriteLine(); Console.WriteLine($" [ListArray] Count: {listArray.Count}"); listArray.Clear(); Console.WriteLine(); // ListLinked --------------------------------------- IList <int> listLinked = new ListLinked <int>(); Console.Write($" [ListLinked] Adding (0-{test - 1})..."); for (int i = 0; i < test; i++) { listLinked.Add(i); } Console.WriteLine(); Console.Write(" [ListLinked] Traversal: "); listLinked.Stepper(Console.Write); Console.WriteLine(); 
Console.WriteLine($" [ListLinked] Count: {listLinked.Count}"); listLinked.Clear(); Pause(); } #endregion #region Stack { Console.WriteLine(" Stack---------------------------------"); Console.WriteLine(); Console.WriteLine(" An \"Stack\" is a Stack that implements"); Console.WriteLine(" Towel.DataStructures.IDataStructure<T>. \"StackArray\" is"); Console.WriteLine(" the array implementation while \"StackLinked\" is the"); Console.WriteLine(" the linked-list implementation. A Stack is used"); Console.WriteLine(" specifically when you need the algorithm provided by the Push"); Console.WriteLine(" and Pop functions."); Console.WriteLine(); IStack <int> stackArray = new StackArray <int>(); Console.Write($" [StackArray] Pushing (0-{test - 1})..."); for (int i = 0; i < test; i++) { stackArray.Push(i); } Console.WriteLine(); Console.Write(" [StackArray] Traversal: "); stackArray.Stepper(Console.Write); Console.WriteLine(); Console.WriteLine($" [StackArray] Pop: {stackArray.Pop()}"); Console.WriteLine($" [StackArray] Pop: {stackArray.Pop()}"); Console.WriteLine($" [StackArray] Peek: {stackArray.Peek()}"); Console.WriteLine($" [StackArray] Pop: {stackArray.Pop()}"); Console.WriteLine($" [StackArray] Count: {stackArray.Count}"); stackArray.Clear(); Console.WriteLine(); IStack <int> stackLinked = new StackLinked <int>(); Console.Write($" [StackLinked] Pushing (0-{test - 1})..."); for (int i = 0; i < test; i++) { stackLinked.Push(i); } Console.WriteLine(); Console.Write(" [StackLinked] Traversal: "); stackLinked.Stepper(Console.Write); Console.WriteLine(); Console.WriteLine($" [StackLinked] Pop: {stackLinked.Pop()}"); Console.WriteLine($" [StackLinked] Pop: {stackLinked.Pop()}"); Console.WriteLine($" [StackLinked] Peek: {stackLinked.Peek()}"); Console.WriteLine($" [StackLinked] Pop: {stackLinked.Pop()}"); Console.WriteLine($" [StackLinked] Count: {stackLinked.Count}"); stackLinked.Clear(); Pause(); } #endregion #region Queue { Console.WriteLine(" 
Queue---------------------------------"); Console.WriteLine(); Console.WriteLine(" An \"Queue\" is a Queue that implements"); Console.WriteLine(" Towel.DataStructures.IDataStructure<T>. \"QueueArray\" is"); Console.WriteLine(" the array implementation while \"QueueLinked\" is the"); Console.WriteLine(" the linked-list implementation. A Queue/Stack is used"); Console.WriteLine(" specifically when you need the algorithm provided by the Queue"); Console.WriteLine(" and Dequeue functions."); Console.WriteLine(); IQueue <int> queueArray = new QueueArray <int>(); Console.Write($" [QueueArray] Enqueuing (0-{test - 1})..."); for (int i = 0; i < test; i++) { queueArray.Enqueue(i); } Console.WriteLine(); Console.Write(" [QueueArray] Traversal: "); queueArray.Stepper(Console.Write); Console.WriteLine(); Console.WriteLine($" [QueueArray] Dequeue: {queueArray.Dequeue()}"); Console.WriteLine($" [QueueArray] Dequeue: {queueArray.Dequeue()}"); Console.WriteLine($" [QueueArray] Peek: {queueArray.Peek()}"); Console.WriteLine($" [QueueArray] Dequeue: {queueArray.Dequeue()}"); Console.WriteLine($" [QueueArray] Count: {queueArray.Count}"); queueArray.Clear(); Console.WriteLine(); IQueue <int> queueLinked = new QueueLinked <int>(); Console.Write($" [QueueLinked] Enqueuing (0-{test - 1})..."); for (int i = 0; i < test; i++) { queueLinked.Enqueue(i); } Console.WriteLine(); Console.Write(" [QueueLinked] Traversal: "); queueLinked.Stepper(Console.Write); Console.WriteLine(); Console.WriteLine($" [QueueLinked] Pop: {queueLinked.Dequeue()}"); Console.WriteLine($" [QueueLinked] Pop: {queueLinked.Dequeue()}"); Console.WriteLine($" [QueueLinked] Peek: {queueLinked.Peek()}"); Console.WriteLine($" [QueueLinked] Pop: {queueLinked.Dequeue()}"); Console.WriteLine($" [QueueLinked] Count: {queueLinked.Count}"); queueLinked.Clear(); Pause(); } #endregion #region Heap { Console.WriteLine(" Heap---------------------------------"); Console.WriteLine(); Console.WriteLine(" An \"Heap\" is a binary tree that 
stores items based on priorities."); Console.WriteLine(" It implements Towel.DataStructures.IDataStructure<T> like the others."); Console.WriteLine(" It uses sifting algorithms to move nodes vertically through itself."); Console.WriteLine(" It is often the best data structure for standard priority queues."); Console.WriteLine(" \"HeapArray\" is an implementation where the tree has been flattened"); Console.WriteLine(" into an array."); Console.WriteLine(); Console.WriteLine(" Let's say the priority is how close a number is to \"5\"."); Console.WriteLine(" So \"Dequeue\" will give us the next closest value to \"5\".");
/// <summary>
/// Builds a <c>Product</c> from an offer tile: name from subtitle and title, current and
/// optional old price from the price tag, discount texts as sale description, and an
/// absolute URL resolved against <c>BaseUrls[0]</c>.
/// </summary>
/// <param name="productNode">Offer tile node to read from.</param>
/// <param name="linkStruct">Not read by this body.</param>
/// <returns>
/// The populated product, or an empty <c>Product</c> when no price-tag node exists or a
/// required price cannot be parsed.
/// </returns>
private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
{
    // All descendants whose class attribute is accepted by CrawlerRegex.StandardMatch.
    IEnumerable<HtmlNode> NodesWithClass(string pattern, MatchDireciton mode) =>
        productNode.Descendants()
            .Where(node => node.Attributes.Any(attr => attr.Name == "class"
                                                       && CrawlerRegex.StandardMatch(attr.Value, pattern, mode)));

    // Cleaned text of the first node with exactly this class; null if there is none.
    string PriceDigits(string cssClass) =>
        NodesWithClass(cssClass, MatchDireciton.Equals)
            .FirstOrDefault()?
            .InnerText
            .RemoveMetaCharacters()
            .RemoveNonNumeric();

    if (!NodesWithClass("a-pricetag__price", MatchDireciton.InputContainsMatch).Any())
    {
        return new Product();
    }

    var product = new Product();

    // Name = non-empty subtitle and title joined with ", ".
    var nameParts = new[] { "m-offer-tile__subtitle", "m-offer-tile__title" }
        .Select(cssClass => NodesWithClass(cssClass, MatchDireciton.Equals)
            .Select(node => node.InnerText)
            .FirstOrDefault()?
            .RemoveMetaCharacters())
        .Where(part => !string.IsNullOrEmpty(part))
        .ToArray();
    product.Name = String.Join(", ", nameParts);

    product.Category = "Markety Spożywcze";

    // The current price is required in every case.
    bool hasOldPrice = NodesWithClass("a-pricetag__old-price", MatchDireciton.Equals).Any();
    if (!decimal.TryParse(PriceDigits("a-pricetag__price"), out decimal currentAmount))
    {
        return new Product();
    }

    if (hasOldPrice)
    {
        if (!decimal.TryParse(PriceDigits("a-pricetag__old-price"), out decimal oldAmount))
        {
            return new Product();
        }
        product.SaleValue = oldAmount / 100;
        product.Value = currentAmount / 100;
        product.OnSale = true;
    }
    else
    {
        product.Value = currentAmount / 100;
        product.OnSale = false;
    }

    // Any discount badge marks the product as on sale, even without an old price.
    var discountTexts = NodesWithClass("a-pricetag__discount", MatchDireciton.Equals)
        .Select(node => node.InnerText.RemoveMetaCharacters())
        .ToList();
    if (discountTexts.Count != 0)
    {
        product.SaleDescription = String.Join(", ", discountTexts.ToArray());
        product.OnSale = true;
    }

    product.Seller = this.GetType().Name.Replace("Crawler", "");
    product.TimeStamp = DateTime.Now;

    var relativeUrl = productNode
        .Descendants("a")
        .FirstOrDefault(anchor => anchor.Attributes.Any(attr => attr.Name == "class"
                                                                && CrawlerRegex.StandardMatch(attr.Value, "m-offer-tile__link", MatchDireciton.InputContainsMatch)))?
        .GetAttributeValue("href", "");
    product.SourceUrl = new Uri(new Uri(BaseUrls[0]), relativeUrl).ToString();

    return product;
}
/// <summary>
/// Maps one JSON product entry onto a <c>SearchResultsModels.Product</c>.
/// </summary>
/// <param name="data">Product entry; <c>actualSku</c>, <c>displayName</c> and <c>url</c> are read.</param>
/// <param name="linkStruct">Crawled page; the host of its <c>Link</c> is used to build the absolute product URL.</param>
/// <returns>
/// The populated product (always flagged <c>OnSale</c>), or an empty product when the
/// SKU is not a promotion, or when price text is present and the gross price exceeds the
/// old price.
/// </returns>
private SearchResultsModels.Product ExtractProduct(Content data, LinkStruct linkStruct)
{
    // Non-promotional SKUs are dropped outright.
    if (!data.actualSku.promotion)
    {
        return new SearchResultsModels.Product();
    }

    var amount = data.actualSku.amount;

    // When at least one price string is present, the numeric prices are trusted enough
    // to reject entries whose current price is above the old one.
    bool anyPriceText = !string.IsNullOrEmpty(amount.actualOldPriceString)
                        || !string.IsNullOrEmpty(amount.actualGrossPriceString);
    if (anyPriceText && amount.actualGrossPrice > amount.actualOldPrice)
    {
        return new SearchResultsModels.Product();
    }

    return new SearchResultsModels.Product
    {
        OnSale = true,
        Name = data.displayName,
        Category = "Markety Spożywcze",
        SaleValue = (decimal)amount.actualOldPrice,
        Value = (decimal)amount.actualGrossPrice,
        Seller = this.GetType().Name.Replace("Crawler", ""),
        TimeStamp = DateTime.Now,
        SourceUrl = new Uri(new Uri($"https://{ new Uri(linkStruct.Link).Host}/"), data.url).ToString(),
    };
}
/// <summary>
/// Extracts products from every <c>div</c> with class "productsimple-default" and stores
/// them in <paramref name="resultDictionary"/> under the page link.
/// </summary>
/// <remarks>
/// Best effort: a tile that throws while being read is skipped, and a failing dictionary
/// insert is ignored.
/// </remarks>
/// <param name="resultDictionary">Receives the product list keyed by <c>linkStruct.Link</c>.</param>
/// <param name="linkStruct">Crawled page; its <c>Html</c> is parsed and its <c>Link</c> used as key and product URL.</param>
public override void GetResultsForSingleUrl(Dictionary<string, List<Product>> resultDictionary, LinkStruct linkStruct)
{
    var document = new HtmlDocument();
    document.LoadHtml(linkStruct.Html);

    var tiles = document.DocumentNode.Descendants("div")
        .Where(node => node.GetAttributeValue("class", "").Equals("productsimple-default"))
        .ToList();
    if (!tiles.Any())
    {
        return;
    }

    var found = new List<Product>();
    foreach (var tile in tiles)
    {
        try
        {
            // First() throws when the span is missing, which skips the tile below.
            var zlText = tile.Descendants("span").First(node => node.GetAttributeValue("class", "").Equals("pln")).InnerText;
            var grText = tile.Descendants("span").First(node => node.GetAttributeValue("class", "").Equals("gr")).InnerText;

            // Unparseable amounts are stored as 0.
            if (!int.TryParse(zlText, out int zl))
            {
                zl = 0;
            }
            if (!int.TryParse(grText, out int gr))
            {
                gr = 0;
            }

            var item = new Product();
            item.Provider = "Biedronka";
            item.Name = tile.Descendants("a").First().Attributes["title"].Value;
            item.PriceZl = zl;
            item.PriceGr = gr;
            item.Url = linkStruct.Link;
            item.DownloadDate = DateTime.Now;
            item.Category = "";
            item.Mark = "";
            item.Quantity = 0;
            item.Description = "";

            found.Add(item);
        }
        catch (Exception)
        {
            // Incomplete tile: skip it and carry on with the next one.
        }
    }

    try
    {
        resultDictionary.Add(linkStruct.Link, found);
    }
    catch (Exception)
    {
        // Insert failures are deliberately ignored.
    }
}
/// <summary>
/// Extracts products from every <c>div</c> with class
/// "product product--tile product--fullbleed" and stores them in
/// <paramref name="resultDictionary"/> under the page link.
/// </summary>
/// <remarks>
/// Best effort: a tile that throws while being read, including one without a
/// "pricefield__header" span, is skipped, and a failing dictionary insert is ignored.
/// </remarks>
/// <param name="resultDictionary">Receives the product list keyed by <c>linkStruct.Link</c>.</param>
/// <param name="linkStruct">Crawled page; its <c>Html</c> is parsed and its <c>Link</c> used as key.</param>
public override void GetResultsForSingleUrl(Dictionary<string, List<Product>> resultDictionary, LinkStruct linkStruct)
{
    var document = new HtmlDocument();
    document.LoadHtml(linkStruct.Html);

    var tiles = document.DocumentNode.Descendants("div")
        .Where(node => node.GetAttributeValue("class", "").Equals("product product--tile product--fullbleed"))
        .ToList();
    if (!tiles.Any())
    {
        return;
    }

    var found = new List<Product>();
    foreach (var tile in tiles)
    {
        try
        {
            // Price text is "<zl>,<gr>" followed by the currency; a text without a comma
            // makes the index access throw, which skips the tile.
            var priceText = tile.Descendants("strong")
                .First(node => node.GetAttributeValue("class", "").Equals("pricefield__price"))
                .InnerText
                .Replace("zł", "");
            var priceParts = priceText.Split(',');

            if (!int.TryParse(priceParts[0], out int zl))
            {
                zl = 0;
            }
            if (!int.TryParse(priceParts[1], out int gr))
            {
                gr = 0;
            }

            var title = tile.Descendants("h2")
                .First(node => node.GetAttributeValue("class", "").Equals("product__title"))
                .InnerText;
            var href = tile.Descendants("a")
                .First(node => node.GetAttributeValue("class", "").Equals("product__body"))
                .Attributes["href"].Value;

            var item = new Product();
            item.Provider = "Lidl";
            item.Name = title;
            item.PriceZl = zl;
            item.PriceGr = gr;
            item.Url = $"{BaseUrlForProducts}{href}";
            item.DownloadDate = DateTime.Now;
            item.Category = "";
            item.Mark = "";
            item.Quantity = 0;

            // A tile without a price header is skipped as a whole (First() throws).
            item.Description = tile.Descendants("span")
                .First(node => node.GetAttributeValue("class", "").Equals("pricefield__header"))
                .InnerText;

            found.Add(item);
        }
        catch (Exception)
        {
            // Incomplete tile: skip it and carry on with the next one.
        }
    }

    try
    {
        resultDictionary.Add(linkStruct.Link, found);
    }
    catch (Exception)
    {
        // Insert failures are deliberately ignored.
    }
}
/// <summary>
/// Produces the list of products for a single crawled link.
/// </summary>
/// <param name="linkStruct">The link to process. NOTE(review): implementations presumably parse its <c>Html</c> — confirm against the concrete crawlers.</param>
/// <returns>The products found for <paramref name="linkStruct"/>.</returns>
public abstract List <Product> GetResultsForSingleUrl(LinkStruct linkStruct);
/// <summary>
/// Builds a <c>Product</c> from a tile that shows both a current price
/// ("pricebox__price") and a recommended retail price.
/// </summary>
/// <param name="productNode">Tile node; its <c>href</c> attribute, title and price box are read.</param>
/// <param name="linkStruct">Crawled page; its <c>Link</c> is the fallback <c>SourceUrl</c>.</param>
/// <returns>
/// The populated product, or an empty <c>Product</c> when no price-box node exists or
/// either price is missing/zero.
/// </returns>
private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
{
    if (!productNode.Descendants().Any(x => x.Attributes.Any(y => y.Name == "class"
            && CrawlerRegex.StandardMatch(y.Value, "pricebox__price", MatchDireciton.InputContainsMatch))))
    {
        return new Product();
    }

    // Cleaned text of the first descendant whose class attribute equals cssClass.
    string TextOfClass(string cssClass) => productNode.Descendants()
        .Where(x => x.Attributes.Any(y => y.Name == "class" && y.Value == cssClass))
        .Select(z => z.InnerText)
        .FirstOrDefault()?
        .RemoveMetaCharacters()
        .Trim();

    // Digits of the first span with this class; ",-" is expanded to "00" first.
    string PriceDigits(string cssClass) => productNode.Descendants("span")
        .FirstOrDefault(x => x.GetAttributeValue("class", "") == cssClass)?
        .InnerText
        .Replace(",-", "00")
        .RemoveNonNumeric();

    var result = new Product();

    // Name = non-empty title and highlight joined with ", ".
    var names = new List<string> { TextOfClass("product__title"), TextOfClass("pricebox__highlight") };
    names.RemoveAll(x => string.IsNullOrEmpty(x));
    result.Name = String.Join(", ", names.ToArray());

    result.Category = "Markety Spożywcze";

    // The digit strings are parsed as an integer amount and divided by 100.
    // Value receives the current price, SaleValue the recommended retail price.
    if (decimal.TryParse(PriceDigits("pricebox__price"), out decimal promoPriceDecimal))
    {
        result.Value = promoPriceDecimal / 100;
    }
    if (decimal.TryParse(PriceDigits("pricebox__recommended-retail-price"), out decimal regularPriceDecimal))
    {
        result.SaleValue = regularPriceDecimal / 100;
    }

    // Only tiles with both prices count as offers.
    if (result.Value == 0 || result.SaleValue == 0)
    {
        return new Product();
    }
    result.OnSale = true;

    result.Seller = this.GetType().Name.Replace("Crawler", "");
    result.TimeStamp = DateTime.Now;

    // Prefer the tile's own link resolved against the base URL. When the href is
    // missing or cannot be resolved, keep the page link: previously that first
    // assignment was always overwritten, so a missing href produced the bare base URL
    // and a malformed one threw.
    result.SourceUrl = linkStruct.Link;
    var productUrl = productNode.GetAttributeValue("href", "");
    if (!string.IsNullOrEmpty(productUrl)
        && Uri.TryCreate(new Uri(BaseUrls[0]), productUrl, out Uri absoluteUrl))
    {
        result.SourceUrl = absoluteUrl.ToString();
    }

    return result;
}
/// <summary>
/// Extracts products from the page using every configured pattern in
/// <c>_htmlPattens</c> and stores the combined list in
/// <paramref name="resultDictionary"/> under the page link.
/// </summary>
/// <remarks>
/// The list is inserted once, after all patterns have run. Inserting inside the pattern
/// loop threw a duplicate-key <c>ArgumentException</c> as soon as a second pattern
/// matched. A pattern without matches is skipped; nothing is inserted when no pattern
/// matched at all.
/// </remarks>
/// <param name="resultDictionary">Receives the product list keyed by <c>linkStruct.Link</c>.</param>
/// <param name="linkStruct">Crawled page; its <c>Html</c> is parsed and its <c>Link</c> used as key and product URL.</param>
public override void GetResultsForSingleUrl(Dictionary<string, List<Product>> resultDictionary, LinkStruct linkStruct)
{
    var products = new List<Product>();
    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(linkStruct.Html);

    bool anyPatternMatched = false;
    foreach (var htmlPattern in _htmlPattens)
    {
        var divs = htmlDocument.DocumentNode.Descendants(htmlPattern.TopNode.Descendant)
            .Where(node => node.GetAttributeValue(htmlPattern.TopNode.AttributeName, "").Contains(htmlPattern.TopNode.AttributeValue))
            .ToList();
        if (divs.Count == 0)
        {
            continue;
        }
        anyPatternMatched = true;

        foreach (var div in divs)
        {
            // The name is read from the attribute named by Name.AttributeValue on the
            // first Name.Descendant element; a tile lacking it is skipped instead of
            // failing with a NullReferenceException.
            var rawName = div.Descendants(htmlPattern.Name.Descendant)
                .FirstOrDefault()?
                .Attributes[htmlPattern.Name.AttributeValue]?
                .Value;
            if (rawName == null)
            {
                continue;
            }

            var product = new Product();
            product.Provider = "Biedronka";
            // NOTE(review): the literal here was the unterminated @""" — restored to the
            // "&quot;" entity it was evidently decoded from. Confirm against history.
            product.Name = rawName.Replace("&quot;", "");
            product.Url = linkStruct.Link;
            product.DownloadDate = DateTime.Now;
            product.Category = "";
            product.Mark = "";
            product.Quantity = 0;

            // Description is optional: any failure falls back to the placeholder text.
            try
            {
                product.Description = div.Descendants(htmlPattern.Description.Descendant)
                    .Where(node => htmlPattern.Description.CombinedAttributeName.Any(node.GetAttributeValue(htmlPattern.Description.AttributeName, "").Contains))
                    .FirstOrDefault()
                    .InnerText;
            }
            catch
            {
                product.Description = "Ni ma komentarza, pewno coś nowego, kliknij IDŹ!";
            }

            // Both price parts are mandatory: a missing or unparseable part skips the tile.
            try
            {
                if (int.TryParse(div.Descendants(htmlPattern.ZlNode.Descendant)
                        .Where(node => node.GetAttributeValue(htmlPattern.ZlNode.AttributeName, "").Equals(htmlPattern.ZlNode.AttributeValue))
                        .FirstOrDefault()
                        .InnerText, out int resultZl))
                {
                    product.PriceZl = resultZl;
                }
                else
                {
                    continue;
                }
            }
            catch
            {
                continue;
            }

            try
            {
                if (int.TryParse(div.Descendants(htmlPattern.GrNode.Descendant)
                        .Where(node => node.GetAttributeValue(htmlPattern.GrNode.AttributeName, "").Equals(htmlPattern.GrNode.AttributeValue))
                        .FirstOrDefault()
                        .InnerText, out int resultGr))
                {
                    product.PriceGr = resultGr;
                }
                else
                {
                    continue;
                }
            }
            catch
            {
                continue;
            }

            products.Add(product);
        }
    }

    if (anyPatternMatched)
    {
        resultDictionary.Add(linkStruct.Link, products);
    }
}
/// <summary>
/// Builds a <see cref="Product"/> from a single "product-compact" tile.
/// </summary>
/// <param name="productNode">
/// Tile node. Its <c>data-price</c> attribute, its <c>product-compact__*</c> spans and its first anchor are read.
/// </param>
/// <param name="linkStruct">Not used by this method.</param>
/// <returns>
/// The populated product with <c>OnSale</c> set, or an empty <see cref="Product"/> when the tile has no
/// previous-price span, no usable name, or a missing/unparseable price.
/// </returns>
private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
{
    var result = new Product();

    #region Check if product node exists
    // Only tiles that carry a "previous price" span are processed.
    if (!productNode.Descendants("span")
            .Any(n => n.Attributes.Any(x => x.Name == "class" && x.Value == "product-compact__prev-price")))
    {
        return new Product();
    }
    #endregion

    #region Get Name
    // Null-conditional access: a tile without one of these spans must not throw.
    var header = productNode
        .Descendants("span")
        .FirstOrDefault(x => x.HasClass("product-compact__name"))
        ?.InnerText;
    var description = productNode
        .Descendants("span")
        .FirstOrDefault(x => x.HasClass("product-compact__type"))
        ?.InnerText
        .RemoveMetaCharacters()
        .Trim();

    var name = string.Empty;
    if (header != null)
    {
        name += $" {header}";
    }
    if (description != null)
    {
        name += $" {description}";
    }

    name = Regex.Replace(name, @"\s+", " ");
    if (string.IsNullOrEmpty(name))
    {
        return new Product();
    }
    result.Name = name;
    #endregion

    #region Get Category
    result.Category = "Markety Budowlane";
    #endregion

    #region Get Price and Sale Price
    var value = productNode.GetAttributeValue("data-price", "");
    var saleValue = productNode.Descendants("span")
        .FirstOrDefault(x => x.HasClass("product-compact__comparable-price-element"))
        ?.InnerText
        ?.RemoveMetaCharacters();

    // Must run before normalisation: appending "00" below would hide an empty value,
    // and Regex.IsMatch throws on a null input.
    if (string.IsNullOrEmpty(value) || string.IsNullOrEmpty(saleValue))
    {
        return new Product();
    }

    // Normalise both prices to a digit string: keep two existing decimals,
    // otherwise strip non-digits and append "00".
    if (!Regex.IsMatch(value, @"\.[0-9][0-9]$"))
    {
        value = value.RemoveNonNumeric();
        value += "00";
    }
    else
    {
        value = value.Replace(@".", "");
    }

    if (!Regex.IsMatch(saleValue, @"\,[0-9][0-9]$"))
    {
        saleValue = saleValue.RemoveNonNumeric();
        saleValue += "00";
    }
    else
    {
        saleValue = saleValue.Replace(",", "");
    }

    if (decimal.TryParse(value, out decimal valueDecimal)
        && decimal.TryParse(saleValue, out decimal saleValueDecimal))
    {
        result.SaleValue = saleValueDecimal / 100;
        result.Value = valueDecimal / 100;
        result.OnSale = true;
    }
    else
    {
        return new Product();
    }
    #endregion

    #region Get Seller, TimeStamp, URL
    result.Seller = this.GetType().Name.Replace("Crawler", "");
    result.TimeStamp = DateTime.Now;
    result.SourceUrl = productNode.Descendants("a").FirstOrDefault()?.GetAttributeValue("href", "");
    #endregion

    return result;
}
/// <summary>
/// Builds a <see cref="Product"/> from a single article tile that shows both a main price and a previous price.
/// </summary>
/// <param name="productNode">Tile node; its price nodes and its "mod-article-tile__action" anchor are read.</param>
/// <param name="linkStruct">Not used by this method.</param>
/// <returns>
/// The populated product with <c>OnSale</c> set, or an empty <see cref="Product"/> when either price node is missing.
/// </returns>
private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
{
    var result = new Product();

    #region Check if product node exists
    var promoPriceNode = productNode.Descendants("span")
        .FirstOrDefault(n => n.Attributes.Any(x => x.Name == "class"
            && CrawlerRegex.StandardMatch(x.Value, "price__main", MatchDireciton.Equals)));
    var regularPriceNode = productNode.Descendants("s")
        .FirstOrDefault(n => n.Attributes.Any(x => x.Name == "class"
            && CrawlerRegex.StandardMatch(x.Value, "price__previous", MatchDireciton.Equals)));

    // Both prices are required, so every tile that gets past this point is on sale.
    if (promoPriceNode == null || regularPriceNode == null)
    {
        return result;
    }
    #endregion

    // The same anchor carries both the product name and the product link; look it up once.
    var actionClass = "mod-article-tile__action".NormalizeWithStandardRegex();
    var actionLink = productNode
        .Descendants("a")
        .FirstOrDefault(x => x.Attributes.Any(y => y.Name == "class"
            && y.Value.NormalizeWithStandardRegex() == actionClass));

    #region Get Name
    result.Name = actionLink?.InnerText;
    #endregion

    #region Get Category
    result.Category = "Markety Spożywcze";
    #endregion

    #region Get Price and Sale Price, set OnSale Flag
    // Digit-only strings are parsed and divided by 100; an unparseable price leaves the field at its default.
    var promoPrice = promoPriceNode.InnerText?.RemoveNonNumeric();
    var regularPrice = regularPriceNode.InnerText?.RemoveNonNumeric();

    if (decimal.TryParse(promoPrice, out decimal promoPriceDecimal))
    {
        result.Value = promoPriceDecimal / 100;
    }
    if (decimal.TryParse(regularPrice, out decimal regularPriceDecimal))
    {
        result.SaleValue = regularPriceDecimal / 100;
    }
    result.OnSale = true;
    #endregion

    #region Get Seller, TimeStamp, URL
    result.Seller = this.GetType().Name.Replace("Crawler", "");
    result.TimeStamp = DateTime.Now;
    var productUrl = actionLink?.GetAttributeValue("href", "");
    result.SourceUrl = new Uri(new Uri(BaseUrls[0]), productUrl).ToString();
    #endregion

    return result;
}
/// <summary>
/// Builds a <see cref="Product"/> from a single tile whose price is split into "pln" and "gr" nodes.
/// </summary>
/// <param name="productNode">Tile node; descendants are selected by their <c>class</c> attribute.</param>
/// <param name="linkStruct">Its <c>Link</c> is used as the source URL when the tile has no anchor.</param>
/// <returns>
/// The populated product, or an empty <see cref="Product"/> when the tile has no price node or when the
/// current price (or, if present, the old price) cannot be parsed.
/// </returns>
private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
{
    var result = new Product();

    // All descendants whose class attribute matches the pattern in the requested direction.
    IEnumerable<HtmlNode> NodesWithClass(string pattern, MatchDireciton direction) =>
        productNode.Descendants()
            .Where(x => x.Attributes.Any(y => y.Name == "class"
                && CrawlerRegex.StandardMatch(y.Value, pattern, direction)));

    #region Check if product node exists
    // Both alternatives are restricted to the class attribute. The previous condition lacked
    // parentheses, so "price-wrapper" was tested against every attribute of every node.
    if (!NodesWithClass("price", MatchDireciton.Equals).Any()
        && !NodesWithClass("price-wrapper", MatchDireciton.Equals).Any())
    {
        return new Product();
    }
    #endregion

    #region Get Name
    result.Name = NodesWithClass("tilename", MatchDireciton.InputContainsMatch)
        .Select(z => z.InnerText)
        .FirstOrDefault()
        ?.RemoveMetaCharacters()
        .RemoveUnwantedStrings()
        .Replace(";", "");
    #endregion

    #region Get Category
    result.Category = "Markety Spożywcze";
    #endregion

    #region Get Price and Sale Price, set OnSale Flag
    // The current price is read the same way whether or not the tile is on sale.
    var pln = NodesWithClass("pln", MatchDireciton.Equals)
        .FirstOrDefault()
        ?.InnerText
        .RemoveMetaCharacters();
    var gr = NodesWithClass("gr", MatchDireciton.Equals)
        .FirstOrDefault()
        ?.InnerText
        .RemoveMetaCharacters();

    if (decimal.TryParse(pln, out decimal plnDecimal) && decimal.TryParse(gr, out decimal grDecimal))
    {
        result.Value = plnDecimal + (grDecimal / 100);
    }
    else
    {
        return new Product();
    }

    var oldPriceNode = NodesWithClass("price-old", MatchDireciton.Equals).FirstOrDefault();
    if (oldPriceNode != null)
    {
        // An old price that is present but unparseable rejects the whole tile.
        var oldPrice = oldPriceNode.InnerText.RemoveNonNumeric();
        if (decimal.TryParse(oldPrice, out decimal oldPriceDecimal))
        {
            result.SaleValue = oldPriceDecimal / 100;
        }
        else
        {
            return new Product();
        }
        result.OnSale = true;
    }
    else
    {
        result.OnSale = false;
    }
    #endregion

    #region Get Sale Description
    var promoComments = NodesWithClass("productpromo", MatchDireciton.InputContainsMatch)
        .Select(z => z.InnerText.RemoveMetaCharacters())
        .ToList();
    if (promoComments.Count != 0)
    {
        // Any promo label marks the product as on sale, even without an old price.
        result.SaleDescription = String.Join(", ", promoComments.ToArray());
        result.OnSale = true;
    }
    #endregion

    #region Get Seller, TimeStamp, URL
    result.Seller = this.GetType().Name.Replace("Crawler", "");
    result.TimeStamp = DateTime.Now;

    // A tile without an anchor falls back to the listing page address instead of throwing.
    var anchor = productNode.Descendants("a").FirstOrDefault();
    result.SourceUrl = anchor == null
        ? linkStruct.Link
        : new Uri(new Uri(BaseUrls[0]), anchor.GetAttributeValue("href", "")).ToString();
    #endregion

    return result;
}
/// <summary>
/// Parses the HTML in <c>linkStruct.Html</c> and collects products (Provider "Kaufland") for every pattern
/// in <c>_htmlPattens</c>.
/// </summary>
/// <param name="resultDictionary">Receives the product list, keyed by <c>linkStruct.Link</c>.</param>
/// <param name="linkStruct">Supplies the page HTML (<c>Html</c>) and the page address (<c>Link</c>).</param>
/// <remarks>
/// The method returns as soon as one pattern matches no nodes. Nodes without a name or sub-name element are
/// skipped; a missing description becomes an empty string; a missing or unparseable price part becomes 0.
/// </remarks>
public override void GetResultsForSingleUrl(Dictionary <string, List <Product> > resultDictionary, LinkStruct linkStruct)
{
    List <Product> products = new List <Product>();
    HtmlDocument htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(linkStruct.Html);

    // The list is registered once; later patterns keep appending to the same instance.
    // Calling Add again with the same key would throw on the second pattern.
    var registered = false;

    foreach (var htmlPattern in _htmlPattens)
    {
        var divs = htmlDocument.DocumentNode
            .Descendants(htmlPattern.TopNode.Descendant)
            .Where(node => node.GetAttributeValue(htmlPattern.TopNode.AttributeName, "").Contains(htmlPattern.TopNode.AttributeValue))
            .ToList();
        if (divs.Count == 0)
        {
            return;
        }

        foreach (var div in divs)
        {
            var product = new Product();

            // Price text looks like "<zl>,<gr> zł". A missing node or a missing part yields 0,
            // the same fallback the parse failure already used.
            var priceText = div.Descendants(htmlPattern.PriceNode.Descendant)
                .Where(node => node.GetAttributeValue(htmlPattern.PriceNode.AttributeName, "").Equals(htmlPattern.PriceNode.AttributeValue))
                .FirstOrDefault()
                ?.InnerText ?? "";
            string[] priceParts = priceText.Replace("zł", "").Split(',');
            product.PriceZl = int.TryParse(priceParts[0], out int resultZl) ? resultZl : 0;
            product.PriceGr = priceParts.Length > 1 && int.TryParse(priceParts[1], out int resultGr) ? resultGr : 0;

            product.Provider = "Kaufland";

            // The NullReferenceException handlers below are kept as they were: they also cover a
            // pattern whose Name/SubName/Description part is not configured.
            try
            {
                product.Name = div.Descendants(htmlPattern.Name.Descendant).Where(node => node.GetAttributeValue(htmlPattern.Name.AttributeName, "").Equals(htmlPattern.Name.AttributeValue)).FirstOrDefault().InnerText;
            }
            catch (NullReferenceException)
            {
                continue;
            }

            try
            {
                product.Name += $" {div.Descendants(htmlPattern.SubName.Descendant).Where(node => node.GetAttributeValue(htmlPattern.SubName.AttributeName, "").Equals(htmlPattern.SubName.AttributeValue)).FirstOrDefault().InnerText}";
            }
            catch (NullReferenceException)
            {
                continue;
            }

            try
            {
                var description = div.Descendants(htmlPattern.Description.Descendant).Where(node => node.GetAttributeValue(htmlPattern.Description.AttributeName, "").Equals(htmlPattern.Description.AttributeValue)).FirstOrDefault().InnerText;
                product.Description = Regex.Replace(description, @"\t|\n|\r", "");
            }
            catch (NullReferenceException)
            {
                product.Description = "";
            }

            product.Url = linkStruct.Link;
            product.DownloadDate = DateTime.Now;
            product.Category = "";
            product.Mark = "";
            product.Quantity = 0;
            products.Add(product);
        }

        if (!registered)
        {
            resultDictionary.Add(linkStruct.Link, products);
            registered = true;
        }
    }
}
/// <summary>
/// Extension point for concrete crawlers; the base implementation does nothing.
/// </summary>
/// <param name="resultDictionary">Unused here.</param>
/// <param name="linkStruct">Unused here.</param>
public virtual void GetResultsForSingleUrl(Dictionary<string, List<Product>> resultDictionary, LinkStruct linkStruct)
{
    // Always meant to be overridden by derived classes.
}
/// <summary>
/// Builds a <see cref="Product"/> from a single tile identified by a "product__title title" node.
/// </summary>
/// <param name="productNode">Tile node; descendants are selected by their <c>class</c> attribute.</param>
/// <param name="linkStruct">Its <c>Link</c> becomes the product's source URL.</param>
/// <returns>
/// The populated product, or an empty <see cref="Product"/> when the title node or the price wrapper is
/// missing, or when no non-zero current price could be parsed.
/// </returns>
private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
{
    var result = new Product();

    // First descendant of <paramref name="scope"/> whose class attribute matches, or null.
    HtmlNode FirstWithClass(HtmlNode scope, string classValue, MatchDireciton direction) =>
        scope.Descendants()
            .FirstOrDefault(x => x.Attributes.Any(y => y.Name == "class"
                && CrawlerRegex.StandardMatch(y.Value, classValue, direction)));

    #region Check if product node exists and if it is on sale
    if (FirstWithClass(productNode, "product__title title", MatchDireciton.Equals) == null)
    {
        return new Product();
    }
    #endregion

    #region Get Name
    var name = FirstWithClass(productNode, "product__title title", MatchDireciton.InputContainsMatch)?.InnerText;
    result.Name = CrawlerRegex.RemoveMetaCharacters(name).Trim();
    #endregion

    #region Get Price and Sale Price, set OnSale Flag
    var priceNode = FirstWithClass(productNode, "product__price-wrapper", MatchDireciton.Equals);
    if (priceNode == null)
    {
        return new Product();
    }

    // Current price node. It may be absent; the null-conditional reads below then leave
    // Value at 0 and the tile is rejected further down instead of throwing.
    var currentPriceNode = FirstWithClass(productNode, "price product__price", MatchDireciton.Equals);

    if (FirstWithClass(priceNode, "price product__old-price", MatchDireciton.Equals) != null)
    {
        var oldPriceNode = FirstWithClass(productNode, "price product__old-price", MatchDireciton.Equals);

        var currentPriceText = currentPriceNode?.InnerText.RemoveNonNumeric();
        var oldPriceText = oldPriceNode?.InnerText.RemoveNonNumeric();

        if (decimal.TryParse(currentPriceText, out decimal priceDecimal))
        {
            result.Value = priceDecimal / 100;
        }
        if (decimal.TryParse(oldPriceText, out decimal oldPriceDecimal))
        {
            result.SaleValue = oldPriceDecimal / 100;
        }
        result.OnSale = true;
    }
    else
    {
        var currentPriceText = currentPriceNode?.InnerText.RemoveNonNumeric();

        // NOTE(review): "p-cents" is searched inside the price node, whose InnerText is already
        // used above - if that text includes the cents, they are appended twice. Confirm against real markup.
        var centsText = currentPriceNode == null
            ? null
            : FirstWithClass(currentPriceNode, "p-cents", MatchDireciton.Equals)
                ?.InnerText
                .RemoveNonNumeric();

        if (decimal.TryParse(currentPriceText + centsText, out decimal priceDecimal))
        {
            result.Value = priceDecimal / 100;
        }
        result.OnSale = false;
    }

    if (result.Value == 0)
    {
        return new Product();
    }
    #endregion

    #region Get Seller, TimeStamp, URL
    result.Seller = this.GetType().Name.Replace("Crawler", "");
    result.TimeStamp = DateTime.Now;
    result.SourceUrl = linkStruct.Link;
    #endregion

    return result;
}
/// <summary>
/// Builds a <see cref="Product"/> from a single tile that has an <c>h3</c> title and a "prices" span.
/// </summary>
/// <param name="productNode">Tile node; its title, price spans and "Url" anchor are read.</param>
/// <param name="linkStruct">Not used by this method.</param>
/// <returns>
/// The populated product. An empty <see cref="Product"/> is returned when the title or prices node is
/// missing, when a promotional tile has fewer than two readable prices, or when the two prices are equal
/// or the promotional one is below 30% of the regular one.
/// </returns>
private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
{
    var result = new Product();

    // First span under <paramref name="scope"/> whose class attribute equals the given value, as digits only.
    string DigitsOf(HtmlNode scope, string classValue) =>
        scope.Descendants("span")
            .FirstOrDefault(n => n.Attributes.Any(x => x.Name == "class" && x.Value == classValue))
            ?.InnerText
            .RemoveNonNumeric();

    // Reads the "integer" and "fractional" spans of one price node.
    // Returns false when either part is missing or is not an integer.
    bool TryReadPrice(HtmlNode priceNode, out decimal price)
    {
        price = 0;
        if (!int.TryParse(DigitsOf(priceNode, "integer"), out int whole)
            || !int.TryParse(DigitsOf(priceNode, "fractional"), out int fraction))
        {
            return false;
        }

        // Done in decimal so that a large integer part cannot overflow.
        price = ((whole * 100m) + fraction) / 100;
        return true;
    }

    #region Check if product node exists
    var pricesNode = productNode.Descendants("span")
        .FirstOrDefault(n => n.Attributes.Any(x => x.Name == "class"
            && CrawlerRegex.StandardMatch(x.Value, "prices", MatchDireciton.InputContainsMatch)));
    var titleNode = productNode.Descendants("h3").FirstOrDefault();

    if (pricesNode == null || titleNode == null)
    {
        return result;
    }
    #endregion

    #region Get Name
    result.Name = titleNode.InnerText.RemoveMetaCharacters();
    #endregion

    #region Get Category
    result.Category = "Markety Budowlane";
    #endregion

    #region Get Price and Sale Price, set OnSale Flag
    // Prices are only read for promotional tiles; other tiles keep the default price fields.
    if (pricesNode.Descendants("span")
            .Any(n => n.Attributes.Any(x => x.Name == "class" && x.Value.Contains("product-price promotional"))))
    {
        var prices = pricesNode.Descendants("span")
            .Where(n => n.Attributes.Any(x => x.Name == "class" && x.Value == "price"))
            .ToList();

        // First price is the promotional one, second the regular one; both are required.
        if (prices.Count < 2
            || !TryReadPrice(prices[0], out decimal promoPriceDecimal)
            || !TryReadPrice(prices[1], out decimal regularPriceDecimal))
        {
            return new Product();
        }

        result.Value = promoPriceDecimal;
        result.SaleValue = regularPriceDecimal;

        // Reject tiles with no real discount, or with a promo price below 30% of the regular price.
        if ((promoPriceDecimal == regularPriceDecimal) || (promoPriceDecimal < (regularPriceDecimal * 0.3m)))
        {
            return new Product();
        }

        result.OnSale = true;
    }
    #endregion

    #region Get Seller, TimeStamp, URL
    result.Seller = this.GetType().Name.Replace("Crawler", "");
    result.TimeStamp = DateTime.Now;

    var urlClass = "Url".NormalizeWithStandardRegex();
    var productUrl = productNode
        .Descendants("a")
        .FirstOrDefault(x => x.Attributes.Any(y => y.Name == "class"
            && y.Value.NormalizeWithStandardRegex() == urlClass))
        ?.GetAttributeValue("href", "");
    result.SourceUrl = new Uri(new Uri(BaseUrls[0]), productUrl).ToString();
    #endregion

    return result;
}