Exemplo n.º 1
0
 /// <summary>
 /// Runs <c>CrawlerRegex.StandardMatch</c> on the two arguments in
 /// <c>InputContainsMatch</c> mode and reports the outcome.
 /// </summary>
 /// <param name="link">First argument forwarded to the matcher.</param>
 /// <param name="output">Second argument forwarded to the matcher.</param>
 /// <returns><c>true</c> when the matcher reports a match; otherwise <c>false</c>.</returns>
 private bool LinkCleanUp(string link, string output)
 {
     // The matcher already yields the boolean we need, so hand it straight back.
     bool isMatch = CrawlerRegex.StandardMatch(link, output, MatchDireciton.InputContainsMatch);

     return isMatch;
 }
Exemplo n.º 2
0
        /// <summary>
        /// Builds a <see cref="Product"/> from a product HTML node by reading its title
        /// and price elements, located by their <c>class</c> attribute.
        /// </summary>
        /// <param name="productNode">Root HTML node of a single product.</param>
        /// <param name="linkStruct">Link the node was crawled from; its <c>Link</c> becomes the source URL.</param>
        /// <returns>
        /// The populated product, or a fresh empty <see cref="Product"/> when the title,
        /// the price wrapper or the current price element is missing, or when the
        /// resulting price is zero.
        /// </returns>
        private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            // First descendant of root whose class attribute matches className exactly (per StandardMatch/Equals).
            HtmlNode FindByClass(HtmlNode root, string className)
            {
                return root.Descendants()
                       .FirstOrDefault(x => x.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, className, MatchDireciton.Equals)));
            }

            var result = new Product();

            #region Check if product node exists

            if (FindByClass(productNode, "product__title title") == null)
            {
                return(new Product());
            }

            #endregion

            #region Get Name

            var name = productNode.Descendants()
                       .Where(x => x.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, "product__title title", MatchDireciton.InputContainsMatch)))
                       .Select(z => z.InnerText)
                       .FirstOrDefault();

            result.Name = CrawlerRegex.RemoveMetaCharacters(name).Trim();

            #endregion

            #region Get Price and Sale Price, set OnSale Flag

            var priceNode = FindByClass(productNode, "product__price-wrapper");

            if (priceNode == null)
            {
                return(new Product());
            }

            // Without a current-price element no price can be read. The zero-price check
            // below would discard the product anyway, so bail out here instead of
            // dereferencing a null node.
            var regularPriceNode = FindByClass(productNode, "price product__price");

            if (regularPriceNode == null)
            {
                return(new Product());
            }

            var regularPricePLN = regularPriceNode.InnerText.RemoveNonNumeric();

            if (FindByClass(priceNode, "price product__old-price") != null)
            {
                // The old-price element exists under priceNode, hence also under productNode,
                // so this lookup cannot come back null.
                var oldPriceNode = FindByClass(productNode, "price product__old-price");
                var oldPricePLN  = oldPriceNode.InnerText.RemoveNonNumeric();

                // NOTE(review): the stripped text is presumably digits only, so the price is
                // treated as hundredths and divided by 100 - confirm against RemoveNonNumeric.
                if (decimal.TryParse(regularPricePLN, out decimal priceDecimal))
                {
                    result.Value = priceDecimal / 100;
                }
                if (decimal.TryParse(oldPricePLN, out decimal oldPriceDecimal))
                {
                    result.SaleValue = oldPriceDecimal / 100;
                }

                result.OnSale = true;
            }
            else
            {
                var regularPriceGR = FindByClass(regularPriceNode, "p-cents")?
                                     .InnerText
                                     .RemoveNonNumeric();

                if (decimal.TryParse(regularPricePLN + regularPriceGR, out decimal priceDecimal))
                {
                    result.Value = priceDecimal / 100;
                }

                result.OnSale = false;
            }

            if (result.Value == 0)
            {
                return(new Product());
            }

            #endregion

            #region Get Seller, TimeStamp, URL

            result.Seller    = this.GetType().Name.Replace("Crawler", "");
            result.TimeStamp = DateTime.Now;
            result.SourceUrl = linkStruct.Link;

            #endregion

            return(result);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Builds a <see cref="Product"/> from a product tile that shows both a current
        /// price (<c>span.price__main</c>) and a previous price (<c>s.price__previous</c>).
        /// </summary>
        /// <param name="productNode">Root HTML node of a single product tile.</param>
        /// <param name="linkStruct">Link the node was crawled from (not read by this method).</param>
        /// <returns>
        /// The populated product, or an untouched new <see cref="Product"/> when either
        /// price element is missing.
        /// </returns>
        private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            var result = new Product();

            #region Check if product node exists

            var promoPriceNode = productNode.Descendants("span")
                                 .FirstOrDefault(n => n.Attributes.Any(x => x.Name == "class" && CrawlerRegex.StandardMatch(x.Value, "price__main", MatchDireciton.Equals)));

            var regularPriceNode = productNode.Descendants("s")
                                   .FirstOrDefault(n => n.Attributes.Any(x => x.Name == "class" && CrawlerRegex.StandardMatch(x.Value, "price__previous", MatchDireciton.Equals)));

            if (promoPriceNode == null || regularPriceNode == null)
            {
                return(result);
            }

            #endregion

            #region Get Name

            // The same anchor supplies both the product name and its URL, so look it up once.
            var titleLink = productNode
                            .Descendants("a")
                            .FirstOrDefault(x => x.Attributes.Any(y => y.Name == "class" && y.Value.NormalizeWithStandardRegex() == "mod-article-tile__action".NormalizeWithStandardRegex()));

            result.Name = titleLink?.InnerText;

            #endregion

            #region Get Category

            result.Category = "Markety Spożywcze";

            #endregion

            #region Get Price and Sale Price, set OnSale Flag

            // Both price nodes are guaranteed non-null by the guard above, so only the
            // two-price path is reachable; no single-price fallback is needed.
            var promoPrice   = promoPriceNode.InnerText?.RemoveNonNumeric();
            var regularPrice = regularPriceNode.InnerText?.RemoveNonNumeric();

            // NOTE(review): the stripped text is presumably digits only, so prices are
            // treated as hundredths and divided by 100 - confirm against RemoveNonNumeric.
            if (decimal.TryParse(promoPrice, out decimal promoPriceDecimal))
            {
                result.Value = promoPriceDecimal / 100;
            }
            if (decimal.TryParse(regularPrice, out decimal regularPriceDecimal))
            {
                result.SaleValue = regularPriceDecimal / 100;
            }
            result.OnSale = true;

            #endregion

            #region Get Seller, TimeStamp, URL

            result.Seller    = this.GetType().Name.Replace("Crawler", "");
            result.TimeStamp = DateTime.Now;

            var productUrl = titleLink?.GetAttributeValue("href", "");

            result.SourceUrl = new Uri(new Uri(BaseUrls[0]), productUrl).ToString();

            #endregion

            return(result);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds a <see cref="Product"/> from a product HTML node, reading the name from
        /// the element whose class contains <c>about</c> and the prices from the
        /// <c>prices</c> element (discounted or standard layout).
        /// </summary>
        /// <param name="productNode">Root HTML node of a single product.</param>
        /// <param name="linkStruct">Link the node was crawled from; its <c>Link</c> becomes the source URL.</param>
        /// <returns>
        /// The populated product, or a fresh empty <see cref="Product"/> when the
        /// <c>content</c> or <c>prices</c> element is missing or the resulting price is zero.
        /// </returns>
        private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            // First descendant of root whose class attribute matches className exactly
            // (per StandardMatch/Equals); null when root is null or nothing matches.
            HtmlNode FindByClass(HtmlNode root, string className)
            {
                return root?.Descendants()
                       .FirstOrDefault(x => x.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, className, MatchDireciton.Equals)));
            }

            var result = new Product();

            #region Check if product node exists

            if (FindByClass(productNode, "content") == null)
            {
                return(new Product());
            }

            #endregion

            #region Get Name

            var name = productNode.Descendants()
                       .Where(x => x.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, "about", MatchDireciton.InputContainsMatch)))
                       .Select(z => z.InnerText)
                       .FirstOrDefault();

            result.Name = CrawlerRegex.RemoveMetaCharacters(name).Trim();

            #endregion

            #region Get Description

            if (CrawlerRegex.StandardMatch(linkStruct.Link, "direct", MatchDireciton.InputContainsMatch))
            {
                result.Description = "Znalezione na Auchan Direct!";
            }

            #endregion

            #region Get Category

            result.Category = "Markety Budowlane";

            #endregion

            #region Get Price and Sale Price, set OnSale Flag

            var priceNode = FindByClass(productNode, "prices");

            if (priceNode == null)
            {
                return(new Product());
            }

            var discountNode = FindByClass(priceNode, "discount");

            if (discountNode != null)
            {
                var regularPrice = discountNode.InnerText.RemoveNonNumeric();

                // May be null when the page lacks a "normal" element; the null-tolerant
                // lookups below then yield null strings and the sale price is left unset.
                var promoPriceNode = FindByClass(productNode, "normal");

                var promoPricePLN = FindByClass(promoPriceNode, "p-nb")?
                                    .InnerText
                                    .RemoveNonNumeric();

                var promoPriceGR = FindByClass(promoPriceNode, "p-cents")?
                                   .InnerText
                                   .RemoveNonNumeric();

                // NOTE(review): the stripped text is presumably digits only, so prices are
                // treated as hundredths and divided by 100 - confirm against RemoveNonNumeric.
                if (decimal.TryParse(regularPrice, out decimal priceDecimal))
                {
                    result.Value = priceDecimal / 100;
                }
                if (decimal.TryParse(promoPricePLN + promoPriceGR, out decimal promoPriceDecimal))
                {
                    result.SaleValue = promoPriceDecimal / 100;
                }

                result.OnSale = true;
            }
            else
            {
                // A missing "standard" element leaves both parts null, the parse fails and
                // the zero-price check below discards the product.
                var regularPriceNode = FindByClass(priceNode, "standard");

                var regularPricePLN = FindByClass(regularPriceNode, "p-nb")?
                                      .InnerText
                                      .RemoveNonNumeric();

                var regularPriceGR = FindByClass(regularPriceNode, "p-cents")?
                                     .InnerText
                                     .RemoveNonNumeric();

                if (decimal.TryParse(regularPricePLN + regularPriceGR, out decimal priceDecimal))
                {
                    result.Value = priceDecimal / 100;
                }

                result.OnSale = false;
            }

            if (result.Value == 0)
            {
                return(new Product());
            }

            #endregion

            #region Get Seller, TimeStamp, URL

            result.Seller    = this.GetType().Name.Replace("Crawler", "");
            result.TimeStamp = DateTime.Now;
            result.SourceUrl = linkStruct.Link;

            #endregion

            return(result);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Builds a <see cref="Product"/> from a list-view product node. Only discounted
        /// products (those with a <c>price-old</c> element) are returned.
        /// </summary>
        /// <param name="productNode">Root HTML node of a single listed product.</param>
        /// <param name="linkStruct">Link the node was crawled from; its <c>Link</c> becomes the source URL.</param>
        /// <returns>
        /// The populated product, or a fresh empty <see cref="Product"/> when the
        /// description, name, price or old-price element is missing, or when the
        /// resulting price is zero.
        /// </returns>
        private Product ExtractListProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            // First descendant of root whose class attribute satisfies StandardMatch in the given mode.
            HtmlNode FindByClass(HtmlNode root, string className, MatchDireciton direction)
            {
                return root.Descendants()
                       .FirstOrDefault(x => x.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, className, direction)));
            }

            var result = new Product();

            #region Check if product node exists / Get Name

            var nameNode = FindByClass(productNode, "description", MatchDireciton.Equals);

            if (nameNode == null)
            {
                return(new Product());
            }

            var name = nameNode
                       .Descendants("p")
                       .FirstOrDefault()?
                       .InnerText;

            if (string.IsNullOrEmpty(name))
            {
                return(new Product());
            }

            result.Name = CrawlerRegex.RemoveMetaCharacters(name).Trim();

            #endregion

            #region Get Category

            result.Category = "Markety Budowlane";

            #endregion

            #region Get Price and Sale Price, set OnSale Flag

            var priceNode = FindByClass(productNode, "price", MatchDireciton.Equals);

            // No price element means nothing to extract; guard before dereferencing it.
            if (priceNode == null)
            {
                return(new Product());
            }

            var oldPriceNode = FindByClass(priceNode, "price-old", MatchDireciton.InputContainsMatch);

            // Products without an old price are not on sale and are skipped.
            if (oldPriceNode == null)
            {
                return(new Product());
            }

            var regularPrice = oldPriceNode
                               .Descendants("del")
                               .FirstOrDefault()?
                               .Descendants("span")
                               .FirstOrDefault()?
                               .InnerText
                               .RemoveNonNumeric();

            // Current price: first span that neither contains nor sits directly inside a <del>.
            var promoPrice = priceNode
                             .Descendants("span")
                             .Where(d => !d.Descendants().Any(c => c.Name == "del") && d.ParentNode.Name != "del")
                             .FirstOrDefault()?
                             .InnerText
                             .RemoveNonNumeric();

            // NOTE(review): the stripped text is presumably digits only, so prices are
            // treated as hundredths and divided by 100 - confirm against RemoveNonNumeric.
            if (decimal.TryParse(promoPrice, out decimal promoPriceDecimal))
            {
                result.Value = promoPriceDecimal / 100;
            }
            if (decimal.TryParse(regularPrice, out decimal regularPriceDecimal))
            {
                result.SaleValue = regularPriceDecimal / 100;
            }

            result.OnSale = true;

            if (result.Value == 0)
            {
                return(new Product());
            }

            #endregion

            #region Get Seller, TimeStamp, URL

            result.Seller    = this.GetType().Name.Replace("Crawler", "");
            result.TimeStamp = DateTime.Now;
            result.SourceUrl = linkStruct.Link;

            #endregion

            return(result);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Turns an offer tile into a <see cref="Product"/>: name from the subtitle and
        /// title elements, prices from the <c>a-pricetag__*</c> elements, sale notes from
        /// the discount elements, and URL from the tile link.
        /// </summary>
        /// <param name="productNode">Root HTML node of a single offer tile.</param>
        /// <param name="linkStruct">Link the node was crawled from (not read by this method).</param>
        /// <returns>
        /// The populated product, or a fresh empty <see cref="Product"/> when no price tag
        /// is present or a required price cannot be parsed.
        /// </returns>
        private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            // True when the attribute is a class attribute matching className in the given mode.
            bool HasClass(HtmlAttribute attribute, string className, MatchDireciton direction)
            {
                return attribute.Name == "class" && CrawlerRegex.StandardMatch(attribute.Value, className, direction);
            }

            // First descendant carrying exactly the given class, or null.
            HtmlNode FirstWithClass(string className)
            {
                return productNode.Descendants()
                       .FirstOrDefault(node => node.Attributes.Any(attribute => HasClass(attribute, className, MatchDireciton.Equals)));
            }

            // A tile without any price tag is not a product.
            bool hasPriceTag = productNode.Descendants()
                               .Any(node => node.Attributes.Any(attribute => HasClass(attribute, "a-pricetag__price", MatchDireciton.InputContainsMatch)));
            if (!hasPriceTag)
            {
                return new Product();
            }

            var product = new Product();

            // Name: subtitle and title, skipping whichever is missing or empty.
            string subtitle = FirstWithClass("m-offer-tile__subtitle")?.InnerText?.RemoveMetaCharacters();
            string title    = FirstWithClass("m-offer-tile__title")?.InnerText?.RemoveMetaCharacters();

            string[] nameParts = new[] { subtitle, title }
                                 .Where(part => !string.IsNullOrEmpty(part))
                                 .ToArray();
            product.Name = String.Join(", ", nameParts);

            product.Category = "Markety Spożywcze";

            // The current price is mandatory whether or not the product is on sale.
            string currentPriceText = FirstWithClass("a-pricetag__price")?
                                      .InnerText
                                      .RemoveMetaCharacters()
                                      .RemoveNonNumeric();
            if (!decimal.TryParse(currentPriceText, out decimal currentPrice))
            {
                return new Product();
            }

            HtmlNode oldPriceNode = FirstWithClass("a-pricetag__old-price");
            if (oldPriceNode == null)
            {
                product.Value  = currentPrice / 100;
                product.OnSale = false;
            }
            else
            {
                string oldPriceText = oldPriceNode
                                      .InnerText
                                      .RemoveMetaCharacters()
                                      .RemoveNonNumeric();
                if (!decimal.TryParse(oldPriceText, out decimal oldPrice))
                {
                    return new Product();
                }

                product.SaleValue = oldPrice / 100;
                product.Value     = currentPrice / 100;
                product.OnSale    = true;
            }

            // Any discount label marks the product as on sale, even without an old price.
            var discountNotes = productNode.Descendants()
                                .Where(node => node.Attributes.Any(attribute => HasClass(attribute, "a-pricetag__discount", MatchDireciton.Equals)))
                                .Select(node => node.InnerText.RemoveMetaCharacters())
                                .ToArray();
            if (discountNotes.Length > 0)
            {
                product.SaleDescription = String.Join(", ", discountNotes);
                product.OnSale          = true;
            }

            product.Seller    = this.GetType().Name.Replace("Crawler", "");
            product.TimeStamp = DateTime.Now;

            // Resolve the tile link's href against the first base URL.
            HtmlNode tileLink = productNode
                                .Descendants("a")
                                .FirstOrDefault(anchor => anchor.Attributes.Any(attribute => HasClass(attribute, "m-offer-tile__link", MatchDireciton.InputContainsMatch)));
            string relativeUrl = tileLink?.GetAttributeValue("href", "");
            var baseUri = new Uri(BaseUrls[0]);

            product.SourceUrl = new Uri(baseUri, relativeUrl).ToString();

            return product;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Builds a <see cref="Product"/> from a product tile whose price is split into a
        /// <c>pln</c> element and a <c>gr</c> element, with an optional <c>price-old</c>
        /// element and optional promotion labels.
        /// </summary>
        /// <param name="productNode">Root HTML node of a single product tile.</param>
        /// <param name="linkStruct">Link the node was crawled from (not read by this method).</param>
        /// <returns>
        /// The populated product, or a fresh empty <see cref="Product"/> when no price
        /// element is present or a required price cannot be parsed.
        /// </returns>
        private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            // First descendant of the product whose class attribute matches className exactly
            // (per StandardMatch/Equals), or null.
            HtmlNode FindByClass(string className)
            {
                return productNode.Descendants()
                       .FirstOrDefault(x => x.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, className, MatchDireciton.Equals)));
            }

            var result = new Product();

            #region Check if product node exists

            // The alternatives are parenthesised so that both class names are tested only
            // against the class attribute; without the parentheses && binds tighter than ||
            // and "price-wrapper" would be matched against every attribute.
            if (!productNode.Descendants().Any(x => x.Attributes.Any(y => y.Name == "class" &&
                                                                          (CrawlerRegex.StandardMatch(y.Value, "price", MatchDireciton.Equals) ||
                                                                           CrawlerRegex.StandardMatch(y.Value, "price-wrapper", MatchDireciton.Equals)))))
            {
                return(new Product());
            }

            #endregion

            #region Get Name

            var name = productNode.Descendants()
                       .Where(x => x.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, "tilename", MatchDireciton.InputContainsMatch)))
                       .Select(z => z.InnerText)
                       .FirstOrDefault()?
                       .RemoveMetaCharacters()
                       .RemoveUnwantedStrings()
                       .Replace(";", "");

            result.Name = name;

            #endregion

            #region Get Category

            result.Category = "Markety Spożywcze";

            #endregion

            #region Get Price and Sale Price, set OnSale Flag

            // The current price is required whether or not the product is on sale.
            var pln = FindByClass("pln")?
                      .InnerText
                      .RemoveMetaCharacters();

            var gr = FindByClass("gr")?
                     .InnerText
                     .RemoveMetaCharacters();

            if (!decimal.TryParse(pln, out decimal plnDecimal) || !decimal.TryParse(gr, out decimal grDecimal))
            {
                return(new Product());
            }

            // Whole part plus the fractional part expressed in hundredths.
            result.Value = plnDecimal + (grDecimal / 100);

            var oldPriceNode = FindByClass("price-old");

            if (oldPriceNode != null)
            {
                var oldPrice = oldPriceNode.InnerText.RemoveNonNumeric();

                // NOTE(review): the stripped text is presumably digits only, so the old price
                // is treated as hundredths - confirm against RemoveNonNumeric.
                if (decimal.TryParse(oldPrice, out decimal oldPriceDecimal))
                {
                    result.SaleValue = oldPriceDecimal / 100;
                }
                else
                {
                    return(new Product());
                }

                result.OnSale = true;
            }
            else
            {
                result.OnSale = false;
            }

            #endregion

            #region Get Sale Description

            var promoComments = productNode.Descendants()
                                .Where(x => x.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, "productpromo", MatchDireciton.InputContainsMatch)))
                                .Select(z => z.InnerText.RemoveMetaCharacters())
                                .ToList();

            // Any promotion label marks the product as on sale, even without an old price.
            if (promoComments.Count != 0)
            {
                result.SaleDescription = String.Join(", ", promoComments.ToArray());
                result.OnSale          = true;
            }

            #endregion

            #region Get Seller, TimeStamp, URL

            result.Seller    = this.GetType().Name.Replace("Crawler", "");
            result.TimeStamp = DateTime.Now;

            // A tile without any anchor falls back to an empty relative URL instead of
            // throwing on a null node; the source URL then resolves to the base URL.
            var productUrl = productNode.Descendants("a").FirstOrDefault()?.GetAttributeValue("href", "") ?? "";

            result.SourceUrl = new Uri(new Uri(BaseUrls[0]), productUrl).ToString();

            #endregion

            return(result);
        }
Exemplo n.º 8
0
        private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            var result = new Product();

            #region Check if product node exists

            var pricesNode = productNode.Descendants("span")
                             .Where(n => n.Attributes.Any(x => x.Name == "class" && CrawlerRegex.StandardMatch(x.Value, "prices", MatchDireciton.InputContainsMatch)))
                             .FirstOrDefault();

            if (pricesNode == null || productNode.Descendants("h3").FirstOrDefault() == null)
            {
                return(result);
            }

            #endregion

            #region Get Name

            result.Name = productNode.Descendants("h3")
                          .FirstOrDefault()?
                          .InnerText
                          .RemoveMetaCharacters();

            #endregion

            #region Get Description

            #endregion

            #region Get Producer

            #endregion

            #region Get Category

            result.Category = "Markety Budowlane";

            #endregion

            #region Get Price and Sale Price, set OnSale Flag

            if (pricesNode.Descendants("span")
                .Any(n => n.Attributes.Any(x => x.Name == "class" && x.Value.Contains("product-price promotional"))))
            {
                try
                {
                    var prices = pricesNode.Descendants("span")
                                 .Where(n => n.Attributes.Any(x => x.Name == "class" && x.Value == "price"))
                                 .ToList();

                    var price1Int = Int32.Parse(prices[0].Descendants("span")
                                                .Where(n => n.Attributes.Any(x => x.Name == "class" && x.Value == "integer"))
                                                .FirstOrDefault()?
                                                .InnerText
                                                .RemoveNonNumeric()) * 100;

                    var price1Frac = Int32.Parse(prices[0].Descendants("span")
                                                 .Where(n => n.Attributes.Any(x => x.Name == "class" && x.Value == "fractional"))
                                                 .FirstOrDefault()?
                                                 .InnerText
                                                 .RemoveNonNumeric());

                    var price2Int = Int32.Parse(prices[1].Descendants("span")
                                                .Where(n => n.Attributes.Any(x => x.Name == "class" && x.Value == "integer"))
                                                .FirstOrDefault()?
                                                .InnerText
                                                .RemoveNonNumeric()) * 100;

                    var price2Frac = Int32.Parse(prices[1].Descendants("span")
                                                 .Where(n => n.Attributes.Any(x => x.Name == "class" && x.Value == "fractional"))
                                                 .FirstOrDefault()?
                                                 .InnerText
                                                 .RemoveNonNumeric());

                    if (decimal.TryParse((price1Int + price1Frac).ToString(), out decimal promoPriceDecimal))
                    {
                        result.Value = promoPriceDecimal / 100;
                    }
                    if (decimal.TryParse((price2Int + price2Frac).ToString(), out decimal regularPriceDecimal))
                    {
                        result.SaleValue = regularPriceDecimal / 100;
                    }

                    if ((promoPriceDecimal == regularPriceDecimal) || (promoPriceDecimal < (regularPriceDecimal * 0.3m)))
                    {
                        return(new Product());
                    }

                    result.OnSale = true;
                }
                catch (Exception)
                {
                    return(new Product());
                }
            }

            #endregion

            #region Get Sale Description

            #endregion

            #region Get Sale Deadline

            #endregion

            #region Get Seller, TimeStamp, URL

            result.Seller    = this.GetType().Name.Replace("Crawler", "");
            result.TimeStamp = DateTime.Now;

            var productUrl = productNode
                             .Descendants("a")
                             .FirstOrDefault(x => x.Attributes.Any(y => y.Name == "class" && y.Value.NormalizeWithStandardRegex() == "Url".NormalizeWithStandardRegex()))?
                             .GetAttributeValue("href", "");

            result.SourceUrl = new Uri(new Uri(BaseUrls[0]), productUrl).ToString();

            #endregion

            return(result);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Builds a <see cref="Product"/> from a single product node of a crawled page.
        /// </summary>
        /// <param name="productNode">
        /// HTML node of one product. Its own <c>href</c> attribute, when present, is used as the product link.
        /// </param>
        /// <param name="linkStruct">
        /// Link of the page being crawled; its <c>Link</c> is used as the source URL when the node
        /// carries no usable <c>href</c>.
        /// </param>
        /// <returns>
        /// The populated product with <c>OnSale</c> set, or an empty <see cref="Product"/> when no
        /// descendant class matches "pricebox__price" or unless both the promotional and the regular
        /// price were read as non-zero values.
        /// </returns>
        private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            var result = new Product();

            #region Check if product node exists

            var hasPriceBox = productNode.Descendants()
                              .Any(x => x.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, "pricebox__price", MatchDireciton.InputContainsMatch)));

            if (!hasPriceBox)
            {
                return new Product();
            }

            #endregion

            #region Get Name

            // The name is composed of the product title and the optional price-box highlight text.
            var names = new List<string>
            {
                productNode.Descendants()
                .Where(x => x.Attributes.Any(y => y.Name == "class" && y.Value == "product__title"))
                .Select(z => z.InnerText)
                .FirstOrDefault()?
                .RemoveMetaCharacters()
                .Trim(),

                productNode.Descendants()
                .Where(x => x.Attributes.Any(y => y.Name == "class" && y.Value == "pricebox__highlight"))
                .Select(z => z.InnerText)
                .FirstOrDefault()?
                .RemoveMetaCharacters()
                .Trim(),
            };

            // Drop parts that were not found (null) or came out empty before joining.
            names.RemoveAll(string.IsNullOrEmpty);
            result.Name = string.Join(", ", names);

            #endregion

            #region Get Description

            #endregion

            #region Get Producer

            #endregion

            #region Get Category

            result.Category = "Markety Spożywcze";

            #endregion

            #region Get Price and Sale Price

            // A ",-" suffix is rewritten to "00" so that, once RemoveNonNumeric has run, the text
            // can be divided by 100 below (presumably leaving only digits - verify against the helper).
            var promoPrice = productNode.Descendants("span")
                             .FirstOrDefault(x => x.GetAttributeValue("class", "") == "pricebox__price")?
                             .InnerText
                             .Replace(",-", "00")
                             .RemoveNonNumeric();

            var regularPrice = productNode.Descendants("span")
                               .FirstOrDefault(x => x.GetAttributeValue("class", "") == "pricebox__recommended-retail-price")?
                               .InnerText
                               .Replace(",-", "00")
                               .RemoveNonNumeric();

            // TryParse returns false for null/unparsable text, leaving the corresponding value untouched.
            if (decimal.TryParse(promoPrice, out decimal promoPriceDecimal))
            {
                result.Value = promoPriceDecimal / 100;
            }
            if (decimal.TryParse(regularPrice, out decimal regularPriceDecimal))
            {
                result.SaleValue = regularPriceDecimal / 100;
            }

            // Only products with both prices present are reported.
            if (result.Value == 0 || result.SaleValue == 0)
            {
                return new Product();
            }

            result.OnSale = true;

            #endregion

            #region Get Sale Description

            #endregion

            #region Get Sale Deadline

            #endregion

            #region Get Seller, TimeStamp, URL

            result.Seller    = this.GetType().Name.Replace("Crawler", "");
            result.TimeStamp = DateTime.Now;

            // Default to the crawled page; a blank href would otherwise resolve to the bare base URL,
            // and a malformed one would throw out of the extractor.
            result.SourceUrl = linkStruct.Link;

            var productUrl = productNode.GetAttributeValue("href", "");
            if (!string.IsNullOrWhiteSpace(productUrl)
                && Uri.TryCreate(new Uri(BaseUrls[0]), productUrl, out Uri absoluteUrl))
            {
                result.SourceUrl = absoluteUrl.ToString();
            }

            #endregion

            return result;
        }