コード例 #1
0
        /// <summary>
        /// Parses the HTML carried by <paramref name="linkStruct"/> and extracts one product for every
        /// <c>div</c> whose <c>class</c> attribute matches "product" (compared with <c>EqualsTrim</c>).
        /// </summary>
        /// <param name="linkStruct">Crawled page; its <c>Html</c> is parsed and the struct is forwarded to <c>ExtractProduct</c>.</param>
        /// <returns>
        /// The extracted products in the order their nodes were found, without <c>null</c> entries
        /// and without entries whose <c>Name</c> is null or empty.
        /// </returns>
        /// <exception cref="AggregateException">Thrown by <c>Task.WaitAll</c> when any extraction task fails.</exception>
        public override List<Product> GetResultsForSingleUrl(LinkStruct linkStruct)
        {
            var htmlDocument = new HtmlDocument();

            htmlDocument.LoadHtml(linkStruct.Html);

            var divs = htmlDocument.DocumentNode.Descendants("div")
                       .Where(node => node.GetAttributeValue("class", "")
                              .EqualsTrim("product"))
                       .ToList();

            // Every task hands its product back through Task<Product>.Result. Nothing is written to a
            // shared list from worker threads, because List<T> does not support concurrent writers.
            var tasks = divs
                        .Select(div => Task.Run<Product>(() => ExtractProduct(div, linkStruct)))
                        .ToArray();

            Task.WaitAll(tasks);

            var result = new List<Product>(tasks.Length);

            foreach (var task in tasks)
            {
                var product = task.Result;

                // Extractors signal "nothing usable" with null or a product that has no name.
                if (product != null && !string.IsNullOrEmpty(product.Name))
                {
                    result.Add(product);
                }
            }

            result.TrimExcess();
            return result;
        }
コード例 #2
0
ファイル: CrawlerAldi.cs プロジェクト: ViersJW/TanieZarcie
        /// <summary>
        /// Parses the HTML carried by <paramref name="linkStruct"/> and extracts one product for every
        /// <c>div</c> whose <c>data-t-name</c> attribute contains "ArticleTile".
        /// </summary>
        /// <param name="linkStruct">Crawled page; its <c>Html</c> is parsed and the struct is forwarded to <c>ExtractProduct</c>.</param>
        /// <returns>
        /// The extracted products in the order their nodes were selected, without <c>null</c> entries
        /// and without entries whose <c>Name</c> is null or empty. Empty when no tile matches.
        /// </returns>
        /// <exception cref="AggregateException">Thrown by <c>Task.WaitAll</c> when any extraction task fails.</exception>
        public override List<Product> GetResultsForSingleUrl(LinkStruct linkStruct)
        {
            var result       = new List<Product>();
            var htmlDocument = new HtmlDocument();

            htmlDocument.LoadHtml(linkStruct.Html);

            // The result of SelectNodes is null-checked here, as in the original code, before use.
            var divs = htmlDocument.DocumentNode.SelectNodes("//div[contains(@data-t-name, 'ArticleTile')]")?.ToList();

            if (divs == null)
            {
                return result;
            }

            // Every task hands its product back through Task<Product>.Result. Nothing is written to a
            // shared list from worker threads, because List<T> does not support concurrent writers.
            var tasks = divs
                        .Select(div => Task.Run<Product>(() => ExtractProduct(div, linkStruct)))
                        .ToArray();

            Task.WaitAll(tasks);

            foreach (var task in tasks)
            {
                var product = task.Result;

                // Extractors signal "nothing usable" with null or a product that has no name.
                if (product != null && !string.IsNullOrEmpty(product.Name))
                {
                    result.Add(product);
                }
            }

            result.TrimExcess();
            return result;
        }
コード例 #3
0
        /// <summary>
        /// Reads the JSON embedded in the page's <c>script</c> element with id <c>__NEXT_DATA__</c>,
        /// deserializes it into a <c>CarrefourJsonModel</c> and extracts one product per entry of
        /// <c>props.initialState.products.data.content</c>.
        /// </summary>
        /// <param name="linkStruct">Crawled page; its <c>Html</c> is parsed and the struct is forwarded to <c>ExtractProduct</c>.</param>
        /// <returns>
        /// The extracted products in the order of the JSON entries, without <c>null</c> entries and
        /// without entries whose <c>Name</c> is null or empty. Empty when the script element, its
        /// text, or any part of the product path in the model is missing.
        /// </returns>
        /// <exception cref="AggregateException">Thrown by <c>Task.WaitAll</c> when any extraction task fails.</exception>
        public override List<SearchResultsModels.Product> GetResultsForSingleUrl(LinkStruct linkStruct)
        {
            var result       = new List<SearchResultsModels.Product>();
            var htmlDocument = new HtmlDocument();

            htmlDocument.LoadHtml(linkStruct.Html);

            var json = htmlDocument.DocumentNode.Descendants("script").FirstOrDefault(x => x.Id == "__NEXT_DATA__")?.InnerText;

            if (string.IsNullOrEmpty(json))
            {
                return result;
            }

            var dataSource = JsonConvert.DeserializeObject<CarrefourJsonModel>(json);

            // A page without a product listing can still carry __NEXT_DATA__; treat any missing
            // link in the chain as "no products" instead of failing with a NullReferenceException.
            // NOTE(review): assumes the model members are reference types — confirm against CarrefourJsonModel.
            var content = dataSource?.props?.initialState?.products?.data?.content;

            if (content == null)
            {
                return result;
            }

            // Every task hands its product back through Task<T>.Result. Nothing is written to a
            // shared list from worker threads, because List<T> does not support concurrent writers.
            var tasks = new List<Task<SearchResultsModels.Product>>();

            foreach (var productData in content)
            {
                var dataToPass = productData;
                tasks.Add(Task.Run<SearchResultsModels.Product>(() => ExtractProduct(dataToPass, linkStruct)));
            }

            Task.WaitAll(tasks.ToArray());

            foreach (var task in tasks)
            {
                var product = task.Result;

                // Extractors signal "nothing usable" with null or a product that has no name.
                if (product != null && !string.IsNullOrEmpty(product.Name))
                {
                    result.Add(product);
                }
            }

            result.TrimExcess();
            return result;
        }
コード例 #4
0
        /// <summary>
        /// Unfinished product extractor: the only member it assigns on the returned
        /// <see cref="Product"/> is <c>Category</c> ("Markety Elektroniczne"). All other regions
        /// are empty placeholders or commented-out code.
        /// </summary>
        /// <param name="productNode">HTML node of a single product; only inspected by the (currently empty) old-price check.</param>
        /// <param name="linkStruct">Source page; not used by any active code in this method.</param>
        /// <returns>A new <see cref="Product"/> with only <c>Category</c> set.</returns>
        private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            var result = new Product();

            #region Check if product node exists

            // Looks for a descendant div whose class attribute is exactly "m-priceBox_old".
            // NOTE(review): the branch body is empty, so the outcome of this check is currently ignored.
            if (productNode.Descendants("div").Any(n => n.Attributes.Any(x => x.Name == "class" && x.Value == "m-priceBox_old")))
            {
            }

            #endregion

            #region Get Name


            #endregion

            #region Get Description

            #endregion

            #region Get Producer

            #endregion

            #region Get Category

            // Category is a fixed label; it is not read from the page.
            result.Category = "Markety Elektroniczne";

            #endregion

            #region Get Price and Sale Price



            #endregion

            #region Get Sale Description

            #endregion

            #region Get Sale Deadline

            #endregion

            #region Get Seller, TimeStamp, URL

            // Disabled placeholder code: Seller, TimeStamp and SourceUrl are not populated yet.
            //result.Seller = this.GetType().Name.Replace("Crawler", "");
            //result.TimeStamp = DateTime.Now;
            //result.SourceUrl = linkStruct.Link;

            //var productUrl = productNode.GetAttributeValue("href", "");

            //result.SourceUrl = new Uri(new Uri(BaseUrls[0]), productUrl).ToString();

            #endregion

            return(result);
        }
コード例 #5
0
ファイル: CrawlerObi.cs プロジェクト: ViersJW/TanieZarcie
        /// <summary>
        /// Parses the HTML carried by <paramref name="linkStruct"/> and extracts one product for every
        /// <c>li</c> whose <c>class</c> attribute satisfies <c>ContainsAny("product large")</c>.
        /// </summary>
        /// <remarks>
        /// Only list items are processed. The "teaser-hochkant border-orange single-prod" div tiles
        /// were previously collected but their extraction was disabled, so they never influenced
        /// the result; that lookup has been removed.
        /// </remarks>
        /// <param name="linkStruct">Crawled page; its <c>Html</c> is parsed and the struct is forwarded to <c>ExtractListProduct</c>.</param>
        /// <returns>
        /// The extracted products, without <c>null</c> entries and without entries whose
        /// <c>Name</c> is null or empty. Empty when no list item matches.
        /// </returns>
        /// <exception cref="AggregateException">Thrown by <c>Task.WaitAll</c> when any extraction task fails.</exception>
        public override List<Product> GetResultsForSingleUrl(LinkStruct linkStruct)
        {
            var result       = new List<Product>();
            var htmlDocument = new HtmlDocument();

            htmlDocument.LoadHtml(linkStruct.Html);

            var listDivs = htmlDocument.DocumentNode.Descendants("li")
                           .AsParallel()
                           .Where(node => node.GetAttributeValue("class", "")
                                  .ContainsAny("product large"))
                           .ToList();

            if (listDivs.Count == 0)
            {
                return result;
            }

            // Every task hands its product back through Task<Product>.Result. Nothing is written to a
            // shared list from worker threads, because List<T> does not support concurrent writers.
            var tasks = listDivs
                        .Select(container => Task.Run<Product>(() => ExtractListProduct(container, linkStruct)))
                        .ToArray();

            Task.WaitAll(tasks);

            foreach (var task in tasks)
            {
                var product = task.Result;

                // Extractors signal "nothing usable" with a product that has no name.
                if (product != null && !string.IsNullOrEmpty(product.Name))
                {
                    result.Add(product);
                }
            }

            result.TrimExcess();
            return result;
        }
コード例 #6
0
        /// <summary>
        /// Builds a <see cref="Product"/> from a product teaser node that contains an old-price
        /// element (a <c>div</c> with class "price price--prev oldPriceMarker trans").
        /// </summary>
        /// <param name="productNode">HTML node of a single product teaser.</param>
        /// <param name="linkStruct">Page the node came from; its <c>Link</c> becomes the product's <c>SourceUrl</c>.</param>
        /// <returns>
        /// The populated product, or an empty <see cref="Product"/> when the node has no old-price
        /// element. A price whose integer or fractional span is missing is left at its default
        /// value instead of raising a <see cref="NullReferenceException"/>.
        /// </returns>
        private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            const string oldPriceClass    = "price price--prev oldPriceMarker trans";
            const string promoParentClass = "product-teaser__content__details change";

            // Concatenates the digits of the "price-int" and "price-dec" spans below priceNode.
            // Returns null when the node or either span is missing, so a partial price is never parsed.
            string ReadPriceDigits(HtmlNode priceNode)
            {
                if (priceNode == null)
                {
                    return null;
                }

                var integerSpan = priceNode.Descendants("span")
                                  .FirstOrDefault(x => x.GetAttributeValue("class", "") == "price-int");
                var fractionSpan = priceNode.Descendants("span")
                                   .FirstOrDefault(x => x.GetAttributeValue("class", "") == "price-dec");

                if (integerSpan == null || fractionSpan == null)
                {
                    return null;
                }

                return integerSpan.InnerText.RemoveNonNumeric() + fractionSpan.InnerText.RemoveNonNumeric();
            }

            #region Check if product node exists

            var oldPriceNode = productNode.Descendants("div")
                               .FirstOrDefault(x => x.GetAttributeValue("class", "") == oldPriceClass);

            if (oldPriceNode == null)
            {
                return new Product();
            }

            #endregion

            var result = new Product();

            #region Get Name

            result.Name = productNode.Descendants("p")
                          .FirstOrDefault(x => x.GetAttributeValue("class", "") == "product-teaser__content__head__title")?
                          .InnerText
                          .Trim();

            #endregion

            #region Get Category

            result.Category = "Markety Spożywcze";

            #endregion

            #region Get Price and Sale Price

            var promoPriceNode = productNode.Descendants("div")
                                 .FirstOrDefault(x => x.GetAttributeValue("class", "") == "price" &&
                                                 x.ParentNode.GetAttributeValue("class", "") == promoParentClass);

            var regularPrice = ReadPriceDigits(oldPriceNode);
            var promoPrice   = ReadPriceDigits(promoPriceNode);

            // The digit strings carry the fractional part as their last two digits, hence "/ 100".
            // NOTE(review): the promo price goes to Value and the old price to SaleValue — confirm
            // this matches the intended meaning of those Product members.
            if (decimal.TryParse(promoPrice, out decimal promoPriceDecimal))
            {
                result.Value = promoPriceDecimal / 100;
            }
            if (decimal.TryParse(regularPrice, out decimal regularPriceDecimal))
            {
                result.SaleValue = regularPriceDecimal / 100;
            }

            result.OnSale = true;

            #endregion

            #region Get Seller, TimeStamp, URL

            // Seller is derived from the crawler's type name with the "Crawler" part removed.
            result.Seller    = this.GetType().Name.Replace("Crawler", "");
            result.TimeStamp = DateTime.Now;
            result.SourceUrl = linkStruct.Link;

            #endregion

            return result;
        }
コード例 #7
0
        /// <summary>
        /// Builds a <see cref="Product"/> from a product node by locating descendants through their
        /// <c>class</c> attribute ("content", "about", "prices", "discount", "normal", "standard",
        /// "p-nb", "p-cents"), matched with <c>CrawlerRegex.StandardMatch</c>.
        /// </summary>
        /// <param name="productNode">HTML node of a single product.</param>
        /// <param name="linkStruct">Page the node came from; its <c>Link</c> becomes the product's <c>SourceUrl</c>.</param>
        /// <returns>
        /// The populated product, or an empty <see cref="Product"/> when the node has no "content"
        /// element, no name, no "prices" element, or no parsable regular price.
        /// </returns>
        private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            // True when the node has a class attribute that StandardMatch accepts for the given pattern.
            bool HasClass(HtmlNode node, string pattern, MatchDireciton direction)
            {
                return node.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, pattern, direction));
            }

            // Digits of the first descendant of priceNode carrying the given class; null when absent.
            string ReadDigits(HtmlNode priceNode, string pattern)
            {
                return priceNode?.Descendants()
                       .FirstOrDefault(x => HasClass(x, pattern, MatchDireciton.Equals))?
                       .InnerText
                       .RemoveNonNumeric();
            }

            #region Check if product node exists

            if (!productNode.Descendants().Any(x => HasClass(x, "content", MatchDireciton.Equals)))
            {
                return new Product();
            }

            #endregion

            var result = new Product();

            #region Get Name

            var name = productNode.Descendants()
                       .FirstOrDefault(x => HasClass(x, "about", MatchDireciton.InputContainsMatch))?
                       .InnerText;

            // Without this guard a missing "about" element passed null on to RemoveMetaCharacters.
            if (string.IsNullOrEmpty(name))
            {
                return new Product();
            }

            result.Name = CrawlerRegex.RemoveMetaCharacters(name).Trim();

            #endregion

            #region Get Description

            if (CrawlerRegex.StandardMatch(linkStruct.Link, "direct", MatchDireciton.InputContainsMatch))
            {
                result.Description = "Znalezione na Auchan Direct!";
            }

            #endregion

            #region Get Category

            result.Category = "Markety Budowlane";

            #endregion

            #region Get Price and Sale Price, set OnSale Flag

            var priceNode = productNode.Descendants()
                            .FirstOrDefault(x => HasClass(x, "prices", MatchDireciton.Equals));

            if (priceNode == null)
            {
                return new Product();
            }

            var discountNode = priceNode.Descendants()
                               .FirstOrDefault(x => HasClass(x, "discount", MatchDireciton.Equals));

            // The digit strings carry the fractional part as their last two digits, hence "/ 100".
            if (discountNode != null)
            {
                var regularPrice = discountNode.InnerText.RemoveNonNumeric();

                // The "normal" element is searched in the whole product node, as before; it may be
                // absent, in which case the sale price simply stays unset.
                var promoPriceNode = productNode.Descendants()
                                     .FirstOrDefault(x => HasClass(x, "normal", MatchDireciton.Equals));

                var promoPricePLN = ReadDigits(promoPriceNode, "p-nb");
                var promoPriceGR  = ReadDigits(promoPriceNode, "p-cents");

                if (decimal.TryParse(regularPrice, out decimal priceDecimal))
                {
                    result.Value = priceDecimal / 100;
                }
                if (decimal.TryParse(promoPricePLN + promoPriceGR, out decimal promoPriceDecimal))
                {
                    result.SaleValue = promoPriceDecimal / 100;
                }

                result.OnSale = true;
            }
            else
            {
                // A missing "standard" element leaves Value at 0, which is rejected just below.
                var regularPriceNode = priceNode.Descendants()
                                       .FirstOrDefault(x => HasClass(x, "standard", MatchDireciton.Equals));

                var regularPricePLN = ReadDigits(regularPriceNode, "p-nb");
                var regularPriceGR  = ReadDigits(regularPriceNode, "p-cents");

                if (decimal.TryParse(regularPricePLN + regularPriceGR, out decimal priceDecimal))
                {
                    result.Value = priceDecimal / 100;
                }

                result.OnSale = false;
            }

            if (result.Value == 0)
            {
                return new Product();
            }

            #endregion

            #region Get Seller, TimeStamp, URL

            // Seller is derived from the crawler's type name with the "Crawler" part removed.
            result.Seller    = this.GetType().Name.Replace("Crawler", "");
            result.TimeStamp = DateTime.Now;
            result.SourceUrl = linkStruct.Link;

            #endregion

            return result;
        }
コード例 #8
0
ファイル: CrawlerObi.cs プロジェクト: ViersJW/TanieZarcie
        /// <summary>
        /// Builds a <see cref="Product"/> from a list-item product node. The name is read from the
        /// first <c>p</c> below the "description" element; prices are read from the "price"
        /// element and its "price-old" descendant.
        /// </summary>
        /// <param name="productNode">HTML node of a single list product.</param>
        /// <param name="linkStruct">Page the node came from; its <c>Link</c> becomes the product's <c>SourceUrl</c>.</param>
        /// <returns>
        /// The populated product, or an empty <see cref="Product"/> when the description, the name,
        /// the price element, the old-price element or a parsable promo price is missing.
        /// </returns>
        private Product ExtractListProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            // True when the node has a class attribute that StandardMatch accepts for the given pattern.
            bool HasClass(HtmlNode node, string pattern, MatchDireciton direction)
            {
                return node.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, pattern, direction));
            }

            #region Check if product node exists, Get Name

            var nameNode = productNode.Descendants()
                           .FirstOrDefault(x => HasClass(x, "description", MatchDireciton.Equals));

            if (nameNode == null)
            {
                return new Product();
            }

            var name = nameNode
                       .Descendants("p")
                       .FirstOrDefault()?
                       .InnerText;

            if (string.IsNullOrEmpty(name))
            {
                return new Product();
            }

            var result = new Product();

            result.Name = CrawlerRegex.RemoveMetaCharacters(name).Trim();

            #endregion

            #region Get Category

            result.Category = "Markety Budowlane";

            #endregion

            #region Get Price and Sale Price, set OnSale Flag

            var priceNode = productNode.Descendants()
                            .FirstOrDefault(x => HasClass(x, "price", MatchDireciton.Equals));

            // Without this guard a list item lacking a "price" element raised a NullReferenceException.
            if (priceNode == null)
            {
                return new Product();
            }

            var oldPriceNode = priceNode.Descendants()
                               .FirstOrDefault(x => HasClass(x, "price-old", MatchDireciton.InputContainsMatch));

            // Only items that show an old price are returned.
            if (oldPriceNode == null)
            {
                return new Product();
            }

            var regularPrice = oldPriceNode
                               .Descendants("del")
                               .FirstOrDefault()?
                               .Descendants("span")
                               .FirstOrDefault()?
                               .InnerText
                               .RemoveNonNumeric();

            // First span of the price element that neither contains a <del> nor sits directly inside one.
            var promoPrice = priceNode
                             .Descendants("span")
                             .FirstOrDefault(d => !d.Descendants().Any(c => c.Name == "del") && d.ParentNode.Name != "del")?
                             .InnerText
                             .RemoveNonNumeric();

            // The digit strings carry the fractional part as their last two digits, hence "/ 100".
            if (decimal.TryParse(promoPrice, out decimal promoPriceDecimal))
            {
                result.Value = promoPriceDecimal / 100;
            }
            if (decimal.TryParse(regularPrice, out decimal regularPriceDecimal))
            {
                result.SaleValue = regularPriceDecimal / 100;
            }

            result.OnSale = true;

            if (result.Value == 0)
            {
                return new Product();
            }

            #endregion

            #region Get Seller, TimeStamp, URL

            // Seller is derived from the crawler's type name with the "Crawler" part removed.
            result.Seller    = this.GetType().Name.Replace("Crawler", "");
            result.TimeStamp = DateTime.Now;
            result.SourceUrl = linkStruct.Link;

            #endregion

            return result;
        }
コード例 #9
0
ファイル: Program.cs プロジェクト: OrganizationUsername/Towel
        public static void Main()
        {
            Random random = new();
            int    test   = 10;

            Console.WriteLine("You are runnning the Data Structures example.");
            Console.WriteLine("======================================================");
            Console.WriteLine();

            #region Link (aka Tuple)
            {
                Console.WriteLine("  Link------------------------------------");
                Console.WriteLine();
                Console.WriteLine("    A \"Link\" is like a System.Tuple that implements");
                Console.WriteLine("    Towel.DataStructures.IDataStructure<T>. A Link/Tuple is");
                Console.WriteLine("    used when you have a small, known-sized set of objects");
                Console.WriteLine("    that you want to bundle together without making a custom");
                Console.WriteLine("    custom class.");
                Console.WriteLine();

                Link link = new Link <int, string, char, float, decimal>(1, "2", '3', 4f, 5m);
                Console.Write("    Traversal: ");
                link.Stepper(Console.Write);
                Console.WriteLine();

                Link linkStruct = new LinkStruct <int, string, char, float, decimal>(1, "2", '3', 4f, 5m);
                Console.Write("    Traversal: ");
                link.Stepper(Console.Write);
                Console.WriteLine();

                Pause();
            }
            #endregion

            #region Array
            {
                Console.WriteLine("  Array---------------------------------");
                Console.WriteLine();
                Console.WriteLine(@$ "    An Array<T> is just a wrapper for arrays that implements");
                Console.WriteLine(@$ "    Towel.DataStructures.IDataStructure<T>. An array is used when");
                Console.WriteLine(@$ "    dealing with static-sized, known-sized sets of data. Arrays");
                Console.WriteLine(@$ "    can be sorted along 1 dimensions for binary searching algorithms.");
                Console.WriteLine();

                IArray <int> array = new Array <int>(test);

                Console.Write($"    Filling in (0-{test - 1})...");
                for (int i = 0; i < test; i++)
                {
                    array[i] = i;
                }
                Console.WriteLine();

                Console.Write("    Traversal: ");
                array.Stepper(Console.Write);
                Console.WriteLine();

                Console.WriteLine($"    Length: {array.Length}");

                Pause();
            }
            #endregion

            #region List
            {
                Console.WriteLine("  List---------------------------------");
                Console.WriteLine();
                Console.WriteLine("    An List is like an IList that implements");
                Console.WriteLine("    Towel.DataStructures.IDataStructure<T>. \"ListArray\" is");
                Console.WriteLine("    the array implementation while \"ListLinked\" is the");
                Console.WriteLine("    the linked-list implementation. An List is used");
                Console.WriteLine("    when dealing with an unknown quantity of data that you");
                Console.WriteLine("    will likely have to enumerate/step through everything. The");
                Console.WriteLine("    ListArray shares the properties of an Array in");
                Console.WriteLine("    that it can be relateively quickly sorted along 1 dimensions");
                Console.WriteLine("    for binary search algorithms.");
                Console.WriteLine();

                // ListArray ---------------------------------------
                IList <int> listArray = new ListArray <int>(test);

                Console.Write($"    [ListArray] Adding (0-{test - 1})...");
                for (int i = 0; i < test; i++)
                {
                    listArray.Add(i);
                }
                Console.WriteLine();

                Console.Write("    [ListArray] Traversal: ");
                listArray.Stepper(Console.Write);
                Console.WriteLine();

                Console.WriteLine($"    [ListArray] Count: {listArray.Count}");

                listArray.Clear();

                Console.WriteLine();

                // ListLinked ---------------------------------------
                IList <int> listLinked = new ListLinked <int>();

                Console.Write($"    [ListLinked] Adding (0-{test - 1})...");
                for (int i = 0; i < test; i++)
                {
                    listLinked.Add(i);
                }
                Console.WriteLine();

                Console.Write("    [ListLinked] Traversal: ");
                listLinked.Stepper(Console.Write);
                Console.WriteLine();

                Console.WriteLine($"    [ListLinked] Count: {listLinked.Count}");

                listLinked.Clear();

                Pause();
            }
            #endregion

            #region Stack
            {
                Console.WriteLine("  Stack---------------------------------");
                Console.WriteLine();
                Console.WriteLine("    An \"Stack\" is a Stack that implements");
                Console.WriteLine("    Towel.DataStructures.IDataStructure<T>. \"StackArray\" is");
                Console.WriteLine("    the array implementation while \"StackLinked\" is the");
                Console.WriteLine("    the linked-list implementation. A Stack is used");
                Console.WriteLine("    specifically when you need the algorithm provided by the Push");
                Console.WriteLine("    and Pop functions.");
                Console.WriteLine();

                IStack <int> stackArray = new StackArray <int>();

                Console.Write($"    [StackArray] Pushing (0-{test - 1})...");
                for (int i = 0; i < test; i++)
                {
                    stackArray.Push(i);
                }
                Console.WriteLine();

                Console.Write("    [StackArray] Traversal: ");
                stackArray.Stepper(Console.Write);
                Console.WriteLine();

                Console.WriteLine($"    [StackArray] Pop: {stackArray.Pop()}");
                Console.WriteLine($"    [StackArray] Pop: {stackArray.Pop()}");
                Console.WriteLine($"    [StackArray] Peek: {stackArray.Peek()}");
                Console.WriteLine($"    [StackArray] Pop: {stackArray.Pop()}");
                Console.WriteLine($"    [StackArray] Count: {stackArray.Count}");

                stackArray.Clear();

                Console.WriteLine();

                IStack <int> stackLinked = new StackLinked <int>();

                Console.Write($"    [StackLinked] Pushing (0-{test - 1})...");
                for (int i = 0; i < test; i++)
                {
                    stackLinked.Push(i);
                }
                Console.WriteLine();

                Console.Write("    [StackLinked] Traversal: ");
                stackLinked.Stepper(Console.Write);
                Console.WriteLine();

                Console.WriteLine($"    [StackLinked] Pop: {stackLinked.Pop()}");
                Console.WriteLine($"    [StackLinked] Pop: {stackLinked.Pop()}");
                Console.WriteLine($"    [StackLinked] Peek: {stackLinked.Peek()}");
                Console.WriteLine($"    [StackLinked] Pop: {stackLinked.Pop()}");
                Console.WriteLine($"    [StackLinked] Count: {stackLinked.Count}");

                stackLinked.Clear();

                Pause();
            }
            #endregion

            #region Queue
            {
                Console.WriteLine("  Queue---------------------------------");
                Console.WriteLine();
                Console.WriteLine("    An \"Queue\" is a Queue that implements");
                Console.WriteLine("    Towel.DataStructures.IDataStructure<T>. \"QueueArray\" is");
                Console.WriteLine("    the array implementation while \"QueueLinked\" is the");
                Console.WriteLine("    the linked-list implementation. A Queue/Stack is used");
                Console.WriteLine("    specifically when you need the algorithm provided by the Queue");
                Console.WriteLine("    and Dequeue functions.");
                Console.WriteLine();

                IQueue <int> queueArray = new QueueArray <int>();

                Console.Write($"    [QueueArray] Enqueuing (0-{test - 1})...");
                for (int i = 0; i < test; i++)
                {
                    queueArray.Enqueue(i);
                }
                Console.WriteLine();

                Console.Write("    [QueueArray] Traversal: ");
                queueArray.Stepper(Console.Write);
                Console.WriteLine();

                Console.WriteLine($"    [QueueArray] Dequeue: {queueArray.Dequeue()}");
                Console.WriteLine($"    [QueueArray] Dequeue: {queueArray.Dequeue()}");
                Console.WriteLine($"    [QueueArray] Peek: {queueArray.Peek()}");
                Console.WriteLine($"    [QueueArray] Dequeue: {queueArray.Dequeue()}");
                Console.WriteLine($"    [QueueArray] Count: {queueArray.Count}");

                queueArray.Clear();

                Console.WriteLine();

                IQueue <int> queueLinked = new QueueLinked <int>();

                Console.Write($"    [QueueLinked] Enqueuing (0-{test - 1})...");
                for (int i = 0; i < test; i++)
                {
                    queueLinked.Enqueue(i);
                }
                Console.WriteLine();

                Console.Write("    [QueueLinked] Traversal: ");
                queueLinked.Stepper(Console.Write);
                Console.WriteLine();

                Console.WriteLine($"    [QueueLinked] Pop: {queueLinked.Dequeue()}");
                Console.WriteLine($"    [QueueLinked] Pop: {queueLinked.Dequeue()}");
                Console.WriteLine($"    [QueueLinked] Peek: {queueLinked.Peek()}");
                Console.WriteLine($"    [QueueLinked] Pop: {queueLinked.Dequeue()}");
                Console.WriteLine($"    [QueueLinked] Count: {queueLinked.Count}");

                queueLinked.Clear();

                Pause();
            }
            #endregion

            #region Heap
            {
                Console.WriteLine("  Heap---------------------------------");
                Console.WriteLine();
                Console.WriteLine("    An \"Heap\" is a binary tree that stores items based on priorities.");
                Console.WriteLine("    It implements Towel.DataStructures.IDataStructure<T> like the others.");
                Console.WriteLine("    It uses sifting algorithms to move nodes vertically through itself.");
                Console.WriteLine("    It is often the best data structure for standard priority queues.");
                Console.WriteLine("    \"HeapArray\" is an implementation where the tree has been flattened");
                Console.WriteLine("    into an array.");
                Console.WriteLine();

                Console.WriteLine("    Let's say the priority is how close a number is to \"5\".");
                Console.WriteLine("    So \"Dequeue\" will give us the next closest value to \"5\".");
コード例 #10
0
        /// <summary>
        /// Builds a <see cref="Product"/> from a single offer tile.
        /// </summary>
        /// <param name="productNode">Root node of the tile; its descendants are searched by their class attribute.</param>
        /// <param name="linkStruct">Link the tile came from. Not read by this method.</param>
        /// <returns>
        /// The filled product, or an empty <see cref="Product"/> when the tile has no price tag
        /// or one of its prices cannot be parsed.
        /// </returns>
        private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            var product = new Product();

            // True when the node has a "class" attribute that CrawlerRegex matches against wantedClass.
            bool ClassMatches(HtmlNode node, string wantedClass, MatchDireciton direction)
            {
                return node.Attributes.Any(attribute =>
                    attribute.Name == "class" && CrawlerRegex.StandardMatch(attribute.Value, wantedClass, direction));
            }

            // First descendant whose class equals wantedClass, or null when there is none.
            HtmlNode FirstByClass(string wantedClass)
            {
                return productNode.Descendants().FirstOrDefault(node => ClassMatches(node, wantedClass, MatchDireciton.Equals));
            }

            // Digits-only text of the first descendant with the given class, or null when the node is missing.
            string DigitsByClass(string wantedClass)
            {
                return FirstByClass(wantedClass)?.InnerText.RemoveMetaCharacters().RemoveNonNumeric();
            }

            // A tile without any price tag is not a product.
            if (!productNode.Descendants().Any(node => ClassMatches(node, "a-pricetag__price", MatchDireciton.InputContainsMatch)))
            {
                return new Product();
            }

            // Name = subtitle and title, in that order, skipping whichever is missing or empty.
            var nameParts = new[] { "m-offer-tile__subtitle", "m-offer-tile__title" }
                            .Select(cssClass => FirstByClass(cssClass)?.InnerText?.RemoveMetaCharacters())
                            .Where(part => !string.IsNullOrEmpty(part))
                            .ToArray();

            product.Name     = string.Join(", ", nameParts);
            product.Category = "Markety Spożywcze";

            // An old-price element marks the tile as discounted.
            var hasOldPrice = productNode.Descendants().Any(node => ClassMatches(node, "a-pricetag__old-price", MatchDireciton.Equals));

            var currentPriceText = DigitsByClass("a-pricetag__price");
            var oldPriceText     = hasOldPrice ? DigitsByClass("a-pricetag__old-price") : null;

            if (!decimal.TryParse(currentPriceText, out decimal currentPrice))
            {
                return new Product();
            }

            if (hasOldPrice)
            {
                if (!decimal.TryParse(oldPriceText, out decimal oldPrice))
                {
                    return new Product();
                }

                // SaleValue receives the old price, Value the current one.
                product.SaleValue = oldPrice / 100;
            }

            // The digit string is divided by 100 (presumably it is in grosze - TODO confirm).
            product.Value  = currentPrice / 100;
            product.OnSale = hasOldPrice;

            // Any discount badge also flags the product as on sale.
            var discountLabels = productNode.Descendants()
                                 .Where(node => ClassMatches(node, "a-pricetag__discount", MatchDireciton.Equals))
                                 .Select(node => node.InnerText.RemoveMetaCharacters())
                                 .ToArray();

            if (discountLabels.Length > 0)
            {
                product.SaleDescription = string.Join(", ", discountLabels);
                product.OnSale          = true;
            }

            product.Seller    = GetType().Name.Replace("Crawler", "");
            product.TimeStamp = DateTime.Now;

            // Resolve the tile link against the first base URL.
            var offerLink = productNode.Descendants("a")
                            .FirstOrDefault(anchor => ClassMatches(anchor, "m-offer-tile__link", MatchDireciton.InputContainsMatch));
            var relativeUrl = offerLink?.GetAttributeValue("href", "");

            product.SourceUrl = new Uri(new Uri(BaseUrls[0]), relativeUrl).ToString();

            return product;
        }
コード例 #11
0
        /// <summary>
        /// Maps one <see cref="Content"/> entry to a search-result product.
        /// </summary>
        /// <param name="data">Entry whose <c>actualSku</c>, <c>displayName</c> and <c>url</c> are read.</param>
        /// <param name="linkStruct">Link of the page; only its host is used to build the product URL.</param>
        /// <returns>
        /// The filled product, or an empty product when the SKU is not a promotion or when,
        /// with at least one price string present, the gross price exceeds the old price.
        /// </returns>
        private SearchResultsModels.Product ExtractProduct(Content data, LinkStruct linkStruct)
        {
            var product = new SearchResultsModels.Product();

            // Only promoted SKUs are kept.
            if (!data.actualSku.promotion)
            {
                return new SearchResultsModels.Product();
            }

            // The price comparison is only made when at least one price string is present.
            var hasPriceText = !string.IsNullOrEmpty(data.actualSku.amount.actualOldPriceString) ||
                               !string.IsNullOrEmpty(data.actualSku.amount.actualGrossPriceString);

            if (hasPriceText && data.actualSku.amount.actualGrossPrice > data.actualSku.amount.actualOldPrice)
            {
                return new SearchResultsModels.Product();
            }

            product.OnSale   = true;
            product.Name     = data.displayName;
            product.Category = "Markety Spożywcze";

            // SaleValue receives the old price, Value the current gross price.
            product.SaleValue = (decimal)data.actualSku.amount.actualOldPrice;
            product.Value     = (decimal)data.actualSku.amount.actualGrossPrice;

            product.Seller    = GetType().Name.Replace("Crawler", "");
            product.TimeStamp = DateTime.Now;

            // Resolve the entry URL against the https root of the page's host.
            var host     = new Uri(linkStruct.Link).Host;
            var siteRoot = new Uri($"https://{host}/");

            product.SourceUrl = new Uri(siteRoot, data.url).ToString();

            return product;
        }
コード例 #12
0
        /// <summary>
        /// Parses the HTML of one link and stores the products found under that link in
        /// <paramref name="resultDictionary"/>.
        /// </summary>
        /// <param name="resultDictionary">Receives the product list keyed by <c>linkStruct.Link</c>.</param>
        /// <param name="linkStruct">Supplies the HTML to parse and the link used as key and product URL.</param>
        /// <remarks>
        /// Nothing is stored when the page has no "productsimple-default" tiles or when the link
        /// is null or already present in the dictionary. Tiles missing a price span or a titled
        /// anchor are skipped.
        /// </remarks>
        public override void GetResultsForSingleUrl(Dictionary <string, List <Product> > resultDictionary, LinkStruct linkStruct)
        {
            var htmlDocument = new HtmlDocument();

            htmlDocument.LoadHtml(linkStruct.Html);

            var divs = htmlDocument.DocumentNode.Descendants("div")
                       .Where(node => node.GetAttributeValue("class", "").Equals("productsimple-default"))
                       .ToList();

            if (divs.Count == 0)
            {
                return;
            }

            var products = new List <Product>();

            foreach (var div in divs)
            {
                var zlNode         = div.Descendants("span").FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("pln"));
                var grNode         = div.Descendants("span").FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("gr"));
                var titleAttribute = div.Descendants("a").FirstOrDefault()?.Attributes["title"];

                // Incomplete tiles are skipped explicitly instead of relying on a swallowed
                // NullReferenceException.
                if (zlNode == null || grNode == null || titleAttribute == null)
                {
                    continue;
                }

                // An unparsable price part is stored as 0 rather than dropping the product.
                var hasZl = int.TryParse(zlNode.InnerText, out int resultZl);
                var hasGr = int.TryParse(grNode.InnerText, out int resultGr);

                products.Add(new Product
                {
                    Provider     = "Biedronka",
                    Name         = titleAttribute.Value,
                    PriceZl      = hasZl ? resultZl : 0,
                    PriceGr      = hasGr ? resultGr : 0,
                    Url          = linkStruct.Link,
                    DownloadDate = DateTime.Now,
                    Category     = "",
                    Mark         = "",
                    Quantity     = 0,
                    Description  = ""
                });
            }

            // The first result stored for a link wins; a null or repeated link is ignored.
            if (linkStruct.Link == null || resultDictionary.ContainsKey(linkStruct.Link))
            {
                return;
            }

            resultDictionary.Add(linkStruct.Link, products);
        }
コード例 #13
0
ファイル: CrawlerLidl.cs プロジェクト: ViersJW/TanieZarcie
        /// <summary>
        /// Parses the HTML of one link and stores the products found under that link in
        /// <paramref name="resultDictionary"/>.
        /// </summary>
        /// <param name="resultDictionary">Receives the product list keyed by <c>linkStruct.Link</c>.</param>
        /// <param name="linkStruct">Supplies the HTML to parse and the link used as dictionary key.</param>
        /// <remarks>
        /// Nothing is stored when the page has no matching product tiles or when the link is null
        /// or already present in the dictionary. A tile is skipped when its price, title, link or
        /// price header element is missing, or when the price text has no comma.
        /// </remarks>
        public override void GetResultsForSingleUrl(Dictionary <string, List <Product> > resultDictionary, LinkStruct linkStruct)
        {
            var htmlDocument = new HtmlDocument();

            htmlDocument.LoadHtml(linkStruct.Html);

            var divs = htmlDocument.DocumentNode.Descendants("div")
                       .Where(node => node.GetAttributeValue("class", "").Equals("product product--tile product--fullbleed"))
                       .ToList();

            if (divs.Count == 0)
            {
                return;
            }

            var products = new List <Product>();

            foreach (var div in divs)
            {
                var priceNode     = div.Descendants("strong").FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("pricefield__price"));
                var titleNode     = div.Descendants("h2").FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("product__title"));
                var hrefAttribute = div.Descendants("a").FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("product__body"))?.Attributes["href"];
                var headerNode    = div.Descendants("span").FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("pricefield__header"));

                // Incomplete tiles are skipped explicitly instead of relying on swallowed exceptions.
                // NOTE(review): a tile without a price header is dropped entirely, as before -
                // confirm that the description is really mandatory.
                if (priceNode == null || titleNode == null || hrefAttribute == null || headerNode == null)
                {
                    continue;
                }

                // Price text is split at the comma into a zloty part and a grosz part.
                string[] splittedPrice = (priceNode.InnerText ?? string.Empty).Replace("zł", "").Split(',');

                if (splittedPrice.Length < 2)
                {
                    continue;
                }

                // An unparsable price part is stored as 0 rather than dropping the product.
                var hasZl = int.TryParse(splittedPrice[0], out int resultZl);
                var hasGr = int.TryParse(splittedPrice[1], out int resultGr);

                products.Add(new Product
                {
                    Provider     = "Lidl",
                    Name         = titleNode.InnerText,
                    PriceZl      = hasZl ? resultZl : 0,
                    PriceGr      = hasGr ? resultGr : 0,
                    Url          = $"{BaseUrlForProducts}{hrefAttribute.Value}",
                    DownloadDate = DateTime.Now,
                    Category     = "",
                    Mark         = "",
                    Quantity     = 0,
                    Description  = headerNode.InnerText
                });
            }

            // The first result stored for a link wins; a null or repeated link is ignored.
            if (linkStruct.Link == null || resultDictionary.ContainsKey(linkStruct.Link))
            {
                return;
            }

            resultDictionary.Add(linkStruct.Link, products);
        }
コード例 #14
0
ファイル: WebCrawler.cs プロジェクト: ViersJW/TanieZarcie
 /// <summary>
 /// Returns the products found for a single link. Each concrete crawler supplies its own
 /// implementation.
 /// </summary>
 /// <param name="linkStruct">The link to process.</param>
 /// <returns>The list of products obtained for that link.</returns>
 public abstract List <Product> GetResultsForSingleUrl(LinkStruct linkStruct);
コード例 #15
0
ファイル: CrawlerLidl.cs プロジェクト: ViersJW/TanieZarcie
        /// <summary>
        /// Builds a <see cref="Product"/> from a single product tile.
        /// </summary>
        /// <param name="productNode">
        /// Tile node; its descendants supply the name and prices and its own "href" attribute
        /// supplies the link.
        /// </param>
        /// <param name="linkStruct">Link the tile came from. Not read by this method.</param>
        /// <returns>
        /// The filled product, or an empty <see cref="Product"/> when the tile has no price box
        /// or does not carry both a non-zero price and a non-zero recommended retail price.
        /// </returns>
        private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            var result = new Product();

            // A tile without any price box is not a product.
            if (!productNode.Descendants().Any(x => x.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, "pricebox__price", MatchDireciton.InputContainsMatch))))
            {
                return new Product();
            }

            // Name = title and price highlight, in that order, skipping whichever is missing or empty.
            var names = new List <string>
            {
                productNode.Descendants()
                .Where(x => x.Attributes.Any(y => y.Name == "class" && y.Value == "product__title"))
                .Select(z => z.InnerText)
                .FirstOrDefault()?
                .RemoveMetaCharacters()
                .Trim(),

                productNode.Descendants()
                .Where(x => x.Attributes.Any(y => y.Name == "class" && y.Value == "pricebox__highlight"))
                .Select(z => z.InnerText)
                .FirstOrDefault()?
                .RemoveMetaCharacters()
                .Trim(),
            };

            names.RemoveAll(x => string.IsNullOrEmpty(x));
            result.Name = String.Join(", ", names.ToArray());

            result.Category = "Markety Spożywcze";

            // ",-" is replaced with "00" before the non-numeric characters are stripped
            // (presumably it marks a price without a fractional part - TODO confirm).
            var promoPrice = productNode.Descendants("span")
                             .FirstOrDefault(x => x.GetAttributeValue("class", "") == "pricebox__price")?
                             .InnerText
                             .Replace(",-", "00")
                             .RemoveNonNumeric();

            var regularPrice = productNode.Descendants("span")
                               .FirstOrDefault(x => x.GetAttributeValue("class", "") == "pricebox__recommended-retail-price")?
                               .InnerText
                               .Replace(",-", "00")
                               .RemoveNonNumeric();

            if (decimal.TryParse(promoPrice, out decimal promoPriceDecimal))
            {
                result.Value = promoPriceDecimal / 100;
            }

            // SaleValue receives the recommended retail price.
            if (decimal.TryParse(regularPrice, out decimal regularPriceDecimal))
            {
                result.SaleValue = regularPriceDecimal / 100;
            }

            // Only tiles carrying both prices are reported.
            if (result.Value == 0 || result.SaleValue == 0)
            {
                return new Product();
            }

            result.OnSale    = true;
            result.Seller    = this.GetType().Name.Replace("Crawler", "");
            result.TimeStamp = DateTime.Now;

            // The previous assignment of linkStruct.Link to SourceUrl was overwritten right away
            // and has been removed; the tile's href resolved against the first base URL is the
            // only value ever kept.
            var productUrl = productNode.GetAttributeValue("href", "");
            result.SourceUrl = new Uri(new Uri(BaseUrls[0]), productUrl).ToString();

            return result;
        }
コード例 #16
0
        /// <summary>
        /// Parses the HTML of one link with every configured pattern in <c>_htmlPattens</c> and
        /// stores the collected products under that link in <paramref name="resultDictionary"/>.
        /// </summary>
        /// <param name="resultDictionary">Receives the product list keyed by <c>linkStruct.Link</c>.</param>
        /// <param name="linkStruct">Supplies the HTML to parse and the link used as key and product URL.</param>
        /// <remarks>
        /// The list is added once, after all patterns have run. It used to be added inside the
        /// pattern loop, which threw a duplicate-key exception as soon as a second pattern matched.
        /// Products without a name node or without parsable prices are skipped.
        /// </remarks>
        public override void GetResultsForSingleUrl(Dictionary <string, List <Product> > resultDictionary, LinkStruct linkStruct)
        {
            List <Product> products     = new List <Product>();
            HtmlDocument   htmlDocument = new HtmlDocument();

            htmlDocument.LoadHtml(linkStruct.Html);

            var anyPatternMatched = false;

            foreach (var htmlPattern in _htmlPattens)
            {
                var divs = htmlDocument.DocumentNode.Descendants(htmlPattern.TopNode.Descendant).Where(node => node.GetAttributeValue(htmlPattern.TopNode.AttributeName, "").Contains(htmlPattern.TopNode.AttributeValue)).ToList();

                if (divs.Count == 0)
                {
                    // Processing stops at the first pattern without matches, as before.
                    // NOTE(review): later patterns are never tried in that case - confirm this
                    // is intended rather than skipping only the current pattern.
                    break;
                }

                anyPatternMatched = true;

                foreach (var div in divs)
                {
                    // The name is mandatory; a tile without the name node or attribute is skipped
                    // instead of aborting the whole page with a NullReferenceException.
                    var nameAttribute = div.Descendants(htmlPattern.Name.Descendant).FirstOrDefault()?.Attributes[htmlPattern.Name.AttributeValue];

                    if (nameAttribute == null)
                    {
                        continue;
                    }

                    var product = new Product();
                    product.Provider     = "Biedronka";
                    product.Name         = nameAttribute.Value.Replace(@"&quot;", "");
                    product.Url          = linkStruct.Link;
                    product.DownloadDate = DateTime.Now;
                    product.Category     = "";
                    product.Mark         = "";
                    product.Quantity     = 0;

                    // The description is optional: any failure falls back to a placeholder text.
                    try
                    {
                        product.Description = div.Descendants(htmlPattern.Description.Descendant).Where(node => htmlPattern.Description.CombinedAttributeName.Any(node.GetAttributeValue(htmlPattern.Description.AttributeName, "").Contains)).FirstOrDefault().InnerText;
                    }
                    catch
                    {
                        product.Description = "Ni ma komentarza, pewno coś nowego, kliknij IDŹ!";
                    }

                    // Both price parts are mandatory: a missing or unparsable part drops the product.
                    try
                    {
                        if (int.TryParse(div.Descendants(htmlPattern.ZlNode.Descendant).Where(node => node.GetAttributeValue(htmlPattern.ZlNode.AttributeName, "").Equals(htmlPattern.ZlNode.AttributeValue)).FirstOrDefault().InnerText, out int resultZl))
                        {
                            product.PriceZl = resultZl;
                        }
                        else
                        {
                            continue;
                        }
                    }
                    catch
                    {
                        continue;
                    }

                    try
                    {
                        if (int.TryParse(div.Descendants(htmlPattern.GrNode.Descendant).Where(node => node.GetAttributeValue(htmlPattern.GrNode.AttributeName, "").Equals(htmlPattern.GrNode.AttributeValue)).FirstOrDefault().InnerText, out int resultGr))
                        {
                            product.PriceGr = resultGr;
                        }
                        else
                        {
                            continue;
                        }
                    }
                    catch
                    {
                        continue;
                    }

                    products.Add(product);
                }
            }

            // Nothing is stored when not even the first pattern matched, as before.
            if (anyPatternMatched)
            {
                resultDictionary.Add(linkStruct.Link, products);
            }
        }
コード例 #17
0
ファイル: CrawlerIkea.cs プロジェクト: ViersJW/TanieZarcie
        /// <summary>
        /// Builds a <see cref="Product"/> from a single compact product tile.
        /// </summary>
        /// <param name="productNode">
        /// Tile node; its "data-price" attribute supplies the current price and its descendants
        /// supply the name, the comparable price and the link.
        /// </param>
        /// <param name="linkStruct">Link the tile came from. Not read by this method.</param>
        /// <returns>
        /// The filled product, or an empty <see cref="Product"/> when the tile has no previous-price
        /// element, no name, or a price that is missing or cannot be parsed.
        /// </returns>
        private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            var result = new Product();

            // Only tiles showing a previous price are of interest.
            if (!productNode.Descendants("span")
                .Any(n => n.Attributes.Any(x => x.Name == "class" && x.Value == "product-compact__prev-price")))
            {
                return new Product();
            }

            // Both name parts are optional. The lookups are null-conditional so that a missing
            // span leaves the part null instead of throwing before the null checks below.
            var header = productNode
                         .Descendants("span")
                         .FirstOrDefault(x => x.HasClass("product-compact__name"))?
                         .InnerText;

            var description = productNode
                              .Descendants("span")
                              .FirstOrDefault(x => x.HasClass("product-compact__type"))?
                              .InnerText
                              .RemoveMetaCharacters()
                              .Trim();

            if (header != null)
            {
                result.Name += $" {header}";
            }

            if (description != null)
            {
                result.Name += $" {description}";
            }

            // Checked before the whitespace clean-up because Regex.Replace rejects null input.
            if (string.IsNullOrEmpty(result.Name))
            {
                return new Product();
            }

            result.Name = Regex.Replace(result.Name, @"\s+", " ");

            result.Category = "Markety Budowlane";

            var value = productNode.GetAttributeValue("data-price", "");

            var saleValue = productNode.Descendants("span")
                            .FirstOrDefault(x => x.HasClass("product-compact__comparable-price-element"))?
                            .InnerText?
                            .RemoveMetaCharacters();

            // Missing prices are rejected before normalisation: Regex.IsMatch throws on null, and
            // appending "00" below would turn an empty price into "00" and hide it from this check.
            if (string.IsNullOrEmpty(value) || string.IsNullOrEmpty(saleValue))
            {
                return new Product();
            }

            // Bring both prices to a digit string with two trailing fraction digits: append "00"
            // when no two-digit fraction is present, otherwise just drop the separator.
            if (!Regex.IsMatch(value, @"\.[0-9][0-9]$"))
            {
                value  = value.RemoveNonNumeric();
                value += "00";
            }
            else
            {
                value = value.Replace(@".", "");
            }

            if (!Regex.IsMatch(saleValue, @"\,[0-9][0-9]$"))
            {
                saleValue  = saleValue.RemoveNonNumeric();
                saleValue += "00";
            }
            else
            {
                saleValue = saleValue.Replace(",", "");
            }

            if (decimal.TryParse(value, out decimal valueDecimal) &&
                decimal.TryParse(saleValue, out decimal saleValueDecimal))
            {
                // SaleValue receives the comparable price, Value the data-price attribute.
                result.SaleValue = saleValueDecimal / 100;
                result.Value     = valueDecimal / 100;
                result.OnSale    = true;
            }
            else
            {
                return new Product();
            }

            result.Seller    = this.GetType().Name.Replace("Crawler", "");
            result.TimeStamp = DateTime.Now;
            result.SourceUrl = productNode.Descendants("a").FirstOrDefault()?.GetAttributeValue("href", "");

            return result;
        }
コード例 #18
0
ファイル: CrawlerAldi.cs プロジェクト: ViersJW/TanieZarcie
        /// <summary>
        /// Builds a <see cref="Product"/> from a single article tile.
        /// </summary>
        /// <param name="productNode">Tile node whose descendants supply the prices, the name and the link.</param>
        /// <param name="linkStruct">Link the tile came from. Not read by this method.</param>
        /// <returns>
        /// The filled product, or an empty <see cref="Product"/> when the tile lacks either the
        /// main price or the previous price element.
        /// </returns>
        private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            var result = new Product();

            var promoPriceNode = productNode.Descendants("span")
                                 .FirstOrDefault(n => n.Attributes.Any(x => x.Name == "class" && CrawlerRegex.StandardMatch(x.Value, "price__main", MatchDireciton.Equals)));

            var regularPriceNode = productNode.Descendants("s")
                                   .FirstOrDefault(n => n.Attributes.Any(x => x.Name == "class" && CrawlerRegex.StandardMatch(x.Value, "price__previous", MatchDireciton.Equals)));

            // Only tiles carrying both prices are reported.
            if (promoPriceNode == null || regularPriceNode == null)
            {
                return result;
            }

            // The action anchor supplies both the name and the link, so it is located once.
            // The normalised class name is loop-invariant and computed up front.
            var actionClass = "mod-article-tile__action".NormalizeWithStandardRegex();
            var actionLink  = productNode
                              .Descendants("a")
                              .FirstOrDefault(x => x.Attributes.Any(y => y.Name == "class" && y.Value.NormalizeWithStandardRegex() == actionClass));

            result.Name     = actionLink?.InnerText;
            result.Category = "Markety Spożywcze";

            // Both price nodes are known to exist here. The former branch for a missing previous
            // price could never run after the guard above and has been removed.
            var promoPrice   = promoPriceNode.InnerText?.RemoveNonNumeric();
            var regularPrice = regularPriceNode.InnerText?.RemoveNonNumeric();

            if (decimal.TryParse(promoPrice, out decimal promoPriceDecimal))
            {
                result.Value = promoPriceDecimal / 100;
            }

            // SaleValue receives the previous price.
            if (decimal.TryParse(regularPrice, out decimal regularPriceDecimal))
            {
                result.SaleValue = regularPriceDecimal / 100;
            }

            result.OnSale = true;

            result.Seller    = this.GetType().Name.Replace("Crawler", "");
            result.TimeStamp = DateTime.Now;

            // Resolve the anchor's href against the first base URL.
            var productUrl = actionLink?.GetAttributeValue("href", "");

            result.SourceUrl = new Uri(new Uri(BaseUrls[0]), productUrl).ToString();

            return result;
        }
コード例 #19
0
        /// <summary>
        /// Builds a <see cref="Product"/> from a single product tile whose price is split into a
        /// "pln" element and a "gr" element.
        /// </summary>
        /// <param name="productNode">Tile node whose descendants supply the name, prices, promo texts and link.</param>
        /// <param name="linkStruct">Link the tile came from. Not read by this method.</param>
        /// <returns>
        /// The filled product, or an empty <see cref="Product"/> when the tile has no price
        /// element or one of its prices cannot be parsed.
        /// </returns>
        private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            var result = new Product();

            // A tile without a price element is not a product. The two class alternatives are
            // parenthesised so that both apply to the "class" attribute only; without the
            // parentheses "price-wrapper" was matched against every attribute.
            if (!productNode.Descendants().Any(x => x.Attributes.Any(y => y.Name == "class" &&
                                                                         (CrawlerRegex.StandardMatch(y.Value, "price", MatchDireciton.Equals) ||
                                                                          CrawlerRegex.StandardMatch(y.Value, "price-wrapper", MatchDireciton.Equals)))))
            {
                return new Product();
            }

            var name = productNode.Descendants()
                       .Where(x => x.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, "tilename", MatchDireciton.InputContainsMatch)))
                       .Select(z => z.InnerText)
                       .FirstOrDefault()?
                       .RemoveMetaCharacters()
                       .RemoveUnwantedStrings()
                       .Replace(";", "");

            result.Name     = name;
            result.Category = "Markety Spożywcze";

            // An old-price element marks the tile as discounted.
            var hasOldPrice = productNode.Descendants().Any(x => x.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, "price-old", MatchDireciton.Equals)));

            // The current price is read the same way whether or not the tile is discounted.
            var pln = productNode.Descendants()
                      .FirstOrDefault(x => x.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, "pln", MatchDireciton.Equals)))?
                      .InnerText
                      .RemoveMetaCharacters();

            var gr = productNode.Descendants()
                     .FirstOrDefault(x => x.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, "gr", MatchDireciton.Equals)))?
                     .InnerText
                     .RemoveMetaCharacters();

            if (!decimal.TryParse(pln, out decimal plnDecimal) || !decimal.TryParse(gr, out decimal grDecimal))
            {
                return new Product();
            }

            result.Value = plnDecimal + (grDecimal / 100);

            if (hasOldPrice)
            {
                var oldPrice = productNode.Descendants()
                               .FirstOrDefault(x => x.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, "price-old", MatchDireciton.Equals)))?
                               .InnerText
                               .RemoveNonNumeric();

                if (!decimal.TryParse(oldPrice, out decimal oldPriceDecimal))
                {
                    return new Product();
                }

                // SaleValue receives the old price.
                result.SaleValue = oldPriceDecimal / 100;
            }

            result.OnSale = hasOldPrice;

            // Any promo text also flags the product as on sale.
            var promoCommnets = productNode.Descendants()
                                .Where(x => x.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, "productpromo", MatchDireciton.InputContainsMatch)))
                                .Select(z => z.InnerText.RemoveMetaCharacters())
                                .ToList();

            if (promoCommnets.Count != 0)
            {
                result.SaleDescription = String.Join(", ", promoCommnets.ToArray());
                result.OnSale          = true;
            }

            result.Seller    = this.GetType().Name.Replace("Crawler", "");
            result.TimeStamp = DateTime.Now;

            // A tile without any anchor no longer throws; an empty relative URL is used instead.
            var productUrl = productNode.Descendants("a").FirstOrDefault()?.GetAttributeValue("href", "") ?? "";

            result.SourceUrl = new Uri(new Uri(BaseUrls[0]), productUrl).ToString();

            return result;
        }
コード例 #20
0
        /// <summary>
        /// Extracts the products from one downloaded page and registers them in
        /// <paramref name="resultDictionary"/> under the page link.
        /// </summary>
        /// <param name="resultDictionary">
        /// Receives a single entry keyed by <c>linkStruct.Link</c> when at least one pattern matched
        /// product nodes on the page; nothing is added when no pattern matched.
        /// </param>
        /// <param name="linkStruct">Provides the page markup (<c>Html</c>) and its address (<c>Link</c>).</param>
        /// <exception cref="ArgumentException">The dictionary already contains an entry for the page link.</exception>
        public override void GetResultsForSingleUrl(Dictionary <string, List <Product> > resultDictionary, LinkStruct linkStruct)
        {
            var products     = new List <Product>();
            var htmlDocument = new HtmlDocument();

            htmlDocument.LoadHtml(linkStruct.Html);

            var anyPatternMatched = false;

            foreach (var htmlPattern in _htmlPattens)
            {
                var divs = htmlDocument.DocumentNode
                           .Descendants(htmlPattern.TopNode.Descendant)
                           .Where(node => node.GetAttributeValue(htmlPattern.TopNode.AttributeName, "").Contains(htmlPattern.TopNode.AttributeValue))
                           .ToList();

                if (divs.Count == 0)
                {
                    // This pattern does not apply to the page; the remaining patterns still get a chance.
                    continue;
                }

                anyPatternMatched = true;

                foreach (var div in divs)
                {
                    var product = new Product();

                    // A tile without a price node is kept with a zero price, the same fallback
                    // that is used below for a price that cannot be parsed.
                    var priceText = div.Descendants(htmlPattern.PriceNode.Descendant)
                                    .FirstOrDefault(node => node.GetAttributeValue(htmlPattern.PriceNode.AttributeName, "").Equals(htmlPattern.PriceNode.AttributeValue))
                                    ?.InnerText ?? "";

                    // Expected form is "<zl>,<gr> zł"; a price without a comma has no grosz part.
                    string[] priceParts = priceText.Replace("zł", "").Split(',');

                    product.PriceZl = int.TryParse(priceParts[0], out int resultZl) ? resultZl : 0;
                    product.PriceGr = priceParts.Length > 1 && int.TryParse(priceParts[1], out int resultGr) ? resultGr : 0;

                    product.Provider = "Kaufland";

                    // The NullReferenceException handlers below are kept from the original code:
                    // they fire when the looked-up node is missing (FirstOrDefault() returns null).
                    // A tile without a name or sub-name is skipped; a missing description is
                    // replaced with an empty string.
                    try
                    {
                        product.Name = div.Descendants(htmlPattern.Name.Descendant)
                                       .FirstOrDefault(node => node.GetAttributeValue(htmlPattern.Name.AttributeName, "").Equals(htmlPattern.Name.AttributeValue))
                                       .InnerText;
                    }
                    catch (NullReferenceException)
                    {
                        continue;
                    }

                    try
                    {
                        var subName = div.Descendants(htmlPattern.SubName.Descendant)
                                      .FirstOrDefault(node => node.GetAttributeValue(htmlPattern.SubName.AttributeName, "").Equals(htmlPattern.SubName.AttributeValue))
                                      .InnerText;
                        product.Name += $" {subName}";
                    }
                    catch (NullReferenceException)
                    {
                        continue;
                    }

                    try
                    {
                        var description = div.Descendants(htmlPattern.Description.Descendant)
                                          .FirstOrDefault(node => node.GetAttributeValue(htmlPattern.Description.AttributeName, "").Equals(htmlPattern.Description.AttributeValue))
                                          .InnerText;
                        // Strip tabs and line breaks left over from the markup layout.
                        product.Description = Regex.Replace(description, @"\t|\n|\r", "");
                    }
                    catch (NullReferenceException)
                    {
                        product.Description = "";
                    }

                    product.Url          = linkStruct.Link;
                    product.DownloadDate = DateTime.Now;
                    product.Category     = "";
                    product.Mark         = "";
                    product.Quantity     = 0;

                    products.Add(product);
                }
            }

            // Registered once, after all patterns ran: adding inside the pattern loop would throw
            // on the second matching pattern because the key is the same page link.
            if (anyPatternMatched)
            {
                resultDictionary.Add(linkStruct.Link, products);
            }
        }
コード例 #21
0
 /// <summary>
 /// Base implementation of the per-page extraction hook; it does nothing with its arguments.
 /// </summary>
 public virtual void GetResultsForSingleUrl(Dictionary <string, List <Product> > resultDictionary, LinkStruct linkStruct)
 {
     // Intentionally empty: derived classes are expected to always override this method.
 }
コード例 #22
0
        /// <summary>
        /// Builds a <see cref="Product"/> from a single product node of a crawled page.
        /// </summary>
        /// <param name="productNode">Node whose descendants hold the title and price elements.</param>
        /// <param name="linkStruct">Page descriptor; its <c>Link</c> becomes the product's <c>SourceUrl</c>.</param>
        /// <returns>
        /// The populated product, or a blank <c>new Product()</c> when the title, the price wrapper
        /// or the current-price node is missing, or when the parsed price is zero.
        /// </returns>
        private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            // First descendant of root carrying a class attribute that matches className with
            // MatchDireciton.Equals, or null when there is none.
            HtmlNode FindByClass(HtmlNode root, string className)
            {
                return root.Descendants()
                       .FirstOrDefault(x => x.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, className, MatchDireciton.Equals)));
            }

            var result = new Product();

            #region Check if product node exists

            if (FindByClass(productNode, "product__title title") == null)
            {
                return(new Product());
            }

            #endregion

            #region Get Name

            var name = productNode.Descendants()
                       .Where(x => x.Attributes.Any(y => y.Name == "class" && CrawlerRegex.StandardMatch(y.Value, "product__title title", MatchDireciton.InputContainsMatch)))
                       .Select(z => z.InnerText)
                       .FirstOrDefault();

            if (name == null)
            {
                return(new Product());
            }

            result.Name = CrawlerRegex.RemoveMetaCharacters(name).Trim();

            #endregion

            #region Get Price and Sale Price, set OnSale Flag

            var priceWrapper = FindByClass(productNode, "product__price-wrapper");

            if (priceWrapper == null)
            {
                return(new Product());
            }

            // Both the sale and the non-sale layout read Value from this node, so a tile without
            // it cannot yield a price and is rejected instead of dereferencing null.
            var currentPriceNode = FindByClass(productNode, "price product__price");

            if (currentPriceNode == null)
            {
                return(new Product());
            }

            // InnerText already contains the text of every descendant (including a nested
            // "p-cents" element), so the digits are taken from it once and not appended again.
            if (decimal.TryParse(currentPriceNode.InnerText.RemoveNonNumeric(), out decimal currentPrice))
            {
                result.Value = currentPrice / 100;
            }

            // The product counts as on sale when the price wrapper contains an old-price element.
            if (FindByClass(priceWrapper, "price product__old-price") != null)
            {
                // Never null here: the wrapper is itself a descendant of productNode.
                var oldPriceNode = FindByClass(productNode, "price product__old-price");

                if (decimal.TryParse(oldPriceNode.InnerText.RemoveNonNumeric(), out decimal oldPrice))
                {
                    result.SaleValue = oldPrice / 100;
                }

                result.OnSale = true;
            }
            else
            {
                result.OnSale = false;
            }

            if (result.Value == 0)
            {
                return(new Product());
            }

            #endregion

            #region Get Seller, TimeStamp, URL

            result.Seller    = this.GetType().Name.Replace("Crawler", "");
            result.TimeStamp = DateTime.Now;
            result.SourceUrl = linkStruct.Link;

            #endregion

            return(result);
        }
コード例 #23
0
        /// <summary>
        /// Builds a <see cref="Product"/> from a single product node of a crawled page.
        /// </summary>
        /// <param name="productNode">Node containing the product's <c>h3</c> title, price spans and link.</param>
        /// <param name="linkStruct">Page descriptor of the crawled page (not read by this method).</param>
        /// <returns>
        /// A product with name, category, seller, timestamp and source URL; for promotional tiles
        /// also both prices and <c>OnSale = true</c>. A blank product is returned when the prices
        /// span or the title is missing, when a promotional price cannot be read, or when the two
        /// prices are equal or the first is below 30% of the second.
        /// </returns>
        private Product ExtractProduct(HtmlNode productNode, LinkStruct linkStruct)
        {
            var result = new Product();

            #region Check if product node exists

            var pricesNode = productNode.Descendants("span")
                             .FirstOrDefault(n => n.Attributes.Any(x => x.Name == "class" && CrawlerRegex.StandardMatch(x.Value, "prices", MatchDireciton.InputContainsMatch)));

            var titleNode = productNode.Descendants("h3").FirstOrDefault();

            if (pricesNode == null || titleNode == null)
            {
                return(result);
            }

            #endregion

            #region Get Name

            result.Name = titleNode.InnerText.RemoveMetaCharacters();

            #endregion

            #region Get Category

            result.Category = "Markety Budowlane";

            #endregion

            #region Get Price and Sale Price, set OnSale Flag

            var isPromotional = pricesNode.Descendants("span")
                                .Any(n => n.Attributes.Any(x => x.Name == "class" && x.Value.Contains("product-price promotional")));

            if (isPromotional)
            {
                var prices = pricesNode.Descendants("span")
                             .Where(n => n.Attributes.Any(x => x.Name == "class" && x.Value == "price"))
                             .ToList();

                // Two readable prices are required; anything less rejects the tile.
                if (prices.Count < 2 ||
                    !TryReadPrice(prices[0], out decimal firstPrice) ||
                    !TryReadPrice(prices[1], out decimal secondPrice))
                {
                    return(new Product());
                }

                result.Value     = firstPrice;
                result.SaleValue = secondPrice;

                // Reject tiles whose two prices are identical or implausibly far apart
                // (first price below 30% of the second).
                if ((firstPrice == secondPrice) || (firstPrice < (secondPrice * 0.3m)))
                {
                    return(new Product());
                }

                result.OnSale = true;
            }

            #endregion

            #region Get Seller, TimeStamp, URL

            result.Seller    = this.GetType().Name.Replace("Crawler", "");
            result.TimeStamp = DateTime.Now;

            // Computed once instead of once per inspected attribute.
            var urlClass = "Url".NormalizeWithStandardRegex();

            var productUrl = productNode
                             .Descendants("a")
                             .FirstOrDefault(x => x.Attributes.Any(y => y.Name == "class" && y.Value.NormalizeWithStandardRegex() == urlClass))?
                             .GetAttributeValue("href", "");

            // When no matching link exists, an empty relative part is passed instead of null.
            result.SourceUrl = new Uri(new Uri(BaseUrls[0]), productUrl ?? "").ToString();

            #endregion

            return(result);

            // Reads a price from the "integer" and "fractional" spans below priceNode;
            // false when either part is missing or is not a valid integer.
            bool TryReadPrice(HtmlNode priceNode, out decimal price)
            {
                price = 0;

                if (!TryReadPart(priceNode, "integer", out int whole) ||
                    !TryReadPart(priceNode, "fractional", out int fraction))
                {
                    return false;
                }

                price = whole + (fraction / 100m);
                return true;
            }

            // Parses the digits of the first span below priceNode whose class equals className.
            bool TryReadPart(HtmlNode priceNode, string className, out int value)
            {
                var digits = priceNode.Descendants("span")
                             .FirstOrDefault(n => n.Attributes.Any(x => x.Name == "class" && x.Value == className))?
                             .InnerText
                             .RemoveNonNumeric();

                return int.TryParse(digits, out value);
            }
        }