Пример #1
0
        /// <summary>
        /// Loads the HTML page for <paramref name="site"/> and parses it into a list of products.
        /// </summary>
        /// <param name="site">Site descriptor passed to <c>HttpService.LoadHTMLPage</c>.</param>
        /// <param name="productForParsing">XPath provider forwarded to <c>Parser.GetParsedProduct</c>.</param>
        /// <returns>The list returned by <c>Parser.GetParsedProduct</c> for the loaded page.</returns>
        public static List<ParsedProduct> GetProducts(Site site, IProductXPath productForParsing)
        {
            // Step 1: fetch the raw markup; step 2: hand it straight to the parser.
            string pageMarkup = HttpService.LoadHTMLPage(site);

            return Parser.GetParsedProduct(pageMarkup, productForParsing);
        }
Пример #2
0
        /// <summary>
        /// Parses an HTML page into a list of products, using the XPath expressions exposed by
        /// <paramref name="parsedProduct"/> to locate product titles, prices and images.
        /// </summary>
        /// <remarks>
        /// One product is created per title node. Price and image nodes are then attached to the
        /// products by position (the i-th price/image node goes to the i-th product). Nodes beyond
        /// the number of products are ignored, and a product without a matching node keeps its
        /// default <c>Price</c> / <c>ImageBytes</c>.
        /// </remarks>
        /// <param name="HTML">HTML markup to parse.</param>
        /// <param name="parsedProduct">
        /// Supplies <c>TitleXPath</c>, <c>PriseXPath</c> and <c>ImageXPath</c> for the page layout.
        /// </param>
        /// <returns>
        /// The parsed products; an empty list when the title XPath matches nothing.
        /// </returns>
        public static List<ParsedProduct> GetParsedProduct(string HTML, IProductXPath parsedProduct)
        {
            List<ParsedProduct> parsedProductList = new List<ParsedProduct>();
            var document = new HtmlDocument();

            document.LoadHtml(HTML);

            // Parse product titles, e.g. ".//h3[@class='content-product__name-heading']/a".
            // SelectNodes returns null (not an empty collection) when nothing matches, so every
            // result below is null-checked before being enumerated.
            HtmlNodeCollection titleNodes = document.DocumentNode.SelectNodes(parsedProduct.TitleXPath);
            if (titleNodes == null)
            {
                // No titles means no products to attach prices or images to.
                return parsedProductList;
            }

            foreach (HtmlNode titleNode in titleNodes)
            {
                parsedProductList.Add(new ParsedProduct
                {
                    Title = titleNode.GetAttributeValue("title", null)
                });
            }

            // Parse product prices, e.g. "//span[@class='wc-price-sale']//span[@class='wc-price-number']".
            HtmlNodeCollection prices = document.DocumentNode.SelectNodes(parsedProduct.PriseXPath);
            if (prices != null)
            {
                // Bound by both counts so surplus price nodes cannot index past the product list.
                for (int i = 0; i < prices.Count && i < parsedProductList.Count; i++)
                {
                    // Example InnerText: "\n            от 2 619&nbsp; &#8372;        "
                    string decodedString = prices[i].InnerText.Replace("&nbsp; &#8372;", " грн.");
                    decodedString = decodedString.Replace("  ", string.Empty);
                    decodedString = decodedString.Replace("\n", string.Empty);
                    parsedProductList[i].Price = decodedString;
                }
            }

            // Parse product images, e.g. ".//li//div[@class='content-product__thumb']//a//img".
            HtmlNodeCollection imgs = document.DocumentNode.SelectNodes(parsedProduct.ImageXPath);
            if (imgs != null)
            {
                for (int i = 0; i < imgs.Count && i < parsedProductList.Count; i++)
                {
                    // The attribute may be absent on some <img> nodes; skip those but keep the
                    // positional pairing between image nodes and products intact.
                    var srcsetAttribute = imgs[i].Attributes["data-srcset"];
                    if (srcsetAttribute == null || string.IsNullOrEmpty(srcsetAttribute.Value))
                    {
                        continue;
                    }

                    // Only the text before the first space of the attribute value is used as the URL.
                    string urlImage = srcsetAttribute.Value.Split(' ')[0];
                    if (string.IsNullOrEmpty(urlImage))
                    {
                        continue;
                    }

                    // NOTE(review): assumes Image is IDisposable (e.g. System.Drawing.Image) and that
                    // DownloadImageByUrl hands ownership to the caller - confirm. Disposing here
                    // releases the image as soon as its bytes have been extracted.
                    using (Image imageCurProd = DownloadImageByUrl(urlImage))
                    {
                        parsedProductList[i].ImageBytes = imageToByteArray(imageCurProd);
                    }
                }
            }

            return parsedProductList;
        }