/// <summary>
        /// Opens <paramref name="productLink"/> in the shared browser and scrapes the product page
        /// into a new <see cref="Models.Product"/>.
        /// </summary>
        /// <remarks>
        /// Field mapping: SKU, Name, SalePrice, RegularPrice, Images, Categories,
        /// Attribute1Value (sizes), Attribute2Value (brand), Attribute3Value (price-range label).
        /// Description and ShortDescription are not populated by this method.
        /// SKU, RegularPrice and sizes are optional; the name, price and brand elements are looked up
        /// with FindElement, which throws when the element is missing, and the price text is parsed
        /// with Double.Parse using the current culture.
        /// </remarks>
        /// <param name="productLink">URL of the product page, e.g. https://dosi-in.com/tee-black-myson-mix-reflective/</param>
        /// <returns>
        /// The populated product, or an empty product when the page shows the
        /// ".bigEntrance.banner" element (such pages are not parsed).
        /// </returns>
        private Models.Product GetProductInformation(string productLink)
        {
            // Both prices are stored as: scraped value * PriceMultiplier + PriceOffset.
            const double PriceMultiplier = 1.2;
            const double PriceOffset = 50000;

            Models.Product product = new Models.Product();

            browser.Navigate().GoToUrl(productLink);

            // Pages that render the banner element are skipped entirely.
            if (browser.FindElements(By.CssSelector(".bigEntrance.banner")).Count > 0)
            {
                return product;
            }

            // SKU (optional): reuse the element that was found instead of querying the DOM a second time.
            var skuItems = browser.FindElements(By.CssSelector(".product-short_description li:first-child"));
            if (skuItems.Count > 0)
            {
                product.SKU = skuItems[0].GetAttribute("innerHTML").Replace("Mã sản phẩm: ", "");
            }

            // Name (required).
            product.Name = browser.FindElement(By.CssSelector("h1.mainbox-title")).GetAttribute("innerHTML");

            // Sale price (required).
            string salePriceText = browser.FindElement(By.CssSelector(".price-num")).GetAttribute("innerHTML");
            double finalPriceInVnd = Double.Parse(salePriceText) * PriceMultiplier + PriceOffset;
            product.SalePrice = finalPriceInVnd;

            // Regular (struck-through) price, only present on some pages.
            var strikePrices = browser.FindElements(By.CssSelector(".strike span:first-child"));
            if (strikePrices.Count > 0)
            {
                string regularPriceText = strikePrices[0].GetAttribute("innerHTML");
                product.RegularPrice = Double.Parse(regularPriceText) * PriceMultiplier + PriceOffset;
            }

            // Sizes (optional) -> Attribute1Value as a comma separated list.
            var sizeLabels = browser.FindElements(By.CssSelector("label.radio.float-left.dosi_get_change_option.dosi_option_size"));
            if (sizeLabels.Count > 0)
            {
                List<string> sizes = new List<string>();
                foreach (var sizeLabel in sizeLabels)
                {
                    sizes.Add(sizeLabel.GetAttribute("innerHTML").Trim());
                }
                product.Attribute1Value = String.Join(", ", sizes);
            }

            // Gallery images; the ".webp" part is stripped from every URL.
            List<string> images = new List<string>();
            foreach (var image in browser.FindElements(By.CssSelector("a.cm-image-previewer.cm-previewer.previewer img")))
            {
                string src = image.GetAttribute("src");
                if (src != null) // an <img> without src would otherwise crash the whole crawl
                {
                    images.Add(src.Replace(".webp", ""));
                }
            }
            product.Images = String.Join(", ", images);

            // Brand -> Attribute2Value, and appended to the category path.
            string brand = browser.FindElement(By.CssSelector("a.product_company")).GetAttribute("innerHTML").Trim();
            product.Attribute2Value = brand;
            product.Categories = product.Categories + "Brand>" + brand;

            // Price range -> Attribute3Value.
            // Six named ranges plus the "0 - 95.000" fallback. The previous chain of independent
            // ifs had no branch for 500.000 - 1.000.000, so those prices were labelled "0 - 95.000".
            // Shared boundary values keep the label they had before: 350.000 and 150.000 belong to
            // the lower range, 500.000 to "350.000 - 500.000", 1.000.000 and 2.000.000 to
            // "1.000.000 - 2.000.000".
            string priceRange;
            if (finalPriceInVnd > 2000000)
            {
                priceRange = ">2000000";
            }
            else if (finalPriceInVnd >= 1000000)
            {
                priceRange = "1.000.000 - 2.000.000";
            }
            else if (finalPriceInVnd > 500000)
            {
                priceRange = "500.000 - 1.000.000";
            }
            else if (finalPriceInVnd > 350000)
            {
                priceRange = "350.000 - 500.000";
            }
            else if (finalPriceInVnd > 150000)
            {
                priceRange = "150.000 - 350.000";
            }
            else if (finalPriceInVnd >= 95000)
            {
                priceRange = "95.000 - 150.000";
            }
            else
            {
                priceRange = "0 - 95.000";
            }
            product.Attribute3Value = priceRange;

            return product;
        }
Exemple #2
0
        /// <summary>
        /// Opens <paramref name="productLink"/> in the shared browser and scrapes the product page
        /// into a new <see cref="Models.Product"/>.
        /// </summary>
        /// <remarks>
        /// Every field is scraped inside its own try/catch: a failure is written to the console
        /// (with the exception message) and the remaining fields are still attempted.
        /// Field mapping: Type and Attribute1Value (brand) from the breadcrumb, SKU, Name,
        /// Attribute2Value (color), RegularPrice (current price), Attribute3Value (new-device price),
        /// Attribute4Value (savings), Attribute5Value (condition), Attribute6Value (accessories),
        /// Attribute7Value (warranty), Description (technical specifications), Images,
        /// Attribute8Value (store address) and Attribute9Value (store link).
        /// </remarks>
        /// <param name="productLink">URL of the product page to crawl.</param>
        /// <returns>The product with every field that could be scraped filled in.</returns>
        private Models.Product GetProductInformation(string productLink)
        {
            Models.Product product = new Models.Product();

            browser.Navigate().GoToUrl(productLink);

            // Type and brand, taken from the breadcrumb.
            // string.Replace throws on an empty search string, so a breadcrumb with fewer
            // entries than expected ends up in the catch block below.
            try
            {
                var    crumbElement = browser.FindElement(By.CssSelector("[class=\"mc-brea\"]"));
                string breadcrumb   = crumbElement.GetAttribute("innerHTML");

                // Drop the first entry (per the original note: the home page, "Trang chủ").
                string crumb = Regex.Match(breadcrumb, "<ul>.*?</li>", RegexOptions.Singleline).Value;
                breadcrumb = breadcrumb.Replace(crumb, "").Trim();

                // Drop the second entry (per the original note: "Máy đổi trả").
                crumb      = Regex.Match(breadcrumb, "<li>.*?</li>", RegexOptions.Singleline).Value;
                breadcrumb = breadcrumb.Replace(crumb, "").Trim();

                // The third entry carries the type; read it, then drop it.
                crumb        = Regex.Match(breadcrumb, "<li>.*?</li>", RegexOptions.Singleline).Value;
                product.Type = Regex.Match(crumb, "<li><a.*?>(.*?)</a></li>", RegexOptions.Singleline).Groups[1].Value;
                breadcrumb   = breadcrumb.Replace(crumb, "").Trim();

                // The next linked entry is the brand (Attribute 1).
                product.Attribute1Value = Regex.Match(breadcrumb, "<li><a.*?>(.*?)</a></li>", RegexOptions.Singleline).Groups[1].Value;
            }
            catch (Exception e)
            { Console.WriteLine(productLink + "\nType & Brand went wrong. " + e.Message); }

            // SKU: the <span> inside the name element, with every non-word character removed.
            try
            {
                var    nameElement = browser.FindElement(By.CssSelector("[class=\"mc-ctname\"]"));
                string sku         = nameElement.GetAttribute("innerHTML");
                sku         = Regex.Match(sku, "<span>(.*?)</span>", RegexOptions.Singleline).Groups[1].Value;
                product.SKU = Regex.Replace(sku, "\\W", "").Trim();
            }
            catch (Exception e)
            { Console.WriteLine(productLink + "\nSKU went wrong. " + e.Message); }

            // Name: the same element with the SKU <span> stripped.
            try
            {
                var    nameElement = browser.FindElement(By.CssSelector("[class=\"mc-ctname\"]"));
                string name        = nameElement.GetAttribute("innerHTML");
                product.Name = Regex.Replace(name, "<span>.*?</span>", "").Trim();
            }
            catch (Exception e)
            { Console.WriteLine(productLink + "\nName went wrong. " + e.Message); }

            // Color (Attribute 2): element content without the leading <i> icon.
            try
            {
                var    colorElement = browser.FindElement(By.CssSelector("[class=\"mc-ctclo\"]"));
                string color        = colorElement.GetAttribute("innerHTML");
                product.Attribute2Value = Regex.Replace(color, "<i.*?</i>", "").Trim();
            }
            catch (Exception e)
            { Console.WriteLine(productLink + "\nColor went wrong. " + e.Message); }

            // Current price (Regular Price): text between <span> and "đ</span>", dots removed.
            try
            {
                var    priceElement = browser.FindElement(By.CssSelector("[class=\"mc-ctpri1\"]"));
                string price        = priceElement.GetAttribute("innerHTML");
                price = Regex.Replace(price, ".*?<span>", "").Trim();
                price = Regex.Replace(price, "đ</span>.*", "", RegexOptions.Singleline).Trim();
                price = price.Replace(".", "");
                product.RegularPrice = Double.Parse(price);
            }
            catch (Exception e)
            { Console.WriteLine(productLink + "\nPrice went wrong. " + e.Message); }

            // Price of a new device (Attribute 3), stored as raw innerHTML.
            try
            {
                var newPriceElement = browser.FindElement(By.CssSelector("[class=\"mc-ctpri2\"]"));
                product.Attribute3Value = newPriceElement.GetAttribute("innerHTML");
            }
            catch (Exception e)
            { Console.WriteLine(productLink + "\nNewPrice went wrong. " + e.Message); }

            // Savings (Attribute 4).
            try
            {
                var    savingsElement = browser.FindElement(By.CssSelector("[class=\"mc-ctpri3\"]"));
                string savings        = savingsElement.GetAttribute("innerHTML");
                product.Attribute4Value = Regex.Replace(savings, "<p>|</p>", "").Trim();
            }
            catch (Exception e)
            { Console.WriteLine(productLink + "\nSavings went wrong. " + e.Message); }

            // Condition (Attribute 5), accessories (Attribute 6) and warranty (Attribute 7):
            // the first three plain <li> items of the status list, in document order.
            // The items are read by position. The earlier approach blanked each item with
            // string.Replace before searching for the next one, which threw on an empty item
            // and also removed the same text from the other items.
            try
            {
                var             statusElement = browser.FindElement(By.CssSelector("[class=\"mc-ctttm\"]"));
                string          status        = statusElement.GetAttribute("innerHTML");
                MatchCollection items         = Regex.Matches(status, "<li>(.*?)</li>", RegexOptions.Singleline);

                product.Attribute5Value = items.Count > 0 ? items[0].Groups[1].Value : "";
                product.Attribute6Value = items.Count > 1 ? items[1].Groups[1].Value : "";
                product.Attribute7Value = items.Count > 2 ? items[2].Groups[1].Value : "";
            }
            catch (Exception e)
            { Console.WriteLine(productLink + "\nStatus, Acess & Grua went wrong. " + e.Message); }

            // Technical specifications (Description).
            try
            {
                var    specElement = browser.FindElement(By.CssSelector("[class=\"modal-body tskt-popct\"]"));
                string specs       = specElement.GetAttribute("innerHTML");
                specs = specs.Trim(); // strings are immutable: the trimmed value has to be assigned back
                // NOTE(review): the reason for turning "@" into "&" is not visible here - confirm.
                specs = specs.Replace("@", "&");
                product.Description = specs;
            }
            catch (Exception e)
            { Console.WriteLine(productLink + "\nDecript went wrong. " + e.Message); }

            // Images: every https URL found inside the slider markup, joined with ", ".
            try
            {
                var    sliderElement = browser.FindElement(By.CssSelector("[class=\"slick-list draggable\"]"));
                string sliderHtml    = sliderElement.GetAttribute("innerHTML");
                string imgLinks      = "";
                Regex  urlPattern    = new Regex("https://.*?\"");
                foreach (Match m in urlPattern.Matches(sliderHtml))
                {
                    imgLinks += m.Value + ", ";
                }
                // Per the original note, the links are only reachable once "cdn." is removed.
                imgLinks = imgLinks.Replace("cdn.", "").Replace("\"", "");
                // The list ends with ", " (comma + space), so both characters must be trimmed;
                // trimming only ',' left the trailing separator in place.
                imgLinks       = imgLinks.TrimEnd(',', ' ');
                product.Images = imgLinks;
            }
            catch (Exception e)
            { Console.WriteLine(productLink + "\nImage went wrong. " + e.Message); }

            // Store location: address text -> Attribute 8, link -> Attribute 9.
            try
            {
                var    locationElement = browser.FindElement(By.CssSelector("[class=\"mc-ctlocit\"]"));
                string location        = locationElement.GetAttribute("innerHTML");
                product.Attribute8Value = Regex.Match(location, "(.*?)<a", RegexOptions.Singleline).Groups[1].Value;
                product.Attribute9Value = Regex.Match(location, "href=\"(.*?)\"", RegexOptions.Singleline).Groups[1].Value;
            }
            catch (Exception e)
            { Console.WriteLine(productLink + "\n Address went wrong. " + e.Message); }

            return product;
        }
Exemple #3
0
        /// <summary>
        /// Opens <paramref name="productLink"/> in the shared browser, waits for the document to
        /// finish loading and scrapes the product page into a new <see cref="Models.Product"/>.
        /// </summary>
        /// <remarks>
        /// Field mapping: SKU, Name, RegularPrice, SalePrice, Description, Images, Categories,
        /// Attribute1Value (brand), Attribute2Value (highlighted features) and
        /// Attribute3Value (sale-price bucket). Each element is read only when the
        /// <c>verify</c> helper reports that its selector is present on the page.
        /// </remarks>
        /// <param name="productLink">URL of the product page to crawl (gallery URLs are built for binhminhdigital.com).</param>
        /// <returns>The product with every field that could be scraped filled in.</returns>
        private Models.Product GetProductInformation(string productLink)
        {
            const string SkuSelector          = "[itemprop = \"sku\"]";
            const string NameSelector         = "[itemprop =\"url\"]";
            const string RegularPriceSelector = "[class=\"ngachngang\"]";
            const string SalePriceSelector    = "[class=\"price_sale\"]";
            const string DescriptionSelector  = "[class=\"view-content\"]";
            const string BrandSelector        = "[itemprop=\"brand\"]";
            const string RecapSelector        = "[class=\"product-recap\"]";

            Models.Product product = new Models.Product();

            // Short random pause before each request. Random.Next(2) returns 0 or 1,
            // so this sleeps for either 0 or 1 second.
            System.Threading.Thread.Sleep(new Random().Next(2) * 1000);
            WebDriverWait wait = new WebDriverWait(this.browser, TimeSpan.FromSeconds(120));

            browser.Navigate().GoToUrl(productLink);

            // Block (up to the 120 s timeout) until the browser reports the document as fully loaded.
            wait.Until((x) =>
            {
                return(((IJavaScriptExecutor)this.browser).ExecuteScript("return document.readyState").Equals("complete"));
            });

            // SKU
            if (verify(browser, SkuSelector) == true)
            {
                product.SKU = browser.FindElement(By.CssSelector(SkuSelector)).GetAttribute("innerHTML");
            }

            // Name
            if (verify(browser, NameSelector) == true)
            {
                product.Name = browser.FindElement(By.CssSelector(NameSelector)).GetAttribute("innerHTML");
            }

            // Regular price: strip the currency suffix, then parse.
            if (verify(browser, RegularPriceSelector) == true)
            {
                string regularPriceText = browser.FindElement(By.CssSelector(RegularPriceSelector)).GetAttribute("innerHTML");
                product.RegularPrice = Double.Parse(regularPriceText.Replace(" VNĐ", ""));
            }

            // Sale price: "Liên hệ" is shown in place of a number and is stored as the
            // placeholder value 9999999.
            if (verify(browser, SalePriceSelector) == true)
            {
                string salePriceText = browser.FindElement(By.CssSelector(SalePriceSelector)).GetAttribute("innerHTML");
                if (salePriceText != "Liên hệ")
                {
                    product.SalePrice = Double.Parse(salePriceText.Replace(" VNĐ", ""));
                }
                else
                {
                    product.SalePrice = 9999999;
                }
            }

            // Description: visible text with the shop name and line breaks removed.
            // NOTE(review): Replace("/s", "") removes the literal two characters "/s"; if
            // whitespace removal was intended, this call does not do that - confirm.
            if (verify(browser, DescriptionSelector) == true)
            {
                string description = browser.FindElement(By.CssSelector(DescriptionSelector)).GetAttribute("innerText");
                product.Description = description.Replace("tại BinhMinhDigital", "").Replace("/s", "").Replace("\r", "").Replace("\n", "");
            }

            // Brand (Attribute 1)
            if (verify(browser, BrandSelector) == true)
            {
                product.Attribute1Value = browser.FindElement(By.CssSelector(BrandSelector)).GetAttribute("innerHTML");
            }

            // Highlighted features (Attribute 2).
            // The presence check now uses the selector that is actually read; it used to test
            // the description selector, so a page without a recap block made FindElement throw.
            if (verify(browser, RecapSelector) == true)
            {
                string advanceInfo = browser.FindElement(By.CssSelector(RecapSelector)).GetAttribute("innerText");
                product.Attribute2Value = advanceInfo.Replace("TÍNH NĂNG NỔI BẬT", "").Replace("/s", "").Replace("\r", "").Replace("\n", "");
            }

            // Sale-price bucket (Attribute 3).
            // The last branch used to test "> 2000000" (2 million) while assigning the
            // "> 20 000 000" label, overriding the correct bucket for every price above 2 million.
            var salePrice = product.SalePrice;
            if (salePrice <= 1000000)
            {
                product.Attribute3Value = "<= 1 000 000";
            }
            else if (salePrice > 1000000 && salePrice <= 5000000)
            {
                product.Attribute3Value = "1 000 000 - 5 000 000";
            }
            else if (salePrice > 5000000 && salePrice <= 20000000)
            {
                product.Attribute3Value = "5 000 000 - 20 000 000";
            }
            else if (salePrice > 20000000)
            {
                product.Attribute3Value = "> 20 000 000";
            }

            // Category path: fixed category followed by the brand.
            product.Categories = "Chân Camera" + ">" + product.Attribute1Value;

            // Image gallery: every data-standard="...jpg" attribute in the page source,
            // turned into an absolute URL and joined with ",".
            string galleryList = "";
            string pageHtml    = browser.PageSource;
            var    imageHits   = Regex.Matches(pageHtml, @"data-standard(.*?)jpg", RegexOptions.Singleline);

            foreach (var hit in imageHits)
            {
                // The hit is matched again WITHOUT Singleline on purpose: this keeps only a
                // single-line "data-standard...jpg" run from hits that span several lines.
                string gallery = Regex.Match(hit.ToString(), @"data-standard(.*?)jpg").Value.Replace("data-standard=\"/", "https://binhminhdigital.com/");
                galleryList += gallery + ",";
            }

            product.Images = galleryList.TrimEnd(',');

            return product;
        }