Ejemplo n.º 1
0
        /// <summary>
        /// Scrapes the first <paramref name="pages"/> result pages of a fixed Etsy search,
        /// shows the listings in <c>dataGridView1</c>, calls <c>ProcessStore</c> once per
        /// distinct store name, and finally writes the store and product tables to
        /// <c>Store.csv</c> and <c>Products.csv</c>.
        /// </summary>
        /// <param name="pages">
        /// Number of result pages to request, starting at page 1. Values below 1 request no pages.
        /// </param>
        /// <returns>
        /// A task that completes when the run has finished. Failures are written to standard
        /// error and are not rethrown, so the task does not fault because of them.
        /// </returns>
        public async Task ProcessIt(int pages)
        {
            // NOTE(review): inside a verbatim string "\\" is two backslash characters, so this
            // path contains doubled separators. It is kept byte-for-byte; it is also tied to
            // one user profile and should probably become configurable.
            const string outputDirectory = @"C:\\Users\\User\\Documents\\Lumaprints\\";

            ServicePointManager.Expect100Continue = true;
            ServicePointManager.SecurityProtocol  = SecurityProtocolType.Tls12;

            // NOTE(review): this callback accepts every server certificate and is a
            // process-wide setting, which disables TLS certificate validation. It is kept
            // because removing it changes global behaviour; confirm whether it is still needed.
            ServicePointManager.ServerCertificateValidationCallback = delegate { return(true); };

            watch.Start();

            List <EtsyProduct> productList = new List <EtsyProduct>();

            try
            {
                // One HttpClient for the whole run instead of one per page.
                using (var client = new HttpClient())
                {
                    for (int page = 1; page <= pages; page++)
                    {
                        // NOTE(review): "crafst" in q= looks like a typo. The URL is kept exactly
                        // as it was; confirm before changing the query.
                        string url = $"https://www.etsy.com/search?q=arts+and+crafst&_qc=arts+and+crafts&ref=pagination&page={page}";

                        string html = await client.GetStringAsync(url);

                        // OptionEmptyCollection makes SelectNodes return an empty collection
                        // instead of null when the result grid is not on the page.
                        var htmlDocument = new HtmlAgilityPack.HtmlDocument { OptionEmptyCollection = true };
                        htmlDocument.LoadHtml(html);

                        var listingAnchors = htmlDocument.DocumentNode
                                             .SelectNodes("//*[@class=\"responsive-listing-grid wt-grid wt-grid--block justify-content-flex-start pl-xs-0\"]")
                                             .Descendants("a")
                                             .ToList();

                        foreach (var item in listingAnchors)
                        {
                            var storeNode = item.SelectSingleNode(".//p[@class=\"text-gray-lighter text-body-smaller display-inline-block\"]/text()");
                            var titleNode = item.SelectSingleNode(".//h3");

                            // SelectSingleNode returns null when nothing matches. An anchor
                            // without a store name or a title is not a listing card, so skip it
                            // instead of failing the whole run.
                            if (storeNode == null || titleNode == null)
                            {
                                continue;
                            }

                            EtsyProduct cursorProduct = new EtsyProduct();
                            cursorProduct.StoreName   = HttpUtility.HtmlDecode(storeNode.InnerText);
                            cursorProduct.ProductName = titleNode.InnerText.Trim();

                            // A listing counts as rated only when its rating span has content.
                            var  ratingSpan = item.SelectSingleNode(".//span[@class=\"v2-listing-card__rating icon-t-2 display-block\"]");
                            bool hasRating  = !string.IsNullOrWhiteSpace(ratingSpan?.InnerHtml);

                            var ratingInput = hasRating ? item.SelectSingleNode(".//input[@name=\"rating\"]") : null;
                            var reviewsNode = hasRating ? item.SelectSingleNode(".//div[2]/div/div/span/span[3]/text()") : null;

                            cursorProduct.Rating  = ratingInput?.GetAttributeValue("value", "") ?? "No Rating";
                            cursorProduct.Reviews = reviewsNode?.InnerText.Replace(" reviews", "") ?? "No Reviews";

                            var priceNode = item.SelectSingleNode(".//span[@class=\"currency-value\"]/text()");
                            cursorProduct.Price = priceNode?.InnerText ?? string.Empty;

                            productList.Add(cursorProduct);
                        }

                        // Rebind once per page rather than once per product. The source is
                        // reset to null first so the grid picks up the rows added to the list.
                        dataGridView1.DataSource = null;
                        dataGridView1.DataSource = productList;

                        lblProductStatus.Text = $"Product Count: {productList.Count}";
                    }
                }

                lblProducts.Text = $"Number of Products: {productList.Count}";

                var uniqueShopList = productList.Select(product => product.StoreName).Distinct().ToList();

                int successCounter = 0;
                lblStatus.Text = $"{successCounter}/{uniqueShopList.Count}";
                lblStores.Text = $"Number of Stores: {uniqueShopList.Count}";

                List <EtsyStoreInfo> storeInfoList = new List <EtsyStoreInfo>();
                foreach (var store in uniqueShopList)
                {
                    var storeInfo = await ProcessStore(store);

                    storeInfoList.Add(storeInfo);
                    successCounter++;
                    lblStatus.Text = $"{successCounter}/{uniqueShopList.Count}";
                    UpdateDataGridView(storeInfoList);
                }

                DataTable storeTable = new DataTableHelper().ToDataTable(storeInfoList);
                CSVHelper.CreateCSVFile(storeTable, outputDirectory + "Store.csv");

                DataTable productTable = new DataTableHelper().ToDataTable(productList);
                CSVHelper.CreateCSVFile(productTable, outputDirectory + "Products.csv");
            }
            catch (Exception ex)
            {
                // The run stays best-effort (nothing is rethrown), but the failure is no
                // longer silent.
                Console.Error.WriteLine($"ProcessIt failed: {ex}");
            }
            finally
            {
                // Stop before reading so the reported time excludes the logging itself, then
                // reset so the next run starts from zero.
                watch.Stop();
                Console.WriteLine(watch.Elapsed);
                watch.Reset();
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Downloads one fixed Etsy search page and collects, for every <c>li</c> in its result
        /// grid that has a shop paragraph, an <c>EtsyProduct</c> whose <c>StoreName</c> is that
        /// paragraph's outer HTML.
        /// </summary>
        /// <returns>A task that completes once the page has been downloaded and parsed.</returns>
        /// <remarks>
        /// NOTE(review): the collected list is local to this method and is neither returned nor
        /// stored, so callers cannot observe it. Exposing it would change the signature and is
        /// left for a follow-up.
        /// </remarks>
        public async Task ProcessIt()
        {
            ServicePointManager.Expect100Continue = true;
            ServicePointManager.SecurityProtocol  = SecurityProtocolType.Tls12;

            // NOTE(review): this callback accepts every server certificate and is a
            // process-wide setting, which disables TLS certificate validation. It is kept
            // because removing it changes global behaviour; confirm whether it is still needed.
            ServicePointManager.ServerCertificateValidationCallback = delegate { return(true); };

            // NOTE(review): no proxy is configured on this request; HtmlWeb.Load is called with
            // the URL only. If a proxy is required it has to be passed to HtmlWeb explicitly.
            HtmlWeb web = new HtmlWeb();

            List <EtsyProduct> productList = new List <EtsyProduct>();

            // HtmlWeb.Load is synchronous, so it is run via Task.Run and awaited.
            var doc = await Task.Run(() => web.Load(@"https://www.etsy.com/search?q=arts%20and%20prints"));

            var resultGrid = doc.DocumentNode.Descendants("ul")
                             .FirstOrDefault(node => node.GetAttributeValue("class", "")
                                             .Equals("responsive-listing-grid wt-grid wt-grid--block justify-content-flex-start pl-xs-0"));

            // No result grid on the page (markup changed, or a different page was served):
            // there is nothing to parse.
            if (resultGrid == null)
            {
                return;
            }

            foreach (var item in resultGrid.Descendants("li"))
            {
                var shopParagraph = item.Descendants("p")
                                    .FirstOrDefault(node => node.GetAttributeValue("class", "")
                                                    .Equals("text-gray-lighter text-body-smaller display-inline-block"));

                // Skip list items that carry no shop paragraph instead of dereferencing null.
                if (shopParagraph == null)
                {
                    continue;
                }

                productList.Add(new EtsyProduct { StoreName = shopParagraph.OuterHtml });
            }
        }