Example #1
0
        static void CostcoFetcher()
        {
            var categoryName = "";
            var title = "";
            var rowKey = "";
            var img = "";
            var des = "";
            var online_price = "";
            var less_price = "";
            var your_price = "";
            var startDate = "";
            var endDate = "";
            var review = "";

            // Retrieve the storage account from the connection string.
            CloudStorageAccount storageAccount = CloudStorageAccount.Parse(
                ConfigurationManager.ConnectionStrings["AzureWebJobsStorage"].ConnectionString);

            // Create the table client.
            CloudTableClient tableClient = storageAccount.CreateCloudTableClient();

            // Create the CloudTable object that represents the "people" table.
            CloudTable table = tableClient.GetTableReference("Costco");

            HtmlWeb htmlWeb = new HtmlWeb();
            HtmlDocument htmlDocument = htmlWeb.Load("http://www.costco.com/vitamins-herbals-dietary-supplements.html");
            var category = htmlDocument.DocumentNode.SelectNodes("//div[@class = 'category-tile']");
            foreach (var product_col in category)
            {
                var col_link = product_col.SelectSingleNode(".//a[@href]").Attributes["href"].Value;
                categoryName = product_col.SelectSingleNode(".//a[@href]").Attributes["title"].Value.Replace("&", "&");

                HtmlDocument htmlDocument1 = htmlWeb.Load(col_link);
                var products = htmlDocument1.DocumentNode.SelectNodes("//*[contains(@class,'product-tile-image-container')]");

                foreach (var product in products)
                {
                    rowKey = Guid.NewGuid().ToString();
                    var productInfo = product.SelectSingleNode(".//a[@href]");
                    var link = productInfo.Attributes["href"].Value;
                    var itemNumber = link.Split('.')[link.Split('.').Count() - 2];

                    foreach (var x in productInfo.SelectNodes(".//*[contains(@class,'short-desc')]"))
                    {
                        title = x.InnerText.Trim();
                    }

                    foreach (var x in productInfo.SelectNodes(".//img"))
                    {
                        img = x.Attributes["src"].Value;
                        des = x.Attributes["alt"].Value;
                    }

                    try
                    {
                        HtmlDocument htmlDocument2 = htmlWeb.Load(link);
                        var productDetail = htmlDocument2.DocumentNode.SelectNodes("//*[contains(@class,'product-price')]");
                        var productValid = htmlDocument2.DocumentNode.SelectNodes("//*[contains(@class,'col2')]");
                        foreach (var x in productValid)
                        {
                            var y = x.SelectSingleNode(".//*[contains(@class,'merchandisingText')]");
                            if (y.InnerText.Count() == 0)
                            {
                                startDate = "";
                                endDate = "";
                            }
                            else
                            {
                                var validDate = y.InnerText.Split(' ');
                                startDate = validDate[5];
                                endDate = validDate[7].Replace(".", string.Empty);
                            }
                        }
                        foreach (var price in productDetail)
                        {
                            var online_price_node = price.SelectSingleNode(".//*[contains(@class,'online-price')]");
                            online_price = online_price_node.SelectSingleNode(".//*[contains(@class,'currency')]").InnerText;
                            var less_price_node = price.SelectSingleNode(".//*[contains(@class,'less-price')]");
                            less_price = less_price_node.SelectSingleNode(".//*[contains(@class,'currency')]").InnerText;
                            var your_price_node = price.SelectSingleNode(".//*[contains(@class,'your-price')]");
                            your_price = your_price_node.SelectSingleNode(".//*[contains(@class,'currency')]").InnerText;
                        }
                    }
                    catch (Exception e)
                    {
                        continue;
                    }

                    var productReview = product.SelectSingleNode(".//*[contains(@class,'product-rating')]");
                    if (productReview == null)
                    {
                        review = "";
                    }
                    else
                    {
                        review = productReview.InnerText.Substring(0, 3) + "星(共5星)";
                    }

                    Console.WriteLine(title);
                    Console.WriteLine(itemNumber);
                    Console.WriteLine(link);
                    Console.WriteLine(img);
                    Console.WriteLine(des);
                    Console.WriteLine(online_price);
                    Console.WriteLine(less_price);
                    Console.WriteLine(your_price);
                    Console.WriteLine(startDate);
                    Console.WriteLine(endDate);
                    Console.WriteLine(review);
                    Console.WriteLine(rowKey);
                    //Console.ReadLine();

                    // Create a new customer entity.
                    Costco product1 = new Costco("好市多", rowKey);
                    product1.产品分类 = categoryName;
                    product1.产品名称 = title;
                    product1.产品编号 = itemNumber;
                    product1.产品链接 = link;
                    product1.产品图片 = img;
                    product1.产品描述 = des;
                    product1.原价 = online_price;
                    product1.减价 = less_price;
                    product1.折扣价 = your_price;
                    product1.开始日期 = startDate;
                    product1.结束日期 = endDate;
                    product1.产品评价 = review;

                    // Create the TableOperation that inserts the customer entity.
                    TableOperation insertOperation = TableOperation.Insert(product1);

                    // Execute the insert operation.
                    table.Execute(insertOperation);
                }
            }
        }
Example #2
0
        public static void CostcoFetcher(string link)
        {
            var categoryName = "";
            var title = "";
            var rowKey = "";
            var img = "";
            var des = "";
            var des_cn = "";
            var online_price = "";
            var less_price = "";
            var your_price = "";
            var startDate = "";
            var endDate = "";
            var review = "";
            var costcoLink = link;

            // Retrieve the storage account from the connection string.
            //CloudStorageAccount storageAccount = CloudStorageAccount.Parse(
            //    ConfigurationManager.ConnectionStrings["AzureWebJobsStorage"].ConnectionString);

            // Create the table client.
            //CloudTableClient tableClient = storageAccount.CreateCloudTableClient();

            // Create the CloudTable object that represents the "people" table.
            //CloudTable table = tableClient.GetTableReference("Coupons");

            HtmlWeb htmlWeb = new HtmlWeb();
            HtmlDocument htmlDocument = htmlWeb.Load(costcoLink);
            var category = htmlDocument.DocumentNode.SelectNodes("//div[@class = 'category-tile']");
            foreach (var product_col in category)
            {
                var col_link = product_col.SelectSingleNode(".//a[@href]").Attributes["href"].Value;
                categoryName = product_col.SelectSingleNode(".//a[@href]").Attributes["title"].Value.Replace("&", "&").Replace("'", "'");

                HtmlDocument htmlDocument1 = htmlWeb.Load(col_link);
                var products = htmlDocument1.DocumentNode.SelectNodes("//*[contains(@class,'product-tile-image-container')]");

                foreach (var product in products)
                {
                    rowKey = Guid.NewGuid().ToString();
                    var productInfo = product.SelectSingleNode(".//a[@href]");
                    var productLink = productInfo.Attributes["href"].Value;
                    var itemNumber = productLink.Split('.')[productLink.Split('.').Count() - 2];

                    foreach (var x in productInfo.SelectNodes(".//*[contains(@class,'short-desc')]"))
                    {
                        title = x.InnerText.Trim();
                    }

                    foreach (var x in productInfo.SelectNodes(".//img"))
                    {
                        img = x.Attributes["src"].Value;
                        des = x.Attributes["alt"].Value;
                        try
                        {
                            des_cn = TranslateText(des);
                        }
                        catch (Exception e)
                        {
                            Console.WriteLine(productLink);
                            Console.WriteLine(e.Message);
                            continue;
                        }
                    }

                    try
                    {
                        HtmlDocument htmlDocument2 = htmlWeb.Load(productLink);
                        var productDetail = htmlDocument2.DocumentNode.SelectNodes("//*[contains(@class,'product-price')]");
                        var productValid = htmlDocument2.DocumentNode.SelectNodes("//*[contains(@class,'col2')]");
                        foreach (var x in productValid)
                        {
                            var y = x.SelectSingleNode(".//*[contains(@class,'merchandisingText')]");
                            if (y.InnerText.Count() == 0)
                            {
                                startDate = "";
                                endDate = "";
                            }
                            else
                            {
                                var validDate = y.InnerText.Split(' ');
                                startDate = validDate[5];
                                endDate = validDate[7].Replace(".", string.Empty);
                            }
                        }
                        foreach (var price in productDetail)
                        {
                            var online_price_node = price.SelectSingleNode(".//*[contains(@class,'online-price')]");
                            online_price = online_price_node.SelectSingleNode(".//*[contains(@class,'currency')]").InnerText;
                            var less_price_node = price.SelectSingleNode(".//*[contains(@class,'less-price')]");
                            less_price = less_price_node.SelectSingleNode(".//*[contains(@class,'currency')]").InnerText;
                            var your_price_node = price.SelectSingleNode(".//*[contains(@class,'your-price')]");
                            your_price = your_price_node.SelectSingleNode(".//*[contains(@class,'currency')]").InnerText;
                        }
                    }
                    catch (Exception e)
                    {
                        Console.WriteLine(productLink);
                        Console.WriteLine(e.Message);
                        continue;
                    }

                    var productReview = product.SelectSingleNode(".//*[contains(@class,'product-rating')]");
                    if (productReview == null)
                    {
                        review = "";
                    }
                    else
                    {
                        review = productReview.InnerText.Substring(0, 3) + "星(共5星)";
                    }

                    /* Console.WriteLine(title);
                    Console.WriteLine(itemNumber);
                    Console.WriteLine(productLink);
                    Console.WriteLine(img);
                    Console.WriteLine(des);
                    Console.WriteLine(online_price);
                    Console.WriteLine(less_price);
                    Console.WriteLine(your_price);
                    Console.WriteLine(startDate);
                    Console.WriteLine(endDate);
                    Console.WriteLine(review);
                    Console.WriteLine(rowKey); */
                    //Console.ReadLine();

                    // Create a new customer entity.
                    Costco product1 = new Costco("Costco", rowKey);
                    product1.ProductCategory = categoryName;
                    product1.ProductName = title;
                    product1.ItemNumber = itemNumber;
                    product1.ProductLink = productLink;
                    product1.ProductPicture = img;
                    product1.ProductDescription = des;
                    product1.ProductDescription_cn = des_cn;
                    product1.OriginalPrice = online_price;
                    product1.LessPrice = less_price;
                    product1.OfferPrice = your_price;
                    product1.StartDate = startDate;
                    product1.EndDate = endDate;
                    product1.ProductReview = review;

                    // Create the TableOperation that inserts the customer entity.
                    TableOperation insertOperation = TableOperation.Insert(product1);

                    // Execute the insert operation.
                    try
                    {
                        table.Execute(insertOperation);
                        couponsArchive.Execute(insertOperation);
                    }
                    catch (Exception e)
                    {
                        Console.WriteLine(productLink);
                        Console.WriteLine(e.Message);
                        continue;
                    }
                }
            }
        }