Example #1
0
        /// <summary>
        /// 1.0 Loads the complete list of Gome categories.
        /// </summary>
        /// <remarks>
        /// Downloads the category index page, extracts the category entries found on it, and
        /// then downloads one page per distinct <c>ParentCode</c> to collect the categories
        /// that page yields.
        /// </remarks>
        /// <returns>Every category collected from the downloaded pages.</returns>
        private List<Category> LoadCategories()
        {
            // 0: Index page listing all Gome categories.
            string url = "http://list.gome.com.cn/";

            // 1: Find the URL of every category on the index page.
            string docHtml = HttpTools.Get(url);
            GM_Crawler center = new GM_Crawler();
            List<Category> tempCateList = center.GetAllCategoryURl(docHtml);

            // 2: Categories gathered from the pages requested below.
            List<Category> cateList = new List<Category>();

            // 3: Parent codes that were already requested, so entries sharing a parent do not
            //    trigger the same download again. A set keeps the membership test O(1).
            HashSet<string> requestedParents = new HashSet<string>();

            foreach (var item in tempCateList)
            {
                // Add returns false when the parent code is already in the set.
                if (!requestedParents.Add(item.ParentCode))
                {
                    continue;
                }

                var strDoc = HttpTools.Get(item.Url);
                cateList.AddRange(center.GetAllCategoryList(strDoc));
            }

            Console.WriteLine("{0}国美所有类别已经都加载完了{1}", prefix, prefix);
            return cateList;
        }
Example #2
0
        /// <summary>
        /// Downloads all categories and stores them through <paramref name="categoryService"/>.
        /// </summary>
        /// <remarks>
        /// The index page address is read from the <c>GM_AllCategory</c> application setting.
        /// One page is requested per distinct <c>ParentCode</c> found on the index page.
        /// </remarks>
        /// <param name="categoryService">Service that receives the collected categories via <c>AddCategory</c>.</param>
        private static void LoadCategory(ICategoryService categoryService)
        {
            // 2: Initialise the crawler.
            GM_Crawler crawler = new GM_Crawler();

            string allCateURL = ConfigurationManager.AppSettings["GM_AllCategory"];
            string strCategory = HttpTools.Get(allCateURL);

            // 2.1: Categories listed on the index page.
            List<Category> tempCateList = crawler.GetAllCategoryURl(strCategory);

            // 2.2: Categories collected from the pages requested below.
            List<Category> allCagoryList = new List<Category>();

            // 2.3: Parent codes already handled, so entries sharing a parent are not requested
            //      again. A set keeps the membership test O(1).
            HashSet<string> requestedParents = new HashSet<string>();

            foreach (Category u in tempCateList)
            {
                // Add returns false when the parent code is already in the set.
                if (!requestedParents.Add(u.ParentCode))
                {
                    continue;
                }

                Console.WriteLine($"类别:{u.Name},Url={u.Url}");

                string strHtml = HttpTools.Get(u.Url);
                allCagoryList.AddRange(crawler.GetAllCategoryList(strHtml));
            }

            Console.WriteLine("所有的类别数据都已经下载完成");

            // 2.4: Hand the collected categories to the service for insertion.
            categoryService.AddCategory(allCagoryList);
            Console.WriteLine("Category数据插入完成");
        }
        /// <summary>
        /// Requests <c>this.url</c>, decodes the response body as UTF-8 and deserializes it
        /// from JSON into an instance of <c>T</c>.
        /// </summary>
        /// <returns>The object produced by <c>JsonConvert.DeserializeObject</c>.</returns>
        public T GetSource()
        {
            string json;

            // HttpWebResponse holds the underlying connection; dispose it as soon as the body
            // has been copied out so the connection is released even if decoding fails.
            using (HttpWebResponse response = HttpTools.Get(this.url))
            {
                json = Encoding.UTF8.GetString(response.ReadBody());
            }

            return JsonConvert.DeserializeObject<T>(json);
        }
Example #4
0
        /// <summary>
        /// Collects the commodities of a category listing, starting at <paramref name="page"/>
        /// and following subsequent pages for as long as each page is full.
        /// </summary>
        /// <param name="url">Listing URL; <c>?page=N</c> is appended to it for every request.</param>
        /// <param name="page">Number of the first page to request. Defaults to 1.</param>
        /// <returns>The commodities of all requested pages, in page order.</returns>
        public List<GM_Commodity> GetCategoryDetails(string url, int page = 1)
        {
            // A page holding exactly this many items is treated as full, i.e. another page
            // may follow.
            const int fullPageSize = 48;

            List<GM_Commodity> commodityList = new List<GM_Commodity>();

            // Iterate instead of recursing so the call depth does not grow with the number
            // of pages.
            while (true)
            {
                string pageUrl = url + "?page=" + page;

                // 1: Download the current page.
                Console.WriteLine($"page={page},url={pageUrl}");
                string strDoc = HttpTools.Get(pageUrl);

                // 2: Append this page's items and count how many it contributed.
                int countBefore = commodityList.Count;
                commodityList.AddRange(GetCommodityList(strDoc));
                int pageItemCount = commodityList.Count - countBefore;

                // 3: Anything other than a full page means this was the last one.
                if (pageItemCount != fullPageSize)
                {
                    break;
                }

                page++;
            }

            return commodityList;
        }
Example #5
0
        /// <summary>
        /// Requests the price of <paramref name="commodity"/> and stores it in its
        /// <c>Price</c> property.
        /// </summary>
        /// <remarks>
        /// When <c>SkuId</c> or <c>ProductId</c> is missing, the price is set to 0 and no
        /// request is made. When the response contains no decimal number, <c>Price</c> is
        /// left unchanged.
        /// </remarks>
        /// <param name="commodity">Commodity whose <c>ProductId</c> and <c>SkuId</c> identify the price to look up.</param>
        public void GetCommodityPrice(GM_Commodity commodity)
        {
            if (string.IsNullOrEmpty(commodity.SkuId) || string.IsNullOrEmpty(commodity.ProductId))
            {
                commodity.Price = 0;

                // Without both ids the price URL cannot be built; stop here instead of
                // requesting a URL with empty path segments.
                return;
            }

            var priceURL = $"http://ss.gome.com.cn/search/v1/price/single/{commodity.ProductId}/{commodity.SkuId}/23060000/flag/item/fn0?callback=fn0&_=1500802980750";

            // Request the price endpoint.
            Console.WriteLine($"价格设置:商品:{commodity.Title}");
            string callback = HttpTools.Get(priceURL, "text/json");

            // The first decimal number in the response is taken as the price.
            Match priceMatch = Regex.Match(callback, "\\d+\\.\\d+", RegexOptions.Singleline);
            if (!priceMatch.Success)
            {
                return;
            }

            // The matched text always uses '.' as decimal separator, so parse it with the
            // invariant culture rather than the machine's current culture.
            decimal price = 0;
            if (decimal.TryParse(
                    priceMatch.Value,
                    System.Globalization.NumberStyles.Number,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out price))
            {
                commodity.Price = price;
            }
        }
Example #6
0
        /// <summary>
        /// Parses a listing page and builds one <c>GM_Commodity</c> per product entry,
        /// including a price lookup for every entry that has a usable URL.
        /// </summary>
        /// <remarks>
        /// Entries lacking the <c>item-tab-warp</c> container are skipped. Link, image and
        /// title are each filled in only when the corresponding node is present. The price is
        /// requested only when the last URL segment has the form <c>{productId}-{skuId}.ext</c>.
        /// </remarks>
        /// <param name="strDoc">HTML of the listing page.</param>
        /// <returns>The parsed commodities; an empty list when the page contains no product entries.</returns>
        public List<GM_Commodity> GetCommodityList(string strDoc)
        {
            // XPath selecting every product entry of the listing.
            string products = "//div[@class='product-box']/ul/li[@class='product-item']";

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(strDoc);

            List<GM_Commodity> commodityList = new List<GM_Commodity>();

            // SelectNodes yields null (not an empty collection) when nothing matches.
            HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(products);
            if (nodes == null)
            {
                return commodityList;
            }

            foreach (var item in nodes)
            {
                HtmlNode subNode = item.SelectSingleNode("div[@class='item-tab-warp']");
                if (subNode == null)
                {
                    // Entry without the expected container: nothing to extract from it.
                    continue;
                }

                GM_Commodity commodity = new GM_Commodity();

                var urlNode = subNode.SelectSingleNode("p[1]/a");
                if (urlNode != null)
                {
                    commodity.Url = urlNode.Attributes["href"]?.Value;
                }

                var imgNode = subNode.SelectSingleNode("p[1]/a/img");
                if (imgNode != null)
                {
                    // Prefer the "gome-src" attribute and fall back to the plain "src".
                    var imageAttribute = imgNode.Attributes["gome-src"] ?? imgNode.Attributes["src"];
                    commodity.ImageUrl = imageAttribute?.Value;
                }

                var textNode = subNode.SelectSingleNode("p[2]/a");
                if (textNode != null)
                {
                    commodity.Title = textNode.InnerText;
                }

                // Look up the price.
                if (!string.IsNullOrEmpty(commodity.Url))
                {
                    // The last URL segment is expected to be "{productId}-{skuId}.ext".
                    string[] strTemp = commodity.Url.Split('/');
                    string[] idParts = strTemp[strTemp.Length - 1].Split('-');
                    if (idParts.Length > 1)
                    {
                        var pId = idParts[0];
                        var skuid = idParts[1].Split('.')[0];
                        var priceURL = $"http://ss.gome.com.cn/search/v1/price/single/{pId}/{skuid}/23060000/flag/item/fn0?callback=fn0&_=1500802980750";

                        // Request the price endpoint; the first decimal number in the
                        // response is taken as the price.
                        string callback = HttpTools.Get(priceURL, "text/json");
                        Match priceMatch = Regex.Match(callback, "\\d+\\.\\d+", RegexOptions.Singleline);
                        if (priceMatch.Success)
                        {
                            // The matched text always uses '.' as decimal separator, so
                            // parse it independently of the machine's current culture.
                            decimal price = 0;
                            if (decimal.TryParse(
                                    priceMatch.Value,
                                    System.Globalization.NumberStyles.Number,
                                    System.Globalization.CultureInfo.InvariantCulture,
                                    out price))
                            {
                                commodity.Price = price;
                            }
                        }
                    }
                }

                commodityList.Add(commodity);
            }

            return commodityList;
        }