/// <summary>
/// 1.0 Loads the full list of Gome categories.
/// Downloads the listing entry page, extracts the category links from it, and then
/// requests one category page per distinct <c>ParentCode</c>, collecting everything
/// <c>GM_Crawler.GetAllCategoryList</c> parses out of those pages.
/// </summary>
/// <returns>The categories gathered from all requested category pages.</returns>
private List<Category> LoadCategories()
{
    // 0: entry page that lists every Gome category
    string url = "http://list.gome.com.cn/";

    // 1: find the URL that belongs to each category on the entry page
    string docHtml = HttpTools.Get(url);
    GM_Crawler center = new GM_Crawler();
    List<Category> tempCateList = center.GetAllCategoryURl(docHtml);

    // 2: accumulated result
    List<Category> cateList = new List<Category>();

    // 3: parent codes that were already requested, so the same second-level
    //    menu is not downloaded more than once. A set keeps the membership
    //    test O(1) instead of scanning a list for every item.
    HashSet<string> requestedParents = new HashSet<string>();

    foreach (var item in tempCateList)
    {
        // Add returns false when the code is already present.
        if (!requestedParents.Add(item.ParentCode))
        {
            continue;
        }

        var strDoc = HttpTools.Get(item.Url);
        cateList.AddRange(center.GetAllCategoryList(strDoc));
    }

    // prefix is a member declared outside this method.
    Console.WriteLine("{0}国美所有类别已经都加载完了{1}", prefix, prefix);
    return cateList;
}
/// <summary>
/// Loads all categories and stores them through the given service.
/// Reads the entry URL from the <c>GM_AllCategory</c> app setting, extracts the
/// category links from that page, requests one page per distinct <c>ParentCode</c>,
/// and passes everything parsed from those pages to
/// <c>categoryService.AddCategory</c>.
/// </summary>
/// <param name="categoryService">Service that receives the collected categories.</param>
private static void LoadCategory(ICategoryService categoryService)
{
    // 2: initialise the crawler
    GM_Crawler crawler = new GM_Crawler();
    string allCateURL = ConfigurationManager.AppSettings["GM_AllCategory"];
    string strCategory = HttpTools.Get(allCateURL);

    // 2.1: categories found on the entry page
    List<Category> tempCateList = crawler.GetAllCategoryURl(strCategory);

    // 2.2: detailed categories collected from each requested page
    List<Category> allCagoryList = new List<Category>();

    // 2.3: parent codes already handled, so a parent page is requested only once.
    //      A set gives an O(1) membership test instead of a list scan per item.
    HashSet<string> requestedParents = new HashSet<string>();

    foreach (Category u in tempCateList)
    {
        if (requestedParents.Contains(u.ParentCode))
        {
            continue;
        }

        Console.WriteLine($"类别:{u.Name},Url={u.Url}");
        string strHtml = HttpTools.Get(u.Url);
        allCagoryList.AddRange(crawler.GetAllCategoryList(strHtml));

        // Recorded only after the page was fetched and parsed, as before.
        requestedParents.Add(u.ParentCode);
    }

    Console.WriteLine("所有的类别数据都已经下载完成");

    // 2.4: hand the collected categories to the service for storage
    categoryService.AddCategory(allCagoryList);
    Console.WriteLine("Category数据插入完成");
}
/// <summary>
/// Requests <c>this.url</c>, decodes the response body as UTF-8 text and
/// deserializes that text as JSON into <typeparamref name="T"/>.
/// </summary>
/// <returns>The object produced by <c>JsonConvert.DeserializeObject</c>.</returns>
public T GetSource()
{
    // HttpWebResponse holds the underlying connection; dispose it once the
    // body has been read so the connection is released even when decoding
    // or deserialization throws.
    using (HttpWebResponse response = HttpTools.Get(this.url))
    {
        string json = Encoding.UTF8.GetString(response.ReadBody());
        return JsonConvert.DeserializeObject<T>(json);
    }
}
/// <summary>
/// Collects the commodities of a category, starting at the given page and
/// following subsequent pages for as long as a page is completely filled.
/// </summary>
/// <param name="url">Category URL; <c>?page=N</c> is appended to it for each request.</param>
/// <param name="page">Page number to start from (defaults to 1).</param>
/// <returns>The commodities of every visited page, in page order.</returns>
public List<GM_Commodity> GetCategoryDetails(string url, int page = 1)
{
    // A page holding exactly this many items is treated as "full", which is
    // taken as the signal that a following page may exist.
    const int FullPageSize = 48;

    List<GM_Commodity> commodityList = new List<GM_Commodity>();

    // Iterate rather than recurse so that a category with many pages does
    // not grow the call stack by one frame per page.
    while (true)
    {
        string pageUrl = url + "?page=" + page;

        // 1: download the current page
        Console.WriteLine($"page={page},url={pageUrl}");
        string strDoc = HttpTools.Get(pageUrl);

        // 2: parse the commodities of this page
        List<GM_Commodity> pageItems = GetCommodityList(strDoc);
        commodityList.AddRange(pageItems);

        // 3: anything other than a full page ends the walk
        if (pageItems.Count != FullPageSize)
        {
            break;
        }

        page++;
    }

    return commodityList;
}
/// <summary>
/// Looks up the price of a commodity through the Gome price endpoint and
/// stores it in <c>commodity.Price</c>.
/// </summary>
/// <remarks>
/// When <c>SkuId</c> or <c>ProductId</c> is missing the price is set to 0 and no
/// request is made. When the response contains no decimal number the price is
/// left untouched.
/// </remarks>
/// <param name="commodity">Commodity whose price is to be filled in.</param>
public void GetCommodityPrice(GM_Commodity commodity)
{
    // Both ids are required to build the price URL; without them there is
    // nothing meaningful to request, so stop here.
    if (string.IsNullOrEmpty(commodity.SkuId) || string.IsNullOrEmpty(commodity.ProductId))
    {
        commodity.Price = 0;
        return;
    }

    var priceURL = $"http://ss.gome.com.cn/search/v1/price/single/{commodity.ProductId}/{commodity.SkuId}/23060000/flag/item/fn0?callback=fn0&_=1500802980750";

    // request the price URL
    Console.WriteLine($"价格设置:商品:{commodity.Title}");
    string callback = HttpTools.Get(priceURL, "text/json");

    // The first "digits.digits" token in the response is taken as the price.
    Match match = Regex.Match(callback, "\\d+\\.\\d+", RegexOptions.Singleline);
    if (!match.Success)
    {
        return;
    }

    // The token always uses '.' as decimal separator, so parse it with the
    // invariant culture; the current culture could read '.' as a group
    // separator and yield a wrong value.
    decimal price;
    if (decimal.TryParse(
            match.Value,
            System.Globalization.NumberStyles.Number,
            System.Globalization.CultureInfo.InvariantCulture,
            out price))
    {
        commodity.Price = price;
    }
}
/// <summary>
/// Parses a product listing page and returns one <c>GM_Commodity</c> per product
/// item, filling in URL, image URL, title and (through an extra HTTP request per
/// item) price.
/// </summary>
/// <remarks>
/// Items without the expected <c>item-tab-warp</c> container are skipped. The
/// price is only looked up when the product URL ends in a
/// <c>{productId}-{skuId}.ext</c> segment.
/// </remarks>
/// <param name="strDoc">HTML of the listing page.</param>
/// <returns>The parsed commodities; empty when the page contains no product items.</returns>
public List<GM_Commodity> GetCommodityList(string strDoc)
{
    HtmlDocument doc = new HtmlDocument();
    string products = "//div[@class='product-box']/ul/li[@class='product-item']";
    doc.LoadHtml(strDoc);

    HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(products);
    List<GM_Commodity> commodityList = new List<GM_Commodity>();

    // SelectNodes yields null (not an empty collection) when nothing matches,
    // e.g. on a page past the last one.
    if (nodes == null)
    {
        return commodityList;
    }

    // Built once and reused for every item: first "digits.digits" token.
    Regex priceRegex = new Regex("\\d+\\.\\d+", RegexOptions.Singleline);

    foreach (var item in nodes)
    {
        HtmlNode subNode = item.SelectSingleNode("div[@class='item-tab-warp']");
        if (subNode == null)
        {
            // Not a regular product tile; nothing to read from it.
            continue;
        }

        GM_Commodity commodity = new GM_Commodity();

        var urlNode = subNode.SelectSingleNode("p[1]/a");
        if (urlNode != null)
        {
            commodity.Url = urlNode.Attributes["href"]?.Value;
        }

        var imgNode = subNode.SelectSingleNode("p[1]/a/img");
        if (imgNode != null)
        {
            // Prefer the "gome-src" attribute and fall back to "src".
            var imageAttribute = imgNode.Attributes["gome-src"] ?? imgNode.Attributes["src"];
            commodity.ImageUrl = imageAttribute?.Value;
        }

        var textNode = subNode.SelectSingleNode("p[2]/a");
        if (textNode != null)
        {
            commodity.Title = textNode.InnerText;
        }

        // Fetch the price
        if (!string.IsNullOrEmpty(commodity.Url))
        {
            // Last path segment is expected to look like "{productId}-{skuId}.ext".
            string[] segments = commodity.Url.Split('/');
            string[] idParts = segments[segments.Length - 1].Split('-');

            if (idParts.Length >= 2)
            {
                var pId = idParts[0];
                var skuid = idParts[1].Split('.')[0];
                var priceURL = $"http://ss.gome.com.cn/search/v1/price/single/{pId}/{skuid}/23060000/flag/item/fn0?callback=fn0&_=1500802980750";

                // request the price URL
                string callback = HttpTools.Get(priceURL, "text/json");
                Match match = priceRegex.Match(callback);

                // Parse with the invariant culture: the token always uses '.'
                // as decimal separator regardless of the machine's culture.
                decimal price;
                if (match.Success
                    && decimal.TryParse(
                        match.Value,
                        System.Globalization.NumberStyles.Number,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out price))
                {
                    commodity.Price = price;
                }
            }
        }

        commodityList.Add(commodity);
    }

    return commodityList;
}