/// <summary>
/// Starts the background consumer for the redis-backed list stored under <c>listKey</c>:
/// every item that is dequeued is deserialized into a <see cref="GM_Commodity"/> and
/// handed to <paramref name="luceneTools"/> so the index gets updated.
/// </summary>
/// <param name="luceneTools">Index facade whose <c>UpdateIndex</c> is called for each dequeued commodity.</param>
public void Start(LuceneTools luceneTools)
{
    // Run the polling loop on a background task so the caller is not blocked.
    Task.Run(() =>
    {
        while (true)
        {
            try
            {
                var commodityList = _rListService.Get(listKey);
                if (commodityList == null || commodityList.Count == 0)
                {
                    // Nothing queued: back off for a second before polling again.
                    Thread.Sleep(1000);
                    continue;
                }

                Console.WriteLine($"当前redis 对象的包含的list集合的数量是{commodityList.Count}");
                var strList = _rListService.BlockingDequeueItemFromList(listKey, TimeSpan.FromHours(1));
                if (string.IsNullOrEmpty(strList))
                {
                    // The list may have been drained between the count check and the dequeue
                    // (or the blocking wait may have expired) - there is nothing to index.
                    continue;
                }

                // 1. Turn the JSON payload back into an entity.
                GM_Commodity entity = Common.JsonHelper.ToObject<GM_Commodity>(strList);
                if (entity == null)
                {
                    continue;
                }

                // 2. Push the entity into the index.
                luceneTools.UpdateIndex(entity);
            }
            catch (Exception ex)
            {
                // Without this catch a single bad payload or a transient failure would
                // fault the task and stop the consumer for good, with nobody observing it.
                Console.WriteLine($"redis 队列消费出现异常:{ex.Message}");
                Thread.Sleep(1000);
            }
        }
    });
}
/// <summary>
/// Parses the HTML of a product listing page and builds one <see cref="GM_Commodity"/>
/// (url, image url, title, product id, sku id) per product item found in it.
/// </summary>
/// <param name="strDoc">Raw HTML of the listing page.</param>
/// <returns>The parsed commodities; an empty list when the input is empty or no product item matches.</returns>
private List<GM_Commodity> GetCommodityList(string strDoc)
{
    List<GM_Commodity> commodityList = new List<GM_Commodity>();
    if (string.IsNullOrEmpty(strDoc))
    {
        return commodityList;
    }

    const string products = "//div[@class='product-box']/ul/li[@class='product-item']";
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(strDoc);

    HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(products);
    if (nodes == null)
    {
        return commodityList;
    }

    foreach (var item in nodes)
    {
        HtmlNode subNode = item.SelectSingleNode("div[@class='item-tab-warp']");
        if (subNode == null)
        {
            // An item without the expected wrapper carries none of the data read below.
            continue;
        }

        GM_Commodity commodity = new GM_Commodity();

        var urlNode = subNode.SelectSingleNode("p[1]/a");
        if (urlNode != null)
        {
            // The anchor may lack an href; leave Url unset instead of throwing.
            commodity.Url = urlNode.Attributes["href"]?.Value;
        }

        var imgNode = subNode.SelectSingleNode("p[1]/a/img");
        if (imgNode != null)
        {
            // "gome-src" wins over "src" when both are present
            // (presumably a lazy-load attribute - TODO confirm).
            var imgAttribute = imgNode.Attributes["gome-src"] ?? imgNode.Attributes["src"];
            if (imgAttribute != null)
            {
                commodity.ImageUrl = imgAttribute.Value;
            }
        }

        var textNode = subNode.SelectSingleNode("p[2]/a");
        if (textNode != null)
        {
            commodity.Title = textNode.InnerText;
        }

        // The page file name is expected to look like "<productId>-<skuId>.html".
        if (!string.IsNullOrEmpty(commodity.Url))
        {
            string[] idParts = Path.GetFileName(commodity.Url).Replace(".html", "").Split('-');
            if (idParts.Length >= 2)
            {
                commodity.ProductId = idParts[0];
                commodity.SkuId = idParts[1];
            }
        }

        commodityList.Add(commodity);
    }

    return commodityList;
}
/// <summary>
/// Resolves the price of every commodity on worker tasks and, once all of them have
/// finished, stores the whole list through <paramref name="categoryService"/>.
/// </summary>
/// <param name="commodityList">Commodities whose price should be loaded; nothing is done when it is null or empty.</param>
/// <param name="categoryService">Service whose <c>InsertCommodity</c> receives the list after all price tasks ended.</param>
/// <remarks>
/// The method returns as soon as every task has been started; the insert runs in a
/// continuation that is not waited for here.
/// </remarks>
private static void LoadPriceAndInsertDB(List<GM_Commodity> commodityList, ICategoryService categoryService)
{
    Console.WriteLine($"*****************开始加载该产品的价格信息**********************");
    if (commodityList == null || commodityList.Count == 0)
    {
        return;
    }

    GM_Crawler crawler = new GM_Crawler();
    TaskFactory taskFactory = new TaskFactory();

    // allTasks feeds the final continuation; runningTasks is only used for throttling.
    // Keeping them apart guarantees the continuation never receives an empty array,
    // for which ContinueWhenAll throws ArgumentException.
    List<Task> allTasks = new List<Task>(commodityList.Count);
    List<Task> runningTasks = new List<Task>();

    ctsPrice = new CancellationTokenSource();
    // Capture the token so the workers keep observing this run's source even if the
    // static field is reassigned later.
    CancellationToken token = ctsPrice.Token;

    foreach (GM_Commodity item in commodityList)
    {
        Task priceTask = taskFactory.StartNew((obj) =>
        {
            if (token.IsCancellationRequested)
            {
                return;
            }

            GM_Commodity commodity = obj as GM_Commodity;
            if (commodity != null)
            {
                crawler.GetCommodityPrice(commodity);
            }
        }, item, token);

        allTasks.Add(priceTask);
        runningTasks.Add(priceTask);

        // Once more than 30 tasks are being tracked, drop the finished ones and
        // wait for any of the remaining ones before starting more work.
        if (runningTasks.Count > 30)
        {
            runningTasks.RemoveAll(t => t.IsCompleted);
            if (runningTasks.Count > 0)
            {
                Task.WaitAny(runningTasks.ToArray());
            }
        }
    }

    // When every price task has ended, persist the list.
    taskFactory.ContinueWhenAll(allTasks.ToArray(), (ts) =>
    {
        Console.ForegroundColor = ConsoleColor.Green;
        Console.WriteLine("设置商品价格的所有线程都已经完结");
        categoryService.InsertCommodity(commodityList);
        Console.ForegroundColor = ConsoleColor.White;
    });
}
/// <summary>
/// Requests the price of a commodity from the price endpoint and stores the first
/// decimal number found in the response in <c>commodity.Price</c>.
/// </summary>
/// <param name="commodity">
/// Commodity to price. When its <c>SkuId</c> or <c>ProductId</c> is empty the price is
/// set to 0 and no request is sent. When the response holds no decimal number the
/// price is left untouched.
/// </param>
public void GetCommodityPrice(GM_Commodity commodity)
{
    if (string.IsNullOrEmpty(commodity.SkuId) || string.IsNullOrEmpty(commodity.ProductId))
    {
        commodity.Price = 0;
        // Without both ids the URL below would be malformed, so stop here.
        return;
    }

    var priceURL = $"http://ss.gome.com.cn/search/v1/price/single/{commodity.ProductId}/{commodity.SkuId}/23060000/flag/item/fn0?callback=fn0&_=1500802980750";

    Console.WriteLine($"价格设置:商品:{commodity.Title}");
    string callback = HttpTools.Get(priceURL, "text/json");
    if (string.IsNullOrEmpty(callback))
    {
        // Regex matching throws on a null input; an empty response has no price anyway.
        return;
    }

    Match priceMatch = Regex.Match(callback, "\\d+\\.\\d+", RegexOptions.Singleline);
    if (!priceMatch.Success)
    {
        return;
    }

    // The matched text always uses '.' as decimal separator, so parse it culture-independently.
    decimal price;
    if (decimal.TryParse(
            priceMatch.Value,
            System.Globalization.NumberStyles.Number,
            System.Globalization.CultureInfo.InvariantCulture,
            out price))
    {
        commodity.Price = price;
    }
}
/// <summary>
/// Serializes a commodity to JSON and pushes it onto the list stored under <c>listKey</c>.
/// </summary>
/// <param name="entity">Commodity to enqueue.</param>
public void Add(GM_Commodity entity)
{
    // Entities are queued one at a time, each as its own JSON payload.
    var payload = Common.JsonHelper.ToJSON<GM_Commodity>(entity);
    _rListService.LPush(listKey, payload);
}
/// <summary>
/// Forwards the commodity to <c>_gmLucene.Update</c>.
/// </summary>
/// <param name="commodity">Commodity passed through unchanged.</param>
public void UpdateIndex(GM_Commodity commodity) => _gmLucene.Update(commodity);
/// <summary>
/// Inserts a commodity through <c>_dbHelper</c> using the fixed name "GM_Commodity_030".
/// </summary>
/// <param name="entity">Commodity to insert.</param>
/// <returns>The value returned by <c>_dbHelper.Insert</c> (presumably the new row's id - TODO confirm).</returns>
public int Insert(GM_Commodity entity)
{
    const string tableName = "GM_Commodity_030";
    return _dbHelper.Insert<GM_Commodity>(entity, tableName);
}
/// <summary>
/// Demo entry point. It initializes the container, runs a sample index query, starts the
/// redis-backed queue consumer, inserts a sample commodity, enqueues it for indexing and
/// finally repeats the query. Any exception cancels the shared token sources and is
/// printed; the program then waits for a line of console input.
/// </summary>
/// <param name="args">Command line arguments (not used).</param>
static void Main(string[] args)
{
    const string asusQuery = "Title:asus";
    const string priceRange = "[100,200]";

    try
    {
        #region 1.0 crawler
        // 1: set up the container and resolve the services.
        IOCFactory.InitContainer();
        ICategoryService categoryService = IOCFactory.Resolve<ICategoryService>();
        ICommodityService commodityService = IOCFactory.Resolve<ICommodityService>();
        // The crawler demo is currently switched off:
        // CrawlerDemo(categoryService, commodityService);
        #endregion

        #region 2.0 lucene.net
        Console.ForegroundColor = ConsoleColor.Green;
        Console.WriteLine("*****************lucene.net demo*****************");
        LuceneTools luceneTools = new LuceneTools(commodityService);
        // Index initialization is currently switched off:
        // luceneTools.InitLucene();
        luceneTools.QueryList(asusQuery, new Page { Sort = "Price" }, priceRange);
        #endregion

        #region 3.0 redis asynchronous queue
        // 3.1 start the queue consumer.
        Console.ForegroundColor = ConsoleColor.DarkYellow;
        Console.WriteLine("*****************启动一个异步队列*****************");
        RedisTools.Current.Start(luceneTools);

        // 3.2 create a sample entity and store it.
        Console.WriteLine("1.0 添加一个实体到数据");
        GM_Commodity sample = new GM_Commodity
        {
            Title = "华硕(asus),测试数据",
            ImageUrl = "",
            Price = 150,
            ProductId = "11",
            SkuId = "11",
            Url = "http://www.baidu.com"
        };
        int newId = commodityService.Insert(sample);
        Console.WriteLine($"2.0 添加一个实体到数据库,得到的Id={newId}");
        sample.Id = newId;

        // Hand the stored entity to the queue so the consumer updates the index.
        Console.WriteLine("3.0 更新实体到索引中");
        RedisTools.Current.Add(sample);

        // Pause two seconds to give the consumer time to move the item into the index.
        Thread.Sleep(2000);

        Console.WriteLine("4.0 在查询包含 asus的索引");
        luceneTools.QueryList(asusQuery, new Page { Sort = "Price" }, priceRange);
        #endregion
    }
    catch (Exception ex)
    {
        // A failure anywhere cancels the work tied to both token sources.
        cts?.Cancel();
        ctsPrice?.Cancel();
        Console.WriteLine("页面抓取出现异常:{0}", ex.Message);
    }

    Console.ReadLine();
}
/// <summary>
/// Parses the HTML of a product listing page into commodities (url, image url, title)
/// and, for every item whose url yields a product id and sku id, requests the price
/// endpoint and stores the first decimal number of the response as the price.
/// </summary>
/// <param name="strDoc">Raw HTML of the listing page.</param>
/// <returns>The parsed commodities; an empty list when the input is empty or no product item matches.</returns>
public List<GM_Commodity> GetCommodityList(string strDoc)
{
    List<GM_Commodity> commodityList = new List<GM_Commodity>();
    if (string.IsNullOrEmpty(strDoc))
    {
        return commodityList;
    }

    const string products = "//div[@class='product-box']/ul/li[@class='product-item']";
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(strDoc);

    HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(products);
    if (nodes == null)
    {
        // No product item matched; iterating a null collection would throw.
        return commodityList;
    }

    // Built once: the pattern does not change between items.
    Regex priceRegex = new Regex("\\d+\\.\\d+", RegexOptions.Singleline);

    foreach (var item in nodes)
    {
        HtmlNode subNode = item.SelectSingleNode("div[@class='item-tab-warp']");
        if (subNode == null)
        {
            // An item without the expected wrapper carries none of the data read below.
            continue;
        }

        GM_Commodity commodity = new GM_Commodity();

        var urlNode = subNode.SelectSingleNode("p[1]/a");
        if (urlNode != null)
        {
            // The anchor may lack an href; leave Url unset instead of throwing.
            commodity.Url = urlNode.Attributes["href"]?.Value;
        }

        var imgNode = subNode.SelectSingleNode("p[1]/a/img");
        if (imgNode != null)
        {
            // "gome-src" wins over "src" when both are present
            // (presumably a lazy-load attribute - TODO confirm).
            var imgAttribute = imgNode.Attributes["gome-src"] ?? imgNode.Attributes["src"];
            if (imgAttribute != null)
            {
                commodity.ImageUrl = imgAttribute.Value;
            }
        }

        var textNode = subNode.SelectSingleNode("p[2]/a");
        if (textNode != null)
        {
            commodity.Title = textNode.InnerText;
        }

        // The last url segment is expected to look like "<productId>-<skuId>.html".
        if (!string.IsNullOrEmpty(commodity.Url))
        {
            string[] segments = commodity.Url.Split('/');
            string[] idParts = segments[segments.Length - 1].Split('-');
            if (idParts.Length >= 2)
            {
                var pId = idParts[0];
                var skuid = idParts[1].Split('.')[0];
                var priceURL = $"http://ss.gome.com.cn/search/v1/price/single/{pId}/{skuid}/23060000/flag/item/fn0?callback=fn0&_=1500802980750";

                string callback = HttpTools.Get(priceURL, "text/json");
                if (!string.IsNullOrEmpty(callback))
                {
                    Match priceMatch = priceRegex.Match(callback);
                    decimal price;
                    // The matched text always uses '.' as decimal separator, so parse it culture-independently.
                    if (priceMatch.Success
                        && decimal.TryParse(
                            priceMatch.Value,
                            System.Globalization.NumberStyles.Number,
                            System.Globalization.CultureInfo.InvariantCulture,
                            out price))
                    {
                        commodity.Price = price;
                    }
                }
            }
        }

        commodityList.Add(commodity);
    }

    return commodityList;
}