/// <summary>
/// Runs the gift-crawling loop on the calling thread.
/// </summary>
/// <remarks>
/// Initializes <c>GiftCrawlResult</c> with the current thread's name and the
/// current local time, then repeatedly crawls one page at a time, logging the
/// elapsed milliseconds of each page followed by an empty log line.
/// The loop has no exit condition; it only ends if an exception propagates.
/// </remarks>
static void StartCrawlGift()
{
    var crawler = new GiftCrawler();

    // Subscription to the CrawlingRoom event is currently disabled:
    // crawler.CrawlingRoom += new EventHandler<CrawlingRoomEventArgs>(GiftCrawler_CrawlingRoom);

    GiftCrawlResult.Initialize(Thread.CurrentThread.Name, DateTime.Now);

    for (;;)
    {
        // A fresh stopwatch per page, so each log entry covers a single page.
        var pageTimer = Stopwatch.StartNew();

        crawler.CrawlOnePage();

        long elapsedMs = pageTimer.ElapsedMilliseconds;
        LogService.InfoFormat("[时间] 爬取一个网页, 总耗时\t{0}", elapsedMs);
        LogService.Info("");
    }
}
/// <summary>
/// Crawls the gift list of the next room and saves every gift found.
/// </summary>
/// <remarks>
/// Steps:
/// 1. Creates a web client and asks <c>DouyuService</c> for the next room and its API URL.
/// 2. Makes sure a proxy is available (polling <c>ProxyPool</c> every 1000 ms until one is
///    returned) and downloads the URL as UTF-8 text.
/// 3. A <see cref="System.Net.WebException"/>, or a response that does not contain the text
///    "error", causes the current proxy to be removed from the pool and the download to be
///    retried with another proxy.
/// 4. The response is deserialized as JSON. A non-zero "error" value ends the call; otherwise
///    each entry of data.gift is converted to a <c>Gift</c>, announced through
///    <c>GiftCrawler.OnCrawledGift</c> and stored with <c>GiftService.SaveGift</c>, followed
///    by a 3000 ms wait.
/// 5. Any exception while parsing causes the whole download/parse cycle to be retried.
///
/// Every web client created here is disposed before the method returns.
/// </remarks>
void CrawlOnePage()
{
    // NOTE(review): _watch is started here and not stopped within this method;
    // presumably it is read elsewhere (e.g. by Debug) - confirm.
    _watch.Start();

    // Create the web client.
    var webClient = CreateWebClient();
    Debug("创建了Webclient");

    try
    {
        // A proxy kept from a previous call never passes through the acquisition
        // loop below, so it has to be applied to the freshly created client here.
        if (Proxy != null)
        {
            webClient.Proxy = Proxy;
        }

        // Determine the URL to crawl.
        var roomNumber = DouyuService.NextRoom();
        var url = DouyuService.GetRoomApiUrl(roomNumber);
        Debug("获取了房间页面: {0}", url);

        // Fetch the page content.
        var page = "";
        while (true)
        {
            // Acquire a proxy if none is currently held.
            while (Proxy == null)
            {
                Proxy = ProxyPool.GetProxy();
                if (Proxy == null)
                {
                    // Pool is empty: wait and poll again.
                    MyThread.Wait(1000);
                    continue;
                }
                Debug("找到了代理 {0}", Proxy.Address);

                // Use the proxy.
                webClient.Proxy = Proxy;
                Debug("设置了代理");
                break;
            }

            // Download the page.
            try
            {
                GiftCrawler.OnCrawlingRoom(roomNumber);
                Debug("on crawing room");

                page = Encoding.UTF8.GetString(webClient.DownloadData(url));
                Debug("dowanload data");

                // The expected JSON carries an "error" field; a response without that
                // text is treated as garbage, so the client and the proxy are replaced.
                if (page.Contains("error") == false)
                {
                    var staleClient = webClient;
                    webClient = CreateWebClient();
                    staleClient.Dispose(); // release the client being replaced

                    ProxyPool.RemoveProxy(Proxy);
                    Proxy = null;
                    Debug("页面都是乱码, 重新创建Webclient, 移除代理");
                    continue;
                }

                GiftCrawler.OnCrawledRoom(roomNumber);
                Debug("on crawled room");
            }
            catch (WebException webEx)
            {
                // Proxy no longer valid? Drop it and retry with another one.
                Debug("爬取礼物页面发生WebException, 异常信息 = {0}, url = {1}, proxy = {2}", webEx.Message, url, Proxy.Address);
                ProxyPool.RemoveProxy(Proxy);
                Debug("移除了无效的代理 - {0}", Proxy.Address);
                Proxy = null;
                continue;
            }
            catch (Exception ex)
            {
                // Non-web failures are only logged; execution falls through to the
                // parsing stage with whatever `page` currently holds.
                Debug("爬取礼物页面发生Exception, 异常信息 = {0}, url = {1}, proxy = {2}", ex.Message, url, Proxy.Address);
            }

            // Parse the gifts.
            try
            {
                // Parse the gift page.
                dynamic roomInfo = JsonConvert.DeserializeObject<dynamic>(page);
                Debug("deserialize object");

                // No gifts: the API reported an error.
                if (roomInfo["error"].Value != 0)
                {
                    Debug("没有爬到礼物, 错误信息 = {0}, url = {1}, proxy = {2}", roomInfo["data"].Value, url, Proxy.Address);
                    return;
                }

                // Gifts found.
                foreach (dynamic item in roomInfo["data"]["gift"])
                {
                    Gift gift = new Gift(
                        int.Parse(item["id"].Value),
                        item["name"].Value,
                        item["type"].Value,
                        item["pc"].Value,
                        item["gx"].Value,
                        item["desc"].Value,
                        item["intro"].Value,
                        item["mimg"].Value,
                        item["himg"].Value
                    );

                    GiftCrawler.OnCrawledGift(gift);
                    Debug("on crawled gift: {0}", gift.Name);

                    GiftService.SaveGift(gift);
                    Debug("save gift: {0}", gift.Name);
                }

                MyThread.Wait(3000);
                Debug("delayed 3000ms...");
                return;
            }
            catch (Exception ex)
            {
                // Any parse failure restarts the download/parse cycle.
                Debug("解析礼物页面出现Exception, 异常信息 = {0}, url = {1}, proxy = {2}", ex.Message, url, Proxy.Address);
                continue;
            }
        }
    }
    finally
    {
        // Release the client on every exit path (return or propagated exception).
        webClient.Dispose();
    }
}