/// <summary>
/// Toutiao channel news gathering: repeatedly loads the configured site list and runs the
/// channel gatherer for every site whose name is "toutiao".
/// </summary>
/// <remarks>
/// Returns immediately unless <c>Global.IsEnableGatherChannel</c> equals "1". Otherwise the
/// method loops until an exception escapes the loop body; that exception is logged and the
/// method returns.
/// </remarks>
public static void AuthorUrlGathering()
{
    try
    {
        if (Global.IsEnableGatherChannel != "1")
        {
            return;
        }

        // NOTE: a time-window restriction (no gathering before 08:00) used to sit here
        // and is currently disabled.

        while (true)
        {
            Log.Info("频道新闻抓取开始:" + DateTime.Now);

            var siteList = XmlDeal.GetSitesInfo();
            if (siteList != null && siteList.Count > 0)
            {
                foreach (var site in siteList)
                {
                    // Ordinal, case-insensitive match: does not depend on the current culture
                    // and treats a null SiteName as "no match" instead of throwing, which
                    // would otherwise end the whole gathering loop via the outer catch.
                    if (string.Equals(site.SiteName, "toutiao", StringComparison.OrdinalIgnoreCase))
                    {
                        var bll = new ToutiaoGather();
                        bll.GatheringAuthorUrlFromChannel(site.Url, site.NewsType, 0);
                    }

                    // One-minute pause after every configured site, gathered or not.
                    Thread.Sleep(60 * 1000);
                }
            }
            else
            {
                // Missing or empty site list: report it and retry on the next pass.
                Log.Error("抓取错误-检查site.xml" + DateTime.Now);
            }

            Log.Info("频道新闻抓取结束:" + DateTime.Now);

            // One-minute pause between passes.
            Thread.Sleep(60 * 1000);
        }
    }
    catch (Exception ex)
    {
        Log.Error(ex.Message + ex.StackTrace);
    }
}
/// <summary>
/// Verifies that <c>XmlDeal.GetSitesInfo()</c> returns a non-empty collection.
/// </summary>
public void GetSitesInfoTest()
{
    var sites = XmlDeal.GetSitesInfo();
    bool hasAnySite = sites.Count > 0;

    Assert.IsTrue(hasAnySite);
}
/// <summary>
/// Toutiao channel news gathering with a randomized start position: while the proxy is
/// ready, repeatedly loads the configured site list, picks a start index, and runs the
/// channel gatherer for every "toutiao" site from that index to the end of the list.
/// </summary>
/// <remarks>
/// <para>
/// Returns immediately unless <c>Global.IsEnableGatherChannel</c> equals "1". The loop ends
/// when <c>ProxyDeal.IsProxyReady</c> is false; the method then logs that fact, waits one
/// minute and returns. Any exception is logged and also ends the method.
/// </para>
/// <para>
/// The start index is random so that several running instances do not all walk the list in
/// the same order. It is biased toward indices 0, 1 and 5 using a first-match-wins rule
/// chain. The earlier independent <c>if</c> cascade immediately remapped 0 to 1, so the
/// first site could never be the start index and, with a single configured site, nothing
/// was gathered at all. The former "multiple of 6 -> 9" rule could never fire (every
/// multiple of 6 is already caught by the multiple-of-3 rule) and has been dropped.
/// </para>
/// </remarks>
public static void GatheringAuthorUrlFromChannel()
{
    try
    {
        if (Global.IsEnableGatherChannel != "1")
        {
            return;
        }

        // NOTE: a time-window restriction (no gathering before 08:00) used to sit here
        // and is currently disabled.

        var rnd = new Random();
        int i = 0;
        while (ProxyDeal.IsProxyReady)
        {
            i++;
            Log.Info("频道新闻抓取开始 i=" + i + " time=" + DateTime.Now);

            var siteList = XmlDeal.GetSitesInfo();
            if (siteList != null && siteList.Count > 0)
            {
                // Random start index in [0, Count), then biased toward a few fixed indices.
                // Each biased target is <= the value it replaces, so it is always in range.
                var iStart = rnd.Next(0, siteList.Count);
                if (iStart % 3 == 0)
                {
                    iStart = 0;
                }
                else if (iStart % 4 == 0)
                {
                    iStart = 1;
                }
                else if (iStart % 5 == 0)
                {
                    iStart = 5;
                }

                // Sites before iStart are skipped in this pass; there is no wrap-around.
                for (var start = iStart; start < siteList.Count; start++)
                {
                    var site = siteList[start];

                    // Ordinal, case-insensitive match: culture-independent, and a null
                    // SiteName is treated as "no match" instead of throwing.
                    if (string.Equals(site.SiteName, "toutiao", StringComparison.OrdinalIgnoreCase))
                    {
                        var bll = new ToutiaoGather();
                        bll.GatheringAuthorUrlFromChannel(site.Url, site.NewsType, 0);
                    }

                    // Five-second pause after every visited site, gathered or not.
                    Thread.Sleep(5 * 1000);
                }
            }
            else
            {
                // Missing or empty site list: report it and retry on the next pass.
                Log.Error("抓取错误-检查site.xml" + DateTime.Now);
            }

            Log.Info("频道新闻抓取结束 i=" + i + " time=" + DateTime.Now);

            // One-minute pause between passes.
            Thread.Sleep(60 * 1000);
        }

        if (!ProxyDeal.IsProxyReady)
        {
            Log.Info("代理未准备好" + DateTime.Now);
            Thread.Sleep(60 * 1000);
        }
    }
    catch (Exception ex)
    {
        Log.Error(ex.Message + ex.StackTrace);
    }
}