/// <summary>
/// Gathers author information from news pages.
/// </summary>
/// <remarks>
/// Returns immediately unless <c>Global.IsEnableGatherAuthorFromNews</c> equals "1".
/// Otherwise it repeats without end: log the start, run
/// <c>ToutiaoGather.GatherAuthorFromNews</c>, log the end, sleep 60 seconds.
/// Any exception is logged (message plus stack trace) and ends the method.
/// </remarks>
public static void GatherAuthorFromNews()
{
    const int pauseMilliseconds = 60 * 1000;

    try
    {
        if (Global.IsEnableGatherAuthorFromNews != "1")
        {
            return;
        }

        // A time-window restriction (no gathering before 08:00) used to sit here; it is disabled.
        for (;;)
        {
            Log.Info("从新闻页抓取作者开始:" + DateTime.Now);

            var gatherer = new ToutiaoGather();
            gatherer.GatherAuthorFromNews();

            Log.Info("从新闻页抓取作者结束:" + DateTime.Now);
            Thread.Sleep(pauseMilliseconds);
        }
    }
    catch (Exception failure)
    {
        Log.Error(failure.Message + failure.StackTrace);
    }
}
/// <summary>
/// Based on the articles' refresh interval, visits the author's home page to gather
/// data such as article read counts.
/// </summary>
/// <remarks>
/// Does nothing unless <c>Global.IsEnableRefreshNews</c> equals "1". When enabled it
/// runs <c>ToutiaoGather.GatheringAuthorNewsByRefresh</c> in an endless loop with a
/// 60-second pause between rounds. An exception is logged and terminates the method.
/// </remarks>
public static void AuthorNewsByRefreshGathering()
{
    try
    {
        if (Global.IsEnableRefreshNews != "1")
        {
            return;
        }

        // The former "skip between 0:00 and 8:00" restriction is disabled.
        var oneMinute = 60 * 1000;
        do
        {
            Log.Info("作者列表页刷新开始:" + DateTime.Now);

            var refresher = new ToutiaoGather();
            refresher.GatheringAuthorNewsByRefresh();

            Log.Info("作者列表页刷新结束:" + DateTime.Now);
            Thread.Sleep(oneMinute);
        }
        while (true);
    }
    catch (Exception error)
    {
        Log.Error(error.Message + error.StackTrace);
    }
}
/// <summary>
/// Toutiao channel news gathering loop.
/// </summary>
/// <remarks>
/// Returns immediately unless <c>Global.IsEnableGatherChannel</c> equals "1".
/// Each round loads the site list from <c>XmlDeal.GetSitesInfo</c>, calls
/// <c>ToutiaoGather.GatheringAuthorUrlFromChannel</c> for every entry whose
/// <c>SiteName</c> is "toutiao" (case-insensitive), and sleeps 60 seconds after
/// every entry and again at the end of the round.
/// Any exception is logged (message plus stack trace) and ends the method.
/// </remarks>
public static void AuthorUrlGathering()
{
    try
    {
        if (Global.IsEnableGatherChannel != "1")
        {
            return;
        }

        // A time-window restriction (no gathering before 08:00) used to sit here; it is disabled.
        while (true)
        {
            Log.Info("频道新闻抓取开始:" + DateTime.Now);
            var siteList = XmlDeal.GetSitesInfo();
            if (siteList != null && siteList.Count > 0)
            {
                foreach (var site in siteList)
                {
                    // Ordinal, case-insensitive match: unlike ToLower() it does not depend on
                    // the current culture (e.g. Turkish dotless i) and tolerates a null name.
                    if (string.Equals(site.SiteName, "toutiao", StringComparison.OrdinalIgnoreCase))
                    {
                        var bll = new ToutiaoGather();
                        bll.GatheringAuthorUrlFromChannel(site.Url, site.NewsType, 0);
                    }

                    // Pause after every configured site, matching or not.
                    Thread.Sleep(60 * 1000);
                }
            }
            else
            {
                Log.Error("抓取错误-检查site.xml" + DateTime.Now);
            }

            Log.Info("频道新闻抓取结束:" + DateTime.Now);
            Thread.Sleep(60 * 1000);
        }
    }
    catch (Exception ex)
    {
        Log.Error(ex.Message + ex.StackTrace);
    }
}
/// <summary>
/// Gathers author information from the photo-gallery ("组图") recent list.
/// </summary>
/// <remarks>
/// Does nothing unless <c>Global.IsEnableGatherZt</c> equals "1". While
/// <c>ProxyDeal.IsProxyReady</c> is true it calls
/// <c>ToutiaoGather.GatherNewsFromZtRecent</c> with a fixed API URL once per round and
/// sleeps 60 seconds between rounds. After the loop, if the proxy is still reported as
/// not ready, it logs that fact, waits 60 seconds and returns.
/// Exceptions are logged and end the method.
/// </remarks>
public static void GatherNewsFromZtRecent()
{
    const int waitMilliseconds = 60 * 1000;
    const string recentListUrl = "http://www.toutiao.com/api/article/recent/?source=2&count=20&category=%E7%BB%84%E5%9B%BE&max_behot_time=0&utm_source=toutiao&device_platform=web&offset=0&as=A1B508A27D30C8F&cp=582D607C78CFCE1&_=1479347343375";

    try
    {
        if (Global.IsEnableGatherZt != "1")
        {
            return;
        }

        // The former "skip between 0:00 and 8:00" restriction is disabled.
        // "round" is the 1-based iteration number written to the log.
        for (var round = 1; ProxyDeal.IsProxyReady; round++)
        {
            Log.Info("从组图列表抓相关新闻的作者开始 i=" + round + " time=" + DateTime.Now);

            var gatherer = new ToutiaoGather();
            gatherer.GatherNewsFromZtRecent(recentListUrl, 0);

            Log.Info("从组图列表抓相关新闻的作者结束 i=" + round + " time=" + DateTime.Now);
            Thread.Sleep(waitMilliseconds);
        }

        if (!ProxyDeal.IsProxyReady)
        {
            Log.Info("代理未准备好" + DateTime.Now);
            Thread.Sleep(waitMilliseconds);
        }
    }
    catch (Exception failure)
    {
        Log.Error(failure.Message + failure.StackTrace);
    }
}
/// <summary>
/// Gathers the authors of related news, starting from known authors.
/// The frequency can be kept low because an author's related news changes slowly.
/// </summary>
/// <remarks>
/// Returns immediately unless <c>Global.IsEnableGatherRelationFromAuthor</c> equals "1".
/// While <c>ProxyDeal.IsProxyReady</c> is true it runs
/// <c>ToutiaoGather.GatherRelationNewsFromAuthor</c> and then sleeps 60 seconds.
/// After the loop, if the proxy is still reported as not ready, it logs that,
/// sleeps 60 seconds and returns. Exceptions are logged and end the method.
/// </remarks>
public static void GatherRelationFromAuthor()
{
    try
    {
        if (Global.IsEnableGatherRelationFromAuthor != "1")
        {
            return;
        }

        // A time-window restriction (no gathering before 08:00) used to sit here; it is disabled.
        var pause = 60 * 1000;
        var pass = 0;

        while (ProxyDeal.IsProxyReady)
        {
            pass += 1;
            Log.Info("从作者抓相关新闻的作者开始 i=" + pass + " time=" + DateTime.Now);

            new ToutiaoGather().GatherRelationNewsFromAuthor();

            Log.Info("从作者抓相关新闻的作者结束 i=" + pass + " time=" + DateTime.Now);
            Thread.Sleep(pause);
        }

        if (!ProxyDeal.IsProxyReady)
        {
            Log.Info("代理未准备好" + DateTime.Now);
            Thread.Sleep(pause);
        }
    }
    catch (Exception error)
    {
        Log.Error(error.Message + error.StackTrace);
    }
}
/// <summary>
/// Gathers author information from user subscriptions.
/// Currently not used from here: that work is done together with the news-list processing.
/// </summary>
/// <remarks>
/// Returns immediately unless <c>Global.IsEnableGatherUserSub</c> equals "1".
/// While <c>ProxyDeal.IsProxyReady</c> is true each round only logs start and end,
/// constructs a <c>ToutiaoGather</c> and sleeps 60 seconds; the actual gather call is
/// commented out. After the loop, if the proxy is still reported as not ready, it logs
/// that, sleeps 60 seconds and returns. Exceptions are logged and end the method.
/// </remarks>
public static void GatherAuthorFromUserSub()
{
    const int sleepMilliseconds = 60 * 1000;

    try
    {
        if (Global.IsEnableGatherUserSub != "1")
        {
            return;
        }

        // The former "skip between 0:00 and 8:00" restriction is disabled.
        while (ProxyDeal.IsProxyReady)
        {
            Log.Info("从用户订阅抓相关新闻的作者开始:" + DateTime.Now);

            // The instance is still created every round even though the call below is disabled.
            var gatherer = new ToutiaoGather();
            //gatherer.GatherAuthorFromUserSub();

            Log.Info("从用户订阅抓相关新闻的作者结束:" + DateTime.Now);
            Thread.Sleep(sleepMilliseconds);
        }

        if (!ProxyDeal.IsProxyReady)
        {
            Log.Info("代理未准备好" + DateTime.Now);
            Thread.Sleep(sleepMilliseconds);
        }
    }
    catch (Exception failure)
    {
        Log.Error(failure.Message + failure.StackTrace);
    }
}
/// <summary>
/// Toutiao channel news gathering loop with a randomized starting site.
/// </summary>
/// <remarks>
/// Returns immediately unless <c>Global.IsEnableGatherChannel</c> equals "1".
/// While <c>ProxyDeal.IsProxyReady</c> is true, each round loads the site list from
/// <c>XmlDeal.GetSitesInfo</c>, picks a random starting position and then visits every
/// site exactly once, wrapping around at the end of the list. For each entry whose
/// <c>SiteName</c> is "toutiao" (case-insensitive) it calls
/// <c>ToutiaoGather.GatheringAuthorUrlFromChannel</c>. It sleeps 5 seconds after every
/// entry and 60 seconds after every round.
/// After the loop, if the proxy is still reported as not ready, it logs that,
/// sleeps 60 seconds and returns. Exceptions are logged and end the method.
/// </remarks>
public static void GatheringAuthorUrlFromChannel()
{
    try
    {
        if (Global.IsEnableGatherChannel != "1")
        {
            return;
        }

        // A time-window restriction (no gathering before 08:00) used to sit here; it is disabled.
        int i = 0;
        while (ProxyDeal.IsProxyReady)
        {
            i++;
            Log.Info("频道新闻抓取开始 i=" + i + " time=" + DateTime.Now);
            var siteList = XmlDeal.GetSitesInfo();
            if (siteList != null && siteList.Count > 0)
            {
                // Randomize the starting site so that several running instances do not
                // all crawl the sites in the same order.
                var siteCount = siteList.Count;
                var rnd = new Random();
                var iStart = rnd.Next(0, siteCount);

                // Raise the odds of starting at indexes 9, 5, 1 and 0. The checks are
                // mutually exclusive and ordered from the most specific divisor down;
                // as independent sequential checks, a start of 0 was always overwritten
                // to 1 and the "9" case could never be reached.
                if (iStart % 6 == 0)
                {
                    iStart = 9;
                }
                else if (iStart % 5 == 0)
                {
                    iStart = 5;
                }
                else if (iStart % 4 == 0)
                {
                    iStart = 1;
                }
                else if (iStart % 3 == 0)
                {
                    iStart = 0;
                }

                // A preferred index may lie beyond a short list; fold it back into range.
                iStart %= siteCount;

                // Visit every site once, wrapping around, so that entries before the
                // starting position (including index 0) are no longer skipped.
                for (var offset = 0; offset < siteCount; offset++)
                {
                    var site = siteList[(iStart + offset) % siteCount];

                    // Ordinal, case-insensitive match: independent of the current culture
                    // and tolerant of a null name.
                    if (string.Equals(site.SiteName, "toutiao", StringComparison.OrdinalIgnoreCase))
                    {
                        var bll = new ToutiaoGather();
                        bll.GatheringAuthorUrlFromChannel(site.Url, site.NewsType, 0);
                    }

                    Thread.Sleep(5 * 1000);
                }
            }
            else
            {
                Log.Error("抓取错误-检查site.xml" + DateTime.Now);
            }

            Log.Info("频道新闻抓取结束 i=" + i + " time=" + DateTime.Now);
            Thread.Sleep(60 * 1000);
        }

        if (!ProxyDeal.IsProxyReady)
        {
            Log.Info("代理未准备好" + DateTime.Now);
            Thread.Sleep(60 * 1000);
        }
    }
    catch (Exception ex)
    {
        Log.Error(ex.Message + ex.StackTrace);
    }
}
/// <summary>
/// Assigns a freshly constructed <c>ToutiaoGather</c> to <c>bll</c>
/// (declared outside this method).
/// </summary>
public void SetUp()
{
    bll = new ToutiaoGather();
}