示例#1
0
        /// <summary>
        /// Endlessly polls the configured sites and gathers author URLs from every
        /// Toutiao channel, sleeping one minute after each site and after each full pass.
        /// </summary>
        /// <remarks>
        /// Returns immediately unless <c>Global.IsEnableGatherChannel</c> is "1".
        /// Any exception is logged and ends the polling loop; nothing is rethrown.
        /// </remarks>
        public static void AuthorUrlGathering()
        {
            try
            {
                if (Global.IsEnableGatherChannel != "1")
                {
                    return;
                }

                // A 00:00-08:00 quiet window used to be enforced here; it is currently disabled.

                while (true)
                {
                    Log.Info("频道新闻抓取开始:" + DateTime.Now);
                    var siteList = XmlDeal.GetSitesInfo();

                    if (siteList != null && siteList.Count > 0)
                    {
                        foreach (var site in siteList)
                        {
                            // Ordinal, case-insensitive comparison: unlike ToLower() it does not
                            // depend on the current culture and does not throw when SiteName is null.
                            if (string.Equals(site.SiteName, "toutiao", StringComparison.OrdinalIgnoreCase))
                            {
                                var bll = new ToutiaoGather();
                                bll.GatheringAuthorUrlFromChannel(site.Url, site.NewsType, 0);
                            }

                            // Pause after every configured site, whether or not it was gathered.
                            Thread.Sleep(60 * 1000);
                        }
                    }
                    else
                    {
                        Log.Error("抓取错误-检查site.xml" + DateTime.Now);
                    }
                    Log.Info("频道新闻抓取结束:" + DateTime.Now);
                    Thread.Sleep(60 * 1000);
                }
            }
            catch (Exception ex)
            {
                Log.Error(ex.Message + ex.StackTrace);
            }
        }
示例#2
0
        /// <summary>
        /// Checks that <c>XmlDeal.GetSitesInfo</c> yields a non-null list with at least one entry.
        /// </summary>
        public void GetSitesInfoTest()
        {
            var list = XmlDeal.GetSitesInfo();

            // Assert non-null separately so a missing result is reported as a test
            // failure instead of surfacing as a NullReferenceException on list.Count.
            Assert.IsTrue(list != null, "GetSitesInfo returned null.");
            Assert.IsTrue(list.Count > 0, "GetSitesInfo returned an empty list.");
        }
示例#3
0
        /// <summary>
        /// Repeatedly gathers author URLs from the configured Toutiao channels for as long as
        /// <c>ProxyDeal.IsProxyReady</c> is true, starting each pass at a randomised site index.
        /// </summary>
        /// <remarks>
        /// Returns immediately unless <c>Global.IsEnableGatherChannel</c> is "1".
        /// Each pass visits every configured site exactly once, wrapping around from the chosen
        /// start index, so that several running instances do not crawl in the same order.
        /// When the proxy is not ready the method logs that fact, sleeps one minute and returns.
        /// Any exception is logged and ends the method; nothing is rethrown.
        /// </remarks>
        public static void GatheringAuthorUrlFromChannel()
        {
            try
            {
                if (Global.IsEnableGatherChannel != "1")
                {
                    return;
                }

                // A 00:00-08:00 quiet window used to be enforced here; it is currently disabled.

                var rnd = new Random();
                int i = 0;
                while (ProxyDeal.IsProxyReady)
                {
                    i++;
                    Log.Info("频道新闻抓取开始 i=" + i + " time=" + DateTime.Now);
                    var siteList = XmlDeal.GetSitesInfo();

                    if (siteList != null && siteList.Count > 0)
                    {
                        var count = siteList.Count;

                        // Random start index, biased towards indices 0, 1, 5 and 9.
                        // Every test looks at the original draw: testing the already-remapped
                        // value turned 0 into 1 unconditionally and made the 9 branch unreachable.
                        var drawn = rnd.Next(0, count);
                        var iStart = drawn;
                        if (drawn % 3 == 0)
                        {
                            iStart = 0;
                        }
                        if (drawn % 4 == 0)
                        {
                            iStart = 1;
                        }
                        if (drawn % 5 == 0)
                        {
                            iStart = 5;
                        }
                        if (drawn % 6 == 0)
                        {
                            iStart = 9;
                        }
                        // A preferred index may not exist in a short list; fall back to the draw,
                        // which is always within [0, count).
                        if (iStart >= count)
                        {
                            iStart = drawn;
                        }

                        // Visit each site once, wrapping past the end of the list so that the
                        // sites located before the start index are not skipped.
                        for (var offset = 0; offset < count; offset++)
                        {
                            var site = siteList[(iStart + offset) % count];

                            // Ordinal, case-insensitive comparison: unlike ToLower() it does not
                            // depend on the current culture and does not throw when SiteName is null.
                            if (string.Equals(site.SiteName, "toutiao", StringComparison.OrdinalIgnoreCase))
                            {
                                var bll = new ToutiaoGather();
                                bll.GatheringAuthorUrlFromChannel(site.Url, site.NewsType, 0);
                            }
                            Thread.Sleep(5 * 1000);
                        }
                    }
                    else
                    {
                        Log.Error("抓取错误-检查site.xml" + DateTime.Now);
                    }
                    Log.Info("频道新闻抓取结束 i=" + i + " time=" + DateTime.Now);
                    Thread.Sleep(60 * 1000);
                }
                if (!ProxyDeal.IsProxyReady)
                {
                    Log.Info("代理未准备好" + DateTime.Now);
                    Thread.Sleep(60 * 1000);
                }
            }
            catch (Exception ex)
            {
                Log.Error(ex.Message + ex.StackTrace);
            }
        }