Пример #1
0
        /// <summary>
        /// Fetches the THS "财经要闻" news list page for the given date, downloads and parses
        /// every article linked from it, starts a background word-segmentation task per
        /// article, and finally stores all parsed articles in the news collection.
        /// </summary>
        /// <param name="dateTime">
        /// Date of the news list to fetch, as a string accepted by
        /// <c>StringChecker.IsDateTime</c>. When it is not accepted, the problem is logged,
        /// <c>ThreadManager.Pause(minutes: 5)</c> is called and nothing else happens.
        /// </param>
        /// <remarks>
        /// Malformed service responses (null result, missing "PageData", list entries that
        /// are not dictionaries, missing "Href") are skipped instead of throwing, so one bad
        /// entry no longer discards the articles that were already fetched.
        /// </remarks>
        public void GetNewsListCJYW(string dateTime)
        {
            if (!StringChecker.IsDateTime(dateTime))
            {
                LOGGER.Log(string.Format("传入的时间字符串不对:{0}", dateTime));
                ThreadManager.Pause(minutes: 5);
                return;
            }

            // Parse once; the value feeds the URL, the integer tag and the list request.
            var targetDate = Convert.ToDateTime(dateTime);

            // Invariant culture keeps the yyyyMMdd digits Gregorian regardless of the
            // machine's regional settings; the result is embedded in a URL and an int tag.
            var invariant  = System.Globalization.CultureInfo.InvariantCulture;
            var formatDate = targetDate.ToString("yyyyMMdd", invariant);
            var dateTag    = Convert.ToInt32(formatDate, invariant);

            var mongo          = DataStorage.GetInstance(DBType.MongoDB);
            var webSrc         = DataSourceTHS.CreateInstance();
            var newNewsList    = new Dictionary<string, object>();
            var listHtml       = webSrc.GetNewsListCJYW(targetDate);
            var dbName         = CONST.DB.DBName_StockService;
            var collectionNews = CONST.DB.CollectionName_News;

            if (string.IsNullOrWhiteSpace(listHtml))
            {
                LOGGER.Log("获取的新闻列表为空白");
                return;
            }

            var resDict = NodeService.Get(CONST.NodeServiceUrl, "同花顺", "GetDataFromHtml", new { ContentType = "THS财经要闻新闻列表", Page = listHtml }) as Dictionary<string, object>;
            object listPayload;
            if (null == resDict || !resDict.TryGetValue("PageData", out listPayload))
            {
                return;
            }

            var resList = listPayload as ArrayList;
            if (null == resList)
            {
                return;
            }

            // Same for every article of the day, so build it once outside the loop.
            var parentUrl = string.Format("http://news.10jqka.com.cn/today_list/{0}/", formatDate).Trim();

            foreach (var rawItem in resList)
            {
                var href = ReadTrimmedField(rawItem as Dictionary<string, object>, "Href");

                // Skip entries without a link, and links already handled in this run
                // (a repeated link used to make Dictionary.Add throw).
                if (href.Length == 0 || newNewsList.ContainsKey(href))
                {
                    continue;
                }

                var newsHtml = webSrc.GetNewsArticle(href, parentUrl);
                if (string.IsNullOrWhiteSpace(newsHtml))
                {
                    continue;
                }

                TaskStatusManager.Set("GetNewsListCJYW", new { ID = "GetNewsListCJYW", CreateTime = DateTime.Now, Status = "准备操作THS财经要闻新闻详细", Html = newsHtml });
                var resItem = NodeService.Get(CONST.NodeServiceUrl, "同花顺", "GetDataFromHtml", new { ContentType = "THS财经要闻新闻详细", Page = newsHtml }) as Dictionary<string, object>;
                object detailPayload;
                if (null == resItem || !resItem.TryGetValue("PageData", out detailPayload))
                {
                    continue;
                }

                var resDetail = detailPayload as Dictionary<string, object>;
                if (null == resDetail)
                {
                    continue;
                }

                var newsCreateTimeString = ReadTrimmedField(resDetail, "CreateTime");
                var svItem = new
                {
                    ContentType    = "THS财经要闻",
                    Title          = ReadTrimmedField(resDetail, "Title"),
                    Url            = href,
                    ParentUrl      = parentUrl,
                    SourceHref     = ReadTrimmedField(resDetail, "SourceHref"),
                    SourceName     = ReadTrimmedField(resDetail, "SourceName"),
                    // Unparseable article timestamps fall back to DateTime.MinValue.
                    NewsCreateTime = StringChecker.IsDateTime(newsCreateTimeString) ? Convert.ToDateTime(newsCreateTimeString) : DateTime.MinValue,
                    Content        = ReadTrimmedField(resDetail, "Content"),
                    Tag            = dateTag,
                    CreateTime     = DateTime.Now,
                    PageMD5        = "无"
                };
                newNewsList[href] = svItem;
                LOGGER.Log(string.Format("获取一个新闻正文 {0}", svItem.Title));

                // Throttle: wait five seconds between article downloads.
                ThreadManager.Pause(seconds: 5);

                // Word segmentation runs in the background so it does not delay the crawl.
                var articleContent = svItem.Content;
                var articleUrl     = svItem.Url;
                Task.Factory.StartNew(() =>
                {
                    var fcZStartTime = DateTime.Now;
                    try
                    {
                        this.SaveFenCi(articleContent, articleUrl);
                        LOGGER.Log(string.Format("分词花费时间 开始时间:{0} 花费时间:{1}", fcZStartTime, DateTime.Now - fcZStartTime));
                    }
                    catch (Exception ex)
                    {
                        // Nobody awaits this task, so log here rather than leave the
                        // failure unobserved.
                        LOGGER.Log(string.Format("分词失败 {0} {1}", articleUrl, ex));
                    }
                });
            }

            foreach (var entry in newNewsList)
            {
                // The URL comes from scraped HTML; escape backslashes and quotes so it
                // cannot break out of the JSON string in the filter document.
                var escapedUrl = entry.Key.Replace("\\", "\\\\").Replace("\"", "\\\"");
                var filter     = "{\"Url\":\"" + escapedUrl + "\"}";
                mongo.Save3(dbName, collectionNews, entry.Value, filter);
            }
        }

        /// <summary>
        /// Returns the trimmed string form of <paramref name="source"/>[<paramref name="key"/>],
        /// or an empty string when the dictionary is null, the key is absent, or the value is null.
        /// </summary>
        private static string ReadTrimmedField(Dictionary<string, object> source, string key)
        {
            object value;
            if (null == source || !source.TryGetValue(key, out value) || null == value)
            {
                return string.Empty;
            }

            return value.ToString().Trim();
        }