/// <summary>
/// Fetches the channel's JSON news list (up to 500 entries) and, for every entry whose
/// <c>url_m</c> is not already stored, downloads the article body and persists it.
/// </summary>
/// <param name="chlCfg">
/// Channel configuration; <c>ChannelVal</c> is formatted into <c>site.IndexUrl</c> and
/// <c>ChannelName</c> is copied onto each stored item.
/// </param>
/// <remarks>
/// Exceptions raised while crawling are logged through <c>Logger.WriteException</c> and not rethrown.
/// </remarks>
public override void GetNewsDetail(ChannelConfig chlCfg)
{
    var url = string.Format(site.IndexUrl + "&limit=500", chlCfg.ChannelVal);
    var cate = cateList.Find(p => p.CategoryName == "财经");

    // Every stored item needs cate.CategoryId; without the category nothing can be saved,
    // so fail once with an explicit cause instead of a NullReferenceException mid-crawl.
    if (cate == null)
    {
        Logger.WriteException(
            string.Format("分类下抓取新闻出现异常:{0}", chlCfg.ChannelName),
            new InvalidOperationException("Category \"财经\" was not found in cateList."));
        return;
    }

    try
    {
        HttpResponse<JSON> response = new CommonService.HttpResponse<JSON>();
        var json = response.GetFuncGetResponse(url, Serilize_Type.Json);

        // The list URL ends in something like "yw?encode=ywjh&limit=500"; keep only the part before '?'.
        var uri = url.Split('?')[0];

        // Static parser setting, identical for every item, so it is applied once per crawl.
        HtmlNode.ElementsFlags.Remove("option");

        foreach (var item in json.news)
        {
            var newsItem = newsItemAccess.Find(p => p.SourceUrl == item.url_m);
            if (newsItem != null)
            {
                // An item with this source URL is already stored; skip it.
                continue;
            }

            newsItem = new NewsItem()
            {
                NewsId = -1,
                SourceUrl = item.url_m,
                SourceSite = site.SiteId,
                FromSite = "东方财富网",
                Author = ""
            };
            newsItem.CategoryId = cate.CategoryId;
            newsItem.Title = item.title;
            newsItem.CreateTime = item.showtime;
            newsItem.ImgUrl = item.image ?? "";
            newsItem.ChannelName = chlCfg.ChannelName;

            // Fetch the article content.
            // NOTE(review): Replace swaps EVERY "yw" in the base URL, not only the trailing
            // path segment - confirm the configured IndexUrl has no other "yw" occurrence.
            var detailUrl = uri.Replace("yw", "content?newsid=" + item.newsid.ToString());
            HtmlDocument doc = new HtmlDocument();

            // Up to five attempts; each failure is logged and the next attempt follows immediately.
            for (int i = 0; i < 5; i++)
            {
                try
                {
                    HttpResponse<ContentJson> contentResponse = new CommonService.HttpResponse<ContentJson>();
                    var contentJson = contentResponse.GetFuncGetResponse(detailUrl, Serilize_Type.Json);
                    doc.LoadHtml(contentJson.body);
                    break;
                }
                catch (Exception ex)
                {
                    Logger.WriteException(string.Format("请求详情页失败,次数:{0} , url:{1}", i, newsItem.SourceUrl), ex);
                }
            }

            try
            {
                var div = doc.DocumentNode;

                // Nothing was loaded (all attempts failed) or the article has no text: skip it.
                if (string.IsNullOrEmpty(div.InnerText))
                {
                    continue;
                }

                // Strip the stock-image placeholder marker from the plain-text version.
                newsItem.NewsText = div.InnerText.Replace("<!-- EM_StockImg_Start --><!--IMG#0--><!-- EM_StockImg_End -->", "");
                RemoveUnsafe(div);
                newsItem.NewsContent = div.InnerHtml;

                // Persist the news item.
                newsItemAccess.Add(newsItem);
                newsItemAccess.Save();
                SaveSegMents(newsItem);
            }
            catch (Exception ex)
            {
                Logger.WriteException("保存内容异常", ex);
            }
        }
    }
    catch (Exception ex)
    {
        Logger.WriteException(string.Format("分类下抓取新闻出现异常:{0}", chlCfg.ChannelName), ex);
    }
}
/// <summary>
/// Walks the channel's paged XML news list, following <c>nextPage</c> until it is empty, and for
/// every entry whose URL is not already stored downloads the article page and persists the
/// content of its <c>div#content</c> element.
/// </summary>
/// <param name="chlCfg">
/// Channel configuration; <c>ChannelVal</c> is formatted into <c>site.IndexUrl</c> and
/// <c>ChannelName</c> is used in the error log.
/// </param>
/// <remarks>
/// Exceptions raised while crawling are logged through <c>Logger.WriteException</c> and not rethrown.
/// </remarks>
public override void GetNewsDetail(ChannelConfig chlCfg)
{
    var url = string.Format(site.IndexUrl, chlCfg.ChannelVal);
    var cate = cateList.Find(p => p.CategoryName == "财经");

    // Every stored item needs cate.CategoryId; without the category nothing can be saved,
    // so fail once with an explicit cause instead of a NullReferenceException mid-crawl.
    if (cate == null)
    {
        Logger.WriteException(
            string.Format("分类下抓取新闻出现异常:{0}", chlCfg.ChannelName),
            new InvalidOperationException("Category \"财经\" was not found in cateList."));
        return;
    }

    try
    {
        // Static parser setting, identical for every item, so it is applied once per crawl.
        HtmlNode.ElementsFlags.Remove("option");

        while (!string.IsNullOrEmpty(url))
        {
            HttpResponse<xmlColumn> response = new CommonService.HttpResponse<xmlColumn>();
            var tonghuaSunXmlColumn = response.GetFuncGetResponse(url, Serilize_Type.Xml);
            DateTime fetchedAt = System.DateTime.Now;

            // Advance to the next page now; an empty value ends the while loop after this page.
            url = tonghuaSunXmlColumn.nextPage ?? "";

            foreach (var item in tonghuaSunXmlColumn.pageItems.item)
            {
                var newsItem = newsItemAccess.Find(p => p.SourceUrl == item.url);
                if (newsItem != null)
                {
                    // An item with this source URL is already stored; skip it.
                    continue;
                }

                newsItem = new NewsItem()
                {
                    NewsId = -1,
                    SourceUrl = item.url,
                    SourceSite = site.SiteId,
                    Author = ""
                };
                newsItem.CategoryId = cate.CategoryId;
                newsItem.Title = item.title;

                // TryParse overwrites its out argument with DateTime.MinValue on failure,
                // so the fallback to the fetch time has to be explicit.
                DateTime createTime;
                if (!DateTime.TryParse(item.ctime, out createTime))
                {
                    createTime = fetchedAt;
                }
                newsItem.CreateTime = createTime;

                newsItem.FromSite = item.source;
                newsItem.ImgUrl = item.imgurl ?? "";
                newsItem.ChannelName = tonghuaSunXmlColumn.columnName;

                // Fetch the article page.
                HtmlDocument doc = new HtmlDocument();

                // Up to five attempts; each failure is logged and the next attempt follows immediately.
                for (int i = 0; i < 5; i++)
                {
                    try
                    {
                        doc.LoadHtml(HttpUtility.Get(newsItem.SourceUrl, Encoding.UTF8));
                        break;
                    }
                    catch (Exception ex)
                    {
                        Logger.WriteException(string.Format("请求详情页失败,次数:{0} , url:{1}", i, newsItem.SourceUrl), ex);
                    }
                }

                try
                {
                    var div = doc.DocumentNode.SelectSingleNode("//div[@id='content']");

                    // No content container (page failed to load or has a different layout)
                    // or no text inside it: skip the item instead of throwing on a null node.
                    if (div == null || string.IsNullOrEmpty(div.InnerText))
                    {
                        continue;
                    }

                    newsItem.NewsText = div.InnerText;
                    RemoveUnsafe(div);
                    newsItem.NewsContent = div.InnerHtml;

                    // Persist the news item.
                    newsItemAccess.Add(newsItem);
                    newsItemAccess.Save();
                    SaveSegMents(newsItem);
                }
                catch (Exception ex)
                {
                    Logger.WriteException("保存新闻异常", ex);
                }
            }
        }
    }
    catch (Exception ex)
    {
        Logger.WriteException(string.Format("分类下抓取新闻出现异常:{0}", chlCfg.ChannelName), ex);
    }
}