/// <summary>
/// Inserts the supplied spider news entity through the data context and saves it.
/// </summary>
/// <param name="value">Entity to add.</param>
/// <returns>
/// A <c>ResponseItem</c> built with 0 and a success message when the save completes;
/// a <c>ResponseItem</c> built with 2 and the exception message when adding or saving throws.
/// </returns>
public ResponseItem Post(yy_Spider_News value)
{
    ResponseItem response;
    try
    {
        DB.yy_Spider_News.Add(value);
        DB.SaveChanges();
        response = new ResponseItem(0, "添加蜘蛛新闻成功。");
    }
    catch (Exception ex)
    {
        // Any failure is reported to the caller through the response instead of being rethrown.
        response = new ResponseItem(2, ex.Message);
    }

    return response;
}
/// <summary>
/// Looks up the stored spider news entity by <c>value.ID</c> and, when it exists, copies
/// Title, KeyWords, Summary and CreateDate from <paramref name="value"/> onto it and saves.
/// </summary>
/// <param name="value">Entity carrying the ID to look up and the new field values.</param>
/// <returns>
/// A <c>ResponseItem</c> built with 0 and an empty message after saving;
/// a <c>ResponseItem</c> built with 2 and a "not found" message when no entity has that ID.
/// </returns>
public ResponseItem Put(yy_Spider_News value)
{
    var existing = DB.yy_Spider_News.Find(value.ID);
    if (existing == null)
    {
        return new ResponseItem(2, "不存在的蜘蛛新闻。");
    }

    existing.Title = value.Title;
    existing.KeyWords = value.KeyWords;
    existing.Summary = value.Summary;
    existing.CreateDate = value.CreateDate;
    DB.SaveChanges();

    return new ResponseItem(0, "");
}
/// <summary>
/// Returns the string form of an optional configuration entry, or an empty string when the key is absent.
/// </summary>
private static string ConfigValueOrEmpty(JObject config, string key)
{
    var token = config[key];
    return token != null ? token.ToString() : string.Empty;
}

/// <summary>
/// Downloads the list page at <paramref name="_Url"/>, selects the list entries with the
/// "SourceListMatch" selector from <paramref name="_Config"/>, and converts each entry into a
/// <c>yy_Spider_News</c> item (title, default image, summary, source URL, detail HTML,
/// keywords and release time). For every entry the detail page referenced by the source URL
/// is downloaded as well.
/// </summary>
/// <param name="_SpiderID">Spider ID stored on every item and used for the log/quality SQL.</param>
/// <param name="_Url">URL of the list page to download.</param>
/// <param name="_Config">
/// Selector configuration. The "*Match" keys are required (a missing one makes the entry fail);
/// the "*Replace" keys are optional and default to an empty string.
/// </param>
/// <param name="_UserID">User ID stored on every item.</param>
/// <returns>
/// The successfully converted items. Entries whose conversion throws are dropped.
/// When the list selector matches nothing, an empty list is returned and no log row is written.
/// </returns>
private List<yy_Spider_News> DataList_Convert(Int64 _SpiderID, String _Url, JObject _Config, long _UserID)
{
    String Html = Get(_Url);
    var doc = NSoupClient.Parse(Html);
    String SourceListMatch = _Config["SourceListMatch"].ToString();
    var SourceList = doc.Select(SourceListMatch);
    var Result = new List<yy_Spider_News>();
    if (SourceList.Count < 1)
    {
        return Result;
    }

    // Loop-invariant: used to pull an image URL out of the detail HTML when the list entry has none.
    Regex regImg = new Regex(@"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>", RegexOptions.IgnoreCase);

    for (int i = 0; i < SourceList.Count; i++)
    {
        var v = SourceList[i];
        var Item = new yy_Spider_News() { SpiderID = _SpiderID, UserID = _UserID, IsSync = 0 };

        try
        {
            // Title
            var _TitleEntity = v.Select(_Config["TitleMatch"].ToString()).FirstOrDefault();
            if (_TitleEntity != null)
            {
                Item.Title = Str_Replace(_TitleEntity.OwnText(), ConfigValueOrEmpty(_Config, "TitleReplace"));
            }
            else
            {
                Item.Title = "";
            }

            // Default image (from the list entry)
            var _DefaultImageEntity = v.Select(_Config["DefaultImgMatch"].ToString()).FirstOrDefault();
            if (_DefaultImageEntity != null)
            {
                var _DefaultImage = _DefaultImageEntity.Attributes["src"].ToString();
                Item.DefaultImage = Str_Replace(_DefaultImage, ConfigValueOrEmpty(_Config, "DefaultImgReplace"));
            }
            else
            {
                Item.DefaultImage = "";
            }

            // Summary
            var _SummaryEntity = v.Select(_Config["SummaryMatch"].ToString()).FirstOrDefault();
            if (_SummaryEntity != null)
            {
                Item.Summary = Str_Replace(_SummaryEntity.OwnText(), ConfigValueOrEmpty(_Config, "SummaryReplace"));
            }
            else
            {
                Item.Summary = "";
            }

            // Source URL (link to the detail page)
            var _SourceUrlEntity = v.Select(_Config["SourceFromMatch"].ToString()).FirstOrDefault();
            if (_SourceUrlEntity != null)
            {
                String SourceFrom = _SourceUrlEntity.Attributes["href"].ToString();
                Item.SourceUrl = Str_Replace(SourceFrom, ConfigValueOrEmpty(_Config, "SourceFromReplace"));
            }
            else
            {
                Item.SourceUrl = "";
            }

            // Detail page content
            var InfoMatch = _Config["InfoMatch"].ToString();
            var InfoSource = Get(Item.SourceUrl);
            var InfoDoc = NSoupClient.Parse(InfoSource);
            var InfoNode = InfoDoc.Select(InfoMatch).FirstOrDefault();
            if (InfoNode != null)
            {
                Item.Info = Str_Replace(InfoNode.OuterHtml(), ConfigValueOrEmpty(_Config, "InfoReplace"));
            }
            else
            {
                Item.Info = "";
            }

            // Keywords: segment the detail HTML and keep up to 20 distinct words, ordered by rank.
            Segment.Init(Environment.CurrentDirectory + "\\Segment.xml");
            Segment segment = new Segment();
            ICollection<WordInfo> words = segment.DoSegment(Item.Info);
            var Words = words
                .Where(a => a.Word.Length > 1
                    && (a.OriginalWordType == WordType.English
                        || a.OriginalWordType == WordType.SimplifiedChinese
                        || a.OriginalWordType == WordType.TraditionalChinese
                        || a.OriginalWordType == WordType.Synonym))
                .OrderBy(a => a.Rank)
                .Select(a => a.Word)
                .Distinct()
                .Take(20)
                .ToArray();
            Item.KeyWords = String.Join(",", Words);

            // Release time, read from the detail page; falls back to the current time.
            var _CreateDateEntity = InfoDoc.Select(_Config["ReleaseTimeMatch"].ToString()).FirstOrDefault();
            if (_CreateDateEntity != null)
            {
                String _CreateDate = _CreateDateEntity.OwnText();
                Item.CreateDate = DateTime.Parse(Str_Replace(_CreateDate, ConfigValueOrEmpty(_Config, "ReleaseTimeReplace")));
            }
            else
            {
                Item.CreateDate = DateTime.Now;
            }

            // If the list entry gave no image, use the first <img> in the detail HTML that has a
            // non-empty src. (Previously only the very first <img> was examined, so an empty src
            // on it hid every later image.)
            if (String.IsNullOrEmpty(Item.DefaultImage))
            {
                foreach (Match match in regImg.Matches(Item.Info))
                {
                    var imgUrl = match.Groups["imgUrl"].Value;
                    if (!String.IsNullOrEmpty(imgUrl))
                    {
                        Item.DefaultImage = imgUrl;
                        break;
                    }
                }
            }
        }
        catch
        {
            // Best effort: an entry that fails anywhere in the conversion is dropped; the
            // shortfall is visible through the SuccessUrl count written to the log below.
            Item = null;
        }

        if (Item != null)
        {
            Result.Add(Item);
        }

        Thread.Sleep(1);
    }

    // Record this run and recompute the spider's quality. Values are passed as parameters
    // instead of being concatenated into the SQL text.
    DB.Database.ExecuteSqlCommand(
        "INSERT INTO yy_Spider_Log ([SpiderID],[TotalUrl],[SuccessUrl]) VALUES (@p0, @p1, @p2)",
        _SpiderID, SourceList.Count, Result.Count);

    // NULLIF keeps the statement from raising a divide-by-zero error when no URL has ever
    // succeeded for this spider; Quality becomes NULL in that case.
    // NOTE(review): Total / Success looks inverted for a "quality" percentage and is integer
    // division — confirm against the consumers of yy_Spider.Quality before changing it.
    DB.Database.ExecuteSqlCommand(
        @"UPDATE [dbo].[yy_Spider] SET Quality = (SELECT SUM(TotalUrl) / NULLIF(SUM(SuccessUrl), 0) * 100 FROM [dbo].[yy_Spider_Log] WITH(NOLOCK) WHERE SpiderID=@p0),LastStartTime=getdate() WHERE ID = @p0",
        _SpiderID);

    return Result;
}