/// <summary>
/// Adds a new spider record to the database.
/// </summary>
/// <param name="value">The spider entity to insert.</param>
/// <returns>
/// <c>ResponseItem(0, ...)</c> once the entity has been saved; <c>ResponseItem(2, message)</c>
/// carrying the exception message when adding or saving throws.
/// </returns>
public ResponseItem Post(yy_Spider value)
{
    ResponseItem result;

    try
    {
        DB.yy_Spider.Add(value);
        DB.SaveChanges();
        result = new ResponseItem(0, "添加蜘蛛成功。");
    }
    catch (Exception error)
    {
        // Any failure (including a null entity) is reported to the caller rather than rethrown.
        result = new ResponseItem(2, error.Message);
    }

    return result;
}
/// <summary>
/// Updates the <c>IsShow</c> flag of an existing spider.
/// </summary>
/// <param name="value">Supplies the ID of the spider to update and the new <c>IsShow</c> value.</param>
/// <returns>
/// <c>ResponseItem(0, "")</c> after the change has been saved; <c>ResponseItem(2, ...)</c> when
/// <paramref name="value"/> is null or no spider with that ID exists.
/// </returns>
public ResponseItem ShowHide(yy_Spider value)
{
    // Guard against a null argument, which would otherwise throw NullReferenceException on value.ID.
    if (value == null)
    {
        return new ResponseItem(2, "不存在的蜘蛛。");
    }

    var spider = DB.yy_Spider.Find(value.ID);
    if (spider == null)
    {
        return new ResponseItem(2, "不存在的蜘蛛。");
    }

    spider.IsShow = value.IsShow;
    DB.SaveChanges();
    return new ResponseItem(0, "");
}
/// <summary>
/// Overwrites the editable fields of an existing spider with the values supplied by the caller.
/// </summary>
/// <param name="value">
/// Supplies the ID of the spider to update and the new values for ExecutionInterval, IsShow,
/// KeyWords, RuleConfig, SourceUrls, SpiderMode, TargetPlatforms, Title and TypeIDs.
/// </param>
/// <returns>
/// <c>ResponseItem(0, "")</c> after the changes have been saved; <c>ResponseItem(2, ...)</c> when
/// <paramref name="value"/> is null or no spider with that ID exists.
/// </returns>
public ResponseItem Put(yy_Spider value)
{
    // Guard against a null argument, which would otherwise throw NullReferenceException on value.ID.
    if (value == null)
    {
        return new ResponseItem(2, "不存在的蜘蛛。");
    }

    var entity = DB.yy_Spider.Find(value.ID);
    if (entity == null)
    {
        return new ResponseItem(2, "不存在的蜘蛛。");
    }

    // Only these fields are copied; every other column of the stored row is left untouched.
    entity.ExecutionInterval = value.ExecutionInterval;
    entity.IsShow = value.IsShow;
    entity.KeyWords = value.KeyWords;
    entity.RuleConfig = value.RuleConfig;
    entity.SourceUrls = value.SourceUrls;
    entity.SpiderMode = value.SpiderMode;
    entity.TargetPlatforms = value.TargetPlatforms;
    entity.Title = value.Title;
    entity.TypeIDs = value.TypeIDs;

    DB.SaveChanges();
    return new ResponseItem(0, "");
}
/// <summary>
/// Imports spider definitions from the JSON files posted with the current HTTP request.
/// Each file is deserialized into a <c>yy_Spider</c>, stamped with server-side defaults and saved.
/// </summary>
/// <returns>
/// An empty string when the request carries no files. Otherwise an anonymous object with
/// <c>code</c> and <c>msg</c> members: 0 = all files imported, 1 = the "typeids" request value is
/// missing, 2 = a file has no content, 3 = a file could not be read (or read as empty),
/// 4 = a spider with the same Code already exists, 5 = deserializing or preparing the spider threw.
/// Processing stops at the first failing file; files saved before it remain saved.
/// </returns>
public Object UploadCrawler()
{
    var request = HttpContext.Current.Request;
    var uploads = request.Files;
    if (uploads.Count < 1)
    {
        return "";
    }

    String typeIds = request.Params["typeids"];
    if (String.IsNullOrEmpty(typeIds))
    {
        return new { code = 1, msg = "所属分类不能为空。" };
    }

    // Indexed access is required: enumerating the file collection yields keys, not posted files.
    for (var index = 0; index < uploads.Count; index++)
    {
        HttpPostedFile upload = uploads[index];
        if (upload.ContentLength < 1)
        {
            return new { code = 2, msg = "文件内容为空。" };
        }

        String json = String.Empty;
        try
        {
            using (var reader = new StreamReader(upload.InputStream))
            {
                json = reader.ReadToEnd();
            }
        }
        catch
        {
            // Read errors are intentionally swallowed here; the empty-text check below reports them.
        }

        if (string.IsNullOrEmpty(json))
        {
            return new { code = 3, msg = "文件读取错误。" };
        }

        yy_Spider spider = null;
        try
        {
            spider = JsonConvert.DeserializeObject<yy_Spider>(json);

            // The Code used for duplicate detection is the hash of SourceUrls + RuleConfig.
            // NOTE(review): String.GetHashCode is not guaranteed to be stable across runtimes or
            // processes; confirm this is acceptable for a value stored in the database.
            spider.Code = (spider.SourceUrls + spider.RuleConfig).GetHashCode().ToString();

            var duplicate = DB.yy_Spider.FirstOrDefault(x => x.Code == spider.Code);
            if (duplicate != null)
            {
                return new { code = 4, msg = "已存在的蜘蛛。" };
            }

            // Server-side values override whatever the uploaded file contained.
            spider.CreateDate = DateTime.Now;
            spider.LastStartTime = DateTime.Now;
            spider.UserID = User.ID;
            spider.LookCount = 0;
            spider.Status = 0;
            spider.TypeIDs = typeIds;
        }
        catch
        {
            // Covers malformed JSON, a null deserialization result and failures of the lookup query.
            return new { code = 5, msg = "文件格式为空。" };
        }

        DB.yy_Spider.Add(spider);
        DB.SaveChanges();
    }

    return new { code = 0, msg = "导入成功。" };
}
/// <summary>
/// Runs one crawl pass for the given spider: marks it as running, walks every source URL,
/// inserts news items that are not stored yet, merges richer data into items that are, and
/// finally marks the spider as completed.
/// </summary>
/// <param name="Spider">
/// The spider to run. <c>SourceUrls</c> must hold a JSON string array and <c>RuleConfig</c> a JSON
/// object; its <c>Status</c> property is set to Running by this method.
/// </param>
/// <remarks>
/// NOTE(review): the final "completed" update only runs when the loops finish normally; an
/// exception during crawling leaves the database row in the Running state. Confirm that is intended.
/// </remarks>
private void Start(yy_Spider Spider)
{
    var spiderId = Spider.ID;
    var sourceUrls = JsonConvert.DeserializeObject<String[]>(Spider.SourceUrls);
    var ruleConfig = JsonConvert.DeserializeObject<JObject>(Spider.RuleConfig);
    var connectionString = DB.Database.Connection.ConnectionString;

    // 1. Mark the spider as running.
    // The status constant is boxed explicitly: a constant 0 would otherwise bind to the
    // SqlParameter(string, SqlDbType) constructor instead of SqlParameter(string, object).
    Spider.Status = (int)SpiderStatus.Running;
    SqlHelper.ExecuteNonQuery(
        connectionString,
        CommandType.Text,
        "UPDATE yy_Spider SET Status=@Status,LastStartTime=getdate() WHERE ID=@ID",
        new SqlParameter("@Status", (object)(int)SpiderStatus.Running),
        new SqlParameter("@ID", spiderId));

    // 2. Walk every source URL and the news items extracted from it.
    Boolean stopAll = false;
    foreach (var sourceUrl in sourceUrls)
    {
        var newsCollection = DataList_Convert(spiderId, sourceUrl, ruleConfig, Spider.UserID);

        foreach (var newsItem in newsCollection)
        {
            // Items are matched against stored rows by the hash of their title.
            // NOTE(review): String.GetHashCode is not guaranteed to be stable across runtimes or
            // processes; kept unchanged because stored rows presumably already use it - confirm.
            newsItem.Identifer = newsItem.Title.GetHashCode();
            var existing = DB.yy_Spider_News.Where(a => a.Identifer == newsItem.Identifer).FirstOrDefault();

            // An item of this same spider created at or after the spider's last start time
            // ends the whole run.
            if (existing != null && existing.SpiderID == Spider.ID && existing.CreateDate >= Spider.LastStartTime)
            {
                stopAll = true;
                break;
            }

            // Unknown item: store it and move on (no pause after an insert, as before).
            if (existing == null)
            {
                DB.yy_Spider_News.Add(newsItem);
                DB.SaveChanges();
                continue;
            }

            // Known item: merge in whatever the fresh copy has that the stored one lacks.
            Boolean merged = false;

            // Keywords and body: take the new ones when the stored item has none, or when the
            // new item lists more comma-separated keywords.
            if (!String.IsNullOrEmpty(newsItem.KeyWords))
            {
                Boolean replaceKeyWords =
                    String.IsNullOrEmpty(existing.KeyWords) ||
                    newsItem.KeyWords.Split(',').Length > existing.KeyWords.Split(',').Length;

                if (replaceKeyWords)
                {
                    existing.KeyWords = newsItem.KeyWords;
                    existing.Info = newsItem.Info;
                    merged = true;
                }
            }

            // Image: fill in only when missing.
            if (String.IsNullOrEmpty(existing.DefaultImage) && !String.IsNullOrEmpty(newsItem.DefaultImage))
            {
                existing.DefaultImage = newsItem.DefaultImage;
                merged = true;
            }

            // Summary: fill in only when missing.
            if (String.IsNullOrEmpty(existing.Summary) && !String.IsNullOrEmpty(newsItem.Summary))
            {
                existing.Summary = newsItem.Summary;
                merged = true;
            }

            if (merged)
            {
                // Fields that are still null must be sent as DBNull.Value; a SqlParameter whose
                // value is a null reference is not transmitted and the command fails.
                SqlHelper.ExecuteNonQuery(
                    connectionString,
                    CommandType.Text,
                    "UPDATE yy_Spider_News SET KeyWords=@KeyWords,Info=@Info,Summary=@Summary,DefaultImage=@DefaultImage,LastStartTime=getdate() WHERE ID=@ID",
                    new SqlParameter("@KeyWords", ToDbValue(existing.KeyWords)),
                    new SqlParameter("@Info", ToDbValue(existing.Info)),
                    new SqlParameter("@Summary", ToDbValue(existing.Summary)),
                    new SqlParameter("@DefaultImage", ToDbValue(existing.DefaultImage)),
                    new SqlParameter("@ID", existing.ID));
            }

            Thread.Sleep(1);
        }

        if (stopAll)
        {
            break;
        }

        Thread.Sleep(1);
    }

    // 3. Mark the spider as completed, provided the in-memory status is still Running.
    if (Spider.Status == (int)SpiderStatus.Running)
    {
        SqlHelper.ExecuteNonQuery(
            connectionString,
            CommandType.Text,
            "UPDATE yy_Spider SET Status=@Status,LastStartTime=@LastStartTime WHERE ID=@ID",
            new SqlParameter("@Status", (object)(int)SpiderStatus.Complate),
            new SqlParameter("@LastStartTime", DateTime.Now),
            new SqlParameter("@ID", spiderId));
    }
}

/// <summary>
/// Converts a null reference to <see cref="DBNull.Value"/> so it can be used as a SqlParameter value.
/// </summary>
private static object ToDbValue(object value)
{
    return value ?? DBNull.Value;
}