Beispiel #1
0
 /// <summary>
 /// Inserts a new spider record and saves it.
 /// </summary>
 /// <param name="value">Spider entity to add to the <c>yy_Spider</c> set.</param>
 /// <returns>
 /// <c>ResponseItem(0, ...)</c> with a success message when the save completes;
 /// <c>ResponseItem(2, ...)</c> carrying the exception message when adding or saving throws.
 /// </returns>
 public ResponseItem Post(yy_Spider value)
 {
     ResponseItem outcome;

     try
     {
         DB.yy_Spider.Add(value);
         DB.SaveChanges();
         outcome = new ResponseItem(0, "添加蜘蛛成功。");
     }
     catch (Exception failure)
     {
         // Any failure is reported to the caller instead of propagating.
         outcome = new ResponseItem(2, failure.Message);
     }

     return outcome;
 }
Beispiel #2
0
        /// <summary>
        /// Copies the <c>IsShow</c> flag from <paramref name="value"/> onto the stored spider
        /// with the same ID and saves the change.
        /// </summary>
        /// <param name="value">Carries the ID of the spider to update and the new <c>IsShow</c> value.</param>
        /// <returns>
        /// <c>ResponseItem(0, "")</c> when the spider was found and saved;
        /// <c>ResponseItem(2, ...)</c> when no spider with that ID exists.
        /// </returns>
        public ResponseItem ShowHide(yy_Spider value)
        {
            var stored = DB.yy_Spider.Find(value.ID);

            // Guard: nothing to update when the ID is unknown.
            if (stored == null)
            {
                return new ResponseItem(2, "不存在的蜘蛛。");
            }

            stored.IsShow = value.IsShow;
            DB.SaveChanges();

            return new ResponseItem(0, "");
        }
Beispiel #3
0
        /// <summary>
        /// Overwrites the editable fields of the stored spider that has the same ID as
        /// <paramref name="value"/> and saves the change.
        /// </summary>
        /// <param name="value">Carries the ID of the spider to update and the new field values.</param>
        /// <returns>
        /// <c>ResponseItem(0, "")</c> when the spider was found and saved;
        /// <c>ResponseItem(2, ...)</c> when no spider with that ID exists.
        /// </returns>
        public ResponseItem Put(yy_Spider value)
        {
            var stored = DB.yy_Spider.Find(value.ID);

            // Guard: nothing to update when the ID is unknown.
            if (stored == null)
            {
                return new ResponseItem(2, "不存在的蜘蛛。");
            }

            // Only these fields are copied; every other column of the stored row is left untouched.
            stored.ExecutionInterval = value.ExecutionInterval;
            stored.IsShow = value.IsShow;
            stored.KeyWords = value.KeyWords;
            stored.RuleConfig = value.RuleConfig;
            stored.SourceUrls = value.SourceUrls;
            stored.SpiderMode = value.SpiderMode;
            stored.TargetPlatforms = value.TargetPlatforms;
            stored.Title = value.Title;
            stored.TypeIDs = value.TypeIDs;

            DB.SaveChanges();

            return new ResponseItem(0, "");
        }
Beispiel #4
0
        /// <summary>
        /// Imports spider definitions from the JSON files uploaded with the current HTTP request.
        /// </summary>
        /// <remarks>
        /// Files are processed in order and each one is saved as soon as it is accepted, so a
        /// failure on a later file leaves the earlier ones already stored.
        /// </remarks>
        /// <returns>
        /// An empty string when the request carries no files; otherwise an anonymous object with
        /// <c>code</c>/<c>msg</c>: 0 success, 1 missing "typeids" parameter, 2 empty file,
        /// 3 file could not be read, 4 a spider with the same code already exists,
        /// 5 the content could not be turned into a spider.
        /// </returns>
        public Object UploadCrawler()
        {
            var request = HttpContext.Current.Request;
            var uploads = request.Files;

            if (uploads.Count < 1)
            {
                return "";
            }

            String TypeIDs = request.Params["typeids"];

            if (String.IsNullOrEmpty(TypeIDs))
            {
                return new { code = 1, msg = "所属分类不能为空。" };
            }

            for (var index = 0; index < uploads.Count; index++)
            {
                HttpPostedFile upload = uploads[index];

                if (upload.ContentLength < 1)
                {
                    return new { code = 2, msg = "文件内容为空。" };
                }

                // Best-effort read: any I/O failure is folded into the "empty content" case below.
                String content;

                try
                {
                    using (var reader = new StreamReader(upload.InputStream))
                    {
                        content = reader.ReadToEnd();
                    }
                }
                catch
                {
                    content = String.Empty;
                }

                if (string.IsNullOrEmpty(content))
                {
                    return new { code = 3, msg = "文件读取错误。" };
                }

                yy_Spider spider = null;

                try
                {
                    spider = JsonConvert.DeserializeObject<yy_Spider>(content);

                    // The code is the hash of source URLs + rule config and serves as the duplicate key.
                    // NOTE(review): String.GetHashCode() is not guaranteed stable across runtimes or
                    // processes - confirm this is acceptable for a persisted key.
                    var code = (spider.SourceUrls + spider.RuleConfig).GetHashCode().ToString();
                    spider.Code = code;

                    var duplicate = DB.yy_Spider.FirstOrDefault(x => x.Code == code);

                    if (duplicate != null)
                    {
                        return new { code = 4, msg = "已存在的蜘蛛。" };
                    }

                    spider.CreateDate = DateTime.Now;
                    spider.LastStartTime = DateTime.Now;
                    spider.UserID = User.ID;
                    spider.LookCount = 0;
                    spider.Status = 0;
                    spider.TypeIDs = TypeIDs;
                }
                catch
                {
                    // Covers malformed JSON, a null deserialization result and lookup failures alike.
                    return new { code = 5, msg = "文件格式为空。" };
                }

                DB.yy_Spider.Add(spider);
                DB.SaveChanges();
            }

            return new { code = 0, msg = "导入成功。" };
        }
Beispiel #5
0
        /// <summary>
        /// Runs one crawl pass for <paramref name="Spider"/>: marks it as running, walks every
        /// source URL, stores news items not seen before, merges richer data into items that
        /// already exist, and finally marks the spider as completed.
        /// </summary>
        /// <param name="Spider">
        /// Spider to execute. <c>SourceUrls</c> is deserialized as a JSON string array and
        /// <c>RuleConfig</c> as a JSON object. Only <c>Status</c> is changed on the in-memory
        /// instance; the database row is updated with direct SQL.
        /// </param>
        /// <remarks>
        /// The pass stops early as soon as it meets an item that belongs to this spider and whose
        /// <c>CreateDate</c> is not older than the spider's <c>LastStartTime</c> as held by the
        /// in-memory instance (the SQL update in step 1 does not refresh that property).
        /// NOTE(review): an exception thrown during the pass leaves the row in the running state,
        /// because the final status update is then never reached.
        /// </remarks>
        private void Start(yy_Spider Spider)
        {
            var _SpiderID = Spider.ID;

            var Urls = JsonConvert.DeserializeObject<String[]>(Spider.SourceUrls);

            var RuleConfig = JsonConvert.DeserializeObject<JObject>(Spider.RuleConfig);

            #region 1. Mark the spider as running

            Spider.Status = (int)SpiderStatus.Running;

            // The status is boxed to object explicitly: a constant integer that happens to be 0
            // would otherwise bind to the SqlParameter(string, SqlDbType) constructor overload.
            SqlHelper.ExecuteNonQuery(DB.Database.Connection.ConnectionString, CommandType.Text,
                                      "UPDATE yy_Spider SET Status=@Status,LastStartTime=getdate() WHERE ID=@ID",
                                      new SqlParameter("@Status", (object)(int)SpiderStatus.Running),
                                      new SqlParameter("@ID", _SpiderID));

            #endregion 1. Mark the spider as running

            Boolean BreakAll = false;

            foreach (var SourceUrl in Urls)
            {
                var NewsCollection = DataList_Convert(_SpiderID, SourceUrl, RuleConfig, Spider.UserID);

                #region 2. Walk the items of this source

                foreach (var NewsItem in NewsCollection)
                {
                    // The title hash is the identity used to detect items that were already stored.
                    // NOTE(review): String.GetHashCode() is not guaranteed stable across runtimes or
                    // processes - confirm this is acceptable for a persisted identifier.
                    NewsItem.Identifer = NewsItem.Title.GetHashCode();

                    var ExistsNewsItem = DB.yy_Spider_News.Where(a => a.Identifer == NewsItem.Identifer).FirstOrDefault();

                    // An item owned by this spider and created at or after the spider's last start
                    // time ends the whole pass.
                    if (ExistsNewsItem != null &&
                        ExistsNewsItem.SpiderID == Spider.ID &&
                        ExistsNewsItem.CreateDate >= Spider.LastStartTime)
                    {
                        BreakAll = true;
                        break;
                    }

                    // Unknown item: store it and move on (the per-item sleep below is skipped here).
                    if (ExistsNewsItem == null)
                    {
                        DB.yy_Spider_News.Add(NewsItem);
                        DB.SaveChanges();
                        continue;
                    }

                    #region Known item: merge in richer data

                    Boolean Merged = false;

                    // Keywords and body: take the new ones when none are stored yet, or when the
                    // new item has more comma-separated keywords than the stored one.
                    if (!String.IsNullOrEmpty(NewsItem.KeyWords))
                    {
                        Boolean TakeNewKeyWords = String.IsNullOrEmpty(ExistsNewsItem.KeyWords) ||
                                                  NewsItem.KeyWords.Split(',').Length > ExistsNewsItem.KeyWords.Split(',').Length;

                        if (TakeNewKeyWords)
                        {
                            ExistsNewsItem.KeyWords = NewsItem.KeyWords;
                            ExistsNewsItem.Info     = NewsItem.Info;
                            Merged = true;
                        }
                    }

                    // Image: only fill in when missing.
                    if (String.IsNullOrEmpty(ExistsNewsItem.DefaultImage) && !String.IsNullOrEmpty(NewsItem.DefaultImage))
                    {
                        ExistsNewsItem.DefaultImage = NewsItem.DefaultImage;
                        Merged = true;
                    }

                    // Summary: only fill in when missing.
                    if (String.IsNullOrEmpty(ExistsNewsItem.Summary) && !String.IsNullOrEmpty(NewsItem.Summary))
                    {
                        ExistsNewsItem.Summary = NewsItem.Summary;
                        Merged = true;
                    }

                    if (Merged)
                    {
                        // Fields that were not merged may still be null. A SqlParameter whose value
                        // is null is not sent to the server and the command then fails with
                        // "parameter was not supplied", so nulls are mapped to DBNull.Value.
                        SqlHelper.ExecuteNonQuery(DB.Database.Connection.ConnectionString,
                                                  CommandType.Text,
                                                  "UPDATE yy_Spider_News SET KeyWords=@KeyWords,Info=@Info,Summary=@Summary,DefaultImage=@DefaultImage,LastStartTime=getdate() WHERE ID=@ID",
                                                  new SqlParameter("@KeyWords", (object)ExistsNewsItem.KeyWords ?? DBNull.Value),
                                                  new SqlParameter("@Info", (object)ExistsNewsItem.Info ?? DBNull.Value),
                                                  new SqlParameter("@Summary", (object)ExistsNewsItem.Summary ?? DBNull.Value),
                                                  new SqlParameter("@DefaultImage", (object)ExistsNewsItem.DefaultImage ?? DBNull.Value),
                                                  new SqlParameter("@ID", ExistsNewsItem.ID));
                    }

                    #endregion Known item: merge in richer data

                    Thread.Sleep(1);
                }

                #endregion 2. Walk the items of this source

                if (BreakAll)
                {
                    break;
                }

                Thread.Sleep(1);
            }

            #region 3. Mark the spider as completed

            // Spider.Status was set to Running in step 1 and nothing in this method changes it
            // afterwards, so this check only fails if another party modified the instance meanwhile.
            if (Spider.Status == (int)SpiderStatus.Running)
            {
                SqlHelper.ExecuteNonQuery(DB.Database.Connection.ConnectionString, CommandType.Text,
                                          "UPDATE yy_Spider SET Status=@Status,LastStartTime=@LastStartTime WHERE ID=@ID",
                                          new SqlParameter("@Status", (object)(int)SpiderStatus.Complate),
                                          new SqlParameter("@LastStartTime", DateTime.Now),
                                          new SqlParameter("@ID", _SpiderID));
            }

            #endregion 3. Mark the spider as completed
        }