Example #1
0
 /// <summary>
 /// Adds the supplied spider news entity to the context and persists it.
 /// </summary>
 /// <param name="value">The entity to insert.</param>
 /// <returns>
 /// A <c>ResponseItem</c> with code 0 and a success message when the save completes,
 /// or code 2 carrying the exception message when adding or saving throws.
 /// </returns>
 public ResponseItem Post(yy_Spider_News value)
 {
     ResponseItem response;

     try
     {
         DB.yy_Spider_News.Add(value);
         DB.SaveChanges();
         response = new ResponseItem(0, "添加蜘蛛新闻成功。");
     }
     catch (Exception failure)
     {
         // Failures are reported to the caller through the response, never rethrown.
         response = new ResponseItem(2, failure.Message);
     }

     return response;
 }
Example #2
0
        /// <summary>
        /// Updates the title, keywords, summary and creation date of an existing spider news row.
        /// </summary>
        /// <param name="value">Supplies the ID of the row to update and the new field values.</param>
        /// <returns>
        /// A <c>ResponseItem</c> with code 0 and an empty message when the row was updated;
        /// code 2 when no row with the given ID exists, or when the lookup or save throws
        /// (including a null <paramref name="value"/>), in which case the message is the
        /// exception message.
        /// </returns>
        public ResponseItem Put(yy_Spider_News value)
        {
            try
            {
                var _Entity = DB.yy_Spider_News.Find(value.ID);

                if (_Entity == null)
                {
                    return new ResponseItem(2, "不存在的蜘蛛新闻。");
                }

                // Only these four fields are copied; all other columns keep their stored values.
                _Entity.Title      = value.Title;
                _Entity.KeyWords   = value.KeyWords;
                _Entity.Summary    = value.Summary;
                _Entity.CreateDate = value.CreateDate;
                DB.SaveChanges();

                return new ResponseItem(0, "");
            }
            catch (Exception ex)
            {
                // Same convention as Post: report the failure as code 2 instead of letting it escape.
                return new ResponseItem(2, ex.Message);
            }
        }
Example #3
0
        /// <summary>
        /// Loads a list page via <c>Get</c>, builds one news item for every node matched by the
        /// configured list selector (loading each item's detail page via <c>Get</c> as well),
        /// then writes a log row for the run and refreshes the spider's quality and last start time.
        /// </summary>
        /// <param name="_SpiderID">Spider ID stamped on every item and used in the log and quality SQL.</param>
        /// <param name="_Url">Address of the list page, passed to <c>Get</c>.</param>
        /// <param name="_Config">
        /// Selector configuration. <c>SourceListMatch</c> is required (its absence throws).
        /// <c>TitleMatch</c>, <c>DefaultImgMatch</c>, <c>SummaryMatch</c>, <c>SourceFromMatch</c>,
        /// <c>InfoMatch</c> and <c>ReleaseTimeMatch</c> are read per item; a missing one makes
        /// every item fail conversion. The corresponding <c>*Replace</c> keys are optional.
        /// </param>
        /// <param name="_UserID">User ID stamped on every item.</param>
        /// <returns>
        /// The items that converted without error. The list is empty, and no log row is
        /// written, when the list selector matches nothing.
        /// </returns>
        private List <yy_Spider_News> DataList_Convert(Int64 _SpiderID, String _Url, JObject _Config, long _UserID)
        {
            String Html = Get(_Url);
            var doc = NSoupClient.Parse(Html);
            var SourceList = doc.Select(_Config["SourceListMatch"].ToString());
            var Result = new List <yy_Spider_News>();

            if (SourceList.Count < 1)
            {
                return Result;
            }

            // The pattern is constant, so it is built once per call rather than once per item.
            var regImg = new Regex(@"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>",
                                   RegexOptions.IgnoreCase);

            for (int i = 0; i < SourceList.Count; i++)
            {
                var v = SourceList[i];

                var Item = new yy_Spider_News()
                {
                    SpiderID = _SpiderID, UserID = _UserID, IsSync = 0
                };

                try
                {
                    // Title: own text of the first node matching TitleMatch.
                    var _TitleEntity = v.Select(_Config["TitleMatch"].ToString()).FirstOrDefault();
                    Item.Title = _TitleEntity != null
                        ? Str_Replace(_TitleEntity.OwnText(), DataList_OptionalConfig(_Config, "TitleReplace"))
                        : "";

                    // Default image: "src" attribute of the first node matching DefaultImgMatch.
                    var _DefaultImageEntity = v.Select(_Config["DefaultImgMatch"].ToString()).FirstOrDefault();
                    Item.DefaultImage = _DefaultImageEntity != null
                        ? Str_Replace(_DefaultImageEntity.Attributes["src"].ToString(), DataList_OptionalConfig(_Config, "DefaultImgReplace"))
                        : "";

                    // Summary: own text of the first node matching SummaryMatch.
                    var _SummaryEntity = v.Select(_Config["SummaryMatch"].ToString()).FirstOrDefault();
                    Item.Summary = _SummaryEntity != null
                        ? Str_Replace(_SummaryEntity.OwnText(), DataList_OptionalConfig(_Config, "SummaryReplace"))
                        : "";

                    // Source URL: "href" attribute of the first node matching SourceFromMatch.
                    var _SourceUrlEntity = v.Select(_Config["SourceFromMatch"].ToString()).FirstOrDefault();
                    Item.SourceUrl = _SourceUrlEntity != null
                        ? Str_Replace(_SourceUrlEntity.Attributes["href"].ToString(), DataList_OptionalConfig(_Config, "SourceFromReplace"))
                        : "";

                    // Detail body: outer HTML of the first node matching InfoMatch on the detail page.
                    var InfoMatch  = _Config["InfoMatch"].ToString();
                    var InfoSource = Get(Item.SourceUrl);
                    var InfoDoc    = NSoupClient.Parse(InfoSource);
                    var InfoNode   = InfoDoc.Select(InfoMatch).FirstOrDefault();
                    Item.Info = InfoNode != null
                        ? Str_Replace(InfoNode.OuterHtml(), DataList_OptionalConfig(_Config, "InfoReplace"))
                        : "";

                    // Keywords: up to 20 distinct words longer than one character, ordered by rank.
                    // NOTE(review): Segment.Init runs for every item; it could probably be hoisted
                    // if it is idempotent, but it stays inside the try so a failure only drops the item.
                    Segment.Init(Environment.CurrentDirectory + "\\Segment.xml");
                    Segment segment = new Segment();
                    ICollection <WordInfo> words = segment.DoSegment(Item.Info);
                    var Words = words.Where(a =>
                                            a.Word.Length > 1 &&
                                            (a.OriginalWordType == WordType.English ||
                                             a.OriginalWordType == WordType.SimplifiedChinese ||
                                             a.OriginalWordType == WordType.TraditionalChinese ||
                                             a.OriginalWordType == WordType.Synonym)
                                            ).OrderBy(a => a.Rank).Select(a => a.Word).Distinct().Take(20).ToArray();

                    Item.KeyWords = String.Join(",", Words);

                    // Release time: parsed from the detail page; falls back to "now" when no node matches.
                    // An unparseable date throws and therefore drops the item (see catch below).
                    var _CreateDateEntity = InfoDoc.Select(_Config["ReleaseTimeMatch"].ToString()).FirstOrDefault();
                    Item.CreateDate = _CreateDateEntity != null
                        ? DateTime.Parse(Str_Replace(_CreateDateEntity.OwnText(), DataList_OptionalConfig(_Config, "ReleaseTimeReplace")))
                        : DateTime.Now;

                    // If the list page gave no image, take the first non-empty <img src> from the body.
                    if (String.IsNullOrEmpty(Item.DefaultImage))
                    {
                        foreach (Match match in regImg.Matches(Item.Info))
                        {
                            var imgUrl = match.Groups["imgUrl"].Value;
                            if (!String.IsNullOrEmpty(imgUrl))
                            {
                                Item.DefaultImage = imgUrl;
                                // Stop only once a usable URL was found; an <img> with an empty
                                // src no longer ends the search.
                                break;
                            }
                        }
                    }
                }
                catch
                {
                    // Best effort: any failure while converting one item drops just that item.
                    Item = null;
                }

                if (Item != null)
                {
                    Result.Add(Item);
                }

                // Brief pause between items.
                Thread.Sleep(1);
            }

            // Record this run, then recompute the spider's quality from its whole log history.
            DB.Database.ExecuteSqlCommand(
                "INSERT INTO yy_Spider_Log ([SpiderID],[TotalUrl],[SuccessUrl])VALUES(@p0,@p1,@p2)",
                _SpiderID, SourceList.Count, Result.Count);

            // NULLIF avoids a divide-by-zero error when the spider has no successful item on
            // record; in that case COALESCE leaves the current Quality value untouched.
            // NOTE(review): Total / Success * 100 looks inverted for a "quality" figure and is
            // subject to integer division if the columns are integers — confirm the intended formula.
            DB.Database.ExecuteSqlCommand(@"UPDATE [dbo].[yy_Spider] SET
            Quality = COALESCE((SELECT SUM(TotalUrl) / NULLIF(SUM(SuccessUrl), 0) * 100 FROM [dbo].[yy_Spider_Log]
            WITH(NOLOCK) WHERE SpiderID=@p0), Quality),LastStartTime=getdate() WHERE ID = @p0", _SpiderID);

            return Result;
        }

        /// <summary>
        /// Returns the string form of an optional configuration entry, or an empty string
        /// when the key is absent.
        /// </summary>
        private static String DataList_OptionalConfig(JObject _Config, String key)
        {
            var token = _Config[key];
            return token != null ? token.ToString() : string.Empty;
        }