Пример #1
0
        /// <summary>
        /// Re-fetches the content of a stored spider article from its source URL.
        /// When content is returned, the article's Body is overwritten and saved;
        /// otherwise the fetch log is reported as an error.
        /// </summary>
        /// <param name="id">Identifier passed to <c>SpiderArticle.findById</c>.</param>
        public void DoRefresh( int id )
        {
            SpiderArticle post = SpiderArticle.findById( id );

            // Report an unknown id through the same error channel used below,
            // instead of failing with a NullReferenceException on post.Url.
            if (post == null) {
                errors.Add( "SpiderArticle not found: id=" + id );
                echoError();
                return;
            }

            // GetContent writes its diagnostics into this buffer.
            StringBuilder log = new StringBuilder();
            String content = new PagedDetailSpider().GetContent( post.Url, post.SpiderTemplate, log );
            if (strUtil.HasText( content )) {
                post.Body = content;
                // Only the Body column is passed to update().
                post.update( "Body" );
                echoJsonMsg( "刷新成功", true, "" );
            }
            else {
                // Flatten the multi-line fetch log into a single error message.
                errors.Add( log.ToString().Replace( Environment.NewLine, "" ).Trim() );
                echoError();
            }
        }
Пример #2
0
        /// <summary>
        /// Downloads the detail page referenced by <paramref name="lnk"/> and stores it
        /// as a new <c>SpiderArticle</c>. Nothing is stored when <c>isPageExist</c>
        /// reports the url as already present or when no content is returned.
        /// </summary>
        /// <param name="lnk">Link supplying the template, url, title and abstract.</param>
        /// <param name="sb">Log buffer handed to the helpers; a success line is appended after the insert.</param>
        private static void savePageDetail(DetailLink lnk, StringBuilder sb)
        {
            SpiderTemplate tpl     = lnk.Template;
            string         link    = lnk.Url;
            string         heading = lnk.Title;
            string         digest  = lnk.Abstract;

            if (isPageExist(link, sb)) return;

            String html = new PagedDetailSpider().GetContent(link, tpl, sb);
            if (html == null) return;

            SpiderArticle article = new SpiderArticle();
            article.Title          = heading;
            article.Url            = strUtil.SubString(link, 200);
            article.Abstract       = digest;
            article.Body           = html;
            article.SpiderTemplate = tpl;

            // Only the first image match matters: it flags the article as having a
            // picture and supplies the picture url from capture group 1.
            Match firstImage = Regex.Match(html, RegPattern.Img, RegexOptions.Singleline);
            if (firstImage.Success)
            {
                article.IsPic  = 1;
                article.PicUrl = firstImage.Groups[1].Value;
            }

            article.insert();

            sb.Append("保存成功...").Append(lnk.Title).Append("_").AppendLine(lnk.Url);
        }
Пример #3
0
        /// <summary>
        /// Downloads the detail page referenced by <paramref name="lnk"/>, stores it as a
        /// <c>SpiderArticle</c>, and then adds a matching draft <c>BlogPost</c> whose
        /// content is the page body followed by a link to the original page.
        /// Nothing is stored when <c>isPageExist</c> reports the url as already present
        /// or when no content is returned.
        /// </summary>
        /// <param name="lnk">Link supplying the template, url, title and abstract.</param>
        /// <param name="sb">Log buffer handed to the helpers; a success line is appended after the article insert.</param>
        private static void savePageDetail(DetailLink lnk, StringBuilder sb)
        {
            SpiderTemplate template = lnk.Template;
            string         url      = lnk.Url;
            string         title    = lnk.Title;
            string         summary  = lnk.Abstract;

            if (isPageExist(url, sb))
            {
                return;
            }

            String pageBody = new PagedDetailSpider().GetContent(url, template, sb);

            if (pageBody == null)
            {
                return;
            }

            SpiderArticle pd = new SpiderArticle();

            pd.Title          = title;
            pd.Url            = strUtil.SubString(url, 250);
            pd.Abstract       = summary;
            pd.Body           = pageBody;
            pd.SpiderTemplate = template;

            MatchCollection matchs = Regex.Matches(pageBody, RegPattern.Img, RegexOptions.Singleline);

            // The first image found marks the article as having a picture.
            if (matchs.Count > 0)
            {
                pd.IsPic  = 1;
                pd.PicUrl = matchs[0].Groups[1].Value;
            }

            pd.insert();

            sb.AppendLine("保存成功..." + lnk.Title + "_" + lnk.Url);

            // The blog copy drops every "font-size" token from the body.
            pageBody = Regex.Replace(pageBody, "font-size", "", RegexOptions.IgnoreCase);

            // The url comes from a crawled page, so it is placed in a quoted attribute
            // with embedded double quotes escaped. Unquoted, a url containing
            // whitespace or '>' would break the anchor or inject extra attributes.
            string safeUrl        = (pd.Url ?? "").Replace("\"", "&quot;");
            string strArcitleLink = "<div class=\"ArcitleLink\"><a href=\"" + safeUrl + "\">原文链接</a></div>";

            pageBody = pageBody + strArcitleLink;

            // NOTE(review): template.IsDelete is used here as the app/owner/creator id
            // and as the category filter value - confirm this repurposing is intended.
            Maticsoft.BLL.BlogCategory bllBlogCategory = new Maticsoft.BLL.BlogCategory();
            DataSet ds      = bllBlogCategory.GetList("AppId = '" + template.IsDelete.ToString() + "'");
            int     nCateID = 1;

            // Keep the default category when the lookup returns no table, no row or a
            // NULL id. Convert.ToInt32 also accepts ids that arrive as long or decimal.
            if (ds != null && ds.Tables.Count > 0 && ds.Tables[0].Rows.Count > 0)
            {
                object rawId = ds.Tables[0].Rows[0]["Id"];

                if (rawId != null && rawId != DBNull.Value)
                {
                    nCateID = Convert.ToInt32(rawId);
                }
            }

            BlogPost data = new BlogPost();

            data.CategoryId       = nCateID;
            data.Title            = title;
            data.Abstract         = summary;
            data.Content          = pageBody;
            data.AccessStatus     = 0;
            data.CommentCondition = 0;
            data.SaveStatus       = 1; // draft
            data.Created          = System.DateTime.Now.Date;
            data.IsTop            = 0;
            data.IsPick           = 0;
            data.IsPic            = 0;
            data.Ip               = "";
            data.OwnerId          = template.IsDelete;
            data.OwnerUrl         = template.SiteName;
            data.OwnerType        = "wojilu.Members.Users.Domain.User";
            data.CreatorUrl       = template.SiteName;
            data.AppId            = template.IsDelete;
            data.CreatorId        = template.IsDelete;

            Maticsoft.BLL.BlogPost bll = new Maticsoft.BLL.BlogPost();
            bll.Add(data);
        }
Пример #4
0
        /// <summary>
        /// Runs a trial fetch of a detail page using the url, pattern and encoding
        /// posted with the request. Echoes the fetched body, or, when the fetch log
        /// contains "error=", a report made of the url, the pattern and the log.
        /// </summary>
        public void GetDetail()
        {
            String detailUrl = ctx.Post( "detailUrl" );

            // Throw-away template populated only from the posted form values.
            SpiderTemplate trial = new SpiderTemplate();
            trial.DetailPattern = ctx.PostHtmlAll( "DetailPattern" );
            logger.Info( "DetailPattern=" + trial.DetailPattern );
            trial.DetailEncoding = ctx.Post( "detailEncoding" );
            trial.IsSavePic = 0;

            StringBuilder fetchLog = new StringBuilder();
            string body = new PagedDetailSpider().GetContent( detailUrl, trial, fetchLog );

            String logText = fetchLog.ToString();
            if (logText.IndexOf( "error=" ) < 0) {
                echoText( body );
                return;
            }

            // The log reported an error: echo the inputs together with the log.
            StringBuilder report = new StringBuilder();
            report.AppendLine( "detailUrl=" + detailUrl );
            report.AppendLine( "detailPattern=" + trial.DetailPattern );
            report.Append( logText );
            echoText( report.ToString() );
        }
Пример #5
0
        /// <summary>
        /// Downloads the detail page referenced by <paramref name="lnk"/>, stores it as a
        /// <c>SpiderArticle</c>, and then adds a matching draft <c>BlogPost</c> whose
        /// content is the page body followed by a link to the original page.
        /// Nothing is stored when <c>isPageExist</c> reports the url as already present
        /// or when no content is returned.
        /// </summary>
        /// <param name="lnk">Link supplying the template, url, title and abstract.</param>
        /// <param name="sb">Log buffer handed to the helpers; a success line is appended after the article insert.</param>
        private static void savePageDetail( DetailLink lnk, StringBuilder sb )
        {
            SpiderTemplate template = lnk.Template;
            string url = lnk.Url;
            string title = lnk.Title;
            string summary = lnk.Abstract;

            if (isPageExist( url, sb )) return;

            String pageBody = new PagedDetailSpider().GetContent( url, template, sb );

            if (pageBody == null) return;

            SpiderArticle pd = new SpiderArticle();
            pd.Title = title;
            pd.Url = strUtil.SubString( url, 250 );
            pd.Abstract = summary;
            pd.Body = pageBody;
            pd.SpiderTemplate = template;

            // The first image found marks the article as having a picture.
            MatchCollection matchs = Regex.Matches( pageBody, RegPattern.Img, RegexOptions.Singleline );
            if (matchs.Count > 0) {
                pd.IsPic = 1;
                pd.PicUrl = matchs[0].Groups[1].Value;
            }

            pd.insert();

            sb.AppendLine( "保存成功..." + lnk.Title + "_" + lnk.Url );

            // The blog copy drops every "font-size" token from the body.
            pageBody = Regex.Replace(pageBody, "font-size", "", RegexOptions.IgnoreCase);

            // The url comes from a crawled page, so it is placed in a quoted attribute
            // with embedded double quotes escaped. Unquoted, a url containing
            // whitespace or '>' would break the anchor or inject extra attributes.
            string safeUrl = (pd.Url ?? "").Replace("\"", "&quot;");
            string strArcitleLink = "<div class=\"ArcitleLink\"><a href=\"" + safeUrl + "\">原文链接</a></div>";
            pageBody = pageBody + strArcitleLink;

            // NOTE(review): template.IsDelete is used here as the app/owner/creator id
            // and as the category filter value - confirm this repurposing is intended.
            Maticsoft.BLL.BlogCategory bllBlogCategory = new Maticsoft.BLL.BlogCategory();
            DataSet ds = bllBlogCategory.GetList("AppId = '" + template.IsDelete.ToString() + "'");
            int nCateID = 1;

            // Keep the default category when the lookup returns no table, no row or a
            // NULL id. Convert.ToInt32 also accepts ids that arrive as long or decimal.
            if (ds != null && ds.Tables.Count > 0 && ds.Tables[0].Rows.Count > 0)
            {
                object rawId = ds.Tables[0].Rows[0]["Id"];
                if (rawId != null && rawId != DBNull.Value)
                {
                    nCateID = Convert.ToInt32(rawId);
                }
            }

            BlogPost data = new BlogPost();

            data.CategoryId = nCateID;
            data.Title = title;
            data.Abstract = summary;
            data.Content = pageBody;
            data.AccessStatus = 0;
            data.CommentCondition = 0;
            data.SaveStatus = 1; // draft
            data.Created = System.DateTime.Now.Date;
            data.IsTop = 0;
            data.IsPick = 0;
            data.IsPic = 0;
            data.Ip = "";
            data.OwnerId = template.IsDelete;
            data.OwnerUrl = template.SiteName;
            data.OwnerType = "wojilu.Members.Users.Domain.User";
            data.CreatorUrl = template.SiteName;
            data.AppId = template.IsDelete;
            data.CreatorId = template.IsDelete;

            Maticsoft.BLL.BlogPost bll = new Maticsoft.BLL.BlogPost();
            bll.Add(data);
        }
Пример #6
0
        /// <summary>
        /// Downloads the detail page referenced by <paramref name="lnk"/> and stores it
        /// as a new <c>SpiderArticle</c>. Nothing is stored when <c>isPageExist</c>
        /// reports the url as already present or when no content is returned.
        /// </summary>
        /// <param name="lnk">Link supplying the template, url, title and abstract.</param>
        /// <param name="sb">Log buffer handed to the helpers; a success line is appended after the insert.</param>
        private static void savePageDetail( DetailLink lnk, StringBuilder sb )
        {
            SpiderTemplate tpl = lnk.Template;
            string link = lnk.Url;
            string heading = lnk.Title;
            string digest = lnk.Abstract;

            if (isPageExist( link, sb ))
            {
                return;
            }

            String html = new PagedDetailSpider().GetContent( link, tpl, sb );
            if (html == null)
            {
                return;
            }

            SpiderArticle article = new SpiderArticle();
            article.Title = heading;
            article.Url = strUtil.SubString( link, 200 );
            article.Abstract = digest;
            article.Body = html;
            article.SpiderTemplate = tpl;

            // Only the first image match matters: it flags the article as having a
            // picture and supplies the picture url from capture group 1.
            Match firstImage = Regex.Match( html, RegPattern.Img, RegexOptions.Singleline );
            if (firstImage.Success)
            {
                article.IsPic = 1;
                article.PicUrl = firstImage.Groups[1].Value;
            }

            article.insert();

            sb.Append( "保存成功..." ).Append( lnk.Title ).Append( "_" ).AppendLine( lnk.Url );
        }