/// <summary>
/// Re-downloads the body of an already collected article from its source url
/// and stores the new body on the article.
/// </summary>
/// <param name="id">Id of the <c>SpiderArticle</c> whose body should be refreshed.</param>
/// <remarks>
/// On success a JSON message is echoed. When no content could be fetched, the spider
/// log (with line breaks stripped) is added to <c>errors</c> and echoed as an error.
/// </remarks>
public void DoRefresh( int id ) {

    SpiderArticle post = SpiderArticle.findById( id );
    if (post == null) {
        // Unknown id: answer with the regular error response instead of failing
        // with a NullReferenceException on post.Url below.
        // NOTE(review): assumes findById returns null for a missing row — confirm.
        errors.Add( "文章不存在" );
        echoError();
        return;
    }

    StringBuilder log = new StringBuilder();
    String content = new PagedDetailSpider().GetContent( post.Url, post.SpiderTemplate, log );

    if (strUtil.HasText( content )) {
        post.Body = content;
        // presumably restricts the update to the Body column — TODO confirm against the ORM
        post.update( "Body" );
        echoJsonMsg( "刷新成功", true, "" );
    }
    else {
        // The spider log is the only failure detail available; flatten it to one line.
        errors.Add( log.ToString().Replace( Environment.NewLine, "" ).Trim() );
        echoError();
    }
}
/// <summary>
/// Fetches the detail page behind a collected link and inserts it as a new
/// <c>SpiderArticle</c>.
/// </summary>
/// <param name="lnk">Link to fetch; supplies the url, title, abstract and spider template.</param>
/// <param name="sb">Log buffer handed to the existence check and the spider; receives one line after a successful insert.</param>
private static void savePageDetail(DetailLink lnk, StringBuilder sb) {

    SpiderTemplate tpl = lnk.Template;
    string pageUrl = lnk.Url;
    string pageTitle = lnk.Title;
    string pageAbstract = lnk.Abstract;

    // Skip links that isPageExist reports as already present.
    if (isPageExist(pageUrl, sb)) {
        return;
    }

    // A null body means there is nothing to store.
    String body = new PagedDetailSpider().GetContent(pageUrl, tpl, sb);
    if (body == null) {
        return;
    }

    SpiderArticle article = new SpiderArticle();
    article.Title = pageTitle;
    article.Url = strUtil.SubString(pageUrl, 200);
    article.Abstract = pageAbstract;
    article.Body = body;
    article.SpiderTemplate = tpl;

    // The first image found in the body becomes the article picture.
    Match firstImage = Regex.Match(body, RegPattern.Img, RegexOptions.Singleline);
    if (firstImage.Success) {
        article.IsPic = 1;
        article.PicUrl = firstImage.Groups[1].Value;
    }

    article.insert();

    sb.AppendLine("保存成功..." + lnk.Title + "_" + lnk.Url);
}
/// <summary>
/// Fetches the detail page behind a collected link, inserts it as a new
/// <c>SpiderArticle</c>, and additionally stores it as a draft <c>BlogPost</c>
/// through the Maticsoft BLL.
/// </summary>
/// <param name="lnk">Link to fetch; supplies the url, title, abstract and spider template.</param>
/// <param name="sb">Log buffer handed to the existence check and the spider; receives one line after the article insert.</param>
private static void savePageDetail(DetailLink lnk, StringBuilder sb) {

    SpiderTemplate template = lnk.Template;
    string url = lnk.Url;
    string title = lnk.Title;
    string summary = lnk.Abstract;

    // Skip links that isPageExist reports as already present.
    if (isPageExist(url, sb)) {
        return;
    }

    // A null body means there is nothing to store.
    String pageBody = new PagedDetailSpider().GetContent(url, template, sb);
    if (pageBody == null) {
        return;
    }

    // ---- 1. spider article ----
    SpiderArticle pd = new SpiderArticle();
    pd.Title = title;
    pd.Url = strUtil.SubString(url, 250);
    pd.Abstract = summary;
    pd.Body = pageBody;
    pd.SpiderTemplate = template;

    // The first image found in the body becomes the article picture.
    Match firstImage = Regex.Match(pageBody, RegPattern.Img, RegexOptions.Singleline);
    if (firstImage.Success) {
        pd.IsPic = 1;
        pd.PicUrl = firstImage.Groups[1].Value;
    }

    pd.insert();
    sb.AppendLine("保存成功..." + lnk.Title + "_" + lnk.Url);

    // ---- 2. blog post ----
    // Removes every literal "font-size" from the markup.
    // NOTE(review): presumably meant to neutralise inline font sizes of the source page — confirm.
    pageBody = Regex.Replace(pageBody, "font-size", "", RegexOptions.IgnoreCase);

    // The url comes from a crawled page, so it is attribute-encoded and quoted before
    // being placed into the markup. The untruncated url is used: the 250-character
    // copy stored on the article would produce a broken link for longer addresses.
    string strArcitleLink = "<div class=\"ArcitleLink\"><a href=\""
        + System.Web.HttpUtility.HtmlAttributeEncode(url)
        + "\">原文链接</a></div>";
    pageBody = pageBody + strArcitleLink;

    // template.IsDelete is used below as the AppId / owner id / creator id of the post.
    // NOTE(review): the filter is string-built; safe only while IsDelete is numeric — confirm.
    Maticsoft.BLL.BlogCategory bllBlogCategory = new Maticsoft.BLL.BlogCategory();
    DataSet ds = bllBlogCategory.GetList("AppId = '" + template.IsDelete.ToString() + "'");

    // Falls back to category 1 when the lookup yields no rows (or no table at all).
    int nCateID = 1;
    if (ds != null && ds.Tables.Count > 0 && ds.Tables[0].Rows.Count > 0) {
        // Convert instead of a hard unboxing cast so non-Int32 id columns also work.
        nCateID = System.Convert.ToInt32(ds.Tables[0].Rows[0]["Id"]);
    }

    BlogPost data = new BlogPost();
    data.CategoryId = nCateID;
    data.Title = title;
    data.Abstract = summary;
    data.Content = pageBody;
    data.AccessStatus = 0;
    data.CommentCondition = 0;
    data.SaveStatus = 1; // draft
    data.Created = System.DateTime.Now.Date;
    data.IsTop = 0;
    data.IsPick = 0;
    data.IsPic = 0;
    data.Ip = "";
    data.OwnerId = template.IsDelete;
    data.OwnerUrl = template.SiteName;
    data.OwnerType = "wojilu.Members.Users.Domain.User";
    data.CreatorUrl = template.SiteName;
    data.AppId = template.IsDelete;
    data.CreatorId = template.IsDelete;

    Maticsoft.BLL.BlogPost bll = new Maticsoft.BLL.BlogPost();
    bll.Add(data);
}
/// <summary>
/// Fetches a detail page using the url, pattern and encoding posted with the request
/// and echoes either the extracted body or, when the spider log contains "error=",
/// a diagnostic text.
/// </summary>
public void GetDetail() {

    String detailUrl = ctx.Post( "detailUrl" );

    // The detail pattern is posted as a whole (not assembled from separate
    // begin/end codes).
    SpiderTemplate template = new SpiderTemplate();
    template.DetailPattern = ctx.PostHtmlAll( "DetailPattern" );
    logger.Info( "DetailPattern=" + template.DetailPattern );

    template.DetailEncoding = ctx.Post( "detailEncoding" );
    template.IsSavePic = 0;

    StringBuilder spiderLog = new StringBuilder();
    string body = new PagedDetailSpider().GetContent( detailUrl, template, spiderLog );

    // No "error=" marker in the log: echo the extracted body.
    String logText = spiderLog.ToString();
    if (logText.IndexOf( "error=" ) < 0) {
        echoText( body );
        return;
    }

    // Otherwise echo the request parameters followed by the spider log.
    StringBuilder report = new StringBuilder();
    report.AppendLine( "detailUrl=" + detailUrl );
    report.AppendLine( "detailPattern=" + template.DetailPattern );
    report.Append( logText );
    echoText( report.ToString() );
}
/// <summary>
/// Fetches the detail page behind a collected link, inserts it as a new
/// <c>SpiderArticle</c>, and additionally stores it as a draft <c>BlogPost</c>
/// through the Maticsoft BLL.
/// </summary>
/// <param name="lnk">Link to fetch; supplies the url, title, abstract and spider template.</param>
/// <param name="sb">Log buffer handed to the existence check and the spider; receives one line after the article insert.</param>
private static void savePageDetail( DetailLink lnk, StringBuilder sb ) {

    SpiderTemplate template = lnk.Template;
    string url = lnk.Url;
    string title = lnk.Title;
    string summary = lnk.Abstract;

    // Skip links that isPageExist reports as already present.
    if (isPageExist( url, sb )) {
        return;
    }

    // A null body means there is nothing to store.
    String pageBody = new PagedDetailSpider().GetContent( url, template, sb );
    if (pageBody == null) {
        return;
    }

    // ---- 1. spider article ----
    SpiderArticle pd = new SpiderArticle();
    pd.Title = title;
    pd.Url = strUtil.SubString( url, 250 );
    pd.Abstract = summary;
    pd.Body = pageBody;
    pd.SpiderTemplate = template;

    // The first image found in the body becomes the article picture.
    Match firstImage = Regex.Match( pageBody, RegPattern.Img, RegexOptions.Singleline );
    if (firstImage.Success) {
        pd.IsPic = 1;
        pd.PicUrl = firstImage.Groups[1].Value;
    }

    pd.insert();
    sb.AppendLine( "保存成功..." + lnk.Title + "_" + lnk.Url );

    // ---- 2. blog post ----
    // Removes every literal "font-size" from the markup.
    // NOTE(review): presumably meant to neutralise inline font sizes of the source page — confirm.
    pageBody = Regex.Replace( pageBody, "font-size", "", RegexOptions.IgnoreCase );

    // The url comes from a crawled page, so it is attribute-encoded and quoted before
    // being placed into the markup. The untruncated url is used: the 250-character
    // copy stored on the article would produce a broken link for longer addresses.
    string strArcitleLink = "<div class=\"ArcitleLink\"><a href=\""
        + System.Web.HttpUtility.HtmlAttributeEncode( url )
        + "\">原文链接</a></div>";
    pageBody = pageBody + strArcitleLink;

    // template.IsDelete is used below as the AppId / owner id / creator id of the post.
    // NOTE(review): the filter is string-built; safe only while IsDelete is numeric — confirm.
    Maticsoft.BLL.BlogCategory bllBlogCategory = new Maticsoft.BLL.BlogCategory();
    DataSet ds = bllBlogCategory.GetList( "AppId = '" + template.IsDelete.ToString() + "'" );

    // Falls back to category 1 when the lookup yields no rows (or no table at all).
    int nCateID = 1;
    if (ds != null && ds.Tables.Count > 0 && ds.Tables[0].Rows.Count > 0) {
        // Convert instead of a hard unboxing cast so non-Int32 id columns also work.
        nCateID = System.Convert.ToInt32( ds.Tables[0].Rows[0]["Id"] );
    }

    BlogPost data = new BlogPost();
    data.CategoryId = nCateID;
    data.Title = title;
    data.Abstract = summary;
    data.Content = pageBody;
    data.AccessStatus = 0;
    data.CommentCondition = 0;
    data.SaveStatus = 1; // draft
    data.Created = System.DateTime.Now.Date;
    data.IsTop = 0;
    data.IsPick = 0;
    data.IsPic = 0;
    data.Ip = "";
    data.OwnerId = template.IsDelete;
    data.OwnerUrl = template.SiteName;
    data.OwnerType = "wojilu.Members.Users.Domain.User";
    data.CreatorUrl = template.SiteName;
    data.AppId = template.IsDelete;
    data.CreatorId = template.IsDelete;

    Maticsoft.BLL.BlogPost bll = new Maticsoft.BLL.BlogPost();
    bll.Add( data );
}
/// <summary>
/// Downloads the page a collected link points to and persists it as a
/// <c>SpiderArticle</c>.
/// </summary>
/// <param name="lnk">Collected link carrying the url, title, abstract and spider template.</param>
/// <param name="sb">Log buffer shared with the existence check and the spider; one line is appended once the article has been inserted.</param>
private static void savePageDetail( DetailLink lnk, StringBuilder sb ) {

    SpiderTemplate spiderTemplate = lnk.Template;
    string sourceUrl = lnk.Url;
    string heading = lnk.Title;
    string excerpt = lnk.Abstract;

    // Nothing to do when isPageExist reports the url as already present.
    if (isPageExist( sourceUrl, sb )) {
        return;
    }

    String html = new PagedDetailSpider().GetContent( sourceUrl, spiderTemplate, sb );

    // The spider returned no body at all: leave without inserting.
    if (html == null) {
        return;
    }

    SpiderArticle entry = new SpiderArticle();
    entry.Title = heading;
    entry.Url = strUtil.SubString( sourceUrl, 200 );
    entry.Abstract = excerpt;
    entry.Body = html;
    entry.SpiderTemplate = spiderTemplate;

    // Mark the article as illustrated when the body contains at least one image,
    // using the first captured group of the first match as its picture url.
    Match img = Regex.Match( html, RegPattern.Img, RegexOptions.Singleline );
    if (img.Success) {
        entry.IsPic = 1;
        entry.PicUrl = img.Groups[1].Value;
    }

    entry.insert();

    sb.AppendLine( "保存成功..." + lnk.Title + "_" + lnk.Url );
}