/// <summary>
/// Follows a chain of "next page" links and appends the content of every page
/// reached to <c>otherpgcon</c>.
/// </summary>
/// <param name="otherurl">
/// URL of the page whose text is in <paramref name="PageDoc"/>. It is passed to
/// <c>Utility.StickUrl</c> together with the matched link.
/// </param>
/// <param name="PageDoc">Document text that is searched for the next-page link.</param>
/// <param name="pattern">
/// Rule handed to <c>Utility.GetMatchUrl</c>; the <c>TARGET</c> group of the match
/// supplies the next link.
/// </param>
/// <remarks>
/// The walk ends when no link is matched, when a page cannot be fetched, or when the
/// link resolves to a URL that was already visited during this walk. The last check
/// keeps a pagination cycle (A -> B -> A) from running without bound; previously only
/// the immediately preceding URL was compared.
/// </remarks>
private void GetOtherPage(string otherurl, string PageDoc, string pattern)
{
    // Trimmed URLs already handled in this walk (the starting page counts as handled).
    System.Collections.Generic.List<string> visited = new System.Collections.Generic.List<string>();
    if (otherurl != null)
    {
        visited.Add(otherurl.Trim());
    }

    string currentUrl = otherurl;
    string currentDoc = PageDoc;

    while (true)
    {
        Match m = Utility.GetMatchUrl(currentDoc, pattern, "[分页新闻]");
        if (!m.Success)
        {
            return;
        }

        string nextUrl = Utility.StickUrl(currentUrl, m.Groups["TARGET"].Value);
        string nextKey = nextUrl.Trim();
        if (visited.Contains(nextKey))
        {
            // Either the page links to itself or the chain has looped back.
            return;
        }
        visited.Add(nextKey);

        PageNews pgns = new PageNews(nextUrl, _Encode);
        pgns.RuleOfContent = this._contentrule;
        if (!pgns.Fetch())
        {
            return;
        }

        pgns.FigureContent();
        otherpgcon += pgns.Content;

        // Continue from the page that was just fetched.
        currentUrl = nextUrl;
        currentDoc = pgns._Doc;
    }
}
/// <summary>
/// Fetches the pages linked from this page's index-style pagination and returns
/// their extracted content, concatenated in the order the links are matched.
/// </summary>
/// <param name="profile">
/// Rule handed to <c>Utility.GetMatchUrl</c>; the <c>TARGET</c> group of each match
/// supplies one page link.
/// </param>
/// <returns>
/// The concatenated <c>Content</c> of every page fetched, or an empty string when
/// nothing was fetched.
/// </returns>
/// <remarks>
/// A link that resolves to this page's own URL is skipped, and a link whose page has
/// already been appended is skipped as well, so a pager that is matched more than once
/// does not duplicate content.
/// </remarks>
public string GetIndexPagination(string profile)
{
    System.Text.StringBuilder combined = new System.Text.StringBuilder();
    // Trimmed URLs whose content has already been appended.
    System.Collections.Generic.List<string> appended = new System.Collections.Generic.List<string>();

    for (Match m = Utility.GetMatchUrl(_Doc, profile, "[分页新闻]"); m.Success; m = m.NextMatch())
    {
        string pageUrl = Utility.StickUrl(_Url, m.Groups["TARGET"].Value);
        string key = pageUrl.Trim();

        if (key.Equals(this._Url) || appended.Contains(key))
        {
            continue;
        }

        PageNews pgns = new PageNews(pageUrl, _Encode);
        // NOTE(review): RuleOfContent is not assigned before FigureContent() here,
        // whereas GetOtherPage assigns it from _contentrule - confirm this is intended.
        if (pgns.Fetch())
        {
            pgns.FigureContent();
            combined.Append(pgns.Content);
            appended.Add(key);
        }
    }

    return combined.ToString();
}
/// <summary>
/// Collects a single news page: fetches it, extracts title, content, author, source
/// and date using the settings in <paramref name="r"/>, appends paginated content,
/// filters and optionally rewrites the content, and passes the result to <c>NewsAdd</c>.
/// </summary>
/// <param name="Url">Address of the news page. A null or blank value is a failure.</param>
/// <param name="r">
/// Settings row. Columns read: <c>Encode</c>, <c>PageTitleSetting</c>, <c>PagebodySetting</c>,
/// <c>HandSetAuthor</c>/<c>AuthorSetting</c>, <c>HandSetSource</c>/<c>SourceSetting</c>,
/// <c>HandSetAddDate</c>/<c>AddDateSetting</c>, <c>OtherNewsType</c>, <c>OtherNewsPageSetting</c>,
/// the filter flags (<c>TextTF</c>, <c>IsStyle</c>, <c>IsDIV</c>, <c>IsA</c>, <c>IsClass</c>,
/// <c>IsFont</c>, <c>IsSpan</c>, <c>IsObject</c>, <c>IsIFrame</c>, <c>IsScript</c>),
/// <c>OldContent</c>/<c>ReContent</c>/<c>IgnoreCase</c>, <c>ID</c> and <c>ClassID</c>.
/// </param>
/// <param name="norepeat">
/// When true, a null title is a failure and a title that <c>dal.TitleExist</c> reports
/// as existing is reported as a duplicate.
/// </param>
/// <returns>0 on success, -1 when the title is a duplicate, 1 on failure.</returns>
private int CollectPage(string Url, DataRow r, bool norepeat)
{
    try
    {
        if (Url == null || Url.Trim().Length == 0)
        {
            return 1;
        }

        PageNews pn = new PageNews(Url, r["Encode"].ToString());
        if (!pn.Fetch())
        {
            return 1;
        }

        pn.RuleOfTitle = r["PageTitleSetting"].ToString();
        pn.RuleOfContent = r["PagebodySetting"].ToString();
        pn.FigureTitle();

        if (norepeat)
        {
            if (pn.Title == null)
            {
                return 1;
            }
            if (dal.TitleExist(pn.Title))
            {
                return -1;
            }
        }

        pn.FigureContent();

        // For author, source and date a non-null "HandSet*" column is passed with the
        // flag set to true; otherwise the matching "*Setting" column is passed with false.
        if (r.IsNull("HandSetAuthor"))
        {
            pn.FigureAuthor(r["AuthorSetting"].ToString(), false);
        }
        else
        {
            pn.FigureAuthor(r["HandSetAuthor"].ToString(), true);
        }

        if (r.IsNull("HandSetSource"))
        {
            pn.FigureSource(r["SourceSetting"].ToString(), false);
        }
        else
        {
            pn.FigureSource(r["HandSetSource"].ToString(), true);
        }

        if (r.IsNull("HandSetAddDate"))
        {
            pn.FigureAddTime(r["AddDateSetting"].ToString(), false);
        }
        else
        {
            pn.FigureAddTime(r["HandSetAddDate"].ToString(), true);
        }

        // OtherNewsType selects the pagination handler: 1 -> GetOtherPagination,
        // 2 -> GetIndexPagination, anything else -> no pagination.
        int pgtp = int.Parse(r["OtherNewsType"].ToString());
        if (pgtp == 1)
        {
            pn.Content += pn.GetOtherPagination(r["OtherNewsPageSetting"].ToString());
        }
        else if (pgtp == 2)
        {
            pn.Content += pn.GetIndexPagination(r["OtherNewsPageSetting"].ToString());
        }

        pn.Filter(
            bool.Parse(r["TextTF"].ToString()),
            bool.Parse(r["IsStyle"].ToString()),
            bool.Parse(r["IsDIV"].ToString()),
            bool.Parse(r["IsA"].ToString()),
            bool.Parse(r["IsClass"].ToString()),
            bool.Parse(r["IsFont"].ToString()),
            bool.Parse(r["IsSpan"].ToString()),
            bool.Parse(r["IsObject"].ToString()),
            bool.Parse(r["IsIFrame"].ToString()),
            bool.Parse(r["IsScript"].ToString()));

        // The replacement runs only when all three of its columns are present.
        if (!r.IsNull("OldContent") && !r.IsNull("ReContent") && !r.IsNull("IgnoreCase"))
        {
            pn.Replace(r["OldContent"].ToString(), r["ReContent"].ToString(), bool.Parse(r["IgnoreCase"].ToString()));
        }

        // Both content and title must be non-blank. The explicit null test on Title
        // replaces a NullReferenceException that used to be absorbed by the catch below
        // when norepeat was false; the outcome (1) is the same.
        if (pn.Content == null || pn.Content.Trim().Length == 0
            || pn.Title == null || pn.Title.Trim().Length == 0)
        {
            return 1;
        }

        NetCMS.Model.CollectNewsInfo ninf = new NetCMS.Model.CollectNewsInfo();
        ninf.Author = pn.Author;
        ninf.Source = pn.Source;
        ninf.AddDate = pn.AddTime;
        ninf.Title = pn.Title;
        ninf.SiteID = int.Parse(r["ID"].ToString());
        ninf.Links = Url;
        ninf.ClassID = r["ClassID"].ToString();

        string Content = pn.Content;
        if (bSaveRemotePic)
        {
            // Store the content as returned by RemoteResource after FetchResource().
            RemoteResource rs = new RemoteResource(Content, PicSaveUrl, PicSavePath, Url, true);
            rs.FetchResource();
            Content = rs.Content;
        }
        ninf.Content = Content;

        NewsAdd(ninf);
        return 0;
    }
    catch (Exception)
    {
        // Best effort by design: any error while collecting this page (missing column,
        // unparsable flag, fetch or storage error) is reported to the caller as a failure.
        return 1;
    }
}
/// <summary>
/// Fetches the pages linked from this page's index-style pagination and returns
/// their extracted content, concatenated in the order the links are matched.
/// </summary>
/// <param name="profile">
/// Rule handed to <c>Utility.GetMatchUrl</c>; the <c>TARGET</c> group of each match
/// supplies one page link.
/// </param>
/// <returns>
/// The concatenated <c>Content</c> of every page fetched, or an empty string when
/// nothing was fetched.
/// </returns>
/// <remarks>
/// A link that resolves to this page's own URL is skipped, and a link whose page has
/// already been appended is skipped as well, so a pager that is matched more than once
/// does not duplicate content.
/// </remarks>
public string GetIndexPagination(string profile)
{
    System.Text.StringBuilder combined = new System.Text.StringBuilder();
    // Trimmed URLs whose content has already been appended.
    System.Collections.Generic.List<string> appended = new System.Collections.Generic.List<string>();

    for (Match m = Utility.GetMatchUrl(_Doc, profile, "[分页新闻]"); m.Success; m = m.NextMatch())
    {
        string pageUrl = Utility.StickUrl(_Url, m.Groups["TARGET"].Value);
        string key = pageUrl.Trim();

        if (key.Equals(this._Url) || appended.Contains(key))
        {
            continue;
        }

        PageNews pgns = new PageNews(pageUrl, _Encode);
        // NOTE(review): RuleOfContent is not assigned before FigureContent() here,
        // whereas GetOtherPage assigns it from _contentrule - confirm this is intended.
        if (pgns.Fetch())
        {
            pgns.FigureContent();
            combined.Append(pgns.Content);
            appended.Add(key);
        }
    }

    return combined.ToString();
}
/// <summary>
/// Collects a single news page: fetches it, extracts title, content, author, source
/// and date using the settings in <paramref name="r"/>, appends paginated content,
/// filters and optionally rewrites the content, and passes the result to <c>NewsAdd</c>.
/// </summary>
/// <param name="Url">Address of the news page. A null or blank value is a failure.</param>
/// <param name="r">
/// Settings row. Columns read: <c>Encode</c>, <c>PageTitleSetting</c>, <c>PagebodySetting</c>,
/// <c>HandSetAuthor</c>/<c>AuthorSetting</c>, <c>HandSetSource</c>/<c>SourceSetting</c>,
/// <c>HandSetAddDate</c>/<c>AddDateSetting</c>, <c>OtherNewsType</c>, <c>OtherNewsPageSetting</c>,
/// the filter flags (<c>TextTF</c>, <c>IsStyle</c>, <c>IsDIV</c>, <c>IsA</c>, <c>IsClass</c>,
/// <c>IsFont</c>, <c>IsSpan</c>, <c>IsObject</c>, <c>IsIFrame</c>, <c>IsScript</c>),
/// <c>OldContent</c>/<c>ReContent</c>/<c>IgnoreCase</c>, <c>ID</c> and <c>ClassID</c>.
/// </param>
/// <param name="norepeat">
/// When true, a null title is a failure and a title that <c>dal.TitleExist</c> reports
/// as existing is reported as a duplicate.
/// </param>
/// <returns>0 on success, -1 when the title is a duplicate, 1 on failure.</returns>
private int CollectPage(string Url, DataRow r, bool norepeat)
{
    try
    {
        if (Url == null || Url.Trim().Length == 0)
        {
            return 1;
        }

        PageNews pn = new PageNews(Url, r["Encode"].ToString());
        if (!pn.Fetch())
        {
            return 1;
        }

        pn.RuleOfTitle = r["PageTitleSetting"].ToString();
        pn.RuleOfContent = r["PagebodySetting"].ToString();
        pn.FigureTitle();

        if (norepeat)
        {
            if (pn.Title == null)
            {
                return 1;
            }
            if (dal.TitleExist(pn.Title))
            {
                return -1;
            }
        }

        pn.FigureContent();

        // For author, source and date a non-null "HandSet*" column is passed with the
        // flag set to true; otherwise the matching "*Setting" column is passed with false.
        if (r.IsNull("HandSetAuthor"))
        {
            pn.FigureAuthor(r["AuthorSetting"].ToString(), false);
        }
        else
        {
            pn.FigureAuthor(r["HandSetAuthor"].ToString(), true);
        }

        if (r.IsNull("HandSetSource"))
        {
            pn.FigureSource(r["SourceSetting"].ToString(), false);
        }
        else
        {
            pn.FigureSource(r["HandSetSource"].ToString(), true);
        }

        if (r.IsNull("HandSetAddDate"))
        {
            pn.FigureAddTime(r["AddDateSetting"].ToString(), false);
        }
        else
        {
            pn.FigureAddTime(r["HandSetAddDate"].ToString(), true);
        }

        // OtherNewsType selects the pagination handler: 1 -> GetOtherPagination,
        // 2 -> GetIndexPagination, anything else -> no pagination.
        int pgtp = int.Parse(r["OtherNewsType"].ToString());
        if (pgtp == 1)
        {
            pn.Content += pn.GetOtherPagination(r["OtherNewsPageSetting"].ToString());
        }
        else if (pgtp == 2)
        {
            pn.Content += pn.GetIndexPagination(r["OtherNewsPageSetting"].ToString());
        }

        pn.Filter(
            bool.Parse(r["TextTF"].ToString()),
            bool.Parse(r["IsStyle"].ToString()),
            bool.Parse(r["IsDIV"].ToString()),
            bool.Parse(r["IsA"].ToString()),
            bool.Parse(r["IsClass"].ToString()),
            bool.Parse(r["IsFont"].ToString()),
            bool.Parse(r["IsSpan"].ToString()),
            bool.Parse(r["IsObject"].ToString()),
            bool.Parse(r["IsIFrame"].ToString()),
            bool.Parse(r["IsScript"].ToString()));

        // The replacement runs only when all three of its columns are present.
        if (!r.IsNull("OldContent") && !r.IsNull("ReContent") && !r.IsNull("IgnoreCase"))
        {
            pn.Replace(r["OldContent"].ToString(), r["ReContent"].ToString(), bool.Parse(r["IgnoreCase"].ToString()));
        }

        // Both content and title must be non-blank. The explicit null test on Title
        // replaces a NullReferenceException that used to be absorbed by the catch below
        // when norepeat was false; the outcome (1) is the same.
        if (pn.Content == null || pn.Content.Trim().Length == 0
            || pn.Title == null || pn.Title.Trim().Length == 0)
        {
            return 1;
        }

        NetCMS.Model.CollectNewsInfo ninf = new NetCMS.Model.CollectNewsInfo();
        ninf.Author = pn.Author;
        ninf.Source = pn.Source;
        ninf.AddDate = pn.AddTime;
        ninf.Title = pn.Title;
        ninf.SiteID = int.Parse(r["ID"].ToString());
        ninf.Links = Url;
        ninf.ClassID = r["ClassID"].ToString();

        string Content = pn.Content;
        if (bSaveRemotePic)
        {
            // Store the content as returned by RemoteResource after FetchResource().
            RemoteResource rs = new RemoteResource(Content, PicSaveUrl, PicSavePath, Url, true);
            rs.FetchResource();
            Content = rs.Content;
        }
        ninf.Content = Content;

        NewsAdd(ninf);
        return 0;
    }
    catch (Exception)
    {
        // Best effort by design: any error while collecting this page (missing column,
        // unparsable flag, fetch or storage error) is reported to the caller as a failure.
        return 1;
    }
}