Exemple #1
0
        /// <summary>
        /// Walks a chain of pagination links, starting from the markup in
        /// <paramref name="PageDoc"/>, and appends the content of every page it
        /// fetches to the <c>otherpgcon</c> member.
        /// </summary>
        /// <remarks>
        /// The walk ends when the pattern no longer matches, when a page cannot be
        /// fetched, or when the next link points at a page that was already visited
        /// during this call. The previous recursive form only compared the next link
        /// with its immediate parent, so a cycle such as A -> B -> A recursed until
        /// the stack overflowed.
        /// </remarks>
        /// <param name="otherurl">URL of the page whose markup is in <paramref name="PageDoc"/>; passed to <c>Utility.StickUrl</c> together with the matched link.</param>
        /// <param name="PageDoc">Markup that is searched for the next link.</param>
        /// <param name="pattern">Rule handed to <c>Utility.GetMatchUrl</c>; the resulting match must expose a <c>TARGET</c> group.</param>
        private void GetOtherPage(string otherurl, string PageDoc, string pattern)
        {
            // Fully qualified so the method does not depend on the file's using directives.
            System.Collections.Generic.List<string> visited = new System.Collections.Generic.List<string>();

            string currentUrl = otherurl;
            string currentDoc = PageDoc;
            while (true)
            {
                Match m = Utility.GetMatchUrl(currentDoc, pattern, "[分页新闻]");
                if (!m.Success)
                {
                    return;
                }

                string nextUrl = Utility.StickUrl(currentUrl, m.Groups["TARGET"].Value);

                // Remember the page we are standing on, then refuse to go back to
                // any page already seen (this also covers a link to the page itself).
                visited.Add(currentUrl.Trim());
                if (visited.Contains(nextUrl.Trim()))
                {
                    return;
                }

                PageNews pgns = new PageNews(nextUrl, _Encode);
                pgns.RuleOfContent = this._contentrule;
                if (!pgns.Fetch())
                {
                    return;
                }

                pgns.FigureContent();
                otherpgcon += pgns.Content;

                // Continue from the page just fetched.
                currentUrl = nextUrl;
                currentDoc = pgns._Doc;
            }
        }
Exemple #2
0
        /// <summary>
        /// Collects the content of every page linked from this page's pagination
        /// index and returns it as one string.
        /// </summary>
        /// <remarks>
        /// Every match of <paramref name="profile"/> in <c>_Doc</c> is combined with
        /// <c>_Url</c> through <c>Utility.StickUrl</c>. Links equal to this page's own
        /// URL are skipped; both sides are trimmed before comparing, as
        /// <c>GetOtherPage</c> does. Each linked page receives this page's content
        /// rule before <c>FigureContent</c> runs, again mirroring <c>GetOtherPage</c>;
        /// previously no rule was assigned here.
        /// </remarks>
        /// <param name="profile">Rule handed to <c>Utility.GetMatchUrl</c>; each match must expose a <c>TARGET</c> group.</param>
        /// <returns>The concatenated content of all fetched pages, or an empty string when none was fetched.</returns>
        public string GetIndexPagination(string profile)
        {
            // Fully qualified so the method does not depend on the file's using directives.
            System.Text.StringBuilder collected = new System.Text.StringBuilder();
            string selfUrl = _Url == null ? null : _Url.Trim();

            Match m = Utility.GetMatchUrl(_Doc, profile, "[分页新闻]");
            while (m.Success)
            {
                string otherurl = Utility.StickUrl(_Url, m.Groups["TARGET"].Value);
                if (!string.Equals(otherurl.Trim(), selfUrl))
                {
                    PageNews pgns = new PageNews(otherurl, _Encode);
                    pgns.RuleOfContent = this._contentrule;
                    if (pgns.Fetch())
                    {
                        pgns.FigureContent();
                        collected.Append(pgns.Content);
                    }
                }
                m = m.NextMatch();
            }
            return collected.ToString();
        }
Exemple #3
0
 /// <summary>
 /// Collects a single news page and stores it through <c>NewsAdd</c>.
 /// </summary>
 /// <remarks>
 /// The page is fetched, its title, content, author, source and date are
 /// extracted according to the settings in <paramref name="r"/>, optional
 /// pagination content is appended, the content is filtered and optionally
 /// rewritten, and the result is saved. Any exception raised along the way is
 /// traced and reported as a failure; it is never propagated to the caller.
 /// </remarks>
 /// <param name="Url">Address of the news page to collect.</param>
 /// <param name="r">Row holding the collection settings read by this method (encoding, extraction rules, hand-set values, filter flags, replacement text, site and class ids).</param>
 /// <param name="norepeat">When true, a page without a title fails and a page whose title already exists according to <c>dal.TitleExist</c> is reported as a duplicate.</param>
 /// <returns>0 on success, -1 when the title is a duplicate, 1 on failure.</returns>
 private int CollectPage(string Url, DataRow r, bool norepeat)
 {
     try
     {
         if (Url == null || Url.Trim().Length == 0)
         {
             return 1;
         }

         PageNews pn = new PageNews(Url, r["Encode"].ToString());
         if (!pn.Fetch())
         {
             return 1;
         }

         pn.RuleOfTitle = r["PageTitleSetting"].ToString();
         pn.RuleOfContent = r["PagebodySetting"].ToString();
         pn.FigureTitle();
         if (norepeat)
         {
             if (pn.Title == null)
             {
                 return 1;
             }
             if (dal.TitleExist(pn.Title))
             {
                 return -1;
             }
         }

         pn.FigureContent();

         // For author, source and date: a hand-set value, when present, is used
         // in place of the extraction rule and flagged as such to the extractor.
         bool handAuthor = !r.IsNull("HandSetAuthor");
         pn.FigureAuthor(r[handAuthor ? "HandSetAuthor" : "AuthorSetting"].ToString(), handAuthor);

         bool handSource = !r.IsNull("HandSetSource");
         pn.FigureSource(r[handSource ? "HandSetSource" : "SourceSetting"].ToString(), handSource);

         bool handDate = !r.IsNull("HandSetAddDate");
         pn.FigureAddTime(r[handDate ? "HandSetAddDate" : "AddDateSetting"].ToString(), handDate);

         // OtherNewsType selects the pagination strategy: 1 = GetOtherPagination,
         // 2 = GetIndexPagination, anything else = no extra pages.
         int pgtp = int.Parse(r["OtherNewsType"].ToString());
         if (pgtp == 1)
         {
             pn.Content += pn.GetOtherPagination(r["OtherNewsPageSetting"].ToString());
         }
         else if (pgtp == 2)
         {
             pn.Content += pn.GetIndexPagination(r["OtherNewsPageSetting"].ToString());
         }

         pn.Filter(bool.Parse(r["TextTF"].ToString()),
                   bool.Parse(r["IsStyle"].ToString()), bool.Parse(r["IsDIV"].ToString()), bool.Parse(r["IsA"].ToString()),
                   bool.Parse(r["IsClass"].ToString()), bool.Parse(r["IsFont"].ToString()), bool.Parse(r["IsSpan"].ToString()),
                   bool.Parse(r["IsObject"].ToString()), bool.Parse(r["IsIFrame"].ToString()), bool.Parse(r["IsScript"].ToString()));

         if (!r.IsNull("OldContent") && !r.IsNull("ReContent") && !r.IsNull("IgnoreCase"))
         {
             pn.Replace(r["OldContent"].ToString(), r["ReContent"].ToString(), bool.Parse(r["IgnoreCase"].ToString()));
         }

         // Nothing is stored without both content and a title. The title is
         // null-checked explicitly: with norepeat == false it can still be null
         // here, which used to surface as a NullReferenceException.
         if (pn.Content == null || pn.Content.Trim().Length == 0 ||
             pn.Title == null || pn.Title.Trim().Length == 0)
         {
             return 1;
         }

         NetCMS.Model.CollectNewsInfo ninf = new NetCMS.Model.CollectNewsInfo();
         ninf.Author  = pn.Author;
         ninf.Source  = pn.Source;
         ninf.AddDate = pn.AddTime;
         ninf.Title   = pn.Title;
         ninf.SiteID  = int.Parse(r["ID"].ToString());
         ninf.Links   = Url;
         ninf.ClassID = r["ClassID"].ToString();

         string Content = pn.Content;
         if (bSaveRemotePic)
         {
             RemoteResource rs = new RemoteResource(Content, PicSaveUrl, PicSavePath, Url, true);
             rs.FetchResource();
             Content = rs.Content;
         }
         ninf.Content = Content;
         NewsAdd(ninf);
         return 0;
     }
     catch (Exception ex)
     {
         // Callers rely on the failure code, so the exception is not rethrown;
         // it is traced so that the cause is not lost.
         System.Diagnostics.Trace.TraceError("CollectPage failed for \"" + Url + "\": " + ex);
         return 1;
     }
 }
Exemple #4
0
 /// <summary>
 /// Walks a chain of pagination links, starting from the markup in
 /// <paramref name="PageDoc"/>, and appends the content of every page it
 /// fetches to the <c>otherpgcon</c> member.
 /// </summary>
 /// <remarks>
 /// The walk ends when the pattern no longer matches, when a page cannot be
 /// fetched, or when the next link points at a page that was already visited
 /// during this call. The previous recursive form only compared the next link
 /// with its immediate parent, so a cycle such as A -> B -> A recursed until
 /// the stack overflowed.
 /// </remarks>
 /// <param name="otherurl">URL of the page whose markup is in <paramref name="PageDoc"/>; passed to <c>Utility.StickUrl</c> together with the matched link.</param>
 /// <param name="PageDoc">Markup that is searched for the next link.</param>
 /// <param name="pattern">Rule handed to <c>Utility.GetMatchUrl</c>; the resulting match must expose a <c>TARGET</c> group.</param>
 private void GetOtherPage(string otherurl, string PageDoc, string pattern)
 {
     // Fully qualified so the method does not depend on the file's using directives.
     System.Collections.Generic.List<string> visited = new System.Collections.Generic.List<string>();

     string currentUrl = otherurl;
     string currentDoc = PageDoc;
     while (true)
     {
         Match m = Utility.GetMatchUrl(currentDoc, pattern, "[分页新闻]");
         if (!m.Success)
         {
             return;
         }

         string nextUrl = Utility.StickUrl(currentUrl, m.Groups["TARGET"].Value);

         // Remember the page we are standing on, then refuse to go back to
         // any page already seen (this also covers a link to the page itself).
         visited.Add(currentUrl.Trim());
         if (visited.Contains(nextUrl.Trim()))
         {
             return;
         }

         PageNews pgns = new PageNews(nextUrl, _Encode);
         pgns.RuleOfContent = this._contentrule;
         if (!pgns.Fetch())
         {
             return;
         }

         pgns.FigureContent();
         otherpgcon += pgns.Content;

         // Continue from the page just fetched.
         currentUrl = nextUrl;
         currentDoc = pgns._Doc;
     }
 }
Exemple #5
0
 /// <summary>
 /// Collects the content of every page linked from this page's pagination
 /// index and returns it as one string.
 /// </summary>
 /// <remarks>
 /// Every match of <paramref name="profile"/> in <c>_Doc</c> is combined with
 /// <c>_Url</c> through <c>Utility.StickUrl</c>. Links equal to this page's own
 /// URL are skipped; both sides are trimmed before comparing, as
 /// <c>GetOtherPage</c> does. Each linked page receives this page's content
 /// rule before <c>FigureContent</c> runs, again mirroring <c>GetOtherPage</c>;
 /// previously no rule was assigned here.
 /// </remarks>
 /// <param name="profile">Rule handed to <c>Utility.GetMatchUrl</c>; each match must expose a <c>TARGET</c> group.</param>
 /// <returns>The concatenated content of all fetched pages, or an empty string when none was fetched.</returns>
 public string GetIndexPagination(string profile)
 {
     // Fully qualified so the method does not depend on the file's using directives.
     System.Text.StringBuilder collected = new System.Text.StringBuilder();
     string selfUrl = _Url == null ? null : _Url.Trim();

     Match m = Utility.GetMatchUrl(_Doc, profile, "[分页新闻]");
     while (m.Success)
     {
         string otherurl = Utility.StickUrl(_Url, m.Groups["TARGET"].Value);
         if (!string.Equals(otherurl.Trim(), selfUrl))
         {
             PageNews pgns = new PageNews(otherurl, _Encode);
             pgns.RuleOfContent = this._contentrule;
             if (pgns.Fetch())
             {
                 pgns.FigureContent();
                 collected.Append(pgns.Content);
             }
         }
         m = m.NextMatch();
     }
     return collected.ToString();
 }
Exemple #6
0
 /// <summary>
 /// Collects a single news page and stores it through <c>NewsAdd</c>.
 /// </summary>
 /// <remarks>
 /// The page is fetched, its title, content, author, source and date are
 /// extracted according to the settings in <paramref name="r"/>, optional
 /// pagination content is appended, the content is filtered and optionally
 /// rewritten, and the result is saved. Any exception raised along the way is
 /// traced and reported as a failure; it is never propagated to the caller.
 /// </remarks>
 /// <param name="Url">Address of the news page to collect.</param>
 /// <param name="r">Row holding the collection settings read by this method (encoding, extraction rules, hand-set values, filter flags, replacement text, site and class ids).</param>
 /// <param name="norepeat">When true, a page without a title fails and a page whose title already exists according to <c>dal.TitleExist</c> is reported as a duplicate.</param>
 /// <returns>0 on success, -1 when the title is a duplicate, 1 on failure.</returns>
 private int CollectPage(string Url, DataRow r, bool norepeat)
 {
     try
     {
         if (Url == null || Url.Trim().Length == 0)
         {
             return 1;
         }

         PageNews pn = new PageNews(Url, r["Encode"].ToString());
         if (!pn.Fetch())
         {
             return 1;
         }

         pn.RuleOfTitle = r["PageTitleSetting"].ToString();
         pn.RuleOfContent = r["PagebodySetting"].ToString();
         pn.FigureTitle();
         if (norepeat)
         {
             if (pn.Title == null)
             {
                 return 1;
             }
             if (dal.TitleExist(pn.Title))
             {
                 return -1;
             }
         }

         pn.FigureContent();

         // For author, source and date: a hand-set value, when present, is used
         // in place of the extraction rule and flagged as such to the extractor.
         bool handAuthor = !r.IsNull("HandSetAuthor");
         pn.FigureAuthor(r[handAuthor ? "HandSetAuthor" : "AuthorSetting"].ToString(), handAuthor);

         bool handSource = !r.IsNull("HandSetSource");
         pn.FigureSource(r[handSource ? "HandSetSource" : "SourceSetting"].ToString(), handSource);

         bool handDate = !r.IsNull("HandSetAddDate");
         pn.FigureAddTime(r[handDate ? "HandSetAddDate" : "AddDateSetting"].ToString(), handDate);

         // OtherNewsType selects the pagination strategy: 1 = GetOtherPagination,
         // 2 = GetIndexPagination, anything else = no extra pages.
         int pgtp = int.Parse(r["OtherNewsType"].ToString());
         if (pgtp == 1)
         {
             pn.Content += pn.GetOtherPagination(r["OtherNewsPageSetting"].ToString());
         }
         else if (pgtp == 2)
         {
             pn.Content += pn.GetIndexPagination(r["OtherNewsPageSetting"].ToString());
         }

         pn.Filter(bool.Parse(r["TextTF"].ToString()),
             bool.Parse(r["IsStyle"].ToString()), bool.Parse(r["IsDIV"].ToString()), bool.Parse(r["IsA"].ToString()),
             bool.Parse(r["IsClass"].ToString()), bool.Parse(r["IsFont"].ToString()), bool.Parse(r["IsSpan"].ToString()),
             bool.Parse(r["IsObject"].ToString()), bool.Parse(r["IsIFrame"].ToString()), bool.Parse(r["IsScript"].ToString()));

         if (!r.IsNull("OldContent") && !r.IsNull("ReContent") && !r.IsNull("IgnoreCase"))
         {
             pn.Replace(r["OldContent"].ToString(), r["ReContent"].ToString(), bool.Parse(r["IgnoreCase"].ToString()));
         }

         // Nothing is stored without both content and a title. The title is
         // null-checked explicitly: with norepeat == false it can still be null
         // here, which used to surface as a NullReferenceException.
         if (pn.Content == null || pn.Content.Trim().Length == 0 ||
             pn.Title == null || pn.Title.Trim().Length == 0)
         {
             return 1;
         }

         NetCMS.Model.CollectNewsInfo ninf = new NetCMS.Model.CollectNewsInfo();
         ninf.Author = pn.Author;
         ninf.Source = pn.Source;
         ninf.AddDate = pn.AddTime;
         ninf.Title = pn.Title;
         ninf.SiteID = int.Parse(r["ID"].ToString());
         ninf.Links = Url;
         ninf.ClassID = r["ClassID"].ToString();

         string Content = pn.Content;
         if (bSaveRemotePic)
         {
             RemoteResource rs = new RemoteResource(Content, PicSaveUrl, PicSavePath, Url, true);
             rs.FetchResource();
             Content = rs.Content;
         }
         ninf.Content = Content;
         NewsAdd(ninf);
         return 0;
     }
     catch (Exception ex)
     {
         // Callers rely on the failure code, so the exception is not rethrown;
         // it is traced so that the cause is not lost.
         System.Diagnostics.Trace.TraceError("CollectPage failed for \"" + Url + "\": " + ex);
         return 1;
     }
 }