Ejemplo n.º 1
0
		//根据资源集合列表网址获取单独资源的列表
		/// <summary>
		/// Downloads the resource-list page referenced by <paramref name="firClassListModel"/>
		/// and saves one <c>tb_resoucepageslist</c> row for every entry matched by
		/// <c>xPath_ResouceList</c>.
		/// </summary>
		/// <remarks>
		/// <c>CollectionMark</c> is written as 1 before the entries are processed and as 2
		/// once processing is over (presumably 1 = in progress, 2 = collected; confirm
		/// against the code that reads the mark). The final update is performed exactly
		/// once, including when the page yields no entries.
		/// </remarks>
		/// <param name="firClassListModel">
		/// List-page record. Its <c>WebURL</c> is downloaded; its <c>ClassName</c>,
		/// <c>SubClassName</c> and <c>ResouceType</c> are copied onto every saved row.
		/// </param>
		public static void GetPageResouceList(tb_fistclasslist firClassListModel)
		{
			HtmlDocument doc = CaptureWebSite.GetHtmlDocument(firClassListModel.WebURL, VerycdEncoding);
			HtmlNodeCollection hc = doc.DocumentNode.SelectNodes(xPath_ResouceList);

			firClassListModel.CollectionMark = 1;
			firClassListModel.Update();

			try
			{
				// NOTE(review): assuming HtmlAgilityPack, SelectNodes returns null (not an
				// empty collection) when nothing matches, so treat null as "no entries".
				int total = hc == null ? 0 : hc.Count;
				for (int i = 0; i < total; i++)
				{
					try
					{
						tb_resoucepageslist model = new tb_resoucepageslist();
						model.PageURL = verycdWebSite + hc[i].SelectSingleNode(@"a[1]").Attributes["href"].Value.Trim();
						model.PageTitle = hc[i].InnerText.Trim();
						model.ClassName = firClassListModel.ClassName;
						model.SubClassName = firClassListModel.SubClassName;
						model.CollectionMark = 0;
						model.InfoOrigin = "VeryCd";
						model.Remark = string.Empty;
						model.ResouceType = firClassListModel.ResouceType;
						model.UpdateTime = DateTime.Now;
						model.Save();
					}
					catch (Exception err)
					{
						// Best effort: a malformed entry (for example one without a link)
						// must not stop the remaining entries, but it should leave a trace.
						System.Diagnostics.Trace.TraceError(
							"GetPageResouceList: skipped entry {0} of {1}: {2}",
							i, firClassListModel.WebURL, err);
					}
				}
			}
			finally
			{
				// Written once per page rather than once per entry.
				firClassListModel.CollectionMark = 2;
				firClassListModel.Update();
			}
		}
Ejemplo n.º 2
0
        //根据大类资源网址获取资源集合列表网址
		/// <summary>
		/// Downloads a category page and saves one <c>tb_fistclasslist</c> row for every
		/// entry matched by <c>xPath_TypePageList</c>.
		/// </summary>
		/// <param name="URL">Address of the category page to download.</param>
		/// <param name="FirName">Stored as <c>ClassName</c> on every saved row.</param>
		/// <param name="SubClassName">Stored as <c>SubClassName</c> on every saved row.</param>
		/// <param name="resType">Its <c>ToString()</c> value is stored as <c>ResouceType</c>.</param>
		public static void GetTypePageList(string URL, string FirName, string SubClassName, ResouceType resType)
		{
			HtmlDocument doc = CaptureWebSite.GetHtmlDocument(URL, VerycdEncoding);
			HtmlNodeCollection hc = doc.DocumentNode.SelectNodes(xPath_TypePageList);

			// NOTE(review): assuming HtmlAgilityPack, SelectNodes returns null (not an
			// empty collection) when nothing matches; there is nothing to save then.
			if (hc == null)
			{
				return;
			}

			for (int i = 0; i < hc.Count; i++)
			{
				try
				{
					tb_fistclasslist model = new tb_fistclasslist();
					model.WebURL = verycdWebSite + hc[i].SelectSingleNode(@"a[1]").Attributes["href"].Value.Trim();
					model.ClassName = FirName;
					model.SubClassName = SubClassName;
					model.CollectionMark = 0;
					model.InfoOrigin = "VeryCd";
					model.Remark = string.Empty;
					model.ResouceType = resType.ToString();
					model.UpdateTime = DateTime.Now;
					model.Save();
				}
				catch (Exception err)
				{
					// Best effort: a malformed entry (for example one without a link)
					// must not stop the remaining entries, but it should leave a trace.
					System.Diagnostics.Trace.TraceError(
						"GetTypePageList: skipped entry {0} of {1}: {2}", i, URL, err);
				}
			}
		}
Ejemplo n.º 3
0
 //根据资源集合列表网址获取单独资源的列表
 /// <summary>
 /// Downloads the resource-list page referenced by <paramref name="firClassListModel"/>
 /// and inserts a <c>tb_resoucepageslist</c> row for every entry matched by
 /// <c>xPath_ResouceList</c> whose URL is not stored yet.
 /// </summary>
 /// <remarks>
 /// Records whose <c>CollectionMark</c> is already 2 are skipped. Otherwise the mark is
 /// written as 1 before the entries are processed and as 2 afterwards, and a summary
 /// line with the number of inserted rows is logged. Each entry is processed in its own
 /// try/catch so that one malformed entry cannot discard the rest of a page that is
 /// about to be marked as collected.
 /// </remarks>
 /// <param name="firClassListModel">
 /// List-page record. Its <c>WebURL</c> is downloaded; its <c>ClassName</c>,
 /// <c>SubClassName</c> and <c>ResouceType</c> are copied onto every inserted row.
 /// </param>
 public static void GetPageResouceList(tb_fistclasslist firClassListModel)
 {
     // Pages that were already collected are not collected again.
     if (firClassListModel.CollectionMark == 2)
     {
         return;
     }

     HtmlDocument doc = CaptureWebSite.GetHtmlDocument(firClassListModel.WebURL, VerycdEncoding);
     HtmlNodeCollection hc = doc.DocumentNode.SelectNodes(xPath_ResouceList);
     firClassListModel.CollectionMark = 1;
     firClassListModel.Update();

     int count = 0; // rows inserted for this page
     try
     {
         // NOTE(review): assuming HtmlAgilityPack, SelectNodes returns null (not an
         // empty collection) when nothing matches, so treat null as "no entries".
         int total = hc == null ? 0 : hc.Count;
         for (int i = 0; i < total; i++)
         {
             try
             {
                 string url = verycdWebSite + hc[i].SelectSingleNode(@"a[1]").Attributes["href"].Value.Trim();

                 // Insert only URLs that have no stored row yet.
                 if (tb_resoucepageslist.FindCount(tb_resoucepageslist._.PageURL, url) < 1)
                 {
                     tb_resoucepageslist model = new tb_resoucepageslist();
                     model.PageURL = url;
                     model.PageTitle = hc[i].InnerText.Trim();
                     model.ClassName = firClassListModel.ClassName;
                     model.SubClassName = firClassListModel.SubClassName;
                     model.CollectionMark = 0;
                     model.InfoOrigin = "VeryCd";
                     model.Remark = string.Empty;
                     model.ResouceType = firClassListModel.ResouceType;
                     model.UpdateTime = DateTime.Now;
                     model.Insert();
                     count++;
                 }
             }
             catch (Exception err)
             {
                 // Log and move on to the next entry instead of abandoning the page.
                 XTrace.WriteException(err);
             }
         }
     }
     finally
     {
         firClassListModel.CollectionMark = 2;
         firClassListModel.Update();
         XTrace.WriteLine("通过大类资源列表页面:{0},获取到更新记录{1}条", firClassListModel.WebURL, count);
     }
 }
Ejemplo n.º 4
0
        //以下为基本采集功能方法

        #region 根据大类资源网址获取资源集合列表网址
        //根据大类资源网址获取资源集合列表网址
        /// <summary>
        /// Downloads the category page at <c>typelist.URL</c> and inserts a
        /// <c>tb_fistclasslist</c> row for every entry matched by
        /// <c>xPath_TypePageList</c> whose URL is not stored yet.
        /// </summary>
        /// <remarks>
        /// A failure on one entry is logged and does not stop the remaining entries.
        /// One summary line with the number of inserted rows is logged after all
        /// entries have been processed.
        /// </remarks>
        /// <param name="typelist">
        /// Category record. Its <c>URL</c> is downloaded; its <c>TypeName</c>,
        /// <c>SubClassName</c> and <c>ResType</c> are copied onto every inserted row.
        /// </param>
        public static void GetTypePageList(tb_typelist typelist)
        {
            HtmlDocument doc = CaptureWebSite.GetHtmlDocument(typelist.URL, VerycdEncoding);
            HtmlNodeCollection hc = doc.DocumentNode.SelectNodes(xPath_TypePageList);

            // NOTE(review): assuming HtmlAgilityPack, SelectNodes returns null (not an
            // empty collection) when nothing matches, so treat null as "no entries".
            int total = hc == null ? 0 : hc.Count;
            int count = 0; // rows inserted for this category page

            for (int i = 0; i < total; i++)
            {
                try
                {
                    string url = verycdWebSite + hc[i].SelectSingleNode(@"a[1]").Attributes["href"].Value.Trim();

                    // Insert only URLs that have no stored row yet.
                    if (tb_fistclasslist.FindCount(tb_fistclasslist._.WebURL, url) < 1)
                    {
                        tb_fistclasslist model = new tb_fistclasslist();
                        model.WebURL = url;
                        model.ClassName = typelist.TypeName;
                        model.SubClassName = typelist.SubClassName;
                        model.CollectionMark = 0;
                        model.InfoOrigin = "VeryCd";
                        model.Remark = string.Empty;
                        model.ResouceType = typelist.ResType.ToString();
                        model.UpdateTime = DateTime.Now;
                        model.Insert();
                        count++;
                    }
                }
                catch (Exception err)
                {
                    // Log and move on to the next entry.
                    XTrace.WriteException(err);
                }
            }

            // Logged once per page; it was previously emitted after every entry.
            XTrace.WriteLine("通过大类资源列表{0},获取到更新记录{1}条", typelist.URL, count);
        }