Пример #1
0
		//根据资源集合列表网址获取单独资源的列表
		/// <summary>
		/// Loads the resource-collection list page at <c>firClassListModel.WebURL</c> and saves one
		/// <c>tb_resoucepageslist</c> row for every node matched by <c>xPath_ResouceList</c>.
		/// </summary>
		/// <remarks>
		/// <c>CollectionMark</c> is set to 1 before the entries are processed and to 2 afterwards
		/// (presumably "in progress" and "done" - confirm against the callers). Entries that cannot
		/// be parsed or saved are skipped so that one bad row does not stop the rest.
		/// </remarks>
		/// <param name="firClassListModel">List-page record to crawl; its <c>CollectionMark</c> is updated and persisted.</param>
		public static void GetPageResouceList(tb_fistclasslist firClassListModel)
		{
			HtmlDocument doc = CaptureWebSite.GetHtmlDocument(firClassListModel.WebURL, VerycdEncoding);
			HtmlNodeCollection hc = doc.DocumentNode.SelectNodes(xPath_ResouceList);
			firClassListModel.CollectionMark = 1;
			firClassListModel.Update();

			// SelectNodes yields null rather than an empty collection when nothing matches.
			int total = hc == null ? 0 : hc.Count;
			for (int i = 0; i < total; i++)
			{
				try
				{
					// An entry without a leading <a href="..."> has no page address to record.
					var anchor = hc[i].SelectSingleNode(@"a[1]");
					if (anchor == null)
					{
						continue;
					}
					var href = anchor.Attributes["href"];
					if (href == null)
					{
						continue;
					}

					tb_resoucepageslist model = new tb_resoucepageslist();
					model.PageURL = verycdWebSite + href.Value.Trim();
					model.PageTitle = hc[i].InnerText.Trim();
					model.ClassName = firClassListModel.ClassName;
					model.SubClassName = firClassListModel.SubClassName;
					model.CollectionMark = 0;
					model.InfoOrigin = "VeryCd";
					model.Remark = string.Empty;
					model.ResouceType = firClassListModel.ResouceType;
					model.UpdateTime = DateTime.Now;
					model.Save();
				}
				catch (Exception err)
				{
					// Best effort: report the failure and carry on with the next entry.
					Console.Error.WriteLine("GetPageResouceList: skipped entry {0} on {1}: {2}", i, firClassListModel.WebURL, err.Message);
				}
			}

			// Written once after the loop, so it also runs when the page has no entries
			// and does not cost one database update per entry.
			firClassListModel.CollectionMark = 2;
			firClassListModel.Update();
		}
Пример #2
0
		//根据页面链接获取页面的资源下的信息 详细信息,即名称和下载链接,介绍等
		/// <summary>
		/// Loads the resource page at <c>respageListModel.PageURL</c> and saves one
		/// <c>tb_resoucelink</c> row for every node that carries an <c>ed2k</c> attribute.
		/// </summary>
		/// <remarks>
		/// The attribute value is URL-decoded as UTF-8 and split on '|'; field 2 is stored as the
		/// resource name and field 4 as <c>ResouceMD5</c> (presumably the usual
		/// <c>ed2k://|file|name|size|hash|/</c> layout - confirm). <c>CollectionMark</c> is set to 1
		/// before processing and to 2 afterwards.
		/// </remarks>
		/// <param name="respageListModel">Page record to crawl; its <c>CollectionMark</c> is updated and persisted.</param>
		public static void GetResoucePageInfo(tb_resoucepageslist respageListModel)
		{
			HtmlDocument doc = CaptureWebSite.GetHtmlDocument(respageListModel.PageURL, VerycdEncoding);
			HtmlNodeCollection hc = doc.DocumentNode.SelectNodes("//@ed2k");
			respageListModel.CollectionMark = 1;
			respageListModel.Update();

			// SelectNodes yields null rather than an empty collection when nothing matches.
			int total = hc == null ? 0 : hc.Count;
			for (int i = 0; i < total; i++)
			{
				try
				{
					string name = hc[i].Attributes["ed2k"].Value.Trim();
					string urlLink = HttpUtility.UrlDecode(name, Encoding.UTF8);

					// Split once; a link with fewer than five fields has no name/hash to store.
					string[] linkFields = urlLink.Split('|');
					if (linkFields.Length < 5)
					{
						continue;
					}

					tb_resoucelink model = new tb_resoucelink();
					model.ResouceMD5 = linkFields[4];
					model.ResouceName = linkFields[2];
					model.ResouceLink = urlLink;
					model.FromURL = respageListModel.PageURL;
					model.ClassName = respageListModel.ClassName;
					model.SubClassName = respageListModel.SubClassName;
					model.InfoOrigin = "VeryCd";
					model.Remark = string.Empty;
					model.ResouceType = respageListModel.ResouceType;
					model.UpdateTime = DateTime.Now;
					model.Save();
				}
				catch (Exception err)
				{
					// Best effort: report the failure and carry on with the next link.
					Console.Error.WriteLine("GetResoucePageInfo: skipped link {0} on {1}: {2}", i, respageListModel.PageURL, err.Message);
				}
			}

			respageListModel.CollectionMark = 2;
			respageListModel.Update();
			Console.WriteLine(respageListModel.PageURL);
		}
Пример #3
0
        //根据页面链接获取页面的资源下的信息 详细信息,即名称和下载链接,介绍等
        /// <summary>
        /// Loads the resource page at <c>respageListModel.PageURL</c> and inserts a
        /// <c>tb_resoucelink</c> row for every ed2k link whose hash field is not stored yet.
        /// </summary>
        /// <remarks>
        /// Pages whose <c>CollectionMark</c> is already 2 are skipped. Otherwise the mark is set to 1
        /// before processing and to 2 in the <c>finally</c> block, which also logs how many new
        /// links were inserted. The attribute value is URL-decoded as UTF-8 and split on '|':
        /// field 2 is the name, field 3 the size and field 4 the value stored as <c>ResouceMD5</c>.
        /// </remarks>
        /// <param name="respageListModel">Page record to crawl; its <c>CollectionMark</c> is updated and persisted.</param>
        public static void GetResoucePageInfo(tb_resoucepageslist respageListModel)
        {
            if (respageListModel.CollectionMark == 2)
            {
                return;
            }

            HtmlDocument doc = CaptureWebSite.GetHtmlDocument(respageListModel.PageURL, VerycdEncoding);
            HtmlNodeCollection hc = doc.DocumentNode.SelectNodes("//@ed2k");
            respageListModel.CollectionMark = 1;
            respageListModel.Update();
            int count = 0;
            try
            {
                // SelectNodes yields null rather than an empty collection when nothing matches.
                int total = hc == null ? 0 : hc.Count;
                for (int i = 0; i < total; i++)
                {
                    string name = hc[i].Attributes["ed2k"].Value.Trim();
                    string urlLink = HttpUtility.UrlDecode(name, Encoding.UTF8);
                    string[] linkFields = urlLink.Split('|');

                    // Skip a malformed link instead of letting it abort the remaining links:
                    // the page is marked as collected below either way.
                    if (linkFields.Length < 5)
                    {
                        continue;
                    }

                    // Already stored under the same hash field: nothing to insert.
                    if (tb_resoucelink.FindCount(tb_resoucelink._.ResouceMD5, linkFields[4]) >= 1)
                    {
                        continue;
                    }

                    tb_resoucelink model = new tb_resoucelink();
                    model.ResouceMD5 = linkFields[4];
                    model.ResouceName = linkFields[2];

                    // Parse straight to ulong so a negative size becomes 0 instead of wrapping
                    // around to a huge value through a long -> ulong cast.
                    ulong size;
                    model.Size = ulong.TryParse(linkFields[3], out size) ? size : 0UL;

                    model.ResouceLink = urlLink;
                    model.FromURL = respageListModel.PageURL;
                    model.ClassName = respageListModel.ClassName;
                    model.SubClassName = respageListModel.SubClassName;
                    model.InfoOrigin = "VeryCd";
                    model.Remark = string.Empty;
                    model.ResouceType = respageListModel.ResouceType;
                    model.UpdateTime = DateTime.Now;
                    model.IsDownload = 0;
                    model.Insert();
                    count++;
                }
            }
            catch (Exception err)
            {
                XTrace.WriteException(err);
            }
            finally
            {
                respageListModel.CollectionMark = 2;
                respageListModel.Update();
                XTrace.WriteLine("从资源页面{0}获取到{1}条新的资源链接", respageListModel.PageURL, count);
            }
        }
Пример #4
0
 //获取当前页面的链接,返回成功率
 /// <summary>
 /// Loads <paramref name="curUrl"/> through <c>CaptureWebSite.GetHtmlDocument</c>, walks every node
 /// that carries an <c>href</c> attribute and records the links matching <c>pagePatten</c> in
 /// <c>tb_resoucepageslist</c>.
 /// </summary>
 /// <remarks>
 /// A link whose page URL is not stored yet is inserted. A stored one is reset
 /// (<c>CollectionMark</c> = 0, fields refreshed) only when its <c>UpdateTime</c> is more than
 /// 5 days old. Links with empty text, or text containing "全文", are ignored. An exception
 /// stops the scan and is logged; the rows saved up to that point still count.
 /// </remarks>
 /// <param name="item">Type record whose <c>TypeName</c>, <c>ResType</c> and <c>SubClassName</c> are copied onto each saved row.</param>
 /// <param name="curUrl">Address of the page to scan.</param>
 /// <returns>
 /// Rows inserted or refreshed divided by the total number of href nodes on the page;
 /// 0 when the page has no href nodes.
 /// </returns>
 private static double GetPageContentHerf(tb_typelist item, string curUrl)
 {
     HtmlDocument page = CaptureWebSite.GetHtmlDocument(curUrl, VerycdEncoding);
     HtmlNodeCollection linkNodes = page.DocumentNode.SelectNodes("//@href");
     if (linkNodes == null || linkNodes.Count == 0)
     {
         return 0;
     }

     int saved = 0;
     try
     {
         foreach (var node in linkNodes)
         {
             string href = node.Attributes["href"].Value.ToString();
             string pageUrl = verycdWebSite + href;
             if (!Regex.IsMatch(href, pagePatten))
             {
                 continue;
             }

             // Ignore links without text and the "全文" (full text) links.
             string title = node.InnerText.Replace("\r\n", "").Trim();
             if (title == "" || title.Contains("全文"))
             {
                 continue;
             }

             bool isNew = tb_resoucepageslist.FindCount(tb_resoucepageslist._.PageURL, pageUrl) < 1;
             tb_resoucepageslist row;
             if (isNew)
             {
                 row = new tb_resoucepageslist();
                 row.PageURL = pageUrl;
             }
             else
             {
                 // Known page: only reset it when the stored row is more than 5 days old.
                 row = tb_resoucepageslist.FindByPageURL(pageUrl);
                 if (!((DateTime.Now - row.UpdateTime).TotalDays > 5))
                 {
                     continue;
                 }
             }

             row.ClassName = item.TypeName;
             row.CollectionMark = 0;
             row.InfoOrigin = "VeryCd";
             row.PageTitle = title;
             row.ResouceType = item.ResType;
             row.SubClassName = item.SubClassName;
             row.UpdateTime = DateTime.Now;
             if (isNew)
             {
                 row.Insert();
             }
             else
             {
                 row.Update();
             }
             saved++;
         }
     }
     catch (Exception ex)
     {
         XTrace.WriteException(ex);
     }
     finally
     {
         XTrace.WriteLine("通过网页:{0},获取到更新记录页面{1}条", curUrl, saved);
     }

     // Same ratio whether the scan finished or was cut short by an exception.
     return ((double)saved) / ((double)linkNodes.Count);
 }
Пример #5
0
 //根据资源集合列表网址获取单独资源的列表
 /// <summary>
 /// Loads the resource-collection list page at <c>firClassListModel.WebURL</c> and inserts a
 /// <c>tb_resoucepageslist</c> row for every entry whose page URL is not stored yet.
 /// </summary>
 /// <remarks>
 /// List pages whose <c>CollectionMark</c> is already 2 (already collected) are skipped.
 /// Otherwise the mark is set to 1 before processing and to 2 in the <c>finally</c> block,
 /// which also logs how many new rows were inserted.
 /// </remarks>
 /// <param name="firClassListModel">List-page record to crawl; its <c>CollectionMark</c> is updated and persisted.</param>
 public static void GetPageResouceList(tb_fistclasslist firClassListModel)
 {
     // Already collected: do not crawl the same list page again.
     if (firClassListModel.CollectionMark == 2)
     {
         return;
     }

     HtmlDocument doc = CaptureWebSite.GetHtmlDocument(firClassListModel.WebURL, VerycdEncoding);
     HtmlNodeCollection hc = doc.DocumentNode.SelectNodes(xPath_ResouceList);
     firClassListModel.CollectionMark = 1;
     firClassListModel.Update();
     int count = 0;
     try
     {
         // SelectNodes yields null rather than an empty collection when nothing matches.
         int total = hc == null ? 0 : hc.Count;
         for (int i = 0; i < total; i++)
         {
             // Skip an entry without a leading <a href="..."> instead of letting it abort the
             // remaining entries: the list page is marked as collected below either way.
             var anchor = hc[i].SelectSingleNode(@"a[1]");
             if (anchor == null)
             {
                 continue;
             }
             var href = anchor.Attributes["href"];
             if (href == null)
             {
                 continue;
             }

             string url = verycdWebSite + href.Value.Trim();
             if (tb_resoucepageslist.FindCount(tb_resoucepageslist._.PageURL, url) >= 1)
             {
                 continue;
             }

             tb_resoucepageslist model = new tb_resoucepageslist();
             model.PageURL = url;
             model.PageTitle = hc[i].InnerText.Trim();
             model.ClassName = firClassListModel.ClassName;
             model.SubClassName = firClassListModel.SubClassName;
             model.CollectionMark = 0;
             model.InfoOrigin = "VeryCd";
             model.Remark = string.Empty;
             model.ResouceType = firClassListModel.ResouceType;
             model.UpdateTime = DateTime.Now;
             model.Insert();
             count++;
         }
     }
     catch (Exception err)
     {
         XTrace.WriteException(err);
     }
     finally
     {
         firClassListModel.CollectionMark = 2;
         firClassListModel.Update();
         XTrace.WriteLine("通过大类资源列表页面:{0},获取到更新记录{1}条", firClassListModel.WebURL, count);
     }
 }