/// <summary>
/// Gets the list of individual resources from a resource-collection list page and
/// saves one <c>tb_resoucepageslist</c> record per list item.
/// </summary>
/// <param name="firClassListModel">
/// Record of the list page to scrape. Its <c>CollectionMark</c> is set to 1 before the
/// items are processed and to 2 once processing has finished
/// (presumably "in progress" / "collected" — confirm against the callers).
/// </param>
public static void GetPageResouceList(tb_fistclasslist firClassListModel)
{
    HtmlDocument doc = CaptureWebSite.GetHtmlDocument(firClassListModel.WebURL, VerycdEncoding);
    HtmlNodeCollection hc = doc.DocumentNode.SelectNodes(xPath_ResouceList);

    firClassListModel.CollectionMark = 1;
    firClassListModel.Update();

    try
    {
        // SelectNodes yields null (not an empty collection) when the XPath matches nothing.
        int itemCount = hc == null ? 0 : hc.Count;
        for (int i = 0; i < itemCount; i++)
        {
            try
            {
                tb_resoucepageslist model = new tb_resoucepageslist();
                model.PageURL = verycdWebSite + hc[i].SelectSingleNode(@"a[1]").Attributes["href"].Value.Trim();
                model.PageTitle = hc[i].InnerText.Trim();
                model.ClassName = firClassListModel.ClassName;
                model.SubClassName = firClassListModel.SubClassName;
                model.CollectionMark = 0;
                model.InfoOrigin = "VeryCd";
                model.Remark = string.Empty;
                model.ResouceType = firClassListModel.ResouceType;
                model.UpdateTime = DateTime.Now;
                model.Save();
            }
            catch (Exception err)
            {
                // Best effort: a malformed or unsavable item must not stop the remaining
                // items, but the failure is reported instead of being silently dropped.
                System.Diagnostics.Trace.TraceError(
                    "GetPageResouceList: skipped item {0} of {1}: {2}", i, firClassListModel.WebURL, err);
            }
        }
    }
    finally
    {
        // Mark the page once, after all items, rather than re-updating the record for
        // every item; this also covers pages on which no item was found.
        firClassListModel.CollectionMark = 2;
        firClassListModel.Update();
    }
}
/// <summary>
/// Gets the resource-collection list page URLs from a top-level category page and
/// saves one <c>tb_fistclasslist</c> record per link found.
/// </summary>
/// <param name="URL">Address of the category page to download.</param>
/// <param name="FirName">Value stored in <c>ClassName</c> of every saved record.</param>
/// <param name="SubClassName">Value stored in <c>SubClassName</c> of every saved record.</param>
/// <param name="resType">Resource type; its string form is stored in <c>ResouceType</c>.</param>
public static void GetTypePageList(string URL, string FirName, string SubClassName, ResouceType resType)
{
    HtmlDocument doc = CaptureWebSite.GetHtmlDocument(URL, VerycdEncoding);
    HtmlNodeCollection hc = doc.DocumentNode.SelectNodes(xPath_TypePageList);

    // SelectNodes yields null (not an empty collection) when the XPath matches nothing.
    if (hc == null)
    {
        return;
    }

    // Loop-invariant, so computed once.
    string resourceTypeName = resType.ToString();

    for (int i = 0; i < hc.Count; i++)
    {
        try
        {
            tb_fistclasslist model = new tb_fistclasslist();
            model.WebURL = verycdWebSite + hc[i].SelectSingleNode(@"a[1]").Attributes["href"].Value.Trim();
            model.ClassName = FirName;
            model.SubClassName = SubClassName;
            model.CollectionMark = 0;
            model.InfoOrigin = "VeryCd";
            model.Remark = string.Empty;
            model.ResouceType = resourceTypeName;
            model.UpdateTime = DateTime.Now;
            model.Save();
        }
        catch (Exception err)
        {
            // Best effort: a malformed or unsavable item must not stop the remaining
            // items, but the failure is reported instead of being silently dropped.
            System.Diagnostics.Trace.TraceError(
                "GetTypePageList: skipped item {0} of {1}: {2}", i, URL, err);
        }
    }
}
/// <summary>
/// Gets the list of individual resources from a resource-collection list page and
/// inserts a <c>tb_resoucepageslist</c> record for every page URL that is not stored yet.
/// </summary>
/// <param name="firClassListModel">
/// Record of the list page to scrape. Nothing is done when its <c>CollectionMark</c> is
/// already 2; otherwise the mark is set to 1 before the items are processed and to 2
/// afterwards.
/// </param>
public static void GetPageResouceList(tb_fistclasslist firClassListModel)
{
    // Everything passed in is expected to be processable; pages that were already
    // collected are not collected again.
    if (firClassListModel.CollectionMark == 2)
    {
        return;
    }

    HtmlDocument doc = CaptureWebSite.GetHtmlDocument(firClassListModel.WebURL, VerycdEncoding);
    HtmlNodeCollection hc = doc.DocumentNode.SelectNodes(xPath_ResouceList);

    firClassListModel.CollectionMark = 1;
    firClassListModel.Update();

    int count = 0; // number of newly inserted records
    try
    {
        // SelectNodes yields null (not an empty collection) when the XPath matches nothing.
        int itemCount = hc == null ? 0 : hc.Count;
        for (int i = 0; i < itemCount; i++)
        {
            // Guard each item separately: the page is marked as collected below, so an
            // item skipped because an earlier one failed would never be picked up again.
            try
            {
                string url = verycdWebSite + hc[i].SelectSingleNode(@"a[1]").Attributes["href"].Value.Trim();
                if (tb_resoucepageslist.FindCount(tb_resoucepageslist._.PageURL, url) < 1)
                {
                    tb_resoucepageslist model = new tb_resoucepageslist();
                    model.PageURL = url;
                    model.PageTitle = hc[i].InnerText.Trim();
                    model.ClassName = firClassListModel.ClassName;
                    model.SubClassName = firClassListModel.SubClassName;
                    model.CollectionMark = 0;
                    model.InfoOrigin = "VeryCd";
                    model.Remark = string.Empty;
                    model.ResouceType = firClassListModel.ResouceType;
                    model.UpdateTime = DateTime.Now;
                    model.Insert();
                    count++;
                }
            }
            catch (Exception err)
            {
                XTrace.WriteException(err);
            }
        }
    }
    finally
    {
        firClassListModel.CollectionMark = 2;
        firClassListModel.Update();
        XTrace.WriteLine("通过大类资源列表页面:{0},获取到更新记录{1}条", firClassListModel.WebURL, count);
    }
}
// The methods below are the basic collection (scraping) routines.
// NOTE(review): the #endregion matching the #region below is not visible in this chunk — confirm it follows this method.
#region 根据大类资源网址获取资源集合列表网址
/// <summary>
/// Gets the resource-collection list page URLs from a top-level category page and
/// inserts a <c>tb_fistclasslist</c> record for every URL that is not stored yet.
/// </summary>
/// <param name="typelist">
/// Category to scrape; supplies the page address (<c>URL</c>) and the <c>TypeName</c>,
/// <c>SubClassName</c> and <c>ResType</c> values copied onto each inserted record.
/// </param>
public static void GetTypePageList(tb_typelist typelist)
{
    HtmlDocument doc = CaptureWebSite.GetHtmlDocument(typelist.URL, VerycdEncoding);
    HtmlNodeCollection hc = doc.DocumentNode.SelectNodes(xPath_TypePageList);

    int count = 0; // number of newly inserted records

    // SelectNodes yields null (not an empty collection) when the XPath matches nothing.
    int itemCount = hc == null ? 0 : hc.Count;
    for (int i = 0; i < itemCount; i++)
    {
        try
        {
            string url = verycdWebSite + hc[i].SelectSingleNode(@"a[1]").Attributes["href"].Value.Trim();
            if (tb_fistclasslist.FindCount(tb_fistclasslist._.WebURL, url) < 1)
            {
                tb_fistclasslist model = new tb_fistclasslist();
                model.WebURL = url;
                model.ClassName = typelist.TypeName;
                model.SubClassName = typelist.SubClassName;
                model.CollectionMark = 0;
                model.InfoOrigin = "VeryCd";
                model.Remark = string.Empty;
                model.ResouceType = typelist.ResType.ToString();
                model.UpdateTime = DateTime.Now;
                model.Insert();
                count++;
            }
        }
        catch (Exception err)
        {
            // Best effort: report the failing item and carry on with the next one.
            XTrace.WriteException(err);
        }
    }

    // Write the summary once, after every item was handled, instead of once per item
    // with a partial count.
    XTrace.WriteLine("通过大类资源列表{0},获取到更新记录{1}条", typelist.URL, count);
}