/// <summary>
/// Downloads the page at <c>rule.Url</c>, selects the single node matching
/// <c>rule.ContentXPath</c>, and extracts its fields as a detail page.
/// </summary>
/// <param name="rule">Scraping rule supplying the URL, the content XPath and the rule id.</param>
/// <returns>A list holding exactly one <c>SpliderContent</c> built from the content node.</returns>
public List<SpliderContent> GetByRule(SpliderRule rule)
{
    // Fetch the HTML document from the web.
    var document = new HtmlWeb().Load(rule.Url);
    var root = document.DocumentNode.SelectSingleNode(rule.ContentXPath);

    // Detail page: the whole content node yields a single record.
    var content = new SpliderContent()
    {
        Fields = GetFields(root, rule),
        SpliderRuleId = rule.Id
    };

    var results = new List<SpliderContent>();
    results.Add(content);
    return results;
}
/// <summary>
/// Loads an HTML document from a local file, selects the single node matching
/// <c>rule.ContentXPath</c>, and extracts its fields as a detail page.
/// </summary>
/// <param name="rule">Scraping rule supplying the content XPath and the rule id.</param>
/// <param name="filename">Path of the HTML file passed to <c>HtmlDocument.Load</c>.</param>
/// <returns>A list holding exactly one <c>SpliderContent</c> built from the content node.</returns>
public List<SpliderContent> GetByRuleFromFile(SpliderRule rule, string filename)
{
    // Parse the HTML from disk instead of fetching it over the network.
    var document = new HtmlDocument();
    document.Load(filename);
    var root = document.DocumentNode.SelectSingleNode(rule.ContentXPath);

    // Detail page: the whole content node yields a single record.
    var content = new SpliderContent()
    {
        Fields = GetFields(root, rule),
        SpliderRuleId = rule.Id
    };

    var results = new List<SpliderContent>();
    results.Add(content);
    return results;
}
/// <summary>
/// Scrapes the page at <c>rule.Url</c> according to the given rule.
/// When <c>rule.EachXPath</c> is set, the page is treated as a list page and one
/// <c>SpliderContent</c> is produced per node matching that XPath beneath the
/// content node; otherwise the content node itself is treated as a detail page
/// and a single <c>SpliderContent</c> is produced.
/// </summary>
/// <param name="rule">Scraping rule supplying the URL, the content XPath, the optional per-item XPath and the rule id.</param>
/// <returns>
/// The extracted contents. In list mode the list is empty when the content node
/// or the item nodes cannot be found.
/// </returns>
public List<SpliderContent> GetByRule(SpliderRule rule)
{
    // Load the page named by the rule; a hard-coded debug URL used to override it here.
    HtmlWeb web = new HtmlWeb();
    var htmlDoc = web.Load(rule.Url);
    var contentnode = htmlDoc.DocumentNode.SelectSingleNode(rule.ContentXPath);
    var list = new List<SpliderContent>();

    // List page: one record per item node.
    if (!string.IsNullOrWhiteSpace(rule.EachXPath))
    {
        // SelectSingleNode yields null when the content XPath matches nothing.
        if (contentnode == null)
        {
            return list;
        }

        // SelectNodes yields null (not an empty collection) when nothing matches.
        var itemsNodes = contentnode.SelectNodes(rule.EachXPath);
        if (itemsNodes == null)
        {
            return list;
        }

        foreach (var item in itemsNodes)
        {
            var fields = GetFields(item, rule);
            list.Add(new SpliderContent() { Fields = fields, SpliderRuleId = rule.Id });
        }
        return list;
    }

    // Detail page: the whole content node yields a single record.
    var cfields = GetFields(contentnode, rule);
    var sc = new SpliderContent() { Fields = cfields, SpliderRuleId = rule.Id };
    list.Add(sc);
    return list;
}