Exemple #1
0
        /// <summary>
        /// Loads the HTML page at <c>rule.Url</c>, selects the single node matching
        /// <c>rule.ContentXPath</c> and hands it to <c>GetFields</c> to build one result.
        /// </summary>
        /// <param name="rule">Rule supplying the URL, the content XPath and the rule id.</param>
        /// <returns>A list holding exactly one <c>SpliderContent</c> for the page.</returns>
        public List <SpliderContent> GetByRule(SpliderRule rule)
        {
            // Load the HTML straight from the web.
            var document    = new HtmlWeb().Load(rule.Url);
            var contentRoot = document.DocumentNode.SelectSingleNode(rule.ContentXPath);

            // Detail page: the content node as a whole yields a single record.
            var extractedFields = GetFields(contentRoot, rule);

            return new List<SpliderContent>
            {
                new SpliderContent
                {
                    Fields        = extractedFields,
                    SpliderRuleId = rule.Id
                }
            };
        }
Exemple #2
0
        /// <summary>
        /// Loads an HTML document from a local file, selects the single node matching
        /// <c>rule.ContentXPath</c> and hands it to <c>GetFields</c> to build one result.
        /// </summary>
        /// <param name="rule">Rule supplying the content XPath and the rule id.</param>
        /// <param name="filename">Path of the HTML file to load.</param>
        /// <returns>A list holding exactly one <c>SpliderContent</c> for the file.</returns>
        public List <SpliderContent> GetByRuleFromFile(SpliderRule rule, string filename)
        {
            // Parse the HTML from disk rather than fetching it over the network.
            var document = new HtmlDocument();
            document.Load(filename);

            var contentRoot = document.DocumentNode.SelectSingleNode(rule.ContentXPath);

            // Detail page: the content node as a whole yields a single record.
            var extractedFields = GetFields(contentRoot, rule);

            return new List<SpliderContent>
            {
                new SpliderContent
                {
                    Fields        = extractedFields,
                    SpliderRuleId = rule.Id
                }
            };
        }
Exemple #3
0
        /// <summary>
        /// Loads the page at <c>rule.Url</c> and extracts content from it according to the rule.
        /// When <c>rule.EachXPath</c> is set, the page is treated as a list page and one
        /// <c>SpliderContent</c> is produced per node matching that XPath beneath the content
        /// node; otherwise the content node is treated as a single detail page.
        /// </summary>
        /// <param name="rule">
        /// Rule supplying the URL, the content XPath, the optional per-item XPath and the rule id.
        /// </param>
        /// <returns>
        /// The extracted items. For a list page on which the content node or the item nodes
        /// cannot be found, the list is empty.
        /// </returns>
        public List <SpliderContent> GetByRule(SpliderRule rule)
        {
            HtmlWeb web = new HtmlWeb();
            // Fetch the page named by the rule itself so that each rule scrapes its own target
            // instead of one fixed URL.
            var htmlDoc     = web.Load(rule.Url);
            var contentnode = htmlDoc.DocumentNode.SelectSingleNode(rule.ContentXPath);

            var list = new List <SpliderContent>();

            // List page
            if (!string.IsNullOrWhiteSpace(rule.EachXPath))
            {
                // SelectSingleNode yields null when the content XPath matches nothing;
                // there is then nothing to enumerate.
                if (contentnode == null)
                {
                    return(list);
                }

                // SelectNodes returns null (not an empty collection) when nothing matches,
                // so it must be checked before it is enumerated.
                var itemsNodes = contentnode.SelectNodes(rule.EachXPath);
                if (itemsNodes == null)
                {
                    return(list);
                }

                foreach (var item in itemsNodes)
                {
                    var fields = GetFields(item, rule);

                    list.Add(new SpliderContent()
                    {
                        Fields        = fields,
                        SpliderRuleId = rule.Id
                    });
                }
                return(list);
            }

            // Detail page: the content node as a whole yields a single record.
            var cfields = GetFields(contentnode, rule);
            var sc      = new SpliderContent()
            {
                Fields        = cfields,
                SpliderRuleId = rule.Id
            };

            list.Add(sc);
            return(list);
        }