Exemplo n.º 1
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and extracts one
        /// <see cref="CourseEntity"/> from every course <c>li</c> element found in
        /// the course-list section of the page.
        /// </summary>
        /// <param name="url">Address of the listing page to download and parse.</param>
        /// <returns>
        /// The courses parsed from the page, in document order. The list is empty
        /// (never <c>null</c>) when the page contains no matching <c>li</c> elements.
        /// </returns>
        private List<CourseEntity> GetPageIndeData(string url)
        {
            // Strategy: fetch the page, select every course <li>, then pull the
            // useful fields out of each <li> via GetLiData.
            string strHtml = HttpHelper.DownloadUrl(url);

            HtmlDocument document = new HtmlDocument();
            document.LoadHtml(strHtml);

            string liPath = "/html/body/section[1]/div/div[@class='market-bd market-bd-6 course-list course-card-list-multi-wrap js-course-list']/ul/li";
            HtmlNodeCollection liNodes = document.DocumentNode.SelectNodes(liPath);

            // HtmlAgilityPack returns null (not an empty collection) when the XPath
            // matches nothing, e.g. when the page layout changed or the page is
            // empty. Treat that as "no courses" instead of crashing in the loop.
            if (liNodes == null)
            {
                return new List<CourseEntity>();
            }

            List<CourseEntity> courseEntities = new List<CourseEntity>(liNodes.Count);

            foreach (HtmlNode node in liNodes)
            {
                courseEntities.Add(GetLiData(node));
            }

            return courseEntities;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds a <see cref="CourseEntity"/> from a single course <c>li</c> node:
        /// reads the course URL and id from the first anchor, the image URL from the
        /// image inside that anchor, and the title from the anchor inside the
        /// <c>h4</c> heading. Each extracted value is also echoed to the console.
        /// </summary>
        /// <param name="node">The <c>li</c> node describing one course.</param>
        /// <returns>
        /// A populated <see cref="CourseEntity"/>. <c>Price</c> is a random
        /// placeholder in the range [100, 10000), not the real course price.
        /// </returns>
        /// <remarks>
        /// The method fails fast: if an expected element or attribute is missing the
        /// resulting null dereference surfaces as a <see cref="NullReferenceException"/>,
        /// and a non-numeric <c>data-id</c> surfaces as a <see cref="FormatException"/>.
        /// </remarks>
        private CourseEntity GetLiData(HtmlNode node)
        {
            CourseEntity courseEntity = new CourseEntity();

            // Re-parse the <li> as its own document so the "//" XPath queries below
            // are scoped to this course only rather than to the whole page.
            HtmlDocument document = new HtmlDocument();
            document.LoadHtml(node.OuterHtml);

            // First anchor: carries the course link and the course id.
            string aPath = "//*/a[1]";
            HtmlNode classANode = document.DocumentNode.SelectSingleNode(aPath);
            string aHref = classANode.Attributes["href"].Value;

            courseEntity.Url = aHref;

            Console.WriteLine($"课程Url:{aHref}");

            string id = classANode.Attributes["data-id"].Value;

            Console.WriteLine($"课程Id:{id}");

            // The id is machine-readable markup, so parse it culture-independently.
            courseEntity.CourseId = long.Parse(id, System.Globalization.CultureInfo.InvariantCulture);

            // Cover image nested inside the first anchor.
            string imgPath = "//*/a[1]/img";
            HtmlNode imgNode = document.DocumentNode.SelectSingleNode(imgPath);
            string imgUrl = imgNode.Attributes["src"].Value;

            courseEntity.ImageUrl = imgUrl;

            Console.WriteLine($"ImageUrl:{imgUrl}");

            // Course title: text of the anchor inside the <h4> heading.
            string namePaths = "//*/h4/a[1]";
            HtmlNode nameNode = document.DocumentNode.SelectSingleNode(namePaths);
            string name = nameNode.InnerText;

            courseEntity.Title = name;

            Console.WriteLine($"课程名称:{name}");

            // Scraping the real price from Tencent Classroom is an advanced topic:
            // it cannot be obtained the ordinary way (the site uses its own
            // algorithm), so a random placeholder is stored instead.
            // A single shared generator is used because creating a new Random per
            // call can yield identical values when calls happen in quick succession
            // (time-based default seed on .NET Framework). Random is not
            // thread-safe, hence the lock.
            lock (PlaceholderPriceRandom)
            {
                courseEntity.Price = PlaceholderPriceRandom.Next(100, 10000);
            }

            return courseEntity;
        }

        /// <summary>
        /// Shared generator for the placeholder course prices produced by
        /// <see cref="GetLiData"/>; always accessed under a lock on itself.
        /// </summary>
        private static readonly Random PlaceholderPriceRandom = new Random();