示例#1
0
        /// <summary>
        /// Downloads the page at <c>website.Url</c> and extracts its articles
        /// by handing the loaded document to <c>ParseArticles</c>.
        /// </summary>
        /// <param name="website">
        /// Site descriptor; its <c>Url</c> is loaded here and the object is
        /// passed through unchanged to <c>ParseArticles</c>.
        /// </param>
        /// <returns>The list of articles produced by <c>ParseArticles</c>.</returns>
        private async Task<List<Article>> GetArticlesAsync(NewsWebsite website)
        {
            // Encoding auto-detection is switched off and UTF-8 is forced for the response.
            HtmlWeb web = new HtmlWeb
            {
                AutoDetectEncoding = false,
                OverrideEncoding   = Encoding.UTF8
            };

            // The document comes straight from the loader; no placeholder
            // HtmlDocument needs to be allocated beforehand.
            HtmlDocument html = await web.LoadFromWebAsync(website.Url);

            return ParseArticles(html, website);
        }
示例#2
0
        /// <summary>
        /// Builds one <c>Article</c> per node matched by
        /// <c>website.ArticleSelector</c> in the given document.
        /// </summary>
        /// <param name="html">Parsed document to search.</param>
        /// <param name="website">
        /// Supplies the selectors used to locate each article and its fields,
        /// plus the <c>Name</c> and <c>Color</c> copied onto every article.
        /// </param>
        /// <returns>
        /// The extracted articles in document order; an empty list when the
        /// article selector matches nothing.
        /// </returns>
        private List<Article> ParseArticles(HtmlDocument html, NewsWebsite website)
        {
            var nodes = html.DocumentNode.SelectNodes(website.ArticleSelector);

            // HtmlAgilityPack's SelectNodes yields null (not an empty collection)
            // when nothing matches; treat that as "no articles" instead of crashing.
            if (nodes == null)
            {
                return new List<Article>();
            }

            List<Article> articles = new List<Article>(nodes.Count);

            foreach (var node in nodes)
            {
                // NOTE(review): each field selector below is still required to match
                // inside the article node; a missing element or attribute throws a
                // NullReferenceException exactly as before — confirm whether partial
                // articles should be skipped or tolerated instead.
                Article article = new Article
                {
                    url       = node.SelectSingleNode(website.UrlSelector).Attributes["href"].Value,
                    title     = HtmlEntity.DeEntitize(node.SelectSingleNode(website.TitleSelector).InnerText.Trim()),
                    // Only the node's direct text is used, with "..." appended.
                    content   = HtmlEntity.DeEntitize(node.SelectSingleNode(website.ContentSelector).GetDirectInnerText().Trim()) + "...",
                    thumbnail = node.SelectSingleNode(website.ThumbnailSelector).Attributes["src"].Value,
                    // Every space character is stripped from the date text.
                    date      = node.SelectSingleNode(website.DateSelector).InnerText.Replace(" ", ""),
                    author    = website.Name,
                    color     = website.Color
                };

                articles.Add(article);
            }

            return articles;
        }