/// <summary>
/// Handles a completed page crawl: logs the outcome, extracts article
/// content (title, text, raw HTML and publication date) from any
/// "pw article" elements on the page, and persists the result via
/// <c>articleDao</c>. Pages that failed to crawl or have no content
/// are logged and skipped.
/// </summary>
/// <param name="sender">The crawler instance that raised the event.</param>
/// <param name="e">Event data carrying the crawled page.</param>
// NOTE: was `async void` with no awaits (CS1998); plain `void` keeps the
// event-handler signature identical for subscribers.
private void Crawler_ProcessPageCrawlCompleted(object sender, PageCrawlCompletedArgs e)
{
    CrawledPage crawledPage = e.CrawledPage;

    // A request exception can mean there is no response message at all,
    // so use null-conditional access on HttpResponseMessage.
    if (crawledPage.HttpRequestException != null
        || crawledPage.HttpResponseMessage?.StatusCode != HttpStatusCode.OK)
    {
        Console.WriteLine($"Crawl of page failed {crawledPage.Uri.AbsoluteUri}");
        return; // nothing useful to parse on a failed crawl
    }

    Console.WriteLine($"Crawl of page succeeded {crawledPage.Uri.AbsoluteUri}");

    if (string.IsNullOrEmpty(crawledPage.Content.Text))
    {
        Console.WriteLine($"Page had no content {crawledPage.Uri.AbsoluteUri}");
        return; // empty body: no article can be extracted
    }

    var document = crawledPage.AngleSharpHtmlDocument;
    var url = crawledPage.Uri.AbsoluteUri;

    var articleElements = document.GetElementsByClassName("pw article");
    if (articleElements.Length == 0)
    {
        return; // not an article page
    }

    // Accumulate with StringBuilder instead of repeated string `+=`.
    var titleBuilder = new StringBuilder();
    var textBuilder = new StringBuilder();
    var htmlBuilder = new StringBuilder();

    foreach (var element in articleElements)
    {
        // Not every fragment is guaranteed to contain an <h1>; the original
        // First() call would throw on an empty sequence.
        var heading = element.GetElementsByTagName("h1").FirstOrDefault();
        if (heading != null)
        {
            titleBuilder.Append(heading.TextContent);
        }

        textBuilder.Append(element.TextContent);
        htmlBuilder.Append(element.InnerHtml);
    }

    // The original loop overwrote `date` on every match, i.e. it kept the
    // last parseable date on the page; preserve that behavior.
    DateTime date = default;
    foreach (var node in document.GetElementsByClassName("news-date-time news_date"))
    {
        // NOTE(review): the site's date format/culture is unknown from here;
        // TryParse mirrors the original current-culture Parse call but no
        // longer crashes the handler on an unparseable value — confirm the
        // expected format against the target site.
        if (DateTime.TryParse(node.TextContent, out var parsed))
        {
            date = parsed;
        }
    }

    var article = new Article(
        titleBuilder.ToString(),
        url,
        date,
        htmlBuilder.ToString(),
        textBuilder.ToString());
    articleDao.Save(article);
}