/// <summary>
/// Downloads one page of the news archive for the given year and month and parses the news entries on it.
/// </summary>
/// <param name="year">Archive year; its absolute value is used.</param>
/// <param name="month">Archive month; its absolute value is used.</param>
/// <param name="page">Archive page number (defaults to 1); its absolute value is used.</param>
/// <returns>
/// Whatever <c>GetNewsFromNewsArchiveNode</c> produces for the archive container, or <c>null</c> when the
/// document could not be obtained or the archive container is not found in it.
/// </returns>
public static List<News> GetNewsPageForDate(int year, int month, int page = 1)
{
    // Negative inputs are folded to their magnitude before being placed in the URL.
    year = Math.Abs(year);
    month = Math.Abs(month);
    page = Math.Abs(page);

    string archivePageUrl = NewsArchiveUrl + $"/{year}/{month}/{page}";
    HtmlDocument archiveDocument = HtmlClient.GetHtmlDocument(archivePageUrl);

    // A missing document short-circuits to null here, the same outcome as a missing container.
    HtmlNode archiveContainer = archiveDocument?.DocumentNode.SelectSingleNode(NewsArchiveXpath);

    return archiveContainer == null
        ? null
        : GetNewsFromNewsArchiveNode(archiveContainer);
}
/// <summary>
/// Downloads a single news page and returns the plain text of its content paragraph.
/// </summary>
/// <param name="newsUrl">Address of the news page; surrounding whitespace is ignored.</param>
/// <returns>
/// The trimmed inner text of the node selected by <c>SingleNewsParagraphXpath</c>, with everything up to and
/// including <c>SingleNewsContentTextDelimiter</c> removed when that delimiter occurs in the text;
/// <c>null</c> when the document could not be obtained or the content node is not found.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="newsUrl"/> is null, empty or whitespace.</exception>
public static string GetSingleNewsContent(string newsUrl)
{
    // Validate before touching the string so that a null argument is reported as an argument error
    // rather than surfacing as a NullReferenceException from Trim().
    if (string.IsNullOrWhiteSpace(newsUrl))
    {
        throw new ArgumentException("News URL must not be null, empty or whitespace.", nameof(newsUrl));
    }

    newsUrl = newsUrl.Trim();

    HtmlDocument htmlDoc = HtmlClient.GetHtmlDocument(newsUrl);

    // Sibling methods treat a null document / missing node as "nothing found" and return null; do the same here.
    HtmlNode newsNode = htmlDoc?.DocumentNode.SelectSingleNode(SingleNewsParagraphXpath);
    if (newsNode == null)
    {
        return null;
    }

    string newsContent = newsNode.InnerText.Trim();

    // Skip the leading part of the text that ends with the delimiter (the comments-count prefix).
    // Ordinal search keeps the match length equal to the delimiter's Length, which the offset below relies on.
    int posOfTextDelimiter = newsContent.IndexOf(SingleNewsContentTextDelimiter, StringComparison.Ordinal);
    if (posOfTextDelimiter == -1)
    {
        return newsContent;
    }

    return newsContent.Substring(posOfTextDelimiter + SingleNewsContentTextDelimiter.Length).Trim();
}
/// <summary>
/// Downloads a single news page and extracts its title, content, number of comments and creation date.
/// </summary>
/// <param name="newsUrl">Address of the news page; surrounding whitespace is ignored.</param>
/// <returns>
/// A <c>News</c> whose <c>Url</c> is the trimmed <paramref name="newsUrl"/>, or <c>null</c> when the document
/// could not be obtained or the node selected by <c>SingleNewsParagraphXpath</c> is not found.
/// Individual fields degrade gracefully: <c>Title</c> is <c>null</c> when the title node is missing,
/// <c>NumberOfComments</c> is 0 and <c>CreatedAt</c> is <c>default(DateTime)</c> when the corresponding
/// node is missing or its text cannot be parsed.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="newsUrl"/> is null, empty or whitespace.</exception>
public static News GetSingleNews(string newsUrl)
{
    // Validate before touching the string so that a null argument is reported as an argument error
    // rather than surfacing as a NullReferenceException from Trim().
    if (string.IsNullOrWhiteSpace(newsUrl))
    {
        throw new ArgumentException("News URL must not be null, empty or whitespace.", nameof(newsUrl));
    }

    newsUrl = newsUrl.Trim();

    HtmlDocument htmlDoc = HtmlClient.GetHtmlDocument(newsUrl);

    // Sibling methods treat a null document / missing node as "nothing found" and return null; do the same here.
    HtmlNode newsNode = htmlDoc?.DocumentNode.SelectSingleNode(SingleNewsParagraphXpath);
    if (newsNode == null)
    {
        return null;
    }

    // title
    HtmlNode titleNode = htmlDoc.DocumentNode.SelectSingleNode(SingleNewsTitleNodeXpath);
    string title = titleNode?.InnerText;

    // content: drop everything up to and including the delimiter when it is present.
    // Ordinal search keeps the match length equal to the delimiter's Length, which the offset below relies on.
    string newsContent = newsNode.InnerText.Trim();
    int posOfTextDelimiter = newsContent.IndexOf(SingleNewsContentTextDelimiter, StringComparison.Ordinal);
    if (posOfTextDelimiter != -1)
    {
        newsContent = newsContent.Substring(posOfTextDelimiter + SingleNewsContentTextDelimiter.Length).Trim();
    }

    // number of comments: the first space-separated token of the node text; stays 0 when missing or unparsable
    HtmlNode numberOfCommentsNode = newsNode.SelectSingleNode(SingleNewsNumberOfCommentsNodeXpath);
    string numberOfCommentsText = numberOfCommentsNode?.InnerText.Split(' ')[0];
    int.TryParse(numberOfCommentsText, out int numberOfComments);

    // created date: only the leading characters of the node text are parsed; shorter text is used as-is
    // instead of letting Substring throw ArgumentOutOfRangeException.
    const int createdAtTextLength = 18;
    HtmlNode createdAtNode = newsNode.SelectSingleNode(SingleNewsCreatedAtNodeXpath);
    string createdAtText = createdAtNode?.InnerText ?? string.Empty;
    if (createdAtText.Length > createdAtTextLength)
    {
        createdAtText = createdAtText.Substring(0, createdAtTextLength);
    }

    DateTime.TryParse(createdAtText, out DateTime createdAt);

    return new News
    {
        Url = newsUrl,
        Title = title,
        Content = newsContent,
        NumberOfComments = numberOfComments,
        CreatedAt = createdAt,
    };
}
/// <summary>
/// Downloads one page of comments for a news item and parses each comment's author, creation date and content.
/// </summary>
/// <param name="newsUrl">Address of the news page; surrounding whitespace is ignored.</param>
/// <param name="page">Comments page number, appended to the URL after a slash (defaults to 1).</param>
/// <returns>
/// The parsed comments; an empty list when the comments container holds no comment nodes;
/// <c>null</c> when the document could not be obtained or the comments container is not found.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="newsUrl"/> is null, empty or whitespace.</exception>
public static List<Comment> GetCommentsPageForNews(string newsUrl, int page = 1)
{
    // Validate before touching the string so that a null argument is reported as an argument error
    // rather than surfacing as a NullReferenceException from Trim().
    if (string.IsNullOrWhiteSpace(newsUrl))
    {
        throw new ArgumentException("News URL must not be null, empty or whitespace.", nameof(newsUrl));
    }

    newsUrl = newsUrl.Trim();

    HtmlDocument htmlDoc = HtmlClient.GetHtmlDocument(newsUrl + "/" + page);
    HtmlNode commentsListNode = htmlDoc?.DocumentNode.SelectSingleNode(CommentsListXpath);
    if (commentsListNode == null)
    {
        return null;
    }

    List<Comment> comments = new List<Comment>();

    // SelectNodes yields null (not an empty collection) when nothing matches, so a page without
    // comments must be handled before enumerating.
    var commentNodes = commentsListNode.SelectNodes(CommentXpath);
    if (commentNodes == null)
    {
        return comments;
    }

    foreach (HtmlNode singleCommentNode in commentNodes)
    {
        HtmlNode commentContainerNode = singleCommentNode.SelectSingleNode(CommentContainerXpath);
        if (commentContainerNode == null)
        {
            // Author and date are read from the container; without it the entry is skipped
            // rather than aborting the whole page.
            continue;
        }

        string commentAuthor = commentContainerNode.SelectSingleNode(CommentAuthorXpath)?.InnerText.Trim();

        // A missing or unparsable date leaves CreatedAt at default(DateTime).
        HtmlNode commentCreatedAtNode = commentContainerNode.SelectSingleNode(CommentCreatedAtXpath);
        DateTime.TryParse(commentCreatedAtNode?.InnerText.Trim(), out DateTime commentCreatedAt);

        string commentContent = singleCommentNode.SelectSingleNode(CommentContentXpath)?.InnerText.Trim();

        comments.Add(
            new Comment
            {
                Author = commentAuthor,
                CreatedAt = commentCreatedAt,
                Content = commentContent
            });
    }

    return comments;
}
/// <summary>
/// Loads the page at <c>WebsiteUrl</c>, locates the main news anchor and fetches the news it links to.
/// </summary>
/// <returns>
/// The result of <c>GetSingleNews</c> for the anchor's <c>href</c>, or <c>null</c> when the document could
/// not be obtained, the anchor is not found, or the anchor has no usable <c>href</c> value.
/// </returns>
public static News GetMainNews()
{
    HtmlDocument htmlDoc = HtmlClient.GetHtmlDocument(WebsiteUrl);

    HtmlNode mainNewsReadMoreAnchorNode = htmlDoc?.DocumentNode.SelectSingleNode(MainNewsAnchorXPath);
    if (mainNewsReadMoreAnchorNode == null)
    {
        return null;
    }

    // The attribute lookup yields null when the anchor carries no href; an empty value would make
    // GetSingleNews throw. Both cases are reported as "no main news" like the other not-found paths.
    string url = mainNewsReadMoreAnchorNode.Attributes["href"]?.Value;
    if (string.IsNullOrWhiteSpace(url))
    {
        return null;
    }

    // NOTE(review): the href is passed through unchanged — presumably it is an absolute URL; confirm.
    return GetSingleNews(url);
}