/// <summary>
/// Reads the tag list from the element with class <c>tags</c> inside the article node.
/// </summary>
/// <param name="articleNode">Article node to search.</param>
/// <returns>
/// A new mutable list holding the pieces of the element's text (after
/// <c>HtmlHelpers.ReplaceHtml</c>) split on ", " with empty entries dropped;
/// an empty list when the element is not found.
/// </returns>
private static ICollection<string> ExtractTags(HtmlNode articleNode)
{
    var tagNode = articleNode.GetElementByClassName("tags");
    if (tagNode == null)
    {
        return new List<string>();
    }

    var tags = HtmlHelpers.ReplaceHtml(tagNode.InnerText)
        .Split(new[] { ", " }, StringSplitOptions.RemoveEmptyEntries);

    // Copy into a List so both paths hand back the same kind of collection:
    // a bare array exposed as ICollection<string> is read-only and throws
    // NotSupportedException on Add/Remove, unlike the empty-list path above.
    return new List<string>(tags);
}
/// <summary>
/// Reads the view counter from the element with class <c>views-count_post</c>.
/// </summary>
/// <param name="articleNode">Article node to search.</param>
/// <returns>
/// The parsed counter, or -1 when the element is missing or its text is not
/// a valid integer.
/// </returns>
private static int ExtractViews(HtmlNode articleNode)
{
    var viewsNode = articleNode.GetElementByClassName("views-count_post");
    if (viewsNode == null)
    {
        return -1;
    }

    // TryParse rather than Parse: empty or non-numeric counter text maps to the
    // same -1 "unknown" sentinel as a missing element instead of throwing.
    int views;
    return int.TryParse(viewsNode.InnerText, out views) ? views : -1;
}
/// <summary>
/// Reads the article title from the element with class <c>post_title</c>.
/// </summary>
/// <param name="articleNode">Article node to search.</param>
/// <returns>
/// The inner text of the title element, or an empty string when the element
/// is not found.
/// </returns>
private static string ExtractName(HtmlNode articleNode)
{
    var titleNode = articleNode.GetElementByClassName("post_title");

    // The lookup can come back null (every sibling extractor guards against it),
    // so fall back to an empty string instead of dereferencing null.
    return titleNode == null ? string.Empty : titleNode.InnerText;
}
/// <summary>
/// Reads the rating from the element with class
/// <c>voting-wjt__counter-score js-score</c>.
/// </summary>
/// <param name="articleNode">Article node to search.</param>
/// <returns>
/// The parsed rating, or <see cref="int.MinValue"/> when the element is
/// missing or its text is not a valid integer.
/// </returns>
private static int ExtractRating(HtmlNode articleNode)
{
    var ratingNode = articleNode.GetElementByClassName("voting-wjt__counter-score js-score");
    if (ratingNode == null)
    {
        return int.MinValue;
    }

    // Parse the counter element's own text, not the whole article's text
    // (which would never be a bare integer). An en dash used as the minus
    // sign is normalised to ASCII '-' so the parser accepts it.
    var ratingStr = ratingNode.InnerText.Replace('–', '-');

    // Invariant culture: the sign was just normalised to ASCII, so parsing
    // must not depend on the current culture's negative-sign symbol.
    int rating;
    var parsed = int.TryParse(
        ratingStr,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out rating);

    return parsed ? rating : int.MinValue;
}
/// <summary>
/// Collects the inner text of every direct <c>a</c> child of the element
/// with class <c>hubs</c>.
/// </summary>
/// <param name="articleNode">Article node to search.</param>
/// <returns>
/// A list of the link texts in child order; an empty list when the
/// <c>hubs</c> element is not found.
/// </returns>
private static ICollection<string> ExtractHabs(HtmlNode articleNode)
{
    var hubsContainer = articleNode.GetElementByClassName("hubs");

    var hubNames = new List<string>();
    if (hubsContainer != null)
    {
        // Only direct children named "a" are considered; nested descendants are not visited.
        hubNames = (from child in hubsContainer.ChildNodes
                    where child.Name == "a"
                    select child.InnerText).ToList();
    }

    return hubNames;
}
/// <summary>
/// Reads the favourites counter from the element with class
/// <c>favorite-wjt__counter js-favs_count</c>.
/// </summary>
/// <param name="articleNode">Article node to search.</param>
/// <returns>
/// The parsed counter, or -1 when the element is missing or its text is not
/// a valid integer.
/// </returns>
private static int ExtractFavourites(HtmlNode articleNode)
{
    var favouritesNode = articleNode.GetElementByClassName("favorite-wjt__counter js-favs_count");
    if (favouritesNode == null)
    {
        return -1;
    }

    // TryParse rather than Parse: empty or non-numeric counter text maps to the
    // same -1 "unknown" sentinel as a missing element instead of throwing.
    int favourites;
    return int.TryParse(favouritesNode.InnerText, out favourites) ? favourites : -1;
}
/// <summary>
/// Reads the publication date from the element with class <c>published</c>.
/// </summary>
/// <param name="articleNode">Article node to search.</param>
/// <returns>
/// The result of <c>HtmlHelpers.ParseHabrFormatDate</c> applied to the
/// element's inner text, or <see cref="DateTime.MinValue"/> when the element
/// is not found.
/// </returns>
private static DateTime ExtractDate(HtmlNode articleNode)
{
    var dateElement = articleNode.GetElementByClassName("published");
    if (dateElement == null)
    {
        return DateTime.MinValue;
    }

    return HtmlHelpers.ParseHabrFormatDate(dateElement.InnerText);
}
/// <summary>
/// Collects text fragments from the child nodes of every
/// <c>message html_format</c> element under the element with class
/// <c>comments</c>.
/// </summary>
/// <param name="articleNode">Article node to search.</param>
/// <returns>
/// One string per child node (after <c>HtmlHelpers.ReplaceHtml</c>), skipping
/// null, empty and whitespace-only results; an empty list when the
/// <c>comments</c> element is not found.
/// </returns>
private static List<string> ExtractComments(HtmlNode articleNode)
{
    var commentsSection = articleNode.GetElementByClassName("comments");
    if (commentsSection == null)
    {
        return new List<string>();
    }

    // Snapshot all child nodes up front so the removals below do not disturb iteration.
    var fragments = commentsSection
        .GetElementsByClassName("message html_format")
        .SelectMany(message => message.ChildNodes)
        .ToArray();

    // First pass: call Remove() on every child that is not a "#text" node.
    // NOTE(review): the removed nodes stay in the local snapshot, so they are
    // still projected in the second pass — confirm whether the intent was to
    // exclude them from the result.
    foreach (var fragment in fragments)
    {
        if (fragment.Name != "#text")
        {
            fragment.Remove();
        }
    }

    // Second pass: read the text only after all removals have been applied.
    var comments = new List<string>();
    foreach (var fragment in fragments)
    {
        var text = HtmlHelpers.ReplaceHtml(fragment.InnerText);
        if (!string.IsNullOrWhiteSpace(text))
        {
            comments.Add(text);
        }
    }

    return comments;
}
/// <summary>
/// Reads the author name from the element with class <c>author-info__name</c>.
/// </summary>
/// <param name="articleNode">Article node to search.</param>
/// <returns>
/// The element's inner text passed through <c>HtmlHelpers.ReplaceHtml</c>,
/// or an empty string when the element is not found.
/// </returns>
private static string ExtractAuthor(HtmlNode articleNode)
{
    var nameElement = articleNode.GetElementByClassName("author-info__name");
    if (nameElement == null)
    {
        return string.Empty;
    }

    return HtmlHelpers.ReplaceHtml(nameElement.InnerText);
}