/// <summary>
/// Get a completed post from the provided HTML node, reading the post ID,
/// author and index from the node's <c>data-pid</c>, <c>data-username</c>
/// and <c>data-index</c> attributes.
/// </summary>
/// <param name="li">Node that contains the post.</param>
/// <param name="quest">Quest being processed. Not referenced by this implementation.</param>
/// <returns>
/// Returns a post object with the extracted information, or null if
/// constructing the <see cref="PostComponents"/> throws.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="li"/> is null.</exception>
private PostComponents GetPost(HtmlNode li, IQuest quest)
{
    if (li == null)
    {
        throw new ArgumentNullException(nameof(li));
    }

    string id = li.GetAttributeValue("data-pid", "");
    string author = li.GetAttributeValue("data-username", "");

    // A missing attribute already defaults to "0". Give a malformed value the
    // same fallback rather than letting int.Parse throw on scraped markup.
    int number;
    if (!int.TryParse(li.GetAttributeValue("data-index", "0"), out number))
    {
        number = 0;
    }

    // NOTE(review): presumably GetChildWithClass returns null when no matching
    // child exists; confirm how ExtractPostText treats a null node.
    var content = li.GetChildWithClass("div", "content");

    // Get the full post text. No elements are excluded.
    string text = PostText.ExtractPostText(content, n => false, Host);

    PostComponents post;

    try
    {
        post = new PostComponents(author, id, text, number);
    }
    catch
    {
        // Best effort: values the constructor rejects yield "no post".
        post = null;
    }

    return post;
}
/// <summary>
/// Get a completed post from the provided HTML list item node.
/// The post ID is taken from the node's id (after the "post_" prefix); the
/// post number, author and message are then looked up relative to that ID.
/// </summary>
/// <param name="li">List item node that contains the post.</param>
/// <returns>
/// Returns a post object with the extracted information, or null if the
/// node's id is too short to carry a post ID or if constructing the
/// <see cref="PostComponents"/> throws.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="li"/> is null.</exception>
private PostComponents GetPost(HtmlNode li)
{
    if (li == null)
    {
        throw new ArgumentNullException(nameof(li));
    }

    const string idPrefix = "post_";

    string author = "";
    string text = "";
    int number = 0;

    // ID
    // Substring would throw on an id shorter than the prefix (for example a
    // list item with no id at all), so treat such a node as "not a post".
    if (li.Id.Length < idPrefix.Length)
    {
        return null;
    }

    string id = li.Id.Substring(idPrefix.Length);

    // Number
    // Optional: stays 0 when the anchor is absent or its name is not numeric.
    var postCount = li.OwnerDocument.GetElementbyId($"postcount{id}");

    if (postCount != null && !int.TryParse(postCount.GetAttributeValue("name", "0"), out number))
    {
        number = 0;
    }

    HtmlNode postDetails = li.Elements("div").FirstOrDefault(n => n.GetAttributeValue("class", "") == "postdetails");

    if (postDetails != null)
    {
        // Author
        HtmlNode userinfo = postDetails.GetChildWithClass("div", "userinfo");
        HtmlNode username = userinfo?.GetChildWithClass("a", "username");
        author = PostText.CleanupWebString(username?.InnerText);

        // Text
        string postMessageId = "post_message_" + id;

        // NOTE(review): message is null when the element or its blockquote is
        // missing; confirm how ExtractPostText treats a null node.
        var message = li.OwnerDocument.GetElementbyId(postMessageId)?.Element("blockquote");

        // Predicate filtering out elements that we don't want to include
        var exclusion = PostText.GetClassExclusionPredicate("bbcode_quote");

        // Get the full post text.
        text = PostText.ExtractPostText(message, exclusion, Host);
    }

    PostComponents post;

    try
    {
        post = new PostComponents(author, id, text, number);
    }
    catch
    {
        // Best effort: values the constructor rejects yield "no post".
        post = null;
    }

    return post;
}
/// <summary>
/// Get a completed post from the provided HTML div node.
/// The post ID is the node's id without its first character (id="p12345"),
/// and the author and content are read from the "inner"/"postbody" children.
/// </summary>
/// <param name="div">Div node that contains the post.</param>
/// <param name="quest">Quest being processed. Not referenced by this implementation.</param>
/// <returns>
/// Returns a post object with the extracted information, or null if the node
/// has no id, lacks the expected "inner"/"postbody" structure, or if
/// constructing the <see cref="PostComponents"/> throws.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="div"/> is null.</exception>
private PostComponents GetPost(HtmlNode div, IQuest quest)
{
    if (div == null)
    {
        throw new ArgumentNullException(nameof(div));
    }

    // id="p12345"
    // Substring(1) throws on an empty id, so a node without one is not a post.
    if (string.IsNullOrEmpty(div.Id))
    {
        return null;
    }

    string id = div.Id.Substring(1);
    string author = "";

    // No way to get the post number from this markup; it is left at 0.
    int number = 0;

    // Everything else hangs off div > inner > postbody. Without it there is
    // nothing to extract, so bail out instead of dereferencing null.
    var inner = div.GetChildWithClass("div", "inner");
    var postbody = inner?.GetChildWithClass("div", "postbody");

    if (postbody == null)
    {
        return null;
    }

    // Author: p.author > first <strong> descendant > <a>. Any missing link in
    // that chain leaves the author empty rather than throwing.
    var authorNode = postbody.GetChildWithClass("p", "author");
    var authorStrong = authorNode?.Descendants("strong").FirstOrDefault();
    var authorAnchor = authorStrong?.Element("a");

    if (authorAnchor != null)
    {
        author = PostText.CleanupWebString(authorAnchor.InnerText);
    }

    // Get the full post text. Two different layout variants.
    var content = postbody.GetChildWithClass("div", "content");

    if (content == null)
    {
        content = postbody.Elements("div").FirstOrDefault(n => n.Id.StartsWith("post_content", StringComparison.Ordinal));
    }

    // NOTE(review): content can still be null if neither variant matched;
    // confirm how ExtractPostText treats a null node.
    string text = PostText.ExtractPostText(content, n => false, Host);

    PostComponents post;

    try
    {
        post = new PostComponents(author, id, text, number);
    }
    catch
    {
        // Best effort: values the constructor rejects yield "no post".
        post = null;
    }

    return post;
}
/// <summary>
/// Get a completed post from the provided HTML div node.
/// The post is located via the first descendant table whose id starts with
/// "post"; the author, post number and message are then looked up in the
/// owning document by ids derived from that table's id.
/// </summary>
/// <param name="postDiv">Div node that contains the post.</param>
/// <returns>
/// Returns a post object with the extracted information, or null if no post
/// table is found or if constructing the <see cref="PostComponents"/> throws.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="postDiv"/> is null.</exception>
private PostComponents GetPost(HtmlNode postDiv)
{
    if (postDiv == null)
    {
        throw new ArgumentNullException(nameof(postDiv));
    }

    string author = "";
    int number = 0;

    var postTable = postDiv.Descendants("table").FirstOrDefault(a => a.Id.StartsWith("post", StringComparison.Ordinal));

    if (postTable == null)
    {
        return null;
    }

    // Safe: the predicate above guarantees the id is at least "post" long.
    string id = postTable.Id.Substring("post".Length);

    // Author
    // The lookup by id may find nothing, so guard it before asking for the
    // anchor. A missing author is left empty.
    string postAuthorDivID = "postmenu_" + id;
    var authorAnchor = postTable.OwnerDocument.GetElementbyId(postAuthorDivID)?.Element("a");

    if (authorAnchor != null)
    {
        // When the anchor wraps the name in a span, take the span's text.
        var authorSpan = authorAnchor.Element("span");
        author = (authorSpan ?? authorAnchor).InnerText;
    }

    // Number
    // Optional: stays 0 when the anchor is absent, or when its name attribute
    // is missing (defaults to "") or not numeric. int.Parse threw on those.
    string postNumberAnchorID = "postcount" + id;
    var anchor = postTable.OwnerDocument.GetElementbyId(postNumberAnchorID);

    if (anchor != null && !int.TryParse(anchor.GetAttributeValue("name", ""), out number))
    {
        number = 0;
    }

    // Text
    // NOTE(review): postContents is null when the message element is missing;
    // confirm how ExtractPostText treats a null node.
    string postMessageId = "post_message_" + id;
    var postContents = postTable.OwnerDocument.GetElementbyId(postMessageId);

    // Predicate filtering out elements that we don't want to include
    var exclusion = PostText.GetClassExclusionPredicate("bbcode_quote");

    // Get the full post text.
    string text = PostText.ExtractPostText(postContents, exclusion, Host);

    PostComponents post;

    try
    {
        post = new PostComponents(author, id, text, number);
    }
    catch
    {
        // Best effort: values the constructor rejects yield "no post".
        post = null;
    }

    return post;
}
/// <summary>
/// Get a completed post from the provided HTML list item node.
/// The author and ID come from the list item's own attributes; the text and
/// post number come from the "messageContent" and "messageMeta" branches of
/// its "primaryContent" child.
/// </summary>
/// <param name="li">List item node that contains the post.</param>
/// <returns>
/// Returns a post object with the extracted information, or null if the
/// expected markup is missing or malformed (short id, missing content or
/// post-number nodes, non-numeric post number), or if constructing the
/// <see cref="PostComponents"/> throws (the exception is logged).
/// </returns>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="li"/> is null.</exception>
private PostComponents GetPost(HtmlNode li)
{
    if (li == null)
    {
        throw new ArgumentNullException(nameof(li));
    }

    const string idPrefix = "post-";

    // Substring would throw on an id shorter than the prefix (for example a
    // list item with no id at all), so treat such a node as "not a post".
    if (li.Id.Length < idPrefix.Length)
    {
        return null;
    }

    // Author and ID are in the basic list item attributes
    string author = PostText.CleanupWebString(li.GetAttributeValue("data-author", ""));
    string id = li.Id.Substring(idPrefix.Length);

    if (AdvancedOptions.Instance.DebugMode)
    {
        author = $"{author}_{id}";
    }

    // Get the primary content of the list item
    HtmlNode primaryContent = li.GetChildWithClass("primaryContent");

    // On one branch, we can get the post text:
    // messageContent > article > blockquote. Any missing link means the HTML
    // parsing of the post was corrupted somehow, so there is no usable post.
    HtmlNode messageContent = primaryContent?.GetChildWithClass("messageContent");
    HtmlNode postBlock = messageContent?.Element("article")?.Element("blockquote");

    if (postBlock == null)
    {
        return null;
    }

    // Predicate filtering out elements that we don't want to include
    List<string> excludedClasses = new List<string>
    {
        "bbCodeQuote",
        "messageTextEndMarker",
        "advbbcodebar_encadre",
        "advbbcodebar_article",
        "adv_tabs_wrapper",
        "adv_slider_wrapper"
    };

    if (AdvancedOptions.Instance.IgnoreSpoilers)
    {
        excludedClasses.Add("bbCodeSpoilerContainer");
    }

    var exclusions = PostText.GetClassesExclusionPredicate(excludedClasses);

    // Get the full post text.
    string text = PostText.ExtractPostText(postBlock, exclusions, Host);

    // On another branch of the primary content, we can get the post number:
    // messageMeta > publicControls > postNumber.
    HtmlNode messageMeta = primaryContent.GetChildWithClass("messageMeta");
    HtmlNode publicControls = messageMeta?.GetChildWithClass("publicControls");
    HtmlNode postNumber = publicControls?.GetChildWithClass("postNumber");

    // HTML parsing of the post was corrupted somehow.
    if (postNumber == null)
    {
        return null;
    }

    // Trim first so surrounding whitespace cannot hide the leading '#'.
    string postNumberText = postNumber.InnerText.Trim();

    // Skip the leading # character.
    if (postNumberText.StartsWith("#", StringComparison.Ordinal))
    {
        postNumberText = postNumberText.Substring(1);
    }

    // A non-numeric post number is handled like any other corrupted markup.
    int number;
    if (!int.TryParse(postNumberText, out number))
    {
        return null;
    }

    PostComponents post;

    try
    {
        post = new PostComponents(author, id, text, number);
    }
    catch (Exception e)
    {
        ErrorLog.Log(e);
        post = null;
    }

    return post;
}
/// <summary>
/// Get a completed post from the provided HTML list item.
/// The post ID comes from the item's <c>data-node-id</c> attribute; the
/// author, post number and text are located by class and <c>itemprop</c>
/// among its descendants.
/// </summary>
/// <param name="li">List item that contains the post.</param>
/// <returns>
/// Returns a post object with the extracted information, or null if the item
/// has no node ID, has no "b-post__content" area, or if constructing the
/// <see cref="PostComponents"/> throws.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="li"/> is null.</exception>
private PostComponents GetPost(HtmlNode li)
{
    if (li == null)
    {
        throw new ArgumentNullException(nameof(li));
    }

    string author = "";
    int number = 0;

    // ID
    string id = li.GetAttributeValue("data-node-id", "");

    if (string.IsNullOrEmpty(id))
    {
        return null;
    }

    // Author
    var postAuthorNode = li.Descendants("div").FirstOrDefault(a => a.GetAttributeValue("itemprop", "") == "author");
    var authorNode = postAuthorNode?.GetDescendantWithClass("div", "author");

    if (authorNode != null)
    {
        author = PostText.CleanupWebString(authorNode.InnerText);
    }

    // Both the post number and the text live under the content area. Without
    // it there is nothing to extract, so bail out instead of dereferencing null.
    var contentArea = li.GetDescendantWithClass("div", "b-post__content");

    if (contentArea == null)
    {
        return null;
    }

    // Number
    // Optional: stays 0 when the anchor is absent or its text is not numeric.
    var postCountAnchor = contentArea.GetDescendantWithClass("a", "b-post__count");

    if (postCountAnchor != null)
    {
        // Trim first so surrounding whitespace cannot hide the leading '#'.
        string postNumText = postCountAnchor.InnerText.Trim();

        if (postNumText.StartsWith("#", StringComparison.Ordinal))
        {
            postNumText = postNumText.Substring(1);
        }

        if (!int.TryParse(postNumText, out number))
        {
            number = 0;
        }
    }

    // Text
    // NOTE(review): postTextNode is null when no itemprop="text" div exists;
    // confirm how ExtractPostText treats a null node.
    var postTextNode = contentArea.Descendants("div").FirstOrDefault(a => a.GetAttributeValue("itemprop", "") == "text");

    // Predicate filtering out elements that we don't want to include
    var exclusion = PostText.GetClassExclusionPredicate("bbcode_quote");

    // Get the full post text.
    string text = PostText.ExtractPostText(postTextNode, exclusion, Host);

    PostComponents post;

    try
    {
        post = new PostComponents(author, id, text, number);
    }
    catch
    {
        // Best effort: values the constructor rejects yield "no post".
        post = null;
    }

    return post;
}