Beispiel #1
0
        /// <summary>
        /// Get thread info from the provided page.
        /// </summary>
        /// <param name="page">A web page from a forum that this adapter can handle.</param>
        /// <returns>
        /// Returns thread information that can be gleaned from that page.  The author is
        /// always empty, and the page count defaults to 1 when none can be read.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="page"/> is null.</exception>
        public ThreadInfo GetThreadInfo(HtmlDocument page)
        {
            if (page == null)
            {
                throw new ArgumentNullException(nameof(page));
            }

            string author = string.Empty; // vBulletin doesn't show the thread author
            int    pages  = 1;

            // Find the page title.  Every step is null-tolerant, so a document that lacks an
            // <html>, <head> or <title> element produces an empty title instead of a crash.
            string title = page.DocumentNode.Element("html")?.Element("head")?.Element("title")?.InnerText ?? "";
            title = ForumPostTextConverter.CleanupWebString(title);

            // The page total is the "pagetotal" span inside the "pagenav-controls" div
            // of the element with id "thread-view-tab".
            var pageTotalSpan = page.GetElementbyId("thread-view-tab")
                                ?.GetDescendantWithClass("div", "pagenav-controls")
                                ?.GetDescendantWithClass("span", "pagetotal");

            // Keep the single-page default when the span is missing or its text is not a number.
            if (pageTotalSpan != null && int.TryParse(pageTotalSpan.InnerText, out int pageTotal))
            {
                pages = pageTotal;
            }

            return new ThreadInfo(title, author, pages);
        }
Beispiel #2
0
        /// <summary>
        /// Get the author name shown in the description block of the page header.
        /// </summary>
        /// <param name="headerNode">Header node whose child div "p-description" is searched.</param>
        /// <returns>
        /// Returns the cleaned-up text of the first "username" anchor found, or the
        /// cleaned-up empty string when there is no such anchor.
        /// </returns>
        private string GetPageAuthor(HtmlNode headerNode)
        {
            var usernameAnchor = headerNode
                                 .GetChildWithClass("div", "p-description")
                                 ?.GetDescendantWithClass("a", "username");

            string rawName = usernameAnchor?.InnerText.Trim() ?? "";

            return ForumPostTextConverter.CleanupWebString(rawName);
        }
Beispiel #3
0
        /// <summary>
        /// Get the highest page number listed in the thread's page navigation.
        /// </summary>
        /// <param name="bodyNode">Node whose child div "p-body-main" contains the page navigation.</param>
        /// <returns>
        /// Returns the number shown by the last "pageNav-page" item, or 1 when the navigation
        /// is missing, incomplete, not numeric, or reports 0.
        /// </returns>
        /// <exception cref="InvalidOperationException">Thrown when the "p-body-main" div cannot be found.</exception>
        private int GetMaxPageNumberOfThread(HtmlNode bodyNode)
        {
            var mainNode = bodyNode.GetChildWithClass("div", "p-body-main") ??
                           throw new InvalidOperationException("Unable to find p-body-main.");

            // Text of the anchor in the last page item.  LastOrDefault walks the list items once,
            // and the null-conditional chain copes with a missing nav, list, item or anchor.
            string? lastPageText = mainNode.GetDescendantWithClass("nav", "pageNavWrapper")
                                   ?.GetDescendantWithClass("ul", "pageNav-main")
                                   ?.Elements("li")
                                   .LastOrDefault(n => n.HasClass("pageNav-page"))
                                   ?.Element("a")
                                   ?.InnerText
                                   .Trim();

            if (lastPageText == null)
            {
                return 1;
            }

            lastPageText = ForumPostTextConverter.CleanupWebString(lastPageText);

            // AllowThousands accepts group separators (such as "1,234") but no sign,
            // so a successful parse is never negative; 0 is treated as a single page.
            if (int.TryParse(lastPageText, NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out int pages) && pages > 0)
            {
                return pages;
            }

            return 1;
        }
Beispiel #4
0
        /// <summary>
        /// Get a completed post from the provided HTML list item node.
        /// </summary>
        /// <param name="li">List item node that contains the post.</param>
        /// <param name="quest">The quest being processed.  Not used by this method.</param>
        /// <returns>
        /// Returns a post object with required information, or null when the node's id is too
        /// short to contain a post ID or the post object could not be constructed.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="li"/> is null.</exception>
        private Post? GetPost(HtmlNode li, IQuest quest)
        {
            if (li == null)
            {
                throw new ArgumentNullException(nameof(li));
            }

            const string idPrefix = "post_";

            // ID: the part of the node id after the "post_" prefix.  A node whose id cannot
            // even hold the prefix is skipped instead of letting Substring throw.
            if (li.Id.Length < idPrefix.Length)
            {
                return null;
            }

            string id     = li.Id.Substring(idPrefix.Length);
            string author = "";
            string text   = "";
            int    number = 0;

            // Number: read from the "name" attribute of the element with id "postcount{id}".
            // Stays 0 when that element is missing or the attribute is not numeric.
            var postCount = li.OwnerDocument.GetElementbyId($"postcount{id}");

            if (postCount != null && int.TryParse(postCount.GetAttributeValue("name", "0"), out int postNumber))
            {
                number = postNumber;
            }

            // The class attribute must be exactly "postdetails" (no additional classes).
            var postDetails = li.Elements("div").FirstOrDefault(n => n.GetAttributeValue("class", "") == "postdetails");

            if (postDetails != null)
            {
                // Author
                var username = postDetails.GetChildWithClass("div", "userinfo")?.GetChildWithClass("a", "username");
                author = ForumPostTextConverter.CleanupWebString(username?.InnerText);

                // Text
                var message = li.OwnerDocument.GetElementbyId("post_message_" + id)?.Element("blockquote");

                // Predicate filtering out elements that we don't want to include
                var exclusion = ForumPostTextConverter.GetClassExclusionPredicate("bbcode_quote");

                // Get the full post text.
                text = ForumPostTextConverter.ExtractPostText(message, exclusion, Host);
            }

            try
            {
                Origin origin = new Origin(author, id, number, Site, GetPermalinkForId(id));
                return new Post(origin, text);
            }
            catch
            {
                // Best effort: any failure while building the post means "no post" for the caller.
                return null;
            }
        }
Beispiel #5
0
        /// <summary>
        /// Get thread info from the provided page.
        /// </summary>
        /// <param name="page">A web page from a forum that this adapter can handle.</param>
        /// <returns>Returns thread information that can be gleaned from that page.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="page"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the page content cannot be found, or the content has no "titleBar" element.
        /// </exception>
        public ThreadInfo GetThreadInfo(HtmlDocument page)
        {
            if (page == null)
            {
                throw new ArgumentNullException(nameof(page));
            }

            // Find the page title.  A missing <html>, <head> or <title> passes null to the cleanup call.
            string title = ForumPostTextConverter.CleanupWebString(
                page.DocumentNode.Element("html")?.Element("head")?.Element("title")?.InnerText);

            // Find a common parent for other data
            var pageContent = GetPageContent(page, PageType.Thread);

            if (pageContent == null)
            {
                throw new InvalidOperationException("Cannot find content on page.");
            }

            // Non-thread pages (such as threadmark pages) won't have a title bar.
            if (pageContent.GetDescendantWithClass("titleBar") == null)
            {
                throw new InvalidOperationException("Not a valid forum thread.");
            }

            // Find the thread author
            var authorNode = page.GetElementbyId("pageDescription")?.GetChildWithClass("username");

            string author = ForumPostTextConverter.CleanupWebString(authorNode?.InnerText ?? "");

            // Find the number of pages in the thread: the "data-last" attribute of the
            // "PageNav" child of the "pageNavLinkGroup" div.
            string lastPage = pageContent.GetDescendantWithClass("div", "pageNavLinkGroup")
                              ?.GetChildWithClass("PageNav")
                              ?.GetAttributeValue("data-last", "") ?? "";

            // No navigation (empty string) or a non-numeric value both mean a single page.
            if (!int.TryParse(lastPage, out int pages))
            {
                pages = 1;
            }

            // Create a ThreadInfo object to hold the acquired information.
            return new ThreadInfo(title, author, pages);
        }
Beispiel #6
0
        /// <summary>
        /// Get thread info from the provided page.
        /// </summary>
        /// <param name="page">A web page from a forum that this adapter can handle.</param>
        /// <returns>
        /// Returns thread information that can be gleaned from that page.  The author is
        /// always empty, and the page count defaults to 1 when none can be read.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="page"/> is null.</exception>
        public ThreadInfo GetThreadInfo(HtmlDocument page)
        {
            if (page == null)
            {
                throw new ArgumentNullException(nameof(page));
            }

            string author = string.Empty;
            int    pages  = 1;

            // Find the page title.  A missing <html>, <head> or <title> passes null to the cleanup call.
            string title = ForumPostTextConverter.CleanupWebString(
                page.DocumentNode.Element("html")?.Element("head")?.Element("title")?.InnerText);

            // Find the number of pages
            var pagebody = page.GetElementbyId("page-body");

            if (pagebody != null)
            {
                // Different versions of the forum have different methods of showing page numbers

                var topicactions = pagebody.GetChildWithClass("topic-actions");
                if (topicactions != null)
                {
                    // Variant with a "topic-actions" block: the pagination text reads "Page X of Y".
                    string? paginationText = topicactions.GetChildWithClass("pagination")?.InnerText;
                    if (paginationText != null)
                    {
                        // The static Regex.Match avoids constructing a new Regex object on every call.
                        Match m = Regex.Match(paginationText, @"Page\s*\d+\s*of\s*(?<pages>\d+)");
                        if (m.Success && int.TryParse(m.Groups["pages"].Value, out int totalPages))
                        {
                            pages = totalPages;
                        }
                    }
                }
                else
                {
                    // Variant with an "action-bar" block: take the last list item that is not the "next" link.
                    var lastPageLink = pagebody.GetChildWithClass("action-bar")
                                       ?.GetChildWithClass("pagination")
                                       ?.Element("ul")
                                       ?.Elements("li")
                                       .LastOrDefault(n => !n.GetAttributeValue("class", "").Split(' ').Contains("next"));

                    // Non-numeric item text leaves the single-page default in place.
                    if (lastPageLink != null && int.TryParse(lastPageLink.InnerText, out int lastPage))
                    {
                        pages = lastPage;
                    }
                }
            }

            return new ThreadInfo(title, author, pages);
        }
Beispiel #7
0
 /// <summary>
 /// Get the cleaned-up text of the page's title element.
 /// </summary>
 /// <param name="page">The page to read the title from.</param>
 /// <returns>
 /// Returns the result of cleaning up the title text; null is passed to the cleanup
 /// call when the html, head or title element is missing.
 /// </returns>
 private string GetPageTitle(HtmlDocument page)
 {
     // Null-conditional on every step, including <html>, so a page without
     // an <html> element no longer throws.
     return ForumPostTextConverter.CleanupWebString(
                page.DocumentNode
                .Element("html")
                ?.Element("head")
                ?.Element("title")
                ?.InnerText);
 }
Beispiel #8
0
        /// <summary>
        /// Get the author name from a post's div node.
        /// </summary>
        /// <param name="div">Div node that contains the post.</param>
        /// <returns>
        /// Returns the cleaned-up text of the author anchor; null is passed to the cleanup
        /// call when any element along the path is missing.
        /// </returns>
        private string GetPostAuthor(HtmlNode div)
        {
            // Path: div.inner > div.postbody > p.author, then the first <strong>
            // descendant and its direct <a> child.
            string? rawAuthor = div.GetChildWithClass("div", "inner")
                                ?.GetChildWithClass("div", "postbody")
                                ?.GetChildWithClass("p", "author")
                                ?.Descendants("strong")
                                .FirstOrDefault()
                                ?.Element("a")
                                ?.InnerText;

            return ForumPostTextConverter.CleanupWebString(rawAuthor);
        }
Beispiel #9
0
        /// <summary>
        /// Get a completed post from the provided HTML div node.
        /// </summary>
        /// <param name="div">Div node that contains the post.</param>
        /// <param name="quest">The quest being processed.  Not used by this method.</param>
        /// <returns>
        /// Returns a post object with required information, or null when the node has no id
        /// or the post object could not be constructed.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="div"/> is null.</exception>
        private Post? GetPost(HtmlNode div, IQuest quest)
        {
            if (div == null)
            {
                throw new ArgumentNullException(nameof(div));
            }

            // id="p12345" -- the post ID is everything after the first character.
            // A node without an id is skipped instead of letting Substring throw.
            if (string.IsNullOrEmpty(div.Id))
            {
                return null;
            }

            string id = div.Id.Substring(1);

            // No way to get the post number??
            int number = 0;

            // Author: div.inner > div.postbody > p.author, first <strong> descendant, its <a> child.
            var postbody = div.GetChildWithClass("div", "inner")?.GetChildWithClass("div", "postbody");

            var authorAnchor = postbody?.GetChildWithClass("p", "author")
                               ?.Descendants("strong")
                               .FirstOrDefault()
                               ?.Element("a");

            string author = ForumPostTextConverter.CleanupWebString(authorAnchor?.InnerText);

            // Get the full post text.  Two different layout variants: a "content" div,
            // or a div whose id starts with "post_content".
            var content = postbody?.GetChildWithClass("div", "content") ??
                          postbody?.Elements("div").FirstOrDefault(n => n.Id.StartsWith("post_content", StringComparison.Ordinal));

            string text = ForumPostTextConverter.ExtractPostText(content, n => false, Host);

            try
            {
                Origin origin = new Origin(author, id, number, Site, GetPermalinkForId(id));
                return new Post(origin, text);
            }
            catch
            {
                // Best effort: any failure while building the post means "no post" for the caller.
                return null;
            }
        }
Beispiel #10
0
        /// <summary>
        /// Get the author name from a post's list item node.
        /// </summary>
        /// <param name="li">List item node that contains the post.</param>
        /// <returns>
        /// Returns the cleaned-up text of the "author" div found under the first div marked
        /// itemprop="author", or an empty string when no such div exists.
        /// </returns>
        private string GetPostAuthor(HtmlNode li)
        {
            var authorDiv = li.Descendants("div")
                            .FirstOrDefault(d => d.GetAttributeValue("itemprop", "") == "author")
                            ?.GetDescendantWithClass("div", "author");

            if (authorDiv == null)
            {
                return "";
            }

            return ForumPostTextConverter.CleanupWebString(authorDiv.InnerText);
        }
Beispiel #11
0
        /// <summary>
        /// Get the cleaned-up text of the page's title element.
        /// </summary>
        /// <param name="page">The page to read the title from.</param>
        /// <param name="headerNode">
        /// Header node of the page.  Currently unused; an earlier approach read the h1
        /// inside its "p-title" div.
        /// </param>
        /// <returns>
        /// Returns the result of cleaning up the title text; null is passed to the cleanup
        /// call when the html, head or title element is missing.
        /// </returns>
        private string GetPageTitle(HtmlDocument page, HtmlNode headerNode)
        {
            // Null-conditional on every step, including <html>, so a page without
            // an <html> element no longer throws.
            return ForumPostTextConverter.CleanupWebString(
                       page.DocumentNode
                       .Element("html")
                       ?.Element("head")
                       ?.Element("title")
                       ?.InnerText);
        }
Beispiel #12
0
        /// <summary>
        /// Get the author name from a post's list item node.
        /// </summary>
        /// <param name="li">List item node that contains the post.</param>
        /// <returns>
        /// Returns the cleaned-up text of the username anchor, or an empty string when the
        /// list item has no child div whose class attribute is exactly "postdetails".
        /// </returns>
        private string GetPostAuthor(HtmlNode li)
        {
            var detailsDiv = li.Elements("div")
                             .FirstOrDefault(d => d.GetAttributeValue("class", "") == "postdetails");

            if (detailsDiv == null)
            {
                return "";
            }

            // div.userinfo > a.username; a missing anchor passes null to the cleanup call.
            var usernameAnchor = detailsDiv.GetChildWithClass("div", "userinfo")
                                 ?.GetChildWithClass("a", "username");

            return ForumPostTextConverter.CleanupWebString(usernameAnchor?.InnerText);
        }
Beispiel #13
0
        /// <summary>
        /// Get thread info from the provided page.
        /// </summary>
        /// <param name="page">A web page from a forum that this adapter can handle.</param>
        /// <returns>
        /// Returns thread information that can be gleaned from that page.  The author is
        /// always empty, and the page count defaults to 1 when none can be read.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="page"/> is null.</exception>
        public ThreadInfo GetThreadInfo(HtmlDocument page)
        {
            if (page == null)
            {
                throw new ArgumentNullException(nameof(page));
            }

            string author = string.Empty; // vBulletin doesn't show thread authors
            int    pages  = 1;

            // Find the page title.  Every step is null-tolerant, so a document that lacks an
            // <html>, <head> or <title> element produces an empty title instead of a crash.
            string title = page.DocumentNode.Element("html")?.Element("head")?.Element("title")?.InnerText ?? "";
            title = ForumPostTextConverter.CleanupWebString(title);

            // Get the number of pages from the navigation elements:
            // #pagination_top > form > first span > a.
            // If there is no form, that means there's only one page in the thread.  A missing
            // "pagination_top" element is handled the same way.
            string? pagesText = page.GetElementbyId("pagination_top")
                                ?.Element("form")
                                ?.Element("span")
                                ?.Element("a")
                                ?.InnerText;

            if (pagesText != null)
            {
                // The static Regex.Match avoids constructing a new Regex object on every call.
                Match m = Regex.Match(pagesText, @"Page \d+ of (?<pages>\d+)");
                if (m.Success && int.TryParse(m.Groups["pages"].Value, out int totalPages))
                {
                    pages = totalPages;
                }
            }

            return new ThreadInfo(title, author, pages);
        }
Beispiel #14
0
        /// <summary>
        /// Get the author name from the page description of a thread page.
        /// </summary>
        /// <param name="page">The page to examine.</param>
        /// <returns>
        /// Returns the cleaned-up text of the "username" child of the "pageDescription"
        /// element, or the cleaned-up empty string when it is missing.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the page content cannot be found, or the content has no "titleBar" element.
        /// </exception>
        private string GetPageAuthor(HtmlDocument page)
        {
            // Common parent for the other lookups.
            var content = GetPageContent(page, PageType.Thread);

            if (content == null)
            {
                throw new InvalidOperationException("Cannot find content on page.");
            }

            // Non-thread pages (such as threadmark pages) won't have a title bar.
            if (content.GetDescendantWithClass("titleBar") == null)
            {
                throw new InvalidOperationException("Not a valid forum thread.");
            }

            string rawAuthor = page.GetElementbyId("pageDescription")
                               ?.GetChildWithClass("username")
                               ?.InnerText ?? "";

            return ForumPostTextConverter.CleanupWebString(rawAuthor);
        }
Beispiel #15
0
        /// <summary>
        /// Get thread info from the provided page.
        /// </summary>
        /// <param name="page">A web page from a forum that this adapter can handle.</param>
        /// <returns>
        /// Returns thread information that can be gleaned from that page.  The author is
        /// always empty, and the page count defaults to 1 when none can be read.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="page"/> is null.</exception>
        public ThreadInfo GetThreadInfo(HtmlDocument page)
        {
            if (page == null)
            {
                throw new ArgumentNullException(nameof(page));
            }

            string author = string.Empty; // vBulletin doesn't show thread authors
            int    pages  = 1;

            // May be null when the document has no <html> element; every use below tolerates that.
            var doc = page.DocumentNode.Element("html");

            // Find the page title
            string title = ForumPostTextConverter.CleanupWebString(doc?.Element("head")?.Element("title")?.InnerText);

            // If there's no pagenav div, that means there's no navigation to alternate pages,
            // which means there's only one page in the thread.
            var vbMenuControl = doc?.GetDescendantWithClass("div", "pagenav")
                                ?.GetDescendantWithClass("td", "vbmenu_control");

            if (vbMenuControl != null)
            {
                // The static Regex.Match avoids constructing a new Regex object on every call.
                Match m = Regex.Match(vbMenuControl.InnerText, @"Page \d+ of (?<pages>\d+)");
                if (m.Success && int.TryParse(m.Groups["pages"].Value, out int totalPages))
                {
                    pages = totalPages;
                }
            }

            return new ThreadInfo(title, author, pages);
        }
Beispiel #16
0
        /// <summary>
        /// Get a completed post from the provided HTML list item node.
        /// </summary>
        /// <param name="article">List item node that contains the post.</param>
        /// <returns>Returns a post object with required information.</returns>
        private Post?GetPost(HtmlNode article, IQuest quest)
        {
            if (article == null)
            {
                throw new ArgumentNullException(nameof(article));
            }

            string author;
            string id;
            string text;
            int    number;

            // Author and ID are in the basic list item attributes
            author = ForumPostTextConverter.CleanupWebString(article.GetAttributeValue("data-author", ""));
            id     = ForumPostTextConverter.CleanupWebString(article.GetAttributeValue("data-content", "post-").Substring("post-".Length));

            if (AdvancedOptions.Instance.DebugMode)
            {
                author = $"{author}_{id}";
            }

            var attribution = article.GetDescendantWithClass("header", "message-attribution");

            if (attribution == null)
            {
                return(null);
            }

            string postNum = attribution.Descendants("a").LastOrDefault(c => c.ChildNodes.Count == 1)?.InnerText.Trim() ?? "";

            if (string.IsNullOrEmpty(postNum))
            {
                return(null);
            }


            if (postNum[0] == '#')
            {
                var numSpan = postNum.AsSpan()[1..];
Beispiel #17
0
        /// <summary>
        /// Get the author name of the post with the given ID.
        /// </summary>
        /// <param name="page">The page that contains the post.</param>
        /// <param name="id">The post ID, used to locate the element with id "postmenu_{id}".</param>
        /// <returns>
        /// Returns the cleaned-up author text, or the cleaned-up empty string when the
        /// post menu element or its anchor is missing.
        /// </returns>
        private string GetPostAuthor(HtmlDocument page, string id)
        {
            // A missing post menu element yields no anchor instead of a NullReferenceException.
            var authorAnchor = page.GetElementbyId($"postmenu_{id}")?.Element("a");

            // Prefer the span inside the anchor when there is one; otherwise use the anchor's own text.
            string author = (authorAnchor?.Element("span") ?? authorAnchor)?.InnerText ?? "";

            return ForumPostTextConverter.CleanupWebString(author);
        }
Beispiel #18
0
        /// <summary>
        /// Get thread info from the provided page.
        /// </summary>
        /// <param name="page">A web page from a forum that this adapter can handle.</param>
        /// <returns>
        /// Returns thread information that can be gleaned from that page.  The author is
        /// always empty, and the page count defaults to 1 when none can be read.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="page"/> is null.</exception>
        public ThreadInfo GetThreadInfo(HtmlDocument page)
        {
            if (page == null)
            {
                throw new ArgumentNullException(nameof(page));
            }

            string author = string.Empty;
            int    pages  = 1;

            // Find the page title.  A missing <html>, <head> or <title> passes null to the cleanup call.
            string title = ForumPostTextConverter.CleanupWebString(
                page.DocumentNode.Element("html")?.Element("head")?.Element("title")?.InnerText);

            // Find the number of pages: the "data-page" attribute of the anchor in the last
            // list item carrying the "page" class, inside #content's "pagination-container" div.
            // Any missing link in the chain leaves the result null.
            string? lastPageNumber = page.GetElementbyId("content")
                                     ?.GetDescendantWithClass("div", "pagination-container")
                                     ?.Element("ul")
                                     ?.Elements("li")
                                     .LastOrDefault(n => n.GetAttributeValue("class", "").Split(' ').Contains("page"))
                                     ?.Element("a")
                                     ?.GetAttributeValue("data-page", "1");

            // TryParse rejects null and non-numeric text, keeping the single-page default.
            if (int.TryParse(lastPageNumber, out int lastPage))
            {
                pages = lastPage;
            }

            return new ThreadInfo(title, author, pages);
        }
Beispiel #19
0
 /// <summary>
 /// Get the author name from the "data-author" attribute of a post's list item node.
 /// </summary>
 /// <param name="li">List item node that contains the post.</param>
 /// <returns>Returns the cleaned-up attribute value; a missing attribute is treated as an empty string.</returns>
 private string GetPostAuthor(HtmlNode li)
 {
     string rawAuthor = li.GetAttributeValue("data-author", "");

     return ForumPostTextConverter.CleanupWebString(rawAuthor);
 }
Beispiel #20
0
        /// <summary>
        /// Get a completed post from the provided HTML list item node.
        /// </summary>
        /// <param name="li">List item node that contains the post.</param>
        /// <param name="quest">
        /// The quest being processed.  Its IgnoreSpoilers setting decides whether spoiler
        /// containers are excluded from the post text.
        /// </param>
        /// <returns>
        /// Returns a post object with required information, or null when the node's id is too
        /// short to contain a post ID, the post number cannot be found or parsed, or the
        /// post object could not be constructed.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="li"/> is null.</exception>
        private Post? GetPost(HtmlNode li, IQuest quest)
        {
            if (li == null)
            {
                throw new ArgumentNullException(nameof(li));
            }

            const string idPrefix = "post-";

            // The post ID is the part of the node id after the "post-" prefix.  A node whose id
            // cannot even hold the prefix is skipped instead of letting Substring throw.
            if (li.Id.Length < idPrefix.Length)
            {
                return null;
            }

            // Author and ID are in the basic list item attributes
            string author = ForumPostTextConverter.CleanupWebString(li.GetAttributeValue("data-author", ""));
            string id     = li.Id.Substring(idPrefix.Length);

            if (AdvancedOptions.Instance.DebugMode)
            {
                author = $"{author}_{id}";
            }

            // Get the primary content of the list item
            HtmlNode? primaryContent = li.GetChildWithClass("primaryContent");

            // On one branch, we can get the post text
            HtmlNode? postBlock = primaryContent?.GetChildWithClass("messageContent")
                                  ?.Element("article")
                                  ?.Element("blockquote");

            // Predicate filtering out elements that we don't want to include
            List<string> excludedClasses = new List<string>
            {
                "bbCodeQuote", "messageTextEndMarker", "advbbcodebar_encadre",
                "advbbcodebar_article", "adv_tabs_wrapper", "adv_slider_wrapper"
            };

            if (quest.IgnoreSpoilers)
            {
                excludedClasses.Add("bbCodeSpoilerContainer");
            }

            var exclusions = ForumPostTextConverter.GetClassesExclusionPredicate(excludedClasses);

            // Get the full post text.
            string text = ForumPostTextConverter.ExtractPostText(postBlock, exclusions, Host);

            // On another branch of the primary content, we can get the post number.
            HtmlNode? postNumber = primaryContent?.GetChildWithClass("messageMeta")
                                   ?.GetChildWithClass("publicControls")
                                   ?.GetChildWithClass("postNumber");

            // HTML parsing of the post was corrupted somehow.
            if (postNumber == null)
            {
                return null;
            }

            string postNumberText = postNumber.InnerText.Trim();

            // Skip the leading # character.
            if (postNumberText.StartsWith("#", StringComparison.Ordinal))
            {
                postNumberText = postNumberText.Substring(1);
            }

            // A post number that is not numeric is handled like any other corrupted post,
            // instead of throwing a FormatException out of this method.
            if (!int.TryParse(postNumberText, out int number))
            {
                return null;
            }

            try
            {
                Origin origin = new Origin(author, id, number, Site, GetPermalinkForId(id));
                return new Post(origin, text);
            }
            catch (Exception e)
            {
                Logger2.LogError(e, $"Attempt to create new post failed. (Author:{author}, ID:{id}, Number:{number}, Quest:{quest.DisplayName})");
                return null;
            }
        }
Beispiel #21
0
        /// <summary>
        /// Get a completed post from the provided HTML list item.
        /// </summary>
        /// <param name="li">List item that contains the post.</param>
        /// <param name="quest">The quest being processed.  Not used by this method.</param>
        /// <returns>
        /// Returns a post object with required information, or null when the list item has no
        /// "data-node-id" value or the post object could not be constructed.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="li"/> is null.</exception>
        private Post? GetPost(HtmlNode li, IQuest quest)
        {
            if (li == null)
            {
                throw new ArgumentNullException(nameof(li));
            }

            string author = "";
            int    number = 0;

            // ID
            string id = li.GetAttributeValue("data-node-id", "");

            if (string.IsNullOrEmpty(id))
            {
                return null;
            }

            // Author
            var authorNode = li.Descendants("div")
                             .FirstOrDefault(a => a.GetAttributeValue("itemprop", "") == "author")
                             ?.GetDescendantWithClass("div", "author");

            if (authorNode != null)
            {
                author = ForumPostTextConverter.CleanupWebString(authorNode.InnerText);
            }

            HtmlNode? contentArea = li.GetDescendantWithClass("div", "b-post__content");

            // Number
            HtmlNode? postCountAnchor = contentArea?.GetDescendantWithClass("a", "b-post__count");

            if (postCountAnchor != null)
            {
                // Trim first so surrounding whitespace cannot hide the leading # character.
                string postNumText = postCountAnchor.InnerText.Trim();

                if (postNumText.StartsWith("#", StringComparison.Ordinal))
                {
                    postNumText = postNumText.Substring(1);
                }

                // Non-numeric text leaves the number at 0, the same as a missing anchor.
                if (int.TryParse(postNumText, out int parsedNumber))
                {
                    number = parsedNumber;
                }
            }

            // Text
            var postTextNode = contentArea?.Descendants("div").FirstOrDefault(a => a.GetAttributeValue("itemprop", "") == "text");

            // Predicate filtering out elements that we don't want to include
            var exclusion = ForumPostTextConverter.GetClassExclusionPredicate("bbcode_quote");

            // Get the full post text.
            string text = ForumPostTextConverter.ExtractPostText(postTextNode, exclusion, Host);

            try
            {
                Origin origin = new Origin(author, id, number, Site, GetPermalinkForId(id));
                return new Post(origin, text);
            }
            catch
            {
                // Best effort: any failure while building the post means "no post" for the caller.
                return null;
            }
        }
Beispiel #22
0
        /// <summary>
        /// Get thread info from the provided page.
        /// </summary>
        /// <param name="page">A web page from a forum that this adapter can handle.</param>
        /// <returns>Returns thread information that can be gleaned from that page.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="page"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the "top" element, or the "p-body", "p-body-header" or "p-body-main" div, cannot be found.
        /// </exception>
        public ThreadInfo GetThreadInfo(HtmlDocument page)
        {
            if (page == null)
            {
                throw new ArgumentNullException(nameof(page));
            }

            // Start at the top of the structure.  A missing "top" element is reported the same
            // way as the other missing structural nodes instead of as a NullReferenceException.
            var topNode = page.GetElementbyId("top") ??
                          throw new InvalidOperationException("Unable to find top.");

            var bodyNode = topNode.GetChildWithClass("div", "p-body") ??
                           topNode.GetDescendantWithClass("div", "p-body") ??
                           throw new InvalidOperationException("Unable to find p-body.");

            // Step into the "p-body-inner" wrapper when the body has one.
            if (bodyNode.Elements("div").Any(n => n.HasClass("p-body-inner")))
            {
                bodyNode = bodyNode.GetChildWithClass("p-body-inner")!;
            }

            var headerNode = bodyNode.GetChildWithClass("div", "p-body-header") ??
                             throw new InvalidOperationException("Unable to find p-body-header.");

            // Title: the h1 inside the header's "p-title" div.
            var titleNode = headerNode.GetChildWithClass("div", "p-title");
            string title = ForumPostTextConverter.CleanupWebString(titleNode?.Element("h1")?.InnerText.Trim());

            // Author: the "username" anchor inside the header's "p-description" div.
            var authorNode = headerNode.GetChildWithClass("div", "p-description")
                             ?.GetDescendantWithClass("a", "username");
            string author = ForumPostTextConverter.CleanupWebString(authorNode?.InnerText.Trim() ?? "");

            var mainNode = bodyNode.GetChildWithClass("div", "p-body-main") ??
                           throw new InvalidOperationException("Unable to find p-body-main.");

            // Text of the anchor in the last page item.  LastOrDefault walks the list items once,
            // and the null-conditional chain copes with a missing nav, list, item or anchor.
            string? lastItem = mainNode.GetDescendantWithClass("nav", "pageNavWrapper")
                               ?.GetDescendantWithClass("ul", "pageNav-main")
                               ?.Elements("li")
                               .LastOrDefault(n => n.HasClass("pageNav-page"))
                               ?.Element("a")
                               ?.InnerText
                               .Trim();

            int pages = 0;

            if (lastItem != null)
            {
                // TryParse leaves pages at 0 on failure, which the check below turns into 1.
                _ = int.TryParse(ForumPostTextConverter.CleanupWebString(lastItem),
                                 NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out pages);
            }

            if (pages == 0)
            {
                pages = 1;
            }

            // Create a ThreadInfo object to hold the acquired information.
            return new ThreadInfo(title, author, pages);
        }
Beispiel #23
0
 /// <summary>
 /// Get the post ID from the "data-content" attribute of a post's article node.
 /// </summary>
 /// <param name="article">Article node that contains the post.</param>
 /// <returns>
 /// Returns the cleaned-up attribute value with its first five characters (the length
 /// of "post-") removed; a missing attribute is treated as "post-".
 /// </returns>
 private string GetPostId(HtmlNode article)
 {
     string dataContent = article.GetAttributeValue("data-content", "post-");
     string rawId       = dataContent.Substring("post-".Length);

     return ForumPostTextConverter.CleanupWebString(rawId);
 }