コード例 #1
0
ファイル: PostTextTests.cs プロジェクト: MizMahem/NetTally
        /// <summary>
        /// Checks that text wrapped in a font-family span is extracted as plain lines,
        /// and that the extracted text is recognised as a two-line vote.
        /// </summary>
        public void Fonts_Test()
        {
            const string sourceHtml =
                @"<span style=""font-family: 'times new roman'"">[x][Wine] Red<br />
-[x] Pinot Noir</span><br />
<br />
Font test";

            const string expectedText =
                @"[x][Wine] Red
-[x] Pinot Noir
Font test";

            var root = GetHtmlFromString(sourceHtml);

            if (root != null)
            {
                // Step 1: the converter output must match the expected text exactly.
                string extracted = ForumPostTextConverter.ExtractPostText(root, GetXenForoPredicate(), exampleUri);
                Assert.AreEqual(expectedText, extracted);

                // Step 2: the extracted text must yield both vote lines when wrapped in a post.
                var origin = new Origin("Kinematics", "123456", 10, new Uri("http://www.example.com/"), "http://www.example.com");
                var post   = new Post(origin, extracted);

                Assert.IsTrue(post.HasVote);
                Assert.AreEqual(2, post.VoteLines.Count);
                Assert.AreEqual("[x][Wine] Red", post.VoteLines[0].ToString());
                Assert.AreEqual("-[x] Pinot Noir", post.VoteLines[1].ToString());
            }

            // The test fails when the fixture HTML could not be turned into a node.
            Assert.IsTrue(root != null);
        }
コード例 #2
0
ファイル: PostTextTests.cs プロジェクト: MizMahem/NetTally
        /// <summary>
        /// Checks that a line-through span inside a vote line is rendered with the
        /// ❰ ❱ strike-through markers in the extracted text.
        /// </summary>
        public void Content_Strikethrough()
        {
            const string sourceHtml =
                @"[X] Plan Air, <span style=""text-decoration: line-through"">Earth, Water,</span> Fire<br />
-[X] Aeromancy (40%)<br />
-[X] Pyromancy (25%)<br />
-[X] Pyromantic Divination (35%)<br />
<br />
We seem to have a real talent for divination considering that we managed to affect the past when we used the glass candle.";

            const string expectedText =
                @"[X] Plan Air, ❰Earth, Water,❱ Fire
-[X] Aeromancy (40%)
-[X] Pyromancy (25%)
-[X] Pyromantic Divination (35%)
We seem to have a real talent for divination considering that we managed to affect the past when we used the glass candle.";

            var root = GetHtmlFromString(sourceHtml);

            if (root != null)
            {
                string extracted = ForumPostTextConverter.ExtractPostText(root, GetXenForoPredicate(), exampleUri);

                Assert.AreEqual(expectedText, extracted);
            }

            // The test fails when the fixture HTML could not be turned into a node.
            Assert.IsTrue(root != null);
        }
コード例 #3
0
ファイル: XenForo2Adapter2.cs プロジェクト: MizMahem/NetTally
        /// <summary>
        /// Determines how many pages the thread has by reading the page navigation control.
        /// </summary>
        /// <param name="bodyNode">Node that is expected to have a "p-body-main" div as a child.</param>
        /// <returns>
        /// The number shown by the last "pageNav-page" list item, or 1 when the navigation
        /// control, its items, the item's anchor, or a parseable positive number is missing.
        /// </returns>
        /// <exception cref="InvalidOperationException">Thrown when no "p-body-main" div is found.</exception>
        private int GetMaxPageNumberOfThread(HtmlNode bodyNode)
        {
            var mainNode = bodyNode.GetChildWithClass("div", "p-body-main") ??
                           throw new InvalidOperationException("Unable to find p-body-main.");

            // LastOrDefault walks the list items once and yields null when there are none,
            // replacing the previous Any() + Last() double enumeration.
            var lastPageItem = mainNode.GetDescendantWithClass("nav", "pageNavWrapper")
                                      ?.GetDescendantWithClass("ul", "pageNav-main")
                                      ?.Elements("li")
                                      .LastOrDefault(n => n.HasClass("pageNav-page"));

            // A page item without an anchor is treated like a missing item instead of
            // causing a NullReferenceException.
            var lastPageText = lastPageItem?.Element("a")?.InnerText.Trim();

            if (lastPageText == null)
            {
                return 1;
            }

            string cleanedText = ForumPostTextConverter.CleanupWebString(lastPageText);

            // AllowThousands accepts group separators such as "1,234"; a parsed value of 0 is reported as 1.
            if (int.TryParse(cleanedText, NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out int pages) && pages > 0)
            {
                return pages;
            }

            return 1;
        }
コード例 #4
0
ファイル: PostTextTests.cs プロジェクト: MizMahem/NetTally
        /// <summary>
        /// Checks that a member-callout anchor embedded in a plan vote line is converted
        /// to the 『url』 markup, and that the resulting text still counts as a single vote line.
        /// </summary>
        public void Check_Embedded_Callout_Plan()
        {
            const string sourceHtml =
                @"The referenced post did not have the problem described, but another post did.  Basically, <br />
[x] Plan <a href=""https://forums.sufficientvelocity.com/members/4076/"" class=""username"" data-xf-init=""member-tooltip"" data-user-id=""4076"" data-username=""@Kinematics"">@Kinematics</a> <br />
Wouldn't be applied to my proposed plan because it got turned into a member link (the '@' symbol is dropped on QQ's forums, so that doesn't interfere in this case).<br />
<br />";

            const string expectedText =
                @"The referenced post did not have the problem described, but another post did.  Basically, 
[x] Plan 『url=""https://forums.sufficientvelocity.com/members/4076/""』@Kinematics『/url』 
Wouldn't be applied to my proposed plan because it got turned into a member link (the '@' symbol is dropped on QQ's forums, so that doesn't interfere in this case).";

            var root = GetHtmlFromString(sourceHtml);

            if (root != null)
            {
                // Step 1: the converter output must match the expected text exactly.
                string extracted = ForumPostTextConverter.ExtractPostText(root, GetXenForoPredicate(), exampleUri);
                Assert.AreEqual(expectedText, extracted);

                // Step 2: the extracted text must contain exactly one vote line when wrapped in a post.
                var origin = new Origin("Kinematics1", "123456", 10, new Uri("http://www.example.com/"), "http://www.example.com");
                var post   = new Post(origin, extracted);

                Assert.IsTrue(post.HasVote);
                Assert.AreEqual(1, post.VoteLines.Count);
            }

            // The test fails when the fixture HTML could not be turned into a node.
            Assert.IsTrue(root != null);
        }
コード例 #5
0
ファイル: PostTextTests.cs プロジェクト: MizMahem/NetTally
        /// <summary>
        /// Checks that a line-through span covering an entire multi-line plan is extracted
        /// as one ❰ ❱ block with the original line breaks replaced by the ⦂ marker.
        /// </summary>
        public void Full_Plan_Strikethrough_1()
        {
            string html =
                @"<span style=""text-decoration: line-through"">[X] Plan Triplemancer<br />
-[X] Aeromancy (40%)<br />
-[X] Hydromancy (30%)<br />
-[X] Pyromancy (30%)</span><br />
<br />
WeirGarth has minmaxed our Life pretty good, but we do need some extra options. The three elements will do nicely. Most focus on Aeromancy to maximize the chance of finishing the Rod and synergy with griffins. Hydromancy for Deep Den and Lannisport plus reconnecting with Bel. Pyromancy because Melisandre should have the most free time availalbe and all our fire and light imagery makes me think we have the most talent with it compared to other elements.<br />
<br />
Edit: Vote changed. See next page.";

            string expectedText =
                @"❰[X] Plan Triplemancer⦂-[X] Aeromancy (40%)⦂-[X] Hydromancy (30%)⦂-[X] Pyromancy (30%)❱
WeirGarth has minmaxed our Life pretty good, but we do need some extra options. The three elements will do nicely. Most focus on Aeromancy to maximize the chance of finishing the Rod and synergy with griffins. Hydromancy for Deep Den and Lannisport plus reconnecting with Bel. Pyromancy because Melisandre should have the most free time availalbe and all our fire and light imagery makes me think we have the most talent with it compared to other elements.
Edit: Vote changed. See next page.";

            var node = GetHtmlFromString(html);

            if (node != null)
            {
                string results = ForumPostTextConverter.ExtractPostText(node, GetXenForoPredicate(), exampleUri);

                // The unused span copies of both strings (debugging leftovers) were removed;
                // the comparison itself is unchanged.
                Assert.AreEqual(expectedText, results);
            }

            // The test fails when the fixture HTML could not be turned into a node.
            Assert.IsTrue(node != null);
        }
コード例 #6
0
ファイル: XenForo2Adapter2.cs プロジェクト: MizMahem/NetTally
        /// <summary>
        /// Reads the thread author's name from the page header.
        /// </summary>
        /// <param name="headerNode">Header node that is searched for a "p-description" child div.</param>
        /// <returns>
        /// The cleaned-up, trimmed text of the "username" anchor inside the description,
        /// or the cleanup of an empty string when the description or anchor is missing.
        /// </returns>
        private string GetPageAuthor(HtmlNode headerNode)
        {
            string rawAuthor = headerNode.GetChildWithClass("div", "p-description")
                                         ?.GetDescendantWithClass("a", "username")
                                         ?.InnerText.Trim()
                               ?? "";

            return ForumPostTextConverter.CleanupWebString(rawAuthor);
        }
コード例 #7
0
ファイル: PostTextTests.cs プロジェクト: MizMahem/NetTally
        /// <summary>
        /// Checks basic conversion: line breaks become new lines, HTML entities are decoded,
        /// and italic tags are rendered with the 『i』 markup.
        /// </summary>
        public void Convert_Basic_HTML()
        {
            const string sourceHtml = @"[X] Plan Perfectly Balanced (As all things should be)<br />
-[X] Life (50%)<br />
-[X] Aeromancy (50%)<br />
<br />
I think Garth&#039;s entire thing so far is life, so he should try &quot;max&quot; that out as <i>quickly</i> as possible. On the other hand, some versatility is required, and he has already started down the Aeromancy path.<br />";

            const string expectedText =
                @"[X] Plan Perfectly Balanced (As all things should be)
-[X] Life (50%)
-[X] Aeromancy (50%)
I think Garth's entire thing so far is life, so he should try ""max"" that out as 『i』quickly『/i』 as possible. On the other hand, some versatility is required, and he has already started down the Aeromancy path.";

            var root = GetHtmlFromString(sourceHtml);

            if (root != null)
            {
                string extracted = ForumPostTextConverter.ExtractPostText(root, GetXenForoPredicate(), exampleUri);

                Assert.AreEqual(expectedText, extracted);
            }

            // The test fails when the fixture HTML could not be turned into a node.
            Assert.IsTrue(root != null);
        }
コード例 #8
0
        /// <summary>
        /// Get thread info from the provided page.
        /// </summary>
        /// <param name="page">A web page from a forum that this adapter can handle.</param>
        /// <returns>
        /// Returns thread information that can be gleaned from that page: the cleaned-up
        /// page title, an empty author, and the page count (1 when no usable "pagetotal"
        /// span is found).
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="page"/> is null.</exception>
        public ThreadInfo GetThreadInfo(HtmlDocument page)
        {
            if (page == null)
            {
                throw new ArgumentNullException(nameof(page));
            }

            string author = string.Empty; // vBulletin doesn't show the thread author
            int    pages  = 1;

            // Find the page title.  Every step is null-conditional so a document without
            // an <html> or <head> element yields an empty title instead of a NullReferenceException.
            string title = page.DocumentNode.Element("html")?.Element("head")?.Element("title")?.InnerText ?? "";
            title = ForumPostTextConverter.CleanupWebString(title);

            var pageTotalSpan = page.GetElementbyId("thread-view-tab")
                                    ?.GetDescendantWithClass("div", "pagenav-controls")
                                    ?.GetDescendantWithClass("span", "pagetotal");

            // TryParse keeps the default of one page when the span text is not a number,
            // rather than throwing FormatException.
            if (pageTotalSpan != null &&
                int.TryParse(pageTotalSpan.InnerText,
                             System.Globalization.NumberStyles.Integer,
                             System.Globalization.CultureInfo.InvariantCulture,
                             out int pageTotal))
            {
                pages = pageTotal;
            }

            return new ThreadInfo(title, author, pages);
        }
コード例 #9
0
        /// <summary>
        /// Extracts the text of a post from its list item node.
        /// </summary>
        /// <param name="li">List item node that contains the post.</param>
        /// <param name="quest">Quest supplying the spoiler setting and the thread URI.</param>
        /// <returns>The text produced by the converter for the post's blockquote.</returns>
        private string GetPostText(HtmlNode li, IQuest quest)
        {
            // Walk primaryContent -> messageContent -> article -> blockquote; any missing
            // step leaves the block null.
            HtmlNode? postBlock = li.GetChildWithClass("primaryContent")
                                    ?.GetChildWithClass("messageContent")
                                    ?.Element("article")
                                    ?.Element("blockquote");

            // Classes of elements that are left out of the extracted text
            var excludedClasses = new List<string>
            {
                "bbCodeQuote",
                "messageTextEndMarker",
                "advbbcodebar_encadre",
                "advbbcodebar_article",
                "adv_tabs_wrapper",
                "adv_slider_wrapper"
            };

            if (quest.IgnoreSpoilers)
            {
                excludedClasses.Add("bbCodeSpoilerContainer");
            }

            var exclusions = ForumPostTextConverter.GetClassesExclusionPredicate(excludedClasses);

            // Scheme and authority of the thread address, with a trailing slash
            string authority = quest.ThreadUri.GetLeftPart(UriPartial.Authority);
            var    host      = new Uri(authority + "/");

            return ForumPostTextConverter.ExtractPostText(postBlock, exclusions, host);
        }
コード例 #10
0
        /// <summary>
        /// Get a completed post from the provided HTML list item node.
        /// </summary>
        /// <param name="li">List item node that contains the post.</param>
        /// <param name="quest">Not used by this method.</param>
        /// <returns>
        /// Returns a post object with required information, or null when the node's id is
        /// too short to carry the "post_" prefix or when the post cannot be constructed.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="li"/> is null.</exception>
        private Post? GetPost(HtmlNode li, IQuest quest)
        {
            if (li == null)
            {
                throw new ArgumentNullException(nameof(li));
            }

            const string idPrefix = "post_";

            // An id too short to hold the prefix cannot identify a post.  Report "no post"
            // instead of letting Substring throw ArgumentOutOfRangeException.
            if (li.Id.Length < idPrefix.Length)
            {
                return null;
            }

            string author = "";
            string text   = "";
            int    number = 0;

            // ID
            string id = li.Id.Substring(idPrefix.Length);

            // Number.  TryParse leaves the number at 0 when the attribute is malformed,
            // rather than throwing FormatException.
            var postCount = li.OwnerDocument.GetElementbyId($"postcount{id}");

            if (postCount != null &&
                int.TryParse(postCount.GetAttributeValue("name", "0"),
                             System.Globalization.NumberStyles.Integer,
                             System.Globalization.CultureInfo.InvariantCulture,
                             out int parsedNumber))
            {
                number = parsedNumber;
            }

            var postDetails = li.Elements("div").FirstOrDefault(n => n.GetAttributeValue("class", "") == "postdetails");

            if (postDetails != null)
            {
                // Author
                HtmlNode? userinfo = postDetails.GetChildWithClass("div", "userinfo");
                HtmlNode? username = userinfo?.GetChildWithClass("a", "username");
                author = ForumPostTextConverter.CleanupWebString(username?.InnerText);

                // Text
                var message = li.OwnerDocument.GetElementbyId("post_message_" + id)?.Element("blockquote");

                // Predicate filtering out elements that we don't want to include
                var exclusion = ForumPostTextConverter.GetClassExclusionPredicate("bbcode_quote");

                // Get the full post text.
                text = ForumPostTextConverter.ExtractPostText(message, exclusion, Host);
            }

            try
            {
                Origin origin = new Origin(author, id, number, Site, GetPermalinkForId(id));
                return new Post(origin, text);
            }
            catch
            {
                // NOTE(review): any failure while building the origin or post is reported as
                // "no post" — presumably constructor validation; confirm which exceptions are expected.
                return null;
            }
        }
コード例 #11
0
        /// <summary>
        /// Get thread info from the provided page.
        /// </summary>
        /// <param name="page">A web page from a forum that this adapter can handle.</param>
        /// <returns>
        /// Returns thread information that can be gleaned from that page: cleaned-up title,
        /// cleaned-up author, and the page count (1 when no usable "data-last" value is found).
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="page"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the page content cannot be found or the content has no "titleBar" element.
        /// </exception>
        public ThreadInfo GetThreadInfo(HtmlDocument page)
        {
            if (page == null)
            {
                throw new ArgumentNullException(nameof(page));
            }

            // Find the page title.  The <html> step is null-conditional as well, so a
            // document without it yields a null title rather than a NullReferenceException.
            string title = ForumPostTextConverter.CleanupWebString(
                page.DocumentNode.Element("html")?.Element("head")?.Element("title")?.InnerText);

            // Find a common parent for other data
            var pageContent = GetPageContent(page, PageType.Thread);

            if (pageContent == null)
            {
                throw new InvalidOperationException("Cannot find content on page.");
            }

            // Non-thread pages (such as threadmark pages) won't have a title bar.
            if (pageContent.GetDescendantWithClass("titleBar") == null)
            {
                throw new InvalidOperationException("Not a valid forum thread.");
            }

            // Find the thread author
            HtmlNode? authorNode = page.GetElementbyId("pageDescription")?.GetChildWithClass("username");
            string author = ForumPostTextConverter.CleanupWebString(authorNode?.InnerText ?? "");

            // Find the number of pages in the thread
            var    pageNavLinkGroup = pageContent.GetDescendantWithClass("div", "pageNavLinkGroup");
            var    pageNav          = pageNavLinkGroup?.GetChildWithClass("PageNav");
            string lastPage         = pageNav?.GetAttributeValue("data-last", "") ?? "";

            // An empty or malformed value means a single page; TryParse avoids the
            // FormatException that Parse would raise on malformed text.
            if (!int.TryParse(lastPage,
                              System.Globalization.NumberStyles.Integer,
                              System.Globalization.CultureInfo.InvariantCulture,
                              out int pages))
            {
                pages = 1;
            }

            // Create a ThreadInfo object to hold the acquired information.
            return new ThreadInfo(title, author, pages);
        }
コード例 #12
0
ファイル: PostTextTests.cs プロジェクト: MizMahem/NetTally
        /// <summary>
        /// Builds the exclusion predicate the tests pass to the post text converter.
        /// </summary>
        /// <returns>The converter's class-exclusion predicate for the six listed class names.</returns>
        private Predicate<HtmlNode> GetXenForoPredicate()
        {
            var excludedClasses = new List<string>();

            excludedClasses.Add("bbCodeQuote");
            excludedClasses.Add("messageTextEndMarker");
            excludedClasses.Add("advbbcodebar_encadre");
            excludedClasses.Add("advbbcodebar_article");
            excludedClasses.Add("adv_tabs_wrapper");
            excludedClasses.Add("adv_slider_wrapper");

            return ForumPostTextConverter.GetClassesExclusionPredicate(excludedClasses);
        }
コード例 #13
0
ファイル: phpBBAdapter.cs プロジェクト: MizMahem/NetTally
        /// <summary>
        /// Get thread info from the provided page.
        /// </summary>
        /// <param name="page">A web page from a forum that this adapter can handle.</param>
        /// <returns>
        /// Returns thread information that can be gleaned from that page: the cleaned-up
        /// title, an empty author, and the page count (1 when no page count can be read).
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="page"/> is null.</exception>
        public ThreadInfo GetThreadInfo(HtmlDocument page)
        {
            if (page == null)
            {
                throw new ArgumentNullException(nameof(page));
            }

            string author = string.Empty;
            int    pages  = 1;

            // Find the page title.  A document without an <html> element yields a null
            // title instead of a NullReferenceException.
            string title = ForumPostTextConverter.CleanupWebString(
                page.DocumentNode.Element("html")?.Element("head")?.Element("title")?.InnerText);

            // Find the number of pages
            var pagebody = page.GetElementbyId("page-body");

            if (pagebody != null)
            {
                // Different versions of the forum have different methods of showing page numbers

                var topicactions = pagebody.GetChildWithClass("topic-actions");
                if (topicactions != null)
                {
                    // Layout with a "Page N of M" text inside the pagination element
                    var paginationText = topicactions.GetChildWithClass("pagination")?.InnerText;
                    if (paginationText != null)
                    {
                        Match m = Regex.Match(paginationText, @"Page\s*\d+\s*of\s*(?<pages>\d+)");

                        // TryParse: \d also matches non-ASCII digits, which Parse would reject with an exception
                        if (m.Success &&
                            int.TryParse(m.Groups["pages"].Value,
                                         System.Globalization.NumberStyles.Integer,
                                         System.Globalization.CultureInfo.InvariantCulture,
                                         out int pageCount))
                        {
                            pages = pageCount;
                        }
                    }
                }
                else
                {
                    // Layout with a list of page links: take the last item that is not the "next" link
                    var lastPageLink = pagebody.GetChildWithClass("action-bar")
                                               ?.GetChildWithClass("pagination")
                                               ?.Element("ul")
                                               ?.Elements("li")
                                               .LastOrDefault(n => !n.GetAttributeValue("class", "").Split(' ').Contains("next"));

                    // A non-numeric last item keeps the default of one page instead of throwing
                    if (lastPageLink != null &&
                        int.TryParse(lastPageLink.InnerText,
                                     System.Globalization.NumberStyles.Integer,
                                     System.Globalization.CultureInfo.InvariantCulture,
                                     out int lastPage))
                    {
                        pages = lastPage;
                    }
                }
            }

            return new ThreadInfo(title, author, pages);
        }
コード例 #14
0
 /// <summary>
 /// Reads the page title from the document's html/head/title element.
 /// </summary>
 /// <param name="page">The document to read the title from.</param>
 /// <returns>
 /// The result of cleaning up the title element's text; the cleanup receives null
 /// when any element along the path is missing.
 /// </returns>
 private string GetPageTitle(HtmlDocument page)
 {
     // Every step is null-conditional, including the one after "html", so a document
     // without an <html> element no longer causes a NullReferenceException.
     return(ForumPostTextConverter.CleanupWebString(
                page.DocumentNode
                .Element("html")
                ?.Element("head")
                ?.Element("title")
                ?.InnerText));
 }
コード例 #15
0
ファイル: PhpBBAdapter2.cs プロジェクト: MizMahem/NetTally
        /// <summary>
        /// Reads the author's name from a post's div node.
        /// </summary>
        /// <param name="div">Div node that contains the post.</param>
        /// <returns>
        /// The result of cleaning up the author anchor's text; the cleanup receives null
        /// when any element along the path is missing.
        /// </returns>
        private string GetPostAuthor(HtmlNode div)
        {
            // Path: div.inner -> div.postbody -> p.author -> first <strong> descendant -> <a>
            HtmlNode? nameAnchor = div.GetChildWithClass("div", "inner")
                                      ?.GetChildWithClass("div", "postbody")
                                      ?.GetChildWithClass("p", "author")
                                      ?.Descendants("strong").FirstOrDefault()
                                      ?.Element("a");

            return ForumPostTextConverter.CleanupWebString(nameAnchor?.InnerText);
        }
コード例 #16
0
ファイル: phpBBAdapter.cs プロジェクト: MizMahem/NetTally
        /// <summary>
        /// Get a completed post from the provided HTML div node.
        /// </summary>
        /// <param name="div">Div node that contains the post.</param>
        /// <param name="quest">Not used by this method.</param>
        /// <returns>
        /// Returns a post object with required information, or null when the node has no id
        /// or when the post cannot be constructed.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="div"/> is null.</exception>
        private Post? GetPost(HtmlNode div, IQuest quest)
        {
            if (div == null)
            {
                throw new ArgumentNullException(nameof(div));
            }

            // An empty id has no prefix character to strip.  Report "no post" instead of
            // letting Substring throw ArgumentOutOfRangeException.
            if (string.IsNullOrEmpty(div.Id))
            {
                return null;
            }

            // id="p12345"
            string id = div.Id.Substring(1);

            // No way to get the post number??
            int number = 0;

            HtmlNode? inner        = div.GetChildWithClass("div", "inner");
            HtmlNode? postbody     = inner?.GetChildWithClass("div", "postbody");
            HtmlNode? authorNode   = postbody?.GetChildWithClass("p", "author");
            HtmlNode? authorStrong = authorNode?.Descendants("strong").FirstOrDefault();
            HtmlNode? authorAnchor = authorStrong?.Element("a");

            string author = ForumPostTextConverter.CleanupWebString(authorAnchor?.InnerText);

            // Get the full post text.  Two different layout variants.
            var content = postbody?.GetChildWithClass("div", "content");

            if (content == null)
            {
                content = postbody?.Elements("div").FirstOrDefault(n => n.Id.StartsWith("post_content", StringComparison.Ordinal));
            }

            string text = ForumPostTextConverter.ExtractPostText(content, n => false, Host);

            try
            {
                Origin origin = new Origin(author, id, number, Site, GetPermalinkForId(id));
                return new Post(origin, text);
            }
            catch
            {
                // NOTE(review): any failure while building the origin or post is reported as
                // "no post" — presumably constructor validation; confirm which exceptions are expected.
                return null;
            }
        }
コード例 #17
0
        /// <summary>
        /// Reads the author's name from a post's list item node.
        /// </summary>
        /// <param name="li">List item node that contains the post.</param>
        /// <returns>
        /// The cleaned-up text of the "author" div found under the div marked
        /// itemprop="author", or an empty string when either is missing.
        /// </returns>
        private string GetPostAuthor(HtmlNode li)
        {
            var authorDiv = li.Descendants("div")
                              .FirstOrDefault(a => a.GetAttributeValue("itemprop", "") == "author")
                              ?.GetDescendantWithClass("div", "author");

            if (authorDiv == null)
            {
                return "";
            }

            return ForumPostTextConverter.CleanupWebString(authorDiv.InnerText);
        }
コード例 #18
0
        /// <summary>
        /// Extracts the text of the post with the given id from the page.
        /// </summary>
        /// <param name="page">Document containing the post.</param>
        /// <param name="id">Post id; the element looked up is "post_message_" followed by this id.</param>
        /// <param name="quest">Quest supplying the thread URI.</param>
        /// <returns>The text produced by the converter for the post's message element.</returns>
        private string GetPostText(HtmlDocument page, string id, IQuest quest)
        {
            var messageNode = page.GetElementbyId("post_message_" + id);

            // Elements with the quote class are left out of the extracted text
            var skipQuotes = ForumPostTextConverter.GetClassExclusionPredicate("bbcode_quote");

            // Scheme and authority of the thread address, with a trailing slash
            string authority = quest.ThreadUri.GetLeftPart(UriPartial.Authority);
            var    host      = new Uri(authority + "/");

            return ForumPostTextConverter.ExtractPostText(messageNode, skipQuotes, host);
        }
コード例 #19
0
ファイル: XenForo2Adapter2.cs プロジェクト: MizMahem/NetTally
        /// <summary>
        /// Reads the page title from the document's html/head/title element.
        /// </summary>
        /// <param name="page">The document to read the title from.</param>
        /// <param name="headerNode">
        /// Not used at present.  An earlier approach read the title from the h1 inside the
        /// header's "p-title" div.
        /// </param>
        /// <returns>
        /// The result of cleaning up the title element's text; the cleanup receives null
        /// when any element along the path is missing.
        /// </returns>
        private string GetPageTitle(HtmlDocument page, HtmlNode headerNode)
        {
            // Every step is null-conditional, including the one after "html", so a document
            // without an <html> element no longer causes a NullReferenceException.
            return(ForumPostTextConverter.CleanupWebString(
                       page.DocumentNode
                       .Element("html")
                       ?.Element("head")
                       ?.Element("title")
                       ?.InnerText));
        }
コード例 #20
0
        /// <summary>
        /// Reads the author's name from a post's list item node.
        /// </summary>
        /// <param name="li">List item node that contains the post.</param>
        /// <returns>
        /// An empty string when the item has no child div whose class is exactly
        /// "postdetails"; otherwise the result of cleaning up the username anchor's text.
        /// </returns>
        private string GetPostAuthor(HtmlNode li)
        {
            var details = li.Elements("div").FirstOrDefault(n => n.GetAttributeValue("class", "") == "postdetails");

            if (details == null)
            {
                return "";
            }

            // Path: div.userinfo -> a.username
            HtmlNode? nameAnchor = details.GetChildWithClass("div", "userinfo")
                                          ?.GetChildWithClass("a", "username");

            return ForumPostTextConverter.CleanupWebString(nameAnchor?.InnerText);
        }
コード例 #21
0
        /// <summary>
        /// Get thread info from the provided page.
        /// </summary>
        /// <param name="page">A web page from a forum that this adapter can handle.</param>
        /// <returns>
        /// Returns thread information that can be gleaned from that page: the cleaned-up
        /// title, an empty author, and the page count (1 when no "Page N of M" text is found).
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="page"/> is null.</exception>
        public ThreadInfo GetThreadInfo(HtmlDocument page)
        {
            if (page == null)
            {
                throw new ArgumentNullException(nameof(page));
            }

            string author = string.Empty; // vBulletin doesn't show thread authors
            int    pages  = 1;

            // Find the page title.  Every step is null-conditional so a document without
            // an <html> or <head> element yields an empty title instead of a NullReferenceException.
            string title = page.DocumentNode.Element("html")?.Element("head")?.Element("title")?.InnerText ?? "";
            title = ForumPostTextConverter.CleanupWebString(title);

            // Get the number of pages from the navigation elements.
            // If there is no form, that means there's only one page in the thread.  A page
            // without the "pagination_top" element is handled the same way rather than
            // causing a NullReferenceException.
            var paginationForm = page.GetElementbyId("pagination_top")?.Element("form");
            var pagesText      = paginationForm?.Element("span")?.Element("a")?.InnerText;

            if (pagesText != null)
            {
                Match m = Regex.Match(pagesText, @"Page \d+ of (?<pages>\d+)");

                // TryParse: \d also matches non-ASCII digits, which Parse would reject with an exception
                if (m.Success &&
                    int.TryParse(m.Groups["pages"].Value,
                                 System.Globalization.NumberStyles.Integer,
                                 System.Globalization.CultureInfo.InvariantCulture,
                                 out int pageCount))
                {
                    pages = pageCount;
                }
            }

            return new ThreadInfo(title, author, pages);
        }
コード例 #22
0
ファイル: PhpBBAdapter2.cs プロジェクト: MizMahem/NetTally
        /// <summary>
        /// Extracts the text of a post from its div node.
        /// </summary>
        /// <param name="div">Div node that contains the post.</param>
        /// <param name="quest">Quest whose thread URI is handed to the converter.</param>
        /// <returns>The converted post text, or an empty string when no content element is found.</returns>
        private string GetPostText(HtmlNode div, IQuest quest)
        {
            HtmlNode? postbody = div.GetChildWithClass("div", "inner")
                                    ?.GetChildWithClass("div", "postbody");

            // Two layout variants: a div with class "content", or a div whose id starts with "post_content".
            HtmlNode? content = postbody?.GetChildWithClass("div", "content")
                                ?? postbody?.Elements("div").FirstOrDefault(n => n.Id.StartsWith("post_content", StringComparison.Ordinal));

            if (content == null)
            {
                return "";
            }

            // Nothing is excluded from the extracted text.
            // NOTE(review): the full thread URI is passed here rather than an authority-only host — confirm this is intended.
            return ForumPostTextConverter.ExtractPostText(content, n => false, quest.ThreadUri);
        }
コード例 #23
0
        /// <summary>
        /// Reads the thread author's name from the page.
        /// </summary>
        /// <param name="page">The thread page to inspect.</param>
        /// <returns>
        /// The cleaned-up text of the "username" child of the "pageDescription" element,
        /// or the cleanup of an empty string when that element or child is missing.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the page content cannot be found or the content has no "titleBar" element.
        /// </exception>
        private string GetPageAuthor(HtmlDocument page)
        {
            // Find a common parent for other data
            HtmlNode? content = GetPageContent(page, PageType.Thread);

            if (content == null)
            {
                throw new InvalidOperationException("Cannot find content on page.");
            }

            // Non-thread pages (such as threadmark pages) won't have a title bar.
            if (content.GetDescendantWithClass("titleBar") == null)
            {
                throw new InvalidOperationException("Not a valid forum thread.");
            }

            // Find the thread author
            string rawAuthor = page.GetElementbyId("pageDescription")
                                   ?.GetChildWithClass("username")
                                   ?.InnerText
                               ?? "";

            return ForumPostTextConverter.CleanupWebString(rawAuthor);
        }
コード例 #24
0
        /// <summary>
        /// Extracts the text of a post from its list item node.
        /// </summary>
        /// <param name="li">List item node that contains the post.</param>
        /// <param name="quest">Quest supplying the thread URI.</param>
        /// <returns>
        /// The converted post text, or an empty string when no div marked itemprop="text"
        /// is found inside the "b-post__content" div.
        /// </returns>
        private string GetPostText(HtmlNode li, IQuest quest)
        {
            var textNode = li.GetDescendantWithClass("div", "b-post__content")
                             ?.Descendants("div")
                             .FirstOrDefault(a => a.GetAttributeValue("itemprop", "") == "text");

            if (textNode == null)
            {
                return "";
            }

            // Elements with the quote class are left out of the extracted text
            var skipQuotes = ForumPostTextConverter.GetClassExclusionPredicate("bbcode_quote");

            // Scheme and authority of the thread address, with a trailing slash
            string authority = quest.ThreadUri.GetLeftPart(UriPartial.Authority);
            var    host      = new Uri(authority + "/");

            return ForumPostTextConverter.ExtractPostText(textNode, skipQuotes, host);
        }
コード例 #25
0
ファイル: XenForo2Adapter2.cs プロジェクト: MizMahem/NetTally
        /// <summary>
        /// Extracts the text of a post from its article node.
        /// </summary>
        /// <param name="article">Article node that contains the post.</param>
        /// <param name="quest">Quest supplying the spoiler setting and the thread URI.</param>
        /// <returns>The text produced by the converter for the post's "bbWrapper" div.</returns>
        private string GetPostText(HtmlNode article, IQuest quest)
        {
            // Classes of elements that are left out of the extracted text
            var excludedClasses = new List<string>
            {
                "bbCodeQuote",
                "messageTextEndMarker",
                "advbbcodebar_encadre",
                "advbbcodebar_article",
                "adv_tabs_wrapper",
                "adv_slider_wrapper"
            };

            if (quest.IgnoreSpoilers)
            {
                excludedClasses.Add("bbCodeSpoilerContainer");
            }

            var exclusions = ForumPostTextConverter.GetClassesExclusionPredicate(excludedClasses);

            // Path: article.message-body -> div.bbWrapper
            var messageBody = article.GetDescendantWithClass("article", "message-body");
            var wrapper     = messageBody?.GetChildWithClass("div", "bbWrapper");

            // Scheme and authority of the thread address, with a trailing slash
            string authority = quest.ThreadUri.GetLeftPart(UriPartial.Authority);
            var    host      = new Uri(authority + "/");

            return ForumPostTextConverter.ExtractPostText(wrapper, exclusions, host);
        }
コード例 #26
0
        /// <summary>
        /// Get thread info from the provided page.
        /// </summary>
        /// <param name="page">A web page from a forum that this adapter can handle.</param>
        /// <returns>
        /// Returns thread information that can be gleaned from that page: the cleaned-up
        /// title, an empty author, and the page count (1 when no "Page N of M" text is found).
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="page"/> is null.</exception>
        public ThreadInfo GetThreadInfo(HtmlDocument page)
        {
            if (page == null)
            {
                throw new ArgumentNullException(nameof(page));
            }

            string author = string.Empty; // vBulletin doesn't show thread authors
            int    pages  = 1;

            // May be null when the document has no <html> element; every use below is null-conditional.
            var doc = page.DocumentNode.Element("html");

            // Find the page title
            string title = ForumPostTextConverter.CleanupWebString(doc?.Element("head")?.Element("title")?.InnerText);

            // If there's no pagenav div, that means there's no navigation to alternate pages,
            // which means there's only one page in the thread.
            var vbMenuControl = doc?.GetDescendantWithClass("div", "pagenav")
                                   ?.GetDescendantWithClass("td", "vbmenu_control");

            if (vbMenuControl != null)
            {
                Match m = Regex.Match(vbMenuControl.InnerText, @"Page \d+ of (?<pages>\d+)");

                // TryParse: \d also matches non-ASCII digits, which Parse would reject with an exception
                if (m.Success &&
                    int.TryParse(m.Groups["pages"].Value,
                                 System.Globalization.NumberStyles.Integer,
                                 System.Globalization.CultureInfo.InvariantCulture,
                                 out int pageCount))
                {
                    pages = pageCount;
                }
            }

            return new ThreadInfo(title, author, pages);
        }
コード例 #27
0
ファイル: XenForo2Adapter.cs プロジェクト: MizMahem/NetTally
        /// <summary>
        /// Get a completed post from the provided HTML list item node.
        /// </summary>
        /// <param name="article">List item node that contains the post.</param>
        /// <returns>Returns a post object with required information.</returns>
        private Post?GetPost(HtmlNode article, IQuest quest)
        {
            if (article == null)
            {
                throw new ArgumentNullException(nameof(article));
            }

            string author;
            string id;
            string text;
            int    number;

            // Author and ID are in the basic list item attributes
            author = ForumPostTextConverter.CleanupWebString(article.GetAttributeValue("data-author", ""));
            id     = ForumPostTextConverter.CleanupWebString(article.GetAttributeValue("data-content", "post-").Substring("post-".Length));

            if (AdvancedOptions.Instance.DebugMode)
            {
                author = $"{author}_{id}";
            }

            var attribution = article.GetDescendantWithClass("header", "message-attribution");

            if (attribution == null)
            {
                return(null);
            }

            string postNum = attribution.Descendants("a").LastOrDefault(c => c.ChildNodes.Count == 1)?.InnerText.Trim() ?? "";

            if (string.IsNullOrEmpty(postNum))
            {
                return(null);
            }


            if (postNum[0] == '#')
            {
                var numSpan = postNum.AsSpan()[1..];
コード例 #28
0
        /// <summary>
        /// Reads the author's name for the post with the given id.
        /// </summary>
        /// <param name="page">Document containing the post.</param>
        /// <param name="id">Post id; the element looked up is "postmenu_" followed by this id.</param>
        /// <returns>
        /// The cleaned-up text of the span inside the post menu's anchor when there is one,
        /// otherwise of the anchor itself; the cleanup of an empty string when the post menu
        /// element or its anchor is missing.
        /// </returns>
        private string GetPostAuthor(HtmlDocument page, string id)
        {
            // A missing post menu element is treated like a missing anchor instead of
            // causing a NullReferenceException.
            var authorAnchor = page.GetElementbyId($"postmenu_{id}")?.Element("a");

            // Prefer the span's text when the anchor wraps the name in one; the span is looked up only once.
            var nameNode = authorAnchor?.Element("span") ?? authorAnchor;

            string author = nameNode?.InnerText ?? "";

            return ForumPostTextConverter.CleanupWebString(author);
        }
コード例 #29
0
        /// <summary>
        /// Extracts the text of a post from its list item node.
        /// </summary>
        /// <param name="li">List item node that contains the post.</param>
        /// <param name="id">Post id; the element looked up is "post_message_" followed by this id.</param>
        /// <param name="quest">Quest supplying the thread URI.</param>
        /// <returns>
        /// An empty string when the item has no child div whose class is exactly
        /// "postdetails"; otherwise the text produced by the converter for the message's blockquote.
        /// </returns>
        private string GetPostText(HtmlNode li, string id, IQuest quest)
        {
            bool hasPostDetails = li.Elements("div").Any(n => n.GetAttributeValue("class", "") == "postdetails");

            if (!hasPostDetails)
            {
                return "";
            }

            // Text
            var blockquote = li.OwnerDocument.GetElementbyId($"post_message_{id}")?.Element("blockquote");

            // Elements with the quote class are left out of the extracted text
            var skipQuotes = ForumPostTextConverter.GetClassExclusionPredicate("bbcode_quote");

            // Scheme and authority of the thread address, with a trailing slash
            string authority = quest.ThreadUri.GetLeftPart(UriPartial.Authority);
            var    host      = new Uri(authority + "/");

            return ForumPostTextConverter.ExtractPostText(blockquote, skipQuotes, host);
        }
コード例 #30
0
ファイル: NodeBBAdapter.cs プロジェクト: MizMahem/NetTally
        /// <summary>
        /// Get a completed post from the provided HTML node.
        /// </summary>
        /// <param name="li">Node that contains the post; its data-pid, data-username and data-index attributes are read.</param>
        /// <param name="quest">Not used by this method.</param>
        /// <returns>Returns a post object with required information, or null when the post cannot be constructed.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="li"/> is null.</exception>
        private Post? GetPost(HtmlNode li, IQuest quest)
        {
            if (li == null)
            {
                throw new ArgumentNullException(nameof(li));
            }

            string id     = li.GetAttributeValue("data-pid", "");
            string author = li.GetAttributeValue("data-username", "");

            // A malformed data-index falls back to 0 (the same value used when the attribute
            // is absent) instead of throwing FormatException.
            if (!int.TryParse(li.GetAttributeValue("data-index", "0"),
                              System.Globalization.NumberStyles.Integer,
                              System.Globalization.CultureInfo.InvariantCulture,
                              out int number))
            {
                number = 0;
            }

            var content = li.GetChildWithClass("div", "content");

            // Get the full post text.
            string text = ForumPostTextConverter.ExtractPostText(content, n => false, Host);

            try
            {
                Origin origin = new Origin(author, id, number, Site, GetPermalinkForId(id));
                return new Post(origin, text);
            }
            catch
            {
                // NOTE(review): any failure while building the origin or post is reported as
                // "no post" — presumably constructor validation; confirm which exceptions are expected.
                return null;
            }
        }