Beispiel #1
0
        public void Fonts_Test()
        {
            // Both vote lines sit inside a font-family span; the expected text below has
            // the span removed and the empty <br /> line dropped.
            const string markup =
                @"<span style=""font-family: 'times new roman'"">[x][Wine] Red<br />
-[x] Pinot Noir</span><br />
<br />
Font test";

            const string expected =
                @"[x][Wine] Red
-[x] Pinot Noir
Font test";

            var root = GetHtmlFromString(markup);

            // The fixture has to parse before anything else can be checked.
            Assert.IsTrue(root != null);

            if (root == null)
            {
                // Only reachable if the assertion above did not end the test; it also lets
                // null-flow analysis treat root as non-null from here on.
                return;
            }

            string extracted = ForumPostTextConverter.ExtractPostText(root, GetXenForoPredicate(), exampleUri);

            Assert.AreEqual(expected, extracted);

            // The extracted text must still be recognised as a two-line vote.
            var post = new Post(
                new Origin("Kinematics", "123456", 10, new Uri("http://www.example.com/"), "http://www.example.com"),
                extracted);

            Assert.IsTrue(post.HasVote);
            Assert.AreEqual(2, post.VoteLines.Count);
            Assert.AreEqual("[x][Wine] Red", post.VoteLines[0].ToString());
            Assert.AreEqual("-[x] Pinot Noir", post.VoteLines[1].ToString());
        }
Beispiel #2
0
        public void Check_Embedded_Callout_Plan()
        {
            // A vote line whose plan name was turned into a member link; the expected text
            // keeps the link as a 『url』 tag and drops the trailing empty line.
            const string markup =
                @"The referenced post did not have the problem described, but another post did.  Basically, <br />
[x] Plan <a href=""https://forums.sufficientvelocity.com/members/4076/"" class=""username"" data-xf-init=""member-tooltip"" data-user-id=""4076"" data-username=""@Kinematics"">@Kinematics</a> <br />
Wouldn't be applied to my proposed plan because it got turned into a member link (the '@' symbol is dropped on QQ's forums, so that doesn't interfere in this case).<br />
<br />";

            const string expected =
                @"The referenced post did not have the problem described, but another post did.  Basically, 
[x] Plan 『url=""https://forums.sufficientvelocity.com/members/4076/""』@Kinematics『/url』 
Wouldn't be applied to my proposed plan because it got turned into a member link (the '@' symbol is dropped on QQ's forums, so that doesn't interfere in this case).";

            var root = GetHtmlFromString(markup);

            // The fixture has to parse before anything else can be checked.
            Assert.IsTrue(root != null);

            if (root == null)
            {
                // Only reachable if the assertion above did not end the test; it also lets
                // null-flow analysis treat root as non-null from here on.
                return;
            }

            string extracted = ForumPostTextConverter.ExtractPostText(root, GetXenForoPredicate(), exampleUri);

            Assert.AreEqual(expected, extracted);

            // Exactly one line of the extracted text must register as a vote line.
            var post = new Post(
                new Origin("Kinematics1", "123456", 10, new Uri("http://www.example.com/"), "http://www.example.com"),
                extracted);

            Assert.IsTrue(post.HasVote);
            Assert.AreEqual(1, post.VoteLines.Count);
        }
Beispiel #3
0
        public void Convert_Basic_HTML()
        {
            // Covers <br /> line breaks, the &#039; and &quot; entities, and an <i> element;
            // the expected text shows them as newlines, ' and "", and 『i』…『/i』.
            const string markup = @"[X] Plan Perfectly Balanced (As all things should be)<br />
-[X] Life (50%)<br />
-[X] Aeromancy (50%)<br />
<br />
I think Garth&#039;s entire thing so far is life, so he should try &quot;max&quot; that out as <i>quickly</i> as possible. On the other hand, some versatility is required, and he has already started down the Aeromancy path.<br />";

            const string expected =
                @"[X] Plan Perfectly Balanced (As all things should be)
-[X] Life (50%)
-[X] Aeromancy (50%)
I think Garth's entire thing so far is life, so he should try ""max"" that out as 『i』quickly『/i』 as possible. On the other hand, some versatility is required, and he has already started down the Aeromancy path.";

            var root = GetHtmlFromString(markup);

            // The fixture has to parse before anything else can be checked.
            Assert.IsTrue(root != null);

            if (root == null)
            {
                // Only reachable if the assertion above did not end the test; it also lets
                // null-flow analysis treat root as non-null from here on.
                return;
            }

            string extracted = ForumPostTextConverter.ExtractPostText(root, GetXenForoPredicate(), exampleUri);

            Assert.AreEqual(expected, extracted);
        }
Beispiel #4
0
        public void Full_Plan_Strikethrough_1()
        {
            // A line-through span wraps the entire four-line plan. The expected text shows
            // the struck region as a single ❰…❱ line whose original lines are joined by ⦂.
            string html =
                @"<span style=""text-decoration: line-through"">[X] Plan Triplemancer<br />
-[X] Aeromancy (40%)<br />
-[X] Hydromancy (30%)<br />
-[X] Pyromancy (30%)</span><br />
<br />
WeirGarth has minmaxed our Life pretty good, but we do need some extra options. The three elements will do nicely. Most focus on Aeromancy to maximize the chance of finishing the Rod and synergy with griffins. Hydromancy for Deep Den and Lannisport plus reconnecting with Bel. Pyromancy because Melisandre should have the most free time availalbe and all our fire and light imagery makes me think we have the most talent with it compared to other elements.<br />
<br />
Edit: Vote changed. See next page.";

            string expectedText =
                @"❰[X] Plan Triplemancer⦂-[X] Aeromancy (40%)⦂-[X] Hydromancy (30%)⦂-[X] Pyromancy (30%)❱
WeirGarth has minmaxed our Life pretty good, but we do need some extra options. The three elements will do nicely. Most focus on Aeromancy to maximize the chance of finishing the Rod and synergy with griffins. Hydromancy for Deep Den and Lannisport plus reconnecting with Bel. Pyromancy because Melisandre should have the most free time availalbe and all our fire and light imagery makes me think we have the most talent with it compared to other elements.
Edit: Vote changed. See next page.";

            var node = GetHtmlFromString(html);

            if (node != null)
            {
                string results = ForumPostTextConverter.ExtractPostText(node, GetXenForoPredicate(), exampleUri);

                Assert.AreEqual(expectedText, results);
            }

            // Fails the test when the fixture could not be parsed at all.
            Assert.IsTrue(node != null);
        }
Beispiel #5
0
        public void Content_Strikethrough()
        {
            // A line-through span covers part of one line only; the expected text wraps
            // just that fragment in ❰…❱ and leaves the rest of the vote untouched.
            const string markup =
                @"[X] Plan Air, <span style=""text-decoration: line-through"">Earth, Water,</span> Fire<br />
-[X] Aeromancy (40%)<br />
-[X] Pyromancy (25%)<br />
-[X] Pyromantic Divination (35%)<br />
<br />
We seem to have a real talent for divination considering that we managed to affect the past when we used the glass candle.";

            const string expected =
                @"[X] Plan Air, ❰Earth, Water,❱ Fire
-[X] Aeromancy (40%)
-[X] Pyromancy (25%)
-[X] Pyromantic Divination (35%)
We seem to have a real talent for divination considering that we managed to affect the past when we used the glass candle.";

            var root = GetHtmlFromString(markup);

            // The fixture has to parse before anything else can be checked.
            Assert.IsTrue(root != null);

            if (root == null)
            {
                // Only reachable if the assertion above did not end the test; it also lets
                // null-flow analysis treat root as non-null from here on.
                return;
            }

            string extracted = ForumPostTextConverter.ExtractPostText(root, GetXenForoPredicate(), exampleUri);

            Assert.AreEqual(expected, extracted);
        }
Beispiel #6
0
        private string GetPostText(HtmlNode li, IQuest quest)
        {
            // Follow primaryContent -> messageContent -> article -> blockquote from the list
            // item; a missing link anywhere along the way leaves postBody null.
            HtmlNode? postBody = li.GetChildWithClass("primaryContent")
                                   ?.GetChildWithClass("messageContent")
                                   ?.Element("article")
                                   ?.Element("blockquote");

            // Classes of elements that we don't want to include in the extracted text.
            var skippedClasses = new List<string>
            {
                "bbCodeQuote",
                "messageTextEndMarker",
                "advbbcodebar_encadre",
                "advbbcodebar_article",
                "adv_tabs_wrapper",
                "adv_slider_wrapper",
            };

            // Spoiler containers are only skipped when the quest asks for it.
            if (quest.IgnoreSpoilers)
            {
                skippedClasses.Add("bbCodeSpoilerContainer");
            }

            var skipPredicate = ForumPostTextConverter.GetClassesExclusionPredicate(skippedClasses);

            // Scheme and authority of the thread address, with a trailing slash.
            string authority = quest.ThreadUri.GetLeftPart(UriPartial.Authority);
            var host = new Uri(authority + "/");

            // Get the full post text.
            return ForumPostTextConverter.ExtractPostText(postBody, skipPredicate, host);
        }
Beispiel #7
0
        /// <summary>
        /// Get a completed post from the provided HTML list item node.
        /// </summary>
        /// <param name="li">List item node that contains the post.</param>
        /// <param name="quest">Quest being read. Not used by this method.</param>
        /// <returns>
        /// Returns a post object with required information, or null if the list item's id is
        /// too short to contain a post id or the post could not be constructed.
        /// </returns>
        /// <exception cref="ArgumentNullException">If <paramref name="li"/> is null.</exception>
        private Post? GetPost(HtmlNode li, IQuest quest)
        {
            if (li == null)
            {
                throw new ArgumentNullException(nameof(li));
            }

            const string idPrefix = "post_";

            // Substring below would throw on an id shorter than the prefix; a node like
            // that cannot be turned into a post.
            if (li.Id.Length < idPrefix.Length)
            {
                return null;
            }

            string author = "";
            string text   = "";
            int    number = 0;

            // ID
            string id = li.Id.Substring(idPrefix.Length);

            // Number: taken from the "name" attribute of the element with id "postcount<id>".
            // It stays 0 when that element is missing or its value is not an integer.
            var postCount = li.OwnerDocument.GetElementbyId($"postcount{id}");

            if (postCount != null &&
                int.TryParse(postCount.GetAttributeValue("name", "0"),
                             System.Globalization.NumberStyles.Integer,
                             System.Globalization.CultureInfo.InvariantCulture,
                             out int parsedNumber))
            {
                number = parsedNumber;
            }

            HtmlNode? postDetails = li.Elements("div").FirstOrDefault(n => n.GetAttributeValue("class", "") == "postdetails");

            if (postDetails != null)
            {
                // Author
                HtmlNode? userinfo = postDetails.GetChildWithClass("div", "userinfo");
                HtmlNode? username = userinfo?.GetChildWithClass("a", "username");
                author = ForumPostTextConverter.CleanupWebString(username?.InnerText);

                // Text
                var message = li.OwnerDocument.GetElementbyId("post_message_" + id)?.Element("blockquote");

                // Predicate filtering out elements that we don't want to include
                var exclusion = ForumPostTextConverter.GetClassExclusionPredicate("bbcode_quote");

                // Get the full post text.
                text = ForumPostTextConverter.ExtractPostText(message, exclusion, Host);
            }

            try
            {
                Origin origin = new Origin(author, id, number, Site, GetPermalinkForId(id));
                return new Post(origin, text);
            }
            catch (Exception)
            {
                // Best effort: any failure while building the origin or post yields no post.
                return null;
            }
        }
Beispiel #8
0
        /// <summary>
        /// Get a completed post from the provided HTML div node.
        /// </summary>
        /// <param name="div">Div node that contains the post.</param>
        /// <param name="quest">Quest being read. Not used by this method.</param>
        /// <returns>
        /// Returns a post object with required information, or null if the div has no id
        /// or the post could not be constructed.
        /// </returns>
        /// <exception cref="ArgumentNullException">If <paramref name="div"/> is null.</exception>
        private Post? GetPost(HtmlNode div, IQuest quest)
        {
            if (div == null)
            {
                throw new ArgumentNullException(nameof(div));
            }

            // id="p12345": the post id is everything after the first character.
            // Substring(1) would throw on an empty id, so a div without one is rejected.
            if (string.IsNullOrEmpty(div.Id))
            {
                return null;
            }

            string id = div.Id.Substring(1);

            // No post number is read from this layout, so it stays 0.
            int number = 0;

            HtmlNode? inner    = div.GetChildWithClass("div", "inner");
            HtmlNode? postbody = inner?.GetChildWithClass("div", "postbody");

            // Author: the anchor inside the first <strong> under the "author" paragraph.
            HtmlNode? authorNode   = postbody?.GetChildWithClass("p", "author");
            HtmlNode? authorStrong = authorNode?.Descendants("strong").FirstOrDefault();
            HtmlNode? authorAnchor = authorStrong?.Element("a");

            string author = ForumPostTextConverter.CleanupWebString(authorAnchor?.InnerText);

            // Get the full post text.  Two different layout variants: a div with class
            // "content", otherwise the first div whose id starts with "post_content".
            HtmlNode? content = postbody?.GetChildWithClass("div", "content")
                ?? postbody?.Elements("div").FirstOrDefault(n => n.Id.StartsWith("post_content", StringComparison.Ordinal));

            string text = ForumPostTextConverter.ExtractPostText(content, n => false, Host);

            try
            {
                Origin origin = new Origin(author, id, number, Site, GetPermalinkForId(id));
                return new Post(origin, text);
            }
            catch (Exception)
            {
                // Best effort: any failure while building the origin or post yields no post.
                return null;
            }
        }
        private string GetPostText(HtmlDocument page, string id, IQuest quest)
        {
            // The message element is looked up by the element id "post_message_<id>".
            var messageNode = page.GetElementbyId("post_message_" + id);

            // Elements with class "bbcode_quote" are left out of the extracted text.
            var skipQuotes = ForumPostTextConverter.GetClassExclusionPredicate("bbcode_quote");

            // Scheme and authority of the thread address, with a trailing slash.
            string authority = quest.ThreadUri.GetLeftPart(UriPartial.Authority);
            var host = new Uri(authority + "/");

            // Get the full post text.
            return ForumPostTextConverter.ExtractPostText(messageNode, skipQuotes, host);
        }
Beispiel #10
0
        private string GetPostText(HtmlNode div, IQuest quest)
        {
            HtmlNode? postbody = div.GetChildWithClass("div", "inner")
                                    ?.GetChildWithClass("div", "postbody");

            // Two different layout variants: a div with class "content", otherwise the
            // first div whose id starts with "post_content".
            HtmlNode? content = postbody?.GetChildWithClass("div", "content")
                ?? postbody?.Elements("div").FirstOrDefault(n => n.Id.StartsWith("post_content", StringComparison.Ordinal));

            if (content == null)
            {
                return "";
            }

            // Nothing is excluded here, and the thread URI itself is passed as the base address.
            return ForumPostTextConverter.ExtractPostText(content, n => false, quest.ThreadUri);
        }
Beispiel #11
0
        private string GetPostText(HtmlNode li, IQuest quest)
        {
            // The text node is the first div with itemprop="text" beneath the
            // "b-post__content" div.
            var textNode = li.GetDescendantWithClass("div", "b-post__content")
                             ?.Descendants("div")
                             .FirstOrDefault(d => d.GetAttributeValue("itemprop", "") == "text");

            if (textNode == null)
            {
                return "";
            }

            // Elements with class "bbcode_quote" are left out of the extracted text.
            var skipQuotes = ForumPostTextConverter.GetClassExclusionPredicate("bbcode_quote");

            // Scheme and authority of the thread address, with a trailing slash.
            string authority = quest.ThreadUri.GetLeftPart(UriPartial.Authority);
            var host = new Uri(authority + "/");

            // Get the full post text.
            return ForumPostTextConverter.ExtractPostText(textNode, skipQuotes, host);
        }
Beispiel #12
0
        private string GetPostText(HtmlNode article, IQuest quest)
        {
            // Classes of elements that we don't want to include in the extracted text.
            var skippedClasses = new List<string>
            {
                "bbCodeQuote",
                "messageTextEndMarker",
                "advbbcodebar_encadre",
                "advbbcodebar_article",
                "adv_tabs_wrapper",
                "adv_slider_wrapper",
            };

            // Spoiler containers are only skipped when the quest asks for it.
            if (quest.IgnoreSpoilers)
            {
                skippedClasses.Add("bbCodeSpoilerContainer");
            }

            var skipPredicate = ForumPostTextConverter.GetClassesExclusionPredicate(skippedClasses);

            // Post body: the "bbWrapper" div looked up from the "message-body" article.
            HtmlNode? messageBody = article.GetDescendantWithClass("article", "message-body");
            HtmlNode? wrapper     = messageBody?.GetChildWithClass("div", "bbWrapper");

            // Scheme and authority of the thread address, with a trailing slash.
            string authority = quest.ThreadUri.GetLeftPart(UriPartial.Authority);
            var host = new Uri(authority + "/");

            return ForumPostTextConverter.ExtractPostText(wrapper, skipPredicate, host);
        }
Beispiel #13
0
        private string GetPostText(HtmlNode li, string id, IQuest quest)
        {
            // Without a child div whose class attribute is exactly "postdetails" there is
            // nothing to extract.
            bool hasPostDetails = li.Elements("div")
                                    .Any(n => n.GetAttributeValue("class", "") == "postdetails");

            if (!hasPostDetails)
            {
                return "";
            }

            // Text: the blockquote inside the element with id "post_message_<id>".
            var blockquote = li.OwnerDocument.GetElementbyId("post_message_" + id)?.Element("blockquote");

            // Elements with class "bbcode_quote" are left out of the extracted text.
            var skipQuotes = ForumPostTextConverter.GetClassExclusionPredicate("bbcode_quote");

            // Scheme and authority of the thread address, with a trailing slash.
            string authority = quest.ThreadUri.GetLeftPart(UriPartial.Authority);
            var host = new Uri(authority + "/");

            // Get the full post text.
            return ForumPostTextConverter.ExtractPostText(blockquote, skipQuotes, host);
        }
Beispiel #14
0
        /// <summary>
        /// Get a completed post from the provided HTML node, reading the id, author and
        /// index from its data-pid, data-username and data-index attributes.
        /// </summary>
        /// <param name="li">Node that contains the post.</param>
        /// <param name="quest">Quest being read. Not used by this method.</param>
        /// <returns>
        /// Returns a post object with required information, or null if the post could
        /// not be constructed.
        /// </returns>
        /// <exception cref="ArgumentNullException">If <paramref name="li"/> is null.</exception>
        private Post? GetPost(HtmlNode li, IQuest quest)
        {
            if (li == null)
            {
                throw new ArgumentNullException(nameof(li));
            }

            string id     = li.GetAttributeValue("data-pid", "");
            string author = li.GetAttributeValue("data-username", "");

            // A data-index that is not an integer falls back to 0, the same value used
            // when the attribute is absent, instead of throwing out of this method.
            int number = int.TryParse(li.GetAttributeValue("data-index", "0"),
                                      System.Globalization.NumberStyles.Integer,
                                      System.Globalization.CultureInfo.InvariantCulture,
                                      out int parsedIndex)
                ? parsedIndex
                : 0;

            var content = li.GetChildWithClass("div", "content");

            // Get the full post text.  Nothing is excluded for this layout.
            string text = ForumPostTextConverter.ExtractPostText(content, n => false, Host);

            try
            {
                Origin origin = new Origin(author, id, number, Site, GetPermalinkForId(id));
                return new Post(origin, text);
            }
            catch (Exception)
            {
                // Best effort: any failure while building the origin or post yields no post.
                return null;
            }
        }
Beispiel #15
0
        public void Strikethrough_Overlap()
        {
            // The line-through span starts mid-way through the plan line and ends after the
            // marker of the next line. The expected text joins the struck region into one
            // ❰…⦂…❱ run, which leaves three vote lines once the post is parsed.
            const string markup =
                @"[X] Plan Air, <span style=""text-decoration: line-through"">Earth, Water, Fire<br />
-[X]</span> Aeromancy (40%)<br />
-[X] Pyromancy (25%)<br />
-[X] Pyromantic Divination (35%)<br />
<br />
We seem to have a real talent for divination considering that we managed to affect the past when we used the glass candle.";

            const string expected =
                @"[X] Plan Air, ❰Earth, Water, Fire⦂-[X]❱ Aeromancy (40%)
-[X] Pyromancy (25%)
-[X] Pyromantic Divination (35%)
We seem to have a real talent for divination considering that we managed to affect the past when we used the glass candle.";

            var root = GetHtmlFromString(markup);

            // The fixture has to parse before anything else can be checked.
            Assert.IsTrue(root != null);

            if (root == null)
            {
                // Only reachable if the assertion above did not end the test; it also lets
                // null-flow analysis treat root as non-null from here on.
                return;
            }

            string extracted = ForumPostTextConverter.ExtractPostText(root, GetXenForoPredicate(), exampleUri);

            Assert.AreEqual(expected, extracted);

            var post = new Post(
                new Origin("Kinematics", "123456", 10, new Uri("http://www.example.com/"), "http://www.example.com"),
                extracted);

            Assert.IsTrue(post.HasVote);
            Assert.AreEqual(3, post.VoteLines.Count);
            Assert.AreEqual("[X] Plan Air,", post.VoteLines[0].ToString());
            Assert.AreEqual("-[X] Pyromancy (25%)", post.VoteLines[1].ToString());
            Assert.AreEqual("-[X] Pyromantic Divination (35%)", post.VoteLines[2].ToString());
        }
Beispiel #16
0
        /// <summary>
        /// Get a completed post from the provided HTML list item node.
        /// </summary>
        /// <param name="li">List item node that contains the post.</param>
        /// <param name="quest">
        /// Quest being read. Its IgnoreSpoilers setting decides whether spoiler containers
        /// are excluded, and its DisplayName is used in the failure log message.
        /// </param>
        /// <returns>
        /// Returns a post object with required information, or null if the id or post
        /// number could not be read from the node or the post could not be constructed.
        /// </returns>
        /// <exception cref="ArgumentNullException">If <paramref name="li"/> is null.</exception>
        private Post? GetPost(HtmlNode li, IQuest quest)
        {
            if (li == null)
            {
                throw new ArgumentNullException(nameof(li));
            }

            const string idPrefix = "post-";

            // Substring below would throw on an id shorter than the prefix; a node like
            // that cannot be turned into a post.
            if (li.Id.Length < idPrefix.Length)
            {
                return null;
            }

            // Author and ID are in the basic list item attributes
            string author = ForumPostTextConverter.CleanupWebString(li.GetAttributeValue("data-author", ""));
            string id     = li.Id.Substring(idPrefix.Length);

            if (AdvancedOptions.Instance.DebugMode)
            {
                author = $"{author}_{id}";
            }

            // Get the primary content of the list item
            HtmlNode? primaryContent = li.GetChildWithClass("primaryContent");

            // On one branch, we can get the post text
            HtmlNode? messageContent = primaryContent?.GetChildWithClass("messageContent");
            HtmlNode? postBlock      = messageContent?.Element("article")?.Element("blockquote");

            // Predicate filtering out elements that we don't want to include
            List<string> excludedClasses = new List<string> {
                "bbCodeQuote", "messageTextEndMarker", "advbbcodebar_encadre",
                "advbbcodebar_article", "adv_tabs_wrapper", "adv_slider_wrapper"
            };

            if (quest.IgnoreSpoilers)
            {
                excludedClasses.Add("bbCodeSpoilerContainer");
            }

            var exclusions = ForumPostTextConverter.GetClassesExclusionPredicate(excludedClasses);

            // Get the full post text.
            string text = ForumPostTextConverter.ExtractPostText(postBlock, exclusions, Host);

            // On another branch of the primary content, we can get the post number.
            HtmlNode? messageMeta = primaryContent?.GetChildWithClass("messageMeta");

            // HTML parsing of the post was corrupted somehow.
            if (messageMeta == null)
            {
                return null;
            }

            HtmlNode? publicControls = messageMeta.GetChildWithClass("publicControls");
            HtmlNode? postNumber     = publicControls?.GetChildWithClass("postNumber");

            if (postNumber == null)
            {
                return null;
            }

            // Surrounding whitespace is removed first so the '#' check sees the real first character.
            string postNumberText = postNumber.InnerText.Trim();

            // Skip the leading # character.
            if (postNumberText.StartsWith("#", StringComparison.Ordinal))
            {
                postNumberText = postNumberText.Substring(1);
            }

            // A post number that is not an integer is handled like the other corrupted-markup
            // cases above: no post, rather than an exception out of this method.
            if (!int.TryParse(postNumberText,
                              System.Globalization.NumberStyles.Integer,
                              System.Globalization.CultureInfo.InvariantCulture,
                              out int number))
            {
                return null;
            }

            try
            {
                Origin origin = new Origin(author, id, number, Site, GetPermalinkForId(id));
                return new Post(origin, text);
            }
            catch (Exception e)
            {
                Logger2.LogError(e, $"Attempt to create new post failed. (Author:{author}, ID:{id}, Number:{number}, Quest:{quest.DisplayName})");
                return null;
            }
        }
Beispiel #17
0
        /// <summary>
        /// Get a completed post from the provided HTML list item.
        /// </summary>
        /// <param name="li">List item that contains the post.</param>
        /// <param name="quest">Quest being read. Not used by this method.</param>
        /// <returns>
        /// Returns a post object with required information, or null if the list item has
        /// no data-node-id or the post could not be constructed.
        /// </returns>
        /// <exception cref="ArgumentNullException">If <paramref name="li"/> is null.</exception>
        private Post? GetPost(HtmlNode li, IQuest quest)
        {
            if (li == null)
            {
                throw new ArgumentNullException(nameof(li));
            }

            // ID
            string id = li.GetAttributeValue("data-node-id", "");

            if (string.IsNullOrEmpty(id))
            {
                return null;
            }

            string author = "";
            int    number = 0;

            // Author
            var postAuthorNode = li.Descendants("div").FirstOrDefault(a => a.GetAttributeValue("itemprop", "") == "author");
            var authorNode     = postAuthorNode?.GetDescendantWithClass("div", "author");

            if (authorNode != null)
            {
                author = ForumPostTextConverter.CleanupWebString(authorNode.InnerText);
            }

            HtmlNode? contentArea = li.GetDescendantWithClass("div", "b-post__content");

            // Number
            HtmlNode? postCountAnchor = contentArea?.GetDescendantWithClass("a", "b-post__count");

            if (postCountAnchor != null)
            {
                // Surrounding whitespace is removed first so the '#' check sees the real first character.
                string postNumText = postCountAnchor.InnerText.Trim();

                if (postNumText.StartsWith("#", StringComparison.Ordinal))
                {
                    postNumText = postNumText.Substring(1);
                }

                // Text that is not an integer leaves the number at 0, the same value used
                // when the anchor is missing, instead of throwing out of this method.
                if (int.TryParse(postNumText,
                                 System.Globalization.NumberStyles.Integer,
                                 System.Globalization.CultureInfo.InvariantCulture,
                                 out int parsedNumber))
                {
                    number = parsedNumber;
                }
            }

            // Text
            var postTextNode = contentArea?.Descendants("div").FirstOrDefault(a => a.GetAttributeValue("itemprop", "") == "text");

            // Predicate filtering out elements that we don't want to include
            var exclusion = ForumPostTextConverter.GetClassExclusionPredicate("bbcode_quote");

            // Get the full post text.
            string text = ForumPostTextConverter.ExtractPostText(postTextNode, exclusion, Host);

            try
            {
                Origin origin = new Origin(author, id, number, Site, GetPermalinkForId(id));
                return new Post(origin, text);
            }
            catch (Exception)
            {
                // Best effort: any failure while building the origin or post yields no post.
                return null;
            }
        }
Beispiel #18
0
        /// <summary>
        /// Get a completed post from the provided HTML div node.
        /// </summary>
        /// <param name="postDiv">Div node that contains the post.</param>
        /// <param name="quest">Quest being read. Not used by this method.</param>
        /// <returns>
        /// Returns a post object with required information, or null if no table with an
        /// id starting with "post" is found or the post could not be constructed.
        /// </returns>
        /// <exception cref="ArgumentNullException">If <paramref name="postDiv"/> is null.</exception>
        private Post? GetPost(HtmlNode postDiv, IQuest quest)
        {
            if (postDiv == null)
            {
                throw new ArgumentNullException(nameof(postDiv));
            }

            var postTable = postDiv.Descendants("table").FirstOrDefault(a => a.Id.StartsWith("post", StringComparison.Ordinal));

            if (postTable == null)
            {
                return null;
            }

            string author = "";
            int    number = 0;

            // The StartsWith filter above guarantees the id is at least as long as "post".
            string id = postTable.Id.Substring("post".Length);

            // Author: anchor inside the element with id "postmenu_<id>".  That element may
            // be missing, in which case the author stays empty.
            var authorAnchor = postTable.OwnerDocument.GetElementbyId("postmenu_" + id)?.Element("a");

            if (authorAnchor != null)
            {
                // When the anchor wraps its text in a span, the span's text is used instead.
                // NOTE(review): unlike the sibling readers, the name is not passed through
                // CleanupWebString here; confirm whether that is intended.
                author = (authorAnchor.Element("span") ?? authorAnchor).InnerText;
            }

            // Number: "name" attribute of the element with id "postcount<id>".  A missing
            // element, missing attribute or non-integer value leaves the number at 0.
            var anchor = postTable.OwnerDocument.GetElementbyId("postcount" + id);

            if (anchor != null &&
                int.TryParse(anchor.GetAttributeValue("name", ""),
                             System.Globalization.NumberStyles.Integer,
                             System.Globalization.CultureInfo.InvariantCulture,
                             out int parsedNumber))
            {
                number = parsedNumber;
            }

            var postContents = postTable.OwnerDocument.GetElementbyId("post_message_" + id);

            // Predicate filtering out elements that we don't want to include
            var exclusion = ForumPostTextConverter.GetClassExclusionPredicate("bbcode_quote");

            // Get the full post text.
            string text = ForumPostTextConverter.ExtractPostText(postContents, exclusion, Host);

            try
            {
                Origin origin = new Origin(author, id, number, Site, GetPermalinkForId(id));
                return new Post(origin, text);
            }
            catch (Exception)
            {
                // Best effort: any failure while building the origin or post yields no post.
                return null;
            }
        }