/// <summary>
        /// Runs <c>TextCleaner.CreateHeaders</c> over a paragraph that consists of a B run and a
        /// strong run separated by a non-breaking-space entity, and expects a single h2 heading back.
        /// </summary>
        public void TestHeaderCreation_DoubleBHeaderWithSpace()
        {
            // Arrange
            const string input    = "<p><B>Some</B>&nbsp;<strong>paragraph</strong></p>";
            const string expected = "<h2>Some paragraph</h2>";

            // Act
            var actual = DocTester.ProcessSource(input, TextCleaner.CreateHeaders);

            // Assert
            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Runs <c>TextCleaner.CreateHeaders</c> over a paragraph whose whole content is wrapped
        /// in a single B element and expects that paragraph to come back as an h2 heading.
        /// </summary>
        public void TestHeaderCreation_SimpleBHeader()
        {
            // Arrange
            const string input    = "<p><B>Some paragraph</B></p>";
            const string expected = "<h2>Some paragraph</h2>";

            // Act
            var actual = DocTester.ProcessSource(input, TextCleaner.CreateHeaders);

            // Assert
            Assert.AreEqual(expected, actual);
        }
Example #3
0
        /// <summary>
        /// Feeds an already tidy paragraph through <c>TextCleaner.TrimParagraphs</c> and expects
        /// the output to match the same markup.
        /// </summary>
        public void CleanParagraph_ShouldStayClean()
        {
            const string alreadyClean = "<p>Some paragraph</p>";
            const string expected     = "<p>Some paragraph</p>";

            var trimmed = DocTester.ProcessSource(alreadyClean, TextCleaner.TrimParagraphs);

            Assert.AreEqual(expected, trimmed);
        }
Example #4
0
        /// <summary>
        /// Feeds a span (no paragraph element) with padded text through
        /// <c>TextCleaner.TrimParagraphs</c> and expects the output to equal the input.
        /// </summary>
        public void NoParagraph_ShouldStaySame()
        {
            const string spanOnly = "<span> Some paragraph </span>";

            var processed = DocTester.ProcessSource(spanOnly, TextCleaner.TrimParagraphs);

            // The input doubles as the expected value.
            Assert.AreEqual(spanOnly, processed);
        }
Example #5
0
        /// <summary>
        /// Runs <c>TextCleaner.UpdateQuotes</c> in <c>ChangeToSmartQuotes</c> mode over text with
        /// single quotes directly inside parentheses and expects lsquo/rsquo entities in their place.
        /// </summary>
        public void QuotesInBrackets_ShouldBeConverted()
        {
            // Arrange
            const string input    = "<p>Something ('me') something</p>";
            const string expected = "<p>Something (&lsquo;me&rsquo;) something</p>";

            // Act
            var actual = DocTester.ProcessSource(input, d => TextCleaner.UpdateQuotes(d, QuoteProcessing.ChangeToSmartQuotes));

            // Assert
            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Runs <c>TextCleaner.CreateHeaders</c> over a paragraph fully wrapped in strong with a
        /// nested em, and expects an h2 heading that still contains the em element.
        /// </summary>
        public void TestHeaderCreation_SimpleStrongHeaderWithEm()
        {
            // Arrange
            const string input    = "<p><strong>Some <em>paragraph</em></strong></p>";
            const string expected = "<h2>Some <em>paragraph</em></h2>";

            // Act
            var actual = DocTester.ProcessSource(input, TextCleaner.CreateHeaders);

            // Assert
            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Passes a fragment of Word-exported HTML through <c>TextCleaner.RemoveOfficeMarkup</c>
        /// and checks that the literal opening tags for the o:p and u5:p elements no longer
        /// occur in the output.
        /// </summary>
        public void RemoveOfficeMarkup()
        {
            const string officeHtml = @"
<p class=MsoNormal><b><span lang=EN-US style='mso-ansi-language:EN-US'>HU-HU</span><u5:p></u5:p></b><span
lang=EN-US style='mso-ansi-language:EN-US'><o:p></o:p></span></p>

<ul style='margin-top:0cm' type=disc>
 <li class=MsoListParagraph style='margin-left:0cm;mso-list:l0 level1 lfo1'><span
     lang=EN-US style='mso-fareast-font-family:""Times New Roman"";mso-ansi-language:
     EN-US'>text in header should be in Hungarian: Olvassa el a Wolters Kluwer
     legújabb Megfelelőségi szakértői betekintéseit – Cikk, whitepaper,
     kutatás, esettanulmány és podcast.<o:p></o:p></span><u5:p></u5:p></li>
 <li class=MsoListParagraph style='margin-left:0cm;mso-list:l0 level1 lfo1'><span
     lang=EN-US style='mso-fareast-font-family:""Times New Roman"";mso-ansi-language:
     EN-US'>Read More button&nbsp;– see row 107 for local translation <o:p></o:p></span><u5:p></u5:p></li>
 <li class=MsoListParagraph style='color:black;margin-left:0cm;mso-list:l0 level1 lfo1'><span
     lang=EN-US style='mso-fareast-font-family:""Times New Roman"";color:windowtext;
     mso-ansi-language:EN-US'>add dynanic card for expert insights </span><span
     lang=EN-US style='mso-fareast-font-family:""Times New Roman"";mso-ansi-language:
     EN-US'><o:p></o:p></span></li>
</ul>

<u5:p></u5:p><u5:p>        ";

            var cleaned = DocTester.ProcessSource(officeHtml, document => TextCleaner.RemoveOfficeMarkup(document));

            // Checked in this order so the first failure reported is the same as before: o:p, then u5:p.
            foreach (var forbiddenTag in new[] { "<o:p>", "<u5:p>" })
            {
                Assert.IsFalse(cleaned.Contains(forbiddenTag), "Office markup should have been removed, like " + forbiddenTag);
            }
        }
        /// <summary>
        /// Calls <c>TextCleaner.AddBlankLinkTargets</c> with its second argument set to
        /// <c>true</c> on an absolute https link that already has rel="noreferrer". Expects
        /// "noopener" appended to the existing rel value and a target="_blank" attribute added.
        /// </summary>
        public void LinksToRemoteWithRel2_ShouldGetTargetAndNoOpener()
        {
            // Arrange
            const string input    = "<a href=\"https://www.example.com\" rel=\"noreferrer\">link</a>";
            const string expected = "<a href=\"https://www.example.com\" rel=\"noreferrer noopener\" target=\"_blank\">link</a>";

            // Act
            var actual = DocTester.ProcessSource(input, d => TextCleaner.AddBlankLinkTargets(d, true));

            // Assert
            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Runs <c>TextCleaner.CombineLinks</c> over four directly adjacent links to the same
        /// href followed by two links to another href that have a space between them. Expects
        /// the first four merged into one link and the last two left as they are.
        /// </summary>
        public void MultipleLinks_ShouldCombine2()
        {
            // Arrange
            const string input    = "<a href=\"a.yy\">a</a><a href=\"a.yy\">b</a><a href=\"a.yy\">c</a><a href=\"a.yy\">d</a><a href=\"b.yy\">e</a> <a href=\"b.yy\">f</a>";
            const string expected = "<a href=\"a.yy\">abcd</a><a href=\"b.yy\">e</a> <a href=\"b.yy\">f</a>";

            // Act
            var actual = DocTester.ProcessSource(input, TextCleaner.CombineLinks);

            // Assert
            Assert.AreEqual(expected, actual);
        }
Example #10
0
        /// <summary>
        /// Runs <c>TextCleaner.UpdateQuotes</c> in <c>NoChange</c> mode over markup mixing quote
        /// entities, straight double quotes and a quoted attribute, and expects the output to
        /// equal the input.
        /// </summary>
        public void NoChange_ShouldNotChange()
        {
            const string mixedQuotes = "<a target=\"_blank\">&ldquo;some remark&rdquo; said the so-called \"chief\"</a>";

            var processed = DocTester.ProcessSource(mixedQuotes, d => TextCleaner.UpdateQuotes(d, QuoteProcessing.NoChange));

            // The input doubles as the expected value.
            Assert.AreEqual(mixedQuotes, processed);
        }
Example #11
0
        /// <summary>
        /// Runs <c>TextCleaner.RemoveEmptyElements</c> over a paragraph that holds only an image
        /// and checks that the output still contains an img tag.
        /// </summary>
        public void ImageInParagraph_ShouldStay()
        {
            // Regression case: a paragraph with an image but no text used to be dropped as "empty".
            const string imageOnlyParagraph = "<p><img src=\"http://example.com/image.jpeg\"></p>";

            var cleaned = DocTester.ProcessSource(imageOnlyParagraph, TextCleaner.RemoveEmptyElements);

            var imageSurvived = cleaned.Contains("<img");
            Assert.IsTrue(imageSurvived);
        }
        /// <summary>
        /// Runs <c>TextCleaner.RemoveLeadingAndTrailingSpacesFromLinks</c> over a link whose text
        /// starts with a space and ends with ", ". Expects the leading space and the trailing
        /// ", " to end up outside the link, per the expected markup below.
        /// </summary>
        public void LinkWithTrailingSpaces_ShouldBeCleaned()
        {
            // Arrange
            const string input    = "<span>bla<a href=\"www.example.com\"> bla, </a>bla</span>";
            const string expected = "<span>bla <a href=\"www.example.com\">bla</a>, bla</span>";

            // Act
            var actual = DocTester.ProcessSource(input, TextCleaner.RemoveLeadingAndTrailingSpacesFromLinks);

            // Assert
            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Runs <c>TextCleaner.CombineLinks</c> over two links to the same href that are
        /// separated by a space and expects the output to equal the input.
        /// </summary>
        public void NonConsecutiveLink_ShouldNotCombine()
        {
            const string separatedLinks = "<a href=\"a.yy\">A</a> <a href=\"a.yy\">B</a>";

            var processed = DocTester.ProcessSource(separatedLinks, TextCleaner.CombineLinks);

            // The input doubles as the expected value.
            Assert.AreEqual(separatedLinks, processed);
        }
        /// <summary>
        /// Runs <c>TextCleaner.RemoveEmptyLinks</c> over a link that wraps nothing but a single
        /// space and expects the link element gone while the space remains in the span.
        /// </summary>
        public void LinkAroundSpace_ShouldBeRemoved()
        {
            // Arrange
            const string input    = "<span><a href=\"www.example.com\"> </a></span>";
            const string expected = "<span> </span>";

            // Act
            var actual = DocTester.ProcessSource(input, TextCleaner.RemoveEmptyLinks);

            // Assert
            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Calls <c>TextCleaner.AddBlankLinkTargets</c> with its second argument set to
        /// <c>true</c> on a link with a root-relative href and expects the markup to come back
        /// without any added attributes.
        /// </summary>
        public void LinksToLocal_ShouldNotGetTargetOrOpener()
        {
            // Arrange
            const string input    = "<a href=\"/default.html\">link</a>";
            const string expected = "<a href=\"/default.html\">link</a>";

            // Act
            var actual = DocTester.ProcessSource(input, d => TextCleaner.AddBlankLinkTargets(d, true));

            // Assert
            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Calls <c>TextCleaner.AddBlankLinkTargets</c> with its second argument set to
        /// <c>true</c> on an absolute https link that already declares target="_self". Expects
        /// the existing target to be kept and rel="noopener" to be added.
        /// </summary>
        public void LinksToRemoteWithTarget_ShouldNotChangeTargetButAddOpener()
        {
            // Arrange
            const string input    = "<a href=\"https://www.example.com\" target=\"_self\">link</a>";
            const string expected = "<a href=\"https://www.example.com\" target=\"_self\" rel=\"noopener\">link</a>";

            // Act
            var actual = DocTester.ProcessSource(input, d => TextCleaner.AddBlankLinkTargets(d, true));

            // Assert
            Assert.AreEqual(expected, actual);
        }
Example #17
0
        /// <summary>
        /// Runs <c>TextCleaner.UpdateQuotes</c> in <c>ChangeToSmartQuotes</c> mode over markup
        /// that mixes existing quote entities, straight double quotes in text and a quoted
        /// attribute. Expects only the straight quotes in the text to become ldquo/rdquo entities.
        /// </summary>
        public void ToSmartQuotes_ShouldChangeSimpleQuotes()
        {
            // Arrange
            const string input    = "<a target=\"_blank\">&ldquo;some remark&rdquo; said the so-called \"chief.\"</a>";
            const string expected = "<a target=\"_blank\">&ldquo;some remark&rdquo; said the so-called &ldquo;chief.&rdquo;</a>";

            // Act
            var actual = DocTester.ProcessSource(input, d => TextCleaner.UpdateQuotes(d, QuoteProcessing.ChangeToSmartQuotes));

            // Assert
            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Calls <c>TextCleaner.AddBlankLinkTargets</c> with its second argument set to
        /// <c>false</c> on an absolute https link and expects a target="_blank" attribute to be
        /// added, with no rel attribute in the expected markup.
        /// </summary>
        public void LinksToRemote_ShouldGetTarget()
        {
            // Arrange
            const string input    = "<a href=\"https://www.example.com\">link</a>";
            const string expected = "<a href=\"https://www.example.com\" target=\"_blank\">link</a>";

            // Act
            var actual = DocTester.ProcessSource(input, d => TextCleaner.AddBlankLinkTargets(d, false));

            // Assert
            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Runs <c>TextCleaner.RemoveAnchors</c> over a paragraph holding two anchors without an
        /// href (one bare, one with only a name attribute) and checks that no anchor opening tag
        /// is left in the output.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the method name mentions TabIndex, but the body exercises anchor removal;
        /// confirm whether the name is a leftover.
        /// </remarks>
        public void TabIndex_ShouldBeRemoved()
        {
            const string paragraphWithAnchors = "<p> follow <a>@WKHealth</a> or <a name='target'>@Wolters_Kluwer</a> on Twitter</p>";

            // removes an A without HREF
            var cleaned = DocTester.ProcessSource(paragraphWithAnchors, TextCleaner.RemoveAnchors);

            var anchorStillPresent = cleaned.Contains("<a");
            Assert.IsFalse(anchorStillPresent);
        }
        /// <summary>
        /// Runs <c>TextCleaner.CombineLinks</c> over three adjacent links: one to "nu.nl" followed
        /// by two that share a different href. Expects the first link to stay separate and the
        /// two same-href links to be merged into one.
        /// </summary>
        public void NonConsecutiveLinks_ShouldNotCombine()
        {
            // Arrange
            const string input = "<a href=\"nu.nl\">x</a><a href=\"http://www.example.com/investment-compliance/solutions/gainskeeper.aspx\">GainsKeeper</a><a href=\"http://www.example.com/investment-compliance/solutions/gainskeeper.aspx\"><sup>&reg;</sup></a>";

            // SUP is removed from around &reg;
            const string expected = "<a href=\"nu.nl\">x</a><a href=\"http://www.example.com/investment-compliance/solutions/gainskeeper.aspx\">GainsKeeper&reg;</a>";

            // Act
            var actual = DocTester.ProcessSource(input, TextCleaner.CombineLinks);

            // Assert
            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Runs <c>TextCleaner.RemoveLeadingAndTrailingSpacesFromLinks</c> over a paragraph with
        /// two bracketed links, the second of which ends in "].". Expects the trailing full stop
        /// to be moved outside that link while the first link is unchanged.
        /// </summary>
        public void NestedLinkWithTrailingSpaces_ShouldBeCleaned()
        {
            // Arrange
            const string input = "<p style=\"\">[<a href=\"http://www.example.com/a\" style=\"\">Some example</a>] bla bla [<a href=\"http://www.example.com/b\" style=\"\">name® bla].</a></p>";

            // actual ® is replaced by &reg;
            const string expected = "<p style=\"\">[<a href=\"http://www.example.com/a\" style=\"\">Some example</a>] bla bla [<a href=\"http://www.example.com/b\" style=\"\">name&reg; bla]</a>.</p>";

            // Act
            var actual = DocTester.ProcessSource(input, TextCleaner.RemoveLeadingAndTrailingSpacesFromLinks);

            // Assert
            Assert.AreEqual(expected, actual);
        }
Example #22
0
        /// <summary>
        /// Runs <c>TextCleaner.TrimParagraphs</c> over a paragraph whose text is surrounded by
        /// line breaks in the source and followed by two br elements. Expects a paragraph that
        /// contains just the text.
        /// </summary>
        public void ParagraphWithTrailingBreaks_ShouldBeClean()
        {
            // Arrange
            var untrimmed = @"<p>
Some paragraph<br/>
<br/>
</p>";
            const string expected = "<p>Some paragraph</p>";

            // Act
            var actual = DocTester.ProcessSource(untrimmed, TextCleaner.TrimParagraphs);

            // Assert
            Assert.AreEqual(expected, actual);
        }
Example #23
0
        /// <summary>
        /// Runs <c>TextCleaner.RemoveNonCMSElements</c> over a body holding an iframe and a
        /// paragraph and checks that the iframe opening tag is still present in the output.
        /// </summary>
        public void RemoveNonCMSElements_ShouldNotRemoveIframe()
        {
            var bodyWithIframe = @"
<body>
    <iframe>some video content</iframe>
    <p>Text</p>
</body>";

            var cleaned = DocTester.ProcessSource(bodyWithIframe, TextCleaner.RemoveNonCMSElements);

            var iframeKept = cleaned.Contains("<iframe>");
            Assert.IsTrue(iframeKept);
        }
Example #24
0
        /// <summary>
        /// Runs <c>TextCleaner.RemoveNonCMSElements</c> over a body holding a script element and
        /// a paragraph and checks that no script opening tag is left in the output.
        /// </summary>
        public void RemoveNonCMSElements_ShouldRemoveScript()
        {
            const string bodyWithScript = @"
<body>
    <script>alert('boo')</script>
    <p>Text</p>
</body>";

            var cleaned = DocTester.ProcessSource(bodyWithScript, TextCleaner.RemoveNonCMSElements);

            var scriptStillPresent = cleaned.Contains("<script>");
            Assert.IsFalse(scriptStillPresent);
        }
Example #25
0
        /// <summary>
        /// Runs <c>TextCleaner.ClearStyling</c> over three paragraphs that carry class, style and
        /// onclick attributes (including a class on a nested span). Expects the same markup back
        /// with those attributes stripped.
        /// </summary>
        public void ClearStyling_ShouldRemoveAttributes()
        {
            // Arrange
            const string styled = @"
<p class=""something"">Text 1</p>
<p style=""background-color: black"">Text <span class=""super"">2</span></p>
<p onclick=""clickhandler"">Text 3</p>";

            const string expected = @"
<p>Text 1</p>
<p>Text <span>2</span></p>
<p>Text 3</p>";

            // Act
            var actual = DocTester.ProcessSource(styled, TextCleaner.ClearStyling);

            // Assert
            Assert.AreEqual(expected, actual);
        }