/// <summary>
/// A paragraph made of a &lt;B&gt; run and a &lt;strong&gt; run separated by a space
/// is expected to come out of <c>TextCleaner.CreateHeaders</c> as a single &lt;h2&gt;.
/// </summary>
public void TestHeaderCreation_DoubleBHeaderWithSpace()
{
    const string input = "<p><B>Some</B> <strong>paragraph</strong></p>";
    const string expected = "<h2>Some paragraph</h2>";

    var actual = DocTester.ProcessSource(input, TextCleaner.CreateHeaders);

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// A paragraph whose whole content is wrapped in &lt;B&gt; is expected to come out
/// of <c>TextCleaner.CreateHeaders</c> as an &lt;h2&gt; with the same text.
/// </summary>
public void TestHeaderCreation_SimpleBHeader()
{
    const string input = "<p><B>Some paragraph</B></p>";
    const string expected = "<h2>Some paragraph</h2>";

    var actual = DocTester.ProcessSource(input, TextCleaner.CreateHeaders);

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// A paragraph without surrounding whitespace must pass through
/// <c>TextCleaner.TrimParagraphs</c> unchanged.
/// </summary>
public void CleanParagraph_ShouldStayClean()
{
    const string input = "<p>Some paragraph</p>";
    const string expected = "<p>Some paragraph</p>";

    var actual = DocTester.ProcessSource(input, TextCleaner.TrimParagraphs);

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Markup that contains no paragraph (here a &lt;span&gt; with padded text) must
/// pass through <c>TextCleaner.TrimParagraphs</c> unchanged.
/// </summary>
public void NoParagraph_ShouldStaySame()
{
    const string input = "<span> Some paragraph </span>";

    var actual = DocTester.ProcessSource(input, TextCleaner.TrimParagraphs);

    // The input doubles as the expected output: nothing may be altered.
    Assert.AreEqual(input, actual);
}
/// <summary>
/// Straight single quotes directly inside parentheses are expected to become
/// curly single quotes when <c>TextCleaner.UpdateQuotes</c> runs with
/// <c>QuoteProcessing.ChangeToSmartQuotes</c>.
/// </summary>
public void QuotesInBrackets_ShouldBeConverted()
{
    const string input = "<p>Something ('me') something</p>";
    const string expected = "<p>Something (‘me’) something</p>";

    var actual = DocTester.ProcessSource(
        input,
        document => TextCleaner.UpdateQuotes(document, QuoteProcessing.ChangeToSmartQuotes));

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// A paragraph fully wrapped in &lt;strong&gt; is expected to become an &lt;h2&gt;
/// via <c>TextCleaner.CreateHeaders</c>, with the nested &lt;em&gt; kept in place.
/// </summary>
public void TestHeaderCreation_SimpleStrongHeaderWithEm()
{
    const string input = "<p><strong>Some <em>paragraph</em></strong></p>";
    const string expected = "<h2>Some <em>paragraph</em></h2>";

    var actual = DocTester.ProcessSource(input, TextCleaner.CreateHeaders);

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Feeds a fragment of Word-generated HTML through
/// <c>TextCleaner.RemoveOfficeMarkup</c> and checks that neither the
/// <c>&lt;o:p&gt;</c> nor the <c>&lt;u5:p&gt;</c> tag is left in the output.
/// </summary>
public void RemoveOfficeMarkup()
{
    // Sample markup containing Office-namespaced tags; kept verbatim.
    const string input = @" <p class=MsoNormal><b><span lang=EN-US style='mso-ansi-language:EN-US'>HU-HU</span><u5:p></u5:p></b><span lang=EN-US style='mso-ansi-language:EN-US'><o:p></o:p></span></p> <ul style='margin-top:0cm' type=disc> <li class=MsoListParagraph style='margin-left:0cm;mso-list:l0 level1 lfo1'><span lang=EN-US style='mso-fareast-font-family:""Times New Roman"";mso-ansi-language: EN-US'>text in header should be in Hungarian: Olvassa el a Wolters Kluwer legújabb Megfelelőségi szakértői betekintéseit – Cikk, whitepaper, kutatás, esettanulmány és podcast.<o:p></o:p></span><u5:p></u5:p></li> <li class=MsoListParagraph style='margin-left:0cm;mso-list:l0 level1 lfo1'><span lang=EN-US style='mso-fareast-font-family:""Times New Roman"";mso-ansi-language: EN-US'>Read More button – see row 107 for local translation <o:p></o:p></span><u5:p></u5:p></li> <li class=MsoListParagraph style='color:black;margin-left:0cm;mso-list:l0 level1 lfo1'><span lang=EN-US style='mso-fareast-font-family:""Times New Roman"";color:windowtext; mso-ansi-language:EN-US'>add dynanic card for expert insights </span><span lang=EN-US style='mso-fareast-font-family:""Times New Roman"";mso-ansi-language: EN-US'><o:p></o:p></span></li> </ul> <u5:p></u5:p><u5:p> ";

    var cleaned = DocTester.ProcessSource(input, document => TextCleaner.RemoveOfficeMarkup(document));

    Assert.IsFalse(
        cleaned.Contains("<o:p>"),
        "Office markup should have been removed, like <o:p>");
    Assert.IsFalse(
        cleaned.Contains("<u5:p>"),
        "Office markup should have been removed, like <u5:p>");
}
/// <summary>
/// An absolute https link that already carries <c>rel="noreferrer"</c> is expected
/// to keep that value, gain <c>noopener</c> in the same attribute and receive
/// <c>target="_blank"</c> when <c>TextCleaner.AddBlankLinkTargets</c> is called
/// with <c>true</c> as its second argument.
/// </summary>
public void LinksToRemoteWithRel2_ShouldGetTargetAndNoOpener()
{
    const string input = "<a href=\"https://www.example.com\" rel=\"noreferrer\">link</a>";
    const string expected = "<a href=\"https://www.example.com\" rel=\"noreferrer noopener\" target=\"_blank\">link</a>";

    var actual = DocTester.ProcessSource(
        input,
        document => TextCleaner.AddBlankLinkTargets(document, true));

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Four directly adjacent links to the same href are expected to be merged into
/// one by <c>TextCleaner.CombineLinks</c>, while two same-href links separated
/// by a space stay separate.
/// </summary>
public void MultipleLinks_ShouldCombine2()
{
    const string input = "<a href=\"a.yy\">a</a><a href=\"a.yy\">b</a><a href=\"a.yy\">c</a><a href=\"a.yy\">d</a><a href=\"b.yy\">e</a> <a href=\"b.yy\">f</a>";
    const string expected = "<a href=\"a.yy\">abcd</a><a href=\"b.yy\">e</a> <a href=\"b.yy\">f</a>";

    var actual = DocTester.ProcessSource(input, TextCleaner.CombineLinks);

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// With <c>QuoteProcessing.NoChange</c>, <c>TextCleaner.UpdateQuotes</c> must leave
/// both the curly and the straight quotes in the markup exactly as they are.
/// </summary>
public void NoChange_ShouldNotChange()
{
    const string input = "<a target=\"_blank\">“some remark” said the so-called \"chief\"</a>";

    var actual = DocTester.ProcessSource(
        input,
        document => TextCleaner.UpdateQuotes(document, QuoteProcessing.NoChange));

    // The input doubles as the expected output.
    Assert.AreEqual(input, actual);
}
/// <summary>
/// Regression check for <c>TextCleaner.RemoveEmptyElements</c>: a paragraph that
/// has no text but does contain an image must keep the image in the output.
/// </summary>
public void ImageInParagraph_ShouldStay()
{
    // Paragraph without text (but with image) got wrongly removed as "empty".
    var source = "<p><img src=\"http://example.com/image.jpeg\"></p>";

    var html = DocTester.ProcessSource(source, TextCleaner.RemoveEmptyElements);

    // Only the presence of the image tag is checked, not the exact markup.
    Assert.IsTrue(html.Contains("<img"));
}
/// <summary>
/// Link text with a leading space and a trailing ", " is expected to be tightened
/// by <c>TextCleaner.RemoveLeadingAndTrailingSpacesFromLinks</c>: the space and
/// the comma end up outside the &lt;a&gt; element.
/// </summary>
public void LinkWithTrailingSpaces_ShouldBeCleaned()
{
    const string input = "<span>bla<a href=\"www.example.com\"> bla, </a>bla</span>";
    const string expected = "<span>bla <a href=\"www.example.com\">bla</a>, bla</span>";

    var actual = DocTester.ProcessSource(input, TextCleaner.RemoveLeadingAndTrailingSpacesFromLinks);

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Two links to the same href that are separated by a space must not be merged
/// by <c>TextCleaner.CombineLinks</c>; the markup has to come back unchanged.
/// </summary>
public void NonConsecutiveLink_ShouldNotCombine()
{
    const string input = "<a href=\"a.yy\">A</a> <a href=\"a.yy\">B</a>";

    var actual = DocTester.ProcessSource(input, TextCleaner.CombineLinks);

    // The input doubles as the expected output.
    Assert.AreEqual(input, actual);
}
/// <summary>
/// A link whose only content is a single space is expected to be unwrapped by
/// <c>TextCleaner.RemoveEmptyLinks</c>, leaving just the space in the parent.
/// </summary>
public void LinkAroundSpace_ShouldBeRemoved()
{
    const string input = "<span><a href=\"www.example.com\"> </a></span>";
    const string expected = "<span> </span>";

    var actual = DocTester.ProcessSource(input, TextCleaner.RemoveEmptyLinks);

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// A site-relative link must come back from
/// <c>TextCleaner.AddBlankLinkTargets</c> (second argument <c>true</c>) without
/// any added <c>target</c> or <c>rel</c> attribute.
/// </summary>
public void LinksToLocal_ShouldNotGetTargetOrOpener()
{
    const string input = "<a href=\"/default.html\">link</a>";
    const string expected = "<a href=\"/default.html\">link</a>";

    var actual = DocTester.ProcessSource(
        input,
        document => TextCleaner.AddBlankLinkTargets(document, true));

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// An absolute https link that already declares <c>target="_self"</c> is expected
/// to keep that target and only gain <c>rel="noopener"</c> when
/// <c>TextCleaner.AddBlankLinkTargets</c> is called with <c>true</c>.
/// </summary>
public void LinksToRemoteWithTarget_ShouldNotChangeTargetButAddOpener()
{
    const string input = "<a href=\"https://www.example.com\" target=\"_self\">link</a>";
    const string expected = "<a href=\"https://www.example.com\" target=\"_self\" rel=\"noopener\">link</a>";

    var actual = DocTester.ProcessSource(
        input,
        document => TextCleaner.AddBlankLinkTargets(document, true));

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// With <c>QuoteProcessing.ChangeToSmartQuotes</c>, straight double quotes in the
/// text are expected to become curly quotes, while the existing curly quotes and
/// the quotes around the attribute value stay as they are.
/// </summary>
public void ToSmartQuotes_ShouldChangeSimpleQuotes()
{
    const string input = "<a target=\"_blank\">“some remark” said the so-called \"chief.\"</a>";
    const string expected = "<a target=\"_blank\">“some remark” said the so-called “chief.”</a>";

    var actual = DocTester.ProcessSource(
        input,
        document => TextCleaner.UpdateQuotes(document, QuoteProcessing.ChangeToSmartQuotes));

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// An absolute https link without a target is expected to receive
/// <c>target="_blank"</c> — and no <c>rel</c> attribute — when
/// <c>TextCleaner.AddBlankLinkTargets</c> is called with <c>false</c> as its
/// second argument.
/// </summary>
public void LinksToRemote_ShouldGetTarget()
{
    const string input = "<a href=\"https://www.example.com\">link</a>";
    const string expected = "<a href=\"https://www.example.com\" target=\"_blank\">link</a>";

    var actual = DocTester.ProcessSource(
        input,
        document => TextCleaner.AddBlankLinkTargets(document, false));

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Runs <c>TextCleaner.RemoveAnchors</c> over a paragraph containing two
/// &lt;a&gt; elements that have no href (one bare, one with only a name) and
/// checks that no <c>&lt;a</c> tag is left in the output.
/// </summary>
/// <remarks>
/// NOTE(review): the method name mentions TabIndex, but the body exercises anchor
/// removal; the name is kept so existing test references stay valid.
/// </remarks>
public void TabIndex_ShouldBeRemoved()
{
    var source = "<p> follow <a>@WKHealth</a> or <a name='target'>@Wolters_Kluwer</a> on Twitter</p>";

    // removes an A without HREF
    var html = DocTester.ProcessSource(source, TextCleaner.RemoveAnchors);

    Assert.IsFalse(html.Contains("<a"));
}
/// <summary>
/// Runs <c>TextCleaner.CombineLinks</c> over three adjacent links: one to a
/// different href followed by two to the same href, the last of which wraps a
/// &lt;sup&gt;. The first link must stay separate, and the two same-href links
/// are expected to merge into one whose text no longer has the &lt;sup&gt;.
/// </summary>
/// <remarks>
/// NOTE(review): despite the "ShouldNotCombine" name, the assertion expects the
/// two same-href links to be combined; the name is kept so existing test
/// references stay valid.
/// </remarks>
public void NonConsecutiveLinks_ShouldNotCombine()
{
    var source = "<a href=\"nu.nl\">x</a><a href=\"http://www.example.com/investment-compliance/solutions/gainskeeper.aspx\">GainsKeeper</a><a href=\"http://www.example.com/investment-compliance/solutions/gainskeeper.aspx\"><sup>®</sup></a>";

    var html = DocTester.ProcessSource(source, TextCleaner.CombineLinks);

    // SUP is removed from around ®
    Assert.AreEqual("<a href=\"nu.nl\">x</a><a href=\"http://www.example.com/investment-compliance/solutions/gainskeeper.aspx\">GainsKeeper®</a>", html);
}
/// <summary>
/// Runs <c>TextCleaner.RemoveLeadingAndTrailingSpacesFromLinks</c> over a
/// paragraph with two bracketed links. The first link is expected to stay as it
/// is; for the second, the trailing full stop is expected to move from inside
/// the &lt;a&gt; element to just after it.
/// </summary>
public void NestedLinkWithTrailingSpaces_ShouldBeCleaned()
{
    var source = "<p style=\"\">[<a href=\"http://www.example.com/a\" style=\"\">Some example</a>] bla bla [<a href=\"http://www.example.com/b\" style=\"\">name® bla].</a></p>";

    var html = DocTester.ProcessSource(source, TextCleaner.RemoveLeadingAndTrailingSpacesFromLinks);

    // NOTE(review): the earlier comment here said the ® gets "replaced by ®";
    // presumably the input once spelled it as an HTML entity — confirm.
    Assert.AreEqual("<p style=\"\">[<a href=\"http://www.example.com/a\" style=\"\">Some example</a>] bla bla [<a href=\"http://www.example.com/b\" style=\"\">name® bla]</a>.</p>", html);
}
/// <summary>
/// A paragraph with leading whitespace and trailing &lt;br/&gt; elements is
/// expected to be reduced to just its text by <c>TextCleaner.TrimParagraphs</c>.
/// </summary>
public void ParagraphWithTrailingBreaks_ShouldBeClean()
{
    const string input = @"<p> Some paragraph<br/> <br/> </p>";
    const string expected = "<p>Some paragraph</p>";

    var actual = DocTester.ProcessSource(input, TextCleaner.TrimParagraphs);

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// An &lt;iframe&gt; inside the body must still be present after
/// <c>TextCleaner.RemoveNonCMSElements</c> has run.
/// </summary>
public void RemoveNonCMSElements_ShouldNotRemoveIframe()
{
    const string input = @" <body> <iframe>some video content</iframe> <p>Text</p> </body>";

    var cleaned = DocTester.ProcessSource(input, TextCleaner.RemoveNonCMSElements);

    // Only the presence of the opening tag is checked.
    Assert.IsTrue(cleaned.Contains("<iframe>"));
}
/// <summary>
/// A &lt;script&gt; inside the body must no longer be present after
/// <c>TextCleaner.RemoveNonCMSElements</c> has run.
/// </summary>
public void RemoveNonCMSElements_ShouldRemoveScript()
{
    const string input = @" <body> <script>alert('boo')</script> <p>Text</p> </body>";

    var cleaned = DocTester.ProcessSource(input, TextCleaner.RemoveNonCMSElements);

    // Only the absence of the opening tag is checked.
    Assert.IsFalse(cleaned.Contains("<script>"));
}
/// <summary>
/// <c>TextCleaner.ClearStyling</c> is expected to strip the <c>class</c>,
/// <c>style</c> and <c>onclick</c> attributes from the paragraphs and the nested
/// span, leaving elements, text and whitespace untouched.
/// </summary>
public void ClearStyling_ShouldRemoveAttributes()
{
    const string input = @" <p class=""something"">Text 1</p> <p style=""background-color: black"">Text <span class=""super"">2</span></p> <p onclick=""clickhandler"">Text 3</p>";
    const string expected = @" <p>Text 1</p> <p>Text <span>2</span></p> <p>Text 3</p>";

    var actual = DocTester.ProcessSource(input, TextCleaner.ClearStyling);

    Assert.AreEqual(expected, actual);
}