/// <summary>A single self-closing tag yields exactly one void-element token.</summary>
public void GetTokens_SingleVoidTag_ReturnsSequence()
{
	// Arrange
	const string markup = @"<root />";
	var expectedTokens = new[]
	{
		MarkupGrammar.TokenElementVoid(new DataName("root"))
	};
	var subject = new HtmlTokenizer();

	// Act
	var actualTokens = subject.GetTokens(markup).ToArray();

	// Assert
	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>A child element declaring a default xmlns is tokenized with that namespace URI.</summary>
public void GetTokens_NamespacedChildTag_ReturnsSequence()
{
	const string markup = @"<foo><child xmlns=""http://example.com/schema"">value</child></foo>";
	var expectedTokens = new[]
	{
		MarkupGrammar.TokenElementBegin(new DataName("foo")),
		MarkupGrammar.TokenElementBegin(new DataName("child", String.Empty, "http://example.com/schema")),
		MarkupGrammar.TokenPrimitive("value"),
		MarkupGrammar.TokenElementEnd,
		MarkupGrammar.TokenElementEnd
	};

	var subject = new HtmlTokenizer();
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>With auto-balancing enabled, an end tag that was never opened is discarded entirely.</summary>
public void GetTokens_UnopenedCloseTagAutoBalance_ReturnsSequence()
{
	const string markup = @"</foo>";
	var expectedTokens = new Token<MarkupTokenType>[0];

	var subject = new HtmlTokenizer { AutoBalanceTags = true };
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>With auto-balancing enabled, an unclosed open tag receives a synthesized end token.</summary>
public void GetTokens_UnclosedOpenTagAutoBalance_ReturnsSequence()
{
	const string markup = @"<root>";
	var expectedTokens = new[]
	{
		MarkupGrammar.TokenElementBegin(new DataName("root")),
		MarkupGrammar.TokenElementEnd
	};

	var subject = new HtmlTokenizer { AutoBalanceTags = true };
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>
/// When "style" is registered in UnparsedTags, everything between the style tags is emitted
/// as one raw primitive token instead of being tokenized as markup.
/// NOTE(review): the expected tokens contain "\r\n\t" whitespace, so the verbatim input string
/// presumably spans multiple physical lines (CRLF + tabs) that appear collapsed here — confirm
/// against the original file before reformatting this literal.
/// </summary>
public void GetTokens_HtmlStyleBlock_ReturnsCDataInside() { const string input = @"<div class=""content""> <style type=""text/css""> <strong>Lorem ipsum</strong> dolor sit amet, <i>consectetur</i> adipiscing elit. </style> </div>"; var expected = new[] { MarkupGrammar.TokenElementBegin(new DataName("div")), MarkupGrammar.TokenAttribute(new DataName("class")), MarkupGrammar.TokenPrimitive("content"), MarkupGrammar.TokenPrimitive("\r\n\t"), MarkupGrammar.TokenElementBegin(new DataName("style")), MarkupGrammar.TokenAttribute(new DataName("type")), MarkupGrammar.TokenPrimitive("text/css"), MarkupGrammar.TokenPrimitive("\r\n\t\t<strong>Lorem ipsum</strong> dolor sit amet, <i>consectetur</i> adipiscing elit.\r\n\t"), MarkupGrammar.TokenElementEnd, MarkupGrammar.TokenPrimitive("\r\n"), MarkupGrammar.TokenElementEnd, }; var tokenizer = new HtmlTokenizer { UnparsedTags = new[] { "script", "style" } }; var actual = tokenizer.GetTokens(input).ToArray(); Assert.Equal(expected, actual); }
/// <summary>
/// With "script" in UnparsedTags and UnwrapUnparsedComments disabled, an HTML comment wrapping
/// the script body is preserved as an UnparsedBlock ("!--" / "--") rather than unwrapped.
/// NOTE(review): the "\r\n\t" whitespace in the expected tokens implies the verbatim input
/// literal spans multiple physical lines collapsed in this rendering — verify before editing.
/// </summary>
public void GetTokens_HtmlScriptBlockComment_ReturnsCommentBlock() { const string input = @"<div class=""content""> <script type=""text/javascript""><!-- var text = ""<strong>Lorem ipsum</strong> dolor sit amet, <i>consectetur</i> adipiscing elit.""; --></script> </div>"; var expected = new[] { MarkupGrammar.TokenElementBegin(new DataName("div")), MarkupGrammar.TokenAttribute(new DataName("class")), MarkupGrammar.TokenPrimitive("content"), MarkupGrammar.TokenPrimitive("\r\n\t"), MarkupGrammar.TokenElementBegin(new DataName("script")), MarkupGrammar.TokenAttribute(new DataName("type")), MarkupGrammar.TokenPrimitive("text/javascript"), MarkupGrammar.TokenPrimitive(new UnparsedBlock("!--", "--", "\r\n\t\tvar text = \"<strong>Lorem ipsum</strong> dolor sit amet, <i>consectetur</i> adipiscing elit.\";\r\n\t")), MarkupGrammar.TokenElementEnd, MarkupGrammar.TokenPrimitive("\r\n"), MarkupGrammar.TokenElementEnd, }; var tokenizer = new HtmlTokenizer { UnparsedTags = new[] { "script", "style" }, UnwrapUnparsedComments = false }; var actual = tokenizer.GetTokens(input).ToArray(); Assert.Equal(expected, actual); }
/// <summary>
/// Pretty-printed HTML is tokenized with its inter-element whitespace preserved as
/// primitive tokens ("\r\n\t" etc.) interleaved with the element/attribute tokens.
/// NOTE(review): the verbatim input literal presumably contains real CRLF/tab characters
/// that appear collapsed to spaces in this view — confirm before reformatting.
/// </summary>
public void GetTokens_HtmlContentPrettyPrinted_ReturnsSequence() { const string input = @"<div class=""content""> <p style=""color:red""> <strong>Lorem ipsum</strong> dolor sit amet, <i>consectetur</i> adipiscing elit. </p> </div>"; var expected = new[] { MarkupGrammar.TokenElementBegin(new DataName("div")), MarkupGrammar.TokenAttribute(new DataName("class")), MarkupGrammar.TokenPrimitive("content"), MarkupGrammar.TokenPrimitive("\r\n\t"), MarkupGrammar.TokenElementBegin(new DataName("p")), MarkupGrammar.TokenAttribute(new DataName("style")), MarkupGrammar.TokenPrimitive("color:red"), MarkupGrammar.TokenPrimitive("\r\n\t\t"), MarkupGrammar.TokenElementBegin(new DataName("strong")), MarkupGrammar.TokenPrimitive("Lorem ipsum"), MarkupGrammar.TokenElementEnd, MarkupGrammar.TokenPrimitive(" dolor sit amet, "), MarkupGrammar.TokenElementBegin(new DataName("i")), MarkupGrammar.TokenPrimitive("consectetur"), MarkupGrammar.TokenElementEnd, MarkupGrammar.TokenPrimitive(" adipiscing elit.\r\n\t"), MarkupGrammar.TokenElementEnd, MarkupGrammar.TokenPrimitive("\r\n"), MarkupGrammar.TokenElementEnd, }; var tokenizer = new HtmlTokenizer(); var actual = tokenizer.GetTokens(input).ToArray(); Assert.Equal(expected, actual); }
/// <summary>An ampersand followed by text that never forms a valid entity passes through verbatim.</summary>
public void GetTokens_EntityWithTrailingText_ReturnsSequence()
{
	const string markup = @"&trailing";
	var expectedTokens = new[]
	{
		MarkupGrammar.TokenPrimitive("&trailing")
	};

	var subject = new HtmlTokenizer();
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>An attribute prefix bound to the same URI as the default namespace keeps its own prefix.</summary>
public void GetTokens_DifferentPrefixSameNamespace_ReturnsSequence()
{
	const string markup = @"<foo xmlns=""http://example.org"" xmlns:blah=""http://example.org"" blah:key=""value"" />";
	var expectedTokens = new[]
	{
		MarkupGrammar.TokenElementVoid(new DataName("foo", String.Empty, "http://example.org")),
		MarkupGrammar.TokenAttribute(new DataName("key", "blah", "http://example.org")),
		MarkupGrammar.TokenPrimitive("value")
	};

	var subject = new HtmlTokenizer();
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>A child's default xmlns overrides the parent's default namespace for that child only.</summary>
public void GetTokens_ParentAndChildDifferentDefaultNamespaces_ReturnsSequence()
{
	const string markup = @"<foo xmlns=""http://json.org""><child xmlns=""http://jsonfx.net"">text value</child></foo>";
	var expectedTokens = new[]
	{
		MarkupGrammar.TokenElementBegin(new DataName("foo", String.Empty, "http://json.org")),
		MarkupGrammar.TokenElementBegin(new DataName("child", String.Empty, "http://jsonfx.net")),
		MarkupGrammar.TokenPrimitive("text value"),
		MarkupGrammar.TokenElementEnd,
		MarkupGrammar.TokenElementEnd
	};

	var subject = new HtmlTokenizer();
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>A prefixed namespace declared on the parent is inherited by a child using the same prefix.</summary>
public void GetTokens_ParentAndChildSharePrefixedNamespace_ReturnsSequence()
{
	const string markup = @"<bar:foo xmlns:bar=""http://example.org""><bar:child>value</bar:child></bar:foo>";
	var expectedTokens = new[]
	{
		MarkupGrammar.TokenElementBegin(new DataName("foo", "bar", "http://example.org")),
		MarkupGrammar.TokenElementBegin(new DataName("child", "bar", "http://example.org")),
		MarkupGrammar.TokenPrimitive("value"),
		MarkupGrammar.TokenElementEnd,
		MarkupGrammar.TokenElementEnd,
	};

	var subject = new HtmlTokenizer();
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>Empty input produces an empty token sequence, not an error.</summary>
public void GetTokens_EmptyInput_ReturnsEmptySequence()
{
	const string markup = "";
	var expectedTokens = new Token<MarkupTokenType>[0];

	var subject = new HtmlTokenizer();
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>
/// A T4 template is tokenized into alternating unparsed blocks for each directive/expression
/// kind (&lt;#@ ... #&gt;, &lt;#= ... #&gt;, &lt;# ... #&gt;, &lt;#+ ... #&gt;) and primitive
/// tokens for the literal template text between them.
/// NOTE(review): the verbatim literals presumably contain real newlines that appear collapsed
/// to spaces in this rendering — confirm against the original file before reformatting.
/// </summary>
public void GetTokens_T4HelloWorld_ReturnsSequence() { const string input = @"<#@ template debug=""true"" hostspecific=""false"" language=""C#"" #> <#@ output extension="".cs"" #> using System; public class <#= this.GetClassName() #> { private readonly string Message; public <# this.Write(this.GetClassName()); #>() { this.Message = ""Hello world.""; } public string GetMessage() { return this.Message; } } <#+ private string GetClassName { get { return ""HelloWorld""; } } #>"; var expected = new[] { MarkupGrammar.TokenUnparsed("#@", "#", @" template debug=""true"" hostspecific=""false"" language=""C#"" "), MarkupGrammar.TokenPrimitive("\r\n"), MarkupGrammar.TokenUnparsed("#@", "#", @" output extension="".cs"" "), MarkupGrammar.TokenPrimitive(@" using System; public class "), MarkupGrammar.TokenUnparsed("#=", "#", @" this.GetClassName() "), MarkupGrammar.TokenPrimitive(@" { private readonly string Message; public "), MarkupGrammar.TokenUnparsed("#", "#", @" this.Write(this.GetClassName()); "), MarkupGrammar.TokenPrimitive(@"() { this.Message = ""Hello world.""; } public string GetMessage() { return this.Message; } } "), MarkupGrammar.TokenUnparsed("#+", "#", @" private string GetClassName { get { return ""HelloWorld""; } } ") }; var tokenizer = new HtmlTokenizer(); var actual = tokenizer.GetTokens(input).ToArray(); Assert.Equal(expected, actual); }
/// <summary>
/// A server-side comment (&lt;%-- ... --%&gt;) swallows all the markup inside it, including a
/// nested HTML comment, and comes back as one single unparsed block token.
/// NOTE(review): the verbatim literals presumably span multiple physical lines collapsed
/// to spaces in this rendering — verify before reformatting.
/// </summary>
public void GetTokens_CodeCommentAroundMarkup_ReturnsSingleUnparsedBlock() { const string input = @"<%-- <html> <body style=""color:lime""> <!-- not much to say here --> </body> </html> --%>"; var expected = new[] { MarkupGrammar.TokenUnparsed("%--", "--%", @" <html> <body style=""color:lime""> <!-- not much to say here --> </body> </html> ") }; var tokenizer = new HtmlTokenizer(); var actual = tokenizer.GetTokens(input).ToArray(); Assert.Equal(expected, actual); }
/// <summary>
/// A PHP processing block (&lt;?php ... ?&gt;) embedded in an HTML page is emitted as a single
/// unparsed token ("?" / "?") while the surrounding HTML is fully tokenized.
/// NOTE(review): the "\r\n\t" whitespace in the expected tokens implies the verbatim input
/// spans multiple physical lines collapsed in this rendering — verify before reformatting.
/// </summary>
public void GetTokens_PhpHelloWorld_ReturnsSequence() { const string input = @"<html> <head> <title>PHP Test</title> </head> <body> <?php echo '<p>Hello World</p>'; ?> </body> </html>"; var expected = new[] { MarkupGrammar.TokenElementBegin(new DataName("html")), MarkupGrammar.TokenPrimitive("\r\n\t"), MarkupGrammar.TokenElementBegin(new DataName("head")), MarkupGrammar.TokenPrimitive("\r\n\t\t"), MarkupGrammar.TokenElementBegin(new DataName("title")), MarkupGrammar.TokenPrimitive("PHP Test"), MarkupGrammar.TokenElementEnd, MarkupGrammar.TokenPrimitive("\r\n\t"), MarkupGrammar.TokenElementEnd, MarkupGrammar.TokenPrimitive("\r\n\t"), MarkupGrammar.TokenElementBegin(new DataName("body")), MarkupGrammar.TokenPrimitive("\r\n\t\t"), MarkupGrammar.TokenUnparsed("?", "?", @"php echo '<p>Hello World</p>'; "), MarkupGrammar.TokenPrimitive("\r\n\t"), MarkupGrammar.TokenElementEnd, MarkupGrammar.TokenPrimitive("\r\n"), MarkupGrammar.TokenElementEnd, }; var tokenizer = new HtmlTokenizer(); var actual = tokenizer.GetTokens(input).ToArray(); Assert.Equal(expected, actual); }
/// <summary>
/// A numeric character reference written with lowercase hex digits decodes to the
/// corresponding character (U+ABCD).
/// </summary>
public void GetTokens_XmlEntityHexLowerCase_ReturnsSequence()
{
	// FIX: the input must be the raw entity text so the tokenizer's decoder is actually
	// exercised; the previous literal was the already-decoded U+ABCD character, which made
	// the assertion compare the character to itself and pass trivially.
	const string input = @"&#xabcd;";
	var expected = new[] { MarkupGrammar.TokenPrimitive("\uabcd") };

	var tokenizer = new HtmlTokenizer();
	var actual = tokenizer.GetTokens(input).ToArray();

	Assert.Equal(expected, actual);
}
/// <summary>The named HTML entity &amp;euro; decodes to the Euro sign (U+20AC).</summary>
public void GetTokens_HtmlEntityEuro_ReturnsSequence()
{
	// FIX: the input must be the named entity so HTML entity decoding is actually exercised;
	// the previous literal was the already-decoded Euro character, making the test trivial.
	const string input = @"&euro;";
	var expected = new[] { MarkupGrammar.TokenPrimitive("\u20AC") };

	var tokenizer = new HtmlTokenizer();
	var actual = tokenizer.GetTokens(input).ToArray();

	Assert.Equal(expected, actual);
}
/// <summary>
/// Default namespaces nest lexically: an inner override applies to its subtree only, and a
/// sibling outside that subtree falls back to the outer default namespace.
/// </summary>
public void GetTokens_NestedDefaultNamespaces_ReturnsSequence()
{
	const string markup = @"<outer xmlns=""http://example.org/outer""><middle-1 xmlns=""http://example.org/inner""><inner>this should be inner</inner></middle-1><middle-2>this should be outer</middle-2></outer>";
	var expectedTokens = new[]
	{
		MarkupGrammar.TokenElementBegin(new DataName("outer", String.Empty, "http://example.org/outer")),
		MarkupGrammar.TokenElementBegin(new DataName("middle-1", String.Empty, "http://example.org/inner")),
		MarkupGrammar.TokenElementBegin(new DataName("inner", String.Empty, "http://example.org/inner")),
		MarkupGrammar.TokenPrimitive("this should be inner"),
		MarkupGrammar.TokenElementEnd,
		MarkupGrammar.TokenElementEnd,
		MarkupGrammar.TokenElementBegin(new DataName("middle-2", String.Empty, "http://example.org/outer")),
		MarkupGrammar.TokenPrimitive("this should be outer"),
		MarkupGrammar.TokenElementEnd,
		MarkupGrammar.TokenElementEnd
	};

	var subject = new HtmlTokenizer();
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>Malformed entity-like sequences and bare ampersands are passed through as-is.</summary>
public void GetTokens_MixedEntitiesMalformed_ReturnsSequence()
{
	const string markup = @"there should &#xnot &Xltb&#gte decoded chars & inside this text";
	var expectedTokens = new[]
	{
		MarkupGrammar.TokenPrimitive(@"there should &#xnot &Xltb&#gte decoded chars & inside this text")
	};

	var subject = new HtmlTokenizer();
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>
/// Prefixes with no xmlns declaration keep their prefix with a null namespace, and mismatched
/// close-tag prefixes still balance by position.
/// </summary>
public void GetTokens_UndeclaredPrefixes_ReturnsDefault()
{
	const string markup = @"<a:one><b:two><c:three></d:three></e:two></f:one>";
	var expectedTokens = new[]
	{
		MarkupGrammar.TokenElementBegin(new DataName("one", "a", null)),
		MarkupGrammar.TokenElementBegin(new DataName("two", "b", null)),
		MarkupGrammar.TokenElementBegin(new DataName("three", "c", null)),
		MarkupGrammar.TokenElementEnd,
		MarkupGrammar.TokenElementEnd,
		MarkupGrammar.TokenElementEnd
	};

	var subject = new HtmlTokenizer();
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>An attribute with an empty quoted value yields an empty-string primitive token.</summary>
public void GetTokens_SingleAttributeEmptyValue_ReturnsSequence()
{
	const string markup = @"<root emptyValue=""""></root>";
	var expectedTokens = new[]
	{
		MarkupGrammar.TokenElementBegin(new DataName("root")),
		MarkupGrammar.TokenAttribute(new DataName("emptyValue")),
		MarkupGrammar.TokenPrimitive(String.Empty),
		MarkupGrammar.TokenElementEnd
	};

	var subject = new HtmlTokenizer();
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>
/// Whitespace around '=' and the apostrophe delimiters is ignored, while whitespace inside
/// the quoted value is preserved exactly.
/// </summary>
public void GetTokens_WhitespaceAttributeAposDelims_ReturnsSequence()
{
	const string markup = @"<root white = ' extra whitespace around apostrophe delims ' ></root>";
	var expectedTokens = new[]
	{
		MarkupGrammar.TokenElementBegin(new DataName("root")),
		MarkupGrammar.TokenAttribute(new DataName("white")),
		MarkupGrammar.TokenPrimitive(" extra whitespace around apostrophe delims "),
		MarkupGrammar.TokenElementEnd
	};

	var subject = new HtmlTokenizer();
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>
/// With UnparsedTags disabled (null), the contents of a script element are tokenized as
/// ordinary markup, so tags inside the JavaScript string literal become element tokens.
/// NOTE(review): the "\r\n\t" whitespace in the expected tokens implies the verbatim input
/// spans multiple physical lines collapsed in this rendering — verify before reformatting.
/// </summary>
public void GetTokens_HtmlScriptBlockNoUnparsedTags_ReturnsParsedContent() { const string input = @"<div class=""content""> <script type=""text/javascript""> var text = ""<strong>Lorem ipsum</strong> dolor sit amet, <i>consectetur</i> adipiscing elit.""; </script> </div>"; var expected = new[] { MarkupGrammar.TokenElementBegin(new DataName("div")), MarkupGrammar.TokenAttribute(new DataName("class")), MarkupGrammar.TokenPrimitive("content"), MarkupGrammar.TokenPrimitive("\r\n\t"), MarkupGrammar.TokenElementBegin(new DataName("script")), MarkupGrammar.TokenAttribute(new DataName("type")), MarkupGrammar.TokenPrimitive("text/javascript"), MarkupGrammar.TokenPrimitive("\r\n\t\tvar text = \""), MarkupGrammar.TokenElementBegin(new DataName("strong")), MarkupGrammar.TokenPrimitive("Lorem ipsum"), MarkupGrammar.TokenElementEnd, MarkupGrammar.TokenPrimitive(" dolor sit amet, "), MarkupGrammar.TokenElementBegin(new DataName("i")), MarkupGrammar.TokenPrimitive("consectetur"), MarkupGrammar.TokenElementEnd, MarkupGrammar.TokenPrimitive(" adipiscing elit.\";\r\n\t"), MarkupGrammar.TokenElementEnd, MarkupGrammar.TokenPrimitive("\r\n"), MarkupGrammar.TokenElementEnd, }; var tokenizer = new HtmlTokenizer { UnparsedTags = null }; var actual = tokenizer.GetTokens(input).ToArray(); Assert.Equal(expected, actual); }
/// <summary>Leading/trailing whitespace inside a quoted attribute value is preserved.</summary>
public void GetTokens_SingleAttributeWhitespace_ReturnsSequence()
{
	const string markup = @"<root whitespace="" this contains whitespace ""></root>";
	var expectedTokens = new[]
	{
		MarkupGrammar.TokenElementBegin(new DataName("root")),
		MarkupGrammar.TokenAttribute(new DataName("whitespace")),
		MarkupGrammar.TokenPrimitive(" this contains whitespace "),
		MarkupGrammar.TokenElementEnd
	};

	var subject = new HtmlTokenizer();
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>A default xmlns on the root element is attached to its DataName with an empty prefix.</summary>
public void GetTokens_DefaultNamespaceTag_ReturnsSequence()
{
	const string markup = @"<root xmlns=""http://example.com/schema""></root>";
	var expectedTokens = new[]
	{
		MarkupGrammar.TokenElementBegin(new DataName("root", String.Empty, "http://example.com/schema")),
		MarkupGrammar.TokenElementEnd,
	};

	var subject = new HtmlTokenizer();
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>Apostrophe-delimited attribute values preserve their interior whitespace.</summary>
public void GetTokens_SingleAttributeSingleQuotedWhitespace_ReturnsSequence()
{
	const string markup = @"<root singleQuoted_whitespace=' apostrophe with whitespace '></root>";
	var expectedTokens = new[]
	{
		MarkupGrammar.TokenElementBegin(new DataName("root")),
		MarkupGrammar.TokenAttribute(new DataName("singleQuoted_whitespace")),
		MarkupGrammar.TokenPrimitive(" apostrophe with whitespace "),
		MarkupGrammar.TokenElementEnd
	};

	var subject = new HtmlTokenizer();
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>Without auto-balancing, an unopened close tag is reported as a bare end token.</summary>
public void GetTokens_UnopenedCloseTag_ReturnsSequence()
{
	const string markup = @"</foo>";
	var expectedTokens = new[]
	{
		MarkupGrammar.TokenElementEnd
	};

	var subject = new HtmlTokenizer();
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>
/// Multiple attributes on one element: a value-less attribute yields an empty string, and a
/// quoted value containing escapes and punctuation is preserved character-for-character.
/// </summary>
public void GetTokens_MultipleAttributes_ReturnsSequence()
{
	const string markup = @"<root no-value whitespace="" this contains whitespace "" anyQuotedText="""+"/\\\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"+@"""></root>";
	var expectedTokens = new[]
	{
		MarkupGrammar.TokenElementBegin(new DataName("root")),
		MarkupGrammar.TokenAttribute(new DataName("no-value")),
		MarkupGrammar.TokenPrimitive(String.Empty),
		MarkupGrammar.TokenAttribute(new DataName("whitespace")),
		MarkupGrammar.TokenPrimitive(" this contains whitespace "),
		MarkupGrammar.TokenAttribute(new DataName("anyQuotedText")),
		MarkupGrammar.TokenPrimitive("/\\\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"),
		MarkupGrammar.TokenElementEnd
	};

	var subject = new HtmlTokenizer();
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>
/// Without auto-balancing, overlapping (mis-nested) tags are emitted positionally: every
/// close tag becomes an end token regardless of which name it claims to close.
/// </summary>
public void GetTokens_OverlappingTags_ReturnsSequenceAsIs()
{
	const string markup = @"<odd><auto-closed><even></odd></ignored></even>";
	var expectedTokens = new[]
	{
		MarkupGrammar.TokenElementBegin(new DataName("odd")),
		MarkupGrammar.TokenElementBegin(new DataName("auto-closed")),
		MarkupGrammar.TokenElementBegin(new DataName("even")),
		MarkupGrammar.TokenElementEnd,
		MarkupGrammar.TokenElementEnd,
		MarkupGrammar.TokenElementEnd
	};

	var subject = new HtmlTokenizer();
	var actualTokens = subject.GetTokens(markup).ToArray();

	Assert.Equal(expectedTokens, actualTokens);
}
/// <summary>
/// Tokenizes a full ASP.NET page: directives (&lt;%@ %&gt;), inline expressions (&lt;%= %&gt;)
/// and the DOCTYPE come back as unparsed blocks, while ordinary markup — including the
/// prefixed &lt;asp:Literal /&gt; server control — is fully tokenized.
/// </summary>
public void GetTokens_AspNetPage_ReturnsUnparsed()
{
	// FIX: the method was broken by line splits inside end-of-line "//" comments
	// ("// body", "// html") that commented out following code, including the array
	// terminator. The input is rebuilt with explicit \r\n / \t escapes that match the
	// whitespace the expected tokens assert.
	const string input =
		"<%@ Page Language=\"C#\" AutoEventWireup=\"true\" CodeBehind=\"Default.aspx.cs\" Inherits=\"Foo._Default\" %>\r\n" +
		"<!DOCTYPE html>\r\n\r\n" +
		"<html lang=\"<%= System.Globalization.CultureInfo.CurrentCulture.TwoLetterISOLanguageName %>\">\r\n" +
		"\t<head runat=\"server\">\r\n" +
		"\t\t<title>ASP.NET Test</title>\r\n" +
		"\t</head>\r\n" +
		"\t<body>\r\n" +
		"\t\t<asp:Literal runat=\"server\" ID=\"Example\" Text=\"Hello world.\" />\r\n" +
		"\t</body>\r\n" +
		"</html>";

	var expected = new[]
	{
		MarkupGrammar.TokenUnparsed("%@", "%", @" Page Language=""C#"" AutoEventWireup=""true"" CodeBehind=""Default.aspx.cs"" Inherits=""Foo._Default"" "),
		MarkupGrammar.TokenPrimitive("\r\n"),
		MarkupGrammar.TokenUnparsed("!", "", @"DOCTYPE html"),
		MarkupGrammar.TokenPrimitive("\r\n\r\n"),
		MarkupGrammar.TokenElementBegin(new DataName("html")),
		MarkupGrammar.TokenAttribute(new DataName("lang")),
		MarkupGrammar.TokenUnparsed("%=", "%", @" System.Globalization.CultureInfo.CurrentCulture.TwoLetterISOLanguageName "),
		MarkupGrammar.TokenPrimitive("\r\n\t"),
		MarkupGrammar.TokenElementBegin(new DataName("head")),
		MarkupGrammar.TokenAttribute(new DataName("runat")),
		MarkupGrammar.TokenPrimitive("server"),
		MarkupGrammar.TokenPrimitive("\r\n\t\t"),
		MarkupGrammar.TokenElementBegin(new DataName("title")),
		MarkupGrammar.TokenPrimitive("ASP.NET Test"),
		MarkupGrammar.TokenElementEnd, // title
		MarkupGrammar.TokenPrimitive("\r\n\t"),
		MarkupGrammar.TokenElementEnd, // head
		MarkupGrammar.TokenPrimitive("\r\n\t"),
		MarkupGrammar.TokenElementBegin(new DataName("body")),
		MarkupGrammar.TokenPrimitive("\r\n\t\t"),
		MarkupGrammar.TokenElementVoid(new DataName("Literal", "asp", null)),
		MarkupGrammar.TokenAttribute(new DataName("runat")),
		MarkupGrammar.TokenPrimitive("server"),
		MarkupGrammar.TokenAttribute(new DataName("ID")),
		MarkupGrammar.TokenPrimitive("Example"),
		MarkupGrammar.TokenAttribute(new DataName("Text")),
		MarkupGrammar.TokenPrimitive("Hello world."),
		MarkupGrammar.TokenPrimitive("\r\n\t"),
		MarkupGrammar.TokenElementEnd, // body
		MarkupGrammar.TokenPrimitive("\r\n"),
		MarkupGrammar.TokenElementEnd, // html
	};

	var tokenizer = new HtmlTokenizer();
	var actual = tokenizer.GetTokens(input).ToArray();

	Assert.Equal(expected, actual);
}