// A tag written in mixed case ("<InpUT>") is expected to be reported with the name "input".
public void TokenizationTagMixedCaseHandling()
{
    var tokenizer = new HtmlTokenizer(new SourceManager("<InpUT>"));
    var tag = (HtmlTagToken)tokenizer.Get();

    Assert.AreEqual("input", tag.Name);
}
// The first token produced for "<!doctype html>" is expected to be of type DOCTYPE.
public void TokenizationDoctypeDetected()
{
    var tokenizer = new HtmlTokenizer(new SourceManager("<!doctype html>"));
    var first = tokenizer.Get();

    Assert.AreEqual(HtmlTokenType.DOCTYPE, first.Type);
}
// Tokenizing an empty input is expected to yield an EOF token right away.
public void TokenizationFinalEOF()
{
    var tokenizer = new HtmlTokenizer(new SourceManager(""));
    var first = tokenizer.Get();

    Assert.AreEqual(HtmlTokenType.EOF, first.Type);
}
// The tag token produced for "<img />" is expected to be flagged as self-closing.
public void TokenizationTagSelfClosingDetected()
{
    var s = new SourceManager("<img />");
    var t = new HtmlTokenizer(s);

    var token = t.Get();

    // Assert the boolean directly instead of comparing it against the literal `true`.
    Assert.IsTrue(((HtmlTagToken)token).IsSelfClosing);
}
// A start tag carrying three attributes is expected to expose exactly three attribute entries.
public void TokenizationAttributesDetected()
{
    var tokenizer = new HtmlTokenizer(new SourceManager("<a target='_blank' href='http://whatever' title='ho'>"));
    var tag = (HtmlTagToken)tokenizer.Get();

    Assert.AreEqual(3, tag.Attributes.Count);
}
// Tokenizing an empty text source is expected to yield an EndOfFile token right away.
public void TokenizationFinalEOF()
{
    var tokenizer = new HtmlTokenizer(new TextSource(""), null);
    var first = tokenizer.Get();

    Assert.AreEqual(HtmlTokenType.EndOfFile, first.Type);
}
// A regular HTML comment is expected to be reported as a Comment token.
public void TokenizationCommentDetected()
{
    var tokenizer = new HtmlTokenizer(new SourceManager("<!-- hi my friend -->"));
    var first = tokenizer.Get();

    Assert.AreEqual(HtmlTokenType.Comment, first.Type);
}
// The first token produced for "<!doctype html>" is expected to be of type Doctype.
public void TokenizationDoctypeDetected()
{
    var tokenizer = new HtmlTokenizer(new TextSource("<!doctype html>"), null);
    var first = tokenizer.Get();

    Assert.AreEqual(HtmlTokenType.Doctype, first.Type);
}
// A value-less attribute ("required") is expected to show up as the key of the first attribute.
public void TokenizationAttributeNameDetection()
{
    var tokenizer = new HtmlTokenizer(new SourceManager("<input required>"));
    var tag = (HtmlTagToken)tokenizer.Get();

    Assert.AreEqual("required", tag.Attributes[0].Key);
}
// The tag token produced for "<span>" is expected to carry the name "span".
public void TokenizationTagNameDetection()
{
    var tokenizer = new HtmlTokenizer(new SourceManager("<span>"));
    var tag = (HtmlTagToken)tokenizer.Get();

    Assert.AreEqual("span", tag.Name);
}
// Whitespace after the tag name ("<i >") is expected not to become part of the name.
public void TokenizationTagSpacesBehind()
{
    var tokenizer = new HtmlTokenizer(new SourceManager("<i >"));
    var tag = (HtmlTagToken)tokenizer.Get();

    Assert.AreEqual("i", tag.Name);
}
// "<?>" is expected to be tokenized as a comment whose data is "?".
public void TokenizationBogusCommentQuestionMark()
{
    var tokenizer = new HtmlTokenizer(new SourceManager("<?>"));
    var first = tokenizer.Get();

    Assert.AreEqual(HtmlTokenType.Comment, first.Type);

    var comment = (HtmlCommentToken)first;
    Assert.AreEqual("?", comment.Data);
}
// "</ >" is expected to be tokenized as a comment whose data is a single space.
public void TokenizationBogusCommentClosingTag()
{
    var tokenizer = new HtmlTokenizer(new TextSource("</ >"), null);
    var first = tokenizer.Get();

    Assert.AreEqual(HtmlTokenType.Comment, first.Type);
    Assert.AreEqual(" ", first.Data);
}
// "<?>" is expected to be tokenized as a comment whose data is "?".
public void TokenizationBogusCommentQuestionMark()
{
    var tokenizer = new HtmlTokenizer(new TextSource("<?>"), null);
    var first = tokenizer.Get();

    Assert.AreEqual(HtmlTokenType.Comment, first.Type);
    Assert.AreEqual("?", first.Data);
}
// "<!>" is expected to be tokenized as a comment with empty data.
public void TokenizationBogusCommentEmpty()
{
    var tokenizer = new HtmlTokenizer(new TextSource("<!>"), null);
    var first = tokenizer.Get();

    Assert.AreEqual(HtmlTokenType.Comment, first.Type);
    Assert.AreEqual(String.Empty, first.Data);
}
// "<!>" is expected to be tokenized as a comment with empty data.
public void TokenizationBogusCommentEmpty()
{
    var tokenizer = new HtmlTokenizer(new SourceManager("<!>"));
    var first = tokenizer.Get();

    Assert.AreEqual(HtmlTokenType.Comment, first.Type);

    var comment = (HtmlCommentToken)first;
    Assert.AreEqual(String.Empty, comment.Data);
}
// "<p>" is expected to be tokenized as a StartTag token named "p".
public void TokenizationStartTagDetection()
{
    var tokenizer = new HtmlTokenizer(new SourceManager("<p>"));
    var first = tokenizer.Get();

    Assert.AreEqual(HtmlTokenType.StartTag, first.Type);

    var tag = (HtmlTagToken)first;
    Assert.AreEqual("p", tag.Name);
}
// "</ >" is expected to be tokenized as a comment whose data is a single space.
public void TokenizationBogusCommentClosingTag()
{
    var tokenizer = new HtmlTokenizer(new SourceManager("</ >"));
    var first = tokenizer.Get();

    Assert.AreEqual(HtmlTokenType.Comment, first.Type);

    var comment = (HtmlCommentToken)first;
    Assert.AreEqual(" ", comment.Data);
}
// With character data acceptance switched on, a CDATA section is expected to start
// with a Character token.
public void TokenizationCDataDetected()
{
    var input = new SourceManager("<![CDATA[hi mum how <!-- are you doing />]]>");
    var tokenizer = new HtmlTokenizer(input);
    tokenizer.AcceptsCharacterData = true;

    var first = tokenizer.Get();

    Assert.AreEqual(HtmlTokenType.Character, first.Type);
}
// A long "&...;" sequence is expected to come back unchanged as the data of a
// single Character token.
public void TokenizationLongerCharacterReference()
{
    var content = "&abcdefghijklmnopqrstvwxyzABCDEFGHIJKLMNOPQRSTV;";
    var tokenizer = new HtmlTokenizer(new TextSource(content), null);

    var first = tokenizer.Get();

    Assert.AreEqual(HtmlTokenType.Character, first.Type);
    Assert.AreEqual(content, first.Data);
}
// A doctype with a custom root element and a SYSTEM identifier is expected to expose
// both the name and the system identifier.
public void TokenizationUnusualDoctype()
{
    var input = new SourceManager("<!DOCTYPE root_element SYSTEM \"DTD_location\">");
    var tokenizer = new HtmlTokenizer(input);

    var first = tokenizer.Get();
    Assert.AreEqual(HtmlTokenType.DOCTYPE, first.Type);

    var doctype = (HtmlDoctypeToken)first;

    // Name part.
    Assert.IsFalse(doctype.IsNameMissing);
    Assert.AreEqual("root_element", doctype.Name);

    // System identifier part.
    Assert.IsFalse(doctype.IsSystemIdentifierMissing);
    Assert.AreEqual("DTD_location", doctype.SystemIdentifier);
}
/// <summary>
/// Tokenizes the given text source lazily, yielding every token up to and
/// including the end-of-file token.
/// </summary>
/// <param name="source">The source of the tokenization.</param>
/// <param name="provider">The custom entity provider; when null, <c>HtmlEntityService.Resolver</c> is used.</param>
/// <param name="errorHandler">The error handler to attach to the tokenizer's <c>Error</c> event, if any.</param>
/// <returns>A stream of consumed tokens.</returns>
public static IEnumerable<HtmlToken> Tokenize(this TextSource source, IEntityProvider provider = null, EventHandler<HtmlErrorEvent> errorHandler = null)
{
    var tokenizer = new HtmlTokenizer(source, provider ?? HtmlEntityService.Resolver);

    if (errorHandler != null)
    {
        tokenizer.Error += errorHandler;
    }

    while (true)
    {
        var current = tokenizer.Get();
        yield return current;

        // The end-of-file token is still handed to the caller before the stream stops.
        if (current.Type == HtmlTokenType.EndOfFile)
        {
            yield break;
        }
    }
}
/// <summary>
/// Reads the next token from the current tokenizer and records the tokenizer
/// position, unless the token is the end-of-file token.
/// </summary>
/// <returns>The token read from the tokenizer.</returns>
/// <exception cref="InvalidOperationException">No tokenizer has been set up yet.</exception>
public HtmlToken Get()
{
    if (_tokenizer == null)
    {
        throw new InvalidOperationException("You need to call Push first.");
    }

    var next = _tokenizer.Get();
    var reachedEnd = next.Type == HtmlTokenType.EndOfFile;

    // An unclosed tag makes the tokenizer advance to the end. The remembered
    // position must stay before that tag so reading can resume there, hence
    // it is left untouched on end-of-file.
    if (!reachedEnd)
    {
        _position = _tokenizer.Position;
    }

    return next;
}
// Concatenating the data of all Character tokens is expected to reproduce
// the text "I'm ¬it; I tell you".
public void TokenizationCharacterReferenceNotIt()
{
    var src = "I'm ¬it; I tell you";
    var tokenizer = new HtmlTokenizer(new TextSource(src), null);
    var collected = string.Empty;

    while (true)
    {
        var current = tokenizer.Get();

        if (current.Type == HtmlTokenType.Character)
        {
            collected += current.Data;
        }

        if (current.Type == HtmlTokenType.EndOfFile)
        {
            break;
        }
    }

    Assert.AreEqual("I'm ¬it; I tell you", collected);
}
// Concatenating the data of all Character tokens is expected to reproduce
// the text "I'm ∉ I tell you".
public void TokenizationCharacterReferenceNotin()
{
    var src = "I'm ∉ I tell you";
    var tokenizer = new HtmlTokenizer(new TextSource(src));
    var collected = string.Empty;
    HtmlToken current;

    do
    {
        current = tokenizer.Get();

        if (current.Type == HtmlTokenType.Character)
        {
            collected += current.Data;
        }
    }
    while (current != HtmlToken.EndOfFile);

    Assert.AreEqual("I'm ∉ I tell you", collected);
}
// Concatenating the data of all Character tokens is expected to reproduce
// the text "I'm ¬it; I tell you".
public void TokenizationCharacterReferenceNotIt()
{
    var src = "I'm ¬it; I tell you";
    var tokenizer = new HtmlTokenizer(new SourceManager(src));
    var collected = string.Empty;
    HtmlToken current;

    do
    {
        current = tokenizer.Get();

        if (current.Type == HtmlTokenType.Character)
        {
            var characters = (HtmlCharacterToken)current;
            collected += characters.Data;
        }
    }
    while (current != HtmlToken.EOF);

    Assert.AreEqual("I'm ¬it; I tell you", collected);
}
// With character data acceptance switched on, the Character tokens of a CDATA
// section are expected to add up to the raw section content.
public void TokenizationCDataCorrectCharacters()
{
    var collected = new StringBuilder();
    var input = new TextSource("<![CDATA[hi mum how <!-- are you doing />]]>");
    var tokenizer = new HtmlTokenizer(input, null);
    tokenizer.IsAcceptingCharacterData = true;

    while (true)
    {
        var current = tokenizer.Get();

        if (current.Type == HtmlTokenType.Character)
        {
            collected.Append(current.Data);
        }

        if (current.Type == HtmlTokenType.EndOfFile)
        {
            break;
        }
    }

    Assert.AreEqual("hi mum how <!-- are you doing />", collected.ToString());
}
// With character data acceptance switched on, the Character tokens of a CDATA
// section are expected to add up to the raw section content.
public void TokenizationCDataCorrectCharacters()
{
    var collected = new StringBuilder();
    var input = new SourceManager("<![CDATA[hi mum how <!-- are you doing />]]>");
    var tokenizer = new HtmlTokenizer(input);
    tokenizer.AcceptsCharacterData = true;
    HtmlToken current;

    do
    {
        current = tokenizer.Get();

        if (current.Type == HtmlTokenType.Character)
        {
            var characters = (HtmlCharacterToken)current;
            collected.Append(characters.Data);
        }
    }
    while (current != HtmlToken.EOF);

    Assert.AreEqual("hi mum how <!-- are you doing />", collected.ToString());
}
/// <summary>
/// Injects the Blazor boot code and supporting config data at a user-designated
/// script tag identified with a <c>type</c> of <c>blazor-boot</c>.
/// </summary>
/// <remarks>
/// <para>
/// If a matching script tag is found, then it will be adjusted to inject
/// supporting configuration data, including a <c>src</c> attribute that
/// will load the Blazor client-side library. Any existing attribute
/// names that match the boot config data will be overwritten, but other
/// user-supplied attributes will be left intact. This allows, for example,
/// to designate asynchronous loading or deferred running of the script
/// reference.
/// </para><para>
/// If no matching script tag is found, it is assumed that the user is
/// responsible for completing the Blazor boot process.
/// </para>
/// </remarks>
public static string GetIndexHtmlContents(
    string htmlTemplate,
    string assemblyName,
    string assemblyEntryPoint,
    IEnumerable<string> assemblyReferences,
    IEnumerable<EmbeddedResourceInfo> embeddedContent,
    bool linkerEnabled)
{
    var output = new StringBuilder();

    // Walk the template token by token, looking for the Blazor boot script tag
    // (as recognized by IsBlazorBootTag) so it can be swapped for a
    // fully-configured boot script tag.
    var source = new TextSource(htmlTemplate);
    var tokenizer = new HtmlTokenizer(source, HtmlEntityService.Resolver);

    var pendingRangeStart = 0;       // Start of the template text not yet copied to the output
    var insideBootTag = false;       // True between the boot start tag and the next end tag
    var resumeAtThisToken = false;   // True when copying should restart at the upcoming token

    while (true)
    {
        var token = tokenizer.Get();
        var tokenStart = token.Position.Position - 1;

        if (resumeAtThisToken)
        {
            resumeAtThisToken = false;
            pendingRangeStart = tokenStart;
        }

        var tokenType = token.Type;

        if (tokenType == HtmlTokenType.EndOfFile)
        {
            // Flush whatever is left of the original template and finish
            var tailLength = htmlTemplate.Length - pendingRangeStart;
            if (tailLength > 0)
            {
                output.Append(htmlTemplate, pendingRangeStart, tailLength);
            }

            return output.ToString();
        }

        if (tokenType == HtmlTokenType.EndTag)
        {
            // An end tag seen while inside the boot tag closes it; copying of the
            // original text restarts with the token that follows
            if (insideBootTag)
            {
                insideBootTag = false;
                resumeAtThisToken = true;
            }

            continue;
        }

        if (tokenType != HtmlTokenType.StartTag)
        {
            continue;
        }

        // Start tags other than the boot tag need no special treatment
        var tag = token.AsTag();
        if (!IsBlazorBootTag(tag))
        {
            continue;
        }

        // Copy the untouched template text that precedes the boot tag
        output.Append(htmlTemplate, pendingRangeStart, tokenStart - pendingRangeStart);

        // Emit the configured boot script tag in place of the original tag text
        AppendScriptTagWithBootConfig(
            output,
            assemblyName,
            assemblyEntryPoint,
            assemblyReferences,
            linkerEnabled,
            tag.Attributes);

        // Emit reference tags for the embedded CSS files, then the JavaScript files
        var stylesheetPaths =
            from c in embeddedContent
            where c.Kind == EmbeddedResourceKind.Css
            select c.RelativePath;
        AppendReferenceTags(
            output,
            stylesheetPaths,
            "<link rel=\"stylesheet\" href=\"{0}\" />");

        var scriptPaths =
            from c in embeddedContent
            where c.Kind == EmbeddedResourceKind.JavaScript
            select c.RelativePath;
        AppendReferenceTags(
            output,
            scriptPaths,
            "<script src=\"{0}\" defer></script>");

        // Remember that the boot tag is open so its end tag can be recognized
        insideBootTag = true;
    }
}
/// <summary>
/// Injects the Blazor boot code and supporting config data at a user-designated
/// script tag identified with a <c>type</c> of <c>blazor-boot</c>.
/// </summary>
/// <remarks>
/// <para>
/// If a matching script tag is found, then it will be adjusted to inject
/// supporting configuration data, including a <c>src</c> attribute that
/// will load the Blazor client-side library. Any existing attribute
/// names that match the boot config data will be overwritten, but other
/// user-supplied attributes will be left intact. This allows, for example,
/// to designate asynchronous loading or deferred running of the script
/// reference.
/// </para><para>
/// If no matching script tag is found, it is assumed that the user is
/// responsible for completing the Blazor boot process.
/// </para>
/// </remarks>
public static string GetIndexHtmlContents(
    string htmlTemplate,
    string assemblyName,
    string assemblyEntryPoint,
    IEnumerable<string> binFiles)
{
    var resultBuilder = new StringBuilder();

    // Search for the Blazor boot script tag (as recognized by IsBlazorBootTag), and
    // replace it with a fully-configured Blazor boot script tag
    var tokenizer = new HtmlTokenizer(
        new TextSource(htmlTemplate),
        HtmlEntityService.Resolver);
    var currentRangeStartPos = 0;
    var isInBlazorBootTag = false;
    var resumeOnNextToken = false;

    while (true)
    {
        var token = tokenizer.Get();

        // The start-tag handling below treats (Position - 1) as the index of the
        // token's first character in the template. The same index is used when
        // resuming, so the first character after the closing tag is not dropped.
        var tokenCharIndex = token.Position.Position - 1;
        if (resumeOnNextToken)
        {
            resumeOnNextToken = false;
            currentRangeStartPos = tokenCharIndex;
        }

        switch (token.Type)
        {
            case HtmlTokenType.StartTag:
                {
                    // Only do anything special if this is a Blazor boot tag
                    var tag = token.AsTag();
                    if (IsBlazorBootTag(tag))
                    {
                        // First, emit the original source text prior to this special tag, since
                        // we want that to be unchanged
                        resultBuilder.Append(htmlTemplate, currentRangeStartPos, tokenCharIndex - currentRangeStartPos);

                        // Instead of emitting the source text for this special tag, emit a fully-
                        // configured Blazor boot script tag
                        AppendScriptTagWithBootConfig(
                            resultBuilder,
                            assemblyName,
                            assemblyEntryPoint,
                            binFiles,
                            tag.Attributes);

                        // Set a flag so we know the next end tag closes the special tag
                        isInBlazorBootTag = true;
                    }

                    break;
                }

            case HtmlTokenType.EndTag:
                // If this is an end tag corresponding to the Blazor boot script tag, we
                // can switch back into the mode of emitting the original source text
                if (isInBlazorBootTag)
                {
                    isInBlazorBootTag = false;
                    resumeOnNextToken = true;
                }

                break;

            case HtmlTokenType.EndOfFile:
                // Finally, emit any remaining text from the original source file. When the
                // resume position was taken from the end-of-file token itself there is
                // nothing left to copy, and a non-positive length must not reach Append.
                var remainingLength = htmlTemplate.Length - currentRangeStartPos;
                if (remainingLength > 0)
                {
                    resultBuilder.Append(htmlTemplate, currentRangeStartPos, remainingLength);
                }

                return resultBuilder.ToString();
        }
    }
}