Пример #1
0
        public void TokenizationTagMixedCaseHandling()
        {
            // A tag written in mixed case should be reported with a lower-case name.
            var tokenizer = new HtmlTokenizer(new SourceManager("<InpUT>"));
            var tag = (HtmlTagToken)tokenizer.Get();

            Assert.AreEqual("input", tag.Name);
        }
Пример #2
0
        public void TokenizationDoctypeDetected()
        {
            // The first token of a lower-case doctype declaration must be a DOCTYPE token.
            var tokenizer = new HtmlTokenizer(new SourceManager("<!doctype html>"));
            var first = tokenizer.Get();

            Assert.AreEqual(HtmlTokenType.DOCTYPE, first.Type);
        }
Пример #3
0
        public void TokenizationFinalEOF()
        {
            // Empty input: the very first token is expected to be the EOF token.
            var tokenizer = new HtmlTokenizer(new SourceManager(""));
            var first = tokenizer.Get();

            Assert.AreEqual(HtmlTokenType.EOF, first.Type);
        }
Пример #4
0
        public void TokenizationTagSelfClosingDetected()
        {
            // "<img />" should yield a tag token flagged as self-closing.
            var tokenizer = new HtmlTokenizer(new SourceManager("<img />"));
            var tag = (HtmlTagToken)tokenizer.Get();

            Assert.AreEqual(true, tag.IsSelfClosing);
        }
Пример #5
0
        public void TokenizationAttributesDetected()
        {
            // The anchor tag carries three single-quoted attributes; all must be collected.
            var markup = new SourceManager("<a target='_blank' href='http://whatever' title='ho'>");
            var tokenizer = new HtmlTokenizer(markup);
            var tag = (HtmlTagToken)tokenizer.Get();

            Assert.AreEqual(3, tag.Attributes.Count);
        }
Пример #6
0
        public void TokenizationFinalEOF()
        {
            // Empty input: the very first token is expected to be the end-of-file token.
            var tokenizer = new HtmlTokenizer(new TextSource(""), null);
            var first = tokenizer.Get();

            Assert.AreEqual(HtmlTokenType.EndOfFile, first.Type);
        }
Пример #7
0
        public void TokenizationCommentDetected()
        {
            // A regular HTML comment must be reported as a Comment token.
            var tokenizer = new HtmlTokenizer(new SourceManager("<!-- hi my friend -->"));
            var first = tokenizer.Get();

            Assert.AreEqual(HtmlTokenType.Comment, first.Type);
        }
Пример #8
0
        public void TokenizationDoctypeDetected()
        {
            // The first token of a lower-case doctype declaration must be a Doctype token.
            var tokenizer = new HtmlTokenizer(new TextSource("<!doctype html>"), null);
            var first = tokenizer.Get();

            Assert.AreEqual(HtmlTokenType.Doctype, first.Type);
        }
Пример #9
0
        public void TokenizationAttributeNameDetection()
        {
            // A value-less attribute should still be recorded under its name.
            var tokenizer = new HtmlTokenizer(new SourceManager("<input required>"));
            var tag = (HtmlTagToken)tokenizer.Get();

            Assert.AreEqual("required", tag.Attributes[0].Key);
        }
Пример #10
0
        public void TokenizationTagNameDetection()
        {
            // The name of a plain start tag must be extracted as written.
            var tokenizer = new HtmlTokenizer(new SourceManager("<span>"));
            var tag = (HtmlTagToken)tokenizer.Get();

            Assert.AreEqual("span", tag.Name);
        }
Пример #11
0
        public void TokenizationTagSpacesBehind()
        {
            // Whitespace between the tag name and '>' must not become part of the name.
            var tokenizer = new HtmlTokenizer(new SourceManager("<i   >"));
            var tag = (HtmlTagToken)tokenizer.Get();

            Assert.AreEqual("i", tag.Name);
        }
Пример #12
0
        public void TokenizationBogusCommentQuestionMark()
        {
            // "<?>" is expected to come back as a comment whose data is "?".
            var tokenizer = new HtmlTokenizer(new SourceManager("<?>"));
            var first = tokenizer.Get();

            // Check the type first so a wrong token kind fails as an assertion, not a cast.
            Assert.AreEqual(HtmlTokenType.Comment, first.Type);

            var comment = (HtmlCommentToken)first;
            Assert.AreEqual("?", comment.Data);
        }
Пример #13
0
        public void TokenizationBogusCommentClosingTag()
        {
            // "</ >" is expected to come back as a comment whose data is a single space.
            var tokenizer = new HtmlTokenizer(new TextSource("</ >"), null);
            var first = tokenizer.Get();

            Assert.AreEqual(HtmlTokenType.Comment, first.Type);
            Assert.AreEqual(" ", first.Data);
        }
Пример #14
0
        public void TokenizationBogusCommentQuestionMark()
        {
            // "<?>" is expected to come back as a comment whose data is "?".
            var tokenizer = new HtmlTokenizer(new TextSource("<?>"), null);
            var first = tokenizer.Get();

            Assert.AreEqual(HtmlTokenType.Comment, first.Type);
            Assert.AreEqual("?", first.Data);
        }
Пример #15
0
        public void TokenizationBogusCommentEmpty()
        {
            // "<!>" is expected to come back as a comment with empty data.
            var tokenizer = new HtmlTokenizer(new TextSource("<!>"), null);
            var first = tokenizer.Get();

            Assert.AreEqual(HtmlTokenType.Comment, first.Type);
            Assert.AreEqual(string.Empty, first.Data);
        }
Пример #16
0
        public void TokenizationBogusCommentEmpty()
        {
            // "<!>" is expected to come back as a comment with empty data.
            var tokenizer = new HtmlTokenizer(new SourceManager("<!>"));
            var first = tokenizer.Get();

            // Check the type first so a wrong token kind fails as an assertion, not a cast.
            Assert.AreEqual(HtmlTokenType.Comment, first.Type);

            var comment = (HtmlCommentToken)first;
            Assert.AreEqual(string.Empty, comment.Data);
        }
Пример #17
0
        public void TokenizationStartTagDetection()
        {
            // "<p>" must be classified as a start tag named "p".
            var tokenizer = new HtmlTokenizer(new SourceManager("<p>"));
            var first = tokenizer.Get();

            // Check the type first so a wrong token kind fails as an assertion, not a cast.
            Assert.AreEqual(HtmlTokenType.StartTag, first.Type);

            var tag = (HtmlTagToken)first;
            Assert.AreEqual("p", tag.Name);
        }
Пример #18
0
        public void TokenizationBogusCommentClosingTag()
        {
            // "</ >" is expected to come back as a comment whose data is a single space.
            var tokenizer = new HtmlTokenizer(new SourceManager("</ >"));
            var first = tokenizer.Get();

            // Check the type first so a wrong token kind fails as an assertion, not a cast.
            Assert.AreEqual(HtmlTokenType.Comment, first.Type);

            var comment = (HtmlCommentToken)first;
            Assert.AreEqual(" ", comment.Data);
        }
Пример #19
0
        public void TokenizationCDataDetected()
        {
            // With character data accepted, a CDATA section should surface as a Character token.
            var markup = new SourceManager("<![CDATA[hi mum how <!-- are you doing />]]>");
            var tokenizer = new HtmlTokenizer(markup)
            {
                AcceptsCharacterData = true
            };

            var first = tokenizer.Get();

            Assert.AreEqual(HtmlTokenType.Character, first.Type);
        }
Пример #20
0
        public void TokenizationLongerCharacterReference()
        {
            // A long entity-like sequence is expected to be passed through
            // verbatim as character data.
            var text = "&abcdefghijklmnopqrstvwxyzABCDEFGHIJKLMNOPQRSTV;";
            var tokenizer = new HtmlTokenizer(new TextSource(text), null);
            var first = tokenizer.Get();

            Assert.AreEqual(HtmlTokenType.Character, first.Type);
            Assert.AreEqual(text, first.Data);
        }
Пример #21
0
        public void TokenizationUnusualDoctype()
        {
            // A doctype with a custom root element and a SYSTEM identifier must expose
            // both the name and the system identifier.
            var markup = new SourceManager("<!DOCTYPE root_element SYSTEM \"DTD_location\">");
            var tokenizer = new HtmlTokenizer(markup);
            var first = tokenizer.Get();

            // Check the type first so a wrong token kind fails as an assertion, not a cast.
            Assert.AreEqual(HtmlTokenType.DOCTYPE, first.Type);

            var doctype = (HtmlDoctypeToken)first;

            Assert.IsFalse(doctype.IsNameMissing);
            Assert.AreEqual("root_element", doctype.Name);
            Assert.IsFalse(doctype.IsSystemIdentifierMissing);
            Assert.AreEqual("DTD_location", doctype.SystemIdentifier);
        }
        /// <summary>
        /// Lazily tokenizes the given text source, yielding each token as it is read.
        /// </summary>
        /// <param name="source">The text source to read tokens from.</param>
        /// <param name="provider">
        /// The entity provider to use; when null, <c>HtmlEntityService.Resolver</c> is used.
        /// </param>
        /// <param name="errorHandler">
        /// Optional handler subscribed to the tokenizer's <c>Error</c> event.
        /// </param>
        /// <returns>
        /// The sequence of tokens, ending with (and including) the end-of-file token.
        /// </returns>
        public static IEnumerable <HtmlToken> Tokenize(this TextSource source, IEntityProvider provider = null, EventHandler <HtmlErrorEvent> errorHandler = null)
        {
            var tokenizer = new HtmlTokenizer(source, provider ?? HtmlEntityService.Resolver);

            if (errorHandler != null)
            {
                tokenizer.Error += errorHandler;
            }

            while (true)
            {
                var current = tokenizer.Get();
                yield return current;

                // The end-of-file token is handed to the caller before the sequence ends.
                if (current.Type == HtmlTokenType.EndOfFile)
                {
                    yield break;
                }
            }
        }
Пример #23
0
            /// <summary>
            /// Reads the next token from the underlying tokenizer and records the
            /// tokenizer position for every token except end-of-file.
            /// </summary>
            /// <returns>The token read from the underlying tokenizer.</returns>
            /// <exception cref="InvalidOperationException">
            /// Thrown when no tokenizer has been set up yet.
            /// </exception>
            public HtmlToken Get()
            {
                if (_tokenizer != null)
                {
                    var next = _tokenizer.Get();

                    // An unclosed tag makes the tokenizer run to the end of the input.
                    // Keeping the previous position on end-of-file lets processing
                    // resume before that unclosed tag.
                    var reachedEnd = next.Type == HtmlTokenType.EndOfFile;

                    if (!reachedEnd)
                    {
                        _position = _tokenizer.Position;
                    }

                    return next;
                }

                throw new InvalidOperationException("You need to call Push first.");
            }
Пример #24
0
        public void TokenizationCharacterReferenceNotIt()
        {
            // "&notit;" is expected to resolve only its "&not" prefix, leaving "it;" as text.
            var tokenizer = new HtmlTokenizer(new TextSource("I'm &notit; I tell you"), null);
            var collected = string.Empty;

            // Concatenate the data of every character token up to end-of-file.
            while (true)
            {
                var current = tokenizer.Get();

                if (current.Type == HtmlTokenType.Character)
                {
                    collected += current.Data;
                }
                else if (current.Type == HtmlTokenType.EndOfFile)
                {
                    break;
                }
            }

            Assert.AreEqual("I'm ¬it; I tell you", collected);
        }
Пример #25
0
        public void TokenizationCharacterReferenceNotin()
        {
            // "&notin;" is expected to resolve to the single character "∉".
            var tokenizer = new HtmlTokenizer(new TextSource("I'm &notin; I tell you"));
            var collected = string.Empty;
            HtmlToken current;

            // Concatenate the data of every character token until the end-of-file token.
            do
            {
                current = tokenizer.Get();

                if (current.Type != HtmlTokenType.Character)
                {
                    continue;
                }

                collected += current.Data;
            }
            while (current != HtmlToken.EndOfFile);

            Assert.AreEqual("I'm ∉ I tell you", collected);
        }
Пример #26
0
        public void TokenizationCharacterReferenceNotIt()
        {
            // "&notit;" is expected to resolve only its "&not" prefix, leaving "it;" as text.
            var tokenizer = new HtmlTokenizer(new SourceManager("I'm &notit; I tell you"));
            var collected = string.Empty;
            HtmlToken current;

            // Concatenate the data of every character token until the EOF token.
            do
            {
                current = tokenizer.Get();

                if (current.Type != HtmlTokenType.Character)
                {
                    continue;
                }

                var characters = (HtmlCharacterToken)current;
                collected += characters.Data;
            }
            while (current != HtmlToken.EOF);

            Assert.AreEqual("I'm ¬it; I tell you", collected);
        }
Пример #27
0
        public void TokenizationCDataCorrectCharacters()
        {
            // With character data accepted, the CDATA payload must come back verbatim,
            // including the markup-like text inside it.
            var markup = new TextSource("<![CDATA[hi mum how <!-- are you doing />]]>");
            var tokenizer = new HtmlTokenizer(markup, null)
            {
                IsAcceptingCharacterData = true
            };
            var collected = new StringBuilder();

            // Gather the data of every character token up to end-of-file.
            while (true)
            {
                var current = tokenizer.Get();

                if (current.Type == HtmlTokenType.Character)
                {
                    collected.Append(current.Data);
                }
                else if (current.Type == HtmlTokenType.EndOfFile)
                {
                    break;
                }
            }

            Assert.AreEqual("hi mum how <!-- are you doing />", collected.ToString());
        }
Пример #28
0
        public void TokenizationCDataCorrectCharacters()
        {
            // With character data accepted, the CDATA payload must come back verbatim,
            // including the markup-like text inside it.
            var markup = new SourceManager("<![CDATA[hi mum how <!-- are you doing />]]>");
            var tokenizer = new HtmlTokenizer(markup)
            {
                AcceptsCharacterData = true
            };
            var collected = new StringBuilder();
            HtmlToken current;

            // Gather the data of every character token until the EOF token.
            do
            {
                current = tokenizer.Get();

                if (current.Type != HtmlTokenType.Character)
                {
                    continue;
                }

                var characters = (HtmlCharacterToken)current;
                collected.Append(characters.Data);
            }
            while (current != HtmlToken.EOF);

            Assert.AreEqual("hi mum how <!-- are you doing />", collected.ToString());
        }
Пример #29
0
        /// <summary>
        /// Builds the final index.html text from <paramref name="htmlTemplate"/> by
        /// substituting the user-designated Blazor boot script tag with a fully
        /// configured one.
        /// </summary>
        /// <remarks>
        /// <para>
        /// The template is tokenized and scanned for a start tag accepted by
        /// <c>IsBlazorBootTag</c>. Template text in front of that tag is copied through
        /// verbatim. In place of the tag itself, <c>AppendScriptTagWithBootConfig</c>
        /// writes the replacement (it is handed the original tag's attributes), followed
        /// by reference tags for the embedded CSS and JavaScript resources. Copying of
        /// template text resumes at the first token after the next end tag.
        /// </para><para>
        /// When no boot tag is present, the template text is returned unchanged; the user
        /// is then assumed to be responsible for completing the Blazor boot process.
        /// </para>
        /// </remarks>
        /// <param name="htmlTemplate">The HTML text to scan.</param>
        /// <param name="assemblyName">Forwarded to <c>AppendScriptTagWithBootConfig</c>.</param>
        /// <param name="assemblyEntryPoint">Forwarded to <c>AppendScriptTagWithBootConfig</c>.</param>
        /// <param name="assemblyReferences">Forwarded to <c>AppendScriptTagWithBootConfig</c>.</param>
        /// <param name="embeddedContent">
        /// Embedded resources; CSS and JavaScript entries get a reference tag each.
        /// </param>
        /// <param name="linkerEnabled">Forwarded to <c>AppendScriptTagWithBootConfig</c>.</param>
        /// <returns>The resulting HTML text.</returns>
        public static string GetIndexHtmlContents(
            string htmlTemplate,
            string assemblyName,
            string assemblyEntryPoint,
            IEnumerable <string> assemblyReferences,
            IEnumerable <EmbeddedResourceInfo> embeddedContent,
            bool linkerEnabled)
        {
            var output    = new StringBuilder();
            var tokenizer = new HtmlTokenizer(
                new TextSource(htmlTemplate),
                HtmlEntityService.Resolver);

            // Start of the template range that still has to be copied to the output.
            var copyFrom      = 0;
            var insideBootTag = false;
            var pendingResume = false;

            for (;;)
            {
                var token      = tokenizer.Get();
                var tokenStart = token.Position.Position - 1;
                var tokenType  = token.Type;

                // The token following the boot tag's end tag marks where copying resumes.
                if (pendingResume)
                {
                    copyFrom      = tokenStart;
                    pendingResume = false;
                }

                if (tokenType == HtmlTokenType.EndOfFile)
                {
                    // Flush whatever is left of the template, then finish.
                    var tailLength = htmlTemplate.Length - copyFrom;
                    if (tailLength > 0)
                    {
                        output.Append(htmlTemplate, copyFrom, tailLength);
                    }

                    return output.ToString();
                }

                if (tokenType == HtmlTokenType.EndTag)
                {
                    // The first end tag seen after the boot tag switches back to
                    // copying the original template text.
                    if (insideBootTag)
                    {
                        insideBootTag = false;
                        pendingResume = true;
                    }

                    continue;
                }

                if (tokenType != HtmlTokenType.StartTag)
                {
                    continue;
                }

                // Only the Blazor boot tag gets special treatment.
                var tag = token.AsTag();
                if (!IsBlazorBootTag(tag))
                {
                    continue;
                }

                // Copy the untouched template text that precedes the boot tag.
                output.Append(htmlTemplate, copyFrom, tokenStart - copyFrom);

                // Write the configured boot script tag instead of the original tag text.
                AppendScriptTagWithBootConfig(
                    output,
                    assemblyName,
                    assemblyEntryPoint,
                    assemblyReferences,
                    linkerEnabled,
                    tag.Attributes);

                // Reference every embedded stylesheet ...
                var stylesheetPaths = embeddedContent
                    .Where(resource => resource.Kind == EmbeddedResourceKind.Css)
                    .Select(resource => resource.RelativePath);
                AppendReferenceTags(
                    output,
                    stylesheetPaths,
                    "<link rel=\"stylesheet\" href=\"{0}\" />");

                // ... and every embedded script.
                var scriptPaths = embeddedContent
                    .Where(resource => resource.Kind == EmbeddedResourceKind.JavaScript)
                    .Select(resource => resource.RelativePath);
                AppendReferenceTags(
                    output,
                    scriptPaths,
                    "<script src=\"{0}\" defer></script>");

                // Nothing from the template is copied again until an end tag is seen.
                insideBootTag = true;
            }
        }
Пример #30
0
        /// <summary>
        /// Injects the Blazor boot code and supporting config data at a user-designated
        /// script tag identified with a <c>type</c> of <c>blazor-boot</c>.
        /// </summary>
        /// <remarks>
        /// <para>
        /// If a matching script tag is found, then it will be adjusted to inject
        /// supporting configuration data, including a <c>src</c> attribute that
        /// will load the Blazor client-side library.  Any existing attribute
        /// names that match the boot config data will be overwritten, but other
        /// user-supplied attributes will be left intact.  This allows, for example,
        /// to designate asynchronous loading or deferred running of the script
        /// reference.
        /// </para><para>
        /// If no matching script tag is found, it is assumed that the user is
        /// responsible for completing the Blazor boot process; the template text is
        /// then returned unchanged.
        /// </para>
        /// </remarks>
        /// <param name="htmlTemplate">The HTML text to scan for the boot script tag.</param>
        /// <param name="assemblyName">Forwarded to <c>AppendScriptTagWithBootConfig</c>.</param>
        /// <param name="assemblyEntryPoint">Forwarded to <c>AppendScriptTagWithBootConfig</c>.</param>
        /// <param name="binFiles">Forwarded to <c>AppendScriptTagWithBootConfig</c>.</param>
        /// <returns>The resulting HTML text.</returns>
        public static string GetIndexHtmlContents(
            string htmlTemplate,
            string assemblyName,
            string assemblyEntryPoint,
            IEnumerable <string> binFiles)
        {
            var resultBuilder = new StringBuilder();

            // Search for the Blazor boot script tag (as recognized by IsBlazorBootTag) and
            // replace it with a fully-configured Blazor boot script tag
            var tokenizer = new HtmlTokenizer(
                new TextSource(htmlTemplate),
                HtmlEntityService.Resolver);
            var currentRangeStartPos = 0;
            var isInBlazorBootTag    = false;
            var resumeOnNextToken    = false;

            while (true)
            {
                var token = tokenizer.Get();

                // The start-tag handling below treats Position - 1 as the token's character
                // index in the template. The same conversion is used when resuming, so the
                // first character following the closing tag is not dropped.
                var tokenCharIndex = token.Position.Position - 1;
                if (resumeOnNextToken)
                {
                    resumeOnNextToken    = false;
                    currentRangeStartPos = tokenCharIndex;
                }

                switch (token.Type)
                {
                case HtmlTokenType.StartTag:
                {
                    // Only do anything special if this is a Blazor boot tag
                    var tag = token.AsTag();
                    if (IsBlazorBootTag(tag))
                    {
                        // First, emit the original source text prior to this special tag, since
                        // we want that to be unchanged
                        resultBuilder.Append(htmlTemplate, currentRangeStartPos, tokenCharIndex - currentRangeStartPos);

                        // Instead of emitting the source text for this special tag, emit a fully-
                        // configured Blazor boot script tag
                        AppendScriptTagWithBootConfig(
                            resultBuilder,
                            assemblyName,
                            assemblyEntryPoint,
                            binFiles,
                            tag.Attributes);

                        // Set a flag so we know not to emit anything else until the special
                        // tag is closed
                        isInBlazorBootTag = true;
                    }
                    break;
                }

                case HtmlTokenType.EndTag:
                    // The first end tag seen after the Blazor boot script tag switches back
                    // into the mode of emitting the original source text
                    if (isInBlazorBootTag)
                    {
                        isInBlazorBootTag = false;
                        resumeOnNextToken = true;
                    }
                    break;

                case HtmlTokenType.EndOfFile:
                    // Finally, emit any remaining text from the original source file. The
                    // length is checked because StringBuilder.Append rejects a negative
                    // count, which would occur if the resume position lies past the end
                    // of the template.
                    var remainingLength = htmlTemplate.Length - currentRangeStartPos;
                    if (remainingLength > 0)
                    {
                        resultBuilder.Append(htmlTemplate, currentRangeStartPos, remainingLength);
                    }
                    return(resultBuilder.ToString());
                }
            }
        }