An HTML tag token.
An HTML tag token.
Inheritance: HtmlToken
Example #1
0
        // 8.2.4.35 Attribute name state
        // Consumes input characters while the tokenizer remains in the
        // AttributeName state, collecting name characters into `name`.
        //
        // Returns:
        //   - the result of EmitDataToken (false) if the input ends first;
        //   - the result of EmitTagToken () if '>' is read;
        //   - null otherwise, once TokenizerState has moved to a follow-up state.
        //
        // EmitTagAttribute () is invoked on every path except end-of-input
        // (presumably it commits the text gathered in `name` to the current
        // tag -- confirm against its definition).
        HtmlToken ReadAttributeName()
        {
            while (true) {
                int next = Read ();

                if (next == -1) {
                    // Out of input in the middle of a tag: discard the partial
                    // name and tag, and fall back to emitting a data token.
                    TokenizerState = HtmlTokenizerState.EndOfFile;
                    name.Length = 0;
                    tag = null;

                    return EmitDataToken (false);
                }

                char ch = (char) next;

                // Keep the raw text around so that a data token can still be
                // emitted if a parse error is hit later on.
                data.Append (ch);

                if (ch == '>') {
                    // The tag ends right after this attribute name.
                    EmitTagAttribute ();

                    return EmitTagToken ();
                }

                if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ') {
                    TokenizerState = HtmlTokenizerState.AfterAttributeName;
                } else if (ch == '/') {
                    TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                } else if (ch == '=') {
                    TokenizerState = HtmlTokenizerState.BeforeAttributeValue;
                } else {
                    // A NUL character is stored as U+FFFD REPLACEMENT CHARACTER.
                    name.Append (ch == '\0' ? '\uFFFD' : ch);
                }

                // Stop as soon as the state is no longer AttributeName.
                if (TokenizerState != HtmlTokenizerState.AttributeName) {
                    break;
                }
            }

            EmitTagAttribute ();

            return null;
        }
Example #2
0
        // Finishes the tag that is currently being built: selects the tokenizer
        // state that follows it, clears the `data` buffer, resets `tag` to null
        // and returns the finished tag.
        //
        // End tags and empty elements always continue in the Data state. For any
        // other tag the next state is chosen from tag.Id; for the RawText and
        // RcData elements the tag name is also recorded in `activeTagName`.
        HtmlToken EmitTagToken()
        {
            if (tag.IsEndTag || tag.IsEmptyElement) {
                TokenizerState = HtmlTokenizerState.Data;
            } else {
                switch (tag.Id) {
                case HtmlTagId.Style:
                case HtmlTagId.Xmp:
                case HtmlTagId.IFrame:
                case HtmlTagId.NoEmbed:
                case HtmlTagId.NoFrames:
                case HtmlTagId.NoScript:
                    // TODO: for NoScript, only switch into the RawText state if scripting is enabled
                    TokenizerState = HtmlTokenizerState.RawText;
                    activeTagName = tag.Name;
                    break;
                case HtmlTagId.Title:
                case HtmlTagId.TextArea:
                    TokenizerState = HtmlTokenizerState.RcData;
                    activeTagName = tag.Name;
                    break;
                case HtmlTagId.PlainText:
                    TokenizerState = HtmlTokenizerState.PlainText;
                    break;
                case HtmlTagId.Script:
                    TokenizerState = HtmlTokenizerState.ScriptData;
                    break;
                case HtmlTagId.Html:
                    TokenizerState = HtmlTokenizerState.Data;

                    // Walk the attributes from last to first; the first xmlns
                    // attribute found with a non-null value sets HtmlNamespace.
                    int index = tag.Attributes.Count;

                    while (index-- > 0) {
                        var attribute = tag.Attributes[index];

                        if (attribute.Id == HtmlAttributeId.XmlNS && attribute.Value != null) {
                            HtmlNamespace = attribute.Value.ToHtmlNamespace ();
                            break;
                        }
                    }
                    break;
                default:
                    TokenizerState = HtmlTokenizerState.Data;
                    break;
                }
            }

            // Hand the finished tag to the caller and reset the per-tag state.
            var emitted = tag;

            data.Length = 0;
            tag = null;

            return emitted;
        }
Example #3
0
        // 8.2.4.27 Script data escaped end tag name state
        // Reads the name of a potential end tag while inside escaped script data.
        //
        // Returns:
        //   - the result of EmitScriptDataToken () if the input ends;
        //   - a tag token created via CreateTagToken (name, true) if '>' is read
        //     while NameIs ("script") holds;
        //   - null in every other case. If whitespace or '/' was read while
        //     NameIs ("script") holds, `tag` is set to a new end-tag token and the
        //     state moves on to BeforeAttributeName or SelfClosingStartTag. If a
        //     character that is not an ASCII letter is read otherwise, the state
        //     falls back to ScriptData.
        //
        // NOTE(review): the HTML5 tokenizer specification's "anything else" rule
        // for this state returns to the *script data escaped* state, whereas this
        // code returns to ScriptData -- confirm that this is intentional.
        HtmlToken ReadScriptDataEscapedEndTagName()
        {
            while (true) {
                int next = Read ();

                if (next == -1) {
                    TokenizerState = HtmlTokenizerState.EndOfFile;
                    name.Length = 0;

                    return EmitScriptDataToken ();
                }

                char ch = (char) next;

                // Keep the raw text around so that a data token can still be
                // emitted if a parse error is hit later on.
                data.Append (ch);

                bool isWhiteSpace = ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ';

                // Whitespace, '/' and '>' only end the tag name when the name
                // collected so far is "script"; otherwise they are handled like
                // any other character below.
                if ((isWhiteSpace || ch == '/' || ch == '>') && NameIs ("script")) {
                    if (ch == '>') {
                        var endTag = CreateTagToken (name.ToString (), true);

                        TokenizerState = HtmlTokenizerState.Data;
                        data.Length = 0;
                        name.Length = 0;

                        return endTag;
                    }

                    TokenizerState = isWhiteSpace
                        ? HtmlTokenizerState.BeforeAttributeName
                        : HtmlTokenizerState.SelfClosingStartTag;
                } else if (IsAsciiLetter (ch)) {
                    name.Append (ch);
                } else {
                    TokenizerState = HtmlTokenizerState.ScriptData;

                    return null;
                }

                // Stop as soon as the state is no longer ScriptDataEscapedEndTagName.
                if (TokenizerState != HtmlTokenizerState.ScriptDataEscapedEndTagName) {
                    break;
                }
            }

            tag = CreateTagToken (name.ToString (), true);
            name.Length = 0;

            return null;
        }
Example #4
0
        // Reads the name of a potential end tag while inside a raw-text style
        // element whose name was recorded in `activeTagName`.
        //
        // Parameters:
        //   decoded - passed through to EmitDataToken () when the input ends.
        //   rawText - the state to fall back to when a character that is not an
        //             ASCII letter is read and the tag name was not terminated.
        //
        // Returns:
        //   - the result of EmitDataToken (decoded) if the input ends;
        //   - a tag token created via CreateTagToken (name, true) if '>' is read
        //     while NameIs (activeTagName) holds;
        //   - null in every other case. If whitespace or '/' was read while
        //     NameIs (activeTagName) holds, `tag` is set to a new end-tag token
        //     and the state moves on to BeforeAttributeName or SelfClosingStartTag.
        HtmlToken ReadGenericRawTextEndTagName(bool decoded, HtmlTokenizerState rawText)
        {
            // The loop runs for as long as the tokenizer stays in the state it
            // was in when this method was entered.
            var current = TokenizerState;

            do {
                int nc = Read ();
                char c;

                if (nc == -1) {
                    TokenizerState = HtmlTokenizerState.EndOfFile;
                    name.Length = 0;

                    return EmitDataToken (decoded);
                }

                c = (char) nc;

                // Note: we save the data in case we hit a parse error and have to emit a data token
                data.Append (c);

                switch (c) {
                case '\t': case '\r': case '\n': case '\f': case ' ':
                    if (NameIs (activeTagName)) {
                        TokenizerState = HtmlTokenizerState.BeforeAttributeName;
                        break;
                    }

                    goto default;
                case '/':
                    if (NameIs (activeTagName)) {
                        TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                        break;
                    }
                    goto default;
                case '>':
                    if (NameIs (activeTagName)) {
                        var token = CreateTagToken (name.ToString (), true);
                        TokenizerState = HtmlTokenizerState.Data;
                        data.Length = 0;
                        name.Length = 0;
                        return token;
                    }
                    goto default;
                default:
                    if (!IsAsciiLetter (c)) {
                        TokenizerState = rawText;
                        return null;
                    }

                    // Only ASCII letters reach this point, so no NUL replacement
                    // is needed here.
                    name.Append (c);
                    break;
                }
            } while (TokenizerState == current);

            tag = CreateTagToken (name.ToString (), true);
            name.Length = 0;

            return null;
        }
Example #5
0
        // 8.2.4.37 Before attribute value state
        // Skips whitespace in front of an attribute value and selects the state
        // used to read the value itself.
        //
        // Returns:
        //   - the result of EmitDataToken (false) if the input ends;
        //   - the result of EmitTagToken () if '>' is read;
        //   - null otherwise, once TokenizerState has been set to
        //     AttributeValueQuoted, AttributeValueUnquoted or SelfClosingStartTag.
        //
        // NOTE(review): the '&' branch consumes the character and switches to
        // AttributeValueUnquoted without storing or re-reading it -- confirm that
        // the unquoted-value state does not need to see the '&'.
        HtmlToken ReadBeforeAttributeValue()
        {
            while (true) {
                int next = Read ();

                if (next == -1) {
                    TokenizerState = HtmlTokenizerState.EndOfFile;
                    tag = null;

                    return EmitDataToken (false);
                }

                char ch = (char) next;

                // Keep the raw text around so that a data token can still be
                // emitted if a parse error is hit later on.
                data.Append (ch);

                // Whitespace before the value is skipped.
                if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ') {
                    continue;
                }

                if (ch == '>') {
                    return EmitTagToken ();
                }

                if (ch == '"' || ch == '\'') {
                    // Remember which quote character opened the value.
                    TokenizerState = HtmlTokenizerState.AttributeValueQuoted;
                    quote = ch;
                } else if (ch == '/') {
                    TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                } else {
                    // Everything else starts an unquoted value; '<', '=' and '`'
                    // are parse errors but are treated like any other character.
                    TokenizerState = HtmlTokenizerState.AttributeValueUnquoted;

                    if (ch != '&') {
                        // The character is collected in the `name` buffer, with
                        // NUL stored as U+FFFD REPLACEMENT CHARACTER.
                        name.Append (ch == '\0' ? '\uFFFD' : ch);
                    }
                }

                return null;
            }
        }
Example #6
0
        /// <summary>
        /// 8.2.4.10 Tag name state
        /// </summary>
        /// <remarks>
        /// Collects tag-name characters into <c>name</c> while the tokenizer stays
        /// in the TagName state. On '&gt;' the finished tag is stored in
        /// <c>token</c> and the state returns to Data. On whitespace or '/' the
        /// tag is stored in <c>tag</c>, <c>token</c> is set to null and the state
        /// moves on to BeforeAttributeName or SelfClosingStartTag. If the input
        /// ends, the state becomes EndOfFile and EmitDataToken(false) is called.
        /// </remarks>
        void R10_TagName()
        {
            while (true)
            {
                int next = Read();

                if (next == -1)
                {
                    TokenizerState = HtmlTokenizerState.EndOfFile;
                    name.Length = 0;

                    EmitDataToken(false);
                    return;
                }

                char ch = (char)next;

                // Keep the raw text around so that a data token can still be
                // emitted if a parse error is hit later on.
                data.Append(ch);

                if (ch == '>')
                {
                    // The tag is complete; publish it and reset the buffers.
                    token = CreateTagToken(name.ToString(), isEndTag);
                    TokenizerState = HtmlTokenizerState.Data;
                    data.Length = 0;
                    name.Length = 0;
                    return;
                }

                if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ')
                {
                    TokenizerState = HtmlTokenizerState.BeforeAttributeName;
                }
                else if (ch == '/')
                {
                    TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                }
                else
                {
                    // A NUL character is stored as U+FFFD REPLACEMENT CHARACTER.
                    name.Append(ch == '\0' ? '\uFFFD' : ch);
                }

                // Stop as soon as the state is no longer TagName.
                if (TokenizerState != HtmlTokenizerState.TagName)
                {
                    break;
                }
            }

            tag = CreateTagToken(name.ToString(), isEndTag);
            name.Length = 0;
            token = null;
        }