// 8.2.4.35 Attribute name state
//
// Consumes characters into the attribute-name buffer until something ends the name.
// Returns the token from EmitDataToken (false) when the input runs out, the token from
// EmitTagToken () when '>' closes the tag, and null when the tokenizer merely moved on
// to another state (the attribute is registered via EmitTagAttribute () in that case).
HtmlToken ReadAttributeName ()
{
    do {
        int next = Read ();

        if (next == -1) {
            // Input ended inside the tag: drop the partial name and the tag itself and
            // hand back whatever EmitDataToken produces.
            TokenizerState = HtmlTokenizerState.EndOfFile;
            name.Length = 0;
            tag = null;

            return EmitDataToken (false);
        }

        char ch = (char) next;

        // Keep the raw character around so that a data token can still be emitted
        // if this turns out to be a parse error.
        data.Append (ch);

        if (ch == '>') {
            // End of the tag: register the attribute, then emit the tag.
            EmitTagAttribute ();

            return EmitTagToken ();
        }

        if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ') {
            TokenizerState = HtmlTokenizerState.AfterAttributeName;
        } else if (ch == '/') {
            TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
        } else if (ch == '=') {
            TokenizerState = HtmlTokenizerState.BeforeAttributeValue;
        } else if (ch == '\0') {
            // NUL is stored as U+FFFD REPLACEMENT CHARACTER.
            name.Append ('\uFFFD');
        } else {
            name.Append (ch);
        }
    } while (TokenizerState == HtmlTokenizerState.AttributeName);

    // The state changed, so the name is complete.
    EmitTagAttribute ();

    return null;
}
// Hands the tag that is currently being built back to the caller and picks the tokenizer
// state that applies to the content following it.
//
// End tags and empty elements always continue in the Data state. For other start tags the
// next state depends on tag.Id (raw text, RCDATA, plain text, script data or data).
// The raw data buffer is cleared and the current tag is reset to null before returning.
HtmlToken EmitTagToken ()
{
    if (tag.IsEndTag || tag.IsEmptyElement) {
        TokenizerState = HtmlTokenizerState.Data;
    } else {
        switch (tag.Id) {
        case HtmlTagId.Style:
        case HtmlTagId.Xmp:
        case HtmlTagId.IFrame:
        case HtmlTagId.NoEmbed:
        case HtmlTagId.NoFrames:
        case HtmlTagId.NoScript:
            // TODO: only switch into the RawText state for NoScript if scripting is enabled
            TokenizerState = HtmlTokenizerState.RawText;
            activeTagName = tag.Name;
            break;
        case HtmlTagId.Title:
        case HtmlTagId.TextArea:
            TokenizerState = HtmlTokenizerState.RcData;
            activeTagName = tag.Name;
            break;
        case HtmlTagId.PlainText:
            TokenizerState = HtmlTokenizerState.PlainText;
            break;
        case HtmlTagId.Script:
            TokenizerState = HtmlTokenizerState.ScriptData;
            break;
        case HtmlTagId.Html:
            TokenizerState = HtmlTokenizerState.Data;

            // Scan the attributes back to front; the last xmlns attribute that carries
            // a value decides the namespace.
            int index = tag.Attributes.Count - 1;

            while (index >= 0) {
                var attribute = tag.Attributes[index];

                if (attribute.Id == HtmlAttributeId.XmlNS && attribute.Value != null) {
                    HtmlNamespace = attribute.Value.ToHtmlNamespace ();
                    break;
                }

                index--;
            }
            break;
        default:
            TokenizerState = HtmlTokenizerState.Data;
            break;
        }
    }

    var emitted = tag;

    data.Length = 0;
    tag = null;

    return emitted;
}
// 8.2.4.27 Script data escaped end tag name state
//
// Reads the name of a candidate end tag that was encountered inside escaped script data.
//
// Returns:
//   - the token from EmitScriptDataToken () when the input ends,
//   - an end tag token when "script" is immediately followed by '>',
//   - null in every other case (the tokenizer state tells the caller where to continue).
//
// When "script" is followed by whitespace or '/', an end tag is created in `tag` and the
// tokenizer continues in the BeforeAttributeName / SelfClosingStartTag state.
HtmlToken ReadScriptDataEscapedEndTagName ()
{
    do {
        int nc = Read ();
        char c;

        if (nc == -1) {
            TokenizerState = HtmlTokenizerState.EndOfFile;
            name.Length = 0;

            return EmitScriptDataToken ();
        }

        c = (char) nc;

        // Note: we save the data in case we hit a parse error and have to emit a data token
        data.Append (c);

        switch (c) {
        case '\t': case '\r': case '\n': case '\f': case ' ':
            if (NameIs ("script")) {
                TokenizerState = HtmlTokenizerState.BeforeAttributeName;
                break;
            }
            goto default;
        case '/':
            if (NameIs ("script")) {
                TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                break;
            }
            goto default;
        case '>':
            if (NameIs ("script")) {
                var token = CreateTagToken (name.ToString (), true);
                TokenizerState = HtmlTokenizerState.Data;
                data.Length = 0;
                name.Length = 0;
                return token;
            }
            goto default;
        default:
            if (!IsAsciiLetter (c)) {
                // Not an appropriate end tag after all. The "anything else" branch of this
                // state returns to the script data *escaped* state (not plain script data),
                // so that the remainder of the escaped section keeps being tokenized with
                // the escaped-state rules. The characters consumed so far stay in `data`.
                TokenizerState = HtmlTokenizerState.ScriptDataEscaped;
                return null;
            }

            name.Append (c);
            break;
        }
    } while (TokenizerState == HtmlTokenizerState.ScriptDataEscapedEndTagName);

    // We left the loop because "script" was followed by whitespace or '/': start an end tag.
    tag = CreateTagToken (name.ToString (), true);
    name.Length = 0;

    return null;
}
// Shared implementation of the "end tag name" states used inside raw-text style content.
//
// decoded: forwarded to EmitDataToken when the input ends.
// rawText: the state to fall back to when the characters read do not form an end tag
//          whose name matches activeTagName.
//
// Returns the token from EmitDataToken (decoded) at end of input, an end tag token when
// the matching name is directly followed by '>', and null otherwise.
HtmlToken ReadGenericRawTextEndTagName (bool decoded, HtmlTokenizerState rawText)
{
    var current = TokenizerState;

    do {
        int next = Read ();

        if (next == -1) {
            TokenizerState = HtmlTokenizerState.EndOfFile;
            name.Length = 0;

            return EmitDataToken (decoded);
        }

        char ch = (char) next;

        // Keep the raw character around so that a data token can still be emitted
        // if this turns out to be a parse error.
        data.Append (ch);

        bool isSpace = ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ';

        // Whitespace, '/' and '>' terminate the name, but only count when the name
        // collected so far matches the element that opened the raw-text section.
        if ((isSpace || ch == '/' || ch == '>') && NameIs (activeTagName)) {
            if (ch == '>') {
                var endTag = CreateTagToken (name.ToString (), true);

                TokenizerState = HtmlTokenizerState.Data;
                data.Length = 0;
                name.Length = 0;

                return endTag;
            }

            if (ch == '/')
                TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
            else
                TokenizerState = HtmlTokenizerState.BeforeAttributeName;

            continue;
        }

        if (!IsAsciiLetter (ch)) {
            // Not an end tag name: go back to the text state we were asked to resume.
            TokenizerState = rawText;
            return null;
        }

        name.Append (ch == '\0' ? '\uFFFD' : ch);
    } while (TokenizerState == current);

    // The matching name was followed by whitespace or '/': start an end tag.
    tag = CreateTagToken (name.ToString (), true);
    name.Length = 0;

    return null;
}
// 8.2.4.37 Before attribute value state
//
// Skips whitespace in front of an attribute value and decides how the value is delimited.
//
// Returns:
//   - the token from EmitDataToken (false) when the input ends (the current tag is dropped),
//   - the token from EmitTagToken () when '>' closes the tag before any value was seen,
//   - null once the tokenizer has switched to one of the attribute value states.
HtmlToken ReadBeforeAttributeValue ()
{
    do {
        int nc = Read ();
        char c;

        if (nc == -1) {
            TokenizerState = HtmlTokenizerState.EndOfFile;
            tag = null;

            return EmitDataToken (false);
        }

        c = (char) nc;

        // Note: we save the data in case we hit a parse error and have to emit a data token
        data.Append (c);

        switch (c) {
        case '\t': case '\r': case '\n': case '\f': case ' ':
            // ignore whitespace in front of the value
            break;
        case '"': case '\'':
            // remember which quote character terminates the value
            TokenizerState = HtmlTokenizerState.AttributeValueQuoted;
            quote = c;
            return null;
        case '&':
            // NOTE(review): the '&' has already been consumed here and is not added to the
            // value buffer; the specification reconsumes it in the unquoted state - confirm
            // that a leading character reference is still handled correctly.
            TokenizerState = HtmlTokenizerState.AttributeValueUnquoted;
            return null;
        case '>':
            return EmitTagToken ();
        case '<': case '=': case '`':
            // parse error
            goto default;
        default:
            // Note: '/' deliberately ends up here. This state has no special entry for a
            // solidus, so a value such as href=/index.html keeps its leading slash instead
            // of being mistaken for the start of a self-closing tag.
            TokenizerState = HtmlTokenizerState.AttributeValueUnquoted;
            name.Append (c == '\0' ? '\uFFFD' : c);
            return null;
        }
    } while (true);
}
/// <summary>
/// 8.2.4.10 Tag name state.
/// Collects the characters of a tag name. On '>' a tag token is created and stored in
/// <c>token</c>; when the name is ended by whitespace or '/', the tag is stored in
/// <c>tag</c> and <c>token</c> is set to null.
/// </summary>
void R10_TagName()
{
    do
    {
        int next = Read();

        if (next == -1)
        {
            TokenizerState = HtmlTokenizerState.EndOfFile;
            name.Length = 0;
            // NOTE(review): any value returned by EmitDataToken is not used here;
            // presumably it publishes its result itself - confirm.
            EmitDataToken(false);
            return;
        }

        char ch = (char)next;

        // Keep the raw character around so that a data token can still be emitted
        // if this turns out to be a parse error.
        data.Append(ch);

        if (ch == '>')
        {
            // NOTE(review): the tokenizer goes straight to the Data state here,
            // whatever the tag is - confirm this is intended for start tags.
            token = CreateTagToken(name.ToString(), isEndTag);
            TokenizerState = HtmlTokenizerState.Data;
            data.Length = 0;
            name.Length = 0;
            return;
        }

        if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ')
        {
            TokenizerState = HtmlTokenizerState.BeforeAttributeName;
        }
        else if (ch == '/')
        {
            TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
        }
        else if (ch == '\0')
        {
            // NUL is stored as U+FFFD REPLACEMENT CHARACTER.
            name.Append('\uFFFD');
        }
        else
        {
            name.Append(ch);
        }
    } while (TokenizerState == HtmlTokenizerState.TagName);

    // The name is complete but the tag is not: keep building it in the next state.
    tag = CreateTagToken(name.ToString(), isEndTag);
    name.Length = 0;
    token = null;
}