/// <summary>
/// Peeks at the next character and selects the follow-up state for a generic
/// raw-text "end tag open" step.
/// </summary>
/// <param name="decoded">Forwarded to <c>EmitDataToken</c> when no character is available.</param>
/// <param name="rawText">State selected when the peeked character is not an ASCII letter.</param>
/// <param name="rawTextEndTagName">State selected when the peeked character is an ASCII letter.</param>
void ReadGenericRawTextEndTagOpen(bool decoded, HtmlTokenizerState rawText, HtmlTokenizerState rawTextEndTagName)
{
    char peeked;
    CharMode mode;

    if (!Peek(out peeked, out mode))
    {
        // No more input: mark end of file and flush what has been buffered.
        TokenizerState = HtmlTokenizerState.EndOfFile;
        EmitDataToken(decoded);
        return;
    }

    // Equivalent of IsAsciiLetter(peeked), expressed through the char mode.
    bool isAsciiLetter = mode == CharMode.UpperAsciiLetter || mode == CharMode.LowerAsciiLetter;

    if (!isAsciiLetter)
    {
        // Not the start of a tag name; the character is left unconsumed.
        TokenizerState = rawText;
        return;
    }

    // First letter of the end tag name: record it in both buffers and consume it.
    TokenizerState = rawTextEndTagName;
    name.Append(peeked);
    data.Append(peeked);
    ReadNext();
}
/// <summary>
/// Handles the step after a '&lt;' inside a generic raw-text element: records the
/// '&lt;' in <c>data</c> and decides whether an end tag may be starting.
/// </summary>
/// <param name="rawText">State selected when the next character is not '/'.</param>
/// <param name="rawTextEndTagOpen">State selected when the next character is '/'.</param>
/// <remarks>
/// When no further character is available the method no longer throws
/// <see cref="System.NotSupportedException"/>; it records the '&lt;' and falls back to
/// <paramref name="rawText"/>, exactly as it does for any character other than '/'.
/// End-of-input handling is thereby left to that state.
/// </remarks>
void ReadGenericRawTextLessThan(HtmlTokenizerState rawText, HtmlTokenizerState rawTextEndTagOpen)
{
    char c;
    bool hasNext = Peek(out c);

    data.Append('<');

    if (hasNext && c == '/')
    {
        // "</" seen: start collecting a fresh end tag name and consume the '/'.
        TokenizerState = rawTextEndTagOpen;
        data.Append('/');
        name.Length = 0;
        ReadNext();
    }
    else
    {
        // Any other character, or end of input: the '<' was plain text.
        TokenizerState = rawText;
    }
}
/// <summary>
/// Convenience overload: forwards to the integer-based <c>Transition</c> with a
/// <c>null</c> result.
/// </summary>
/// <param name="state">Target tokenizer state; passed on as its integer value.</param>
/// <returns>Whatever the integer-based overload returns.</returns>
private StateResult Transition(HtmlTokenizerState state)
{
    int target = (int)state;
    return Transition(target, result: null);
}
// 8.2.4.55 After DOCTYPE name state
//
// Skips whitespace that follows the DOCTYPE name, then collects characters in
// `name` until six are available and checks them with NameIs against the
// "public" / "system" keywords.
//
// Returns the DOCTYPE token when '>' or end of input is reached; otherwise
// returns null after selecting the next tokenizer state.
//
// A keyword attempt that is cut short (by whitespace, '>' or end of input)
// or that matches neither keyword is a parse error: the force-quirks flag is
// set and the partial text is discarded from `name`, so that it cannot leak
// into whatever is accumulated in `name` next.
HtmlToken ReadAfterDocTypeName ()
{
    do {
        int nc = Read ();
        char c;

        if (nc == -1) {
            TokenizerState = HtmlTokenizerState.EndOfFile;
            doctype.ForceQuirksMode = true;
            name.Length = 0;
            return EmitDocType ();
        }

        c = (char) nc;

        // Note: we save the data in case we hit a parse error and have to emit a data token
        data.Append (c);

        switch (c) {
        case '\t': case '\r': case '\n': case '\f': case ' ':
            if (name.Length > 0) {
                // parse error: whitespace in the middle of a would-be keyword
                TokenizerState = HtmlTokenizerState.BogusDocType;
                doctype.ForceQuirksMode = true;
                name.Length = 0;
                return null;
            }
            break;
        case '>':
            TokenizerState = HtmlTokenizerState.Data;
            if (name.Length > 0) {
                // parse error: the DOCTYPE ended inside an incomplete keyword
                doctype.ForceQuirksMode = true;
                name.Length = 0;
            }
            return EmitDocType ();
        default:
            name.Append (c);
            if (name.Length < 6)
                break;

            if (NameIs ("public")) {
                TokenizerState = HtmlTokenizerState.AfterDocTypePublicKeyword;
                doctype.PublicKeyword = name.ToString ();
            } else if (NameIs ("system")) {
                TokenizerState = HtmlTokenizerState.AfterDocTypeSystemKeyword;
                doctype.SystemKeyword = name.ToString ();
            } else {
                // parse error: neither keyword
                TokenizerState = HtmlTokenizerState.BogusDocType;
                doctype.ForceQuirksMode = true;
            }

            name.Length = 0;
            return null;
        }
    } while (true);
}
// 8.2.4.18 Script data end tag open state
//
// Peeks at the next character. Only 's' / 'S' continues towards an end tag
// name; anything else goes back to the script data state without consuming
// the character. At end of input the buffered script data is emitted.
HtmlToken ReadScriptDataEndTagOpen ()
{
    int next = Peek ();

    if (next == -1) {
        TokenizerState = HtmlTokenizerState.EndOfFile;
        return EmitScriptDataToken ();
    }

    char c = (char) next;
    bool startsWithS = c == 's' || c == 'S';

    if (!startsWithS) {
        TokenizerState = HtmlTokenizerState.ScriptData;
        return null;
    }

    // The tag name always receives a lower-case 's'; the raw data keeps the
    // character exactly as it appeared in the input.
    TokenizerState = HtmlTokenizerState.ScriptDataEndTagName;
    name.Append ('s');
    data.Append (c);
    Read ();

    return null;
}
// 8.2.4.21 Script data escape start dash state
//
// If the next character is '-', it is consumed, added to the data buffer and
// the tokenizer moves to the escaped dash-dash state. Otherwise nothing is
// consumed and the tokenizer returns to the script data state.
// Always returns null.
HtmlToken ReadScriptDataEscapeStartDash ()
{
    if (Peek () != '-') {
        TokenizerState = HtmlTokenizerState.ScriptData;
        return null;
    }

    TokenizerState = HtmlTokenizerState.ScriptDataEscapedDashDash;
    data.Append ('-');
    Read ();

    return null;
}
// 8.2.4.8 Tag open state
//
// Consumes the character after '<' and picks the next state from it.
// Returns a "<" data token at end of input, null otherwise.
HtmlToken ReadTagOpen ()
{
    int next = Read ();

    if (next == -1) {
        TokenizerState = HtmlTokenizerState.EndOfFile;
        return CreateDataToken ("<");
    }

    char c = (char) next;

    // Keep the raw text around in case this turns out not to be a tag and
    // has to be emitted as data.
    data.Append ('<');
    data.Append (c);

    if (c == '!') {
        TokenizerState = HtmlTokenizerState.MarkupDeclarationOpen;
    } else if (c == '?') {
        TokenizerState = HtmlTokenizerState.BogusComment;
    } else if (c == '/') {
        TokenizerState = HtmlTokenizerState.EndTagOpen;
    } else if (IsAsciiLetter (c)) {
        // First letter of a start tag name.
        TokenizerState = HtmlTokenizerState.TagName;
        isEndTag = false;
        name.Append (c);
    } else {
        TokenizerState = HtmlTokenizerState.Data;
    }

    return null;
}
// 8.2.4.10 Tag name state
//
// Accumulates the tag name until whitespace, '/', '>' or end of input.
// Returns the result of EmitTagToken on '>', the result of EmitDataToken at
// end of input, and null when the tag continues in another state.
HtmlToken ReadTagName ()
{
    while (true) {
        int next = Read ();

        if (next == -1) {
            TokenizerState = HtmlTokenizerState.EndOfFile;
            name.Length = 0;
            return EmitDataToken (false);
        }

        char c = (char) next;

        // Raw text is kept in case a parse error forces it out as data.
        data.Append (c);

        if (c == '>') {
            tag = CreateTagToken (name.ToString (), isEndTag);
            data.Length = 0;
            name.Length = 0;
            return EmitTagToken ();
        }

        if (c == '/') {
            TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
        } else if (c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == ' ') {
            TokenizerState = HtmlTokenizerState.BeforeAttributeName;
        } else {
            // NUL is replaced with U+FFFD.
            name.Append (c == '\0' ? '\uFFFD' : c);
        }

        if (TokenizerState != HtmlTokenizerState.TagName)
            break;
    }

    // The name is complete but the tag is not: create the token now and let
    // the following states fill in the rest.
    tag = CreateTagToken (name.ToString (), isEndTag);
    name.Length = 0;

    return null;
}
// 8.2.4.3 RCDATA state
//
// Buffers characters until '<', a character reference (only when
// DecodeCharacterReferences is set) or end of input.
HtmlToken ReadRcData ()
{
    do {
        int next = Read ();

        if (next == -1) {
            TokenizerState = HtmlTokenizerState.EndOfFile;
            break;
        }

        char c = (char) next;

        if (c == '<') {
            TokenizerState = HtmlTokenizerState.RcDataLessThan;
            return EmitDataToken (DecodeCharacterReferences);
        }

        if (c == '&' && DecodeCharacterReferences) {
            TokenizerState = HtmlTokenizerState.CharacterReferenceInRcData;
            return null;
        }

        // Ordinary character (including '&' when references are not decoded);
        // NUL is replaced with U+FFFD.
        data.Append (c == '\0' ? '\uFFFD' : c);
    } while (TokenizerState == HtmlTokenizerState.RcData);

    return EmitDataToken (DecodeCharacterReferences);
}
// 8.2.4.6 Script data state
//
// Buffers script text until a '<' or end of input, then emits what was
// collected through EmitScriptDataToken.
HtmlToken ReadScriptData ()
{
    do {
        int next = Read ();

        if (next == -1) {
            TokenizerState = HtmlTokenizerState.EndOfFile;
            break;
        }

        char c = (char) next;

        if (c == '<') {
            // The '<' itself is not buffered here.
            TokenizerState = HtmlTokenizerState.ScriptDataLessThan;
        } else {
            // NUL is replaced with U+FFFD.
            data.Append (c == '\0' ? '\uFFFD' : c);
        }
    } while (TokenizerState == HtmlTokenizerState.ScriptData);

    return EmitScriptDataToken ();
}
// 8.2.4.54 DOCTYPE name state
//
// Accumulates the DOCTYPE name until whitespace, '>' or end of input.
// Returns the DOCTYPE token on '>' and at end of input (the latter with
// force-quirks set); returns null when whitespace ends the name.
HtmlToken ReadDocTypeName ()
{
    do {
        int next = Read ();

        if (next == -1) {
            TokenizerState = HtmlTokenizerState.EndOfFile;
            doctype.Name = name.ToString ();
            doctype.ForceQuirksMode = true;
            name.Length = 0;
            return EmitDocType ();
        }

        char c = (char) next;

        // Raw text is kept in case a parse error forces it out as data.
        data.Append (c);

        if (c == '>') {
            TokenizerState = HtmlTokenizerState.Data;
            doctype.Name = name.ToString ();
            name.Length = 0;
            return EmitDocType ();
        }

        if (c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == ' ') {
            TokenizerState = HtmlTokenizerState.AfterDocTypeName;
        } else {
            // NUL is replaced with U+FFFD.
            name.Append (c == '\0' ? '\uFFFD' : c);
        }
    } while (TokenizerState == HtmlTokenizerState.DocTypeName);

    doctype.Name = name.ToString ();
    name.Length = 0;

    return null;
}
// Shared "end tag open" step for the raw-text style states.
//
// decoded:           forwarded to EmitDataToken at end of input.
// rawText:           state selected when the next character is not an ASCII letter.
// rawTextEndTagName: state selected when the next character is an ASCII letter.
//
// Returns the result of EmitDataToken at end of input, null otherwise.
HtmlToken ReadGenericRawTextEndTagOpen (bool decoded, HtmlTokenizerState rawText, HtmlTokenizerState rawTextEndTagName)
{
    int next = Peek ();

    if (next == -1) {
        TokenizerState = HtmlTokenizerState.EndOfFile;
        return EmitDataToken (decoded);
    }

    char c = (char) next;

    if (!IsAsciiLetter (c)) {
        // Not a tag name; the character stays unconsumed.
        TokenizerState = rawText;
        return null;
    }

    // First letter of the end tag name: record it and consume it.
    TokenizerState = rawTextEndTagName;
    name.Append (c);
    data.Append (c);
    Read ();

    return null;
}
// 8.2.4.52 DOCTYPE state
//
// Consumes a single whitespace character if one is next, and in every
// non-EOF case moves on to the "before DOCTYPE name" state. At end of input
// the DOCTYPE token is emitted with force-quirks set.
HtmlToken ReadDocType ()
{
    int next = Peek ();

    if (next == -1) {
        TokenizerState = HtmlTokenizerState.EndOfFile;
        doctype.ForceQuirksMode = true;
        name.Length = 0;
        return EmitDocType ();
    }

    TokenizerState = HtmlTokenizerState.BeforeDocTypeName;

    char c = (char) next;
    bool isWhiteSpace = c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == ' ';

    if (isWhiteSpace) {
        data.Append (c);
        Read ();
    }

    return null;
}
// 8.2.4.51 Comment end bang state
//
// Entered after "--!" inside a comment. '>' closes the comment; any other
// character means the "--!" was comment text after all, so it is appended to
// the comment buffer (`name`) before continuing.
HtmlToken ReadCommentEndBang ()
{
    int next = Read ();

    if (next == -1) {
        TokenizerState = HtmlTokenizerState.EndOfFile;
        return EmitCommentToken (name);
    }

    char c = (char) next;
    data.Append (c);

    if (c == '>') {
        TokenizerState = HtmlTokenizerState.Data;
        return EmitCommentToken (name);
    }

    if (c == '-') {
        // Possibly the start of another "--" sequence.
        TokenizerState = HtmlTokenizerState.CommentEndDash;
        name.Append ("--!");
    } else {
        // parse error
        TokenizerState = HtmlTokenizerState.Comment;
        name.Append ("--!");
        name.Append (c == '\0' ? '\uFFFD' : c);
    }

    return null;
}
// 8.2.4.50 Comment end state
//
// Entered after "--" inside a comment; those two hyphens have not been added
// to the comment buffer (`name`) at this point.
//
//   '>'   closes the comment and returns the comment token.
//   '!'   moves to the comment end bang state.
//   '-'   one more hyphen: a single '-' is appended to the comment and the
//         state is kept, so the last two hyphens remain pending.
//   other the "--" was ordinary comment text: it is appended together with
//         the current character (NUL becomes U+FFFD) and the tokenizer goes
//         back to the comment state.
//
// Returns the comment token on '>' and at end of input, null otherwise.
HtmlToken ReadCommentEnd ()
{
    do {
        int nc = Read ();
        char c;

        if (nc == -1) {
            TokenizerState = HtmlTokenizerState.EndOfFile;
            return EmitCommentToken (name);
        }

        c = (char) nc;

        // Note: we save the data in case we hit a parse error and have to emit a data token
        data.Append (c);

        switch (c) {
        case '>':
            TokenizerState = HtmlTokenizerState.Data;
            return EmitCommentToken (name);
        case '!': // parse error
            TokenizerState = HtmlTokenizerState.CommentEndBang;
            return null;
        case '-':
            name.Append ('-');
            break;
        default: // parse error
            TokenizerState = HtmlTokenizerState.Comment;
            // Restore the two pending hyphens; without this "a--b" would be
            // reported as "ab".
            name.Append ("--");
            name.Append (c == '\0' ? '\uFFFD' : c);
            return null;
        }
    } while (true);
}
// 8.2.4.49 Comment end dash state
//
// Entered after a single '-' inside a comment. A second '-' moves to the
// comment end state; anything else puts the pending '-' plus the current
// character into the comment buffer (`name`) and resumes the comment state.
// At end of input the comment token is emitted and the state is set to Data.
HtmlToken ReadCommentEndDash ()
{
    int next = Read ();

    if (next == -1) {
        TokenizerState = HtmlTokenizerState.Data;
        return EmitCommentToken (name);
    }

    char c = (char) next;
    data.Append (c);

    if (c == '-') {
        TokenizerState = HtmlTokenizerState.CommentEnd;
        return null;
    }

    TokenizerState = HtmlTokenizerState.Comment;
    name.Append ('-');
    name.Append (c == '\0' ? '\uFFFD' : c);

    return null;
}
/// <summary>
/// Convenience overload: forwards to the integer-based <c>Transition</c>,
/// passing <paramref name="result"/> through unchanged.
/// </summary>
/// <param name="state">Target tokenizer state; passed on as its integer value.</param>
/// <param name="result">Symbol handed to the integer-based overload.</param>
/// <returns>Whatever the integer-based overload returns.</returns>
private StateResult Transition(HtmlTokenizerState state, HtmlSymbol result)
{
    int target = (int)state;
    return Transition(target, result);
}
// 8.2.4.1 Data state
//
// Buffers text until '<', a character reference (only when
// DecodeCharacterReferences is set) or end of input, then emits the buffered
// text through EmitDataToken.
HtmlToken ReadData ()
{
    do {
        int next = Read ();

        if (next == -1) {
            TokenizerState = HtmlTokenizerState.EndOfFile;
            break;
        }

        char c = (char) next;

        if (c == '<') {
            TokenizerState = HtmlTokenizerState.TagOpen;
        } else if (c == '&' && DecodeCharacterReferences) {
            TokenizerState = HtmlTokenizerState.CharacterReferenceInData;
            return null;
        } else {
            // Note: NUL is a parse error here, but it is emitted unchanged.
            data.Append (c);
        }
    } while (TokenizerState == HtmlTokenizerState.Data);

    return EmitDataToken (DecodeCharacterReferences);
}
// 8.2.4.62 After DOCTYPE system keyword state
//
// Consumes the character after the SYSTEM keyword and selects the next state.
// Returns the DOCTYPE token (with force-quirks set) on '>' and at end of
// input, null otherwise.
HtmlToken ReadAfterDocTypeSystemKeyword ()
{
    int next = Read ();

    if (next == -1) {
        TokenizerState = HtmlTokenizerState.EndOfFile;
        doctype.ForceQuirksMode = true;
        return EmitDocType ();
    }

    char c = (char) next;

    // Raw text is kept in case a parse error forces it out as data.
    data.Append (c);

    if (c == '>') {
        // parse error
        TokenizerState = HtmlTokenizerState.Data;
        doctype.ForceQuirksMode = true;
        return EmitDocType ();
    }

    if (c == '"' || c == '\'') {
        // parse error: the identifier starts without separating whitespace.
        // Remember which quote opened it.
        TokenizerState = HtmlTokenizerState.DocTypeSystemIdentifierQuoted;
        doctype.SystemIdentifier = string.Empty;
        quote = c;
    } else if (c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == ' ') {
        TokenizerState = HtmlTokenizerState.BeforeDocTypeSystemIdentifier;
    } else {
        // parse error
        TokenizerState = HtmlTokenizerState.BogusDocType;
        doctype.ForceQuirksMode = true;
    }

    return null;
}
// 8.2.4.5 RAWTEXT state
//
// Buffers raw text until '<' or end of input and emits it through
// EmitDataToken (false).
HtmlToken ReadRawText ()
{
    do {
        int next = Read ();

        if (next == -1) {
            TokenizerState = HtmlTokenizerState.EndOfFile;
            break;
        }

        char c = (char) next;

        if (c == '<') {
            TokenizerState = HtmlTokenizerState.RawTextLessThan;
            return EmitDataToken (false);
        }

        // NUL is replaced with U+FFFD.
        data.Append (c == '\0' ? '\uFFFD' : c);
    } while (TokenizerState == HtmlTokenizerState.RawText);

    return EmitDataToken (false);
}
// 8.2.4.64 DOCTYPE system identifier (double-quoted) state
//
// Collects the system identifier in `name` until the character stored in
// `quote` is seen again. '>' or end of input before the closing quote ends
// the DOCTYPE early with force-quirks set.
//
// Returns the DOCTYPE token on '>' and at end of input, null once the
// closing quote has been consumed.
HtmlToken ReadDocTypeSystemIdentifierQuoted ()
{
    do {
        int next = Read ();

        if (next == -1) {
            TokenizerState = HtmlTokenizerState.EndOfFile;
            doctype.SystemIdentifier = name.ToString ();
            doctype.ForceQuirksMode = true;
            name.Length = 0;
            return EmitDocType ();
        }

        char c = (char) next;

        // Raw text is kept in case a parse error forces it out as data.
        data.Append (c);

        if (c == '\0') {
            // parse error
            name.Append ('\uFFFD');
        } else if (c == '>') {
            // parse error
            TokenizerState = HtmlTokenizerState.Data;
            doctype.SystemIdentifier = name.ToString ();
            doctype.ForceQuirksMode = true;
            name.Length = 0;
            return EmitDocType ();
        } else if (c == quote) {
            TokenizerState = HtmlTokenizerState.AfterDocTypeSystemIdentifier;
        } else {
            name.Append (c);
        }
    } while (TokenizerState == HtmlTokenizerState.DocTypeSystemIdentifierQuoted);

    doctype.SystemIdentifier = name.ToString ();
    name.Length = 0;

    return null;
}
// 8.2.4.7 PLAINTEXT state
//
// Reads everything up to end of input into the data buffer (NUL becomes
// U+FFFD), then emits it through EmitDataToken (false).
HtmlToken ReadPlainText ()
{
    for (int next = Read (); next != -1; next = Read ()) {
        char c = (char) next;

        data.Append (c == '\0' ? '\uFFFD' : c);
    }

    TokenizerState = HtmlTokenizerState.EndOfFile;

    return EmitDataToken (false);
}
// 8.2.4.66 After DOCTYPE system identifier state
//
// Skips whitespace after the system identifier. '>' emits the DOCTYPE token;
// any other character switches to the bogus DOCTYPE state (force-quirks is
// not touched there). End of input emits the token with force-quirks set.
HtmlToken ReadAfterDocTypeSystemIdentifier ()
{
    while (true) {
        int next = Read ();

        if (next == -1) {
            TokenizerState = HtmlTokenizerState.EndOfFile;
            doctype.ForceQuirksMode = true;
            return EmitDocType ();
        }

        char c = (char) next;

        // Raw text is kept in case a parse error forces it out as data.
        data.Append (c);

        if (c == '>') {
            TokenizerState = HtmlTokenizerState.Data;
            return EmitDocType ();
        }

        bool isWhiteSpace = c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == ' ';

        if (!isWhiteSpace) {
            // parse error
            TokenizerState = HtmlTokenizerState.BogusDocType;
            return null;
        }
    }
}
// 8.2.4.9 End tag open state
//
// Consumes the character after "</" and selects the next state.
// Returns the result of EmitDataToken (false) at end of input, null otherwise.
HtmlToken ReadEndTagOpen ()
{
    int next = Read ();

    if (next == -1) {
        TokenizerState = HtmlTokenizerState.EndOfFile;
        return EmitDataToken (false);
    }

    char c = (char) next;

    // Raw text is kept in case a parse error forces it out as data.
    data.Append (c);

    if (c == '>') {
        // parse error: "</>" - the buffered text is dropped
        TokenizerState = HtmlTokenizerState.Data;
        data.Length = 0; // FIXME: this is probably wrong
    } else if (IsAsciiLetter (c)) {
        // First letter of an end tag name.
        TokenizerState = HtmlTokenizerState.TagName;
        isEndTag = true;
        name.Append (c);
    } else {
        TokenizerState = HtmlTokenizerState.BogusComment;
    }

    return null;
}
// 8.2.4.67 Bogus DOCTYPE state
//
// Discards input (while still recording it in the data buffer) until '>' or
// end of input, then emits the DOCTYPE token. Force-quirks is set only in
// the end-of-input case.
HtmlToken ReadBogusDocType ()
{
    for (;;) {
        int next = Read ();

        if (next == -1) {
            TokenizerState = HtmlTokenizerState.EndOfFile;
            doctype.ForceQuirksMode = true;
            return EmitDocType ();
        }

        char c = (char) next;

        // Raw text is kept in case a parse error forces it out as data.
        data.Append (c);

        if (c != '>')
            continue;

        TokenizerState = HtmlTokenizerState.Data;
        return EmitDocType ();
    }
}
// 8.2.4.17 Script data less-than sign state
//
// Records the '<' in the data buffer and looks at the next character:
//   '/'  possible end tag: consumed, tag name buffer reset;
//   '!'  possible escape start: consumed;
//   else back to script data without consuming anything.
// There is no explicit end-of-input check; a -1 from Peek is cast to char
// and ends up in the final branch. Always returns null.
HtmlToken ReadScriptDataLessThan ()
{
    char c = (char) Peek ();

    data.Append ('<');

    if (c == '/') {
        TokenizerState = HtmlTokenizerState.ScriptDataEndTagOpen;
        data.Append ('/');
        name.Length = 0;
        Read ();
    } else if (c == '!') {
        TokenizerState = HtmlTokenizerState.ScriptDataEscapeStart;
        data.Append ('!');
        Read ();
    } else {
        TokenizerState = HtmlTokenizerState.ScriptData;
    }

    return null;
}
// 8.2.4.68 CDATA section state
//
// Reads CDATA content until the "]]>" terminator or end of input.
//
// The last three characters read are held back in the `cdata` window
// (`cdataIndex` counts how many slots are filled) so that the terminator
// never reaches the data buffer. Once the window is full, each new character
// pushes the oldest one out into `data`.
//
// The terminator is checked as soon as the window holds three characters,
// including the moment it first fills up, so that a section whose very first
// characters are "]]>" (an empty CDATA section) is closed correctly.
//
// Returns the result of EmitCDataToken, either at the terminator (state
// becomes Data) or at end of input (state becomes EndOfFile, held-back
// characters are flushed to `data` first).
HtmlToken ReadCDataSection ()
{
    int nc = Read ();

    while (nc != -1) {
        char c = (char) nc;

        if (cdataIndex >= 3) {
            // Window is full: the oldest character is definitely content.
            data.Append (cdata[0]);
            cdata[0] = cdata[1];
            cdata[1] = cdata[2];
            cdata[2] = c;
        } else {
            cdata[cdataIndex++] = c;
        }

        if (cdataIndex >= 3 && cdata[0] == ']' && cdata[1] == ']' && cdata[2] == '>') {
            TokenizerState = HtmlTokenizerState.Data;
            cdataIndex = 0;

            return EmitCDataToken ();
        }

        nc = Read ();
    }

    TokenizerState = HtmlTokenizerState.EndOfFile;

    // No terminator was found: whatever is still held back is content.
    for (int i = 0; i < cdataIndex; i++)
        data.Append (cdata[i]);

    cdataIndex = 0;

    return EmitCDataToken ();
}
// 8.2.4.19 Script data end tag name state
//
// Accumulates a candidate end tag name inside script data. Whitespace, '/'
// and '>' only end the tag when NameIs ("script") holds; in every other case
// a non-letter sends the tokenizer back to the script data state.
//
// Returns the end tag token on "</script>", the result of
// EmitScriptDataToken at end of input, and null otherwise.
HtmlToken ReadScriptDataEndTagName ()
{
    do {
        int next = Read ();

        if (next == -1) {
            TokenizerState = HtmlTokenizerState.EndOfFile;
            name.Length = 0;
            return EmitScriptDataToken ();
        }

        char c = (char) next;

        // Raw text is kept in case a parse error forces it out as data.
        data.Append (c);

        bool isWhiteSpace = c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == ' ';
        bool mayEndName = isWhiteSpace || c == '/' || c == '>';

        if (mayEndName && NameIs ("script")) {
            if (c == '>') {
                var token = CreateTagToken (name.ToString (), true);
                TokenizerState = HtmlTokenizerState.Data;
                data.Length = 0;
                name.Length = 0;
                return token;
            }

            if (c == '/')
                TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
            else
                TokenizerState = HtmlTokenizerState.BeforeAttributeName;
        } else if (!IsAsciiLetter (c)) {
            // Not an end tag after all: treat what was read as script data.
            TokenizerState = HtmlTokenizerState.ScriptData;
            name.Length = 0;
            return null;
        } else {
            name.Append (c == '\0' ? '\uFFFD' : c);
        }
    } while (TokenizerState == HtmlTokenizerState.ScriptDataEndTagName);

    // The name is complete but the tag is not: create the token now and let
    // the following states finish it.
    tag = CreateTagToken (name.ToString (), true);
    name.Length = 0;

    return null;
}
// Hands out the tag token under construction and selects the state the
// tokenizer continues in.
//
// End tags and empty elements always continue in the Data state. For other
// start tags the state depends on the tag id; states that need to recognise
// their own end tag also record the lower-cased tag name in activeTagName.
// An <html> start tag additionally updates HtmlNamespace from its last
// xmlns attribute that has a non-null value.
//
// The data buffer is cleared and the `tag` field reset before returning.
HtmlToken EmitTagToken ()
{
    var emitted = tag;

    if (emitted.IsEndTag || emitted.IsEmptyElement) {
        TokenizerState = HtmlTokenizerState.Data;
    } else {
        switch (emitted.Id) {
        case HtmlTagId.Style:
        case HtmlTagId.Xmp:
        case HtmlTagId.IFrame:
        case HtmlTagId.NoEmbed:
        case HtmlTagId.NoFrames:
        case HtmlTagId.NoScript:
            // TODO: for NoScript, only switch into the RawText state if scripting is enabled
            TokenizerState = HtmlTokenizerState.RawText;
            activeTagName = emitted.Name.ToLowerInvariant ();
            break;
        case HtmlTagId.Title:
        case HtmlTagId.TextArea:
            TokenizerState = HtmlTokenizerState.RcData;
            activeTagName = emitted.Name.ToLowerInvariant ();
            break;
        case HtmlTagId.PlainText:
            TokenizerState = HtmlTokenizerState.PlainText;
            break;
        case HtmlTagId.Script:
            TokenizerState = HtmlTokenizerState.ScriptData;
            break;
        case HtmlTagId.Html:
            TokenizerState = HtmlTokenizerState.Data;

            // Walk the attributes back to front so the last usable xmlns wins.
            for (int index = emitted.Attributes.Count - 1; index >= 0; index--) {
                var attribute = emitted.Attributes[index];

                if (attribute.Id != HtmlAttributeId.XmlNS || attribute.Value == null)
                    continue;

                HtmlNamespace = attribute.Value.ToHtmlNamespace ();
                break;
            }
            break;
        default:
            TokenizerState = HtmlTokenizerState.Data;
            break;
        }
    }

    data.Length = 0;
    tag = null;

    return emitted;
}
// 8.2.4.22 Script data escaped state
//
// Buffers escaped script text until '-', '<' or end of input.
//   '-'  is buffered and moves to the escaped-dash state (returns null);
//   '<'  emits what was buffered so far, then starts a new buffer with '<';
//   EOF  emits what was buffered.
HtmlToken ReadScriptDataEscaped ()
{
    HtmlToken pending = null;

    do {
        int next = Read ();

        if (next == -1) {
            TokenizerState = HtmlTokenizerState.EndOfFile;
            return EmitScriptDataToken ();
        }

        char c = (char) next;

        if (c == '-') {
            TokenizerState = HtmlTokenizerState.ScriptDataEscapedDash;
            data.Append ('-');
        } else if (c == '<') {
            TokenizerState = HtmlTokenizerState.ScriptDataEscapedLessThan;
            pending = EmitScriptDataToken ();
            data.Append ('<');
        } else {
            // NUL is replaced with U+FFFD.
            data.Append (c == '\0' ? '\uFFFD' : c);
        }
    } while (TokenizerState == HtmlTokenizerState.ScriptDataEscaped);

    return pending;
}
// 8.2.4.69 Tokenizing character references
//
// Called after an '&' has been seen. `next` is the state to continue in once
// the reference has been dealt with.
//
// If the character after '&' is whitespace, '<' or '&', nothing is consumed
// and a literal '&' is buffered. Otherwise characters are fed to `entity`
// for as long as entity.Push accepts them, and the value it reports is
// buffered. A ';' directly after the accepted characters is consumed too.
//
// Returns the result of EmitDataToken (true) when end of input is hit,
// null otherwise.
HtmlToken ReadCharacterReference (HtmlTokenizerState next)
{
    int peeked = Peek ();

    if (peeked == -1) {
        TokenizerState = HtmlTokenizerState.EndOfFile;
        data.Append ('&');
        return EmitDataToken (true);
    }

    char c = (char) peeked;

    bool notAReference = c == '\t' || c == '\r' || c == '\n' || c == '\f' ||
        c == ' ' || c == '<' || c == '&';

    if (notAReference) {
        // no character is consumed, emit '&'
        TokenizerState = next;
        data.Append ('&');
        return null;
    }

    entity.Push ('&');

    while (entity.Push (c)) {
        // The character was accepted, so consume it and look at the next one.
        Read ();

        peeked = Peek ();

        if (peeked == -1) {
            // Input ended inside the reference: emit the raw text seen so far.
            TokenizerState = HtmlTokenizerState.EndOfFile;
            data.Append (entity.GetPushedInput ());
            entity.Reset ();
            return EmitDataToken (true);
        }

        c = (char) peeked;
    }

    TokenizerState = next;
    data.Append (entity.GetValue ());
    entity.Reset ();

    if (c == ';') {
        // consume the ';'
        Read ();
    }

    return null;
}
/// <summary>
/// Convenience overload: forwards to the integer-based <c>Transition</c>,
/// passing <paramref name="result"/> through unchanged.
/// </summary>
/// <param name="state">Target tokenizer state; passed on as its integer value.</param>
/// <param name="result">Token handed to the integer-based overload.</param>
/// <returns>Whatever the integer-based overload returns.</returns>
private StateResult Transition(HtmlTokenizerState state, SyntaxToken result)
{
    int target = (int)state;
    return Transition(target, result);
}
// Shared "less-than sign" step for the raw-text style states.
//
// Records the '<' in the data buffer, then looks at the next character:
// '/' is consumed and leads to rawTextEndTagOpen (with the tag name buffer
// reset); anything else leads back to rawText without consuming.
// There is no explicit end-of-input check; a -1 from Peek is cast to char
// and takes the "anything else" path. Always returns null.
HtmlToken ReadGenericRawTextLessThan (HtmlTokenizerState rawText, HtmlTokenizerState rawTextEndTagOpen)
{
    char c = (char) Peek ();

    data.Append ('<');

    if (c != '/') {
        TokenizerState = rawText;
        return null;
    }

    TokenizerState = rawTextEndTagOpen;
    data.Append ('/');
    name.Length = 0;
    Read ();

    return null;
}