/// <summary>
/// Skips whitespace after a DOCTYPE identifier; '>' emits the DOCTYPE token,
/// EOF force-quirks and emits, anything else drops into the bogus DOCTYPE state.
/// </summary>
/// <param name="tokenizer">Tokenizer used to emit completed tokens.</param>
/// <param name="reader">Input stream being tokenized.</param>
/// <returns>The next tokenizer state.</returns>
public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
{
    int c;
    do
    {
        c = Read(reader);
    } while (IsWhitespace(c));

    if (c == '>')
    {
        tokenizer.EmitToken(Token);
        return DataState.Instance;
    }

    if (c == -1)
    {
        // Parse error. Force quirks, emit the DOCTYPE token and return to the
        // data state, where the EOF will be read again.
        ReportParseError();
        Token.ForceQuirks = true;
        tokenizer.EmitToken(Token);
        return DataState.Instance;
    }

    // Anything else: parse error, switch to the bogus DOCTYPE state.
    // FIX: hand the current DOCTYPE token to the bogus state so it can still be
    // emitted when that state reaches '>' (matches the handoff pattern used by
    // the other DOCTYPE states in this file; it was previously left unset).
    ReportParseError();
    BogusDocTypeState.Instance.Token = Token;
    return BogusDocTypeState.Instance;
}
/// <summary>
/// Comment start state: dispatches on the first character after "&lt;!--",
/// per the HTML tokenization rules.
/// </summary>
/// <param name="tokenizer">Tokenizer used to emit completed tokens.</param>
/// <param name="reader">Input stream being tokenized.</param>
/// <returns>The next tokenizer state.</returns>
public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
{
    int c = Read(reader);
    switch (c)
    {
        case '-':
            // FIX: pass the comment token along, as every other transition in
            // this file does; it was previously left unset, so the dash state
            // would operate on a stale token.
            CommentStartDashState.Instance.Token = Token;
            return CommentStartDashState.Instance;

        case 0:
            // Parse error: NULL becomes U+FFFD in the comment text.
            ReportParseError();
            Token.Comment += '\uFFFD';
            CommentState.Instance.Token = Token;
            return CommentState.Instance;

        case '>':
            // Parse error (abrupt comment close): emit the empty comment.
            ReportParseError();
            tokenizer.EmitToken(Token);
            return DataState.Instance;

        case -1:
            // Parse error: emit the comment; EOF is re-read in the data state.
            ReportParseError();
            tokenizer.EmitToken(Token);
            return DataState.Instance;

        default:
            Token.Comment += (char)c;
            CommentState.Instance.Token = Token;
            return CommentState.Instance;
    }
}
/// <summary>
/// After DOCTYPE name state: skips whitespace, finishes the DOCTYPE on '>',
/// and recognizes the PUBLIC / SYSTEM keywords that introduce identifiers.
/// </summary>
/// <param name="tokenizer">Tokenizer used to emit completed tokens.</param>
/// <param name="reader">Input stream being tokenized.</param>
/// <returns>The next tokenizer state.</returns>
public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
{
    int c;
    do
    {
        c = Read(reader);
    } while (IsWhitespace(c));

    if (c == '>')
    {
        tokenizer.EmitToken(Token);
        return DataState.Instance;
    }

    if (c == -1)
    {
        // Parse error. Switch to the data state. Set the DOCTYPE token's
        // force-quirks flag to on. Emit that DOCTYPE token. Reconsume the EOF.
        ReportParseError();
        Token.ForceQuirks = true;
        tokenizer.EmitToken(Token);
        return DataState.Instance;
    }

    // NOTE(review): ReadBlock may return fewer than 5 characters near EOF,
    // leaving NULs in the buffer — confirm Read/ReadBlock semantics upstream.
    char[] buffer = new char[6];
    buffer[0] = (char)c;
    ReadBlock(reader, buffer, 1, 5);
    string bufferStr = new string(buffer);

    // If the six characters starting from the current input character are an
    // ASCII case-insensitive match for "PUBLIC", consume them and switch to
    // the after DOCTYPE public keyword state.
    if (bufferStr.Equals("public", StringComparison.OrdinalIgnoreCase))
    {
        AfterDocTypePublicKeywordState.Instance.Token = Token;
        return AfterDocTypePublicKeywordState.Instance;
    }

    // Likewise for "SYSTEM" and the after DOCTYPE system keyword state.
    if (bufferStr.Equals("system", StringComparison.OrdinalIgnoreCase))
    {
        // FIX: the system keyword state needs the current DOCTYPE token too;
        // it was previously left unset (unlike the PUBLIC branch above).
        AfterDocTypeSystemKeywordState.Instance.Token = Token;
        return AfterDocTypeSystemKeywordState.Instance;
    }

    // Otherwise: parse error, set force-quirks and switch to the bogus DOCTYPE
    // state — which is what the old comment already said; the old code jumped
    // to the bogus *comment* state instead and dropped the DOCTYPE token.
    // NOTE(review): seeking the underlying stream bypasses StreamReader's
    // internal buffer, so the pushed-back characters may not actually be
    // re-read — confirm against the Read() implementation.
    reader.BaseStream.Seek(-6, SeekOrigin.Current);
    ReportParseError();
    Token.ForceQuirks = true;
    BogusDocTypeState.Instance.Token = Token;
    return BogusDocTypeState.Instance;
}
/// <summary>
/// RCDATA state: streams characters through, branching on '&amp;' (character
/// reference) and '&lt;' (possible tag), per the HTML tokenization rules.
/// </summary>
/// <param name="tokenizer">Tokenizer used to emit character/EOF tokens.</param>
/// <param name="reader">Input stream being tokenized.</param>
/// <returns>The next tokenizer state.</returns>
public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
{
    int current = Read(reader);

    if (current == '&')
    {
        return CharacterReferenceInRCDATAState.Instance;
    }

    if (current == '<')
    {
        return RCDATALessThanSignState.Instance;
    }

    if (current == 0)
    {
        // NULL is a parse error; emit U+FFFD REPLACEMENT CHARACTER instead.
        ReportParseError();
        tokenizer.EmitChar('\uFFFD');
        return this;
    }

    if (current == -1)
    {
        tokenizer.EmitToken(new EndOfFileToken());
        return this;
    }

    tokenizer.EmitChar((char)current);
    return this;
}
/// <summary>
/// Data state: the tokenizer's default state. Emits ordinary characters,
/// resolves character references in place, and hands '&lt;' to the tag open
/// state.
/// </summary>
/// <param name="tokenizer">Tokenizer used to emit character/EOF tokens.</param>
/// <param name="reader">Input stream being tokenized.</param>
/// <returns>The next tokenizer state, or null at end of input.</returns>
public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
{
    while (true)
    {
        int current = Read(reader);

        if (current == '&')
        {
            // Character references are resolved inline; stay in the data state.
            CharacterReferenceInDataState.Instance.Process(tokenizer, reader, null);
        }
        else if (current == '<')
        {
            return TagOpenState.Instance;
        }
        else if (current == '\0')
        {
            // NULL in the data state is a parse error but is emitted as-is.
            ReportParseError();
            tokenizer.EmitChar('\0');
        }
        else if (current == -1)
        {
            tokenizer.EmitToken(new EndOfFileToken());
            return null;
        }
        else
        {
            tokenizer.EmitChar((char)current);
            return this; // Required to allow switching the state.
        }
    }
}
/// <summary>
/// Processes the character after an attribute value: whitespace resumes
/// attribute parsing, '/' starts a self-closing tag, '>' emits the token, and
/// anything else is a parse error reconsumed in the before-attribute-name
/// state.
/// </summary>
/// <param name="tokenizer">Tokenizer used to emit completed tokens.</param>
/// <param name="reader">Input stream being tokenized.</param>
/// <returns>The next tokenizer state.</returns>
public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
{
    int c = Read(reader);
    if (IsWhitespace(c))
    {
        BeforeAttributeNameState.Instance.Token = Token.ContainingTag;
        return BeforeAttributeNameState.Instance;
    }
    if (c == '/')
    {
        SelfClosingStartTagState.Instance.Token = Token.ContainingTag;
        return SelfClosingStartTagState.Instance;
    }
    if (c == '>')
    {
        tokenizer.EmitToken(Token);
        return DataState.Instance;
    }
    if (c == -1)
    {
        // Parse error; the EOF will be read again in the data state.
        ReportParseError();
        return DataState.Instance;
    }

    // Anything else: parse error; reconsume the character in the before
    // attribute name state.
    // FIX: the reconsumed character was being queued on BeforeDocTypeNameState
    // (a copy/paste slip) even though control transfers to
    // BeforeAttributeNameState, so the character was silently dropped. Queue
    // it on the state that will actually run next.
    ReportParseError();
    BeforeAttributeNameState.Instance.Token = Token.ContainingTag;
    BeforeAttributeNameState.Instance.LastConsumedCharacters.Enqueue((char)c);
    return BeforeAttributeNameState.Instance;
}
/// <summary>
/// After DOCTYPE public keyword state: whitespace leads to the public
/// identifier; quotes start it directly (parse error); '>' / EOF abort the
/// DOCTYPE with force-quirks; anything else falls into the bogus DOCTYPE
/// state.
/// </summary>
/// <param name="tokenizer">Tokenizer used to emit completed tokens.</param>
/// <param name="reader">Input stream being tokenized.</param>
/// <returns>The next tokenizer state.</returns>
public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
{
    int current = Read(reader);

    if (IsWhitespace(current))
    {
        BeforeDocTypePublicIdentifierState.Instance.Token = Token;
        return BeforeDocTypePublicIdentifierState.Instance;
    }

    switch (current)
    {
        case '"':
            // Parse error (missing whitespace), but continue into the
            // double-quoted public identifier.
            ReportParseError();
            Token.PublicIdentifier = string.Empty;
            DocTypePublicIdentifierQuotedState.InstanceDoubleQuoted.Token = Token;
            return DocTypePublicIdentifierQuotedState.InstanceDoubleQuoted;

        case '\'':
            // Same as above for the single-quoted variant.
            ReportParseError();
            Token.PublicIdentifier = string.Empty;
            DocTypePublicIdentifierQuotedState.InstanceSingleQuoted.Token = Token;
            return DocTypePublicIdentifierQuotedState.InstanceSingleQuoted;

        case '>':
            ReportParseError();
            Token.ForceQuirks = true;
            tokenizer.EmitToken(Token);
            return DataState.Instance;

        case -1:
            // Parse error; EOF is read again in the data state.
            ReportParseError();
            Token.ForceQuirks = true;
            tokenizer.EmitToken(Token);
            return DataState.Instance;

        default:
            ReportParseError();
            Token.ForceQuirks = true;
            BogusDocTypeState.Instance.Token = Token;
            return BogusDocTypeState.Instance;
    }
}
/// <summary>
/// DOCTYPE name state: accumulates the DOCTYPE name (lower-casing ASCII
/// upper-case letters) until whitespace, '>', or end of input.
/// </summary>
/// <param name="tokenizer">Tokenizer used to emit completed tokens.</param>
/// <param name="reader">Input stream being tokenized.</param>
/// <returns>The next tokenizer state.</returns>
public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
{
    while (true)
    {
        int current = Read(reader);

        if (IsWhitespace(current))
        {
            AfterDocTypeNameState.Instance.Token = Token;
            return AfterDocTypeNameState.Instance;
        }

        switch (current)
        {
            case '>':
                tokenizer.EmitToken(Token);
                return DataState.Instance;

            case 0:
                // Parse error: NULL becomes U+FFFD in the DOCTYPE name.
                ReportParseError();
                Token.Name += "\uFFFD";
                break;

            case -1:
                // Parse error: force quirks, emit the token; the EOF is read
                // again in the data state.
                ReportParseError();
                Token.ForceQuirks = true;
                tokenizer.EmitToken(Token);
                return DataState.Instance;

            default:
                // ASCII upper-case letters are appended lower-cased; all other
                // characters are appended as-is.
                Token.Name += base.IsUppercaseAsciiLetter(current)
                    ? Char.ToLower((char)current)
                    : (char)current;
                break;
        }
    }
}
public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader) { int c; do { c = Read(reader); } while (IsWhitespace(c)); if (c == '"') { AttributeValueQuotedState.InstanceDoubleQuoted.Token = Token; return AttributeValueQuotedState.InstanceDoubleQuoted; } if (c == '&') { AttributeValueUnquotedState.Instance.Token = Token; AttributeValueUnquotedState.Instance.LastConsumedCharacters.Enqueue((char)c); return AttributeValueUnquotedState.Instance; } if (c == '\'') { AttributeValueQuotedState.InstanceSingleQuoted.Token = Token; return AttributeValueQuotedState.InstanceSingleQuoted; } if (c == 0) { ReportParseError(); Token.AttributeValue += "\uFFFD"; AttributeValueUnquotedState.Instance.Token = Token; return AttributeValueUnquotedState.Instance; } if (c == '>') { ReportParseError(); tokenizer.EmitToken(Token); return DataState.Instance; } if (c == -1) { ReportParseError(); return DataState.Instance; //reconsume... ? } if (c == '<' || c == '=' || c == '`') { ReportParseError(); } Token.AttributeValue += (char)c; AttributeValueUnquotedState.Instance.Token = Token; return AttributeValueUnquotedState.Instance; }
/// <summary>
/// Before DOCTYPE name state: skips whitespace, creates a fresh DOCTYPE token
/// and seeds its name from the first significant character.
/// </summary>
/// <param name="tokenizer">Tokenizer used to emit completed tokens.</param>
/// <param name="reader">Input stream being tokenized.</param>
/// <returns>The next tokenizer state.</returns>
public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
{
    int current;
    do
    {
        current = Read(reader);
    } while (IsWhitespace(current));

    DocTypeToken docType = new DocTypeToken();
    DocTypeNameState.Instance.Token = docType;

    if (base.IsUppercaseAsciiLetter(current))
    {
        // ASCII upper-case letters are lower-cased into the DOCTYPE name.
        docType.Name = Char.ToLower((char)current).ToString();
        return DocTypeNameState.Instance;
    }

    switch (current)
    {
        case 0:
            // Parse error: NULL seeds the name with U+FFFD.
            ReportParseError();
            docType.Name = "\uFFFD";
            return DocTypeNameState.Instance;

        case '>':
            // Parse error (missing name): force quirks and emit.
            ReportParseError();
            docType.ForceQuirks = true;
            tokenizer.EmitToken(docType);
            return DataState.Instance;

        case -1:
            // Parse error: force quirks, emit; EOF is read again in the data
            // state.
            ReportParseError();
            docType.ForceQuirks = true;
            tokenizer.EmitToken(docType);
            return DataState.Instance;

        default:
            docType.Name = ((char)current).ToString();
            return DocTypeNameState.Instance;
    }
}
/// <summary>
/// Comment end state (after "--" inside a comment): '>' closes the comment;
/// every other character is a parse error with state-specific recovery.
/// </summary>
/// <param name="tokenizer">Tokenizer used to emit completed tokens.</param>
/// <param name="reader">Input stream being tokenized.</param>
/// <returns>The next tokenizer state.</returns>
public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
{
    while (true)
    {
        int current = Read(reader);

        if (current == '>')
        {
            tokenizer.EmitToken(Token);
            return DataState.Instance;
        }

        if (current == 0)
        {
            // Parse error: "--" plus U+FFFD joins the comment text.
            ReportParseError();
            Token.Comment += "--\uFFFD";
            CommentState.Instance.Token = Token;
            return CommentState.Instance;
        }

        if (current == '!')
        {
            ReportParseError();
            CommentEndBangState.Instance.Token = Token;
            return CommentEndBangState.Instance;
        }

        if (current == '-')
        {
            // Parse error: the extra dash is appended and we stay here.
            ReportParseError();
            Token.Comment += "-";
            continue;
        }

        if (current == -1)
        {
            // Parse error: emit the comment; EOF is read again in the data
            // state.
            ReportParseError();
            tokenizer.EmitToken(Token);
            return DataState.Instance;
        }

        // Anything else: parse error; "--" plus the character re-enters the
        // comment text.
        ReportParseError();
        Token.Comment += "--" + (char)current;
        CommentState.Instance.Token = Token;
        return CommentState.Instance;
    }
}
/// <summary>
/// RCDATA end tag name state: builds a candidate end tag name; if it is not
/// the appropriate end tag, the buffered characters are flushed back out as
/// text and tokenizing resumes in the RCDATA state.
/// </summary>
/// <param name="tokenizer">Tokenizer used to emit tokens and characters.</param>
/// <param name="reader">Input stream being tokenized.</param>
/// <returns>The next tokenizer state.</returns>
public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
{
    int c = Read(reader);
    if (IsWhitespace(c))
    {
        if (tokenizer.IsAppropriateEndTagToken(Token))
        {
            return BeforeAttributeNameState.Instance;
        }
    }
    else if (c == '/')
    {
        if (tokenizer.IsAppropriateEndTagToken(Token))
        {
            return SelfClosingStartTagState.Instance;
        }
    }
    else if (c == '>')
    {
        if (tokenizer.IsAppropriateEndTagToken(Token))
        {
            tokenizer.EmitToken(Token); // TODO - is this the right token to emit?
            return DataState.Instance;
        }
    }
    else if (IsUppercaseAsciiLetter(c))
    {
        Token.TagName += Char.ToLower((char)c);
        tokenizer.TemporaryBuffer.Add((char)c);
        return this;
    }
    else if (IsLowercaseAsciiLetter(c))
    {
        Token.TagName += (char)c;
        tokenizer.TemporaryBuffer.Add((char)c);
        return this;
    }

    // Anything else (or an inappropriate end tag): emit "</" plus the buffered
    // characters and reconsume the current character in the RCDATA state.
    tokenizer.EmitChar('<');
    tokenizer.EmitChar('/');
    foreach (char bufferedChar in tokenizer.TemporaryBuffer)
    {
        tokenizer.EmitChar(bufferedChar);
    }
    // FIX: only queue a real character for reconsumption. The old code also
    // queued (char)-1 (U+FFFF) on EOF, injecting a bogus character into the
    // RCDATA state; EOF is simply read again there instead.
    if (c != -1)
    {
        RCDATAState.Instance.LastConsumedCharacters.Enqueue((char)c);
    }
    return RCDATAState.Instance;
}
/// <summary>
/// Emits raw characters (PLAINTEXT-style): NULL is a parse error emitted as
/// U+FFFD, EOF emits an end-of-file token, anything else is emitted verbatim.
/// </summary>
/// <param name="tokenizer">Tokenizer used to emit character/EOF tokens.</param>
/// <param name="reader">Input stream being tokenized.</param>
/// <returns>The next tokenizer state, or null at end of input.</returns>
public override BaseState Process(HtmlTokenizer tokenizer, System.IO.StreamReader reader)
{
    for (;;)
    {
        int c = Read(reader);
        switch (c)
        {
            case 0:
                // Parse error: NULL is emitted as U+FFFD.
                ReportParseError();
                tokenizer.EmitChar('\uFFFD');
                break;

            case -1:
                // FIX: the old code fell back into the loop here, re-reading
                // EOF and emitting end-of-file tokens forever (the method
                // never returned). Emit one EOF token and stop, mirroring
                // DataState's EOF handling.
                tokenizer.EmitToken(new EndOfFileToken());
                return null;

            default:
                tokenizer.EmitChar((char)c);
                return this;
        }
    }
}
/// <summary>
/// Before attribute name state: skips whitespace, then either closes the tag
/// ('/' or '>') or starts a new attribute from the current character.
/// </summary>
/// <param name="tokenizer">Tokenizer used to emit completed tokens.</param>
/// <param name="reader">Input stream being tokenized.</param>
/// <returns>The next tokenizer state.</returns>
public override BaseState Process(HtmlTokenizer tokenizer, StreamReader reader)
{
    int c;
    do
    {
        c = Read(reader);
    } while (IsWhitespace(c));

    if (c == '/')
    {
        return SelfClosingStartTagState.Instance;
    }
    if (c == '>')
    {
        tokenizer.EmitToken(Token);
        return DataState.Instance;
    }
    if (base.IsUppercaseAsciiLetter(c))
    {
        // ASCII upper-case letters start the attribute name lower-cased.
        return StartAttribute(Char.ToLower((char)c).ToString());
    }
    if (c == 0)
    {
        // Parse error: NULL starts the attribute name as U+FFFD.
        ReportParseError();
        return StartAttribute("\uFFFD");
    }
    if (c == -1)
    {
        // Parse error; the EOF will be read again in the data state.
        ReportParseError();
        return DataState.Instance;
    }
    // '"', '\'', '<' and '=' are parse errors but still start an attribute.
    if (c == '"' || c == '\'' || c == '<' || c == '=')
    {
        ReportParseError();
    }
    return StartAttribute(((char)c).ToString());
}

// Creates a fresh attribute token on the current tag, registers it with the
// attribute name state, and returns that state. Factors out construction
// that was previously repeated three times in Process.
private BaseState StartAttribute(string initialName)
{
    AttributeToken attrToken = new AttributeToken()
    {
        AttributeName = initialName,
        AttributeValue = string.Empty,
        ContainingTag = Token
    };
    Token.Attributes.Add(attrToken);
    AttributeNameState.Instance.Token = attrToken;
    return AttributeNameState.Instance;
}