// from < in data
// Decides what kind of markup follows the '<' by inspecting the reader's current
// character: '!', '/' and '?' each advance to a dedicated state, a letter begins a
// new pending tag, and anything else is reported as an error and the '<' is emitted
// as plain character data.
public override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Current;

    if (next == '!')
    {
        t.AdvanceTransition(MarkupDeclarationOpen);
        return;
    }

    if (next == '/')
    {
        t.AdvanceTransition(EndTagOpen);
        return;
    }

    if (next == '?')
    {
        t.AdvanceTransition(BogusComment);
        return;
    }

    if (r.MatchesLetter())
    {
        t.CreateTagPending(true);
        t.Transition(TagName);
        return;
    }

    // Not a recognised tag opener: report it and hand the '<' back as text.
    t.Error(this);
    t.Emit('<'); // char that got us here
    t.Transition(Data);
}
// Accumulates the letters of an end-tag name. Each run of letters is appended
// lower-cased to the pending tag name and verbatim to the data buffer. Whitespace,
// '/' and '>' finish the name only when the tokeniser reports an appropriate end
// tag token; every other situation is delegated to AnythingElse.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        string letters = r.ConsumeLetterSequence();
        t.tagPending.AppendTagName(letters.ToLowerInvariant());
        t.dataBuffer.Append(letters);
        return;
    }

    char c = r.Consume();
    bool isWhitespace = c == '\t' || c == '\n' || c == '\f' || c == ' ';
    bool endsName = isWhitespace || c == '/' || c == '>';

    // The appropriate-end-tag check is only made for characters that can end the name.
    if (!endsName || !t.IsAppropriateEndTagToken())
    {
        AnythingElse(t, r);
        return;
    }

    if (isWhitespace)
    {
        t.Transition(BeforeAttributeName);
    }
    else if (c == '/')
    {
        t.Transition(SelfClosingStartTag);
    }
    else
    {
        // c == '>'
        t.EmitTagPending();
        t.Transition(Data);
    }
}
// A letter at the current position starts a pending tag (created with 'false')
// and moves to ScriptDataEndTagName; otherwise the literal "</" is emitted and
// the tokeniser returns to ScriptData.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.MatchesLetter())
    {
        t.Emit("</");
        t.Transition(ScriptData);
        return;
    }

    t.CreateTagPending(false);
    t.Transition(ScriptDataEndTagName);
}
// When the current character is a letter, starts a pending tag (created with
// 'false'), seeds its name with the lower-cased letter, records the letter as-is
// in the data buffer and advances to ScriptDataEscapedEndTagName. Otherwise emits
// the literal "</" and returns to ScriptDataEscaped.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.MatchesLetter())
    {
        t.Emit("</");
        t.Transition(ScriptDataEscaped);
        return;
    }

    t.CreateTagPending(false);

    char first = r.Current;
    t.tagPending.AppendTagName(char.ToLowerInvariant(first));
    t.dataBuffer.Append(first);
    t.AdvanceTransition(ScriptDataEscapedEndTagName);
}
// Builds up the name of the pending doctype. Letter runs are appended lower-cased;
// whitespace moves on to AfterDoctypeName; '>' emits the doctype and returns to
// Data; a null character is reported and replaced; end of input is reported, the
// doctype is flagged forceQuirks and emitted; any other character is appended as-is.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        string letters = r.ConsumeLetterSequence();
        t.doctypePending.name.Append(letters.ToLowerInvariant());
        return;
    }

    char c = r.Consume();

    if (c == '\t' || c == '\n' || c == '\f' || c == ' ')
    {
        t.Transition(AfterDoctypeName);
    }
    else if (c == '>')
    {
        t.EmitDoctypePending();
        t.Transition(Data);
    }
    else if (c == nullChar)
    {
        t.Error(this);
        t.doctypePending.name.Append(replacementChar);
    }
    else if (c == eof)
    {
        t.EofError(this);
        t.doctypePending.forceQuirks = true;
        t.EmitDoctypePending();
        t.Transition(Data);
    }
    else
    {
        t.doctypePending.name.Append(c);
    }
}
// Handles the character after a '<' while in escaped script data:
//  - a letter: resets the temp buffer, stores the lower-cased letter in the data
//    buffer, emits "<" followed by the letter as written, and advances to
//    ScriptDataDoubleEscapeStart;
//  - '/': resets the temp buffer and advances to ScriptDataEscapedEndTagOpen;
//  - anything else: emits '<' and returns to ScriptDataEscaped.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        t.CreateTempBuffer();

        char letter = r.Current;
        t.dataBuffer.Append(char.ToLowerInvariant(letter));
        t.Emit("<" + letter);
        t.AdvanceTransition(ScriptDataDoubleEscapeStart);
        return;
    }

    if (r.Matches('/'))
    {
        t.CreateTempBuffer();
        t.AdvanceTransition(ScriptDataEscapedEndTagOpen);
        return;
    }

    t.Emit('<');
    t.Transition(ScriptDataEscaped);
}
// Reads the character that precedes / begins a doctype name.
//
// Every branch that starts a name (letter, null character, any other character)
// and the end-of-input branch first creates a fresh pending doctype token, so the
// name is never written into a token left over from an earlier doctype (or into a
// missing one). Leading whitespace is consumed and ignored.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        // Nothing is consumed in this branch; the letter is left for DoctypeName.
        t.CreateDoctypePending();
        t.Transition(DoctypeName);
        return;
    }

    char c = r.Consume();
    switch (c)
    {
        case '\t':
        case '\n':
        case '\f':
        case ' ':
            break; // ignore whitespace

        case nullChar:
            t.Error(this);
            // Fix: the pending doctype must be created before its name is touched,
            // exactly as the letter, eof and default branches do.
            t.CreateDoctypePending();
            t.doctypePending.name.Append(replacementChar);
            t.Transition(DoctypeName);
            break;

        case eof:
            t.EofError(this);
            t.CreateDoctypePending();
            t.doctypePending.forceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(Data);
            break;

        default:
            t.CreateDoctypePending();
            t.doctypePending.name.Append(c);
            t.Transition(DoctypeName);
            break;
    }
}
// from < in rcdata
// '/' resets the temp buffer and advances to RCDATAEndTagOpen. A letter that is
// not followed anywhere in the remaining input by the appropriate end tag closes
// the element early (see the note below). Anything else emits "<" and goes back
// to Rcdata.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.Matches('/'))
    {
        t.CreateTempBuffer();
        t.AdvanceTransition(RCDATAEndTagOpen);
        return;
    }

    bool startTagWithoutCloser =
        r.MatchesLetter() && !r.ContainsIgnoreCase("</" + t.AppropriateEndTagName());

    if (!startTagWithoutCloser)
    {
        t.Emit("<");
        t.Transition(Rcdata);
        return;
    }

    // diverge from spec: got a start tag, but there's no appropriate end tag (</title>),
    // so rather than consuming to EOF; break out here
    t.tagPending = new Token.EndTag(t.AppropriateEndTagName());
    t.EmitTagPending();
    r.Unconsume(); // undo "<"
    t.Transition(Data);
}
// Collects a run of letters into the data buffer (lower-cased) while emitting the
// letters as written. When the run is ended by whitespace, '/' or '>', the buffer
// decides the next state: exactly "script" goes to ScriptDataEscaped, anything
// else to ScriptDataDoubleEscaped; the terminating character is then emitted.
// Any other character is pushed back and the state becomes ScriptDataDoubleEscaped.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        string letters = r.ConsumeLetterSequence();
        t.dataBuffer.Append(letters.ToLowerInvariant());
        t.Emit(letters);
        return;
    }

    char c = r.Consume();
    bool endsName = c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '/' || c == '>';

    if (!endsName)
    {
        r.Unconsume();
        t.Transition(ScriptDataDoubleEscaped);
        return;
    }

    if (t.dataBuffer.ToString().Equals("script"))
    {
        t.Transition(ScriptDataEscaped);
    }
    else
    {
        t.Transition(ScriptDataDoubleEscaped);
    }

    t.Emit(c);
}
// Handles the position right after "</":
//  - no input left: EOF error, emit the literal "</", return to Data;
//  - a letter: create a pending tag (with 'false') and move to TagName;
//  - '>': error, advance to Data;
//  - anything else: error, advance to BogusComment.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.IsEmpty)
    {
        t.EofError(this);
        t.Emit("</");
        t.Transition(Data);
        return;
    }

    if (r.MatchesLetter())
    {
        t.CreateTagPending(false);
        t.Transition(TagName);
        return;
    }

    // Both remaining outcomes are parse errors; they differ only in the target state.
    t.Error(this);
    if (r.Matches('>'))
    {
        t.AdvanceTransition(Data);
    }
    else
    {
        t.AdvanceTransition(BogusComment);
    }
}
// Tries to consume a character reference at the reader's current position.
//
// additionalAllowedCharacter: when set and equal to the current character, nothing
//     is consumed and null is returned.
// inAttribute: when true, a named reference directly followed by a letter, a digit,
//     '=', '-' or '_' is rejected (reader rewound, null returned).
//
// Returns null when no reference was consumed (the reader is left at / rewound to
// its starting position); replacementStr for a numeric reference whose value is
// unparsable, negative, a surrogate (0xD800-0xDFFF) or above 0x10FFFF; otherwise
// the referenced character(s) as a string.
public string ConsumeCharacterReference(char? additionalAllowedCharacter, bool inAttribute)
{
    if (reader.IsEmpty)
    {
        return null;
    }

    if (additionalAllowedCharacter.HasValue && additionalAllowedCharacter.Value == reader.Current)
    {
        return null;
    }

    if (reader.MatchesAny('\t', '\n', '\f', ' ', '<', '&'))
    {
        return null;
    }

    reader.Mark();

    if (reader.MatchConsume("#")) // numbered
    {
        bool isHexMode = reader.MatchConsumeIgnoreCase("X");
        string numRef = isHexMode ? reader.ConsumeHexSequence() : reader.ConsumeDigitSequence();

        if (numRef.Length == 0) // didn't match anything
        {
            ParseError.NumericReferenceWithNoNumerals(this);
            reader.RewindToMark();
            return null;
        }

        if (!reader.MatchConsume(";"))
        {
            ParseError.MissingSemicolon(this);
        }

        int charval = -1;
        try
        {
            int base2 = isHexMode ? 16 : 10;
            charval = Convert.ToInt32(numRef, base2);
        }
        catch (FormatException)
        {
            // skip: charval stays -1 and is reported as out of range below
        }
        catch (OverflowException)
        {
            // Too many digits to fit an Int32 (e.g. "&#99999999999;"): treat it like
            // any other out-of-range reference instead of letting the exception escape.
        }

        // Non-decimal parsing can yield negative values (e.g. "80000000" in hex), so
        // every negative result is rejected, not just the -1 sentinel; otherwise
        // Char.ConvertFromUtf32 would throw.
        if (charval < 0 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF)
        {
            ParseError.CharOutsideRange(this);
            return replacementStr;
        }

        // TODO: implement number replacement table
        // TODO: check for extra illegal unicode points as parse errors
        return Char.ConvertFromUtf32(charval);
    }

    // named
    // get as many letters as possible, and look for matching entities. unconsume backwards till a match is found
    string nameRef = reader.ConsumeLetterThenDigitSequence();
    string origNameRef = nameRef; // for error reporting. nameRef gets chomped looking for matches
    bool looksLegit = reader.Matches(';');
    bool found = false;

    while (nameRef.Length > 0 && !found)
    {
        if (HtmlEncoder.IsNamedEntity(nameRef))
        {
            found = true;
        }
        else
        {
            // Drop the last character and give it back to the reader, then retry.
            nameRef = nameRef.Substring(0, nameRef.Length - 1);
            reader.Unconsume();
        }
    }

    if (!found)
    {
        if (looksLegit) // named with semicolon
        {
            ParseError.InvalidNamedReference(this, origNameRef);
        }
        reader.RewindToMark();
        return null;
    }

    if (inAttribute && (reader.MatchesLetter() || reader.MatchesDigit() || reader.MatchesAny('=', '-', '_')))
    {
        // don't want that to match
        reader.RewindToMark();
        return null;
    }

    if (!reader.MatchConsume(";"))
    {
        ParseError.MissingSemicolon(this);
    }

    return HtmlEncoder.GetCharacterByName(nameRef);
}