// Reads the input that follows a doctype name: whitespace is skipped, '>' emits the
// pending doctype, and the PUBLIC / SYSTEM keywords (case-insensitive) select the
// matching follow-up state. Any other input is an error that forces quirks mode.
public override void Read(Tokeniser t, CharacterReader r)
{
    // Out of input: report it, force quirks mode and emit what has been collected.
    if (r.IsEmpty)
    {
        t.EofError(this);
        t.doctypePending.forceQuirks = true;
        t.EmitDoctypePending();
        t.Transition(Data);
        return;
    }

    // Whitespace is skipped; the state does not change.
    if (r.MatchesAny('\t', '\n', '\f', ' '))
    {
        r.Advance();
        return;
    }

    // '>' closes the doctype.
    if (r.Matches('>'))
    {
        t.EmitDoctypePending();
        t.AdvanceTransition(Data);
        return;
    }

    if (r.MatchConsumeIgnoreCase("PUBLIC"))
    {
        t.Transition(AfterDoctypePublicKeyword);
        return;
    }

    if (r.MatchConsumeIgnoreCase("SYSTEM"))
    {
        t.Transition(AfterDoctypeSystemKeyword);
        return;
    }

    // Anything else: flag the error, force quirks mode and move to the bogus doctype state.
    t.Error(this);
    t.doctypePending.forceQuirks = true;
    t.AdvanceTransition(BogusDoctype);
}
// When the reader is at '-', emits that dash and advance-transitions to
// ScriptDataEscapedDashDash; for any other input, transitions to ScriptData.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.Matches('-'))
    {
        // No dash here: switch to the script data state.
        t.Transition(ScriptData);
        return;
    }

    t.Emit('-');
    t.AdvanceTransition(ScriptDataEscapedDashDash);
}
// When the reader is at '/', creates a fresh temp buffer and advance-transitions to
// RawtextEndTagOpen. Otherwise emits a literal '<' and transitions to Rawtext.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.Matches('/'))
    {
        // Not an end tag opener: output the '<' as text.
        t.Emit('<');
        t.Transition(Rawtext);
        return;
    }

    t.CreateTempBuffer();
    t.AdvanceTransition(RawtextEndTagOpen);
}
// When the reader is at '/', emits it, creates a fresh temp buffer and advance-transitions
// to ScriptDataDoubleEscapeEnd. Otherwise transitions to ScriptDataDoubleEscaped.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.Matches('/'))
    {
        t.Transition(ScriptDataDoubleEscaped);
        return;
    }

    t.Emit('/');
    t.CreateTempBuffer();
    t.AdvanceTransition(ScriptDataDoubleEscapeEnd);
}
// Chooses the next state from the current character:
//   letter -> buffer its lower-cased form, emit "<" plus the letter as written, then
//             advance-transition to ScriptDataDoubleEscapeStart;
//   '/'    -> create a temp buffer and advance-transition to ScriptDataEscapedEndTagOpen;
//   other  -> emit a literal '<' and transition to ScriptDataEscaped.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        t.CreateTempBuffer();
        // The buffer gets the lower-cased letter; the emitted text keeps the original case.
        t.dataBuffer.Append(char.ToLowerInvariant(r.Current));
        t.Emit("<" + r.Current);
        t.AdvanceTransition(ScriptDataDoubleEscapeStart);
        return;
    }

    if (r.Matches('/'))
    {
        t.CreateTempBuffer();
        t.AdvanceTransition(ScriptDataEscapedEndTagOpen);
        return;
    }

    t.Emit('<');
    t.Transition(ScriptDataEscaped);
}
// from < in rcdata
// '/' begins a possible end tag. A letter with no matching "</name" anywhere in the
// remaining input closes the element early. Anything else emits "<" as text and
// transitions to Rcdata.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.Matches('/'))
    {
        t.CreateTempBuffer();
        t.AdvanceTransition(RCDATAEndTagOpen);
        return;
    }

    if (r.MatchesLetter() && !r.ContainsIgnoreCase("</" + t.AppropriateEndTagName()))
    {
        // Deliberate deviation from the spec: a start tag was seen but the appropriate
        // end tag (e.g. </title>) never appears, so instead of swallowing everything up
        // to EOF, emit an end tag for the element here and resume in Data.
        t.tagPending = new Token.EndTag(t.AppropriateEndTagName());
        t.EmitTagPending();
        r.Unconsume(); // put the "<" back
        t.Transition(Data);
        return;
    }

    t.Emit("<");
    t.Transition(Rcdata);
}
// Decides what follows "</":
//   end of input -> EOF error, emit "</" as text, transition to Data;
//   letter       -> create a pending tag (CreateTagPending(false)) and transition to TagName;
//   '>'          -> error, advance-transition to Data;
//   other        -> error, advance-transition to BogusComment.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.IsEmpty)
    {
        t.EofError(this);
        t.Emit("</");
        t.Transition(Data);
        return;
    }

    if (r.MatchesLetter())
    {
        t.CreateTagPending(false);
        t.Transition(TagName);
        return;
    }

    // Both remaining cases are errors; they differ only in the target state.
    if (r.Matches('>'))
    {
        t.Error(this);
        t.AdvanceTransition(Data);
        return;
    }

    t.Error(this);
    t.AdvanceTransition(BogusComment);
}
/// <summary>
/// Tries to consume a character reference at the reader's current position: either a
/// numeric one ("#" followed by decimal digits, or "#x"/"#X" followed by hex digits) or
/// a named entity. The reader is presumably positioned just after the '&amp;'.
/// </summary>
/// <param name="additionalAllowedCharacter">
/// Optional character which, if it is the current character, means no reference is consumed.
/// </param>
/// <param name="inAttribute">
/// When true, a named match that is immediately followed by a letter, a digit, '=', '-'
/// or '_' is rejected and the reader is rewound.
/// </param>
/// <returns>
/// <c>null</c> when nothing was consumed as a reference; <c>replacementStr</c> for a numeric
/// reference whose value cannot be parsed or is out of range (negative, a surrogate, or
/// above U+10FFFF); otherwise the decoded numeric code point as a string, or the result of
/// <c>HtmlEncoder.GetCharacterByName</c> for a named entity.
/// </returns>
public string ConsumeCharacterReference(char? additionalAllowedCharacter, bool inAttribute)
{
    if (reader.IsEmpty)
    {
        return null;
    }
    if (additionalAllowedCharacter.HasValue && additionalAllowedCharacter.Value == reader.Current)
    {
        return null;
    }
    if (reader.MatchesAny('\t', '\n', '\f', ' ', '<', '&'))
    {
        return null;
    }

    // Remember where we started so failed matches can be undone.
    reader.Mark();

    if (reader.MatchConsume("#"))
    {
        // numbered
        bool isHexMode = reader.MatchConsumeIgnoreCase("X");
        string numRef = isHexMode ? reader.ConsumeHexSequence() : reader.ConsumeDigitSequence();
        if (numRef.Length == 0)
        {
            // didn't match anything
            ParseError.NumericReferenceWithNoNumerals(this);
            reader.RewindToMark();
            return null;
        }
        if (!reader.MatchConsume(";"))
        {
            ParseError.MissingSemicolon(this);
        }

        int charval = -1;
        try
        {
            int base2 = isHexMode ? 16 : 10;
            charval = Convert.ToInt32(numRef, base2);
        }
        catch (FormatException)
        {
            // not parsable: charval stays -1 and is reported as out of range below
        }
        catch (OverflowException)
        {
            // too many digits for an Int32 (e.g. "&#99999999999;"): also out of range,
            // charval stays -1
        }

        // charval < 0 covers both the -1 sentinel and hex input with the high bit set
        // (e.g. "FFFFFFFE"), which Convert.ToInt32 parses as a negative two's-complement
        // value; passing that to Char.ConvertFromUtf32 would throw.
        if (charval < 0 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF)
        {
            ParseError.CharOutsideRange(this);
            return replacementStr;
        }

        // TODO: implement number replacement table
        // TODO: check for extra illegal unicode points as parse errors
        return Char.ConvertFromUtf32(charval);
    }

    // named
    // get as many letters as possible, and look for matching entities. unconsume backwards
    // till a match is found
    string nameRef = reader.ConsumeLetterThenDigitSequence();
    string origNameRef = nameRef; // for error reporting. nameRef gets chomped looking for matches
    bool looksLegit = reader.Matches(';');
    bool found = false;
    while (nameRef.Length > 0 && !found)
    {
        if (HtmlEncoder.IsNamedEntity(nameRef))
        {
            found = true;
        }
        else
        {
            // Drop the last character from the candidate and give it back to the reader.
            nameRef = nameRef.Substring(0, nameRef.Length - 1);
            reader.Unconsume();
        }
    }

    if (!found)
    {
        if (looksLegit)
        {
            // named with semicolon
            ParseError.InvalidNamedReference(this, origNameRef);
        }
        reader.RewindToMark();
        return null;
    }

    if (inAttribute && (reader.MatchesLetter() || reader.MatchesDigit() || reader.MatchesAny('=', '-', '_')))
    {
        // don't want that to match
        reader.RewindToMark();
        return null;
    }

    if (!reader.MatchConsume(";"))
    {
        ParseError.MissingSemicolon(this);
    }
    return HtmlEncoder.GetCharacterByName(nameRef);
}