/// <summary>
/// Copies a run of letters to the output while buffering its lower-cased form, then uses the
/// character that ends the run to pick the next state.
/// </summary>
/// <param name="t">Tokeniser whose data buffer, output and state are updated.</param>
/// <param name="r">Reader supplying the input characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        // The buffer keeps the lower-cased text for the comparison below; the emitted text keeps its case.
        string letters = r.ConsumeLetterSequence();
        t.DataBuffer.Append(letters.ToLowerInvariant());
        t.Emit(letters);
        return;
    }

    char next = r.Consume();
    bool endsName = next == '\t' || next == '\n' || next == '\r' || next == '\f'
        || next == ' ' || next == '/' || next == '>';

    if (!endsName)
    {
        // Not a terminator: put the character back and let the double-escaped state read it.
        r.Unconsume();
        t.Transition(ScriptDataDoubleEscaped);
        return;
    }

    // The buffered name decides which state follows; the terminator itself is always emitted.
    if (t.DataBuffer.ToString().Equals("script"))
    {
        t.Transition(ScriptDataEscaped);
    }
    else
    {
        t.Transition(ScriptDataDoubleEscaped);
    }
    t.Emit(next);
}
/// <summary>
/// Tries to consume a numeric (<c>#123;</c> / <c>#x7B;</c>) or named character reference at the
/// reader's current position. Presumably called once the leading '&amp;' has been consumed - confirm
/// against the callers.
/// </summary>
/// <param name="additionalAllowedCharacter">
/// When non-null and equal to the reader's current character, nothing is consumed and null is returned.
/// </param>
/// <param name="inAttribute">
/// When true, a matched named reference that is immediately followed by a letter, a digit, '=', '-'
/// or '_' is rejected: the reader is rewound and null is returned.
/// </param>
/// <returns>
/// The referenced character; <c>ReplacementChar</c> for a numeric reference that is unparsable or
/// outside the valid range; or null when no reference was consumed.
/// </returns>
public char? ConsumeCharacterReference(char? additionalAllowedCharacter, bool inAttribute)
{
    if (_reader.IsEmpty())
    {
        return null;
    }
    if (additionalAllowedCharacter != null && additionalAllowedCharacter == _reader.Current())
    {
        return null;
    }
    if (_reader.MatchesAny('\t', '\n', '\r', '\f', ' ', '<', '&'))
    {
        return null;
    }

    _reader.Mark();
    if (_reader.MatchConsume("#"))
    {
        // numbered
        bool isHexMode = _reader.MatchConsumeIgnoreCase("X");
        string numRef = isHexMode ? _reader.ConsumeHexSequence() : _reader.ConsumeDigitSequence();
        if (numRef.Length == 0)
        {
            // didn't match anything
            CharacterReferenceError("Numeric reference with no numerals");
            _reader.RewindToMark();
            return null;
        }
        if (!_reader.MatchConsume(";"))
        {
            CharacterReferenceError("Missing semicolon"); // missing semi
        }

        int charval = -1;
        try
        {
            int numbase = isHexMode ? 16 : 10;
            charval = Convert.ToInt32(numRef, numbase);
        }
        catch (FormatException)
        {
            // skip: charval stays -1 and is reported as out of range below
        }
        catch (OverflowException)
        {
            // Too many digits for an Int32 (e.g. "#99999999999"): also out of range, not a crash.
        }

        // Convert.ToInt32 reads base-16 input as two's complement, so eight hex digits with the top
        // bit set (e.g. "80000000") produce a negative value; treat every negative as invalid.
        if (charval < 0 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF)
        {
            CharacterReferenceError("Character outside of valid range");
            return ReplacementChar;
        }

        // todo: implement number replacement table
        // todo: check for extra illegal unicode points as parse errors
        // NOTE(review): code points above 0xFFFF do not fit in a single char, so this cast cannot
        // represent them; fixing that needs a different return type.
        return (char)charval;
    }

    // named
    // get as many letters as possible, and look for matching entities. unconsume backwards till a match is found
    string nameRef = _reader.ConsumeLetterThenDigitSequence();
    bool looksLegit = _reader.Matches(';');

    // found if a base named entity without a ;, or an extended entity with the ;.
    bool found = Entities.IsBaseNamedEntity(nameRef) || (Entities.IsNamedEntity(nameRef) && looksLegit);
    if (!found)
    {
        _reader.RewindToMark();
        if (looksLegit)
        {
            CharacterReferenceError(string.Format("Invalid named referenece '{0}'", nameRef));
        }
        return null;
    }

    if (inAttribute && (_reader.MatchesLetter() || _reader.MatchesDigit() || _reader.MatchesAny('=', '-', '_')))
    {
        // don't want that to match
        _reader.RewindToMark();
        return null;
    }

    if (!_reader.MatchConsume(";"))
    {
        CharacterReferenceError("Missing semicolon"); // missing semi
    }
    return Entities.GetCharacterByName(nameRef);
}
/// <summary>
/// Begins a pending end tag when the next character is a letter; otherwise emits the literal
/// "&lt;/" and goes back to <c>ScriptDataEscaped</c>.
/// </summary>
/// <param name="t">Tokeniser whose pending tag, data buffer, output and state are updated.</param>
/// <param name="r">Reader supplying the input characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.MatchesLetter())
    {
        t.Emit("</");
        t.Transition(ScriptDataEscaped);
        return;
    }

    // Seed the tag name with the lower-cased letter and the data buffer with the letter as written.
    t.CreateTagPending(false);
    t.TagPending.AppendTagName(char.ToLowerInvariant(r.Current()));
    t.DataBuffer.Append(r.Current());
    t.AdvanceTransition(ScriptDataEscapedEndTagName);
}
/// <summary>
/// Accumulates the letters of an end tag name, then decides from the character that follows
/// whether the pending tag continues, is emitted, or is handed to <c>AnythingElse</c>.
/// </summary>
/// <param name="t">Tokeniser whose pending tag, data buffer and state are updated.</param>
/// <param name="r">Reader supplying the input characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        // Tag name gets the lower-cased letters; the data buffer keeps them as written.
        string letters = r.ConsumeLetterSequence();
        t.TagPending.AppendTagName(letters.ToLowerInvariant());
        t.DataBuffer.Append(letters);
        return;
    }

    // Without an appropriate end tag token, or at the end of input, fall back without consuming.
    if (!t.IsAppropriateEndTagToken() || r.IsEmpty())
    {
        AnythingElse(t, r);
        return;
    }

    char next = r.Consume();
    if (next == '\t' || next == '\n' || next == '\r' || next == '\f' || next == ' ')
    {
        t.Transition(BeforeAttributeName);
    }
    else if (next == '/')
    {
        t.Transition(SelfClosingStartTag);
    }
    else if (next == '>')
    {
        t.EmitTagPending();
        t.Transition(Data);
    }
    else
    {
        // Record the consumed character in the data buffer before falling back.
        t.DataBuffer.Append(next);
        AnythingElse(t, r);
    }
}
/// <summary>
/// Creates a pending end tag and moves to <c>ScriptDataEndTagName</c> when a letter follows;
/// otherwise emits the literal "&lt;/" and returns to <c>ScriptData</c>.
/// </summary>
/// <param name="t">Tokeniser whose pending tag, output and state are updated.</param>
/// <param name="r">Reader that is inspected but not advanced here.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.MatchesLetter())
    {
        t.Emit("</");
        t.Transition(ScriptData);
        return;
    }

    t.CreateTagPending(false);
    t.Transition(ScriptDataEndTagName);
}
/// <summary>
/// Chooses the next state from the character at the reader's current position: a letter, a '/',
/// or anything else.
/// </summary>
/// <param name="t">Tokeniser whose temp/data buffer, output and state are updated.</param>
/// <param name="r">Reader supplying the input characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        // Buffer the lower-cased letter, emit "<" plus the letter as written, then advance.
        t.CreateTempBuffer();
        t.DataBuffer.Append(char.ToLowerInvariant(r.Current()));
        t.Emit("<" + r.Current());
        t.AdvanceTransition(ScriptDataDoubleEscapeStart);
        return;
    }

    if (r.Matches('/'))
    {
        t.CreateTempBuffer();
        t.AdvanceTransition(ScriptDataEscapedEndTagOpen);
        return;
    }

    t.Emit('<');
    t.Transition(ScriptDataEscaped);
}
// from < in rcdata
/// <summary>
/// Decides what to do with a "&lt;" in rcdata: start an end tag, close the element early, or emit
/// the "&lt;" as text.
/// </summary>
/// <param name="t">Tokeniser whose pending tag, temp buffer, output and state are updated.</param>
/// <param name="r">Reader supplying the input characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.Matches('/'))
    {
        t.CreateTempBuffer();
        t.AdvanceTransition(RcDataEndTagOpen);
        return;
    }

    bool tagStartWithoutClose = r.MatchesLetter() && !r.ContainsIgnoreCase("</" + t.AppropriateEndTagName());
    if (tagStartWithoutClose)
    {
        // diverge from spec: got a start tag, but there's no appropriate end tag (</title>), so rather than
        // consuming to EOF; break out here
        t.TagPending = new Token.EndTag(t.AppropriateEndTagName());
        t.EmitTagPending();
        r.Unconsume(); // undo "<"
        t.Transition(Data);
        return;
    }

    t.Emit("<");
    t.Transition(RcData);
}
/// <summary>
/// Handles the input that follows "&lt;/": end of input, a letter starting a tag name, an
/// immediate '>', or anything else.
/// </summary>
/// <param name="t">Tokeniser whose pending tag, output, error list and state are updated.</param>
/// <param name="r">Reader supplying the input characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.IsEmpty())
    {
        // Input ended after "</": report it and emit the two characters as text.
        t.EofError(this);
        t.Emit("</");
        t.Transition(Data);
        return;
    }

    if (r.MatchesLetter())
    {
        t.CreateTagPending(false);
        t.Transition(TagName);
        return;
    }

    // Both remaining cases are errors; only the target state differs.
    bool closesImmediately = r.Matches('>');
    t.Error(this);
    if (closesImmediately)
    {
        t.AdvanceTransition(Data);
    }
    else
    {
        t.AdvanceTransition(BogusComment);
    }
}
// from < in data
/// <summary>
/// Dispatches on the character after a "&lt;" in data: '!', '/', '?', a letter, or anything else.
/// </summary>
/// <param name="t">Tokeniser whose pending tag, output, error list and state are updated.</param>
/// <param name="r">Reader supplying the input characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    char marker = r.Current();
    if (marker == '!')
    {
        t.AdvanceTransition(MarkupDeclarationOpen);
    }
    else if (marker == '/')
    {
        t.AdvanceTransition(EndTagOpen);
    }
    else if (marker == '?')
    {
        t.AdvanceTransition(BogusComment);
    }
    else if (r.MatchesLetter())
    {
        // The letter is not consumed here; the TagName state reads it.
        t.CreateTagPending(true);
        t.Transition(TagName);
    }
    else
    {
        t.Error(this);
        t.Emit('<'); // char that got us here
        t.Transition(Data);
    }
}
/// <summary>
/// Appends characters to the pending doctype's name until a '>', whitespace or end of input
/// finishes it.
/// </summary>
/// <param name="t">Tokeniser whose pending doctype, error list and state are updated.</param>
/// <param name="r">Reader supplying the input characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        // Letters are taken in bulk and stored lower-cased.
        string letters = r.ConsumeLetterSequence();
        t.DoctypePending.Name.Append(letters.ToLowerInvariant());
        return;
    }

    char next = r.Consume();
    if (next == '>')
    {
        t.EmitDoctypePending();
        t.Transition(Data);
    }
    else if (next == '\t' || next == '\n' || next == '\r' || next == '\f' || next == ' ')
    {
        t.Transition(AfterDoctypeName);
    }
    else if (next == _nullChar)
    {
        // The null character is an error; the replacement character is stored in its place.
        t.Error(this);
        t.DoctypePending.Name.Append(_replacementChar);
    }
    else if (next == _eof)
    {
        // End of input inside the name: flag quirks mode and emit what has been collected.
        t.EofError(this);
        t.DoctypePending.ForceQuirks = true;
        t.EmitDoctypePending();
        t.Transition(Data);
    }
    else
    {
        t.DoctypePending.Name.Append(next);
    }
}
/// <summary>
/// Skips whitespace before a doctype name and creates the pending doctype as soon as the name
/// (or the end of input) is reached.
/// </summary>
/// <param name="t">Tokeniser whose pending doctype, error list and state are updated.</param>
/// <param name="r">Reader supplying the input characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        // The letter is not consumed here; the DoctypeName state reads it.
        t.CreateDoctypePending();
        t.Transition(DoctypeName);
        return;
    }

    char c = r.Consume();
    switch (c)
    {
        case '\t':
        case '\n':
        case '\r':
        case '\f':
        case ' ':
            break; // ignore whitespace
        case _nullChar:
            t.Error(this);
            // Create the pending doctype first, as every other branch that writes to it does;
            // without this the append lands on whatever DoctypePending held before (possibly null).
            t.CreateDoctypePending();
            t.DoctypePending.Name.Append(_replacementChar);
            t.Transition(DoctypeName);
            break;
        case _eof:
            // End of input before any name: emit an empty doctype flagged for quirks mode.
            t.EofError(this);
            t.CreateDoctypePending();
            t.DoctypePending.ForceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(Data);
            break;
        default:
            t.CreateDoctypePending();
            t.DoctypePending.Name.Append(c);
            t.Transition(DoctypeName);
            break;
    }
}