/// <summary>
/// Handles the character following an ampersand seen while in the data state.
/// Emits the character returned by <c>ConsumeCharacterReference</c>, or a literal
/// ampersand when that call returns null, and then switches back to <c>Data</c>.
/// </summary>
/// <param name="t">Tokeniser that receives the emitted character and the state change.</param>
/// <param name="r">Character reader (not used directly here).</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    char? reference = t.ConsumeCharacterReference(null, false);

    // Fall back to the ampersand itself when no reference was resolved.
    t.Emit(reference ?? '&');
    t.Transition(Data);
}
protected ParseErrorList _errors; // null when not tracking errors

/// <summary>
/// Prepares the parser for a new run: creates the document, the character reader,
/// the tokeniser and the element stack, and records the base URI and error list.
/// </summary>
/// <param name="input">Markup to parse; must not be null.</param>
/// <param name="baseUri">Base URI handed to the new <c>Document</c>; must not be null.</param>
/// <param name="errors">Error list passed to the tokeniser and stored in <c>_errors</c>.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="input"/> or <paramref name="baseUri"/> is null.
/// </exception>
protected virtual void InitialiseParse(string input, string baseUri, ParseErrorList errors)
{
    // The single-string ArgumentNullException constructor interprets its argument as the
    // parameter name, so the (paramName, message) overload is used to keep both correct.
    if (input == null)
    {
        throw new ArgumentNullException("input", "String input must not be null");
    }

    if (baseUri == null)
    {
        throw new ArgumentNullException("baseUri", "BaseURI must not be null");
    }

    _doc = new Document(baseUri);
    _reader = new CharacterReader(input);
    _errors = errors;
    _tokeniser = new Tokeniser(_reader, errors);
    _stack = new DescendableLinkedList<Element>();
    this._baseUri = baseUri;
}
protected ParseErrorList _errors; // null when not tracking errors

/// <summary>
/// Sets up all per-parse state: a fresh document, character reader, tokeniser and
/// element stack, plus the base URI and the error list supplied by the caller.
/// </summary>
/// <param name="input">Markup to parse; must not be null.</param>
/// <param name="baseUri">Base URI handed to the new <c>Document</c>; must not be null.</param>
/// <param name="errors">Error list passed to the tokeniser and stored in <c>_errors</c>.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="input"/> or <paramref name="baseUri"/> is null.
/// </exception>
protected virtual void InitialiseParse(string input, string baseUri, ParseErrorList errors)
{
    // ArgumentNullException(string) takes a parameter name, not a message; pass both
    // explicitly so ParamName and Message each carry the right text.
    if (input == null)
    {
        throw new ArgumentNullException("input", "String input must not be null");
    }

    if (baseUri == null)
    {
        throw new ArgumentNullException("baseUri", "BaseURI must not be null");
    }

    _doc = new Document(baseUri);
    _reader = new CharacterReader(input);
    _errors = errors;
    _tokeniser = new Tokeniser(_reader, errors);
    _stack = new DescendableLinkedList<Element>();
    this._baseUri = baseUri;
}
/// <summary>
/// Data state: emits runs of plain characters until an ampersand, a less-than sign,
/// a null character or end of input is encountered.
/// </summary>
/// <param name="t">Tokeniser that receives emitted data and state changes.</param>
/// <param name="r">Reader positioned at the next unread character.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    char current = r.Current();

    if (current == '&')
    {
        t.AdvanceTransition(CharacterReferenceInData);
    }
    else if (current == '<')
    {
        t.AdvanceTransition(TagOpen);
    }
    else if (current == _nullChar)
    {
        // Reported as an error, but the character itself is emitted unchanged
        // (no replacement character is substituted in this state).
        t.Error(this);
        t.Emit(r.Consume());
    }
    else if (current == _eof)
    {
        t.Emit(new Token.EOF());
    }
    else
    {
        // Plain text: take everything up to the next significant character.
        t.Emit(r.ConsumeToAny('&', '<', _nullChar));
    }
}
/// <summary>
/// RCDATA state (content of elements such as title and textarea): emits text until
/// an ampersand, a less-than sign, a null character or end of input is found.
/// </summary>
/// <param name="t">Tokeniser that receives emitted data and state changes.</param>
/// <param name="r">Reader positioned at the next unread character.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    char current = r.Current();

    if (current == '&')
    {
        t.AdvanceTransition(CharacterReferenceInRcData);
    }
    else if (current == '<')
    {
        t.AdvanceTransition(RcDataLessThanSign);
    }
    else if (current == _nullChar)
    {
        // Null is an error here and is swapped for the replacement character.
        t.Error(this);
        r.Advance();
        t.Emit(_replacementChar);
    }
    else if (current == _eof)
    {
        t.Emit(new Token.EOF());
    }
    else
    {
        t.Emit(r.ConsumeToAny('&', '<', _nullChar));
    }
}
/// <summary>
/// Handles the character after a less-than sign inside escaped script data.
/// A letter starts a possible double-escape, a slash starts a possible end tag,
/// and anything else means the less-than sign was just text.
/// </summary>
/// <param name="t">Tokeniser that receives emitted data and state changes.</param>
/// <param name="r">Reader positioned just after the less-than sign.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        t.CreateTempBuffer();

        // Buffer the lower-cased letter, but emit it exactly as written.
        char letter = r.Current();
        t.DataBuffer.Append(char.ToLowerInvariant(letter));
        t.Emit("<" + letter);
        t.AdvanceTransition(ScriptDataDoubleEscapeStart);
        return;
    }

    if (r.Matches('/'))
    {
        t.CreateTempBuffer();
        t.AdvanceTransition(ScriptDataEscapedEndTagOpen);
        return;
    }

    t.Emit('<');
    t.Transition(ScriptDataEscaped);
}
/// <summary>
/// Skips characters of a malformed doctype. The pending doctype is emitted and the
/// tokeniser returns to <c>Data</c> once a greater-than sign or end of input is read.
/// </summary>
/// <param name="t">Tokeniser holding the pending doctype.</param>
/// <param name="r">Reader from which one character is consumed per call.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();

    if (next == '>' || next == _eof)
    {
        t.EmitDoctypePending();
        t.Transition(Data);
    }

    // Every other character is silently dropped.
}
/// <summary>
/// Utility method that unescapes HTML entities in a string by running it through a
/// throw-away tokeniser that does not track parse errors.
/// </summary>
/// <param name="s">HTML escaped string</param>
/// <param name="inAttribute">If the string is to be unescaped in strict mode (as attributes are)</param>
/// <returns>An unescaped string</returns>
public static string UnescapeEntities(string s, bool inAttribute)
{
    CharacterReader reader = new CharacterReader(s);
    ParseErrorList ignoredErrors = ParseErrorList.NoTracking();

    return new Tokeniser(reader, ignoredErrors).UnescapeEntities(inAttribute);
}
/// <summary>
/// Collects the characters of a single-quoted doctype system identifier until the
/// closing quote. An early greater-than sign or end of input forces quirks mode and
/// emits the doctype as it stands.
/// </summary>
/// <param name="t">Tokeniser holding the pending doctype.</param>
/// <param name="r">Reader from which one character is consumed per call.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();

    if (next == '\'')
    {
        t.Transition(AfterDoctypeSystemIdentifier);
        return;
    }

    if (next == _nullChar)
    {
        t.Error(this);
        t.DoctypePending.SystemIdentifier.Append(_replacementChar);
        return;
    }

    if (next == '>' || next == _eof)
    {
        // Both terminate the doctype prematurely; only the kind of error differs.
        if (next == '>')
        {
            t.Error(this);
        }
        else
        {
            t.EofError(this);
        }

        t.DoctypePending.ForceQuirks = true;
        t.EmitDoctypePending();
        t.Transition(Data);
        return;
    }

    t.DoctypePending.SystemIdentifier.Append(next);
}
/// <summary>
/// Accumulates a tag name inside double-escaped script data. Letters are buffered
/// (lower-cased) and emitted as written. When a delimiter follows, a buffered name of
/// "script" returns to <c>ScriptDataEscaped</c>; any other name stays in
/// <c>ScriptDataDoubleEscaped</c>.
/// </summary>
/// <param name="t">Tokeniser owning the data buffer and the current state.</param>
/// <param name="r">Reader supplying the characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        string letters = r.ConsumeLetterSequence();
        t.DataBuffer.Append(letters.ToLowerInvariant());
        t.Emit(letters);
        return;
    }

    char next = r.Consume();
    bool endsName = next == '\t' || next == '\n' || next == '\r' || next == '\f'
        || next == ' ' || next == '/' || next == '>';

    if (!endsName)
    {
        // Not a delimiter: put the character back for the next state to handle.
        r.Unconsume();
        t.Transition(ScriptDataDoubleEscaped);
        return;
    }

    if (t.DataBuffer.ToString().Equals("script"))
    {
        t.Transition(ScriptDataEscaped);
    }
    else
    {
        t.Transition(ScriptDataDoubleEscaped);
    }

    t.Emit(next);
}
/// <summary>
/// Handles the character following two dashes inside double-escaped script data.
/// Every character except null and end of input is emitted as-is before the next
/// state is chosen.
/// </summary>
/// <param name="t">Tokeniser that receives emitted data and state changes.</param>
/// <param name="r">Reader from which one character is consumed per call.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();

    if (next == _eof)
    {
        t.EofError(this);
        t.Transition(Data);
        return;
    }

    if (next == _nullChar)
    {
        t.Error(this);
        t.Emit(_replacementChar);
        t.Transition(ScriptDataDoubleEscaped);
        return;
    }

    t.Emit(next);

    if (next == '-')
    {
        // Another dash: remain in this state.
        return;
    }

    if (next == '<')
    {
        t.Transition(ScriptDataDoubleEscapedLessthanSign);
    }
    else if (next == '>')
    {
        t.Transition(ScriptData);
    }
    else
    {
        t.Transition(ScriptDataDoubleEscaped);
    }
}
/// <summary>
/// Fallback used when a candidate end tag turns out not to be one: emits "&lt;/"
/// followed by the buffered characters and returns to <c>ScriptDataEscaped</c>.
/// </summary>
/// <param name="t">Tokeniser owning the data buffer.</param>
/// <param name="r">Character reader (unused).</param>
private void AnythingElse(Tokeniser t, CharacterReader r)
{
    string buffered = t.DataBuffer.ToString();
    t.Emit("</" + buffered);
    t.Transition(ScriptDataEscaped);
}
/// <summary>
/// Fallback used when a candidate end tag turns out not to be one: emits "&lt;/"
/// followed by the buffered characters and returns to <c>RawText</c>.
/// </summary>
/// <param name="t">Tokeniser owning the data buffer.</param>
/// <param name="r">Character reader (unused).</param>
private void AnythingElse(Tokeniser t, CharacterReader r)
{
    string buffered = t.DataBuffer.ToString();
    t.Emit("</" + buffered);
    t.Transition(RawText);
}
/// <summary>
/// Handles the character after "&lt;/". A letter begins an end tag; end of input,
/// an immediate greater-than sign or any other character are all reported as errors
/// and routed to <c>Data</c> or <c>BogusComment</c>.
/// </summary>
/// <param name="t">Tokeniser that receives errors, emitted data and state changes.</param>
/// <param name="r">Reader positioned just after the slash.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.IsEmpty())
    {
        // Input ended right after "</": emit it as text.
        t.EofError(this);
        t.Emit("</");
        t.Transition(Data);
        return;
    }

    if (r.MatchesLetter())
    {
        t.CreateTagPending(false);
        t.Transition(TagName);
        return;
    }

    t.Error(this);

    if (r.Matches('>'))
    {
        t.AdvanceTransition(Data);
    }
    else
    {
        t.AdvanceTransition(BogusComment);
    }
}
/// <summary>
/// Handles the character after a less-than sign in raw text. A slash may open an
/// end tag; otherwise the less-than sign is emitted as ordinary text.
/// </summary>
/// <param name="t">Tokeniser that receives emitted data and state changes.</param>
/// <param name="r">Reader positioned just after the less-than sign.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.Matches('/'))
    {
        t.Emit('<');
        t.Transition(RawText);
        return;
    }

    t.CreateTempBuffer();
    t.AdvanceTransition(RawTextEndTagOpen);
}
/// <summary>
/// Handles the character after a less-than sign in RCDATA. A slash may open an end
/// tag. A letter with no matching end tag anywhere in the remaining input closes the
/// element early. Anything else emits the less-than sign as text.
/// </summary>
/// <param name="t">Tokeniser that receives emitted data, tags and state changes.</param>
/// <param name="r">Reader positioned just after the less-than sign.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.Matches('/'))
    {
        t.CreateTempBuffer();
        t.AdvanceTransition(RcDataEndTagOpen);
        return;
    }

    bool startTagWithoutEnd = r.MatchesLetter()
        && !r.ContainsIgnoreCase("</" + t.AppropriateEndTagName());

    if (!startTagWithoutEnd)
    {
        t.Emit("<");
        t.Transition(RcData);
        return;
    }

    // Deliberate divergence from the spec: a start tag was found but the appropriate
    // end tag (e.g. </title>) never appears, so instead of consuming to EOF an end tag
    // is synthesised and tokenising resumes in the data state.
    t.TagPending = new Token.EndTag(t.AppropriateEndTagName());
    t.EmitTagPending();
    r.Unconsume(); // undo "<"
    t.Transition(Data);
}
/// <summary>
/// Tag-name state, reached from "&lt;" or "&lt;/" in data with a start or end tag
/// already pending. Appends the lower-cased name to the pending tag and then acts on
/// the character that terminated it.
/// </summary>
/// <param name="t">Tokeniser holding the pending tag.</param>
/// <param name="r">Reader positioned at the first letter of the name (the previous state did not consume it).</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    string rawName = r.ConsumeToAny('\t', '\n', '\r', '\f', ' ', '/', '>', _nullChar);
    t.TagPending.AppendTagName(rawName.ToLowerInvariant());

    char terminator = r.Consume();

    if (terminator == '\t' || terminator == '\n' || terminator == '\r'
        || terminator == '\f' || terminator == ' ')
    {
        t.Transition(BeforeAttributeName);
    }
    else if (terminator == '/')
    {
        t.Transition(SelfClosingStartTag);
    }
    else if (terminator == '>')
    {
        t.EmitTagPending();
        t.Transition(Data);
    }
    else if (terminator == _nullChar)
    {
        // Null inside a name becomes the replacement string; stay in this state.
        t.TagPending.AppendTagName(_replacementStr);
    }
    else if (terminator == _eof)
    {
        // The pending tag is not emitted on end of input.
        t.EofError(this);
        t.Transition(Data);
    }

    // No final else: ConsumeToAny above stops only at the characters handled here.
}
/// <summary>
/// Processes input for one tokeniser state. Each concrete state overrides this to
/// inspect or consume characters from <paramref name="r"/> and to emit tokens, report
/// errors or change state through <paramref name="t"/>.
/// </summary>
/// <param name="t">The tokeniser driving this state.</param>
/// <param name="r">The character reader supplying the input.</param>
public abstract void Read(Tokeniser t, CharacterReader r);
/// <summary>
/// Emits everything up to the "]]>" terminator as data, consumes the terminator if it
/// is present, and returns to the <c>Data</c> state.
/// </summary>
/// <param name="t">Tokeniser that receives the emitted data and the state change.</param>
/// <param name="r">Reader positioned at the start of the section content.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    t.Emit(r.ConsumeTo("]]>"));
    r.MatchConsume("]]>");
    t.Transition(Data);
}
/// <summary>
/// Handles the character after "&lt;/" inside escaped script data. A letter starts a
/// pending end tag; anything else means "&lt;/" was plain text.
/// </summary>
/// <param name="t">Tokeniser holding the pending tag and data buffer.</param>
/// <param name="r">Reader positioned just after the slash.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.MatchesLetter())
    {
        t.Emit("</");
        t.Transition(ScriptDataEscaped);
        return;
    }

    t.CreateTagPending(false);

    // The tag name is stored lower-cased; the buffer keeps the original casing.
    char letter = r.Current();
    t.TagPending.AppendTagName(char.ToLowerInvariant(letter));
    t.DataBuffer.Append(letter);
    t.AdvanceTransition(ScriptDataEscapedEndTagName);
}
/// <summary>
/// Reads the name of a candidate end tag. Letters extend the pending tag name. Once
/// the name ends, the tag is accepted only if it is the appropriate end tag and more
/// input remains; otherwise <c>AnythingElse</c> handles the fallback.
/// </summary>
/// <param name="t">Tokeniser holding the pending tag and data buffer.</param>
/// <param name="r">Reader supplying the characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        string letters = r.ConsumeLetterSequence();
        t.TagPending.AppendTagName(letters.ToLowerInvariant());
        t.DataBuffer.Append(letters);
        return;
    }

    if (!t.IsAppropriateEndTagToken() || r.IsEmpty())
    {
        AnythingElse(t, r);
        return;
    }

    char next = r.Consume();

    if (next == '\t' || next == '\n' || next == '\r' || next == '\f' || next == ' ')
    {
        t.Transition(BeforeAttributeName);
    }
    else if (next == '/')
    {
        t.Transition(SelfClosingStartTag);
    }
    else if (next == '>')
    {
        t.EmitTagPending();
        t.Transition(Data);
    }
    else
    {
        // Unexpected character: keep it in the buffer so the fallback emits it too.
        t.DataBuffer.Append(next);
        AnythingElse(t, r);
    }
}
/// <summary>
/// Handles the character after a less-than sign in script data: a slash may open an
/// end tag, an exclamation mark may open an escape sequence, and anything else means
/// the less-than sign was plain text.
/// </summary>
/// <param name="t">Tokeniser that receives emitted data and state changes.</param>
/// <param name="r">Reader from which one character is consumed.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();

    if (next == '/')
    {
        t.CreateTempBuffer();
        t.Transition(ScriptDataEndTagOpen);
    }
    else if (next == '!')
    {
        t.Emit("<!");
        t.Transition(ScriptDataEscapeStart);
    }
    else
    {
        t.Emit("<");
        r.Unconsume(); // hand the character back to the script data state
        t.Transition(ScriptData);
    }
}
/// <summary>
/// Double-escaped script data: emits text until a dash, a less-than sign, a null
/// character or end of input is reached.
/// </summary>
/// <param name="t">Tokeniser that receives emitted data and state changes.</param>
/// <param name="r">Reader positioned at the next unread character.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    char current = r.Current();

    if (current == '-')
    {
        t.Emit(current);
        t.AdvanceTransition(ScriptDataDoubleEscapedDash);
    }
    else if (current == '<')
    {
        t.Emit(current);
        t.AdvanceTransition(ScriptDataDoubleEscapedLessthanSign);
    }
    else if (current == _nullChar)
    {
        t.Error(this);
        r.Advance();
        t.Emit(_replacementChar);
    }
    else if (current == _eof)
    {
        t.EofError(this);
        t.Transition(Data);
    }
    else
    {
        t.Emit(r.ConsumeToAny('-', '<', _nullChar));
    }
}
/// <summary>
/// Handles the character after "&lt;/" in script data. A letter starts a pending end
/// tag; anything else means "&lt;/" was plain text.
/// </summary>
/// <param name="t">Tokeniser that receives emitted data and state changes.</param>
/// <param name="r">Reader positioned just after the slash.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.MatchesLetter())
    {
        t.Emit("</");
        t.Transition(ScriptData);
        return;
    }

    t.CreateTagPending(false);
    t.Transition(ScriptDataEndTagName);
}
/// <summary>
/// Handles the character after a less-than sign in double-escaped script data. A
/// slash is emitted and may begin the end of the double escape; otherwise the
/// tokeniser returns to <c>ScriptDataDoubleEscaped</c>.
/// </summary>
/// <param name="t">Tokeniser that receives emitted data and state changes.</param>
/// <param name="r">Reader positioned just after the less-than sign.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.Matches('/'))
    {
        t.Transition(ScriptDataDoubleEscaped);
        return;
    }

    t.Emit('/');
    t.CreateTempBuffer();
    t.AdvanceTransition(ScriptDataDoubleEscapeEnd);
}
/// <summary>
/// Checks for a dash while a script escape sequence is starting. A dash is emitted
/// and moves to <c>ScriptDataEscapedDashDash</c>; anything else returns to
/// <c>ScriptData</c> without consuming.
/// </summary>
/// <param name="t">Tokeniser that receives emitted data and state changes.</param>
/// <param name="r">Reader positioned at the character to test.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.Matches('-'))
    {
        t.Transition(ScriptData);
        return;
    }

    t.Emit('-');
    t.AdvanceTransition(ScriptDataEscapedDashDash);
}
/// <summary>
/// State reached after a tag name ("&lt;xxx "). Skips whitespace, closes or
/// self-closes the tag, or starts a new attribute on the pending tag.
/// </summary>
/// <param name="t">Tokeniser holding the pending tag.</param>
/// <param name="r">Reader from which one character is consumed per call.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();

    switch (next)
    {
        case '\t':
        case '\n':
        case '\r':
        case '\f':
        case ' ':
            return; // whitespace is ignored
        case '/':
            t.Transition(SelfClosingStartTag);
            return;
        case '>':
            t.EmitTagPending();
            t.Transition(Data);
            return;
        case _eof:
            t.EofError(this);
            t.Transition(Data);
            return;
    }

    // Everything that reaches this point begins an attribute name.
    bool strayPunctuation = next == '"' || next == '\'' || next == '<' || next == '=';

    if (strayPunctuation || next == _nullChar)
    {
        t.Error(this);
    }

    t.TagPending.NewAttribute();

    if (strayPunctuation)
    {
        // Erroneous punctuation still becomes the first character of the name.
        t.TagPending.AppendAttributeName(next);
    }
    else
    {
        // Letters, null and anything else are re-read by the attribute-name state.
        r.Unconsume();
    }

    t.Transition(AttributeName);
}
/// <summary>
/// Waits for the opening quote of a doctype system identifier. Whitespace is skipped;
/// a quote selects the matching quoted-identifier state; every other input is an error
/// that forces quirks mode.
/// </summary>
/// <param name="t">Tokeniser holding the pending doctype.</param>
/// <param name="r">Reader from which one character is consumed per call.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();

    if (next == '\t' || next == '\n' || next == '\r' || next == '\f' || next == ' ')
    {
        return;
    }

    if (next == '"')
    {
        // system id starts out empty; the double-quoted state fills it in
        t.Transition(DoctypeSystemIdentifierDoubleQuoted);
        return;
    }

    if (next == '\'')
    {
        // system id starts out empty; the single-quoted state fills it in
        t.Transition(DoctypeSystemIdentifierSingleQuoted);
        return;
    }

    if (next == _eof)
    {
        t.EofError(this);
    }
    else
    {
        t.Error(this);
    }

    t.DoctypePending.ForceQuirks = true;

    if (next == '>' || next == _eof)
    {
        // The doctype ends here, so emit what has been collected.
        t.EmitDoctypePending();
        t.Transition(Data);
    }
    else
    {
        t.Transition(BogusDoctype);
    }
}
/// <summary>
/// Utility method that unescapes HTML entities in a string, using a temporary
/// tokeniser created without parse-error tracking.
/// </summary>
/// <param name="s">HTML escaped string</param>
/// <param name="inAttribute">If the string is to be unescaped in strict mode (as attributes are)</param>
/// <returns>An unescaped string</returns>
public static string UnescapeEntities(string s, bool inAttribute)
{
    CharacterReader source = new CharacterReader(s);
    ParseErrorList untracked = ParseErrorList.NoTracking();
    Tokeniser decoder = new Tokeniser(source, untracked);

    string unescaped = decoder.UnescapeEntities(inAttribute);
    return unescaped;
}
/// <summary>
/// Handles the character following a single dash inside escaped script data.
/// </summary>
/// <param name="t">Tokeniser that receives emitted data, errors and state changes.</param>
/// <param name="r">Reader from which one character is consumed per call.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.IsEmpty())
    {
        t.EofError(this);
        t.Transition(Data);
        return;
    }

    char next = r.Consume();

    if (next == '<')
    {
        // Not emitted here; the less-than-sign state decides what to output.
        t.Transition(ScriptDataEscapedLessThanSign);
        return;
    }

    if (next == '-')
    {
        t.Emit(next);
        t.Transition(ScriptDataEscapedDashDash);
        return;
    }

    if (next == _nullChar)
    {
        t.Error(this);
        t.Emit(_replacementChar);
    }
    else
    {
        t.Emit(next);
    }

    t.Transition(ScriptDataEscaped);
}
/// <summary>
/// Escaped script data: emits text until a dash, a less-than sign, a null character
/// or end of input is reached.
/// </summary>
/// <param name="t">Tokeniser that receives emitted data, errors and state changes.</param>
/// <param name="r">Reader positioned at the next unread character.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.IsEmpty())
    {
        t.EofError(this);
        t.Transition(Data);
        return;
    }

    char current = r.Current();

    if (current == '-')
    {
        t.Emit('-');
        t.AdvanceTransition(ScriptDataEscapedDash);
    }
    else if (current == '<')
    {
        t.AdvanceTransition(ScriptDataEscapedLessThanSign);
    }
    else if (current == _nullChar)
    {
        t.Error(this);
        r.Advance();
        t.Emit(_replacementChar);
    }
    else
    {
        t.Emit(r.ConsumeToAny('-', '<', _nullChar));
    }
}
/// <summary>
/// State after the closing quote of a doctype system identifier. Skips whitespace and
/// emits the doctype on a greater-than sign. End of input forces quirks mode, while
/// any other character is an error that does not.
/// </summary>
/// <param name="t">Tokeniser holding the pending doctype.</param>
/// <param name="r">Reader from which one character is consumed per call.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();

    if (next == '\t' || next == '\n' || next == '\r' || next == '\f' || next == ' ')
    {
        return;
    }

    if (next == '>')
    {
        t.EmitDoctypePending();
        t.Transition(Data);
    }
    else if (next == _eof)
    {
        t.EofError(this);
        t.DoctypePending.ForceQuirks = true;
        t.EmitDoctypePending();
        t.Transition(Data);
    }
    else
    {
        // Trailing junk is an error but does NOT force quirks mode.
        t.Error(this);
        t.Transition(BogusDoctype);
    }
}