// Consumes one character and decides what follows an attribute name: more whitespace,
// a value ('='), the end of the tag, or the start of another attribute.
// (Presumably the "after attribute name" state - the enclosing class is not visible here.)
public override void Read(Tokeniser t, CharacterReader r)
{
    char c = r.Consume();
    switch (c)
    {
        case '\t':
        case '\n':
        case '\f':
        case ' ':
            // ignore
            break;
        case '/':
            t.Transition(SelfClosingStartTag);
            break;
        case '=':
            t.Transition(BeforeAttributeValue);
            break;
        case '>':
            t.EmitTagPending();
            t.Transition(Data);
            break;
        case nullChar:
            t.Error(this);
            // Begin a new attribute before naming it, exactly like the other
            // name-starting branches below; otherwise the replacement character
            // would be appended to the previous attribute's name.
            t.tagPending.NewAttribute();
            t.tagPending.AppendAttributeName(replacementChar);
            t.Transition(AttributeName);
            break;
        case eof:
            t.EofError(this);
            t.Transition(Data);
            break;
        case '"':
        case '\'':
        case '<':
            // Unexpected in a name: report it, but still use it as the first name character.
            t.Error(this);
            t.tagPending.NewAttribute();
            t.tagPending.AppendAttributeName(c);
            t.Transition(AttributeName);
            break;
        default:
            // A-Z, anything else: start a new attribute and let AttributeName re-read the character.
            t.tagPending.NewAttribute();
            r.Unconsume();
            t.Transition(AttributeName);
            break;
    }
}
// If the reader is at '-', emits it and advance-transitions to ScriptDataEscapedDashDash;
// otherwise transitions to ScriptData.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.Matches('-'))
    {
        t.Transition(ScriptData);
        return;
    }

    t.Emit('-');
    t.AdvanceTransition(ScriptDataEscapedDashDash);
}
// If a letter follows, creates a pending tag (created with 'false') and moves to
// ScriptDataEndTagName; otherwise emits the literal "</" and returns to ScriptData.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.MatchesLetter())
    {
        t.Emit("</");
        t.Transition(ScriptData);
        return;
    }

    t.CreateTagPending(false);
    t.Transition(ScriptDataEndTagName);
}
// Builds a comment token flagged as bogus from the input up to the next '>' and emits it,
// then advance-transitions back to Data.
public override void Read(Tokeniser t, CharacterReader r)
{
    // TODO: handle bogus comment starting from eof. when does that trigger?
    // rewind to capture char that lead us here
    r.Unconsume();

    Token.Comment comment = new Token.Comment();
    // NUL characters in the comment data are replaced with the replacement character,
    // matching how the other states in this file treat nullChar.
    string data = r.ConsumeTo('>').Replace(nullChar, replacementChar);
    comment.data.Append(data);
    comment.IsBogus = true;

    t.Emit(comment);
    t.AdvanceTransition(Data);
}
// On '/', resets the temp buffer and advance-transitions to RawtextEndTagOpen;
// otherwise emits a literal '<' and returns to Rawtext.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.Matches('/'))
    {
        t.Emit('<');
        t.Transition(Rawtext);
        return;
    }

    t.CreateTempBuffer();
    t.AdvanceTransition(RawtextEndTagOpen);
}
// On '/', emits it, creates the temp buffer and advance-transitions to
// ScriptDataDoubleEscapeEnd; otherwise returns to ScriptDataDoubleEscaped.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.Matches('/'))
    {
        t.Transition(ScriptDataDoubleEscaped);
        return;
    }

    t.Emit('/');
    t.CreateTempBuffer();
    t.AdvanceTransition(ScriptDataDoubleEscapeEnd);
}
// Resets the parse state for a new input: creates the document, the character reader,
// the tokeniser and an empty element stack, and records the error collection and base URI.
// Throws ArgumentNullException when input is null.
protected virtual void InitialiseParse(string input, Uri baseUri, HtmlParseErrorCollection errors)
{
    if (input == null)
    {
        throw new ArgumentNullException(nameof(input));
    }

    doc = CreateDocument(baseUri);
    reader = new CharacterReader(input);

    // 'errors' and 'baseUri' are shadowed by the parameters, so the fields need 'this.'.
    this.errors = errors;
    tokeniser = new Tokeniser(reader, errors);
    stack = new DescendableLinkedList<DomContainer>();
    this.baseUri = baseUri;
}
// from before attribute name
// Appends the lower-cased run of name characters to the pending attribute name, then
// dispatches on the delimiter that ended the run.
public override void Read(Tokeniser t, CharacterReader r)
{
    string namePart = r.ConsumeToAny('\t', '\n', '\f', ' ', '/', '=', '>', nullChar, '"', '\'', '<');
    t.tagPending.AppendAttributeName(namePart.ToLowerInvariant());

    char delimiter = r.Consume();
    switch (delimiter)
    {
        case '>':
            t.EmitTagPending();
            t.Transition(Data);
            break;
        case '=':
            t.Transition(BeforeAttributeValue);
            break;
        case '/':
            t.Transition(SelfClosingStartTag);
            break;
        case '\t':
        case '\n':
        case '\f':
        case ' ':
            t.Transition(AfterAttributeName);
            break;
        case '"':
        case '\'':
        case '<':
            // reported as an error, but the character still becomes part of the name
            t.Error(this);
            t.tagPending.AppendAttributeName(delimiter);
            break;
        case nullChar:
            t.Error(this);
            t.tagPending.AppendAttributeName(replacementChar);
            break;
        case eof:
            t.EofError(this);
            t.Transition(Data);
            break;
        // no default, as covered in consumeToAny
    }
}
// If a letter follows, starts a pending tag (created with 'false') seeded with that letter
// (lower-cased in the tag name, as-is in the data buffer) and advance-transitions to
// ScriptDataEscapedEndTagName; otherwise emits "</" and returns to ScriptDataEscaped.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.MatchesLetter())
    {
        t.Emit("</");
        t.Transition(ScriptDataEscaped);
        return;
    }

    t.CreateTagPending(false);
    char letter = r.Current;
    t.tagPending.AppendTagName(char.ToLowerInvariant(letter));
    t.dataBuffer.Append(letter);
    t.AdvanceTransition(ScriptDataEscapedEndTagName);
}
// from & in data
// Tries to consume a character reference; emits the result, or a literal '&' when the
// tokeniser returns null, then returns to Data.
public override void Read(Tokeniser t, CharacterReader r)
{
    string reference = t.ConsumeCharacterReference(null, false);
    if (reference != null)
    {
        t.Emit(reference);
    }
    else
    {
        t.Emit('&');
    }

    t.Transition(Data);
}
// Consumes one character and dispatches on it: whitespace moves on to
// BeforeDoctypeSystemIdentifier, a quote opens the identifier (with an error), and
// '>' / eof emit the pending doctype in force-quirks mode.
public override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();
    switch (next)
    {
        case '"':
            t.Error(this);
            // system id empty
            t.Transition(DoctypeSystemIdentifier_doubleQuoted);
            break;
        case '\'':
            t.Error(this);
            // system id empty
            t.Transition(DoctypeSystemIdentifier_singleQuoted);
            break;
        case '\t':
        case '\n':
        case '\f':
        case ' ':
            t.Transition(BeforeDoctypeSystemIdentifier);
            break;
        case '>':
            t.Error(this);
            t.doctypePending.forceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(Data);
            break;
        case eof:
            t.EofError(this);
            t.doctypePending.forceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(Data);
            break;
        default:
            // NOTE(review): unlike the '>' and eof branches, this emits the pending doctype
            // without leaving this state - confirm whether a transition (e.g. to a
            // bogus-doctype state) was intended instead of the emit.
            t.Error(this);
            t.doctypePending.forceQuirks = true;
            t.EmitDoctypePending();
            break;
    }
}
// Reads a single-quoted attribute value: appends the plain run of characters, then handles
// the closing quote, a character reference, a NUL or end of input.
public override void Read(Tokeniser t, CharacterReader r)
{
    string chunk = r.ConsumeToAny('\'', '&', nullChar);
    if (chunk.Length != 0)
    {
        t.tagPending.AppendAttributeValue(chunk);
    }

    char stop = r.Consume();
    switch (stop)
    {
        case '&':
            string reference = t.ConsumeCharacterReference('\'', true);
            if (reference != null)
            {
                t.tagPending.AppendAttributeValue(reference);
            }
            else
            {
                // not a valid reference: keep the ampersand literally
                t.tagPending.AppendAttributeValue('&');
            }
            break;
        case '\'':
            t.Transition(AfterAttributeValue_quoted);
            break;
        case eof:
            t.EofError(this);
            t.Transition(Data);
            break;
        case nullChar:
            t.Error(this);
            t.tagPending.AppendAttributeValue(replacementChar);
            break;
        // no default, handled in consume to any above
    }
}
// Accumulates the letters of a candidate end tag name. Once a non-letter is reached, the
// tag is only completed when the tokeniser reports an appropriate end tag token and input
// remains; in every other case the buffered text is handed to AnythingElse.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        string letters = r.ConsumeLetterSequence();
        t.tagPending.AppendTagName(letters.ToLowerInvariant());
        t.dataBuffer.Append(letters);
        return;
    }

    if (!t.IsAppropriateEndTagToken() || r.IsEmpty)
    {
        AnythingElse(t, r);
        return;
    }

    char next = r.Consume();
    switch (next)
    {
        case '>':
            t.EmitTagPending();
            t.Transition(Data);
            break;
        case '/':
            t.Transition(SelfClosingStartTag);
            break;
        case '\t':
        case '\n':
        case '\f':
        case ' ':
            t.Transition(BeforeAttributeName);
            break;
        default:
            // the consumed character joins the buffer so AnythingElse emits it too
            t.dataBuffer.Append(next);
            AnythingElse(t, r);
            break;
    }
}
// Builds up the pending doctype's name: letter runs are appended lower-cased, other
// characters are handled one at a time.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        string letters = r.ConsumeLetterSequence();
        t.doctypePending.name.Append(letters.ToLowerInvariant());
        return;
    }

    char next = r.Consume();
    switch (next)
    {
        case '\t':
        case '\n':
        case '\f':
        case ' ':
            t.Transition(AfterDoctypeName);
            break;
        case '>':
            t.EmitDoctypePending();
            t.Transition(Data);
            break;
        case eof:
            t.EofError(this);
            t.doctypePending.forceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(Data);
            break;
        case nullChar:
            t.Error(this);
            t.doctypePending.name.Append(replacementChar);
            break;
        default:
            t.doctypePending.name.Append(next);
            break;
    }
}
// Dispatches on the character after '<': a letter starts a possible double-escape
// (the '<' and the letter are emitted), '/' starts a possible end tag, and anything else
// emits a literal '<' and returns to ScriptDataEscaped.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        char letter = r.Current;
        t.CreateTempBuffer();
        t.dataBuffer.Append(char.ToLowerInvariant(letter));
        t.Emit("<" + letter);
        t.AdvanceTransition(ScriptDataDoubleEscapeStart);
        return;
    }

    if (r.Matches('/'))
    {
        t.CreateTempBuffer();
        t.AdvanceTransition(ScriptDataEscapedEndTagOpen);
        return;
    }

    t.Emit('<');
    t.Transition(ScriptDataEscaped);
}
// Emits input as character data up to the next NUL; a NUL is reported and replaced, and
// end of input emits the EOF token. No state transition happens here.
public override void Read(Tokeniser t, CharacterReader r)
{
    char current = r.Current;
    if (current == nullChar)
    {
        t.Error(this);
        r.Advance();
        t.Emit(replacementChar);
    }
    else if (current == eof)
    {
        t.Emit(Token.EOF.Instance);
    }
    else
    {
        string text = r.ConsumeTo(nullChar);
        t.Emit(text);
    }
}
// Consumes the character after '<': '/' begins a possible end tag, '!' begins a possible
// escape (emitting "<!"), and anything else emits "<" and is re-read in ScriptData.
public override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();
    if (next == '/')
    {
        t.CreateTempBuffer();
        t.Transition(ScriptDataEndTagOpen);
    }
    else if (next == '!')
    {
        t.Emit("<!");
        t.Transition(ScriptDataEscapeStart);
    }
    else
    {
        t.Emit("<");
        r.Unconsume();
        t.Transition(ScriptData);
    }
}
// Discards characters until '>' or end of input, at which point the pending doctype is
// emitted and the tokeniser returns to Data.
public override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();
    if (next == '>' || next == eof)
    {
        t.EmitDoctypePending();
        t.Transition(Data);
    }
    // any other character is ignored
}
// Consumes one character and dispatches on it: '>' emits the pending comment; every other
// character is reported as an error and either extends the comment data or (at eof) emits
// the comment.
public override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();
    switch (next)
    {
        case '>':
            t.EmitCommentPending();
            t.Transition(Data);
            break;
        case '-':
            t.Error(this);
            t.commentPending.data.Append('-');
            break;
        case '!':
            t.Error(this);
            t.Transition(CommentEndBang);
            break;
        case eof:
            t.EofError(this);
            t.EmitCommentPending();
            t.Transition(Data);
            break;
        case nullChar:
            t.Error(this);
            t.commentPending.data.Append("--").Append(replacementChar);
            t.Transition(Comment);
            break;
        default:
            // "--" followed by this character becomes part of the comment data
            t.Error(this);
            t.commentPending.data.Append("--").Append(next);
            t.Transition(Comment);
            break;
    }
}
// Skips whitespace, then creates the pending doctype token and starts its name with the
// first significant character (or emits a force-quirks doctype at end of input).
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        // the letter itself is left for DoctypeName to consume
        t.CreateDoctypePending();
        t.Transition(DoctypeName);
        return;
    }

    char c = r.Consume();
    switch (c)
    {
        case '\t':
        case '\n':
        case '\f':
        case ' ':
            break; // ignore whitespace
        case nullChar:
            t.Error(this);
            // Create the token before touching it, as every other branch does; without
            // this the append would hit a missing or stale pending doctype.
            t.CreateDoctypePending();
            t.doctypePending.name.Append(replacementChar);
            t.Transition(DoctypeName);
            break;
        case eof:
            t.EofError(this);
            t.CreateDoctypePending();
            t.doctypePending.forceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(Data);
            break;
        default:
            t.CreateDoctypePending();
            t.doctypePending.name.Append(c);
            t.Transition(DoctypeName);
            break;
    }
}
// Emits letter runs while collecting them (lower-cased) in the data buffer. On whitespace,
// '/' or '>', a buffer equal to "script" leads to ScriptDataEscaped and anything else to
// ScriptDataDoubleEscaped; any other character is re-read in ScriptDataDoubleEscaped.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        string letters = r.ConsumeLetterSequence();
        t.dataBuffer.Append(letters.ToLowerInvariant());
        t.Emit(letters);
        return;
    }

    char next = r.Consume();
    bool endsName = next == '\t' || next == '\n' || next == '\f'
        || next == ' ' || next == '/' || next == '>';
    if (!endsName)
    {
        r.Unconsume();
        t.Transition(ScriptDataDoubleEscaped);
        return;
    }

    if (t.dataBuffer.ToString() == "script")
    {
        t.Transition(ScriptDataEscaped);
    }
    else
    {
        t.Transition(ScriptDataDoubleEscaped);
    }

    t.Emit(next);
}
// from < in rcdata
// '/' starts a possible end tag. A letter with no matching "</name" left in the input
// forces the element closed early; anything else emits a literal "<" and stays in Rcdata.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.Matches('/'))
    {
        t.CreateTempBuffer();
        t.AdvanceTransition(RCDATAEndTagOpen);
        return;
    }

    if (r.MatchesLetter() && !r.ContainsIgnoreCase("</" + t.AppropriateEndTagName()))
    {
        // diverge from spec: got a start tag, but there's no appropriate end tag (</title>),
        // so rather than consuming to EOF; break out here
        t.tagPending = new Token.EndTag(t.AppropriateEndTagName());
        t.EmitTagPending();
        r.Unconsume(); // undo "<"
        t.Transition(Data);
        return;
    }

    t.Emit("<");
    t.Transition(Rcdata);
}
// Handles one character: '-' is emitted and the state is kept, '<' moves to
// ScriptDataEscapedLessThanSign, '>' is emitted and returns to ScriptData, and everything
// else is emitted (NUL as the replacement character) and returns to ScriptDataEscaped.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.IsEmpty)
    {
        t.EofError(this);
        t.Transition(Data);
        return;
    }

    char next = r.Consume();
    switch (next)
    {
        case '<':
            t.Transition(ScriptDataEscapedLessThanSign);
            break;
        case '>':
            t.Emit(next);
            t.Transition(ScriptData);
            break;
        case '-':
            t.Emit(next);
            break;
        case nullChar:
            t.Error(this);
            t.Emit(replacementChar);
            t.Transition(ScriptDataEscaped);
            break;
        default:
            t.Emit(next);
            t.Transition(ScriptDataEscaped);
            break;
    }
}
// Handles the character after '/' inside a tag: '>' marks the pending tag self-closing and
// emits it; anything else is an error and is handed back to BeforeAttributeName.
public override void Read(Tokeniser t, CharacterReader r)
{
    char c = r.Consume();
    switch (c)
    {
        case '>':
            t.tagPending.selfClosing = true;
            t.EmitTagPending();
            t.Transition(Data);
            break;
        case eof:
            t.EofError(this);
            t.Transition(Data);
            break;
        default:
            t.Error(this);
            // Put the character back so BeforeAttributeName sees it; otherwise the first
            // character after a stray '/' (e.g. the 'c' in <div /class="x">) is lost.
            r.Unconsume();
            t.Transition(BeforeAttributeName);
            break;
    }
}
// from < or </ in data, will have start or end tag pending
// Appends the lower-cased run of name characters to the pending tag name, then dispatches
// on the character that ended the run.
public override void Read(Tokeniser t, CharacterReader r)
{
    // previous TagOpen state did NOT consume, will have a letter char in current
    string namePart = r.ConsumeToAny('\t', '\n', '\f', ' ', '/', '>', nullChar);
    t.tagPending.AppendTagName(namePart.ToLowerInvariant());

    char delimiter = r.Consume();
    switch (delimiter)
    {
        case '>':
            t.EmitTagPending();
            t.Transition(Data);
            break;
        case '/':
            t.Transition(SelfClosingStartTag);
            break;
        case '\t':
        case '\n':
        case '\f':
        case ' ':
            t.Transition(BeforeAttributeName);
            break;
        case nullChar:
            // replacement
            t.tagPending.AppendTagName(replacementStr);
            break;
        case eof:
            // should emit pending tag?
            t.EofError(this);
            t.Transition(Data);
            break;
        // no default, as covered with above consumeToAny
    }
}
// Decides what follows "</": end of input emits the literal "</", a letter starts an end
// tag, '>' is an error that is skipped, and anything else is an error that becomes a
// bogus comment.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.IsEmpty)
    {
        t.EofError(this);
        t.Emit("</");
        t.Transition(Data);
        return;
    }

    if (r.MatchesLetter())
    {
        t.CreateTagPending(false);
        t.Transition(TagName);
        return;
    }

    // Both remaining cases are errors; they differ only in the target state.
    bool closesImmediately = r.Matches('>');
    t.Error(this);
    if (closesImmediately)
    {
        t.AdvanceTransition(Data);
    }
    else
    {
        t.AdvanceTransition(BogusComment);
    }
}
// Recognises what follows "<!": a comment ("--"), a DOCTYPE (case-insensitive) or a CDATA
// section; anything else is an error and is treated as a bogus comment.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchConsume("--"))
    {
        t.CreateCommentPending();
        t.Transition(CommentStart);
        return;
    }

    if (r.MatchConsumeIgnoreCase("DOCTYPE"))
    {
        t.Transition(Doctype);
        return;
    }

    if (r.MatchConsume("[CDATA["))
    {
        // TODO: should actually check current namespace, and only non-html allows cdata. until namespace
        // is implemented properly, keep handling as cdata (HtmlCDataSection)
        //} else if (!t.currentNodeInHtmlNS() && r.matchConsume("[CDATA[")) {
        t.Transition(CdataSection);
        return;
    }

    t.Error(this);
    t.AdvanceTransition(BogusComment); // advance so this char gets in bogus comment data's rewind
}
// Emits input as character data up to the next '<' or NUL. '<' advance-transitions to
// ScriptDataLessthanSign, NUL is reported and replaced, and end of input emits the EOF token.
public override void Read(Tokeniser t, CharacterReader r)
{
    char current = r.Current;
    if (current == '<')
    {
        t.AdvanceTransition(ScriptDataLessthanSign);
    }
    else if (current == nullChar)
    {
        t.Error(this);
        r.Advance();
        t.Emit(replacementChar);
    }
    else if (current == eof)
    {
        t.Emit(Token.EOF.Instance);
    }
    else
    {
        string text = r.ConsumeToAny('<', nullChar);
        t.Emit(text);
    }
}
// Fallback: emits "</" followed by the tokeniser's data buffer contents as text,
// then returns to ScriptDataEscaped. The reader argument is not used.
private void AnythingElse(Tokeniser t, CharacterReader r)
{
    string buffered = t.dataBuffer.ToString();
    t.Emit("</" + buffered);
    t.Transition(ScriptDataEscaped);
}
/// <summary>
/// Processes input for one tokeniser state. The overrides in this file consume or peek at
/// characters from <paramref name="r"/> and respond by emitting tokens, recording errors
/// and/or transitioning <paramref name="t"/> to another state.
/// </summary>
/// <param name="t">The tokeniser whose pending tokens and current state are updated.</param>
/// <param name="r">The character reader supplying the input.</param>
public abstract void Read(Tokeniser t, CharacterReader r);