/// <summary>
/// Consumes one character and decides how the attribute value that follows is read:
/// double-quoted, single-quoted or unquoted.
/// </summary>
/// <param name="t">Tokeniser whose pending tag and state are updated.</param>
/// <param name="r">Reader supplying the input characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    char ch = r.Consume();

    if (ch == '\t' || ch == '\n' || ch == '\f' || ch == ' ')
    {
        // Whitespace is ignored; the state does not change.
        return;
    }

    if (ch == '"')
    {
        t.Transition(AttributeValue_doubleQuoted);
    }
    else if (ch == '\'')
    {
        t.Transition(AttributeValue_singleQuoted);
    }
    else if (ch == nullChar)
    {
        // A null character is an error; the replacement character is stored in its place.
        t.Error(this);
        t.tagPending.AppendAttributeValue(replacementChar);
        t.Transition(AttributeValue_unquoted);
    }
    else if (ch == eof)
    {
        t.EofError(this);
        t.Transition(Data);
    }
    else if (ch == '>')
    {
        // The tag closed before any value was seen: report it and emit the tag as it stands.
        t.Error(this);
        t.EmitTagPending();
        t.Transition(Data);
    }
    else if (ch == '<' || ch == '=' || ch == '`')
    {
        // Reported as an error, but the character still becomes part of the unquoted value.
        t.Error(this);
        t.tagPending.AppendAttributeValue(ch);
        t.Transition(AttributeValue_unquoted);
    }
    else
    {
        // '&' and every other character: hand the character back so the unquoted-value state reads it.
        r.Unconsume();
        t.Transition(AttributeValue_unquoted);
    }
}
// from tagname <xxx
/// <summary>
/// Consumes one character and either finishes the pending tag or starts a new attribute on it.
/// </summary>
/// <param name="t">Tokeniser whose pending tag and state are updated.</param>
/// <param name="r">Reader supplying the input characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    char ch = r.Consume();

    if (ch == '\t' || ch == '\n' || ch == '\f' || ch == ' ')
    {
        // Whitespace is ignored; the state does not change.
        return;
    }

    if (ch == '/')
    {
        t.Transition(SelfClosingStartTag);
        return;
    }

    if (ch == '>')
    {
        t.EmitTagPending();
        t.Transition(Data);
        return;
    }

    if (ch == eof)
    {
        t.EofError(this);
        t.Transition(Data);
        return;
    }

    if (ch == '"' || ch == '\'' || ch == '<' || ch == '=')
    {
        // Reported as an error, but the character still becomes the first character of the name.
        t.Error(this);
        t.tagPending.NewAttribute();
        t.tagPending.AppendAttributeName(ch);
        t.Transition(AttributeName);
        return;
    }

    if (ch == nullChar)
    {
        // A null character is reported, then handled exactly like any other name character below.
        t.Error(this);
    }

    // A-Z, anything else: open a new attribute and let the attribute-name state re-read the character.
    t.tagPending.NewAttribute();
    r.Unconsume();
    t.Transition(AttributeName);
}
/// <summary>
/// Consumes one character and moves on to the next attribute, a self-closing marker,
/// or the end of the tag.
/// </summary>
/// <param name="t">Tokeniser whose pending tag and state are updated.</param>
/// <param name="r">Reader supplying the input characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    char ch = r.Consume();

    if (ch == '\t' || ch == '\n' || ch == '\f' || ch == ' ')
    {
        t.Transition(BeforeAttributeName);
        return;
    }

    if (ch == '/')
    {
        t.Transition(SelfClosingStartTag);
        return;
    }

    if (ch == '>')
    {
        t.EmitTagPending();
        t.Transition(Data);
        return;
    }

    if (ch == eof)
    {
        t.EofError(this);
        t.Transition(Data);
        return;
    }

    // Anything else is an error; the character is handed back and re-read
    // by the before-attribute-name state.
    t.Error(this);
    r.Unconsume();
    t.Transition(BeforeAttributeName);
}
/// <summary>
/// Emits everything up to the next '&gt;' as a comment flagged as bogus, then advances to the data state.
/// </summary>
/// <param name="t">Tokeniser that receives the comment token.</param>
/// <param name="r">Reader supplying the input characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    // TODO: handle bogus comment starting from eof. when does that trigger?

    // Step back one character so the character that led to this state is part of the comment text.
    r.Unconsume();

    Token.Comment bogus = new Token.Comment();
    string text = r.ConsumeTo('>');
    bogus.data.Append(text);
    bogus.IsBogus = true;
    // TODO: replace nullChar with replaceChar

    t.Emit(bogus);
    t.AdvanceTransition(Data);
}
/// <summary>
/// Consumes one character and branches to the end-tag-open or escape-start state,
/// or emits a literal "&lt;" and returns to script data.
/// </summary>
/// <param name="t">Tokeniser that receives emitted text and state changes.</param>
/// <param name="r">Reader supplying the input characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();

    if (next == '/')
    {
        t.CreateTempBuffer();
        t.Transition(ScriptDataEndTagOpen);
    }
    else if (next == '!')
    {
        t.Emit("<!");
        t.Transition(ScriptDataEscapeStart);
    }
    else
    {
        // Emit "<" as text and hand the character back so the script-data state re-reads it.
        t.Emit("<");
        r.Unconsume();
        t.Transition(ScriptData);
    }
}
// from < in rcdata
/// <summary>
/// Decides what follows a "&lt;": an end tag, a start tag with no matching end tag
/// further on, or plain text.
/// </summary>
/// <param name="t">Tokeniser that receives emitted tokens and state changes.</param>
/// <param name="r">Reader supplying the input characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.Matches('/'))
    {
        t.CreateTempBuffer();
        t.AdvanceTransition(RCDATAEndTagOpen);
        return;
    }

    bool startTagWithoutCloser =
        r.MatchesLetter() && !r.ContainsIgnoreCase("</" + t.AppropriateEndTagName());

    if (!startTagWithoutCloser)
    {
        // Emit "<" as text and carry on in the Rcdata state.
        t.Emit("<");
        t.Transition(Rcdata);
        return;
    }

    // diverge from spec: got a start tag, but there's no appropriate end tag (</title>), so rather than
    // consuming to EOF; break out here
    t.tagPending = new Token.EndTag(t.AppropriateEndTagName());
    t.EmitTagPending();
    r.Unconsume(); // undo "<"
    t.Transition(Data);
}
/// <summary>
/// Collects a run of letters into the data buffer, then uses the buffered name to choose
/// between the escaped and double-escaped script-data states.
/// </summary>
/// <param name="t">Tokeniser whose data buffer and state are updated.</param>
/// <param name="r">Reader supplying the input characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        // Buffer the lower-cased name for the comparison below; emit the letters as written.
        string letters = r.ConsumeLetterSequence();
        t.dataBuffer.Append(letters.ToLowerInvariant());
        t.Emit(letters);
        return;
    }

    char ch = r.Consume();
    bool endsName = ch == '\t' || ch == '\n' || ch == '\f' || ch == ' ' || ch == '/' || ch == '>';

    if (!endsName)
    {
        // Hand the character back and stay double-escaped.
        r.Unconsume();
        t.Transition(ScriptDataDoubleEscaped);
        return;
    }

    string buffered = t.dataBuffer.ToString();
    if (buffered == "script")
    {
        t.Transition(ScriptDataEscaped);
    }
    else
    {
        t.Transition(ScriptDataDoubleEscaped);
    }

    t.Emit(ch);
}
/// <summary>
/// Tries to consume a numeric ("#123", "#x7B") or named character reference from the reader.
/// Presumably the caller has already consumed the leading '&amp;' (not visible here; confirm at call sites).
/// </summary>
/// <param name="additionalAllowedCharacter">
/// Optional character which, if it is the reader's current character, means nothing is consumed.
/// </param>
/// <param name="inAttribute">
/// When true, a named reference followed by a letter, digit, '=', '-' or '_' is not treated as a reference.
/// </param>
/// <returns>
/// The decoded text; <c>replacementStr</c> for a numeric reference that is not a valid code point;
/// or <c>null</c> when no reference was consumed.
/// </returns>
public string ConsumeCharacterReference(char? additionalAllowedCharacter, bool inAttribute)
{
    if (reader.IsEmpty)
    {
        return null;
    }

    if (additionalAllowedCharacter.HasValue && additionalAllowedCharacter.Value == reader.Current)
    {
        return null;
    }

    if (reader.MatchesAny('\t', '\n', '\f', ' ', '<', '&'))
    {
        return null;
    }

    // Remember the position so a failed match can be undone completely.
    reader.Mark();

    if (reader.MatchConsume("#")) // numbered
    {
        bool isHexMode = reader.MatchConsumeIgnoreCase("X");
        string numRef = isHexMode ? reader.ConsumeHexSequence() : reader.ConsumeDigitSequence();

        if (numRef.Length == 0) // didn't match anything
        {
            ParseError.NumericReferenceWithNoNumerals(this);
            reader.RewindToMark();
            return null;
        }

        if (!reader.MatchConsume(";"))
        {
            ParseError.MissingSemicolon(this);
        }

        int charval = -1;
        try
        {
            int numberBase = isHexMode ? 16 : 10;
            charval = Convert.ToInt32(numRef, numberBase);
        }
        catch (FormatException)
        {
            // Deliberately ignored: charval stays -1 and is reported as out of range below.
        }
        catch (OverflowException)
        {
            // Too many digits for an Int32 (e.g. "&#99999999999;"): also out of range, not a crash.
        }

        // Negative values cover both the -1 sentinel and eight-digit hex input such as "80000000",
        // which Convert.ToInt32 reads as a negative two's-complement number. Letting one through
        // would make Char.ConvertFromUtf32 throw.
        if (charval < 0 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF)
        {
            ParseError.CharOutsideRange(this);
            return replacementStr;
        }

        // TODO: implement number replacement table
        // TODO: check for extra illegal unicode points as parse errors
        return Char.ConvertFromUtf32(charval);
    }

    // named
    // get as many letters as possible, and look for matching entities. unconsume backwards till a match is found
    string nameRef = reader.ConsumeLetterThenDigitSequence();
    string origNameRef = nameRef; // for error reporting. nameRef gets chomped looking for matches
    bool looksLegit = reader.Matches(';');
    bool found = false;

    while (nameRef.Length > 0 && !found)
    {
        if (HtmlEncoder.IsNamedEntity(nameRef))
        {
            found = true;
        }
        else
        {
            // Drop the last character from the candidate and give it back to the reader.
            nameRef = nameRef.Substring(0, nameRef.Length - 1);
            reader.Unconsume();
        }
    }

    if (!found)
    {
        if (looksLegit) // named with semicolon
        {
            ParseError.InvalidNamedReference(this, origNameRef);
        }

        reader.RewindToMark();
        return null;
    }

    if (inAttribute && (reader.MatchesLetter() || reader.MatchesDigit() || reader.MatchesAny('=', '-', '_')))
    {
        // don't want that to match
        reader.RewindToMark();
        return null;
    }

    if (!reader.MatchConsume(";"))
    {
        ParseError.MissingSemicolon(this);
    }

    return HtmlEncoder.GetCharacterByName(nameRef);
}