/// <summary>
/// Handles the CDATA section state: emits whatever <c>ConsumeTo("]]&gt;")</c> yields,
/// then attempts to consume the <c>]]&gt;</c> terminator and switches to <c>Data</c>.
/// </summary>
/// <param name="t">Tokeniser that receives the emitted text and the state transition.</param>
/// <param name="r">Reader supplying the input characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    // Grab the section body first; the terminator itself is handled separately below.
    string sectionText = r.ConsumeTo("]]>");
    t.Emit(sectionText);

    // Result intentionally ignored: the transition happens whether or not the
    // terminator was matched.
    r.MatchConsume("]]>");

    t.Transition(Data);
}
/// <summary>
/// Dispatches on what follows a markup declaration opener: <c>--</c> starts a comment,
/// <c>DOCTYPE</c> (case-insensitive) starts a doctype, <c>[CDATA[</c> starts a CDATA section,
/// and anything else is reported as an error and handled as a bogus comment.
/// </summary>
/// <param name="t">Tokeniser that receives pending tokens, errors and state transitions.</param>
/// <param name="r">Reader supplying the input characters.</param>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchConsume("--"))
    {
        t.CreateCommentPending();
        t.Transition(CommentStart);
        return;
    }

    if (r.MatchConsumeIgnoreCase("DOCTYPE"))
    {
        t.Transition(Doctype);
        return;
    }

    if (r.MatchConsume("[CDATA["))
    {
        // TODO: should actually check the current namespace, since only non-HTML content allows
        // CDATA. Until namespaces are implemented properly, keep handling this as CDATA
        // (HtmlCDataSection). The intended guard is:
        //     !t.currentNodeInHtmlNS() && r.matchConsume("[CDATA[")
        t.Transition(CdataSection);
        return;
    }

    // Nothing recognised: flag it and fall into the bogus comment state.
    t.Error(this);
    t.AdvanceTransition(BogusComment); // advance so this char gets in bogus comment data's rewind
}
/// <summary>
/// Tries to consume a character reference from the reader: either a numeric one
/// (<c>#</c> followed by decimal digits, or <c>#x</c>/<c>#X</c> followed by hex digits)
/// or a named entity. Presumably the reader is positioned just past the ampersand
/// when this is called (verify against callers).
/// </summary>
/// <param name="additionalAllowedCharacter">
/// Optional character which, when it is the reader's current character, means no
/// reference is consumed.
/// </param>
/// <param name="inAttribute">
/// When true, a named match that is immediately followed by a letter, a digit,
/// or one of '=', '-', '_' is rejected and the reader is rewound.
/// </param>
/// <returns>
/// The decoded reference; <c>replacementStr</c> for a numeric reference whose value cannot be
/// parsed, is a surrogate, or lies outside 0..0x10FFFF; or <c>null</c> when nothing was consumed.
/// </returns>
public string ConsumeCharacterReference(char? additionalAllowedCharacter, bool inAttribute)
{
    if (reader.IsEmpty)
    {
        return null;
    }

    if (additionalAllowedCharacter.HasValue && additionalAllowedCharacter.Value == reader.Current)
    {
        return null;
    }

    if (reader.MatchesAny('\t', '\n', '\f', ' ', '<', '&'))
    {
        return null;
    }

    // Remember the start so every failure path below can rewind to it.
    reader.Mark();

    if (reader.MatchConsume("#")) // numbered
    {
        bool isHexMode = reader.MatchConsumeIgnoreCase("X");
        string numRef = isHexMode ? reader.ConsumeHexSequence() : reader.ConsumeDigitSequence();

        if (numRef.Length == 0) // didn't match anything
        {
            ParseError.NumericReferenceWithNoNumerals(this);
            reader.RewindToMark();
            return null;
        }

        if (!reader.MatchConsume(";"))
        {
            ParseError.MissingSemicolon(this);
        }

        int charval = -1;
        try
        {
            int numberBase = isHexMode ? 16 : 10;
            charval = Convert.ToInt32(numRef, numberBase);
        }
        catch (FormatException)
        {
            // Not parseable: leave charval negative so it is reported as out of range below.
        }
        catch (OverflowException)
        {
            // Too many digits for an Int32 (e.g. "&#99999999999;"): same treatment, so that
            // malformed input yields a parse error instead of an unhandled exception.
        }

        // Any negative value is invalid, not just -1: an 8-digit hex sequence with the high bit
        // set converts to a negative int, and Char.ConvertFromUtf32 would throw on it.
        if (charval < 0 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF)
        {
            ParseError.CharOutsideRange(this);
            return replacementStr;
        }

        // TODO: implement number replacement table
        // TODO: check for extra illegal unicode points as parse errors
        return Char.ConvertFromUtf32(charval);
    }
    else // named
    {
        // Get as many letters as possible, and look for matching entities.
        // Unconsume backwards till a match is found.
        string nameRef = reader.ConsumeLetterThenDigitSequence();
        string origNameRef = nameRef; // for error reporting. nameRef gets chomped looking for matches
        bool looksLegit = reader.Matches(';');
        bool found = false;

        while (nameRef.Length > 0 && !found)
        {
            if (HtmlEncoder.IsNamedEntity(nameRef))
            {
                found = true;
            }
            else
            {
                // Drop the last character and hand it back to the reader, then retry.
                nameRef = nameRef.Substring(0, nameRef.Length - 1);
                reader.Unconsume();
            }
        }

        if (!found)
        {
            if (looksLegit) // named with semicolon
            {
                ParseError.InvalidNamedReference(this, origNameRef);
            }

            reader.RewindToMark();
            return null;
        }

        if (inAttribute && (reader.MatchesLetter() || reader.MatchesDigit() || reader.MatchesAny('=', '-', '_')))
        {
            // don't want that to match
            reader.RewindToMark();
            return null;
        }

        if (!reader.MatchConsume(";"))
        {
            ParseError.MissingSemicolon(this);
        }

        return HtmlEncoder.GetCharacterByName(nameRef);
    }
}