/// <summary>
/// Tries to consume a character reference (numeric such as <c>#65;</c> / <c>#x41;</c>, or named)
/// from the current reader position and returns the text it stands for.
/// </summary>
/// <param name="additionalAllowedCharacter">
/// Optional character that, when it is the current reader character, means "not a reference":
/// nothing is consumed and <c>null</c> is returned.
/// </param>
/// <param name="inAttribute">
/// When <c>true</c>, a matched named reference that is immediately followed by a letter, a digit,
/// <c>=</c>, <c>-</c> or <c>_</c> is rejected (the reader is rewound and <c>null</c> is returned).
/// </param>
/// <returns>
/// The decoded character(s); <c>replacementStr</c> when a numeric reference is unparsable or lies
/// outside the valid Unicode scalar range; or <c>null</c> when no reference was recognised
/// (in which case the reader is left at, or rewound to, its starting position).
/// </returns>
public string ConsumeCharacterReference(char? additionalAllowedCharacter, bool inAttribute)
{
    if (reader.IsEmpty)
    {
        return null;
    }

    if (additionalAllowedCharacter.HasValue && additionalAllowedCharacter.Value == reader.Current)
    {
        return null;
    }

    if (reader.MatchesAny('\t', '\n', '\f', ' ', '<', '&'))
    {
        return null;
    }

    // Remember where we started so a failed match can be undone.
    reader.Mark();

    if (reader.MatchConsume("#"))
    {
        // Numbered reference.
        bool isHexMode = reader.MatchConsumeIgnoreCase("X");
        string numRef = isHexMode ? reader.ConsumeHexSequence() : reader.ConsumeDigitSequence();

        if (numRef.Length == 0)
        {
            // Didn't match any numerals.
            ParseError.NumericReferenceWithNoNumerals(this);
            reader.RewindToMark();
            return null;
        }

        if (!reader.MatchConsume(";"))
        {
            ParseError.MissingSemicolon(this);
        }

        // Parse without throwing: markup is untrusted and may contain arbitrarily long digit runs.
        // TryParse reports overflow (and any non-ASCII "digit") as failure instead of raising
        // OverflowException / FormatException.
        int charval;
        bool parsed = isHexMode
            ? int.TryParse(
                numRef,
                System.Globalization.NumberStyles.AllowHexSpecifier,
                System.Globalization.CultureInfo.InvariantCulture,
                out charval)
            : int.TryParse(
                numRef,
                System.Globalization.NumberStyles.None,
                System.Globalization.CultureInfo.InvariantCulture,
                out charval);

        if (!parsed)
        {
            charval = -1;
        }

        // An 8-digit hex value with the high bit set (e.g. "80000000") parses to a negative int,
        // so every negative value is treated as out of range, not just the -1 sentinel.
        // Surrogates and values above U+10FFFF are not valid scalar values either; letting any of
        // these through would make Char.ConvertFromUtf32 throw.
        if (charval < 0 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF)
        {
            ParseError.CharOutsideRange(this);
            return replacementStr;
        }

        // TODO: implement number replacement table
        // TODO: check for extra illegal unicode points as parse errors
        return Char.ConvertFromUtf32(charval);
    }
    else
    {
        // Named reference: take as many letters/digits as possible, then back off one character
        // at a time (unconsuming from the reader) until the remaining prefix is a known entity.
        string nameRef = reader.ConsumeLetterThenDigitSequence();
        string origNameRef = nameRef; // Kept for error reporting; nameRef is trimmed while searching.
        bool looksLegit = reader.Matches(';');
        bool found = false;

        while (nameRef.Length > 0 && !found)
        {
            if (HtmlEncoder.IsNamedEntity(nameRef))
            {
                found = true;
            }
            else
            {
                nameRef = nameRef.Substring(0, nameRef.Length - 1);
                reader.Unconsume();
            }
        }

        if (!found)
        {
            if (looksLegit)
            {
                // Looked like "&name;" but the name is unknown.
                ParseError.InvalidNamedReference(this, origNameRef);
            }

            reader.RewindToMark();
            return null;
        }

        if (inAttribute && (reader.MatchesLetter() || reader.MatchesDigit() || reader.MatchesAny('=', '-', '_')))
        {
            // Inside an attribute value this is not treated as a reference; undo and bail out.
            reader.RewindToMark();
            return null;
        }

        if (!reader.MatchConsume(";"))
        {
            ParseError.MissingSemicolon(this);
        }

        return HtmlEncoder.GetCharacterByName(nameRef);
    }
}