/// <summary>
/// Decodes the HTML character references found in <paramref name="value"/> and
/// writes the resulting text to <paramref name="output"/>.
/// </summary>
/// <param name="value">Text to decode. When null, nothing is written and the method returns.</param>
/// <param name="output">Writer that receives the decoded text.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="output"/> is null while <paramref name="value"/> is not.
/// </exception>
public static void HtmlDecode(string value, TextWriter output)
{
    if (value == null)
    {
        return;
    }

    if (output == null)
    {
        throw new ArgumentNullException("output");
    }

    if (!StringRequiresHtmlDecoding(value))
    {
        // Nothing to decode: pass the input through untouched.
        output.Write(value);
        return;
    }

    UnicodeDecodingConformance conformance = HtmlDecodeConformance;
    int length = value.Length;
    int pos = 0;

    while (pos < length)
    {
        char current = value[pos];

        if (current != '&')
        {
            output.Write(current);
            pos++;
            continue;
        }

        // An '&' may open an entity. Find the nearest entity-ending character after it;
        // only a ';' closes an entity. Anything else (e.g. a second '&' showing up first)
        // means this '&' is literal text and the next '&' may start a real entity
        // (VSWhidbey 275184).
        int end = value.IndexOfAny(_htmlEntityEndingChars, pos + 1);
        if (end <= 0 || value[end] != ';')
        {
            output.Write(current);
            pos++;
            continue;
        }

        string body = value.Substring(pos + 1, end - pos - 1);
        bool isNumericReference = body.Length > 1 && body[0] == '#';

        if (!isNumericReference)
        {
            // Named entity: everything up to the semicolon is consumed either way.
            char named = HtmlEntities.Lookup(body);
            if (named != (char)0)
            {
                output.Write(named);
            }
            else
            {
                // Unknown name: reproduce the reference verbatim.
                output.Write('&');
                output.Write(body);
                output.Write(';');
            }

            pos = end + 1;
            continue;
        }

        // Numeric reference, written in decimal or hexadecimal, e.g.
        //      &#229;  --> decimal
        //      &#xE5;  --> same char in hex
        // See http://www.w3.org/TR/REC-html40/charset.html#entities
        uint codePoint;
        bool accepted;
        if (body[1] == 'x' || body[1] == 'X')
        {
            accepted = UInt32.TryParse(body.Substring(2), NumberStyles.AllowHexSpecifier, NumberFormatInfo.InvariantInfo, out codePoint);
        }
        else
        {
            accepted = UInt32.TryParse(body.Substring(1), NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out codePoint);
        }

        if (accepted)
        {
            if (conformance == UnicodeDecodingConformance.Strict)
            {
                // U+0000 .. U+10FFFF with the surrogate range excluded.
                bool belowSurrogates = codePoint < HIGH_SURROGATE_START;
                bool aboveSurrogates = LOW_SURROGATE_END < codePoint && codePoint <= UNICODE_PLANE16_END;
                accepted = belowSurrogates || aboveSurrogates;
            }
            else if (conformance == UnicodeDecodingConformance.Compat)
            {
                // U+0001 .. U+FFFF; null chars are rejected for compat with 4.0.
                accepted = (0 < codePoint && codePoint <= UNICODE_PLANE00_END);
            }
            else if (conformance == UnicodeDecodingConformance.Loose)
            {
                // U+0000 .. U+10FFFF, surrogates included.
                accepted = (codePoint <= UNICODE_PLANE16_END);
            }
            else
            {
                Debug.Assert(false, "Should never get here!");
                accepted = false;
            }
        }

        if (!accepted)
        {
            // Not a usable numeric reference: emit the '&' literally and resume
            // scanning at the character right after it.
            output.Write(current);
            pos++;
            continue;
        }

        if (codePoint <= UNICODE_PLANE00_END)
        {
            // Fits in a single UTF-16 code unit.
            output.Write((char)codePoint);
        }
        else
        {
            // Needs a surrogate pair.
            char leadingSurrogate, trailingSurrogate;
            ConvertSmpToUtf16(codePoint, out leadingSurrogate, out trailingSurrogate);
            output.Write(leadingSurrogate);
            output.Write(trailingSurrogate);
        }

        // Everything through the semicolon has been consumed.
        pos = end + 1;
    }
}
// Type initializer: both the HTML encode and decode conformance settings start out as Strict.
static WebUtility()
{
    s_htmlEncodeConformance = UnicodeEncodingConformance.Strict;
    s_htmlDecodeConformance = UnicodeDecodingConformance.Strict;
}