/// <summary>
/// Writes <paramref name="value"/> to <paramref name="output"/>, replacing the
/// less-than, greater-than, double-quote, apostrophe and ampersand characters
/// with HTML character references. When the encoding conformance is Strict,
/// UTF-16 surrogates are additionally written as decimal numeric references.
/// </summary>
/// <param name="value">Text to encode. A null value causes nothing to be written.</param>
/// <param name="output">Destination writer.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="output"/> is null while <paramref name="value"/> is not
/// (the null check on <paramref name="value"/> runs first).
/// </exception>
private static unsafe void HtmlEncode(string value, LowLevelTextWriter output) {
    if (value == null) {
        return;
    }
    if (output == null) {
        throw new ArgumentNullException("output");
    }

    // -1 from the helper means the string can be written verbatim.
    int index = IndexOfHtmlEncodingChars(value, 0);
    if (index == -1) {
        output.Write(value);
        return;
    }

    Debug.Assert(0 <= index && index <= value.Length, "0 <= index && index <= value.Length");

    // Number of characters from the first flagged position to the end of the string.
    int cch = value.Length - index;
    fixed (char* str = value) {
        char* pch = str;

        // Copy the prefix before the first flagged position unchanged.
        while (index-- > 0) {
            output.Write(*pch++);
        }

        for (; cch > 0; cch--, pch++) {
            char ch = *pch;
            if (ch <= '>') {
                // All five reserved characters have code points at or below '>'.
                switch (ch) {
                    case '<':
                        output.Write("&lt;");
                        break;
                    case '>':
                        output.Write("&gt;");
                        break;
                    case '"':
                        output.Write("&quot;");
                        break;
                    case '\'':
                        output.Write("&#39;");
                        break;
                    case '&':
                        output.Write("&amp;");
                        break;
                    default:
                        output.Write(ch);
                        break;
                }
            }
            else {
                int valueToEncode = -1; // set to >= 0 if needs to be encoded

#if ENTITY_ENCODE_HIGH_ASCII_CHARS
                if (ch >= 160 && ch < 256) {
                    // The seemingly arbitrary 160 comes from RFC
                    valueToEncode = ch;
                }
                else
#endif // ENTITY_ENCODE_HIGH_ASCII_CHARS
                if (s_htmlEncodeConformance == UnicodeEncodingConformance.Strict && Char.IsSurrogate(ch)) {
                    // pch and cch are passed by reference; presumably the helper
                    // advances them past a consumed low surrogate - confirm against the helper.
                    int scalarValue = GetNextUnicodeScalarValueFromUtf16Surrogate(ref pch, ref cch);
                    if (scalarValue >= UNICODE_PLANE01_START) {
                        valueToEncode = scalarValue;
                    }
                    else {
                        // Don't encode BMP characters (like U+FFFD) since they wouldn't have
                        // been encoded if explicitly present in the string anyway.
                        ch = (char)scalarValue;
                    }
                }

                if (valueToEncode >= 0) {
                    // value needs to be encoded as a decimal numeric character reference
                    output.Write("&#");
                    output.Write(valueToEncode.ToString(CultureInfo.InvariantCulture));
                    output.Write(';');
                }
                else {
                    // write out the character directly
                    output.Write(ch);
                }
            }
        }
    }
}
/// <summary>
/// Writes <paramref name="value"/> to <paramref name="output"/>, replacing
/// semicolon-terminated named and numeric HTML character references with the
/// characters they denote. Text that is not a recognised reference is written unchanged.
/// </summary>
/// <param name="value">Text to decode. A null value causes nothing to be written.</param>
/// <param name="output">Destination writer.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="output"/> is null while <paramref name="value"/> is not.
/// </exception>
private static void HtmlDecode(string value, LowLevelTextWriter output) {
    if (value == null) {
        return;
    }
    if (output == null) {
        throw new ArgumentNullException("output");
    }

    if (!StringRequiresHtmlDecoding(value)) {
        // Nothing to decode: emit the input untouched.
        output.Write(value);
        return;
    }

    int length = value.Length;
    for (int pos = 0; pos < length; pos++) {
        char current = value[pos];
        if (current != '&') {
            output.Write(current);
            continue;
        }

        // An ampersand was found. Search for the next ';' or '&'. Hitting another
        // ampersand first means this one does not start an entity, and the later
        // ampersand may begin a genuine one (VSWhidbey 275184).
        int terminator = value.IndexOfAny(s_htmlEntityEndingChars, pos + 1);
        if (terminator <= 0 || value[terminator] != ';') {
            output.Write(current);
            continue;
        }

        string entity = value.Substring(pos + 1, terminator - pos - 1);
        bool looksNumeric = entity.Length > 1 && entity[0] == '#';

        if (!looksNumeric) {
            // Named entity: everything up to the semicolon is consumed either way.
            pos = terminator;

            char named = HtmlEntities.Lookup(entity);
            if (named != (char)0) {
                output.Write(named);
            }
            else {
                // Unknown name: reproduce the original text verbatim.
                output.Write('&');
                output.Write(entity);
                output.Write(';');
            }
            continue;
        }

        // Numeric references come in decimal or hexadecimal form, e.g.
        //   &#229;  --> decimal
        //   &#xE5;  --> same char in hex
        // See http://www.w3.org/TR/REC-html40/charset.html#entities
        uint codePoint;
        bool isHex = entity[1] == 'x' || entity[1] == 'X';
        bool accepted = isHex
            ? UInt32.TryParse(entity.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out codePoint)
            : UInt32.TryParse(entity.Substring(1), NumberStyles.Integer, CultureInfo.InvariantCulture, out codePoint);

        if (accepted) {
            switch (s_htmlDecodeConformance) {
                case UnicodeDecodingConformance.Strict:
                    // decoded character must be U+0000 .. U+10FFFF, excluding surrogates
                    accepted = ((codePoint < HIGH_SURROGATE_START) || (LOW_SURROGATE_END < codePoint && codePoint <= UNICODE_PLANE16_END));
                    break;

                case UnicodeDecodingConformance.Compat:
                    // decoded character must be U+0001 .. U+FFFF
                    // null chars disallowed for compat with 4.0
                    accepted = (0 < codePoint && codePoint <= UNICODE_PLANE00_END);
                    break;

                case UnicodeDecodingConformance.Loose:
                    // decoded character must be U+0000 .. U+10FFFF
                    accepted = (codePoint <= UNICODE_PLANE16_END);
                    break;

                default:
                    Debug.Assert(false, "Should never get here!");
                    accepted = false;
                    break;
            }
        }

        if (!accepted) {
            // Not a usable numeric reference: emit just the ampersand and let the
            // loop continue with the character that follows it.
            output.Write(current);
            continue;
        }

        if (codePoint <= UNICODE_PLANE00_END) {
            // fits in a single UTF-16 code unit
            output.Write((char)codePoint);
        }
        else {
            // needs a surrogate pair
            char leadingSurrogate, trailingSurrogate;
            ConvertSmpToUtf16(codePoint, out leadingSurrogate, out trailingSurrogate);
            output.Write(leadingSurrogate);
            output.Write(trailingSurrogate);
        }

        pos = terminator; // already looked at everything until semicolon
    }
}