/// <summary>
/// A wrapper over the framework's HTML encoder that emits named HTML entities wherever
/// <c>HtmlEntities.Lookup</c> reports a name, so that the output decodes with the same lookup table.
/// </summary>
/// <param name="value">The text to encode. May be null or empty.</param>
/// <returns>
/// The encoded text. A null or empty <paramref name="value"/> is returned unchanged.
/// </returns>
public static string HtmlEncode(string value)
{
    // Nothing to encode. Previously a null value reached the foreach below and threw a
    // NullReferenceException; an empty value already produced an empty result.
    if (string.IsNullOrEmpty(value))
    {
        return value;
    }

    // The framework's HtmlEncode takes care of the hardcoded characters (<, >, ", ', &) and converts
    // high ASCII (160 to 255) to numeric character references (i.e. &#160; etc).
    string response = WebUtility.HtmlEncode(value);

    // First pass: replace every character that has an entity name and was not touched by the
    // framework (for example the Greek letter Delta becomes &Delta; rather than staying raw).
    // A writer is used instead of repeated string concatenation so the pass stays linear.
    StringWriter named = new StringWriter(CultureInfo.InvariantCulture);
    foreach (char ch in response)
    {
        string entity = HtmlEntities.Lookup(ch);
        if (string.IsNullOrEmpty(entity))
        {
            named.Write(ch);
        }
        else
        {
            named.Write('&');
            named.Write(entity);
            named.Write(';');
        }
    }

    // Second pass: the framework wrote high ASCII characters as numeric references (the cent sign
    // becomes &#162;). The private overload parses those references and swaps in the entity name
    // (&cent;) when one exists.
    // For more: http://referencesource.microsoft.com/#System/net/System/Net/WebUtility.cs,117
    StringWriter writer = new StringWriter(CultureInfo.InvariantCulture);
    HtmlEncode(named.ToString(), writer);
    return writer.ToString();
}
/// <summary>
/// Converts a string of RTF code to HTML.
/// </summary>
/// <param name="rtf">The RTF source text.</param>
/// <returns>
/// The generated HTML; it is passed through <c>HtmlEntities.Encode</c> when
/// <c>EscapeHtmlEntities</c> is true.
/// </returns>
public string Convert(string rtf)
{
    // Build the DOM tree
    RtfTree rtfTree = new RtfTree();
    rtfTree.LoadRtfText(rtf);

    // Initialise the working state
    _builder = new StringBuilder();
    _htmlFormat = new Format();
    _currentFormat = new Format();
    _fontTable = rtfTree.GetFontTable();
    _colorTable = rtfTree.GetColorTable();

    // Find the start of the document's visible content (the first "pard" node)
    int inicio;
    for (inicio = 0; inicio < rtfTree.RootNode.FirstChild.ChildNodes.Count; inicio++)
    {
        if (rtfTree.RootNode.FirstChild.ChildNodes[inicio].NodeKey == "pard")
        {
            break;
        }
    }

    // Process all visible nodes
    ProcessChildNodes(rtfTree.RootNode.FirstChild.ChildNodes, inicio);

    // Close pending tags
    _currentFormat.Reset();
    WriteText(string.Empty);

    // Repair the HTML
    // Repair lists: a bullet span followed by a content span becomes an <li>
    Regex repairList = new Regex("<span [^>]*>·</span><span style=\"([^\"]*)\">(.*?)<br\\s+/><" + "/span>",
                                 RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant);

    foreach (Match match in repairList.Matches(_builder.ToString()))
    {
        _builder.Replace(match.Value, string.Format("<li style=\"{0}\">{1}</li>", match.Groups[1].Value, match.Groups[2].Value));
    }

    // Open a <ul> before every <li> that does not directly follow a </li>.
    // The match indexes refer to the snapshot taken by ToString(), so the insertions are applied
    // from the last match to the first: inserting front-to-back would shift every later index by
    // the length of the text already inserted and misplace the tags of all lists but the first.
    Regex repairUl = new Regex("(?<!</li>)<li", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
    MatchCollection listStarts = repairUl.Matches(_builder.ToString());

    for (int m = listStarts.Count - 1; m >= 0; m--)
    {
        _builder.Insert(listStarts[m].Index, "<ul>");
    }

    // Close the </ul> after every </li> that is not directly followed by another <li>
    // (same back-to-front insertion, for the same reason).
    repairUl = new Regex("/li>(?!<li)", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
    MatchCollection listEnds = repairUl.Matches(_builder.ToString());

    for (int m = listEnds.Count - 1; m >= 0; m--)
    {
        Match match = listEnds[m];
        _builder.Insert(match.Index + match.Length, "</ul>");
    }

    // Generate paragraphs (each run split by <br /><br /> is wrapped in a <p>)
    if (AutoParagraph)
    {
        string[] partes = _builder.ToString().Split(new[] { "<br /><br />" }, StringSplitOptions.RemoveEmptyEntries);
        _builder = new StringBuilder(_builder.Length + 7 * partes.Length);

        foreach (string parte in partes)
        {
            _builder.Append("<p>");
            _builder.Append(parte);
            _builder.Append("</p>");
        }
    }

    return EscapeHtmlEntities ? HtmlEntities.Encode(_builder.ToString()) : _builder.ToString();
}
/// <summary>
/// Decodes the HTML character references found in <paramref name="value"/> and appends the
/// decoded text to <paramref name="output"/>.
/// </summary>
/// <param name="value">The HTML-encoded text; dereferenced without a null check.</param>
/// <param name="output">The builder that receives the decoded text.</param>
private static void HtmlDecode(string value, StringBuilder output)
{
    Debug.Assert(output != null);

    int end = value.Length;
    int position = 0;
    while (position < end)
    {
        char current = value[position];
        if (current != '&')
        {
            output.Append(current);
            position++;
            continue;
        }

        // We found a '&'. Look for the next ';' or '&': if another '&' shows up before a ';',
        // this is not an entity and the next '&' might start a real one (VSWhidbey 275184).
        int terminator = value.IndexOfAny(s_htmlEntityEndingChars, position + 1);
        bool closedBySemicolon = terminator > 0 && value[terminator] == ';';
        if (!closedBySemicolon)
        {
            output.Append(current);
            position++;
            continue;
        }

        int nameStart = position + 1;
        int nameLength = terminator - nameStart;

        if (nameLength > 1 && value[nameStart] == '#')
        {
            // The # syntax can be in decimal or hex, e.g.
            //      &#229;  --> decimal
            //      &#xE5;  --> same char in hex
            // See http://www.w3.org/TR/REC-html40/charset.html#entities
            bool isHex = value[nameStart + 1] == 'x' || value[nameStart + 1] == 'X';
            uint codePoint;
            bool accepted = isHex
                ? uint.TryParse(value.Substring(nameStart + 2, nameLength - 2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out codePoint)
                : uint.TryParse(value.Substring(nameStart + 1, nameLength - 1), NumberStyles.Integer, CultureInfo.InvariantCulture, out codePoint);

            if (accepted)
            {
                // decoded character must be U+0000 .. U+10FFFF, excluding surrogates
                accepted = (codePoint < HIGH_SURROGATE_START) ||
                           (LOW_SURROGATE_END < codePoint && codePoint <= UNICODE_PLANE16_END);
            }

            if (!accepted)
            {
                // Not a usable reference: emit the '&' and rescan the rest as plain text.
                output.Append(current);
                position++;
                continue;
            }

            if (codePoint <= UNICODE_PLANE00_END)
            {
                // single character
                output.Append((char)codePoint);
            }
            else
            {
                // multi-character
                char leadingSurrogate, trailingSurrogate;
                ConvertSmpToUtf16(codePoint, out leadingSurrogate, out trailingSurrogate);
                output.Append(leadingSurrogate);
                output.Append(trailingSurrogate);
            }

            position = terminator + 1; // everything up to the semicolon has been consumed
            continue;
        }

        string entity = value.Substring(nameStart, nameLength);
        char entityChar = HtmlEntities.Lookup(entity);
        if (entityChar != (char)0)
        {
            output.Append(entityChar);
        }
        else
        {
            // Unknown name: reproduce the reference verbatim.
            output.Append('&');
            output.Append(entity);
            output.Append(';');
        }

        position = terminator + 1; // everything up to the semicolon has been consumed
    }
}
/// <summary>
/// Decodes the HTML character references found in <paramref name="input"/> and appends the
/// decoded text to <paramref name="output"/>.
/// </summary>
/// <param name="input">The HTML-encoded characters.</param>
/// <param name="output">The builder that receives the decoded text.</param>
private static void HtmlDecode(ReadOnlySpan<char> input, ref ValueStringBuilder output)
{
    for (int i = 0; i < input.Length; i++)
    {
        char ch = input[i];

        if (ch == '&')
        {
            // We found a '&'. Now look for the next ';' or '&'. The idea is that
            // if we find another '&' before finding a ';', then this is not an entity,
            // and the next '&' might start a real entity (VSWhidbey 275184).
            // The search previously looked for ';' only, so in "&a&amp;" the whole "a&amp" was
            // taken as one unknown entity name and the real "&amp;" was never decoded.
            ReadOnlySpan<char> inputSlice = input.Slice(i + 1);
            int entityLength = inputSlice.IndexOfAny(';', '&');
            if (entityLength >= 0 && inputSlice[entityLength] == ';')
            {
                int entityEndPosition = (i + 1) + entityLength;
                if (entityLength > 1 && inputSlice[0] == '#')
                {
                    // The # syntax can be in decimal or hex, e.g.
                    //      &#229;  --> decimal
                    //      &#xE5;  --> same char in hex
                    // See http://www.w3.org/TR/REC-html40/charset.html#entities
                    bool parsedSuccessfully = inputSlice[1] == 'x' || inputSlice[1] == 'X'
                        ? uint.TryParse(inputSlice.Slice(2, entityLength - 2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out uint parsedValue)
                        : uint.TryParse(inputSlice.Slice(1, entityLength - 1), NumberStyles.Integer, CultureInfo.InvariantCulture, out parsedValue);

                    if (parsedSuccessfully)
                    {
                        // decoded character must be U+0000 .. U+10FFFF, excluding surrogates
                        parsedSuccessfully = ((parsedValue < HIGH_SURROGATE_START) ||
                                              (LOW_SURROGATE_END < parsedValue && parsedValue <= UNICODE_PLANE16_END));
                    }

                    if (parsedSuccessfully)
                    {
                        if (parsedValue <= UNICODE_PLANE00_END)
                        {
                            // single character
                            output.Append((char)parsedValue);
                        }
                        else
                        {
                            // multi-character
                            ConvertSmpToUtf16(parsedValue, out char leadingSurrogate, out char trailingSurrogate);
                            output.Append(leadingSurrogate);
                            output.Append(trailingSurrogate);
                        }

                        i = entityEndPosition; // already looked at everything until semicolon
                        continue;
                    }

                    // Invalid numeric reference: fall through so the '&' is emitted literally and
                    // the following characters are rescanned as plain text.
                }
                else
                {
                    ReadOnlySpan<char> entity = inputSlice.Slice(0, entityLength);
                    i = entityEndPosition; // already looked at everything until semicolon
                    char entityChar = HtmlEntities.Lookup(entity);

                    if (entityChar != (char)0)
                    {
                        ch = entityChar;
                    }
                    else
                    {
                        // Unknown name: reproduce the reference verbatim.
                        output.Append('&');
                        output.Append(entity);
                        output.Append(';');
                        continue;
                    }
                }
            }
        }

        output.Append(ch);
    }
}
/// <summary>
/// Decodes the HTML character references in <paramref name="value"/> and writes the decoded
/// text to <paramref name="output"/>. Numeric references are parsed as 16-bit values.
/// </summary>
/// <param name="value">The HTML-encoded text; null writes nothing.</param>
/// <param name="output">The writer that receives the decoded text.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="output"/> is null while <paramref name="value"/> is not.
/// </exception>
public static void HtmlDecode(string value, TextWriter output)
{
    if (value == null)
    {
        return;
    }

    if (output == null)
    {
        throw new ArgumentNullException("output");
    }

    // Fast path: without an ampersand there is nothing to decode.
    if (value.IndexOf('&') < 0)
    {
        output.Write(value);
        return;
    }

    int end = value.Length;
    int position = 0;
    while (position < end)
    {
        char current = value[position];
        if (current != '&')
        {
            output.Write(current);
            position++;
            continue;
        }

        // Only a ';' reached before any further '&' closes an entity.
        int terminator = value.IndexOfAny(_htmlEntityEndingChars, position + 1);
        if (terminator <= 0 || value[terminator] != ';')
        {
            output.Write(current);
            position++;
            continue;
        }

        string entity = value.Substring(position + 1, terminator - position - 1);

        if (entity.Length > 1 && entity[0] == '#')
        {
            // Numeric reference, hex (&#xE5;) or decimal (&#229;). A failed parse leaves 0 in
            // the out variable, which is treated the same as "not a reference".
            ushort code;
            if (entity[1] == 'x' || entity[1] == 'X')
            {
                ushort.TryParse(entity.Substring(2), NumberStyles.AllowHexSpecifier, NumberFormatInfo.InvariantInfo, out code);
            }
            else
            {
                ushort.TryParse(entity.Substring(1), NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out code);
            }

            if (code != 0)
            {
                output.Write((char)code);
                position = terminator + 1;
            }
            else
            {
                // Emit the '&' and rescan what follows as plain text.
                output.Write(current);
                position++;
            }

            continue;
        }

        char known = HtmlEntities.Lookup(entity);
        if (known != '\0')
        {
            output.Write(known);
        }
        else
        {
            // Unknown name: reproduce the reference verbatim.
            output.Write('&');
            output.Write(entity);
            output.Write(';');
        }

        position = terminator + 1;
    }
}
//=======================================================================
// Adapted (and improved) from Reflected source of .NET 2.0 System.Web.HttpUtility.HtmlDecode
//
// Decodes HTML character references in s and returns the decoded text.
// Returns null for null input and the same instance when s contains no '&'.
//
// Differences from the reflected original:
//   * A reference cut short by a following '&' is accepted when it decodes, and that '&' is
//     left in place so it can start the next entity. This now applies to named references as
//     well as numeric ones.
//   * An unknown name cut short by '&' no longer gets a synthetic ';' while swallowing the '&'
//     ("a && b" used to come out as "a &; b"); the '&' is simply emitted as text.
//   * Numbers are parsed with TryParse and the invariant culture, so out-of-range values no
//     longer escape as an uncaught OverflowException, and values outside U+0000..U+FFFF are no
//     longer truncated to an unrelated character.
//   * An invalid numeric reference is emitted verbatim; the '#' used to be dropped.
string htmlDecode(string s)
{
    if (s == null)
    {
        return null;
    }

    if (s.IndexOf('&') < 0)
    {
        return s;
    }

    StringBuilder output = new StringBuilder(s.Length);
    int length = s.Length;

    for (int currentPos = 0; currentPos < length; currentPos++)
    {
        char current = s[currentPos];
        if (current != '&')
        {
            output.Append(current);
            continue;
        }

        int endCharPos = s.IndexOfAny(_entityEndChars, currentPos + 1);
        if (endCharPos <= 0)
        {
            // No terminator ahead: a plain ampersand.
            output.Append(current);
            continue;
        }

        string entityName = s.Substring(currentPos + 1, (endCharPos - currentPos) - 1);

        // When the terminator is itself a '&' it must not be consumed, so that it is
        // interpreted as the starter of the next entity.
        bool endsAtAmpersand = s[endCharPos] == '&';
        int lastConsumed = endsAtAmpersand ? endCharPos - 1 : endCharPos;

        if ((entityName.Length > 1) && (entityName[0] == '#'))
        {
            bool isHex = (entityName[1] == 'x') || (entityName[1] == 'X');
            int codePoint;
            bool parsed = isHex
                ? int.TryParse(entityName.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out codePoint)
                : int.TryParse(entityName.Substring(1), NumberStyles.Integer, CultureInfo.InvariantCulture, out codePoint);

            if (parsed && codePoint >= 0 && codePoint <= char.MaxValue)
            {
                output.Append((char)codePoint);
                currentPos = lastConsumed;
            }
            else
            {
                // Not a usable number: emit the '&' and rescan the rest as plain text.
                output.Append(current);
            }

            continue;
        }

        char ch = HtmlEntities.Lookup(entityName);
        if (ch != '\0')
        {
            output.Append(ch);
            currentPos = lastConsumed;
        }
        else if (endsAtAmpersand)
        {
            // Not an entity at all; the next '&' gets its own chance on the next iteration.
            output.Append(current);
        }
        else
        {
            // Unknown name: write it back as a reference.
            output.Append('&');
            output.Append(entityName);
            output.Append(';');
            currentPos = endCharPos;
        }
    }

    return output.ToString();
}
/// <summary>
/// Decodes the HTML character references in <paramref name="value"/> and writes the decoded
/// text to <paramref name="output"/>. Numeric references are accepted only for
/// U+0001 .. UNICODE_PLANE00_END.
/// </summary>
/// <param name="value">The HTML-encoded text; null writes nothing.</param>
/// <param name="output">The writer that receives the decoded text.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="output"/> is null while <paramref name="value"/> is not.
/// </exception>
public static void HtmlDecode(string value, TextWriter output)
{
    if (value == null)
    {
        return;
    }

    if (output == null)
    {
        throw new ArgumentNullException("output");
    }

    if (!StringRequiresHtmlDecoding(value))
    {
        output.Write(value); // good as is
        return;
    }

    int l = value.Length;
    for (int i = 0; i < l; i++)
    {
        char ch = value[i];

        if (ch == '&')
        {
            // We found a '&'. Now look for the next ';' or '&'. The idea is that
            // if we find another '&' before finding a ';', then this is not an entity,
            // and the next '&' might start a real entity (VSWhidbey 275184)
            int index = value.IndexOfAny(_htmlEntityEndingChars, i + 1);
            if (index > 0 && value[index] == ';')
            {
                string entity = value.Substring(i + 1, index - i - 1);

                if (entity.Length > 1 && entity[0] == '#')
                {
                    // The # syntax can be in decimal or hex, e.g.
                    //      &#229;  --> decimal
                    //      &#xE5;  --> same char in hex
                    // See http://www.w3.org/TR/REC-html40/charset.html#entities
                    bool parsedSuccessfully;
                    uint parsedValue;
                    if (entity[1] == 'x' || entity[1] == 'X')
                    {
                        parsedSuccessfully = UInt32.TryParse(entity.Substring(2), NumberStyles.AllowHexSpecifier, NumberFormatInfo.InvariantInfo, out parsedValue);
                    }
                    else
                    {
                        parsedSuccessfully = UInt32.TryParse(entity.Substring(1), NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out parsedValue);
                    }

                    // Only U+0001 .. UNICODE_PLANE00_END is accepted, so an accepted value
                    // always fits a single char. The former surrogate-pair branch could never
                    // run after this check and has been removed.
                    if (parsedSuccessfully && 0 < parsedValue && parsedValue <= UNICODE_PLANE00_END)
                    {
                        output.Write((char)parsedValue);
                        i = index; // already looked at everything until semicolon
                        continue;
                    }

                    // Rejected reference: fall through so the '&' is written literally and the
                    // following characters are rescanned as plain text.
                }
                else
                {
                    i = index; // already looked at everything until semicolon

                    char entityChar = HtmlEntities.Lookup(entity);
                    if (entityChar != (char)0)
                    {
                        ch = entityChar;
                    }
                    else
                    {
                        // Unknown name: reproduce the reference verbatim.
                        output.Write('&');
                        output.Write(entity);
                        output.Write(';');
                        continue;
                    }
                }
            }
        }

        output.Write(ch);
    }
}
/// <summary>
/// Converts a string that has been HTML-encoded into a decoded string, and sends
/// the decoded string to a System.IO.TextWriter output stream.
/// </summary>
/// <param name="s">The HTML-encoded text; null writes nothing.</param>
/// <param name="output">The writer that receives the decoded text.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="output"/> is null while <paramref name="s"/> is not.
/// </exception>
public static void HtmlDecode(string s, TextWriter output)
{
    if (s == null)
    {
        return;
    }

    if (output == null)
    {
        throw new ArgumentNullException(nameof(output));
    }

    if (s.IndexOf('&') < 0)
    {
        // Nothing to decode. The return is essential: without it the loop below wrote the
        // whole text a second time.
        output.Write(s);
        return;
    }

    char[] entityEndingChars = new char[] { ';', '&' };
    int length = s.Length;

    for (int i = 0; i < length; i++)
    {
        char ch = s[i];
        if (ch != '&')
        {
            output.Write(ch);
            continue;
        }

        int endIndex = s.IndexOfAny(entityEndingChars, i + 1);
        if (endIndex <= 0 || s[endIndex] != ';')
        {
            // A bare ampersand (or one cut short by another '&') is ordinary text; it used to
            // be dropped from the output.
            output.Write(ch);
            continue;
        }

        string entity = s.Substring(i + 1, endIndex - i - 1);

        // Length > 1 guarantees entity[1] exists; "&#;" used to index past the end of the string.
        if (entity.Length > 1 && entity[0] == '#')
        {
            bool success;
            int result;
            if (entity[1] == 'x' || entity[1] == 'X')
            {
                success = Int32.TryParse(entity.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out result);
            }
            else
            {
                // Invariant culture: the markup must not be parsed with the current culture's rules.
                success = Int32.TryParse(entity.Substring(1), NumberStyles.Integer, CultureInfo.InvariantCulture, out result);
            }

            if (!success)
            {
                // Not a number: emit the '&' and rescan the rest as plain text ("&#" used to vanish).
                output.Write(ch);
                continue;
            }

            // Values rejected by IsLegalXmlChar are consumed without producing output.
            if (IsLegalXmlChar(result))
            {
                if (result >= 0 && result <= char.MaxValue)
                {
                    output.Write((char)result);
                }
                else if (result > char.MaxValue && result <= 0x10FFFF)
                {
                    // Supplementary code point: needs a surrogate pair; a plain (char) cast
                    // would truncate it to an unrelated character.
                    output.Write(char.ConvertFromUtf32(result));
                }
            }

            i = endIndex;
            continue;
        }

        char named = HtmlEntities.Lookup(entity);
        if (named != '\0')
        {
            output.Write(named);
        }
        else
        {
            // Unknown name: reproduce the reference verbatim.
            output.Write('&');
            output.Write(entity);
            output.Write(';');
        }

        i = endIndex;
    }
}
/// <summary>
/// Decodes the HTML character references in <paramref name="value"/>. Numeric references are
/// parsed as 16-bit values.
/// </summary>
/// <param name="value">The HTML-encoded text.</param>
/// <returns>The decoded text; an empty string when <paramref name="value"/> is null.</returns>
public static string HtmlDecode(string value)
{
    StringBuilder decoded = new StringBuilder();

    if (value == null)
    {
        return decoded.ToString();
    }

    // Fast path: without an ampersand there is nothing to decode.
    if (value.IndexOf('&') < 0)
    {
        decoded.Append(value);
        return decoded.ToString();
    }

    int end = value.Length;
    int position = 0;
    while (position < end)
    {
        char current = value[position];
        if (current != '&')
        {
            decoded.Append(current);
            position++;
            continue;
        }

        // Only a ';' reached before any further '&' closes an entity.
        int terminator = value.IndexOfAny(htmlEntityEndingChars, position + 1);
        if (terminator <= 0 || value[terminator] != ';')
        {
            decoded.Append(current);
            position++;
            continue;
        }

        string entity = value.Substring(position + 1, terminator - position - 1);

        if (entity.Length > 1 && entity[0] == '#')
        {
            // Numeric reference, hex (&#xE5;) or decimal (&#229;). A failed parse leaves 0 in
            // the out variable, which is treated the same as "not a reference".
            ushort code;
            if (entity[1] == 'x' || entity[1] == 'X')
            {
                ushort.TryParse(entity.Substring(2), NumberStyles.AllowHexSpecifier, NumberFormatInfo.InvariantInfo, out code);
            }
            else
            {
                ushort.TryParse(entity.Substring(1), NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out code);
            }

            if (code != 0)
            {
                decoded.Append((char)code);
                position = terminator + 1;
            }
            else
            {
                // Emit the '&' and rescan what follows as plain text.
                decoded.Append(current);
                position++;
            }

            continue;
        }

        char known = HtmlEntities.Lookup(entity);
        if (known != '\0')
        {
            decoded.Append(known);
        }
        else
        {
            // Unknown name: reproduce the reference verbatim.
            decoded.Append('&');
            decoded.Append(entity);
            decoded.Append(';');
        }

        position = terminator + 1;
    }

    return decoded.ToString();
}
/// <summary>
/// Decodes the HTML character references in <paramref name="s"/> and writes the decoded text
/// to <paramref name="output"/>.
/// </summary>
/// <param name="s">The HTML-encoded text; null writes nothing.</param>
/// <param name="output">The writer that receives the decoded text.</param>
public static void HtmlDecode(string s, TextWriter output)
{
    if (s == null)
    {
        return;
    }

    if (s.IndexOf('&') < 0)
    {
        output.Write(s); // good as is
        return;
    }

    int l = s.Length;
    for (int i = 0; i < l; i++)
    {
        char ch = s[i];

        if (ch == '&')
        {
            // We found a '&'. Now look for the next ';' or '&'. The idea is that
            // if we find another '&' before finding a ';', then this is not an entity,
            // and the next '&' might start a real entity (VSWhidbey 275184)
            int index = s.IndexOfAny(s_entityEndingChars, i + 1);
            if (index > 0 && s[index] == ';')
            {
                string entity = s.Substring(i + 1, index - i - 1);

                if (entity.Length > 1 && entity[0] == '#')
                {
                    // Any failure below leaves ch == '&' and i untouched, so the reference is
                    // emitted verbatim. (The handlers used to advance i, which silently
                    // dropped the '#' from the output.)
                    try
                    {
                        // The # syntax can be in decimal or hex, e.g.
                        //      &#229;  --> decimal
                        //      &#xE5;  --> same char in hex
                        // See http://www.w3.org/TR/REC-html40/charset.html#entities
                        int code;
                        if (entity[1] == 'x' || entity[1] == 'X')
                        {
                            code = Int32.Parse(entity.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture);
                        }
                        else
                        {
                            code = Int32.Parse(entity.Substring(1), CultureInfo.InvariantCulture);
                        }

                        // Accept only values that fit a single char; a plain (char) cast would
                        // truncate anything else to an unrelated character.
                        if (code >= 0 && code <= char.MaxValue)
                        {
                            ch = (char)code;
                            i = index; // already looked at everything until semicolon
                        }
                    }
                    catch (System.FormatException e)
                    {
                        // the number isn't valid
                        if (Tracing.On)
                        {
                            Tracing.ExceptionCatch(TraceEventType.Warning, typeof(HttpUtility), "HtmlDecode", e);
                        }
                    }
                    catch (System.ArgumentException e)
                    {
                        // there is no number
                        if (Tracing.On)
                        {
                            Tracing.ExceptionCatch(TraceEventType.Warning, typeof(HttpUtility), "HtmlDecode", e);
                        }
                    }
                    catch (System.OverflowException e)
                    {
                        // the number does not fit an Int32; this used to escape to the caller
                        if (Tracing.On)
                        {
                            Tracing.ExceptionCatch(TraceEventType.Warning, typeof(HttpUtility), "HtmlDecode", e);
                        }
                    }
                }
                else
                {
                    i = index; // already looked at everything until semicolon

                    char entityChar = HtmlEntities.Lookup(entity);
                    if (entityChar != (char)0)
                    {
                        ch = entityChar;
                    }
                    else
                    {
                        // Unknown name: reproduce the reference verbatim.
                        output.Write('&');
                        output.Write(entity);
                        output.Write(';');
                        continue;
                    }
                }
            }
        }

        output.Write(ch);
    }
}
/// <summary>Converts all HTML entities to their correct character representation.</summary>
/// <param name="html">The string to convert.</param>
/// <returns>
/// The converted string; null when <paramref name="html"/> is null, and the same instance when
/// it contains no ampersand.
/// </returns>
public static string ConvertHtmlCharacters(this string html)
{
    if (html == null)
    {
        return null;
    }

    if (html.IndexOf('&') < 0)
    {
        return html;
    }

    // Allocated once instead of once per ampersand.
    char[] entityEndingChars = { ';', '&' };

    var sb = new StringBuilder(html.Length);
    var length = html.Length;

    for (var i = 0; i < length; i++)
    {
        var ch = html[i];

        if (ch == '&')
        {
            // Only a ';' reached before any further '&' closes an entity.
            var end = html.IndexOfAny(entityEndingChars, i + 1);
            if ((end > 0) && (html[end] == ';'))
            {
                var entity = html.Substring(i + 1, (end - i) - 1);

                if ((entity.Length > 1) && (entity[0] == '#'))
                {
                    // Numeric reference, hex (&#xE5;) or decimal (&#229;). TryParse replaces the
                    // former int.Parse + catch: an out-of-range number threw an OverflowException
                    // that no handler caught.
                    var isHex = (entity[1] == 'x') || (entity[1] == 'X');
                    int codePoint;
                    var parsed = isHex
                        ? int.TryParse(entity.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out codePoint)
                        : int.TryParse(entity.Substring(1), NumberStyles.Integer, CultureInfo.InvariantCulture, out codePoint);

                    // Accept only values that fit a single char; a plain (char) cast would
                    // truncate anything else to an unrelated character.
                    if (parsed && codePoint >= 0 && codePoint <= char.MaxValue)
                    {
                        ch = (char)codePoint;
                        i = end;
                    }

                    // Otherwise ch is still '&' and i is untouched, so the reference is emitted
                    // verbatim. (The old failure path advanced i and dropped the '#'.)
                }
                else
                {
                    i = end;

                    var known = HtmlEntities.Lookup(entity);
                    if (known != '\0')
                    {
                        ch = known;
                    }
                    else
                    {
                        // Unknown name: reproduce the reference verbatim.
                        sb.Append('&');
                        sb.Append(entity);
                        sb.Append(';');
                        continue;
                    }
                }
            }
        }

        sb.Append(ch);
    }

    return sb.ToString();
}
/// <summary>Converts a string that has been HTML-encoded into a decoded string, and sends the decoded string to a <see cref="T:System.IO.TextWriter"></see> output stream.</summary>
/// <param name="s">The string to decode; null writes nothing.</param>
/// <param name="output">A <see cref="T:System.IO.TextWriter"></see> stream of output.</param>
public static void HtmlDecode(string s, TextWriter output)
{
    if (s == null)
    {
        return;
    }

    if (s.IndexOf('&') < 0)
    {
        output.Write(s);
        return;
    }

    int length = s.Length;
    for (int i = 0; i < length; i++)
    {
        char ch = s[i];

        if (ch == '&')
        {
            // Only a ';' reached before any further '&' closes an entity.
            int end = s.IndexOfAny(s_entityEndingChars, i + 1);
            if ((end > 0) && (s[end] == ';'))
            {
                string entity = s.Substring(i + 1, (end - i) - 1);

                if ((entity.Length > 1) && (entity[0] == '#'))
                {
                    // Numeric reference, hex (&#xE5;) or decimal (&#229;), parsed with the
                    // invariant culture. TryParse replaces the former int.Parse + catch: an
                    // out-of-range number threw an OverflowException that no handler caught.
                    bool isHex = (entity[1] == 'x') || (entity[1] == 'X');
                    int codePoint;
                    bool parsed = isHex
                        ? int.TryParse(entity.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out codePoint)
                        : int.TryParse(entity.Substring(1), NumberStyles.Integer, CultureInfo.InvariantCulture, out codePoint);

                    // Accept only values that fit a single char; a plain (char) cast would
                    // truncate anything else to an unrelated character.
                    if (parsed && codePoint >= 0 && codePoint <= char.MaxValue)
                    {
                        ch = (char)codePoint;
                        i = end;
                    }

                    // Otherwise ch is still '&' and i is untouched, so the reference is emitted
                    // verbatim. (The old failure path advanced i and dropped the '#'.)
                }
                else
                {
                    i = end;

                    char known = HtmlEntities.Lookup(entity);
                    if (known != '\0')
                    {
                        ch = known;
                    }
                    else
                    {
                        // Unknown name: reproduce the reference verbatim and keep decoding.
                        // This used to be a `return`, which discarded everything after the
                        // first unrecognised entity.
                        output.Write('&');
                        output.Write(entity);
                        output.Write(';');
                        continue;
                    }
                }
            }
        }

        output.Write(ch);
    }
}
/// <summary>
/// Rewrites numeric character references in already-encoded text as named HTML entities when
/// <c>HtmlEntities.Lookup</c> reports a name for the referenced character. Everything else,
/// including references without a name, is copied through unchanged.
/// </summary>
/// <param name="value">The already HTML-encoded text; null writes nothing.</param>
/// <param name="output">The writer that receives the result.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="output"/> is null while <paramref name="value"/> is not.
/// </exception>
private static void HtmlEncode(string value, TextWriter output)
{
    if (value == null)
    {
        return;
    }

    if (output == null)
    {
        throw new ArgumentNullException(nameof(output));
    }

    int l = value.Length;
    for (int i = 0; i < l; i++)
    {
        char ch = value[i];

        if (ch == '&')
        {
            // We found a '&'. Now look for the next ';' or '&'. The idea is that
            // if we find another '&' before finding a ';', then this is not an entity,
            // and the next '&' might start a real entity (VSWhidbey 275184)
            int index = value.IndexOfAny(_htmlEntityEndingChars, i + 1);
            if (index > 0 && value[index] == ';')
            {
                string entity = value.Substring(i + 1, index - i - 1);

                if (entity.Length > 1 && entity[0] == '#')
                {
                    // The # syntax can be in decimal or hex, e.g.
                    //      &#229;  --> decimal
                    //      &#xE5;  --> same char in hex
                    // See http://www.w3.org/TR/REC-html40/charset.html#entities
                    bool parsedSuccessfully;
                    uint parsedValue;
                    if (entity[1] == 'x' || entity[1] == 'X')
                    {
                        parsedSuccessfully = UInt32.TryParse(entity.Substring(2), NumberStyles.AllowHexSpecifier, NumberFormatInfo.InvariantInfo, out parsedValue);
                    }
                    else
                    {
                        parsedSuccessfully = UInt32.TryParse(entity.Substring(1), NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out parsedValue);
                    }

                    // Only values that fit a single char can be looked up. Casting a larger
                    // code point to char would truncate it to an unrelated character.
                    if (parsedSuccessfully && parsedValue <= char.MaxValue)
                    {
                        string entityName = HtmlEntities.Lookup((char)parsedValue);
                        if (!string.IsNullOrEmpty(entityName))
                        {
                            output.Write('&');
                            output.Write(entityName);
                            output.Write(';');
                            i = index; // already looked at everything until semicolon
                            continue;
                        }
                    }

                    // No name available: fall through so the numeric reference is copied
                    // verbatim. Writing the raw character here would undo the encoding
                    // (for example turning &#39; back into an apostrophe).
                }
            }
        }

        output.Write(ch);
    }
}
/// <summary>
/// Decodes the HTML character references in <paramref name="s"/> and writes the decoded text
/// to <paramref name="output"/>.
/// </summary>
/// <param name="s">The HTML-encoded text; null writes nothing.</param>
/// <param name="output">The writer that receives the decoded text.</param>
public static void HtmlDecode(string s, TextWriter output)
{
    if (s == null)
    {
        return;
    }

    if (s.IndexOf('&') < 0)
    {
        output.Write(s);
        return;
    }

    int length = s.Length;
    for (int i = 0; i < length; i++)
    {
        char ch = s[i];

        if (ch == '&')
        {
            // Only a ';' reached before any further '&' closes an entity.
            int end = s.IndexOfAny(s_entityEndingChars, i + 1);
            if ((end > 0) && (s[end] == ';'))
            {
                string entity = s.Substring(i + 1, (end - i) - 1);

                if ((entity.Length > 1) && (entity[0] == '#'))
                {
                    // Any failure below leaves ch == '&' and i untouched, so the reference is
                    // emitted verbatim. (The handlers used to advance i, which silently
                    // dropped the '#' from the output.)
                    try
                    {
                        // Numeric reference, hex (&#xE5;) or decimal (&#229;).
                        int code;
                        if ((entity[1] == 'x') || (entity[1] == 'X'))
                        {
                            code = int.Parse(entity.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture);
                        }
                        else
                        {
                            code = int.Parse(entity.Substring(1), CultureInfo.InvariantCulture);
                        }

                        // Accept only values that fit a single char; a plain (char) cast would
                        // truncate anything else to an unrelated character.
                        if (code >= 0 && code <= char.MaxValue)
                        {
                            ch = (char)code;
                            i = end;
                        }
                    }
                    catch (FormatException exception)
                    {
                        // the number isn't valid
                        if (Tracing.On)
                        {
                            Tracing.ExceptionCatch(TraceEventType.Warning, typeof(RequestResponseUtils.HttpUtility), "HtmlDecode", exception);
                        }
                    }
                    catch (ArgumentException exception2)
                    {
                        // there is no number
                        if (Tracing.On)
                        {
                            Tracing.ExceptionCatch(TraceEventType.Warning, typeof(RequestResponseUtils.HttpUtility), "HtmlDecode", exception2);
                        }
                    }
                    catch (OverflowException exception3)
                    {
                        // the number does not fit an Int32; this used to escape to the caller
                        if (Tracing.On)
                        {
                            Tracing.ExceptionCatch(TraceEventType.Warning, typeof(RequestResponseUtils.HttpUtility), "HtmlDecode", exception3);
                        }
                    }
                }
                else
                {
                    i = end;

                    char known = HtmlEntities.Lookup(entity);
                    if (known != '\0')
                    {
                        ch = known;
                    }
                    else
                    {
                        // Unknown name: reproduce the reference verbatim.
                        output.Write('&');
                        output.Write(entity);
                        output.Write(';');
                        continue;
                    }
                }
            }
        }

        output.Write(ch);
    }
}
/// <summary>
/// Decodes the HTML character references in <paramref name="value"/> and writes the decoded
/// text to <paramref name="output"/>. Which numeric references are accepted depends on the
/// conformance level read from <c>HtmlDecodeConformance</c>.
/// </summary>
/// <param name="value">The HTML-encoded text; null writes nothing.</param>
/// <param name="output">The writer that receives the decoded text.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="output"/> is null while <paramref name="value"/> is not.
/// </exception>
public static void HtmlDecode(string value, TextWriter output)
{
    if (value == null)
    {
        return;
    }

    if (output == null)
    {
        throw new ArgumentNullException("output");
    }

    if (!StringRequiresHtmlDecoding(value))
    {
        output.Write(value); // good as is
        return;
    }

    // Read once so the whole string is decoded under a single conformance level.
    UnicodeDecodingConformance conformance = HtmlDecodeConformance;

    int end = value.Length;
    int position = 0;
    while (position < end)
    {
        char current = value[position];
        if (current != '&')
        {
            output.Write(current);
            position++;
            continue;
        }

        // We found a '&'. Look for the next ';' or '&': if another '&' shows up before a ';',
        // this is not an entity and the next '&' might start a real one (VSWhidbey 275184).
        int terminator = value.IndexOfAny(_htmlEntityEndingChars, position + 1);
        if (terminator <= 0 || value[terminator] != ';')
        {
            output.Write(current);
            position++;
            continue;
        }

        string entity = value.Substring(position + 1, terminator - position - 1);

        if (entity.Length > 1 && entity[0] == '#')
        {
            // The # syntax can be in decimal or hex, e.g.
            //      &#229;  --> decimal
            //      &#xE5;  --> same char in hex
            // See http://www.w3.org/TR/REC-html40/charset.html#entities
            uint codePoint;
            bool accepted = (entity[1] == 'x' || entity[1] == 'X')
                ? UInt32.TryParse(entity.Substring(2), NumberStyles.AllowHexSpecifier, NumberFormatInfo.InvariantInfo, out codePoint)
                : UInt32.TryParse(entity.Substring(1), NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out codePoint);

            if (accepted)
            {
                switch (conformance)
                {
                    case UnicodeDecodingConformance.Strict:
                        // decoded character must be U+0000 .. U+10FFFF, excluding surrogates
                        accepted = (codePoint < HIGH_SURROGATE_START) ||
                                   (LOW_SURROGATE_END < codePoint && codePoint <= UNICODE_PLANE16_END);
                        break;

                    case UnicodeDecodingConformance.Compat:
                        // decoded character must be U+0001 .. U+FFFF
                        // null chars disallowed for compat with 4.0
                        accepted = 0 < codePoint && codePoint <= UNICODE_PLANE00_END;
                        break;

                    case UnicodeDecodingConformance.Loose:
                        // decoded character must be U+0000 .. U+10FFFF
                        accepted = codePoint <= UNICODE_PLANE16_END;
                        break;

                    default:
                        Debug.Assert(false, "Should never get here!");
                        accepted = false;
                        break;
                }
            }

            if (!accepted)
            {
                // Not a usable reference: emit the '&' and rescan the rest as plain text.
                output.Write(current);
                position++;
                continue;
            }

            if (codePoint <= UNICODE_PLANE00_END)
            {
                // single character
                output.Write((char)codePoint);
            }
            else
            {
                // multi-character
                char leadingSurrogate, trailingSurrogate;
                ConvertSmpToUtf16(codePoint, out leadingSurrogate, out trailingSurrogate);
                output.Write(leadingSurrogate);
                output.Write(trailingSurrogate);
            }

            position = terminator + 1; // everything up to the semicolon has been consumed
            continue;
        }

        char entityChar = HtmlEntities.Lookup(entity);
        if (entityChar != (char)0)
        {
            output.Write(entityChar);
        }
        else
        {
            // Unknown name: reproduce the reference verbatim.
            output.Write('&');
            output.Write(entity);
            output.Write(';');
        }

        position = terminator + 1; // everything up to the semicolon has been consumed
    }
}
/// <summary>
/// Converts a string that has been HTML-encoded into a decoded string, and sends the decoded string to a TextWriter output stream.
/// </summary>
/// <param name="s">The string to decode; null writes nothing.</param>
/// <param name="output">The writer that receives the decoded text.</param>
public static void HtmlDecode(string s, TextWriter output)
{
    if (s == null)
    {
        return;
    }

    if (s.IndexOf('&') < 0)
    {
        output.Write(s);
        return;
    }

    int length = s.Length;
    for (int i = 0; i < length; i++)
    {
        char ch = s[i];

        if (ch == '&')
        {
            // Only a ';' reached before any further '&' closes an entity.
            int end = s.IndexOfAny(HttpUtility.s_entityEndingChars, i + 1);
            if ((end > 0) && (s[end] == ';'))
            {
                string entity = s.Substring(i + 1, (end - i) - 1);

                if ((entity.Length > 1) && (entity[0] == '#'))
                {
                    // Numeric reference, hex (&#xE5;) or decimal (&#229;), parsed with the
                    // invariant culture. TryParse replaces the former int.Parse + catch: an
                    // out-of-range number threw an OverflowException that no handler caught.
                    bool isHex = (entity[1] == 'x') || (entity[1] == 'X');
                    int codePoint;
                    bool parsed = isHex
                        ? int.TryParse(entity.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out codePoint)
                        : int.TryParse(entity.Substring(1), NumberStyles.Integer, CultureInfo.InvariantCulture, out codePoint);

                    // Accept only values that fit a single char; the former (ushort) cast
                    // truncated anything else to an unrelated character.
                    if (parsed && codePoint >= 0 && codePoint <= char.MaxValue)
                    {
                        ch = (char)codePoint;
                        i = end;
                    }

                    // Otherwise ch is still '&' and i is untouched, so the reference is emitted
                    // verbatim. (The old failure path advanced the index and dropped the '#'.)
                }
                else
                {
                    i = end;

                    char known = HtmlEntities.Lookup(entity);
                    if (known != '\0')
                    {
                        ch = known;
                    }
                    else
                    {
                        // Unknown name: reproduce the reference verbatim and move on
                        // (a plain continue replaces the decompiler's goto label).
                        output.Write('&');
                        output.Write(entity);
                        output.Write(';');
                        continue;
                    }
                }
            }
        }

        output.Write(ch);
    }
}