/// <summary> /// Encodes input strings for use in HTML. /// </summary> /// <param name="input">String to be encoded</param> /// <param name="useNamedEntities">Value indicating if the HTML 4.0 named entities should be used.</param> /// <param name="encoderTweak">A <see cref="MethodSpecificEncoder"/> function, if needed.</param> /// <returns> /// Encoded string for use in HTML. /// </returns> private static string HtmlEncode(string input, bool useNamedEntities, MethodSpecificEncoder encoderTweak) { if (string.IsNullOrEmpty(input)) { return(input); } if (characterValues == null) { InitialiseSafeList(); } char[][] namedEntities = null; if (useNamedEntities) { namedEntities = namedEntitiesLazy.Value; } // Setup a new StringBuilder for output. // Worse case scenario - the longest entity name, thetasym is 10 characters, including the & and ;. StringBuilder builder = EncoderUtil.GetOutputStringBuilder(input.Length, 10); AcquireReadLock(); try { Utf16StringReader stringReader = new Utf16StringReader(input); while (true) { int currentCodePoint = stringReader.ReadNextScalarValue(); if (currentCodePoint < 0) { break; // EOF } if (currentCodePoint > char.MaxValue) { // We don't have a pre-generated mapping of characters beyond the Basic Multilingual // Plane (BMP), so we need to generate these encodings on-the-fly. We should encode // the code point rather than the surrogate code units that make up this code point. // See: http://www.w3.org/International/questions/qa-escapes#bytheway char[] encodedCharacter = SafeList.HashThenValueGenerator(currentCodePoint); builder.Append('&'); builder.Append(encodedCharacter); builder.Append(';'); } else { // If we reached this point, the code point is within the BMP. char currentCharacter = (char)currentCodePoint; if (encoderTweak != null && encoderTweak(currentCharacter, out char[] tweekedValue))
/// <summary>
/// Produces the CSS-encoded form of a string.
/// </summary>
/// <param name="input">The string to encode.</param>
/// <returns>The encoded string; a null or empty <paramref name="input"/> is returned as-is.</returns>
internal static string Encode(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    char[][] encodingTable = characterValuesLazy.Value;

    // CSS encoding emits \XXXXXX for an encoded character, i.e. at most 7 output
    // characters per input character; that worst case is passed to the builder factory.
    StringBuilder output = EncoderUtil.GetOutputStringBuilder(input.Length, 7);
    Utf16StringReader reader = new Utf16StringReader(input);

    // The reader yields a negative value once the input is exhausted.
    for (int scalar = reader.ReadNextScalarValue(); scalar >= 0; scalar = reader.ReadNextScalarValue())
    {
        if (scalar < encodingTable.Length)
        {
            char[] replacement = encodingTable[scalar];
            if (replacement == null)
            {
                // No table entry: the character is emitted unchanged.
                output.Append((char)scalar);
            }
            else
            {
                // The table holds the pre-generated encoding for this character.
                output.Append(replacement);
            }
        }
        else
        {
            // Outside the pre-generated table, so the escape is generated on the fly.
            // The whole code point is encoded, never the individual surrogate code units.
            // See: http://www.w3.org/International/questions/qa-escapes#cssescapes
            output.Append(SafeList.SlashThenSixDigitHexValueGenerator(scalar));
        }
    }

    return output.ToString();
}
/// <summary>
/// Encodes a string following the CSS encoding rules.
/// </summary>
/// <param name="input">The string to encode.</param>
/// <returns>The encoded string, or <paramref name="input"/> itself when it is null or empty.</returns>
internal static string Encode(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    char[][] lookup = characterValuesLazy.Value;

    // Worst case per character is the \XXXXXX form (7 characters), which is the
    // figure handed to the builder factory alongside the input length.
    StringBuilder result = EncoderUtil.GetOutputStringBuilder(input.Length, 7);
    Utf16StringReader source = new Utf16StringReader(input);

    int codePoint;

    // A negative scalar value marks the end of the input.
    while ((codePoint = source.ReadNextScalarValue()) >= 0)
    {
        // Code points past the end of the pre-generated table get their escape built
        // on demand from the full code point (not from surrogate code units).
        // See: http://www.w3.org/International/questions/qa-escapes#cssescapes
        char[] escaped = codePoint >= lookup.Length
            ? SafeList.SlashThenSixDigitHexValueGenerator(codePoint)
            : lookup[codePoint];

        if (codePoint < lookup.Length && escaped == null)
        {
            // A null table entry means the character does not need encoding.
            result.Append((char)codePoint);
            continue;
        }

        result.Append(escaped);
    }

    return result.ToString();
}