Ejemplo n.º 1
0
        /// <summary>
        /// Encodes input strings for use in HTML.
        /// </summary>
        /// <param name="input">String to be encoded</param>
        /// <param name="useNamedEntities">Value indicating if the HTML 4.0 named entities should be used.</param>
        /// <param name="encoderTweak">A <see cref="MethodSpecificEncoder"/> function, if needed.</param>
        /// <returns>
        /// Encoded string for use in HTML.
        /// </returns>
        private static string HtmlEncode(string input, bool useNamedEntities, MethodSpecificEncoder encoderTweak)
        {
            if (string.IsNullOrEmpty(input))
            {
                return(input);
            }

            if (characterValues == null)
            {
                InitialiseSafeList();
            }

            char[][] namedEntities = null;
            if (useNamedEntities)
            {
                namedEntities = namedEntitiesLazy.Value;
            }

            // Setup a new StringBuilder for output.
            // Worst-case scenario: the longest entity name, thetasym, is 10 characters, including the & and ;.
            StringBuilder builder = EncoderUtil.GetOutputStringBuilder(input.Length, 10);

            AcquireReadLock();
            try
            {
                Utf16StringReader stringReader = new Utf16StringReader(input);
                while (true)
                {
                    int currentCodePoint = stringReader.ReadNextScalarValue();
                    if (currentCodePoint < 0)
                    {
                        break; // EOF
                    }

                    if (currentCodePoint > char.MaxValue)
                    {
                        // We don't have a pre-generated mapping of characters beyond the Basic Multilingual
                        // Plane (BMP), so we need to generate these encodings on-the-fly. We should encode
                        // the code point rather than the surrogate code units that make up this code point.
                        // See: http://www.w3.org/International/questions/qa-escapes#bytheway
                        char[] encodedCharacter = SafeList.HashThenValueGenerator(currentCodePoint);
                        builder.Append('&');
                        builder.Append(encodedCharacter);
                        builder.Append(';');
                    }
                    else
                    {
                        // If we reached this point, the code point is within the BMP.
                        char currentCharacter = (char)currentCodePoint;

                        if (encoderTweak != null && encoderTweak(currentCharacter, out char[] tweekedValue))
Ejemplo n.º 2
0
        /// <summary>
        /// Produces a CSS-safe version of a string by applying the CSS encoding rules.
        /// </summary>
        /// <param name="input">The text to encode; may be <c>null</c> or empty.</param>
        /// <returns>
        /// The encoded text. A <c>null</c> or empty <paramref name="input"/> is handed back unchanged.
        /// </returns>
        internal static string Encode(string input)
        {
            // Nothing to encode: hand null/empty straight back.
            if (string.IsNullOrEmpty(input))
            {
                return input;
            }

            // Lookup table indexed by code point. A non-null slot holds the replacement
            // text for that code point; a null slot means the character is emitted as-is.
            char[][] encodingTable = characterValuesLazy.Value;

            // An encoded character takes at most 7 output characters (\XXXXXX), so 7 is
            // handed to the helper as the worst-case expansion per input character.
            StringBuilder output = EncoderUtil.GetOutputStringBuilder(input.Length, 7);

            Utf16StringReader reader = new Utf16StringReader(input);

            // The reader reports a negative value once the input is exhausted.
            int scalar;
            while ((scalar = reader.ReadNextScalarValue()) >= 0)
            {
                if (scalar < encodingTable.Length)
                {
                    char[] replacement = encodingTable[scalar];
                    if (replacement == null)
                    {
                        // Safe character: copy it through untouched.
                        output.Append((char)scalar);
                    }
                    else
                    {
                        // Pre-generated encoding exists for this character.
                        output.Append(replacement);
                    }
                }
                else
                {
                    // Outside the pre-generated table: build the escape on the fly from the
                    // code point itself rather than from its surrogate code units.
                    // See: http://www.w3.org/International/questions/qa-escapes#cssescapes
                    char[] generated = SafeList.SlashThenSixDigitHexValueGenerator(scalar);
                    output.Append(generated);
                }
            }

            return output.ToString();
        }