/// <summary>
        /// Encodes input strings for use in HTML.
        /// </summary>
        /// <param name="input">String to be encoded</param>
        /// <param name="useNamedEntities">Value indicating if the HTML 4.0 named entities should be used.</param>
        /// <param name="encoderTweak">A <see cref="MethodSpecificEncoder"/> function, if needed.</param>
        /// <returns>
        /// Encoded string for use in HTML.
        /// </returns>
        private static string HtmlEncode(string input, bool useNamedEntities, MethodSpecificEncoder encoderTweak)
        {
            if (string.IsNullOrEmpty(input))
            {
                return(input);
            }

            if (characterValues == null)
            {
                InitialiseSafeList();
            }

            char[][] namedEntities = null;
            if (useNamedEntities)
            {
                namedEntities = namedEntitiesLazy.Value;
            }

            // Setup a new StringBuilder for output.
            // Worse case scenario - the longest entity name, thetasym is 10 characters, including the & and ;.
            StringBuilder builder = EncoderUtil.GetOutputStringBuilder(input.Length, 10);

            AcquireReadLock();
            try
            {
                Utf16StringReader stringReader = new Utf16StringReader(input);
                while (true)
                {
                    int currentCodePoint = stringReader.ReadNextScalarValue();
                    if (currentCodePoint < 0)
                    {
                        break; // EOF
                    }

                    if (currentCodePoint > char.MaxValue)
                    {
                        // We don't have a pre-generated mapping of characters beyond the Basic Multilingual
                        // Plane (BMP), so we need to generate these encodings on-the-fly. We should encode
                        // the code point rather than the surrogate code units that make up this code point.
                        // See: http://www.w3.org/International/questions/qa-escapes#bytheway
                        char[] encodedCharacter = SafeList.HashThenValueGenerator(currentCodePoint);
                        builder.Append('&');
                        builder.Append(encodedCharacter);
                        builder.Append(';');
                    }
                    else
                    {
                        // If we reached this point, the code point is within the BMP.
                        char currentCharacter = (char)currentCodePoint;

                        if (encoderTweak != null && encoderTweak(currentCharacter, out char[] tweekedValue))
Example #2
0
        /// <summary>
        /// Encodes input strings for use in HTML.
        /// </summary>
        /// <param name="input">String to be encoded. A null or empty string is returned unchanged.</param>
        /// <param name="useNamedEntities">Value indicating if the HTML 4.0 named entities should be used.</param>
        /// <param name="encoderTweak">A <see cref="MethodSpecificEncoder"/> function, if needed. When supplied it is
        /// asked about every non-surrogate character before the entity tables are consulted; if it returns
        /// <c>true</c>, the characters it produced are written to the output as-is.</param>
        /// <returns>
        /// Encoded string for use in HTML.
        /// </returns>
        /// <exception cref="InvalidUnicodeValueException">Thrown if a character with an invalid Unicode value (U+FFFE or U+FFFF) is encountered within the input string.</exception>
        /// <exception cref="InvalidSurrogatePairException">Thrown if a high surrogate code point is encountered without a following low surrogate code point, or a
        /// low surrogate code point is encountered without having been preceded by a high surrogate code point.</exception>
        private static string HtmlEncode(string input, bool useNamedEntities, MethodSpecificEncoder encoderTweak)
        {
            if (string.IsNullOrEmpty(input))
            {
                return input;
            }

            if (characterValues == null)
            {
                InitialiseSafeList();
            }

            if (useNamedEntities && namedEntities == null)
            {
                InitialiseNamedEntityList();
            }

            // Collect the output in a growable builder. The input length is only the starting
            // capacity, so nothing is over-allocated up front and a replacement of any length
            // (for instance one produced by encoderTweak) can never run past the end of a buffer.
            System.Text.StringBuilder builder = new System.Text.StringBuilder(input.Length);

            // The read lock is held for the whole scan, while the lookup tables are consulted.
            SyncLock.EnterReadLock();
            try
            {
                for (int i = 0; i < input.Length; i++)
                {
                    char currentCharacter = input[i];
                    int currentCodePoint = currentCharacter;
                    char[] tweakedValue;

                    // Check for invalid values
                    if (currentCodePoint == 0xFFFE || currentCodePoint == 0xFFFF)
                    {
                        throw new InvalidUnicodeValueException(currentCodePoint);
                    }

                    if (char.IsHighSurrogate(currentCharacter))
                    {
                        if (i + 1 == input.Length)
                        {
                            // The string ends in the middle of a surrogate pair.
                            throw new InvalidSurrogatePairException(currentCharacter, '\0');
                        }

                        // Now peek ahead and check if the following character is a low surrogate.
                        char nextCharacter = input[i + 1];
                        if (!char.IsLowSurrogate(nextCharacter))
                        {
                            throw new InvalidSurrogatePairException(currentCharacter, nextCharacter);
                        }

                        // Look-ahead was good, so consume the low surrogate as well.
                        i++;

                        // Encode the combined code point rather than the two surrogate code units.
                        long combinedCodePoint = char.ConvertToUtf32(currentCharacter, nextCharacter);
                        char[] encodedCharacter = SafeList.HashThenValueGenerator(combinedCodePoint);
                        builder.Append('&').Append(encodedCharacter).Append(';');
                        continue;
                    }

                    if (char.IsLowSurrogate(currentCharacter))
                    {
                        // A low surrogate that was not consumed by the look-ahead above is unpaired.
                        throw new InvalidSurrogatePairException('\0', currentCharacter);
                    }

                    if (encoderTweak != null && encoderTweak(currentCharacter, out tweakedValue))
                    {
                        // The tweak supplies the complete replacement text, so no '&' / ';' is added.
                        builder.Append(tweakedValue);
                    }
                    else if (useNamedEntities && namedEntities[currentCodePoint] != null)
                    {
                        builder.Append('&').Append(namedEntities[currentCodePoint]).Append(';');
                    }
                    else if (characterValues[currentCodePoint] != null)
                    {
                        // character needs to be encoded
                        builder.Append('&').Append(characterValues[currentCodePoint]).Append(';');
                    }
                    else
                    {
                        // character does not need encoding
                        builder.Append(currentCharacter);
                    }
                }
            }
            finally
            {
                SyncLock.ExitReadLock();
            }

            return builder.ToString();
        }
        /// <summary>
        /// Encodes input strings for use in HTML.
        /// </summary>
        /// <param name="input">String to be encoded. Null and empty strings are handed back untouched.</param>
        /// <param name="useNamedEntities">Value indicating if the HTML 4.0 named entities should be used.</param>
        /// <param name="encoderTweak">A <see cref="MethodSpecificEncoder"/> function, if needed. It takes precedence over
        /// both entity tables for any non-surrogate character it reports (by returning <c>true</c>) as handled.</param>
        /// <returns>
        /// Encoded string for use in HTML.
        /// </returns>
        /// <exception cref="InvalidUnicodeValueException">Thrown if a character with an invalid Unicode value (U+FFFE or U+FFFF) is encountered within the input string.</exception>
        /// <exception cref="InvalidSurrogatePairException">Thrown if a high surrogate code point is encountered without a following low surrogate code point, or a
        /// low surrogate code point is encountered without having been preceded by a high surrogate code point.</exception>
        private static string HtmlEncode(string input, bool useNamedEntities, MethodSpecificEncoder encoderTweak)
        {
            if (string.IsNullOrEmpty(input))
            {
                return input;
            }

            if (characterValues == null)
            {
                InitialiseSafeList();
            }

            if (useNamedEntities && namedEntities == null)
            {
                InitialiseNamedEntityList();
            }

            // The output is accumulated in a StringBuilder that starts at the input length and
            // grows on demand. This avoids reserving a fixed multiple of the input size for every
            // call and removes any limit on how long a single replacement may be.
            int inputLength = input.Length;
            System.Text.StringBuilder encodedOutput = new System.Text.StringBuilder(inputLength);

            // Lookups against the encoding tables happen under the read lock.
            SyncLock.EnterReadLock();
            try
            {
                for (int i = 0; i < inputLength; i++)
                {
                    char currentCharacter = input[i];
                    int currentCodePoint = currentCharacter;
                    char[] tweakedValue;

                    // Check for invalid values
                    if (currentCodePoint == 0xFFFE ||
                        currentCodePoint == 0xFFFF)
                    {
                        throw new InvalidUnicodeValueException(currentCodePoint);
                    }
                    else if (char.IsHighSurrogate(currentCharacter))
                    {
                        if (i + 1 == inputLength)
                        {
                            // Nothing follows the high surrogate.
                            throw new InvalidSurrogatePairException(currentCharacter, '\0');
                        }

                        // Now peek ahead and check if the following character is a low surrogate.
                        char nextCharacter = input[i + 1];
                        if (!char.IsLowSurrogate(nextCharacter))
                        {
                            throw new InvalidSurrogatePairException(currentCharacter, nextCharacter);
                        }

                        // Look-ahead was good, so skip the low surrogate on the next iteration.
                        i++;

                        // Calculate the combined code point and encode it as a single numeric entity.
                        long combinedCodePoint = char.ConvertToUtf32(currentCharacter, nextCharacter);
                        char[] encodedCharacter = SafeList.HashThenValueGenerator(combinedCodePoint);
                        encodedOutput.Append('&');
                        encodedOutput.Append(encodedCharacter);
                        encodedOutput.Append(';');
                    }
                    else if (char.IsLowSurrogate(currentCharacter))
                    {
                        // Low surrogate with no preceding high surrogate.
                        throw new InvalidSurrogatePairException('\0', currentCharacter);
                    }
                    else if (encoderTweak != null && encoderTweak(currentCharacter, out tweakedValue))
                    {
                        // Written verbatim: the tweak returns the full replacement sequence.
                        encodedOutput.Append(tweakedValue);
                    }
                    else if (useNamedEntities && namedEntities[currentCodePoint] != null)
                    {
                        char[] encodedCharacter = namedEntities[currentCodePoint];
                        encodedOutput.Append('&');
                        encodedOutput.Append(encodedCharacter);
                        encodedOutput.Append(';');
                    }
                    else if (characterValues[currentCodePoint] != null)
                    {
                        // character needs to be encoded
                        char[] encodedCharacter = characterValues[currentCodePoint];
                        encodedOutput.Append('&');
                        encodedOutput.Append(encodedCharacter);
                        encodedOutput.Append(';');
                    }
                    else
                    {
                        // character does not need encoding
                        encodedOutput.Append(currentCharacter);
                    }
                }
            }
            finally
            {
                SyncLock.ExitReadLock();
            }

            return encodedOutput.ToString();
        }
        /// <summary>
        /// Produces the HTML-encoded form of a string.
        /// </summary>
        /// <param name="input">The text to encode. A null or empty string is returned as-is.</param>
        /// <param name="useNamedEntities">True to use the HTML 4.0 named entities where one exists for a character.</param>
        /// <param name="encoderTweak">An optional <see cref="MethodSpecificEncoder"/> function. When it returns true for a
        /// character within the Basic Multilingual Plane, its output is used instead of the entity tables.</param>
        /// <returns>
        /// The encoded string, suitable for use in HTML.
        /// </returns>
        private static string HtmlEncode(string input, bool useNamedEntities, MethodSpecificEncoder encoderTweak) {
            if (string.IsNullOrEmpty(input)) {
                return input;
            }

            if (characterValues == null) {
                InitialiseSafeList();
            }

            char[][] namedEntities = useNamedEntities ? namedEntitiesLazy.Value : null;

            // The output builder is sized for the worst case of 10 output characters per input
            // character: the longest entity name, thetasym, is 10 characters including the & and ;.
            StringBuilder builder = EncoderUtil.GetOutputStringBuilder(input.Length, 10 /* worstCaseOutputCharsPerInputChar */);

            SyncLock.EnterReadLock();
            try {
                Utf16StringReader stringReader = new Utf16StringReader(input);

                // A negative scalar value signals the end of the input.
                for (int scalar = stringReader.ReadNextScalarValue(); scalar >= 0; scalar = stringReader.ReadNextScalarValue()) {
                    if (scalar > Char.MaxValue) {
                        // There is no pre-generated mapping for characters outside the Basic Multilingual
                        // Plane (BMP), so their encodings are generated on the fly. The code point itself
                        // is encoded, not the surrogate code units that represent it.
                        // See: http://www.w3.org/International/questions/qa-escapes#bytheway
                        char[] generated = SafeList.HashThenValueGenerator(scalar);
                        builder.Append('&').Append(generated).Append(';');
                        continue;
                    }

                    // From here on the code point is known to be within the BMP.
                    char bmpCharacter = (char)scalar;
                    char[] tweakOutput;

                    if (encoderTweak != null && encoderTweak(bmpCharacter, out tweakOutput)) {
                        builder.Append(tweakOutput);
                        continue;
                    }

                    // A named entity, when enabled and available, wins over the general encoding table.
                    char[] entityBody = null;
                    if (useNamedEntities) {
                        entityBody = namedEntities[scalar];
                    }

                    if (entityBody == null) {
                        entityBody = characterValues[scalar];
                    }

                    if (entityBody != null) {
                        // The character needs to be encoded.
                        builder.Append('&').Append(entityBody).Append(';');
                    }
                    else {
                        // The character does not need encoding.
                        builder.Append(bmpCharacter);
                    }
                }
            }
            finally {
                SyncLock.ExitReadLock();
            }

            return builder.ToString();
        }
        /// <summary>
        /// HTML-encodes the supplied string.
        /// </summary>
        /// <param name="input">The string to encode. Null or empty input is returned unchanged.</param>
        /// <param name="useNamedEntities">Whether the HTML 4.0 named entities should be used.</param>
        /// <param name="encoderTweak">An optional <see cref="T:Microsoft.Security.Application.MethodSpecificEncoder" /> function.
        /// If it returns true for a character, the characters it outputs are appended in place of any table-based encoding.</param>
        /// <returns>
        /// The encoded string for use in HTML.
        /// </returns>
        private static string HtmlEncode(string input, bool useNamedEntities, MethodSpecificEncoder encoderTweak)
        {
            if (string.IsNullOrEmpty(input))
            {
                return input;
            }

            if (characterValues == null)
            {
                InitialiseSafeList();
            }

            // The named-entity table is only fetched when the caller asked for named entities.
            char[][] namedEntityTable = useNamedEntities ? namedEntitiesLazy.Value : null;

            StringBuilder output = EncoderUtil.GetOutputStringBuilder(input.Length, 10);

            AcquireReadLock();
            try
            {
                Utf16StringReader reader = new Utf16StringReader(input);
                int scalarValue;

                // The loop ends as soon as the reader returns a negative value.
                while ((scalarValue = reader.ReadNextScalarValue()) >= 0)
                {
                    if (scalarValue > char.MaxValue)
                    {
                        // Values above the 16-bit char range get an encoding generated from the scalar value itself.
                        char[] generatedEntity = SafeList.HashThenValueGenerator(scalarValue);
                        output.Append('&');
                        output.Append(generatedEntity);
                        output.Append(';');
                        continue;
                    }

                    char current = (char)scalarValue;
                    char[] tweaked;
                    if (encoderTweak != null && encoderTweak(current, out tweaked))
                    {
                        // The tweak's output is appended without any '&' / ';' wrapping.
                        output.Append(tweaked);
                        continue;
                    }

                    // Prefer the named entity when enabled and present; otherwise fall back to the general table.
                    char[] namedEntity = useNamedEntities ? namedEntityTable[scalarValue] : null;
                    char[] entity = namedEntity ?? characterValues[scalarValue];
                    if (entity == null)
                    {
                        // No table entry: the character is emitted unchanged.
                        output.Append(current);
                        continue;
                    }

                    output.Append('&');
                    output.Append(entity);
                    output.Append(';');
                }
            }
            finally
            {
                ReleaseReadLock();
            }

            return output.ToString();
        }