/// <summary> /// Encodes input strings for use in HTML. /// </summary> /// <param name="input">String to be encoded</param> /// <param name="useNamedEntities">Value indicating if the HTML 4.0 named entities should be used.</param> /// <param name="encoderTweak">A <see cref="MethodSpecificEncoder"/> function, if needed.</param> /// <returns> /// Encoded string for use in HTML. /// </returns> private static string HtmlEncode(string input, bool useNamedEntities, MethodSpecificEncoder encoderTweak) { if (string.IsNullOrEmpty(input)) { return(input); } if (characterValues == null) { InitialiseSafeList(); } char[][] namedEntities = null; if (useNamedEntities) { namedEntities = namedEntitiesLazy.Value; } // Setup a new StringBuilder for output. // Worse case scenario - the longest entity name, thetasym is 10 characters, including the & and ;. StringBuilder builder = EncoderUtil.GetOutputStringBuilder(input.Length, 10); AcquireReadLock(); try { Utf16StringReader stringReader = new Utf16StringReader(input); while (true) { int currentCodePoint = stringReader.ReadNextScalarValue(); if (currentCodePoint < 0) { break; // EOF } if (currentCodePoint > char.MaxValue) { // We don't have a pre-generated mapping of characters beyond the Basic Multilingual // Plane (BMP), so we need to generate these encodings on-the-fly. We should encode // the code point rather than the surrogate code units that make up this code point. // See: http://www.w3.org/International/questions/qa-escapes#bytheway char[] encodedCharacter = SafeList.HashThenValueGenerator(currentCodePoint); builder.Append('&'); builder.Append(encodedCharacter); builder.Append(';'); } else { // If we reached this point, the code point is within the BMP. char currentCharacter = (char)currentCodePoint; if (encoderTweak != null && encoderTweak(currentCharacter, out char[] tweekedValue))
/// <summary>
/// Encodes input strings for use in HTML.
/// </summary>
/// <param name="input">String to be encoded.</param>
/// <param name="useNamedEntities">Value indicating if the HTML 4.0 named entities should be used.</param>
/// <param name="encoderTweak">A <see cref="MethodSpecificEncoder"/> function, if needed. When it returns
/// <c>true</c> for a character, the value it supplies is written to the output verbatim in place of that character.</param>
/// <returns>
/// Encoded string for use in HTML. A null or empty <paramref name="input"/> is returned unchanged.
/// </returns>
/// <exception cref="InvalidUnicodeValueException">Thrown if a character with an invalid Unicode value (U+FFFE or U+FFFF) is encountered within the input string.</exception>
/// <exception cref="InvalidSurrogatePairException">Thrown if a high surrogate code unit is encountered without a following low surrogate code unit, or a
/// low surrogate code unit is encountered without having been preceded by a high surrogate code unit.</exception>
private static string HtmlEncode(string input, bool useNamedEntities, MethodSpecificEncoder encoderTweak)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    if (characterValues == null)
    {
        InitialiseSafeList();
    }

    if (useNamedEntities && namedEntities == null)
    {
        InitialiseNamedEntityList();
    }

    // A growable builder is used instead of a fixed (input length * 10) buffer: the length of the
    // values produced by encoderTweak is not bounded by this method, so a fixed budget per input
    // character can be overrun, and the multiplication itself can overflow for very large inputs.
    System.Text.StringBuilder output = new System.Text.StringBuilder(input.Length);

    // The lookup tables are only read while the read lock is held.
    SyncLock.EnterReadLock();
    try
    {
        int inputLength = input.Length;
        for (int i = 0; i < inputLength; i++)
        {
            char currentCharacter = input[i];
            int currentCodePoint = currentCharacter;

            // Check for invalid values before anything else, including the encoder tweak.
            if (currentCodePoint == 0xFFFE || currentCodePoint == 0xFFFF)
            {
                throw new InvalidUnicodeValueException(currentCodePoint);
            }

            if (char.IsHighSurrogate(currentCharacter))
            {
                if (i + 1 == inputLength)
                {
                    // The high surrogate is the last code unit of the input.
                    throw new InvalidSurrogatePairException(currentCharacter, '\0');
                }

                // Peek ahead and check that the following code unit is a low surrogate.
                char nextCharacter = input[i + 1];
                if (!char.IsLowSurrogate(nextCharacter))
                {
                    throw new InvalidSurrogatePairException(currentCharacter, nextCharacter);
                }

                // Look-ahead was good, so consume the low surrogate as well.
                i++;

                // Encode the combined code point rather than the two surrogate code units.
                // Equivalent to 0x10000 + ((high - 0xD800) * 0x400) + (low - 0xDC00).
                long combinedCodePoint = char.ConvertToUtf32(currentCharacter, nextCharacter);
                char[] encodedCharacter = SafeList.HashThenValueGenerator(combinedCodePoint);
                output.Append('&');
                output.Append(encodedCharacter);
                output.Append(';');
                continue;
            }

            if (char.IsLowSurrogate(currentCharacter))
            {
                // A low surrogate that was not consumed by the look-ahead above is unpaired.
                throw new InvalidSurrogatePairException('\0', currentCharacter);
            }

            char[] tweakedValue;
            if (encoderTweak != null && encoderTweak(currentCharacter, out tweakedValue))
            {
                // The tweak supplies the complete replacement; no '&' or ';' is added around it.
                output.Append(tweakedValue);
                continue;
            }

            // Named entities take precedence over the safe-list encodings when requested.
            char[] entity = null;
            if (useNamedEntities)
            {
                entity = namedEntities[currentCodePoint];
            }

            if (entity == null)
            {
                // A non-null entry means the character needs to be encoded.
                entity = characterValues[currentCodePoint];
            }

            if (entity != null)
            {
                output.Append('&');
                output.Append(entity);
                output.Append(';');
            }
            else
            {
                // Character does not need encoding.
                output.Append(currentCharacter);
            }
        }
    }
    finally
    {
        SyncLock.ExitReadLock();
    }

    return output.ToString();
}
/// <summary>
/// Encodes input strings for use in HTML.
/// </summary>
/// <param name="input">String to be encoded.</param>
/// <param name="useNamedEntities">Value indicating if the HTML 4.0 named entities should be used.</param>
/// <param name="encoderTweak">A <see cref="MethodSpecificEncoder"/> function, if needed. A character for which it
/// returns <c>true</c> is replaced by the value it outputs, written as-is.</param>
/// <returns>
/// Encoded string for use in HTML. If <paramref name="input"/> is null or empty it is returned as-is.
/// </returns>
/// <exception cref="InvalidUnicodeValueException">Thrown if a character with an invalid Unicode value (U+FFFE or U+FFFF) is encountered within the input string.</exception>
/// <exception cref="InvalidSurrogatePairException">Thrown if a high surrogate code unit is encountered without a following low surrogate code unit, or a
/// low surrogate code unit is encountered without having been preceded by a high surrogate code unit.</exception>
private static string HtmlEncode(string input, bool useNamedEntities, MethodSpecificEncoder encoderTweak)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    if (characterValues == null)
    {
        InitialiseSafeList();
    }

    if (useNamedEntities && namedEntities == null)
    {
        InitialiseNamedEntityList();
    }

    // Output is accumulated in a growable builder. A fixed char[input.Length * 10] buffer would
    // allocate ten times the input on every call, could overflow the int multiplication for very
    // large inputs, and could be overrun by encoderTweak values longer than ten characters.
    System.Text.StringBuilder encodedInput = new System.Text.StringBuilder(input.Length);

    // Table lookups below happen with the read lock held; it is always released in the finally block.
    SyncLock.EnterReadLock();
    try
    {
        for (int i = 0; i < input.Length; i++)
        {
            char currentCharacter = input[i];
            int currentCodePoint = currentCharacter;
            char[] tweakedValue;

            if (currentCodePoint == 0xFFFE || currentCodePoint == 0xFFFF)
            {
                // Invalid Unicode values are rejected outright.
                throw new InvalidUnicodeValueException(currentCodePoint);
            }
            else if (char.IsHighSurrogate(currentCharacter))
            {
                if (i + 1 == input.Length)
                {
                    // Nothing follows the high surrogate.
                    throw new InvalidSurrogatePairException(currentCharacter, '\0');
                }

                // Peek ahead: a high surrogate must be followed by a low surrogate.
                char nextCharacter = input[i + 1];
                if (!char.IsLowSurrogate(nextCharacter))
                {
                    throw new InvalidSurrogatePairException(currentCharacter, nextCharacter);
                }

                // The pair is valid, so skip over the low surrogate.
                i++;

                // Combine the pair into one code point and encode that, not the individual code units.
                long combinedCodePoint = char.ConvertToUtf32(currentCharacter, nextCharacter);
                AppendAsEntity:
                {
                    char[] encodedCharacter = SafeList.HashThenValueGenerator(combinedCodePoint);
                    encodedInput.Append('&');
                    encodedInput.Append(encodedCharacter);
                    encodedInput.Append(';');
                }
            }
            else if (char.IsLowSurrogate(currentCharacter))
            {
                // Low surrogate with no preceding high surrogate.
                throw new InvalidSurrogatePairException('\0', currentCharacter);
            }
            else if (encoderTweak != null && encoderTweak(currentCharacter, out tweakedValue))
            {
                // The tweak's value is the full replacement; it is not wrapped in '&' ... ';'.
                encodedInput.Append(tweakedValue);
            }
            else if (useNamedEntities && namedEntities[currentCodePoint] != null)
            {
                // A named entity exists for this character.
                encodedInput.Append('&');
                encodedInput.Append(namedEntities[currentCodePoint]);
                encodedInput.Append(';');
            }
            else if (characterValues[currentCodePoint] != null)
            {
                // Character needs to be encoded.
                encodedInput.Append('&');
                encodedInput.Append(characterValues[currentCodePoint]);
                encodedInput.Append(';');
            }
            else
            {
                // Character does not need encoding.
                encodedInput.Append(currentCharacter);
            }
        }
    }
    finally
    {
        SyncLock.ExitReadLock();
    }

    return encodedInput.ToString();
}
/// <summary>
/// Produces the HTML-encoded form of a string.
/// </summary>
/// <param name="input">The text to encode.</param>
/// <param name="useNamedEntities">When <c>true</c>, the HTML 4.0 named entity table is consulted before the safe-list encodings.</param>
/// <param name="encoderTweak">An optional <see cref="MethodSpecificEncoder"/>; may be <c>null</c>. When it returns
/// <c>true</c> for a BMP character, the value it outputs is appended in place of that character.</param>
/// <returns>
/// The encoded text, or <paramref name="input"/> itself when it is null or empty.
/// </returns>
private static string HtmlEncode(string input, bool useNamedEntities, MethodSpecificEncoder encoderTweak)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    if (characterValues == null)
    {
        InitialiseSafeList();
    }

    // The named entity table is only fetched when the caller asked for named entities.
    char[][] entityTable = useNamedEntities ? namedEntitiesLazy.Value : null;

    // Worst case: the longest entity name, thetasym, is 10 characters including the & and ;.
    StringBuilder output = EncoderUtil.GetOutputStringBuilder(input.Length, 10 /* worstCaseOutputCharsPerInputChar */);

    SyncLock.EnterReadLock();
    try
    {
        Utf16StringReader reader = new Utf16StringReader(input);
        int scalar;

        // A negative value from the reader marks the end of the input.
        while ((scalar = reader.ReadNextScalarValue()) >= 0)
        {
            char[] entity;

            if (scalar > char.MaxValue)
            {
                // There is no pre-generated mapping for characters beyond the Basic Multilingual
                // Plane (BMP), so the encoding is generated on the fly. The code point itself is
                // encoded, not the surrogate code units that make it up.
                // See: http://www.w3.org/International/questions/qa-escapes#bytheway
                entity = SafeList.HashThenValueGenerator(scalar);
            }
            else
            {
                // From here on the code point is within the BMP.
                char bmpCharacter = (char)scalar;
                char[] tweakedValue;

                if (encoderTweak != null && encoderTweak(bmpCharacter, out tweakedValue))
                {
                    // The tweak's value is appended as-is, without '&' and ';'.
                    output.Append(tweakedValue);
                    continue;
                }

                // Named entity first (if requested), then the safe-list encoding.
                entity = useNamedEntities ? entityTable[scalar] : null;
                if (entity == null)
                {
                    entity = characterValues[scalar];
                }

                if (entity == null)
                {
                    // Character does not need encoding.
                    output.Append(bmpCharacter);
                    continue;
                }
            }

            output.Append('&');
            output.Append(entity);
            output.Append(';');
        }
    }
    finally
    {
        SyncLock.ExitReadLock();
    }

    return output.ToString();
}
/// <summary>
/// Encodes a string so that it can be used in HTML.
/// </summary>
/// <param name="input">String to be encoded.</param>
/// <param name="useNamedEntities">Value indicating if the HTML 4.0 named entities should be used.</param>
/// <param name="encoderTweak">A <see cref="T:Microsoft.Security.Application.MethodSpecificEncoder" /> function, if needed;
/// <c>null</c> is accepted. A character for which it returns <c>true</c> is replaced by the value it outputs.</param>
/// <returns>
/// Encoded string for use in HTML; a null or empty <paramref name="input"/> is returned unchanged.
/// </returns>
private static string HtmlEncode(string input, bool useNamedEntities, MethodSpecificEncoder encoderTweak)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    if (characterValues == null)
    {
        InitialiseSafeList();
    }

    char[][] entityNames = null;
    if (useNamedEntities)
    {
        entityNames = namedEntitiesLazy.Value;
    }

    // Worst case: the longest entity name, thetasym, is 10 characters including the & and ;.
    StringBuilder output = EncoderUtil.GetOutputStringBuilder(input.Length, 10);

    AcquireReadLock();
    try
    {
        Utf16StringReader reader = new Utf16StringReader(input);

        // The reader signals the end of the input with a negative value.
        for (int scalar = reader.ReadNextScalarValue(); scalar >= 0; scalar = reader.ReadNextScalarValue())
        {
            if (scalar > char.MaxValue)
            {
                // Beyond the Basic Multilingual Plane: encode the code point itself,
                // with the encoding generated on the fly.
                char[] generatedEntity = SafeList.HashThenValueGenerator(scalar);
                output.Append('&');
                output.Append(generatedEntity);
                output.Append(';');
                continue;
            }

            char current = (char)scalar;
            char[] replacement;
            if (encoderTweak != null && encoderTweak(current, out replacement))
            {
                // The tweak's value is appended without '&' and ';' around it.
                output.Append(replacement);
                continue;
            }

            // Prefer a named entity when requested, otherwise fall back to the safe-list encoding.
            char[] entity = null;
            if (useNamedEntities)
            {
                entity = entityNames[scalar];
            }

            if (entity == null)
            {
                entity = characterValues[scalar];
            }

            if (entity == null)
            {
                // No encoding registered for this character: emit it unchanged.
                output.Append(current);
            }
            else
            {
                output.Append('&');
                output.Append(entity);
                output.Append(';');
            }
        }
    }
    finally
    {
        ReleaseReadLock();
    }

    return output.ToString();
}