/// <summary> /// Encodes input strings for use in HTML. /// </summary> /// <param name="input">String to be encoded</param> /// <param name="useNamedEntities">Value indicating if the HTML 4.0 named entities should be used.</param> /// <param name="encoderTweak">A <see cref="MethodSpecificEncoder"/> function, if needed.</param> /// <returns> /// Encoded string for use in HTML. /// </returns> private static string HtmlEncode(string input, bool useNamedEntities, MethodSpecificEncoder encoderTweak) { if (string.IsNullOrEmpty(input)) { return(input); } if (characterValues == null) { InitialiseSafeList(); } char[][] namedEntities = null; if (useNamedEntities) { namedEntities = namedEntitiesLazy.Value; } // Setup a new StringBuilder for output. // Worse case scenario - the longest entity name, thetasym is 10 characters, including the & and ;. StringBuilder builder = EncoderUtil.GetOutputStringBuilder(input.Length, 10); AcquireReadLock(); try { Utf16StringReader stringReader = new Utf16StringReader(input); while (true) { int currentCodePoint = stringReader.ReadNextScalarValue(); if (currentCodePoint < 0) { break; // EOF } if (currentCodePoint > char.MaxValue) { // We don't have a pre-generated mapping of characters beyond the Basic Multilingual // Plane (BMP), so we need to generate these encodings on-the-fly. We should encode // the code point rather than the surrogate code units that make up this code point. // See: http://www.w3.org/International/questions/qa-escapes#bytheway char[] encodedCharacter = SafeList.HashThenValueGenerator(currentCodePoint); builder.Append('&'); builder.Append(encodedCharacter); builder.Append(';'); } else { // If we reached this point, the code point is within the BMP. char currentCharacter = (char)currentCodePoint; if (encoderTweak != null && encoderTweak(currentCharacter, out char[] tweekedValue))
/// <summary>
/// Initializes the URL safe list if it has not been built yet.
/// </summary>
/// <remarks>
/// The null check is performed while holding the write lock, so an already
/// populated table is left untouched.
/// </remarks>
private static void InitializeSafeList()
{
    SyncLock.EnterWriteLock();
    try
    {
        if (characterValues != null)
        {
            // The table already exists; nothing to build.
            return;
        }

        // Start with a percent-then-hex encoding for every entry, then punch
        // through the characters listed by UrlSafeList().
        characterValues = SafeList.Generate(0xFF, SafeList.PercentThenHexValueGenerator);
        SafeList.PunchSafeList(ref characterValues, UrlSafeList());
    }
    finally
    {
        SyncLock.ExitWriteLock();
    }
}
/// <summary>
/// Builds the safe list used for LDAP distinguished name (DN) encoding.
/// </summary>
/// <returns>The DN safe list.</returns>
private static char[][] InitialiseDistinguishedNameSafeList()
{
    char[][] safeList = SafeList.Generate(255, SafeList.HashThenHexValueGenerator);
    SafeList.PunchSafeList(ref safeList, DistinguishedNameSafeList());

    // Characters singled out by http://www.ietf.org/rfc/rfc2253.txt get their
    // own escaping, applied in this order.
    char[] speciallyListedCharacters = { ',', '+', '"', '\\', '<', '>', ';' };
    foreach (char listedCharacter in speciallyListedCharacters)
    {
        EscapeDistinguisedNameCharacter(ref safeList, listedCharacter);
    }

    return safeList;
}
/// <summary>
/// Encodes a string according to the CSS encoding rules.
/// </summary>
/// <param name="input">The string to encode.</param>
/// <returns>
/// The encoded string, or <paramref name="input"/> itself when it is null or empty.
/// </returns>
internal static string Encode(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    char[][] encodingTable = characterValuesLazy.Value;

    // Worst case scenario - CSS encoding wants \XXXXXX for encoded characters,
    // i.e. seven output characters per input character.
    StringBuilder output = EncoderUtil.GetOutputStringBuilder(input.Length, 7);
    Utf16StringReader reader = new Utf16StringReader(input);

    // A negative scalar value signals the end of the input.
    for (int scalar = reader.ReadNextScalarValue(); scalar >= 0; scalar = reader.ReadNextScalarValue())
    {
        if (scalar >= encodingTable.Length)
        {
            // There is no pre-generated mapping beyond the end of the table, so the
            // encoding is generated on the fly. The code point itself is encoded rather
            // than the surrogate code units that make it up.
            // See: http://www.w3.org/International/questions/qa-escapes#cssescapes
            output.Append(SafeList.SlashThenSixDigitHexValueGenerator(scalar));
            continue;
        }

        char[] replacement = encodingTable[scalar];
        if (replacement == null)
        {
            // A null table entry means the character does not need encoding.
            output.Append((char)scalar);
        }
        else
        {
            output.Append(replacement);
        }
    }

    return output.ToString();
}
/// <summary>
/// Marks characters from the specified languages as safe.
/// </summary>
/// <param name="lowerCodeCharts">The combination of lower code charts to use.</param>
/// <param name="lowerMidCodeCharts">The combination of lower mid code charts to use.</param>
/// <param name="midCodeCharts">The combination of mid code charts to use.</param>
/// <param name="upperMidCodeCharts">The combination of upper mid code charts to use.</param>
/// <param name="upperCodeCharts">The combination of upper code charts to use.</param>
/// <remarks>
/// The safe list affects all HTML and XML encoding functions.
/// When the requested charts equal the current settings the call returns without
/// taking the write lock. Otherwise the safe list is rebuilt from scratch, so the
/// resulting list reflects only the charts passed to this call.
/// </remarks>
public static void MarkAsSafe(
    LowerCodeCharts lowerCodeCharts,
    LowerMidCodeCharts lowerMidCodeCharts,
    MidCodeCharts midCodeCharts,
    UpperMidCodeCharts upperMidCodeCharts,
    UpperCodeCharts upperCodeCharts)
{
    if (lowerCodeCharts == currentLowerCodeChartSettings &&
        lowerMidCodeCharts == currentLowerMidCodeChartSettings &&
        midCodeCharts == currentMidCodeChartSettings &&
        upperMidCodeCharts == currentUpperMidCodeChartSettings &&
        upperCodeCharts == currentUpperCodeChartSettings)
    {
        return;
    }

    SyncLock.EnterWriteLock();
    try
    {
        // Reset back to everything encoded before punching the requested charts through.
        // Reusing an existing table would keep characters from a previous call marked as
        // safe even when the new settings no longer include them.
        // NOTE(review): assumes PunchUnicodeThrough only marks entries as safe and never
        // restores an encoding - confirm against SafeList.
        characterValues = SafeList.Generate(65536, SafeList.HashThenValueGenerator);

        SafeList.PunchUnicodeThrough(
            ref characterValues,
            lowerCodeCharts,
            lowerMidCodeCharts,
            midCodeCharts,
            upperMidCodeCharts,
            upperCodeCharts);

        ApplyHtmlSpecificValues();

        // Record the settings the table was built from so that a repeated call
        // with the same charts can return early.
        currentLowerCodeChartSettings = lowerCodeCharts;
        currentLowerMidCodeChartSettings = lowerMidCodeCharts;
        currentMidCodeChartSettings = midCodeCharts;
        currentUpperMidCodeChartSettings = upperMidCodeCharts;
        currentUpperCodeChartSettings = upperCodeCharts;
    }
    finally
    {
        SyncLock.ExitWriteLock();
    }
}
/// <summary>
/// Marks characters from the specified languages as safe.
/// </summary>
/// <param name="lowerCodeCharts">The combination of lower code charts to use.</param>
/// <param name="lowerMidCodeCharts">The combination of lower mid code charts to use.</param>
/// <param name="midCodeCharts">The combination of mid code charts to use.</param>
/// <param name="upperMidCodeCharts">The combination of upper mid code charts to use.</param>
/// <param name="upperCodeCharts">The combination of upper code charts to use.</param>
/// <remarks>
/// The safe list affects all HTML and XML encoding functions.
/// A call whose charts match the current settings returns without taking the write lock.
/// </remarks>
public static void MarkAsSafe(
    LowerCodeCharts lowerCodeCharts,
    LowerMidCodeCharts lowerMidCodeCharts,
    MidCodeCharts midCodeCharts,
    UpperMidCodeCharts upperMidCodeCharts,
    UpperCodeCharts upperCodeCharts)
{
    bool settingsUnchanged =
        lowerCodeCharts == currentLowerCodeChartSettings
        && lowerMidCodeCharts == currentLowerMidCodeChartSettings
        && midCodeCharts == currentMidCodeChartSettings
        && upperMidCodeCharts == currentUpperMidCodeChartSettings
        && upperCodeCharts == currentUpperCodeChartSettings;

    if (settingsUnchanged)
    {
        return;
    }

    AcquireWriteLock();
    try
    {
        // Start again from a table in which every entry is encoded.
        characterValues = SafeList.Generate(65536, SafeList.HashThenValueGenerator);

        SafeList.PunchUnicodeThrough(
            ref characterValues,
            lowerCodeCharts,
            lowerMidCodeCharts,
            midCodeCharts,
            upperMidCodeCharts,
            upperCodeCharts);

        ApplyHtmlSpecificValues();

        // Remember which charts the table was built from.
        currentLowerCodeChartSettings = lowerCodeCharts;
        currentLowerMidCodeChartSettings = lowerMidCodeCharts;
        currentMidCodeChartSettings = midCodeCharts;
        currentUpperMidCodeChartSettings = upperMidCodeCharts;
        currentUpperCodeChartSettings = upperCodeCharts;
    }
    finally
    {
        ReleaseWriteLock();
    }
}
/// <summary>
/// Initializes the HTML safe list if it has not been built yet.
/// </summary>
/// <remarks>
/// The table is built with <see cref="LowerCodeCharts.Default"/> and no other code charts.
/// The null check is performed while holding the write lock, so an already populated
/// table is left untouched.
/// </remarks>
private static void InitialiseSafeList()
{
    SyncLock.EnterWriteLock();
    try
    {
        if (characterValues != null)
        {
            // The table already exists; nothing to build.
            return;
        }

        characterValues = SafeList.Generate(0xFFFF, SafeList.HashThenValueGenerator);

        SafeList.PunchUnicodeThrough(
            ref characterValues,
            LowerCodeCharts.Default,
            LowerMidCodeCharts.None,
            MidCodeCharts.None,
            UpperMidCodeCharts.None,
            UpperCodeCharts.None);

        ApplyHtmlSpecificValues();
    }
    finally
    {
        SyncLock.ExitWriteLock();
    }
}
/// <summary>
/// Initializes the HTML safe list if it has not been built yet.
/// </summary>
/// <remarks>
/// The table is built from the currently recorded code chart settings.
/// The null check is performed while holding the write lock, so an already populated
/// table is left untouched.
/// </remarks>
private static void InitialiseSafeList()
{
    AcquireWriteLock();
    try
    {
        if (characterValues != null)
        {
            // The table already exists; nothing to build.
            return;
        }

        // Decimal encoding is used to support some older Japanese mobile browsers
        // which don't support hex encoding.
        characterValues = SafeList.Generate(0xFFFF, SafeList.HashThenValueGenerator);

        SafeList.PunchUnicodeThrough(
            ref characterValues,
            currentLowerCodeChartSettings,
            currentLowerMidCodeChartSettings,
            currentMidCodeChartSettings,
            currentUpperMidCodeChartSettings,
            currentUpperCodeChartSettings);

        ApplyHtmlSpecificValues();
    }
    finally
    {
        ReleaseWriteLock();
    }
}
/// <summary>
/// Encodes input strings for use in HTML.
/// </summary>
/// <param name="input">String to be encoded</param>
/// <param name="useNamedEntities">Value indicating if the HTML 4.0 named entities should be used.</param>
/// <param name="encoderTweak">A <see cref="MethodSpecificEncoder"/> function, if needed.</param>
/// <returns>
/// Encoded string for use in HTML. A null or empty <paramref name="input"/> is returned unchanged.
/// </returns>
/// <exception cref="InvalidUnicodeValueException">Thrown if a character with an invalid Unicode value is encountered within the input string.</exception>
/// <exception cref="InvalidSurrogatePairException">Thrown if a high surrogate code point is encountered without a following low surrogate code point, or a
/// low surrogate code point is encountered without having been preceded by a high surrogate code point.</exception>
private static string HtmlEncode(string input, bool useNamedEntities, MethodSpecificEncoder encoderTweak)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    if (characterValues == null)
    {
        InitialiseSafeList();
    }

    if (useNamedEntities && namedEntities == null)
    {
        InitialiseNamedEntityList();
    }

    int inputLength = input.Length;

    // A growable builder replaces the former fixed "inputLength * 10" character buffer.
    // That buffer overflowed int arithmetic for very large inputs, always reserved ten
    // times the input size, and could be overrun by a long encoderTweak replacement.
    System.Text.StringBuilder output = new System.Text.StringBuilder(inputLength);

    SyncLock.EnterReadLock();
    try
    {
        for (int i = 0; i < inputLength; i++)
        {
            char currentCharacter = input[i];
            int currentCodePoint = currentCharacter;
            char[] tweakedValue;

            // Check for invalid values.
            if (currentCodePoint == 0xFFFE || currentCodePoint == 0xFFFF)
            {
                throw new InvalidUnicodeValueException(currentCodePoint);
            }
            else if (char.IsHighSurrogate(currentCharacter))
            {
                if (i + 1 == inputLength)
                {
                    // The input ends on a high surrogate.
                    throw new InvalidSurrogatePairException(currentCharacter, '\0');
                }

                // Peek ahead and check that the following character is a low surrogate.
                char nextCharacter = input[i + 1];
                if (!char.IsLowSurrogate(nextCharacter))
                {
                    throw new InvalidSurrogatePairException(currentCharacter, nextCharacter);
                }

                // Look-ahead was good, so consume the low surrogate as well.
                i++;

                // Combine both surrogates into a single code point and encode that,
                // rather than the two code units individually.
                long combinedCodePoint =
                    0x10000 + ((currentCodePoint - 0xD800) * 0x400) + (nextCharacter - 0xDC00);

                output.Append('&');
                output.Append(SafeList.HashThenValueGenerator(combinedCodePoint));
                output.Append(';');
            }
            else if (char.IsLowSurrogate(currentCharacter))
            {
                // A low surrogate that was not consumed by the look-ahead above is unpaired.
                throw new InvalidSurrogatePairException('\0', currentCharacter);
            }
            else if (encoderTweak != null && encoderTweak(currentCharacter, out tweakedValue))
            {
                // The method specific encoder supplies the complete replacement text.
                output.Append(tweakedValue);
            }
            else if (useNamedEntities && namedEntities[currentCodePoint] != null)
            {
                output.Append('&');
                output.Append(namedEntities[currentCodePoint]);
                output.Append(';');
            }
            else if (characterValues[currentCodePoint] != null)
            {
                // A non-null table entry means the character needs to be encoded.
                output.Append('&');
                output.Append(characterValues[currentCodePoint]);
                output.Append(';');
            }
            else
            {
                // Character does not need encoding.
                output.Append(currentCharacter);
            }
        }
    }
    finally
    {
        SyncLock.ExitReadLock();
    }

    return output.ToString();
}
/// <summary>
/// Builds the safe list used for URL path encoding.
/// </summary>
/// <returns>A list of characters and their encoded values for URL encoding.</returns>
private static char[][] InitialisePathSafeList()
{
    // Give every entry a percent-then-hex encoding first, then punch
    // through the characters listed by UrlPathSafeList().
    char[][] pathSafeList = SafeList.Generate(255, SafeList.PercentThenHexValueGenerator);
    SafeList.PunchSafeList(ref pathSafeList, UrlPathSafeList());

    return pathSafeList;
}
/// <summary>
/// Builds the safe list used for CSS encoding.
/// </summary>
/// <returns>
/// The CSS safe list.
/// </returns>
private static char[][] InitialiseSafeList()
{
    // Give every entry a slash-then-six-digit-hex encoding first, then punch
    // through the characters listed by CssSafeList().
    char[][] cssSafeList = SafeList.Generate(0xFF, SafeList.SlashThenSixDigitHexValueGenerator);
    SafeList.PunchSafeList(ref cssSafeList, CssSafeList());

    return cssSafeList;
}
/// <summary>
/// Builds the safe list used for LDAP filter encoding.
/// </summary>
/// <returns>The LDAP filter safe list.</returns>
private static char[][] InitialiseFilterSafeList()
{
    // Give every entry a slash-then-hex encoding first, then punch
    // through the characters listed by FilterEncodingSafeList().
    char[][] filterSafeList = SafeList.Generate(255, SafeList.SlashThenHexValueGenerator);
    SafeList.PunchSafeList(ref filterSafeList, FilterEncodingSafeList());

    return filterSafeList;
}