/// <summary>
/// Marks the characters covered by the specified code charts as safe.
/// </summary>
/// <param name="lowerCodeCharts">The combination of lower code charts to use.</param>
/// <param name="lowerMidCodeCharts">The combination of lower mid code charts to use.</param>
/// <param name="midCodeCharts">The combination of mid code charts to use.</param>
/// <param name="upperMidCodeCharts">The combination of upper mid code charts to use.</param>
/// <param name="upperCodeCharts">The combination of upper code charts to use.</param>
/// <remarks>
/// The safe list affects all HTML and XML encoding functions.
/// When every argument equals the setting currently in effect the method returns without
/// taking the write lock or rebuilding the table.
/// </remarks>
public static void MarkAsSafe(
    LowerCodeCharts lowerCodeCharts,
    LowerMidCodeCharts lowerMidCodeCharts,
    MidCodeCharts midCodeCharts,
    UpperMidCodeCharts upperMidCodeCharts,
    UpperCodeCharts upperCodeCharts)
{
    // Compare the request against the settings recorded by the previous rebuild.
    bool settingsUnchanged =
        lowerCodeCharts == currentLowerCodeChartSettings &&
        lowerMidCodeCharts == currentLowerMidCodeChartSettings &&
        midCodeCharts == currentMidCodeChartSettings &&
        upperMidCodeCharts == currentUpperMidCodeChartSettings &&
        upperCodeCharts == currentUpperCodeChartSettings;

    if (settingsUnchanged)
    {
        return;
    }

    AcquireWriteLock();
    try
    {
        // Start over from a table in which every entry holds its generated value,
        // then punch the requested code charts through it.
        // NOTE(review): InitialiseSafeList passes 0xFFFF as the length while this call
        // passes 65536 - confirm which one SafeList.Generate expects.
        characterValues = SafeList.Generate(65536, SafeList.HashThenValueGenerator);
        SafeList.PunchUnicodeThrough(
            ref characterValues,
            lowerCodeCharts,
            lowerMidCodeCharts,
            midCodeCharts,
            upperMidCodeCharts,
            upperCodeCharts);
        ApplyHtmlSpecificValues();

        // Record the new settings only once the table has been rebuilt.
        currentLowerCodeChartSettings = lowerCodeCharts;
        currentLowerMidCodeChartSettings = lowerMidCodeCharts;
        currentMidCodeChartSettings = midCodeCharts;
        currentUpperMidCodeChartSettings = upperMidCodeCharts;
        currentUpperCodeChartSettings = upperCodeCharts;
    }
    finally
    {
        ReleaseWriteLock();
    }
}
/// <summary>
/// Builds the HTML safe list from the current code chart settings, unless it already exists.
/// </summary>
private static void InitialiseSafeList()
{
    AcquireWriteLock();
    try
    {
        // Callers test for null before the write lock is taken, so the table may
        // already have been created by the time we hold it; check again here.
        if (characterValues != null)
        {
            return;
        }

        // We use decimal encoding to support some older Japanese mobile browsers
        // which don't support hex encoding.
        characterValues = SafeList.Generate(0xFFFF, SafeList.HashThenValueGenerator);
        SafeList.PunchUnicodeThrough(
            ref characterValues,
            currentLowerCodeChartSettings,
            currentLowerMidCodeChartSettings,
            currentMidCodeChartSettings,
            currentUpperMidCodeChartSettings,
            currentUpperCodeChartSettings);
        ApplyHtmlSpecificValues();
    }
    finally
    {
        ReleaseWriteLock();
    }
}
/// <summary>
/// Encodes input strings for use in HTML.
/// </summary>
/// <param name="input">String to be encoded.</param>
/// <param name="useNamedEntities">Value indicating if the HTML 4.0 named entities should be used.</param>
/// <param name="encoderTweak">
/// A <see cref="MethodSpecificEncoder"/> function, if needed. When it is supplied and returns
/// <c>true</c> for a character, the value it produces is written as-is in place of that character.
/// </param>
/// <returns>
/// Encoded string for use in HTML. A null or empty <paramref name="input"/> is returned unchanged.
/// </returns>
private static string HtmlEncode(string input, bool useNamedEntities, MethodSpecificEncoder encoderTweak)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    if (characterValues == null)
    {
        InitialiseSafeList();
    }

    // The named entity table is only fetched when the caller asked for named entities.
    char[][] namedEntities = useNamedEntities ? namedEntitiesLazy.Value : null;

    // Set up a new StringBuilder for output.
    // Worst case scenario - the longest entity name, thetasym, is 10 characters, including the & and ;.
    StringBuilder builder = EncoderUtil.GetOutputStringBuilder(input.Length, 10);

    AcquireReadLock();
    try
    {
        Utf16StringReader reader = new Utf16StringReader(input);

        // A negative value from the reader signals the end of the input.
        int codePoint;
        while ((codePoint = reader.ReadNextScalarValue()) >= 0)
        {
            // Text that goes between '&' and ';' for the current code point.
            char[] entityBody;

            if (codePoint > char.MaxValue)
            {
                // We don't have a pre-generated mapping of characters beyond the Basic Multilingual
                // Plane (BMP), so we need to generate these encodings on-the-fly. We should encode
                // the code point rather than the surrogate code units that make up this code point.
                // See: http://www.w3.org/International/questions/qa-escapes#bytheway
                entityBody = SafeList.HashThenValueGenerator(codePoint);
            }
            else
            {
                // From here on the code point is within the BMP and fits in a single char.
                char character = (char)codePoint;

                // The method-specific tweak takes precedence over every table lookup.
                char[] tweakedValue;
                if (encoderTweak != null && encoderTweak(character, out tweakedValue))
                {
                    builder.Append(tweakedValue);
                    continue;
                }

                // Prefer a named entity when requested and available, otherwise fall back
                // to the safe list entry for this character.
                entityBody = useNamedEntities ? namedEntities[codePoint] : null;
                if (entityBody == null)
                {
                    entityBody = characterValues[codePoint];
                }

                if (entityBody == null)
                {
                    // No entry in either table: the character does not need encoding.
                    builder.Append(character);
                    continue;
                }
            }

            builder.Append('&');
            builder.Append(entityBody);
            builder.Append(';');
        }
    }
    finally
    {
        ReleaseReadLock();
    }

    return builder.ToString();
}