/// <summary>
/// Produces the <c>EncodedCharacter</c> for the character at the current
/// read position (<c>testCharacter</c>) of <c>input</c>.
/// </summary>
/// <remarks>
/// Despite the "Peek" in its name, this method increments
/// <c>testCharacter</c> whenever it returns the character literally.
/// NOTE(review): when an entity or percent sequence is parsed, the read
/// position is presumably advanced by ParseEntity / ParsePercent - confirm
/// against those methods.
/// </remarks>
/// <param name="currentCharacter">The character found at the current read position.
/// </param>
/// <returns>
/// The result of ParseEntity for an ampersand (which may be null, meaning
/// the caller should skip it), the result of ParsePercent for a percent sign
/// when that result is not null, and otherwise a literal
/// <c>EncodedCharacter</c> wrapping <paramref name="currentCharacter"/>.
/// </returns>
private EncodedCharacter PeekNextCharacter(char currentCharacter)
{
    // The very last character of the input is always returned literally,
    // so decoding is only attempted when we are not positioned on it.
    if (testCharacter != input.Length - 1)
    {
        switch (currentCharacter)
        {
            case '&':
                // A null result from entity parsing is passed straight
                // through: null tells the caller to skip this entity.
                return(ParseEntity(input, testCharacter));

            case '%':
                // A null result from percent parsing is NOT passed through:
                // in that case the '%' itself is returned below.
                EncodedCharacter percentDecoded = ParsePercent(input, testCharacter);
                if (percentDecoded != null)
                {
                    return(percentDecoded);
                }
                // FIXME: AAA add UTF-7 decoding
                // FIXME: others?
                break;
        }
    }

    // Literal character: consume it and wrap it as-is.
    testCharacter++;
    return(new EncodedCharacter(currentCharacter));
}
/// <summary>
/// Reduces percent-encoded and entity-encoded characters to their simplest
/// form so that the result can be properly validated. Attackers frequently
/// use encoding schemes to disguise their attacks and bypass validation
/// routines.
///
/// Handling several encoding schemes at once is difficult, and
/// double-encoding makes it harder still, especially when the schemes are
/// mixed. Consider decoding an input such as <c>&amp;amp;lt;</c> (entity
/// encoding applied twice) or <c>%26lt;</c> (a percent-encoded ampersand
/// that begins an entity).
///
/// The intended policy is that ALL double-encoded characters are disallowed
/// and reported with an IntrusionException, and that unknown named entities
/// are simply removed. This method performs no such checks itself; it reads
/// the input through an <c>EncodedStringReader</c> and appends the
/// <c>Unencoded</c> value of every non-null character that reader yields.
///
/// Note that most data from the browser is likely to be encoded with URL
/// encoding (FIXME: RFC). The web server decodes the URL and form data once,
/// so most encoded data received by the application must have been
/// double-encoded by the attacker. However, some HTTP inputs are not decoded
/// before they reach the application, so this routine allows a single level
/// of decoding.
/// </summary>
/// <param name="input">Unvalidated input from an HTTP request.
/// </param>
/// <returns> The canonicalized string.
/// </returns>
/// <seealso cref="Owasp.Esapi.Interfaces.IEncoder.Canonicalize(string)">
/// </seealso>
public string Canonicalize(string input)
{
    EncodedStringReader source = new EncodedStringReader(input);
    StringBuilder canonical = new StringBuilder();

    while (source.HasNext())
    {
        EncodedCharacter decoded = source.NextCharacter;
        if (decoded == null)
        {
            // The reader yielded nothing for this position; skip it.
            continue;
        }

        canonical.Append(decoded.Unencoded);
    }

    return(canonical.ToString());
}
/// <summary>
/// HTML entity encode utility method. The input is read through an
/// <c>EncodedStringReader</c>; each character it yields is either copied to
/// the output in its unencoded form (when it appears in
/// <paramref name="baseChars"/> or <paramref name="immune"/>) or replaced by
/// its <c>ENTITY_ENCODING</c> representation.
/// </summary>
/// <remarks>
/// NOTE(review): the original documentation says that already
/// entity-encoded input is logged with a warning and that double-encoded
/// input causes an exception. Neither happens in this method directly;
/// presumably both are handled by <c>EncodedStringReader</c> - confirm.
/// </remarks>
/// <param name="input">The input to encode.
/// </param>
/// <param name="baseChars">The base characters, which are emitted unencoded.
/// </param>
/// <param name="immune">The immune characters, which are also emitted unencoded.
/// </param>
/// <returns> The encoded string.
/// </returns>
private string EntityEncode(string input, char[] baseChars, char[] immune)
{
    EncodedStringReader source = new EncodedStringReader(input);
    System.Text.StringBuilder output = new System.Text.StringBuilder();

    while (source.HasNext())
    {
        EncodedCharacter current = source.NextCharacter;
        if (current == null)
        {
            // Nothing was produced for this position; move on.
            continue;
        }

        // A character passes through untouched when it is either a base
        // character or explicitly immune; the immune list is only consulted
        // if the base list does not contain it.
        bool passesThrough = IsContained(baseChars, current.Unencoded)
            || IsContained(immune, current.Unencoded);

        if (passesThrough)
        {
            output.Append(current.Unencoded);
        }
        else
        {
            output.Append(current.GetEncoded(ENTITY_ENCODING));
        }
    }

    return(output.ToString());
}