// Encode a string using RFC 5987 encoding.
// Output shape: encoding'lang'PercentEncodedSpecials. The charset is always "utf-8"
// and the language tag is left empty, so every result starts with utf-8''.
//
// Inputs:
//  input - text to encode; it is not null-checked here.
//
// Returns:
//  The prefix utf-8'' followed by the input, where characters outside the RFC 5987
//  attr-char set are replaced by UriShim.HexEscape output for each of their UTF-8 bytes.
private string Encode5987(string input)
{
    StringBuilder builder = new StringBuilder("utf-8\'\'");
    for (int i = 0; i < input.Length; i++)
    {
        char c = input[i];

        // attr-char = ALPHA / DIGIT / "!" / "#" / "$" / "&" / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
        //      ; token except ( "*" / "'" / "%" )
        if (c > 0x7F) // Encodes as multiple utf-8 bytes
        {
            // A character outside the BMP is stored as a high+low surrogate pair. The two
            // halves must be converted to UTF-8 together; encoding each half separately
            // turns both into U+FFFD and corrupts the output.
            bool isSurrogatePair = char.IsHighSurrogate(c)
                && i + 1 < input.Length
                && char.IsLowSurrogate(input[i + 1]);
            int charCount = isSurrogatePair ? 2 : 1;

            byte[] bytes = Encoding.UTF8.GetBytes(input.Substring(i, charCount));
            foreach (byte b in bytes)
            {
                builder.Append(UriShim.HexEscape((char)b));
            }

            // Skip the low surrogate that was consumed together with the high one.
            i += charCount - 1;
        }
        else if (!HttpRuleParser.IsTokenChar(c) || c == '*' || c == '\'' || c == '%')
        {
            // ASCII - Only one encoded byte.
            builder.Append(UriShim.HexEscape(c));
        }
        else
        {
            builder.Append(c);
        }
    }

    return builder.ToString();
}
// IsHexEncoding
//
//  Reports whether the substring starting at <index> has the URI hex encoding
//  format: a '%' followed by two characters that UriShim.EscapedAscii accepts.
//
// Inputs:
//  <argument>  pattern
//      String to inspect
//
//  <argument>  index
//      Offset in <pattern> where the candidate "%XX" sequence begins
//
// Assumes:
//  0 <= <index> < <pattern>.Length (no argument validation is performed here)
//
// Returns:
//  true if at least three characters remain from <index>, the first is '%',
//  and the following two decode to something other than c_DummyChar; else false
public static bool IsHexEncoding(string pattern, int index)
{
    int remaining = pattern.Length - index;

    // Short-circuit evaluation keeps the indexer accesses behind the length check.
    return remaining >= 3
        && pattern[index] == '%'
        && UriShim.EscapedAscii(pattern[index + 1], pattern[index + 2]) != c_DummyChar;
}
// Builds the escaped representation of a single character by delegating to
// UriShim.EscapeAsciiChar, which fills a three-character buffer.
//
// Inputs:
//  character - the character to escape; must not exceed 0xFF.
//
// Returns:
//  A new string created from the three-character buffer.
//
// Throws:
//  ArgumentOutOfRangeException when <character> is greater than 0xFF.
public static string HexEscape(char character)
{
    if (character > 0xFF)
    {
        throw new ArgumentOutOfRangeException("character");
    }

    int written = 0;
    char[] escaped = new char[3];
    UriShim.EscapeAsciiChar(character, escaped, ref written);

    return new string(escaped);
}
// Attempt to decode a value written in RFC 5987 form:
//      encoding'language'my%20string
//
// Inputs:
//  input  - the raw value; it must contain exactly two single quotes.
//
// Outputs:
//  output - the decoded text on success, otherwise null.
//
// Returns:
//  true when decoding succeeded; false when the input does not split into three
//  parts or an ArgumentException was raised while resolving the encoding or decoding.
private bool TryDecode5987(string input, out string output)
{
    output = null;

    string[] segments = input.Split('\'');
    if (segments.Length != 3)
    {
        return false;
    }

    StringBuilder result = new StringBuilder();
    try
    {
        // segments[1] (the language tag) is intentionally not consulted.
        Encoding charset = Encoding.GetEncoding(segments[0]);
        string payload = segments[2];

        // Each escape consumes three characters, so payload.Length is a safe upper bound.
        byte[] pending = new byte[payload.Length];
        int pendingCount = 0;

        int position = 0;
        while (position < payload.Length)
        {
            if (UriShim.IsHexEncoding(payload, position)) // %FF
            {
                // Collect consecutive escaped bytes; a multi-byte character must be
                // decoded in one go. HexUnescape moves position past the escape.
                pending[pendingCount] = (byte)UriShim.HexUnescape(payload, ref position);
                pendingCount++;
                continue;
            }

            if (pendingCount > 0)
            {
                // A literal character ends the run of escapes: flush what was collected.
                result.Append(charset.GetString(pending, 0, pendingCount));
                pendingCount = 0;
            }

            result.Append(payload[position]); // Normal safe character.
            position++;
        }

        if (pendingCount > 0)
        {
            // The payload ended on an escape run: flush the remaining bytes.
            result.Append(charset.GetString(pending, 0, pendingCount));
        }
    }
    catch (ArgumentException)
    {
        return false; // Unknown encoding or bad characters.
    }

    output = result.ToString();
    return true;
}
// HexUnescape
//
//  Reads one character from <pattern> at <index>. When a "%XX" sequence that
//  UriShim.EscapedAscii can decode starts there, the decoded character is
//  returned; otherwise the character at <pattern>[<index>] is returned as-is.
//
// Inputs:
//  <argument>  pattern
//      String to read from
//
//  <argument>  index
//      Offset within <pattern> at which to start reading
//
// Outputs:
//  <argument>  index
//      Advanced by 3 when an escape was decoded, by 1 otherwise. May equal
//      <pattern>.Length afterwards if the end of the string was reached
//
// Returns:
//  The decoded character, or the literal character at the original <index>
//
// Throws:
//  ArgumentOutOfRangeException when <index> is negative or not less than <pattern>.Length
public static char HexUnescape(string pattern, ref int index)
{
    if (index < 0 || index >= pattern.Length)
    {
        throw new ArgumentOutOfRangeException("index");
    }

    // index is known to be in range here, so both checks are safe in either order.
    bool roomForEscape = pattern.Length - index >= 3;
    if (roomForEscape && pattern[index] == '%')
    {
        char decoded = UriShim.EscapedAscii(pattern[index + 1], pattern[index + 2]);
        if (decoded != c_DummyChar)
        {
            index += 3;
            return decoded;
        }
    }

    // Not a valid escape: hand back the literal character and step over it.
    char literal = pattern[index];
    index++;
    return literal;
}