/// <summary>
/// Determine whether a character may appear as-is in a uri at the given encoding level.
/// </summary>
/// <param name="ch">Character to check.</param>
/// <param name="level">Encoding level.</param>
/// <returns><see langword="true"/> if the character is valid for the uri.</returns>
public static bool IsValidCharInUri(char ch, UriEncoding level) {

    // ASCII letters and digits are accepted at every level
    bool isLowerLetter = (ch >= 'a') && (ch <= 'z');
    bool isUpperLetter = (ch >= 'A') && (ch <= 'Z');
    bool isDigit = (ch >= '0') && (ch <= '9');
    if(isLowerLetter || isUpperLetter || isDigit) {
        return true;
    }

    // punctuation that is accepted at every level
    if("'()*-._!".IndexOf(ch) >= 0) {
        return true;
    }

    // the accepted sets are cumulative: everything accepted for Segment is accepted for Query,
    // and everything accepted for Query is accepted for Fragment
    bool fragmentLevel = (level == UriEncoding.Fragment);
    bool queryOrFragmentLevel = fragmentLevel || (level == UriEncoding.Query);
    bool segmentOrBroaderLevel = queryOrFragmentLevel || (level == UriEncoding.Segment);

    // accepted only in the fragment of a uri
    if(fragmentLevel && (ch == '#')) {
        return true;
    }

    // accepted in the query (and fragment) of a uri
    // NOTE (steveb): '|' characters are deliberately not encoded
    // NOTE (steveb): '?' is deliberately left out of this set, because its handling differs
    //                between web-servers (e.g. Apache vs. IIS)
    if(queryOrFragmentLevel && ("/:~$,;|".IndexOf(ch) >= 0)) {
        return true;
    }

    // accepted in a segment (and query and fragment) of a uri
    // NOTE (steveb): '^' characters are deliberately not encoded
    if(segmentOrBroaderLevel && ((ch == '@') || (ch == '^'))) {
        return true;
    }

    // accepted in the UserInfo part of a uri; this level does not inherit from the others
    if((level == UriEncoding.UserInfo) && ((ch == '&') || (ch == '='))) {
        return true;
    }

    // anything else is not accepted at this level
    return false;
}
/// <summary>
/// Uri encode a string. The text is converted to UTF-8 bytes; each space is replaced by '+',
/// and every other byte rejected by <see cref="IsValidCharInUri"/> is written as '%' followed by two hex digits.
/// </summary>
/// <param name="text">Input text.</param>
/// <param name="level">Encoding level.</param>
/// <returns>
/// Encoded string. The input is returned as-is when it is null, empty, or contains nothing that needs to be changed.
/// </returns>
public static string Encode(string text, UriEncoding level) {
    if(string.IsNullOrEmpty(text)) {
        return text;
    }
    byte[] utf8Bytes = Encoding.UTF8.GetBytes(text);

    // first pass: find out whether anything changes and how many bytes need the 3-byte escape form
    bool sawSpace = false;
    int escapeCount = 0;
    foreach(byte current in utf8Bytes) {
        var candidate = (char)current;
        if(candidate == ' ') {
            sawSpace = true;
        } else if(!IsValidCharInUri(candidate, level)) {
            escapeCount++;
        }
    }

    // nothing to replace and nothing to escape: hand back the original string
    if(!sawSpace && (escapeCount == 0)) {
        return text;
    }

    // second pass: each escaped byte grows from 1 to 3 bytes; a replaced space keeps its size
    var output = new byte[utf8Bytes.Length + (escapeCount * 2)];
    int position = 0;
    foreach(byte current in utf8Bytes) {
        var candidate = (char)current;

        // valid characters are copied through untouched
        if(IsValidCharInUri(candidate, level)) {
            output[position++] = current;
            continue;
        }

        // ' ' becomes '+'
        if(candidate == ' ') {
            output[position++] = 0x2b;
            continue;
        }

        // everything else becomes '%' followed by the high and low nibble as hex characters
        output[position] = 0x25;
        output[position + 1] = (byte)StringUtil.IntToHexChar((current >> 4) & 15);
        output[position + 2] = (byte)StringUtil.IntToHexChar(current & 15);
        position += 3;
    }
    return Encoding.ASCII.GetString(output);
}