/// <summary>
/// Converts a single Unicode label to its ASCII form using the ToASCII
/// procedure described in RFC 3490 section 4.1.
/// </summary>
/// <param name="input">Unicode string to convert.</param>
/// <param name="allowUnassigned">
/// Forwarded to <c>Stringprep.nameprep</c>: whether unassigned code points
/// are allowed.
/// </param>
/// <param name="useSTD3ASCIIRules">
/// When true, non-LDH ASCII code points and a leading or trailing
/// hyphen-minus are rejected.
/// </param>
/// <returns>
/// The encoded string: the (possibly nameprepped) input itself when it is
/// pure ASCII, otherwise the ACE prefix followed by the Punycode encoding.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="input"/> is null.
/// </exception>
/// <exception cref="IDNAException">
/// Nameprep or Punycode encoding failed, an STD3 rule was violated, the
/// prepared string already starts with the ACE prefix, or the result length
/// is outside 1..63 (reported as <c>TOO_LONG</c> in both directions).
/// </exception>
public static System.String toASCII(System.String input, bool allowUnassigned, bool useSTD3ASCIIRules)
{
    if (input == null)
    {
        throw new System.ArgumentNullException("input");
    }

    // Steps 1-2: Perform the nameprep operation, but only when the string
    // contains code points outside the ASCII range 0..0x7f.
    if (containsNonASCII(input))
    {
        try
        {
            input = Stringprep.nameprep(input, allowUnassigned);
        }
        catch (StringprepException e)
        {
            // Surface preparation failures through the IDNA exception type.
            throw new IDNAException(e);
        }
    }

    // Step 3: - Verify the absence of non-LDH ASCII code points
    //           0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60, 0x7b..0x7f
    //         - Verify the absence of leading and trailing hyphen-minus
    if (useSTD3ASCIIRules)
    {
        for (int i = 0; i < input.Length; i++)
        {
            int c = input[i];
            if ((c <= 0x2c)
                || (c >= 0x2e && c <= 0x2f)
                || (c >= 0x3a && c <= 0x40)
                || (c >= 0x5b && c <= 0x60)
                || (c >= 0x7b && c <= 0x7f))
            {
                throw new IDNAException(IDNAException.CONTAINS_NON_LDH);
            }
        }

        // Ordinal comparison: this is a code point check, so it must not
        // depend on the current culture's collation rules.
        if (input.StartsWith("-", System.StringComparison.Ordinal)
            || input.EndsWith("-", System.StringComparison.Ordinal))
        {
            throw new IDNAException(IDNAException.CONTAINS_HYPHEN);
        }
    }

    // Step 4: If all code points are inside 0..0x7f, skip to step 8.
    System.String output = input;
    if (containsNonASCII(input))
    {
        // Step 5: Verify that the sequence does not begin with the ACE
        // prefix. RFC 3490 requires the prefix to be recognized
        // case-insensitively.
        if (input.StartsWith(ACE_PREFIX, System.StringComparison.OrdinalIgnoreCase))
        {
            throw new IDNAException(IDNAException.CONTAINS_ACE_PREFIX);
        }

        // Step 6: Punycode
        try
        {
            output = Punycode.encode(input);
        }
        catch (PunycodeException e)
        {
            // Surface encoding failures through the IDNA exception type.
            throw new IDNAException(e);
        }

        // Step 7: Prepend the ACE prefix.
        output = ACE_PREFIX + output;
    }

    // Step 8: Check that the length is inside 1..63.
    if (output.Length < 1 || output.Length > 63)
    {
        throw new IDNAException(IDNAException.TOO_LONG);
    }

    return output;
}

/// <summary>
/// Returns true when <paramref name="text"/> contains at least one UTF-16
/// code unit above 0x7f.
/// </summary>
private static bool containsNonASCII(System.String text)
{
    for (int i = 0; i < text.Length; i++)
    {
        if (text[i] > 0x7f)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Converts an ASCII-encoded (ACE) label back to Unicode following the
/// ToUnicode steps of RFC 3490. Any failure along the way results in the
/// original input being returned unchanged rather than an exception.
/// </summary>
/// <param name="input">ASCII input string.</param>
/// <param name="allowUnassigned">
/// Forwarded to <c>Stringprep.nameprep</c> and <c>toASCII</c>: whether
/// unassigned code points are allowed.
/// </param>
/// <param name="useSTD3ASCIIRules">
/// Forwarded to <c>toASCII</c> when the decoded result is verified.
/// </param>
/// <returns>
/// The decoded Unicode string, or <paramref name="input"/> itself when it
/// does not start with the ACE prefix, cannot be prepared or decoded, or
/// does not round-trip through <c>toASCII</c>.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="input"/> is null.
/// </exception>
public static System.String toUnicode(System.String input, bool allowUnassigned, bool useSTD3ASCIIRules)
{
    if (input == null)
    {
        throw new System.ArgumentNullException("input");
    }

    System.String original = input;

    // Step 1: If all code points are inside 0..0x7f, skip to step 3.
    bool nonASCII = false;
    for (int i = 0; i < input.Length; i++)
    {
        if (input[i] > 0x7f)
        {
            nonASCII = true;
            break;
        }
    }

    // Step 2: Perform the Nameprep operation.
    if (nonASCII)
    {
        try
        {
            input = Stringprep.nameprep(input, allowUnassigned);
        }
        catch (StringprepException)
        {
            // ToUnicode never fails!
            return original;
        }
    }

    // Step 3: Verify the sequence starts with the ACE prefix. RFC 3490
    // requires the prefix to be recognized case-insensitively, and an
    // all-ASCII input skips nameprep, so it may still be upper case here.
    if (!input.StartsWith(ACE_PREFIX, System.StringComparison.OrdinalIgnoreCase))
    {
        // ToUnicode never fails!
        return original;
    }

    // Keep the prefixed form for the round-trip comparison in step 7.
    System.String stored = input;

    // Step 4: Remove the ACE prefix.
    input = input.Substring(ACE_PREFIX.Length);

    // Step 5: Decode using punycode
    System.String output;
    try
    {
        output = Punycode.decode(input);
    }
    catch (PunycodeException)
    {
        // ToUnicode never fails!
        return original;
    }

    // Step 6: Apply toASCII
    System.String ascii;
    try
    {
        ascii = toASCII(output, allowUnassigned, useSTD3ASCIIRules);
    }
    catch (IDNAException)
    {
        // ToUnicode never fails!
        return original;
    }

    // Step 7: Compare case-insensitively. An ordinal comparison keeps the
    // result independent of the current culture (e.g. Turkish 'i'/'I').
    if (!System.String.Equals(ascii, stored, System.StringComparison.OrdinalIgnoreCase))
    {
        // ToUnicode never fails!
        return original;
    }

    // Step 8: Return the result.
    return output;
}