/// <summary>
/// Implements the ToUnicode operation as defined in the IDNA RFC. The
/// operation works on a <b>single label</b> and is applied before the label
/// is handed to something that expects Unicode names. A label is an
/// individual part of a domain name; labels are usually separated by dots,
/// e.g. "www.example.com" is composed of the 3 labels "www", "example" and
/// "com".
/// </summary>
///
/// <param name="src">The input label as a UCharacterIterator. It is scanned
/// from its current index; the text returned on fallback is the iterator's
/// full text.</param>
/// <param name="options">A bit set of options (IDNA.DEFAULT,
/// IDNA.ALLOW_UNASSIGNED, IDNA.USE_STD3_RULES). The value is forwarded
/// unchanged to the NamePrep step and to the ToASCII re-encoding used for
/// verification.</param>
/// <returns>The Punycode-decoded label when the input carries the ACE prefix,
/// decodes successfully and round-trips through ToASCII; otherwise a new
/// StringBuilder holding the original text of <paramref name="src"/>.</returns>
/// <exception cref="StringPrepParseException">Failures of NamePrep and of the
/// Punycode decoder are swallowed (the original text is returned instead);
/// an exception raised by the ToASCII re-encoding in step 6 is not caught
/// here.</exception>
/// @stable ICU 2.8
public static StringBuilder ConvertToUnicode(UCharacterIterator src, int options) {
    // No case-flag information is requested from the Punycode decoder.
    bool[] caseFlags = null;

    // Remember where the caller left the iterator so NamePrep can start
    // from the same position after the scan below has consumed it.
    int startIndex = src.GetIndex();

    // step 1: find out whether every code unit in src is ASCII
    bool allAscii = true;
    int codeUnit;
    while ((codeUnit = src.Next()) != IBM.ICU.Text.UForwardCharacterIterator_Constants.DONE) {
        if (codeUnit > 0x7F) {
            allAscii = false;
        }
    }

    StringBuilder prepared;
    if (allAscii) {
        // Pure ASCII input needs no NamePrep; work on a copy of the text.
        prepared = new StringBuilder(src.GetText());
    } else {
        try {
            // step 2: run NamePrep over the label
            src.SetIndex(startIndex);
            prepared = singleton.namePrep.Prepare(src, options);
        } catch (StringPrepParseException) {
            // NamePrep failed: fall back to the unmodified input.
            return(new StringBuilder(src.GetText()));
        }
    }

    // TODO:
    // The RFC states that
    // <quote>
    // ToUnicode never fails. If any step fails, then the original input
    // is returned immediately in that step.
    // </quote>

    // step 3: verify ACE prefix; without it there is nothing to decode
    if (!StartsWithPrefix(prepared)) {
        return(new StringBuilder(src.GetText()));
    }

    // step 4: remove the ACE prefix
    int prefixLength = ACE_PREFIX.Length;
    String encodedPart = prepared.ToString(prefixLength, prepared.Length - prefixLength);

    // step 5: decode using Punycode; a decoder failure means "not decodable"
    StringBuilder decoded;
    try {
        decoded = IBM.ICU.Text.Punycode.Decode(new StringBuilder(encodedPart), caseFlags);
    } catch (StringPrepParseException) {
        decoded = null;
    }

    if (decoded != null) {
        // step 6: apply ToASCII to the decoded label
        StringBuilder roundTrip = ConvertToASCII(decoded, options);

        // step 7: the re-encoded label must match the prepared input
        // (ASCII case-insensitively); a mismatch is not reported as an
        // error, the original input is returned instead.
        if (CompareCaseInsensitiveASCII(prepared, roundTrip) == 0) {
            // step 8: return the output of step 5
            return(decoded);
        }
    }

    // Any failed step ends here: hand back the original text. STD3 ASCII
    // rule verification is not performed on this path.
    return(new StringBuilder(src.GetText()));
}
/// <summary>
/// Implements the ToASCII operation as defined in the IDNA RFC. The
/// operation works on a <b>single label</b> and is applied before the label
/// is handed to something that expects ASCII names. A label is an
/// individual part of a domain name; labels are usually separated by dots,
/// e.g. "www.example.com" is composed of the 3 labels "www", "example" and
/// "com".
/// </summary>
///
/// <param name="src">The input label as a UCharacterIterator. It is scanned
/// from its current index and then reset to its start before further
/// processing.</param>
/// <param name="options">A bit set of options:
/// <list type="bullet">
/// <item><description>IDNA.DEFAULT: use default options, i.e. do not process
/// unassigned code points and do not use STD3 ASCII rules. If unassigned
/// code points are found the operation fails with an
/// exception.</description></item>
/// <item><description>IDNA.ALLOW_UNASSIGNED: unassigned values can be
/// converted to ASCII for query operations. If this option is set,
/// unassigned code points in the input are treated as normal Unicode code
/// points.</description></item>
/// <item><description>IDNA.USE_STD3_RULES: use STD3 ASCII rules for host name
/// syntax restrictions. If this option is set and the input does not satisfy
/// the STD3 rules, the operation fails with an exception.</description></item>
/// </list></param>
/// <returns>The ASCII form of the label: the prepared label itself when it is
/// all ASCII, otherwise the ACE prefix followed by the lower-cased Punycode
/// encoding.</returns>
/// <exception cref="StringPrepParseException">Thrown when the prepared label
/// is empty (ZERO_LENGTH_LABEL), violates the STD3 ASCII rules while
/// USE_STD3_RULES is set (STD3_ASCII_RULES_ERROR), is non-ASCII yet already
/// begins with the ACE prefix (ACE_PREFIX_ERROR), or the result is longer
/// than MAX_LABEL_LENGTH (LABEL_TOO_LONG_ERROR).</exception>
/// @stable ICU 2.8
public static StringBuilder ConvertToASCII(UCharacterIterator src, int options) {
    bool useSTD3ASCIIRules = (options & USE_STD3_RULES) != 0;

    // step 1: find out whether every code unit in src is ASCII
    bool srcIsASCII = true;
    int ch;
    while ((ch = src.Next()) != IBM.ICU.Text.UForwardCharacterIterator_Constants.DONE) {
        if (ch > 0x7F) {
            srcIsASCII = false;
        }
    }

    // The scan consumed the iterator; rewind it before it is read again.
    src.SetToStart();

    // step 2: NamePrep is performed only if the source contains non-ASCII
    StringBuilder processOut;
    if (!srcIsASCII) {
        processOut = singleton.namePrep.Prepare(src, options);
    } else {
        processOut = new StringBuilder(src.GetText());
    }

    int poLen = processOut.Length;
    if (poLen == 0) {
        throw new StringPrepParseException(
                "Found zero length label after NamePrep.",
                IBM.ICU.Text.StringPrepParseException.ZERO_LENGTH_LABEL);
    }

    // steps 3 & 4: re-examine the *prepared* label. NamePrep may have
    // changed which characters are ASCII, so the flag is recomputed.
    srcIsASCII = true;
    bool srcIsLDH = true;
    int failPos = -1; // index of the last non-LDH ASCII character seen
    for (int j = 0; j < poLen; j++) {
        ch = processOut[j];
        if (ch > 0x7F) {
            srcIsASCII = false;
        } else if (!IsLDHChar(ch)) {
            // Surrogates are not assembled here: LDH code points are in
            // the ASCII range only.
            srcIsLDH = false;
            failPos = j;
        }
    }

    if (useSTD3ASCIIRules) {
        // verify 3a: only LDH characters among the ASCII code points
        if (!srcIsLDH) {
            // The reported position is one before the offending index
            // (kept as-is for compatibility with existing callers).
            throw new StringPrepParseException(
                    "The input does not conform to the STD 3 ASCII rules",
                    IBM.ICU.Text.StringPrepParseException.STD3_ASCII_RULES_ERROR,
                    processOut.ToString(),
                    (failPos > 0) ? (failPos - 1) : failPos);
        }
        // verify 3b: no leading hyphen
        if (processOut[0] == HYPHEN) {
            throw new StringPrepParseException(
                    "The input does not conform to the STD 3 ASCII rules",
                    IBM.ICU.Text.StringPrepParseException.STD3_ASCII_RULES_ERROR,
                    processOut.ToString(), 0);
        }
        // verify 3b: no trailing hyphen (poLen > 0 is guaranteed above)
        if (processOut[poLen - 1] == HYPHEN) {
            throw new StringPrepParseException(
                    "The input does not conform to the STD 3 ASCII rules",
                    IBM.ICU.Text.StringPrepParseException.STD3_ASCII_RULES_ERROR,
                    processOut.ToString(), poLen - 1);
        }
    }

    StringBuilder dest;
    if (srcIsASCII) {
        // Already ASCII: the prepared label is the result.
        dest = processOut;
    } else {
        // step 5: verify the sequence does not begin with the ACE prefix
        if (StartsWithPrefix(processOut)) {
            throw new StringPrepParseException(
                    "The input starts with the ACE Prefix.",
                    IBM.ICU.Text.StringPrepParseException.ACE_PREFIX_ERROR,
                    processOut.ToString(), 0);
        }

        // step 6: encode the sequence with Punycode
        bool[] caseFlags = new bool[poLen];
        StringBuilder punyout = IBM.ICU.Text.Punycode.Encode(processOut, caseFlags);

        // convert all code points to lower case ASCII
        StringBuilder lowerOut = ToASCIILower(punyout);

        // step 7: prepend the ACE prefix, then append the encoded label
        dest = new StringBuilder();
        dest.Append(ACE_PREFIX, 0, ACE_PREFIX.Length);
        dest.Append(lowerOut);
    }

    // step 8: the resulting label must fit the maximum label length
    if (dest.Length > MAX_LABEL_LENGTH) {
        throw new StringPrepParseException(
                "The labels in the input are too long. Length > 63.",
                IBM.ICU.Text.StringPrepParseException.LABEL_TOO_LONG_ERROR,
                dest.ToString(), 0);
    }
    return(dest);
}