Esempio n. 1
0
        /// <summary>
        /// Function that implements the ToUnicode operation as defined in the IDNA
        /// RFC. This operation is done on <b>single labels</b> before sending it to
        /// something that expects Unicode names. A label is an individual part of a
        /// domain name. Labels are usually separated by dots; for
        /// example, "www.example.com" is composed of 3 labels: "www", "example", and
        /// "com".
        /// </summary>
        ///
        /// <param name="src">The input string as UCharacterIterator to be processed</param>
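        /// <param name="options">A bit set of options:
        /// <list type="bullet">
        /// <item><description>IDNA.DEFAULT: Use default options, i.e., do not process unassigned code points and do not use STD3 ASCII rules. If unassigned code points are found, the operation fails with ParseException.</description></item>
        /// <item><description>IDNA.ALLOW_UNASSIGNED: Unassigned values can be converted to ASCII for query operations. If this option is set, the unassigned code points in the input are treated as normal Unicode code points.</description></item>
        /// <item><description>IDNA.USE_STD3_RULES: Use STD3 ASCII rules for host name syntax restrictions. If this option is set and the input does not satisfy STD3 rules, the operation will fail with ParseException.</description></item>
        /// </list>
        /// </param>
        /// <returns>A StringBuilder holding the decoded label, or a copy of the original text of <c>src</c> when the label is not decoded.</returns>
        /// <exception cref="StringPrepParseException">Failures of the preparation and Punycode decoding steps are caught and the original text is returned instead; an exception raised by the ToASCII verification step is not caught here.</exception>
        /// <remarks>Stable since ICU 2.8.</remarks>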
        public static StringBuilder ConvertToUnicode(UCharacterIterator src,
                                                     int options)
        {
            // Handed to the Punycode decoder as null: no case-flag array is supplied.
            bool[] caseFlags = null;

            // Remember where the iterator stood on entry so it can be rewound
            // before the preparation step.
            int startIndex = src.GetIndex();

            // Step 1: walk the whole iterator and record whether any value lies
            // above the 7-bit ASCII range.
            bool hasNonAscii = false;
            int current;
            while ((current = src.Next()) != IBM.ICU.Text.UForwardCharacterIterator_Constants.DONE)
            {
                if (current > 0x7F)
                {
                    hasNonAscii = true;
                }
            }

            // Step 2: only non-ASCII input is run through the name preparation
            // profile; pure ASCII input is copied as-is.
            StringBuilder prepared;
            if (hasNonAscii)
            {
                try
                {
                    src.SetIndex(startIndex);
                    prepared = singleton.namePrep.Prepare(src, options);
                }
                catch (StringPrepParseException)
                {
                    // Preparation failed: hand back the untouched source text.
                    return new StringBuilder(src.GetText());
                }
            }
            else
            {
                prepared = new StringBuilder(src.GetText());
            }

            // The RFC states that
            // <quote>
            // ToUnicode never fails. If any step fails, then the original input
            // is returned immediately in that step.
            // </quote>

            // Step 3: a label without the ACE prefix is returned unchanged.
            if (!StartsWithPrefix(prepared))
            {
                return new StringBuilder(src.GetText());
            }

            // Step 4: strip the ACE prefix.
            String encodedPart = prepared.ToString(ACE_PREFIX.Length, prepared.Length - ACE_PREFIX.Length);

            // Step 5: Punycode-decode the remainder; a decoding failure is
            // treated as "nothing decoded".
            StringBuilder decoded;
            try
            {
                decoded = IBM.ICU.Text.Punycode.Decode(new StringBuilder(encodedPart), caseFlags);
            }
            catch (StringPrepParseException)
            {
                decoded = null;
            }

            if (decoded != null)
            {
                // Step 6: re-encode the decoded label with ToASCII.
                // NOTE(review): this call sits outside any try/catch, so an
                // exception raised by ConvertToASCII reaches the caller even
                // though the RFC text quoted above says ToUnicode never fails -
                // confirm whether that is intended.
                StringBuilder roundTrip = ConvertToASCII(decoded, options);

                // Steps 7 & 8: the decoded label is returned only when the
                // round trip matches the prepared input, ignoring ASCII case.
                if (CompareCaseInsensitiveASCII(prepared, roundTrip) == 0)
                {
                    return decoded;
                }
            }

            // Decoding or verification did not succeed: return the source text.
            return new StringBuilder(src.GetText());
        }
Esempio n. 2
0
        /// <summary>
        /// This function implements the ToASCII operation as defined in the IDNA
        /// RFC. This operation is done on <b>single labels</b> before sending it to
        /// something that expects ASCII names. A label is an individual part of a
        /// domain name. Labels are usually separated by dots;
        /// e.g., "www.example.com" is composed of 3 labels: "www", "example", and
        /// "com".
        /// </summary>
        ///
        /// <param name="src">The input string as UCharacterIterator to be processed</param>
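        /// <param name="options">A bit set of options:
        /// <list type="bullet">
        /// <item><description>IDNA.DEFAULT: Use default options, i.e., do not process unassigned code points and do not use STD3 ASCII rules. If unassigned code points are found, the operation fails with ParseException.</description></item>
        /// <item><description>IDNA.ALLOW_UNASSIGNED: Unassigned values can be converted to ASCII for query operations. If this option is set, the unassigned code points in the input are treated as normal Unicode code points.</description></item>
        /// <item><description>IDNA.USE_STD3_RULES: Use STD3 ASCII rules for host name syntax restrictions. If this option is set and the input does not satisfy STD3 rules, the operation will fail with ParseException.</description></item>
        /// </list>
        /// </param>
        /// <returns>A StringBuilder holding the ASCII form of the label.</returns>
        /// <exception cref="StringPrepParseException">Thrown when the prepared label is empty, when STD3 rules are requested and the label violates them, when a non-ASCII label starts with the ACE prefix, or when the result is longer than the maximum label length.</exception>
        /// <remarks>Stable since ICU 2.8.</remarks>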
        public static StringBuilder ConvertToASCII(UCharacterIterator src,
                                                   int options)
        {
            // Converts a single label to its ASCII form: prepares non-ASCII input,
            // optionally enforces the STD3 host-name rules, Punycode-encodes labels
            // that still contain non-ASCII characters and prefixes them with the ACE
            // prefix, then enforces the maximum label length.

            bool useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);

            // Step 1: scan the input to find out whether it is pure ASCII.
            bool srcIsASCII = true;
            int  ch;
            while ((ch = src.Next()) != IBM.ICU.Text.UForwardCharacterIterator_Constants.DONE)
            {
                if (ch > 0x7f)
                {
                    srcIsASCII = false;
                }
            }

            // The scan consumed the iterator; rewind it before reading it again.
            src.SetToStart();

            // Step 2: only input containing non-ASCII goes through name preparation.
            StringBuilder processOut = srcIsASCII
                ? new StringBuilder(src.GetText())
                : singleton.namePrep.Prepare(src, options);

            int poLen = processOut.Length;
            if (poLen == 0)
            {
                throw new StringPrepParseException(
                          "Found zero length lable after NamePrep.",
                          IBM.ICU.Text.StringPrepParseException.ZERO_LENGTH_LABEL);
            }

            // Steps 3 & 4: re-examine the *prepared* text. srcIsASCII is reset
            // because preparation may have changed which characters are present.
            srcIsASCII = true;
            bool srcIsLDH = true;
            int failPos = -1;   // index of the last non-LDH ASCII character seen
            for (int j = 0; j < poLen; j++)
            {
                ch = processOut[j];
                if (ch > 0x7F)
                {
                    srcIsASCII = false;
                }
                else if (!IsLDHChar(ch))
                {
                    // No surrogate assembly is needed here: LDH code points
                    // are in the ASCII range only.
                    srcIsLDH = false;
                    failPos  = j;
                }
            }

            if (useSTD3ASCIIRules)
            {
                // Step 3a: every ASCII character must be a letter, digit or hyphen.
                if (!srcIsLDH)
                {
                    // The reported position is one before the recorded index
                    // (or the index itself when it is 0), as in the original code.
                    throw new StringPrepParseException(
                              "The input does not conform to the STD 3 ASCII rules",
                              IBM.ICU.Text.StringPrepParseException.STD3_ASCII_RULES_ERROR,
                              processOut.ToString(),
                              (failPos > 0) ? (failPos - 1) : failPos);
                }

                // Step 3b: the label must not begin with a hyphen...
                if (processOut[0] == HYPHEN)
                {
                    throw new StringPrepParseException(
                              "The input does not conform to the STD 3 ASCII rules",
                              IBM.ICU.Text.StringPrepParseException.STD3_ASCII_RULES_ERROR,
                              processOut.ToString(), 0);
                }

                // ...nor end with one. poLen is known to be > 0 here, so
                // poLen - 1 is always a valid index.
                if (processOut[poLen - 1] == HYPHEN)
                {
                    throw new StringPrepParseException(
                              "The input does not conform to the STD 3 ASCII rules",
                              IBM.ICU.Text.StringPrepParseException.STD3_ASCII_RULES_ERROR,
                              processOut.ToString(), poLen - 1);
                }
            }

            StringBuilder dest;
            if (srcIsASCII)
            {
                // Nothing to encode: the prepared text is already the answer.
                dest = processOut;
            }
            else
            {
                // Step 5: a label that still needs encoding must not already
                // carry the ACE prefix.
                if (StartsWithPrefix(processOut))
                {
                    // The message used to claim the input did NOT start with the
                    // prefix, which is the opposite of the condition tested here.
                    throw new StringPrepParseException(
                              "The input must not start with the ACE Prefix.",
                              IBM.ICU.Text.StringPrepParseException.ACE_PREFIX_ERROR,
                              processOut.ToString(), 0);
                }

                // Step 6: encode the sequence with Punycode.
                bool[] caseFlags = new bool[poLen];
                StringBuilder punyout = IBM.ICU.Text.Punycode.Encode(processOut, caseFlags);

                // Convert the encoder output to lower-case ASCII.
                StringBuilder lowerOut = ToASCIILower(punyout);

                // Step 7: prepend the ACE prefix, then append the encoded text.
                dest = new StringBuilder();
                dest.Append(ACE_PREFIX, 0, ACE_PREFIX.Length);
                dest.Append(lowerOut);
            }

            // Step 8: enforce the maximum label length.
            if (dest.Length > MAX_LABEL_LENGTH)
            {
                throw new StringPrepParseException(
                          "The labels in the input are too long. Length > 63.",
                          IBM.ICU.Text.StringPrepParseException.LABEL_TOO_LONG_ERROR,
                          dest.ToString(), 0);
            }
            return dest;
        }