예제 #1
0
        /// <summary>
        /// Compares two IDN domain names for equivalence. Each input is first
        /// converted to its ASCII form with ConvertIDNToASCII (which processes the
        /// name label by label and emits a full stop between labels), and the two
        /// ASCII results are then compared with a case-insensitive ASCII
        /// comparison.
        /// </summary>
        ///
        /// <param name="s1">First IDN string as UCharacterIterator</param>
        /// <param name="s2">Second IDN string as UCharacterIterator</param>
        /// <param name="options">A bit set of options, applied to both inputs:
        /// IDNA.DEFAULT - use default options, i.e. do not process unassigned code
        /// points and do not use STD3 ASCII rules; if unassigned code points are
        /// found the operation fails.
        /// IDNA.ALLOW_UNASSIGNED - unassigned code points in the input are treated
        /// as normal Unicode code points (intended for query operations).
        /// IDNA.USE_STD3_RULES - use STD3 ASCII rules for host name syntax
        /// restrictions; if the input does not satisfy them the operation fails.</param>
        /// <returns>0 if the strings are equal, &gt; 0 if s1 &gt; s2 and &lt; 0 if s1 &lt; s2</returns>
        /// <exception cref="ArgumentException">If s1 or s2 is null.</exception>
        /// <exception cref="StringPrepParseException">If either input cannot be converted to ASCII.</exception>
        /// <remarks>Stable since ICU 2.8.</remarks>
        // TODO: optimize
        public static int Compare(UCharacterIterator s1, UCharacterIterator s2,
                                  int options)
        {
            bool bothPresent = (s1 != null) && (s2 != null);

            if (!bothPresent)
            {
                throw new ArgumentException("One of the source buffers is null");
            }

            // Reduce both names to their ASCII form; equivalence is defined on
            // that form, not on the original Unicode text.
            StringBuilder firstAscii  = ConvertIDNToASCII(s1.GetText(), options);
            StringBuilder secondAscii = ConvertIDNToASCII(s2.GetText(), options);

            return CompareCaseInsensitiveASCII(firstAscii, secondAscii);
        }
예제 #2
0
        /// <summary>
        /// Convenience function that implements the IDNToUnicode operation as
        /// defined in the IDNA RFC. It works on complete domain names, e.g.
        /// "www.example.com": the name is split at label separators, every label
        /// is passed through ConvertToUnicode, and the original separator
        /// characters are copied to the output unchanged.
        /// <b>Note:</b> IDNA RFC specifies that a conformant application should
        /// divide a domain name into separate labels, decide whether to apply
        /// allowUnassigned and useSTD3ASCIIRules on each, and then convert. This
        /// function does not offer that level of granularity; the options apply
        /// to all labels in the domain name.
        /// </summary>
        ///
        /// <param name="src">The input string to be processed</param>
        /// <param name="options">A bit set of options:
        /// IDNA.DEFAULT - use default options, i.e. do not process unassigned code
        /// points and do not use STD3 ASCII rules; if unassigned code points are
        /// found the operation fails.
        /// IDNA.ALLOW_UNASSIGNED - unassigned code points in the input are treated
        /// as normal Unicode code points (intended for query operations).
        /// IDNA.USE_STD3_RULES - use STD3 ASCII rules for host name syntax
        /// restrictions; if the input does not satisfy them the operation fails.</param>
        /// <returns>A StringBuilder holding the converted domain name</returns>
        /// <exception cref="StringPrepParseException">If an empty label is followed
        /// by a separator, or if the output is longer than MAX_DOMAIN_NAME_LENGTH.</exception>
        /// <remarks>Stable since ICU 2.8.</remarks>
        public static StringBuilder ConvertIDNToUnicode(String src, int options)
        {
            char[]        chars      = src.ToCharArray();
            int           total      = chars.Length;
            StringBuilder output     = new StringBuilder();
            int           labelStart = 0;
            int           labelEnd   = 0;

            while (true)
            {
                // labelEnd moves to the next separator, or to total if none is left
                labelEnd = GetSeparatorIndex(chars, labelEnd, total);
                bool atEnd = (labelEnd == total);

                String label = new String(chars, labelStart, labelEnd - labelStart);

                // an empty label is only tolerated as the very last one
                if (!atEnd && label.Length == 0)
                {
                    throw new StringPrepParseException(
                              "Found zero length lable after NamePrep.",
                              IBM.ICU.Text.StringPrepParseException.ZERO_LENGTH_LABEL);
                }

                UCharacterIterator labelIter = IBM.ICU.Text.UCharacterIterator.GetInstance(label);
                output.Append(ConvertToUnicode(labelIter, options));

                if (atEnd)
                {
                    break;
                }

                // Unlike the ToASCII operation, the separator is copied as-is
                // rather than being normalized.
                output.Append(chars[labelEnd]);

                // step over the separator; the next label starts right after it
                labelStart = ++labelEnd;
            }

            if (output.Length > MAX_DOMAIN_NAME_LENGTH)
            {
                throw new StringPrepParseException(
                          "The output exceed the max allowed length.",
                          IBM.ICU.Text.StringPrepParseException.DOMAIN_NAME_TOO_LONG_ERROR);
            }
            return output;
        }
예제 #3
0
        /// <summary>
        /// Convenience function that implements the IDNToASCII operation as defined
        /// in the IDNA RFC. It works on complete domain names, e.g.
        /// "www.example.com": the name is split at label separators, every label
        /// is passed through ConvertToASCII, and a full stop is written between
        /// labels regardless of which separator character the input used.
        /// This operation can fail. If it fails, the input domain name cannot be
        /// used as an Internationalized Domain Name and the application should
        /// have methods defined to deal with the failure.
        /// <b>Note:</b> IDNA RFC specifies that a conformant application should
        /// divide a domain name into separate labels, decide whether to apply
        /// allowUnassigned and useSTD3ASCIIRules on each, and then convert. This
        /// function does not offer that level of granularity; the options apply
        /// to all labels in the domain name.
        /// </summary>
        ///
        /// <param name="src">The input string to be processed</param>
        /// <param name="options">A bit set of options:
        /// IDNA.DEFAULT - use default options, i.e. do not process unassigned code
        /// points and do not use STD3 ASCII rules; if unassigned code points are
        /// found the operation fails.
        /// IDNA.ALLOW_UNASSIGNED - unassigned code points in the input are treated
        /// as normal Unicode code points (intended for query operations).
        /// IDNA.USE_STD3_RULES - use STD3 ASCII rules for host name syntax
        /// restrictions; if the input does not satisfy them the operation fails.</param>
        /// <returns>A StringBuilder holding the converted domain name</returns>
        /// <exception cref="StringPrepParseException">If a label cannot be
        /// converted, or if the output is longer than MAX_DOMAIN_NAME_LENGTH.</exception>
        /// <remarks>Stable since ICU 2.8.</remarks>
        public static StringBuilder ConvertIDNToASCII(String src, int options)
        {
            char[]        chars      = src.ToCharArray();
            int           total      = chars.Length;
            StringBuilder output     = new StringBuilder();
            int           labelStart = 0;
            int           labelEnd   = 0;

            while (true)
            {
                // labelEnd moves to the next separator, or to total if none is left
                labelEnd = GetSeparatorIndex(chars, labelEnd, total);
                bool atEnd = (labelEnd == total);

                String label = new String(chars, labelStart, labelEnd - labelStart);

                // An empty label at the very end is the root label (trailing
                // separator) and is skipped; every other label is converted.
                if (label.Length != 0 || !atEnd)
                {
                    UCharacterIterator labelIter = IBM.ICU.Text.UCharacterIterator.GetInstance(label);
                    output.Append(ConvertToASCII(labelIter, options));
                }

                if (atEnd)
                {
                    break;
                }

                // step over the separator; the next label starts right after it
                labelStart = ++labelEnd;

                // separators are normalized to a full stop in the output
                output.Append((char)FULL_STOP);
            }

            if (output.Length > MAX_DOMAIN_NAME_LENGTH)
            {
                throw new StringPrepParseException(
                          "The output exceed the max allowed length.",
                          IBM.ICU.Text.StringPrepParseException.DOMAIN_NAME_TOO_LONG_ERROR);
            }
            return output;
        }
예제 #4
0
        /*
         * boolean isLabelSeparator(int ch){ int result = getCodePointValue(ch); if(
         * (result & 0x07) == LABEL_SEPARATOR){ return true; } return false; }
         */
        /*
         * 1) Map -- For each character in the input, check if it has a mapping and,
         * if so, replace it with its mapping.
         *
         * 2) Normalize -- Possibly normalize the result of step 1 using Unicode
         * normalization.
         *
         * 3) Prohibit -- Check for any characters that are not allowed in the
         * output. If any are found, return an error.
         *
         * 4) Check bidi -- Possibly check for right-to-left characters, and if any
         * are found, make sure that the whole string satisfies the requirements for
         * bidirectional strings. If the string does not satisfy the requirements
         * for bidirectional strings, return an error. [Unicode3.2] defines several
         * bidirectional categories; each character has one bidirectional category
         * assigned to it. For the purposes of the requirements below, an
         * "RandALCat character" is a character that has Unicode bidirectional
         * categories "R" or "AL"; an "LCat character" is a character that has
         * Unicode bidirectional category "L". Note that there are many
         * characters which fall in neither of the above definitions; Latin
         * digits (<U+0030> through <U+0039>) are examples of this because they
         * have bidirectional category "EN".
         *
         * In any profile that specifies bidirectional character handling, all three
         * of the following requirements MUST be met:
         *
         * 1) The characters in section 5.8 MUST be prohibited.
         *
         * 2) If a string contains any RandALCat character, the string MUST NOT
         * contain any LCat character.
         *
         * 3) If a string contains any RandALCat character, a RandALCat character
         * MUST be the first character of the string, and a RandALCat character MUST
         * be the last character of the string.
         */
        /// <summary>
        /// Prepare the input buffer for use in applications with the given profile.
        /// The input is mapped, then normalized if the profile enables NFKC, then
        /// scanned for prohibited code points and, if the profile enables the BiDi
        /// check, validated against the bidirectional rules of RFC 3454.
        /// </summary>
        ///
        /// <param name="src">A UCharacterIterator object containing the source string</param>
        /// <param name="options">A bit set of options:
        /// StringPrep.NONE - prohibit processing of unassigned code points in the input.
        /// StringPrep.ALLOW_UNASSIGNED - treat unassigned code points in the input
        /// as normal Unicode code points.</param>
        /// <returns>A StringBuilder containing the mapped (and possibly normalized) output</returns>
        /// <exception cref="StringPrepParseException">If a prohibited code point is
        /// found, or if the BiDi check is enabled and the text violates it.</exception>
        /// <remarks>Stable since ICU 2.8.</remarks>
        public StringBuilder Prepare(UCharacterIterator src, int options)
        {
            // map, then normalize only when the profile asks for it
            StringBuilder mapped   = Map(src, options);
            StringBuilder prepared = doNFKC ? Normalize(mapped) : mapped;

            UCharacterIterator scan  = IBM.ICU.Text.UCharacterIterator.GetInstance(prepared);
            StringPrep.Values  entry = new StringPrep.Values();

            // CHAR_DIRECTION_COUNT doubles as the "no direction seen yet" marker
            int  unknownDir   = IBM.ICU.Lang.UCharacterEnums.ECharacterDirection.CHAR_DIRECTION_COUNT;
            int  firstDir     = unknownDir;
            int  lastDir      = unknownDir;
            int  lastRtlIndex = -1;
            int  lastLtrIndex = -1;
            bool sawRtl       = false;
            bool sawLtr       = false;

            for (int cp = scan.NextCodePoint();
                 cp != IBM.ICU.Text.UForwardCharacterIterator_Constants.DONE;
                 cp = scan.NextCodePoint())
            {
                char trieWord = GetCodePointValue(cp);
                GetValues(trieWord, entry);

                if (entry.type == PROHIBITED)
                {
                    throw new StringPrepParseException(
                              "A prohibited code point was found in the input",
                              IBM.ICU.Text.StringPrepParseException.PROHIBITED_ERROR,
                              scan.GetText(), entry.value_ren);
                }

                if (!checkBiDi)
                {
                    continue;
                }

                // lastDir ends up holding the direction of the final code point
                lastDir = bdp.GetClass(cp);
                if (firstDir == unknownDir)
                {
                    firstDir = lastDir;
                }

                if (lastDir == IBM.ICU.Lang.UCharacterEnums.ECharacterDirection.LEFT_TO_RIGHT)
                {
                    sawLtr       = true;
                    lastLtrIndex = scan.GetIndex() - 1;
                }

                if (lastDir == IBM.ICU.Lang.UCharacterEnums.ECharacterDirection.RIGHT_TO_LEFT ||
                    lastDir == IBM.ICU.Lang.UCharacterEnums.ECharacterDirection.RIGHT_TO_LEFT_ARABIC)
                {
                    sawRtl       = true;
                    lastRtlIndex = scan.GetIndex() - 1;
                }
            }

            if (!checkBiDi)
            {
                return prepared;
            }

            bool firstIsRtl =
                firstDir == IBM.ICU.Lang.UCharacterEnums.ECharacterDirection.RIGHT_TO_LEFT ||
                firstDir == IBM.ICU.Lang.UCharacterEnums.ECharacterDirection.RIGHT_TO_LEFT_ARABIC;
            bool lastIsRtl =
                lastDir == IBM.ICU.Lang.UCharacterEnums.ECharacterDirection.RIGHT_TO_LEFT ||
                lastDir == IBM.ICU.Lang.UCharacterEnums.ECharacterDirection.RIGHT_TO_LEFT_ARABIC;

            // requirement 2: R/AL characters must not be mixed with L characters
            bool mixesDirections = sawLtr && sawRtl;
            // requirement 3: if any R/AL character occurs, both the first and the
            // last character must be R/AL
            bool badEdges = sawRtl && !(firstIsRtl && lastIsRtl);

            if (mixesDirections || badEdges)
            {
                // both violations report the same error, positioned at the later
                // of the last RTL and last LTR positions recorded above
                throw new StringPrepParseException(
                          "The input does not conform to the rules for BiDi code points.",
                          IBM.ICU.Text.StringPrepParseException.CHECK_BIDI_ERROR,
                          scan.GetText(),
                          (lastRtlIndex > lastLtrIndex) ? lastRtlIndex : lastLtrIndex);
            }

            return prepared;
        }
예제 #5
0
        /// <summary>
        /// Applies the profile's mapping step to every code point supplied by
        /// <paramref name="iter"/> and collects the result in a new StringBuilder.
        /// Code points of type DELETE are dropped, code points of type MAP are
        /// replaced (either by a run of chars taken from mappingData or by
        /// subtracting a delta from the code point), and all other code points
        /// are copied through unchanged.
        /// </summary>
        ///
        /// <param name="iter">Iterator over the source text</param>
        /// <param name="options">Bit set of options; only ALLOW_UNASSIGNED is examined here</param>
        /// <returns>A StringBuilder containing the mapped text</returns>
        /// <exception cref="StringPrepParseException">If an unassigned code point
        /// is found and ALLOW_UNASSIGNED is not set.</exception>
        private StringBuilder Map(UCharacterIterator iter, int options)
        {
            bool              rejectUnassigned = !((options & ALLOW_UNASSIGNED) > 0);
            StringPrep.Values entry            = new StringPrep.Values();
            StringBuilder     mapped           = new StringBuilder();

            for (int cp = iter.NextCodePoint();
                 cp != IBM.ICU.Text.UForwardCharacterIterator_Constants.DONE;
                 cp = iter.NextCodePoint())
            {
                char trieWord = GetCodePointValue(cp);
                GetValues(trieWord, entry);

                // unassigned code points are an error unless explicitly allowed
                if (rejectUnassigned && entry.type == UNASSIGNED)
                {
                    throw new StringPrepParseException(
                              "An unassigned code point was found in the input",
                              IBM.ICU.Text.StringPrepParseException.UNASSIGNED_ERROR,
                              iter.GetText(), iter.GetIndex());
                }

                if (entry.type == MAP)
                {
                    if (entry.isIndex)
                    {
                        // The value is an offset into mappingData. Which index
                        // range the offset falls into tells how many chars the
                        // replacement has; outside the 1/2/3-char ranges the
                        // length is stored in mappingData just before the chars.
                        int start = entry.value_ren;
                        int count;

                        if (start >= indexes[ONE_UCHAR_MAPPING_INDEX_START] &&
                            start < indexes[TWO_UCHARS_MAPPING_INDEX_START])
                        {
                            count = 1;
                        }
                        else if (start >= indexes[TWO_UCHARS_MAPPING_INDEX_START] &&
                                 start < indexes[THREE_UCHARS_MAPPING_INDEX_START])
                        {
                            count = 2;
                        }
                        else if (start >= indexes[THREE_UCHARS_MAPPING_INDEX_START] &&
                                 start < indexes[FOUR_UCHARS_MAPPING_INDEX_START])
                        {
                            count = 3;
                        }
                        else
                        {
                            count = mappingData[start];
                            start++;
                        }

                        // copy the replacement chars; the source code point
                        // itself is not emitted
                        mapped.Append(mappingData, start, count);
                        continue;
                    }

                    // the value is a delta to subtract from the code point
                    cp -= entry.value_ren;
                }
                else if (entry.type == DELETE)
                {
                    // consume the code point without emitting anything
                    continue;
                }

                // copy the (possibly delta-adjusted) code point to the output
                IBM.ICU.Text.UTF16.Append(mapped, cp);
            }

            return mapped;
        }
예제 #6
0
 /// <summary>
 /// Convenience function that implements the IDNToUnicode operation as
 /// defined in the IDNA RFC, for input supplied as a UCharacterIterator.
 /// The iterator's text is handed to the String overload of
 /// ConvertIDNToUnicode. This operation is done on complete domain names,
 /// e.g: "www.example.com".
 /// <b>Note:</b> IDNA RFC specifies that a conformant application should
 /// divide a domain name into separate labels, decide whether to apply
 /// allowUnassigned and useSTD3ASCIIRules on each, and then convert. This
 /// function does not offer that level of granularity; the options apply
 /// to all labels in the domain name.
 /// </summary>
 ///
 /// <param name="src">The input string as UCharacterIterator to be processed</param>
 /// <param name="options">A bit set of options:
 /// IDNA.DEFAULT - use default options, i.e. do not process unassigned code
 /// points and do not use STD3 ASCII rules; if unassigned code points are
 /// found the operation fails.
 /// IDNA.ALLOW_UNASSIGNED - unassigned code points in the input are treated
 /// as normal Unicode code points (intended for query operations).
 /// IDNA.USE_STD3_RULES - use STD3 ASCII rules for host name syntax
 /// restrictions; if the input does not satisfy them the operation fails.</param>
 /// <returns>A StringBuilder holding the converted domain name</returns>
 /// <exception cref="StringPrepParseException">If the conversion fails.</exception>
 /// <remarks>Stable since ICU 2.8.</remarks>
 public static StringBuilder ConvertIDNToUnicode(UCharacterIterator src,
                                                 int options)
 {
     string domainName = src.GetText();

     return ConvertIDNToUnicode(domainName, options);
 }
예제 #7
0
        /// <summary>
        /// Function that implements the ToUnicode operation as defined in the IDNA
        /// RFC. This operation is done on <b>single labels</b> before sending it to
        /// something that expects Unicode names. A label is an individual part of a
        /// domain name. Labels are usually separated by dots; e.g.
        /// "www.example.com" is composed of 3 labels "www", "example", and "com".
        /// The label is run through NamePrep if it contains non-ASCII code units.
        /// If the result carries the ACE prefix, the remainder is Punycode-decoded
        /// and the decoded text is accepted only if converting it back with
        /// ConvertToASCII reproduces the prepared label (case-insensitively).
        /// In every other case a copy of the original text is returned.
        /// </summary>
        ///
        /// <param name="src">The input string as UCharacterIterator to be processed</param>
        /// <param name="options">A bit set of options:
        /// IDNA.DEFAULT - use default options, i.e. do not process unassigned code
        /// points and do not use STD3 ASCII rules.
        /// IDNA.ALLOW_UNASSIGNED - unassigned code points in the input are treated
        /// as normal Unicode code points (intended for query operations).
        /// IDNA.USE_STD3_RULES - use STD3 ASCII rules for host name syntax
        /// restrictions.</param>
        /// <returns>A StringBuilder holding the decoded label, or a copy of the
        /// source text if the label could not be decoded</returns>
        /// <exception cref="StringPrepParseException">NamePrep and Punycode
        /// failures are caught here. NOTE(review): the ConvertToASCII call in the
        /// verification step is not guarded, so an exception it raises would
        /// propagate to the caller - confirm whether that is intended.</exception>
        /// <remarks>Stable since ICU 2.8.</remarks>
        public static StringBuilder ConvertToUnicode(UCharacterIterator src,
                                                     int options)
        {
            // remember where the iterator stood so it can be rewound before
            // NamePrep consumes it
            int startIndex = src.GetIndex();

            // step 1: find out if all the code units in src are ASCII
            bool allAscii = true;
            for (int unit = src.Next();
                 unit != IBM.ICU.Text.UForwardCharacterIterator_Constants.DONE;
                 unit = src.Next())
            {
                if (unit > 0x7F)
                {
                    allAscii = false;
                }
            }

            StringBuilder prepared;
            if (allAscii)
            {
                // nothing to prepare; work on a copy of the source text
                prepared = new StringBuilder(src.GetText());
            }
            else
            {
                try
                {
                    // step 2: process the string
                    src.SetIndex(startIndex);
                    prepared = singleton.namePrep.Prepare(src, options);
                }
                catch (StringPrepParseException)
                {
                    // NamePrep failed: hand back the original input
                    return new StringBuilder(src.GetText());
                }
            }

            // TODO:
            // The RFC states that
            // <quote>
            // ToUnicode never fails. If any step fails, then the original input
            // is returned immediately in that step.
            // </quote>

            // step 3: verify ACE Prefix
            if (!StartsWithPrefix(prepared))
            {
                return new StringBuilder(src.GetText());
            }

            // step 4: remove the ACE Prefix
            String withoutPrefix = prepared.ToString(ACE_PREFIX.Length,
                                                     prepared.Length - ACE_PREFIX.Length);

            // step 5: decode using punycode; a decoding failure leaves no result
            bool[]        caseFlags = null;
            StringBuilder decoded;
            try
            {
                decoded = IBM.ICU.Text.Punycode.Decode(new StringBuilder(withoutPrefix), caseFlags);
            }
            catch (StringPrepParseException)
            {
                decoded = null;
            }

            if (decoded != null)
            {
                // step 6: apply ToASCII to the decoded text
                StringBuilder roundTrip = ConvertToASCII(decoded, options);

                // step 7: the round trip must reproduce the prepared label
                if (CompareCaseInsensitiveASCII(prepared, roundTrip) == 0)
                {
                    // step 8: return output of step 5
                    return decoded;
                }
            }

            // decoding or verification failed: return the original input
            return new StringBuilder(src.GetText());
        }
예제 #8
0
        /// <summary>
        /// This function implements the ToUnicode operation as defined in the IDNA
        /// RFC, for input supplied as a StringBuilder. The input is wrapped in a
        /// UCharacterIterator and handed to the iterator overload of
        /// ConvertToUnicode. This operation is done on <b>single labels</b> before
        /// sending it to something that expects Unicode names. A label is an
        /// individual part of a domain name. Labels are usually separated by dots;
        /// e.g. "www.example.com" is composed of 3 labels "www", "example", and
        /// "com".
        /// </summary>
        ///
        /// <param name="src">The input string as StringBuilder to be processed</param>
        /// <param name="options">A bit set of options:
        /// IDNA.DEFAULT - use default options, i.e. do not process unassigned code
        /// points and do not use STD3 ASCII rules.
        /// IDNA.ALLOW_UNASSIGNED - unassigned code points in the input are treated
        /// as normal Unicode code points (intended for query operations).
        /// IDNA.USE_STD3_RULES - use STD3 ASCII rules for host name syntax
        /// restrictions.</param>
        /// <returns>A StringBuilder holding the converted label</returns>
        /// <exception cref="StringPrepParseException">If the iterator overload reports a failure.</exception>
        /// <remarks>Stable since ICU 2.8.</remarks>
        public static StringBuilder ConvertToUnicode(StringBuilder src, int options)
        {
            UCharacterIterator labelIter = IBM.ICU.Text.UCharacterIterator.GetInstance(src);
            StringBuilder      converted = ConvertToUnicode(labelIter, options);

            return converted;
        }
예제 #9
0
        /// <summary>
        /// This function implements the ToASCII operation as defined in the IDNA
        /// RFC. This operation is done on <b>single labels</b> before sending it to
        /// something that expects ASCII names. A label is an individual part of a
        /// domain name. Labels are usually separated by dots; e.g.
        /// "www.example.com" is composed of 3 labels "www", "example", and "com".
        /// The label is run through NamePrep if it contains non-ASCII code units,
        /// optionally checked against the STD3 ASCII rules, and, if it is still
        /// non-ASCII, Punycode-encoded, lower-cased and prefixed with the ACE
        /// prefix.
        /// </summary>
        ///
        /// <param name="src">The input string as UCharacterIterator to be processed</param>
        /// <param name="options">A bit set of options:
        /// IDNA.DEFAULT - use default options, i.e. do not process unassigned code
        /// points and do not use STD3 ASCII rules; if unassigned code points are
        /// found the operation fails.
        /// IDNA.ALLOW_UNASSIGNED - unassigned code points in the input are treated
        /// as normal Unicode code points (intended for query operations).
        /// IDNA.USE_STD3_RULES - use STD3 ASCII rules for host name syntax
        /// restrictions; if the input does not satisfy them the operation fails.</param>
        /// <returns>A StringBuilder holding the ASCII form of the label</returns>
        /// <exception cref="StringPrepParseException">If the prepared label is
        /// empty, violates the STD3 ASCII rules (when requested), is non-ASCII yet
        /// already starts with the ACE prefix, or if the result is longer than
        /// MAX_LABEL_LENGTH. Failures raised by NamePrep also propagate.</exception>
        /// <remarks>Stable since ICU 2.8.</remarks>
        public static StringBuilder ConvertToASCII(UCharacterIterator src,
                                                   int options)
        {
            // get the options
            bool useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);
            int  ch;

            // step 1: find out if all the code units in src are ASCII
            bool srcIsASCII = true;
            while ((ch = src.Next()) != IBM.ICU.Text.UForwardCharacterIterator_Constants.DONE)
            {
                if (ch > 0x7F)
                {
                    srcIsASCII = false;
                }
            }

            src.SetToStart();
            StringBuilder processOut;

            // step 2 is performed only if the source contains non ASCII
            if (!srcIsASCII)
            {
                processOut = singleton.namePrep.Prepare(src, options);
            }
            else
            {
                processOut = new StringBuilder(src.GetText());
            }
            int poLen = processOut.Length;

            if (poLen == 0)
            {
                throw new StringPrepParseException(
                          "Found zero length lable after NamePrep.",
                          IBM.ICU.Text.StringPrepParseException.ZERO_LENGTH_LABEL);
            }

            // Re-examine the prepared text: NamePrep may have turned a non-ASCII
            // source into pure ASCII.
            srcIsASCII = true;
            // assume the prepared text contains only LDH code points
            bool srcIsLDH = true;
            int  failPos  = -1;

            // step 3 & 4
            for (int j = 0; j < poLen; j++)
            {
                ch = processOut[j];
                if (ch > 0x7F)
                {
                    srcIsASCII = false;
                }
                else if (!IsLDHChar(ch))
                {
                    // here we do not assemble surrogates
                    // since we know that LDH code points
                    // are in the ASCII range only
                    srcIsLDH = false;
                    failPos  = j;
                }
            }

            if (useSTD3ASCIIRules)
            {
                // verify 3a: only LDH code points among the ASCII ones
                if (!srcIsLDH)
                {
                    throw new StringPrepParseException(
                              "The input does not conform to the STD 3 ASCII rules",
                              IBM.ICU.Text.StringPrepParseException.STD3_ASCII_RULES_ERROR,
                              processOut.ToString(),
                              (failPos > 0) ? (failPos - 1) : failPos);
                }

                // verify 3b: no leading hyphen (poLen > 0 is guaranteed above)
                if (processOut[0] == HYPHEN)
                {
                    throw new StringPrepParseException(
                              "The input does not conform to the STD 3 ASCII rules",
                              IBM.ICU.Text.StringPrepParseException.STD3_ASCII_RULES_ERROR,
                              processOut.ToString(), 0);
                }

                // verify 3b: no trailing hyphen
                if (processOut[poLen - 1] == HYPHEN)
                {
                    throw new StringPrepParseException(
                              "The input does not conform to the STD 3 ASCII rules",
                              IBM.ICU.Text.StringPrepParseException.STD3_ASCII_RULES_ERROR,
                              processOut.ToString(), poLen - 1);
                }
            }

            StringBuilder dest;
            if (srcIsASCII)
            {
                // already ASCII: the prepared text is the result
                dest = processOut;
            }
            else
            {
                // step 5: verify the sequence does NOT begin with the ACE prefix.
                // A non-ASCII label that already carries the prefix is an error.
                if (StartsWithPrefix(processOut))
                {
                    throw new StringPrepParseException(
                              "The input starts with the ACE Prefix.",
                              IBM.ICU.Text.StringPrepParseException.ACE_PREFIX_ERROR,
                              processOut.ToString(), 0);
                }

                // step 6: encode the sequence with punycode
                bool[]        caseFlags = new bool[poLen];
                StringBuilder punyout   = IBM.ICU.Text.Punycode.Encode(processOut, caseFlags);

                // convert all codepoints to lower case ASCII
                StringBuilder lowerOut = ToASCIILower(punyout);

                // step 7: prepend the ACE prefix, then append the encoded text
                dest = new StringBuilder();
                dest.Append(ACE_PREFIX, 0, ACE_PREFIX.Length);
                dest.Append(lowerOut);
            }

            // step 8: the resulting label must not exceed the maximum length
            if (dest.Length > MAX_LABEL_LENGTH)
            {
                throw new StringPrepParseException(
                          "The labels in the input are too long. Length > 63.",
                          IBM.ICU.Text.StringPrepParseException.LABEL_TOO_LONG_ERROR,
                          dest.ToString(), 0);
            }
            return dest;
        }