C# (CSharp) IBM.ICU.Text UCharacterIterator.GetIndex Examples

Programming Language: C# (CSharp)

Namespace/Package Name: IBM.ICU.Text

Method/Function: GetIndex

Examples at hotexamples.com: 3

C# (CSharp) IBM.ICU.Text UCharacterIterator.GetIndex - 3 examples found. These are the top rated real world C# (CSharp) examples of IBM.ICU.Text.UCharacterIterator.GetIndex extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

GetText(6)

GetIndex(3)

Next(2)

NextCodePoint(2)

SetIndex(1)

SetToStart(1)

Example #1

Show file

File: StringPrep.cs Project: bdqnghi/j2cstranslator

        /// <summary>
        /// Runs the mapping step over every code point supplied by
        /// <paramref name="iter"/> and collects the outcome in a new buffer.
        /// </summary>
        ///
        /// <param name="iter">Iterator over the input; it is read with NextCodePoint() until DONE is returned.</param>
        /// <param name="options">A bit set of options; only the ALLOW_UNASSIGNED bit is examined here.</param>
        /// <returns>The buffer holding the mapped text</returns>
        /// <exception cref="StringPrepParseException">An UNASSIGNED code point was read while ALLOW_UNASSIGNED was not set.</exception>
        private StringBuilder Map(UCharacterIterator iter, int options)
        {
            StringPrep.Values entry  = new StringPrep.Values();
            StringBuilder     mapped = new StringBuilder();
            bool rejectUnassigned    = !((options & ALLOW_UNASSIGNED) > 0);

            int codePoint;
            while ((codePoint = iter.NextCodePoint()) != IBM.ICU.Text.UForwardCharacterIterator_Constants.DONE)
            {
                char lookup = GetCodePointValue(codePoint);
                GetValues(lookup, entry);

                // An unassigned code point is an error unless the caller opted in.
                if (entry.type == UNASSIGNED && rejectUnassigned)
                {
                    throw new StringPrepParseException(
                              "An unassigned code point was found in the input",
                              IBM.ICU.Text.StringPrepParseException.UNASSIGNED_ERROR,
                              iter.GetText(), iter.GetIndex());
                }

                if (entry.type == MAP)
                {
                    if (entry.isIndex)
                    {
                        // The value is an offset into mappingData. The run length is
                        // implied by which index range the offset falls into; outside
                        // the three fixed ranges it is stored in front of the run.
                        int start = entry.value_ren;
                        int count;

                        if (indexes[ONE_UCHAR_MAPPING_INDEX_START] <= start &&
                            start < indexes[TWO_UCHARS_MAPPING_INDEX_START])
                        {
                            count = 1;
                        }
                        else if (indexes[TWO_UCHARS_MAPPING_INDEX_START] <= start &&
                                 start < indexes[THREE_UCHARS_MAPPING_INDEX_START])
                        {
                            count = 2;
                        }
                        else if (indexes[THREE_UCHARS_MAPPING_INDEX_START] <= start &&
                                 start < indexes[FOUR_UCHARS_MAPPING_INDEX_START])
                        {
                            count = 3;
                        }
                        else
                        {
                            count = mappingData[start];
                            start++;
                        }

                        // The replacement run takes the place of the source code point.
                        mapped.Append(mappingData, start, count);
                        continue;
                    }

                    // Otherwise the value is a delta applied to the code point itself.
                    codePoint -= entry.value_ren;
                }
                else if (entry.type == DELETE)
                {
                    // Deleted code points are consumed without producing output.
                    continue;
                }

                // Emit the (possibly shifted) source code point.
                IBM.ICU.Text.UTF16.Append(mapped, codePoint);
            }

            return mapped;
        }

Example #2

Show file

File: StringPrep.cs Project: bdqnghi/j2cstranslator

        /*
         * boolean isLabelSeparator(int ch){ int result = getCodePointValue(ch); if(
         * (result & 0x07) == LABEL_SEPARATOR){ return true; } return false; }
         */
        /*
         * 1) Map -- For each character in the input, check if it has a mapping and,
         * if so, replace it with its mapping.
         *
         * 2) Normalize -- Possibly normalize the result of step 1 using Unicode
         * normalization.
         *
         * 3) Prohibit -- Check for any characters that are not allowed in the
         * output. If any are found, return an error.
         *
         * 4) Check bidi -- Possibly check for right-to-left characters, and if any
         * are found, make sure that the whole string satisfies the requirements for
         * bidirectional strings. If the string does not satisfy the requirements
         * for bidirectional strings, return an error. [Unicode3.2] defines several
         * bidirectional categories; each character has one bidirectional category
         * assigned to it. For the purposes of the requirements below, an
         * "RandALCat character" is a character that has Unicode bidirectional
         * categories "R" or "AL"; an "LCat character" is a character that has
         * Unicode bidirectional category "L". Note that there are many
         * characters which fall in neither of the above
         * definitions; Latin digits (<U+0030> through <U+0039>) are examples of
         * this because they have bidirectional category "EN".
         *
         * In any profile that specifies bidirectional character handling, all three
         * of the following requirements MUST be met:
         *
         * 1) The characters in section 5.8 MUST be prohibited.
         *
         * 2) If a string contains any RandALCat character, the string MUST NOT
         * contain any LCat character.
         *
         * 3) If a string contains any RandALCat character, a RandALCat character
         * MUST be the first character of the string, and a RandALCat character MUST
         * be the last character of the string.
         */
        /// <summary>
        /// Prepare the input buffer for use in applications with the given profile.
        /// This operation maps, normalizes (NFKC), checks for prohibited and BiDi
        /// characters in the order defined by RFC 3454 depending on the options
        /// specified in the profile.
        /// </summary>
        ///
        /// <param name="src">A UCharacterIterator object containing the source string</param>
        /// <param name="options">A bit set of options: - StringPrep.NONE Prohibit processing of unassigned code points in the input - StringPrep.ALLOW_UNASSIGNED Treat the unassigned code points in the input as normal Unicode code points.</param>
        /// <returns>A StringBuilder containing the output</returns>
        /// <exception cref="StringPrepParseException">A prohibited code point was found, or the BiDi check is enabled and the text violates the BiDi requirements. Exceptions raised by the mapping step are not caught here.</exception>
        /// @stable ICU 2.8
        public StringBuilder Prepare(UCharacterIterator src, int options)
        {
            // map
            StringBuilder mapOut  = Map(src, options);
            StringBuilder normOut = mapOut;    // used as-is when normalization is disabled

            if (doNFKC)
            {
                // normalize
                normOut = Normalize(mapOut);
            }

            int  ch;
            char result;
            // The remaining checks run over the mapped (and possibly normalized)
            // text, so every reported position is an offset into that text.
            UCharacterIterator iter = IBM.ICU.Text.UCharacterIterator.GetInstance(normOut);

            StringPrep.Values val = new StringPrep.Values();
            // CHAR_DIRECTION_COUNT serves as the "nothing seen yet" marker.
            int  direction = IBM.ICU.Lang.UCharacterEnums.ECharacterDirection.CHAR_DIRECTION_COUNT, firstCharDir = IBM.ICU.Lang.UCharacterEnums.ECharacterDirection.CHAR_DIRECTION_COUNT;
            int  rtlPos = -1, ltrPos = -1;
            bool rightToLeft = false, leftToRight = false;

            while ((ch = iter.NextCodePoint()) != IBM.ICU.Text.UForwardCharacterIterator_Constants.DONE)
            {
                result = GetCodePointValue(ch);
                GetValues(result, val);

                if (val.type == PROHIBITED)
                {
                    // Report where the offending code point starts. The iterator
                    // index is already past it (the BiDi bookkeeping below relies
                    // on the same thing), so step back by its UTF-16 length.
                    // val.value_ren holds mapping data, not a text offset.
                    int prohibitedPos = iter.GetIndex() - ((ch > 0xFFFF) ? 2 : 1);

                    throw new StringPrepParseException(
                              "A prohibited code point was found in the input",
                              IBM.ICU.Text.StringPrepParseException.PROHIBITED_ERROR,
                              iter.GetText(), prohibitedPos);
                }

                if (checkBiDi)
                {
                    direction = bdp.GetClass(ch);
                    if (firstCharDir == IBM.ICU.Lang.UCharacterEnums.ECharacterDirection.CHAR_DIRECTION_COUNT)
                    {
                        firstCharDir = direction;
                    }
                    if (direction == IBM.ICU.Lang.UCharacterEnums.ECharacterDirection.LEFT_TO_RIGHT)
                    {
                        leftToRight = true;
                        ltrPos      = iter.GetIndex() - 1;
                    }
                    if (direction == IBM.ICU.Lang.UCharacterEnums.ECharacterDirection.RIGHT_TO_LEFT ||
                        direction == IBM.ICU.Lang.UCharacterEnums.ECharacterDirection.RIGHT_TO_LEFT_ARABIC)
                    {
                        rightToLeft = true;
                        rtlPos      = iter.GetIndex() - 1;
                    }
                }
            }

            if (checkBiDi)
            {
                // Both BiDi failures report the later of the last LTR / last RTL positions.
                int bidiErrorPos = (rtlPos > ltrPos) ? rtlPos : ltrPos;

                // satisfy 2: RandALCat and LCat characters must not be mixed
                if (leftToRight && rightToLeft)
                {
                    throw new StringPrepParseException(
                              "The input does not conform to the rules for BiDi code points.",
                              IBM.ICU.Text.StringPrepParseException.CHECK_BIDI_ERROR,
                              iter.GetText(), bidiErrorPos);
                }

                // satisfy 3: if any RandALCat character is present, both the first
                // and the last character must be RandALCat. After the loop,
                // "direction" still holds the class of the last code point read.
                bool firstIsRtl = firstCharDir == IBM.ICU.Lang.UCharacterEnums.ECharacterDirection.RIGHT_TO_LEFT ||
                                  firstCharDir == IBM.ICU.Lang.UCharacterEnums.ECharacterDirection.RIGHT_TO_LEFT_ARABIC;
                bool lastIsRtl  = direction == IBM.ICU.Lang.UCharacterEnums.ECharacterDirection.RIGHT_TO_LEFT ||
                                  direction == IBM.ICU.Lang.UCharacterEnums.ECharacterDirection.RIGHT_TO_LEFT_ARABIC;

                if (rightToLeft && !(firstIsRtl && lastIsRtl))
                {
                    throw new StringPrepParseException(
                              "The input does not conform to the rules for BiDi code points.",
                              IBM.ICU.Text.StringPrepParseException.CHECK_BIDI_ERROR,
                              iter.GetText(), bidiErrorPos);
                }
            }
            return normOut;
        }

Example #3

Show file

File: IDNA.cs Project: bdqnghi/j2cstranslator

        /// <summary>
        /// Function that implements the ToUnicode operation as defined in the IDNA
        /// RFC. This operation is done on <b>single labels</b> before sending them
        /// to something that expects Unicode names. A label is an individual part
        /// of a domain name. Labels are usually separated by dots; e.g.
        /// "www.example.com" is composed of 3 labels: "www", "example", and "com".
        /// </summary>
        ///
        /// <remarks>
        /// The original text of <paramref name="src"/> is returned when name
        /// preparation throws, when the prepared text does not start with the ACE
        /// prefix, when Punycode decoding fails, or when re-encoding the decoded
        /// text does not reproduce the prepared text. An exception thrown by
        /// ConvertToASCII during that verification is not caught here. The
        /// iterator is advanced by this call.
        /// </remarks>
        ///
        /// <param name="src">The input string as UCharacterIterator to be processed</param>
        /// <param name="options">A bit set of options: - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points and do not use STD3 ASCII rules. If unassigned code points are found the operation fails with ParseException. - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations. If this option is set, the unassigned code points in the input are treated as normal Unicode code points. - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions. If this option is set and the input does not satisfy STD3 rules, the operation will fail with ParseException.</param>
        /// <returns>The converted string as a StringBuilder</returns>
        /// <exception cref="ParseException"></exception>
        /// @stable ICU 2.8
        public static StringBuilder ConvertToUnicode(UCharacterIterator src,
                                                     int options)
        {
            // No case flags are requested from the Punycode decoder.
            bool[] caseFlags = null;

            // Remember the starting position so step 2 can re-read the same text.
            int startIndex = src.GetIndex();

            // step 1: find out if all the code units in src are ASCII
            bool hasNonAscii = false;
            for (int unit = src.Next();
                 unit != IBM.ICU.Text.UForwardCharacterIterator_Constants.DONE;
                 unit = src.Next())
            {
                if (unit > 0x7F)
                {
                    hasNonAscii = true;
                }
            }

            StringBuilder prepared;
            if (hasNonAscii)
            {
                try
                {
                    // step 2: process the string
                    src.SetIndex(startIndex);
                    prepared = singleton.namePrep.Prepare(src, options);
                }
                catch (StringPrepParseException)
                {
                    return new StringBuilder(src.GetText());
                }
            }
            else
            {
                // pure ASCII input is used unchanged
                prepared = new StringBuilder(src.GetText());
            }

            // TODO:
            // The RFC states that
            // <quote>
            // ToUnicode never fails. If any step fails, then the original input
            // is returned immediately in that step.
            // </quote>

            // step 3: verify ACE Prefix
            if (!StartsWithPrefix(prepared))
            {
                // NOTE: a check of the STD3 ASCII rules for labels without the ACE
                // prefix used to live here and is disabled; such labels are
                // returned as the original text.
                return new StringBuilder(src.GetText());
            }

            // step 4: Remove the ACE Prefix
            String encoded = prepared.ToString(ACE_PREFIX.Length, prepared.Length - ACE_PREFIX.Length);

            // step 5: Decode using punycode
            StringBuilder decoded;
            try
            {
                decoded = IBM.ICU.Text.Punycode.Decode(new StringBuilder(encoded), caseFlags);
            }
            catch (StringPrepParseException)
            {
                return new StringBuilder(src.GetText());
            }

            if (decoded == null)
            {
                return new StringBuilder(src.GetText());
            }

            // step 6: Apply toASCII
            StringBuilder reencoded = ConvertToASCII(decoded, options);

            // step 7: verify that re-encoding reproduces the prepared text
            if (CompareCaseInsensitiveASCII(prepared, reencoded) != 0)
            {
                // verification failed: fall back to the original text instead of throwing
                return new StringBuilder(src.GetText());
            }

            // step 8: return output of step 5
            return decoded;
        }