/**
 * Gets the recursive decomposition of a character from the
 * Unicode Character Database.
 * @param canonical If true, only the recursive canonical
 * decomposition is selected; otherwise the recursive
 * compatibility and canonical decomposition is selected.
 * @param ch the source character
 * @param buffer buffer to be filled with the decomposition
 */
public void GetRecursiveDecomposition(bool canonical, int ch, StringBuffer buffer)
{
    string decomp = decompose.Get(ch);
    // Stop recursing when there is no mapping, or when the mapping is a
    // compatibility decomposition but only canonical ones were requested.
    if (decomp == null || (canonical && isCompatibility.SafeGet(ch)))
    {
        UTF16Util.AppendCodePoint(buffer, ch);
        return;
    }
    int pos = 0;
    while (pos < decomp.Length)
    {
        int cp = UTF16Util.NextCodePoint(decomp, pos);
        pos += UTF16Util.CodePointLength(cp);
        GetRecursiveDecomposition(canonical, cp, buffer);
    }
}
/**
 * Decomposes text, either canonically or for compatibility,
 * replacing the contents of the target buffer.
 * The decomposition kind comes from the 'form' field: if the
 * COMPATIBILITY_MASK bit is set there, the recursive
 * compatibility decomposition is selected, otherwise the
 * recursive canonical decomposition.
 * @param source the original text, unnormalized
 * @param target the resulting normalized text
 */
private void internalDecompose(String source, StringBuffer target)
{
    StringBuffer buffer = new StringBuffer();
    // canonical-only when the compatibility bit of 'form' is clear
    bool canonical = (form & COMPATIBILITY_MASK) == 0;
    int ch;
    for (int i = 0; i < source.Length;)
    {
        // reuse the scratch buffer for each source code point
        buffer.Length = (0);
        ch = UTF16Util.NextCodePoint(source, i);
        i += UTF16Util.CodePointLength(ch);
        data.GetRecursiveDecomposition(canonical, ch, buffer);

        // add all of the characters in the decomposition.
        // (may be just the original character, if there was
        // no decomposition mapping)
        for (int j = 0; j < buffer.Length;)
        {
            ch = UTF16Util.NextCodePoint(buffer, j);
            j += UTF16Util.CodePointLength(ch);
            int chClass = data.GetCanonicalClass(ch);
            int k = target.Length; // insertion point
            if (chClass != 0)
            {
                // bubble-sort combining marks as necessary:
                // walk backwards past marks with a strictly higher
                // canonical class so canonical ordering is preserved
                int ch2;
                for (; k > 0; k -= UTF16Util.CodePointLength(ch2))
                {
                    ch2 = UTF16Util.PrevCodePoint(target, k);
                    if (data.GetCanonicalClass(ch2) <= chClass)
                    {
                        break;
                    }
                }
            }
            UTF16Util.InsertCodePoint(target, k, ch);
        }
    }
}
/**
 * Composes text in place. Target must already
 * have been decomposed (and be in canonical order).
 * @param target input: decomposed text.
 * output: the resulting normalized text.
 */
private void internalCompose(StringBuffer target)
{
    int starterPos = 0; // position of the current starter in target
    int starterCh = UTF16Util.NextCodePoint(target, 0);
    int compPos = UTF16Util.CodePointLength(starterCh); // write position of composed output
    int lastClass = data.GetCanonicalClass(starterCh);
    if (lastClass != 0)
    {
        lastClass = 256; // fix for irregular combining sequence
    }

    // Loop on the decomposed characters, combining where possible
    for (int decompPos = UTF16Util.CodePointLength(starterCh); decompPos < target.Length;)
    {
        int ch = UTF16Util.NextCodePoint(target, decompPos);
        decompPos += UTF16Util.CodePointLength(ch);
        int chClass = data.GetCanonicalClass(ch);
        int composite = data.GetPairwiseComposition(starterCh, ch);
        if (composite != NormalizerData.NOT_COMPOSITE && (lastClass < chClass || lastClass == 0))
        {
            // ch is not blocked from the starter: replace the starter
            // with the composite and drop ch from the output
            UTF16Util.SetCodePointAt(target, starterPos, composite);
            starterCh = composite;
        }
        else
        {
            if (chClass == 0)
            {
                // ch is itself a starter; it becomes the new composition base
                starterPos = compPos;
                starterCh = ch;
            }
            lastClass = chClass;
            // NOTE(review): SetCodePointAt appears to return the change in
            // string length caused by the overwrite, used to keep decompPos
            // in sync — TODO confirm against UTF16Util
            decompPos += UTF16Util.SetCodePointAt(target, compPos, ch);
            compPos += UTF16Util.CodePointLength(ch);
        }
    }
    // truncate to the composed length
    target.Length = (compPos);
}
SCount = LCount * NCount; // 11172

/**
 * For use in an applet: just load a minimal set of data instead of
 * parsing the full Unicode Character Database. Fills the same tables
 * as BuildDecompositionTables, limited to a subset of Latin-1 plus a
 * few extra characters.
 * @param canonicalClass map: code point -> canonical combining class
 * @param decompose map: code point -> decomposition string
 * @param compose map: composition pair key -> composite code point
 * @param isCompatibility set of code points whose mapping is a compatibility decomposition
 * @param isExcluded composition exclusions (unused for this minimal data set)
 */
private static void SetMinimalDecomp(IntHashtable canonicalClass, IntStringHashtable decompose,
    LongHashtable compose, BitSet isCompatibility, BitSet isExcluded)
{
    // Triples: {source character, decomposition, "K" = compatibility / "" = canonical}
    String[] decomposeData = {
        "\u005E", "\u0020\u0302", "K",
        "\u005F", "\u0020\u0332", "K",
        "\u0060", "\u0020\u0300", "K",
        "\u00A0", "\u0020", "K",
        "\u00A8", "\u0020\u0308", "K",
        "\u00AA", "\u0061", "K",
        "\u00AF", "\u0020\u0304", "K",
        "\u00B2", "\u0032", "K",
        "\u00B3", "\u0033", "K",
        "\u00B4", "\u0020\u0301", "K",
        "\u00B5", "\u03BC", "K",
        "\u00B8", "\u0020\u0327", "K",
        "\u00B9", "\u0031", "K",
        "\u00BA", "\u006F", "K",
        "\u00BC", "\u0031\u2044\u0034", "K",
        "\u00BD", "\u0031\u2044\u0032", "K",
        "\u00BE", "\u0033\u2044\u0034", "K",
        "\u00C0", "\u0041\u0300", "",
        "\u00C1", "\u0041\u0301", "",
        "\u00C2", "\u0041\u0302", "",
        "\u00C3", "\u0041\u0303", "",
        "\u00C4", "\u0041\u0308", "",
        "\u00C5", "\u0041\u030A", "",
        "\u00C7", "\u0043\u0327", "",
        "\u00C8", "\u0045\u0300", "",
        "\u00C9", "\u0045\u0301", "",
        "\u00CA", "\u0045\u0302", "",
        "\u00CB", "\u0045\u0308", "",
        "\u00CC", "\u0049\u0300", "",
        "\u00CD", "\u0049\u0301", "",
        "\u00CE", "\u0049\u0302", "",
        "\u00CF", "\u0049\u0308", "",
        "\u00D1", "\u004E\u0303", "",
        "\u00D2", "\u004F\u0300", "",
        "\u00D3", "\u004F\u0301", "",
        "\u00D4", "\u004F\u0302", "",
        "\u00D5", "\u004F\u0303", "",
        "\u00D6", "\u004F\u0308", "",
        "\u00D9", "\u0055\u0300", "",
        "\u00DA", "\u0055\u0301", "",
        "\u00DB", "\u0055\u0302", "",
        "\u00DC", "\u0055\u0308", "",
        "\u00DD", "\u0059\u0301", "",
        "\u00E0", "\u0061\u0300", "",
        "\u00E1", "\u0061\u0301", "",
        "\u00E2", "\u0061\u0302", "",
        "\u00E3", "\u0061\u0303", "",
        "\u00E4", "\u0061\u0308", "",
        "\u00E5", "\u0061\u030A", "",
        "\u00E7", "\u0063\u0327", "",
        "\u00E8", "\u0065\u0300", "",
        "\u00E9", "\u0065\u0301", "",
        "\u00EA", "\u0065\u0302", "",
        "\u00EB", "\u0065\u0308", "",
        "\u00EC", "\u0069\u0300", "",
        "\u00ED", "\u0069\u0301", "",
        "\u00EE", "\u0069\u0302", "",
        "\u00EF", "\u0069\u0308", "",
        "\u00F1", "\u006E\u0303", "",
        "\u00F2", "\u006F\u0300", "",
        "\u00F3", "\u006F\u0301", "",
        "\u00F4", "\u006F\u0302", "",
        "\u00F5", "\u006F\u0303", "",
        "\u00F6", "\u006F\u0308", "",
        "\u00F9", "\u0075\u0300", "",
        "\u00FA", "\u0075\u0301", "",
        "\u00FB", "\u0075\u0302", "",
        "\u00FC", "\u0075\u0308", "",
        "\u00FD", "\u0079\u0301", "",
        // EXTRAS, outside of Latin 1
        "\u1EA4", "\u00C2\u0301", "",
        "\u1EA5", "\u00E2\u0301", "",
        "\u1EA6", "\u00C2\u0300", "",
        "\u1EA7", "\u00E2\u0300", "",
    };

    // Pairs: {code point, canonical combining class}
    int[] classData = {
        0x0300, 230, 0x0301, 230, 0x0302, 230, 0x0303, 230,
        0x0304, 230, 0x0305, 230, 0x0306, 230, 0x0307, 230,
        0x0308, 230, 0x0309, 230, 0x030A, 230, 0x030B, 230,
        0x030C, 230, 0x030D, 230, 0x030E, 230, 0x030F, 230,
        0x0310, 230, 0x0311, 230, 0x0312, 230, 0x0313, 230,
        0x0314, 230, 0x0315, 232, 0x0316, 220, 0x0317, 220,
        0x0318, 220, 0x0319, 220, 0x031A, 232, 0x031B, 216,
        0x031C, 220, 0x031D, 220, 0x031E, 220, 0x031F, 220,
        0x0320, 220, 0x0321, 202, 0x0322, 202, 0x0323, 220,
        0x0324, 220, 0x0325, 220, 0x0326, 220, 0x0327, 202,
        0x0328, 202, 0x0329, 220, 0x032A, 220, 0x032B, 220,
        0x032C, 220, 0x032D, 220, 0x032E, 220, 0x032F, 220,
        0x0330, 220, 0x0331, 220, 0x0332, 220, 0x0333, 220,
        0x0334, 1, 0x0335, 1, 0x0336, 1, 0x0337, 1,
        0x0338, 1, 0x0339, 220, 0x033A, 220, 0x033B, 220,
        0x033C, 220, 0x033D, 230, 0x033E, 230, 0x033F, 230,
        0x0340, 230, 0x0341, 230, 0x0342, 230, 0x0343, 230,
        0x0344, 230, 0x0345, 240, 0x0360, 234, 0x0361, 234
    };

    // build the same tables we would otherwise get from the
    // Unicode Character Database, just with limited data
    for (int i = 0; i < decomposeData.Length; i += 3)
    {
        char value = decomposeData[i][0];
        String decomp = decomposeData[i + 1];
        bool compat = decomposeData[i + 2].Equals("K");
        if (compat)
        {
            isCompatibility.Set(value);
        }
        decompose.Put(value, decomp);
        if (!compat)
        {
            int first = '\u0000';
            int second = UTF16Util.NextCodePoint(decomp, 0);
            if (decomp.Length > 1)
            {
                first = second;
                second = UTF16Util.NextCodePoint(decomp, UTF16Util.CodePointLength(first));
            }
            // BUG FIX: the composition key must use the same layout as
            // BuildDecompositionTables and the Hangul loop, i.e.
            // ((long)first << 32) | second. The previous
            // (first << 16) | second encoding produced keys that the
            // pairwise-composition lookup could never match.
            long pair = ((long)first << 32) | (uint)second;
            compose.Put(pair, value);
        }
    }
    for (int i = 0; i < classData.Length;)
    {
        canonicalClass.Put(classData[i++], classData[i++]);
    }
}
/**
 * Builds the decomposition tables from a UnicodeData file.
 * @param canonicalClass map: code point -> canonical combining class
 * @param decompose map: code point -> decomposition string
 * @param compose map: composition pair key ((long)first &lt;&lt; 32 | second) -> composite code point
 * @param isCompatibility set of code points whose mapping is a compatibility decomposition
 * @param isExcluded composition exclusions to skip when filling the compose table
 */
private static void BuildDecompositionTables(
    IntHashtable canonicalClass, IntStringHashtable decompose,
    LongHashtable compose, BitSet isCompatibility, BitSet isExcluded)
{
    if (DEBUG)
    {
        Console.Out.WriteLine("Reading Unicode Character Database");
    }
    TextReader @in = null;
    try
    {
        @in = TestUtil.GetDataReader("unicode.UnicodeData.txt");
    }
    catch (Exception)
    {
        Console.Error.WriteLine("Failed to read UnicodeData.txt");
        Environment.Exit(1);
    }
    int value;
    long pair;
    int counter = 0;
    // BUG FIX: ensure the reader is disposed even if parsing throws
    try
    {
        while (true)
        {
            // read a line, discarding comments and blank lines
            String line = @in.ReadLine();
            if (line == null)
            {
                break;
            }
            int comment = line.IndexOf('#'); // strip comments
            if (comment != -1)
            {
                line = line.Substring(0, comment);
            }
            if (line.Length == 0)
            {
                continue;
            }
            if (DEBUG)
            {
                counter++;
                if ((counter & 0xFF) == 0)
                {
                    Console.Out.WriteLine("At: " + line);
                }
            }

            // find the values of the particular fields that we need
            // Sample line: 00C0;LATIN ...A GRAVE;Lu;0;L;0041 0300;;;;N;LATIN ... GRAVE;;;00E0;
            int start = 0;
            int end = line.IndexOf(';'); // code
            value = int.Parse(line.Substring(start, end - start), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
            // BUG FIX: debug probe was guarded by 'true &&' and thus always
            // evaluated; guard it with DEBUG like the 'debug2' probe below
            if (DEBUG && value == '\u00c0')
            {
                //Console.Out.WriteLine("debug: " + line);
            }
            end = line.IndexOf(';', start = end + 1); // name
            /*String name = line.substring(start,end);*/
            end = line.IndexOf(';', start = end + 1); // general category
            end = line.IndexOf(';', start = end + 1); // canonical class

            // check consistency: canonical classes must be from 0 to 255
            int cc = int.Parse(line.Substring(start, end - start), CultureInfo.InvariantCulture);
            if (cc != (cc & 0xFF))
            {
                Console.Error.WriteLine("Bad canonical class at: " + line);
            }
            canonicalClass.Put(value, cc);
            end = line.IndexOf(';', start = end + 1); // BIDI
            end = line.IndexOf(';', start = end + 1); // decomp

            // decomp requires more processing.
            // store whether it is canonical or compatibility.
            // store the decomp in one table, and the reverse mapping (from pairs) in another
            if (start != end)
            {
                String segment = line.Substring(start, end - start);
                bool compat = segment[0] == '<'; // e.g. "<compat> 0020 0302"
                if (compat)
                {
                    isCompatibility.Set(value);
                }
                String decomp = fromHex(segment);

                // check consistency: all canon decomps must be singles or pairs!
                int decompLen = UTF16Util.CountCodePoint(decomp);
                if (decompLen < 1 || decompLen > 2 && !compat)
                {
                    Console.Error.WriteLine("Bad decomp at: " + line);
                }
                decompose.Put(value, decomp);

                // only compositions are canonical pairs
                // skip if script exclusion
                if (!compat && !isExcluded.Get(value))
                {
                    int first = '\u0000';
                    int second = UTF16Util.NextCodePoint(decomp, 0);
                    if (decompLen > 1)
                    {
                        first = second;
                        second = UTF16Util.NextCodePoint(decomp, UTF16Util.CodePointLength(first));
                    }
                    // store composition pair in single integer
                    pair = ((long)first << 32) | (uint)second;
                    if (DEBUG && value == '\u00C0')
                    {
                        Console.Out.WriteLine("debug2: " + line);
                    }
                    compose.Put(pair, value);
                }
                else if (DEBUG)
                {
                    Console.Out.WriteLine("Excluding: " + decomp);
                }
            }
        }
    }
    finally
    {
        @in.Dispose();
    }
    if (DEBUG)
    {
        Console.Out.WriteLine("Done reading Unicode Character Database");
    }

    // add algorithmic Hangul decompositions
    // this is more compact if done at runtime, but for simplicity we
    // do it this way.
    if (DEBUG)
    {
        Console.Out.WriteLine("Adding Hangul");
    }
    for (int SIndex = 0; SIndex < SCount; ++SIndex)
    {
        int TIndex = SIndex % TCount;
        char first, second;
        if (TIndex != 0)
        {
            // triple: (LV syllable, trailing consonant)
            first = (char)(SBase + SIndex - TIndex);
            second = (char)(TBase + TIndex);
        }
        else
        {
            // pair: (leading consonant, vowel)
            first = (char)(LBase + SIndex / NCount);
            second = (char)(VBase + (SIndex % NCount) / TCount);
        }
        pair = ((long)first << 32) | second;
        value = SIndex + SBase;
        decompose.Put(value, Convert.ToString(first, CultureInfo.InvariantCulture) + second);
        compose.Put(pair, value);
    }
    if (DEBUG)
    {
        Console.Out.WriteLine("Done adding Hangul");
    }
}