Ejemplo n.º 1
0
        /// <summary>
        /// Throwaway driver used to observe how <c>CaseIterator</c> behaves.
        /// It first prints every string the iterator yields for a couple of
        /// sample inputs, then collects the code points for which the iterator
        /// yields exactly one string equal to the code point itself, and
        /// finally prints how that set differs from <c>[:^lc:]</c>.
        /// </summary>
        ///
        /// <param name="args">Command-line arguments; not used.</param>
        static public void Main(String[] args)
        {
            System.IO.TextWriter output = System.Console.Out;
            string[] samples = { "fiss", "h\u03a3" };
            CaseIterator caseIter = new CaseIterator();

            // Part 1: dump every string the iterator produces for each sample.
            foreach (string sample in samples)
            {
                output.WriteLine();
                output.WriteLine("Testing: " + toName.Transliterate(sample));
                output.WriteLine();

                caseIter.Reset(sample);
                int produced = 0;
                string variant;
                while ((variant = caseIter.Next()) != null)
                {
                    output.WriteLine(toName.Transliterate(variant));
                    produced++;
                }
                output.WriteLine("Total: " + produced);
            }

            // Part 2: generate a list of all caseless characters -- characters
            // whose case closure is themselves.
            const int lastCodePoint = 0x10FFFF;
            UnicodeSet caseless = new UnicodeSet();

            for (int codePoint = 0; codePoint <= lastCodePoint; ++codePoint)
            {
                string text = IBM.ICU.Text.UTF16.ValueOf(codePoint);
                caseIter.Reset(text);

                int seen = 0;
                string onlyResult = null;
                string current;
                while ((current = caseIter.Next()) != null)
                {
                    onlyResult = current;
                    ++seen;
                    if (seen > 1)
                    {
                        // A second result already rules this code point out,
                        // so there is no need to drain the iterator.
                        break;
                    }
                }

                bool singleResult = seen == 1;
                if (singleResult && onlyResult.Equals(text))
                {
                    caseless.Add(codePoint);
                }
            }

            output.WriteLine("caseless = " + caseless.ToPattern(true));

            // Part 3: print the two one-sided differences between the computed
            // set and [:^lc:], reusing a single scratch set for both.
            UnicodeSet notLc = new UnicodeSet("[:^lc:]");
            UnicodeSet scratch = new UnicodeSet();

            scratch.Set(notLc);
            scratch.RemoveAll(caseless);
            output.WriteLine("[:^lc:] - caseless = " + scratch.ToPattern(true));

            scratch.Set(caseless);
            scratch.RemoveAll(notLc);
            output.WriteLine("caseless - [:^lc:] = " + scratch.ToPattern(true));
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Flattens the string entries of a Unicode set in place, e.g.
        /// <c>[abc{da}]</c> becomes <c>[abcd]</c>. The set is only rewritten
        /// when at least one string entry was encountered; otherwise it is
        /// left untouched.
        /// </summary>
        ///
        /// <param name="exemplar1">The set to flatten; modified in place.</param>
        /// <returns>The same <paramref name="exemplar1"/> instance, for chaining.</returns>
        public static UnicodeSet Flatten(UnicodeSet exemplar1)
        {
            UnicodeSet flattened = new UnicodeSet();
            bool sawString = false;
            UnicodeSetIterator ranges = new UnicodeSetIterator(exemplar1);

            while (ranges.NextRange())
            {
                if (ranges.codepoint != IBM.ICU.Text.UnicodeSetIterator.IS_STRING)
                {
                    // Ordinary code point range: copy it across unchanged.
                    flattened.Add(ranges.codepoint, ranges.codepointEnd);
                    continue;
                }

                // String entry: merge the string into the result via AddAll.
                flattened.AddAll(ranges.str0);
                sawString = true;
            }

            if (!sawString)
            {
                // Nothing to flatten, so the input is returned unmodified.
                return(exemplar1);
            }

            exemplar1.Set(flattened);
            return(exemplar1);
        }