Example #1
0
        /// <summary>
        /// Ad-hoc driver that exercises <c>CaseIterator</c> and prints what it
        /// finds to standard output.
        /// </summary>
        /// <remarks>
        /// The method works in three steps:
        /// <list type="number">
        /// <item><description>For each sample string, print every string the
        /// iterator yields (passed through <c>toName</c>) and the total count.</description></item>
        /// <item><description>Walk every code point from 0 to 0x10FFFF and collect
        /// those for which the iterator yields exactly one string, equal to the
        /// code point's own string form ("caseless").</description></item>
        /// <item><description>Print the caseless set and both set differences
        /// between it and <c>[:^lc:]</c>.</description></item>
        /// </list>
        /// </remarks>
        /// <param name="args">Command-line arguments; not used.</param>
        static public void Main(String[] args)
        {
            string[]     samples  = { "fiss", "h\u03a3" };
            CaseIterator iterator = new CaseIterator();

            foreach (string sample in samples)
            {
                System.Console.Out.WriteLine();
                System.Console.Out.WriteLine("Testing: " + toName.Transliterate(sample));
                System.Console.Out.WriteLine();

                iterator.Reset(sample);

                // Drain the iterator, printing and counting each result.
                int    variantCount = 0;
                string variant;
                while ((variant = iterator.Next()) != null)
                {
                    System.Console.Out.WriteLine(toName.Transliterate(variant));
                    ++variantCount;
                }

                System.Console.Out.WriteLine("Total: " + variantCount);
            }

            // Generate a list of all caseless characters -- characters whose
            // case closure is themselves.
            UnicodeSet caseless = new UnicodeSet();

            for (int codePoint = 0; codePoint <= 0x10FFFF; codePoint++)
            {
                string text = IBM.ICU.Text.UTF16.ValueOf(codePoint);
                iterator.Reset(text);

                // At most two results are needed to decide: no result, or a
                // second result, rules the code point out.
                string first = iterator.Next();
                if (first == null)
                {
                    continue;
                }

                if (iterator.Next() != null)
                {
                    continue;
                }

                // Exactly one result: caseless only if it is the input itself.
                if (first.Equals(text))
                {
                    caseless.Add(codePoint);
                }
            }

            System.Console.Out.WriteLine("caseless = " + caseless.ToPattern(true));

            UnicodeSet notLc      = new UnicodeSet("[:^lc:]");
            UnicodeSet difference = new UnicodeSet();

            // Members of [:^lc:] that were not classified as caseless.
            difference.Set(notLc);
            difference.RemoveAll(caseless);
            System.Console.Out.WriteLine("[:^lc:] - caseless = " + difference.ToPattern(true));

            // Caseless members that fall outside [:^lc:].
            difference.Set(caseless);
            difference.RemoveAll(notLc);
            System.Console.Out.WriteLine("caseless - [:^lc:] = " + difference.ToPattern(true));
        }
 /// <summary>
 /// Removes from <paramref name="set"/> every element that is also contained in
 /// <paramref name="c"/>, leaving <paramref name="set"/> as the
 /// <i>asymmetric set difference</i> of the two sets.
 /// </summary>
 /// <param name="set">The set to remove elements from.</param>
 /// <param name="c">Set that defines which elements will be removed from
 /// <paramref name="set"/>.</param>
 /// <returns>Whatever the underlying <c>set.RemoveAll(c)</c> call returns.</returns>
 /// <exception cref="ArgumentNullException"><paramref name="set"/> is <c>null</c>.</exception>
 /// <draft>ICU4N 60.1</draft>
 /// <provisional>This API might change or be removed in a future release.</provisional>
 public static UnicodeSet RemoveAll(this UnicodeSet set, UnicodeSet c)
 {
     // Extension methods can be invoked on a null receiver, so check explicitly.
     if (set == null)
     {
         throw new ArgumentNullException(nameof(set));
     }

     return set.RemoveAll(c);
 }
Example #3
0
        /// <summary>
        /// Checks the per-script metadata reported by <c>UScript</c> (usage, sample
        /// string, right-to-left, cased, breaks-between-letters) against sets built
        /// from Unicode property patterns.
        /// </summary>
        /// <remarks>
        /// Each RTL or cased script removes its characters from the corresponding
        /// set as it is visited; both sets are expected to be empty at the end.
        /// </remarks>
        public void TestScriptMetadata()
        {
            UnicodeSet remainingRtl = new UnicodeSet("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]");
            // So far, sample characters are uppercase.
            // Georgian is special.
            UnicodeSet remainingCased = new UnicodeSet("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]");

            for (int script = 0; script < UScript.CodeLimit; script++)
            {
                string      shortName   = UScript.GetShortName(script);
                ScriptUsage usage       = UScript.GetUsage(script);
                string      sample      = UScript.GetSampleString(script);
                UnicodeSet  scriptChars = new UnicodeSet();
                scriptChars.ApplyInt32PropertyValue(UProperty.Script, script);

                if (usage == ScriptUsage.NotEncoded)
                {
                    // A script that is not encoded must report no metadata at all.
                    assertTrue(shortName + " not encoded, no sample", sample.Length == 0);  // Java 6: sample.isEmpty()
                    assertFalse(shortName + " not encoded, not RTL", UScript.IsRightToLeft(script));
                    assertFalse(shortName + " not encoded, not LB letters", UScript.BreaksBetweenLetters(script));
                    assertFalse(shortName + " not encoded, not cased", UScript.IsCased(script));
                    assertTrue(shortName + " not encoded, no characters", scriptChars.IsEmpty);
                    continue;
                }

                // Encoded script: its sample character must agree with the property sets.
                assertFalse(shortName + " encoded, has a sample character", sample.Length == 0);  // Java 6: sample.isEmpty()
                int sampleCodePoint = sample.CodePointAt(0);
                int expectedScript  = GetCharScript(script);
                assertEquals(shortName + " script(sample(script))",
                             expectedScript, UScript.GetScript(sampleCodePoint));
                assertEquals(shortName + " RTL vs. set", remainingRtl.Contains(sampleCodePoint), UScript.IsRightToLeft(script));
                assertEquals(shortName + " cased vs. set", remainingCased.Contains(sampleCodePoint), UScript.IsCased(script));
                assertEquals(shortName + " encoded, has characters", script == expectedScript, !scriptChars.IsEmpty);

                // Strike this script's characters from the sets it accounts for.
                if (UScript.IsRightToLeft(script))
                {
                    remainingRtl.RemoveAll(scriptChars);
                }

                if (UScript.IsCased(script))
                {
                    remainingCased.RemoveAll(scriptChars);
                }
            }

            assertEquals("no remaining RTL characters", "[]", remainingRtl.ToPattern(true));
            assertEquals("no remaining cased characters", "[]", remainingCased.ToPattern(true));

            assertTrue("Hani breaks between letters", UScript.BreaksBetweenLetters(UScript.Han));
            assertTrue("Thai breaks between letters", UScript.BreaksBetweenLetters(UScript.Thai));
            assertFalse("Latn does not break between letters", UScript.BreaksBetweenLetters(UScript.Latin));
        }