Example #1
0
        /// <summary>
        /// Walks every code point from U+0000 through U+10FFFF and reports a failure when
        /// <c>UScript.GetScript</c> returns <c>UScript.InvalidCode</c>, or when the long or
        /// short name looked up for the returned code contains the marker text
        /// "INVALID" / "INV".
        /// </summary>
        public void TestAllCodepoints()
        {
            for (int i = 0; i <= 0x10ffff; i++)
            {
                int code = UScript.GetScript(i);
                if (code == UScript.InvalidCode)
                {
                    Errln("UScript.getScript for codepoint 0x" + Hex(i) + " failed");
                }

                // Each check names the API that actually failed so a report can be
                // traced to the right lookup.
                string id = UScript.GetName(code);
                if (id.IndexOf("INVALID", StringComparison.Ordinal) >= 0)
                {
                    Errln("UScript.getName for codepoint 0x" + Hex(i) + " failed");
                }

                string abbr = UScript.GetShortName(code);
                if (abbr.IndexOf("INV", StringComparison.Ordinal) >= 0)
                {
                    Errln("UScript.getShortName for codepoint 0x" + Hex(i) + " failed");
                }
            }
        }
Example #2
0
            /// <summary>
            /// Looks up the short name for <paramref name="testCode"/> via
            /// <c>UScript.GetShortName</c> and reports a failure through <c>Errln</c>
            /// when it differs from <paramref name="expected"/>.
            /// </summary>
            /// <param name="testCode">Script code to look up.</param>
            /// <param name="expected">Short name the lookup is expected to return.</param>
            public void TestGetShortName(int testCode, string expected)
            {
                string actual = UScript.GetShortName(testCode);

                bool matches = expected.Equals(actual);
                if (matches)
                {
                    return;
                }

                string message = "Error testing UScript.getShortName(). Got: " + actual + " Expected: " + expected;
                Errln(message);
            }
Example #3
0
        /// <summary>
        /// Cross-checks the per-script metadata exposed by <c>UScript</c> (usage, sample string,
        /// right-to-left flag, cased flag, breaks-between-letters flag) against character
        /// property sets built with <c>UnicodeSet</c>, for every script code below
        /// <c>UScript.CodeLimit</c>.
        /// </summary>
        public void TestScriptMetadata()
        {
            // Characters with Bidi_Class R or AL, excluding unassigned code points and the Common script.
            UnicodeSet rtl = new UnicodeSet("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]");
            // So far, sample characters are uppercase.
            // Georgian is special.
            UnicodeSet cased = new UnicodeSet("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]");

            for (int sc = 0; sc < UScript.CodeLimit; ++sc)
            {
                String      sn        = UScript.GetShortName(sc);
                ScriptUsage usage     = UScript.GetUsage(sc);
                String      sample    = UScript.GetSampleString(sc);
                // Set of all characters whose Script property value is sc.
                UnicodeSet  scriptSet = new UnicodeSet();
                scriptSet.ApplyInt32PropertyValue(UProperty.Script, sc);
                if (usage == ScriptUsage.NotEncoded)
                {
                    // A script reported as not encoded must expose no sample, no flags and no characters.
                    assertTrue(sn + " not encoded, no sample", sample.Length == 0);  // Java 6: sample.isEmpty()
                    assertFalse(sn + " not encoded, not RTL", UScript.IsRightToLeft(sc));
                    assertFalse(sn + " not encoded, not LB letters", UScript.BreaksBetweenLetters(sc));
                    assertFalse(sn + " not encoded, not cased", UScript.IsCased(sc));
                    assertTrue(sn + " not encoded, no characters", scriptSet.IsEmpty);
                }
                else
                {
                    assertFalse(sn + " encoded, has a sample character", sample.Length == 0);  // Java 6: sample.isEmpty()
                    int firstChar  = sample.CodePointAt(0);
                    // NOTE(review): GetCharScript is defined outside this view; presumably it maps a
                    // script code to the script its characters are actually assigned to — confirm.
                    int charScript = GetCharScript(sc);
                    assertEquals(sn + " script(sample(script))",
                                 charScript, UScript.GetScript(firstChar));
                    // The metadata flags must agree with membership of the sample's first code point.
                    assertEquals(sn + " RTL vs. set", rtl.Contains(firstChar), UScript.IsRightToLeft(sc));
                    assertEquals(sn + " cased vs. set", cased.Contains(firstChar), UScript.IsCased(sc));
                    // The script has characters of its own exactly when it maps to itself.
                    assertEquals(sn + " encoded, has characters", sc == charScript, !scriptSet.IsEmpty);
                    // Strip this script's characters from the running sets; whatever is left after
                    // the loop belongs to scripts that were not flagged RTL / cased.
                    if (UScript.IsRightToLeft(sc))
                    {
                        rtl.RemoveAll(scriptSet);
                    }
                    if (UScript.IsCased(sc))
                    {
                        cased.RemoveAll(scriptSet);
                    }
                }
            }
            // Every RTL / cased character must have been claimed by a script flagged as such.
            assertEquals("no remaining RTL characters", "[]", rtl.ToPattern(true));
            assertEquals("no remaining cased characters", "[]", cased.ToPattern(true));

            // Spot checks of the breaks-between-letters flag.
            assertTrue("Hani breaks between letters", UScript.BreaksBetweenLetters(UScript.Han));
            assertTrue("Thai breaks between letters", UScript.BreaksBetweenLetters(UScript.Thai));
            assertFalse("Latn does not break between letters", UScript.BreaksBetweenLetters(UScript.Latin));
        }
Example #4
0
        /// <summary>
        /// Runs the analyzer field <c>a</c> over the text "This is a test" and checks, for every
        /// token produced, that the script attribute reports Latin: its code, its long and
        /// short names, and the reflected string form of the token stream.
        /// </summary>
        public void TestTokenAttributes()
        {
            using (TokenStream stream = a.GetTokenStream("dummy", "This is a test"))
            {
                IScriptAttribute script = stream.AddAttribute<IScriptAttribute>();
                stream.Reset();

                while (stream.IncrementToken())
                {
                    // Numeric script code first, then both name forms.
                    assertEquals(UScript.Latin, script.Code);
                    assertEquals(UScript.GetName(UScript.Latin), script.GetName());
                    assertEquals(UScript.GetShortName(UScript.Latin), script.GetShortName());

                    // The reflected attribute dump must mention the script as well.
                    string reflected = stream.ReflectAsString(false);
                    assertTrue(reflected.Contains("script=Latin"));
                }

                stream.End();
            }
        }
Example #5
0
 /// <summary>
 /// Checks that every script code in the range [0, <c>UScript.CodeLimit</c>) has a
 /// non-empty long name and a non-empty short name, reporting each missing name
 /// through <c>Errln</c>.
 /// </summary>
 public void TestScriptNames()
 {
     for (int i = 0; i < UScript.CodeLimit; i++)
     {
         // A null name is reported as a failure just like an empty one.
         string name = UScript.GetName(i);
         if (string.IsNullOrEmpty(name))
         {
             Errln("FAILED: getName for code : " + i);
         }

         string shortName = UScript.GetShortName(i);
         if (string.IsNullOrEmpty(shortName))
         {
             Errln("FAILED: getShortName for code : " + i);
         }
     }
 }
Example #6
0
        /// <summary>
        /// Formats an array of script codes as their short names separated by single spaces.
        /// </summary>
        /// <param name="scripts">Script codes to format; may be <c>null</c>.</param>
        /// <returns>
        /// The literal text "null" when <paramref name="scripts"/> is <c>null</c>; otherwise the
        /// short names returned by <c>UScript.GetShortName</c>, joined with a space.
        /// </returns>
        private static String ScriptsToString(int[] scripts)
        {
            if (scripts == null)
            {
                return "null";
            }

            StringBuilder joined = new StringBuilder();
            for (int index = 0; index < scripts.Length; index++)
            {
                // A separator is written only once the builder already holds text.
                if (joined.Length != 0)
                {
                    joined.Append(' ');
                }

                string shortName = UScript.GetShortName(scripts[index]);
                joined.Append(shortName);
            }

            return joined.ToString();
        }
Example #7
0
        /// <summary>
        /// Checks the long and short names of every script code from <c>UScript.Balinese</c> up to
        /// (but excluding) <c>UScript.CodeLimit</c> against the expected tables below, and checks
        /// that <c>UScript.GetCode</c> maps each expected short name back to exactly one code,
        /// namely the one at the matching offset from <c>UScript.Balinese</c>.
        /// </summary>
        public void TestNewCode()
        {
            /*
             * These script codes were originally added to ICU pre-3.6, so that ICU would
             * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
             * These script codes were added with only short names because we don't
             * want to invent long names ourselves.
             * Unicode 5 and later encode some of these scripts and give them long names.
             * Whenever this happens, the long script names here need to be updated.
             */
            string[] expectedLong = new string[] {
                "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
                "Egyd", "Egyh", "Egyptian_Hieroglyphs",
                "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
                "Javanese", "Kayah_Li", "Latf", "Latg",
                "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
                "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
                "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
                "Zxxx", "Unknown",
                "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
                "Moon", "Meetei_Mayek",
                /* new in ICU 4.0 */
                "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
                "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
                "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
                "Zmth", "Zsym",
                /* new in ICU 4.4 */
                "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
                /* new in ICU 4.6 */
                "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
                "Loma", "Mende_Kikakui", "Meroitic_Cursive",
                "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
                /* new in ICU 4.8 */
                "Afak", "Jurc", "Mro", "Nushu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole",
                /* new in ICU 49 */
                "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
                /* new in ICU 52 */
                "Caucasian_Albanian", "Mahajani",
                /* new in ICU 54 */
                "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham",
                // new in ICU 58
                "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye",
                // new in ICU 60
                "Masaram_Gondi", "Soyombo", "Zanabazar_Square"
            };
            string[] expectedShort = new string[] {
                "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
                "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
                "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
                "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
                "Zxxx", "Zzzz",
                "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
                "Moon", "Mtei",
                /* new in ICU 4.0 */
                "Armi", "Avst", "Cakm", "Kore",
                "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
                "Zmth", "Zsym",
                /* new in ICU 4.4 */
                "Bamu", "Lisu", "Nkgb", "Sarb",
                /* new in ICU 4.6 */
                "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
                "Narb", "Nbat", "Palm", "Sind", "Wara",
                /* new in ICU 4.8 */
                "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
                /* new in ICU 49 */
                "Hluw", "Khoj", "Tirh",
                /* new in ICU 52 */
                "Aghb", "Mahj",
                /* new in ICU 54 */
                "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd",
                // new in ICU 58
                "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye",
                // new in ICU 60
                "Gonm", "Soyo", "Zanb"
            };

            // The tables are indexed by (code - UScript.Balinese); bail out early when they
            // no longer cover the code range, so the loops below cannot index out of bounds.
            if (expectedLong.Length != (UScript.CodeLimit - UScript.Balinese))
            {
                Errln("need to add new script codes in lang.TestUScript.java!");
                return;
            }
            if (expectedShort.Length != expectedLong.Length)
            {
                Errln("expectedLong and expectedShort tables differ in length: "
                      + expectedLong.Length + " vs. " + expectedShort.Length);
                return;
            }

            // Forward direction: code -> long name / short name.
            for (int offset = 0; offset < expectedLong.Length; offset++)
            {
                int code = UScript.Balinese + offset;

                string name = UScript.GetName(code);
                if (name == null || !name.Equals(expectedLong[offset]))
                {
                    Errln("UScript.getName failed for code " + code + ": " + name + " != " + expectedLong[offset]);
                }

                name = UScript.GetShortName(code);
                if (name == null || !name.Equals(expectedShort[offset]))
                {
                    Errln("UScript.getShortName failed for code " + code + ": " + name + " != " + expectedShort[offset]);
                }
            }

            // Reverse direction: short name -> exactly one code.
            for (int offset = 0; offset < expectedShort.Length; offset++)
            {
                int expectedCode = UScript.Balinese + offset;
                int[] ret = UScript.GetCode(expectedShort[offset]);
                int returned = ret == null ? 0 : ret.Length;

                if (returned != 1)
                {
                    Errln("UScript.getCode did not return expected number of codes for script " + expectedShort[offset] + ". EXPECTED: 1 GOT: " + returned);
                }
                if (returned == 0)
                {
                    // Nothing to compare; reading ret[0] would throw.
                    continue;
                }
                if (ret[0] != expectedCode)
                {
                    Errln("UScript.getCode did not return expected code for script " + expectedShort[offset] + ". EXPECTED: " + expectedCode + " GOT: " + ret[0]);
                }
            }
        }
Example #8
0
        /// <summary>
        /// Builds a test line from up to 20 characters of every script, then, for each script
        /// other than Common and Inherited, obtains the "any-&lt;script&gt;" transliterator by
        /// long name and by short name and runs each over the test line to verify that
        /// transforming does not throw. A script whose long-name transliterator cannot be
        /// created is skipped; a short-name lookup that fails after the long-name lookup
        /// succeeded is reported as an error.
        /// </summary>
        public void TestScripts()
        {
            // get a couple of characters of each script for testing
            StringBuffer testBuffer = new StringBuffer();

            for (int script = 0; script < UScript.CodeLimit; ++script)
            {
                UnicodeSet scriptChars = new UnicodeSet().ApplyPropertyAlias("script", UScript.GetName(script));
                int count = Math.Min(20, scriptChars.Count);
                for (int i = 0; i < count; ++i)
                {
                    testBuffer.Append(UTF16.ValueOf(scriptChars[i]));
                }
            }

            string testLine = testBuffer.ToString();
            Logln("Test line: " + testLine);

            int inclusion = TestFmwk.GetExhaustiveness();
            bool testedUnavailableScript = false;

            for (int script = 0; script < UScript.CodeLimit; ++script)
            {
                if (script == UScript.Common || script == UScript.Inherited)
                {
                    continue;
                }
                // if the inclusion rate is not 10, skip all but a small number of items.
                // Make sure, however, that we test at least one unavailable script
                if (inclusion < 10 && script != UScript.Latin &&
                    script != UScript.Han &&
                    script != UScript.Hiragana &&
                    testedUnavailableScript)
                {
                    continue;
                }

                string scriptName = UScript.GetName(script); // long name
                UCultureInfo locale = new UCultureInfo(scriptName);
                if (locale.Language.Equals("new") || locale.Language.Equals("pau"))
                {
                    if (logKnownIssue("11171",
                                      "long script name loosely looks like a locale ID with a known likely script"))
                    {
                        continue;
                    }
                }

                Transliterator t;
                try
                {
                    t = Transliterator.GetInstance("any-" + scriptName);
                }
                catch (Exception)
                {
                    testedUnavailableScript = true;
                    Logln("Skipping unavailable: " + scriptName);
                    continue; // we don't handle all scripts
                }
                Logln("Checking: " + scriptName);
                if (t != null)
                {
                    t.Transform(testLine); // just verify we don't crash
                }

                string shortScriptName = UScript.GetShortName(script); // 4-letter script code
                try
                {
                    t = Transliterator.GetInstance("any-" + shortScriptName);
                }
                catch (Exception)
                {
                    Errln("Transliterator.GetInstance() worked for \"any-" + scriptName +
                          "\" but not for \"any-" + shortScriptName + '\"');
                    // t still refers to the long-name transliterator here; transforming with it
                    // again would not exercise the short-name instance, so move on.
                    continue;
                }
                t.Transform(testLine); // just verify we don't crash
            }
        }
Example #9
0
 /// <summary>
 /// Returns the short name that <c>UScript.GetShortName</c> reports for
 /// <paramref name="scriptCode"/>, used here as the script's display name.
 /// </summary>
 /// <param name="scriptCode">Script code to look up.</param>
 public override string GetScriptDisplayName(int scriptCode)
     => UScript.GetShortName(scriptCode);
Example #10
0
 /// <summary>
 /// Returns the short name that <c>UScript.GetShortName</c> reports for the
 /// <c>code</c> member of this instance.
 /// </summary>
 public virtual string GetShortName()
     => UScript.GetShortName(code);