Exemple #1
0
        /// <summary>
        /// Walks every code point from U+0000 through U+10FFFF and reports an error when
        /// <c>UScript.GetScript</c> yields <c>UScript.InvalidCode</c>, or when the long or
        /// short name reported for the returned script code contains the marker text
        /// "INVALID" / "INV".
        /// </summary>
        public void TestAllCodepoints()
        {
            for (int i = 0; i <= 0x10ffff; i++)
            {
                int code = UScript.GetScript(i);
                if (code == UScript.InvalidCode)
                {
                    Errln("UScript.getScript for codepoint 0x" + Hex(i) + " failed");
                }

                // Each check names the API that actually misbehaved so a failure
                // can be traced without re-running under a debugger.
                string id = UScript.GetName(code);
                if (id.IndexOf("INVALID", StringComparison.Ordinal) >= 0)
                {
                    Errln("UScript.getName for the script of codepoint 0x" + Hex(i) + " failed");
                }

                string abbr = UScript.GetShortName(code);
                if (abbr.IndexOf("INV", StringComparison.Ordinal) >= 0)
                {
                    Errln("UScript.getShortName for the script of codepoint 0x" + Hex(i) + " failed");
                }
            }
        }
Exemple #2
0
            /// <summary>
            /// Checks that <c>UScript.GetName</c> returns <paramref name="expected"/> for
            /// <paramref name="testCode"/>, reporting an error through <c>Errln</c> otherwise.
            /// </summary>
            /// <param name="testCode">Script code handed to <c>UScript.GetName</c>.</param>
            /// <param name="expected">Name the lookup is expected to produce.</param>
            public void TestGetName(int testCode, string expected)
            {
                string actual = UScript.GetName(testCode);
                bool matches = expected.Equals(actual);
                if (matches)
                {
                    return;
                }

                string message = "Error testing UScript.getName(). Got: " + actual + " Expected: " + expected;
                Errln(message);
            }
            /// <summary>
            /// Builds a spec from <paramref name="theSpec"/>. The value is stored in <c>top</c>,
            /// a canonical script name is looked up for it, and, when <c>top</c> is not itself a
            /// script name, a transliterator resource bundle is looked up for it as a locale.
            /// <c>Reset()</c> is always called last.
            /// </summary>
            /// <param name="theSpec">Script name or locale identifier to analyze.</param>
            public Spec(string theSpec)
            {
                top        = theSpec;
                spec       = null;
                scriptName = null;
                try
                {
                    // Canonicalize script name.  If top is a script name then
                    // script != UScript.INVALID_CODE.
                    int script = UScript.GetCodeFromName(top);

                    // Canonicalize script name -or- do locale->script mapping
                    int[] s = UScript.GetCode(top);
                    if (s != null)
                    {
                        // Only the first returned code is used for the canonical name.
                        scriptName = UScript.GetName(s[0]);
                        // If the script name is the same as top then it's redundant
                        if (scriptName.Equals(top, StringComparison.OrdinalIgnoreCase))
                        {
                            scriptName = null;
                        }
                    }

                    // These two fields are only assigned once the lookups above have
                    // completed; if one of them throws, the assignments are skipped.
                    isSpecLocale = false;
                    res          = null;
                    // If 'top' is not a script name, try a locale lookup
                    if (script == UScript.InvalidCode)
                    {
                        // ICU4N specific - CultureInfo doesn't support IANA culture names, so we use ULocale instead.
                        ULocale toploc = new ULocale(top);

                        //CultureInfo toploc = LocaleUtility.GetLocaleFromName(top);
                        res = (ICUResourceBundle)UResourceBundle.GetBundleInstance(ICUData.IcuTransliteratorBaseName, toploc, Transliterator.ICU_DATA_CLASS_LOADER);
                        // Make sure we got the bundle we wanted; otherwise, don't use it
                        // (res itself stays assigned; only isSpecLocale records the match).
                        if (res != null && LocaleUtility.IsFallbackOf(res.GetULocale().ToString(), top))
                        {
                            isSpecLocale = true;
                        }
                    }
                }
                catch (MissingManifestResourceException)
                {
                    ////CLOVER:OFF
                    // The constructor is called from multiple private methods
                    //  that protects an invalid scriptName
                    // Any script name resolved before the failure is discarded here.
                    scriptName = null;
                    ////CLOVER:ON
                }
                // assert(spec != top);
                Reset();
            }
Exemple #4
0
        /// <summary>
        /// Tokenizes "This is a test" with analyzer <c>a</c> and asserts that every token
        /// carries the Latin script code, the matching long and short script names, and a
        /// reflected attribute string containing "script=Latin".
        /// </summary>
        public void TestTokenAttributes()
        {
            using TokenStream stream = a.GetTokenStream("dummy", "This is a test");
            IScriptAttribute script = stream.AddAttribute<IScriptAttribute>();

            stream.Reset();
            for (bool hasToken = stream.IncrementToken(); hasToken; hasToken = stream.IncrementToken())
            {
                assertEquals(UScript.Latin, script.Code);

                string expectedLongName = UScript.GetName(UScript.Latin);
                assertEquals(expectedLongName, script.GetName());

                string expectedShortName = UScript.GetShortName(UScript.Latin);
                assertEquals(expectedShortName, script.GetShortName());

                string reflected = stream.ReflectAsString(false);
                assertTrue(reflected.Contains("script=Latin"));
            }
            stream.End();
        }
Exemple #5
0
 /// <summary>
 /// Verifies that every script code below <c>UScript.CodeLimit</c> has a non-empty
 /// long name and a non-empty short name.
 /// </summary>
 public void TestScriptNames()
 {
     for (int i = 0; i < UScript.CodeLimit; i++)
     {
         // A null name is reported as a failure instead of crashing the whole loop.
         string name = UScript.GetName(i);
         if (string.IsNullOrEmpty(name))
         {
             Errln("FAILED: getName for code : " + i);
         }

         string shortName = UScript.GetShortName(i);
         if (string.IsNullOrEmpty(shortName))
         {
             // Names the API that actually failed (was reported as getName).
             Errln("FAILED: getShortName for code : " + i);
         }
     }
 }
Exemple #6
0
        /// <summary>
        /// Checks the long and short names of every script code from
        /// <c>UScript.Balinese</c> up to (excluding) <c>UScript.CodeLimit</c> against the
        /// expected tables below, then checks that <c>UScript.GetCode</c> maps each short
        /// name back to exactly one code, at the matching position.
        /// </summary>
        public void TestNewCode()
        {
            /*
             * These script codes were originally added to ICU pre-3.6, so that ICU would
             * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
             * These script codes were added with only short names because we don't
             * want to invent long names ourselves.
             * Unicode 5 and later encode some of these scripts and give them long names.
             * Whenever this happens, the long script names here need to be updated.
             */
            string[] expectedLong = new string[] {
                "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
                "Egyd", "Egyh", "Egyptian_Hieroglyphs",
                "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
                "Javanese", "Kayah_Li", "Latf", "Latg",
                "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
                "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
                "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
                "Zxxx", "Unknown",
                "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
                "Moon", "Meetei_Mayek",
                /* new in ICU 4.0 */
                "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
                "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
                "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
                "Zmth", "Zsym",
                /* new in ICU 4.4 */
                "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
                /* new in ICU 4.6 */
                "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
                "Loma", "Mende_Kikakui", "Meroitic_Cursive",
                "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
                /* new in ICU 4.8 */
                "Afak", "Jurc", "Mro", "Nushu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole",
                /* new in ICU 49 */
                "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
                /* new in ICU 52 */
                "Caucasian_Albanian", "Mahajani",
                /* new in ICU 54 */
                "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham",
                // new in ICU 58
                "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye",
                // new in ICU 60
                "Masaram_Gondi", "Soyombo", "Zanabazar_Square"
            };
            string[] expectedShort = new string[] {
                "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
                "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
                "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
                "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
                "Zxxx", "Zzzz",
                "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
                "Moon", "Mtei",
                /* new in ICU 4.0 */
                "Armi", "Avst", "Cakm", "Kore",
                "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
                "Zmth", "Zsym",
                /* new in ICU 4.4 */
                "Bamu", "Lisu", "Nkgb", "Sarb",
                /* new in ICU 4.6 */
                "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
                "Narb", "Nbat", "Palm", "Sind", "Wara",
                /* new in ICU 4.8 */
                "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
                /* new in ICU 49 */
                "Hluw", "Khoj", "Tirh",
                /* new in ICU 52 */
                "Aghb", "Mahj",
                /* new in ICU 54 */
                "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd",
                // new in ICU 58
                "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye",
                // new in ICU 60
                "Gonm", "Soyo", "Zanb"
            };
            if (expectedLong.Length != (UScript.CodeLimit - UScript.Balinese))
            {
                Errln("need to add new script codes in lang.TestUScript.java!");
                return;
            }
            // Both tables are indexed by the same offset below, so a mismatch would
            // otherwise surface as an IndexOutOfRangeException rather than a test error.
            if (expectedShort.Length != expectedLong.Length)
            {
                Errln("expectedShort has " + expectedShort.Length + " entries but expectedLong has " + expectedLong.Length);
                return;
            }

            // Pass 1: code -> long name and short name.
            for (int j = 0; j < expectedLong.Length; j++)
            {
                int code = UScript.Balinese + j;

                string longName = UScript.GetName(code);
                if (!string.Equals(longName, expectedLong[j], StringComparison.Ordinal))
                {
                    Errln("UScript.getName failed for code " + code + ": " + longName + " != " + expectedLong[j]);
                }

                string shortName = UScript.GetShortName(code);
                if (!string.Equals(shortName, expectedShort[j], StringComparison.Ordinal))
                {
                    Errln("UScript.getShortName failed for code " + code + ": " + shortName + " != " + expectedShort[j]);
                }
            }

            // Pass 2: short name -> code.
            for (int i = 0; i < expectedShort.Length; i++)
            {
                int expectedCode = UScript.Balinese + i;
                int[] ret = UScript.GetCode(expectedShort[i]);
                int returned = (ret == null) ? 0 : ret.Length;

                if (returned != 1)
                {
                    Errln("UScript.getCode did not return expected number of codes for script " + expectedShort[i] + ". EXPECTED: 1 GOT: " + returned);
                }
                if (returned == 0)
                {
                    // Nothing to compare; reading ret[0] would throw.
                    continue;
                }
                if (ret[0] != expectedCode)
                {
                    Errln("UScript.getCode did not return expected code for script " + expectedShort[i] + ". EXPECTED: " + expectedCode + " GOT: " + ret[0]);
                }
            }
        }
Exemple #7
0
        /// <summary>
        /// Builds a test line containing up to 20 characters of every script, then, for each
        /// script other than Common and Inherited, obtains the "any-&lt;script&gt;"
        /// transliterator by long and by short script name and runs it over the test line to
        /// verify it does not crash. Scripts for which the long-name lookup throws are skipped.
        /// </summary>
        public void TestScripts()
        {
            // get a couple of characters of each script for testing
            StringBuffer testBuffer = new StringBuffer();

            for (int script = 0; script < UScript.CodeLimit; ++script)
            {
                UnicodeSet scriptSet = new UnicodeSet().ApplyPropertyAlias("script", UScript.GetName(script));
                int count = Math.Min(20, scriptSet.Count);
                for (int i = 0; i < count; ++i)
                {
                    testBuffer.Append(UTF16.ValueOf(scriptSet[i]));
                }
            }

            string testLine = testBuffer.ToString();
            Logln("Test line: " + testLine);

            int inclusion = TestFmwk.GetExhaustiveness();
            bool testedUnavailableScript = false;

            for (int script = 0; script < UScript.CodeLimit; ++script)
            {
                if (script == UScript.Common || script == UScript.Inherited)
                {
                    continue;
                }
                // if the inclusion rate is not 10, skip all but a small number of items.
                // Make sure, however, that we test at least one unavailable script
                if (inclusion < 10 && script != UScript.Latin &&
                    script != UScript.Han &&
                    script != UScript.Hiragana &&
                    testedUnavailableScript
                    )
                {
                    continue;
                }

                string scriptName = UScript.GetName(script); // long name
                UCultureInfo locale = new UCultureInfo(scriptName);
                if (locale.Language.Equals("new") || locale.Language.Equals("pau"))
                {
                    if (logKnownIssue("11171",
                                      "long script name loosely looks like a locale ID with a known likely script"))
                    {
                        continue;
                    }
                }

                Transliterator t;
                try
                {
                    t = Transliterator.GetInstance("any-" + scriptName);
                }
                catch (Exception)
                {
                    testedUnavailableScript = true;
                    Logln("Skipping unavailable: " + scriptName);
                    continue; // we don't handle all scripts
                }
                Logln("Checking: " + scriptName);
                if (t != null)
                {
                    t.Transform(testLine); // just verify we don't crash
                }

                string shortScriptName = UScript.GetShortName(script); // 4-letter script code
                try
                {
                    t = Transliterator.GetInstance("any-" + shortScriptName);
                }
                catch (Exception)
                {
                    Errln("Transliterator.GetInstance() worked for \"any-" + scriptName +
                          "\" but not for \"any-" + shortScriptName + '\"');
                    // t still holds the long-name transliterator, which was already
                    // exercised above; re-running it would not test the short name.
                    continue;
                }
                if (t != null)
                {
                    t.Transform(testLine); // just verify we don't crash
                }
            }
        }
Exemple #8
0
 /// <summary>
 /// Returns the name that <c>UScript.GetName</c> reports for this instance's <c>code</c>.
 /// </summary>
 public virtual string GetName() => UScript.GetName(code);
Exemple #9
0
        /// <summary>
        /// Returns a transliterator from the given source script to our target or
        /// target/variant. The result is cached and the same transliterator is
        /// returned the next time. The caller does NOT own the result.
        /// </summary>
        /// <remarks>
        /// When <paramref name="source"/> is the target script or
        /// <see cref="UScript.InvalidCode"/>, or when no transliterator can be created
        /// for it, the result is <c>null</c> if <c>IsWide(targetScript)</c> is true and
        /// <c>widthFix</c> otherwise.
        /// </remarks>
        /// <param name="source">Script code of the text to transliterate from.</param>
        /// <returns>The cached or newly created transliterator, <c>widthFix</c>, or <c>null</c>.</returns>
        private Transliterator GetTransliterator(int source)
        {
            if (source == targetScript || source == UScript.InvalidCode)
            {
                return IsWide(targetScript) ? null : widthFix;
            }

            int key = source;
            Transliterator t;
            if (cache.TryGetValue(key, out t) && t != null)
            {
                return t;
            }

            string sourceName = UScript.GetName(source);
            string id = sourceName + TARGET_SEP + target;

            t = null;
            try
            {
                t = Transliterator.GetInstance(id, FORWARD);
            }
            catch (Exception)
            {
                // Best effort: no direct transliterator; fall through to the Latin pivot.
            }

            if (t == null)
            {
                // Try to pivot around Latin, our most common script
                id = sourceName + LATIN_PIVOT + target;
                try
                {
                    t = Transliterator.GetInstance(id, FORWARD);
                }
                catch (Exception)
                {
                    // Best effort: handled below as "no transliterator available".
                }
            }

            if (t == null)
            {
                return IsWide(targetScript) ? null : widthFix;
            }

            if (!IsWide(targetScript))
            {
                IList<Transliterator> v = new List<Transliterator>();
                v.Add(widthFix);
                v.Add(t);
                t = new CompoundTransliterator(v);
            }

            // Simulates Java's putIfAbsent: if another thread stored an entry first,
            // hand back that cached instance so every caller shares one transliterator.
            if (!cache.TryAdd(key, t))
            {
                Transliterator existing;
                if (cache.TryGetValue(key, out existing) && existing != null)
                {
                    t = existing;
                }
            }

            return t;
        }