Beispiel #1
0
        /// <summary>
        /// Walks every code point from 0 through 0x10FFFF and reports an error whenever
        /// <c>UScript.GetScript</c> returns <c>UScript.InvalidCode</c>, or whenever the long
        /// or short name looked up for the returned script contains "INVALID" / "INV".
        /// </summary>
        public void TestAllCodepoints()
        {
            const int lastCodePoint = 0x10ffff;

            for (int cp = 0; cp <= lastCodePoint; cp++)
            {
                int script = UScript.GetScript(cp);
                if (script == UScript.InvalidCode)
                {
                    Errln("UScript.getScript for codepoint 0x" + Hex(cp) + " failed");
                }

                // Long name of whatever script was returned above.
                string longName = UScript.GetName(script);
                bool longNameLooksInvalid = longName.IndexOf("INVALID", StringComparison.Ordinal) >= 0;
                if (longNameLooksInvalid)
                {
                    Errln("UScript.getScript for codepoint 0x" + Hex(cp) + " failed");
                }

                // Short name of the same script.
                string shortName = UScript.GetShortName(script);
                bool shortNameLooksInvalid = shortName.IndexOf("INV", StringComparison.Ordinal) >= 0;
                if (shortNameLooksInvalid)
                {
                    Errln("UScript.getScript for codepoint 0x" + Hex(cp) + " failed");
                }
            }
        }
Beispiel #2
0
 /// <summary>
 /// Fills the <c>basicLatin</c> table so that entry <c>n</c> holds the value returned by
 /// <c>UScript.GetScript(n)</c>.
 /// </summary>
 static ScriptIterator()
 {
     int cp = 0;
     while (cp < basicLatin.Length)
     {
         basicLatin[cp] = UScript.GetScript(cp);
         cp++;
     }
 }
Beispiel #3
0
 /// <summary>
 /// Fast version of <see cref="UScript.GetScript(int)"/>: code points that fall inside the
 /// <c>basicLatin</c> table are answered by an array lookup.
 /// </summary>
 /// <param name="codepoint">The code point to classify.</param>
 /// <returns>
 /// The table entry for in-range code points; otherwise the value of
 /// <see cref="UScript.GetScript(int)"/>, adjusted when <c>combineCJ</c> is set
 /// (Han/Hiragana/Katakana become <c>UScript.Japanese</c>, U+FF10..U+FF19 become
 /// <c>UScript.Latin</c>).
 /// </returns>
 private int GetScript(int codepoint)
 {
     // Table hit: no need to consult UScript at all.
     if (codepoint >= 0 && codepoint < basicLatin.Length)
     {
         return basicLatin[codepoint];
     }

     int script = UScript.GetScript(codepoint);
     if (!combineCJ)
     {
         return script;
     }

     if (script == UScript.Han || script == UScript.Hiragana || script == UScript.Katakana)
     {
         return UScript.Japanese;
     }

     if (codepoint >= 0xFF10 && codepoint <= 0xFF19)
     {
         // when using CJK dictionary breaking, don't let full width numbers go to it, otherwise
         // they are treated as punctuation. we currently have no cleaner way to fix this!
         return UScript.Latin;
     }

     return script;
 }
Beispiel #4
0
            /// <summary>
            /// Checks that the first script returned by <c>UScript.GetCode</c> for
            /// <paramref name="testLocaleName"/> equals <paramref name="expected"/>, then runs
            /// a fixed set of additional locale-to-script checks.
            /// </summary>
            /// <param name="testLocaleName">The locale whose script codes are looked up.</param>
            /// <param name="expected">
            /// The expected first script code; <c>UScript.InvalidCode</c> means a null result is acceptable.
            /// </param>
            public void TestLocaleGetCode(UCultureInfo testLocaleName, int expected)
            {
                int[] scripts = UScript.GetCode(testLocaleName);
                if (scripts != null)
                {
                    if (scripts[0] != expected)
                    {
                        Errln("Error testing UScript.getCode(). Got: " + scripts[0] + " Expected: " + expected + " for locale "
                              + testLocaleName);
                    }
                }
                else if (expected != UScript.InvalidCode)
                {
                    Errln("Error testing UScript.getCode(). Got: null" + " Expected: " + expected + " for locale "
                          + testLocaleName);
                }

                // Repeat the lookup for Esperanto while the thread culture is switched to it.
                UCultureInfo esperanto = new UCultureInfo("eo_DE");
                using (new ThreadCultureChange(esperanto, esperanto))
                {
                    scripts = UScript.GetCode(esperanto);
                    if (scripts == null)
                    {
                        Warnln("Could not load the locale data.");
                    }
                    else if (scripts[0] != UScript.Latin)
                    {
                        Errln("Did not get the expected script code for Esperanto");
                    }
                }

                // Should work regardless of whether we have locale data for the language.
                AssertEqualScripts(
                    "tg script: Cyrl", // Tajik
                    new int[] { UScript.Cyrillic },
                    UScript.GetCode(new UCultureInfo("tg")));
                AssertEqualScripts(
                    "xsr script: Deva", // Sherpa
                    new int[] { UScript.Devanagari },
                    UScript.GetCode(new UCultureInfo("xsr")));

                // Multi-script languages.
                AssertEqualScripts(
                    "ja scripts: Kana Hira Hani",
                    new int[] { UScript.Katakana, UScript.Hiragana, UScript.Han },
                    UScript.GetCode(new UCultureInfo("ja")));
                AssertEqualScripts(
                    "ko scripts: Hang Hani",
                    new int[] { UScript.Hangul, UScript.Han },
                    UScript.GetCode(new UCultureInfo("ko")));
                AssertEqualScripts(
                    "zh script: Hani",
                    new int[] { UScript.Han },
                    UScript.GetCode(new UCultureInfo("zh")));
                AssertEqualScripts(
                    "zh-Hant scripts: Hani Bopo",
                    new int[] { UScript.Han, UScript.Bopomofo },
                    UScript.GetCode(new UCultureInfo("zh_Hant")));
                AssertEqualScripts(
                    "zh-TW scripts: Hani Bopo",
                    new int[] { UScript.Han, UScript.Bopomofo },
                    UScript.GetCode(new UCultureInfo("zh_Hant_TW")));

                // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
                // Note: the argument here is a string, not a UCultureInfo.
                AssertEqualScripts(
                    "ro-RO script: Latn",
                    new int[] { UScript.Latin },
                    UScript.GetCode("ro-RO"));
            }
Beispiel #5
0
            /// <summary>
            /// Reports an error when <c>UScript.GetShortName</c> for <paramref name="testCode"/>
            /// does not equal <paramref name="expected"/>.
            /// </summary>
            /// <param name="testCode">The script code to look up.</param>
            /// <param name="expected">The short name the lookup should produce.</param>
            public void TestGetShortName(int testCode, string expected)
            {
                string actual = UScript.GetShortName(testCode);
                if (expected.Equals(actual))
                {
                    return;
                }

                Errln("Error testing UScript.getShortName(). Got: " + actual + " Expected: " + expected);
            }
Beispiel #6
0
        /// <summary>
        /// Builds a 128-entry table in which entry <c>n</c> holds the value returned by
        /// <c>UScript.GetScript(n)</c>.
        /// </summary>
        /// <returns>The newly allocated table.</returns>
        private static int[] LoadBasicLatin() // LUCENENET: Avoid static constructors (see https://github.com/apache/lucenenet/pull/224#issuecomment-469284006)
        {
            const int tableSize = 128;
            int[] table = new int[tableSize];

            for (int cp = 0; cp < tableSize; cp++)
            {
                table[cp] = UScript.GetScript(cp);
            }

            return table;
        }
Beispiel #7
0
            /// <summary>
            /// Advances to the next script run. Returns TRUE if there are any more runs;
            /// TRUE is always returned at least once. After the call, the caller should
            /// examine <c>ScriptCode</c>, <c>Start</c>, and <c>Limit</c>.
            /// </summary>
            /// <returns><c>false</c> once <c>Start</c> has reached <c>textLimit</c>; otherwise <c>true</c>.</returns>
            public virtual bool Next()
            {
                // The script of the upcoming run is not known yet.
                ScriptCode = UScript.InvalidCode;
                Start = Limit;

                // Nothing left to scan.
                if (Start == textLimit)
                {
                    return false;
                }

                // Pull the start backwards over adjacent COMMON / INHERITED characters.
                while (Start > textStart)
                {
                    int previousChar = text.Char32At(Start - 1);
                    int previousScript = UScript.GetScript(previousChar);
                    if (previousScript != UScript.Common && previousScript != UScript.Inherited)
                    {
                        break;
                    }
                    --Start;
                }

                // Push the limit forwards over COMMON, INHERITED, and characters of the
                // current script; stop at the first character of a different script.
                while (Limit < textLimit)
                {
                    int nextChar = text.Char32At(Limit);
                    int nextScript = UScript.GetScript(nextChar);
                    bool isNeutral = nextScript == UScript.Common || nextScript == UScript.Inherited;
                    if (!isNeutral)
                    {
                        if (ScriptCode == UScript.InvalidCode)
                        {
                            // First concrete script seen: it defines this run.
                            ScriptCode = nextScript;
                        }
                        else if (nextScript != ScriptCode)
                        {
                            break;
                        }
                    }
                    ++Limit;
                }

                // TRUE is returned even if the entire text is COMMON / INHERITED, in
                // which case ScriptCode is still UScript.InvalidCode.
                return true;
            }
Beispiel #8
0
            /// <summary>
            /// Reports an error when <c>UScript.GetScript</c> for <paramref name="codepoint"/>
            /// does not return <paramref name="expected"/>.
            /// </summary>
            /// <param name="codepoint">The code point to classify.</param>
            /// <param name="expected">The script code the lookup should produce.</param>
            public void TestGetScript(int codepoint, int expected)
            {
                int code = UScript.GetScript(codepoint);

                if (code != expected)
                {
                    // The Hex(...) call used to sit inside the string literal, so the
                    // message printed the text "Hex(codepoint)" instead of the value.
                    Errln("Error testing UScript.getScript(). Got: " + code + " Expected: " + expected
                          + " for codepoint 0x" + Hex(codepoint) + ".");
                }
            }
Beispiel #9
0
 /// <summary>
 /// Checks <c>UScript.HasScript</c> for several code points against the
 /// Common/Inherited, Arabic, Syriac and Thaana scripts, and against a bogus script code.
 /// Each group reports one error if any of its expectations is not met.
 /// </summary>
 public void TestHasScript()
 {
     bool wrong063F =
         UScript.HasScript(0x063f, UScript.Common) ||
         !UScript.HasScript(0x063f, UScript.Arabic) || /* main Script value */
         UScript.HasScript(0x063f, UScript.Syriac) ||
         UScript.HasScript(0x063f, UScript.Thaana);
     if (wrong063F)
     {
         Errln("UScript.hasScript(U+063F, ...) is wrong");
     }

     bool wrong0640 =
         UScript.HasScript(0x0640, UScript.Common) || /* main Script value */
         !UScript.HasScript(0x0640, UScript.Arabic) ||
         !UScript.HasScript(0x0640, UScript.Syriac) ||
         UScript.HasScript(0x0640, UScript.Thaana);
     if (wrong0640)
     {
         Errln("UScript.hasScript(U+0640, ...) is wrong");
     }

     bool wrong0650 =
         UScript.HasScript(0x0650, UScript.Inherited) || /* main Script value */
         !UScript.HasScript(0x0650, UScript.Arabic) ||
         !UScript.HasScript(0x0650, UScript.Syriac) ||
         UScript.HasScript(0x0650, UScript.Thaana);
     if (wrong0650)
     {
         Errln("UScript.hasScript(U+0650, ...) is wrong");
     }

     bool wrong0660 =
         UScript.HasScript(0x0660, UScript.Common) || /* main Script value */
         !UScript.HasScript(0x0660, UScript.Arabic) ||
         UScript.HasScript(0x0660, UScript.Syriac) ||
         !UScript.HasScript(0x0660, UScript.Thaana);
     if (wrong0660)
     {
         Errln("UScript.hasScript(U+0660, ...) is wrong");
     }

     bool wrongFDF2 =
         UScript.HasScript(0xfdf2, UScript.Common) ||
         !UScript.HasScript(0xfdf2, UScript.Arabic) || /* main Script value */
         UScript.HasScript(0xfdf2, UScript.Syriac) ||
         !UScript.HasScript(0xfdf2, UScript.Thaana);
     if (wrongFDF2)
     {
         Errln("UScript.hasScript(U+FDF2, ...) is wrong");
     }

     // An unguarded implementation might go into an infinite loop on a bogus script code.
     bool acceptsBogusScript = UScript.HasScript(0x0640, 0xaffe);
     if (acceptsBogusScript)
     {
         Errln("UScript.hasScript(U+0640, bogus 0xaffe) is wrong");
     }
 }
Beispiel #10
0
 /// <summary>
 /// Checks the value of <c>UScript.GetScript</c> for three characters which have
 /// Script_Extensions: U+0640 (Common), U+0650 (Inherited) and U+FDF2 (Arabic).
 /// </summary>
 public void TestGetScriptOfCharsWithScriptExtensions()
 {
     /* test characters which have Script_Extensions */
     bool mismatch =
         UScript.Common != UScript.GetScript(0x0640) ||
         UScript.Inherited != UScript.GetScript(0x0650) ||
         UScript.Arabic != UScript.GetScript(0xfdf2);

     if (mismatch)
     {
         Errln("UScript.getScript(character with Script_Extensions) failed");
     }
 }
Beispiel #11
0
        /// <summary>
        /// API and code-coverage checks for the script metadata accessors on <c>UScript</c>:
        /// <c>GetSampleString</c>, <c>GetUsage</c>, <c>IsRightToLeft</c>,
        /// <c>BreaksBetweenLetters</c> and <c>IsCased</c>.
        /// </summary>
        public void TestScriptMetadataAPI()
        {
            /* API & code coverage. */
            string latinSample = UScript.GetSampleString(UScript.Latin);
            bool latinSampleOk = latinSample.Length == 1 && UScript.GetScript(latinSample[0]) == UScript.Latin;
            if (!latinSampleOk)
            {
                Errln("UScript.getSampleString(Latn) failed");
            }

            string invalidSample = UScript.GetSampleString(UScript.InvalidCode);
            if (invalidSample.Length != 0)
            {
                Errln("UScript.getSampleString(invalid) failed");
            }

            bool usageOk =
                UScript.GetUsage(UScript.Latin) == ScriptUsage.Recommended &&
                // Unicode 10 gives up on "aspirational".
                UScript.GetUsage(UScript.Yi) == ScriptUsage.LimitedUse &&
                UScript.GetUsage(UScript.Cherokee) == ScriptUsage.LimitedUse &&
                UScript.GetUsage(UScript.Coptic) == ScriptUsage.Excluded &&
                UScript.GetUsage(UScript.Cirth) == ScriptUsage.NotEncoded &&
                UScript.GetUsage(UScript.InvalidCode) == ScriptUsage.NotEncoded &&
                UScript.GetUsage(UScript.CodeLimit) == ScriptUsage.NotEncoded;
            if (!usageOk)
            {
                Errln("UScript.getUsage() failed");
            }

            bool rightToLeftOk =
                !UScript.IsRightToLeft(UScript.Latin) &&
                !UScript.IsRightToLeft(UScript.Cirth) &&
                UScript.IsRightToLeft(UScript.Arabic) &&
                UScript.IsRightToLeft(UScript.Hebrew);
            if (!rightToLeftOk)
            {
                Errln("UScript.isRightToLeft() failed");
            }

            bool lineBreakOk =
                !UScript.BreaksBetweenLetters(UScript.Latin) &&
                !UScript.BreaksBetweenLetters(UScript.Cirth) &&
                UScript.BreaksBetweenLetters(UScript.Han) &&
                UScript.BreaksBetweenLetters(UScript.Thai);
            if (!lineBreakOk)
            {
                Errln("UScript.breaksBetweenLetters() failed");
            }

            bool casedOk =
                !UScript.IsCased(UScript.Cirth) &&
                !UScript.IsCased(UScript.Han) &&
                UScript.IsCased(UScript.Latin) &&
                UScript.IsCased(UScript.Greek);
            if (!casedOk)
            {
                Errln("UScript.isCased() failed");
            }
        }
            /// <summary>
            /// Initializes the spec from <paramref name="theSpec"/>: records it as <c>top</c>,
            /// derives a canonical script name from it where one exists, and, when it is not
            /// itself a script name, attempts a locale resource bundle lookup. Finishes by
            /// calling <c>Reset()</c>.
            /// </summary>
            /// <param name="theSpec">The script name or locale identifier to start from.</param>
            public Spec(string theSpec)
            {
                top = theSpec;
                spec = null;
                scriptName = null;

                try
                {
                    // If top is a script name then this is not UScript.InvalidCode.
                    int scriptOfTop = UScript.GetCodeFromName(top);

                    // Canonicalize script name -or- do locale->script mapping
                    int[] codes = UScript.GetCode(top);
                    if (codes != null)
                    {
                        string canonicalName = UScript.GetName(codes[0]);
                        // A script name identical to top (ignoring case) is redundant, so drop it.
                        scriptName = canonicalName.Equals(top, StringComparison.OrdinalIgnoreCase)
                            ? null
                            : canonicalName;
                    }

                    isSpecLocale = false;
                    res = null;

                    // If 'top' is not a script name, try a locale lookup
                    if (scriptOfTop == UScript.InvalidCode)
                    {
                        // ICU4N specific - CultureInfo doesn't support IANA culture names, so we use ULocale instead.
                        ULocale topLocale = new ULocale(top);
                        res = (ICUResourceBundle)UResourceBundle.GetBundleInstance(
                            ICUData.IcuTransliteratorBaseName, topLocale, Transliterator.ICU_DATA_CLASS_LOADER);

                        // Make sure we got the bundle we wanted; otherwise, don't use it
                        isSpecLocale = res != null
                            && LocaleUtility.IsFallbackOf(res.GetULocale().ToString(), top);
                    }
                }
                catch (MissingManifestResourceException)
                {
                    ////CLOVER:OFF
                    // The constructor is called from multiple private methods
                    //  that protects an invalid scriptName
                    scriptName = null;
                    ////CLOVER:ON
                }

                // assert(spec != top);
                Reset();
            }
Beispiel #13
0
 /// <summary>
 /// Looks up the script code for <paramref name="name"/>.
 /// </summary>
 /// <param name="name">The name passed to <c>UScript.GetCode</c>.</param>
 /// <returns>
 /// The first code returned by the lookup, or <see cref="UScript.InvalidCode"/> when the
 /// lookup yields null or throws <see cref="MissingManifestResourceException"/>.
 /// </returns>
 private static int ScriptNameToCode(string name)
 {
     int[] codes;
     try
     {
         codes = UScript.GetCode(name);
     }
     catch (MissingManifestResourceException)
     {
         ////CLOVER:OFF
         return UScript.InvalidCode;
         ////CLOVER:ON
     }

     if (codes == null)
     {
         return UScript.InvalidCode;
     }
     return codes[0];
 }
Beispiel #14
0
        /// <summary>
        /// For every script code below <c>UScript.CodeLimit</c>, cross-checks the script
        /// metadata (usage, sample string, RTL, cased) against Unicode sets, and finally
        /// asserts that no RTL or cased characters remain unaccounted for.
        /// </summary>
        public void TestScriptMetadata()
        {
            UnicodeSet remainingRtl = new UnicodeSet("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]");
            // So far, sample characters are uppercase.
            // Georgian is special.
            UnicodeSet remainingCased = new UnicodeSet("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]");

            for (int sc = 0; sc < UScript.CodeLimit; ++sc)
            {
                string shortName = UScript.GetShortName(sc);
                ScriptUsage usage = UScript.GetUsage(sc);
                string sample = UScript.GetSampleString(sc);
                UnicodeSet scriptSet = new UnicodeSet();
                scriptSet.ApplyInt32PropertyValue(UProperty.Script, sc);

                if (usage == ScriptUsage.NotEncoded)
                {
                    // A script that is not encoded must have no metadata and no characters.
                    assertTrue(shortName + " not encoded, no sample", sample.Length == 0);  // Java 6: sample.isEmpty()
                    assertFalse(shortName + " not encoded, not RTL", UScript.IsRightToLeft(sc));
                    assertFalse(shortName + " not encoded, not LB letters", UScript.BreaksBetweenLetters(sc));
                    assertFalse(shortName + " not encoded, not cased", UScript.IsCased(sc));
                    assertTrue(shortName + " not encoded, no characters", scriptSet.IsEmpty);
                    continue;
                }

                assertFalse(shortName + " encoded, has a sample character", sample.Length == 0);  // Java 6: sample.isEmpty()
                int sampleCodePoint = sample.CodePointAt(0);
                int charScript = GetCharScript(sc);
                assertEquals(shortName + " script(sample(script))",
                             charScript, UScript.GetScript(sampleCodePoint));
                assertEquals(shortName + " RTL vs. set", remainingRtl.Contains(sampleCodePoint), UScript.IsRightToLeft(sc));
                assertEquals(shortName + " cased vs. set", remainingCased.Contains(sampleCodePoint), UScript.IsCased(sc));
                assertEquals(shortName + " encoded, has characters", sc == charScript, !scriptSet.IsEmpty);

                // Strike this script's characters from the sets they were expected in.
                if (UScript.IsRightToLeft(sc))
                {
                    remainingRtl.RemoveAll(scriptSet);
                }
                if (UScript.IsCased(sc))
                {
                    remainingCased.RemoveAll(scriptSet);
                }
            }

            assertEquals("no remaining RTL characters", "[]", remainingRtl.ToPattern(true));
            assertEquals("no remaining cased characters", "[]", remainingCased.ToPattern(true));

            assertTrue("Hani breaks between letters", UScript.BreaksBetweenLetters(UScript.Han));
            assertTrue("Thai breaks between letters", UScript.BreaksBetweenLetters(UScript.Thai));
            assertFalse("Latn does not break between letters", UScript.BreaksBetweenLetters(UScript.Latin));
        }
Beispiel #15
0
        /// <summary>
        /// Tokenizes "This is a test" and asserts that every token carries the Latin script
        /// code, the matching long and short names, and a reflected "script=Latin" entry.
        /// </summary>
        public void TestTokenAttributes()
        {
            using (TokenStream stream = a.GetTokenStream("dummy", "This is a test"))
            {
                IScriptAttribute script = stream.AddAttribute<IScriptAttribute>();

                stream.Reset();
                while (stream.IncrementToken())
                {
                    assertEquals(UScript.Latin, script.Code);
                    assertEquals(UScript.GetName(UScript.Latin), script.GetName());
                    assertEquals(UScript.GetShortName(UScript.Latin), script.GetShortName());
                    assertTrue(stream.ReflectAsString(false).Contains("script=Latin"));
                }
                stream.End();
            }
        }
Beispiel #16
0
 /// <summary>
 /// Checks that every script code below <c>UScript.CodeLimit</c> has a non-empty long
 /// name and a non-empty short name.
 /// </summary>
 public void TestScriptNames()
 {
     for (int i = 0; i < UScript.CodeLimit; i++)
     {
         // IsNullOrEmpty reports a null name as a test failure instead of letting
         // the check itself throw a NullReferenceException.
         string name = UScript.GetName(i);
         if (string.IsNullOrEmpty(name))
         {
             Errln("FAILED: getName for code : " + i);
         }

         string shortName = UScript.GetShortName(i);
         if (string.IsNullOrEmpty(shortName))
         {
             // Name the accessor that actually failed, so the two failures can be told apart.
             Errln("FAILED: getShortName for code : " + i);
         }
     }
 }
Beispiel #17
0
        /// <summary>
        /// Formats an array of script codes as their short names separated by single spaces.
        /// </summary>
        /// <param name="scripts">The script codes to format; may be null.</param>
        /// <returns>The literal text "null" for a null array, otherwise the joined short names.</returns>
        private static String ScriptsToString(int[] scripts)
        {
            if (scripts == null)
            {
                return "null";
            }

            StringBuilder joined = new StringBuilder();
            for (int index = 0; index < scripts.Length; index++)
            {
                // Separator only once something has already been written.
                if (joined.Length > 0)
                {
                    joined.Append(' ');
                }
                joined.Append(UScript.GetShortName(scripts[index]));
            }

            return joined.ToString();
        }
Beispiel #18
0
 /// <summary>
 /// Checks that the first script returned by <c>UScript.GetCode</c> for
 /// <paramref name="testName"/> equals <paramref name="expected"/>.
 /// </summary>
 /// <param name="testName">The name passed to <c>UScript.GetCode</c>.</param>
 /// <param name="expected">
 /// The expected first script code; <c>UScript.InvalidCode</c> means a null result is acceptable.
 /// </param>
 public void TestGetCode(string testName, int expected)
 {
     int[] scripts = UScript.GetCode(testName);
     if (scripts != null)
     {
         if (scripts[0] != expected)
         {
             Errln("Error testing UScript.getCode(). Got: " + scripts[0] + " Expected: " + expected + " for locale "
                   + testName);
         }
         return;
     }

     // getCode returns null if the code could not be found
     if (expected != UScript.InvalidCode)
     {
         Errln("Error testing UScript.getCode(). Got: null" + " Expected: " + expected + " for locale "
               + testName);
     }
 }
Beispiel #19
0
        /// <summary>
        /// Checks the return value of <c>UScript.GetScriptExtensions</c> and the bits it
        /// leaves in the supplied <see cref="BitArray"/> for invalid code points and for
        /// several code points with one or more script extensions.
        /// </summary>
        public void TestGetScriptExtensions()
        {
            BitArray scripts = new BitArray(UScript.CodeLimit);
            int result;

            /* invalid code points */
            result = UScript.GetScriptExtensions(-1, scripts);
            if (result != UScript.Unknown || scripts.Cardinality() != 1 ||
                !scripts.Get(UScript.Unknown))
            {
                Errln("UScript.getScriptExtensions(-1) is not {UNKNOWN}");
            }

            result = UScript.GetScriptExtensions(0x110000, scripts);
            if (result != UScript.Unknown || scripts.Cardinality() != 1 ||
                !scripts.Get(UScript.Unknown))
            {
                Errln("UScript.getScriptExtensions(0x110000) is not {UNKNOWN}");
            }

            /* normal usage */
            result = UScript.GetScriptExtensions(0x063f, scripts);
            if (result != UScript.Arabic || scripts.Cardinality() != 1 ||
                !scripts.Get(UScript.Arabic))
            {
                Errln("UScript.getScriptExtensions(U+063F) is not {ARABIC}");
            }

            result = UScript.GetScriptExtensions(0x0640, scripts);
            if (result > -3 || scripts.Cardinality() < 3 ||
                !scripts.Get(UScript.Arabic) || !scripts.Get(UScript.Syriac) || !scripts.Get(UScript.Mandaic))
            {
                Errln("UScript.getScriptExtensions(U+0640) failed");
            }

            result = UScript.GetScriptExtensions(0xfdf2, scripts);
            if (result != -2 || scripts.Cardinality() != 2 ||
                !scripts.Get(UScript.Arabic) || !scripts.Get(UScript.Thaana))
            {
                Errln("UScript.getScriptExtensions(U+FDF2) failed");
            }

            result = UScript.GetScriptExtensions(0xff65, scripts);
            if (result != -6 || scripts.Cardinality() != 6 ||
                !scripts.Get(UScript.Bopomofo) || !scripts.Get(UScript.Yi))
            {
                Errln("UScript.getScriptExtensions(U+FF65) failed");
            }
        }
Beispiel #20
0
        /// <summary>
        /// Walks every code point from 0 through 0x10FFFF and reports an error whenever
        /// <c>UScript.GetScript</c> returns <c>UScript.InvalidCode</c>, or whenever
        /// <c>TryGetName</c> / <c>TryGetShortName</c> fails or yields a name containing
        /// "INVALID" / "INV".
        /// </summary>
        public void TestAllCodepointsUsingTry()
        {
            const int lastCodePoint = 0x10ffff;

            for (int cp = 0; cp <= lastCodePoint; cp++)
            {
                int script = UScript.GetScript(cp);
                if (script == UScript.InvalidCode)
                {
                    Errln("UScript.GetScript for codepoint 0x" + Hex(cp) + " failed");
                }

                bool hasLongName = UScript.TryGetName(script, out string longName);
                if (!hasLongName || longName.IndexOf("INVALID", StringComparison.Ordinal) >= 0)
                {
                    Errln("UScript.GetScript for codepoint 0x" + Hex(cp) + " failed");
                }

                bool hasShortName = UScript.TryGetShortName(script, out string shortName);
                if (!hasShortName || shortName.IndexOf("INV", StringComparison.Ordinal) >= 0)
                {
                    Errln("UScript.GetScript for codepoint 0x" + Hex(cp) + " failed");
                }
            }
        }
Beispiel #21
0
            /// <summary>
            /// Looks up script codes twice, once by locale name and once by
            /// <see cref="CultureInfo"/>, and compares each returned code position by
            /// position with <paramref name="expected"/>.
            /// </summary>
            /// <param name="testLocaleName">The locale name passed to <c>UScript.GetCode</c>.</param>
            /// <param name="expected">The expected script codes, in order.</param>
            /// <param name="testLocale">The culture passed to <c>UScript.GetCode</c>.</param>
            public void TestMultipleCodes(string testLocaleName, int[] expected, CultureInfo testLocale)
            {
                // Lookup by name.
                int[] byName = UScript.GetCode(testLocaleName);
                if (byName == null)
                {
                    Errln("Error testing UScript.getCode() for locale " + testLocaleName);
                }
                else
                {
                    for (int index = 0; index < byName.Length; index++)
                    {
                        if (byName[index] != expected[index])
                        {
                            Errln("Error testing UScript.getCode(). Got: " + byName[index] + " Expected: " + expected[index]
                                  + " for locale " + testLocaleName);
                        }
                    }
                }

                // Lookup by culture.
                Logln("  Testing UScript.getCode(Locale) with locale: " + testLocale.DisplayName);
                int[] byCulture = UScript.GetCode(testLocale);
                if (byCulture == null)
                {
                    Errln("Error testing UScript.getCode() for locale " + testLocaleName);
                }
                else
                {
                    for (int index = 0; index < byCulture.Length; index++)
                    {
                        if (byCulture[index] != expected[index])
                        {
                            Errln("Error testing UScript.getCode(). Got: " + byCulture[index] + " Expected: " + expected[index]
                                  + " for locale " + testLocaleName);
                        }
                    }
                }
            }
Beispiel #22
0
        /// <summary>
        /// For every script code below <c>UScript.CodeLimit</c>, checks that
        /// <c>TryGetName</c> and <c>TryGetShortName</c> succeed with a non-empty name, and
        /// that each name maps back to the same code through <c>UScript.GetCodeFromName</c>.
        /// </summary>
        public void TestScriptNamesUsingTry()
        {
            for (int i = 0; i < UScript.CodeLimit; i++)
            {
                // IsNullOrEmpty keeps a null name from throwing inside the check itself.
                if (!UScript.TryGetName(i, out string name) || string.IsNullOrEmpty(name))
                {
                    Errln("FAILED: getName for code : " + i);
                }
                if (name != null)
                {
                    /* test reverse mapping */
                    int rev = UScript.GetCodeFromName(name);
                    if (rev != i)
                    {
                        Errln("Property round-trip failure: " + i + " -> " +
                              name + " -> " + rev);
                    }
                }

                if (!UScript.TryGetShortName(i, out string shortName) || string.IsNullOrEmpty(shortName))
                {
                    // Name the accessor that actually failed, so the two failures can be told apart.
                    Errln("FAILED: getShortName for code : " + i);
                }
                if (shortName != null)
                {
                    /* test reverse mapping */
                    int rev = UScript.GetCodeFromName(shortName);
                    if (rev != i)
                    {
                        Errln("Property round-trip failure: " + i + " -> " +
                              shortName + " -> " + rev);
                    }
                }
            }
        }
Beispiel #23
0
 /// <summary>
 /// Returns the display name for a script code; this override uses the short name
 /// reported by <c>UScript.GetShortName</c>.
 /// </summary>
 /// <param name="scriptCode">The script code to name.</param>
 /// <returns>The value of <c>UScript.GetShortName(scriptCode)</c>.</returns>
 public override string GetScriptDisplayName(int scriptCode)
     => UScript.GetShortName(scriptCode);
Beispiel #24
0
        /// <summary>
        /// Returns a transliterator from the given source script to our target or
        /// target/variant. When the source is the same as our target script, or is
        /// <see cref="UScript.InvalidCode"/>, or no transliterator can be created, the
        /// result is <c>null</c> if the target script is wide and <c>widthFix</c> otherwise.
        /// Caches the result and returns the same transliterator the next time.
        /// </summary>
        /// <param name="source">The script code of the text to be transliterated.</param>
        /// <returns>The cached or newly created transliterator, <c>widthFix</c>, or <c>null</c>.</returns>
        private Transliterator GetTransliterator(int source)
        {
            if (source == targetScript || source == UScript.InvalidCode)
            {
                return IsWide(targetScript) ? null : widthFix;
            }

            int key = source;
            Transliterator t;

            // Single cache probe; a hit with a usable value is returned as-is.
            if (cache.TryGetValue(key, out t) && t != null)
            {
                return t;
            }

            string sourceName = UScript.GetName(source);
            string id = sourceName + TARGET_SEP + target;

            try
            {
                t = Transliterator.GetInstance(id, FORWARD);
            }
            catch (Exception)
            {
                // Best effort: a failure here just means we try the Latin pivot next.
            }

            if (t == null)
            {
                // Try to pivot around Latin, our most common script
                id = sourceName + LATIN_PIVOT + target;
                try
                {
                    t = Transliterator.GetInstance(id, FORWARD);
                }
                catch (Exception)
                {
                    // Best effort: no transliterator is available for this source.
                }
            }

            if (t == null)
            {
                // Nothing could be created; fall back exactly as for an identical source.
                return IsWide(targetScript) ? null : widthFix;
            }

            if (!IsWide(targetScript))
            {
                // Apply the width fix ahead of the script transliterator.
                IList<Transliterator> chain = new List<Transliterator>();
                chain.Add(widthFix);
                chain.Add(t);
                t = new CompoundTransliterator(chain);
            }

            // ICU4N: simulate putIfAbsent. If the key is already present (for example
            // because another thread got there first), hand back the instance that is
            // actually cached rather than a placeholder.
            if (!cache.TryAdd(key, t))
            {
                Transliterator existing;
                if (cache.TryGetValue(key, out existing) && existing != null)
                {
                    t = existing;
                }
            }

            return t;
        }
Beispiel #25
0
        /// <summary>
        /// Builds a test line from up to 20 characters of every script, then, for each
        /// script other than Common and Inherited, requests the "any-&lt;script&gt;"
        /// transliterator by long and by short script name and runs it over the test line
        /// to verify that it does not crash. Scripts without a transliterator are skipped.
        /// </summary>
        public void TestScripts()
        {
            // get a couple of characters of each script for testing
            StringBuffer testBuffer = new StringBuffer();

            for (int script = 0; script < UScript.CodeLimit; ++script)
            {
                UnicodeSet scriptChars = new UnicodeSet().ApplyPropertyAlias("script", UScript.GetName(script));
                int count = Math.Min(20, scriptChars.Count);
                for (int i = 0; i < count; ++i)
                {
                    testBuffer.Append(UTF16.ValueOf(scriptChars[i]));
                }
            }

            string testLine = testBuffer.ToString();
            Logln("Test line: " + testLine);

            int inclusion = TestFmwk.GetExhaustiveness();
            bool testedUnavailableScript = false;

            for (int script = 0; script < UScript.CodeLimit; ++script)
            {
                if (script == UScript.Common || script == UScript.Inherited)
                {
                    continue;
                }
                // if the inclusion rate is not 10, skip all but a small number of items.
                // Make sure, however, that we test at least one unavailable script
                if (inclusion < 10 && script != UScript.Latin &&
                    script != UScript.Han &&
                    script != UScript.Hiragana &&
                    testedUnavailableScript)
                {
                    continue;
                }

                string scriptName = UScript.GetName(script); // long name
                UCultureInfo locale = new UCultureInfo(scriptName);
                if (locale.Language.Equals("new") || locale.Language.Equals("pau"))
                {
                    if (logKnownIssue("11171",
                                      "long script name loosely looks like a locale ID with a known likely script"))
                    {
                        continue;
                    }
                }

                Transliterator t;
                try
                {
                    t = Transliterator.GetInstance("any-" + scriptName);
                }
                catch (Exception)
                {
                    testedUnavailableScript = true;
                    Logln("Skipping unavailable: " + scriptName);
                    continue; // we don't handle all scripts
                }
                Logln("Checking: " + scriptName);
                if (t != null)
                {
                    t.Transform(testLine); // just verify we don't crash
                }

                string shortScriptName = UScript.GetShortName(script); // 4-letter script code
                try
                {
                    t = Transliterator.GetInstance("any-" + shortScriptName);
                }
                catch (Exception)
                {
                    Errln("Transliterator.GetInstance() worked for \"any-" + scriptName +
                          "\" but not for \"any-" + shortScriptName + '\"');
                    // The short-name lookup failed, so there is nothing new to exercise;
                    // do not re-run the long-name transliterator under the short name.
                    continue;
                }
                if (t != null)
                {
                    t.Transform(testLine); // just verify we don't crash
                }
            }
        }
Beispiel #26
0
 /// <summary>
 /// Returns the value of <c>UScript.GetShortName</c> for this instance's <c>code</c>.
 /// </summary>
 /// <returns>The short name reported by <c>UScript</c> for <c>code</c>.</returns>
 public virtual string GetShortName()
 {
     string shortName = UScript.GetShortName(code);
     return shortName;
 }
Beispiel #27
0
        /// <summary>
        /// Verifies the long and short names of every script code from
        /// <c>UScript.Balinese</c> up to (but excluding) <c>UScript.CodeLimit</c>, and that
        /// looking up each expected short name with <c>UScript.GetCode</c> yields exactly
        /// the matching script code.
        /// </summary>
        /// <remarks>
        /// The two expectation tables are parallel: entry <c>n</c> of each describes script
        /// code <c>UScript.Balinese + n</c>. When new script codes are added, both tables
        /// must be extended.
        /// </remarks>
        public void TestNewCode()
        {
            /*
             * These script codes were originally added to ICU pre-3.6, so that ICU would
             * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
             * These script codes were added with only short names because we don't
             * want to invent long names ourselves.
             * Unicode 5 and later encode some of these scripts and give them long names.
             * Whenever this happens, the long script names here need to be updated.
             */
            String[] expectedLong = new String[] {
                "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
                "Egyd", "Egyh", "Egyptian_Hieroglyphs",
                "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
                "Javanese", "Kayah_Li", "Latf", "Latg",
                "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
                "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
                "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
                "Zxxx", "Unknown",
                "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
                "Moon", "Meetei_Mayek",
                /* new in ICU 4.0 */
                "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
                "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
                "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
                "Zmth", "Zsym",
                /* new in ICU 4.4 */
                "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
                /* new in ICU 4.6 */
                "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
                "Loma", "Mende_Kikakui", "Meroitic_Cursive",
                "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
                /* new in ICU 4.8 */
                "Afak", "Jurc", "Mro", "Nushu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole",
                /* new in ICU 49 */
                "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
                /* new in ICU 52 */
                "Caucasian_Albanian", "Mahajani",
                /* new in ICU 54 */
                "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham",
                // new in ICU 58
                "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye",
                // new in ICU 60
                "Masaram_Gondi", "Soyombo", "Zanabazar_Square"
            };
            String[] expectedShort = new String[] {
                "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
                "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
                "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
                "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
                "Zxxx", "Zzzz",
                "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
                "Moon", "Mtei",
                /* new in ICU 4.0 */
                "Armi", "Avst", "Cakm", "Kore",
                "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
                "Zmth", "Zsym",
                /* new in ICU 4.4 */
                "Bamu", "Lisu", "Nkgb", "Sarb",
                /* new in ICU 4.6 */
                "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
                "Narb", "Nbat", "Palm", "Sind", "Wara",
                /* new in ICU 4.8 */
                "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
                /* new in ICU 49 */
                "Hluw", "Khoj", "Tirh",
                /* new in ICU 52 */
                "Aghb", "Mahj",
                /* new in ICU 54 */
                "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd",
                // new in ICU 58
                "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye",
                // new in ICU 60
                "Gonm", "Soyo", "Zanb"
            };

            // The tables must cover exactly the codes in [Balinese, CodeLimit).
            if (expectedLong.Length != (UScript.CodeLimit - UScript.Balinese))
            {
                Errln("need to add new script codes in lang.TestUScript.java!");
                return;
            }
            // Both tables are indexed in parallel below; a mismatch would otherwise
            // surface as an IndexOutOfRangeException instead of a readable failure.
            if (expectedShort.Length != expectedLong.Length)
            {
                Errln("expectedShort has " + expectedShort.Length +
                      " entries but expectedLong has " + expectedLong.Length);
                return;
            }

            // Code -> name direction.
            for (int j = 0; j < expectedLong.Length; j++)
            {
                int scriptCode = UScript.Balinese + j;

                String name = UScript.GetName(scriptCode);
                if (name == null || !name.Equals(expectedLong[j]))
                {
                    Errln("UScript.getName failed for code" + scriptCode + name + "!=" + expectedLong[j]);
                }
                name = UScript.GetShortName(scriptCode);
                if (name == null || !name.Equals(expectedShort[j]))
                {
                    Errln("UScript.getShortName failed for code" + scriptCode + name + "!=" + expectedShort[j]);
                }
            }

            // Short name -> code direction.
            for (int i = 0; i < expectedShort.Length; i++)
            {
                int[] ret = UScript.GetCode(expectedShort[i]);
                int returned = (ret == null) ? 0 : ret.Length;
                if (returned != 1)
                {
                    Errln("UScript.getCode did not return expected number of codes for script" + expectedShort[i] + ". EXPECTED: 1 GOT: " + returned);
                }
                if (returned == 0)
                {
                    // Nothing to compare; already reported above.
                    continue;
                }
                if (ret[0] != (UScript.Balinese + i))
                {
                    Errln("UScript.getCode did not return expected code for script" + expectedShort[i] + ". EXPECTED: " + (UScript.Balinese + i) + " GOT: " + ret[0]);
                }
            }
        }