/// <summary>
/// Walks every code point from 0 through 0x10FFFF and reports an error when
/// <c>UScript.GetScript</c> returns <c>UScript.InvalidCode</c>, or when the long
/// or short name of the returned script code contains an "invalid" marker
/// ("INVALID" for the long name, "INV" for the short name).
/// </summary>
public void TestAllCodepoints()
{
    for (int i = 0; i <= 0x10ffff; i++)
    {
        int code = UScript.GetScript(i);
        if (code == UScript.InvalidCode)
        {
            Errln("UScript.getScript for codepoint 0x" + Hex(i) + " failed");
        }

        // Each check names the API it exercised so a failure can be attributed
        // to the right lookup; a null name is reported rather than dereferenced.
        string id = UScript.GetName(code);
        if (id == null || id.IndexOf("INVALID", StringComparison.Ordinal) >= 0)
        {
            Errln("UScript.getName for codepoint 0x" + Hex(i) + " failed");
        }

        string abbr = UScript.GetShortName(code);
        if (abbr == null || abbr.IndexOf("INV", StringComparison.Ordinal) >= 0)
        {
            Errln("UScript.getShortName for codepoint 0x" + Hex(i) + " failed");
        }
    }
}
/// <summary>
/// Checks that <c>UScript.GetShortName</c> yields <paramref name="expected"/> for
/// <paramref name="testCode"/> and reports an error through <c>Errln</c> otherwise.
/// </summary>
/// <param name="testCode">Script code passed to <c>UScript.GetShortName</c>.</param>
/// <param name="expected">Short name the lookup is expected to return.</param>
public void TestGetShortName(int testCode, string expected)
{
    string actual = UScript.GetShortName(testCode);
    if (expected.Equals(actual))
    {
        return;
    }

    Errln($"Error testing UScript.getShortName(). Got: {actual} Expected: {expected}");
}
/// <summary>
/// Cross-checks the per-script metadata exposed by <c>UScript</c> (usage, sample
/// string, right-to-left, cased, breaks-between-letters) against Unicode sets
/// built from character properties, for every script code below
/// <c>UScript.CodeLimit</c>.
/// </summary>
public void TestScriptMetadata()
{
    UnicodeSet remainingRtl = new UnicodeSet("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]");
    // So far, sample characters are uppercase.
    // Georgian is special.
    UnicodeSet remainingCased = new UnicodeSet("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]");

    for (int script = 0; script < UScript.CodeLimit; ++script)
    {
        string label = UScript.GetShortName(script);
        ScriptUsage scriptUsage = UScript.GetUsage(script);
        string sampleText = UScript.GetSampleString(script);
        UnicodeSet members = new UnicodeSet();
        members.ApplyInt32PropertyValue(UProperty.Script, script);

        if (scriptUsage == ScriptUsage.NotEncoded)
        {
            // A script that is not encoded must carry no metadata and no characters.
            assertTrue(label + " not encoded, no sample", sampleText.Length == 0);
            assertFalse(label + " not encoded, not RTL", UScript.IsRightToLeft(script));
            assertFalse(label + " not encoded, not LB letters", UScript.BreaksBetweenLetters(script));
            assertFalse(label + " not encoded, not cased", UScript.IsCased(script));
            assertTrue(label + " not encoded, no characters", members.IsEmpty);
            continue;
        }

        assertFalse(label + " encoded, has a sample character", sampleText.Length == 0);
        int sampleCodePoint = sampleText.CodePointAt(0);
        int expectedScript = GetCharScript(script);
        assertEquals(label + " script(sample(script))", expectedScript, UScript.GetScript(sampleCodePoint));
        assertEquals(label + " RTL vs. set", remainingRtl.Contains(sampleCodePoint), UScript.IsRightToLeft(script));
        assertEquals(label + " cased vs. set", remainingCased.Contains(sampleCodePoint), UScript.IsCased(script));
        assertEquals(label + " encoded, has characters", script == expectedScript, !members.IsEmpty);

        // Strip this script's characters from the reference sets so that whatever
        // is left after the loop is not claimed by any RTL / cased script.
        if (UScript.IsRightToLeft(script))
        {
            remainingRtl.RemoveAll(members);
        }
        if (UScript.IsCased(script))
        {
            remainingCased.RemoveAll(members);
        }
    }

    assertEquals("no remaining RTL characters", "[]", remainingRtl.ToPattern(true));
    assertEquals("no remaining cased characters", "[]", remainingCased.ToPattern(true));

    assertTrue("Hani breaks between letters", UScript.BreaksBetweenLetters(UScript.Han));
    assertTrue("Thai breaks between letters", UScript.BreaksBetweenLetters(UScript.Thai));
    assertFalse("Latn does not break between letters", UScript.BreaksBetweenLetters(UScript.Latin));
}
/// <summary>
/// Tokenizes "This is a test" with the analyzer held in <c>a</c> and asserts that
/// every token's script attribute reports Latin (code, long name, short name)
/// and that the reflected attribute string contains "script=Latin".
/// </summary>
public void TestTokenAttributes()
{
    using TokenStream stream = a.GetTokenStream("dummy", "This is a test");
    IScriptAttribute script = stream.AddAttribute<IScriptAttribute>();

    stream.Reset();
    while (stream.IncrementToken())
    {
        assertEquals(UScript.Latin, script.Code);
        assertEquals(UScript.GetName(UScript.Latin), script.GetName());
        assertEquals(UScript.GetShortName(UScript.Latin), script.GetShortName());

        string reflected = stream.ReflectAsString(false);
        assertTrue(reflected.Contains("script=Latin"));
    }
    stream.End();
}
/// <summary>
/// Verifies that every script code below <c>UScript.CodeLimit</c> has a non-empty
/// long name and a non-empty short name, reporting each missing name via
/// <c>Errln</c>.
/// </summary>
public void TestScriptNames()
{
    for (int i = 0; i < UScript.CodeLimit; i++)
    {
        // A null name is reported as a failure instead of throwing.
        string name = UScript.GetName(i);
        if (string.IsNullOrEmpty(name))
        {
            Errln("FAILED: getName for code : " + i);
        }

        // The message names getShortName so the failing lookup is identifiable.
        string shortName = UScript.GetShortName(i);
        if (string.IsNullOrEmpty(shortName))
        {
            Errln("FAILED: getShortName for code : " + i);
        }
    }
}
/// <summary>
/// Formats an array of script codes as their short names separated by single
/// spaces.
/// </summary>
/// <param name="scripts">Script codes to format; may be <c>null</c>.</param>
/// <returns>
/// The literal text "null" when <paramref name="scripts"/> is <c>null</c>;
/// otherwise the concatenated short names.
/// </returns>
private static String ScriptsToString(int[] scripts)
{
    if (scripts == null)
    {
        return "null";
    }

    StringBuilder text = new StringBuilder();
    for (int index = 0; index < scripts.Length; index++)
    {
        // Separator is emitted only once something has already been written.
        if (text.Length != 0)
        {
            text.Append(' ');
        }
        text.Append(UScript.GetShortName(scripts[index]));
    }
    return text.ToString();
}
/// <summary>
/// Checks the long and short names of every script code from
/// <c>UScript.Balinese</c> up to (excluding) <c>UScript.CodeLimit</c> against
/// hard-coded tables, and checks that <c>UScript.GetCode</c> maps each short
/// name back to exactly one, matching, code.
/// </summary>
public void TestNewCode()
{
    /*
     * These script codes were originally added to ICU pre-3.6, so that ICU would
     * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
     * These script codes were added with only short names because we don't
     * want to invent long names ourselves.
     * Unicode 5 and later encode some of these scripts and give them long names.
     * Whenever this happens, the long script names here need to be updated.
     */
    String[] expectedLong = new String[]
    {
        "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyptian_Hieroglyphs",
        "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
        "Javanese", "Kayah_Li", "Latf", "Latg",
        "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
        "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
        "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
        "Zxxx", "Unknown",
        "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
        "Moon", "Meetei_Mayek",
        /* new in ICU 4.0 */
        "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
        "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
        "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
        "Zmth", "Zsym",
        /* new in ICU 4.4 */
        "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
        /* new in ICU 4.6 */
        "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
        "Loma", "Mende_Kikakui", "Meroitic_Cursive",
        "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
        /* new in ICU 4.8 */
        "Afak", "Jurc", "Mro", "Nushu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole",
        /* new in ICU 49 */
        "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
        /* new in ICU 52 */
        "Caucasian_Albanian", "Mahajani",
        /* new in ICU 54 */
        "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham",
        // new in ICU 58
        "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye",
        // new in ICU 60
        "Masaram_Gondi", "Soyombo", "Zanabazar_Square"
    };
    String[] expectedShort = new String[]
    {
        "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
        "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds",
        "Java", "Kali", "Latf", "Latg",
        "Lepc", "Lina", "Mand", "Maya", "Mero",
        "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
        "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
        "Zxxx", "Zzzz",
        "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
        "Moon", "Mtei",
        /* new in ICU 4.0 */
        "Armi", "Avst", "Cakm", "Kore",
        "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
        "Zmth", "Zsym",
        /* new in ICU 4.4 */
        "Bamu", "Lisu", "Nkgb", "Sarb",
        /* new in ICU 4.6 */
        "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
        "Narb", "Nbat", "Palm", "Sind", "Wara",
        /* new in ICU 4.8 */
        "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
        /* new in ICU 49 */
        "Hluw", "Khoj", "Tirh",
        /* new in ICU 52 */
        "Aghb", "Mahj",
        /* new in ICU 54 */
        "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd",
        // new in ICU 58
        "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye",
        // new in ICU 60
        "Gonm", "Soyo", "Zanb"
    };

    // Both tables are indexed with the same offsets below, so both must match
    // the number of script codes under test; otherwise the loops would run
    // past the end of the shorter table.
    int expectedCount = UScript.CodeLimit - UScript.Balinese;
    if (expectedLong.Length != expectedCount || expectedShort.Length != expectedCount)
    {
        Errln("need to add new script codes in lang.TestUScript.java!");
        return;
    }

    // Code -> names.
    for (int code = UScript.Balinese, j = 0; code < UScript.CodeLimit; code++, j++)
    {
        String name = UScript.GetName(code);
        if (name == null || !name.Equals(expectedLong[j]))
        {
            Errln("UScript.getName failed for code" + code + name + "!=" + expectedLong[j]);
        }
        name = UScript.GetShortName(code);
        if (name == null || !name.Equals(expectedShort[j]))
        {
            Errln("UScript.getShortName failed for code" + code + name + "!=" + expectedShort[j]);
        }
    }

    // Short name -> code.
    for (int k = 0; k < expectedShort.Length; k++)
    {
        int[] ret = UScript.GetCode(expectedShort[k]);
        int found = ret == null ? 0 : ret.Length;
        if (found != 1)
        {
            Errln("UScript.getCode did not return expected number of codes for script" + expectedShort[k] + ". EXPECTED: 1 GOT: " + found);
            if (found == 0)
            {
                // Nothing to compare; avoid indexing an empty or null result.
                continue;
            }
        }
        if (ret[0] != (UScript.Balinese + k))
        {
            Errln("UScript.getCode did not return expected code for script" + expectedShort[k] + ". EXPECTED: " + (UScript.Balinese + k) + " GOT: " + ret[0]);
        }
    }
}
/// <summary>
/// Builds a test line from up to 20 characters of every script and runs it
/// through the "any-&lt;script&gt;" transliterator for each script, looked up
/// once by long script name and once by short script name, to verify that
/// transforming does not crash.
/// </summary>
public void TestScripts()
{
    // get a couple of characters of each script for testing
    StringBuffer sampleChars = new StringBuffer();
    for (int scriptCode = 0; scriptCode < UScript.CodeLimit; ++scriptCode)
    {
        UnicodeSet scriptChars = new UnicodeSet().ApplyPropertyAlias("script", UScript.GetName(scriptCode));
        int take = Math.Min(20, scriptChars.Count);
        for (int k = 0; k < take; ++k)
        {
            sampleChars.Append(UTF16.ValueOf(scriptChars[k]));
        }
    }

    String testLine = sampleChars.ToString();
    Logln("Test line: " + testLine);

    int exhaustiveness = TestFmwk.GetExhaustiveness();
    bool sawUnavailableScript = false;
    for (int scriptCode = 0; scriptCode < UScript.CodeLimit; ++scriptCode)
    {
        if (scriptCode == UScript.Common || scriptCode == UScript.Inherited)
        {
            continue;
        }

        // if the inclusion rate is not 10, skip all but a small number of items.
        // Make sure, however, that we test at least one unavailable script
        bool alwaysTested = scriptCode == UScript.Latin
            || scriptCode == UScript.Han
            || scriptCode == UScript.Hiragana;
        if (exhaustiveness < 10 && !alwaysTested && sawUnavailableScript)
        {
            continue;
        }

        String longName = UScript.GetName(scriptCode); // long name
        UCultureInfo asLocale = new UCultureInfo(longName);
        String language = asLocale.Language;
        if ((language.Equals("new") || language.Equals("pau"))
            && logKnownIssue("11171", "long script name loosely looks like a locale ID with a known likely script"))
        {
            continue;
        }

        Transliterator translit;
        try
        {
            translit = Transliterator.GetInstance("any-" + longName);
        }
        catch (Exception)
        {
            sawUnavailableScript = true;
            Logln("Skipping unavailable: " + longName);
            continue; // we don't handle all scripts
        }

        Logln("Checking: " + longName);
        if (translit != null)
        {
            translit.Transform(testLine); // just verify we don't crash
        }

        String shortName = UScript.GetShortName(scriptCode); // 4-letter script code
        try
        {
            translit = Transliterator.GetInstance("any-" + shortName);
        }
        catch (Exception)
        {
            Errln("Transliterator.GetInstance() worked for \"any-" + longName + "\" but not for \"any-" + shortName + '\"');
        }
        translit.Transform(testLine); // just verify we don't crash
    }
}
/// <summary>
/// Returns the display name for a script code; this override answers with the
/// value of <c>UScript.GetShortName</c> for that code.
/// </summary>
/// <param name="scriptCode">Script code to look up.</param>
/// <returns>The result of <c>UScript.GetShortName(scriptCode)</c>.</returns>
public override string GetScriptDisplayName(int scriptCode)
    => UScript.GetShortName(scriptCode);
/// <summary>
/// Returns the result of <c>UScript.GetShortName</c> for the script code held
/// in <c>code</c>.
/// </summary>
public virtual string GetShortName()
    => UScript.GetShortName(code);