/// <summary>
/// Initializes a new instance of the <see cref="LanguageSuggestion"/> class and builds
/// its <c>Label</c> from the names of the language, script and region subtags of the tag.
/// </summary>
/// <param name="wsFactory">The writing system factory handed to the base constructor.</param>
/// <param name="languageTag">The IETF language tag this suggestion stands for.</param>
/// <param name="keyboardLayout">The keyboard layout stored alongside the tag.</param>
public LanguageSuggestion(IWritingSystemFactory wsFactory, string languageTag, string keyboardLayout)
    : base(wsFactory)
{
    _languageTag = languageTag;
    _keyboardLayout = keyboardLayout;

    LanguageSubtag languageSubtag;
    ScriptSubtag scriptSubtag;
    RegionSubtag regionSubtag;
    IEnumerable<VariantSubtag> variantSubtags;
    // The boolean result is intentionally not acted upon; every subtag is null-checked
    // below instead, so a tag that cannot be parsed produces a shorter (possibly empty)
    // label rather than a NullReferenceException.
    IetfLanguageTag.TryGetSubtags(languageTag, out languageSubtag, out scriptSubtag, out regionSubtag, out variantSubtags);

    var s = new StringBuilder();
    if (languageSubtag != null && !string.IsNullOrEmpty(languageSubtag.Name))
    {
        s.Append(languageSubtag.Name);
    }

    // The script is only shown when it is not already implied by the rest of the tag.
    if (scriptSubtag != null && !string.IsNullOrEmpty(scriptSubtag.Name) && !IetfLanguageTag.IsScriptImplied(languageTag))
    {
        s.AppendFormat("-{0}", scriptSubtag.Name);
    }

    if (regionSubtag != null && !string.IsNullOrEmpty(regionSubtag.Name))
    {
        s.AppendFormat("-{0}", regionSubtag.Name);
    }

    Label = s.ToString();
}
public void TryGetSubtags_XETIC_ReturnsFalse()
{
    // The tag below separates its parts with underscores rather than hyphens;
    // the test pins that TryGetSubtags reports failure for it.
    LanguageSubtag language;
    ScriptSubtag script;
    RegionSubtag region;
    IEnumerable<VariantSubtag> variants;

    bool parsed = IetfLanguageTag.TryGetSubtags("en_Latn_US_X_ETIC", out language, out script, out region, out variants);

    Assert.That(parsed, Is.False);
}
/// <summary>
/// Creates a new writing system from an IETF language tag.
/// </summary>
/// <param name="ietfLanguageTag">The IETF language tag to split into its subtags.</param>
/// <returns>The writing system returned by the subtag-based <c>Create</c> overload.</returns>
/// <exception cref="ArgumentException">The tag could not be split into subtags.</exception>
public CoreWritingSystemDefinition Create(string ietfLanguageTag)
{
    LanguageSubtag languageSubtag;
    ScriptSubtag scriptSubtag;
    RegionSubtag regionSubtag;
    IEnumerable<VariantSubtag> variantSubtags;

    bool tagIsValid = IetfLanguageTag.TryGetSubtags(ietfLanguageTag, out languageSubtag, out scriptSubtag, out regionSubtag, out variantSubtags);
    if (tagIsValid)
    {
        // Delegate to the overload that works on already-parsed subtags.
        return Create(languageSubtag, scriptSubtag, regionSubtag, variantSubtags);
    }

    throw new ArgumentException("The IETF language tag is invalid.", "ietfLanguageTag");
}
public void TryGetSubtags_CompatibleForm_ReturnsScript()
{
    // The input has no explicit script subtag and a mixed-case region; the
    // expectations pin the script and the upper-cased region that come back.
    LanguageSubtag language;
    ScriptSubtag script;
    RegionSubtag region;
    IEnumerable<VariantSubtag> variants;

    bool parsed = IetfLanguageTag.TryGetSubtags("zh-cN-fonipa-x-etic", out language, out script, out region, out variants);

    Assert.That(parsed, Is.True);
    Assert.That(language, Is.EqualTo((LanguageSubtag)"zh"));
    Assert.That(script, Is.EqualTo((ScriptSubtag)"Hans"));
    Assert.That(region, Is.EqualTo((RegionSubtag)"CN"));

    var expectedVariants = new VariantSubtag[] { "fonipa", "etic" };
    Assert.That(variants, Is.EqualTo(expectedVariants));
}
public void TryGetSubtags_XDupl0_ReturnsDupl0VariantSubtag()
{
    LanguageSubtag language;
    ScriptSubtag script;
    RegionSubtag region;
    IEnumerable<VariantSubtag> variants;

    // Although dupl0 is in a position where it would normally be interpreted as a private language code,
    // since it isn't a valid one, we instead interpret it as simply a variant of qaa, the unknown language.
    bool parsed = IetfLanguageTag.TryGetSubtags("qaa-x-dupl0", out language, out script, out region, out variants);

    Assert.That(parsed, Is.True);
    Assert.That(language, Is.EqualTo((LanguageSubtag)"qaa"));
    Assert.That(script, Is.Null);
    Assert.That(region, Is.Null);

    var expectedVariants = new VariantSubtag[] { "dupl0" };
    Assert.That(variants, Is.EqualTo(expectedVariants));
}
public void TryGetSubtags_XFakeQD_ReturnsEmptyVariantSubtags()
{
    // Expectation: the script and region reported are the ones listed after "x"
    // ("Fake" and "QD"), and nothing is left over as a variant.
    LanguageSubtag language;
    ScriptSubtag script;
    RegionSubtag region;
    IEnumerable<VariantSubtag> variants;

    bool parsed = IetfLanguageTag.TryGetSubtags("en-Qaaa-QM-x-Fake-QD", out language, out script, out region, out variants);

    Assert.That(parsed, Is.True);
    Assert.That(language, Is.EqualTo((LanguageSubtag)"en"));
    Assert.That(script, Is.EqualTo((ScriptSubtag)"Fake"));
    Assert.That(region, Is.EqualTo((RegionSubtag)"QD"));
    Assert.That(variants, Is.Empty);
}
public void TryGetSubtags_InvalidConventionForScript_ReturnsPrivateUseScript()
{
    LanguageSubtag language;
    ScriptSubtag script;
    RegionSubtag region;
    IEnumerable<VariantSubtag> variants;

    bool parsed = IetfLanguageTag.TryGetSubtags("en-Qaaa-x-toolong", out language, out script, out region, out variants);

    Assert.That(parsed, Is.True);
    Assert.That(language, Is.EqualTo((LanguageSubtag)"en"));
    Assert.That(region, Is.Null);

    // SUT: the script stays "Qaaa" and is flagged as private use.
    Assert.That(script, Is.EqualTo((ScriptSubtag)"Qaaa"));
    Assert.That(script.IsPrivateUse, Is.True);
}
public void TryGetSubtags_XKalXA_ReturnsEmptyScriptSubtag()
{
    // Expectation: language and region come from the part after "x" ("kal", "XA"),
    // no script is reported, and no variants remain.
    LanguageSubtag language;
    ScriptSubtag script;
    RegionSubtag region;
    IEnumerable<VariantSubtag> variants;

    bool parsed = IetfLanguageTag.TryGetSubtags("qaa-QM-x-kal-XA", out language, out script, out region, out variants);

    Assert.That(parsed, Is.True);
    Assert.That(language, Is.EqualTo((LanguageSubtag)"kal"));
    Assert.That(script, Is.Null);
    Assert.That(region, Is.EqualTo((RegionSubtag)"XA"));
    Assert.That(variants, Is.Empty);
}
public void TryGetSubtags_FonipaXEtic_ReturnsFonipaEtic()
{
    // Expectation: both the registered variant ("fonipa") and the private-use
    // one ("etic") are returned, in that order.
    LanguageSubtag language;
    ScriptSubtag script;
    RegionSubtag region;
    IEnumerable<VariantSubtag> variants;

    bool parsed = IetfLanguageTag.TryGetSubtags("en-Latn-US-fonipa-x-etic", out language, out script, out region, out variants);

    Assert.That(parsed, Is.True);
    Assert.That(language, Is.EqualTo((LanguageSubtag)"en"));
    Assert.That(script, Is.EqualTo((ScriptSubtag)"Latn"));
    Assert.That(region, Is.EqualTo((RegionSubtag)"US"));

    var expectedVariants = new VariantSubtag[] { "fonipa", "etic" };
    Assert.That(variants, Is.EqualTo(expectedVariants));
}
public void TryGetSubtags_EmptyVariant2_ReturnsEmpty()
{
    // "en-US" has no variant part, so the variant sequence is expected to be empty;
    // the script is still expected to come back as "Latn".
    LanguageSubtag language;
    ScriptSubtag script;
    RegionSubtag region;
    IEnumerable<VariantSubtag> variants;

    bool parsed = IetfLanguageTag.TryGetSubtags("en-US", out language, out script, out region, out variants);

    Assert.That(parsed, Is.True);
    Assert.That(language, Is.EqualTo((LanguageSubtag)"en"));
    Assert.That(script, Is.EqualTo((ScriptSubtag)"Latn"));
    Assert.That(region, Is.EqualTo((RegionSubtag)"US"));
    Assert.That(variants, Is.Empty);
}
public void TryGetSubtags_ComplexPrivateLanguageCode_ReturnsValidResults()
{
    // Expectation: the language code "qed" is kept as-is while script, region and
    // both variants are still picked up from the rest of the tag.
    LanguageSubtag language;
    ScriptSubtag script;
    RegionSubtag region;
    IEnumerable<VariantSubtag> variants;

    bool parsed = IetfLanguageTag.TryGetSubtags("qed-Lepc-cN-fonipa-x-etic", out language, out script, out region, out variants);

    Assert.That(parsed, Is.True);
    Assert.That(language, Is.EqualTo((LanguageSubtag)"qed"));
    Assert.That(script, Is.EqualTo((ScriptSubtag)"Lepc"));
    Assert.That(region, Is.EqualTo((RegionSubtag)"CN"));

    var expectedVariants = new VariantSubtag[] { "fonipa", "etic" };
    Assert.That(variants, Is.EqualTo(expectedVariants));
}
public void TryGetSubtags_SimplePrivateUseLanguage_ReturnsValidResults()
{
    // A bare language code: only the language is expected to be set, with no
    // script, no region and an empty variant list.
    LanguageSubtag language;
    ScriptSubtag script;
    RegionSubtag region;
    IEnumerable<VariantSubtag> variants;

    bool parsed = IetfLanguageTag.TryGetSubtags("qtz", out language, out script, out region, out variants);

    Assert.That(parsed, Is.True);
    Assert.That(language, Is.EqualTo((LanguageSubtag)"qtz"));
    Assert.That(script, Is.Null);
    Assert.That(region, Is.Null);

    var noVariants = new VariantSubtag[0];
    Assert.That(variants, Is.EqualTo(noVariants));
}
public void TryGetSubtags_FullPUAConventionWithVariantAndPUAVariant_ReturnsAllParts()
{
    LanguageSubtag language;
    ScriptSubtag script;
    RegionSubtag region;
    IEnumerable<VariantSubtag> variants;

    // SUT
    bool parsed = IetfLanguageTag.TryGetSubtags("qaa-Qaaa-QM-fonipa-x-kal-Fake-RG-extravar", out language, out script, out region, out variants);
    Assert.That(parsed, Is.True);

    Assert.That(language, Is.EqualTo((LanguageSubtag)"kal"));
    Assert.That(script, Is.EqualTo((ScriptSubtag)"Fake"));
    Assert.That(region, Is.EqualTo((RegionSubtag)"RG"));

    // The variants are compared through their ToString() text, in any order.
    CollectionAssert.IsNotEmpty(variants);
    string[] expectedVariantTexts = { "International Phonetic Alphabet", "extravar" };
    CollectionAssert.AreEquivalent(expectedVariantTexts, variants.Select(variant => variant.ToString()));
}
/// <summary>
/// Initializes a new instance of the <see cref="LanguageLookup"/> class.
/// </summary>
/// <remarks>
/// The index is built in three passes:
/// 1. the embedded LanguageIndex resource is read and its names are attached to a
///    <see cref="LanguageInfo"/> per language code (three-letter codes are replaced by
///    their two-letter equivalent when the TwoToThreeCodes resource has one);
/// 2. the available, valid SLDR language tags are merged in, grouped by language part;
/// 3. every language is registered under each of its names, and a handful of languages
///    get a hard-coded local name put first.
/// </remarks>
public LanguageLookup()
{
    // Map of three-letter code -> two-letter code (the resource lists two-letter first).
    var threeToTwoLetter = new Dictionary<string, string>();
    foreach (string line in LanguageRegistryResources.TwoToThreeCodes.Replace("\r\n", "\n").Split(new[] { "\n" }, StringSplitOptions.RemoveEmptyEntries))
    {
        string[] items = line.Split('\t');
        threeToTwoLetter.Add(items[1].Trim(), items[0].Trim());
    }

    //LanguageIndex.txt Format: LangID CountryID NameType Name
    //a language appears on one row for each of its alternative languages
    var entries = new List<string>(LanguageRegistryResources.LanguageIndex.Split(new[] { "\n" }, StringSplitOptions.RemoveEmptyEntries));
    entries.Add("qaa\t?\tL\tUnlisted Language");
    foreach (string entry in entries.Skip(1)) //skip the header
    {
        string[] items = entry.Split('\t');
        if (items.Length != 4)
        {
            continue;
        }

        // Trimmed once so that every comparison below sees the same value.
        string nameType = items[2].Trim();
        if (nameType.Contains('!')) //temporary suppression of entries while waiting for Ethnologue changes
        {
            continue;
        }

        string code = items[0].Trim();
        string twoLetterCode;
        if (threeToTwoLetter.TryGetValue(code, out twoLetterCode))
        {
            code = twoLetterCode;
        }

        string regionCode = items[1].Trim();
        LanguageInfo language = GetOrCreateLanguageFromCode(code, regionCode == "?" ? "?" : StandardSubtags.RegisteredRegions[regionCode].Name);

        string name = items[3].Trim();
        if (nameType == "L")
        {
            // The primary ("L") name always goes first; drop any earlier occurrences.
            while (language.Names.Contains(name))
            {
                language.Names.Remove(name);
            }
            language.Names.Insert(0, name);
        }
        else
        {
            if (nameType.Contains("P"))
            {
                //Skip pejorative
            }
            else if (regionCode == "ET")
            {
                //Skip alternatives for Ethiopia, as per request
            }
            else if (code == "gax" || code == "om")
            {
                //For these two "Oromo" languages, skip all related languages as per request
                // (compared against the mapped code: "om" is a two-letter code and can
                // never appear in the raw three-letter LangID column)
            }
            else if (!language.Names.Contains(name))
            {
                language.Names.Add(name); //intentionally not lower-casing
            }
        }
    }

    IEnumerable<IGrouping<string, string>> languageGroups = Sldr.LanguageTags
        .Where(info => info.IsAvailable && IetfLanguageTag.IsValid(info.LanguageTag))
        .Select(info => IetfLanguageTag.Canonicalize(info.LanguageTag))
        .GroupBy(IetfLanguageTag.GetLanguagePart);

    foreach (IGrouping<string, string> languageGroup in languageGroups)
    {
        string[] langTags = languageGroup.ToArray();
        if (langTags.Length == 1)
        {
            // Only one tag for this language: re-key the existing entry under the full tag.
            string langTag = langTags[0];
            LanguageInfo language;
            if (langTag != languageGroup.Key && _codeToLanguageIndex.TryGetValue(languageGroup.Key, out language))
            {
                _codeToLanguageIndex.Remove(languageGroup.Key);
                language.LanguageTag = langTag;
                _codeToLanguageIndex[langTag] = language;
            }
        }
        else
        {
            foreach (string langTag in langTags)
            {
                LanguageSubtag languageSubtag;
                ScriptSubtag scriptSubtag;
                RegionSubtag regionSubtag;
                IEnumerable<VariantSubtag> variantSubtags;
                if (IetfLanguageTag.TryGetSubtags(langTag, out languageSubtag, out scriptSubtag, out regionSubtag, out variantSubtags))
                {
                    // A tag that is just the bare language code needs no extra entry.
                    if (langTag == languageSubtag)
                    {
                        continue;
                    }

                    LanguageInfo language = GetOrCreateLanguageFromCode(langTag, regionSubtag == null ? "?" : regionSubtag.Name);
                    bool displayScript = scriptSubtag != null && !IetfLanguageTag.IsScriptImplied(langTag);
                    LanguageInfo otherLanguage;
                    if (langTag != languageSubtag && !displayScript
                        && _codeToLanguageIndex.TryGetValue(languageSubtag, out otherLanguage)
                        && language.Countries.SetEquals(otherLanguage.Countries))
                    {
                        language.Names.AddRange(otherLanguage.Names);
                    }
                    else
                    {
                        string name = displayScript
                            ? string.Format("{0} ({1})", languageSubtag.Name, scriptSubtag.Name)
                            : languageSubtag.Name;
                        if (!language.Names.Contains(name))
                        {
                            language.Names.Add(name); //intentionally not lower-casing
                        }
                    }
                }
            }
        }
    }

    foreach (LanguageInfo languageInfo in _codeToLanguageIndex.Values)
    {
        foreach (string name in languageInfo.Names)
        {
            GetOrCreateListFromName(name).Add(languageInfo);
        }

        if (languageInfo.Names.Count == 0)
        {
            continue; // this language is suppressed
        }

        //Why just this small set? Only out of convenience. Ideally we'd have a db of all languages as they write it in their literature.
        string localName = null;
        switch (languageInfo.Names[0])
        {
            case "French":
                localName = "français";
                break;
            case "Spanish":
                localName = "español";
                break;
            case "Chinese":
                localName = "中文";
                break;
            case "Hindi":
                localName = "हिन्दी";
                break;
            case "Bengali":
                localName = "বাংলা";
                break;
            case "Telugu":
                localName = "తెలుగు";
                break;
            case "Tamil":
                localName = "தமிழ்";
                break;
            case "Urdu":
                localName = "اُردُو";
                break;
            case "Arabic":
                localName = "العربية/عربي";
                break;
            case "Thai":
                localName = "ภาษาไทย";
                break;
            case "Indonesian":
                localName = "Bahasa Indonesia";
                break;
        }

        if (!string.IsNullOrEmpty(localName))
        {
            // If the local name was not already among the names it has not been
            // registered in the name index yet, so register it now.
            if (!languageInfo.Names.Remove(localName))
            {
                GetOrCreateListFromName(localName).Add(languageInfo);
            }
            languageInfo.Names.Insert(0, localName);
        }
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="LanguageDataIndex"/> class.
/// </summary>
/// <param name="sourcefiles">
/// The contents of the data files, keyed by file name. The entries read here are
/// "TwoToThreeCodes.txt", "ianaSubtagRegistry.txt", "LanguageIndex.txt" and
/// "LanguageCodes.txt".
/// </param>
/// <remarks>
/// Diagnostic side effect: a line is written to the console for every language that is
/// found in more than one country but ends up without a primary country.
/// </remarks>
public LanguageDataIndex(IDictionary<string, string> sourcefiles)
{
    string twotothreecodes = sourcefiles["TwoToThreeCodes.txt"];
    string subtagregistry = sourcefiles["ianaSubtagRegistry.txt"];

    StandardSubtags.InitialiseIanaSubtags(twotothreecodes, subtagregistry);

    // First read in Ethnologue data file into temporary dictionary
    var threeToTwoLetter = StandardSubtags.TwoAndThreeMap(twotothreecodes, true);

    //LanguageIndex.txt Format: LangID CountryID NameType Name
    //a language appears on one row for each of its alternative languages
    string languageindex = sourcefiles["LanguageIndex.txt"];
    var entries = new List<string>(languageindex.Split(new[] { "\n" }, StringSplitOptions.RemoveEmptyEntries));
    entries.Add("qaa\t?\tL\tUnlisted Language");
    foreach (string entry in entries.Skip(1)) //skip the header
    {
        string[] items = entry.Split('\t');
        if (items.Length != 4)
        {
            continue;
        }
        if (items[2].StartsWith("!")) //temporary suppression of entries while waiting for Ethnologue changes
        {
            continue;
        }
        // excluded by !
        // all gax (ET,KE,SO) including L
        // all gaz (ET) including L
        // all hae (ET) including L

        string code = items[0].Trim();
        string twoLetterCode;
        string threelettercode = code;
        if (threeToTwoLetter.TryGetValue(code, out twoLetterCode))
        {
            code = twoLetterCode;
        }

        //temporary suppression of entries while waiting for Ethnologue changes (those excluded by !)
        if (ExcludedCodes.Contains(code))
        {
            continue;
        }

        string regionCode = items[1].Trim();
        // Resolved once and reused below, so the unknown region "?" is never looked
        // up in the registry (it maps to the empty region name instead).
        string regionName = regionCode == "?" ? "" : StandardSubtags.RegisteredRegions[regionCode].Name;
        LanguageInfo language = GetOrCreateLanguageFromCode(code, threelettercode, regionName);

        string name = items[3].Trim();
        if (items[2].Trim() == "L")
        {
            // The primary ("L") name always goes first; drop any earlier occurrences.
            while (language.Names.Contains(name))
            {
                language.Names.Remove(name);
            }
            language.Names.Insert(0, name);
        }
        else
        {
            if (items[2].Contains("P"))
            {
                //Skip pejorative
            }
            else if (ExcludedRegions.Contains(regionName))
            {
                //Skip alternatives for Ethiopia, as per request
            }
            else if (code == "gax" || code == "om")
            {
                //For these two "Oromo" languages, skip all related languages as per request
            }
            else if (!language.Names.Contains(name))
            {
                language.Names.Add(name); //intentionally not lower-casing
            }
        }
    }

    // Then for each registered ietf language tag create a real entry and add the ethnologue data to it
    IOrderedEnumerable<LanguageSubtag> languages = StandardSubtags.RegisteredLanguages.OrderBy(lang => lang.Iso3Code);
    foreach (LanguageSubtag language in languages)
    {
        if (language.IsDeprecated || ExcludedCodes.Contains(language.Code))
        {
            continue;
        }

        LanguageInfo langinfo = GetOrCreateLanguageFromCode(language.Code, language.Iso3Code, null);
        langinfo.DesiredName = language.Name.Replace("'", "’");
        langinfo.IsMacroLanguage = language.IsMacroLanguage;

        // Languages found in an excluded region get at most one name.
        bool singlename = langinfo.Countries.Any(country => ExcludedRegions.Contains(country));

        foreach (string name in language.Names)
        {
            string langname = name.Replace("'", "’");
            if (!langinfo.Names.Contains(langname))
            {
                if (singlename && langinfo.Names.Count == 1)
                {
                    // leave single ethnologue names
                    break;
                }
                else
                {
                    langinfo.Names.Add(langname);
                }
            }
            if (singlename)
            {
                break;
            }
        }
        _codeToLanguageIndex.Add(language.Code, langinfo);
    }

    IEnumerable<IGrouping<string, string>> languageGroups = Sldr.LanguageTags
        .Where(info => info.IsAvailable && IetfLanguageTag.IsValid(info.LanguageTag))
        .Select(info => IetfLanguageTag.Canonicalize(info.LanguageTag))
        .GroupBy(IetfLanguageTag.GetLanguagePart);

    foreach (IGrouping<string, string> languageGroup in languageGroups)
    {
        string[] langTags = languageGroup.ToArray();
        if (langTags.Length == 1)
        {
            // Only one tag for this language: re-key the existing entry under the full tag.
            string langTag = langTags[0];
            LanguageInfo language;
            if (langTag != languageGroup.Key && _codeToLanguageIndex.TryGetValue(languageGroup.Key, out language))
            {
                _codeToLanguageIndex.Remove(languageGroup.Key);
                language.LanguageTag = langTag;
                _codeToLanguageIndex[langTag] = language;
            }
        }
        else
        {
            foreach (string langTag in langTags)
            {
                LanguageSubtag languageSubtag;
                ScriptSubtag scriptSubtag;
                RegionSubtag regionSubtag;
                IEnumerable<VariantSubtag> variantSubtags;
                if (IetfLanguageTag.TryGetSubtags(langTag, out languageSubtag, out scriptSubtag, out regionSubtag, out variantSubtags))
                {
                    // A tag that is just the bare language code needs no extra entry.
                    if (langTag == languageSubtag)
                    {
                        continue;
                    }

                    LanguageInfo language = GetOrCreateLanguageFromCode(langTag, langTag, regionSubtag == null ? "" : regionSubtag.Name); // changed to default to "" 2017-04-24
                    bool displayScript = scriptSubtag != null && !IetfLanguageTag.IsScriptImplied(langTag);
                    LanguageInfo otherLanguage;
                    if (langTag != languageSubtag && !displayScript
                        && _codeToLanguageIndex.TryGetValue(languageSubtag, out otherLanguage)
                        && language.Countries.SetEquals(otherLanguage.Countries))
                    {
                        language.Names.AddRange(otherLanguage.Names);
                    }
                    else
                    {
                        string name = displayScript
                            ? string.Format("{0} ({1})", languageSubtag.Name, scriptSubtag.Name)
                            : languageSubtag.Name;
                        if (!language.Names.Contains(name))
                        {
                            language.Names.Add(name); //intentionally not lower-casing
                        }
                    }

                    // Copy the macro-language flag from the entry of the bare language code, if any.
                    LanguageInfo keylanguage;
                    if (_codeToLanguageIndex.TryGetValue(languageGroup.Key, out keylanguage))
                    {
                        language.IsMacroLanguage = keylanguage.IsMacroLanguage;
                    }
                    _codeToLanguageIndex.Add(langTag, language);
                }
            }
        }
    }

    // LanguageCodes.txt supplies the primary country; only the first two
    // tab-separated fields of each line (language code, country code) are used.
    string languagecodes = sourcefiles["LanguageCodes.txt"];
    var codeentries = new List<string>(languagecodes.Split(new[] { "\n" }, StringSplitOptions.RemoveEmptyEntries));
    foreach (var languageCode in codeentries)
    {
        var data = languageCode.Split(new[] { '\t' }, StringSplitOptions.RemoveEmptyEntries);
        if (data.Length < 2)
        {
            continue;
        }

        var langCode = data[0];
        string twoLetterCode;
        if (threeToTwoLetter.TryGetValue(langCode, out twoLetterCode))
        {
            langCode = twoLetterCode;
        }
        if (langCode == "fuv")
        {
            langCode = "fuv-Arab"; // special case because the script has been added to this language code
        }                          // which is probably something to do with the SLDR

        var countryCode = data[1];
        LanguageInfo lang;
        if (_codeToLanguageIndex.TryGetValue(langCode, out lang))
        {
            lang.PrimaryCountry = StandardSubtags.RegisteredRegions[countryCode].Name;
        }
    }

    // localise some language names
    foreach (LanguageInfo languageInfo in _codeToLanguageIndex.Values)
    {
        if (languageInfo.Names.Count == 0)
        {
            continue; // this language is suppressed
        }

        //Why just this small set? Only out of convenience. Ideally we'd have a db of all languages as they write it in their literature.
        string localName = null;
        switch (languageInfo.Names[0])
        {
            case "French":
                localName = "français";
                break;
            case "Spanish":
                localName = "español";
                break;
            case "Chinese":
                localName = "中文";
                break;
            case "Hindi":
                localName = "हिन्दी";
                break;
            case "Bengali":
                localName = "বাংলা";
                break;
            case "Telugu":
                localName = "తెలుగు";
                break;
            case "Tamil":
                localName = "தமிழ்";
                break;
            case "Urdu":
                localName = "اُردُو";
                break;
            case "Arabic":
                localName = "العربية/عربي";
                break;
            case "Thai":
                localName = "ภาษาไทย";
                break;
            case "Indonesian":
                localName = "Bahasa Indonesia";
                break;
        }

        if (!string.IsNullOrEmpty(localName))
        {
            // Remove is a no-op when the name is absent, so no Contains check is needed.
            languageInfo.Names.Remove(localName);
            languageInfo.Names.Insert(0, localName);
            languageInfo.DesiredName = localName;
        }

        switch (languageInfo.ThreeLetterTag)
        {
            case "itd": // 2 temporary special cases because the LanguageCodes.txt files needs to be updated with LanguageIndex.txt
                languageInfo.PrimaryCountry = "Indonesia";
                break;
            case "xak":
                languageInfo.PrimaryCountry = "Venezuela";
                break;
            default:
                // Also set the PrimaryCountry if there is only one country
                if (String.IsNullOrEmpty(languageInfo.PrimaryCountry) && languageInfo.Countries.Count == 1)
                {
                    languageInfo.PrimaryCountry = languageInfo.Countries.First();
                }
                break;
        }
    }

    // check if any languages are found in multiple countries but do not have a primary country
    // there is a test for this in LanguageLookupTests.llExpectedLanguagesHaveUniquePrimaryCountries
    foreach (var lang in _codeToLanguageIndex.Values)
    {
        if (String.IsNullOrEmpty(lang.PrimaryCountry) && lang.Countries.Count > 1)
        {
            Console.WriteLine("Language {0}({1}) has no primary country but is found in multiple countries", lang.DesiredName, lang.LanguageTag);
        }
    }
}