/// <summary>
/// Optionally rewrites the code of <paramref name="langInfo"/> using the ThreeToTwoLetter map.
/// When Force3LetterCodes is false, or the code is already three characters long, nothing is
/// changed. Otherwise the first ThreeToTwoLetter entry whose value equals the current code
/// supplies the replacement (its key).
/// </summary>
/// <param name="langInfo">The language entry to adjust; it is modified in place.</param>
/// <returns>The same <paramref name="langInfo"/> instance that was passed in.</returns>
private LanguageInfo Set3LetterCode(LanguageInfo langInfo)
{
	if (Force3LetterCodes && langInfo.Code.Length != 3)
	{
		// Reverse lookup: scan the map for an entry whose value is the current code.
		string replacement = null;
		foreach (var pair in ThreeToTwoLetter)
		{
			if (pair.Value == langInfo.Code)
			{
				replacement = pair.Key;
				break;
			}
		}

		// Leave the code untouched when no (non-empty) match was found.
		if (!string.IsNullOrEmpty(replacement))
		{
			langInfo.Code = replacement;
		}
	}

	return langInfo;
}
/// <summary>
/// Returns the <see cref="LanguageInfo"/> registered under <paramref name="code"/> in
/// CodeToLanguageIndex, creating and registering it first if necessary. The country name
/// that CountryCodeToCountryName holds for <paramref name="country"/> is recorded on the
/// entry: it becomes the initial Country value of a new entry, and is appended
/// (separated by ", ") to an existing entry that does not list it yet.
/// </summary>
/// <param name="code">Language code used as the key in CodeToLanguageIndex.</param>
/// <param name="country">
/// Country code to translate through CountryCodeToCountryName. The lookup uses the map's
/// indexer and is not guarded, so an unknown code surfaces as that indexer's exception.
/// </param>
/// <returns>The existing or newly created language entry.</returns>
private LanguageInfo GetOrCreateLanguageFromCode(string code, string country)
{
	const string separator = ", ";

	// Resolve the country first so an unknown country code fails before anything is added.
	var countryName = CountryCodeToCountryName[country];

	LanguageInfo language;
	if (!CodeToLanguageIndex.TryGetValue(code, out language))
	{
		language = new LanguageInfo() { Code = code, Country = countryName };
		CodeToLanguageIndex.Add(code, language);
		return language;
	}

	// Nothing to record for an empty name (the previous substring test also never appended it).
	if (string.IsNullOrEmpty(countryName))
	{
		return language;
	}

	if (string.IsNullOrEmpty(language.Country))
	{
		// First real country for this entry: no leading separator.
		language.Country = countryName;
		return language;
	}

	// Compare whole list entries rather than raw substrings. A plain Contains() treats
	// "Niger" as already present once "Nigeria" is listed (likewise "Guinea" vs
	// "Papua New Guinea"), silently dropping the country. Wrapping both sides in the
	// separator also keeps working for names that themselves contain ", ".
	var delimitedList = separator + language.Country + separator;
	var delimitedName = separator + countryName + separator;
	if (delimitedList.IndexOf(delimitedName, StringComparison.Ordinal) < 0)
	{
		language.Country += separator + countryName;
	}

	return language;
}
/// <summary>
/// Populates the lookup tables from the text resources exposed by LanguageRegistryResources:
/// the code map (ThreeToTwoLetter), the country map (CountryCodeToCountryName), the
/// per-code language entries (CodeToLanguageIndex) and finally the name index maintained
/// through GetOrCreateListFromName.
/// </summary>
public EthnologueLookup()
{
	var lineBreak = new[] { "\n" };

	// Code table: tab-separated rows; column 1 becomes the key and column 0 the value
	// (presumably three-letter and two-letter codes respectively, going by the map's name).
	var codeRows = LanguageRegistryResources.TwoToThreeCodes.Split(lineBreak, StringSplitOptions.RemoveEmptyEntries);
	foreach (var codeRow in codeRows)
	{
		var fields = codeRow.Split('\t');
		var key = fields[1].Trim();
		var value = fields[0].Trim();
		ThreeToTwoLetter.Add(key, value);
	}

	// Country table columns: id, name, area. Only id and name are used.
	var countryRows = LanguageRegistryResources.CountryCodes.Split(lineBreak, StringSplitOptions.RemoveEmptyEntries);
	foreach (var countryRow in countryRows)
	{
		var fields = countryRow.Split('\t');
		var countryId = fields[0].Trim();
		var countryName = fields[1].Trim();
		CountryCodeToCountryName.Add(countryId, countryName);
	}

	// LanguageIndex.txt columns: LangID, CountryID, NameType, Name.
	// A language appears on one row for each of its alternative names.
	string[] indexRows = LanguageRegistryResources.LanguageIndex.Split(lineBreak, StringSplitOptions.RemoveEmptyEntries);
	for (int rowNumber = 1; rowNumber < indexRows.Length; rowNumber++) // row 0 is the header
	{
		var fields = indexRows[rowNumber].Split('\t');
		if (fields.Length != 4)
		{
			continue; // ignore rows that do not have exactly four columns
		}

		// Prefer the mapped code whenever the code map has an entry for this LangID.
		string mappedCode;
		var code = fields[0].Trim();
		code = ThreeToTwoLetter.TryGetValue(code, out mappedCode) ? mappedCode : code;

		LanguageInfo language = GetOrCreateLanguageFromCode(code, fields[1].Trim());
		var name = fields[3].Trim();

		if (fields[2] != "L")
		{
			// Any other name type: add once, keeping the original casing on purpose.
			if (!language.Names.Contains(name))
			{
				language.Names.Add(name);
			}
			continue;
		}

		// Name type "L": move the name to the front, dropping any earlier copies.
		while (language.Names.Contains(name))
		{
			language.Names.Remove(name);
		}
		language.Names.Insert(0, name);
	}

	// Register every language under each of its names.
	foreach (var info in CodeToLanguageIndex.Values)
	{
		foreach (var knownName in info.Names)
		{
			GetOrCreateListFromName(knownName).Add(info);
		}
	}
}
/// <summary>
/// Populates the lookup tables from the text resources exposed by LanguageRegistryResources:
/// the code map (ThreeToTwoLetter), the country map (CountryCodeToCountryName), the
/// per-code language entries (CodeToLanguageIndex, including a synthetic "qaa" entry for
/// unlisted languages) and finally the name index maintained through
/// GetOrCreateListFromName, which also receives the hard-coded local names.
/// Force3LetterCodes starts out false.
/// </summary>
public EthnologueLookup()
{
	Force3LetterCodes = false;

	var lineBreak = new[] { "\n" };

	// Code table: tab-separated rows; column 1 becomes the key and column 0 the value
	// (presumably three-letter and two-letter codes respectively, going by the map's name).
	var codeRows = LanguageRegistryResources.TwoToThreeCodes.Split(lineBreak, StringSplitOptions.RemoveEmptyEntries);
	foreach (var codeRow in codeRows)
	{
		var fields = codeRow.Split('\t');
		var key = fields[1].Trim();
		var value = fields[0].Trim();
		ThreeToTwoLetter.Add(key, value);
	}

	// Country table columns: id, name, area. Only id and name are used.
	var countryRows = LanguageRegistryResources.CountryCodes.Split(lineBreak, StringSplitOptions.RemoveEmptyEntries);
	foreach (var countryRow in countryRows)
	{
		var fields = countryRow.Split('\t');
		var countryId = fields[0].Trim();
		var countryName = fields[1].Trim();
		CountryCodeToCountryName.Add(countryId, countryName);
	}
	CountryCodeToCountryName.Add("?", "?"); // placeholder country for the unlisted language

	// LanguageIndex.txt columns: LangID, CountryID, NameType, Name.
	// A language appears on one row for each of its alternative names.
	List<string> indexRows = new List<string>(
		LanguageRegistryResources.LanguageIndex.Split(lineBreak, StringSplitOptions.RemoveEmptyEntries));
	indexRows.Add("qaa\t?\tL\tUnlisted Language");

	for (int rowNumber = 1; rowNumber < indexRows.Count; rowNumber++) // row 0 is the header
	{
		var fields = indexRows[rowNumber].Split('\t');
		if (fields.Length != 4)
		{
			continue; // ignore rows that do not have exactly four columns
		}

		// Prefer the mapped code whenever the code map has an entry for this LangID.
		string mappedCode;
		var code = fields[0].Trim();
		code = ThreeToTwoLetter.TryGetValue(code, out mappedCode) ? mappedCode : code;

		LanguageInfo language = GetOrCreateLanguageFromCode(code, fields[1].Trim());
		var name = fields[3].Trim();

		if (fields[2] != "L")
		{
			// Any other name type: add once, keeping the original casing on purpose.
			if (!language.Names.Contains(name))
			{
				language.Names.Add(name);
			}
			continue;
		}

		// Name type "L": move the name to the front, dropping any earlier copies.
		while (language.Names.Contains(name))
		{
			language.Names.Remove(name);
		}
		language.Names.Insert(0, name);
	}

	// Why just this small set? Only out of convenience. Ideally there would be a database
	// of all languages as they are written in their own literature.
	// The entries are applied in order through the indexer, so a missing code still throws.
	var localNames = new[]
	{
		new[] { "fr", "français" },
		new[] { "es", "español" },
		new[] { "zho", "中文" }, // Chinese
		new[] { "hi", "हिन्दी" }, // Hindi
		new[] { "bn", "বাংলা" }, // Bengali
		new[] { "te", "తెలుగు" }, // Telugu
		new[] { "ta", "தமிழ்" }, // Tamil
		new[] { "ur", "اُردُو" }, // Urdu
		new[] { "ar", "العربية/عربي" }, // Arabic
		new[] { "th", "ภาษาไทย" }, // Thai
		new[] { "id", "Bahasa Indonesia" }, // Indonesian
	};
	foreach (var localName in localNames)
	{
		CodeToLanguageIndex[localName[0]].LocalName = localName[1];
	}

	// Register every language under each of its names, then under its local name if it has one.
	foreach (var info in CodeToLanguageIndex.Values)
	{
		foreach (var knownName in info.Names)
		{
			GetOrCreateListFromName(knownName).Add(info);
		}

		if (string.IsNullOrEmpty(info.LocalName))
		{
			continue;
		}
		GetOrCreateListFromName(info.LocalName).Add(info);
	}
}