/// <summary>
/// Decides whether a language passes the regional-dialect filter.
/// </summary>
/// <param name="li">The language whose tag is examined.</param>
/// <returns>
/// true when IncludeRegionalDialects is set, when the tag is "zh-CN" or "zh-TW"
/// (Chinese languages with region codes are always included), or when
/// IetfLanguageTag.GetRegionPart yields null or an empty string for the tag.
/// </returns>
private bool RegionalDialectsFilter(LanguageInfo li)
{
    return IncludeRegionalDialects
        || li.LanguageTag.IsOneOf("zh-CN", "zh-TW")
        || string.IsNullOrEmpty(IetfLanguageTag.GetRegionPart(li.LanguageTag));
}
/// <summary>
/// Looks up the language registered under <paramref name="code"/> in the code index,
/// creating and registering a new one when no entry exists yet.
/// </summary>
/// <param name="code">Key into the code index; also the LanguageTag of a newly created entry.</param>
/// <param name="countryName">Country to add to the language's Countries; ignored when null or empty.</param>
/// <returns>The existing or newly created language.</returns>
private LanguageInfo GetOrCreateLanguageFromCode(string code, string countryName)
{
    LanguageInfo entry;
    bool alreadyIndexed = _codeToLanguageIndex.TryGetValue(code, out entry);
    if (!alreadyIndexed)
    {
        entry = new LanguageInfo { LanguageTag = code };
        _codeToLanguageIndex.Add(code, entry);
    }

    bool hasCountry = !string.IsNullOrEmpty(countryName);
    if (hasCountry)
    {
        entry.Countries.Add(countryName);
    }

    return entry;
}
/// <summary>
/// Builds a LanguageInfo from the supplied data and registers it in the code, name and
/// country indexes.
/// </summary>
/// <param name="code">Language tag; stored as LanguageTag and used as a key in the code index.</param>
/// <param name="threelettercode">Stored as ThreeLetterTag; also a code-index key when non-null and different from <paramref name="code"/>.</param>
/// <param name="full">Full expanded tag; an extra code-index key when non-null and different from <paramref name="code"/>.</param>
/// <param name="name">First entry of Names (after trimming) when non-null.</param>
/// <param name="localName">Added to Names (after trimming) when non-null and not already present.</param>
/// <param name="region">Primary region code, converted to a region name; null gives an empty primary country.</param>
/// <param name="names">Further names, each trimmed and added when not already present.</param>
/// <param name="regions">Whitespace-separated region codes; entries that are empty or contain '?' are skipped.</param>
/// <param name="tags">Further keys under which the language is registered in the code index.</param>
/// <returns>Always true.</returns>
private bool AddLanguage(string code, string threelettercode, string full = null,
    string name = null, string localName = null, string region = null, List<string> names = null,
    string regions = null, List<string> tags = null)
{
    // Convert the primary region code to a full region name.
    string primarycountry;
    if (region == null)
    {
        primarycountry = "";
    }
    else if (!StandardSubtags.IsValidIso3166RegionCode(region))
    {
        primarycountry = "Invalid region";
    }
    else if (StandardSubtags.IsPrivateUseRegionCode(region))
    {
        primarycountry = region == "XK" ? "Kosovo" : "Unknown private use";
    }
    else
    {
        primarycountry = StandardSubtags.RegisteredRegions[region].Name;
    }

    LanguageInfo language = new LanguageInfo
    {
        LanguageTag = code,
        ThreeLetterTag = threelettercode,
        // DesiredName defaults to Names[0], which is set below.
        PrimaryCountry = primarycountry
    };
    language.Countries.Add(primarycountry);

    if (regions != null)
    {
        foreach (string country in regions.Split())
        {
            if (!country.Contains('?') && country != "")
            {
                language.Countries.Add(StandardSubtags.RegisteredRegions[country].Name);
            }
        }
    }

    // For sorting, it is better to store name first instead of localName, which may be in a local script.
    // Names[0] is used in several ways in sorting languages in the list of possible matches: 1) bring
    // to the top of the list languages where Names[0] matches what the user typed, 2) order by the
    // "typing distance" of Names[0] from what the user typed, and 3) order by comparing the Names[0]
    // value of the two languages if neither matches the search string and their typing distances from
    // the search string are the same.
    // Names[1] (if it exists) is used to move the language toward the top of the list if it exactly
    // matches the search string. It is not used otherwise in the sorting heuristics. No other
    // values in the Names list are involved in the sorting process.
    if (name != null)
    {
        language.Names.Add(name.Trim());
    }

    // Trim BEFORE checking for duplicates: the trimmed value is what gets stored, so comparing
    // the untrimmed value would let names that differ only in surrounding whitespace through
    // and register the language twice under the same name.
    if (localName != null)
    {
        string trimmedLocalName = localName.Trim();
        if (!language.Names.Contains(trimmedLocalName))
        {
            language.Names.Add(trimmedLocalName);
        }
    }
    if (names != null)
    {
        foreach (string langname in names)
        {
            string trimmedName = langname.Trim();
            if (!language.Names.Contains(trimmedName))
            {
                language.Names.Add(trimmedName);
            }
        }
    }

    // If we end up needing to add the language code, that reflects a deficiency in the data. But
    // having a bogus name value probably hurts less than not having any name at all. The sort
    // process mentioned above uses the language tag as well as the first two items in the Names list.
    Debug.Assert(language.Names.Count > 0);
    if (language.Names.Count == 0)
    {
        language.Names.Add(code);
    }

    // Add the language to _codeToLanguageIndex under its tag, its full expanded tag, its
    // three-letter code and any additional tags.
    _codeToLanguageIndex[code] = language;
    if (full != null && !string.Equals(full, code))
    {
        _codeToLanguageIndex[full] = language; // add the full expanded tag
    }
    if (threelettercode != null && !string.Equals(code, threelettercode))
    {
        _codeToLanguageIndex[threelettercode] = language;
    }
    if (tags != null)
    {
        foreach (string langtag in tags)
        {
            _codeToLanguageIndex[langtag] = language;
        }
    }

    // Register the language under each of its names.
    foreach (string langname in language.Names)
    {
        GetOrCreateListFromName(langname).Add(language);
    }

    // Add to _countryToLanguageIndex, skipping the empty placeholder country.
    foreach (var country in language.Countries)
    {
        if (string.IsNullOrEmpty(country))
        {
            continue;
        }

        List<LanguageInfo> list;
        if (!_countryToLanguageIndex.TryGetValue(country, out list))
        {
            list = new List<LanguageInfo>();
            _countryToLanguageIndex[country] = list;
        }
        list.Add(language);
    }

    return true;
}
/// <summary>
/// Initializes a new instance of the <see cref="LanguageLookup"/> class.
/// Fills the code index from the TwoToThreeCodes and LanguageIndex resources and from
/// Sldr.LanguageTags, then registers every language under each of its names.
/// </summary>
public LanguageLookup()
{
    // Map each three-letter code (second column) to its two-letter code (first column).
    string[] codeLines = LanguageRegistryResources.TwoToThreeCodes
        .Replace("\r\n", "\n")
        .Split(new[] { "\n" }, StringSplitOptions.RemoveEmptyEntries);
    var threeToTwoLetter = new Dictionary<string, string>();
    foreach (string codeLine in codeLines)
    {
        string[] pair = codeLine.Split('\t');
        threeToTwoLetter.Add(pair[1].Trim(), pair[0].Trim());
    }

    // LanguageIndex.txt format: LangID CountryID NameType Name
    // A language appears on one row for each of its alternative names.
    var indexRows = new List<string>(
        LanguageRegistryResources.LanguageIndex.Split(new[] { "\n" }, StringSplitOptions.RemoveEmptyEntries));
    indexRows.Add("qaa\t?\tL\tUnlisted Language");
    foreach (string row in indexRows.Skip(1)) // skip the header
    {
        string[] fields = row.Split('\t');

        // Ignore malformed rows and rows marked with '!' (temporary suppression of entries
        // while waiting for Ethnologue changes).
        if (fields.Length != 4 || fields[2].Contains('!'))
        {
            continue;
        }

        string nameType = fields[2];

        // Prefer the two-letter code when one is known.
        string code = fields[0].Trim();
        string twoLetterCode;
        if (threeToTwoLetter.TryGetValue(code, out twoLetterCode))
        {
            code = twoLetterCode;
        }

        string regionCode = fields[1].Trim();
        string countryName;
        if (regionCode == "?")
        {
            countryName = "?";
        }
        else
        {
            countryName = StandardSubtags.RegisteredRegions[regionCode].Name;
        }

        LanguageInfo language = GetOrCreateLanguageFromCode(code, countryName);
        string name = fields[3].Trim();

        if (nameType == "L")
        {
            // An "L" row supplies the name that goes first; drop any earlier copies of it.
            while (language.Names.Remove(name))
            {
            }
            language.Names.Insert(0, name);
            continue;
        }

        // Skipped alternatives: pejorative names, alternatives for Ethiopia (as per request),
        // and everything related to the two "Oromo" languages (as per request).
        bool skipped = nameType.Contains("P")
            || fields[1] == "ET"
            || fields[0] == "gax"
            || fields[0] == "om";
        if (!skipped && !language.Names.Contains(name))
        {
            language.Names.Add(name); // intentionally not lower-casing
        }
    }

    // Group the available, valid SLDR tags (canonicalized) by their language part.
    var usableTags = Sldr.LanguageTags
        .Where(info => info.IsAvailable && IetfLanguageTag.IsValid(info.LanguageTag));
    IEnumerable<IGrouping<string, string>> languageGroups = usableTags
        .Select(info => IetfLanguageTag.Canonicalize(info.LanguageTag))
        .GroupBy(IetfLanguageTag.GetLanguagePart);

    foreach (IGrouping<string, string> languageGroup in languageGroups)
    {
        string[] groupTags = languageGroup.ToArray();
        if (groupTags.Length == 1)
        {
            // A single tag for this language: re-key an existing entry from the bare
            // language code to that tag.
            string onlyTag = groupTags[0];
            LanguageInfo existing;
            if (onlyTag != languageGroup.Key
                && _codeToLanguageIndex.TryGetValue(languageGroup.Key, out existing))
            {
                _codeToLanguageIndex.Remove(languageGroup.Key);
                existing.LanguageTag = onlyTag;
                _codeToLanguageIndex[onlyTag] = existing;
            }
            continue;
        }

        foreach (string langTag in groupTags)
        {
            LanguageSubtag languageSubtag;
            ScriptSubtag scriptSubtag;
            RegionSubtag regionSubtag;
            IEnumerable<VariantSubtag> variantSubtags;
            if (!IetfLanguageTag.TryGetSubtags(langTag, out languageSubtag, out scriptSubtag,
                out regionSubtag, out variantSubtags))
            {
                continue;
            }

            // The bare language tag needs no additional entry.
            if (langTag == languageSubtag)
            {
                continue;
            }

            string regionName = regionSubtag == null ? "?" : regionSubtag.Name;
            LanguageInfo language = GetOrCreateLanguageFromCode(langTag, regionName);

            bool displayScript = scriptSubtag != null && !IetfLanguageTag.IsScriptImplied(langTag);
            LanguageInfo otherLanguage;
            if (langTag != languageSubtag
                && !displayScript
                && _codeToLanguageIndex.TryGetValue(languageSubtag, out otherLanguage)
                && language.Countries.SetEquals(otherLanguage.Countries))
            {
                // Same countries as the base language and no script to show: reuse its names.
                language.Names.AddRange(otherLanguage.Names);
            }
            else
            {
                string name;
                if (displayScript)
                {
                    name = string.Format("{0} ({1})", languageSubtag.Name, scriptSubtag.Name);
                }
                else
                {
                    name = languageSubtag.Name;
                }

                if (!language.Names.Contains(name))
                {
                    language.Names.Add(name); // intentionally not lower-casing
                }
            }
        }
    }

    foreach (LanguageInfo languageInfo in _codeToLanguageIndex.Values)
    {
        if (languageInfo.Names.Count == 0)
        {
            continue; // this language is suppressed
        }

        foreach (string name in languageInfo.Names)
        {
            GetOrCreateListFromName(name).Add(languageInfo);
        }

        // Why just this small set? Only out of convenience. Ideally we'd have a db of all
        // languages as they write it in their literature.
        string localName = null;
        switch (languageInfo.Names[0])
        {
            case "French":
                localName = "français";
                break;
            case "Spanish":
                localName = "español";
                break;
            case "Chinese":
                localName = "中文";
                break;
            case "Hindi":
                localName = "हिन्दी";
                break;
            case "Bengali":
                localName = "বাংলা";
                break;
            case "Telugu":
                localName = "తెలుగు";
                break;
            case "Tamil":
                localName = "தமிழ்";
                break;
            case "Urdu":
                localName = "اُردُو";
                break;
            case "Arabic":
                localName = "العربية/عربي";
                break;
            case "Thai":
                localName = "ภาษาไทย";
                break;
            case "Indonesian":
                localName = "Bahasa Indonesia";
                break;
        }

        if (string.IsNullOrEmpty(localName))
        {
            continue;
        }

        // Move the local name to the front; register it in the name index only when it was
        // not already one of the language's names.
        bool wasAlreadyNamed = languageInfo.Names.Remove(localName);
        if (!wasAlreadyNamed)
        {
            GetOrCreateListFromName(localName).Add(languageInfo);
        }
        languageInfo.Names.Insert(0, localName);
    }
}
/// <summary>
/// Shows a LanguageLookupDialog preloaded with the given code and returns the user's choice.
/// </summary>
/// <param name="iso639Code">Tag of the initially selected language; also used as the search text.</param>
/// <param name="potentiallyCustomName">
/// Desired name for the initial selection. When null, the desired-name field is hidden.
/// </param>
/// <returns>The dialog's selected language, or null when the dialog result is not OK.</returns>
private LanguageInfo ChangeLanguage(string iso639Code, string potentiallyCustomName = null)
{
    using (var dlg = new LanguageLookupDialog())
    {
        // At this point, we don't let them customize the national languages.
        bool allowCustomName = potentiallyCustomName != null;
        dlg.IsDesiredLanguageNameFieldVisible = allowCustomName;

        var initialSelection = new LanguageInfo();
        initialSelection.LanguageTag = iso639Code;
        if (!string.IsNullOrEmpty(potentiallyCustomName))
        {
            // To be noticed, this must be set before dlg.SelectedLanguage is assigned.
            initialSelection.DesiredName = potentiallyCustomName;
        }

        dlg.SelectedLanguage = initialSelection;
        dlg.SearchText = iso639Code;

        return dlg.ShowDialog() == DialogResult.OK ? dlg.SelectedLanguage : null;
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="LanguageLookup"/> class.
/// It gets its data from the LanguageDataIndex resource.
/// </summary>
public LanguageLookup()
{
    // Load from the resource into the data structures instead of creating them from scratch.
    string normalizedData = LanguageRegistryResources.LanguageDataIndex.Replace("\r\n", "\n");
    string[] rows = normalizedData.Split(new[] { "\n" }, StringSplitOptions.RemoveEmptyEntries);

    foreach (string row in rows)
    {
        // Tab-separated fields in LanguageDataIndex, as read below:
        //   0 Code, 1 ThreeLetterCode, 2 DesiredName, 3 macrolanguage flag ("M"),
        //   4 Names (';'-separated), 5 Countries (';'-separated), 6 PrimaryCountry
        string[] fields = row.Split('\t');
        if (fields.Length != 7) // This needs to be changed if the number of fields changes
        {
            continue;
        }

        string code = fields[0];
        string threeLetterCode = fields[1];

        var info = new LanguageInfo
        {
            LanguageTag = code,
            ThreeLetterTag = threeLetterCode,
            DesiredName = fields[2],
            IsMacroLanguage = String.Equals("M", fields[3]),
            PrimaryCountry = fields[6]
        };

        foreach (string countryName in fields[5].Split(';'))
        {
            info.Countries.Add(countryName);
        }

        foreach (string rawName in fields[4].Split(';'))
        {
            info.Names.Add(rawName.Trim());
        }

        // Do not add anything to LanguageInfo manually here if it would be useful in
        // LanguageDataIndex.txt/json.

        // Register under the code, and under the three-letter code too when it differs.
        _codeToLanguageIndex[code] = info;
        if (!String.Equals(code, threeLetterCode))
        {
            _codeToLanguageIndex[threeLetterCode] = info;
        }

        // Register under every name.
        foreach (string registeredName in info.Names)
        {
            GetOrCreateListFromName(registeredName).Add(info);
        }

        // Register under every non-empty country.
        foreach (var countryName in info.Countries)
        {
            if (string.IsNullOrEmpty(countryName))
            {
                continue;
            }

            List<LanguageInfo> languagesInCountry;
            if (!_countryToLanguageIndex.TryGetValue(countryName, out languagesInCountry))
            {
                languagesInCountry = new List<LanguageInfo>();
                _countryToLanguageIndex[countryName] = languagesInCountry;
            }
            languagesInCountry.Add(info);
        }
    }
}