/// <summary>
/// Decides whether a language should be kept when regional dialects may be filtered out.
/// </summary>
/// <param name="li">The language being considered.</param>
/// <returns>
/// true when <c>IncludeRegionalDialects</c> is set, when the tag is "zh-CN" or "zh-TW",
/// or when the language tag has no region part; otherwise false.
/// </returns>
private bool RegionalDialectsFilter(LanguageInfo li)
		{
			// Evaluated left to right with short-circuiting:
			//  1. regional dialects are enabled, so nothing is filtered (li is not touched);
			//  2. the Chinese tags with region codes are always kept;
			//  3. anything else is kept only if its tag carries no region part.
			return IncludeRegionalDialects
				|| li.LanguageTag.IsOneOf("zh-CN", "zh-TW")
				|| string.IsNullOrEmpty(IetfLanguageTag.GetRegionPart(li.LanguageTag));
		}
        /// <summary>
        /// Returns the <see cref="LanguageInfo"/> registered under <paramref name="code"/> in
        /// <c>_codeToLanguageIndex</c>, creating and registering a new one when none exists yet.
        /// </summary>
        /// <param name="code">Key to look up; also used as the LanguageTag of a newly created entry.</param>
        /// <param name="countryName">Country to add to the entry's Countries; ignored when null or empty.</param>
        /// <returns>The existing or newly created entry.</returns>
        private LanguageInfo GetOrCreateLanguageFromCode(string code, string countryName)
        {
            LanguageInfo entry;
            bool alreadyRegistered = _codeToLanguageIndex.TryGetValue(code, out entry);

            if (!alreadyRegistered)
            {
                // First time this code is seen: create a bare entry and index it.
                entry             = new LanguageInfo();
                entry.LanguageTag = code;
                _codeToLanguageIndex.Add(code, entry);
            }

            bool hasCountry = !string.IsNullOrEmpty(countryName);
            if (hasCountry)
            {
                entry.Countries.Add(countryName);
            }

            return entry;
        }
Beispiel #3
0
        /// <summary>
        /// Builds a <see cref="LanguageInfo"/> from the supplied data and registers it in
        /// <c>_codeToLanguageIndex</c>, the name index (via <c>GetOrCreateListFromName</c>) and
        /// <c>_countryToLanguageIndex</c>.
        /// </summary>
        /// <param name="code">Language tag; stored as LanguageTag and used as the main key in the code index.</param>
        /// <param name="threelettercode">Three-letter code; also indexed when non-null and different from <paramref name="code"/>.</param>
        /// <param name="full">Optional full expanded tag; also indexed when non-null and different from <paramref name="code"/>.</param>
        /// <param name="name">Optional name; when given it becomes Names[0].</param>
        /// <param name="localName">Optional local name; added after <paramref name="name"/> unless it duplicates it.</param>
        /// <param name="region">Optional region code of the primary country; converted to a region name (or a placeholder text).</param>
        /// <param name="names">Optional further names; null entries and duplicates are skipped.</param>
        /// <param name="regions">Optional whitespace-separated region codes; empty entries and entries containing '?' are skipped.</param>
        /// <param name="tags">Optional additional tags that should also resolve to this language.</param>
        /// <returns>Always true.</returns>
        private bool AddLanguage(string code, string threelettercode, string full = null,
                                 string name = null, string localName = null, string region = null, List <string> names = null, string regions = null, List <string> tags = null)
        {
            string primarycountry;

            if (region == null)
            {
                primarycountry = "";
            }
            else if (StandardSubtags.IsValidIso3166RegionCode(region))
            {
                if (StandardSubtags.IsPrivateUseRegionCode(region))
                {
                    // "XK" is the only private-use region code given a real name here.
                    if (region == "XK")
                    {
                        primarycountry = "Kosovo";
                    }
                    else
                    {
                        primarycountry = "Unknown private use";
                    }
                }
                else
                {
                    primarycountry = StandardSubtags.RegisteredRegions[region].Name;                     // convert to full region name
                }
            }
            else
            {
                primarycountry = "Invalid region";
            }
            LanguageInfo language = new LanguageInfo
            {
                LanguageTag    = code,
                ThreeLetterTag = threelettercode,
                // DesiredName defaults to Names[0], which is set below.
                PrimaryCountry = primarycountry
            };

            // Note: this adds the empty string when no region was given; empty entries are
            // filtered out again when the country index is filled below.
            language.Countries.Add(primarycountry);

            if (regions != null)
            {
                // Split() with no arguments splits on whitespace.
                string[] countries = regions.Split();
                foreach (string country in countries)
                {
                    if (!country.Contains('?') && country != "")
                    {
                        language.Countries.Add(StandardSubtags.RegisteredRegions[country].Name);
                    }
                }
            }

            // For sorting, it is better to store name first instead of localName, which may be in a local script.
            // Names[0] is used in several ways in sorting languages in the list of possible matches: 1) bring
            // to the top of the list languages where Names[0] matches what the user typed, 2) order by the
            // "typing distance" of Names[0] from what the user typed, and 3) order by comparing the Names[0]
            // value of the two languages if neither matches the search string and their typing distances from
            // the search string are the same.
            // Names[1] (if it exists) is used to move the language toward the top of the list if it exactly
            // matches the search string.  It is not used otherwise in the sorting heuristics.  No other
            // values in the Names list are involved in the sorting process.
            //
            // Every name is stored trimmed, so the duplicate checks must also be made on the trimmed
            // text; otherwise names differing only in surrounding whitespace would be stored twice and
            // the language would be added twice to the name index below.
            if (name != null)
            {
                language.Names.Add(name.Trim());
            }
            if (localName != null)
            {
                string trimmedLocalName = localName.Trim();
                if (!language.Names.Contains(trimmedLocalName))
                {
                    language.Names.Add(trimmedLocalName);
                }
            }
            if (names != null)
            {
                foreach (string langname in names)
                {
                    if (langname == null)
                    {
                        continue;                         // nothing to add for a missing name
                    }
                    string trimmedName = langname.Trim();
                    if (!language.Names.Contains(trimmedName))
                    {
                        language.Names.Add(trimmedName);
                    }
                }
            }
            // If we end up needing to add the language code, that reflects a deficiency in the data.  But
            // having a bogus name value probably hurts less than not having any name at all.  The sort
            // process mentioned above uses the language tag as well as the first two items in the Names list.
            Debug.Assert(language.Names.Count > 0);
            if (language.Names.Count == 0)
            {
                language.Names.Add(code);
            }

            // add language to _codeToLanguageIndex and _nameToLanguageIndex
            // if 2 letter code then add both 2 and 3 letter codes to _codeToLanguageIndex

            _codeToLanguageIndex[code] = language;
            if (full != null && !string.Equals(full, code))
            {
                _codeToLanguageIndex[full] = language;                 // add the full expanded tag
            }

            if (threelettercode != null && !string.Equals(code, threelettercode))
            {
                _codeToLanguageIndex[threelettercode] = language;
            }

            if (tags != null)
            {
                foreach (string langtag in tags)
                {
                    _codeToLanguageIndex[langtag] = language;
                }
            }

            foreach (string langname in language.Names)
            {
                GetOrCreateListFromName(langname).Add(language);
            }
            // add to _countryToLanguageIndex
            foreach (var country in language.Countries)
            {
                if (!string.IsNullOrEmpty(country))
                {
                    List <LanguageInfo> list;
                    if (!_countryToLanguageIndex.TryGetValue(country, out list))
                    {
                        list = new List <LanguageInfo>();
                        _countryToLanguageIndex[country] = list;
                    }
                    list.Add(language);
                }
            }

            return(true);
        }
        /// <summary>
        /// Initializes a new instance of the <see cref="LanguageLookup"/> class.
        /// Fills <c>_codeToLanguageIndex</c> from the TwoToThreeCodes and LanguageIndex resources and from
        /// <c>Sldr.LanguageTags</c>, then registers every name of every language through
        /// <c>GetOrCreateListFromName</c> and prepends a local name for a small fixed set of languages.
        /// </summary>
        public LanguageLookup()
        {
            // Map built from the tab-separated TwoToThreeCodes resource: the second column is the key
            // and the first column is the value.
            var threeToTwoLetter = new Dictionary <string, string>();

            foreach (string line in LanguageRegistryResources.TwoToThreeCodes.Replace("\r\n", "\n").Split(new[] { "\n" }, StringSplitOptions.RemoveEmptyEntries))
            {
                string[] items = line.Split('\t');
                threeToTwoLetter.Add(items[1].Trim(), items[0].Trim());
            }

            //LanguageIndex.txt Format: LangID	CountryID	NameType	Name
            //a language appears on one row for each of its alternative names
            // NOTE(review): unlike the resource above, "\r\n" is not normalized here; columns 0, 1 and 3
            // are trimmed below, column 2 (NameType) is not.
            var entries = new List <string>(LanguageRegistryResources.LanguageIndex.Split(new[] { "\n" }, StringSplitOptions.RemoveEmptyEntries));

            // Extra row appended to the resource data for the "Unlisted Language" entry.
            entries.Add("qaa\t?\tL\tUnlisted Language");
            foreach (string entry in entries.Skip(1))             //skip the header
            {
                string[] items = entry.Split('\t');
                // Rows that do not have exactly four columns are ignored.
                if (items.Length != 4)
                {
                    continue;
                }
                if (items[2].Contains('!'))                //temporary suppression of entries while waiting for Ethnologue changes
                {
                    continue;
                }

                // Use the mapped code from threeToTwoLetter when the row's code has one.
                string code = items[0].Trim();
                string twoLetterCode;
                if (threeToTwoLetter.TryGetValue(code, out twoLetterCode))
                {
                    code = twoLetterCode;
                }

                // A region of "?" is kept as "?"; any other region code is converted to its registered name.
                string       regionCode = items[1].Trim();
                LanguageInfo language   = GetOrCreateLanguageFromCode(code, regionCode == "?" ? "?" : StandardSubtags.RegisteredRegions[regionCode].Name);

                string name = items[3].Trim();


                if (items[2] == "L")
                {
                    // An "L" row supplies the name that goes first: remove any existing copies, then
                    // insert it at position 0.
                    while (language.Names.Contains(name))
                    {
                        language.Names.Remove(name);
                    }
                    language.Names.Insert(0, name);
                }
                else
                {
                    // NOTE(review): the two checks below on items[1] and items[0] use the raw, untrimmed
                    // columns (and items[0] is the code before the mapping above) — confirm that is intended.
                    if (items[2].Contains("P"))
                    {
                        //Skip pejorative
                    }
                    else if (items[1] == ("ET"))
                    {
                        //Skip alternatives for Ethiopia, as per request
                    }
                    else if (items[0] == "gax" || items[0] == "om")
                    {
                        //For these two "Oromo" languages, skip all related languages as per request
                    }
                    else if (!language.Names.Contains(name))
                    {
                        language.Names.Add(name);                         //intentionally not lower-casing
                    }
                }
            }

            // Take the available, valid tags from Sldr.LanguageTags, canonicalize them and group them by
            // their language part.
            IEnumerable <IGrouping <string, string> > languageGroups = Sldr.LanguageTags.Where(info => info.IsAvailable && IetfLanguageTag.IsValid(info.LanguageTag))
                                                                       .Select(info => IetfLanguageTag.Canonicalize(info.LanguageTag))
                                                                       .GroupBy(IetfLanguageTag.GetLanguagePart);

            foreach (IGrouping <string, string> languageGroup in languageGroups)
            {
                string[] langTags = languageGroup.ToArray();
                if (langTags.Length == 1)
                {
                    // Only one tag for this language: if it is longer than the bare language code and the
                    // bare code is already indexed, re-register that entry under the full tag instead.
                    string       langTag = langTags[0];
                    LanguageInfo language;
                    if (langTag != languageGroup.Key && _codeToLanguageIndex.TryGetValue(languageGroup.Key, out language))
                    {
                        _codeToLanguageIndex.Remove(languageGroup.Key);
                        language.LanguageTag          = langTag;
                        _codeToLanguageIndex[langTag] = language;
                    }
                }
                else
                {
                    // Several tags share this language part: give each tag that is more than the bare
                    // language code its own entry.
                    foreach (string langTag in langTags)
                    {
                        LanguageSubtag languageSubtag;
                        ScriptSubtag   scriptSubtag;
                        RegionSubtag   regionSubtag;
                        IEnumerable <VariantSubtag> variantSubtags;
                        if (IetfLanguageTag.TryGetSubtags(langTag, out languageSubtag, out scriptSubtag, out regionSubtag, out variantSubtags))
                        {
                            // The bare language code itself needs no extra entry.
                            if (langTag == languageSubtag)
                            {
                                continue;
                            }

                            LanguageInfo language      = GetOrCreateLanguageFromCode(langTag, regionSubtag == null ? "?" : regionSubtag.Name);
                            // The script is shown in the name only when the tag has one that is not implied.
                            bool         displayScript = scriptSubtag != null && !IetfLanguageTag.IsScriptImplied(langTag);
                            LanguageInfo otherLanguage;
                            // NOTE(review): "langTag != languageSubtag" repeats the test that already caused
                            // a continue above, so it appears to be always true at this point.
                            // When no script is displayed and the base language is indexed with the same set
                            // of countries, copy the base language's names.
                            if (langTag != languageSubtag && !displayScript && _codeToLanguageIndex.TryGetValue(languageSubtag, out otherLanguage) && language.Countries.SetEquals(otherLanguage.Countries))
                            {
                                language.Names.AddRange(otherLanguage.Names);
                            }
                            else
                            {
                                // Otherwise use the language subtag's name, with the script name in
                                // parentheses when the script is displayed.
                                string name = displayScript ? string.Format("{0} ({1})", languageSubtag.Name, scriptSubtag.Name) : languageSubtag.Name;
                                if (!language.Names.Contains(name))
                                {
                                    language.Names.Add(name);                                     //intentionally not lower-casing
                                }
                            }
                        }
                    }
                }
            }

            // Register every name of every language, then prepend a local name where one is defined.
            foreach (LanguageInfo languageInfo in _codeToLanguageIndex.Values)
            {
                foreach (string name in languageInfo.Names)
                {
                    GetOrCreateListFromName(name).Add(languageInfo);
                }

                if (languageInfo.Names.Count == 0)
                {
                    continue;                     // this language is suppressed
                }
                //Why just this small set? Only out of convenience. Ideally we'd have a db of all languages as they write it in their literature.
                // The local name is chosen from the current first name of the language.
                string localName = null;
                switch (languageInfo.Names[0])
                {
                case "French":
                    localName = "français";
                    break;

                case "Spanish":
                    localName = "español";
                    break;

                case "Chinese":
                    localName = "中文";
                    break;

                case "Hindi":
                    localName = "हिन्दी";
                    break;

                case "Bengali":
                    localName = "বাংলা";
                    break;

                case "Telugu":
                    localName = "తెలుగు";
                    break;

                case "Tamil":
                    localName = "தமிழ்";
                    break;

                case "Urdu":
                    localName = "اُردُو";
                    break;

                case "Arabic":
                    localName = "العربية/عربي";
                    break;

                case "Thai":
                    localName = "ภาษาไทย";
                    break;

                case "Indonesian":
                    localName = "Bahasa Indonesia";
                    break;
                }
                if (!string.IsNullOrEmpty(localName))
                {
                    // Remove returns false when the local name was not yet among the names, in which case
                    // it was not registered by the loop above and is registered here.
                    if (!languageInfo.Names.Remove(localName))
                    {
                        GetOrCreateListFromName(localName).Add(languageInfo);
                    }
                    // Either way the local name ends up first in the list.
                    languageInfo.Names.Insert(0, localName);
                }
            }
        }
        /// <summary>
        /// Shows a <see cref="LanguageLookupDialog"/> preset to the given language and returns the
        /// user's choice.
        /// </summary>
        /// <param name="iso639Code">Code used both as the preselected language tag and as the initial search text.</param>
        /// <param name="potentiallyCustomName">
        /// Custom name to preset as the desired name. When null, the desired-name field of the dialog is hidden.
        /// </param>
        /// <returns>The language selected in the dialog, or null when the dialog was not confirmed with OK.</returns>
        private LanguageInfo ChangeLanguage(string iso639Code, string potentiallyCustomName=null)
        {
            using (var lookupDialog = new LanguageLookupDialog())
            {
                //at this point, we don't let them customize the national languages
                bool canCustomizeName = potentiallyCustomName != null;
                lookupDialog.IsDesiredLanguageNameFieldVisible = canCustomizeName;

                var preselected = new LanguageInfo();
                preselected.LanguageTag = iso639Code;
                if (!string.IsNullOrEmpty(potentiallyCustomName))
                {
                    // to be noticed, must set before assigning SelectedLanguage on the dialog
                    preselected.DesiredName = potentiallyCustomName;
                }

                lookupDialog.SelectedLanguage = preselected;
                lookupDialog.SearchText       = iso639Code;

                bool confirmed = lookupDialog.ShowDialog() == DialogResult.OK;
                return confirmed ? lookupDialog.SelectedLanguage : null;
            }
        }
Beispiel #6
0
        /// <summary>
        /// Initializes a new instance of the <see cref="LanguageLookup"/> class.
        /// It gets its data from the LanguageDataIndex resource: each well-formed row becomes a
        /// <see cref="LanguageInfo"/> that is registered in <c>_codeToLanguageIndex</c>, in the name index
        /// (via <c>GetOrCreateListFromName</c>) and in <c>_countryToLanguageIndex</c>.
        /// </summary>
        public LanguageLookup()
        {
            // Number of tab-separated fields in a LanguageDataIndex row.
            // This needs to be changed if the number of fields changes
            const int expectedFieldCount = 7;

            // Load from file into the data structures instead of creating it from scratch
            var entries = LanguageRegistryResources.LanguageDataIndex.Replace("\r\n", "\n").Split(new[] { "\n" }, StringSplitOptions.RemoveEmptyEntries);

            foreach (string entry in entries)
            {
                // Fields in LanguageDataIndex (tab separated):
                // Code ThreeLetterCode DesiredName MacroLanguageFlag Names Countries PrimaryCountry
                // MacroLanguageFlag is "M" for a macrolanguage; Names and Countries are ';'-separated lists.
                string[] items = entry.Split('\t');
                if (items.Length != expectedFieldCount)
                {
                    continue;                     // rows with an unexpected number of fields are ignored
                }
                string       code            = items[0];
                string       threelettercode = items[1];
                string       desiredname     = items[2];
                bool         macrolanguage   = String.Equals("M", items[3]);
                string[]     names           = items[4].Split(';');
                string[]     countries       = items[5].Split(';');
                string       primarycountry  = items[6];
                LanguageInfo language        = new LanguageInfo {
                    LanguageTag     = code, ThreeLetterTag = threelettercode, DesiredName = desiredname,
                    IsMacroLanguage = macrolanguage, PrimaryCountry = primarycountry
                };
                foreach (string country in countries)
                {
                    language.Countries.Add(country);
                }
                foreach (string langname in names)
                {
                    language.Names.Add(langname.Trim());
                }

                // Do not add anything to LanguageInfo manually here if it would be useful in LanguageDataIndex.txt/json

                // add language to _codeToLanguageIndex and _nameToLanguageIndex
                // if 2 letter code then add both 2 and 3 letter codes to _codeToLanguageIndex

                _codeToLanguageIndex[code] = language;
                // An empty three-letter field must not be used as a key: every such row would overwrite
                // the same "" entry, making a lookup of the empty string return an arbitrary language.
                if (!string.IsNullOrEmpty(threelettercode) && !String.Equals(code, threelettercode))
                {
                    _codeToLanguageIndex[threelettercode] = language;
                }
                foreach (string langname in language.Names)
                {
                    GetOrCreateListFromName(langname).Add(language);
                }
                // add to _countryToLanguageIndex
                foreach (var country in language.Countries)
                {
                    if (!string.IsNullOrEmpty(country))
                    {
                        List <LanguageInfo> list;
                        if (!_countryToLanguageIndex.TryGetValue(country, out list))
                        {
                            list = new List <LanguageInfo>();
                            _countryToLanguageIndex[country] = list;
                        }
                        list.Add(language);
                    }
                }
            }
        }