/// <summary>
/// Gets the subtags of the specified language tag by matching it against the language tag
/// pattern and then distributing any private-use subtags over the fields that the pattern
/// did not fill.
/// </summary>
/// <param name="langTag">The language tag. Must not be null or empty.</param>
/// <param name="languageSubtag">The language subtag, or null if none was found.</param>
/// <param name="scriptSubtag">The script subtag, or null if none was found.</param>
/// <param name="regionSubtag">The region subtag, or null if none was found.</param>
/// <param name="variantSubtag">The variant subtag, or null if none was found.</param>
/// <returns><c>true</c> if the tag matched the language tag pattern; otherwise <c>false</c>
/// (in which case all four out parameters are null).</returns>
/// <exception cref="ArgumentNullException"><paramref name="langTag"/> is null or empty.</exception>
public static bool GetSubtags(string langTag, out LanguageSubtag languageSubtag, out ScriptSubtag scriptSubtag,
    out RegionSubtag regionSubtag, out VariantSubtag variantSubtag)
{
    if (string.IsNullOrEmpty(langTag))
        throw new ArgumentNullException("langTag");

    // Out parameters must be assigned on every return path, including the early "no match" return.
    languageSubtag = null;
    scriptSubtag = null;
    regionSubtag = null;
    variantSubtag = null;

    Match match = s_langTagPattern.Match(langTag);
    if (!match.Success)
        return false;

    // Names of the regex groups, in the order the fields appear in a tag.
    var parts = new[] { "language", "script", "region", "variant" };
    Group privateUseGroup = match.Groups["privateuse"];
    string[] privateUseSubTags = null;
    int privateUseSubTagIndex = 0;
    bool privateUsePrefix = false;
    string privateUseSubTag = null;
    // Index into 'parts' of the first field that private-use subtags are allowed to fill.
    // Stays at -1 when there is no private-use section (privateUseSubTag is then null, so
    // none of the private-use branches below can fire).
    int part = -1;
    if (privateUseGroup.Success)
    {
        // Scan from the last field backwards for a matched group that starts before the
        // private-use section; the field after it is the first one private use may fill.
        // If no such group exists the loop ends with part == -1, which becomes 0 below.
        for (part = parts.Length - 1; part >= 0; part--)
        {
            Group group = match.Groups[parts[part]];
            if (group.Success && privateUseGroup.Index > group.Index)
                break;
        }
        part++;
        privateUseSubTags = privateUseGroup.Value.Split('-');
        // NOTE(review): NextSubTag is defined elsewhere; presumably it returns the next
        // private-use subtag (null when exhausted), advances the index, and reports through
        // privateUsePrefix whether that subtag was introduced by an "x" - confirm.
        privateUseSubTag = NextSubTag(privateUseSubTags, ref privateUseSubTagIndex, out privateUsePrefix);
    }

    // Language: prefer the regex group, otherwise consume the current private-use subtag.
    string languageCode = match.Groups["language"].Value;
    if (!string.IsNullOrEmpty(languageCode))
    {
        languageSubtag = GetLanguageSubtag(languageCode);
    }
    else if (privateUseSubTag != null && part <= 0)
    {
        languageSubtag = new LanguageSubtag(privateUseSubTag, null, true, null);
        privateUseSubTag = NextSubTag(privateUseSubTags, ref privateUseSubTagIndex, out privateUsePrefix);
    }

    // Script: a private-use subtag is only consumed if it looks like a script code.
    string scriptCode = match.Groups["script"].Value;
    if (!string.IsNullOrEmpty(scriptCode))
    {
        scriptSubtag = GetScriptSubtag(scriptCode);
    }
    else if (privateUseSubTag != null && part <= 1 && s_scriptPattern.IsMatch(privateUseSubTag))
    {
        scriptSubtag = privateUsePrefix ? new ScriptSubtag(privateUseSubTag, null, true) : GetScriptSubtag(privateUseSubTag);
        privateUseSubTag = NextSubTag(privateUseSubTags, ref privateUseSubTagIndex, out privateUsePrefix);
    }

    // Region: a private-use subtag is only consumed if it looks like a region code.
    // Unlike the script case, privateUsePrefix is not consulted here.
    string regionCode = match.Groups["region"].Value;
    if (!string.IsNullOrEmpty(regionCode))
    {
        regionSubtag = GetRegionSubtag(regionCode);
    }
    else if (privateUseSubTag != null && part <= 2 && s_regionPattern.IsMatch(privateUseSubTag))
    {
        regionSubtag = GetRegionSubtag(privateUseSubTag);
        privateUseSubTag = NextSubTag(privateUseSubTags, ref privateUseSubTagIndex, out privateUsePrefix);
    }

    // Variant: the regex variant (if any) followed by every remaining private-use subtag.
    var variantSb = new StringBuilder();
    bool variantPrivateUsePrefix = false;
    string variantCode = match.Groups["variant"].Value;
    if (!string.IsNullOrEmpty(variantCode))
    {
        variantSb.Append(variantCode);
    }
    // We would like to also check this subtag against the variant pattern
    // to ensure that we have a legitimate variant code, but for loading legacy projects
    // with poorly-formed codes, we have to do something with the private use subtag,
    // so if it doesn't match any of the others we force it to be a variant even if
    // it is not properly formed.
    else if (privateUseSubTag != null && part <= 3)
    {
        variantSb.Append(privateUseSubTag);
        variantPrivateUsePrefix = privateUsePrefix;
        privateUseSubTag = NextSubTag(privateUseSubTags, ref privateUseSubTagIndex, out privateUsePrefix);
    }

    // Append whatever private-use subtags are left, re-inserting "x-" where NextSubTag
    // reported a prefix.
    while (privateUseSubTag != null)
    {
        variantSb.Append("-");
        if (privateUsePrefix)
            variantSb.Append("x-");
        variantSb.Append(privateUseSubTag);
        privateUseSubTag = NextSubTag(privateUseSubTags, ref privateUseSubTagIndex, out privateUsePrefix);
    }

    variantCode = variantSb.ToString();
    if (!string.IsNullOrEmpty(variantCode))
    {
        // A variant that started from a prefixed private-use subtag is created directly as
        // private use; anything else goes through the normal lookup.
        variantSubtag = variantPrivateUsePrefix ? new VariantSubtag(variantCode, null, true, null) : GetVariantSubtag(variantCode);
    }
    return true;
}
/// <summary>
/// Builds a language tag string from the given subtags. The first private-use subtag
/// encountered is preceded by "x-"; later subtags are appended without a further "x-".
/// </summary>
/// <param name="languageSubtag">The language subtag. Must not be null.</param>
/// <param name="scriptSubtag">The script subtag, or null for none.</param>
/// <param name="regionSubtag">The region subtag, or null for none.</param>
/// <param name="variantSubtag">The variant subtag, or null for none.</param>
/// <returns>The assembled language tag.</returns>
/// <exception cref="ArgumentNullException"><paramref name="languageSubtag"/> is null.</exception>
public static string ToLangTag(LanguageSubtag languageSubtag, ScriptSubtag scriptSubtag,
    RegionSubtag regionSubtag, VariantSubtag variantSubtag)
{
    if (languageSubtag == null)
        throw new ArgumentNullException("languageSubtag");

    var tag = new StringBuilder();

    // Tracks whether the "x-" singleton has already been written.
    bool privateUseStarted = languageSubtag.IsPrivateUse;
    if (privateUseStarted)
        tag.Append("x-");
    tag.Append(languageSubtag.Code);

    if (scriptSubtag != null)
    {
        bool opensPrivateUse = !privateUseStarted && scriptSubtag.IsPrivateUse;
        tag.Append("-");
        if (opensPrivateUse)
        {
            tag.Append("x-");
            privateUseStarted = true;
        }
        tag.Append(scriptSubtag.Code);
    }

    if (regionSubtag == null)
    {
        // With no explicit region, language zh with ISO3 code cmn gets region CN.
        if (languageSubtag.Code == "zh" && languageSubtag.ISO3Code == "cmn")
            tag.Append("-CN");
    }
    else
    {
        // Region codes accepted by IsPrivateUseRegionCode are written without "x-".
        bool opensPrivateUse = !privateUseStarted
            && regionSubtag.IsPrivateUse
            && !IsPrivateUseRegionCode(regionSubtag.Code);
        tag.Append("-");
        if (opensPrivateUse)
        {
            tag.Append("x-");
            privateUseStarted = true;
        }
        tag.Append(regionSubtag.Code);
    }

    if (variantSubtag != null)
    {
        bool opensPrivateUse = !privateUseStarted && variantSubtag.IsPrivateUse;
        tag.Append("-");
        if (opensPrivateUse)
            tag.Append("x-");
        tag.Append(variantSubtag.Code);
    }

    return tag.ToString();
}
/// <summary>
/// Not implemented: this overload does nothing with its arguments and always throws
/// <see cref="NotImplementedException"/>.
/// </summary>
/// <param name="languageSubtag">The language subtag (unused).</param>
/// <param name="scriptSubtag">The script subtag (unused).</param>
/// <param name="regionSubtag">The region subtag (unused).</param>
/// <param name="variantSubtag">The variant subtag (unused).</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public IWritingSystem Create(LanguageSubtag languageSubtag, ScriptSubtag scriptSubtag, RegionSubtag regionSubtag, VariantSubtag variantSubtag)
{
    throw new NotImplementedException();
}
/// <summary>
/// Creates a new writing system from the given subtags. The writing system is obtained
/// from the local store, populated with the subtags, given a default abbreviation and,
/// when a matching culture exists, default sort rules. It is returned with its
/// Modified flag cleared.
/// </summary>
/// <param name="languageSubtag">The language subtag. Must be non-null and valid.</param>
/// <param name="scriptSubtag">The script subtag, or null for none.</param>
/// <param name="regionSubtag">The region subtag, or null for none.</param>
/// <param name="variantSubtag">The variant subtag, or null for none.</param>
/// <returns>The newly created writing system.</returns>
/// <exception cref="ArgumentNullException"><paramref name="languageSubtag"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="languageSubtag"/> is not valid.</exception>
public IWritingSystem Create(LanguageSubtag languageSubtag, ScriptSubtag scriptSubtag, RegionSubtag regionSubtag, VariantSubtag variantSubtag)
{
    // Fail with a descriptive argument exception instead of a NullReferenceException.
    // ArgumentNullException derives from ArgumentException, so callers that already
    // catch ArgumentException continue to work.
    if (languageSubtag == null)
        throw new ArgumentNullException("languageSubtag");
    if (!languageSubtag.IsValid)
        throw new ArgumentException("Can not create a new writing system with an invalid language tag.");

    PalasoWritingSystem ws;
    // Only the call into the local store is made under the lock.
    lock (m_syncRoot)
        ws = (PalasoWritingSystem)m_localStore.CreateNew();

    ws.LanguageSubtag = languageSubtag;
    ws.ScriptSubtag = scriptSubtag;
    ws.RegionSubtag = regionSubtag;
    ws.VariantSubtag = variantSubtag;

    // Default abbreviation: up to the first three characters of the language name,
    // or the writing system id when the language has no name.
    if (!string.IsNullOrEmpty(languageSubtag.Name))
        ws.Abbreviation = languageSubtag.Name.Length > 3 ? languageSubtag.Name.Substring(0, 3) : languageSubtag.Name;
    else
        ws.Abbreviation = ws.Id;

    // If a culture is found for this writing system id, sort using that culture's rules.
    CultureInfo ci = MiscUtils.GetCultureForWs(ws.Id);
    if (ci != null)
    {
        ws.SortUsing = WritingSystemDefinition.SortRulesType.OtherLanguage;
        ws.SortRules = ci.Name;
    }

    // The property assignments above are initialization, not user edits.
    ws.Modified = false;
    return ws;
}
/// <summary>
/// Gets the variant subtag with the specified code. If it is not a known (non-private-use) one,
/// make a private-use one with the specified values. Insert 'x' or move it earlier
/// if any leading parts are not standard.
/// </summary>
/// <param name="code">The variant code: one or more subtags separated by '-'. Empty
/// subtags (from leading, trailing or doubled hyphens) are ignored.</param>
/// <param name="name">The name used if a new private-use subtag has to be created.</param>
/// <param name="prefixes">The prefixes used if a new private-use subtag has to be created.</param>
/// <returns>The known subtag registered under the normalized code, or a new private-use
/// subtag with the normalized code.</returns>
/// <exception cref="ArgumentNullException"><paramref name="code"/> is null or empty.</exception>
/// <exception cref="ArgumentException"><paramref name="code"/> consists only of hyphens.</exception>
public static VariantSubtag GetVariantSubtag(string code, string name, IEnumerable<string> prefixes)
{
    if (string.IsNullOrEmpty(code))
        throw new ArgumentNullException("code");

    var fixedParts = new List<string>();
    bool gotX = false;
    foreach (var part in code.Split('-'))
    {
        if (part.Length == 0)
            continue; // skip empty subtags produced by stray hyphens

        if (part.Equals("x", StringComparison.OrdinalIgnoreCase))
        {
            // Keep only the first 'x'; any later one is dropped.
            if (!gotX)
            {
                fixedParts.Add(part);
                gotX = true;
            }
            continue;
        }

        // Before the 'x', every part must be a registered variant. The first one that
        // is not pulls the 'x' forward so that it and everything after it is private use.
        if (!gotX && !StandardTags.IsValidRegisteredVariant(part))
        {
            fixedParts.Add("x");
            gotX = true;
        }
        fixedParts.Add(part);
    }

    // A code such as "-" yields no parts at all. Report that as a bad argument rather
    // than letting an empty-sequence error escape from the join below.
    if (fixedParts.Count == 0)
        throw new ArgumentException("The variant code must contain at least one non-empty subtag.", "code");

    var code2 = string.Join("-", fixedParts.ToArray());
    VariantSubtag subtag;
    if (!s_variantSubtags.TryGetValue(code2, out subtag))
        subtag = new VariantSubtag(code2, name, true, prefixes);
    return subtag;
}
/// <summary>
/// Load the controls from the writing system, if it is not null. If it is null, nothing is
/// loaded and the controls are left untouched: the method returns immediately.
/// Language tag side effects are disabled while the controls are being loaded.
/// </summary>
private void LoadControlsFromWritingSystem()
{
    // Suppress side effects while the controls are set programmatically below.
    m_enableLangTagSideEffects = false;
    // NOTE(review): on this early return m_enableLangTagSideEffects stays false - confirm
    // that this is intended.
    if (m_ws == null)
        return; // Probably in design mode; can't populate.

    // Remember the writing system's current subtags.
    m_origVariantSubtag = m_ws.VariantSubtag;
    m_origRegionSubtag = m_ws.RegionSubtag;
    m_origScriptSubtag = m_ws.ScriptSubtag;

    // Repopulate the script list, then select the writing system's script.
    m_scriptName.ClearItems();
    m_scriptName.Items.AddRange(LangTagUtils.ScriptSubtags.ToArray());
    ScriptSubtag = m_origScriptSubtag;

    // Repopulate the region list, then select the writing system's region.
    m_regionName.ClearItems();
    m_regionName.Items.AddRange(LangTagUtils.RegionSubtags.ToArray());
    RegionSubtag = m_origRegionSubtag;

    // The variant list is populated after script and region have been set.
    PopulateVariantCombo(false);
    VariantSubtag = m_origVariantSubtag;

    m_enableLangTagSideEffects = true;
}
/// <summary>
/// Static initializer: compiles the tag-matching patterns and fills the lookup tables of
/// known language, script, region and variant subtags from <c>StandardTags</c>.
/// </summary>
static LangTagUtils()
{
    const RegexOptions options = RegexOptions.ExplicitCapture;

    // Whole-tag patterns.
    s_icuTagPattern = new Regex(IcuTagExpr, options);
    s_langTagPattern = new Regex(LangTagExpr, options);

    // Single-subtag patterns, anchored so that the entire input must match.
    s_langPattern = new Regex("\\A(" + LanguageExpr + ")\\z", options);
    s_scriptPattern = new Regex("\\A(" + ScriptExpr + ")\\z", options);
    s_regionPattern = new Regex("\\A(" + RegionExpr + ")\\z", options);
    s_variantPattern = new Regex("\\A(" + FuzzyVariantExpr + ")\\z", options);

    s_languageSubtags = new Dictionary<string, LanguageSubtag>();
    foreach (var langCode in StandardTags.ValidIso639LanguageCodes)
    {
        // A few three-letter codes are registered under a two-letter code instead, with
        // the three-letter code kept as the ISO3 code. ISO3Code is now only set when it
        // differs from Code.
        var code = langCode.Code;
        if (code == "cmn")
        {
            code = "zh";
            langCode.ISO3Code = "cmn";
        }
        else if (code == "pes")
        {
            code = "fa";
            langCode.ISO3Code = "pes";
        }
        else if (code == "arb")
        {
            code = "ar";
            langCode.ISO3Code = "arb";
        }
        else if (code == "zlm")
        {
            code = "ms";
            langCode.ISO3Code = "zlm";
        }

        var entry = new LanguageSubtag(code, langCode.Name, false, langCode.ISO3Code);
        // Each language can be looked up by its code and, when it has one, by its ISO3 code.
        s_languageSubtags[entry.Code] = entry;
        if (!string.IsNullOrEmpty(entry.ISO3Code))
            s_languageSubtags[entry.ISO3Code] = entry;
    }

    s_scriptSubtags = new Dictionary<string, ScriptSubtag>();
    foreach (var script in StandardTags.ValidIso15924Scripts)
    {
        s_scriptSubtags[script.Code] = new ScriptSubtag(script.Code, script.Label, false);
    }

    s_regionSubtags = new Dictionary<string, RegionSubtag>();
    foreach (var region in StandardTags.ValidIso3166Regions)
    {
        s_regionSubtags[region.Subtag] = new RegionSubtag(region.Subtag, region.Description, false);
    }

    s_variantSubtags = new Dictionary<string, VariantSubtag>();
    foreach (var variant in StandardTags.ValidRegisteredVariants)
    {
        s_variantSubtags[variant.Subtag] = new VariantSubtag(variant.Subtag, variant.Description, false, variant.Prefixes);
    }

    // These ones are considered non-private in that the user can't edit the code,
    // but they already contain the needed x's. Each row is { code, name }.
    var builtInVariants = new[]
    {
        new[] { "fonipa-x-etic", "Phonetic" },
        new[] { "fonipa-x-emic", "Phonemic" },
        new[] { "x-py", "Pinyin" },
        new[] { "x-pyn", "Pinyin Numbered" },
        new[] { "x-audio", "Audio" },
    };
    foreach (var row in builtInVariants)
    {
        s_variantSubtags[row[0]] = new VariantSubtag(row[0], row[1], false, null);
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="T:VariantSubtag"/> class that takes its
/// code, private-use flag and prefixes from an existing subtag and uses the supplied name.
/// </summary>
/// <param name="subtag">The subtag whose <c>Code</c>, <c>IsPrivateUse</c> and <c>Prefixes</c>
/// are passed on. It is dereferenced without a check, so it must not be null.</param>
/// <param name="name">The name for the new instance.</param>
public VariantSubtag(VariantSubtag subtag, string name)
    : this(subtag.Code, name, subtag.IsPrivateUse, subtag.Prefixes)
{
}
/// <summary>
/// Gets the subtags of the specified language tag. The tag is first normalized with
/// <c>Rfc5646TagCleaner</c>; private-use subtags are then used to fill in a custom
/// language (after "qaa"), script (after "Qaaa") or region (after "QM"), and whatever
/// private-use subtags remain become part of the variant.
/// </summary>
/// <param name="langTag">The language tag. Must not be null or empty.</param>
/// <param name="languageSubtag">The language subtag, or null if the method returns false.</param>
/// <param name="scriptSubtag">The script subtag, or null if there is none.</param>
/// <param name="regionSubtag">The region subtag, or null if there is none.</param>
/// <param name="variantSubtag">The variant subtag, or null if there is none.</param>
/// <returns><c>false</c> if the tag contains a character other than a letter, digit or
/// hyphen, or if the cleaned tag has no language; otherwise <c>true</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="langTag"/> is null or empty.</exception>
public static bool GetSubtags(string langTag, out LanguageSubtag languageSubtag, out ScriptSubtag scriptSubtag,
    out RegionSubtag regionSubtag, out VariantSubtag variantSubtag)
{
    if (string.IsNullOrEmpty(langTag))
        throw new ArgumentNullException("langTag");

    // Out parameters must be assigned on every return path.
    languageSubtag = null;
    scriptSubtag = null;
    regionSubtag = null;
    variantSubtag = null;

    // Reject anything that is not made up solely of letters, digits and hyphens.
    if (langTag.Any(c => !Char.IsLetterOrDigit(c) && c != '-'))
        return false;

    var cleaner = new Palaso.WritingSystems.Migration.Rfc5646TagCleaner(langTag);
    cleaner.Clean();

    // Private-use subtags are consumed from the front as custom language/script/region
    // values; whatever is left ends up in the variant.
    var privateUseSubTags = new List<string>(
        cleaner.PrivateUse.Split(new[] { '-' }, StringSplitOptions.RemoveEmptyEntries));

    string languageCode = cleaner.Language;
    if (string.IsNullOrEmpty(languageCode))
        return false;

    if (languageCode.Equals("qaa", StringComparison.OrdinalIgnoreCase))
    {
        // In our own WS dialog, we don't allow no language, but if it isn't a standard one, a language like xkal
        // produces an identifier like qaa-x-kal, and we interpret the first thing after the x as a private
        // language code (not allowed as the first three characters according to the standard).
        // If it's NOT a valid language code (e.g., too many characters), it probably came from some other
        // program. Treating it as a language code will fail if we try to create such a writing system,
        // since we will detect the invalid language code. So only interpret the first element
        // after the x as a language code if it is a valid one. Otherwise, we just let qaa be the language.
        if (privateUseSubTags.Count > 0 && IsLanguageCodeValid(privateUseSubTags[0]))
        {
            languageSubtag = new LanguageSubtag(privateUseSubTags[0], "", true, null);
            privateUseSubTags.RemoveAt(0);
        }
        else
        {
            languageSubtag = GetLanguageSubtag("qaa"); // We do allow just plain qaa.
        }
    }
    else
    {
        languageSubtag = GetLanguageSubtag(languageCode);
    }

    string scriptCode = cleaner.Script;
    if (!string.IsNullOrEmpty(scriptCode))
    {
        // "Qaaa" with a private-use subtag available means the real script is that subtag.
        if (scriptCode.Equals("Qaaa", StringComparison.OrdinalIgnoreCase) && privateUseSubTags.Count > 0)
        {
            scriptSubtag = new ScriptSubtag(privateUseSubTags[0], "", true);
            privateUseSubTags.RemoveAt(0);
        }
        else
        {
            scriptSubtag = GetScriptSubtag(scriptCode);
        }
    }

    string regionCode = cleaner.Region;
    if (!string.IsNullOrEmpty(regionCode))
    {
        // "QM" with a private-use subtag available means the real region is that subtag.
        if (regionCode.Equals("QM", StringComparison.OrdinalIgnoreCase) && privateUseSubTags.Count > 0)
        {
            regionSubtag = new RegionSubtag(privateUseSubTags[0], "", true);
            privateUseSubTags.RemoveAt(0);
        }
        else
        {
            regionSubtag = GetRegionSubtag(regionCode);
        }
    }

    // Variant code = standard variant (if any), then "x" and the remaining private-use
    // subtags (if any), all joined with hyphens.
    var variantParts = new List<string>();
    if (!string.IsNullOrEmpty(cleaner.Variant))
        variantParts.Add(cleaner.Variant);
    if (privateUseSubTags.Count > 0)
    {
        variantParts.Add("x");
        variantParts.AddRange(privateUseSubTags);
    }

    string variantCode = string.Join("-", variantParts.ToArray());
    if (!string.IsNullOrEmpty(variantCode))
        variantSubtag = GetVariantSubtag(variantCode);

    return true;
}
/// <summary>
/// Builds an ICU locale string from the given subtags. Parts are separated by underscores;
/// only variants with a known ICU equivalent are included.
/// </summary>
/// <param name="languageSubtag">The language subtag. Must not be null.</param>
/// <param name="scriptSubtag">The script subtag, or null for none.</param>
/// <param name="regionSubtag">The region subtag, or null for none.</param>
/// <param name="variantSubtag">The variant subtag, or null for none.</param>
/// <returns>The ICU locale string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="languageSubtag"/> is null.</exception>
public static string ToIcuLocale(LanguageSubtag languageSubtag, ScriptSubtag scriptSubtag,
    RegionSubtag regionSubtag, VariantSubtag variantSubtag)
{
    if (languageSubtag == null)
        throw new ArgumentNullException("languageSubtag");

    var locale = new StringBuilder();

    // Language first; a private-use language code gets an "x" in front of it.
    if (languageSubtag.IsPrivateUse)
        locale.Append("x");
    locale.Append(languageSubtag.Code);

    // Then script and region, each only when present.
    if (scriptSubtag != null)
        locale.AppendFormat("_{0}", scriptSubtag.Code);
    if (regionSubtag != null)
        locale.AppendFormat("_{0}", regionSubtag.Code);

    if (variantSubtag == null)
        return locale.ToString();

    // Map the language tag variant onto its ICU counterpart. Variants without a known
    // counterpart are left out of the locale.
    // TODO: are there any more ICU variants?
    string variantCode = variantSubtag.Code;
    string icuVariant = null;
    if (variantCode == "fonipa")
        icuVariant = "IPA";
    else if (variantCode == "fonipa-x-etic")
        icuVariant = "X_ETIC";
    else if (variantCode == "fonipa-x-emic")
        icuVariant = "X_EMIC";
    else if (variantCode == "pinyin")
        icuVariant = "X_PY";

    if (!string.IsNullOrEmpty(icuVariant))
    {
        // Without a region, a double underscore is used in front of the variant.
        string format = regionSubtag == null ? "__{0}" : "_{0}";
        locale.AppendFormat(format, icuVariant);
    }

    return locale.ToString();
}
/// <summary>
/// Generates a language tag from the specified subtags. Standard language, script, region
/// and variant go in the main part of the tag. A custom language, script or region is
/// represented there by the placeholder "qaa", "Qaaa" or "QM" respectively, with the
/// actual code written, in that order, in the private-use section after "x-", followed
/// by the private-use part of the variant.
/// </summary>
/// <param name="languageSubtag">The language subtag. Must not be null.</param>
/// <param name="scriptSubtag">The script subtag, or null for none.</param>
/// <param name="regionSubtag">The region subtag, or null for none.</param>
/// <param name="variantSubtag">The variant subtag, or null for none.</param>
/// <returns>The assembled language tag.</returns>
/// <exception cref="ArgumentNullException"><paramref name="languageSubtag"/> is null.</exception>
public static string ToLangTag(LanguageSubtag languageSubtag, ScriptSubtag scriptSubtag,
    RegionSubtag regionSubtag, VariantSubtag variantSubtag)
{
    if (languageSubtag == null)
        throw new ArgumentNullException("languageSubtag");

    var sb = new StringBuilder();

    // Insert non-custom language, script, region into main part of code.
    if (languageSubtag.IsPrivateUse)
    {
        sb.Append("qaa");
    }
    else
    {
        sb.Append(languageSubtag.Code);
    }

    var isCustomScript = false;
    if (scriptSubtag != null)
    {
        sb.Append("-");
        // Qaaa is our flag to expect a script in private-use. If the actual value is Qaaa, we need to treat it as custom,
        // so we don't confuse some other private-use tag with a custom script.
        isCustomScript = TreatAsCustomScript(scriptSubtag.Code);
        if (isCustomScript)
            sb.Append("Qaaa");
        else
            sb.Append(scriptSubtag.Code);
    }

    var isCustomRegion = false;
    if (regionSubtag != null)
    {
        sb.Append("-");
        // QM is our flag to expect a region in private-use. If the actual value is QM, we need to treat it as custom,
        // so we don't confuse some other private-use tag with a custom region.
        isCustomRegion = TreatAsCustomRegion(regionSubtag.Code);
        if (isCustomRegion)
            sb.Append("QM");
        else
            sb.Append(regionSubtag.Code);
    }
    else if (languageSubtag.Code == "zh" && languageSubtag.ISO3Code == "cmn")
    {
        // With no explicit region, language zh with ISO3 code cmn gets region CN. It has
        // to be written here, in the region position: appended any later it would land
        // after the variant or inside the private-use section and produce a malformed tag.
        sb.Append("-CN");
    }

    string standardVariant = "";
    string privateUse = "";
    if (variantSubtag != null)
        privateUse = GetPrivateUseAndStandardVariant(variantSubtag.Code, out standardVariant);
    if (standardVariant != "")
    {
        sb.Append("-");
        sb.Append(standardVariant);
    }

    // Insert custom language, script, or region into private-use. The "x-" singleton is
    // written once, in front of the first private-use subtag.
    bool inPrivateUse = false;
    if (languageSubtag.IsPrivateUse)
    {
        sb.Append("-");
        if (!inPrivateUse)
        {
            inPrivateUse = true;
            sb.Append("x-");
        }
        sb.Append(languageSubtag.Code);
    }
    if (isCustomScript)
    {
        sb.Append("-");
        if (!inPrivateUse)
        {
            inPrivateUse = true;
            sb.Append("x-");
        }
        sb.Append(scriptSubtag.Code);
    }
    if (isCustomRegion)
    {
        sb.Append("-");
        if (!inPrivateUse)
        {
            inPrivateUse = true;
            sb.Append("x-");
        }
        sb.Append(regionSubtag.Code);
    }
    if (privateUse != "")
    {
        sb.Append("-");
        if (!inPrivateUse)
            sb.Append("x-");
        sb.Append(privateUse);
    }

    return sb.ToString();
}