/// <summary>
/// Clears this builder and repopulates its fields from the given language tag.
/// </summary>
/// <param name="langtag">Tag supplying the language, script, region, variants, extensions and private use values.</param>
/// <returns>This builder.</returns>
public InternalLocaleBuilder SetLanguageTag(LanguageTag langtag)
{
    Clear();

    if (langtag.Extlangs.Count > 0)
    {
        // When extlangs are present, the first one is stored as the language.
        _language = langtag.Extlangs[0];
    }
    else
    {
        string primary = langtag.Language;
        // The "undetermined" language leaves the (just cleared) language field untouched.
        if (!primary.Equals(LanguageTag.Undetermined))
        {
            _language = primary;
        }
    }

    _script = langtag.Script;
    _region = langtag.Region;

    // Collapse the variant subtags into one string joined by the base-locale separator.
    IList<string> variantSubtags = langtag.Variants;
    if (variantSubtags.Count > 0)
    {
        _variant = string.Join(BaseLocale.Separator, variantSubtags);
    }

    SetExtensions(langtag.Extensions, langtag.PrivateUse);
    return this;
}
/// <summary>
/// Clears this builder and repopulates its fields from the given language tag.
/// </summary>
/// <param name="langtag">Tag supplying the language, script, region, variants, extensions and private use values.</param>
/// <returns>This builder.</returns>
public InternalLocaleBuilder SetLanguageTag(LanguageTag langtag)
{
    Clear();

    if (langtag.GetExtlangs().Count > 0)
    {
        // When extlangs are present, the first one is stored as the language.
        _language = langtag.GetExtlangs()[0];
    }
    else
    {
        string primary = langtag.GetLanguage();
        // The "undetermined" language leaves the (just cleared) language field untouched.
        if (!primary.Equals(LanguageTag.UNDETERMINED))
        {
            _language = primary;
        }
    }

    _script = langtag.GetScript();
    _region = langtag.GetRegion();

    // Collapse the variant subtags into one string joined by the base-locale separator.
    IList<string> variantSubtags = langtag.GetVariants();
    if (variantSubtags.Count > 0)
    {
        StringBuilder joined = new StringBuilder();
        for (int index = 0; index < variantSubtags.Count; index++)
        {
            if (index > 0)
            {
                joined.Append(BaseLocale.SEP);
            }
            joined.Append(variantSubtags[index]);
        }
        _variant = joined.ToString();
    }

    SetExtensions(langtag.GetExtensions(), langtag.GetPrivateuse());
    return this;
}
/// <summary>
/// Builds the identifier string for an extension map: the string form of every
/// non-private-use extension in the map's enumeration order, followed by the
/// private use extension (if any), joined with <see cref="LanguageTag.Separator"/>.
/// </summary>
/// <param name="map">Extensions keyed by their singleton character.</param>
/// <returns>The joined identifier; empty when the map has no entries.</returns>
private static string ToID(IDictionary <char, Extension> map)
{
    StringBuilder id = new StringBuilder();
    Extension privateUse = null;

    foreach (var pair in map)
    {
        if (LanguageTag.IsPrivateusePrefixChar(pair.Key))
        {
            // Hold the private use extension back so it is emitted last.
            privateUse = pair.Value;
            continue;
        }

        if (id.Length > 0)
        {
            id.Append(LanguageTag.Separator);
        }
        id.Append(pair.Value);
    }

    if (privateUse != null)
    {
        if (id.Length > 0)
        {
            id.Append(LanguageTag.Separator);
        }
        id.Append(privateUse);
    }

    return id.ToString();
}
/// <summary>
/// Validates each variant subtag in <paramref name="variants"/>, tokenized by
/// <paramref name="sep"/>.
/// </summary>
/// <param name="variants">The variant subtags to check.</param>
/// <param name="sep">The separator(s) handed to the tokenizer.</param>
/// <returns>
/// The tokenizer's <c>CurrentStart</c> for the first subtag rejected by
/// <see cref="LanguageTag.IsVariant"/>, or <c>-1</c> when every subtag is accepted.
/// </returns>
private int CheckVariants(string variants, string sep)
{
    for (StringTokenEnumerator tokens = new StringTokenEnumerator(variants, sep); tokens.MoveNext();)
    {
        string subtag = tokens.Current;
        if (!LanguageTag.IsVariant(subtag))
        {
            return tokens.CurrentStart;
        }
    }

    return -1;
}
/// <summary>
/// Sets the region; a <c>null</c> or empty value resets it to the empty string.
/// </summary>
/// <param name="region">The region subtag, or <c>null</c>/empty to clear.</param>
/// <returns>This builder.</returns>
/// <exception cref="FormatException">
/// <paramref name="region"/> is non-empty and rejected by <see cref="LanguageTag.IsRegion"/>.
/// </exception>
public InternalLocaleBuilder SetRegion(string region)
{
    if (string.IsNullOrEmpty(region))
    {
        _region = "";
        return this;
    }

    if (!LanguageTag.IsRegion(region))
    {
        throw new FormatException("Ill-formed region: " + region /*, 0*/);
    }

    _region = region;
    return this;
}
/// <summary>
/// Sets the script; a <c>null</c> or empty value resets it to the empty string.
/// </summary>
/// <param name="script">The script subtag, or <c>null</c>/empty to clear.</param>
/// <returns>This builder.</returns>
/// <exception cref="FormatException">
/// <paramref name="script"/> is non-empty and rejected by <see cref="LanguageTag.IsScript"/>.
/// </exception>
public InternalLocaleBuilder SetScript(string script)
{
    if (string.IsNullOrEmpty(script))
    {
        _script = "";
        return this;
    }

    if (!LanguageTag.IsScript(script))
    {
        throw new FormatException("Ill-formed script: " + script /*, 0*/);
    }

    _script = script;
    return this;
}
/// <summary>
/// Sets the language; a <c>null</c> or empty value resets it to the empty string.
/// </summary>
/// <param name="language">The language subtag, or <c>null</c>/empty to clear.</param>
/// <returns>This builder.</returns>
/// <exception cref="FormatException">
/// <paramref name="language"/> is non-empty and rejected by <see cref="LanguageTag.IsLanguage"/>.
/// </exception>
public InternalLocaleBuilder SetLanguage(string language)
{
    if (string.IsNullOrEmpty(language))
    {
        _language = "";
        return this;
    }

    if (!LanguageTag.IsLanguage(language))
    {
        throw new FormatException("Ill-formed language: " + language /*, 0*/);
    }

    _language = language;
    return this;
}
/// <summary>
/// Replaces this builder's state with the fields of the given base locale and
/// the contents of the given extensions.
/// </summary>
/// <remarks>
/// Every base locale field is validated before any internal field is written,
/// so a validation failure leaves the builder's previous state in place.
/// </remarks>
/// <param name="base">Source of the language, script, region and variant.</param>
/// <param name="extensions">Extensions to map back into the builder; may be <c>null</c>.</param>
/// <returns>This builder.</returns>
/// <exception cref="FormatException">A non-empty base locale field is ill-formed.</exception>
public InternalLocaleBuilder SetLocale(BaseLocale @base, LocaleExtensions extensions)
{
    string lang = @base.GetLanguage();
    string scr = @base.GetScript();
    string reg = @base.GetRegion();
    string vart = @base.GetVariant();

    // ICU4N TODO: Remove ?
    if (JDKIMPL)
    {
        // Special backward compatibility support
        if (lang.Equals("ja") && reg.Equals("JP") && vart.Equals("JP"))
        {
            // Exception 1 - ja_JP_JP
            // When locale ja_JP_JP is created, ca-japanese is always there.
            // The builder ignores the variant "JP"
            Debug.Assert("japanese".Equals(extensions.GetUnicodeLocaleType("ca")));
            vart = "";
        }
        else if (lang.Equals("th") && reg.Equals("TH") && vart.Equals("TH"))
        {
            // Exception 2 - th_TH_TH
            // When locale th_TH_TH is created, nu-thai is always there.
            // The builder ignores the variant "TH"
            Debug.Assert("thai".Equals(extensions.GetUnicodeLocaleType("nu")));
            vart = "";
        }
        else if (lang.Equals("no") && reg.Equals("NO") && vart.Equals("NY")) // ICU4N TODO: Fix this handling for .NET (no-NO is not reliable across platforms)
        {
            // Exception 3 - no_NO_NY
            // no_NO_NY is a valid locale and used by Java 6 or older versions.
            // The builder ignores the variant "NY" and changes the language to "nn".
            lang = "nn";
            vart = "";
        }
    }

    // Validate base locale fields before updating internal state.
    // LocaleExtensions always store validated/canonicalized values,
    // so no checks are necessary.
    if (lang.Length > 0 && !LanguageTag.IsLanguage(lang))
    {
        throw new FormatException("Ill-formed language: " + lang);
    }
    if (scr.Length > 0 && !LanguageTag.IsScript(scr))
    {
        throw new FormatException("Ill-formed script: " + scr);
    }
    if (reg.Length > 0 && !LanguageTag.IsRegion(reg))
    {
        // ICU4N TODO: Port LocaleSyntaxException (instead of FormatException)
        throw new FormatException("Ill-formed region: " + reg);
    }
    if (vart.Length > 0 && CheckVariants(vart, BaseLocale.Separator) != -1)
    {
        throw new FormatException("Ill-formed variant: " + vart /*, errIdx*/);
    }

    // The input locale is validated at this point.
    // Now, updating builder's internal fields.
    _language = lang;
    _script = scr;
    _region = reg;
    _variant = vart;
    ClearExtensions();

    if (extensions == null)
    {
        return this;
    }
    var extensionKeys = extensions.Keys;
    if (extensionKeys == null)
    {
        return this;
    }

    // map extensions back to builder's internal format
    foreach (char key in extensionKeys)
    {
        Extension ext = extensions.GetExtension(key);

        if (!(ext is UnicodeLocaleExtension unicodeExt))
        {
            // Ordinary extension: stored by singleton with its raw value.
            if (_extensions == null)
            {
                _extensions = new Dictionary <CaseInsensitiveChar, string>(4);
            }
            _extensions[new CaseInsensitiveChar(key)] = ext.Value;
            continue;
        }

        // Unicode locale extension: split into attributes and keywords.
        // The backing collections are only created once there is something to store.
        foreach (string attribute in unicodeExt.UnicodeLocaleAttributes)
        {
            if (_uattributes == null)
            {
                _uattributes = new HashSet <CaseInsensitiveString>(/*4*/);
            }
            _uattributes.Add(new CaseInsensitiveString(attribute));
        }
        foreach (string keyword in unicodeExt.UnicodeLocaleKeys)
        {
            if (_ukeywords == null)
            {
                _ukeywords = new Dictionary <CaseInsensitiveString, string>(4);
            }
            _ukeywords[new CaseInsensitiveString(keyword)] = unicodeExt.GetUnicodeLocaleType(keyword);
        }
    }

    return this;
}
/// <summary>
/// Set extension/private subtags in a single string representation.
/// </summary>
/// <remarks>
/// The input is scanned in two phases: first zero or more extensions
/// (a singleton followed by one or more extension subtags), then an optional
/// private use section (the private use prefix followed by one or more
/// private use subtags). Anything left over after both phases is an error.
/// </remarks>
/// <param name="subtags">
/// The extension and private use subtags; <c>null</c> or empty clears all extensions.
/// Occurrences of <see cref="BaseLocale.Separator"/> are replaced with
/// <see cref="LanguageTag.Separator"/> before parsing.
/// </param>
/// <returns>This builder.</returns>
/// <exception cref="FormatException">
/// An extension singleton or the private use prefix has no subtags, or
/// unparseable subtags remain.
/// </exception>
public InternalLocaleBuilder SetExtensions(string subtags)
{
    if (string.IsNullOrEmpty(subtags))
    {
        ClearExtensions();
        return this;
    }

    subtags = subtags.Replace(BaseLocale.Separator, LanguageTag.Separator);
    StringTokenEnumerator itr = new StringTokenEnumerator(subtags, LanguageTag.Separator);

    List <string> extensions = null;
    string privateuse = null;

    // 'parsed' holds the CurrentEnd of the last subtag consumed into an extension or privateuse.
    int parsed = 0;
    int start;

    // Move to first element
    itr.MoveNext();

    // Make a list of extension subtags
    while (!itr.IsDone)
    {
        string s = itr.Current;
        if (!LanguageTag.IsExtensionSingleton(s))
        {
            break;
        }

        start = itr.CurrentStart;
        string singleton = s;
        StringBuilder sb = new StringBuilder(singleton);

        itr.MoveNext();
        while (!itr.IsDone)
        {
            s = itr.Current;
            if (!LanguageTag.IsExtensionSubtag(s))
            {
                break;
            }
            sb.Append(LanguageTag.Separator).Append(s);
            parsed = itr.CurrentEnd;
            itr.MoveNext();
        }

        // No subtag was consumed for this singleton if 'parsed' did not move past
        // 'start'. The comparison must be '<=': 'parsed' starts at 0, so with '<' a
        // lone singleton at offset 0 (e.g. "a") was accepted as a complete extension.
        // This matches the privateuse check below.
        if (parsed <= start)
        {
            throw new FormatException("Incomplete extension '" + singleton + "'" /*, start*/);
        }

        if (extensions == null)
        {
            extensions = new List <string>(4);
        }
        extensions.Add(sb.ToString());
    }

    if (!itr.IsDone)
    {
        string s = itr.Current;
        if (LanguageTag.IsPrivateusePrefix(s))
        {
            start = itr.CurrentStart;
            StringBuilder sb = new StringBuilder(s);

            itr.MoveNext();
            while (!itr.IsDone)
            {
                s = itr.Current;
                if (!LanguageTag.IsPrivateuseSubtag(s))
                {
                    break;
                }
                sb.Append(LanguageTag.Separator).Append(s);
                parsed = itr.CurrentEnd;

                itr.MoveNext();
            }

            if (parsed <= start)
            {
                throw new FormatException("Incomplete privateuse:" + subtags.Substring(start) /*, start*/);
            }

            privateuse = sb.ToString();
        }
    }

    // Both phases are finished; any remaining token is not a valid extension or privateuse subtag.
    if (!itr.IsDone)
    {
        throw new FormatException("Ill-formed extension subtags:" + subtags.Substring(itr.CurrentStart) /*, itr.CurrentStart*/);
    }

    return SetExtensions(extensions, privateuse);
}
/// <summary>
/// Sets or removes the extension identified by <paramref name="singleton"/>.
/// </summary>
/// <remarks>
/// A <c>null</c> or empty <paramref name="value"/> removes the extension. For the
/// Unicode locale extension singleton, removal clears the stored attributes and
/// keywords, and a non-empty value is handed to <c>SetUnicodeLocaleExtension</c>.
/// </remarks>
/// <param name="singleton">The extension key, or the private use prefix character.</param>
/// <param name="value">
/// The extension value; occurrences of <see cref="BaseLocale.Separator"/> are replaced with
/// <see cref="LanguageTag.Separator"/> before validation and storage.
/// </param>
/// <returns>This builder.</returns>
/// <exception cref="FormatException">The key or one of the value's subtags is ill-formed.</exception>
public InternalLocaleBuilder SetExtension(char singleton, string value)
{
    // validate key
    bool isBcpPrivateuse = LanguageTag.IsPrivateusePrefixChar(singleton);
    if (!(isBcpPrivateuse || LanguageTag.IsExtensionSingletonChar(singleton)))
    {
        throw new FormatException("Ill-formed extension key: " + singleton);
    }

    CaseInsensitiveChar key = new CaseInsensitiveChar(singleton);
    bool isUnicodeExtension = UnicodeLocaleExtension.IsSingletonChar(key.Value);

    if (string.IsNullOrEmpty(value))
    {
        // Removal request.
        if (isUnicodeExtension)
        {
            // clear entire Unicode locale extension
            if (_uattributes != null)
            {
                _uattributes.Clear();
            }
            if (_ukeywords != null)
            {
                _ukeywords.Clear();
            }
        }
        else if (_extensions != null)
        {
            _extensions.Remove(key);
        }
        return this;
    }

    // validate value
    string normalized = value.Replace(BaseLocale.Separator, LanguageTag.Separator);
    StringTokenEnumerator tokens = new StringTokenEnumerator(normalized, LanguageTag.Separator);
    while (tokens.MoveNext())
    {
        string subtag = tokens.Current;
        bool validSubtag = isBcpPrivateuse
            ? LanguageTag.IsPrivateuseSubtag(subtag)
            : LanguageTag.IsExtensionSubtag(subtag);
        if (!validSubtag)
        {
            throw new FormatException("Ill-formed extension value: " + subtag /*, itr.CurrentStart*/);
        }
    }

    if (isUnicodeExtension)
    {
        SetUnicodeLocaleExtension(normalized);
    }
    else
    {
        if (_extensions == null)
        {
            _extensions = new Dictionary <CaseInsensitiveChar, string>(4);
        }
        _extensions[key] = normalized;
    }

    return this;
}
/// <summary>
/// Builds a <see cref="LanguageTag"/> from a base locale and its extensions.
/// </summary>
/// <remarks>
/// Fields of the base locale that fail their well-formedness check are dropped.
/// Variant subtags are consumed up to the first ill-formed one; the remaining
/// subtags, as long as they qualify as private use subtags, are appended to the
/// private use value behind <c>PRIVUSE_VARIANT_PREFIX</c>.
/// </remarks>
/// <param name="baseLocale">Source of the language, script, region and variant.</param>
/// <param name="localeExtensions">Source of the extension and private use values.</param>
/// <returns>The populated language tag.</returns>
public static LanguageTag ParseLocale(BaseLocale baseLocale, LocaleExtensions localeExtensions)
{
    LanguageTag result = new LanguageTag();

    string lang = baseLocale.GetLanguage();
    string scr = baseLocale.GetScript();
    string reg = baseLocale.GetRegion();
    string vart = baseLocale.GetVariant();

    bool hasSubtag = false;
    string illFormedVariants = null; // store ill-formed variant subtags

    if (lang.Length > 0 && IsLanguage(lang))
    {
        // Convert a deprecated language code used by Java to
        // a new code
        switch (lang)
        {
            case "iw":
                lang = "he";
                break;
            case "ji":
                lang = "yi";
                break;
            case "in":
                lang = "id";
                break;
        }
        result._language = lang;
    }

    if (scr.Length > 0 && IsScript(scr))
    {
        result._script = CanonicalizeScript(scr);
        hasSubtag = true;
    }

    if (reg.Length > 0 && IsRegion(reg))
    {
        result._region = CanonicalizeRegion(reg);
        hasSubtag = true;
    }

    // ICU4N TODO: Remove ?
    if (JDKIMPL)
    {
        // Special handling for no_NO_NY - use nn_NO for language tag
        if (result._language.Equals("no") && result._region.Equals("NO") && vart.Equals("NY")) // ICU4N TODO: Fix this handling for .NET (no-NO is not reliable across platforms)
        {
            result._language = "nn";
            vart = "";
        }
    }

    if (vart.Length > 0)
    {
        List <string> wellFormed = null;
        StringTokenEnumerator tokens = new StringTokenEnumerator(vart, BaseLocale.SEP);

        // Phase 1: collect leading well-formed variant subtags.
        while (tokens.MoveNext())
        {
            string subtag = tokens.Current;
            if (!IsVariant(subtag))
            {
                break;
            }
            if (wellFormed == null)
            {
                wellFormed = new List <string>();
            }
            // Do not canonicalize when JDKIMPL is set!
            wellFormed.Add(JDKIMPL ? subtag : CanonicalizeVariant(subtag));
        }

        if (wellFormed != null)
        {
            result._variants = wellFormed;
            hasSubtag = true;
        }

        // Phase 2: the enumerator stopped on an ill-formed variant subtag.
        if (!tokens.IsDone)
        {
            StringBuilder leftover = new StringBuilder();
            while (!tokens.IsDone)
            {
                string candidate = tokens.Current;
                if (!IsPrivateuseSubtag(candidate))
                {
                    // cannot use private use subtag - truncated
                    break;
                }
                if (leftover.Length > 0)
                {
                    leftover.Append(SEP);
                }
                if (!JDKIMPL)
                {
                    candidate = AsciiUtil.ToLowerString(candidate);
                }
                leftover.Append(candidate);
                tokens.MoveNext();
            }
            if (leftover.Length > 0)
            {
                illFormedVariants = leftover.ToString();
            }
        }
    }

    List <string> extensionList = null;
    string privateuse = null;

    foreach (char singleton in localeExtensions.Keys)
    {
        Extension ext = localeExtensions.GetExtension(singleton);
        if (IsPrivateusePrefixChar(singleton))
        {
            privateuse = ext.Value;
            continue;
        }
        if (extensionList == null)
        {
            extensionList = new List <string>();
        }
        extensionList.Add(singleton.ToString() + SEP + ext.Value);
    }

    if (extensionList != null)
    {
        result._extensions = extensionList;
        hasSubtag = true;
    }

    // append ill-formed variant subtags to private use
    if (illFormedVariants != null)
    {
        privateuse = (privateuse == null)
            ? PRIVUSE_VARIANT_PREFIX + SEP + illFormedVariants
            : privateuse + SEP + PRIVUSE_VARIANT_PREFIX + SEP + illFormedVariants.Replace(BaseLocale.SEP, SEP);
    }

    if (privateuse != null)
    {
        result._privateuse = privateuse;
    }

    // use lang "und" when 1) no language is available AND
    // 2) any of other subtags other than private use are available or
    // no private use tag is available
    bool languageMissing = result._language.Length == 0;
    if (languageMissing && (hasSubtag || privateuse == null))
    {
        result._language = UNDETERMINED;
    }

    return result;
}
/// <summary>
/// Parses a language tag string according to the BNF in RFC 5646 (shown in the remarks).
/// </summary>
/// <remarks>
/// <code>
/// Language-Tag  = langtag             ; normal language tags
///               / privateuse          ; private use tag
///               / grandfathered       ; grandfathered tags
///
///
/// langtag       = language
///                 ["-" script]
///                 ["-" region]
///                 *("-" variant)
///                 *("-" extension)
///                 ["-" privateuse]
///
/// language      = 2*3ALPHA            ; shortest ISO 639 code
///                 ["-" extlang]       ; sometimes followed by
///                                     ; extended language subtags
///               / 4ALPHA              ; or reserved for future use
///               / 5*8ALPHA            ; or registered language subtag
///
/// extlang       = 3ALPHA              ; selected ISO 639 codes
///                 *2("-" 3ALPHA)      ; permanently reserved
///
/// script        = 4ALPHA              ; ISO 15924 code
///
/// region        = 2ALPHA              ; ISO 3166-1 code
///               / 3DIGIT              ; UN M.49 code
///
/// variant       = 5*8alphanum         ; registered variants
///               / (DIGIT 3alphanum)
///
/// extension     = singleton 1*("-" (2*8alphanum))
///
///                                     ; Single alphanumerics
///                                     ; "x" reserved for private use
/// singleton     = DIGIT               ; 0 - 9
///               / %x41-57             ; A - W
///               / %x59-5A             ; Y - Z
///               / %x61-77             ; a - w
///               / %x79-7A             ; y - z
///
/// privateuse    = "x" 1*("-" (1*8alphanum))
/// </code>
/// </remarks>
/// <param name="languageTag">The tag text to parse.</param>
/// <param name="sts">
/// Receives the parse result; it is reset on entry. When <c>null</c>, a
/// throwaway status is used internally.
/// </param>
/// <returns>The parsed tag, populated with whatever subtags were recognized.</returns>
public static LanguageTag Parse(string languageTag, ParseStatus sts)
{
    if (sts != null)
    {
        sts.Reset();
    }
    else
    {
        sts = new ParseStatus();
    }

    // Check if the tag is grandfathered
    string[] mapping;
    bool isGrandfathered =
        GRANDFATHERED.TryGetValue(new AsciiUtil.CaseInsensitiveKey(languageTag), out mapping)
        && mapping != null;

    // For a grandfathered tag, tokenize its preferred mapping instead of the input.
    StringTokenEnumerator itr = new StringTokenEnumerator(isGrandfathered ? mapping[1] : languageTag, SEP);

    // ICU4N: Move to the first element
    itr.MoveNext();

    LanguageTag tag = new LanguageTag();

    // langtag must start with either language or privateuse
    if (tag.ParseLanguage(itr, sts))
    {
        tag.ParseExtlangs(itr, sts);
        tag.ParseScript(itr, sts);
        tag.ParseRegion(itr, sts);
        tag.ParseVariants(itr, sts);
        tag.ParseExtensions(itr, sts);
    }
    tag.ParsePrivateuse(itr, sts);

    if (isGrandfathered)
    {
        // Grandfathered tag is replaced with a well-formed tag above.
        // However, the parsed length must be the original tag length.
        Debug.Assert(itr.IsDone);
        Debug.Assert(!sts.IsError);
        sts.ParseLength = languageTag.Length;
        return tag;
    }

    if (!itr.IsDone && !sts.IsError)
    {
        // Tokens remain that no sub-parser accepted: report the first one.
        string leftover = itr.Current;
        sts.ErrorIndex = itr.CurrentStart;
        sts.ErrorMessage = (leftover.Length == 0)
            ? "Empty subtag"
            : "Invalid subtag: " + leftover;
    }

    return tag;
}
/// <summary>
/// Internal constructor, only used by <see cref="InternalLocaleBuilder"/>.
/// </summary>
/// <remarks>
/// Keys and values are lower-cased through <c>AsciiUtil.ToLower</c> before being stored.
/// When nothing ends up in the map, the shared empty map and an empty id are used.
/// </remarks>
/// <param name="extensions">Extension values keyed by singleton; may be <c>null</c> or empty.</param>
/// <param name="uattributes">Unicode locale attributes; may be <c>null</c> or empty.</param>
/// <param name="ukeywords">Unicode locale keyword/type pairs; may be <c>null</c> or empty.</param>
internal LocaleExtensions(IDictionary <CaseInsensitiveChar, string> extensions, ISet <CaseInsensitiveString> uattributes, IDictionary <CaseInsensitiveString, string> ukeywords)
{
    bool anyExtensions = extensions != null && extensions.Count > 0;
    bool anyAttributes = uattributes != null && uattributes.Count > 0;
    bool anyKeywords = ukeywords != null && ukeywords.Count > 0;

    if (!(anyExtensions || anyAttributes || anyKeywords))
    {
        _map = EmptyMap;
        _id = "";
        return;
    }

    // Build extension map
    var built = new JCG.SortedDictionary <char, Extension>();

    if (anyExtensions)
    {
        foreach (var pair in extensions)
        {
            char singleton = AsciiUtil.ToLower(pair.Key.Value);
            string text = pair.Value;

            if (LanguageTag.IsPrivateusePrefixChar(singleton))
            {
                // we need to exclude special variant in privateuse, e.g. "x-abc-lvariant-DEF"
                text = InternalLocaleBuilder.RemovePrivateuseVariant(text);
                if (text == null)
                {
                    continue;
                }
            }

            built[singleton] = new Extension(singleton, AsciiUtil.ToLower(text));
        }
    }

    if (anyAttributes || anyKeywords)
    {
        JCG.SortedSet <string> attributeSet = null;
        JCG.SortedDictionary <string, string> keywordMap = null;

        if (anyAttributes)
        {
            attributeSet = new JCG.SortedSet <string>(StringComparer.Ordinal);
            foreach (CaseInsensitiveString attribute in uattributes)
            {
                attributeSet.Add(AsciiUtil.ToLower(attribute.Value));
            }
        }

        if (anyKeywords)
        {
            keywordMap = new JCG.SortedDictionary <string, string>(StringComparer.Ordinal);
            foreach (var keyword in ukeywords)
            {
                keywordMap[AsciiUtil.ToLower(keyword.Key.Value)] = AsciiUtil.ToLower(keyword.Value);
            }
        }

        built[UnicodeLocaleExtension.Singleton] = new UnicodeLocaleExtension(attributeSet, keywordMap);
    }

    if (built.Count == 0)
    {
        // this could happen when only privateuse with special variant
        _map = EmptyMap;
        _id = "";
    }
    else
    {
        _map = built;
        _id = ToID(built);
    }
}
/// <summary>
/// Tells whether <paramref name="c"/> is accepted as an extension key: either an
/// extension singleton character or the private use prefix character.
/// </summary>
/// <param name="c">The candidate key character.</param>
/// <returns><c>true</c> when either <see cref="LanguageTag"/> check accepts the character.</returns>
public static bool IsValidKey(char c)
{
    if (LanguageTag.IsExtensionSingletonChar(c))
    {
        return true;
    }

    return LanguageTag.IsPrivateusePrefixChar(c);
}