/// <summary>
/// Splits <paramref name="languageTag"/> into its language, script, region and variant parts
/// and records them under <paramref name="identityElem"/> as the "language", "script",
/// "territory" and "variant" entries (each with a "type" value).
/// </summary>
/// <param name="identityElem">The identity element that receives the tag parts.</param>
/// <param name="languageTag">The IETF language tag to split up. The success flag returned by
/// the parser is not inspected here.</param>
private void WriteLanguageTagElements(XElement identityElem, string languageTag)
{
	string langPart, scriptPart, regionPart, variantPart;
	IetfLanguageTag.TryGetParts(languageTag, out langPart, out scriptPart, out regionPart, out variantPart);

	// The language entry is mandatory, so it is written unconditionally.
	identityElem.SetAttributeValue("language", "type", langPart);

	// The remaining entries are optional: write each one that has content and
	// remove any existing element for a part that is empty.
	string[] elementNames = { "script", "territory", "variant" };
	string[] partValues = { scriptPart, regionPart, variantPart };
	for (var idx = 0; idx < elementNames.Length; idx++)
	{
		if (string.IsNullOrEmpty(partValues[idx]))
		{
			identityElem.Elements(elementNames[idx]).Remove();
		}
		else
		{
			identityElem.SetAttributeValue(elementNames[idx], "type", partValues[idx]);
		}
	}
}
/// <summary>
/// Convenience overload: breaks <paramref name="languageTag"/> into its language, script,
/// region and variant parts and forwards them to the four-argument CurrentVersion overload.
/// </summary>
/// <param name="languageTag">The IETF language tag to look up.</param>
/// <returns>Whatever the four-argument overload returns for the parsed parts.</returns>
public static string CurrentVersion(string languageTag)
{
	string langPart;
	string scriptPart;
	string regionPart;
	string variantPart;
	// The parse result flag is intentionally not checked; the parts are passed on as produced.
	IetfLanguageTag.TryGetParts(languageTag, out langPart, out scriptPart, out regionPart, out variantPart);

	var version = CurrentVersion(langPart, scriptPart, regionPart, variantPart);
	return version;
}
/// <summary>
/// Decides whether a language entry survives the "no regional dialects" filter.
/// An entry passes when regional dialects are allowed, when its tag is one of the
/// always-allowed Chinese tags ("zh-CN", "zh-TW"), when its tag has no region part,
/// or when the tag does not parse as a language tag at all. The last case matters because
/// li.LanguageTag may be a (partial) search string the user is typing rather than a real tag.
/// </summary>
/// <param name="li">The language entry to test.</param>
/// <returns>true to keep the entry; false to filter it out.</returns>
private bool RegionalDialectsFilter(LanguageInfo li)
{
	// Nothing to filter when regions are allowed; Chinese tags with region codes are always kept.
	if (IncludeRegionalDialects || li.LanguageTag.IsOneOf("zh-CN", "zh-TW"))
		return true;

	// TryGetParts is used so that half-typed input never has to be handled via exceptions.
	string langPart, scriptPart, regionPart, variantPart;
	var isTag = IetfLanguageTag.TryGetParts(li.LanguageTag, out langPart, out scriptPart, out regionPart, out variantPart);

	// Not a tag => keep it. A tag => keep it only when it carries no region.
	return !isTag || string.IsNullOrEmpty(regionPart);
}
/// <summary>
/// Some languages in langtags.json have not been normalized to have a default tag without a script marker
/// in one of its entries. For some uses of the data, we really want to see only the default tags but we
/// also don't want to not see any languages. So scan through the data for cases where every tag associated
/// with a language contains a script marker and choose one as the default to receive a minimal tag that is
/// equal to the language code alone. (The one found in the most countries is chosen by default.)
/// </summary>
/// <remarks>
/// The scan relies on the tags being sorted ordinally, so that all tags of the form "lang-..." directly
/// follow the first tag for "lang". The prefix test therefore also has to be ordinal; a culture-sensitive
/// StartsWith could disagree with the sort order.
/// </remarks>
private void EnsureDefaultTags()
{
	// Collect the distinct tags currently assigned to the indexed languages.
	var tagSet = new HashSet<string>();
	foreach (var info in _codeToLanguageIndex.Values)
	{
		tagSet.Add(info.LanguageTag);
	}
	var tagList = tagSet.ToList();
	tagList.Sort(StringComparer.Ordinal);

	var prevLang = string.Empty;
	var countChanged = 0;
	for (var i = 0; i < tagList.Count; ++i)
	{
		var tag = tagList[i];
		string language;
		string script;
		string region;
		string variant;
		if (!IetfLanguageTag.TryGetParts(tag, out language, out script, out region, out variant))
		{
			prevLang = tag; // shouldn't happen, but if it does...
			continue;
		}
		// Check for a language without a simple tag that has a tag with a script.
		// (not quite foolproof in theory since a tag with region or variant might sort
		// in front of a tag with a script, but good enough in practice)
		if (language == prevLang || string.IsNullOrEmpty(script))
		{
			prevLang = language;
			continue;
		}
		// Go through all the entries for this language so we can attempt to choose
		// the "best" for being the default. Note that this advances the outer index i,
		// so every tag belonging to this language is consumed here.
		var best = _codeToLanguageIndex[tag];
		var languagePrefix = language + "-";
		while (i + 1 < tagList.Count && tagList[i + 1].StartsWith(languagePrefix, StringComparison.Ordinal))
		{
			++i;
			var candidate = _codeToLanguageIndex[tagList[i]];
			// choose the one that's more widespread unless the name information
			// indicates a possibly less widespread region of origin.
			if (candidate.Names.Count >= best.Names.Count &&
				candidate.Countries.Count > best.Countries.Count)
			{
				best = candidate;
			}
		}
		best.LanguageTag = language; // force tag to default form arbitrarily for now.
		++countChanged;
		prevLang = language;
	}
	Debug.WriteLine($"LanguageLookup.EnsureDefaultTags() changed {countChanged} language tags");
}
/// <summary>
/// Parses <paramref name="tag"/> with IetfLanguageTag.TryGetParts and verifies both the
/// success flag and each of the four parts against the expected values.
/// </summary>
public void TryGetParts_ReturnsExpectedResults(string tag, bool valid, string expectedLanguage, string expectedScript, string expectedRegion, string expectedVariant)
{
	string language;
	string script;
	string region;
	string variant;

	var parsed = IetfLanguageTag.TryGetParts(tag, out language, out script, out region, out variant);

	Assert.That(parsed, Is.EqualTo(valid));
	Assert.That(language, Is.EqualTo(expectedLanguage),
		$"parsing {tag} produced unexpected language {language} instead of {expectedLanguage}");
	Assert.That(script, Is.EqualTo(expectedScript),
		$"parsing {tag} produced unexpected script {script} instead of {expectedScript}");
	Assert.That(region, Is.EqualTo(expectedRegion),
		$"parsing {tag} produced unexpected region {region} instead of {expectedRegion}");
	Assert.That(variant, Is.EqualTo(expectedVariant),
		$"parsing {tag} produced unexpected variant {variant} instead of {expectedVariant}");
}
/// <summary>
/// Keeps a language entry only when its tag carries no script part. Input that does not
/// parse as a language tag is always kept.
/// </summary>
/// <param name="li">The language entry to test.</param>
/// <returns>true to keep the entry; false to filter it out.</returns>
private bool ScriptMarkerFilter(LanguageInfo li)
{
	string langPart, scriptPart, regionPart, variantPart;
	var isTag = IetfLanguageTag.TryGetParts(li.LanguageTag, out langPart, out scriptPart, out regionPart, out variantPart);

	// Not a tag => don't filter it out. A tag => acceptable only without a script.
	return !isTag || string.IsNullOrEmpty(scriptPart);
}
/// <summary>
/// Checks whether <paramref name="text"/> is an acceptable IETF code for the current writing
/// system: it must begin with the language part of the tag currently stored in the model and
/// must itself be a valid IETF language tag.
/// </summary>
/// <param name="text">The candidate code. A null value is reported as invalid.</param>
/// <returns>true when <paramref name="text"/> starts with the model's language code and is valid.</returns>
/// <exception cref="ApplicationException">The language tag stored in the model cannot be parsed.</exception>
public bool ValidateIetfCode(string text)
{
	string language;
	string script;
	string region;
	string variant;
	if (!IetfLanguageTag.TryGetParts(_model.CurrentWsSetupModel?.CurrentLanguageTag, out language, out script, out region, out variant))
	{
		throw new ApplicationException("Invalid code stored in the model");
	}

	// Nothing typed cannot be a valid code; report it rather than dereferencing null.
	if (text == null)
	{
		return false;
	}

	// Language tags are machine identifiers, so the prefix test must be ordinal
	// rather than depend on the current culture's collation rules.
	return text.StartsWith(language, StringComparison.Ordinal) && IetfLanguageTag.IsValid(text);
}
/// <summary>
/// Decides whether a language entry survives the "no script markers" filter. Every entry
/// passes when IncludeScriptMarkers is set. Otherwise an entry passes only when its tag has
/// no script part, or when the tag does not parse as a language tag at all.
/// </summary>
/// <param name="li">The language entry to test.</param>
/// <returns>true to keep the entry; false to filter it out.</returns>
private bool ScriptMarkerFilter(LanguageInfo li)
{
	if (IncludeScriptMarkers)
		return true;

	// TryGetParts is used so that half-typed input never has to be handled via exceptions.
	string langPart, scriptPart, regionPart, variantPart;
	var isTag = IetfLanguageTag.TryGetParts(li.LanguageTag, out langPart, out scriptPart, out regionPart, out variantPart);

	// Not a tag => don't filter it out. A tag => acceptable only without a script.
	return !isTag || string.IsNullOrEmpty(scriptPart);
}
/// <summary>
/// Sorting the languages for display is tricky: we want the most relevant languages at the
/// top of the list, so we can't simply sort alphabetically by language name or by language tag,
/// but need to take both items into account together with the current search string. Ordering
/// by relevance is clearly impossible since we'd have to read the user's mind and apply that
/// knowledge to the data. But the heuristics we use here may be better than nothing...
/// </summary>
/// <remarks>
/// The checks are applied in this order, and the first one that distinguishes x from y wins:
/// 1. identical tags compare equal;
/// 2. one of the first two names equals the search string (ignoring case);
/// 3. the whole tag equals the search string (ignoring case);
/// 4. the language part of the tag equals the search string (ignoring case);
/// 5. the shorter tag;
/// 6. unless both tags' language parts match the search string: the smaller edit distance between
///    the search string and the primary name, then the primary name itself;
/// 7. the tag, compared ignoring case.
/// NOTE(review): Names[0] is indexed unconditionally, so this assumes every LanguageInfo has at
/// least one name -- confirm against the code that populates Names.
/// </remarks>
/// <param name="x">The first language to compare.</param>
/// <param name="y">The second language to compare.</param>
/// <returns>A negative value if x should be listed before y, a positive value if y should be
/// listed before x, and 0 if the two have the same tag.</returns>
public int Compare(LanguageInfo x, LanguageInfo y)
{
	if (x.LanguageTag == y.LanguageTag)
	{
		return(0);
	}

	// Favor ones where some language name matches the search string to solve BL-1141
	// We restrict this to the top 2 names of each language, and to cases where the
	// corresponding names of the two languages are different. (If both language names
	// match the search string, there's no good reason to favor one over the other!)
	if (!x.Names[0].Equals(y.Names[0], StringComparison.InvariantCultureIgnoreCase))
	{
		if (x.Names[0].Equals(_searchString, StringComparison.InvariantCultureIgnoreCase))
		{
			return(-1);
		}
		if (y.Names[0].Equals(_searchString, StringComparison.InvariantCultureIgnoreCase))
		{
			return(1);
		}
	}
	else if (x.Names.Count == 1 || y.Names.Count == 1 || !x.Names[1].Equals(y.Names[1], StringComparison.InvariantCultureIgnoreCase))
	{
		// If we get here, x.Names[0] == y.Names[0]. If both equal the search string, then neither x.Names[1]
		// nor y.Names[1] should equal the search string since the code adding to Names checks for redundancy.
		// Also it's possible that neither x.Names[1] nor y.Names[1] exists at this point in the code, or that
		// only one of them exists, or that both of them exist (in which case they are not equal).
		// The Count > 1 guards below protect the Names[1] accesses for the "only one exists" cases.
		if (x.Names.Count > 1 && x.Names[1].Equals(_searchString, StringComparison.InvariantCultureIgnoreCase))
		{
			return(-1);
		}
		if (y.Names.Count > 1 && y.Names[1].Equals(_searchString, StringComparison.InvariantCultureIgnoreCase))
		{
			return(1);
		}
	}

	// Favor a language whose tag matches the search string exactly. (equal tags are handled above)
	if (x.LanguageTag.Equals(_searchString, StringComparison.InvariantCultureIgnoreCase))
	{
		return(-1);
	}
	if (y.LanguageTag.Equals(_searchString, StringComparison.InvariantCultureIgnoreCase))
	{
		return(1);
	}

	// written this way to avoid having to catch predictable exceptions as the user is typing
	// Only the language parts are used below; script, region and variant are throwaway
	// out targets that the second TryGetParts call simply overwrites.
	string xlanguage;
	string ylanguage;
	string script;
	string region;
	string variant;
	var xtagParses = IetfLanguageTag.TryGetParts(x.LanguageTag, out xlanguage, out script, out region, out variant);
	var ytagParses = IetfLanguageTag.TryGetParts(y.LanguageTag, out ylanguage, out script, out region, out variant);
	// True only when both tags parse, share the same language part, and that part equals the search string.
	var bothTagLanguagesMatchSearch = xtagParses && ytagParses && xlanguage == ylanguage && _searchString.Equals(xlanguage, StringComparison.InvariantCultureIgnoreCase);
	if (!bothTagLanguagesMatchSearch)
	{
		// One of the tag language pieces may match the search string even though not both match. In that case,
		// sort the matching language earlier in the list.
		if (xtagParses && _searchString.Equals(xlanguage, StringComparison.InvariantCultureIgnoreCase))
		{
			return(-1); // x.Tag's language part matches search string exactly, so sort it earlier in the list.
		}
		else if (ytagParses && _searchString.Equals(ylanguage, StringComparison.InvariantCultureIgnoreCase))
		{
			return(1); // y.Tag's language part matches search string exactly, so sort it earlier in the list.
		}
	}

	// shortest simplest tag is most likely to be what is being looked for
	if (x.LanguageTag.Length < y.LanguageTag.Length)
	{
		return(-1);
	}
	if (y.LanguageTag.Length < x.LanguageTag.Length)
	{
		return(1);
	}

	// Editing distance to a language name is not useful when we've detected that the user appears to be
	// typing a language tag in that both language tags match what the user has typed. (For example,
	// it gives a strange and unwanted order to the variants of zh.) In such a case we just order the
	// matching codes by length (already done) and then alphabetically by code, skipping the sort by
	// editing distance to the language names.
	if (!bothTagLanguagesMatchSearch)
	{
		// Use the "editing distance" relative to the search string to sort by the primary name.
		// (But we don't really care once the editing distance gets very large.)
		// See https://silbloom.myjetbrains.com/youtrack/issue/BL-5847 for motivation.
		// Timing tests indicate that 1) calculating these distances doesn't slow down the sorting noticeably
		// and 2) caching these distances in a dictionary also doesn't speed up the sorting noticeably.
		// NOTE(review): the 25 presumably caps the distance that is computed (see "very large" above) --
		// confirm the meaning of the last two arguments against ApproximateMatcher.EditDistance.
		var xDistance = ApproximateMatcher.EditDistance(_lowerSearch, x.Names[0].ToLowerInvariant(), 25, false);
		var yDistance = ApproximateMatcher.EditDistance(_lowerSearch, y.Names[0].ToLowerInvariant(), 25, false);
		var distanceDiff = xDistance - yDistance;
		if (distanceDiff != 0)
		{
			return(distanceDiff);
		}

		// If the editing distances for the primary names are the same, sort by the primary name.
		int res = string.Compare(x.Names[0], y.Names[0], StringComparison.InvariantCultureIgnoreCase);
		if (res != 0)
		{
			return(res);
		}
	}

	// Final tie-breaker: order by tag, ignoring case.
	return(string.Compare(x.LanguageTag, y.LanguageTag, StringComparison.InvariantCultureIgnoreCase));
}