/// <summary>
        /// Splits <paramref name="languageTag"/> into its parts and records them under
        /// <paramref name="identityElem"/>.  The "language" entry is always written; "script",
        /// "territory" (the region part) and "variant" are written when the part has content, and any
        /// existing child elements of that name are removed when it does not.
        /// </summary>
        /// <param name="identityElem">The element that receives (or loses) the per-part entries.</param>
        /// <param name="languageTag">The language tag to split.</param>
        private void WriteLanguageTagElements(XElement identityElem, string languageTag)
        {
            string languagePart;
            string scriptPart;
            string regionPart;
            string variantPart;

            // The success flag is not inspected; whatever parts come back are used as-is.
            IetfLanguageTag.TryGetParts(languageTag, out languagePart, out scriptPart, out regionPart, out variantPart);

            // language element is required
            identityElem.SetAttributeValue("language", "type", languagePart);

            // The optional parts, handled in the same fixed order: script, territory, variant.
            string[] elementNames = { "script", "territory", "variant" };
            string[] partValues = { scriptPart, regionPart, variantPart };

            for (int index = 0; index < elementNames.Length; index++)
            {
                if (string.IsNullOrEmpty(partValues[index]))
                {
                    // Nothing to record for this part: drop any existing element(s) of that name.
                    identityElem.Elements(elementNames[index]).Remove();
                    continue;
                }

                identityElem.SetAttributeValue(elementNames[index], "type", partValues[index]);
            }
        }
        /// <summary>
        /// Convenience overload: splits <paramref name="languageTag"/> into language, script, region and
        /// variant and forwards those parts to the four-argument CurrentVersion overload.
        /// </summary>
        /// <param name="languageTag">The language tag to split.</param>
        /// <returns>Whatever the four-argument overload returns for the split parts.</returns>
        public static string CurrentVersion(string languageTag)
        {
            string languagePart;
            string scriptPart;
            string regionPart;
            string variantPart;

            // The success flag is ignored; the parts are forwarded whatever the outcome.
            IetfLanguageTag.TryGetParts(languageTag, out languagePart, out scriptPart, out regionPart, out variantPart);

            return CurrentVersion(languagePart, scriptPart, regionPart, variantPart);
        }
Exemple #3
0
        /// <summary>
        /// Decides whether a language entry passes the regional-dialect filter.  Entries are always kept
        /// when IncludeRegionalDialects is set, and "zh-CN" / "zh-TW" are always kept.  Otherwise an entry
        /// is kept only if its tag has no region part.  li.LanguageTag may be just a search string the user
        /// has typed (a partial tag or part of a language name); if it does not parse as a language tag it
        /// is not treated as specifying a region and is kept.
        /// </summary>
        /// <param name="li">The language entry to test.</param>
        /// <returns>true to keep the entry, false to filter it out.</returns>
        private bool RegionalDialectsFilter(LanguageInfo li)
        {
            // Keep everything when regions are allowed; always include Chinese languages with region codes.
            if (IncludeRegionalDialects || li.LanguageTag.IsOneOf("zh-CN", "zh-TW"))
            {
                return true;
            }

            // TryGetParts is used so that no predictable exceptions need catching as the user is typing.
            string languagePart, scriptPart, regionPart, variantPart;
            bool parsesAsTag = IetfLanguageTag.TryGetParts(li.LanguageTag, out languagePart, out scriptPart, out regionPart, out variantPart);

            // Not a tag?  Don't filter it out.  A real tag is OK only if it has no region.
            return !parsesAsTag || string.IsNullOrEmpty(regionPart);
        }
		/// <summary>
		/// Some languages in langtags.json have not been normalized to have a default tag without a script marker
		/// in one of its entries.  For some uses of the data, we really want to see only the default tags but we
		/// also don't want to lose sight of any languages.  So scan through the data for cases where every tag
		/// associated with a language contains a script marker and choose one entry as the default; that entry's
		/// LanguageTag is overwritten with the bare language code.  (The entry found in the most countries is
		/// preferred, provided it has at least as many names as the current candidate.)
		/// </summary>
		/// <remarks>
		/// Tags are sorted and prefix-matched ordinally: language tags are identifiers, so culture-sensitive
		/// comparison rules must not influence which entries are grouped under one language.
		/// </remarks>
		private void EnsureDefaultTags()
		{
			// Collect the distinct tags of all indexed entries and walk them in ordinal order so that
			// all tags belonging to one language are adjacent.
			var tagSet = new HashSet<string>();
			foreach (var indexedInfo in _codeToLanguageIndex.Values)
			{
				tagSet.Add(indexedInfo.LanguageTag);
			}
			var tagList = tagSet.ToList();
			tagList.Sort(StringComparer.Ordinal);
			var prevLang = string.Empty;
			var countChanged = 0;
			for (var i = 0; i < tagList.Count; ++i)
			{
				var tag = tagList[i];
				string language;
				string script;
				string region;
				string variant;
				if (!IetfLanguageTag.TryGetParts(tag, out language, out script, out region, out variant))
				{
					prevLang = tag;	// shouldn't happen, but if it does...
					continue;
				}
				// Check for a language without a simple tag that has a tag with a script.
				// (not quite foolproof in theory since a tag with region or variant might sort
				// in front of a tag with a script, but good enough in practice)
				if (language == prevLang || string.IsNullOrEmpty(script))
				{
					prevLang = language;
					continue;
				}
				// Go through all the entries for this language so we can attempt to choose
				// the "best" for being the default;
				var langInfo = _codeToLanguageIndex[tag];
				var languagePrefix = language + "-";
				while (i + 1 < tagList.Count)
				{
					var tagNext = tagList[i + 1];
					// Ordinal match, consistent with the ordinal sort above.
					if (!tagNext.StartsWith(languagePrefix, StringComparison.Ordinal))
					{
						break;
					}
					++i;	// consume this tag so the outer loop does not revisit it
					var langInfoNext = _codeToLanguageIndex[tagNext];
					// choose the one that's more widespread unless the name information
					// indicates a possibly less widespread region of origin.
					if (langInfoNext.Names.Count >= langInfo.Names.Count &&
						langInfoNext.Countries.Count > langInfo.Countries.Count)
					{
						langInfo = langInfoNext;
					}
				}
				langInfo.LanguageTag = language;		// force tag to default form arbitrarily for now.
				++countChanged;
				prevLang = language;
			}
			Debug.WriteLine($"LanguageLookup.EnsureDefaultTags() changed {countChanged} language tags");
		}
Exemple #5
0
        /// <summary>
        /// Parses <paramref name="tag"/> with IetfLanguageTag.TryGetParts and checks both the success flag
        /// and each of the four returned parts against the expected values.
        /// </summary>
        public void TryGetParts_ReturnsExpectedResults(string tag, bool valid, string expectedLanguage, string expectedScript, string expectedRegion, string expectedVariant)
        {
            // {0} = tag, {1} = part name, {2} = actual value, {3} = expected value
            const string mismatchFormat = "parsing {0} produced unexpected {1} {2} instead of {3}";
            string actualLanguage;
            string actualScript;
            string actualRegion;
            string actualVariant;

            bool parsed = IetfLanguageTag.TryGetParts(tag, out actualLanguage, out actualScript, out actualRegion, out actualVariant);

            Assert.That(parsed, Is.EqualTo(valid));
            Assert.That(actualLanguage, Is.EqualTo(expectedLanguage),
                        string.Format(mismatchFormat, tag, "language", actualLanguage, expectedLanguage));
            Assert.That(actualScript, Is.EqualTo(expectedScript),
                        string.Format(mismatchFormat, tag, "script", actualScript, expectedScript));
            Assert.That(actualRegion, Is.EqualTo(expectedRegion),
                        string.Format(mismatchFormat, tag, "region", actualRegion, expectedRegion));
            Assert.That(actualVariant, Is.EqualTo(expectedVariant),
                        string.Format(mismatchFormat, tag, "variant", actualVariant, expectedVariant));
        }
        /// <summary>
        /// Keeps a language entry only if its tag carries no script part.  A LanguageTag that does not
        /// parse as a language tag at all is kept.
        /// </summary>
        /// <param name="li">The language entry to test.</param>
        /// <returns>true to keep the entry, false to filter it out.</returns>
        private bool ScriptMarkerFilter(LanguageInfo li)
        {
            string languagePart, scriptPart, regionPart, variantPart;
            bool parsesAsTag = IetfLanguageTag.TryGetParts(li.LanguageTag, out languagePart, out scriptPart, out regionPart, out variantPart);

            // Not a tag?  Don't filter it out.  A real tag is OK only if it has no script.
            return !parsesAsTag || string.IsNullOrEmpty(scriptPart);
        }
Exemple #7
0
        /// <summary>
        /// Checks whether <paramref name="text"/> is an acceptable replacement for the model's current
        /// language tag: it must start with the language part of the current tag and must be accepted by
        /// IetfLanguageTag.IsValid.
        /// </summary>
        /// <param name="text">The candidate tag.  A null value is reported as invalid.</param>
        /// <returns>true if <paramref name="text"/> passes both checks; otherwise false.</returns>
        /// <exception cref="ApplicationException">
        /// The language tag currently stored in the model cannot be parsed.
        /// </exception>
        public bool ValidateIetfCode(string text)
        {
            string language;
            string script;
            string region;
            string variant;

            // The stored tag is checked first so that a corrupt model is always reported,
            // regardless of what the caller passed in.
            if (!IetfLanguageTag.TryGetParts(_model.CurrentWsSetupModel?.CurrentLanguageTag, out language, out script, out region,
                                             out variant))
            {
                throw new ApplicationException("Invalid code stored in the model");
            }

            // A missing candidate is simply not valid; don't fail with a NullReferenceException.
            if (text == null)
            {
                return false;
            }

            // Tags are identifiers, so compare ordinally rather than with current-culture rules.
            return text.StartsWith(language, StringComparison.Ordinal) && IetfLanguageTag.IsValid(text);
        }
Exemple #8
0
        /// <summary>
        /// If so desired, filter out any language whose tag contains a Script value.  Every entry is kept
        /// when IncludeScriptMarkers is set; otherwise an entry is kept only if its tag has no script part
        /// or does not parse as a language tag at all.
        /// </summary>
        /// <remarks>
        /// NOTE(review): languages whose tags all contain a Script value get no special treatment in this
        /// method; presumably their tags are adjusted to the bare language code elsewhere -- confirm.
        /// </remarks>
        /// <param name="li">The language entry to test.</param>
        /// <returns>true to keep the entry, false to filter it out.</returns>
        private bool ScriptMarkerFilter(LanguageInfo li)
        {
            if (!IncludeScriptMarkers)
            {
                // TryGetParts is used so that no predictable exceptions need catching as the user is typing.
                string languagePart, scriptPart, regionPart, variantPart;
                bool parsesAsTag = IetfLanguageTag.TryGetParts(li.LanguageTag, out languagePart, out scriptPart, out regionPart, out variantPart);

                if (parsesAsTag && !string.IsNullOrEmpty(scriptPart))
                {
                    return false;                     // A real tag with a script: filter it out.
                }
            }

            // Script markers are allowed, or the tag has no script, or it is not a tag at all.
            return true;
        }
            /// <summary>
            /// Sorting the languages for display is tricky: we want the most relevant languages at the
            /// top of the list, so we can't simply sort alphabetically by language name or by language tag,
            /// but need to take both items into account together with the current search string.  Ordering
            /// by relevance is clearly impossible since we'd have to read the user's mind and apply that
            /// knowledge to the data.  But the heuristics we use here may be better than nothing...
            /// The checks are applied in this order, the first decisive one winning: identical tags,
            /// a first or second name equal to the search string, a tag equal to the search string, a tag
            /// whose language part equals the search string, the shorter tag, the edit distance from the
            /// search string to the primary name, the primary name, and finally the tag itself.
            /// </summary>
            /// <param name="x">The first language to compare; Names[0] is read, so at least one name is assumed (TODO confirm callers guarantee this).</param>
            /// <param name="y">The second language to compare; same assumption as for <paramref name="x"/>.</param>
            /// <returns>A negative value if x should be listed before y, a positive value if after, 0 if their tags are identical.</returns>
            public int Compare(LanguageInfo x, LanguageInfo y)
            {
                // Identical tags (ordinal, case-sensitive comparison) are treated as the same entry.
                if (x.LanguageTag == y.LanguageTag)
                {
                    return(0);
                }

                // Favor ones where some language name matches the search string to solve BL-1141
                // We restrict this to the top 2 names of each language, and to cases where the
                // corresponding names of the two languages are different.  (If both language names
                // match the search string, there's no good reason to favor one over the other!)
                if (!x.Names[0].Equals(y.Names[0], StringComparison.InvariantCultureIgnoreCase))
                {
                    if (x.Names[0].Equals(_searchString, StringComparison.InvariantCultureIgnoreCase))
                    {
                        return(-1);
                    }
                    if (y.Names[0].Equals(_searchString, StringComparison.InvariantCultureIgnoreCase))
                    {
                        return(1);
                    }
                }
                else if (x.Names.Count == 1 || y.Names.Count == 1 || !x.Names[1].Equals(y.Names[1], StringComparison.InvariantCultureIgnoreCase))
                {
                    // If we get here, x.Names[0] == y.Names[0].  If both equal the search string, then neither x.Names[1]
                    // nor y.Names[1] should equal the search string since the code adding to Names checks for redundancy.
                    // Also it's possible that neither x.Names[1] nor y.Names[1] exists at this point in the code, or that
                    // only one of them exists, or that both of them exist (in which case they are not equal).
                    if (x.Names.Count > 1 && x.Names[1].Equals(_searchString, StringComparison.InvariantCultureIgnoreCase))
                    {
                        return(-1);
                    }
                    if (y.Names.Count > 1 && y.Names[1].Equals(_searchString, StringComparison.InvariantCultureIgnoreCase))
                    {
                        return(1);
                    }
                }

                // Favor a language whose tag matches the search string exactly.  (equal tags are handled above)
                if (x.LanguageTag.Equals(_searchString, StringComparison.InvariantCultureIgnoreCase))
                {
                    return(-1);
                }
                if (y.LanguageTag.Equals(_searchString, StringComparison.InvariantCultureIgnoreCase))
                {
                    return(1);
                }

                // written this way to avoid having to catch predictable exceptions as the user is typing
                // Only the language parts are used below; script, region and variant are throwaway outputs
                // that the second TryGetParts call simply overwrites.
                string xlanguage;
                string ylanguage;
                string script;
                string region;
                string variant;
                var    xtagParses = IetfLanguageTag.TryGetParts(x.LanguageTag, out xlanguage, out script, out region, out variant);
                var    ytagParses = IetfLanguageTag.TryGetParts(y.LanguageTag, out ylanguage, out script, out region, out variant);
                var    bothTagLanguagesMatchSearch = xtagParses && ytagParses && xlanguage == ylanguage &&
                                                     _searchString.Equals(xlanguage, StringComparison.InvariantCultureIgnoreCase);

                if (!bothTagLanguagesMatchSearch)
                {
                    // One of the tag language pieces may match the search string even though not both match.  In that case,
                    // sort the matching language earlier in the list.
                    if (xtagParses && _searchString.Equals(xlanguage, StringComparison.InvariantCultureIgnoreCase))
                    {
                        return(-1);                         // x.Tag's language part matches search string exactly, so sort it earlier in the list.
                    }
                    else if (ytagParses && _searchString.Equals(ylanguage, StringComparison.InvariantCultureIgnoreCase))
                    {
                        return(1);                          // y.Tag's language part matches search string exactly, so sort it earlier in the list.
                    }
                }
                // shortest simplest tag is most likely to be what is being looked for
                if (x.LanguageTag.Length < y.LanguageTag.Length)
                {
                    return(-1);
                }
                if (y.LanguageTag.Length < x.LanguageTag.Length)
                {
                    return(1);
                }

                // Editing distance to a language name is not useful when we've detected that the user appears to be
                // typing a language tag in that both language tags match what the user has typed.  (For example,
                // it gives a strange and unwanted order to the variants of zh.)  In such a case we just order the
                // matching codes by length (already done) and then alphabetically by code, skipping the sort by
                // editing distance to the language names.
                if (!bothTagLanguagesMatchSearch)
                {
                    // Use the "editing distance" relative to the search string to sort by the primary name.
                    // (But we don't really care once the editing distance gets very large.)
                    // See https://silbloom.myjetbrains.com/youtrack/issue/BL-5847 for motivation.
                    // Timing tests indicate that 1) calculating these distances doesn't slow down the sorting noticeably
                    // and 2) caching these distances in a dictionary also doesn't speed up the sorting noticeably.
                    // NOTE(review): _lowerSearch is presumably the lower-cased _searchString, and 25 presumably caps
                    // the distance that is computed -- confirm against the field's assignment and EditDistance.
                    var xDistance    = ApproximateMatcher.EditDistance(_lowerSearch, x.Names[0].ToLowerInvariant(), 25, false);
                    var yDistance    = ApproximateMatcher.EditDistance(_lowerSearch, y.Names[0].ToLowerInvariant(), 25, false);
                    var distanceDiff = xDistance - yDistance;
                    if (distanceDiff != 0)
                    {
                        return(distanceDiff);
                    }

                    // If the editing distances for the primary names are the same, sort by the primary name.
                    int res = string.Compare(x.Names[0], y.Names[0], StringComparison.InvariantCultureIgnoreCase);
                    if (res != 0)
                    {
                        return(res);
                    }
                }
                // Final tie-breaker: order by the tag itself, ignoring case.
                return(string.Compare(x.LanguageTag, y.LanguageTag, StringComparison.InvariantCultureIgnoreCase));
            }