Esempio n. 1
0
        /// <summary>
        /// Finds the supported language that best matches <paramref name="langDef"/>.
        /// </summary>
        /// <param name="langDef">Parsed language tag to look up.</param>
        /// <returns>
        /// The closest supported language, or the default language when no supported language
        /// shares the requested language subtag or its macrolanguage.
        /// </returns>
        /// <remarks>
        /// The candidate set is narrowed step by step: language (or macrolanguage), extlang, script,
        /// region, variants, extensions. As soon as a step leaves exactly one candidate, that
        /// candidate is returned. A step that matches nothing leaves the candidate set untouched.
        /// Remaining ties are broken by the shortest <c>Private</c>, then the shortest
        /// <c>Canonical</c>, then <c>Canonical</c> order.
        /// </remarks>
        private Lang Lookup(Lang langDef)
        {
            Lang defaultMatch;
            HashSet<Lang> candidates;

            // Read the shared state under the lock. The set returned by GetSupportedListClone is
            // mutated below outside the lock, so it is expected to be a private copy.
            lock (supported)
            {
                defaultMatch = this.defaultLang;
                candidates = GetSupportedListClone();
            }

            // Step 1: candidates whose language subtag does not match are discarded.
            string lang = langDef.Language.Subtag;

            // 1.1: if no supported language has the requested subtag, a better match than the default
            // may be the macrolanguage of the requested language, so match against that instead.
            // MacroLanguage is never null (empty string means "none").
            if (!candidates.Any(a => a.Language.Subtag == lang) && langDef.Language.MacroLanguage != "")
            {
                lang = langDef.Language.MacroLanguage;
            }

            // From here on, lang is the subtag of the requested language or of its macrolanguage.

            // 1.2: keep the exact language matches if there are any...
            if (!KeepMatching(candidates, a => a.Language.Subtag == lang))
            {
                // ...otherwise, as a last resort, keep the languages of the same family, i.e. those
                // whose macrolanguage is lang.
                if (string.IsNullOrEmpty(lang))
                {
                    // An empty string marks an absent subtag. Comparing it against MacroLanguage
                    // would wrongly keep every supported language that simply has no macrolanguage,
                    // so nothing matches and the default is returned below.
                    candidates.Clear();
                }
                else
                {
                    candidates.RemoveWhere(a => a.Language.MacroLanguage != lang);
                }
            }

            // The set now holds the best possible language matches, or nothing.
            if (candidates.Count == 0)
            {
                return defaultMatch;
            }

            if (candidates.Count == 1)
            {
                // At least the language matched, which is better than the default.
                return candidates.First();
            }

            // Step 2: refine by extlang. The extlang should already have been canonicalized, so this
            // step rarely changes anything.
            string extlang = langDef.ExtLang.Subtag;

            if (KeepMatching(candidates, a => a.ExtLang.Subtag == extlang) && candidates.Count == 1)
            {
                return candidates.First();
            }

            // Step 3: refine by script. The script is blank only if the language supports several
            // scripts and the user has not chosen one; otherwise the registry provides a
            // Suppress-Script for the language, so the value holds the default script even when the
            // user did not specify it.
            string script = langDef.Script.Subtag;

            if (KeepMatching(candidates, a => a.Script.Subtag == script) && candidates.Count == 1)
            {
                return candidates.First();
            }

            // Step 4: refine by region. Prefer candidates with the requested region; failing that,
            // prefer candidates with a generic (empty) region over those with a different specific one.
            string region = langDef.Region.Subtag;

            bool narrowedByRegion = KeepMatching(candidates, a => a.Region.Subtag == region)
                                    || KeepMatching(candidates, a => a.Region.Subtag == "");

            if (narrowedByRegion && candidates.Count == 1)
            {
                return candidates.First();
            }

            // Step 5: refine by variants. Keep the candidates sharing the greatest number of variants
            // with the request.
            var variants = new HashSet<string>(langDef.Variants.Select(a => a.Subtag));

            if (KeepBestScoring(candidates, a => a.Variants.Count(b => variants.Contains(b.Subtag)))
                && candidates.Count == 1)
            {
                return candidates.First();
            }

            // Step 6: refine by extensions. Keep the candidates sharing the greatest number of
            // extension values (per extension key) with the request.
            Dictionary<char, HashSet<string>> extensions =
                langDef.Extensions.ToDictionary(a => a.Key, b => new HashSet<string>(b.Value));

            bool narrowedByExtensions = KeepBestScoring(
                candidates,
                a => a.Extensions.Sum(b =>
                {
                    // Single dictionary lookup per extension key instead of ContainsKey + indexer
                    // for every value.
                    HashSet<string> requested;
                    return extensions.TryGetValue(b.Key, out requested)
                        ? b.Value.Count(c => requested.Contains(c))
                        : 0;
                }));

            if (narrowedByExtensions && candidates.Count == 1)
            {
                return candidates.First();
            }

            // Step 7 would be matching the private section; that is deliberately not done.

            // Step 8: consider shorter tags a better match than longer ones, e.g. when matching
            // "de-Qaaa" against "de" and "de-Qaab", prefer "de".
            // NOTE(review): the final ThenBy uses the default string comparer - confirm that
            // culture-sensitive ordering is acceptable for this tie-break.
            return candidates
                .OrderBy(a => a.Private.Length)
                .ThenBy(a => a.Canonical.Length)
                .ThenBy(a => a.Canonical)
                .First();
        }

        /// <summary>
        /// If at least one candidate satisfies <paramref name="isMatch"/>, removes every candidate
        /// that does not; otherwise leaves the set untouched.
        /// </summary>
        /// <returns><c>true</c> if the filter was applied, <c>false</c> if nothing matched.</returns>
        private static bool KeepMatching(HashSet<Lang> candidates, System.Func<Lang, bool> isMatch)
        {
            if (!candidates.Any(isMatch))
            {
                return false;
            }

            candidates.RemoveWhere(a => !isMatch(a));
            return true;
        }

        /// <summary>
        /// If at least one candidate has a positive <paramref name="score"/>, keeps only the
        /// candidates with the highest score; otherwise leaves the set untouched.
        /// </summary>
        /// <returns><c>true</c> if the filter was applied, <c>false</c> if no candidate scored.</returns>
        private static bool KeepBestScoring(HashSet<Lang> candidates, System.Func<Lang, int> score)
        {
            var scored = candidates
                .Select(a => new { Language = a, Num = score(a) })
                .Where(a => a.Num > 0)
                .ToList();

            if (scored.Count == 0)
            {
                return false;
            }

            int best = scored.Max(a => a.Num);
            var winners = new HashSet<Lang>(scored.Where(a => a.Num == best).Select(a => a.Language));

            candidates.RemoveWhere(a => !winners.Contains(a));
            return true;
        }
Esempio n. 2
0
        /// <summary>
        /// Returns the supported language that is closest to the given language tag.
        /// </summary>
        /// <param name="language">Language tag to look up; it is parsed with <c>Lang.Parse</c>.</param>
        /// <returns>The best match found, or the default language.</returns>
        /// <remarks>
        /// The closest language is chosen with the following policy.
        ///
        /// A language in the same family as the requested one is always a better match than any
        /// other language. For example
        ///
        /// xx-xxx-Xxxx-XX-x-xxxxxxxx-xxxxx-xxxxxx-x-xxxxxx-xxxx
        /// is a better match for
        /// yy-yyy-Yyyy-YY-y-yyyyyyyy-yyyyy
        /// than the default, provided that xx and yy belong to the same macrolanguage.
        ///
        /// The same policy is then applied in cascade to the remaining parts of the tag.
        ///
        /// Notation: "X = Y" means Y matches X; "X = Y > Z" means Y matches X better than Z does.
        ///
        /// de-CH is a better match for de-DE than the default.
        ///
        /// de-CH-1901 = de-CH > de > default, because de-CH matches both language and region even
        /// though the orthography variant differs.
        ///
        /// For variants, a greater number of matching variants is always better than a smaller one.
        ///
        /// When several candidates match equally well, the shortest one is preferred.
        /// </remarks>
        public Lang Lookup(string language)
        {
            var parsed = Lang.Parse(language);

            return Lookup(parsed);
        }