Esempio n. 1
0
 /// <summary>
 /// Looks up the rules for a name type, a rule type and exactly one language.
 /// </summary>
 /// <param name="nameType">The <see cref="NameType"/> to look up.</param>
 /// <param name="rt">The <see cref="RuleType"/> to look up.</param>
 /// <param name="lang">The single language to look up.</param>
 /// <returns>The list of <see cref="Rule"/>s returned by the <see cref="LanguageSet"/>-based overload.</returns>
 public static IList<Rule> GetInstance(NameType nameType, RuleType rt, string lang)
 {
     // Wrap the single language in a set so the LanguageSet-based overload can be reused.
     var singleLanguage = new JCG.HashSet<string> { lang };

     return GetInstance(nameType, rt, LanguageSet.From(singleLanguage));
 }
Esempio n. 2
0
        /// <summary>
        /// Encodes <paramref name="input"/> with a <see cref="PhoneticEngine"/> configured from
        /// string arguments. This code is similar in style to code found in Solr:
        /// solr/core/src/java/org/apache/solr/analysis/BeiderMorseFilterFactory.java
        /// <para/>
        /// Making a test out of it to protect Solr from possible future
        /// regressions in Commons-Codec.
        /// </summary>
        /// <param name="args">Settings map. Recognized keys: "nameType" and "ruleType" (enum member
        /// names, parsed case-insensitively) and "languageSet" ("auto" or a comma-separated list).
        /// Missing keys fall back to the defaults described in the method body.</param>
        /// <param name="concat">Passed straight through to the <see cref="PhoneticEngine"/> constructor.</param>
        /// <param name="input">The text to encode.</param>
        /// <returns>Whatever <c>PhoneticEngine.Encode</c> returns for <paramref name="input"/>.</returns>
        private static string Encode(IDictionary<string, string> args, bool concat, string input)
        {
            // PhoneticEngine = NameType + RuleType + concat
            // we use common-codec's defaults: GENERIC + APPROX + true
            string nameTypeArg;
            args.TryGetValue("nameType", out nameTypeArg);
            NameType nameType = (nameTypeArg == null)
                ? NameType.GENERIC
                : (NameType)Enum.Parse(typeof(NameType), nameTypeArg, true);

            string ruleTypeArg;
            args.TryGetValue("ruleType", out ruleTypeArg);
            RuleType ruleType = (ruleTypeArg == null)
                ? RuleType.APPROX
                : (RuleType)Enum.Parse(typeof(RuleType), ruleTypeArg, true);

            PhoneticEngine engine = new PhoneticEngine(nameType, ruleType, concat);

            // LanguageSet: defaults to automagic, otherwise a comma-separated list.
            string languageSetArg;
            args.TryGetValue("languageSet", out languageSetArg);

            LanguageSet languageSet;
            // Ordinal comparison: "auto" is a configuration keyword, not linguistic text.
            if (languageSetArg == null || languageSetArg.Equals("auto", StringComparison.Ordinal))
            {
                languageSet = null;
            }
            else
            {
                languageSet = LanguageSet.From(new HashSet<string>(Arrays.AsList(languageSetArg.Split(',').TrimEnd())));
            }

            /*
             *  org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java (lines 96-98) does this:
             *
             *  encoded = (languages == null)
             *      ? engine.encode(termAtt.toString())
             *      : engine.encode(termAtt.toString(), languages);
             *
             *  Hence our approach, below:
             */
            if (languageSet == null)
            {
                return engine.Encode(input);
            }

            return engine.Encode(input, languageSet);
        }
Esempio n. 3
0
        /// <summary>
        /// Builds a <see cref="Phoneme"/> from its textual form. Text without a '[' becomes a
        /// phoneme for <c>Languages.ANY_LANGUAGE</c>; otherwise the text between the first '['
        /// and the trailing ']' is split with <c>PLUS</c> into the phoneme's language names.
        /// </summary>
        /// <param name="ph">The phoneme expression to parse.</param>
        /// <returns>The parsed <see cref="Phoneme"/>.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="ph"/> contains a '[' but does not end in ']'.
        /// </exception>
        private static Phoneme ParsePhoneme(string ph)
        {
            int bracketStart = ph.IndexOf('[');

            // No language restriction present: the whole text is the phoneme.
            if (bracketStart < 0)
            {
                return new Phoneme(ph, Languages.ANY_LANGUAGE);
            }

            if (!ph.EndsWith("]", StringComparison.Ordinal))
            {
                throw new ArgumentException("Phoneme expression contains a '[' but does not end in ']'");
            }

            string phonemeText = ph.Substring(0, bracketStart);

            // Take everything strictly between the '[' and the closing ']'.
            int listStart = bracketStart + 1;
            int listLength = (ph.Length - 1) - listStart;
            string languageList = ph.Substring(listStart, listLength);

            ISet<string> languageNames = new JCG.HashSet<string>(PLUS.Split(languageList).TrimEnd());

            return new Phoneme(phonemeText, LanguageSet.From(languageNames));
        }
Esempio n. 4
0
        /// <summary>
        /// Guesses which languages a word may belong to.
        /// </summary>
        /// <remarks>
        /// Starts from every known language and applies each matching rule in order: an
        /// accept-on-match rule keeps only the rule's languages, any other matching rule
        /// removes the rule's languages.
        /// </remarks>
        /// <param name="input">The word.</param>
        /// <returns>
        /// The remaining candidate languages, or <c>Languages.ANY_LANGUAGE</c> when the
        /// result equals <c>Languages.NO_LANGUAGES</c>.
        /// </returns>
        public virtual LanguageSet GuessLanguages(string input)
        {
            string lowered = input.ToLowerInvariant();
            ISet<string> candidates = new JCG.HashSet<string>(this.languages.GetLanguages());

            foreach (LangRule rule in this.rules)
            {
                if (!rule.Matches(lowered))
                {
                    continue;
                }

                if (!rule.acceptOnMatch)
                {
                    // Rejecting rule: drop every language the rule names.
                    candidates.ExceptWith(rule.languages);
                    continue;
                }

                // Accepting rule: collect the candidates the rule does not name first,
                // because the set cannot be modified while it is being enumerated.
                IList<string> unlisted = new JCG.List<string>();
                foreach (string candidate in candidates)
                {
                    if (rule.languages.Contains(candidate))
                    {
                        continue;
                    }

                    unlisted.Add(candidate);
                }

                candidates.ExceptWith(unlisted);
            }

            LanguageSet guessed = LanguageSet.From(candidates);

            if (guessed.Equals(Languages.NO_LANGUAGES))
            {
                return Languages.ANY_LANGUAGE;
            }

            return guessed;
        }