Esempio n. 1
0
        /// <summary>
        /// Encodes an input string into an output phonetic representation, given a set of possible origin languages.
        /// </summary>
        /// <param name="input">String to phoneticise; a string with dashes or spaces separating each word.</param>
        /// <param name="languageSet"></param>
        /// <returns>A phonetic representation of the input; a string containing '-'-separated phonetic representations of the input.</returns>
        public virtual string Encode(string input, LanguageSet languageSet)
        {
            IDictionary <string, IList <Rule> > rules = Rule.GetInstanceMap(this.nameType, RuleType.RULES, languageSet);
            // rules common across many (all) languages
            IDictionary <string, IList <Rule> > finalRules1 = Rule.GetInstanceMap(this.nameType, this.ruleType, "common");
            // rules that apply to a specific language that may be ambiguous or wrong if applied to other languages
            IDictionary <string, IList <Rule> > finalRules2 = Rule.GetInstanceMap(this.nameType, this.ruleType, languageSet);

            // tidy the input
            // lower case is a locale-dependent operation
            input = input.ToLowerInvariant().Replace('-', ' ').Trim();

            if (this.nameType == NameType.GENERIC)
            {
                if (input.Length >= 2 && input.Substring(0, 2 - 0).Equals("d'", StringComparison.Ordinal))
                { // check for d'
                    string remainder = input.Substring(2);
                    string combined  = "d" + remainder;
                    return("(" + Encode(remainder) + ")-(" + Encode(combined) + ")");
                }
                foreach (string l in NAME_PREFIXES[this.nameType])
                {
                    // handle generic prefixes
                    if (input.StartsWith(l + " ", StringComparison.Ordinal))
                    {
                        // check for any prefix in the words list
                        string remainder = input.Substring(l.Length + 1); // input without the prefix
                        string combined  = l + remainder;                 // input with prefix without space
                        return("(" + Encode(remainder) + ")-(" + Encode(combined) + ")");
                    }
                }
            }

            IList <string> words  = WHITESPACE.Split(input).TrimEnd();
            ISet <string>  words2 = new JCG.HashSet <string>();

            // special-case handling of word prefixes based upon the name type
            switch (this.nameType)
            {
            case NameType.SEPHARDIC:
                foreach (string aWord in words)
                {
                    string[] parts    = aWord.Split('\'').TrimEnd();
                    string   lastPart = parts[parts.Length - 1];
                    words2.Add(lastPart);
                }
                words2.ExceptWith(NAME_PREFIXES[this.nameType]);
                break;

            case NameType.ASHKENAZI:
                words2.UnionWith(words);
                words2.ExceptWith(NAME_PREFIXES[this.nameType]);
                break;

            case NameType.GENERIC:
                words2.UnionWith(words);
                break;

            default:
                throw new InvalidOperationException("Unreachable case: " + this.nameType);
            }

            if (this.concat)
            {
                // concat mode enabled
                input = Join(words2, " ");
            }
            else if (words2.Count == 1)
            {
                // not a multi-word name
                //input = words.iterator().next();
                input = words[0];
            }
            else
            {
                // encode each word in a multi-word name separately (normally used for approx matches)
                StringBuilder result = new StringBuilder();
                foreach (string word in words2)
                {
                    result.Append("-").Append(Encode(word));
                }
                // return the result without the leading "-"
                return(result.ToString(1, result.Length - 1));
            }

            PhonemeBuilder phonemeBuilder = PhonemeBuilder.Empty(languageSet);

            // loop over each char in the input - we will handle the increment manually
            for (int i = 0; i < input.Length;)
            {
                RulesApplication rulesApplication =
                    new RulesApplication(rules, input, phonemeBuilder, i, maxPhonemes).Invoke();
                i = rulesApplication.I;
                phonemeBuilder = rulesApplication.PhonemeBuilder;
            }

            // Apply the general rules
            phonemeBuilder = ApplyFinalRules(phonemeBuilder, finalRules1);
            // Apply the language-specific rules
            phonemeBuilder = ApplyFinalRules(phonemeBuilder, finalRules2);

            return(phonemeBuilder.MakeString());
        }