/// <summary>
/// Applies the final rules to convert from a language-specific phonetic representation to a
/// language-independent representation.
/// </summary>
/// <param name="phonemeBuilder">The current phonemes.</param>
/// <param name="finalRules">The final rules to apply.</param>
/// <returns>
/// The resulting phonemes. When <paramref name="finalRules"/> is empty, the given
/// <paramref name="phonemeBuilder"/> is returned unchanged.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="finalRules"/> is null.</exception>
private PhonemeBuilder ApplyFinalRules(PhonemeBuilder phonemeBuilder, IDictionary<string, IList<Rule>> finalRules)
{
    if (finalRules == null)
    {
        // The single-string constructor interprets its argument as the parameter name,
        // so the name and the message are passed separately.
        throw new ArgumentNullException("finalRules", "finalRules can not be null");
    }

    if (finalRules.Count == 0)
    {
        return phonemeBuilder;
    }

    // Collected in a sorted set so that phonemes considered equal by Phoneme.COMPARER are kept only once.
    ISet<Phoneme> phonemes = new SortedSet<Phoneme>(Phoneme.COMPARER);

    foreach (Phoneme phoneme in phonemeBuilder.Phonemes)
    {
        PhonemeBuilder subBuilder = PhonemeBuilder.Empty(phoneme.Languages);
        string phonemeText = phoneme.GetPhonemeText();

        // No increment clause: the position to resume from is taken from each rules application.
        for (int i = 0; i < phonemeText.Length;)
        {
            RulesApplication rulesApplication =
                new RulesApplication(finalRules, phonemeText, subBuilder, i, maxPhonemes).Invoke();
            bool found = rulesApplication.IsFound;
            subBuilder = rulesApplication.PhonemeBuilder;

            if (!found)
            {
                // not found, appending as-is
                subBuilder.Append(phonemeText.Substring(i, 1));
            }

            i = rulesApplication.I;
        }

        phonemes.UnionWith(subBuilder.Phonemes);
    }

    return new PhonemeBuilder(phonemes.ToList());
}
/// <summary>
/// Encodes an input string into an output phonetic representation, given a set of possible origin languages.
/// </summary>
/// <param name="input">String to phoneticise; a string with dashes or spaces separating each word.</param>
/// <param name="languageSet">
/// The set of possible origin languages; it selects the main rules and the language-specific final rules.
/// </param>
/// <returns>
/// A phonetic representation of the input; a string containing '-'-separated phonetic representations
/// of the input.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The engine's name type is not one of SEPHARDIC, ASHKENAZI or GENERIC.
/// </exception>
public virtual string Encode(string input, LanguageSet languageSet)
{
    IDictionary<string, IList<Rule>> rules = Rule.GetInstanceMap(this.nameType, RuleType.RULES, languageSet);
    // rules common across many (all) languages
    IDictionary<string, IList<Rule>> finalRules1 = Rule.GetInstanceMap(this.nameType, this.ruleType, "common");
    // rules that apply to a specific language that may be ambiguous or wrong if applied to other languages
    IDictionary<string, IList<Rule>> finalRules2 = Rule.GetInstanceMap(this.nameType, this.ruleType, languageSet);

    // tidy the input
    // lower-casing uses the invariant culture so the result does not depend on the current culture
    input = input.ToLowerInvariant().Replace('-', ' ').Trim();

    if (this.nameType == NameType.GENERIC)
    {
        if (input.StartsWith("d'", StringComparison.Ordinal))
        {
            // check for d'
            string remainder = input.Substring(2);
            string combined = "d" + remainder;
            return "(" + Encode(remainder) + ")-(" + Encode(combined) + ")";
        }

        foreach (string l in NAME_PREFIXES[this.nameType])
        {
            // handle generic prefixes
            if (input.StartsWith(l + " ", StringComparison.Ordinal))
            {
                // check for any prefix in the words list
                string remainder = input.Substring(l.Length + 1); // input without the prefix
                string combined = l + remainder; // input with prefix without space
                return "(" + Encode(remainder) + ")-(" + Encode(combined) + ")";
            }
        }
    }

    IList<string> words = WHITESPACE.Split(input).ToList();
    IList<string> words2 = new List<string>();

    // special-case handling of word prefixes based upon the name type
    switch (this.nameType)
    {
        case NameType.SEPHARDIC:
            foreach (string aWord in words)
            {
                string[] parts = aWord.Split(new char[] { '\'' }, StringSplitOptions.RemoveEmptyEntries);
                // An empty or apostrophe-only word yields no parts at all (this happens for empty
                // input); keep it as an empty word instead of indexing past the array.
                words2.Add(parts.Length > 0 ? parts[parts.Length - 1] : string.Empty);
            }

            words2.RemoveAll(NAME_PREFIXES[this.nameType]);
            break;

        case NameType.ASHKENAZI:
            words2.AddRange(words);
            words2.RemoveAll(NAME_PREFIXES[this.nameType]);
            break;

        case NameType.GENERIC:
            words2.AddRange(words);
            break;

        default:
            throw new InvalidOperationException("Unreachable case: " + this.nameType);
    }

    if (this.concat)
    {
        // concat mode enabled
        input = Join(words2, " ");
    }
    else if (words2.Count == 1)
    {
        // not a multi-word name
        // NOTE(review): this takes the first entry of 'words', not 'words2'; when a prefix was
        // removed above the two can differ - confirm this is intended.
        input = words.FirstOrDefault();
    }
    else if (words2.Count > 1)
    {
        // encode each word in a multi-word name separately (normally used for approx matches)
        return string.Join("-", words2.Select(word => Encode(word)));
    }

    // When prefix removal left no words at all, none of the branches above applies and the tidied
    // input is encoded as a whole, rather than failing while building the per-word result.

    PhonemeBuilder phonemeBuilder = PhonemeBuilder.Empty(languageSet);

    // loop over each char in the input - we will handle the increment manually
    for (int i = 0; i < input.Length;)
    {
        RulesApplication rulesApplication =
            new RulesApplication(rules, input, phonemeBuilder, i, maxPhonemes).Invoke();
        i = rulesApplication.I;
        phonemeBuilder = rulesApplication.PhonemeBuilder;
    }

    // Apply the general rules
    phonemeBuilder = ApplyFinalRules(phonemeBuilder, finalRules1);
    // Apply the language-specific rules
    phonemeBuilder = ApplyFinalRules(phonemeBuilder, finalRules2);

    return phonemeBuilder.MakeString();
}