/// <summary>
/// Truncate one phone from nucleus.
/// </summary>
/// <remarks>
/// Rules are tried in order. The first rule whose phone list contains the
/// boundary phone on that rule's side (last phone for a right-side rule,
/// first phone for a left-side rule) decides the split and stops the search.
/// If either part is still empty afterwards (including when no rule
/// matched), a trace message is written and the last phone is split off
/// as the right part.
/// </remarks>
/// <param name="phoneme">Phoneme of the language to process.</param>
/// <param name="rules">Truncation rules.</param>
/// <param name="nucleus">CVC source to truncate.</param>
/// <returns>Two-element array: left part, then right part.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="phoneme"/> or <paramref name="rules"/> is null, or
/// <paramref name="nucleus"/> is null or empty.
/// </exception>
/// <exception cref="ArgumentException">An entry in <paramref name="rules"/> is null.</exception>
/// <exception cref="NotSupportedException">A rule has a side other than left or right.</exception>
public static string[] TruncateOnePhoneFromNucleus(Phoneme phoneme,
    Collection<TruncateRule> rules, string nucleus)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (rules == null)
    {
        throw new ArgumentNullException("rules");
    }

    if (string.IsNullOrEmpty(nucleus))
    {
        throw new ArgumentNullException("nucleus");
    }

    TtsMetaUnit ttsMetaUnit = new TtsMetaUnit(phoneme.Language);
    ttsMetaUnit.Name = nucleus;
    string[] phoneNames = ttsMetaUnit.GetPhonesName();
    int lastIndex = phoneNames.Length - 1;

    string leftPart = null;
    string rightPart = null;

    for (int i = 0; i < rules.Count; i++)
    {
        TruncateRule rule = rules[i];
        if (rule == null)
        {
            string message = Helper.NeutralFormat("rules[{0}] should not be null.", i);
            throw new ArgumentException(message);
        }

        if (rule.Side == TruncateSide.Right)
        {
            // The phone name is data, not a pattern: escape it so that regex
            // metacharacters in a name are matched literally.
            string pattern = @"\b" + Regex.Escape(phoneNames[lastIndex]) + @"\b";
            if (Regex.Match(rule.Phones, pattern).Success)
            {
                // Split the last phone off to the right.
                leftPart = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones, 0, lastIndex);
                rightPart = ttsMetaUnit.Phones[lastIndex].Name;
                break;
            }
        }
        else if (rule.Side == TruncateSide.Left)
        {
            string pattern = @"\b" + Regex.Escape(phoneNames[0]) + @"\b";
            if (Regex.Match(rule.Phones, pattern).Success)
            {
                // Split the first phone off to the left.
                leftPart = ttsMetaUnit.Phones[0].Name;
                rightPart = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones, 1, lastIndex);
                break;
            }
        }
        else
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "Truncating side [{0}] is not supported.", rule.Side);
            Debug.Assert(false, message);
            throw new NotSupportedException(message);
        }
    }

    if (string.IsNullOrEmpty(leftPart) || string.IsNullOrEmpty(rightPart))
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "Nucleus [{0}] has empty left phone or right phone after truncating.",
            nucleus);
        Trace.WriteLine(message);

        // Fallback: split the last phone off to the right.
        leftPart = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones, 0, lastIndex);
        rightPart = ttsMetaUnit.Phones[lastIndex].Name;
    }

    return new string[] { leftPart, rightPart };
}
/// <summary>
/// Build units for a syllable pronunciation.
/// Each returned unit is a prefixed string (onset, nucleus or coda prefix)
/// whose phones are joined by <c>TtsUnit.PhoneDelimiter</c>.
/// </summary>
/// <param name="phoneme">Phoneme of the language to process with.</param>
/// <param name="sliceData">Slice data to process.</param>
/// <param name="syllable">Syllable to process.</param>
/// <returns>Best unit list.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="phoneme"/> or <paramref name="sliceData"/> is null, or
/// <paramref name="syllable"/> is null or empty.
/// </exception>
/// <exception cref="ArgumentException">
/// <c>phoneme.TtsSonorantPhones</c>, <c>sliceData.OnsetSlices</c> or
/// <c>sliceData.NucleusSlices</c> is null.
/// </exception>
public static string[] BuildUnits(Phoneme phoneme, SliceData sliceData, string syllable)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (phoneme.TtsSonorantPhones == null)
    {
        string message = Helper.NeutralFormat("phoneme.TtsSonorantPhones should not be null.");
        throw new ArgumentException(message);
    }

    if (string.IsNullOrEmpty(syllable))
    {
        throw new ArgumentNullException("syllable");
    }

    if (sliceData == null)
    {
        throw new ArgumentNullException("sliceData");
    }

    if (sliceData.OnsetSlices == null)
    {
        string message = Helper.NeutralFormat("sliceData.OnsetSlices should not be null.");
        throw new ArgumentException(message);
    }

    if (sliceData.NucleusSlices == null)
    {
        string message = Helper.NeutralFormat("sliceData.NucleusSlices should not be null.");
        throw new ArgumentException(message);
    }

    List<string> slicedUnits = new List<string>();

    string unstressedSyllable = Pronunciation.RemoveStress(syllable);

    ScriptItem scriptItem = new ScriptItem(phoneme.Language);

    // items contains phone and tone.
    string[] items = scriptItem.PronunciationSeparator.SplitPhones(unstressedSyllable);

    // Treat the whole syllable as one unit at first.
    TtsMetaUnit ttsMetaUnit = new TtsMetaUnit(phoneme.Language);
    ttsMetaUnit.Name = string.Join(" ", items);
    string[] phones = ttsMetaUnit.GetPhonesName();

    if (sliceData.NucleusSlices.IndexOf(ttsMetaUnit.Name) >= 0)
    {
        // The whole syllable is already a predefined nucleus unit: return it as is.
        slicedUnits.Add(TtsUnit.NucleusPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
        return slicedUnits.ToArray();
    }

    int vowelIndex = phoneme.GetFirstVowelIndex(phones);
    if (vowelIndex < 0)
    {
        // No vowel in the syllable: use the whole syllable as one unit if it
        // is listed in the onset or coda table.
        if (sliceData.OnsetSlices.IndexOf(ttsMetaUnit.Name) >= 0)
        {
            slicedUnits.Add(TtsUnit.OnsetPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
        }
        else if (sliceData.CodaSlices.IndexOf(ttsMetaUnit.Name) >= 0)
        {
            slicedUnits.Add(TtsUnit.CodaPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
        }
        else
        {
            // Otherwise, treat each phone as a coda unit.
            foreach (string phone in phones)
            {
                slicedUnits.Add(TtsUnit.CodaPrefix + phone);
            }
        }

        return slicedUnits.ToArray();
    }

    // Find the left-most sonorant before the first vowel. The scan does not
    // stop at non-sonorant phones, so any phone lying between that sonorant
    // and the vowel ends up inside the initial nucleus candidate.
    int firstSonorantIndex = vowelIndex;
    for (int i = vowelIndex - 1; i >= 0; i--)
    {
        if (phoneme.TtsSonorantPhones.IndexOf(phones[i]) >= 0)
        {
            firstSonorantIndex = i;
        }
    }

    // Find the right-most sonorant after the first vowel (same scan, mirrored).
    int lastSonorantIndex = vowelIndex;
    for (int i = vowelIndex + 1; i <= phones.Length - 1; i++)
    {
        if (phoneme.TtsSonorantPhones.IndexOf(phones[i]) >= 0)
        {
            lastSonorantIndex = i;
        }
    }

    // Treat the vowel and the surrounding span up to those sonorants as the
    // nucleus candidate first.
    string nucleus = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones,
        firstSonorantIndex, lastSonorantIndex - firstSonorantIndex + 1);

    TruncateRuleData truncateRuleData = Localor.GetTruncateRuleData(phoneme.Language);

    // Refine the nucleus against the predefined unit table: while the
    // candidate has more than one phone and is NOT listed, cut one phone off.
    // IndexOf returns -1 for "not found"; index 0 is a valid hit, so the
    // test must be "< 0" (the previous "<= 0" wrongly truncated a candidate
    // that was the first entry of the table).
    while (lastSonorantIndex - firstSonorantIndex > 0 &&
        sliceData.NucleusSlices.IndexOf(nucleus) < 0)
    {
        string[] leftRight = PhoneMerger.TruncateOnePhoneFromNucleus(phoneme,
            truncateRuleData.NucleusTruncateRules, nucleus);

        if (phoneme.TtsPhones.IndexOf(leftRight[0]) >= 0)
        {
            // The left part is a single phone: it was cut from the left edge.
            firstSonorantIndex++;
        }
        else
        {
            // Otherwise the single phone must have been cut from the right edge.
            Debug.Assert(phoneme.TtsPhones.IndexOf(leftRight[1]) >= 0);
            lastSonorantIndex--;
        }

        // Re-define the remaining nucleus unit.
        nucleus = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones,
            firstSonorantIndex, lastSonorantIndex - firstSonorantIndex + 1);
    }

    slicedUnits.Add(TtsUnit.NucleusPrefix + nucleus.Replace(" ", TtsUnit.PhoneDelimiter));

    // Refine onset: walking right to left, try the prefix phones[0..index] as
    // one onset unit; if it is not listed, emit phones[index] on its own and
    // retry with a shorter prefix.
    for (int index = firstSonorantIndex - 1; index >= 0; index--)
    {
        string onset = TtsMetaPhone.Join(TtsUnit.PhoneDelimiter, ttsMetaUnit.Phones, 0, index + 1);
        if (sliceData.OnsetSlices.IndexOf(onset.Replace(TtsUnit.PhoneDelimiter, " ")) >= 0)
        {
            slicedUnits.Insert(0, TtsUnit.OnsetPrefix + onset);

            // The matched onset covers every remaining phone, so we are done.
            break;
        }
        else
        {
            // Treat it as a single phone unit.
            slicedUnits.Insert(0,
                TtsUnit.OnsetPrefix + TtsMetaPhone.Join(TtsUnit.PhoneDelimiter, ttsMetaUnit.Phones, index, 1));
        }
    }

    // Refine coda, matching from right to left.
    BuildCodaUnits(sliceData, ttsMetaUnit.Phones, lastSonorantIndex + 1, slicedUnits);

    return slicedUnits.ToArray();
}
/// <summary>
/// Checks each syllable of a word pronunciation and reports the first problem found.
/// </summary>
/// <param name="entry">Script item supplying the language, separators and vowel limits.</param>
/// <param name="word">Pronunciation of word.</param>
/// <returns>The first data error found, or null when no error is found.</returns>
public static DataError ValidateSyllables(ScriptItem entry, string word)
{
    if (entry == null)
    {
        throw new ArgumentNullException("entry");
    }

    if (entry.PronunciationSeparator == null)
    {
        throw new ArgumentException(
            Helper.NeutralFormat("entry.PronunciationSeparator should not be null."));
    }

    if (string.IsNullOrEmpty(entry.PronunciationSeparator.Syllable))
    {
        throw new ArgumentException(
            Helper.NeutralFormat("entry.PronunciationSeparator.Syllable should not be null."));
    }

    if (string.IsNullOrEmpty(word))
    {
        throw new ArgumentNullException("word");
    }

    Phoneme phoneme = Localor.GetPhoneme(entry.Language);

    string[] separators = new string[] { entry.PronunciationSeparator.Syllable };
    string[] parts = word.Split(separators, StringSplitOptions.None);

    for (int position = 0; position < parts.Length; position++)
    {
        string current = parts[position].Trim();

        if (string.IsNullOrEmpty(current))
        {
            string emptyMessage = string.Format(CultureInfo.InvariantCulture,
                "The syllable[{0}] of word[{1}] pronunciation is empty by separator [{2}]",
                position, word, entry.PronunciationSeparator.Syllable);
            return new DataError("null", emptyMessage, entry.Id);
        }

        // Special units (wrapped in underscores) are not validated.
        if (Regex.IsMatch(current, "^_(.*)_$"))
        {
            continue;
        }

        string[] splitItems = entry.PronunciationSeparator.SplitPhones(current);
        TtsMetaUnit unit = new TtsMetaUnit(entry.Language);
        unit.Name = string.Join(" ", splitItems);
        string[] phoneNames = unit.GetPhonesName();

        // A syllable listed as a predefined nucleus is accepted as is; such a
        // syllable may have no vowel in some languages, like fr-CA.
        SliceData slices = Localor.GetSliceData(entry.Language);
        bool isPredefinedNucleus = slices.NucleusSlices.IndexOf(unit.Name) >= 0;

        if (!isPredefinedNucleus)
        {
            // en-US: syllable must have vowels.
            // ru-RU: a Russian syllable can have no sonorant.
            // others: syllable must have vowels or sonorants.
            bool acceptable =
                entry.Language == Language.EnUS ? IsGoodSyllableWithVowel(entry, phoneme, phoneNames)
                : entry.Language == Language.RuRU ? IsSyllableWithLessVowel(entry, phoneme, phoneNames)
                : IsGoodSyllableWithSonorant(entry, phoneme, phoneNames);

            if (!acceptable)
            {
                int[] vowelPositions = phoneme.GetVowelIndexes(phoneNames);
                string template =
                    "There must be minimum {0} vowels or maximum {1} included in syllable " +
                    "or the syllable should have one sonorant and more than one consonants, " +
                    "but {2} vowels are found in syllable [{3}] of word [{4}].";
                string vowelMessage = string.Format(CultureInfo.InvariantCulture, template,
                    entry.MinVowelCountInSyllable, entry.MaxVowelCountInSyllable,
                    vowelPositions.Length, parts[position], word);
                return new DataError("null", vowelMessage, entry.Id);
            }
        }

        // Check the pronunciation of the slices in this syllable.
        DataError sliceError = ValidateSlices(entry, current);
        if (sliceError != null)
        {
            return sliceError;
        }
    }

    return null;
}