/// <summary>
/// Decides whether a slice is a nucleus. A slice qualifies when either
/// 1) its name is already listed in the nucleus slice set, or
/// 2) at least one of its phones is a vowel of the current language.
/// </summary>
/// <param name="ttsMetaUnit">TtsMetaUnit to test.</param>
/// <returns>True if the slice is a nucleus, otherwise false.</returns>
public bool IsNucleus(TtsMetaUnit ttsMetaUnit)
{
    if (ttsMetaUnit == null)
    {
        throw new ArgumentNullException("ttsMetaUnit");
    }

    // A slice that is already registered as nucleus needs no phone inspection.
    bool alreadyListed = _nucleusSlices.IndexOf(ttsMetaUnit.Name) >= 0;
    if (alreadyListed)
    {
        return true;
    }

    // Otherwise the slice is a nucleus as soon as one of its phones is a vowel.
    Phoneme languagePhoneme = Localor.GetPhoneme(Language);
    foreach (TtsMetaPhone phone in ttsMetaUnit.Phones)
    {
        if (languagePhoneme.TtsVowelPhones.IndexOf(phone.Name) >= 0)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Initializes a new instance of the <see cref="TtsUnit"/> class.
/// The language is remembered, registered in the language collection, and
/// used to create the meta unit; the feature object starts out empty.
/// </summary>
/// <param name="language">Language for this unit.</param>
public TtsUnit(Language language)
{
    // Statement order is kept as-is: the property setters are not visible here.
    _language = language;
    _languages.Add(language);

    Feature = new TtsUnitFeature();
    MetaUnit = new TtsMetaUnit(language);
}
/// <summary>
/// Truncate one phone from nucleus.
/// The rules are tried in order; the first rule whose phone list contains
/// the boundary phone on its side (last phone for a right rule, first phone
/// for a left rule) decides where the nucleus is split. When no rule yields
/// two non-empty parts, the last phone is split off as a fallback.
/// </summary>
/// <param name="phoneme">Phoneme of the language to process.</param>
/// <param name="rules">Truncation rules.</param>
/// <param name="nucleus">CVC source to truncate.</param>
/// <returns>Result: left part + right part.</returns>
/// <exception cref="ArgumentNullException">
/// phoneme or rules is null, or nucleus is null or empty.
/// </exception>
/// <exception cref="ArgumentException">A rule in the collection is null.</exception>
/// <exception cref="NotSupportedException">A rule has an unsupported side.</exception>
public static string[] TruncateOnePhoneFromNucleus(Phoneme phoneme,
    Collection<TruncateRule> rules, string nucleus)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (rules == null)
    {
        throw new ArgumentNullException("rules");
    }

    if (string.IsNullOrEmpty(nucleus))
    {
        throw new ArgumentNullException("nucleus");
    }

    TtsMetaUnit ttsMetaUnit = new TtsMetaUnit(phoneme.Language);
    ttsMetaUnit.Name = nucleus;
    string[] phoneNames = ttsMetaUnit.GetPhonesName();

    string leftPart = null;
    string rightPart = null;

    for (int i = 0; i < rules.Count; i++)
    {
        TruncateRule rule = rules[i];
        if (rule == null)
        {
            string message = Helper.NeutralFormat("rules[{0}] should not be null.", i);
            throw new ArgumentException(message);
        }

        if (rule.Side == TruncateSide.Right)
        {
            // The phone name is data, not a pattern: escape it so that regex
            // metacharacters in a phone name are matched literally.
            string pattern = @"\b" + Regex.Escape(phoneNames[phoneNames.Length - 1]) + @"\b";
            if (Regex.IsMatch(rule.Phones, pattern))
            {
                leftPart = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones, 0, phoneNames.Length - 1);
                rightPart = ttsMetaUnit.Phones[phoneNames.Length - 1].Name;
                break;
            }
        }
        else if (rule.Side == TruncateSide.Left)
        {
            string pattern = @"\b" + Regex.Escape(phoneNames[0]) + @"\b";
            if (Regex.IsMatch(rule.Phones, pattern))
            {
                leftPart = ttsMetaUnit.Phones[0].Name;
                rightPart = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones, 1, phoneNames.Length - 1);
                break;
            }
        }
        else
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "Truncating side [{0}] is not supported.", rule.Side);
            Debug.Assert(false, message);
            throw new NotSupportedException(message);
        }
    }

    if (string.IsNullOrEmpty(leftPart) || string.IsNullOrEmpty(rightPart))
    {
        // No rule produced a usable split: report it and fall back to
        // cutting the last phone off the right side.
        string message = string.Format(CultureInfo.InvariantCulture,
            "Nucleus [{0}] has empty left phone or right phone after truncating.", nucleus);
        Trace.WriteLine(message);

        leftPart = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones, 0, phoneNames.Length - 1);
        rightPart = ttsMetaUnit.Phones[phoneNames.Length - 1].Name;
    }

    return new string[] { leftPart, rightPart };
}
/// <summary>
/// Build units for a syllable pronunciation.
/// Each returned unit is prefixed with its kind (TtsUnit.OnsetPrefix,
/// TtsUnit.NucleusPrefix or TtsUnit.CodaPrefix) and the phones inside a unit
/// are joined with TtsUnit.PhoneDelimiter.
/// </summary>
/// <param name="phoneme">Phoneme of the language to process with.</param>
/// <param name="sliceData">Slice data to process.</param>
/// <param name="syllable">Syllable to process.</param>
/// <returns>Best unit list, ordered onset(s), nucleus, coda(s).</returns>
/// <exception cref="ArgumentNullException">
/// phoneme or sliceData is null, or syllable is null or empty.
/// </exception>
/// <exception cref="ArgumentException">
/// phoneme.TtsSonorantPhones, sliceData.OnsetSlices or sliceData.NucleusSlices is null.
/// </exception>
public static string[] BuildUnits(Phoneme phoneme, SliceData sliceData, string syllable)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (phoneme.TtsSonorantPhones == null)
    {
        string message = Helper.NeutralFormat("phoneme.TtsSonorantPhones should not be null.");
        throw new ArgumentException(message);
    }

    if (string.IsNullOrEmpty(syllable))
    {
        throw new ArgumentNullException("syllable");
    }

    if (sliceData == null)
    {
        throw new ArgumentNullException("sliceData");
    }

    if (sliceData.OnsetSlices == null)
    {
        string message = Helper.NeutralFormat("sliceData.OnsetSlices should not be null.");
        throw new ArgumentException(message);
    }

    if (sliceData.NucleusSlices == null)
    {
        string message = Helper.NeutralFormat("sliceData.NucleusSlices should not be null.");
        throw new ArgumentException(message);
    }

    List<string> slicedUnits = new List<string>();

    string unstressedSyllable = Pronunciation.RemoveStress(syllable);

    ScriptItem scriptItem = new ScriptItem(phoneme.Language);

    // items contains phone and tone.
    string[] items = scriptItem.PronunciationSeparator.SplitPhones(unstressedSyllable);

    // Treat the whole syllable as one unit at first.
    TtsMetaUnit ttsMetaUnit = new TtsMetaUnit(phoneme.Language);
    ttsMetaUnit.Name = string.Join(" ", items);
    string[] phones = ttsMetaUnit.GetPhonesName();

    if (sliceData.NucleusSlices.IndexOf(ttsMetaUnit.Name) >= 0)
    {
        // The whole syllable is already defined in the predefined unit collection.
        slicedUnits.Add(TtsUnit.NucleusPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
        return slicedUnits.ToArray();
    }

    int vowelIndex = phoneme.GetFirstVowelIndex(phones);
    if (vowelIndex < 0)
    {
        // No vowel in the syllable: use the whole syllable as a unit if the
        // unit table knows it, preferring onset over coda.
        if (sliceData.OnsetSlices.IndexOf(ttsMetaUnit.Name) >= 0)
        {
            slicedUnits.Add(TtsUnit.OnsetPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
        }
        else if (sliceData.CodaSlices.IndexOf(ttsMetaUnit.Name) >= 0)
        {
            slicedUnits.Add(TtsUnit.CodaPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
        }
        else
        {
            // Otherwise, treat each phone as a coda unit.
            foreach (string phone in phones)
            {
                slicedUnits.Add(TtsUnit.CodaPrefix + phone);
            }
        }

        return slicedUnits.ToArray();
    }

    // Scan left of the first vowel for sonorant consonants. The scan does not
    // stop at the first hit, so the index ends on the leftmost sonorant found.
    int firstSonarantIndex = vowelIndex;
    for (int i = vowelIndex - 1; i >= 0; i--)
    {
        if (phoneme.TtsSonorantPhones.IndexOf(phones[i]) >= 0)
        {
            firstSonarantIndex = i;
        }
    }

    // Scan right of the first vowel; the index ends on the rightmost sonorant found.
    int lastSonarantIndex = vowelIndex;
    for (int i = vowelIndex + 1; i <= phones.Length - 1; i++)
    {
        if (phoneme.TtsSonorantPhones.IndexOf(phones[i]) >= 0)
        {
            lastSonarantIndex = i;
        }
    }

    // Treat the vowel and the surrounding span up to those sonorants as the
    // nucleus candidate first.
    string nucleus = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones,
        firstSonarantIndex, lastSonarantIndex - firstSonarantIndex + 1);

    TruncateRuleData truncateRuleData = Localor.GetTruncateRuleData(phoneme.Language);

    // Refine the nucleus against the predefined unit table. IndexOf reports
    // "not found" with a negative value, so the test must be "< 0": index 0
    // is a valid hit and must stop the truncation.
    while (lastSonarantIndex - firstSonarantIndex > 0
        && sliceData.NucleusSlices.IndexOf(nucleus) < 0)
    {
        // The candidate is not a predefined unit: drop one phone from it.
        string[] leftRight = PhoneMerger.TruncateOnePhoneFromNucleus(phoneme,
            truncateRuleData.NucleusTruncateRules, nucleus);

        if (phoneme.TtsPhones.IndexOf(leftRight[0]) >= 0)
        {
            // The left part is a single phone, so it was cut from the left edge.
            firstSonarantIndex++;
        }
        else
        {
            Debug.Assert(phoneme.TtsPhones.IndexOf(leftRight[1]) >= 0);
            lastSonarantIndex--;
        }

        // Re-define the remaining nucleus unit.
        nucleus = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones,
            firstSonarantIndex, lastSonarantIndex - firstSonarantIndex + 1);
    }

    slicedUnits.Add(TtsUnit.NucleusPrefix + nucleus.Replace(" ", TtsUnit.PhoneDelimiter));

    // Refine onset: try the longest prefix ending at the current phone; when
    // it is not a predefined onset, emit the current phone alone and retry
    // with a shorter prefix.
    for (int index = firstSonarantIndex - 1; index >= 0; index--)
    {
        string onset = TtsMetaPhone.Join(TtsUnit.PhoneDelimiter, ttsMetaUnit.Phones, 0, index + 1);
        if (sliceData.OnsetSlices.IndexOf(onset.Replace(TtsUnit.PhoneDelimiter, " ")) >= 0)
        {
            slicedUnits.Insert(0, TtsUnit.OnsetPrefix + onset);

            // The matched onset starts at phone 0, so nothing is left to process.
            break;
        }

        // Treat it as a single phone unit.
        slicedUnits.Insert(0,
            TtsUnit.OnsetPrefix + TtsMetaPhone.Join(TtsUnit.PhoneDelimiter, ttsMetaUnit.Phones, index, 1));
    }

    // Refine coda, matching from right to left.
    BuildCodaUnits(sliceData, ttsMetaUnit.Phones, lastSonarantIndex + 1, slicedUnits);

    return slicedUnits.ToArray();
}
/// <summary>
/// Estimate position in syllable for each slice in a slice set.
/// The first slice recognized as nucleus gets one of the Nucleus* positions,
/// chosen by whether slices precede and/or follow it. Every slice before it
/// is an onset and every slice after it is a coda. When no slice is a
/// nucleus, all slices are marked as coda.
/// </summary>
/// <param name="slices">Slice collection to estimate.</param>
/// <returns>Estimated result of position in syllable.</returns>
private PosInSyllable[] EstimatePosInSyllable(string[] slices)
{
    PosInSyllable[] pis = new PosInSyllable[slices.Length];
    if (slices.Length == 0)
    {
        // Nothing to classify; also keeps the slice-data lookup from running.
        return pis;
    }

    // The slice data depends only on the language, so look it up once
    // instead of once per slice.
    SliceData slicedata = Localor.GetSliceData(this.Language);

    int lastIndex = slices.Length - 1;
    int nucleusIndex = -1;
    for (int sliceIndex = 0; sliceIndex < slices.Length; sliceIndex++)
    {
        TtsMetaUnit ttsMetaUnit = new TtsMetaUnit(this.Language);
        ttsMetaUnit.Name = slices[sliceIndex];

        if (!slicedata.IsNucleus(ttsMetaUnit))
        {
            continue;
        }

        bool hasOnset = sliceIndex > 0;
        bool hasCoda = sliceIndex < lastIndex;

        if (hasOnset)
        {
            pis[sliceIndex] = hasCoda ? PosInSyllable.NucleusInCVC : PosInSyllable.NucleusInCV;
        }
        else
        {
            pis[sliceIndex] = hasCoda ? PosInSyllable.NucleusInVC : PosInSyllable.NucleusInV;
        }

        // Only the first nucleus is considered.
        nucleusIndex = sliceIndex;
        break;
    }

    for (int sliceIndex = 0; sliceIndex < nucleusIndex; sliceIndex++)
    {
        pis[sliceIndex] = PosInSyllable.Onset;
    }

    // With nucleusIndex == -1 this loop starts at 0 and marks everything as coda.
    for (int sliceIndex = nucleusIndex + 1; sliceIndex < slices.Length; sliceIndex++)
    {
        pis[sliceIndex] = PosInSyllable.Coda;
    }

    return pis;
}
/// <summary>
/// Converts one phone-based segment file into a unit-based segment file,
/// driven by the pronunciation string of the sentence. Silence segments are
/// copied through unchanged; every other unit is written as the start time
/// of its first phone followed by the "+"-joined full names of its phones.
/// The target file is deleted again when a misalignment is detected.
/// </summary>
/// <param name="pronunciation">Pronunciation string.</param>
/// <param name="filePath">Phone-based segment file.</param>
/// <param name="targetFilePath">Unit-based segment file.</param>
/// <returns>Data error found, or null when none was found.</returns>
public virtual DataError CombinePhone(string pronunciation, string filePath, string targetFilePath)
{
    string cleanedPronunciation = Core.Pronunciation.CleanDecorate(pronunciation);
    string[] slices = PronunciationSeparator.SplitSlices(cleanedPronunciation);

    Collection<WaveSegment> phoneSegs = SegmentFile.ReadAllData(filePath);

    DataError dataError = null;
    using (StreamWriter writer = new StreamWriter(targetFilePath))
    {
        StringBuilder unitLabel = new StringBuilder();
        int sliceIndex = 0;
        int segmentIndex = 0;

        while (segmentIndex < phoneSegs.Count)
        {
            if (phoneSegs[segmentIndex].IsSilenceFeature)
            {
                // Silence is not part of any slice: copy it through as-is.
                writer.WriteLine(phoneSegs[segmentIndex].ToString());
                segmentIndex++;
                continue;
            }

            if (sliceIndex >= slices.Length)
            {
                // Segments are left over although all slices are consumed.
                string sid = Path.GetFileNameWithoutExtension(filePath);
                dataError = new DataError(filePath,
                    "Data does not align between phone segmentation and pronunciation in CombinePhone",
                    sid);
                break;
            }

            TtsMetaUnit unit = new TtsMetaUnit(Language);
            unit.Name = slices[sliceIndex];
            sliceIndex++;

            // Rebuild the label from scratch for this unit.
            unitLabel.Length = 0;
            foreach (TtsMetaPhone phone in unit.Phones)
            {
                if (!string.IsNullOrEmpty(phone.Name))
                {
                    if (unitLabel.Length > 0)
                    {
                        unitLabel.Append("+");
                    }

                    unitLabel.Append(phone.FullName);
                }
            }

            if (unitLabel.Length == 0)
            {
                // Nothing to write for this slice; try the next one on the same segment.
                continue;
            }

            writer.Write(phoneSegs[segmentIndex].StartTime.ToString("F5", CultureInfo.InvariantCulture));
            writer.WriteLine(" " + unitLabel.ToString());

            // Skip all phone segments covered by this unit.
            segmentIndex += unit.Phones.Length;
        }
    }

    if (dataError != null)
    {
        // Do not leave a partially written target file behind.
        try
        {
            File.Delete(targetFilePath);
        }
        catch (IOException ioe)
        {
            Console.WriteLine(ioe.Message);
        }
    }

    return dataError;
}
/// <summary>
/// Parses one space-separated line into a meta unit.
/// Field 12 (zero-based) holds the language id, field 13 the unit id, and
/// the last field the unit name.
/// </summary>
/// <param name="line">Line to parse.</param>
/// <returns>Meta unit carrying the parsed language, id and name.</returns>
/// <exception cref="ArgumentNullException">line is null.</exception>
/// <exception cref="FormatException">
/// The line has fewer than 14 fields, or a numeric field is not a valid integer.
/// </exception>
private static TtsMetaUnit ParseMetaUnitForViterbi(string line)
{
    if (line == null)
    {
        throw new ArgumentNullException("line");
    }

    string[] items = line.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

    // Debug.Assert disappears in release builds; check explicitly so a short
    // line fails with a descriptive error instead of an index exception.
    if (items.Length <= 13)
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "Malformed line: more than 13 fields are expected but {0} found in [{1}].",
            items.Length, line);
        throw new FormatException(message);
    }

    int languageValue = int.Parse(items[12], CultureInfo.InvariantCulture);
    int unitId = int.Parse(items[13], CultureInfo.InvariantCulture);

    TtsMetaUnit mu = new TtsMetaUnit(Localor.MapLanguageId(languageValue));
    mu.Id = unitId;
    mu.Name = items[items.Length - 1];

    return mu;
}
/// <summary>
/// Validates every syllable of a word pronunciation.
/// A syllable must be non-empty and, unless it is a special unit of the form
/// "_..._" or a predefined nucleus slice, must pass the language-specific
/// vowel/sonorant check. Each accepted syllable then has its slices validated.
/// Validation stops at the first error.
/// </summary>
/// <param name="entry">Script item.</param>
/// <param name="word">Pronunciation of word.</param>
/// <returns>Data error found, or null when all syllables are valid.</returns>
public static DataError ValidateSyllables(ScriptItem entry, string word)
{
    if (entry == null)
    {
        throw new ArgumentNullException("entry");
    }

    if (entry.PronunciationSeparator == null)
    {
        string message = Helper.NeutralFormat("entry.PronunciationSeparator should not be null.");
        throw new ArgumentException(message);
    }

    if (string.IsNullOrEmpty(entry.PronunciationSeparator.Syllable))
    {
        string message = Helper.NeutralFormat("entry.PronunciationSeparator.Syllable should not be null.");
        throw new ArgumentException(message);
    }

    if (string.IsNullOrEmpty(word))
    {
        throw new ArgumentNullException("word");
    }

    Phoneme phoneme = Localor.GetPhoneme(entry.Language);
    string syllableSeparator = entry.PronunciationSeparator.Syllable;
    string[] syllables = word.Split(new string[] { syllableSeparator }, StringSplitOptions.None);

    for (int j = 0; j < syllables.Length; j++)
    {
        string syllable = syllables[j].Trim();

        if (string.IsNullOrEmpty(syllable))
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "The syllable[{0}] of word[{1}] pronunciation is empty by separator [{2}]",
                j, word, syllableSeparator);
            return new DataError("null", message, entry.Id);
        }

        if (Regex.IsMatch(syllable, "^_(.*)_$"))
        {
            // Special unit, nothing to validate.
            continue;
        }

        string[] items = entry.PronunciationSeparator.SplitPhones(syllable);
        TtsMetaUnit ttsMetaUnit = new TtsMetaUnit(entry.Language);
        ttsMetaUnit.Name = string.Join(" ", items);
        string[] phones = ttsMetaUnit.GetPhonesName();

        // A predefined nucleus is always valid; it may be a syllable with
        // no vowel in some languages, like fr-CA.
        SliceData sliceData = Localor.GetSliceData(entry.Language);
        bool predefinedNucleus = sliceData.NucleusSlices.IndexOf(ttsMetaUnit.Name) >= 0;
        if (!predefinedNucleus)
        {
            bool goodSyllable;
            if (entry.Language == Language.EnUS)
            {
                // Syllable that must have vowels.
                goodSyllable = IsGoodSyllableWithVowel(entry, phoneme, phones);
            }
            else if (entry.Language == Language.RuRU)
            {
                // A Russian syllable can have no sonorant.
                goodSyllable = IsSyllableWithLessVowel(entry, phoneme, phones);
            }
            else
            {
                // Syllable that must have vowels or sonorants.
                goodSyllable = IsGoodSyllableWithSonorant(entry, phoneme, phones);
            }

            if (!goodSyllable)
            {
                int[] vowelIndexes = phoneme.GetVowelIndexes(phones);
                string format =
                    "There must be minimum {0} vowels or maximum {1} included in syllable " +
                    "or the syllable should have one sonorant and more than one consonants, " +
                    "but {2} vowels are found in syllable [{3}] of word [{4}].";
                string message = string.Format(CultureInfo.InvariantCulture, format,
                    entry.MinVowelCountInSyllable, entry.MaxVowelCountInSyllable,
                    vowelIndexes.Length, syllables[j], word);
                return new DataError("null", message, entry.Id);
            }
        }

        // Check the slices' pronunciation.
        DataError sliceError = ValidateSlices(entry, syllable);
        if (sliceError != null)
        {
            return sliceError;
        }
    }

    return null;
}
/// <summary>
/// Converts one phone-based segment file into a unit-based segment file for
/// a script item. The pronunciations of all words are concatenated with
/// " / ", stress marks are removed, and the result is split into slices.
/// Silence segments are copied through unchanged; every other unit is
/// written as the start time of its first phone followed by the "+"-joined
/// phone names. The target file is deleted again when a misalignment is detected.
/// </summary>
/// <param name="item">ScriptItem.</param>
/// <param name="segmentFilePath">Phone-based segment file.</param>
/// <param name="targetFilePath">Unit-based segment file.</param>
/// <param name="ignoreTone">
/// When true the phone's Name is written, otherwise its FullName.
/// </param>
/// <returns>Data error found, or null when none was found.</returns>
public static Error CombinePhonesToUnits(ScriptItem item, string segmentFilePath,
    string targetFilePath, bool ignoreTone)
{
    if (item == null)
    {
        throw new ArgumentNullException("item");
    }

    // Collect the pronunciation of the whole item, word by word.
    StringBuilder allPronunciations = new StringBuilder();
    foreach (ScriptSentence sentence in item.Sentences)
    {
        foreach (ScriptWord word in sentence.Words)
        {
            if (string.IsNullOrEmpty(word.Pronunciation))
            {
                continue;
            }

            allPronunciations.Append(word.Pronunciation).Append(" / ");
        }
    }

    string pronunciation = Pronunciation.RemoveStress(allPronunciations.ToString());
    string[] slices = item.PronunciationSeparator.SplitSlices(pronunciation);

    Collection<WaveSegment> phoneSegs = SegmentFile.ReadAllData(segmentFilePath);

    Error dataError = null;
    using (StreamWriter writer = new StreamWriter(targetFilePath))
    {
        StringBuilder unitLabel = new StringBuilder();
        int sliceIndex = 0;
        int segmentIndex = 0;

        while (segmentIndex < phoneSegs.Count)
        {
            if (phoneSegs[segmentIndex].IsSilenceFeature)
            {
                // Silence is not part of any slice: copy it through as-is.
                writer.WriteLine(phoneSegs[segmentIndex].ToString());
                segmentIndex++;
                continue;
            }

            if (sliceIndex >= slices.Length)
            {
                // Segments are left over although all slices are consumed.
                string strTmp = "Data does not align between phone segmentation and pronunciation in CombinePhone";
                dataError = new Error(ScriptError.OtherErrors, item.Id, Helper.NeutralFormat(strTmp));
                break;
            }

            TtsMetaUnit unit = new TtsMetaUnit(item.Language);
            unit.Name = slices[sliceIndex];
            sliceIndex++;

            // Rebuild the label from scratch for this unit.
            unitLabel.Length = 0;
            foreach (TtsMetaPhone phone in unit.Phones)
            {
                if (string.IsNullOrEmpty(phone.Name))
                {
                    continue;
                }

                if (unitLabel.Length > 0)
                {
                    unitLabel.Append("+");
                }

                if (ignoreTone)
                {
                    unitLabel.Append(phone.Name);
                }
                else
                {
                    unitLabel.Append(phone.FullName);
                }
            }

            if (unitLabel.Length == 0)
            {
                // Nothing to write for this slice; try the next one on the same segment.
                continue;
            }

            writer.Write(phoneSegs[segmentIndex].StartTime.ToString("F5", CultureInfo.InvariantCulture));
            writer.WriteLine(" " + unitLabel.ToString());

            // Skip all phone segments covered by this unit.
            segmentIndex += unit.Phones.Length;
        }
    }

    if (dataError != null)
    {
        // Do not leave a partially written target file behind.
        try
        {
            File.Delete(targetFilePath);
        }
        catch (IOException ioe)
        {
            Console.WriteLine(ioe.Message);
        }
    }

    return dataError;
}