/// <summary>
/// Returns the units of this sentence, building them on first use.
/// </summary>
/// <remarks>
/// The unit list is built only while the internal "need build" flag is set; the flag
/// is cleared after the first build. NOTE(review): a later call therefore returns the
/// cached list as-is, whatever value of <paramref name="buildUnitFeature"/> it passes.
/// </remarks>
/// <param name="phoneme">Phoneme.</param>
/// <param name="sliceData">Slice data.</param>
/// <param name="buildUnitFeature">Whether to build unit features when the units are built.</param>
/// <returns>Tts units of this sentence.</returns>
/// <exception cref="ArgumentNullException">phoneme or sliceData is null.</exception>
public Collection<TtsUnit> GetUnits(Phoneme phoneme, SliceData sliceData, bool buildUnitFeature)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (sliceData == null)
    {
        throw new ArgumentNullException("sliceData");
    }

    // Nothing to do when the cached list is still valid.
    if (!_needBuildUnits)
    {
        return _units;
    }

    BuildUnits(phoneme, sliceData, buildUnitFeature);
    _needBuildUnits = false;

    return _units;
}
/// <summary>
/// Rebuilds the unit list of this sentence from its words.
/// </summary>
/// <remarks>
/// Words that are not pronounceable normal words are skipped; when features are not
/// requested, words without a pronunciation are skipped as well. Each remaining word
/// gets a word type derived from the non-normal words that follow it, and that type
/// is stamped onto every unit built for the word.
/// </remarks>
/// <param name="phoneme">Phoneme.</param>
/// <param name="sliceData">Slice data.</param>
/// <param name="buildUnitFeature">Whether to build unit features after the units are built.</param>
private void BuildUnits(Phoneme phoneme, SliceData sliceData, bool buildUnitFeature)
{
    Helper.ThrowIfNull(phoneme);
    Helper.ThrowIfNull(sliceData);

    _units.Clear();

    string pattern = ScriptItem.PunctuationPattern;
    for (int wordIndex = 0; wordIndex < Words.Count; wordIndex++)
    {
        ScriptWord current = Words[wordIndex];

        bool usable = current.IsPronouncableNormalWord &&
            (buildUnitFeature || !string.IsNullOrEmpty(current.Pronunciation));
        if (!usable)
        {
            continue;
        }

        // Look ahead over the following non-normal words. The last mapped type wins;
        // scanning stops at the first one that is not "other punctuation".
        WordType trailingType = WordType.Normal;
        while (wordIndex < Words.Count - 1 && Words[wordIndex + 1].WordType != WordType.Normal)
        {
            trailingType = Localor.MapPunctuation(Words[wordIndex + 1].Grapheme, pattern);
            if (trailingType != WordType.OtherPunctuation)
            {
                break;
            }

            // Consume this punctuation word and keep looking.
            wordIndex++;
        }

        current.Units.Clear();
        current.BuildUnitWithoutFeature(sliceData, ScriptItem.PronunciationSeparator);

        foreach (TtsUnit built in current.Units)
        {
            built.WordType = trailingType;
        }

        Helper.AppendCollection<TtsUnit>(_units, current.Units);
    }

    if (buildUnitFeature)
    {
        BuildUnitFeatures(phoneme);
    }
}
/// <summary>
/// Returns the units of this word.
/// </summary>
/// <remarks>
/// When this is a normal word that has no units yet, the owning sentence is asked
/// for its units first; the word's own unit list is returned afterwards.
/// </remarks>
/// <param name="phoneme">Phoneme.</param>
/// <param name="sliceData">Slice data.</param>
/// <param name="buildUnitFeature">Whether to build unit features.</param>
/// <returns>Tts units of this word.</returns>
/// <exception cref="ArgumentNullException">phoneme or sliceData is null.</exception>
/// <exception cref="InvalidDataException">Units are needed but the word has no sentence.</exception>
public Collection<TtsUnit> GetUnits(Phoneme phoneme, SliceData sliceData, bool buildUnitFeature)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (sliceData == null)
    {
        throw new ArgumentNullException("sliceData");
    }

    // Non-normal words, and words that already have units, need no build.
    if (WordType != WordType.Normal || _units.Count != 0)
    {
        return _units;
    }

    if (Sentence == null)
    {
        string message = Helper.NeutralFormat("word should belong to a sentence.");
        throw new InvalidDataException(message);
    }

    Sentence.GetUnits(phoneme, sliceData, buildUnitFeature);

    return _units;
}
/// <summary>
/// Returns the units of this sentence with unit features requested.
/// </summary>
/// <remarks>
/// Convenience overload that forwards to the three-argument overload with
/// feature building switched on.
/// </remarks>
/// <param name="phoneme">Phoneme.</param>
/// <param name="sliceData">Slice data.</param>
/// <returns>Tts units of this sentence.</returns>
public Collection<TtsUnit> GetUnits(Phoneme phoneme, SliceData sliceData)
{
    const bool BuildFeatures = true;

    Collection<TtsUnit> sentenceUnits = GetUnits(phoneme, sliceData, BuildFeatures);
    return sentenceUnits;
}
/// <summary>
/// Slices a syllable pronunciation into onset, nucleus and coda units.
/// Phones inside one unit are joined with <c>TtsUnit.PhoneDelimiter</c>, and each
/// unit carries the onset/nucleus/coda prefix defined on <c>TtsUnit</c>.
/// </summary>
/// <remarks>
/// Procedure:
/// 1. If the whole (stress-free) syllable is a predefined nucleus slice, return it.
/// 2. If the syllable has no vowel, return it as one onset or coda slice when it is
///    predefined, otherwise return one coda unit per phone.
/// 3. Otherwise take the vowel plus the surrounding sonorants as the nucleus
///    candidate and truncate it one phone at a time until it is a predefined
///    nucleus slice or a single phone; then slice what is left of the vowel into
///    onset units and what is right of it into coda units.
/// </remarks>
/// <param name="phoneme">Phoneme of the language to process with.</param>
/// <param name="sliceData">Slice data to process.</param>
/// <param name="syllable">Syllable pronunciation to process.</param>
/// <returns>Unit list, ordered from left to right.</returns>
/// <exception cref="ArgumentNullException">phoneme, sliceData is null, or syllable is null/empty.</exception>
/// <exception cref="ArgumentException">A required collection on phoneme or sliceData is null.</exception>
public static string[] BuildUnits(Phoneme phoneme, SliceData sliceData, string syllable)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (phoneme.TtsSonorantPhones == null)
    {
        string message = Helper.NeutralFormat("phoneme.TtsSonorantPhones should not be null.");
        throw new ArgumentException(message);
    }

    if (string.IsNullOrEmpty(syllable))
    {
        throw new ArgumentNullException("syllable");
    }

    if (sliceData == null)
    {
        throw new ArgumentNullException("sliceData");
    }

    if (sliceData.OnsetSlices == null)
    {
        string message = Helper.NeutralFormat("sliceData.OnsetSlices should not be null.");
        throw new ArgumentException(message);
    }

    if (sliceData.NucleusSlices == null)
    {
        string message = Helper.NeutralFormat("sliceData.NucleusSlices should not be null.");
        throw new ArgumentException(message);
    }

    List<string> slicedUnits = new List<string>();

    string unstressedSyllable = Pronunciation.RemoveStress(syllable);

    ScriptItem scriptItem = new ScriptItem(phoneme.Language);

    // items contains phone and tone.
    string[] items = scriptItem.PronunciationSeparator.SplitPhones(unstressedSyllable);

    // Treat the whole syllable as one unit at first.
    TtsMetaUnit ttsMetaUnit = new TtsMetaUnit(phoneme.Language);
    ttsMetaUnit.Name = string.Join(" ", items);
    string[] phones = ttsMetaUnit.GetPhonesName();

    if (sliceData.NucleusSlices.IndexOf(ttsMetaUnit.Name) >= 0)
    {
        // The whole syllable is already a predefined nucleus unit, return it.
        slicedUnits.Add(TtsUnit.NucleusPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
        return slicedUnits.ToArray();
    }

    int vowelIndex = phoneme.GetFirstVowelIndex(phones);
    if (vowelIndex < 0)
    {
        // No vowel in the syllable: use the whole syllable as one unit if the unit
        // table has it, as onset first and coda second.
        if (sliceData.OnsetSlices.IndexOf(ttsMetaUnit.Name) >= 0)
        {
            slicedUnits.Add(TtsUnit.OnsetPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
        }
        else if (sliceData.CodaSlices.IndexOf(ttsMetaUnit.Name) >= 0)
        {
            slicedUnits.Add(TtsUnit.CodaPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
        }
        else
        {
            // Otherwise, treat each phone as a coda unit.
            foreach (string phone in phones)
            {
                slicedUnits.Add(TtsUnit.CodaPrefix + phone);
            }
        }

        return slicedUnits.ToArray();
    }

    // Find the left-most sonorant on the left side of the vowel. The scan does not
    // stop at a non-sonorant phone; the truncation loop below refines the result.
    int firstSonarantIndex = vowelIndex;
    for (int i = vowelIndex - 1; i >= 0; i--)
    {
        if (phoneme.TtsSonorantPhones.IndexOf(phones[i]) >= 0)
        {
            firstSonarantIndex = i;
        }
    }

    // Find the right-most sonorant on the right side of the vowel.
    int lastSonarantIndex = vowelIndex;
    for (int i = vowelIndex + 1; i <= phones.Length - 1; i++)
    {
        if (phoneme.TtsSonorantPhones.IndexOf(phones[i]) >= 0)
        {
            lastSonarantIndex = i;
        }
    }

    // Treat the vowel and the surrounding sonorant span as the nucleus unit first.
    string nucleus = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones,
        firstSonarantIndex, lastSonarantIndex - firstSonarantIndex + 1);

    TruncateRuleData truncateRuleData = Localor.GetTruncateRuleData(phoneme.Language);

    // Refine the nucleus according to the predefined unit table.
    // IndexOf returns -1 for "not found", so the test must be "< 0": with "<= 0" a
    // nucleus stored at index 0 of the table was treated as missing and truncated.
    while (lastSonarantIndex - firstSonarantIndex > 0 &&
        sliceData.NucleusSlices.IndexOf(nucleus) < 0)
    {
        // The candidate is not in the predefined unit list, truncate one phone.
        string[] leftRight = PhoneMerger.TruncateOnePhoneFromNucleus(phoneme,
            truncateRuleData.NucleusTruncateRules, nucleus);

        if (phoneme.TtsPhones.IndexOf(leftRight[0]) >= 0)
        {
            // The left part is a single phone: it was cut from the left edge.
            firstSonarantIndex++;
        }
        else
        {
            // Otherwise the right part is the single phone that was cut off.
            Debug.Assert(phoneme.TtsPhones.IndexOf(leftRight[1]) >= 0);
            lastSonarantIndex--;
        }

        // Re-define the remaining nucleus unit.
        nucleus = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones,
            firstSonarantIndex, lastSonarantIndex - firstSonarantIndex + 1);
    }

    slicedUnits.Add(TtsUnit.NucleusPrefix + nucleus.Replace(" ", TtsUnit.PhoneDelimiter));

    // Refine onset: try the longest prefix [0..index] first; phones that cannot be
    // covered by a predefined onset are emitted as single-phone onset units.
    for (int index = firstSonarantIndex - 1; index >= 0; index--)
    {
        string onset = TtsMetaPhone.Join(TtsUnit.PhoneDelimiter, ttsMetaUnit.Phones, 0, index + 1);
        if (sliceData.OnsetSlices.IndexOf(onset.Replace(TtsUnit.PhoneDelimiter, " ")) >= 0)
        {
            slicedUnits.Insert(0, TtsUnit.OnsetPrefix + onset);

            // All phones [0..index] are consumed: drop index to 0 so that the
            // loop's own index-- terminates the loop.
            index -= index;
        }
        else
        {
            // Treat it as a single phone unit.
            slicedUnits.Insert(0,
                TtsUnit.OnsetPrefix + TtsMetaPhone.Join(TtsUnit.PhoneDelimiter, ttsMetaUnit.Phones, index, 1));
        }
    }

    // Refine coda, matching from right to left.
    BuildCodaUnits(sliceData, ttsMetaUnit.Phones, lastSonarantIndex + 1, slicedUnits);

    return slicedUnits.ToArray();
}
/// <summary>
/// Writes the speech-recognition phones of one syllable as MLF lines.
/// </summary>
/// <remarks>
/// The syllable text is trimmed and stripped of stress before it is converted.
/// If the conversion yields null, an error is recorded for the item and
/// nothing is written.
/// </remarks>
/// <param name="syllable">Syllable.</param>
/// <param name="item">Script item the syllable belongs to; its id is used in errors.</param>
/// <param name="sw">Text writer, used only when <paramref name="writeToFile"/> is true.</param>
/// <param name="writeToFile">Whether to write the phones to the writer.</param>
/// <param name="phoneme">Phoneme used to map TTS phones to SR phones.</param>
/// <returns>Errors found for this syllable.</returns>
private static ErrorSet BuildMonoMlf(ScriptSyllable syllable, ScriptItem item, StreamWriter sw,
    bool writeToFile, Phoneme phoneme)
{
    Debug.Assert(syllable != null);
    Debug.Assert(item != null);

    ErrorSet found = new ErrorSet();

    string syllableText = Pronunciation.RemoveStress(syllable.Text.Trim());
    string[] recognitionPhones = phoneme.Tts2SrPhones(syllableText.Trim());

    if (recognitionPhones == null)
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "Invalid TTS syllable[{0}], which can not be converted to Speech Recognition Phone.",
            syllableText);
        found.Add(ScriptError.OtherErrors, item.Id, message);
        return found;
    }

    if (writeToFile)
    {
        for (int i = 0; i < recognitionPhones.Length; i++)
        {
            sw.WriteLine(recognitionPhones[i]);
        }
    }

    return found;
}
/// <summary>
/// Extract acoustic features for every sentence listed in the file map and write
/// them to the target file.
/// </summary>
/// <remarks>
/// All arguments are validated before the target file is created, so an invalid
/// call does not leave an empty or truncated output file behind. A sentence id
/// that is in the file map but not in the script aborts the extraction.
/// </remarks>
/// <param name="script">Script file instance.</param>
/// <param name="phoneme">Phoneme used to get units.</param>
/// <param name="sliceData">Slice data used to get units.</param>
/// <param name="fileMap">File list map.</param>
/// <param name="segmentDir">Segmentation file directory.</param>
/// <param name="wave16kDir">16k Hz waveform file directory.</param>
/// <param name="epochDir">Epoch file directory.</param>
/// <param name="targetFilePath">Target acoustic file path.</param>
/// <exception cref="ArgumentNullException">A required argument is null or empty.</exception>
/// <exception cref="ArgumentException">script.FilePath, fileMap.Map or its keys are missing.</exception>
/// <exception cref="InvalidDataException">A mapped sentence does not exist in the script.</exception>
public static void ExtractAcoustic(XmlScriptFile script, Phoneme phoneme, SliceData sliceData,
    FileListMap fileMap, string segmentDir, string wave16kDir, string epochDir, string targetFilePath)
{
    // Parameters validation
    if (script == null)
    {
        throw new ArgumentNullException("script");
    }

    if (string.IsNullOrEmpty(script.FilePath))
    {
        throw new ArgumentException("script.FilePath is null");
    }

    // Unit building rejects null phoneme/sliceData anyway; check here so that the
    // failure happens before the target file is created.
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (sliceData == null)
    {
        throw new ArgumentNullException("sliceData");
    }

    if (fileMap == null)
    {
        throw new ArgumentNullException("fileMap");
    }

    if (fileMap.Map == null)
    {
        throw new ArgumentException("fileMap.Map is null");
    }

    if (fileMap.Map.Keys == null)
    {
        throw new ArgumentException("fileMap.Map.Keys is null");
    }

    if (string.IsNullOrEmpty(segmentDir))
    {
        throw new ArgumentNullException("segmentDir");
    }

    if (string.IsNullOrEmpty(wave16kDir))
    {
        throw new ArgumentNullException("wave16kDir");
    }

    if (string.IsNullOrEmpty(epochDir))
    {
        throw new ArgumentNullException("epochDir");
    }

    if (!Directory.Exists(segmentDir))
    {
        throw Helper.CreateException(typeof(DirectoryNotFoundException), segmentDir);
    }

    if (!Directory.Exists(wave16kDir))
    {
        throw Helper.CreateException(typeof(DirectoryNotFoundException), wave16kDir);
    }

    if (!Directory.Exists(epochDir))
    {
        throw Helper.CreateException(typeof(DirectoryNotFoundException), epochDir);
    }

    if (string.IsNullOrEmpty(targetFilePath))
    {
        throw new ArgumentNullException("targetFilePath");
    }

    Helper.EnsureFolderExistForFile(targetFilePath);

    using (StreamWriter sw = new StreamWriter(targetFilePath))
    {
        // iterate each script item or sentence
        foreach (string sid in fileMap.Map.Keys)
        {
            if (!script.ItemDic.ContainsKey(sid))
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                    "Sentence [{0}] does not exist in script file [{1}].",
                    sid, script.FilePath);
                throw new InvalidDataException(message);
            }

            ExtractAcoustic(sw, script, sid, phoneme, sliceData, fileMap,
                segmentDir, wave16kDir, epochDir);
        }
    }
}
/// <summary>
/// Set the stress mark on the first vowel of a pronunciation.
/// </summary>
/// <remarks>
/// The pronunciation is split on spaces; the stress value is appended, separated
/// by a space, to the first vowel found. A stress that is not greater than
/// <c>TtsStress.None</c> leaves the pronunciation untouched.
/// </remarks>
/// <param name="phoneme">Phoneme of the language to process.</param>
/// <param name="pronunciation">Space-separated pronunciation to set the stress on.</param>
/// <param name="stress">Stress mark to set for the vowel in the pronunciation.</param>
/// <returns>
/// Pronunciation with stress; the input itself when no stress is requested;
/// null when the pronunciation is null or empty.
/// </returns>
/// <exception cref="ArgumentNullException">phoneme is null.</exception>
/// <exception cref="InvalidDataException">The pronunciation contains no vowel.</exception>
public static string SetVowelStress(Phoneme phoneme, string pronunciation, TtsStress stress)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (string.IsNullOrEmpty(pronunciation))
    {
        return null;
    }

    if (stress <= TtsStress.None)
    {
        return pronunciation;
    }

    string[] phones = pronunciation.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    int vowelIndex = phoneme.GetFirstVowelIndex(phones);
    if (vowelIndex < 0)
    {
        // Report the pronunciation string, not the phones array: a string[] binds to
        // the params object[] of string.Format, which printed only the first phone
        // and threw FormatException when the array was empty.
        string message = string.Format(CultureInfo.InvariantCulture,
            "There is no vowel found in the syllable pronunciation [{0}]",
            pronunciation);
        throw new InvalidDataException(message);
    }

    phones[vowelIndex] = string.Format(CultureInfo.InvariantCulture,
        "{0} {1}", phones[vowelIndex], (int)stress);

    return string.Join(" ", phones);
}
/// <summary>
/// Extract acoustic features for a given sentence and write one line per unit.
/// </summary>
/// <remarks>
/// Loads the segment, epoch and 16k waveform files mapped to the sentence, checks
/// that the number of script units equals the number of non-silence segments, and
/// for each segment computes pitch, pitch range, RMS and compressed epoch lengths.
/// </remarks>
/// <param name="writer">Stream writer to write acoustic features.</param>
/// <param name="script">Script file instance.</param>
/// <param name="sid">Script item id.</param>
/// <param name="phoneme">Phoneme used to get units.</param>
/// <param name="sliceData">Slice data used to get units.</param>
/// <param name="fileMap">File list map.</param>
/// <param name="segmentDir">Segmentation file directory.</param>
/// <param name="wave16kDir">16k Hz waveform file directory.</param>
/// <param name="epochDir">Epoch file directory.</param>
/// <exception cref="InvalidDataException">
/// Script, alignment, epoch and waveform data do not agree with each other.
/// </exception>
private static void ExtractAcoustic(StreamWriter writer, XmlScriptFile script, string sid,
    Phoneme phoneme, SliceData sliceData, FileListMap fileMap,
    string segmentDir, string wave16kDir, string epochDir)
{
    ScriptItem scriptItem = script.ItemDic[sid];

    // find the absolute file paths for each kind data file
    string wave16kFilePath = Path.Combine(wave16kDir, fileMap.Map[scriptItem.Id] + ".wav");
    string epochFilePath = Path.Combine(epochDir, fileMap.Map[scriptItem.Id] + ".epoch");
    string segmentFilePath = Path.Combine(segmentDir, fileMap.Map[scriptItem.Id] + ".txt");

    // load data files
    SegmentFile segFile = new SegmentFile();
    segFile.Load(segmentFilePath);

    EggAcousticFeature eggFile = new EggAcousticFeature();
    eggFile.LoadEpoch(epochFilePath);

    WaveAcousticFeature waveFile = new WaveAcousticFeature();
    waveFile.Load(wave16kFilePath);

    // calculate acoustic features for each segments in the files
    int totalCount = segFile.NonSilenceWaveSegments.Count;

    Collection<TtsUnit> units = scriptItem.GetUnits(phoneme, sliceData);
    if (units.Count != totalCount)
    {
        string str1 = "Unit number mis-matched between sentence [{0}] in ";
        string str2 = "script file [{1}] and in the alignment file [{2}]. ";
        string str3 = "There are {3} units in script but {4} units in alignment.";
        string message = string.Format(CultureInfo.InvariantCulture,
            str1 + str2 + str3,
            sid, script.FilePath, segmentFilePath, units.Count, totalCount);
        throw new InvalidDataException(message);
    }

    for (int i = 0; i < totalCount; i++)
    {
        // for each wave segment
        WaveSegment ws = segFile.NonSilenceWaveSegments[i];

        // get unit sample scope
        int sampleOffset = (int)(ws.StartTime * waveFile.SamplesPerSecond);
        int sampleLength = (int)(ws.Duration * waveFile.SamplesPerSecond);
        int sampleEnd = sampleOffset + sampleLength;

        // calculate average pitch and pitch range on the un-adjusted sample scope
        float averagePitch, pitchRange;
        eggFile.GetPitchAndRange(sampleOffset, sampleLength, out averagePitch, out pitchRange);
        ws.AveragePitch = averagePitch;
        ws.PitchRange = pitchRange;

        // calculate root mean square, and before that adjust the segment alignment
        // with the epoch data; AdjustAlignment may move sampleOffset/sampleEnd
        int epochOffset = eggFile.AdjustAlignment(ref sampleOffset);
        int epochEnd = eggFile.AdjustAlignment(ref sampleEnd);

        if (epochOffset > epochEnd)
        {
            string info = string.Format(CultureInfo.InvariantCulture,
                "epochOffset[{0}] should not be bigger than epochEnd[{1}]",
                epochOffset, epochEnd);
            throw new InvalidDataException(info);
        }

        if (sampleEnd > waveFile.SampleNumber)
        {
            string str1 = "Mis-match found between alignment file [{0}] and waveform file [{1}], ";
            string str2 = "for the end sample of alignment is [{2}] but";
            string str3 = " the total sample number of waveform file is [{3}].";

            // Report sampleEnd: it is the value that failed the check above. The
            // epoch index (epochEnd) was reported here by mistake.
            string info = string.Format(CultureInfo.InvariantCulture,
                str1 + str2 + str3,
                segmentFilePath, wave16kFilePath, sampleEnd, waveFile.SampleNumber);
            throw new InvalidDataException(info);
        }

        ws.RootMeanSquare = waveFile.CalculateRms(sampleOffset, sampleEnd - sampleOffset);

        // calculate compressed epoch lengths for the 16k and 8k epoch data
        int epoch16KCompressLength = EpochFile.CompressEpoch(eggFile.Epoch,
            epochOffset, epochEnd - epochOffset, null);
        int epoch8KCompressLength = EpochFile.CompressEpoch(eggFile.Epoch8k,
            epochOffset, epochEnd - epochOffset, null);

        // one line per unit: id, index, timing, sample scope, epoch scope,
        // compressed epoch lengths, RMS, pitch, pitch range, unit full name
        string message = string.Format(CultureInfo.InvariantCulture,
            "{0,12} {1,3} {2,9:0.000000} {3,9:0.000000} {4,7} {5,5} {6,4} {7,3} {8,3} {9,3} {10,7:0.0} {11,5:0.0} {12,4:0.0} {13}",
            scriptItem.Id, i,
            ws.StartTime, ws.Duration, sampleOffset, sampleEnd - sampleOffset,
            epochOffset, epochEnd - epochOffset,
            epoch16KCompressLength, epoch8KCompressLength,
            ws.RootMeanSquare, ws.AveragePitch, ws.PitchRange,
            units[i].FullName);

        writer.WriteLine(message);
    }
}
/// <summary>
/// Cross-checks a script file against the segmentation files named by a file map.
/// </summary>
/// <remarks>
/// Every script item must have an entry in the file map and pass the per-item
/// alignment check; every file map entry must have a script item. Problems are
/// collected in the returned error set. An <see cref="InvalidDataException"/>
/// raised while checking an item is recorded against that item rather than
/// propagated.
/// </remarks>
/// <param name="fileMap">File list map.</param>
/// <param name="script">Script file instance.</param>
/// <param name="phoneme">Phoneme used to get units.</param>
/// <param name="sliceData">Slice data used to get units.</param>
/// <param name="segmentDir">Segment file directory.</param>
/// <returns>Data error set found.</returns>
/// <exception cref="ArgumentNullException">segmentDir is null/empty, or fileMap or script is null.</exception>
/// <exception cref="ArgumentException">fileMap.Map or its keys are null.</exception>
public static ErrorSet ValidateDataAlignment(FileListMap fileMap, XmlScriptFile script,
    Phoneme phoneme, SliceData sliceData, string segmentDir)
{
    // Parameters validation
    if (string.IsNullOrEmpty(segmentDir))
    {
        throw new ArgumentNullException("segmentDir");
    }

    if (fileMap == null)
    {
        throw new ArgumentNullException("fileMap");
    }

    if (fileMap.Map == null)
    {
        throw new ArgumentException("fileMap.Map is null");
    }

    if (fileMap.Map.Keys == null)
    {
        throw new ArgumentException("fileMap.Map.Keys is null");
    }

    if (script == null)
    {
        throw new ArgumentNullException("script");
    }

    ErrorSet found = new ErrorSet();

    // Pass 1: script items against the file map and their segment files.
    foreach (ScriptItem scriptItem in script.Items)
    {
        try
        {
            if (fileMap.Map.ContainsKey(scriptItem.Id))
            {
                ValidateDataAlignment(scriptItem, phoneme, sliceData, fileMap, segmentDir, found);
            }
            else
            {
                found.Add(ScriptError.OtherErrors, scriptItem.Id,
                    Helper.NeutralFormat("File list map does not contain item"));
            }
        }
        catch (InvalidDataException invalidData)
        {
            found.Add(ScriptError.OtherErrors, scriptItem.Id,
                Helper.BuildExceptionMessage(invalidData));
        }
    }

    // Pass 2: file map entries that have no script item.
    foreach (string mappedId in fileMap.Map.Keys)
    {
        if (script.ItemDic.ContainsKey(mappedId))
        {
            continue;
        }

        found.Add(ScriptError.OtherErrors, mappedId,
            Helper.NeutralFormat("script file does not contain item"));
    }

    return found;
}
/// <summary>
/// Cross-checks one script item against its segmentation file.
/// </summary>
/// <remarks>
/// Checks are made in this order and stop at the first failing stage: the
/// alignment file validates, it contains segments, its last segment is silence,
/// the unit count equals the non-silence segment count, and finally each unit
/// label equals the label of the segment at the same position. Only the last
/// stage can report more than one error.
/// </remarks>
/// <param name="item">Script item.</param>
/// <param name="phoneme">Phoneme used to get units.</param>
/// <param name="sliceData">Slice data used to get units.</param>
/// <param name="fileMap">File list map.</param>
/// <param name="segmentDir">Segment file directory.</param>
/// <param name="errorSet">Error set that receives the problems found.</param>
public static void ValidateDataAlignment(ScriptItem item, Phoneme phoneme, SliceData sliceData,
    FileListMap fileMap, string segmentDir, ErrorSet errorSet)
{
    string segmentFilePath = Path.Combine(segmentDir, fileMap.Map[item.Id] + ".txt");

    StringBuilder fileProblems = new StringBuilder();
    SegmentFile segmentFile = ValidateAlignmentFile(segmentFilePath, fileProblems);
    if (fileProblems.Length != 0)
    {
        errorSet.Add(ScriptError.OtherErrors, item.Id, fileProblems.ToString());
        return;
    }

    Collection<TtsUnit> units = item.GetUnits(phoneme, sliceData);

    if (segmentFile.WaveSegments.Count == 0)
    {
        string message = Helper.NeutralFormat(
            "There is no valid alignment data in alignment file {0}.", segmentFilePath);
        errorSet.Add(ScriptError.OtherErrors, item.Id, message);
        return;
    }

    if (!segmentFile.WaveSegments[segmentFile.WaveSegments.Count - 1].IsSilenceFeature)
    {
        string message = Helper.NeutralFormat(
            "Alignment file {0} is invalid, for without silence segment at the end.", segmentFilePath);
        errorSet.Add(ScriptError.OtherErrors, item.Id, message);
        return;
    }

    if (units.Count != segmentFile.NonSilenceWaveSegments.Count)
    {
        string message = Helper.NeutralFormat(
            "script units {0} do not match with non-silence " +
            "segments {1} in segmentation file.",
            units.Count, segmentFile.NonSilenceWaveSegments.Count);
        errorSet.Add(ScriptError.OtherErrors, item.Id, message);
        return;
    }

    // Counts agree: compare labels position by position.
    for (int position = 0; position < segmentFile.NonSilenceWaveSegments.Count; position++)
    {
        WaveSegment segment = segmentFile.NonSilenceWaveSegments[position];
        string expectedLabel = WaveSegment.FormatLabel(units[position].MetaUnit.Name);

        if (segment.Label == expectedLabel)
        {
            continue;
        }

        string message = Helper.NeutralFormat(
            "units [{0}/{1}] at {2} do not match between script and segment.",
            expectedLabel, segment.Label, position);
        errorSet.Add(ScriptError.OtherErrors, item.Id, message);
    }
}
/// <summary>
/// Build a mono-phone MLF from a script file.
/// </summary>
/// <remarks>
/// The script is loaded with validation, the items reported by
/// GetNeedDeleteItemIds are removed, and every remaining item is converted.
/// When <paramref name="writeToFile"/> is false nothing is written and only
/// the errors are collected.
/// </remarks>
/// <param name="scriptFilePath">Path of the script file to load.</param>
/// <param name="outFilePath">Path of the MLF file to write; used only when writing.</param>
/// <param name="writeToFile">Whether to write the MLF file.</param>
/// <param name="phoneme">Phoneme.</param>
/// <param name="validateSetting">Validation setting used to load the script.</param>
/// <param name="sliceData">Slice data.</param>
/// <returns>Errors from loading the script and from converting its items.</returns>
/// <exception cref="ArgumentNullException">phoneme, sliceData or validateSetting is null.</exception>
/// <exception cref="InvalidDataException">No valid item is left in the script.</exception>
public static ErrorSet BuildMonoMlf(string scriptFilePath, string outFilePath, bool writeToFile,
    Phoneme phoneme, XmlScriptValidateSetting validateSetting, SliceData sliceData)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (sliceData == null)
    {
        // The parameter name reported here used to be "phoneme".
        throw new ArgumentNullException("sliceData");
    }

    if (validateSetting == null)
    {
        throw new ArgumentNullException("validateSetting");
    }

    validateSetting.VerifySetting();

    ErrorSet errors = new ErrorSet();
    StreamWriter sw = null;

    try
    {
        // Open and write the header inside the try block so that the writer is
        // closed even when writing the header fails.
        if (writeToFile)
        {
            sw = new StreamWriter(outFilePath, false, Encoding.ASCII);
            sw.WriteLine("#!MLF!#");
        }

        XmlScriptFile script = XmlScriptFile.LoadWithValidation(scriptFilePath, validateSetting);
        script.Remove(GetNeedDeleteItemIds(script.ErrorSet));
        if (script.Items.Count == 0)
        {
            throw new InvalidDataException(
                Helper.NeutralFormat("No valid items in {0}.", scriptFilePath));
        }

        errors.Merge(script.ErrorSet);
        foreach (ScriptItem item in script.Items)
        {
            errors.Merge(BuildMonoMlf(item, sw, writeToFile, phoneme, sliceData));
        }
    }
    finally
    {
        if (sw != null)
        {
            sw.Close();
        }
    }

    if (writeToFile)
    {
        Debug.Assert(HtkTool.VerifyMlfFormat(outFilePath));
    }

    return errors;
}
/// <summary>
/// Writes the speech-recognition phones of one unit as MLF lines.
/// </summary>
/// <remarks>
/// Each phone of the unit is converted on its own. A phone whose conversion
/// yields null is reported as an error and skipped; the phones that did convert
/// are still written.
/// </remarks>
/// <param name="unit">Unit.</param>
/// <param name="item">Script item the unit belongs to; its id is used in errors.</param>
/// <param name="sw">Text writer, used only when <paramref name="writeToFile"/> is true.</param>
/// <param name="writeToFile">Whether to write the phones to the writer.</param>
/// <param name="phoneme">Phoneme used to map TTS phones to SR phones.</param>
/// <returns>Errors found for this unit.</returns>
private static ErrorSet BuildMonoMlf(TtsUnit unit, ScriptItem item, StreamWriter sw,
    bool writeToFile, Phoneme phoneme)
{
    Debug.Assert(unit != null);
    Debug.Assert(item != null);

    ErrorSet found = new ErrorSet();
    List<string> converted = new List<string>();

    foreach (TtsMetaPhone metaPhone in unit.MetaUnit.Phones)
    {
        string[] recognitionPhones = phoneme.Tts2SrPhones(metaPhone.Name);
        if (recognitionPhones != null)
        {
            converted.AddRange(recognitionPhones);
        }
        else
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "Invalid TTS phone[{0}], which can not be converted to Speech Recognition Phone.",
                metaPhone.Name);
            found.Add(ScriptError.OtherErrors, item.Id, message);
        }
    }

    if (!writeToFile)
    {
        return found;
    }

    for (int i = 0; i < converted.Count; i++)
    {
        sw.WriteLine(converted[i]);
    }

    return found;
}
/// <summary>
/// Fills in the context and position features of every unit of this sentence.
/// </summary>
/// <remarks>
/// For each unit this sets the left/right context phone and tone, the position
/// in syllable, word and sentence, and the word tone. A context is set to the
/// silence phone when there is no neighbouring unit, when the neighbour is a
/// special unit, or when a break of at least inter-phrase level lies between
/// the unit and its neighbour at a word boundary.
/// </remarks>
/// <param name="phoneme">Phoneme used to map phone names to ids.</param>
private void BuildUnitFeatures(Phoneme phoneme)
{
    Helper.ThrowIfNull(phoneme);

    for (int position = 0; position < _units.Count; position++)
    {
        TtsUnit current = _units[position];
        ScriptSyllable currentSyllable = (ScriptSyllable)current.Tag;
        ScriptWord currentWord = (ScriptWord)currentSyllable.Tag;

        // Neighbours of the current unit.
        TtsUnit following = null;
        if (position + 1 < _units.Count)
        {
            following = _units[position + 1];
        }

        TtsUnit preceding = null;
        if (position > 0)
        {
            preceding = _units[position - 1];
        }

        ScriptSyllable precedingSyllable = ScriptItem.FindPreviousSyllable(_units, position);
        ScriptWord precedingWord = ScriptItem.FindPreviousWord(Words, currentWord);

        bool startsWord = preceding == null ||
            currentWord != (ScriptWord)((ScriptSyllable)preceding.Tag).Tag;
        bool endsWord = following == null ||
            currentWord != (ScriptWord)((ScriptSyllable)following.Tag).Tag;

        // Left context. The operand order matters: the null test must come first.
        bool silenceOnLeft = preceding == null ||
            (startsWord && precedingWord != null &&
                ((int)precedingWord.Break >= (int)TtsBreak.InterPhrase)) ||
            preceding.MetaUnit.Special;
        if (silenceOnLeft)
        {
            current.Feature.LeftContextPhone = phoneme.TtsPhone2Id(Phoneme.SilencePhone);
            current.Feature.LeftContextTone = ToneManager.NoneContextTone;
        }
        else
        {
            current.Feature.LeftContextPhone = phoneme.TtsPhone2Id(preceding.MetaUnit.RightPhone);
            current.Feature.LeftContextTone = preceding.MetaUnit.RightTone;
        }

        // Right context, same rule mirrored.
        bool silenceOnRight = following == null ||
            (endsWord && ((int)currentWord.Break >= (int)TtsBreak.InterPhrase)) ||
            following.MetaUnit.Special;
        if (silenceOnRight)
        {
            current.Feature.RightContextPhone = phoneme.TtsPhone2Id(Phoneme.SilencePhone);
            current.Feature.RightContextTone = ToneManager.NoneContextTone;
        }
        else
        {
            current.Feature.RightContextPhone = phoneme.TtsPhone2Id(following.MetaUnit.LeftPhone);
            current.Feature.RightContextTone = following.MetaUnit.LeftTone;
        }

        // Unit position in syllable, syllable position in word, word position in sentence.
        current.Feature.PosInSyllable = ScriptItem.CalculatePosInSyllable(preceding, current);
        current.Feature.PosInWord = ScriptItem.CalculatePosInWord(precedingSyllable, currentSyllable);
        current.Feature.PosInSentence = ScriptItem.CalculatePosInSentence(precedingWord, currentWord);

        // A question word type overrides the calculated sentence position.
        if (current.WordType == WordType.Question)
        {
            current.Feature.PosInSentence = PosInSentence.Quest;
        }

        // Only units of the word's last syllable carry the word tone.
        bool inLastSyllable =
            currentWord.UnitSyllables.IndexOf(currentSyllable) == currentWord.UnitSyllables.Count - 1;
        current.Feature.TtsWordTone = inLastSyllable ? currentWord.WordTone : TtsWordTone.Continue;
    }
}
/// <summary>
/// Tells whether a syllable has an acceptable number of vowels.
/// </summary>
/// <remarks>
/// The syllable is good when it passes both the "enough vowel" check and the
/// "not too many vowels" check; the second check runs only if the first passes.
/// </remarks>
/// <param name="entry">Script entry.</param>
/// <param name="phoneme">Phoneme.</param>
/// <param name="phones">Phones of the syllable.</param>
/// <returns>True when both vowel checks pass.</returns>
private static bool IsGoodSyllableWithVowel(ScriptItem entry, Phoneme phoneme, string[] phones)
{
    if (!IsSyllableWithEnoughVowel(entry, phoneme, phones))
    {
        return false;
    }

    return IsSyllableWithLessVowel(entry, phoneme, phones);
}
/// <summary>
/// Collects the units of all sentences of this item into one new list.
/// </summary>
/// <remarks>
/// A fresh collection is returned on every call; the units are appended
/// sentence by sentence, in sentence order.
/// </remarks>
/// <param name="phoneme">Phoneme.</param>
/// <param name="sliceData">Slice data.</param>
/// <returns>Tts units of this item.</returns>
/// <exception cref="ArgumentNullException">phoneme or sliceData is null.</exception>
public Collection<TtsUnit> GetUnits(Phoneme phoneme, SliceData sliceData)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (sliceData == null)
    {
        throw new ArgumentNullException("sliceData");
    }

    Collection<TtsUnit> gathered = new Collection<TtsUnit>();

    foreach (ScriptSentence sentence in Sentences)
    {
        Collection<TtsUnit> sentenceUnits = sentence.GetUnits(phoneme, sliceData);
        foreach (TtsUnit sentenceUnit in sentenceUnits)
        {
            gathered.Add(sentenceUnit);
        }
    }

    return gathered;
}
/// <summary>
/// Tells whether a syllable stays within the vowel limit of the script entry.
/// </summary>
/// <param name="entry">Script entry that supplies the maximum vowel count per syllable.</param>
/// <param name="phoneme">Phoneme used to locate the vowels.</param>
/// <param name="phones">Phones of the syllable.</param>
/// <returns>True when the vowel count does not exceed the entry's maximum.</returns>
private static bool IsSyllableWithLessVowel(ScriptItem entry, Phoneme phoneme, string[] phones)
{
    int vowelCount = phoneme.GetVowelIndexes(phones).Length;
    bool withinLimit = vowelCount <= entry.MaxVowelCountInSyllable;
    return withinLimit;
}
/// <summary>
/// Truncate one phone from either edge of a nucleus.
/// </summary>
/// <remarks>
/// The rules are tried in order. A right-side rule matches when the last phone of
/// the nucleus occurs as a whole word in the rule's phone list; a left-side rule
/// matches when the first phone does. The first matching rule decides which edge
/// phone is cut off. When no rule matches (or a part would be empty), the fact is
/// traced and the right-most phone is cut off.
/// </remarks>
/// <param name="phoneme">Phoneme of the language to process.</param>
/// <param name="rules">Truncation rules.</param>
/// <param name="nucleus">CVC source to truncate.</param>
/// <returns>Two-element array: left part, right part.</returns>
/// <exception cref="ArgumentNullException">phoneme or rules is null, or nucleus is null/empty.</exception>
/// <exception cref="ArgumentException">A rule in the collection is null.</exception>
/// <exception cref="NotSupportedException">A rule has an unsupported truncating side.</exception>
public static string[] TruncateOnePhoneFromNucleus(Phoneme phoneme,
    Collection<TruncateRule> rules, string nucleus)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (rules == null)
    {
        throw new ArgumentNullException("rules");
    }

    if (string.IsNullOrEmpty(nucleus))
    {
        throw new ArgumentNullException("nucleus");
    }

    TtsMetaUnit ttsMetaUnit = new TtsMetaUnit(phoneme.Language);
    ttsMetaUnit.Name = nucleus;
    string[] phoneNames = ttsMetaUnit.GetPhonesName();
    int lastIndex = phoneNames.Length - 1;

    string leftPart = null;
    string rightPart = null;

    for (int i = 0; i < rules.Count; i++)
    {
        if (rules[i] == null)
        {
            string message = Helper.NeutralFormat("rules[{0}] should not be null.", i);
            throw new ArgumentException(message);
        }

        if (rules[i].Side == TruncateSide.Right)
        {
            // Escape the phone name: it is data, not a pattern. An unescaped regex
            // metacharacter in a phone name would change or break the match.
            Match m = Regex.Match(rules[i].Phones,
                @"\b" + Regex.Escape(phoneNames[lastIndex]) + @"\b");
            if (m.Success)
            {
                leftPart = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones, 0, lastIndex);
                rightPart = ttsMetaUnit.Phones[lastIndex].Name;
                break;
            }
        }
        else if (rules[i].Side == TruncateSide.Left)
        {
            Match m = Regex.Match(rules[i].Phones,
                @"\b" + Regex.Escape(phoneNames[0]) + @"\b");
            if (m.Success)
            {
                leftPart = ttsMetaUnit.Phones[0].Name;
                rightPart = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones, 1, lastIndex);
                break;
            }
        }
        else
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "Truncating side [{0}] is not supported.",
                rules[i].Side);
            Debug.Assert(false, message);
            throw new NotSupportedException(message);
        }
    }

    if (string.IsNullOrEmpty(leftPart) || string.IsNullOrEmpty(rightPart))
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "Nucleus [{0}] has empty left phone or right phone after truncating.",
            nucleus);
        Trace.WriteLine(message);

        // Fall back to cutting off the right-most phone.
        leftPart = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones, 0, lastIndex);
        rightPart = ttsMetaUnit.Phones[lastIndex].Name;
    }

    return new string[] { leftPart, rightPart };
}
/// <summary>
/// Tells whether a syllable is acceptable, allowing a sonorant to stand in for a vowel.
/// </summary>
/// <remarks>
/// The syllable must not have too many vowels. If it also has enough vowels it is
/// good. Otherwise it is good only when it has no vowel at all, contains at least
/// one sonorant, and is not made of a single phone.
/// </remarks>
/// <param name="entry">Script entry.</param>
/// <param name="phoneme">Phoneme.</param>
/// <param name="phones">Phones of the syllable.</param>
/// <returns>True when the syllable is acceptable.</returns>
private static bool IsGoodSyllableWithSonorant(ScriptItem entry, Phoneme phoneme, string[] phones)
{
    if (!IsSyllableWithLessVowel(entry, phoneme, phones))
    {
        return false;
    }

    if (IsSyllableWithEnoughVowel(entry, phoneme, phones))
    {
        return true;
    }

    // Not enough vowels: only a vowel-less syllable can still qualify.
    if (phoneme.GetVowelIndexes(phones).Length != 0)
    {
        return false;
    }

    // No vowel: there must be a sonorant, and more than one phone.
    int[] sonorantPositions = phoneme.GetSonorantIndexes(phones);
    return sonorantPositions.Length != 0 && phones.Length != 1;
}
/// <summary>
/// Creates the phoneme of a language from that language's phone set.
/// </summary>
/// <param name="language">Language of phoneme to load.</param>
/// <returns>
/// The phoneme parsed from the phone set, or null when no phone set is
/// available for the language.
/// </returns>
public static Phoneme Create(Language language)
{
    Phoneme created = new Phoneme();
    created.Language = language;

    TtsPhoneSet phoneSet = Localor.GetPhoneSet(language);
    if (phoneSet == null)
    {
        // Nothing to parse: report "not loaded" with null.
        return null;
    }

    created.ParseData(phoneSet);
    return created;
}
/// <summary>
/// Converts one script item to MLF lines.
/// </summary>
/// <remarks>
/// The item needs at least one pronounced normal word, and each of those words
/// needs a pronunciation; otherwise errors are returned and nothing is written.
/// The written entry is: label line, silence, the phones of each word with a
/// short pause between words, silence, and a terminating "." line. Phones are
/// produced per unit or per syllable depending on the phoneme's TTS-to-SR
/// mapping type. Errors raised while converting words do not stop the output.
/// </remarks>
/// <param name="item">Script item.</param>
/// <param name="sw">Text writer; required when <paramref name="writeToFile"/> is true.</param>
/// <param name="writeToFile">Whether writing to file.</param>
/// <param name="phoneme">Phoneme.</param>
/// <param name="sliceData">Slice data.</param>
/// <returns>Errors found for this item.</returns>
/// <exception cref="ArgumentNullException">Writing is requested but sw is null.</exception>
private static ErrorSet BuildMonoMlf(ScriptItem item, StreamWriter sw, bool writeToFile,
    Phoneme phoneme, SliceData sliceData)
{
    Debug.Assert(item != null);
    Debug.Assert(phoneme != null);

    if (writeToFile && sw == null)
    {
        throw new ArgumentNullException("sw");
    }

    Collection<ScriptWord> pronouncedWords = item.AllPronouncedNormalWords;
    ErrorSet found = new ErrorSet();

    if (pronouncedWords.Count == 0)
    {
        found.Add(ScriptError.OtherErrors, item.Id,
            Helper.NeutralFormat("No pronounced normal word."));
        return found;
    }

    // Every word must carry a pronunciation before anything is written.
    foreach (ScriptWord candidate in pronouncedWords)
    {
        Debug.Assert(candidate != null);
        if (string.IsNullOrEmpty(candidate.Pronunciation))
        {
            found.Add(ScriptError.OtherErrors, item.Id,
                Helper.NeutralFormat("No pronunciation normal word '{1}' in script item {0}.",
                item.Id, candidate.Grapheme));
        }
    }

    if (found.Count != 0)
    {
        return found;
    }

    if (writeToFile)
    {
        sw.WriteLine("\"*/{0}.lab\"", item.Id);
        sw.WriteLine(Phoneme.SilencePhone);
    }

    int lastWordIndex = pronouncedWords.Count - 1;
    for (int wordIndex = 0; wordIndex <= lastWordIndex; wordIndex++)
    {
        ScriptWord word = pronouncedWords[wordIndex];

        // Units are requested for both mapping types, before the type is examined.
        Collection<TtsUnit> wordUnits = word.GetUnits(phoneme, sliceData);

        if (phoneme.Tts2srMapType == Phoneme.TtsToSrMappingType.PhoneBased)
        {
            foreach (TtsUnit wordUnit in wordUnits)
            {
                found.Merge(BuildMonoMlf(wordUnit, item, sw, writeToFile, phoneme));
            }
        }
        else if (phoneme.Tts2srMapType == Phoneme.TtsToSrMappingType.SyllableBased)
        {
            foreach (ScriptSyllable wordSyllable in word.UnitSyllables)
            {
                found.Merge(BuildMonoMlf(wordSyllable, item, sw, writeToFile, phoneme));
            }
        }

        // Short pause between words, but not after the last one.
        if (writeToFile && wordIndex < lastWordIndex)
        {
            sw.WriteLine(Phoneme.ShortPausePhone);
        }
    }

    if (writeToFile)
    {
        sw.WriteLine(Phoneme.SilencePhone);
        sw.WriteLine(".");  // end of sentence
    }

    return found;
}