/// <summary>
/// Initializes a new instance of the <see cref="PhoneMerger"/> class.
/// </summary>
/// <param name="phoneSet">Phone set; must not be null.</param>
/// <param name="sliceData">Slice data; must not be null.</param>
/// <param name="truncRule">Truncate rule data; must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
public PhoneMerger(TtsPhoneSet phoneSet, SliceData sliceData, TruncateRuleData truncRule)
{
    // Arguments are checked in declaration order, so the first missing
    // one is the one named in the exception.
    if (phoneSet == null)
    {
        throw new ArgumentNullException("phoneSet");
    }

    if (sliceData == null)
    {
        throw new ArgumentNullException("sliceData");
    }

    if (truncRule == null)
    {
        throw new ArgumentNullException("truncRule");
    }

    // Nothing is stored until every argument has been accepted.
    _truncateRuleData = truncRule;
    _sliceData = sliceData;
    _phoneSet = phoneSet;
}
/// <summary>
/// Rebuild the unit list of this sentence from its words.
/// </summary>
/// <param name="phoneme">Phoneme, forwarded to feature building.</param>
/// <param name="sliceData">Slice data used to build the units of each word.</param>
/// <param name="buildUnitFeature">Whether to build unit features after the units are collected.</param>
private void BuildUnits(Phoneme phoneme, SliceData sliceData, bool buildUnitFeature)
{
    Helper.ThrowIfNull(phoneme);
    Helper.ThrowIfNull(sliceData);

    _units.Clear();

    string punctuationPattern = ScriptItem.PunctuationPattern;
    for (int wordIndex = 0; wordIndex < Words.Count; wordIndex++)
    {
        ScriptWord word = Words[wordIndex];
        if (!word.IsPronouncableNormalWord)
        {
            continue;
        }

        // Without feature building, words lacking a pronunciation are skipped.
        if (!buildUnitFeature && string.IsNullOrEmpty(word.Pronunciation))
        {
            continue;
        }

        // Look ahead over the non-normal words following this one. The scan
        // consumes words mapped to OtherPunctuation and stops (without
        // consuming) at the first word mapped to anything else; the last
        // mapped type becomes the word type of this word's units.
        WordType followingType = WordType.Normal;
        while (wordIndex < Words.Count - 1 && Words[wordIndex + 1].WordType != WordType.Normal)
        {
            followingType = Localor.MapPunctuation(Words[wordIndex + 1].Grapheme, punctuationPattern);
            if (followingType != WordType.OtherPunctuation)
            {
                break;
            }

            wordIndex++;
        }

        // Force a rebuild of the word's units, then tag and collect them.
        word.Units.Clear();
        word.BuildUnitWithoutFeature(sliceData, ScriptItem.PronunciationSeparator);
        foreach (TtsUnit builtUnit in word.Units)
        {
            builtUnit.WordType = followingType;
        }

        Helper.AppendCollection<TtsUnit>(_units, word.Units);
    }

    if (buildUnitFeature)
    {
        BuildUnitFeatures(phoneme);
    }
}
/// <summary>
/// Get the unit list of this sentence, building it first when it is flagged as needing a build.
/// </summary>
/// <param name="phoneme">Phoneme; must not be null.</param>
/// <param name="sliceData">Slice data; must not be null.</param>
/// <param name="buildUnitFeature">Whether to build unit features when the units are built.</param>
/// <returns>The unit collection held by this sentence.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="phoneme"/> or <paramref name="sliceData"/> is null.</exception>
public Collection<TtsUnit> GetUnits(Phoneme phoneme, SliceData sliceData, bool buildUnitFeature)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (sliceData == null)
    {
        throw new ArgumentNullException("sliceData");
    }

    // Units already built: hand back the cached collection.
    if (!_needBuildUnits)
    {
        return _units;
    }

    BuildUnits(phoneme, sliceData, buildUnitFeature);
    _needBuildUnits = false;
    return _units;
}
/// <summary>
/// Get the unit list of this sentence, with unit features built.
/// </summary>
/// <param name="phoneme">Phoneme.</param>
/// <param name="sliceData">Slice data.</param>
/// <returns>Tts units.</returns>
public Collection<TtsUnit> GetUnits(Phoneme phoneme, SliceData sliceData)
{
    // This overload always requests unit features.
    const bool buildUnitFeature = true;
    return GetUnits(phoneme, sliceData, buildUnitFeature);
}
/// <summary>
/// Estimate the position-in-syllable of every slice of one syllable.
/// </summary>
/// <remarks>
/// The first slice that the slice data reports as a nucleus is the pivot:
/// slices before it are onsets, slices after it are codas. When no slice is
/// a nucleus, every slice is classified as a coda.
/// </remarks>
/// <param name="slices">Slices of the syllable, in order.</param>
/// <param name="sliceData">Slice data table used to recognise nucleus slices.</param>
/// <returns>One position per slice, parallel to <paramref name="slices"/>.</returns>
private static PosInSyllable[] EstimatePosInSyllable(string[] slices, SliceData sliceData)
{
    PosInSyllable[] positions = new PosInSyllable[slices.Length];
    int lastIndex = slices.Length - 1;

    // Find the first nucleus and classify it by what surrounds it.
    int nucleusIndex = -1;
    for (int i = 0; i < slices.Length; i++)
    {
        TtsMetaUnit candidate = new TtsMetaUnit(sliceData.Language);
        candidate.Name = slices[i];
        if (!sliceData.IsNucleus(candidate))
        {
            continue;
        }

        bool hasSliceBefore = i != 0;
        bool hasSliceAfter = i != lastIndex;
        if (hasSliceBefore)
        {
            positions[i] = hasSliceAfter ? PosInSyllable.NucleusInCVC : PosInSyllable.NucleusInCV;
        }
        else
        {
            positions[i] = hasSliceAfter ? PosInSyllable.NucleusInVC : PosInSyllable.NucleusInV;
        }

        nucleusIndex = i;
        break;
    }

    // Everything left of the nucleus is onset material.
    for (int i = 0; i < nucleusIndex; i++)
    {
        positions[i] = (i == 0) ? PosInSyllable.Onset : PosInSyllable.OnsetNext;
    }

    // Everything right of the nucleus (or all slices, if none was found) is coda material.
    for (int i = nucleusIndex + 1; i < slices.Length; i++)
    {
        positions[i] = (i == lastIndex) ? PosInSyllable.Coda : PosInSyllable.CodaNext;
    }

    return positions;
}
/// <summary>
/// Build mono MLF for every item of a script file, optionally writing the result to a file.
/// </summary>
/// <param name="scriptFilePath">Path of the script file to load with validation.</param>
/// <param name="outFilePath">Path of the MLF file to write; only used when <paramref name="writeToFile"/> is true.</param>
/// <param name="writeToFile">Whether to write the MLF to <paramref name="outFilePath"/>.</param>
/// <param name="phoneme">Phoneme; must not be null.</param>
/// <param name="validateSetting">Script validation setting; must not be null.</param>
/// <param name="sliceData">Slice data; must not be null.</param>
/// <returns>Errors collected from script loading and from each item.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="phoneme"/>, <paramref name="sliceData"/> or <paramref name="validateSetting"/> is null.</exception>
/// <exception cref="InvalidDataException">Thrown when no item is left in the script after removing the items flagged for deletion.</exception>
public static ErrorSet BuildMonoMlf(string scriptFilePath, string outFilePath, bool writeToFile, Phoneme phoneme, XmlScriptValidateSetting validateSetting, SliceData sliceData)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (sliceData == null)
    {
        // Report the parameter that is actually null.
        throw new ArgumentNullException("sliceData");
    }

    if (validateSetting == null)
    {
        throw new ArgumentNullException("validateSetting");
    }

    validateSetting.VerifySetting();

    ErrorSet errors = new ErrorSet();
    StreamWriter sw = null;
    try
    {
        // The writer is created inside the try block so that it is closed
        // even when writing the header fails.
        if (writeToFile)
        {
            sw = new StreamWriter(outFilePath, false, Encoding.ASCII);
            sw.WriteLine("#!MLF!#");
        }

        XmlScriptFile script = XmlScriptFile.LoadWithValidation(scriptFilePath, validateSetting);
        script.Remove(GetNeedDeleteItemIds(script.ErrorSet));
        if (script.Items.Count == 0)
        {
            throw new InvalidDataException(
                Helper.NeutralFormat("No valid items in {0}.", scriptFilePath));
        }

        errors.Merge(script.ErrorSet);
        foreach (ScriptItem item in script.Items)
        {
            errors.Merge(BuildMonoMlf(item, sw, writeToFile, phoneme, sliceData));
        }
    }
    finally
    {
        if (sw != null)
        {
            sw.Close();
        }
    }

    if (writeToFile)
    {
        Debug.Assert(HtkTool.VerifyMlfFormat(outFilePath));
    }

    return errors;
}
/// <summary>
/// Split the trailing phones of a syllable into coda units and add them to the result list.
/// </summary>
/// <remarks>
/// Works from right to left: the longest phone run that ends at the current
/// right edge and is listed in the coda slices is taken as one unit; when no
/// multi-phone run matches, the single right-most phone becomes a unit.
/// Every unit is inserted at the position where the list ended on entry, so
/// the units end up in left-to-right phone order.
/// </remarks>
/// <param name="sliceData">Slice data providing the coda slices.</param>
/// <param name="phones">Phones to process.</param>
/// <param name="codaOffset">The offset of the first phone in coda group.</param>
/// <param name="slicedUnits">Unit container to append result coda units.</param>
private static void BuildCodaUnits(SliceData sliceData, TtsMetaPhone[] phones, int codaOffset, List<string> slicedUnits)
{
    // Example: t w ih 1 k s t
    int insertAt = slicedUnits.Count;
    int pendingCount = phones.Length - codaOffset;
    int cursor = codaOffset;

    while (pendingCount > 0)
    {
        // Candidate spans from the cursor to the current right edge.
        int candidateLength = pendingCount - (cursor - codaOffset);
        string candidate = TtsMetaPhone.Join(TtsUnit.PhoneDelimiter, phones, cursor, candidateLength);

        // A left-over single phone is always accepted as a coda unit.
        bool accept = pendingCount == 1
            || sliceData.CodaSlices.IndexOf(candidate.Replace(TtsUnit.PhoneDelimiter, " ")) >= 0
            || candidateLength == 1;

        if (accept)
        {
            slicedUnits.Insert(insertAt, TtsUnit.CodaPrefix + candidate);

            // Only the phones left of the accepted unit remain; restart from the left.
            pendingCount = cursor - codaOffset;
            cursor = codaOffset;
        }
        else
        {
            // Shorten the candidate from the left and try again.
            cursor++;
        }
    }
}
/// <summary>
/// Build the units of one syllable pronunciation.
/// </summary>
/// <remarks>
/// Each returned unit name carries an onset, nucleus or coda prefix, and the
/// phones inside a unit are joined with <c>TtsUnit.PhoneDelimiter</c>.
/// </remarks>
/// <param name="phoneme">Phoneme of the language to process with.</param>
/// <param name="sliceData">Slice data to process.</param>
/// <param name="syllable">Syllable pronunciation to process.</param>
/// <returns>Unit names of the syllable, in left-to-right order.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="phoneme"/> or <paramref name="sliceData"/> is null, or <paramref name="syllable"/> is null or empty.</exception>
/// <exception cref="ArgumentException">Thrown when the sonorant phones, onset slices or nucleus slices are null.</exception>
public static string[] BuildUnits(Phoneme phoneme, SliceData sliceData, string syllable)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (phoneme.TtsSonorantPhones == null)
    {
        string message = Helper.NeutralFormat("phoneme.TtsSonorantPhones should not be null.");
        throw new ArgumentException(message);
    }

    if (string.IsNullOrEmpty(syllable))
    {
        throw new ArgumentNullException("syllable");
    }

    if (sliceData == null)
    {
        throw new ArgumentNullException("sliceData");
    }

    if (sliceData.OnsetSlices == null)
    {
        string message = Helper.NeutralFormat("sliceData.OnsetSlices should not be null.");
        throw new ArgumentException(message);
    }

    if (sliceData.NucleusSlices == null)
    {
        string message = Helper.NeutralFormat("sliceData.NucleusSlices should not be null.");
        throw new ArgumentException(message);
    }

    List<string> slicedUnits = new List<string>();

    string unstressedSyllable = Pronunciation.RemoveStress(syllable);

    ScriptItem scriptItem = new ScriptItem(phoneme.Language);

    // Items contain phone and tone.
    string[] items = scriptItem.PronunciationSeparator.SplitPhones(unstressedSyllable);

    // Treat the whole syllable as one unit at first.
    TtsMetaUnit ttsMetaUnit = new TtsMetaUnit(phoneme.Language);
    ttsMetaUnit.Name = string.Join(" ", items);
    string[] phones = ttsMetaUnit.GetPhonesName();

    if (sliceData.NucleusSlices.IndexOf(ttsMetaUnit.Name) >= 0)
    {
        // The whole syllable is already a predefined nucleus unit: return it.
        slicedUnits.Add(TtsUnit.NucleusPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
        return slicedUnits.ToArray();
    }

    int vowelIndex = phoneme.GetFirstVowelIndex(phones);
    if (vowelIndex < 0)
    {
        // No vowel in the syllable: use the whole syllable as one unit if the
        // unit table lists it as an onset or a coda.
        if (sliceData.OnsetSlices.IndexOf(ttsMetaUnit.Name) >= 0)
        {
            slicedUnits.Add(TtsUnit.OnsetPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
        }
        else if (sliceData.CodaSlices.IndexOf(ttsMetaUnit.Name) >= 0)
        {
            slicedUnits.Add(TtsUnit.CodaPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
        }
        else
        {
            // Otherwise, treat each phone as a coda unit.
            foreach (string phone in phones)
            {
                slicedUnits.Add(TtsUnit.CodaPrefix + phone);
            }
        }

        return slicedUnits.ToArray();
    }

    // Find the left-most sonorant phone on the left side of the first vowel.
    // NOTE(review): the scan does not stop at a non-sonorant phone, so the
    // range can span non-sonorant phones lying in between - confirm intended.
    int firstSonarantIndex = vowelIndex;
    for (int i = vowelIndex - 1; i >= 0; i--)
    {
        if (phoneme.TtsSonorantPhones.IndexOf(phones[i]) >= 0)
        {
            firstSonarantIndex = i;
        }
    }

    // Find the right-most sonorant phone on the right side of the first vowel.
    int lastSonarantIndex = vowelIndex;
    for (int i = vowelIndex + 1; i <= phones.Length - 1; i++)
    {
        if (phoneme.TtsSonorantPhones.IndexOf(phones[i]) >= 0)
        {
            lastSonarantIndex = i;
        }
    }

    // Treat the vowel and the surrounding sonorant range as the nucleus unit first.
    string nucleus = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones,
        firstSonarantIndex, lastSonarantIndex - firstSonarantIndex + 1);

    TruncateRuleData truncateRuleData = Localor.GetTruncateRuleData(phoneme.Language);

    // Refine the nucleus against the predefined unit table. IndexOf reports
    // "not found" with a negative value; a match at index 0 is a valid match
    // and must stop the truncation.
    while (lastSonarantIndex - firstSonarantIndex > 0 &&
        sliceData.NucleusSlices.IndexOf(nucleus) < 0)
    {
        // The candidate is not a predefined unit: truncate one phone from it.
        string[] leftRight = PhoneMerger.TruncateOnePhoneFromNucleus(phoneme,
            truncateRuleData.NucleusTruncateRules, nucleus);

        // If the first returned element is a known phone the candidate shrinks
        // from the left, otherwise it shrinks from the right.
        if (phoneme.TtsPhones.IndexOf(leftRight[0]) >= 0)
        {
            firstSonarantIndex++;
        }
        else
        {
            Debug.Assert(phoneme.TtsPhones.IndexOf(leftRight[1]) >= 0);
            lastSonarantIndex--;
        }

        // Re-define the remaining nucleus unit.
        nucleus = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones,
            firstSonarantIndex, lastSonarantIndex - firstSonarantIndex + 1);
    }

    slicedUnits.Add(TtsUnit.NucleusPrefix + nucleus.Replace(" ", TtsUnit.PhoneDelimiter));

    // Refine onset: try the longest prefix first; phones that cannot be part
    // of a listed prefix are peeled off from the right as single-phone units.
    for (int index = firstSonarantIndex - 1; index >= 0; index--)
    {
        string onset = TtsMetaPhone.Join(TtsUnit.PhoneDelimiter, ttsMetaUnit.Phones, 0, index + 1);
        if (sliceData.OnsetSlices.IndexOf(onset.Replace(TtsUnit.PhoneDelimiter, " ")) >= 0)
        {
            slicedUnits.Insert(0, TtsUnit.OnsetPrefix + onset);

            // The prefix covers every remaining onset phone: nothing left to do.
            break;
        }

        // Treat the right-most remaining phone as a single phone unit.
        slicedUnits.Insert(0,
            TtsUnit.OnsetPrefix + TtsMetaPhone.Join(TtsUnit.PhoneDelimiter, ttsMetaUnit.Phones, index, 1));
    }

    // Refine coda, matching from right to left.
    BuildCodaUnits(sliceData, ttsMetaUnit.Phones, lastSonarantIndex + 1, slicedUnits);

    return slicedUnits.ToArray();
}
/// <summary>
/// Validate the configured language data files against the expected language.
/// </summary>
/// <remarks>
/// Only the data files whose path is configured are loaded and checked. When
/// no data is configured at all, a trace message is written and an empty
/// error set is returned.
/// </remarks>
/// <param name="language">Language the data files are expected to have.</param>
/// <returns>Error set holding one error per mismatching or empty data file.</returns>
public ErrorSet ValidateLanguageData(Language language)
{
    ErrorSet errorSet = new ErrorSet();

    if (IsEmpty())
    {
        Trace.WriteLine("Using stocked language data with tools...");
        return errorSet;
    }

    string expectedLanguage = Localor.LanguageToString(language);

    // Phone set.
    if (!string.IsNullOrEmpty(_phoneSet))
    {
        TtsPhoneSet ttsPhoneSet = new TtsPhoneSet();
        ttsPhoneSet.Load(PhoneSet);
        if (ttsPhoneSet.Language != language)
        {
            errorSet.Add(new Error(VoiceCreationLanguageDataError.MismatchLanguage,
                expectedLanguage,
                Localor.LanguageToString(ttsPhoneSet.Language),
                Localor.PhoneSetFileName, PhoneSet));
        }
    }

    // Unit table: takes the expected language before loading and is only checked for emptiness.
    if (!string.IsNullOrEmpty(_unitTable))
    {
        SliceData sliceData = new SliceData();
        sliceData.Language = language;
        sliceData.Load(UnitTable);
        if (sliceData.IsEmpty())
        {
            errorSet.Add(new Error(VoiceCreationLanguageDataError.EmptyLanguageDataFile,
                expectedLanguage,
                Localor.UnitTableFileName, UnitTable));
        }
    }

    // Lexical attribute schema.
    if (!string.IsNullOrEmpty(_lexicalAttributeSchema))
    {
        LexicalAttributeSchema lexicalAttributeSchema = new LexicalAttributeSchema();
        lexicalAttributeSchema.Load(LexicalAttributeSchema);
        if (lexicalAttributeSchema.Language != language)
        {
            // NOTE(review): this reports Localor.PhoneSetFileName for a schema
            // mismatch, which looks like a copy/paste slip - confirm which
            // file name constant is intended here.
            errorSet.Add(new Error(VoiceCreationLanguageDataError.MismatchLanguage,
                expectedLanguage,
                Localor.LanguageToString(lexicalAttributeSchema.Language),
                Localor.PhoneSetFileName, LexicalAttributeSchema));
        }
    }

    // Truncate rules.
    if (!string.IsNullOrEmpty(_truncateRule))
    {
        TruncateRuleData truncateRuleData = new TruncateRuleData();
        truncateRuleData.Load(TruncateRule);
        if (truncateRuleData.Language != language)
        {
            errorSet.Add(new Error(VoiceCreationLanguageDataError.MismatchLanguage,
                expectedLanguage,
                Localor.LanguageToString(truncateRuleData.Language),
                Localor.TruncateRulesFileName, TruncateRule));
        }
    }

    // TTS phone to SAPI viseme id map.
    if (!string.IsNullOrEmpty(_ttsToSapiVisemeId))
    {
        PhoneMap phoneMap = PhoneMap.CreatePhoneMap(TtsToSapiVisemeId);
        if (phoneMap.Language != language)
        {
            errorSet.Add(new Error(VoiceCreationLanguageDataError.MismatchLanguage,
                expectedLanguage,
                Localor.LanguageToString(phoneMap.Language),
                Localor.TtsToSapiVisemeIdFileName, TtsToSapiVisemeId));
        }
    }

    // TTS phone to SR phone map.
    if (!string.IsNullOrEmpty(_ttsToSrPhone))
    {
        PhoneMap phoneMap = PhoneMap.CreatePhoneMap(TtsToSrPhone);
        if (phoneMap.Language != language)
        {
            errorSet.Add(new Error(VoiceCreationLanguageDataError.MismatchLanguage,
                expectedLanguage,
                Localor.LanguageToString(phoneMap.Language),
                Localor.TtsToSrPhoneFileName, TtsToSrPhone));
        }
    }

    // TTS phone to IPA phone map.
    if (!string.IsNullOrEmpty(_ttsToIpaPhone))
    {
        PhoneMap phoneMap = PhoneMap.CreatePhoneMap(TtsToIpaPhone);
        if (phoneMap.Language != language)
        {
            errorSet.Add(new Error(VoiceCreationLanguageDataError.MismatchLanguage,
                expectedLanguage,
                Localor.LanguageToString(phoneMap.Language),
                Localor.TtsToIpaPhoneFileName, TtsToIpaPhone));
        }
    }

    // Font meta.
    // NOTE(review): the font meta file is loaded through PhoneMap.CreatePhoneMap
    // like the phone maps above - confirm that this is the intended loader.
    if (!string.IsNullOrEmpty(_fontMeta))
    {
        PhoneMap phoneMap = PhoneMap.CreatePhoneMap(FontMeta);
        if (phoneMap.Language != language)
        {
            errorSet.Add(new Error(VoiceCreationLanguageDataError.MismatchLanguage,
                expectedLanguage,
                Localor.LanguageToString(phoneMap.Language),
                Localor.FontMetaFileName, FontMeta));
        }
    }

    return errorSet;
}
/// <summary>
/// Load the slice data of a language from its unit table resource.
/// </summary>
/// <param name="language">Language whose unit table resource is loaded.</param>
/// <returns>The loaded slice data, or null when no unit table resource reader is available.</returns>
private static SliceData LoadSliceData(Language language)
{
    using (StreamReader unitTableReader = LoadResource(language, Localor.UnitTableFileName))
    {
        if (unitTableReader == null)
        {
            return null;
        }

        // The language is assigned before loading, as the loader is given only the reader.
        SliceData loaded = new SliceData();
        loaded.Language = language;
        loaded.Load(unitTableReader);
        return loaded;
    }
}
/// <summary>
/// Extract acoustic features for a given script file.
/// </summary>
/// <remarks>
/// One sentence is processed per key of the file list map; every key must
/// exist in the script. All arguments are validated before the target file
/// is created.
/// </remarks>
/// <param name="script">Script file instance.</param>
/// <param name="phoneme">Phoneme used to get units.</param>
/// <param name="sliceData">Slice data used to get units.</param>
/// <param name="fileMap">File list map.</param>
/// <param name="segmentDir">Segmentation file directory.</param>
/// <param name="wave16kDir">16k Hz waveform file directory.</param>
/// <param name="epochDir">Epoch file directory.</param>
/// <param name="targetFilePath">Target acoustic file path.</param>
/// <exception cref="ArgumentNullException">Thrown when a required argument is null, or a path argument is null or empty.</exception>
/// <exception cref="ArgumentException">Thrown when the script has no file path or the file list map has no map or keys.</exception>
/// <exception cref="InvalidDataException">Thrown when a sentence of the file list map does not exist in the script.</exception>
public static void ExtractAcoustic(XmlScriptFile script, Phoneme phoneme, SliceData sliceData, FileListMap fileMap, string segmentDir, string wave16kDir, string epochDir, string targetFilePath)
{
    // Parameters validation
    if (script == null)
    {
        throw new ArgumentNullException("script");
    }

    if (string.IsNullOrEmpty(script.FilePath))
    {
        throw new ArgumentException("script.FilePath is null");
    }

    if (fileMap == null)
    {
        throw new ArgumentNullException("fileMap");
    }

    if (fileMap.Map == null)
    {
        throw new ArgumentException("fileMap.Map is null");
    }

    if (fileMap.Map.Keys == null)
    {
        throw new ArgumentException("fileMap.Map.Keys is null");
    }

    if (string.IsNullOrEmpty(segmentDir))
    {
        throw new ArgumentNullException("segmentDir");
    }

    if (string.IsNullOrEmpty(wave16kDir))
    {
        throw new ArgumentNullException("wave16kDir");
    }

    if (string.IsNullOrEmpty(epochDir))
    {
        throw new ArgumentNullException("epochDir");
    }

    if (!Directory.Exists(segmentDir))
    {
        throw Helper.CreateException(typeof(DirectoryNotFoundException), segmentDir);
    }

    if (!Directory.Exists(wave16kDir))
    {
        throw Helper.CreateException(typeof(DirectoryNotFoundException), wave16kDir);
    }

    if (!Directory.Exists(epochDir))
    {
        throw Helper.CreateException(typeof(DirectoryNotFoundException), epochDir);
    }

    if (string.IsNullOrEmpty(targetFilePath))
    {
        throw new ArgumentNullException("targetFilePath");
    }

    // Fail fast on the arguments needed to build units, instead of failing
    // on the first sentence after the target file has already been created.
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (sliceData == null)
    {
        throw new ArgumentNullException("sliceData");
    }

    Helper.EnsureFolderExistForFile(targetFilePath);

    using (StreamWriter sw = new StreamWriter(targetFilePath))
    {
        // iterate each script item or sentence
        foreach (string sid in fileMap.Map.Keys)
        {
            if (!script.ItemDic.ContainsKey(sid))
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                    "Sentence [{0}] does not exist in script file [{1}].",
                    sid, script.FilePath);
                throw new InvalidDataException(message);
            }

            ExtractAcoustic(sw, script, sid, phoneme, sliceData, fileMap, segmentDir, wave16kDir, epochDir);
        }
    }
}
/// <summary>
/// Check data consistency between a script file and its segmentation files.
/// </summary>
/// <remarks>
/// Checked in both directions: every script item must be in the file list
/// map (and align with its segmentation file), and every file list map
/// entry must be in the script.
/// </remarks>
/// <param name="fileMap">File list map.</param>
/// <param name="script">Script file instance.</param>
/// <param name="phoneme">Phoneme used to get units.</param>
/// <param name="sliceData">Slice data used to get units.</param>
/// <param name="segmentDir">Segment file directory.</param>
/// <returns>Data error set found.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="segmentDir"/> is null or empty, or <paramref name="fileMap"/> or <paramref name="script"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when the file list map has no map or keys.</exception>
public static ErrorSet ValidateDataAlignment(FileListMap fileMap, XmlScriptFile script, Phoneme phoneme, SliceData sliceData, string segmentDir)
{
    // Parameters validation
    if (string.IsNullOrEmpty(segmentDir))
    {
        throw new ArgumentNullException("segmentDir");
    }

    if (fileMap == null)
    {
        throw new ArgumentNullException("fileMap");
    }

    if (fileMap.Map == null)
    {
        throw new ArgumentException("fileMap.Map is null");
    }

    if (fileMap.Map.Keys == null)
    {
        throw new ArgumentException("fileMap.Map.Keys is null");
    }

    if (script == null)
    {
        throw new ArgumentNullException("script");
    }

    ErrorSet found = new ErrorSet();

    // Script -> file list map, including per-item alignment validation.
    foreach (ScriptItem scriptItem in script.Items)
    {
        try
        {
            if (fileMap.Map.ContainsKey(scriptItem.Id))
            {
                ValidateDataAlignment(scriptItem, phoneme, sliceData, fileMap, segmentDir, found);
            }
            else
            {
                found.Add(ScriptError.OtherErrors, scriptItem.Id,
                    Helper.NeutralFormat("File list map does not contain item"));
            }
        }
        catch (InvalidDataException ide)
        {
            // Invalid data for one item is recorded; validation goes on with the next item.
            found.Add(ScriptError.OtherErrors, scriptItem.Id, Helper.BuildExceptionMessage(ide));
        }
    }

    // File list map -> script.
    foreach (string sentenceId in fileMap.Map.Keys)
    {
        if (script.ItemDic.ContainsKey(sentenceId))
        {
            continue;
        }

        found.Add(ScriptError.OtherErrors, sentenceId,
            Helper.NeutralFormat("script file does not contain item"));
    }

    return found;
}
/// <summary>
/// Extract acoustic features for a given sentence and write one line per unit.
/// </summary>
/// <param name="writer">Stream writer to write acoustic features.</param>
/// <param name="script">Script file instance.</param>
/// <param name="sid">Script item id.</param>
/// <param name="phoneme">Phoneme used to get units.</param>
/// <param name="sliceData">Slice data used to get units.</param>
/// <param name="fileMap">File list map.</param>
/// <param name="segmentDir">Segmentation file directory.</param>
/// <param name="wave16kDir">16k Hz waveform file directory.</param>
/// <param name="epochDir">Epoch file directory.</param>
/// <exception cref="InvalidDataException">
/// Thrown when the unit count of the script differs from the non-silence
/// segment count of the alignment, when the adjusted epoch offset lies past
/// the adjusted epoch end, or when a segment ends past the waveform.
/// </exception>
private static void ExtractAcoustic(StreamWriter writer, XmlScriptFile script, string sid, Phoneme phoneme, SliceData sliceData, FileListMap fileMap, string segmentDir, string wave16kDir, string epochDir)
{
    ScriptItem scriptItem = script.ItemDic[sid];

    // find the absolute file paths for each kind data file
    string wave16kFilePath = Path.Combine(wave16kDir, fileMap.Map[scriptItem.Id] + ".wav");
    string epochFilePath = Path.Combine(epochDir, fileMap.Map[scriptItem.Id] + ".epoch");
    string segmentFilePath = Path.Combine(segmentDir, fileMap.Map[scriptItem.Id] + ".txt");

    // load data files
    SegmentFile segFile = new SegmentFile();
    segFile.Load(segmentFilePath);

    EggAcousticFeature eggFile = new EggAcousticFeature();
    eggFile.LoadEpoch(epochFilePath);

    WaveAcousticFeature waveFile = new WaveAcousticFeature();
    waveFile.Load(wave16kFilePath);

    // calculate acoustic features for each segments in the files
    int totalCount = segFile.NonSilenceWaveSegments.Count;
    Collection<TtsUnit> units = scriptItem.GetUnits(phoneme, sliceData);
    if (units.Count != totalCount)
    {
        string str1 = "Unit number mis-matched between sentence [{0}] in ";
        string str2 = "script file [{1}] and in the alignment file [{2}]. ";
        string str3 = "There are {3} units in script but {4} units in alignment.";
        string message = string.Format(CultureInfo.InvariantCulture,
            str1 + str2 + str3,
            sid, script.FilePath, segmentFilePath, units.Count, totalCount);
        throw new InvalidDataException(message);
    }

    for (int i = 0; i < totalCount; i++)
    {
        // for each wave segment
        WaveSegment ws = segFile.NonSilenceWaveSegments[i];

        // get unit sample scope
        int sampleOffset = (int)(ws.StartTime * waveFile.SamplesPerSecond);
        int sampleLength = (int)(ws.Duration * waveFile.SamplesPerSecond);
        int sampleEnd = sampleOffset + sampleLength;

        // calculate average pitch and pitch range
        float averagePitch, pitchRange;
        eggFile.GetPitchAndRange(sampleOffset, sampleLength, out averagePitch, out pitchRange);
        ws.AveragePitch = averagePitch;
        ws.PitchRange = pitchRange;

        // calculate root mean square, and before that adjust the segment
        // alignment with the epoch data; the sample positions are passed by
        // reference and may be updated by the adjustment
        int epochOffset = eggFile.AdjustAlignment(ref sampleOffset);
        int epochEnd = eggFile.AdjustAlignment(ref sampleEnd);

        if (epochOffset > epochEnd)
        {
            string info = string.Format(CultureInfo.InvariantCulture,
                "epochOffset[{0}] should not be bigger than epochEnd[{1}]",
                epochOffset, epochEnd);
            throw new InvalidDataException(info);
        }

        if (sampleEnd > waveFile.SampleNumber)
        {
            // Report the value that was actually compared (the end sample,
            // not the epoch index).
            string str1 = "Mis-match found between alignment file [{0}] and waveform file [{1}], ";
            string str2 = "for the end sample of alignment is [{2}] but";
            string str3 = " the total sample number of waveform file is [{3}].";
            string info = string.Format(CultureInfo.InvariantCulture,
                str1 + str2 + str3,
                segmentFilePath, wave16kFilePath, sampleEnd, waveFile.SampleNumber);
            throw new InvalidDataException(info);
        }

        ws.RootMeanSquare = waveFile.CalculateRms(sampleOffset, sampleEnd - sampleOffset);

        // calculate epoch
        int epoch16KCompressLength = EpochFile.CompressEpoch(eggFile.Epoch,
            epochOffset, epochEnd - epochOffset, null);
        int epoch8KCompressLength = EpochFile.CompressEpoch(eggFile.Epoch8k,
            epochOffset, epochEnd - epochOffset, null);

        // leave (epoch offset in sentence) (epoch length)
        // (16k compressed epoch length) (8k compressed epoch length) as zero
        string message = string.Format(CultureInfo.InvariantCulture,
            "{0,12} {1,3} {2,9:0.000000} {3,9:0.000000} {4,7} {5,5} {6,4} {7,3} {8,3} {9,3} {10,7:0.0} {11,5:0.0} {12,4:0.0} {13}",
            scriptItem.Id, i,
            ws.StartTime, ws.Duration, sampleOffset, sampleEnd - sampleOffset,
            epochOffset, epochEnd - epochOffset,
            epoch16KCompressLength, epoch8KCompressLength,
            ws.RootMeanSquare, ws.AveragePitch, ws.PitchRange,
            units[i].FullName);

        writer.WriteLine(message);
    }
}
/// <summary>
/// Check data consistency between one script item and its segmentation file.
/// </summary>
/// <remarks>
/// Checks run in order and stop at the first failing stage: alignment file
/// validity, presence of segments, trailing silence segment, unit count,
/// and finally the label of every unit/segment pair.
/// </remarks>
/// <param name="item">Script item.</param>
/// <param name="phoneme">Phoneme used to get units.</param>
/// <param name="sliceData">Slice data used to get units.</param>
/// <param name="fileMap">File list map.</param>
/// <param name="segmentDir">Segment file directory.</param>
/// <param name="errorSet">Error set that receives the data errors found.</param>
public static void ValidateDataAlignment(ScriptItem item, Phoneme phoneme, SliceData sliceData, FileListMap fileMap, string segmentDir, ErrorSet errorSet)
{
    string segmentFilePath = Path.Combine(segmentDir, fileMap.Map[item.Id] + ".txt");

    StringBuilder errorMessage = new StringBuilder();
    SegmentFile segmentFile = ValidateAlignmentFile(segmentFilePath, errorMessage);
    if (errorMessage.Length != 0)
    {
        errorSet.Add(ScriptError.OtherErrors, item.Id, errorMessage.ToString());
        return;
    }

    Collection<TtsUnit> units = item.GetUnits(phoneme, sliceData);

    if (segmentFile.WaveSegments.Count == 0)
    {
        string message = Helper.NeutralFormat(
            "There is no valid alignment data in alignment file {0}.", segmentFilePath);
        errorSet.Add(ScriptError.OtherErrors, item.Id, message);
        return;
    }

    // The alignment must end with a silence segment.
    if (!segmentFile.WaveSegments[segmentFile.WaveSegments.Count - 1].IsSilenceFeature)
    {
        string message = Helper.NeutralFormat(
            "Alignment file {0} is invalid, for without silence segment at the end.",
            segmentFilePath);
        errorSet.Add(ScriptError.OtherErrors, item.Id, message);
        return;
    }

    if (units.Count != segmentFile.NonSilenceWaveSegments.Count)
    {
        string message = Helper.NeutralFormat(
            "script units {0} do not match with non-silence " +
            "segments {1} in segmentation file.",
            units.Count, segmentFile.NonSilenceWaveSegments.Count);
        errorSet.Add(ScriptError.OtherErrors, item.Id, message);
        return;
    }

    // Counts agree: compare unit and segment labels pairwise, reporting every mismatch.
    for (int i = 0; i < segmentFile.NonSilenceWaveSegments.Count; i++)
    {
        WaveSegment segment = segmentFile.NonSilenceWaveSegments[i];
        TtsUnit unit = units[i];

        if (segment.Label == WaveSegment.FormatLabel(unit.MetaUnit.Name))
        {
            continue;
        }

        string message = Helper.NeutralFormat(
            "units [{0}/{1}] at {2} do not match between script and segment.",
            WaveSegment.FormatLabel(unit.MetaUnit.Name), segment.Label, i);
        errorSet.Add(ScriptError.OtherErrors, item.Id, message);
    }
}
/// <summary>
/// Get the units of this item, gathered from all of its sentences in order.
/// </summary>
/// <param name="phoneme">Phoneme; must not be null.</param>
/// <param name="sliceData">Slice data; must not be null.</param>
/// <returns>A new collection holding the units of every sentence.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="phoneme"/> or <paramref name="sliceData"/> is null.</exception>
public Collection<TtsUnit> GetUnits(Phoneme phoneme, SliceData sliceData)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (sliceData == null)
    {
        throw new ArgumentNullException("sliceData");
    }

    Collection<TtsUnit> allUnits = new Collection<TtsUnit>();
    foreach (ScriptSentence sentence in Sentences)
    {
        Collection<TtsUnit> sentenceUnits = sentence.GetUnits(phoneme, sliceData);
        foreach (TtsUnit sentenceUnit in sentenceUnits)
        {
            allUnits.Add(sentenceUnit);
        }
    }

    return allUnits;
}
/// <summary>
/// Convert a phone-based word pronunciation into a slice-based one.
/// </summary>
/// <remarks>
/// The pronunciation is split into syllables on "&amp;", "|" and "-". Each
/// syllable is sliced into units, which are joined with " . " after the
/// unit prefixes are removed and the phone delimiter is replaced by a space.
/// A stress found on the syllable is set again on the result. Syllables are
/// joined with " - ".
/// </remarks>
/// <param name="sliceData">Slice data.</param>
/// <param name="wordPron">Word pronunciation to convert.</param>
/// <returns>Word pronunciation string in slice.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="sliceData"/> is null, or <paramref name="wordPron"/> is null or empty.</exception>
public static string RewritePhones2Units(SliceData sliceData, string wordPron)
{
    if (sliceData == null)
    {
        throw new ArgumentNullException("sliceData");
    }

    if (string.IsNullOrEmpty(wordPron))
    {
        throw new ArgumentNullException("wordPron");
    }

    string[] syllables = Regex.Split(wordPron, @"\s*[&|\-]\s*");

    // The phoneme only depends on the language: look it up once instead of
    // up to twice per syllable.
    Phoneme phoneme = Localor.GetPhoneme(sliceData.Language);

    List<string> tgtSylls = new List<string>();
    for (int i = 0; i < syllables.Length; i++)
    {
        TtsStress nucleusStress = Pronunciation.GetStress(syllables[i]);
        string[] units = BuildUnits(phoneme, sliceData, syllables[i]);

        // Strip the unit decorations so that only phones and unit boundaries remain.
        string tgtslice = string.Join(" . ", units);
        tgtslice = tgtslice.Replace(TtsUnit.OnsetPrefix, string.Empty);
        tgtslice = tgtslice.Replace(TtsUnit.NucleusPrefix, string.Empty);
        tgtslice = tgtslice.Replace(TtsUnit.CodaPrefix, string.Empty);
        tgtslice = tgtslice.Replace(TtsUnit.PhoneDelimiter, " ");

        if (nucleusStress != TtsStress.None)
        {
            tgtslice = SetVowelStress(phoneme, tgtslice, nucleusStress);
        }

        tgtSylls.Add(tgtslice);
    }

    return string.Join(" - ", tgtSylls.ToArray());
}
/// <summary>
/// Get the units of this word, asking the owning sentence to build units when a normal word has none yet.
/// </summary>
/// <param name="phoneme">Phoneme; must not be null.</param>
/// <param name="sliceData">Slice data; must not be null.</param>
/// <param name="buildUnitFeature">Whether to build unit features.</param>
/// <returns>The unit collection held by this word.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="phoneme"/> or <paramref name="sliceData"/> is null.</exception>
/// <exception cref="InvalidDataException">Thrown when units have to be built but the word has no sentence.</exception>
public Collection<TtsUnit> GetUnits(Phoneme phoneme, SliceData sliceData, bool buildUnitFeature)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (sliceData == null)
    {
        throw new ArgumentNullException("sliceData");
    }

    bool unitsMissing = WordType == WordType.Normal && _units.Count == 0;
    if (unitsMissing)
    {
        if (Sentence == null)
        {
            throw new InvalidDataException(Helper.NeutralFormat("word should belong to a sentence."));
        }

        // Only the call is needed here; its return value is not used.
        Sentence.GetUnits(phoneme, sliceData, buildUnitFeature);
    }

    return _units;
}
/// <summary>
/// Build the units of this word without filling features.
/// </summary>
/// <remarks>
/// Does nothing when the word already has units. When TCGPP scores are
/// present, they are assigned one by one to the phones of the built units,
/// in unit and phone order.
/// </remarks>
/// <param name="sliceData">Slice data.</param>
/// <param name="pronunciationSeparator">Pronunciation separator.</param>
/// <exception cref="InvalidDataException">Thrown when there are fewer TCGPP scores than phones.</exception>
public void BuildUnitWithoutFeature(SliceData sliceData, PronunciationSeparator pronunciationSeparator)
{
    if (Units.Count > 0)
    {
        return;
    }

    UpdateUnitSyllables();

    foreach (ScriptSyllable syllable in UnitSyllables)
    {
        syllable.Tag = this;

        // Link every unit of the syllable back to its syllable and to this word.
        foreach (TtsUnit syllableUnit in BuildUnitsForSyllable(syllable, sliceData, pronunciationSeparator))
        {
            syllableUnit.WordType = WordType;
            syllableUnit.Tag = syllable;
            syllableUnit.Word = this;
            Units.Add(syllableUnit);
        }
    }

    if (string.IsNullOrEmpty(_tcgppScores))
    {
        return;
    }

    // Parse TCGPP score to TtsMetaPhone
    string[] scores = _tcgppScores.Split(new char[] { TcgppScoreDelimeter },
        StringSplitOptions.RemoveEmptyEntries);

    int scoreCursor = 0;
    foreach (TtsUnit unit in Units)
    {
        foreach (TtsMetaPhone phone in unit.MetaUnit.Phones)
        {
            if (scoreCursor >= scores.Length)
            {
                throw new InvalidDataException(Helper.NeutralFormat(
                    "Invalid TCGPP score format [{0}]", _tcgppScores));
            }

            phone.TcgppScore = int.Parse(scores[scoreCursor]);
            scoreCursor++;
        }
    }
}
/// <summary>
/// Build one unit per non-empty slice of a syllable.
/// </summary>
/// <param name="syllable">Syllable whose text is sliced.</param>
/// <param name="sliceData">Slice data; supplies the language and the nucleus information.</param>
/// <param name="pronunciationSeparator">Pronunciation separator used to split the syllable into slices.</param>
/// <returns>Units of the syllable, in slice order.</returns>
private static Collection<TtsUnit> BuildUnitsForSyllable(ScriptSyllable syllable, SliceData sliceData, PronunciationSeparator pronunciationSeparator)
{
    Debug.Assert(syllable != null);
    Debug.Assert(sliceData != null);

    string unstressedText = Core.Pronunciation.RemoveStress(syllable.Text.Trim());
    string[] slices = pronunciationSeparator.SplitSlices(unstressedText);
    PosInSyllable[] positions = EstimatePosInSyllable(slices, sliceData);

    int lastSliceIndex = slices.Length - 1;
    Collection<TtsUnit> result = new Collection<TtsUnit>();

    for (int i = 0; i < slices.Length; i++)
    {
        string sliceText = slices[i].Trim();
        if (string.IsNullOrEmpty(sliceText))
        {
            continue;
        }

        TtsUnit unit = new TtsUnit(sliceData.Language);

        // break level: only the last slice carries the break of the syllable
        if (i == lastSliceIndex)
        {
            unit.TtsBreak = syllable.TtsBreak;
        }
        else
        {
            unit.TtsBreak = TtsBreak.Phone;
        }

        // pos in syllable
        unit.Feature.PosInSyllable = positions[i];

        // NONE: punctuation type

        // emphasis
        unit.Feature.TtsEmphasis = syllable.TtsEmphasis;

        // stress mark
        unit.Feature.TtsStress = syllable.Stress;

        // fill unit name: runs of spaces in the slice are replaced with '+'
        unit.MetaUnit.Name = Regex.Replace(sliceText, " +", @"+");
        unit.MetaUnit.Language = unit.Language;

        result.Add(unit);
    }

    return result;
}
/// <summary>
/// Build slice data for a language by parsing a set of full unit names.
/// </summary>
/// <param name="language">Language of the slice data.</param>
/// <param name="unitFullNames">Full unit names to parse; must not be null.</param>
/// <returns>Slice data that has parsed every given unit name.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="unitFullNames"/> is null.</exception>
public static SliceData ToSliceData(Language language, IEnumerable<string> unitFullNames)
{
    if (unitFullNames == null)
    {
        throw new ArgumentNullException("unitFullNames");
    }

    SliceData parsed = new SliceData();
    parsed.Language = language;

    foreach (string fullName in unitFullNames)
    {
        parsed.ParseUnit(fullName);
    }

    return parsed;
}
/// <summary>
/// Build the mono MLF of one script item.
/// </summary>
/// <remarks>
/// Nothing is written when the item has no pronounced normal word or when
/// any such word lacks a pronunciation; those conditions are returned as
/// errors instead.
/// </remarks>
/// <param name="item">Script item.</param>
/// <param name="sw">Text writer; required when <paramref name="writeToFile"/> is true.</param>
/// <param name="writeToFile">Whether writing to file.</param>
/// <param name="phoneme">Phoneme.</param>
/// <param name="sliceData">Slice data.</param>
/// <returns>Errors.</returns>
/// <exception cref="ArgumentNullException">Thrown when writing is requested but <paramref name="sw"/> is null.</exception>
private static ErrorSet BuildMonoMlf(ScriptItem item, StreamWriter sw, bool writeToFile, Phoneme phoneme, SliceData sliceData)
{
    Debug.Assert(item != null);
    Debug.Assert(phoneme != null);

    if (writeToFile && sw == null)
    {
        throw new ArgumentNullException("sw");
    }

    Collection<ScriptWord> words = item.AllPronouncedNormalWords;
    ErrorSet errors = new ErrorSet();

    if (words.Count == 0)
    {
        errors.Add(ScriptError.OtherErrors, item.Id, Helper.NeutralFormat("No pronounced normal word."));
        return errors;
    }

    // Every word needs a pronunciation before anything is written.
    foreach (ScriptWord candidate in words)
    {
        Debug.Assert(candidate != null);
        if (string.IsNullOrEmpty(candidate.Pronunciation))
        {
            errors.Add(ScriptError.OtherErrors, item.Id, Helper.NeutralFormat("No pronunciation normal word '{1}' in script item {0}.", item.Id, candidate.Grapheme));
        }
    }

    if (errors.Count != 0)
    {
        return errors;
    }

    // Label header and leading silence.
    if (writeToFile)
    {
        sw.WriteLine("\"*/{0}.lab\"", item.Id);
        sw.WriteLine(Phoneme.SilencePhone);
    }

    int lastWordIndex = words.Count - 1;
    for (int wordIndex = 0; wordIndex <= lastWordIndex; wordIndex++)
    {
        ScriptWord word = words[wordIndex];

        // The units are requested for every word, whichever mapping type is used below.
        Collection<TtsUnit> units = word.GetUnits(phoneme, sliceData);

        if (phoneme.Tts2srMapType == Phoneme.TtsToSrMappingType.PhoneBased)
        {
            foreach (TtsUnit unit in units)
            {
                errors.Merge(BuildMonoMlf(unit, item, sw, writeToFile, phoneme));
            }
        }
        else if (phoneme.Tts2srMapType == Phoneme.TtsToSrMappingType.SyllableBased)
        {
            foreach (ScriptSyllable syllable in word.UnitSyllables)
            {
                errors.Merge(BuildMonoMlf(syllable, item, sw, writeToFile, phoneme));
            }
        }

        // A short pause separates consecutive words, but does not follow the last one.
        if (writeToFile && wordIndex < lastWordIndex)
        {
            sw.WriteLine(Phoneme.ShortPausePhone);
        }
    }

    if (writeToFile)
    {
        sw.WriteLine(Phoneme.SilencePhone);
        sw.WriteLine(".");  // end of sentence
    }

    return errors;
}