/// <summary>
/// Dumps the data held by an utterance into a new script item.
/// </summary>
/// <param name="utt">The utterance to dump.</param>
/// <param name="ttsEngine">The engine handed on to the word dumping step.</param>
/// <returns>A script item containing one sentence built from the utterance.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="ttsEngine"/> or <paramref name="utt"/> is null.
/// </exception>
public static ScriptItem ToScriptItem(this SP.TtsUtterance utt, SP.TtsEngine ttsEngine)
{
    if (ttsEngine == null)
    {
        throw new ArgumentNullException("ttsEngine");
    }

    // An extension method can be invoked on a null receiver, so guard it here
    // rather than failing below with a NullReferenceException.
    if (utt == null)
    {
        throw new ArgumentNullException("utt");
    }

    ScriptItem item = new ScriptItem();
    item.Text = utt.OriginalText;

    // The id is BeginId formatted with the "D" specifier, using IdLength as the precision.
    item.Id = Helper.NeutralFormat("{0:D" + IdLength + "}", BeginId);

    ScriptSentence sentence = new ScriptSentence();
    sentence.SentenceType = (SentenceType)utt.SentenceType;
    DumpWords(sentence, utt, ttsEngine, GetLanguage(utt));

    // The sentence text is rebuilt from the words that were just dumped.
    sentence.Text = sentence.BuildTextFromWords();

    item.Sentences.Add(sentence);
    return item;
}
/// <summary>
/// Checks every slice of a syllable pronunciation and reports the first problem found.
/// </summary>
/// <param name="entry">Script item that supplies the slice separator and the item id.</param>
/// <param name="syllable">Pronunciation of the syllable.</param>
/// <returns>The first data error found, or null when every slice passes.</returns>
public static DataError ValidateSlices(ScriptItem entry, string syllable)
{
    if (entry == null)
    {
        throw new ArgumentNullException("entry");
    }

    if (entry.PronunciationSeparator == null)
    {
        throw new ArgumentException(Helper.NeutralFormat("entry.PronunciationSeparator should not be null."));
    }

    if (string.IsNullOrEmpty(entry.PronunciationSeparator.Slice))
    {
        throw new ArgumentException(Helper.NeutralFormat("entry.PronunciationSeparator.Slice should not be null."));
    }

    if (string.IsNullOrEmpty(syllable))
    {
        throw new ArgumentNullException("syllable");
    }

    string[] separators = new string[] { entry.PronunciationSeparator.Slice };
    string[] parts = syllable.Split(separators, StringSplitOptions.None);

    int position = 0;
    foreach (string part in parts)
    {
        // Empty entries are kept by the split, so an empty slice is reported as an error.
        if (string.IsNullOrEmpty(part))
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "The slice[{0}] of syllable[{1}] is empty by separator [{2}]",
                position, syllable, entry.PronunciationSeparator.Slice);
            return new DataError("null", message, entry.Id);
        }

        // Check the phones of this slice; stop at the first failing slice.
        DataError phoneError = ValidatePhones(entry, part);
        if (phoneError != null)
        {
            return phoneError;
        }

        position++;
    }

    return null;
}
/// <summary>
/// Builds a script item from raw text, filling it down to word level only.
/// </summary>
/// <param name="text">Plain text.</param>
/// <returns>A script item with one sentence per non-empty utterance.</returns>
public ScriptItem GenerateScriptItem(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        throw new ArgumentNullException("text");
    }

    // The current process mode must include the ProcessMode.TextProcess flag.
    bool textProcessEnabled = (_mode & ProcessMode.TextProcess) != 0;
    if (!textProcessEnabled)
    {
        throw new InvalidOperationException("Process mode can only be ProcessMode.TextProcess");
    }

    ScriptItem result = new ScriptItem();
    result.Text = text;

    foreach (SP.TtsUtterance utt in EspUtterances(text))
    {
        // Each utterance is disposed as soon as its words have been copied.
        using (utt)
        {
            if (utt.Words.Count != 0)
            {
                ScriptSentence sentence = new ScriptSentence();
                foreach (SP.TtsWord word in utt.Words)
                {
                    // Words without text are skipped.
                    if (string.IsNullOrEmpty(word.WordText))
                    {
                        continue;
                    }

                    ScriptWord scriptWord = new ScriptWord();
                    scriptWord.Grapheme = word.WordText;

                    if (!string.IsNullOrEmpty(word.Pronunciation))
                    {
                        scriptWord.Pronunciation = word.Pronunciation.ToLowerInvariant();
                    }

                    // Only punctuation is distinguished; every other word type maps to Normal.
                    scriptWord.WordType = word.WordType == SP.TtsWordType.WT_PUNCTUATION
                        ? WordType.Punctuation
                        : WordType.Normal;

                    scriptWord.PronSource = (TtsPronSource)word.PronSource;
                    sentence.Words.Add(scriptWord);
                }

                sentence.Text = sentence.BuildTextFromWords();
                result.Sentences.Add(sentence);
            }
        }
    }

    return result;
}
/// <summary>
/// Extracts the features of the given script item.
/// </summary>
/// <param name="item">
/// The script item.
/// </param>
/// <param name="segmentFile">
/// The segmentation file whose wave segments supply the phone timings.
/// </param>
/// <returns>
/// The sentence holding the extracted features, with start/end times copied from the segments.
/// </returns>
/// <exception cref="InvalidDataException">
/// Not thrown directly by this method; presumably raised by the helpers it calls.
/// </exception>
private Sentence Extract(ScriptItem item, SegmentFile segmentFile)
{
    UtteranceBuilder builder = new UtteranceBuilder(PhoneSet, PosSet, Phoneme);
    builder.NeedPos = NeedPos;
    builder.NeedToBI = NeedToBI;

    // The utterance is only needed while the features are extracted.
    using (TtsUtterance utterance = builder.Build(item, segmentFile, false, -1))
    {
        // The tone index extractor runs only for zh-CN.
        if ((Language)PhoneSet.Language == Language.ZhCN)
        {
            ChineseToneIndexExtractor.Process(utterance, item);
        }

        // Let every configured extender process the utterance.
        if (UtteranceExtenders != null)
        {
            foreach (IUtteranceExtender extender in UtteranceExtenders)
            {
                extender.Process(utterance, item);
            }
        }

        Sentence result = Extract(item.Id, utterance);

        // Copy the timing of the i-th wave segment onto the i-th phone segment.
        // NOTE(review): assumes WaveSegments has at least as many entries as PhoneSegments - confirm.
        for (int segmentIndex = 0; segmentIndex < result.PhoneSegments.Count; ++segmentIndex)
        {
            result.PhoneSegments[segmentIndex].StartTimeInSecond =
                (float)segmentFile.WaveSegments[segmentIndex].StartTime;
            result.PhoneSegments[segmentIndex].EndTimeInSecond =
                (float)segmentFile.WaveSegments[segmentIndex].EndTime;
        }

        return result;
    }
}
/// <summary>
/// Records the current script item and selects the synthesizer output for it.
/// </summary>
/// <param name="scriptItem">Script item about to be spoken.</param>
public void PrepareSpeak(ScriptItem scriptItem)
{
    _curScriptItem = scriptItem;
    TraceLog(_logger, true, "Processing item: {0}", scriptItem.Id);

    SpeechSynthesizer synthesizer = _serviceProvider.SpeechSynthesizer;

    // Without a common configuration the output is set to null.
    if (_commonConfig == null)
    {
        synthesizer.SetOutputToNull();
        return;
    }

    // Otherwise the output goes to "<item id>.wav" under the configured output directory.
    string waveDirectory = _commonConfig.OutputWaveDir;
    string waveFileName = Helper.NeutralFormat("{0}.wav", scriptItem.Id);
    string waveFilePath = Path.Combine(waveDirectory, waveFileName);
    synthesizer.SetOutputToWaveFile(waveFilePath, _serviceProvider.Engine.AudioFormatInfo);
}
/// <summary>
/// Builds the units for a syllable pronunciation.
/// Each returned unit is prefixed with its onset/nucleus/coda prefix and its phones
/// are joined with <c>TtsUnit.PhoneDelimiter</c>.
/// </summary>
/// <param name="phoneme">Phoneme of the language to process with.</param>
/// <param name="sliceData">Slice data holding the predefined onset, nucleus and coda slices.</param>
/// <param name="syllable">Syllable pronunciation to process.</param>
/// <returns>The sliced unit list, in syllable order.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="phoneme"/> or <paramref name="sliceData"/> is null,
/// or <paramref name="syllable"/> is null or empty.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when the sonorant phones, onset slices or nucleus slices are null.
/// </exception>
public static string[] BuildUnits(Phoneme phoneme, SliceData sliceData, string syllable)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (phoneme.TtsSonorantPhones == null)
    {
        string message = Helper.NeutralFormat("phoneme.TtsSonorantPhones should not be null.");
        throw new ArgumentException(message);
    }

    if (string.IsNullOrEmpty(syllable))
    {
        throw new ArgumentNullException("syllable");
    }

    if (sliceData == null)
    {
        throw new ArgumentNullException("sliceData");
    }

    if (sliceData.OnsetSlices == null)
    {
        string message = Helper.NeutralFormat("sliceData.OnsetSlices should not be null.");
        throw new ArgumentException(message);
    }

    if (sliceData.NucleusSlices == null)
    {
        string message = Helper.NeutralFormat("sliceData.NucleusSlices should not be null.");
        throw new ArgumentException(message);
    }

    List<string> slicedUnits = new List<string>();

    string unstressedSyllable = Pronunciation.RemoveStress(syllable);

    ScriptItem scriptItem = new ScriptItem(phoneme.Language);

    // items contains phone and tone.
    string[] items = scriptItem.PronunciationSeparator.SplitPhones(unstressedSyllable);

    // Treat the whole syllable as one unit at first.
    TtsMetaUnit ttsMetaUnit = new TtsMetaUnit(phoneme.Language);
    ttsMetaUnit.Name = string.Join(" ", items);
    string[] phones = ttsMetaUnit.GetPhonesName();

    // Treat all phones in this syllable as a whole unit.
    if (sliceData.NucleusSlices.IndexOf(ttsMetaUnit.Name) >= 0)
    {
        // If it is already defined in the predefined unit collection, return it.
        slicedUnits.Add(TtsUnit.NucleusPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
        return slicedUnits.ToArray();
    }

    int vowelIndex = phoneme.GetFirstVowelIndex(phones);
    if (vowelIndex < 0)
    {
        // If there is no vowel in the syllable, treat all phones in this syllable
        // as one unit if it is in the unit table.
        if (sliceData.OnsetSlices.IndexOf(ttsMetaUnit.Name) >= 0)
        {
            slicedUnits.Add(TtsUnit.OnsetPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
        }
        else if (sliceData.CodaSlices.IndexOf(ttsMetaUnit.Name) >= 0)
        {
            slicedUnits.Add(TtsUnit.CodaPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
        }
        else
        {
            // Otherwise, treat each phone as a coda unit.
            foreach (string phone in phones)
            {
                slicedUnits.Add(TtsUnit.CodaPrefix + phone);
            }
        }

        return slicedUnits.ToArray();
    }

    // Search the left-most sonorant consonant on the left side of the first vowel.
    int firstSonarantIndex = vowelIndex;
    for (int i = vowelIndex - 1; i >= 0; i--)
    {
        if (phoneme.TtsSonorantPhones.IndexOf(phones[i]) >= 0)
        {
            firstSonarantIndex = i;
        }
    }

    // Search the right-most sonorant consonant on the right side of the first vowel.
    int lastSonarantIndex = vowelIndex;
    for (int i = vowelIndex + 1; i <= phones.Length - 1; i++)
    {
        if (phoneme.TtsSonorantPhones.IndexOf(phones[i]) >= 0)
        {
            lastSonarantIndex = i;
        }
    }

    // Treat the vowel and the surrounding sonorant consonants as the nucleus unit first.
    string nucleus = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones,
        firstSonarantIndex, lastSonarantIndex - firstSonarantIndex + 1);

    TruncateRuleData truncateRuleData = Localor.GetTruncateRuleData(phoneme.Language);

    // Refine the nucleus according to the predefined unit table.
    // IndexOf reports "not found" with a negative value; index 0 is a valid hit, so the
    // loop must only continue while the result is < 0 (the previous "<= 0" test kept
    // truncating a nucleus that was stored as the first predefined slice).
    while (lastSonarantIndex - firstSonarantIndex > 0 &&
        sliceData.NucleusSlices.IndexOf(nucleus) < 0)
    {
        // The unit candidate is not listed in the predefined unit list, so truncate
        // one phone from it, either on the left or on the right.
        string[] leftRight = PhoneMerger.TruncateOnePhoneFromNucleus(phoneme,
            truncateRuleData.NucleusTruncateRules, nucleus);

        if (phoneme.TtsPhones.IndexOf(leftRight[0]) >= 0)
        {
            firstSonarantIndex++;
        }
        else
        {
            Debug.Assert(phoneme.TtsPhones.IndexOf(leftRight[1]) >= 0);
            lastSonarantIndex--;
        }

        // Re-define the remaining nucleus unit.
        nucleus = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones,
            firstSonarantIndex, lastSonarantIndex - firstSonarantIndex + 1);
    }

    slicedUnits.Add(TtsUnit.NucleusPrefix + nucleus.Replace(" ", TtsUnit.PhoneDelimiter));

    // Refine onset: try the longest prefix first, peeling single phones off its right end
    // until the remaining prefix is a predefined onset slice.
    for (int index = firstSonarantIndex - 1; index >= 0; index--)
    {
        string onset = TtsMetaPhone.Join(TtsUnit.PhoneDelimiter, ttsMetaUnit.Phones, 0, index + 1);
        if (sliceData.OnsetSlices.IndexOf(onset.Replace(TtsUnit.PhoneDelimiter, " ")) >= 0)
        {
            slicedUnits.Insert(0, TtsUnit.OnsetPrefix + onset);

            // The matched onset covers every remaining phone (0..index), so we are done.
            break;
        }

        // Treat it as a single phone unit.
        slicedUnits.Insert(0,
            TtsUnit.OnsetPrefix + TtsMetaPhone.Join(TtsUnit.PhoneDelimiter, ttsMetaUnit.Phones, index, 1));
    }

    // Refine coda, matching from right to left.
    BuildCodaUnits(sliceData, ttsMetaUnit.Phones, lastSonarantIndex + 1, slicedUnits);

    return slicedUnits.ToArray();
}
/// <summary>
/// Checks whether a script item is valid. The schema is not checked here.
/// Depending on the validation scope, the following are checked:
/// 1. Normal words should have a pronunciation.
/// 2. The pronunciation should validate against the phone set.
/// 3. The POS should be in the POS set.
/// 4. The segment sequence.
/// </summary>
/// <param name="item">The item to be checked.</param>
/// <param name="errors">Receives the errors found; it is cleared first.</param>
/// <param name="validateSetting">Validation data set.</param>
/// <returns>True when no error was recorded.</returns>
public static bool IsValidItem(ScriptItem item, ErrorSet errors, XmlScriptValidateSetting validateSetting)
{
    if (item == null)
    {
        throw new ArgumentNullException("item");
    }

    if (errors == null)
    {
        throw new ArgumentNullException("errors");
    }

    if (validateSetting == null)
    {
        throw new ArgumentNullException("validateSetting");
    }

    validateSetting.VerifySetting();

    XmlScriptValidationScope scope = validateSetting.ValidationScope;
    bool checkPronunciation =
        (scope & XmlScriptValidationScope.Pronunciation) == XmlScriptValidationScope.Pronunciation;
    bool checkPos =
        (scope & XmlScriptValidationScope.POS) == XmlScriptValidationScope.POS;
    bool checkSegments =
        (scope & XmlScriptValidationScope.SegmentSequence) == XmlScriptValidationScope.SegmentSequence;

    errors.Clear();

    int sentenceNumber = 0;
    foreach (ScriptSentence sentence in item.Sentences)
    {
        int wordNumber = 0;
        foreach (ScriptWord word in sentence.Words)
        {
            // Only normal words take part in the pronunciation check.
            if (checkPronunciation && word.WordType == WordType.Normal)
            {
                string pron = word.GetPronunciation(validateSetting.PhoneSet);
                if (string.IsNullOrEmpty(pron))
                {
                    // Pronunciation is optional for a normal word; an empty one is reported.
                    errors.Add(ScriptError.EmptyPronInNormalWord, item.Id, word.Grapheme);
                }
                else
                {
                    ErrorSet pronErrors = Core.Pronunciation.Validate(pron, validateSetting.PhoneSet);
                    foreach (Error error in pronErrors.Errors)
                    {
                        errors.Add(ScriptError.PronunciationError, error, item.Id, word.Grapheme);
                    }
                }
            }

            // A non-empty POS name must be a key of the POS set.
            if (checkPos &&
                !string.IsNullOrEmpty(word.PosString) &&
                !validateSetting.PosSet.Items.ContainsKey(word.PosString))
            {
                errors.Add(ScriptError.UnrecognizedPos, item.Id, word.Grapheme,
                    word.Pronunciation, word.PosString);
            }

            string nodePath = string.Format(CultureInfo.InvariantCulture,
                "Sentence[{0}].Word[{1}]", sentenceNumber, wordNumber);
            word.IsValid(item.Id, nodePath, scope, errors);

            wordNumber++;
        }

        sentenceNumber++;
    }

    if (checkSegments)
    {
        CheckSegments(item, errors);
    }

    return errors.Count == 0;
}
/// <summary>
/// Initializes a new instance of the <see cref="TtsUtterance"/> class.
/// </summary>
/// <param name="language">Language used to create the backing script item.</param>
/// <param name="engine">Engine used to create the backing script item.</param>
public TtsUtterance(Language language, EngineType engine)
{
    // The script item is created for the given language and engine by Localor.
    this._script = Localor.CreateScriptItem(language, engine);
}
/// <summary>
/// Reads one script item from the text stream reader.
/// </summary>
/// <param name="reader">Text stream to read from.</param>
/// <param name="scriptItem">Script item to fill.</param>
/// <param name="withPron">Whether each sentence line is followed by a pronunciation line.</param>
/// <param name="withSid">Whether each sentence line starts with a sentence id.</param>
/// <param name="validate">Whether to validate the script item.</param>
/// <returns>
/// The data error found during reading; null when reading succeeded or the end of the
/// stream was reached before any sentence line.
/// </returns>
public static DataError ReadOneScriptItem(StreamReader reader, ScriptItem scriptItem, bool withPron, bool withSid, bool validate)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    if (reader.CurrentEncoding == null)
    {
        throw new ArgumentException(Helper.NeutralFormat("reader.CurrentEncoding should not be null."));
    }

    if (scriptItem == null)
    {
        throw new ArgumentNullException("scriptItem");
    }

    // Read the sentence content line, skipping empty lines.
    string sentenceLine;
    do
    {
        sentenceLine = reader.ReadLine();
    }
    while (sentenceLine != null && sentenceLine.Length == 0);

    if (string.IsNullOrEmpty(sentenceLine))
    {
        // End of file reached.
        return null;
    }

    // The encoding is checked once a non-empty sentence line has been read.
    if (reader.CurrentEncoding.CodePage != Encoding.Unicode.CodePage)
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "The script file must be saved in Unicode.");
        throw new InvalidDataException(message);
    }

    string pronunciationLine = null;
    if (withPron)
    {
        // Read the pronunciation line, skipping empty lines.
        do
        {
            pronunciationLine = reader.ReadLine();
        }
        while (pronunciationLine != null && pronunciationLine.Length == 0);

        if (string.IsNullOrEmpty(pronunciationLine))
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "Invalid format, empty pronunciation string for sentence: '{0}', pronunciation: '{1}'.",
                sentenceLine, pronunciationLine);
            return new DataError(message);
        }
    }

    return ReadOneScriptItem(scriptItem, sentenceLine, pronunciationLine, withPron, withSid, validate);
}
/// <summary>
/// Checks whether the syllable either has an acceptable number of vowels or,
/// lacking any vowel, has a sonorant phone and is not a single phone.
/// </summary>
/// <param name="entry">Script entry.</param>
/// <param name="phoneme">Phoneme.</param>
/// <param name="phones">Phones of the syllable.</param>
/// <returns>True when the syllable is acceptable.</returns>
private static bool IsGoodSyllableWithSonorant(ScriptItem entry, Phoneme phoneme, string[] phones)
{
    // Too many vowels is never acceptable.
    if (!IsSyllableWithLessVowel(entry, phoneme, phones))
    {
        return false;
    }

    if (IsSyllableWithEnoughVowel(entry, phoneme, phones))
    {
        return true;
    }

    // Not enough vowels: only a syllable with no vowel at all can still be rescued.
    if (phoneme.GetVowelIndexes(phones).Length != 0)
    {
        return false;
    }

    // No vowel: needs at least one sonorant and must not consist of a single phone.
    int[] sonorantIndexes = phoneme.GetSonorantIndexes(phones);
    return sonorantIndexes.Length != 0 && phones.Length != 1;
}
/// <summary>
/// Checks that the syllable does not have too many vowels.
/// </summary>
/// <param name="entry">Script entry supplying the maximum vowel count per syllable.</param>
/// <param name="phoneme">Phoneme used to locate the vowels.</param>
/// <param name="phones">Phones of the syllable.</param>
/// <returns>True when the vowel count does not exceed the maximum.</returns>
private static bool IsSyllableWithLessVowel(ScriptItem entry, Phoneme phoneme, string[] phones)
{
    int vowelCount = phoneme.GetVowelIndexes(phones).Length;
    return vowelCount <= entry.MaxVowelCountInSyllable;
}
/// <summary>
/// Checks whether the syllable has a valid number of vowels: enough, and not too many.
/// </summary>
/// <param name="entry">Script entry.</param>
/// <param name="phoneme">Phoneme.</param>
/// <param name="phones">Phones of the syllable.</param>
/// <returns>True when both vowel count checks pass.</returns>
private static bool IsGoodSyllableWithVowel(ScriptItem entry, Phoneme phoneme, string[] phones)
{
    if (!IsSyllableWithEnoughVowel(entry, phoneme, phones))
    {
        return false;
    }

    return IsSyllableWithLessVowel(entry, phoneme, phones);
}
/// <summary>
/// Builds one sentence of the mono MLF file.
/// </summary>
/// <param name="writer">Text writer for the MLF file; when null nothing is written here.</param>
/// <param name="entry">Script item.</param>
protected virtual void BuildMonoMlf(TextWriter writer, ScriptItem entry)
{
    if (entry.NormalWords.Count == 0)
    {
        throw new InvalidDataException(string.Format(CultureInfo.InvariantCulture,
            "No normal word found in the sentence."));
    }

    bool hasWriter = writer != null;
    if (hasWriter)
    {
        // Sentence header of the MLF file, followed by the leading silence.
        writer.WriteLine("\"*/" + entry.Id + ".lab\"");
        writer.WriteLine(Phoneme.SilencePhone);
    }

    try
    {
        for (int wordIndex = 0; wordIndex < entry.NormalWords.Count; wordIndex++)
        {
            ScriptWord word = entry.NormalWords[wordIndex];

            switch (Phoneme.Tts2srMapType)
            {
                case Phoneme.TtsToSrMappingType.PhoneBased:
                    foreach (TtsUnit unit in word.Units)
                    {
                        BuildMonoMlf(writer, unit);
                    }

                    break;

                case Phoneme.TtsToSrMappingType.SyllableBased:
                    // NOTE(review): the value is unused; the read is kept in case the
                    // Units getter has side effects - confirm before removing.
                    Collection<TtsUnit> units = word.Units;
                    foreach (ScriptSyllable syllable in word.Syllables)
                    {
                        BuildMonoMlf(writer, syllable);
                    }

                    break;
            }

            // A short pause separates normal words; none follows the last one.
            bool moreWordsFollow = wordIndex + 1 < entry.NormalWords.Count;
            if (moreWordsFollow && hasWriter)
            {
                writer.WriteLine(Phoneme.ShortPausePhone);
            }
        }

        if (hasWriter)
        {
            writer.WriteLine(Phoneme.SilencePhone);
            writer.WriteLine("."); // end of sentence
        }
    }
    catch (InvalidDataException)
    {
        // The sentence is still terminated before the error is propagated.
        if (hasWriter)
        {
            writer.WriteLine(Phoneme.SilencePhone);
            writer.WriteLine("."); // end of sentence
        }

        throw;
    }
}
/// <summary>
/// Processes the pronunciation of a script entry before script building.
/// The base implementation only validates the pronunciation; subclasses may override it.
/// </summary>
/// <param name="entry">Script item.</param>
/// <returns>The result of the pronunciation validation.</returns>
protected virtual DataError ProcessPronunciation(ScriptItem entry)
{
    DataError validationError = ValidatePronunciation(entry);
    return validationError;
}
/// <summary>
/// Checks that every phone of a slice is acceptable and reports the first invalid one.
/// </summary>
/// <param name="entry">Script item supplying the phone separator, language and item id.</param>
/// <param name="slice">Pronunciation of the slice.</param>
/// <returns>The data error for the first invalid phone, or null when all are accepted.</returns>
public static DataError ValidatePhones(ScriptItem entry, string slice)
{
    if (entry == null)
    {
        throw new ArgumentNullException("entry");
    }

    if (entry.PronunciationSeparator == null)
    {
        throw new ArgumentException(Helper.NeutralFormat("entry.PronunciationSeparator should not be null."));
    }

    if (string.IsNullOrEmpty(entry.PronunciationSeparator.Phone))
    {
        throw new ArgumentException(Helper.NeutralFormat("entry.PronunciationSeparator.Phone should not be null."));
    }

    if (string.IsNullOrEmpty(slice))
    {
        throw new ArgumentNullException("slice");
    }

    Phoneme phoneme = Localor.GetPhoneme(entry.Language);
    string[] separators = new string[] { entry.PronunciationSeparator.Phone };

    foreach (string item in slice.Split(separators, StringSplitOptions.RemoveEmptyEntries))
    {
        // TODO: PS#13181 Offline tools:Syllable veridation and pronunciation design
        // The literal items "1", "2" and "3" are accepted without further checks.
        if (item == "1" || item == "2" || item == "3")
        {
            continue;
        }

        // Items wrapped in underscores are special phones; tone names are accepted too.
        bool isSpecialPhone = item.StartsWith("_", StringComparison.Ordinal) &&
            item.EndsWith("_", StringComparison.Ordinal);
        if (isSpecialPhone || phoneme.ToneManager.NameMap.ContainsKey(item))
        {
            continue;
        }

        if (phoneme.TtsPhones.IndexOf(item) >= 0)
        {
            continue;
        }

        // Invalid tts phone found: report it and stop.
        string message = string.Format(CultureInfo.InvariantCulture,
            "The phone[{0}] in slice[{1}] is invalid", item, slice);
        return new DataError("null", message, entry.Id);
    }

    return null;
}
/// <summary>
/// Adds one item to the script file.
/// The schema is always checked; the content is validated only on request.
/// </summary>
/// <param name="item">The item to be added.</param>
/// <param name="errors">Receives the errors that prevented adding; it is cleared first.</param>
/// <param name="validate">Whether to validate the item content.</param>
/// <param name="sort">Whether to insert the item at its sorted position (by id, ignoring case).</param>
/// <returns>True if the item was added.</returns>
public bool Add(ScriptItem item, ErrorSet errors, bool validate, bool sort)
{
    if (item == null)
    {
        throw new ArgumentNullException("item");
    }

    if (errors == null)
    {
        throw new ArgumentNullException("errors");
    }

    // Schema problems are reported by exception rather than through the error set.
    CheckSchema(item);

    errors.Clear();

    // Content problems are collected in the error set.
    if (_itemDic.ContainsKey(item.Id))
    {
        errors.Add(ScriptError.DuplicateItemId, item.Id);
    }

    if (validate)
    {
        ErrorSet contentErrors = new ErrorSet();
        XmlScriptValidateSetting validateSetting = new XmlScriptValidateSetting(PhoneSet, PosSet);
        ScriptItem.IsValidItem(item, contentErrors, validateSetting);
        errors.Merge(contentErrors);
    }

    if (errors.Count > 0)
    {
        return false;
    }

    _itemDic.Add(item.Id, item);

    // Find the first existing item whose id sorts after the new one, if sorting is requested.
    int insertIndex = -1;
    if (sort)
    {
        for (int i = 0; i < _items.Count; i++)
        {
            if (string.Compare(item.Id, _items[i].Id, StringComparison.OrdinalIgnoreCase) < 0)
            {
                insertIndex = i;
                break;
            }
        }
    }

    if (insertIndex >= 0)
    {
        _items.Insert(insertIndex, item);
    }
    else
    {
        _items.Add(item);
    }

    return true;
}
/// <summary>
/// Adds one item to the end of the script file, without sorted insertion.
/// </summary>
/// <param name="item">The item to be added.</param>
/// <param name="errors">The errors if failed to add.</param>
/// <param name="validate">Whether to validate the item content.</param>
/// <returns>True if the item was added.</returns>
public bool Add(ScriptItem item, ErrorSet errors, bool validate)
{
    if (item == null)
    {
        throw new ArgumentNullException("item");
    }

    if (errors == null)
    {
        throw new ArgumentNullException("errors");
    }

    bool sort = false;
    return Add(item, errors, validate, sort);
}
/// <summary>
/// Reads one script item from a sentence content line and a pronunciation line,
/// with validation enabled.
/// </summary>
/// <param name="scriptItem">Script item to fill.</param>
/// <param name="sentenceLine">Sentence content line.</param>
/// <param name="pronunciationLine">Pronunciation line; required when <paramref name="withPron"/> is true.</param>
/// <param name="withPron">Whether a pronunciation line is supplied for the sentence.</param>
/// <param name="withSid">Whether the sentence line starts with a sentence id.</param>
/// <returns>Data error found during reading, otherwise null.</returns>
public static DataError ReadOneScriptItem(ScriptItem scriptItem, string sentenceLine, string pronunciationLine, bool withPron, bool withSid)
{
    if (scriptItem == null)
    {
        throw new ArgumentNullException("scriptItem");
    }

    if (string.IsNullOrEmpty(sentenceLine))
    {
        throw new ArgumentNullException("sentenceLine");
    }

    if (withPron && string.IsNullOrEmpty(pronunciationLine))
    {
        throw new ArgumentNullException("pronunciationLine");
    }

    bool validate = true;
    return ReadOneScriptItem(scriptItem, sentenceLine, pronunciationLine, withPron, withSid, validate);
}
/// <summary>
/// Checks the data consistency between a script item and its segmentation file.
/// Every problem found is appended to the error set.
/// </summary>
/// <param name="script">Script file instance; its path is reported with mismatch errors.</param>
/// <param name="item">Script item.</param>
/// <param name="fileMap">File list map used to resolve the segment file name from the item id.</param>
/// <param name="segmentDir">Segment file directory.</param>
/// <param name="errorSet">Data error set that receives the errors.</param>
/// <param name="phoneBasedSegment">True for phone based alignment, false for unit based alignment.</param>
public static void ValidateDataAlignment(ScriptFile script, ScriptItem item, FileListMap fileMap, string segmentDir, DataErrorSet errorSet, bool phoneBasedSegment)
{
    string segmentFilePath = Path.Combine(segmentDir, fileMap.Map[item.Id] + ".txt");

    SegmentFile segmentFile = new SegmentFile();
    segmentFile.Load(segmentFilePath);

    if (segmentFile.WaveSegments.Count == 0)
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "There is no valid alignment data into alignment file.");
        errorSet.Errors.Add(new DataError(segmentFilePath, message, item.Id));
        return;
    }

    // The last segment must be a silence phone.
    if (!segmentFile.WaveSegments[segmentFile.WaveSegments.Count - 1].IsSilencePhone)
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "The alignment file is invalid, for without silence segment at the end.");
        errorSet.Errors.Add(new DataError(segmentFilePath, message, item.Id));
        return;
    }

    if (phoneBasedSegment)
    {
        // The counts must agree before the labels can be compared one by one.
        if (item.GetPhones().Length != segmentFile.NonSilenceWaveSegments.Count)
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "script phones {0} do not match with non-silence segments {1} in segmentation file.",
                item.GetPhones().Length, segmentFile.NonSilenceWaveSegments.Count);
            errorSet.Errors.Add(new DataError(script.FilePath, message, item.Id));
            return;
        }

        string[] phones = item.GetPhones();
        for (int i = 0; i < segmentFile.NonSilenceWaveSegments.Count; i++)
        {
            WaveSegment segment = segmentFile.NonSilenceWaveSegments[i];
            if (segment.Label != phones[i])
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                    "phone [{0}/{1}] at {2} does not match between script and segment.",
                    WaveSegment.FormatLabel(phones[i]), segment.Label, i);
                errorSet.Errors.Add(new DataError(script.FilePath, message, item.Id));
            }
        }
    }
    else
    {
        if (item.Units.Count != segmentFile.NonSilenceWaveSegments.Count)
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "script units {0} do not match with non-silence segments {1} in segmentation file.",
                item.Units.Count, segmentFile.NonSilenceWaveSegments.Count);
            errorSet.Errors.Add(new DataError(script.FilePath, message, item.Id));
            return;
        }

        for (int i = 0; i < segmentFile.NonSilenceWaveSegments.Count; i++)
        {
            WaveSegment segment = segmentFile.NonSilenceWaveSegments[i];
            TtsUnit unit = item.Units[i];
            if (segment.Label != WaveSegment.FormatLabel(unit.MetaUnit.Name))
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                    "units [{0}/{1}] at {2} do not match between script and segment.",
                    WaveSegment.FormatLabel(unit.MetaUnit.Name), segment.Label, i);
                errorSet.Errors.Add(new DataError(script.FilePath, message, item.Id));
            }
        }
    }
}
/// <summary>
/// Fills a script item from a sentence content line and an optional pronunciation line,
/// optionally validating the result.
/// </summary>
/// <param name="scriptItem">Script item to fill.</param>
/// <param name="sentenceLine">Sentence content line; it is trimmed before use.</param>
/// <param name="pronunciationLine">Pronunciation line; required when <paramref name="withPron"/> is true.</param>
/// <param name="withPron">Whether a pronunciation line is supplied for the sentence.</param>
/// <param name="withSid">Whether the sentence line starts with a sentence id.</param>
/// <param name="validate">Whether to validate the script item after filling it.</param>
/// <returns>Data error found during reading, otherwise null.</returns>
public static DataError ReadOneScriptItem(ScriptItem scriptItem, string sentenceLine, string pronunciationLine, bool withPron, bool withSid, bool validate)
{
    if (scriptItem == null)
    {
        throw new ArgumentNullException("scriptItem");
    }

    if (string.IsNullOrEmpty(sentenceLine))
    {
        throw new ArgumentNullException("sentenceLine");
    }

    sentenceLine = sentenceLine.Trim();

    if (withPron)
    {
        if (string.IsNullOrEmpty(pronunciationLine))
        {
            throw new ArgumentNullException("pronunciationLine");
        }

        pronunciationLine = pronunciationLine.Trim();
    }

    if (!withSid)
    {
        scriptItem.Sentence = sentenceLine;
    }
    else
    {
        // Expected shape: an alphanumeric id, tabs/spaces, then the sentence text.
        Match idAndText = Regex.Match(sentenceLine, @"^([0-9a-zA-Z]+)[\t ]+(.+)$");
        if (!idAndText.Success)
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "Invalid format, no sentence id for sentence: '{0}', pronunciation: '{1}'.",
                sentenceLine, withPron ? pronunciationLine : "null");
            return new DataError(message);
        }

        scriptItem.Id = idAndText.Groups[1].Value;
        scriptItem.Sentence = idAndText.Groups[2].Value.Trim();
    }

    if (withPron)
    {
        // Phone set is case insensitive, so the pronunciation is stored in lower case.
        scriptItem.Pronunciation = pronunciationLine.ToLowerInvariant();
    }

    if (!validate)
    {
        return null;
    }

    Phoneme phoneme = scriptItem.Language != Language.Neutral
        ? Localor.GetPhoneme(scriptItem.Language, scriptItem.Engine)
        : null;

    try
    {
        // All phones are checked for DeDE and JaJP only.
        if (phoneme != null &&
            (scriptItem.Language == Language.DeDE || scriptItem.Language == Language.JaJP))
        {
            foreach (string phone in scriptItem.GetPhones())
            {
                phoneme.TtsPhone2Id(phone);
            }
        }

        // A non-neutral item must contain at least one normal word.
        bool lacksNormalWords = scriptItem.Language != Language.Neutral &&
            (scriptItem.NormalWords == null || scriptItem.NormalWords.Count == 0);
        if (lacksNormalWords)
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "No normal word found in the sentence.");
            return new DataError("null", message, scriptItem.Id);
        }
    }
    catch (InvalidDataException ide)
    {
        // Invalid data raised inside the checks is converted into a data error.
        return new DataError("null", Helper.BuildExceptionMessage(ide), scriptItem.Id);
    }

    return null;
}
/// <summary>
/// Checks the segment intervals of every word, syllable and phone in a script item.
/// A separate running "previous segment end" value is kept per level and carried
/// across the whole item.
/// </summary>
/// <param name="item">Script item.</param>
/// <param name="errors">Error list that receives the problems found.</param>
private static void CheckSegments(ScriptItem item, ErrorSet errors)
{
    int previousWordEnd = 0;
    int previousSyllableEnd = 0;
    int previousPhoneEnd = 0;

    int sentenceNumber = 0;
    foreach (ScriptSentence sentence in item.Sentences)
    {
        int wordNumber = 0;
        foreach (ScriptWord word in sentence.Words)
        {
            string wordPath = string.Format(CultureInfo.InvariantCulture,
                "Sentence[{0}].Word[{1}]", sentenceNumber, wordNumber);

            if (word.HasAcousticsValue && word.Acoustics.HasSegmentInterval)
            {
                foreach (var interval in word.Acoustics.SegmentIntervals)
                {
                    CheckSegment(errors, item.Id, wordPath, interval, ref previousWordEnd);
                }
            }

            int syllableNumber = 0;
            foreach (ScriptSyllable syllable in word.Syllables)
            {
                string syllablePath = string.Format(CultureInfo.InvariantCulture,
                    "{0}.Syllable[{1}]", wordPath, syllableNumber);

                if (syllable.HasAcousticsValue && syllable.Acoustics.HasSegmentInterval)
                {
                    foreach (var interval in syllable.Acoustics.SegmentIntervals)
                    {
                        CheckSegment(errors, item.Id, syllablePath, interval, ref previousSyllableEnd);
                    }
                }

                int phoneNumber = 0;
                foreach (ScriptPhone phone in syllable.Phones)
                {
                    if (phone.HasAcousticsValue && phone.Acoustics.HasSegmentInterval)
                    {
                        string phonePath = string.Format(CultureInfo.InvariantCulture,
                            "{0}.Phone[{1}]", syllablePath, phoneNumber);

                        foreach (var interval in phone.Acoustics.SegmentIntervals)
                        {
                            CheckSegment(errors, item.Id, phonePath, interval, ref previousPhoneEnd);
                        }
                    }

                    phoneNumber++;
                }

                syllableNumber++;
            }

            wordNumber++;
        }

        sentenceNumber++;
    }
}
/// <summary>
/// Creates a script item instance for the specified language and engine.
/// A language-specific type named "Microsoft.Tts.Offline.&lt;language&gt;ScriptItem" is looked up
/// in the assembly of <see cref="ScriptItem"/>; a plain script item is used when none is found.
/// </summary>
/// <param name="language">Which language to create for.</param>
/// <param name="engine">Which engine to create for.</param>
/// <returns>The created script item.</returns>
/// <exception cref="NotSupportedException">
/// Thrown when creating the instance raises a <see cref="MissingMethodException"/>.
/// </exception>
public static ScriptItem CreateScriptItem(Language language, EngineType engine)
{
    // The neutral language gets a default script item; the engine is not applied to it.
    if (language == Language.Neutral)
    {
        return new ScriptItem();
    }

    string typeName = string.Concat("Microsoft.Tts.Offline.", language.ToString(), "ScriptItem");

    try
    {
        // TODO: Enable logging for the fallback case for easier diagnostics
        ScriptItem created = (ScriptItem)typeof(ScriptItem).Assembly.CreateInstance(typeName)
            ?? new ScriptItem(language);

        created.Engine = engine;
        created.Language = language;
        return created;
    }
    catch (MissingMethodException mme)
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "Language {0} is not implemented.", language);
        throw new NotSupportedException(message, mme);
    }
}
/// <summary>
/// Replaces the pronunciation of every pronounced normal word in the item with its
/// sliced form, when slicing produces a non-empty result.
/// </summary>
/// <param name="item">Entry whose words are sliced.</param>
public void Slice(ScriptItem item)
{
    if (item == null)
    {
        throw new ArgumentNullException("item");
    }

    foreach (ScriptWord word in item.AllPronouncedNormalWords)
    {
        string sliced = SliceWord(word);

        // An empty result leaves the word's pronunciation untouched.
        if (string.IsNullOrEmpty(sliced))
        {
            continue;
        }

        word.Pronunciation = sliced;
    }
}
/// <summary>
/// Speak a script sentence in a script item.
/// </summary>
/// <param name="scriptItem">The script item; stored as the current item.</param>
/// <param name="scriptSentence">The script sentence whose text is spoken; stored as the current sentence.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="scriptSentence"/> is null.</exception>
public void Speak(ScriptItem scriptItem, ScriptSentence scriptSentence)
{
    // Validate before touching any state: the sentence is dereferenced below, and
    // failing late would leave the "current" fields pointing at a null sentence.
    if (scriptSentence == null)
    {
        throw new ArgumentNullException("scriptSentence");
    }

    _curScriptItem = scriptItem;
    _curScriptSententence = scriptSentence;

    _donator.SpeechSynthesizer.Speak(_curScriptSententence.Text);
}
/// <summary>
/// Records the script item that is about to be spoken.
/// </summary>
/// <param name="scriptItem">Script item to remember as the current one.</param>
public void PrepareSpeak(ScriptItem scriptItem)
{
    this._curScriptItem = scriptItem;
}
/// <summary>
/// Prepare for speak by forwarding the script item to the script feature import
/// and to the customized feature extraction, when they are available.
/// </summary>
/// <param name="scriptItem">Script item.</param>
public void PrepareSpeak(ScriptItem scriptItem)
{
    // The import stays gated on its configuration as before, but the object that is
    // actually dereferenced is now checked as well, so a configured-but-not-created
    // import no longer causes a NullReferenceException.
    // NOTE(review): presumably _scriptFeatureImport is created whenever the config is set — confirm.
    if (_scriptFeatureImportConfig != null && _scriptFeatureImport != null)
    {
        _scriptFeatureImport.PrepareSpeak(scriptItem);
    }

    if (_customizedFeatureExtraction != null)
    {
        _customizedFeatureExtraction.PrepareSpeak(scriptItem);
    }
}
/// <summary>
/// Builds a TTS utterance from a script item, optionally interleaving silence words
/// taken from a segment file.
/// </summary>
/// <param name="item">Script item to convert; must not be null.</param>
/// <param name="segmentFile">Segment file consulted for silence segments; may be null.</param>
/// <param name="buildAllWords">When true, every script word is appended as a normal word.</param>
/// <param name="subSentenceIndex">Index of the only sentence to convert, or -1 to convert all sentences.</param>
/// <returns>The utterance that was built.</returns>
/// <exception cref="InvalidDataException">Thrown when an <c>EspException</c> occurs while building.</exception>
public TtsUtterance Build(ScriptItem item, SegmentFile segmentFile, bool buildAllWords, int subSentenceIndex)
{
    Helper.ThrowIfNull(item);

    TtsUtterance result = new TtsUtterance();
    bool hasSegments = segmentFile != null;
    int cursor = 0;

    try
    {
        // A leading silence segment becomes a silence word.
        if (hasSegments && segmentFile.WaveSegments[cursor].IsSilenceFeature)
        {
            cursor += AppendSilenceWord(result, segmentFile.WaveSegments[cursor].Label);
        }

        // Script word -> utterance word lookup, used later for the ToBI accent.
        Dictionary<ScriptWord, TtsWord> wordLookup = new Dictionary<ScriptWord, TtsWord>();

        int sentenceCounter = 0;
        foreach (ScriptSentence sentence in item.Sentences)
        {
            // When a specific sentence is requested, skip all the others.
            // The counter only advances while a specific sentence is requested.
            if (subSentenceIndex != -1 && sentenceCounter++ != subSentenceIndex)
            {
                continue;
            }

            // An unknown sentence type is treated as declarative.
            result.SentenceType = sentence.SentenceType != SentenceType.Unknown
                ? (TtsSentenceType)sentence.SentenceType
                : (TtsSentenceType)SentenceType.Declarative;

            result.SentenceEmotionType = (EmotionmlCategory)sentence.Emotion;

            foreach (ScriptWord word in sentence.Words)
            {
                if (buildAllWords || word.IsPronouncableNormalWord)
                {
                    cursor += AppendNormalWord(result, word);

                    // Remember which utterance word was just created for this script word.
                    wordLookup.Add(word, result.Words[result.Words.Count - 1]);

                    if (!hasSegments)
                    {
                        continue;
                    }

                    // Stop with this sentence once the segments are exhausted.
                    if (cursor >= segmentFile.WaveSegments.Count)
                    {
                        break;
                    }

                    if (segmentFile.WaveSegments[cursor].IsSilenceFeature)
                    {
                        cursor += AppendSilenceWord(result, segmentFile.WaveSegments[cursor].Label);
                    }
                }
                else if (buildAllWords || (NeedPunctuation && word.WordType == WordType.Punctuation))
                {
                    cursor += AppendPunctuationWord(result, word);
                }
            }
        }

        // Phone list, built with all pause durations set to zero.
        int[] pauses = new int[(int)TtsPauseLevel.PAU_IDX_SENTENCE + 1];
        Array.Clear(pauses, 0, pauses.Length);
        result.BuildPhoneList(Phoneme, pauses, 0, 0);

        // ToBI accent must be built after the phone list exists.
        BuildToBIInformation(wordLookup);

        // Phrase list, then context characters.
        result.BuildPhraseList();
        result.BuildContextCharacters();

        return result;
    }
    catch (EspException e)
    {
        string message = Helper.NeutralFormat("Build utterance error on sentence \"{0}\"", item.Id);
        throw new InvalidDataException(message, e);
    }
}
/// <summary>
/// Check whether an item is compliant with schema: the item needs a non-empty id and
/// text, every sentence needs text, every non-silence word needs a grapheme and every
/// phone needs a name.
/// </summary>
/// <param name="item">Item to be checked.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="item"/> is null.</exception>
/// <exception cref="InvalidDataException">Thrown on the first schema violation found.</exception>
public static void CheckSchema(ScriptItem item)
{
    if (item == null)
    {
        throw new ArgumentNullException("item");
    }

    // currently disable id format checking, we will enable it when
    // all script id are re-set
    // if (!Regex.IsMatch(item.Id, @"^[0-9]{10}$"))
    if (string.IsNullOrEmpty(item.Id))
    {
        string message = Helper.NeutralFormat("Script id [{0}] is invalid.", item.Id);
        throw new InvalidDataException(message);
    }

    if (string.IsNullOrEmpty(item.Text))
    {
        // This used to reuse the "id is invalid" message, which pointed at the wrong field.
        string message = Helper.NeutralFormat("Text of script item [{0}] is empty.", item.Id);
        throw new InvalidDataException(message);
    }

    foreach (ScriptSentence sentence in item.Sentences)
    {
        if (string.IsNullOrEmpty(sentence.Text))
        {
            string message = Helper.NeutralFormat("Sentence text in item [{0}] is empty.", item.Id);
            throw new InvalidDataException(message);
        }

        foreach (ScriptWord word in sentence.Words)
        {
            // Silence words are allowed to have no grapheme.
            if (string.IsNullOrEmpty(word.Grapheme) && word.WordType != WordType.Silence)
            {
                string message = Helper.NeutralFormat("word in item [{0}] is empty.", item.Id);
                throw new InvalidDataException(message);
            }

            foreach (ScriptSyllable syllable in word.Syllables)
            {
                foreach (ScriptPhone phone in syllable.Phones)
                {
                    if (string.IsNullOrEmpty(phone.Name))
                    {
                        string message = Helper.NeutralFormat("phone in item [{0}] is empty.", item.Id);
                        throw new InvalidDataException(message);
                    }
                }
            }
        }
    }
}
/// <summary>
/// Load one script item from the xmltextreader. The reader is expected to be
/// positioned on the item element whose attributes are read here.
/// </summary>
/// <param name="reader">XmlTextReader.</param>
/// <param name="contentController">
/// ContentControler, or null to use a default-constructed one.
/// </param>
/// <param name="language">The language of the script.</param>
/// <returns>ScriptItem that read.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="reader"/> is null.</exception>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="contentController"/> is not a ContentControler,
/// or when the "id" attribute is missing or empty.
/// </exception>
public static ScriptItem LoadItem(XmlTextReader reader, object contentController, Language language)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    ContentControler scriptContentController = contentController as ContentControler;
    if (scriptContentController == null)
    {
        if (contentController != null)
        {
            throw new ArgumentException("Invalid contentController type");
        }

        scriptContentController = new ContentControler();
    }

    ScriptItem item = new ScriptItem(language);

    // get id, domain and reading difficulty
    string id = reader.GetAttribute("id");
    if (string.IsNullOrEmpty(id))
    {
        string message = "Script id value cannot be null.";
        throw new ArgumentException(message);
    }

    item.Id = id;

    string domain = reader.GetAttribute("domain");
    if (!string.IsNullOrEmpty(domain))
    {
        item.Domain = ScriptItem.StringToDomainType(domain);
    }

    string frequency = reader.GetAttribute("frequency");
    if (!string.IsNullOrEmpty(frequency))
    {
        // Script files are machine-readable data: parse culture-independently,
        // like the difficulty value below.
        item.Frequency = int.Parse(frequency, CultureInfo.InvariantCulture);
    }

    string score = reader.GetAttribute("difficulty");
    if (!string.IsNullOrEmpty(score))
    {
        item.ReadingDifficulty = double.Parse(score, CultureInfo.InvariantCulture);
    }

    // get the text and sentences
    if (!reader.IsEmptyElement)
    {
        while (reader.Read())
        {
            if (reader.NodeType == XmlNodeType.Element && reader.Name == "sent")
            {
                ScriptSentence sentence = LoadSentence(reader, scriptContentController, language);
                sentence.ScriptItem = item;
                item.Sentences.Add(sentence);
            }
            else if (reader.NodeType == XmlNodeType.Element && reader.Name == "text")
            {
                if (reader.IsEmptyElement)
                {
                    // <text/> has no content node; reading ahead here would
                    // swallow the following sibling (possibly a <sent> element).
                    item.Text = string.Empty;
                }
                else
                {
                    reader.Read();
                    item.Text = reader.Value;
                }
            }
            else if (reader.NodeType == XmlNodeType.Element && reader.Name == "comments")
            {
                if (scriptContentController.LoadComments)
                {
                    item.TtsXmlComments.Parse(reader);
                    item.TtsXmlComments.Tag = item;
                }
                else
                {
                    reader.Skip();
                }
            }
            else if (reader.NodeType == XmlNodeType.EndElement && reader.Name == "si")
            {
                break;
            }
        }
    }

    return item;
}
/// <summary>
/// Checks every syllable of a word pronunciation and reports the first problem found.
/// </summary>
/// <param name="entry">Script item supplying the language, separators and vowel limits.</param>
/// <param name="word">Pronunciation of the word, with syllables joined by the syllable separator.</param>
/// <returns>The first data error found, or null when all syllables pass.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="entry"/> is null or <paramref name="word"/> is null or empty.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when the pronunciation separator, or its syllable separator, is missing.
/// </exception>
public static DataError ValidateSyllables(ScriptItem entry, string word)
{
    if (entry == null)
    {
        throw new ArgumentNullException("entry");
    }

    if (entry.PronunciationSeparator == null)
    {
        throw new ArgumentException(
            Helper.NeutralFormat("entry.PronunciationSeparator should not be null."));
    }

    if (string.IsNullOrEmpty(entry.PronunciationSeparator.Syllable))
    {
        throw new ArgumentException(
            Helper.NeutralFormat("entry.PronunciationSeparator.Syllable should not be null."));
    }

    if (string.IsNullOrEmpty(word))
    {
        throw new ArgumentNullException("word");
    }

    Phoneme phoneme = Localor.GetPhoneme(entry.Language);

    string[] rawSyllables = word.Split(
        new string[] { entry.PronunciationSeparator.Syllable },
        StringSplitOptions.None);

    for (int index = 0; index < rawSyllables.Length; index++)
    {
        string current = rawSyllables[index].Trim();

        if (string.IsNullOrEmpty(current))
        {
            string emptyMessage = string.Format(CultureInfo.InvariantCulture,
                "The syllable[{0}] of word[{1}] pronunciation is empty by separator [{2}]",
                index, word, entry.PronunciationSeparator.Syllable);
            return new DataError("null", emptyMessage, entry.Id);
        }

        // Special units are wrapped in underscores and are not validated further.
        if (Regex.IsMatch(current, "^_(.*)_$"))
        {
            continue;
        }

        string[] phoneItems = entry.PronunciationSeparator.SplitPhones(current);
        TtsMetaUnit unit = new TtsMetaUnit(entry.Language)
        {
            Name = string.Join(" ", phoneItems)
        };
        string[] phones = unit.GetPhonesName();

        // Tell whether is a valid nucleus,
        // which could be syllable with no vowel in some languages, like fr-CA
        SliceData sliceData = Localor.GetSliceData(entry.Language);
        bool isKnownNucleus = sliceData.NucleusSlices.IndexOf(unit.Name) >= 0;

        if (!isKnownNucleus)
        {
            bool acceptable;
            if (entry.Language == Language.EnUS)
            {
                // syllable that must have vowels
                acceptable = IsGoodSyllableWithVowel(entry, phoneme, phones);
            }
            else if (entry.Language == Language.RuRU)
            {
                // A Russian syllable can have no sonorant
                acceptable = IsSyllableWithLessVowel(entry, phoneme, phones);
            }
            else
            {
                // syllable that must have vowels or sonorants
                acceptable = IsGoodSyllableWithSonorant(entry, phoneme, phones);
            }

            if (!acceptable)
            {
                int[] vowelPositions = phoneme.GetVowelIndexes(phones);
                string format =
                    "There must be minimum {0} vowels or maximum {1} included in syllable " +
                    "or the syllable should have one sonorant and more than one consonants, " +
                    "but {2} vowels are found in syllable [{3}] of word [{4}].";
                string vowelMessage = string.Format(CultureInfo.InvariantCulture, format,
                    entry.MinVowelCountInSyllable, entry.MaxVowelCountInSyllable,
                    vowelPositions.Length, rawSyllables[index], word);
                return new DataError("null", vowelMessage, entry.Id);
            }
        }

        // check slice's pronunciation
        DataError sliceError = ValidateSlices(entry, current);
        if (sliceError != null)
        {
            return sliceError;
        }
    }

    return null;
}