/// <summary>
/// Checks that the pronounceable words of the engine utterance, and the phones
/// belonging to them, are consistent with the pronounced words of the script sentence.
/// </summary>
/// <param name="scriptSentence">Script sentence.</param>
/// <param name="utt">Tts utterance.</param>
/// <exception cref="InvalidDataException">
/// Thrown when a word text or a phone name differs, or when the number of
/// pronounceable engine words differs from the number of pronounced script words.
/// </exception>
public static void CheckPronouncedWordsMatched(ScriptSentence scriptSentence, SP.TtsUtterance utt)
{
    int wordIndex = 0;
    int phoneIndex = 0;

    foreach (TtsWord uttWord in utt.Words)
    {
        if (uttWord.IsPronounceable)
        {
            // The engine produced more pronounceable words than the script holds.
            // Report the count mismatch here rather than indexing past the end of
            // PronouncedWords below.
            if (wordIndex >= scriptSentence.PronouncedWords.Count)
            {
                throw new InvalidDataException("Runtime's normal words' count must " +
                    "equal to the script's.");
            }

            ScriptWord scriptWord = scriptSentence.PronouncedWords[wordIndex];

            string wordText = utt.OriginalText.Substring((int)uttWord.TextOffset,
                (int)uttWord.TextLength).ToLower(CultureInfo.CurrentCulture);
            if (!wordText.Equals(scriptWord.Description.ToLower(CultureInfo.CurrentCulture)))
            {
                string message = Helper.NeutralFormat("Runtime's word [{0}] " +
                    "and script word [{1}] has no consistence.",
                    wordText, scriptWord.Description);
                throw new InvalidDataException(message);
            }

            foreach (ScriptSyllable scriptSyllable in scriptWord.Syllables)
            {
                foreach (ScriptPhone scriptPhone in scriptSyllable.Phones)
                {
                    // An exhausted engine phone list is compared as an empty name, so it
                    // is reported as a phone mismatch.
                    string uttPhoneText = string.Empty;
                    if (phoneIndex < utt.Phones.Count)
                    {
                        // Only the first space-separated token of the engine pronunciation is compared.
                        string[] items = utt.Phones[phoneIndex].Pronunciation.Split(new char[] { ' ' });
                        uttPhoneText = items[0].ToLower(CultureInfo.CurrentCulture);
                    }

                    if (!uttPhoneText.Equals(scriptPhone.Name.ToLower(CultureInfo.CurrentCulture)))
                    {
                        string message = Helper.NeutralFormat("Runtime's phone [{0}] " +
                            "and script phone [{1}] has no consistence.",
                            uttPhoneText, scriptPhone.Name);
                        throw new InvalidDataException(message);
                    }

                    phoneIndex++;
                }
            }

            wordIndex++;
        }
        else if (uttWord.IsSilence)
        {
            // A silence word advances the engine phone cursor by one phone.
            phoneIndex++;
        }
    }

    if (wordIndex != scriptSentence.PronouncedWords.Count)
    {
        throw new InvalidDataException("Runtime's normal words' count must " +
            "equal to the script's.");
    }
}
/// <summary>
/// Verifies that every pronounceable engine word has the same text as the
/// script's pronounced word at the same ordinal, and that both sides have
/// the same number of such words.
/// </summary>
/// <param name="utt">Tts utterance.</param>
/// <param name="scriptSentence">Script sentence.</param>
/// <exception cref="InvalidDataException">Thrown on a text or count mismatch.</exception>
public static void CheckWordConsistency(SP.TtsUtterance utt, ScriptSentence scriptSentence)
{
    int matchedCount = 0;

    foreach (TtsWord engineWord in utt.Words)
    {
        if (!engineWord.IsPronounceable)
        {
            continue;
        }

        int offset = (int)engineWord.TextOffset;
        int length = (int)engineWord.TextLength;
        string engineText = utt.OriginalText.Substring(offset, length).ToLower(CultureInfo.CurrentCulture);

        // Surplus engine words are not compared here; they are caught by the count check below.
        if (matchedCount < scriptSentence.PronouncedWords.Count)
        {
            string scriptText = scriptSentence.PronouncedWords[matchedCount].Description
                .ToLower(CultureInfo.CurrentCulture);
            if (!engineText.Equals(scriptText))
            {
                throw new InvalidDataException(Helper.NeutralFormat(
                    "Runtime's word [{0}] " + "and script word [{1}] has no consistence.",
                    engineText,
                    scriptSentence.PronouncedWords[matchedCount].Description));
            }
        }

        matchedCount++;
    }

    if (matchedCount != scriptSentence.PronouncedWords.Count)
    {
        throw new InvalidDataException("Runtime's normal words' count must equal to the script's.");
    }
}
/// <summary>
/// Initializes a new instance of the PhoneObjectMatch class. Validates that the
/// script and the utterance agree, collects the pronounced phones of both sides,
/// and remembers the first engine phone when the utterance has any phones.
/// </summary>
/// <param name="scriptSentence">Script sentence.</param>
/// <param name="utt">Tts utterance.</param>
public PhoneObjectMatch(ScriptSentence scriptSentence, SP.TtsUtterance utt)
{
    EngineMatchToScript.CheckPronouncedWordsMatched(scriptSentence, utt);
    BuildPronouncedPhones(scriptSentence, utt);

    // Nothing to remember when the engine produced no phones.
    if (!(utt.Phones.Count > 0))
    {
        return;
    }

    _engineFirstPhone = utt.Phones[0];
}
/// <summary>
/// Copy engine's silence words to xml script.
/// Walks the engine words in order with a parallel index into the script words:
/// a script silence found where the engine has a pronounceable word is removed,
/// and a silence word is inserted where the engine has a silence and the script does not.
/// </summary>
/// <param name="utterance">Tts utterance.</param>
/// <param name="scriptSentence">Script sentence; its Words list is modified in place.</param>
/// <exception cref="InvalidDataException">Thrown by the pronounced-word consistency
/// check, or by the final count check.</exception>
public static void CopySilenceTo(this SP.TtsUtterance utterance, ScriptSentence scriptSentence)
{
    // Fail fast when the pronounced words or phones of the two sides do not match.
    CheckPronouncedWordsMatched(scriptSentence, utterance);
    int scriptWordIndex = 0;
    foreach (TtsWord uttWord in utterance.Words)
    {
        if (uttWord.IsPronounceable)
        {
            // The script has a silence at this position but the engine has a
            // pronounceable word: drop the script silence.
            if (scriptWordIndex < scriptSentence.Words.Count && scriptSentence.Words[scriptWordIndex].WordType == WordType.Silence)
            {
                scriptSentence.Words.RemoveAt(scriptWordIndex);
            }
        }
        else if (uttWord.IsSilence && ((scriptWordIndex < scriptSentence.Words.Count && scriptSentence.Words[scriptWordIndex].WordType != WordType.Silence) || scriptWordIndex == scriptSentence.Words.Count))
        {
            // The engine has a silence where the script has a non-silence word, or the
            // script has run out of words: insert a matching silence word. The engine's
            // first phone id decides between the silence phone and the short pause phone.
            if (uttWord.FirstSyllable.FirstPhone.PhoneID == Phoneme.SilencePhoneId)
            {
                InsertSilenceWord(scriptSentence, scriptWordIndex, Phoneme.SilencePhone);
            }
            else
            {
                Debug.Assert(uttWord.FirstSyllable.FirstPhone.IsShortPauseSupported, "Short pause should be supported.");
                InsertSilenceWord(scriptSentence, scriptWordIndex, Phoneme.ShortPausePhone);
            }
        }
        // The script index advances once per engine word, whichever branch ran.
        scriptWordIndex++;
    }
    // NOTE(review): scriptWordIndex is incremented exactly once per element of
    // utterance.Words, so this condition appears unable to be true. Comparing
    // against scriptSentence.Words.Count may have been intended - confirm.
    if (utterance.Words.Count != scriptWordIndex)
    {
        throw new InvalidDataException("Runtime's words' count must equal to " + "the script's.");
    }
}
/// <summary>
/// Copies the emphasis of each pronounced script word onto the pronounceable
/// engine word at the same ordinal. Engine words beyond the number of
/// pronounced script words are left untouched.
/// </summary>
/// <param name="utt">Tts utterance.</param>
/// <param name="scriptSentence">Script sentence.</param>
private static void UpdateEmphasis(SP.TtsUtterance utt, ScriptSentence scriptSentence)
{
    int scriptIndex = 0;
    System.Console.WriteLine("warning: update the Emphasis!");

    foreach (TtsWord engineWord in utt.Words)
    {
        if (!engineWord.IsPronounceable || scriptIndex >= scriptSentence.PronouncedWords.Count)
        {
            continue;
        }

        // The script value is carried across through its integer value.
        int emphasisValue = (int)scriptSentence.PronouncedWords[scriptIndex].Emphasis;
        engineWord.Emphasis = (SP.TtsEmphasis)emphasisValue;
        scriptIndex++;
    }
}
/// <summary>
/// Sets the ToBI final boundary tone of each pronounceable engine word from the
/// pronounced script word at the same ordinal. The tone is first reset to
/// K_NOBND and then overwritten when the script word carries a label.
/// </summary>
/// <param name="utt">Tts utterance.</param>
/// <param name="scriptSentence">Script sentence.</param>
private static void UpdateBoundaryTone(SP.TtsUtterance utt, ScriptSentence scriptSentence)
{
    int scriptIndex = 0;
    System.Console.WriteLine("warning: update the BoundaryTone!");

    foreach (TtsWord engineWord in utt.Words)
    {
        if (!engineWord.IsPronounceable || scriptIndex >= scriptSentence.PronouncedWords.Count)
        {
            continue;
        }

        // Reset first so a word without a script label ends up with no boundary.
        engineWord.ToBIFinalBoundaryTone = TtsTobiBoundaryTone.K_NOBND;

        TobiLabel scriptLabel = scriptSentence.PronouncedWords[scriptIndex].TobiFinalBoundaryTone;
        if (scriptLabel != null)
        {
            engineWord.ToBIFinalBoundaryTone = StringToTobiBoundary(scriptLabel.Label);
        }

        scriptIndex++;
    }
}
/// <summary>
/// Records the given script item and sentence as the current ones, then asks
/// the speech synthesizer to speak the sentence text.
/// </summary>
/// <param name="scriptItem">The script item.</param>
/// <param name="scriptSentence">The script sentence.</param>
public void Speak(ScriptItem scriptItem, ScriptSentence scriptSentence)
{
    // The current item/sentence are stored before speaking starts.
    _curScriptItem = scriptItem;
    _curScriptSententence = scriptSentence;

    var synthesizer = _donator.SpeechSynthesizer;
    synthesizer.Speak(_curScriptSententence.Text);
}
/// <summary>
/// Dump the data in the words of the utterance into script words and append
/// them to the sentence.
/// </summary>
/// <param name="sentence">The script sentence which to store the data dumped from the words.</param>
/// <param name="utt">The utterance.</param>
/// <param name="ttsEngine">The object ttsEngine to help to convert the Pos and get sentence id.</param>
/// <param name="scriptLanguage">The language of the script.</param>
/// <exception cref="InvalidDataException">Thrown when an utterance word has a null word text.</exception>
private static void DumpWords(ScriptSentence sentence, SP.TtsUtterance utt,
    SP.TtsEngine ttsEngine, Language scriptLanguage)
{
    Debug.Assert(sentence != null, "Sentence should not be null");
    Debug.Assert(utt != null, "Utt should not be null");
    Debug.Assert(ttsEngine != null, "ttsEngine should not be null");

    // Running cursors handed by reference to DumpSyllables: phone position,
    // F0 start position and unit position within the utterance.
    int phoneIndex = 0;
    int f0StartIndex = 0;
    int unitIndex = 0;

    // Position of the current word in utt.Words, used only for error reporting.
    int wordIndex = 0;

    foreach (SP.TtsWord word in utt.Words)
    {
        if (word.WordText == null)
        {
            // The two message halves are now joined with a space; previously they
            // rendered as "in theutterance".
            string message = Helper.NeutralFormat("The word text of word [{0}]: \"{1}\" in the " +
                "utterance is empty.", wordIndex, word.WordText);
            throw new InvalidDataException(message);
        }

        ScriptWord scriptWord = new ScriptWord();

        // Tag the language on the word level only when it differs from the
        // script language; words in the script language carry no word-level tag.
        if ((Language)word.LangId != scriptLanguage)
        {
            scriptWord.Language = (Language)word.LangId;
        }

        // According to the schema a silence word has no grapheme value (<w v="").
        if (word.WordType != TtsWordType.WT_SILENCE)
        {
            scriptWord.Grapheme = word.WordText;
        }

        if (!string.IsNullOrEmpty(word.Pronunciation))
        {
            scriptWord.Pronunciation = word.Pronunciation.ToLowerInvariant();
        }

        scriptWord.WordType = ConvertWordType(word);

        // Dump the Part-Of-Speech only for words whose text is not blank.
        // For a "sil" word the text is " " and the pos id is out of range,
        // so the pos is not dumped in that case.
        if (!string.IsNullOrEmpty(word.WordText.Trim()))
        {
            scriptWord.PosString = ttsEngine.PosTable.IdToString(word.Pos);
        }

        scriptWord.Break = (TtsBreak)word.BreakLevel;
        scriptWord.Emphasis = (TtsEmphasis)word.Emphasis;
        scriptWord.TobiFinalBoundaryTone = ConvertTobiFBT(word.ToBIFinalBoundaryTone);
        scriptWord.PronSource = (TtsPronSource)word.PronSource;
        scriptWord.OffsetInString = (int)word.TextOffset;
        scriptWord.LengthInString = (int)word.TextLength;

        DumpSyllables(scriptWord, utt, word, ref phoneIndex, ref unitIndex, ref f0StartIndex, ttsEngine);
        sentence.Words.Add(scriptWord);

        wordIndex++;
    }
}
/// <summary>
/// Load one sentence from the xml text reader. Reads the "type" and "emotion"
/// attributes of the current element, then consumes child nodes until the end
/// tag "sent" is reached.
/// </summary>
/// <param name="reader">XmlTextReader.</param>
/// <param name="scriptContentController">ContentControler.</param>
/// <param name="language">The language of the script.</param>
/// <returns>Sentence that read.</returns>
private static ScriptSentence LoadSentence(XmlTextReader reader,
    ContentControler scriptContentController, Language language)
{
    Debug.Assert(reader != null);
    Debug.Assert(scriptContentController != null);

    ScriptSentence sentence = new ScriptSentence(language);

    // get sentence type
    string type = reader.GetAttribute("type");
    if (!string.IsNullOrEmpty(type))
    {
        sentence.SentenceType = ScriptSentence.StringToSentenceType(type);
    }

    // get sentence emotion type
    string emotion = reader.GetAttribute("emotion");
    if (!string.IsNullOrEmpty(emotion))
    {
        sentence.Emotion = ScriptSentence.StringToEmotionType(emotion);
    }

    // get the text and word list
    while (reader.Read())
    {
        if (reader.NodeType == XmlNodeType.EndElement && reader.Name == "sent")
        {
            break;
        }

        if (reader.NodeType != XmlNodeType.Element)
        {
            continue;
        }

        // A self-closing element (e.g. <words/>) has no matching end tag. It is
        // detected up front in each branch so the inner loops cannot run on into
        // the following siblings looking for an end tag that never comes.
        if (reader.Name == "text")
        {
            if (reader.IsEmptyElement)
            {
                sentence.Text = string.Empty;
            }
            else
            {
                // Step onto the node following the start tag and take its value.
                reader.Read();
                sentence.Text = reader.Value;
            }
        }
        else if (reader.Name == "words")
        {
            if (!reader.IsEmptyElement)
            {
                while (reader.Read())
                {
                    if (reader.NodeType == XmlNodeType.Element && reader.Name == "w")
                    {
                        ScriptWord word = LoadWord(reader, scriptContentController, language);
                        word.Sentence = sentence;
                        sentence.Words.Add(word);
                    }
                    else if (reader.NodeType == XmlNodeType.EndElement && reader.Name == "words")
                    {
                        break;
                    }
                }
            }
        }
        else if (reader.Name == "accept")
        {
            // Each "accept" element contributes one word list, possibly empty.
            List<ScriptWord> acceptSent = new List<ScriptWord>();
            if (!reader.IsEmptyElement)
            {
                while (reader.Read())
                {
                    if (reader.NodeType == XmlNodeType.Element && reader.Name == "w")
                    {
                        ScriptWord acceptWord = LoadWord(reader, scriptContentController, language);
                        acceptWord.Sentence = sentence;
                        acceptSent.Add(acceptWord);
                    }
                    else if (reader.NodeType == XmlNodeType.EndElement && reader.Name == "accept")
                    {
                        break;
                    }
                }
            }

            sentence.AcceptSentences.Add(acceptSent);
        }
        else if (reader.Name == "nes")
        {
            if (!reader.IsEmptyElement)
            {
                while (reader.Read())
                {
                    if (reader.NodeType == XmlNodeType.Element && reader.Name == "ne")
                    {
                        ScriptNamedEntity entity = LoadNamedEntity(reader, sentence, scriptContentController);
                        sentence.NamedEntities.Add(entity);
                    }
                    else if (reader.NodeType == XmlNodeType.EndElement && reader.Name == "nes")
                    {
                        break;
                    }
                }
            }
        }
        else if (reader.Name == "comments")
        {
            if (scriptContentController.LoadComments)
            {
                sentence.TtsXmlComments.Parse(reader);
                sentence.TtsXmlComments.Tag = sentence;
            }
            else
            {
                reader.Skip();
            }
        }
    }

    // Deleted words are recovered from the comments, so only when comments were loaded.
    if (scriptContentController.LoadComments)
    {
        ParseDeletedWordsFromComments(sentence, language);
    }

    return sentence;
}
/// <summary>
/// Collects the non-silence phones of the script and of the engine utterance
/// into the two pronounced-phone lists and verifies both lists have equal length.
/// </summary>
/// <param name="scriptSentence">Script sentence.</param>
/// <param name="utt">Tts utterance.</param>
/// <exception cref="InvalidDataException">Thrown when the two counts differ.</exception>
private void BuildPronouncedPhones(ScriptSentence scriptSentence, SP.TtsUtterance utt)
{
    // Script side: a phone is skipped when its name has the silence feature.
    foreach (ScriptPhone phone in scriptSentence.ScriptPhones)
    {
        if (Phoneme.IsSilenceFeature(phone.Name))
        {
            continue;
        }

        _scriptPronouncedPhones.Add(phone);
    }

    // Engine side: a phone is skipped when it is flagged as silence.
    foreach (TtsPhone phone in utt.Phones)
    {
        if (phone.IsSilence)
        {
            continue;
        }

        _enginePronouncedPhones.Add(phone);
    }

    bool sameCount = _scriptPronouncedPhones.Count == _enginePronouncedPhones.Count;
    if (!sameCount)
    {
        throw new InvalidDataException("Runtime's pronounced phones' count has no " +
            "consistence with script's.");
    }
}
/// <summary>
/// Applies the word-level updates enabled in the configuration: pronunciation
/// first, then part of speech.
/// </summary>
/// <param name="utt">Tts utterance.</param>
/// <param name="scriptSentence">Script sentence.</param>
/// <param name="engine">Tts engine.</param>
private void UpdateWords(SP.TtsUtterance utt, ScriptSentence scriptSentence, TtsEngine engine)
{
    bool pronunciationRequested = _config.UpdatePronunciation;
    if (pronunciationRequested)
    {
        UpdatePronunciation(utt, scriptSentence, engine);
    }

    // Read only after the pronunciation step, matching the sequential evaluation order.
    bool partOfSpeechRequested = _config.UpdatePartOfSpeech;
    if (partOfSpeechRequested)
    {
        UpdatePartOfSpeech(utt, scriptSentence, engine);
    }
}
/// <summary>
/// Pushes the external F0 values of every pronounced script word into the
/// internal utterance, syllable by syllable and phone by phone, then optionally
/// runs the F0 consistency fix.
/// </summary>
/// <param name="intUtt">Internal utterance.</param>
/// <param name="extSentence">External script sentence.</param>
private void F0Update(SP.TtsUtterance intUtt, ScriptSentence extSentence)
{
    TraceLog(_logger, true, "Updated F0 Position (Indicated by normal words index):");

    int normalWordIndex = 0;
    foreach (ScriptWord extWord in extSentence.PronouncedWords)
    {
        for (int syllableIndex = 0; syllableIndex < extWord.Syllables.Count; syllableIndex++)
        {
            ScriptSyllable extSyllable = extWord.Syllables[syllableIndex];

            // The syllable-level update is issued with phone index 0.
            IUpdateHelper syllableUpdater = new SyllableUpdateHelper();
            ProcessF0Update(syllableUpdater, intUtt, extSyllable, 0, syllableIndex, normalWordIndex);

            for (int phoneIndex = 0; phoneIndex < extSyllable.Phones.Count; phoneIndex++)
            {
                ScriptPhone extPhone = extSyllable.Phones[phoneIndex];

                // A fresh helper is created for every phone.
                IUpdateHelper phoneUpdater = new PhoneUpdateHelper();
                ProcessF0Update(phoneUpdater, intUtt, extPhone, phoneIndex, syllableIndex, normalWordIndex);
            }
        }

        normalWordIndex++;
    }

    if (_config.FixF0NoConsistenceNum > 0)
    {
        FixF0NoConsistence(intUtt);
    }

    TraceLogLine(_logger);
}
/// <summary>
/// Pushes external durations into the internal utterance. Pronounced words are
/// updated per syllable, phone and state when normal-word updating is enabled;
/// script silence words are updated when silence updating is enabled.
/// </summary>
/// <param name="intUtt">Internal utterance.</param>
/// <param name="extSentence">External script sentence.</param>
private void DurationUpdate(SP.TtsUtterance intUtt, ScriptSentence extSentence)
{
    // The length of each frame in millisecond.
    float frameLength = _serviceProvider.Engine.Config.SamplesPerFrame * 1000 /
        (float)_serviceProvider.Engine.Config.SamplesPerSecond;

    // Counts the pronounced words seen so far; it advances even when
    // normal-word updating is switched off.
    int normalWordIndex = 0;
    IUpdateHelper durationUpdater;

    TraceLog(_logger, true, "Updated Duration Position (Indicated by normal words index):");

    foreach (ScriptWord scriptThisWord in extSentence.Words)
    {
        if (scriptThisWord.IsPronounced)
        {
            if (_config.UpdateNormalWordDuration)
            {
                for (int syllableIndex = 0; syllableIndex < scriptThisWord.Syllables.Count; syllableIndex++)
                {
                    ScriptSyllable extSyllable = scriptThisWord.Syllables[syllableIndex];

                    // The syllable-level update is issued with phone index 0.
                    durationUpdater = new SyllableUpdateHelper();
                    ProcessDurationUpdate(durationUpdater, intUtt, extSyllable, 0, 0,
                        syllableIndex, normalWordIndex, frameLength);

                    for (int phoneIndex = 0; phoneIndex < extSyllable.Phones.Count; phoneIndex++)
                    {
                        ScriptPhone extPhone = extSyllable.Phones[phoneIndex];

                        durationUpdater = new PhoneUpdateHelper();
                        ProcessDurationUpdate(durationUpdater, intUtt, extPhone, 0, phoneIndex,
                            syllableIndex, normalWordIndex, frameLength);

                        durationUpdater = new StateUpdateHelper();
                        UpdateStateDuration(durationUpdater, intUtt, extPhone, phoneIndex,
                            syllableIndex, normalWordIndex, frameLength);
                    }
                }
            }

            normalWordIndex++;
            continue;
        }

        if (scriptThisWord.WordType != WordType.Silence || !_config.UpdateScriptSilenceDuration)
        {
            continue;
        }

        // A silence is addressed through the index of the pronounced word before
        // it; this is -1 for a silence ahead of the first pronounced word.
        int precedingWordIndex = normalWordIndex - 1;

        // One helper instance is shared by the three silence-level calls.
        durationUpdater = new SilenceUpdateHelper();
        ProcessDurationUpdate(durationUpdater, intUtt, scriptThisWord.Syllables[0],
            NotUpdateState, 0, 0, precedingWordIndex, frameLength);
        ProcessDurationUpdate(durationUpdater, intUtt, scriptThisWord.Syllables[0].Phones[0],
            NotUpdateState, 0, 0, precedingWordIndex, frameLength);
        UpdateStateDuration(durationUpdater, intUtt, scriptThisWord.Syllables[0].Phones[0],
            0, 0, precedingWordIndex, frameLength);
    }

    TraceLogLine(_logger);
}
/// <summary>
/// Builds the ID of a sentence in the form "itemID-sentenceIndex", where the
/// sentence index is the 1-based position of the sentence in Sentences.
/// </summary>
/// <param name="scriptSentence">Script sentence to get ID.</param>
/// <returns>The sentence ID string.</returns>
/// <exception cref="ArgumentNullException">Thrown when the sentence is null.</exception>
/// <exception cref="InvalidDataException">Thrown when the sentence has no script
/// item, or is not found in Sentences.</exception>
public string GetSentenceId(ScriptSentence scriptSentence)
{
    if (scriptSentence == null)
    {
        throw new ArgumentNullException("scriptSentence");
    }

    if (scriptSentence.ScriptItem == null)
    {
        string noItemMessage = Helper.NeutralFormat(
            "Empty script item detected, script sentence should belongs to one script item");
        throw new InvalidDataException(noItemMessage);
    }

    int zeroBasedPosition = Sentences.IndexOf(scriptSentence);
    if (zeroBasedPosition < 0)
    {
        string notFoundMessage = Helper.NeutralFormat("Can't find sentence in item");
        throw new InvalidDataException(notFoundMessage);
    }

    // The ID prefix comes from the sentence's own script item.
    int oneBasedPosition = zeroBasedPosition + 1;
    return Helper.NeutralFormat("{0}-{1}", scriptSentence.ScriptItem.Id, oneBasedPosition);
}
/// <summary>
/// Load one script named entity from the xml text reader. Reads the "type",
/// "v", "pos", "s" and "e" attributes of the current element and resolves the
/// start/end indexes against the sentence's text words.
/// </summary>
/// <param name="reader">The XML reader instance to read data from.</param>
/// <param name="sentence">Script sentence.</param>
/// <param name="scriptContentController">ContentControler.</param>
/// <returns>ScriptNamedEntity instance that read.</returns>
/// <exception cref="InvalidDataException">Thrown when the start or end index is
/// outside the range of the sentence's text words.</exception>
public static ScriptNamedEntity LoadNamedEntity(XmlTextReader reader,
    ScriptSentence sentence, ContentControler scriptContentController)
{
    Debug.Assert(reader != null);
    Debug.Assert(scriptContentController != null);

    ScriptNamedEntity entity = new ScriptNamedEntity();
    entity.Type = reader.GetAttribute("type");
    entity.Text = reader.GetAttribute("v");

    // The part of speech is optional; it is only assigned when present.
    string pos = reader.GetAttribute("pos");
    if (!string.IsNullOrEmpty(pos))
    {
        entity.PosString = pos;
    }

    Debug.Assert(sentence.Words.Count > 0);

    int startIndex = int.Parse(reader.GetAttribute("s"), CultureInfo.InvariantCulture);
    int endIndex = int.Parse(reader.GetAttribute("e"), CultureInfo.InvariantCulture);

    Collection<ScriptWord> graphemeWords = sentence.TextWords;

    // The two range conditions are joined with "||": the previous "&&" could never
    // be true, so an invalid start index surfaced as an indexer exception instead
    // of the InvalidDataException below.
    if (startIndex < 0 || startIndex >= graphemeWords.Count)
    {
        throw new InvalidDataException(Helper.NeutralFormat(
            "Invalid start index for sentence [{0}] : [{1}]",
            sentence.ScriptItem.GetSentenceId(sentence), startIndex));
    }

    entity.Start = graphemeWords[startIndex];

    if (endIndex < 0 || endIndex >= graphemeWords.Count)
    {
        throw new InvalidDataException(Helper.NeutralFormat(
            "Invalid end index for sentence [{0}] : [{1}]",
            sentence.ScriptItem.GetSentenceId(sentence), endIndex));
    }

    entity.End = graphemeWords[endIndex];

    return entity;
}
/// <summary>
/// Rebuilds the sentence's deleted-word dictionaries from the deleted-word
/// statuses stored in its comments. Every "w" element found in a status value
/// is loaded as a word, grouped by status position and ordered by delete index.
/// </summary>
/// <param name="scriptSentence">Script sentence to be parse.</param>
/// <param name="scriptLanguage">The language of the script.</param>
private static void ParseDeletedWordsFromComments(ScriptSentence scriptSentence, Language scriptLanguage)
{
    scriptSentence.DeletedWordsDict.Clear();
    scriptSentence.DeletedWordAndFollowingWordDict.Clear();

    if (!scriptSentence.TtsXmlComments.TtsXmlStatusDict.ContainsKey(ScriptSentence.DeletedWordStatusName))
    {
        return;
    }

    // position -> (delete index -> deleted word), both levels kept sorted by key.
    SortedDictionary<int, SortedDictionary<int, ScriptWord>> wordsByPosition =
        new SortedDictionary<int, SortedDictionary<int, ScriptWord>>();

    foreach (TtsXmlStatus status in
        scriptSentence.TtsXmlComments.TtsXmlStatusDict[ScriptSentence.DeletedWordStatusName])
    {
        using (StringReader textReader = new StringReader(status.OriginalValue))
        {
            XmlTextReader xmlReader = new XmlTextReader(textReader);
            if (xmlReader.IsEmptyElement)
            {
                continue;
            }

            while (xmlReader.Read())
            {
                if (xmlReader.NodeType != XmlNodeType.Element || xmlReader.Name != "w")
                {
                    continue;
                }

                ScriptWord deletedWord = LoadWord(xmlReader, null, scriptLanguage);
                deletedWord.Sentence = scriptSentence;
                scriptSentence.DeletedWordsDict.Add(deletedWord, status);

                // An unset position defaults to the end of the sentence and an
                // unset delete index defaults to 0.
                if (status.Position == TtsXmlStatus.UnsetPosition)
                {
                    status.Position = scriptSentence.Words.Count;
                }

                if (status.DelIndex == TtsXmlStatus.UnsetPosition)
                {
                    status.DelIndex = 0;
                }

                SortedDictionary<int, ScriptWord> wordsAtPosition;
                if (!wordsByPosition.TryGetValue(status.Position, out wordsAtPosition))
                {
                    wordsAtPosition = new SortedDictionary<int, ScriptWord>();
                    wordsByPosition.Add(status.Position, wordsAtPosition);
                }

                // To keep compatible with the old format (which doesn't contain this
                // parameter), bump the delete index until it is free at this position.
                while (wordsAtPosition.ContainsKey(status.DelIndex))
                {
                    status.DelIndex++;
                }

                wordsAtPosition.Add(status.DelIndex, deletedWord);
            }
        }
    }

    foreach (KeyValuePair<int, SortedDictionary<int, ScriptWord>> entry in wordsByPosition)
    {
        // Values enumerate in ascending delete-index order.
        List<ScriptWord> sameSpotWords = new List<ScriptWord>(entry.Value.Values);
        if (sameSpotWords.Count == 0)
        {
            continue;
        }

        // Every deleted word after the first is mapped to the deleted word before it.
        for (int i = 1; i < sameSpotWords.Count; i++)
        {
            scriptSentence.DeletedWordAndFollowingWordDict.Add(sameSpotWords[i], sameSpotWords[i - 1]);
        }

        // The first deleted word is mapped to the script word at this position,
        // or to null when the position lies past the last word.
        int position = entry.Key;
        ScriptWord nextWord = position < scriptSentence.Words.Count
            ? scriptSentence.Words[position]
            : null;
        scriptSentence.DeletedWordAndFollowingWordDict.Add(sameSpotWords[0], nextWord);
    }
}
/// <summary>
/// Copies ToBI pitch accents from the syllables of each pronounced script word
/// onto the syllables of the next pronounceable engine word.
/// </summary>
/// <param name="utt">Tts utterance.</param>
/// <param name="scriptSentence">Script sentence.</param>
private static void UpdateToBIAccent(SP.TtsUtterance utt, ScriptSentence scriptSentence)
{
    int engineCursor = 0;
    TtsTobiAccentSet accentSet = new TtsTobiAccentSet();
    System.Console.WriteLine("warning: update the ToBIAccent!");

    foreach (ScriptWord scriptWord in scriptSentence.Words)
    {
        if (!scriptWord.IsPronounced)
        {
            continue;
        }

        // Advance to the next pronounceable engine word, consuming every
        // non-pronounceable word on the way.
        SP.TtsWord engineWord;
        do
        {
            engineWord = utt.Words[engineCursor];
            engineCursor++;
        }
        while (!engineWord.IsPronounceable);

        // Walk the script syllables and the engine syllable chain in lockstep.
        Collection<ScriptSyllable> scriptSyllables = scriptWord.Syllables;
        SP.TtsSyllable engineSyllable = engineWord.FirstSyllable;
        foreach (ScriptSyllable scriptSyllable in scriptSyllables)
        {
            if (scriptSyllable.TobiPitchAccent != null)
            {
                string accentLabel = scriptSyllable.TobiPitchAccent.Label;
                engineSyllable.ToBIAccent = (SP.TtsTobiAccent)accentSet.Items[accentLabel];
            }

            engineSyllable = engineSyllable.Next;
        }
    }
}
/// <summary>
/// Prepares this instance for a new sentence: clears the per-sentence phone
/// index and match flag, stores the sentence, and logs its text.
/// </summary>
/// <param name="scriptSentence">Script sentence.</param>
public void InitializeSentence(ScriptSentence scriptSentence)
{
    // Drop the state left over from the previously processed sentence.
    _sentenceMatch = false;
    _totalPhoneIndex = null;

    _curScriptSentence = scriptSentence;

    TraceLog(_logger, true, "Processing sentence: {0}", _curScriptSentence.Text);
}
/// <summary>
/// Generates a script item from raw text, down to word level only. One script
/// sentence is created per engine utterance that contains words.
/// </summary>
/// <param name="text">Plain text.</param>
/// <returns>The generated script item.</returns>
/// <exception cref="ArgumentNullException">Thrown when the text is null or empty.</exception>
/// <exception cref="InvalidOperationException">Thrown when the process mode does
/// not include ProcessMode.TextProcess.</exception>
public ScriptItem GenerateScriptItem(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        throw new ArgumentNullException("text");
    }

    // The current mode has to include the TextProcess flag.
    bool textProcessEnabled = (_mode & ProcessMode.TextProcess) != 0;
    if (!textProcessEnabled)
    {
        throw new InvalidOperationException("Process mode can only be ProcessMode.TextProcess");
    }

    ScriptItem result = new ScriptItem();
    result.Text = text;

    foreach (SP.TtsUtterance utt in EspUtterances(text))
    {
        // Each utterance is disposed once its words have been copied.
        using (utt)
        {
            if (utt.Words.Count == 0)
            {
                continue;
            }

            ScriptSentence sentence = new ScriptSentence();
            foreach (SP.TtsWord engineWord in utt.Words)
            {
                // Words without text are not copied into the script.
                if (string.IsNullOrEmpty(engineWord.WordText))
                {
                    continue;
                }

                ScriptWord scriptWord = new ScriptWord();
                scriptWord.Grapheme = engineWord.WordText;

                if (!string.IsNullOrEmpty(engineWord.Pronunciation))
                {
                    scriptWord.Pronunciation = engineWord.Pronunciation.ToLowerInvariant();
                }

                // Normal unless the engine marks the word as punctuation.
                scriptWord.WordType = WordType.Normal;
                if (engineWord.WordType == SP.TtsWordType.WT_PUNCTUATION)
                {
                    scriptWord.WordType = WordType.Punctuation;
                }

                scriptWord.PronSource = (TtsPronSource)engineWord.PronSource;
                sentence.Words.Add(scriptWord);
            }

            sentence.Text = sentence.BuildTextFromWords();
            result.Sentences.Add(sentence);
        }
    }

    return result;
}
/// <summary>
/// Builds a silence word made of one syllable holding one phone, and inserts it
/// into the script sentence at the given word position.
/// </summary>
/// <param name="scriptSentence">Script sentence.</param>
/// <param name="wordIndex">To be insert word's position.</param>
/// <param name="phoneme">The phoneme string.</param>
public static void InsertSilenceWord(ScriptSentence scriptSentence, int wordIndex, string phoneme)
{
    Debug.Assert(Phoneme.IsSilenceFeature(phoneme), "The phoneme should have silence feature");

    // Word: typed as silence, pronunciation given in runtime form.
    ScriptWord silenceWord = new ScriptWord
    {
        WordType = WordType.Silence,
        Pronunciation = Phoneme.ToRuntime(phoneme),
        Sentence = scriptSentence
    };

    // Syllable: linked back to the word before being attached to it.
    ScriptSyllable silenceSyllable = new ScriptSyllable { Word = silenceWord };
    silenceWord.Syllables.Add(silenceSyllable);

    // Phone: linked back to the syllable, then added to the word's first syllable.
    ScriptPhone silencePhone = new ScriptPhone(phoneme) { Syllable = silenceSyllable };
    silenceWord.Syllables[0].Phones.Add(silencePhone);

    scriptSentence.Words.Insert(wordIndex, silenceWord);
}
/// <summary>
/// Verifies that the internal phone index table agrees with the external
/// sentence: same number of pronounced words, same syllable count per word and
/// same phone count per syllable.
/// </summary>
/// <param name="extSentence">External script sentence.</param>
/// <exception cref="InvalidDataException">Thrown on the first mismatch found.</exception>
private void CheckMatched(ScriptSentence extSentence)
{
    if (_totalPhoneIndex.Length != extSentence.PronouncedWords.Count)
    {
        throw new InvalidDataException("Runtime's normal words' count must equal to the script's.");
    }

    for (int w = 0; w < _totalPhoneIndex.Length; w++)
    {
        // One row per syllable of the engine word.
        int engineSyllableCount = _totalPhoneIndex[w].GetLength(0);
        ScriptWord extWord = extSentence.PronouncedWords[w];

        if (engineSyllableCount != extWord.Syllables.Count)
        {
            throw new InvalidDataException(Helper.NeutralFormat(
                "Script's normal word [{0}]'s syllables' count " + "must equal to the runtime's.",
                extWord.Pronunciation));
        }

        for (int s = 0; s < engineSyllableCount; s++)
        {
            // The engine phone count is column 1 minus column 0 of the syllable's row.
            int enginePhoneCount = _totalPhoneIndex[w][s, 1] - _totalPhoneIndex[w][s, 0];
            if (enginePhoneCount == extWord.Syllables[s].Phones.Count)
            {
                continue;
            }

            throw new InvalidDataException(Helper.NeutralFormat(
                "Script's normal word [{0}]'s " + "syllable [{1}]'s phones' count must equal to the runtime's.",
                extWord.Pronunciation,
                extWord.Syllables[s].Text));
        }
    }
}
/// <summary>
/// Update silence words. Aligns the silence words of the engine utterance with
/// those of the script sentence by adding or deleting engine silence words
/// around pronounceable words. The utterance is modified in place.
/// </summary>
/// <param name="utt">Engine TtsUtterance.</param>
/// <param name="scriptSentence">Script sentence.</param>
/// <param name="engine">Tts engine.</param>
/// <param name="logger">Log writer object.</param>
private static void UpdateSilenceWords(SP.TtsUtterance utt, ScriptSentence scriptSentence, TtsEngine engine, TextLogger logger)
{
    // Gets phone set: prefer the one on the script file, otherwise fall back to
    // the phone set looked up by the sentence's language.
    TtsPhoneSet phoneSet = null;
    if (scriptSentence.ScriptItem != null && scriptSentence.ScriptItem.ScriptFile != null)
    {
        phoneSet = scriptSentence.ScriptItem.ScriptFile.PhoneSet;
    }
    if (phoneSet == null)
    {
        phoneSet = Localor.GetPhoneSet(scriptSentence.Language);
    }
    // Store the resolved phone set back on the script file when there is one.
    if (scriptSentence.ScriptItem != null && scriptSentence.ScriptItem.ScriptFile != null)
    {
        scriptSentence.ScriptItem.ScriptFile.PhoneSet = phoneSet;
    }
    int extWordIndex = 0;
    // Leading silence: the script starts with a silence word but the engine does
    // not, so a silence word is added before the first engine word.
    // NOTE(review): both Words[0] accesses assume non-empty word lists - confirm with callers.
    if (scriptSentence.Words[extWordIndex].WordType == WordType.Silence && utt.Words[0].WordType != TtsWordType.WT_SILENCE)
    {
        string phone = scriptSentence.Words[extWordIndex].GetPronunciation(phoneSet);
        Debug.Assert(Offline.Phoneme.IsSilenceFeature(phone), "Silence word should have only one phoneme - silence or short pause.");
        TtsWord silenceWord = utt.AddNewWord(utt.Words[0], InsertOptions.Before);
        // NOTE(review): utt.Words[0] is read again after the insertion; whether it now
        // refers to the new silence word depends on AddNewWord - confirm.
        ConfigSilenceWord(engine.Phoneme.PronunciationToPhoneIds(Offline.Phoneme.ToRuntime(phone)), silenceWord, utt.Words[0].BreakLevel);
    }
    for (int uttWordIndex = 0; uttWordIndex < utt.Words.Count; uttWordIndex++)
    {
        TtsWord uttWord = utt.Words[uttWordIndex];
        if (uttWord.IsPronounceable)
        {
            // Advance the script cursor to the next pronounced script word; it
            // is paired with this pronounceable engine word.
            for (; extWordIndex < scriptSentence.Words.Count; extWordIndex++)
            {
                if (scriptSentence.Words[extWordIndex].IsPronounced)
                {
                    // Step past the pronounced word so extWordIndex points at the script word that follows it.
                    extWordIndex++;
                    if (uttWord.BreakLevel < TtsBreakLevel.BK_IDX_INTERM_PHRASE)
                    {
                        // Below intermediate-phrase break level: a script silence
                        // here is only logged, and any engine silence that
                        // follows the word is deleted.
                        if (extWordIndex < scriptSentence.Words.Count && scriptSentence.Words[extWordIndex].WordType == WordType.Silence)
                        {
                            string str1 = "Warning: Script xml has a silence word, ";
                            string str2 = "but corresponding word[{0}] in engine has a break level ";
                            string str3 = "less than BK_IDX_INTERM_PHRASE";
                            TraceLog(logger, true, str1 + str2 + str3, uttWord.WordText);
                        }
                        if (uttWord.Next != null && uttWord.Next.WordType == TtsWordType.WT_SILENCE)
                        {
                            utt.Delete(uttWord.Next);
                        }
                    }
                    else
                    {
                        // At or above intermediate-phrase break level: add an engine silence
                        // when the script has one and the engine does not; otherwise delete
                        // an engine silence that follows the word.
                        // NOTE(review): uttWord.Next is dereferenced without the null check
                        // used in the sibling branches - confirm it cannot be null here.
                        if (extWordIndex < scriptSentence.Words.Count && scriptSentence.Words[extWordIndex].WordType == WordType.Silence && uttWord.Next.WordType != TtsWordType.WT_SILENCE)
                        {
                            string phone = scriptSentence.Words[extWordIndex].GetPronunciation(phoneSet);
                            Debug.Assert(Offline.Phoneme.IsSilenceFeature(phone), "Silence word should have only one phoneme - silence or short pause.");
                            TtsWord silenceWord = utt.AddNewWord(uttWord, InsertOptions.After);
                            ConfigSilenceWord(engine.Phoneme.PronunciationToPhoneIds(Offline.Phoneme.ToRuntime(phone)), silenceWord, uttWord.BreakLevel);
                        }
                        else if (uttWord.Next != null && uttWord.Next.WordType == TtsWordType.WT_SILENCE)
                        {
                            utt.Delete(uttWord.Next);
                        }
                    }
                    // This engine word is handled; the break skips the loop's own
                    // increment, so the cursor stays on the following script word.
                    break;
                }
            }
        }
    }
}
/// <summary>
/// Forwards sentence initialization to the script feature import component
/// when a script feature import configuration is present.
/// </summary>
/// <param name="scriptSentence">Script sentence.</param>
public void InitializeSentence(ScriptSentence scriptSentence)
{
    // Nothing to initialize without a script feature import configuration.
    if (_scriptFeatureImportConfig == null)
    {
        return;
    }

    _scriptFeatureImport.InitializeSentence(scriptSentence);
}
/// <summary>
/// Runs the consistency checks selected by the option flags. Only the Word
/// option is handled here; it triggers the word consistency check.
/// </summary>
/// <param name="utt">Tts utterance.</param>
/// <param name="scriptSentence">Script sentence.</param>
/// <param name="option">Checking option.</param>
private static void CheckConsistency(SP.TtsUtterance utt, ScriptSentence scriptSentence,
    ScriptFeatureImportConfig.CheckingOptions option)
{
    ScriptFeatureImportConfig.CheckingOptions wordFlag = ScriptFeatureImportConfig.CheckingOptions.Word;

    // The flag counts as requested when all of its bits are set in the option.
    bool wordCheckRequested = (option & wordFlag) == wordFlag;
    if (!wordCheckRequested)
    {
        return;
    }

    CheckWordConsistency(utt, scriptSentence);
}
/// <summary>
/// Replaces the phone ids of engine words whose pronunciation differs from the
/// script pronunciation. Engine words with a text length above zero are paired,
/// in order, with the script words.
/// </summary>
/// <param name="utt">Tts utterance.</param>
/// <param name="scriptSentence">Script sentence.</param>
/// <param name="engine">Tts engine.</param>
private static void UpdatePronunciation(SP.TtsUtterance utt, ScriptSentence scriptSentence, TtsEngine engine)
{
    int scriptIndex = 0;
    System.Console.WriteLine("warning: update the Pronunciation!");

    foreach (TtsWord engineWord in utt.Words)
    {
        // Engine words without text have no script counterpart.
        if (!(engineWord.TextLength > 0))
        {
            continue;
        }

        // Script words without a pronunciation leave the engine word unchanged.
        if (!string.IsNullOrEmpty(scriptSentence.Words[scriptIndex].Pronunciation))
        {
            // The script pronunciation with unit boundaries removed is used both
            // for the comparison (upper-cased) and for the phone id conversion.
            string scriptPron = Offline.Core.Pronunciation.RemoveUnitBoundary(
                scriptSentence.Words[scriptIndex].Pronunciation);
            string comparablePron = scriptPron.ToUpper(CultureInfo.InvariantCulture);

            if (!engineWord.Pronunciation.Equals(comparablePron))
            {
                engineWord.PhoneIds = engine.Phoneme.PronunciationToPhoneIds(scriptPron);
            }
        }

        scriptIndex++;
    }
}
/// <summary>
/// Overwrites the part of speech of every engine word that has text with the
/// part of speech of the script word at the same ordinal.
/// </summary>
/// <param name="utt">Tts utterance.</param>
/// <param name="scriptSentence">Script sentence.</param>
/// <param name="engine">Tts engine.</param>
private static void UpdatePartOfSpeech(SP.TtsUtterance utt, ScriptSentence scriptSentence, TtsEngine engine)
{
    int scriptIndex = 0;
    System.Console.WriteLine("warning: update the PartOfSpeech!");

    foreach (TtsWord engineWord in utt.Words)
    {
        // Engine words without text have no script counterpart.
        if (!(engineWord.TextLength > 0))
        {
            continue;
        }

        // The engine's POS table turns the script POS string into an id,
        // which is narrowed to ushort on assignment.
        string scriptPos = scriptSentence.Words[scriptIndex].PosString;
        uint posId = engine.PosTable.StringToId(scriptPos);
        engineWord.Pos = (ushort)posId;

        scriptIndex++;
    }
}
/// <summary>
/// Copies the break level of each pronounced script word onto the pronounceable
/// engine word at the same ordinal. Engine words beyond the number of
/// pronounced script words are left untouched.
/// </summary>
/// <param name="utt">Tts utterance.</param>
/// <param name="scriptSentence">Script sentence.</param>
private static void UpdateBreak(SP.TtsUtterance utt, ScriptSentence scriptSentence)
{
    int scriptIndex = 0;
    System.Console.WriteLine("warning: update the BreakLevel!");

    foreach (TtsWord engineWord in utt.Words)
    {
        if (!engineWord.IsPronounceable || scriptIndex >= scriptSentence.PronouncedWords.Count)
        {
            continue;
        }

        // The script value is carried across through its integer value.
        int breakValue = (int)scriptSentence.PronouncedWords[scriptIndex].Break;
        engineWord.BreakLevel = (SP.TtsBreakLevel)breakValue;
        scriptIndex++;
    }
}
/// <summary>
/// Dumps the utterance into a new script item holding a single sentence whose
/// words are dumped from the utterance words.
/// </summary>
/// <param name="utt">The utterance for dumpping.</param>
/// <param name="ttsEngine">The object ttsEngine to help to convert the Pos and get sentence id.</param>
/// <returns>A script item object.</returns>
/// <exception cref="ArgumentNullException">Thrown when the engine is null.</exception>
public static ScriptItem ToScriptItem(this SP.TtsUtterance utt, SP.TtsEngine ttsEngine)
{
    if (ttsEngine == null)
    {
        throw new ArgumentNullException("ttsEngine");
    }

    // The item id is BeginId formatted as a decimal padded to IdLength digits.
    string idFormat = "{0:D" + IdLength + "}";
    ScriptItem result = new ScriptItem
    {
        Text = utt.OriginalText,
        Id = Helper.NeutralFormat(idFormat, BeginId)
    };

    ScriptSentence sentence = new ScriptSentence
    {
        SentenceType = (SentenceType)utt.SentenceType
    };

    DumpWords(sentence, utt, ttsEngine, GetLanguage(utt));

    // The sentence text is rebuilt from the dumped words.
    sentence.Text = sentence.BuildTextFromWords();
    result.Sentences.Add(sentence);

    return result;
}
/// <summary>
/// Converts a two-line format script item into an XML format script item.
/// Failures are recorded in the error set and reported by returning null.
/// </summary>
/// <param name="item">Two-line format script item.</param>
/// <param name="inScriptWithoutPron">Whether input script without pronunciation.</param>
/// <param name="errors">Errors if having.</param>
/// <returns>New format item, or null when the conversion raised an exception.</returns>
/// <exception cref="ArgumentNullException">Thrown when item or errors is null.</exception>
private static ScriptItem ConvertScriptItemToXmlFormat(ScriptItem item,
    bool inScriptWithoutPron, ErrorSet errors)
{
    if (item == null)
    {
        throw new ArgumentNullException("item");
    }

    if (errors == null)
    {
        throw new ArgumentNullException("errors");
    }

    ScriptItem converted = new ScriptItem();
    try
    {
        converted.Id = item.Id;
        converted.Text = item.PlainSentence;

        // A script without pronunciations yields an item with no sentence.
        if (!inScriptWithoutPron)
        {
            ScriptSentence sentence = new ScriptSentence();
            sentence.Text = item.PlainSentence;

            foreach (ScriptWord word in item.Words)
            {
                // The specific punctuation kinds collapse into the generic one.
                WordType kind = word.WordType;
                if (kind == WordType.Exclamation ||
                    kind == WordType.Period ||
                    kind == WordType.Question ||
                    kind == WordType.OtherPunctuation)
                {
                    word.WordType = WordType.Punctuation;
                }

                // postag is used for two-line format
                if (!string.IsNullOrEmpty(word.PosTag))
                {
                    word.PosString = word.PosTag;
                }

                // A normal word with no pronunciation is recorded as an error but still added.
                if (word.WordType == WordType.Normal && string.IsNullOrEmpty(word.Pronunciation))
                {
                    errors.Add(ScriptError.EmptyPronInNormalWord, item.Id, word.Grapheme);
                }

                sentence.Words.Add(word);
            }

            converted.Sentences.Add(sentence);
        }
    }
    catch (InvalidDataException invalidData)
    {
        string detail = Helper.NeutralFormat("Invalid item {0}: {1}",
            item.Id, Helper.BuildExceptionMessage(invalidData));
        errors.Add(ScriptError.OtherErrors, item.Id, detail);
        converted = null;
    }
    catch (Exception unexpected)
    {
        string detail = Helper.NeutralFormat("Error in item: {0}: {1}",
            item.Id, Helper.BuildExceptionMessage(unexpected));
        errors.Add(ScriptError.OtherErrors, item.Id, detail);
        converted = null;

        // NOTE(review): a caught exception is never null, so this rethrow looks
        // unreachable; kept as in the original - confirm why it exists.
        if (unexpected == null)
        {
            throw;
        }
    }

    return converted;
}