/// <summary>
/// Gets the utterances generated by ESP for the given content.
/// </summary>
/// <remarks>
/// The engine and <paramref name="content"/> are validated as soon as this method is
/// called. The engine itself (SetSpeakText, the per-stage Reset and Process calls) is
/// only driven while the returned sequence is being enumerated.
/// </remarks>
/// <param name="content">Content to be spoken.</param>
/// <param name="sayas">
/// Sayas used by ESP. When null or empty, the content is handed to the engine without it.
/// </param>
/// <returns>
/// A lazily produced sequence of utterances; each one has been passed through every
/// stage enabled in the current process mode.
/// </returns>
/// <exception cref="ArgumentNullException">
/// The engine has not been created, or <paramref name="content"/> is null or empty.
/// </exception>
public IEnumerable<SP.TtsUtterance> EspUtterances(string content, string sayas)
{
    // Validation lives in this non-iterator wrapper on purpose: inside an iterator
    // body these checks would not run until the first MoveNext(), far away from
    // the faulty call site.
    if (_engine == null)
    {
        throw new ArgumentNullException("_engine");
    }

    if (string.IsNullOrEmpty(content))
    {
        throw new ArgumentNullException("content");
    }

    return EspUtterancesIterator(content, sayas);
}

/// <summary>
/// Iterator behind <see cref="EspUtterances"/>; expects its arguments to be validated already.
/// </summary>
/// <param name="content">Content to be spoken.</param>
/// <param name="sayas">Sayas used by ESP, may be null or empty.</param>
/// <returns>Utterances produced by the enabled engine stages.</returns>
private IEnumerable<SP.TtsUtterance> EspUtterancesIterator(string content, string sayas)
{
    if (string.IsNullOrEmpty(sayas))
    {
        _engine.SetSpeakText(content);
    }
    else
    {
        _engine.SetSpeakText(content, sayas);
    }

    // Reset only the stages that are enabled in the current process mode.
    if ((_mode & ProcessMode.TextProcess) != 0)
    {
        _engine.TextProcessor.Reset();
    }

    if ((_mode & ProcessMode.ProsodyTag) != 0)
    {
        _engine.LinguisticProsodyTagger.Reset();
    }

    if ((_mode & ProcessMode.UnitGenerate) != 0)
    {
        _engine.UnitGenerator.Reset();
    }

    if ((_mode & ProcessMode.UnitLatticeGenerate) != 0)
    {
        _engine.UnitLatticeGenerator.Reset();
    }

    if ((_mode & ProcessMode.UnitSelect) != 0)
    {
        _engine.UnitSelector.Reset();
    }

    if ((_mode & ProcessMode.WaveGenerate) != 0)
    {
        _engine.WaveGenerator.Reset();
    }

    while (true)
    {
        SP.TtsUtterance utterance = new SP.TtsUtterance();

        // Run the enabled stages in order. As soon as one of them returns false the
        // enumeration ends, and the utterance being processed is not yielded.
        if ((_mode & ProcessMode.TextProcess) != 0 &&
            !_engine.TextProcessor.Process(utterance))
        {
            break;
        }

        if ((_mode & ProcessMode.ProsodyTag) != 0 &&
            !_engine.LinguisticProsodyTagger.Process(utterance))
        {
            break;
        }

        if ((_mode & ProcessMode.UnitGenerate) != 0 &&
            !_engine.UnitGenerator.Process(utterance))
        {
            break;
        }

        if ((_mode & ProcessMode.UnitLatticeGenerate) != 0 &&
            !_engine.UnitLatticeGenerator.Process(utterance))
        {
            break;
        }

        if ((_mode & ProcessMode.UnitSelect) != 0 &&
            !_engine.UnitSelector.Process(utterance))
        {
            break;
        }

        if ((_mode & ProcessMode.WaveGenerate) != 0 &&
            !_engine.WaveGenerator.Process(utterance))
        {
            break;
        }

        yield return utterance;
    }
}
/// <summary>
/// Adds a new normal word to the tail of the utterance and fills it from the script word.
/// </summary>
/// <param name="utterance">
/// The utterance that receives the new word.
/// </param>
/// <param name="scriptWord">
/// The script word whose text, pronunciation, POS and offsets are copied.
/// </param>
/// <returns>
/// The number of normal phones reported by the script word.
/// </returns>
/// <exception cref="InvalidDataException">
/// POS is required but the script word has none, or the script word contains a phone
/// that is reported as an error against the phone set.
/// </exception>
private int AppendNormalWord(TtsUtterance utterance, ScriptWord scriptWord)
{
    TtsWord ttsWord = utterance.AppendNewWord();

    // Plain attribute copy from the script word.
    ttsWord.LangId = (ushort)scriptWord.Language;
    ttsWord.BreakLevel = (TtsBreakLevel)scriptWord.Break;
    ttsWord.Emphasis = (TtsEmphasis)scriptWord.Emphasis;
    ttsWord.WordText = scriptWord.Grapheme;
    ttsWord.NETypeText = scriptWord.NETypeText;
    ttsWord.WordRegularText = scriptWord.RegularText;
    ttsWord.WordType = TtsWordType.WT_NORMAL;
    ttsWord.AcousticDomain = DomainExtension.MapToEnum(scriptWord.AcousticDomainTag);
    ttsWord.WordExpansion = scriptWord.Expansion;

    string pronunciation = scriptWord.Pronunciation;
    ttsWord.ReadablePronunciation = pronunciation;
    if (!string.IsNullOrEmpty(pronunciation))
    {
        // Phone ids are computed from the pronunciation with unit boundaries removed.
        string boundaryFree = Pronunciation.RemoveUnitBoundary(pronunciation);
        ttsWord.PhoneIds = Phoneme.PronunciationToPhoneIds(boundaryFree);
    }

    if (NeedPos)
    {
        string posName = scriptWord.PosString;
        if (string.IsNullOrEmpty(posName))
        {
            string missingPos = Helper.NeutralFormat(
                "No POS found in sentence \"{0}\" for word \"{1}\"",
                scriptWord.Sentence.ScriptItem.Id,
                scriptWord.Grapheme);
            throw new InvalidDataException(missingPos);
        }

        // The word carries both its own POS and the category POS used for tagging.
        ttsWord.Pos = (ushort)PosSet.Items[posName];
        string taggerPosName = PosSet.CategoryTaggingPOS[posName];
        ttsWord.POSTaggerPos = (ushort)PosSet.Items[taggerPosName];
    }

    // Count the normal phones; any error collected on the way invalidates the word.
    ErrorSet phoneErrors = new ErrorSet();
    int phoneCount = scriptWord.GetNormalPhoneNames(PhoneSet, phoneErrors).Count;
    if (phoneErrors.Count > 0)
    {
        string invalidPhone = Helper.NeutralFormat(
            "Invalid phone found in sentence \"{0}\" for word \"{1}\"",
            scriptWord.Sentence.ScriptItem.Id,
            scriptWord.Grapheme);
        throw new InvalidDataException(invalidPhone);
    }

    ttsWord.TextOffset = (uint)scriptWord.OffsetInString;
    ttsWord.TextLength = (uint)scriptWord.LengthInString;

    return phoneCount;
}
/// <summary>
/// Adds a new punctuation word to the tail of the utterance.
/// </summary>
/// <param name="utterance">
/// The utterance that receives the new word.
/// </param>
/// <param name="scriptWord">
/// The script word describing the punctuation.
/// </param>
/// <returns>
/// Always zero: a punctuation word contributes no phonemes.
/// </returns>
private int AppendPunctuationWord(TtsUtterance utterance, ScriptWord scriptWord)
{
    const int PunctuationPhoneCount = 0;

    TtsWord punctuation = utterance.AppendNewWord();

    punctuation.LangId = (ushort)scriptWord.Language;
    punctuation.BreakLevel = (TtsBreakLevel)scriptWord.Break;
    punctuation.Emphasis = (TtsEmphasis)scriptWord.Emphasis;
    punctuation.WordText = scriptWord.Grapheme;
    punctuation.NETypeText = scriptWord.NETypeText;
    punctuation.WordType = TtsWordType.WT_PUNCTUATION;

    return PunctuationPhoneCount;
}
/// <summary>
/// Adds a new silence word to the tail of the utterance.
/// </summary>
/// <param name="utterance">
/// The utterance that receives the new word.
/// </param>
/// <param name="phone">
/// The offline phone name of the silence; asserted to be a silence feature in debug builds.
/// </param>
/// <returns>
/// Always one: a silence word consists of a single phone.
/// </returns>
private int AppendSilenceWord(TtsUtterance utterance, string phone)
{
    const int SilencePhoneCount = 1;

    Debug.Assert(
        Offline.Phoneme.IsSilenceFeature(phone),
        "Silence word should have phone: short pause or silence");

    TtsWord silence = utterance.AppendNewWord();

    string runtimePhone = Offline.Phoneme.ToRuntime(phone);
    silence.PhoneIds = Phoneme.PronunciationToPhoneIds(runtimePhone);
    silence.LangId = (ushort)PhoneSet.Language;
    silence.WordType = TtsWordType.WT_SILENCE;
    silence.Pos = 0;

    // Keep the break level consistent with the runtime engine: inherit it from the
    // preceding word, or use the sentence level when the silence is the first word.
    silence.BreakLevel = silence.Previous != null
        ? silence.Previous.BreakLevel
        : TtsBreakLevel.BK_IDX_SENTENCE;

    return SilencePhoneCount;
}
/// <summary>
/// Builds an utterance from a script item, optionally inserting silence words
/// according to the segments of a segment file.
/// </summary>
/// <param name="item">
/// The script item whose sentences and words are converted.
/// </param>
/// <param name="segmentFile">
/// Optional segment file. When null, no silence words are inserted and no
/// end-of-segments check is performed.
/// </param>
/// <param name="buildAllWords">
/// True to append every script word as a normal word; false to append only
/// pronounceable normal words (plus punctuation when NeedPunctuation is set).
/// </param>
/// <param name="subSentenceIndex">
/// Zero-based position of the only sentence to convert, or -1 to convert all sentences.
/// </param>
/// <returns>
/// The utterance with its phone list, ToBI information, phrase list and context
/// characters built.
/// </returns>
/// <exception cref="InvalidDataException">
/// A word could not be appended, or an EspException was raised while building
/// (wrapped as the inner exception).
/// </exception>
public TtsUtterance Build(ScriptItem item, SegmentFile segmentFile, bool buildAllWords, int subSentenceIndex)
{
    Helper.ThrowIfNull(item);

    TtsUtterance utterance = new TtsUtterance();
    int phoneIndex = 0;

    try
    {
        // A leading silence segment becomes a silence word. The bounds check keeps an
        // empty segment list from failing with an index error, matching the guard
        // applied after each normal word below.
        if (segmentFile != null &&
            phoneIndex < segmentFile.WaveSegments.Count &&
            segmentFile.WaveSegments[phoneIndex].IsSilenceFeature)
        {
            phoneIndex += AppendSilenceWord(utterance, segmentFile.WaveSegments[phoneIndex].Label);
        }

        // Creates a words map for ToBI accent.
        Dictionary<ScriptWord, TtsWord> mapWords = new Dictionary<ScriptWord, TtsWord>();

        int sentenceIndex = 0;
        foreach (ScriptSentence scriptSentence in item.Sentences)
        {
            // Only add the requested sentence of the script item. The counter is
            // advanced only while a specific sentence is being searched for.
            if (subSentenceIndex != -1 && sentenceIndex++ != subSentenceIndex)
            {
                continue;
            }

            // Treats unknown sentence type as declarative.
            if (scriptSentence.SentenceType != SentenceType.Unknown)
            {
                utterance.SentenceType = (TtsSentenceType)scriptSentence.SentenceType;
            }
            else
            {
                utterance.SentenceType = (TtsSentenceType)SentenceType.Declarative;
            }

            utterance.SentenceEmotionType = (EmotionmlCategory)scriptSentence.Emotion;

            // Converts each word in script sentence.
            foreach (ScriptWord scriptWord in scriptSentence.Words)
            {
                // NOTE(review): with buildAllWords set, punctuation also goes through
                // AppendNormalWord here - confirm that this is intended.
                if (buildAllWords || scriptWord.IsPronouncableNormalWord)
                {
                    phoneIndex += AppendNormalWord(utterance, scriptWord);

                    // Adds into words map.
                    mapWords.Add(scriptWord, utterance.Words[utterance.Words.Count - 1]);

                    // Stops converting this sentence once the segments are used up.
                    if (segmentFile != null && phoneIndex >= segmentFile.WaveSegments.Count)
                    {
                        break;
                    }

                    // A silence segment following the word becomes a silence word.
                    if (segmentFile != null && segmentFile.WaveSegments[phoneIndex].IsSilenceFeature)
                    {
                        phoneIndex += AppendSilenceWord(utterance, segmentFile.WaveSegments[phoneIndex].Label);
                    }
                }
                else if (NeedPunctuation && scriptWord.WordType == WordType.Punctuation)
                {
                    // buildAllWords is necessarily false on this branch, so only the
                    // punctuation condition needs to be tested.
                    phoneIndex += AppendPunctuationWord(utterance, scriptWord);
                }
            }
        }

        // Builds phone list. All pause durations are zero, which is what a freshly
        // allocated array already contains.
        int[] pauseDurations = new int[(int)TtsPauseLevel.PAU_IDX_SENTENCE + 1];
        utterance.BuildPhoneList(Phoneme, pauseDurations, 0, 0);

        // Builds ToBI accent, which must happen after the phone list is built.
        BuildToBIInformation(mapWords);

        // Builds phrase list.
        utterance.BuildPhraseList();

        // Builds character list.
        utterance.BuildContextCharacters();

        return utterance;
    }
    catch (EspException e)
    {
        throw new InvalidDataException(
            Helper.NeutralFormat("Build utterance error on sentence \"{0}\"", item.Id), e);
    }
}
/// <summary>
/// Runs feature extraction on an utterance and packs the result into a sentence.
/// </summary>
/// <param name="sentId">
/// Id assigned to the resulting sentence and used in error messages.
/// </param>
/// <param name="utterance">
/// Service Provider utterance object to extract the features from.
/// </param>
/// <returns>
/// A sentence holding one phone segment per phone; each segment carries its feature
/// values and a label built from them.
/// </returns>
/// <exception cref="InvalidDataException">
/// Extraction raised an EspException, the number of vectors differs from the number of
/// feature metas, or a vector's length differs from the utterance's phone count.
/// </exception>
public Sentence Extract(string sentId, TtsUtterance utterance)
{
    List<FeatureVector> vectors;
    try
    {
        vectors = ExtractionEngine.Extract(utterance, FeatureMetas);
    }
    catch (EspException e)
    {
        string extractError = Helper.NeutralFormat("Extract feature error on sentence \"{0}\"", sentId);
        throw new InvalidDataException(extractError, e);
    }

    // One vector is expected per feature meta ...
    if (vectors.Count != FeatureMetas.Count)
    {
        throw new InvalidDataException(
            Helper.NeutralFormat("Length of result is mismatch on sentence \"{0}\"", sentId));
    }

    // ... and every vector must have one value per phone.
    if (vectors.Any(vector => vector.Count != utterance.Phones.Count))
    {
        throw new InvalidDataException(
            Helper.NeutralFormat("Length of vector is mismatch on sentence \"{0}\"", sentId));
    }

    Sentence sentence = new Sentence();
    sentence.Id = sentId;

    int phoneCount = vectors[0].Count;
    for (int phoneIndex = 0; phoneIndex < phoneCount; ++phoneIndex)
    {
        int current = phoneIndex;

        // The segment keeps this phone's values of every vector that follows the
        // mandatory features.
        PhoneSegment segment = new PhoneSegment();
        segment.Sentence = sentence;
        segment.Index = current;
        segment.Features = vectors
            .Select(vector => vector[current])
            .Skip(LabelFeatureNameSet.MandatoryFeatureNames.Length)
            .ToArray();

        // The label stores every feature value of this phone in textual form.
        Label label = new Label(FeatureNameSet);
        for (int featureIndex = 0; featureIndex < vectors.Count; ++featureIndex)
        {
            var value = vectors[featureIndex][current];

            if (value.ValueType == FeatureValueType.FEATURE_VALUE_TYPE_UNKOWN)
            {
                // Unknown values are written as "not applicable".
                label.SetFeatureValue(
                    FeatureNameSet.FeatureNames[featureIndex],
                    Label.NotApplicableFeatureValue);
            }
            else if (FeatureMetas[featureIndex].Property == TtsFeatureProperty.TTS_FEATURE_PROPERTY_PHONE_ID)
            {
                // Phone ids are written as the HTK form of the phone name.
                Phone phone = PhoneSet.GetPhone(value.IntValue);
                label.SetFeatureValue(
                    FeatureNameSet.FeatureNames[featureIndex],
                    Offline.Phoneme.ToHtk(phone.Name));
            }
            else
            {
                // Everything else is written as a culture-invariant integer.
                label.SetFeatureValue(
                    FeatureNameSet.FeatureNames[featureIndex],
                    value.IntValue.ToString(CultureInfo.InvariantCulture));
            }

            // Updates the corresponding value records.
            FeatureValueRecords[featureIndex].Update(value);
        }

        segment.Label = label;
        sentence.PhoneSegments.Add(segment);
    }

    return sentence;
}