/// <summary>
/// Get matched engine phone.
/// </summary>
/// <remarks>
/// A phone without the silence feature is matched by position: its index in
/// the script pronounced phone list is used to index the engine pronounced
/// phone list. A silence phone is matched either against the engine's first
/// phone (when it is the first phone of its sentence) or against the engine
/// phone that follows the engine counterpart of the preceding script phone.
/// </remarks>
/// <param name="scriptPhone">Script phone.</param>
/// <returns>
/// The matched engine phone, or null when the script phone is a silence and
/// the engine has no silence phone at the corresponding position.
/// </returns>
public TtsPhone ToTtsPhone(ScriptPhone scriptPhone)
{
    if (!Phoneme.IsSilenceFeature(scriptPhone.Name))
    {
        // Pronounced phones are paired one-to-one by their position.
        int pronouncedIndex = _scriptPronouncedPhones.IndexOf(scriptPhone);
        return _enginePronouncedPhones[pronouncedIndex];
    }

    int phoneIndex = scriptPhone.Syllable.Word.Sentence.ScriptPhones.IndexOf(scriptPhone);
    if (phoneIndex == 0)
    {
        // Leading silence: only the engine's first phone can match it.
        if (_engineFirstPhone != null && _engineFirstPhone.IsSilence)
        {
            return _engineFirstPhone;
        }

        return null;
    }

    // Locate the engine phone matching the script phone just before the
    // silence, then look at whatever follows it on the engine side.
    int previousIndex = _scriptPronouncedPhones.IndexOf(
        scriptPhone.Syllable.Word.Sentence.ScriptPhones[phoneIndex - 1]);
    TtsPhone candidate = _enginePronouncedPhones[previousIndex].Next;

    // The preceding engine phone may have no successor; treat that the same
    // way as "the successor is not a silence" instead of dereferencing null.
    if (candidate != null && candidate.IsSilence)
    {
        return candidate;
    }

    return null;
}
/// <summary>
/// Walks the phones of an engine syllable, converts each one into a script
/// phone (pronunciation, tone, stress, unit information, segment intervals,
/// per-state durations and F0 data) and appends it to the script syllable.
/// </summary>
/// <param name="scriptSyllable">The script syllable that receives the converted phones.</param>
/// <param name="utt">The utterance.</param>
/// <param name="syllable">The engine syllable whose phones are dumped.</param>
/// <param name="phoneIndex">Index of the current phone in the utterance; advanced by one per phone.</param>
/// <param name="unitIndex">Index of the current unit in the utterance; advanced per phone.</param>
/// <param name="f0StartIndex">Start position in the F0 data; advanced by the frames consumed.</param>
/// <param name="ttsEngine">The engine used to resolve the unit manager that provides sentence ids.</param>
private static void DumpPhones(ScriptSyllable scriptSyllable, SP.TtsUtterance utt,
    SP.TtsSyllable syllable, ref int phoneIndex, ref int unitIndex, ref int f0StartIndex,
    SP.TtsEngine ttsEngine)
{
    Debug.Assert(scriptSyllable != null, "ScriptSyllable should not be null");
    Debug.Assert(utt != null, "Utt should not be null");
    Debug.Assert(syllable != null, "Syllable should not be null");
    Debug.Assert(phoneIndex >= 0, "PhoneIndex should not be less than 0");
    Debug.Assert(f0StartIndex >= 0, "f0StartIndex should not be less than 0");
    Debug.Assert(ttsEngine != null, "ttsEngine should not be null");

    // The unit manager is resolved once, from the best lattice node of the
    // unit the caller is currently positioned on, and only when the
    // utterance actually has segments.
    WuiManager wuiManager = null;
    if (utt.Segments.Count > 0)
    {
        int bestNodeIndex = (int)utt.UnitLattice.WucList[unitIndex].BestNodeIndex;
        wuiManager = ttsEngine.RUSVoiceDataManager.GetWuiManagerByUnitCostNode(
            utt.UnitLattice.WucList[unitIndex].WucNodeList[bestNodeIndex]);
    }

    // Visit every phone from the first one up to and including the last
    // phone of the syllable.
    for (SP.TtsPhone phone = syllable.FirstPhone; phone != null; phone = phone.Next)
    {
        // Lower-cased pronunciation with the stress mark stripped; the tone
        // mark is stripped as well when the phone carries a tone.
        string phoneText = Pronunciation.RemoveStress(phone.Pronunciation.ToLowerInvariant()).Trim();
        if (phone.Tone != 0)
        {
            phoneText = Pronunciation.RemoveTone(phoneText).Trim();
        }

        ScriptPhone scriptPhone = new ScriptPhone(phoneText);
        scriptPhone.Tone = phone.Tone.ToString();
        scriptPhone.Stress = (TtsStress)phone.Stress;

        bool isSilenceOrPause = phone.Pronunciation.Equals(PronOfSilence) ||
            phone.Pronunciation.Equals(PronOfShortPause);

        // Sentence id and unit index are only recorded for non-silence phones.
        if (phone.Pronunciation != PronOfSilence)
        {
            if (wuiManager != null)
            {
                scriptPhone.SentenceId = wuiManager.GetSentenceId(utt.Segments[unitIndex].WaveUnitInfo);
            }

            if (phone.Unit != null)
            {
                scriptPhone.UnitIndex = (int)phone.Unit.UnitIndex;
            }
        }

        scriptPhone.Acoustics = new ScriptAcoustics();

        // Segment information: the phone's duration is the sum of the wave
        // lengths of the current unit and the one after it, and one segment
        // interval is recorded for each of the two units.
        bool hasSpeechSegment = utt.Segments.Count > 0 &&
            !(utt.Segments[unitIndex].Unit.UnitText.Equals(PronOfSilence) ||
              utt.Segments[unitIndex].Unit.UnitText.Equals(PronOfShortPause));
        if (hasSpeechSegment)
        {
            int firstLength = (int)utt.Segments[unitIndex].WaveUnitInfo.WaveLength;
            int secondLength = (int)utt.Segments[unitIndex + 1].WaveUnitInfo.WaveLength;
            scriptPhone.Acoustics.Duration = firstLength + secondLength;

            int firstStart = (int)utt.Segments[unitIndex].WaveUnitInfo.RecordingWaveStartPosition;
            scriptPhone.Acoustics.SegmentIntervals.Add(
                new SegmentInterval(firstStart, firstStart + firstLength));

            int secondStart = (int)utt.Segments[unitIndex + 1].WaveUnitInfo.RecordingWaveStartPosition;
            scriptPhone.Acoustics.SegmentIntervals.Add(
                new SegmentInterval(secondStart, secondStart + secondLength));
        }

        // Per-phone bookkeeping shared with GetF0Contour across all states:
        // the relative begin/end of the uv segment interval, whether the
        // first voiced segment has been met yet, and whether the state just
        // processed had F0 values worth writing down.
        int intervalBegin = 0;
        int intervalEnd = 0;
        bool beginFound = false;
        bool hasF0Value = false;

        // One script state per entry of the phone's duration row.
        if (utt.Acoustic.Durations != null)
        {
            for (int stateIndex = 0; stateIndex < utt.Acoustic.Durations[phoneIndex].Length; ++stateIndex)
            {
                int frameCount = (int)utt.Acoustic.Durations[phoneIndex][stateIndex];

                ScriptState scriptState = new ScriptState();
                scriptState.Acoustics = new ScriptAcoustics(frameCount * MillisecondsPerFrame);

                if (utt.Acoustic.F0s != null)
                {
                    ScriptUvSeg stateUvSeg = GetF0Contour(utt, f0StartIndex, frameCount,
                        ScriptAcousticChunkEncoding.Text, ref intervalBegin, ref intervalEnd,
                        ref beginFound, ref hasF0Value);
                    if (hasF0Value)
                    {
                        scriptState.Acoustics.UvSegs.Add(stateUvSeg);
                    }

                    f0StartIndex += frameCount;
                }

                scriptPhone.States.Add(scriptState);
            }
        }

        // Phone-level uv segment carrying the relative interval.
        // NOTE(review): the factor 5 presumably converts frames to
        // milliseconds - confirm it is meant to match MillisecondsPerFrame.
        if (utt.Acoustic.F0s != null && !isSilenceOrPause)
        {
            ScriptUvSeg intervalUvSeg = new ScriptUvSeg(ScriptUvSegType.Mixed);
            intervalUvSeg.Interval = new ScriptUvSegInterval(intervalBegin * 5, intervalEnd * 5);
            scriptPhone.Acoustics.UvSegs.Add(intervalUvSeg);
        }

        phoneIndex++;

        // A phone that is neither silence nor short pause is backed by two
        // half phone units, so the right half is skipped as well to land on
        // the next phone's unit.
        unitIndex += (wuiManager != null && !isSilenceOrPause) ? 2 : 1;

        scriptSyllable.Phones.Add(scriptPhone);

        if (phone == syllable.LastPhone)
        {
            break;
        }
    }
}
/// <summary>
/// Insert silence word to script.
/// </summary>
/// <remarks>
/// Builds a word of type silence that contains a single syllable holding a
/// single phone, links the word to the sentence, and inserts it into the
/// sentence's word list at the requested position.
/// </remarks>
/// <param name="scriptSentence">Script sentence.</param>
/// <param name="wordIndex">To be insert word's position.</param>
/// <param name="phoneme">The phoneme string.</param>
/// <exception cref="ArgumentNullException">Thrown when scriptSentence is null.</exception>
public static void InsertSilenceWord(ScriptSentence scriptSentence, int wordIndex, string phoneme)
{
    // Fail fast: without this check a null sentence is only detected at the
    // final insert, after the whole silence word has already been built.
    if (scriptSentence == null)
    {
        throw new ArgumentNullException("scriptSentence");
    }

    Debug.Assert(Phoneme.IsSilenceFeature(phoneme), "The phoneme should have silence feature");

    ScriptWord silenceWord = new ScriptWord();
    silenceWord.WordType = WordType.Silence;
    silenceWord.Pronunciation = Phoneme.ToRuntime(phoneme);
    silenceWord.Sentence = scriptSentence;

    ScriptSyllable silenceSyllable = new ScriptSyllable();
    silenceSyllable.Word = silenceWord;
    silenceWord.Syllables.Add(silenceSyllable);

    ScriptPhone silencePhone = new ScriptPhone(phoneme);
    silencePhone.Syllable = silenceSyllable;
    silenceWord.Syllables[0].Phones.Add(silencePhone);

    scriptSentence.Words.Insert(wordIndex, silenceWord);
}
/// <summary>
/// Update state duration.
/// </summary>
/// <remarks>
/// Does nothing when the script phone has no states. Otherwise the number of
/// script states must equal the column count of the engine's duration data,
/// and every state is forwarded to ProcessDurationUpdate in index order.
/// </remarks>
/// <param name="durationUpdater">Duration updater.</param>
/// <param name="intUtt">Internal utterance.</param>
/// <param name="scriptPhone">Script phone.</param>
/// <param name="phoneIndex">External phone index.</param>
/// <param name="syllableIndex">External syllable index.</param>
/// <param name="wordIndex">External normal word index.</param>
/// <param name="frameLength">The length of each frame in millisecond.</param>
/// <exception cref="InvalidDataException">
/// Thrown when the script phone has states but their count differs from the
/// engine's duration column count.
/// </exception>
private void UpdateStateDuration(IUpdateHelper durationUpdater, SP.TtsUtterance intUtt,
    ScriptPhone scriptPhone, int phoneIndex, int syllableIndex, int wordIndex, float frameLength)
{
    int statesCount = scriptPhone.States.Count;
    if (statesCount == 0)
    {
        // Nothing to update for a phone without states.
        return;
    }

    if (statesCount != intUtt.Acoustic.Durations.Column)
    {
        // The original message was concatenated without a separating space
        // and read "... equal to theengine's."
        throw new InvalidDataException("Script states' count must equal to the engine's.");
    }

    for (int stateIndex = 0; stateIndex < statesCount; stateIndex++)
    {
        ProcessDurationUpdate(durationUpdater, intUtt, scriptPhone.States[stateIndex],
            stateIndex, phoneIndex, syllableIndex, wordIndex, frameLength);
    }
}
/// <summary>
/// Load phone from XmlTextReader.
/// </summary>
/// <remarks>
/// Reads the "v", "valid", "tone", "stress", "sentenceID" and "unitIndex"
/// attributes of the current element, then, unless the element is empty,
/// consumes its children ("states" with nested "state" elements, and
/// "acoustics") until the closing "ph" tag is reached.
/// </remarks>
/// <param name="reader">XmlTextReader, positioned on the phone element.</param>
/// <returns>ScriptPhone.</returns>
private static ScriptPhone LoadPhone(XmlTextReader reader)
{
    Debug.Assert(reader != null);

    ScriptPhone phone = new ScriptPhone(reader.GetAttribute("v"));

    // Optional attributes: each one is applied only when present and non-empty.
    string valid = reader.GetAttribute("valid");
    if (!string.IsNullOrEmpty(valid))
    {
        phone.Valid = bool.Parse(valid);
    }

    string tone = reader.GetAttribute("tone");
    if (!string.IsNullOrEmpty(tone))
    {
        phone.Tone = tone;
    }

    string stress = reader.GetAttribute("stress");
    if (!string.IsNullOrEmpty(stress))
    {
        phone.Stress = ScriptSyllable.StringToStress(stress);
    }

    string sentenceID = reader.GetAttribute("sentenceID");
    if (!string.IsNullOrEmpty(sentenceID))
    {
        phone.SentenceId = sentenceID;
    }

    string unitIndex = reader.GetAttribute("unitIndex");
    if (!string.IsNullOrEmpty(unitIndex))
    {
        // The value comes from a persisted file, so it is parsed with the
        // invariant culture rather than the current thread culture.
        phone.UnitIndex = int.Parse(unitIndex, System.Globalization.CultureInfo.InvariantCulture);
    }

    if (reader.IsEmptyElement)
    {
        // A self-closing phone element has no children to read.
        return phone;
    }

    while (reader.Read())
    {
        if (reader.NodeType == XmlNodeType.Element && reader.Name == "states")
        {
            // A self-closing <states/> has no matching end tag; scanning for
            // one would swallow the closing "ph" tag and whatever follows it.
            if (!reader.IsEmptyElement)
            {
                while (reader.Read())
                {
                    if (reader.NodeType == XmlNodeType.Element && reader.Name == "state")
                    {
                        ScriptState state = LoadState(reader);
                        state.Phone = phone;
                        phone.States.Add(state);
                    }
                    else if (reader.NodeType == XmlNodeType.EndElement && reader.Name == "states")
                    {
                        break;
                    }
                }
            }
        }
        else if (reader.NodeType == XmlNodeType.Element && reader.Name == "acoustics")
        {
            phone.Acoustics = new ScriptAcoustics();
            phone.Acoustics.ParseFromXml(reader);
        }
        else if (reader.NodeType == XmlNodeType.EndElement && reader.Name == "ph")
        {
            break;
        }
    }

    return phone;
}
/// <summary>
/// Parsing the syllable string to a script syllable
/// Here we suppose syllable is a valid pronunciation string.
/// </summary>
/// <remarks>
/// The string is split into phones with the given phone set. Stress phones
/// named "1", "2" or "3" set the syllable stress to primary, secondary or
/// tertiary; a tone phone sets the tone of the script phone collected just
/// before it; every other phone becomes a script phone of the syllable.
/// </remarks>
/// <param name="syllable">Syllable string, doesn't include unit boundary.</param>
/// <param name="phoneSet">TtsPhoneSet.</param>
/// <returns>The constructed script syllable.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when syllable is null or empty, or phoneSet is null.
/// </exception>
/// <exception cref="InvalidDataException">
/// Thrown when the syllable string cannot be split into phones without
/// errors, or when a tone appears before any phone it could belong to.
/// </exception>
public static ScriptSyllable ParseStringToSyllable(string syllable, TtsPhoneSet phoneSet)
{
    if (string.IsNullOrEmpty(syllable))
    {
        throw new ArgumentNullException("syllable");
    }

    if (phoneSet == null)
    {
        throw new ArgumentNullException("phoneSet");
    }

    ScriptSyllable scriptSyllable = new ScriptSyllable(phoneSet.Language);

    ErrorSet errors = new ErrorSet();
    Phone[] phones = Pronunciation.SplitIntoPhones(syllable, phoneSet, errors);
    if (errors.Count > 0)
    {
        string message = Helper.NeutralFormat(
            "The syllable string [{0}] isn't valid : {1}{2}",
            syllable, Environment.NewLine, errors.ErrorsString());
        throw new InvalidDataException(message);
    }

    Collection<ScriptPhone> scriptPhones = new Collection<ScriptPhone>();
    foreach (Phone phone in phones)
    {
        if (phone.HasFeature(PhoneFeature.MainStress) ||
            phone.HasFeature(PhoneFeature.SubStress))
        {
            // Stress marks are folded into the syllable, not kept as phones.
            switch (phone.Name)
            {
                case "1":
                    scriptSyllable.Stress = TtsStress.Primary;
                    break;
                case "2":
                    scriptSyllable.Stress = TtsStress.Secondary;
                    break;
                case "3":
                    scriptSyllable.Stress = TtsStress.Tertiary;
                    break;
            }
        }
        else if (phone.HasFeature(PhoneFeature.Tone))
        {
            // A tone is attached to the phone collected right before it, so
            // a tone with nothing before it is reported as invalid input
            // instead of failing on an out-of-range index.
            if (scriptPhones.Count == 0)
            {
                string toneMessage = Helper.NeutralFormat(
                    "The syllable string [{0}] isn't valid : tone [{1}] has no preceding phone.",
                    syllable, phone.Name);
                throw new InvalidDataException(toneMessage);
            }

            scriptPhones[scriptPhones.Count - 1].Tone = phone.Name;
        }
        else
        {
            ScriptPhone scriptPhone = new ScriptPhone(phone.Name);
            scriptPhone.Syllable = scriptSyllable;
            scriptPhones.Add(scriptPhone);
        }
    }

    scriptSyllable.Phones.Clear();
    Helper.AppendCollection(scriptSyllable.Phones, scriptPhones);
    return scriptSyllable;
}