/// <summary>
/// Builds a Mixed-type uv segment holding the non-zero F0 values of one state.
/// The frames examined are [f0StartIndex, f0StartIndex + duration) of utt.Acoustic.F0s,
/// and only the first column of each frame is read.
/// </summary>
/// <param name="utt">Utterance whose acoustic F0 data is read.</param>
/// <param name="f0StartIndex">Index of the first F0 frame belonging to the state.</param>
/// <param name="duration">Number of F0 frames belonging to the state.</param>
/// <param name="f0EncodingMode">Chunk encoding stored on the created F0 contour.</param>
/// <param name="relativeBegin">Running counter; incremented for every zero-F0 frame met before the first non-zero frame.</param>
/// <param name="relativeEnd">Running counter; incremented for every zero-F0 frame met before the first non-zero frame and for every non-zero frame.</param>
/// <param name="reBeginPositionFindOut">Set to true once a non-zero F0 frame has been met.</param>
/// <param name="isF0ValueExist">Set to true once a non-zero F0 frame has been met; never reset here.</param>
/// <returns>The new uv segment; its contour contains only the non-zero F0 values.</returns>
private static ScriptUvSeg GetF0Contour(
    SP.TtsUtterance utt,
    int f0StartIndex,
    int duration,
    ScriptAcousticChunkEncoding f0EncodingMode,
    ref int relativeBegin,
    ref int relativeEnd,
    ref bool reBeginPositionFindOut,
    ref bool isF0ValueExist)
{
    Debug.Assert(utt != null, "Utt should not be null");
    Debug.Assert(f0StartIndex >= 0, "f0StartIndex should not be less than 0");
    Debug.Assert(duration > 0, "Duration should not be less than 0");
    Debug.Assert(relativeBegin >= 0, "relativeBegin should not be less than 0");
    Debug.Assert(relativeEnd >= 0, "relativeEnd should not be less than 0");

    ScriptUvSeg segment = new ScriptUvSeg();
    segment.SegType = ScriptUvSegType.Mixed;
    segment.F0Contour = new ScriptF0Contour();
    segment.F0Contour.ChunkEncoding = f0EncodingMode;

    int stopFrame = f0StartIndex + duration;
    for (int frame = f0StartIndex; frame < stopFrame; frame++)
    {
        float value = utt.Acoustic.F0s[frame][0];

        if (value != 0)
        {
            // Non-zero frame: remember that one was seen and extend the interval end.
            isF0ValueExist = true;
            reBeginPositionFindOut = true;
            relativeEnd++;
            segment.F0Contour.Contour.Add(value);
            continue;
        }

        // Zero frame: it only shifts the interval while no non-zero frame has been
        // seen yet; zero frames after that point are not counted at all.
        if (!reBeginPositionFindOut)
        {
            relativeBegin++;
            relativeEnd++;
        }
    }

    return segment;
}
/// <summary>
/// Walks the phones of a syllable, from FirstPhone to LastPhone, and appends one
/// ScriptPhone per phone to the script syllable. Each ScriptPhone receives the
/// pronunciation, tone, stress, optional sentence id and unit index, the wave segment
/// intervals, the per-state durations and F0 contours, and a relative uv interval.
/// </summary>
/// <param name="scriptSyllable">Script syllable that receives the created phones.</param>
/// <param name="utt">Utterance providing segments, unit lattice and acoustic data.</param>
/// <param name="syllable">Syllable whose phones are walked.</param>
/// <param name="phoneIndex">Index into utt.Acoustic.Durations; advanced by one per phone.</param>
/// <param name="unitIndex">Index into utt.Segments; advanced by one per phone, and by one more for a non-pause phone when a WUI manager is available.</param>
/// <param name="f0StartIndex">Index of the next F0 frame; advanced by each state's duration when F0 data exists.</param>
/// <param name="ttsEngine">Engine used to look up the WUI manager for sentence ids.</param>
private static void DumpPhones(
    ScriptSyllable scriptSyllable,
    SP.TtsUtterance utt,
    SP.TtsSyllable syllable,
    ref int phoneIndex,
    ref int unitIndex,
    ref int f0StartIndex,
    SP.TtsEngine ttsEngine)
{
    Debug.Assert(scriptSyllable != null, "ScriptSyllable should not be null");
    Debug.Assert(utt != null, "Utt should not be null");
    Debug.Assert(syllable != null, "Syllable should not be null");
    Debug.Assert(phoneIndex >= 0, "PhoneIndex should not be less than 0");
    Debug.Assert(f0StartIndex >= 0, "f0StartIndex should not be less than 0");
    Debug.Assert(ttsEngine != null, "ttsEngine should not be null");

    // The WUI manager is looked up once, from the best node of the unit at the
    // incoming unitIndex, and only when the utterance carries segments.
    WuiManager wuiManager = null;
    if (utt.Segments.Count > 0)
    {
        int bestNodeIndex = (int)utt.UnitLattice.WucList[unitIndex].BestNodeIndex;
        wuiManager = ttsEngine.RUSVoiceDataManager.GetWuiManagerByUnitCostNode(
            utt.UnitLattice.WucList[unitIndex].WucNodeList[bestNodeIndex]);
    }

    for (SP.TtsPhone phone = syllable.FirstPhone; phone != null; phone = phone.Next)
    {
        // Pronunciation text: lower-cased, stress removed, and tone removed when the phone has one.
        string pronunciationText = Pronunciation.RemoveStress(phone.Pronunciation.ToLowerInvariant()).Trim();
        if (phone.Tone != 0)
        {
            pronunciationText = Pronunciation.RemoveTone(pronunciationText).Trim();
        }

        // Silence and short pause are treated alike by the interval and unit-advance steps below.
        bool isPausePhone = phone.Pronunciation.Equals(PronOfSilence)
            || phone.Pronunciation.Equals(PronOfShortPause);

        ScriptPhone scriptPhone = new ScriptPhone(pronunciationText);
        scriptPhone.Tone = phone.Tone.ToString();
        scriptPhone.Stress = (TtsStress)phone.Stress;

        // Note: this check excludes silence only, not the short pause.
        if (phone.Pronunciation != PronOfSilence)
        {
            if (wuiManager != null)
            {
                scriptPhone.SentenceId = wuiManager.GetSentenceId(utt.Segments[unitIndex].WaveUnitInfo);
            }

            if (phone.Unit != null)
            {
                scriptPhone.UnitIndex = (int)phone.Unit.UnitIndex;
            }
        }

        scriptPhone.Acoustics = new ScriptAcoustics();

        // Wave segments: a non-pause unit contributes two consecutive segments
        // (unitIndex and unitIndex + 1), whose lengths add up to the phone duration.
        if (utt.Segments.Count > 0
            && !(utt.Segments[unitIndex].Unit.UnitText.Equals(PronOfSilence)
                || utt.Segments[unitIndex].Unit.UnitText.Equals(PronOfShortPause)))
        {
            int leftLength = (int)utt.Segments[unitIndex].WaveUnitInfo.WaveLength;
            int rightLength = (int)utt.Segments[unitIndex + 1].WaveUnitInfo.WaveLength;
            scriptPhone.Acoustics.Duration = leftLength + rightLength;

            int leftStart = (int)utt.Segments[unitIndex].WaveUnitInfo.RecordingWaveStartPosition;
            scriptPhone.Acoustics.SegmentIntervals.Add(new SegmentInterval(leftStart, leftStart + leftLength));

            int rightStart = (int)utt.Segments[unitIndex + 1].WaveUnitInfo.RecordingWaveStartPosition;
            scriptPhone.Acoustics.SegmentIntervals.Add(new SegmentInterval(rightStart, rightStart + rightLength));
        }

        // Per-phone trackers shared by all states of this phone through GetF0Contour.
        int voicedBegin = 0;
        int voicedEnd = 0;
        bool firstVoicedSeen = false;

        // NOTE(review): this flag is declared per phone and GetF0Contour only ever sets it
        // to true, so once one state has a non-zero F0 every later state of the same phone
        // also gets its uv segment added, even if that state has no non-zero F0. Confirm
        // whether a per-state reset was intended.
        bool hasVoicedFrame = false;

        if (utt.Acoustic.Durations != null)
        {
            for (int stateIndex = 0; stateIndex < utt.Acoustic.Durations[phoneIndex].Length; ++stateIndex)
            {
                int frames = (int)utt.Acoustic.Durations[phoneIndex][stateIndex];

                ScriptState scriptState = new ScriptState();
                scriptState.Acoustics = new ScriptAcoustics(frames * MillisecondsPerFrame);

                if (utt.Acoustic.F0s != null)
                {
                    ScriptUvSeg stateUvSeg = GetF0Contour(
                        utt,
                        f0StartIndex,
                        frames,
                        ScriptAcousticChunkEncoding.Text,
                        ref voicedBegin,
                        ref voicedEnd,
                        ref firstVoicedSeen,
                        ref hasVoicedFrame);

                    if (hasVoicedFrame)
                    {
                        scriptState.Acoustics.UvSegs.Add(stateUvSeg);
                    }

                    f0StartIndex += frames;
                }

                scriptPhone.States.Add(scriptState);
            }
        }

        // Relative uv interval of the phone, written for non-pause phones only.
        // NOTE(review): the literal 5 presumably equals MillisecondsPerFrame - confirm.
        if (utt.Acoustic.F0s != null && !isPausePhone)
        {
            ScriptUvSeg relativeIntervalSeg = new ScriptUvSeg(ScriptUvSegType.Mixed);
            relativeIntervalSeg.Interval = new ScriptUvSegInterval(voicedBegin * 5, voicedEnd * 5);
            scriptPhone.Acoustics.UvSegs.Add(relativeIntervalSeg);
        }

        phoneIndex++;

        // A non-pause phone maps to a pair of half-phone units when unit data is
        // available, so the right half is skipped as well.
        unitIndex += (wuiManager != null && !isPausePhone) ? 2 : 1;

        scriptSyllable.Phones.Add(scriptPhone);

        if (phone == syllable.LastPhone)
        {
            break;
        }
    }
}
/// <summary>
/// Appends an unvoiced/voiced segment to this object's uv segment collection.
/// </summary>
/// <param name="uvseg">Segment to append; it is stored as passed, without validation.</param>
public void AddUvSeg(ScriptUvSeg uvseg)
{
    this._scriptUvSegs.Add(uvseg);
}
/// <summary>
/// Gets the original external and internal F0 information for a layer range.
/// The external part is the list of F0 values taken from the external uv segment;
/// the internal part is the list of F0 frame indices, within the phone range
/// [layerIndex.StartPhone, layerIndex.EndPhone), whose internal F0 value is non-zero.
/// </summary>
/// <param name="intUtt">Internal utterance providing durations and F0s.</param>
/// <param name="extUvSeg">External uv segment providing the F0 contour.</param>
/// <param name="layerIndex">Phone range (StartPhone inclusive, EndPhone exclusive) to inspect.</param>
/// <param name="extNotNullF0">
/// External F0 values: the whole contour for a Voiced segment, only the non-zero
/// values for a Mixed segment, and null for any other segment type.
/// </param>
/// <param name="intNotNullF0Position">Indices into intUtt.Acoustic.F0s of the non-zero frames in the phone range.</param>
/// <exception cref="System.ArgumentNullException">
/// Thrown when <paramref name="intUtt"/> or <paramref name="extUvSeg"/> is null.
/// </exception>
public static void GetF0(
    SP.TtsUtterance intUtt,
    ScriptUvSeg extUvSeg,
    LayerIndex layerIndex,
    out float[] extNotNullF0,
    out List<int> intNotNullF0Position)
{
    // Fail early with a descriptive exception instead of a NullReferenceException
    // raised somewhere inside the loops below.
    if (intUtt == null)
    {
        throw new System.ArgumentNullException("intUtt");
    }

    if (extUvSeg == null)
    {
        throw new System.ArgumentNullException("extUvSeg");
    }

    intNotNullF0Position = new List<int>();

    if (extUvSeg.SegType == ScriptUvSegType.Voiced)
    {
        // Voiced segment: every contour value is taken as is.
        extNotNullF0 = new float[extUvSeg.F0Contour.Contour.Count];
        for (int i = 0; i < extUvSeg.F0Contour.Contour.Count; i++)
        {
            extNotNullF0[i] = extUvSeg.F0Contour.Contour[i];
        }
    }
    else if (extUvSeg.SegType == ScriptUvSegType.Mixed)
    {
        // Mixed segment: zero values are dropped.
        List<float> extNotNullF0List = new List<float>();
        for (int i = 0; i < extUvSeg.F0Contour.Contour.Count; i++)
        {
            if (extUvSeg.F0Contour.Contour[i] != 0)
            {
                extNotNullF0List.Add(extUvSeg.F0Contour.Contour[i]);
            }
        }

        extNotNullF0 = new float[extNotNullF0List.Count];
        extNotNullF0List.CopyTo(extNotNullF0);
    }
    else
    {
        extNotNullF0 = null;
    }

    // Skip the F0 frames of all phones that precede the range: the frame offset is
    // the sum of every state duration of those phones.
    int interF0Index = 0;
    for (int i = 0; i < layerIndex.StartPhone; i++)
    {
        for (int j = 0; j < (int)intUtt.Acoustic.Durations.Column; j++)
        {
            interF0Index += (int)intUtt.Acoustic.Durations[i][j];
        }
    }

    // Walk the frames of the phones in range and record the index of each non-zero F0.
    for (int i = layerIndex.StartPhone; i < layerIndex.EndPhone; i++)
    {
        for (int j = 0; j < (int)intUtt.Acoustic.Durations.Column; j++)
        {
            for (int k = 0; k < intUtt.Acoustic.Durations[i][j]; k++)
            {
                if (intUtt.Acoustic.F0s[interF0Index][0] != 0)
                {
                    intNotNullF0Position.Add(interF0Index);
                }

                interF0Index++;
            }
        }
    }
}
/// <summary>
/// Loads this object from the XML node the reader is currently on.
/// The "dura" and "qdura" attributes are read first (a missing or empty attribute
/// yields 0); then, unless the element is empty, child elements are consumed until
/// the closing "acoustics" tag or the end of the input.
/// </summary>
/// <param name="reader">Xml text reader to read from.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="reader"/> is null.</exception>
public void ParseFromXml(XmlTextReader reader)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    string durationText = reader.GetAttribute("dura");
    _duration = string.IsNullOrEmpty(durationText)
        ? 0
        : int.Parse(durationText, CultureInfo.InvariantCulture);

    string quantizedDurationText = reader.GetAttribute("qdura");
    _quanDuration = string.IsNullOrEmpty(quantizedDurationText)
        ? 0
        : int.Parse(quantizedDurationText, CultureInfo.InvariantCulture);

    // The uv segment collection is replaced on every parse; the segment interval
    // collection is not reset in this method and is only appended to.
    _scriptUvSegs = new Collection<ScriptUvSeg>();

    if (reader.IsEmptyElement)
    {
        return;
    }

    while (reader.Read())
    {
        if (reader.NodeType == XmlNodeType.EndElement)
        {
            if (reader.Name == "acoustics")
            {
                break;
            }

            continue;
        }

        if (reader.NodeType != XmlNodeType.Element)
        {
            continue;
        }

        // Each recognized child element is handed to its own parser; any other
        // element is ignored.
        switch (reader.Name)
        {
            case "segment":
                SegmentInterval interval = new SegmentInterval();
                interval.ParseFromXml(reader);
                _segmentIntervals.Add(interval);
                break;

            case "uvseg":
                ScriptUvSeg segment = new ScriptUvSeg();
                segment.ParseFromXml(reader);
                _scriptUvSegs.Add(segment);
                break;

            case "pow":
                _powContour = new ScriptPowerContour(reader.Name);
                _powContour.ParseFromXml(reader);
                break;

            case "qpow":
                _powContourQuantized = new ScriptPowerContour(reader.Name);
                _powContourQuantized.ParseFromXml(reader);
                break;
        }
    }
}