ScriptUvSeg, Microsoft.Tts.Offline C# (CSharp)代码示例

示例#1

0

显示文件

文件： UtteranceToXML.cs 项目： JohnsonYuan/TTSFramework

        /// <summary>
        /// Collects the non-zero F0 values of one state into a new uv segment and
        /// updates the caller's running voiced-interval counters while doing so.
        /// </summary>
        /// <param name="utt">Utterance whose Acoustic.F0s supplies the F0 values.</param>
        /// <param name="f0StartIndex">Index of the first F0 frame to read.</param>
        /// <param name="duration">Number of F0 frames to read for this state.</param>
        /// <param name="f0EncodingMode">Chunk encoding stored on the resulting F0 contour.</param>
        /// <param name="relativeBegin">Incremented for every zero frame seen before the first non-zero frame.</param>
        /// <param name="relativeEnd">Incremented for every leading zero frame and for every non-zero frame.</param>
        /// <param name="reBeginPositionFindOut">Set to true once the first non-zero frame has been seen.</param>
        /// <param name="isF0ValueExist">Set to true when at least one non-zero frame is found; never reset here.</param>
        /// <returns>A Mixed-type ScriptUvSeg whose contour holds the non-zero F0 values in frame order.</returns>
        private static ScriptUvSeg GetF0Contour(SP.TtsUtterance utt, int f0StartIndex,
            int duration, ScriptAcousticChunkEncoding f0EncodingMode, ref int relativeBegin,
            ref int relativeEnd, ref bool reBeginPositionFindOut, ref bool isF0ValueExist)
        {
            Debug.Assert(utt != null, "Utt should not be null");
            Debug.Assert(f0StartIndex >= 0, "f0StartIndex should not be less than 0");
            Debug.Assert(duration > 0, "Duration should not be less than 0");
            Debug.Assert(relativeBegin >= 0, "relativeBegin should not be less than 0");
            Debug.Assert(relativeEnd >= 0, "relativeEnd should not be less than 0");

            ScriptUvSeg segment = new ScriptUvSeg();
            segment.SegType = ScriptUvSegType.Mixed;
            segment.F0Contour = new ScriptF0Contour();
            segment.F0Contour.ChunkEncoding = f0EncodingMode;

            int frame = f0StartIndex;
            int frameLimit = f0StartIndex + duration;

            while (frame < frameLimit)
            {
                float value = utt.Acoustic.F0s[frame][0];
                frame++;

                if (value != 0)
                {
                    // Non-zero frame: record it and extend the interval end.
                    isF0ValueExist = true;
                    reBeginPositionFindOut = true;
                    relativeEnd++;
                    segment.F0Contour.Contour.Add(value);
                    continue;
                }

                // Zero frame: only the leading ones (before any non-zero frame) shift the interval.
                if (!reBeginPositionFindOut)
                {
                    relativeBegin++;
                    relativeEnd++;
                }
            }

            return segment;
        }

示例#2

0

显示文件

文件： UtteranceToXML.cs 项目： JohnsonYuan/TTSFramework

        /// <summary>
        /// Dump the data of every phone in a syllable into the script syllable.
        /// For each phone from syllable.FirstPhone through syllable.LastPhone a ScriptPhone is
        /// built (pronunciation, tone, stress, optional sentence id / unit index, segment
        /// intervals, per-state durations and F0 contours) and appended to scriptSyllable.Phones.
        /// </summary>
        /// <param name="scriptSyllable">The script syllable to store the data dumped from the phones.</param>
        /// <param name="utt">The utterance.</param>
        /// <param name="syllable">The syllable which contains these phones.</param>
        /// <param name="phoneIndex">Phone index to mark the phone in the Utt.Phones. Incremented once per dumped phone.</param>
        /// <param name="unitIndex">Unit index to mark the unit in the Utt.Units. Incremented once per dumped phone,
        /// and once more for non-silence, non-short-pause phones when a WuiManager was obtained.</param>
        /// <param name="f0StartIndex">F0 index to mark the start position in the F0s. Advanced by each state's
        /// duration in frames when F0s are available.</param>
        /// <param name="ttsEngine">The object ttsEngine to help to convert the Pos and get sentence id.</param>
        private static void DumpPhones(ScriptSyllable scriptSyllable, SP.TtsUtterance utt,
            SP.TtsSyllable syllable, ref int phoneIndex, ref int unitIndex, ref int f0StartIndex, SP.TtsEngine ttsEngine)
        {
            Debug.Assert(scriptSyllable != null, "ScriptSyllable should not be null");
            Debug.Assert(utt != null, "Utt should not be null");
            Debug.Assert(syllable != null, "Syllable should not be null");
            Debug.Assert(phoneIndex >= 0, "PhoneIndex should not be less than 0");
            Debug.Assert(f0StartIndex >= 0, "f0StartIndex should not be less than 0");
            Debug.Assert(ttsEngine != null, "ttsEngine should not be null");

            // The WuiManager is resolved once, from the best node of the unit at the incoming
            // unitIndex, and only when the utterance has segments. It stays null otherwise.
            WuiManager wuiManager = null;
            if (utt.Segments.Count > 0)
            {
                int bestNodeIndex = (int)utt.UnitLattice.WucList[unitIndex].BestNodeIndex;
                wuiManager = ttsEngine.RUSVoiceDataManager.GetWuiManagerByUnitCostNode(utt.UnitLattice.WucList[unitIndex].WucNodeList[bestNodeIndex]);
            }

            // Go through each phone in the syllable.
            SP.TtsPhone phone = syllable.FirstPhone;
            while (phone != null)
            {
                // Dump the pronunciation of the phone: lower-cased, stress removed, trimmed.
                string phonePronunciation = Pronunciation.RemoveStress(phone.Pronunciation.ToLowerInvariant()).Trim();

                // Remove the tone from the phone pronunciation if it exists.
                if (phone.Tone != 0)
                {
                    phonePronunciation = Pronunciation.RemoveTone(phonePronunciation).Trim();
                }

                ScriptPhone scriptPhone = new ScriptPhone(phonePronunciation);
                scriptPhone.Tone = phone.Tone.ToString();
                scriptPhone.Stress = (TtsStress)phone.Stress;

                // Sentence id and unit index are only recorded for non-silence phones.
                if (phone.Pronunciation != PronOfSilence)
                {
                    if (wuiManager != null)
                    {
                        scriptPhone.SentenceId = wuiManager.GetSentenceId(utt.Segments[unitIndex].WaveUnitInfo);
                    }

                    if (phone.Unit != null)
                    {
                        scriptPhone.UnitIndex = (int)phone.Unit.UnitIndex;
                    }
                }

                scriptPhone.Acoustics = new ScriptAcoustics();

                // Dump the segments. For a unit that is neither silence nor short pause, the phone
                // is described by two consecutive segments (unitIndex and unitIndex + 1): the
                // duration is the sum of both wave lengths and one interval is added per segment.
                // NOTE(review): Segments[unitIndex + 1] is read without a bounds check - confirm
                // that a non-silence unit is always followed by its second segment.
                if (utt.Segments.Count > 0 && !utt.Segments[unitIndex].Unit.UnitText.Equals(PronOfSilence)
                    && !utt.Segments[unitIndex].Unit.UnitText.Equals(PronOfShortPause))
                {
                    scriptPhone.Acoustics.Duration = (int)utt.Segments[unitIndex].WaveUnitInfo.WaveLength + (int)utt.Segments[unitIndex + 1].WaveUnitInfo.WaveLength;
                    int segStart = (int)utt.Segments[unitIndex].WaveUnitInfo.RecordingWaveStartPosition;
                    int segEnd = segStart + (int)utt.Segments[unitIndex].WaveUnitInfo.WaveLength;
                    scriptPhone.Acoustics.SegmentIntervals.Add(new SegmentInterval(segStart, segEnd));
                    segStart = (int)utt.Segments[unitIndex + 1].WaveUnitInfo.RecordingWaveStartPosition;
                    segEnd = segStart + (int)utt.Segments[unitIndex + 1].WaveUnitInfo.WaveLength;
                    scriptPhone.Acoustics.SegmentIntervals.Add(new SegmentInterval(segStart, segEnd));
                }

                // The four values below are reset for every phone and shared by all of its states.

                // Relative begin position of the uvsegment interval.
                int relativeBegin = 0;

                // Relative end position of the uvsegment interval.
                int relativeEnd = 0;

                // When going through the F0 values, this value identifies whether the first voiced segment has been met.
                bool reBeginPositionFindOut = false;

                // Check if all the F0 values in one state are equal to 0. If yes, don't write down the uvseg.
                // NOTE(review): this flag is not reset between states and GetF0Contour only ever sets
                // it to true, so once one state has a non-zero F0 every later state of the same phone
                // also gets its uvseg added - confirm this is intended.
                bool isF0ValueExist = false;

                // Dump the durations and F0s in each state.
                if (utt.Acoustic.Durations != null)
                {
                    for (int i = 0; i < utt.Acoustic.Durations[phoneIndex].Length; ++i)
                    {
                        ScriptState scriptState = new ScriptState();

                        // Dump duration (converted from frames using MillisecondsPerFrame).
                        int durationInFrame = (int)utt.Acoustic.Durations[phoneIndex][i];
                        scriptState.Acoustics = new ScriptAcoustics(durationInFrame * MillisecondsPerFrame);

                        // Dump F0s
                        if (utt.Acoustic.F0s != null)
                        {
                            ScriptUvSeg scriptUvSeg = GetF0Contour(utt, f0StartIndex, durationInFrame, ScriptAcousticChunkEncoding.Text,
                                ref relativeBegin, ref relativeEnd, ref reBeginPositionFindOut, ref isF0ValueExist);
                            if (isF0ValueExist == true)
                            {
                                scriptState.Acoustics.UvSegs.Add(scriptUvSeg);
                            }

                            // Move to the first F0 frame of the next state.
                            f0StartIndex += durationInFrame;
                        }

                        scriptPhone.States.Add(scriptState);
                    }
                }

                // Dump the uvsegment relative interval (skipped for silence and short pause).
                // NOTE(review): the literal 5 presumably mirrors MillisecondsPerFrame - confirm.
                if (utt.Acoustic.F0s != null && !phone.Pronunciation.Equals(PronOfSilence)
                    && !phone.Pronunciation.Equals(PronOfShortPause))
                {
                    ScriptUvSeg uvSegForRelativeInterval = new ScriptUvSeg(ScriptUvSegType.Mixed);
                    uvSegForRelativeInterval.Interval = new ScriptUvSegInterval(relativeBegin * 5, relativeEnd * 5);
                    scriptPhone.Acoustics.UvSegs.Add(uvSegForRelativeInterval);
                }

                // Advance the caller's indices to the next phone / unit.
                phoneIndex++;
                unitIndex++;
                if (wuiManager != null &&
                    !phone.Pronunciation.Equals(PronOfSilence) &&
                    !phone.Pronunciation.Equals(PronOfShortPause))
                {
                    // If it is not a silence phone, the corresponding unit must be a half phone unit,
                    // so we need to skip the right half phone to move to the next phone's unit.
                    unitIndex++;
                }

                scriptSyllable.Phones.Add(scriptPhone);

                // Stop at the syllable boundary; phone.Next may continue into the next syllable.
                if (phone == syllable.LastPhone)
                {
                    break;
                }

                phone = phone.Next;
            }
        }

示例#3

0

显示文件

文件： ScriptAcoustics.cs 项目： JohnsonYuan/TTSFramework

 /// <summary>
 /// Appends an unvoiced-voiced segment object to this instance's uv segment collection.
 /// </summary>
 /// <param name="uvseg">Uv segment to append. It is handed to the collection as-is; no null check is done here.</param>
 public void AddUvSeg(ScriptUvSeg uvseg)
 {
     _scriptUvSegs.Add(uvseg);
 }

示例#4

0

显示文件

文件： ScriptFeatureHelper.cs 项目： JohnsonYuan/TTSFramework

        /// <summary>
        /// Get the original internal and external F0s.
        /// The external values are the non-zero F0s of <paramref name="extUvSeg"/>; the internal
        /// result is the list of frame indices (into intUtt.Acoustic.F0s) whose F0 is non-zero
        /// within the phone range [layerIndex.StartPhone, layerIndex.EndPhone).
        /// </summary>
        /// <param name="intUtt">Internal utterance.</param>
        /// <param name="extUvSeg">External uvSeg.</param>
        /// <param name="layerIndex">Certain syllable/phone/state's position.</param>
        /// <param name="extNotNullF0">External F0s: the whole contour for a Voiced segment, the
        /// non-zero contour values for a Mixed segment, and null for any other segment type.</param>
        /// <param name="intNotNullF0Position">Frame indices of the internal F0s that are not zero.</param>
        /// <exception cref="System.ArgumentNullException">
        /// Thrown when <paramref name="intUtt"/> or <paramref name="extUvSeg"/> is null.
        /// </exception>
        public static void GetF0(SP.TtsUtterance intUtt, ScriptUvSeg extUvSeg, LayerIndex layerIndex,
            out float[] extNotNullF0, out List<int> intNotNullF0Position)
        {
            // Fail fast with a descriptive exception instead of a NullReferenceException
            // raised from somewhere in the middle of the method.
            if (intUtt == null)
            {
                throw new System.ArgumentNullException("intUtt");
            }

            if (extUvSeg == null)
            {
                throw new System.ArgumentNullException("extUvSeg");
            }

            intNotNullF0Position = new List<int>();
            if (extUvSeg.SegType == ScriptUvSegType.Voiced)
            {
                // Voiced segment: every contour value is taken as-is.
                extNotNullF0 = new float[extUvSeg.F0Contour.Contour.Count];
                for (int i = 0; i < extUvSeg.F0Contour.Contour.Count; i++)
                {
                    extNotNullF0[i] = extUvSeg.F0Contour.Contour[i];
                }
            }
            else if (extUvSeg.SegType == ScriptUvSegType.Mixed)
            {
                // Mixed segment: zero values are dropped.
                List<float> extNotNullF0List = new List<float>();
                for (int i = 0; i < extUvSeg.F0Contour.Contour.Count; i++)
                {
                    if (extUvSeg.F0Contour.Contour[i] != 0)
                    {
                        extNotNullF0List.Add(extUvSeg.F0Contour.Contour[i]);
                    }
                }

                extNotNullF0 = new float[extNotNullF0List.Count];
                extNotNullF0List.CopyTo(extNotNullF0);
            }
            else
            {
                extNotNullF0 = null;
            }

            // Skip the frames of all phones before the start phone: the F0 frame index of a
            // phone is the sum of the state durations of every phone preceding it.
            int interF0Index = 0;
            for (int i = 0; i < layerIndex.StartPhone; i++)
            {
                for (int j = 0; j < (int)intUtt.Acoustic.Durations.Column; j++)
                {
                    interF0Index += (int)intUtt.Acoustic.Durations[i][j];
                }
            }

            // Walk every frame of every state in the requested phone range and record the
            // indices of the frames whose F0 is not zero.
            for (int i = layerIndex.StartPhone; i < layerIndex.EndPhone; i++)
            {
                for (int j = 0; j < (int)intUtt.Acoustic.Durations.Column; j++)
                {
                    for (int k = 0; k < intUtt.Acoustic.Durations[i][j]; k++)
                    {
                        if (intUtt.Acoustic.F0s[interF0Index][0] != 0)
                        {
                            intNotNullF0Position.Add(interF0Index);
                        }

                        interF0Index++;
                    }
                }
            }
        }

示例#5

0

显示文件

文件： ScriptAcoustics.cs 项目： JohnsonYuan/TTSFramework

        /// <summary>
        /// Populates this ScriptAcoustics object from the xml element the reader is positioned on.
        /// Reads the "dura" and "qdura" attributes (0 when missing or empty), then the child
        /// "segment", "uvseg", "pow" and "qpow" elements until the closing "acoustics" tag.
        /// </summary>
        /// <param name="reader">Xml text reader.</param>
        public void ParseFromXml(XmlTextReader reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            string durationText = reader.GetAttribute("dura");
            _duration = string.IsNullOrEmpty(durationText)
                ? 0
                : int.Parse(durationText, CultureInfo.InvariantCulture);

            string quantizedDurationText = reader.GetAttribute("qdura");
            _quanDuration = string.IsNullOrEmpty(quantizedDurationText)
                ? 0
                : int.Parse(quantizedDurationText, CultureInfo.InvariantCulture);

            _scriptUvSegs = new Collection<ScriptUvSeg>();

            // An empty element has no children to read.
            if (reader.IsEmptyElement)
            {
                return;
            }

            while (reader.Read())
            {
                if (reader.NodeType == XmlNodeType.Element)
                {
                    // Each child parser consumes its own element from the shared reader.
                    switch (reader.Name)
                    {
                        case "segment":
                        {
                            SegmentInterval interval = new SegmentInterval();
                            interval.ParseFromXml(reader);
                            _segmentIntervals.Add(interval);
                            break;
                        }

                        case "uvseg":
                        {
                            ScriptUvSeg segment = new ScriptUvSeg();
                            segment.ParseFromXml(reader);
                            _scriptUvSegs.Add(segment);
                            break;
                        }

                        case "pow":
                            _powContour = new ScriptPowerContour(reader.Name);
                            _powContour.ParseFromXml(reader);
                            break;

                        case "qpow":
                            _powContourQuantized = new ScriptPowerContour(reader.Name);
                            _powContourQuantized.ParseFromXml(reader);
                            break;
                    }
                }
                else if (reader.NodeType == XmlNodeType.EndElement && reader.Name == "acoustics")
                {
                    // Closing tag of this element: parsing is complete.
                    break;
                }
            }
        }

C# (CSharp) Microsoft.Tts.Offline ScriptUvSeg示例