예제 #1
0
        /// <summary>
        /// Dumps every syllable of a runtime word, from <c>word.FirstSyllable</c> through
        /// <c>word.LastSyllable</c>, into the given script word.
        /// </summary>
        /// <param name="scriptWord">The script word that receives one script syllable per runtime syllable.</param>
        /// <param name="utt">The utterance; forwarded to <c>DumpPhones</c>.</param>
        /// <param name="word">The word which contains the syllables to dump.</param>
        /// <param name="phoneIndex">Phone index into the utterance phones; forwarded by reference to <c>DumpPhones</c>.</param>
        /// <param name="unitIndex">Unit index into the utterance units; forwarded by reference to <c>DumpPhones</c>.</param>
        /// <param name="f0StartIndex">Start position in the F0 values; forwarded by reference to <c>DumpPhones</c>.</param>
        /// <param name="ttsEngine">The engine object; forwarded to <c>DumpPhones</c>.</param>
        private static void DumpSyllables(ScriptWord scriptWord, SP.TtsUtterance utt,
            SP.TtsWord word, ref int phoneIndex, ref int unitIndex, ref int f0StartIndex, SP.TtsEngine ttsEngine)
        {
            Debug.Assert(scriptWord != null, "ScriptWord should not be null");
            Debug.Assert(utt != null, "Utt should not be null");
            Debug.Assert(word != null, "Word should not be null");
            Debug.Assert(phoneIndex >= 0, "PhoneIndex should not be less than 0");
            Debug.Assert(f0StartIndex >= 0, "f0StartIndex should not be less than 0");
            Debug.Assert(ttsEngine != null, "ttsEngine should not be null");

            // The accent lookup set is only read, never modified, so one instance serves the
            // whole word. It is created lazily because words without any accent never need it.
            TtsTobiAccentSet tobiAccentSet = null;

            // Go through each syllable in the word.
            SP.TtsSyllable syllable = word.FirstSyllable;
            while (syllable != null)
            {
                ScriptSyllable scriptSyllable = new ScriptSyllable();
                if (syllable.ToBIAccent != SP.TtsTobiAccent.K_NOACC)
                {
                    if (tobiAccentSet == null)
                    {
                        tobiAccentSet = new TtsTobiAccentSet();
                    }

                    scriptSyllable.TobiPitchAccent = TobiLabel.Create(tobiAccentSet.IdItems[(uint)syllable.ToBIAccent]);
                }

                scriptSyllable.Stress = (TtsStress)syllable.Stress;
                DumpPhones(scriptSyllable, utt, syllable, ref phoneIndex, ref unitIndex, ref f0StartIndex, ttsEngine);
                scriptWord.Syllables.Add(scriptSyllable);

                // Stop at the word's last syllable; syllable.Next may continue into the next word.
                if (syllable == word.LastSyllable)
                {
                    break;
                }

                syllable = syllable.Next;
            }
        }
예제 #2
0
        /// <summary>
        /// Dumps every phone of a runtime syllable, from <c>syllable.FirstPhone</c> through
        /// <c>syllable.LastPhone</c>, into the given script syllable. For each phone a
        /// <c>ScriptPhone</c> is created carrying pronunciation, tone, stress and, when the
        /// utterance provides them, sentence id, unit index, segment intervals, per-state
        /// durations and F0 contours.
        /// </summary>
        /// <param name="scriptSyllable">The script syllable to store the data dumped from the phones.</param>
        /// <param name="utt">The utterance providing Segments, UnitLattice and Acoustic data.</param>
        /// <param name="syllable">The syllable which contains these phones.</param>
        /// <param name="phoneIndex">Index into <c>utt.Acoustic.Durations</c>; incremented once per dumped phone.</param>
        /// <param name="unitIndex">Index into <c>utt.Segments</c> and <c>utt.UnitLattice.WucList</c>;
        /// incremented once per phone, and once more for non-silence, non-short-pause phones when a
        /// WuiManager was resolved.</param>
        /// <param name="f0StartIndex">Start position in the F0 values; advanced by each state's
        /// duration in frames when <c>utt.Acoustic.F0s</c> is not null.</param>
        /// <param name="ttsEngine">The engine whose <c>RUSVoiceDataManager</c> is used to resolve the WuiManager.</param>
        private static void DumpPhones(ScriptSyllable scriptSyllable, SP.TtsUtterance utt,
            SP.TtsSyllable syllable, ref int phoneIndex, ref int unitIndex, ref int f0StartIndex, SP.TtsEngine ttsEngine)
        {
            Debug.Assert(scriptSyllable != null, "ScriptSyllable should not be null");
            Debug.Assert(utt != null, "Utt should not be null");
            Debug.Assert(syllable != null, "Syllable should not be null");
            Debug.Assert(phoneIndex >= 0, "PhoneIndex should not be less than 0");
            Debug.Assert(f0StartIndex >= 0, "f0StartIndex should not be less than 0");
            Debug.Assert(ttsEngine != null, "ttsEngine should not be null");

            // Resolve the WuiManager once, from the best lattice node of the unit that unitIndex
            // points at on entry. It stays null when the utterance has no segments.
            WuiManager wuiManager = null;
            if (utt.Segments.Count > 0)
            {
                int bestNodeIndex = (int)utt.UnitLattice.WucList[unitIndex].BestNodeIndex;
                wuiManager = ttsEngine.RUSVoiceDataManager.GetWuiManagerByUnitCostNode(utt.UnitLattice.WucList[unitIndex].WucNodeList[bestNodeIndex]);
            }

            // Go through each phone in the syllable.
            SP.TtsPhone phone = syllable.FirstPhone;
            while (phone != null)
            {
                // Dump the pronunciation of the phone: lower-cased, stress removed, trimmed.
                string phonePronunciation = Pronunciation.RemoveStress(phone.Pronunciation.ToLowerInvariant()).Trim();

                // Remove the tone from the phone pronunciation if it exists.
                if (phone.Tone != 0)
                {
                    phonePronunciation = Pronunciation.RemoveTone(phonePronunciation).Trim();
                }

                ScriptPhone scriptPhone = new ScriptPhone(phonePronunciation);
                scriptPhone.Tone = phone.Tone.ToString();
                scriptPhone.Stress = (TtsStress)phone.Stress;

                // Sentence id and unit index are only recorded for phones that are not silence.
                // NOTE(review): short pauses are not excluded here, unlike the checks further
                // down - confirm whether that is intended.
                if (phone.Pronunciation != PronOfSilence)
                {
                    if (wuiManager != null)
                    {
                        scriptPhone.SentenceId = wuiManager.GetSentenceId(utt.Segments[unitIndex].WaveUnitInfo);
                    }

                    if (phone.Unit != null)
                    {
                        scriptPhone.UnitIndex = (int)phone.Unit.UnitIndex;
                    }
                }

                scriptPhone.Acoustics = new ScriptAcoustics();

                // Dump the segments. The phone's duration is the summed wave length of the
                // segments at unitIndex and unitIndex + 1, and one interval is added for each.
                // NOTE(review): Segments[unitIndex + 1] is read without a bounds check; this
                // presumably relies on non-silence phones always owning two half-phone
                // segments - confirm.
                if (utt.Segments.Count > 0 && !utt.Segments[unitIndex].Unit.UnitText.Equals(PronOfSilence)
                    && !utt.Segments[unitIndex].Unit.UnitText.Equals(PronOfShortPause))
                {
                    scriptPhone.Acoustics.Duration = (int)utt.Segments[unitIndex].WaveUnitInfo.WaveLength + (int)utt.Segments[unitIndex + 1].WaveUnitInfo.WaveLength;
                    int segStart = (int)utt.Segments[unitIndex].WaveUnitInfo.RecordingWaveStartPosition;
                    int segEnd = segStart + (int)utt.Segments[unitIndex].WaveUnitInfo.WaveLength;
                    scriptPhone.Acoustics.SegmentIntervals.Add(new SegmentInterval(segStart, segEnd));
                    segStart = (int)utt.Segments[unitIndex + 1].WaveUnitInfo.RecordingWaveStartPosition;
                    segEnd = segStart + (int)utt.Segments[unitIndex + 1].WaveUnitInfo.WaveLength;
                    scriptPhone.Acoustics.SegmentIntervals.Add(new SegmentInterval(segStart, segEnd));
                }

                // The four locals below are passed by reference to GetF0Contour for every state
                // of this phone, so their values carry over from one state to the next.

                // Relative begin position of the uvsegment interval.
                int relativeBegin = 0;

                // Relative end position of the uvsegment interval.
                int relativeEnd = 0;

                // While going through the F0 values, this flag identifies whether the first voiced segment has been met.
                bool reBeginPositionFindOut = false;

                // Check if all the F0 values in one state are equal to 0. If yes, don't write down the uvseg.
                bool isF0ValueExist = false;

                // Dump the durations and F0s in each state.
                if (utt.Acoustic.Durations != null)
                {
                    for (int i = 0; i < utt.Acoustic.Durations[phoneIndex].Length; ++i)
                    {
                        ScriptState scriptState = new ScriptState();

                        // Dump duration: stored in frames, converted with MillisecondsPerFrame.
                        int durationInFrame = (int)utt.Acoustic.Durations[phoneIndex][i];
                        scriptState.Acoustics = new ScriptAcoustics(durationInFrame * MillisecondsPerFrame);

                        // Dump F0s
                        if (utt.Acoustic.F0s != null)
                        {
                            ScriptUvSeg scriptUvSeg = GetF0Contour(utt, f0StartIndex, durationInFrame, ScriptAcousticChunkEncoding.Text,
                                ref relativeBegin, ref relativeEnd, ref reBeginPositionFindOut, ref isF0ValueExist);
                            if (isF0ValueExist == true)
                            {
                                scriptState.Acoustics.UvSegs.Add(scriptUvSeg);
                            }

                            // The next state's F0 values start right after this state's frames.
                            f0StartIndex += durationInFrame;
                        }

                        scriptPhone.States.Add(scriptState);
                    }
                }

                // Dump the uvsegment relative interval for phones that are neither silence nor short pause.
                // NOTE(review): the literal 5 presumably converts frames to milliseconds like
                // MillisecondsPerFrame above - confirm the two values agree.
                if (utt.Acoustic.F0s != null && !phone.Pronunciation.Equals(PronOfSilence)
                    && !phone.Pronunciation.Equals(PronOfShortPause))
                {
                    ScriptUvSeg uvSegForRelativeInterval = new ScriptUvSeg(ScriptUvSegType.Mixed);
                    uvSegForRelativeInterval.Interval = new ScriptUvSegInterval(relativeBegin * 5, relativeEnd * 5);
                    scriptPhone.Acoustics.UvSegs.Add(uvSegForRelativeInterval);
                }

                // Advance the caller's running indexes past this phone.
                phoneIndex++;
                unitIndex++;
                if (wuiManager != null &&
                    !phone.Pronunciation.Equals(PronOfSilence) &&
                    !phone.Pronunciation.Equals(PronOfShortPause))
                {
                    // If it is not a silence phone, the corresponding unit must be a half-phone unit,
                    // so we need to skip the right half phone to move to the next phone's unit.
                    unitIndex++;
                }

                scriptSyllable.Phones.Add(scriptPhone);

                // Stop at the syllable's last phone instead of following phone.Next any further.
                if (phone == syllable.LastPhone)
                {
                    break;
                }

                phone = phone.Next;
            }
        }
예제 #3
0
        /// <summary>
        /// Splits the pronunciation of a syllable into slices and creates one TTS unit
        /// for every non-empty slice.
        /// </summary>
        /// <param name="syllable">Syllable whose text, break level, emphasis and stress feed the units.</param>
        /// <returns>The units built from the syllable, in slice order.</returns>
        /// <exception cref="InvalidDataException">The units of the syllable contain more than
        /// <c>MaxVowelCountInSyllable</c> vowel phones in total.</exception>
        private Collection<TtsUnit> BuildUnits(ScriptSyllable syllable)
        {
            string cleanedText = Core.Pronunciation.CleanDecorate(syllable.Text.Trim());
            string[] slices = PronunciationSeparator.SplitSlices(cleanedText);
            PosInSyllable[] positions = EstimatePosInSyllable(slices);

            Collection<TtsUnit> result = new Collection<TtsUnit>();
            int vowelTotal = 0;
            int lastSliceIndex = slices.Length - 1;

            for (int i = 0; i <= lastSliceIndex; i++)
            {
                string sliceText = slices[i].Trim();
                if (!string.IsNullOrEmpty(sliceText))
                {
                    TtsUnit current = new TtsUnit(Language);

                    // Only the final slice inherits the syllable's break level;
                    // every earlier slice ends on a phone-level break.
                    if (i == lastSliceIndex)
                    {
                        current.TtsBreak = syllable.TtsBreak;
                    }
                    else
                    {
                        current.TtsBreak = TtsBreak.Phone;
                    }

                    // Position, emphasis and stress features (no punctuation type is set here).
                    current.Feature.PosInSyllable = positions[i];
                    current.Feature.TtsEmphasis = syllable.TtsEmphasis;
                    current.Feature.TtsStress = syllable.Stress;

                    // The unit name is the slice with each run of spaces collapsed to a single '+'.
                    current.MetaUnit.Name = Regex.Replace(sliceText, " +", @"+");
                    current.MetaUnit.Language = current.Language;

                    // Count the vowel phones contributed by this unit.
                    Phoneme phoneme = Localor.GetPhoneme(current.Language);
                    foreach (TtsMetaPhone metaPhone in current.MetaUnit.Phones)
                    {
                        if (phoneme.TtsVowelPhones.IndexOf(metaPhone.Name) >= 0)
                        {
                            vowelTotal++;
                        }
                    }

                    result.Add(current);
                }
            }

            if (vowelTotal <= MaxVowelCountInSyllable)
            {
                return result;
            }

            throw new InvalidDataException(string.Format(CultureInfo.InvariantCulture,
                "There are more than {0} vowel phone in this syllable [{1}], which is supposed to contain no more than one vowel phone",
                MaxVowelCountInSyllable, syllable.Text));
        }
        /// <summary>
        /// Inserts a silence word, consisting of one syllable with one phone, into a script sentence.
        /// </summary>
        /// <param name="scriptSentence">Script sentence that receives the silence word.</param>
        /// <param name="wordIndex">Position in <c>scriptSentence.Words</c> at which the word is inserted.</param>
        /// <param name="phoneme">The phoneme string; asserted to have the silence feature.</param>
        /// <exception cref="ArgumentNullException"><paramref name="scriptSentence"/> is null.</exception>
        public static void InsertSilenceWord(ScriptSentence scriptSentence, int wordIndex, string phoneme)
        {
            // Fail fast with a descriptive exception instead of a NullReferenceException
            // raised only after the whole silence word has been built.
            if (scriptSentence == null)
            {
                throw new ArgumentNullException("scriptSentence");
            }

            Debug.Assert(Phoneme.IsSilenceFeature(phoneme), "The phoneme should have silence feature");

            // Build the word -> syllable -> phone chain, wiring each child back to its parent.
            ScriptWord silenceWord = new ScriptWord();
            silenceWord.WordType = WordType.Silence;
            silenceWord.Pronunciation = Phoneme.ToRuntime(phoneme);
            silenceWord.Sentence = scriptSentence;
            ScriptSyllable silenceSyllable = new ScriptSyllable();
            silenceSyllable.Word = silenceWord;
            silenceWord.Syllables.Add(silenceSyllable);
            ScriptPhone silencePhone = new ScriptPhone(phoneme);
            silencePhone.Syllable = silenceSyllable;
            silenceWord.Syllables[0].Phones.Add(silencePhone);

            scriptSentence.Words.Insert(wordIndex, silenceWord);
        }
예제 #5
0
 /// <summary>
 /// Calculate PosInWord feature for a given syllable by looking up <c>_posInWordTrans</c>
 /// with two flags: whether the previous syllable is missing or ends with a break above
 /// <c>TtsBreak.Syllable</c>, and whether the target syllable ends with such a break.
 /// Public for code re-use in script sentence.
 /// </summary>
 /// <param name="preSyllable">Previous syllable of target syllable to calculate; may be null.</param>
 /// <param name="syllable">Target syllable to calculate.</param>
 /// <returns>PosInWord feature.</returns>
 /// <exception cref="ArgumentNullException"><paramref name="syllable"/> is null.</exception>
 public static PosInWord CalculatePosInWord(ScriptSyllable preSyllable,
     ScriptSyllable syllable)
 {
     // Only the previous syllable is optional; the target syllable is always dereferenced.
     if (syllable == null)
     {
         throw new ArgumentNullException("syllable");
     }

     int row = (preSyllable == null ||
         (int)preSyllable.TtsBreak > (int)TtsBreak.Syllable) ? 1 : 0;
     int column = ((int)syllable.TtsBreak > (int)TtsBreak.Syllable) ? 1 : 0;
     return _posInWordTrans[row][column];
 }
예제 #6
0
        /// <summary>
        /// Rebuilds <c>word.Syllables</c> from the word's pronunciation: the existing
        /// syllables are cleared and one syllable is added per pronunciation part.
        /// </summary>
        /// <param name="word">Word to process.</param>
        /// <exception cref="ArgumentNullException"><paramref name="word"/> is null.</exception>
        /// <exception cref="ArgumentException">The pronunciation of the word is null.</exception>
        private static void BuildSyllables(ScriptWord word)
        {
            if (word == null)
            {
                throw new ArgumentNullException("word");
            }

            if (word.Pronunciation == null)
            {
                throw new ArgumentException("word.Pronunciation is null");
            }

            word.Syllables.Clear();

            string[] parts = Core.Pronunciation.SplitIntoSyllables(word.Pronunciation);
            for (int i = 0; i < parts.Length; i++)
            {
                bool isFinalPart = i == parts.Length - 1;

                ScriptSyllable current = new ScriptSyllable();
                current.Text = parts[i];

                // The final syllable carries the word's break; the others end on a syllable break.
                if (isFinalPart)
                {
                    current.TtsBreak = word.Break;
                }
                else
                {
                    current.TtsBreak = TtsBreak.Syllable;
                }

                current.Stress = Core.Pronunciation.GetStress(current.Text);

                // Only stressed syllables take over the word's emphasis.
                if (current.Stress != TtsStress.None)
                {
                    current.TtsEmphasis = word.Emphasis;
                }
                else
                {
                    current.TtsEmphasis = TtsEmphasis.None;
                }

                word.Syllables.Add(current);
            }
        }
예제 #7
0
        /// <summary>
        /// Splits the stress-free text of a syllable into slices and creates one TTS unit
        /// for every non-empty slice.
        /// </summary>
        /// <param name="syllable">Syllable whose text, break level, emphasis and stress feed the units.</param>
        /// <param name="sliceData">Slice data; provides the unit language and is used to estimate slice positions.</param>
        /// <param name="pronunciationSeparator">Separator used to split the syllable text into slices.</param>
        /// <returns>The units built from the syllable, in slice order.</returns>
        private static Collection<TtsUnit> BuildUnitsForSyllable(ScriptSyllable syllable,
            SliceData sliceData, PronunciationSeparator pronunciationSeparator)
        {
            Debug.Assert(syllable != null);
            Debug.Assert(sliceData != null);

            string stressFreeText = Core.Pronunciation.RemoveStress(syllable.Text.Trim());
            string[] slices = pronunciationSeparator.SplitSlices(stressFreeText);
            PosInSyllable[] positions = EstimatePosInSyllable(slices, sliceData);

            Collection<TtsUnit> result = new Collection<TtsUnit>();
            int lastSliceIndex = slices.Length - 1;

            for (int i = 0; i <= lastSliceIndex; i++)
            {
                string sliceText = slices[i].Trim();
                if (!string.IsNullOrEmpty(sliceText))
                {
                    TtsUnit current = new TtsUnit(sliceData.Language);

                    // Only the final slice inherits the syllable's break level;
                    // every earlier slice ends on a phone-level break.
                    if (i == lastSliceIndex)
                    {
                        current.TtsBreak = syllable.TtsBreak;
                    }
                    else
                    {
                        current.TtsBreak = TtsBreak.Phone;
                    }

                    // Position, emphasis and stress features (no punctuation type is set here).
                    current.Feature.PosInSyllable = positions[i];
                    current.Feature.TtsEmphasis = syllable.TtsEmphasis;
                    current.Feature.TtsStress = syllable.Stress;

                    // The unit name is the slice with each run of spaces collapsed to a single '+'.
                    current.MetaUnit.Name = Regex.Replace(sliceText, " +", @"+");
                    current.MetaUnit.Language = current.Language;

                    result.Add(current);
                }
            }

            return result;
        }
예제 #8
0
        /// <summary>
        /// Rebuilds <c>UnitSyllables</c> from the word's pronunciation, adding one syllable
        /// per pronunciation part.
        /// </summary>
        /// <exception cref="InvalidDataException">The word has no pronunciation.</exception>
        private void UpdateUnitSyllables()
        {
            if (Pronunciation == null)
            {
                string message = Helper.NeutralFormat("word {0}'s has no pronunciation",
                    Grapheme);
                throw new InvalidDataException(message);
            }

            // Split first, clear afterwards: a failing split leaves the current syllables intact.
            string[] parts = Core.Pronunciation.SplitIntoSyllables(Pronunciation);
            UnitSyllables.Clear();

            for (int i = 0; i < parts.Length; i++)
            {
                bool isFinalPart = i == parts.Length - 1;

                ScriptSyllable current = new ScriptSyllable();
                current.Text = parts[i];

                // The final syllable carries the word's break; the others end on a syllable break.
                if (isFinalPart)
                {
                    current.TtsBreak = Break;
                }
                else
                {
                    current.TtsBreak = TtsBreak.Syllable;
                }

                current.Stress = Core.Pronunciation.GetStress(current.Text);

                // Only stressed syllables take over the word's emphasis.
                if (current.Stress != TtsStress.None)
                {
                    current.TtsEmphasis = Emphasis;
                }
                else
                {
                    current.TtsEmphasis = TtsEmphasis.None;
                }

                UnitSyllables.Add(current);
            }
        }
예제 #9
0
        /// <summary>
        /// Load a syllable from an XmlTextReader positioned on a syllable element: reads the
        /// "stress" and "tobipa" attributes, the "ph" children of a "phs" element, and an
        /// optional "acoustics" element, stopping at the closing "syl" tag.
        /// </summary>
        /// <param name="reader">XmlTextReader.</param>
        /// <param name="language">The language of the script.</param>
        /// <returns>ScriptSyllable.</returns>
        private static ScriptSyllable LoadSyllable(XmlTextReader reader, Language language)
        {
            Debug.Assert(reader != null);
            ScriptSyllable syllable = new ScriptSyllable(language);

            // load attributes
            string stress = reader.GetAttribute("stress");
            if (!string.IsNullOrEmpty(stress))
            {
                syllable.Stress = ScriptSyllable.StringToStress(stress);
            }

            string tobipa = reader.GetAttribute("tobipa");
            syllable.TobiPitchAccent = TobiLabel.Create(tobipa);

            // load phone
            if (!reader.IsEmptyElement)
            {
                while (reader.Read())
                {
                    if (reader.NodeType == XmlNodeType.Element && reader.Name == "phs")
                    {
                        // A self-closing <phs/> never yields an EndElement node, so scanning for
                        // one would run past this syllable and attach the phones of following
                        // syllables to it. Only scan when the element actually has content.
                        if (!reader.IsEmptyElement)
                        {
                            while (reader.Read())
                            {
                                if (reader.NodeType == XmlNodeType.Element && reader.Name == "ph")
                                {
                                    ScriptPhone phone = LoadPhone(reader);
                                    phone.Syllable = syllable;
                                    syllable.Phones.Add(phone);
                                }
                                else if (reader.NodeType == XmlNodeType.EndElement && reader.Name == "phs")
                                {
                                    break;
                                }
                            }
                        }
                    }
                    else if (reader.NodeType == XmlNodeType.Element && reader.Name == "acoustics")
                    {
                        syllable.Acoustics = new ScriptAcoustics();
                        syllable.Acoustics.ParseFromXml(reader);
                    }
                    else if (reader.NodeType == XmlNodeType.EndElement && reader.Name == "syl")
                    {
                        break;
                    }
                }
            }

            return syllable;
        }
예제 #10
0
        /// <summary>
        /// Parses a syllable string into a script syllable. Stress phones set the syllable
        /// stress, tone phones set the tone of the preceding phone, and every other phone
        /// becomes a script phone of the syllable.
        /// </summary>
        /// <param name="syllable">Syllable string, doesn't include unit boundary.</param>
        /// <param name="phoneSet">TtsPhoneSet used to split the string into phones.</param>
        /// <returns>The constructed script syllable.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="syllable"/> is null or empty,
        /// or <paramref name="phoneSet"/> is null.</exception>
        /// <exception cref="InvalidDataException">The syllable string cannot be split into phones,
        /// or a tone appears before any phone it could belong to.</exception>
        public static ScriptSyllable ParseStringToSyllable(string syllable, TtsPhoneSet phoneSet)
        {
            if (string.IsNullOrEmpty(syllable))
            {
                throw new ArgumentNullException("syllable");
            }

            if (phoneSet == null)
            {
                throw new ArgumentNullException("phoneSet");
            }

            ScriptSyllable scriptSyllable = new ScriptSyllable(phoneSet.Language);
            ErrorSet errors = new ErrorSet();
            Phone[] phones = Pronunciation.SplitIntoPhones(syllable, phoneSet, errors);
            if (errors.Count > 0)
            {
                string message = Helper.NeutralFormat(
                    "The syllable string [{0}] isn't valid : {1}{2}",
                    syllable, Environment.NewLine, errors.ErrorsString());
                throw new InvalidDataException(message);
            }

            Collection<ScriptPhone> scriptPhones = new Collection<ScriptPhone>();
            foreach (Phone phone in phones)
            {
                if (phone.HasFeature(PhoneFeature.MainStress) ||
                    phone.HasFeature(PhoneFeature.SubStress))
                {
                    // Stress marks describe the whole syllable rather than a single phone.
                    switch (phone.Name)
                    {
                        case "1":
                            scriptSyllable.Stress = TtsStress.Primary;
                            break;
                        case "2":
                            scriptSyllable.Stress = TtsStress.Secondary;
                            break;
                        case "3":
                            scriptSyllable.Stress = TtsStress.Tertiary;
                            break;
                    }
                }
                else if (phone.HasFeature(PhoneFeature.Tone))
                {
                    // A tone attaches to the phone before it. Report a leading tone as invalid
                    // data instead of failing with an index-out-of-range error.
                    if (scriptPhones.Count == 0)
                    {
                        string message = Helper.NeutralFormat(
                            "The syllable string [{0}] isn't valid : tone [{1}] appears before any phone",
                            syllable, phone.Name);
                        throw new InvalidDataException(message);
                    }

                    scriptPhones[scriptPhones.Count - 1].Tone = phone.Name;
                }
                else
                {
                    ScriptPhone scriptPhone = new ScriptPhone(phone.Name);
                    scriptPhone.Syllable = scriptSyllable;
                    scriptPhones.Add(scriptPhone);
                }
            }

            scriptSyllable.Phones.Clear();
            Helper.AppendCollection(scriptSyllable.Phones, scriptPhones);
            return scriptSyllable;
        }
예제 #11
0
        /// <summary>
        /// Converts the cleaned text of a syllable into Speech Recognition phones.
        /// </summary>
        /// <param name="syllable">Syllable to be processed.</param>
        /// <returns>SR phone array; never null.</returns>
        /// <exception cref="InvalidDataException">The syllable text cannot be converted to SR phones.</exception>
        private string[] ConvertToSrPhone(ScriptSyllable syllable)
        {
            string cleanedText = Pronunciation.CleanDecorate(syllable.Text.Trim());

            // Map the TTS syllable to Speech Recognition phone(s); null signals a failed mapping.
            string[] converted = Phoneme.Tts2SrPhones(cleanedText.Trim());
            if (converted != null)
            {
                return converted;
            }

            throw new InvalidDataException(string.Format(CultureInfo.InvariantCulture,
                "Invalid TTS syllable[{0}], which can not be converted to Speech Recognition Phone.",
                cleanedText));
        }
예제 #12
0
        /// <summary>
        /// Writes the SR phones of one syllable, one per line, for a mono MLF file.
        /// The syllable is always converted, even when no writer is supplied.
        /// </summary>
        /// <param name="writer">Text writer to save MLF file; nothing is written when null.</param>
        /// <param name="syllable">Syllable.</param>
        private void BuildMonoMlf(TextWriter writer, ScriptSyllable syllable)
        {
            // Convert before checking the writer so an invalid syllable is still reported.
            string[] phoneLines = ConvertToSrPhone(syllable);
            if (writer == null)
            {
                return;
            }

            for (int i = 0; i < phoneLines.Length; i++)
            {
                writer.WriteLine(phoneLines[i]);
            }
        }
예제 #13
0
        /// <summary>
        /// Converts the stress-free text of a syllable to SR phones and optionally writes
        /// them, one per line, to the given writer.
        /// </summary>
        /// <param name="syllable">Syllable.</param>
        /// <param name="item">Script item; its Id is recorded with a conversion error.</param>
        /// <param name="sw">Text writer; used only when <paramref name="writeToFile"/> is true.</param>
        /// <param name="writeToFile">Whether to write the phones to the writer.</param>
        /// <param name="phoneme">Phoneme used for the TTS to SR phone conversion.</param>
        /// <returns>Errors; contains one entry when the syllable cannot be converted.</returns>
        private static ErrorSet BuildMonoMlf(ScriptSyllable syllable, ScriptItem item, StreamWriter sw,
            bool writeToFile, Phoneme phoneme)
        {
            Debug.Assert(syllable != null);
            Debug.Assert(item != null);

            ErrorSet result = new ErrorSet();
            string stressFreeText = Pronunciation.RemoveStress(syllable.Text.Trim());
            string[] converted = phoneme.Tts2SrPhones(stressFreeText.Trim());

            // A null result means the conversion failed: record the error and write nothing.
            if (converted == null)
            {
                result.Add(ScriptError.OtherErrors, item.Id, string.Format(CultureInfo.InvariantCulture,
                    "Invalid TTS syllable[{0}], which can not be converted to Speech Recognition Phone.",
                    stressFreeText));
                return result;
            }

            if (writeToFile)
            {
                for (int i = 0; i < converted.Length; i++)
                {
                    sw.WriteLine(converted[i]);
                }
            }

            return result;
        }