コード例 #1
0
ファイル: TtsUnit.cs プロジェクト: JohnsonYuan/TTSFramework
        /// <summary>
        /// Read all tts units from a Unit Linguistic FeatureVector file.
        /// </summary>
        /// <remarks>
        /// Every line is split on spaces (empty entries removed). The first two tokens,
        /// joined by one space, form the dictionary key; the token array is passed to
        /// <c>TtsUnitFeature.Parse</c> with a start index of 2; the last token becomes
        /// the unit name. Reading stops at the end of the file or at the first empty line.
        /// </remarks>
        /// <param name="filePath">Unit Linguistic FeatureVector file.</param>
        /// <param name="language">Language of the unit file.</param>
        /// <returns>Unit dictionary, indexing by (sentence id + index in sentence).</returns>
        /// <exception cref="InvalidDataException">
        /// A line contains fewer than the two tokens needed to build the key.
        /// </exception>
        public static Dictionary<string, TtsUnit> ReadAllData(string filePath,
            Language language)
        {
            Dictionary<string, TtsUnit> units = new Dictionary<string, TtsUnit>();

            using (StreamReader sr = new StreamReader(filePath))
            {
                int lineNumber = 0;
                string line;
                while (!string.IsNullOrEmpty(line = sr.ReadLine()))
                {
                    lineNumber++;

                    string[] items = line.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

                    // A whitespace-only or truncated line is not "empty" for the loop
                    // condition, yet it cannot supply items[0] and items[1]; report it
                    // with its location instead of failing on an array index.
                    if (items.Length < 2)
                    {
                        string message = string.Format(
                            System.Globalization.CultureInfo.InvariantCulture,
                            "Malformed line {0} in unit feature file [{1}]: at least two tokens are required to build the unit key, but the line is [{2}].",
                            lineNumber, filePath, line);
                        throw new InvalidDataException(message);
                    }

                    TtsUnit unit = new TtsUnit(language);
                    unit.Feature = new TtsUnitFeature();
                    unit.Feature.Parse(items, 2);
                    unit.MetaUnit = new TtsMetaUnit(language);
                    unit.MetaUnit.Name = items[items.Length - 1];
                    string key = items[0] + " " + items[1];

                    units.Add(key, unit);
                }
            }

            return units;
        }
コード例 #2
0
        /// <summary>
        /// Derive the PosInSyllable feature of a unit from its current PosInSyllable
        /// value and the break levels of the unit and of its predecessor.
        /// Public so that script sentence code can reuse it.
        /// </summary>
        /// <param name="preUnit">Unit preceding the target unit; may be null.</param>
        /// <param name="unit">Target unit to calculate.</param>
        /// <returns>
        /// The calculated PosInSyllable; always <c>Onset</c> when the unit's meta unit
        /// is flagged as special.
        /// </returns>
        public static PosInSyllable CalculatePosInSyllable(TtsUnit preUnit, TtsUnit unit)
        {
            PosInSyllable result;

            switch (unit.Feature.PosInSyllable)
            {
                case PosInSyllable.Onset:
                    // Becomes OnsetNext when the previous unit's break is at most Phone.
                    result = (preUnit != null && (int)preUnit.TtsBreak <= (int)TtsBreak.Phone)
                        ? PosInSyllable.OnsetNext
                        : PosInSyllable.Onset;
                    break;

                case PosInSyllable.OnsetNext:
                    result = PosInSyllable.OnsetNext;
                    break;

                case PosInSyllable.Coda:
                    // Becomes CodaNext when this unit's own break is at most Phone.
                    result = ((int)unit.TtsBreak <= (int)TtsBreak.Phone)
                        ? PosInSyllable.CodaNext
                        : PosInSyllable.Coda;
                    break;

                case PosInSyllable.CodaNext:
                    result = PosInSyllable.CodaNext;
                    break;

                case PosInSyllable.NucleusInV:
                case PosInSyllable.NucleusInVC:
                case PosInSyllable.NucleusInCV:
                case PosInSyllable.NucleusInCVC:
                {
                    // The nucleus variant is chosen from the two break comparisons:
                    // previous unit's break (leading side) and this unit's break (trailing side).
                    bool previousBreakAtMostPhone =
                        preUnit != null && (int)preUnit.TtsBreak <= (int)TtsBreak.Phone;
                    bool ownBreakAtMostPhone = (int)unit.TtsBreak <= (int)TtsBreak.Phone;

                    if (previousBreakAtMostPhone)
                    {
                        result = ownBreakAtMostPhone
                            ? PosInSyllable.NucleusInCVC
                            : PosInSyllable.NucleusInCV;
                    }
                    else
                    {
                        result = ownBreakAtMostPhone
                            ? PosInSyllable.NucleusInVC
                            : PosInSyllable.NucleusInV;
                    }

                    break;
                }

                default:
                    // Any other value falls back to Coda.
                    result = PosInSyllable.Coda;
                    break;
            }

            // A special meta unit overrides whatever was calculated above.
            return unit.MetaUnit.Special ? PosInSyllable.Onset : result;
        }
コード例 #3
0
        /// <summary>
        /// Create one TtsUnit per non-empty pronunciation slice of the given syllable.
        /// </summary>
        /// <param name="syllable">Syllable to process.</param>
        /// <returns>Units built from the syllable, in slice order.</returns>
        /// <exception cref="InvalidDataException">
        /// The vowel phones counted over all units exceed MaxVowelCountInSyllable.
        /// </exception>
        private Collection<TtsUnit> BuildUnits(ScriptSyllable syllable)
        {
            string cleanedText = Core.Pronunciation.CleanDecorate(syllable.Text.Trim());
            string[] slices = PronunciationSeparator.SplitSlices(cleanedText);
            PosInSyllable[] positions = EstimatePosInSyllable(slices);

            Collection<TtsUnit> result = new Collection<TtsUnit>();
            int lastSliceIndex = slices.Length - 1;
            int vowelTotal = 0;

            for (int i = 0; i <= lastSliceIndex; i++)
            {
                string sliceText = slices[i].Trim();
                if (!string.IsNullOrEmpty(sliceText))
                {
                    TtsUnit unit = new TtsUnit(Language);

                    // Only the final slice takes the syllable's break level;
                    // every other slice gets a phone-level break.
                    if (i == lastSliceIndex)
                    {
                        unit.TtsBreak = syllable.TtsBreak;
                    }
                    else
                    {
                        unit.TtsBreak = TtsBreak.Phone;
                    }

                    // Position estimated for this slice, then emphasis and stress
                    // copied from the syllable. Punctuation type is not set here.
                    unit.Feature.PosInSyllable = positions[i];
                    unit.Feature.TtsEmphasis = syllable.TtsEmphasis;
                    unit.Feature.TtsStress = syllable.Stress;

                    // Unit name: each run of spaces in the slice becomes a single '+'.
                    unit.MetaUnit.Name = Regex.Replace(sliceText, " +", @"+");
                    unit.MetaUnit.Language = unit.Language;

                    // Count the phones of this unit that appear in the vowel phone list.
                    Phoneme phoneme = Localor.GetPhoneme(unit.Language);
                    foreach (TtsMetaPhone phone in unit.MetaUnit.Phones)
                    {
                        if (phoneme.TtsVowelPhones.IndexOf(phone.Name) < 0)
                        {
                            continue;
                        }

                        vowelTotal++;
                    }

                    result.Add(unit);
                }
            }

            if (vowelTotal <= MaxVowelCountInSyllable)
            {
                return result;
            }

            string message = string.Format(CultureInfo.InvariantCulture,
                "There are more than {0} vowel phone in this syllable [{1}], which is supposed to contain no more than one vowel phone",
                MaxVowelCountInSyllable, syllable.Text);
            throw new InvalidDataException(message);
        }
コード例 #4
0
        /// <summary>
        /// Write one log line describing a unit: its index, the next index, the expected
        /// unit id, the selected wave unit's sample offset and length and, when the
        /// selected unit has features, a "selected/expected" pair for each feature.
        /// </summary>
        /// <param name="writer">Stream writer to save the information.</param>
        /// <param name="index">Index of the unit to save.</param>
        /// <param name="expectedUnit">Expected unit from front-end.</param>
        /// <param name="selectedNode">Selected node through unit selection.</param>
        /// <returns>Next unit index, i.e. <paramref name="index"/> plus one.</returns>
        private static int SaveUnit(StreamWriter writer, int index,
            TtsUnit expectedUnit, CostNode selectedNode)
        {
            // NOTE(review): the PosInSentence and TtsStress pairs are written with a
            // space after the slash while all other pairs are not. Kept exactly as-is
            // because log consumers may rely on it; confirm whether it is intentional.
            const string PairFormat = "{0}/{1} ";
            const string SpacedPairFormat = "{0}/ {1} ";

            CultureInfo invariant = CultureInfo.InvariantCulture;
            int nextIndex = index + 1;

            writer.Write(index.ToString(invariant) + " ");
            writer.Write(nextIndex.ToString(invariant) + " ");
            writer.Write(expectedUnit.MetaUnit.Id.ToString(invariant) + " ");
            writer.Write(selectedNode.WaveUnit.SampleOffset.ToString(invariant) + " ");
            writer.Write(selectedNode.WaveUnit.SampleLength.ToString(invariant) + " ");

            TtsUnitFeature selected = selectedNode.WaveUnit.Features;
            if (selected != null)
            {
                // All pairs are collected first and written with a single call.
                StringBuilder pairs = new StringBuilder();

                pairs
                    .AppendFormat(invariant, SpacedPairFormat,
                        (int)selected.PosInSentence, (int)expectedUnit.Feature.PosInSentence)
                    .AppendFormat(invariant, PairFormat,
                        (int)selected.PosInWord, (int)expectedUnit.Feature.PosInWord)
                    .AppendFormat(invariant, PairFormat,
                        (int)selected.PosInSyllable, (int)expectedUnit.Feature.PosInSyllable)
                    .AppendFormat(invariant, PairFormat,
                        (int)selected.LeftContextPhone, (int)expectedUnit.Feature.LeftContextPhone)
                    .AppendFormat(invariant, PairFormat,
                        (int)selected.RightContextPhone, (int)expectedUnit.Feature.RightContextPhone)
                    .AppendFormat(invariant, PairFormat,
                        (int)selected.LeftContextTone, (int)expectedUnit.Feature.LeftContextTone)
                    .AppendFormat(invariant, PairFormat,
                        (int)selected.RightContextTone, (int)expectedUnit.Feature.RightContextTone)
                    .AppendFormat(invariant, SpacedPairFormat,
                        (int)selected.TtsStress, (int)expectedUnit.Feature.TtsStress)
                    .AppendFormat(invariant, PairFormat,
                        (int)selected.TtsEmphasis, (int)expectedUnit.Feature.TtsEmphasis)
                    .AppendFormat(invariant, PairFormat,
                        (int)selected.TtsWordTone, (int)expectedUnit.Feature.TtsWordTone);

                writer.Write(pairs.ToString());
            }

            writer.Write("\r\n");
            return nextIndex;
        }
コード例 #5
0
        /// <summary>
        /// Build a TtsUnit from one text line: the line is split on spaces (empty
        /// entries removed) and the tokens are parsed as unit features from index 0;
        /// the meta unit is obtained from the whole line via ParseMetaUnitForViterbi.
        /// </summary>
        /// <param name="line">Text line describing the unit.</param>
        /// <param name="language">Language of the unit.</param>
        /// <returns>The parsed unit.</returns>
        private static TtsUnit ParseTtsUnit(string line, Language language)
        {
            TtsUnit parsed = new TtsUnit(language);

            char[] separators = new char[] { ' ' };
            string[] tokens = line.Split(separators, StringSplitOptions.RemoveEmptyEntries);

            parsed.Feature = new TtsUnitFeature();
            parsed.Feature.Parse(tokens, 0);

            // The meta unit parser receives the raw line, not the token array.
            parsed.MetaUnit = ParseMetaUnitForViterbi(line);

            return parsed;
        }
コード例 #6
0
        /// <summary>
        /// Create one TtsUnit per non-empty pronunciation slice of a syllable, after
        /// passing the trimmed syllable text through Pronunciation.RemoveStress.
        /// </summary>
        /// <param name="syllable">Syllable to split into units.</param>
        /// <param name="sliceData">Slice data; supplies the unit language and is used
        /// when estimating the position of each slice in the syllable.</param>
        /// <param name="pronunciationSeparator">Separator used to split the syllable
        /// text into slices.</param>
        /// <returns>Units built from the syllable, in slice order.</returns>
        private static Collection<TtsUnit> BuildUnitsForSyllable(ScriptSyllable syllable,
            SliceData sliceData, PronunciationSeparator pronunciationSeparator)
        {
            Debug.Assert(syllable != null);
            Debug.Assert(sliceData != null);

            string stressFreeText = Core.Pronunciation.RemoveStress(syllable.Text.Trim());
            string[] slices = pronunciationSeparator.SplitSlices(stressFreeText);
            PosInSyllable[] positions = EstimatePosInSyllable(slices, sliceData);

            Collection<TtsUnit> result = new Collection<TtsUnit>();
            int lastSliceIndex = slices.Length - 1;

            for (int i = 0; i <= lastSliceIndex; i++)
            {
                string sliceText = slices[i].Trim();
                if (!string.IsNullOrEmpty(sliceText))
                {
                    TtsUnit unit = new TtsUnit(sliceData.Language);

                    // The final slice inherits the syllable's break level; all
                    // earlier slices end with a phone-level break.
                    if (i == lastSliceIndex)
                    {
                        unit.TtsBreak = syllable.TtsBreak;
                    }
                    else
                    {
                        unit.TtsBreak = TtsBreak.Phone;
                    }

                    // Estimated position, plus emphasis and stress taken from the
                    // syllable. Punctuation type is not set here.
                    unit.Feature.PosInSyllable = positions[i];
                    unit.Feature.TtsEmphasis = syllable.TtsEmphasis;
                    unit.Feature.TtsStress = syllable.Stress;

                    // Unit name: each run of spaces in the slice becomes a single '+'.
                    unit.MetaUnit.Name = Regex.Replace(sliceText, " +", @"+");
                    unit.MetaUnit.Language = unit.Language;

                    result.Add(unit);
                }
            }

            return result;
        }
コード例 #7
0
        /// <summary>
        /// Read the attributes of the current XML node into a new unit and append that
        /// unit to the utterance's script.
        /// </summary>
        /// <remarks>
        /// "val" becomes the unit name and "lPh"/"rPh" are converted to phone ids
        /// unconditionally. "iSyll", "iWord", "iSent", "em" and "st" are parsed as enum
        /// values only when the attribute is present.
        /// </remarks>
        /// <param name="reader">XML text reader to read data from.</param>
        /// <param name="utterance">Target utterance to save result units.</param>
        private static void ParseUnit(XmlTextReader reader, TtsUtterance utterance)
        {
            TtsUnit parsedUnit = new TtsUnit(utterance.Script.Language);
            parsedUnit.MetaUnit.Name = reader.GetAttribute("val");

            string posInSyllableText = reader.GetAttribute("iSyll");
            if (posInSyllableText != null)
            {
                parsedUnit.Feature.PosInSyllable =
                    (PosInSyllable)Enum.Parse(typeof(PosInSyllable), posInSyllableText);
            }

            string posInWordText = reader.GetAttribute("iWord");
            if (posInWordText != null)
            {
                parsedUnit.Feature.PosInWord =
                    (PosInWord)Enum.Parse(typeof(PosInWord), posInWordText);
            }

            string posInSentenceText = reader.GetAttribute("iSent");
            if (posInSentenceText != null)
            {
                parsedUnit.Feature.PosInSentence =
                    (PosInSentence)Enum.Parse(typeof(PosInSentence), posInSentenceText);
            }

            // Context phones are converted even when the attribute is absent (null).
            Phoneme phoneme = Localor.GetPhoneme(utterance.Script.Language, utterance.Script.Engine);
            string leftPhoneText = reader.GetAttribute("lPh");
            parsedUnit.Feature.LeftContextPhone = phoneme.TtsPhone2Id(leftPhoneText);
            string rightPhoneText = reader.GetAttribute("rPh");
            parsedUnit.Feature.RightContextPhone = phoneme.TtsPhone2Id(rightPhoneText);

            string emphasisText = reader.GetAttribute("em");
            if (emphasisText != null)
            {
                parsedUnit.Feature.TtsEmphasis =
                    (TtsEmphasis)Enum.Parse(typeof(TtsEmphasis), emphasisText);
            }

            string stressText = reader.GetAttribute("st");
            if (stressText != null)
            {
                parsedUnit.Feature.TtsStress =
                    (TtsStress)Enum.Parse(typeof(TtsStress), stressText);
            }

            utterance.Script.Units.Add(parsedUnit);
        }
コード例 #8
0
        /// <summary>
        /// Map every phone of a TTS unit to its Speech Recognition phone(s) and return
        /// them concatenated in phone order.
        /// </summary>
        /// <param name="unit">TtsUnit to be processed.</param>
        /// <returns>SR phone array.</returns>
        /// <exception cref="InvalidDataException">
        /// A phone of the unit has no Speech Recognition mapping (the mapping is null).
        /// </exception>
        private string[] ConvertToSrPhone(TtsUnit unit)
        {
            List<string> converted = new List<string>();

            foreach (TtsMetaPhone ttsPhone in unit.MetaUnit.Phones)
            {
                // One TTS phone may map to several SR phones.
                string[] mapped = Phoneme.Tts2SrPhones(ttsPhone.Name);
                if (mapped != null)
                {
                    converted.AddRange(mapped);
                    continue;
                }

                // No mapping available: abort the whole conversion.
                string message = string.Format(CultureInfo.InvariantCulture,
                    "Invalid TTS phone[{0}], which can not be converted to Speech Recognition Phone.",
                    ttsPhone.Name);
                throw new InvalidDataException(message);
            }

            return converted.ToArray();
        }
コード例 #9
0
        /// <summary>
        /// Write the Speech Recognition phones of one unit to a mono MLF writer, one
        /// phone per line.
        /// </summary>
        /// <param name="writer">Text writer to save MLF file; when null, nothing is
        /// written but the unit is still converted.</param>
        /// <param name="unit">Unit.</param>
        private void BuildMonoMlf(TextWriter writer, TtsUnit unit)
        {
            // The conversion is performed regardless of whether a writer was supplied.
            string[] phones = ConvertToSrPhone(unit);
            int phoneCount = phones.Length;

            if (writer == null)
            {
                return;
            }

            for (int i = 0; i < phoneCount; i++)
            {
                writer.WriteLine(phones[i]);
            }
        }
コード例 #10
0
        /// <summary>
        /// Convert the phones of a unit to Speech Recognition phones and optionally
        /// write them, one per line, to the mono MLF writer.
        /// </summary>
        /// <param name="unit">Unit whose phones are converted.</param>
        /// <param name="item">Script item; its id is attached to reported errors.</param>
        /// <param name="sw">Text writer; only used when <paramref name="writeToFile"/> is true.</param>
        /// <param name="writeToFile">Whether to write the converted phones to the writer.</param>
        /// <param name="phoneme">Phoneme used to map TTS phones to SR phones.</param>
        /// <returns>
        /// Errors collected for phones that have no SR mapping; such phones are skipped.
        /// </returns>
        private static ErrorSet BuildMonoMlf(TtsUnit unit, ScriptItem item, StreamWriter sw, 
            bool writeToFile, Phoneme phoneme)
        {
            Debug.Assert(unit != null);
            Debug.Assert(item != null);

            ErrorSet errorSet = new ErrorSet();
            List<string> convertedPhones = new List<string>();

            foreach (TtsMetaPhone ttsPhone in unit.MetaUnit.Phones)
            {
                string[] mapped = phoneme.Tts2SrPhones(ttsPhone.Name);
                if (mapped != null)
                {
                    convertedPhones.AddRange(mapped);
                }
                else
                {
                    // Record the unmappable phone and carry on with the remaining ones.
                    string message = string.Format(CultureInfo.InvariantCulture,
                        "Invalid TTS phone[{0}], which can not be converted to Speech Recognition Phone.",
                        ttsPhone.Name);
                    errorSet.Add(ScriptError.OtherErrors, item.Id, message);
                }
            }

            if (!writeToFile)
            {
                return errorSet;
            }

            foreach (string srPhone in convertedPhones)
            {
                sw.WriteLine(srPhone);
            }

            return errorSet;
        }