Example #1
0
        /// <summary>
        /// Tells whether a slice is a nucleus. A slice qualifies when either
        /// 1) its name is already listed in the nucleus slice set, or
        /// 2) at least one of its phones is listed among the TTS vowel phones
        ///    of the phoneme obtained for <c>Language</c>.
        /// </summary>
        /// <param name="ttsMetaUnit">TtsMetaUnit to test.</param>
        /// <returns>True if the slice is a nucleus, otherwise false.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="ttsMetaUnit"/> is null.
        /// </exception>
        public bool IsNucleus(TtsMetaUnit ttsMetaUnit)
        {
            if (ttsMetaUnit == null)
            {
                throw new ArgumentNullException("ttsMetaUnit");
            }

            // A slice that is already registered as nucleus needs no phone-level inspection.
            bool listedAsNucleus = _nucleusSlices.IndexOf(ttsMetaUnit.Name) >= 0;
            if (listedAsNucleus)
            {
                return true;
            }

            // Otherwise the slice is a nucleus only if one of its phones is a vowel.
            Phoneme languagePhoneme = Localor.GetPhoneme(Language);
            foreach (TtsMetaPhone metaPhone in ttsMetaUnit.Phones)
            {
                bool isVowel = languagePhoneme.TtsVowelPhones.IndexOf(metaPhone.Name) >= 0;
                if (isVowel)
                {
                    return true;
                }
            }

            return false;
        }
Example #2
0
 /// <summary>
 /// Initializes a new instance of the <see cref="TtsUnit"/> class for the
 /// given language, with a newly created feature object and meta unit.
 /// </summary>
 /// <param name="language">Language for this unit.</param>
 public TtsUnit(Language language)
 {
     // Remember the language and add it to the language collection.
     _language = language;
     _languages.Add(language);

     // Create the feature object first, then the meta unit for the same language.
     TtsUnitFeature initialFeature = new TtsUnitFeature();
     Feature = initialFeature;

     TtsMetaUnit initialMetaUnit = new TtsMetaUnit(language);
     MetaUnit = initialMetaUnit;
 }
Example #3
0
        /// <summary>
        /// Truncate one phone from nucleus.
        /// </summary>
        /// <remarks>
        /// The rules are tried in order. A right-side rule applies when its phone list
        /// contains the last phone of the nucleus as a whole word; a left-side rule applies
        /// when its phone list contains the first phone. The first applicable rule decides
        /// which end is cut off. When no rule applies, or when one of the resulting parts
        /// is empty, the last phone is split off instead and a trace message is written.
        /// </remarks>
        /// <param name="phoneme">Phoneme of the language to process.</param>
        /// <param name="rules">Truncation rules.</param>
        /// <param name="nucleus">CVC source to truncate.</param>
        /// <returns>Two-element array: left part, then right part.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="phoneme"/> or <paramref name="rules"/> is null,
        /// or when <paramref name="nucleus"/> is null or empty.
        /// </exception>
        /// <exception cref="ArgumentException">Thrown when one of the rules is null.</exception>
        /// <exception cref="NotSupportedException">
        /// Thrown when a rule specifies a side other than left or right.
        /// </exception>
        public static string[] TruncateOnePhoneFromNucleus(Phoneme phoneme,
            Collection<TruncateRule> rules, string nucleus)
        {
            if (phoneme == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (rules == null)
            {
                throw new ArgumentNullException("rules");
            }

            if (string.IsNullOrEmpty(nucleus))
            {
                throw new ArgumentNullException("nucleus");
            }

            TtsMetaUnit ttsMetaUnit = new TtsMetaUnit(phoneme.Language);
            ttsMetaUnit.Name = nucleus;
            string[] phoneNames = ttsMetaUnit.GetPhonesName();
            int lastPhoneIndex = phoneNames.Length - 1;
            string leftPart = null;
            string rightPart = null;

            for (int i = 0; i < rules.Count; i++)
            {
                if (rules[i] == null)
                {
                    string message = Helper.NeutralFormat("rules[{0}] should not be null.", i);
                    throw new ArgumentException(message);
                }

                if (rules[i].Side == TruncateSide.Right)
                {
                    // The phone name is data, not a pattern: escape it so that any regex
                    // metacharacter it contains is matched literally.
                    string lastPhonePattern = @"\b" + Regex.Escape(phoneNames[lastPhoneIndex]) + @"\b";
                    if (Regex.IsMatch(rules[i].Phones, lastPhonePattern))
                    {
                        // Cut the last phone off: everything before it stays on the left.
                        leftPart = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones, 0, lastPhoneIndex);
                        rightPart = ttsMetaUnit.Phones[lastPhoneIndex].Name;
                        break;
                    }
                }
                else if (rules[i].Side == TruncateSide.Left)
                {
                    // Same literal, whole-word lookup for the first phone.
                    string firstPhonePattern = @"\b" + Regex.Escape(phoneNames[0]) + @"\b";
                    if (Regex.IsMatch(rules[i].Phones, firstPhonePattern))
                    {
                        // Cut the first phone off: everything after it stays on the right.
                        leftPart = ttsMetaUnit.Phones[0].Name;
                        rightPart = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones, 1, lastPhoneIndex);
                        break;
                    }
                }
                else
                {
                    string message = string.Format(CultureInfo.InvariantCulture,
                        "Truncating side [{0}] is not supported.",
                        rules[i].Side);
                    Debug.Assert(false, message);
                    throw new NotSupportedException(message);
                }
            }

            if (string.IsNullOrEmpty(leftPart) || string.IsNullOrEmpty(rightPart))
            {
                // No usable split was produced by the rules: report it and fall back
                // to splitting off the last phone.
                string message = string.Format(CultureInfo.InvariantCulture,
                    "Nucleus [{0}] has empty left phone or right phone after truncating.",
                    nucleus);
                Trace.WriteLine(message);
                leftPart = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones, 0, lastPhoneIndex);
                rightPart = ttsMetaUnit.Phones[lastPhoneIndex].Name;
            }

            return new string[] { leftPart, rightPart };
        }
Example #4
0
        /// <summary>
        /// Builds the units for the pronunciation of one syllable.
        /// </summary>
        /// <remarks>
        /// Stress is removed from the syllable first. If the whole syllable is a predefined
        /// nucleus slice, it is returned as a single nucleus unit. If the syllable has no vowel,
        /// it is returned as one onset unit or one coda unit when it is listed in the
        /// corresponding slice collection, otherwise as one coda unit per phone. In all other
        /// cases the first vowel together with the sonorants found on both sides is taken as the
        /// nucleus candidate, which is truncated phone by phone until it is a predefined nucleus
        /// slice or only one phone is left; the phones before it become onset units and the
        /// phones after it are handed to <c>BuildCodaUnits</c>.
        /// Within a unit name, phones are joined by <c>TtsUnit.PhoneDelimiter</c>, and the
        /// name starts with the onset, nucleus or coda prefix defined on <c>TtsUnit</c>.
        /// </remarks>
        /// <param name="phoneme">Phoneme of the language to process with.</param>
        /// <param name="sliceData">Slice data to process.</param>
        /// <param name="syllable">Syllable pronunciation to process.</param>
        /// <returns>Unit names of the syllable, in order.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="phoneme"/> or <paramref name="sliceData"/> is null,
        /// or when <paramref name="syllable"/> is null or empty.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Thrown when the sonorant phones of <paramref name="phoneme"/>, or the onset or
        /// nucleus slices of <paramref name="sliceData"/>, are null.
        /// </exception>
        public static string[] BuildUnits(Phoneme phoneme,
            SliceData sliceData, string syllable)
        {
            if (phoneme == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (phoneme.TtsSonorantPhones == null)
            {
                string message = Helper.NeutralFormat("phoneme.TtsSonorantPhones should not be null.");
                throw new ArgumentException(message);
            }

            if (string.IsNullOrEmpty(syllable))
            {
                throw new ArgumentNullException("syllable");
            }

            if (sliceData == null)
            {
                throw new ArgumentNullException("sliceData");
            }

            if (sliceData.OnsetSlices == null)
            {
                string message = Helper.NeutralFormat("sliceData.OnsetSlices should not be null.");
                throw new ArgumentException(message);
            }

            if (sliceData.NucleusSlices == null)
            {
                string message = Helper.NeutralFormat("sliceData.NucleusSlices should not be null.");
                throw new ArgumentException(message);
            }

            List<string> slicedUnits = new List<string>();

            string unstressedSyllable = Pronunciation.RemoveStress(syllable);

            ScriptItem scriptItem = new ScriptItem(phoneme.Language);

            // items contains phone and tone.
            string[] items = scriptItem.PronunciationSeparator.SplitPhones(unstressedSyllable);

            // Treat the whole syllable as one unit at first.
            TtsMetaUnit ttsMetaUnit = new TtsMetaUnit(phoneme.Language);
            ttsMetaUnit.Name = string.Join(" ", items);
            string[] phones = ttsMetaUnit.GetPhonesName();

            // Treat all phones in this syllable as a whole unit
            if (sliceData.NucleusSlices.IndexOf(ttsMetaUnit.Name) >= 0)
            {
                // If it is already defined in the predefined unit collection, return it
                slicedUnits.Add(TtsUnit.NucleusPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
                return slicedUnits.ToArray();
            }

            int vowelIndex = phoneme.GetFirstVowelIndex(phones);
            if (vowelIndex < 0)
            {
                // If no vowel in the syllable, treat all phones in this syllable as a unit if it is in unit table
                // NOTE(review): CodaSlices is not validated above, unlike OnsetSlices and
                // NucleusSlices — confirm it can never be null here.
                if (sliceData.OnsetSlices.IndexOf(ttsMetaUnit.Name) >= 0)
                {
                    slicedUnits.Add(TtsUnit.OnsetPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
                }
                else if (sliceData.CodaSlices.IndexOf(ttsMetaUnit.Name) >= 0)
                {
                    slicedUnits.Add(TtsUnit.CodaPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
                }
                else
                {
                    // otherwise, treat each phone as a coda unit
                    foreach (string phone in phones)
                    {
                        slicedUnits.Add(TtsUnit.CodaPrefix + phone);
                    }
                }

                return slicedUnits.ToArray();
            }

            // Search the left-most sonorant consonant in front of the vowel.
            // The scan does not stop at the first hit, so the last match (smallest index) wins.
            int firstSonarantIndex = vowelIndex;
            for (int i = vowelIndex - 1; i >= 0; i--)
            {
                if (phoneme.TtsSonorantPhones.IndexOf(phones[i]) >= 0)
                {
                    firstSonarantIndex = i;
                }
            }

            // Search the right-most sonorant consonant behind the vowel (largest index wins).
            int lastSonarantIndex = vowelIndex;
            for (int i = vowelIndex + 1; i <= phones.Length - 1; i++)
            {
                if (phoneme.TtsSonorantPhones.IndexOf(phones[i]) >= 0)
                {
                    lastSonarantIndex = i;
                }
            }

            // Treat the vowel and the surrounding sonorant range as the nucleus unit first
            string nucleus = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones,
                firstSonarantIndex, lastSonarantIndex - firstSonarantIndex + 1);

            TruncateRuleData truncateRuleData = Localor.GetTruncateRuleData(phoneme.Language);

            // Refine nucleus according to the predefined unit table.
            // IndexOf yields a negative value only when the candidate is missing; index 0 is a
            // valid hit, so the loop must keep truncating only while the result is below zero.
            while (lastSonarantIndex - firstSonarantIndex > 0 && sliceData.NucleusSlices.IndexOf(nucleus) < 0)
            {
                // If the unit candidate is not listed in the predefined unit list, try to truncate it
                string[] leftRight =
                    PhoneMerger.TruncateOnePhoneFromNucleus(phoneme, truncateRuleData.NucleusTruncateRules,
                    nucleus);

                // A left part that is a single phone means the first phone was cut off.
                // NOTE(review): for a two-phone nucleus both parts are single phones, so a
                // right-side truncation cannot be told apart from a left-side one here and is
                // handled as a left-side cut — confirm this is intended.
                if (phoneme.TtsPhones.IndexOf(leftRight[0]) >= 0)
                {
                    firstSonarantIndex++;
                }
                else
                {
                    Debug.Assert(phoneme.TtsPhones.IndexOf(leftRight[1]) >= 0);
                    lastSonarantIndex--;
                }

                // Re-define the remaining nucleus unit
                nucleus = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones,
                    firstSonarantIndex, lastSonarantIndex - firstSonarantIndex + 1);
            }

            slicedUnits.Add(TtsUnit.NucleusPrefix + nucleus.Replace(" ", TtsUnit.PhoneDelimiter));

            // Refine onset: try the longest remaining prefix first; when it is not a predefined
            // onset slice, emit its last phone as a single-phone onset and retry with a shorter prefix.
            for (int index = firstSonarantIndex - 1; index >= 0; index--)
            {
                string onset = TtsMetaPhone.Join(TtsUnit.PhoneDelimiter, ttsMetaUnit.Phones, 0, index + 1);
                if (sliceData.OnsetSlices.IndexOf(onset.Replace(TtsUnit.PhoneDelimiter, " ")) >= 0)
                {
                    slicedUnits.Insert(0, TtsUnit.OnsetPrefix + onset);

                    // All phones in front of the nucleus are consumed now.
                    break;
                }
                else
                {
                    // Treat it as a single phone unit
                    slicedUnits.Insert(0,
                        TtsUnit.OnsetPrefix + TtsMetaPhone.Join(TtsUnit.PhoneDelimiter, ttsMetaUnit.Phones, index, 1));
                }
            }

            // Refine coda, matching from right to left
            BuildCodaUnits(sliceData, ttsMetaUnit.Phones, lastSonarantIndex + 1, slicedUnits);

            return slicedUnits.ToArray();
        }
Example #5
0
        /// <summary>
        /// Estimates the position in syllable for each slice of a slice set.
        /// The first slice recognised as nucleus by the slice data of this language gets a
        /// nucleus position that reflects whether slices precede and/or follow it; every
        /// slice before it is an onset and every slice after it is a coda. When no slice
        /// is a nucleus, all slices are marked as coda.
        /// </summary>
        /// <param name="slices">Slice collection to estimate.</param>
        /// <returns>Estimated position in syllable, one entry per slice.</returns>
        private PosInSyllable[] EstimatePosInSyllable(string[] slices)
        {
            int sliceCount = slices.Length;
            PosInSyllable[] positions = new PosInSyllable[sliceCount];

            // Locate the first slice that is a nucleus; -1 means there is none.
            int nucleusAt = -1;
            int cursor = 0;
            while (nucleusAt < 0 && cursor < sliceCount)
            {
                SliceData sliceData = Localor.GetSliceData(this.Language);
                TtsMetaUnit candidate = new TtsMetaUnit(this.Language);
                candidate.Name = slices[cursor];

                if (sliceData.IsNucleus(candidate))
                {
                    nucleusAt = cursor;
                }
                else
                {
                    cursor++;
                }
            }

            if (nucleusAt >= 0)
            {
                // The nucleus flavour depends on whether it has neighbours on either side.
                bool hasSlicesBefore = nucleusAt != 0;
                bool hasSlicesAfter = nucleusAt != sliceCount - 1;

                if (hasSlicesBefore)
                {
                    positions[nucleusAt] = hasSlicesAfter
                        ? PosInSyllable.NucleusInCVC
                        : PosInSyllable.NucleusInCV;
                }
                else
                {
                    positions[nucleusAt] = hasSlicesAfter
                        ? PosInSyllable.NucleusInVC
                        : PosInSyllable.NucleusInV;
                }
            }

            // Everything in front of the nucleus is onset, everything behind it is coda.
            for (int i = 0; i < sliceCount; i++)
            {
                if (i < nucleusAt)
                {
                    positions[i] = PosInSyllable.Onset;
                }
                else if (i > nucleusAt)
                {
                    positions[i] = PosInSyllable.Coda;
                }
            }

            return positions;
        }
Example #6
0
        /// <summary>
        /// Given the pronunciation string of one sentence, converts a
        /// phone-based segment file into a unit-based segment file.
        /// </summary>
        /// <remarks>
        /// Silence segments are copied to the target as they are. Every other position
        /// consumes the next pronunciation slice and writes one line made of the start time
        /// of the current phone segment (five decimals, invariant culture) and the full
        /// names of the slice's phones joined by "+". If phone segments remain after all
        /// slices are consumed, the target file is deleted and an error is returned.
        /// </remarks>
        /// <param name="pronunciation">Pronunciation string.</param>
        /// <param name="filePath">Phone-based segment file.</param>
        /// <param name="targetFilePath">Unit-based segment file.</param>
        /// <returns>Data error found, or null when the data aligns.</returns>
        public virtual DataError CombinePhone(string pronunciation,
            string filePath, string targetFilePath)
        {
            string cleanPronunciation = Core.Pronunciation.CleanDecorate(pronunciation);
            string[] slices = PronunciationSeparator.SplitSlices(cleanPronunciation);
            Collection<WaveSegment> phoneSegs = SegmentFile.ReadAllData(filePath);

            DataError alignmentError = null;
            using (StreamWriter writer = new StreamWriter(targetFilePath))
            {
                StringBuilder unitLabel = new StringBuilder();
                int nextSlice = 0;
                int segIndex = 0;

                while (segIndex < phoneSegs.Count)
                {
                    // Silence is passed through untouched and consumes no slice.
                    if (phoneSegs[segIndex].IsSilenceFeature)
                    {
                        writer.WriteLine(phoneSegs[segIndex].ToString());
                        segIndex++;
                        continue;
                    }

                    // Phone segments left over after the last slice: the data is misaligned.
                    if (nextSlice >= slices.Length)
                    {
                        string sid = Path.GetFileNameWithoutExtension(filePath);
                        alignmentError = new DataError(filePath,
                            "Data does not align between phone segmentation and pronunciation in CombinePhone", sid);
                        break;
                    }

                    TtsMetaUnit unit = new TtsMetaUnit(Language);
                    unit.Name = slices[nextSlice];
                    nextSlice++;

                    // Rebuild the label from scratch: named phones joined by "+".
                    unitLabel.Length = 0;
                    foreach (TtsMetaPhone phone in unit.Phones)
                    {
                        if (!string.IsNullOrEmpty(phone.Name))
                        {
                            if (unitLabel.Length > 0)
                            {
                                unitLabel.Append("+");
                            }

                            unitLabel.Append(phone.FullName);
                        }
                    }

                    // A slice without any named phone writes nothing and consumes no segment.
                    if (unitLabel.Length > 0)
                    {
                        string startTime =
                            phoneSegs[segIndex].StartTime.ToString("F5", CultureInfo.InvariantCulture);
                        writer.WriteLine(startTime + " " + unitLabel.ToString());
                        segIndex += unit.Phones.Length;
                    }
                }
            }

            if (alignmentError == null)
            {
                return null;
            }

            // Do not leave a partial target file behind; failing to delete it is only reported.
            try
            {
                File.Delete(targetFilePath);
            }
            catch (IOException ioe)
            {
                Console.WriteLine(ioe.Message);
            }

            return alignmentError;
        }
Example #7
0
        /// <summary>
        /// Parses a space-separated line into a <see cref="TtsMetaUnit"/>.
        /// The field at zero-based index 12 is read as the language id, the field at
        /// index 13 as the unit id, and the last field as the unit name.
        /// </summary>
        /// <param name="line">Line to parse; it is expected to hold at least 14 non-empty fields.</param>
        /// <returns>Meta unit built from the line.</returns>
        private static TtsMetaUnit ParseMetaUnitForViterbi(string line)
        {
            const int LanguageFieldIndex = 12;
            const int UnitIdFieldIndex = 13;

            char[] fieldSeparators = new char[] { ' ' };
            string[] fields = line.Split(fieldSeparators, StringSplitOptions.RemoveEmptyEntries);
            System.Diagnostics.Debug.Assert(fields.Length > UnitIdFieldIndex);

            int languageId = int.Parse(fields[LanguageFieldIndex], CultureInfo.InvariantCulture);
            int parsedUnitId = int.Parse(fields[UnitIdFieldIndex], CultureInfo.InvariantCulture);

            TtsMetaUnit metaUnit = new TtsMetaUnit(Localor.MapLanguageId(languageId));
            metaUnit.Id = parsedUnitId;

            // The unit name is always the last field, whatever the number of fields.
            metaUnit.Name = fields[fields.Length - 1];

            return metaUnit;
        }
Example #8
0
        /// <summary>
        /// Validates the syllables of a word pronunciation.
        /// </summary>
        /// <remarks>
        /// The pronunciation is split by the syllable separator of the entry. An empty
        /// syllable is an error. A syllable wrapped in underscores is accepted as a special
        /// unit. Any other syllable that is not a predefined nucleus slice must pass the
        /// language-specific vowel/sonorant check, and every syllable is finally checked
        /// by <c>ValidateSlices</c>. Validation stops at the first error.
        /// </remarks>
        /// <param name="entry">Script item.</param>
        /// <param name="word">Pronunciation of word.</param>
        /// <returns>First data error found, or null when all syllables are valid.</returns>
        public static DataError ValidateSyllables(ScriptItem entry, string word)
        {
            if (entry == null)
            {
                throw new ArgumentNullException("entry");
            }

            if (entry.PronunciationSeparator == null)
            {
                throw new ArgumentException(
                    Helper.NeutralFormat("entry.PronunciationSeparator should not be null."));
            }

            if (string.IsNullOrEmpty(entry.PronunciationSeparator.Syllable))
            {
                throw new ArgumentException(
                    Helper.NeutralFormat("entry.PronunciationSeparator.Syllable should not be null."));
            }

            if (string.IsNullOrEmpty(word))
            {
                throw new ArgumentNullException("word");
            }

            Phoneme phoneme = Localor.GetPhoneme(entry.Language);

            string[] syllableSeparators = new string[] { entry.PronunciationSeparator.Syllable };
            string[] syllables = word.Split(syllableSeparators, StringSplitOptions.None);

            for (int syllableIndex = 0; syllableIndex < syllables.Length; syllableIndex++)
            {
                string syllable = syllables[syllableIndex].Trim();

                if (string.IsNullOrEmpty(syllable))
                {
                    string emptyMessage = string.Format(CultureInfo.InvariantCulture,
                        "The syllable[{0}] of word[{1}] pronunciation is empty by separator [{2}]",
                        syllableIndex, word, entry.PronunciationSeparator.Syllable);
                    return new DataError("null", emptyMessage, entry.Id);
                }

                // Special units (wrapped in underscores) are accepted as they are.
                if (Regex.IsMatch(syllable, "^_(.*)_$"))
                {
                    continue;
                }

                string[] phoneItems = entry.PronunciationSeparator.SplitPhones(syllable);
                TtsMetaUnit syllableUnit = new TtsMetaUnit(entry.Language);
                syllableUnit.Name = string.Join(" ", phoneItems);
                string[] phones = syllableUnit.GetPhonesName();

                // A predefined nucleus slice is valid even without a vowel,
                // which happens in some languages, like fr-CA.
                SliceData sliceData = Localor.GetSliceData(entry.Language);
                bool isPredefinedNucleus = sliceData.NucleusSlices.IndexOf(syllableUnit.Name) >= 0;
                if (!isPredefinedNucleus)
                {
                    bool goodSyllable;

                    if (entry.Language == Language.EnUS)
                    {
                        // Syllable that must have vowels.
                        goodSyllable = IsGoodSyllableWithVowel(entry, phoneme, phones);
                    }
                    else if (entry.Language == Language.RuRU)
                    {
                        // A Russian syllable can have no sonorant.
                        goodSyllable = IsSyllableWithLessVowel(entry, phoneme, phones);
                    }
                    else
                    {
                        // Syllable that must have vowels or sonorants.
                        goodSyllable = IsGoodSyllableWithSonorant(entry, phoneme, phones);
                    }

                    if (!goodSyllable)
                    {
                        int[] vowelIndexes = phoneme.GetVowelIndexes(phones);
                        string format =
                            "There must be minimum {0} vowels or maximum {1} included in syllable " +
                            "or the syllable should have one sonorant and more than one consonants, " +
                            "but {2} vowels are found in syllable [{3}] of word [{4}].";
                        string vowelMessage = string.Format(CultureInfo.InvariantCulture, format,
                            entry.MinVowelCountInSyllable, entry.MaxVowelCountInSyllable,
                            vowelIndexes.Length, syllables[syllableIndex], word);
                        return new DataError("null", vowelMessage, entry.Id);
                    }
                }

                // Check the pronunciation of the slices in this syllable.
                DataError sliceError = ValidateSlices(entry, syllable);
                if (sliceError != null)
                {
                    return sliceError;
                }
            }

            return null;
        }
Example #9
0
        /// <summary>
        /// Given one script item, converts a phone-based segment file
        /// into a unit-based segment file.
        /// </summary>
        /// <remarks>
        /// The pronunciations of all words of the item are concatenated (each followed by
        /// " / "), stress is removed and the result is split into slices. Silence segments are
        /// copied to the target as they are. Every other position consumes the next slice and
        /// writes one line made of the start time of the current phone segment (five decimals,
        /// invariant culture) and the slice's phones joined by "+". If phone segments remain
        /// after all slices are consumed, the target file is deleted and an error is returned.
        /// </remarks>
        /// <param name="item">ScriptItem.</param>
        /// <param name="segmentFilePath">Phone-based segment file.</param>
        /// <param name="targetFilePath">Unit-based segment file.</param>
        /// <param name="ignoreTone">
        /// When true the phone's <c>Name</c> is written, otherwise its <c>FullName</c>.
        /// </param>
        /// <returns>Data error found, or null when the data aligns.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="item"/> is null.</exception>
        public static Error CombinePhonesToUnits(ScriptItem item, string segmentFilePath,
            string targetFilePath, bool ignoreTone)
        {
            if (item == null)
            {
                throw new ArgumentNullException("item");
            }

            // Collect the pronunciation of every word that has one.
            StringBuilder allPronunciations = new StringBuilder();
            foreach (ScriptSentence sentence in item.Sentences)
            {
                foreach (ScriptWord word in sentence.Words)
                {
                    if (string.IsNullOrEmpty(word.Pronunciation))
                    {
                        continue;
                    }

                    allPronunciations.Append(word.Pronunciation).Append(" / ");
                }
            }

            string pronunciation = Pronunciation.RemoveStress(allPronunciations.ToString());
            string[] slices = item.PronunciationSeparator.SplitSlices(pronunciation);
            Collection<WaveSegment> phoneSegs = SegmentFile.ReadAllData(segmentFilePath);

            Error alignmentError = null;
            using (StreamWriter writer = new StreamWriter(targetFilePath))
            {
                StringBuilder unitLabel = new StringBuilder();
                int nextSlice = 0;
                int segIndex = 0;

                while (segIndex < phoneSegs.Count)
                {
                    // Silence is passed through untouched and consumes no slice.
                    if (phoneSegs[segIndex].IsSilenceFeature)
                    {
                        writer.WriteLine(phoneSegs[segIndex].ToString());
                        segIndex++;
                        continue;
                    }

                    // Phone segments left over after the last slice: the data is misaligned.
                    if (nextSlice >= slices.Length)
                    {
                        string strTmp = "Data does not align between phone segmentation and pronunciation in CombinePhone";
                        alignmentError = new Error(ScriptError.OtherErrors, item.Id, Helper.NeutralFormat(strTmp));
                        break;
                    }

                    TtsMetaUnit unit = new TtsMetaUnit(item.Language);
                    unit.Name = slices[nextSlice];
                    nextSlice++;

                    // Rebuild the label from scratch: named phones joined by "+".
                    unitLabel.Length = 0;
                    foreach (TtsMetaPhone phone in unit.Phones)
                    {
                        if (!string.IsNullOrEmpty(phone.Name))
                        {
                            if (unitLabel.Length > 0)
                            {
                                unitLabel.Append("+");
                            }

                            if (ignoreTone)
                            {
                                unitLabel.Append(phone.Name);
                            }
                            else
                            {
                                unitLabel.Append(phone.FullName);
                            }
                        }
                    }

                    // A slice without any named phone writes nothing and consumes no segment.
                    if (unitLabel.Length > 0)
                    {
                        string startTime =
                            phoneSegs[segIndex].StartTime.ToString("F5", CultureInfo.InvariantCulture);
                        writer.WriteLine(startTime + " " + unitLabel.ToString());
                        segIndex += unit.Phones.Length;
                    }
                }
            }

            if (alignmentError == null)
            {
                return null;
            }

            // Do not leave a partial target file behind; failing to delete it is only reported.
            try
            {
                File.Delete(targetFilePath);
            }
            catch (IOException ioe)
            {
                Console.WriteLine(ioe.Message);
            }

            return alignmentError;
        }