예제 #1
0
        /// <summary>
        /// Loads a script file and indexes its items by sentence id.
        /// </summary>
        /// <param name="scriptFilePath">Path of the script file to read.</param>
        /// <param name="language">Language of the script.</param>
        /// <param name="engineType">Engine type the script should support.</param>
        /// <param name="outEntries">
        /// Dictionary that receives the loaded items, keyed by sentence id.
        /// Items whose id is already present in this dictionary are not added.
        /// </param>
        /// <returns>
        /// The error set produced while reading the file, extended with one
        /// "Sentence id duplicated." error for every item that was not added.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="scriptFilePath"/> is null or empty, or
        /// <paramref name="outEntries"/> is null.
        /// </exception>
        public static DataErrorSet ReadAllData(string scriptFilePath,
            Language language, EngineType engineType,
            SortedDictionary<string, ScriptItem> outEntries)
        {
            if (string.IsNullOrEmpty(scriptFilePath))
            {
                throw new ArgumentNullException("scriptFilePath");
            }

            if (outEntries == null)
            {
                throw new ArgumentNullException("outEntries");
            }

            // Read into a flat collection first; indexing by id happens below.
            Collection<ScriptItem> loadedItems = new Collection<ScriptItem>();
            DataErrorSet result = ReadAllData(scriptFilePath, language, engineType, loadedItems);

            foreach (ScriptItem item in loadedItems)
            {
                if (!outEntries.ContainsKey(item.Id))
                {
                    outEntries.Add(item.Id, item);
                }
                else
                {
                    // Keep the first occurrence and report the later one.
                    DataError duplicate = new DataError();
                    duplicate.SentenceId = item.Id;
                    duplicate.Message = "Sentence id duplicated.";
                    duplicate.FilePath = scriptFilePath;
                    result.Errors.Add(duplicate);
                }
            }

            return result;
        }
예제 #2
0
        /// <summary>
        /// Converts one phone-based segment file into a unit-based segment file,
        /// using the sentence's pronunciation string to group phones into units.
        /// </summary>
        /// <param name="pronunciation">Pronunciation string of the sentence.</param>
        /// <param name="filePath">Phone-based segment file to read.</param>
        /// <param name="targetFilePath">Unit-based segment file to write.</param>
        /// <returns>
        /// Null on success; otherwise a data error reporting that the segments
        /// and the pronunciation slices do not align. In the error case the
        /// target file is deleted again.
        /// </returns>
        public virtual DataError CombinePhone(string pronunciation,
            string filePath, string targetFilePath)
        {
            pronunciation = Core.Pronunciation.CleanDecorate(pronunciation);
            string[] unitNames = PronunciationSeparator.SplitSlices(pronunciation);
            Collection<WaveSegment> segments = SegmentFile.ReadAllData(filePath);

            DataError alignmentError = null;
            using (StreamWriter output = new StreamWriter(targetFilePath))
            {
                StringBuilder unitLabel = new StringBuilder();
                int unitCursor = 0;
                int segmentCursor = 0;

                while (segmentCursor < segments.Count)
                {
                    // Silence segments are copied through unchanged.
                    if (segments[segmentCursor].IsSilenceFeature)
                    {
                        output.WriteLine(segments[segmentCursor].ToString());
                        segmentCursor++;
                        continue;
                    }

                    // A non-silence segment is left but no slice remains to cover it.
                    if (unitCursor >= unitNames.Length)
                    {
                        string sid = Path.GetFileNameWithoutExtension(filePath);
                        alignmentError = new DataError(filePath,
                            "Data does not align between phone segmentation and pronunciation in CombinePhone", sid);
                        break;
                    }

                    TtsMetaUnit unit = new TtsMetaUnit(Language);
                    unit.Name = unitNames[unitCursor];
                    unitCursor++;

                    // Rebuild the "+"-joined label from the unit's named phones.
                    unitLabel.Length = 0;
                    foreach (TtsMetaPhone phone in unit.Phones)
                    {
                        if (!string.IsNullOrEmpty(phone.Name))
                        {
                            if (unitLabel.Length > 0)
                            {
                                unitLabel.Append("+");
                            }

                            unitLabel.Append(phone.FullName);
                        }
                    }

                    // A unit without any named phone consumes a slice but no segment.
                    if (unitLabel.Length == 0)
                    {
                        continue;
                    }

                    string startTime = segments[segmentCursor].StartTime.ToString("F5", CultureInfo.InvariantCulture);
                    output.WriteLine(startTime + " " + unitLabel.ToString());

                    // Skip all phone segments that belong to the unit just written.
                    segmentCursor += unit.Phones.Length;
                }
            }

            if (alignmentError != null)
            {
                // Do not leave a partially written target file behind.
                try
                {
                    File.Delete(targetFilePath);
                }
                catch (IOException ioe)
                {
                    Console.WriteLine(ioe.Message);
                }
            }

            return alignmentError;
        }
예제 #3
0
        /// <summary>
        /// Builds a mono-phone MLF (see the HTK documentation) from a script file.
        /// </summary>
        /// <param name="writer">
        /// Writer that receives the MLF header; may be null, in which case no
        /// header is written. It is also passed on to the per-item overload.
        /// </param>
        /// <param name="scriptFilePath">Path of the script file to read.</param>
        /// <returns>
        /// The errors found while reading the script, plus one error for each
        /// item whose processing threw an <see cref="InvalidDataException"/>.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// The script file yields no items.
        /// </exception>
        private DataErrorSet BuildMonoMlf(TextWriter writer, string scriptFilePath)
        {
            Collection<ScriptItem> items = new Collection<ScriptItem>();
            DataErrorSet result = ReadAllData(scriptFilePath, Language, EngineType, items);

            if (items.Count == 0)
            {
                throw new InvalidDataException(string.Format(CultureInfo.InvariantCulture,
                    "There is no script sentence found in file [{0}].", scriptFilePath));
            }

            // The MLF header line is emitted once, before any item.
            if (writer != null)
            {
                writer.WriteLine("#!MLF!#");
            }

            foreach (ScriptItem item in items)
            {
                try
                {
                    BuildMonoMlf(writer, item);
                }
                catch (InvalidDataException ide)
                {
                    // A bad item is recorded and the remaining items are still processed.
                    result.Errors.Add(new DataError(scriptFilePath,
                        Helper.BuildExceptionMessage(ide), item.Id));
                }
            }

            return result;
        }
예제 #4
0
        /// <summary>
        /// Writes the entries of one script file into a target script file,
        /// skipping entries whose id already exists in the target.
        /// </summary>
        /// <param name="subScriptFilePath">Source script file.</param>
        /// <param name="outFilePath">Target script file.</param>
        /// <param name="append">
        /// True to append to the target file; false to overwrite it. Existing
        /// target entries are only read when this is true and the file exists.
        /// </param>
        /// <returns>
        /// Errors from reading both files, plus one error for every source
        /// entry that was skipped because it already exists in the target or
        /// because <c>ProcessPronunciation</c> reported an error for it.
        /// </returns>
        private DataErrorSet AppendScript(string subScriptFilePath,
            string outFilePath, bool append)
        {
            DataErrorSet errorSet = new DataErrorSet();

            // Entries already in the target, used to detect duplicates.
            SortedDictionary<string, ScriptItem> targetEntries = new SortedDictionary<string, ScriptItem>();
            if (append && File.Exists(outFilePath))
            {
                errorSet = ReadAllData(outFilePath, Language, EngineType, targetEntries);
            }
            else
            {
                Helper.EnsureFolderExistForFile(outFilePath);
            }

            SortedDictionary<string, ScriptItem> sourceEntries = new SortedDictionary<string, ScriptItem>();
            errorSet.Merge(ReadAllData(subScriptFilePath, Language, EngineType, sourceEntries));

            using (StreamWriter output = new StreamWriter(outFilePath, append, Encoding.Unicode))
            {
                foreach (KeyValuePair<string, ScriptItem> source in sourceEntries)
                {
                    if (existsIn(targetEntries, source.Key))
                    {
                        errorSet.Errors.Add(new DataError(subScriptFilePath,
                            "Entry already exists in script file [" + outFilePath + "]", source.Key));
                        continue;
                    }

                    // Hook handling: an entry is only written when the hook reports no error.
                    DataError hookError = ProcessPronunciation(source.Value);
                    if (hookError == null)
                    {
                        output.WriteLine(source.Value.ToString(true, true, true));
                    }
                    else
                    {
                        errorSet.Errors.Add(hookError);
                    }
                }
            }

            return errorSet;
        }

        /// <summary>
        /// Tells whether the given sentence id is a key of the given entry dictionary.
        /// </summary>
        /// <param name="entries">Dictionary of script items keyed by sentence id.</param>
        /// <param name="sentenceId">Sentence id to look up.</param>
        /// <returns>True when the id is present.</returns>
        private static bool existsIn(SortedDictionary<string, ScriptItem> entries, string sentenceId)
        {
            return entries.ContainsKey(sentenceId);
        }
예제 #5
0
        /// <summary>
        /// Builds one pronunciation script from a list of script files.
        /// </summary>
        /// <remarks>
        /// The first script that is actually written replaces any existing
        /// output file; every later script is appended to it. Paths that do not
        /// end with <c>ScriptFile.Extension</c> are ignored, and files that are
        /// not saved in Unicode are reported as errors and skipped.
        /// </remarks>
        /// <param name="scriptFilePaths">Hiragana script file list.</param>
        /// <param name="outFilePath">Romaji pronunciation script file.</param>
        /// <returns>Data error set found.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="scriptFilePaths"/> is null.
        /// </exception>
        /// <exception cref="InvalidDataException">
        /// An element of <paramref name="scriptFilePaths"/> is null or empty.
        /// </exception>
        public DataErrorSet BuildScript(string[] scriptFilePaths,
            string outFilePath)
        {
            if (scriptFilePaths == null)
            {
                throw new ArgumentNullException("scriptFilePaths");
            }

            DataErrorSet errorSet = new DataErrorSet();

            // Tracks whether a script has already been written in this call.
            // The append decision must depend on this rather than on the array
            // index: if the leading paths are skipped, the first script that is
            // written still has to overwrite a stale output file.
            bool outputStarted = false;

            foreach (string scriptFilePath in scriptFilePaths)
            {
                if (string.IsNullOrEmpty(scriptFilePath))
                {
                    throw new InvalidDataException("scriptFilePath");
                }

                if (!scriptFilePath.EndsWith(ScriptFile.Extension, StringComparison.Ordinal))
                {
                    continue;
                }

                // all script files should be saved in unicode
                if (!Helper.IsUnicodeFile(scriptFilePath))
                {
                    DataError error = new DataError(scriptFilePath,
                        "script file should be saved in Unicode.");
                    errorSet.Errors.Add(error);
                    continue;
                }

                // do appending (overwrite for the first written script)
                DataErrorSet subErrorSet = AppendScript(scriptFilePath,
                    outFilePath, outputStarted);
                outputStarted = true;

                // merge error messages
                errorSet.Merge(subErrorSet);
            }

            return errorSet;
        }
예제 #6
0
        /// <summary>
        /// Checks that every phone in a slice pronunciation is a known TTS phone.
        /// </summary>
        /// <remarks>
        /// The slice is split by the entry's phone separator, ignoring empty
        /// parts. Items equal to "1", "2" or "3", items that both start and end
        /// with an underscore, and names found in the phoneme's tone name map
        /// are accepted without further checking.
        /// </remarks>
        /// <param name="entry">Script item the slice belongs to.</param>
        /// <param name="slice">Pronunciation of the slice.</param>
        /// <returns>
        /// Null when all phones are accepted; otherwise an error describing the
        /// first invalid phone.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="entry"/> is null, or <paramref name="slice"/> is null or empty.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The entry has no pronunciation separator, or its phone separator is null or empty.
        /// </exception>
        public static DataError ValidatePhones(ScriptItem entry, string slice)
        {
            if (entry == null)
            {
                throw new ArgumentNullException("entry");
            }

            if (entry.PronunciationSeparator == null)
            {
                throw new ArgumentException(
                    Helper.NeutralFormat("entry.PronunciationSeparator should not be null."));
            }

            if (string.IsNullOrEmpty(entry.PronunciationSeparator.Phone))
            {
                throw new ArgumentException(
                    Helper.NeutralFormat("entry.PronunciationSeparator.Phone should not be null."));
            }

            if (string.IsNullOrEmpty(slice))
            {
                throw new ArgumentNullException("slice");
            }

            Phoneme phoneme = Localor.GetPhoneme(entry.Language);
            string[] separators = new string[] { entry.PronunciationSeparator.Phone };
            string[] phones = slice.Split(separators, StringSplitOptions.RemoveEmptyEntries);

            foreach (string phone in phones)
            {
                // TODO: PS#13181 Offline tools:Syllable validation and pronunciation design
                if (phone == "1" || phone == "2" || phone == "3")
                {
                    continue;
                }

                // Special phone: wrapped in underscores.
                if (phone.StartsWith("_", StringComparison.Ordinal) && phone.EndsWith("_", StringComparison.Ordinal))
                {
                    continue;
                }

                // Tone name.
                if (phoneme.ToneManager.NameMap.ContainsKey(phone))
                {
                    continue;
                }

                if (phoneme.TtsPhones.IndexOf(phone) < 0)
                {
                    // Invalid TTS phone found: report it and stop.
                    string message = string.Format(CultureInfo.InvariantCulture,
                        "The phone[{0}] in slice[{1}] is invalid",
                        phone, slice);
                    return new DataError("null", message, entry.Id);
                }
            }

            return null;
        }
예제 #7
0
        /// <summary>
        /// Checks that every slice of a syllable pronunciation is non-empty and
        /// consists of valid phones.
        /// </summary>
        /// <param name="entry">Script item the syllable belongs to.</param>
        /// <param name="syllable">Pronunciation of the syllable.</param>
        /// <returns>
        /// Null when all slices are valid; otherwise the first error found,
        /// either for an empty slice or from <c>ValidatePhones</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="entry"/> is null, or <paramref name="syllable"/> is null or empty.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The entry has no pronunciation separator, or its slice separator is null or empty.
        /// </exception>
        public static DataError ValidateSlices(ScriptItem entry, string syllable)
        {
            if (entry == null)
            {
                throw new ArgumentNullException("entry");
            }

            if (entry.PronunciationSeparator == null)
            {
                throw new ArgumentException(
                    Helper.NeutralFormat("entry.PronunciationSeparator should not be null."));
            }

            if (string.IsNullOrEmpty(entry.PronunciationSeparator.Slice))
            {
                throw new ArgumentException(
                    Helper.NeutralFormat("entry.PronunciationSeparator.Slice should not be null."));
            }

            if (string.IsNullOrEmpty(syllable))
            {
                throw new ArgumentNullException("syllable");
            }

            // Empty parts are kept on purpose so that they can be reported below.
            string[] separators = new string[] { entry.PronunciationSeparator.Slice };
            string[] parts = syllable.Split(separators, StringSplitOptions.None);

            int position = 0;
            foreach (string part in parts)
            {
                if (string.IsNullOrEmpty(part))
                {
                    string message = string.Format(CultureInfo.InvariantCulture,
                        "The slice[{0}] of syllable[{1}] is empty by separator [{2}]",
                        position, syllable, entry.PronunciationSeparator.Slice);
                    return new DataError("null", message, entry.Id);
                }

                // Check the phones of this slice; stop at the first error.
                DataError phoneError = ValidatePhones(entry, part);
                if (phoneError != null)
                {
                    return phoneError;
                }

                position++;
            }

            return null;
        }
예제 #8
0
        /// <summary>
        /// Checks that every syllable of a word pronunciation is valid.
        /// </summary>
        /// <remarks>
        /// Each syllable is trimmed and must not be empty. Syllables matching
        /// <c>^_(.*)_$</c> are treated as special units and skipped. Any other
        /// syllable that is not listed in the language's nucleus slices must
        /// pass a language-specific syllable check, and its slices must pass
        /// <c>ValidateSlices</c>.
        /// </remarks>
        /// <param name="entry">Script item the word belongs to.</param>
        /// <param name="word">Pronunciation of the word.</param>
        /// <returns>Null when all syllables are valid; otherwise the first error found.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="entry"/> is null, or <paramref name="word"/> is null or empty.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The entry has no pronunciation separator, or its syllable separator is null or empty.
        /// </exception>
        public static DataError ValidateSyllables(ScriptItem entry, string word)
        {
            if (entry == null)
            {
                throw new ArgumentNullException("entry");
            }

            if (entry.PronunciationSeparator == null)
            {
                throw new ArgumentException(
                    Helper.NeutralFormat("entry.PronunciationSeparator should not be null."));
            }

            if (string.IsNullOrEmpty(entry.PronunciationSeparator.Syllable))
            {
                throw new ArgumentException(
                    Helper.NeutralFormat("entry.PronunciationSeparator.Syllable should not be null."));
            }

            if (string.IsNullOrEmpty(word))
            {
                throw new ArgumentNullException("word");
            }

            Phoneme phoneme = Localor.GetPhoneme(entry.Language);

            // Empty parts are kept on purpose so that they can be reported below.
            string[] separators = new string[] { entry.PronunciationSeparator.Syllable };
            string[] rawSyllables = word.Split(separators, StringSplitOptions.None);

            for (int index = 0; index < rawSyllables.Length; index++)
            {
                string syllable = rawSyllables[index].Trim();
                if (string.IsNullOrEmpty(syllable))
                {
                    string emptyMessage = string.Format(CultureInfo.InvariantCulture,
                        "The syllable[{0}] of word[{1}] pronunciation is empty by separator [{2}]",
                        index, word, entry.PronunciationSeparator.Syllable);
                    return new DataError("null", emptyMessage, entry.Id);
                }

                // Special unit: wrapped in underscores, nothing more to check.
                if (Regex.IsMatch(syllable, "^_(.*)_$"))
                {
                    continue;
                }

                string[] phoneTokens = entry.PronunciationSeparator.SplitPhones(syllable);
                TtsMetaUnit unit = new TtsMetaUnit(entry.Language);
                unit.Name = string.Join(" ", phoneTokens);
                string[] phoneNames = unit.GetPhonesName();

                // Tell whether is a valid nucleus,
                // which could be syllable with no vowel in some languages, like fr-CA
                SliceData sliceData = Localor.GetSliceData(entry.Language);
                bool listedAsNucleus = sliceData.NucleusSlices.IndexOf(unit.Name) >= 0;
                if (!listedAsNucleus)
                {
                    bool acceptable;
                    if (entry.Language == Language.EnUS)
                    {
                        // syllable that must have vowels
                        acceptable = IsGoodSyllableWithVowel(entry, phoneme, phoneNames);
                    }
                    else if (entry.Language == Language.RuRU)
                    {
                        // A Russian syllable can have no sonorant
                        acceptable = IsSyllableWithLessVowel(entry, phoneme, phoneNames);
                    }
                    else
                    {
                        // syllable that must have vowels or sonorants
                        acceptable = IsGoodSyllableWithSonorant(entry, phoneme, phoneNames);
                    }

                    if (!acceptable)
                    {
                        int[] vowelIndexes = phoneme.GetVowelIndexes(phoneNames);
                        string format =
                            "There must be minimum {0} vowels or maximum {1} included in syllable " +
                            "or the syllable should have one sonorant and more than one consonants, " +
                            "but {2} vowels are found in syllable [{3}] of word [{4}].";

                        // The untrimmed syllable text is reported, as split from the word.
                        string vowelMessage = string.Format(CultureInfo.InvariantCulture, format,
                            entry.MinVowelCountInSyllable, entry.MaxVowelCountInSyllable,
                            vowelIndexes.Length, rawSyllables[index], word);
                        return new DataError("null", vowelMessage, entry.Id);
                    }
                }

                // Check the slices of this syllable; stop at the first error.
                DataError sliceError = ValidateSlices(entry, syllable);
                if (sliceError != null)
                {
                    return sliceError;
                }
            }

            return null;
        }
예제 #9
0
        /// <summary>
        /// Validate the word pronunciations in sentence are valid or not.
        /// </summary>
        /// <remarks>
        /// The pronunciation is split by the entry's word separator. An empty
        /// first or last word is tolerated; an empty word anywhere else is an
        /// error. Every other word is checked with <c>ValidateSyllables</c>.
        /// </remarks>
        /// <param name="entry">Script item.</param>
        /// <param name="isBadPhoneValid">
        /// True means that bad phone is valid: the bad-phone label following a
        /// run of ASCII letters is removed from each word before validation.
        /// </param>
        /// <returns>Data error found, or null when the pronunciation is valid.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="entry"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The entry's pronunciation separator or pronunciation is null.
        /// </exception>
        public static DataError ValidatePronunciation(ScriptItem entry, bool isBadPhoneValid)
        {
            // Fail with argument exceptions, like the sibling validators do,
            // instead of an undiagnosable NullReferenceException further down.
            if (entry == null)
            {
                throw new ArgumentNullException("entry");
            }

            if (entry.PronunciationSeparator == null)
            {
                throw new ArgumentException("entry.PronunciationSeparator should not be null.");
            }

            if (entry.Pronunciation == null)
            {
                throw new ArgumentException("entry.Pronunciation should not be null.");
            }

            // The pattern does not depend on the word, so build it once.
            // NOTE(review): BadPhoneLabel is concatenated unescaped, so it is
            // interpreted as regex syntax - confirm that this is intended.
            string badPhonePattern = isBadPhoneValid ? @"([a-zA-Z]+)" + BadPhoneLabel : null;

            DataError dataError = null;

            string[] words = entry.Pronunciation.Split(
                new string[] { entry.PronunciationSeparator.Word },
                StringSplitOptions.None);
            for (int i = 0; i < words.Length; i++)
            {
                if (string.IsNullOrEmpty(words[i]))
                {
                    if (i == 0 || i == words.Length - 1)
                    {
                        // It makes sense for first or last one is with empty string
                        continue;
                    }

                    string message = string.Format(CultureInfo.InvariantCulture,
                        "The word[{0}] pronunciation is empty by separator [{1}]",
                        i, entry.PronunciationSeparator.Word);
                    dataError = new DataError("null", message, entry.Id);
                    break;
                }

                // Check syllable's pronunciation
                string newWord = words[i];
                if (isBadPhoneValid)
                {
                    // remove the bad phone label from bad phone.
                    newWord = Regex.Replace(newWord, badPhonePattern, @"$1", RegexOptions.CultureInvariant);
                }

                dataError = ValidateSyllables(entry, newWord);
                if (dataError != null)
                {
                    break;
                }
            }

            return dataError;
        }