/// <summary>
        /// Initializes a new instance of the <see cref="PhoneMerger"/> class
        /// and stores the language data it is given.
        /// </summary>
        /// <param name="phoneSet">Phone set; must not be null.</param>
        /// <param name="sliceData">Slice data; must not be null.</param>
        /// <param name="truncRule">Truncate rule data; must not be null.</param>
        /// <exception cref="ArgumentNullException">Any of the arguments is null.</exception>
        public PhoneMerger(TtsPhoneSet phoneSet, SliceData sliceData, TruncateRuleData truncRule)
        {
            // Each argument is validated and then stored; arguments are checked in
            // declaration order, so the first null one is the one that gets reported.
            if (phoneSet == null)
            {
                throw new ArgumentNullException("phoneSet");
            }

            this._phoneSet = phoneSet;

            if (sliceData == null)
            {
                throw new ArgumentNullException("sliceData");
            }

            this._sliceData = sliceData;

            if (truncRule == null)
            {
                throw new ArgumentNullException("truncRule");
            }

            this._truncateRuleData = truncRule;
        }
        /// <summary>
        /// Rebuilds the unit list of this sentence from its words.
        /// </summary>
        /// <remarks>
        /// Words that are not pronounceable normal words are skipped, and so are words
        /// with an empty pronunciation when unit features are not requested.
        /// </remarks>
        /// <param name="phoneme">Phoneme, handed on to unit feature building.</param>
        /// <param name="sliceData">Slice data used to build the units of each word.</param>
        /// <param name="buildUnitFeature">Whether to build unit features once the units are collected.</param>
        private void BuildUnits(Phoneme phoneme, SliceData sliceData, bool buildUnitFeature)
        {
            Helper.ThrowIfNull(phoneme);
            Helper.ThrowIfNull(sliceData);

            _units.Clear();

            string pattern = ScriptItem.PunctuationPattern;
            for (int i = 0; i < Words.Count; i++)
            {
                ScriptWord current = Words[i];
                if (!current.IsPronouncableNormalWord)
                {
                    continue;
                }

                if (!buildUnitFeature && string.IsNullOrEmpty(current.Pronunciation))
                {
                    continue;
                }

                // Look at the non-normal words that follow. Words mapped to
                // OtherPunctuation are consumed and the scan goes on; the first word
                // mapped to anything else ends the scan without being consumed.
                // The last mapped type becomes the word type of this word's units.
                WordType trailingType = WordType.Normal;
                while (i + 1 < Words.Count
                    && Words[i + 1].WordType != WordType.Normal)
                {
                    trailingType = Localor.MapPunctuation(Words[i + 1].Grapheme, pattern);
                    if (trailingType != WordType.OtherPunctuation)
                    {
                        break;
                    }

                    i++;
                }

                current.Units.Clear();
                current.BuildUnitWithoutFeature(sliceData, ScriptItem.PronunciationSeparator);
                foreach (TtsUnit builtUnit in current.Units)
                {
                    builtUnit.WordType = trailingType;
                }

                Helper.AppendCollection<TtsUnit>(_units, current.Units);
            }

            if (!buildUnitFeature)
            {
                return;
            }

            BuildUnitFeatures(phoneme);
        }
        /// <summary>
        /// Gets the units of this sentence, building them first when they are
        /// flagged as needing a build.
        /// </summary>
        /// <param name="phoneme">Phoneme; must not be null.</param>
        /// <param name="sliceData">Slice data; must not be null.</param>
        /// <param name="buildUnitFeature">Whether to build unit features when the units are built.</param>
        /// <returns>The unit collection held by this sentence.</returns>
        /// <exception cref="ArgumentNullException">phoneme or sliceData is null.</exception>
        public Collection<TtsUnit> GetUnits(Phoneme phoneme, SliceData sliceData,
            bool buildUnitFeature)
        {
            if (phoneme == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (sliceData == null)
            {
                throw new ArgumentNullException("sliceData");
            }

            // Nothing to do when the existing list is still valid
            if (!_needBuildUnits)
            {
                return _units;
            }

            BuildUnits(phoneme, sliceData, buildUnitFeature);
            _needBuildUnits = false;

            return _units;
        }
 /// <summary>
 /// Gets the units of this sentence with unit features requested.
 /// </summary>
 /// <param name="phoneme">Phoneme.</param>
 /// <param name="sliceData">Slice data.</param>
 /// <returns>Tts units.</returns>
 public Collection<TtsUnit> GetUnits(Phoneme phoneme, SliceData sliceData)
 {
     // This overload always asks for unit features
     const bool withUnitFeatures = true;
     Collection<TtsUnit> units = GetUnits(phoneme, sliceData, withUnitFeatures);
     return units;
 }
Beispiel #5
0
        /// <summary>
        /// Estimates the position in syllable of every slice.
        /// </summary>
        /// <remarks>
        /// The first slice that the slice data reports as a nucleus is classified by
        /// whether slices exist before and after it. Slices before it are onsets and
        /// slices after it are codas. When no slice is a nucleus, every slice is
        /// treated as a coda.
        /// </remarks>
        /// <param name="slices">Slices.</param>
        /// <param name="sliceData">Slice data table.</param>
        /// <returns>One position per slice, in the same order as the slices.</returns>
        private static PosInSyllable[] EstimatePosInSyllable(string[] slices, SliceData sliceData)
        {
            int count = slices.Length;
            int last = count - 1;
            PosInSyllable[] positions = new PosInSyllable[count];

            // Find the first nucleus slice, if any
            int nucleusAt = -1;
            for (int i = 0; i < count && nucleusAt < 0; i++)
            {
                TtsMetaUnit probe = new TtsMetaUnit(sliceData.Language);
                probe.Name = slices[i];

                if (sliceData.IsNucleus(probe))
                {
                    nucleusAt = i;
                }
            }

            if (nucleusAt >= 0)
            {
                bool hasOnset = nucleusAt > 0;
                bool hasCoda = nucleusAt < last;

                if (hasOnset)
                {
                    positions[nucleusAt] = hasCoda ? PosInSyllable.NucleusInCVC : PosInSyllable.NucleusInCV;
                }
                else
                {
                    positions[nucleusAt] = hasCoda ? PosInSyllable.NucleusInVC : PosInSyllable.NucleusInV;
                }
            }

            // Everything before the nucleus: the first slice is the onset, the rest follow it
            for (int i = 0; i < nucleusAt; i++)
            {
                positions[i] = i == 0 ? PosInSyllable.Onset : PosInSyllable.OnsetNext;
            }

            // Everything after the nucleus: the final slice is the coda, the rest lead up to it
            for (int i = nucleusAt + 1; i < count; i++)
            {
                positions[i] = i == last ? PosInSyllable.Coda : PosInSyllable.CodaNext;
            }

            return positions;
        }
        /// <summary>
        /// Builds the mono MLF for every item of a script file that remains after
        /// validation, optionally writing the result to a file.
        /// </summary>
        /// <param name="scriptFilePath">Path of the script file to load and validate.</param>
        /// <param name="outFilePath">Path of the MLF file; only used when writeToFile is true.</param>
        /// <param name="writeToFile">Whether to write the MLF to outFilePath.</param>
        /// <param name="phoneme">Phoneme; must not be null.</param>
        /// <param name="validateSetting">Script validation setting; must not be null.</param>
        /// <param name="sliceData">Slice data; must not be null.</param>
        /// <returns>The script loading errors merged with the errors of each item.</returns>
        /// <exception cref="ArgumentNullException">
        /// phoneme, sliceData or validateSetting is null.
        /// </exception>
        /// <exception cref="InvalidDataException">
        /// The script has no items left after the invalid ones are removed.
        /// </exception>
        public static ErrorSet BuildMonoMlf(string scriptFilePath, string outFilePath, bool writeToFile,
            Phoneme phoneme, XmlScriptValidateSetting validateSetting, SliceData sliceData)
        {
            if (phoneme == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (sliceData == null)
            {
                // Report the parameter that is actually null
                throw new ArgumentNullException("sliceData");
            }

            if (validateSetting == null)
            {
                throw new ArgumentNullException("validateSetting");
            }

            validateSetting.VerifySetting();

            ErrorSet errors = new ErrorSet();

            // A using statement accepts a null resource, so the writer is only created
            // when needed and is always released, even if writing the header fails.
            using (StreamWriter sw = writeToFile
                ? new StreamWriter(outFilePath, false, Encoding.ASCII)
                : null)
            {
                if (sw != null)
                {
                    sw.WriteLine("#!MLF!#");
                }

                XmlScriptFile script = XmlScriptFile.LoadWithValidation(scriptFilePath, validateSetting);
                script.Remove(GetNeedDeleteItemIds(script.ErrorSet));
                if (script.Items.Count == 0)
                {
                    throw new InvalidDataException(
                        Helper.NeutralFormat("No valid items in {0}.", scriptFilePath));
                }

                errors.Merge(script.ErrorSet);
                foreach (ScriptItem item in script.Items)
                {
                    errors.Merge(BuildMonoMlf(item, sw, writeToFile, phoneme, sliceData));
                }
            }

            if (writeToFile)
            {
                // Format verification runs in debug builds only
                Debug.Assert(HtkTool.VerifyMlfFormat(outFilePath));
            }

            return errors;
        }
        /// <summary>
        /// Builds coda units from the phones that follow the nucleus and adds them
        /// to the unit list.
        /// </summary>
        /// <remarks>
        /// Matching goes from right to left. For the phones still pending, the longest
        /// span ending at the last pending phone is tried first and shortened from the
        /// left until it is a known coda slice or a single phone. Each accepted unit is
        /// inserted at the position the list had on entry, so units found later (further
        /// left in the syllable) end up in front of those found earlier.
        /// Example syllable: t w ih 1 k s t.
        /// </remarks>
        /// <param name="sliceData">Slice data.</param>
        /// <param name="phones">Phones to process.</param>
        /// <param name="codaOffset">The offset of the first phone in coda group.</param>
        /// <param name="slicedUnits">Unit container to append result coda units.</param>
        private static void BuildCodaUnits(SliceData sliceData,
            TtsMetaPhone[] phones, int codaOffset, List<string> slicedUnits)
        {
            int pending = phones.Length - codaOffset;
            int insertAt = slicedUnits.Count;
            int cursor = codaOffset;

            while (pending > 0)
            {
                int skipped = cursor - codaOffset;
                int length = pending - skipped;
                string candidate =
                    TtsMetaPhone.Join(TtsUnit.PhoneDelimiter, phones, cursor, length);

                // A single remaining phone is always taken as a coda unit
                bool accepted = pending == 1 ||
                    sliceData.CodaSlices.IndexOf(candidate.Replace(TtsUnit.PhoneDelimiter, " ")) >= 0 ||
                    length == 1;

                if (accepted)
                {
                    slicedUnits.Insert(insertAt, TtsUnit.CodaPrefix + candidate);

                    // Continue with the phones that were skipped on the left
                    pending = skipped;
                    cursor = codaOffset;
                }
                else
                {
                    cursor++;
                }
            }
        }
        /// <summary>
        /// Builds the units of a syllable pronunciation.
        /// </summary>
        /// <remarks>
        /// Stress is removed from the syllable first. If the whole syllable is a
        /// predefined nucleus slice it is returned as a single nucleus unit. If it has
        /// no vowel, it is returned as one onset or coda unit when listed as such, or
        /// otherwise as one coda unit per phone. In all other cases the vowel together
        /// with the sonorant phones around it forms the nucleus candidate, which is
        /// truncated until it is a predefined nucleus slice or a single phone; the
        /// phones before it become onset units and the phones after it coda units.
        /// Phones inside a unit are joined with <c>TtsUnit.PhoneDelimiter</c> and each
        /// unit carries its onset, nucleus or coda prefix.
        /// </remarks>
        /// <param name="phoneme">Phoneme of the language to process with.</param>
        /// <param name="sliceData">Slice data to process.</param>
        /// <param name="syllable">Syllable pronunciation to process.</param>
        /// <returns>The units of the syllable, in syllable order.</returns>
        /// <exception cref="ArgumentNullException">
        /// phoneme or sliceData is null, or syllable is null or empty.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// phoneme.TtsSonorantPhones, sliceData.OnsetSlices or sliceData.NucleusSlices is null.
        /// </exception>
        public static string[] BuildUnits(Phoneme phoneme,
            SliceData sliceData, string syllable)
        {
            if (phoneme == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (phoneme.TtsSonorantPhones == null)
            {
                string message = Helper.NeutralFormat("phoneme.TtsSonorantPhones should not be null.");
                throw new ArgumentException(message);
            }

            if (string.IsNullOrEmpty(syllable))
            {
                throw new ArgumentNullException("syllable");
            }

            if (sliceData == null)
            {
                throw new ArgumentNullException("sliceData");
            }

            if (sliceData.OnsetSlices == null)
            {
                string message = Helper.NeutralFormat("sliceData.OnsetSlices should not be null.");
                throw new ArgumentException(message);
            }

            if (sliceData.NucleusSlices == null)
            {
                string message = Helper.NeutralFormat("sliceData.NucleusSlices should not be null.");
                throw new ArgumentException(message);
            }

            List<string> slicedUnits = new List<string>();

            string unstressedSyllable = Pronunciation.RemoveStress(syllable);

            ScriptItem scriptItem = new ScriptItem(phoneme.Language);

            // items contains phone and tone.
            string[] items = scriptItem.PronunciationSeparator.SplitPhones(unstressedSyllable);

            // Treat the whole syllable as one unit at first.
            TtsMetaUnit ttsMetaUnit = new TtsMetaUnit(phoneme.Language);
            ttsMetaUnit.Name = string.Join(" ", items);
            string[] phones = ttsMetaUnit.GetPhonesName();

            if (sliceData.NucleusSlices.IndexOf(ttsMetaUnit.Name) >= 0)
            {
                // The whole syllable is already a predefined unit, return it
                slicedUnits.Add(TtsUnit.NucleusPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
                return slicedUnits.ToArray();
            }

            int vowelIndex = phoneme.GetFirstVowelIndex(phones);
            if (vowelIndex < 0)
            {
                // No vowel in the syllable: use all phones as one unit if the unit table lists it
                if (sliceData.OnsetSlices.IndexOf(ttsMetaUnit.Name) >= 0)
                {
                    slicedUnits.Add(TtsUnit.OnsetPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
                }
                else if (sliceData.CodaSlices.IndexOf(ttsMetaUnit.Name) >= 0)
                {
                    slicedUnits.Add(TtsUnit.CodaPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
                }
                else
                {
                    // otherwise, treat each phone as a coda unit
                    foreach (string phone in phones)
                    {
                        slicedUnits.Add(TtsUnit.CodaPrefix + phone);
                    }
                }

                return slicedUnits.ToArray();
            }

            // Find the leftmost sonorant phone in front of the vowel
            int firstSonorantIndex = vowelIndex;
            for (int i = vowelIndex - 1; i >= 0; i--)
            {
                if (phoneme.TtsSonorantPhones.IndexOf(phones[i]) >= 0)
                {
                    firstSonorantIndex = i;
                }
            }

            // Find the rightmost sonorant phone behind the vowel
            int lastSonorantIndex = vowelIndex;
            for (int i = vowelIndex + 1; i <= phones.Length - 1; i++)
            {
                if (phoneme.TtsSonorantPhones.IndexOf(phones[i]) >= 0)
                {
                    lastSonorantIndex = i;
                }
            }

            // Treat the vowel and the surrounding span up to those sonorants as the nucleus unit first
            string nucleus = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones,
                firstSonorantIndex, lastSonorantIndex - firstSonorantIndex + 1);

            TruncateRuleData truncateRuleData = Localor.GetTruncateRuleData(phoneme.Language);

            // Refine the nucleus according to the predefined unit table. IndexOf reports
            // a missing entry with a negative value, so index 0 is a valid match and must
            // stop the truncation just like any other index.
            while (lastSonorantIndex - firstSonorantIndex > 0 && sliceData.NucleusSlices.IndexOf(nucleus) < 0)
            {
                // The unit candidate is not listed in the predefined unit list, try to truncate it
                string[] leftRight =
                    PhoneMerger.TruncateOnePhoneFromNucleus(phoneme, truncateRuleData.NucleusTruncateRules,
                    nucleus);

                if (phoneme.TtsPhones.IndexOf(leftRight[0]) >= 0)
                {
                    // The left part is a single phone: it was cut from the front
                    firstSonorantIndex++;
                }
                else
                {
                    Debug.Assert(phoneme.TtsPhones.IndexOf(leftRight[1]) >= 0);
                    lastSonorantIndex--;
                }

                // Re-define the remaining nucleus unit
                nucleus = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones,
                    firstSonorantIndex, lastSonorantIndex - firstSonorantIndex + 1);
            }

            slicedUnits.Add(TtsUnit.NucleusPrefix + nucleus.Replace(" ", TtsUnit.PhoneDelimiter));

            // Refine onset
            for (int index = firstSonorantIndex - 1; index >= 0; index--)
            {
                string onset = TtsMetaPhone.Join(TtsUnit.PhoneDelimiter, ttsMetaUnit.Phones, 0, index + 1);
                if (sliceData.OnsetSlices.IndexOf(onset.Replace(TtsUnit.PhoneDelimiter, " ")) >= 0)
                {
                    slicedUnits.Insert(0, TtsUnit.OnsetPrefix + onset);

                    // This unit starts at the first phone, so no onset phone is left
                    break;
                }
                else
                {
                    // Treat it as a single phone unit
                    slicedUnits.Insert(0,
                        TtsUnit.OnsetPrefix + TtsMetaPhone.Join(TtsUnit.PhoneDelimiter, ttsMetaUnit.Phones, index, 1));
                }
            }

            // Refine coda, matching from right to left
            BuildCodaUnits(sliceData, ttsMetaUnit.Phones, lastSonorantIndex + 1, slicedUnits);

            return slicedUnits.ToArray();
        }
        /// <summary>
        /// Validates the configured language data files against a language.
        /// </summary>
        /// <remarks>
        /// Every configured file is loaded. The unit table is reported when it is
        /// empty; all other files are reported when their language differs from the
        /// given one. When no file is configured, a trace message is written and no
        /// error is reported.
        /// </remarks>
        /// <param name="language">Language the data files are expected to have.</param>
        /// <returns>The errors found; empty when everything matches.</returns>
        public ErrorSet ValidateLanguageData(Language language)
        {
            ErrorSet errors = new ErrorSet();

            if (IsEmpty())
            {
                Trace.WriteLine("Using stocked language data with tools...");
                return errors;
            }

            // Phone set
            if (!string.IsNullOrEmpty(_phoneSet))
            {
                TtsPhoneSet loadedPhoneSet = new TtsPhoneSet();
                loadedPhoneSet.Load(PhoneSet);
                if (loadedPhoneSet.Language != language)
                {
                    errors.Add(new Error(VoiceCreationLanguageDataError.MismatchLanguage,
                        Localor.LanguageToString(language),
                        Localor.LanguageToString(loadedPhoneSet.Language),
                        Localor.PhoneSetFileName, PhoneSet));
                }
            }

            // Unit table: only checked for being empty
            if (!string.IsNullOrEmpty(_unitTable))
            {
                SliceData loadedUnitTable = new SliceData();
                loadedUnitTable.Language = language;
                loadedUnitTable.Load(UnitTable);
                if (loadedUnitTable.IsEmpty())
                {
                    errors.Add(new Error(VoiceCreationLanguageDataError.EmptyLanguageDataFile,
                        Localor.LanguageToString(language),
                        Localor.UnitTableFileName, UnitTable));
                }
            }

            // Lexical attribute schema
            if (!string.IsNullOrEmpty(_lexicalAttributeSchema))
            {
                LexicalAttributeSchema loadedSchema = new LexicalAttributeSchema();
                loadedSchema.Load(LexicalAttributeSchema);
                if (loadedSchema.Language != language)
                {
                    // NOTE(review): this reports Localor.PhoneSetFileName for a schema
                    // mismatch, which looks like a copy/paste slip - confirm which file
                    // name constant is intended here.
                    errors.Add(new Error(VoiceCreationLanguageDataError.MismatchLanguage,
                        Localor.LanguageToString(language),
                        Localor.LanguageToString(loadedSchema.Language),
                        Localor.PhoneSetFileName, LexicalAttributeSchema));
                }
            }

            // Truncate rules
            if (!string.IsNullOrEmpty(_truncateRule))
            {
                TruncateRuleData loadedRules = new TruncateRuleData();
                loadedRules.Load(TruncateRule);
                if (loadedRules.Language != language)
                {
                    errors.Add(new Error(VoiceCreationLanguageDataError.MismatchLanguage,
                        Localor.LanguageToString(language),
                        Localor.LanguageToString(loadedRules.Language),
                        Localor.TruncateRulesFileName, TruncateRule));
                }
            }

            // TTS to SAPI viseme id map
            if (!string.IsNullOrEmpty(_ttsToSapiVisemeId))
            {
                PhoneMap visemeMap = PhoneMap.CreatePhoneMap(TtsToSapiVisemeId);
                if (visemeMap.Language != language)
                {
                    errors.Add(new Error(VoiceCreationLanguageDataError.MismatchLanguage,
                        Localor.LanguageToString(language),
                        Localor.LanguageToString(visemeMap.Language),
                        Localor.TtsToSapiVisemeIdFileName, TtsToSapiVisemeId));
                }
            }

            // TTS to SR phone map
            if (!string.IsNullOrEmpty(_ttsToSrPhone))
            {
                PhoneMap srPhoneMap = PhoneMap.CreatePhoneMap(TtsToSrPhone);
                if (srPhoneMap.Language != language)
                {
                    errors.Add(new Error(VoiceCreationLanguageDataError.MismatchLanguage,
                        Localor.LanguageToString(language),
                        Localor.LanguageToString(srPhoneMap.Language),
                        Localor.TtsToSrPhoneFileName, TtsToSrPhone));
                }
            }

            // TTS to IPA phone map
            if (!string.IsNullOrEmpty(_ttsToIpaPhone))
            {
                PhoneMap ipaPhoneMap = PhoneMap.CreatePhoneMap(TtsToIpaPhone);
                if (ipaPhoneMap.Language != language)
                {
                    errors.Add(new Error(VoiceCreationLanguageDataError.MismatchLanguage,
                        Localor.LanguageToString(language),
                        Localor.LanguageToString(ipaPhoneMap.Language),
                        Localor.TtsToIpaPhoneFileName, TtsToIpaPhone));
                }
            }

            // Font meta, loaded through the phone map loader
            if (!string.IsNullOrEmpty(_fontMeta))
            {
                PhoneMap fontMetaMap = PhoneMap.CreatePhoneMap(FontMeta);
                if (fontMetaMap.Language != language)
                {
                    errors.Add(new Error(VoiceCreationLanguageDataError.MismatchLanguage,
                        Localor.LanguageToString(language),
                        Localor.LanguageToString(fontMetaMap.Language),
                        Localor.FontMetaFileName, FontMeta));
                }
            }

            return errors;
        }
Beispiel #10
0
        /// <summary>
        /// Loads the slice data of a language from its unit table resource.
        /// </summary>
        /// <param name="language">Language to load the unit table for.</param>
        /// <returns>The loaded slice data, or null when the resource reader is null.</returns>
        private static SliceData LoadSliceData(Language language)
        {
            using (StreamReader reader = LoadResource(language, Localor.UnitTableFileName))
            {
                // No reader means there is nothing to load
                if (reader == null)
                {
                    return null;
                }

                SliceData loaded = new SliceData();
                loaded.Language = language;
                loaded.Load(reader);
                return loaded;
            }
        }
        /// <summary>
        /// Extracts acoustic features for every sentence of the file list map and
        /// writes them to the target file.
        /// </summary>
        /// <param name="script">Script file instance.</param>
        /// <param name="phoneme">Phoneme used to get units; must not be null.</param>
        /// <param name="sliceData">Slice data used to get units; must not be null.</param>
        /// <param name="fileMap">File list map.</param>
        /// <param name="segmentDir">Segmentation file directory.</param>
        /// <param name="wave16kDir">16k Hz waveform file directory.</param>
        /// <param name="epochDir">Epoch file directory.</param>
        /// <param name="targetFilePath">Target acoustic file path.</param>
        /// <exception cref="ArgumentNullException">
        /// script, fileMap, phoneme or sliceData is null, or one of the directory or
        /// file path arguments is null or empty.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// script.FilePath is null or empty, or the map of fileMap or its keys are null.
        /// </exception>
        /// <exception cref="InvalidDataException">
        /// A sentence of the file list map does not exist in the script file.
        /// </exception>
        public static void ExtractAcoustic(XmlScriptFile script, Phoneme phoneme, SliceData sliceData,
            FileListMap fileMap, string segmentDir, string wave16kDir, string epochDir, string targetFilePath)
        {
            // Parameters validation
            if (script == null)
            {
                throw new ArgumentNullException("script");
            }

            if (string.IsNullOrEmpty(script.FilePath))
            {
                throw new ArgumentException("script.FilePath is null");
            }

            if (fileMap == null)
            {
                throw new ArgumentNullException("fileMap");
            }

            if (fileMap.Map == null)
            {
                throw new ArgumentException("fileMap.Map is null");
            }

            if (fileMap.Map.Keys == null)
            {
                throw new ArgumentException("fileMap.Map.Keys is null");
            }

            if (string.IsNullOrEmpty(segmentDir))
            {
                throw new ArgumentNullException("segmentDir");
            }

            if (string.IsNullOrEmpty(wave16kDir))
            {
                throw new ArgumentNullException("wave16kDir");
            }

            if (string.IsNullOrEmpty(epochDir))
            {
                throw new ArgumentNullException("epochDir");
            }

            if (!Directory.Exists(segmentDir))
            {
                throw Helper.CreateException(typeof(DirectoryNotFoundException),
                    segmentDir);
            }

            if (!Directory.Exists(wave16kDir))
            {
                throw Helper.CreateException(typeof(DirectoryNotFoundException),
                    wave16kDir);
            }

            if (!Directory.Exists(epochDir))
            {
                throw Helper.CreateException(typeof(DirectoryNotFoundException),
                    epochDir);
            }

            if (string.IsNullOrEmpty(targetFilePath))
            {
                throw new ArgumentNullException("targetFilePath");
            }

            // Getting the units of a sentence rejects a null phoneme or slice data;
            // check them here, after the existing checks, so the failure happens
            // before the target file is created or overwritten.
            if (phoneme == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (sliceData == null)
            {
                throw new ArgumentNullException("sliceData");
            }

            Helper.EnsureFolderExistForFile(targetFilePath);

            using (StreamWriter sw = new StreamWriter(targetFilePath))
            {
                // iterate each script item or sentence
                foreach (string sid in fileMap.Map.Keys)
                {
                    if (!script.ItemDic.ContainsKey(sid))
                    {
                        string message = string.Format(CultureInfo.InvariantCulture,
                            "Sentence [{0}] does not exist in script file [{1}].",
                            sid, script.FilePath);
                        throw new InvalidDataException(message);
                    }

                    ExtractAcoustic(sw, script, sid, phoneme, sliceData, fileMap, segmentDir, wave16kDir, epochDir);
                }
            }
        }
        /// <summary>
        /// Checks that the script file and the segmentation files agree with each other.
        /// </summary>
        /// <remarks>
        /// Every script item must appear in the file list map and pass the per-item
        /// alignment validation, and every entry of the file list map must appear in
        /// the script. Violations are collected in the returned error set.
        /// </remarks>
        /// <param name="fileMap">File list map.</param>
        /// <param name="script">Script file instance.</param>
        /// <param name="phoneme">Phoneme used to get units.</param>
        /// <param name="sliceData">Slice data used to get units.</param>
        /// <param name="segmentDir">Segment file directory.</param>
        /// <returns>Data error set found.</returns>
        /// <exception cref="ArgumentNullException">
        /// segmentDir is null or empty, or fileMap or script is null.
        /// </exception>
        /// <exception cref="ArgumentException">The map of fileMap or its keys are null.</exception>
        public static ErrorSet ValidateDataAlignment(FileListMap fileMap, XmlScriptFile script,
            Phoneme phoneme, SliceData sliceData, string segmentDir)
        {
            // Parameters validation
            if (string.IsNullOrEmpty(segmentDir))
            {
                throw new ArgumentNullException("segmentDir");
            }

            if (fileMap == null)
            {
                throw new ArgumentNullException("fileMap");
            }

            if (fileMap.Map == null)
            {
                throw new ArgumentException("fileMap.Map is null");
            }

            if (fileMap.Map.Keys == null)
            {
                throw new ArgumentException("fileMap.Map.Keys is null");
            }

            if (script == null)
            {
                throw new ArgumentNullException("script");
            }

            ErrorSet found = new ErrorSet();

            // Script side: each item needs a map entry and a consistent alignment
            foreach (ScriptItem scriptItem in script.Items)
            {
                try
                {
                    if (fileMap.Map.ContainsKey(scriptItem.Id))
                    {
                        ValidateDataAlignment(scriptItem, phoneme, sliceData, fileMap, segmentDir, found);
                    }
                    else
                    {
                        found.Add(ScriptError.OtherErrors, scriptItem.Id, Helper.NeutralFormat("File list map does not contain item"));
                    }
                }
                catch (InvalidDataException invalidData)
                {
                    // Invalid data of one item is recorded and does not stop the others
                    found.Add(ScriptError.OtherErrors, scriptItem.Id, Helper.BuildExceptionMessage(invalidData));
                }
            }

            // Map side: each entry needs a script item
            foreach (string sentenceId in fileMap.Map.Keys)
            {
                bool inScript = script.ItemDic.ContainsKey(sentenceId);
                if (!inScript)
                {
                    found.Add(ScriptError.OtherErrors, sentenceId, Helper.NeutralFormat("script file does not contain item"));
                }
            }

            return found;
        }
        /// <summary>
        /// Extract acoustic features for a given sentence.
        /// </summary>
        /// <param name="writer">Stream writer to write acoustic features.</param>
        /// <param name="script">Script file instance.</param>
        /// <param name="sid">Script item id.</param>
        /// <param name="phoneme">Phoneme used to get units.</param>
        /// <param name="sliceData">Slice data used to get units.</param>
        /// <param name="fileMap">File list map.</param>
        /// <param name="segmentDir">Segmentation file directory.</param>
        /// <param name="wave16kDir">16k Hz waveform file directory.</param>
        /// <param name="epochDir">Epoch file directory.</param>
        private static void ExtractAcoustic(StreamWriter writer, XmlScriptFile script, string sid,
            Phoneme phoneme, SliceData sliceData, FileListMap fileMap, string segmentDir,
            string wave16kDir, string epochDir)
        {
            // Writes one formatted line of acoustic measurements (timing, sample and
            // epoch scope, compressed epoch lengths, RMS, pitch) per non-silence
            // segment of the sentence identified by sid.
            // Throws InvalidDataException when the script, the alignment file and the
            // waveform file disagree with each other.
            ScriptItem scriptItem = script.ItemDic[sid];

            // The waveform, epoch and segment files share one mapped base name;
            // look it up once and only vary directory and extension.
            string fileBaseName = fileMap.Map[scriptItem.Id];
            string wave16kFilePath = Path.Combine(wave16kDir, fileBaseName + ".wav");
            string epochFilePath = Path.Combine(epochDir, fileBaseName + ".epoch");
            string segmentFilePath = Path.Combine(segmentDir, fileBaseName + ".txt");

            // load data files
            SegmentFile segFile = new SegmentFile();
            segFile.Load(segmentFilePath);

            EggAcousticFeature eggFile = new EggAcousticFeature();
            eggFile.LoadEpoch(epochFilePath);

            WaveAcousticFeature waveFile = new WaveAcousticFeature();
            waveFile.Load(wave16kFilePath);

            // The script units and the non-silence segments are paired by index below,
            // so their counts have to agree before any feature is calculated.
            int totalCount = segFile.NonSilenceWaveSegments.Count;
            Collection<TtsUnit> units = scriptItem.GetUnits(phoneme, sliceData);
            if (units.Count != totalCount)
            {
                string str1 = "Unit number mis-matched between sentence [{0}] in ";
                string str2 = "script file [{1}] and in the alignment file [{2}]. ";
                string str3 = "There are {3} units in script but {4} units in alignment.";
                string message = string.Format(CultureInfo.InvariantCulture,
                    str1 + str2 + str3,
                    sid, script.FilePath, segmentFilePath,
                    units.Count, totalCount);
                throw new InvalidDataException(message);
            }

            for (int i = 0; i < totalCount; i++)
            {
                // for each wave segment
                WaveSegment ws = segFile.NonSilenceWaveSegments[i];

                // get unit sample scope, converting times to sample positions
                int sampleOffset = (int)(ws.StartTime * waveFile.SamplesPerSecond);
                int sampleLength = (int)(ws.Duration * waveFile.SamplesPerSecond);
                int sampleEnd = sampleOffset + sampleLength;

                // calculate average pitch and pitch range on the un-adjusted scope
                float averagePitch, pitchRange;
                eggFile.GetPitchAndRange(sampleOffset,
                    sampleLength, out averagePitch, out pitchRange);
                ws.AveragePitch = averagePitch;
                ws.PitchRange = pitchRange;

                // Adjust the segment alignment with the epoch data before calculating
                // the root mean square. AdjustAlignment may rewrite the sample position
                // passed by reference; its return value is used as an epoch position.
                int epochOffset = eggFile.AdjustAlignment(ref sampleOffset);
                int epochEnd = eggFile.AdjustAlignment(ref sampleEnd);

                if (epochOffset > epochEnd)
                {
                    string info = string.Format(CultureInfo.InvariantCulture,
                        "epochOffset[{0}] should not be bigger than epochEnd[{1}]",
                        epochOffset, epochEnd);
                    throw new InvalidDataException(info);
                }

                if (sampleEnd > waveFile.SampleNumber)
                {
                    // Report the sample position that actually failed the check
                    // (sampleEnd), not the epoch index.
                    string str1 = "Mis-match found between alignment file [{0}] and waveform file [{1}], ";
                    string str2 = "for the end sample of alignment is [{2}] but";
                    string str3 = " the total sample number of waveform file is [{3}].";
                    string info = string.Format(CultureInfo.InvariantCulture,
                        str1 + str2 + str3,
                        segmentFilePath, wave16kFilePath,
                        sampleEnd, waveFile.SampleNumber);

                    throw new InvalidDataException(info);
                }

                ws.RootMeanSquare = waveFile.CalculateRms(sampleOffset, sampleEnd - sampleOffset);

                // Calculate compressed epoch lengths; null is passed as the last
                // argument and only the returned length is used here.
                int epoch16KCompressLength = EpochFile.CompressEpoch(eggFile.Epoch,
                    epochOffset, epochEnd - epochOffset, null);
                int epoch8KCompressLength = EpochFile.CompressEpoch(eggFile.Epoch8k,
                    epochOffset, epochEnd - epochOffset, null);

                // Columns: item id, segment index, start time, duration, sample offset,
                // sample length, epoch offset, epoch length, 16k compressed epoch length,
                // 8k compressed epoch length, RMS, average pitch, pitch range, unit full name.
                string line = string.Format(CultureInfo.InvariantCulture,
                    "{0,12} {1,3} {2,9:0.000000} {3,9:0.000000} {4,7} {5,5} {6,4} {7,3} {8,3} {9,3} {10,7:0.0} {11,5:0.0} {12,4:0.0} {13}",
                    scriptItem.Id, i,
                    ws.StartTime, ws.Duration, sampleOffset, sampleEnd - sampleOffset,
                    epochOffset, epochEnd - epochOffset,
                    epoch16KCompressLength, epoch8KCompressLength,
                    ws.RootMeanSquare, ws.AveragePitch, ws.PitchRange,
                    units[i].FullName);

                writer.WriteLine(line);
            }
        }
        /// <summary>
        /// Verifies that the units of a script item agree with the segments stored in
        /// its segmentation (alignment) file, recording every problem found in
        /// <paramref name="errorSet"/> instead of throwing.
        /// </summary>
        /// <param name="item">Script item to validate.</param>
        /// <param name="phoneme">Phoneme used to build the units of the item.</param>
        /// <param name="sliceData">Slice data used to build the units of the item.</param>
        /// <param name="fileMap">File list map giving the file name for the item id.</param>
        /// <param name="segmentDir">Directory that holds the segmentation files.</param>
        /// <param name="errorSet">Receives the data errors that are found.</param>
        public static void ValidateDataAlignment(ScriptItem item,
            Phoneme phoneme, SliceData sliceData, FileListMap fileMap, string segmentDir, ErrorSet errorSet)
        {
            string alignmentPath = Path.Combine(segmentDir, fileMap.Map[item.Id] + ".txt");

            // Any text appended by the file validation means the file is unusable.
            StringBuilder fileErrors = new StringBuilder();
            SegmentFile alignment = ValidateAlignmentFile(alignmentPath, fileErrors);
            if (fileErrors.Length != 0)
            {
                errorSet.Add(ScriptError.OtherErrors, item.Id, fileErrors.ToString());
                return;
            }

            Collection<TtsUnit> scriptUnits = item.GetUnits(phoneme, sliceData);

            // The file must contain at least one segment.
            if (alignment.WaveSegments.Count == 0)
            {
                string message = Helper.NeutralFormat(
                    "There is no valid alignment data in alignment file {0}.", alignmentPath);
                errorSet.Add(ScriptError.OtherErrors, item.Id, message);
                return;
            }

            // The final segment must be a silence segment.
            if (!alignment.WaveSegments[alignment.WaveSegments.Count - 1].IsSilenceFeature)
            {
                string message = Helper.NeutralFormat(
                    "Alignment file {0} is invalid, for without silence segment at the end.", alignmentPath);
                errorSet.Add(ScriptError.OtherErrors, item.Id, message);
                return;
            }

            // Units and non-silence segments are compared pairwise, so counts must match.
            if (scriptUnits.Count != alignment.NonSilenceWaveSegments.Count)
            {
                string message = Helper.NeutralFormat(
                    "script units {0} do not match with non-silence " +
                        "segments {1} in segmentation file.",
                    scriptUnits.Count,
                    alignment.NonSilenceWaveSegments.Count);
                errorSet.Add(ScriptError.OtherErrors, item.Id, message);
                return;
            }

            // Compare each segment label with the label formatted from the unit name;
            // every mismatch is reported, not only the first one.
            for (int position = 0; position < alignment.NonSilenceWaveSegments.Count; position++)
            {
                WaveSegment alignedSegment = alignment.NonSilenceWaveSegments[position];
                TtsUnit scriptUnit = scriptUnits[position];

                if (alignedSegment.Label == WaveSegment.FormatLabel(scriptUnit.MetaUnit.Name))
                {
                    continue;
                }

                string message = Helper.NeutralFormat(
                    "units [{0}/{1}] at {2} do not match between script and segment.",
                    WaveSegment.FormatLabel(scriptUnit.MetaUnit.Name),
                    alignedSegment.Label,
                    position);
                errorSet.Add(ScriptError.OtherErrors, item.Id, message);
            }
        }
Beispiel #15
0
        /// <summary>
        /// Collects the units of every sentence of this item into a single list,
        /// keeping sentence order and the unit order inside each sentence.
        /// </summary>
        /// <param name="phoneme">Phoneme passed on to each sentence.</param>
        /// <param name="sliceData">Slice data passed on to each sentence.</param>
        /// <returns>A new collection holding all units of the item.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="phoneme"/> or <paramref name="sliceData"/> is null.
        /// </exception>
        public Collection<TtsUnit> GetUnits(Phoneme phoneme, SliceData sliceData)
        {
            if (phoneme == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (sliceData == null)
            {
                throw new ArgumentNullException("sliceData");
            }

            Collection<TtsUnit> allUnits = new Collection<TtsUnit>();

            // Flatten: sentences in order, then the units of each sentence in order.
            foreach (ScriptSentence currentSentence in Sentences)
            {
                foreach (TtsUnit sentenceUnit in currentSentence.GetUnits(phoneme, sliceData))
                {
                    allUnits.Add(sentenceUnit);
                }
            }

            return allUnits;
        }
Beispiel #16
0
        /// <summary>
        /// Rewrites a phone-based word pronunciation as a slice-based pronunciation.
        /// The pronunciation is split into syllables on '&amp;', '|' or '-' (with any
        /// surrounding white space); each syllable is converted to units and the
        /// results are joined again with " - ".
        /// </summary>
        /// <param name="sliceData">Slice data; its language selects the phoneme.</param>
        /// <param name="wordPron">Word pronunciation to convert.</param>
        /// <returns>Word pronunciation string in slice.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="sliceData"/> is null, or <paramref name="wordPron"/> is null or empty.
        /// </exception>
        public static string RewritePhones2Units(SliceData sliceData,
            string wordPron)
        {
            if (sliceData == null)
            {
                throw new ArgumentNullException("sliceData");
            }

            if (string.IsNullOrEmpty(wordPron))
            {
                throw new ArgumentNullException("wordPron");
            }

            List<string> rewrittenSyllables = new List<string>();
            foreach (string syllable in Regex.Split(wordPron, @"\s*[&|\-]\s*"))
            {
                // Read the stress from the original syllable text so it can be
                // re-applied to the rewritten slice below.
                TtsStress stress = Pronunciation.GetStress(syllable);

                string[] sliceUnits = BuildUnits(Localor.GetPhoneme(sliceData.Language),
                    sliceData, syllable);

                // Units are separated by " . "; the onset/nucleus/coda prefixes are
                // removed and the phone delimiter becomes a blank.
                string sliceText = string.Join(" . ", sliceUnits)
                    .Replace(TtsUnit.OnsetPrefix, string.Empty)
                    .Replace(TtsUnit.NucleusPrefix, string.Empty)
                    .Replace(TtsUnit.CodaPrefix, string.Empty)
                    .Replace(TtsUnit.PhoneDelimiter, " ");

                if (stress != TtsStress.None)
                {
                    sliceText = SetVowelStress(Localor.GetPhoneme(sliceData.Language),
                        sliceText, stress);
                }

                rewrittenSyllables.Add(sliceText);
            }

            return string.Join(" - ", rewrittenSyllables.ToArray());
        }
Beispiel #17
0
        /// <summary>
        /// Returns the units of this word. When the word is a normal word and has
        /// no units yet, the owning sentence is asked to get its units first.
        /// </summary>
        /// <param name="phoneme">Phoneme.</param>
        /// <param name="sliceData">Slice data.</param>
        /// <param name="buildUnitFeature">Whether build unit features.</param>
        /// <returns>The unit collection held by this word.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="phoneme"/> or <paramref name="sliceData"/> is null.
        /// </exception>
        /// <exception cref="InvalidDataException">
        /// Units are needed but the word does not belong to a sentence.
        /// </exception>
        public Collection<TtsUnit> GetUnits(Phoneme phoneme, SliceData sliceData,
            bool buildUnitFeature)
        {
            if (phoneme == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (sliceData == null)
            {
                throw new ArgumentNullException("sliceData");
            }

            // Nothing to do for non-normal words or when units already exist.
            if (WordType != WordType.Normal || _units.Count != 0)
            {
                return _units;
            }

            if (Sentence == null)
            {
                throw new InvalidDataException(Helper.NeutralFormat("word should belong to a sentence."));
            }

            // The returned value is ignored here; the word's own collection is
            // what gets handed back to the caller.
            Sentence.GetUnits(phoneme, sliceData, buildUnitFeature);

            return _units;
        }
Beispiel #18
0
        /// <summary>
        /// Builds the units of this word without filling their features. Does
        /// nothing when the word already has units. When a TCGPP score string is
        /// present, its scores are assigned to the phones of the units in order.
        /// </summary>
        /// <param name="sliceData">Slice data.</param>
        /// <param name="pronunciationSeparator">Pronunciation separator.</param>
        /// <exception cref="InvalidDataException">
        /// The TCGPP score string holds fewer scores than there are phones.
        /// </exception>
        public void BuildUnitWithoutFeature(SliceData sliceData,
            PronunciationSeparator pronunciationSeparator)
        {
            if (Units.Count > 0)
            {
                return;
            }

            UpdateUnitSyllables();

            for (int syllableIndex = 0; syllableIndex < UnitSyllables.Count; syllableIndex++)
            {
                ScriptSyllable syllable = UnitSyllables[syllableIndex];
                syllable.Tag = this;

                // Link every unit back to its syllable and to this word.
                Collection<TtsUnit> syllableUnits = BuildUnitsForSyllable(syllable, sliceData, pronunciationSeparator);
                for (int i = 0; i < syllableUnits.Count; i++)
                {
                    syllableUnits[i].WordType = WordType;
                    syllableUnits[i].Tag = syllable;
                    syllableUnits[i].Word = this;

                    Units.Add(syllableUnits[i]);
                }
            }

            // Parse TCGPP score to TtsMetaPhone
            if (!string.IsNullOrEmpty(_tcgppScores))
            {
                string[] tcgppScores = _tcgppScores.Split(new char[] { TcgppScoreDelimeter },
                    StringSplitOptions.RemoveEmptyEntries);
                int index = 0;
                foreach (TtsUnit unit in Units)
                {
                    foreach (TtsMetaPhone phone in unit.MetaUnit.Phones)
                    {
                        if (index >= tcgppScores.Length)
                        {
                            throw new InvalidDataException(Helper.NeutralFormat(
                                "Invalid TCGPP score format [{0}]", _tcgppScores));
                        }

                        // Scores are machine-written data, so parse them with the
                        // invariant culture rather than the current thread culture.
                        phone.TcgppScore = int.Parse(tcgppScores[index],
                            System.Globalization.CultureInfo.InvariantCulture);
                        index++;
                    }
                }
            }
        }
Beispiel #19
0
        /// <summary>
        /// Creates one unit for each non-empty slice of a syllable. The stress is
        /// removed from the syllable text before it is split into slices.
        /// </summary>
        /// <param name="syllable">Syllable to build units for.</param>
        /// <param name="sliceData">Slice data; supplies the unit language.</param>
        /// <param name="pronunciationSeparator">Separator used to split the syllable into slices.</param>
        /// <returns>The units built, in slice order.</returns>
        private static Collection<TtsUnit> BuildUnitsForSyllable(ScriptSyllable syllable,
            SliceData sliceData, PronunciationSeparator pronunciationSeparator)
        {
            Debug.Assert(syllable != null);
            Debug.Assert(sliceData != null);

            string stressFreeText = Core.Pronunciation.RemoveStress(syllable.Text.Trim());
            string[] slices = pronunciationSeparator.SplitSlices(stressFreeText);

            // Positions are estimated for all slices, so they are indexed with
            // the slice index even when empty slices are skipped below.
            PosInSyllable[] positions = EstimatePosInSyllable(slices, sliceData);

            Collection<TtsUnit> result = new Collection<TtsUnit>();
            for (int index = 0; index < slices.Length; index++)
            {
                string sliceText = slices[index].Trim();
                if (!string.IsNullOrEmpty(sliceText))
                {
                    TtsUnit unit = new TtsUnit(sliceData.Language);

                    // Only the last slice takes the break level of the syllable;
                    // every other slice gets a phone-level break.
                    if (index == slices.Length - 1)
                    {
                        unit.TtsBreak = syllable.TtsBreak;
                    }
                    else
                    {
                        unit.TtsBreak = TtsBreak.Phone;
                    }

                    // Position, emphasis and stress features; the punctuation
                    // type is not set here.
                    unit.Feature.PosInSyllable = positions[index];
                    unit.Feature.TtsEmphasis = syllable.TtsEmphasis;
                    unit.Feature.TtsStress = syllable.Stress;

                    // Unit name: runs of blanks in the slice become a single '+'.
                    unit.MetaUnit.Name = Regex.Replace(sliceText, " +", @"+");
                    unit.MetaUnit.Language = unit.Language;

                    result.Add(unit);
                }
            }

            return result;
        }
Beispiel #20
0
        /// <summary>
        /// Creates slice data for a language by parsing each of the given full unit names.
        /// </summary>
        /// <param name="language">Language assigned to the slice data.</param>
        /// <param name="unitFullNames">Full unit names to parse into the slice data.</param>
        /// <returns>The slice data that was built.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="unitFullNames"/> is null.
        /// </exception>
        public static SliceData ToSliceData(Language language, IEnumerable<string> unitFullNames)
        {
            if (unitFullNames == null)
            {
                throw new ArgumentNullException("unitFullNames");
            }

            // The language is set before any unit name is parsed.
            SliceData result = new SliceData { Language = language };

            foreach (string fullName in unitFullNames)
            {
                result.ParseUnit(fullName);
            }

            return result;
        }
        /// <summary>
        /// Builds the mono MLF entry of a script item and, when requested, writes it:
        /// a label header, a leading silence, the content of each pronounced normal
        /// word separated by short pauses, a trailing silence and a "." terminator.
        /// Nothing is written when the item has no pronounced normal word or when
        /// any such word lacks a pronunciation.
        /// </summary>
        /// <param name="item">Script item.</param>
        /// <param name="sw">Text writer; must not be null when writing to file.</param>
        /// <param name="writeToFile">Whether writing to file.</param>
        /// <param name="phoneme">Phoneme; its mapping type selects unit or syllable output.</param>
        /// <param name="sliceData">Slice data used to get the units of each word.</param>
        /// <returns>Errors found for the item.</returns>
        private static ErrorSet BuildMonoMlf(ScriptItem item, StreamWriter sw,
            bool writeToFile, Phoneme phoneme, SliceData sliceData)
        {
            Debug.Assert(item != null);
            Debug.Assert(phoneme != null);

            if (writeToFile && sw == null)
            {
                throw new ArgumentNullException("sw");
            }

            Collection<ScriptWord> pronouncedWords = item.AllPronouncedNormalWords;
            ErrorSet result = new ErrorSet();

            if (pronouncedWords.Count == 0)
            {
                result.Add(ScriptError.OtherErrors, item.Id, Helper.NeutralFormat("No pronounced normal word."));
                return result;
            }

            // First pass: every word needs a pronunciation; report all that do not.
            foreach (ScriptWord candidate in pronouncedWords)
            {
                Debug.Assert(candidate != null);
                if (string.IsNullOrEmpty(candidate.Pronunciation))
                {
                    result.Add(ScriptError.OtherErrors, item.Id, Helper.NeutralFormat("No pronunciation normal word '{1}' in script item {0}.", item.Id, candidate.Grapheme));
                }
            }

            if (result.Count != 0)
            {
                return result;
            }

            if (writeToFile)
            {
                sw.WriteLine("\"*/{0}.lab\"", item.Id);
                sw.WriteLine(Phoneme.SilencePhone);
            }

            // Second pass: emit each word, with a short pause between words
            // but not after the last one.
            int lastWordIndex = pronouncedWords.Count - 1;
            for (int wordIndex = 0; wordIndex <= lastWordIndex; wordIndex++)
            {
                ScriptWord word = pronouncedWords[wordIndex];
                Collection<TtsUnit> wordUnits = word.GetUnits(phoneme, sliceData);

                if (phoneme.Tts2srMapType == Phoneme.TtsToSrMappingType.PhoneBased)
                {
                    foreach (TtsUnit wordUnit in wordUnits)
                    {
                        result.Merge(BuildMonoMlf(wordUnit, item, sw, writeToFile, phoneme));
                    }
                }
                else if (phoneme.Tts2srMapType == Phoneme.TtsToSrMappingType.SyllableBased)
                {
                    foreach (ScriptSyllable wordSyllable in word.UnitSyllables)
                    {
                        result.Merge(BuildMonoMlf(wordSyllable, item, sw, writeToFile, phoneme));
                    }
                }

                if (writeToFile && wordIndex < lastWordIndex)
                {
                    sw.WriteLine(Phoneme.ShortPausePhone);
                }
            }

            if (writeToFile)
            {
                sw.WriteLine(Phoneme.SilencePhone);
                sw.WriteLine(".");  // end of sentence
            }

            return result;
        }