/// <summary>
        /// Returns the unit collection held by this instance, building it first when
        /// the units are flagged as needing a (re)build.
        /// </summary>
        /// <param name="phoneme">Phoneme handed to the unit builder.</param>
        /// <param name="sliceData">Slice data handed to the unit builder.</param>
        /// <param name="buildUnitFeature">Whether unit features are built together with the units.</param>
        /// <returns>The unit collection of this instance.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="phoneme"/> or <paramref name="sliceData"/> is null.
        /// </exception>
        public Collection<TtsUnit> GetUnits(Phoneme phoneme, SliceData sliceData,
            bool buildUnitFeature)
        {
            if (phoneme == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (sliceData == null)
            {
                throw new ArgumentNullException("sliceData");
            }

            // Nothing to do when the cached units are still valid.
            if (!_needBuildUnits)
            {
                return _units;
            }

            // The flag is only cleared after a successful build, so a failed build
            // is retried on the next call.
            BuildUnits(phoneme, sliceData, buildUnitFeature);
            _needBuildUnits = false;

            return _units;
        }
        /// <summary>
        /// Rebuilds the unit list of this sentence from its words.
        /// </summary>
        /// <remarks>
        /// Words that are not pronounceable normal words are skipped, as are words
        /// without pronunciation when no unit features are requested. Each remaining
        /// word gets its units rebuilt, tagged with the word type mapped from the
        /// non-normal words that directly follow it, and appended to the sentence units.
        /// </remarks>
        /// <param name="phoneme">Phoneme, used when unit features are built.</param>
        /// <param name="sliceData">Slice data used to build the units of each word.</param>
        /// <param name="buildUnitFeature">Whether unit features are built after the units.</param>
        private void BuildUnits(Phoneme phoneme, SliceData sliceData, bool buildUnitFeature)
        {
            Helper.ThrowIfNull(phoneme);
            Helper.ThrowIfNull(sliceData);

            _units.Clear();

            string punctuationPattern = ScriptItem.PunctuationPattern;
            for (int i = 0; i < Words.Count; i++)
            {
                ScriptWord word = Words[i];
                if (!word.IsPronouncableNormalWord)
                {
                    continue;
                }

                if (!buildUnitFeature && string.IsNullOrEmpty(word.Pronunciation))
                {
                    continue;
                }

                // Look ahead over the non-normal words that follow; the last one mapped
                // decides the word type assigned to the units of the current word.
                WordType followingType = WordType.Normal;
                while (i < Words.Count - 1
                    && Words[i + 1].WordType != WordType.Normal)
                {
                    followingType = Localor.MapPunctuation(Words[i + 1].Grapheme,
                        punctuationPattern);

                    // Anything but "other punctuation" ends the look-ahead without
                    // consuming the word that was just inspected.
                    if (followingType != WordType.OtherPunctuation)
                    {
                        break;
                    }

                    // "Other punctuation": consume it and keep looking.
                    i++;
                }

                word.Units.Clear();
                word.BuildUnitWithoutFeature(sliceData, ScriptItem.PronunciationSeparator);
                foreach (TtsUnit wordUnit in word.Units)
                {
                    wordUnit.WordType = followingType;
                }

                Helper.AppendCollection<TtsUnit>(_units, word.Units);
            }

            if (buildUnitFeature)
            {
                BuildUnitFeatures(phoneme);
            }
        }
示例#3
0
        /// <summary>
        /// Returns the unit collection of this word. When the word is a normal word
        /// without any units yet, the owning sentence is asked to build its units first.
        /// </summary>
        /// <param name="phoneme">Phoneme forwarded to the sentence.</param>
        /// <param name="sliceData">Slice data forwarded to the sentence.</param>
        /// <param name="buildUnitFeature">Whether unit features are built as well.</param>
        /// <returns>The unit collection of this word.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="phoneme"/> or <paramref name="sliceData"/> is null.
        /// </exception>
        /// <exception cref="InvalidDataException">
        /// Units must be built but the word has no sentence.
        /// </exception>
        public Collection<TtsUnit> GetUnits(Phoneme phoneme, SliceData sliceData,
            bool buildUnitFeature)
        {
            if (phoneme == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (sliceData == null)
            {
                throw new ArgumentNullException("sliceData");
            }

            // Non-normal words and words that already carry units need no build.
            if (WordType != WordType.Normal || _units.Count != 0)
            {
                return _units;
            }

            if (Sentence == null)
            {
                throw new InvalidDataException(Helper.NeutralFormat("word should belong to a sentence."));
            }

            // The return value is not used here; this method returns the word's own _units.
            Sentence.GetUnits(phoneme, sliceData, buildUnitFeature);

            return _units;
        }
 /// <summary>
 /// Convenience overload of the three-argument GetUnits that always requests
 /// unit features to be built.
 /// </summary>
 /// <param name="phoneme">Phoneme forwarded to the full overload.</param>
 /// <param name="sliceData">Slice data forwarded to the full overload.</param>
 /// <returns>Tts units returned by the full overload.</returns>
 public Collection<TtsUnit> GetUnits(Phoneme phoneme, SliceData sliceData)
 {
     const bool BuildUnitFeature = true;
     return GetUnits(phoneme, sliceData, BuildUnitFeature);
 }
示例#5
0
        /// <summary>
        /// Slices one syllable pronunciation into unit names.
        /// Each returned name is prefixed with the onset, nucleus or coda prefix of
        /// <c>TtsUnit</c>, and the phones inside one unit are joined with
        /// <c>TtsUnit.PhoneDelimiter</c>.
        /// </summary>
        /// <remarks>
        /// Order of processing:
        /// 1. If the whole syllable is a predefined nucleus slice, it is returned as one unit.
        /// 2. If the syllable has no vowel, it is returned as one onset or coda slice when
        ///    predefined, otherwise as one coda unit per phone.
        /// 3. Otherwise the vowel plus the surrounding span up to the outermost sonorant on
        ///    each side forms the nucleus candidate, which is truncated phone by phone until
        ///    it is a predefined nucleus slice or a single phone. The phones before it become
        ///    onset units and the phones after it are handled by <c>BuildCodaUnits</c>.
        /// </remarks>
        /// <param name="phoneme">Phoneme of the language to process with.</param>
        /// <param name="sliceData">Slice data to process.</param>
        /// <param name="syllable">Syllable pronunciation to process.</param>
        /// <returns>Unit names in syllable order.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="phoneme"/> or <paramref name="sliceData"/> is null, or
        /// <paramref name="syllable"/> is null or empty.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The sonorant phone list, onset slices or nucleus slices are null.
        /// </exception>
        public static string[] BuildUnits(Phoneme phoneme,
            SliceData sliceData, string syllable)
        {
            if (phoneme == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (phoneme.TtsSonorantPhones == null)
            {
                string message = Helper.NeutralFormat("phoneme.TtsSonorantPhones should not be null.");
                throw new ArgumentException(message);
            }

            if (string.IsNullOrEmpty(syllable))
            {
                throw new ArgumentNullException("syllable");
            }

            if (sliceData == null)
            {
                throw new ArgumentNullException("sliceData");
            }

            if (sliceData.OnsetSlices == null)
            {
                string message = Helper.NeutralFormat("sliceData.OnsetSlices should not be null.");
                throw new ArgumentException(message);
            }

            if (sliceData.NucleusSlices == null)
            {
                string message = Helper.NeutralFormat("sliceData.NucleusSlices should not be null.");
                throw new ArgumentException(message);
            }

            List<string> slicedUnits = new List<string>();

            string unstressedSyllable = Pronunciation.RemoveStress(syllable);

            ScriptItem scriptItem = new ScriptItem(phoneme.Language);

            // items contains phone and tone.
            string[] items = scriptItem.PronunciationSeparator.SplitPhones(unstressedSyllable);

            // Treat the whole syllable as one unit at first.
            TtsMetaUnit ttsMetaUnit = new TtsMetaUnit(phoneme.Language);
            ttsMetaUnit.Name = string.Join(" ", items);
            string[] phones = ttsMetaUnit.GetPhonesName();

            // Treat all phones in this syllable as a whole unit
            if (sliceData.NucleusSlices.IndexOf(ttsMetaUnit.Name) >= 0)
            {
                // If it is already defined in the predefined unit collection, return it
                slicedUnits.Add(TtsUnit.NucleusPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
                return slicedUnits.ToArray();
            }

            int vowelIndex = phoneme.GetFirstVowelIndex(phones);
            if (vowelIndex < 0)
            {
                // If no vowel in the syllable, treat all phones in this syllable as a unit if it is in unit table
                if (sliceData.OnsetSlices.IndexOf(ttsMetaUnit.Name) >= 0)
                {
                    slicedUnits.Add(TtsUnit.OnsetPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
                }
                else if (sliceData.CodaSlices.IndexOf(ttsMetaUnit.Name) >= 0)
                {
                    slicedUnits.Add(TtsUnit.CodaPrefix + ttsMetaUnit.Name.Replace(" ", TtsUnit.PhoneDelimiter));
                }
                else
                {
                    // otherwise, treat each phone as a coda unit
                    foreach (string phone in phones)
                    {
                        slicedUnits.Add(TtsUnit.CodaPrefix + phone);
                    }
                }

                return slicedUnits.ToArray();
            }

            // Find the left-most sonorant consonant before the vowel. The scan does not stop
            // at a non-sonorant phone, so the candidate span may contain non-sonorants; the
            // truncation loop below is what trims the span against the unit table.
            int firstSonarantIndex = vowelIndex;
            for (int i = vowelIndex - 1; i >= 0; i--)
            {
                if (phoneme.TtsSonorantPhones.IndexOf(phones[i]) >= 0)
                {
                    firstSonarantIndex = i;
                }
            }

            // Find the right-most sonorant consonant after the vowel (same scan rule as above).
            int lastSonarantIndex = vowelIndex;
            for (int i = vowelIndex + 1; i <= phones.Length - 1; i++)
            {
                if (phoneme.TtsSonorantPhones.IndexOf(phones[i]) >= 0)
                {
                    lastSonarantIndex = i;
                }
            }

            // Treat the vowel and the surrounding span as the nucleus unit first
            string nucleus = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones,
                firstSonarantIndex, lastSonarantIndex - firstSonarantIndex + 1);

            TruncateRuleData truncateRuleData = Localor.GetTruncateRuleData(phoneme.Language);

            // Refine nucleus according to the predefined unit table.
            // "Not found" is a negative index: an entry stored at position 0 of the table is a
            // valid match and must stop the truncation, like every other lookup in this method.
            while (lastSonarantIndex - firstSonarantIndex > 0 && sliceData.NucleusSlices.IndexOf(nucleus) < 0)
            {
                // If the unit candidate is not listed in the predefined unit list, try to truncate it
                string[] leftRight =
                    PhoneMerger.TruncateOnePhoneFromNucleus(phoneme, truncateRuleData.NucleusTruncateRules,
                    nucleus);

                if (phoneme.TtsPhones.IndexOf(leftRight[0]) >= 0)
                {
                    // The left part is a single known phone: it was cut from the front.
                    firstSonarantIndex++;
                }
                else
                {
                    // Otherwise the single phone must have been cut from the back.
                    Debug.Assert(phoneme.TtsPhones.IndexOf(leftRight[1]) >= 0);
                    lastSonarantIndex--;
                }

                // Re-define the remaining nucleus unit
                nucleus = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones,
                    firstSonarantIndex, lastSonarantIndex - firstSonarantIndex + 1);
            }

            slicedUnits.Add(TtsUnit.NucleusPrefix + nucleus.Replace(" ", TtsUnit.PhoneDelimiter));

            // Refine onset
            for (int index = firstSonarantIndex - 1; index >= 0; index--)
            {
                string onset = TtsMetaPhone.Join(TtsUnit.PhoneDelimiter, ttsMetaUnit.Phones, 0, index + 1);
                if (sliceData.OnsetSlices.IndexOf(onset.Replace(TtsUnit.PhoneDelimiter, " ")) >= 0)
                {
                    slicedUnits.Insert(0, TtsUnit.OnsetPrefix + onset);

                    // All phones from 0 to index are consumed by this onset unit:
                    // reset index to 0 so the loop's index-- ends the iteration.
                    index -= index;
                }
                else
                {
                    // Treat it as a single phone unit
                    slicedUnits.Insert(0,
                        TtsUnit.OnsetPrefix + TtsMetaPhone.Join(TtsUnit.PhoneDelimiter, ttsMetaUnit.Phones, index, 1));
                }
            }

            // Refine coda, matching from right to left
            BuildCodaUnits(sliceData, ttsMetaUnit.Phones, lastSonarantIndex + 1, slicedUnits);

            return slicedUnits.ToArray();
        }
示例#6
0
        /// <summary>
        /// Converts one syllable to speech recognition phones and, when requested,
        /// writes them to the MLF writer one phone per line.
        /// </summary>
        /// <param name="syllable">Syllable whose text is converted.</param>
        /// <param name="item">Script item the syllable belongs to; its id tags reported errors.</param>
        /// <param name="sw">Text writer receiving the phones.</param>
        /// <param name="writeToFile">Whether the phones are written to <paramref name="sw"/>.</param>
        /// <param name="phoneme">Phoneme performing the TTS to SR phone conversion.</param>
        /// <returns>Errors found; empty when the conversion succeeded.</returns>
        private static ErrorSet BuildMonoMlf(ScriptSyllable syllable, ScriptItem item, StreamWriter sw,
            bool writeToFile, Phoneme phoneme)
        {
            Debug.Assert(syllable != null);
            Debug.Assert(item != null);

            ErrorSet result = new ErrorSet();

            string stressFreeText = Pronunciation.RemoveStress(syllable.Text.Trim());
            string[] recognitionPhones = phoneme.Tts2SrPhones(stressFreeText.Trim());

            if (recognitionPhones == null)
            {
                // Conversion failed: report it, nothing can be written.
                string message = string.Format(CultureInfo.InvariantCulture,
                    "Invalid TTS syllable[{0}], which can not be converted to Speech Recognition Phone.",
                     stressFreeText);
                result.Add(ScriptError.OtherErrors, item.Id, message);
            }
            else if (writeToFile)
            {
                foreach (string recognitionPhone in recognitionPhones)
                {
                    sw.WriteLine(recognitionPhone);
                }
            }

            return result;
        }
示例#7
0
        /// <summary>
        /// Extract acoustic features for a given script file and write them to
        /// <paramref name="targetFilePath"/>, one sentence of the file map at a time.
        /// </summary>
        /// <param name="script">Script file instance.</param>
        /// <param name="phoneme">Phoneme used to get units.</param>
        /// <param name="sliceData">Slice data used to get units.</param>
        /// <param name="fileMap">File list map; its keys are the sentence ids to process.</param>
        /// <param name="segmentDir">Segmentation file directory.</param>
        /// <param name="wave16kDir">16k Hz waveform file directory.</param>
        /// <param name="epochDir">Epoch file directory.</param>
        /// <param name="targetFilePath">Target acoustic file path.</param>
        /// <exception cref="ArgumentNullException">
        /// A required argument is null, or a directory/file path is null or empty.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The script has no file path, or the file map has no map or keys.
        /// </exception>
        /// <exception cref="InvalidDataException">
        /// A sentence id of the file map does not exist in the script file.
        /// </exception>
        public static void ExtractAcoustic(XmlScriptFile script, Phoneme phoneme, SliceData sliceData,
            FileListMap fileMap, string segmentDir, string wave16kDir, string epochDir, string targetFilePath)
        {
            // Parameters validation
            if (script == null)
            {
                throw new ArgumentNullException("script");
            }

            if (string.IsNullOrEmpty(script.FilePath))
            {
                throw new ArgumentException("script.FilePath is null");
            }

            if (fileMap == null)
            {
                throw new ArgumentNullException("fileMap");
            }

            if (fileMap.Map == null)
            {
                throw new ArgumentException("fileMap.Map is null");
            }

            if (fileMap.Map.Keys == null)
            {
                throw new ArgumentException("fileMap.Map.Keys is null");
            }

            if (string.IsNullOrEmpty(segmentDir))
            {
                throw new ArgumentNullException("segmentDir");
            }

            if (string.IsNullOrEmpty(wave16kDir))
            {
                throw new ArgumentNullException("wave16kDir");
            }

            if (string.IsNullOrEmpty(epochDir))
            {
                throw new ArgumentNullException("epochDir");
            }

            if (!Directory.Exists(segmentDir))
            {
                throw Helper.CreateException(typeof(DirectoryNotFoundException),
                    segmentDir);
            }

            if (!Directory.Exists(wave16kDir))
            {
                throw Helper.CreateException(typeof(DirectoryNotFoundException),
                    wave16kDir);
            }

            if (!Directory.Exists(epochDir))
            {
                throw Helper.CreateException(typeof(DirectoryNotFoundException),
                    epochDir);
            }

            if (string.IsNullOrEmpty(targetFilePath))
            {
                throw new ArgumentNullException("targetFilePath");
            }

            // Reject missing unit inputs before the target file is created or truncated,
            // instead of failing half-way through the first sentence.
            if (phoneme == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (sliceData == null)
            {
                throw new ArgumentNullException("sliceData");
            }

            Helper.EnsureFolderExistForFile(targetFilePath);

            using (StreamWriter sw = new StreamWriter(targetFilePath))
            {
                // iterate each script item or sentence
                foreach (string sid in fileMap.Map.Keys)
                {
                    if (!script.ItemDic.ContainsKey(sid))
                    {
                        string message = string.Format(CultureInfo.InvariantCulture,
                            "Sentence [{0}] does not exist in script file [{1}].",
                            sid, script.FilePath);
                        throw new InvalidDataException(message);
                    }

                    ExtractAcoustic(sw, script, sid, phoneme, sliceData, fileMap, segmentDir, wave16kDir, epochDir);
                }
            }
        }
示例#8
0
        /// <summary>
        /// Set nucleus vowel stress mark: the stress value is inserted, separated by a
        /// space, directly after the first vowel of the pronunciation.
        /// </summary>
        /// <param name="phoneme">Phoneme of the language to process.</param>
        /// <param name="pronunciation">Space separated phones of one syllable.</param>
        /// <param name="stress">Stress mark to set for the vowel in the pronunciation.</param>
        /// <returns>
        /// Null when <paramref name="pronunciation"/> is null or empty; the unchanged
        /// pronunciation when <paramref name="stress"/> is not above <c>TtsStress.None</c>;
        /// otherwise the pronunciation with stress.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="phoneme"/> is null.</exception>
        /// <exception cref="InvalidDataException">No vowel is found in the pronunciation.</exception>
        public static string SetVowelStress(Phoneme phoneme, string pronunciation, TtsStress stress)
        {
            if (phoneme == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (string.IsNullOrEmpty(pronunciation))
            {
                return null;
            }

            if (stress > TtsStress.None)
            {
                string[] phones = pronunciation.Split(new char[] { ' ' },
                    StringSplitOptions.RemoveEmptyEntries);
                int vowelIndex = phoneme.GetFirstVowelIndex(phones);
                if (vowelIndex < 0)
                {
                    // Report the whole pronunciation. Passing the phones array here would bind
                    // it to the params object[] argument list, printing only the first phone
                    // and throwing FormatException when the array is empty.
                    string message = string.Format(CultureInfo.InvariantCulture,
                        "There is no vowel found in the syllable pronunciation [{0}]",
                        pronunciation);
                    throw new InvalidDataException(message);
                }

                phones[vowelIndex] = string.Format(CultureInfo.InvariantCulture,
                    "{0} {1}", phones[vowelIndex], (int)stress);

                return string.Join(" ", phones);
            }
            else
            {
                return pronunciation;
            }
        }
示例#9
0
        /// <summary>
        /// Extract acoustic features for a given sentence and write one line per
        /// non-silence segment to <paramref name="writer"/>.
        /// </summary>
        /// <param name="writer">Stream writer to write acoustic features.</param>
        /// <param name="script">Script file instance.</param>
        /// <param name="sid">Script item id.</param>
        /// <param name="phoneme">Phoneme used to get units.</param>
        /// <param name="sliceData">Slice data used to get units.</param>
        /// <param name="fileMap">File list map.</param>
        /// <param name="segmentDir">Segmentation file directory.</param>
        /// <param name="wave16kDir">16k Hz waveform file directory.</param>
        /// <param name="epochDir">Epoch file directory.</param>
        /// <exception cref="InvalidDataException">
        /// The unit count differs from the non-silence segment count, the adjusted epoch
        /// range is inverted, or a segment ends beyond the waveform.
        /// </exception>
        private static void ExtractAcoustic(StreamWriter writer, XmlScriptFile script, string sid,
            Phoneme phoneme, SliceData sliceData, FileListMap fileMap, string segmentDir,
            string wave16kDir, string epochDir)
        {
            ScriptItem scriptItem = script.ItemDic[sid];

            // find the absolute file paths for each kind data file 
            string wave16kFilePath = Path.Combine(wave16kDir, fileMap.Map[scriptItem.Id] + ".wav");
            string epochFilePath = Path.Combine(epochDir, fileMap.Map[scriptItem.Id] + ".epoch");
            string segmentFilePath = Path.Combine(segmentDir, fileMap.Map[scriptItem.Id] + ".txt");

            // load data files
            SegmentFile segFile = new SegmentFile();
            segFile.Load(segmentFilePath);

            EggAcousticFeature eggFile = new EggAcousticFeature();
            eggFile.LoadEpoch(epochFilePath);

            WaveAcousticFeature waveFile = new WaveAcousticFeature();
            waveFile.Load(wave16kFilePath);

            // calculate acoustic features for each segments in the files
            int totalCount = segFile.NonSilenceWaveSegments.Count;
            Collection<TtsUnit> units = scriptItem.GetUnits(phoneme, sliceData);
            if (units.Count != totalCount)
            {
                string str1 = "Unit number mis-matched between sentence [{0}] in ";
                string str2 = "script file [{1}] and in the alignment file [{2}]. ";
                string str3 = "There are {3} units in script but {4} units in alignment.";
                string message = string.Format(CultureInfo.InvariantCulture,
                    str1 + str2 + str3,
                    sid, script.FilePath, segmentFilePath,
                    units.Count, totalCount);
                throw new InvalidDataException(message);
            }

            for (int i = 0; i < totalCount; i++)
            {
                // for each wave segment
                WaveSegment ws = segFile.NonSilenceWaveSegments[i];

                // get unit sample scope
                int sampleOffset = (int)(ws.StartTime * waveFile.SamplesPerSecond);
                int sampleLength = (int)(ws.Duration * waveFile.SamplesPerSecond);
                int sampleEnd = sampleOffset + sampleLength;

                // calculate average pitch and pitch range on the unadjusted sample scope
                float averagePitch, pitchRange;
                eggFile.GetPitchAndRange(sampleOffset,
                    sampleLength, out averagePitch, out pitchRange);
                ws.AveragePitch = averagePitch;
                ws.PitchRange = pitchRange;

                // calculate root mean square, and before that adjust the segment alignment
                // with the epoch data; sampleOffset and sampleEnd are passed by ref and may
                // be changed by the adjustment
                int epochOffset = eggFile.AdjustAlignment(ref sampleOffset);
                int epochEnd = eggFile.AdjustAlignment(ref sampleEnd);

                if (epochOffset > epochEnd)
                {
                    string info = string.Format(CultureInfo.InvariantCulture,
                        "epochOffset[{0}] should not be bigger than epochEnd[{1}]",
                        epochOffset, epochEnd);
                    throw new InvalidDataException(info);
                }

                if (sampleEnd > waveFile.SampleNumber)
                {
                    // Report sampleEnd, the value that actually failed the comparison
                    // (an epoch index is not comparable to a sample count).
                    string str1 = "Mis-match found between alignment file [{0}] and waveform file [{1}], ";
                    string str2 = "for the end sample of alignment is [{2}] but";
                    string str3 = " the total sample number of waveform file is [{3}].";
                    string info = string.Format(CultureInfo.InvariantCulture,
                        str1 + str2 + str3,
                        segmentFilePath, wave16kFilePath,
                        sampleEnd, waveFile.SampleNumber);

                    throw new InvalidDataException(info);
                }

                ws.RootMeanSquare = waveFile.CalculateRms(sampleOffset, sampleEnd - sampleOffset);

                // calculate the compressed epoch lengths
                int epoch16KCompressLength = EpochFile.CompressEpoch(eggFile.Epoch,
                    epochOffset, epochEnd - epochOffset, null);
                int epoch8KCompressLength = EpochFile.CompressEpoch(eggFile.Epoch8k,
                    epochOffset, epochEnd - epochOffset, null);

                // columns: sentence id, unit index, start time, duration, sample offset,
                // sample length, epoch offset, epoch length, 16k and 8k compressed epoch
                // lengths, RMS, average pitch, pitch range, unit full name
                string message = string.Format(CultureInfo.InvariantCulture,
                    "{0,12} {1,3} {2,9:0.000000} {3,9:0.000000} {4,7} {5,5} {6,4} {7,3} {8,3} {9,3} {10,7:0.0} {11,5:0.0} {12,4:0.0} {13}",
                    scriptItem.Id, i,
                    ws.StartTime, ws.Duration, sampleOffset, sampleEnd - sampleOffset,
                    epochOffset, epochEnd - epochOffset,
                    epoch16KCompressLength, epoch8KCompressLength,
                    ws.RootMeanSquare, ws.AveragePitch, ws.PitchRange,
                    units[i].FullName);

                writer.WriteLine(message);
            }
        }
示例#10
0
        /// <summary>
        /// Cross-checks a script file against the file list map and the segmentation
        /// files, collecting every inconsistency instead of stopping at the first one.
        /// </summary>
        /// <param name="fileMap">File list map.</param>
        /// <param name="script">Script file instance.</param>
        /// <param name="phoneme">Phoneme used to get units.</param>
        /// <param name="sliceData">Slice data used to get units.</param>
        /// <param name="segmentDir">Segment file directory.</param>
        /// <returns>All data errors found; empty when everything is consistent.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="segmentDir"/> is null or empty, or <paramref name="fileMap"/>
        /// or <paramref name="script"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">The file map has no map or keys.</exception>
        public static ErrorSet ValidateDataAlignment(FileListMap fileMap, XmlScriptFile script, 
            Phoneme phoneme, SliceData sliceData, string segmentDir)
        {
            // Argument checks
            if (string.IsNullOrEmpty(segmentDir))
            {
                throw new ArgumentNullException("segmentDir");
            }

            if (fileMap == null)
            {
                throw new ArgumentNullException("fileMap");
            }

            if (fileMap.Map == null)
            {
                throw new ArgumentException("fileMap.Map is null");
            }

            if (fileMap.Map.Keys == null)
            {
                throw new ArgumentException("fileMap.Map.Keys is null");
            }

            if (script == null)
            {
                throw new ArgumentNullException("script");
            }

            ErrorSet found = new ErrorSet();

            // Pass 1: every script item must be mapped and match its segmentation file.
            foreach (ScriptItem scriptItem in script.Items)
            {
                try
                {
                    if (fileMap.Map.ContainsKey(scriptItem.Id))
                    {
                        ValidateDataAlignment(scriptItem, phoneme, sliceData, fileMap, segmentDir, found);
                    }
                    else
                    {
                        found.Add(ScriptError.OtherErrors, scriptItem.Id, Helper.NeutralFormat("File list map does not contain item"));
                    }
                }
                catch (InvalidDataException invalidData)
                {
                    // Invalid data in one item is recorded and must not stop the other items.
                    found.Add(ScriptError.OtherErrors, scriptItem.Id, Helper.BuildExceptionMessage(invalidData));
                }
            }

            // Pass 2: every mapped sentence id must exist in the script.
            foreach (string mappedId in fileMap.Map.Keys)
            {
                if (script.ItemDic.ContainsKey(mappedId))
                {
                    continue;
                }

                found.Add(ScriptError.OtherErrors, mappedId, Helper.NeutralFormat("script file does not contain item"));
            }

            return found;
        }
示例#11
0
        /// <summary>
        /// Compares the units of one script item with the segments of its alignment
        /// file and records every mismatch in <paramref name="errorSet"/>.
        /// </summary>
        /// <param name="item">Script item to check.</param>
        /// <param name="phoneme">Phoneme used to get units.</param>
        /// <param name="sliceData">Slice data used to get units.</param>
        /// <param name="fileMap">File list map giving the file name of the item.</param>
        /// <param name="segmentDir">Segment file directory.</param>
        /// <param name="errorSet">Error set receiving the problems found.</param>
        public static void ValidateDataAlignment(ScriptItem item,
            Phoneme phoneme, SliceData sliceData, FileListMap fileMap, string segmentDir, ErrorSet errorSet)
        {
            string alignmentPath = Path.Combine(segmentDir, fileMap.Map[item.Id] + ".txt");

            StringBuilder loadErrors = new StringBuilder();
            SegmentFile alignment = ValidateAlignmentFile(alignmentPath, loadErrors);
            if (loadErrors.Length != 0)
            {
                // The alignment file itself is broken; nothing further can be compared.
                errorSet.Add(ScriptError.OtherErrors, item.Id, loadErrors.ToString());
                return;
            }

            Collection<TtsUnit> scriptUnits = item.GetUnits(phoneme, sliceData);

            if (alignment.WaveSegments.Count == 0)
            {
                string message = Helper.NeutralFormat(
                    "There is no valid alignment data in alignment file {0}.", alignmentPath);
                errorSet.Add(ScriptError.OtherErrors, item.Id, message);
                return;
            }

            if (!alignment.WaveSegments[alignment.WaveSegments.Count - 1].IsSilenceFeature)
            {
                string message = Helper.NeutralFormat(
                    "Alignment file {0} is invalid, for without silence segment at the end.", alignmentPath);
                errorSet.Add(ScriptError.OtherErrors, item.Id, message);
                return;
            }

            if (scriptUnits.Count != alignment.NonSilenceWaveSegments.Count)
            {
                string message = Helper.NeutralFormat(
                    "script units {0} do not match with non-silence " +
                        "segments {1} in segmentation file.",
                    scriptUnits.Count,
                    alignment.NonSilenceWaveSegments.Count);
                errorSet.Add(ScriptError.OtherErrors, item.Id, message);
                return;
            }

            // Counts agree: compare the labels position by position.
            for (int position = 0; position < alignment.NonSilenceWaveSegments.Count; position++)
            {
                WaveSegment segment = alignment.NonSilenceWaveSegments[position];
                string expectedLabel = WaveSegment.FormatLabel(scriptUnits[position].MetaUnit.Name);

                if (segment.Label == expectedLabel)
                {
                    continue;
                }

                string message = Helper.NeutralFormat(
                    "units [{0}/{1}] at {2} do not match between script and segment.",
                    expectedLabel,
                    segment.Label,
                    position);
                errorSet.Add(ScriptError.OtherErrors, item.Id, message);
            }
        }
示例#12
0
        /// <summary>
        /// Build mono MLF for a script file: the script is loaded with validation, the
        /// items flagged for deletion are removed, and the remaining items are processed
        /// one by one.
        /// </summary>
        /// <param name="scriptFilePath">Path of the script file to load.</param>
        /// <param name="outFilePath">Path of the MLF file; only used when writing to file.</param>
        /// <param name="writeToFile">Whether the MLF is written to <paramref name="outFilePath"/>.</param>
        /// <param name="phoneme">Phoneme.</param>
        /// <param name="validateSetting">Validation setting used when loading the script.</param>
        /// <param name="sliceData">Slice data.</param>
        /// <returns>Errors of the script itself merged with the errors of each item.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="phoneme"/>, <paramref name="sliceData"/> or
        /// <paramref name="validateSetting"/> is null.
        /// </exception>
        /// <exception cref="InvalidDataException">No valid item is left in the script.</exception>
        public static ErrorSet BuildMonoMlf(string scriptFilePath, string outFilePath, bool writeToFile,
            Phoneme phoneme, XmlScriptValidateSetting validateSetting, SliceData sliceData)
        {
            if (phoneme == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (sliceData == null)
            {
                throw new ArgumentNullException("sliceData");
            }

            if (validateSetting == null)
            {
                throw new ArgumentNullException("validateSetting");
            }

            validateSetting.VerifySetting();

            ErrorSet errors = new ErrorSet();
            StreamWriter sw = null;

            try
            {
                // Opened inside the try block so the writer is closed even when
                // writing the header fails.
                if (writeToFile)
                {
                    sw = new StreamWriter(outFilePath, false, Encoding.ASCII);
                    sw.WriteLine("#!MLF!#");
                }

                XmlScriptFile script = XmlScriptFile.LoadWithValidation(scriptFilePath, validateSetting);
                script.Remove(GetNeedDeleteItemIds(script.ErrorSet));
                if (script.Items.Count == 0)
                {
                    throw new InvalidDataException(
                        Helper.NeutralFormat("No valid items in {0}.", scriptFilePath));
                }

                errors.Merge(script.ErrorSet);
                foreach (ScriptItem item in script.Items)
                {
                    errors.Merge(BuildMonoMlf(item, sw, writeToFile, phoneme, sliceData));
                }
            }
            finally
            {
                if (sw != null)
                {
                    sw.Close();
                }
            }

            if (writeToFile)
            {
                // Format verification only runs in builds where Debug.Assert is compiled in.
                Debug.Assert(HtkTool.VerifyMlfFormat(outFilePath));
            }

            return errors;
        }
示例#13
0
        /// <summary>
        /// Converts every phone of one unit to speech recognition phones and, when
        /// requested, writes the converted phones to the MLF writer one per line.
        /// </summary>
        /// <param name="unit">Unit whose meta unit phones are converted.</param>
        /// <param name="item">Script item the unit belongs to; its id tags reported errors.</param>
        /// <param name="sw">Text writer receiving the phones.</param>
        /// <param name="writeToFile">Whether the phones are written to <paramref name="sw"/>.</param>
        /// <param name="phoneme">Phoneme performing the TTS to SR phone conversion.</param>
        /// <returns>One error per phone that could not be converted.</returns>
        private static ErrorSet BuildMonoMlf(TtsUnit unit, ScriptItem item, StreamWriter sw, 
            bool writeToFile, Phoneme phoneme)
        {
            Debug.Assert(unit != null);
            Debug.Assert(item != null);

            ErrorSet result = new ErrorSet();
            List<string> converted = new List<string>();

            foreach (TtsMetaPhone ttsPhone in unit.MetaUnit.Phones)
            {
                string[] recognitionPhones = phoneme.Tts2SrPhones(ttsPhone.Name);
                if (recognitionPhones != null)
                {
                    converted.AddRange(recognitionPhones);
                }
                else
                {
                    // Unconvertible phone: report it and go on with the remaining phones.
                    string message = string.Format(CultureInfo.InvariantCulture,
                        "Invalid TTS phone[{0}], which can not be converted to Speech Recognition Phone.",
                        ttsPhone.Name);
                    result.Add(ScriptError.OtherErrors, item.Id, message);
                }
            }

            // Output happens only after all phones were processed.
            if (writeToFile)
            {
                foreach (string recognitionPhone in converted)
                {
                    sw.WriteLine(recognitionPhone);
                }
            }

            return result;
        }
示例#14
0
        /// <summary>
        /// Fill in the context and position features of every unit in this sentence.
        /// </summary>
        /// <param name="phoneme">Phoneme, used to map phone names to phone ids.</param>
        private void BuildUnitFeatures(Phoneme phoneme)
        {
            Helper.ThrowIfNull(phoneme);

            for (int index = 0; index < _units.Count; index++)
            {
                TtsUnit current = _units[index];

                ScriptSyllable currentSyllable = (ScriptSyllable)current.Tag;
                ScriptWord currentWord = (ScriptWord)currentSyllable.Tag;

                // Neighbouring units; null at the sentence boundaries.
                TtsUnit following = null;
                if (index + 1 < _units.Count)
                {
                    following = _units[index + 1];
                }

                TtsUnit previous = null;
                if (index > 0)
                {
                    previous = _units[index - 1];
                }

                ScriptSyllable previousSyllable = ScriptItem.FindPreviousSyllable(_units, index);
                ScriptWord previousWord = ScriptItem.FindPreviousWord(Words, currentWord);

                // A unit starts/ends its word when the neighbour belongs to another word.
                bool startsWord = previous == null ||
                    currentWord != (ScriptWord)((ScriptSyllable)previous.Tag).Tag;
                bool endsWord = following == null ||
                    currentWord != (ScriptWord)((ScriptSyllable)following.Tag).Tag;

                // Left context is silence at the sentence start, after a break of at least
                // inter-phrase level on the previous word, or after a special unit.
                bool silenceOnLeft = previous == null ||
                    (startsWord && previousWord != null && ((int)previousWord.Break >= (int)TtsBreak.InterPhrase)) ||
                    previous.MetaUnit.Special;
                if (silenceOnLeft)
                {
                    current.Feature.LeftContextPhone = phoneme.TtsPhone2Id(Phoneme.SilencePhone);
                    current.Feature.LeftContextTone = ToneManager.NoneContextTone;
                }
                else
                {
                    current.Feature.LeftContextPhone = phoneme.TtsPhone2Id(previous.MetaUnit.RightPhone);
                    current.Feature.LeftContextTone = previous.MetaUnit.RightTone;
                }

                // Right context mirrors the left one, using this word's own break.
                bool silenceOnRight = following == null ||
                    (endsWord && ((int)currentWord.Break >= (int)TtsBreak.InterPhrase)) ||
                    following.MetaUnit.Special;
                if (silenceOnRight)
                {
                    current.Feature.RightContextPhone = phoneme.TtsPhone2Id(Phoneme.SilencePhone);
                    current.Feature.RightContextTone = ToneManager.NoneContextTone;
                }
                else
                {
                    current.Feature.RightContextPhone = phoneme.TtsPhone2Id(following.MetaUnit.LeftPhone);
                    current.Feature.RightContextTone = following.MetaUnit.LeftTone;
                }

                // Unit position in syllable.
                current.Feature.PosInSyllable = ScriptItem.CalculatePosInSyllable(previous, current);

                // Syllable position in word.
                current.Feature.PosInWord = ScriptItem.CalculatePosInWord(previousSyllable, currentSyllable);

                // Word position in sentence; units of question type are overridden to Quest.
                current.Feature.PosInSentence = ScriptItem.CalculatePosInSentence(previousWord, currentWord);
                if (current.WordType == WordType.Question)
                {
                    current.Feature.PosInSentence = PosInSentence.Quest;
                }

                // Only units in the last syllable of the word carry the word tone.
                bool inLastSyllable =
                    currentWord.UnitSyllables.IndexOf(currentSyllable) == currentWord.UnitSyllables.Count - 1;
                if (!inLastSyllable)
                {
                    current.Feature.TtsWordTone = TtsWordTone.Continue;
                }
                else
                {
                    current.Feature.TtsWordTone = currentWord.WordTone;
                }
            }
        }
示例#15
0
        /// <summary>
        /// Check whether the syllable passes both vowel count checks:
        /// enough vowels and not too many vowels.
        /// </summary>
        /// <param name="entry">Script entry.</param>
        /// <param name="phoneme">Phoneme.</param>
        /// <param name="phones">Phones of the syllable.</param>
        /// <returns>True if both checks pass, otherwise false.</returns>
        private static bool IsGoodSyllableWithVowel(ScriptItem entry,
                        Phoneme phoneme,
                        string[] phones)
        {
            // The upper-bound check is only evaluated when the lower-bound check passes.
            if (!IsSyllableWithEnoughVowel(entry, phoneme, phones))
            {
                return false;
            }

            return IsSyllableWithLessVowel(entry, phoneme, phones);
        }
示例#16
0
        /// <summary>
        /// Collect the units of all sentences of this item, in sentence order.
        /// </summary>
        /// <param name="phoneme">Phoneme.</param>
        /// <param name="sliceData">Slice data.</param>
        /// <returns>A new collection holding the units of every sentence.</returns>
        public Collection<TtsUnit> GetUnits(Phoneme phoneme, SliceData sliceData)
        {
            if (phoneme == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (sliceData == null)
            {
                throw new ArgumentNullException("sliceData");
            }

            Collection<TtsUnit> allUnits = new Collection<TtsUnit>();
            foreach (ScriptSentence current in Sentences)
            {
                Collection<TtsUnit> sentenceUnits = current.GetUnits(phoneme, sliceData);
                foreach (TtsUnit sentenceUnit in sentenceUnits)
                {
                    allUnits.Add(sentenceUnit);
                }
            }

            return allUnits;
        }
示例#17
0
 /// <summary>
 /// Check that the vowel count of the syllable does not exceed
 /// the maximum vowel count per syllable of the script entry.
 /// </summary>
 /// <param name="entry">Script entry providing MaxVowelCountInSyllable.</param>
 /// <param name="phoneme">Phoneme, used to locate the vowels.</param>
 /// <param name="phones">Phones of the syllable.</param>
 /// <returns>True if not having too many.</returns>
 private static bool IsSyllableWithLessVowel(ScriptItem entry,
                 Phoneme phoneme,
                 string[] phones)
 {
     return phoneme.GetVowelIndexes(phones).Length <= entry.MaxVowelCountInSyllable;
 }
示例#18
0
        /// <summary>
        /// Truncate one phone from nucleus.
        /// The rules are tried in order, and the first rule whose phone list contains
        /// the boundary phone of the nucleus (last phone for a right rule, first phone
        /// for a left rule) decides where the nucleus is split.
        /// If no rule yields two non-empty parts, the last phone is split off.
        /// </summary>
        /// <param name="phoneme">Phoneme of the language to process.</param>
        /// <param name="rules">Truncation rules.</param>
        /// <param name="nucleus">CVC source to truncate.</param>
        /// <returns>Result: left part + right part.</returns>
        /// <exception cref="ArgumentNullException">
        /// phoneme or rules is null, or nucleus is null or empty.</exception>
        /// <exception cref="ArgumentException">An element of rules is null.</exception>
        /// <exception cref="NotSupportedException">
        /// A rule has a side other than left or right.</exception>
        public static string[] TruncateOnePhoneFromNucleus(Phoneme phoneme,
            Collection<TruncateRule> rules, string nucleus)
        {
            if (phoneme == null)
            {
                throw new ArgumentNullException("phoneme");
            }

            if (rules == null)
            {
                throw new ArgumentNullException("rules");
            }

            if (string.IsNullOrEmpty(nucleus))
            {
                throw new ArgumentNullException("nucleus");
            }

            TtsMetaUnit ttsMetaUnit = new TtsMetaUnit(phoneme.Language);
            ttsMetaUnit.Name = nucleus;
            string[] phoneNames = ttsMetaUnit.GetPhonesName();
            int lastIndex = phoneNames.Length - 1;
            string leftPart = null;
            string rightPart = null;

            for (int i = 0; i < rules.Count; i++)
            {
                TruncateRule rule = rules[i];
                if (rule == null)
                {
                    string message = Helper.NeutralFormat("rules[{0}] should not be null.", i);
                    throw new ArgumentException(message);
                }

                if (rule.Side == TruncateSide.Right)
                {
                    // Escape the phone name so that it is matched literally, even if
                    // it contains characters with a special meaning in a regex.
                    Match m = Regex.Match(rule.Phones,
                        @"\b" + Regex.Escape(phoneNames[lastIndex]) + @"\b");
                    if (m.Success)
                    {
                        leftPart = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones, 0, lastIndex);
                        rightPart = ttsMetaUnit.Phones[lastIndex].Name;
                        break;
                    }
                }
                else if (rule.Side == TruncateSide.Left)
                {
                    Match m = Regex.Match(rule.Phones,
                        @"\b" + Regex.Escape(phoneNames[0]) + @"\b");
                    if (m.Success)
                    {
                        leftPart = ttsMetaUnit.Phones[0].Name;
                        rightPart = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones, 1, lastIndex);
                        break;
                    }
                }
                else
                {
                    string message = string.Format(CultureInfo.InvariantCulture,
                        "Truncating side [{0}] is not supported.",
                        rule.Side);
                    Debug.Assert(false, message);
                    throw new NotSupportedException(message);
                }
            }

            if (string.IsNullOrEmpty(leftPart) || string.IsNullOrEmpty(rightPart))
            {
                // No rule produced a usable split: trace it and fall back to
                // cutting off the last phone.
                string message = string.Format(CultureInfo.InvariantCulture,
                    "Nucleus [{0}] has empty left phone or right phone after truncating.",
                    nucleus);
                Trace.WriteLine(message);
                leftPart = TtsMetaPhone.Join(" ", ttsMetaUnit.Phones, 0, lastIndex);
                rightPart = ttsMetaUnit.Phones[lastIndex].Name;
            }

            return new string[] { leftPart, rightPart };
        }
示例#19
0
        /// <summary>
        /// Check whether the syllable is acceptable: it must not have too many vowels,
        /// and it must either have enough vowels or, when it has no vowel at all,
        /// contain a sonorant and consist of more than one phone.
        /// </summary>
        /// <param name="entry">Script entry.</param>
        /// <param name="phoneme">Phoneme.</param>
        /// <param name="phones">Phones of the syllable.</param>
        /// <returns>True if the syllable is acceptable, otherwise false.</returns>
        private static bool IsGoodSyllableWithSonorant(ScriptItem entry,
                        Phoneme phoneme,
                        string[] phones)
        {
            if (!IsSyllableWithLessVowel(entry, phoneme, phones))
            {
                return false;
            }

            if (IsSyllableWithEnoughVowel(entry, phoneme, phones))
            {
                return true;
            }

            // Not enough vowels: only a vowel-less syllable can still be accepted.
            if (phoneme.GetVowelIndexes(phones).Length != 0)
            {
                return false;
            }

            // No vowel, should have one sonorant and more than one phone.
            int[] sonorantPositions = phoneme.GetSonorantIndexes(phones);
            return sonorantPositions.Length != 0 && phones.Length != 1;
        }
示例#20
0
        /// <summary>
        /// Create a phoneme for the given language from the phone set of that language.
        /// </summary>
        /// <param name="language">Language of phoneme to load.</param>
        /// <returns>The loaded phoneme, or null when no phone set is found for the language.</returns>
        public static Phoneme Create(Language language)
        {
            Phoneme result = new Phoneme();
            result.Language = language;

            TtsPhoneSet languagePhoneSet = Localor.GetPhoneSet(language);
            if (languagePhoneSet == null)
            {
                return null;
            }

            result.ParseData(languagePhoneSet);
            return result;
        }
示例#21
0
        /// <summary>
        /// Build the MLF entry of one script item: a label line, a leading silence,
        /// the phones of each pronounced normal word separated by short pauses,
        /// a trailing silence and the end-of-sentence mark.
        /// Nothing is written when the item has validation errors.
        /// </summary>
        /// <param name="item">Script item.</param>
        /// <param name="sw">Text writer; must not be null when writing to file.</param>
        /// <param name="writeToFile">Whether writing to file.</param>
        /// <param name="phoneme">Phoneme.</param>
        /// <param name="sliceData">Slice data.</param>
        /// <returns>Errors.</returns>
        private static ErrorSet BuildMonoMlf(ScriptItem item, StreamWriter sw,
            bool writeToFile, Phoneme phoneme, SliceData sliceData)
        {
            Debug.Assert(item != null);
            Debug.Assert(phoneme != null);

            if (writeToFile && sw == null)
            {
                throw new ArgumentNullException("sw");
            }

            Collection<ScriptWord> words = item.AllPronouncedNormalWords;
            ErrorSet result = new ErrorSet();
            if (words.Count == 0)
            {
                result.Add(ScriptError.OtherErrors, item.Id, Helper.NeutralFormat("No pronounced normal word."));
                return result;
            }

            // Every word needs a pronunciation before anything is written.
            foreach (ScriptWord candidate in words)
            {
                Debug.Assert(candidate != null);
                if (string.IsNullOrEmpty(candidate.Pronunciation))
                {
                    result.Add(ScriptError.OtherErrors, item.Id, Helper.NeutralFormat("No pronunciation normal word '{1}' in script item {0}.", item.Id, candidate.Grapheme));
                }
            }

            if (result.Count != 0)
            {
                return result;
            }

            if (writeToFile)
            {
                sw.WriteLine("\"*/{0}.lab\"", item.Id);
                sw.WriteLine(Phoneme.SilencePhone);
            }

            for (int wordIndex = 0; wordIndex < words.Count; wordIndex++)
            {
                ScriptWord current = words[wordIndex];

                // The units are requested for every word, whatever the mapping type is.
                Collection<TtsUnit> wordUnits = current.GetUnits(phoneme, sliceData);
                if (phoneme.Tts2srMapType == Phoneme.TtsToSrMappingType.PhoneBased)
                {
                    foreach (TtsUnit wordUnit in wordUnits)
                    {
                        result.Merge(BuildMonoMlf(wordUnit, item, sw, writeToFile, phoneme));
                    }
                }
                else if (phoneme.Tts2srMapType == Phoneme.TtsToSrMappingType.SyllableBased)
                {
                    foreach (ScriptSyllable wordSyllable in current.UnitSyllables)
                    {
                        result.Merge(BuildMonoMlf(wordSyllable, item, sw, writeToFile, phoneme));
                    }
                }

                // Short pause between words, but not after the last one.
                bool hasNextWord = wordIndex + 1 < words.Count;
                if (writeToFile && hasNextWord)
                {
                    sw.WriteLine(Phoneme.ShortPausePhone);
                }
            }

            if (writeToFile)
            {
                sw.WriteLine(Phoneme.SilencePhone);
                sw.WriteLine(".");  // end of sentence
            }

            return result;
        }