Exemplo n.º 1
0
        /// <summary>
        /// Loads every segment file listed for the source directory, shifts it by
        /// the given silence duration and saves the result under the target directory.
        /// Target files that already exist are left untouched.
        /// </summary>
        /// <param name="silenceDuration">Silence duration in second.</param>
        /// <param name="sourceDir">Source segment directory.</param>
        /// <param name="targetDir">Target segment directory.</param>
        /// <returns>Data errors collected for the files that failed with invalid data.</returns>
        public static DataErrorSet ShiftSegmentFiles(float silenceDuration,
            string sourceDir, string targetDir)
        {
            DataErrorSet collectedErrors = new DataErrorSet();
            SegmentFile segmentFile = new SegmentFile();
            Dictionary<string, string> fileMap =
                Microsoft.Tts.Offline.FileListMap.Build(sourceDir, ".txt");

            foreach (KeyValuePair<string, string> entry in fileMap)
            {
                string targetPath = Path.Combine(targetDir, entry.Value + ".txt");
                if (File.Exists(targetPath))
                {
                    // An existing target file is never overwritten.
                    continue;
                }

                string sourcePath = Path.Combine(sourceDir, entry.Value + ".txt");
                try
                {
                    Helper.EnsureFolderExistForFile(targetPath);

                    segmentFile.Load(sourcePath);
                    segmentFile.Shift(silenceDuration);
                    segmentFile.Save(targetPath);
                }
                catch (InvalidDataException invalidData)
                {
                    // Record the failure against the sentence id and carry on with the next file.
                    DataError error = new DataError(sourcePath,
                        Helper.BuildExceptionMessage(invalidData), entry.Key);
                    collectedErrors.Errors.Add(error);
                }
            }

            return collectedErrors;
        }
        /// <summary>
        /// Builds a <see cref="TtsUtterance"/> from a script item, optionally inserting
        /// silence words where the segmentation file marks silence.
        /// </summary>
        /// <param name="item">Script item to convert; checked with Helper.ThrowIfNull.</param>
        /// <param name="segmentFile">
        /// Segmentation file used to locate silence segments. May be null, in which
        /// case no silence words are appended from alignment data.
        /// </param>
        /// <param name="buildAllWords">
        /// True to append every script word as a normal word; false to append only
        /// words flagged IsPronouncableNormalWord (plus punctuation when NeedPunctuation is set).
        /// </param>
        /// <param name="subSentenceIndex">
        /// Index of the single sentence to convert, or -1 to convert all sentences.
        /// </param>
        /// <returns>The utterance with phone list, phrase list and context characters built.</returns>
        /// <exception cref="InvalidDataException">
        /// Wraps any EspException raised while building; the message carries item.Id.
        /// </exception>
        public TtsUtterance Build(ScriptItem item, SegmentFile segmentFile, bool buildAllWords, int subSentenceIndex)
        {
            Helper.ThrowIfNull(item);

            TtsUtterance utterance = new TtsUtterance();

            // Running index into segmentFile.WaveSegments; advanced by the value
            // each Append* helper returns (presumably the number of phones appended - TODO confirm).
            int phoneIndex = 0;
            try
            {
                // Silence indicates a silence word.
                // NOTE(review): WaveSegments[0] is read without checking that the
                // segment list is non-empty - confirm Load guarantees at least one segment.
                if (segmentFile != null &&
                    segmentFile.WaveSegments[phoneIndex].IsSilenceFeature)
                {
                    phoneIndex += AppendSilenceWord(utterance, segmentFile.WaveSegments[phoneIndex].Label);
                }

                // Creates a words map for ToBI accent.
                Dictionary<ScriptWord, TtsWord> mapWords = new Dictionary<ScriptWord, TtsWord>();

                int sentenceIndex = 0;
                foreach (ScriptSentence scriptSentence in item.Sentences)
                {
                    // Only add certain sentence in the scriptItem.
                    // Because of short-circuit evaluation, sentenceIndex is only
                    // incremented when a specific sub-sentence was requested.
                    if (subSentenceIndex != -1 && sentenceIndex++ != subSentenceIndex)
                    {
                        continue;
                    }

                    // Treats unknown sentence type as declarative.
                    // The utterance-level type/emotion is overwritten by each sentence processed.
                    if (scriptSentence.SentenceType != SentenceType.Unknown)
                    {
                        utterance.SentenceType = (TtsSentenceType)scriptSentence.SentenceType;
                    }
                    else
                    {
                        utterance.SentenceType = (TtsSentenceType)SentenceType.Declarative;
                    }

                    utterance.SentenceEmotionType = (EmotionmlCategory)scriptSentence.Emotion;

                    // Converts each word in script sentence.
                    foreach (ScriptWord scriptWord in scriptSentence.Words)
                    {
                        if (buildAllWords || scriptWord.IsPronouncableNormalWord)
                        {
                            phoneIndex += AppendNormalWord(utterance, scriptWord);

                            // Adds into words map.
                            mapWords.Add(scriptWord, utterance.Words[utterance.Words.Count - 1]);

                            // Breaks if meets the end of the utterance.
                            // Note: this only leaves the word loop; the sentence loop continues.
                            if (segmentFile != null &&
                                phoneIndex >= segmentFile.WaveSegments.Count)
                            {
                                break;
                            }

                            // A silence segment following the word becomes a silence word.
                            if (segmentFile != null &&
                                segmentFile.WaveSegments[phoneIndex].IsSilenceFeature)
                            {
                                phoneIndex += AppendSilenceWord(utterance, segmentFile.WaveSegments[phoneIndex].Label);
                            }
                        }
                        // NOTE(review): buildAllWords is always false when this branch is
                        // reached (the branch above handles the true case), so only the
                        // punctuation condition is effective here.
                        else if (buildAllWords || (NeedPunctuation && scriptWord.WordType == WordType.Punctuation))
                        {
                            phoneIndex += AppendPunctuationWord(utterance, scriptWord);
                        }
                    }
                }

                // Builds phone list.
                // All pause durations are zero; the Array.Clear is redundant on a
                // freshly allocated array but harmless.
                int[] pauseDurations = new int[(int)TtsPauseLevel.PAU_IDX_SENTENCE + 1];
                Array.Clear(pauseDurations, 0, pauseDurations.Length);
                utterance.BuildPhoneList(Phoneme, pauseDurations, 0, 0);

                // Builds ToBI accent, which should be happened after phone list built.
                BuildToBIInformation(mapWords);

                // Builds phrase list.
                utterance.BuildPhraseList();

                // Builds character list.
                utterance.BuildContextCharacters();

                return utterance;
            }
            catch (EspException e)
            {
                // Translate engine failures into a data error that names the script item.
                throw new InvalidDataException(
                    Helper.NeutralFormat("Build utterance error on sentence \"{0}\"", item.Id), e);
            }
        }
        /// <summary>
        /// Extracts the features of the given script item and copies the start/end
        /// time of each wave segment onto the phone segment at the same index.
        /// </summary>
        /// <param name="item">
        /// The script item.
        /// </param>
        /// <param name="segmentFile">
        /// The segmentation file. Must not be null and must contain at least as many
        /// wave segments as the extracted sentence has phone segments.
        /// </param>
        /// <returns>
        /// The sentence contains all the features.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// The segmentation file is null.
        /// </exception>
        /// <exception cref="InvalidDataException">
        /// The segmentation file has fewer wave segments than the sentence has phone
        /// segments. May also propagate from the utterance builder.
        /// </exception>
        private Sentence Extract(ScriptItem item, SegmentFile segmentFile)
        {
            if (segmentFile == null)
            {
                // The timing information below cannot be filled in without alignment data.
                throw new ArgumentNullException("segmentFile");
            }

            UtteranceBuilder builder = new UtteranceBuilder(PhoneSet, PosSet, Phoneme)
            {
                NeedPos = NeedPos,
                NeedToBI = NeedToBI,
            };

            // Builds a utterance first.
            Sentence sentence = null;
            using (TtsUtterance utterance = builder.Build(item, segmentFile, false, -1))
            {
                // Extract ToneIndex if the language is zh-CN
                if (Language.ZhCN == (Language)PhoneSet.Language)
                {
                    ChineseToneIndexExtractor.Process(utterance, item);
                }

                if (UtteranceExtenders != null)
                {
                    // Uses the utterance extender here.
                    foreach (IUtteranceExtender extender in UtteranceExtenders)
                    {
                        extender.Process(utterance, item);
                    }
                }

                // Creates a sentence to store all the features.
                sentence = Extract(item.Id, utterance);

                // A script/alignment mismatch is a data problem: report it as
                // InvalidDataException so callers that skip bad sentences can do so,
                // instead of failing with an index-out-of-range error.
                int phoneCount = sentence.PhoneSegments.Count;
                int waveSegmentCount = segmentFile.WaveSegments.Count;
                if (phoneCount > waveSegmentCount)
                {
                    throw new InvalidDataException(string.Format(
                        System.Globalization.CultureInfo.InvariantCulture,
                        "Sentence \"{0}\" has {1} phone segments but the segmentation file only has {2} wave segments.",
                        item.Id, phoneCount, waveSegmentCount));
                }

                for (int i = 0; i < phoneCount; ++i)
                {
                    // Phone segment i takes its timing from wave segment i.
                    sentence.PhoneSegments[i].StartTimeInSecond = (float)segmentFile.WaveSegments[i].StartTime;
                    sentence.PhoneSegments[i].EndTimeInSecond = (float)segmentFile.WaveSegments[i].EndTime;
                }
            }

            return sentence;
        }
        /// <summary>
        /// Extracts features for every sentence listed in the file list map.
        /// Sentences that fail with invalid data are logged, removed from the script
        /// and removed from the file list map.
        /// </summary>
        /// <param name="script">
        /// The xml script file.
        /// </param>
        /// <param name="fileListMap">
        /// The file list map.
        /// </param>
        /// <param name="alignmentDir">
        /// The alignment directory.
        /// </param>
        /// <param name="waveDir">
        /// The wave directory.
        /// </param>
        /// <returns>
        /// The extracted features in training sentence set.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// Any of the arguments is null.
        /// </exception>
        public TrainingSentenceSet Extract(XmlScriptFile script, FileListMap fileListMap, string alignmentDir,
            string waveDir)
        {
            if (script == null)
            {
                throw new ArgumentNullException("script");
            }

            if (fileListMap == null)
            {
                throw new ArgumentNullException("fileListMap");
            }

            if (alignmentDir == null)
            {
                throw new ArgumentNullException("alignmentDir");
            }

            if (waveDir == null)
            {
                throw new ArgumentNullException("waveDir");
            }

            TrainingSentenceSet result = new TrainingSentenceSet();
            result.FileListMap = fileListMap;

            // Ids of the sentences dropped because of invalid data.
            List<string> failedIds = new List<string>();

            foreach (string sid in fileListMap.Map.Keys)
            {
                ScriptItem item = script.ItemDic[sid];

                try
                {
                    // Alignment data for this sentence.
                    SegmentFile alignment = new SegmentFile();
                    alignment.Load(fileListMap.BuildPath(alignmentDir, sid, "txt"));

                    // The waveform duration becomes the end time of the last segment.
                    WaveFile wave = new WaveFile();
                    wave.Load(fileListMap.BuildPath(waveDir, sid, FileExtensions.Waveform));
                    int lastIndex = alignment.WaveSegments.Count - 1;
                    alignment.WaveSegments[lastIndex].EndTime = wave.Duration;

                    // Feature extraction for the single script item.
                    Sentence extracted = Extract(item, alignment);
                    extracted.TrainingSet = result;
                    result.Sentences.Add(sid, extracted);
                }
                catch (InvalidDataException invalidData)
                {
                    // Only data errors are tolerated; anything else propagates to the caller.
                    Logger.Log(Helper.BuildExceptionMessage(invalidData));
                    script.Remove(sid);
                    failedIds.Add(sid);
                }
            }

            fileListMap.RemoveItems(failedIds);
            return result;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Checks that a script item agrees with its segmentation file and records
        /// every inconsistency found in the given error set.
        /// </summary>
        /// <param name="script">Script file instance; its path is reported for script-side mismatches.</param>
        /// <param name="item">Script item.</param>
        /// <param name="fileMap">File list map.</param>
        /// <param name="segmentDir">Segment file directory.</param>
        /// <param name="errorSet">Data error set that receives the errors found.</param>
        /// <param name="phoneBasedSegment">True for phone based alignment, false for unit based alignment.</param>
        public static void ValidateDataAlignment(ScriptFile script, ScriptItem item,
            FileListMap fileMap, string segmentDir, DataErrorSet errorSet, bool phoneBasedSegment)
        {
            string segmentFilePath = Path.Combine(segmentDir, fileMap.Map[item.Id] + ".txt");

            SegmentFile segmentFile = new SegmentFile();
            segmentFile.Load(segmentFilePath);

            // Structural checks on the alignment file itself.
            int allSegmentCount = segmentFile.WaveSegments.Count;
            if (allSegmentCount == 0)
            {
                errorSet.Errors.Add(new DataError(segmentFilePath,
                    "There is no valid alignment data into alignment file.", item.Id));
                return;
            }

            if (!segmentFile.WaveSegments[allSegmentCount - 1].IsSilencePhone)
            {
                errorSet.Errors.Add(new DataError(segmentFilePath,
                    "The alignment file is invalid, for without silence segment at the end.", item.Id));
                return;
            }

            if (phoneBasedSegment)
            {
                // Phone based alignment: counts must agree before labels are compared.
                string[] phones = item.GetPhones();
                int segmentCount = segmentFile.NonSilenceWaveSegments.Count;
                if (phones.Length != segmentCount)
                {
                    string countMessage = string.Format(CultureInfo.InvariantCulture,
                        "script phones {0} do not match with non-silence segments {1} in segmentation file.",
                        phones.Length, segmentCount);
                    errorSet.Errors.Add(new DataError(script.FilePath, countMessage, item.Id));
                    return;
                }

                for (int index = 0; index < segmentCount; index++)
                {
                    WaveSegment segment = segmentFile.NonSilenceWaveSegments[index];
                    if (segment.Label == phones[index])
                    {
                        continue;
                    }

                    // NOTE(review): the comparison uses the raw phone while the message
                    // shows the formatted label - confirm this asymmetry is intended.
                    string labelMessage = string.Format(CultureInfo.InvariantCulture,
                        "phone [{0}/{1}] at {2} does not match between script and segment.",
                        WaveSegment.FormatLabel(phones[index]), segment.Label, index);
                    errorSet.Errors.Add(new DataError(script.FilePath, labelMessage, item.Id));
                }
            }
            else
            {
                // Unit based alignment: counts must agree before labels are compared.
                int unitCount = item.Units.Count;
                int segmentCount = segmentFile.NonSilenceWaveSegments.Count;
                if (unitCount != segmentCount)
                {
                    string countMessage = string.Format(CultureInfo.InvariantCulture,
                        "script units {0} do not match with non-silence segments {1} in segmentation file.",
                        unitCount, segmentCount);
                    errorSet.Errors.Add(new DataError(script.FilePath, countMessage, item.Id));
                    return;
                }

                for (int index = 0; index < segmentCount; index++)
                {
                    WaveSegment segment = segmentFile.NonSilenceWaveSegments[index];
                    TtsUnit unit = item.Units[index];
                    string expectedLabel = WaveSegment.FormatLabel(unit.MetaUnit.Name);
                    if (segment.Label == expectedLabel)
                    {
                        continue;
                    }

                    string labelMessage = string.Format(CultureInfo.InvariantCulture,
                        "units [{0}/{1}] at {2} do not match between script and segment.",
                        expectedLabel, segment.Label, index);
                    errorSet.Errors.Add(new DataError(script.FilePath, labelMessage, item.Id));
                }
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Extracts acoustic features for a given sentence and writes one line per
        /// non-silence segment to the writer. The computed pitch and RMS values are
        /// also stored on each wave segment.
        /// </summary>
        /// <param name="writer">Stream writer to write acoustic features.</param>
        /// <param name="script">Script file instance.</param>
        /// <param name="sid">Sentence id.</param>
        /// <param name="fileMap">File list map.</param>
        /// <param name="segmentDir">Segmentation file directory.</param>
        /// <param name="wave16kDir">16k Hz waveform file directory.</param>
        /// <param name="epochDir">Epoch file directory.</param>
        /// <exception cref="InvalidDataException">
        /// The unit count in the script differs from the non-silence segment count,
        /// the adjusted epoch offset lies after the adjusted epoch end, or the
        /// adjusted end sample lies beyond the waveform.
        /// </exception>
        private static void ExtractAcoustic(StreamWriter writer, ScriptFile script, string sid,
            FileListMap fileMap, string segmentDir, string wave16kDir, string epochDir)
        {
            ScriptItem scriptItem = script.Items[sid];

            // find the absolute file paths for each kind data file
            string mappedName = fileMap.Map[scriptItem.Id];
            string wave16kFilePath = Path.Combine(wave16kDir, mappedName + ".wav");
            string epochFilePath = Path.Combine(epochDir, mappedName + ".epoch");
            string segmentFilePath = Path.Combine(segmentDir, mappedName + ".txt");

            // load data files
            SegmentFile segFile = new SegmentFile();
            segFile.Load(segmentFilePath);

            EggAcousticFeature eggFile = new EggAcousticFeature();
            eggFile.LoadEpoch(epochFilePath);

            WaveAcousticFeature waveFile = new WaveAcousticFeature();
            waveFile.Load(wave16kFilePath);

            // calculate acoustic features for each segments in the files
            int totalCount = segFile.NonSilenceWaveSegments.Count;
            if (scriptItem.Units.Count != totalCount)
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                    "Unit number mis-matched between sentence [{0}] in " +
                    "script file [{1}] and in the alignment file [{2}]. " +
                    "There are {3} units in script but {4} units in alignment.",
                    sid, script.FilePath, segmentFilePath,
                    scriptItem.Units.Count, totalCount);
                throw new InvalidDataException(message);
            }

            for (int i = 0; i < totalCount; i++)
            {
                // for each wave segment
                WaveSegment ws = segFile.NonSilenceWaveSegments[i];

                // get unit sample scope
                int sampleOffset = (int)(ws.StartTime * waveFile.SamplesPerSecond);
                int sampleLength = (int)(ws.Duration * waveFile.SamplesPerSecond);
                int sampleEnd = sampleOffset + sampleLength;

                // calculate average pitch and pitch range over the unadjusted sample scope
                float averagePitch, pitchRange;
                eggFile.GetPitchAndRange(sampleOffset,
                    sampleLength, out averagePitch, out pitchRange);
                ws.AveragePitch = averagePitch;
                ws.PitchRange = pitchRange;

                // calculate root mean square, and before that adjust the segment alignment with
                // the epoch data; the sample positions are passed by reference and
                // may be changed by the adjustment.
                int epochOffset = eggFile.AdjustAlignment(ref sampleOffset);
                int epochEnd = eggFile.AdjustAlignment(ref sampleEnd);

                if (epochOffset > epochEnd)
                {
                    string info = string.Format(CultureInfo.InvariantCulture,
                        "epochOffset[{0}] should not be bigger than epochEnd[{1}]",
                        epochOffset, epochEnd);
                    throw new InvalidDataException(info);
                }

                if (sampleEnd > waveFile.SampleNumber)
                {
                    // Report the end sample that actually failed the check
                    // (previously the epoch index was reported here by mistake).
                    string info = string.Format(CultureInfo.InvariantCulture,
                        "Mis-match found between alignment file [{0}] and waveform file [{1}], " +
                        "for the end sample of alignment is [{2}] but" +
                        " the total sample number of waveform file is [{3}].",
                        segmentFilePath, wave16kFilePath,
                        sampleEnd, waveFile.SampleNumber);

                    throw new InvalidDataException(info);
                }

                ws.RootMeanSquare = waveFile.CalculateRms(sampleOffset, sampleEnd - sampleOffset);

                // calculate epoch
                int epoch16KCompressLength = EpochFile.CompressEpoch(eggFile.Epoch,
                    epochOffset, epochEnd - epochOffset, null);
                int epoch8KCompressLength = EpochFile.CompressEpoch(eggFile.Epoch8k,
                    epochOffset, epochEnd - epochOffset, null);

                // Columns: sentence id, segment index, start time, duration, sample offset,
                // sample length, epoch offset, epoch length, 16k compressed epoch length,
                // 8k compressed epoch length, RMS, average pitch, pitch range, unit full name.
                // NOTE(review): an earlier comment said the epoch columns are left as zero,
                // but the computed values are written - confirm which is intended.
                string line = string.Format(CultureInfo.InvariantCulture,
                    "{0,12} {1,3} {2,9:0.000000} {3,9:0.000000} {4,7} {5,5} {6,4} {7,3} {8,3} {9,3} {10,7:0.0} {11,5:0.0} {12,4:0.0} {13}",
                    scriptItem.Id, i,
                    ws.StartTime, ws.Duration, sampleOffset, sampleEnd - sampleOffset,
                    epochOffset, epochEnd - epochOffset,
                    epoch16KCompressLength, epoch8KCompressLength,
                    ws.RootMeanSquare, ws.AveragePitch, ws.PitchRange,
                    scriptItem.Units[i].FullName);

                writer.WriteLine(line);
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Validates an alignment file and locates its trailing silence segment.
        /// Problems are appended to the string builder rather than thrown.
        /// </summary>
        /// <param name="alignmentFile">Alignment file to validate.</param>
        /// <param name="builder">String builder for error message.</param>
        /// <returns>
        /// The start of the last silence segment multiplied by 16000 (presumably a
        /// sample position at 16 kHz), or 0 when the file is missing, has no
        /// segments, or does not end with silence.
        /// </returns>
        private static int ValidateAlignmentFile(string alignmentFile, StringBuilder builder)
        {
            // Validate alignment file existence.
            if (!File.Exists(alignmentFile))
            {
                builder.AppendFormat(CultureInfo.InvariantCulture,
                    "Alignment file [{0}] does not exist.",
                    alignmentFile);
                return 0;
            }

            SegmentFile segFile = new SegmentFile();
            segFile.Load(alignmentFile);

            // An alignment without any segment has no last segment to inspect;
            // report it instead of failing on an out-of-range index.
            int segmentCount = segFile.WaveSegments.Count;
            if (segmentCount == 0)
            {
                builder.AppendFormat(CultureInfo.InvariantCulture,
                    "Alignment file [{0}] does not contain any segment.",
                    alignmentFile);
                return 0;
            }

            WaveSegment lastSeg = segFile.WaveSegments[segmentCount - 1];
            if (!lastSeg.IsSilencePhone)
            {
                builder.AppendFormat(CultureInfo.InvariantCulture,
                    "The ending segment of alignment file [{0}] is not silence.",
                    alignmentFile);
                return 0;
            }

            // the last one should be silence of the segment file
            return (int)(lastSeg.StartTime * 16000);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Reads every segment file stored in an MLF file.
        /// </summary>
        /// <param name="filePath">MLF file path.</param>
        /// <returns>Segment file dictionary, indexed by sentence id.</returns>
        /// <exception cref="InvalidDataException">
        /// The first line is not the MLF header, or a line expected to name a
        /// sentence file does not match the expected pattern.
        /// </exception>
        public static Dictionary<string, SegmentFile> ReadAllDataFromMlf(string filePath)
        {
            Dictionary<string, SegmentFile> segmentFiles = new Dictionary<string, SegmentFile>();

            using (StreamReader reader = new StreamReader(filePath))
            {
                // The file must start with the MLF marker line.
                string header = reader.ReadLine();
                if (header != "#!MLF!#")
                {
                    throw new InvalidDataException("Invalid file header " + filePath);
                }

                for (string entryLine = reader.ReadLine(); entryLine != null; entryLine = reader.ReadLine())
                {
                    // Each entry starts with the sentence file path; the name between
                    // the last '/' and the following '.' is captured.
                    Match pathMatch = Regex.Match(entryLine, @".*/(\S*)\.");
                    if (!pathMatch.Success)
                    {
                        throw new InvalidDataException("Invalid format in file "
                            + filePath + ", line " + entryLine);
                    }

                    SegmentFile segmentFile = new SegmentFile();
                    segmentFile.FilePath = pathMatch.Groups[1].Value;

                    // Load reads this entry's content from the shared reader
                    // (presumably up to the entry terminator - TODO confirm).
                    segmentFile.Load(reader);

                    segmentFiles.Add(segmentFile.Id, segmentFile);
                }
            }

            return segmentFiles;
        }
Exemplo n.º 9
0
        /// <summary>
        /// Loads an alignment file, reporting load failures through the string
        /// builder instead of throwing.
        /// </summary>
        /// <param name="alignmentFile">Alignment file to validate.</param>
        /// <param name="builder">String builder for error message.</param>
        /// <returns>
        /// The segment file instance. It is returned even when loading failed, so
        /// callers should check the builder for errors.
        /// </returns>
        private static SegmentFile ValidateAlignmentFile(string alignmentFile, StringBuilder builder)
        {
            SegmentFile loaded = new SegmentFile();

            try
            {
                loaded.Load(alignmentFile);
            }
            catch (InvalidDataException invalidData)
            {
                // Malformed content: pass on the loader's own description.
                builder.Append(Helper.BuildExceptionMessage(invalidData));
            }
            catch (FileNotFoundException)
            {
                // Missing file: report the path that was requested.
                builder.AppendFormat(CultureInfo.InvariantCulture,
                    "Alignment file [{0}] does not exist.",
                    alignmentFile);
            }

            return loaded;
        }
Exemplo n.º 10
0
        /// <summary>
        /// Gets the last silence align of the given segment file.
        /// Problems are appended to the string builder rather than thrown.
        /// </summary>
        /// <param name="segmentFile">The given segment file.</param>
        /// <param name="builder">The string builder for error message.</param>
        /// <returns>
        /// The start of the last segment multiplied by 16000 (presumably a sample
        /// position at 16 kHz) when that segment is silence; otherwise 0.
        /// </returns>
        private static int GetLastSilenceAlign(SegmentFile segmentFile, StringBuilder builder)
        {
            // A segment file without segments (for example one whose load failed)
            // has no last segment; report it instead of indexing out of range.
            int segmentCount = segmentFile.WaveSegments.Count;
            if (segmentCount == 0)
            {
                builder.AppendFormat(CultureInfo.InvariantCulture,
                    "Alignment file [{0}] does not contain any segment.",
                    segmentFile.FilePath);
                return 0;
            }

            WaveSegment lastSeg = segmentFile.WaveSegments[segmentCount - 1];
            if (!lastSeg.IsSilencePhone)
            {
                builder.AppendFormat(CultureInfo.InvariantCulture,
                    "The ending segment of alignment file [{0}] is not silence.",
                    segmentFile.FilePath);
                return 0;
            }

            // The last one should be silence of the segment file
            return (int)(lastSeg.StartTime * 16000);
        }