/// <summary>
/// Extracts the acoustic features of every sentence listed in the file map
/// and writes them, one line per unit, into the target file.
/// </summary>
/// <param name="script">Script file instance.</param>
/// <param name="fileMap">File list map.</param>
/// <param name="segmentDir">Segmentation file directory.</param>
/// <param name="wave16kDir">16k Hz waveform file directory.</param>
/// <param name="epochDir">Epoch file directory.</param>
/// <param name="targetFilePath">Target acoustic file path.</param>
/// <exception cref="ArgumentNullException">
/// A required argument is null, or a directory/target path is null or empty.
/// </exception>
/// <exception cref="ArgumentException">
/// A required member of <paramref name="script"/> or <paramref name="fileMap"/> is missing.
/// </exception>
/// <exception cref="InvalidDataException">
/// A sentence id of the file map is not present in the script.
/// </exception>
public static void ExtractAcoustic(ScriptFile script, FileListMap fileMap,
    string segmentDir, string wave16kDir, string epochDir, string targetFilePath)
{
    // --- script ---
    if (script == null)
    {
        throw new ArgumentNullException("script");
    }

    if (string.IsNullOrEmpty(script.FilePath))
    {
        throw new ArgumentException("script.FilePath is null");
    }

    if (script.Items == null)
    {
        throw new ArgumentException("script.Items is null");
    }

    // --- file map ---
    if (fileMap == null)
    {
        throw new ArgumentNullException("fileMap");
    }

    if (fileMap.Map == null)
    {
        throw new ArgumentException("fileMap.Map is null");
    }

    if (fileMap.Map.Keys == null)
    {
        throw new ArgumentException("fileMap.Map.Keys is null");
    }

    // --- input directories: all names are checked before any existence test ---
    if (string.IsNullOrEmpty(segmentDir))
    {
        throw new ArgumentNullException("segmentDir");
    }

    if (string.IsNullOrEmpty(wave16kDir))
    {
        throw new ArgumentNullException("wave16kDir");
    }

    if (string.IsNullOrEmpty(epochDir))
    {
        throw new ArgumentNullException("epochDir");
    }

    if (!Directory.Exists(segmentDir))
    {
        throw Helper.CreateException(typeof(DirectoryNotFoundException), segmentDir);
    }

    if (!Directory.Exists(wave16kDir))
    {
        throw Helper.CreateException(typeof(DirectoryNotFoundException), wave16kDir);
    }

    if (!Directory.Exists(epochDir))
    {
        throw Helper.CreateException(typeof(DirectoryNotFoundException), epochDir);
    }

    // --- output file ---
    if (string.IsNullOrEmpty(targetFilePath))
    {
        throw new ArgumentNullException("targetFilePath");
    }

    Helper.EnsureFolderExistForFile(targetFilePath);

    using (StreamWriter featureWriter = new StreamWriter(targetFilePath))
    {
        // The file map drives the iteration; each of its sentences must exist in the script.
        foreach (string sentenceId in fileMap.Map.Keys)
        {
            if (script.Items.ContainsKey(sentenceId))
            {
                ExtractAcoustic(featureWriter, script, sentenceId, fileMap,
                    segmentDir, wave16kDir, epochDir);
                continue;
            }

            throw new InvalidDataException(string.Format(CultureInfo.InvariantCulture,
                "Sentence [{0}] does not exist in script file [{1}].",
                sentenceId, script.FilePath));
        }
    }
}
/// <summary>
/// Checks the consistency between one script item and its segmentation file.
/// The segmentation file is located at "<paramref name="segmentDir"/>/&lt;mapped name&gt;.txt".
/// Every problem found is appended to <paramref name="errorSet"/>; the checks stop at the
/// first structural problem (no segments, no trailing silence, count mismatch) and only
/// compare labels one by one when the counts agree.
/// </summary>
/// <param name="script">Script file instance.</param>
/// <param name="item">Script item.</param>
/// <param name="fileMap">File list map.</param>
/// <param name="segmentDir">Segment file directory.</param>
/// <param name="errorSet">Data error set found.</param>
/// <param name="phoneBasedSegment">Phone based alignment or unit based alignment.</param>
/// <exception cref="ArgumentNullException">A reference argument is null.</exception>
/// <exception cref="ArgumentException"><paramref name="fileMap"/> has no map.</exception>
public static void ValidateDataAlignment(ScriptFile script, ScriptItem item,
    FileListMap fileMap, string segmentDir, DataErrorSet errorSet, bool phoneBasedSegment)
{
    // Fail with a descriptive exception instead of a NullReferenceException,
    // in line with the other public overloads.
    if (script == null)
    {
        throw new ArgumentNullException("script");
    }

    if (item == null)
    {
        throw new ArgumentNullException("item");
    }

    if (fileMap == null)
    {
        throw new ArgumentNullException("fileMap");
    }

    if (fileMap.Map == null)
    {
        throw new ArgumentException("fileMap.Map is null");
    }

    // Only null is rejected: an empty directory is still accepted by Path.Combine.
    if (segmentDir == null)
    {
        throw new ArgumentNullException("segmentDir");
    }

    if (errorSet == null)
    {
        throw new ArgumentNullException("errorSet");
    }

    string segmentFilePath = Path.Combine(segmentDir, fileMap.Map[item.Id] + ".txt");

    SegmentFile segmentFile = new SegmentFile();
    segmentFile.Load(segmentFilePath);

    if (segmentFile.WaveSegments.Count == 0)
    {
        errorSet.Errors.Add(new DataError(segmentFilePath,
            "There is no valid alignment data into alignment file.", item.Id));
        return;
    }

    if (!segmentFile.WaveSegments[segmentFile.WaveSegments.Count - 1].IsSilencePhone)
    {
        errorSet.Errors.Add(new DataError(segmentFilePath,
            "The alignment file is invalid, for without silence segment at the end.", item.Id));
        return;
    }

    int segmentCount = segmentFile.NonSilenceWaveSegments.Count;

    if (phoneBasedSegment)
    {
        // Fetch the phones once; they are needed for both the count check and the comparison.
        string[] phones = item.GetPhones();

        if (phones.Length != segmentCount)
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "script phones {0} do not match with non-silence segments {1} in segmentation file.",
                phones.Length, segmentCount);
            errorSet.Errors.Add(new DataError(script.FilePath, message, item.Id));
            return;
        }

        for (int i = 0; i < segmentCount; i++)
        {
            WaveSegment segment = segmentFile.NonSilenceWaveSegments[i];

            // NOTE(review): the raw phone is compared here while the message (and the unit
            // branch below) use WaveSegment.FormatLabel - confirm whether that is intended.
            if (segment.Label != phones[i])
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                    "phone [{0}/{1}] at {2} does not match between script and segment.",
                    WaveSegment.FormatLabel(phones[i]), segment.Label, i);
                errorSet.Errors.Add(new DataError(script.FilePath, message, item.Id));
            }
        }
    }
    else
    {
        if (item.Units.Count != segmentCount)
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "script units {0} do not match with non-silence segments {1} in segmentation file.",
                item.Units.Count, segmentCount);
            errorSet.Errors.Add(new DataError(script.FilePath, message, item.Id));
            return;
        }

        for (int i = 0; i < segmentCount; i++)
        {
            WaveSegment segment = segmentFile.NonSilenceWaveSegments[i];
            TtsUnit unit = item.Units[i];
            string expectedLabel = WaveSegment.FormatLabel(unit.MetaUnit.Name);

            if (segment.Label != expectedLabel)
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                    "units [{0}/{1}] at {2} do not match between script and segment.",
                    expectedLabel, segment.Label, i);
                errorSet.Errors.Add(new DataError(script.FilePath, message, item.Id));
            }
        }
    }
}
/// <summary>
/// Cross-checks the sentence ids of the file map against those of the script file
/// and reports every id that appears in only one of the two.
/// </summary>
/// <param name="fileMap">File list map.</param>
/// <param name="script">Script file instance.</param>
/// <returns>Data error set found.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="fileMap"/> or <paramref name="script"/> is null.
/// </exception>
/// <exception cref="ArgumentException">A required member of an argument is null or empty.</exception>
public static DataErrorSet ValidateDataAlignment(FileListMap fileMap, ScriptFile script)
{
    // --- file map ---
    if (fileMap == null)
    {
        throw new ArgumentNullException("fileMap");
    }

    if (fileMap.Map == null)
    {
        throw new ArgumentException("fileMap.Map is null");
    }

    if (fileMap.Map.Keys == null)
    {
        throw new ArgumentException("fileMap.Map.Keys is null");
    }

    if (string.IsNullOrEmpty(fileMap.FilePath))
    {
        throw new ArgumentException("fileMap.FilePath is null");
    }

    // --- script ---
    if (script == null)
    {
        throw new ArgumentNullException("script");
    }

    if (script.Items == null)
    {
        throw new ArgumentException("script.Items is null");
    }

    if (script.Items.Keys == null)
    {
        throw new ArgumentException("script.Items.Keys is null");
    }

    if (string.IsNullOrEmpty(script.FilePath))
    {
        throw new ArgumentException("script.FilePath is null");
    }

    DataErrorSet mismatches = new DataErrorSet();

    // Pass 1: ids known to the file map but missing from the script.
    foreach (string mappedId in fileMap.Map.Keys)
    {
        if (script.Items.ContainsKey(mappedId))
        {
            continue;
        }

        mismatches.Errors.Add(new DataError(
            script.FilePath,
            string.Format(CultureInfo.InvariantCulture,
                "Sentence [{0}] is found in the filemap [{1}], but not listed in script file [{2}].",
                mappedId, fileMap.FilePath, script.FilePath),
            mappedId));
    }

    // Pass 2: ids known to the script but missing from the file map.
    foreach (string scriptId in script.Items.Keys)
    {
        if (fileMap.Map.ContainsKey(scriptId))
        {
            continue;
        }

        mismatches.Errors.Add(new DataError(
            script.FilePath,
            string.Format(CultureInfo.InvariantCulture,
                "Sentence [{0}] is found in the script [{1}], but not listed in filemap [{2}].",
                scriptId, script.FilePath, fileMap.FilePath),
            scriptId));
    }

    return mismatches;
}
/// <summary>
/// Validates every script item against its segmentation file (unit based alignment)
/// and additionally reports sentences that exist in only one of script and file map.
/// </summary>
/// <param name="fileMap">File list map.</param>
/// <param name="script">Script file instance.</param>
/// <param name="segmentDir">Segment file directory.</param>
/// <returns>Data error set found.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="segmentDir"/> is null or empty, or another argument is null.
/// </exception>
/// <exception cref="ArgumentException">A required member of an argument is null.</exception>
public static DataErrorSet ValidateDataAlignment(FileListMap fileMap, ScriptFile script,
    string segmentDir)
{
    if (string.IsNullOrEmpty(segmentDir))
    {
        throw new ArgumentNullException("segmentDir");
    }

    if (fileMap == null)
    {
        throw new ArgumentNullException("fileMap");
    }

    if (fileMap.Map == null)
    {
        throw new ArgumentException("fileMap.Map is null");
    }

    if (fileMap.Map.Keys == null)
    {
        throw new ArgumentException("fileMap.Map.Keys is null");
    }

    if (script == null)
    {
        throw new ArgumentNullException("script");
    }

    if (script.Items == null)
    {
        throw new ArgumentException("script.Items is null");
    }

    if (script.Items.Values == null)
    {
        throw new ArgumentException("script.Items.Values is null");
    }

    DataErrorSet findings = new DataErrorSet();

    // Script side: each item needs a file map entry and a consistent segmentation file.
    foreach (ScriptItem sentence in script.Items.Values)
    {
        try
        {
            if (fileMap.Map.ContainsKey(sentence.Id))
            {
                ValidateDataAlignment(script, sentence, fileMap, segmentDir, findings, false);
            }
            else
            {
                findings.Errors.Add(new DataError(script.FilePath,
                    "File list map does not contain sentences.", sentence.Id));
            }
        }
        catch (InvalidDataException ide)
        {
            // Invalid data is recorded against the sentence and validation carries on.
            findings.Errors.Add(new DataError(script.FilePath,
                Helper.BuildExceptionMessage(ide), sentence.Id));
        }
    }

    // File map side: every mapped sentence must be present in the script.
    foreach (string mappedId in fileMap.Map.Keys)
    {
        if (script.Items.ContainsKey(mappedId))
        {
            continue;
        }

        findings.Errors.Add(new DataError(script.FilePath,
            "script file does not contain the sentence.", mappedId));
    }

    return findings;
}
/// <summary>
/// Extracts the acoustic features of one sentence and writes one text line per
/// non-silence segment to <paramref name="writer"/>.
/// Each line holds, in order: sentence id, segment index, start time, duration,
/// sample offset, sample length, epoch offset, epoch length, 16k compressed epoch
/// length, 8k compressed epoch length, root mean square, average pitch, pitch range
/// and the full name of the unit.
/// </summary>
/// <param name="writer">Stream writer to write acoustic features.</param>
/// <param name="script">Script file instance.</param>
/// <param name="sid">Sentence id.</param>
/// <param name="fileMap">File list map.</param>
/// <param name="segmentDir">Segmentation file directory.</param>
/// <param name="wave16kDir">16k Hz waveform file directory.</param>
/// <param name="epochDir">Epoch file directory.</param>
/// <exception cref="InvalidDataException">
/// The unit count differs between script and alignment, the adjusted epoch range is
/// inverted, or the alignment runs past the end of the waveform.
/// </exception>
private static void ExtractAcoustic(StreamWriter writer, ScriptFile script, string sid,
    FileListMap fileMap, string segmentDir, string wave16kDir, string epochDir)
{
    ScriptItem scriptItem = script.Items[sid];

    // Build the path of each kind of data file from the mapped file name.
    string wave16kFilePath = Path.Combine(wave16kDir, fileMap.Map[scriptItem.Id] + ".wav");
    string epochFilePath = Path.Combine(epochDir, fileMap.Map[scriptItem.Id] + ".epoch");
    string segmentFilePath = Path.Combine(segmentDir, fileMap.Map[scriptItem.Id] + ".txt");

    // Load the data files.
    SegmentFile segFile = new SegmentFile();
    segFile.Load(segmentFilePath);

    EggAcousticFeature eggFile = new EggAcousticFeature();
    eggFile.LoadEpoch(epochFilePath);

    WaveAcousticFeature waveFile = new WaveAcousticFeature();
    waveFile.Load(wave16kFilePath);

    // The script units and the non-silence segments are paired by index below,
    // so their counts have to agree.
    int totalCount = segFile.NonSilenceWaveSegments.Count;
    if (scriptItem.Units.Count != totalCount)
    {
        string str1 = "Unit number mis-matched between sentence [{0}] in ";
        string str2 = "script file [{1}] and in the alignment file [{2}]. ";
        string str3 = "There are {3} units in script but {4} units in alignment.";
        string message = string.Format(CultureInfo.InvariantCulture, str1 + str2 + str3,
            sid, script.FilePath, segmentFilePath, scriptItem.Units.Count, totalCount);
        throw new InvalidDataException(message);
    }

    for (int i = 0; i < totalCount; i++)
    {
        WaveSegment ws = segFile.NonSilenceWaveSegments[i];

        // Convert the time span of the segment into sample positions.
        int sampleOffset = (int)(ws.StartTime * waveFile.SamplesPerSecond);
        int sampleLength = (int)(ws.Duration * waveFile.SamplesPerSecond);
        int sampleEnd = sampleOffset + sampleLength;

        // Average pitch and pitch range are taken over the un-adjusted sample span.
        float averagePitch, pitchRange;
        eggFile.GetPitchAndRange(sampleOffset, sampleLength, out averagePitch, out pitchRange);
        ws.AveragePitch = averagePitch;
        ws.PitchRange = pitchRange;

        // Adjust both segment boundaries against the epoch data before computing the RMS;
        // AdjustAlignment may move the sample positions (passed by ref) and returns the
        // matching epoch positions.
        int epochOffset = eggFile.AdjustAlignment(ref sampleOffset);
        int epochEnd = eggFile.AdjustAlignment(ref sampleEnd);

        if (epochOffset > epochEnd)
        {
            string info = string.Format(CultureInfo.InvariantCulture,
                "epochOffset[{0}] should not be bigger than epochEnd[{1}]",
                epochOffset, epochEnd);
            throw new InvalidDataException(info);
        }

        if (sampleEnd > waveFile.SampleNumber)
        {
            string str1 = "Mis-match found between alignment file [{0}] and waveform file [{1}], ";
            string str2 = "for the end sample of alignment is [{2}] but";
            string str3 = " the total sample number of waveform file is [{3}].";

            // Report the sample position that failed the check (not the epoch position).
            string info = string.Format(CultureInfo.InvariantCulture, str1 + str2 + str3,
                segmentFilePath, wave16kFilePath, sampleEnd, waveFile.SampleNumber);
            throw new InvalidDataException(info);
        }

        ws.RootMeanSquare = waveFile.CalculateRms(sampleOffset, sampleEnd - sampleOffset);

        // Compressed epoch lengths for the 16k and 8k epoch data.
        // NOTE(review): a null target is passed, presumably so that only the length is
        // computed - confirm against EpochFile.CompressEpoch.
        int epochLength = epochEnd - epochOffset;
        int epoch16KCompressLength = EpochFile.CompressEpoch(eggFile.Epoch,
            epochOffset, epochLength, null);
        int epoch8KCompressLength = EpochFile.CompressEpoch(eggFile.Epoch8k,
            epochOffset, epochLength, null);

        string line = string.Format(CultureInfo.InvariantCulture,
            "{0,12} {1,3} {2,9:0.000000} {3,9:0.000000} {4,7} {5,5} {6,4} {7,3} {8,3} {9,3} {10,7:0.0} {11,5:0.0} {12,4:0.0} {13}",
            scriptItem.Id, i,
            ws.StartTime, ws.Duration,
            sampleOffset, sampleEnd - sampleOffset,
            epochOffset, epochLength,
            epoch16KCompressLength, epoch8KCompressLength,
            ws.RootMeanSquare, ws.AveragePitch, ws.PitchRange,
            scriptItem.Units[i].FullName);

        writer.WriteLine(line);
    }
}
/// <summary>
/// Creates the script file instance for a language and engine type. A language specific
/// type named "Microsoft.Tts.Offline.&lt;Language&gt;ScriptFile" is looked up in the
/// assembly of <see cref="ScriptFile"/>; when none is found a plain
/// <see cref="ScriptFile"/> for that language is used instead.
/// </summary>
/// <param name="language">Which language to create for.</param>
/// <param name="engine">Engine type.</param>
/// <returns>Script file instance.</returns>
/// <exception cref="NotSupportedException">
/// Instantiating the language specific type raised a <see cref="MissingMethodException"/>.
/// </exception>
public static ScriptFile CreateScriptFile(Language language, EngineType engine)
{
    // The neutral language is returned as a default instance; the engine type is not applied.
    if (language == Language.Neutral)
    {
        return new ScriptFile();
    }

    try
    {
        string qualifiedName = "Microsoft.Tts.Offline." + language.ToString() + "ScriptFile";
        object instance = typeof(ScriptFile).Assembly.CreateInstance(qualifiedName);

        ScriptFile result = (ScriptFile)instance;
        if (result == null)
        {
            // TODO: Enable logging here for easier diagnostics
            result = new ScriptFile(language);
        }

        result.EngineType = engine;
        return result;
    }
    catch (MissingMethodException mme)
    {
        throw new NotSupportedException(
            string.Format(CultureInfo.InvariantCulture,
                "Language {0} is not implemented.", language),
            mme);
    }
}
/// <summary>
/// Removes every sentence referenced by an error of <paramref name="errorSet"/> from
/// the script file and saves the script back to the same path. Errors without a
/// sentence id are ignored. The file is saved even when no sentence was removed.
/// </summary>
/// <param name="errorSet">Data error set.</param>
/// <param name="scriptFilePath">Script file path.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="errorSet"/> is null, or <paramref name="scriptFilePath"/> is null or empty.
/// </exception>
/// <exception cref="ArgumentException">The error collection of the set is null.</exception>
public static void RemoveErrorSentence(DataErrorSet errorSet, string scriptFilePath)
{
    if (errorSet == null)
    {
        throw new ArgumentNullException("errorSet");
    }

    if (errorSet.Errors == null)
    {
        throw new ArgumentException("errorSet.Errors is null");
    }

    // Reject a missing path up front, as the sibling methods do for their path arguments,
    // rather than relying on whatever ScriptFile.Load does with it.
    if (string.IsNullOrEmpty(scriptFilePath))
    {
        throw new ArgumentNullException("scriptFilePath");
    }

    ScriptFile script = new ScriptFile();
    script.Load(scriptFilePath);

    foreach (DataError error in errorSet.Errors)
    {
        // Errors that are not tied to a sentence cannot be mapped to a script item.
        if (string.IsNullOrEmpty(error.SentenceId))
        {
            continue;
        }

        if (script.Items.ContainsKey(error.SentenceId))
        {
            script.Items.Remove(error.SentenceId);
        }
    }

    script.Save(scriptFilePath);
}
/// <summary>
/// Validates the phone sequence of every sentence in the script file. It will check:
/// 1) word alignment with the pronunciation string, i.e. each word has one and
///    only one corresponding pronunciation;
/// 2) the pronunciation is syllabified, and each syllable has one and only one
///    vowel. It can have one stress mark;
/// 3) each phone in the pronunciation string is valid in the phoneme set of
///    that language.
/// </summary>
/// <param name="script">The script object to be validated.</param>
/// <returns>Errors/problems found in the script.</returns>
/// <exception cref="ArgumentNullException"><paramref name="script"/> is null.</exception>
/// <exception cref="ArgumentException">The item collection of the script is missing.</exception>
public static DataErrorSet ValidatePronunciation(ScriptFile script)
{
    if (script == null)
    {
        throw new ArgumentNullException("script");
    }

    if (script.Items == null)
    {
        throw new ArgumentException("script.Items is null");
    }

    if (script.Items.Keys == null)
    {
        throw new ArgumentException("script.Items.Keys is null");
    }

    DataErrorSet problems = new DataErrorSet();

    foreach (string sentenceId in script.Items.Keys)
    {
        try
        {
            DataError problem = script.ProcessPronunciation(script.Items[sentenceId]);
            if (problem == null)
            {
                continue;
            }

            problems.Errors.Add(problem);
        }
        catch (InvalidDataException ide)
        {
            // Recorded against the sentence; the remaining sentences are still checked.
            problems.Errors.Add(new DataError(script.FilePath,
                Helper.BuildExceptionMessage(ide), sentenceId));
        }
        catch (KeyNotFoundException knfe)
        {
            problems.Errors.Add(new DataError(script.FilePath,
                Helper.BuildExceptionMessage(knfe), sentenceId));
        }
    }

    return problems;
}
/// <summary>
/// Converts an XML script to a two-line script. Items that convert without errors are
/// written to <paramref name="targetFile"/>; items with conversion errors are left out
/// and their errors are merged into the returned set.
/// </summary>
/// <param name="xmlScript">Input XML script.</param>
/// <param name="targetFile">Output script.</param>
/// <param name="phoneSet">
/// Phone set used to convert pronunciation.
/// It can be null when you can directly get the word's pronunciation in the word's attribute.
/// </param>
/// <returns>Errors happened.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="xmlScript"/> or <paramref name="targetFile"/> is null or empty.
/// </exception>
/// <exception cref="DirectoryNotFoundException">
/// The directory that should contain <paramref name="targetFile"/> does not exist.
/// </exception>
public static ErrorSet ConvertXmlScriptToTwoLineScript(string xmlScript, string targetFile,
    TtsPhoneSet phoneSet)
{
    if (string.IsNullOrEmpty(xmlScript))
    {
        throw new ArgumentNullException("xmlScript");
    }

    if (string.IsNullOrEmpty(targetFile))
    {
        throw new ArgumentNullException("targetFile");
    }

    // Resolve the path first: for a bare file name Path.GetDirectoryName yields an empty
    // string, which Directory.Exists rejects even though the current directory exists.
    string targetDir = Path.GetDirectoryName(Path.GetFullPath(targetFile));
    if (!Directory.Exists(targetDir))
    {
        throw new DirectoryNotFoundException(targetFile);
    }

    ErrorSet errorSet = new ErrorSet();

    XmlScriptFile script = new XmlScriptFile();
    script.Load(xmlScript);

    ScriptFile oldScript = new ScriptFile(script.Language);
    foreach (ScriptItem item in script.Items)
    {
        // Errors are collected per item so that one bad item does not drop the others.
        ErrorSet itemErrors = new ErrorSet();
        ScriptItem oldItem = ConvertScriptItemToTwoLineFormat(item, phoneSet, itemErrors);

        if (itemErrors.Count != 0)
        {
            errorSet.Merge(itemErrors);
        }
        else
        {
            oldScript.Items.Add(oldItem.Id, oldItem);
        }
    }

    oldScript.Save(targetFile, true, true);
    return errorSet;
}