/// <summary>
/// Validates data alignment between a unit feature file and a script file.
/// </summary>
/// <remarks>
/// Problems that make the comparison impossible (errors in the script file, an empty
/// feature file, or a language mismatch) are reported by throwing. Per-unit mismatches
/// are collected in the returned error set and do not stop the validation.
/// </remarks>
/// <param name="featureFile">Feature file.</param>
/// <param name="scriptFile">Script file.</param>
/// <param name="language">Language.</param>
/// <returns>Data error set found.</returns>
/// <exception cref="InvalidDataException">
/// The script file has validation errors, the feature file contains no unit,
/// or the feature file language differs from <paramref name="language"/>.
/// </exception>
public static ErrorSet ValidateFeatureData(string featureFile, string scriptFile, Language language)
{
    ErrorSet errorSet = new ErrorSet();

    TtsPhoneSet phoneSet = Localor.GetPhoneSet(language);
    XmlScriptValidateSetting validateSetting = new XmlScriptValidateSetting(phoneSet, null);
    XmlScriptFile script = XmlScriptFile.LoadWithValidation(scriptFile, validateSetting);
    if (script.ErrorSet.Count > 0)
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "{0} error(s) found in the script file [{1}]",
            script.ErrorSet.Count, scriptFile);
        throw new InvalidDataException(message);
    }

    XmlUnitFeatureFile unitFeatureFile = new XmlUnitFeatureFile(featureFile);
    if (unitFeatureFile.Units.Count <= 0)
    {
        // The error set is never returned on this path, so the failure is
        // reported through the exception only.
        string message = string.Format(CultureInfo.InvariantCulture,
            "Zero unit feature item in unit feature file {0}", featureFile);
        throw new InvalidDataException(message);
    }

    if (unitFeatureFile.Language != language)
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "Different language\r\nScript File {0}: lang = {1}\r\n Feature File {2}: lang = {3}",
            scriptFile, Localor.LanguageToString(language),
            featureFile, Localor.LanguageToString(unitFeatureFile.Language));
        throw new InvalidDataException(message);
    }

    foreach (string key in unitFeatureFile.Units.Keys)
    {
        UnitFeature unit = unitFeatureFile.Units[key];

        string sid = unit.SentenceId;
        int unitIndex = unit.Index;
        string unitName = unit.Name;

        if (unitIndex < 0)
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "invalid unit index [{0}] found in feature file [{1}]. It should not be negative integer for unit indexing.",
                unitIndex, featureFile);
            errorSet.Add(VoiceFontError.OtherErrors, message);
            continue;
        }

        try
        {
            // Single lookup instead of ContainsKey followed by the indexer.
            ScriptItem item;
            if (!script.ItemDic.TryGetValue(sid, out item))
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                    "sentence id {0} in feature file [{1}] is not in script file [{2}]",
                    sid, featureFile, scriptFile);
                errorSet.Add(ScriptError.OtherErrors, sid, message);
                continue;
            }

            // Kept inside the try block so that an InvalidDataException raised while
            // resolving language data or units is recorded for this unit rather than
            // aborting the whole validation.
            Phoneme phoneme = Localor.GetPhoneme(language);
            SliceData sliceData = Localor.GetSliceData(language);
            Collection<TtsUnit> itemUnits = item.GetUnits(phoneme, sliceData);

            if (unitIndex >= itemUnits.Count)
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                    "the {0}th unit [{1}] in sentence {2} of feature file [{3}] is out of range for sentence {2} in script file [{4}]",
                    unitIndex, unitName, sid, featureFile, scriptFile);
                errorSet.Add(ScriptError.OtherErrors, sid, message);
                continue;
            }

            // Unit names in the feature file use '+' where the script unit name has a space.
            TtsUnit ttsUnit = itemUnits[unitIndex];
            string sliceName = ttsUnit.FullName.Replace(' ', '+');
            if (sliceName != unitName)
            {
                string str1 = "the {0}th unit [{1}] in sentence {3} of feature file [{4}] ";
                string str2 = "is not matched with {0}th unit [{2}] for sentence {3} in script file [{5}]";
                string message = string.Format(CultureInfo.InvariantCulture,
                    str1 + str2,
                    unitIndex, unitName, sliceName, sid, featureFile, scriptFile);
                errorSet.Add(ScriptError.OtherErrors, sid, message);
            }
        }
        catch (InvalidDataException ide)
        {
            errorSet.Add(ScriptError.OtherErrors, sid, Helper.BuildExceptionMessage(ide));
        }
    }

    return errorSet;
}
/// <summary>
/// Finds the sentences that do not match between the file list map file and the script file.
/// <para />
/// This function should be merged with that in forcedalignment into common library.
/// </summary>
/// <param name="scriptFilePath">The location of script file.</param>
/// <param name="language">Language of the script file.</param>
/// <param name="mapFilePath">The location of the file list map file.</param>
/// <returns>
/// The validation errors of the script file, followed by one error for each sentence id
/// that is present in only one of the two files.
/// </returns>
public static ErrorSet FindUnmatchedSentences(string scriptFilePath, Language language, string mapFilePath)
{
    ErrorSet unmatched = new ErrorSet();

    TtsPhoneSet phoneSet = Localor.GetPhoneSet(language);
    XmlScriptValidateSetting setting = new XmlScriptValidateSetting(phoneSet, null);
    XmlScriptFile scriptFile = XmlScriptFile.LoadWithValidation(scriptFilePath, setting);

    // Drop the items that validation flagged for deletion before comparing ids.
    scriptFile.Remove(ScriptHelper.GetNeedDeleteItemIds(scriptFile.ErrorSet));

    Dictionary<string, string> fileListMap =
        Microsoft.Tts.Offline.FileListMap.ReadAllData(mapFilePath);

    unmatched.Merge(scriptFile.ErrorSet);

    // Pass 1: ids known to the script but missing from the map.
    foreach (string scriptSid in scriptFile.ItemDic.Keys)
    {
        if (fileListMap.ContainsKey(scriptSid))
        {
            continue;
        }

        unmatched.Add(
            ScriptError.OtherErrors,
            scriptSid,
            Helper.NeutralFormat("Script item {0} in script file but not in file list map file", scriptSid));
    }

    // Pass 2: ids known to the map but missing from the script.
    foreach (string mapSid in fileListMap.Keys)
    {
        if (scriptFile.ItemDic.ContainsKey(mapSid))
        {
            continue;
        }

        unmatched.Add(
            ScriptError.OtherErrors,
            mapSid,
            Helper.NeutralFormat("Script item {0} in file list map file but not in script file", mapSid));
    }

    return unmatched;
}
/// <summary>
/// Checks whether a script item is valid. The schema is not checked here.
/// Depending on the validation scope of <paramref name="validateSetting"/>:
/// 1. A normal word with an empty pronunciation is reported.
/// 2. A non-empty pronunciation of a normal word is validated against the phone set.
/// 3. A non-empty POS must be in the POS set.
/// 4. The segment sequence of the item is checked.
/// Each word is additionally asked to validate itself.
/// </summary>
/// <param name="item">The item to be checked.</param>
/// <param name="errors">Receives the errors found; it is cleared first.</param>
/// <param name="validateSetting">Validation data set.</param>
/// <returns>True if no error was recorded.</returns>
public static bool IsValidItem(ScriptItem item, ErrorSet errors, XmlScriptValidateSetting validateSetting)
{
    if (item == null)
    {
        throw new ArgumentNullException("item");
    }

    if (errors == null)
    {
        throw new ArgumentNullException("errors");
    }

    if (validateSetting == null)
    {
        throw new ArgumentNullException("validateSetting");
    }

    validateSetting.VerifySetting();

    // Decode the scope flags once; the scope does not change during the walk.
    XmlScriptValidationScope scope = validateSetting.ValidationScope;
    bool checkPronunciation =
        (scope & XmlScriptValidationScope.Pronunciation) == XmlScriptValidationScope.Pronunciation;
    bool checkPos =
        (scope & XmlScriptValidationScope.POS) == XmlScriptValidationScope.POS;
    bool checkSegments =
        (scope & XmlScriptValidationScope.SegmentSequence) == XmlScriptValidationScope.SegmentSequence;

    errors.Clear();

    int sentencePosition = 0;
    foreach (ScriptSentence sentence in item.Sentences)
    {
        int wordPosition = 0;
        foreach (ScriptWord word in sentence.Words)
        {
            // Only normal words take part in the pronunciation check.
            if (checkPronunciation && word.WordType == WordType.Normal)
            {
                string pron = word.GetPronunciation(validateSetting.PhoneSet);
                if (string.IsNullOrEmpty(pron))
                {
                    // Pronunciation is optional for normal word, will give warning if empty pronunciation for normal word.
                    errors.Add(ScriptError.EmptyPronInNormalWord, item.Id, word.Grapheme);
                }
                else
                {
                    ErrorSet pronErrors = Core.Pronunciation.Validate(pron, validateSetting.PhoneSet);
                    foreach (Error pronError in pronErrors.Errors)
                    {
                        errors.Add(ScriptError.PronunciationError, pronError, item.Id, word.Grapheme);
                    }
                }
            }

            // An empty POS is accepted; a non-empty one must be a key of the POS set.
            if (checkPos &&
                !string.IsNullOrEmpty(word.PosString) &&
                !validateSetting.PosSet.Items.ContainsKey(word.PosString))
            {
                errors.Add(ScriptError.UnrecognizedPos, item.Id, word.Grapheme,
                    word.Pronunciation, word.PosString);
            }

            string nodePath = string.Format(CultureInfo.InvariantCulture,
                "Sentence[{0}].Word[{1}]", sentencePosition, wordPosition);
            word.IsValid(item.Id, nodePath, scope, errors);

            wordPosition++;
        }

        sentencePosition++;
    }

    if (checkSegments)
    {
        CheckSegments(item, errors);
    }

    return errors.Count == 0;
}
/// <summary>
/// Parses the XML document for the script file path.
/// </summary>
/// <remarks>
/// The configured path is always stored in <c>ScriptPath</c>. When
/// <paramref name="updateScriptPath"/> is true and there are no merge items,
/// <c>ScriptPath</c> is redirected to <c>Interm\MergedScript.xml</c> under the output
/// directory and that file is produced from the configured path: a folder is merged
/// (its *.txt files are converted first when it has no top-level *.xml file), a
/// .txt file is converted, and any other file is copied.
/// </remarks>
/// <param name="dom">XML configuration document.</param>
/// <param name="filePath">XML configuration file path.</param>
/// <param name="nsmgr">Namespace.</param>
/// <param name="updateScriptPath">Whether update script path.</param>
/// <exception cref="InvalidDataException">The configuration has no scriptFile path attribute.</exception>
/// <exception cref="FileNotFoundException">The configured path is neither an existing folder nor an existing file.</exception>
private void ParseScriptFile(XmlDocument dom, string filePath,
    XmlNamespaceManager nsmgr, bool updateScriptPath)
{
    // ScriptFilePath
    XmlNode node = dom.DocumentElement.SelectSingleNode(@"tts:scriptFile/@path", nsmgr);
    if (node == null)
    {
        // SelectSingleNode returns null when nothing matches; fail with a clear
        // message instead of a NullReferenceException.
        string message = string.Format(CultureInfo.InvariantCulture,
            "Could not find the scriptFile path attribute in config file [{0}]",
            filePath);
        throw new InvalidDataException(message);
    }

    string sourcePath = node.InnerText;
    CheckPath(sourcePath, false, filePath, "scriptFile");
    ScriptPath = sourcePath;

    if (!updateScriptPath || MergeItems.Count != 0)
    {
        return;
    }

    // temporarily add codes to convert *.txt to *.xml
    // will delete it when all scripts are converted to xml
    // Merge script if the input is a script folder
    ScriptPath = Path.Combine(OutputDir, @"Interm\MergedScript.xml");
    Helper.EnsureFolderExistForFile(ScriptPath);

    if (Directory.Exists(sourcePath))
    {
        string inputDir = sourcePath;
        bool usingTempDir = false;
        try
        {
            if (Directory.GetFiles(sourcePath, "*.xml", SearchOption.TopDirectoryOnly).Length == 0)
            {
                // No XML script at the top level: convert the two-line text scripts
                // into a temporary sub folder and merge from there.
                inputDir = Path.Combine(sourcePath, "temp");
                usingTempDir = true;
                Helper.EnsureFolderExist(inputDir);
                foreach (string input in Directory.GetFiles(
                    sourcePath, "*.txt", SearchOption.TopDirectoryOnly))
                {
                    string output = Path.Combine(inputDir,
                        Path.GetFileNameWithoutExtension(input) + ".xml");
                    ScriptHelper.ConvertTwoLineScriptToXmlScript(input, output, Language);
                }
            }

            TtsPhoneSet phoneSet = Localor.GetPhoneSet(Language);
            XmlScriptValidateSetting validateSetting = new XmlScriptValidateSetting(phoneSet, null);
            ScriptHelper.MergeScripts(inputDir, ScriptPath, false, validateSetting);
        }
        finally
        {
            // Remove the temporary folder even when conversion or merging fails.
            if (usingTempDir && Directory.Exists(inputDir))
            {
                Directory.Delete(inputDir, true);
            }
        }
    }
    else if (File.Exists(sourcePath))
    {
        // Extension comparison ignores case so that "script.TXT" is converted
        // rather than copied verbatim as if it were XML.
        if (string.Equals(Path.GetExtension(sourcePath), ".txt", StringComparison.OrdinalIgnoreCase))
        {
            ScriptHelper.ConvertTwoLineScriptToXmlScript(sourcePath, ScriptPath, Language);
        }
        else
        {
            File.Copy(sourcePath, ScriptPath, true);
        }
    }
    else
    {
        // if merge enable, the script path will the target path
        // this file will be the merge result of source script files
        string message = string.Format(CultureInfo.InvariantCulture,
            "Could not find the scriptFile file [{0}], which is specified in config file [{1}]",
            sourcePath, filePath);
        throw new FileNotFoundException(message);
    }
}
/// <summary>
/// Loads a script file, attaches the phone set and POS set of the validation setting
/// to it, and validates it.
/// </summary>
/// <param name="scriptFile">File to be loaded.</param>
/// <param name="validateSetting">Validation data set.</param>
/// <returns>The loaded script.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="scriptFile"/> is null or empty, or <paramref name="validateSetting"/> is null.
/// </exception>
public static XmlScriptFile LoadWithValidation(string scriptFile, XmlScriptValidateSetting validateSetting)
{
    if (string.IsNullOrEmpty(scriptFile))
    {
        throw new ArgumentNullException("scriptFile");
    }

    if (validateSetting == null)
    {
        throw new ArgumentNullException("validateSetting");
    }

    // Verify the setting before touching the file.
    validateSetting.VerifySetting();

    XmlScriptFile loaded = new XmlScriptFile();
    loaded.Load(scriptFile);

    // The phone set and POS set are assigned after loading and before validating.
    loaded.PhoneSet = validateSetting.PhoneSet;
    loaded.PosSet = validateSetting.PosSet;
    loaded.Validate(validateSetting);

    return loaded;
}
/// <summary>
/// Adds one item to the script file.
/// The item is checked before it is added: the schema check throws when it fails,
/// while a duplicate id or (optionally) invalid content is reported through
/// <paramref name="errors"/>.
/// </summary>
/// <param name="item">The item to be added.</param>
/// <param name="errors">Receives the errors if the item is not added; it is cleared first.</param>
/// <param name="validate">Whether to validate the content of the item.</param>
/// <param name="sort">Whether to insert the item at its sorted position (by id, ignoring case).</param>
/// <returns>True if successfully added.</returns>
public bool Add(ScriptItem item, ErrorSet errors, bool validate, bool sort)
{
    if (item == null)
    {
        throw new ArgumentNullException("item");
    }

    if (errors == null)
    {
        throw new ArgumentNullException("errors");
    }

    // check schema, should throw exception if invalid
    CheckSchema(item);

    errors.Clear();

    // content checking, should add to errors if invalid
    if (_itemDic.ContainsKey(item.Id))
    {
        errors.Add(ScriptError.DuplicateItemId, item.Id);
    }

    if (validate)
    {
        ErrorSet contentErrors = new ErrorSet();
        XmlScriptValidateSetting setting = new XmlScriptValidateSetting(PhoneSet, PosSet);
        ScriptItem.IsValidItem(item, contentErrors, setting);
        errors.Merge(contentErrors);
    }

    if (errors.Count > 0)
    {
        return false;
    }

    _itemDic.Add(item.Id, item);

    // Find the first existing item whose id sorts after the new one; -1 means append.
    int position = -1;
    if (sort)
    {
        int candidate = 0;
        while (candidate < _items.Count)
        {
            if (string.Compare(item.Id, _items[candidate].Id, StringComparison.OrdinalIgnoreCase) < 0)
            {
                position = candidate;
                break;
            }

            candidate++;
        }
    }

    if (position >= 0)
    {
        _items.Insert(position, item);
    }
    else
    {
        _items.Add(item);
    }

    return true;
}
/// <summary>
/// Parses the configuration element: reads the language, engine, voice name, token id
/// and font path, loads the script file, file list map and weight table, and records
/// the tree, question, unit description, wave sequence, wave16k and segment paths
/// after checking that they exist.
/// </summary>
/// <param name="config">Configuration element to parse.</param>
/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
/// <exception cref="DirectoryNotFoundException">One of the checked paths does not exist.</exception>
public void ParseConfig(XmlElement config)
{
    if (config == null)
    {
        throw new ArgumentNullException("config");
    }

    string languageName = config.GetAttribute("language");
    string engineName = config.GetAttribute("engine");
    Debug.Assert(!string.IsNullOrEmpty(languageName));
    Debug.Assert(!string.IsNullOrEmpty(engineName));

    _primaryLanguage = Localor.StringToLanguage(languageName);
    _engineType = (EngineType)Enum.Parse(typeof(EngineType), engineName);

    // Language data comes from the <languageData> element when present;
    // otherwise only the CART question path is read from the config.
    XmlElement languageDataElement = config.SelectSingleNode("languageData") as XmlElement;
    VoiceCreationLanguageData languageData = new VoiceCreationLanguageData();
    if (languageDataElement == null)
    {
        languageData.CartQuestions = config.SelectSingleNode("question/@path").InnerText;
    }
    else
    {
        languageData.ParseLanguageDataFromXmlElement(true, languageDataElement);
        languageData.SetLanguageData(_primaryLanguage);
    }

    _voiceName = config.GetAttribute("voiceName");
    _tokenId = config.GetAttribute("tokenId");
    _fontPath = config.SelectSingleNode("font/@path").InnerText;

    // Script file: load with validation, then drop the items flagged for deletion.
    TtsPhoneSet phoneSet = Localor.GetPhoneSet(_primaryLanguage);
    XmlScriptValidateSetting scriptSetting = new XmlScriptValidateSetting(phoneSet, null);
    string scriptPath = config.SelectSingleNode("script/@path").InnerText;
    _scriptFile = XmlScriptFile.LoadWithValidation(scriptPath, scriptSetting);
    _scriptFile.Remove(ScriptHelper.GetNeedDeleteItemIds(_scriptFile.ErrorSet));

    FileMap = new FileListMap();
    FileMap.Load(config.SelectSingleNode("filemap/@path").InnerText);

    _weightTable = new WeightTable(_primaryLanguage, _engineType);
    _weightTable.Load(config.SelectSingleNode("weighttable/@path").InnerText);

    _cartTreeManager = new CartTreeManager();

    _cartTreeManager.CartTreeDir = config.SelectSingleNode("treedir/@path").InnerText;
    if (!Directory.Exists(_cartTreeManager.CartTreeDir))
    {
        throw new DirectoryNotFoundException(string.Format(CultureInfo.InvariantCulture,
            "The treeDir path does not exist at [{0}]",
            _cartTreeManager.CartTreeDir));
    }

    // NOTE(review): the three file checks below throw DirectoryNotFoundException
    // although they test files; kept as-is because callers may catch that type.
    _cartTreeManager.CartQuestionFile = languageData.CartQuestions;
    if (!File.Exists(_cartTreeManager.CartQuestionFile))
    {
        throw new DirectoryNotFoundException(string.Format(CultureInfo.InvariantCulture,
            "The tree question file path does not exist at [{0}]",
            _cartTreeManager.CartQuestionFile));
    }

    _cartTreeManager.UnitDescriptFile = config.SelectSingleNode("unitdescript/@path").InnerText;
    if (!File.Exists(_cartTreeManager.UnitDescriptFile))
    {
        throw new DirectoryNotFoundException(string.Format(CultureInfo.InvariantCulture,
            "The unit description file path does not exist at [{0}]",
            _cartTreeManager.UnitDescriptFile));
    }

    _unitFeatureFilePath = config.SelectSingleNode("wavesequence/@path").InnerText;
    if (!File.Exists(_unitFeatureFilePath))
    {
        throw new DirectoryNotFoundException(string.Format(CultureInfo.InvariantCulture,
            "The wave sequence file path does not exist at [{0}]",
            _unitFeatureFilePath));
    }

    // Every listed wave16k directory must exist; paths are trimmed before use.
    _wave16kDirectories.Clear();
    foreach (XmlNode wavePathNode in config.SelectNodes("wave16k/@path"))
    {
        string waveDir = wavePathNode.InnerText.Trim();
        if (!Directory.Exists(waveDir))
        {
            throw new DirectoryNotFoundException(string.Format(CultureInfo.InvariantCulture,
                "The wave16k path does not exist at [{0}]", waveDir));
        }

        _wave16kDirectories.Add(waveDir);
    }

    // Same rule for the segment (alignment) directories.
    _segmentDirectories.Clear();
    foreach (XmlNode segmentPathNode in config.SelectNodes("segment/@path"))
    {
        string alignmentDir = segmentPathNode.InnerText.Trim();
        if (!Directory.Exists(alignmentDir))
        {
            throw new DirectoryNotFoundException(string.Format(CultureInfo.InvariantCulture,
                "The alignment path does not exist at [{0}]", alignmentDir));
        }

        _segmentDirectories.Add(alignmentDir);
    }
}
/// <summary>
/// Builds the mono MLF for every valid item of a script file.
/// </summary>
/// <remarks>
/// The script is loaded with validation and the items flagged for deletion are removed.
/// When <paramref name="writeToFile"/> is true, the output file is created with ASCII
/// encoding and starts with the "#!MLF!#" header line.
/// </remarks>
/// <param name="scriptFilePath">Script file to read.</param>
/// <param name="outFilePath">Output file; only used when <paramref name="writeToFile"/> is true.</param>
/// <param name="writeToFile">Whether to write the result to <paramref name="outFilePath"/>.</param>
/// <param name="phoneme">Phoneme passed on to the per-item builder.</param>
/// <param name="validateSetting">Validation setting used to load the script.</param>
/// <param name="sliceData">Slice data passed on to the per-item builder.</param>
/// <returns>The script validation errors merged with the errors of each item.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="phoneme"/>, <paramref name="sliceData"/> or
/// <paramref name="validateSetting"/> is null.
/// </exception>
/// <exception cref="InvalidDataException">The script has no valid item left.</exception>
public static ErrorSet BuildMonoMlf(string scriptFilePath, string outFilePath, bool writeToFile,
    Phoneme phoneme, XmlScriptValidateSetting validateSetting, SliceData sliceData)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (sliceData == null)
    {
        // Report the parameter that is actually null.
        throw new ArgumentNullException("sliceData");
    }

    if (validateSetting == null)
    {
        throw new ArgumentNullException("validateSetting");
    }

    validateSetting.VerifySetting();

    ErrorSet errors = new ErrorSet();
    StreamWriter sw = null;

    try
    {
        // The writer is created inside the try block so that it is closed
        // even when writing the header fails.
        if (writeToFile)
        {
            sw = new StreamWriter(outFilePath, false, Encoding.ASCII);
            sw.WriteLine("#!MLF!#");
        }

        XmlScriptFile script = XmlScriptFile.LoadWithValidation(scriptFilePath, validateSetting);
        script.Remove(GetNeedDeleteItemIds(script.ErrorSet));
        if (script.Items.Count == 0)
        {
            throw new InvalidDataException(
                Helper.NeutralFormat("No valid items in {0}.", scriptFilePath));
        }

        errors.Merge(script.ErrorSet);
        foreach (ScriptItem item in script.Items)
        {
            errors.Merge(BuildMonoMlf(item, sw, writeToFile, phoneme, sliceData));
        }
    }
    finally
    {
        if (sw != null)
        {
            sw.Close();
        }
    }

    if (writeToFile)
    {
        // Debug-only check: Debug.Assert calls are omitted from release builds.
        Debug.Assert(HtkTool.VerifyMlfFormat(outFilePath));
    }

    return errors;
}
/// <summary>
/// Merges the scripts found in a folder and saves the result to a script file,
/// using Unicode encoding.
/// </summary>
/// <param name="scriptDir">Directory containing the script files.</param>
/// <param name="targetFile">Target file path.</param>
/// <param name="resetId">True means resetting id.</param>
/// <param name="validateSetting">Validation setting.</param>
/// <param name="contentController">Content controller, used for both loading and saving.</param>
/// <returns>The errors collected while merging.</returns>
public static ErrorSet MergeScripts(string scriptDir, string targetFile, bool resetId,
    XmlScriptValidateSetting validateSetting, object contentController)
{
    ErrorSet mergeErrors = new ErrorSet();

    // The in-memory overload does the merging; this overload only persists the result.
    XmlScriptFile merged = MergeScripts(
        scriptDir, mergeErrors, resetId, validateSetting, contentController);

    Helper.EnsureFolderExistForFile(targetFile);
    merged.Save(targetFile, Encoding.Unicode, contentController);

    return mergeErrors;
}
/// <summary>
/// Merges all script files found under a folder (sub folders included) into a single
/// in-memory script file.
/// When the validation scope is not <c>None</c>, each script is validated and the
/// items flagged for deletion are removed before merging.
/// </summary>
/// <param name="scriptDir">Directory containing the script files.</param>
/// <param name="errorSet">Receives the errors of each script and of each failed add.</param>
/// <param name="resetId">True means resetting id to a running 10-digit number.</param>
/// <param name="validateSetting">Validation setting.</param>
/// <param name="contentController">Content controller used when loading each script.</param>
/// <returns>Xml script file.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="scriptDir"/> is null or empty, or <paramref name="errorSet"/> or
/// <paramref name="validateSetting"/> is null.
/// </exception>
/// <exception cref="DirectoryNotFoundException"><paramref name="scriptDir"/> does not exist.</exception>
/// <exception cref="InvalidDataException">The scripts do not all have the same language.</exception>
public static XmlScriptFile MergeScripts(string scriptDir, ErrorSet errorSet, bool resetId,
    XmlScriptValidateSetting validateSetting, object contentController)
{
    if (string.IsNullOrEmpty(scriptDir))
    {
        throw new ArgumentNullException("scriptDir");
    }

    if (errorSet == null)
    {
        throw new ArgumentNullException("errorSet");
    }

    if (validateSetting == null)
    {
        throw new ArgumentNullException("validateSetting");
    }

    if (!Directory.Exists(scriptDir))
    {
        throw new DirectoryNotFoundException(scriptDir);
    }

    validateSetting.VerifySetting();
    bool validationRequested = validateSetting.ValidationScope != XmlScriptValidationScope.None;

    string searchPattern = "*" + XmlScriptFile.Extension;
    string[] scriptPaths = Directory.GetFiles(scriptDir, searchPattern, SearchOption.AllDirectories);

    XmlScriptFile merged = new XmlScriptFile();
    long runningId = 0;

    foreach (string scriptPath in scriptPaths)
    {
        XmlScriptFile current = new XmlScriptFile();
        current.Load(scriptPath, contentController);

        // The first script decides the language of the merged result;
        // every later script has to agree with it.
        if (merged.Language == Language.Neutral)
        {
            merged.Language = current.Language;
        }
        else if (merged.Language != current.Language)
        {
            throw new InvalidDataException(Helper.NeutralFormat("Inconsistent langage in {0}", scriptPath));
        }

        if (validationRequested)
        {
            current.PosSet = validateSetting.PosSet;
            current.PhoneSet = validateSetting.PhoneSet;
            current.Validate(validateSetting);
            current.Remove(GetNeedDeleteItemIds(current.ErrorSet));
        }

        errorSet.Merge(current.ErrorSet);

        foreach (ScriptItem item in current.Items)
        {
            // NOTE(review): the id is assigned even when it is not reset, exactly as in
            // the original; confirm whether the Id setter has side effects before
            // making the assignment conditional.
            string assignedId = resetId
                ? Helper.NeutralFormat("{0:D10}", ++runningId)
                : item.Id;
            item.Id = assignedId;

            ErrorSet addErrors = new ErrorSet();
            bool wasAdded = merged.Add(item, addErrors, false);
            if (!wasAdded)
            {
                // Added failed
                errorSet.Merge(addErrors);
            }
        }
    }

    return merged;
}
/// <summary>
/// Merges the scripts found in a folder into a script file, with comment saving
/// turned off on the content controller.
/// </summary>
/// <param name="scriptDir">Directory containing the script files.</param>
/// <param name="targetFile">Merged file.</param>
/// <param name="resetId">True means resetting id.</param>
/// <param name="validateSetting">Validation setting.</param>
/// <returns>ErrorSet.</returns>
public static ErrorSet MergeScripts(string scriptDir, string targetFile, bool resetId,
    XmlScriptValidateSetting validateSetting)
{
    // Delegate to the overload that takes a content controller.
    XmlScriptFile.ContentControler withoutComments = new XmlScriptFile.ContentControler();
    withoutComments.SaveComments = false;

    return MergeScripts(scriptDir, targetFile, resetId, validateSetting, withoutComments);
}