/// <summary>
/// Loads a script file, passes each selected item to the visitor, removes the
/// items that were not selected and optionally saves the resulting script.
/// </summary>
/// <param name="visitor">Script item processor interface.</param>
/// <param name="importDataPathList">File list for importing data model. When null every item is
/// processed; otherwise only items whose id is a key of this dictionary are processed and the
/// remaining items are removed from the script.</param>
/// <param name="inScriptFile">Input script file.</param>
/// <param name="outScriptFile">Output script file. Nothing is saved when it is null or empty.</param>
/// <param name="logger">Text logger. When null, invalid-data errors are not logged.</param>
/// <param name="parameters">Parameters forwarded to the visitor for each item.</param>
public static void TraverseScriptFile(IScriptItemProcessor visitor,
    Dictionary<string, string> importDataPathList, string inScriptFile,
    string outScriptFile, TextLogger logger, object parameters)
{
    if (string.IsNullOrEmpty(inScriptFile))
    {
        throw new ArgumentNullException("inScriptFile");
    }

    if (!File.Exists(inScriptFile))
    {
        throw new FileNotFoundException(inScriptFile);
    }

    XmlScriptFile script = new XmlScriptFile();
    Console.WriteLine(Helper.NeutralFormat("Processing file : {0}", inScriptFile));
    string fileName = Path.GetFileName(inScriptFile);

    try
    {
        script.Load(inScriptFile);

        // Ids are collected first and removed after the enumeration so the
        // item collection is not changed while it is being iterated.
        List<string> discardedIds = new List<string>();
        foreach (ScriptItem scriptItem in script.Items)
        {
            try
            {
                // When the mode is "Import F0/Power/Segment", the importDataPathList will not be null.
                bool selected = importDataPathList == null ||
                    importDataPathList.ContainsKey(scriptItem.Id);
                if (!selected)
                {
                    discardedIds.Add(scriptItem.Id);
                    continue;
                }

                visitor.ProcessItem(scriptItem, parameters);
            }
            catch (InvalidDataException itemError)
            {
                // An invalid item is reported (if a logger is present) and traversal continues.
                if (logger != null)
                {
                    logger.LogLine("ERROR : [File {0}][Item {1}]{2}",
                        fileName, scriptItem.Id, itemError.Message);
                }
            }
        }

        for (int index = 0; index < discardedIds.Count; index++)
        {
            script.Remove(discardedIds[index]);
        }

        if (!string.IsNullOrEmpty(outScriptFile))
        {
            Helper.TestWritable(outScriptFile);
            script.Save(outScriptFile, Encoding.Unicode);
        }
    }
    catch (InvalidDataException fileError)
    {
        // File-level invalid data is reported (if a logger is present) and swallowed.
        if (logger != null)
        {
            logger.LogLine("ERROR : [File {0}]{1}", fileName, fileError.Message);
        }
    }
}
/// <summary>
/// Builds a lexicon from every pronounced word of every item in an Xml script file.
/// </summary>
/// <param name="scriptFile">Xml script file.</param>
/// <param name="defaultPos">Part of speech string used for words that carry no part of speech of their own.</param>
/// <param name="mainLexicon">Main lexicon; may be null. When a word and its pronunciation are found
/// there, the symbolic pronunciation stored in the main lexicon is used.</param>
/// <returns>Lexicon.</returns>
public static Lexicon CreateFromXmlScriptFile(XmlScriptFile scriptFile,
    string defaultPos, Lexicon mainLexicon)
{
    if (scriptFile == null)
    {
        throw new ArgumentNullException("scriptFile");
    }

    if (string.IsNullOrEmpty(defaultPos))
    {
        throw new ArgumentNullException("defaultPos");
    }

    Lexicon result = new Lexicon(scriptFile.Language);

    foreach (ScriptItem item in scriptFile.Items)
    {
        foreach (ScriptWord scriptWord in item.AllPronouncedWords)
        {
            string word = scriptWord.Grapheme;

            // Create LexiconPronunciaton Node
            LexiconPronunciation pron = new LexiconPronunciation(result.Language);
            pron.Symbolic = scriptWord.Pronunciation;

            if (mainLexicon != null)
            {
                LexicalItem mainEntry = mainLexicon.Lookup(word, true);
                if (mainEntry != null)
                {
                    LexiconPronunciation mainPron = mainEntry.FindPronunciation(pron.Symbolic, true);
                    if (mainPron != null)
                    {
                        pron.Symbolic = mainPron.Symbolic;
                    }
                }
            }

            // Fall back to the default part of speech when the word has none.
            LexiconItemProperty property = new LexiconItemProperty();
            property.PartOfSpeech = new PosItem(
                string.IsNullOrEmpty(scriptWord.PosString) ? defaultPos : scriptWord.PosString);
            pron.Properties.Add(property);

            if (result.Items.ContainsKey(word))
            {
                LexicalItem existing = result.Items[word];

                // Every pronunciation with the same symbolic form (ignoring case)
                // receives the property, unless it already contains it.
                bool matched = false;
                foreach (LexiconPronunciation candidate in existing.Pronunciations)
                {
                    if (!candidate.Symbolic.Equals(pron.Symbolic, StringComparison.InvariantCultureIgnoreCase))
                    {
                        continue;
                    }

                    matched = true;
                    if (!candidate.Properties.Contains(property))
                    {
                        candidate.Properties.Add(property);
                    }
                }

                if (!matched)
                {
                    result.Items[word].Pronunciations.Add(pron);
                }
            }
            else
            {
                LexicalItem created = new LexicalItem(result.Language);
                created.Grapheme = word;
                created.Pronunciations.Add(pron);
                result.Items.Add(word, created);
            }
        }
    }

    return result;
}
/// <summary>
/// Loads one Xml script file. When its language equals the language of this instance
/// the script is added to the loaded script list and its errors are copied into
/// <paramref name="errorSet"/>; otherwise an invalid-language error is recorded.
/// </summary>
/// <param name="scriptPath">Path of the script file to load.</param>
/// <param name="errorSet">Error set that receives the errors.</param>
private void LoadScript(string scriptPath, ErrorSet errorSet)
{
    XmlScriptFile loaded = new XmlScriptFile();
    XmlScriptFile.ContentControler loadOptions = new XmlScriptFile.ContentControler
    {
        LoadComments = _loadComments
    };

    loaded.Load(scriptPath, loadOptions);

    bool sameLanguage = _language == loaded.Language;
    if (!sameLanguage)
    {
        // A script in another language is reported and not kept.
        errorSet.Add(ScriptError.InvalidLanguage,
            Localor.LanguageToString(loaded.Language),
            Localor.LanguageToString(_language),
            Path.GetFileName(scriptPath));
        return;
    }

    _xmlScriptFiles.Add(loaded);

    // Re-wrap each script error as a collection error that names the file.
    foreach (Error scriptError in loaded.ErrorSet.Errors)
    {
        Error wrapped = new Error(ScriptError.ScriptCollectionError,
            Path.GetFileName(scriptPath), scriptError.ToString());
        errorSet.Add(wrapped);
    }
}
/// <summary>
/// Extract Domain Lexicon from script.
/// Each line of the domain list file names a script file inside the script folder;
/// a lexicon is created from every script and merged into a single result.
/// </summary>
/// <param name="scriptFolder">Script Folder.</param>
/// <param name="domainListFile">Domain List File; each line is combined with the script folder to form a script path.</param>
/// <param name="inMainLex">Input Main Lexicon.</param>
/// <param name="defaultPartOfSpeech">Default Part of Speech.</param>
/// <param name="mergeMode">Merging Mode for Lexicon.</param>
/// <param name="phoneSet">Phone set; validation is only performed when both this and the attribute schema are given.</param>
/// <param name="attribSchema">Lexical attribute schema.</param>
/// <returns>
/// The merged lexicon, or null when the default part of speech fails validation
/// against the attribute schema or when no domain lexicon was accepted
/// (empty domain list, or every script was skipped because of validation errors).
/// </returns>
/// <exception cref="InvalidDataException">A script file has a different language than the lexicon collected so far.</exception>
private Lexicon ExtractDomainLexicon(string scriptFolder, string domainListFile,
    Lexicon inMainLex, string defaultPartOfSpeech, MergeMode mergeMode,
    TtsPhoneSet phoneSet, LexicalAttributeSchema attribSchema)
{
    if (attribSchema != null)
    {
        if (PosItem.Validate(defaultPartOfSpeech, null, attribSchema).Count > 0)
        {
            Log("Default Part of speech {0} is unrecognized according to attribute schema, extraction breaks",
                defaultPartOfSpeech);
            return null;
        }
    }

    Lexicon outLex = null;
    foreach (string domainName in Helper.FileLines(domainListFile))
    {
        string domainFilePath = Path.Combine(scriptFolder, domainName);
        XmlScriptFile scriptFile = new XmlScriptFile();
        scriptFile.Load(domainFilePath);

        // All accepted scripts must share the language of the first accepted one.
        if (outLex != null && outLex.Language != scriptFile.Language)
        {
            throw new InvalidDataException(Helper.NeutralFormat(
                "Found inconsistent language \"{0}\" against previous one \"{1}\" in the file of \"{2}\"",
                scriptFile.Language.ToString(),
                outLex.Language.ToString(),
                domainFilePath));
        }

        Lexicon lexicon = Lexicon.CreateFromXmlScriptFile(scriptFile, defaultPartOfSpeech, inMainLex);
        if (phoneSet != null && attribSchema != null)
        {
            lexicon.Validate(phoneSet, attribSchema);
            if (lexicon.ErrorSet.Count > 0)
            {
                Console.Error.WriteLine("The script file {0} contains {1} errors, skip!",
                    domainFilePath, lexicon.ErrorSet.Count);
                Log("The script file {0} contains {1} errors:",
                    domainFilePath, lexicon.ErrorSet.Count);
                foreach (Error error in lexicon.ErrorSet.Errors)
                {
                    Log(error.ToString());
                }

                // Skip this domain lexicon
                continue;
            }
        }

        if (outLex == null)
        {
            outLex = lexicon;
        }
        else
        {
            MergeLexicon(outLex, lexicon, mergeMode);
        }
    }

    // outLex is still null when the domain list had no lines or every domain
    // lexicon was skipped; guard the dereference instead of crashing with a
    // NullReferenceException, and report the result as empty.
    if (outLex == null || outLex.Items.Count == 0)
    {
        Log("The final lexicon is empty.");
    }

    return outLex;
}
/// <summary>
/// Writes a copy of a script file whose items are ordered by their dictionary key,
/// saved without comments.
/// </summary>
/// <param name="scriptFile">Script file whose items, language and encoding are copied.</param>
/// <param name="targetFilePath">Target file path.</param>
private static void ExportSortedCleanXmlScriptFiles(XmlScriptFile scriptFile,
    string targetFilePath)
{
    XmlScriptFile output = new XmlScriptFile
    {
        Language = scriptFile.Language,
        Encoding = scriptFile.Encoding
    };

    // SortedDictionary enumerates in key order, which yields the sorted item sequence.
    SortedDictionary<string, ScriptItem> ordered = new SortedDictionary<string, ScriptItem>();
    foreach (KeyValuePair<string, ScriptItem> entry in scriptFile.ItemDic)
    {
        ordered.Add(entry.Key, entry.Value);
    }

    foreach (KeyValuePair<string, ScriptItem> entry in ordered)
    {
        ScriptItem scriptItem = entry.Value;
        output.Items.Add(scriptItem);
        output.ItemDic.Add(entry.Key, scriptItem);
    }

    XmlScriptFile.ContentControler saveOptions = new XmlScriptFile.ContentControler
    {
        SaveComments = false
    };

    output.Save(targetFilePath, scriptFile.Encoding, saveOptions);
}