/// <summary>
/// Split this rule file into one PolyphonyRuleFile per domain.
/// Every rule in _polyphonyWords is placed into the file keyed by its
/// Domain value; a new file is created the first time a domain is seen
/// and receives this instance's _keyTypes and _keyString.
/// </summary>
/// <returns>PolyphonyRuleFile array, one entry per distinct domain.</returns>
public PolyphonyRuleFile[] SplitIntoPolyphonyRuleFiles()
{
    Dictionary<string, PolyphonyRuleFile> files = new Dictionary<string, PolyphonyRuleFile>();

    foreach (PolyphonyRule word in _polyphonyWords)
    {
        // Single lookup instead of ContainsKey followed by the indexer.
        PolyphonyRuleFile file;
        if (!files.TryGetValue(word.Domain, out file))
        {
            file = new PolyphonyRuleFile();
            file.DomainTag = word.Domain;
            file._keyTypes = _keyTypes;
            file._keyString = _keyString;
            files.Add(word.Domain, file);
        }

        file._polyphonyWords.Add(word);
    }

    return files.Values.ToArray();
}
/// <summary>
/// Build the binary data of the module named by <paramref name="moduleDataName"/>
/// and write it to <paramref name="outputStream"/>. The module name selects which
/// raw data is loaded and which compiler is invoked.
/// </summary>
/// <param name="moduleDataName">Name of the module data to build (a ModuleDataName value).</param>
/// <param name="outputStream">Stream handed to the selected compiler as its output.</param>
/// <param name="isEnableValidate">
/// When false, the validation errors of the CharTable, PolyphoneRule and
/// CompoundRule modules are downgraded to ErrorSeverity.Warning before being merged.
/// </param>
/// <param name="formatGuid">Format GUID forwarded to the word breaker compiler.</param>
/// <returns>The errors collected while loading dependencies and compiling.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="moduleDataName"/> is null or empty, or <paramref name="outputStream"/> is null.
/// </exception>
/// <remarks>
/// FileNotFoundException, ArgumentNullException, XmlException and InvalidDataException
/// (exact types only) raised while building are converted into a
/// DataCompilerError.RawDataNotFound entry; any other exception is rethrown.
/// </remarks>
public ErrorSet Build(string moduleDataName, Stream outputStream, bool isEnableValidate, string formatGuid)
{
    ////#region Check arguments

    if (string.IsNullOrEmpty(moduleDataName))
    {
        // Report the real parameter name (was "dataName", which is not a parameter).
        throw new ArgumentNullException("moduleDataName");
    }

    if (outputStream == null)
    {
        throw new ArgumentNullException("outputStream");
    }

    ////#endregion

    ErrorSet errorSet = new ErrorSet();
    ErrorSet subErrorSet = new ErrorSet();

    try
    {
        switch (moduleDataName)
        {
            case ModuleDataName.PhoneSet:
                TtsPhoneSet phoneSet = (TtsPhoneSet)GetObject(RawDataName.PhoneSet, errorSet);
                if (!errorSet.Contains(ErrorSeverity.MustFix))
                {
                    errorSet.Merge(PhoneSetCompiler.Compile(phoneSet, outputStream));
                }

                break;

            case ModuleDataName.BackendPhoneSet:
                phoneSet = (TtsPhoneSet)GetObject(RawDataName.BackendPhoneSet, errorSet);
                if (!errorSet.Contains(ErrorSeverity.MustFix))
                {
                    errorSet.Merge(PhoneSetCompiler.Compile(phoneSet, outputStream));
                }

                break;

            case ModuleDataName.PosSet:
                TtsPosSet posSet = (TtsPosSet)GetObject(RawDataName.PosSet, errorSet);
                if (!errorSet.Contains(ErrorSeverity.MustFix))
                {
                    errorSet.Merge(PosSetCompiler.Compile(posSet, outputStream));
                }

                break;

            case ModuleDataName.PosTaggerPos:
                LexicalAttributeSchema schema = (LexicalAttributeSchema)GetObject(
                    RawDataName.LexicalAttributeSchema, subErrorSet);
                MergeDependencyError(errorSet, subErrorSet, _schemaFullName);
                if (!subErrorSet.Contains(ErrorSeverity.MustFix))
                {
                    TtsPosSet postaggingPosSet = TtsPosSet.LoadPosTaggingPosFromSchema(schema);
                    errorSet.Merge(PosSetCompiler.CompilePosTaggerPos(postaggingPosSet, outputStream));
                }

                break;

            case ModuleDataName.Lexicon:
                errorSet = CompileLexicon(outputStream);
                break;

            case ModuleDataName.CharTable:
                ErrorSet charTableErrorSet = CompileCharTable(outputStream);
                if (!isEnableValidate)
                {
                    // Validation disabled: report char table problems as warnings only.
                    foreach (Error error in charTableErrorSet.Errors)
                    {
                        error.Severity = ErrorSeverity.Warning;
                    }
                }

                errorSet.Merge(charTableErrorSet);
                break;

            case ModuleDataName.SentenceSeparator:
                string sentSepDataDir = _dataHandlerList.Datas[RawDataName.SentenceSeparatorDataPath].Path;
                Collection<string> compiledSentenceSeparatorFiles = new Collection<string>();
                errorSet = SentenceSeparatorCompiler.Compile(sentSepDataDir, outputStream, compiledSentenceSeparatorFiles);
                if (errorSet.GetSeverityCount(ErrorSeverity.MustFix) == 0 &&
                    compiledSentenceSeparatorFiles.Count > 0)
                {
                    errorSet.Add(ReportCompiledFiles("sentence separator", compiledSentenceSeparatorFiles));
                }

                break;

            case ModuleDataName.WordBreaker:
                {
                    // The unused, never-disposed MemoryStream local was removed here.
                    string wordBreakerDataDir = _dataHandlerList.Datas[RawDataName.WordBreakerDataPath].Path;
                    Collection<string> compiledWordBreakerFiles = new Collection<string>();
                    errorSet = WordBreakerCompiler.Compile(wordBreakerDataDir, outputStream, compiledWordBreakerFiles, formatGuid);
                    if (errorSet.GetSeverityCount(ErrorSeverity.MustFix) == 0 &&
                        compiledWordBreakerFiles.Count > 0)
                    {
                        errorSet.Add(ReportCompiledFiles("word breaker", compiledWordBreakerFiles));
                    }
                }

                break;

            case ModuleDataName.PostWordBreaker:
                string postWordBreakerFilePath = _dataHandlerList.Datas[RawDataName.PostWordBreaker].Path;
                errorSet = PostWordBreakerCompiler.Compile(postWordBreakerFilePath, outputStream);
                break;

            case ModuleDataName.ChineseTone:
                string chineseToneFilePath = _dataHandlerList.Datas[RawDataName.ChineseTone].Path;
                errorSet = ChineseToneCompiler.Compile(chineseToneFilePath, outputStream);
                break;

            case ModuleDataName.AcronymDisambiguation:
                {
                    string acronymDisambiguationDataDir = _dataHandlerList.Datas[RawDataName.AcronymDisambiguation].Path;
                    Collection<string> compiledFiles = new Collection<string>();
                    errorSet = CrfModelCompiler.Compile(acronymDisambiguationDataDir, outputStream, compiledFiles, _language);
                    if (errorSet.GetSeverityCount(ErrorSeverity.MustFix) == 0 &&
                        compiledFiles.Count > 0)
                    {
                        errorSet.Add(ReportCompiledFiles("AcronymDisambiguation", compiledFiles));
                    }
                }

                break;

            case ModuleDataName.NEDisambiguation:
                {
                    string strNeDisambiguationDataDir = _dataHandlerList.Datas[RawDataName.NEDisambiguation].Path;
                    Collection<string> compiledFiles = new Collection<string>();
                    errorSet = CrfModelCompiler.Compile(strNeDisambiguationDataDir, outputStream, compiledFiles, _language);
                    if (errorSet.GetSeverityCount(ErrorSeverity.MustFix) == 0 &&
                        compiledFiles.Count > 0)
                    {
                        errorSet.Add(ReportCompiledFiles("NEDisambiguation", compiledFiles));
                    }
                }

                break;

            case ModuleDataName.SyllabifyRule:
                string syllabifyRuleFilePath = _dataHandlerList.Datas[RawDataName.SyllabifyRule].Path;
                phoneSet = (TtsPhoneSet)GetObject(RawDataName.PhoneSet, subErrorSet);
                MergeDependencyError(errorSet, subErrorSet, RawDataName.PhoneSet);
                if (!subErrorSet.Contains(ErrorSeverity.MustFix))
                {
                    errorSet.Merge(SyllabifyRuleCompiler.Compile(syllabifyRuleFilePath, phoneSet, outputStream));
                }

                break;

            case ModuleDataName.UnitGenerator:
                string truncRuleFilePath = _dataHandlerList.Datas[RawDataName.TruncateRule].Path;
                phoneSet = (TtsPhoneSet)GetObject(RawDataName.PhoneSet, subErrorSet);
                MergeDependencyError(errorSet, subErrorSet, RawDataName.PhoneSet);
                if (!subErrorSet.Contains(ErrorSeverity.MustFix))
                {
                    errorSet.Merge(UnitGeneratorDataCompiler.Compile(truncRuleFilePath, phoneSet, outputStream));
                }

                break;

            case ModuleDataName.PolyphoneRule:
                string generalRuleFilePath = _dataHandlerList.Datas[RawDataName.PolyphoneRule].Path;
                phoneSet = (TtsPhoneSet)GetObject(RawDataName.PhoneSet, errorSet);
                PolyphonyRuleFile polyRuleFile = new PolyphonyRuleFile();
                ErrorSet polyErrorSet = polyRuleFile.Load(generalRuleFilePath, phoneSet);
                if (!isEnableValidate)
                {
                    // Validation disabled: report rule file problems as warnings only.
                    foreach (Error error in polyErrorSet.Errors)
                    {
                        error.Severity = ErrorSeverity.Warning;
                    }
                }

                errorSet.Merge(polyErrorSet);
                errorSet.Merge(CompileGeneralRule(generalRuleFilePath, outputStream));
                break;

            case ModuleDataName.BoundaryPronChangeRule:
                generalRuleFilePath = _dataHandlerList.Datas[RawDataName.BoundaryPronChangeRule].Path;
                errorSet = CompileGeneralRule(generalRuleFilePath, outputStream);
                break;

            case ModuleDataName.SentenceDetector:
                generalRuleFilePath = _dataHandlerList.Datas[RawDataName.SentenceDetectRule].Path;
                RuleFile ruleFile = new RuleFile();
                List<string> dupKeys = ruleFile.GetDupKeys(generalRuleFilePath);
                if (dupKeys.Count > 0)
                {
                    // Duplicate keys are reported and the rule file is not compiled.
                    foreach (string key in dupKeys)
                    {
                        errorSet.Add(new Error(DataCompilerError.DuplicateItemKey, key));
                    }
                }
                else
                {
                    errorSet = CompileGeneralRule(generalRuleFilePath, outputStream);
                }

                break;

            case ModuleDataName.QuotationMarkTable:
                QuotationMarkTable quoteTable = QuotationMarkTable.Read(_dataHandlerList.Datas[RawDataName.QuotationMarkTable].Path);
                errorSet = QuotationMarkCompiler.Compile(quoteTable, outputStream);
                break;

            case ModuleDataName.ParallelStructTable:
                schema = (LexicalAttributeSchema)GetObject(
                    RawDataName.LexicalAttributeSchema, subErrorSet);
                MergeDependencyError(errorSet, subErrorSet, _schemaFullName);
                if (!subErrorSet.Contains(ErrorSeverity.MustFix))
                {
                    TtsPosSet postaggingPosSet = TtsPosSet.LoadPosTaggingPosFromSchema(schema);
                    ParallelStructTable parallelStructTable = ParallelStructTable.Read(_dataHandlerList.Datas[RawDataName.ParallelStructTable].Path);
                    if (postaggingPosSet != null)
                    {
                        // Merge rather than overwrite so the dependency errors merged above are kept.
                        errorSet.Merge(ParallelStructCompiler.Compile(parallelStructTable, postaggingPosSet, outputStream));
                    }
                }

                break;

            case ModuleDataName.WordFeatureSuffixTable:
                schema = (LexicalAttributeSchema)GetObject(
                    RawDataName.LexicalAttributeSchema, subErrorSet);
                MergeDependencyError(errorSet, subErrorSet, _schemaFullName);
                if (!subErrorSet.Contains(ErrorSeverity.MustFix))
                {
                    WordFeatureSuffixTable suffixTable = WordFeatureSuffixTable.Read(_dataHandlerList.Datas[RawDataName.WordFeatureSuffixTable].Path);

                    // Merge rather than overwrite so the dependency errors merged above are kept.
                    errorSet.Merge(WordFeatureSuffixCompiler.Compile(suffixTable, outputStream));
                }

                break;

            case ModuleDataName.LtsRule:
                string ltsRuleDataPath = _dataHandlerList.Datas[RawDataName.LtsRuleDataPath].Path;
                errorSet = CompileLtsRule(ltsRuleDataPath, outputStream);
                break;

            case ModuleDataName.PhoneEventData:
                PhoneConverterWrapper pcw = null;

                // Check if the language has phone mapping data.
                if (_moduleDataSet.ContainsKey(ModuleDataName.PhoneMappingRule))
                {
                    // Check phone mapping binary data dependency.
                    if (_moduleDataSet[ModuleDataName.PhoneMappingRule].Data != null)
                    {
                        pcw = new PhoneConverterWrapper(_language,
                            _moduleDataSet[ModuleDataName.PhoneMappingRule].Data);
                    }
                    else
                    {
                        errorSet.Add(DataCompilerError.DependenciesNotValid,
                            "Please make sure that PhoneMappingRule has been compiled before PhoneEvent");
                    }
                }

                if (errorSet.GetSeverityCount(ErrorSeverity.MustFix) == 0)
                {
                    phoneSet = (TtsPhoneSet)GetObject(RawDataName.PhoneSet, errorSet);

                    // Merge rather than overwrite so errors already collected in errorSet
                    // (dependency check, phone set loading) are not discarded.
                    errorSet.Merge(PhoneEventCompiler.Compile(phoneSet, pcw, outputStream));
                }

                break;

            case ModuleDataName.PosRule:
                string lexicalRuleFilePath = _dataHandlerList.Datas[RawDataName.PosLexicalRule].Path;
                string contextualRuleFilePath = _dataHandlerList.Datas[RawDataName.PosContextualRule].Path;
                schema = (LexicalAttributeSchema)GetObject(
                    RawDataName.LexicalAttributeSchema, subErrorSet);
                MergeDependencyError(errorSet, subErrorSet, _schemaFullName);
                if (!subErrorSet.Contains(ErrorSeverity.MustFix))
                {
                    posSet = TtsPosSet.LoadPosTaggingPosFromSchema(schema);
                    string posSetFilePath = Helper.GetTempFileName();
                    try
                    {
                        posSet.Save(posSetFilePath, Encoding.Unicode);
                        errorSet.Merge(CompilePosRule(lexicalRuleFilePath, contextualRuleFilePath,
                            posSetFilePath, outputStream));
                    }
                    finally
                    {
                        // Remove the temporary POS set file even when saving or compiling throws.
                        File.Delete(posSetFilePath);
                    }
                }

                break;

            case ModuleDataName.TnRule:
                {
                    string tnmlRuleFilePath = _dataHandlerList.Datas[moduleDataName].Path;
                    string schemaFilePath = _dataHandlerList.Datas[RawDataName.LexicalAttributeSchema].Path;
                    errorSet = CompileTnml(tnmlRuleFilePath, schemaFilePath, outputStream, true);
                }

                break;

            case ModuleDataName.FstNERule:
                {
                    string fstNERuleFilePath = _dataHandlerList.Datas[moduleDataName].Path;
                    errorSet = CompileFstNE(fstNERuleFilePath, outputStream);
                }

                break;

            case ModuleDataName.CompoundRule:
                {
                    string tnmlRuleFilePath = _dataHandlerList.Datas[moduleDataName].Path;
                    string schemaFilePath = _dataHandlerList.Datas[RawDataName.LexicalAttributeSchema].Path;
                    phoneSet = (TtsPhoneSet)GetObject(RawDataName.PhoneSet, errorSet);

                    ErrorSet compundRuleError = DataFileValidator.ValidateCompoundRule(
                        _dataHandlerList.Datas[moduleDataName].Path, phoneSet);
                    if (!isEnableValidate)
                    {
                        // Validation disabled: report compound rule problems as warnings only.
                        foreach (Error error in compundRuleError.Errors)
                        {
                            error.Severity = ErrorSeverity.Warning;
                        }
                    }

                    errorSet.Merge(compundRuleError);
                    errorSet.Merge(CompileTnml(tnmlRuleFilePath, schemaFilePath, outputStream, false));
                }

                break;

            case ModuleDataName.PhoneMappingRule:
            case ModuleDataName.BackendPhoneMappingRule:
            case ModuleDataName.FrontendBackendPhoneMappingRule:
            case ModuleDataName.MixLingualPOSConverterData:
                {
                    string tnmlRuleFilePath = _dataHandlerList.Datas[moduleDataName].Path;
                    string schemaFilePath = _dataHandlerList.Datas[RawDataName.LexicalAttributeSchema].Path;
                    errorSet = CompileTnml(tnmlRuleFilePath, schemaFilePath, outputStream, false);
                }

                break;

            case ModuleDataName.ForeignLtsCollection:
                errorSet = CompileForeignLtsCollection(_dataHandlerList.Datas[moduleDataName].Path, outputStream);
                break;

            case ModuleDataName.PolyphonyModel:
                {
                    string polyphonyModelDataDir = _dataHandlerList.Datas[RawDataName.PolyphonyModel].Path;
                    Collection<string> compiledFiles = new Collection<string>();
                    errorSet = CrfModelCompiler.Compile(polyphonyModelDataDir, outputStream, compiledFiles, _language);
                    if (errorSet.GetSeverityCount(ErrorSeverity.MustFix) == 0 &&
                        compiledFiles.Count > 0)
                    {
                        errorSet.Add(ReportCompiledFiles("PolyphonyModel", compiledFiles));
                    }
                }

                break;

            case ModuleDataName.RNNPolyphonyModel:
                {
                    string polyphonyModelDataPath = _dataHandlerList.Datas[RawDataName.RNNPolyphonyModel].Path;
                    Collection<string> compiledFiles = new Collection<string>();
                    errorSet = RNNModelCompiler.Compile(polyphonyModelDataPath, outputStream, compiledFiles);
                    if (errorSet.GetSeverityCount(ErrorSeverity.MustFix) == 0 &&
                        compiledFiles.Count > 0)
                    {
                        // NOTE(review): the label is "PolyphonyModel", same as the CRF case above;
                        // confirm whether "RNNPolyphonyModel" was intended.
                        errorSet.Add(ReportCompiledFiles("PolyphonyModel", compiledFiles));
                    }
                }

                break;

            default:
                errorSet.Add(DataCompilerError.InvalidModuleData, moduleDataName);
                break;
        }
    }
    catch (Exception ex)
    {
        // Only these exact exception types are reported as missing raw data;
        // derived types and everything else propagate to the caller.
        Type exceptionType = ex.GetType();
        if (exceptionType.Equals(typeof(FileNotFoundException)) ||
            exceptionType.Equals(typeof(ArgumentNullException)) ||
            exceptionType.Equals(typeof(XmlException)) ||
            exceptionType.Equals(typeof(InvalidDataException)))
        {
            errorSet.Add(DataCompilerError.RawDataNotFound, moduleDataName,
                Helper.BuildExceptionMessage(ex));
        }
        else
        {
            throw;
        }
    }

    return errorSet;
}