/// <summary>
/// Reads a stemmer file into a lookup table.
/// </summary>
/// <remarks>
/// Every line is split on <c>Delimitor.TabChars</c> with empty entries removed.
/// Each entry from the second column onward is mapped to the first column of
/// the same line. Lines with fewer than two columns are reported to
/// <paramref name="errorSet"/> and skipped.
/// </remarks>
/// <param name="stemmerFilePath">Path of the stemmer file to read.</param>
/// <param name="errorSet">Receives one error per line that has fewer than two columns.</param>
/// <returns>Dictionary keyed by the entries of columns 2..n, valued by column 1.</returns>
public static Dictionary<string, string> Read(string stemmerFilePath, ErrorSet errorSet)
{
    Dictionary<string, string> wordToStem = new Dictionary<string, string>();

    foreach (string line in Helper.FileLines(stemmerFilePath))
    {
        string[] columns = line.Split(Delimitor.TabChars, StringSplitOptions.RemoveEmptyEntries);
        if (columns.Length >= 2)
        {
            for (int column = 1; column < columns.Length; column++)
            {
                // The first mapping seen for an entry wins; later ones are ignored.
                if (!wordToStem.ContainsKey(columns[column]))
                {
                    wordToStem.Add(columns[column], columns[0]);
                }
            }
        }
        else
        {
            errorSet.Add(StemmerFileError.OneColumnLine, line, stemmerFilePath);
        }
    }

    return wordToStem;
}
/// <summary>
/// Records an error when <c>NameOfFatherOrHusband</c> is null or empty.
/// The message asks for the husband's name when <c>SonOfOrWifeOf</c> is
/// <c>WifeOf</c> and for the father's name when it is <c>SonOf</c>; any other
/// value records nothing.
/// </summary>
private void Check_FathersNameOrHusbandsNameEntered()
{
    if (!NameOfFatherOrHusband.IsNullOrEmpty())
    {
        // A name is present, nothing to report.
        return;
    }

    if (SonOfOrWifeOf == SonOfWifeOfDotOfENUM.WifeOf)
    {
        string husbandMessage = string.Format(
            "You have not entered the name of the Husband for '{0}'. Please enter the Husband Name. ",
            PersonFullName());
        _err.Add(husbandMessage, MethodBase.GetCurrentMethod());
    }

    if (SonOfOrWifeOf == SonOfWifeOfDotOfENUM.SonOf)
    {
        string fatherMessage = string.Format(
            "You have not entered the name of the Father for '{0}'. Please enter the Father Name. ",
            PersonFullName());
        _err.Add(fatherMessage, MethodBase.GetCurrentMethod());
    }
}
/// <summary>
/// Converts a gender string into a combined gender id.
/// </summary>
/// <remarks>
/// Each character is resolved with <c>GetId</c> and its value is OR-ed into
/// <paramref name="id"/>. Characters that resolve to <c>Gender.None</c>, or
/// whose bits are already set in <paramref name="id"/>, are reported as errors
/// and do not change the id.
/// </remarks>
/// <param name="gender">Gender string; one character per gender.</param>
/// <param name="id">Receives the OR-ed gender id; 0 when no character was accepted.</param>
/// <returns>Errors found while converting (unrecognized or duplicate characters).</returns>
/// <exception cref="ArgumentNullException">When <paramref name="gender"/> is null or empty.</exception>
public static ErrorSet StringToId(string gender, out int id)
{
    if (string.IsNullOrEmpty(gender))
    {
        throw new ArgumentNullException("gender");
    }

    id = 0;
    ErrorSet conversionErrors = new ErrorSet();

    foreach (char symbol in gender)
    {
        Gender resolved = GetId(symbol);
        if (resolved == Gender.None)
        {
            conversionErrors.Add(GenderError.UnrecognizedGender, symbol.ToString());
            continue;
        }

        if ((((int)resolved) & id) == 0)
        {
            id |= (int)resolved;
        }
        else
        {
            // These bits were already contributed by an earlier character.
            conversionErrors.Add(GenderError.DuplicateGender, symbol.ToString());
        }
    }

    return conversionErrors;
}
/// <summary>
/// Wraps every error of <paramref name="parseErrorSet"/> in a
/// <c>PolyRuleError.ParseError</c> that carries the line number, and adds the
/// wrapper to <paramref name="errorSet"/>.
/// </summary>
/// <param name="errorSet">Error set that receives the wrapped errors.</param>
/// <param name="lineNum">Line number attached to each wrapped error (invariant-culture text).</param>
/// <param name="parseErrorSet">Errors to wrap.</param>
private void AddParseError(ErrorSet errorSet, int lineNum, ErrorSet parseErrorSet)
{
    string lineText = lineNum.ToString(CultureInfo.InvariantCulture);

    foreach (Error inner in parseErrorSet.Errors)
    {
        Error wrapped = new Error(PolyRuleError.ParseError, inner, lineText);

        // The wrapper takes over the severity of the error it wraps.
        wrapped.Severity = inner.Severity;
        errorSet.Add(wrapped);
    }
}
/// <summary>
/// Validates a single char table element and adds every problem found to
/// <paramref name="errorSet"/>.
/// </summary>
/// <remarks>
/// An element with an empty symbol only gets <c>EmptySymbol</c>. Otherwise the
/// symbol is looked up in the lexicon, and then one of two rule sets applies:
/// non-alphabet elements need an isolated expansion, may have a contextual
/// expansion, and must not have features or a pronunciation; alphabet elements
/// need a valid pronunciation and features, and must not have any expansion.
/// </remarks>
/// <param name="element">Char element to validate.</param>
/// <param name="errorSet">Receives the validation errors.</param>
/// <param name="wordsNotInLexicon">Passed through to <c>ValidateExpansion</c>.</param>
/// <exception cref="ArgumentNullException">
/// When <paramref name="errorSet"/> or <paramref name="element"/> is null.
/// </exception>
public void ValidateCharElement(CharElement element, ErrorSet errorSet, Collection<string> wordsNotInLexicon)
{
    if (errorSet == null)
    {
        // Report the real parameter name (previously the non-existent "errors").
        throw new ArgumentNullException("errorSet");
    }

    if (element == null)
    {
        throw new ArgumentNullException("element");
    }

    if (string.IsNullOrEmpty(element.Symbol))
    {
        errorSet.Add(new Error(CharTableError.EmptySymbol));
        return;
    }

    EnsureInitialized();

    if (_lexicon.Lookup(element.Symbol, true) == null)
    {
        errorSet.Add(new Error(CharTableError.SymbolNotInLexicon, element.Symbol));
    }

    if (!element.IsAlphabet())
    {
        // IsolatedExpansion must not be empty for a non-alphabet element.
        if (!string.IsNullOrEmpty(element.IsolatedExpansion))
        {
            ValidateExpansion(element.IsolatedExpansion, "IsolatedSymbolReadout", errorSet, wordsNotInLexicon);
        }
        else
        {
            errorSet.Add(new Error(CharTableError.EmptyIsolatedSymbol, element.Symbol));
        }

        // ContextualSymbolReadout is optional for a non-alphabet element.
        if (!string.IsNullOrEmpty(element.ContextualExpansion))
        {
            ValidateExpansion(element.ContextualExpansion, "ContextualSymbolReadout", errorSet, wordsNotInLexicon);
        }

        // A non-alphabet element must not have features.
        if (element.Feature != CharTableCompiler.CharFeature.None)
        {
            errorSet.Add(new Error(CharTableError.NonAlphabetShouldNoFeatures, element.Symbol));
        }

        // A non-alphabet element must not have a pronunciation.
        if (!string.IsNullOrEmpty(element.Pronunciation))
        {
            errorSet.Add(new Error(CharTableError.NonAlphabetShouldNoPronunciation, element.Symbol));
        }
    }
    else
    {
        if (string.IsNullOrEmpty(element.Pronunciation))
        {
            errorSet.Add(new Error(CharTableError.AlphabetNoPron, element.Symbol));
        }
        else
        {
            ErrorSet pronErrorSet = Pronunciation.Validate(element.Pronunciation, _phoneset);
            foreach (Error error in pronErrorSet.Errors)
            {
                Error alphabetError = new Error(
                    CharTableError.AlphabetInvalidPron,
                    error,
                    element.Symbol,
                    element.Pronunciation);

                // Keep the same severity as the underlying pronunciation error.
                alphabetError.Severity = error.Severity;
                errorSet.Add(alphabetError);
            }
        }

        if (element.Feature == CharTableCompiler.CharFeature.None)
        {
            errorSet.Add(new Error(CharTableError.AlphabetNoFeatures, element.Symbol));
        }

        if (!string.IsNullOrEmpty(element.IsolatedExpansion) ||
            !string.IsNullOrEmpty(element.ContextualExpansion))
        {
            errorSet.Add(new Error(CharTableError.AlphabetShouldNoExpansion, element.Symbol));
        }
    }
}
/// <summary>
/// Finds sentence ids that appear in only one of the script file and the
/// file list map file.
/// </summary>
/// <remarks>
/// The script is loaded with validation; items returned by
/// <c>ScriptHelper.GetNeedDeleteItemIds</c> for the script's error set are
/// removed before comparing, and the script's own errors are merged into the
/// result. This function should be merged with the one in forced alignment
/// into a common library.
/// </remarks>
/// <param name="scriptFilePath">The location of the script file.</param>
/// <param name="language">Language of the script file.</param>
/// <param name="mapFilePath">The location of the file list map file.</param>
/// <returns>Script load errors plus one error per unmatched sentence id.</returns>
public static ErrorSet FindUnmatchedSentences(string scriptFilePath, Language language, string mapFilePath)
{
    ErrorSet mismatches = new ErrorSet();

    XmlScriptValidateSetting setting = new XmlScriptValidateSetting(Localor.GetPhoneSet(language), null);
    XmlScriptFile script = XmlScriptFile.LoadWithValidation(scriptFilePath, setting);
    script.Remove(ScriptHelper.GetNeedDeleteItemIds(script.ErrorSet));

    Dictionary<string, string> fileListMap = Microsoft.Tts.Offline.FileListMap.ReadAllData(mapFilePath);

    mismatches.Merge(script.ErrorSet);

    // Script items missing from the map.
    foreach (string sentenceId in script.ItemDic.Keys)
    {
        if (fileListMap.ContainsKey(sentenceId))
        {
            continue;
        }

        string missingInMap = Helper.NeutralFormat(
            "Script item {0} in script file but not in file list map file", sentenceId);
        mismatches.Add(ScriptError.OtherErrors, sentenceId, missingInMap);
    }

    // Map entries missing from the script.
    foreach (string sentenceId in fileListMap.Keys)
    {
        if (script.ItemDic.ContainsKey(sentenceId))
        {
            continue;
        }

        string missingInScript = Helper.NeutralFormat(
            "Script item {0} in file list map file but not in script file", sentenceId);
        mismatches.Add(ScriptError.OtherErrors, sentenceId, missingInScript);
    }

    return mismatches;
}
/// <summary>
/// Checks whether a script item is valid. Schema is not checked here.
/// </summary>
/// <remarks>
/// <paramref name="errors"/> is cleared first. Depending on the flags in the
/// validation scope of <paramref name="validateSetting"/>:
/// pronunciations of normal words are validated against the phone set (an
/// empty pronunciation on a normal word is reported as
/// <c>EmptyPronInNormalWord</c>); non-empty POS strings must exist in the POS
/// set; segment sequences are checked with <c>CheckSegments</c>. Every word is
/// additionally validated through <c>word.IsValid</c>.
/// </remarks>
/// <param name="item">The item to be checked.</param>
/// <param name="errors">Cleared, then filled with the errors found.</param>
/// <param name="validateSetting">Validation data set and scope.</param>
/// <returns>True when no error was recorded.</returns>
/// <exception cref="ArgumentNullException">When any argument is null.</exception>
public static bool IsValidItem(ScriptItem item, ErrorSet errors, XmlScriptValidateSetting validateSetting)
{
    if (item == null)
    {
        throw new ArgumentNullException("item");
    }

    if (errors == null)
    {
        throw new ArgumentNullException("errors");
    }

    if (validateSetting == null)
    {
        throw new ArgumentNullException("validateSetting");
    }

    validateSetting.VerifySetting();

    XmlScriptValidationScope scope = validateSetting.ValidationScope;
    bool checkPronunciation =
        (scope & XmlScriptValidationScope.Pronunciation) == XmlScriptValidationScope.Pronunciation;
    bool checkPos =
        (scope & XmlScriptValidationScope.POS) == XmlScriptValidationScope.POS;
    bool checkSegmentSequence =
        (scope & XmlScriptValidationScope.SegmentSequence) == XmlScriptValidationScope.SegmentSequence;

    errors.Clear();

    int sentenceNumber = 0;
    foreach (ScriptSentence sentence in item.Sentences)
    {
        int wordNumber = 0;
        foreach (ScriptWord word in sentence.Words)
        {
            if (checkPronunciation && word.WordType == WordType.Normal)
            {
                // Only normal words are expected to carry a pronunciation.
                string pron = word.GetPronunciation(validateSetting.PhoneSet);
                if (string.IsNullOrEmpty(pron))
                {
                    // Pronunciation is optional for a normal word, but an empty one is reported.
                    errors.Add(ScriptError.EmptyPronInNormalWord, item.Id, word.Grapheme);
                }
                else
                {
                    ErrorSet pronErrors = Core.Pronunciation.Validate(pron, validateSetting.PhoneSet);
                    foreach (Error pronError in pronErrors.Errors)
                    {
                        errors.Add(ScriptError.PronunciationError, pronError, item.Id, word.Grapheme);
                    }
                }
            }

            if (checkPos)
            {
                // An empty POS is accepted; a non-empty one must be known to the POS set.
                if (!string.IsNullOrEmpty(word.PosString) &&
                    !validateSetting.PosSet.Items.ContainsKey(word.PosString))
                {
                    errors.Add(ScriptError.UnrecognizedPos, item.Id, word.Grapheme, word.Pronunciation, word.PosString);
                }
            }

            string nodePath = string.Format(
                CultureInfo.InvariantCulture,
                "Sentence[{0}].Word[{1}]",
                sentenceNumber,
                wordNumber);
            word.IsValid(item.Id, nodePath, scope, errors);

            wordNumber++;
        }

        sentenceNumber++;
    }

    if (checkSegmentSequence)
    {
        CheckSegments(item, errors);
    }

    return errors.Count <= 0;
}
/// <summary>
/// Checks that every word of an expansion exists in the lexicon.
/// </summary>
/// <remarks>
/// The expansion is split on tab and space. For ZhCN, ZhTW and ZhHK, words
/// for which <c>Helper.IsEnglishWord</c> is true are skipped, because English
/// words cannot be in the lexicon of these languages.
/// <paramref name="expansionType"/> is only null-checked here.
/// </remarks>
/// <param name="expansion">Expansion words.</param>
/// <param name="expansionType">Type string.</param>
/// <param name="errorSet">Receives one error per word missing from the lexicon.</param>
/// <param name="wordsNotInLexicon">Optional; collects each missing word once. May be null.</param>
/// <exception cref="ArgumentNullException">
/// When <paramref name="expansion"/>, <paramref name="expansionType"/> or
/// <paramref name="errorSet"/> is null.
/// </exception>
private void ValidateExpansion(string expansion, string expansionType, ErrorSet errorSet, Collection<string> wordsNotInLexicon)
{
    if (expansion == null)
    {
        throw new ArgumentNullException("expansion");
    }

    if (expansionType == null)
    {
        throw new ArgumentNullException("expansionType");
    }

    if (errorSet == null)
    {
        throw new ArgumentNullException("errorSet");
    }

    char[] separators = new char[] { '\t', ' ' };
    foreach (string word in expansion.Split(separators, StringSplitOptions.RemoveEmptyEntries))
    {
        bool isChineseLanguage =
            Language == Language.ZhCN || Language == Language.ZhTW || Language == Language.ZhHK;
        if (isChineseLanguage && Helper.IsEnglishWord(word))
        {
            // English readout in zhXX: not expected to be in the lexicon.
            continue;
        }

        if (_lexicon.Lookup(word, true) != null)
        {
            continue;
        }

        errorSet.Add(new Error(CharTableError.ExpansionWordNotInLexicon, word));

        bool shouldCollect = wordsNotInLexicon != null && !wordsNotInLexicon.Contains(word);
        if (shouldCollect)
        {
            wordsNotInLexicon.Add(word);
        }
    }
}
/// <summary>
/// Slices the pronunciation of every script item in a script file.
/// </summary>
/// <remarks>
/// An <see cref="InvalidDataException"/> thrown while slicing one item is
/// converted into an error entry and processing continues with the next item.
/// Other exception types are not caught.
/// </remarks>
/// <param name="script">Script file to slice.</param>
/// <returns>Data errors found during the slicing.</returns>
/// <exception cref="ArgumentNullException">When <paramref name="script"/> is null.</exception>
public ErrorSet Slice(XmlScriptFile script)
{
    if (script == null)
    {
        throw new ArgumentNullException("script");
    }

    ErrorSet sliceErrors = new ErrorSet();

    foreach (ScriptItem scriptItem in script.Items)
    {
        try
        {
            Slice(scriptItem);
        }
        catch (InvalidDataException invalidData)
        {
            string detail = Helper.BuildExceptionMessage(invalidData);
            string message = Helper.NeutralFormat(
                "Error in item {0} of file {1}: {2}",
                scriptItem.Id,
                script.FilePath,
                detail);
            sliceErrors.Add(ScriptError.OtherErrors, scriptItem.Id, message);
        }
    }

    return sliceErrors;
}
/// <summary>
/// Parses <paramref name="line"/> as a key declaration when it is one.
/// </summary>
/// <remarks>
/// A newly declared key is registered in <c>_keyTypes</c>; a key name that is
/// already registered is reported as <c>DuplicateKeyName</c>. While
/// <paramref name="firstKeyString"/> is true, the first successfully
/// registered key name is stored in <c>_keyString</c> and the flag is cleared.
/// </remarks>
/// <param name="line">Line to inspect.</param>
/// <param name="firstKeyString">True until the first key has been registered.</param>
/// <param name="errorSet">Receives the duplicate-key error.</param>
/// <returns>Whether the line is a key declaration line.</returns>
private bool TryParseKeyDeclear(string line, ref bool firstKeyString, ErrorSet errorSet)
{
    if (!IsDeclearKey(line))
    {
        return false;
    }

    string declaredName = string.Empty;
    KeyType declaredType = KeyType.String;
    ParseDeclearKey(line, ref declaredName, ref declaredType);

    if (_keyTypes.ContainsKey(declaredName))
    {
        errorSet.Add(PolyRuleError.DuplicateKeyName, declaredName);
        return true;
    }

    _keyTypes.Add(declaredName, declaredType);

    if (firstKeyString)
    {
        _keyString = declaredName;
        firstKeyString = false;
    }

    return true;
}
/// <summary>
/// Loads a POS corpus file, one paragraph per non-blank line.
/// </summary>
/// <remarks>
/// Existing paragraphs are cleared first. A line that parses without errors
/// is added as a paragraph; otherwise each of its errors is wrapped in
/// <c>PosCorpusError.ErrorWithLine</c> together with the 1-based line number
/// (blank lines are counted) and the line is not added.
/// </remarks>
/// <param name="filePath">Path of the corpus file; must exist and be a Unicode file.</param>
/// <param name="attributeSchema">Schema passed to the paragraph parser.</param>
/// <returns>The errors found, each tagged with its line number.</returns>
/// <exception cref="ArgumentNullException">When <paramref name="filePath"/> is null or empty.</exception>
/// <exception cref="InvalidDataException">When the file is not a Unicode file.</exception>
public ErrorSet Load(string filePath, LexicalAttributeSchema attributeSchema)
{
    if (string.IsNullOrEmpty(filePath))
    {
        throw new ArgumentNullException("filePath");
    }

    if (!File.Exists(filePath))
    {
        throw Helper.CreateException(typeof(FileNotFoundException), filePath);
    }

    if (!Helper.IsUnicodeFile(filePath))
    {
        string formatMessage = Helper.NeutralFormat(
            "Invalid corpus file format(not UNICODE), should be UNICODE.");
        throw new InvalidDataException(formatMessage);
    }

    _paragraphs.Clear();

    ErrorSet loadErrors = new ErrorSet();
    int currentLine = 0;

    foreach (string line in Helper.FileLines(filePath, Encoding.Unicode, false))
    {
        currentLine++;

        if (line.Trim().Length == 0)
        {
            continue;
        }

        PosCorpusParagraph paragraph = new PosCorpusParagraph();
        ErrorSet parseErrors = paragraph.Parse(line, attributeSchema);

        if (parseErrors.Errors.Count != 0)
        {
            foreach (Error parseError in parseErrors.Errors)
            {
                loadErrors.Add(
                    PosCorpusError.ErrorWithLine,
                    parseError,
                    currentLine.ToString(CultureInfo.InvariantCulture));
            }

            continue;
        }

        Debug.Assert(paragraph.Words.Count > 0);
        _paragraphs.Add(paragraph);
    }

    _filePath = filePath;
    return loadErrors;
}
/// <summary>
/// Parses a "word + delimiter + POS" pair, splitting at the last occurrence
/// of <c>WordPosDelimeter</c>.
/// </summary>
/// <remarks>
/// On success <c>WordText</c> receives the part before the delimiter. When
/// <paramref name="attributeSchema"/> is given, <c>Pos</c> receives the POS
/// tagging POS it returns for the original POS (an empty result is reported
/// as <c>NoPosTaggingPos</c> and <c>Pos</c> is left untouched); without a
/// schema, <c>Pos</c> receives the original POS text.
/// </remarks>
/// <param name="wordPosPair">Text holding the word, the delimiter and the POS.</param>
/// <param name="attributeSchema">Optional schema used to map the POS; may be null.</param>
/// <returns>Errors found while parsing; empty when the pair is well-formed.</returns>
public ErrorSet Parse(string wordPosPair, LexicalAttributeSchema attributeSchema)
{
    ErrorSet parseErrors = new ErrorSet();

    int delimiterIndex = wordPosPair.LastIndexOf(WordPosDelimeter);
    int lastIndex = wordPosPair.Length - 1;

    if (delimiterIndex < 0 || delimiterIndex > lastIndex)
    {
        parseErrors.Add(PosCorpusError.InvalidFormat, wordPosPair);
        return parseErrors;
    }

    if (delimiterIndex == 0)
    {
        // Nothing in front of the delimiter.
        parseErrors.Add(PosCorpusError.EmptyWord, wordPosPair);
        return parseErrors;
    }

    if (delimiterIndex == lastIndex)
    {
        // Nothing behind the delimiter.
        parseErrors.Add(PosCorpusError.EmptyPos, wordPosPair);
        return parseErrors;
    }

    WordText = wordPosPair.Substring(0, delimiterIndex);
    string originalPos = wordPosPair.Substring(delimiterIndex + 1);

    if (attributeSchema == null)
    {
        Pos = originalPos;
        return parseErrors;
    }

    string mappedPos = attributeSchema.GetPosTaggingPos(originalPos);
    if (string.IsNullOrEmpty(mappedPos))
    {
        parseErrors.Add(PosCorpusError.NoPosTaggingPos, originalPos);
    }
    else
    {
        Pos = mappedPos;
    }

    return parseErrors;
}
/// <summary>
/// Compiles the RNN model and its polyphonic character list into
/// <paramref name="outputStream"/>.
/// </summary>
/// <remarks>
/// Data is written in this order: a uint placeholder for the model offset,
/// the uint polyphonic character count, one float threshold per entry, one
/// uint string-pool offset per character, the string pool bytes, and finally
/// the raw bytes of the model file. The placeholder is then overwritten with
/// the stream position at which the model bytes start.
/// The character list is read from "RNNPolyphoneList.txt" in the parent of
/// the model file's directory. When the model file does not exist only an
/// error is recorded and nothing is written.
/// NOTE(review): the offset is patched by seeking to position 0, so this
/// presumably expects the stream to be positioned at 0 on entry - confirm.
/// </remarks>
/// <param name="rnnModelPath">Path of the RNN model file.</param>
/// <param name="outputStream">Stream that receives the compiled data.</param>
/// <param name="addedFileNames">Receives <paramref name="rnnModelPath"/> after the data was written.</param>
/// <returns>Errors found while compiling.</returns>
/// <exception cref="ArgumentNullException">
/// When <paramref name="rnnModelPath"/> is null or empty, or when
/// <paramref name="outputStream"/> or <paramref name="addedFileNames"/> is null.
/// </exception>
public static ErrorSet Compile(string rnnModelPath, Stream outputStream, Collection<string> addedFileNames)
{
    if (string.IsNullOrEmpty(rnnModelPath))
    {
        throw new ArgumentNullException("rnnModelPath");
    }

    if (outputStream == null)
    {
        throw new ArgumentNullException("outputStream");
    }

    if (addedFileNames == null)
    {
        throw new ArgumentNullException("addedFileNames");
    }

    ErrorSet errorSet = new ErrorSet();

    if (!File.Exists(rnnModelPath))
    {
        errorSet.Add(RNNModelCompilerError.ModelDataNotFound, rnnModelPath);
        return errorSet;
    }

    BinaryWriter writer = new BinaryWriter(outputStream, Encoding.Unicode);

    // Load polyphonic characters that should be enabled in product.
    string modelDirectory = Path.GetDirectoryName(rnnModelPath);
    string listDirectory = new DirectoryInfo(modelDirectory).Parent.FullName;
    string polyphonicCharFile = Path.Combine(listDirectory, "RNNPolyphoneList.txt");

    Dictionary<string, float> polyCharactersInfo = null;
    if (!File.Exists(polyphonicCharFile))
    {
        errorSet.Add(RNNModelCompilerError.PolyphonicCharFileNotFound, polyphonicCharFile);
    }
    else
    {
        polyCharactersInfo = LoadPolyphonicInfo(polyphonicCharFile, errorSet);
    }

    List<string> polyphones = GetPolyphonicChars(polyCharactersInfo);
    List<float> thresholds = GetPolyphonicThreshold(polyCharactersInfo);

    uint modelOffset = 0;

    // Header: offset placeholder, character count, thresholds, pool offsets, pool bytes.
    using (StringPool characterPool = new StringPool())
    {
        Collection<int> characterOffsets = new Collection<int>();
        StringPool.WordsToStringPool(polyphones, characterPool, characterOffsets);

        uint characterCount = (uint)characterOffsets.Count;
        writer.Write(modelOffset);
        writer.Write(characterCount);

        foreach (float threshold in thresholds)
        {
            writer.Write(threshold);
        }

        byte[] poolBytes = characterPool.ToArray();
        foreach (int characterOffset in characterOffsets)
        {
            writer.Write((uint)characterOffset);
        }

        writer.Write(poolBytes, 0, poolBytes.Length);
    }

    modelOffset = (uint)writer.BaseStream.Position;

    // Append the RNN model bytes unchanged.
    using (FileStream modelStream = new FileStream(rnnModelPath, FileMode.Open, FileAccess.Read))
    using (BinaryReader modelReader = new BinaryReader(modelStream))
    {
        writer.Write(modelReader.ReadBytes((int)modelStream.Length));
    }

    writer.Flush();

    // Back-patch the placeholder with the real model offset.
    writer.Seek(0, SeekOrigin.Begin);
    writer.Write(modelOffset);
    writer.Flush();

    addedFileNames.Add(rnnModelPath);

    return errorSet;
}
/// <summary>
/// Loads the polyphonic characters that should be enabled in product,
/// together with their thresholds.
/// </summary>
/// <remarks>
/// Every non-empty line must consist of exactly two tab-separated fields: a
/// character (at most one char long) and a float threshold. The threshold is
/// parsed with the invariant culture so the result does not depend on the
/// regional settings of the machine. A line that violates the format,
/// including an unparsable threshold, is reported as
/// <c>InvalidCharacterListFormat</c> and skipped.
/// NOTE(review): a character listed twice still makes
/// <c>Dictionary.Add</c> throw <see cref="ArgumentException"/>, as before.
/// </remarks>
/// <param name="listFile">List file path.</param>
/// <param name="errorSet">Receives format errors and the "no character found" error.</param>
/// <returns>Character to threshold map; empty when nothing valid was found.</returns>
private static Dictionary<string, float> LoadPolyphonicInfo(string listFile, ErrorSet errorSet)
{
    Dictionary<string, float> polyCharactersInfo = new Dictionary<string, float>();

    using (StreamReader sr = new StreamReader(listFile))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }

            string[] parts = line.Split(new char[] { '\t' });

            // Same number styles float.Parse uses by default, but culture-invariant.
            float threshold = 0;
            bool wellFormed =
                parts.Length == 2 &&
                parts[0].Length <= 1 &&
                float.TryParse(
                    parts[1],
                    System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out threshold);

            if (!wellFormed)
            {
                errorSet.Add(RNNModelCompilerError.InvalidCharacterListFormat, listFile);
                continue;
            }

            polyCharactersInfo.Add(parts[0], threshold);
        }
    }

    if (polyCharactersInfo.Count == 0)
    {
        errorSet.Add(RNNModelCompilerError.PolyphonicCharNotFound, listFile);
    }

    return polyCharactersInfo;
}
/// <summary>
/// Checks one segment against the end of the previous segment and then
/// advances <paramref name="preSegEnd"/> to this segment's end.
/// </summary>
/// <remarks>
/// A segment that begins before <paramref name="preSegEnd"/> is reported as
/// <c>SegmentSequenceError</c> at "<paramref name="nodePath"/>.Acoustics".
/// </remarks>
/// <param name="errors">Error list.</param>
/// <param name="itemID">Script ID.</param>
/// <param name="nodePath">Path of the node.</param>
/// <param name="segmentInterval">Segment interval.</param>
/// <param name="preSegEnd">Previous segment end point; updated to the end of this segment.</param>
private static void CheckSegment(ErrorSet errors, string itemID, string nodePath, SegmentInterval segmentInterval, ref int preSegEnd)
{
    bool startsBeforePreviousEnd = segmentInterval.Begin < preSegEnd;
    if (startsBeforePreviousEnd)
    {
        string acousticsPath = string.Format(CultureInfo.InvariantCulture, "{0}.Acoustics", nodePath);
        string beginText = segmentInterval.Begin.ToString(CultureInfo.InvariantCulture);
        string previousEndText = preSegEnd.ToString(CultureInfo.InvariantCulture);

        errors.Add(ScriptError.SegmentSequenceError, itemID, acousticsPath, beginText, previousEndText);
    }

    preSegEnd = segmentInterval.End;
}
/// <summary>
/// Parses <paramref name="line"/> as a condition line when it is one.
/// </summary>
/// <remarks>
/// A condition line met while <paramref name="polyphonyWord"/> is null is
/// reported as <c>MissKeyValueLine</c>. The line is parsed in either case and
/// the parse errors are appended to <paramref name="errorSet"/>.
/// </remarks>
/// <param name="line">Line to inspect.</param>
/// <param name="phoneSet">Phone set passed to the condition parser.</param>
/// <param name="polyphonyWord">Rule the condition belongs to; may be null.</param>
/// <param name="errorSet">Receives the errors found.</param>
/// <returns>Whether the line is a condition line.</returns>
private bool TryParseConditionLine(string line, TtsPhoneSet phoneSet, PolyphonyRule polyphonyWord, ErrorSet errorSet)
{
    if (!IsConditionLine(line))
    {
        return false;
    }

    if (polyphonyWord == null)
    {
        errorSet.Add(PolyRuleError.MissKeyValueLine, line);
    }

    ErrorSet lineErrors = ParseConditionLine(line, phoneSet, polyphonyWord);
    errorSet.AddRange(lineErrors);

    return true;
}
/// <summary>
/// Returns the data object, loading it on the first call.
/// </summary>
/// <remarks>
/// The load is attempted only once. If the object is still null after that
/// attempt, every later call reports <c>RawDataError</c>. On the first call an
/// unset path is reported as <c>PathNotInitialized</c> and a missing file as
/// <c>RawDataNotFound</c>.
/// </remarks>
/// <param name="errorSet">Receives the errors found.</param>
/// <returns>The loaded object, or null when it could not be loaded.</returns>
/// <exception cref="ArgumentNullException">When <paramref name="errorSet"/> is null.</exception>
public object GetObject(ErrorSet errorSet)
{
    if (errorSet == null)
    {
        throw new ArgumentNullException("errorSet");
    }

    if (_object == null)
    {
        if (_processedLoad)
        {
            // An earlier load attempt already failed.
            errorSet.Add(DataCompilerError.RawDataError, this.Name);
        }
        else
        {
            _processedLoad = true;

            if (string.IsNullOrEmpty(this.Path))
            {
                errorSet.Add(DataCompilerError.PathNotInitialized, this.Name);
            }
            else if (File.Exists(this.Path))
            {
                _object = LoadDataObject(errorSet);
            }
            else
            {
                errorSet.Add(DataCompilerError.RawDataNotFound, this.Name, this.Path);
            }
        }
    }

    return _object;
}
/// <summary>
/// Parses <paramref name="line"/> as a key line when it is one, which starts
/// a new polyphony rule.
/// </summary>
/// <remarks>
/// The rule that was being built (if any) is finished first: it is added to
/// <c>_polyphonyWords</c> when it has at least one pronunciation, otherwise
/// <c>NoConditionForWord</c> is reported. <paramref name="polyphonyWord"/> is
/// then replaced by a new rule for <paramref name="domain"/>; its word is set
/// only when parsing the key value added no error.
/// </remarks>
/// <param name="line">Line to inspect.</param>
/// <param name="polyphonyWord">Rule being built; replaced when the line is a key line.</param>
/// <param name="errorSet">Receives the errors found.</param>
/// <param name="domain">Domain assigned to the new rule.</param>
/// <returns>Whether the line is a key line.</returns>
private bool TryParseKeyLine(string line, ref PolyphonyRule polyphonyWord, ErrorSet errorSet, string domain)
{
    if (!IsKeyLine(line))
    {
        return false;
    }

    if (polyphonyWord != null)
    {
        bool hasPronunciations = polyphonyWord.PolyphonyProns.Count != 0;
        if (hasPronunciations)
        {
            _polyphonyWords.Add(polyphonyWord);
        }
        else
        {
            errorSet.Add(PolyRuleError.NoConditionForWord, polyphonyWord.Word);
        }
    }

    polyphonyWord = new PolyphonyRule();
    polyphonyWord.Domain = domain;

    // Detect a failed parse by comparing the error count before and after.
    int errorsBefore = errorSet.Errors.Count;
    string parsedWord = ParseKeyValueLine(line, errorSet);
    if (errorSet.Errors.Count == errorsBefore)
    {
        polyphonyWord.Word = parsedWord;
    }

    return true;
}
/// <summary>
/// Compiles the truncate rules of an XML rule file into a binary writer.
/// </summary>
/// <remarks>
/// Rules are read from "/tts:offline/tts:truncateRules/tts:truncateRule".
/// The "side" attribute maps to direction 2 ("Right") or 1 ("Left"), compared
/// case-insensitively; any other value is reported and leaves direction 0.
/// A rule with more than five phones is reported and dropped. Phones unknown
/// to <paramref name="phoneSet"/> are reported and left out of the id list,
/// but the rule is still kept.
/// Output: the rule count, then per rule its direction followed by six
/// id slots (unused slots are 0), each written via
/// <c>BitConverter.GetBytes</c>.
/// </remarks>
/// <param name="truncRuleFileName">File path of trunc rule.</param>
/// <param name="phoneSet">Phone set used to resolve phone ids.</param>
/// <param name="bw">Binary writer that receives the compiled rules.</param>
/// <returns>Errors found while compiling.</returns>
private static ErrorSet CompTruncRuleData(string truncRuleFileName, TtsPhoneSet phoneSet, BinaryWriter bw)
{
    // Maximum truncate rule length is 5 phonemes currently.
    const int MaxTruncRuleLength = 5;

    ErrorSet compileErrors = new ErrorSet();
    List<TruncateNucleusRule> compiledRules = new List<TruncateNucleusRule>();

    XmlDocument ruleDocument = new XmlDocument();
    ruleDocument.Load(truncRuleFileName);

    XmlNamespaceManager namespaces = new XmlNamespaceManager(ruleDocument.NameTable);
    namespaces.AddNamespace("tts", "http://schemas.microsoft.com/tts/toolsuite");

    XmlNodeList ruleNodes = ruleDocument.DocumentElement.SelectNodes(
        "/tts:offline/tts:truncateRules/tts:truncateRule", namespaces);

    if (ruleNodes != null)
    {
        foreach (XmlNode ruleNode in ruleNodes)
        {
            XmlElement ruleElement = ruleNode as XmlElement;
            string side = ruleElement.GetAttribute("side");

            int direction = 0;
            if (side.Equals("Right", StringComparison.OrdinalIgnoreCase))
            {
                // TruncFromRight
                direction = 2;
            }
            else if (side.Equals("Left", StringComparison.OrdinalIgnoreCase))
            {
                // TruncFromLeft
                direction = 1;
            }
            else
            {
                compileErrors.Add(UnitGeneratorDataCompilerError.WrongRuleSide, side, ruleElement.InnerXml);
            }

            XmlNodeList phoneNodes = ruleElement.SelectNodes("tts:phone", namespaces);
            if (phoneNodes.Count > MaxTruncRuleLength)
            {
                compileErrors.Add(
                    UnitGeneratorDataCompilerError.RuleLengthExceeded,
                    MaxTruncRuleLength.ToString(CultureInfo.InvariantCulture),
                    ruleElement.InnerXml);
                continue;
            }

            // One extra slot so the id list always ends with a 0.
            short[] phoneIds = new short[MaxTruncRuleLength + 1];
            int used = 0;

            foreach (XmlNode phoneNode in phoneNodes)
            {
                XmlElement phoneElement = phoneNode as XmlElement;
                string phoneValue = phoneElement.GetAttribute("value");
                Phone phone = phoneSet.GetPhone(phoneValue);

                if (phone == null)
                {
                    compileErrors.Add(UnitGeneratorDataCompilerError.InvalidPhone, phoneValue);
                }
                else
                {
                    phoneIds[used] = (short)phone.Id;
                    used++;
                }
            }

            phoneIds[used] = 0;

            TruncateNucleusRule compiledRule = new TruncateNucleusRule();
            compiledRule.Ids = phoneIds;
            compiledRule.Direction = direction;
            compiledRules.Add(compiledRule);
        }
    }

    // Write the data.
    bw.Write(compiledRules.Count);
    foreach (TruncateNucleusRule compiledRule in compiledRules)
    {
        bw.Write(compiledRule.Direction);
        for (int slot = 0; slot < compiledRule.Ids.Length; slot++)
        {
            bw.Write(BitConverter.GetBytes(compiledRule.Ids[slot]));
        }
    }

    return compileErrors;
}
/// <summary>
/// Parses one condition expression of the form "key operator value" into
/// <paramref name="condition"/>.
/// </summary>
/// <remarks>
/// The operator is the first entry of <c>_allOperators</c> found in the
/// expression; when the expression contains a double quote after position 0,
/// only the text before that quote is searched, so operator characters
/// inside a quoted string value are ignored. Operators are located with
/// ordinal comparison because they are syntax tokens, not linguistic text.
/// The key must be declared in <c>_keyTypes</c>. A value for a string key
/// must be enclosed in double quotes (the quotes are stripped); any other key
/// needs an integer value. When the value is malformed,
/// <c>InvalidConditionFormat</c> is reported and the raw value text is still
/// stored in <c>condition.Value</c>.
/// </remarks>
/// <param name="expression">Condition expression.</param>
/// <param name="condition">Condition that receives operator, key and value.</param>
/// <param name="errorSet">Receives the errors found.</param>
private void ParsePolyCondition(string expression, PolyphonyCondition condition, ErrorSet errorSet)
{
    // If the value is a string, search for the operator before the first quote only.
    string operatorScope = expression;
    int quoteIndex = expression.IndexOf('"');
    if (quoteIndex > 0)
    {
        operatorScope = expression.Substring(0, quoteIndex);
    }

    foreach (string oper in _allOperators)
    {
        if (operatorScope.IndexOf(oper, StringComparison.Ordinal) >= 0)
        {
            condition.Operator = oper;
            break;
        }
    }

    if (string.IsNullOrEmpty(condition.Operator))
    {
        errorSet.Add(PolyRuleError.MissingOperatorInCondition, expression);
        return;
    }

    int operatorIndex = expression.IndexOf(condition.Operator, StringComparison.Ordinal);
    condition.Key = expression.Substring(0, operatorIndex).Trim();

    KeyType keyType;
    if (!_keyTypes.TryGetValue(condition.Key, out keyType))
    {
        errorSet.Add(PolyRuleError.NotDeclearedConditionKey, condition.Key, expression);
        return;
    }

    string valueExpression = expression.Substring(operatorIndex + condition.Operator.Length).Trim();

    if (keyType == KeyType.String)
    {
        Match match = Regex.Match(valueExpression, @"^""(.*)""$");
        if (match.Success)
        {
            valueExpression = match.Groups[1].ToString();
        }
        else
        {
            errorSet.Add(PolyRuleError.InvalidConditionFormat, expression);
        }
    }
    else
    {
        int intValue;
        if (!int.TryParse(valueExpression, out intValue))
        {
            errorSet.Add(PolyRuleError.InvalidConditionFormat, expression);
        }
    }

    condition.Value = valueExpression;
}
/// <summary>
/// Converts a case string into a combined case id. If every character fails,
/// the id is 0.
/// </summary>
/// <remarks>
/// Each character is resolved with <c>GetId</c> and its value is OR-ed into
/// <paramref name="id"/>. Characters that resolve to 0, or whose bits are
/// already set in <paramref name="id"/>, are reported as errors and do not
/// change the id.
/// </remarks>
/// <param name="caseStr">Case string; one character per case.</param>
/// <param name="id">Receives the OR-ed case id.</param>
/// <returns>Errors found while converting (unrecognized or duplicate characters).</returns>
/// <exception cref="ArgumentNullException">When <paramref name="caseStr"/> is null or empty.</exception>
public static ErrorSet StringToId(string caseStr, out int id)
{
    if (string.IsNullOrEmpty(caseStr))
    {
        throw new ArgumentNullException("caseStr");
    }

    id = 0;
    ErrorSet conversionErrors = new ErrorSet();

    foreach (char symbol in caseStr)
    {
        int bits = (int)GetId(symbol);
        if (bits == 0)
        {
            conversionErrors.Add(CaseError.UnrecognizedCase, symbol.ToString());
            continue;
        }

        if ((bits & id) == 0)
        {
            id |= bits;
        }
        else
        {
            // These bits were already contributed by an earlier character.
            conversionErrors.Add(CaseError.DuplicateCase, symbol.ToString());
        }
    }

    return conversionErrors;
}
/// <summary>
/// Reports <c>LackGeneralDomainPronError</c> when the word has no entry for
/// the general domain.
/// </summary>
/// <remarks>
/// The general domain is checked to avoid the case where a word has, for
/// example, an address domain pronunciation but gets LTSed in the general
/// domain.
/// </remarks>
/// <param name="domainLexItems">Lexicon items of the word, keyed by domain.</param>
/// <param name="errorSet">Receives the error.</param>
/// <param name="word">The current word; used as the error argument.</param>
private void CheckGeneralPronExist(Dictionary<string, LexicalItem> domainLexItems, ErrorSet errorSet, string word)
{
    Helper.ThrowIfNull(domainLexItems);
    Helper.ThrowIfNull(errorSet);

    bool hasGeneralDomain = domainLexItems.ContainsKey(DomainItem.GeneralDomain);
    if (hasGeneralDomain)
    {
        return;
    }

    errorSet.Add(LexiconError.LackGeneralDomainPronError, word);
}
/// <summary>
/// Parses a condition line into a pronunciation with its conditions and adds
/// it to <paramref name="polyphonyWord"/>.
/// </summary>
/// <remarks>
/// Group 1 of <c>ConditionLineRegex</c> holds the conditions and group 2 the
/// pronunciation. An empty pronunciation is allowed; a non-empty one is
/// validated when <paramref name="phoneSet"/> is given. The pronunciation is
/// added to the rule only when at least one condition matched and no error of
/// severity <c>MustFix</c> was recorded.
/// </remarks>
/// <param name="line">Condition line.</param>
/// <param name="phoneSet">Phone set for pronunciation validation; may be null.</param>
/// <param name="polyphonyWord">Rule that receives the pronunciation; null is reported as an error.</param>
/// <returns>Errors found while parsing the line.</returns>
private ErrorSet ParseConditionLine(string line, TtsPhoneSet phoneSet, PolyphonyRule polyphonyWord)
{
    ErrorSet lineErrors = new ErrorSet();

    Match lineMatch = Regex.Match(line, ConditionLineRegex);
    if (lineMatch.Groups.Count < 3)
    {
        lineErrors.Add(PolyRuleError.InvalidConditionFormat, line);
        return lineErrors;
    }

    PolyphonyPron parsedPron = new PolyphonyPron();
    parsedPron.Pron = lineMatch.Groups[2].ToString().Trim();

    // Allow empty pronunciation for polyphony rule.
    bool shouldValidatePron = !string.IsNullOrEmpty(parsedPron.Pron) && phoneSet != null;
    if (shouldValidatePron)
    {
        lineErrors.AddRange(Pronunciation.Validate(parsedPron.Pron, phoneSet));
    }

    string conditionText = lineMatch.Groups[1].ToString().Trim();

    int conditionCount = 0;
    foreach (Match conditionMatch in Regex.Matches(conditionText, ConditionRegex))
    {
        conditionCount++;

        PolyphonyCondition parsedCondition = new PolyphonyCondition();
        ParsePolyCondition(conditionMatch.Value.Trim(), parsedCondition, lineErrors);
        parsedPron.Conditions.Add(parsedCondition);
    }

    if (conditionCount == 0)
    {
        lineErrors.Add(PolyRuleError.InvalidConditionFormat, line);
        return lineErrors;
    }

    if (lineErrors.GetSeverityCount(ErrorSeverity.MustFix) != 0)
    {
        // Do not keep a pronunciation that has must-fix errors.
        return lineErrors;
    }

    if (polyphonyWord == null)
    {
        lineErrors.Add(PolyRuleError.MissKeyValueLine, line);
    }
    else
    {
        polyphonyWord.PolyphonyProns.Add(parsedPron);
    }

    return lineErrors;
}
/// <summary>
/// Compiles a truncate rule file into <paramref name="outputStream"/>.
/// </summary>
/// <remarks>
/// The phone set is validated first; when its error set contains a
/// <c>MustFix</c> error, <c>InvalidPhoneSet</c> is reported and nothing is
/// written.
/// NOTE(review): the BinaryWriter is not disposed here; disposing it would
/// close the underlying stream - presumably the caller owns the stream.
/// </remarks>
/// <param name="truncRuleFileName">File path of trunc rule.</param>
/// <param name="phoneSet">Phone set.</param>
/// <param name="outputStream">Output stream.</param>
/// <returns>Errors found while compiling.</returns>
/// <exception cref="ArgumentNullException">
/// When <paramref name="truncRuleFileName"/> is null or empty, or when
/// <paramref name="phoneSet"/> or <paramref name="outputStream"/> is null.
/// </exception>
public static ErrorSet Compile(string truncRuleFileName, TtsPhoneSet phoneSet, Stream outputStream)
{
    if (string.IsNullOrEmpty(truncRuleFileName))
    {
        throw new ArgumentNullException("truncRuleFileName");
    }

    if (phoneSet == null)
    {
        throw new ArgumentNullException("phoneSet");
    }

    if (outputStream == null)
    {
        throw new ArgumentNullException("outputStream");
    }

    ErrorSet compileErrors = new ErrorSet();

    phoneSet.Validate();
    if (phoneSet.ErrorSet.Contains(ErrorSeverity.MustFix))
    {
        compileErrors.Add(UnitGeneratorDataCompilerError.InvalidPhoneSet);
        return compileErrors;
    }

    BinaryWriter ruleWriter = new BinaryWriter(outputStream);
    ErrorSet ruleErrors = CompTruncRuleData(truncRuleFileName, phoneSet, ruleWriter);
    compileErrors.Merge(ruleErrors);

    return compileErrors;
}
/// <summary>
/// Extracts the primary key value from a key line using <c>KeyLineRegex</c>.
/// </summary>
/// <remarks>
/// The match must expose exactly two groups (the whole match plus one
/// capture); otherwise <c>InvalidPrimaryKeyValueForamt</c> is reported.
/// </remarks>
/// <param name="line">Key line.</param>
/// <param name="errorSet">Receives the format error.</param>
/// <returns>The captured key value, or an empty string when the line is malformed.</returns>
private string ParseKeyValueLine(string line, ErrorSet errorSet)
{
    Match keyMatch = Regex.Match(line, KeyLineRegex);

    if (keyMatch.Groups.Count == 2)
    {
        return keyMatch.Groups[1].ToString();
    }

    errorSet.Add(PolyRuleError.InvalidPrimaryKeyValueForamt, line);
    return string.Empty;
}
/// <summary>
/// Validates a char table.
/// </summary>
/// <remarks>
/// Table-level checks: every symbol may occur only once, the number of
/// upper case elements must equal the number of lower case elements, and
/// there must be exactly 10 digit elements. Unless
/// <paramref name="shallow"/> is set, each element is also validated with
/// <c>ValidateCharElement</c>.
/// </remarks>
/// <param name="table">Char table.</param>
/// <param name="shallow">When true, the per-element validation is skipped.</param>
/// <param name="wordsNotInLexicon">Passed through to <c>ValidateCharElement</c>.</param>
/// <returns>Errors found while validating.</returns>
/// <exception cref="ArgumentNullException">When <paramref name="table"/> is null.</exception>
public ErrorSet Validate(CharTable table, bool shallow, Collection<string> wordsNotInLexicon)
{
    if (table == null)
    {
        throw new ArgumentNullException("table");
    }

    ErrorSet tableErrors = new ErrorSet();
    Collection<string> seenSymbols = new Collection<string>();

    int upperCount = 0;
    int lowerCount = 0;
    int digitCount = 0;

    foreach (CharElement current in table.CharList)
    {
        switch (current.Type)
        {
            case CharElement.CharType.UpperCase:
                upperCount++;
                break;
            case CharElement.CharType.LowerCase:
                lowerCount++;
                break;
            case CharElement.CharType.Digit:
                digitCount++;
                break;
        }

        if (seenSymbols.Contains(current.Symbol))
        {
            tableErrors.Add(new Error(CharTableError.DuplicateSymbol, current.Symbol));
        }
        else
        {
            seenSymbols.Add(current.Symbol);
        }

        if (!shallow)
        {
            ValidateCharElement(current, tableErrors, wordsNotInLexicon);
        }
    }

    if (upperCount != lowerCount)
    {
        string upperText = upperCount.ToString(CultureInfo.InvariantCulture);
        string lowerText = lowerCount.ToString(CultureInfo.InvariantCulture);
        tableErrors.Add(new Error(CharTableError.MismatchUpperAndLower, upperText, lowerText));
    }

    if (digitCount != 10)
    {
        tableErrors.Add(new Error(CharTableError.ErrorDigitCount));
    }

    return tableErrors;
}
/// <summary>
/// Checks whether <c>_polyphonyWords</c> contains more than one rule for the
/// same word.
/// </summary>
/// <remarks>
/// One <c>DuplicateWordDefinitions</c> error is reported for every rule whose
/// word was already seen earlier in the list. A hash set is used for the
/// lookups so the check is linear in the number of rules.
/// </remarks>
/// <returns>One error per repeated word definition; empty when all words are unique.</returns>
private ErrorSet CheckDupWordDefinitions()
{
    ErrorSet errorSet = new ErrorSet();
    HashSet<string> seenWords = new HashSet<string>();

    foreach (PolyphonyRule rule in _polyphonyWords)
    {
        // Add returns false when the word is already in the set.
        if (!seenWords.Add(rule.Word))
        {
            errorSet.Add(PolyRuleError.DuplicateWordDefinitions, rule.Word);
        }
    }

    return errorSet;
}
/// <summary>
/// Validates that the units listed in a unit feature file line up with the
/// units of the corresponding sentences in a script file.
/// </summary>
/// <param name="featureFile">Path of the unit feature file.</param>
/// <param name="scriptFile">Path of the script file.</param>
/// <param name="language">Language both files are expected to be in.</param>
/// <returns>Per-unit alignment errors that were found.</returns>
/// <exception cref="InvalidDataException">
/// The script file loads with errors, the feature file contains no unit, or
/// the feature file declares a different language.
/// </exception>
public static ErrorSet ValidateFeatureData(string featureFile, string scriptFile, Language language)
{
    ErrorSet errorSet = new ErrorSet();

    TtsPhoneSet phoneSet = Localor.GetPhoneSet(language);
    XmlScriptValidateSetting validateSetting = new XmlScriptValidateSetting(phoneSet, null);
    XmlScriptFile script = XmlScriptFile.LoadWithValidation(scriptFile, validateSetting);
    if (script.ErrorSet.Count > 0)
    {
        string message = string.Format(
            CultureInfo.InvariantCulture,
            "{0} error(s) found in the script file [{1}]",
            script.ErrorSet.Count,
            scriptFile);
        throw new InvalidDataException(message);
    }

    XmlUnitFeatureFile unitFeatureFile = new XmlUnitFeatureFile(featureFile);
    if (unitFeatureFile.Units.Count <= 0)
    {
        // Fatal: reported through the exception only, since the error set is
        // never returned on this path.
        string message = string.Format(
            CultureInfo.InvariantCulture,
            "Zero unit feature item in unit feature file {0}",
            featureFile);
        throw new InvalidDataException(message);
    }

    if (unitFeatureFile.Language != language)
    {
        string message = string.Format(
            CultureInfo.InvariantCulture,
            "Different language\r\nScript File {0}: lang = {1}\r\n Feature File {2}: lang = {3}",
            scriptFile,
            Localor.LanguageToString(language),
            featureFile,
            Localor.LanguageToString(unitFeatureFile.Language));
        throw new InvalidDataException(message);
    }

    foreach (string key in unitFeatureFile.Units.Keys)
    {
        UnitFeature unit = unitFeatureFile.Units[key];
        string sid = unit.SentenceId;
        int unitIndex = unit.Index;
        string unitName = unit.Name;

        if (unitIndex < 0)
        {
            string message = string.Format(
                CultureInfo.InvariantCulture,
                "invalid unit index [{0}] found in feature file [{1}]. It should not be negative integer for unit indexing.",
                unitIndex,
                featureFile);
            errorSet.Add(VoiceFontError.OtherErrors, message);
            continue;
        }

        try
        {
            if (!script.ItemDic.ContainsKey(sid))
            {
                string message = string.Format(
                    CultureInfo.InvariantCulture,
                    "sentence id {0} in feature file [{1}] is not in script file [{2}]",
                    sid,
                    featureFile,
                    scriptFile);
                errorSet.Add(ScriptError.OtherErrors, sid, message);
                continue;
            }

            ScriptItem item = script.ItemDic[sid];
            Phoneme phoneme = Localor.GetPhoneme(language);
            SliceData sliceData = Localor.GetSliceData(language);
            Collection<TtsUnit> itemUnits = item.GetUnits(phoneme, sliceData);

            if (unitIndex >= itemUnits.Count)
            {
                string message = string.Format(
                    CultureInfo.InvariantCulture,
                    "the {0}th unit [{1}] in sentence {2} of feature file [{3}] is out of range for sentence {2} in script file [{4}]",
                    unitIndex,
                    unitName,
                    sid,
                    featureFile,
                    scriptFile);
                errorSet.Add(ScriptError.OtherErrors, sid, message);
                continue;
            }

            // The script unit's full name is compared after its spaces are
            // replaced with '+'.
            TtsUnit ttsUnit = itemUnits[unitIndex];
            string sliceName = ttsUnit.FullName.Replace(' ', '+');
            if (sliceName != unitName)
            {
                string str1 = "the {0}th unit [{1}] in sentence {3} of feature file [{4}] ";
                string str2 = "is not matched with {0}th unit [{2}] for sentence {3} in script file [{5}]";
                string message = string.Format(
                    CultureInfo.InvariantCulture,
                    str1 + str2,
                    unitIndex,
                    unitName,
                    sliceName,
                    sid,
                    featureFile,
                    scriptFile);
                errorSet.Add(ScriptError.OtherErrors, sid, message);
            }
        }
        catch (InvalidDataException ide)
        {
            // Invalid data raised while processing one unit is recorded
            // against its sentence and the remaining units are still checked.
            errorSet.Add(ScriptError.OtherErrors, sid, Helper.BuildExceptionMessage(ide));
        }
    }

    return errorSet;
}
/// <summary>
/// Checks whether several pronunciations of this rule share the same sorted
/// condition string.
/// </summary>
/// <returns>
/// Error set with one entry per repeated condition:
/// <c>DuplicateRuleConditionsForSamePron</c> when the repeat has the same
/// pronunciation as the first occurrence, otherwise
/// <c>DuplicateRuleConditionsForDifferentPron</c>.
/// </returns>
public ErrorSet CheckDupRuleConditions()
{
    ErrorSet errorSet = new ErrorSet();

    // Maps a sorted condition string to the pronunciation that used it first.
    // The SortedDictionary (and its default string comparer) is kept so that
    // which keys count as equal does not change.
    SortedDictionary<string, string> conditions = new SortedDictionary<string, string>();

    foreach (PolyphonyPron pron in this.PolyphonyProns)
    {
        string sortedConditionStr = pron.SortedConditionString;

        // Single lookup instead of ContainsKey followed by indexer reads.
        string firstPron;
        if (!conditions.TryGetValue(sortedConditionStr, out firstPron))
        {
            conditions.Add(sortedConditionStr, pron.Pron);
            continue;
        }

        if (firstPron == pron.Pron)
        {
            errorSet.Add(
                PolyRuleError.DuplicateRuleConditionsForSamePron,
                pron.ConditionString,
                pron.Pron,
                this.Word);
        }
        else
        {
            errorSet.Add(
                PolyRuleError.DuplicateRuleConditionsForDifferentPron,
                pron.ConditionString,
                firstPron,
                pron.Pron,
                this.Word);
        }
    }

    return errorSet;
}
/// <summary>
/// Loads a polyphony rule file: key declarations first, then key lines and
/// condition lines, followed by duplicate checks over the loaded rules.
/// </summary>
/// <param name="filePath">Path of the polyphony rule file.</param>
/// <param name="phoneSet">Phone set handed to condition-line parsing.</param>
/// <returns>Errors collected while parsing and checking the file.</returns>
/// <exception cref="ArgumentNullException"><paramref name="filePath"/> is null or empty.</exception>
/// <exception cref="InvalidDataException">The file is not a unicode file.</exception>
public ErrorSet Load(string filePath, TtsPhoneSet phoneSet)
{
    // This validation is needed by Fxcop checking parameters.
    if (phoneSet == null)
    {
        phoneSet = null;
    }

    if (string.IsNullOrEmpty(filePath))
    {
        throw new ArgumentNullException("filePath");
    }

    if (!File.Exists(filePath))
    {
        throw Helper.CreateException(typeof(FileNotFoundException), filePath);
    }

    if (!Helper.IsUnicodeFile(filePath))
    {
        string notUnicode = Helper.NeutralFormat(
            "Polyphony rule file [{0}] is not unicode.", filePath);
        throw new InvalidDataException(notUnicode);
    }

    ErrorSet allErrors = new ErrorSet();
    _keyTypes.Clear();

    bool headerDone = false;
    bool expectFirstKey = true;
    PolyphonyRule currentRule = null;
    int lineNumber = 0;
    string currentDomain = DomainItem.GeneralDomain;

    foreach (string rawLine in Helper.FileLines(filePath, Encoding.Unicode, false))
    {
        lineNumber++;
        string content = rawLine.Trim();

        // Blank lines and comments carry no data.
        if (string.IsNullOrEmpty(content) || IsComment(content))
        {
            continue;
        }

        // A domain tag only updates the domain used by the next key line.
        if (IsDomainTag(content))
        {
            ParseDomainKey(content, ref currentDomain);
            continue;
        }

        ErrorSet lineErrors = new ErrorSet();

        // Header section: key declarations, until the first line that is not one.
        if (!headerDone)
        {
            bool declaresKey = TryParseKeyDeclear(content, ref expectFirstKey, lineErrors);
            AddParseError(allErrors, lineNumber, lineErrors);
            if (declaresKey)
            {
                continue;
            }

            headerDone = true;
        }

        PolyruleKeys.Instance.KeyTypes = _keyTypes;

        // Try the line as a key line; the domain falls back to the general
        // domain after every attempt.
        lineErrors.Clear();
        bool parsedAsKeyLine = TryParseKeyLine(content, ref currentRule, lineErrors, currentDomain);
        currentDomain = DomainItem.GeneralDomain;
        AddParseError(allErrors, lineNumber, lineErrors);
        if (parsedAsKeyLine)
        {
            continue;
        }

        // Otherwise try it as a condition line of the current rule.
        lineErrors.Clear();
        bool parsedAsCondition = TryParseConditionLine(content, phoneSet, currentRule, lineErrors);
        AddParseError(allErrors, lineNumber, lineErrors);
        if (!parsedAsCondition)
        {
            allErrors.Add(
                PolyRuleError.InvalidLineFormat,
                lineNumber.ToString(CultureInfo.InvariantCulture),
                content);
        }
    }

    // The rule still being built when the file ends is stored here.
    if (currentRule != null)
    {
        _polyphonyWords.Add(currentRule);
    }

    if (string.IsNullOrEmpty(_keyString))
    {
        allErrors.Add(PolyRuleError.MissPrimaryKey, filePath);
    }

    allErrors.AddRange(CheckDupWordDefinitions());

    foreach (PolyphonyRule loadedRule in _polyphonyWords)
    {
        allErrors.AddRange(loadedRule.CheckDupRuleConditions());
    }

    return allErrors;
}
/// <summary>
/// Validates a word's pronunciation, copies every finding into
/// <paramref name="errorSet"/> and clears the pronunciation's valid flag when
/// a blocking error is found.
/// </summary>
/// <param name="word">Word the pronunciation belongs to.</param>
/// <param name="lexPron">Lexicon pronunciation to validate; its <c>Valid</c> flag is updated.</param>
/// <param name="ttsPhoneSet">TTS phone set used for the validation.</param>
/// <param name="errorSet">Error set receiving the findings as <c>LexiconError.PronunciationError</c>.</param>
private static void ValidatePronunciation(string word, LexiconPronunciation lexPron,
    TtsPhoneSet ttsPhoneSet, ErrorSet errorSet)
{
    ErrorSet pronFindings = Pronunciation.Validate(lexPron.Symbolic, ttsPhoneSet);
    bool hasBlockingError = false;

    foreach (Error finding in pronFindings.Errors)
    {
        errorSet.Add(LexiconError.PronunciationError, finding, word);

        if (finding.Severity != ErrorSeverity.MustFix)
        {
            continue;
        }

        // The vowel / sonorant count errors are reported but do not
        // invalidate the pronunciation.
        bool isCountError =
            finding.Enum.Equals(PronunciationError.VowelAndSonorantCountLessThanMinimum) ||
            finding.Enum.Equals(PronunciationError.VowelAndSonorantCountGreaterThanMaximum) ||
            finding.Enum.Equals(PronunciationError.VowelCountLessThanMinimum) ||
            finding.Enum.Equals(PronunciationError.VowelCountGreaterThanMaximum);

        if (!isCountError)
        {
            hasBlockingError = true;
        }
    }

    lexPron.Valid = lexPron.Valid && !hasBlockingError;
}