Ejemplo n.º 1
0
        /// <summary>
        /// Validates this lexicon against a phone set and a POS set.
        /// </summary>
        /// <remarks>
        /// Both arguments must be non-null and must report the same language as this
        /// lexicon. The validation itself is delegated to the three-argument overload,
        /// passing no lexical attribute schema.
        /// </remarks>
        /// <param name="ttsPhoneSet">TTS phone set.</param>
        /// <param name="ttsPosSet">TTS POS set.</param>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        /// <exception cref="InvalidDataException">
        /// The language of an argument differs from the language of this lexicon.
        /// </exception>
        public void Validate(TtsPhoneSet ttsPhoneSet, TtsPosSet ttsPosSet)
        {
            // The POS set is checked first, so it is the one reported when both are null.
            if (ttsPosSet == null)
            {
                throw new ArgumentNullException("ttsPosSet");
            }

            if (ttsPhoneSet == null)
            {
                throw new ArgumentNullException("ttsPhoneSet");
            }

            if (!ttsPosSet.Language.Equals(Language))
            {
                string posSetMessage = Error.BuildMessage(
                    CommonError.NotConsistentLanguage,
                    Language.ToString(),
                    "lexicon",
                    ttsPosSet.Language.ToString(),
                    "pos set");
                throw new InvalidDataException(posSetMessage);
            }

            if (!ttsPhoneSet.Language.Equals(Language))
            {
                string phoneSetMessage = Error.BuildMessage(
                    CommonError.NotConsistentLanguage,
                    Language.ToString(),
                    "lexicon",
                    ttsPhoneSet.Language.ToString(),
                    "phone set");
                throw new InvalidDataException(phoneSetMessage);
            }

            Validate(ttsPhoneSet, ttsPosSet, null);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Validates this lexicon against a phone set and a lexical attribute schema.
        /// </summary>
        /// <remarks>
        /// Both arguments must be non-null and must report the same language as this
        /// lexicon. The validation itself is delegated to the three-argument overload,
        /// passing no POS set.
        /// </remarks>
        /// <param name="ttsPhoneSet">TTS phone set.</param>
        /// <param name="attributeSchema">TTS attribute schema.</param>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        /// <exception cref="InvalidDataException">
        /// The language of an argument differs from the language of this lexicon.
        /// </exception>
        public void Validate(TtsPhoneSet ttsPhoneSet, LexicalAttributeSchema attributeSchema)
        {
            // The schema is checked first, so it is the one reported when both are null.
            if (attributeSchema == null)
            {
                throw new ArgumentNullException("attributeSchema");
            }

            if (ttsPhoneSet == null)
            {
                throw new ArgumentNullException("ttsPhoneSet");
            }

            if (!attributeSchema.Language.Equals(Language))
            {
                string schemaMessage = Error.BuildMessage(
                    CommonError.NotConsistentLanguage,
                    Language.ToString(),
                    "lexicon",
                    attributeSchema.Language.ToString(),
                    "lexical attribute Schema");
                throw new InvalidDataException(schemaMessage);
            }

            if (!ttsPhoneSet.Language.Equals(Language))
            {
                string phoneSetMessage = Error.BuildMessage(
                    CommonError.NotConsistentLanguage,
                    Language.ToString(),
                    "lexicon",
                    ttsPhoneSet.Language.ToString(),
                    "phone set");
                throw new InvalidDataException(phoneSetMessage);
            }

            Validate(ttsPhoneSet, null, attributeSchema);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Parses <paramref name="line"/> as a condition line when it has the shape of one.
        /// </summary>
        /// <param name="line">Line to inspect.</param>
        /// <param name="phoneSet">Phone set forwarded to the condition parser.</param>
        /// <param name="polyphonyWord">Rule the parsed condition belongs to; may be null.</param>
        /// <param name="errorSet">Receives every error found while parsing.</param>
        /// <returns>
        /// True when the line is a condition line (even if parsing reported errors);
        /// false when it is not, in which case nothing is parsed or reported.
        /// </returns>
        private bool TryParseConditionLine(string line, TtsPhoneSet phoneSet,
            PolyphonyRule polyphonyWord, ErrorSet errorSet)
        {
            if (!IsConditionLine(line))
            {
                return false;
            }

            // Without a current rule a MissKeyValueLine error is recorded,
            // but the line is still handed to the parser below.
            if (polyphonyWord == null)
            {
                errorSet.Add(PolyRuleError.MissKeyValueLine, line);
            }

            ErrorSet conditionErrors = ParseConditionLine(line, phoneSet, polyphonyWord);
            errorSet.AddRange(conditionErrors);

            return true;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Validates the lexicon against a phone set plus a POS set and/or a lexical attribute schema.
        /// </summary>
        /// <remarks>
        /// Each supplied dependency is validated first. Lexicon items are only validated
        /// when none of the dependencies reports a must-fix error; otherwise an
        /// InvalidDependentData error is recorded per broken dependency. The lexicon is
        /// flagged as validated in every case.
        /// </remarks>
        /// <param name="ttsPhoneSet">TTS phone set; must not be null.</param>
        /// <param name="ttsPosSet">TTS POS set; may be null when a schema is given.</param>
        /// <param name="attributeSchema">Lexical attribute schema; may be null when a POS set is given.</param>
        private void Validate(TtsPhoneSet ttsPhoneSet, TtsPosSet ttsPosSet,
            LexicalAttributeSchema attributeSchema)
        {
            Debug.Assert(ttsPhoneSet != null);
            Debug.Assert(ttsPosSet != null || attributeSchema != null);

            bool dependenciesUsable = true;

            ttsPhoneSet.Validate();
            bool phoneSetBroken = ttsPhoneSet.ErrorSet.Contains(ErrorSeverity.MustFix);
            if (phoneSetBroken)
            {
                ErrorSet.Add(LexiconError.InvalidDependentData, "Phone set");
                dependenciesUsable = false;
            }

            if (ttsPosSet != null)
            {
                ttsPosSet.Validate();
                bool posSetBroken = ttsPosSet.ErrorSet.Contains(ErrorSeverity.MustFix);
                if (posSetBroken)
                {
                    ErrorSet.Add(LexiconError.InvalidDependentData, "POS set");
                    dependenciesUsable = false;
                }
            }

            if (attributeSchema != null)
            {
                attributeSchema.Validate();
                bool schemaBroken = attributeSchema.ErrorSet.Contains(ErrorSeverity.MustFix);
                if (schemaBroken)
                {
                    ErrorSet.Add(LexiconError.InvalidDependentData, "Lexical Attribute Schema");
                    dependenciesUsable = false;
                }
            }

            if (dependenciesUsable)
            {
                bool anyValidItem = false;
                foreach (LexicalItem item in Items.Values)
                {
                    ErrorSet.Merge(item.Validate(ttsPhoneSet, ttsPosSet, attributeSchema));

                    // Once one valid item has been seen, the flag stays set.
                    if (!anyValidItem && item.Valid)
                    {
                        anyValidItem = true;
                    }
                }

                // A lexicon without a single valid item is reported as empty.
                if (!anyValidItem)
                {
                    ErrorSet.Add(LexiconError.EmptyLexicon);
                }
            }

            validated = true;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Parses one condition line into a pronunciation with its conditions and
        /// attaches it to the given rule.
        /// </summary>
        /// <remarks>
        /// The line is matched against ConditionLineRegex; group 1 is taken as the
        /// condition list and group 2 as the pronunciation. The pronunciation is only
        /// added to the rule when at least one condition was found and no must-fix
        /// error was collected.
        /// </remarks>
        /// <param name="line">Condition line to parse.</param>
        /// <param name="phoneSet">Phone set used to validate the pronunciation; validation is skipped when null.</param>
        /// <param name="polyphonyWord">Rule that receives the parsed pronunciation; a null rule is reported as an error.</param>
        /// <returns>Errors collected while parsing; empty when the line was accepted.</returns>
        private ErrorSet ParseConditionLine(string line, TtsPhoneSet phoneSet,
            PolyphonyRule polyphonyWord)
        {
            ErrorSet errors = new ErrorSet();

            Match lineMatch = Regex.Match(line, ConditionLineRegex);
            if (lineMatch.Groups.Count < 3)
            {
                errors.Add(PolyRuleError.InvalidConditionFormat,
                    line);
                return errors;
            }

            PolyphonyPron pron = new PolyphonyPron();
            pron.Pron = lineMatch.Groups[2].ToString().Trim();

            // Allow empty pronunciation for polyphony rule.
            if (!string.IsNullOrEmpty(pron.Pron) && phoneSet != null)
            {
                errors.AddRange(Pronunciation.Validate(pron.Pron, phoneSet));
            }

            string conditionText = lineMatch.Groups[1].ToString().Trim();
            bool foundCondition = false;
            foreach (Match conditionMatch in Regex.Matches(conditionText, ConditionRegex))
            {
                foundCondition = true;
                PolyphonyCondition condition = new PolyphonyCondition();
                ParsePolyCondition(conditionMatch.Value.Trim(), condition, errors);
                pron.Conditions.Add(condition);
            }

            if (!foundCondition)
            {
                // A condition line without any recognizable condition is malformed.
                errors.Add(PolyRuleError.InvalidConditionFormat, line);
            }
            else if (errors.GetSeverityCount(ErrorSeverity.MustFix) == 0)
            {
                if (polyphonyWord == null)
                {
                    errors.Add(PolyRuleError.MissKeyValueLine, line);
                }
                else
                {
                    polyphonyWord.PolyphonyProns.Add(pron);
                }
            }

            return errors;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Loads a polyphony rule file line by line and collects every error found on the way.
        /// </summary>
        /// <remarks>
        /// Blank lines, comment lines and domain tags are consumed first. The remaining
        /// lines are tried, in order, as key declarations (only until the first line that
        /// is not one), key lines and condition lines. A line that matches none of these
        /// is reported as InvalidLineFormat. After reading, duplicate word definitions
        /// and duplicate rule conditions are checked.
        /// </remarks>
        /// <param name="filePath">Path of the rule file; must exist and pass Helper.IsUnicodeFile.</param>
        /// <param name="phoneSet">Phone set handed to the condition-line parser; null is passed through unchanged.</param>
        /// <returns>All errors collected while loading; parse errors are added together with their line number.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="filePath"/> is null or empty.</exception>
        /// <exception cref="InvalidDataException">The file is not detected as a Unicode file.</exception>
        public ErrorSet Load(string filePath, TtsPhoneSet phoneSet)
        {
            // This validation is needed by Fxcop checking parameters.
            // It is a deliberate no-op: a null phone set stays null and is accepted.
            if (phoneSet == null)
            {
                phoneSet = null;
            }

            if (string.IsNullOrEmpty(filePath))
            {
                throw new ArgumentNullException("filePath");
            }

            if (!File.Exists(filePath))
            {
                // The exception instance is built by Helper.CreateException for the
                // FileNotFoundException type.
                throw Helper.CreateException(typeof(FileNotFoundException), filePath);
            }

            if (!Helper.IsUnicodeFile(filePath))
            {
                throw new InvalidDataException(Helper.NeutralFormat(
                    "Polyphony rule file [{0}] is not unicode.", filePath));
            }

            ErrorSet errorSet = new ErrorSet();
            _keyTypes.Clear();

            // finishReadHead flips to true at the first line that is not a key declaration.
            bool finishReadHead = false;
            bool firstKeyString = true;

            // The rule currently being built; key lines may replace it via the ref parameter.
            PolyphonyRule polyphonyWord = null;

            // 1-based number of the current line; blank and comment lines are counted too.
            int lineNum = 0;
            string domain = DomainItem.GeneralDomain;
            foreach (string line in Helper.FileLines(filePath, Encoding.Unicode, false))
            {
                lineNum++;
                string trimedLine = line.Trim();
                if (string.IsNullOrEmpty(trimedLine))
                {
                    continue;
                }

                if (IsComment(trimedLine))
                {
                    continue;
                }

                if (IsDomainTag(trimedLine))
                {
                    // The parsed domain is kept for the next line that reaches TryParseKeyLine.
                    ParseDomainKey(trimedLine, ref domain);
                    continue;
                }

                // One scratch error set is reused for each parsing attempt on this line.
                ErrorSet parseErrorSet = new ErrorSet();
                if (!finishReadHead)
                {
                    bool isKeyDeclear = TryParseKeyDeclear(trimedLine,
                        ref firstKeyString, parseErrorSet);
                    AddParseError(errorSet, lineNum, parseErrorSet);
                    if (isKeyDeclear)
                    {
                        continue;
                    }
                    else
                    {
                        // Not a key declaration: the header is over and this same line
                        // falls through to the key-line / condition-line attempts below.
                        finishReadHead = true;
                    }
                }

                // Publishes the declared key types through the PolyruleKeys singleton.
                // NOTE(review): presumably needed by the key/condition parsers below; confirm.
                PolyruleKeys.Instance.KeyTypes = _keyTypes;

                parseErrorSet.Clear();
                bool isKeyLine = TryParseKeyLine(trimedLine,
                    ref polyphonyWord, parseErrorSet, domain);

                // The domain is reset after every key-line attempt, whether or not it succeeded.
                domain = DomainItem.GeneralDomain;
                AddParseError(errorSet, lineNum, parseErrorSet);
                if (isKeyLine)
                {
                    continue;
                }

                parseErrorSet.Clear();
                bool isConditionLine = TryParseConditionLine(trimedLine, phoneSet,
                    polyphonyWord, parseErrorSet);
                AddParseError(errorSet, lineNum, parseErrorSet);
                if (isConditionLine)
                {
                    continue;
                }

                // The line matched none of the known line kinds.
                errorSet.Add(PolyRuleError.InvalidLineFormat,
                    lineNum.ToString(CultureInfo.InvariantCulture), trimedLine);
            }

            // The rule still being built when the file ends is stored here.
            // NOTE(review): earlier rules are presumably stored by TryParseKeyLine; confirm.
            if (polyphonyWord != null)
            {
                _polyphonyWords.Add(polyphonyWord);
            }

            if (string.IsNullOrEmpty(_keyString))
            {
                errorSet.Add(PolyRuleError.MissPrimaryKey,
                    filePath);
            }

            errorSet.AddRange(CheckDupWordDefinitions());

            foreach (PolyphonyRule rule in _polyphonyWords)
            {
                errorSet.AddRange(rule.CheckDupRuleConditions());
            }

            return errorSet;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Validates the symbolic pronunciation of a word and updates its validity flag.
        /// </summary>
        /// <remarks>
        /// Every pronunciation error is reported as a LexiconError.PronunciationError.
        /// A must-fix error invalidates the pronunciation unless it is one of the four
        /// vowel / vowel-and-sonorant count range errors, which are reported only.
        /// </remarks>
        /// <param name="word">Word the pronunciation belongs to; used in error reports.</param>
        /// <param name="lexPron">Lexicon pronunciation to validate; its Valid flag may be cleared.</param>
        /// <param name="ttsPhoneSet">TTS phone set to validate against.</param>
        /// <param name="errorSet">Receives the reported errors.</param>
        private static void ValidatePronunciation(string word, LexiconPronunciation lexPron, TtsPhoneSet ttsPhoneSet,
            ErrorSet errorSet)
        {
            bool hasBlockingError = false;
            ErrorSet pronErrors = Pronunciation.Validate(lexPron.Symbolic, ttsPhoneSet);

            foreach (Error error in pronErrors.Errors)
            {
                errorSet.Add(LexiconError.PronunciationError, error, word);

                if (error.Severity != ErrorSeverity.MustFix)
                {
                    continue;
                }

                // Count-range violations are reported but tolerated.
                bool toleratedCountError =
                    error.Enum.Equals(PronunciationError.VowelAndSonorantCountLessThanMinimum) ||
                    error.Enum.Equals(PronunciationError.VowelAndSonorantCountGreaterThanMaximum) ||
                    error.Enum.Equals(PronunciationError.VowelCountLessThanMinimum) ||
                    error.Enum.Equals(PronunciationError.VowelCountGreaterThanMaximum);
                if (!toleratedCountError)
                {
                    hasBlockingError = true;
                }
            }

            // A pronunciation that was already invalid stays invalid.
            lexPron.Valid = lexPron.Valid && !hasBlockingError;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Validates this lexicon item: merges duplicate pronunciation nodes, then checks
        /// each pronunciation and each of its properties.
        /// </summary>
        /// <remarks>
        /// Side effects: the pronunciation list of the item is replaced by the list of
        /// distinct pronunciations (properties of duplicates are merged into the first
        /// occurrence), and the Valid flags of properties, pronunciations and the item
        /// itself are cleared when errors are found. A pronunciation becomes invalid when
        /// all of its properties are invalid; the item becomes invalid when all of its
        /// pronunciations are invalid.
        /// </remarks>
        /// <param name="ttsPhoneSet">Phone set to validate lexicon item's pronunciation; must not be null.</param>
        /// <param name="ttsPosSet">Pos set of the lexicon item; may be null when a schema is given.</param>
        /// <param name="attributeSchema">Attribute schema; may be null when a POS set is given.</param>
        /// <returns>Error set of the validation.</returns>
        public ErrorSet Validate(TtsPhoneSet ttsPhoneSet, TtsPosSet ttsPosSet,
            LexicalAttributeSchema attributeSchema)
        {
            Debug.Assert(ttsPhoneSet != null);
            Debug.Assert(ttsPosSet != null || attributeSchema != null);
            ErrorSet errorSet = new ErrorSet();

            // Merge duplicate pronunciation node.
            // pronunciationIndex maps a symbolic pronunciation to the position of its
            // first occurrence inside distinctPronunciations.
            Collection<LexiconPronunciation> distinctPronunciations = new Collection<LexiconPronunciation>();
            Dictionary<string, int> pronunciationIndex = new Dictionary<string, int>();
            foreach (LexiconPronunciation lexPron in Pronunciations)
            {
                int firstOccurrence;
                if (pronunciationIndex.TryGetValue(lexPron.Symbolic, out firstOccurrence))
                {
                    // Validate duplicate pronunciation node
                    errorSet.Add(LexiconError.DuplicatePronunciationNode, Grapheme, lexPron.Symbolic);

                    lexPron.Valid = false;

                    // The merge target is the same for every property of this duplicate,
                    // so it is looked up once instead of once per property.
                    Collection<LexiconItemProperty> targetProperties =
                        distinctPronunciations[firstOccurrence].Properties;
                    foreach (LexiconItemProperty property in lexPron.Properties)
                    {
                        if (!targetProperties.Contains(property))
                        {
                            targetProperties.Add(property);
                        }
                        else
                        {
                            errorSet.Add(LexiconError.DuplicateProperty, Grapheme, lexPron.Symbolic);
                        }
                    }
                }
                else
                {
                    // The next free position is the current size of the distinct list.
                    pronunciationIndex[lexPron.Symbolic] = distinctPronunciations.Count;
                    distinctPronunciations.Add(lexPron);
                }
            }

            _pronunciations = distinctPronunciations;

            int invalidPronNodeNum = 0;
            foreach (LexiconPronunciation lexPron in Pronunciations)
            {
                // lexPron.Valid will be false if contains error.
                ValidatePronunciation(Grapheme, lexPron, ttsPhoneSet, errorSet);

                // Validate the POS information
                int invalidPropertyNum = 0;
                foreach (LexiconItemProperty property in lexPron.Properties)
                {
                    // Lexicon schema ensures that the POS property is existed
                    Debug.Assert(property.PartOfSpeech != null);

                    if (PosItem.Validate(property.PartOfSpeech.Value,
                        ttsPosSet, attributeSchema).Count > 0)
                    {
                        errorSet.Add(LexiconError.UnrecognizedPos, Grapheme,
                            lexPron.Symbolic, property.PartOfSpeech.Value);
                        property.Valid = false;
                    }

                    if (attributeSchema != null)
                    {
                        ErrorSet attributeErrorSet = ValidateAttributeSet(property, attributeSchema);
                        foreach (Error error in attributeErrorSet.Errors)
                        {
                            errorSet.Add(LexiconError.AttributeError, error, Grapheme, lexPron.Symbolic);
                        }

                        if (attributeErrorSet.Contains(ErrorSeverity.MustFix))
                        {
                            property.Valid = false;
                        }
                    }

                    // A property may use either the attribute set or the dedicated
                    // case/gender/number fields, but not both.
                    if (property.AttributeSet.Count > 0 &&
                        (property.Case != null || property.Gender != null || property.Number != null))
                    {
                        errorSet.Add(LexiconError.MixedPropertyDefinition, Grapheme, lexPron.Symbolic);
                        property.Valid = false;
                    }
                    else
                    {
                        ValidateCase(Grapheme, property, errorSet);
                        ValidateGender(Grapheme, property, errorSet);
                        ValidateNumber(Grapheme, property, errorSet);
                    }

                    if (!property.Valid)
                    {
                        invalidPropertyNum++;
                    }
                }

                // Also true for a pronunciation that has no properties at all.
                if (lexPron.Properties.Count == invalidPropertyNum)
                {
                    lexPron.Valid = false;
                }

                if (!lexPron.Valid)
                {
                    invalidPronNodeNum++;
                }
            }

            // Also true for an item that has no pronunciations at all.
            if (Pronunciations.Count == invalidPronNodeNum)
            {
                Valid = false;
            }

            return errorSet;
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Rebuilds the phone table of this forest from the phones of its decision trees.
 /// </summary>
 /// <param name="phoneSet">Phone set used to convert phone string label to phone instance.</param>
 public void BuildPhones(TtsPhoneSet phoneSet)
 {
     Helper.ThrowIfNull(phoneSet);

     // Start from an empty table so entries from an earlier build do not survive.
     _phones.Clear();

     foreach (DecisionTree tree in TreeList)
     {
         // Several trees may refer to the same phone; only the first one is converted.
         if (_phones.ContainsKey(tree.Phone))
         {
             continue;
         }

         _phones.Add(tree.Phone, phoneSet.ToPhone(tree.Phone));
     }
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Extracts a domain lexicon from the script files named in a domain list file.
        /// </summary>
        /// <remarks>
        /// Each line of the domain list names a script file under the script folder.
        /// Every script is turned into a lexicon; when both a phone set and an attribute
        /// schema are supplied, the lexicon is validated and skipped if it has any error.
        /// The surviving lexicons are merged into one using the given merge mode.
        /// </remarks>
        /// <param name="scriptFolder">Script Folder.</param>
        /// <param name="domainListFile">Domain List File.</param>
        /// <param name="inMainLex">Input Main Lexicon.</param>
        /// <param name="defaultPartOfSpeech">Default Part of Speech.</param>
        /// <param name="mergeMode">Merging Mode for Lexicon.</param>
        /// <param name="phoneSet">Phone set; validation is skipped when null.</param>
        /// <param name="attribSchema">Lexical attribute schema; validation is skipped when null.</param>
        /// <returns>
        /// The merged lexicon, or null when the default part of speech is not recognized
        /// by the schema or when no script file produced a usable lexicon.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// A script file has a different language than the lexicon collected so far.
        /// </exception>
        private Lexicon ExtractDomainLexicon(string scriptFolder, string domainListFile,
            Lexicon inMainLex, string defaultPartOfSpeech, MergeMode mergeMode,
            TtsPhoneSet phoneSet, LexicalAttributeSchema attribSchema)
        {
            if (attribSchema != null)
            {
                if (PosItem.Validate(defaultPartOfSpeech, null, attribSchema).Count > 0)
                {
                    Log("Default Part of speech {0} is unrecognized according to attribute schema, extraction breaks",
                        defaultPartOfSpeech);
                    return null;
                }
            }

            Lexicon outLex = null;
            foreach (string domainName in Helper.FileLines(domainListFile))
            {
                string domainFilePath = Path.Combine(scriptFolder, domainName);
                XmlScriptFile scriptFile = new XmlScriptFile();
                scriptFile.Load(domainFilePath);
                if (outLex != null && outLex.Language != scriptFile.Language)
                {
                    throw new InvalidDataException(Helper.NeutralFormat(
                        "Found inconsistent language \"{0}\" against previous one \"{1}\" in the file of \"{2}\"",
                        scriptFile.Language.ToString(),
                        outLex.Language.ToString(), domainFilePath));
                }

                Lexicon lexicon = Lexicon.CreateFromXmlScriptFile(scriptFile, defaultPartOfSpeech, inMainLex);
                if (phoneSet != null && attribSchema != null)
                {
                    lexicon.Validate(phoneSet, attribSchema);
                    if (lexicon.ErrorSet.Count > 0)
                    {
                        Console.Error.WriteLine("The script file {0} contains {1} errors, skip!",
                            domainFilePath, lexicon.ErrorSet.Count);
                        Log("The script file {0} contains {1} errors:",
                            domainFilePath, lexicon.ErrorSet.Count);
                        foreach (Error error in lexicon.ErrorSet.Errors)
                        {
                            Log(error.ToString());
                        }

                        // Skip this domain lexicon
                        continue;
                    }
                }

                if (outLex == null)
                {
                    outLex = lexicon;
                }
                else
                {
                    MergeLexicon(outLex, lexicon, mergeMode);
                }
            }

            // outLex is still null when the domain list was empty or every domain
            // lexicon was skipped, so it must be checked before its items are read.
            if (outLex == null || outLex.Items.Count == 0)
            {
                Log("The final lexicon is empty.");
            }

            return outLex;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Builds a map between the phone name and its type id.
        /// </summary>
        /// <param name="phoneSet">The given phoneset.</param>
        /// <returns>The Dictionary which key is unit name and the value is unit index id.</returns>
        public static Dictionary<string, int> BuildPhoneNameIdMap(TtsPhoneSet phoneSet)
        {
            // Adds the phone one by one.
            int maxId = 0;
            Dictionary<string, int> result = new Dictionary<string, int>();
            foreach (Phone phone in phoneSet.Phones)
            {
                // Please notice here, the phone set file contains the runtime silence, but not the silence.
                string name = Phoneme.ToHtk(phone.Name);
                result.Add(name, phone.Id);
                if (phone.Id > maxId)
                {
                    maxId = phone.Id;
                }
            }

            // Ensures there is continuous and starts from 0.
            for (int i = 0; i <= maxId; ++i)
            {
                if (!result.ContainsValue(i))
                {
                    // Adds a null phoneme here for padding.
                    result.Add(Helper.NeutralFormat("_{0}_{1}_", Phoneme.Null, i), i);
                }
            }

            return result;
        }