예제 #1
0
 /// <summary>
 /// Creates a lexicon for this object's language and loads it from this object's path,
 /// using a content controller whose IsCaseSensitive flag is set.
 /// </summary>
 /// <param name="errorSet">Error set; not used by this implementation.</param>
 /// <returns>The loaded lexicon, returned as an object.</returns>
 internal override object LoadDataObject(ErrorSet errorSet)
 {
     Lexicon loadedLexicon = new Lexicon(this.Language);
     Lexicon.ContentControler caseSensitiveControler = new Lexicon.ContentControler
     {
         IsCaseSensitive = true
     };

     loadedLexicon.Load(this.Path, caseSensitiveControler);
     return loadedLexicon;
 }
예제 #2
0
        /// <summary>
        /// Gets the lexicon for a language, loading it from the language's lexicon resource
        /// and caching it the first time it is requested.
        /// </summary>
        /// <param name="language">Language whose lexicon is requested.</param>
        /// <returns>
        /// The cached or newly loaded lexicon, or null when no resource reader is available.
        /// </returns>
        public static Lexicon GetLexicon(Language language)
        {
            // Already cached: hand back the stored instance.
            if (_ttsLexiconMap.ContainsKey(language))
            {
                return _ttsLexiconMap[language];
            }

            Lexicon loaded = null;
            using (StreamReader resourceReader = Localor.LoadResource(language, Localor.LexiconFileName))
            {
                if (resourceReader != null)
                {
                    loaded = new Lexicon(language);
                    loaded.Load(resourceReader);

                    // Cache only after a successful load.
                    _ttsLexiconMap[language] = loaded;
                }
            }

            return loaded;
        }
예제 #3
0
        /// <summary>
        /// Makes sure the lexicon and the phone set are available, loading each from its file path
        /// when it has not been set yet, then checks that both use the same language and records it.
        /// </summary>
        /// <exception cref="InvalidDataException">
        /// The phone set language differs from the lexicon language.
        /// </exception>
        public void EnsureInitialized()
        {
            // Each resource must be supplied either as an object or as a file path.
            Debug.Assert(LexiconFilePath != null || Lexicon != null);
            Debug.Assert(PhoneSetFilePath != null || PhoneSet != null);

            if (_lexicon == null)
            {
                _lexicon = new Lexicon();
                _lexicon.Load(LexiconFilePath);
            }

            if (_phoneset == null)
            {
                _phoneset = new TtsPhoneSet();
                _phoneset.Load(PhoneSetFilePath);
            }

            if (_phoneset.Language != _lexicon.Language)
            {
                throw new InvalidDataException(
                    Utility.Helper.NeutralFormat("phoneset and lexicon language should match"));
            }

            _language = _lexicon.Language;
        }
예제 #4
0
        /// <summary>
        /// Imports the items of a domain lexicon into the current lexicon.
        /// </summary>
        /// <param name="domainLex">Domain lexicon to import.</param>
        /// <param name="trustDomainLexicon">
        /// Whether the domain lexicon is trusted; forwarded to the per-item import.
        /// </param>
        /// <exception cref="InvalidDataException">
        /// The current lexicon has a domain tag, or <paramref name="domainLex"/> fails
        /// domain lexicon validation.
        /// </exception>
        public void ImportDomainLexicon(Lexicon domainLex, bool trustDomainLexicon)
        {
            if (!string.IsNullOrEmpty(_domainTag))
            {
                string notUnified = string.Format("Target lexicon is not a unified lexicon, it is in \"{0}\" domain.", _domainTag);
                throw new InvalidDataException(notUnified);
            }

            if (!ValidateDomainLexicon(domainLex))
            {
                throw new InvalidDataException("The lexicon to import is not a domain lexicon.");
            }

            foreach (LexicalItem incoming in domainLex.Items.Values)
            {
                string grapheme = incoming.Grapheme;

                if (!_items.ContainsKey(grapheme))
                {
                    // Unknown word: store a clone with the word and pronunciation frequencies reset.
                    LexicalItem copy = incoming.Clone();
                    copy.Frequency = 0;
                    foreach (LexiconPronunciation copiedPron in copy.Pronunciations)
                    {
                        copiedPron.Frequency = 0;
                    }

                    _items.Add(grapheme, copy);
                    continue;
                }

                // Known word: import into the existing item and collect the errors it reports.
                ErrorSet itemErrors = _items[grapheme].ImportDomainLexicalItem(incoming, domainLex.DomainTag, trustDomainLexicon);
                ErrorSet.Merge(itemErrors);
            }
        }
예제 #5
0
        /// <summary>
        /// Builds a lexicon from the pronounced words of an XML script file.
        /// </summary>
        /// <param name="scriptFile">Script file whose items supply the words.</param>
        /// <param name="defaultPos">Part of speech used for words that carry no POS string.</param>
        /// <param name="mainLexicon">
        /// Optional main lexicon; when it holds a matching pronunciation for a word, that
        /// pronunciation's Symbolic text is used instead of the script's.
        /// </param>
        /// <returns>The lexicon built from the script file.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="scriptFile"/> is null, or <paramref name="defaultPos"/> is null or empty.
        /// </exception>
        public static Lexicon CreateFromXmlScriptFile(XmlScriptFile scriptFile, string defaultPos, Lexicon mainLexicon)
        {
            if (scriptFile == null)
            {
                throw new ArgumentNullException("scriptFile");
            }

            if (string.IsNullOrEmpty(defaultPos))
            {
                throw new ArgumentNullException("defaultPos");
            }

            Lexicon result = new Lexicon(scriptFile.Language);
            foreach (ScriptItem scriptItem in scriptFile.Items)
            {
                foreach (ScriptWord scriptWord in scriptItem.AllPronouncedWords)
                {
                    string grapheme = scriptWord.Grapheme;

                    LexiconPronunciation candidate = new LexiconPronunciation(result.Language);
                    candidate.Symbolic = scriptWord.Pronunciation;

                    // Adopt the Symbolic text of the matching main-lexicon pronunciation, if any.
                    LexicalItem mainItem = mainLexicon != null ? mainLexicon.Lookup(grapheme, true) : null;
                    if (mainItem != null)
                    {
                        LexiconPronunciation mainPron = mainItem.FindPronunciation(candidate.Symbolic, true);
                        if (mainPron != null)
                        {
                            candidate.Symbolic = mainPron.Symbolic;
                        }
                    }

                    // The word's own POS string wins; otherwise fall back to the default.
                    LexiconItemProperty posProperty = new LexiconItemProperty();
                    posProperty.PartOfSpeech = new PosItem(
                        string.IsNullOrEmpty(scriptWord.PosString) ? defaultPos : scriptWord.PosString);
                    candidate.Properties.Add(posProperty);

                    if (!result.Items.ContainsKey(grapheme))
                    {
                        // First occurrence of this word: create its entry.
                        LexicalItem newItem = new LexicalItem(result.Language);
                        newItem.Grapheme = grapheme;
                        newItem.Pronunciations.Add(candidate);
                        result.Items.Add(grapheme, newItem);
                        continue;
                    }

                    // Word already present: extend matching pronunciations with the property,
                    // or append the pronunciation when none matches.
                    bool alreadyPresent = false;
                    foreach (LexiconPronunciation known in result.Items[grapheme].Pronunciations)
                    {
                        if (!known.Symbolic.Equals(candidate.Symbolic, StringComparison.InvariantCultureIgnoreCase))
                        {
                            continue;
                        }

                        alreadyPresent = true;
                        if (!known.Properties.Contains(posProperty))
                        {
                            known.Properties.Add(posProperty);
                        }
                    }

                    if (!alreadyPresent)
                    {
                        result.Items[grapheme].Pronunciations.Add(candidate);
                    }
                }
            }

            return result;
        }
예제 #6
0
 /// <summary>
 /// Saves a lexicon to a file by calling the lexicon's Save method.
 /// </summary>
 /// <param name="lexiconFilePath">Destination path passed to Save.</param>
 /// <param name="lexicon">Lexicon to save; dereferenced directly, so it must not be null.</param>
 /// <param name="encoding">Encoding passed to Save.</param>
 public static void WriteAllData(string lexiconFilePath, Lexicon lexicon, Encoding encoding)
 {
     lexicon.Save(lexiconFilePath, encoding);
 }
예제 #7
0
        /// <summary>
        /// Loads one word entry from an XML node and stores it in this lexicon's items.
        /// </summary>
        /// <param name="parentLexicon">Lexicon the entry is loaded for; its baseline flag decides the entry's origin.</param>
        /// <param name="wordNode">XML node describing the word.</param>
        /// <param name="nsmgr">Namespace manager passed to the item loader.</param>
        /// <param name="contentController">Content controller passed to the item loader.</param>
        private void LoadLexicalItem(Lexicon parentLexicon, XmlNode wordNode, XmlNamespaceManager nsmgr, Lexicon.ContentControler contentController)
        {
            LexicalItem loaded = LexicalItem.Load(parentLexicon, wordNode, nsmgr, contentController, ErrorSet);

            // Entries that could not be loaded or have no pronunciation are dropped.
            if (loaded == null || loaded.Pronunciations.Count <= 0)
            {
                return;
            }

            if (!_items.ContainsKey(loaded.Grapheme))
            {
                loaded.Origin = parentLexicon._isBaseline ? LexiconOrigin.Baseline : LexiconOrigin.Current;
                _items.Add(loaded.Grapheme, loaded);
                return;
            }

            // Duplicate word: report it, then move its pronunciations onto the existing entry.
            ErrorSet.Add(LexiconError.DuplicateWordEntry, loaded.Grapheme);
            LexicalItem existing = _items[loaded.Grapheme];
            foreach (LexiconPronunciation pronunciation in loaded.Pronunciations)
            {
                pronunciation.Parent = existing;
                existing.Pronunciations.Add(pronunciation);
            }
        }
예제 #8
0
        /// <summary>
        /// Loads the lexicon from an XML document: language, optional domain tag, word entries,
        /// and, when the first child is an "include" element pointing at an existing file,
        /// the baseline lexicon it references.
        /// </summary>
        /// <param name="xmlDoc">XML document to read.</param>
        /// <param name="nsmgr">Namespace manager used for the "tts:" queries.</param>
        /// <param name="contentController">
        /// A ContentControler, or null to use a default one.
        /// </param>
        protected override void Load(XmlDocument xmlDoc, XmlNamespaceManager nsmgr, object contentController)
        {
            ContentControler controller = contentController as ContentControler;
            Debug.Assert(contentController == null || controller != null);
            if (controller == null)
            {
                controller = new ContentControler();
            }

            XmlElement root = xmlDoc.DocumentElement;

            // A language set before loading must agree with the document's, unless it is Neutral.
            Language declaredLanguage = Localor.StringToLanguage(root.Attributes["lang"].InnerText);
            bool languageConflict = !Language.Equals(Language.Neutral) && !declaredLanguage.Equals(Language);
            if (languageConflict)
            {
                ErrorSet.Add(CommonError.NotConsistentLanguage,
                    Language.ToString(), "initial one", declaredLanguage.ToString(), "lexicon");
            }

            Language = declaredLanguage;

            XmlAttribute domainAttribute = root.Attributes["domain"];
            if (domainAttribute != null && !string.IsNullOrEmpty(domainAttribute.InnerText))
            {
                DomainTag = domainAttribute.InnerText;
            }

            // Replace the current items with the word entries of this document.
            _items.Clear();
            foreach (XmlNode wordNode in root.SelectNodes("tts:w", nsmgr))
            {
                LoadLexicalItem(this, wordNode, nsmgr, controller);
            }

            // Resolve the baseline lexicon path from a leading "include" element, if present.
            string baseLexiconFilePath = string.Empty;
            XmlNode firstChild = root.FirstChild;
            if (firstChild != null &&
                firstChild.LocalName == "include" &&
                firstChild.Attributes["href"] != null)
            {
                BaseLexiconRelativeFilePath = firstChild.Attributes["href"].InnerText;
                if (!string.IsNullOrEmpty(BaseLexiconRelativeFilePath))
                {
                    baseLexiconFilePath = Helper.GetFullPath(Path.GetDirectoryName(this.FilePath), BaseLexiconRelativeFilePath);
                }
            }

            if (string.IsNullOrEmpty(baseLexiconFilePath) || !File.Exists(baseLexiconFilePath))
            {
                return;
            }

            Lexicon baseline = new Lexicon();
            baseline._isBaseline = true;
            baseline.Load(baseLexiconFilePath, controller);

            // Merge: baseline words are added only when the current lexicon lacks them,
            // so a word defined in both keeps the current lexicon's entry.
            foreach (var baseEntry in baseline.Items)
            {
                if (!_items.ContainsKey(baseEntry.Key))
                {
                    _items.Add(baseEntry.Key, baseEntry.Value);
                }
            }
        }
예제 #9
0
        /// <summary>
        /// Merges a sub lexicon into the merged lexicon; words already present keep the
        /// merged lexicon's entry unchanged.
        /// </summary>
        /// <param name="mergedLexicon">Main lexicon, which also receives the merge result.</param>
        /// <param name="subLexicon">Lexicon whose new words are added.</param>
        private void MergeLexiconWithKeepFirstOne(Lexicon mergedLexicon, Lexicon subLexicon)
        {
            Collection<string> skippedMessages = new Collection<string>();
            int newWordCount = 0;

            foreach (KeyValuePair<string, LexicalItem> entry in subLexicon.Items)
            {
                LexicalItem existing = mergedLexicon.Lookup(entry.Key, true);
                if (existing != null)
                {
                    // Word already known: nothing is merged, only the skipped pronunciations are noted.
                    foreach (LexiconPronunciation candidate in entry.Value.Pronunciations)
                    {
                        if (existing.ContainsPronunciation(candidate.Symbolic))
                        {
                            continue;
                        }

                        skippedMessages.Add(Helper.NeutralFormat(
                            "Pronunciation for word [{0}] has been skipped: [{1}]",
                            entry.Key, candidate.Symbolic));
                    }
                }
                else
                {
                    mergedLexicon.Items.Add(entry.Key, entry.Value);
                    newWordCount++;
                }
            }

            // Summary
            Log("---------------------------------");
            Log("Totally:");
            Log("[{0}] pronunciations have been skipped.",
                skippedMessages.Count);
            Log(Helper.NeutralFormat("[{0}] new words have been added by the latter lexicon", newWordCount));
        }
예제 #10
0
        /// <summary>
        /// Merges a sub lexicon into the merged lexicon; for words present in both, the
        /// merged lexicon's entry is removed and the sub lexicon's entry is used instead.
        /// </summary>
        /// <param name="mergedLexicon">Main lexicon, which also receives the merge result.</param>
        /// <param name="subLexicon">Lexicon whose entries win on conflict.</param>
        private void MergeLexiconWithKeepLastOne(Lexicon mergedLexicon, Lexicon subLexicon)
        {
            Collection<string> replacedPronMessage = new Collection<string>();
            Collection<string> replacedPropertyMessage = new Collection<string>();
            Collection<string> existedWords = new Collection<string>();

            // A set rather than a list: it is probed once per sub-lexicon word below,
            // which would otherwise make the final pass quadratic.
            HashSet<string> existedWordsInLower = new HashSet<string>();

            // Record the pronunciations and properties of the merged lexicon that will be lost.
            foreach (KeyValuePair<string, LexicalItem> mergedLexiconItem in mergedLexicon.Items)
            {
                string word = mergedLexiconItem.Key;
                LexicalItem newItem = subLexicon.Lookup(word, true);

                // Only words that the sub lexicon also contains are replaced.
                if (newItem == null)
                {
                    continue;
                }

                existedWords.Add(word);
                existedWordsInLower.Add(word.ToLowerInvariant());
                foreach (LexiconPronunciation originalPron in mergedLexiconItem.Value.Pronunciations)
                {
                    LexiconPronunciation existedPron = newItem.FindPronunciation(originalPron.Symbolic);
                    if (existedPron == null)
                    {
                        replacedPronMessage.Add(Helper.NeutralFormat(
                            "Pronunciation for word [{0}] has been removed: [{1}]",
                            mergedLexiconItem.Key, originalPron.Symbolic));
                        continue;
                    }

                    foreach (LexiconItemProperty originalProperty in originalPron.Properties)
                    {
                        bool hasProperty = false;

                        // Look for the old property on the new (sub) pronunciation.
                        foreach (LexiconItemProperty itemProperty in existedPron.Properties)
                        {
                            if (itemProperty.Equals(originalProperty))
                            {
                                hasProperty = true;
                                break;
                            }
                        }

                        // The old property is missing from the new entry, so it is lost.
                        if (!hasProperty)
                        {
                            replacedPropertyMessage.Add(Helper.NeutralFormat(
                                "Property has been replaced for word [{0}]'s pronunciation [{1}] : [{2}]",
                                word, originalPron.Symbolic, originalProperty.ToString()));
                        }
                    }
                }
            }

            // Remove the conflicting entries; done after the loop so the dictionary
            // is not modified while it is being enumerated.
            foreach (string word in existedWords)
            {
                mergedLexicon.Items.Remove(word);
            }

            // Add every sub-lexicon entry, counting those that did not replace anything.
            int newWord = 0;
            foreach (KeyValuePair<string, LexicalItem> subLexiconItem in subLexicon.Items)
            {
                mergedLexicon.Items.Add(subLexiconItem.Key, subLexiconItem.Value);
                if (!existedWordsInLower.Contains(subLexiconItem.Key.ToLowerInvariant()))
                {
                    newWord++;
                }
            }

            // Summary.
            // NOTE(review): the first line says "words" but reports the number of removed
            // pronunciations; confirm whether existedWords.Count was intended.
            Log("---------------------------------");
            Log("Totally:");
            Log(Helper.NeutralFormat("[{0}] words have been replaced by the latter lexicon", replacedPronMessage.Count));
            Log("[{0}] properties have been replaced.", replacedPropertyMessage.Count);
            Log(Helper.NeutralFormat("[{0}] new words have been added by the latter lexicon", newWord));
        }
예제 #11
0
        /// <summary>
        /// Merges a sub lexicon into the merged lexicon; for words present in both, every
        /// pronunciation and property missing from the merged entry is added to it.
        /// </summary>
        /// <param name="mergedLexicon">Main lexicon, which also receives the merge result.</param>
        /// <param name="subLexicon">Lexicon whose content is merged in.</param>
        private void MergeLexiconWithKeepAll(Lexicon mergedLexicon, Lexicon subLexicon)
        {
            Collection<string> addedPronMessage = new Collection<string>();
            Collection<string> addedPropertyMessage = new Collection<string>();
            Collection<string> duplicateWordMessage = new Collection<string>();
            int addedWord = 0;

            foreach (KeyValuePair<string, LexicalItem> subLexiconItem in subLexicon.Items)
            {
                string word = subLexiconItem.Key;
                if (!mergedLexicon.Items.ContainsKey(word))
                {
                    mergedLexicon.Items.Add(subLexiconItem.Key, subLexiconItem.Value);
                    addedWord++;
                    continue;
                }

                LexicalItem mergedItem = mergedLexicon.Items[word];

                // Stays true only when the sub entry contributes nothing new.
                bool theSameWord = true;
                foreach (LexiconPronunciation subItemPron in subLexiconItem.Value.Pronunciations)
                {
                    LexiconPronunciation mergedItemPron = null;

                    // Find the sub entry's pronunciation in the merged item.
                    foreach (LexiconPronunciation itemPron in mergedItem.Pronunciations)
                    {
                        if (itemPron.Symbolic.Equals(subItemPron.Symbolic, StringComparison.OrdinalIgnoreCase))
                        {
                            mergedItemPron = itemPron;
                            break;
                        }
                    }

                    // Pronunciation unknown to the merged item: add it as a whole.
                    if (mergedItemPron == null)
                    {
                        mergedItem.Pronunciations.Add(subItemPron);
                        theSameWord = false;
                        addedPronMessage.Add(Helper.NeutralFormat(
                            "Pronunciation [{0}] has been added to word [{1}]",
                            subItemPron.Symbolic, mergedItem.Grapheme));
                        continue;
                    }

                    // Pronunciation known: add only the properties it does not have yet.
                    foreach (LexiconItemProperty subItemProperty in subItemPron.Properties)
                    {
                        bool hasProperty = false;
                        foreach (LexiconItemProperty itemProperty in mergedItemPron.Properties)
                        {
                            if (itemProperty.Equals(subItemProperty))
                            {
                                hasProperty = true;
                                break;
                            }
                        }

                        if (!hasProperty)
                        {
                            mergedItemPron.Properties.Add(subItemProperty);
                            theSameWord = false;
                            addedPropertyMessage.Add(Helper.NeutralFormat(
                                "Property has been added to word [{0}]'s pronunciation [{1}] : [{2}]",
                                mergedItem.Grapheme, subItemPron.Symbolic, subItemProperty.ToString()));
                        }
                    }
                }

                if (theSameWord)
                {
                    duplicateWordMessage.Add(Helper.NeutralFormat(
                        "Word [{0}] has been dropped because of duplication.", subLexiconItem.Key));
                }
            }

            // Summary
            Log("---------------------------------");
            Log("Totally:");
            Log("[{0}] words have been dropped because of duplication.", duplicateWordMessage.Count);
            Log("[{0}] pronunciations and [{1}] properties have been added.",
                addedPronMessage.Count, addedPropertyMessage.Count);

            // Report the new-word count too, as the other merge modes do; it used to be
            // counted here but never written to the log.
            Log(Helper.NeutralFormat("[{0}] new words have been added by the latter lexicon", addedWord));
        }
예제 #12
0
 /// <summary>
 /// Merges one lexicon into another using the strategy selected by the merge mode.
 /// </summary>
 /// <param name="mergedLexicon">Lexicon that receives the merge result.</param>
 /// <param name="subLexicon">Lexicon to merge in.</param>
 /// <param name="mergeMode">Conflict strategy; any mode other than the three handled here does nothing.</param>
 private void MergeLexicon(Lexicon mergedLexicon, Lexicon subLexicon, MergeMode mergeMode)
 {
     if (mergeMode == MergeMode.KeepAll)
     {
         MergeLexiconWithKeepAll(mergedLexicon, subLexicon);
     }
     else if (mergeMode == MergeMode.KeepLastOne)
     {
         MergeLexiconWithKeepLastOne(mergedLexicon, subLexicon);
     }
     else if (mergeMode == MergeMode.KeepFirstOne)
     {
         MergeLexiconWithKeepFirstOne(mergedLexicon, subLexicon);
     }
 }
예제 #13
0
        /// <summary>
        /// Builds a domain lexicon from the script files named in a domain list file,
        /// merging the per-script lexicons with the given merge mode.
        /// </summary>
        /// <param name="scriptFolder">Folder that the names in the domain list are relative to.</param>
        /// <param name="domainListFile">File whose lines name the script files to process.</param>
        /// <param name="inMainLex">Main lexicon passed on to the per-script lexicon creation; may be null.</param>
        /// <param name="defaultPartOfSpeech">Part of speech used for words without a POS string.</param>
        /// <param name="mergeMode">Merge mode used to combine the per-script lexicons.</param>
        /// <param name="phoneSet">Phone set for validation; validation runs only when this and the schema are both given.</param>
        /// <param name="attribSchema">Lexical attribute schema for validation; may be null.</param>
        /// <returns>
        /// The combined lexicon, or null when the default part of speech fails schema validation
        /// or when no script file produced a usable lexicon.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// A script file's language differs from that of the lexicon built so far.
        /// </exception>
        private Lexicon ExtractDomainLexicon(string scriptFolder, string domainListFile,
            Lexicon inMainLex, string defaultPartOfSpeech, MergeMode mergeMode,
            TtsPhoneSet phoneSet, LexicalAttributeSchema attribSchema)
        {
            if (attribSchema != null)
            {
                if (PosItem.Validate(defaultPartOfSpeech, null, attribSchema).Count > 0)
                {
                    Log("Default Part of speech {0} is unrecognized according to attribute schema, extraction breaks",
                        defaultPartOfSpeech);
                    return null;
                }
            }

            Lexicon outLex = null;
            foreach (string domainName in Helper.FileLines(domainListFile))
            {
                string domainFilePath = Path.Combine(scriptFolder, domainName);
                XmlScriptFile scriptFile = new XmlScriptFile();
                scriptFile.Load(domainFilePath);
                if (outLex != null && outLex.Language != scriptFile.Language)
                {
                    throw new InvalidDataException(Helper.NeutralFormat(
                        "Found inconsistent language \"{0}\" against previous one \"{1}\" in the file of \"{2}\"",
                        scriptFile.Language.ToString(),
                        outLex.Language.ToString(), domainFilePath));
                }

                Lexicon lexicon = Lexicon.CreateFromXmlScriptFile(scriptFile, defaultPartOfSpeech, inMainLex);
                if (phoneSet != null && attribSchema != null)
                {
                    lexicon.Validate(phoneSet, attribSchema);
                    if (lexicon.ErrorSet.Count > 0)
                    {
                        Console.Error.WriteLine("The script file {0} contains {1} errors, skip!",
                            domainFilePath, lexicon.ErrorSet.Count);
                        Log("The script file {0} contains {1} errors:",
                            domainFilePath, lexicon.ErrorSet.Count);
                        foreach (Error error in lexicon.ErrorSet.Errors)
                        {
                            Log(error.ToString());
                        }

                        // Skip this domain lexicon
                        continue;
                    }
                }

                if (outLex == null)
                {
                    outLex = lexicon;
                }
                else
                {
                    MergeLexicon(outLex, lexicon, mergeMode);
                }
            }

            // outLex is still null when the domain list was empty or every script was skipped;
            // treat that like an empty result instead of dereferencing null.
            if (outLex == null || outLex.Items.Count == 0)
            {
                Log("The final lexicon is empty.");
            }

            return outLex;
        }
예제 #14
0
        /// <summary>
        /// Runs the operation selected by InMode and, for the lexicon-producing modes, saves
        /// the resulting lexicon when an output path is set and a lexicon was produced.
        /// </summary>
        protected override void Execute()
        {
            switch (this.InMode)
            {
                case LexiconProcessorMode.Merge:
                    Log("Merge Lexicon");

                    // Start from a new lexicon holding the main lexicon's items, then merge the additional one.
                    OutLexicon = new Lexicon();
                    OutLexicon.Language = InMainLexicon.Language;
                    OutLexicon.Encoding = InMainLexicon.Encoding;
                    foreach (KeyValuePair<string, LexicalItem> mainEntry in InMainLexicon.Items)
                    {
                        OutLexicon.Items.Add(mainEntry.Key, mainEntry.Value);
                    }

                    if (InAdditionalLexicon != null)
                    {
                        MergeLexicon(OutLexicon, InAdditionalLexicon, InMergeMode);
                    }

                    break;

                case LexiconProcessorMode.ExtractDomainLexicon:
                    Log("Extract Domain Lexicon");
                    OutLexicon = ExtractDomainLexicon(InScriptFolder, InDomainListFile,
                        InMainLexicon, InPos, InMergeMode,
                        InPhoneSet, InAttribSchema);
                    break;

                case LexiconProcessorMode.ExtractSubLexicon:
                    Log("Extract Sub Lexicon");
                    OutLexicon = ExtractSubLexicon(InCorpusType, InCorpusFile, InMainLexicon);
                    break;

                case LexiconProcessorMode.ExtractWordListFromLexicon:
                    Log("Extract Word List from Lexicon");
                    OutWordList = InMainLexicon.ListWords();
                    break;
            }

            bool shouldSave = !string.IsNullOrEmpty(InSetOutLexiconPath)
                && this.InMode != LexiconProcessorMode.ExtractWordListFromLexicon
                && OutLexicon != null;
            if (shouldSave)
            {
                OutLexicon.Save(GetOutPathUnderResultDirectory(InSetOutLexiconPath));
            }
        }
예제 #15
0
        /// <summary>
        /// Load LexicalItem from XmlNode.
        /// </summary>
        /// <param name="parentLexicon">Lexicon.</param>
        /// <param name="wordNode">XmlNode.</param>
        /// <param name="nsmgr">XmlNamespaceManager.</param>
        /// <param name="contentController">Object.</param>
        /// <param name="errorSet">ErrorSet.</param>
        /// <returns>LexicalItem.</returns>
        internal static LexicalItem Load(Lexicon parentLexicon, XmlNode wordNode, XmlNamespaceManager nsmgr, Lexicon.ContentControler contentController, ErrorSet errorSet)
        {
            Debug.Assert(parentLexicon != null && wordNode != null && nsmgr != null &&
                contentController != null && errorSet != null);

            XmlElement wordElement = wordNode as XmlElement;
            LexicalItem lexiconItem = new LexicalItem(parentLexicon.Language);
            lexiconItem.Parent = parentLexicon;

            string grapheme = wordElement.GetAttribute("v");
            if (string.IsNullOrEmpty(grapheme))
            {
                errorSet.Add(LexiconError.InvalidWordEntry,
                    new Error(WordEntryError.EmptyWord), grapheme);
                lexiconItem = null;
            }
            else if (!grapheme.Trim().Equals(grapheme, StringComparison.OrdinalIgnoreCase))
            {
                errorSet.Add(LexiconError.InvalidWordEntry,
                        new Error(WordEntryError.LeadingOrTrailingSpace), grapheme);
                lexiconItem = null;
            }
            else
            {
                Regex regex = new Regex("(  )|\t");
                if (regex.IsMatch(grapheme.Trim()))
                {
                    errorSet.Add(LexiconError.InvalidWordEntry,
                        new Error(WordEntryError.ContainingTabOrMultipleSpaces), grapheme);
                }
            }

            if (lexiconItem != null)
            {
                // Before share lexicon object to lexicon reviewer tool,
                // we drop those items if they have "deleted" status
                string statusValue = wordElement.GetAttribute("s");
                if (!string.IsNullOrEmpty(statusValue))
                {
                    lexiconItem.Status = (Lexicon.LexiconStatus)Enum.Parse(typeof(Lexicon.LexiconStatus),
                        statusValue, true);
                }

                if (!contentController.IsHistoryCheckingMode && lexiconItem.Status == Lexicon.LexiconStatus.Deleted)
                {
                    lexiconItem = null;
                }
                else
                {
                    lexiconItem.Alias = wordElement.GetAttribute("alias");
                    CultureInfo cultureInfo = new CultureInfo(Localor.LanguageToString(parentLexicon.Language), false);
                    lexiconItem._text = grapheme;
                    lexiconItem.Grapheme = contentController.IsCaseSensitive ? grapheme.Trim() : grapheme.Trim().ToLower(cultureInfo);
                    lexiconItem.OldGrapheme = lexiconItem.Grapheme;

                    // Check whether this word is reviewed
                    string reviewedValue = wordElement.GetAttribute("r");
                    if (!string.IsNullOrEmpty(reviewedValue))
                    {
                        lexiconItem.Reviewed = bool.Parse(reviewedValue);
                    }

                    // Get word's frequency. If there's no such information, set frequency to zero
                    int frequency = 0;
                    int.TryParse(wordElement.GetAttribute("f"), out frequency);
                    lexiconItem.Frequency = frequency;

                    // Load comment
                    lexiconItem.Comment = wordElement.GetAttribute("c");

                    if (lexiconItem.Status != Lexicon.LexiconStatus.Original)
                    {
                        // Get original word text.
                        string originalWordText = wordElement.GetAttribute("vo");
                        if (!string.IsNullOrEmpty(originalWordText))
                        {
                            lexiconItem.OldGrapheme = originalWordText;
                        }
                    }

                    foreach (XmlNode pronNode in wordNode.SelectNodes("tts:p", nsmgr))
                    {
                        LexiconPronunciation lexPron = LexiconPronunciation.Load(lexiconItem,
                            pronNode, nsmgr, contentController, errorSet);
                        if (lexPron != null)
                        {
                            lexiconItem.Pronunciations.Add(lexPron);
                        }
                    }
                }
            }

            return lexiconItem;
        }
예제 #16
0
        /// <summary>
        /// Extracts from the main lexicon the sub lexicon for the words of a corpus,
        /// and logs every corpus word that is missing from the main lexicon.
        /// </summary>
        /// <param name="corpusType">Corpus type; only "WORDLIST" (any letter case) is supported.</param>
        /// <param name="corpusFile">Corpus file the words are extracted from.</param>
        /// <param name="mainLexicon">Main lexicon to extract the sub lexicon from.</param>
        /// <returns>The sub lexicon produced by the main lexicon.</returns>
        /// <exception cref="InvalidDataException">The corpus type is not supported.</exception>
        private Lexicon ExtractSubLexicon(string corpusType, string corpusFile,
            Lexicon mainLexicon)
        {
            // The corpus type is a fixed keyword rather than linguistic text, so it is
            // compared ordinally; a culture-sensitive comparison would reject
            // "wordlist" under cultures such as tr-TR.
            if (!string.Equals(corpusType, "WORDLIST", StringComparison.OrdinalIgnoreCase))
            {
                throw new InvalidDataException("Unsupported corpus type");
            }

            List<string> words = ExtractWordsFromWordList(corpusFile);

            List<string> missedLexWords = new List<string>();
            Lexicon newLex = mainLexicon.ExtractSubLexicon(words, missedLexWords);
            foreach (string word in missedLexWords)
            {
                // Words carrying a "[break=" marker are not reported as missing.
                if (word.IndexOf("[break=", StringComparison.Ordinal) == -1)
                {
                    // Braces are doubled before logging - presumably because Log treats
                    // its argument as a composite format string (confirm against Log).
                    string logWord = word.Replace("{", "{{");
                    logWord = logWord.Replace("}", "}}");
                    Log("[" + logWord + "] not in main lexicon!");
                }
            }

            return newLex;
        }
예제 #17
0
 /// <summary>
 /// Initializes a new instance of the <see cref="LexiconSearcher"/> class
 /// bound to the given host lexicon.
 /// </summary>
 /// <param name="lex">Host lexicon; stored as-is, without validation.</param>
 public LexiconSearcher(Lexicon lex)
 {
     this._lex = lex;
 }
예제 #18
0
        /// <summary>
        /// Checks that a domain lexicon carries a domain tag and that each of its
        /// items contains only that one domain.
        /// </summary>
        /// <param name="domainLex">Domain lexicon to validate; must not be null.</param>
        /// <returns>
        /// False when the lexicon has no domain tag, or when any item fails the
        /// <c>OnlyContainsOneDomain</c> check for that tag; otherwise true.
        /// </returns>
        private static bool ValidateDomainLexicon(Lexicon domainLex)
        {
            Helper.ThrowIfNull(domainLex);

            if (string.IsNullOrEmpty(domainLex.DomainTag))
            {
                return false;
            }

            foreach (LexicalItem item in domainLex.Items.Values)
            {
                if (!item.OnlyContainsOneDomain(domainLex.DomainTag))
                {
                    // A single offending item is enough to reject the lexicon.
                    return false;
                }
            }

            return true;
        }
예제 #19
0
        /// <summary>
        /// Loads a DomainItem from a domain XML element.
        /// Reads the attributes "s" (status), "p" (whether the pronunciation is preferred
        /// in this domain), "v" (domain value) and "vo" (original domain value).
        /// </summary>
        /// <param name="parentProperty">Property owning the domain; used for error reporting.</param>
        /// <param name="domainNode">XML node of the domain; expected to be an XmlElement.</param>
        /// <param name="nsmgr">XmlNamespaceManager.</param>
        /// <param name="contentController">Controls whether deleted items are kept (history checking mode).</param>
        /// <param name="errorSet">Receives an error when the domain value is empty.</param>
        /// <returns>
        /// The loaded DomainItem, or null when the item is deleted (outside history
        /// checking mode) or has an empty domain value.
        /// </returns>
        internal static DomainItem Load(LexiconItemProperty parentProperty, XmlNode domainNode, XmlNamespaceManager nsmgr, Lexicon.ContentControler contentController, ErrorSet errorSet)
        {
            Debug.Assert(parentProperty != null && parentProperty.Parent != null && parentProperty.Parent.Parent != null &&
                domainNode != null && contentController != null && nsmgr != null && errorSet != null);

            DomainItem domainItem = new DomainItem();
            XmlElement domainElem = domainNode as XmlElement;
            Debug.Assert(domainElem != null);

            string domainStatusValue = domainElem.GetAttribute("s");
            if (!string.IsNullOrEmpty(domainStatusValue))
            {
                domainItem.Status = (Lexicon.LexiconStatus)Enum.Parse(typeof(Lexicon.LexiconStatus), domainStatusValue, true);

                // Lexicon object is shared with lexicon reviewer tool,
                // We drop those items if they have "deleted" status when it is not loaded by lexicon reviewer tool
                if (domainItem.Status == Lexicon.LexiconStatus.Deleted && !contentController.IsHistoryCheckingMode)
                {
                    domainItem = null;
                }
            }

            if (domainItem != null)
            {
                // Check whether pronunciation is preferred in this domain
                string preferedValue = domainElem.GetAttribute("p");
                if (!string.IsNullOrEmpty(preferedValue))
                {
                    domainItem.IsFirstPronunciation = bool.Parse(preferedValue);
                }

                string domainValue = domainElem.GetAttribute("v");
                string originalDomainValue = domainElem.GetAttribute("vo");
                if (string.IsNullOrEmpty(domainValue))
                {
                    Error error = new Error(DomainError.EmptyDomain);
                    errorSet.Add(LexiconError.DomainError,
                        error, parentProperty.Parent.Parent.Text, parentProperty.Parent.Symbolic);
                    domainItem = null;
                }
                else
                {
                    // Domain tags are identifiers, so they are normalized with the invariant
                    // culture; the thread culture (e.g. tr-TR) must not change the result.
                    string normalizedValue = domainValue.ToLowerInvariant();
                    domainItem.Value = normalizedValue;
                    if (!string.IsNullOrEmpty(originalDomainValue) &&
                        domainItem.Status != Lexicon.LexiconStatus.Original)
                    {
                        domainItem.OldValue = originalDomainValue.ToLowerInvariant();
                    }
                    else
                    {
                        // Keep OldValue in the same normalized form as Value; storing the raw
                        // attribute text would make an untouched mixed-case domain differ
                        // from its own old value.
                        domainItem.OldValue = normalizedValue;
                    }
                }
            }

            return domainItem;
        }
예제 #20
0
        /// <summary>
        /// Puts a lexical item into the lexicon registered for a domain tag, creating
        /// and registering that lexicon first when the tag has none yet.
        /// </summary>
        /// <param name="domainLexicons">Lexicons keyed by domain tag; updated in place.</param>
        /// <param name="domainTag">Domain tag the item belongs to.</param>
        /// <param name="lexItem">Lexical item to add, keyed by its grapheme.</param>
        /// <exception cref="InvalidDataException">
        /// The lexicon of the domain already contains the item's grapheme.
        /// </exception>
        private void FillDomainLexicalItem(Dictionary<string, Lexicon> domainLexicons, string domainTag, LexicalItem lexItem)
        {
            Helper.ThrowIfNull(domainLexicons);
            Helper.ThrowIfNull(domainTag);
            Helper.ThrowIfNull(lexItem);

            Lexicon target;
            if (!domainLexicons.TryGetValue(domainTag, out target))
            {
                // First item seen for this domain: build its lexicon from this
                // instance's language and encoding, then register it.
                target = new Lexicon(Language);
                target.Encoding = Encoding;
                target.DomainTag = domainTag;
                target.Items.Add(lexItem.Grapheme, lexItem);
                domainLexicons.Add(domainTag, target);
                return;
            }

            if (target.Items.ContainsKey(lexItem.Grapheme))
            {
                throw new InvalidDataException(
                    string.Format("Duplicate lexicon word \"{0}\" in \"{1}\" domain.", lexItem.Grapheme, domainTag));
            }

            target.Items.Add(lexItem.Grapheme, lexItem);
        }
예제 #21
0
        /// <summary>
        /// Loads an AttributeItem from an attribute XML element.
        /// Reads the attributes "s" (status), "category", "value" and "vo" (original value).
        /// </summary>
        /// <param name="parentProperty">Property owning the attribute; used for error reporting.</param>
        /// <param name="attributeNode">XML node of the attribute; expected to be an XmlElement.</param>
        /// <param name="nsmgr">XmlNamespaceManager.</param>
        /// <param name="contentController">Controls whether deleted items are kept (history checking mode).</param>
        /// <param name="errorSet">Receives an error when the category or the value is empty.</param>
        /// <returns>
        /// The loaded AttributeItem, or null when the item is deleted (outside history
        /// checking mode) or when its category or value is empty.
        /// </returns>
        internal static AttributeItem Load(LexiconItemProperty parentProperty, XmlNode attributeNode, XmlNamespaceManager nsmgr, Lexicon.ContentControler contentController, ErrorSet errorSet)
        {
            Debug.Assert(parentProperty != null && parentProperty.Parent != null && parentProperty.Parent.Parent != null &&
                attributeNode != null && contentController != null && nsmgr != null);

            AttributeItem loaded = new AttributeItem();

            XmlElement element = attributeNode as XmlElement;
            Debug.Assert(element != null);

            string statusText = element.GetAttribute("s");
            if (!string.IsNullOrEmpty(statusText))
            {
                loaded.Status = (Lexicon.LexiconStatus)Enum.Parse(
                    typeof(Lexicon.LexiconStatus), statusText, true);

                // The lexicon object is shared with the lexicon reviewer tool; deleted
                // items are dropped unless the lexicon is loaded in history checking mode.
                if (loaded.Status == Lexicon.LexiconStatus.Deleted && !contentController.IsHistoryCheckingMode)
                {
                    return null;
                }
            }

            string category = element.GetAttribute("category");
            string value = element.GetAttribute("value");
            string originalValue = element.GetAttribute("vo");

            if (string.IsNullOrEmpty(category))
            {
                Error categoryError = new Error(LexicalAttributeError.EmptyCategory);
                errorSet.Add(LexiconError.AttributeError,
                    categoryError, parentProperty.Parent.Parent.Text, parentProperty.Parent.Symbolic);
                return null;
            }

            if (string.IsNullOrEmpty(value))
            {
                Error valueError = new Error(LexicalAttributeError.EmptyValue);
                errorSet.Add(LexiconError.AttributeError,
                    valueError, parentProperty.Parent.Parent.Text, parentProperty.Parent.Symbolic);
                return null;
            }

            loaded.Value = value;
            loaded.CategoryName = category;

            // The recorded original value is only used for items that are not in Original
            // status; otherwise the old value is the current value.
            bool useOriginalValue = !string.IsNullOrEmpty(originalValue) &&
                loaded.Status != Lexicon.LexiconStatus.Original;
            loaded.OldValue = useOriginalValue ? originalValue : value;

            return loaded;
        }
예제 #22
0
        /// <summary>
        /// Creates a lexicon and loads it from an XML lexicon file.
        /// </summary>
        /// <param name="lexiconFilePath">Path of the XML lexicon file; must not be null or empty.</param>
        /// <returns>The loaded lexicon.</returns>
        /// <exception cref="ArgumentNullException">The path is null or empty.</exception>
        public static Lexicon ReadAllData(string lexiconFilePath)
        {
            if (!string.IsNullOrEmpty(lexiconFilePath))
            {
                Lexicon loaded = new Lexicon();
                loaded.Load(lexiconFilePath);
                return loaded;
            }

            throw new ArgumentNullException("lexiconFilePath");
        }
예제 #23
0
        /// <summary>
        /// Loads a LexiconItemProperty from a property XML node: its status (attribute "s"),
        /// its part of speech, gender, case and number items, its "tts:domain" children
        /// and its "tts:attr" children.
        /// </summary>
        /// <param name="parentLexPron">Pronunciation that owns the property.</param>
        /// <param name="propertyNode">XML node of the property.</param>
        /// <param name="nsmgr">XmlNamespaceManager used to select the child nodes.</param>
        /// <param name="contentController">Controls whether deleted items are kept (history checking mode).</param>
        /// <param name="errorSet">Receives domain errors found while loading.</param>
        /// <returns>
        /// The loaded property, or null when it is deleted and the lexicon is not
        /// loaded in history checking mode.
        /// </returns>
        internal static LexiconItemProperty Load(LexiconPronunciation parentLexPron, XmlNode propertyNode, XmlNamespaceManager nsmgr, Lexicon.ContentControler contentController, ErrorSet errorSet)
        {
            Debug.Assert(parentLexPron != null && parentLexPron.Parent != null && propertyNode != null &&
                nsmgr != null && contentController != null && errorSet != null);

            LexiconItemProperty loaded = new LexiconItemProperty();
            loaded.Parent = parentLexPron;

            XmlElement element = propertyNode as XmlElement;
            string statusText = element.GetAttribute("s");
            if (!string.IsNullOrEmpty(statusText))
            {
                loaded.Status = (Lexicon.LexiconStatus)Enum.Parse(typeof(Lexicon.LexiconStatus), statusText, true);
            }

            if (!contentController.IsHistoryCheckingMode && loaded.Status == Lexicon.LexiconStatus.Deleted)
            {
                return null;
            }

            // Each optional item is assigned only when its loader returns one.
            PosItem pos = PosItem.Load(propertyNode, nsmgr);
            if (pos != null)
            {
                loaded.PartOfSpeech = pos;
            }

            GenderItem gender = GenderItem.Load(propertyNode, nsmgr);
            if (gender != null)
            {
                loaded.Gender = gender;
            }

            CaseItem grammaticalCase = CaseItem.Load(propertyNode, nsmgr);
            if (grammaticalCase != null)
            {
                loaded.Case = grammaticalCase;
            }

            NumberItem number = NumberItem.Load(propertyNode, nsmgr);
            if (number != null)
            {
                loaded.Number = number;
            }

            foreach (XmlNode domainNode in propertyNode.SelectNodes("tts:domain", nsmgr))
            {
                DomainItem domain = DomainItem.Load(loaded, domainNode, nsmgr, contentController, errorSet);
                if (domain == null)
                {
                    continue;
                }

                if (loaded.Domains.ContainsKey(domain.Value))
                {
                    // The first occurrence of a domain wins; later ones are reported.
                    Error duplicateError = new Error(DomainError.DuplicateDomain, domain.Value);
                    errorSet.Add(LexiconError.DomainError,
                        duplicateError, parentLexPron.Parent.Text, parentLexPron.Symbolic);
                }
                else
                {
                    loaded.Domains.Add(domain.Value, domain);
                }
            }

            string lexiconDomainTag = (parentLexPron.Parent.Parent as Lexicon).DomainTag;
            bool lexiconHasDomainTag = !string.IsNullOrEmpty(lexiconDomainTag);
            if (loaded.Domains.Count == 0)
            {
                // Without explicit domains the property takes the lexicon level domain,
                // or a default-constructed DomainItem when the lexicon has none.
                if (lexiconHasDomainTag)
                {
                    loaded.ChangeDomain(new DomainItem(lexiconDomainTag));
                }
                else
                {
                    loaded.ChangeDomain(new DomainItem());
                }
            }
            else if (lexiconHasDomainTag)
            {
                // Explicit domain tags together with a lexicon level domain tag are an error.
                Error tagError = new Error(DomainError.InvalidDomainTags);
                errorSet.Add(LexiconError.DomainError,
                    tagError, parentLexPron.Parent.Text, parentLexPron.Symbolic);
            }

            foreach (XmlNode attributeNode in propertyNode.SelectNodes("tts:attr", nsmgr))
            {
                AttributeItem attribute = AttributeItem.Load(loaded, attributeNode, nsmgr, contentController, errorSet);
                if (attribute != null)
                {
                    loaded.AddAttribute(attribute);
                }
            }

            return loaded;
        }
예제 #24
0
        /// <summary>
        /// Extracts a sub lexicon for a word list and reports the words that are
        /// not in this (main) lexicon.
        /// </summary>
        /// <param name="words">Words to extract; null or empty entries and repeated words are skipped.</param>
        /// <param name="missedLexWords">
        /// Optional list that is cleared and then filled with the distinct words that
        /// were not found; pass null when the missed words are not needed.
        /// </param>
        /// <returns>
        /// New sub lexicon sharing this lexicon's language, encoding, phone set and POS set,
        /// with the found items keyed by the words as they appear in the list.
        /// </returns>
        /// <exception cref="ArgumentNullException">The word list is null.</exception>
        public Lexicon ExtractSubLexicon(List<string> words, List<string> missedLexWords)
        {
            if (words == null)
            {
                throw new ArgumentNullException("words");
            }

            Lexicon newLex = new Lexicon();
            newLex.Language = Language;
            newLex.Encoding = Encoding;
            newLex.PhoneSet = PhoneSet;
            newLex.PosSet = PosSet;

            // Used as a set so that each missed word is reported once.
            Dictionary<string, object> missedWords = null;
            if (missedLexWords != null)
            {
                missedLexWords.Clear();
                missedWords = new Dictionary<string, object>(StringComparer.InvariantCulture);
            }

            foreach (string word in words)
            {
                if (string.IsNullOrEmpty(word) || newLex.Items.ContainsKey(word))
                {
                    continue;
                }

                // First do case sensitive lookup; if not found, do case insensitive lookup.
                // Both lookups must run against this (main) lexicon: the new sub lexicon
                // was just checked above and cannot contain the word yet.
                LexicalItem wordItem = Lookup(word);
                if (wordItem == null)
                {
                    wordItem = Lookup(word, true);
                }

                if (wordItem != null)
                {
                    newLex.Items.Add(word, wordItem);
                }
                else if (missedWords != null && !missedWords.ContainsKey(word))
                {
                    missedWords.Add(word, null);
                }
            }

            if (missedLexWords != null)
            {
                missedLexWords.AddRange(missedWords.Keys);
            }

            return newLex;
        }
예제 #25
0
        /// <summary>
        /// Loads a LexiconPronunciation from a pronunciation XML element.
        /// Reads the attributes "s" (status), "v" (pronunciation), "o" (original position),
        /// "vo" (original pronunciation) and "f" (frequency), then loads the "tts:pr"
        /// child nodes as properties.
        /// </summary>
        /// <param name="parentLexItem">Lexical item that owns the pronunciation.</param>
        /// <param name="pronNode">XML node of the pronunciation; expected to be an XmlElement.</param>
        /// <param name="nsmgr">XmlNamespaceManager used to select the property nodes.</param>
        /// <param name="contentController">Controls whether deleted items are kept (history checking mode).</param>
        /// <param name="errorSet">Receives errors found while loading, e.g. duplicate properties.</param>
        /// <returns>
        /// The loaded pronunciation, or null when it is deleted and the lexicon is not
        /// loaded in history checking mode.
        /// </returns>
        internal static LexiconPronunciation Load(LexicalItem parentLexItem, XmlNode pronNode, XmlNamespaceManager nsmgr, Lexicon.ContentControler contentController, ErrorSet errorSet)
        {
            Debug.Assert(parentLexItem != null && pronNode != null &&
                nsmgr != null && contentController != null && errorSet != null);

            LexiconPronunciation lexPron = new LexiconPronunciation(parentLexItem.Language);
            lexPron.Parent = parentLexItem;

            XmlElement pronElem = pronNode as XmlElement;
            Debug.Assert(pronElem != null);

            string pronStatusValue = pronElem.GetAttribute("s");
            if (!string.IsNullOrEmpty(pronStatusValue))
            {
                lexPron.Status = (Lexicon.LexiconStatus)Enum.Parse(typeof(Lexicon.LexiconStatus),
                    pronStatusValue, true);
            }

            // Lexicon object is shared with lexicon reviewer tool,
            // We drop those items if they have "deleted" status when it is not loaded by lexicon reviewer tool
            if (!contentController.IsHistoryCheckingMode && lexPron.Status == Lexicon.LexiconStatus.Deleted)
            {
                lexPron = null;
            }
            else
            {
                // Normalize the pronunciation: trim it, collapse whitespace runs into a
                // single space and lower-case it. The static Regex.Replace reuses the
                // framework's cached regex instead of building a new one on every call.
                lexPron.Symbolic = pronElem.GetAttribute("v").Trim();
                lexPron.Symbolic = Regex.Replace(lexPron.Symbolic, @"\s{2,}", " ").ToLowerInvariant();
                lexPron.OldSymbolic = lexPron.Symbolic;

                // Get pronunciation original position.
                string originalPronPosition = pronElem.GetAttribute("o");
                if (!string.IsNullOrEmpty(originalPronPosition))
                {
                    lexPron.OldPosition = int.Parse(originalPronPosition, CultureInfo.InvariantCulture);
                }

                if (lexPron.Status != Lexicon.LexiconStatus.Original)
                {
                    string originalPronText = pronElem.GetAttribute("vo");
                    if (!string.IsNullOrEmpty(originalPronText))
                    {
                        lexPron.OldSymbolic = originalPronText;
                    }
                }

                // Get the pronunciation's frequency. If there's no such information, the
                // frequency stays zero. The attribute is machine-written XML data, so it is
                // parsed with the invariant culture, like the original position above.
                int frequency = 0;
                int.TryParse(pronElem.GetAttribute("f"), NumberStyles.Integer,
                    CultureInfo.InvariantCulture, out frequency);
                lexPron.Frequency = frequency;

                foreach (XmlNode propertyNode in pronNode.SelectNodes("tts:pr", nsmgr))
                {
                    LexiconItemProperty property = LexiconItemProperty.Load(lexPron, propertyNode, nsmgr, contentController, errorSet);
                    if (property != null)
                    {
                        // Duplicate properties are kept only in history checking mode;
                        // otherwise they are reported and dropped.
                        if (contentController.IsHistoryCheckingMode || !lexPron.Properties.Contains(property))
                        {
                            lexPron.Properties.Add(property);
                        }
                        else
                        {
                            errorSet.Add(LexiconError.DuplicateProperty, parentLexItem.Text, lexPron.Symbolic);
                        }
                    }
                }
            }

            return lexPron;
        }
예제 #26
0
        /// <summary>
        /// Splits the items of this lexicon into one lexicon per domain tag.
        /// A lexicon for the general domain is always created. An item that is not treated
        /// as an expanded word goes into the general lexicon, with its domain tags cleaned,
        /// when its pronunciations are the same for all domains or it has a single domain;
        /// every other item is distributed to the lexicons of its own domains.
        /// </summary>
        /// <param name="sp">Service provider passed to the expanded word check.</param>
        /// <param name="errorSet">Error set passed to the general pronunciation check.</param>
        /// <returns>The domain lexicons, including the general one.</returns>
        public Lexicon[] SplitIntoDomainLexicons(SP.ServiceProvider sp, ErrorSet errorSet)
        {
            // Lexicons keyed by domain tag; the general lexicon is registered up front.
            Dictionary<string, Lexicon> lexiconsByDomain = new Dictionary<string, Lexicon>();
            Lexicon general = new Lexicon(this.Language);
            general.Encoding = Encoding;
            general.DomainTag = DomainItem.GeneralDomain;
            lexiconsByDomain.Add(general.DomainTag, general);

            foreach (KeyValuePair<string, LexicalItem> entry in this.Items)
            {
                LexicalItem item = entry.Value;
                Dictionary<string, LexicalItem> itemsByDomain = item.SplitToDomainLexicalItems();
                bool sameProns = ArePronsSameForAllDomains(itemsByDomain);

                bool isChinese = Language == Language.ZhCN ||
                    Language == Language.ZhTW ||
                    Language == Language.ZhHK;
                bool expanded = false;

                // Chinese, Japanese and Korean have no expanded words, so the
                // per-pronunciation check is skipped for them.
                if (!isChinese && Language != Language.JaJP && Language != Language.KoKR)
                {
                    foreach (LexiconPronunciation pron in item.Pronunciations)
                    {
                        if (LexicalItem.IsExpandedWord(item.Grapheme, item.Language, pron, sp))
                        {
                            expanded = true;
                            break;
                        }
                    }
                }

                // In a zh-XX lexicon an English word is always handled like an expanded word.
                if (isChinese && Helper.IsEnglishWord(item.Grapheme))
                {
                    expanded = true;
                }

                if (!expanded && (sameProns || itemsByDomain.Count == 1))
                {
                    LexicalItem generalItem = item.Clone();
                    generalItem.CleanAllDomainTags();
                    FillDomainLexicalItem(lexiconsByDomain, DomainItem.GeneralDomain, generalItem);
                    continue;
                }

                if (!expanded)
                {
                    CheckGeneralPronExist(itemsByDomain, errorSet, item.Grapheme);
                }

                foreach (KeyValuePair<string, LexicalItem> domainEntry in itemsByDomain)
                {
                    FillDomainLexicalItem(lexiconsByDomain, domainEntry.Key, domainEntry.Value);
                }
            }

            return lexiconsByDomain.Values.ToArray();
        }
예제 #27
0
 /// <summary>
 /// Sets the status of every domain found under the properties of the pronunciations.
 /// When "forceApply" is true each domain gets the new status; when it is false
 /// only the domains whose current status is Original are changed.
 /// </summary>
 /// <param name="status">Status to assign.</param>
 /// <param name="forceApply">Whether to overwrite statuses other than Original as well.</param>
 public void SetStatusOnAllDomains(Lexicon.LexiconStatus status, bool forceApply)
 {
     foreach (LexiconPronunciation pronunciation in _pronunciations)
     {
         foreach (LexiconItemProperty itemProperty in pronunciation.Properties)
         {
             foreach (DomainItem domainItem in itemProperty.Domains.Values)
             {
                 if (forceApply || domainItem.Status == Lexicon.LexiconStatus.Original)
                 {
                     domainItem.Status = status;
                 }
             }
         }
     }
 }
예제 #28
0
        /// <summary>
        /// Checks the symbols of a char table that have a contextual expansion against
        /// the lexicon, and generates a lexicon of isolated symbols written to a file.
        /// A symbol is added to the generated lexicon when a pronunciation could be
        /// produced for its expansion and the lexicon has no entry for the symbol with
        /// the same phones. A message is added to the error collection for each symbol
        /// that differs from the lexicon, is already in it, or has no generated pronunciation.
        /// </summary>
        /// <param name="chartable">Char table supplying the symbols and their contextual expansions.</param>
        /// <param name="posSymbol">Part of speech assigned to the generated entries.</param>
        /// <param name="lexiconOutput">Output location passed to <c>Lexicon.WriteAllData</c>.</param>
        /// <param name="errors">Collection that receives the messages.</param>
        /// <exception cref="ArgumentNullException">
        /// The char table, the error collection or the part of speech is null.
        /// </exception>
        public void CheckContextualSymbolInLexicon(CharTable chartable,
            string posSymbol, string lexiconOutput, Collection<string> errors)
        {
            if (chartable == null)
            {
                throw new ArgumentNullException("chartable");
            }

            if (errors == null)
            {
                throw new ArgumentNullException("errors");
            }

            if (posSymbol == null)
            {
                throw new ArgumentNullException("posSymbol");
            }

            Lexicon generated = new Lexicon(chartable.Language);
            Collection<string> polyWord = new Collection<string>();

            foreach (CharElement element in chartable.CharList)
            {
                LexicalItem existingItem = _lexicon.Lookup(
                    element.Symbol.ToString(), true);
                LexicalItem newItem = new LexicalItem(generated.Language);
                LexiconPronunciation newPron = new LexiconPronunciation(
                    generated.Language);
                string pron = string.Empty;
                string expansion = element.ContextualExpansion;

                // Symbols without a contextual expansion are not checked.
                if (string.IsNullOrEmpty(expansion))
                {
                    continue;
                }

                newItem.Grapheme = element.Symbol.ToString();
                Collection<string> errorStrings = new Collection<string>();
                bool hasError = _lexicon.GetPronunciationForWords(expansion, errorStrings, polyWord, ref pron);
                if (hasError || string.IsNullOrEmpty(pron))
                {
                    errors.Add(AttributeError.SymbolPronGenError + element.Symbol.ToString());
                    continue;
                }

                // The symbol is skipped only when the lexicon already holds a
                // pronunciation for it made of exactly the same phones.
                bool addWord = true;
                if (existingItem != null)
                {
                    string[] phones = Pronunciation.SplitIntoPhones(pron);
                    foreach (LexiconPronunciation existingPron in existingItem.Pronunciations)
                    {
                        string[] existingPhones = Pronunciation.SplitIntoPhones(existingPron.Symbolic);
                        if (existingPhones.Length != phones.Length)
                        {
                            continue;
                        }

                        int matched = 0;
                        while (matched < phones.Length && existingPhones[matched] == phones[matched])
                        {
                            matched++;
                        }

                        if (matched == phones.Length)
                        {
                            addWord = false;
                            break;
                        }
                    }
                }

                if (!addWord)
                {
                    errors.Add(AttributeError.InfoSymbolInLex + element.Symbol.ToString());
                    continue;
                }

                // Add the word because the symbol or the pronunciation is not in the lexicon.
                newPron.Symbolic = pron;
                LexiconItemProperty itemProperty = new LexiconItemProperty();
                itemProperty.PartOfSpeech = new PosItem(posSymbol);
                newPron.Properties.Add(itemProperty);
                newItem.Pronunciations.Add(newPron);
                generated.Items.Add(newItem.Grapheme, newItem);
                if (existingItem != null)
                {
                    errors.Add(AttributeError.SymbolDiffPronFromLex + element.Symbol.ToString());
                }
            }

            Lexicon.WriteAllData(lexiconOutput, generated, Encoding.Unicode);
        }
예제 #29
0
        /// <summary>
        /// Compile the lexicon: load the schema, phone set and lexicon raw data,
        /// round-trip the lexicon through XML, validate it, save it as a vendor
        /// lexicon and run the external build tool on it.
        /// </summary>
        /// <param name="outputStream">Stream handed to HandleCommandLine for the compiled result.</param>
        /// <returns>ErrorSet collected while loading, validating and compiling.</returns>
        /// <exception cref="ArgumentNullException">Thrown when outputStream is null.</exception>
        private ErrorSet CompileLexicon(Stream outputStream)
        {
            if (outputStream == null)
            {
                throw new ArgumentNullException("outputStream");
            }

            ErrorSet errorSet = new ErrorSet();

            // Load the dependencies; their errors are collected separately so that
            // they can be reported as dependency errors.
            ErrorSet subErrorSet = new ErrorSet();
            LexicalAttributeSchema schema = (LexicalAttributeSchema)GetObject(
                RawDataName.LexicalAttributeSchema, subErrorSet);
            MergeDependencyError(errorSet, subErrorSet, _schemaFullName);

            subErrorSet.Clear();
            TtsPhoneSet phoneSet = (TtsPhoneSet)GetObject(RawDataName.PhoneSet, subErrorSet);
            MergeDependencyError(errorSet, subErrorSet, RawDataName.PhoneSet);

            if (errorSet.Contains(ErrorSeverity.MustFix))
            {
                return errorSet;
            }

            Microsoft.Tts.Offline.Core.Lexicon lexicon =
                (Microsoft.Tts.Offline.Core.Lexicon)GetObject(RawDataName.Lexicon, errorSet);

            // Guard before the first dereference: a missing lexicon is reported as a
            // raw data error instead of surfacing as a NullReferenceException.
            if (lexicon == null)
            {
                errorSet.Add(DataCompilerError.RawDataError, "Lexicon");
                return errorSet;
            }

            errorSet.Merge(lexicon.ErrorSet);

            // Change to case insensitive lexicon: save the loaded lexicon to an
            // in-memory XML document and load it back into a fresh Lexicon instance.
            // NOTE(review): the save uses IsCaseSensitive = true; presumably the default
            // Load below is the case-insensitive step - confirm against Lexicon.Load.
            MemoryStream lexiconStream = new MemoryStream();
            using (XmlWriter xmlWriter = XmlWriter.Create(lexiconStream))
            {
                Microsoft.Tts.Offline.Core.Lexicon.ContentControler lexiconControler =
                    new Microsoft.Tts.Offline.Core.Lexicon.ContentControler();
                lexiconControler.IsCaseSensitive = true;
                lexicon.Save(xmlWriter, lexiconControler);
            }

            lexiconStream.Seek(0, SeekOrigin.Begin);
            Microsoft.Tts.Offline.Core.Lexicon caseInsensitiveLexicon = new Microsoft.Tts.Offline.Core.Lexicon();

            // Disposing the reader also disposes the underlying memory stream.
            using (StreamReader sr = new StreamReader(lexiconStream))
            {
                caseInsensitiveLexicon.Load(sr);
            }

            if (errorSet.Contains(ErrorSeverity.MustFix))
            {
                // The source lexicon has blocking errors: report what the reload found and stop.
                errorSet.Merge(caseInsensitiveLexicon.ErrorSet);
                return errorSet;
            }

            caseInsensitiveLexicon.LexicalAttributeSchema = schema;
            caseInsensitiveLexicon.PhoneSet = phoneSet;
            caseInsensitiveLexicon.Validate();

            // Set severity of errors only in case-insensitive lexicon to NoError for they're not treated as real error
            caseInsensitiveLexicon.ErrorSet.SetSeverity(ErrorSeverity.NoError);

            string vendorLexiconPath = Helper.GetTempFileName();
            try
            {
                caseInsensitiveLexicon.SaveToVendorLexicon(vendorLexiconPath);

                string toolFileName = ToolName.BldVendor2;
                string binaryLexiconPath = Helper.GetTempFileName();

                string compilingArguments = Helper.NeutralFormat("-v {0} V2 \"{1}\" \"{2}\" \"{3}\" TTS",
                    (int)_language, _dataHandlerList.Datas[RawDataName.LexicalAttributeSchema].Path,
                    vendorLexiconPath, binaryLexiconPath);
                string toolPath = Path.Combine(ToolDir, toolFileName);

                CheckToolExists(toolPath, errorSet);
                if (!errorSet.Contains(ErrorSeverity.MustFix))
                {
                    // NOTE(review): binaryLexiconPath is not deleted here; presumably
                    // HandleCommandLine owns that file - confirm.
                    HandleCommandLine(ModuleDataName.Lexicon, toolPath, compilingArguments,
                        binaryLexiconPath, outputStream, errorSet);
                }
            }
            finally
            {
                // Always remove the temporary vendor lexicon, even when saving or the tool fails.
                File.Delete(vendorLexiconPath);
            }

            errorSet.Merge(caseInsensitiveLexicon.ErrorSet);

            return errorSet;
        }