/// <summary>
        /// Checks the contextual symbols of a char table against the lexicon and
        /// generates a lexicon for the symbols that still need an entry.
        /// </summary>
        /// <remarks>
        /// Only char elements with a non-empty contextual expansion are processed.
        /// For each of them a pronunciation is requested for the expansion words:
        /// - if no pronunciation can be generated, a SymbolPronGenError entry is reported;
        /// - if the symbol is already in the lexicon with the same phone sequence,
        ///   an InfoSymbolInLex entry is reported and nothing is generated;
        /// - otherwise the symbol is added to the generated lexicon, and a
        ///   SymbolDiffPronFromLex entry is reported when the symbol exists in the
        ///   lexicon with different pronunciation(s) only.
        /// The generated lexicon is written to <paramref name="lexiconOutput"/> in Unicode encoding.
        /// </remarks>
        /// <param name="chartable">Char table.</param>
        /// <param name="posSymbol">Pos of symbol.</param>
        /// <param name="lexiconOutput">Lexicon output.</param>
        /// <param name="errors">Errors.</param>
        /// <exception cref="ArgumentNullException">
        /// chartable, errors or posSymbol is null.
        /// </exception>
        public void CheckContextualSymbolInLexicon(CharTable chartable,
            string posSymbol, string lexiconOutput, Collection<string> errors)
        {
            if (chartable == null)
            {
                throw new ArgumentNullException("chartable");
            }

            if (errors == null)
            {
                throw new ArgumentNullException("errors");
            }

            if (posSymbol == null)
            {
                throw new ArgumentNullException("posSymbol");
            }

            Lexicon lexicon = new Lexicon(chartable.Language);

            // Shared across all symbols and handed to every pronunciation request.
            Collection<string> polyWord = new Collection<string>();

            foreach (CharElement charElement in chartable.CharList)
            {
                // Skip symbols without contextual expansion before doing any
                // lookup or allocation for them.
                string expansion = charElement.ContextualExpansion;
                if (string.IsNullOrEmpty(expansion))
                {
                    continue;
                }

                string symbol = charElement.Symbol.ToString();
                LexicalItem symbolItem = _lexicon.Lookup(symbol, true);

                string pron = string.Empty;

                // The per-symbol error details are collected but not reported by this method.
                Collection<string> errorStrings = new Collection<string>();
                bool hasError = _lexicon.GetPronunciationForWords(expansion, errorStrings, polyWord, ref pron);
                if (hasError || string.IsNullOrEmpty(pron))
                {
                    errors.Add(AttributeError.SymbolPronGenError + symbol);
                    continue;
                }

                if (symbolItem != null && HasSamePronunciation(symbolItem, pron))
                {
                    errors.Add(AttributeError.InfoSymbolInLex + symbol);
                    continue;
                }

                // add the word if the symbol or pronunciation is not in lexicon
                LexiconPronunciation lexiconPron = new LexiconPronunciation(lexicon.Language);
                lexiconPron.Symbolic = pron;
                LexiconItemProperty lip = new LexiconItemProperty();
                lip.PartOfSpeech = new PosItem(posSymbol);
                lexiconPron.Properties.Add(lip);

                LexicalItem lexiconItem = new LexicalItem(lexicon.Language);
                lexiconItem.Grapheme = symbol;
                lexiconItem.Pronunciations.Add(lexiconPron);

                // NOTE(review): presumably Items.Add throws when two char table
                // entries share the same symbol - confirm symbols are unique in CharList.
                lexicon.Items.Add(lexiconItem.Grapheme, lexiconItem);
                if (symbolItem != null)
                {
                    errors.Add(AttributeError.SymbolDiffPronFromLex + symbol);
                }
            }

            Lexicon.WriteAllData(lexiconOutput, lexicon, Encoding.Unicode);
        }

        /// <summary>
        /// Tells whether a lexical item already has a pronunciation whose phone
        /// sequence is identical to the given pronunciation.
        /// </summary>
        /// <param name="item">Lexical item whose pronunciations are searched.</param>
        /// <param name="pron">Pronunciation string to look for.</param>
        /// <returns>True if one pronunciation of the item has the same phones in the same order.</returns>
        private static bool HasSamePronunciation(LexicalItem item, string pron)
        {
            string[] phones = Pronunciation.SplitIntoPhones(pron);
            foreach (LexiconPronunciation existPron in item.Pronunciations)
            {
                string[] existPhones = Pronunciation.SplitIntoPhones(existPron.Symbolic);
                if (existPhones.Length != phones.Length)
                {
                    continue;
                }

                bool same = true;
                for (int i = 0; i < phones.Length; i++)
                {
                    if (existPhones[i] != phones[i])
                    {
                        same = false;
                        break;
                    }
                }

                if (same)
                {
                    return true;
                }
            }

            return false;
        }
Exemple #2
0
        /// <summary>
        /// Builds a lexicon from the pronounced words of an Xml script file.
        /// </summary>
        /// <param name="scriptFile">Xml script file whose items supply the words.</param>
        /// <param name="defaultPos">Part of speech used for words that have no POS string.</param>
        /// <param name="mainLexicon">
        /// Optional main lexicon. When it contains the word and a matching pronunciation,
        /// the symbolic form stored in the main lexicon is used. May be null.
        /// </param>
        /// <returns>Lexicon created in the language of the script file.</returns>
        public static Lexicon CreateFromXmlScriptFile(XmlScriptFile scriptFile, string defaultPos, Lexicon mainLexicon)
        {
            if (scriptFile == null)
            {
                throw new ArgumentNullException("scriptFile");
            }

            if (string.IsNullOrEmpty(defaultPos))
            {
                throw new ArgumentNullException("defaultPos");
            }

            Lexicon result = new Lexicon(scriptFile.Language);
            foreach (ScriptItem scriptItem in scriptFile.Items)
            {
                foreach (ScriptWord scriptWord in scriptItem.AllPronouncedWords)
                {
                    string grapheme = scriptWord.Grapheme;

                    // Pronunciation node for this occurrence of the word.
                    LexiconPronunciation newPron = new LexiconPronunciation(result.Language);
                    newPron.Symbolic = scriptWord.Pronunciation;

                    // Prefer the symbolic form of the main lexicon when it knows this pronunciation.
                    LexicalItem mainItem = mainLexicon == null ? null : mainLexicon.Lookup(grapheme, true);
                    if (mainItem != null)
                    {
                        LexiconPronunciation mainPron = mainItem.FindPronunciation(newPron.Symbolic, true);
                        if (mainPron != null)
                        {
                            newPron.Symbolic = mainPron.Symbolic;
                        }
                    }

                    LexiconItemProperty posProperty = new LexiconItemProperty();
                    posProperty.PartOfSpeech = string.IsNullOrEmpty(scriptWord.PosString)
                        ? new PosItem(defaultPos)
                        : new PosItem(scriptWord.PosString);
                    newPron.Properties.Add(posProperty);

                    if (result.Items.ContainsKey(grapheme))
                    {
                        // Known word: merge into its pronunciations.
                        LexicalItem knownItem = result.Items[grapheme];
                        bool pronFound = false;
                        foreach (LexiconPronunciation knownPron in knownItem.Pronunciations)
                        {
                            if (!knownPron.Symbolic.Equals(newPron.Symbolic, StringComparison.InvariantCultureIgnoreCase))
                            {
                                continue;
                            }

                            pronFound = true;
                            if (!knownPron.Properties.Contains(posProperty))
                            {
                                knownPron.Properties.Add(posProperty);
                            }
                        }

                        if (!pronFound)
                        {
                            knownItem.Pronunciations.Add(newPron);
                        }
                    }
                    else
                    {
                        // First occurrence of the word: create its lexical item.
                        LexicalItem freshItem = new LexicalItem(result.Language);
                        freshItem.Grapheme = grapheme;
                        freshItem.Pronunciations.Add(newPron);
                        result.Items.Add(grapheme, freshItem);
                    }
                }
            }

            return result;
        }
        /// <summary>
        /// Adds one pronunciation per dictionary entry to the lexical item.
        /// </summary>
        /// <param name="prons">
        /// Pronunciation dictionary: the key is the pronunciation string, the value
        /// holds the POS strings, each of which becomes one property of that pronunciation.
        /// </param>
        private void GenerateLexicalItem(Dictionary<string, Collection<string>> prons)
        {
            Helper.ThrowIfNull(prons);

            foreach (KeyValuePair<string, Collection<string>> entry in prons)
            {
                LexiconPronunciation lexPron = new LexiconPronunciation(_language);
                lexPron.Symbolic = entry.Key;

                foreach (string posText in entry.Value)
                {
                    lexPron.Properties.Add(new LexiconItemProperty(new PosItem(posText)));
                }

                _item.Pronunciations.Add(lexPron);
            }
        }
 /// <summary>
 /// Compare two pronunciation's original position.
 /// </summary>
 /// <param name="firstPron">First pronunciation to be compared.</param>
 /// <param name="secondPron">Second pronunciation to be compared.</param>
 /// <returns>
 /// Bigger than zero, firstPron's position bigger than the second one;
 /// Equal to zero, firstPron's position equal to the second one;
 /// less than zero, firstPron's position less than the second one.
 /// Only the sign of the result is meaningful.</returns>
 private static int ComparePronOriginalPosition(LexiconPronunciation firstPron, LexiconPronunciation secondPron)
 {
     // CompareTo instead of subtraction: the difference of two ints can
     // overflow and report the wrong sign for positions that are far apart.
     return firstPron.OldPosition.CompareTo(secondPron.OldPosition);
 }
Exemple #5
0
        /// <summary>
        /// Save lexicon properties.
        /// </summary>
        /// <remarks>
        /// Invalid properties are reported to the error set and skipped. For every
        /// valid property the POS, case, gender, number and generic attributes are
        /// turned into attribute strings, one list per attribute kind, and the lists
        /// are passed to BuildAttributeStringList to fill attributeStringList.
        /// </remarks>
        /// <param name="graphme">Word graphme.</param>
        /// <param name="lexPron">Lexicon pronunciation.</param>
        /// <param name="attributeStringList">Attribute string list.</param>
        private void SaveProperty(string graphme, LexiconPronunciation lexPron, Collection<string> attributeStringList)
        {
            foreach (LexiconItemProperty pr in lexPron.Properties)
            {
                if (!pr.Valid)
                {
                    // An invalid property may have no part of speech at all; reporting
                    // it must not fail with a null reference.
                    string posValue = pr.PartOfSpeech == null ? string.Empty : pr.PartOfSpeech.Value;
                    this.ErrorSet.Add(LexiconCompilerError.RemoveInvalidProperty,
                        graphme, lexPron.Symbolic, posValue);
                    continue;
                }

                List<ArrayList> attributes = new List<ArrayList>();

                Debug.Assert(pr.PartOfSpeech != null && !string.IsNullOrEmpty(pr.PartOfSpeech.Value));
                if (pr.PartOfSpeech != null && !string.IsNullOrEmpty(pr.PartOfSpeech.Value))
                {
                    ArrayList attrbuteList = new ArrayList();

                    attrbuteList.Add(
                        _attributeSchema.GenerateString("POS", pr.PartOfSpeech.Value));

                    attributes.Add(attrbuteList);
                }

                if (pr.Case != null && !string.IsNullOrEmpty(pr.Case.Value))
                {
                    attributes.Add(CreateSchemaAttributeList(
                        "F_CASE", CaseItem.ConvertIntoArray(pr.Case.Value, ErrorSet)));
                }

                if (pr.Gender != null && !string.IsNullOrEmpty(pr.Gender.Value))
                {
                    attributes.Add(CreateSchemaAttributeList(
                        "F_GENDER", GenderItem.ConvertIntoArray(pr.Gender.Value, ErrorSet)));
                }

                // Write out number information if present
                if (pr.Number != null && !string.IsNullOrEmpty(pr.Number.Value))
                {
                    attributes.Add(CreateSchemaAttributeList(
                        "F_NUMBER", NumberItem.ConvertIntoArray(pr.Number.Value, ErrorSet)));
                }

                foreach (KeyValuePair<string, List<AttributeItem>> pair in pr.AttributeSet)
                {
                    ArrayList attrbuteList = new ArrayList();

                    foreach (AttributeItem attr in pair.Value)
                    {
                        string attribute = LexicalAttributeSchema.GenerateString(pair.Key, attr.Value);

                        // Attributes that produce no string are left out of the list.
                        if (!string.IsNullOrEmpty(attribute))
                        {
                            attrbuteList.Add(attribute);
                        }
                    }

                    attributes.Add(attrbuteList);
                }

                // fill a terminal null in end of the list
                attributes.Add(null);

                BuildAttributeStringList(attributeStringList, string.Empty, attributes.ToArray(), 0);
            }
        }

        /// <summary>
        /// Generates the schema attribute string of every value for one attribute category.
        /// </summary>
        /// <param name="category">Attribute category name passed to the attribute schema.</param>
        /// <param name="values">Values of that category; each one is converted with ToString().</param>
        /// <returns>List holding one generated string per value, in the same order.</returns>
        private ArrayList CreateSchemaAttributeList(string category, ArrayList values)
        {
            ArrayList generated = new ArrayList();
            for (int i = 0; i < values.Count; i++)
            {
                generated.Add(_attributeSchema.GenerateString(category, values[i].ToString()));
            }

            return generated;
        }
        /// <summary>
        /// Tells whether a word is treated as an expanded word, judging by its pronunciation.
        /// </summary>
        /// <param name="word">Word text.</param>
        /// <param name="language">Language of the word.</param>
        /// <param name="pronunciation">Pronunciation of the word.</param>
        /// <param name="sp">Service provider handed to the spell-out check.</param>
        /// <returns>
        /// True if the pronunciation has more phones than the word has letters
        /// and the pronunciation is not a spell-out; otherwise false.
        /// </returns>
        public static bool IsExpandedWord(string word, Language language,
            LexiconPronunciation pronunciation,
            SP.ServiceProvider sp)
        {
            // More phones than letters hints at an expansion, unless the word is
            // merely spelled out. The spell-out check only runs when the first test passes.
            return PhonesMoreThanLetters(word, pronunciation.Symbolic, language)
                && !pronunciation.IsSpellOut(sp);
        }
        /// <summary>
        /// Validates the pronunciation of a word and updates its Valid flag.
        /// </summary>
        /// <remarks>
        /// Every validation error is forwarded to the error set. The pronunciation
        /// is marked invalid only by must-fix errors other than the vowel / sonorant
        /// count violations; a pronunciation that is already invalid stays invalid.
        /// </remarks>
        /// <param name="word">Word.</param>
        /// <param name="lexPron">Lexicon pronunciation.</param>
        /// <param name="ttsPhoneSet">TTS phone set.</param>
        /// <param name="errorSet">Error set.</param>
        private static void ValidatePronunciation(string word, LexiconPronunciation lexPron, TtsPhoneSet ttsPhoneSet,
            ErrorSet errorSet)
        {
            bool hasBlockingError = false;

            ErrorSet validationErrors = Pronunciation.Validate(lexPron.Symbolic, ttsPhoneSet);
            foreach (Error pronError in validationErrors.Errors)
            {
                errorSet.Add(LexiconError.PronunciationError, pronError, word);

                if (pronError.Severity != ErrorSeverity.MustFix)
                {
                    continue;
                }

                // Count violations are reported above but do not invalidate the pronunciation.
                bool isCountViolation =
                    pronError.Enum.Equals(PronunciationError.VowelAndSonorantCountLessThanMinimum) ||
                    pronError.Enum.Equals(PronunciationError.VowelAndSonorantCountGreaterThanMaximum) ||
                    pronError.Enum.Equals(PronunciationError.VowelCountLessThanMinimum) ||
                    pronError.Enum.Equals(PronunciationError.VowelCountGreaterThanMaximum);
                if (!isCountViolation)
                {
                    hasBlockingError = true;
                }
            }

            lexPron.Valid = lexPron.Valid && !hasBlockingError;
        }
        /// <summary>
        /// Load LexiconPronunciation from XmlNode.
        /// </summary>
        /// <remarks>
        /// Attributes read from the node: "s" (status), "v" (symbolic pronunciation,
        /// trimmed, whitespace runs collapsed, lower-cased), "o" (original position),
        /// "vo" (original pronunciation text, only used when the status is not Original)
        /// and "f" (frequency, zero when missing or not a number). Child "tts:pr"
        /// nodes are loaded as properties; duplicates are reported to the error set
        /// unless the content controller is in history checking mode.
        /// </remarks>
        /// <param name="parentLexItem">LexicalItem.</param>
        /// <param name="pronNode">XmlNode.</param>
        /// <param name="nsmgr">XmlNamespaceManager.</param>
        /// <param name="contentController">Object.</param>
        /// <param name="errorSet">ErrorSet.</param>
        /// <returns>
        /// LexiconPronunciation, or null when the pronunciation has "deleted" status
        /// and the content controller is not in history checking mode.
        /// </returns>
        internal static LexiconPronunciation Load(LexicalItem parentLexItem, XmlNode pronNode, XmlNamespaceManager nsmgr, Lexicon.ContentControler contentController, ErrorSet errorSet)
        {
            Debug.Assert(parentLexItem != null && pronNode != null &&
                nsmgr != null && contentController != null && errorSet != null);

            LexiconPronunciation lexPron = new LexiconPronunciation(parentLexItem.Language);
            lexPron.Parent = parentLexItem;

            XmlElement pronElem = pronNode as XmlElement;
            Debug.Assert(pronElem != null);

            string pronStatusValue = pronElem.GetAttribute("s");
            if (!string.IsNullOrEmpty(pronStatusValue))
            {
                lexPron.Status = (Lexicon.LexiconStatus)Enum.Parse(typeof(Lexicon.LexiconStatus),
                    pronStatusValue, true);
            }

            // Lexicon object is shared with lexicon reviewer tool,
            // We drop those items if they have "deleted" status when it is not loaded by lexicon reviewer tool
            if (!contentController.IsHistoryCheckingMode && lexPron.Status == Lexicon.LexiconStatus.Deleted)
            {
                return null;
            }

            // Normalize the pronunciation text: trim, collapse whitespace runs to a
            // single space and lower-case. The static Regex.Replace reuses the cached
            // pattern instead of building a Regex object on every call.
            lexPron.Symbolic = pronElem.GetAttribute("v").Trim();
            lexPron.Symbolic = Regex.Replace(lexPron.Symbolic, @"\s{2,}", " ").ToLowerInvariant();
            lexPron.OldSymbolic = lexPron.Symbolic;

            // Get pronunciation original position.
            string originalPronPosition = pronElem.GetAttribute("o");
            if (!string.IsNullOrEmpty(originalPronPosition))
            {
                lexPron.OldPosition = int.Parse(originalPronPosition, CultureInfo.InvariantCulture);
            }

            if (lexPron.Status != Lexicon.LexiconStatus.Original)
            {
                string originalPronText = pronElem.GetAttribute("vo");
                if (!string.IsNullOrEmpty(originalPronText))
                {
                    lexPron.OldSymbolic = originalPronText;
                }
            }

            // Get word's frequency. If there's no such information, set frequency to zero.
            // Parsed with the invariant culture, like the original position above, so
            // the result does not depend on the culture of the machine loading the file.
            int frequency = 0;
            int.TryParse(pronElem.GetAttribute("f"), NumberStyles.Integer, CultureInfo.InvariantCulture, out frequency);
            lexPron.Frequency = frequency;

            foreach (XmlNode propertyNode in pronNode.SelectNodes("tts:pr", nsmgr))
            {
                LexiconItemProperty property = LexiconItemProperty.Load(lexPron, propertyNode, nsmgr, contentController, errorSet);
                if (property == null)
                {
                    continue;
                }

                if (contentController.IsHistoryCheckingMode || !lexPron.Properties.Contains(property))
                {
                    lexPron.Properties.Add(property);
                }
                else
                {
                    errorSet.Add(LexiconError.DuplicateProperty, parentLexItem.Text, lexPron.Symbolic);
                }
            }

            return lexPron;
        }
        /// <summary>
        /// Creates a copy of this pronunciation.
        /// </summary>
        /// <remarks>
        /// Scalar state is copied member by member; each property is cloned and
        /// re-parented to the copy. The Parent of the copy itself is not assigned here.
        /// </remarks>
        /// <returns>Cloned LexiconPronunciation.</returns>
        public LexiconPronunciation Clone()
        {
            // Members are assigned in this exact order by the initializer.
            LexiconPronunciation copy = new LexiconPronunciation
            {
                Valid = this._valid,
                OldPosition = this._oldPosition,
                _symbolic = this._symbolic,
                _oldSymbolic = this._oldSymbolic,
                Frequency = this.Frequency,
                Language = this._language,
                Status = this.Status,
                LexiconType = this._type
            };

            foreach (LexiconItemProperty sourceProperty in _properties)
            {
                LexiconItemProperty propertyCopy = sourceProperty.Clone();
                copy.Properties.Add(propertyCopy);
                propertyCopy.Parent = copy;
            }

            return copy;
        }
        /// <summary>
        /// Import domain pronunciation.
        /// </summary>
        /// <remarks>
        /// Each property of the domain pronunciation is merged into every property of
        /// this pronunciation that has the same part of speech: the domain item is
        /// imported and the attribute sets are united. A domain property with no
        /// matching part of speech is cloned and appended as a new property.
        /// </remarks>
        /// <param name="domainPron">Domain LexiconPronunciation.</param>
        /// <param name="domainTag">Domain tag.</param>
        /// <param name="first">Whether this pronunciation is the first one in domain lexicon.</param>
        /// <returns>Whether this LexiconPronunciation changed .</returns>
        /// <exception cref="InvalidDataException">
        /// The domain pronunciation contains another domain than domainTag, or a
        /// domain or target property still uses the old gender / case / number format.
        /// </exception>
        public bool ImportDomainPronunciation(LexiconPronunciation domainPron, string domainTag, bool first)
        {
            Helper.ThrowIfNull(domainPron);
            Helper.ThrowIfNull(domainTag);

            if (!domainPron.OnlyContainsOneDomain(domainTag))
            {
                throw new InvalidDataException("It is invalid to include any other domain in property level.");
            }

            if (!first)
            {
                RemovePronunciationIsFirstTags(domainTag);
            }

            bool changed = false;
            foreach (LexiconItemProperty domainProperty in domainPron.Properties)
            {
                if (domainProperty.Gender != null ||
                    domainProperty.Case != null ||
                    domainProperty.Number != null)
                {
                    throw new InvalidDataException("domain lexicon contains old format <gender> <case> <number>. Please convert them to new format <attr> before import.");
                }

                // look for target property that domain tag will import to
                bool propertyImported = false;
                foreach (LexiconItemProperty targetProperty in _properties)
                {
                    if (targetProperty.Gender != null ||
                        targetProperty.Case != null ||
                        targetProperty.Number != null)
                    {
                        throw new InvalidDataException("target lexicon contains old format <gender> <case> <number>. Please convert them to new format <attr> before import.");
                    }

                    // If main lexicon and domain lexicon have same pos in <pr>, import the domain lexicon attributes to main lexicon.
                    if (!HistoryValue.Equals(targetProperty.PartOfSpeech, domainProperty.PartOfSpeech))
                    {
                        continue;
                    }

                    propertyImported = true;

                    // found a proper <pr> to import domain tag.
                    DomainItem domainItem = domainProperty.Domains[domainTag];
                    Helper.ThrowIfNull(domainItem);
                    DomainItem newDomainItem = new DomainItem(domainItem.Value);
                    if (targetProperty.ImportDomainItem(newDomainItem))
                    {
                        changed = true;
                    }

                    targetProperty.Domains[domainTag].IsFirstPronunciation = first;

                    // Import domain lexicon attributes to main lexicon.
                    foreach (string attributeKey in domainProperty.AttributeSet.Keys)
                    {
                        if (targetProperty.AttributeSet.ContainsKey(attributeKey))
                        {
                            // Union main lexicon and domain lexicon and remove duplicate.
                            targetProperty.AttributeSet[attributeKey] = targetProperty.AttributeSet[attributeKey].Union(domainProperty.AttributeSet[attributeKey]).ToList();
                        }
                        else
                        {
                            // Add a copy of the list: sharing the domain property's own
                            // list instance would let later changes to one lexicon's
                            // attribute list show up in the other.
                            targetProperty.AttributeSet.Add(attributeKey, domainProperty.AttributeSet[attributeKey].ToList());
                        }
                    }
                }

                if (!propertyImported)
                {
                    // not found. Copy the whole <pr> from domain lexicon
                    LexiconItemProperty newProperty = domainProperty.Clone();
                    foreach (DomainItem domainItem in newProperty.Domains.Values)
                    {
                        domainItem.IsFirstPronunciation = first;
                    }

                    _properties.Add(newProperty);
                    changed = true;
                }
            }

            return changed;
        }
        /// <summary>
        /// Compare objects that derived from LexiconPronunciation.
        /// </summary>
        /// <remarks>
        /// Two existing pronunciations are equal when language, lexicon type, status
        /// and symbolic text match and their properties are pairwise equal in order.
        /// </remarks>
        /// <param name="obj1">Object 1.</param>
        /// <param name="obj2">Object 2.</param>
        /// <returns>true for equal
        ///     (null, null) => equal
        ///     (null, deleted) => equal
        ///     (deleted, deleted) => equal
        ///     (null, not deleted) => not equal.
        /// </returns>
        public static bool Equals(LexiconPronunciation obj1, LexiconPronunciation obj2)
        {
            if (obj1 == obj2)
            {
                return true;
            }

            if ((obj1 == null || obj1.Status == Lexicon.LexiconStatus.Deleted) &&
                (obj2 == null || obj2.Status == Lexicon.LexiconStatus.Deleted))
            {
                return true;
            }

            // Exactly one side is missing while the other one is a live pronunciation:
            // they differ, and the member comparison below must not touch the null side.
            if (obj1 == null || obj2 == null)
            {
                return false;
            }

            if (obj1.Language != obj2.Language ||
                obj1.LexiconType != obj2.LexiconType ||
                obj1.Status != obj2.Status ||
                obj1.Symbolic != obj2.Symbolic)
            {
                return false;
            }

            Collection<LexiconItemProperty> props1 = obj1.Properties;
            Collection<LexiconItemProperty> props2 = obj2.Properties;
            int propsLength = props1.Count;
            if (propsLength != props2.Count)
            {
                return false;
            }

            // Properties are compared position by position, so order matters.
            for (int j = 0; j < propsLength; j++)
            {
                if (!props1[j].Equals(props2[j]))
                {
                    return false;
                }
            }

            return true;
        }
        /// <summary>
        /// Load LexiconItemProperty from XmlNode.
        /// </summary>
        /// <remarks>
        /// Reads the status attribute "s", the part of speech, gender, case and number
        /// items, the "tts:domain" child nodes and the "tts:attr" child nodes. A
        /// duplicate domain, or a property-level domain combined with a lexicon-level
        /// domain tag, is reported to the error set. A property without any domain
        /// gets the lexicon-level domain, or a default DomainItem when there is none.
        /// </remarks>
        /// <param name="parentLexPron">LexiconPronunciation.</param>
        /// <param name="propertyNode">XmlNode.</param>
        /// <param name="nsmgr">XmlNamespaceManager.</param>
        /// <param name="contentController">Object.</param>
        /// <param name="errorSet">ErrorSet.</param>
        /// <returns>
        /// LexiconItemProperty, or null when the property has "deleted" status and
        /// the content controller is not in history checking mode.
        /// </returns>
        internal static LexiconItemProperty Load(LexiconPronunciation parentLexPron, XmlNode propertyNode, XmlNamespaceManager nsmgr, Lexicon.ContentControler contentController, ErrorSet errorSet)
        {
            Debug.Assert(parentLexPron != null && parentLexPron.Parent != null && propertyNode != null &&
                nsmgr != null && contentController != null && errorSet != null);

            LexiconItemProperty property = new LexiconItemProperty();
            property.Parent = parentLexPron;

            // Direct cast: a node of the wrong kind fails right here with an
            // InvalidCastException instead of a null reference on the next line.
            XmlElement propertyElem = (XmlElement)propertyNode;
            string stateValue = propertyElem.GetAttribute("s");
            if (!string.IsNullOrEmpty(stateValue))
            {
                property.Status = (Lexicon.LexiconStatus)Enum.Parse(typeof(Lexicon.LexiconStatus), stateValue, true);
            }

            // Deleted properties are only kept in history checking mode.
            if (!contentController.IsHistoryCheckingMode && property.Status == Lexicon.LexiconStatus.Deleted)
            {
                return null;
            }

            PosItem posItem = PosItem.Load(propertyNode, nsmgr);
            if (posItem != null)
            {
                property.PartOfSpeech = posItem;
            }

            GenderItem genderItem = GenderItem.Load(propertyNode, nsmgr);
            if (genderItem != null)
            {
                property.Gender = genderItem;
            }

            CaseItem caseItem = CaseItem.Load(propertyNode, nsmgr);
            if (caseItem != null)
            {
                property.Case = caseItem;
            }

            NumberItem numberItem = NumberItem.Load(propertyNode, nsmgr);
            if (numberItem != null)
            {
                property.Number = numberItem;
            }

            foreach (XmlNode domainNode in propertyNode.SelectNodes("tts:domain", nsmgr))
            {
                DomainItem domainItem = DomainItem.Load(property, domainNode, nsmgr, contentController, errorSet);
                if (domainItem == null)
                {
                    continue;
                }

                if (!property.Domains.ContainsKey(domainItem.Value))
                {
                    property.Domains.Add(domainItem.Value, domainItem);
                }
                else
                {
                    Error error = new Error(DomainError.DuplicateDomain, domainItem.Value);
                    errorSet.Add(LexiconError.DomainError,
                        error, parentLexPron.Parent.Text, parentLexPron.Symbolic);
                }
            }

            // Domain tag declared on the owning lexicon, if any.
            string lexLevelDomain = ((Lexicon)parentLexPron.Parent.Parent).DomainTag;
            if (property.Domains.Count == 0)
            {
                if (string.IsNullOrEmpty(lexLevelDomain))
                {
                    property.ChangeDomain(new DomainItem());
                }
                else
                {
                    property.ChangeDomain(new DomainItem(lexLevelDomain));
                }
            }
            else if (!string.IsNullOrEmpty(lexLevelDomain))
            {
                // Property-level domains together with a lexicon-level domain tag are reported as an error.
                Error error = new Error(DomainError.InvalidDomainTags);
                errorSet.Add(LexiconError.DomainError,
                    error, parentLexPron.Parent.Text, parentLexPron.Symbolic);
            }

            foreach (XmlNode attributeNode in propertyNode.SelectNodes("tts:attr", nsmgr))
            {
                AttributeItem attributeItem = AttributeItem.Load(property, attributeNode, nsmgr, contentController, errorSet);
                if (attributeItem != null)
                {
                    property.AddAttribute(attributeItem);
                }
            }

            return property;
        }