/// <summary>
/// Check the contextual symbols of a char table against the lexicon and
/// generate the isolated symbol lexicon.
/// </summary>
/// <remarks>
/// Char elements without a contextual expansion are skipped. For every other
/// element a pronunciation is requested for the expansion; the symbol is added
/// to the output lexicon unless the existing lexicon entry for the symbol
/// already has a pronunciation with the same phone sequence. One message per
/// processed symbol is appended to <paramref name="errors"/>, except when a
/// symbol that is not yet in the lexicon is added.
/// </remarks>
/// <param name="chartable">Char table.</param>
/// <param name="posSymbol">Pos of symbol.</param>
/// <param name="lexiconOutput">Lexicon output.</param>
/// <param name="errors">Errors.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when any of the arguments is null.
/// </exception>
public void CheckContextualSymbolInLexicon(CharTable chartable,
    string posSymbol, string lexiconOutput, Collection<string> errors)
{
    if (chartable == null)
    {
        throw new ArgumentNullException("chartable");
    }

    if (errors == null)
    {
        throw new ArgumentNullException("errors");
    }

    if (posSymbol == null)
    {
        throw new ArgumentNullException("posSymbol");
    }

    // Fail fast instead of processing the whole table and only failing on write.
    if (lexiconOutput == null)
    {
        throw new ArgumentNullException("lexiconOutput");
    }

    Lexicon lexicon = new Lexicon(chartable.Language);
    Collection<string> polyWord = new Collection<string>();

    foreach (CharElement charElement in chartable.CharList)
    {
        // Skip early so that no lookup or allocation is done for symbols
        // that have no contextual expansion.
        string expansion = charElement.ContextualExpansion;
        if (string.IsNullOrEmpty(expansion))
        {
            continue;
        }

        string symbol = charElement.Symbol.ToString();
        LexicalItem symbolItem = _lexicon.Lookup(symbol, true);

        string pron = string.Empty;
        Collection<string> errorStrings = new Collection<string>();

        // NOTE(review): the result is treated as an error flag here, as the
        // original local name "hasError" suggests - confirm against
        // GetPronunciationForWords.
        bool hasError = _lexicon.GetPronunciationForWords(expansion, errorStrings, polyWord, ref pron);
        if (hasError || string.IsNullOrEmpty(pron))
        {
            errors.Add(AttributeError.SymbolPronGenError + symbol);
            continue;
        }

        bool addWord = true;
        if (symbolItem != null)
        {
            string[] prons = Pronunciation.SplitIntoPhones(pron);
            foreach (LexiconPronunciation existPron in symbolItem.Pronunciations)
            {
                string[] existProns = Pronunciation.SplitIntoPhones(existPron.Symbolic);
                if (existProns.Length != prons.Length)
                {
                    continue;
                }

                bool same = true;
                for (int i = 0; i < prons.Length; i++)
                {
                    if (existProns[i] != prons[i])
                    {
                        same = false;
                        break;
                    }
                }

                if (same)
                {
                    addWord = false;
                    break;
                }
            }
        }

        // Add the word if the symbol or pronunciation is not in lexicon.
        if (addWord)
        {
            LexiconPronunciation lexiconPron = new LexiconPronunciation(lexicon.Language);
            lexiconPron.Symbolic = pron;

            LexiconItemProperty lip = new LexiconItemProperty();
            lip.PartOfSpeech = new PosItem(posSymbol);
            lexiconPron.Properties.Add(lip);

            LexicalItem lexiconItem = new LexicalItem(lexicon.Language);
            lexiconItem.Grapheme = symbol;
            lexiconItem.Pronunciations.Add(lexiconPron);
            lexicon.Items.Add(lexiconItem.Grapheme, lexiconItem);

            // The symbol exists in the lexicon, but with a different pronunciation.
            if (symbolItem != null)
            {
                errors.Add(AttributeError.SymbolDiffPronFromLex + symbol);
            }
        }
        else
        {
            errors.Add(AttributeError.InfoSymbolInLex + symbol);
        }
    }

    Lexicon.WriteAllData(lexiconOutput, lexicon, Encoding.Unicode);
}
/// <summary>
/// Build a lexicon from the pronounced words of an Xml script file.
/// </summary>
/// <remarks>
/// When a main lexicon is supplied and it has a matching pronunciation for the
/// word, the symbolic form stored in the main lexicon is used. A word that
/// occurs several times gets one pronunciation per distinct symbolic form
/// (compared case-insensitively with the invariant culture).
/// </remarks>
/// <param name="scriptFile">Xml script file.</param>
/// <param name="defaultPos">Part of speech used for words without a POS string.</param>
/// <param name="mainLexicon">Main lexicon; may be null.</param>
/// <returns>The lexicon created from the script.</returns>
public static Lexicon CreateFromXmlScriptFile(XmlScriptFile scriptFile,
    string defaultPos, Lexicon mainLexicon)
{
    if (scriptFile == null)
    {
        throw new ArgumentNullException("scriptFile");
    }

    if (string.IsNullOrEmpty(defaultPos))
    {
        throw new ArgumentNullException("defaultPos");
    }

    Lexicon result = new Lexicon(scriptFile.Language);

    foreach (ScriptItem scriptItem in scriptFile.Items)
    {
        foreach (ScriptWord scriptWord in scriptItem.AllPronouncedWords)
        {
            string grapheme = scriptWord.Grapheme;

            // Pronunciation candidate taken from the script word.
            LexiconPronunciation candidate = new LexiconPronunciation(result.Language);
            candidate.Symbolic = scriptWord.Pronunciation;

            // Prefer the symbolic form found in the main lexicon, if any.
            LexicalItem mainEntry = null;
            if (mainLexicon != null)
            {
                mainEntry = mainLexicon.Lookup(grapheme, true);
            }

            if (mainEntry != null)
            {
                LexiconPronunciation mainPron = mainEntry.FindPronunciation(candidate.Symbolic, true);
                if (mainPron != null)
                {
                    candidate.Symbolic = mainPron.Symbolic;
                }
            }

            // Fall back to the default POS when the script word carries none.
            LexiconItemProperty property = new LexiconItemProperty();
            property.PartOfSpeech = string.IsNullOrEmpty(scriptWord.PosString)
                ? new PosItem(defaultPos)
                : new PosItem(scriptWord.PosString);
            candidate.Properties.Add(property);

            if (!result.Items.ContainsKey(grapheme))
            {
                // First occurrence of the word: create its lexical item.
                LexicalItem created = new LexicalItem(result.Language);
                created.Grapheme = grapheme;
                created.Pronunciations.Add(candidate);
                result.Items.Add(grapheme, created);
                continue;
            }

            // Known word: merge the property into every pronunciation with the
            // same symbolic form, or append the candidate as a new pronunciation.
            LexicalItem existingItem = result.Items[grapheme];
            bool alreadyListed = false;
            foreach (LexiconPronunciation known in existingItem.Pronunciations)
            {
                if (!known.Symbolic.Equals(candidate.Symbolic, StringComparison.InvariantCultureIgnoreCase))
                {
                    continue;
                }

                alreadyListed = true;
                if (!known.Properties.Contains(property))
                {
                    known.Properties.Add(property);
                }
            }

            if (!alreadyListed)
            {
                existingItem.Pronunciations.Add(candidate);
            }
        }
    }

    return result;
}
/// <summary>
/// Append one pronunciation per dictionary entry to the current lexical item.
/// </summary>
/// <param name="prons">
/// Pronunciation dictionary: each key becomes the symbolic form of a
/// pronunciation and each string in its value becomes a part-of-speech property.
/// </param>
private void GenerateLexicalItem(Dictionary<string, Collection<string>> prons)
{
    Helper.ThrowIfNull(prons);

    foreach (KeyValuePair<string, Collection<string>> entry in prons)
    {
        LexiconPronunciation pronunciation = new LexiconPronunciation(_language);
        pronunciation.Symbolic = entry.Key;

        foreach (string posText in entry.Value)
        {
            pronunciation.Properties.Add(new LexiconItemProperty(new PosItem(posText)));
        }

        _item.Pronunciations.Add(pronunciation);
    }
}
/// <summary>
/// Validate the number property of a lexicon item property.
/// </summary>
/// <param name="word">Word the property belongs to; passed along with each reported error.</param>
/// <param name="property">Lexicon item property; marked invalid on a must-fix error.</param>
/// <param name="errorSet">Error set receiving the number errors.</param>
private static void ValidateNumber(string word, LexiconItemProperty property, ErrorSet errorSet)
{
    // Nothing to validate when no number is set.
    if (property.Number == null)
    {
        return;
    }

    // Only the errors of the conversion are of interest; the id is discarded.
    int ignoredId;
    ErrorSet conversionErrors = NumberItem.StringToId(property.Number.Value, out ignoredId);

    foreach (Error conversionError in conversionErrors.Errors)
    {
        errorSet.Add(LexiconError.NumberError, conversionError, word);
    }

    if (conversionErrors.Contains(ErrorSeverity.MustFix))
    {
        property.Valid = false;
    }
}
/// <summary>
/// Validate the attribute set of a lexicon item property against the schema.
/// </summary>
/// <remarks>
/// For every attribute one of the following is reported: an invalid category
/// (the schema has no root category for the key), an invalid definition for
/// POS (the category is the POS category), or an invalid value (the category
/// has no value with the same name, compared ordinally).
/// </remarks>
/// <param name="property">Lexicon item property.</param>
/// <param name="attributeSchema">Lexical Attribute Schema.</param>
/// <returns>Error set.</returns>
private static ErrorSet ValidateAttributeSet(LexiconItemProperty property,
    LexicalAttributeSchema attributeSchema)
{
    Debug.Assert(attributeSchema != null);

    ErrorSet attributeErrorSet = new ErrorSet();
    foreach (KeyValuePair<string, List<AttributeItem>> pair in property.AttributeSet)
    {
        // The category depends only on the key, so resolve it once per key
        // instead of once per attribute.
        AttributeCategory category = attributeSchema.GetRootCategory(pair.Key);

        foreach (AttributeItem attribute in pair.Value)
        {
            if (category == null)
            {
                attributeErrorSet.Add(LexicalAttributeError.InvalidCategory, pair.Key);
            }
            else if (category.Name.Equals(LexicalAttributeSchema.PosCategoryName, StringComparison.Ordinal))
            {
                attributeErrorSet.Add(LexicalAttributeError.InvalidDefinitionForPos, attribute.Value);
            }
            else
            {
                bool found = false;
                foreach (AttributeValue value in category.Values)
                {
                    if (value.Name.Equals(attribute.Value, StringComparison.Ordinal))
                    {
                        found = true;
                        break;
                    }
                }

                if (!found)
                {
                    attributeErrorSet.Add(LexicalAttributeError.InvalidValue, attribute.Value, pair.Key);
                }
            }
        }
    }

    return attributeErrorSet;
}
/// <summary>
/// Validate the case property of a lexicon item property.
/// </summary>
/// <param name="word">Word the property belongs to; passed along with each reported error.</param>
/// <param name="property">Lexicon item property; marked invalid on a must-fix error.</param>
/// <param name="errorSet">Error set receiving the case errors.</param>
private static void ValidateCase(string word, LexiconItemProperty property, ErrorSet errorSet)
{
    // Nothing to validate when no case is set.
    if (property.Case == null)
    {
        return;
    }

    // Only the errors of the conversion are of interest; the id is discarded.
    int ignoredId;
    ErrorSet conversionErrors = CaseItem.StringToId(property.Case.Value, out ignoredId);

    foreach (Error conversionError in conversionErrors.Errors)
    {
        errorSet.Add(LexiconError.CaseError, conversionError, word);
    }

    if (conversionErrors.Contains(ErrorSeverity.MustFix))
    {
        property.Valid = false;
    }
}
/// <summary>
/// Validate the gender property of a lexicon item property.
/// </summary>
/// <param name="word">Word the property belongs to; passed along with each reported error.</param>
/// <param name="property">Lexicon item property; marked invalid on a must-fix error.</param>
/// <param name="errorSet">Error set receiving the gender errors.</param>
private static void ValidateGender(string word, LexiconItemProperty property, ErrorSet errorSet)
{
    // Nothing to validate when no gender is set.
    if (property.Gender == null)
    {
        return;
    }

    ErrorSet validationErrors = GenderItem.Validate(property.Gender.Value);

    foreach (Error validationError in validationErrors.Errors)
    {
        errorSet.Add(LexiconError.GenderError, validationError, word);
    }

    if (validationErrors.Contains(ErrorSeverity.MustFix))
    {
        property.Valid = false;
    }
}
/// <summary>
/// Load LexiconItemProperty from XmlNode.
/// </summary>
/// <remarks>
/// Reads the status attribute "s", the POS, gender, case and number items, the
/// "tts:domain" children and the "tts:attr" children. Duplicate domains, and
/// property-level domains combined with a lexicon-level domain tag, are
/// reported to <paramref name="errorSet"/>.
/// </remarks>
/// <param name="parentLexPron">LexiconPronunciation.</param>
/// <param name="propertyNode">XmlNode.</param>
/// <param name="nsmgr">XmlNamespaceManager.</param>
/// <param name="contentController">Object.</param>
/// <param name="errorSet">ErrorSet.</param>
/// <returns>
/// The loaded property, or null when the property has "deleted" status and the
/// content controller is not in history checking mode.
/// </returns>
internal static LexiconItemProperty Load(LexiconPronunciation parentLexPron, XmlNode propertyNode,
    XmlNamespaceManager nsmgr, Lexicon.ContentControler contentController, ErrorSet errorSet)
{
    Debug.Assert(parentLexPron != null && parentLexPron.Parent != null && propertyNode != null &&
        nsmgr != null && contentController != null && errorSet != null);

    LexiconItemProperty property = new LexiconItemProperty();
    property.Parent = parentLexPron;

    // The node is dereferenced as an element below; make a wrong node type
    // visible in debug builds, as the attribute and domain loaders do.
    XmlElement propertyElem = propertyNode as XmlElement;
    Debug.Assert(propertyElem != null);

    string stateValue = propertyElem.GetAttribute("s");
    if (!string.IsNullOrEmpty(stateValue))
    {
        property.Status = (Lexicon.LexiconStatus)Enum.Parse(typeof(Lexicon.LexiconStatus),
            stateValue, true);
    }

    // Deleted properties are only kept in history checking mode.
    if (!contentController.IsHistoryCheckingMode && property.Status == Lexicon.LexiconStatus.Deleted)
    {
        return null;
    }

    PosItem posItem = PosItem.Load(propertyNode, nsmgr);
    if (posItem != null)
    {
        property.PartOfSpeech = posItem;
    }

    GenderItem genderItem = GenderItem.Load(propertyNode, nsmgr);
    if (genderItem != null)
    {
        property.Gender = genderItem;
    }

    CaseItem caseItem = CaseItem.Load(propertyNode, nsmgr);
    if (caseItem != null)
    {
        property.Case = caseItem;
    }

    NumberItem numberItem = NumberItem.Load(propertyNode, nsmgr);
    if (numberItem != null)
    {
        property.Number = numberItem;
    }

    foreach (XmlNode domainNode in propertyNode.SelectNodes("tts:domain", nsmgr))
    {
        DomainItem domainItem = DomainItem.Load(property, domainNode, nsmgr, contentController, errorSet);
        if (domainItem != null)
        {
            if (!property.Domains.ContainsKey(domainItem.Value))
            {
                property.Domains.Add(domainItem.Value, domainItem);
            }
            else
            {
                Error duplicateError = new Error(DomainError.DuplicateDomain, domainItem.Value);
                errorSet.Add(LexiconError.DomainError, duplicateError,
                    parentLexPron.Parent.Text, parentLexPron.Symbolic);
            }
        }
    }

    // The owner of the lexical item is expected to be the lexicon itself.
    Lexicon ownerLexicon = parentLexPron.Parent.Parent as Lexicon;
    Debug.Assert(ownerLexicon != null);
    string lexLevelDomain = ownerLexicon.DomainTag;

    if (property.Domains.Count == 0)
    {
        // No property-level domain: use the lexicon-level tag when present,
        // otherwise a default-constructed domain item.
        if (string.IsNullOrEmpty(lexLevelDomain))
        {
            property.ChangeDomain(new DomainItem());
        }
        else
        {
            property.ChangeDomain(new DomainItem(lexLevelDomain));
        }
    }
    else if (!string.IsNullOrEmpty(lexLevelDomain))
    {
        // Property-level domains together with a lexicon-level tag are reported.
        Error invalidTagsError = new Error(DomainError.InvalidDomainTags);
        errorSet.Add(LexiconError.DomainError, invalidTagsError,
            parentLexPron.Parent.Text, parentLexPron.Symbolic);
    }

    foreach (XmlNode attributeNode in propertyNode.SelectNodes("tts:attr", nsmgr))
    {
        AttributeItem attributeItem = AttributeItem.Load(property, attributeNode, nsmgr,
            contentController, errorSet);
        if (attributeItem != null)
        {
            property.AddAttribute(attributeItem);
        }
    }

    return property;
}
/// <summary>
/// Create a copy of this property.
/// </summary>
/// <remarks>
/// The validity flag and status are copied; the POS, gender, case and number
/// items, the domains and the attributes are cloned individually.
/// </remarks>
/// <returns>Cloned LexiconItemProperty.</returns>
public LexiconItemProperty Clone()
{
    LexiconItemProperty copy = new LexiconItemProperty();
    copy.Valid = _valid;
    copy.Status = Status;

    // Optional items are only cloned when they are set.
    if (_pos != null)
    {
        copy.PartOfSpeech = _pos.Clone();
    }

    if (_gender != null)
    {
        copy.Gender = _gender.Clone();
    }

    if (_case != null)
    {
        copy.Case = _case.Clone();
    }

    if (_number != null)
    {
        copy.Number = _number.Clone();
    }

    foreach (string domainKey in _domains.Keys)
    {
        copy.Domains.Add(domainKey, _domains[domainKey].Clone());
    }

    foreach (string attributeKey in _attributes.Keys)
    {
        foreach (AttributeItem sourceAttribute in _attributes[attributeKey])
        {
            copy.AddAttribute(sourceAttribute.Clone());
        }
    }

    return copy;
}
/// <summary>
/// Compare this property with another one, ignoring domain information.
/// </summary>
/// <param name="property">The property to compare with; may be null.</param>
/// <returns>
/// True when POS, gender, case, number and the attribute set are all equal;
/// false otherwise, including when <paramref name="property"/> is null.
/// </returns>
public bool EqualsWithoutDomain(LexiconItemProperty property)
{
    if (property == null)
    {
        return false;
    }

    return HistoryValue.Equals(_pos, property.PartOfSpeech) &&
        HistoryValue.Equals(_gender, property.Gender) &&
        HistoryValue.Equals(_case, property.Case) &&
        HistoryValue.Equals(_number, property.Number) &&
        AttributeSetEqual(_attributes, property.AttributeSet);
}
/// <summary>
/// Load AttributeItem from XmlNode.
/// </summary>
/// <remarks>
/// Reads the attributes "s" (status), "category", "value" and "vo" (original
/// value). An empty category or value is reported to
/// <paramref name="errorSet"/> and no item is returned.
/// </remarks>
/// <param name="parentProperty">LexiconItemProperty.</param>
/// <param name="attributeNode">XmlNode.</param>
/// <param name="nsmgr">XmlNamespaceManager.</param>
/// <param name="contentController">Object.</param>
/// <param name="errorSet">ErrorSet.</param>
/// <returns>
/// The loaded attribute item, or null when the item is dropped (deleted status
/// outside history checking mode) or invalid (empty category or value).
/// </returns>
internal static AttributeItem Load(LexiconItemProperty parentProperty, XmlNode attributeNode,
    XmlNamespaceManager nsmgr, Lexicon.ContentControler contentController, ErrorSet errorSet)
{
    // errorSet is part of the precondition as well: it is dereferenced when an
    // empty category or value is found.
    Debug.Assert(parentProperty != null && parentProperty.Parent != null &&
        parentProperty.Parent.Parent != null && attributeNode != null &&
        contentController != null && nsmgr != null && errorSet != null);

    AttributeItem attributeItem = new AttributeItem();
    XmlElement attributeElem = attributeNode as XmlElement;
    Debug.Assert(attributeElem != null);

    string attrStatusValue = attributeElem.GetAttribute("s");
    if (!string.IsNullOrEmpty(attrStatusValue))
    {
        attributeItem.Status = (Lexicon.LexiconStatus)Enum.Parse(
            typeof(Lexicon.LexiconStatus), attrStatusValue, true);

        // Lexicon object is shared with lexicon reviewer tool,
        // We drop those items if they have "deleted" status when it is not loaded by lexicon reviewer tool
        if (attributeItem.Status == Lexicon.LexiconStatus.Deleted &&
            !contentController.IsHistoryCheckingMode)
        {
            return null;
        }
    }

    string category = attributeElem.GetAttribute("category");
    string value = attributeElem.GetAttribute("value");
    string originalValue = attributeElem.GetAttribute("vo");

    if (string.IsNullOrEmpty(category))
    {
        Error emptyCategoryError = new Error(LexicalAttributeError.EmptyCategory);
        errorSet.Add(LexiconError.AttributeError, emptyCategoryError,
            parentProperty.Parent.Parent.Text, parentProperty.Parent.Symbolic);
        return null;
    }

    if (string.IsNullOrEmpty(value))
    {
        Error emptyValueError = new Error(LexicalAttributeError.EmptyValue);
        errorSet.Add(LexiconError.AttributeError, emptyValueError,
            parentProperty.Parent.Parent.Text, parentProperty.Parent.Symbolic);
        return null;
    }

    attributeItem.Value = value;
    attributeItem.CategoryName = category;

    // The recorded original value is only used for items that are not in
    // "original" status; otherwise the old value equals the current value.
    if (!string.IsNullOrEmpty(originalValue) &&
        attributeItem.Status != Lexicon.LexiconStatus.Original)
    {
        attributeItem.OldValue = originalValue;
    }
    else
    {
        attributeItem.OldValue = value;
    }

    return attributeItem;
}
/// <summary>
/// Load DomainItem from XmlNode.
/// </summary>
/// <remarks>
/// Reads the attributes "s" (status), "p" (whether the pronunciation is
/// preferred in this domain), "v" (domain value) and "vo" (original domain
/// value). Domain values are stored in lower case. An empty domain value is
/// reported to <paramref name="errorSet"/> and no item is returned.
/// </remarks>
/// <param name="parentProperty">LexiconItemProperty.</param>
/// <param name="domainNode">XmlNode.</param>
/// <param name="nsmgr">XmlNamespaceManager.</param>
/// <param name="contentController">Object.</param>
/// <param name="errorSet">ErrorSet.</param>
/// <returns>
/// The loaded domain item, or null when the item is dropped (deleted status
/// outside history checking mode) or invalid (empty domain value).
/// </returns>
internal static DomainItem Load(LexiconItemProperty parentProperty, XmlNode domainNode,
    XmlNamespaceManager nsmgr, Lexicon.ContentControler contentController, ErrorSet errorSet)
{
    // errorSet is part of the precondition as well: it is dereferenced when an
    // empty domain value is found.
    Debug.Assert(parentProperty != null && parentProperty.Parent != null &&
        parentProperty.Parent.Parent != null && domainNode != null &&
        contentController != null && nsmgr != null && errorSet != null);

    DomainItem domainItem = new DomainItem();
    XmlElement domainElem = domainNode as XmlElement;
    Debug.Assert(domainElem != null);

    string domainStatusValue = domainElem.GetAttribute("s");
    if (!string.IsNullOrEmpty(domainStatusValue))
    {
        domainItem.Status = (Lexicon.LexiconStatus)Enum.Parse(typeof(Lexicon.LexiconStatus),
            domainStatusValue, true);

        // Lexicon object is shared with lexicon reviewer tool,
        // We drop those items if they have "deleted" status when it is not loaded by lexicon reviewer tool
        if (domainItem.Status == Lexicon.LexiconStatus.Deleted &&
            !contentController.IsHistoryCheckingMode)
        {
            return null;
        }
    }

    // Check whether pronunciation is preferred in this domain.
    string preferedValue = domainElem.GetAttribute("p");
    if (!string.IsNullOrEmpty(preferedValue))
    {
        domainItem.IsFirstPronunciation = bool.Parse(preferedValue);
    }

    string domainValue = domainElem.GetAttribute("v");
    string originalDomainValue = domainElem.GetAttribute("vo");

    if (string.IsNullOrEmpty(domainValue))
    {
        Error emptyDomainError = new Error(DomainError.EmptyDomain);
        errorSet.Add(LexiconError.DomainError, emptyDomainError,
            parentProperty.Parent.Parent.Text, parentProperty.Parent.Symbolic);
        return null;
    }

    // NOTE(review): ToLower() is culture-sensitive; if domain tags are
    // culture-independent identifiers, ToLowerInvariant() may be the better
    // choice - confirm against the other places that compare domain values.
    string normalizedValue = domainValue.ToLower();
    domainItem.Value = normalizedValue;

    if (!string.IsNullOrEmpty(originalDomainValue) &&
        domainItem.Status != Lexicon.LexiconStatus.Original)
    {
        domainItem.OldValue = originalDomainValue.ToLower();
    }
    else
    {
        // Without a recorded original value the old value is the current
        // value. Use the normalized form so that a mixed-case domain in the
        // file does not end up with an old value that differs from the
        // current one.
        domainItem.OldValue = normalizedValue;
    }

    return domainItem;
}