/// <summary>
/// Checks the contextual expansion of every symbol in the char table against the
/// instance lexicon and writes a new lexicon containing the symbols whose generated
/// pronunciation is not already present.
/// </summary>
/// <remarks>
/// Symbols with an empty contextual expansion are skipped. For every other symbol
/// exactly one message is appended to <paramref name="errors"/>, except when a brand
/// new symbol is added, in which case nothing is reported.
/// </remarks>
/// <param name="chartable">Char table.</param>
/// <param name="posSymbol">Part of speech assigned to each generated entry.</param>
/// <param name="lexiconOutput">Output target handed to Lexicon.WriteAllData.</param>
/// <param name="errors">Collection that receives the generated messages.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="chartable"/>, <paramref name="errors"/> or
/// <paramref name="posSymbol"/> is null.
/// </exception>
public void CheckContextualSymbolInLexicon(CharTable chartable, string posSymbol,
    string lexiconOutput, Collection<string> errors)
{
    if (chartable == null)
    {
        throw new ArgumentNullException("chartable");
    }

    if (errors == null)
    {
        throw new ArgumentNullException("errors");
    }

    if (posSymbol == null)
    {
        throw new ArgumentNullException("posSymbol");
    }

    Lexicon generatedLexicon = new Lexicon(chartable.Language);
    Collection<string> polyphonicWords = new Collection<string>();

    foreach (CharElement element in chartable.CharList)
    {
        string symbolText = element.Symbol.ToString();
        LexicalItem knownItem = _lexicon.Lookup(symbolText, true);

        string expansion = element.ContextualExpansion;
        if (string.IsNullOrEmpty(expansion))
        {
            continue;
        }

        string generatedPron = string.Empty;
        Collection<string> generationErrors = new Collection<string>();
        bool failed = _lexicon.GetPronunciationForWords(expansion, generationErrors,
            polyphonicWords, ref generatedPron);

        if (failed || string.IsNullOrEmpty(generatedPron))
        {
            errors.Add(AttributeError.SymbolPronGenError + symbolText);
            continue;
        }

        // The symbol counts as "already in the lexicon" only when one of its
        // existing pronunciations has exactly the same phone sequence.
        bool alreadyInLexicon = false;
        if (knownItem != null)
        {
            string[] generatedPhones = Pronunciation.SplitIntoPhones(generatedPron);
            foreach (LexiconPronunciation knownPron in knownItem.Pronunciations)
            {
                string[] knownPhones = Pronunciation.SplitIntoPhones(knownPron.Symbolic);
                if (HasIdenticalPhones(knownPhones, generatedPhones))
                {
                    alreadyInLexicon = true;
                    break;
                }
            }
        }

        if (alreadyInLexicon)
        {
            errors.Add(AttributeError.InfoSymbolInLex + symbolText);
            continue;
        }

        // Add the word because the symbol or its pronunciation is not in the lexicon.
        LexicalItem newItem = new LexicalItem(generatedLexicon.Language);
        newItem.Grapheme = symbolText;

        LexiconPronunciation newPron = new LexiconPronunciation(generatedLexicon.Language);
        newPron.Symbolic = generatedPron;

        LexiconItemProperty posProperty = new LexiconItemProperty();
        posProperty.PartOfSpeech = new PosItem(posSymbol);
        newPron.Properties.Add(posProperty);

        newItem.Pronunciations.Add(newPron);
        generatedLexicon.Items.Add(newItem.Grapheme, newItem);

        if (knownItem != null)
        {
            // The symbol exists, but with a different pronunciation.
            errors.Add(AttributeError.SymbolDiffPronFromLex + symbolText);
        }
    }

    Lexicon.WriteAllData(lexiconOutput, generatedLexicon, Encoding.Unicode);
}

/// <summary>
/// Tells whether two phone arrays have the same length and the same phone at every index.
/// </summary>
/// <param name="left">First phone array.</param>
/// <param name="right">Second phone array.</param>
/// <returns>True when both arrays hold the same phone sequence.</returns>
private static bool HasIdenticalPhones(string[] left, string[] right)
{
    if (left.Length != right.Length)
    {
        return false;
    }

    for (int index = 0; index < right.Length; index++)
    {
        if (left[index] != right[index])
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Builds a lexicon from every pronounced word of an Xml script file.
/// </summary>
/// <remarks>
/// When <paramref name="mainLexicon"/> is supplied and contains a matching
/// pronunciation for the word, the symbolic form stored in the main lexicon is used.
/// A word seen more than once gets an additional pronunciation only when no existing
/// pronunciation has the same symbolic form (compared ignoring case); otherwise the
/// part-of-speech property is merged into the matching pronunciation(s).
/// </remarks>
/// <param name="scriptFile">Xml script file.</param>
/// <param name="defaultPos">Part of speech used for words that carry none.</param>
/// <param name="mainLexicon">Optional main lexicon; may be null.</param>
/// <returns>The lexicon created from the script file.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="scriptFile"/> is null, or <paramref name="defaultPos"/> is null or empty.
/// </exception>
public static Lexicon CreateFromXmlScriptFile(XmlScriptFile scriptFile, string defaultPos, Lexicon mainLexicon)
{
    if (scriptFile == null)
    {
        throw new ArgumentNullException("scriptFile");
    }

    if (string.IsNullOrEmpty(defaultPos))
    {
        throw new ArgumentNullException("defaultPos");
    }

    Lexicon result = new Lexicon(scriptFile.Language);

    foreach (ScriptItem scriptItem in scriptFile.Items)
    {
        foreach (ScriptWord scriptWord in scriptItem.AllPronouncedWords)
        {
            string grapheme = scriptWord.Grapheme;

            // Create the pronunciation node for this script word.
            LexiconPronunciation candidate = new LexiconPronunciation(result.Language);
            candidate.Symbolic = scriptWord.Pronunciation;

            LexicalItem mainItem = mainLexicon == null ? null : mainLexicon.Lookup(grapheme, true);
            if (mainItem != null)
            {
                LexiconPronunciation mainPron = mainItem.FindPronunciation(candidate.Symbolic, true);
                if (mainPron != null)
                {
                    candidate.Symbolic = mainPron.Symbolic;
                }
            }

            LexiconItemProperty posProperty = new LexiconItemProperty();
            posProperty.PartOfSpeech = new PosItem(
                string.IsNullOrEmpty(scriptWord.PosString) ? defaultPos : scriptWord.PosString);
            candidate.Properties.Add(posProperty);

            if (!result.Items.ContainsKey(grapheme))
            {
                // First occurrence of the word: create its lexical item.
                LexicalItem createdItem = new LexicalItem(result.Language);
                createdItem.Grapheme = grapheme;
                createdItem.Pronunciations.Add(candidate);
                result.Items.Add(grapheme, createdItem);
                continue;
            }

            bool symbolicAlreadyPresent = false;
            foreach (LexiconPronunciation existing in result.Items[grapheme].Pronunciations)
            {
                if (!existing.Symbolic.Equals(candidate.Symbolic, StringComparison.InvariantCultureIgnoreCase))
                {
                    continue;
                }

                symbolicAlreadyPresent = true;
                if (!existing.Properties.Contains(posProperty))
                {
                    existing.Properties.Add(posProperty);
                }
            }

            if (!symbolicAlreadyPresent)
            {
                result.Items[grapheme].Pronunciations.Add(candidate);
            }
        }
    }

    return result;
}
/// <summary>
/// Adds one pronunciation to the current item for every entry of the dictionary.
/// </summary>
/// <param name="prons">
/// Pronunciation dictionary: the key is the symbolic pronunciation, the value is the
/// list of part-of-speech strings that become properties of that pronunciation.
/// </param>
private void GenerateLexicalItem(Dictionary<string, Collection<string>> prons)
{
    Helper.ThrowIfNull(prons);

    foreach (KeyValuePair<string, Collection<string>> entry in prons)
    {
        LexiconPronunciation pronunciation = new LexiconPronunciation(_language);
        pronunciation.Symbolic = entry.Key;

        foreach (string pos in entry.Value)
        {
            pronunciation.Properties.Add(new LexiconItemProperty(new PosItem(pos)));
        }

        _item.Pronunciations.Add(pronunciation);
    }
}
/// <summary>
/// Compares the original positions of two pronunciations.
/// </summary>
/// <param name="firstPron">First pronunciation to be compared.</param>
/// <param name="secondPron">Second pronunciation to be compared.</param>
/// <returns>
/// Greater than zero when firstPron's position is bigger than the second one;
/// zero when both positions are equal;
/// less than zero when firstPron's position is smaller than the second one.
/// </returns>
private static int ComparePronOriginalPosition(LexiconPronunciation firstPron, LexiconPronunciation secondPron)
{
    // Do not compare by subtraction: the difference of two ints can overflow
    // (e.g. a large positive minus a large negative position) and flip the sign,
    // which would make the ordering inconsistent.
    return firstPron.OldPosition.CompareTo(secondPron.OldPosition);
}
/// <summary>
/// Converts every valid property of a pronunciation into attribute strings and
/// appends them to the attribute string list.
/// </summary>
/// <remarks>
/// Invalid properties are reported to the error set and skipped. For each valid
/// property one group of attribute strings is built per category (POS, case, gender,
/// number, then every entry of the attribute set); the groups are terminated by a
/// null entry and handed to BuildAttributeStringList.
/// </remarks>
/// <param name="graphme">Word graphme.</param>
/// <param name="lexPron">Lexicon pronunciation.</param>
/// <param name="attributeStringList">Attribute string list.</param>
private void SaveProperty(string graphme, LexiconPronunciation lexPron, Collection<string> attributeStringList)
{
    foreach (LexiconItemProperty property in lexPron.Properties)
    {
        if (!property.Valid)
        {
            this.ErrorSet.Add(LexiconCompilerError.RemoveInvalidProperty,
                graphme, lexPron.Symbolic, property.PartOfSpeech.Value);
            continue;
        }

        List<ArrayList> attributeGroups = new List<ArrayList>();

        bool hasPos = property.PartOfSpeech != null &&
            !string.IsNullOrEmpty(property.PartOfSpeech.Value);
        Debug.Assert(hasPos);
        if (hasPos)
        {
            ArrayList posGroup = new ArrayList();
            posGroup.Add(_attributeSchema.GenerateString("POS", property.PartOfSpeech.Value));
            attributeGroups.Add(posGroup);
        }

        if (property.Case != null && !string.IsNullOrEmpty(property.Case.Value))
        {
            attributeGroups.Add(GenerateAttributeStrings(
                "F_CASE", CaseItem.ConvertIntoArray(property.Case.Value, ErrorSet)));
        }

        if (property.Gender != null && !string.IsNullOrEmpty(property.Gender.Value))
        {
            attributeGroups.Add(GenerateAttributeStrings(
                "F_GENDER", GenderItem.ConvertIntoArray(property.Gender.Value, ErrorSet)));
        }

        // Write out number information if present.
        if (property.Number != null && !string.IsNullOrEmpty(property.Number.Value))
        {
            attributeGroups.Add(GenerateAttributeStrings(
                "F_NUMBER", NumberItem.ConvertIntoArray(property.Number.Value, ErrorSet)));
        }

        foreach (KeyValuePair<string, List<AttributeItem>> category in property.AttributeSet)
        {
            ArrayList categoryGroup = new ArrayList();
            foreach (AttributeItem item in category.Value)
            {
                string generated = LexicalAttributeSchema.GenerateString(category.Key, item.Value);
                if (!string.IsNullOrEmpty(generated))
                {
                    categoryGroup.Add(generated);
                }
            }

            // The group is added even when it ends up empty.
            attributeGroups.Add(categoryGroup);
        }

        // Fill a terminal null at the end of the list.
        attributeGroups.Add(null);
        BuildAttributeStringList(attributeStringList, string.Empty, attributeGroups.ToArray(), 0);
    }
}

/// <summary>
/// Generates one attribute string per value for the given attribute name.
/// </summary>
/// <param name="attributeName">Attribute name passed to the attribute schema.</param>
/// <param name="values">Values whose string form is converted.</param>
/// <returns>The generated strings, in the order of <paramref name="values"/>.</returns>
private ArrayList GenerateAttributeStrings(string attributeName, ArrayList values)
{
    ArrayList generated = new ArrayList();
    foreach (object value in values)
    {
        generated.Add(_attributeSchema.GenerateString(attributeName, value.ToString()));
    }

    return generated;
}
/// <summary>
/// Guesses whether a word is expanded, based on its pronunciation.
/// </summary>
/// <param name="word">Word text.</param>
/// <param name="language">Language of the word.</param>
/// <param name="pronunciation">Pronunciation of the word.</param>
/// <param name="sp">Service provider used for the spell-out check.</param>
/// <returns>
/// True when the pronunciation has more phones than the word has letters and the
/// pronunciation is not a spell-out; otherwise false.
/// </returns>
public static bool IsExpandedWord(string word, Language language, LexiconPronunciation pronunciation, SP.ServiceProvider sp)
{
    // More phones than letters means the word is probably expanded, unless it is
    // simply spelled out. The spell-out check only runs when the first test passes.
    return PhonesMoreThanLetters(word, pronunciation.Symbolic, language)
        && !pronunciation.IsSpellOut(sp);
}
/// <summary>
/// Validates the pronunciation of a word, reports every finding, and clears the
/// pronunciation's Valid flag when a blocking error is found.
/// </summary>
/// <remarks>
/// An error is blocking when its severity is MustFix and it is not one of the
/// vowel / vowel-and-sonorant count errors, which are reported but tolerated.
/// </remarks>
/// <param name="word">Word.</param>
/// <param name="lexPron">Lexicon pronunciation.</param>
/// <param name="ttsPhoneSet">TTS phone set.</param>
/// <param name="errorSet">Error set that receives all findings.</param>
private static void ValidatePronunciation(string word, LexiconPronunciation lexPron, TtsPhoneSet ttsPhoneSet, ErrorSet errorSet)
{
    ErrorSet findings = Pronunciation.Validate(lexPron.Symbolic, ttsPhoneSet);
    bool hasBlockingError = false;

    foreach (Error finding in findings.Errors)
    {
        errorSet.Add(LexiconError.PronunciationError, finding, word);

        if (finding.Severity != ErrorSeverity.MustFix)
        {
            continue;
        }

        bool isCountError =
            finding.Enum.Equals(PronunciationError.VowelAndSonorantCountLessThanMinimum) ||
            finding.Enum.Equals(PronunciationError.VowelAndSonorantCountGreaterThanMaximum) ||
            finding.Enum.Equals(PronunciationError.VowelCountLessThanMinimum) ||
            finding.Enum.Equals(PronunciationError.VowelCountGreaterThanMaximum);

        if (!isCountError)
        {
            hasBlockingError = true;
        }
    }

    lexPron.Valid = lexPron.Valid && !hasBlockingError;
}
/// <summary>
/// Loads a LexiconPronunciation from an Xml node.
/// </summary>
/// <remarks>
/// Attributes read from the node: "s" (status), "v" (symbolic pronunciation, trimmed,
/// whitespace runs collapsed to one space, lower-cased), "o" (original position),
/// "vo" (original symbolic text, only used when the status is not Original) and
/// "f" (frequency, zero when missing or not an integer). Child "tts:pr" nodes are
/// loaded as properties.
/// </remarks>
/// <param name="parentLexItem">Lexical item that owns the pronunciation.</param>
/// <param name="pronNode">Xml node of the pronunciation.</param>
/// <param name="nsmgr">Xml namespace manager.</param>
/// <param name="contentController">Content controller.</param>
/// <param name="errorSet">Error set that receives duplicate-property errors.</param>
/// <returns>
/// The loaded pronunciation, or null when the pronunciation has the Deleted status
/// and the controller is not in history checking mode.
/// </returns>
internal static LexiconPronunciation Load(LexicalItem parentLexItem, XmlNode pronNode, XmlNamespaceManager nsmgr, Lexicon.ContentControler contentController, ErrorSet errorSet)
{
    Debug.Assert(parentLexItem != null && pronNode != null && nsmgr != null && contentController != null && errorSet != null);

    LexiconPronunciation loaded = new LexiconPronunciation(parentLexItem.Language);
    loaded.Parent = parentLexItem;

    XmlElement element = pronNode as XmlElement;
    Debug.Assert(element != null);

    string statusText = element.GetAttribute("s");
    if (!string.IsNullOrEmpty(statusText))
    {
        loaded.Status = (Lexicon.LexiconStatus)Enum.Parse(typeof(Lexicon.LexiconStatus), statusText, true);
    }

    // The lexicon object is shared with the lexicon reviewer tool. Items with the
    // "deleted" status are dropped unless they are loaded by that tool.
    if (!contentController.IsHistoryCheckingMode && loaded.Status == Lexicon.LexiconStatus.Deleted)
    {
        return null;
    }

    // Symbolic is deliberately assigned in two steps, exactly as before.
    loaded.Symbolic = element.GetAttribute("v").Trim();
    loaded.Symbolic = Regex.Replace(loaded.Symbolic, @"\s{2,}", " ").ToLowerInvariant();
    loaded.OldSymbolic = loaded.Symbolic;

    // Get the pronunciation's original position.
    string positionText = element.GetAttribute("o");
    if (!string.IsNullOrEmpty(positionText))
    {
        loaded.OldPosition = int.Parse(positionText, CultureInfo.InvariantCulture);
    }

    if (loaded.Status != Lexicon.LexiconStatus.Original)
    {
        string originalText = element.GetAttribute("vo");
        if (!string.IsNullOrEmpty(originalText))
        {
            loaded.OldSymbolic = originalText;
        }
    }

    // Get the word's frequency; TryParse leaves zero when the information is absent.
    int parsedFrequency = 0;
    int.TryParse(element.GetAttribute("f"), out parsedFrequency);
    loaded.Frequency = parsedFrequency;

    foreach (XmlNode propertyNode in pronNode.SelectNodes("tts:pr", nsmgr))
    {
        LexiconItemProperty property = LexiconItemProperty.Load(loaded, propertyNode,
            nsmgr, contentController, errorSet);
        if (property == null)
        {
            continue;
        }

        // Duplicates are only rejected outside history checking mode.
        bool isDuplicate = !contentController.IsHistoryCheckingMode &&
            loaded.Properties.Contains(property);
        if (isDuplicate)
        {
            errorSet.Add(LexiconError.DuplicateProperty, parentLexItem.Text, loaded.Symbolic);
        }
        else
        {
            loaded.Properties.Add(property);
        }
    }

    return loaded;
}
/// <summary>
/// Creates a copy of this pronunciation together with clones of all its properties.
/// </summary>
/// <remarks>
/// Each cloned property gets the new pronunciation as its parent. The Parent of the
/// new pronunciation itself is not assigned here.
/// </remarks>
/// <returns>Cloned LexiconPronunciation.</returns>
public LexiconPronunciation Clone()
{
    LexiconPronunciation copy = new LexiconPronunciation
    {
        Valid = this._valid,
        OldPosition = this._oldPosition,
        _symbolic = this._symbolic,
        _oldSymbolic = this._oldSymbolic,
        Frequency = this.Frequency,
        Language = this._language,
        Status = this.Status,
        LexiconType = this._type
    };

    foreach (LexiconItemProperty source in _properties)
    {
        LexiconItemProperty propertyCopy = source.Clone();
        copy.Properties.Add(propertyCopy);
        propertyCopy.Parent = copy;
    }

    return copy;
}
/// <summary>
/// Imports a domain pronunciation into this pronunciation.
/// </summary>
/// <remarks>
/// For every property of the domain pronunciation, the domain item and the attributes
/// are merged into each property of this pronunciation that has the same part of
/// speech. When no such property exists, a clone of the whole domain property is added.
/// </remarks>
/// <param name="domainPron">Domain LexiconPronunciation.</param>
/// <param name="domainTag">Domain tag.</param>
/// <param name="first">Whether this pronunciation is the first one in the domain lexicon.</param>
/// <returns>Whether this LexiconPronunciation changed.</returns>
/// <exception cref="InvalidDataException">
/// The domain pronunciation contains another domain, or either side still uses the
/// old gender / case / number format.
/// </exception>
public bool ImportDomainPronunciation(LexiconPronunciation domainPron, string domainTag, bool first)
{
    Helper.ThrowIfNull(domainPron);
    Helper.ThrowIfNull(domainTag);

    if (!domainPron.OnlyContainsOneDomain(domainTag))
    {
        throw new InvalidDataException("It is invalid to include any other domain in property level.");
    }

    if (!first)
    {
        RemovePronunciationIsFirstTags(domainTag);
    }

    bool anyChange = false;

    foreach (LexiconItemProperty incoming in domainPron.Properties)
    {
        if (ContainsOldFormatFeatures(incoming))
        {
            throw new InvalidDataException("domain lexicon contains old format <gender> <case> <number>. Please convert them to new format <attr> before import.");
        }

        // Look for the target properties that the domain tag will be imported to.
        bool matchedExisting = false;
        foreach (LexiconItemProperty existing in _properties)
        {
            if (ContainsOldFormatFeatures(existing))
            {
                throw new InvalidDataException("target lexicon contains old format <gender> <case> <number>. Please convert them to new format <attr> before import.");
            }

            // Only properties with the same part of speech receive the domain data.
            if (!HistoryValue.Equals(existing.PartOfSpeech, incoming.PartOfSpeech))
            {
                continue;
            }

            matchedExisting = true;

            DomainItem sourceDomain = incoming.Domains[domainTag];
            Helper.ThrowIfNull(sourceDomain);

            DomainItem importedDomain = new DomainItem(sourceDomain.Value);
            anyChange |= existing.ImportDomainItem(importedDomain);
            existing.Domains[domainTag].IsFirstPronunciation = first;

            MergeDomainAttributes(existing, incoming);
        }

        if (matchedExisting)
        {
            continue;
        }

        // Not found: copy the whole property from the domain lexicon.
        LexiconItemProperty addedProperty = incoming.Clone();
        foreach (DomainItem clonedDomain in addedProperty.Domains.Values)
        {
            clonedDomain.IsFirstPronunciation = first;
        }

        _properties.Add(addedProperty);
        anyChange = true;
    }

    return anyChange;
}

/// <summary>
/// Tells whether a property still carries old-format gender, case or number items.
/// </summary>
/// <param name="property">Property to inspect.</param>
/// <returns>True when any of Gender, Case or Number is set.</returns>
private static bool ContainsOldFormatFeatures(LexiconItemProperty property)
{
    return property.Gender != null || property.Case != null || property.Number != null;
}

/// <summary>
/// Merges the attribute set of the domain property into the target property.
/// </summary>
/// <param name="target">Property of the main lexicon that receives the attributes.</param>
/// <param name="source">Property of the domain lexicon that provides the attributes.</param>
private static void MergeDomainAttributes(LexiconItemProperty target, LexiconItemProperty source)
{
    foreach (string attributeKey in source.AttributeSet.Keys)
    {
        if (!target.AttributeSet.ContainsKey(attributeKey))
        {
            // NOTE(review): the list instance is shared with the domain property, not copied.
            target.AttributeSet.Add(attributeKey, source.AttributeSet[attributeKey]);
        }
        else
        {
            // Union of main and domain attributes, duplicates removed.
            target.AttributeSet[attributeKey] =
                target.AttributeSet[attributeKey].Union(source.AttributeSet[attributeKey]).ToList();
        }
    }
}
/// <summary>
/// Compares two pronunciations for equality, treating a null reference and a
/// pronunciation with the Deleted status as equivalent.
/// </summary>
/// <param name="obj1">Object 1.</param>
/// <param name="obj2">Object 2.</param>
/// <returns>
/// True for equal:
/// (null, null) => equal;
/// (null, deleted) => equal;
/// (deleted, deleted) => equal;
/// (null, not deleted) => not equal.
/// Otherwise both must agree on language, lexicon type, status, symbolic form and
/// have pairwise equal properties in the same order.
/// </returns>
public static bool Equals(LexiconPronunciation obj1, LexiconPronunciation obj2)
{
    if (obj1 == obj2)
    {
        return true;
    }

    bool firstAbsent = obj1 == null || obj1.Status == Lexicon.LexiconStatus.Deleted;
    bool secondAbsent = obj2 == null || obj2.Status == Lexicon.LexiconStatus.Deleted;
    if (firstAbsent && secondAbsent)
    {
        return true;
    }

    // Exactly one side is null while the other is a live pronunciation: they are
    // different. Without this guard the member accesses below would throw a
    // NullReferenceException.
    if (obj1 == null || obj2 == null)
    {
        return false;
    }

    if (obj1.Language != obj2.Language ||
        obj1.LexiconType != obj2.LexiconType ||
        obj1.Status != obj2.Status ||
        obj1.Symbolic != obj2.Symbolic)
    {
        return false;
    }

    Collection<LexiconItemProperty> props1 = obj1.Properties;
    Collection<LexiconItemProperty> props2 = obj2.Properties;

    int propsLength = props1.Count;
    if (propsLength != props2.Count)
    {
        return false;
    }

    // The comparison is positional: the same properties in another order are not equal.
    for (int j = 0; j < propsLength; j++)
    {
        if (!props1[j].Equals(props2[j]))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Loads a LexiconItemProperty from an Xml node.
/// </summary>
/// <remarks>
/// Reads the status attribute "s", the part-of-speech, gender, case and number items,
/// the "tts:domain" children and the "tts:attr" children. When the node declares no
/// domain, the property gets the lexicon-level domain tag if there is one and a
/// default DomainItem otherwise. Declaring domains on the property while the lexicon
/// has a domain tag is reported as an error.
/// </remarks>
/// <param name="parentLexPron">Pronunciation that owns the property.</param>
/// <param name="propertyNode">Xml node of the property.</param>
/// <param name="nsmgr">Xml namespace manager.</param>
/// <param name="contentController">Content controller.</param>
/// <param name="errorSet">Error set that receives domain errors.</param>
/// <returns>
/// The loaded property, or null when the property has the Deleted status and the
/// controller is not in history checking mode.
/// </returns>
internal static LexiconItemProperty Load(LexiconPronunciation parentLexPron, XmlNode propertyNode, XmlNamespaceManager nsmgr, Lexicon.ContentControler contentController, ErrorSet errorSet)
{
    Debug.Assert(parentLexPron != null && parentLexPron.Parent != null && propertyNode != null && nsmgr != null && contentController != null && errorSet != null);

    LexiconItemProperty loaded = new LexiconItemProperty();
    loaded.Parent = parentLexPron;

    XmlElement element = propertyNode as XmlElement;
    string statusText = element.GetAttribute("s");
    if (!string.IsNullOrEmpty(statusText))
    {
        loaded.Status = (Lexicon.LexiconStatus)Enum.Parse(typeof(Lexicon.LexiconStatus), statusText, true);
    }

    if (!contentController.IsHistoryCheckingMode && loaded.Status == Lexicon.LexiconStatus.Deleted)
    {
        return null;
    }

    PosItem pos = PosItem.Load(propertyNode, nsmgr);
    if (pos != null)
    {
        loaded.PartOfSpeech = pos;
    }

    GenderItem gender = GenderItem.Load(propertyNode, nsmgr);
    if (gender != null)
    {
        loaded.Gender = gender;
    }

    CaseItem caseValue = CaseItem.Load(propertyNode, nsmgr);
    if (caseValue != null)
    {
        loaded.Case = caseValue;
    }

    NumberItem number = NumberItem.Load(propertyNode, nsmgr);
    if (number != null)
    {
        loaded.Number = number;
    }

    foreach (XmlNode domainNode in propertyNode.SelectNodes("tts:domain", nsmgr))
    {
        DomainItem domain = DomainItem.Load(loaded, domainNode, nsmgr, contentController, errorSet);
        if (domain == null)
        {
            continue;
        }

        if (loaded.Domains.ContainsKey(domain.Value))
        {
            Error duplicateError = new Error(DomainError.DuplicateDomain, domain.Value);
            errorSet.Add(LexiconError.DomainError, duplicateError,
                parentLexPron.Parent.Text, parentLexPron.Symbolic);
        }
        else
        {
            loaded.Domains.Add(domain.Value, domain);
        }
    }

    string lexLevelDomain = (parentLexPron.Parent.Parent as Lexicon).DomainTag;
    bool hasLexLevelDomain = !string.IsNullOrEmpty(lexLevelDomain);

    if (loaded.Domains.Count == 0)
    {
        // No domain on the property: fall back to the lexicon-level tag or the default item.
        DomainItem fallbackDomain = hasLexLevelDomain
            ? new DomainItem(lexLevelDomain)
            : new DomainItem();
        loaded.ChangeDomain(fallbackDomain);
    }
    else if (hasLexLevelDomain)
    {
        // Property-level domains and a lexicon-level domain tag are both present.
        Error invalidTagsError = new Error(DomainError.InvalidDomainTags);
        errorSet.Add(LexiconError.DomainError, invalidTagsError,
            parentLexPron.Parent.Text, parentLexPron.Symbolic);
    }

    foreach (XmlNode attributeNode in propertyNode.SelectNodes("tts:attr", nsmgr))
    {
        AttributeItem attribute = AttributeItem.Load(loaded, attributeNode, nsmgr, contentController, errorSet);
        if (attribute != null)
        {
            loaded.AddAttribute(attribute);
        }
    }

    return loaded;
}