/**
 * <summary> Supplies a morphological parse for every word of the sentence that does not have one yet. The sentence
 * is analyzed with morphologicalAnalyzer.RobustMorphologicalAnalysis, the resulting parse lists are handed to
 * longestRootFirstDisambiguation.Disambiguate, and for each word whose GetParse() is null the parse selected for
 * that position is passed to SetParseAutomatically together with the word. Words that already have a parse are
 * left untouched.</summary>
 * <param name="sentence">The sentence to be disambiguated automatically.</param>
 */
protected override void AutoDisambiguateMultipleRootWords(AnnotatedSentence.AnnotatedSentence sentence)
{
    FsmParseList[] analyses = morphologicalAnalyzer.RobustMorphologicalAnalysis(sentence);
    List<FsmParse> chosenParses = longestRootFirstDisambiguation.Disambiguate(analyses);
    var position = 0;
    while (position < sentence.WordCount())
    {
        var annotated = (AnnotatedWord)sentence.GetWord(position);
        // Only words without a parse are filled in; existing parses win.
        if (annotated.GetParse() == null)
        {
            SetParseAutomatically(chosenParses[position], annotated);
        }

        position++;
    }
}
/**
 * <summary> Tags MONEY named entities. A word that has a morphological parse and whose Turkish-lowercased name is
 * accepted by Word.IsMoney is tagged MONEY. The method then walks backwards from that word and also tags each
 * preceding word as MONEY for as long as the word has a parse and is either named "amerikan" or carries the REAL,
 * CARDINAL or NUMBER morphological tag; the walk stops at the first word that does not qualify.</summary>
 * <param name="sentence">The sentence in which MONEY named entities are detected.</param>
 */
protected override void AutoDetectMoney(AnnotatedSentence.AnnotatedSentence sentence)
{
    for (var position = 0; position < sentence.WordCount(); position++)
    {
        var candidate = (AnnotatedWord)sentence.GetWord(position);
        var lowered = candidate.GetName().ToLower(new CultureInfo("tr"));
        if (candidate.GetParse() == null || !Word.IsMoney(lowered))
        {
            continue;
        }

        candidate.SetNamedEntityType("MONEY");

        // Extend the entity to the left over the amount / qualifier words.
        for (var back = position - 1; back >= 0; back--)
        {
            var earlier = (AnnotatedWord)sentence.GetWord(back);
            var belongsToAmount = earlier.GetParse() != null
                                  && (earlier.GetName().Equals("amerikan")
                                      || earlier.GetParse().ContainsTag(MorphologicalTag.REAL)
                                      || earlier.GetParse().ContainsTag(MorphologicalTag.CARDINAL)
                                      || earlier.GetParse().ContainsTag(MorphologicalTag.NUMBER));
            if (!belongsToAmount)
            {
                break;
            }

            earlier.SetNamedEntityType("MONEY");
        }
    }
}
/**
 * <summary> Tags TIME named entities. A word that has a morphological parse and whose Turkish-lowercased name is
 * accepted by Word.IsTime is tagged TIME. If the immediately preceding word has a parse containing the CARDINAL
 * morphological tag, that word is tagged TIME as well. A preceding word without a parse is skipped.</summary>
 * <param name="sentence">The sentence in which TIME named entities are detected.</param>
 */
protected override void AutoDetectTime(AnnotatedSentence.AnnotatedSentence sentence)
{
    for (var i = 0; i < sentence.WordCount(); i++)
    {
        var word = (AnnotatedWord)sentence.GetWord(i);
        var wordLowercase = word.GetName().ToLower(new CultureInfo("tr"));
        if (word.GetParse() == null || !Word.IsTime(wordLowercase))
        {
            continue;
        }

        word.SetNamedEntityType("TIME");
        if (i == 0)
        {
            continue;
        }

        var previous = (AnnotatedWord)sentence.GetWord(i - 1);
        // The previous word may not have a parse; dereferencing it unguarded would throw a
        // NullReferenceException, so it is checked the same way AutoDetectMoney checks it.
        var previousParse = previous.GetParse();
        if (previousParse != null && previousParse.ContainsTag(MorphologicalTag.CARDINAL))
        {
            previous.SetNamedEntityType("TIME");
        }
    }
}
/**
 * <summary> Creates a pair that ties an annotated word to a newly built leaf node. A parent node is created first:
 * its symbol is "ADVP" when GetUniversalDependency() equals "ADVMOD", "ADJP" when it equals "ACL", and otherwise
 * the tree part-of-speech of the word's parse. The leaf node holds the word's string form with "(" replaced by
 * "-LRB-" and ")" replaced by "-RRB-", and is added as a child of the parent. Both "done" flags start as false.</summary>
 * <param name="annotatedWord">The annotated word wrapped by this pair.</param>
 * <param name="no">The number stored for this pair.</param>
 */
public WordNodePair(AnnotatedWord annotatedWord, int no)
{
    _annotatedWord = annotatedWord;
    ParseNodeDrawable parent;
    if (GetUniversalDependency().Equals("ADVMOD"))
    {
        parent = new ParseNodeDrawable(new Symbol("ADVP"));
    }
    else if (GetUniversalDependency().Equals("ACL"))
    {
        parent = new ParseNodeDrawable(new Symbol("ADJP"));
    }
    else
    {
        parent = new ParseNodeDrawable(new Symbol(annotatedWord.GetParse().GetTreePos()));
    }

    // string.Replace matches its pattern literally (it is not a regular expression), so the
    // parentheses must be given unescaped; "\\(" would only match a backslash followed by "(".
    var leafText = annotatedWord.ToString().Replace("(", "-LRB-").Replace(")", "-RRB-");
    _node = new ParseNodeDrawable(parent, leafText, true, 0);
    parent.AddChild(_node);
    _no = no;
    _doneForConnect = false;
    _doneForHead = false;
}
/**
 * <summary> Collects the candidate synsets for the word at the given position. The result starts with the synsets
 * constructed for the word itself and is then extended, in this order, with the idiom synsets of: the two previous
 * words plus the word; the previous word, the word and the next word; the word plus the two next words; the
 * previous word plus the word; the word plus the next word. A combination is only tried when every neighbour it
 * needs exists in the sentence and has a morphological parse.</summary>
 * <param name="wordNet">The wordnet used to construct the synsets.</param>
 * <param name="fsm">The morphological analyzer passed through to the wordnet.</param>
 * <param name="sentence">The sentence containing the word.</param>
 * <param name="index">The position of the word in the sentence.</param>
 * <returns>The list of candidate synsets in the order described above.</returns>
 */
protected List<SynSet> GetCandidateSynSets(WordNet.WordNet wordNet, FsmMorphologicalAnalyzer fsm, AnnotatedSentence.AnnotatedSentence sentence, int index)
{
    var center = (AnnotatedWord)sentence.GetWord(index);
    var left2 = index > 1 ? (AnnotatedWord)sentence.GetWord(index - 2) : null;
    var left1 = index > 0 ? (AnnotatedWord)sentence.GetWord(index - 1) : null;
    var right1 = index != sentence.WordCount() - 1 ? (AnnotatedWord)sentence.GetWord(index + 1) : null;
    var right2 = index < sentence.WordCount() - 2 ? (AnnotatedWord)sentence.GetWord(index + 2) : null;

    // Senses of the word on its own come first.
    List<SynSet> candidates = wordNet.ConstructSynSets(
        center.GetParse().GetWord().GetName(),
        center.GetParse(),
        center.GetMetamorphicParse(),
        fsm);

    // Three-word expressions.
    if (left2 != null && left2.GetParse() != null && left1 != null && left1.GetParse() != null)
    {
        candidates.AddRange(wordNet.ConstructIdiomSynSets(
            left2.GetParse(), left1.GetParse(), center.GetParse(),
            left2.GetMetamorphicParse(), left1.GetMetamorphicParse(), center.GetMetamorphicParse(),
            fsm));
    }

    if (left1 != null && left1.GetParse() != null && right1 != null && right1.GetParse() != null)
    {
        candidates.AddRange(wordNet.ConstructIdiomSynSets(
            left1.GetParse(), center.GetParse(), right1.GetParse(),
            left1.GetMetamorphicParse(), center.GetMetamorphicParse(), right1.GetMetamorphicParse(),
            fsm));
    }

    if (right1 != null && right1.GetParse() != null && right2 != null && right2.GetParse() != null)
    {
        candidates.AddRange(wordNet.ConstructIdiomSynSets(
            center.GetParse(), right1.GetParse(), right2.GetParse(),
            center.GetMetamorphicParse(), right1.GetMetamorphicParse(), right2.GetMetamorphicParse(),
            fsm));
    }

    // Two-word expressions.
    if (left1 != null && left1.GetParse() != null)
    {
        candidates.AddRange(wordNet.ConstructIdiomSynSets(
            left1.GetParse(), center.GetParse(),
            left1.GetMetamorphicParse(), center.GetMetamorphicParse(),
            fsm));
    }

    if (right1 != null && right1.GetParse() != null)
    {
        candidates.AddRange(wordNet.ConstructIdiomSynSets(
            center.GetParse(), right1.GetParse(),
            center.GetMetamorphicParse(), right1.GetMetamorphicParse(),
            fsm));
    }

    return candidates;
}
/**
 * <summary> Labels words whose sense is unambiguous. For every word that has a morphological parse but no semantic
 * label yet, the method checks, in this order:
 * 1. the previous two words and the current word; the previous, current and next word; the current and the next
 *    two words, for a three word multiword expression in the Turkish wordnet;
 * 2. the previous word and the current word; the current word and the next word, for a two word multiword
 *    expression in the Turkish wordnet;
 * 3. the current word on its own.
 * The first of these lookups that yields exactly one synset determines the semantic label of the current word.
 * A combination is only tried when every neighbour it needs exists in the sentence and has a parse.</summary>
 * <param name="sentence">The sentence for which word sense disambiguation will be determined automatically.</param>
 * <returns>Always true.</returns>
 */
protected override bool AutoLabelSingleSemantics(AnnotatedSentence.AnnotatedSentence sentence)
{
    var wordCount = sentence.WordCount();
    for (var i = 0; i < wordCount; i++)
    {
        var current = (AnnotatedWord)sentence.GetWord(i);

        // Neighbours are recomputed for every position. Keeping them in variables that outlive the
        // iteration would leave stale words in next / twoNext at the end of the sentence (next would
        // even be the current word itself on the last position).
        var twoPrevious = i > 1 ? (AnnotatedWord)sentence.GetWord(i - 2) : null;
        var previous = i > 0 ? (AnnotatedWord)sentence.GetWord(i - 1) : null;
        var next = i < wordCount - 1 ? (AnnotatedWord)sentence.GetWord(i + 1) : null;
        var twoNext = i < wordCount - 2 ? (AnnotatedWord)sentence.GetWord(i + 2) : null;

        if (current.GetSemantic() != null || current.GetParse() == null)
        {
            continue;
        }

        var hasTwoPrevious = twoPrevious != null && twoPrevious.GetParse() != null;
        var hasPrevious = previous != null && previous.GetParse() != null;
        var hasNext = next != null && next.GetParse() != null;
        var hasTwoNext = twoNext != null && twoNext.GetParse() != null;

        if (hasTwoPrevious && hasPrevious)
        {
            var idioms = _turkishWordNet.ConstructIdiomSynSets(
                twoPrevious.GetParse(), previous.GetParse(), current.GetParse(),
                twoPrevious.GetMetamorphicParse(), previous.GetMetamorphicParse(), current.GetMetamorphicParse(),
                _fsm);
            if (idioms.Count == 1)
            {
                current.SetSemantic(idioms[0].GetId());
                continue;
            }
        }

        if (hasPrevious && hasNext)
        {
            var idioms = _turkishWordNet.ConstructIdiomSynSets(
                previous.GetParse(), current.GetParse(), next.GetParse(),
                previous.GetMetamorphicParse(), current.GetMetamorphicParse(), next.GetMetamorphicParse(),
                _fsm);
            if (idioms.Count == 1)
            {
                current.SetSemantic(idioms[0].GetId());
                continue;
            }
        }

        if (hasNext && hasTwoNext)
        {
            var idioms = _turkishWordNet.ConstructIdiomSynSets(
                current.GetParse(), next.GetParse(), twoNext.GetParse(),
                current.GetMetamorphicParse(), next.GetMetamorphicParse(), twoNext.GetMetamorphicParse(),
                _fsm);
            if (idioms.Count == 1)
            {
                current.SetSemantic(idioms[0].GetId());
                continue;
            }
        }

        if (hasPrevious)
        {
            var idioms = _turkishWordNet.ConstructIdiomSynSets(
                previous.GetParse(), current.GetParse(),
                previous.GetMetamorphicParse(), current.GetMetamorphicParse(),
                _fsm);
            if (idioms.Count == 1)
            {
                current.SetSemantic(idioms[0].GetId());
                continue;
            }
        }

        if (hasNext)
        {
            var idioms = _turkishWordNet.ConstructIdiomSynSets(
                current.GetParse(), next.GetParse(),
                current.GetMetamorphicParse(), next.GetMetamorphicParse(),
                _fsm);
            if (idioms.Count == 1)
            {
                current.SetSemantic(idioms[0].GetId());
                continue;
            }
        }

        // No multiword expression matched uniquely; fall back to the word on its own.
        var meanings = _turkishWordNet.ConstructSynSets(
            current.GetParse().GetWord().GetName(),
            current.GetParse(),
            current.GetMetamorphicParse(),
            _fsm);
        if (meanings.Count == 1)
        {
            current.SetSemantic(meanings[0].GetId());
        }
    }

    return true;
}
/**
 * <summary> Returns the tree part-of-speech reported by the morphological parse of the wrapped annotated word.</summary>
 * <returns>The value of GetTreePos() on the word's parse.</returns>
 */
public string GetTreePos()
{
    var parse = _annotatedWord.GetParse();
    return parse.GetTreePos();
}
/**
 * <summary> Chooses the dependency relation label for a dependent/head pair from the phrase labels of the two
 * sides and, for some combinations, from the morphological parse and semantic label of the words. When either
 * condition flag is set the result is "PUNCT" regardless of the other arguments. Any dependent label that is
 * not handled explicitly yields "DEP".</summary>
 * <param name="dependent">Phrase label of the dependent side (e.g. "ADVP", "ADJP", "PP", "DP", "NP", "S").</param>
 * <param name="head">Phrase label of the head side (e.g. "NP", "VP").</param>
 * <param name="condition1">First flag that forces the result "PUNCT".</param>
 * <param name="condition2">Second flag that forces the result "PUNCT".</param>
 * <param name="dependentWord">The dependent word; its parse and semantic label are consulted for some labels.</param>
 * <param name="headWord">The head word; its parse and semantic label are consulted for noun phrase dependents.</param>
 * <returns>The dependency relation label.</returns>
 */
private string FindData(string dependent, string head, bool condition1, bool condition2, AnnotatedWord dependentWord, AnnotatedWord headWord)
{
    if (condition1 || condition2)
    {
        return "PUNCT";
    }

    if (dependent == "ADVP")
    {
        if (dependentWord.GetParse().GetRootPos().Equals("VERB"))
        {
            return "ADVCL";
        }

        return dependentWord.GetParse().GetRootPos().Equals("NOUN") ? "NMOD" : "ADVMOD";
    }

    if (dependent == "ADJP")
    {
        if (head != "NP")
        {
            return "ADVMOD";
        }

        return dependentWord.GetParse().GetRootPos().Equals("VERB") ? "ACL" : "AMOD";
    }

    if (dependent == "PP")
    {
        if (head == "NP")
        {
            return "CASE";
        }

        var dependentIsNoun = dependentWord.GetParse() != null
                              && dependentWord.GetParse().GetRootPos().Equals("NOUN");
        return dependentIsNoun ? "NMOD" : "ADVMOD";
    }

    if (dependent == "DP")
    {
        return "DET";
    }

    if (dependent == "NP")
    {
        if (head == "NP")
        {
            // Two proper nouns side by side form a flat name.
            if (dependentWord.GetParse().ContainsTag(MorphologicalTag.PROPERNOUN)
                && headWord.GetParse().ContainsTag(MorphologicalTag.PROPERNOUN))
            {
                return "FLAT";
            }

            // Words sharing one semantic label are treated as a compound.
            if (dependentWord.GetSemantic() != null
                && headWord.GetSemantic() != null
                && dependentWord.GetSemantic().Equals(headWord.GetSemantic()))
            {
                return "COMPOUND";
            }

            return "NMOD";
        }

        if (head == "VP")
        {
            if (dependentWord.GetSemantic() != null
                && headWord.GetSemantic() != null
                && dependentWord.GetSemantic().Equals(headWord.GetSemantic()))
            {
                return "COMPOUND";
            }

            var isObjectCase = dependentWord.GetParse().ContainsTag(MorphologicalTag.NOMINATIVE)
                               || dependentWord.GetParse().ContainsTag(MorphologicalTag.ACCUSATIVE);
            return isObjectCase ? "OBJ" : "OBL";
        }

        return "NMOD";
    }

    if (dependent == "S")
    {
        return head == "VP" ? "CCOMP" : "DEP";
    }

    if (dependent == "NUM")
    {
        return "NUMMOD";
    }

    if (dependent == "INTJ")
    {
        return "DISCOURSE";
    }

    if (dependent == "NEG")
    {
        return "NEG";
    }

    if (dependent == "CONJP")
    {
        return "CC";
    }

    return "DEP";
}