/// <summary>
/// Registers a mention for a pronoun headword and, when the pronoun heads a
/// conjunction (e.g., "you and I"), an additional mention for the span returned by
/// <c>GetNPSpan</c>.
/// </summary>
/// <param name="headword">The pronoun node; its index minus one is used as the token position in the sentence.</param>
/// <param name="dep">Dependency graph used to look up edges and conjunct children.</param>
/// <param name="s">Sentence annotation from which the tokens and the basic/enhanced graphs are read.</param>
/// <param name="mentions">Output list that receives newly created mentions.</param>
/// <param name="mentionSpanSet">Spans already turned into mentions; updated when a mention is added.</param>
/// <param name="namedEntitySpanSet">Named-entity spans passed to <c>InsideNE</c> and <c>AddMention</c>.</param>
private void ExtractPronounForHeadword(IndexedWord headword, SemanticGraph dep, ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
{
    IList <CoreLabel> sent = s.Get(typeof(CoreAnnotations.TokensAnnotation));
    SemanticGraph basic = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    SemanticGraph enhanced = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
    if (enhanced == null)
    {
        // No enhanced graph on this sentence: fall back to the basic dependencies.
        enhanced = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    }

    int headPos = headword.Index();
    int spanStart = headPos - 1;
    int spanEnd = headPos;

    // handle "you all", "they both" etc: when the token right after the pronoun is
    // "all"/"both" and the graph links it to the pronoun, widen the span by one token.
    bool hasFollowingToken = sent.Count > headPos;
    if (hasFollowingToken && sent[headPos].Word().Matches("all|both"))
    {
        IndexedWord follower = dep.GetNodeByIndex(headPos + 1);
        if (dep.GetEdge(headword, follower) != null)
        {
            spanEnd++;
        }
    }

    IntPair pronounSpan = new IntPair(spanStart, spanEnd);
    bool alreadySeen = mentionSpanSet.Contains(pronounSpan);
    if (!alreadySeen && !InsideNE(pronounSpan, namedEntitySpanSet))
    {
        // The mention id is a placeholder value here.
        const int placeholderMentionId = -1;
        IList <CoreLabel> spanTokens = new List <CoreLabel>(sent.SubList(spanStart, spanEnd));
        Mention pronounMention = new Mention(placeholderMentionId, spanStart, spanEnd, sent, basic, enhanced, spanTokens);
        pronounMention.headIndex = headword.Index() - 1;
        pronounMention.headWord = sent[pronounMention.headIndex];
        pronounMention.headString = pronounMention.headWord.Word().ToLower(Locale.English);
        mentions.Add(pronounMention);
        mentionSpanSet.Add(pronounSpan);
    }

    // when pronoun is a part of conjunction (e.g., you and I)
    ICollection <IndexedWord> conjuncts = dep.GetChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
    if (conjuncts.Count <= 0)
    {
        return;
    }

    IntPair phraseSpan = GetNPSpan(headword, dep, sent);
    int phraseStart = phraseSpan.Get(0);
    int phraseEnd = phraseSpan.Get(1) + 1;
    // try not to have span that ends with ,
    if (",".Equals(sent[phraseEnd - 1].Word()))
    {
        phraseEnd--;
    }
    AddMention(phraseStart, phraseEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, sent, basic, enhanced);
}
/// <summary>
/// Returns true if
/// <paramref name="word"/>
/// has a marker dependent whose value is "to" (compared case-insensitively),
/// i.e. an infinitival "to" attached to it.
/// </summary>
/// <param name="sg">Graph in which the marker children of <paramref name="word"/> are looked up.</param>
/// <param name="word">Node whose marker children are inspected.</param>
/// <returns>True when at least one marker child has the value "to"; false otherwise.</returns>
private static bool HasTo(SemanticGraph sg, IndexedWord word)
{
    // Without any marker child there cannot be an infinitival "to".
    if (!sg.HasChildWithReln(word, UniversalEnglishGrammaticalRelations.Marker))
    {
        return false;
    }

    foreach (IndexedWord candidate in sg.GetChildrenWithReln(word, UniversalEnglishGrammaticalRelations.Marker))
    {
        bool isInfinitivalTo = Sharpen.Runtime.EqualsIgnoreCase(candidate.Value(), "to");
        if (isInfinitivalTo)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Returns true if
/// <paramref name="word"/>
/// has an auxiliary child whose value matches <c>BeRegex</c> (an inflection of "be").
/// If the word itself has none and it is attached to its parent by a conjunct
/// relation, the parent is checked in the same way.
/// </summary>
/// <param name="sg">Graph in which auxiliaries and the parent of <paramref name="word"/> are looked up.</param>
/// <param name="word">Node to inspect.</param>
/// <returns>True when a matching auxiliary is found on the word or on a governor reached through conjunct edges.</returns>
private static bool HasBeAux(SemanticGraph sg, IndexedWord word)
{
    foreach (IndexedWord auxiliary in sg.GetChildrenWithReln(word, UniversalEnglishGrammaticalRelations.AuxModifier))
    {
        if (auxiliary.Value().Matches(BeRegex))
        {
            return true;
        }
    }

    /* Check if head of conjunction has an auxiliary in case the word is part of a conjunction */
    IndexedWord governor = sg.GetParent(word);
    if (governor == null)
    {
        return false;
    }

    SemanticGraphEdge incoming = sg.GetEdge(governor, word);
    bool attachedAsConjunct = UniversalEnglishGrammaticalRelations.Conjunct.IsAncestor(incoming.GetRelation());
    if (!attachedAsConjunct)
    {
        return false;
    }

    return HasBeAux(sg, governor);
}
/// <summary>
/// Adds mentions headed by <paramref name="headword"/>. Headwords whose tag starts
/// with "PRP" are delegated to <c>ExtractPronounForHeadword</c>. Otherwise the span
/// from <c>GetNPSpan</c> is added (minus a trailing comma and a leading "IN" token),
/// and, when the headword has conjunct children, a shorter span ending before the
/// first conjunct is added as well.
/// </summary>
/// <param name="headword">Head node of the candidate mention.</param>
/// <param name="dep">Dependency graph used for span computation and conjunct lookup.</param>
/// <param name="s">Sentence annotation from which the tokens and the basic/enhanced graphs are read.</param>
/// <param name="mentions">Output list passed on to <c>AddMention</c>.</param>
/// <param name="mentionSpanSet">Set of mention spans passed on to <c>AddMention</c>.</param>
/// <param name="namedEntitySpanSet">Set of named-entity spans passed on to <c>AddMention</c>.</param>
private void ExtractMentionForHeadword(IndexedWord headword, SemanticGraph dep, ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
{
    IList <CoreLabel> sent = s.Get(typeof(CoreAnnotations.TokensAnnotation));
    SemanticGraph basic = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    SemanticGraph enhanced = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
    if (enhanced == null)
    {
        // No enhanced graph on this sentence: fall back to the basic dependencies.
        enhanced = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    }

    // pronoun
    if (headword.Tag().StartsWith("PRP"))
    {
        ExtractPronounForHeadword(headword, dep, s, mentions, mentionSpanSet, namedEntitySpanSet);
        return;
    }

    // add NP mention
    IntPair phraseSpan = GetNPSpan(headword, dep, sent);
    int beginIdx = phraseSpan.Get(0);
    int endIdx = phraseSpan.Get(1) + 1;

    // try not to have span that ends with ,
    if (",".Equals(sent[endIdx - 1].Word()))
    {
        endIdx--;
    }

    // try to remove first IN.
    if ("IN".Equals(sent[beginIdx].Tag()))
    {
        beginIdx++;
    }

    AddMention(beginIdx, endIdx, headword, mentions, mentionSpanSet, namedEntitySpanSet, sent, basic, enhanced);

    // Extract the first element of a conjunction: for "A and B" the span "A" is added
    // here, while the full "A and B" span was added by the AddMention call above.
    ICollection <IndexedWord> conjuncts = dep.GetChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
    if (conjuncts.Count <= 0)
    {
        return;
    }

    // Make sure we use the first conjunct, i.e. the one with the smallest index.
    IndexedWord earliestConjunct = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
    foreach (IndexedWord candidate in conjuncts)
    {
        if (candidate.Index() < earliestConjunct.Index())
        {
            earliestConjunct = candidate;
        }
    }

    IndexedWord leftEdge = SemanticGraphUtils.LeftMostChildVertice(earliestConjunct, dep);

    // Walk leftwards from the token preceding the first conjunct's subtree, skipping
    // tokens tagged as a coordinating conjunction or comma.
    int firstElementEnd = leftEdge.Index() - 1;
    while (firstElementEnd > beginIdx && sent[firstElementEnd - 1].Tag().Matches("CC|,"))
    {
        firstElementEnd--;
    }

    // Only add the shortened span if something is left of it and it still covers the headword.
    if (firstElementEnd > beginIdx && headword.Index() - 1 < firstElementEnd)
    {
        AddMention(beginIdx, firstElementEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, sent, basic, enhanced);
    }
}