/// <summary>
/// Older variant of the noun-phrase span computation for a headword.
/// Token positions are obtained by subtracting 1 from <c>Index()</c>.
/// Without a copula child, the span runs from the smaller of the headword position and the
/// left-most vertex position to the larger of the headword position and the right-most vertex
/// position, as reported by <c>SemanticGraphUtils.LeftRightMostChildVertices</c>.
/// With a copula child, the start is recomputed from the child that follows the copula in
/// <c>dep.GetChildList(headword)</c>, or is the headword itself when the copula is the last child.
/// </summary>
/// <param name="headword">Head of the candidate noun phrase.</param>
/// <param name="dep">Dependency graph that contains <paramref name="headword"/>.</param>
/// <param name="sent">Sentence tokens; not read by this method.</param>
/// <returns>An <c>IntPair</c> holding the begin and end positions (both derived from <c>Index() - 1</c>).</returns>
private IntPair GetNPSpanOld(IndexedWord headword, SemanticGraph dep, IList<CoreLabel> sent)
{
    IndexedWord copula = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Copula);
    Pair<IndexedWord, IndexedWord> extremes = SemanticGraphUtils.LeftRightMostChildVertices(headword, dep);

    // The headword itself may be the first or the last word of the span.
    int headPos = headword.Index() - 1;
    int spanBegin = Math.Min(headPos, extremes.first.Index() - 1);
    int spanEnd = Math.Max(headPos, extremes.second.Index() - 1);

    if (copula != null)
    {
        // Copula present: restart the span at the child that follows the copula, if there is one.
        IList<IndexedWord> kids = dep.GetChildList(headword);
        int afterCopula = kids.IndexOf(copula) + 1;
        spanBegin = afterCopula < kids.Count
            ? Math.Min(headPos, SemanticGraphUtils.LeftMostChildVertice(kids[afterCopula], dep).Index() - 1)
            : headPos;
    }

    return new IntPair(spanBegin, spanEnd);
}
/// <summary>
/// Determine the person of "was".
/// Looks for a nominal subject child of <paramref name="word"/> (falling back to a nominal
/// passive subject). If that subject's word equals "i" ignoring case, the result is "1".
/// Otherwise the parent of <paramref name="word"/> is consulted: when the word hangs off its
/// parent through an auxiliary, passive-auxiliary or conjunct-type relation, the answer is
/// taken from the parent recursively.
/// </summary>
/// <param name="sg">Dependency graph that contains <paramref name="word"/>.</param>
/// <param name="word">The node whose person is to be determined.</param>
/// <returns>
/// "1", "3", or <c>null</c>. <c>null</c> is returned only when no subject was found and the
/// word has no parent or no edge from its parent.
/// </returns>
private static string WasPerson(SemanticGraph sg, IndexedWord word)
{
    IndexedWord subject =
        sg.GetChildWithReln(word, UniversalEnglishGrammaticalRelations.NominalSubject)
        ?? sg.GetChildWithReln(word, UniversalEnglishGrammaticalRelations.NominalPassiveSubject);

    // "I" is the subject of "was".
    if (subject != null && Sharpen.Runtime.EqualsIgnoreCase(subject.Word(), "i"))
    {
        return "1";
    }

    // Answer used when the graph gives no way to look further up.
    string withoutContext = subject != null ? "3" : null;

    IndexedWord governor = sg.GetParent(word);
    if (governor == null)
    {
        return withoutContext;
    }

    SemanticGraphEdge incoming = sg.GetEdge(governor, word);
    if (incoming == null)
    {
        return withoutContext;
    }

    // For an auxiliary, or for a conjunct (to check whether the subject of the head of the
    // conjunction is "I"), the decision is delegated to the governor.
    bool askGovernor =
        UniversalEnglishGrammaticalRelations.AuxModifier.Equals(incoming.GetRelation())
        || UniversalEnglishGrammaticalRelations.AuxPassiveModifier.Equals(incoming.GetRelation())
        || UniversalEnglishGrammaticalRelations.Conjunct.IsAncestor(incoming.GetRelation());

    return askGovernor ? WasPerson(sg, governor) : "3";
}
/// <summary>
/// Returns the left-most and right-most positions of the noun phrase headed by
/// <paramref name="headword"/>, leaving out the copula construction (the children up to and
/// including the copula child) and children attached by a relation whose short name matches
/// "dep|discourse|punct".
/// Example: for "you are the person" the span of "the person" is returned.
/// </summary>
/// <param name="headword">Head of the candidate noun phrase.</param>
/// <param name="dep">Dependency graph that contains <paramref name="headword"/>.</param>
/// <param name="sent">Sentence tokens; not read by this method.</param>
/// <returns>
/// An <c>IntPair</c> of begin and end positions, each derived from <c>Index() - 1</c>.
/// When no child qualifies, both positions are the headword's own position.
/// </returns>
private IntPair GetNPSpan(IndexedWord headword, SemanticGraph dep, IList<CoreLabel> sent)
{
    int headPos = headword.Index() - 1;
    IList<IndexedWord> kids = dep.GetChildList(headword);

    // With a copula child, only the children listed after it are candidates.
    IndexedWord copula = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Copula);
    int firstCandidate = 0;
    if (copula != null)
    {
        firstCandidate = kids.IndexOf(copula) + 1;
    }

    // Collect the children that belong inside the noun phrase.
    IList<IndexedWord> members = Generics.NewArrayList();
    for (int pos = firstCandidate; pos < kids.Count; pos++)
    {
        IndexedWord kid = kids[pos];
        SemanticGraphEdge link = dep.GetEdge(headword, kid);
        if (!link.GetRelation().GetShortName().Matches("dep|discourse|punct"))
        {
            members.Add(kid);
        }
    }

    // The headword is the only word of the phrase.
    if (members.Count == 0)
    {
        return new IntPair(headPos, headPos);
    }

    Pair<IndexedWord, IndexedWord> firstMemberExtent = SemanticGraphUtils.LeftRightMostChildVertices(members[0], dep);
    Pair<IndexedWord, IndexedWord> lastMemberExtent = SemanticGraphUtils.LeftRightMostChildVertices(members[members.Count - 1], dep);

    // The headword itself may be the first or the last word of the span.
    int spanBegin = Math.Min(headPos, firstMemberExtent.first.Index() - 1);
    int spanEnd = Math.Max(headPos, lastMemberExtent.second.Index() - 1);
    return new IntPair(spanBegin, spanEnd);
}
/// <summary>
/// Extracts mention candidates for a single headword of sentence <paramref name="s"/>.
/// A headword whose tag starts with "PRP" is handed to <c>ExtractPronounForHeadword</c>.
/// Otherwise the span from <c>GetNPSpan</c> is trimmed (a trailing "," token and a leading
/// "IN"-tagged token are dropped) and passed to <c>AddMention</c>. If the headword has
/// conjunct children, a second, shorter span ending before the earliest conjunct is also
/// passed to <c>AddMention</c>.
/// </summary>
/// <param name="headword">Head of the candidate mention.</param>
/// <param name="dep">Dependency graph used to compute spans and to look up conjuncts.</param>
/// <param name="s">Sentence annotation providing the tokens and the basic/enhanced dependency graphs.</param>
/// <param name="mentions">Passed through to <c>AddMention</c> / <c>ExtractPronounForHeadword</c>.</param>
/// <param name="mentionSpanSet">Passed through to <c>AddMention</c> / <c>ExtractPronounForHeadword</c>.</param>
/// <param name="namedEntitySpanSet">Passed through to <c>AddMention</c> / <c>ExtractPronounForHeadword</c>.</param>
private void ExtractMentionForHeadword(IndexedWord headword, SemanticGraph dep, ICoreMap s, IList<Mention> mentions, ICollection<IntPair> mentionSpanSet, ICollection<IntPair> namedEntitySpanSet)
{
    IList<CoreLabel> sent = s.Get(typeof(CoreAnnotations.TokensAnnotation));
    SemanticGraph basic = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    SemanticGraph enhanced = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
    if (enhanced == null)
    {
        // No enhanced graph on this sentence: use the basic dependencies in its place.
        enhanced = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    }

    // Pronouns take their own extraction path.
    if (headword.Tag().StartsWith("PRP"))
    {
        ExtractPronounForHeadword(headword, dep, s, mentions, mentionSpanSet, namedEntitySpanSet);
        return;
    }

    // Noun-phrase mention: begin is inclusive, end is made exclusive by the + 1.
    IntPair npSpan = GetNPSpan(headword, dep, sent);
    int beginIdx = npSpan.Get(0);
    int endIdx = npSpan.Get(1) + 1;

    // Try not to have a span that ends with ",".
    if (",".Equals(sent[endIdx - 1].Word()))
    {
        endIdx--;
    }

    // Try to remove a leading IN.
    if ("IN".Equals(sent[beginIdx].Tag()))
    {
        beginIdx++;
    }

    AddMention(beginIdx, endIdx, headword, mentions, mentionSpanSet, namedEntitySpanSet, sent, basic, enhanced);

    // For a conjunction "A and B" headed by A, also extract "A" on its own; the full
    // "A and B" span was added just above.
    ICollection<IndexedWord> conjChildren = dep.GetChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
    if (conjChildren.Count == 0)
    {
        return;
    }

    // Make sure we work with the conjunct that has the smallest index.
    IndexedWord earliestConj = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
    foreach (IndexedWord candidate in conjChildren)
    {
        if (candidate.Index() < earliestConj.Index())
        {
            earliestConj = candidate;
        }
    }

    // Walk left from the start of that conjunct over tokens tagged CC or ",".
    IndexedWord conjLeftEdge = SemanticGraphUtils.LeftMostChildVertice(earliestConj, dep);
    int firstElementEnd = conjLeftEdge.Index() - 1;
    while (firstElementEnd > beginIdx && sent[firstElementEnd - 1].Tag().Matches("CC|,"))
    {
        firstElementEnd--;
    }

    // Only add the shorter span if it is non-empty and still covers the headword.
    if (firstElementEnd > beginIdx && headword.Index() - 1 < firstElementEnd)
    {
        AddMention(beginIdx, firstElementEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, sent, basic, enhanced);
    }
}