/// <summary>
/// Computes the token span covered by the noun phrase headed by <paramref name="headword"/>.
/// </summary>
/// <remarks>
/// Positions are obtained by subtracting 1 from <c>Index()</c> (presumably converting a
/// 1-based word index to a 0-based token offset; confirm against IndexedWord).
/// Without a copula child the span runs from the leftmost to the rightmost vertex below
/// the headword (or the headword itself, whichever is further out). With a copula child,
/// the start is moved to the leftmost vertex of the child that follows the copula in the
/// headword's child list, or to the headword itself when the copula is the last child.
/// </remarks>
/// <param name="headword">Head of the candidate noun phrase.</param>
/// <param name="dep">Dependency graph that contains <paramref name="headword"/>.</param>
/// <param name="sent">Tokens of the sentence; not read by this method.</param>
/// <returns>An <c>IntPair</c> holding (begin, end) positions of the span.</returns>
private IntPair GetNPSpanOld(IndexedWord headword, SemanticGraph dep, IList<CoreLabel> sent)
{
    IndexedWord copula = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Copula);
    Pair<IndexedWord, IndexedWord> extremes = SemanticGraphUtils.LeftRightMostChildVertices(headword, dep);

    // The headword may itself be the first or the last word of the phrase.
    int headPos = headword.Index() - 1;
    int spanStart = Math.Min(headPos, extremes.first.Index() - 1);
    int spanEnd = Math.Max(headPos, extremes.second.Index() - 1);

    // Only a copula child changes where the span starts.
    if (copula != null)
    {
        IList<IndexedWord> kids = dep.GetChildList(headword);
        int afterCopula = kids.IndexOf(copula) + 1;

        // Start at the child right after the copula when there is one; otherwise at the headword.
        spanStart = afterCopula < kids.Count
            ? Math.Min(headPos, SemanticGraphUtils.LeftMostChildVertice(kids[afterCopula], dep).Index() - 1)
            : headPos;
    }

    return new IntPair(spanStart, spanEnd);
}
/// <summary>
/// Creates a new node from <c>newNodePrototype</c> and attaches it to the graph as a
/// dependent of the matched governor node.
/// </summary>
/// <remarks>
/// The new node takes its document id and sentence index from the governor, and its
/// word index from the leftmost vertex below the governor.
/// TODO: figure out how to specify where in the sentence this node goes.
/// TODO: determine if we should be copying an IndexedWord, or working just with a FeatureLabel.
/// TODO: bombproof if this gov, dep, and reln already exist.
/// </remarks>
/// <param name="sg">Graph that receives the new vertex and edge.</param>
/// <param name="sm">Matcher from which the governor node is looked up by <c>govNodeName</c>.</param>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    IndexedWord governor = sm.GetNode(govNodeName);
    IndexedWord inserted = new IndexedWord(newNodePrototype);

    // Cheap En-specific hack for placing a copula: reuse the index of the word that
    // begins the governing phrase.
    IndexedWord phraseStart = SemanticGraphUtils.LeftMostChildVertice(governor, sg);
    int insertAt = phraseStart.Index();

    inserted.SetDocID(governor.DocID());
    inserted.SetIndex(insertAt);
    inserted.SetSentIndex(governor.SentIndex());

    sg.AddVertex(inserted);
    // NOTE(review): the trailing 'false' is passed through unchanged; presumably the
    // "extra edge" flag of AddEdge - confirm against SemanticGraph.
    sg.AddEdge(governor, inserted, relation, weight, false);
}
/// <summary>
/// Extracts the mention(s) headed by <paramref name="headword"/> and registers them
/// through <c>AddMention</c>.
/// </summary>
/// <remarks>
/// Pronoun heads (tag starting with "PRP") are delegated to
/// <c>ExtractPronounForHeadword</c>. For every other head the span returned by
/// <c>GetNPSpan</c> is used, after dropping a trailing "," token and a leading token
/// tagged "IN". If the head has conjunct children, a second, shorter mention ending
/// before the first conjunct (and before any "CC" / "," tokens preceding it) is also added.
/// </remarks>
/// <param name="headword">Head word of the candidate mention.</param>
/// <param name="dep">Dependency graph used to compute spans and look up conjuncts.</param>
/// <param name="s">Sentence annotation map supplying the tokens and dependency graphs.</param>
/// <param name="mentions">Passed through to the mention-adding helpers.</param>
/// <param name="mentionSpanSet">Passed through to the mention-adding helpers.</param>
/// <param name="namedEntitySpanSet">Passed through to the mention-adding helpers.</param>
private void ExtractMentionForHeadword(IndexedWord headword, SemanticGraph dep, ICoreMap s, IList<Mention> mentions, ICollection<IntPair> mentionSpanSet, ICollection<IntPair> namedEntitySpanSet)
{
    IList<CoreLabel> sent = s.Get(typeof(CoreAnnotations.TokensAnnotation));
    SemanticGraph basic = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    SemanticGraph enhanced = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
    if (enhanced == null)
    {
        // No enhanced graph available: fall back to the basic dependencies fetched above.
        enhanced = basic;
    }

    // Pronoun. POS tags are identifiers, so compare ordinally rather than with the
    // current culture (which is what the single-argument StartsWith would use).
    if (headword.Tag().StartsWith("PRP", System.StringComparison.Ordinal))
    {
        ExtractPronounForHeadword(headword, dep, s, mentions, mentionSpanSet, namedEntitySpanSet);
        return;
    }

    // Add NP mention. endIdx is exclusive from here on.
    IntPair npSpan = GetNPSpan(headword, dep, sent);
    int beginIdx = npSpan.Get(0);
    int endIdx = npSpan.Get(1) + 1;

    // Try not to have a span that ends with ",".
    if (",".Equals(sent[endIdx - 1].Word()))
    {
        endIdx--;
    }

    // Try to remove a leading IN.
    if ("IN".Equals(sent[beginIdx].Tag()))
    {
        beginIdx++;
    }

    AddMention(beginIdx, endIdx, headword, mentions, mentionSpanSet, namedEntitySpanSet, sent, basic, enhanced);

    // Extract the first element in a conjunction: for "A and B" the call above added
    // "A and B"; here we add "A". ("B" is extracted when it is visited as a headword.)
    ICollection<IndexedWord> conjChildren = dep.GetChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
    if (conjChildren.Count == 0)
    {
        return;
    }

    // Make sure we work with the conjunct that has the smallest index.
    IndexedWord firstConj = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
    foreach (IndexedWord candidate in conjChildren)
    {
        if (candidate.Index() < firstConj.Index())
        {
            firstConj = candidate;
        }
    }

    // Walk left from the start of the first conjunct, skipping tokens tagged CC or ",",
    // to find the exclusive end of the first element.
    IndexedWord left = SemanticGraphUtils.LeftMostChildVertice(firstConj, dep);
    for (int endIdxFirstElement = left.Index() - 1; endIdxFirstElement > beginIdx; endIdxFirstElement--)
    {
        if (sent[endIdxFirstElement - 1].Tag().Matches("CC|,"))
        {
            continue;
        }

        // Only add the shorter mention when it still contains the headword.
        if (headword.Index() - 1 < endIdxFirstElement)
        {
            AddMention(beginIdx, endIdxFirstElement, headword, mentions, mentionSpanSet, namedEntitySpanSet, sent, basic, enhanced);
        }
        break;
    }
}