/*
 * Sets the head word and the index for an entity, given the parse tree for
 * the sentence containing the entity.
 *
 * The head is looked up in up to three stages:
 *   1. Take the head terminal of the subtree joining the first and last
 *      leaves of the entity extent (the extent end is used inclusively here).
 *   2. If the last extent token is punctuation and the head found in stage 1
 *      is at/after that token or before the extent start, repeat the lookup
 *      with the trailing punctuation token left out.
 *   3. If the head index still falls outside the extent, re-parse the extent
 *      words on their own and use the head of that stand-alone parse, shifted
 *      by the extent start.
 *
 * This code is no longer used, but I've kept it around (at least for now) as
 * reference when we modify preProcessSentences().
 */
private void SetHeadWord(EntityMention entity, Tree tree)
{
    IList<Tree> leaves = tree.GetLeaves();
    Tree argRoot = tree.JoinNode(leaves[entity.GetExtentTokenStart()], leaves[entity.GetExtentTokenEnd()]);
    Tree headWordNode = argRoot.HeadTerminal(headFinder);
    int headWordIndex = GetIndexByObjectEquality(leaves, headWordNode);

    // Trailing punctuation can pull the joined subtree's head outside the
    // mention; retry without the final token in that case.
    if (StringUtils.IsPunct(leaves[entity.GetExtentTokenEnd()].Label().Value().Trim())
        && (headWordIndex >= entity.GetExtentTokenEnd() || headWordIndex < entity.GetExtentTokenStart()))
    {
        argRoot = tree.JoinNode(leaves[entity.GetExtentTokenStart()], leaves[entity.GetExtentTokenEnd() - 1]);
        headWordNode = argRoot.HeadTerminal(headFinder);
        headWordIndex = GetIndexByObjectEquality(leaves, headWordNode);
        if (headWordIndex >= entity.GetExtentTokenStart() && headWordIndex <= entity.GetExtentTokenEnd() - 1)
        {
            entity.SetHeadTokenPosition(headWordIndex);
            entity.SetHeadTokenSpan(new Span(headWordIndex, headWordIndex + 1));
        }
    }

    if (headWordIndex >= entity.GetExtentTokenStart() && headWordIndex <= entity.GetExtentTokenEnd())
    {
        entity.SetHeadTokenPosition(headWordIndex);
        entity.SetHeadTokenSpan(new Span(headWordIndex, headWordIndex + 1));
    }
    else
    {
        // Re-parse the argument words by themselves.
        // Get the list of words in the arg by looking at the leaves between
        // the extent token start and the extent token end, inclusive.
        IList<string> argWords = new List<string>();
        for (int i = entity.GetExtentTokenStart(); i <= entity.GetExtentTokenEnd(); i++)
        {
            argWords.Add(leaves[i].Label().Value());
        }

        if (StringUtils.IsPunct(argWords[argWords.Count - 1]))
        {
            // Drop the trailing punctuation token BY INDEX. Remove() on an
            // IList<string> takes the element value, not a position, so
            // RemoveAt() is the correct call here.
            argWords.RemoveAt(argWords.Count - 1);
        }

        Tree argTree = ParseStrings(argWords);
        headWordNode = argTree.HeadTerminal(headFinder);
        // The index inside the stand-alone parse is relative to the extent,
        // so shift it back into sentence coordinates.
        headWordIndex = GetIndexByObjectEquality(argTree.GetLeaves(), headWordNode) + entity.GetExtentTokenStart();
        entity.SetHeadTokenPosition(headWordIndex);
        entity.SetHeadTokenSpan(new Span(headWordIndex, headWordIndex + 1));
    }
}
/// <summary>Find the index of the head of an entity.</summary>
/// <remarks>
/// If the mention already reports a syntactic head position other than -1,
/// that position is returned and the mention is left untouched. Otherwise the
/// head is looked up via <c>FindSyntacticHead</c>; when that lookup throws or
/// yields no node, the token just before the extent end is used instead.
/// </remarks>
/// <param name="ent">The entity mention</param>
/// <param name="tree">The Tree for the entire sentence in which it occurs.</param>
/// <param name="tokens">The Sentence in which it occurs</param>
/// <param name="setHeadSpan">Whether to set the head span in the entity mention.</param>
/// <returns>The index of the entity head</returns>
public virtual int AssignSyntacticHead(EntityMention ent, Tree tree, IList<CoreLabel> tokens, bool setHeadSpan)
{
    // A head that was assigned earlier is reused as-is.
    int knownHead = ent.GetSyntacticHeadTokenPosition();
    if (knownHead != -1)
    {
        return knownHead;
    }

    logger.Finest("Finding syntactic head for entity: " + ent + " in tree: " + tree.ToString());
    logger.Finest("Flat sentence is: " + tokens);

    Tree headNode = null;
    try
    {
        headNode = FindSyntacticHead(ent, tree, tokens);
    }
    catch (Exception e)
    {
        // Best effort: log the failure and continue with the fallback below.
        logger.Severe("WARNING: failed to parse sentence. Will continue with the right-most head heuristic: " + SentenceToString(tokens));
        Sharpen.Runtime.PrintStackTrace(e);
    }

    // Right-most heuristic: default to the last token of the mention extent.
    int headIndex = ent.GetExtentTokenEnd() - 1;
    if (headNode == null)
    {
        logger.Fine("WARNING: failed to find syntactic head for entity: " + ent + " in tree: " + tree);
        logger.Fine("Fallback strategy: will set head to last token in mention: " + tokens[headIndex]);
    }
    else
    {
        // The head position is read from the begin-index annotation on the node's label.
        CoreLabel headLabel = (CoreLabel)headNode.Label();
        headIndex = headLabel.Get(typeof(CoreAnnotations.BeginIndexAnnotation));
    }

    ent.SetHeadTokenPosition(headIndex);
    if (setHeadSpan)
    {
        // set the head span to match exactly the syntactic head
        // this is needed for some corpora where the head span is not given
        ent.SetHeadTokenSpan(new Span(headIndex, headIndex + 1));
    }

    return headIndex;
}