/*
         * Sets the head word and the index for an entity, given the parse tree for
         * the sentence containing the entity.
         *
         * This code is no longer used, but I've kept it around (at least for now) as
         * reference when we modify preProcessSentences().
         */
        private void SetHeadWord(EntityMention entity, Tree tree)
        {
            // Strategy:
            //   1. Ask the head finder for the head terminal of the node joining the
            //      first and last leaves of the entity extent.
            //   2. If the extent ends in punctuation and the head is the last token or
            //      lies outside the extent, retry without the trailing punctuation token.
            //   3. If the head index is still outside the extent, re-parse the extent
            //      words on their own and take the head from that parse.
            IList<Tree> leaves      = tree.GetLeaves();
            int         extentStart = entity.GetExtentTokenStart();
            int         extentEnd   = entity.GetExtentTokenEnd();   // used as an inclusive leaf index throughout this method

            Tree argRoot       = tree.JoinNode(leaves[extentStart], leaves[extentEnd]);
            Tree headWordNode  = argRoot.HeadTerminal(headFinder);
            int  headWordIndex = GetIndexByObjectEquality(leaves, headWordNode);

            bool endsWithPunct = StringUtils.IsPunct(leaves[extentEnd].Label().Value().Trim());
            if (endsWithPunct && (headWordIndex >= extentEnd || headWordIndex < extentStart))
            {
                // Retry with the trailing punctuation token excluded from the span.
                // The range check below stores the result when it falls inside the extent,
                // so no separate assignment is needed here.
                argRoot       = tree.JoinNode(leaves[extentStart], leaves[extentEnd - 1]);
                headWordNode  = argRoot.HeadTerminal(headFinder);
                headWordIndex = GetIndexByObjectEquality(leaves, headWordNode);
            }

            if (headWordIndex < extentStart || headWordIndex > extentEnd)
            {
                // The head found in the full-sentence tree is outside the extent:
                // re-parse the argument words by themselves. The words are the leaves
                // between extentStart and extentEnd, inclusive.
                IList<string> argWords = new List<string>();
                for (int i = extentStart; i <= extentEnd; i++)
                {
                    argWords.Add(leaves[i].Label().Value());
                }
                if (StringUtils.IsPunct(argWords[argWords.Count - 1]))
                {
                    // Remove the last word BY INDEX. IList<string>.Remove expects the
                    // item to delete, not a position, so RemoveAt is the correct call.
                    argWords.RemoveAt(argWords.Count - 1);
                }
                Tree argTree = ParseStrings(argWords);
                headWordNode = argTree.HeadTerminal(headFinder);
                // Indices in the re-parsed tree are relative to the extent; shift them
                // back into sentence coordinates.
                headWordIndex = GetIndexByObjectEquality(argTree.GetLeaves(), headWordNode) + extentStart;
            }

            entity.SetHeadTokenPosition(headWordIndex);
            entity.SetHeadTokenSpan(new Span(headWordIndex, headWordIndex + 1));
        }
예제 #2
0
        /// <summary>
        /// Determines the token index of the syntactic head of an entity mention and
        /// records it on the mention.
        /// </summary>
        /// <remarks>
        /// If the mention already reports a syntactic head position other than -1, that
        /// value is returned unchanged. Otherwise the head is looked up via
        /// <c>FindSyntacticHead</c>; when that lookup throws or yields nothing, the
        /// position falls back to the extent end minus one (the last token in the mention).
        /// </remarks>
        /// <param name="ent">The entity mention whose head is being assigned.</param>
        /// <param name="tree">The parse tree of the whole sentence containing the mention.</param>
        /// <param name="tokens">The tokens of the sentence containing the mention.</param>
        /// <param name="setHeadSpan">
        /// When true, the mention's head span is also set to the single-token span
        /// starting at the head position.
        /// </param>
        /// <returns>The token index chosen as the head of the mention.</returns>
        public virtual int AssignSyntacticHead(EntityMention ent, Tree tree, IList <CoreLabel> tokens, bool setHeadSpan)
        {
            // Nothing to do when a head position has already been assigned.
            int existingHead = ent.GetSyntacticHeadTokenPosition();
            if (existingHead != -1)
            {
                return existingHead;
            }

            logger.Finest("Finding syntactic head for entity: " + ent + " in tree: " + tree.ToString());
            logger.Finest("Flat sentence is: " + tokens);

            Tree headNode = null;
            try
            {
                headNode = FindSyntacticHead(ent, tree, tokens);
            }
            catch (Exception ex)
            {
                // Best effort: log the failure and continue with the fallback below.
                logger.Severe("WARNING: failed to parse sentence. Will continue with the right-most head heuristic: " + SentenceToString(tokens));
                Sharpen.Runtime.PrintStackTrace(ex);
            }

            // Default to the right-most token of the extent; overwritten when a head node was found.
            int headIndex = ent.GetExtentTokenEnd() - 1;
            if (headNode == null)
            {
                logger.Fine("WARNING: failed to find syntactic head for entity: " + ent + " in tree: " + tree);
                logger.Fine("Fallback strategy: will set head to last token in mention: " + tokens[headIndex]);
            }
            else
            {
                // The head node's label carries its begin index in the sentence.
                headIndex = ((CoreLabel)headNode.Label()).Get(typeof(CoreAnnotations.BeginIndexAnnotation));
            }

            ent.SetHeadTokenPosition(headIndex);
            if (setHeadSpan)
            {
                // Make the head span match the syntactic head exactly; this is needed
                // for some corpora where the head span is not given.
                ent.SetHeadTokenSpan(new Span(headIndex, headIndex + 1));
            }
            return headIndex;
        }