예제 #1
0
        /// <summary>
        /// Builds an
        /// <see cref="Edu.Stanford.Nlp.IE.Machinereading.Structure.EntityMention"/>
        /// from an
        /// <see cref="Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.Reader.AceEntityMention"/>
        /// , rebasing its extent and head token spans onto the given sentence.
        /// </summary>
        /// <remarks>
        /// The ACE reader stores absolute token offsets from the beginning of the document with an
        /// inclusive end position; the spans produced here are relative to the sentence start and
        /// use an exclusive end. Spans that fall outside the sentence are logged as reader errors
        /// and clamped to <c>[0, token count]</c>; the head span is then clamped to the extent.
        /// </remarks>
        /// <param name="entityMention">The ACE entity mention to convert.</param>
        /// <param name="docId">ID of the document containing this entity mention (used in log messages only).</param>
        /// <param name="sentence">
        /// Sentence handed to the new mention; the size of its
        /// <c>CoreAnnotations.TokensAnnotation</c> list is the upper bound for span ends.
        /// </param>
        /// <param name="tokenOffset">
        /// Value subtracted from the document-level token positions to obtain positions
        /// relative to the beginning of the sentence.
        /// </param>
        /// <returns>
        /// The entity as an
        /// <see cref="Edu.Stanford.Nlp.IE.Machinereading.Structure.EntityMention"/>
        /// </returns>
        private EntityMention ConvertAceEntityMention(AceEntityMention entityMention, string docId, ICoreMap sentence, int tokenOffset)
        {
            AceCharSeq extent  = entityMention.GetExtent();
            AceCharSeq headSeq = entityMention.GetHead();

            // Rebase onto the sentence; "+ 1" turns the reader's inclusive end into an exclusive one.
            int extentBegin = extent.GetTokenStart() - tokenOffset;
            int extentEnd   = extent.GetTokenEnd() - tokenOffset + 1;

            if (extentBegin < 0)
            {
                logger.Severe("READER ERROR: Invalid extent start " + extentBegin + " for entity mention " + entityMention.GetId() + " in document " + docId + " in sentence " + sentence);
                logger.Severe("This may happen due to incorrect EOS detection. Adjusting entity extent.");
                extentBegin = 0;
            }

            // Upper bound for every exclusive end position below.
            int tokenCount = sentence.Get(typeof(CoreAnnotations.TokensAnnotation)).Count;

            if (extentEnd > tokenCount)
            {
                logger.Severe("READER ERROR: Invalid extent end " + extentEnd + " for entity mention " + entityMention.GetId() + " in document " + docId + " in sentence " + sentence);
                logger.Severe("This may happen due to incorrect EOS detection. Adjusting entity extent.");
                extentEnd = tokenCount;
            }

            int headBegin = headSeq.GetTokenStart() - tokenOffset;
            int headEnd   = headSeq.GetTokenEnd() - tokenOffset + 1;

            if (headBegin < 0)
            {
                logger.Severe("READER ERROR: Invalid head start " + headBegin + " for entity mention " + entityMention.GetId() + " in document " + docId + " in sentence " + sentence);
                logger.Severe("This may happen due to incorrect EOS detection. Adjusting entity head span.");
                headBegin = 0;
            }
            if (headEnd > tokenCount)
            {
                logger.Severe("READER ERROR: Invalid head end " + headEnd + " for entity mention " + entityMention.GetId() + " in document " + docId + " in sentence " + sentence);
                logger.Severe("This may happen due to incorrect EOS detection. Adjusting entity head span.");
                headEnd = tokenCount;
            }

            // The head must lie inside the extent; clamping the extent above (incorrect EOS
            // detection) can leave the head sticking out on either side.
            headBegin = System.Math.Max(headBegin, extentBegin);
            headEnd   = System.Math.Min(headEnd, extentEnd);
            System.Diagnostics.Debug.Assert(headBegin < headEnd);

            return new EntityMention(
                entityMention.GetId(),
                sentence,
                new Span(extentBegin, extentEnd),
                new Span(headBegin, headEnd),
                entityMention.GetParent().GetType(),
                entityMention.GetParent().GetSubtype(),
                entityMention.GetLdctype());
        }
예제 #2
0
        /// <summary>
        /// Converts an ACE event mention into an <see cref="EventMention"/> whose arguments are the
        /// already-converted entity mentions looked up in <paramref name="entityMap"/>.
        /// </summary>
        /// <remarks>
        /// The event extent is the smallest span covering the extents of all its arguments.
        /// The anchor is wrapped in an <see cref="ExtractionObject"/> of type <c>"ANCHOR"</c> whose
        /// span is the ACE anchor rebased by <paramref name="tokenOffset"/> with an exclusive end.
        /// </remarks>
        /// <param name="aceEventMention">The ACE event mention to convert.</param>
        /// <param name="docId">ID of the containing document (not used by this method).</param>
        /// <param name="sentence">Sentence handed to the created anchor and event mention.</param>
        /// <param name="entityMap">Converted entity mentions keyed by ACE entity mention id.</param>
        /// <param name="tokenOffset">Value subtracted from the anchor's document-level token positions.</param>
        /// <returns>
        /// The converted event mention, or <c>null</c> if any argument has no (or a null) entry in
        /// <paramref name="entityMap"/>.
        /// </returns>
        private EventMention ConvertAceEventMention(AceEventMention aceEventMention, string docId, ICoreMap sentence, IDictionary <string, EntityMention> entityMap, int tokenOffset)
        {
            // Snapshot the roles into a list so the role labels and the converted arguments
            // passed to EventMention are built from the same enumeration order.
            IList <string>           roles         = new List <string>(aceEventMention.GetRoles());
            IList <ExtractionObject> convertedArgs = new List <ExtractionObject>();
            int left  = int.MaxValue;
            int right = int.MinValue;

            foreach (string role in roles)
            {
                AceEntityMention arg = aceEventMention.GetArg(role);

                // The IDictionary indexer throws KeyNotFoundException for a missing key, which made
                // the "argument not found" branch unreachable. TryGetValue restores the intended
                // log-and-skip behavior; the extra null test covers keys mapped to null.
                EntityMention found;
                if (!entityMap.TryGetValue(arg.GetId(), out found) || found == null)
                {
                    logger.Severe("READER ERROR: Failed to find event argument with id " + arg.GetId());
                    logger.Severe("This happens because a few event mentions illegally span multiple sentences. Will ignore this mention.");
                    return(null);
                }

                ExtractionObject o = found;
                convertedArgs.Add(o);

                // Grow the event extent to cover this argument.
                if (o.GetExtentTokenStart() < left)
                {
                    left = o.GetExtentTokenStart();
                }
                if (o.GetExtentTokenEnd() > right)
                {
                    right = o.GetExtentTokenEnd();
                }
            }

            // "+ 1" converts the anchor's inclusive end position into an exclusive one.
            AceCharSeq       anchor       = aceEventMention.GetAnchor();
            ExtractionObject anchorObject = new ExtractionObject(aceEventMention.GetId() + "-anchor", sentence, new Span(anchor.GetTokenStart() - tokenOffset, anchor.GetTokenEnd() + 1 - tokenOffset), "ANCHOR", null);
            EventMention     em           = new EventMention(aceEventMention.GetId(), sentence, new Span(left, right), aceEventMention.GetParent().GetType(), aceEventMention.GetParent().GetSubtype(), anchorObject, convertedArgs, roles);

            return(em);
        }