/// <summary>
/// Convert an <see cref="Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.Reader.AceEntityMention"/>
/// to an <see cref="Edu.Stanford.Nlp.IE.Machinereading.Structure.EntityMention"/>.
/// </summary>
/// <param name="entityMention">
/// <see cref="Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.Reader.AceEntityMention"/> to convert
/// </param>
/// <param name="docId">ID of the document containing this entity mention</param>
/// <param name="sentence"/>
/// <param name="tokenOffset">
/// Offset used to convert the extent's token positions to sentence-relative positions
/// (the ace.reader stores absolute token offsets from the beginning of the document, but
/// we need token offsets from the beginning of the sentence, so adjust by tokenOffset)
/// </param>
/// <returns>
/// entity as an <see cref="Edu.Stanford.Nlp.IE.Machinereading.Structure.EntityMention"/>
/// </returns>
private EntityMention ConvertAceEntityMention(AceEntityMention entityMention, string docId, ICoreMap sentence, int tokenOffset)
{
    //log.info("TYPE is " + entityMention.getParent().getType());
    //log.info("SUBTYPE is " + entityMention.getParent().getSubtype());
    //log.info("LDCTYPE is " + entityMention.getLdctype());
    AceCharSeq ext = entityMention.GetExtent();
    AceCharSeq head = entityMention.GetHead();
    int extStart = ext.GetTokenStart() - tokenOffset;
    int extEnd = ext.GetTokenEnd() - tokenOffset + 1;
    if (extStart < 0)
    {
        logger.Severe("READER ERROR: Invalid extent start " + extStart + " for entity mention " + entityMention.GetId() + " in document " + docId + " in sentence " + sentence);
        logger.Severe("This may happen due to incorrect EOS detection. Adjusting entity extent.");
        extStart = 0;
    }
    if (extEnd > sentence.Get(typeof(CoreAnnotations.TokensAnnotation)).Count)
    {
        logger.Severe("READER ERROR: Invalid extent end " + extEnd + " for entity mention " + entityMention.GetId() + " in document " + docId + " in sentence " + sentence);
        logger.Severe("This may happen due to incorrect EOS detection. Adjusting entity extent.");
        extEnd = sentence.Get(typeof(CoreAnnotations.TokensAnnotation)).Count;
    }
    int headStart = head.GetTokenStart() - tokenOffset;
    int headEnd = head.GetTokenEnd() - tokenOffset + 1;
    if (headStart < 0)
    {
        logger.Severe("READER ERROR: Invalid head start " + headStart + " for entity mention " + entityMention.GetId() + " in document " + docId + " in sentence " + sentence);
        logger.Severe("This may happen due to incorrect EOS detection. Adjusting entity head span.");
        headStart = 0;
    }
    if (headEnd > sentence.Get(typeof(CoreAnnotations.TokensAnnotation)).Count)
    {
        logger.Severe("READER ERROR: Invalid head end " + headEnd + " for entity mention " + entityMention.GetId() + " in document " + docId + " in sentence " + sentence);
        logger.Severe("This may happen due to incorrect EOS detection. Adjusting entity head span.");
        headEnd = sentence.Get(typeof(CoreAnnotations.TokensAnnotation)).Count;
    }
    // must adjust due to possible incorrect EOS detection
    if (headStart < extStart)
    {
        headStart = extStart;
    }
    if (headEnd > extEnd)
    {
        headEnd = extEnd;
    }
    System.Diagnostics.Debug.Assert(headStart < headEnd);
    // note: the ace.reader stores absolute token offsets from the beginning of the document, but
    // we need token offsets from the beginning of the sentence => adjust by tokenOffset
    // note: in ace.reader the end token position is inclusive, but
    // in our setup the end token position is exclusive => add 1 to end
    EntityMention converted = new EntityMention(entityMention.GetId(), sentence, new Span(extStart, extEnd), new Span(headStart, headEnd), entityMention.GetParent().GetType(), entityMention.GetParent().GetSubtype(), entityMention.GetLdctype());
    return converted;
}
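
/// <summary>
/// Convert an <see cref="Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.Reader.AceEventMention"/>
/// to an <see cref="Edu.Stanford.Nlp.IE.Machinereading.Structure.EventMention"/>, resolving its
/// arguments against the entity mentions already converted for this sentence.
/// </summary>
/// <param name="aceEventMention">
/// <see cref="Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.Reader.AceEventMention"/> to convert
/// </param>
/// <param name="docId">ID of the document containing this event mention</param>
/// <param name="sentence"/>
/// <param name="entityMap">Map from ACE entity mention IDs to the converted entity mentions of this sentence</param>
/// <param name="tokenOffset">Offset of the sentence's first token from the beginning of the document (same convention as in ConvertAceEntityMention)</param>
/// <returns>
/// event as an <see cref="Edu.Stanford.Nlp.IE.Machinereading.Structure.EventMention"/>, or null if one of
/// its arguments cannot be resolved (e.g., when the event mention spans multiple sentences)
/// </returns>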
private EventMention ConvertAceEventMention(AceEventMention aceEventMention, string docId, ICoreMap sentence, IDictionary<string, EntityMention> entityMap, int tokenOffset)
{
    ICollection<string> roleSet = aceEventMention.GetRoles();
    IList<string> roles = new List<string>();
    foreach (string role in roleSet)
    {
        roles.Add(role);
    }
    IList<ExtractionObject> convertedArgs = new List<ExtractionObject>();
    int left = int.MaxValue;
    int right = int.MinValue;
    foreach (string role_1 in roles)
    {
        AceEntityMention arg = aceEventMention.GetArg(role_1);
        // note: the indexer of a standard Dictionary throws for a missing key, so use
        // TryGetValue to keep the "log and ignore this mention" branch below reachable
        EntityMention o;
        entityMap.TryGetValue(arg.GetId(), out o);
        if (o == null)
        {
            logger.Severe("READER ERROR: Failed to find event argument with id " + arg.GetId());
            logger.Severe("This happens because a few event mentions illegally span multiple sentences. Will ignore this mention.");
            return null;
        }
        convertedArgs.Add(o);
        if (o.GetExtentTokenStart() < left)
        {
            left = o.GetExtentTokenStart();
        }
        if (o.GetExtentTokenEnd() > right)
        {
            right = o.GetExtentTokenEnd();
        }
    }
    AceCharSeq anchor = aceEventMention.GetAnchor();
    ExtractionObject anchorObject = new ExtractionObject(aceEventMention.GetId() + "-anchor", sentence, new Span(anchor.GetTokenStart() - tokenOffset, anchor.GetTokenEnd() + 1 - tokenOffset), "ANCHOR", null);
    EventMention em = new EventMention(aceEventMention.GetId(), sentence, new Span(left, right), aceEventMention.GetParent().GetType(), aceEventMention.GetParent().GetSubtype(), anchorObject, convertedArgs, roles);
    return em;
}
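
// Both converters above use the same span conventions when building Span objects:
// the ace.reader reports absolute, inclusive token positions counted from the start of the
// document, while the converted mentions expect sentence-relative, end-exclusive positions.
// As a purely illustrative example (the numbers are made up, not taken from any ACE document):
// if the sentence starts at document token 100 (tokenOffset == 100) and an extent covers
// document tokens 103..105 inclusive, the converted span is
// new Span(103 - 100, 105 - 100 + 1), i.e., Span(3, 6).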