/// <summary>Creates a mention argument that pairs an entity mention with a role.</summary>
/// <param name="role">Role label stored for this argument.</param>
/// <param name="content">Entity mention that fills the role.</param>
/// <param name="mentionType">Kind of the owning mention; in practice, event or relation.</param>
public AceMentionArgument(string role, AceEntityMention content, string mentionType)
{
    this.mentionType = mentionType;
    mContent = content;
    mRole = role;
}
// buf.toString(),
/// <summary>Makes an ACE entity from the span [startToken, endToken)</summary>
/// <remarks>
/// Registers a new entity with id "&lt;mId&gt;-E&lt;id&gt;" and a single mention
/// with id "&lt;mId&gt;-E&lt;id&gt;-1" on this document.
/// </remarks>
/// <param name="startToken">First token of the span (inclusive).</param>
/// <param name="endToken">End token of the span (exclusive).</param>
/// <param name="id">Numeric suffix used to build the entity and mention ids.</param>
/// <param name="type">Entity type passed to the new entity.</param>
/// <param name="subtype">Entity subtype passed to the new entity.</param>
private void MakeEntity(int startToken, int endToken, int id, string type, string subtype)
{
    // The mention id is the entity id plus the fixed "-1" suffix.
    string entityId = mId + "-E" + id;
    string mentionId = entityId + "-1";

    var entity = new AceEntity(entityId, type, subtype, "SPC");
    AddEntity(entity);

    // The same char sequence is passed for both char-seq arguments of the mention.
    AceCharSeq span = MakeCharSeq(startToken, endToken);
    var entityMention = new AceEntityMention(mentionId, "NOM", "NOM", span, span);
    AddEntityMention(entityMention);
    entity.AddMention(entityMention);
}
/// <summary>Extracts info about one event mention</summary>
/// <remarks>
/// Reads the mention's ID, extent and anchor, then links every
/// "event_mention_argument" child whose REFID resolves to an entity mention
/// already registered in <paramref name="doc"/>. Arguments whose REFID does
/// not resolve are skipped.
/// </remarks>
/// <param name="node">The event mention XML node.</param>
/// <param name="doc">Document used to look up entity mentions by id.</param>
/// <returns>The newly created event mention with its arguments attached.</returns>
private static AceEventMention ParseEventMention(INode node, AceDocument doc)
{
    string mentionId = GetAttributeValue(node, "ID");
    AceCharSeq extentSeq = ParseCharSeq(GetChildByName(node, "extent"));
    AceCharSeq anchorSeq = ParseCharSeq(GetChildByName(node, "anchor"));

    var eventMention = new AceEventMention(mentionId, extentSeq, anchorSeq);

    foreach (INode argNode in GetChildrenByName(node, "event_mention_argument"))
    {
        string role = GetAttributeValue(argNode, "ROLE");
        string refId = GetAttributeValue(argNode, "REFID");
        AceEntityMention entityMention = doc.GetEntityMention(refId);
        if (entityMention == null)
        {
            // Unknown reference: nothing to link.
            continue;
        }

        // Link both directions: entity mention -> event, event -> argument.
        entityMention.AddEventMention(eventMention);
        eventMention.AddArg(entityMention, role);
    }

    return eventMention;
}
/// <summary>Extracts info about one relation mention</summary>
/// <remarks>
/// Reads the mention's ID, extent and LEXICALCONDITION, then places every
/// "relation_mention_argument" child whose REFID resolves to a known entity
/// mention into argument slot 0 (role "arg-1") or slot 1 (role "arg-2").
/// Role comparison ignores case. Arguments whose REFID does not resolve are skipped.
/// </remarks>
/// <param name="node">The relation mention XML node.</param>
/// <param name="doc">Document used to look up entity mentions by id.</param>
/// <returns>The newly created relation mention.</returns>
/// <exception cref="Exception">A resolved argument has a role other than arg-1 or arg-2.</exception>
private static AceRelationMention ParseRelationMention(INode node, AceDocument doc)
{
    string mentionId = GetAttributeValue(node, "ID");
    AceCharSeq extentSeq = ParseCharSeq(GetChildByName(node, "extent"));
    string lexicalCondition = GetAttributeValue(node, "LEXICALCONDITION");

    var relationMention = new AceRelationMention(mentionId, extentSeq, lexicalCondition);

    foreach (INode argNode in GetChildrenByName(node, "relation_mention_argument"))
    {
        string role = GetAttributeValue(argNode, "ROLE");
        string refId = GetAttributeValue(argNode, "REFID");
        AceEntityMention entityMention = doc.GetEntityMention(refId);
        if (entityMention == null)
        {
            continue;
        }

        // The back-link is registered before the role is validated.
        entityMention.AddRelationMention(relationMention);

        int slot;
        if (Sharpen.Runtime.EqualsIgnoreCase(role, "arg-1"))
        {
            slot = 0;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(role, "arg-2"))
        {
            slot = 1;
        }
        else
        {
            throw new Exception("Invalid relation mention argument role: " + role);
        }

        relationMention.GetArgs()[slot] = new AceRelationMentionArgument(role, entityMention);
    }

    return relationMention;
}
/// <summary>Parses one ACE specification</summary>
/// <remarks>
/// Reads the XML file into a DOM and loads, in this order, all entities,
/// relations and events together with their mentions into a new
/// <c>AceDocument</c>.
/// </remarks>
/// <param name="f">The ACE XML annotation file to read.</param>
/// <returns>The document holding the parsed entities, relations, events and their mentions.</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Xml.Sax.SAXException"/>
/// <exception cref="Javax.Xml.Parsers.ParserConfigurationException"/>
public static AceDocument ParseDocument(File f)
{
    // Parse the DOM document.
    IDocument dom = ReadDocument(f);

    // Create the ACE document object from the first "document" element.
    INode docElement = dom.GetElementsByTagName("document").Item(0);
    var aceDoc = new AceDocument(GetAttributeValue(docElement, "DOCID"));

    //
    // Entities. They are read first because relation and event mention
    // arguments are resolved against the entity mentions registered here.
    //
    INodeList entityNodes = dom.GetElementsByTagName("entity");
    int entityCount = 0;
    for (int e = 0; e < entityNodes.GetLength(); e++)
    {
        INode entityNode = entityNodes.Item(e);

        string entityId = GetAttributeValue(entityNode, "ID");
        string entityType = GetAttributeValue(entityNode, "TYPE");
        string entitySubtype = GetAttributeValue(entityNode, "SUBTYPE");
        string entityClass = GetAttributeValue(entityNode, "CLASS");

        var entity = new AceEntity(entityId, entityType, entitySubtype, entityClass);
        aceDoc.AddEntity(entity);

        // Parse every mention of this entity and register it on both the
        // entity and the document.
        foreach (INode mentionNode in GetChildrenByName(entityNode, "entity_mention"))
        {
            AceEntityMention entityMention = ParseEntityMention(mentionNode);
            entity.AddMention(entityMention);
            aceDoc.AddEntityMention(entityMention);
        }

        entityCount++;
    }
    //log.info("Parsed " + entityCount + " XML entities.");

    //
    // Relations.
    //
    INodeList relationNodes = dom.GetElementsByTagName("relation");
    for (int r = 0; r < relationNodes.GetLength(); r++)
    {
        INode relationNode = relationNodes.Item(r);

        // The relation type, subtype, tense, and modality.
        string relationId = GetAttributeValue(relationNode, "ID");
        string relationType = GetAttributeValue(relationNode, "TYPE");
        string relationSubtype = GetAttributeValue(relationNode, "SUBTYPE");
        string relationModality = GetAttributeValue(relationNode, "MODALITY");
        string relationTense = GetAttributeValue(relationNode, "TENSE");

        var relation = new AceRelation(relationId, relationType, relationSubtype, relationModality, relationTense);
        aceDoc.AddRelation(relation);

        // XXX: fetch relation_arguments here!

        // Parse every mention of this relation.
        foreach (INode mentionNode in GetChildrenByName(relationNode, "relation_mention"))
        {
            AceRelationMention relationMention = ParseRelationMention(mentionNode, aceDoc);
            relation.AddMention(relationMention);
            aceDoc.AddRelationMention(relationMention);
        }
    }

    //
    // Events.
    //
    INodeList eventNodes = dom.GetElementsByTagName("event");
    for (int v = 0; v < eventNodes.GetLength(); v++)
    {
        INode eventNode = eventNodes.Item(v);

        // The event type, subtype, modality, polarity, genericity, and tense.
        string eventId = GetAttributeValue(eventNode, "ID");
        string eventType = GetAttributeValue(eventNode, "TYPE");
        string eventSubtype = GetAttributeValue(eventNode, "SUBTYPE");
        string eventModality = GetAttributeValue(eventNode, "MODALITY");
        string eventPolarity = GetAttributeValue(eventNode, "POLARITY");
        string eventGenericity = GetAttributeValue(eventNode, "GENERICITY");
        string eventTense = GetAttributeValue(eventNode, "TENSE");

        var aceEvent = new AceEvent(eventId, eventType, eventSubtype, eventModality, eventPolarity, eventGenericity, eventTense);
        aceDoc.AddEvent(aceEvent);

        // Parse every mention of this event.
        foreach (INode mentionNode in GetChildrenByName(eventNode, "event_mention"))
        {
            AceEventMention eventMention = ParseEventMention(mentionNode, aceDoc);
            aceEvent.AddMention(eventMention);
            aceDoc.AddEventMention(eventMention);
        }
    }

    return aceDoc;
}
/// <summary>Stores a relation argument in the given argument slot.</summary>
/// <param name="which">Index into the argument array.</param>
/// <param name="em">Entity mention that fills the argument.</param>
/// <param name="role">Role label of the argument.</param>
public virtual void SetArg(int which, AceEntityMention em, string role)
{
    var argument = new AceRelationMentionArgument(role, em);
    mArguments[which] = argument;
}
/// <summary>Creates an argument of an event mention.</summary>
/// <remarks>Delegates to the base constructor with the mention type fixed to "event".</remarks>
/// <param name="role">Role label of the argument.</param>
/// <param name="content">Entity mention that fills the role.</param>
public AceEventMentionArgument(string role, AceEntityMention content)
    : base(role, content, "event")
{
}
/// <summary>Registers an event argument under its role.</summary>
/// <remarks>An argument already stored under the same role is replaced.</remarks>
/// <param name="em">Entity mention that fills the argument.</param>
/// <param name="role">Role label; also used as the lookup key.</param>
public virtual void AddArg(AceEntityMention em, string role)
{
    var argument = new AceEventMentionArgument(role, em);
    mRolesToArguments[role] = argument;
}
//
// heeyoung : skip relation, event parsing part - for ACE2004
//
/// <summary>
/// Loads one ACE document identified by a file prefix: XML annotations
/// (unless predicted boundaries are used), raw text, tokens and sentences,
/// and builds the per-sentence entity mention lists.
/// </summary>
/// <param name="prefix">
/// Path prefix of the document files; the annotation and raw-text file names
/// are built by appending <c>XmlExt</c> and <c>OrigExt</c>.
/// </param>
/// <param name="usePredictedBoundaries">
/// When false, the gold XML annotations are parsed. When true, an empty
/// document is created whose id is the part of <paramref name="prefix"/>
/// after the last path separator.
/// </param>
/// <param name="AceVersion">Not used by this method body.</param>
/// <returns>The loaded document, with entity mentions grouped by sentence.</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Xml.Sax.SAXException"/>
/// <exception cref="Javax.Xml.Parsers.ParserConfigurationException"/>
public static Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.Reader.AceDocument ParseDocument(string prefix, bool usePredictedBoundaries, string AceVersion)
{
    mLog.Fine("Reading document " + prefix);
    Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.Reader.AceDocument doc;

    //
    // read the ACE XML annotations
    //
    if (!usePredictedBoundaries)
    {
        doc = AceDomReader.ParseDocument(new File(prefix + XmlExt));
    }
    else
    {
        //
        // will use the predicted entity boundaries (see below)
        //
        int lastSlash = prefix.LastIndexOf(File.separator);
        System.Diagnostics.Debug.Assert((lastSlash > 0 && lastSlash < prefix.Length - 1));
        string id = Sharpen.Runtime.Substring(prefix, lastSlash + 1);
        doc = new Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.Reader.AceDocument(id);
    }
    doc.SetPrefix(prefix);

    //
    // read the raw byte stream, preferring a truecased variant when present
    //
    string trueCasedFileName = prefix + OrigExt + ".truecase";
    if ((new File(trueCasedFileName).Exists()))
    {
        mLog.Severe("Using truecased file: " + trueCasedFileName);
        doc.ReadRawBytes(trueCasedFileName);
    }
    else
    {
        doc.ReadRawBytes(prefix + OrigExt);
    }

    //
    // read the AceTokens
    //
    int offsetToSubtract = 0;
    IList<IList<AceToken>> sentences = AceSentenceSegmenter.TokenizeAndSegmentSentences(prefix);
    doc.SetSentences(sentences);
    foreach (IList<AceToken> sentence in sentences)
    {
        foreach (AceToken token in sentence)
        {
            // The running offset is threaded through every token in document order.
            offsetToSubtract = token.AdjustPhrasePositions(offsetToSubtract, token.GetLiteral());
            doc.AddToken(token);
        }
    }

    //
    // match char sequences to phrases
    //
    doc.MatchCharSeqs(prefix);

    //
    // construct the mEntityMentions matrix
    //
    ICollection<string> entityKeys = doc.mEntityMentions.Keys;
    foreach (string key in entityKeys)
    {
        AceEntityMention em = doc.mEntityMentions[key];
        int sentenceIndex = doc.mTokens[em.GetHead().GetTokenStart()].GetSentence();

        // adjust the number of rows if necessary; the three per-sentence
        // lists are always grown together
        while (sentenceIndex >= doc.mSentenceEntityMentions.Count)
        {
            doc.mSentenceEntityMentions.Add(new List<AceEntityMention>());
            doc.mSentenceRelationMentions.Add(new List<AceRelationMention>());
            doc.mSentenceEventMentions.Add(new List<AceEventMention>());
        }

        // store the entity mentions in increasing order:
        // (a) of the start position of their head
        // (b) if start is the same, in increasing order of the head end
        List<AceEntityMention> sentEnts = doc.mSentenceEntityMentions[sentenceIndex];
        int insertAt = sentEnts.Count;
        for (int i = 0; i < sentEnts.Count; i++)
        {
            AceEntityMention crt = sentEnts[i];
            bool startsLater = crt.GetHead().GetTokenStart() > em.GetHead().GetTokenStart();
            bool sameStartEndsLater = crt.GetHead().GetTokenStart() == em.GetHead().GetTokenStart()
                && crt.GetHead().GetTokenEnd() > em.GetHead().GetTokenEnd();
            if (startsLater || sameStartEndsLater)
            {
                insertAt = i;
                break;
            }
        }

        // List<T>.Add has no (index, item) overload; positional insertion
        // must go through Insert. Inserting at Count appends.
        sentEnts.Insert(insertAt, em);
    }

    return doc;
}
/// <summary>Matches all relevant mentions, i.e.</summary>
/// <remarks>
/// Matches all relevant mentions, i.e. entities and anchors, to tokens Note:
/// entity mentions may match with multiple tokens!
/// For every entity mention the head and the extent are matched against the
/// document tokens and the head token is detected; for every event mention
/// the extent is matched. Any match failure is logged and terminates the
/// process with exit code 1.
/// </remarks>
/// <param name="filePrefix">Document prefix; only used in the head-mismatch log output.</param>
public virtual void MatchCharSeqs(string filePrefix)
{
    //
    // entity mentions: head, extent, then head token
    //
    foreach (string entityKey in mEntityMentions.Keys)
    {
        AceEntityMention entityMention = mEntityMentions[entityKey];

        // match the head charseq to 1+ phrase(s)
        var head = entityMention.GetHead();
        try
        {
            head.Match(mTokens);
        }
        catch (MatchException)
        {
            mLog.Severe("READER ERROR: Failed to match entity mention head: " + "[" + head.GetText() + ", " + head.GetByteStart() + ", " + head.GetByteEnd() + "]");
            mLog.Severe("Document tokens: " + TokensWithByteSpan(head.GetByteStart(), head.GetByteEnd()));
            mLog.Severe("Document prefix: " + filePrefix);
            System.Environment.Exit(1);
        }

        // match the extent charseq to 1+ phrase(s)
        var entityExtent = entityMention.GetExtent();
        try
        {
            entityExtent.Match(mTokens);
        }
        catch (MatchException)
        {
            mLog.Severe("READER ERROR: Failed to match entity mention extent: " + "[" + entityExtent.GetText() + ", " + entityExtent.GetByteStart() + ", " + entityExtent.GetByteEnd() + "]");
            mLog.Severe("Document tokens: " + TokensWithByteSpan(entityExtent.GetByteStart(), entityExtent.GetByteEnd()));
            System.Environment.Exit(1);
        }

        // set the head word of the mention
        entityMention.DetectHeadToken(this);
    }

    // we need to do this for events as well since they may not have any
    // AceEntityMentions associated with them (if they have no arguments)
    foreach (string eventKey in mEventMentions.Keys)
    {
        AceEventMention eventMention = mEventMentions[eventKey];

        // match the extent charseq to 1+ phrase(s)
        var eventExtent = eventMention.GetExtent();
        try
        {
            eventExtent.Match(mTokens);
        }
        catch (MatchException)
        {
            mLog.Severe("READER ERROR: Failed to match event mention extent: " + "[" + eventExtent.GetText() + ", " + eventExtent.GetByteStart() + ", " + eventExtent.GetByteEnd() + "]");
            mLog.Severe("Document tokens: " + TokensWithByteSpan(eventExtent.GetByteStart(), eventExtent.GetByteEnd()));
            System.Environment.Exit(1);
        }
    }
}
/// <summary>Registers an entity mention under its id.</summary>
/// <remarks>A mention already stored under the same id is replaced.</remarks>
/// <param name="em">The entity mention to register.</param>
public virtual void AddEntityMention(AceEntityMention em)
{
    var mentionId = em.GetId();
    mEntityMentions[mentionId] = em;
}
/// <summary>Attaches a mention to this object and makes this object its parent.</summary>
/// <param name="m">The entity mention to attach.</param>
public virtual void AddMention(AceEntityMention m)
{
    // Append first, then set the back-reference to this owner.
    mMentions.Add(m);
    m.SetParent(this);
}
/// <summary>Creates an argument of a relation mention.</summary>
/// <remarks>Delegates to the base constructor with the mention type fixed to "relation".</remarks>
/// <param name="role">Role label of the argument.</param>
/// <param name="content">Entity mention that fills the role.</param>
public AceRelationMentionArgument(string role, AceEntityMention content)
    : base(role, content, "relation")
{
}