// TODO: never used? public virtual void ConstructSentenceRelationMentions() { // // construct the mRelationEntityMentions matrix // ICollection <string> relKeys = mRelationMentions.Keys; foreach (string key in relKeys) { AceRelationMention rm = mRelationMentions[key]; int sentence = mTokens[rm.GetArg(0).GetHead().GetTokenStart()].GetSentence(); // // no need to adjust the number of rows: was done in parseDocument // // store the relation mentions in increasing order // (a) of the start position of their head, or // (b) if start is the same, in increasing order of ends List <AceRelationMention> sentRels = mSentenceRelationMentions[sentence]; bool added = false; for (int i = 0; i < sentRels.Count; i++) { AceRelationMention crt = sentRels[i]; if ((crt.GetMinTokenStart() > rm.GetMinTokenStart()) || (crt.GetMinTokenStart() == rm.GetMinTokenStart() && crt.GetMaxTokenEnd() > rm.GetMaxTokenEnd())) { sentRels.Add(i, rm); added = true; break; } } if (!added) { sentRels.Add(rm); } } }
/// <summary>Builds an AceRelationMention from one relation mention XML node.</summary>
/// <param name="node">Node carrying the ID and LEXICALCONDITION attributes, an "extent" child and "relation_mention_argument" children.</param>
/// <param name="doc">Document used to resolve each argument's REFID to an entity mention.</param>
/// <returns>The new relation mention, with argument slots 0 and 1 filled for every argument whose REFID resolved.</returns>
/// <exception cref="System.Exception">A resolved argument has a ROLE other than "arg-1" or "arg-2" (compared ignoring case).</exception>
private static AceRelationMention ParseRelationMention(INode node, AceDocument doc)
{
    string mentionId = GetAttributeValue(node, "ID");
    AceCharSeq mentionExtent = ParseCharSeq(GetChildByName(node, "extent"));
    string lexicalCondition = GetAttributeValue(node, "LEXICALCONDITION");
    AceRelationMention result = new AceRelationMention(mentionId, mentionExtent, lexicalCondition);

    // Walk the argument children and wire each one to the entity mention it references.
    foreach (INode argNode in GetChildrenByName(node, "relation_mention_argument"))
    {
        string role = GetAttributeValue(argNode, "ROLE");
        string refid = GetAttributeValue(argNode, "REFID");
        AceEntityMention referenced = doc.GetEntityMention(refid);
        if (referenced == null)
        {
            // Arguments whose REFID does not resolve to an entity mention are skipped.
            continue;
        }

        // The back-link is registered before the role is validated, so it is
        // already in place even when the role check below throws.
        referenced.AddRelationMention(result);

        int slot;
        if (Sharpen.Runtime.EqualsIgnoreCase(role, "arg-1"))
        {
            slot = 0;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(role, "arg-2"))
        {
            slot = 1;
        }
        else
        {
            throw new Exception("Invalid relation mention argument role: " + role);
        }
        result.GetArgs()[slot] = new AceRelationMentionArgument(role, referenced);
    }

    return result;
}
/// <summary>Parses one ACE specification file into an AceDocument.</summary>
/// <param name="f">The ACE XML annotation file handed to ReadDocument.</param>
/// <returns>The document, populated with the entities, relations and events (and their mentions) found in the file.</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Xml.Sax.SAXException"/>
/// <exception cref="Javax.Xml.Parsers.ParserConfigurationException"/>
public static AceDocument ParseDocument(File f)
{
    // Load the DOM and create the ACE document from the first "document" element.
    IDocument dom = ReadDocument(f);
    INode root = dom.GetElementsByTagName("document").Item(0);
    AceDocument result = new AceDocument(GetAttributeValue(root, "DOCID"));

    //
    // Entities: id, type, subtype and class, followed by their mentions.
    //
    INodeList entityNodes = dom.GetElementsByTagName("entity");
    // Only the disabled log statement after the loop reads this counter.
    int entityCount = 0;
    for (int e = 0; e < entityNodes.GetLength(); e++)
    {
        INode entityNode = entityNodes.Item(e);
        AceEntity entity = new AceEntity(
            GetAttributeValue(entityNode, "ID"),
            GetAttributeValue(entityNode, "TYPE"),
            GetAttributeValue(entityNode, "SUBTYPE"),
            GetAttributeValue(entityNode, "CLASS"));
        result.AddEntity(entity);

        // Each entity mention is registered on both the entity and the document.
        foreach (INode mentionNode in GetChildrenByName(entityNode, "entity_mention"))
        {
            AceEntityMention entityMention = ParseEntityMention(mentionNode);
            entity.AddMention(entityMention);
            result.AddEntityMention(entityMention);
        }
        entityCount++;
    }
    //log.info("Parsed " + entityCount + " XML entities.");

    //
    // Relations: id, type, subtype, modality and tense, followed by their mentions.
    //
    INodeList relationNodes = dom.GetElementsByTagName("relation");
    for (int r = 0; r < relationNodes.GetLength(); r++)
    {
        INode relationNode = relationNodes.Item(r);
        AceRelation relation = new AceRelation(
            GetAttributeValue(relationNode, "ID"),
            GetAttributeValue(relationNode, "TYPE"),
            GetAttributeValue(relationNode, "SUBTYPE"),
            GetAttributeValue(relationNode, "MODALITY"),
            GetAttributeValue(relationNode, "TENSE"));
        result.AddRelation(relation);

        // XXX: fetch relation_arguments here!
        foreach (INode mentionNode in GetChildrenByName(relationNode, "relation_mention"))
        {
            AceRelationMention relationMention = ParseRelationMention(mentionNode, result);
            relation.AddMention(relationMention);
            result.AddRelationMention(relationMention);
        }
    }

    //
    // Events: id, type, subtype, modality, polarity, genericity and tense,
    // followed by their mentions.
    //
    INodeList eventNodes = dom.GetElementsByTagName("event");
    for (int v = 0; v < eventNodes.GetLength(); v++)
    {
        INode eventNode = eventNodes.Item(v);
        AceEvent aceEvent = new AceEvent(
            GetAttributeValue(eventNode, "ID"),
            GetAttributeValue(eventNode, "TYPE"),
            GetAttributeValue(eventNode, "SUBTYPE"),
            GetAttributeValue(eventNode, "MODALITY"),
            GetAttributeValue(eventNode, "POLARITY"),
            GetAttributeValue(eventNode, "GENERICITY"),
            GetAttributeValue(eventNode, "TENSE"));
        result.AddEvent(aceEvent);

        foreach (INode mentionNode in GetChildrenByName(eventNode, "event_mention"))
        {
            AceEventMention eventMention = ParseEventMention(mentionNode, result);
            aceEvent.AddMention(eventMention);
            result.AddEventMention(eventMention);
        }
    }

    return result;
}
/// <summary>Adds the given relation mention to this object's <c>mRelationMentions</c> collection.</summary>
/// <param name="rm">The relation mention to record.</param>
public virtual void AddRelationMention(AceRelationMention rm)
{
    mRelationMentions.Add(rm);
}
/// <summary>Parses an ACE document.</summary>
/// <remarks>
/// Works in the following steps: (a) reads the XML annotations, unless
/// predicted boundaries are requested, in which case an empty document named
/// after the last path component of <paramref name="prefix"/> is created;
/// (b) reads the raw bytes and the tokens; (c) matches the char sequences
/// against the tokens; (d) constructs the per-sentence entity, relation and
/// event mention lists, each kept in sorted order.
/// </remarks>
/// <param name="prefix">Path prefix of the document; the XML, original-text and ".truecase" file names are derived from it.</param>
/// <param name="usePredictedBoundaries">When true, the gold XML annotations are not read.</param>
/// <returns>The parsed document.</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Xml.Sax.SAXException"/>
/// <exception cref="Javax.Xml.Parsers.ParserConfigurationException"/>
public static Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.Reader.AceDocument ParseDocument(string prefix, bool usePredictedBoundaries)
{
    mLog.Fine("Reading document " + prefix);
    Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.Reader.AceDocument doc = null;

    //
    // read the ACE XML annotations
    //
    if (!usePredictedBoundaries)
    {
        doc = AceDomReader.ParseDocument(new File(prefix + XmlExt));
    }
    else
    {
        //
        // will use the predicted entity boundaries: start from an empty
        // document whose id is the text after the last path separator
        //
        int lastSlash = prefix.LastIndexOf(File.separator);
        System.Diagnostics.Debug.Assert((lastSlash > 0 && lastSlash < prefix.Length - 1));
        string id = Sharpen.Runtime.Substring(prefix, lastSlash + 1);
        doc = new Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.Reader.AceDocument(id);
    }
    doc.SetPrefix(prefix);

    //
    // read the raw byte stream, preferring the truecased file when it exists
    //
    string trueCasedFileName = prefix + OrigExt + ".truecase";
    if ((new File(trueCasedFileName).Exists()))
    {
        mLog.Severe("Using truecased file: " + trueCasedFileName);
        doc.ReadRawBytes(trueCasedFileName);
    }
    else
    {
        doc.ReadRawBytes(prefix + OrigExt);
    }

    //
    // read the AceTokens; the offset returned for one token is fed into the
    // adjustment of the next one
    //
    int offsetToSubtract = 0;
    IList<IList<AceToken>> sentences = AceSentenceSegmenter.TokenizeAndSegmentSentences(prefix);
    doc.SetSentences(sentences);
    foreach (IList<AceToken> sentence in sentences)
    {
        foreach (AceToken token in sentence)
        {
            offsetToSubtract = token.AdjustPhrasePositions(offsetToSubtract, token.GetLiteral());
            doc.AddToken(token);
        }
    }

    //
    // match char sequences to phrases
    //
    doc.MatchCharSeqs(prefix);

    //
    // construct the mEntityMentions matrix
    //
    ICollection<string> entityKeys = doc.mEntityMentions.Keys;
    int sentenceIndex;
    foreach (string key in entityKeys)
    {
        AceEntityMention em = doc.mEntityMentions[key];
        sentenceIndex = doc.mTokens[em.GetHead().GetTokenStart()].GetSentence();

        // adjust the number of rows if necessary; the three matrices grow together
        while (sentenceIndex >= doc.mSentenceEntityMentions.Count)
        {
            doc.mSentenceEntityMentions.Add(new List<AceEntityMention>());
            doc.mSentenceRelationMentions.Add(new List<AceRelationMention>());
            doc.mSentenceEventMentions.Add(new List<AceEventMention>());
        }

        // store the entity mentions in increasing order:
        // (a) of the start position of their head
        // (b) if start is the same, in increasing order of the head end
        List<AceEntityMention> sentEnts = doc.mSentenceEntityMentions[sentenceIndex];
        bool added = false;
        for (int i = 0; i < sentEnts.Count; i++)
        {
            AceEntityMention crt = sentEnts[i];
            if ((crt.GetHead().GetTokenStart() > em.GetHead().GetTokenStart())
                || (crt.GetHead().GetTokenStart() == em.GetHead().GetTokenStart()
                    && crt.GetHead().GetTokenEnd() > em.GetHead().GetTokenEnd()))
            {
                // Insert (not Add) places the mention before the first larger element.
                sentEnts.Insert(i, em);
                added = true;
                break;
            }
        }
        if (!added)
        {
            sentEnts.Add(em);
        }
    }

    //
    // construct the mRelationMentions matrix
    //
    ICollection<string> relKeys = doc.mRelationMentions.Keys;
    foreach (string relKey in relKeys)
    {
        AceRelationMention rm = doc.mRelationMentions[relKey];
        sentenceIndex = doc.mTokens[rm.GetArg(0).GetHead().GetTokenStart()].GetSentence();

        //
        // no need to adjust the number of rows: was done above
        //
        // store the relation mentions in increasing order
        // (a) of their minimum token start, or
        // (b) if start is the same, in increasing order of maximum token end
        List<AceRelationMention> sentRels = doc.mSentenceRelationMentions[sentenceIndex];
        bool added = false;
        for (int i = 0; i < sentRels.Count; i++)
        {
            AceRelationMention crt = sentRels[i];
            if ((crt.GetMinTokenStart() > rm.GetMinTokenStart())
                || (crt.GetMinTokenStart() == rm.GetMinTokenStart()
                    && crt.GetMaxTokenEnd() > rm.GetMaxTokenEnd()))
            {
                sentRels.Insert(i, rm);
                added = true;
                break;
            }
        }
        if (!added)
        {
            sentRels.Add(rm);
        }
    }

    //
    // construct the mEventMentions matrix
    //
    ICollection<string> eventKeys = doc.mEventMentions.Keys;
    foreach (string eventKey in eventKeys)
    {
        AceEventMention evm = doc.mEventMentions[eventKey];
        sentenceIndex = doc.mTokens[evm.GetMinTokenStart()].GetSentence();

        /*
         * adjust the number of rows if necessary -- if you're wondering why we do
         * this here again, (after we've done it for entities) it's because we can
         * have an event with no entities near the end of the document and thus
         * won't have created rows in mSentence*Mentions
         */
        while (sentenceIndex >= doc.mSentenceEntityMentions.Count)
        {
            doc.mSentenceEntityMentions.Add(new List<AceEntityMention>());
            doc.mSentenceRelationMentions.Add(new List<AceRelationMention>());
            doc.mSentenceEventMentions.Add(new List<AceEventMention>());
        }

        // store the event mentions in increasing order
        // (a) of their minimum token start, or
        // (b) if start is the same, in increasing order of maximum token end
        // NOTE(review): the earlier comment also promised that event mentions
        // with no arguments come first; nothing in this loop special-cases
        // them -- confirm whether GetMinTokenStart accounts for that.
        List<AceEventMention> sentEvents = doc.mSentenceEventMentions[sentenceIndex];
        bool added = false;
        for (int i = 0; i < sentEvents.Count; i++)
        {
            AceEventMention crt = sentEvents[i];
            if ((crt.GetMinTokenStart() > evm.GetMinTokenStart())
                || (crt.GetMinTokenStart() == evm.GetMinTokenStart()
                    && crt.GetMaxTokenEnd() > evm.GetMaxTokenEnd()))
            {
                sentEvents.Insert(i, evm);
                added = true;
                break;
            }
        }
        if (!added)
        {
            sentEvents.Add(evm);
        }
    }

    return doc;
}
/// <summary>Stores the relation mention in <c>mRelationMentions</c>, keyed by the value of its <c>GetId()</c>.</summary>
/// <param name="e">The relation mention to store; assigned through the indexer under its id.</param>
public virtual void AddRelationMention(AceRelationMention e)
{
    string mentionId = e.GetId();
    mRelationMentions[mentionId] = e;
}
/// <summary>Adds a mention to <c>mMentions</c> and sets this object as the mention's parent.</summary>
/// <param name="m">The relation mention to attach.</param>
public virtual void AddMention(AceRelationMention m)
{
    // Record the mention first, then point it back at its owner.
    mMentions.Add(m);
    m.SetParent(this);
}