/// <summary>Creates a mention argument from a role, its content and the kind of mention it belongs to.</summary>
/// <param name="role">Value stored as the argument's role.</param>
/// <param name="content">Entity mention stored as the argument's content.</param>
/// <param name="mentionType">Kind of the owning mention; in practice, event or relation.</param>
public AceMentionArgument(string role, AceEntityMention content, string mentionType)
{
    this.mentionType = mentionType;
    this.mContent    = content;
    this.mRole       = role;
}
        // buf.toString(),
        /// <summary>
        /// Makes an ACE entity with a single mention from the span [startToken, endToken)
        /// and hands both to AddEntity / AddEntityMention.
        /// </summary>
        /// <param name="startToken">First token of the span (inclusive).</param>
        /// <param name="endToken">End of the span (exclusive).</param>
        /// <param name="id">Number embedded in the generated entity and mention ids.</param>
        /// <param name="type">Entity type passed to the AceEntity constructor.</param>
        /// <param name="subtype">Entity subtype passed to the AceEntity constructor.</param>
        private void MakeEntity(int startToken, int endToken, int id, string type, string subtype)
        {
            // the entity id is this object's id followed by "-E" and the supplied number
            string    entityId = mId + "-E" + id;
            AceEntity entity   = new AceEntity(entityId, type, subtype, "SPC");
            AddEntity(entity);

            // the same char sequence is supplied for both char-seq arguments of the mention
            AceCharSeq       span    = MakeCharSeq(startToken, endToken);
            AceEntityMention mention = new AceEntityMention(entityId + "-1", "NOM", "NOM", span, span);
            AddEntityMention(mention);
            entity.AddMention(mention);
        }
        /// <summary>Extracts info about one event mention</summary>
        /// <param name="node">Node whose ID attribute and "extent" / "anchor" children are read.</param>
        /// <param name="doc">Document used to look up the entity mention behind each argument REFID.</param>
        /// <returns>The new event mention, with one argument per REFID that resolved to an entity mention.</returns>
        private static AceEventMention ParseEventMention(INode node, AceDocument doc)
        {
            // build the mention from its id, extent and anchor
            AceEventMention result = new AceEventMention(
                GetAttributeValue(node, "ID"),
                ParseCharSeq(GetChildByName(node, "extent")),
                ParseCharSeq(GetChildByName(node, "anchor")));

            // attach every argument whose reference can be resolved
            foreach (INode argNode in GetChildrenByName(node, "event_mention_argument"))
            {
                string           role   = GetAttributeValue(argNode, "ROLE");
                string           refid  = GetAttributeValue(argNode, "REFID");
                AceEntityMention target = doc.GetEntityMention(refid);
                if (target == null)
                {
                    // arguments that do not resolve to an entity mention are skipped
                    continue;
                }
                target.AddEventMention(result);
                result.AddArg(target, role);
            }
            return result;
        }
        /// <summary>Extracts info about one relation mention</summary>
        /// <param name="node">Node whose ID and LEXICALCONDITION attributes and "extent" child are read.</param>
        /// <param name="doc">Document used to look up the entity mention behind each argument REFID.</param>
        /// <returns>The new relation mention; resolved "arg-1" / "arg-2" arguments are stored in slots 0 / 1.</returns>
        /// <exception cref="System.Exception">A resolved argument has a role other than "arg-1" or "arg-2" (case-insensitive).</exception>
        private static AceRelationMention ParseRelationMention(INode node, AceDocument doc)
        {
            // build the mention from its id, extent and lexical condition
            AceRelationMention result = new AceRelationMention(
                GetAttributeValue(node, "ID"),
                ParseCharSeq(GetChildByName(node, "extent")),
                GetAttributeValue(node, "LEXICALCONDITION"));

            foreach (INode argNode in GetChildrenByName(node, "relation_mention_argument"))
            {
                string           role   = GetAttributeValue(argNode, "ROLE");
                string           refid  = GetAttributeValue(argNode, "REFID");
                AceEntityMention target = doc.GetEntityMention(refid);
                if (target == null)
                {
                    // arguments that do not resolve to an entity mention are skipped
                    continue;
                }
                // the back-link is added before the role is validated
                target.AddRelationMention(result);

                // map the role onto its position in the argument array
                int slot;
                if (Sharpen.Runtime.EqualsIgnoreCase(role, "arg-1"))
                {
                    slot = 0;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(role, "arg-2"))
                {
                    slot = 1;
                }
                else
                {
                    throw new Exception("Invalid relation mention argument role: " + role);
                }
                result.GetArgs()[slot] = new AceRelationMentionArgument(role, target);
            }
            return result;
        }
        /// <summary>Parses one ACE specification</summary>
        /// <param name="f">File handed to ReadDocument to obtain the DOM document.</param>
        /// <returns>
        /// The ACE document built from the "document" element, holding every parsed
        /// entity, relation and event together with their mentions.
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Org.Xml.Sax.SAXException"/>
        /// <exception cref="Javax.Xml.Parsers.ParserConfigurationException"/>
        public static AceDocument ParseDocument(File f)
        {
            // parse the Dom document
            IDocument dom = ReadDocument(f);

            // create the ACE document object from the first "document" element
            INode       root   = dom.GetElementsByTagName("document").Item(0);
            AceDocument result = new AceDocument(GetAttributeValue(root, "DOCID"));

            //
            // read all entities
            //
            INodeList entityNodes = dom.GetElementsByTagName("entity");
            for (int e = 0; e < entityNodes.GetLength(); e++)
            {
                INode entityNode = entityNodes.Item(e);
                // create the entity from its id, type, subtype and class
                AceEntity entity = new AceEntity(
                    GetAttributeValue(entityNode, "ID"),
                    GetAttributeValue(entityNode, "TYPE"),
                    GetAttributeValue(entityNode, "SUBTYPE"),
                    GetAttributeValue(entityNode, "CLASS"));
                result.AddEntity(entity);
                // parse all mentions of this entity
                foreach (INode mentionNode in GetChildrenByName(entityNode, "entity_mention"))
                {
                    AceEntityMention mention = ParseEntityMention(mentionNode);
                    entity.AddMention(mention);
                    result.AddEntityMention(mention);
                }
            }

            //
            // read all relations
            //
            INodeList relationNodes = dom.GetElementsByTagName("relation");
            for (int r = 0; r < relationNodes.GetLength(); r++)
            {
                INode relationNode = relationNodes.Item(r);
                // create the relation from its id, type, subtype, modality and tense
                AceRelation relation = new AceRelation(
                    GetAttributeValue(relationNode, "ID"),
                    GetAttributeValue(relationNode, "TYPE"),
                    GetAttributeValue(relationNode, "SUBTYPE"),
                    GetAttributeValue(relationNode, "MODALITY"),
                    GetAttributeValue(relationNode, "TENSE"));
                result.AddRelation(relation);
                // XXX: fetch relation_arguments here!
                // parse all mentions of this relation
                foreach (INode mentionNode in GetChildrenByName(relationNode, "relation_mention"))
                {
                    AceRelationMention mention = ParseRelationMention(mentionNode, result);
                    relation.AddMention(mention);
                    result.AddRelationMention(mention);
                }
            }

            //
            // read all events
            //
            INodeList eventNodes = dom.GetElementsByTagName("event");
            for (int v = 0; v < eventNodes.GetLength(); v++)
            {
                INode eventNode = eventNodes.Item(v);
                // create the event from its id, type, subtype, modality, polarity, genericity and tense
                AceEvent aceEvent = new AceEvent(
                    GetAttributeValue(eventNode, "ID"),
                    GetAttributeValue(eventNode, "TYPE"),
                    GetAttributeValue(eventNode, "SUBTYPE"),
                    GetAttributeValue(eventNode, "MODALITY"),
                    GetAttributeValue(eventNode, "POLARITY"),
                    GetAttributeValue(eventNode, "GENERICITY"),
                    GetAttributeValue(eventNode, "TENSE"));
                result.AddEvent(aceEvent);
                // parse all mentions of this event
                foreach (INode mentionNode in GetChildrenByName(eventNode, "event_mention"))
                {
                    AceEventMention mention = ParseEventMention(mentionNode, result);
                    aceEvent.AddMention(mention);
                    result.AddEventMention(mention);
                }
            }
            return result;
        }
Example #6
0
 /// <summary>Stores a new relation mention argument at position <paramref name="which"/> of mArguments.</summary>
 /// <param name="which">Index into mArguments.</param>
 /// <param name="em">Entity mention wrapped by the new argument.</param>
 /// <param name="role">Role given to the new argument.</param>
 public virtual void SetArg(int which, AceEntityMention em, string role)
 {
     AceRelationMentionArgument argument = new AceRelationMentionArgument(role, em);
     mArguments[which] = argument;
 }
 /// <summary>Creates a mention argument whose mention type is fixed to "event".</summary>
 /// <param name="role">Role forwarded to the base constructor.</param>
 /// <param name="content">Entity mention forwarded to the base constructor.</param>
 public AceEventMentionArgument(string role, AceEntityMention content)
     : base(role, content, mentionType: "event")
 {
     // nothing beyond the base initialization
 }
Example #8
0
 /// <summary>Stores a new event mention argument under <paramref name="role"/> in mRolesToArguments.</summary>
 /// <param name="em">Entity mention wrapped by the new argument.</param>
 /// <param name="role">Role of the argument; also the key it is stored under.</param>
 public virtual void AddArg(AceEntityMention em, string role)
 {
     AceEventMentionArgument argument = new AceEventMentionArgument(role, em);
     mRolesToArguments[role] = argument;
 }
        //
        // heeyoung : skip relation, event parsing part - for ACE2004
        //
        /// <summary>
        /// Reads one ACE document: its XML annotations (or an empty document when predicted
        /// boundaries are requested), its raw bytes and its tokens, then matches char sequences
        /// to tokens and fills the per-sentence entity mention lists.
        /// </summary>
        /// <param name="prefix">Path prefix; XmlExt / OrigExt are appended to it to locate the input files.</param>
        /// <param name="usePredictedBoundaries">
        /// When false the XML annotations are parsed; when true they are not read and an empty
        /// document is created whose id is the part of <paramref name="prefix"/> after the last separator.
        /// </param>
        /// <param name="AceVersion">Not read by this method.</param>
        /// <returns>The populated document.</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Org.Xml.Sax.SAXException"/>
        /// <exception cref="Javax.Xml.Parsers.ParserConfigurationException"/>
        public static Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.Reader.AceDocument ParseDocument(string prefix, bool usePredictedBoundaries, string AceVersion)
        {
            mLog.Fine("Reading document " + prefix);
            Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.Reader.AceDocument doc = null;
            //
            // read the ACE XML annotations
            //
            if (!usePredictedBoundaries)
            {
                doc = AceDomReader.ParseDocument(new File(prefix + XmlExt));
            }
            else
            {
                //
                // will use the predicted entity boundaries: start from an empty
                // document named after the last path component of the prefix
                //
                int lastSlash = prefix.LastIndexOf(File.separator);
                System.Diagnostics.Debug.Assert((lastSlash > 0 && lastSlash < prefix.Length - 1));
                string id = Sharpen.Runtime.Substring(prefix, lastSlash + 1);
                doc = new Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.Reader.AceDocument(id);
            }
            doc.SetPrefix(prefix);
            //
            // read the raw byte stream, preferring a truecased version when one exists
            //
            string trueCasedFileName = prefix + OrigExt + ".truecase";

            if ((new File(trueCasedFileName).Exists()))
            {
                mLog.Severe("Using truecased file: " + trueCasedFileName);
                doc.ReadRawBytes(trueCasedFileName);
            }
            else
            {
                doc.ReadRawBytes(prefix + OrigExt);
            }
            //
            // read the AceTokens
            //
            int offsetToSubtract = 0;
            IList <IList <AceToken> > sentences = AceSentenceSegmenter.TokenizeAndSegmentSentences(prefix);

            doc.SetSentences(sentences);
            foreach (IList <AceToken> sentence in sentences)
            {
                foreach (AceToken token in sentence)
                {
                    // the running offset returned for one token is fed into the next
                    offsetToSubtract = token.AdjustPhrasePositions(offsetToSubtract, token.GetLiteral());
                    doc.AddToken(token);
                }
            }
            //
            // match char sequences to phrases
            //
            doc.MatchCharSeqs(prefix);
            //
            // construct the mEntityMentions matrix
            //
            ICollection <string> entityKeys = doc.mEntityMentions.Keys;

            foreach (string key in entityKeys)
            {
                AceEntityMention em = doc.mEntityMentions[key];
                int headStart       = em.GetHead().GetTokenStart();
                int headEnd         = em.GetHead().GetTokenEnd();
                int sentenceIndex   = doc.mTokens[headStart].GetSentence();
                // adjust the number of rows if necessary; the three per-sentence lists grow together
                while (sentenceIndex >= doc.mSentenceEntityMentions.Count)
                {
                    doc.mSentenceEntityMentions.Add(new List <AceEntityMention>());
                    doc.mSentenceRelationMentions.Add(new List <AceRelationMention>());
                    doc.mSentenceEventMentions.Add(new List <AceEventMention>());
                }
                // store the entity mentions in increasing order:
                // (a) of the start position of their head
                // (b) if start is the same, in increasing order of the head end
                List <AceEntityMention> sentEnts = doc.mSentenceEntityMentions[sentenceIndex];
                bool added = false;
                for (int i = 0; i < sentEnts.Count; i++)
                {
                    AceEntityMention crt = sentEnts[i];
                    int crtStart         = crt.GetHead().GetTokenStart();
                    if ((crtStart > headStart) || (crtStart == headStart && crt.GetHead().GetTokenEnd() > headEnd))
                    {
                        // positional insert: List<T>.Add only appends, so Insert is
                        // required to place the mention before the first larger one
                        sentEnts.Insert(i, em);
                        added = true;
                        break;
                    }
                }
                if (!added)
                {
                    // larger than (or equal to) everything already stored: append
                    sentEnts.Add(em);
                }
            }
            return(doc);
        }
        /// <summary>Matches all relevant mentions, i.e. entities and events, to tokens.</summary>
        /// <remarks>
        /// Matches the head and extent of every entity mention and the extent of every
        /// event mention against mTokens. Note: entity mentions may match with multiple tokens!
        /// A match failure is logged and terminates the process with exit code 1.
        /// </remarks>
        /// <param name="filePrefix">Document prefix; only used in the error log when a match fails.</param>
        public virtual void MatchCharSeqs(string filePrefix)
        {
            //
            // match the head and extent of entity mentions
            //
            foreach (string key in mEntityMentions.Keys)
            {
                AceEntityMention m = mEntityMentions[key];
                MatchCharSeqOrExit(m.GetHead(), "entity mention head", filePrefix);
                MatchCharSeqOrExit(m.GetExtent(), "entity mention extent", filePrefix);
                //
                // set the head word of the mention
                //
                m.DetectHeadToken(this);
            }
            // we need to do this for events as well since they may not have any AceEntityMentions associated with them (if they have no arguments)
            foreach (string key in mEventMentions.Keys)
            {
                AceEventMention m = mEventMentions[key];
                MatchCharSeqOrExit(m.GetExtent(), "event mention extent", filePrefix);
            }
        }

        /// <summary>Matches one char sequence against mTokens; on failure logs the details and exits the process.</summary>
        /// <param name="seq">Char sequence to match.</param>
        /// <param name="description">What the sequence represents; inserted into the error message.</param>
        /// <param name="filePrefix">Document prefix reported in the error log.</param>
        private void MatchCharSeqOrExit(AceCharSeq seq, string description, string filePrefix)
        {
            try
            {
                seq.Match(mTokens);
            }
            catch (MatchException)
            {
                mLog.Severe("READER ERROR: Failed to match " + description + ": " + "[" + seq.GetText() + ", " + seq.GetByteStart() + ", " + seq.GetByteEnd() + "]");
                mLog.Severe("Document tokens: " + TokensWithByteSpan(seq.GetByteStart(), seq.GetByteEnd()));
                // every failure path reports the document so the offending file can be found
                mLog.Severe("Document prefix: " + filePrefix);
                System.Environment.Exit(1);
            }
        }
 /// <summary>Stores the entity mention in mEntityMentions under its id, replacing any entry already stored under that id.</summary>
 /// <param name="em">Entity mention to store; its GetId() value is the key.</param>
 public virtual void AddEntityMention(AceEntityMention em)
 {
     string mentionId = em.GetId();
     mEntityMentions[mentionId] = em;
 }
Example #12
0
 /// <summary>Adds the mention to mMentions and then sets this object as its parent.</summary>
 /// <param name="m">Entity mention to add.</param>
 public virtual void AddMention(AceEntityMention m)
 {
     // the list is updated first; the back-link to this object is set afterwards
     this.mMentions.Add(m);
     m.SetParent(this);
 }
 /// <summary>Creates a mention argument whose mention type is fixed to "relation".</summary>
 /// <param name="role">Role forwarded to the base constructor.</param>
 /// <param name="content">Entity mention forwarded to the base constructor.</param>
 public AceRelationMentionArgument(string role, AceEntityMention content)
     : base(role, content, mentionType: "relation")
 {
     // nothing beyond the base initialization
 }