/// <summary>Builds the XML <c>relation</c> element describing a relation mention.</summary>
/// <param name="relation">Relation mention to serialize.</param>
/// <param name="curNS">Namespace passed to every element created here.</param>
/// <returns>
/// A <c>relation</c> element carrying an <c>id</c> attribute, a <c>type</c> child, an optional
/// <c>subtype</c> child, an <c>arguments</c> child with one entry per entity-mention argument,
/// and the element produced by <c>MakeProbabilitiesElement</c>.
/// </returns>
private static Element ToXML(RelationMention relation, string curNS)
        {
            Element top = new Element("relation", curNS);

            top.AddAttribute(new Attribute("id", relation.GetObjectId()));

            // Attach the <type> element itself. Previously the raw type value was appended
            // to `top` and the freshly built element was silently dropped.
            Element type = new Element("type", curNS);
            type.AppendChild(relation.GetType());
            top.AppendChild(type);

            if (relation.GetSubType() != null)
            {
                // Same fix as above: append the <subtype> element, not its raw value.
                Element subtype = new Element("subtype", curNS);
                subtype.AppendChild(relation.GetSubType());
                top.AppendChild(subtype);
            }

            IList <EntityMention> mentions = relation.GetEntityMentionArgs();
            Element args = new Element("arguments", curNS);

            foreach (EntityMention e in mentions)
            {
                args.AppendChild(ToXML(e, curNS));
            }
            top.AppendChild(args);
            top.AppendChild(MakeProbabilitiesElement(relation, curNS));
            return(top);
        }
Example #2
0
        /// <summary>
        /// Builds the datum for a relation mention and a label by delegating to the feature factory.
        /// </summary>
        /// <param name="rel">Relation mention handed to the feature factory.</param>
        /// <param name="label">Label handed to the feature factory together with <paramref name="rel"/>.</param>
        /// <returns>The datum returned by the feature factory.</returns>
        protected internal virtual IDatum <string, string> CreateDatum(RelationMention rel, string label)
        {
            // Debug-build check only: the factory must have been set before this is called.
            System.Diagnostics.Debug.Assert(featureFactory != null);

            return featureFactory.CreateDatum(rel, label);
        }
        /// <summary>
        /// Prints a per-label table of correct/predicted/actual counts with precision, recall and F,
        /// followed by a "Total" row that aggregates every label not flagged as unrelated.
        /// </summary>
        /// <param name="pw">Writer that receives the table.</param>
        /// <param name="results">Counts keyed by (predicted, actual) label pairs.</param>
        /// <param name="labelCount">
        /// Gold count per label. When <c>null</c>, the gold counts are derived from
        /// <paramref name="results"/> instead.
        /// </param>
        private void PrintResultsInternal(PrintWriter pw, ICounter <Pair <string, string> > results, ClassicCounter <string> labelCount)
        {
            ClassicCounter <string> correct         = new ClassicCounter <string>();
            ClassicCounter <string> predictionCount = new ClassicCounter <string>();
            bool countGoldLabels = false;

            if (labelCount == null)
            {
                labelCount      = new ClassicCounter <string>();
                countGoldLabels = true;
            }
            foreach (Pair <string, string> predictedActual in results.KeySet())
            {
                string predicted = predictedActual.first;
                string actual    = predictedActual.second;
                // Look the pair count up once instead of once per use.
                double pairCount = results.GetCount(predictedActual);
                if (predicted.Equals(actual))
                {
                    correct.IncrementCount(actual, pairCount);
                }
                predictionCount.IncrementCount(predicted, pairCount);
                if (countGoldLabels)
                {
                    labelCount.IncrementCount(actual, pairCount);
                }
            }
            DecimalFormat formatter = new DecimalFormat();

            formatter.SetMaximumFractionDigits(1);
            formatter.SetMinimumFractionDigits(1);
            double totalCount     = 0;
            double totalCorrect   = 0;
            double totalPredicted = 0;

            pw.Println("Label\tCorrect\tPredict\tActual\tPrecn\tRecall\tF");
            IList <string> labels = new List <string>(labelCount.KeySet());

            labels.Sort();
            foreach (string label in labels)
            {
                double numcorrect = correct.GetCount(label);
                double predicted  = predictionCount.GetCount(label);
                double trueCount  = labelCount.GetCount(label);
                double precision  = (predicted > 0) ? (numcorrect / predicted) : 0;
                // Guard the denominator like precision does; a zero gold count would
                // otherwise yield NaN/Infinity in the printed table.
                double recall     = (trueCount > 0) ? (numcorrect / trueCount) : 0;
                double f          = (precision + recall > 0) ? 2 * precision * recall / (precision + recall) : 0.0;
                pw.Println(StringUtils.PadOrTrim(label, MaxLabelLength) + "\t" + numcorrect + "\t" + predicted + "\t" + trueCount + "\t" + formatter.Format(precision * 100) + "\t" + formatter.Format(100 * recall) + "\t" + formatter.Format(100 * f));
                if (!RelationMention.IsUnrelatedLabel(label))
                {
                    totalCount     += trueCount;
                    totalCorrect   += numcorrect;
                    totalPredicted += predicted;
                }
            }
            double precision_1 = (totalPredicted > 0) ? (totalCorrect / totalPredicted) : 0;
            // Same guard for the aggregate row (e.g. when every label is an unrelated label).
            double recall_1    = (totalCount > 0) ? (totalCorrect / totalCount) : 0;
            double f_1         = (precision_1 + recall_1 > 0) ? 2 * precision_1 * recall_1 / (precision_1 + recall_1) : 0.0;

            pw.Println("Total\t" + totalCorrect + "\t" + totalPredicted + "\t" + totalCount + "\t" + formatter.Format(100 * precision_1) + "\t" + formatter.Format(100 * recall_1) + "\t" + formatter.Format(100 * f_1));
        }
Example #4
0
        /// <summary>
        /// Classifies every candidate relation of a sentence and returns the predictions,
        /// including those labelled as unrelated.
        /// </summary>
        /// <remarks>
        /// Each prediction is a newly constructed RelationMention; the candidates are not modified here.
        /// </remarks>
        /// <param name="sentence">Sentence whose candidate relations are classified.</param>
        /// <returns>One predicted relation mention per candidate, in candidate order.</returns>
        protected internal virtual IList <RelationMention> ExtractAllRelations(ICoreMap sentence)
        {
            IList <RelationMention> candidates;

            if (!createUnrelatedRelations)
            {
                // Use the candidates attached to the sentence by the reader (the KBP case).
                candidates = sentence.Get(typeof(MachineReadingAnnotations.RelationMentionsAnnotation));
                if (candidates == null)
                {
                    candidates = new List <RelationMention>();
                }
            }
            else
            {
                // Generate every possible relation between the entities of the sentence.
                candidates = AnnotationUtils.GetAllUnrelatedRelations(relationMentionFactory, sentence, false);
            }

            IList <RelationMention> predictions = new List <RelationMention>();

            // Classification proper: one prediction per candidate.
            foreach (RelationMention rel in candidates)
            {
                IDatum <string, string> testDatum = CreateDatum(rel);
                string            label = ClassOf(testDatum, rel);
                ICounter <string> probs = ProbabilityOf(testDatum);
                double            prob  = probs.GetCount(label);

                // The justification text is only produced when Info logging is enabled.
                StringWriter justification       = new StringWriter();
                PrintWriter  justificationWriter = new PrintWriter(justification);
                if (logger.IsLoggable(Level.Info))
                {
                    JustificationOf(testDatum, justificationWriter, label);
                }
                logger.Info("Current sentence: " + AnnotationUtils.TokensAndNELabelsToString(rel.GetArg(0).GetSentence()) + "\n" + "Classifying relation: " + rel + "\n" + "JUSTIFICATION for label GOLD:" + rel.GetType() + " SYS:" + label + " (prob:" + prob +
                            "):\n" + justification.ToString());
                logger.Info("Justification done.");

                RelationMention relation = relationMentionFactory.ConstructRelationMention(rel.GetObjectId(), sentence, rel.GetExtent(), label, null, rel.GetArgs(), probs);
                predictions.Add(relation);

                // Both outcomes log the same three lines; only the first message differs.
                bool   sameType = relation.GetType().Equals(rel.GetType());
                string headline = sameType ? "Classification: found similar type " : "Classification: found different type ";
                logger.Info(headline + relation.GetType() + " for relation: " + rel);
                logger.Info("The predicted relation is: " + relation);
                logger.Info("Current sentence: " + AnnotationUtils.TokensAndNELabelsToString(rel.GetArg(0).GetSentence()));
            }
            return predictions;
        }
Example #5
0
        /// <summary>
        /// Converts an ACE relation mention into a <c>RelationMention</c> whose arguments are the
        /// already-converted entity mentions found in <paramref name="entityMap"/>.
        /// </summary>
        /// <param name="aceRelationMention">ACE relation mention to convert.</param>
        /// <param name="docId">Document id; not used by this method.</param>
        /// <param name="sentence">Sentence the resulting relation mention is attached to.</param>
        /// <param name="entityMap">Converted entity mentions keyed by ACE mention id.</param>
        /// <returns>
        /// The converted relation mention, or <c>null</c> when an argument is missing from
        /// <paramref name="entityMap"/> or the arguments are not exactly "arg-1" followed by "arg-2".
        /// </returns>
        private RelationMention ConvertAceRelationMention(AceRelationMention aceRelationMention, string docId, ICoreMap sentence, IDictionary <string, EntityMention> entityMap)
        {
            IList <AceRelationMentionArgument> args          = Arrays.AsList(aceRelationMention.GetArgs());
            IList <ExtractionObject>           convertedArgs = new List <ExtractionObject>();
            IList <string> argNames = new List <string>();
            // the arguments are already stored in semantic order. Make sure we preserve the same ordering!
            int left  = int.MaxValue;
            int right = int.MinValue;

            foreach (AceRelationMentionArgument arg in args)
            {
                // The dictionary indexer throws KeyNotFoundException for an unknown key, which
                // would bypass the "ignore this mention" path below; TryGetValue keeps it reachable.
                EntityMention mention;
                if (!entityMap.TryGetValue(arg.GetContent().GetId(), out mention) || mention == null)
                {
                    logger.Severe("READER ERROR: Failed to find relation argument with id " + arg.GetContent().GetId());
                    logger.Severe("This happens because a few relation mentions illegally span multiple sentences. Will ignore this mention.");
                    return(null);
                }
                ExtractionObject o = mention;
                convertedArgs.Add(o);
                argNames.Add(arg.GetRole());
                // Grow the relation extent so it covers every argument.
                if (o.GetExtentTokenStart() < left)
                {
                    left = o.GetExtentTokenStart();
                }
                if (o.GetExtentTokenEnd() > right)
                {
                    right = o.GetExtentTokenEnd();
                }
            }
            if (argNames.Count != 2 || !Sharpen.Runtime.EqualsIgnoreCase(argNames[0], "arg-1") || !Sharpen.Runtime.EqualsIgnoreCase(argNames[1], "arg-2"))
            {
                logger.Severe("READER ERROR: Invalid succession of arguments in relation mention: " + argNames);
                logger.Severe("ACE relations must have two arguments. Will ignore this mention.");
                return(null);
            }
            RelationMention relation = new RelationMention(aceRelationMention.GetId(), sentence, new Span(left, right), aceRelationMention.GetParent().GetType(), aceRelationMention.GetParent().GetSubtype(), convertedArgs, null);

            return(relation);
        }
Example #6
0
 /*
  * Creates a datum for the given relation mention.
  * Declared separately from the other datum-creation methods in case creating a
  * test datum needs to differ from them; the behavior is defined by implementers.
  * NOTE(review): the role of `logger` is not visible here - presumably diagnostics;
  * confirm against the implementations.
  */
 public abstract IDatum <string, string> CreateTestDatum(RelationMention rel, Logger logger);
Example #7
0
 // Abstract: returns a collection of strings for the given relation mention.
 // NOTE(review): the second parameter is a plain string despite its name; presumably it
 // selects which feature to compute - confirm against the implementations.
 public abstract ICollection <string> GetFeatures(RelationMention rel, string dependency_path_words);
Example #8
0
 // Abstract: returns a single string for the given relation mention.
 // NOTE(review): the second parameter is a plain string despite its name; presumably it
 // selects which feature to compute - confirm against the implementations.
 public abstract string GetFeature(RelationMention rel, string dependency_path_lowlevel);
Example #9
0
 // Abstract: builds the string-labelled, string-featured datum for a relation mention.
 // How the label and features are derived is left to implementers.
 public abstract IDatum <string, string> CreateDatum(RelationMention rel);
        /// <summary>
        /// Reads one sentence (its token lines followed by its relation lines) from the line
        /// enumerator and returns it as an <c>Annotation</c>.
        /// </summary>
        /// <remarks>
        /// Each line is dispatched on the number of pieces returned by <c>StringUtils.Split</c>:
        /// 1 piece is a blank separator, 3 pieces a relation, 9 pieces a token; any other count is
        /// ignored. Reading stops after the second blank line or when the input is exhausted.
        /// </remarks>
        /// <param name="docId">Document id stored on the sentence.</param>
        /// <param name="lineIterator">
        /// Enumerator over the input lines. On return it is positioned on the last line consumed
        /// for this sentence; no line belonging to the next sentence is consumed.
        /// </param>
        /// <returns>The sentence annotation with its tokens, text, entity and relation mentions.</returns>
        private Annotation ReadSentence(string docId, IEnumerator <string> lineIterator)
        {
            Annotation sentence = new Annotation(string.Empty);

            sentence.Set(typeof(CoreAnnotations.DocIDAnnotation), docId);
            sentence.Set(typeof(MachineReadingAnnotations.EntityMentionsAnnotation), new List <EntityMention>());
            // we'll need to set things like the tokens and textContent after we've
            // fully read the sentence
            // contains the full text that we've read so far
            StringBuilder textContent = new StringBuilder();
            // how many tokens we've seen so far
            int tokenCount = 0;
            IList <CoreLabel> tokens = new List <CoreLabel>();
            // when we've seen two blank lines, this sentence is over (one
            // blank line separates the sentence and the relations)
            int    numBlankLinesSeen = 0;
            string sentenceID        = null;
            // keeps tracks of entities we've seen so far for use by relations
            IDictionary <string, EntityMention> indexToEntityMention = new Dictionary <string, EntityMention>();

            // Check the terminator BEFORE advancing: MoveNext() consumes a line, so testing it
            // first would swallow the first line after this sentence once two blanks were seen.
            while (numBlankLinesSeen < 2 && lineIterator.MoveNext())
            {
                string currentLine = lineIterator.Current;
                currentLine = currentLine.Replace("COMMA", ",");
                IList <string> pieces = StringUtils.Split(currentLine);
                string         identifier;
                int            size = pieces.Count;
                switch (size)
                {
                case 1:
                {
                    // blank line between sentences or relations
                    numBlankLinesSeen++;
                    break;
                }

                case 3:
                {
                    // relation: <entity index 1> <entity index 2> <type>
                    string type = pieces[2];
                    IList <ExtractionObject> args    = new List <ExtractionObject>();
                    EntityMention            entity1 = indexToEntityMention[pieces[0]];
                    EntityMention            entity2 = indexToEntityMention[pieces[1]];
                    args.Add(entity1);
                    args.Add(entity2);
                    Span span = new Span(entity1.GetExtentTokenStart(), entity2.GetExtentTokenEnd());
                    identifier = RelationMention.MakeUniqueId();
                    RelationMention relationMention = new RelationMention(identifier, sentence, span, type, null, args);
                    AnnotationUtils.AddRelationMention(sentence, relationMention);
                    break;
                }

                case 9:
                {
                    // token

                    /*
                     * Roth token lines look like this:
                     *
                     * 19 Peop 9 O NNP/NNP Jamal/Ghosheh O O O
                     */
                    // Entities may be multiple words joined by '/'; we split these up
                    IList <string> words = StringUtils.Split(pieces[5], "/");
                    string text = StringUtils.Join(words, " ");
                    identifier = "entity" + pieces[0] + '-' + pieces[2];
                    // entity type of the word/expression
                    string nerTag = GetNormalizedNERTag(pieces[1]);
                    if (sentenceID == null)
                    {
                        sentenceID = pieces[0];
                    }
                    if (!nerTag.Equals("O"))
                    {
                        Span extentSpan = new Span(tokenCount, tokenCount + words.Count);
                        // Temporarily sets the head span to equal the extent span.
                        // This is so the entity has a head (in particular, getValue() works) even if preprocessSentences isn't called.
                        // The head span is later modified if preprocessSentences is called.
                        EntityMention entity = new EntityMention(identifier, sentence, extentSpan, extentSpan, nerTag, null, null);
                        AnnotationUtils.AddEntityMention(sentence, entity);
                        // we can get by using these indices as strings since we only use them
                        // as a hash key
                        string index = pieces[2];
                        indexToEntityMention[index] = entity;
                    }
                    foreach (string word in words)
                    {
                        CoreLabel label = new CoreLabel();
                        label.SetWord(word);
                        label.Set(typeof(CoreAnnotations.TextAnnotation), word);
                        label.Set(typeof(CoreAnnotations.ValueAnnotation), word);
                        // we don't set TokenBeginAnnotation or TokenEndAnnotation since we're
                        // not keeping track of character offsets
                        tokens.Add(label);
                    }
                    textContent.Append(text);
                    textContent.Append(' ');
                    tokenCount += words.Count;
                    break;
                }
                }
            }
            sentence.Set(typeof(CoreAnnotations.TextAnnotation), textContent.ToString());
            sentence.Set(typeof(CoreAnnotations.ValueAnnotation), textContent.ToString());
            sentence.Set(typeof(CoreAnnotations.TokensAnnotation), tokens);
            sentence.Set(typeof(CoreAnnotations.SentenceIDAnnotation), sentenceID);
            return(sentence);
        }
Example #11
0
        /// <summary>
        /// Parses one ACE document and converts each of its sentences, with the entity, relation
        /// and event mentions they contain, into an <c>ICoreMap</c>.
        /// </summary>
        /// <remarks>
        /// This is the per-document worker; callers should normally go through parse() instead.
        /// Single-token "sentences" that are just an SGML tag are skipped but still advance the
        /// running token offset.
        /// </remarks>
        /// <param name="prefix">
        /// prefix of the ACE filename to read, without the ".apf.xml" extension (e.g.
        /// "/u/mcclosky/scr/data/ACE2005/english_test/bc/CNN_CF_20030827.1630.01")
        /// </param>
        /// <param name="corpus">Not used by this method.</param>
        /// <returns>one converted sentence per retained sentence of the document</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Org.Xml.Sax.SAXException"/>
        /// <exception cref="Javax.Xml.Parsers.ParserConfigurationException"/>
        private IList <ICoreMap> ReadDocument(string prefix, Annotation corpus)
        {
            logger.Info("Reading document: " + prefix);
            IList <ICoreMap> sentences = new List <ICoreMap>();

            // ACE2004 documents need the version passed to the parser explicitly.
            AceDocument aceDocument = aceVersion.Equals("ACE2004")
                ? AceDocument.ParseDocument(prefix, false, aceVersion)
                : AceDocument.ParseDocument(prefix, false);
            string docId = aceDocument.GetId();

            // ACE entity mention id -> converted EntityMention, shared by all sentences
            IDictionary <string, EntityMention> entityMentionMap = Generics.NewHashMap();

            // number of tokens in all sentences preceding the current one
            int tokenOffset = 0;
            int sentenceCount;

            for (int sentenceIndex = 0; sentenceIndex < aceDocument.GetSentenceCount(); sentenceIndex++)
            {
                IList <AceToken>  tokens      = aceDocument.GetSentence(sentenceIndex);
                IList <CoreLabel> words       = new List <CoreLabel>();
                StringBuilder     textContent = new StringBuilder();

                foreach (AceToken token in tokens)
                {
                    CoreLabel word = new CoreLabel();
                    word.SetWord(token.GetLiteral());
                    word.Set(typeof(CoreAnnotations.ValueAnnotation), word.Word());
                    word.Set(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation), token.GetByteStart());
                    word.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), token.GetByteEnd());
                    // single space between tokens, none before the first
                    if (words.Count > 0)
                    {
                        textContent.Append(" ");
                    }
                    words.Add(word);
                    textContent.Append(token.GetLiteral());
                }

                // skip "sentences" that are really just SGML tags (which come from using the RobustTokenizer)
                bool isSgmlTag = false;
                if (words.Count == 1)
                {
                    string only = words[0].Word();
                    isSgmlTag = only.StartsWith("<") && only.EndsWith(">");
                }
                if (isSgmlTag)
                {
                    tokenOffset += tokens.Count;
                    continue;
                }

                ICoreMap sentence = new Annotation(textContent.ToString());
                sentence.Set(typeof(CoreAnnotations.DocIDAnnotation), docId);
                sentence.Set(typeof(CoreAnnotations.TokensAnnotation), words);
                logger.Info("Reading sentence: \"" + textContent + "\"");

                IList <AceEntityMention>   entityMentions   = aceDocument.GetEntityMentions(sentenceIndex);
                IList <AceRelationMention> relationMentions = aceDocument.GetRelationMentions(sentenceIndex);
                IList <AceEventMention>    eventMentions    = aceDocument.GetEventMentions(sentenceIndex);

                // entity mentions
                foreach (AceEntityMention aceEntityMention in entityMentions)
                {
                    // coreference id = id of the first entity that lists this mention, or empty
                    string corefID = string.Empty;
                    foreach (string entityID in aceDocument.GetKeySetEntities())
                    {
                        AceEntity entity = aceDocument.GetEntity(entityID);
                        if (entity.GetMentions().Contains(aceEntityMention))
                        {
                            corefID = entityID;
                            break;
                        }
                    }
                    EntityMention convertedEntity = ConvertAceEntityMention(aceEntityMention, docId, sentence, tokenOffset, corefID);
                    entityCounts.IncrementCount(convertedEntity.GetType());
                    logger.Info("CONVERTED MENTION HEAD SPAN: " + convertedEntity.GetHead());
                    logger.Info("CONVERTED ENTITY MENTION: " + convertedEntity);
                    AnnotationUtils.AddEntityMention(sentence, convertedEntity);
                    entityMentionMap[aceEntityMention.GetId()] = convertedEntity;
                }
                // TODO: make Entity objects as needed

                // relation mentions (a null conversion result means the mention is dropped)
                foreach (AceRelationMention aceRelationMention in relationMentions)
                {
                    RelationMention convertedRelation = ConvertAceRelationMention(aceRelationMention, docId, sentence, entityMentionMap);
                    if (convertedRelation == null)
                    {
                        continue;
                    }
                    relationCounts.IncrementCount(convertedRelation.GetType());
                    logger.Info("CONVERTED RELATION MENTION: " + convertedRelation);
                    AnnotationUtils.AddRelationMention(sentence, convertedRelation);
                }
                // TODO: make Relation objects

                // event mentions (a null conversion result means the mention is dropped)
                foreach (AceEventMention aceEventMention in eventMentions)
                {
                    EventMention convertedEvent = ConvertAceEventMention(aceEventMention, docId, sentence, entityMentionMap, tokenOffset);
                    if (convertedEvent == null)
                    {
                        continue;
                    }
                    eventCounts.IncrementCount(convertedEvent.GetType());
                    logger.Info("CONVERTED EVENT MENTION: " + convertedEvent);
                    AnnotationUtils.AddEventMention(sentence, convertedEvent);
                }
                // TODO: make Event objects

                sentences.Add(sentence);
                tokenOffset += tokens.Count;
            }
            return sentences;
        }