/// <summary>
/// Serializes a relation mention as a "relation" XML element in the given namespace.
/// </summary>
/// <remarks>
/// The element carries the mention's object id as an "id" attribute. The relation type and,
/// when it is not null, the subtype are appended directly to the "relation" element, followed
/// by an "arguments" element holding one serialized entity mention per argument, and finally
/// the element returned by MakeProbabilitiesElement.
/// NOTE(review): earlier code also created "type" and "subtype" elements that were filled but
/// never attached to the tree. They are removed here so the serialized output is unchanged.
/// If wrapping the values in those elements was the intent, that is a format change and
/// needs to be coordinated with whoever consumes this XML.
/// </remarks>
/// <param name="relation">the relation mention to serialize</param>
/// <param name="curNS">namespace passed to every element created here</param>
/// <returns>the populated "relation" element</returns>
private static Element ToXML(RelationMention relation, string curNS)
{
    Element top = new Element("relation", curNS);
    top.AddAttribute(new Attribute("id", relation.GetObjectId()));

    // Type (and optional subtype) go straight under <relation>, exactly as before.
    top.AppendChild(relation.GetType());
    if (relation.GetSubType() != null)
    {
        top.AppendChild(relation.GetSubType());
    }

    Element args = new Element("arguments", curNS);
    foreach (EntityMention mention in relation.GetEntityMentionArgs())
    {
        args.AppendChild(ToXML(mention, curNS));
    }
    top.AppendChild(args);

    top.AppendChild(MakeProbabilitiesElement(relation, curNS));
    return top;
}
/// <summary>
/// Builds a datum for the given relation mention and label by delegating to the feature factory.
/// </summary>
/// <param name="rel">the relation mention to featurize</param>
/// <param name="label">the label handed to the feature factory together with the mention</param>
/// <returns>whatever featureFactory.CreateDatum returns for these arguments</returns>
protected internal virtual IDatum<string, string> CreateDatum(RelationMention rel, string label)
{
    // Debug-only sanity check: the factory must have been set before this is called.
    System.Diagnostics.Debug.Assert(featureFactory != null);
    return featureFactory.CreateDatum(rel, label);
}
/// <summary>
/// Prints a per-label table (correct, predicted, actual, precision, recall, F) followed by a
/// "Total" row to the given writer.
/// </summary>
/// <remarks>
/// Keys of <paramref name="results"/> are (predicted, actual) pairs and the counter value is
/// how often that pair occurred. Only labels present in the gold label counter get a row.
/// Labels for which RelationMention.IsUnrelatedLabel returns true are printed but left out of
/// the totals. Precision and recall are reported as 0 when their denominator is 0; recall
/// used to divide unguarded and printed NaN in that case.
/// </remarks>
/// <param name="pw">destination for the table</param>
/// <param name="results">counts of (predicted, actual) label pairs</param>
/// <param name="labelCount">
/// gold counts per label; when null, the gold counts are derived from <paramref name="results"/>
/// </param>
private void PrintResultsInternal(PrintWriter pw, ICounter<Pair<string, string>> results, ClassicCounter<string> labelCount)
{
    ClassicCounter<string> correct = new ClassicCounter<string>();
    ClassicCounter<string> predictionCount = new ClassicCounter<string>();

    // Without a caller-supplied gold counter, tally gold labels from the results themselves.
    bool countGoldLabels = false;
    if (labelCount == null)
    {
        labelCount = new ClassicCounter<string>();
        countGoldLabels = true;
    }

    foreach (Pair<string, string> predictedActual in results.KeySet())
    {
        string predicted = predictedActual.first;
        string actual = predictedActual.second;
        double pairCount = results.GetCount(predictedActual);
        if (predicted.Equals(actual))
        {
            correct.IncrementCount(actual, pairCount);
        }
        predictionCount.IncrementCount(predicted, pairCount);
        if (countGoldLabels)
        {
            labelCount.IncrementCount(actual, pairCount);
        }
    }

    DecimalFormat formatter = new DecimalFormat();
    formatter.SetMaximumFractionDigits(1);
    formatter.SetMinimumFractionDigits(1);

    double totalCount = 0;
    double totalCorrect = 0;
    double totalPredicted = 0;
    pw.Println("Label\tCorrect\tPredict\tActual\tPrecn\tRecall\tF");

    IList <string> labels = new List <string>(labelCount.KeySet());
    labels.Sort();
    foreach (string label in labels)
    {
        double numcorrect = correct.GetCount(label);
        double predicted = predictionCount.GetCount(label);
        double trueCount = labelCount.GetCount(label);
        double precision = (predicted > 0) ? (numcorrect / predicted) : 0;
        // Guarded like precision: a label with no gold instances has recall 0, not NaN.
        double recall = (trueCount > 0) ? (numcorrect / trueCount) : 0;
        double f = (precision + recall > 0) ? 2 * precision * recall / (precision + recall) : 0.0;
        pw.Println(StringUtils.PadOrTrim(label, MaxLabelLength) + "\t" + numcorrect + "\t" + predicted + "\t" + trueCount + "\t" + formatter.Format(precision * 100) + "\t" + formatter.Format(100 * recall) + "\t" + formatter.Format(100 * f));
        if (!RelationMention.IsUnrelatedLabel(label))
        {
            totalCount += trueCount;
            totalCorrect += numcorrect;
            totalPredicted += predicted;
        }
    }

    double precision_1 = (totalPredicted > 0) ? (totalCorrect / totalPredicted) : 0;
    // totalCount is 0 when every listed label is an "unrelated" label (or there are none).
    double recall_1 = (totalCount > 0) ? (totalCorrect / totalCount) : 0;
    double f_1 = (totalPredicted > 0 && totalCorrect > 0) ? 2 * precision_1 * recall_1 / (precision_1 + recall_1) : 0.0;
    pw.Println("Total\t" + totalCorrect + "\t" + totalPredicted + "\t" + totalCount + "\t" + formatter.Format(100 * precision_1) + "\t" + formatter.Format(100 * recall_1) + "\t" + formatter.Format(100 * f_1));
}
/// <summary>
/// Classifies every candidate relation of the sentence and returns the predictions,
/// including those labelled as unrelated.
/// </summary>
/// <remarks>
/// Candidates are either generated for all entity pairs (when createUnrelatedRelations is set)
/// or taken from the sentence's RelationMentionsAnnotation. Each prediction is a newly
/// constructed RelationMention; the candidates themselves are not modified here.
/// </remarks>
/// <param name="sentence">the sentence whose relations are to be predicted</param>
/// <returns>one newly constructed relation mention per candidate</returns>
protected internal virtual IList<RelationMention> ExtractAllRelations(ICoreMap sentence)
{
    IList <RelationMention> candidates = null;
    if (createUnrelatedRelations)
    {
        // Every possible relation between the entities of the sentence.
        candidates = AnnotationUtils.GetAllUnrelatedRelations(relationMentionFactory, sentence, false);
    }
    else
    {
        // Only the candidates the reader attached to the sentence (as in KBP).
        candidates = sentence.Get(typeof(MachineReadingAnnotations.RelationMentionsAnnotation));
        if (candidates == null)
        {
            candidates = new List <RelationMention>();
        }
    }

    IList <RelationMention> predictions = new List <RelationMention>();
    foreach (RelationMention candidate in candidates)
    {
        // Classification proper.
        IDatum <string, string> testDatum = CreateDatum(candidate);
        string label = ClassOf(testDatum, candidate);
        ICounter <string> probs = ProbabilityOf(testDatum);
        double prob = probs.GetCount(label);

        // The justification is only computed when it will actually be logged.
        StringWriter justification = new StringWriter();
        PrintWriter justificationWriter = new PrintWriter(justification);
        if (logger.IsLoggable(Level.Info))
        {
            JustificationOf(testDatum, justificationWriter, label);
        }
        logger.Info("Current sentence: " + AnnotationUtils.TokensAndNELabelsToString(candidate.GetArg(0).GetSentence()) + "\n"
                    + "Classifying relation: " + candidate + "\n"
                    + "JUSTIFICATION for label GOLD:" + candidate.GetType() + " SYS:" + label + " (prob:" + prob + "):\n"
                    + justification.ToString());
        logger.Info("Justification done.");

        RelationMention predicted = relationMentionFactory.ConstructRelationMention(candidate.GetObjectId(), sentence, candidate.GetExtent(), label, null, candidate.GetArgs(), probs);
        predictions.Add(predicted);

        // Both outcomes log the same three lines; only the first line's wording differs.
        string outcomePrefix = predicted.GetType().Equals(candidate.GetType())
            ? "Classification: found similar type "
            : "Classification: found different type ";
        logger.Info(outcomePrefix + predicted.GetType() + " for relation: " + candidate);
        logger.Info("The predicted relation is: " + predicted);
        logger.Info("Current sentence: " + AnnotationUtils.TokensAndNELabelsToString(candidate.GetArg(0).GetSentence()));
    }
    return predictions;
}
/// <summary>
/// Converts an ACE relation mention into a RelationMention over already-converted entity mentions.
/// </summary>
/// <remarks>
/// Arguments are kept in the order in which the ACE mention stores them. The extent of the
/// relation spans from the smallest argument extent start to the largest argument extent end.
/// The mention is rejected (null is returned and the problem is logged) when an argument
/// cannot be found in <paramref name="entityMap"/>, or when the argument roles are not
/// exactly "arg-1" followed by "arg-2" (case-insensitive).
/// </remarks>
/// <param name="aceRelationMention">the ACE relation mention to convert</param>
/// <param name="docId">document id; not used by this method</param>
/// <param name="sentence">sentence the resulting relation mention is attached to</param>
/// <param name="entityMap">converted entity mentions keyed by ACE entity mention id</param>
/// <returns>the converted relation mention, or null when the mention had to be ignored</returns>
private RelationMention ConvertAceRelationMention(AceRelationMention aceRelationMention, string docId, ICoreMap sentence, IDictionary<string, EntityMention> entityMap)
{
    IList <AceRelationMentionArgument> args = Arrays.AsList(aceRelationMention.GetArgs());
    IList <ExtractionObject> convertedArgs = new List <ExtractionObject>();
    IList <string> argNames = new List <string>();

    // The arguments are already stored in semantic order; preserve that ordering.
    int left = int.MaxValue;
    int right = int.MinValue;
    foreach (AceRelationMentionArgument arg in args)
    {
        string argId = arg.GetContent().GetId();

        // TryGetValue instead of the indexer: the indexer throws KeyNotFoundException for an
        // unknown id, which would bypass the "ignore this mention" handling below.
        EntityMention mention;
        if (!entityMap.TryGetValue(argId, out mention) || mention == null)
        {
            logger.Severe("READER ERROR: Failed to find relation argument with id " + argId);
            logger.Severe("This happens because a few relation mentions illegally span multiple sentences. Will ignore this mention.");
            return null;
        }

        convertedArgs.Add(mention);
        argNames.Add(arg.GetRole());
        if (mention.GetExtentTokenStart() < left)
        {
            left = mention.GetExtentTokenStart();
        }
        if (mention.GetExtentTokenEnd() > right)
        {
            right = mention.GetExtentTokenEnd();
        }
    }

    bool hasExpectedRoles = argNames.Count == 2
        && Sharpen.Runtime.EqualsIgnoreCase(argNames[0], "arg-1")
        && Sharpen.Runtime.EqualsIgnoreCase(argNames[1], "arg-2");
    if (!hasExpectedRoles)
    {
        // Join the names explicitly: concatenating the list itself prints its type name.
        logger.Severe("READER ERROR: Invalid succession of arguments in relation mention: " + "[" + string.Join(", ", argNames) + "]");
        logger.Severe("ACE relations must have two arguments. Will ignore this mention.");
        return null;
    }

    return new RelationMention(aceRelationMention.GetId(), sentence, new Span(left, right), aceRelationMention.GetParent().GetType(), aceRelationMention.GetParent().GetSubtype(), convertedArgs, null);
}
/// <summary>
/// Creates the datum for a relation mention at test time. Declared separately from
/// CreateDatum in case creating a test datum needs to differ.
/// </summary>
/// <param name="rel">the relation mention to build a datum for</param>
/// <param name="logger">
/// NOTE(review): presumably receives diagnostic output while the datum is built; confirm
/// against the implementations.
/// </param>
/// <returns>the datum built by the implementing class</returns>
public abstract IDatum <string, string> CreateTestDatum(RelationMention rel, Logger logger);
/// <summary>
/// Returns a collection of strings for the given relation mention; going by the method name
/// these are feature values. The exact contents are defined by the implementing class.
/// </summary>
/// <param name="rel">the relation mention to inspect</param>
/// <param name="dependency_path_words">
/// NOTE(review): the parameter name looks like one particular feature name, so this is
/// presumably a selector for which feature type to compute; confirm against the implementations.
/// </param>
public abstract ICollection <string> GetFeatures(RelationMention rel, string dependency_path_words);
/// <summary>
/// Returns a single string for the given relation mention; going by the method name this is
/// one feature value. The exact contents are defined by the implementing class.
/// </summary>
/// <param name="rel">the relation mention to inspect</param>
/// <param name="dependency_path_lowlevel">
/// NOTE(review): the parameter name looks like one particular feature name, so this is
/// presumably a selector for which feature type to compute; confirm against the implementations.
/// </param>
public abstract string GetFeature(RelationMention rel, string dependency_path_lowlevel);
/// <summary>
/// Creates a datum with string labels and string features for the given relation mention.
/// How the mention is turned into features is defined by the implementing class.
/// </summary>
/// <param name="rel">the relation mention to build a datum for</param>
/// <returns>the datum built by the implementing class</returns>
public abstract IDatum <string, string> CreateDatum(RelationMention rel);
/// <summary>
/// Reads one sentence, with its entity and relation mentions, from the line iterator.
/// </summary>
/// <remarks>
/// Each line is split with StringUtils.Split after replacing "COMMA" by ",". The number of
/// pieces decides how the line is interpreted: 1 piece is a blank separator line, 9 pieces a
/// token line, 3 pieces a relation line; lines with any other number of pieces are ignored.
/// Reading stops after two blank lines (one separates the tokens from the relations, the
/// second ends the sentence) or when the input is exhausted. The blank-line count is tested
/// before the iterator is advanced, so no line of the following sentence is consumed.
/// NOTE(review): relation lines look up both entities by index and build the span from the
/// first entity's start to the second entity's end; this assumes both indices were seen as
/// entities and that the first precedes the second. Confirm against the data.
/// </remarks>
/// <param name="docId">document id stored on the sentence</param>
/// <param name="lineIterator">iterator over the remaining input lines; advanced by this method</param>
/// <returns>the sentence annotation with doc id, text, value, tokens, sentence id and mentions set</returns>
private Annotation ReadSentence(string docId, IEnumerator<string> lineIterator)
{
    Annotation sentence = new Annotation(string.Empty);
    sentence.Set(typeof(CoreAnnotations.DocIDAnnotation), docId);
    sentence.Set(typeof(MachineReadingAnnotations.EntityMentionsAnnotation), new List <EntityMention>());

    // Tokens and text content are set on the sentence once it has been read completely.
    StringBuilder textContent = new StringBuilder(); // full text read so far
    int tokenCount = 0;                              // how many tokens we've seen so far
    IList <CoreLabel> tokens = new List <CoreLabel>();
    int numBlankLinesSeen = 0;
    string sentenceID = null;
    // Entities seen so far, keyed by their index column, for use by relation lines.
    IDictionary <string, EntityMention> indexToEntityMention = new Dictionary <string, EntityMention>();

    // Check the blank-line count first: MoveNext() consumes a line, so calling it after the
    // sentence has ended would silently drop the first line of the next sentence.
    while (numBlankLinesSeen < 2 && lineIterator.MoveNext())
    {
        string currentLine = lineIterator.Current;
        currentLine = currentLine.Replace("COMMA", ",");
        IList <string> pieces = StringUtils.Split(currentLine);
        string identifier;
        int size = pieces.Count;
        switch (size)
        {
            case 1:
            {
                // blank line between sentences or relations
                numBlankLinesSeen++;
                break;
            }

            case 3:
            {
                // relation: <entity index 1> <entity index 2> <type>
                string type = pieces[2];
                IList <ExtractionObject> args = new List <ExtractionObject>();
                EntityMention entity1 = indexToEntityMention[pieces[0]];
                EntityMention entity2 = indexToEntityMention[pieces[1]];
                args.Add(entity1);
                args.Add(entity2);
                Span span = new Span(entity1.GetExtentTokenStart(), entity2.GetExtentTokenEnd());
                identifier = RelationMention.MakeUniqueId();
                RelationMention relationMention = new RelationMention(identifier, sentence, span, type, null, args);
                AnnotationUtils.AddRelationMention(sentence, relationMention);
                break;
            }

            case 9:
            {
                // token
                /*
                 * Roth token lines look like this:
                 *
                 * 19 Peop 9 O NNP/NNP Jamal/Ghosheh O O O
                 */
                // Entities may be multiple words joined by '/'; we split these up.
                IList <string> words = StringUtils.Split(pieces[5], "/");
                string text = StringUtils.Join(words, " ");
                identifier = "entity" + pieces[0] + '-' + pieces[2];
                string nerTag = GetNormalizedNERTag(pieces[1]); // entity type of the word/expression
                if (sentenceID == null)
                {
                    sentenceID = pieces[0];
                }
                if (!nerTag.Equals("O"))
                {
                    Span extentSpan = new Span(tokenCount, tokenCount + words.Count);
                    // Temporarily sets the head span to equal the extent span, so the entity has
                    // a head even if the sentences are never preprocessed; preprocessing may
                    // replace the head span later.
                    EntityMention entity = new EntityMention(identifier, sentence, extentSpan, extentSpan, nerTag, null, null);
                    AnnotationUtils.AddEntityMention(sentence, entity);
                    // The index is only ever used as a hash key, so keeping it as a string is fine.
                    string index = pieces[2];
                    indexToEntityMention[index] = entity;
                }
                foreach (string word in words)
                {
                    CoreLabel label = new CoreLabel();
                    label.SetWord(word);
                    label.Set(typeof(CoreAnnotations.TextAnnotation), word);
                    label.Set(typeof(CoreAnnotations.ValueAnnotation), word);
                    // No token begin/end annotations: character offsets are not tracked here.
                    tokens.Add(label);
                }
                textContent.Append(text);
                textContent.Append(' ');
                tokenCount += words.Count;
                break;
            }
        }
    }

    sentence.Set(typeof(CoreAnnotations.TextAnnotation), textContent.ToString());
    sentence.Set(typeof(CoreAnnotations.ValueAnnotation), textContent.ToString());
    sentence.Set(typeof(CoreAnnotations.TokensAnnotation), tokens);
    sentence.Set(typeof(CoreAnnotations.SentenceIDAnnotation), sentenceID);
    return sentence;
}
/// <summary>
/// Reads a single ACE*.apf.xml document and converts it into one annotated sentence per ACE
/// sentence. You probably should call parse() instead.
/// </summary>
/// <remarks>
/// For every sentence the tokens are turned into CoreLabels carrying word, value and
/// character offsets; entity, relation and event mentions are then converted and attached.
/// Sentences consisting of a single token that starts with "&lt;" and ends with "&gt;" are
/// skipped, but their tokens still advance the running token offset.
/// </remarks>
/// <param name="prefix">
/// prefix of ACE filename to read (e.g.
/// "/u/mcclosky/scr/data/ACE2005/english_test/bc/CNN_CF_20030827.1630.01"
/// ) (no ".apf.xml" extension)
/// </param>
/// <param name="corpus">not used by this method</param>
/// <returns>the converted sentences, in document order</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Xml.Sax.SAXException"/>
/// <exception cref="Javax.Xml.Parsers.ParserConfigurationException"/>
private IList<ICoreMap> ReadDocument(string prefix, Annotation corpus)
{
    logger.Info("Reading document: " + prefix);
    IList <ICoreMap> converted = new List <ICoreMap>();

    AceDocument aceDocument = aceVersion.Equals("ACE2004")
        ? AceDocument.ParseDocument(prefix, false, aceVersion)
        : AceDocument.ParseDocument(prefix, false);
    string docId = aceDocument.GetId();

    // ACE entity mention id -> converted EntityMention, shared across all sentences.
    IDictionary <string, EntityMention> entityMentionMap = Generics.NewHashMap();

    int tokenOffset = 0;
    for (int sentenceIndex = 0; sentenceIndex < aceDocument.GetSentenceCount(); sentenceIndex++)
    {
        IList <AceToken> aceTokens = aceDocument.GetSentence(sentenceIndex);
        IList <CoreLabel> words = new List <CoreLabel>();
        StringBuilder textContent = new StringBuilder();

        int position = 0;
        foreach (AceToken aceToken in aceTokens)
        {
            CoreLabel word = new CoreLabel();
            word.SetWord(aceToken.GetLiteral());
            word.Set(typeof(CoreAnnotations.ValueAnnotation), word.Word());
            word.Set(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation), aceToken.GetByteStart());
            word.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), aceToken.GetByteEnd());
            words.Add(word);

            // Single space between tokens, none before the first.
            if (position > 0)
            {
                textContent.Append(" ");
            }
            textContent.Append(aceToken.GetLiteral());
            position++;
        }

        // Skip "sentences" that are really just SGML tags (which come from using the RobustTokenizer).
        bool isLoneSgmlTag = false;
        if (words.Count == 1)
        {
            string only = words[0].Word();
            isLoneSgmlTag = only.StartsWith("<") && only.EndsWith(">");
        }
        if (isLoneSgmlTag)
        {
            tokenOffset += aceTokens.Count;
            continue;
        }

        ICoreMap sentence = new Annotation(textContent.ToString());
        sentence.Set(typeof(CoreAnnotations.DocIDAnnotation), docId);
        sentence.Set(typeof(CoreAnnotations.TokensAnnotation), words);
        logger.Info("Reading sentence: \"" + textContent + "\"");

        IList <AceEntityMention> entityMentions = aceDocument.GetEntityMentions(sentenceIndex);
        IList <AceRelationMention> relationMentions = aceDocument.GetRelationMentions(sentenceIndex);
        IList <AceEventMention> eventMentions = aceDocument.GetEventMentions(sentenceIndex);

        // Entity mentions.
        foreach (AceEntityMention aceEntityMention in entityMentions)
        {
            // The coref id is the id of the first entity listing this mention; empty if none does.
            string corefID = string.Empty;
            foreach (string entityID in aceDocument.GetKeySetEntities())
            {
                if (!aceDocument.GetEntity(entityID).GetMentions().Contains(aceEntityMention))
                {
                    continue;
                }
                corefID = entityID;
                break;
            }

            EntityMention entity = ConvertAceEntityMention(aceEntityMention, docId, sentence, tokenOffset, corefID);
            entityCounts.IncrementCount(entity.GetType());
            logger.Info("CONVERTED MENTION HEAD SPAN: " + entity.GetHead());
            logger.Info("CONVERTED ENTITY MENTION: " + entity);
            AnnotationUtils.AddEntityMention(sentence, entity);
            entityMentionMap[aceEntityMention.GetId()] = entity;
        }

        // TODO: make Entity objects as needed

        // Relation mentions; a null conversion result means the mention is dropped.
        foreach (AceRelationMention aceRelationMention in relationMentions)
        {
            RelationMention relation = ConvertAceRelationMention(aceRelationMention, docId, sentence, entityMentionMap);
            if (relation == null)
            {
                continue;
            }
            relationCounts.IncrementCount(relation.GetType());
            logger.Info("CONVERTED RELATION MENTION: " + relation);
            AnnotationUtils.AddRelationMention(sentence, relation);
        }

        // TODO: make Relation objects

        // Event mentions; a null conversion result means the mention is dropped.
        foreach (AceEventMention aceEventMention in eventMentions)
        {
            EventMention evt = ConvertAceEventMention(aceEventMention, docId, sentence, entityMentionMap, tokenOffset);
            if (evt == null)
            {
                continue;
            }
            eventCounts.IncrementCount(evt.GetType());
            logger.Info("CONVERTED EVENT MENTION: " + evt);
            AnnotationUtils.AddEventMention(sentence, evt);
        }

        // TODO: make Event objects

        converted.Add(sentence);
        tokenOffset += aceTokens.Count;
    }
    return converted;
}