/// <summary>
/// Builds one "coreference" element per coreference chain and appends it to
/// <paramref name="corefInfo"/>.
/// </summary>
/// <remarks>
/// Within each emitted chain the representative mention is written first (flagged as
/// representative), followed by the remaining mentions in textual order. Chains with at
/// most one mention are skipped unless <c>options.printSingletons</c> is set.
/// </remarks>
/// <param name="options">Output options; <c>printSingletons</c> is read here.</param>
/// <param name="corefInfo">Parent element that receives the chain elements.</param>
/// <param name="sentences">Sentences passed through to the per-mention writer.</param>
/// <param name="corefChains">The chains to serialize; only the values are used.</param>
/// <param name="curNS">Namespace used for every element created.</param>
/// <returns>True when at least one chain element was appended.</returns>
private static bool AddCorefGraphInfo(AnnotationOutputter.Options options, Element corefInfo, IList <ICoreMap> sentences, IDictionary <int, CorefChain> corefChains, string curNS)
{
    bool anyChainWritten = false;
    foreach (CorefChain currentChain in corefChains.Values)
    {
        // Singleton chains are only emitted on request.
        bool isSkippedSingleton = !options.printSingletons && currentChain.GetMentionsInTextualOrder().Count <= 1;
        if (isSkippedSingleton)
        {
            continue;
        }

        anyChainWritten = true;
        Element chainNode = new Element("coreference", curNS);

        // The representative mention always leads the chain.
        CorefChain.CorefMention head = currentChain.GetRepresentativeMention();
        AddCorefMention(options, chainNode, curNS, sentences, head, true);

        // Every other mention follows in textual order.
        foreach (CorefChain.CorefMention other in currentChain.GetMentionsInTextualOrder())
        {
            if (other != head)
            {
                AddCorefMention(options, chainNode, curNS, sentences, other, false);
            }
        }

        corefInfo.AppendChild(chainNode);
    }
    return anyChainWritten;
}
/// <summary>The meat of the outputter</summary>
/// <remarks>
/// Writes a plain-text rendering of <paramref name="annotation"/> to <paramref name="pw"/>:
/// the document-level header fields that are present, then either a per-sentence report
/// (tokens, parse tree, sentiment tree, dependencies, entity/relation mentions, Open IE and
/// KBP triples) or, when the annotation has no sentences, a flat token listing. Coreference
/// sets and extracted quotes follow, and the writer is flushed at the end.
/// </remarks>
/// <param name="annotation">The annotation to render.</param>
/// <param name="pw">The writer that receives the output; it is flushed but not closed here.</param>
/// <param name="options">
/// Output options; <c>beamPrintingOption</c> and <c>constituentTreePrinter</c> are read here.
/// </param>
/// <exception cref="System.IO.IOException"/>
private static void Print(Annotation annotation, PrintWriter pw, AnnotationOutputter.Options options)
{
    double beam = options.beamPrintingOption;
    IList<ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));

    // Display docid if available
    string docId = annotation.Get(typeof(CoreAnnotations.DocIDAnnotation));
    if (docId != null)
    {
        IList<CoreLabel> tokens = annotation.Get(typeof(CoreAnnotations.TokensAnnotation));
        int nSentences = (sentences != null) ? sentences.Count : 0;
        int nTokens = (tokens != null) ? tokens.Count : 0;
        pw.Printf("Document: ID=%s (%d sentences, %d tokens)%n", docId, nSentences, nTokens);
    }

    // Display doctitle if available
    string docTitle = annotation.Get(typeof(CoreAnnotations.DocTitleAnnotation));
    if (docTitle != null)
    {
        pw.Printf("Document Title: %s%n", docTitle);
    }

    // Display docdate if available
    string docDate = annotation.Get(typeof(CoreAnnotations.DocDateAnnotation));
    if (docDate != null)
    {
        pw.Printf("Document Date: %s%n", docDate);
    }

    // Display doctype if available
    string docType = annotation.Get(typeof(CoreAnnotations.DocTypeAnnotation));
    if (docType != null)
    {
        pw.Printf("Document Type: %s%n", docType);
    }

    // Display docsourcetype if available
    string docSourceType = annotation.Get(typeof(CoreAnnotations.DocSourceTypeAnnotation));
    if (docSourceType != null)
    {
        pw.Printf("Document Source Type: %s%n", docSourceType);
    }

    // display each sentence in this annotation
    if (sentences != null)
    {
        // The set of token-level annotations to print is the same for every sentence.
        string[] tokenAnnotations = new string[]
        {
            "Text", "PartOfSpeech", "Lemma", "Answer", "NamedEntityTag", "CharacterOffsetBegin", "CharacterOffsetEnd",
            "NormalizedNamedEntityTag", "Timex", "TrueCase", "TrueCaseText", "SentimentClass", "WikipediaEntity"
        };

        // The loop bound must be declared explicitly; it was previously an undeclared identifier.
        int sentenceCount = sentences.Count;
        for (int i = 0; i < sentenceCount; i++)
        {
            pw.Println();
            ICoreMap sentence = sentences[i];
            IList<CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
            string sentiment = sentence.Get(typeof(SentimentCoreAnnotations.SentimentClass));
            string piece;
            if (sentiment == null)
            {
                piece = string.Empty;
            }
            else
            {
                piece = ", sentiment: " + sentiment;
            }
            // Sentence numbers are reported starting at 1.
            pw.Printf("Sentence #%d (%d tokens%s):%n", (i + 1), tokens.Count, piece);

            string text = sentence.Get(typeof(CoreAnnotations.TextAnnotation));
            pw.Println(text);

            // display the token-level annotations
            pw.Println();
            pw.Println("Tokens:");
            foreach (CoreLabel token in tokens)
            {
                pw.Print(token.ToShorterString(tokenAnnotations));
                pw.Println();
            }

            // display the parse tree for this sentence
            Tree tree = sentence.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
            if (tree != null)
            {
                pw.Println();
                pw.Println("Constituency parse: ");
                options.constituentTreePrinter.PrintTree(tree, pw);
            }

            // display sentiment tree if they asked for sentiment
            if (!StringUtils.IsNullOrEmpty(sentiment))
            {
                pw.Println();
                pw.Println("Sentiment-annotated binary tree:");
                Tree sTree = sentence.Get(typeof(SentimentCoreAnnotations.SentimentAnnotatedTree));
                if (sTree != null)
                {
                    sTree.PennPrint(pw, null);
                    pw.Println();
                }
            }

            // It is possible to turn off the semantic graphs, in which
            // case we don't want to recreate them using the dependency
            // printer. This might be relevant if using CoreNLP for a
            // language which doesn't have dependencies, for example.
            if (sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation)) != null)
            {
                pw.Println();
                pw.Println("Dependency Parse (enhanced plus plus dependencies):");
                pw.Print(sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation)).ToList());
            }

            // display the entity mentions
            IList<ICoreMap> entityMentions = sentence.Get(typeof(CoreAnnotations.MentionsAnnotation));
            if (entityMentions != null)
            {
                pw.Println();
                pw.Println("Extracted the following NER entity mentions:");
                foreach (ICoreMap entityMention in entityMentions)
                {
                    // Mentions without an entity type are not listed.
                    if (entityMention.Get(typeof(CoreAnnotations.EntityTypeAnnotation)) != null)
                    {
                        pw.Println(entityMention.Get(typeof(CoreAnnotations.TextAnnotation)) + "\t" + entityMention.Get(typeof(CoreAnnotations.EntityTypeAnnotation)));
                    }
                }
            }

            // display MachineReading entities and relations
            IList<EntityMention> entities = sentence.Get(typeof(MachineReadingAnnotations.EntityMentionsAnnotation));
            if (entities != null)
            {
                pw.Println();
                pw.Println("Extracted the following MachineReading entity mentions:");
                foreach (EntityMention e in entities)
                {
                    pw.Print('\t');
                    pw.Println(e);
                }
            }
            IList<RelationMention> relations = sentence.Get(typeof(MachineReadingAnnotations.RelationMentionsAnnotation));
            if (relations != null)
            {
                pw.Println();
                pw.Println("Extracted the following MachineReading relation mentions:");
                foreach (RelationMention r in relations)
                {
                    // Only relations that PrintableObject accepts for the configured beam are printed.
                    if (r.PrintableObject(beam))
                    {
                        pw.Println(r);
                    }
                }
            }

            // display OpenIE triples
            ICollection<RelationTriple> openieTriples = sentence.Get(typeof(NaturalLogicAnnotations.RelationTriplesAnnotation));
            if (openieTriples != null && !openieTriples.IsEmpty())
            {
                pw.Println();
                pw.Println("Extracted the following Open IE triples:");
                foreach (RelationTriple triple in openieTriples)
                {
                    pw.Println(OpenIE.TripleToString(triple, docId, sentence));
                }
            }

            // display KBP triples
            ICollection<RelationTriple> kbpTriples = sentence.Get(typeof(CoreAnnotations.KBPTriplesAnnotation));
            if (kbpTriples != null && !kbpTriples.IsEmpty())
            {
                pw.Println();
                pw.Println("Extracted the following KBP triples:");
                foreach (RelationTriple triple in kbpTriples)
                {
                    pw.Println(triple);
                }
            }
        }
    }
    else
    {
        // No sentence split is available: print the raw text followed by a flat token listing.
        IList<CoreLabel> tokens = annotation.Get(typeof(CoreAnnotations.TokensAnnotation));
        pw.Println("Tokens:");
        pw.Println(annotation.Get(typeof(CoreAnnotations.TextAnnotation)));
        // The tokens annotation is treated as optional above, so guard against it being absent here too.
        if (tokens != null)
        {
            foreach (CoreLabel token in tokens)
            {
                int tokenCharBegin = token.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
                int tokenCharEnd = token.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation));
                pw.Println("[Text=" + token.Word() + " CharacterOffsetBegin=" + tokenCharBegin + " CharacterOffsetEnd=" + tokenCharEnd + ']');
            }
        }
    }

    // The old-style doc-level coref annotations are not supported anymore;
    // only the new-style coreference graph is displayed.
    IDictionary<int, CorefChain> corefChains = annotation.Get(typeof(CorefCoreAnnotations.CorefChainAnnotation));
    if (corefChains != null && sentences != null)
    {
        foreach (CorefChain chain in corefChains.Values)
        {
            CorefChain.CorefMention representative = chain.GetRepresentativeMention();
            // The heading is printed lazily so chains holding only the representative produce no output.
            bool outputHeading = false;
            foreach (CorefChain.CorefMention mention in chain.GetMentionsInTextualOrder())
            {
                if (mention == representative)
                {
                    continue;
                }
                if (!outputHeading)
                {
                    outputHeading = true;
                    pw.Println();
                    pw.Println("Coreference set:");
                }
                // all offsets start at 1!
                pw.Printf("\t(%d,%d,[%d,%d]) -> (%d,%d,[%d,%d]), that is: \"%s\" -> \"%s\"%n",
                    mention.sentNum, mention.headIndex, mention.startIndex, mention.endIndex,
                    representative.sentNum, representative.headIndex, representative.startIndex, representative.endIndex,
                    mention.mentionSpan, representative.mentionSpan);
            }
        }
    }

    // display quotes if available
    if (annotation.Get(typeof(CoreAnnotations.QuotationsAnnotation)) != null)
    {
        pw.Println();
        pw.Println("Extracted quotes: ");
        IList<ICoreMap> allQuotes = QuoteAnnotator.GatherQuotes(annotation);
        foreach (ICoreMap quote in allQuotes)
        {
            // Prefer the canonical mention, then the speaker annotation, then a fixed placeholder.
            string speakerString;
            if (quote.Get(typeof(QuoteAttributionAnnotator.CanonicalMentionAnnotation)) != null)
            {
                speakerString = quote.Get(typeof(QuoteAttributionAnnotator.CanonicalMentionAnnotation));
            }
            else
            {
                if (quote.Get(typeof(QuoteAttributionAnnotator.SpeakerAnnotation)) != null)
                {
                    speakerString = quote.Get(typeof(QuoteAttributionAnnotator.SpeakerAnnotation));
                }
                else
                {
                    speakerString = "Unknown";
                }
            }
            pw.Printf("[QuotationIndex=%d, CharacterOffsetBegin=%d, Text=%s, Speaker=%s]%n",
                quote.Get(typeof(CoreAnnotations.QuotationIndexAnnotation)),
                quote.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation)),
                quote.Get(typeof(CoreAnnotations.TextAnnotation)),
                speakerString);
        }
    }

    pw.Flush();
}
/// <summary>Loads the CorefChain objects from the serialized buffer.</summary>
/// <remarks>
/// Expected layout, as read by this method: one line holding the cluster count; then, per
/// cluster, a header line "clusterId mentionCount" followed by one line per mention. Mention
/// fields are separated by single whitespace characters, in this order: two integers forming
/// the map key, the representative flag ("1" marks the representative), mention type, number,
/// gender, animacy, start index, end index, head index, cluster id, mention id, sentence
/// number, position length, that many position integers, and finally the space-escaped
/// mention span. One additional line is consumed after the last cluster.
/// </remarks>
/// <param name="reader">the buffer</param>
/// <returns>
/// A map from cluster id to clusters, or null when the first line read is empty.
/// </returns>
/// <exception cref="System.IO.IOException"/>
private static IDictionary<int, CorefChain> LoadCorefChains(BufferedReader reader)
{
    string line = reader.ReadLine().Trim();
    if (line.IsEmpty())
    {
        return null;
    }
    int clusterCount = System.Convert.ToInt32(line);
    IDictionary<int, CorefChain> chains = Generics.NewHashMap();

    // read each cluster
    for (int c = 0; c < clusterCount; c++)
    {
        line = reader.ReadLine().Trim();
        // string.Split(string) treats its argument as literal text, so "\\s" would never match
        // whitespace; the separator is a regular expression and must go through Regex.Split.
        string[] bits = System.Text.RegularExpressions.Regex.Split(line, "\\s");
        int cid = System.Convert.ToInt32(bits[0]);
        int mentionCount = System.Convert.ToInt32(bits[1]);
        IDictionary<IntPair, ICollection<CorefChain.CorefMention>> mentionMap = Generics.NewHashMap();
        CorefChain.CorefMention representative = null;

        // read each mention in this cluster
        for (int m = 0; m < mentionCount; m++)
        {
            line = reader.ReadLine();
            bits = System.Text.RegularExpressions.Regex.Split(line, "\\s");
            IntPair key = new IntPair(System.Convert.ToInt32(bits[0]), System.Convert.ToInt32(bits[1]));
            bool rep = bits[2].Equals("1");
            Dictionaries.MentionType mentionType = ParseMentionType(bits[3]);
            Dictionaries.Number number = ParseNumber(bits[4]);
            Dictionaries.Gender gender = ParseGender(bits[5]);
            Dictionaries.Animacy animacy = ParseAnimacy(bits[6]);
            int startIndex = System.Convert.ToInt32(bits[7]);
            int endIndex = System.Convert.ToInt32(bits[8]);
            int headIndex = System.Convert.ToInt32(bits[9]);
            int clusterID = System.Convert.ToInt32(bits[10]);
            int mentionID = System.Convert.ToInt32(bits[11]);
            int sentNum = System.Convert.ToInt32(bits[12]);

            // The position tuple is variable-length: its size precedes its elements.
            int posLen = System.Convert.ToInt32(bits[13]);
            int[] posElems = new int[posLen];
            for (int i = 0; i < posLen; i++)
            {
                posElems[i] = System.Convert.ToInt32(bits[14 + i]);
            }
            IntTuple position = new IntTuple(posElems);

            // The span is the field right after the position elements.
            string span = UnescapeSpace(bits[14 + posLen]);
            CorefChain.CorefMention mention = new CorefChain.CorefMention(mentionType, number, gender, animacy, startIndex, endIndex, headIndex, clusterID, mentionID, sentNum, position, span);

            // The IDictionary indexer throws KeyNotFoundException for an absent key rather than
            // returning null, so the lookup has to go through TryGetValue.
            ICollection<CorefChain.CorefMention> mentionsWithThisHead;
            if (!mentionMap.TryGetValue(key, out mentionsWithThisHead) || mentionsWithThisHead == null)
            {
                mentionsWithThisHead = Generics.NewHashSet();
                mentionMap[key] = mentionsWithThisHead;
            }
            mentionsWithThisHead.Add(mention);

            if (rep)
            {
                representative = mention;
            }
        }

        // construct the cluster
        CorefChain chain = new CorefChain(cid, mentionMap, representative);
        chains[cid] = chain;
    }

    // Consume the line that follows the last cluster.
    // NOTE(review): presumably a blank separator line in the serialized format - confirm against the writer.
    reader.ReadLine();
    return chains;
}
/// <summary>
/// Appends a "mention" element describing <paramref name="mention"/> to
/// <paramref name="chainElem"/>.
/// </summary>
/// <remarks>
/// The element carries the mention's sentence number, start, end and head indices and its
/// text. When sentences are available and <c>options.coreferenceContextSize</c> is positive,
/// left and right context strings built from the surrounding tokens are added as well.
/// </remarks>
/// <param name="options">Output options; <c>coreferenceContextSize</c> is read here.</param>
/// <param name="chainElem">The chain element that receives the new mention element.</param>
/// <param name="curNS">Namespace used for every element created.</param>
/// <param name="sentences">Document sentences used to look up context tokens; may be null.</param>
/// <param name="mention">The mention to serialize.</param>
/// <param name="representative">
/// When true the element gets a <c>representative="true"</c> attribute.
/// </param>
private static void AddCorefMention(AnnotationOutputter.Options options, Element chainElem, string curNS, IList<ICoreMap> sentences, CorefChain.CorefMention mention, bool representative)
{
    // Number of tokens taken on each side of the mention as context.
    // NOTE(review): options.coreferenceContextSize only switches context on or off; the window
    // itself is fixed. Confirm whether the option was meant to control the window size.
    const int ContextWindow = 5;

    Element mentionElem = new Element("mention", curNS);
    if (representative)
    {
        mentionElem.AddAttribute(new Attribute("representative", "true"));
    }

    // int has no static ToString(int); format each index through the instance method, using
    // the invariant culture so the XML content does not depend on the current locale.
    System.IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;
    SetSingleElement(mentionElem, "sentence", curNS, mention.sentNum.ToString(invariant));
    SetSingleElement(mentionElem, "start", curNS, mention.startIndex.ToString(invariant));
    SetSingleElement(mentionElem, "end", curNS, mention.endIndex.ToString(invariant));
    SetSingleElement(mentionElem, "head", curNS, mention.headIndex.ToString(invariant));

    string text = mention.mentionSpan;
    SetSingleElement(mentionElem, "text", curNS, text);

    // Do you want context with your coreference?
    if (sentences != null && options.coreferenceContextSize > 0)
    {
        // If so, use the sentences to get some context around the mention.
        // Sentence number and token indices are shifted by one to index the lists.
        IList<CoreLabel> tokens = sentences[mention.sentNum - 1].Get(typeof(CoreAnnotations.TokensAnnotation));
        int contextStart = Math.Max(mention.startIndex - 1 - ContextWindow, 0);
        int contextEnd = Math.Min(mention.endIndex - 1 + ContextWindow, tokens.Count);
        string leftContext = StringUtils.JoinWords(tokens, " ", contextStart, mention.startIndex - 1);
        string rightContext = StringUtils.JoinWords(tokens, " ", mention.endIndex - 1, contextEnd);
        SetSingleElement(mentionElem, "leftContext", curNS, leftContext);
        SetSingleElement(mentionElem, "rightContext", curNS, rightContext);
    }

    chainElem.AppendChild(mentionElem);
}
/// <summary>A utility to get useful information out of a CorefMention.</summary>
/// <remarks>
/// A utility to get useful information out of a CorefMention. In particular, it returns the CoreLabels which are
/// associated with this mention, and it returns a score for how much we think this mention should be the canonical
/// mention. The score is the square of the highest per-tag vote count among the mention's tokens, divided by the
/// number of tokens in the mention.
/// </remarks>
/// <param name="doc">The document this mention is referenced into.</param>
/// <param name="mention">The mention itself.</param>
/// <returns>A pair of the tokens in the mention, and a score for how much we like this mention as the canonical mention.</returns>
private static Pair<IList<CoreLabel>, double> GrokCorefMention(Annotation doc, CorefChain.CorefMention mention)
{
    // Sentence number and token indices are shifted by one to index the lists.
    IList<CoreLabel> tokens = doc.Get(typeof(CoreAnnotations.SentencesAnnotation))[mention.sentNum - 1].Get(typeof(CoreAnnotations.TokensAnnotation));
    IList<CoreLabel> mentionAsTokens = tokens.SubList(mention.startIndex - 1, mention.endIndex - 1);

    // Try to assess this mention's NER type: every token carrying a tag casts one vote for it.
    // The previous Stream().Filter(null).ForEach(null) chain had lost its lambdas and so never
    // counted anything; the votes are tallied explicitly instead.
    // NOTE(review): the predicate (skip tokens whose tag is null or "O") is reconstructed, not
    // visible in this file - confirm token.Ner() and the "O" convention against CoreLabel.
    System.Collections.Generic.Dictionary<string, int> nerVotes = new System.Collections.Generic.Dictionary<string, int>();
    int topVotes = 0;
    foreach (CoreLabel token in mentionAsTokens)
    {
        string tag = token.Ner();
        if (tag == null || "O".Equals(tag))
        {
            continue;
        }
        int votes;
        nerVotes.TryGetValue(tag, out votes);   // leaves votes at 0 for a tag not seen before
        votes++;
        nerVotes[tag] = votes;
        if (votes > topVotes)
        {
            topVotes = votes;
        }
    }

    // Only the winning tag's count feeds the score, so ties between tags need no tie-breaker.
    double nerCount = topVotes;
    double nerScore = nerCount * nerCount / ((double)mentionAsTokens.Count);

    // Return
    return Pair.MakePair(mentionAsTokens, nerScore);
}