/// <summary>
        /// Appends one <c>coreference</c> element per coreference chain to
        /// <paramref name="corefInfo"/>.
        /// </summary>
        /// <remarks>
        /// Within each chain element the representative mention is written first (flagged as
        /// representative), followed by the remaining mentions in textual order. Chains with at
        /// most one mention are skipped unless <c>options.printSingletons</c> is set.
        /// </remarks>
        /// <param name="options">Output options; <c>printSingletons</c> is read here, the rest is forwarded to AddCorefMention.</param>
        /// <param name="corefInfo">Parent element that receives the chain elements.</param>
        /// <param name="sentences">Document sentences, forwarded to AddCorefMention.</param>
        /// <param name="corefChains">Chains to serialize; only the values are used.</param>
        /// <param name="curNS">Namespace used for every created element.</param>
        /// <returns><c>true</c> if at least one chain element was appended.</returns>
        private static bool AddCorefGraphInfo(AnnotationOutputter.Options options, Element corefInfo, IList <ICoreMap> sentences, IDictionary <int, CorefChain> corefChains, string curNS)
        {
            bool wroteAnyChain = false;

            foreach (CorefChain currentChain in corefChains.Values)
            {
                // Short-circuit keeps the mention lookup from running when singletons are wanted.
                bool skipSingleton = !options.printSingletons && currentChain.GetMentionsInTextualOrder().Count <= 1;
                if (!skipSingleton)
                {
                    wroteAnyChain = true;
                    Element chainNode = new Element("coreference", curNS);
                    CorefChain.CorefMention head = currentChain.GetRepresentativeMention();

                    // The representative mention always leads the chain element.
                    AddCorefMention(options, chainNode, curNS, sentences, head, true);
                    foreach (CorefChain.CorefMention other in currentChain.GetMentionsInTextualOrder())
                    {
                        // Reference comparison: the representative was already written above.
                        if (other != head)
                        {
                            AddCorefMention(options, chainNode, curNS, sentences, other, false);
                        }
                    }
                    corefInfo.AppendChild(chainNode);
                }
            }
            return wroteAnyChain;
        }
        /// <summary>
        /// Writes a plain-text rendering of <paramref name="annotation"/> to <paramref name="pw"/>.
        /// </summary>
        /// <remarks>
        /// Output order: document metadata (id, title, date, type, source type) when present, then
        /// either one section per sentence (tokens, parses, mentions, relations, triples) or, when
        /// the annotation has no sentences, a flat token listing; then coreference sets and quotes.
        /// The writer is flushed before returning.
        /// </remarks>
        /// <param name="annotation">The annotated document to print.</param>
        /// <param name="pw">Destination writer.</param>
        /// <param name="options">Printing options; <c>beamPrintingOption</c> and <c>constituentTreePrinter</c> are read here.</param>
        /// <exception cref="System.IO.IOException"/>
        private static void Print(Annotation annotation, PrintWriter pw, AnnotationOutputter.Options options)
        {
            double           beam      = options.beamPrintingOption;
            IList <ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
            // Display docid if available
            string docId = annotation.Get(typeof(CoreAnnotations.DocIDAnnotation));

            if (docId != null)
            {
                IList <CoreLabel> tokens = annotation.Get(typeof(CoreAnnotations.TokensAnnotation));
                int nSentences           = (sentences != null) ? sentences.Count : 0;
                int nTokens = (tokens != null) ? tokens.Count : 0;
                pw.Printf("Document: ID=%s (%d sentences, %d tokens)%n", docId, nSentences, nTokens);
            }
            // Display doctitle if available
            string docTitle = annotation.Get(typeof(CoreAnnotations.DocTitleAnnotation));

            if (docTitle != null)
            {
                pw.Printf("Document Title: %s%n", docTitle);
            }
            // Display docdate if available
            string docDate = annotation.Get(typeof(CoreAnnotations.DocDateAnnotation));

            if (docDate != null)
            {
                pw.Printf("Document Date: %s%n", docDate);
            }
            // Display doctype if available
            string docType = annotation.Get(typeof(CoreAnnotations.DocTypeAnnotation));

            if (docType != null)
            {
                pw.Printf("Document Type: %s%n", docType);
            }
            // Display docsourcetype if available
            string docSourceType = annotation.Get(typeof(CoreAnnotations.DocSourceTypeAnnotation));

            if (docSourceType != null)
            {
                pw.Printf("Document Source Type: %s%n", docSourceType);
            }
            // display each sentence in this annotation
            if (sentences != null)
            {
                // The loop bound was previously an undeclared identifier; take it from the list.
                int sz = sentences.Count;
                for (int i = 0; i < sz; i++)
                {
                    pw.Println();
                    ICoreMap          sentence  = sentences[i];
                    IList <CoreLabel> tokens    = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
                    string            sentiment = sentence.Get(typeof(SentimentCoreAnnotations.SentimentClass));
                    string            piece;
                    if (sentiment == null)
                    {
                        piece = string.Empty;
                    }
                    else
                    {
                        piece = ", sentiment: " + sentiment;
                    }
                    pw.Printf("Sentence #%d (%d tokens%s):%n", (i + 1), tokens.Count, piece);
                    string text = sentence.Get(typeof(CoreAnnotations.TextAnnotation));
                    pw.Println(text);
                    // display the token-level annotations
                    string[] tokenAnnotations = new string[] { "Text", "PartOfSpeech", "Lemma", "Answer", "NamedEntityTag", "CharacterOffsetBegin", "CharacterOffsetEnd", "NormalizedNamedEntityTag", "Timex", "TrueCase", "TrueCaseText", "SentimentClass", "WikipediaEntity" };
                    pw.Println();
                    pw.Println("Tokens:");
                    foreach (CoreLabel token in tokens)
                    {
                        pw.Print(token.ToShorterString(tokenAnnotations));
                        pw.Println();
                    }
                    // display the parse tree for this sentence
                    Tree tree = sentence.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
                    if (tree != null)
                    {
                        pw.Println();
                        pw.Println("Constituency parse: ");
                        options.constituentTreePrinter.PrintTree(tree, pw);
                    }
                    // display sentiment tree if they asked for sentiment
                    if (!StringUtils.IsNullOrEmpty(sentiment))
                    {
                        pw.Println();
                        pw.Println("Sentiment-annotated binary tree:");
                        Tree sTree = sentence.Get(typeof(SentimentCoreAnnotations.SentimentAnnotatedTree));
                        if (sTree != null)
                        {
                            sTree.PennPrint(pw, null);
                            pw.Println();
                        }
                    }
                    // It is possible to turn off the semantic graphs, in which
                    // case we don't want to recreate them using the dependency
                    // printer.  This might be relevant if using CoreNLP for a
                    // language which doesn't have dependencies, for example.
                    if (sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation)) != null)
                    {
                        pw.Println();
                        pw.Println("Dependency Parse (enhanced plus plus dependencies):");
                        pw.Print(sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation)).ToList());
                    }
                    // display the entity mentions
                    IList <ICoreMap> entityMentions = sentence.Get(typeof(CoreAnnotations.MentionsAnnotation));
                    if (entityMentions != null)
                    {
                        pw.Println();
                        pw.Println("Extracted the following NER entity mentions:");
                        foreach (ICoreMap entityMention in entityMentions)
                        {
                            // Mentions without an entity type are not listed.
                            if (entityMention.Get(typeof(CoreAnnotations.EntityTypeAnnotation)) != null)
                            {
                                pw.Println(entityMention.Get(typeof(CoreAnnotations.TextAnnotation)) + "\t" + entityMention.Get(typeof(CoreAnnotations.EntityTypeAnnotation)));
                            }
                        }
                    }
                    // display MachineReading entities and relations
                    IList <EntityMention> entities = sentence.Get(typeof(MachineReadingAnnotations.EntityMentionsAnnotation));
                    if (entities != null)
                    {
                        pw.Println();
                        pw.Println("Extracted the following MachineReading entity mentions:");
                        foreach (EntityMention e in entities)
                        {
                            pw.Print('\t');
                            pw.Println(e);
                        }
                    }
                    IList <RelationMention> relations = sentence.Get(typeof(MachineReadingAnnotations.RelationMentionsAnnotation));
                    if (relations != null)
                    {
                        pw.Println();
                        pw.Println("Extracted the following MachineReading relation mentions:");
                        foreach (RelationMention r in relations)
                        {
                            // Only relations that PrintableObject accepts for this beam are shown.
                            if (r.PrintableObject(beam))
                            {
                                pw.Println(r);
                            }
                        }
                    }
                    // display OpenIE triples
                    ICollection <RelationTriple> openieTriples = sentence.Get(typeof(NaturalLogicAnnotations.RelationTriplesAnnotation));
                    if (openieTriples != null && !openieTriples.IsEmpty())
                    {
                        pw.Println();
                        pw.Println("Extracted the following Open IE triples:");
                        foreach (RelationTriple triple in openieTriples)
                        {
                            pw.Println(OpenIE.TripleToString(triple, docId, sentence));
                        }
                    }
                    // display KBP triples
                    ICollection <RelationTriple> kbpTriples = sentence.Get(typeof(CoreAnnotations.KBPTriplesAnnotation));
                    if (kbpTriples != null && !kbpTriples.IsEmpty())
                    {
                        pw.Println();
                        pw.Println("Extracted the following KBP triples:");
                        foreach (RelationTriple triple in kbpTriples)
                        {
                            pw.Println(triple);
                        }
                    }
                }
            }
            else
            {
                // No sentence split available: fall back to a flat listing of the document tokens.
                IList <CoreLabel> tokens = annotation.Get(typeof(CoreAnnotations.TokensAnnotation));
                pw.Println("Tokens:");
                pw.Println(annotation.Get(typeof(CoreAnnotations.TextAnnotation)));
                // An annotation without tokens prints only the header and text instead of failing.
                if (tokens != null)
                {
                    foreach (CoreLabel token in tokens)
                    {
                        int tokenCharBegin = token.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
                        int tokenCharEnd   = token.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation));
                        pw.Println("[Text=" + token.Word() + " CharacterOffsetBegin=" + tokenCharBegin + " CharacterOffsetEnd=" + tokenCharEnd + ']');
                    }
                }
            }
            // The old-style doc-level coref annotations are no longer supported;
            // only the new-style coreference graph is displayed.
            IDictionary <int, CorefChain> corefChains = annotation.Get(typeof(CorefCoreAnnotations.CorefChainAnnotation));

            if (corefChains != null && sentences != null)
            {
                foreach (CorefChain chain in corefChains.Values)
                {
                    CorefChain.CorefMention representative = chain.GetRepresentativeMention();
                    // The heading is printed lazily so chains holding only the representative print nothing.
                    bool outputHeading = false;
                    foreach (CorefChain.CorefMention mention in chain.GetMentionsInTextualOrder())
                    {
                        if (mention == representative)
                        {
                            continue;
                        }
                        if (!outputHeading)
                        {
                            outputHeading = true;
                            pw.Println();
                            pw.Println("Coreference set:");
                        }
                        // all offsets start at 1!
                        pw.Printf("\t(%d,%d,[%d,%d]) -> (%d,%d,[%d,%d]), that is: \"%s\" -> \"%s\"%n",
                                  mention.sentNum, mention.headIndex, mention.startIndex, mention.endIndex,
                                  representative.sentNum, representative.headIndex, representative.startIndex, representative.endIndex,
                                  mention.mentionSpan, representative.mentionSpan);
                    }
                }
            }
            // display quotes if available
            if (annotation.Get(typeof(CoreAnnotations.QuotationsAnnotation)) != null)
            {
                pw.Println();
                pw.Println("Extracted quotes: ");
                IList <ICoreMap> allQuotes = QuoteAnnotator.GatherQuotes(annotation);
                foreach (ICoreMap quote in allQuotes)
                {
                    // Speaker preference: canonical mention, then raw speaker, then "Unknown".
                    string speakerString;
                    if (quote.Get(typeof(QuoteAttributionAnnotator.CanonicalMentionAnnotation)) != null)
                    {
                        speakerString = quote.Get(typeof(QuoteAttributionAnnotator.CanonicalMentionAnnotation));
                    }
                    else
                    {
                        if (quote.Get(typeof(QuoteAttributionAnnotator.SpeakerAnnotation)) != null)
                        {
                            speakerString = quote.Get(typeof(QuoteAttributionAnnotator.SpeakerAnnotation));
                        }
                        else
                        {
                            speakerString = "Unknown";
                        }
                    }
                    pw.Printf("[QuotationIndex=%d, CharacterOffsetBegin=%d, Text=%s, Speaker=%s]%n",
                              quote.Get(typeof(CoreAnnotations.QuotationIndexAnnotation)),
                              quote.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation)),
                              quote.Get(typeof(CoreAnnotations.TextAnnotation)),
                              speakerString);
                }
            }
            pw.Flush();
        }
        /// <summary>Loads the CorefChain objects from the serialized buffer.</summary>
        /// <remarks>
        /// Layout as read by this method: a first line holding the cluster count (an empty line
        /// means "no chains"); then, per cluster, a header line <c>clusterId mentionCount</c>
        /// followed by one whitespace-separated line per mention. Mention fields, by position:
        /// 0-1 map key pair, 2 representative flag ("1" = representative), 3 mention type,
        /// 4 number, 5 gender, 6 animacy, 7 start index, 8 end index, 9 head index,
        /// 10 cluster id, 11 mention id, 12 sentence number, 13 position length, then that many
        /// position elements, then the escaped mention span. One further line is consumed after
        /// the last cluster.
        /// </remarks>
        /// <param name="reader">the buffer</param>
        /// <returns>A map from cluster id to clusters, or <c>null</c> when the first line is empty</returns>
        /// <exception cref="System.IO.IOException"/>
        private static IDictionary <int, CorefChain> LoadCorefChains(BufferedReader reader)
        {
            string line = reader.ReadLine().Trim();

            if (line.IsEmpty())
            {
                return(null);
            }
            int clusterCount = System.Convert.ToInt32(line);
            IDictionary <int, CorefChain> chains = Generics.NewHashMap();

            // read each cluster
            for (int c = 0; c < clusterCount; c++)
            {
                line = reader.ReadLine().Trim();
                // "\\s" is a regular expression (single whitespace character); string.Split
                // would treat it as a literal separator, so split through Regex instead.
                string[] bits         = System.Text.RegularExpressions.Regex.Split(line, "\\s");
                int      cid          = System.Convert.ToInt32(bits[0]);
                int      mentionCount = System.Convert.ToInt32(bits[1]);
                IDictionary <IntPair, ICollection <CorefChain.CorefMention> > mentionMap = Generics.NewHashMap();
                CorefChain.CorefMention representative = null;
                // read each mention in this cluster
                for (int m = 0; m < mentionCount; m++)
                {
                    line = reader.ReadLine();
                    bits = System.Text.RegularExpressions.Regex.Split(line, "\\s");
                    IntPair key = new IntPair(System.Convert.ToInt32(bits[0]), System.Convert.ToInt32(bits[1]));
                    bool    rep = bits[2].Equals("1");
                    Dictionaries.MentionType mentionType = ParseMentionType(bits[3]);
                    Dictionaries.Number      number      = ParseNumber(bits[4]);
                    Dictionaries.Gender      gender      = ParseGender(bits[5]);
                    Dictionaries.Animacy     animacy     = ParseAnimacy(bits[6]);
                    int   startIndex = System.Convert.ToInt32(bits[7]);
                    int   endIndex   = System.Convert.ToInt32(bits[8]);
                    int   headIndex  = System.Convert.ToInt32(bits[9]);
                    int   clusterID  = System.Convert.ToInt32(bits[10]);
                    int   mentionID  = System.Convert.ToInt32(bits[11]);
                    int   sentNum    = System.Convert.ToInt32(bits[12]);
                    int   posLen     = System.Convert.ToInt32(bits[13]);
                    int[] posElems   = new int[posLen];
                    for (int i = 0; i < posLen; i++)
                    {
                        posElems[i] = System.Convert.ToInt32(bits[14 + i]);
                    }
                    IntTuple position = new IntTuple(posElems);
                    // The span is the field right after the variable-length position tuple.
                    string   span     = UnescapeSpace(bits[14 + posLen]);
                    CorefChain.CorefMention mention = new CorefChain.CorefMention(mentionType, number, gender, animacy, startIndex, endIndex, headIndex, clusterID, mentionID, sentNum, position, span);
                    // The IDictionary indexer throws KeyNotFoundException for an unseen key,
                    // so probe with TryGetValue and create the bucket on first use.
                    ICollection <CorefChain.CorefMention> mentionsWithThisHead;
                    if (!mentionMap.TryGetValue(key, out mentionsWithThisHead) || mentionsWithThisHead == null)
                    {
                        mentionsWithThisHead = Generics.NewHashSet();
                        mentionMap[key]      = mentionsWithThisHead;
                    }
                    mentionsWithThisHead.Add(mention);
                    if (rep)
                    {
                        // If several mentions carry the flag, the last one read wins.
                        representative = mention;
                    }
                }
                // construct the cluster
                CorefChain chain = new CorefChain(cid, mentionMap, representative);
                chains[cid] = chain;
            }
            // Consume one more line after the clusters.
            // NOTE(review): presumably a trailing separator line — confirm against the writer.
            reader.ReadLine();
            return(chains);
        }
        /// <summary>
        /// Appends a <c>mention</c> element describing <paramref name="mention"/> to
        /// <paramref name="chainElem"/>.
        /// </summary>
        /// <remarks>
        /// The element carries the sentence number, start, end and head indices and the mention
        /// text. When sentences are available and <c>options.coreferenceContextSize</c> is
        /// positive, <c>leftContext</c> and <c>rightContext</c> children are added as well.
        /// </remarks>
        /// <param name="options">Output options; only <c>coreferenceContextSize</c> is read.</param>
        /// <param name="chainElem">Chain element that receives the new mention element.</param>
        /// <param name="curNS">Namespace used for every created element.</param>
        /// <param name="sentences">Document sentences, indexed by <c>mention.sentNum - 1</c>; may be null.</param>
        /// <param name="mention">The mention to serialize.</param>
        /// <param name="representative">When true, the element gets a <c>representative="true"</c> attribute.</param>
        private static void AddCorefMention(AnnotationOutputter.Options options, Element chainElem, string curNS, IList <ICoreMap> sentences, CorefChain.CorefMention mention, bool representative)
        {
            Element mentionElem = new Element("mention", curNS);

            if (representative)
            {
                mentionElem.AddAttribute(new Attribute("representative", "true"));
            }
            // int has no static ToString(int) overload; format each value through the instance
            // method, culture-invariant so the XML does not depend on the current locale.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            SetSingleElement(mentionElem, "sentence", curNS, mention.sentNum.ToString(invariant));
            SetSingleElement(mentionElem, "start", curNS, mention.startIndex.ToString(invariant));
            SetSingleElement(mentionElem, "end", curNS, mention.endIndex.ToString(invariant));
            SetSingleElement(mentionElem, "head", curNS, mention.headIndex.ToString(invariant));
            string text = mention.mentionSpan;

            SetSingleElement(mentionElem, "text", curNS, text);
            // Do you want context with your coreference?
            if (sentences != null && options.coreferenceContextSize > 0)
            {
                // NOTE(review): the window is fixed at 5 tokens per side; coreferenceContextSize
                // only switches the feature on. Confirm whether it was meant to set the width.
                const int contextWidth = 5;
                // Mention indices are 1-based (hence the "- 1" conversions to list positions).
                IList <CoreLabel> tokens = sentences[mention.sentNum - 1].Get(typeof(CoreAnnotations.TokensAnnotation));
                int    contextStart      = Math.Max(mention.startIndex - 1 - contextWidth, 0);
                int    contextEnd        = Math.Min(mention.endIndex - 1 + contextWidth, tokens.Count);
                string leftContext       = StringUtils.JoinWords(tokens, " ", contextStart, mention.startIndex - 1);
                string rightContext      = StringUtils.JoinWords(tokens, " ", mention.endIndex - 1, contextEnd);
                SetSingleElement(mentionElem, "leftContext", curNS, leftContext);
                SetSingleElement(mentionElem, "rightContext", curNS, rightContext);
            }
            chainElem.AppendChild(mentionElem);
        }
// Example #5
// 0
        /// <summary>A utility to get useful information out of a CorefMention.</summary>
        /// <remarks>
        /// Returns the CoreLabels covered by the mention together with a score for how much we
        /// think this mention should be the canonical mention. The score is the squared vote
        /// count of the winning NER tag divided by the number of tokens in the mention.
        /// </remarks>
        /// <param name="doc">The document this mention is referenced into.</param>
        /// <param name="mention">The mention itself.</param>
        /// <returns>A pair of the tokens in the mention, and a score for how much we like this mention as the canonical mention.</returns>
        private static Pair <IList <CoreLabel>, double> GrokCorefMention(Annotation doc, CorefChain.CorefMention mention)
        {
            // Mention indices are 1-based; convert to 0-based list positions.
            IList <CoreLabel> tokens          = doc.Get(typeof(CoreAnnotations.SentencesAnnotation))[mention.sentNum - 1].Get(typeof(CoreAnnotations.TokensAnnotation));
            IList <CoreLabel> mentionAsTokens = tokens.SubList(mention.startIndex - 1, mention.endIndex - 1);
            // Try to assess this mention's NER type
            ICounter <string> nerVotes = new ClassicCounter <string>();

            // The translated stream pipeline passed null for both the filter and the action, so
            // no votes were ever counted. Tally explicitly: one vote per token with a real tag.
            // NOTE(review): "O" is assumed to be the no-entity tag — confirm against the tagger.
            foreach (CoreLabel token in mentionAsTokens)
            {
                string tokenNer = token.Ner();
                if (tokenNer != null && !"O".Equals(tokenNer))
                {
                    nerVotes.IncrementCount(tokenNer);
                }
            }
            // NOTE(review): the tie-breaking comparator argument is null here; confirm that
            // Counters.Argmax accepts a null comparator.
            string ner      = Counters.Argmax(nerVotes, null);
            double nerCount = nerVotes.GetCount(ner);
            double nerScore = nerCount * nerCount / ((double)mentionAsTokens.Count);

            // Return
            return(Pair.MakePair(mentionAsTokens, nerScore));
        }