Esempio n. 1
0
        /// <summary>Read in a CoreDocument from this input stream.</summary>
        /// <remarks>
        /// The stream in the returned pair is the one handed back by <c>Read</c>, which is not
        /// necessarily <paramref name="is"/> itself. Callers that want to keep reading (or to close
        /// the stream) should use the returned stream.
        /// </remarks>
        /// <param name="is">The input stream to read a CoreDocument's annotation from</param>
        /// <returns>A pair with the CoreDocument and the input stream returned by the underlying read</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        /// <exception cref="System.InvalidCastException"/>
        public virtual Pair <CoreDocument, InputStream> ReadCoreDocument(InputStream @is)
        {
            Pair <Annotation, InputStream> readPair = Read(@is);
            CoreDocument readCoreDocument           = new CoreDocument(readPair.First());

            // Propagate the stream that Read returned instead of the argument. This mirrors
            // WriteCoreDocument, which returns whatever Write hands back.
            // NOTE(review): presumably Read may wrap @is (e.g. in an object/decompression stream);
            // confirm against the serializer's Read contract.
            return(new Pair <CoreDocument, InputStream>(readCoreDocument, readPair.Second()));
        }
Esempio n. 2
0
        /// <summary>Return the canonical entity mention for this entity mention.</summary>
        /// <returns>
        /// The document-level entity mention at the index stored under
        /// <c>CoreAnnotations.CanonicalEntityMentionIndexAnnotation</c> on this mention's CoreMap,
        /// or an empty Optional when no index is present.
        /// </returns>
        public virtual Optional <Edu.Stanford.Nlp.Pipeline.CoreEntityMention> CanonicalEntityMention()
        {
            // Resolve the owning document once; it is only indexed into when an index is present.
            CoreDocument   myDocument = sentence.Document();
            Optional <int> canonicalEntityMentionIndex = Optional.OfNullable(CoreMap().Get(typeof(CoreAnnotations.CanonicalEntityMentionIndexAnnotation)));

            return(canonicalEntityMentionIndex.IsPresent()
                ? Optional.Of(myDocument.EntityMentions()[canonicalEntityMentionIndex.Get()])
                : Optional.Empty());
        }
Esempio n. 3
0
 /// <summary>Wrap a sentence-level CoreMap together with the document it belongs to.</summary>
 /// <param name="myDocument">The document that owns this sentence.</param>
 /// <param name="coreMapSentence">The underlying annotation map for the sentence.</param>
 public CoreSentence(CoreDocument myDocument, ICoreMap coreMapSentence)
 {
     // Plain field initialisation; neither argument is inspected here.
     sentenceCoreMap = coreMapSentence;
     document        = myDocument;
 }
Esempio n. 4
0
        /// <summary>
        /// Build a quote wrapper around <paramref name="coreMapQuote"/>: attaches the quote's
        /// sentences and, when the corresponding annotations are present, the speaker and
        /// canonical speaker text, tokens, character offsets and matching entity mention.
        /// </summary>
        /// <param name="myDocument">The document the quote belongs to; its sentences, tokens and entity mentions are indexed into.</param>
        /// <param name="coreMapQuote">The annotation map describing the quote.</param>
        public CoreQuote(CoreDocument myDocument, ICoreMap coreMapQuote)
        {
            this.document     = myDocument;
            this.quoteCoreMap = coreMapQuote;

            // attach sentences to the quote (begin and end sentence indices are both inclusive)
            this.sentences = new List <CoreSentence>();
            int firstSentenceIndex = this.quoteCoreMap.Get(typeof(CoreAnnotations.SentenceBeginAnnotation));
            int lastSentenceIndex  = this.quoteCoreMap.Get(typeof(CoreAnnotations.SentenceEndAnnotation));

            for (int currSentIndex = firstSentenceIndex; currSentIndex <= lastSentenceIndex; currSentIndex++)
            {
                this.sentences.Add(this.document.Sentences()[currSentIndex]);
            }

            // optional speaker info...note there may not be an entity mention corresponding to the speaker
            // OfNullable yields an empty Optional for a missing annotation and avoids looking each key up twice.
            this.speaker          = Optional.OfNullable(this.quoteCoreMap.Get(typeof(QuoteAttributionAnnotator.SpeakerAnnotation)));
            this.canonicalSpeaker = Optional.OfNullable(this.quoteCoreMap.Get(typeof(QuoteAttributionAnnotator.CanonicalMentionAnnotation)));

            // set up info for direct speaker mention (example: "He")
            // The indices are nullable so that the guard below can actually detect a missing annotation;
            // a plain int compared against null is always "not null".
            int? firstSpeakerTokenIndex = this.quoteCoreMap.Get(typeof(QuoteAttributionAnnotator.MentionBeginAnnotation));
            int? lastSpeakerTokenIndex  = this.quoteCoreMap.Get(typeof(QuoteAttributionAnnotator.MentionEndAnnotation));

            this.speakerTokens        = Optional.Empty();
            this.speakerCharOffsets   = Optional.Empty();
            this.speakerEntityMention = Optional.Empty();
            if (firstSpeakerTokenIndex != null && lastSpeakerTokenIndex != null)
            {
                List <CoreLabel> mentionTokens  = CollectQuoteSpeakerTokens(this.document, firstSpeakerTokenIndex.Value, lastSpeakerTokenIndex.Value);
                Pair <int, int>  mentionOffsets = QuoteSpeakerCharOffsets(mentionTokens);
                this.speakerTokens        = Optional.Of(mentionTokens);
                this.speakerCharOffsets   = Optional.Of(mentionOffsets);
                this.speakerEntityMention = FindQuoteSpeakerEntityMention(this.document, mentionOffsets);
            }

            // set up info for canonical speaker mention (example: "Joe Smith")
            int? firstCanonicalSpeakerTokenIndex = this.quoteCoreMap.Get(typeof(QuoteAttributionAnnotator.CanonicalMentionBeginAnnotation));
            int? lastCanonicalSpeakerTokenIndex  = this.quoteCoreMap.Get(typeof(QuoteAttributionAnnotator.CanonicalMentionEndAnnotation));

            this.canonicalSpeakerTokens        = Optional.Empty();
            this.canonicalSpeakerCharOffsets   = Optional.Empty();
            this.canonicalSpeakerEntityMention = Optional.Empty();
            if (firstCanonicalSpeakerTokenIndex != null && lastCanonicalSpeakerTokenIndex != null)
            {
                List <CoreLabel> canonicalTokens  = CollectQuoteSpeakerTokens(this.document, firstCanonicalSpeakerTokenIndex.Value, lastCanonicalSpeakerTokenIndex.Value);
                Pair <int, int>  canonicalOffsets = QuoteSpeakerCharOffsets(canonicalTokens);
                this.canonicalSpeakerTokens        = Optional.Of(canonicalTokens);
                this.canonicalSpeakerCharOffsets   = Optional.Of(canonicalOffsets);
                this.canonicalSpeakerEntityMention = FindQuoteSpeakerEntityMention(this.document, canonicalOffsets);
            }

            // record if there is speaker info
            this.hasSpeaker          = this.speaker.IsPresent();
            this.hasCanonicalSpeaker = this.canonicalSpeaker.IsPresent();
        }

        /// <summary>Copy the document tokens in the inclusive index range [firstTokenIndex, lastTokenIndex] into a new list.</summary>
        private static List <CoreLabel> CollectQuoteSpeakerTokens(CoreDocument sourceDocument, int firstTokenIndex, int lastTokenIndex)
        {
            List <CoreLabel> collected = new List <CoreLabel>();
            for (int tokenIndex = firstTokenIndex; tokenIndex <= lastTokenIndex; tokenIndex++)
            {
                collected.Add(sourceDocument.Tokens()[tokenIndex]);
            }
            return(collected);
        }

        /// <summary>Pair the character begin offset of the first token with the character end offset of the last token.</summary>
        private static Pair <int, int> QuoteSpeakerCharOffsets(List <CoreLabel> mentionTokens)
        {
            int charOffsetBegin = mentionTokens[0].Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
            int charOffsetEnd   = mentionTokens[mentionTokens.Count - 1].Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation));
            return(new Pair <int, int>(charOffsetBegin, charOffsetEnd));
        }

        /// <summary>Return the first entity mention of the document whose character offsets equal the given offsets, or an empty Optional.</summary>
        private static Optional <CoreEntityMention> FindQuoteSpeakerEntityMention(CoreDocument sourceDocument, Pair <int, int> targetOffsets)
        {
            foreach (CoreEntityMention candidateEntityMention in sourceDocument.EntityMentions())
            {
                if (candidateEntityMention.CharOffsets().Equals(targetOffsets))
                {
                    return(Optional.Of(candidateEntityMention));
                }
            }
            return(Optional.Empty());
        }
Esempio n. 5
0
        /// <summary>Serialize a CoreDocument by appending its wrapped annotation to an output stream.</summary>
        /// <param name="document">The CoreDocument whose internal annotation is written</param>
        /// <param name="os">The output stream to append to</param>
        /// <returns>The stream returned by the underlying write; this is the one that should be closed</returns>
        /// <exception cref="System.IO.IOException"/>
        public virtual OutputStream WriteCoreDocument(CoreDocument document, OutputStream os)
        {
            // Only the document's annotation is serialized; delegate straight to Write.
            return(Write(document.Annotation(), os));
        }