/// <summary>
/// Builds the annotation map for one time expression, registers it on the sentence and
/// tags every covered token as a DATE entity.
/// </summary>
/// <param name="node">Timex node supplying the timex object and its text contents.</param>
/// <param name="tokens">Tokens covered by the expression; the first and last element are indexed, so the list must be non-empty.</param>
/// <param name="sentence">Sentence whose timex list receives the new map (the list is created if absent).</param>
/// <returns>The newly created map.</returns>
private ICoreMap MakeTimexMap(HeidelTimeKBPAnnotator.HeidelTimeOutputReader.TimexNode node, IList<CoreLabel> tokens, ICoreMap sentence)
            {
                ICoreMap timexMap = new ArrayCoreMap();
                timexMap.Set(typeof(TimeAnnotations.TimexAnnotation), node.timex);
                timexMap.Set(typeof(CoreAnnotations.TextAnnotation), node.contents);

                // Character and token extents both come from the first and last covered token.
                CoreLabel firstToken = tokens[0];
                CoreLabel lastToken  = tokens[tokens.Count - 1];
                timexMap.Set(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation), BeginOffset(firstToken));
                timexMap.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), EndOffset(lastToken));
                timexMap.Set(typeof(CoreAnnotations.TokenBeginAnnotation), firstToken.Index());
                timexMap.Set(typeof(CoreAnnotations.TokenEndAnnotation), lastToken.Index());
                timexMap.Set(typeof(CoreAnnotations.TokensAnnotation), tokens);

                // Get-or-create the sentence-level list of time expressions, then append.
                IList<ICoreMap> sentenceTimexes = sentence.Get(typeof(TimeAnnotations.TimexAnnotations));
                if (sentenceTimexes == null)
                {
                    sentenceTimexes = new List<ICoreMap>();
                    sentence.Set(typeof(TimeAnnotations.TimexAnnotations), sentenceTimexes);
                }
                sentenceTimexes.Add(timexMap);

                // Propagate the timex to the token level as NER information.
                foreach (CoreLabel covered in tokens)
                {
                    covered.Set(typeof(CoreAnnotations.NamedEntityTagAnnotation), "DATE");
                    covered.Set(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation), node.timex.Value());
                    covered.Set(typeof(TimeAnnotations.TimexAnnotation), node.timex);
                }
                return timexMap;
            }
예제 #2
0
        /// <summary>
        /// Adds a mention for every node captured as "m1" or "m2" by the enumeration pattern,
        /// unless its span is already a known mention or <c>InsideNE</c> reports it for the given named-entity spans.
        /// </summary>
        /// <param name="s">Sentence providing tokens, parse tree and enhanced dependencies.</param>
        /// <param name="mentions">Receives the newly created mentions.</param>
        /// <param name="mentionSpanSet">Spans already used by a mention; updated with every span added here.</param>
        /// <param name="namedEntitySpanSet">Named-entity spans passed to <c>InsideNE</c>.</param>
        protected internal static void ExtractEnumerations(ICoreMap s, IList<Mention> mentions, ICollection<IntPair> mentionSpanSet, ICollection<IntPair> namedEntitySpanSet)
        {
            IList<CoreLabel> tokens     = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            Tree             parseTree  = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
            SemanticGraph    dependency = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
            TregexMatcher    matcher    = enumerationsMentionPattern.Matcher(parseTree);
            IDictionary<IntPair, Tree> subTreeBySpan = Generics.NewHashMap();

            // Pass 1: record the token span of both captured nodes of every match.
            while (matcher.Find())
            {
                matcher.GetMatch();
                Tree[] captured = { matcher.GetNode("m1"), matcher.GetNode("m2") };
                foreach (Tree node in captured)
                {
                    IList<Tree> leaves = node.GetLeaves();
                    // The leaf index annotation is 1-based: begin becomes 0-based inclusive,
                    // end is left as-is and therefore acts as a 0-based exclusive bound.
                    int spanBegin = ((CoreLabel)leaves[0].Label()).Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
                    int spanEnd   = ((CoreLabel)leaves[leaves.Count - 1].Label()).Get(typeof(CoreAnnotations.IndexAnnotation));
                    subTreeBySpan[new IntPair(spanBegin, spanEnd)] = node;
                }
            }

            // Pass 2: turn every new, admissible span into a mention.
            foreach (IntPair span in subTreeBySpan.Keys)
            {
                if (mentionSpanSet.Contains(span) || InsideNE(span, namedEntitySpanSet))
                {
                    continue;
                }
                const int dummyMentionId = -1;
                int       from           = span.Get(0);
                int       to             = span.Get(1);
                mentions.Add(new Mention(dummyMentionId, from, to, dependency, new List<CoreLabel>(tokens.SubList(from, to)), subTreeBySpan[span]));
                mentionSpanSet.Add(span);
            }
        }
        /// <summary>
        /// Passes the sentence's basic dependency graph and parse tree to the feature annotator.
        /// </summary>
        /// <param name="annotation">Enclosing annotation; not used by this method.</param>
        /// <param name="sentence">Sentence whose graph and tree are annotated.</param>
        protected internal override void DoOneSentence(Annotation annotation, ICoreMap sentence)
        {
            SemanticGraph basicDependencies = sentence.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            Tree          parseTree         = sentence.Get(typeof(TreeCoreAnnotations.TreeAnnotation));

            featureAnnotator.AddFeatures(basicDependencies, parseTree, false, true);
        }
예제 #4
0
 /// <summary>
 /// Returns the only parent of this object, or <c>null</c> when it has none.
 /// When more than one parent exists, diagnostic details are logged and a debug assertion fires.
 /// </summary>
 /// <param name="sentence">Sentence used only for the diagnostic log output.</param>
 /// <returns>The first parent yielded by <c>GetParents()</c>, or <c>null</c> if there is none.</returns>
 public virtual ExtractionObject GetSingleParent(ICoreMap sentence)
 {
     bool hasSeveralParents = GetParents().Count > 1;
     if (hasSeveralParents)
     {
         // Dump everything useful for tracking down the unexpected structure.
         ICollection<ExtractionObject> allParents = GetParents();
         log.Info("This event has multiple parents: " + this);
         int ordinal = 0;
         foreach (ExtractionObject parent in allParents)
         {
             ordinal++;
             log.Info("PARENT #" + ordinal + ": " + parent);
         }
         log.Info("DOC " + sentence.Get(typeof(CoreAnnotations.DocIDAnnotation)));
         log.Info("SENTENCE:");
         foreach (CoreLabel token in sentence.Get(typeof(CoreAnnotations.TokensAnnotation)))
         {
             log.Info(" " + token.Word());
         }
         log.Info("EVENTS IN SENTENCE:");
         ordinal = 0;
         foreach (Edu.Stanford.Nlp.IE.Machinereading.Structure.EventMention eventMention in sentence.Get(typeof(MachineReadingAnnotations.EventMentionsAnnotation)))
         {
             ordinal++;
             log.Info("EVENT #" + ordinal + ": " + eventMention);
         }
     }
     System.Diagnostics.Debug.Assert((GetParents().Count <= 1));

     // Hand back the first parent the collection yields, if any.
     foreach (ExtractionObject first in GetParents())
     {
         return first;
     }
     return null;
 }
예제 #5
0
        /// <summary>
        /// Adds a mention for every subtree matched by the NP/PRP pattern, unless its span is
        /// already a known mention or <c>InsideNE</c> reports it for the given named-entity spans.
        /// </summary>
        /// <param name="s">Sentence providing tokens, parse tree and enhanced dependencies.</param>
        /// <param name="mentions">Receives the newly created mentions.</param>
        /// <param name="mentionSpanSet">Spans already used by a mention; updated with every span added here.</param>
        /// <param name="namedEntitySpanSet">Named-entity spans passed to <c>InsideNE</c>.</param>
        protected internal static void ExtractNPorPRP(ICoreMap s, IList<Mention> mentions, ICollection<IntPair> mentionSpanSet, ICollection<IntPair> namedEntitySpanSet)
        {
            IList<CoreLabel> tokens    = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            Tree             parseTree = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));

            // Leaves are (re)indexed before their index annotation is read below.
            parseTree.IndexLeaves();
            SemanticGraph dependency = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
            TregexMatcher matcher    = npOrPrpMentionPattern.Matcher(parseTree);

            while (matcher.Find())
            {
                Tree        matched = matcher.GetMatch();
                IList<Tree> leaves  = matched.GetLeaves();
                // 1-based leaf indices become a 0-based [begin, end) token span.
                int spanBegin = ((CoreLabel)leaves[0].Label()).Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
                int spanEnd   = ((CoreLabel)leaves[leaves.Count - 1].Label()).Get(typeof(CoreAnnotations.IndexAnnotation));

                // Try not to have a span that ends with a comma.
                bool endsWithComma = ",".Equals(tokens[spanEnd - 1].Word());
                if (endsWithComma)
                {
                    spanEnd--;
                }

                IntPair span = new IntPair(spanBegin, spanEnd);
                if (mentionSpanSet.Contains(span) || InsideNE(span, namedEntitySpanSet))
                {
                    continue;
                }
                const int dummyMentionId = -1;
                mentions.Add(new Mention(dummyMentionId, spanBegin, spanEnd, dependency, new List<CoreLabel>(tokens.SubList(spanBegin, spanEnd)), matched));
                mentionSpanSet.Add(span);
            }
        }
예제 #6
0
        /// <summary>
        /// Looks in the last sentence of <paramref name="paragraph"/> for a token whose lemma is
        /// "report" or "say" and whose "nsubj" dependent is the head of a registered mention with an
        /// NER string starting with "PER"; returns that mention's ID as a string.
        /// </summary>
        /// <param name="paragraph">Sentences of the paragraph; the last element is indexed, so it must be non-empty.</param>
        /// <param name="paragraphOffset">Added to the last sentence's position in the paragraph to form the sentence component of the head-position key.</param>
        /// <param name="dict">Not used by this method.</param>
        /// <returns>
        /// The mention ID as a string, or the empty string when no qualifying subject exists.
        /// If several subjects qualify, the last one encountered wins.
        /// </returns>
        private string FindNextParagraphSpeaker(IList <ICoreMap> paragraph, int paragraphOffset, Dictionaries dict)
        {
            ICoreMap lastSent = paragraph[paragraph.Count - 1];
            string   speaker  = string.Empty;

            foreach (CoreLabel w in lastSent.Get(typeof(CoreAnnotations.TokensAnnotation)))
            {
                // Compare from the literal side so that a token without a lemma is skipped
                // instead of raising a NullReferenceException.
                string lemma = w.Get(typeof(CoreAnnotations.LemmaAnnotation));
                if (!"report".Equals(lemma) && !"say".Equals(lemma))
                {
                    continue;
                }
                string        word       = w.Get(typeof(CoreAnnotations.TextAnnotation));
                SemanticGraph dependency = lastSent.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
                IndexedWord   t          = dependency.GetNodeByWordPattern(word);
                foreach (Pair <GrammaticalRelation, IndexedWord> child in dependency.ChildPairs(t))
                {
                    if (!child.First().GetShortName().Equals("nsubj"))
                    {
                        continue;
                    }
                    // Token indices start from 1, head positions from 0.
                    int      subjectIndex = child.Second().Index();
                    IntTuple headPosition = new IntTuple(2);
                    headPosition.Set(0, paragraph.Count - 1 + paragraphOffset);
                    headPosition.Set(1, subjectIndex - 1);
                    if (mentionheadPositions.Contains(headPosition) && mentionheadPositions[headPosition].nerString.StartsWith("PER"))
                    {
                        // int has no static ToString(int) in C#; Convert.ToString performs the conversion.
                        speaker = System.Convert.ToString(mentionheadPositions[headPosition].mentionID);
                    }
                }
            }
            return(speaker);
        }
        /// <summary>
        /// Decides whether a candidate mention is more canonical than the given entity mention:
        /// either its text is strictly longer, or it has the same length and starts earlier.
        /// </summary>
        /// <param name="entityMention">The mention being compared against.</param>
        /// <param name="potentialCanonicalMention">The candidate mention.</param>
        /// <returns><c>true</c> when the candidate wins under the rule above; otherwise <c>false</c>.</returns>
        public virtual bool MoreCanonicalMention(ICoreMap entityMention, ICoreMap potentialCanonicalMention)
        {
            // Surface text of both mentions.
            string mentionText   = entityMention.Get(typeof(CoreAnnotations.TextAnnotation));
            string candidateText = potentialCanonicalMention.Get(typeof(CoreAnnotations.TextAnnotation));
            // Character offsets at which the mentions begin.
            int mentionStart   = entityMention.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
            int candidateStart = potentialCanonicalMention.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));

            bool candidateIsLonger    = candidateText.Length > mentionText.Length;
            bool sameLengthButEarlier = candidateText.Length == mentionText.Length && candidateStart < mentionStart;

            return candidateIsLonger || sameLengthButEarlier;
        }
        //      throw e;
        /// <summary>
        /// Reads the publication date of a document from its calendar annotation or, failing that,
        /// from its document-date annotation.
        /// </summary>
        /// <param name="document">Map that must contain the key of at least one of the two annotations.</param>
        /// <returns>The date string wrapped in an optional, or an empty optional when both values are null.</returns>
        /// <exception cref="ArgumentException">Neither annotation key is present on <paramref name="document"/>.</exception>
        private Optional<string> GetPubDate(ICoreMap document)
        {
            // Error check (not strictly necessary, technically): one of the two keys has to exist.
            bool hasDateSource = document.ContainsKey(typeof(CoreAnnotations.CalendarAnnotation)) || document.ContainsKey(typeof(CoreAnnotations.DocDateAnnotation));
            if (!hasDateSource)
            {
                throw new ArgumentException("CoreMap must have either a Calendar or DocDate annotation");
            }

            // Case: calendar annotation.
            Calendar calendar = document.Get(typeof(CoreAnnotations.CalendarAnnotation));
            if (calendar != null)
            {
                // NOTE(review): "%TF" is a Java Formatter conversion; .NET's string.Format has no
                // such placeholder and would return the literal text — confirm and port the formatting.
                return Optional.Of(string.Format("%TF", calendar));
            }

            // Case: document-date annotation.
            string docDate = document.Get(typeof(CoreAnnotations.DocDateAnnotation));
            if (docDate != null)
            {
                return Optional.Of(docDate);
            }
            return Optional.Empty();
        }
        // nothing to do by default
        /// <summary>
        /// Converts NamedEntityTagAnnotation tags into
        /// <see cref="Edu.Stanford.Nlp.IE.Machinereading.Structure.EntityMention"/>
        /// objects. Each maximal run of consecutive tokens tagged with
        /// <paramref name="nerTag"/> becomes one mention, which is appended to the sentence's
        /// entity-mention list.
        /// </summary>
        /// <remarks>
        /// Tokens that carry no NamedEntityTagAnnotation are treated as non-matching rather than
        /// causing a NullReferenceException.
        /// </remarks>
        /// <param name="sentence">A sentence, ideally annotated with NamedEntityTagAnnotation</param>
        /// <param name="nerTag">The name of the NER tag to copy, e.g. "DATE".</param>
        /// <param name="entityType">
        /// The type of the
        /// <see cref="Edu.Stanford.Nlp.IE.Machinereading.Structure.EntityMention"/>
        /// objects created
        /// </param>
        public virtual void MakeAnnotationFromGivenNERTag(ICoreMap sentence, string nerTag, string entityType)
        {
            IList <CoreLabel>     words    = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
            IList <EntityMention> mentions = sentence.Get(typeof(MachineReadingAnnotations.EntityMentionsAnnotation));

            System.Diagnostics.Debug.Assert(words != null);
            System.Diagnostics.Debug.Assert(mentions != null);
            for (int start = 0; start < words.Count; start++)
            {
                int end;
                // find the first token at or after start that isn't of nerType
                for (end = start; end < words.Count; end++)
                {
                    string ne = words[end].Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
                    // Static Equals tolerates a missing (null) tag on the token.
                    if (!string.Equals(ne, nerTag))
                    {
                        break;
                    }
                }
                if (end > start)
                {
                    // found a match: tokens [start, end) all carry nerTag
                    EntityMention m = entityMentionFactory.ConstructEntityMention(EntityMention.MakeUniqueId(), sentence, new Span(start, end), new Span(start, end), entityType, null, null);
                    logger.Info("Created " + entityType + " entity mention: " + m);
                    // resume right after the run (the loop increment moves start to end)
                    start = end - 1;
                    mentions.Add(m);
                }
            }
            sentence.Set(typeof(MachineReadingAnnotations.EntityMentionsAnnotation), mentions);
        }
예제 #10
0
        /// <summary>
        /// Adds a mention for every subtree matched by the NP/PRP pattern whose span is not yet a
        /// mention. For languages other than Chinese the span is additionally skipped when
        /// <c>InsideNE</c> reports it for the given named-entity spans.
        /// </summary>
        /// <param name="s">Sentence providing tokens, parse tree and dependency graphs.</param>
        /// <param name="mentions">Receives the newly created mentions.</param>
        /// <param name="mentionSpanSet">Spans already used by a mention; updated with every span added here.</param>
        /// <param name="namedEntitySpanSet">Named-entity spans passed to <c>InsideNE</c>.</param>
        public virtual void ExtractNPorPRP(ICoreMap s, IList<Mention> mentions, ICollection<IntPair> mentionSpanSet, ICollection<IntPair> namedEntitySpanSet)
        {
            IList<CoreLabel> tokens    = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            Tree             parseTree = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));

            // Leaves are (re)indexed before their index annotation is read below.
            parseTree.IndexLeaves();
            SemanticGraph basicDependency    = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            SemanticGraph enhancedDependency = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
            if (enhancedDependency == null)
            {
                // No enhanced graph available: fall back to the basic dependencies.
                enhancedDependency = basicDependency;
            }

            TregexMatcher matcher = npOrPrpMentionPattern.Matcher(parseTree);
            while (matcher.Find())
            {
                Tree        matched = matcher.GetMatch();
                IList<Tree> leaves  = matched.GetLeaves();
                // 1-based leaf indices become a 0-based [begin, end) token span.
                int spanBegin = ((CoreLabel)leaves[0].Label()).Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
                int spanEnd   = ((CoreLabel)leaves[leaves.Count - 1].Label()).Get(typeof(CoreAnnotations.IndexAnnotation));

                IntPair span = new IntPair(spanBegin, spanEnd);
                // Skip known spans; outside Chinese also skip spans rejected by InsideNE.
                if (mentionSpanSet.Contains(span) || (!(lang == Locale.Chinese) && InsideNE(span, namedEntitySpanSet)))
                {
                    continue;
                }
                const int dummyMentionId = -1;
                mentions.Add(new Mention(dummyMentionId, spanBegin, spanEnd, tokens, basicDependency, enhancedDependency, new List<CoreLabel>(tokens.SubList(spanBegin, spanEnd)), matched));
                mentionSpanSet.Add(span);
            }
        }
예제 #11
0
        /// <summary>
        /// Stores the parse results on a sentence: optionally maps the trees, fills in the parse
        /// annotations, optionally saves a binarized copy of the first tree, and finally makes
        /// sure every node of the collapsed dependency graph has a sentence index.
        /// </summary>
        /// <param name="sentence">Sentence receiving the annotations.</param>
        /// <param name="trees">Parse trees for the sentence; the first one is binarized when binary trees are saved.</param>
        private void FinishSentence(ICoreMap sentence, IList<Tree> trees)
        {
            if (treeMap != null)
            {
                // Run every tree through the configured mapping and continue with the results.
                IList<Tree> transformed = Generics.NewLinkedList();
                foreach (Tree original in trees)
                {
                    transformed.Add(treeMap.Apply(original));
                }
                trees = transformed;
            }

            ParserAnnotatorUtils.FillInParseAnnotations(Verbose, BuildGraphs, gsf, sentence, trees, extraDependencies);

            if (saveBinaryTrees)
            {
                TreeBinarizer binarizer  = TreeBinarizer.SimpleTreeBinarizer(parser.GetTLPParams().HeadFinder(), parser.TreebankLanguagePack());
                Tree          binaryTree = binarizer.TransformTree(trees[0]);
                Edu.Stanford.Nlp.Trees.Trees.ConvertToCoreLabels(binaryTree);
                sentence.Set(typeof(TreeCoreAnnotations.BinarizedTreeAnnotation), binaryTree);
            }

            // For some reason, in some corner cases nodes do not have sentenceIndex set;
            // do a pass and make sure all nodes have it.
            SemanticGraph collapsed = sentence.Get(typeof(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation));
            if (collapsed == null)
            {
                return;
            }
            foreach (IndexedWord vertex in collapsed.VertexSet())
            {
                if (vertex.Get(typeof(CoreAnnotations.SentenceIndexAnnotation)) != null)
                {
                    continue;
                }
                if (sentence.Get(typeof(CoreAnnotations.SentenceIndexAnnotation)) != null)
                {
                    vertex.SetSentIndex(sentence.Get(typeof(CoreAnnotations.SentenceIndexAnnotation)));
                }
            }
        }
예제 #12
0
        /// <summary>
        /// Assigns head index, head word and head string to every mention of a sentence.
        /// If the resulting head lies outside the mention's original span, the head is reset to
        /// the start of the mention and the problem is logged.
        /// </summary>
        /// <param name="s">Sentence providing the parse tree and tokens.</param>
        /// <param name="mentions">Mentions to update in place.</param>
        public virtual void FindHead(ICoreMap s, IList<Mention> mentions)
        {
            Tree             parseTree = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
            IList<CoreLabel> tokens    = s.Get(typeof(CoreAnnotations.TokensAnnotation));

            parseTree.IndexSpans(0);
            foreach (Mention mention in mentions)
            {
                if (lang == Locale.Chinese)
                {
                    FindHeadChinese(tokens, mention);
                }
                else
                {
                    CoreLabel syntacticHead = (CoreLabel)FindSyntacticHead(mention, parseTree, tokens).Label();
                    // The index annotation is 1-based; headIndex is 0-based.
                    mention.headIndex  = syntacticHead.Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
                    mention.headWord   = tokens[mention.headIndex];
                    mention.headString = mention.headWord.Get(typeof(CoreAnnotations.TextAnnotation)).ToLower(Locale.English);
                }

                // Position of the head relative to the mention start; must fall inside the span.
                int  headOffset     = mention.headIndex - mention.startIndex;
                bool headInsideSpan = headOffset >= 0 && headOffset < mention.originalSpan.Count;
                if (headInsideSpan)
                {
                    continue;
                }
                Redwood.Log("Invalid index for head " + headOffset + "=" + mention.headIndex + "-" + mention.startIndex + ": originalSpan=[" + StringUtils.JoinWords(mention.originalSpan, " ") + "], head=" + mention.headWord);
                Redwood.Log("Setting head string to entire mention");
                mention.headIndex = mention.startIndex;
                if (mention.originalSpan.Count > 0)
                {
                    mention.headWord = mention.originalSpan[0];
                }
                else
                {
                    mention.headWord = tokens[mention.startIndex];
                }
                mention.headString = mention.originalSpan.ToString();
            }
        }
예제 #13
0
        /// <summary>
        /// Runs a pipeline over a single sentence by wrapping it in a temporary annotation whose
        /// text is the sentence's tokens joined by single spaces.
        /// </summary>
        /// <remarks>
        /// The temporary annotation is not returned; it holds the very same token list and
        /// sentence object that were passed in.
        /// </remarks>
        /// <param name="sentence">Sentence to annotate; its token list is shared with the wrapper.</param>
        /// <param name="pipeline">Pipeline to run on the wrapper annotation.</param>
        public static void Annotate(ICoreMap sentence, AnnotationPipeline pipeline)
        {
            string     joinedTokens = StringUtils.Join(sentence.Get(typeof(CoreAnnotations.TokensAnnotation)), " ");
            Annotation wrapper      = new Annotation(joinedTokens);

            wrapper.Set(typeof(CoreAnnotations.TokensAnnotation), sentence.Get(typeof(CoreAnnotations.TokensAnnotation)));
            wrapper.Set(typeof(CoreAnnotations.SentencesAnnotation), Java.Util.Collections.SingletonList(sentence));
            pipeline.Annotate(wrapper);
        }
예제 #14
0
 /// <summary>
 /// Creates an instance from a sentence, keeping its collapsed CC-processed dependency graph
 /// and its token list.
 /// </summary>
 /// <param name="s">Sentence to read the graph and tokens from.</param>
 public DataInstanceDep(ICoreMap s)
 {
     // Only the CC-processed collapsed graph is used here, not the collapsed or basic one.
     this.graph  = s.Get(typeof(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation));
     this.tokens = s.Get(typeof(CoreAnnotations.TokensAnnotation));
 }
예제 #15
0
        /// <summary>
        /// Packs the mention annotations stored on a quote (text, begin, end, type) into a
        /// <c>Sieve.MentionData</c> object.
        /// </summary>
        /// <param name="quote">Quote carrying the four mention annotations.</param>
        /// <returns>A new mention-data object built from those annotations.</returns>
        protected internal virtual Sieve.MentionData GetMentionData(ICoreMap quote)
        {
            string mentionText  = quote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation));
            int    mentionBegin = quote.Get(typeof(QuoteAttributionAnnotator.MentionBeginAnnotation));
            int    mentionEnd   = quote.Get(typeof(QuoteAttributionAnnotator.MentionEndAnnotation));
            string mentionType  = quote.Get(typeof(QuoteAttributionAnnotator.MentionTypeAnnotation));

            Sieve.MentionData data = new Sieve.MentionData(this, mentionBegin, mentionEnd, mentionText, mentionType);
            return data;
        }
예제 #16
0
 /// <summary>
 /// Builds a <c>Sieve.MentionData</c> from the mention annotations of a quote, or a placeholder
 /// (begin and end -1, null text and type) when the quote has no mention annotation.
 /// </summary>
 /// <param name="q">Quote to read the mention annotations from.</param>
 /// <returns>The mention data, or the placeholder described above.</returns>
 public virtual Sieve.MentionData MakeMentionData(ICoreMap q)
 {
     bool hasMention = q.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null;
     if (!hasMention)
     {
         return new Sieve.MentionData(this, -1, -1, null, null);
     }

     return new Sieve.MentionData(
         this,
         q.Get(typeof(QuoteAttributionAnnotator.MentionBeginAnnotation)),
         q.Get(typeof(QuoteAttributionAnnotator.MentionEndAnnotation)),
         q.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)),
         q.Get(typeof(QuoteAttributionAnnotator.MentionTypeAnnotation)));
 }
        /// <summary>
        /// Returns a copy of <paramref name="list"/> in which every run of elements covered by a
        /// matched expression is replaced by that expression's annotation.
        /// </summary>
        /// <remarks>
        /// Expressions are located through their token offsets: the begin offset must equal the
        /// token-begin of some list element and the end offset the token-end of some list element
        /// (elements lacking those annotations are keyed by their list position). Expressions
        /// whose offsets cannot be mapped, or that start before the end of the previously applied
        /// expression, are skipped. <paramref name="matchedExprs"/> is sorted in place.
        /// </remarks>
        /// <param name="list">Elements to merge over.</param>
        /// <param name="matchedExprs">Expressions to splice in; when null, <paramref name="list"/> is returned as is.</param>
        /// <returns>The merged list, or <paramref name="list"/> itself when there is nothing to merge.</returns>
        public static IList <ICoreMap> ReplaceMergedUsingTokenOffsets <_T0, _T1>(IList <_T0> list, IList <_T1> matchedExprs)
            where _T0 : ICoreMap
            where _T1 : MatchedExpression
        {
            if (matchedExprs == null)
            {
                return(list);
            }
            // token begin offset -> index of the list element starting there
            IDictionary <int, int> tokenBeginToListIndexMap = new Dictionary <int, int>();
            // token end offset -> index just past the list element ending there
            IDictionary <int, int> tokenEndToListIndexMap = new Dictionary <int, int>();

            for (int i = 0; i < list.Count; i++)
            {
                ICoreMap cm = list[i];
                if (cm.ContainsKey(typeof(CoreAnnotations.TokenBeginAnnotation)) && cm.ContainsKey(typeof(CoreAnnotations.TokenEndAnnotation)))
                {
                    tokenBeginToListIndexMap[cm.Get(typeof(CoreAnnotations.TokenBeginAnnotation))] = i;
                    tokenEndToListIndexMap[cm.Get(typeof(CoreAnnotations.TokenEndAnnotation))]     = i + 1;
                }
                else
                {
                    // No token offsets on this element: use its list position instead.
                    tokenBeginToListIndexMap[i]   = i;
                    tokenEndToListIndexMap[i + 1] = i + 1;
                }
            }
            matchedExprs.Sort(ExprTokenOffsetComparator);
            // Approximate size
            IList <ICoreMap> merged = new List <ICoreMap>(list.Count);
            // Index of the first list element not yet copied or replaced.
            int last = 0;

            foreach (MatchedExpression expr in matchedExprs)
            {
                int start = expr.tokenOffsets.First();
                int end   = expr.tokenOffsets.Second();
                int istart;
                int iend;
                // The indexer would throw on an unknown offset, and an int can never be null,
                // so a missing mapping has to be detected with TryGetValue.
                if (!tokenBeginToListIndexMap.TryGetValue(start, out istart) || !tokenEndToListIndexMap.TryGetValue(end, out iend))
                {
                    continue;
                }
                if (istart >= last)
                {
                    // Copy the untouched elements before the match, then the match itself.
                    Sharpen.Collections.AddAll(merged, list.SubList(last, istart));
                    ICoreMap m = expr.GetAnnotation();
                    merged.Add(m);
                    last = iend;
                }
            }
            // Add rest of elements
            if (last < list.Count)
            {
                Sharpen.Collections.AddAll(merged, list.SubList(last, list.Count));
            }
            return(merged);
        }
예제 #18
0
        //        if(m.originalSpan.size() > 1) {
        //          boolean isNE = true;
        //          for(CoreLabel cl : m.originalSpan) {
        //            if(!cl.tag().startsWith("NNP")) isNE = false;
        //          }
        //          if(isNE) {
        //            namedEntitySpanSet.add(mSpan);
        //          }
        //        }
        /// <summary>
        /// Creates a mention for every run of consecutive tokens that share the same NER tag,
        /// except for runs whose tag matches the ignored-tag pattern (O, QUANTITY, CARDINAL,
        /// PERCENT, DATE, DURATION, TIME, SET). A possessive "'s" token that directly follows a
        /// run is included in its span.
        /// </summary>
        /// <param name="s">Sentence providing tokens and dependency graphs.</param>
        /// <param name="mentions">Receives the newly created mentions.</param>
        /// <param name="mentionSpanSet">Spans already used by a mention; updated with every span added here.</param>
        /// <param name="namedEntitySpanSet">Receives the span of every named-entity mention added here.</param>
        protected internal static void ExtractNamedEntityMentions(ICoreMap s, IList<Mention> mentions, ICollection<IntPair> mentionSpanSet, ICollection<IntPair> namedEntitySpanSet)
        {
            const string ignoredTags    = "O|QUANTITY|CARDINAL|PERCENT|DATE|DURATION|TIME|SET";
            const int    dummyMentionId = -1;

            IList<CoreLabel> tokens             = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            SemanticGraph    basicDependency    = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            SemanticGraph    enhancedDependency = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
            if (enhancedDependency == null)
            {
                // No enhanced graph available: fall back to the basic dependencies.
                enhancedDependency = basicDependency;
            }

            string runTag   = "O";
            int    runBegin = -1;

            foreach (CoreLabel token in tokens)
            {
                string tag = token.Ner();
                if (tag.Equals(runTag))
                {
                    // Still inside the current run.
                    continue;
                }

                // The tag changed: the previous run ends right before this token (0-based, exclusive).
                int runEnd = token.Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
                if (!runTag.Matches(ignoredTags))
                {
                    bool isPossessive = token.Get(typeof(CoreAnnotations.TextAnnotation)).Equals("'s") && token.Tag().Equals("POS");
                    if (isPossessive)
                    {
                        runEnd++;
                    }
                    IntPair span = new IntPair(runBegin, runEnd);
                    // Need to check runBegin < runEnd because, for example, there could be
                    // a 's mislabeled by the NER and attached to the previous NER by the
                    // heuristic above.
                    if (runBegin < runEnd && !mentionSpanSet.Contains(span))
                    {
                        mentions.Add(new Mention(dummyMentionId, runBegin, runEnd, tokens, basicDependency, enhancedDependency, new List<CoreLabel>(tokens.SubList(runBegin, runEnd))));
                        mentionSpanSet.Add(span);
                        namedEntitySpanSet.Add(span);
                    }
                }
                runBegin = runEnd;
                runTag   = tag;
            }

            // A named entity that runs up to the end of the sentence.
            if (runTag.Matches(ignoredTags))
            {
                return;
            }
            IntPair finalSpan = new IntPair(runBegin, tokens.Count);
            if (!mentionSpanSet.Contains(finalSpan))
            {
                mentions.Add(new Mention(dummyMentionId, runBegin, tokens.Count, tokens, basicDependency, enhancedDependency, new List<CoreLabel>(tokens.SubList(runBegin, tokens.Count))));
                mentionSpanSet.Add(finalSpan);
                namedEntitySpanSet.Add(finalSpan);
            }
        }
        /// <summary>
        /// Converts the gold entity mentions of a sentence into <c>Mention</c> objects, in the
        /// order defined by <paramref name="comparator"/>, and appends them as a new list to
        /// <paramref name="allGoldMentions"/>. Also raises <c>maxID</c> to the largest mention ID seen.
        /// </summary>
        /// <param name="s">Sentence carrying the gold entity mentions and dependency graphs.</param>
        /// <param name="allGoldMentions">Gold mentions per sentence; a new (possibly empty) list is always appended.</param>
        /// <param name="comparator">Ordering used to sort the sentence's gold mentions.</param>
        private void ExtractGoldMentions(ICoreMap s, IList <IList <Mention> > allGoldMentions, ACEMentionExtractor.EntityComparator comparator)
        {
            IList <Mention> goldMentions = new List <Mention>();

            allGoldMentions.Add(goldMentions);
            IList <EntityMention>   goldMentionList         = s.Get(typeof(MachineReadingAnnotations.EntityMentionsAnnotation));
            TreeSet <EntityMention> treeForSortGoldMentions = new TreeSet <EntityMention>(comparator);

            if (goldMentionList != null)
            {
                Sharpen.Collections.AddAll(treeForSortGoldMentions, goldMentionList);
            }

            // The graph is the same for every mention of the sentence: prefer the collapsed
            // dependencies and fall back to the enhanced ones.
            SemanticGraph dependency = s.Get(typeof(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation));
            if (dependency == null)
            {
                dependency = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
            }

            foreach (EntityMention e in treeForSortGoldMentions)
            {
                Mention men = new Mention();
                men.dependency = dependency;
                men.startIndex = e.GetExtentTokenStart();
                men.endIndex   = e.GetExtentTokenEnd();
                // The mention ID is the last "-"-separated component of the object ID.
                string[] parseID = e.GetObjectId().Split("-");
                men.mentionID = System.Convert.ToInt32(parseID[parseID.Length - 1]);
                // The cluster ID is whatever follows the last "-E" in the coref ID.
                string[] parseCorefID = e.GetCorefID().Split("-E");
                men.goldCorefClusterID = System.Convert.ToInt32(parseCorefID[parseCorefID.Length - 1]);
                men.originalRef        = -1;
                // Scan all mentions collected so far, from last to first, without stopping at
                // the first hit: the match that comes earliest in the lists is assigned last
                // and therefore wins.
                for (int j = allGoldMentions.Count - 1; j >= 0; j--)
                {
                    IList <Mention> l = allGoldMentions[j];
                    for (int k = l.Count - 1; k >= 0; k--)
                    {
                        Mention m = l[k];
                        if (men.goldCorefClusterID == m.goldCorefClusterID)
                        {
                            men.originalRef = m.mentionID;
                        }
                    }
                }
                goldMentions.Add(men);
                if (men.mentionID > maxID)
                {
                    maxID = men.mentionID;
                }
                // NOTE(review): a per-token "set ner type" loop used to follow here, but it only
                // computed two unused locals and changed nothing, so it was removed. Confirm
                // whether gold NER tags are supposed to be written onto the tokens.
            }
        }
 /// <summary>
 /// Merges the chunk of <paramref name="source"/> delimited by <c>chunkOffsets</c> into a single
 /// annotation, derives the character offsets, token offsets and text from it, and runs the
 /// extraction function on the chunk.
 /// </summary>
 /// <param name="source">Elements the chunk offsets refer to.</param>
 /// <param name="aggregator">Aggregator used to merge the chunk.</param>
 /// <returns>Always <c>true</c>.</returns>
 protected internal virtual bool ExtractAnnotation<_T0>(IList<_T0> source, CoreMapAggregator aggregator)
     where _T0 : ICoreMap
 {
     int chunkBegin = chunkOffsets.GetBegin();
     int chunkEnd   = chunkOffsets.GetEnd();

     annotation = aggregator.Merge(source, chunkBegin, chunkEnd);

     // Both intervals are created with the IntervalOpenEnd flag.
     charOffsets = Interval.ToInterval(
         annotation.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation)),
         annotation.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation)),
         Interval.IntervalOpenEnd);
     tokenOffsets = Interval.ToInterval(
         annotation.Get(typeof(CoreAnnotations.TokenBeginAnnotation)),
         annotation.Get(typeof(CoreAnnotations.TokenEndAnnotation)),
         Interval.IntervalOpenEnd);
     text = annotation.Get(typeof(CoreAnnotations.TextAnnotation));

     extractFunc.Annotate(this, source.SubList(chunkBegin, chunkEnd));
     return true;
 }
예제 #21
0
        /// <summary>Run OpenIE over a single sentence and store the results on it.</summary>
        /// <remarks>
        /// This annotator will, in particular, set the
        /// <see cref="RelationTriplesAnnotation"/>
        /// annotation and, unless <c>stripEntailments</c> is set, the
        /// <see cref="EntailedSentencesAnnotation"/>
        /// annotation. Sentences with fewer than two tokens receive empty results and are not parsed.
        /// </remarks>
        /// <param name="sentence">The sentence to annotate; it is modified in place.</param>
        /// <param name="canonicalMentionMap">
        /// Mention map handed to <c>CanonicalizeCoref</c>; only consulted when <c>resolveCoref</c> is set
        /// and the map is not empty.
        /// </param>
        /// <exception cref="InvalidOperationException">
        /// The sentence has neither an enhanced++ nor a basic dependency graph.
        /// </exception>
        public virtual void AnnotateSentence(ICoreMap sentence, IDictionary <CoreLabel, IList <CoreLabel> > canonicalMentionMap)
        {
            IList <CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

            // Short sentence: record empty results and skip the rest of the pipeline.
            if (tokens.Count < 2)
            {
                sentence.Set(typeof(NaturalLogicAnnotations.RelationTriplesAnnotation), Java.Util.Collections.EmptyList());
                if (!stripEntailments)
                {
                    sentence.Set(typeof(NaturalLogicAnnotations.EntailedSentencesAnnotation), Java.Util.Collections.EmptySet());
                }
                return;
            }

            // Prefer the enhanced++ dependencies and fall back to the basic ones.
            SemanticGraph dependencies = sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation));
            if (dependencies == null)
            {
                dependencies = sentence.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            }
            if (dependencies == null)
            {
                throw new InvalidOperationException("Cannot run OpenIE without a parse tree!");
            }

            // Clean a new SemanticGraph built from the stored one rather than the stored one itself.
            SemanticGraph cleaned = new SemanticGraph(dependencies);
            Edu.Stanford.Nlp.Naturalli.Util.CleanTree(cleaned);

            // Clause splitting works on the coref-canonicalized graph when coref resolution applies.
            SemanticGraph clauseGraph = (resolveCoref && !canonicalMentionMap.IsEmpty())
                ? CanonicalizeCoref(cleaned, canonicalMentionMap)
                : cleaned;

            IList <SentenceFragment>       clauses   = ClausesInSentence(clauseGraph, true);
            ICollection <SentenceFragment> fragments = EntailmentsFromClauses(clauses);

            // The segmenter deliberately receives the graph that was NOT coref-canonicalized.
            IList <RelationTriple> extractions = segmenter.Extract(cleaned, tokens);
            Sharpen.Collections.AddAll(extractions, RelationsInFragments(fragments, sentence));

            // Passing the extractions through a set removes duplicates before they are stored.
            HashSet <RelationTriple> uniqueExtractions = new HashSet <RelationTriple>(extractions);

            sentence.Set(typeof(NaturalLogicAnnotations.EntailedClausesAnnotation), new HashSet <SentenceFragment>(clauses));
            sentence.Set(typeof(NaturalLogicAnnotations.EntailedSentencesAnnotation), fragments);
            sentence.Set(typeof(NaturalLogicAnnotations.RelationTriplesAnnotation), new List <RelationTriple>(uniqueExtractions));
            if (stripEntailments)
            {
                sentence.Remove(typeof(NaturalLogicAnnotations.EntailedSentencesAnnotation));
            }
        }
예제 #22
0
        /// <summary>
        /// Build an
        /// <see cref="Edu.Stanford.Nlp.IE.Machinereading.Structure.EntityMention"/>
        /// from an
        /// <see cref="Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.Reader.AceEntityMention"/>
        /// , re-basing its extent and head spans onto the given sentence.
        /// </summary>
        /// <remarks>
        /// Spans that fall outside the sentence are logged as reader errors and clamped to the sentence;
        /// the head span is then clamped to lie inside the extent.
        /// </remarks>
        /// <param name="entityMention">The ACE entity mention to convert</param>
        /// <param name="docId">ID of the document containing this entity mention (used in log messages)</param>
        /// <param name="sentence">Sentence the converted mention is attached to</param>
        /// <param name="tokenOffset">
        /// Value subtracted from the ACE token positions: the ace.reader stores absolute token offsets from
        /// the beginning of the document, whereas the converted spans are relative to the sentence start.
        /// </param>
        /// <returns>
        /// entity as an
        /// <see cref="Edu.Stanford.Nlp.IE.Machinereading.Structure.EntityMention"/>
        /// </returns>
        private EntityMention ConvertAceEntityMention(AceEntityMention entityMention, string docId, ICoreMap sentence, int tokenOffset)
        {
            AceCharSeq extent   = entityMention.GetExtent();
            AceCharSeq headSpan = entityMention.GetHead();

            // ACE end positions are inclusive, ours are exclusive => add 1 to every end.
            int extStart = extent.GetTokenStart() - tokenOffset;
            int extEnd   = extent.GetTokenEnd() - tokenOffset + 1;

            if (extStart < 0)
            {
                logger.Severe("READER ERROR: Invalid extent start " + extStart + " for entity mention " + entityMention.GetId() + " in document " + docId + " in sentence " + sentence);
                logger.Severe("This may happen due to incorrect EOS detection. Adjusting entity extent.");
                extStart = 0;
            }

            // Number of tokens in the sentence; upper bound for every exclusive end position.
            int sentenceLength = sentence.Get(typeof(CoreAnnotations.TokensAnnotation)).Count;

            if (extEnd > sentenceLength)
            {
                logger.Severe("READER ERROR: Invalid extent end " + extEnd + " for entity mention " + entityMention.GetId() + " in document " + docId + " in sentence " + sentence);
                logger.Severe("This may happen due to incorrect EOS detection. Adjusting entity extent.");
                extEnd = sentenceLength;
            }

            int headStart = headSpan.GetTokenStart() - tokenOffset;
            int headEnd   = headSpan.GetTokenEnd() - tokenOffset + 1;

            if (headStart < 0)
            {
                logger.Severe("READER ERROR: Invalid head start " + headStart + " for entity mention " + entityMention.GetId() + " in document " + docId + " in sentence " + sentence);
                logger.Severe("This may happen due to incorrect EOS detection. Adjusting entity head span.");
                headStart = 0;
            }
            if (headEnd > sentenceLength)
            {
                logger.Severe("READER ERROR: Invalid head end " + headEnd + " for entity mention " + entityMention.GetId() + " in document " + docId + " in sentence " + sentence);
                logger.Severe("This may happen due to incorrect EOS detection. Adjusting entity head span.");
                headEnd = sentenceLength;
            }

            // The head must sit inside the (possibly adjusted) extent.
            headStart = System.Math.Max(headStart, extStart);
            headEnd   = System.Math.Min(headEnd, extEnd);
            System.Diagnostics.Debug.Assert((headStart < headEnd));

            return(new EntityMention(entityMention.GetId(), sentence, new Span(extStart, extEnd), new Span(headStart, headEnd), entityMention.GetParent().GetType(), entityMention.GetParent().GetSubtype(), entityMention.GetLdctype()));
        }
예제 #23
0
        /// <summary>
        /// Adds a mention for every parse-tree node matched by <c>npOrPrpMentionPattern</c> whose token span
        /// has not been recorded yet.
        /// </summary>
        /// <param name="s">Sentence providing the tokens, the parse tree and the dependency graphs.</param>
        /// <param name="mentions">Receives the newly created mentions.</param>
        /// <param name="mentionSpanSet">Spans already turned into mentions; extended with each new span.</param>
        /// <param name="namedEntitySpanSet">
        /// Known named-entity spans; extended with multi-token spans whose tokens all carry a tag starting
        /// with "NNP".
        /// </param>
        private static void ExtractNPorPRP(ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            IList <CoreLabel> sent = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            Tree tree = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));

            tree.IndexLeaves();
            SemanticGraph basicDependency    = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            SemanticGraph enhancedDependency = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));

            // Without enhanced dependencies the basic graph stands in for them.
            if (enhancedDependency == null)
            {
                enhancedDependency = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            }

            TregexMatcher matcher = npOrPrpMentionPattern.Matcher(tree);

            while (matcher.Find())
            {
                Tree         matched = matcher.GetMatch();
                IList <Tree> leaves  = matched.GetLeaves();

                // Begin is the first leaf's index minus one; end is the last leaf's index, used as an exclusive bound.
                int beginIdx = ((CoreLabel)leaves[0].Label()).Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
                int endIdx   = ((CoreLabel)leaves[leaves.Count - 1].Label()).Get(typeof(CoreAnnotations.IndexAnnotation));

                // try not to have span that ends with ,
                if (",".Equals(sent[endIdx - 1].Word()))
                {
                    endIdx--;
                }

                IntPair mSpan = new IntPair(beginIdx, endIdx);

                if (mentionSpanSet.Contains(mSpan))
                {
                    continue;
                }
                // Spans inside a named entity are skipped, except for nodes whose label starts with "PRP".
                if (InsideNE(mSpan, namedEntitySpanSet) && !matched.Value().StartsWith("PRP"))
                {
                    continue;
                }

                int     dummyMentionId = -1;
                Mention mention        = new Mention(dummyMentionId, beginIdx, endIdx, sent, basicDependency, enhancedDependency, new List <CoreLabel>(sent.SubList(beginIdx, endIdx)), matched);
                mentions.Add(mention);
                mentionSpanSet.Add(mSpan);

                if (mention.originalSpan.Count <= 1)
                {
                    continue;
                }

                // A multi-token span made up solely of NNP* tokens is itself recorded as a named entity.
                // Every token is inspected (no early exit), hence the non-short-circuit "&=".
                bool allProperNouns = true;
                foreach (CoreLabel spanToken in mention.originalSpan)
                {
                    allProperNouns &= spanToken.Tag().StartsWith("NNP");
                }
                if (allProperNouns)
                {
                    namedEntitySpanSet.Add(mSpan);
                }
            }
        }
예제 #24
0
        /// <summary>
        /// For every quote that has no <c>SpeakerAnnotation</c> yet, ranks candidate speakers found in a token
        /// window around the quote and hands a prediction to <c>UpdatePredictions</c>.
        /// </summary>
        /// <remarks>
        /// Predictions are tried in this order, stopping at the first one <c>UpdatePredictions</c> accepts:
        /// conversational (next), conversational (previous), family/animate vocative, and finally the
        /// top-ranked speaker itself.
        /// </remarks>
        /// <param name="doc">Document providing the token list and the quotations.</param>
        public virtual void TopSpeakerInRange(Annotation doc)
        {
            IList <CoreLabel> toks   = doc.Get(typeof(CoreAnnotations.TokensAnnotation));
            IList <ICoreMap>  quotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));

            for (int quoteIndex = 0; quoteIndex < quotes.Count; quoteIndex++)
            {
                ICoreMap quote = quotes[quoteIndex];

                // Quotes that already have a speaker are left alone.
                if (quote.Get(typeof(QuoteAttributionAnnotator.SpeakerAnnotation)) != null)
                {
                    continue;
                }

                Pair <int, int> quoteRun = new Pair <int, int>(quote.Get(typeof(CoreAnnotations.TokenBeginAnnotation)), quote.Get(typeof(CoreAnnotations.TokenEndAnnotation)));

                // First attempt: the regular windows before and after the quote, clipped to the document.
                Pair <int, int>           backwardSpan            = new Pair <int, int>(Math.Max(0, quoteRun.first - BackwardWindow), quoteRun.first - 1);
                IList <Sieve.MentionData> closestMentionsBackward = FindClosestMentionsInSpanBackward(backwardSpan);
                Pair <int, int>           forwardSpan             = new Pair <int, int>(quoteRun.second + 1, Math.Min(quoteRun.second + ForwardWindow, toks.Count - 1));
                IList <Sieve.MentionData> closestMentions         = FindClosestMentionsInSpanForward(forwardSpan);
                Sharpen.Collections.AddAll(closestMentions, closestMentionsBackward);

                Person.Gender  gender      = GetGender(MakeMentionData(quote));
                IList <string> topSpeakers = Counters.ToSortedList(GetTopSpeakers(closestMentions, closestMentionsBackward, gender, quote, false));

                //if none found, try again with bigger window
                if (topSpeakers.IsEmpty())
                {
                    // NOTE(review): unlike the first attempt, the backward mentions are not merged into
                    // closestMentions here - confirm whether that is intended.
                    backwardSpan            = new Pair <int, int>(Math.Max(0, quoteRun.first - BackwardWindowBig), quoteRun.first - 1);
                    closestMentionsBackward = FindClosestMentionsInSpanBackward(backwardSpan);
                    forwardSpan             = new Pair <int, int>(quoteRun.second + 1, Math.Min(quoteRun.second + ForwardWindowBig, toks.Count - 1));
                    closestMentions         = FindClosestMentionsInSpanForward(forwardSpan);
                    topSpeakers             = Counters.ToSortedList(GetTopSpeakers(closestMentions, closestMentionsBackward, gender, quote, true));
                }
                if (topSpeakers.IsEmpty())
                {
                    log.Warn("Watch out, there's an empty top speakers list!");
                    continue;
                }

                // NOTE(review): indexing [0] assumes RemoveQuoteNames never returns an empty list - confirm.
                topSpeakers = RemoveQuoteNames(topSpeakers, quote);
                string topSpeaker = topSpeakers[0];

                if (UpdatePredictions(quote, GetConversationalNextPrediction(quotes, quoteIndex, gender)))
                {
                    continue;
                }
                if (UpdatePredictions(quote, GetConversationalPreviousPrediction(quotes, quoteIndex, gender)))
                {
                    continue;
                }
                if (UpdatePredictions(quote, GetFamilyAnimateVocative(quotes, quoteIndex, gender, topSpeakers)))
                {
                    continue;
                }

                // Nothing more specific applied: fall back to the best-ranked speaker.
                UpdatePredictions(quote, new Pair <string, string>(topSpeaker, string.Empty));
            }
        }
        /// <summary>Annotate all the pronominal mentions in the document.</summary>
        /// <remarks>
        /// Every token accepted by <c>KbpIsPronominalMention</c> becomes a one-token chunk tagged as a person.
        /// The chunk is appended to its sentence's <c>MentionsAnnotation</c> list and to the returned list.
        /// "she" and "he" (compared after lower-casing) additionally get a gender of "FEMALE" / "MALE" on both
        /// the chunk and its tokens.
        /// </remarks>
        /// <param name="ann">The document.</param>
        /// <returns>The list of pronominal mentions in the document.</returns>
        private static IList <ICoreMap> AnnotatePronominalMentions(Annotation ann)
        {
            IList <ICoreMap> pronouns  = new List <ICoreMap>();
            IList <ICoreMap> sentences = ann.Get(typeof(CoreAnnotations.SentencesAnnotation));

            for (int sentenceIndex = 0; sentenceIndex < sentences.Count; sentenceIndex++)
            {
                ICoreMap sentence = sentences[sentenceIndex];

                // A sentence without a TokenBeginAnnotation is treated as starting at token 0. The lookup is
                // held as a nullable int: a plain int can never compare equal to null, which would make the
                // fallback unreachable.
                int? sentenceTokenBegin = sentence.Get(typeof(CoreAnnotations.TokenBeginAnnotation));
                int  annoTokenBegin     = sentenceTokenBegin ?? 0;

                IList <CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
                for (int tokenIndex = 0; tokenIndex < tokens.Count; tokenIndex++)
                {
                    CoreLabel token = tokens[tokenIndex];
                    if (!KbpIsPronominalMention(token))
                    {
                        continue;
                    }

                    ICoreMap pronoun = ChunkAnnotationUtils.GetAnnotatedChunk(tokens, tokenIndex, tokenIndex + 1, annoTokenBegin, null, typeof(CoreAnnotations.TextAnnotation), null);
                    pronoun.Set(typeof(CoreAnnotations.SentenceIndexAnnotation), sentenceIndex);
                    pronoun.Set(typeof(CoreAnnotations.NamedEntityTagAnnotation), KBPRelationExtractor.NERTag.Person.name);
                    pronoun.Set(typeof(CoreAnnotations.EntityTypeAnnotation), KBPRelationExtractor.NERTag.Person.name);

                    // set gender
                    string pronounText   = pronoun.Get(typeof(CoreAnnotations.TextAnnotation)).ToLower();
                    string pronounGender = null;
                    if (pronounText.Equals("she"))
                    {
                        pronounGender = "FEMALE";
                    }
                    else if (pronounText.Equals("he"))
                    {
                        pronounGender = "MALE";
                    }
                    if (pronounGender != null)
                    {
                        pronoun.Set(typeof(CoreAnnotations.GenderAnnotation), pronounGender);
                        foreach (CoreLabel pronounToken in pronoun.Get(typeof(CoreAnnotations.TokensAnnotation)))
                        {
                            pronounToken.Set(typeof(CoreAnnotations.GenderAnnotation), pronounGender);
                        }
                    }

                    // NOTE(review): assumes the sentence already carries a MentionsAnnotation list - confirm.
                    sentence.Get(typeof(CoreAnnotations.MentionsAnnotation)).Add(pronoun);
                    pronouns.Add(pronoun);
                }
            }
            return(pronouns);
        }
예제 #26
0
        /// <summary>
        /// Adds a mention for the pronoun <paramref name="headword"/> and, when the pronoun has conjunct
        /// children, a second mention covering the surrounding noun-phrase span.
        /// </summary>
        /// <param name="headword">The pronoun; its <c>Index()</c> minus one is used as the token position.</param>
        /// <param name="dep">Dependency graph used for the edge and conjunct look-ups.</param>
        /// <param name="s">Sentence providing the tokens and the basic/enhanced dependency graphs.</param>
        /// <param name="mentions">Receives the newly created mentions.</param>
        /// <param name="mentionSpanSet">Spans already turned into mentions; extended with each new span.</param>
        /// <param name="namedEntitySpanSet">Named-entity spans; a pronoun span inside one is not added.</param>
        private void ExtractPronounForHeadword(IndexedWord headword, SemanticGraph dep, ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            IList <CoreLabel> sent     = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            SemanticGraph     basic    = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            SemanticGraph     enhanced = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));

            // Without enhanced dependencies the basic graph stands in for them.
            if (enhanced == null)
            {
                enhanced = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            }

            int headPosition = headword.Index();
            int pronounBegin = headPosition - 1;
            int pronounEnd   = headPosition;

            // handle "you all", "they both" etc: absorb the following token when it is attached to the pronoun
            bool quantifierFollows = sent.Count > headPosition && sent[headPosition].Word().Matches("all|both");
            if (quantifierFollows && dep.GetEdge(headword, dep.GetNodeByIndex(headPosition + 1)) != null)
            {
                pronounEnd++;
            }

            IntPair pronounSpan = new IntPair(pronounBegin, pronounEnd);

            if (!mentionSpanSet.Contains(pronounSpan) && !InsideNE(pronounSpan, namedEntitySpanSet))
            {
                int     dummyMentionId = -1;
                Mention pronounMention = new Mention(dummyMentionId, pronounBegin, pronounEnd, sent, basic, enhanced, new List <CoreLabel>(sent.SubList(pronounBegin, pronounEnd)));
                pronounMention.headIndex  = headPosition - 1;
                pronounMention.headWord   = sent[pronounMention.headIndex];
                pronounMention.headString = pronounMention.headWord.Word().ToLower(Locale.English);
                mentions.Add(pronounMention);
                mentionSpanSet.Add(pronounSpan);
            }

            // when pronoun is a part of conjunction (e.g., you and I)
            if (dep.GetChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct).Count > 0)
            {
                IntPair npSpan    = GetNPSpan(headword, dep, sent);
                int     conjBegin = npSpan.Get(0);
                int     conjEnd   = npSpan.Get(1) + 1;

                // try not to have span that ends with ,
                if (",".Equals(sent[conjEnd - 1].Word()))
                {
                    conjEnd--;
                }
                AddMention(conjBegin, conjEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, sent, basic, enhanced);
            }
        }
        /// <summary>
        /// Groups the document's quotes by the <c>ParagraphIndexAnnotation</c> of the sentence addressed by each
        /// quote's <c>SentenceBeginAnnotation</c>.
        /// </summary>
        /// <param name="doc">Document providing the quotations and the sentences.</param>
        /// <returns>Map from paragraph index to the quotes of that paragraph, in document quote order.</returns>
        private static IDictionary <int, IList <ICoreMap> > GetQuotesInParagraph(Annotation doc)
        {
            IList <ICoreMap> quotes    = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));
            IList <ICoreMap> sentences = doc.Get(typeof(CoreAnnotations.SentencesAnnotation));
            IDictionary <int, IList <ICoreMap> > quotesByParagraph = new Dictionary <int, IList <ICoreMap> >();

            foreach (ICoreMap quote in quotes)
            {
                ICoreMap firstSentence = sentences[quote.Get(typeof(CoreAnnotations.SentenceBeginAnnotation))];
                var      paragraph     = firstSentence.Get(typeof(CoreAnnotations.ParagraphIndexAnnotation));

                // Create the bucket on first use, then append.
                quotesByParagraph.PutIfAbsent(paragraph, new List <ICoreMap>());
                quotesByParagraph[paragraph].Add(quote);
            }
            return(quotesByParagraph);
        }
예제 #28
0
        /// <summary>
        /// Proposes a speaker for the quote at <paramref name="quoteIndex"/> from the later quotes that lie
        /// exactly two paragraphs after it.
        /// </summary>
        /// <param name="quotes">All quotes of the document, in order.</param>
        /// <param name="quoteIndex">Index into <paramref name="quotes"/> of the quote to label.</param>
        /// <param name="gender">Gender determined for the quote to label.</param>
        /// <returns>
        /// A (speaker, modifier) pair. Both components are null when no candidate quote qualified; when
        /// several qualify, the last one in document order wins.
        /// </returns>
        public virtual Pair <string, string> GetConversationalNextPrediction(IList <ICoreMap> quotes, int quoteIndex, Person.Gender gender)
        {
            string topSpeaker = null;
            string modifier   = null;
            // if the n + 2 paragraph quotes are labelled with a speaker and
            // that speaker's gender does not disagree, label with that speaker
            IList <int> quotesInNextNext = new List <int>();
            ICoreMap    quote            = quotes[quoteIndex];
            int         quoteParagraph   = GetQuoteParagraph(quote);

            for (int j = quoteIndex + 1; j < quotes.Count; j++)
            {
                if (GetQuoteParagraph(quotes[j]) == quoteParagraph + 2)
                {
                    quotesInNextNext.Add(j);
                }
            }
            foreach (int nextNext in quotesInNextNext)
            {
                ICoreMap          nextNextQuote = quotes[nextNext];
                string            speakerName   = nextNextQuote.Get(typeof(QuoteAttributionAnnotator.SpeakerAnnotation));
                Sieve.MentionData md            = MakeMentionData(nextNextQuote);
                // The speaker must be present in every case. "&&" binds tighter than "||", so the gender
                // alternatives have to be grouped explicitly; otherwise a gender match alone would let an
                // unlabelled quote overwrite topSpeaker with null.
                if (speakerName != null && (gender == Person.Gender.Unk || GetGender(md) == gender))
                {
                    topSpeaker = speakerName;
                    modifier   = " conversation - next";
                }
            }
            return(new Pair <string, string>(topSpeaker, modifier));
        }
예제 #29
0
        /// <summary>
        /// Proposes a speaker for the quote at <paramref name="quoteIndex"/> from the earlier quotes that lie
        /// exactly two paragraphs before it.
        /// </summary>
        /// <param name="quotes">All quotes of the document, in order.</param>
        /// <param name="quoteIndex">Index into <paramref name="quotes"/> of the quote to label.</param>
        /// <param name="gender">Gender determined for the quote to label.</param>
        /// <returns>
        /// A (speaker, modifier) pair. Both components are null when no candidate quote qualified; when
        /// several qualify, the one visited last (the earliest in the document) wins.
        /// </returns>
        public virtual Pair <string, string> GetConversationalPreviousPrediction(IList <ICoreMap> quotes, int quoteIndex, Person.Gender gender)
        {
            string topSpeaker = null;
            string modifier   = null;
            // if the n - 2 paragraph quotes are labelled with a speaker and
            // that speaker's gender does not disagree, label with that speaker
            IList <int> quotesInPrevPrev = new List <int>();
            ICoreMap    quote            = quotes[quoteIndex];
            int         quoteParagraph   = GetQuoteParagraph(quote);

            // Walks backwards, so candidates are collected nearest-first.
            for (int j = quoteIndex - 1; j >= 0; j--)
            {
                if (GetQuoteParagraph(quotes[j]) == quoteParagraph - 2)
                {
                    quotesInPrevPrev.Add(j);
                }
            }
            foreach (int prevPrev in quotesInPrevPrev)
            {
                ICoreMap prevprevQuote = quotes[prevPrev];
                string   speakerName   = prevprevQuote.Get(typeof(QuoteAttributionAnnotator.SpeakerAnnotation));
                // The speaker must be present in every case. "&&" binds tighter than "||", so the gender
                // alternatives have to be grouped explicitly; otherwise a gender match alone would let an
                // unlabelled quote overwrite topSpeaker with null.
                if (speakerName != null && (gender == Person.Gender.Unk || GetGender(MakeMentionData(prevprevQuote)) == gender))
                {
                    topSpeaker = speakerName;
                    modifier   = " conversation - prev";
                }
            }
            return(new Pair <string, string>(topSpeaker, modifier));
        }
예제 #30
0
        /// <summary>
        /// Returns whichever is nearer to the quote: the closest mention found before it or the closest
        /// mention found after it. On a tie the forward mention is returned.
        /// </summary>
        /// <param name="quote">Quote whose token begin/end annotations delimit the two search spans.</param>
        /// <returns>The nearer of the two mentions.</returns>
        public virtual Sieve.MentionData GetClosestMention(ICoreMap quote)
        {
            // Search backwards from the token just before the quote down to the start of the document.
            var quoteBegin = quote.Get(typeof(CoreAnnotations.TokenBeginAnnotation));
            Sieve.MentionData before = FindClosestMentionInSpanBackward(new Pair <int, int>(0, quoteBegin - 1));

            // Search forwards from the quote's end annotation up to the last token of the document.
            var quoteEnd = quote.Get(typeof(CoreAnnotations.TokenEndAnnotation));
            Sieve.MentionData after = FindClosestMentionInSpanForward(new Pair <int, int>(quoteEnd, doc.Get(typeof(CoreAnnotations.TokensAnnotation)).Count - 1));

            // NOTE(review): both results are dereferenced unconditionally; if the finders can return null
            // when a span holds no mention, this throws - confirm against their implementation.
            int backDistance    = quoteBegin - before.end;
            int forwardDistance = after.begin - quoteEnd + 1;

            return(backDistance < forwardDistance ? before : after);
        }
예제 #31
0
        /// <summary>
        /// Copy constructor.
        /// </summary>
        /// <remarks>
        /// Sizes the key and value arrays to the number of keys in <paramref name="other"/> and copies each
        /// key with the value <c>other.Get(key)</c> returns for it; the values themselves are not cloned.
        /// </remarks>
        /// <param name="other">The ArrayCoreMap to copy. It may not be null.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="other"/> is null.</exception>
        public ArrayCoreMap(ICoreMap other)
        {
            // Enforce the documented precondition explicitly rather than failing with a
            // NullReferenceException on the first member access.
            if (other == null)
            {
                throw new System.ArgumentNullException("other");
            }

            /*Set<Class<?>>*/
            var otherKeys = other.KeySet();

            psize = otherKeys.Count;
            keys = new Type[psize];
            values = new Object[psize];

            // Keys and values are stored at matching positions of the two parallel arrays.
            int i = 0;
            foreach (var key in otherKeys)
            {
                this.keys[i] = key;
                this.values[i] = other.Get(key);
                i++;
            }
        }