예제 #1
0
        /// <summary>
        /// For every quote that has no mention yet, looks at the remainder of the
        /// quote's sentence and, when that remainder contains exactly one name,
        /// that name maps to exactly one entry in <c>characterMap</c>, and no
        /// pronouns were found in the remainder, fills in that name as the mention.
        /// </summary>
        /// <param name="doc">Annotated document whose quotations are examined.</param>
        public virtual void OneNameSentence(Annotation doc)
        {
            IList<ICoreMap> allQuotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));

            foreach (ICoreMap quote in allQuotes)
            {
                // Quotes that already carry a mention are left untouched.
                bool alreadyAttributed = quote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null;
                if (alreadyAttributed)
                {
                    continue;
                }

                Pair<int, int> span = QuoteAttributionUtils.GetRemainderInSentence(doc, quote);
                if (span == null)
                {
                    continue;
                }

                // Both scans run unconditionally, in this order, before any decision is made.
                Pair<List<string>, List<Pair<int, int>>> scanResult = ScanForNames(span);
                List<string> foundNames = scanResult.first;
                List<Pair<int, int>> foundNameSpans = scanResult.second;
                List<int> pronounPositions = ScanForPronouns(span);

                if (foundNames.Count != 1)
                {
                    continue;
                }

                // Only guess when the single name is unambiguous and no pronoun competes with it.
                IList<Person> candidates = characterMap[foundNames[0]];
                bool unambiguousPerson = candidates.Count == 1;
                if (unambiguousPerson && pronounPositions.Count == 0)
                {
                    Pair<int, int> nameSpan = foundNameSpans[0];
                    FillInMention(quote, TokenRangeToString(nameSpan), nameSpan.first, nameSpan.second, sieveName, Name);
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Selects the nearest mention to the left of a quote. For every quote that
        /// has no mention yet, the closest mention found scanning backward through
        /// the remainder of the quote's sentence is filled in when all of the
        /// following hold: the remainder ends on the token immediately before the
        /// quote, that token is ",", neither the previous nor the next quote lies in
        /// the same paragraph, and the mention's type is not "animate noun".
        /// </summary>
        /// <param name="doc">Annotated document whose quotations are examined.</param>
        public virtual void ParagraphEndQuoteClosestBefore(Annotation doc)
        {
            IList<CoreLabel> docTokens = doc.Get(typeof(CoreAnnotations.TokensAnnotation));
            IList<ICoreMap> allQuotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));

            foreach (ICoreMap quote in allQuotes)
            {
                // Quotes that already carry a mention are left untouched.
                bool alreadyAttributed = quote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null;
                if (alreadyAttributed)
                {
                    continue;
                }

                Pair<int, int> span = QuoteAttributionUtils.GetRemainderInSentence(doc, quote);
                if (span == null)
                {
                    continue;
                }

                // The scan result is not used by this sieve; the call is kept exactly as before.
                // TODO(review): confirm ScanForNames has no side effects, then drop this call.
                ScanForNames(span);

                // Does the remainder end on the token right before the quote opens?
                int quoteStartToken = quote.Get(typeof(CoreAnnotations.TokenBeginAnnotation));
                bool spanPrecedesQuote = span.second.Equals(quoteStartToken - 1);

                int paragraphOfQuote = QuoteAttributionUtils.GetQuoteParagraphIndex(doc, quote);
                int quotePosition = quote.Get(typeof(CoreAnnotations.QuotationIndexAnnotation));

                // Compare against the neighbouring quotes (previous first, then next).
                bool sharesParagraphWithPrevious = false;
                if (quotePosition > 0)
                {
                    ICoreMap previous = allQuotes[quotePosition - 1];
                    sharesParagraphWithPrevious = QuoteAttributionUtils.GetQuoteParagraphIndex(doc, previous) == paragraphOfQuote;
                }

                bool sharesParagraphWithNext = false;
                if (quotePosition < allQuotes.Count - 1)
                {
                    ICoreMap following = allQuotes[quotePosition + 1];
                    sharesParagraphWithNext = QuoteAttributionUtils.GetQuoteParagraphIndex(doc, following) == paragraphOfQuote;
                }

                bool aloneInParagraph = !sharesParagraphWithPrevious && !sharesParagraphWithNext;

                // Same evaluation order as the positive form: position, then the "," token, then paragraph.
                if (!spanPrecedesQuote || !docTokens[span.second].Word().Equals(",") || !aloneInParagraph)
                {
                    continue;
                }

                Sieve.MentionData nearest = FindClosestMentionInSpanBackward(span);
                if (nearest == null || nearest.type.Equals("animate noun"))
                {
                    continue;
                }

                FillInMention(quote, nearest, sieveName);
            }
        }
예제 #3
0
        /// <summary>
        /// Attributes quotes using dependency parses (per the original note: the
        /// quote-removed depparses). For every quote that has no mention yet,
        /// subject/verb pairs are collected from the quote's enhanced++ dependency
        /// graph, and a pair is used when both its verb and its subject lie inside
        /// the remainder of the quote's sentence and the verb's lemma is in
        /// <c>commonSpeechWords</c>. The subject is then filled in as a name (tag
        /// NNP), a pronoun (tag PRP) or an animate noun (tag NN found in
        /// <c>animacySet</c>).
        /// </summary>
        /// <param name="doc">Annotated document whose quotations are examined.</param>
        public virtual void DependencyParses(Annotation doc)
        {
            IList<ICoreMap> quotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));

            foreach (ICoreMap quote in quotes)
            {
                // Quotes that already carry a mention are left untouched.
                if (quote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null)
                {
                    continue;
                }
                Pair<int, int> range = QuoteAttributionUtils.GetRemainderInSentence(doc, quote);
                if (range == null)
                {
                    continue;
                }
                //search for mentions in the first run
                Pair<List<string>, List<Pair<int, int>>> namesAndNameIndices = ScanForNames(range);
                List<string> names = namesAndNameIndices.first;
                List<Pair<int, int>> nameIndices = namesAndNameIndices.second;
                // NOTE(review): assumes every quote carries this annotation; a missing graph is not handled here.
                SemanticGraph graph = quote.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation));
                SemgrexMatcher matcher = subjVerbPattern.Matcher(graph);
                IList<Pair<IndexedWord, IndexedWord>> subjVerbPairs = new List<Pair<IndexedWord, IndexedWord>>();
                // Pairs matched directly by the subject/verb pattern.
                //TODO: check and see if this is necessary
                while (matcher.Find())
                {
                    IndexedWord subj = matcher.GetNode("SUBJ");
                    IndexedWord verb = matcher.GetNode("VERB");
                    subjVerbPairs.Add(new Pair<IndexedWord, IndexedWord>(subj, verb));
                }
                // Additional pairs: for each verb with an nsubj child, pair that subject
                // with every verb-tagged child attached through a "dep" relation.
                IList<IndexedWord> vbs = graph.GetAllNodesByPartOfSpeechPattern("VB.*");
                foreach (IndexedWord iw in vbs)
                {
                    ICollection<IndexedWord> children = graph.GetChildren(iw);
                    IList<IndexedWord> deps = new List<IndexedWord>();
                    IndexedWord nsubj = null;
                    foreach (IndexedWord child in children)
                    {
                        SemanticGraphEdge sge = graph.GetEdge(iw, child);
                        string relation = sge.GetRelation().GetShortName();
                        if (relation.Equals("dep") && child.Tag().StartsWith("VB"))
                        {
                            deps.Add(child);
                        }
                        else if (relation.Equals("nsubj"))
                        {
                            // If several nsubj children exist, the last one seen wins.
                            nsubj = child;
                        }
                    }
                    if (nsubj != null)
                    {
                        foreach (IndexedWord dep in deps)
                        {
                            subjVerbPairs.Add(new Pair<IndexedWord, IndexedWord>(nsubj, dep));
                        }
                    }
                }
                //look for a speech verb
                foreach (Pair<IndexedWord, IndexedWord> pair in subjVerbPairs)
                {
                    IndexedWord subj = pair.first;
                    IndexedWord verb = pair.second;
                    //check if subj and verb outside of quote
                    int verbTokPos = TokenToLocation(verb.BackingLabel());
                    // Fixed: this position was previously derived from the verb's label,
                    // so the subject's location was never actually checked.
                    int subjTokPos = TokenToLocation(subj.BackingLabel());
                    if (!InRange(range, verbTokPos) || !InRange(range, subjTokPos) || !commonSpeechWords.Contains(verb.Lemma()))
                    {
                        continue;
                    }
                    if (subj.Tag().Equals("NNP"))
                    {
                        int startChar = subj.BeginPosition();
                        for (int i = 0; i < names.Count; i++)
                        {
                            Pair<int, int> nameIndex = nameIndices[i];
                            // Only use a scanned name whose range contains the subject's start position.
                            if (RangeContainsCharIndex(nameIndex, startChar))
                            {
                                FillInMention(quote, TokenRangeToString(nameIndex), nameIndex.first, nameIndex.second, sieveName, Name);
                                // NOTE(review): this leaves only the name loop; unlike the pronoun and
                                // noun branches, later subject/verb pairs are still examined. Confirm intended.
                                break;
                            }
                        }
                    }
                    else if (subj.Tag().Equals("PRP"))
                    {
                        int loc = TokenToLocation(subj.BackingLabel());
                        FillInMention(quote, subj.Word(), loc, loc, sieveName, Pronoun);
                        break;
                    }
                    else if (subj.Tag().Equals("NN") && animacySet.Contains(subj.Word()))
                    {
                        int loc = TokenToLocation(subj.BackingLabel());
                        FillInMention(quote, subj.Word(), loc, loc, sieveName, AnimateNoun);
                        break;
                    }
                }
            }
        }