/// <summary>
/// Attributes a quote to a name when the rest of the quote's sentence contains exactly one name.
/// </summary>
/// <remarks>
/// Quotes that already carry a <c>MentionAnnotation</c>, or for which
/// <c>QuoteAttributionUtils.GetRemainderInSentence</c> yields no range, are skipped.
/// A mention is filled in only when the single scanned name maps to exactly one entry in
/// <c>characterMap</c> and the pronoun scan of the same range found nothing.
/// </remarks>
/// <param name="doc">Annotation from which the <c>QuotationsAnnotation</c> list is read.</param>
public virtual void OneNameSentence(Annotation doc)
{
    IList<ICoreMap> allQuotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));
    foreach (ICoreMap currentQuote in allQuotes)
    {
        // Leave quotes that already have a mention untouched.
        bool alreadyAttributed = currentQuote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null;
        if (alreadyAttributed)
        {
            continue;
        }

        Pair<int, int> remainder = QuoteAttributionUtils.GetRemainderInSentence(doc, currentQuote);
        if (remainder == null)
        {
            continue;
        }

        // Both scans run before any decision is taken.
        Pair<List<string>, List<Pair<int, int>>> nameScan = ScanForNames(remainder);
        List<string> foundNames = nameScan.first;
        List<Pair<int, int>> foundNameSpans = nameScan.second;
        List<int> foundPronouns = ScanForPronouns(remainder);

        if (foundNames.Count != 1)
        {
            continue;
        }

        // Guess only if exactly one name was seen, it resolves to exactly one
        // person, and no pronoun was found in the same range.
        IList<Person> candidates = characterMap[foundNames[0]];
        if (candidates.Count != 1 || foundPronouns.Count != 0)
        {
            continue;
        }

        FillInMention(currentQuote, TokenRangeToString(foundNameSpans[0]), foundNameSpans[0].first, foundNameSpans[0].second, sieveName, Name);
    }
}
/// <summary>
/// Selects the nearest mention to the left of a quote (original note: "if the quote is ending a paragraph").
/// </summary>
/// <remarks>
/// A quote is attributed only when all of the following hold:
/// the range returned by <c>GetRemainderInSentence</c> ends on the token immediately before the quote,
/// that token's word is ",", and neither the previous nor the next quote in the quotation list
/// has the same paragraph index as this quote. Mentions whose type equals "animate noun" are not used.
/// </remarks>
/// <param name="doc">Annotation from which the token and quotation lists are read.</param>
public virtual void ParagraphEndQuoteClosestBefore(Annotation doc)
{
    IList<CoreLabel> docTokens = doc.Get(typeof(CoreAnnotations.TokensAnnotation));
    IList<ICoreMap> allQuotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));
    foreach (ICoreMap currentQuote in allQuotes)
    {
        // Leave quotes that already have a mention untouched.
        bool alreadyAttributed = currentQuote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null;
        if (alreadyAttributed)
        {
            continue;
        }

        Pair<int, int> remainder = QuoteAttributionUtils.GetRemainderInSentence(doc, currentQuote);
        if (remainder == null)
        {
            continue;
        }

        // The scan result is not consulted by this sieve; the call is kept so the
        // method's sequence of calls is left untouched.
        Pair<List<string>, List<Pair<int, int>>> nameScan = ScanForNames(remainder);
        List<string> scannedNames = nameScan.first;

        // True when the remainder range ends right before the quote starts,
        // i.e. the range precedes the quote rather than following it.
        int quoteFirstToken = currentQuote.Get(typeof(CoreAnnotations.TokenBeginAnnotation));
        bool rangePrecedesQuote = remainder.second.Equals(quoteFirstToken - 1);

        int paragraphOfQuote = QuoteAttributionUtils.GetQuoteParagraphIndex(doc, currentQuote);
        int positionInQuoteList = currentQuote.Get(typeof(CoreAnnotations.QuotationIndexAnnotation));

        // Check both neighbours in the quotation list for a shared paragraph.
        bool previousSharesParagraph = false;
        if (positionInQuoteList > 0)
        {
            ICoreMap before = allQuotes[positionInQuoteList - 1];
            int paragraphOfBefore = QuoteAttributionUtils.GetQuoteParagraphIndex(doc, before);
            previousSharesParagraph = paragraphOfBefore == paragraphOfQuote;
        }

        bool nextSharesParagraph = false;
        if (positionInQuoteList < allQuotes.Count - 1)
        {
            ICoreMap after = allQuotes[positionInQuoteList + 1];
            int paragraphOfAfter = QuoteAttributionUtils.GetQuoteParagraphIndex(doc, after);
            nextSharesParagraph = paragraphOfAfter == paragraphOfQuote;
        }

        bool aloneInParagraph = !(previousSharesParagraph || nextSharesParagraph);

        // The guards below are evaluated in the same order as a short-circuit conjunction.
        if (!rangePrecedesQuote)
        {
            continue;
        }

        if (!docTokens[remainder.second].Word().Equals(","))
        {
            continue;
        }

        if (!aloneInParagraph)
        {
            continue;
        }

        Sieve.MentionData nearest = FindClosestMentionInSpanBackward(remainder);
        if (nearest == null || nearest.type.Equals("animate noun"))
        {
            continue;
        }

        FillInMention(currentQuote, nearest, sieveName);
    }
}
/// <summary>
/// Attributes quotes from subject/verb pairs in each quote's dependency graph
/// (original note: "using quote-removed depparses").
/// </summary>
/// <remarks>
/// For every quote that has no <c>MentionAnnotation</c> yet and for which
/// <c>GetRemainderInSentence</c> returns a range, the method:
/// <list type="number">
/// <item>scans the range for names;</item>
/// <item>collects SUBJ/VERB node pairs matched by <c>subjVerbPattern</c> in the graph stored under
/// <c>EnhancedPlusPlusDependenciesAnnotation</c> on the quote;</item>
/// <item>additionally, for each node whose part of speech matches "VB.*", pairs its "nsubj" child with
/// every child attached through a "dep" relation whose tag starts with "VB";</item>
/// <item>for each pair whose subject and verb both lie in the range and whose verb lemma is in
/// <c>commonSpeechWords</c>, fills in a mention according to the subject tag: "NNP" (name),
/// "PRP" (pronoun), or "NN" contained in <c>animacySet</c> (animate noun).</item>
/// </list>
/// </remarks>
/// <param name="doc">Annotation from which the quotation list is read.</param>
public virtual void DependencyParses(Annotation doc)
{
    IList<ICoreMap> quotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));
    foreach (ICoreMap quote in quotes)
    {
        // Leave quotes that already have a mention untouched.
        if (quote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null)
        {
            continue;
        }

        Pair<int, int> range = QuoteAttributionUtils.GetRemainderInSentence(doc, quote);
        if (range == null)
        {
            continue;
        }

        //search for mentions in the first run
        Pair<List<string>, List<Pair<int, int>>> namesAndNameIndices = ScanForNames(range);
        List<string> names = namesAndNameIndices.first;
        List<Pair<int, int>> nameIndices = namesAndNameIndices.second;

        SemanticGraph graph = quote.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation));
        SemgrexMatcher matcher = subjVerbPattern.Matcher(graph);
        IList<Pair<IndexedWord, IndexedWord>> subjVerbPairs = new List<Pair<IndexedWord, IndexedWord>>();

        //TODO: check and see if this is necessary
        while (matcher.Find())
        {
            IndexedWord subj = matcher.GetNode("SUBJ");
            IndexedWord verb = matcher.GetNode("VERB");
            subjVerbPairs.Add(new Pair<IndexedWord, IndexedWord>(subj, verb));
        }

        IList<IndexedWord> vbs = graph.GetAllNodesByPartOfSpeechPattern("VB.*");
        foreach (IndexedWord iw in vbs)
        {
            // does it have an nsubj child?
            ICollection<IndexedWord> children = graph.GetChildren(iw);
            IList<IndexedWord> deps = Generics.NewArrayList();
            IndexedWord nsubj = null;
            foreach (IndexedWord child in children)
            {
                SemanticGraphEdge sge = graph.GetEdge(iw, child);
                if (sge.GetRelation().GetShortName().Equals("dep") && child.Tag().StartsWith("VB"))
                {
                    deps.Add(child);
                }
                else if (sge.GetRelation().GetShortName().Equals("nsubj"))
                {
                    nsubj = child;
                }
            }

            // Share this verb's subject with every verb hanging off it through "dep".
            if (nsubj != null)
            {
                foreach (IndexedWord dep in deps)
                {
                    subjVerbPairs.Add(new Pair<IndexedWord, IndexedWord>(nsubj, dep));
                }
            }
        }

        //look for a speech verb
        foreach (Pair<IndexedWord, IndexedWord> svPair in subjVerbPairs)
        {
            IndexedWord verb = svPair.second;
            IndexedWord subj = svPair.first;

            //check if subj and verb outside of quote
            int verbTokPos = TokenToLocation(verb.BackingLabel());
            // The subject position must come from the subject's own label; deriving it
            // from the verb would make the subject check a repeat of the verb check.
            int subjTokPos = TokenToLocation(subj.BackingLabel());
            if (!(InRange(range, verbTokPos) && InRange(range, subjTokPos) && commonSpeechWords.Contains(verb.Lemma())))
            {
                continue;
            }

            if (subj.Tag().Equals("NNP"))
            {
                int startChar = subj.BeginPosition();
                for (int i = 0; i < names.Count; i++)
                {
                    Pair<int, int> nameIndex = nameIndices[i];
                    //avoid names that don't actually exist in
                    if (RangeContainsCharIndex(nameIndex, startChar))
                    {
                        FillInMention(quote, TokenRangeToString(nameIndex), nameIndex.first, nameIndex.second, sieveName, Name);
                        // NOTE(review): this break leaves only the name loop, so the remaining
                        // subject/verb pairs are still examined; the pronoun and animate-noun
                        // branches below stop the pair loop instead. Confirm the asymmetry is intended.
                        break;
                    }
                }
            }
            else if (subj.Tag().Equals("PRP"))
            {
                int loc = TokenToLocation(subj.BackingLabel());
                FillInMention(quote, subj.Word(), loc, loc, sieveName, Pronoun);
                break;
            }
            else if (subj.Tag().Equals("NN") && animacySet.Contains(subj.Word()))
            {
                int loc = TokenToLocation(subj.BackingLabel());
                FillInMention(quote, subj.Word(), loc, loc, sieveName, AnimateNoun);
                break;
            }
        }
    }
}