/// <summary>
/// Attributes a quote to a top speaker when the quote's mention is a family-relation
/// animate noun that sits inside another quote whose speaker is already annotated.
/// </summary>
/// <param name="quotes">All quotes under consideration.</param>
/// <param name="quote_index">Index into <paramref name="quotes"/> of the quote being resolved.</param>
/// <param name="gender">Gender associated with the mention; an unknown gender disables this rule.</param>
/// <param name="topSpeakers">Candidate speaker identifiers whose name parts are separated by underscores.</param>
/// <returns>
/// A pair of (speaker, "family animate") for the first candidate whose last name part is a
/// suffix of the enclosing quote's speaker; otherwise a pair of (null, null).
/// </returns>
public virtual Pair<string, string> GetFamilyAnimateVocative(IList<ICoreMap> quotes, int quote_index, Person.Gender gender, IList<string> topSpeakers)
{
    Sieve.MentionData mention = MakeMentionData(quotes[quote_index]);
    if (mention.text == null)
    {
        return new Pair<string, string>(null, null);
    }

    // The rule applies only to family-relation animate nouns with a known gender.
    if (!mention.type.Equals("animate noun") || !familyRelations.Contains(mention.text.ToLower()) || gender == Person.Gender.Unk)
    {
        return new Pair<string, string>(null, null);
    }

    // Locate the quote whose span contains the mention itself.
    int enclosingQuoteIndex = GetQuoteContainingRange(quotes, new Pair<int, int>(mention.begin, mention.end));
    if (enclosingQuoteIndex < 0)
    {
        return new Pair<string, string>(null, null);
    }

    string enclosingSpeaker = quotes[enclosingQuoteIndex].Get(typeof(QuoteAttributionAnnotator.SpeakerAnnotation));
    if (enclosingSpeaker == null)
    {
        return new Pair<string, string>(null, null);
    }

    foreach (string candidate in topSpeakers)
    {
        // Compare on the last underscore-separated component of the candidate's name.
        string[] nameParts = candidate.Split("_");
        string lastPart = nameParts[nameParts.Length - 1];
        if (enclosingSpeaker.EndsWith(lastPart))
        {
            return new Pair<string, string>(candidate, "family animate");
        }
    }

    return new Pair<string, string>(null, null);
}
/// <summary>
/// Predicts the speaker of a quote from the quotes located two paragraphs after it:
/// if such a quote already has a speaker and that speaker's gender does not disagree
/// with <paramref name="gender"/>, that speaker is proposed.
/// </summary>
/// <param name="quotes">All quotes under consideration.</param>
/// <param name="quoteIndex">Index into <paramref name="quotes"/> of the quote being resolved.</param>
/// <param name="gender">Gender expected of the speaker; an unknown gender accepts any labelled speaker.</param>
/// <returns>
/// A pair of (speaker, " conversation - next") taken from the last qualifying quote,
/// or a pair of (null, null) when no later quote qualifies.
/// </returns>
public virtual Pair<string, string> GetConversationalNextPrediction(IList<ICoreMap> quotes, int quoteIndex, Person.Gender gender)
{
    string topSpeaker = null;
    string modifier = null;

    // Only quotes exactly two paragraphs after the current quote are candidates.
    int targetParagraph = GetQuoteParagraph(quotes[quoteIndex]) + 2;

    for (int j = quoteIndex + 1; j < quotes.Count; j++)
    {
        ICoreMap candidate = quotes[j];
        if (GetQuoteParagraph(candidate) != targetParagraph)
        {
            continue;
        }

        string speakerName = candidate.Get(typeof(QuoteAttributionAnnotator.SpeakerAnnotation));
        if (speakerName == null)
        {
            // An unlabelled quote can never supply a speaker, whatever its gender.
            continue;
        }

        // The speaker check must guard BOTH gender alternatives. Without the grouping,
        // "&&" binds tighter than "||", so a bare gender match would accept a null
        // speaker and overwrite a speaker found on an earlier candidate.
        if (gender == Person.Gender.Unk || GetGender(MakeMentionData(candidate)) == gender)
        {
            topSpeaker = speakerName;
            modifier = " conversation - next";
        }
    }

    return new Pair<string, string>(topSpeaker, modifier);
}
/// <summary>
/// Selects the nearest mention to the left of a quote when the remainder of the sentence
/// immediately precedes the quote, ends in a comma, and the quote is the only quote in
/// its paragraph. Quotes that already carry a mention are left untouched.
/// </summary>
/// <param name="doc">Annotated document supplying the tokens and quotations.</param>
public virtual void ParagraphEndQuoteClosestBefore(Annotation doc)
{
    IList<CoreLabel> tokens = doc.Get(typeof(CoreAnnotations.TokensAnnotation));
    IList<ICoreMap> quotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));

    foreach (ICoreMap quote in quotes)
    {
        // Skip quotes that were already resolved.
        if (quote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null)
        {
            continue;
        }

        Pair<int, int> range = QuoteAttributionUtils.GetRemainderInSentence(doc, quote);
        if (range == null)
        {
            continue;
        }

        // Search for mentions in the first run.
        // NOTE(review): the scan result is not used below; the call is retained in case
        // ScanForNames has side effects - confirm and remove if it does not.
        Pair<List<string>, List<Pair<int, int>>> scanResult = ScanForNames(range);
        List<string> scannedNames = scanResult.first;

        // The range precedes the quote when it ends on the token just before the quote starts.
        int quoteStartToken = quote.Get(typeof(CoreAnnotations.TokenBeginAnnotation));
        bool rangePrecedesQuote = range.second == quoteStartToken - 1;

        int paragraphOfQuote = QuoteAttributionUtils.GetQuoteParagraphIndex(doc, quote);
        int indexOfQuote = quote.Get(typeof(CoreAnnotations.QuotationIndexAnnotation));

        // A neighbouring quote in the same paragraph disqualifies this quote.
        bool sharesParagraphWithPrevious = false;
        if (indexOfQuote > 0)
        {
            int previousParagraph = QuoteAttributionUtils.GetQuoteParagraphIndex(doc, quotes[indexOfQuote - 1]);
            sharesParagraphWithPrevious = previousParagraph == paragraphOfQuote;
        }

        bool sharesParagraphWithNext = false;
        if (indexOfQuote < quotes.Count - 1)
        {
            int followingParagraph = QuoteAttributionUtils.GetQuoteParagraphIndex(doc, quotes[indexOfQuote + 1]);
            sharesParagraphWithNext = followingParagraph == paragraphOfQuote;
        }

        bool aloneInParagraph = !(sharesParagraphWithPrevious || sharesParagraphWithNext);

        if (!rangePrecedesQuote || !tokens[range.second].Word().Equals(",") || !aloneInParagraph)
        {
            continue;
        }

        Sieve.MentionData nearest = FindClosestMentionInSpanBackward(range);
        if (nearest == null || nearest.type.Equals("animate noun"))
        {
            // Animate nouns are not accepted by this rule.
            continue;
        }

        FillInMention(quote, nearest, sieveName);
    }
}
/// <summary>
/// Assigns the closest mention to each quote that passes the mention-annotation filter below.
/// </summary>
/// <param name="doc">Annotated document supplying the quotations.</param>
public override void DoQuoteToMention(Annotation doc)
{
    IList<ICoreMap> quotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));
    foreach (ICoreMap quote in quotes)
    {
        // NOTE(review): this processes quotes that ALREADY have a mention annotation and
        // skips those without one. Other mention passes in this code skip quotes that
        // already have a mention, so the condition may be inverted - confirm intent
        // before changing; the filter is deliberately left as it was.
        if (quote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) == null)
        {
            continue;
        }

        Sieve.MentionData md = GetClosestMention(quote);
        if (md == null)
        {
            // GetClosestMention is virtual and may find nothing; FillInMention reads
            // fields of md, so a null result must be skipped rather than dereferenced.
            continue;
        }

        FillInMention(quote, md, sieveName);
    }
}
/// <summary>
/// Finds the mention nearest to a quote, looking both before the quote (from the start
/// of the document) and after it (to the last token).
/// </summary>
/// <param name="quote">The quote whose nearest mention is wanted.</param>
/// <returns>
/// The backward mention when it is strictly closer than the forward one, otherwise the
/// forward mention. If only one side yields a mention, that mention is returned; if
/// neither side does, null is returned.
/// </returns>
public virtual Sieve.MentionData GetClosestMention(ICoreMap quote)
{
    // Read each annotation once instead of repeating the lookups.
    int quoteBegin = quote.Get(typeof(CoreAnnotations.TokenBeginAnnotation));
    int quoteEnd = quote.Get(typeof(CoreAnnotations.TokenEndAnnotation));
    int lastTokenIndex = doc.Get(typeof(CoreAnnotations.TokensAnnotation)).Count - 1;

    Sieve.MentionData closestBackward = FindClosestMentionInSpanBackward(new Pair<int, int>(0, quoteBegin - 1));
    Sieve.MentionData closestForward = FindClosestMentionInSpanForward(new Pair<int, int>(quoteEnd, lastTokenIndex));

    // Either search may come back empty (e.g. a quote at the very start or end of the
    // document); fall back to whichever side produced a mention instead of dereferencing null.
    if (closestBackward == null)
    {
        return closestForward;
    }
    if (closestForward == null)
    {
        return closestBackward;
    }

    int backDistance = quoteBegin - closestBackward.end;
    int forwardDistance = closestForward.begin - quoteEnd + 1;

    // Ties go to the forward mention.
    return backDistance < forwardDistance ? closestBackward : closestForward;
}
/// <summary>
/// For every quote without a mention, scores its candidate datums with the
/// quote-to-mention classifier and annotates the quote with the highest-scoring
/// candidate, unless that candidate is an animate noun.
/// </summary>
/// <param name="fd">Feature data: the dataset, the quote-to-datum-range map and the datum-to-mention map.</param>
/// <param name="doc">Annotated document supplying the quotations.</param>
public virtual void ScoreBestMentionNew(SupervisedSieveTraining.FeaturesData fd, Annotation doc)
{
    IList<ICoreMap> quotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));

    for (int quoteIdx = 0; quoteIdx < quotes.Count; quoteIdx++)
    {
        ICoreMap quote = quotes[quoteIdx];

        // Quotes that already have a mention are not re-scored.
        if (quote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null)
        {
            continue;
        }

        Pair<int, int> candidateRange = fd.mapQuoteToDataRange[quoteIdx];
        if (candidateRange == null)
        {
            continue;
        }

        // Find the candidate with the highest "isMention" score. Only scores strictly
        // above zero can win, and the earliest candidate wins ties.
        double bestScore = 0;
        int bestIdx = -1;
        for (int candidateIdx = candidateRange.first; candidateIdx <= candidateRange.second; candidateIdx++)
        {
            RVFDatum<string, string> candidate = fd.dataset.GetRVFDatum(candidateIdx);
            double score = quoteToMentionClassifier.ScoresOf(candidate).GetCount("isMention");
            if (score > bestScore)
            {
                bestScore = score;
                bestIdx = candidateIdx;
            }
        }

        if (bestIdx == -1)
        {
            continue;
        }

        Sieve.MentionData best = fd.mapDatumToMention[bestIdx];
        if (best.type.Equals("animate noun"))
        {
            // An animate-noun winner leaves the quote unannotated.
            continue;
        }

        quote.Set(typeof(QuoteAttributionAnnotator.MentionAnnotation), best.text);
        quote.Set(typeof(QuoteAttributionAnnotator.MentionBeginAnnotation), best.begin);
        quote.Set(typeof(QuoteAttributionAnnotator.MentionEndAnnotation), best.end);
        quote.Set(typeof(QuoteAttributionAnnotator.MentionTypeAnnotation), best.type);
        quote.Set(typeof(QuoteAttributionAnnotator.MentionSieveAnnotation), "supervised");
    }
}
// TODO: potential bug in previous iteration: order reversal was not implemented in EliminateDuplicates.
/// <summary>
/// Removes mention candidates whose text was already seen earlier in the list, keeping
/// the first occurrence of each text and preserving the input order. Pronoun candidates
/// are always kept, even when their text repeats.
/// </summary>
/// <param name="mentionCandidates">Candidates in the order they should be considered.</param>
/// <returns>A new list containing the retained candidates.</returns>
private static IList<Sieve.MentionData> EliminateDuplicates(IList<Sieve.MentionData> mentionCandidates)
{
    IList<Sieve.MentionData> newList = new List<Sieve.MentionData>();
    HashSet<string> seenText = new HashSet<string>();

    foreach (Sieve.MentionData mentionCandidate in mentionCandidates)
    {
        // HashSet.Add reports whether the text is new, so one call both tests and records it.
        bool isFirstOccurrence = seenText.Add(mentionCandidate.text);

        // Mention types elsewhere in this code are spelled in lower case ("pronoun"),
        // so an exact match against "Pronoun" would never exempt them. The comparison
        // is case-insensitive to accept either spelling, and null-safe.
        bool isPronoun = string.Equals(mentionCandidate.type, "pronoun", System.StringComparison.OrdinalIgnoreCase);

        if (isFirstOccurrence || isPronoun)
        {
            newList.Add(mentionCandidate);
        }
    }

    return newList;
}
/// <summary>
/// Determines the gender implied by a mention, based on its type:
/// pronouns "he"/"she" (case-insensitive) map to male/female, animate nouns are looked
/// up in the gender list by lower-cased text, and names use the gender of the first
/// character-map entry for the mention text.
/// </summary>
/// <param name="mention">The mention to classify.</param>
/// <returns>The resolved gender, or <c>Person.Gender.Unk</c> when none applies.</returns>
public virtual Person.Gender GetGender(Sieve.MentionData mention)
{
    string mentionType = mention.type;

    if ("pronoun".Equals(mentionType))
    {
        if (Sharpen.Runtime.EqualsIgnoreCase(mention.text, "he"))
        {
            return Person.Gender.Male;
        }
        if (Sharpen.Runtime.EqualsIgnoreCase(mention.text, "she"))
        {
            return Person.Gender.Female;
        }
        return Person.Gender.Unk;
    }

    if ("animate noun".Equals(mentionType))
    {
        // NOTE(review): this relies on the genderList indexer yielding null for an
        // unknown noun; whether it does depends on the collection type, which is
        // declared outside this block - confirm.
        var listedGender = genderList[mention.text.ToLower()];
        if (listedGender != null)
        {
            return listedGender;
        }
        return Person.Gender.Unk;
    }

    if ("name".Equals(mentionType))
    {
        // NOTE(review): assumes the name is present in characterMap with at least one
        // entry - verify against callers.
        return characterMap[mention.text][0].gender;
    }

    return Person.Gender.Unk;
}
/// <summary>
/// Convenience overload that unpacks a <c>Sieve.MentionData</c> and forwards its text,
/// begin, end and type to the field-wise FillInMention overload.
/// </summary>
/// <param name="quote">The quote to annotate.</param>
/// <param name="md">The mention whose fields are forwarded; must not be null.</param>
/// <param name="sieveName">Name of the sieve, forwarded unchanged.</param>
protected internal static void FillInMention(ICoreMap quote, Sieve.MentionData md, string sieveName)
{
    var mentionText = md.text;
    var mentionBegin = md.begin;
    var mentionEnd = md.end;
    var mentionType = md.type;

    FillInMention(quote, mentionText, mentionBegin, mentionEnd, sieveName, mentionType);
}