/// <summary>
/// Looks for an antecedent of <paramref name="m"/> among the predicted mentions and,
/// when one is found, merges the two via <c>Sieve.Merge</c>.
/// </summary>
/// <remarks>
/// Sentences are visited from the mention's own sentence back to sentence 0.
/// The search stops at the first candidate for which <c>Sieve.IsReallyCoref</c>
/// returns true. The parameters <paramref name="mIdx"/>, <paramref name="dict"/>,
/// <paramref name="props"/> and <paramref name="sbLog"/> are not used by this
/// implementation.
/// </remarks>
/// <param name="m">The mention whose antecedent is being searched for.</param>
/// <param name="mIdx">Unused here.</param>
/// <param name="document">Document providing <c>predictedMentions</c>.</param>
/// <param name="dict">Unused here.</param>
/// <param name="props">Unused here.</param>
/// <param name="sbLog">Unused here.</param>
/// <exception cref="System.Exception"/>
public override void FindCoreferentAntecedent(Mention m, int mIdx, Document document, Dictionaries dict, Properties props, StringBuilder sbLog)
{
    int mentionSentence = m.sentNum;
    for (int sentenceIndex = mentionSentence; sentenceIndex >= 0; sentenceIndex--)
    {
        bool sameSentence = sentenceIndex == mentionSentence;
        foreach (Mention candidate in document.predictedMentions[sentenceIndex])
        {
            // Both the candidate and the mention must be of the types this sieve handles.
            bool typesAccepted = MatchedMentionType(candidate, aTypeStr) && MatchedMentionType(m, mTypeStr);
            if (!typesAccepted || candidate == m)
            {
                continue;
            }

            // Ignore cataphora: within the same sentence the candidate must not follow the mention.
            if (sameSentence && m.AppearEarlierThan(candidate))
            {
                continue;
            }

            if (!Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve.IsReallyCoref(document, m.mentionID, candidate.mentionID))
            {
                continue;
            }

            if (m.mentionType == Dictionaries.MentionType.List)
            {
                log.Info("LIST MATCHING MENTION : " + m.SpanToString() + "\tANT: " + candidate.SpanToString());
            }

            Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve.Merge(document, m.mentionID, candidate.mentionID);
            return;
        }
    }
}
/// <summary>
/// Builds the pairwise feature counter for two mentions, where <paramref name="m1"/>
/// is expected to appear earlier than <paramref name="m2"/> (checked with a debug assertion).
/// </summary>
/// <remarks>
/// Feature keys are string literals that must stay stable between runs, so every piece of
/// text that ends up in a key (lower-cased words, bucketed distances) is produced in a
/// culture-invariant way. Results of calls that the method needs several times
/// (span strings, speaker/utterance annotations, speaker predicates) are computed once at
/// their first unconditional use; this assumes those calls are side-effect free.
/// </remarks>
/// <param name="doc">Document both mentions belong to.</param>
/// <param name="m1">The earlier mention (candidate antecedent).</param>
/// <param name="m2">The later mention (anaphor).</param>
/// <returns>A counter mapping feature names to their values for this mention pair.</returns>
private ICounter<string> GetFeatures(Document doc, Mention m1, Mention m2)
{
    System.Diagnostics.Debug.Assert(m1.AppearEarlierThan(m2));
    ICounter<string> features = new ClassicCounter<string>();

    // ---- global features ----
    features.IncrementCount("bias");
    if (useDocSource)
    {
        features.IncrementCount("doc-type=" + doc.docType);
        if (doc.docInfo != null && doc.docInfo.Contains("DOC_ID"))
        {
            // NOTE(review): assumes DOC_ID contains at least one '/' - confirm against the corpus reader.
            features.IncrementCount("doc-source=" + doc.docInfo["DOC_ID"].Split("/")[1]);
        }
    }

    // ---- singleton feature conjunctions ----
    IList<string> singletonFeatures1 = m1.GetSingletonFeatures(dictionaries);
    IList<string> singletonFeatures2 = m2.GetSingletonFeatures(dictionaries);
    foreach (KeyValuePair<int, string> e in SingletonFeatures)
    {
        if (e.Key < singletonFeatures1.Count && e.Key < singletonFeatures2.Count)
        {
            features.IncrementCount(e.Value + "=" + singletonFeatures1[e.Key] + "_" + singletonFeatures2[e.Key]);
        }
    }

    SemanticGraphEdge p1 = GetDependencyParent(m1);
    SemanticGraphEdge p2 = GetDependencyParent(m2);
    features.IncrementCount("dep-relations=" + (p1 == null ? "null" : p1.GetRelation()) + "_" + (p2 == null ? "null" : p2.GetRelation()));
    features.IncrementCount("roles=" + GetRole(m1) + "_" + GetRole(m2));

    CoreLabel headCL1 = HeadWord(m1);
    CoreLabel headCL2 = HeadWord(m2);
    string headPOS1 = GetPOS(headCL1);
    string headPOS2 = GetPOS(headCL2);
    features.IncrementCount("head-pos-s=" + headPOS1 + "_" + headPOS2);
    // Invariant lower-casing keeps the feature key independent of the thread culture.
    features.IncrementCount("head-words=" + WordIndicator("h_" + headCL1.Word().ToLowerInvariant() + "_" + headCL2.Word().ToLowerInvariant(), headPOS1 + "_" + headPOS2));

    // ---- agreement features ----
    AddFeature(features, "animacies-agree", m2.AnimaciesAgree(m1));
    AddFeature(features, "attributes-agree", m2.AttributesAgree(m1, dictionaries));
    AddFeature(features, "entity-types-agree", m2.EntityTypesAgree(m1, dictionaries));
    AddFeature(features, "numbers-agree", m2.NumbersAgree(m1));
    AddFeature(features, "genders-agree", m2.GendersAgree(m1));
    AddFeature(features, "ner-strings-equal", m1.nerString.Equals(m2.nerString));

    // ---- string matching features ----
    AddFeature(features, "antecedent-head-in-anaphor", HeadContainedIn(m1, m2));
    AddFeature(features, "anaphor-head-in-antecedent", HeadContainedIn(m2, m1));
    if (m1.mentionType != Dictionaries.MentionType.Pronominal && m2.mentionType != Dictionaries.MentionType.Pronominal)
    {
        string span1 = m1.SpanToString();
        string span2 = m2.SpanToString();
        string lowerSpan1 = span1.ToLowerInvariant();
        string lowerSpan2 = span2.ToLowerInvariant();

        AddFeature(features, "antecedent-in-anaphor", lowerSpan2.Contains(lowerSpan1));
        AddFeature(features, "anaphor-in-antecedent", lowerSpan1.Contains(lowerSpan2));
        AddFeature(features, "heads-equal", Sharpen.Runtime.EqualsIgnoreCase(m1.headString, m2.headString));
        AddFeature(features, "heads-agree", m2.HeadsAgree(m1));
        AddFeature(features, "exact-match", m1.ToString().Trim().ToLowerInvariant().Equals(m2.ToString().Trim().ToLowerInvariant()));
        AddFeature(features, "partial-match", RelaxedStringMatch(m1, m2));

        // Edit distance normalised by the combined length of both strings.
        double editDistance = StringUtils.EditDistance(span1, span2) / (double)(span1.Length + span2.Length);
        features.IncrementCount("edit-distance", editDistance);
        // Truncate to one decimal place; format invariantly so the key never contains a ',' separator.
        double editBucket = (int)(editDistance * 10) / 10.0;
        features.IncrementCount("edit-distance=" + editBucket.ToString(System.Globalization.CultureInfo.InvariantCulture));

        double headEditDistance = StringUtils.EditDistance(m1.headString, m2.headString) / (double)(m1.headString.Length + m2.headString.Length);
        features.IncrementCount("head-edit-distance", headEditDistance);
        double headEditBucket = (int)(headEditDistance * 10) / 10.0;
        features.IncrementCount("head-edit-distance=" + headEditBucket.ToString(System.Globalization.CultureInfo.InvariantCulture));
    }

    // ---- distance features ----
    AddNumeric(features, "mention-distance", m2.mentionNum - m1.mentionNum);
    AddNumeric(features, "sentence-distance", m2.sentNum - m1.sentNum);
    if (m2.sentNum == m1.sentNum)
    {
        AddNumeric(features, "word-distance", m2.startIndex - m1.endIndex);
        if (m1.endIndex > m2.startIndex)
        {
            features.IncrementCount("spans-intersect");
        }
    }

    // ---- setup for dcoref features: wrap each mention in its own single-element cluster ----
    ICollection<Mention> ms1 = new HashSet<Mention>();
    ms1.Add(m1);
    ICollection<Mention> ms2 = new HashSet<Mention>();
    ms2.Add(m2);
    Random r = new Random();
    CorefCluster c1 = new CorefCluster(20000 + r.NextInt(10000), ms1);
    CorefCluster c2 = new CorefCluster(10000 + r.NextInt(10000), ms2);
    string s2 = m2.LowercaseNormalizedSpanString();
    string s1 = m1.LowercaseNormalizedSpanString();

    // ---- discourse dcoref features ----
    var m2Speaker = m2.headWord.Get(typeof(CoreAnnotations.SpeakerAnnotation));
    AddFeature(features, "mention-speaker-PER0", Sharpen.Runtime.EqualsIgnoreCase(m2Speaker, "PER0"));

    bool antecedentIsSpeaker = CorefRules.AntecedentIsMentionSpeaker(doc, m2, m1, dictionaries);
    AddFeature(features, "antecedent-is-anaphor-speaker", antecedentIsSpeaker);

    bool sameSpeaker = CorefRules.EntitySameSpeaker(doc, m2, m1);
    AddFeature(features, "same-speaker", sameSpeaker);
    AddFeature(features, "person-disagree-same-speaker", CorefRules.EntityPersonDisagree(doc, m2, m1, dictionaries) && sameSpeaker);
    AddFeature(features, "antecedent-matches-anaphor-speaker", CorefRules.AntecedentMatchesMentionSpeakerAnnotation(m2, m1, doc));

    // "PER0".Equals(...) is used so a missing speaker annotation yields false instead of a null dereference.
    bool youInArticleFromPer0 = m2.person == Dictionaries.Person.You && doc.docType == Document.DocType.Article && "PER0".Equals(m2Speaker);
    AddFeature(features, "discourse-you-PER0", youInArticleFromPer0);

    AddFeature(features, "speaker-match-i-i", m2.number == Dictionaries.Number.Singular && dictionaries.firstPersonPronouns.Contains(s1) && m1.number == Dictionaries.Number.Singular && dictionaries.firstPersonPronouns.Contains(s2) && sameSpeaker);
    AddFeature(features, "speaker-match-speaker-i", m2.number == Dictionaries.Number.Singular && dictionaries.firstPersonPronouns.Contains(s2) && antecedentIsSpeaker);
    AddFeature(features, "speaker-match-i-speaker", m1.number == Dictionaries.Number.Singular && dictionaries.firstPersonPronouns.Contains(s1) && CorefRules.AntecedentIsMentionSpeaker(doc, m1, m2, dictionaries));
    AddFeature(features, "speaker-match-you-you", dictionaries.secondPersonPronouns.Contains(s1) && dictionaries.secondPersonPronouns.Contains(s2) && sameSpeaker);

    var utterance1 = m1.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation));
    var utterance2 = m2.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation));
    bool iYouPair = (m2.person == Dictionaries.Person.I && m1.person == Dictionaries.Person.You) || (m2.person == Dictionaries.Person.You && m1.person == Dictionaries.Person.I);
    AddFeature(features, "discourse-between-two-person", iYouPair && (utterance2 - utterance1 == 1) && doc.docType == Document.DocType.Conversation);
    AddFeature(features, "incompatible-not-match", m1.person != Dictionaries.Person.I && m2.person != Dictionaries.Person.I && (CorefRules.AntecedentIsMentionSpeaker(doc, m1, m2, dictionaries) || antecedentIsSpeaker));

    int utteranceDist = Math.Abs(utterance1 - utterance2);
    if (doc.docType != Document.DocType.Article && utteranceDist == 1 && !sameSpeaker)
    {
        AddFeature(features, "speaker-mismatch-i-i", m1.person == Dictionaries.Person.I && m2.person == Dictionaries.Person.I);
        AddFeature(features, "speaker-mismatch-you-you", m1.person == Dictionaries.Person.You && m2.person == Dictionaries.Person.You);
        AddFeature(features, "speaker-mismatch-we-we", m1.person == Dictionaries.Person.We && m2.person == Dictionaries.Person.We);
    }

    // ---- other dcoref features ----
    string firstWord1 = FirstWord(m1).Word().ToLowerInvariant();
    AddFeature(features, "indefinite-article-np", m1.appositions == null && m1.predicateNominatives == null && (firstWord1.Equals("a") || firstWord1.Equals("an")));
    AddFeature(features, "far-this", s2.Equals("this") && Math.Abs(m2.sentNum - m1.sentNum) > 3);
    // Same condition as "discourse-you-PER0"; both feature names are kept.
    AddFeature(features, "per0-you-in-article", youInArticleFromPer0);
    AddFeature(features, "inside-in", m2.InsideIn(m1) || m1.InsideIn(m2));
    AddFeature(features, "indefinite-determiners", dictionaries.indefinitePronouns.Contains(m1.originalSpan[0].Lemma()) || dictionaries.indefinitePronouns.Contains(m2.originalSpan[0].Lemma()));
    AddFeature(features, "entity-attributes-agree", CorefRules.EntityAttributesAgree(c2, c1));
    AddFeature(features, "entity-token-distance", CorefRules.EntityTokenDistance(m2, m1));
    AddFeature(features, "i-within-i", CorefRules.EntityIWithinI(m2, m1, dictionaries));
    AddFeature(features, "exact-string-match", CorefRules.EntityExactStringMatch(c2, c1, dictionaries, doc.roleSet));
    AddFeature(features, "entity-relaxed-heads-agree", CorefRules.EntityRelaxedHeadsAgreeBetweenMentions(c2, c1, m2, m1));
    AddFeature(features, "is-acronym", CorefRules.EntityIsAcronym(doc, c2, c1));
    AddFeature(features, "demonym", m2.IsDemonym(m1, dictionaries));
    AddFeature(features, "incompatible-modifier", CorefRules.EntityHaveIncompatibleModifier(m2, m1));
    AddFeature(features, "head-lemma-match", m1.headWord.Lemma().Equals(m2.headWord.Lemma()));
    AddFeature(features, "words-included", CorefRules.EntityWordsIncluded(c2, c1, m2, m1));
    AddFeature(features, "extra-proper-noun", CorefRules.EntityHaveExtraProperNoun(m2, m1, new HashSet<string>()));
    AddFeature(features, "number-in-later-mentions", CorefRules.EntityNumberInLaterMention(m2, m1));
    AddFeature(features, "sentence-context-incompatible", CorefRules.SentenceContextIncompatible(m2, m1, dictionaries));

    // ---- syntax features ----
    if (useConstituencyParse)
    {
        if (m1.sentNum == m2.sentNum)
        {
            // Climb from m2's subtree towards the root, counting nodes whose label starts
            // with "S", until a node dominating m1's subtree, a ROOT node, or the top is reached.
            int clauseCount = 0;
            Tree tree = m2.contextParseTree;
            Tree current = m2.mentionSubTree.Ancestor(1, tree);
            // A null ancestor ends the walk instead of being dereferenced.
            while (current != null)
            {
                if (current.Label().Value().StartsWith("S"))
                {
                    clauseCount++;
                }
                if (current.Dominates(m1.mentionSubTree))
                {
                    break;
                }
                if (current.Label().Value().Equals("ROOT"))
                {
                    break;
                }
                current = current.Ancestor(1, tree);
            }
            features.IncrementCount("clause-count", clauseCount);
            features.IncrementCount("clause-count=" + Bin(clauseCount));
        }

        if (RuleBasedCorefMentionFinder.IsPleonastic(m2, m2.contextParseTree) || RuleBasedCorefMentionFinder.IsPleonastic(m1, m1.contextParseTree))
        {
            features.IncrementCount("pleonastic-it");
        }
        if (MaximalNp(m1.mentionSubTree) == MaximalNp(m2.mentionSubTree))
        {
            features.IncrementCount("same-maximal-np");
        }

        bool m1Embedded = HeadEmbeddingLevel(m1.mentionSubTree, m1.headIndex - m1.startIndex) > 1;
        bool m2Embedded = HeadEmbeddingLevel(m2.mentionSubTree, m2.headIndex - m2.startIndex) > 1;
        features.IncrementCount("embedding=" + m1Embedded + "_" + m2Embedded);
    }

    return features;
}
/// <summary>
/// Tallies, per mention type, the sentence distance at which each predicted mention
/// first meets a candidate with the same gold cluster id, then prints the four
/// tallies (proper, common, pronoun, list) sorted by distance.
/// </summary>
/// <remarks>
/// For every mention the candidates are requested from <c>Sieve.GetOrderedAntecedents</c>
/// sentence by sentence, starting with the mention's own sentence and moving backwards.
/// At most one distance is recorded per mention.
/// </remarks>
/// <param name="args">Command-line arguments, converted to properties for the coref system.</param>
/// <exception cref="System.Exception"/>
public static void LinkDistanceAnalysis(string[] args)
{
    Properties props = StringUtils.ArgsToProperties(args);
    HybridCorefSystem cs = new HybridCorefSystem(props);
    cs.docMaker.ResetDocs();

    ICounter<int> proper = new ClassicCounter<int>();
    ICounter<int> common = new ClassicCounter<int>();
    ICounter<int> pronoun = new ClassicCounter<int>();
    ICounter<int> list = new ClassicCounter<int>();

    for (Document document = cs.docMaker.NextDoc(); document != null; document = cs.docMaker.NextDoc())
    {
        for (int sentIdx = 0; sentIdx < document.predictedMentions.Count; sentIdx++)
        {
            IList<Mention> predictedInSent = document.predictedMentions[sentIdx];
            for (int mIdx = 0; mIdx < predictedInSent.Count; mIdx++)
            {
                Mention m = predictedInSent[mIdx];

                // Set once a link distance has been counted for this mention; ends the search.
                bool linkCounted = false;
                for (int distance = 0; !linkCounted && distance <= sentIdx; distance++)
                {
                    IList<Mention> candidates = Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve.GetOrderedAntecedents(m, sentIdx - distance, mIdx, document.predictedMentions, cs.dictionaries);
                    foreach (Mention candidate in candidates)
                    {
                        if (candidate == m)
                        {
                            continue;
                        }
                        // Ignore cataphora: in the same sentence the candidate must not follow the mention.
                        if (distance == 0 && m.AppearEarlierThan(candidate))
                        {
                            continue;
                        }
                        if (candidate.goldCorefClusterID != m.goldCorefClusterID)
                        {
                            continue;
                        }

                        // Pick the counter for this mention/candidate type pairing, if the pairing qualifies.
                        ICounter<int> tally = null;
                        if (m.mentionType == Dictionaries.MentionType.Nominal)
                        {
                            if (candidate.mentionType == Dictionaries.MentionType.Nominal || candidate.mentionType == Dictionaries.MentionType.Proper)
                            {
                                tally = common;
                            }
                        }
                        else if (m.mentionType == Dictionaries.MentionType.Proper)
                        {
                            if (candidate.mentionType == Dictionaries.MentionType.Proper)
                            {
                                tally = proper;
                            }
                        }
                        else if (m.mentionType == Dictionaries.MentionType.Pronominal)
                        {
                            tally = pronoun;
                        }
                        else if (m.mentionType == Dictionaries.MentionType.List)
                        {
                            if (candidate.mentionType == Dictionaries.MentionType.List)
                            {
                                tally = list;
                            }
                        }

                        if (tally != null)
                        {
                            tally.IncrementCount(distance);
                            linkCounted = true;
                            break;
                        }
                    }
                }
            }
        }
    }

    System.Console.Out.WriteLine("PROPER -------------------------------------------");
    Counters.PrintCounterSortedByKeys(proper);
    System.Console.Out.WriteLine("COMMON -------------------------------------------");
    Counters.PrintCounterSortedByKeys(common);
    System.Console.Out.WriteLine("PRONOUN -------------------------------------------");
    Counters.PrintCounterSortedByKeys(pronoun);
    System.Console.Out.WriteLine("LIST -------------------------------------------");
    Counters.PrintCounterSortedByKeys(list);
    log.Info();
}