/// <summary>
/// Walks every predicted mention of <paramref name="document"/>, sentence by sentence, and hands each
/// one that is not rejected by <c>SkipMentionType</c> to <c>FindCoreferentAntecedent</c>.
/// </summary>
/// <param name="document">Document whose <c>predictedMentions</c> are processed.</param>
/// <param name="dict">Dictionaries forwarded unchanged to <c>FindCoreferentAntecedent</c>.</param>
/// <param name="props">Properties consulted for the debug switch and the mention-type filter.</param>
/// <returns>
/// The text accumulated in the log buffer. When <c>HybridCorefProperties.Debug(props)</c> is true it
/// starts with a separator line followed by the output of <c>HybridCorefPrinter.PrintRawDoc</c>.
/// </returns>
/// <exception cref="System.Exception"/>
public virtual string ResolveMention(Document document, Dictionaries dict, Properties props)
{
    StringBuilder sbLog = new StringBuilder();
    bool debugEnabled = HybridCorefProperties.Debug(props);
    if (debugEnabled)
    {
        sbLog.Append("=======================================================")
             .Append(HybridCorefPrinter.PrintRawDoc(document, true, true));
    }

    foreach (IList<Mention> sentenceMentions in document.predictedMentions)
    {
        for (int position = 0; position < sentenceMentions.Count; position++)
        {
            Mention candidate = sentenceMentions[position];
            if (!SkipMentionType(candidate, props))
            {
                FindCoreferentAntecedent(candidate, position, document, dict, props, sbLog);
            }
        }
    }

    return sbLog.ToString();
}
/// <summary>
/// Looks for an antecedent of <paramref name="m"/>, scanning from the mention's own sentence back
/// towards the start of the document. On the first candidate accepted by <c>Coreferent</c>, the
/// two clusters are merged and the method returns.
/// </summary>
/// <remarks>
/// When <c>UseRoleSkip()</c> is true no merging happens: candidates are only checked with
/// <c>IsRoleAppositive</c> and recorded in <c>document.roleSet</c>.
/// </remarks>
/// <param name="m">Mention to resolve.</param>
/// <param name="mIdx">Index of <paramref name="m"/>; forwarded to <c>GetOrderedAntecedents</c> and to the debug log.</param>
/// <param name="document">Document providing <c>predictedMentions</c>, <c>corefClusters</c> and <c>roleSet</c>; its clusters are modified on a successful merge.</param>
/// <param name="dict">Dictionaries forwarded to the helper checks.</param>
/// <param name="props">Properties consulted by <c>SkipForAnalysis</c> and for the debug switch.</param>
/// <param name="sbLog">Buffer that receives the dcoref error log when debugging is enabled.</param>
/// <exception cref="System.Exception"/>
public override void FindCoreferentAntecedent(Mention m, int mIdx, Document document, Dictionaries dict, Properties props, StringBuilder sbLog)
{
    // check for skip: first mention only, discourse salience
    if (!this.flags.UseSpeakermatch && !this.flags.UseDiscoursematch && !this.flags.UseApposition && !this.flags.UsePredicatenominatives
        && this.SkipThisMention(document, m, document.corefClusters[m.corefClusterID], dict))
    {
        return;
    }

    ICollection<Mention> roleSet = document.roleSet;
    for (int sentJ = m.sentNum; sentJ >= 0; sentJ--)
    {
        // The sentence distance only grows as sentJ decreases, so once the limit is exceeded
        // every remaining sentence is out of range too: stop instead of building antecedent
        // lists that would be discarded.
        // NOTE(review): assumes GetOrderedAntecedents only builds and returns a list (no side effects) - confirm.
        if (maxSentDist != -1 && m.sentNum - sentJ > maxSentDist)
        {
            break;
        }

        IList<Mention> l = Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve.GetOrderedAntecedents(m, sentJ, mIdx, document.predictedMentions, dict);

        // TODO: do we need this?
        // Sort mentions by length whenever we have two mentions beginning at the same position and having the same head
        for (int i = 0; i < l.Count; i++)
        {
            // Only pairs with j > i are ever swapped, so start directly after i.
            for (int j = i + 1; j < l.Count; j++)
            {
                if (l[i].headString.Equals(l[j].headString)
                    && l[i].startIndex == l[j].startIndex
                    && l[i].SameSentence(l[j])
                    && l[i].SpanToString().Length > l[j].SpanToString().Length)
                {
                    // Put the shorter span first.
                    Mention longer = l[i];
                    l[i] = l[j];
                    l[j] = longer;
                }
            }
        }

        foreach (Mention ant in l)
        {
            if (SkipForAnalysis(ant, m, props))
            {
                continue;
            }

            // Skip singletons according to the singleton predictor
            // (only for non-NE mentions)
            // Recasens, de Marneffe, and Potts (NAACL 2013)
            if (m.isSingleton && m.mentionType != Dictionaries.MentionType.Proper
                && ant.isSingleton && ant.mentionType != Dictionaries.MentionType.Proper)
            {
                continue;
            }

            // Already in the same cluster: nothing to merge.
            if (m.corefClusterID == ant.corefClusterID)
            {
                continue;
            }

            // Both the mention and the candidate must be of a type this sieve handles.
            if (!mType.Contains(m.mentionType) || !aType.Contains(ant.mentionType))
            {
                continue;
            }

            // Pronominal mentions additionally go through the string-based type filters.
            if (m.mentionType == Dictionaries.MentionType.Pronominal)
            {
                if (!MatchedMentionType(m, mTypeStr))
                {
                    continue;
                }
                if (!MatchedMentionType(ant, aTypeStr))
                {
                    continue;
                }
            }

            CorefCluster c1 = document.corefClusters[m.corefClusterID];
            CorefCluster c2 = document.corefClusters[ant.corefClusterID];
            System.Diagnostics.Debug.Assert((c1 != null));
            System.Diagnostics.Debug.Assert((c2 != null));

            if (this.UseRoleSkip())
            {
                // Role-skip mode: only record role appositives, never merge.
                if (m.IsRoleAppositive(ant, dict))
                {
                    roleSet.Add(m);
                }
                else if (ant.IsRoleAppositive(m, dict))
                {
                    roleSet.Add(ant);
                }
                continue;
            }

            if (this.Coreferent(document, c1, c2, m, ant, dict, roleSet))
            {
                // print dcoref log
                if (HybridCorefProperties.Debug(props))
                {
                    sbLog.Append(HybridCorefPrinter.PrintErrorLogDcoref(m, ant, document, dict, mIdx, this.GetType().FullName));
                }

                // Remember c1's ID before merging; its entry is dropped from the document afterwards.
                int removeID = c1.clusterID;
                CorefCluster.MergeClusters(c2, c1);
                document.MergeIncompatibles(c2, c1);
                document.MergeAcronymCache(c2, c1);
                Sharpen.Collections.Remove(document.corefClusters, removeID);
                return;
            }
        }
    }
}