Exemplo n.º 1
0
        /// <summary>Checks if two clusters are coreferent according to our sieve pass constraints.</summary>
        /// <remarks>
        /// The checks run in a fixed order and the order matters: hard filters on the mention pair,
        /// discourse-based matches, discourse-based incompatibilities, match rules, veto rules that
        /// can cancel a tentative match, WordNet/alias/dictionary rules, and finally pronoun resolution.
        /// Which checks run is controlled by <c>flags</c>.
        /// Side effects: incompatible pairs are recorded through <c>document.AddIncompatible</c>, and
        /// the "speaker - I" discourse rules may copy <c>speakerInfo</c> from one mention to the other.
        /// </remarks>
        /// <param name="document">Document being processed; read for incompatibility records, document type, CoNLL data and speakers, and updated with newly found incompatible pairs.</param>
        /// <param name="mentionCluster">Cluster of the current mention; its representative mention drives most rule checks.</param>
        /// <param name="potentialAntecedent">Candidate antecedent cluster.</param>
        /// <param name="mention2">The mention under consideration; used for the early distance/generic/nesting filters, the token-distance check and the coref-dictionary constraints.</param>
        /// <param name="ant">The antecedent mention compared against.</param>
        /// <param name="dict">Dictionaries supplying the pronoun, demonym and related word sets consulted by the rules.</param>
        /// <param name="roleSet">Role mentions, passed through to the string-match and name-match rules.</param>
        /// <param name="semantics">Holder of the <c>wordnet</c> object used for the hypernym/synonym checks; also passed to the alias rule.</param>
        /// <returns><c>true</c> if the enabled rules accept the pair as coreferent; otherwise <c>false</c>.</returns>
        /// <exception cref="System.Exception">Wraps any exception raised while evaluating the alias rule; the original is available as the inner exception.</exception>
        public virtual bool Coreferent(Document document, CorefCluster mentionCluster, CorefCluster potentialAntecedent, Mention mention2, Mention ant, Dictionaries dict, ICollection <Mention> roleSet, Semantics semantics)
        {
            // Tentative match result: set by the "soft" match rules and possibly vetoed by later constraints.
            bool    ret     = false;
            // Most rules work on the cluster's representative mention rather than on mention2 itself.
            Mention mention = mentionCluster.GetRepresentativeMention();

            if (flags.UseIncompatibles)
            {
                // Check our list of incompatible mentions and don't cluster them together
                // Allows definite no's from previous sieves to propagate down
                if (document.IsIncompatible(mentionCluster, potentialAntecedent))
                {
                    SieveCoreferenceSystem.logger.Finest("INCOMPATIBLE clusters: not match: " + ant.SpanToString() + "(" + ant.mentionID + ") :: " + mention.SpanToString() + "(" + mention.mentionID + ") -> " + (mention.goldCorefClusterID != ant.goldCorefClusterID));
                    return(false);
                }
            }
            // Pronoun sieve: reject pairs more than 3 sentences apart unless the mention is first or second person.
            if (flags.DoPronoun && Math.Abs(mention2.sentNum - ant.sentNum) > 3 && mention2.person != Dictionaries.Person.I && mention2.person != Dictionaries.Person.You)
            {
                return(false);
            }
            // "this" is never linked to an antecedent more than 3 sentences away.
            if (mention2.LowercaseNormalizedSpanString().Equals("this") && Math.Abs(mention2.sentNum - ant.sentNum) > 3)
            {
                return(false);
            }
            // Second-person mention in an article whose head word carries the speaker value "PER0".
            // NOTE(review): "PER0" is presumably the default/unknown speaker id - confirm.
            if (mention2.person == Dictionaries.Person.You && document.docType == Document.DocType.Article && mention2.headWord.Get(typeof(CoreAnnotations.SpeakerAnnotation)).Equals("PER0"))
            {
                return(false);
            }
            // With CoNLL data present, generic "you" antecedents and generic mentions are never resolved.
            if (document.conllDoc != null)
            {
                if (ant.generic && ant.person == Dictionaries.Person.You)
                {
                    return(false);
                }
                if (mention2.generic)
                {
                    return(false);
                }
            }
            // A mention nested inside the other is not coreferent with it.
            if (mention2.InsideIn(ant) || ant.InsideIn(mention2))
            {
                return(false);
            }
            if (flags.UseDiscoursematch)
            {
                string mString   = mention.LowercaseNormalizedSpanString();
                string antString = ant.LowercaseNormalizedSpanString();
                // mention and ant both belong to the same speaker cluster
                if (mention.speakerInfo != null && mention.speakerInfo == ant.speakerInfo)
                {
                    SieveCoreferenceSystem.logger.Finest("discourse match: maps to same speaker: " + mention.SpanToString() + "\tmatched\t" + ant.SpanToString());
                    return(true);
                }
                // (I - I) in the same speaker's quotation.
                if (mention.number == Dictionaries.Number.Singular && dict.firstPersonPronouns.Contains(mString) && ant.number == Dictionaries.Number.Singular && dict.firstPersonPronouns.Contains(antString) && Rules.EntitySameSpeaker(document, mention, ant))
                {
                    SieveCoreferenceSystem.logger.Finest("discourse match: 1st person same speaker: " + mention.SpanToString() + "\tmatched\t" + ant.SpanToString());
                    return(true);
                }
                // (speaker - I)
                if ((mention.number == Dictionaries.Number.Singular && dict.firstPersonPronouns.Contains(mString)) && Rules.AntecedentIsMentionSpeaker(document, mention, ant, dict))
                {
                    SieveCoreferenceSystem.logger.Finest("discourse match: 1st person mention speaker match antecedent: " + mention.SpanToString() + "\tmatched\t" + ant.SpanToString());
                    // Propagate the antecedent's speaker info to the mention if the mention has none yet.
                    if (mention.speakerInfo == null && ant.speakerInfo != null)
                    {
                        mention.speakerInfo = ant.speakerInfo;
                    }
                    return(true);
                }
                // (I - speaker)
                if ((ant.number == Dictionaries.Number.Singular && dict.firstPersonPronouns.Contains(antString)) && Rules.AntecedentIsMentionSpeaker(document, ant, mention, dict))
                {
                    SieveCoreferenceSystem.logger.Finest("discourse match: 1st person antecedent speaker match mention: " + mention.SpanToString() + "\tmatched\t" + ant.SpanToString());
                    // Propagate the mention's speaker info to the antecedent if the antecedent has none yet.
                    if (ant.speakerInfo == null && mention.speakerInfo != null)
                    {
                        ant.speakerInfo = mention.speakerInfo;
                    }
                    return(true);
                }
                // Can be iffy if more than two speakers... but still should be okay most of the time
                if (dict.secondPersonPronouns.Contains(mString) && dict.secondPersonPronouns.Contains(antString) && Rules.EntitySameSpeaker(document, mention, ant))
                {
                    SieveCoreferenceSystem.logger.Finest("discourse match: 2nd person same speaker: " + mention.SpanToString() + "\tmatched\t" + ant.SpanToString());
                    return(true);
                }
                // previous I - you or previous you - I in two person conversation
                if (((mention.person == Dictionaries.Person.I && ant.person == Dictionaries.Person.You || (mention.person == Dictionaries.Person.You && ant.person == Dictionaries.Person.I)) && (mention.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)) - ant.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)) == 1) && document.docType == Document.DocType.Conversation))
                {
                    SieveCoreferenceSystem.logger.Finest("discourse match: between two person: " + mention.SpanToString() + "\tmatched\t" + ant.SpanToString());
                    return(true);
                }
                // Reflexive pronoun whose antecedent stands in a subject-object relation with it.
                if (dict.reflexivePronouns.Contains(mention.headString) && Rules.EntitySubjectObject(mention, ant))
                {
                    SieveCoreferenceSystem.logger.Finest("discourse match: reflexive pronoun: " + ant.SpanToString() + "(" + ant.mentionID + ") :: " + mention.SpanToString() + "(" + mention.mentionID + ") -> " + (mention.goldCorefClusterID == ant.goldCorefClusterID));
                    return(true);
                }
            }
            // Discourse-based incompatibilities; skipped for the string-match, apposition and words-inclusion sieves.
            if (Constants.UseDiscourseConstraints && !flags.UseExactstringmatch && !flags.UseRelaxedExactstringmatch && !flags.UseApposition && !flags.UseWordsInclusion)
            {
                foreach (Mention m in mentionCluster.GetCorefMentions())
                {
                    foreach (Mention a in potentialAntecedent.GetCorefMentions())
                    {
                        // angelx - not sure about the logic here, disable (code was also refactored from original)
                        // vv gabor - re-enabled code (seems to improve performance) vv
                        if (m.person != Dictionaries.Person.I && a.person != Dictionaries.Person.I && (Rules.AntecedentIsMentionSpeaker(document, m, a, dict) || Rules.AntecedentIsMentionSpeaker(document, a, m, dict)))
                        {
                            SieveCoreferenceSystem.logger.Finest("Incompatibles: not match(speaker): " + ant.SpanToString() + "(" + ant.mentionID + ") :: " + mention.SpanToString() + "(" + mention.mentionID + ") -> " + (mention.goldCorefClusterID != ant.goldCorefClusterID));
                            document.AddIncompatible(m, a);
                            return(false);
                        }
                        // ^^ end block of code in question ^^
                        // Distance between the two mentions measured in utterance indices.
                        int dist = Math.Abs(m.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)) - a.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)));
                        // Adjacent utterances by different speakers: same-person pronouns cannot corefer.
                        if (document.docType != Document.DocType.Article && dist == 1 && !Rules.EntitySameSpeaker(document, m, a))
                        {
                            string mSpeaker = document.speakers[m.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation))];
                            string aSpeaker = document.speakers[a.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation))];
                            if (m.person == Dictionaries.Person.I && a.person == Dictionaries.Person.I)
                            {
                                SieveCoreferenceSystem.logger.Finest("Incompatibles: neighbor I: " + ant.SpanToString() + "(" + ant.mentionID + "," + aSpeaker + ") :: " + mention.SpanToString() + "(" + mention.mentionID + "," + mSpeaker + ") -> " + (mention.goldCorefClusterID != ant.goldCorefClusterID));
                                document.AddIncompatible(m, a);
                                return(false);
                            }
                            if (m.person == Dictionaries.Person.You && a.person == Dictionaries.Person.You)
                            {
                                SieveCoreferenceSystem.logger.Finest("Incompatibles: neighbor YOU: " + ant.SpanToString() + "(" + ant.mentionID + "," + aSpeaker + ") :: " + mention.SpanToString() + "(" + mention.mentionID + "," + mSpeaker + ") -> " + (mention.goldCorefClusterID != ant.goldCorefClusterID));
                                document.AddIncompatible(m, a);
                                return(false);
                            }
                            // This is weak since we can refer to both speakers
                            if (m.person == Dictionaries.Person.We && a.person == Dictionaries.Person.We)
                            {
                                SieveCoreferenceSystem.logger.Finest("Incompatibles: neighbor WE: " + ant.SpanToString() + "(" + ant.mentionID + "," + aSpeaker + ") :: " + mention.SpanToString() + "(" + mention.mentionID + "," + mSpeaker + ") -> " + (mention.goldCorefClusterID != ant.goldCorefClusterID));
                                document.AddIncompatible(m, a);
                                return(false);
                            }
                        }
                    }
                }
                // In articles, any subject-object pair across the two clusters makes them incompatible.
                if (document.docType == Document.DocType.Article)
                {
                    foreach (Mention m_1 in mentionCluster.GetCorefMentions())
                    {
                        foreach (Mention a in potentialAntecedent.GetCorefMentions())
                        {
                            if (Rules.EntitySubjectObject(m_1, a))
                            {
                                SieveCoreferenceSystem.logger.Finest("Incompatibles: subject-object: " + ant.SpanToString() + "(" + ant.mentionID + ") :: " + mention.SpanToString() + "(" + mention.mentionID + ") -> " + (mention.goldCorefClusterID != ant.goldCorefClusterID));
                                document.AddIncompatible(m_1, a);
                                return(false);
                            }
                        }
                    }
                }
            }
            // Incompatibility constraints - do before match checks
            if (flags.USE_iwithini && Rules.EntityIWithinI(mention, ant, dict))
            {
                SieveCoreferenceSystem.logger.Finest("Incompatibles: iwithini: " + ant.SpanToString() + "(" + ant.mentionID + ") :: " + mention.SpanToString() + "(" + mention.mentionID + ") -> " + (mention.goldCorefClusterID != ant.goldCorefClusterID));
                document.AddIncompatible(mention, ant);
                return(false);
            }
            // Match checks
            if (flags.UseExactstringmatch && Rules.EntityExactStringMatch(mentionCluster, potentialAntecedent, dict, roleSet))
            {
                return(true);
            }
            // Name match is only tentative: it sets ret and remains subject to the veto rules below.
            if (flags.UseNameMatch && CheckEntityMatch(document, mentionCluster, potentialAntecedent, dict, roleSet))
            {
                ret = true;
            }
            if (flags.UseRelaxedExactstringmatch && Rules.EntityRelaxedExactStringMatch(mentionCluster, potentialAntecedent, mention, ant, dict, roleSet))
            {
                return(true);
            }
            if (flags.UseApposition && Rules.EntityIsApposition(mentionCluster, potentialAntecedent, mention, ant))
            {
                SieveCoreferenceSystem.logger.Finest("Apposition: " + mention.SpanToString() + "\tvs\t" + ant.SpanToString());
                return(true);
            }
            if (flags.UsePredicatenominatives && Rules.EntityIsPredicateNominatives(mentionCluster, potentialAntecedent, mention, ant))
            {
                SieveCoreferenceSystem.logger.Finest("Predicate nominatives: " + mention.SpanToString() + "\tvs\t" + ant.SpanToString());
                return(true);
            }
            if (flags.UseAcronym && Rules.EntityIsAcronym(document, mentionCluster, potentialAntecedent))
            {
                SieveCoreferenceSystem.logger.Finest("Acronym: " + mention.SpanToString() + "\tvs\t" + ant.SpanToString());
                return(true);
            }
            if (flags.UseRelativepronoun && Rules.EntityIsRelativePronoun(mention, ant))
            {
                SieveCoreferenceSystem.logger.Finest("Relative pronoun: " + mention.SpanToString() + "\tvs\t" + ant.SpanToString());
                return(true);
            }
            if (flags.UseDemonym && mention.IsDemonym(ant, dict))
            {
                SieveCoreferenceSystem.logger.Finest("Demonym: " + mention.SpanToString() + "\tvs\t" + ant.SpanToString());
                return(true);
            }
            // The following three rules only set the tentative result; later constraints may still veto it.
            if (flags.UseRoleapposition && lang != Locale.Chinese && Rules.EntityIsRoleAppositive(mentionCluster, potentialAntecedent, mention, ant, dict))
            {
                SieveCoreferenceSystem.logger.Finest("Role Appositive: " + mention.SpanToString() + "\tvs\t" + ant.SpanToString());
                ret = true;
            }
            if (flags.UseInclusionHeadmatch && Rules.EntityHeadsAgree(mentionCluster, potentialAntecedent, mention, ant, dict))
            {
                SieveCoreferenceSystem.logger.Finest("Entity heads agree: " + mention.SpanToString() + "\tvs\t" + ant.SpanToString());
                ret = true;
            }
            if (flags.UseRelaxedHeadmatch && Rules.EntityRelaxedHeadsAgreeBetweenMentions(mentionCluster, potentialAntecedent, mention, ant))
            {
                ret = true;
            }
            // Veto rules: the first three cancel a tentative match (ret == true); the rest reject regardless of ret.
            if (flags.UseWordsInclusion && ret && !Rules.EntityWordsIncluded(mentionCluster, potentialAntecedent, mention, ant))
            {
                return(false);
            }
            if (flags.UseIncompatibleModifier && ret && Rules.EntityHaveIncompatibleModifier(mentionCluster, potentialAntecedent))
            {
                return(false);
            }
            if (flags.UseProperheadAtLast && ret && !Rules.EntitySameProperHeadLastWord(mentionCluster, potentialAntecedent, mention, ant))
            {
                return(false);
            }
            if (flags.UseAttributesAgree && !Rules.EntityAttributesAgree(mentionCluster, potentialAntecedent))
            {
                return(false);
            }
            if (flags.UseDifferentLocation && Rules.EntityHaveDifferentLocation(mention, ant, dict))
            {
                // The inner condition only controls logging; the pair is rejected either way.
                if (flags.UseProperheadAtLast && ret && mention.goldCorefClusterID != ant.goldCorefClusterID)
                {
                    SieveCoreferenceSystem.logger.Finest("DIFFERENT LOCATION: " + ant.SpanToString() + " :: " + mention.SpanToString());
                }
                return(false);
            }
            if (flags.UseNumberInMention && Rules.EntityNumberInLaterMention(mention, ant))
            {
                // The inner condition only controls logging; the pair is rejected either way.
                if (flags.UseProperheadAtLast && ret && mention.goldCorefClusterID != ant.goldCorefClusterID)
                {
                    SieveCoreferenceSystem.logger.Finest("NEW NUMBER : " + ant.SpanToString() + " :: " + mention.SpanToString());
                }
                return(false);
            }
            if (flags.UseWnHypernym)
            {
                // The WordNet check is resolved reflectively on the runtime type of semantics.wordnet.
                // Parameter types and arguments are passed as arrays, as Type.GetMethod / MethodInfo.Invoke require.
                // NOTE(review): the method is looked up under the name "checkHypernym" - confirm the target type exposes it with that exact casing.
                MethodInfo meth = semantics.wordnet.GetType().GetMethod("checkHypernym", new Type[] { typeof(CorefCluster), typeof(CorefCluster), typeof(Mention), typeof(Mention) });
                if ((bool)meth.Invoke(semantics.wordnet, new object[] { mentionCluster, potentialAntecedent, mention, ant }))
                {
                    ret = true;
                }
                else
                {
                    // Log gold-coreferent, non-pronominal pairs that the hypernym check missed.
                    if (mention.goldCorefClusterID == ant.goldCorefClusterID && !mention.IsPronominal() && !ant.IsPronominal())
                    {
                        SieveCoreferenceSystem.logger.Finest("not hypernym in WN");
                        SieveCoreferenceSystem.logger.Finest("False Negatives:: " + ant.SpanToString() + " <= " + mention.SpanToString());
                    }
                }
            }
            if (flags.UseWnSynonym)
            {
                // NOTE(review): looked up under the name "checkSynonym" - confirm the target type exposes it with that exact casing.
                MethodInfo meth = semantics.wordnet.GetType().GetMethod("checkSynonym", new Type[] { typeof(Mention), typeof(Mention) });
                if ((bool)meth.Invoke(semantics.wordnet, new object[] { mention, ant }))
                {
                    ret = true;
                }
                else
                {
                    // Log gold-coreferent, non-pronominal pairs that the synonym check missed.
                    if (mention.goldCorefClusterID == ant.goldCorefClusterID && !mention.IsPronominal() && !ant.IsPronominal())
                    {
                        SieveCoreferenceSystem.logger.Finest("not synonym in WN");
                        SieveCoreferenceSystem.logger.Finest("False Negatives:: " + ant.SpanToString() + " <= " + mention.SpanToString());
                    }
                }
            }
            try
            {
                if (flags.UseAlias && Rules.EntityAlias(mentionCluster, potentialAntecedent, semantics, dict))
                {
                    return(true);
                }
            }
            catch (Exception e)
            {
                // Wrap while keeping the original exception (and its stack trace) as the inner exception.
                throw new Exception(e.Message, e);
            }
            if (flags.UseDistance && Rules.EntityTokenDistance(mention2, ant))
            {
                return(false);
            }
            if (flags.UseCorefDict)
            {
                // Head match
                if (ant.headWord.Lemma().Equals(mention2.headWord.Lemma()))
                {
                    return(false);
                }
                // Constraint: ignore pairs commonNoun - properNoun
                // (mention2 counts as proper if tagged NNP* or if it has an upper-case letter after its first character)
                if (ant.mentionType != Dictionaries.MentionType.Proper && (mention2.headWord.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation)).StartsWith("NNP") || !Sharpen.Runtime.Substring(mention2.headWord.Word(), 1).Equals(Sharpen.Runtime.Substring(mention2.headWord.Word(), 1).ToLower())))
                {
                    return(false);
                }
                // Constraint: ignore plurals
                if (ant.headWord.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation)).Equals("NNS") && mention2.headWord.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation)).Equals("NNS"))
                {
                    return(false);
                }
                // Constraint: ignore mentions with indefinite determiners
                if (dict.indefinitePronouns.Contains(ant.originalSpan[0].Lemma()) || dict.indefinitePronouns.Contains(mention2.originalSpan[0].Lemma()))
                {
                    return(false);
                }
                // Constraint: ignore coordinated mentions
                if (ant.IsCoordinated() || mention2.IsCoordinated())
                {
                    return(false);
                }
                // Constraint: context incompatibility
                if (Rules.ContextIncompatible(mention2, ant, dict))
                {
                    return(false);
                }
                // Constraint: sentence context incompatibility when the mentions are common nouns
                if (Rules.SentenceContextIncompatible(mention2, ant, dict))
                {
                    return(false);
                }
                // Dictionary lookups, tried in order; the first one that succeeds accepts the pair.
                if (Rules.EntityClusterAllCorefDictionary(mentionCluster, potentialAntecedent, dict, 1, 8))
                {
                    return(true);
                }
                if (Rules.EntityCorefDictionary(mention, ant, dict, 2, 2))
                {
                    return(true);
                }
                if (Rules.EntityCorefDictionary(mention, ant, dict, 3, 2))
                {
                    return(true);
                }
                if (Rules.EntityCorefDictionary(mention, ant, dict, 4, 2))
                {
                    return(true);
                }
            }
            if (flags.DoPronoun)
            {
                // Use mention2 when it is one of the representative mention's predicate nominatives,
                // otherwise fall back to the representative mention.
                Mention m;
                if (mention.predicateNominatives != null && mention.predicateNominatives.Contains(mention2))
                {
                    m = mention2;
                }
                else
                {
                    m = mention;
                }
                if ((m.IsPronominal() || dict.allPronouns.Contains(m.ToString())) && Rules.EntityAttributesAgree(mentionCluster, potentialAntecedent))
                {
                    // Demonym antecedent paired with a pronoun listed in notOrganizationPRP: record and reject.
                    if (dict.demonymSet.Contains(ant.LowercaseNormalizedSpanString()) && dict.notOrganizationPRP.Contains(m.headString))
                    {
                        document.AddIncompatible(m, ant);
                        return(false);
                    }
                    if (Constants.UseDiscourseConstraints && Rules.EntityPersonDisagree(document, mentionCluster, potentialAntecedent, dict))
                    {
                        SieveCoreferenceSystem.logger.Finest("Incompatibles: Person Disagree: " + ant.SpanToString() + "(" + ant.mentionID + ") :: " + mention.SpanToString() + "(" + mention.mentionID + ") -> " + (mention.goldCorefClusterID != ant.goldCorefClusterID));
                        document.AddIncompatible(m, ant);
                        return(false);
                    }
                    return(true);
                }
            }
            return(ret);
        }