CorefCluster C# (CSharp) Exemples de code

Exemple #1

0

Afficher le fichier

        /// <summary>
        /// Decides whether two clusters match by comparing their representative mentions with
        /// <c>mentionMatcher</c>.
        /// </summary>
        /// <param name="document">Document that records the pair as incompatible when the matcher rejects it.</param>
        /// <param name="mentionCluster">Cluster whose representative mention is being resolved.</param>
        /// <param name="potentialAntecedent">Candidate antecedent cluster.</param>
        /// <param name="dict">Dictionaries forwarded to <c>IsNamedMention</c>.</param>
        /// <param name="roleSet">Role mentions forwarded to <c>IsNamedMention</c>.</param>
        /// <returns>
        /// <c>true</c> only when every precondition holds and the matcher reports the two
        /// representative mentions as compatible; <c>false</c> otherwise.
        /// </returns>
        public override bool CheckEntityMatch(Document document, CorefCluster mentionCluster, CorefCluster potentialAntecedent, Dictionaries dict, ICollection <Mention> roleSet)
        {
            Mention mainMention = mentionCluster.GetRepresentativeMention();
            Mention antMention  = potentialAntecedent.GetRepresentativeMention();

            // Both representative mentions must be named mentions.
            if (!IsNamedMention(mainMention, dict, roleSet) || !IsNamedMention(antMention, dict, roleSet))
            {
                return(false);
            }
            // At least one of the two spans must be longer than minTokens.
            if (!(mainMention.originalSpan.Count > minTokens || antMention.originalSpan.Count > minTokens))
            {
                return(false);
            }
            if (!CorefRules.EntityAttributesAgree(mentionCluster, potentialAntecedent, ignoreGender))
            {
                return(false);
            }
            // At least one of the two mentions must carry a supported NER type.
            if (!(supportedNerTypes.Contains(mainMention.nerString) || supportedNerTypes.Contains(antMention.nerString)))
            {
                return(false);
            }
            // The previous code stored the result in a plain bool and then compared it with null,
            // which is always true, so the "no verdict" branch could never run. A nullable local
            // accepts both a bool and a bool? return value and keeps the three-way distinction.
            // NOTE(review): the return type of IsCompatible is not visible here - confirm.
            bool? compatible = mentionMatcher.IsCompatible(mainMention, antMention);

            if (!compatible.HasValue)
            {
                // No verdict from the matcher: not a match, and nothing is recorded as incompatible.
                return(false);
            }
            if (!compatible.Value)
            {
                // Explicit rejection: remember the pair as incompatible on the document.
                document.AddIncompatible(mainMention, antMention);
            }
            return(compatible.Value);
        }

Exemple #2

0

Afficher le fichier

        /// <summary>Search pruning: tells the caller whether this mention should be skipped.</summary>
        /// <param name="document">The current document (not read by this implementation).</param>
        /// <param name="m1">The mention under consideration.</param>
        /// <param name="c">The cluster whose first mention is compared with <paramref name="m1"/>.</param>
        /// <param name="dict">Dictionaries providing the indefinite pronoun list.</param>
        /// <returns><c>true</c> when the mention should be skipped.</returns>
        public virtual bool SkipThisMention(Document document, Mention m1, CorefCluster c, Dictionaries dict)
        {
            // When none of these sieve flags is set, only the first mention of a cluster is processed.
            // (The exact-string-match flag is not part of this test in this variant.)
            bool firstMentionOnly = !flags.UseRoleapposition && !flags.UsePredicatenominatives && !flags.UseAcronym && !flags.UseApposition && !flags.UseRelativepronoun;

            if (firstMentionOnly && !c.GetFirstMention().Equals(m1))
            {
                return(true);
            }
            bool prune = false;

            // A noun phrase starting with an indefinite article - unlikely to have an antecedent (e.g. "A commission" was set up to .... )
            if (m1.appositions == null && m1.predicateNominatives == null)
            {
                bool startsWithIndefiniteArticle = m1.LowercaseNormalizedSpanString().StartsWith("a ") || m1.LowercaseNormalizedSpanString().StartsWith("an ");
                if (startsWithIndefiniteArticle && !flags.UseExactstringmatch)
                {
                    prune = true;
                }
            }
            // An indefinite pronoun - unlikely to have an antecedent (e.g. "Some" say that... )
            if (dict.indefinitePronouns.Contains(m1.LowercaseNormalizedSpanString()))
            {
                prune = true;
            }
            // A noun phrase starting with an indefinite adjective - unlikely to have an antecedent (e.g. "Another opinion" on the topic is...)
            foreach (string pronoun in dict.indefinitePronouns)
            {
                string prefix = pronoun + " ";
                if (m1.LowercaseNormalizedSpanString().StartsWith(prefix))
                {
                    prune = true;
                    break;
                }
            }
            return(prune);
        }

Exemple #3

0

Afficher le fichier

        /// <summary>Search pruning: tells the caller whether this mention should be skipped.</summary>
        /// <param name="document">The current document (not read by this implementation).</param>
        /// <param name="m1">The mention under consideration.</param>
        /// <param name="c">The cluster whose first mention is compared with <paramref name="m1"/>.</param>
        /// <param name="dict">Dictionaries providing the indefinite pronoun list.</param>
        /// <returns><c>true</c> when the mention should be skipped.</returns>
        public virtual bool SkipThisMention(Document document, Mention m1, CorefCluster c, Dictionaries dict)
        {
            // When none of these sieve flags is set, only the first mention of a cluster is processed.
            bool firstMentionOnly = !flags.UseExactstringmatch && !flags.UseRoleapposition && !flags.UsePredicatenominatives && !flags.UseAcronym && !flags.UseApposition && !flags.UseRelativepronoun;

            if (firstMentionOnly && !c.GetFirstMention().Equals(m1))
            {
                return(true);
            }
            SieveCoreferenceSystem.logger.Finest("DOING COREF FOR:\t" + m1.SpanToString());
            bool prune = false;

            // A noun phrase starting with an indefinite article - unlikely to have an antecedent (e.g. "A commission" was set up to .... )
            if (m1.appositions == null && m1.predicateNominatives == null)
            {
                bool startsWithIndefiniteArticle = m1.LowercaseNormalizedSpanString().StartsWith("a ") || m1.LowercaseNormalizedSpanString().StartsWith("an ");
                if (startsWithIndefiniteArticle && !flags.UseExactstringmatch)
                {
                    prune = true;
                }
            }
            // An indefinite pronoun - unlikely to have an antecedent (e.g. "Some" say that... )
            if (dict.indefinitePronouns.Contains(m1.LowercaseNormalizedSpanString()))
            {
                prune = true;
            }
            // A noun phrase starting with an indefinite adjective - unlikely to have an antecedent (e.g. "Another opinion" on the topic is...)
            foreach (string pronoun in dict.indefinitePronouns)
            {
                string prefix = pronoun + " ";
                if (m1.LowercaseNormalizedSpanString().StartsWith(prefix))
                {
                    prune = true;
                    break;
                }
            }
            if (!prune)
            {
                return(false);
            }
            // Trace the pruned mention before reporting it to the caller.
            SieveCoreferenceSystem.logger.Finest("MENTION SKIPPED:\t" + m1.SpanToString() + "(" + m1.sentNum + ")" + "\toriginalRef: " + m1.originalRef + " in discourse " + m1.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)));
            return(true);
        }

Exemple #4

0

Afficher le fichier

        /// <summary>
        /// Merges the clusters of two predicted mentions, identified by mention ID, unless both
        /// mentions already map to the same cluster object.
        /// </summary>
        /// <param name="document">Document holding the mention and cluster tables.</param>
        /// <param name="mID">ID of the mention whose cluster is removed from the cluster table after the merge.</param>
        /// <param name="antID">ID of the antecedent mention whose cluster is passed first to the merge calls.</param>
        public static void Merge(Document document, int mID, int antID)
        {
            CorefCluster mentionCluster    = document.corefClusters[document.predictedMentionsByID[mID].corefClusterID];
            CorefCluster antecedentCluster = document.corefClusters[document.predictedMentionsByID[antID].corefClusterID];

            if (mentionCluster != antecedentCluster)
            {
                // Capture the ID before merging so the right entry is dropped afterwards.
                int absorbedClusterId = mentionCluster.GetClusterID();

                CorefCluster.MergeClusters(antecedentCluster, mentionCluster);
                document.MergeIncompatibles(antecedentCluster, mentionCluster);
                Sharpen.Collections.Remove(document.corefClusters, absorbedClusterId);
            }
        }

Exemple #5

0

Afficher le fichier

        /// <summary>
        /// Merges the clusters of the two mentions in <paramref name="mentionPair"/>, unless both
        /// mentions already carry the same cluster ID.
        /// </summary>
        /// <param name="mentionPair">Pair of mention IDs; the cluster of the first one is removed from the cluster table after the merge.</param>
        /// <param name="document">Document holding the mention and cluster tables.</param>
        public static void MergeCoreferenceClusters(Pair <int, int> mentionPair, Document document)
        {
            Mention firstMention  = document.predictedMentionsByID[mentionPair.first];
            Mention secondMention = document.predictedMentionsByID[mentionPair.second];

            if (firstMention.corefClusterID != secondMention.corefClusterID)
            {
                // Remember the ID before merging so the right entry is dropped afterwards.
                int          absorbedClusterId = firstMention.corefClusterID;
                CorefCluster absorbedCluster   = document.corefClusters[firstMention.corefClusterID];
                CorefCluster survivingCluster  = document.corefClusters[secondMention.corefClusterID];

                CorefCluster.MergeClusters(survivingCluster, absorbedCluster);
                Sharpen.Collections.Remove(document.corefClusters, absorbedClusterId);
            }
        }

Exemple #6

0

Afficher le fichier

        /// <summary>
        /// Builds the feature counter for a pair of mentions. <paramref name="m1"/> is asserted to
        /// appear earlier than <paramref name="m2"/>; the feature names call m1 the antecedent and
        /// m2 the anaphor.
        /// </summary>
        /// <param name="doc">Document both mentions belong to; its docType, docInfo and roleSet are read.</param>
        /// <param name="m1">The earlier mention of the pair.</param>
        /// <param name="m2">The later mention of the pair.</param>
        /// <returns>A counter keyed by feature-name strings.</returns>
        private ICounter <string> GetFeatures(Document doc, Mention m1, Mention m2)
        {
            System.Diagnostics.Debug.Assert((m1.AppearEarlierThan(m2)));
            ICounter <string> features = new ClassicCounter <string>();

            // global features
            features.IncrementCount("bias");
            if (useDocSource)
            {
                features.IncrementCount("doc-type=" + doc.docType);
                if (doc.docInfo != null && doc.docInfo.Contains("DOC_ID"))
                {
                    // The second "/"-separated component of DOC_ID is used as the source.
                    // NOTE(review): indexing [1] assumes DOC_ID contains at least one "/" - confirm.
                    features.IncrementCount("doc-source=" + doc.docInfo["DOC_ID"].Split("/")[1]);
                }
            }
            // singleton feature conjunctions
            IList <string> singletonFeatures1 = m1.GetSingletonFeatures(dictionaries);
            IList <string> singletonFeatures2 = m2.GetSingletonFeatures(dictionaries);

            // For each index listed in SingletonFeatures, pair the two mentions' values at that
            // index; indices missing from either list are skipped.
            foreach (KeyValuePair <int, string> e in SingletonFeatures)
            {
                if (e.Key < singletonFeatures1.Count && e.Key < singletonFeatures2.Count)
                {
                    features.IncrementCount(e.Value + "=" + singletonFeatures1[e.Key] + "_" + singletonFeatures2[e.Key]);
                }
            }
            SemanticGraphEdge p1 = GetDependencyParent(m1);
            SemanticGraphEdge p2 = GetDependencyParent(m2);

            features.IncrementCount("dep-relations=" + (p1 == null ? "null" : p1.GetRelation()) + "_" + (p2 == null ? "null" : p2.GetRelation()));
            features.IncrementCount("roles=" + GetRole(m1) + "_" + GetRole(m2));
            CoreLabel headCL1  = HeadWord(m1);
            CoreLabel headCL2  = HeadWord(m2);
            string    headPOS1 = GetPOS(headCL1);
            string    headPOS2 = GetPOS(headCL2);

            features.IncrementCount("head-pos-s=" + headPOS1 + "_" + headPOS2);
            features.IncrementCount("head-words=" + WordIndicator("h_" + headCL1.Word().ToLower() + "_" + headCL2.Word().ToLower(), headPOS1 + "_" + headPOS2));
            // agreement features
            AddFeature(features, "animacies-agree", m2.AnimaciesAgree(m1));
            AddFeature(features, "attributes-agree", m2.AttributesAgree(m1, dictionaries));
            AddFeature(features, "entity-types-agree", m2.EntityTypesAgree(m1, dictionaries));
            AddFeature(features, "numbers-agree", m2.NumbersAgree(m1));
            AddFeature(features, "genders-agree", m2.GendersAgree(m1));
            AddFeature(features, "ner-strings-equal", m1.nerString.Equals(m2.nerString));
            // string matching features
            AddFeature(features, "antecedent-head-in-anaphor", HeadContainedIn(m1, m2));
            AddFeature(features, "anaphor-head-in-antecedent", HeadContainedIn(m2, m1));
            // The remaining string features are only emitted when neither mention is pronominal.
            if (m1.mentionType != Dictionaries.MentionType.Pronominal && m2.mentionType != Dictionaries.MentionType.Pronominal)
            {
                AddFeature(features, "antecedent-in-anaphor", m2.SpanToString().ToLower().Contains(m1.SpanToString().ToLower()));
                AddFeature(features, "anaphor-in-antecedent", m1.SpanToString().ToLower().Contains(m2.SpanToString().ToLower()));
                AddFeature(features, "heads-equal", Sharpen.Runtime.EqualsIgnoreCase(m1.headString, m2.headString));
                AddFeature(features, "heads-agree", m2.HeadsAgree(m1));
                AddFeature(features, "exact-match", m1.ToString().Trim().ToLower().Equals(m2.ToString().Trim().ToLower()));
                AddFeature(features, "partial-match", RelaxedStringMatch(m1, m2));
                // Edit distance divided by the combined length of both strings; emitted once as a
                // real value and once as an indicator truncated to one decimal place.
                double editDistance = StringUtils.EditDistance(m1.SpanToString(), m2.SpanToString()) / (double)(m1.SpanToString().Length + m2.SpanToString().Length);
                features.IncrementCount("edit-distance", editDistance);
                features.IncrementCount("edit-distance=" + ((int)(editDistance * 10) / 10.0));
                double headEditDistance = StringUtils.EditDistance(m1.headString, m2.headString) / (double)(m1.headString.Length + m2.headString.Length);
                features.IncrementCount("head-edit-distance", headEditDistance);
                features.IncrementCount("head-edit-distance=" + ((int)(headEditDistance * 10) / 10.0));
            }
            // distance features
            AddNumeric(features, "mention-distance", m2.mentionNum - m1.mentionNum);
            AddNumeric(features, "sentence-distance", m2.sentNum - m1.sentNum);
            if (m2.sentNum == m1.sentNum)
            {
                AddNumeric(features, "word-distance", m2.startIndex - m1.endIndex);
                // Same sentence and the earlier mention ends after the later one starts.
                if (m1.endIndex > m2.startIndex)
                {
                    features.IncrementCount("spans-intersect");
                }
            }
            // setup for dcoref features
            // Each mention is wrapped in its own single-mention cluster so the cluster-level
            // CorefRules checks below can be called; the cluster IDs get a random offset.
            // NOTE(review): a new Random is created on every call - confirm this is intended.
            ICollection <Mention> ms1 = new HashSet <Mention>();

            ms1.Add(m1);
            ICollection <Mention> ms2 = new HashSet <Mention>();

            ms2.Add(m2);
            Random       r  = new Random();
            CorefCluster c1 = new CorefCluster(20000 + r.NextInt(10000), ms1);
            CorefCluster c2 = new CorefCluster(10000 + r.NextInt(10000), ms2);
            string       s2 = m2.LowercaseNormalizedSpanString();
            string       s1 = m1.LowercaseNormalizedSpanString();

            // discourse dcoref features
            AddFeature(features, "mention-speaker-PER0", Sharpen.Runtime.EqualsIgnoreCase(m2.headWord.Get(typeof(CoreAnnotations.SpeakerAnnotation)), "PER0"));
            AddFeature(features, "antecedent-is-anaphor-speaker", CorefRules.AntecedentIsMentionSpeaker(doc, m2, m1, dictionaries));
            AddFeature(features, "same-speaker", CorefRules.EntitySameSpeaker(doc, m2, m1));
            AddFeature(features, "person-disagree-same-speaker", CorefRules.EntityPersonDisagree(doc, m2, m1, dictionaries) && CorefRules.EntitySameSpeaker(doc, m2, m1));
            AddFeature(features, "antecedent-matches-anaphor-speaker", CorefRules.AntecedentMatchesMentionSpeakerAnnotation(m2, m1, doc));
            // NOTE(review): Get(...).Equals("PER0") presumably throws if the speaker annotation is absent - confirm it is always set.
            AddFeature(features, "discourse-you-PER0", m2.person == Dictionaries.Person.You && doc.docType == Document.DocType.Article && m2.headWord.Get(typeof(CoreAnnotations.SpeakerAnnotation)).Equals("PER0"));
            AddFeature(features, "speaker-match-i-i", m2.number == Dictionaries.Number.Singular && dictionaries.firstPersonPronouns.Contains(s1) && m1.number == Dictionaries.Number.Singular && dictionaries.firstPersonPronouns.Contains(s2) && CorefRules.
                       EntitySameSpeaker(doc, m2, m1));
            AddFeature(features, "speaker-match-speaker-i", m2.number == Dictionaries.Number.Singular && dictionaries.firstPersonPronouns.Contains(s2) && CorefRules.AntecedentIsMentionSpeaker(doc, m2, m1, dictionaries));
            AddFeature(features, "speaker-match-i-speaker", m1.number == Dictionaries.Number.Singular && dictionaries.firstPersonPronouns.Contains(s1) && CorefRules.AntecedentIsMentionSpeaker(doc, m1, m2, dictionaries));
            AddFeature(features, "speaker-match-you-you", dictionaries.secondPersonPronouns.Contains(s1) && dictionaries.secondPersonPronouns.Contains(s2) && CorefRules.EntitySameSpeaker(doc, m2, m1));
            AddFeature(features, "discourse-between-two-person", ((m2.person == Dictionaries.Person.I && m1.person == Dictionaries.Person.You || (m2.person == Dictionaries.Person.You && m1.person == Dictionaries.Person.I)) && (m2.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation
                                                                                                                                                                                                                                                          )) - m1.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)) == 1) && doc.docType == Document.DocType.Conversation));
            AddFeature(features, "incompatible-not-match", m1.person != Dictionaries.Person.I && m2.person != Dictionaries.Person.I && (CorefRules.AntecedentIsMentionSpeaker(doc, m1, m2, dictionaries) || CorefRules.AntecedentIsMentionSpeaker(doc, m2, m1
                                                                                                                                                                                                                                                  , dictionaries)));
            int utteranceDist = Math.Abs(m1.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)) - m2.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)));

            // Speaker-mismatch features: only for non-article documents, adjacent utterances and
            // mentions that EntitySameSpeaker does not attribute to the same speaker.
            if (doc.docType != Document.DocType.Article && utteranceDist == 1 && !CorefRules.EntitySameSpeaker(doc, m2, m1))
            {
                AddFeature(features, "speaker-mismatch-i-i", m1.person == Dictionaries.Person.I && m2.person == Dictionaries.Person.I);
                AddFeature(features, "speaker-mismatch-you-you", m1.person == Dictionaries.Person.You && m2.person == Dictionaries.Person.You);
                AddFeature(features, "speaker-mismatch-we-we", m1.person == Dictionaries.Person.We && m2.person == Dictionaries.Person.We);
            }
            // other dcoref features
            string firstWord1 = FirstWord(m1).Word().ToLower();

            AddFeature(features, "indefinite-article-np", (m1.appositions == null && m1.predicateNominatives == null && (firstWord1.Equals("a") || firstWord1.Equals("an"))));
            AddFeature(features, "far-this", m2.LowercaseNormalizedSpanString().Equals("this") && Math.Abs(m2.sentNum - m1.sentNum) > 3);
            // Same condition as "discourse-you-PER0" above, emitted under a second feature name.
            AddFeature(features, "per0-you-in-article", m2.person == Dictionaries.Person.You && doc.docType == Document.DocType.Article && m2.headWord.Get(typeof(CoreAnnotations.SpeakerAnnotation)).Equals("PER0"));
            AddFeature(features, "inside-in", m2.InsideIn(m1) || m1.InsideIn(m2));
            AddFeature(features, "indefinite-determiners", dictionaries.indefinitePronouns.Contains(m1.originalSpan[0].Lemma()) || dictionaries.indefinitePronouns.Contains(m2.originalSpan[0].Lemma()));
            AddFeature(features, "entity-attributes-agree", CorefRules.EntityAttributesAgree(c2, c1));
            AddFeature(features, "entity-token-distance", CorefRules.EntityTokenDistance(m2, m1));
            AddFeature(features, "i-within-i", CorefRules.EntityIWithinI(m2, m1, dictionaries));
            AddFeature(features, "exact-string-match", CorefRules.EntityExactStringMatch(c2, c1, dictionaries, doc.roleSet));
            AddFeature(features, "entity-relaxed-heads-agree", CorefRules.EntityRelaxedHeadsAgreeBetweenMentions(c2, c1, m2, m1));
            AddFeature(features, "is-acronym", CorefRules.EntityIsAcronym(doc, c2, c1));
            AddFeature(features, "demonym", m2.IsDemonym(m1, dictionaries));
            AddFeature(features, "incompatible-modifier", CorefRules.EntityHaveIncompatibleModifier(m2, m1));
            AddFeature(features, "head-lemma-match", m1.headWord.Lemma().Equals(m2.headWord.Lemma()));
            AddFeature(features, "words-included", CorefRules.EntityWordsIncluded(c2, c1, m2, m1));
            AddFeature(features, "extra-proper-noun", CorefRules.EntityHaveExtraProperNoun(m2, m1, new HashSet <string>()));
            AddFeature(features, "number-in-later-mentions", CorefRules.EntityNumberInLaterMention(m2, m1));
            AddFeature(features, "sentence-context-incompatible", CorefRules.SentenceContextIncompatible(m2, m1, dictionaries));
            // syntax features
            if (useConstituencyParse)
            {
                if (m1.sentNum == m2.sentNum)
                {
                    int  clauseCount = 0;
                    Tree tree        = m2.contextParseTree;
                    Tree current     = m2.mentionSubTree;
                    // Walk up from m2's subtree one ancestor at a time, counting nodes whose label
                    // starts with "S", until a node dominates m1's subtree, is labelled ROOT, or
                    // has no further ancestor.
                    // NOTE(review): the first Ancestor(1, tree) result is dereferenced without a
                    // null check - confirm m2.mentionSubTree is never the top of the tree.
                    while (true)
                    {
                        current = current.Ancestor(1, tree);
                        if (current.Label().Value().StartsWith("S"))
                        {
                            clauseCount++;
                        }
                        if (current.Dominates(m1.mentionSubTree))
                        {
                            break;
                        }
                        if (current.Label().Value().Equals("ROOT") || current.Ancestor(1, tree) == null)
                        {
                            break;
                        }
                    }
                    features.IncrementCount("clause-count", clauseCount);
                    features.IncrementCount("clause-count=" + Bin(clauseCount));
                }
                if (RuleBasedCorefMentionFinder.IsPleonastic(m2, m2.contextParseTree) || RuleBasedCorefMentionFinder.IsPleonastic(m1, m1.contextParseTree))
                {
                    features.IncrementCount("pleonastic-it");
                }
                if (MaximalNp(m1.mentionSubTree) == MaximalNp(m2.mentionSubTree))
                {
                    features.IncrementCount("same-maximal-np");
                }
                // A mention counts as embedded when its head embedding level is greater than 1.
                bool m1Embedded = HeadEmbeddingLevel(m1.mentionSubTree, m1.headIndex - m1.startIndex) > 1;
                bool m2Embedded = HeadEmbeddingLevel(m2.mentionSubTree, m2.headIndex - m2.startIndex) > 1;
                features.IncrementCount("embedding=" + m1Embedded + "_" + m2Embedded);
            }
            return(features);
        }

Exemple #7

0

Afficher le fichier

        /// <summary>
        /// Looks for an antecedent of <paramref name="m"/>, scanning from the mention's own sentence
        /// back to sentence 0. On the first candidate accepted by <c>Coreferent</c>, the two clusters
        /// are merged, the mention's cluster ID is removed from the document, and the method returns.
        /// </summary>
        /// <param name="m">The mention to resolve.</param>
        /// <param name="mIdx">Position of the mention, forwarded to the antecedent ordering and the debug log.</param>
        /// <param name="document">Document holding the mentions, clusters and role set.</param>
        /// <param name="dict">Dictionaries forwarded to the helper checks.</param>
        /// <param name="props">Properties consulted for analysis skipping and debug logging.</param>
        /// <param name="sbLog">Receives the dcoref error log entry when debugging is enabled.</param>
        /// <exception cref="System.Exception"/>
        public override void FindCoreferentAntecedent(Mention m, int mIdx, Document document, Dictionaries dict, Properties props, StringBuilder sbLog)
        {
            // check for skip: first mention only, discourse salience
            bool pruningApplies = !this.flags.UseSpeakermatch && !this.flags.UseDiscoursematch && !this.flags.UseApposition && !this.flags.UsePredicatenominatives;

            if (pruningApplies && this.SkipThisMention(document, m, document.corefClusters[m.corefClusterID], dict))
            {
                return;
            }
            ICollection <Mention> roles = document.roleSet;

            for (int sentence = m.sentNum; sentence >= 0; sentence--)
            {
                IList <Mention> candidates = Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve.GetOrderedAntecedents(m, sentence, mIdx, document.predictedMentions, dict);
                bool            tooFarBack = maxSentDist != -1 && m.sentNum - sentence > maxSentDist;
                if (tooFarBack)
                {
                    continue;
                }
                // TODO: do we need this?
                // Sort mentions by length whenever we have two mentions beginning at the same position and having the same head
                for (int first = 0; first < candidates.Count; first++)
                {
                    for (int second = 0; second < candidates.Count; second++)
                    {
                        Mention left         = candidates[first];
                        Mention right        = candidates[second];
                        bool    sameHeadSpot = left.headString.Equals(right.headString) && left.startIndex == right.startIndex && left.SameSentence(right);
                        if (sameHeadSpot && second > first && left.SpanToString().Length > right.SpanToString().Length)
                        {
                            // Swap the two entries.
                            candidates.Set(second, candidates.Set(first, right));
                        }
                    }
                }
                foreach (Mention candidate in candidates)
                {
                    if (SkipForAnalysis(candidate, m, props))
                    {
                        continue;
                    }
                    // Skip singletons according to the singleton predictor
                    // (only for non-NE mentions)
                    // Recasens, de Marneffe, and Potts (NAACL 2013)
                    bool bothNonProperSingletons = m.isSingleton && m.mentionType != Dictionaries.MentionType.Proper && candidate.isSingleton && candidate.mentionType != Dictionaries.MentionType.Proper;
                    // Nothing to do either when both mentions already share a cluster.
                    if (bothNonProperSingletons || m.corefClusterID == candidate.corefClusterID)
                    {
                        continue;
                    }
                    // Both mention types must be among those this sieve handles.
                    if (!mType.Contains(m.mentionType) || !aType.Contains(candidate.mentionType))
                    {
                        continue;
                    }
                    if (m.mentionType == Dictionaries.MentionType.Pronominal && (!MatchedMentionType(m, mTypeStr) || !MatchedMentionType(candidate, aTypeStr)))
                    {
                        continue;
                    }
                    CorefCluster mentionCluster   = document.corefClusters[m.corefClusterID];
                    CorefCluster candidateCluster = document.corefClusters[candidate.corefClusterID];
                    System.Diagnostics.Debug.Assert((mentionCluster != null));
                    System.Diagnostics.Debug.Assert((candidateCluster != null));
                    if (this.UseRoleSkip())
                    {
                        // Only record role appositives here; the pair is never merged on this path.
                        if (m.IsRoleAppositive(candidate, dict))
                        {
                            roles.Add(m);
                        }
                        else if (candidate.IsRoleAppositive(m, dict))
                        {
                            roles.Add(candidate);
                        }
                        continue;
                    }
                    if (!this.Coreferent(document, mentionCluster, candidateCluster, m, candidate, dict, roles))
                    {
                        continue;
                    }
                    // print dcoref log
                    if (HybridCorefProperties.Debug(props))
                    {
                        sbLog.Append(HybridCorefPrinter.PrintErrorLogDcoref(m, candidate, document, dict, mIdx, this.GetType().FullName));
                    }
                    // Capture the ID before merging so the right cluster entry is dropped afterwards.
                    int absorbedClusterId = mentionCluster.clusterID;
                    CorefCluster.MergeClusters(candidateCluster, mentionCluster);
                    document.MergeIncompatibles(candidateCluster, mentionCluster);
                    document.MergeAcronymCache(candidateCluster, mentionCluster);
                    Sharpen.Collections.Remove(document.corefClusters, absorbedClusterId);
                    return;
                }
            }
        }

Exemple #8

0

Afficher le fichier

        /// <summary>Checks if two clusters are coreferent according to our sieve pass constraints</summary>
        /// <param name="document"/>
        /// <exception cref="System.Exception"/>
        public virtual bool Coreferent(Document document, CorefCluster mentionCluster, CorefCluster potentialAntecedent, Mention mention2, Mention ant, Dictionaries dict, ICollection <Mention> roleSet)
        {
            bool    ret     = false;
            Mention mention = mentionCluster.GetRepresentativeMention();

            if (flags.UseIncompatibles)
            {
                // Check our list of incompatible mentions and don't cluster them together
                // Allows definite no's from previous sieves to propagate down
                if (document.IsIncompatible(mentionCluster, potentialAntecedent))
                {
                    return(false);
                }
            }
            if (flags.DoPronoun && Math.Abs(mention2.sentNum - ant.sentNum) > 3 && mention2.person != Dictionaries.Person.I && mention2.person != Dictionaries.Person.You)
            {
                return(false);
            }
            if (mention2.LowercaseNormalizedSpanString().Equals("this") && Math.Abs(mention2.sentNum - ant.sentNum) > 3)
            {
                return(false);
            }
            if (mention2.person == Dictionaries.Person.You && document.docType == Document.DocType.Article && mention2.headWord.Get(typeof(CoreAnnotations.SpeakerAnnotation)).Equals("PER0"))
            {
                return(false);
            }
            if (document.conllDoc != null)
            {
                if (ant.generic && ant.person == Dictionaries.Person.You)
                {
                    return(false);
                }
                if (mention2.generic)
                {
                    return(false);
                }
            }
            // chinese newswire contains coref nested NPs with shared headword  Chen & Ng
            if (lang != Locale.Chinese || document.docInfo == null || !document.docInfo.GetOrDefault("DOC_ID", string.Empty).Contains("nw"))
            {
                if (mention2.InsideIn(ant) || ant.InsideIn(mention2))
                {
                    return(false);
                }
            }
            if (flags.UseSpeakermatch)
            {
                string mSpeaker = mention2.headWord.Get(typeof(CoreAnnotations.SpeakerAnnotation));
                string aSpeaker = ant.headWord.Get(typeof(CoreAnnotations.SpeakerAnnotation));
                // <I> from same speaker
                if (mention2.person == Dictionaries.Person.I && ant.person == Dictionaries.Person.I)
                {
                    return(mSpeaker.Equals(aSpeaker));
                }
                // <I> - speaker
                if ((mention2.person == Dictionaries.Person.I && mSpeaker.Equals(int.ToString(ant.mentionID))) || (ant.person == Dictionaries.Person.I && aSpeaker.Equals(int.ToString(mention2.mentionID))))
                {
                    return(true);
                }
            }
            if (flags.UseDiscoursematch)
            {
                string mString   = mention.LowercaseNormalizedSpanString();
                string antString = ant.LowercaseNormalizedSpanString();
                // mention and ant both belong to the same speaker cluster
                if (mention.speakerInfo != null && mention.speakerInfo == ant.speakerInfo)
                {
                    return(true);
                }
                // (I - I) in the same speaker's quotation.
                if (mention.number == Dictionaries.Number.Singular && dict.firstPersonPronouns.Contains(mString) && ant.number == Dictionaries.Number.Singular && dict.firstPersonPronouns.Contains(antString) && CorefRules.EntitySameSpeaker(document, mention,
                                                                                                                                                                                                                                               ant))
                {
                    return(true);
                }
                // (speaker - I)
                if ((mention.number == Dictionaries.Number.Singular && dict.firstPersonPronouns.Contains(mString)) && CorefRules.AntecedentIsMentionSpeaker(document, mention, ant, dict))
                {
                    if (mention.speakerInfo == null && ant.speakerInfo != null)
                    {
                        mention.speakerInfo = ant.speakerInfo;
                    }
                    return(true);
                }
                // (I - speaker)
                if ((ant.number == Dictionaries.Number.Singular && dict.firstPersonPronouns.Contains(antString)) && CorefRules.AntecedentIsMentionSpeaker(document, ant, mention, dict))
                {
                    if (ant.speakerInfo == null && mention.speakerInfo != null)
                    {
                        ant.speakerInfo = mention.speakerInfo;
                    }
                    return(true);
                }
                // Can be iffy if more than two speakers... but still should be okay most of the time
                if (dict.secondPersonPronouns.Contains(mString) && dict.secondPersonPronouns.Contains(antString) && CorefRules.EntitySameSpeaker(document, mention, ant))
                {
                    return(true);
                }
                // previous I - you or previous you - I in two person conversation
                if (((mention.person == Dictionaries.Person.I && ant.person == Dictionaries.Person.You || (mention.person == Dictionaries.Person.You && ant.person == Dictionaries.Person.I)) && (mention.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation
                                                                                                                                                                                                                              )) - ant.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)) == 1) && document.docType == Document.DocType.Conversation))
                {
                    return(true);
                }
                if (dict.reflexivePronouns.Contains(mention.headString) && CorefRules.EntitySubjectObject(mention, ant))
                {
                    return(true);
                }
            }
            if (!flags.UseExactstringmatch && !flags.UseRelaxedExactstringmatch && !flags.UseApposition && !flags.UseWordsInclusion)
            {
                foreach (Mention m in mentionCluster.GetCorefMentions())
                {
                    foreach (Mention a in potentialAntecedent.GetCorefMentions())
                    {
                        // angelx - not sure about the logic here, disable (code was also refactored from original)
                        // vv gabor - re-enabled code (seems to improve performance) vv
                        if (m.person != Dictionaries.Person.I && a.person != Dictionaries.Person.I && (CorefRules.AntecedentIsMentionSpeaker(document, m, a, dict) || CorefRules.AntecedentIsMentionSpeaker(document, a, m, dict)))
                        {
                            document.AddIncompatible(m, a);
                            return(false);
                        }
                        // ^^ end block of code in question ^^
                        int dist = Math.Abs(m.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)) - a.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)));
                        if (document.docType != Document.DocType.Article && dist == 1 && !CorefRules.EntitySameSpeaker(document, m, a))
                        {
                            string mSpeaker = document.speakers[m.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation))];
                            string aSpeaker = document.speakers[a.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation))];
                            if (m.person == Dictionaries.Person.I && a.person == Dictionaries.Person.I)
                            {
                                document.AddIncompatible(m, a);
                                return(false);
                            }
                            if (m.person == Dictionaries.Person.You && a.person == Dictionaries.Person.You)
                            {
                                document.AddIncompatible(m, a);
                                return(false);
                            }
                            // This is weak since we can refer to both speakers
                            if (m.person == Dictionaries.Person.We && a.person == Dictionaries.Person.We)
                            {
                                document.AddIncompatible(m, a);
                                return(false);
                            }
                        }
                    }
                }
                if (document.docType == Document.DocType.Article)
                {
                    foreach (Mention m_1 in mentionCluster.GetCorefMentions())
                    {
                        foreach (Mention a in potentialAntecedent.GetCorefMentions())
                        {
                            if (CorefRules.EntitySubjectObject(m_1, a))
                            {
                                document.AddIncompatible(m_1, a);
                                return(false);
                            }
                        }
                    }
                }
            }
            // Incompatibility constraints - do before match checks
            if (flags.USE_iwithini && CorefRules.EntityIWithinI(mention, ant, dict))
            {
                document.AddIncompatible(mention, ant);
                return(false);
            }
            // Match checks
            if (flags.UseExactstringmatch && CorefRules.EntityExactStringMatch(mention, ant, dict, roleSet))
            {
                return(true);
            }
            //    if(flags.USE_EXACTSTRINGMATCH && Rules.entityExactStringMatch(mentionCluster, potentialAntecedent, dict, roleSet)){
            //      return true;
            //    }
            if (flags.UseNameMatch && CheckEntityMatch(document, mentionCluster, potentialAntecedent, dict, roleSet))
            {
                ret = true;
            }
            if (flags.UseRelaxedExactstringmatch && CorefRules.EntityRelaxedExactStringMatch(mentionCluster, potentialAntecedent, mention, ant, dict, roleSet))
            {
                return(true);
            }
            if (flags.UseApposition && CorefRules.EntityIsApposition(mentionCluster, potentialAntecedent, mention, ant))
            {
                return(true);
            }
            if (flags.UsePredicatenominatives && CorefRules.EntityIsPredicateNominatives(mentionCluster, potentialAntecedent, mention, ant))
            {
                return(true);
            }
            if (flags.UseAcronym && CorefRules.EntityIsAcronym(document, mentionCluster, potentialAntecedent))
            {
                return(true);
            }
            if (flags.UseRelativepronoun && CorefRules.EntityIsRelativePronoun(mention, ant))
            {
                return(true);
            }
            if (flags.UseDemonym && mention.IsDemonym(ant, dict))
            {
                return(true);
            }
            if (flags.UseRoleapposition)
            {
                if (lang == Locale.Chinese)
                {
                    ret = false;
                }
                else
                {
                    if (CorefRules.EntityIsRoleAppositive(mentionCluster, potentialAntecedent, mention, ant, dict))
                    {
                        ret = true;
                    }
                }
            }
            if (flags.UseInclusionHeadmatch && CorefRules.EntityHeadsAgree(mentionCluster, potentialAntecedent, mention, ant, dict))
            {
                ret = true;
            }
            if (flags.UseRelaxedHeadmatch && CorefRules.EntityRelaxedHeadsAgreeBetweenMentions(mentionCluster, potentialAntecedent, mention, ant))
            {
                ret = true;
            }
            if (flags.UseWordsInclusion && ret && !CorefRules.EntityWordsIncluded(mentionCluster, potentialAntecedent, mention, ant))
            {
                return(false);
            }
            if (flags.UseIncompatibleModifier && ret && CorefRules.EntityHaveIncompatibleModifier(mentionCluster, potentialAntecedent))
            {
                return(false);
            }
            if (flags.UseProperheadAtLast && ret && !CorefRules.EntitySameProperHeadLastWord(mentionCluster, potentialAntecedent, mention, ant))
            {
                return(false);
            }
            if (flags.UseAttributesAgree && !CorefRules.EntityAttributesAgree(mentionCluster, potentialAntecedent))
            {
                return(false);
            }
            if (flags.UseDifferentLocation && CorefRules.EntityHaveDifferentLocation(mention, ant, dict))
            {
                if (flags.UseProperheadAtLast && ret && mention.goldCorefClusterID != ant.goldCorefClusterID)
                {
                }
                return(false);
            }
            if (flags.UseNumberInMention && CorefRules.EntityNumberInLaterMention(mention, ant))
            {
                if (flags.UseProperheadAtLast && ret && mention.goldCorefClusterID != ant.goldCorefClusterID)
                {
                }
                return(false);
            }
            if (flags.UseDistance && CorefRules.EntityTokenDistance(mention2, ant))
            {
                return(false);
            }
            if (flags.UseCorefDict)
            {
                // Head match
                if (ant.headWord.Lemma().Equals(mention2.headWord.Lemma()))
                {
                    return(false);
                }
                // Constraint: ignore pairs commonNoun - properNoun
                if (ant.mentionType != Dictionaries.MentionType.Proper && (mention2.headWord.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation)).StartsWith("NNP") || !Sharpen.Runtime.Substring(mention2.headWord.Word(), 1).Equals(Sharpen.Runtime.Substring(mention2
                                                                                                                                                                                                                                                               .headWord.Word(), 1).ToLower())))
                {
                    return(false);
                }
                // Constraint: ignore plurals
                if (ant.headWord.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation)).Equals("NNS") && mention2.headWord.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation)).Equals("NNS"))
                {
                    return(false);
                }
                // Constraint: ignore mentions with indefinite determiners
                if (dict.indefinitePronouns.Contains(ant.originalSpan[0].Lemma()) || dict.indefinitePronouns.Contains(mention2.originalSpan[0].Lemma()))
                {
                    return(false);
                }
                // Constraint: ignore coordinated mentions
                if (ant.IsCoordinated() || mention2.IsCoordinated())
                {
                    return(false);
                }
                // Constraint: context incompatibility
                if (CorefRules.ContextIncompatible(mention2, ant, dict))
                {
                    return(false);
                }
                // Constraint: sentence context incompatibility when the mentions are common nouns
                if (CorefRules.SentenceContextIncompatible(mention2, ant, dict))
                {
                    return(false);
                }
                if (CorefRules.EntityClusterAllCorefDictionary(mentionCluster, potentialAntecedent, dict, 1, 8))
                {
                    return(true);
                }
                if (CorefRules.EntityCorefDictionary(mention, ant, dict, 2, 2))
                {
                    return(true);
                }
                if (CorefRules.EntityCorefDictionary(mention, ant, dict, 3, 2))
                {
                    return(true);
                }
                if (CorefRules.EntityCorefDictionary(mention, ant, dict, 4, 2))
                {
                    return(true);
                }
            }
            if (flags.DoPronoun)
            {
                Mention m;
                if (mention.predicateNominatives != null && mention.predicateNominatives.Contains(mention2))
                {
                    m = mention2;
                }
                else
                {
                    m = mention;
                }
                bool mIsPronoun = (m.IsPronominal() || dict.allPronouns.Contains(m.ToString()));
                bool attrAgree  = HybridCorefProperties.UseDefaultPronounAgreement(props) ? CorefRules.EntityAttributesAgree(mentionCluster, potentialAntecedent) : CorefRules.EntityAttributesAgree(mentionCluster, potentialAntecedent, lang);
                if (mIsPronoun && attrAgree)
                {
                    if (dict.demonymSet.Contains(ant.LowercaseNormalizedSpanString()) && dict.notOrganizationPRP.Contains(m.headString))
                    {
                        document.AddIncompatible(m, ant);
                        return(false);
                    }
                    if (CorefRules.EntityPersonDisagree(document, mentionCluster, potentialAntecedent, dict))
                    {
                        document.AddIncompatible(m, ant);
                        return(false);
                    }
                    return(true);
                }
            }
            if (flags.UseChineseHeadMatch)
            {
                if (mention2.headWord == ant.headWord && mention2.InsideIn(ant))
                {
                    if (!document.IsCoref(mention2, ant))
                    {
                    }
                    // TODO: exclude conjunction
                    // log.info("error in chinese head match: "+mention2.spanToString()+"\t"+ant.spanToString());
                    return(true);
                }
            }
            return(ret);
        }

Exemple #9

0

Afficher le fichier

 /// <summary>
 /// Base entity-match check: unconditionally reports that the two clusters do not match.
 /// </summary>
 /// <remarks>
 /// None of the arguments is inspected here. The method is <c>virtual</c>, so a derived
 /// class can override it with an actual matching rule.
 /// </remarks>
 /// <param name="document">Not read by this base implementation.</param>
 /// <param name="mentionCluster">Not read by this base implementation.</param>
 /// <param name="potentialAntecedent">Not read by this base implementation.</param>
 /// <param name="dict">Not read by this base implementation.</param>
 /// <param name="roleSet">Not read by this base implementation.</param>
 /// <returns>Always <c>false</c>.</returns>
 public virtual bool CheckEntityMatch(Document document, CorefCluster mentionCluster, CorefCluster potentialAntecedent, Dictionaries dict, ICollection <Mention> roleSet) => false;

Exemple #10

0

Afficher le fichier

        /// <summary>Checks if two clusters are coreferent according to our sieve pass constraints.</summary>
        /// <param name="document"/>
        /// <exception cref="System.Exception"/>
        public virtual bool Coreferent(Document document, CorefCluster mentionCluster, CorefCluster potentialAntecedent, Mention mention2, Mention ant, Dictionaries dict, ICollection <Mention> roleSet, Semantics semantics)
        {
            bool    ret     = false;
            Mention mention = mentionCluster.GetRepresentativeMention();

            if (flags.UseIncompatibles)
            {
                // Check our list of incompatible mentions and don't cluster them together
                // Allows definite no's from previous sieves to propagate down
                if (document.IsIncompatible(mentionCluster, potentialAntecedent))
                {
                    SieveCoreferenceSystem.logger.Finest("INCOMPATIBLE clusters: not match: " + ant.SpanToString() + "(" + ant.mentionID + ") :: " + mention.SpanToString() + "(" + mention.mentionID + ") -> " + (mention.goldCorefClusterID != ant.goldCorefClusterID
                                                                                                                                                                                                                   ));
                    return(false);
                }
            }
            if (flags.DoPronoun && Math.Abs(mention2.sentNum - ant.sentNum) > 3 && mention2.person != Dictionaries.Person.I && mention2.person != Dictionaries.Person.You)
            {
                return(false);
            }
            if (mention2.LowercaseNormalizedSpanString().Equals("this") && Math.Abs(mention2.sentNum - ant.sentNum) > 3)
            {
                return(false);
            }
            if (mention2.person == Dictionaries.Person.You && document.docType == Document.DocType.Article && mention2.headWord.Get(typeof(CoreAnnotations.SpeakerAnnotation)).Equals("PER0"))
            {
                return(false);
            }
            if (document.conllDoc != null)
            {
                if (ant.generic && ant.person == Dictionaries.Person.You)
                {
                    return(false);
                }
                if (mention2.generic)
                {
                    return(false);
                }
            }
            if (mention2.InsideIn(ant) || ant.InsideIn(mention2))
            {
                return(false);
            }
            if (flags.UseDiscoursematch)
            {
                string mString   = mention.LowercaseNormalizedSpanString();
                string antString = ant.LowercaseNormalizedSpanString();
                // mention and ant both belong to the same speaker cluster
                if (mention.speakerInfo != null && mention.speakerInfo == ant.speakerInfo)
                {
                    SieveCoreferenceSystem.logger.Finest("discourse match: maps to same speaker: " + mention.SpanToString() + "\tmatched\t" + ant.SpanToString());
                    return(true);
                }
                // (I - I) in the same speaker's quotation.
                if (mention.number == Dictionaries.Number.Singular && dict.firstPersonPronouns.Contains(mString) && ant.number == Dictionaries.Number.Singular && dict.firstPersonPronouns.Contains(antString) && Rules.EntitySameSpeaker(document, mention, ant))
                {
                    SieveCoreferenceSystem.logger.Finest("discourse match: 1st person same speaker: " + mention.SpanToString() + "\tmatched\t" + ant.SpanToString());
                    return(true);
                }
                // (speaker - I)
                if ((mention.number == Dictionaries.Number.Singular && dict.firstPersonPronouns.Contains(mString)) && Rules.AntecedentIsMentionSpeaker(document, mention, ant, dict))
                {
                    SieveCoreferenceSystem.logger.Finest("discourse match: 1st person mention speaker match antecedent: " + mention.SpanToString() + "\tmatched\t" + ant.SpanToString());
                    if (mention.speakerInfo == null && ant.speakerInfo != null)
                    {
                        mention.speakerInfo = ant.speakerInfo;
                    }
                    return(true);
                }
                // (I - speaker)
                if ((ant.number == Dictionaries.Number.Singular && dict.firstPersonPronouns.Contains(antString)) && Rules.AntecedentIsMentionSpeaker(document, ant, mention, dict))
                {
                    SieveCoreferenceSystem.logger.Finest("discourse match: 1st person antecedent speaker match mention: " + mention.SpanToString() + "\tmatched\t" + ant.SpanToString());
                    if (ant.speakerInfo == null && mention.speakerInfo != null)
                    {
                        ant.speakerInfo = mention.speakerInfo;
                    }
                    return(true);
                }
                // Can be iffy if more than two speakers... but still should be okay most of the time
                if (dict.secondPersonPronouns.Contains(mString) && dict.secondPersonPronouns.Contains(antString) && Rules.EntitySameSpeaker(document, mention, ant))
                {
                    SieveCoreferenceSystem.logger.Finest("discourse match: 2nd person same speaker: " + mention.SpanToString() + "\tmatched\t" + ant.SpanToString());
                    return(true);
                }
                // previous I - you or previous you - I in two person conversation
                if (((mention.person == Dictionaries.Person.I && ant.person == Dictionaries.Person.You || (mention.person == Dictionaries.Person.You && ant.person == Dictionaries.Person.I)) && (mention.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation
                                                                                                                                                                                                                              )) - ant.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)) == 1) && document.docType == Document.DocType.Conversation))
                {
                    SieveCoreferenceSystem.logger.Finest("discourse match: between two person: " + mention.SpanToString() + "\tmatched\t" + ant.SpanToString());
                    return(true);
                }
                if (dict.reflexivePronouns.Contains(mention.headString) && Rules.EntitySubjectObject(mention, ant))
                {
                    SieveCoreferenceSystem.logger.Finest("discourse match: reflexive pronoun: " + ant.SpanToString() + "(" + ant.mentionID + ") :: " + mention.SpanToString() + "(" + mention.mentionID + ") -> " + (mention.goldCorefClusterID == ant.goldCorefClusterID
                                                                                                                                                                                                                     ));
                    return(true);
                }
            }
            if (Constants.UseDiscourseConstraints && !flags.UseExactstringmatch && !flags.UseRelaxedExactstringmatch && !flags.UseApposition && !flags.UseWordsInclusion)
            {
                foreach (Mention m in mentionCluster.GetCorefMentions())
                {
                    foreach (Mention a in potentialAntecedent.GetCorefMentions())
                    {
                        // angelx - not sure about the logic here, disable (code was also refactored from original)
                        // vv gabor - re-enabled code (seems to improve performance) vv
                        if (m.person != Dictionaries.Person.I && a.person != Dictionaries.Person.I && (Rules.AntecedentIsMentionSpeaker(document, m, a, dict) || Rules.AntecedentIsMentionSpeaker(document, a, m, dict)))
                        {
                            SieveCoreferenceSystem.logger.Finest("Incompatibles: not match(speaker): " + ant.SpanToString() + "(" + ant.mentionID + ") :: " + mention.SpanToString() + "(" + mention.mentionID + ") -> " + (mention.goldCorefClusterID != ant.goldCorefClusterID
                                                                                                                                                                                                                            ));
                            document.AddIncompatible(m, a);
                            return(false);
                        }
                        // ^^ end block of code in question ^^
                        int dist = Math.Abs(m.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)) - a.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)));
                        if (document.docType != Document.DocType.Article && dist == 1 && !Rules.EntitySameSpeaker(document, m, a))
                        {
                            string mSpeaker = document.speakers[m.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation))];
                            string aSpeaker = document.speakers[a.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation))];
                            if (m.person == Dictionaries.Person.I && a.person == Dictionaries.Person.I)
                            {
                                SieveCoreferenceSystem.logger.Finest("Incompatibles: neighbor I: " + ant.SpanToString() + "(" + ant.mentionID + "," + aSpeaker + ") :: " + mention.SpanToString() + "(" + mention.mentionID + "," + mSpeaker + ") -> " + (mention.goldCorefClusterID
                                                                                                                                                                                                                                                          != ant.goldCorefClusterID));
                                document.AddIncompatible(m, a);
                                return(false);
                            }
                            if (m.person == Dictionaries.Person.You && a.person == Dictionaries.Person.You)
                            {
                                SieveCoreferenceSystem.logger.Finest("Incompatibles: neighbor YOU: " + ant.SpanToString() + "(" + ant.mentionID + "," + aSpeaker + ") :: " + mention.SpanToString() + "(" + mention.mentionID + "," + mSpeaker + ") -> " + (mention.goldCorefClusterID
                                                                                                                                                                                                                                                            != ant.goldCorefClusterID));
                                document.AddIncompatible(m, a);
                                return(false);
                            }
                            // This is weak since we can refer to both speakers
                            if (m.person == Dictionaries.Person.We && a.person == Dictionaries.Person.We)
                            {
                                SieveCoreferenceSystem.logger.Finest("Incompatibles: neighbor WE: " + ant.SpanToString() + "(" + ant.mentionID + "," + aSpeaker + ") :: " + mention.SpanToString() + "(" + mention.mentionID + "," + mSpeaker + ") -> " + (mention.goldCorefClusterID
                                                                                                                                                                                                                                                           != ant.goldCorefClusterID));
                                document.AddIncompatible(m, a);
                                return(false);
                            }
                        }
                    }
                }
                if (document.docType == Document.DocType.Article)
                {
                    foreach (Mention m_1 in mentionCluster.GetCorefMentions())
                    {
                        foreach (Mention a in potentialAntecedent.GetCorefMentions())
                        {
                            if (Rules.EntitySubjectObject(m_1, a))
                            {
                                SieveCoreferenceSystem.logger.Finest("Incompatibles: subject-object: " + ant.SpanToString() + "(" + ant.mentionID + ") :: " + mention.SpanToString() + "(" + mention.mentionID + ") -> " + (mention.goldCorefClusterID != ant.goldCorefClusterID)
                                                                     );
                                document.AddIncompatible(m_1, a);
                                return(false);
                            }
                        }
                    }
                }
            }
            // Incompatibility constraints - do before match checks
            if (flags.USE_iwithini && Rules.EntityIWithinI(mention, ant, dict))
            {
                SieveCoreferenceSystem.logger.Finest("Incompatibles: iwithini: " + ant.SpanToString() + "(" + ant.mentionID + ") :: " + mention.SpanToString() + "(" + mention.mentionID + ") -> " + (mention.goldCorefClusterID != ant.goldCorefClusterID));
                document.AddIncompatible(mention, ant);
                return(false);
            }
            // Match checks
            if (flags.UseExactstringmatch && Rules.EntityExactStringMatch(mentionCluster, potentialAntecedent, dict, roleSet))
            {
                return(true);
            }
            if (flags.UseNameMatch && CheckEntityMatch(document, mentionCluster, potentialAntecedent, dict, roleSet))
            {
                ret = true;
            }
            if (flags.UseRelaxedExactstringmatch && Rules.EntityRelaxedExactStringMatch(mentionCluster, potentialAntecedent, mention, ant, dict, roleSet))
            {
                return(true);
            }
            if (flags.UseApposition && Rules.EntityIsApposition(mentionCluster, potentialAntecedent, mention, ant))
            {
                SieveCoreferenceSystem.logger.Finest("Apposition: " + mention.SpanToString() + "\tvs\t" + ant.SpanToString());
                return(true);
            }
            if (flags.UsePredicatenominatives && Rules.EntityIsPredicateNominatives(mentionCluster, potentialAntecedent, mention, ant))
            {
                SieveCoreferenceSystem.logger.Finest("Predicate nominatives: " + mention.SpanToString() + "\tvs\t" + ant.SpanToString());
                return(true);
            }
            if (flags.UseAcronym && Rules.EntityIsAcronym(document, mentionCluster, potentialAntecedent))
            {
                SieveCoreferenceSystem.logger.Finest("Acronym: " + mention.SpanToString() + "\tvs\t" + ant.SpanToString());
                return(true);
            }
            if (flags.UseRelativepronoun && Rules.EntityIsRelativePronoun(mention, ant))
            {
                SieveCoreferenceSystem.logger.Finest("Relative pronoun: " + mention.SpanToString() + "\tvs\t" + ant.SpanToString());
                return(true);
            }
            if (flags.UseDemonym && mention.IsDemonym(ant, dict))
            {
                SieveCoreferenceSystem.logger.Finest("Demonym: " + mention.SpanToString() + "\tvs\t" + ant.SpanToString());
                return(true);
            }
            if (flags.UseRoleapposition && lang != Locale.Chinese && Rules.EntityIsRoleAppositive(mentionCluster, potentialAntecedent, mention, ant, dict))
            {
                SieveCoreferenceSystem.logger.Finest("Role Appositive: " + mention.SpanToString() + "\tvs\t" + ant.SpanToString());
                ret = true;
            }
            if (flags.UseInclusionHeadmatch && Rules.EntityHeadsAgree(mentionCluster, potentialAntecedent, mention, ant, dict))
            {
                SieveCoreferenceSystem.logger.Finest("Entity heads agree: " + mention.SpanToString() + "\tvs\t" + ant.SpanToString());
                ret = true;
            }
            if (flags.UseRelaxedHeadmatch && Rules.EntityRelaxedHeadsAgreeBetweenMentions(mentionCluster, potentialAntecedent, mention, ant))
            {
                ret = true;
            }
            if (flags.UseWordsInclusion && ret && !Rules.EntityWordsIncluded(mentionCluster, potentialAntecedent, mention, ant))
            {
                return(false);
            }
            if (flags.UseIncompatibleModifier && ret && Rules.EntityHaveIncompatibleModifier(mentionCluster, potentialAntecedent))
            {
                return(false);
            }
            if (flags.UseProperheadAtLast && ret && !Rules.EntitySameProperHeadLastWord(mentionCluster, potentialAntecedent, mention, ant))
            {
                return(false);
            }
            if (flags.UseAttributesAgree && !Rules.EntityAttributesAgree(mentionCluster, potentialAntecedent))
            {
                return(false);
            }
            if (flags.UseDifferentLocation && Rules.EntityHaveDifferentLocation(mention, ant, dict))
            {
                if (flags.UseProperheadAtLast && ret && mention.goldCorefClusterID != ant.goldCorefClusterID)
                {
                    SieveCoreferenceSystem.logger.Finest("DIFFERENT LOCATION: " + ant.SpanToString() + " :: " + mention.SpanToString());
                }
                return(false);
            }
            if (flags.UseNumberInMention && Rules.EntityNumberInLaterMention(mention, ant))
            {
                if (flags.UseProperheadAtLast && ret && mention.goldCorefClusterID != ant.goldCorefClusterID)
                {
                    SieveCoreferenceSystem.logger.Finest("NEW NUMBER : " + ant.SpanToString() + " :: " + mention.SpanToString());
                }
                return(false);
            }
            if (flags.UseWnHypernym)
            {
                MethodInfo meth = semantics.wordnet.GetType().GetMethod("checkHypernym", typeof(CorefCluster), typeof(CorefCluster), typeof(Mention), typeof(Mention));
                if ((bool)meth.Invoke(semantics.wordnet, mentionCluster, potentialAntecedent, mention, ant))
                {
                    ret = true;
                }
                else
                {
                    if (mention.goldCorefClusterID == ant.goldCorefClusterID && !mention.IsPronominal() && !ant.IsPronominal())
                    {
                        SieveCoreferenceSystem.logger.Finest("not hypernym in WN");
                        SieveCoreferenceSystem.logger.Finest("False Negatives:: " + ant.SpanToString() + " <= " + mention.SpanToString());
                    }
                }
            }
            if (flags.UseWnSynonym)
            {
                MethodInfo meth = semantics.wordnet.GetType().GetMethod("checkSynonym", new Type[] { typeof(Mention), typeof(Mention) });
                if ((bool)meth.Invoke(semantics.wordnet, mention, ant))
                {
                    ret = true;
                }
                else
                {
                    if (mention.goldCorefClusterID == ant.goldCorefClusterID && !mention.IsPronominal() && !ant.IsPronominal())
                    {
                        SieveCoreferenceSystem.logger.Finest("not synonym in WN");
                        SieveCoreferenceSystem.logger.Finest("False Negatives:: " + ant.SpanToString() + " <= " + mention.SpanToString());
                    }
                }
            }
            try
            {
                if (flags.UseAlias && Rules.EntityAlias(mentionCluster, potentialAntecedent, semantics, dict))
                {
                    return(true);
                }
            }
            catch (Exception e)
            {
                throw new Exception(e);
            }
            if (flags.UseDistance && Rules.EntityTokenDistance(mention2, ant))
            {
                return(false);
            }
            if (flags.UseCorefDict)
            {
                // Head match
                if (ant.headWord.Lemma().Equals(mention2.headWord.Lemma()))
                {
                    return(false);
                }
                // Constraint: ignore pairs commonNoun - properNoun
                if (ant.mentionType != Dictionaries.MentionType.Proper && (mention2.headWord.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation)).StartsWith("NNP") || !Sharpen.Runtime.Substring(mention2.headWord.Word(), 1).Equals(Sharpen.Runtime.Substring(mention2
                                                                                                                                                                                                                                                               .headWord.Word(), 1).ToLower())))
                {
                    return(false);
                }
                // Constraint: ignore plurals
                if (ant.headWord.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation)).Equals("NNS") && mention2.headWord.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation)).Equals("NNS"))
                {
                    return(false);
                }
                // Constraint: ignore mentions with indefinite determiners
                if (dict.indefinitePronouns.Contains(ant.originalSpan[0].Lemma()) || dict.indefinitePronouns.Contains(mention2.originalSpan[0].Lemma()))
                {
                    return(false);
                }
                // Constraint: ignore coordinated mentions
                if (ant.IsCoordinated() || mention2.IsCoordinated())
                {
                    return(false);
                }
                // Constraint: context incompatibility
                if (Rules.ContextIncompatible(mention2, ant, dict))
                {
                    return(false);
                }
                // Constraint: sentence context incompatibility when the mentions are common nouns
                if (Rules.SentenceContextIncompatible(mention2, ant, dict))
                {
                    return(false);
                }
                if (Rules.EntityClusterAllCorefDictionary(mentionCluster, potentialAntecedent, dict, 1, 8))
                {
                    return(true);
                }
                if (Rules.EntityCorefDictionary(mention, ant, dict, 2, 2))
                {
                    return(true);
                }
                if (Rules.EntityCorefDictionary(mention, ant, dict, 3, 2))
                {
                    return(true);
                }
                if (Rules.EntityCorefDictionary(mention, ant, dict, 4, 2))
                {
                    return(true);
                }
            }
            if (flags.DoPronoun)
            {
                Mention m;
                if (mention.predicateNominatives != null && mention.predicateNominatives.Contains(mention2))
                {
                    m = mention2;
                }
                else
                {
                    m = mention;
                }
                if ((m.IsPronominal() || dict.allPronouns.Contains(m.ToString())) && Rules.EntityAttributesAgree(mentionCluster, potentialAntecedent))
                {
                    if (dict.demonymSet.Contains(ant.LowercaseNormalizedSpanString()) && dict.notOrganizationPRP.Contains(m.headString))
                    {
                        document.AddIncompatible(m, ant);
                        return(false);
                    }
                    if (Constants.UseDiscourseConstraints && Rules.EntityPersonDisagree(document, mentionCluster, potentialAntecedent, dict))
                    {
                        SieveCoreferenceSystem.logger.Finest("Incompatibles: Person Disagree: " + ant.SpanToString() + "(" + ant.mentionID + ") :: " + mention.SpanToString() + "(" + mention.mentionID + ") -> " + (mention.goldCorefClusterID != ant.goldCorefClusterID
                                                                                                                                                                                                                     ));
                        document.AddIncompatible(m, ant);
                        return(false);
                    }
                    return(true);
                }
            }
            return(ret);
        }

Exemple #11

0

Afficher le fichier

Fichier : HybridCorefPrinter.cs Projet : awesomedotnetcore/Stanford.CoreNLP.NET

        /// <summary>
        /// Assembles a multi-line diagnostic report for mention <paramref name="m"/>.
        /// The report contains a header, every sentence from index 0 through <c>m.sentNum</c>
        /// rendered twice through <c>SentenceStringWithMention</c> (under the "GOLD CLUSTER ID"
        /// and "MENTION ID" headings), the mention's attributes, and one row per antecedent ID
        /// found in <paramref name="probs"/>.
        /// </summary>
        /// <param name="m">Mention being reported.</param>
        /// <param name="document">Supplies <c>docInfo</c>, <c>corefClusters</c>, <c>predictedMentions</c> and <c>predictedMentionsByID</c>.</param>
        /// <param name="probs">Counter keyed by antecedent mention ID; its maximum is compared against <c>sieve.thresMerge</c>.</param>
        /// <param name="mIdx">Forwarded unchanged to <c>GetOrderedAntecedents</c>.</param>
        /// <param name="dict">Forwarded to the antecedent ordering and to every sieve check.</param>
        /// <param name="sieve">Supplies <c>mType</c>, <c>aType</c>, <c>maxSentDist</c> and <c>thresMerge</c>.</param>
        /// <returns>The report text, or <c>string.Empty</c> when the decision is evaluated as correct.</returns>
        /// <exception cref="System.Exception"/>
        public static string PrintErrorLog(Mention m, Document document, ICounter <int> probs, int mIdx, Dictionaries dict, RFSieve sieve)
        {
            StringBuilder   report     = new StringBuilder();
            IList <Mention> candidates = new List <Mention>();

            report.Append("\nERROR START-----------------------------------------------------------------------\n");
            report.Append("RESOLVER TYPE: mType: " + sieve.mType + ", aType: " + sieve.aType).Append("\n");
            report.Append("DOCUMENT: " + document.docInfo["DOC_ID"] + ", " + document.docInfo["DOC_PART"]).Append("\n");

            // First pass over sentences 0..m.sentNum, rendered with both flags set to true.
            report.Append("\nGOLD CLUSTER ID\n");
            for (int sentIdx = 0; sentIdx <= m.sentNum; sentIdx++)
            {
                // Mark the sentence whose distance from the mention's sentence equals maxSentDist.
                if (m.sentNum - sentIdx == sieve.maxSentDist)
                {
                    report.Append("\tstart compare from here-------------\n");
                }
                string goldLine = "\tSENT " + sentIdx + "\t" + SentenceStringWithMention(sentIdx, document, true, true);
                report.Append(goldLine).Append("\n");
            }

            // Second pass over the same sentences, rendered with both flags set to false.
            report.Append("\nMENTION ID\n");
            for (int sentIdx = 0; sentIdx <= m.sentNum; sentIdx++)
            {
                if (m.sentNum - sentIdx == sieve.maxSentDist)
                {
                    report.Append("\tstart compare from here-------------\n");
                }
                string idLine = "\tSENT " + sentIdx + "\t" + SentenceStringWithMention(sentIdx, document, false, false);
                report.Append(idLine).Append("\n");
            }

            // Collect the dcoref antecedent ordering, starting at the mention's own sentence
            // and moving backwards by at most min(maxSentDist, m.sentNum) sentences.
            for (int distance = 0; distance <= Math.Min(sieve.maxSentDist, m.sentNum); distance++)
            {
                Sharpen.Collections.AddAll(candidates, Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve.GetOrderedAntecedents(m, m.sentNum - distance, mIdx, document.predictedMentions, dict));
            }

            // Map each candidate's mention ID to its position in the ordering
            // (a later occurrence of the same ID overwrites an earlier one).
            IDictionary <int, int> rankByMentionId = Generics.NewHashMap();
            int position = 0;
            foreach (Mention candidate in candidates)
            {
                rankByMentionId[candidate.mentionID] = position;
                position++;
            }

            CorefCluster mentionCluster = document.corefClusters[m.corefClusterID];
            bool         firstMention   = IsFirstMention(m, document);
            bool         foundCorefAnt  = probs.Size() > 0 && Counters.Max(probs) > sieve.thresMerge;
            // With an antecedent found, the decision is correct iff the best-scoring one is really
            // coreferent; without one, it is correct iff this is a first mention.
            bool correctDecision = foundCorefAnt
                ? Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve.IsReallyCoref(document, m.mentionID, Counters.Argmax(probs))
                : firstMention;
            bool barePlural = m.originalSpan.Count == 1 && m.headWord.Tag().Equals("NNS");

            // Nothing to report for a correct decision.
            if (correctDecision)
            {
                return(string.Empty);
            }

            string mentionLine = "\nMENTION: " + m.SpanToString() + " (" + m.mentionID + ")\tperson: " + m.person + "\tsingleton? " + (!m.hasTwin) + "\t\tisFirstMention? " + firstMention + "\t\tfoundAnt? " + foundCorefAnt + "\t\tcorrectDecision? " + correctDecision
                                 + "\tbarePlural? " + barePlural;
            report.Append(mentionLine);

            string attributeLine = "\n\ttype: " + m.mentionType + "\tHeadword: " + m.headWord.Word() + "\tNEtype: " + m.nerString + "\tnumber: " + m.number + "\tgender: " + m.gender + "\tanimacy: " + m.animacy;
            report.Append(attributeLine).Append("\n");

            if (m.contextParseTree != null)
            {
                report.Append(m.contextParseTree.PennString());
            }

            report.Append("\n\n\t\tOracle\t\tDcoref\t\t\tRF\t\tAntecedent\n");
            foreach (int antID in Counters.ToSortedList(probs))
            {
                Mention      ant         = document.predictedMentionsByID[antID];
                CorefCluster antCluster  = document.corefClusters[ant.corefClusterID];
                bool         reallyCoref = Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve.IsReallyCoref(document, m.mentionID, antID);
                double       prob        = probs.GetCount(antID);
                int          rank        = rankByMentionId[antID];
                string       oracleStr   = reallyCoref ? "coref   " : "notcoref";

                // Label the row with the first dcoref sieve (in this fixed order) that accepts the pair.
                // The Chinese head-match sieve is not consulted; its check is commented out in this port.
                string dcorefStr;
                if (dcorefDiscourse.Coreferent(document, mentionCluster, antCluster, m, ant, dict, null))
                {
                    dcorefStr = "coref-discourse";
                }
                else if (dcorefExactString.Coreferent(document, mentionCluster, antCluster, m, ant, dict, null))
                {
                    dcorefStr = "coref-exactString";
                }
                else if (dcorefRelaxedExactString.Coreferent(document, mentionCluster, antCluster, m, ant, dict, null))
                {
                    dcorefStr = "coref-relaxedExact";
                }
                else if (dcorefPreciseConstructs.Coreferent(document, mentionCluster, antCluster, m, ant, dict, null))
                {
                    dcorefStr = "coref-preciseConstruct";
                }
                else if (dcorefHead1.Coreferent(document, mentionCluster, antCluster, m, ant, dict, null))
                {
                    dcorefStr = "coref-head1";
                }
                else if (dcorefHead2.Coreferent(document, mentionCluster, antCluster, m, ant, dict, null))
                {
                    dcorefStr = "coref-head2";
                }
                else if (dcorefHead3.Coreferent(document, mentionCluster, antCluster, m, ant, dict, null))
                {
                    dcorefStr = "coref-head3";
                }
                else if (dcorefHead4.Coreferent(document, mentionCluster, antCluster, m, ant, dict, null))
                {
                    dcorefStr = "coref-head4";
                }
                else if (dcorefRelaxedHead.Coreferent(document, mentionCluster, antCluster, m, ant, dict, null))
                {
                    dcorefStr = "coref-relaxedHead";
                }
                else if (dcorefPronounSieve.Coreferent(document, mentionCluster, antCluster, m, ant, dict, null))
                {
                    dcorefStr = "coref-pronounSieve";
                }
                else if (dcorefSpeaker.Coreferent(document, mentionCluster, antCluster, m, ant, dict, null))
                {
                    dcorefStr = "coref-speaker";
                }
                else
                {
                    dcorefStr = "notcoref";
                }

                string dcorefColumn = dcorefStr + "\t" + rank.ToString();
                string probStr      = df.Format(prob);
                report.Append("\t\t" + oracleStr + "\t" + dcorefColumn + "\t" + probStr + "\t\t" + ant.SpanToString() + " (" + ant.mentionID + ")\n");
            }
            report.Append("ERROR END -----------------------------------------------------------------------\n");
            return(report.ToString());
        }

Exemple #12

0

Afficher le fichier

Fichier : HybridCorefPrinter.cs Projet : awesomedotnetcore/Stanford.CoreNLP.NET

        /// <summary>
        /// Assembles a multi-line diagnostic report for mention <paramref name="m"/> that was linked
        /// to <paramref name="found"/>. The report contains a header, every sentence from index 0
        /// through <c>m.sentNum</c> rendered twice through <c>SentenceStringWithMention</c>, the
        /// mention's attributes, and one row per ordered antecedent.
        /// </summary>
        /// <param name="m">Mention being reported.</param>
        /// <param name="found">Antecedent checked against <paramref name="m"/> with <c>document.IsCoref</c>.</param>
        /// <param name="document">Supplies <c>docInfo</c>, <c>corefClusters</c> and <c>predictedMentions</c>.</param>
        /// <param name="dict">Forwarded to the antecedent ordering and to every sieve check.</param>
        /// <param name="mIdx">Forwarded unchanged to <c>GetOrderedAntecedents</c>.</param>
        /// <param name="whichResolver">Text written after "RESOLVER TYPE: " in the header.</param>
        /// <returns>The report text, or <c>string.Empty</c> when <c>document.IsCoref(m, found)</c> is true.</returns>
        /// <exception cref="System.Exception"/>
        public static string PrintErrorLogDcoref(Mention m, Mention found, Document document, Dictionaries dict, int mIdx, string whichResolver)
        {
            StringBuilder   report     = new StringBuilder();
            IList <Mention> candidates = new List <Mention>();

            report.Append("\nERROR START-----------------------------------------------------------------------\n");
            report.Append("RESOLVER TYPE: ").Append(whichResolver).Append("\n");
            report.Append("DOCUMENT: " + document.docInfo["DOC_ID"] + ", " + document.docInfo["DOC_PART"]).Append("\n");

            // First pass over sentences 0..m.sentNum, rendered with both flags set to true.
            report.Append("\nGOLD CLUSTER ID\n");
            for (int sentIdx = 0; sentIdx <= m.sentNum; sentIdx++)
            {
                string goldLine = "\tSENT " + sentIdx + "\t" + SentenceStringWithMention(sentIdx, document, true, true);
                report.Append(goldLine).Append("\n");
            }

            // Second pass over the same sentences, rendered with both flags set to false.
            report.Append("\nMENTION ID\n");
            for (int sentIdx = 0; sentIdx <= m.sentNum; sentIdx++)
            {
                string idLine = "\tSENT " + sentIdx + "\t" + SentenceStringWithMention(sentIdx, document, false, false);
                report.Append(idLine).Append("\n");
            }

            // Collect the dcoref antecedent ordering from the mention's own sentence back to sentence 0.
            for (int sentIdx = m.sentNum; sentIdx >= 0; sentIdx--)
            {
                Sharpen.Collections.AddAll(candidates, Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve.GetOrderedAntecedents(m, sentIdx, mIdx, document.predictedMentions, dict));
            }

            // Map each candidate's mention ID to its position in the ordering
            // (a later occurrence of the same ID overwrites an earlier one).
            IDictionary <int, int> rankByMentionId = Generics.NewHashMap();
            int position = 0;
            foreach (Mention candidate in candidates)
            {
                rankByMentionId[candidate.mentionID] = position;
                position++;
            }

            CorefCluster mentionCluster = document.corefClusters[m.corefClusterID];
            bool         firstMention   = IsFirstMention(m, document);
            // Only mentions that found a coref antecedent are printed, so this is always true.
            bool foundCorefAnt   = true;
            bool correctDecision = document.IsCoref(m, found);

            // Nothing to report for a correct decision.
            if (correctDecision)
            {
                return(string.Empty);
            }

            string mentionLine = "\nMENTION: " + m.SpanToString() + " (" + m.mentionID + ")\tperson: " + m.person + "\tsingleton? " + (!m.hasTwin) + "\t\tisFirstMention? " + firstMention + "\t\tfoundAnt? " + foundCorefAnt + "\t\tcorrectDecision? " + correctDecision;
            report.Append(mentionLine);

            string attributeLine = "\n\ttype: " + m.mentionType + "\tHeadword: " + m.headWord.Word() + "\tNEtype: " + m.nerString + "\tnumber: " + m.number + "\tgender: " + m.gender + "\tanimacy: " + m.animacy;
            report.Append(attributeLine).Append("\n");

            if (m.contextParseTree != null)
            {
                report.Append(m.contextParseTree.PennString());
            }

            report.Append("\n\n\t\tOracle\t\tDcoref\t\t\tRF\t\tAntecedent\n");
            foreach (Mention candidate in candidates)
            {
                int          antID       = candidate.mentionID;
                CorefCluster antCluster  = document.corefClusters[candidate.corefClusterID];
                bool         reallyCoref = Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve.IsReallyCoref(document, m.mentionID, antID);
                int          rank        = rankByMentionId[antID];
                string       oracleStr   = reallyCoref ? "coref   " : "notcoref";

                // Label the row with the first dcoref sieve (in this fixed order) that accepts the pair.
                // The Chinese head-match sieve is not consulted; its check is commented out in this port.
                string dcorefStr;
                if (dcorefSpeaker.Coreferent(document, mentionCluster, antCluster, m, candidate, dict, null))
                {
                    dcorefStr = "coref-speaker";
                }
                else if (dcorefDiscourse.Coreferent(document, mentionCluster, antCluster, m, candidate, dict, null))
                {
                    dcorefStr = "coref-discourse";
                }
                else if (dcorefExactString.Coreferent(document, mentionCluster, antCluster, m, candidate, dict, null))
                {
                    dcorefStr = "coref-exactString";
                }
                else if (dcorefRelaxedExactString.Coreferent(document, mentionCluster, antCluster, m, candidate, dict, null))
                {
                    dcorefStr = "coref-relaxedExact";
                }
                else if (dcorefPreciseConstructs.Coreferent(document, mentionCluster, antCluster, m, candidate, dict, null))
                {
                    dcorefStr = "coref-preciseConstruct";
                }
                else if (dcorefHead1.Coreferent(document, mentionCluster, antCluster, m, candidate, dict, null))
                {
                    dcorefStr = "coref-head1";
                }
                else if (dcorefHead2.Coreferent(document, mentionCluster, antCluster, m, candidate, dict, null))
                {
                    dcorefStr = "coref-head2";
                }
                else if (dcorefHead3.Coreferent(document, mentionCluster, antCluster, m, candidate, dict, null))
                {
                    dcorefStr = "coref-head3";
                }
                else if (dcorefHead4.Coreferent(document, mentionCluster, antCluster, m, candidate, dict, null))
                {
                    dcorefStr = "coref-head4";
                }
                else if (dcorefRelaxedHead.Coreferent(document, mentionCluster, antCluster, m, candidate, dict, null))
                {
                    dcorefStr = "coref-relaxedHead";
                }
                else if (dcorefPronounSieve.Coreferent(document, mentionCluster, antCluster, m, candidate, dict, null))
                {
                    dcorefStr = "coref-pronounSieve";
                }
                else
                {
                    dcorefStr = "notcoref";
                }

                string dcorefColumn = dcorefStr + "\t" + rank.ToString();
                report.Append("\t\t" + oracleStr + "\t" + dcorefColumn + "\t\t" + candidate.SpanToString() + " (" + candidate.mentionID + ")\n");
            }
            report.Append("ERROR END -----------------------------------------------------------------------\n");
            return(report.ToString());
        }

C# (CSharp) CorefCluster Exemples