コード例 #1
0
        /// <summary>
        /// Resolves coreference for <paramref name="document"/>: every mention that has
        /// candidate antecedents is scored against each of them, and the best-scoring
        /// candidate is merged into the mention's cluster when it beats the mention's
        /// own (greedyness-adjusted) anaphoricity score.
        /// </summary>
        /// <param name="document">
        /// The document whose mentions are read and whose coreference clusters are
        /// updated through <c>CorefUtils.MergeCoreferenceClusters</c>.
        /// </param>
        public virtual void RunCoref(Document document)
        {
            IList <Mention> sortedMentions = CorefUtils.GetSortedMentions(document);

            // Group the mentions by head index; the feature extractor consumes this lookup.
            IDictionary <int, IList <Mention> > mentionsByHeadIndex = new Dictionary <int, IList <Mention> >();
            foreach (Mention mention in sortedMentions)
            {
                // Create the bucket on first sight of a head index. (The previous code passed
                // a null factory, so no bucket was ever created and the Add below failed.)
                if (!mentionsByHeadIndex.TryGetValue(mention.headIndex, out IList <Mention> withIndex))
                {
                    withIndex = new List <Mention>();
                    mentionsByHeadIndex[mention.headIndex] = withIndex;
                }
                withIndex.Add(mention);
            }

            // Pre-compute, per mention ID, both embeddings and the anaphoricity score so that
            // the pairwise loop below only performs lookups.
            SimpleMatrix documentEmbedding = embeddingExtractor.GetDocumentEmbedding(document);
            IDictionary <int, SimpleMatrix> antecedentEmbeddings = new Dictionary <int, SimpleMatrix>();
            IDictionary <int, SimpleMatrix> anaphorEmbeddings    = new Dictionary <int, SimpleMatrix>();
            ICounter <int> anaphoricityScores = new ClassicCounter <int>();
            foreach (Mention mention in sortedMentions)
            {
                SimpleMatrix mentionEmbedding = embeddingExtractor.GetMentionEmbeddings(mention, documentEmbedding);
                antecedentEmbeddings[mention.mentionID] = model.GetAntecedentEmbedding(mentionEmbedding);
                anaphorEmbeddings[mention.mentionID]    = model.GetAnaphorEmbedding(mentionEmbedding);
                anaphoricityScores.IncrementCount(mention.mentionID, model.GetAnaphoricityScore(mentionEmbedding, featureExtractor.GetAnaphoricityFeatures(mention, document, mentionsByHeadIndex)));
            }

            IDictionary <int, IList <int> > mentionToCandidateAntecedents = CorefUtils.HeuristicFilter(sortedMentions, maxMentionDistance, maxMentionDistanceWithStringMatch);

            foreach (KeyValuePair <int, IList <int> > entry in mentionToCandidateAntecedents)
            {
                int anaphorId = entry.Key;

                // Baseline a candidate has to beat: the mention's anaphoricity score shifted by
                // the greedyness setting (greedyness above 0.5 lowers the bar, below raises it).
                double bestScore = anaphoricityScores.GetCount(anaphorId) - 50 * (greedyness - 0.5);

                // Nullable: stays null when no candidate beats the baseline, meaning "no link".
                int? antecedent = null;
                foreach (int candidateId in entry.Value)
                {
                    double score = model.GetPairwiseScore(antecedentEmbeddings[candidateId], anaphorEmbeddings[anaphorId], featureExtractor.GetPairFeatures(new Pair <int, int>(candidateId, anaphorId), document, mentionsByHeadIndex));
                    if (score > bestScore)
                    {
                        bestScore  = score;
                        antecedent = candidateId;
                    }
                }

                if (antecedent.HasValue)
                {
                    CorefUtils.MergeCoreferenceClusters(new Pair <int, int>(antecedent.Value, anaphorId), document);
                }
            }
        }