Esempio n. 1
0
 /// <summary>
 /// Collects the mention pairs that <c>CorefUtils.GetMentionPairs</c> returns for
 /// <paramref name="document"/> into a dictionary keyed by mention-ID pair.
 /// </summary>
 /// <param name="document">Document whose mention pairs are collected.</param>
 /// <returns>The dictionary produced by <c>Collectors.ToMap</c> over those pairs.</returns>
 public static IDictionary <Pair <int, int>, bool> GetUnlabeledMentionPairs(Document document)
 {
     var candidatePairs = CorefUtils.GetMentionPairs(document);

     // NOTE(review): both ToMap arguments are null -- presumably the key/value
     // selectors were dropped by an automated conversion; confirm and restore them.
     return candidatePairs.Stream().Collect(Collectors.ToMap(null, null));
 }
        /// <summary>
        /// Records per-document data for the document identified by <paramref name="id"/>:
        /// a mention-type map, the gold clusters as lists of mention IDs, and, when word
        /// counting is enabled, head-word-pair, unigram and bigram counts.
        /// </summary>
        /// <remarks>
        /// Word counting only runs when <c>countWords</c> is true and <c>mentionPairs</c>
        /// reports an entry for <paramref name="id"/>. Every sentence that contains at least
        /// one mention taking part in a pair is counted exactly once.
        /// </remarks>
        /// <param name="id">Key under which the results are stored.</param>
        /// <param name="document">Document supplying predicted mentions and gold clusters.</param>
        public virtual void Process(int id, Document document)
        {
            // Mention types
            // NOTE(review): both ToMap arguments are null -- presumably the key/value
            // selectors were dropped by an automated conversion; confirm and restore them.
            mentionTypes[id] = document.predictedMentionsByID.Stream().Collect(Collectors.ToMap(null, null));

            // Gold clusters: flatten each cluster into the list of its mention IDs.
            IList <IList <int> > clusters = new List <IList <int> >();
            foreach (CorefCluster c in document.goldCorefClusters.Values)
            {
                IList <int> cluster = new List <int>();
                foreach (Mention m in c.GetCorefMentions())
                {
                    cluster.Add(m.mentionID);
                }
                clusters.Add(cluster);
            }
            goldClusters[id] = clusters;

            // Word counting
            // NOTE(review): the declaration of mentionPairs is not visible here; if it is an
            // IDictionary this membership test should presumably be ContainsKey -- confirm.
            if (!countWords || !mentionPairs.Contains(id))
            {
                return;
            }

            // Count head-word pairs and remember every mention that occurs in a pair.
            ICollection <Pair <int, int> > pairs = mentionPairs[id].Keys;
            ICollection <int> mentions           = new HashSet <int>();
            foreach (Pair <int, int> pair in pairs)
            {
                mentions.Add(pair.first);
                mentions.Add(pair.second);
                Mention m1 = document.predictedMentionsByID[pair.first];
                Mention m2 = document.predictedMentionsByID[pair.second];
                wordCounts.IncrementCount("h_" + m1.headWord.Word().ToLower() + "_" + m2.headWord.Word().ToLower());
            }

            // Keep one word list per sentence number so that a sentence holding several
            // mentions is only counted once.
            IDictionary <int, IList <CoreLabel> > sentences = new Dictionary <int, IList <CoreLabel> >();
            foreach (int mention in mentions)
            {
                Mention m = document.predictedMentionsByID[mention];
                // Key lookup: IDictionary.Contains expects a KeyValuePair, so ContainsKey
                // is the correct membership test for a sentence number.
                if (!sentences.ContainsKey(m.sentNum))
                {
                    sentences[m.sentNum] = m.sentenceWords;
                }
            }

            // Unigram and bigram counts over the collected sentences.
            foreach (IList <CoreLabel> sentence in sentences.Values)
            {
                for (int i = 0; i < sentence.Count; i++)
                {
                    CoreLabel cl = sentence[i];
                    if (cl == null)
                    {
                        continue;
                    }
                    string w = cl.Word().ToLower();
                    wordCounts.IncrementCount(w);
                    if (i > 0)
                    {
                        CoreLabel clp = sentence[i - 1];
                        if (clp == null)
                        {
                            // No previous token, so there is no bigram for this position.
                            continue;
                        }
                        string wp = clp.Word().ToLower();
                        wordCounts.IncrementCount(wp + "_" + w);
                    }
                }
            }
        }
            /// <summary>
            /// Scores the system clusters against the gold lookup and the gold clusters against
            /// the system lookup via <c>GetScore</c>, then adds the two resulting pairs to the
            /// running totals <c>pNum</c>/<c>pDen</c> and <c>rNum</c>/<c>rDen</c>.
            /// </summary>
            /// <param name="gold">Gold clusters as lists of mention IDs.</param>
            /// <param name="clusters">System clusters.</param>
            /// <param name="mentionToGold">Lookup from mention ID to its gold cluster.</param>
            /// <param name="mentionToSystem">Lookup from mention ID to its system cluster.</param>
            public virtual void Update(IList <IList <int> > gold, IList <Clusterer.Cluster> clusters, IDictionary <int, IList <int> > mentionToGold, IDictionary <int, Clusterer.Cluster> mentionToSystem)
            {
                // Convert the system-side structures into the list-based shape GetScore takes.
                // NOTE(review): the Map/ToMap arguments are null -- presumably selectors that
                // were dropped by an automated conversion; confirm and restore them.
                IList <IList <int> > systemAsLists = clusters.Stream().Map(null).Collect(Collectors.ToList());
                IDictionary <int, IList <int> > systemLookupAsLists = mentionToSystem.Stream().Collect(Collectors.ToMap(null, null));

                // First score: system clusters measured against gold (feeds pNum/pDen).
                Pair <double, double> systemVsGold = GetScore(systemAsLists, mentionToGold);
                // Second score: gold clusters measured against system (feeds rNum/rDen).
                Pair <double, double> goldVsSystem = GetScore(gold, systemLookupAsLists);

                pNum += systemVsGold.first;
                pDen += systemVsGold.second;
                rNum += goldVsSystem.first;
                rDen += goldVsSystem.second;
            }
        /// <summary>
        /// Runs coreference on <paramref name="document"/>: extracts examples for its
        /// unlabeled mention pairs, scores them with the classification, ranking and
        /// anaphoricity models, and applies every merge the clusterer returns.
        /// </summary>
        /// <remarks>Returns without doing anything when the document has no mention pairs.</remarks>
        /// <param name="document">Document to process; merges are applied to it.</param>
        public virtual void RunCoref(Document document)
        {
            IDictionary <Pair <int, int>, bool> mentionPairs = CorefUtils.GetUnlabeledMentionPairs(document);
            if (mentionPairs.Count == 0)
            {
                return;
            }

            var compressor = new Compressor <string>();
            DocumentExamples examples = extractor.Extract(0, document, mentionPairs, compressor);

            ICounter <Pair <int, int> > classificationScores = new ClassicCounter <Pair <int, int> >();
            ICounter <Pair <int, int> > rankingScores        = new ClassicCounter <Pair <int, int> >();
            ICounter <int> anaphoricityScores                = new ClassicCounter <int>();

            foreach (Example ex in examples.examples)
            {
                CorefUtils.CheckForInterrupt();

                var key = new Pair <int, int>(ex.mentionId1, ex.mentionId2);
                classificationScores.IncrementCount(key, classificationModel.Predict(ex, examples.mentionFeatures, compressor));
                rankingScores.IncrementCount(key, rankingModel.Predict(ex, examples.mentionFeatures, compressor));

                // Anaphoricity is scored once per second mention; skip ones already present.
                if (anaphoricityScores.ContainsKey(ex.mentionId2))
                {
                    continue;
                }
                anaphoricityScores.IncrementCount(ex.mentionId2, anaphoricityModel.Predict(new Example(ex, false), examples.mentionFeatures, compressor));
            }

            // NOTE(review): both ToMap arguments are null -- presumably the key/value
            // selectors were dropped by an automated conversion; confirm and restore them.
            ClustererDataLoader.ClustererDoc doc = new ClustererDataLoader.ClustererDoc(
                0,
                classificationScores,
                rankingScores,
                anaphoricityScores,
                mentionPairs,
                null,
                document.predictedMentionsByID.Stream().Collect(Collectors.ToMap(null, null)));

            foreach (Pair <int, int> merge in clusterer.GetClusterMerges(doc))
            {
                CorefUtils.MergeCoreferenceClusters(merge, document);
            }
        }