/// <summary>
/// Builds a map whose keys are the mention pairs returned by
/// <c>CorefUtils.GetMentionPairs</c> for <paramref name="document"/>.
/// </summary>
/// <param name="document">Document whose mention pairs are enumerated.</param>
/// <returns>
/// A new dictionary containing one entry per mention pair, each mapped to
/// <c>false</c>.
/// </returns>
public static IDictionary<Pair<int, int>, bool> GetUnlabeledMentionPairs(Document document)
{
    // The map is built explicitly: the previous Collectors.ToMap(null, null)
    // call supplied no key or value mapper and therefore could not produce
    // entries. The key must be the pair itself (fixed by the return type).
    // NOTE(review): 'false' is used as the placeholder label for an unlabeled
    // pair - confirm against the upstream implementation.
    IDictionary<Pair<int, int>, bool> unlabeled = new Dictionary<Pair<int, int>, bool>();
    foreach (Pair<int, int> pair in CorefUtils.GetMentionPairs(document))
    {
        unlabeled[pair] = false;
    }
    return unlabeled;
}
/// <summary>
/// Records metadata for one document under the key <paramref name="id"/>:
/// an entry in <c>mentionTypes</c>, the gold clusters as lists of mention
/// IDs in <c>goldClusters</c>, and (optionally) word statistics in
/// <c>wordCounts</c>.
/// </summary>
/// <remarks>
/// Word counting only runs when <c>countWords</c> is set and
/// <c>mentionPairs</c> has an entry for <paramref name="id"/>. It counts:
/// a "h_&lt;head1&gt;_&lt;head2&gt;" key for every mention pair, every
/// lower-cased word of each sentence that contains a mention from those
/// pairs, and every adjacent "&lt;previous&gt;_&lt;current&gt;" word pair
/// in those sentences.
/// </remarks>
/// <param name="id">Key under which the document's metadata is stored.</param>
/// <param name="document">Document to read mentions and gold clusters from.</param>
public virtual void Process(int id, Document document)
{
    // Mention types
    // NOTE(review): both mapper arguments to ToMap are null; they look like
    // lambdas lost in an automated conversion. Restore the key/value mappers
    // from the upstream implementation before relying on this map.
    mentionTypes[id] = document.predictedMentionsByID.Stream().Collect(Collectors.ToMap(null, null));

    // Gold clusters: one list of mention IDs per gold coreference cluster.
    IList<IList<int>> clusters = new List<IList<int>>();
    foreach (CorefCluster c in document.goldCorefClusters.Values)
    {
        IList<int> cluster = new List<int>();
        foreach (Mention m in c.GetCorefMentions())
        {
            cluster.Add(m.mentionID);
        }
        clusters.Add(cluster);
    }
    goldClusters[id] = clusters;

    // Word counting
    if (countWords && mentionPairs.ContainsKey(id))
    {
        ICollection<Pair<int, int>> pairs = mentionPairs[id].Keys;
        ICollection<int> mentions = new HashSet<int>();
        foreach (Pair<int, int> pair in pairs)
        {
            mentions.Add(pair.first);
            mentions.Add(pair.second);
            Mention m1 = document.predictedMentionsByID[pair.first];
            Mention m2 = document.predictedMentionsByID[pair.second];
            wordCounts.IncrementCount("h_" + m1.headWord.Word().ToLower() + "_" + m2.headWord.Word().ToLower());
        }

        // Collect each sentence once, keyed by sentence number, so a sentence
        // holding several mentions is not counted repeatedly.
        IDictionary<int, IList<CoreLabel>> sentences = new Dictionary<int, IList<CoreLabel>>();
        foreach (int mention in mentions)
        {
            Mention m = document.predictedMentionsByID[mention];
            if (!sentences.ContainsKey(m.sentNum))
            {
                sentences[m.sentNum] = m.sentenceWords;
            }
        }

        foreach (IList<CoreLabel> sentence in sentences.Values)
        {
            for (int i = 0; i < sentence.Count; i++)
            {
                CoreLabel cl = sentence[i];
                if (cl == null)
                {
                    continue;
                }
                string w = cl.Word().ToLower();
                wordCounts.IncrementCount(w);
                if (i > 0)
                {
                    CoreLabel clp = sentence[i - 1];
                    if (clp == null)
                    {
                        // No previous token: the single word was counted
                        // above, only the word-pair count is skipped.
                        continue;
                    }
                    string wp = clp.Word().ToLower();
                    wordCounts.IncrementCount(wp + "_" + w);
                }
            }
        }
    }
}
/// <summary>
/// Scores one document's clustering in both directions with
/// <c>GetScore</c> and adds the resulting numerator/denominator pairs to
/// the running totals <c>pNum</c>/<c>pDen</c> and <c>rNum</c>/<c>rDen</c>
/// (presumably precision and recall - confirm against the class).
/// </summary>
/// <param name="gold">Gold clusters as lists of mention IDs.</param>
/// <param name="clusters">System clusters.</param>
/// <param name="mentionToGold">Mention ID to gold cluster lookup.</param>
/// <param name="mentionToSystem">Mention ID to system cluster lookup.</param>
public virtual void Update(IList<IList<int>> gold, IList<Clusterer.Cluster> clusters, IDictionary<int, IList<int>> mentionToGold, IDictionary<int, Clusterer.Cluster> mentionToSystem)
{
    // NOTE(review): the mapper arguments to Map(...) and ToMap(...) are null;
    // they look like lambdas lost in an automated conversion - verify against
    // the upstream implementation.
    IList<IList<int>> systemClusterLists = clusters
        .Stream()
        .Map(null)
        .Collect(Collectors.ToList());
    IDictionary<int, IList<int>> systemListsByMention = mentionToSystem
        .Stream()
        .Collect(Collectors.ToMap(null, null));

    // System clusters scored against the gold lookup, then gold clusters
    // scored against the system lookup.
    Pair<double, double> systemVsGold = GetScore(systemClusterLists, mentionToGold);
    Pair<double, double> goldVsSystem = GetScore(gold, systemListsByMention);

    pNum += systemVsGold.first;
    pDen += systemVsGold.second;
    rNum += goldVsSystem.first;
    rDen += goldVsSystem.second;
}
/// <summary>
/// Scores every candidate mention pair of <paramref name="document"/> with
/// the classification, ranking and anaphoricity models, hands the scores to
/// the clusterer, and applies each merge it returns to the document.
/// Returns without doing anything when the document has no mention pairs.
/// </summary>
/// <param name="document">Document whose coreference clusters are merged in place.</param>
public virtual void RunCoref(Document document)
{
    IDictionary<Pair<int, int>, bool> candidatePairs = CorefUtils.GetUnlabeledMentionPairs(document);
    if (candidatePairs.Count == 0)
    {
        return;
    }

    Compressor<string> featureCompressor = new Compressor<string>();
    DocumentExamples extracted = extractor.Extract(0, document, candidatePairs, featureCompressor);

    ICounter<Pair<int, int>> pairClassification = new ClassicCounter<Pair<int, int>>();
    ICounter<Pair<int, int>> pairRanking = new ClassicCounter<Pair<int, int>>();
    ICounter<int> anaphoricity = new ClassicCounter<int>();

    foreach (Example ex in extracted.examples)
    {
        CorefUtils.CheckForInterrupt();

        Pair<int, int> key = new Pair<int, int>(ex.mentionId1, ex.mentionId2);
        pairClassification.IncrementCount(
            key,
            classificationModel.Predict(ex, extracted.mentionFeatures, featureCompressor));
        pairRanking.IncrementCount(
            key,
            rankingModel.Predict(ex, extracted.mentionFeatures, featureCompressor));

        // The anaphoricity score is recorded only the first time a given
        // second mention is seen.
        if (!anaphoricity.ContainsKey(ex.mentionId2))
        {
            anaphoricity.IncrementCount(
                ex.mentionId2,
                anaphoricityModel.Predict(new Example(ex, false), extracted.mentionFeatures, featureCompressor));
        }
    }

    // NOTE(review): both mapper arguments to ToMap are null; they look like
    // lambdas lost in an automated conversion - verify against the upstream
    // implementation.
    ClustererDataLoader.ClustererDoc clustererInput = new ClustererDataLoader.ClustererDoc(
        0,
        pairClassification,
        pairRanking,
        anaphoricity,
        candidatePairs,
        null,
        document.predictedMentionsByID.Stream().Collect(Collectors.ToMap(null, null)));

    foreach (Pair<int, int> merge in clusterer.GetClusterMerges(clustererInput))
    {
        CorefUtils.MergeCoreferenceClusters(merge, document);
    }
}