/// <summary>
/// Scores every unlabeled mention pair of <paramref name="document"/> with the
/// classification, ranking and anaphoricity models, hands the scores to the
/// clusterer, and applies each merge the clusterer returns.
/// </summary>
/// <param name="document">Document whose coreference clusters are merged in place.</param>
public virtual void RunCoref(Document document)
{
    IDictionary<Pair<int, int>, bool> candidatePairs = CorefUtils.GetUnlabeledMentionPairs(document);
    if (candidatePairs.Count == 0)
    {
        // No candidate pairs: nothing to score or merge.
        return;
    }

    Compressor<string> featureCompressor = new Compressor<string>();
    DocumentExamples extracted = extractor.Extract(0, document, candidatePairs, featureCompressor);

    ICounter<Pair<int, int>> classificationScores = new ClassicCounter<Pair<int, int>>();
    ICounter<Pair<int, int>> rankingScores = new ClassicCounter<Pair<int, int>>();
    ICounter<int> anaphoricityScores = new ClassicCounter<int>();

    foreach (Example candidate in extracted.examples)
    {
        CorefUtils.CheckForInterrupt();

        Pair<int, int> pairKey = new Pair<int, int>(candidate.mentionId1, candidate.mentionId2);

        // The models share one compressor, so the call order below is kept as-is.
        classificationScores.IncrementCount(
            pairKey,
            classificationModel.Predict(candidate, extracted.mentionFeatures, featureCompressor));
        rankingScores.IncrementCount(
            pairKey,
            rankingModel.Predict(candidate, extracted.mentionFeatures, featureCompressor));

        // Anaphoricity is scored only the first time a given second mention id is seen.
        if (anaphoricityScores.ContainsKey(candidate.mentionId2))
        {
            continue;
        }

        anaphoricityScores.IncrementCount(
            candidate.mentionId2,
            anaphoricityModel.Predict(new Example(candidate, false), extracted.mentionFeatures, featureCompressor));
    }

    // NOTE(review): both arguments to Collectors.ToMap are null here - confirm this is
    // intended and not a key/value mapper pair that was lost in translation.
    ClustererDataLoader.ClustererDoc clustererDoc = new ClustererDataLoader.ClustererDoc(
        0,
        classificationScores,
        rankingScores,
        anaphoricityScores,
        candidatePairs,
        null,
        document.predictedMentionsByID.Stream().Collect(Collectors.ToMap(null, null)));

    foreach (Pair<int, int> merge in clusterer.GetClusterMerges(clustererDoc))
    {
        CorefUtils.MergeCoreferenceClusters(merge, document);
    }
}
/// <summary>
/// Scores the heuristically filtered mention pairs of <paramref name="document"/>
/// with the pairwise classifier and, for each second mention, merges it with its
/// highest-scoring first mention when that score exceeds the configured threshold.
/// </summary>
/// <param name="document">Document whose coreference clusters are merged in place.</param>
public virtual void RunCoref(Document document)
{
    Compressor<string> compressor = new Compressor<string>();
    if (Thread.Interrupted())
    {
        // Allow interrupting
        throw new RuntimeInterruptedException();
    }

    // Build the candidate set: every (m1, key) pair that survives the heuristic filter.
    IDictionary<Pair<int, int>, bool> pairs = new Dictionary<Pair<int, int>, bool>();
    foreach (KeyValuePair<int, IList<int>> e in CorefUtils.HeuristicFilter(
        CorefUtils.GetSortedMentions(document), maxMentionDistance, maxMentionDistanceWithStringMatch))
    {
        foreach (int m1 in e.Value)
        {
            pairs[new Pair<int, int>(m1, e.Key)] = true;
        }
    }

    DocumentExamples examples = extractor.Extract(0, document, pairs, compressor);
    ICounter<Pair<int, int>> pairwiseScores = new ClassicCounter<Pair<int, int>>();
    foreach (Example mentionPair in examples.examples)
    {
        if (Thread.Interrupted())
        {
            // Allow interrupting
            throw new RuntimeInterruptedException();
        }

        pairwiseScores.IncrementCount(
            new Pair<int, int>(mentionPair.mentionId1, mentionPair.mentionId2),
            classifier.Predict(mentionPair, examples.mentionFeatures, compressor));
    }

    // Order pairs by descending score. The loop below only looks at the FIRST pair
    // it meets for each second mention, so the ordering decides which candidate is
    // compared against the threshold. Pairs with equal scores are in no particular order.
    List<Pair<int, int>> mentionPairs = new List<Pair<int, int>>(pairwiseScores.KeySet());
    mentionPairs.Sort((p1, p2) => pairwiseScores.GetCount(p2).CompareTo(pairwiseScores.GetCount(p1)));

    ICollection<int> seenAnaphors = new HashSet<int>();
    foreach (Pair<int, int> pair in mentionPairs)
    {
        if (seenAnaphors.Contains(pair.second))
        {
            // A higher-ranked pair for this second mention was already handled.
            continue;
        }

        if (Thread.Interrupted())
        {
            // Allow interrupting
            throw new RuntimeInterruptedException();
        }

        seenAnaphors.Add(pair.second);

        Dictionaries.MentionType mt1 = document.predictedMentionsByID[pair.first].mentionType;
        Dictionaries.MentionType mt2 = document.predictedMentionsByID[pair.second].mentionType;

        // Thresholds are keyed by (first mention is pronominal, second mention is pronominal).
        Pair<bool, bool> thresholdKey = new Pair<bool, bool>(
            mt1 == Dictionaries.MentionType.Pronominal,
            mt2 == Dictionaries.MentionType.Pronominal);
        if (pairwiseScores.GetCount(pair) > thresholds[thresholdKey])
        {
            CorefUtils.MergeCoreferenceClusters(pair, document);
        }
    }
}
/// <summary>
/// Flattens per-document examples into one list, pairing each example with the
/// mention-feature map of the document it came from.
/// </summary>
/// <param name="documents">
/// Documents to flatten. The list is consumed from the last element backwards
/// and is empty when this method returns.
/// </param>
/// <returns>One (example, mention features) pair per example, in consumption order.</returns>
public static IList<Pair<Example, IDictionary<int, CompressedFeatureVector>>> GetExamples(IList<DocumentExamples> documents)
{
    IList<Pair<Example, IDictionary<int, CompressedFeatureVector>>> flattened =
        new List<Pair<Example, IDictionary<int, CompressedFeatureVector>>>();

    while (!documents.IsEmpty())
    {
        // Take documents off the tail of the list one at a time.
        int lastIndex = documents.Count - 1;
        DocumentExamples current = documents.Remove(lastIndex);
        IDictionary<int, CompressedFeatureVector> features = current.mentionFeatures;

        foreach (Example example in current.examples)
        {
            flattened.Add(new Pair<Example, IDictionary<int, CompressedFeatureVector>>(example, features));
        }
    }

    return flattened;
}
/// <summary>
/// Builds one anaphoricity example per distinct second mention in each document.
/// A mention is labelled anaphoric when at least one example with that
/// <c>mentionId2</c> has <c>label == 1</c>.
/// </summary>
/// <param name="documents">
/// Documents to convert. The list is consumed from the last element backwards
/// and is empty when this method returns.
/// </param>
/// <returns>
/// One (example, mention features) pair per distinct <c>mentionId2</c> per document,
/// created from the first example in which that mention appears.
/// </returns>
public static IList<Pair<Example, IDictionary<int, CompressedFeatureVector>>> GetAnaphoricityExamples(IList<DocumentExamples> documents)
{
    int p = 0; // mentions labelled anaphoric, summed over all documents
    int t = 0; // distinct mentions, summed over all documents
    IList<Pair<Example, IDictionary<int, CompressedFeatureVector>>> examples =
        new List<Pair<Example, IDictionary<int, CompressedFeatureVector>>>();

    while (!documents.IsEmpty())
    {
        DocumentExamples doc = documents.Remove(documents.Count - 1);

        // Pass 1: decide the label of every second mention in this document.
        // The dictionary indexer throws on a missing key, so presence is checked
        // explicitly instead of reading the entry and comparing it to null.
        IDictionary<int, bool> areAnaphoric = new Dictionary<int, bool>();
        foreach (Example labelled in doc.examples)
        {
            if (labelled.label == 1)
            {
                areAnaphoric[labelled.mentionId2] = true;
            }
            else if (!areAnaphoric.ContainsKey(labelled.mentionId2))
            {
                areAnaphoric[labelled.mentionId2] = false;
            }
        }

        foreach (KeyValuePair<int, bool> entry in areAnaphoric)
        {
            if (entry.Value)
            {
                p++;
            }
            t++;
        }

        // Pass 2: emit one example per mention. Removing the entry once it has been
        // used makes later examples with the same mentionId2 fall through.
        foreach (Example source in doc.examples)
        {
            bool isAnaphoric;
            if (areAnaphoric.TryGetValue(source.mentionId2, out isAnaphoric))
            {
                areAnaphoric.Remove(source.mentionId2);
                examples.Add(new Pair<Example, IDictionary<int, CompressedFeatureVector>>(
                    new Example(source, isAnaphoric), doc.mentionFeatures));
            }
        }
    }

    Redwood.Log("scoref-train", "Num anaphoricity examples " + p + " positive, " + t + " total");
    return examples;
}