/// <summary>
/// Scores the unlabeled mention pairs of <paramref name="document"/> with the classification,
/// ranking and anaphoricity models, hands the scores to the clusterer, and passes every
/// merge the clusterer returns to <c>CorefUtils.MergeCoreferenceClusters</c>.
/// </summary>
/// <param name="document">The document to run coreference on.</param>
public virtual void RunCoref(Document document)
        {
            IDictionary <Pair <int, int>, bool> candidatePairs = CorefUtils.GetUnlabeledMentionPairs(document);

            // Nothing to score: leave the document untouched.
            if (candidatePairs.Count == 0)
            {
                return;
            }

            Compressor <string> featureCompressor = new Compressor <string>();
            DocumentExamples    extracted         = extractor.Extract(0, document, candidatePairs, featureCompressor);

            ICounter <Pair <int, int> > classification = new ClassicCounter <Pair <int, int> >();
            ICounter <Pair <int, int> > ranking        = new ClassicCounter <Pair <int, int> >();
            ICounter <int>              anaphoricity   = new ClassicCounter <int>();

            foreach (Example candidate in extracted.examples)
            {
                CorefUtils.CheckForInterrupt();

                Pair <int, int> key = new Pair <int, int>(candidate.mentionId1, candidate.mentionId2);
                classification.IncrementCount(key, classificationModel.Predict(candidate, extracted.mentionFeatures, featureCompressor));
                ranking.IncrementCount(key, rankingModel.Predict(candidate, extracted.mentionFeatures, featureCompressor));

                // The anaphoricity score is keyed by the second mention only, so it is
                // computed once per mention no matter how many pairs that mention is in.
                if (anaphoricity.ContainsKey(candidate.mentionId2))
                {
                    continue;
                }
                anaphoricity.IncrementCount(candidate.mentionId2, anaphoricityModel.Predict(new Example(candidate, false), extracted.mentionFeatures, featureCompressor));
            }

            // NOTE(review): both ToMap selectors are null here, which looks like a translation
            // artifact -- confirm what key/value mapping ClustererDoc expects.
            ClustererDataLoader.ClustererDoc clustererInput = new ClustererDataLoader.ClustererDoc(
                0,
                classification,
                ranking,
                anaphoricity,
                candidatePairs,
                null,
                document.predictedMentionsByID.Stream().Collect(Collectors.ToMap(null, null)));

            foreach (Pair <int, int> merge in clusterer.GetClusterMerges(clustererInput))
            {
                CorefUtils.MergeCoreferenceClusters(merge, document);
            }
        }
        public virtual void RunCoref(Document document)
        {
            Compressor <string> compressor = new Compressor <string>();

            if (Thread.Interrupted())
            {
                // Allow interrupting
                throw new RuntimeInterruptedException();
            }
            IDictionary <Pair <int, int>, bool> pairs = new Dictionary <Pair <int, int>, bool>();

            foreach (KeyValuePair <int, IList <int> > e in CorefUtils.HeuristicFilter(CorefUtils.GetSortedMentions(document), maxMentionDistance, maxMentionDistanceWithStringMatch))
            {
                foreach (int m1 in e.Value)
                {
                    pairs[new Pair <int, int>(m1, e.Key)] = true;
                }
            }
            DocumentExamples            examples       = extractor.Extract(0, document, pairs, compressor);
            ICounter <Pair <int, int> > pairwiseScores = new ClassicCounter <Pair <int, int> >();

            foreach (Example mentionPair in examples.examples)
            {
                if (Thread.Interrupted())
                {
                    // Allow interrupting
                    throw new RuntimeInterruptedException();
                }
                pairwiseScores.IncrementCount(new Pair <int, int>(mentionPair.mentionId1, mentionPair.mentionId2), classifier.Predict(mentionPair, examples.mentionFeatures, compressor));
            }
            IList <Pair <int, int> > mentionPairs = new List <Pair <int, int> >(pairwiseScores.KeySet());

            mentionPairs.Sort(null);
            ICollection <int> seenAnaphors = new HashSet <int>();

            foreach (Pair <int, int> pair in mentionPairs)
            {
                if (seenAnaphors.Contains(pair.second))
                {
                    continue;
                }
                if (Thread.Interrupted())
                {
                    // Allow interrupting
                    throw new RuntimeInterruptedException();
                }
                seenAnaphors.Add(pair.second);
                Dictionaries.MentionType mt1 = document.predictedMentionsByID[pair.first].mentionType;
                Dictionaries.MentionType mt2 = document.predictedMentionsByID[pair.second].mentionType;
                if (pairwiseScores.GetCount(pair) > thresholds[new Pair <bool, bool>(mt1 == Dictionaries.MentionType.Pronominal, mt2 == Dictionaries.MentionType.Pronominal)])
                {
                    CorefUtils.MergeCoreferenceClusters(pair, document);
                }
            }
        }
Example #3
0
        /// <summary>
        /// Flattens the examples of all documents into a single list, pairing each example
        /// with the mention-feature map of the document it came from.
        /// </summary>
        /// <remarks>
        /// The input list is consumed: documents are taken from its end one at a time and
        /// removed, so <paramref name="documents"/> is empty when this method returns.
        /// </remarks>
        /// <param name="documents">Per-document examples; emptied by this call.</param>
        /// <returns>One (example, mention features) pair per example, last document first.</returns>
        public static IList <Pair <Example, IDictionary <int, CompressedFeatureVector> > > GetExamples(IList <DocumentExamples> documents)
        {
            IList <Pair <Example, IDictionary <int, CompressedFeatureVector> > > examples = new List <Pair <Example, IDictionary <int, CompressedFeatureVector> > >();

            while (documents.Count > 0)
            {
                // IList<T>.Remove(T) returns a bool, not the removed element, so read the
                // last document through the indexer and drop it with RemoveAt.
                int lastIndex = documents.Count - 1;
                DocumentExamples doc = documents[lastIndex];
                documents.RemoveAt(lastIndex);

                IDictionary <int, CompressedFeatureVector> mentionFeatures = doc.mentionFeatures;
                foreach (Example e in doc.examples)
                {
                    examples.Add(new Pair <Example, IDictionary <int, CompressedFeatureVector> >(e, mentionFeatures));
                }
            }
            return examples;
        }
Example #4
0
        /// <summary>
        /// Builds one anaphoricity example per distinct second mention (<c>mentionId2</c>) in
        /// each document. A mention is labeled anaphoric when at least one of its pairwise
        /// examples has <c>label == 1</c>.
        /// </summary>
        /// <remarks>
        /// The input list is consumed: documents are taken from its end one at a time and
        /// removed, so <paramref name="documents"/> is empty when this method returns.
        /// The positive and total example counts are logged through <c>Redwood.Log</c>.
        /// </remarks>
        /// <param name="documents">Per-document examples; emptied by this call.</param>
        /// <returns>
        /// (example, mention features) pairs, where each example is built from the first
        /// pairwise example seen for its mention and that mention's anaphoricity flag.
        /// </returns>
        public static IList <Pair <Example, IDictionary <int, CompressedFeatureVector> > > GetAnaphoricityExamples(IList <DocumentExamples> documents)
        {
            int p = 0;
            int t = 0;
            IList <Pair <Example, IDictionary <int, CompressedFeatureVector> > > examples = new List <Pair <Example, IDictionary <int, CompressedFeatureVector> > >();

            while (documents.Count > 0)
            {
                // IList<T>.Remove(T) returns a bool, not the removed element, so read the
                // last document through the indexer and drop it with RemoveAt.
                int lastIndex = documents.Count - 1;
                DocumentExamples doc = documents[lastIndex];
                documents.RemoveAt(lastIndex);

                // Pass 1: decide, per mention, whether any of its pairs is a positive.
                // The dictionary indexer throws on a missing key, so presence is tested
                // explicitly instead of comparing a bool against null.
                IDictionary <int, bool> areAnaphoric = new Dictionary <int, bool>();
                foreach (Example e in doc.examples)
                {
                    if (e.label == 1)
                    {
                        areAnaphoric[e.mentionId2] = true;
                    }
                    else if (!areAnaphoric.ContainsKey(e.mentionId2))
                    {
                        areAnaphoric[e.mentionId2] = false;
                    }
                }

                // Pass 2: update the running positive / total counters for the log line.
                foreach (bool anaphoric in areAnaphoric.Values)
                {
                    if (anaphoric)
                    {
                        p++;
                    }
                    t++;
                }

                // Pass 3: emit one example per mention. Removing the key after the first
                // hit ensures later pairs with the same mention are skipped.
                foreach (Example e in doc.examples)
                {
                    bool isAnaphoric;
                    if (areAnaphoric.TryGetValue(e.mentionId2, out isAnaphoric))
                    {
                        areAnaphoric.Remove(e.mentionId2);
                        examples.Add(new Pair <Example, IDictionary <int, CompressedFeatureVector> >(new Example(e, isAnaphoric), doc.mentionFeatures));
                    }
                }
            }
            Redwood.Log("scoref-train", "Num anaphoricity examples " + p + " positive, " + t + " total");
            return examples;
        }