Example #1
public Example(Edu.Stanford.Nlp.Coref.Statistical.Example pair, bool isPositive)
{
    // Copies an existing mention-pair example but keeps only the second mention:
    // the first mention's id/type are cleared, the pairwise features are dropped,
    // and the label is reduced to a binary 1/0.
    this.docId            = pair.docId;
    this.label            = isPositive ? 1 : 0;
    this.pairwiseFeatures = null;
    this.mentionId1       = -1;
    this.mentionId2       = pair.mentionId2;
    this.mentionType1     = null;
    this.mentionType2     = pair.mentionType2;
}
Example #2
public Example(int docId, Mention m1, Mention m2, double label, CompressedFeatureVector pairwiseFeatures)
{
    // Builds a mention-pair example: a real-valued label together with the
    // compressed pairwise feature vector extracted for (m1, m2).
    this.docId            = docId;
    this.label            = label;
    this.pairwiseFeatures = pairwiseFeatures;
    this.mentionId1       = m1.mentionID;
    this.mentionId2       = m2.mentionID;
    this.mentionType1     = m1.mentionType;
    this.mentionType2     = m2.mentionType;
}
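
Taken together, the two constructors above cover both kinds of training instances: Example #2 stores both mention IDs with a real-valued label and the compressed pairwise features, while Example #1 derives a second-mention-only example from such a pair. The sketch below only illustrates how they fit together; the BuildExamples helper is hypothetical, the namespaces are guessed from the converted package names, and the Mention objects and CompressedFeatureVector would come from a processed Document and a shared Compressor<string>, as in Example #5.

using Edu.Stanford.Nlp.Coref.Data;
using Edu.Stanford.Nlp.Coref.Statistical;

static class ExampleConstruction
{
    // Hypothetical helper (not part of the library): builds both kinds of
    // Example from a mention pair whose pairwise features have already been
    // compressed by a Compressor<string>.
    public static Example[] BuildExamples(int docId, Mention m1, Mention m2,
        bool coreferent, CompressedFeatureVector pairwiseFeatures)
    {
        // Mention-pair example: numeric label plus compressed pairwise features (Example #2).
        Example pairExample = new Example(docId, m1, m2, coreferent ? 1.0 : 0.0, pairwiseFeatures);

        // Derived example: keeps only the second mention, drops the pairwise
        // features, and reduces the label to 1/0 (Example #1).
        Example secondMentionOnly = new Example(pairExample, coreferent);

        return new[] { pairExample, secondMentionOnly };
    }
}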
Example #3
public virtual ICounter<K> Uncompress(CompressedFeatureVector cvf)
{
    // Rebuilds a counter from a compressed vector: each stored integer key is
    // mapped back to its original feature through the inverse index.
    ICounter<K> c = new ClassicCounter<K>();

    for (int i = 0; i < cvf.keys.Count; i++)
    {
        c.IncrementCount(inverse[cvf.keys[i]], cvf.values[i]);
    }
    return c;
}
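
The Uncompress method above is the inverse of Compress, which is used at the end of Example #4 and in Example #5 to store feature counters compactly. A minimal round-trip sketch, assuming the converted Edu.Stanford.Nlp.Stats namespace for ICounter/ClassicCounter and a parameterless Compressor<string> constructor:

using Edu.Stanford.Nlp.Coref.Statistical;
using Edu.Stanford.Nlp.Stats;

static class CompressorRoundTrip
{
    static void Main()
    {
        // Assumption: Compressor<K> exposes a parameterless constructor.
        Compressor<string> compressor = new Compressor<string>();

        ICounter<string> features = new ClassicCounter<string>();
        features.IncrementCount("bias", 1.0);
        features.IncrementCount("docSize", 42.0);

        // Compress stores the counts against compact integer keys...
        CompressedFeatureVector cfv = compressor.Compress(features);

        // ...and Uncompress (shown above) maps them back through the inverse index.
        ICounter<string> restored = compressor.Uncompress(cfv);
        System.Console.WriteLine(restored.GetCount("docSize"));  // 42
    }
}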
Example #4
private static ICounter<string> GetFeatures(ClustererDataLoader.ClustererDoc doc, Clusterer.Cluster c1, Clusterer.Cluster c2, Clusterer.GlobalFeatures gf)
{
    // Check the cache for this cluster merge first. TryGetValue avoids the
    // KeyNotFoundException a plain indexer would throw on a cache miss.
    Clusterer.MergeKey key = new Clusterer.MergeKey(c1, c2, gf.currentIndex);
    ICounter<string> features = featuresCache.TryGetValue(key, out CompressedFeatureVector cfv)
        ? compressor.Uncompress(cfv)
        : null;

    if (features != null)
    {
        // Cache statistics are only accumulated while training (isTraining is 0 or 1).
        featuresCacheHits += isTraining;
        return features;
    }
    featuresCacheMisses += isTraining;

    // Document-level and positional features.
    features = new ClassicCounter<string>();
    if (gf.anaphorSeen)
    {
        features.IncrementCount("anaphorSeen");
    }
    features.IncrementCount("docSize", gf.docSize);
    features.IncrementCount("percentComplete", gf.currentIndex / (double)gf.size);
    features.IncrementCount("bias", 1.0);

    // Order the clusters' earliest mentions by document position so earliest2
    // is always the later one, then use its anaphoricity score.
    int earliest1 = EarliestMention(c1, doc);
    int earliest2 = EarliestMention(c2, doc);
    if (doc.mentionIndices[earliest1] > doc.mentionIndices[earliest2])
    {
        int tmp = earliest1;
        earliest1 = earliest2;
        earliest2 = tmp;
    }
    features.IncrementCount("anaphoricity", doc.anaphoricityScores.GetCount(earliest2));

    if (c1.mentions.Count == 1 && c2.mentions.Count == 1)
    {
        // Both clusters are singletons: score the single mention pair directly.
        Pair<int, int> mentionPair = new Pair<int, int>(c1.mentions[0], c2.mentions[0]);
        features.AddAll(AddSuffix(GetFeatures(doc, mentionPair, doc.classificationScores), "-classification"));
        features.AddAll(AddSuffix(GetFeatures(doc, mentionPair, doc.rankingScores), "-ranking"));
        features = AddSuffix(features, "-single");
    }
    else
    {
        // Otherwise aggregate pairwise scores over every mention pair between the two clusters.
        IList<Pair<int, int>> between = new List<Pair<int, int>>();
        foreach (int m1 in c1.mentions)
        {
            foreach (int m2 in c2.mentions)
            {
                between.Add(new Pair<int, int>(m1, m2));
            }
        }
        features.AddAll(AddSuffix(GetFeatures(doc, between, doc.classificationScores), "-classification"));
        features.AddAll(AddSuffix(GetFeatures(doc, between, doc.rankingScores), "-ranking"));
    }

    // Store the newly built features in the cache before returning them.
    featuresCache[key] = compressor.Compress(features);
    return features;
}
Example #5
public virtual DocumentExamples Extract(int id, Document document, IDictionary<Pair<int, int>, bool> labeledPairs, Compressor<string> compressor)
{
    // Group the document's mentions by head index so per-mention feature
    // extraction can see the other mentions sharing the same head word.
    IList<Mention> mentionsList = CorefUtils.GetSortedMentions(document);
    IDictionary<int, IList<Mention>> mentionsByHeadIndex = new Dictionary<int, IList<Mention>>();

    foreach (Mention m in mentionsList)
    {
        // TryGetValue avoids the KeyNotFoundException a plain indexer would
        // throw the first time a head index is seen.
        if (!mentionsByHeadIndex.TryGetValue(m.headIndex, out IList<Mention> withIndex))
        {
            withIndex = new List<Mention>();
            mentionsByHeadIndex[m.headIndex] = withIndex;
        }
        withIndex.Add(m);
    }

    // Build one Example per labeled mention pair, compressing its pairwise features.
    IDictionary<int, Mention> mentions = document.predictedMentionsByID;
    IList<Example> examples = new List<Example>();
    ICollection<int> mentionsToExtract = new HashSet<int>();

    foreach (KeyValuePair<Pair<int, int>, bool> pair in labeledPairs)
    {
        Mention m1 = mentions[pair.Key.first];
        Mention m2 = mentions[pair.Key.second];
        mentionsToExtract.Add(m1.mentionID);
        mentionsToExtract.Add(m2.mentionID);
        CompressedFeatureVector features = compressor.Compress(GetFeatures(document, m1, m2));
        examples.Add(new Example(id, m1, m2, pair.Value ? 1.0 : 0.0, features));
    }

    // Also compress single-mention features for every mention that appears in
    // at least one labeled pair.
    IDictionary<int, CompressedFeatureVector> mentionFeatures = new Dictionary<int, CompressedFeatureVector>();

    foreach (int mentionID in mentionsToExtract)
    {
        mentionFeatures[mentionID] = compressor.Compress(GetFeatures(document, document.predictedMentionsByID[mentionID], mentionsByHeadIndex));
    }
    return new DocumentExamples(id, examples, mentionFeatures);
}
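
A possible caller for Extract, written as if it were another method of the same (unshown) declaring class so that the class name does not have to be guessed. The only assumption beyond the snippet above is that Compressor<K> has a parameterless constructor; document and labeledPairs are supplied by the surrounding coreference pipeline.

// Hypothetical companion method of the same class as Extract.
public DocumentExamples ExtractWithFreshCompressor(int id, Document document,
    IDictionary<Pair<int, int>, bool> labeledPairs)
{
    // A single Compressor<string> is shared by the pairwise and per-mention
    // features so that both are encoded against the same key index.
    Compressor<string> compressor = new Compressor<string>();  // assumed parameterless constructor
    return Extract(id, document, labeledPairs, compressor);
}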