/// <summary>
/// Builds an example from an existing one, keeping only the information about
/// its second mention.
/// </summary>
/// <remarks>
/// The first-mention slot is left unset (id <c>-1</c>, type <c>null</c>) and no
/// pairwise features are attached.
/// </remarks>
/// <param name="pair">Example whose document id and second mention (id and type) are copied.</param>
/// <param name="isPositive">When true the label is set to 1, otherwise to 0.</param>
public Example(Edu.Stanford.Nlp.Coref.Statistical.Example pair, bool isPositive)
{
    // Values carried over from the source example.
    docId = pair.docId;
    mentionId2 = pair.mentionId2;
    mentionType2 = pair.mentionType2;

    // First mention and pairwise features are deliberately left empty.
    mentionId1 = -1;
    mentionType1 = null;
    pairwiseFeatures = null;

    if (isPositive)
    {
        label = 1;
    }
    else
    {
        label = 0;
    }
}
/// <summary>
/// Builds an example for a pair of mentions within a document.
/// </summary>
/// <param name="docId">Identifier of the document stored on the example.</param>
/// <param name="m1">First mention; its <c>mentionID</c> and <c>mentionType</c> are recorded.</param>
/// <param name="m2">Second mention; its <c>mentionID</c> and <c>mentionType</c> are recorded.</param>
/// <param name="label">Label value stored on the example.</param>
/// <param name="pairwiseFeatures">Compressed feature vector stored on the example (kept by reference).</param>
public Example(int docId, Mention m1, Mention m2, double label, CompressedFeatureVector pairwiseFeatures)
{
    // Document-level and pair-level data.
    this.docId = docId;
    this.label = label;
    this.pairwiseFeatures = pairwiseFeatures;

    // First mention.
    this.mentionId1 = m1.mentionID;
    this.mentionType1 = m1.mentionType;

    // Second mention.
    this.mentionId2 = m2.mentionID;
    this.mentionType2 = m2.mentionType;
}
/// <summary>
/// Expands a compressed feature vector back into a counter keyed by the
/// original feature objects.
/// </summary>
/// <param name="cvf">Compressed vector whose <c>keys</c> and <c>values</c> are read position by position.</param>
/// <returns>
/// A new counter in which, for every position, the value at that position has been
/// added to the count of the feature that <c>inverse</c> maps the key to.
/// </returns>
public virtual ICounter<K> Uncompress(CompressedFeatureVector cvf)
{
    ICounter<K> counts = new ClassicCounter<K>();
    int position = 0;
    while (position < cvf.keys.Count)
    {
        // Translate the compressed key back to its feature, then accumulate its value.
        K feature = inverse[cvf.keys[position]];
        counts.IncrementCount(feature, cvf.values[position]);
        position++;
    }
    return counts;
}
/// <summary>
/// Computes (or fetches from the cache) the features describing a candidate
/// merge of two clusters.
/// </summary>
/// <remarks>
/// Results are cached in <c>featuresCache</c> in compressed form, keyed by the two
/// clusters and <c>gf.currentIndex</c>. The hit/miss counters are incremented by
/// <c>isTraining</c>.
/// </remarks>
/// <param name="doc">Document supplying mention indices, anaphoricity scores and pairwise scores.</param>
/// <param name="c1">First cluster of the candidate merge.</param>
/// <param name="c2">Second cluster of the candidate merge.</param>
/// <param name="gf">Global state (current index, size, document size, anaphor-seen flag).</param>
/// <returns>The feature counter for merging <paramref name="c1"/> and <paramref name="c2"/>.</returns>
private static ICounter<string> GetFeatures(ClustererDataLoader.ClustererDoc doc, Clusterer.Cluster c1, Clusterer.Cluster c2, Clusterer.GlobalFeatures gf)
{
    Clusterer.MergeKey key = new Clusterer.MergeKey(c1, c2, gf.currentIndex);

    // Use TryGetValue rather than the indexer: a dictionary indexer throws
    // KeyNotFoundException for an absent key, so the null check below would
    // never be reached on a cache miss.
    CompressedFeatureVector cfv;
    if (!featuresCache.TryGetValue(key, out cfv))
    {
        cfv = null;
    }

    ICounter<string> features = cfv == null ? null : compressor.Uncompress(cfv);
    if (features != null)
    {
        featuresCacheHits += isTraining;
        return features;
    }
    featuresCacheMisses += isTraining;

    // Global features that do not depend on the particular clusters.
    features = new ClassicCounter<string>();
    if (gf.anaphorSeen)
    {
        features.IncrementCount("anaphorSeen");
    }
    features.IncrementCount("docSize", gf.docSize);
    features.IncrementCount("percentComplete", gf.currentIndex / (double)gf.size);
    features.IncrementCount("bias", 1.0);

    // Order the two clusters' earliest mentions by their index in the document
    // so that earliest2 is the later of the two.
    int earliest1 = EarliestMention(c1, doc);
    int earliest2 = EarliestMention(c2, doc);
    if (doc.mentionIndices[earliest1] > doc.mentionIndices[earliest2])
    {
        int tmp = earliest1;
        earliest1 = earliest2;
        earliest2 = tmp;
    }
    features.IncrementCount("anaphoricity", doc.anaphoricityScores.GetCount(earliest2));

    if (c1.mentions.Count == 1 && c2.mentions.Count == 1)
    {
        // Two singleton clusters: use the features of the single mention pair.
        Pair<int, int> mentionPair = new Pair<int, int>(c1.mentions[0], c2.mentions[0]);
        features.AddAll(AddSuffix(GetFeatures(doc, mentionPair, doc.classificationScores), "-classification"));
        features.AddAll(AddSuffix(GetFeatures(doc, mentionPair, doc.rankingScores), "-ranking"));
        // Every feature collected so far (global ones included) gets the "-single" suffix.
        features = AddSuffix(features, "-single");
    }
    else
    {
        // General case: features over every cross-cluster mention pair.
        IList<Pair<int, int>> between = new List<Pair<int, int>>();
        foreach (int m1 in c1.mentions)
        {
            foreach (int m2 in c2.mentions)
            {
                between.Add(new Pair<int, int>(m1, m2));
            }
        }
        features.AddAll(AddSuffix(GetFeatures(doc, between, doc.classificationScores), "-classification"));
        features.AddAll(AddSuffix(GetFeatures(doc, between, doc.rankingScores), "-ranking"));
    }

    featuresCache[key] = compressor.Compress(features);
    return features;
}
/// <summary>
/// Extracts training/evaluation examples for one document: a pairwise example for
/// every labeled mention pair, plus a compressed feature vector for every mention
/// that takes part in at least one labeled pair.
/// </summary>
/// <param name="id">Document id stored on each example and on the result.</param>
/// <param name="document">Document providing the mentions.</param>
/// <param name="labeledPairs">Mention-id pairs mapped to their label (true becomes 1.0, false becomes 0.0).</param>
/// <param name="compressor">Compressor used for both pairwise and per-mention feature vectors.</param>
/// <returns>The pairwise examples and per-mention features for the document.</returns>
public virtual DocumentExamples Extract(int id, Document document, IDictionary<Pair<int, int>, bool> labeledPairs, Compressor<string> compressor)
{
    // Group the sorted mentions by head index.
    IList<Mention> mentionsList = CorefUtils.GetSortedMentions(document);
    IDictionary<int, IList<Mention>> mentionsByHeadIndex = new Dictionary<int, IList<Mention>>();
    foreach (Mention m in mentionsList)
    {
        // TryGetValue instead of the indexer: the indexer throws
        // KeyNotFoundException the first time a head index is seen, so the
        // "create the list on first use" branch would never run.
        IList<Mention> withIndex;
        if (!mentionsByHeadIndex.TryGetValue(m.headIndex, out withIndex) || withIndex == null)
        {
            withIndex = new List<Mention>();
            mentionsByHeadIndex[m.headIndex] = withIndex;
        }
        withIndex.Add(m);
    }

    // One pairwise example per labeled pair; remember which mentions were used.
    IDictionary<int, Mention> mentions = document.predictedMentionsByID;
    IList<Example> examples = new List<Example>();
    ICollection<int> mentionsToExtract = new HashSet<int>();
    foreach (KeyValuePair<Pair<int, int>, bool> pair in labeledPairs)
    {
        Mention m1 = mentions[pair.Key.first];
        Mention m2 = mentions[pair.Key.second];
        mentionsToExtract.Add(m1.mentionID);
        mentionsToExtract.Add(m2.mentionID);
        CompressedFeatureVector features = compressor.Compress(GetFeatures(document, m1, m2));
        examples.Add(new Example(id, m1, m2, pair.Value ? 1.0 : 0.0, features));
    }

    // Per-mention features, computed once for each mention that occurred in a pair.
    IDictionary<int, CompressedFeatureVector> mentionFeatures = new Dictionary<int, CompressedFeatureVector>();
    foreach (int mentionID in mentionsToExtract)
    {
        mentionFeatures[mentionID] = compressor.Compress(GetFeatures(document, document.predictedMentionsByID[mentionID], mentionsByHeadIndex));
    }

    return new DocumentExamples(id, examples, mentionFeatures);
}