/// <summary>
/// Builds the identifier list for a mention from its feature counter and mention type.
/// </summary>
/// <remarks>
/// The returned list always holds exactly one entry:
/// <list type="bullet">
/// <item>Pronominal mentions: the first feature key starting with <c>head-word=</c>,
/// with that marker removed.</item>
/// <item>Proper mentions, when the <c>neTypeConjuntion</c> field is set: the mention type name,
/// an underscore, and the first feature key starting with <c>head-ne-type=</c> with that marker
/// removed.</item>
/// <item>Every other case, including when no matching feature key exists: the mention type name.</item>
/// </list>
/// </remarks>
/// <param name="features">Counter whose keys are the feature strings of the mention.</param>
/// <param name="mentionType">Type of the mention the features belong to.</param>
/// <returns>A new single-element list containing the identifier.</returns>
private IList<string> Identifiers(ICounter<string> features, Dictionaries.MentionType mentionType)
{
    IList<string> identifiers = new List<string>();
    if (mentionType == Dictionaries.MentionType.Pronominal)
    {
        foreach (string feature in features.KeySet())
        {
            // Feature keys are machine-generated, so compare ordinally rather than
            // with the current culture (the default for StartsWith(string)).
            if (feature.StartsWith("head-word=", System.StringComparison.Ordinal))
            {
                identifiers.Add(feature.Replace("head-word=", string.Empty));
                return identifiers;
            }
        }
    }
    else if (neTypeConjuntion && mentionType == Dictionaries.MentionType.Proper)
    {
        foreach (string feature in features.KeySet())
        {
            if (feature.StartsWith("head-ne-type=", System.StringComparison.Ordinal))
            {
                identifiers.Add(mentionType.ToString() + "_" + feature.Replace("head-ne-type=", string.Empty));
                return identifiers;
            }
        }
    }

    // Fallback: no specialised identifier was found, use the bare mention type.
    identifiers.Add(mentionType.ToString());
    return identifiers;
}
/// <summary>
/// Creates a labelled example from an existing one, keeping only the document id and the
/// second mention's id and type. The first-mention fields and the pairwise features are cleared.
/// </summary>
/// <param name="pair">Example supplying <c>docId</c>, <c>mentionId2</c> and <c>mentionType2</c>.</param>
/// <param name="isPositive">When true the label is set to 1, otherwise to 0.</param>
public Example(Edu.Stanford.Nlp.Coref.Statistical.Example pair, bool isPositive)
{
    // Values carried over from the source example.
    docId = pair.docId;
    mentionId2 = pair.mentionId2;
    mentionType2 = pair.mentionType2;

    // First-mention slots and pairwise features are left blank.
    mentionId1 = -1;
    mentionType1 = null;
    pairwiseFeatures = null;

    if (isPositive)
    {
        label = 1;
    }
    else
    {
        label = 0;
    }
}
/// <summary>
/// Creates an example for a pair of mentions, recording each mention's id and type
/// together with the document id, label and pairwise feature vector.
/// </summary>
/// <param name="docId">Id of the document the mentions come from.</param>
/// <param name="m1">First mention; its <c>mentionID</c> and <c>mentionType</c> are stored.</param>
/// <param name="m2">Second mention; its <c>mentionID</c> and <c>mentionType</c> are stored.</param>
/// <param name="label">Label assigned to the pair.</param>
/// <param name="pairwiseFeatures">Feature vector describing the pair.</param>
public Example(int docId, Mention m1, Mention m2, double label, CompressedFeatureVector pairwiseFeatures)
{
    // First mention.
    this.mentionId1 = m1.mentionID;
    this.mentionType1 = m1.mentionType;

    // Second mention.
    this.mentionId2 = m2.mentionID;
    this.mentionType2 = m2.mentionType;

    // Pair-level data.
    this.docId = docId;
    this.label = label;
    this.pairwiseFeatures = pairwiseFeatures;
}
/// <summary>
/// Scores candidate mention pairs of <paramref name="document"/> with the pairwise classifier
/// and merges the coreference clusters of the accepted pairs.
/// </summary>
/// <remarks>
/// Steps, as visible in this method:
/// <list type="number">
/// <item>Candidate pairs are collected from <c>CorefUtils.HeuristicFilter</c>; each entry's key
/// becomes the second element of a pair and each id in its value list the first element.</item>
/// <item>Features are extracted for those pairs and every example is scored with
/// <c>classifier.Predict</c>.</item>
/// <item>Pairs are visited from highest to lowest score. Only the first pair seen for a given
/// second mention is considered; it is merged when its score exceeds the threshold
/// selected by whether each of the two mentions is pronominal.</item>
/// </list>
/// The thread's interrupt flag is polled before the work starts and inside both loops.
/// </remarks>
/// <param name="document">Document whose predicted mentions are to be clustered.</param>
/// <exception cref="RuntimeInterruptedException">If the thread was interrupted.</exception>
public virtual void RunCoref(Document document)
{
    Compressor<string> compressor = new Compressor<string>();
    if (Thread.Interrupted())
    {
        // Allow interrupting
        throw new RuntimeInterruptedException();
    }

    // Collect the candidate (first mention, second mention) id pairs to score.
    IDictionary<Pair<int, int>, bool> pairs = new Dictionary<Pair<int, int>, bool>();
    foreach (KeyValuePair<int, IList<int>> e in CorefUtils.HeuristicFilter(
        CorefUtils.GetSortedMentions(document), maxMentionDistance, maxMentionDistanceWithStringMatch))
    {
        foreach (int m1 in e.Value)
        {
            pairs[new Pair<int, int>(m1, e.Key)] = true;
        }
    }

    // Score every candidate pair.
    DocumentExamples examples = extractor.Extract(0, document, pairs, compressor);
    ICounter<Pair<int, int>> pairwiseScores = new ClassicCounter<Pair<int, int>>();
    foreach (Example mentionPair in examples.examples)
    {
        if (Thread.Interrupted())
        {
            // Allow interrupting
            throw new RuntimeInterruptedException();
        }
        pairwiseScores.IncrementCount(
            new Pair<int, int>(mentionPair.mentionId1, mentionPair.mentionId2),
            classifier.Predict(mentionPair, examples.mentionFeatures, compressor));
    }

    // Visit pairs best-score-first. The loop below keeps only the first pair it meets for each
    // second mention, so the order must depend on the scores; the previous Sort(null) call
    // supplied no comparer and therefore could not take the scores into account.
    List<Pair<int, int>> mentionPairs = new List<Pair<int, int>>(pairwiseScores.KeySet());
    mentionPairs.Sort((p1, p2) => pairwiseScores.GetCount(p2).CompareTo(pairwiseScores.GetCount(p1)));

    ICollection<int> seenAnaphors = new HashSet<int>();
    foreach (Pair<int, int> pair in mentionPairs)
    {
        if (seenAnaphors.Contains(pair.second))
        {
            // A higher-scoring pair for this second mention was already handled.
            continue;
        }
        if (Thread.Interrupted())
        {
            // Allow interrupting
            throw new RuntimeInterruptedException();
        }
        seenAnaphors.Add(pair.second);

        Dictionaries.MentionType mt1 = document.predictedMentionsByID[pair.first].mentionType;
        Dictionaries.MentionType mt2 = document.predictedMentionsByID[pair.second].mentionType;

        // The threshold is chosen by (first is pronominal, second is pronominal).
        Pair<bool, bool> thresholdKey = new Pair<bool, bool>(
            mt1 == Dictionaries.MentionType.Pronominal,
            mt2 == Dictionaries.MentionType.Pronominal);
        if (pairwiseScores.GetCount(pair) > thresholds[thresholdKey])
        {
            CorefUtils.MergeCoreferenceClusters(pair, document);
        }
    }
}
/// <summary>Loads the CorefChain objects from the serialized buffer.</summary>
/// <remarks>
/// Layout read by this method:
/// <list type="bullet">
/// <item>One line holding the number of clusters; an empty line means there is nothing to load.</item>
/// <item>Per cluster, a header line with the cluster id and its mention count, followed by one
/// line per mention.</item>
/// <item>Each mention line is split on single whitespace characters into: the two integers of the
/// map key, a representative flag (<c>1</c> = representative), mention type, number, gender,
/// animacy, start index, end index, head index, cluster id, mention id, sentence number,
/// position length, that many position integers, and finally the space-escaped span text.</item>
/// <item>One further line after the last cluster, which is read and discarded.</item>
/// </list>
/// NOTE(review): this method does not check for a null line, so a stream that ends early
/// presumably fails with a null dereference; confirm against BufferedReader.ReadLine.
/// </remarks>
/// <param name="reader">the buffer</param>
/// <returns>A map from cluster id to clusters, or null when the first line is empty</returns>
/// <exception cref="System.IO.IOException"/>
private static IDictionary<int, CorefChain> LoadCorefChains(BufferedReader reader)
{
    string line = reader.ReadLine().Trim();
    if (line.IsEmpty())
    {
        return null;
    }

    int clusterCount = System.Convert.ToInt32(line);
    IDictionary<int, CorefChain> chains = Generics.NewHashMap();

    // read each cluster
    for (int c = 0; c < clusterCount; c++)
    {
        line = reader.ReadLine().Trim();
        // "\\s" is a regular expression; string.Split would treat it as the literal text "\s".
        string[] bits = System.Text.RegularExpressions.Regex.Split(line, "\\s");
        int cid = System.Convert.ToInt32(bits[0]);
        int mentionCount = System.Convert.ToInt32(bits[1]);
        IDictionary<IntPair, ICollection<CorefChain.CorefMention>> mentionMap = Generics.NewHashMap();
        CorefChain.CorefMention representative = null;

        // read each mention in this cluster
        for (int m = 0; m < mentionCount; m++)
        {
            line = reader.ReadLine();
            bits = System.Text.RegularExpressions.Regex.Split(line, "\\s");

            IntPair key = new IntPair(System.Convert.ToInt32(bits[0]), System.Convert.ToInt32(bits[1]));
            bool rep = bits[2].Equals("1");
            Dictionaries.MentionType mentionType = ParseMentionType(bits[3]);
            Dictionaries.Number number = ParseNumber(bits[4]);
            Dictionaries.Gender gender = ParseGender(bits[5]);
            Dictionaries.Animacy animacy = ParseAnimacy(bits[6]);
            int startIndex = System.Convert.ToInt32(bits[7]);
            int endIndex = System.Convert.ToInt32(bits[8]);
            int headIndex = System.Convert.ToInt32(bits[9]);
            int clusterID = System.Convert.ToInt32(bits[10]);
            int mentionID = System.Convert.ToInt32(bits[11]);
            int sentNum = System.Convert.ToInt32(bits[12]);

            // Variable-length position tuple: a length followed by that many integers.
            int posLen = System.Convert.ToInt32(bits[13]);
            int[] posElems = new int[posLen];
            for (int i = 0; i < posLen; i++)
            {
                posElems[i] = System.Convert.ToInt32(bits[14 + i]);
            }
            IntTuple position = new IntTuple(posElems);

            // The span text is the field right after the position integers.
            string span = UnescapeSpace(bits[14 + posLen]);
            CorefChain.CorefMention mention = new CorefChain.CorefMention(
                mentionType, number, gender, animacy, startIndex, endIndex, headIndex,
                clusterID, mentionID, sentNum, position, span);

            // TryGetValue instead of the indexer: a dictionary indexer throws for a key that
            // is not present yet, which is the case for the first mention of every key.
            ICollection<CorefChain.CorefMention> mentionsWithThisHead;
            if (!mentionMap.TryGetValue(key, out mentionsWithThisHead) || mentionsWithThisHead == null)
            {
                mentionsWithThisHead = Generics.NewHashSet();
                mentionMap[key] = mentionsWithThisHead;
            }
            mentionsWithThisHead.Add(mention);

            if (rep)
            {
                representative = mention;
            }
        }

        // construct the cluster
        CorefChain chain = new CorefChain(cid, mentionMap, representative);
        chains[cid] = chain;
    }

    // Consume the line that follows the last cluster; its content is not used.
    reader.ReadLine();
    return chains;
}