/// <summary>
/// Creates an extractor that keeps the supplied dictionaries and semantics and
/// builds its own <c>SemanticHeadFinder</c> and <c>RuleBasedCorefMentionFinder</c>.
/// </summary>
/// <param name="dict">Dictionaries stored on this extractor.</param>
/// <param name="semantics">Semantics stored on this extractor.</param>
public MentionExtractor(Dictionaries dict, Semantics semantics)
{
    dictionaries = dict;
    this.semantics = semantics;
    headFinder = new SemanticHeadFinder();
    mentionFinder = new RuleBasedCorefMentionFinder();
}
/// <summary>Find document type: Conversation or article</summary>
/// <remarks>
/// Walks every token of every sentence in the annotation and reads its utterance index.
/// The document is an article when no token has a non-zero utterance index, or when a
/// token with utterance index 0 follows a token with a non-zero one. Otherwise it is a
/// conversation. As a side effect, <c>maxUtter</c> is raised to the largest utterance
/// index seen before the method returns.
/// </remarks>
/// <param name="dict">Not read by this method; kept so existing callers still compile.</param>
/// <returns>The detected document type.</returns>
private Document.DocType FindDocType(Dictionaries dict)
{
    bool speakerChange = false;
    foreach (ICoreMap sent in annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
    {
        foreach (CoreLabel w in sent.Get(typeof(CoreAnnotations.TokensAnnotation)))
        {
            int utterIndex = w.Get(typeof(CoreAnnotations.UtteranceAnnotation));
            if (utterIndex != 0)
            {
                speakerChange = true;
            }
            else if (speakerChange)
            {
                // Back to utterance 0 after a non-zero utterance: treated as an article.
                return Document.DocType.Article;
            }
            if (maxUtter < utterIndex)
            {
                maxUtter = utterIndex;
            }
        }
    }
    return speakerChange ? Document.DocType.Conversation : Document.DocType.Article;
}
/// <summary>Check one mention is the speaker of the other mention</summary>
/// <remarks>
/// Returns true only when all of the following hold: <paramref name="ant"/> is a
/// non-plural first-person pronoun (per <c>dict.firstPersonPronouns</c>) in the same
/// sentence as <paramref name="m"/>; exactly one quotation-mark token (<c>``</c> or
/// <c>''</c>) lies strictly between the two head positions; and the dependency node
/// found for the head word of <paramref name="m"/> has a parent reached through an
/// <c>nsubj</c> relation whose lemma is in <c>dict.reportVerb</c>.
/// </remarks>
/// <param name="m">Mention tested as the speaker.</param>
/// <param name="ant">Mention tested as the first-person reference.</param>
/// <param name="dict">Dictionaries providing the pronoun and reporting-verb sets.</param>
/// <returns>True when the conditions above are met, otherwise false.</returns>
public static bool IsSpeaker(Mention m, Mention ant, Dictionaries dict)
{
    bool antIsFirstPerson = dict.firstPersonPronouns.Contains(ant.SpanToString().ToLower());
    if (!antIsFirstPerson)
    {
        return false;
    }
    if (ant.number == Dictionaries.Number.Plural)
    {
        return false;
    }
    if (ant.sentNum != m.sentNum)
    {
        return false;
    }

    // Count quotation-mark tokens strictly between the two heads.
    int lowerHead = System.Math.Min(m.headIndex, ant.headIndex);
    int upperHead = System.Math.Max(m.headIndex, ant.headIndex);
    int quoteMarks = 0;
    for (int pos = lowerHead + 1; pos < upperHead; pos++)
    {
        string token = m.sentenceWords[pos].Get(typeof(CoreAnnotations.TextAnnotation));
        if (token.Equals("``") || token.Equals("''"))
        {
            quoteMarks++;
        }
    }
    if (quoteMarks != 1)
    {
        return false;
    }

    IndexedWord headNode = m.dependency.GetNodeByWordPattern(m.sentenceWords[m.headIndex].Get(typeof(CoreAnnotations.TextAnnotation)));
    if (headNode == null)
    {
        return false;
    }
    foreach (Pair<GrammaticalRelation, IndexedWord> edge in m.dependency.ParentPairs(headNode))
    {
        if (!edge.First().GetShortName().Equals("nsubj"))
        {
            continue;
        }
        if (dict.reportVerb.Contains(edge.Second().Get(typeof(CoreAnnotations.LemmaAnnotation))))
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Scans the tokens of every sentence for quotation boundaries and looks up a speaker
/// for each completed quotation.
/// </summary>
/// <remarks>
/// A quotation starts at the first token with a non-zero utterance index and ends at the
/// next token whose utterance index is 0. When the end is reached,
/// <c>FindQuotationSpeaker</c> is called with the (sentence, token) positions of both
/// boundaries. A quotation still open after the last token is not passed on.
/// </remarks>
/// <param name="dict">Dictionaries forwarded to <c>FindQuotationSpeaker</c>.</param>
private void FindSpeakersInArticle(Dictionaries dict)
{
    IList<ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
    Pair<int, int> quoteStart = new Pair<int, int>();
    Pair<int, int> quoteEnd = new Pair<int, int>();
    int utterNum = -1;
    bool insideQuotation = false;

    for (int sentIdx = 0; sentIdx < sentences.Count; sentIdx++)
    {
        IList<CoreLabel> tokens = sentences[sentIdx].Get(typeof(CoreAnnotations.TokensAnnotation));
        for (int tokIdx = 0; tokIdx < tokens.Count; tokIdx++)
        {
            int utterIndex = tokens[tokIdx].Get(typeof(CoreAnnotations.UtteranceAnnotation));
            bool quotedToken = utterIndex != 0;
            if (quotedToken == insideQuotation)
            {
                // No boundary crossed at this token.
                continue;
            }

            if (quotedToken)
            {
                // Entering a quotation.
                utterNum = utterIndex;
                insideQuotation = true;
                quoteStart.SetFirst(sentIdx);
                quoteStart.SetSecond(tokIdx);
            }
            else
            {
                // Leaving a quotation.
                insideQuotation = false;
                quoteEnd.SetFirst(sentIdx);
                quoteEnd.SetSecond(tokIdx);
                FindQuotationSpeaker(utterNum, sentences, quoteStart, quoteEnd, dict);
            }
        }
    }
}
/// <summary>
/// Creates a MUC extractor: reads the whole file named by the <c>Constants.MucProp</c>
/// property into memory, resets the read offset, and sets up the tokenizer factory and
/// the Stanford processor.
/// </summary>
/// <param name="dict">Dictionaries passed to the base extractor.</param>
/// <param name="props">Properties supplying the MUC file name and processor settings.</param>
/// <param name="semantics">Semantics passed to the base extractor.</param>
/// <exception cref="System.Exception"/>
public MUCMentionExtractor(Dictionaries dict, Properties props, Semantics semantics)
    : base(dict, semantics)
{
    currentOffset = 0;
    fileContents = IOUtils.SlurpFile(props.GetProperty(Constants.MucProp));
    CoreLabelTokenFactory labelFactory = new CoreLabelTokenFactory(false);
    tokenizerFactory = PTBTokenizer.Factory(labelFactory, string.Empty);
    stanfordProcessor = LoadStanfordProcessor(props);
}
/// <summary>
/// Creates a CoNLL extractor that reads documents from the corpus path given by the
/// <c>Constants.Conll2011Prop</c> property.
/// </summary>
/// <remarks>
/// Token coreference annotation is switched off. Gold speaker, NER and POS annotation are
/// each switched on when the matching <c>Constants.UseGold*</c> flag is set or when
/// CoNLL replication is requested. Only files matching <c>.*_auto_conll$</c> are read.
/// </remarks>
/// <param name="dict">Dictionaries passed to the base extractor.</param>
/// <param name="props">Properties supplying the corpus path, replication flag and processor settings.</param>
/// <param name="semantics">Semantics passed to the base extractor.</param>
/// <exception cref="System.Exception"/>
public CoNLLMentionExtractor(Dictionaries dict, Properties props, Semantics semantics)
    : base(dict, semantics)
{
    // Initialize reader for reading from CONLL2011 corpus
    corpusPath = props.GetProperty(Constants.Conll2011Prop);
    replicateCoNLL = bool.ParseBoolean(props.GetProperty(Constants.ReplicateconllProp, "false"));

    CoNLL2011DocumentReader.Options readerOptions = new CoNLL2011DocumentReader.Options
    {
        annotateTokenCoref = false,
        annotateTokenSpeaker = Constants.UseGoldSpeakerTags || replicateCoNLL,
        annotateTokenNer = Constants.UseGoldNe || replicateCoNLL,
        annotateTokenPos = Constants.UseGoldPos || replicateCoNLL
    };
    readerOptions.SetFilter(".*_auto_conll$");

    reader = new CoNLL2011DocumentReader(corpusPath, readerOptions);
    stanfordProcessor = LoadStanfordProcessor(props);
}
/// <summary>
/// Tells whether this cluster holds at most one mention and that mention is a pronoun.
/// </summary>
/// <param name="dict">Dictionaries providing the <c>allPronouns</c> set.</param>
/// <returns>
/// True when the cluster has no more than one mention and that mention is pronominal or
/// its lower-cased span is in <c>dict.allPronouns</c>; false otherwise, including for an
/// empty cluster.
/// </returns>
public virtual bool IsSinglePronounCluster(Dictionaries dict)
{
    if (this.corefMentions.Count <= 1)
    {
        foreach (Mention candidate in this.corefMentions)
        {
            bool isPronoun = candidate.IsPronominal() || dict.allPronouns.Contains(candidate.SpanToString().ToLower());
            if (isPronoun)
            {
                return true;
            }
        }
    }
    return false;
}
/// <summary>Speaker extraction</summary>
/// <remarks>
/// When gold speaker tags are enabled or the annotation is marked as using marked
/// discourse, the <c>speakers</c> map is filled directly from each token's speaker
/// annotation, keyed by utterance index. Otherwise speakers are inferred by
/// <c>FindSpeakersInConversation</c> or <c>FindSpeakersInArticle</c> according to
/// <c>docType</c>, and the result is written back onto every token whose utterance index
/// has an entry in <c>speakers</c>.
/// </remarks>
/// <param name="dict">Dictionaries forwarded to the speaker-inference helpers.</param>
private void FindSpeakers(Dictionaries dict)
{
    // Nullable on purpose: an absent annotation must fall back to false. A plain bool
    // compared against null is always "not null", which defeats the fallback.
    bool? useMarkedDiscourseBoolean = annotation.Get(typeof(CoreAnnotations.UseMarkedDiscourseAnnotation));
    bool useMarkedDiscourse = useMarkedDiscourseBoolean ?? false;

    if (Constants.UseGoldSpeakerTags || useMarkedDiscourse)
    {
        foreach (ICoreMap sent in annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
        {
            foreach (CoreLabel w in sent.Get(typeof(CoreAnnotations.TokensAnnotation)))
            {
                int utterIndex = w.Get(typeof(CoreAnnotations.UtteranceAnnotation));
                speakers[utterIndex] = w.Get(typeof(CoreAnnotations.SpeakerAnnotation));
            }
        }
        return;
    }

    if (docType == Document.DocType.Conversation)
    {
        FindSpeakersInConversation(dict);
    }
    else if (docType == Document.DocType.Article)
    {
        FindSpeakersInArticle(dict);
    }

    // set speaker info to annotation
    foreach (ICoreMap sent in annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
    {
        foreach (CoreLabel w in sent.Get(typeof(CoreAnnotations.TokensAnnotation)))
        {
            int utterIndex = w.Get(typeof(CoreAnnotations.UtteranceAnnotation));
            if (speakers.Contains(utterIndex))
            {
                w.Set(typeof(CoreAnnotations.SpeakerAnnotation), speakers[utterIndex]);
            }
        }
    }
}
/// <summary>
/// Infers speakers for a conversation in two passes.
/// </summary>
/// <remarks>
/// First pass: for every predicted mention that has a predicate nominative whose
/// lower-cased span is <c>"i"</c>, the mention's ID is recorded as the speaker of the
/// utterance that contains the mention's head word. Second pass: consecutive sentences
/// sharing the utterance index of their first token are grouped into paragraphs, and each
/// paragraph is handed to <c>FindParagraphSpeaker</c>, carrying forward the speaker it
/// returns for the following paragraph.
/// </remarks>
/// <param name="dict">Dictionaries forwarded to <c>FindParagraphSpeaker</c>.</param>
private void FindSpeakersInConversation(Dictionaries dict)
{
    foreach (IList<Mention> sentenceMentions in predictedOrderedMentionsBySentence)
    {
        foreach (Mention m in sentenceMentions)
        {
            if (m.predicateNominatives != null)
            {
                foreach (Mention nominative in m.predicateNominatives)
                {
                    if (nominative.SpanToString().ToLower().Equals("i"))
                    {
                        speakers[m.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation))] = int.ToString(m.mentionID);
                    }
                }
            }
        }
    }

    IList<ICoreMap> currentParagraph = new List<ICoreMap>();
    int paragraphUtterIndex = 0;
    int paragraphOffset = 0;
    string carriedSpeaker = string.Empty;

    foreach (ICoreMap sentence in annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
    {
        int sentenceUtter = sentence.Get(typeof(CoreAnnotations.TokensAnnotation))[0].Get(typeof(CoreAnnotations.UtteranceAnnotation));
        if (sentenceUtter != paragraphUtterIndex)
        {
            // Utterance changed: close the paragraph collected so far and start a new one.
            carriedSpeaker = FindParagraphSpeaker(currentParagraph, paragraphUtterIndex, carriedSpeaker, paragraphOffset, dict);
            paragraphUtterIndex = sentenceUtter;
            paragraphOffset += currentParagraph.Count;
            currentParagraph = new List<ICoreMap>();
        }
        currentParagraph.Add(sentence);
    }

    // Close the trailing paragraph.
    FindParagraphSpeaker(currentParagraph, paragraphUtterIndex, carriedSpeaker, paragraphOffset, dict);
}
/// <summary>
/// Creates an ACE extractor for either the ACE2004 or the ACE2005 corpus, depending on
/// which of <c>Constants.Ace2004Prop</c> / <c>Constants.Ace2005Prop</c> is present in
/// <paramref name="props"/> (ACE2004 wins when both are present).
/// </summary>
/// <remarks>
/// The corpus path is normalized to end with the platform separator and the directory
/// listing is stored in <c>files</c>.
/// </remarks>
/// <param name="dict">Dictionaries passed to the base extractor.</param>
/// <param name="props">Properties supplying the corpus path and processor settings.</param>
/// <param name="semantics">Semantics passed to the base extractor.</param>
/// <exception cref="System.ArgumentException">
/// Neither ACE corpus property is set, or the configured corpus path is null or empty.
/// </exception>
/// <exception cref="System.Exception"/>
public ACEMentionExtractor(Dictionaries dict, Properties props, Semantics semantics)
    : base(dict, semantics)
{
    stanfordProcessor = LoadStanfordProcessor(props);

    if (props.Contains(Constants.Ace2004Prop))
    {
        corpusPath = props.GetProperty(Constants.Ace2004Prop);
        aceReader = new AceReader(stanfordProcessor, false, "ACE2004");
    }
    else if (props.Contains(Constants.Ace2005Prop))
    {
        corpusPath = props.GetProperty(Constants.Ace2005Prop);
        aceReader = new AceReader(stanfordProcessor, false);
    }
    else
    {
        // Without this guard the reader and path stay null and are dereferenced below.
        throw new System.ArgumentException("Neither " + Constants.Ace2004Prop + " nor " + Constants.Ace2005Prop + " is set; cannot locate the ACE corpus.", "props");
    }

    if (string.IsNullOrEmpty(corpusPath))
    {
        // An empty path would otherwise be indexed at position -1 below.
        throw new System.ArgumentException("The ACE corpus path is null or empty.", "props");
    }

    aceReader.SetLoggerLevel(Level.Info);
    if (corpusPath[corpusPath.Length - 1] != File.separatorChar)
    {
        corpusPath += File.separatorChar;
    }
    files = new File(corpusPath).List();
}
/// <summary>When mention boundaries are given</summary>
/// <remarks>
/// For each sentence index, copies the gold mentions into a new list, then runs
/// <c>FindHead</c>, <c>SetBarePlural</c> and <c>RemoveSpuriousMentions</c> on that copy.
/// The lists passed in through <paramref name="allGoldMentions"/> are not modified by
/// this method itself; the <c>Mention</c> objects are shared with the returned lists.
/// </remarks>
/// <param name="allGoldMentions">Gold mentions, one list per sentence.</param>
/// <param name="doc">Annotation whose sentence list is indexed in parallel with <paramref name="allGoldMentions"/>.</param>
/// <param name="dict">Dictionaries forwarded to <c>RemoveSpuriousMentions</c>.</param>
/// <returns>The filtered mentions, one list per sentence.</returns>
public virtual IList<IList<Mention>> FilterPredictedMentions(IList<IList<Mention>> allGoldMentions, Annotation doc, Dictionaries dict)
{
    IList<IList<Mention>> predictedMentions = new List<IList<Mention>>();
    for (int i = 0; i < allGoldMentions.Count; i++)
    {
        ICoreMap s = doc.Get(typeof(CoreAnnotations.SentencesAnnotation))[i];
        IList<Mention> mentions = new List<Mention>();
        predictedMentions.Add(mentions);
        Sharpen.Collections.AddAll(mentions, allGoldMentions[i]);

        FindHead(s, mentions);
        SetBarePlural(mentions);
        RemoveSpuriousMentions(s, mentions, dict);
    }
    return predictedMentions;
}
/// <summary>Process discourse information</summary>
/// <remarks>
/// Determines the document type, marks quotations, finds speakers, and then links
/// predicted mentions to speakers in two passes:
/// <list type="number">
/// <item>For each mention whose head word carries a speaker annotation, a
/// <c>SpeakerInfo</c> is created the first time that speaker string is seen; the mention
/// is attached to it when <c>Rules.MentionMatchesSpeaker</c> accepts it with the flag set
/// to true. If the speaker string is a decimal integer and the mention is in a non-zero
/// utterance, the (mention ID, speaker mention ID) pair is recorded in
/// <c>speakerPairs</c>. Outside articles, a second-person mention directly followed by
/// the word "know" is flagged as generic.</item>
/// <item>Every mention still without a speaker is attached to the first
/// <c>SpeakerInfo</c> that has a real speaker name and is accepted by
/// <c>Rules.MentionMatchesSpeaker</c> with the flag set to false.</item>
/// </list>
/// </remarks>
/// <param name="dict">Dictionaries forwarded to <c>FindDocType</c> and <c>FindSpeakers</c>.</param>
protected internal virtual void ProcessDiscourse(Dictionaries dict)
{
    docType = FindDocType(dict);
    MarkQuotations(this.annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)), false);
    FindSpeakers(dict);

    // find 'speaker mention' for each mention
    foreach (Mention m in allPredictedMentions.Values)
    {
        int utter = m.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation));
        string speaker = m.headWord.Get(typeof(CoreAnnotations.SpeakerAnnotation));
        if (speaker != null)
        {
            // Populate speaker info. TryGetValue is used because a dictionary indexer
            // throws on a missing key instead of yielding null.
            SpeakerInfo speakerInfo;
            if (!speakerInfoMap.TryGetValue(speaker, out speakerInfo) || speakerInfo == null)
            {
                speakerInfo = new SpeakerInfo(speaker);
                speakerInfoMap[speaker] = speakerInfo;
                // span indicates this is the speaker
                if (Rules.MentionMatchesSpeaker(m, speakerInfo, true))
                {
                    m.speakerInfo = speakerInfo;
                }
            }

            if (NumberMatchingRegex.IsDecimalInteger(speaker))
            {
                // A value that does not fit an int simply yields no pair (no mention
                // found for the speaker), as before, but without throwing and swallowing.
                int speakerMentionID;
                if (int.TryParse(speaker, out speakerMentionID) && utter != 0)
                {
                    // Add pairs of mention id and the mention id of the speaker
                    speakerPairs.Add(new Pair<int, int>(m.mentionID, speakerMentionID));
                }
            }
        }

        // set generic 'you' : e.g., you know in conversation
        if (docType != Document.DocType.Article
            && m.person == Dictionaries.Person.You
            && m.endIndex < m.sentenceWords.Count - 1
            && Sharpen.Runtime.EqualsIgnoreCase(m.sentenceWords[m.endIndex].Get(typeof(CoreAnnotations.TextAnnotation)), "know"))
        {
            m.generic = true;
        }
    }

    // now that we have identified the speakers, first pass to check if mentions should cluster with the speakers
    foreach (Mention candidate in allPredictedMentions.Values)
    {
        if (candidate.speakerInfo != null)
        {
            continue;
        }
        foreach (SpeakerInfo knownSpeaker in speakerInfoMap.Values)
        {
            // do loose match - assumes that there isn't that many speakers....
            if (knownSpeaker.HasRealSpeakerName() && Rules.MentionMatchesSpeaker(candidate, knownSpeaker, false))
            {
                candidate.speakerInfo = knownSpeaker;
                break;
            }
        }
    }
}
/// <summary>
/// Builds a document from an annotation and its predicted (and optionally gold) mentions.
/// </summary>
/// <remarks>
/// When gold mentions are supplied, <c>FindTwinMentions(true)</c> is run and every gold
/// mention is indexed by its mention ID in <c>allGoldMentions</c>. The constructor then
/// calls <c>Initialize</c>, <c>ProcessDiscourse</c> and <c>PrintMentionDetection</c>, in
/// that order.
/// </remarks>
/// <param name="anno">Annotation of the document; its sentence count is stored.</param>
/// <param name="predictedMentions">Predicted mentions, one list per sentence.</param>
/// <param name="goldMentions">Gold mentions, one list per sentence, or null when unavailable.</param>
/// <param name="dict">Dictionaries forwarded to <c>ProcessDiscourse</c>.</param>
public Document(Annotation anno, IList<IList<Mention>> predictedMentions, IList<IList<Mention>> goldMentions, Dictionaries dict)
    : this()
{
    annotation = anno;
    numSentences = anno.Get(typeof(CoreAnnotations.SentencesAnnotation)).Count;
    predictedOrderedMentionsBySentence = predictedMentions;
    goldOrderedMentionsBySentence = goldMentions;

    bool hasGold = goldMentions != null;
    if (hasGold)
    {
        FindTwinMentions(true);

        // fill allGoldMentions
        foreach (IList<Mention> sentenceGold in goldOrderedMentionsBySentence)
        {
            foreach (Mention gold in sentenceGold)
            {
                allGoldMentions[gold.mentionID] = gold;
            }
        }
    }

    // set original ID, initial coref clusters, paragraph annotation, mention positions
    Initialize();
    ProcessDiscourse(dict);
    PrintMentionDetection();
}
/// <summary>
/// Looks in the last sentence of a paragraph for a "report"/"say" verb whose subject is a
/// known person mention, and returns that mention's ID as the speaker of the following
/// paragraph.
/// </summary>
/// <remarks>
/// For every token whose lemma is <c>report</c> or <c>say</c>, the token's dependency node
/// is looked up and its <c>nsubj</c> children are examined. A child counts when a mention
/// is registered at its head position in <c>mentionheadPositions</c> and that mention's
/// NER string starts with <c>PER</c>. When several match, the last one found wins.
/// </remarks>
/// <param name="paragraph">Sentences of the paragraph; only the last one is inspected.</param>
/// <param name="paragraphOffset">Index of the paragraph's first sentence within the document.</param>
/// <param name="dict">Not read by this method.</param>
/// <returns>The mention ID as a string, or the empty string when nothing is found.</returns>
private string FindNextParagraphSpeaker(IList<ICoreMap> paragraph, int paragraphOffset, Dictionaries dict)
{
    string speaker = string.Empty;
    if (paragraph.Count == 0)
    {
        // No sentence to inspect; indexing the last element would be out of range.
        return speaker;
    }

    int lastSentIndex = paragraph.Count - 1;
    ICoreMap lastSent = paragraph[lastSentIndex];
    foreach (CoreLabel w in lastSent.Get(typeof(CoreAnnotations.TokensAnnotation)))
    {
        // Literal-first comparison so a token without a lemma is skipped, not dereferenced.
        string lemma = w.Get(typeof(CoreAnnotations.LemmaAnnotation));
        if (!"report".Equals(lemma) && !"say".Equals(lemma))
        {
            continue;
        }

        string word = w.Get(typeof(CoreAnnotations.TextAnnotation));
        SemanticGraph dependency = lastSent.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
        IndexedWord t = dependency.GetNodeByWordPattern(word);
        if (t == null)
        {
            // The word has no node in the dependency graph; nothing to inspect.
            continue;
        }

        foreach (Pair<GrammaticalRelation, IndexedWord> child in dependency.ChildPairs(t))
        {
            if (!child.First().GetShortName().Equals("nsubj"))
            {
                continue;
            }

            int subjectIndex = child.Second().Index(); // start from 1
            IntTuple headPosition = new IntTuple(2);
            headPosition.Set(0, lastSentIndex + paragraphOffset);
            headPosition.Set(1, subjectIndex - 1);
            if (mentionheadPositions.Contains(headPosition) && mentionheadPositions[headPosition].nerString.StartsWith("PER"))
            {
                speaker = int.ToString(mentionheadPositions[headPosition].mentionID);
            }
        }
    }
    return speaker;
}
/// <summary>
/// Tells whether a mention is directly preceded by "&lt;part word&gt; of".
/// </summary>
/// <param name="m">Mention whose start index is examined.</param>
/// <param name="sent">Tokens of the sentence containing the mention.</param>
/// <param name="dict">Dictionaries providing the <c>parts</c> set.</param>
/// <returns>
/// True when the mention starts at index 2 or later, the token just before it equals
/// "of" ignoring case, and the lower-cased token before that is in <c>dict.parts</c>.
/// </returns>
private static bool PartitiveRule(Mention m, IList<CoreLabel> sent, Dictionaries dict)
{
    if (m.startIndex < 2)
    {
        return false;
    }

    string precedingWord = sent[m.startIndex - 1].Get(typeof(CoreAnnotations.TextAnnotation));
    if (!Sharpen.Runtime.EqualsIgnoreCase(precedingWord, "of"))
    {
        return false;
    }

    string partCandidate = sent[m.startIndex - 2].Get(typeof(CoreAnnotations.TextAnnotation)).ToLower(Locale.English);
    return dict.parts.Contains(partCandidate);
}
/// <summary>Main method of mention detection.</summary>
/// <remarks>
/// Main method of mention detection.
/// Extract all NP, PRP or NE, and filter out by manually written patterns.
/// For each sentence the extraction steps run in a fixed order: premarked entity
/// mentions, named entity mentions, NP/PRP, enumerations; then heads are found, bare
/// plurals are set and spurious mentions are removed. Mention IDs are assigned afterwards
/// only when <c>assignIds</c> is set.
/// </remarks>
/// <param name="doc">Annotation whose sentences are processed.</param>
/// <param name="maxID">Value forwarded to <c>AssignMentionIDs</c>.</param>
/// <param name="dict">Dictionaries forwarded to <c>RemoveSpuriousMentions</c>.</param>
/// <returns>The predicted mentions, one list per sentence.</returns>
public virtual IList<IList<Mention>> ExtractPredictedMentions(Annotation doc, int maxID, Dictionaries dict)
{
    IList<IList<Mention>> predictedMentions = new List<IList<Mention>>();

    foreach (ICoreMap sentence in doc.Get(typeof(CoreAnnotations.SentencesAnnotation)))
    {
        IList<Mention> sentenceMentions = new List<Mention>();
        predictedMentions.Add(sentenceMentions);

        // Span sets shared by the extraction steps of this sentence.
        ICollection<IntPair> mentionSpans = Generics.NewHashSet();
        ICollection<IntPair> entitySpans = Generics.NewHashSet();

        ExtractPremarkedEntityMentions(sentence, sentenceMentions, mentionSpans, entitySpans);
        ExtractNamedEntityMentions(sentence, sentenceMentions, mentionSpans, entitySpans);
        ExtractNPorPRP(sentence, sentenceMentions, mentionSpans, entitySpans);
        ExtractEnumerations(sentence, sentenceMentions, mentionSpans, entitySpans);

        FindHead(sentence, sentenceMentions);
        SetBarePlural(sentenceMentions);
        RemoveSpuriousMentions(sentence, sentenceMentions, dict);
    }

    // assign mention IDs
    if (assignIds)
    {
        AssignMentionIDs(predictedMentions, maxID);
    }
    return predictedMentions;
}
/// <summary>
/// Creates the semantics holder by instantiating the class named
/// <c>edu.stanford.nlp.dcoref.WordNet</c> through its parameterless constructor, looked
/// up by name at run time, and storing the instance in <c>wordnet</c>.
/// </summary>
/// <param name="dict">Not read by this constructor.</param>
/// <exception cref="System.Exception"/>
public Semantics(Dictionaries dict)
{
    const string wordNetClassName = "edu.stanford.nlp.dcoref.WordNet";
    Constructor<object> defaultConstructor = (Sharpen.Runtime.GetType(wordNetClassName)).GetConstructor();
    wordnet = defaultConstructor.NewInstance();
}
/// <summary>
/// Searches around a quotation for its speaker, stopping at the first place where
/// <c>FindSpeaker</c> succeeds.
/// </summary>
/// <remarks>
/// Search order:
/// <list type="number">
/// <item>the quotation's first sentence, from its start up to the quotation start;</item>
/// <item>the sentence holding the end position, from that position to the sentence end;</item>
/// <item>the whole previous sentence, when the quotation starts at token 0 or 1 and a
/// previous sentence exists;</item>
/// <item>the whole next sentence, when the end position is the last token of its
/// sentence and a next sentence exists.</item>
/// </list>
/// </remarks>
/// <param name="utterNum">Utterance index of the quotation.</param>
/// <param name="sentences">All sentences of the document.</param>
/// <param name="beginQuotation">(sentence index, token index) where the quotation starts.</param>
/// <param name="endQuotation">(sentence index, token index) of the first token after the quotation.</param>
/// <param name="dict">Dictionaries forwarded to <c>FindSpeaker</c>.</param>
private void FindQuotationSpeaker(int utterNum, IList<ICoreMap> sentences, Pair<int, int> beginQuotation, Pair<int, int> endQuotation, Dictionaries dict)
{
    int beginSent = beginQuotation.First();
    int beginToken = beginQuotation.Second();
    int endSent = endQuotation.First();
    int endToken = endQuotation.Second();

    if (FindSpeaker(utterNum, beginSent, sentences, 0, beginToken, dict))
    {
        return;
    }

    int endSentTokenCount = sentences[endSent].Get(typeof(CoreAnnotations.TokensAnnotation)).Count;
    if (FindSpeaker(utterNum, endSent, sentences, endToken, endSentTokenCount, dict))
    {
        return;
    }

    if (beginToken <= 1 && beginSent > 0)
    {
        int previousTokenCount = sentences[beginSent - 1].Get(typeof(CoreAnnotations.TokensAnnotation)).Count;
        if (FindSpeaker(utterNum, beginSent - 1, sentences, 0, previousTokenCount, dict))
        {
            return;
        }
    }

    // Compare against the token count, like every other bound in this method; the size
    // of the sentence map itself is not a token position.
    if (endToken == endSentTokenCount - 1 && sentences.Count > endSent + 1)
    {
        int nextTokenCount = sentences[endSent + 1].Get(typeof(CoreAnnotations.TokensAnnotation)).Count;
        FindSpeaker(utterNum, endSent + 1, sentences, 0, nextTokenCount, dict);
    }
}
/// <summary>
/// Creates a CoNLL extractor exactly as the three-argument constructor does, and
/// additionally stores the given classifier as the singleton predictor.
/// </summary>
/// <param name="dict">Dictionaries passed on to the three-argument constructor.</param>
/// <param name="props">Properties passed on to the three-argument constructor.</param>
/// <param name="semantics">Semantics passed on to the three-argument constructor.</param>
/// <param name="singletonModel">Classifier stored in <c>singletonPredictor</c>.</param>
/// <exception cref="System.Exception"/>
public CoNLLMentionExtractor(
    Dictionaries dict,
    Properties props,
    Semantics semantics,
    LogisticClassifier<string, string> singletonModel)
    : this(dict, props, semantics)
{
    this.singletonPredictor = singletonModel;
}
/// <summary>
/// Looks for a reporting verb with a subject inside a token range of one sentence and
/// records that subject as the speaker of the given utterance.
/// </summary>
/// <remarks>
/// Only tokens with utterance index 0 are considered. For the first such token whose
/// lemma is in <c>dict.reportVerb</c> and whose dependency node has an <c>nsubj</c>
/// child, <c>speakers[utterNum]</c> is set to the ID of the mention registered at the
/// subject's head position, or to the subject's word when no mention is registered there.
/// A warning is logged when a reporting verb has no node in the dependency graph.
/// </remarks>
/// <param name="utterNum">Utterance index whose speaker is being searched.</param>
/// <param name="sentNum">Index of the sentence to search.</param>
/// <param name="sentences">All sentences of the document.</param>
/// <param name="startIndex">First token index to inspect (inclusive).</param>
/// <param name="endIndex">Token index at which to stop (exclusive).</param>
/// <param name="dict">Dictionaries providing the <c>reportVerb</c> set.</param>
/// <returns>True when a speaker was recorded, otherwise false.</returns>
private bool FindSpeaker(int utterNum, int sentNum, IList<ICoreMap> sentences, int startIndex, int endIndex, Dictionaries dict)
{
    ICoreMap sentence = sentences[sentNum];
    IList<CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

    for (int pos = startIndex; pos < endIndex; pos++)
    {
        CoreLabel token = tokens[pos];
        if (token.Get(typeof(CoreAnnotations.UtteranceAnnotation)) != 0)
        {
            continue;
        }

        string lemma = token.Get(typeof(CoreAnnotations.LemmaAnnotation));
        string word = token.Get(typeof(CoreAnnotations.TextAnnotation));
        if (!dict.reportVerb.Contains(lemma))
        {
            continue;
        }

        // find subject
        SemanticGraph dependency = sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
        IndexedWord verbNode = dependency.GetNodeByWordPattern(word);
        if (verbNode == null)
        {
            SieveCoreferenceSystem.logger.Warning("Cannot find node in dependency for word " + word);
            continue;
        }

        foreach (Pair<GrammaticalRelation, IndexedWord> child in dependency.ChildPairs(verbNode))
        {
            if (!child.First().GetShortName().Equals("nsubj"))
            {
                continue;
            }

            IndexedWord subject = child.Second();
            string subjectString = subject.Word();
            int subjectIndex = subject.Index(); // start from 1

            IntTuple headPosition = new IntTuple(2);
            headPosition.Set(0, sentNum);
            headPosition.Set(1, subjectIndex - 1);

            speakers[utterNum] = mentionheadPositions.Contains(headPosition)
                ? int.ToString(mentionheadPositions[headPosition].mentionID)
                : subjectString;
            return true;
        }
    }
    return false;
}
/// <summary>Filter out all spurious mentions</summary>
/// <remarks>
/// Works in two stages and removes the collected mentions from
/// <paramref name="mentions"/> at the end. Stage one applies a series of independent
/// per-mention rules (pleonastic "it", non-words, leading quantifier, partitive, bare NP,
/// "%" head, PERCENT/MONEY head, adjectival demonym, stop list). Stage two handles nested
/// mentions that share a head word: the inner one is dropped unless the token right after
/// it is tagged "," or "CC".
/// </remarks>
/// <param name="s">Sentence providing the parse tree and tokens.</param>
/// <param name="mentions">Mentions of the sentence; modified in place.</param>
/// <param name="dict">Dictionaries providing the word lists used by the rules.</param>
protected internal static void RemoveSpuriousMentions(ICoreMap s, IList<Mention> mentions, Dictionaries dict)
{
    Tree tree = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
    IList<CoreLabel> sent = s.Get(typeof(CoreAnnotations.TokensAnnotation));
    ICollection<Mention> spurious = Generics.NewHashSet();

    foreach (Mention m in mentions)
    {
        string headPOS = m.headWord.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation));
        string headNE = m.headWord.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));

        // pleonastic it
        if (IsPleonastic(m, tree))
        {
            spurious.Add(m);
        }

        // non word such as 'hmm'
        if (dict.nonWords.Contains(m.headString))
        {
            spurious.Add(m);
        }

        // quantRule : not starts with 'any', 'all' etc
        if (m.originalSpan.Count > 0 && dict.quantifiers.Contains(m.originalSpan[0].Get(typeof(CoreAnnotations.TextAnnotation)).ToLower(Locale.English)))
        {
            spurious.Add(m);
        }

        // partitiveRule
        if (PartitiveRule(m, sent, dict))
        {
            spurious.Add(m);
        }

        // bareNPRule
        if (headPOS.Equals("NN")
            && !dict.temporals.Contains(m.headString)
            && (m.originalSpan.Count == 1 || m.originalSpan[0].Get(typeof(CoreAnnotations.PartOfSpeechAnnotation)).Equals("JJ")))
        {
            spurious.Add(m);
        }

        if (m.headString.Equals("%"))
        {
            spurious.Add(m);
        }
        if (headNE.Equals("PERCENT") || headNE.Equals("MONEY"))
        {
            spurious.Add(m);
        }

        // adjective form of nations
        if (dict.IsAdjectivalDemonym(m.SpanToString()))
        {
            spurious.Add(m);
        }

        // stop list (e.g., U.S., there)
        if (InStopList(m))
        {
            spurious.Add(m);
        }
    }

    // nested mention with shared headword (except apposition, enumeration): pick larger one
    foreach (Mention outer in mentions)
    {
        foreach (Mention inner in mentions)
        {
            if (outer == inner || spurious.Contains(outer) || spurious.Contains(inner))
            {
                continue;
            }
            if (outer.sentNum != inner.sentNum || outer.headWord != inner.headWord || !inner.InsideIn(outer))
            {
                continue;
            }

            // Keep the inner mention when a comma or coordinating conjunction follows it.
            bool followedByListMarker = false;
            if (inner.endIndex < sent.Count)
            {
                string nextPOS = sent[inner.endIndex].Get(typeof(CoreAnnotations.PartOfSpeechAnnotation));
                followedByListMarker = nextPOS.Equals(",") || nextPOS.Equals("CC");
            }
            if (!followedByListMarker)
            {
                spurious.Add(inner);
            }
        }
    }

    mentions.RemoveAll(spurious);
}
/// <summary>
/// Determines the speaker of a paragraph, if not already known, and returns the speaker
/// suggested for the paragraph that follows.
/// </summary>
/// <remarks>
/// When <c>speakers</c> has no entry for <paramref name="paragraphUtterIndex"/>:
/// a non-empty <paramref name="nextParagraphSpeaker"/> is used as the speaker; otherwise
/// the last sentence is scanned token by token up to the first token whose POS tag starts
/// with "V". If no such token exists and a token with an NER tag starting with "PER" has
/// a mention registered at its position, that mention's ID becomes the speaker (the last
/// such token wins). An empty paragraph is left without a speaker from this scan.
/// </remarks>
/// <param name="paragraph">Sentences of the paragraph; may be empty.</param>
/// <param name="paragraphUtterIndex">Utterance index shared by the paragraph.</param>
/// <param name="nextParagraphSpeaker">Speaker carried over from the previous paragraph, or empty.</param>
/// <param name="paragraphOffset">Index of the paragraph's first sentence within the document.</param>
/// <param name="dict">Dictionaries forwarded to <c>FindNextParagraphSpeaker</c>.</param>
/// <returns>
/// The result of <c>FindNextParagraphSpeaker</c> for this paragraph, or the empty string
/// when the paragraph has no sentences.
/// </returns>
private string FindParagraphSpeaker(IList<ICoreMap> paragraph, int paragraphUtterIndex, string nextParagraphSpeaker, int paragraphOffset, Dictionaries dict)
{
    if (!speakers.Contains(paragraphUtterIndex))
    {
        if (!nextParagraphSpeaker.Equals(string.Empty))
        {
            speakers[paragraphUtterIndex] = nextParagraphSpeaker;
        }
        else if (paragraph.Count > 0)
        {
            // find the speaker of this paragraph (John, nbc news)
            int lastSentIndex = paragraph.Count - 1;
            IList<CoreLabel> lastTokens = paragraph[lastSentIndex].Get(typeof(CoreAnnotations.TokensAnnotation));
            string speaker = string.Empty;
            bool hasVerb = false;
            for (int i = 0; i < lastTokens.Count; i++)
            {
                CoreLabel w = lastTokens[i];
                string pos = w.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation));
                string ner = w.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
                if (pos.StartsWith("V"))
                {
                    hasVerb = true;
                    break;
                }
                if (ner.StartsWith("PER"))
                {
                    IntTuple headPosition = new IntTuple(2);
                    headPosition.Set(0, lastSentIndex + paragraphOffset);
                    headPosition.Set(1, i);
                    if (mentionheadPositions.Contains(headPosition))
                    {
                        speaker = int.ToString(mentionheadPositions[headPosition].mentionID);
                    }
                }
            }
            if (!hasVerb && !speaker.Equals(string.Empty))
            {
                speakers[paragraphUtterIndex] = speaker;
            }
        }
    }

    if (paragraph.Count == 0)
    {
        // Nothing to inspect for the following paragraph; looking up a last sentence
        // here would index an empty list.
        return string.Empty;
    }
    return FindNextParagraphSpeaker(paragraph, paragraphOffset, dict);
}
/// <summary>Generate the training features from the CoNLL input file.</summary>
/// <remarks>
/// Reads documents one by one through a <c>CoNLLMentionExtractor</c>. Mentions in gold
/// coreference clusters are added with label "1". Predicted mentions whose head word is
/// not the head word of any gold mention are added with label "0". In both cases a
/// mention is skipped when its head tag starts with "V" or when its head has no node in
/// the mention's dependency graph. Summary statistics are produced on the dataset before
/// it is returned.
/// </remarks>
/// <param name="props">Properties used to build the dictionaries and the extractor.</param>
/// <returns>Dataset of feature vectors</returns>
/// <exception cref="System.Exception"/>
private static GeneralDataset<string, string> GenerateFeatureVectors(Properties props)
{
    GeneralDataset<string, string> dataset = new Dataset<string, string>();
    Dictionaries dict = new Dictionaries(props);
    MentionExtractor mentionExtractor = new CoNLLMentionExtractor(dict, props, new Semantics(dict));

    Document document;
    while ((document = mentionExtractor.NextDoc()) != null)
    {
        SetTokenIndices(document);
        document.ExtractGoldCorefClusters();

        // Generate features for coreferent mentions with class label 1
        foreach (CorefCluster entity in document.goldCorefClusters.Values)
        {
            foreach (Mention mention in entity.GetCorefMentions())
            {
                // Ignore verbal mentions, then mentions whose head is missing from the graph.
                bool verbal = mention.headWord.Tag().StartsWith("V");
                if (!verbal && mention.dependency.GetNodeByIndexSafe(mention.headWord.Index()) != null)
                {
                    List<string> feats = mention.GetSingletonFeatures(dict);
                    dataset.Add(new BasicDatum<string, string>(feats, "1"));
                }
            }
        }

        // Generate features for singletons with class label 0
        List<CoreLabel> goldHeads = new List<CoreLabel>();
        foreach (Mention goldMention in document.allGoldMentions.Values)
        {
            goldHeads.Add(goldMention.headWord);
        }

        foreach (Mention predicted in document.allPredictedMentions.Values)
        {
            SemanticGraph graph = predicted.dependency;
            IndexedWord headNode = graph.GetNodeByIndexSafe(predicted.headWord.Index());

            // Checked in order: head present in the graph, not verbal, and not in the
            // gold set (a mention in the gold set is not a singleton).
            bool usable = headNode != null
                && !predicted.headWord.Tag().StartsWith("V")
                && !goldHeads.Contains(predicted.headWord);
            if (usable)
            {
                dataset.Add(new BasicDatum<string, string>(predicted.GetSingletonFeatures(dict), "0"));
            }
        }
    }

    dataset.SummaryStatistics();
    return dataset;
}