/// <summary>
/// Builds a <c>SpeakerInfo</c> from a raw speaker string of the form
/// <c>name</c> or <c>name, description</c>.
/// </summary>
/// <remarks>
/// The unmodified input is kept as the speaker id. When a comma is found
/// after the first character, the text before it becomes the speaker name
/// and the trimmed text after it becomes the speaker description (left as
/// <c>null</c> when that text is empty). Otherwise the whole input is the
/// speaker name and the description is not assigned here.
/// </remarks>
/// <param name="speakerName">Raw speaker string; also stored as the speaker id.</param>
public SpeakerInfo(string speakerName)
{
    this.speakerId = speakerName;

    int commaIndex = speakerName.IndexOf(',');
    if (commaIndex <= 0)
    {
        // No comma, or a comma in the very first position: keep the input as the name.
        this.speakerName = speakerName;
    }
    else
    {
        // Drop everything after the comma from the name ...
        this.speakerName = Sharpen.Runtime.Substring(speakerName, 0, commaIndex);
        if (commaIndex < speakerName.Length)
        {
            // ... and keep the remainder, if any, as the description.
            string description = Sharpen.Runtime.Substring(speakerName, commaIndex + 1).Trim();
            speakerDesc = description.IsEmpty() ? null : description;
        }
    }

    // Tokenized speaker name.
    this.speakerNameStrings = WhitespacePattern.Split(this.speakerName);

    // Whether the speaker id is a number (probably a mention id).
    speakerIdIsNumber = NumberMatchingRegex.IsDecimalInteger(speakerId);

    // Whether the speaker id matches the default (system-assigned) speaker pattern.
    speakerIdIsAutoDetermined = DefaultSpeakerPattern.Matcher(speakerId).Matches();
}
/// <summary>Process discourse information.</summary>
/// <remarks>
/// Sets the document type, marks quotations and finds speakers, then works
/// over the predicted mentions in two passes:
/// <list type="number">
/// <item>
/// For every mention whose head word carries a speaker annotation, make sure a
/// <c>SpeakerInfo</c> exists for that speaker, record (mention id, speaker
/// mention id) pairs for numeric speakers, and flag generic "you know" uses.
/// </item>
/// <item>
/// For every mention that still has no speaker info, try a loose match against
/// every speaker that has a real speaker name.
/// </item>
/// </list>
/// </remarks>
/// <param name="dict">Dictionaries handed to <c>FindDocType</c> and <c>FindSpeakers</c>.</param>
protected internal virtual void ProcessDiscourse(Dictionaries dict)
{
    docType = FindDocType(dict);
    MarkQuotations(this.annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)), false);
    FindSpeakers(dict);

    // Find the 'speaker mention' for each mention.
    foreach (Mention m in allPredictedMentions.Values)
    {
        int utter = m.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation));
        string speaker = m.headWord.Get(typeof(CoreAnnotations.SpeakerAnnotation));
        if (speaker != null)
        {
            // Populate speaker info. TryGetValue is used instead of the indexer
            // because a dictionary indexer throws on an unseen key, whereas this
            // code needs "absent" to mean "create a new entry".
            SpeakerInfo speakerInfo;
            if (!speakerInfoMap.TryGetValue(speaker, out speakerInfo) || speakerInfo == null)
            {
                speakerInfoMap[speaker] = speakerInfo = new SpeakerInfo(speaker);
                // Span indicates this is the speaker.
                if (Rules.MentionMatchesSpeaker(m, speakerInfo, true))
                {
                    m.speakerInfo = speakerInfo;
                }
            }

            if (NumberMatchingRegex.IsDecimalInteger(speaker))
            {
                // A numeric speaker is treated as a mention id. A value that does
                // not fit in an int is skipped (no mention found for the speaker)
                // rather than signalled through a swallowed exception.
                int speakerMentionID;
                if (int.TryParse(speaker, out speakerMentionID) && utter != 0)
                {
                    // Add pairs of mention id and the mention id of the speaker.
                    speakerPairs.Add(new Pair<int, int>(m.mentionID, speakerMentionID));
                }
            }
        }

        // Set generic 'you', e.g. "you know" in conversation: the mention is a
        // second-person mention directly followed by the word "know".
        if (docType != Document.DocType.Article
            && m.person == Dictionaries.Person.You
            && m.endIndex < m.sentenceWords.Count - 1
            && Sharpen.Runtime.EqualsIgnoreCase(m.sentenceWords[m.endIndex].Get(typeof(CoreAnnotations.TextAnnotation)), "know"))
        {
            m.generic = true;
        }
    }

    // Now that the speakers are identified, do a first pass to check whether
    // the remaining mentions should cluster with one of the speakers.
    foreach (Mention unassigned in allPredictedMentions.Values)
    {
        if (unassigned.speakerInfo != null)
        {
            continue;
        }
        foreach (SpeakerInfo candidate in speakerInfoMap.Values)
        {
            // Loose match; this assumes that there are not that many speakers.
            if (candidate.HasRealSpeakerName() && Rules.MentionMatchesSpeaker(unassigned, candidate, false))
            {
                unassigned.speakerInfo = candidate;
                break;
            }
        }
    }
}
/// <summary>
/// Renders one sentence of a document as a single string with its mentions
/// marked by brackets.
/// </summary>
/// <remarks>
/// The result starts with a <c>"\tspeaker: ... (utterance) "</c> header taken
/// from the first token, followed by the tokens separated by spaces. A
/// <c>[</c> is written before the token at each mention's <c>startIndex</c>,
/// and <c>]_id</c> is written at each mention's <c>endIndex</c> (before the
/// token at that position, or at the end of the sentence).
/// NOTE(review): a numeric speaker is looked up in
/// <c>document.predictedMentionsByID</c> without a presence check; confirm
/// callers guarantee the id exists.
/// </remarks>
/// <param name="i">Index of the sentence within the document.</param>
/// <param name="document">Document that supplies the sentences and mentions.</param>
/// <param name="gold">
/// When true, gold mentions and gold cluster ids are used; otherwise the
/// predicted ones.
/// </param>
/// <param name="printClusterID">
/// When true, the id after each closing bracket is the coreference cluster id;
/// otherwise it is the mention id.
/// </param>
/// <returns>The annotated sentence string.</returns>
public static string SentenceStringWithMention(int i, Document document, bool gold, bool printClusterID)
{
    StringBuilder sentStr = new StringBuilder();
    IList<ICoreMap> sentences = document.annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
    IList<IList<Mention>> allMentions;
    if (gold)
    {
        allMentions = document.goldMentions;
    }
    else
    {
        allMentions = document.predictedMentions;
    }

    ICoreMap sentence = sentences[i];
    IList<Mention> mentions = allMentions[i];
    IList<CoreLabel> t = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

    // Header: the speaker (expanded with the mention span when the speaker is
    // a numeric id) and the utterance number of the first token.
    string speaker = t[0].Get(typeof(CoreAnnotations.SpeakerAnnotation));
    if (NumberMatchingRegex.IsDecimalInteger(speaker))
    {
        speaker = speaker + ": " + document.predictedMentionsByID[System.Convert.ToInt32(speaker)].SpanToString();
    }
    sentStr.Append("\tspeaker: " + speaker + " (" + t[0].Get(typeof(CoreAnnotations.UtteranceAnnotation)) + ") ");

    // Token indices reported by Index() are 1-based.
    string[] tokens = new string[t.Count];
    foreach (CoreLabel c in t)
    {
        tokens[c.Index() - 1] = c.Word();
    }

    // Number of mentions opening at each token position, and the mentions
    // closing at each position.
    ICounter<int> startCounts = new ClassicCounter<int>();
    IDictionary<int, IDeque<Mention>> endMentions = Generics.NewHashMap();
    foreach (Mention m in mentions)
    {
        startCounts.IncrementCount(m.startIndex);

        // Single lookup: fetch the bucket for this end position, creating it on first use.
        IDeque<Mention> bucket;
        if (!endMentions.TryGetValue(m.endIndex, out bucket) || bucket == null)
        {
            bucket = new ArrayDeque<Mention>();
            endMentions[m.endIndex] = bucket;
        }
        bucket.Push(m);
    }

    // Positions run to tokens.Length inclusive so that mentions ending at the
    // end of the sentence are closed by the same code as all others.
    for (int j = 0; j <= tokens.Length; j++)
    {
        IDeque<Mention> closing;
        if (endMentions.TryGetValue(j, out closing) && closing != null)
        {
            foreach (Mention ended in closing)
            {
                int id = (gold) ? ended.goldCorefClusterID : ended.corefClusterID;
                id = (printClusterID) ? id : ended.mentionID;
                sentStr.Append("]_").Append(id);
            }
        }

        if (j == tokens.Length)
        {
            // Past the last token: nothing left to open or print.
            break;
        }

        // Open one bracket per mention starting here; the count is read once
        // instead of on every loop test.
        double openingCount = startCounts.GetCount(j);
        for (int k = 0; k < openingCount; k++)
        {
            if (sentStr.Length > 0 && sentStr[sentStr.Length - 1] != '[')
            {
                sentStr.Append(" ");
            }
            sentStr.Append("[");
        }

        // Separate tokens with a space, except directly after an opening bracket.
        if (sentStr.Length > 0 && sentStr[sentStr.Length - 1] != '[')
        {
            sentStr.Append(" ");
        }
        sentStr.Append(tokens[j]);
    }

    return sentStr.ToString();
}