/// <summary>
        /// Builds a speaker record from a raw speaker string. The raw string is stored unchanged
        /// as the speaker id. When it contains a comma after the first character, the text before
        /// the comma becomes the speaker name and the trimmed text after it becomes the speaker
        /// description (or null when nothing but whitespace follows the comma).
        /// </summary>
        /// <param name="speakerName">Raw speaker string, optionally of the form "name, description".</param>
        public SpeakerInfo(string speakerName)
        {
            // TODO: keep track of speaker utterances?

            // The untouched input doubles as the speaker id.
            this.speakerId = speakerName;

            int separatorIndex = speakerName.IndexOf(',');
            if (separatorIndex <= 0)
            {
                // No comma at all, or a comma in the very first position: keep the whole string as the name.
                this.speakerName = speakerName;
            }
            else
            {
                // Drop everything from the comma onwards to obtain the name.
                this.speakerName = Sharpen.Runtime.Substring(speakerName, 0, separatorIndex);

                // IndexOf always returns an index below Length, so a (possibly empty) tail always exists.
                string description = Sharpen.Runtime.Substring(speakerName, separatorIndex + 1).Trim();
                speakerDesc = description.IsEmpty() ? null : description;
            }

            // Tokenized speaker name.
            this.speakerNameStrings = WhitespacePattern.Split(this.speakerName);
            // Whether the speaker id is a number (probably a mention id).
            speakerIdIsNumber = NumberMatchingRegex.IsDecimalInteger(speakerId);
            // Whether the speaker id matches the default (system-assigned) speaker pattern.
            speakerIdIsAutoDetermined = DefaultSpeakerPattern.Matcher(speakerId).Matches();
        }
// Example no. 2
// 0
 /// <summary>
 /// Process discourse information: determine the document type, mark quotations, find the
 /// speakers, attach speaker information to the predicted mentions, record
 /// (mention id, speaker mention id) pairs and flag generic "you" mentions.
 /// </summary>
 /// <param name="dict">Dictionaries handed on to <c>FindDocType</c> and <c>FindSpeakers</c>.</param>
 protected internal virtual void ProcessDiscourse(Dictionaries dict)
 {
     docType = FindDocType(dict);
     MarkQuotations(this.annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)), false);
     FindSpeakers(dict);

     // find 'speaker mention' for each mention
     foreach (Mention m in allPredictedMentions.Values)
     {
         int    utter   = m.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation));
         string speaker = m.headWord.Get(typeof(CoreAnnotations.SpeakerAnnotation));
         if (speaker != null)
         {
             // Populate speaker info (get-or-create). TryGetValue is used instead of the indexer
             // because a standard .NET dictionary indexer throws KeyNotFoundException for a key
             // that has not been stored yet, rather than yielding null.
             SpeakerInfo info;
             if (!speakerInfoMap.TryGetValue(speaker, out info) || info == null)
             {
                 info = new SpeakerInfo(speaker);
                 speakerInfoMap[speaker] = info;
                 // span indicates this is the speaker
                 if (Rules.MentionMatchesSpeaker(m, info, true))
                 {
                     m.speakerInfo = info;
                 }
             }

             // A numeric speaker string is treated as the mention id of the speaker.
             // If it cannot be parsed as an int (e.g. it overflows) there is no speaker
             // mention to pair with, so nothing is recorded.
             int speakerMentionID;
             if (NumberMatchingRegex.IsDecimalInteger(speaker)
                 && utter != 0
                 && int.TryParse(speaker, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out speakerMentionID))
             {
                 // Add pairs of mention id and the mention id of the speaker
                 speakerPairs.Add(new Pair <int, int>(m.mentionID, speakerMentionID));
             }
         }

         // set generic 'you' : e.g., you know in conversation
         if (docType != Document.DocType.Article
             && m.person == Dictionaries.Person.You
             && m.endIndex < m.sentenceWords.Count - 1
             && Sharpen.Runtime.EqualsIgnoreCase(m.sentenceWords[m.endIndex].Get(typeof(CoreAnnotations.TextAnnotation)), "know"))
         {
             m.generic = true;
         }
     }

     // now that we have identified the speakers, first pass to check if mentions should cluster with the speakers
     foreach (Mention candidate in allPredictedMentions.Values)
     {
         if (candidate.speakerInfo != null)
         {
             // already linked to a speaker in the loop above
             continue;
         }
         foreach (SpeakerInfo knownSpeaker in speakerInfoMap.Values)
         {
             // do loose match - assumes that there isn't that many speakers....
             if (knownSpeaker.HasRealSpeakerName() && Rules.MentionMatchesSpeaker(candidate, knownSpeaker, false))
             {
                 candidate.speakerInfo = knownSpeaker;
                 break;
             }
         }
     }
 }
        /// <summary>
        /// Renders sentence <paramref name="i"/> of <paramref name="document"/> as a single string
        /// with mention boundaries marked inline: "[" is written before the token at a mention's
        /// start index and "]_N" is written at its end index. The string is prefixed with the
        /// speaker and utterance annotations of the sentence's first token.
        /// </summary>
        /// <param name="i">Index of the sentence within the document's sentence list.</param>
        /// <param name="document">Document supplying the sentences and mentions.</param>
        /// <param name="gold">
        /// When true, mentions come from <c>document.goldMentions</c> and cluster ids from
        /// <c>goldCorefClusterID</c>; otherwise <c>document.predictedMentions</c> and
        /// <c>corefClusterID</c> are used.
        /// </param>
        /// <param name="printClusterID">
        /// When true, the number after "]_" is the cluster id; otherwise it is the mention id.
        /// </param>
        /// <returns>The annotated sentence string.</returns>
        public static string SentenceStringWithMention(int i, Document document, bool gold, bool printClusterID)
        {
            StringBuilder            sentStr   = new StringBuilder();
            IList <ICoreMap>         sentences = document.annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
            IList <IList <Mention> > allMentions;

            if (gold)
            {
                allMentions = document.goldMentions;
            }
            else
            {
                allMentions = document.predictedMentions;
            }

            ICoreMap          sentence = sentences[i];
            IList <Mention>   mentions = allMentions[i];
            IList <CoreLabel> t        = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
            string            speaker  = t[0].Get(typeof(CoreAnnotations.SpeakerAnnotation));

            // A numeric speaker is looked up as a predicted mention id and shown with that mention's span.
            if (NumberMatchingRegex.IsDecimalInteger(speaker))
            {
                speaker = speaker + ": " + document.predictedMentionsByID[System.Convert.ToInt32(speaker)].SpanToString();
            }
            sentStr.Append("\tspeaker: " + speaker + " (" + t[0].Get(typeof(CoreAnnotations.UtteranceAnnotation)) + ") ");

            // Token words, placed by each token's own 1-based Index().
            string[] tokens = new string[t.Count];
            foreach (CoreLabel c in t)
            {
                tokens[c.Index() - 1] = c.Word();
            }

            // Number of mentions opening at each token position, and the mentions closing at each position.
            ICounter <int> startCounts = new ClassicCounter <int>();
            IDictionary <int, IDeque <Mention> > endMentions = Generics.NewHashMap();

            foreach (Mention m in mentions)
            {
                startCounts.IncrementCount(m.startIndex);

                IDeque <Mention> closing;
                if (!endMentions.TryGetValue(m.endIndex, out closing))
                {
                    closing = new ArrayDeque <Mention>();
                    endMentions[m.endIndex] = closing;
                }
                closing.Push(m);
            }

            for (int j = 0; j < tokens.Length; j++)
            {
                // Close the mentions that end before token j, then open the ones that start at it.
                AppendMentionClosers(sentStr, endMentions, j, gold, printClusterID);

                double opening = startCounts.GetCount(j);
                for (int k = 0; k < opening; k++)
                {
                    AppendSpaceUnlessAfterOpenBracket(sentStr);
                    sentStr.Append("[");
                }
                AppendSpaceUnlessAfterOpenBracket(sentStr);
                sentStr.Append(tokens[j]);
            }

            // Mentions whose end index equals the token count close after the last token.
            AppendMentionClosers(sentStr, endMentions, tokens.Length, gold, printClusterID);
            return(sentStr.ToString());
        }

        /// <summary>
        /// Appends "]_id" to <paramref name="sentStr"/> for every mention registered in
        /// <paramref name="endMentions"/> under <paramref name="position"/>; does nothing when none are.
        /// </summary>
        private static void AppendMentionClosers(StringBuilder sentStr, IDictionary <int, IDeque <Mention> > endMentions, int position, bool gold, bool printClusterID)
        {
            IDeque <Mention> closing;
            if (!endMentions.TryGetValue(position, out closing))
            {
                return;
            }
            foreach (Mention m in closing)
            {
                int id;
                if (printClusterID)
                {
                    id = gold ? m.goldCorefClusterID : m.corefClusterID;
                }
                else
                {
                    id = m.mentionID;
                }
                sentStr.Append("]_").Append(id);
            }
        }

        /// <summary>
        /// Appends a single space unless the builder is empty or its last character is '['.
        /// </summary>
        private static void AppendSpaceUnlessAfterOpenBracket(StringBuilder sentStr)
        {
            if (sentStr.Length > 0 && sentStr[sentStr.Length - 1] != '[')
            {
                sentStr.Append(" ");
            }
        }