Esempio n. 1
0
 /// <summary>
 /// Extends the time range of <paramref name="currentSpeakerResult"/> using the items of
 /// <paramref name="segment"/>.
 /// </summary>
 /// <remarks>
 /// An <c>end</c> value of 0 is treated as "range not started yet": while that holds, the visited
 /// item's start time is written to <c>start</c>. Every visited item then overwrites <c>end</c>
 /// with its own end time, so after the loop <c>end</c> holds the end time of the last item.
 /// </remarks>
 /// <param name="currentSpeakerResult">Speaker result whose <c>start</c>/<c>end</c> are updated.</param>
 /// <param name="segment">Segment whose <c>items</c> supply the start and end times.</param>
 private static void ConfigureTimeRange(ref SpeakerResult currentSpeakerResult, Segment segment)
 {
     foreach (var entry in segment.items)
     {
         // end == 0 doubles as the "no range recorded so far" marker.
         var rangeNotStarted = currentSpeakerResult.end == 0m;
         if (rangeNotStarted)
         {
             currentSpeakerResult.start = entry.start_time;
         }

         // Always push the end forward to the item just visited.
         currentSpeakerResult.end = entry.end_time;
     }
 }
Esempio n. 2
0
        /// <summary>
        /// Builds a <see cref="Conversation"/> from a transcription result document: consecutive
        /// segments of the same speaker are merged into one speaker result, the transcribed items
        /// falling inside that result's time range become its text, and every non-blank result is
        /// then annotated with a sentiment and indexed by topic.
        /// </summary>
        /// <param name="transcriptionResults">
        /// JSON document; <c>results.speaker_labels.segments</c> and <c>results.items</c> are read from it.
        /// </param>
        /// <returns>
        /// The conversation with <c>duration</c>, <c>speakerLabels</c>, <c>resultBySpeaker</c>,
        /// <c>resultByTime</c> and <c>topicLocations</c> filled in.
        /// </returns>
        private static async Task<Conversation> ProcessTranscriptionResults(JObject transcriptionResults)
        {
            var result = new Conversation();

            var segments           = transcriptionResults["results"]["speaker_labels"]["segments"].ToObject<List<Segment>>();
            var transcriptionItems = transcriptionResults["results"]["items"].ToObject<List<TranscribeItem>>();

            Console.WriteLine($"items: {transcriptionItems?.Count} segments: {segments.Count}");

            // The log line above tolerates a null item list; treat it as empty below as well so
            // the merge loop does not dereference null.
            if (transcriptionItems == null)
            {
                transcriptionItems = new List<TranscribeItem>();
            }

            var speakerText = new StringBuilder();

            // Sentinel that is assumed never to equal a real speaker label, so the first
            // non-empty segment always opens a new speaker result.
            var lastSpeaker          = "nobody";
            var currentSpeakerResult = new SpeakerResult();

            // Cursor into transcriptionItems; it only moves forward across all segments.
            var itemIdx = 0;

            // Segments have a begin and end, however the items contained in them also have begins
            // and ends. The ranges of those items have a 1 to 1 correlation to the 'pronunciation'
            // transcription item types. These also have ends which are, strangely, outside the range
            // of the segment. So the segment is used to get the speaker, and an inclusive range is
            // built for all items under it using the begin of the first and the end of the last.
            foreach (var segment in segments)
            {
                if (segment.items.Length == 0)
                {
                    continue;
                }

                // Ends up as the end time of the last non-empty segment.
                result.duration = segment.end_time;

                if (!lastSpeaker.Equals(segment.speaker_label))
                {
                    // These lines do nothing useful on the first iteration, but close out the
                    // previous speaker result when the speaker is changing.
                    currentSpeakerResult.text = speakerText.ToString();
                    speakerText = new StringBuilder();

                    // Look up (or register) the index of the new speaker.
                    var idx = result.speakerLabels.IndexOf(segment.speaker_label);
                    if (idx == -1)
                    {
                        idx = result.speakerLabels.Count;
                        result.speakerLabels.Add(segment.speaker_label);
                        result.resultBySpeaker.Add(idx, new List<SpeakerResult>());
                    }

                    // Start a new speaker result - for a new speaker, or the first speaker on the first iteration.
                    currentSpeakerResult         = new SpeakerResult();
                    currentSpeakerResult.speaker = idx;
                    ConfigureTimeRange(ref currentSpeakerResult, segment);
                    lastSpeaker = segment.speaker_label;

                    result.resultBySpeaker[idx].Add(currentSpeakerResult);
                    result.resultByTime.Add(currentSpeakerResult);
                }
                else
                {
                    // Same speaker keeps talking: just extend the current range.
                    ConfigureTimeRange(ref currentSpeakerResult, segment);
                }

                // Consume items while they fall inside the current range. Items with a start time
                // of 0 are consumed unconditionally (presumably items that carry no timing, such as
                // punctuation - confirm against the transcription format).
                for (; itemIdx < transcriptionItems.Count; itemIdx++)
                {
                    var item = transcriptionItems[itemIdx];

                    var insideRange = currentSpeakerResult.start <= item.start_time &&
                                      item.end_time <= currentSpeakerResult.end;
                    if (!insideRange && item.start_time != 0m)
                    {
                        // Leave the cursor on this item so the next segment can pick it up.
                        break;
                    }

                    var alternative = item.alternatives.First();
                    if (alternative.content.Equals("[SILENCE]"))
                    {
                        speakerText.Append(".");
                    }
                    else
                    {
                        speakerText.Append(alternative.content);
                    }
                    speakerText.Append(" ");
                }
            }

            // Close out the text of the last speaker result.
            currentSpeakerResult.text = speakerText.ToString();

            // Call AWS Comprehend client to get sentiment for all speaker results
            // (DetermineSentiment / DetermineTopic are defined elsewhere; calls are awaited one at a time).
            foreach (var speakerKey in new List<int>(result.resultBySpeaker.Keys))
            {
                var speakerResults = result.resultBySpeaker[speakerKey];

                // Walk each speaker's results from last to first, as the original did.
                for (int resultIdx = speakerResults.Count - 1; resultIdx >= 0; resultIdx--)
                {
                    var speakerResult = speakerResults[resultIdx];
                    if (IsBlankText(speakerResult.text))
                    {
                        continue;
                    }

                    speakerResult.sentiment = await DetermineSentiment(speakerResult.text);

                    var topics = await DetermineTopic(speakerResult.text);

                    foreach (var topic in topics)
                    {
                        // Single lookup: fetch the topic's list, creating it on first sight.
                        List<SpeakerResult> locations;
                        if (!result.topicLocations.TryGetValue(topic.Text, out locations))
                        {
                            locations = new List<SpeakerResult>();
                            result.topicLocations.Add(topic.Text, locations);
                        }
                        locations.Add(speakerResult);
                    }
                }
            }

            return result;
        }