/// <summary>
/// Widens the time range of <paramref name="currentSpeakerResult"/> to span every
/// item in <paramref name="segment"/>: the start is captured from the first item
/// seen while the result is still unset (end == 0m), and the end is advanced to
/// each successive item's end time, finishing at the last item's end.
/// </summary>
/// <param name="currentSpeakerResult">Result whose start/end are updated in place.</param>
/// <param name="segment">Speaker-labelled segment whose items define the range.</param>
private static void ConfigureTimeRange(ref SpeakerResult currentSpeakerResult, Segment segment)
{
    foreach (var segmentItem in segment.items)
    {
        // An end of 0m marks a freshly created result, so the start has not
        // been captured yet; take it from the first item only.
        if (currentSpeakerResult.end == 0m)
        {
            currentSpeakerResult.start = segmentItem.start_time;
        }

        // Always push the end out to the most recent item's end time.
        currentSpeakerResult.end = segmentItem.end_time;
    }
}
/// <summary>
/// Converts raw AWS Transcribe job output (JSON) into a <see cref="Conversation"/>:
/// groups transcription items into per-speaker results using the speaker_labels
/// segments, then annotates each non-blank result with sentiment and topics via
/// DetermineSentiment / DetermineTopic (AWS Comprehend, per the comment below).
/// </summary>
/// <param name="transcriptionResults">Parsed Transcribe output; must contain
/// results.speaker_labels.segments and results.items.</param>
/// <returns>The assembled conversation with per-speaker and chronological results.</returns>
private static async Task<Conversation> ProcessTranscriptionResults(JObject transcriptionResults)
{
    var result = new Conversation();
    StringBuilder speakerText = new StringBuilder();
    TranscribeAlternative alternative = null;

    // Deserialize the two parallel views of the transcript: speaker-labelled
    // segments and the flat list of transcription items.
    var segments = transcriptionResults["results"]["speaker_labels"]["segments"].ToObject<List<Segment>>();
    var transciptionsItems = transcriptionResults["results"]["items"].ToObject<List<TranscribeItem>>();
    Console.WriteLine($"items: {transciptionsItems?.Count} segments: {segments.Count}");

    var speakerLabel = string.Empty; // NOTE(review): never read in this method — appears unused
    var lastSpeaker = "nobody"; // sentinel that cannot match a real speaker label
    SpeakerResult currentSpeakerResult = new SpeakerResult();
    var itemIdx = 0; // cursor into transciptionsItems; only ever moves forward across segments
    var ti = transciptionsItems;

    // Segments have a begin and end; however, the items contained in them also
    // have begins and ends. The range of the items has a 1-to-1 correlation to
    // the 'pronunciation' transcription item types. These also have ends which
    // are, strangely, outside the range of the segment. So we use the segment to
    // get the speaker, then create an inclusive range for all items under it
    // using the begin of the first and the end of the last.
    foreach (var segment in segments)
    {
        if (segment.items.Length == 0)
        {
            continue;
        }

        // The conversation's duration ends up as the end of the last non-empty segment.
        result.duration = segment.end_time;

        if (!lastSpeaker.Equals(segment.speaker_label))
        {
            // These lines do nothing the first iteration, but tie up the last
            // speaker result when the speaker is changing.
            currentSpeakerResult.text = speakerText.ToString();
            speakerText = new StringBuilder();

            // Create a new speaker result for the new speaker - or the first
            // speaker on the first iteration.
            var idx = result.speakerLabels.IndexOf(segment.speaker_label);
            if (idx == -1)
            {
                // First time this speaker label is seen: register it and give it
                // an empty result list keyed by its index.
                idx = result.speakerLabels.Count;
                result.speakerLabels.Add(segment.speaker_label);
                result.resultBySpeaker.Add(idx, new List<SpeakerResult>());
            }
            currentSpeakerResult = new SpeakerResult();
            currentSpeakerResult.speaker = idx;
            ConfigureTimeRange(ref currentSpeakerResult, segment);
            lastSpeaker = segment.speaker_label;

            // NOTE(review): currentSpeakerResult is stored here but its .text is
            // assigned later — this only reaches the stored entries if
            // SpeakerResult is a reference type. Confirm against its declaration.
            result.resultBySpeaker[idx].Add(currentSpeakerResult);
            result.resultByTime.Add(currentSpeakerResult);
        }
        else
        {
            // Same speaker as the previous segment: just widen the time range.
            ConfigureTimeRange(ref currentSpeakerResult, segment);
        }

        // Consume every transcription item that falls inside the current result's
        // inclusive time range. Items with start_time == 0m are always consumed —
        // presumably untimed items (e.g. punctuation) that should attach to the
        // preceding words; verify against the Transcribe output format.
        for (; itemIdx < ti.Count && ((currentSpeakerResult.start <= ti[itemIdx].start_time && ti[itemIdx].end_time <= currentSpeakerResult.end) || (ti[itemIdx].start_time == 0m)); itemIdx++)
        {
            alternative = ti[itemIdx].alternatives.First();
            if (alternative.content.Equals("[SILENCE]"))
            {
                // Render silence markers as a period rather than the literal tag.
                speakerText.Append(".");
            }
            else
            {
                speakerText.Append(alternative.content);
            }
            speakerText.Append(" ");
        }
    }

    // Flush the accumulated text into the final speaker result (see the
    // reference-type note above).
    currentSpeakerResult.text = speakerText.ToString();

    // Call AWS Comprehend client to get sentiment for all speaker results.
    List<int> keyList = new List<int>(result.resultBySpeaker.Keys);
    for (int keyIdx = 0; keyIdx < keyList.Count; keyIdx++)
    {
        var spkKey = keyList[keyIdx];

        // Walk each speaker's results from last to first; only non-blank text is
        // sent for sentiment/topic analysis.
        for (int resultIdx = result.resultBySpeaker[spkKey].Count - 1; resultIdx >= 0; resultIdx--)
        {
            if (!IsBlankText(result.resultBySpeaker[spkKey][resultIdx].text))
            {
                var speakerResult = result.resultBySpeaker[spkKey][resultIdx];
                speakerResult.sentiment = await
                DetermineSentiment(result.resultBySpeaker[spkKey][resultIdx].text);
                var topics = await DetermineTopic(result.resultBySpeaker[spkKey][resultIdx].text);

                // Index this result under every topic detected in its text.
                foreach (var topic in topics)
                {
                    if (!result.topicLocations.ContainsKey(topic.Text))
                    {
                        result.topicLocations.Add(topic.Text, new List<SpeakerResult>());
                    }
                    result.topicLocations[topic.Text].Add(speakerResult);
                }
            }
        }
    }
    return (result);
}