/// <summary>
/// Builds a <see cref="LexiconSpeechResult"/> from the first (index 0) alternative of a
/// Watson recognition result, copying per-word timing and confidence, keyword hits and
/// word alternatives when they are present.
/// </summary>
/// <param name="watsonResult">Recognition result to convert.</param>
/// <param name="realtimeStart">
/// Offset added to every relative time (TimeStart/TimeEnd) to produce the matching
/// RealtimeStart/RealtimeEnd value.
/// </param>
/// <returns>The converted result, or null when there is no alternative to convert.</returns>
private LexiconSpeechResult CreateSpeechResult(SpeechRecognitionResult watsonResult, float realtimeStart)
{
    // A result can arrive without an alternatives array: ParseRecognizeResponse only
    // assigns it when the response carries an "alternatives" list.
    if (watsonResult == null || watsonResult.alternatives == null || watsonResult.alternatives.Length == 0)
    {
        return null;
    }

    SpeechRecognitionAlternative bestAlternative = watsonResult.alternatives[0];
    if (bestAlternative == null)
    {
        return null;
    }

    LexiconSpeechResult speechResult = new LexiconSpeechResult();
    // The transcript is null when the response omitted it; treat that as an empty transcript.
    speechResult.Transcript = (bestAlternative.transcript ?? string.Empty).Trim();
    speechResult.IsFinal = watsonResult.final;
    speechResult.Confidence = (float)bestAlternative.confidence;
    speechResult.RealtimeStart = realtimeStart;
    // -1 marks "unknown"; replaced below once word timestamps line up with the transcript.
    speechResult.RealtimeEnd = -1;

    // Note: Split(' ') always yields at least one element (a single empty string for an
    // empty transcript), so the wordCount check below is purely defensive.
    string[] words = speechResult.Transcript.Split(' ');
    int wordCount = words.Length;

    if (wordCount > 0)
    {
        speechResult.WordResults = new LexiconSpeechResult.WordResult[wordCount];
        for (int i = 0; i < wordCount; i++)
        {
            LexiconSpeechResult.WordResult wordResult = new LexiconSpeechResult.WordResult();
            wordResult.Word = words[i];
            speechResult.WordResults[i] = wordResult;
        }

        var timestamps = bestAlternative.Timestamps;
        if (timestamps != null)
        {
            // Timing is only trusted when there is exactly one timestamp per transcript word.
            if (timestamps.Length == wordCount)
            {
                for (int i = 0; i < wordCount; i++)
                {
                    var timestamp = timestamps[i];
                    if (timestamp == null)
                    {
                        // The parser leaves a null slot for a malformed timestamp entry.
                        Debug.LogWarning("word: " + words[i] + " has no timestamp entry");
                        continue;
                    }

                    if (string.Equals(words[i], timestamp.Word, StringComparison.OrdinalIgnoreCase))
                    {
                        LexiconSpeechResult.WordResult wordResult = speechResult.WordResults[i];
                        wordResult.TimeStart = (float)timestamp.Start;
                        wordResult.TimeEnd = (float)timestamp.End;
                        wordResult.RealtimeStart = realtimeStart + wordResult.TimeStart;
                        wordResult.RealtimeEnd = realtimeStart + wordResult.TimeEnd;
                    }
                    else
                    {
                        Debug.LogWarning("word: " + words[i] + " does not match timestamp word: " + timestamp.Word);
                    }
                }

                // The utterance ends when its last word ends.
                speechResult.RealtimeEnd = speechResult.WordResults[wordCount - 1].RealtimeEnd;
            }
            else
            {
                Debug.LogWarning("word count: " + wordCount + ", timestamp count: " + timestamps.Length);
            }
        }

        var wordConfidences = bestAlternative.WordConfidence;
        if (wordConfidences != null)
        {
            if (wordConfidences.Length == wordCount)
            {
                for (int i = 0; i < wordCount; i++)
                {
                    var wordConfidence = wordConfidences[i];
                    if (wordConfidence == null)
                    {
                        // The parser leaves a null slot for a malformed confidence entry.
                        Debug.LogWarning("word: " + words[i] + " has no confidence entry");
                        continue;
                    }

                    if (string.Equals(words[i], wordConfidence.Word, StringComparison.OrdinalIgnoreCase))
                    {
                        speechResult.WordResults[i].Confidence = (float)wordConfidence.Confidence;
                    }
                    else
                    {
                        Debug.LogWarning("word: " + words[i] + " does not match confidence word: " + wordConfidence.Word);
                    }
                }
            }
            else
            {
                Debug.LogWarning("word count: " + wordCount + ", confidence count: " + wordConfidences.Length);
            }
        }
    }

    if (watsonResult.keywords_result != null
        && watsonResult.keywords_result.keyword != null
        && watsonResult.keywords_result.keyword.Length > 0)
    {
        int keywordCount = watsonResult.keywords_result.keyword.Length;
        speechResult.KeywordResults = new LexiconSpeechResult.KeywordResult[keywordCount];

        for (int i = 0; i < keywordCount; i++)
        {
            KeywordResult watsonKeywordResult = watsonResult.keywords_result.keyword[i];

            LexiconSpeechResult.KeywordResult keywordResult = new LexiconSpeechResult.KeywordResult();
            keywordResult.Keyword = watsonKeywordResult.keyword;
            keywordResult.TranscriptText = watsonKeywordResult.normalized_text;
            keywordResult.Confidence = (float)watsonKeywordResult.confidence;
            keywordResult.TimeStart = (float)watsonKeywordResult.start_time;
            keywordResult.TimeEnd = (float)watsonKeywordResult.end_time;
            keywordResult.RealtimeStart = realtimeStart + keywordResult.TimeStart;
            keywordResult.RealtimeEnd = realtimeStart + keywordResult.TimeEnd;

            speechResult.KeywordResults[i] = keywordResult;
        }
    }

    if (watsonResult.word_alternatives != null && watsonResult.word_alternatives.Length > 0)
    {
        int slotCount = watsonResult.word_alternatives.Length;
        speechResult.AlternativeWordResults = new LexiconSpeechResult.WordAlternativeResults[slotCount];

        for (int i = 0; i < slotCount; i++)
        {
            WordAlternativeResults watsonAlternativeResults = watsonResult.word_alternatives[i];

            // The alternatives array is only populated when the response contained an
            // "alternatives" key for this slot; treat a missing array as empty.
            int alternativeCount = watsonAlternativeResults.alternatives != null
                ? watsonAlternativeResults.alternatives.Length
                : 0;

            LexiconSpeechResult.WordAlternativeResults alternativeResults = new LexiconSpeechResult.WordAlternativeResults();
            alternativeResults.Alternatives = new LexiconSpeechResult.WordAlternative[alternativeCount];
            alternativeResults.TimeStart = (float)watsonAlternativeResults.start_time;
            alternativeResults.TimeEnd = (float)watsonAlternativeResults.end_time;
            alternativeResults.RealtimeStart = realtimeStart + alternativeResults.TimeStart;
            alternativeResults.RealtimeEnd = realtimeStart + alternativeResults.TimeEnd;

            for (int j = 0; j < alternativeCount; j++)
            {
                LexiconSpeechResult.WordAlternative alternative = new LexiconSpeechResult.WordAlternative();
                alternative.Word = watsonAlternativeResults.alternatives[j].word;
                alternative.Confidence = (float)watsonAlternativeResults.alternatives[j].confidence;
                alternativeResults.Alternatives[j] = alternative;
            }

            speechResult.AlternativeWordResults[i] = alternativeResults;
        }
    }

    return speechResult;
}
/// <summary>
/// Converts a deserialized recognize response (nested IDictionary/IList objects) into a
/// <see cref="SpeechRecognitionEvent"/>.
/// </summary>
/// <param name="resp">Root dictionary of the response; expected to hold a "results" list.</param>
/// <returns>
/// The parsed event, or null when <paramref name="resp"/> is null, has no "results" list,
/// or an exception is thrown while parsing (the exception is logged).
/// </returns>
private SpeechRecognitionEvent ParseRecognizeResponse(IDictionary resp)
{
    if (resp == null)
    {
        return null;
    }

    try
    {
        IList iresults = resp["results"] as IList;
        if (iresults == null)
        {
            return null;
        }

        List<SpeechRecognitionResult> results = new List<SpeechRecognitionResult>();

        foreach (var r in iresults)
        {
            IDictionary iresult = r as IDictionary;
            // Skip entries that are not dictionaries. (The original tested `iresults`,
            // the outer list, so a bad entry threw and discarded the whole response.)
            if (iresult == null)
            {
                continue;
            }

            SpeechRecognitionResult result = new SpeechRecognitionResult();
            // A missing or non-boolean "final" throws here and is handled by the catch below.
            result.final = (bool)iresult["final"];

            // ---- word_alternatives -------------------------------------------------
            IList iwordAlternatives = iresult["word_alternatives"] as IList;
            if (iwordAlternatives != null)
            {
                List<WordAlternativeResults> wordAlternatives = new List<WordAlternativeResults>();

                foreach (var w in iwordAlternatives)
                {
                    IDictionary iwordAlternative = w as IDictionary;
                    if (iwordAlternative == null)
                    {
                        continue;
                    }

                    WordAlternativeResults wordAlternativeResults = new WordAlternativeResults();

                    if (iwordAlternative.Contains("start_time"))
                    {
                        wordAlternativeResults.start_time = (double)iwordAlternative["start_time"];
                    }

                    if (iwordAlternative.Contains("end_time"))
                    {
                        wordAlternativeResults.end_time = (double)iwordAlternative["end_time"];
                    }

                    if (iwordAlternative.Contains("alternatives"))
                    {
                        IList iwordAlternativeResult = iwordAlternative["alternatives"] as IList;
                        if (iwordAlternativeResult == null)
                        {
                            // "alternatives" is present but not a list: drop this slot entirely.
                            continue;
                        }

                        List<WordAlternativeResult> wordAlternativeResultList = new List<WordAlternativeResult>();
                        foreach (var a in iwordAlternativeResult)
                        {
                            IDictionary ialternative = a as IDictionary;
                            if (ialternative == null)
                            {
                                // Not a dictionary: skip it rather than dereferencing null.
                                continue;
                            }

                            WordAlternativeResult wordAlternativeResult = new WordAlternativeResult();

                            if (ialternative.Contains("word"))
                            {
                                wordAlternativeResult.word = (string)ialternative["word"];
                            }

                            if (ialternative.Contains("confidence"))
                            {
                                wordAlternativeResult.confidence = (double)ialternative["confidence"];
                            }

                            wordAlternativeResultList.Add(wordAlternativeResult);
                        }

                        wordAlternativeResults.alternatives = wordAlternativeResultList.ToArray();
                    }

                    wordAlternatives.Add(wordAlternativeResults);
                }

                result.word_alternatives = wordAlternatives.ToArray();
            }

            // ---- alternatives ------------------------------------------------------
            IList ialternatives = iresult["alternatives"] as IList;
            if (ialternatives != null)
            {
                List<SpeechRecognitionAlternative> alternatives = new List<SpeechRecognitionAlternative>();

                foreach (var a in ialternatives)
                {
                    IDictionary ialternative = a as IDictionary;
                    if (ialternative == null)
                    {
                        continue;
                    }

                    SpeechRecognitionAlternative alternative = new SpeechRecognitionAlternative();
                    alternative.transcript = (string)ialternative["transcript"];

                    if (ialternative.Contains("confidence"))
                    {
                        alternative.confidence = (double)ialternative["confidence"];
                    }

                    if (ialternative.Contains("timestamps"))
                    {
                        IList itimestamps = ialternative["timestamps"] as IList;
                        // Guard against a "timestamps" value that is not a list.
                        if (itimestamps != null)
                        {
                            TimeStamp[] timestamps = new TimeStamp[itimestamps.Count];
                            for (int i = 0; i < itimestamps.Count; ++i)
                            {
                                // Each entry is read as a list: [word, start, end].
                                IList itimestamp = itimestamps[i] as IList;
                                if (itimestamp == null)
                                {
                                    // Leave the slot at its default so indices still line up.
                                    continue;
                                }

                                TimeStamp ts = new TimeStamp();
                                ts.Word = (string)itimestamp[0];
                                ts.Start = (double)itimestamp[1];
                                ts.End = (double)itimestamp[2];
                                timestamps[i] = ts;
                            }

                            alternative.Timestamps = timestamps;
                        }
                    }

                    if (ialternative.Contains("word_confidence"))
                    {
                        IList iconfidence = ialternative["word_confidence"] as IList;
                        // Guard against a "word_confidence" value that is not a list.
                        if (iconfidence != null)
                        {
                            WordConfidence[] confidence = new WordConfidence[iconfidence.Count];
                            for (int i = 0; i < iconfidence.Count; ++i)
                            {
                                // Each entry is read as a list: [word, confidence].
                                IList iwordconf = iconfidence[i] as IList;
                                if (iwordconf == null)
                                {
                                    // Leave the slot at its default so indices still line up.
                                    continue;
                                }

                                WordConfidence wc = new WordConfidence();
                                wc.Word = (string)iwordconf[0];
                                wc.Confidence = (double)iwordconf[1];
                                confidence[i] = wc;
                            }

                            alternative.WordConfidence = confidence;
                        }
                    }

                    alternatives.Add(alternative);
                }

                result.alternatives = alternatives.ToArray();
            }

            // ---- keywords_result ---------------------------------------------------
            IDictionary iKeywords = iresult["keywords_result"] as IDictionary;
            if (iKeywords != null)
            {
                result.keywords_result = new KeywordResults();
                List<KeywordResult> keywordResults = new List<KeywordResult>();

                // Only keywords listed in Keywords are looked up in the response.
                foreach (string keyword in Keywords)
                {
                    // One lookup covers both "no entry" and "entry is not a list".
                    IList iKeywordList = iKeywords[keyword] as IList;
                    if (iKeywordList == null)
                    {
                        continue;
                    }

                    foreach (var k in iKeywordList)
                    {
                        IDictionary iKeywordDictionary = k as IDictionary;
                        if (iKeywordDictionary == null)
                        {
                            // Not a dictionary: skip it rather than dereferencing null.
                            continue;
                        }

                        KeywordResult keywordResult = new KeywordResult();
                        keywordResult.keyword = keyword;
                        keywordResult.confidence = (double)iKeywordDictionary["confidence"];
                        keywordResult.end_time = (double)iKeywordDictionary["end_time"];
                        keywordResult.start_time = (double)iKeywordDictionary["start_time"];
                        keywordResult.normalized_text = (string)iKeywordDictionary["normalized_text"];
                        keywordResults.Add(keywordResult);
                    }
                }

                result.keywords_result.keyword = keywordResults.ToArray();
            }

            results.Add(result);
        }

        return new SpeechRecognitionEvent(results.ToArray());
    }
    catch (Exception e)
    {
        // Any cast or lookup failure above ends up here: log it and report "no event".
        Log.Error("SpeechToText.ParseRecognizeResponse()", "ParseJsonResponse exception: {0}", e.ToString());
        return null;
    }
}