/// <summary>
/// Speech to text callback used while a phrase capture is pending. Any result clears the
/// awaitingSpeechCapture flag; a final result additionally unsubscribes this callback from
/// OnSpeechToTextResults and calls StopSpeechService.
/// </summary>
/// <param name="speechResult">The speech result delivered to the callback.</param>
private void CapturedPhrase(LexiconSpeechResult speechResult)
{
    // Interim and final results alike mean speech has been received.
    awaitingSpeechCapture = false;

    if (!speechResult.IsFinal)
    {
        // Keep listening until the final result arrives.
        return;
    }

    OnSpeechToTextResults -= CapturedPhrase;
    StopSpeechService();
}
/// <summary>
/// Handles a speech recognition event: converts the first result into a LexiconSpeechResult,
/// forwards confident final transcripts to the conversation service (when enabled and active),
/// and shares the result and any detected keywords with subscribed delegates.
/// </summary>
/// <param name="response">The recognition event to process. A null event, or one without results, is logged and ignored.</param>
/// <param name="realtimeStart">Start time passed through to CreateSpeechResult.</param>
private void HandleWatsonSpeechToTextResponse(SpeechRecognitionEvent response, float realtimeStart)
{
    if (response == null || response.results == null || response.results.Length == 0)
    {
        Debug.LogError("SpeechRecognitionEvent has no results!");
        return;
    }

    if (response.results.Length > 1)
    {
        // Only the first result is processed below.
        Debug.LogWarning("SpeechRecognitionEvent has multiple results!");
    }

    LexiconSpeechResult speechResult = CreateSpeechResult(response.results[0], realtimeStart);

    if (speechResult == null)
    {
        // CreateSpeechResult returns null when the result carries no alternatives;
        // there is nothing to forward or share in that case.
        Debug.LogWarning("SpeechRecognitionEvent result has no alternatives!");
        return;
    }

    bool isConfidentFinal = speechResult.IsFinal && speechResult.Confidence >= speechConfidenceThreshold;

    if (isConfidentFinal && workspace.UseWatsonConversation && conversationActive)
    {
        // Send the final transcript to the conversation service for processing.
        workspace.WatsonConversationManager.SendRequest(speechResult.Transcript);

        // Cache the speech to text results to be matched with conversation results later.
        finalSpeechResults.Add(speechResult);
    }

    if (OnSpeechToTextResults != null)
    {
        // Share speech results with our delegates.
        OnSpeechToTextResults(speechResult);
    }

    if (speechResult.KeywordResults != null)
    {
        // Share keyword results with our delegates (called once for each detected keyword).
        foreach (LexiconSpeechResult.KeywordResult keywordResult in speechResult.KeywordResults)
        {
            // Re-check for every keyword: a handler may unsubscribe while being notified,
            // which can leave the delegate null before the next keyword is raised.
            if (OnKeywordDetected == null)
            {
                break;
            }

            OnKeywordDetected(keywordResult);
        }
    }
}
/// <summary>
/// Converts a Watson speech recognition result into a LexiconSpeechResult. Only the first
/// alternative is used for the transcript, confidence, per-word timestamps and per-word
/// confidences. Keyword results and word alternatives are copied when present.
/// </summary>
/// <param name="watsonResult">The Watson result to convert.</param>
/// <param name="realtimeStart">Offset added to each Watson-relative time to produce the Realtime* values.</param>
/// <returns>The converted result, or null when watsonResult is null or has no alternatives.</returns>
private LexiconSpeechResult CreateSpeechResult(SpeechRecognitionResult watsonResult, float realtimeStart)
{
    if (watsonResult == null || watsonResult.alternatives == null || watsonResult.alternatives.Length == 0)
    {
        return null;
    }

    SpeechRecognitionAlternative bestAlternative = watsonResult.alternatives[0];

    LexiconSpeechResult speechResult = new LexiconSpeechResult();

    // A missing transcript is treated as empty instead of throwing.
    speechResult.Transcript = (bestAlternative.transcript ?? string.Empty).Trim();
    speechResult.IsFinal = watsonResult.final;
    speechResult.Confidence = (float)bestAlternative.confidence;
    speechResult.RealtimeStart = realtimeStart;
    speechResult.RealtimeEnd = -1; // Overwritten below only when word timestamps are applied.

    // Split without RemoveEmptyEntries always yields at least one element (an empty
    // transcript produces a single empty word), so WordResults is always populated.
    string[] words = speechResult.Transcript.Split(' ');

    speechResult.WordResults = new LexiconSpeechResult.WordResult[words.Length];

    for (int i = 0; i < words.Length; i++)
    {
        speechResult.WordResults[i] = new LexiconSpeechResult.WordResult();
        speechResult.WordResults[i].Word = words[i];
    }

    PopulateWordTimestamps(speechResult, bestAlternative, words, realtimeStart);
    PopulateWordConfidences(speechResult, bestAlternative, words);
    PopulateKeywordResults(speechResult, watsonResult, realtimeStart);
    PopulateWordAlternatives(speechResult, watsonResult, realtimeStart);

    return speechResult;
}

// Copies per-word timestamps onto speechResult.WordResults and sets speechResult.RealtimeEnd
// from the last word; logs a warning and leaves both untouched when the counts differ.
private static void PopulateWordTimestamps(LexiconSpeechResult speechResult, SpeechRecognitionAlternative bestAlternative, string[] words, float realtimeStart)
{
    if (bestAlternative.Timestamps == null)
    {
        return;
    }

    int wordCount = words.Length;

    if (bestAlternative.Timestamps.Length != wordCount)
    {
        Debug.LogWarning("word count: " + wordCount + ", timestamp count: " + bestAlternative.Timestamps.Length);
        return;
    }

    for (int i = 0; i < wordCount; i++)
    {
        if (string.Equals(words[i], bestAlternative.Timestamps[i].Word, StringComparison.OrdinalIgnoreCase))
        {
            // Elements are written through the array indexer so the assignments stick
            // regardless of whether WordResult is a class or a struct.
            speechResult.WordResults[i].TimeStart = (float)bestAlternative.Timestamps[i].Start;
            speechResult.WordResults[i].TimeEnd = (float)bestAlternative.Timestamps[i].End;
            speechResult.WordResults[i].RealtimeStart = realtimeStart + speechResult.WordResults[i].TimeStart;
            speechResult.WordResults[i].RealtimeEnd = realtimeStart + speechResult.WordResults[i].TimeEnd;
        }
        else
        {
            Debug.LogWarning("word: " + words[i] + " does not match timestamp word: " + bestAlternative.Timestamps[i].Word);
        }
    }

    // wordCount is at least one here, so the last element always exists.
    speechResult.RealtimeEnd = speechResult.WordResults[wordCount - 1].RealtimeEnd;
}

// Copies per-word confidences onto speechResult.WordResults; logs a warning and leaves
// them untouched when the counts differ.
private static void PopulateWordConfidences(LexiconSpeechResult speechResult, SpeechRecognitionAlternative bestAlternative, string[] words)
{
    if (bestAlternative.WordConfidence == null)
    {
        return;
    }

    int wordCount = words.Length;

    if (bestAlternative.WordConfidence.Length != wordCount)
    {
        Debug.LogWarning("word count: " + wordCount + ", confidence count: " + bestAlternative.WordConfidence.Length);
        return;
    }

    for (int i = 0; i < wordCount; i++)
    {
        if (string.Equals(words[i], bestAlternative.WordConfidence[i].Word, StringComparison.OrdinalIgnoreCase))
        {
            speechResult.WordResults[i].Confidence = (float)bestAlternative.WordConfidence[i].Confidence;
        }
        else
        {
            Debug.LogWarning("word: " + words[i] + " does not match confidence word: " + bestAlternative.WordConfidence[i].Word);
        }
    }
}

// Copies Watson keyword results into speechResult.KeywordResults; leaves the property
// untouched when Watson reported no keywords.
private static void PopulateKeywordResults(LexiconSpeechResult speechResult, SpeechRecognitionResult watsonResult, float realtimeStart)
{
    if (watsonResult.keywords_result == null || watsonResult.keywords_result.keyword == null || watsonResult.keywords_result.keyword.Length == 0)
    {
        return;
    }

    var watsonKeywords = watsonResult.keywords_result.keyword;

    speechResult.KeywordResults = new LexiconSpeechResult.KeywordResult[watsonKeywords.Length];

    for (int i = 0; i < watsonKeywords.Length; i++)
    {
        KeywordResult watsonKeywordResult = watsonKeywords[i];

        LexiconSpeechResult.KeywordResult keywordResult = new LexiconSpeechResult.KeywordResult();
        keywordResult.Keyword = watsonKeywordResult.keyword;
        keywordResult.TranscriptText = watsonKeywordResult.normalized_text;
        keywordResult.Confidence = (float)watsonKeywordResult.confidence;
        keywordResult.TimeStart = (float)watsonKeywordResult.start_time;
        keywordResult.TimeEnd = (float)watsonKeywordResult.end_time;
        keywordResult.RealtimeStart = realtimeStart + keywordResult.TimeStart;
        keywordResult.RealtimeEnd = realtimeStart + keywordResult.TimeEnd;

        // Stored only after it is fully populated.
        speechResult.KeywordResults[i] = keywordResult;
    }
}

// Copies Watson word alternatives into speechResult.AlternativeWordResults; leaves the
// property untouched when Watson reported none.
private static void PopulateWordAlternatives(LexiconSpeechResult speechResult, SpeechRecognitionResult watsonResult, float realtimeStart)
{
    if (watsonResult.word_alternatives == null || watsonResult.word_alternatives.Length == 0)
    {
        return;
    }

    var watsonWordAlternatives = watsonResult.word_alternatives;

    speechResult.AlternativeWordResults = new LexiconSpeechResult.WordAlternativeResults[watsonWordAlternatives.Length];

    for (int i = 0; i < watsonWordAlternatives.Length; i++)
    {
        WordAlternativeResults watsonAlternativeResults = watsonWordAlternatives[i];

        // A missing alternatives array is treated as empty instead of throwing.
        int alternativeCount = watsonAlternativeResults.alternatives != null ? watsonAlternativeResults.alternatives.Length : 0;

        LexiconSpeechResult.WordAlternativeResults alternativeResults = new LexiconSpeechResult.WordAlternativeResults();
        alternativeResults.Alternatives = new LexiconSpeechResult.WordAlternative[alternativeCount];
        alternativeResults.TimeStart = (float)watsonAlternativeResults.start_time;
        alternativeResults.TimeEnd = (float)watsonAlternativeResults.end_time;
        alternativeResults.RealtimeStart = realtimeStart + alternativeResults.TimeStart;
        alternativeResults.RealtimeEnd = realtimeStart + alternativeResults.TimeEnd;

        for (int j = 0; j < alternativeCount; j++)
        {
            LexiconSpeechResult.WordAlternative alternative = new LexiconSpeechResult.WordAlternative();
            alternative.Word = watsonAlternativeResults.alternatives[j].word;
            alternative.Confidence = (float)watsonAlternativeResults.alternatives[j].confidence;
            alternativeResults.Alternatives[j] = alternative;
        }

        speechResult.AlternativeWordResults[i] = alternativeResults;
    }
}
/// <summary>
/// Handles a conversation response: pairs it with the cached speech to text result whose
/// transcript matches the response's input text, builds a LexiconRuntimeResult for every
/// matching intent that meets the confidence threshold, lets the intents process their
/// results, and shares all runtime results with subscribed delegates.
/// </summary>
/// <param name="response">The conversation response to process. A null response is logged and ignored.</param>
private void HandleWatsonConversationResponse(MessageResponse response)
{
    if (response == null)
    {
        Debug.LogError("MessageResponse is null!");
        return;
    }

    LexiconSpeechResult matchingSpeechResult = null;
    string utterance = "";
    object text;

    if (response.input != null && response.input.TryGetValue("text", out text))
    {
        // A non-string or null "text" entry leaves the utterance empty instead of throwing.
        string inputText = text as string;

        if (inputText != null)
        {
            utterance = inputText;
            matchingSpeechResult = TakeCachedSpeechResult(utterance);
        }
    }

    // Make a list of all entity/value pairs in this response.
    List<EntityPair> entityPairs = new List<EntityPair>();

    if (response.entities != null)
    {
        foreach (RuntimeEntity entity in response.entities)
        {
            entityPairs.Add(new EntityPair(entity.entity, entity.value));
        }
    }

    List<LexiconRuntimeResult> runtimeResults = new List<LexiconRuntimeResult>();
    bool consumed = false;

    // Process each intent (there will only be one if matchMultipleIntents is false).
    // TODO: We may want to process entities even if there isn't an intent match.
    if (response.intents != null)
    {
        foreach (RuntimeIntent watsonIntent in response.intents)
        {
            if (watsonIntent.confidence < intentConfidenceThreshold)
            {
                // Ignore intents that don't meet the confidence threshold.
                continue;
            }

            List<LexiconIntent> matchingIntents = workspace.FindMatchingIntents(watsonIntent.intent, entityPairs);
            LexiconFocusManager focusManager = LexiconFocusManager.Instance;

            foreach (LexiconIntent intent in matchingIntents)
            {
                LexiconRuntimeResult runtimeResult = new LexiconRuntimeResult();
                runtimeResult.Intent = intent;
                runtimeResult.Confidence = watsonIntent.confidence;
                runtimeResult.Utterance = utterance;
                runtimeResult.SpeechResult = matchingSpeechResult;

                if (response.entities != null)
                {
                    foreach (RuntimeEntity watsonEntity in response.entities)
                    {
                        // Find the entity and entityValue that match this intent.
                        LexiconEntity entity;
                        LexiconEntityValue entityValue = intent.FindEntityValue(watsonEntity.entity, watsonEntity.value, out entity);

                        if (entityValue != null)
                        {
                            runtimeResult.EntityMatches.Add(BuildEntityMatch(entity, entityValue, watsonEntity, matchingSpeechResult, focusManager));
                        }
                    }
                }

                // Order the matches by where they appear in the utterance.
                runtimeResult.EntityMatches.Sort((x, y) => x.FirstCharacter.CompareTo(y.FirstCharacter));
                runtimeResults.Add(runtimeResult);

                if (!consumed)
                {
                    // If an action has consumed this result no other actions will be fired.
                    // But, we continue to process the intents for the global handler.
                    consumed = intent.Process(runtimeResult);
                }
            }
        }
    }

    if (OnLexiconResults != null)
    {
        // Share runtime results with our delegates.
        OnLexiconResults(runtimeResults);
    }
}

// Finds the cached final speech result whose transcript equals the utterance (ignoring case),
// removes it from the cache and returns it; returns null when nothing matches.
private LexiconSpeechResult TakeCachedSpeechResult(string utterance)
{
    LexiconSpeechResult match = null;

    foreach (LexiconSpeechResult speechResult in finalSpeechResults)
    {
        if (string.Equals(speechResult.Transcript, utterance, StringComparison.OrdinalIgnoreCase))
        {
            // No break: when several cached results match, the last one enumerated wins.
            match = speechResult;
        }
    }

    if (match != null)
    {
        // Remove the cached speech to text result.
        finalSpeechResults.Remove(match);
    }

    return match;
}

// Builds the entity match for one Watson entity, adding timestamps and focus data when a
// matching speech result with words at the entity's transcript position is available.
private LexiconEntityMatch BuildEntityMatch(LexiconEntity entity, LexiconEntityValue entityValue, RuntimeEntity watsonEntity, LexiconSpeechResult matchingSpeechResult, LexiconFocusManager focusManager)
{
    LexiconEntityMatch entityMatch = new LexiconEntityMatch();
    entityMatch.Entity = entity;
    entityMatch.EntityValue = entityValue;
    entityMatch.FirstCharacter = watsonEntity.location[0];
    entityMatch.LastCharacter = watsonEntity.location[1] - 1; // Convert to index of last character.

    if (matchingSpeechResult == null)
    {
        return entityMatch;
    }

    // Find the entity in the speech to text transcript and extract the timestamps.
    LexiconSpeechResult.WordResult[] wordResults = matchingSpeechResult.GetWordsFromTranscriptPositions(entityMatch.FirstCharacter, entityMatch.LastCharacter);

    if (wordResults == null || wordResults.Length == 0)
    {
        return entityMatch;
    }

    LexiconSpeechResult.WordResult firstWord = wordResults[0];
    LexiconSpeechResult.WordResult lastWord = wordResults[wordResults.Length - 1];

    entityMatch.TimeStart = firstWord.TimeStart;
    entityMatch.TimeEnd = lastWord.TimeEnd;
    entityMatch.RealtimeStart = firstWord.RealtimeStart;
    entityMatch.RealtimeEnd = lastWord.RealtimeEnd;

    FocusDwellPosition dwellPosition = null;

    if (useDwellPosition)
    {
        dwellPosition = focusManager.GetFocusData<FocusDwellPosition>(entityMatch.RealtimeStart, 0.2f);
    }

    if (dwellPosition != null)
    {
        FocusPosition focusPosition = new FocusPosition();
        focusPosition.Timestamp = dwellPosition.Timestamp;
        focusPosition.Position = dwellPosition.Position;
        focusPosition.Normal = dwellPosition.Normal;
        entityMatch.FocusPosition = focusPosition;
    }
    else
    {
        // Dwell positions are disabled or none was found: use the plain focus position.
        entityMatch.FocusPosition = focusManager.GetFocusData<FocusPosition>(entityMatch.RealtimeStart);
    }

    entityMatch.FocusSelection = focusManager.GetFocusData<FocusSelection>(entityMatch.RealtimeStart);

    return entityMatch;
}