示例#1
0
        /// <summary>
        /// Speech-result callback. Clears the awaiting-capture flag for every result it
        /// receives and, once a final result arrives, unsubscribes itself from
        /// OnSpeechToTextResults and stops the speech service.
        /// </summary>
        /// <param name="speechResult">The speech result being delivered; only its IsFinal flag is read.</param>
        private void CapturedPhrase(LexiconSpeechResult speechResult)
        {
            // Any result, interim or final, means a capture is no longer pending.
            awaitingSpeechCapture = false;

            if (!speechResult.IsFinal)
            {
                // Interim result: stay subscribed and keep the service running.
                return;
            }

            // Final result: detach this handler first, then stop the service.
            OnSpeechToTextResults -= CapturedPhrase;
            StopSpeechService();
        }
示例#2
0
        /// <summary>
        /// Handles a speech recognition event from the Watson speech-to-text service.
        /// Only the first result of the event is used. A final result that meets the
        /// speech confidence threshold is sent to the conversation service (when the
        /// workspace uses it and a conversation is active) and cached for later matching.
        /// The speech result is then shared with the OnSpeechToTextResults delegates, and
        /// each detected keyword with the OnKeywordDetected delegates.
        /// </summary>
        /// <param name="response">The recognition event to process.</param>
        /// <param name="realtimeStart">Start time forwarded to CreateSpeechResult for computing realtime values.</param>
        private void HandleWatsonSpeechToTextResponse(SpeechRecognitionEvent response, float realtimeStart)
        {
            if (response == null || response.results == null || response.results.Length == 0)
            {
                Debug.LogError("SpeechRecognitionEvent has no results!");
                return;
            }

            if (response.results.Length > 1)
            {
                Debug.LogWarning("SpeechRecognitionEvent has multiple results!");
            }

            LexiconSpeechResult speechResult = CreateSpeechResult(response.results[0], realtimeStart);

            if (speechResult == null)
            {
                // CreateSpeechResult returns null when the result has no alternatives;
                // there is nothing to forward, and dereferencing it below would throw.
                Debug.LogWarning("SpeechRecognitionEvent result has no alternatives!");
                return;
            }

            bool isConfidentFinal = speechResult.IsFinal && speechResult.Confidence >= speechConfidenceThreshold;

            if (isConfidentFinal && workspace.UseWatsonConversation && conversationActive)
            {
                // Send the final transcript to the conversation service for processing.
                workspace.WatsonConversationManager.SendRequest(speechResult.Transcript);

                // Cache the speech to text results to be matched with conversation results later.
                finalSpeechResults.Add(speechResult);
            }

            // Share speech results with our delegates.
            var speechHandler = OnSpeechToTextResults;
            if (speechHandler != null)
            {
                speechHandler(speechResult);
            }

            if (speechResult.KeywordResults != null)
            {
                // Share keyword results with our delegates (called once for each detected keyword).
                foreach (LexiconSpeechResult.KeywordResult keywordResult in speechResult.KeywordResults)
                {
                    // Re-read the delegate for every keyword: a handler may unsubscribe
                    // during its callback, which can leave no subscribers at all.
                    var keywordHandler = OnKeywordDetected;
                    if (keywordHandler == null)
                    {
                        break;
                    }

                    keywordHandler(keywordResult);
                }
            }
        }
示例#3
0
        /// <summary>
        /// Builds a LexiconSpeechResult from a Watson recognition result, using only the
        /// first entry of its alternatives. Copies the transcript, final flag and confidence,
        /// then fills in per-word results, keyword results and word alternatives when the
        /// Watson result provides them.
        /// </summary>
        /// <param name="watsonResult">The Watson recognition result to convert.</param>
        /// <param name="realtimeStart">Value added to each time offset to produce the Realtime* values.</param>
        /// <returns>The converted result, or null when the Watson result has no alternatives.</returns>
        private LexiconSpeechResult CreateSpeechResult(SpeechRecognitionResult watsonResult, float realtimeStart)
        {
            if (watsonResult.alternatives.Length == 0)
            {
                return null;
            }

            SpeechRecognitionAlternative topAlternative = watsonResult.alternatives[0];

            LexiconSpeechResult result = new LexiconSpeechResult
            {
                Transcript    = topAlternative.transcript.Trim(),
                IsFinal       = watsonResult.final,
                Confidence    = (float)topAlternative.confidence,
                RealtimeStart = realtimeStart,
                RealtimeEnd   = -1 // Replaced below once word timestamps have been applied.
            };

            string[] tokens = result.Transcript.Split(' ');

            if (tokens.Length > 0)
            {
                result.WordResults = new LexiconSpeechResult.WordResult[tokens.Length];

                for (int k = 0; k < tokens.Length; k++)
                {
                    result.WordResults[k] = new LexiconSpeechResult.WordResult { Word = tokens[k] };
                }

                PopulateWordTimestamps(result, tokens, topAlternative, realtimeStart);
                PopulateWordConfidences(result, tokens, topAlternative);
            }

            PopulateKeywordResults(result, watsonResult, realtimeStart);
            PopulateWordAlternatives(result, watsonResult, realtimeStart);

            return result;
        }

        // Copies per-word start/end times onto result.WordResults and sets result.RealtimeEnd from the last word.
        private static void PopulateWordTimestamps(LexiconSpeechResult result, string[] tokens, SpeechRecognitionAlternative alternative, float realtimeStart)
        {
            if (alternative.Timestamps == null)
            {
                return;
            }

            if (alternative.Timestamps.Length != tokens.Length)
            {
                Debug.LogWarning("word count: " + tokens.Length + ", timestamp count: " + alternative.Timestamps.Length);
                return;
            }

            for (int k = 0; k < tokens.Length; k++)
            {
                var stamp = alternative.Timestamps[k];

                if (!string.Equals(tokens[k], stamp.Word, StringComparison.OrdinalIgnoreCase))
                {
                    // Timestamp entry does not line up with the transcript word; leave this word untimed.
                    Debug.LogWarning("word: " + tokens[k] + " does not match timestamp word: " + stamp.Word);
                    continue;
                }

                result.WordResults[k].TimeStart     = (float)stamp.Start;
                result.WordResults[k].TimeEnd       = (float)stamp.End;
                result.WordResults[k].RealtimeStart = realtimeStart + result.WordResults[k].TimeStart;
                result.WordResults[k].RealtimeEnd   = realtimeStart + result.WordResults[k].TimeEnd;
            }

            int wordTotal = result.WordResults.Length;
            if (wordTotal > 0)
            {
                result.RealtimeEnd = result.WordResults[wordTotal - 1].RealtimeEnd;
            }
        }

        // Copies per-word confidence values onto result.WordResults.
        private static void PopulateWordConfidences(LexiconSpeechResult result, string[] tokens, SpeechRecognitionAlternative alternative)
        {
            if (alternative.WordConfidence == null)
            {
                return;
            }

            if (alternative.WordConfidence.Length != tokens.Length)
            {
                Debug.LogWarning("word count: " + tokens.Length + ", confidence count: " + alternative.WordConfidence.Length);
                return;
            }

            for (int k = 0; k < tokens.Length; k++)
            {
                var wordConfidence = alternative.WordConfidence[k];

                if (string.Equals(tokens[k], wordConfidence.Word, StringComparison.OrdinalIgnoreCase))
                {
                    result.WordResults[k].Confidence = (float)wordConfidence.Confidence;
                }
                else
                {
                    Debug.LogWarning("word: " + tokens[k] + " does not match confidence word: " + wordConfidence.Word);
                }
            }
        }

        // Converts the Watson keyword results (if any) into result.KeywordResults.
        private static void PopulateKeywordResults(LexiconSpeechResult result, SpeechRecognitionResult watsonResult, float realtimeStart)
        {
            if (watsonResult.keywords_result == null || watsonResult.keywords_result.keyword == null || watsonResult.keywords_result.keyword.Length == 0)
            {
                return;
            }

            var watsonKeywords = watsonResult.keywords_result.keyword;

            result.KeywordResults = new LexiconSpeechResult.KeywordResult[watsonKeywords.Length];

            for (int k = 0; k < watsonKeywords.Length; k++)
            {
                KeywordResult source = watsonKeywords[k];

                LexiconSpeechResult.KeywordResult converted = new LexiconSpeechResult.KeywordResult
                {
                    Keyword        = source.keyword,
                    TranscriptText = source.normalized_text,
                    Confidence     = (float)source.confidence,
                    TimeStart      = (float)source.start_time,
                    TimeEnd        = (float)source.end_time
                };

                // Realtime values are derived from the stored time offsets.
                converted.RealtimeStart = realtimeStart + converted.TimeStart;
                converted.RealtimeEnd   = realtimeStart + converted.TimeEnd;

                result.KeywordResults[k] = converted;
            }
        }

        // Converts the Watson word alternatives (if any) into result.AlternativeWordResults.
        private static void PopulateWordAlternatives(LexiconSpeechResult result, SpeechRecognitionResult watsonResult, float realtimeStart)
        {
            var watsonGroups = watsonResult.word_alternatives;

            if (watsonGroups == null || watsonGroups.Length == 0)
            {
                return;
            }

            result.AlternativeWordResults = new LexiconSpeechResult.WordAlternativeResults[watsonGroups.Length];

            for (int g = 0; g < watsonGroups.Length; g++)
            {
                WordAlternativeResults source = watsonGroups[g];

                LexiconSpeechResult.WordAlternativeResults converted = new LexiconSpeechResult.WordAlternativeResults
                {
                    Alternatives = new LexiconSpeechResult.WordAlternative[source.alternatives.Length],
                    TimeStart    = (float)source.start_time,
                    TimeEnd      = (float)source.end_time
                };

                converted.RealtimeStart = realtimeStart + converted.TimeStart;
                converted.RealtimeEnd   = realtimeStart + converted.TimeEnd;

                for (int a = 0; a < source.alternatives.Length; a++)
                {
                    converted.Alternatives[a] = new LexiconSpeechResult.WordAlternative
                    {
                        Word       = source.alternatives[a].word,
                        Confidence = (float)source.alternatives[a].confidence
                    };
                }

                result.AlternativeWordResults[g] = converted;
            }
        }
示例#4
0
        /// <summary>
        /// Handles a response from the Watson conversation service. Pairs the response with
        /// the cached speech-to-text result whose transcript equals the response's input text
        /// (case-insensitive), builds a LexiconRuntimeResult for every matching intent that
        /// meets the intent confidence threshold, lets intents process the results until one
        /// consumes them, and finally shares all runtime results with the OnLexiconResults
        /// delegates.
        /// </summary>
        /// <param name="response">
        /// The conversation response. A null response is logged and ignored; null input,
        /// entities or intents are treated as empty.
        /// </param>
        private void HandleWatsonConversationResponse(MessageResponse response)
        {
            if (response == null)
            {
                Debug.LogError("MessageResponse is null!");
                return;
            }

            LexiconSpeechResult matchingSpeechResult = null;
            string utterance = "";

            object text;

            if (response.input != null && response.input.TryGetValue("text", out text))
            {
                // 'as' keeps a non-string payload from throwing InvalidCastException.
                string inputText = text as string;

                if (inputText != null)
                {
                    utterance = inputText;

                    // Find the speech to text result that matches this conversation result.
                    // There is no break, so when several cached results share the same
                    // transcript the last one in the list is used.
                    foreach (LexiconSpeechResult speechResult in finalSpeechResults)
                    {
                        if (string.Equals(speechResult.Transcript, utterance, StringComparison.OrdinalIgnoreCase))
                        {
                            matchingSpeechResult = speechResult;
                        }
                    }

                    if (matchingSpeechResult != null)
                    {
                        // Remove the cached speech to text result.
                        finalSpeechResults.Remove(matchingSpeechResult);
                    }
                }
            }

            var watsonEntities = response.entities;

            // Make a list of all entity/value pairs in this response.
            List<EntityPair> entityPairs = new List<EntityPair>();

            if (watsonEntities != null)
            {
                foreach (RuntimeEntity entity in watsonEntities)
                {
                    entityPairs.Add(new EntityPair(entity.entity, entity.value));
                }
            }

            List<LexiconRuntimeResult> runtimeResults = new List<LexiconRuntimeResult>();
            bool consumed = false;

            // Process each intent (there will only be one if matchMultipleIntents is false).
            // TODO: We may want to process entities even if there isn't an intent match.
            if (response.intents != null)
            {
                foreach (RuntimeIntent watsonIntent in response.intents)
                {
                    if (watsonIntent.confidence < intentConfidenceThreshold)
                    {
                        // Ignore intents that don't meet the confidence threshold.
                        continue;
                    }

                    List<LexiconIntent> matchingIntents = workspace.FindMatchingIntents(watsonIntent.intent, entityPairs);
                    if (matchingIntents == null)
                    {
                        continue;
                    }

                    LexiconFocusManager focusManager = LexiconFocusManager.Instance;

                    foreach (LexiconIntent intent in matchingIntents)
                    {
                        LexiconRuntimeResult runtimeResult = new LexiconRuntimeResult();
                        runtimeResult.Intent       = intent;
                        runtimeResult.Confidence   = watsonIntent.confidence;
                        runtimeResult.Utterance    = utterance;
                        runtimeResult.SpeechResult = matchingSpeechResult;

                        if (watsonEntities != null)
                        {
                            foreach (RuntimeEntity watsonEntity in watsonEntities)
                            {
                                // Find the entity and entityValue that match this intent.
                                LexiconEntity      entity;
                                LexiconEntityValue entityValue = intent.FindEntityValue(watsonEntity.entity, watsonEntity.value, out entity);
                                if (entityValue == null)
                                {
                                    continue;
                                }

                                LexiconEntityMatch entityMatch = new LexiconEntityMatch();
                                entityMatch.Entity         = entity;
                                entityMatch.EntityValue    = entityValue;
                                entityMatch.FirstCharacter = watsonEntity.location[0];
                                entityMatch.LastCharacter  = watsonEntity.location[1] - 1; // Convert to index of last character.

                                if (matchingSpeechResult != null)
                                {
                                    // Find the entity in the speech to text transcript and extract the timestamps.
                                    LexiconSpeechResult.WordResult[] wordResults = matchingSpeechResult.GetWordsFromTranscriptPositions(entityMatch.FirstCharacter, entityMatch.LastCharacter);
                                    if (wordResults != null && wordResults.Length > 0)
                                    {
                                        entityMatch.TimeStart     = wordResults[0].TimeStart;
                                        entityMatch.TimeEnd       = wordResults[wordResults.Length - 1].TimeEnd;
                                        entityMatch.RealtimeStart = wordResults[0].RealtimeStart;
                                        entityMatch.RealtimeEnd   = wordResults[wordResults.Length - 1].RealtimeEnd;

                                        // Prefer a dwell position near the start of the entity when enabled;
                                        // otherwise (or when none is found) use the plain focus position.
                                        FocusDwellPosition dwellPosition = null;
                                        if (useDwellPosition)
                                        {
                                            dwellPosition = focusManager.GetFocusData<FocusDwellPosition>(entityMatch.RealtimeStart, 0.2f);
                                        }

                                        if (dwellPosition != null)
                                        {
                                            FocusPosition focusPosition = new FocusPosition();
                                            focusPosition.Timestamp = dwellPosition.Timestamp;
                                            focusPosition.Position  = dwellPosition.Position;
                                            focusPosition.Normal    = dwellPosition.Normal;

                                            entityMatch.FocusPosition = focusPosition;
                                        }
                                        else
                                        {
                                            entityMatch.FocusPosition = focusManager.GetFocusData<FocusPosition>(entityMatch.RealtimeStart);
                                        }

                                        entityMatch.FocusSelection = focusManager.GetFocusData<FocusSelection>(entityMatch.RealtimeStart);
                                    }
                                }

                                runtimeResult.EntityMatches.Add(entityMatch);
                            }
                        }

                        // Order the matches by where they appear in the utterance.
                        runtimeResult.EntityMatches.Sort((x, y) => x.FirstCharacter.CompareTo(y.FirstCharacter));

                        runtimeResults.Add(runtimeResult);

                        if (!consumed)
                        {
                            // If an action has consumed this result no other actions will be fired.
                            // But, we continue to process the intents for the global handler.
                            consumed = intent.Process(runtimeResult);
                        }
                    }
                }
            }

            if (OnLexiconResults != null)
            {
                // Share runtime results with our delegates.
                OnLexiconResults(runtimeResults);
            }
        }