Example #1
        /// <summary>
        /// Receiver for the combined VAD signal and audio data.
        /// </summary>
        /// <param name="data">A message containing the combined VAD signal and audio data.</param>
        /// <param name="e">The message envelope</param>
        /// <returns>The <see cref="Task"/> representing the asynchronous operation.</returns>
        protected override async Task ReceiveAsync(ValueTuple<AudioBuffer, bool> data, Envelope e)
        {
            byte[] audioData = data.Item1.Data;
            bool hasSpeech = data.Item2;

            var previousAudioOriginatingTime = this.lastAudioOriginatingTime;

            this.lastAudioOriginatingTime = e.OriginatingTime;

            // Throw if a fatal error has occurred in the OnConversationError event handler
            if (this.fatalError)
            {
                if (this.conversationError != null)
                {
                    var error = this.conversationError;
                    this.conversationError = null;
                    throw error;
                }

                // Stop processing until the pipeline terminates
                return;
            }

            if (hasSpeech)
            {
                this.lastAudioContainingSpeechTime = e.OriginatingTime;
            }

            if (hasSpeech || this.lastAudioContainedSpeech)
            {
                // Send the audio data to the cloud
                await this.speechRecognitionClient.SendAudioAsync(audioData, this.cancellationTokenSource.Token);

                // Add audio to the current utterance queue so we can reconstruct it in the recognition result later
                this.currentQueue.Enqueue(data.DeepClone(this.In.Recycler));
            }

            // If this is the last audio packet containing speech
            if (!hasSpeech && this.lastAudioContainedSpeech)
            {
                this.lastVADSpeechEndTime = this.lastAudioContainingSpeechTime;
                this.lastVADSpeechTimeInterval = new TimeInterval(this.lastVADSpeechStartTime, this.lastVADSpeechEndTime);

                // Allocate a buffer large enough to hold the buffered audio
                BufferWriter bw = new BufferWriter(this.currentQueue.Sum(b => b.Item1.Length));

                // Get the audio associated with the recognized text from the current queue.
                ValueTuple<AudioBuffer, bool> buffer;
                while (this.currentQueue.TryDequeue(out buffer))
                {
                    bw.Write(buffer.Item1.Data);

                    // We are done with this buffer so enqueue it for recycling
                    this.In.Recycle(buffer);
                }

                // Save the buffered audio
                this.lastAudioBuffer = bw.Buffer;

                // Call EndAudio to signal that this is the last packet
                await this.speechRecognitionClient.SendEndAudioAsync(this.cancellationTokenSource.Token);
            }
            else if (hasSpeech && !this.lastAudioContainedSpeech)
            {
                // If this is the first audio packet containing speech, mark the time of the previous audio packet
                // as the start of the actual speech
                this.lastVADSpeechStartTime = previousAudioOriginatingTime;

                // Also post a null partial recognition result
                this.lastPartialResult = string.Empty;
                this.PostWithOriginatingTimeConsistencyCheck(this.PartialRecognitionResults, this.BuildPartialSpeechRecognitionResult(this.lastPartialResult), e.OriginatingTime);
            }

            // Remember last audio state.
            this.lastAudioContainedSpeech = hasSpeech;
        }
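
A note on the two DeepClone-related calls above: this receiver appears to come from Microsoft's Platform for Situated Intelligence (\psi), where a message delivered to a receiver is only valid for the duration of the call. Anything held across messages, here the per-utterance audio queue, must therefore be deep-cloned first, and the recycler lets those clones reuse storage from instances returned via this.In.Recycle. The following is a minimal, self-contained sketch of that clone-then-recycle contract; the AudioChunk type, the Stack-based pool, and Demo are hypothetical stand-ins, not \psi API.

    using System;
    using System.Collections.Generic;

    public sealed class AudioChunk
    {
        public byte[] Data = Array.Empty<byte>();

        // Clone into a recycled instance when one is available, else allocate.
        public AudioChunk DeepClone(Stack<AudioChunk> pool)
        {
            var clone = pool.Count > 0 ? pool.Pop() : new AudioChunk();
            clone.Data = (byte[])this.Data.Clone();
            return clone;
        }
    }

    public static class Demo
    {
        public static void Main()
        {
            var pool = new Stack<AudioChunk>();      // stands in for this.In.Recycler
            var queue = new Queue<AudioChunk>();     // stands in for this.currentQueue

            var incoming = new AudioChunk { Data = new byte[] { 1, 2, 3 } };

            // Keep a safe copy across messages, reusing pooled storage if possible.
            queue.Enqueue(incoming.DeepClone(pool));

            // Once the buffered audio has been consumed, return the clone to the
            // pool, mirroring this.In.Recycle(buffer) in the example.
            pool.Push(queue.Dequeue());
        }
    }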
Example #2
 Point IDeepCloneable<Point>.Clone() => new Point(_value.DeepClone());
Example #3
 Line IDeepCloneable<Line>.Clone() => new Line(_value.DeepClone());
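
Examples #2 and #3 are the same one-line pattern: an explicit interface implementation that clones a wrapper by deep-cloning its wrapped value. As a rough sketch of the shape these one-liners assume, with the IDeepCloneable<T> interface reconstructed from usage and the Payload type invented for illustration:

    using System;

    public interface IDeepCloneable<T>
    {
        T Clone();
    }

    // Hypothetical wrapped value with its own deep-clone support.
    public sealed class Payload
    {
        public int[] Values = Array.Empty<int>();

        public Payload DeepClone() => new Payload { Values = (int[])this.Values.Clone() };
    }

    public sealed class Point : IDeepCloneable<Point>
    {
        private readonly Payload _value;

        public Point(Payload value) => _value = value;

        // Explicit implementation: Clone is reachable only through the interface,
        // keeping the type's public surface minimal.
        Point IDeepCloneable<Point>.Clone() => new Point(_value.DeepClone());
    }

Because the implementation is explicit, callers must go through the interface, e.g. ((IDeepCloneable<Point>)point).Clone().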
Example #4
        /// <summary>
        /// Receiver for the combined VAD signal and audio data.
        /// </summary>
        /// <param name="data">A message containing the combined VAD signal and audio data.</param>
        /// <param name="e">The message envelope.</param>
        /// <returns>The <see cref="Task"/> representing the asynchronous operation.</returns>
        protected override async Task ReceiveAsync(ValueTuple<AudioBuffer, bool> data, Envelope e)
        {
            byte[] audioData = data.Item1.Data;
            bool hasSpeech = data.Item2;

            if (this.lastAudioOriginatingTime == default)
            {
                this.lastAudioOriginatingTime = e.OriginatingTime - data.Item1.Duration;
            }

            var previousAudioOriginatingTime = this.lastAudioOriginatingTime;

            this.lastAudioOriginatingTime = e.OriginatingTime;

            // Throw if a fatal error has occurred in the OnConversationError event handler
            if (this.fatalError)
            {
                if (this.conversationError != null)
                {
                    var error = this.conversationError;
                    this.conversationError = null;
                    throw error;
                }

                // Stop processing until the pipeline terminates
                return;
            }

            if (hasSpeech)
            {
                this.lastAudioContainingSpeechTime = e.OriginatingTime;

                bool newSession = false;
                if (!this.lastAudioContainedSpeech)
                {
                    // Queue a new recognition task
                    this.currentRecognitionTask = new SpeechRecognitionTask
                    {
                        SpeechStartTime = previousAudioOriginatingTime,
                    };
                    this.pendingRecognitionTasks.Enqueue(this.currentRecognitionTask);

                    // Create a new session when sending the first audio packet
                    newSession = true;
                }

                // Send the audio data to the cloud
                await this.speechRecognitionClient.SendAudioAsync(audioData, this.cancellationTokenSource.Token, newSession);

                // Add audio to the current utterance queue so we can reconstruct it in the recognition result later
                this.currentQueue.Enqueue(data.DeepClone(this.In.Recycler));
            }

            // If this is the last audio packet containing speech
            if (!hasSpeech && this.lastAudioContainedSpeech)
            {
                // If this is the first audio packet containing no speech, use the time of the previous audio packet
                // as the end of the actual speech, since that is the last packet that contained any speech.
                var lastVADSpeechEndTime = this.lastAudioContainingSpeechTime;

                // Update the latest in-progress recognition task with the buffered audio and speech end time

                // Allocate a buffer large enough to hold the buffered audio
                BufferWriter bw = new BufferWriter(this.currentQueue.Sum(b => b.Item1.Length));

                // Get the audio associated with the recognized text from the current queue.
                ValueTuple<AudioBuffer, bool> buffer;
                while (this.currentQueue.Count > 0)
                {
                    buffer = this.currentQueue.Dequeue();
                    bw.Write(buffer.Item1.Data);

                    // We are done with this buffer so enqueue it for recycling
                    this.In.Recycle(buffer);
                }

                // Save the buffered audio
                this.currentRecognitionTask.Audio = new AudioBuffer(bw.Buffer, this.Configuration.InputFormat);
                this.currentRecognitionTask.SpeechEndTime = lastVADSpeechEndTime;

                // Call EndAudio to signal that this is the last packet
                await this.speechRecognitionClient.SendEndAudioAsync(this.cancellationTokenSource.Token);
            }

            // Remember last audio state.
            this.lastAudioContainedSpeech = hasSpeech;
        }
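
Example #4 reads as a later revision of Example #1: instead of tracking a single utterance in per-field state, it queues one SpeechRecognitionTask per utterance, so recognition results that arrive asynchronously can be matched back to the utterance's audio and time interval. A sketch of that holder type, reconstructed from the properties the snippet actually sets (the real class may carry additional state, such as the recognition results themselves):

    // Reconstructed from usage above; the exact member set is an assumption.
    internal sealed class SpeechRecognitionTask
    {
        public DateTime SpeechStartTime { get; set; }  // originating time of the packet before speech began
        public DateTime SpeechEndTime { get; set; }    // originating time of the last speech-bearing packet
        public AudioBuffer Audio { get; set; }         // the buffered utterance audio
    }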