/// <summary>
/// Receiver for the combined VAD signal and audio data.
/// </summary>
/// <param name="data">A message containing the combined VAD signal and audio data.</param>
/// <param name="e">The message envelope.</param>
/// <returns>The <see cref="Task"/> representing the asynchronous operation.</returns>
protected override async Task ReceiveAsync(ValueTuple<AudioBuffer, bool> data, Envelope e)
{
    byte[] audioData = data.Item1.Data;
    bool hasSpeech = data.Item2;

    var previousAudioOriginatingTime = this.lastAudioOriginatingTime;
    this.lastAudioOriginatingTime = e.OriginatingTime;

    // Throw if a fatal error has occurred in the OnConversationError event handler
    if (this.fatalError)
    {
        if (this.conversationError != null)
        {
            var error = this.conversationError;
            this.conversationError = null;
            throw error;
        }

        // Stop processing until the pipeline terminates
        return;
    }

    if (hasSpeech)
    {
        this.lastAudioContainingSpeechTime = e.OriginatingTime;
    }

    if (hasSpeech || this.lastAudioContainedSpeech)
    {
        // Send the audio data to the cloud
        await this.speechRecognitionClient.SendAudioAsync(audioData, this.cancellationTokenSource.Token);

        // Add audio to the current utterance queue so we can reconstruct it in the recognition result later
        this.currentQueue.Enqueue(data.DeepClone(this.In.Recycler));
    }

    // If this is the last audio packet containing speech
    if (!hasSpeech && this.lastAudioContainedSpeech)
    {
        this.lastVADSpeechEndTime = this.lastAudioContainingSpeechTime;
        this.lastVADSpeechTimeInterval = new TimeInterval(this.lastVADSpeechStartTime, this.lastVADSpeechEndTime);

        // Allocate a buffer large enough to hold the buffered audio
        BufferWriter bw = new BufferWriter(this.currentQueue.Sum(b => b.Item1.Length));

        // Get the audio associated with the recognized text from the current queue.
        ValueTuple<AudioBuffer, bool> buffer;
        while (this.currentQueue.TryDequeue(out buffer))
        {
            bw.Write(buffer.Item1.Data);

            // We are done with this buffer so enqueue it for recycling
            this.In.Recycle(buffer);
        }

        // Save the buffered audio
        this.lastAudioBuffer = bw.Buffer;

        // Call EndAudio to signal that this is the last packet
        await this.speechRecognitionClient.SendEndAudioAsync(this.cancellationTokenSource.Token);
    }
    else if (hasSpeech && !this.lastAudioContainedSpeech)
    {
        // If this is the first audio packet containing speech, mark the time of the previous audio packet
        // as the start of the actual speech
        this.lastVADSpeechStartTime = previousAudioOriginatingTime;

        // Also post a null partial recognition result
        this.lastPartialResult = string.Empty;
        this.PostWithOriginatingTimeConsistencyCheck(
            this.PartialRecognitionResults,
            this.BuildPartialSpeechRecognitionResult(this.lastPartialResult),
            e.OriginatingTime);
    }

    // Remember last audio state.
    this.lastAudioContainedSpeech = hasSpeech;
}
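// A minimal wiring sketch (an assumption, not part of the excerpt above): a receiver with this
// signature is typically fed by joining an audio stream with a voice activity detection (VAD)
// stream, producing ValueTuple<AudioBuffer, bool> messages. The AzureSpeechRecognizer component
// and its configuration property names below are illustrative; substitute whichever recognizer
// component actually hosts the ReceiveAsync method above.
using Microsoft.Psi;
using Microsoft.Psi.Audio;
using Microsoft.Psi.CognitiveServices.Speech;
using Microsoft.Psi.Speech;

using (var pipeline = Pipeline.Create())
{
    // Capture 16 kHz, 16-bit mono PCM audio, the format cloud recognizers usually expect
    var audio = new AudioCapture(
        pipeline,
        new AudioCaptureConfiguration { OutputFormat = WaveFormat.Create16kHz1Channel16BitPcm() });

    // Run a voice activity detector over the audio stream
    var vad = new SystemVoiceActivityDetector(pipeline);
    audio.PipeTo(vad);

    // Join audio with the VAD signal into (AudioBuffer, bool) tuples and pipe them to the recognizer
    var recognizer = new AzureSpeechRecognizer(
        pipeline,
        new AzureSpeechRecognizerConfiguration { SubscriptionKey = "...", Region = "..." });
    audio.Join(vad).PipeTo(recognizer);

    pipeline.Run();
}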
Point IDeepCloneable<Point>.Clone() => new Point(_value.DeepClone());
Line IDeepCloneable<Line>.Clone() => new Line(_value.DeepClone());
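// The two clone implementations above assume a deep-clone contract of roughly the following
// shape (a sketch inferred from usage; the actual interface in the codebase may differ):
public interface IDeepCloneable<T>
{
    // Returns a deep copy of this instance, so that mutations on the clone
    // never alias state in the original.
    T Clone();
}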
/// <summary>
/// Receiver for the combined VAD signal and audio data.
/// </summary>
/// <param name="data">A message containing the combined VAD signal and audio data.</param>
/// <param name="e">The message envelope.</param>
/// <returns>The <see cref="Task"/> representing the asynchronous operation.</returns>
protected override async Task ReceiveAsync(ValueTuple<AudioBuffer, bool> data, Envelope e)
{
    byte[] audioData = data.Item1.Data;
    bool hasSpeech = data.Item2;

    if (this.lastAudioOriginatingTime == default)
    {
        this.lastAudioOriginatingTime = e.OriginatingTime - data.Item1.Duration;
    }

    var previousAudioOriginatingTime = this.lastAudioOriginatingTime;
    this.lastAudioOriginatingTime = e.OriginatingTime;

    // Throw if a fatal error has occurred in the OnConversationError event handler
    if (this.fatalError)
    {
        if (this.conversationError != null)
        {
            var error = this.conversationError;
            this.conversationError = null;
            throw error;
        }

        // Stop processing until the pipeline terminates
        return;
    }

    if (hasSpeech)
    {
        this.lastAudioContainingSpeechTime = e.OriginatingTime;
        bool newSession = false;

        if (!this.lastAudioContainedSpeech)
        {
            // Queue a new recognition task
            this.currentRecognitionTask = new SpeechRecognitionTask { SpeechStartTime = previousAudioOriginatingTime };
            this.pendingRecognitionTasks.Enqueue(this.currentRecognitionTask);

            // Create a new session when sending the first audio packet
            newSession = true;
        }

        // Send the audio data to the cloud
        await this.speechRecognitionClient.SendAudioAsync(audioData, this.cancellationTokenSource.Token, newSession);

        // Add audio to the current utterance queue so we can reconstruct it in the recognition result later
        this.currentQueue.Enqueue(data.DeepClone(this.In.Recycler));
    }

    // If this is the last audio packet containing speech
    if (!hasSpeech && this.lastAudioContainedSpeech)
    {
        // If this is the first audio packet containing no speech, use the time of the previous audio packet
        // as the end of the actual speech, since that is the last packet that contained any speech.
        var lastVADSpeechEndTime = this.lastAudioContainingSpeechTime;

        // Update the latest in-progress recognition task.
        // Allocate a buffer large enough to hold the buffered audio
        BufferWriter bw = new BufferWriter(this.currentQueue.Sum(b => b.Item1.Length));

        // Get the audio associated with the recognized text from the current queue.
        ValueTuple<AudioBuffer, bool> buffer;
        while (this.currentQueue.Count > 0)
        {
            buffer = this.currentQueue.Dequeue();
            bw.Write(buffer.Item1.Data);

            // We are done with this buffer so enqueue it for recycling
            this.In.Recycle(buffer);
        }

        // Save the buffered audio
        this.currentRecognitionTask.Audio = new AudioBuffer(bw.Buffer, this.Configuration.InputFormat);
        this.currentRecognitionTask.SpeechEndTime = lastVADSpeechEndTime;

        // Call EndAudio to signal that this is the last packet
        await this.speechRecognitionClient.SendEndAudioAsync(this.cancellationTokenSource.Token);
    }

    // Remember last audio state.
    this.lastAudioContainedSpeech = hasSpeech;
}
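// The revised receiver above references a SpeechRecognitionTask helper that is not shown in
// this excerpt. A minimal sketch consistent with how its members are used above (inferred
// from usage; the real type may carry additional state, such as accumulated partial results):
internal class SpeechRecognitionTask
{
    // Originating time of the last audio packet before speech was detected
    public DateTime SpeechStartTime { get; set; }

    // Originating time of the last audio packet that contained speech
    public DateTime SpeechEndTime { get; set; }

    // The reconstructed utterance audio, assembled from the buffered packets
    public AudioBuffer Audio { get; set; }
}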