private async Task StartStreamAsync()
{
    var now = DateTime.UtcNow;
    if (m_rpcStream != null && now >= m_rpcStreamDeadline)
    {
        Console.WriteLine($"Closing stream before it times out");
        await m_rpcStream.WriteCompleteAsync();
        m_rpcStream.GrpcCall.Dispose();
        m_rpcStream = null;
    }

    // If we have a valid stream at this point, we're fine.
    if (m_rpcStream != null)
    {
        return;
    }

    // We need to create a new stream, either because we're just starting or because we've just closed the previous one.
    m_rpcStream = m_client.StreamingRecognize();
    m_rpcStreamDeadline = now + s_streamTimeLimit;
    m_processingBufferStart = TimeSpan.Zero;
    m_serverResponseAvailableTask = m_rpcStream.GetResponseStream().MoveNextAsync();
    await m_rpcStream.WriteAsync(m_streamingRecognizeRequest);

    Console.WriteLine($"Writing {m_speechUnprocessedBuffer.Count} chunks into the new stream.");
    foreach (var chunk in m_speechUnprocessedBuffer)
    {
        await WriteAudioChunk(chunk);
    }
}
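The snippet above replays buffered audio through a WriteAudioChunk helper that is not shown. A minimal sketch, assuming the unprocessed buffer holds protobuf ByteString chunks of raw LINEAR16 audio (wrap byte[] data with ByteString.CopyFrom first if that is what the buffer stores), could look like this:

// Minimal sketch of the WriteAudioChunk helper assumed by StartStreamAsync.
// Each chunk is sent as the AudioContent of a follow-up StreamingRecognizeRequest.
private Task WriteAudioChunk(ByteString chunk) =>
    m_rpcStream.WriteAsync(new StreamingRecognizeRequest { AudioContent = chunk });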
/// <summary>
/// Processes responses received so far from the server,
/// returning false once "exit" or "quit" has been heard, and true otherwise.
/// </summary>
private bool ProcessResponses()
{
    while (_serverResponseAvailableTask.IsCompleted && _serverResponseAvailableTask.Result)
    {
        var response = _rpcStream.GetResponseStream().Current;
        _serverResponseAvailableTask = _rpcStream.GetResponseStream().MoveNextAsync();

        // Uncomment this to see the details of interim results.
        // Console.WriteLine($"Response: {response}");

        // See if one of the results is a "final result". If so, we trim our
        // processing buffer.
        var finalResult = response.Results.FirstOrDefault(r => r.IsFinal);
        if (finalResult != null)
        {
            string transcript = finalResult.Alternatives[0].Transcript;
            Console.WriteLine($"Transcript: {transcript}");
            if (transcript.ToLowerInvariant().Contains("exit") ||
                transcript.ToLowerInvariant().Contains("quit"))
            {
                return false;
            }

            TimeSpan resultEndTime = finalResult.ResultEndTime.ToTimeSpan();

            // Rather than explicitly iterate over the list, we just always deal with the first
            // element, either removing it or stopping.
            int removed = 0;
            while (_processingBuffer.First != null)
            {
                var sampleDuration = TimeSpan.FromSeconds(_processingBuffer.First.Value.Length / (double)BytesPerSecond);
                var sampleEnd = _processingBufferStart + sampleDuration;

                // If the first sample in the buffer ends after the result ended, stop.
                // Note that part of the sample might have been included in the result, but the samples
                // are short enough that this shouldn't cause problems.
                if (sampleEnd > resultEndTime)
                {
                    break;
                }
                _processingBufferStart = sampleEnd;
                _processingBuffer.RemoveFirst();
                removed++;
            }
        }
    }
    return true;
}
private bool ProcessResponses()
{
    while (m_serverResponseAvailableTask.IsCompleted && m_serverResponseAvailableTask.Result)
    {
        var response = m_rpcStream.GetResponseStream().Current;
        m_serverResponseAvailableTask = m_rpcStream.GetResponseStream().MoveNextAsync();

        // See if one of the results is a "final result". If so, we trim our
        // processing buffer.
        var finalResult = response.Results.FirstOrDefault(r => r.IsFinal);
        if (finalResult != null)
        {
            var recognizedSpeech = finalResult.Alternatives[0].Transcript;
            Console.WriteLine($"Recognized Speech: {recognizedSpeech}");
            m_speechHandlerChain.HandleSpeechRequest(recognizedSpeech);

            var resultEndTime = finalResult.ResultEndTime.ToTimeSpan();

            // Rather than explicitly iterate over the list, we just always deal with the first
            // element, either removing it or stopping.
            int removed = 0;
            while (m_speechUnprocessedBuffer.First != null)
            {
                var sampleDuration = TimeSpan.FromSeconds(m_speechUnprocessedBuffer.First.Value.Length / (double)m_bytesPerSecond);
                var sampleEnd = m_processingBufferStart + sampleDuration;

                // If the first sample in the buffer ends after the result ended, stop.
                // Note that part of the sample might have been included in the result, but the samples
                // are short enough that this shouldn't cause problems.
                if (sampleEnd > resultEndTime)
                {
                    break;
                }
                m_processingBufferStart = sampleEnd;
                m_speechUnprocessedBuffer.RemoveFirst();
                removed++;
            }
        }
    }
    return true;
}
/// <summary>
/// Starts a new RPC streaming call if necessary. This will be if either it's the first call
/// (so we don't have a current request) or if the current request will time out soon.
/// In the latter case, after starting the new request, we copy any chunks we'd already sent
/// in the previous request which hadn't been included in a "final result".
/// </summary>
private async Task MaybeStartStreamAsync()
{
    var now = DateTime.UtcNow;
    if (_rpcStream != null && now >= _rpcStreamDeadline)
    {
        Console.WriteLine($"Closing stream before it times out");
        await _rpcStream.WriteCompleteAsync();
        _rpcStream.GrpcCall.Dispose();
        _rpcStream = null;
    }

    // If we have a valid stream at this point, we're fine.
    if (_rpcStream != null)
    {
        //Console.WriteLine("We already have a google stream");
        return;
    }

    var translationLanguage = MapLanguageCodeToGoogleLanguage(_myct.FromLang);
    Console.WriteLine($"Creating new google stream to translate from {translationLanguage}");

    // We need to create a new stream, either because we're just starting or because we've just closed the previous one.
    _rpcStream = _client.StreamingRecognize();
    _rpcStreamDeadline = now + s_streamTimeLimit;
    _processingBufferStart = TimeSpan.Zero;
    _serverResponseAvailableTask = _rpcStream.GetResponseStream().MoveNextAsync();
    await _rpcStream.WriteAsync(new StreamingRecognizeRequest
    {
        StreamingConfig = new StreamingRecognitionConfig
        {
            Config = new RecognitionConfig
            {
                Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                SampleRateHertz = SampleRate,
                LanguageCode = translationLanguage, // e.g. "en-US"
                MaxAlternatives = 1,
                UseEnhanced = true,
                EnableAutomaticPunctuation = true
            },
            InterimResults = true,
            //SingleUtterance = false
        }
    });

    Console.WriteLine($"Writing {_processingBuffer.Count} chunks into the new stream.");
    foreach (var chunk in _processingBuffer)
    {
        await WriteAudioChunk(chunk);
    }
}
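This variant resolves the recognition language through a MapLanguageCodeToGoogleLanguage helper that is not part of the snippet. It is assumed to translate the application's own language identifiers into the BCP-47 codes Cloud Speech-to-Text expects; the mapping entries below are purely illustrative:

// Hypothetical sketch of the MapLanguageCodeToGoogleLanguage helper referenced above.
// The input/output values shown here are examples, not taken from the original code.
private static string MapLanguageCodeToGoogleLanguage(string appLanguage) =>
    appLanguage switch
    {
        "en" => "en-US",
        "es" => "es-ES",
        "fr" => "fr-FR",
        _ => "en-US" // fall back to a language the recognizer supports
    };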
/// <summary>Snippet for StreamingRecognize</summary>
public async Task StreamingRecognize()
{
    // Snippet: StreamingRecognize(CallSettings, BidirectionalStreamingSettings)
    // Create client
    SpeechClient speechClient = SpeechClient.Create();

    // Initialize streaming call, retrieving the stream object
    SpeechClient.StreamingRecognizeStream response = speechClient.StreamingRecognize();

    // Sending requests and retrieving responses can be arbitrarily interleaved
    // Exact sequence will depend on client/server behavior

    // Create task to do something with responses from server
    Task responseHandlerTask = Task.Run(async () =>
    {
        // Note that C# 8 code can use await foreach
        AsyncResponseStream<StreamingRecognizeResponse> responseStream = response.GetResponseStream();
        while (await responseStream.MoveNextAsync())
        {
            StreamingRecognizeResponse responseItem = responseStream.Current;
            // Do something with streamed response
        }
        // The response stream has completed
    });

    // Send requests to the server
    bool done = false;
    while (!done)
    {
        // Initialize a request
        StreamingRecognizeRequest request = new StreamingRecognizeRequest
        {
            StreamingConfig = new StreamingRecognitionConfig(),
            AudioContent = ByteString.Empty,
        };
        // Stream a request to the server
        await response.WriteAsync(request);

        // Set "done" to true when sending requests is complete
    }

    // Complete writing requests to the stream
    await response.WriteCompleteAsync();

    // Await the response handler
    // This will complete once all server responses have been processed
    await responseHandlerTask;
    // End snippet
}
/// <summary>
/// Starts a new RPC streaming call if necessary. This will be if either it's the first call
/// (so we don't have a current request) or if the current request will time out soon.
/// In the latter case, after starting the new request, we copy any chunks we'd already sent
/// in the previous request which hadn't been included in a "final result".
/// </summary>
private async Task MaybeStartStreamAsync()
{
    var now = DateTime.UtcNow;
    if (_rpcStream != null && now >= _rpcStreamDeadline)
    {
        Console.WriteLine($"Closing stream before it times out");
        await _rpcStream.WriteCompleteAsync();
        _rpcStream.GrpcCall.Dispose();
        _rpcStream = null;
    }

    // If we have a valid stream at this point, we're fine.
    if (_rpcStream != null)
    {
        return;
    }

    // We need to create a new stream, either because we're just starting or because we've just closed the previous one.
    _rpcStream = _client.StreamingRecognize();
    _rpcStreamDeadline = now + s_streamTimeLimit;
    _processingBufferStart = TimeSpan.Zero;
    _serverResponseAvailableTask = _rpcStream.GetResponseStream().MoveNextAsync();
    await _rpcStream.WriteAsync(new StreamingRecognizeRequest
    {
        StreamingConfig = new StreamingRecognitionConfig
        {
            Config = new RecognitionConfig
            {
                Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                SampleRateHertz = SampleRate,
                LanguageCode = "en-US",
                MaxAlternatives = 1
            },
            InterimResults = true,
        }
    });

    Console.WriteLine($"Writing {_processingBuffer.Count} chunks into the new stream.");
    foreach (var chunk in _processingBuffer)
    {
        await WriteAudioChunk(chunk);
    }
}
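Both MaybeStartStreamAsync variants lean on state that the snippets never declare. A sketch of those fields, with types inferred from how they are used (the constant values are typical choices, not taken from the original code), might look like this:

// Assumed state behind MaybeStartStreamAsync; values are illustrative.
private const int SampleRate = 16_000;                        // LINEAR16 mono PCM
private static readonly TimeSpan s_streamTimeLimit =
    TimeSpan.FromSeconds(290);                                // just under the API's ~5-minute streaming cap
private readonly SpeechClient _client = SpeechClient.Create();
private SpeechClient.StreamingRecognizeStream _rpcStream;     // current streaming call, if any
private DateTime _rpcStreamDeadline;                          // when to proactively recycle the stream
private readonly LinkedList<ByteString> _processingBuffer = new LinkedList<ByteString>();
private TimeSpan _processingBufferStart;                      // start time of the first buffered chunk
private Task<bool> _serverResponseAvailableTask;              // pending MoveNextAsync on the response stream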
/// <summary>
/// Starts the loop that continuously reads recognition results.
/// </summary>
public async Task Run(CancellationToken cancellationToken)
{
    await CreateNewStreamIfNeeded();
    _wasapiCapture.StartRecording();
    while (!cancellationToken.IsCancellationRequested)
    {
        AsyncResponseStream<StreamingRecognizeResponse> responseStream;
        lock (_recognizeStreamLock)
        {
            responseStream = _recognizeStream.GetResponseStream();
        }
        try
        {
            await responseStream.MoveNextAsync(cancellationToken);
        }
        catch (RpcException) // Thrown when _recognizeStream is replaced while we await the next response.
        {
            continue;
        }
        var results = responseStream.Current.Results;
        ResultArrive?.Invoke(this, new ResultArriveEventArgs(results));
    }
}
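The Run loop only consumes responses; the audio side is implied by _wasapiCapture. A hypothetical sketch of the sending half, assuming _wasapiCapture is an NAudio WasapiCapture whose DataAvailable event delivers LINEAR16 PCM (the handler name and write serialization are illustrative, not from the original), could be:

// Hypothetical companion to Run(): forwarding captured audio into the same stream.
private async void OnDataAvailable(object sender, WaveInEventArgs e)
{
    SpeechClient.StreamingRecognizeStream stream;
    lock (_recognizeStreamLock)
    {
        stream = _recognizeStream;
    }
    // gRPC allows only one pending WriteAsync per stream, so in a real implementation
    // these writes must be serialized (e.g. via a queue) rather than fired concurrently.
    await stream.WriteAsync(new StreamingRecognizeRequest
    {
        AudioContent = ByteString.CopyFrom(e.Buffer, 0, e.BytesRecorded)
    });
}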
/** (Re-)initializes the Cloud-based streaming speech recognizer. */
private void ReInitStreamRecognizer()
{
    lock (speakerIdBufferLock)
    {
        speakerIdBufferPos = 0;
    }
    recogStream = speechClient.StreamingRecognize();
    SpeakerDiarizationConfig diarizationConfig = new SpeakerDiarizationConfig()
    {
        EnableSpeakerDiarization = ENABLE_SPEAKER_DIARIZATION,
        MaxSpeakerCount = MAX_SPEAKER_COUNT,
        MinSpeakerCount = MIN_SPEAKER_COUNT,
    };
    recogStream.WriteAsync(new StreamingRecognizeRequest()
    {
        StreamingConfig = new StreamingRecognitionConfig()
        {
            Config = new RecognitionConfig()
            {
                Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                AudioChannelCount = 1,
                SampleRateHertz = audioFormat.SampleRate,
                LanguageCode = LANGUAGE_CODE,
                DiarizationConfig = diarizationConfig,
            },
            SingleUtterance = false,
        },
    });
    Task.Run(async () =>
    {
        while (await recogStream.GetResponseStream().MoveNextAsync())
        {
            foreach (var result in recogStream.GetResponseStream().Current.Results)
            {
                if (result.Alternatives.Count == 0)
                {
                    continue;
                }
                // Identify the alternative with the highest confidence.
                SpeechRecognitionAlternative bestAlt = null;
                foreach (var alternative in result.Alternatives)
                {
                    if (bestAlt == null || alternative.Confidence > bestAlt.Confidence)
                    {
                        bestAlt = alternative;
                    }
                }
                string transcript = bestAlt.Transcript.Trim();
                if (transcript.Length == 0)
                {
                    continue;
                }
                string transcriptInfo =
                    $"Speech transcript: {DateTime.Now}: \"{transcript}\" (confidence={bestAlt.Confidence})";
                if (ENABLE_SPEAKER_DIARIZATION)
                {
                    int speakerTag = bestAlt.Words[bestAlt.Words.Count - 1].SpeakerTag;
                    transcriptInfo += $" (speakerTag={speakerTag})";
                }
                Debug.WriteLine(transcriptInfo);
                if (ENABLE_SPEAKER_DIARIZATION && ENABLE_SPEAKER_ID)
                {
                    recognizeSpeaker(transcript, bestAlt);
                }
            }
        }
    });
    cummulativeRecogSeconds = 0f;
}
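ReInitStreamRecognizer resets cummulativeRecogSeconds, which implies the audio-feeding side increments it and re-initializes the stream before the API's streaming limit is reached. A hypothetical sketch of that feeding path, assuming 16-bit mono PCM chunks (the constant and method name are assumptions, not part of the original code):

// Hypothetical audio feed implied by cummulativeRecogSeconds.
private const float MAX_STREAMING_SECONDS = 290f; // stay under the ~5-minute streaming limit

private void OnAudioChunk(byte[] pcmChunk)
{
    // Write the chunk to the current stream (synchronously, to keep writes serialized).
    recogStream.WriteAsync(new StreamingRecognizeRequest()
    {
        AudioContent = ByteString.CopyFrom(pcmChunk),
    }).Wait();
    // Track how much audio this stream has seen: 2 bytes per 16-bit mono sample.
    cummulativeRecogSeconds += pcmChunk.Length / (2f * audioFormat.SampleRate);
    if (cummulativeRecogSeconds > MAX_STREAMING_SECONDS)
    {
        ReInitStreamRecognizer();
    }
}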