private async Task StartStreamAsync()
{
    var now = DateTime.UtcNow;
    if (m_rpcStream != null && now >= m_rpcStreamDeadline)
    {
        Console.WriteLine("Closing stream before it times out");
        await m_rpcStream.WriteCompleteAsync();
        m_rpcStream.GrpcCall.Dispose();
        m_rpcStream = null;
    }

    // If we have a valid stream at this point, we're fine.
    if (m_rpcStream != null)
    {
        return;
    }

    // We need to create a new stream, either because we're just starting
    // or because we've just closed the previous one.
    m_rpcStream = m_client.StreamingRecognize();
    m_rpcStreamDeadline = now + s_streamTimeLimit;
    m_processingBufferStart = TimeSpan.Zero;
    m_serverResponseAvailableTask = m_rpcStream.GetResponseStream().MoveNextAsync();
    await m_rpcStream.WriteAsync(m_streamingRecognizeRequest);

    Console.WriteLine($"Writing {m_speechUnprocessedBuffer.Count} chunks into the new stream.");
    foreach (var chunk in m_speechUnprocessedBuffer)
    {
        await WriteAudioChunk(chunk);
    }
}
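// The WriteAudioChunk helper used above isn't shown in this snippet. A minimal
// sketch, assuming each entry in m_speechUnprocessedBuffer is a
// Google.Protobuf.ByteString of raw audio (hypothetical, not the original
// implementation):
private Task WriteAudioChunk(ByteString chunk) =>
    m_rpcStream.WriteAsync(new StreamingRecognizeRequest { AudioContent = chunk });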
/// <summary>
/// Opens a new streaming recognition call and writes the initial request
/// carrying the recognition configuration.
/// </summary>
/// <param name="encoding">Audio encoding of the incoming stream.</param>
/// <param name="sampleRateHertz">Sample rate of the audio, in hertz.</param>
/// <param name="languageCode">BCP-47 language code of the spoken audio.</param>
/// <param name="intermResults">Whether interim (non-final) results should be returned.</param>
public async Task ConfigureSpeechRequest(
    RecognitionConfig.Types.AudioEncoding encoding,
    int sampleRateHertz,
    string languageCode,
    bool intermResults)
{
    if (speechClient == null)
    {
        return;
    }

    streamingCall = speechClient.StreamingRecognize();
    request = new StreamingRecognizeRequest()
    {
        StreamingConfig = new StreamingRecognitionConfig()
        {
            Config = new RecognitionConfig()
            {
                Encoding = encoding,
                SampleRateHertz = sampleRateHertz,
                LanguageCode = languageCode,
            },
            InterimResults = intermResults,
        }
    };

    // Write the initial request with the config.
    await streamingCall.WriteAsync(request);
}
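// Typical invocation, assuming LINEAR16 PCM captured at 16 kHz (the argument
// values below are illustrative, not from the original source):
await ConfigureSpeechRequest(
    RecognitionConfig.Types.AudioEncoding.Linear16,
    sampleRateHertz: 16000,
    languageCode: "en-US",
    intermResults: true);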
/// <summary>
/// Starts a new RPC streaming call if necessary. This will be if either it's the first call
/// (so we don't have a current request) or if the current request will time out soon.
/// In the latter case, after starting the new request, we copy any chunks we'd already sent
/// in the previous request which hadn't been included in a "final result".
/// </summary>
private async Task MaybeStartStreamAsync()
{
    var now = DateTime.UtcNow;
    if (_rpcStream != null && now >= _rpcStreamDeadline)
    {
        Console.WriteLine("Closing stream before it times out");
        await _rpcStream.WriteCompleteAsync();
        _rpcStream.GrpcCall.Dispose();
        _rpcStream = null;
    }

    // If we have a valid stream at this point, we're fine.
    if (_rpcStream != null)
    {
        return;
    }

    var translationLanguage = MapLanguageCodeToGoogleLanguage(_myct.FromLang);
    Console.WriteLine($"Creating new google stream to translate from {translationLanguage}");

    // We need to create a new stream, either because we're just starting
    // or because we've just closed the previous one.
    _rpcStream = _client.StreamingRecognize();
    _rpcStreamDeadline = now + s_streamTimeLimit;
    _processingBufferStart = TimeSpan.Zero;
    _serverResponseAvailableTask = _rpcStream.GetResponseStream().MoveNextAsync();
    await _rpcStream.WriteAsync(new StreamingRecognizeRequest
    {
        StreamingConfig = new StreamingRecognitionConfig
        {
            Config = new RecognitionConfig
            {
                Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                SampleRateHertz = SampleRate,
                LanguageCode = translationLanguage,
                MaxAlternatives = 1,
                UseEnhanced = true,
                EnableAutomaticPunctuation = true
            },
            InterimResults = true,
        }
    });

    Console.WriteLine($"Writing {_processingBuffer.Count} chunks into the new stream.");
    foreach (var chunk in _processingBuffer)
    {
        await WriteAudioChunk(chunk);
    }
}
/// <summary>Snippet for StreamingRecognize</summary>
public async Task StreamingRecognize()
{
    // Snippet: StreamingRecognize(CallSettings, BidirectionalStreamingSettings)
    // Create client
    SpeechClient speechClient = SpeechClient.Create();

    // Initialize streaming call, retrieving the stream object
    SpeechClient.StreamingRecognizeStream response = speechClient.StreamingRecognize();

    // Sending requests and retrieving responses can be arbitrarily interleaved
    // Exact sequence will depend on client/server behavior

    // Create task to do something with responses from server
    Task responseHandlerTask = Task.Run(async () =>
    {
        // Note that C# 8 code can use await foreach
        AsyncResponseStream<StreamingRecognizeResponse> responseStream = response.GetResponseStream();
        while (await responseStream.MoveNextAsync())
        {
            StreamingRecognizeResponse responseItem = responseStream.Current;
            // Do something with streamed response
        }
        // The response stream has completed
    });

    // Send requests to the server
    bool done = false;
    while (!done)
    {
        // Initialize a request
        StreamingRecognizeRequest request = new StreamingRecognizeRequest
        {
            StreamingConfig = new StreamingRecognitionConfig(),
            AudioContent = ByteString.Empty,
        };
        // Stream a request to the server
        await response.WriteAsync(request);

        // Set "done" to true when sending requests is complete
    }

    // Complete writing requests to the stream
    await response.WriteCompleteAsync();

    // Await the response handler
    // This will complete once all server responses have been processed
    await responseHandlerTask;
    // End snippet
}
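// One possible body for the "Do something with streamed response" placeholder
// above (illustrative, not part of the generated snippet): print the top
// alternative of each result.
foreach (StreamingRecognitionResult result in responseItem.Results)
{
    if (result.Alternatives.Count > 0)
    {
        Console.WriteLine(result.Alternatives[0].Transcript);
    }
}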
/// <summary>
/// Starts a new RPC streaming call if necessary. This will be if either it's the first call
/// (so we don't have a current request) or if the current request will time out soon.
/// In the latter case, after starting the new request, we copy any chunks we'd already sent
/// in the previous request which hadn't been included in a "final result".
/// </summary>
private async Task MaybeStartStreamAsync()
{
    var now = DateTime.UtcNow;
    if (_rpcStream != null && now >= _rpcStreamDeadline)
    {
        Console.WriteLine("Closing stream before it times out");
        await _rpcStream.WriteCompleteAsync();
        _rpcStream.GrpcCall.Dispose();
        _rpcStream = null;
    }

    // If we have a valid stream at this point, we're fine.
    if (_rpcStream != null)
    {
        return;
    }

    // We need to create a new stream, either because we're just starting
    // or because we've just closed the previous one.
    _rpcStream = _client.StreamingRecognize();
    _rpcStreamDeadline = now + s_streamTimeLimit;
    _processingBufferStart = TimeSpan.Zero;
    _serverResponseAvailableTask = _rpcStream.GetResponseStream().MoveNextAsync();
    await _rpcStream.WriteAsync(new StreamingRecognizeRequest
    {
        StreamingConfig = new StreamingRecognitionConfig
        {
            Config = new RecognitionConfig
            {
                Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                SampleRateHertz = SampleRate,
                LanguageCode = "en-US",
                MaxAlternatives = 1
            },
            InterimResults = true,
        }
    });

    Console.WriteLine($"Writing {_processingBuffer.Count} chunks into the new stream.");
    foreach (var chunk in _processingBuffer)
    {
        await WriteAudioChunk(chunk);
    }
}
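// s_streamTimeLimit isn't defined in this snippet. The Speech API caps a
// single streaming call at roughly five minutes, so a plausible definition
// (an assumption here) recycles the stream with a safety margin:
private static readonly TimeSpan s_streamTimeLimit = TimeSpan.FromSeconds(290);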
/// <summary>Snippet for StreamingRecognize</summary>
public async Task StreamingRecognize()
{
    // Snippet: StreamingRecognize(CallSettings,BidirectionalStreamingSettings)
    // Create client
    SpeechClient speechClient = SpeechClient.Create();

    // Initialize streaming call, retrieving the stream object
    SpeechClient.StreamingRecognizeStream duplexStream = speechClient.StreamingRecognize();

    // Sending requests and retrieving responses can be arbitrarily interleaved.
    // Exact sequence will depend on client/server behavior.

    // Create task to do something with responses from server
    Task responseHandlerTask = Task.Run(async () =>
    {
        IAsyncEnumerator<StreamingRecognizeResponse> responseStream = duplexStream.ResponseStream;
        while (await responseStream.MoveNext())
        {
            StreamingRecognizeResponse response = responseStream.Current;
            // Do something with streamed response
        }
        // The response stream has completed
    });

    // Send requests to the server
    bool done = false;
    while (!done)
    {
        // Initialize a request
        StreamingRecognizeRequest request = new StreamingRecognizeRequest();
        // Stream a request to the server
        await duplexStream.WriteAsync(request);

        // Set "done" to true when sending requests is complete
    }

    // Complete writing requests to the stream
    await duplexStream.WriteCompleteAsync();

    // Await the response handler.
    // This will complete once all server responses have been processed.
    await responseHandlerTask;
    // End snippet
}
private async Task SetupSpeechClient()
{
    _speech = SpeechClient.Create();
    _streamingCall = _speech.StreamingRecognize();
    await _streamingCall.WriteAsync(
        new StreamingRecognizeRequest()
        {
            StreamingConfig = new StreamingRecognitionConfig()
            {
                Config = new RecognitionConfig()
                {
                    Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                    SampleRateHertz = 16000,
                    LanguageCode = "sv",
                },
                InterimResults = true,
            }
        });
}
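// After this initial configuration request, every subsequent request on
// _streamingCall should carry raw audio instead of config. A minimal sketch
// (hypothetical helper; names are illustrative), assuming 16-bit PCM buffers
// arrive from a capture callback:
private Task SendAudioAsync(byte[] buffer, int bytesRecorded) =>
    _streamingCall.WriteAsync(new StreamingRecognizeRequest
    {
        AudioContent = Google.Protobuf.ByteString.CopyFrom(buffer, 0, bytesRecorded)
    });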
public async Task StartRecording()
{
    _speechClient = SpeechClient.Create();
    _streamingRecognizeStream = _speechClient.StreamingRecognize();

    // Write the initial request with the config.
    await _streamingRecognizeStream.WriteAsync(
        new StreamingRecognizeRequest()
        {
            StreamingConfig = new StreamingRecognitionConfig()
            {
                Config = new RecognitionConfig()
                {
                    Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                    SampleRateHertz = 16000,
                    LanguageCode = "ro"
                },
                InterimResults = false,
                SingleUtterance = false
            }
        });

    // Forward each transcript alternative to all connected clients as it arrives.
    _sendResponses = Task.Run(async () =>
    {
        var responseStream = _streamingRecognizeStream.GetResponseStream();
        while (await responseStream.MoveNextAsync())
        {
            StreamingRecognizeResponse response = responseStream.Current;
            foreach (StreamingRecognitionResult result in response.Results)
            {
                foreach (SpeechRecognitionAlternative alternative in result.Alternatives)
                {
                    await Clients.All.SendAsync("ReceiveMessage", alternative.Transcript);
                }
            }
        }
    });
}
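// A matching stop method isn't shown. A minimal sketch (hypothetical):
// complete the write side so the server can finish, then drain the
// response-forwarding task started above.
public async Task StopRecording()
{
    await _streamingRecognizeStream.WriteCompleteAsync();
    await _sendResponses;
}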
public async void Start()
{
    try
    {
        var credentials = GoogleCredential.FromFile("Extensions/25670f9058d6.json");
        var channel = new Channel(SpeechClient.DefaultEndpoint.Host, credentials.ToChannelCredentials());
        timer = new Timer(Elapsed, null, 60000, Timeout.Infinite);
        speech = SpeechClient.Create(channel);
        stream = new SpeechStream(65536);
        streaming = speech.StreamingRecognize();
        engine = new SpeechRecognitionEngine();
        engine.LoadGrammar(new DictationGrammar());
        engine.AudioStateChanged += Engine_AudioStateChanged;
        engine.SetInputToAudioStream(stream,
            new SpeechAudioFormatInfo(EncodingFormat.ULaw, 8000, 8, 1, 8000, 1, null));
        engine.RecognizeAsync(RecognizeMode.Multiple);
    }
    catch (Exception ex)
    {
        systemLog.Error(prefix + " Could not create SpeechClient", ex);
        // Bail out: "streaming" is not usable if initialization failed.
        return;
    }

    await streaming.WriteAsync(new StreamingRecognizeRequest()
    {
        StreamingConfig = new StreamingRecognitionConfig()
        {
            InterimResults = true,
            Config = new RecognitionConfig()
            {
                Encoding = RecognitionConfig.Types.AudioEncoding.Mulaw,
                SampleRateHertz = 8000,
                LanguageCode = "en-US"
            },
        }
    });

    systemLog.DebugFormat("{0} Google Speech To Text stream started", prefix);
    IsReady = true;

    handle = Task.Run(async () =>
    {
        while (await streaming.ResponseStream.MoveNext(default(CancellationToken)))
        {
            try
            {
                foreach (var result in streaming.ResponseStream.Current.Results)
                {
                    if (!result.IsFinal) continue;
                    if (result.Alternatives.Count == 0) continue;
                    var alternative = result.Alternatives.OrderByDescending(a => a.Confidence).FirstOrDefault();
                    var args = new TextEventArgs(alternative.Transcript, Encoding.UTF8);
                    TextAvailable(this, args);
                }
            }
            catch
            {
                break;
            }
        }
    });
}
/** (Re-)initializes the Cloud-based streaming speech recognizer. */
private void ReInitStreamRecognizer()
{
    lock (speakerIdBufferLock)
    {
        speakerIdBufferPos = 0;
    }

    recogStream = speechClient.StreamingRecognize();
    SpeakerDiarizationConfig diarizationConfig = new SpeakerDiarizationConfig()
    {
        EnableSpeakerDiarization = ENABLE_SPEAKER_DIARIZATION,
        MaxSpeakerCount = MAX_SPEAKER_COUNT,
        MinSpeakerCount = MIN_SPEAKER_COUNT,
    };
    // Note: the config write is fire-and-forget because this method is
    // synchronous; awaiting it would require making the method async.
    recogStream.WriteAsync(new StreamingRecognizeRequest()
    {
        StreamingConfig = new StreamingRecognitionConfig()
        {
            Config = new RecognitionConfig()
            {
                Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                AudioChannelCount = 1,
                SampleRateHertz = audioFormat.SampleRate,
                LanguageCode = LANGUAGE_CODE,
                DiarizationConfig = diarizationConfig,
            },
            SingleUtterance = false,
        },
    });

    Task.Run(async () =>
    {
        // Obtain the response stream once rather than on every access.
        var responseStream = recogStream.GetResponseStream();
        while (await responseStream.MoveNextAsync())
        {
            foreach (var result in responseStream.Current.Results)
            {
                if (result.Alternatives.Count == 0)
                {
                    continue;
                }
                // Identify the alternative with the highest confidence.
                SpeechRecognitionAlternative bestAlt = null;
                foreach (var alternative in result.Alternatives)
                {
                    if (bestAlt == null || alternative.Confidence > bestAlt.Confidence)
                    {
                        bestAlt = alternative;
                    }
                }
                string transcript = bestAlt.Transcript.Trim();
                if (transcript.Length == 0)
                {
                    continue;
                }
                string transcriptInfo =
                    $"Speech transcript: {DateTime.Now}: \"{transcript}\" (confidence={bestAlt.Confidence})";
                if (ENABLE_SPEAKER_DIARIZATION)
                {
                    int speakerTag = bestAlt.Words[bestAlt.Words.Count - 1].SpeakerTag;
                    transcriptInfo += $" (speakerTag={speakerTag})";
                }
                Debug.WriteLine(transcriptInfo);
                if (ENABLE_SPEAKER_DIARIZATION && ENABLE_SPEAKER_ID)
                {
                    recognizeSpeaker(transcript, bestAlt);
                }
            }
        }
    });

    cummulativeRecogSeconds = 0f;
}
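// This method starts the response loop but doesn't show how captured audio
// reaches recogStream. A sketch of the missing piece (hypothetical helper),
// assuming mono LINEAR16 frames from the capture pipeline:
private Task SendAudioFrameAsync(byte[] pcmFrame, int length) =>
    recogStream.WriteAsync(new StreamingRecognizeRequest
    {
        AudioContent = Google.Protobuf.ByteString.CopyFrom(pcmFrame, 0, length)
    });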
public static async Task StreamingMicRecognizeAsync(int ms, ISpeechOutput output, Action finish,
    float minConfidence, int captureDeviceIndex)
{
    var streamingCall = Client.StreamingRecognize();
    await streamingCall.WriteAsync(
        new StreamingRecognizeRequest()
        {
            StreamingConfig = new StreamingRecognitionConfig()
            {
                Config = new RecognitionConfig()
                {
                    Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                    SampleRateHertz = 16000,
                    LanguageCode = "ru"
                },
                InterimResults = false,
            }
        });

    var normalSpeechDetected = false;
    Task printResponses = Task.Run(async () =>
    {
        var outText = string.Empty;
        while (await streamingCall.ResponseStream.MoveNext())
        {
            foreach (var result in streamingCall.ResponseStream.Current.Results)
            {
                var confidence = result.Alternatives.Max(x => x.Confidence);
                Debug.WriteLine("SpeechRecognition");
                foreach (var alt in result.Alternatives)
                {
                    Debug.WriteLine(alt.Transcript + " " + alt.Confidence);
                }
                if (confidence >= minConfidence)
                {
                    var alternative = result.Alternatives.LastOrDefault(x => x.Confidence == confidence);
                    output.IntermediateResult(alternative.Transcript);
                    outText = alternative.Transcript;
                    normalSpeechDetected = true;
                }
            }
        }
        if (normalSpeechDetected)
        {
            output.Result(outText);
        }
    });

    object writeLock = new object();
    bool writeMore = true;
    var waveIn = new NAudio.Wave.WaveInEvent();
    waveIn.DeviceNumber = captureDeviceIndex;
    waveIn.WaveFormat = new NAudio.Wave.WaveFormat(16000, 1);
    waveIn.DataAvailable += (object sender, NAudio.Wave.WaveInEventArgs args) =>
    {
        lock (writeLock)
        {
            if (!writeMore)
            {
                return;
            }
            streamingCall.WriteAsync(
                new StreamingRecognizeRequest()
                {
                    AudioContent = Google.Protobuf.ByteString
                        .CopyFrom(args.Buffer, 0, args.BytesRecorded)
                }).Wait();
        }
    };

    output.RecordStarted();
    try
    {
        waveIn.StartRecording();
        await Task.Delay(TimeSpan.FromMilliseconds(ms));
        if (!normalSpeechDetected)
        {
            output.RecordCanceled();
        }
        waveIn.StopRecording();
        lock (writeLock)
        {
            writeMore = false;
        }
        await streamingCall.WriteCompleteAsync();
        await printResponses;
        finish?.Invoke();
        output.RecordFinished();
    }
    catch
    {
        output.RecordCanceled();
    }
}
private async Task<int> StreamingMicRecognizeAsync()
{
    try
    {
        _writeMore = true;
        timer = new Stopwatch();
        timer.Start();
        if (WaveIn.DeviceCount < 1)
        {
            throw new ApplicationException("No microphone!");
        }

        _speechClient = SpeechClient.Create();
        var stream = _speechClient.StreamingRecognize();
        streams.Add(stream);

        var speechContext = new SpeechContext();
        speechContext.Phrases.AddRange(new[]
        {
            "int", "for", "true", "false", "public", "private", "bool",
            "static", "void", "переменная"
        });

        // Write the initial request with the config.
        StreamingRecognizeRequest recognizeRequest = GetStreamingRecognizeRequest(speechContext);
        await stream.WriteAsync(recognizeRequest);

        // Raise an event for each final transcript as it arrives, restarting
        // the stream once the elapsed time passes the threshold.
        Task printResponses = Task.Run(async () =>
        {
            while (await stream.ResponseStream.MoveNext(default(CancellationToken)))
            {
                foreach (StreamingRecognitionResult streamingRecognitionResult in
                    stream.ResponseStream.Current.Results)
                {
                    if (streamingRecognitionResult.IsFinal)
                    {
                        var transcript = streamingRecognitionResult.Alternatives[0].Transcript;
                        OnSpeechRecognized?.Invoke(this, new SpeechRecognizerEventArgs(transcript));
                        if (timer.Elapsed.TotalSeconds >= threshold)
                        {
                            Restart();
                        }
                    }
                }
            }
        });

        // Read from the microphone and stream to the API.
        ActivateMicrophone();
        Console.WriteLine("Speak now.");

        await printResponses;
        return 0;
    }
    catch (Exception e)
    {
        Debug.WriteLine(e);
    }
    return -1;
}
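// GetStreamingRecognizeRequest isn't shown in this snippet. A plausible
// reconstruction (hypothetical; the encoding, sample rate, and language are
// assumptions inferred from the mixed English/Russian phrase hints):
private StreamingRecognizeRequest GetStreamingRecognizeRequest(SpeechContext speechContext)
{
    var request = new StreamingRecognizeRequest
    {
        StreamingConfig = new StreamingRecognitionConfig
        {
            Config = new RecognitionConfig
            {
                Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                SampleRateHertz = 16000, // assumed capture rate
                LanguageCode = "ru",     // assumed from the phrase list
            },
            InterimResults = false,
        }
    };
    // Attach the phrase hints so the recognizer favors these tokens.
    request.StreamingConfig.Config.SpeechContexts.Add(speechContext);
    return request;
}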
public async Task<int> Run(int seconds, string locale)
{
    tokenCancelEarly = new CancellationTokenSource();
    var streamingCall = speechClient.StreamingRecognize();

    // Write the initial request with the config.
    await streamingCall.WriteAsync(
        new StreamingRecognizeRequest()
        {
            StreamingConfig = new StreamingRecognitionConfig()
            {
                Config = new RecognitionConfig()
                {
                    Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                    SampleRateHertz = 16000,
                    LanguageCode = locale,
                    EnableAutomaticPunctuation = true,
                },
                InterimResults = true,
                SingleUtterance = true
            }
        });

    // Print responses as they arrive.
    Task printResponses = Task.Run(async () =>
    {
        Console.WriteLine("Start Print");
        while (await streamingCall.ResponseStream.MoveNext(default(CancellationToken)))
        {
            var error = streamingCall.ResponseStream.Current.Error;
            if (error != null)
            {
                Console.WriteLine(String.Format("Code: {0} \"{1}\"", error.Code, error.Message), Color.Red);
                tokenCancelEarly.Cancel();
                await Task.Delay(TimeSpan.FromSeconds(5));
                return;
            }
            foreach (var result in streamingCall.ResponseStream.Current.Results)
            {
                if (result.Stability > 0.8)
                {
                    foreach (var alternative in result.Alternatives)
                    {
                        if (lastString != alternative.Transcript)
                        {
                            PartialTranscriptionReceivedEventArgs args = new PartialTranscriptionReceivedEventArgs();
                            args.Transcription = alternative.Transcript;
                            OnPartialTranscriptionReceived(args);
                        }
                        lastString = alternative.Transcript;
                    }
                }
                if (!result.IsFinal)
                {
                    continue;
                }
                foreach (var alternative in result.Alternatives)
                {
                    Console.WriteLine(alternative.Transcript);
                    CompleteTranscriptionReceivedEventArgs args = new CompleteTranscriptionReceivedEventArgs();
                    args.Transcription = alternative.Transcript;
                    OnCompleteTranscriptionReceived(args);
                    tokenCancelEarly.Cancel();
                }
            }
        }
        Console.WriteLine("End Print");
    });

    // Read from the microphone and stream to the API.
    object writeLock = new object();
    bool writeMore = true;
    waveIn.DataAvailable += (object sender, NAudio.Wave.WaveInEventArgs waveInEventArgs) =>
    {
        lock (writeLock)
        {
            if (!writeMore)
            {
                return;
            }
            if (streamingCall != null)
            {
                streamingCall.WriteAsync(
                    new StreamingRecognizeRequest()
                    {
                        AudioContent = Google.Protobuf.ByteString
                            .CopyFrom(waveInEventArgs.Buffer, 0, waveInEventArgs.BytesRecorded)
                    }).Wait();
            }
        }
    };

    try
    {
        // Run until the timeout elapses or a final result cancels early.
        await Task.Delay(TimeSpan.FromSeconds(seconds), tokenCancelEarly.Token);
    }
    catch (TaskCanceledException)
    {
    }
    finally
    {
        lock (writeLock)
        {
            writeMore = false;
        }
        await streamingCall.WriteCompleteAsync();
    }

    // Stop recording and shut down.
    await printResponses;
    return 0;
}