private async Task Connect()
{
    try
    {
        await _streamingCall.WriteAsync(new StreamingRecognizeRequest()
        {
            StreamingConfig = new StreamingRecognitionConfig()
            {
                Config = new RecognitionConfig()
                {
                    Encoding = Config.AudioEncoding,
                    SampleRateHertz = Config.SampleRateHertz,
                    LanguageCode = Config.LanguageCode,
                },
                InterimResults = Config.InterimResults,
            }
        });
    }
    catch (Exception connectEx)
    {
        _log.Error(connectEx);
        throw;
    }

    _closeTokenSource = new CancellationTokenSource();
}
/// <summary> /// /// </summary> /// <param name="encoding"></param> /// <param name="sampleRateHertz"></param> /// <param name="languageCode"></param> /// <param name="intermResults"></param> public async Task ConfigureSpeechRequest( RecognitionConfig.Types.AudioEncoding encoding, int sampleRateHertz, string languageCode, bool intermResults) { if (null == speechClient) { return; } streamingCall = speechClient.StreamingRecognize(); request = new StreamingRecognizeRequest() { StreamingConfig = new StreamingRecognitionConfig() { Config = new RecognitionConfig() { Encoding = encoding, SampleRateHertz = sampleRateHertz, LanguageCode = languageCode, }, InterimResults = intermResults, } }; // Write the initial request with the config. await streamingCall.WriteAsync(request); }
private async Task StartStreamAsync()
{
    var now = DateTime.UtcNow;
    if (m_rpcStream != null && now >= m_rpcStreamDeadline)
    {
        Console.WriteLine("Closing stream before it times out");
        await m_rpcStream.WriteCompleteAsync();
        m_rpcStream.GrpcCall.Dispose();
        m_rpcStream = null;
    }

    // If we have a valid stream at this point, we're fine.
    if (m_rpcStream != null)
    {
        return;
    }

    // We need to create a new stream, either because we're just starting
    // or because we've just closed the previous one.
    m_rpcStream = m_client.StreamingRecognize();
    m_rpcStreamDeadline = now + s_streamTimeLimit;
    m_processingBufferStart = TimeSpan.Zero;
    m_serverResponseAvailableTask = m_rpcStream.GetResponseStream().MoveNextAsync();
    await m_rpcStream.WriteAsync(m_streamingRecognizeRequest);

    Console.WriteLine($"Writing {m_speechUnprocessedBuffer.Count} chunks into the new stream.");
    foreach (var chunk in m_speechUnprocessedBuffer)
    {
        await WriteAudioChunk(chunk);
    }
}
private void NAudioConfiguration()
{
    WaveIn = new WaveInEvent
    {
        DeviceNumber = 0,
        WaveFormat = new WaveFormat(16000, 1)
    };

    WaveIn.DataAvailable += (sender, args) =>
    {
        lock (WriteLock)
        {
            if (!WriteMore)
            {
                return;
            }

            StreamingCall.WriteAsync(
                new StreamingRecognizeRequest()
                {
                    AudioContent = Google.Protobuf.ByteString
                        .CopyFrom(args.Buffer, 0, args.BytesRecorded)
                }).Wait();
        }
    };
}
/// <summary>
/// Starts a new RPC streaming call if necessary. This will be if either it's the first call
/// (so we don't have a current request) or if the current request will time out soon.
/// In the latter case, after starting the new request, we copy any chunks we'd already sent
/// in the previous request which hadn't been included in a "final result".
/// </summary>
private async Task MaybeStartStreamAsync()
{
    var now = DateTime.UtcNow;
    if (_rpcStream != null && now >= _rpcStreamDeadline)
    {
        Console.WriteLine("Closing stream before it times out");
        await _rpcStream.WriteCompleteAsync();
        _rpcStream.GrpcCall.Dispose();
        _rpcStream = null;
    }

    // If we have a valid stream at this point, we're fine.
    if (_rpcStream != null)
    {
        // Console.WriteLine("We already have a google stream");
        return;
    }

    var translationLanguage = MapLanguageCodeToGoogleLanguage(_myct.FromLang);
    Console.WriteLine($"Creating new google stream to translate from {translationLanguage}");

    // We need to create a new stream, either because we're just starting
    // or because we've just closed the previous one.
    _rpcStream = _client.StreamingRecognize();
    _rpcStreamDeadline = now + s_streamTimeLimit;
    _processingBufferStart = TimeSpan.Zero;
    _serverResponseAvailableTask = _rpcStream.GetResponseStream().MoveNextAsync();
    await _rpcStream.WriteAsync(new StreamingRecognizeRequest
    {
        StreamingConfig = new StreamingRecognitionConfig
        {
            Config = new RecognitionConfig
            {
                Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                SampleRateHertz = SampleRate,
                LanguageCode = translationLanguage, // _myct.FromLang, "en-US"
                MaxAlternatives = 1,
                UseEnhanced = true,
                EnableAutomaticPunctuation = true
            },
            InterimResults = true,
            // SingleUtterance = false
        }
    });

    Console.WriteLine($"Writing {_processingBuffer.Count} chunks into the new stream.");
    foreach (var chunk in _processingBuffer)
    {
        await WriteAudioChunk(chunk);
    }
}
public async Task Start(string lang)
{
    var speech = SpeechClient.Create(_channel);

    lock (WriteLock)
    {
        WriteMore = true;
    }

    StreamingCall = speech.StreamingRecognize();

    // Send the initial configuration request before any audio.
    await StreamingCall.WriteAsync(ConfigRequest(lang));

    WaveIn.StartRecording();
    Console.WriteLine("Speak now.");
}
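The ConfigRequest helper used above is not shown in this listing. A minimal sketch of what it might build, assuming the 16 kHz LINEAR16 capture format used by the other examples here (the body below is an assumption, not the original implementation):

// Hypothetical helper: builds the initial configuration request for the stream.
// The encoding, sample rate and interim-results settings are assumed from the
// surrounding examples; only the method name and the lang parameter come from Start().
private static StreamingRecognizeRequest ConfigRequest(string lang)
{
    return new StreamingRecognizeRequest
    {
        StreamingConfig = new StreamingRecognitionConfig
        {
            Config = new RecognitionConfig
            {
                Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                SampleRateHertz = 16000,
                LanguageCode = lang,
            },
            InterimResults = true,
        }
    };
}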
/// <summary>Snippet for StreamingRecognize</summary>
public async Task StreamingRecognize()
{
    // Snippet: StreamingRecognize(CallSettings, BidirectionalStreamingSettings)
    // Create client
    SpeechClient speechClient = SpeechClient.Create();

    // Initialize streaming call, retrieving the stream object
    SpeechClient.StreamingRecognizeStream response = speechClient.StreamingRecognize();

    // Sending requests and retrieving responses can be arbitrarily interleaved
    // Exact sequence will depend on client/server behavior

    // Create task to do something with responses from server
    Task responseHandlerTask = Task.Run(async () =>
    {
        // Note that C# 8 code can use await foreach
        AsyncResponseStream<StreamingRecognizeResponse> responseStream = response.GetResponseStream();
        while (await responseStream.MoveNextAsync())
        {
            StreamingRecognizeResponse responseItem = responseStream.Current;
            // Do something with streamed response
        }
        // The response stream has completed
    });

    // Send requests to the server
    bool done = false;
    while (!done)
    {
        // Initialize a request
        StreamingRecognizeRequest request = new StreamingRecognizeRequest
        {
            StreamingConfig = new StreamingRecognitionConfig(),
            AudioContent = ByteString.Empty,
        };
        // Stream a request to the server
        await response.WriteAsync(request);

        // Set "done" to true when sending requests is complete
    }

    // Complete writing requests to the stream
    await response.WriteCompleteAsync();

    // Await the response handler
    // This will complete once all server responses have been processed
    await responseHandlerTask;
    // End snippet
}
/// <summary> /// /// </summary> /// <param name="sender"></param> /// <param name="args"></param> public void OnDataAvailable(object sender, WaveInEventArgs args) { lock (writeLock) { if (!writeMore) { return; } streamingCall.WriteAsync( new StreamingRecognizeRequest() { AudioContent = Google.Protobuf.ByteString.CopyFrom(args.Buffer, 0, args.BytesRecorded) }).Wait(); } }
private async void OnWaveInDataAvailable(object s, WaveInEventArgs e)
{
    _reSamplerWaveProvider.AddSamples(e.Buffer, 0, e.BytesRecorded);
    int convertedBytes = _reSampler.Read(_convertedBuffer, 0, _convertedBuffer.Length);

    Task writeTask;
    lock (_recognizeStreamLock)
    {
        writeTask = _recognizeStream.WriteAsync(new StreamingRecognizeRequest()
        {
            AudioContent = Google.Protobuf.ByteString.CopyFrom(_convertedBuffer, 0, convertedBytes)
        });
    }
    await writeTask;

    await CreateNewStreamIfNeeded();
}
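The _reSamplerWaveProvider, _reSampler and _convertedBuffer fields used above are initialized elsewhere. One plausible way to wire them up with NAudio, assuming the recognizer expects 16 kHz, 16-bit mono LINEAR16 (everything below except the field names is an assumption):

// Assumed setup for the resampling fields used in OnWaveInDataAvailable.
// BufferedWaveProvider collects raw capture data in the device's native format;
// MediaFoundationResampler converts it to 16 kHz, 16-bit mono on each Read call.
private BufferedWaveProvider _reSamplerWaveProvider;
private MediaFoundationResampler _reSampler;
private byte[] _convertedBuffer;

private void InitResampler(WaveFormat captureFormat)
{
    _reSamplerWaveProvider = new BufferedWaveProvider(captureFormat);
    _reSampler = new MediaFoundationResampler(_reSamplerWaveProvider, new WaveFormat(16000, 16, 1));
    _convertedBuffer = new byte[16000 * 2]; // room for up to one second of converted audio
}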
/// <summary>
/// Starts a new RPC streaming call if necessary. This will be if either it's the first call
/// (so we don't have a current request) or if the current request will time out soon.
/// In the latter case, after starting the new request, we copy any chunks we'd already sent
/// in the previous request which hadn't been included in a "final result".
/// </summary>
private async Task MaybeStartStreamAsync()
{
    var now = DateTime.UtcNow;
    if (_rpcStream != null && now >= _rpcStreamDeadline)
    {
        Console.WriteLine("Closing stream before it times out");
        await _rpcStream.WriteCompleteAsync();
        _rpcStream.GrpcCall.Dispose();
        _rpcStream = null;
    }

    // If we have a valid stream at this point, we're fine.
    if (_rpcStream != null)
    {
        return;
    }

    // We need to create a new stream, either because we're just starting
    // or because we've just closed the previous one.
    _rpcStream = _client.StreamingRecognize();
    _rpcStreamDeadline = now + s_streamTimeLimit;
    _processingBufferStart = TimeSpan.Zero;
    _serverResponseAvailableTask = _rpcStream.GetResponseStream().MoveNextAsync();
    await _rpcStream.WriteAsync(new StreamingRecognizeRequest
    {
        StreamingConfig = new StreamingRecognitionConfig
        {
            Config = new RecognitionConfig
            {
                Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                SampleRateHertz = SampleRate,
                LanguageCode = "en-US",
                MaxAlternatives = 1
            },
            InterimResults = true,
        }
    });

    Console.WriteLine($"Writing {_processingBuffer.Count} chunks into the new stream.");
    foreach (var chunk in _processingBuffer)
    {
        await WriteAudioChunk(chunk);
    }
}
/// <summary>Snippet for StreamingRecognize</summary>
public async Task StreamingRecognize()
{
    // Snippet: StreamingRecognize(CallSettings,BidirectionalStreamingSettings)
    // Create client
    SpeechClient speechClient = SpeechClient.Create();

    // Initialize streaming call, retrieving the stream object
    SpeechClient.StreamingRecognizeStream duplexStream = speechClient.StreamingRecognize();

    // Sending requests and retrieving responses can be arbitrarily interleaved.
    // Exact sequence will depend on client/server behavior.

    // Create task to do something with responses from server
    Task responseHandlerTask = Task.Run(async () =>
    {
        IAsyncEnumerator<StreamingRecognizeResponse> responseStream = duplexStream.ResponseStream;
        while (await responseStream.MoveNext())
        {
            StreamingRecognizeResponse response = responseStream.Current;
            // Do something with streamed response
        }
        // The response stream has completed
    });

    // Send requests to the server
    bool done = false;
    while (!done)
    {
        // Initialize a request
        StreamingRecognizeRequest request = new StreamingRecognizeRequest();
        // Stream a request to the server
        await duplexStream.WriteAsync(request);

        // Set "done" to true when sending requests is complete
    }

    // Complete writing requests to the stream
    await duplexStream.WriteCompleteAsync();

    // Await the response handler.
    // This will complete once all server responses have been processed.
    await responseHandlerTask;
    // End snippet
}
public Windows.Foundation.IAsyncOperationWithProgress<uint, uint> WriteAsync(IBuffer buffer)
{
    return AsyncInfo.Run<uint, uint>((token, progress) =>
    {
        return Task.Run(() =>
        {
            using (var memoryStream = new MemoryStream())
            using (var outputStream = memoryStream.AsOutputStream())
            {
                outputStream.WriteAsync(buffer).AsTask().Wait();
                var byteArray = memoryStream.ToArray();

                var enumerable = Decode(byteArray);
                var list = new List<short>();
                foreach (var e in enumerable)
                {
                    list.Add((short)e);
                }

                lock (writeLock)
                {
                    Debug.WriteLine("Send StreamingRecognizeRequest");
                    _streamingCall.WriteAsync(
                        new StreamingRecognizeRequest()
                        {
                            AudioContent = Google.Protobuf.ByteString.CopyFrom(byteArray, 0, byteArray.Length)
                        }).Wait();
                }

                var amplitude = list.Select(Math.Abs).Average(x => x);
                if (AmplitudeReading != null)
                {
                    this.AmplitudeReading(this, amplitude);
                }

                progress.Report((uint)memoryStream.Length);
                return (uint)memoryStream.Length;
            }
        });
    });
}
private async Task SetupSpeechClient()
{
    _speech = SpeechClient.Create();
    _streamingCall = _speech.StreamingRecognize();

    await _streamingCall.WriteAsync(
        new StreamingRecognizeRequest()
        {
            StreamingConfig = new StreamingRecognitionConfig()
            {
                Config = new RecognitionConfig()
                {
                    Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                    SampleRateHertz = 16000,
                    LanguageCode = "sv",
                },
                InterimResults = true,
            }
        });
}
public void Transcribe(byte[] buffer, int length)
{
    if (state == AudioState.Silence)
    {
        var silence = DateTime.Now - timestamp;
        if (silence.TotalMilliseconds > treshold)
        {
            systemLog.DebugFormat("{0} Silence detected", prefix);
            Elapsed(null);
        }
    }

    var tasks = new List<Task>
    {
        stream.WriteAsync(buffer, 0, length),
        streaming.WriteAsync(new StreamingRecognizeRequest()
        {
            AudioContent = Google.Protobuf.ByteString.CopyFrom(buffer, 0, length)
        })
    };
    Task.WaitAll(tasks.ToArray());
}
/// <summary>
/// Checks whether the current RPC stream has exceeded its lifetime; if so, completes it and starts a new one.
/// </summary>
private async Task CreateNewStreamIfNeeded()
{
    if (_recognizeStream == null) // The first one
    {
        lock (_recognizeStreamLock)
        {
            _rpcStreamDeadline = DateTime.UtcNow + TimeSpan.FromSeconds(RpcStreamLife);
            _recognizeStream = GoogleCloudSpeechClient.StreamingRecognize();
        }
        await _recognizeStream.WriteAsync(new StreamingRecognizeRequest() { StreamingConfig = _recognitionConfig }); // Configured
        _wasapiCapture.DataAvailable += OnWaveInDataAvailable;
    }
    else if (DateTime.UtcNow >= _rpcStreamDeadline) // Expiring, switch to a new one
    {
        _wasapiCapture.DataAvailable -= OnWaveInDataAvailable; // Stop sending new bytes

        SpeechClient.StreamingRecognizeStream oldStream;
        lock (_recognizeStreamLock)
        {
            oldStream = _recognizeStream;
            _rpcStreamDeadline = DateTime.UtcNow + TimeSpan.FromSeconds(RpcStreamLife);
            _recognizeStream = GoogleCloudSpeechClient.StreamingRecognize(); // Create the new one
        }
        await _recognizeStream.WriteAsync(new StreamingRecognizeRequest() { StreamingConfig = _recognitionConfig }); // Configure the new one

        _wasapiCapture.DataAvailable += OnWaveInDataAvailable; // Start sending to the new stream
        await oldStream.WriteCompleteAsync(); // Complete the old one
        oldStream.GrpcCall.Dispose();
    }
}
/**
 * Feeds samples to ASR and other analyses.
 * The caller should call this whenever a new frame of PCM samples
 * becomes available.
 */
public void AddSamples(byte[] samples, int numBytes)
{
    lock (speakerIdBufferLock)
    {
        Array.Copy(samples, 0, speakerIdBuffer, speakerIdBufferPos, numBytes);
        speakerIdBufferPos += numBytes;
    }

    recogBuffer.AddSamples(samples, 0, numBytes);
    float bufferedSeconds = (float)recogBuffer.BufferedBytes /
        (audioFormat.BitsPerSample / 8) / audioFormat.SampleRate;
    if (bufferedSeconds >= RECOG_PERIOD_SECONDS)
    {
        int bufferNumBytes = recogBuffer.BufferedBytes;
        byte[] frameBuffer = new byte[bufferNumBytes];
        recogBuffer.Read(frameBuffer, 0, bufferNumBytes);
        try
        {
            // Note: the write is not awaited here, so only synchronous failures are caught.
            recogStream.WriteAsync(new StreamingRecognizeRequest()
            {
                AudioContent = Google.Protobuf.ByteString.CopyFrom(frameBuffer, 0, bufferNumBytes)
            });
        }
        catch (Exception ex)
        {
            Debug.WriteLine($"Streaming recog exception: {ex.Message}");
        }

        cummulativeRecogSeconds += bufferedSeconds;
        recogBuffer.ClearBuffer();
        if (cummulativeRecogSeconds > STREAMING_RECOG_MAX_DURATION_SECONDS)
        {
            ReInitStreamRecognizer();
        }
    }
}
/** (Re-)initializes the Cloud-based streaming speech recognizer. */
private void ReInitStreamRecognizer()
{
    lock (speakerIdBufferLock)
    {
        speakerIdBufferPos = 0;
    }

    recogStream = speechClient.StreamingRecognize();
    SpeakerDiarizationConfig diarizationConfig = new SpeakerDiarizationConfig()
    {
        EnableSpeakerDiarization = ENABLE_SPEAKER_DIARIZATION,
        MaxSpeakerCount = MAX_SPEAKER_COUNT,
        MinSpeakerCount = MIN_SPEAKER_COUNT,
    };
    recogStream.WriteAsync(new StreamingRecognizeRequest()
    {
        StreamingConfig = new StreamingRecognitionConfig()
        {
            Config = new RecognitionConfig()
            {
                Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                AudioChannelCount = 1,
                SampleRateHertz = audioFormat.SampleRate,
                LanguageCode = LANGUAGE_CODE,
                DiarizationConfig = diarizationConfig,
            },
            SingleUtterance = false,
        },
    });

    Task.Run(async () =>
    {
        while (await recogStream.GetResponseStream().MoveNextAsync())
        {
            foreach (var result in recogStream.GetResponseStream().Current.Results)
            {
                if (result.Alternatives.Count == 0)
                {
                    continue;
                }

                // Identify the alternative with the highest confidence.
                SpeechRecognitionAlternative bestAlt = null;
                foreach (var alternative in result.Alternatives)
                {
                    if (bestAlt == null || alternative.Confidence > bestAlt.Confidence)
                    {
                        bestAlt = alternative;
                    }
                }

                string transcript = bestAlt.Transcript.Trim();
                if (transcript.Length == 0)
                {
                    continue;
                }

                string transcriptInfo =
                    $"Speech transcript: {DateTime.Now}: \"{transcript}\" (confidence={bestAlt.Confidence})";
                if (ENABLE_SPEAKER_DIARIZATION)
                {
                    int speakerTag = bestAlt.Words[bestAlt.Words.Count - 1].SpeakerTag;
                    transcriptInfo += $" (speakerTag={speakerTag})";
                }
                Debug.WriteLine(transcriptInfo);

                if (ENABLE_SPEAKER_DIARIZATION && ENABLE_SPEAKER_ID)
                {
                    recognizeSpeaker(transcript, bestAlt);
                }
            }
        }
    });

    cummulativeRecogSeconds = 0f;
}
private static async Task<object> StreamingMicRecognizeAsync(int seconds)
{
    if (WaveIn.DeviceCount < 1)
    {
        File.WriteAllText("error.txt", "No microphone!");
        return -1;
    }

    string lower = INISetting.GetValueWithAdd<string>("CredentialsFilePath", "credentials.json").ToLower();
    Console.WriteLine(lower);

    GoogleCredential googleCredential;
    using (Stream stream = new FileStream(lower, FileMode.Open))
        googleCredential = GoogleCredential.FromStream(stream);

    SpeechClient.StreamingRecognizeStream streamingCall = SpeechClient
        .Create(new Channel(SpeechClient.DefaultEndpoint.Host, googleCredential.ToChannelCredentials()))
        .StreamingRecognize();

    await streamingCall.WriteAsync(new StreamingRecognizeRequest()
    {
        StreamingConfig = new StreamingRecognitionConfig()
        {
            Config = new RecognitionConfig()
            {
                Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                SampleRateHertz = 16000,
                LanguageCode = "ru"
            },
            InterimResults = true
        }
    });

    Task printResponses = Task.Run(async () =>
    {
        string s = "";
        while (true)
        {
            if (await streamingCall.ResponseStream.MoveNext(new CancellationToken()))
            {
                using (IEnumerator<StreamingRecognitionResult> enumerator1 =
                    streamingCall.ResponseStream.Current.Results.GetEnumerator())
                {
                    if (enumerator1.MoveNext())
                    {
                        using (IEnumerator<SpeechRecognitionAlternative> enumerator2 =
                            enumerator1.Current.Alternatives.GetEnumerator())
                        {
                            if (enumerator2.MoveNext())
                            {
                                SpeechRecognitionAlternative current = enumerator2.Current;
                                Console.WriteLine(current.Transcript);
                                s += current.Transcript;
                            }
                        }
                    }
                }
                File.WriteAllText(Path.GetTempPath() + "\\speechtext\\speechtext.txt", s);
                s = "";
            }
            else
            {
                break;
            }
        }
    });

    object writeLock = new object();
    bool writeMore = true;

    WaveInEvent waveIn = new WaveInEvent();
    waveIn.DeviceNumber = 0;
    waveIn.WaveFormat = new WaveFormat(16000, 1);
    waveIn.DataAvailable += (sender, args) =>
    {
        lock (writeLock)
        {
            if (!writeMore)
            {
                return;
            }
            streamingCall.WriteAsync(new StreamingRecognizeRequest()
            {
                AudioContent = ByteString.CopyFrom(args.Buffer, 0, args.BytesRecorded)
            }).Wait();
        }
    };

    waveIn.StartRecording();
    Console.WriteLine("Speak now " + seconds);
    await Task.Delay(TimeSpan.FromSeconds(seconds));
    waveIn.StopRecording();

    lock (writeLock)
        writeMore = false;

    await streamingCall.WriteCompleteAsync();
    await printResponses;
    return 0;
}
/// <summary>
/// Writes a single chunk to the RPC stream.
/// </summary>
private Task WriteAudioChunk(ByteString chunk) =>
    _rpcStream.WriteAsync(new StreamingRecognizeRequest { AudioContent = chunk });
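WriteAudioChunk and the MaybeStartStreamAsync methods above never show where _processingBuffer is filled. A minimal sketch of that missing glue, assuming audio arrives as raw PCM buffers from a capture callback (the method name and capture details below are assumptions):

// Hypothetical bridge between the audio capture callback and the RPC stream:
// remember the chunk so it can be replayed into a replacement stream after a
// restart, make sure a stream exists, then write the chunk to it.
private async Task HandleAudioChunkAsync(byte[] buffer, int bytesRecorded)
{
    ByteString chunk = ByteString.CopyFrom(buffer, 0, bytesRecorded);
    _processingBuffer.Add(chunk);
    await MaybeStartStreamAsync();
    await WriteAudioChunk(chunk);
}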
public async void Start()
{
    try
    {
        var credentials = GoogleCredential.FromFile("Extensions/25670f9058d6.json");
        var channel = new Channel(SpeechClient.DefaultEndpoint.Host, credentials.ToChannelCredentials());
        timer = new Timer(Elapsed, null, 60000, Timeout.Infinite);
        speech = SpeechClient.Create(channel);
        stream = new SpeechStream(65536);
        streaming = speech.StreamingRecognize();

        engine = new SpeechRecognitionEngine();
        engine.LoadGrammar(new DictationGrammar());
        engine.AudioStateChanged += Engine_AudioStateChanged;
        engine.SetInputToAudioStream(stream, new SpeechAudioFormatInfo(EncodingFormat.ULaw, 8000, 8, 1, 8000, 1, null));
        engine.RecognizeAsync(RecognizeMode.Multiple);
    }
    catch (Exception ex)
    {
        systemLog.Error(prefix + " Could not create SpeechClient", ex);
        return; // Without a streaming call there is nothing more to start.
    }

    await streaming.WriteAsync(new StreamingRecognizeRequest()
    {
        StreamingConfig = new StreamingRecognitionConfig()
        {
            InterimResults = true,
            Config = new RecognitionConfig()
            {
                Encoding = RecognitionConfig.Types.AudioEncoding.Mulaw,
                SampleRateHertz = 8000,
                LanguageCode = "en-US"
            },
        }
    });
    systemLog.DebugFormat("{0} Google Speech To Text stream started", prefix);
    IsReady = true;

    handle = Task.Run(async () =>
    {
        while (await streaming.ResponseStream.MoveNext(default(CancellationToken)))
        {
            try
            {
                foreach (var result in streaming.ResponseStream.Current.Results)
                {
                    if (!result.IsFinal)
                        continue;
                    if (result.Alternatives.Count == 0)
                        continue;

                    var alternative = result.Alternatives.OrderByDescending(a => a.Confidence).FirstOrDefault();
                    var args = new TextEventArgs(alternative.Transcript, Encoding.UTF8);
                    TextAvailable(this, args);
                }
            }
            catch
            {
                break;
            }
        }
    });
}