/// <summary>
/// Starts this instance: creates the cancellation source and dataflow buffer,
/// kicks off the processing loop, and begins continuous speech recognition.
/// Idempotent under <see cref="_syncLock"/> — does nothing if already running.
/// </summary>
private async Task _start()
{
    await this._syncLock.WaitAsync().ConfigureAwait(false);
    try
    {
        if (!_isRunning)
        {
            _tokenSource = new CancellationTokenSource();
            _buffer = new BufferBlock<SerializableAudioMediaBuffer>(
                new DataflowBlockOptions { CancellationToken = this._tokenSource.Token });
            await Task.Factory.StartNew(this._process).ConfigureAwait(false);

            // Initialize speech recognizer.
            Debug.WriteLine("RecordingBot _start.");
            _audioStream = new VoiceAudioStream();
            var audioFormat = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);
            var audioConfig = AudioConfig.FromStreamInput(_audioStream, audioFormat);
            // SECURITY: subscription key and region are hard-coded in source.
            // Move them to configuration/secret storage (as RegisterAttendeeAsync
            // does via _config) and rotate this exposed key.
            var speechConfig = SpeechConfig.FromSubscription("03f0f0daa33448ba9f9bf799d2e14d2a", "westus2");
            _speechClient = new SpeechRecognizer(speechConfig, audioConfig);
            _speechClient.Recognized += _speechClient_Recognized;
            _speechClient.Recognizing += _speechClient_Recognizing;
            _speechClient.Canceled += _speechClient_Canceled;
            await _speechClient.StartContinuousRecognitionAsync().ConfigureAwait(false);
            _isRunning = true;
        }
    }
    finally
    {
        // Release in a finally block: the original released only on the happy
        // path, so any exception during startup left the semaphore held forever
        // and every later _start() call would deadlock.
        this._syncLock.Release();
    }
}
/// <summary>
/// Registers an attendee and starts continuous speech recognition for the
/// attendee's preferred language. Only one Speech API connection is kept per
/// language; re-registering a language refreshes its session id.
/// </summary>
/// <param name="name">Display name of the attendee.</param>
/// <param name="myLanguage">Language the attendee speaks.</param>
/// <param name="preferredLanguage">Language used for recognition output.</param>
/// <returns>A task that completes once recognition has been started.</returns>
public async Task RegisterAttendeeAsync(string name, string myLanguage, string preferredLanguage)
{
    // Changed from `async void` to `async Task`: async void swallows
    // exceptions and cannot be awaited; Task is source-compatible for callers.
    Debug.WriteLine($"User {name}, Language: {myLanguage}, Connection {Context.ConnectionId} starting audio.");
    var config = _config.GetSection("SpeechAPI").Get<AppSettings>();
    // Return value intentionally discarded; the original stored it in an
    // unused local. NOTE(review): presumably it signals "attendee already
    // existed" — confirm whether it should short-circuit registration.
    await InitializeAttendeeInfo(name, myLanguage, preferredLanguage);

    var audioStream = new VoiceAudioStream();
    var audioFormat = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);
    var audioConfig = AudioConfig.FromStreamInput(audioStream, audioFormat);

    var speechKey = config.SubscriptionKey;
    var speechRegion = config.Region;
    var url = config.EndpointUri;
    Debug.WriteLine($"Key:{speechKey} | Region:{speechRegion}");

    var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
    speechConfig.SpeechRecognitionLanguage = preferredLanguage;
    speechConfig.OutputFormat = OutputFormat.Simple;

    var speechClient = new SpeechRecognizer(speechConfig, audioConfig);
    speechClient.Recognized += _speechClient_Recognized;
    speechClient.Recognizing += _speechClient_Recognizing;
    speechClient.Canceled += _speechClient_Canceled;
    speechClient.SessionStarted += _speechClient_SessionStarted;
    string sessionId = speechClient.Properties.GetProperty(PropertyId.Speech_SessionId);

    // Maintains only one API connection per language.
    // TryGetValue avoids the original ContainsKey + indexer double lookup.
    if (_connections.TryGetValue(preferredLanguage, out SpeechAPIConnection conn))
    {
        conn.SessionId = sessionId;
    }
    else
    {
        conn = new SpeechAPIConnection()
        {
            SessionId = sessionId,
            AudioStream = audioStream,
            Recognizer = speechClient,
            Language = preferredLanguage
        };
        _connections[preferredLanguage] = conn;
    }
    Debug.WriteLine($"Connection for {preferredLanguage} added | SessionId:{sessionId}");

    await SendToAttendeeAsync(_attendeeInfo.GetAttendeeByConnectionID(Context.ConnectionId), $"Welcome:{name}");
    await speechClient.StartContinuousRecognitionAsync();
    Debug.WriteLine("Audio start message.");
}
/// <summary>
/// Transcribes speech for one person from a window of the WAV file at
/// <c>_wavFilePath</c>, bounded by <paramref name="startSecond"/> and
/// <paramref name="endSecond"/> (converted to byte offsets via BytesPerSecond).
/// </summary>
/// <param name="config">Speech service configuration for the recognizer.</param>
/// <param name="person">Speaker label passed to the recognizer output.</param>
/// <param name="startSecond">Window start, in seconds from file start.</param>
/// <param name="endSecond">Window end, in seconds from file start.</param>
public async Task TranscribeSpeechFromAudioStream(SpeechConfig config, string person, int startSecond = 0, int endSecond = 0)
{
    var audioFormat = AudioStreamFormat.GetWaveFormatPCM(SamplesPerSecond, BitsPerSample, Channels);

    using (var waveReader = new WaveFileReader(_wavFilePath))
    {
        // Copy the requested byte range into a fresh stream the SDK can pull from.
        var trimmedAudio = new VoiceAudioStream();
        TrimWavFile(waveReader, trimmedAudio, BytesPerSecond * startSecond, BytesPerSecond * endSecond);

        var recognizerRunner = new SpeechToTextRecognizer(person, _streamWriter);
        using (var audioInput = AudioConfig.FromStreamInput(trimmedAudio, audioFormat))
        using (var recognizer = new SpeechRecognizer(config, audioInput))
        {
            await recognizerRunner
                .RunRecognizer(recognizer, RecognizerType.Base, _stopBaseRecognitionTaskCompletionSource)
                .ConfigureAwait(false);
        }
    }
}
/// <summary>
/// Copies the byte range [<paramref name="startPos"/>, <paramref name="endPos"/>)
/// from <paramref name="reader"/> into <paramref name="writer"/> in 1 KiB chunks.
/// </summary>
/// <param name="reader">Source WAV reader; its position is moved to <paramref name="startPos"/>.</param>
/// <param name="writer">Destination audio stream receiving the raw bytes.</param>
/// <param name="startPos">Inclusive start offset in bytes.</param>
/// <param name="endPos">Exclusive end offset in bytes; nothing is copied when &lt;= <paramref name="startPos"/>.</param>
public void TrimWavFile(WaveFileReader reader, VoiceAudioStream writer, long startPos, long endPos)
{
    reader.Position = startPos;
    var buffer = new byte[1024];
    while (reader.Position < endPos)
    {
        // Min on long before the int cast: the original cast the whole
        // remaining range to int, which could overflow for large files.
        var bytesToRead = (int)Math.Min(endPos - reader.Position, buffer.Length);
        var bytesRead = reader.Read(buffer, 0, bytesToRead);
        if (bytesRead <= 0)
        {
            // End of stream before endPos. The original `continue`d on a
            // zero-byte read, and since Position no longer advanced the loop
            // spun forever whenever endPos exceeded the file length.
            break;
        }
        writer.Write(buffer, 0, bytesRead);
    }
}
/// <summary>
/// Handles an "audio start" message for the current connection: parses Speech
/// API credentials from the ASCII JSON payload, wires up a recognizer over a
/// fresh audio stream, and begins continuous recognition.
/// </summary>
/// <param name="args">ASCII-encoded JSON with "speechKey", "speechRegion" and
/// "speechEndpoint" fields. NOTE(review): missing fields currently throw a
/// NullReferenceException — confirm whether the payload is always complete.</param>
/// <returns>A task that completes once recognition has been started.</returns>
public async Task AudioStart(byte[] args)
{
    // Changed from `async void` to `async Task`: async void makes exceptions
    // unobservable; Task is source-compatible for callers that ignore it.
    Debug.WriteLine($"Connection {Context.ConnectionId} starting audio.");
    var str = System.Text.Encoding.ASCII.GetString(args);
    var keys = JObject.Parse(str);

    var audioStream = new VoiceAudioStream();
    var audioFormat = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);
    var audioConfig = AudioConfig.FromStreamInput(audioStream, audioFormat);
    var speechConfig = SpeechConfig.FromSubscription(keys["speechKey"].Value<string>(), keys["speechRegion"].Value<string>());
    speechConfig.EndpointId = keys["speechEndpoint"].Value<string>();

    var speechClient = new SpeechRecognizer(speechConfig, audioConfig);
    speechClient.Recognized += _speechClient_Recognized;
    speechClient.Recognizing += _speechClient_Recognizing;
    speechClient.Canceled += _speechClient_Canceled;

    string sessionId = speechClient.Properties.GetProperty(PropertyId.Speech_SessionId);
    var conn = new Connection()
    {
        SessionId = sessionId,
        AudioStream = audioStream,
        SpeechClient = speechClient,
    };
    // Add (not indexer) is deliberate here: a duplicate AudioStart for the
    // same connection id throws instead of silently orphaning the previous
    // recognizer. NOTE(review): confirm the client never sends it twice.
    _connections.Add(Context.ConnectionId, conn);

    await speechClient.StartContinuousRecognitionAsync();
    Debug.WriteLine("Audio start message.");
}