// Audio capture callback: push each captured frame into the Speech SDK
// input stream and mirror the same bytes to a local stream for inspection.
private void OnAudioFrameCaptured(object sender, NewFrameEventArgs e)
{
    Interlocked.Increment(ref _framesCaptured);
    Trace.WriteLine($"Sending {e.Signal.RawData.Length}");

    // Feed the raw PCM frame to the recognizer's PushAudioInputStream.
    _audioInput.Write(e.Signal.RawData);

    // Mirror to a local stream so the captured audio can be inspected.
    _audio.Write(e.Signal.RawData, 0, e.Signal.RawData.Length);
    _audio.Flush();
}
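// _audioInput above is presumably a PushAudioInputStream wired to a speech
// recognizer. A minimal setup sketch, assuming 16 kHz 16-bit mono PCM; the
// format must match what the capture device actually delivers, and all names
// other than the Speech SDK types are assumptions:
private PushAudioInputStream _audioInput;
private SpeechRecognizer _recognizer;

private async Task StartRecognitionAsync(string key, string region)
{
    var config = SpeechConfig.FromSubscription(key, region);
    _audioInput = AudioInputStream.CreatePushStream(
        AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1));
    var audioConfig = AudioConfig.FromStreamInput(_audioInput);
    _recognizer = new SpeechRecognizer(config, audioConfig);
    _recognizer.Recognized += (s, e) => Trace.WriteLine(e.Result.Text);
    await _recognizer.StartContinuousRecognitionAsync();
}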
void FixedUpdate()
{
#if PLATFORM_ANDROID
    if (!micPermissionGranted && Permission.HasUserAuthorizedPermission(Permission.Microphone))
    {
        micPermissionGranted = true;
        message = "Click button to recognize speech";
    }
#elif PLATFORM_IOS
    if (!micPermissionGranted && Application.HasUserAuthorization(UserAuthorization.Microphone))
    {
        micPermissionGranted = true;
        message = "Click button to recognize speech";
    }
#endif
    lock (threadLocker)
    {
        if (recoButton != null)
        {
            recoButton.interactable = micPermissionGranted;
        }
        if (outputText != null && recognitionStarted)
        {
            outputText.text = message;
        }
    }

    string micDevice = Microphone.devices[0];
    if (Microphone.IsRecording(micDevice) && recognitionStarted)
    {
        GameObject.Find("MyButton").GetComponentInChildren<Text>().text = "Stop";

        // Read only the samples recorded since the last frame and push them
        // to the recognizer as 16-bit PCM.
        int pos = Microphone.GetPosition(micDevice);
        int diff = pos - lastSample;
        if (diff > 0)
        {
            float[] samples = new float[diff * audioSource.clip.channels];
            audioSource.clip.GetData(samples, lastSample);
            byte[] ba = ConvertAudioClipDataToInt16ByteArray(samples);
            if (ba.Length != 0)
            {
                Debug.Log("pushStream.Write pos:" + pos + " length: " + ba.Length);
                pushStream.Write(ba);
            }
        }
        lastSample = pos;
    }
    else if (!Microphone.IsRecording(micDevice) && !recognitionStarted)
    {
        GameObject.Find("MyButton").GetComponentInChildren<Text>().text = "Start";
    }
}
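// ConvertAudioClipDataToInt16ByteArray is referenced above (and again in the
// later Unity snippet) but not shown. A minimal sketch, assuming the goal is
// to turn Unity's float samples in [-1, 1] into little-endian 16-bit PCM,
// the Speech SDK's default push-stream format; the clamping and scaling
// choices here are assumptions:
private static byte[] ConvertAudioClipDataToInt16ByteArray(float[] samples)
{
    var bytes = new byte[samples.Length * sizeof(short)];
    for (int i = 0; i < samples.Length; i++)
    {
        // Clamp to [-1, 1] and scale to the signed 16-bit range.
        float clamped = Mathf.Clamp(samples[i], -1f, 1f);
        short pcm = (short)(clamped * short.MaxValue);
        bytes[i * 2] = (byte)(pcm & 0xFF);            // low byte first (little-endian)
        bytes[i * 2 + 1] = (byte)((pcm >> 8) & 0xFF); // high byte
    }
    return bytes;
}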
// Buffers incoming audio chunks for the current socket and flushes them into
// the recognizer's push stream roughly every 20 chunks.
public Task Transcribe(byte[] audioBytes)
{
    _dictionaryTempByteList[_socketId].Add(audioBytes);
    if (counter % 20 == 0)
    {
        // Concatenate the buffered chunks and hand them to the Speech SDK.
        byte[] completeAudioBuffer = CreateAudioByteArray(_dictionaryTempByteList);
        _inputStream.Write(completeAudioBuffer);
        _dictionaryTempByteList[_socketId].Clear();
    }
    counter++;
    return Task.CompletedTask;
}
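// CreateAudioByteArray is referenced above but not shown. A minimal sketch,
// assuming it simply concatenates the chunks buffered for the current socket
// (the dictionary shape and string key type are assumptions taken from the
// snippet above; Sum requires System.Linq):
private byte[] CreateAudioByteArray(Dictionary<string, List<byte[]>> bufferedChunks)
{
    List<byte[]> chunks = bufferedChunks[_socketId];
    int totalLength = chunks.Sum(c => c.Length);

    var result = new byte[totalLength];
    int offset = 0;
    foreach (byte[] chunk in chunks)
    {
        Buffer.BlockCopy(chunk, 0, result, offset, chunk.Length);
        offset += chunk.Length;
    }
    return result;
}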
public static async Task WriteToTranscriber(HttpContext context, WebSocket webSocket)
{
    const int BUFFER_SIZE = 160 * 2; // 160 16-bit samples per frame
    var buffer = new byte[BUFFER_SIZE];
    try
    {
        // The first message is a small JSON handshake, e.g. {"language":"en-US"}.
        var initBuffer = new byte[500];
        var initialResponse = await webSocket.ReceiveAsync(new ArraySegment<byte>(initBuffer), CancellationToken.None);

        // Decode only the bytes actually received, not the whole 500-byte buffer.
        var converted = Encoding.UTF8.GetString(initBuffer, 0, initialResponse.Count);
        var initialObject = JObject.Parse(converted);
        string language = initialObject.ContainsKey("language")
            ? (string)initialObject["language"]
            : "en-US";

        await StartSpeechTranscriptionEngine(language);

        // Pump audio frames: push each received frame into the recognizer's
        // stream (only result.Count bytes, not the full buffer) and echo it
        // back to the client.
        WebSocketReceiveResult result = await webSocket.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
        while (!result.CloseStatus.HasValue)
        {
            _inputStream.Write(buffer, result.Count);
            Console.WriteLine(result.Count);
            await webSocket.SendAsync(new ArraySegment<byte>(buffer, 0, result.Count), result.MessageType, result.EndOfMessage, CancellationToken.None);
            result = await webSocket.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
        }
        await webSocket.CloseAsync(result.CloseStatus.Value, result.CloseStatusDescription, CancellationToken.None);
    }
    catch (Exception e)
    {
        Debug.WriteLine(e.ToString());
    }
    finally
    {
        StopTranscriptionEngine();
    }
}
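// StartSpeechTranscriptionEngine and _inputStream are not shown above. A
// minimal sketch, assuming _inputStream is a PushAudioInputStream; the
// sample rate, key/region placeholders, and every name other than the
// Speech SDK types are assumptions:
private static PushAudioInputStream _inputStream;
private static SpeechRecognizer _recognizer;

private static async Task StartSpeechTranscriptionEngine(string language)
{
    var config = SpeechConfig.FromSubscription("<subscription-key>", "<region>");
    config.SpeechRecognitionLanguage = language;

    // Describe the raw PCM the socket delivers so the SDK can interpret it.
    var format = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);
    _inputStream = AudioInputStream.CreatePushStream(format);

    var audioConfig = AudioConfig.FromStreamInput(_inputStream);
    _recognizer = new SpeechRecognizer(config, audioConfig);
    _recognizer.Recognized += (s, e) => Console.WriteLine(e.Result.Text);

    await _recognizer.StartContinuousRecognitionAsync();
}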
/// <summary>
/// Creates a recognizer with the baseline model and the selected language:
/// builds a config from the subscription key and selected region, creates a
/// recognizer over the pushed channel audio, and waits on RunRecognizer.
/// </summary>
private async Task CreateRecognizer(byte[] channel)
{
    // TODO: support specifying a different region.
    var config = SpeechConfig.FromSubscription(this.SubscriptionKey, this.Region);
    config.SpeechRecognitionLanguage = this.RecognitionLanguage;
    config.OutputFormat = OutputFormat.Detailed;

    SpeechRecognizer basicRecognizer;

    // Push the whole channel's audio, then close the stream so the
    // recognizer sees end-of-stream once the bytes are consumed.
    PushAudioInputStream pushStream = AudioInputStream.CreatePushStream();
    pushStream.Write(channel);
    pushStream.Close();

    using (var audioInput = AudioConfig.FromStreamInput(pushStream))
    {
        using (basicRecognizer = new SpeechRecognizer(config, audioInput))
        {
            await this.RunRecognizer(basicRecognizer, stopBaseRecognitionTaskCompletionSource).ConfigureAwait(false);
        }
    }
}
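// RunRecognizer and stopBaseRecognitionTaskCompletionSource are referenced
// above but not shown. A minimal sketch, assuming RunRecognizer wires up the
// events, starts continuous recognition, and waits until the session ends;
// the handler bodies and the TaskCompletionSource type parameter are
// assumptions, not the original implementation:
private async Task RunRecognizer(SpeechRecognizer recognizer, TaskCompletionSource<int> source)
{
    recognizer.Recognized += (s, e) => Console.WriteLine($"RECOGNIZED: {e.Result.Text}");
    recognizer.Canceled += (s, e) => source.TrySetResult(0);
    recognizer.SessionStopped += (s, e) => source.TrySetResult(0);

    await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

    // Because the push stream was closed after the last write, the session
    // stops on its own once all audio has been consumed.
    await source.Task.ConfigureAwait(false);
    await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
}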
private async Task InjectStreamIntoRecognizerAsync(PushAudioInputStream audioInputStream, BlobClient blobStream)
{
    using (var stream = await blobStream.OpenReadAsync())
    {
        // Decode the Ogg/Opus blob to 16 kHz mono PCM and push it packet by packet.
        var decoder = new OpusDecoder(16000, 1);
        var opus = new OpusOggReadStream(decoder, stream);
        while (opus.HasNextPacket)
        {
            short[] packet = opus.DecodeNextPacket();
            if (packet != null)
            {
                // Convert the whole packet of 16-bit samples to bytes in one
                // copy instead of two bytes at a time (little-endian layout).
                var bytes = new byte[packet.Length * sizeof(short)];
                Buffer.BlockCopy(packet, 0, bytes, 0, bytes.Length);
                audioInputStream.Write(bytes, bytes.Length);
            }
        }
    }

    // Close the stream so the recognizer sees end-of-audio.
    audioInputStream.Close();
}
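// A sketch of how the method above might be driven end to end: create the
// push stream (its default format, 16 kHz 16-bit mono PCM, matches the
// decoder), attach a recognizer, start recognition, then inject the blob.
// Names other than the SDK types (containerClient, "audio.opus") are
// illustrative assumptions.
private async Task TranscribeBlobAsync(BlobContainerClient containerClient)
{
    var config = SpeechConfig.FromSubscription("<subscription-key>", "<region>");
    using (var pushStream = AudioInputStream.CreatePushStream())
    using (var audioConfig = AudioConfig.FromStreamInput(pushStream))
    using (var recognizer = new SpeechRecognizer(config, audioConfig))
    {
        recognizer.Recognized += (s, e) => Console.WriteLine(e.Result.Text);
        await recognizer.StartContinuousRecognitionAsync();

        // Feeding and recognizing run concurrently; closing the stream inside
        // InjectStreamIntoRecognizerAsync ends the audio. In production you
        // would also wait for SessionStopped before stopping.
        await InjectStreamIntoRecognizerAsync(pushStream, containerClient.GetBlobClient("audio.opus"));
        await recognizer.StopContinuousRecognitionAsync();
    }
}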
void FixedUpdate()
{
#if PLATFORM_ANDROID
    if (!_micPermissionGranted && Permission.HasUserAuthorizedPermission(Permission.Microphone))
    {
        _micPermissionGranted = true;
    }
#elif PLATFORM_IOS
    if (!_micPermissionGranted && Application.HasUserAuthorization(UserAuthorization.Microphone))
    {
        _micPermissionGranted = true;
    }
#endif
    lock (_threadLocker)
    {
        if (recoButton != null)
        {
            recoButton.interactable = _micPermissionGranted && !_processingService && !_processingAnswer;
        }
        if (outputText != null && _recognitionStarted)
        {
            if (string.IsNullOrWhiteSpace(_transcription) && string.IsNullOrWhiteSpace(_answer))
            {
                // Still listening: show the partial transcription in "recording" colors.
                outputText.text = _unfinishedTranscription;
                outputText.color = Color.red;
                recoButton.image.color = Color.red;
                recoButton.GetComponentInChildren<TextMeshProUGUI>().color = Color.white;
                recoButton.GetComponentInChildren<TextMeshProUGUI>().text = "Listening...";
            }
            else if (!string.IsNullOrWhiteSpace(_transcription))
            {
                // A final transcription arrived: consume it and toggle the button off.
                _transcription = null;
                recoButton.onClick.Invoke();
            }
            else
            {
                outputText.text = _answer;
                outputText.color = Color.white;
            }
        }
    }

    string micDevice = Microphone.devices[0];
    if (Microphone.IsRecording(micDevice) && _recognitionStarted)
    {
        // Push the samples recorded since the last frame into the stream.
        int pos = Microphone.GetPosition(micDevice);
        int diff = pos - _lastSample;
        if (diff > 0)
        {
            float[] samples = new float[diff * _audioSource.clip.channels];
            _audioSource.clip.GetData(samples, _lastSample);
            byte[] ba = ConvertAudioClipDataToInt16ByteArray(samples);
            if (ba.Length != 0)
            {
                _pushStream.Write(ba);
            }
        }
        _lastSample = pos;
    }
    else if (!Microphone.IsRecording(micDevice) && !_recognitionStarted)
    {
        // Idle, or waiting while the spoken answer is checked.
        outputText.text = _processingAnswer ? _answer : "";
        outputText.color = Color.white;
        recoButton.image.color = _processingAnswer ? Color.red : Color.white;
        recoButton.GetComponentInChildren<TextMeshProUGUI>().color = _processingAnswer ? Color.white : Color.black;
        recoButton.GetComponentInChildren<TextMeshProUGUI>().text = _processingAnswer ? "Checking..." : "Press and say the word";
    }
}
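// The handler behind recoButton.onClick that flips _recognitionStarted is not
// shown. A minimal sketch of a plausible toggle, assuming a looping mic clip
// at 16 kHz; the field names reuse the snippet above, everything else is an
// assumption:
public void ButtonClick()
{
    if (!_recognitionStarted)
    {
        // Start capturing from the default mic into a looping clip and let
        // FixedUpdate drain new samples into _pushStream.
        _lastSample = 0;
        _audioSource.clip = Microphone.Start(Microphone.devices[0], true, 300, 16000);
        _recognitionStarted = true;
    }
    else
    {
        Microphone.End(Microphone.devices[0]);
        _recognitionStarted = false;
    }
}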