Example #1
private void OnAudioFrameCaptured(object sender, NewFrameEventArgs e)
{
    Interlocked.Increment(ref _framesCaptured);
    Trace.WriteLine($"Sending {e.Signal.RawData.Length} bytes");

    // Feed the captured frame to the Speech SDK push stream and mirror it to a second stream.
    _audioInput.Write(e.Signal.RawData);
    _audio.Write(e.Signal.RawData, 0, e.Signal.RawData.Length);
    _audio.Flush();
}
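For context, a minimal setup sketch of how the fields above might be initialized. The original setup code is not shown, so the class name, the 16 kHz 16-bit mono format, and the file name are all assumptions.

using System.IO;
using Microsoft.CognitiveServices.Speech.Audio;

public partial class AudioCapturePipeline
{
    // Assumed fields of the hosting class.
    private PushAudioInputStream _audioInput;
    private FileStream _audio;
    private int _framesCaptured;

    private void InitializeAudioStreams()
    {
        // Push stream consumed by the Speech SDK; the format must match the captured PCM.
        _audioInput = AudioInputStream.CreatePushStream(
            AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1));

        // Raw side copy of the audio for debugging (hypothetical file name).
        _audio = new FileStream("capture.raw", FileMode.Create, FileAccess.Write);
    }
}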
Example #2
    void FixedUpdate()
    {
#if PLATFORM_ANDROID
        // Poll until the user grants microphone access.
        if (!micPermissionGranted && Permission.HasUserAuthorizedPermission(Permission.Microphone))
        {
            micPermissionGranted = true;
            message = "Click button to recognize speech";
        }
#elif PLATFORM_IOS
        if (!micPermissionGranted && Application.HasUserAuthorization(UserAuthorization.Microphone))
        {
            micPermissionGranted = true;
            message = "Click button to recognize speech";
        }
#endif
        // UI state is shared with the recognition callbacks, so guard it with the lock.
        lock (threadLocker)
        {
            if (recoButton != null)
            {
                recoButton.interactable = micPermissionGranted;
            }
            if (outputText != null && recognitionStarted)
            {
                outputText.text = message;
            }
        }

        if (Microphone.IsRecording(Microphone.devices[0]) && recognitionStarted)
        {
            GameObject.Find("MyButton").GetComponentInChildren<Text>().text = "Stop";

            // Copy only the samples recorded since the last frame.
            // (Assumes the looping clip has not wrapped around since then.)
            int pos  = Microphone.GetPosition(Microphone.devices[0]);
            int diff = pos - lastSample;

            if (diff > 0)
            {
                float[] samples = new float[diff * audioSource.clip.channels];
                audioSource.clip.GetData(samples, lastSample);
                byte[] ba = ConvertAudioClipDataToInt16ByteArray(samples);
                if (ba.Length != 0)
                {
                    Debug.Log("pushStream.Write pos:" + pos + " length: " + ba.Length);
                    pushStream.Write(ba);
                }
            }
            lastSample = pos;
        }
        else if (!Microphone.IsRecording(Microphone.devices[0]) && !recognitionStarted)
        {
            GameObject.Find("MyButton").GetComponentInChildren<Text>().text = "Start";
        }
    }
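The helper ConvertAudioClipDataToInt16ByteArray is called above but not shown. A plausible implementation, assuming it sits in the same MonoBehaviour (so UnityEngine is already imported) and that the input is Unity's usual float audio in [-1, 1]:

    // Hypothetical implementation of the helper used above: converts float
    // samples in [-1, 1] to little-endian 16-bit PCM bytes.
    private static byte[] ConvertAudioClipDataToInt16ByteArray(float[] data)
    {
        var bytes = new byte[data.Length * sizeof(short)];
        for (int i = 0; i < data.Length; i++)
        {
            // Clamp, scale to the Int16 range, and store little-endian.
            short sample = (short)(Mathf.Clamp(data[i], -1f, 1f) * short.MaxValue);
            bytes[i * 2]     = (byte)(sample & 0xff);
            bytes[i * 2 + 1] = (byte)((sample >> 8) & 0xff);
        }
        return bytes;
    }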
Example #3
        public Task Transcribe(byte[] audioBytes)
        {
            // Buffer incoming chunks per socket and flush every 20th call so the
            // push stream receives fewer, larger writes.
            _dictionaryTempByteList[_socketId].Add(audioBytes);

            if (counter % 20 == 0)
            {
                byte[] completeAudioBuffer = CreateAudioByteArray(_dictionaryTempByteList);

                _inputStream.Write(completeAudioBuffer);

                _dictionaryTempByteList[_socketId].Clear();
            }

            counter++;
            return Task.CompletedTask;
        }
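Neither CreateAudioByteArray nor the fields are shown. A sketch of the likely shape, assuming _socketId keys a Dictionary<string, List<byte[]>> and that System.Linq is imported, simply concatenates the buffered chunks:

        // Hypothetical helper matching the call above: flattens the chunks
        // buffered for this socket into one contiguous array (types are assumptions).
        private byte[] CreateAudioByteArray(Dictionary<string, List<byte[]>> buffers)
        {
            List<byte[]> chunks = buffers[_socketId];
            var result = new byte[chunks.Sum(c => c.Length)];

            int offset = 0;
            foreach (byte[] chunk in chunks)
            {
                Buffer.BlockCopy(chunk, 0, result, offset, chunk.Length);
                offset += chunk.Length;
            }
            return result;
        }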
Example #4
        public static async Task WriteToTranscriber(HttpContext context, WebSocket webSocket)
        {
            // 160 two-byte samples per frame (10 ms, assuming a 16 kHz sample rate).
            const int BUFFER_SIZE = 160 * 2;
            var       buffer      = new byte[BUFFER_SIZE];

            try
            {
                // The first message is a small JSON handshake, e.g. {"language":"en-US"}.
                var init_buffer      = new byte[500];
                var initial_response = await webSocket.ReceiveAsync(new ArraySegment<byte>(init_buffer), CancellationToken.None);

                // Decode only the bytes actually received; trailing zeros would break JObject.Parse.
                var initial_object = JObject.Parse(Encoding.UTF8.GetString(init_buffer, 0, initial_response.Count));

                string language = initial_object.ContainsKey("language")
                    ? (string)initial_object["language"]
                    : "en-US";

                await StartSpeechTranscriptionEngine(language);

                WebSocketReceiveResult result = await webSocket.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);

                while (!result.CloseStatus.HasValue)
                {
                    // Echo the frame back to the client and push it to the recognizer
                    // before the next receive overwrites the buffer.
                    await webSocket.SendAsync(new ArraySegment<byte>(buffer, 0, result.Count), result.MessageType, result.EndOfMessage, CancellationToken.None);

                    // Push only the bytes received, not the whole buffer.
                    _inputStream.Write(buffer, result.Count);
                    Console.WriteLine(result.Count);

                    result = await webSocket.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
                }
                await webSocket.CloseAsync(result.CloseStatus.Value, result.CloseStatusDescription, CancellationToken.None);
            }
            catch (Exception e)
            {
                Debug.WriteLine(e.ToString());
            }
            finally
            {
                StopTranscriptionEngine();
            }
        }
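StartSpeechTranscriptionEngine and StopTranscriptionEngine are not shown. A plausible sketch, assuming _inputStream and _recognizer are static fields of the same class, and with the key, region, and 16 kHz mono format as placeholders:

        // Sketch only: the credentials and format are assumptions.
        private static PushAudioInputStream _inputStream;
        private static SpeechRecognizer _recognizer;

        private static async Task StartSpeechTranscriptionEngine(string language)
        {
            var config = SpeechConfig.FromSubscription("<subscription-key>", "<region>");
            config.SpeechRecognitionLanguage = language;

            _inputStream = AudioInputStream.CreatePushStream(
                AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1));
            _recognizer = new SpeechRecognizer(config, AudioConfig.FromStreamInput(_inputStream));

            _recognizer.Recognized += (s, e) =>
            {
                if (e.Result.Reason == ResultReason.RecognizedSpeech)
                {
                    Console.WriteLine($"Recognized: {e.Result.Text}");
                }
            };

            await _recognizer.StartContinuousRecognitionAsync();
        }

        private static void StopTranscriptionEngine()
        {
            // Closing the stream ends recognition once the buffered audio is consumed.
            _inputStream?.Close();
            _recognizer?.StopContinuousRecognitionAsync().Wait();
        }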
Example #5
        /// <summary>
        /// Creates a recognizer with the baseline model and the selected language:
        /// builds a config from the subscription key and region, wraps the given
        /// audio bytes in a push stream, and waits on RunRecognizer.
        /// </summary>
        private async Task CreateRecognizer(byte[] channel)
        {
            // TODO: support specifying a different region.
            var config = SpeechConfig.FromSubscription(this.SubscriptionKey, this.Region);

            config.SpeechRecognitionLanguage = this.RecognitionLanguage;
            config.OutputFormat = OutputFormat.Detailed;

            SpeechRecognizer basicRecognizer;

            PushAudioInputStream pushStream = AudioInputStream.CreatePushStream();

            // Write all the audio up front, then close the stream so the
            // recognizer sees end-of-stream instead of waiting for more data.
            pushStream.Write(channel);
            pushStream.Close();
            using (var audioInput = AudioConfig.FromStreamInput(pushStream))
            {
                using (basicRecognizer = new SpeechRecognizer(config, audioInput))
                {
                    await this.RunRecognizer(basicRecognizer, stopBaseRecognitionTaskCompletionSource).ConfigureAwait(false);
                }
            }
        }
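RunRecognizer itself is not shown. A minimal sketch of what it might do, assuming the TaskCompletionSource parameter (named stopBaseRecognitionTaskCompletionSource at the call site) serves as a stop signal:

        // Minimal sketch of RunRecognizer; the original body is not shown, and
        // the TaskCompletionSource<int> stop-signal convention is an assumption.
        private async Task RunRecognizer(SpeechRecognizer recognizer, TaskCompletionSource<int> stopSignal)
        {
            recognizer.Recognized += (s, e) =>
            {
                if (e.Result.Reason == ResultReason.RecognizedSpeech)
                {
                    Console.WriteLine($"Recognized: {e.Result.Text}");
                }
            };
            // The push stream was closed above, so the session stops once the audio drains.
            recognizer.SessionStopped += (s, e) => stopSignal.TrySetResult(0);

            await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);
            await stopSignal.Task.ConfigureAwait(false);
            await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
        }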
Example #6
        private async Task InjectStreamIntoRecognizerAsync(PushAudioInputStream audioInputStream, BlobClient blobStream)
        {
            using (var stream = await blobStream.OpenReadAsync())
            {
                // Decode the Opus/Ogg blob to 16 kHz mono PCM.
                var decoder = new OpusDecoder(16000, 1);
                var opus    = new OpusOggReadStream(decoder, stream);

                while (opus.HasNextPacket)
                {
                    short[] packet = opus.DecodeNextPacket();
                    if (packet != null)
                    {
                        // Convert the whole packet to little-endian 16-bit PCM in one
                        // copy instead of writing two bytes per sample.
                        byte[] bytes = new byte[packet.Length * sizeof(short)];
                        Buffer.BlockCopy(packet, 0, bytes, 0, bytes.Length);
                        audioInputStream.Write(bytes, bytes.Length);
                    }
                }
            }

            // Signal end-of-stream so the recognizer can finish.
            audioInputStream.Close();
        }
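The OpusDecoder and OpusOggReadStream types come from the Concentus and Concentus.Oggfile packages. Hypothetical wiring for the method above, assuming a speechConfig and a blobClient already exist in scope:

            // Hypothetical caller; speechConfig and blobClient are assumptions.
            var pushStream = AudioInputStream.CreatePushStream(
                AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1));  // matches the decoder above

            using (var audioConfig = AudioConfig.FromStreamInput(pushStream))
            using (var recognizer  = new SpeechRecognizer(speechConfig, audioConfig))
            {
                // Decode the blob in the background while the recognizer consumes the stream.
                Task injection = InjectStreamIntoRecognizerAsync(pushStream, blobClient);

                SpeechRecognitionResult result = await recognizer.RecognizeOnceAsync();
                await injection;

                Console.WriteLine(result.Text);
            }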
Example #7
    void FixedUpdate()
    {
#if PLATFORM_ANDROID
        if (!_micPermissionGranted && Permission.HasUserAuthorizedPermission(Permission.Microphone))
        {
            _micPermissionGranted = true;
        }
#elif PLATFORM_IOS
        if (!_micPermissionGranted && Application.HasUserAuthorization(UserAuthorization.Microphone))
        {
            _micPermissionGranted = true;
        }
#endif
        lock (_threadLocker)
        {
            if (recoButton != null)
            {
                recoButton.interactable = _micPermissionGranted && !_processingService && !_processingAnswer;
            }
            if (outputText != null && _recognitionStarted)
            {
                if (string.IsNullOrWhiteSpace(_transcription) && string.IsNullOrWhiteSpace(_answer))
                {
                    // Still listening: show the partial transcription in red.
                    outputText.text        = _unfinishedTranscription;
                    outputText.color       = Color.red;
                    recoButton.image.color = Color.red;
                    recoButton.GetComponentInChildren<TextMeshProUGUI>().color = Color.white;
                    recoButton.GetComponentInChildren<TextMeshProUGUI>().text  = "Listening...";
                }
                else if (!string.IsNullOrWhiteSpace(_transcription))
                {
                    // A final transcription arrived: simulate a click to stop listening.
                    _transcription = null;
                    recoButton.onClick.Invoke();
                }
                else
                {
                    outputText.text  = _answer;
                    outputText.color = Color.white;
                }
            }
        }

        if (Microphone.IsRecording(Microphone.devices[0]) && _recognitionStarted)
        {
            // Copy only the samples recorded since the last frame.
            int pos  = Microphone.GetPosition(Microphone.devices[0]);
            int diff = pos - _lastSample;

            if (diff > 0)
            {
                float[] samples = new float[diff * _audioSource.clip.channels];
                _audioSource.clip.GetData(samples, _lastSample);
                byte[] ba = ConvertAudioClipDataToInt16ByteArray(samples);
                if (ba.Length != 0)
                {
                    _pushStream.Write(ba);
                }
            }
            _lastSample = pos;
        }
        else if (!Microphone.IsRecording(Microphone.devices[0]) && !_recognitionStarted)
        {
            outputText.text        = _processingAnswer ? _answer : "";
            outputText.color       = Color.white;
            recoButton.image.color = _processingAnswer ? Color.red : Color.white;
            recoButton.GetComponentInChildren<TextMeshProUGUI>().color = _processingAnswer ? Color.white : Color.black;
            recoButton.GetComponentInChildren<TextMeshProUGUI>().text  = _processingAnswer ? "Checking..." : "Press and say the word";
        }
    }
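A sketch of how recording might be started before FixedUpdate takes over. The method itself, the 16 kHz format, and the 10-second clip length are assumptions; the field names match the example above:

    // Assumed setup method; field names match the example above.
    void StartRecording()
    {
        // 16 kHz, 16-bit mono matches what the Speech SDK push stream expects here.
        _pushStream = AudioInputStream.CreatePushStream(
            AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1));

        // Loop a 10-second clip; FixedUpdate drains the new samples each tick.
        _lastSample = 0;
        _audioSource.clip   = Microphone.Start(Microphone.devices[0], true, 10, 16000);
        _recognitionStarted = true;
    }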