Example #1
        public gSTT(string speechURI, string languageCode = LanguageCodes.Ukrainian.Ukraine, RecognitionConfig.Types.AudioEncoding audioEncoding = RecognitionConfig.Types.AudioEncoding.OggOpus)
        {
            byte[] data;

            using (var client = new WebClient())
            {
                data = client.DownloadData(speechURI);
            }

            var response = SpeechClient.Create().Recognize(new RecognitionConfig()
            {
                Encoding                   = audioEncoding,
                SampleRateHertz            = new TagLibFileAbstraction(speechURI, data).AudioSampleRate,
                LanguageCode               = languageCode,
                EnableAutomaticPunctuation = true,
            }, RecognitionAudio.FromBytes(data));

            Result = string.Empty;

            foreach (var result in response.Results)
            {
                foreach (var alternative in result.Alternatives)
                {
                    Result += alternative.Transcript;
                }
            }

            if (string.IsNullOrEmpty(Result))
            {
                // No transcript came back; surface this as an error state.
                throw new InvalidOperationException("Speech recognition returned no transcript.");
            }
        }
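
A minimal usage sketch for this constructor, assuming gSTT exposes the Result property assigned above; the URL is a placeholder:

            var stt = new gSTT("https://example.com/audio.ogg");
            Console.WriteLine(stt.Result);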
Example #2
        public async Task<SpeechToTextViewModel> AsyncRecognize(byte[] file)
        {
            var longOperation = await SpeechProperty().LongRunningRecognizeAsync(new RecognitionConfig()
            {
                Encoding                   = RecognitionConfig.Types.AudioEncoding.Flac,
                SampleRateHertz            = 16000,
                LanguageCode               = "en-US",
                EnableAutomaticPunctuation = true,
                EnableWordTimeOffsets      = true
            }, RecognitionAudio.FromBytes(file));

            longOperation = longOperation.PollUntilCompleted();
            var response = longOperation.Result;

            // Collect the word-level results from every alternative into a single array.
            var words = new List<WordInfo>();
            foreach (var result in response.Results)
            {
                foreach (var alternative in result.Alternatives)
                {
                    words.AddRange(alternative.Words);
                }
            }
            _model.WordInfo = words.ToArray();
            return _model;
        }
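
A sketch of consuming the word-level offsets collected above; audioBytes is a hypothetical input, and Duration.ToTimeSpan() comes from the protobuf well-known types:

            var model = await AsyncRecognize(audioBytes);
            foreach (var word in model.WordInfo)
            {
                Console.WriteLine($"{word.Word}: {word.StartTime.ToTimeSpan()} - {word.EndTime.ToTimeSpan()}");
            }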
Example #3
        public async Task<string> Recognize(byte[] file)
        {
            var speech   = SpeechClient.Create();
            var response = await speech.RecognizeAsync(new RecognitionConfig()
            {
                Encoding     = RecognitionConfig.Types.AudioEncoding.Linear16,
                LanguageCode = "en-US",
            }, RecognitionAudio.FromBytes(file));

            return response.Results?.FirstOrDefault()?.Alternatives?.FirstOrDefault()?.Transcript;
        }
Example #4
        public async Task<dynamic> AnalyzeSpeechAsync(byte[] audio, int frequency, string language, int encoding)
        {
            var client = SpeechClient.Create();

            return await client.RecognizeAsync(new RecognitionConfig
            {
                Encoding = (RecognitionConfig.Types.AudioEncoding)encoding,
                SampleRateHertz = frequency,
                LanguageCode = language
            }, RecognitionAudio.FromBytes(audio));
        }
Example #5
        public async Task<string> wavToText(byte[] audio)
        {
            var speechClient      = SpeechClient.Create();
            var recognitionConfig = new RecognitionConfig()
            {
                Encoding        = RecognitionConfig.Types.AudioEncoding.Linear16,
                SampleRateHertz = 48000,
                LanguageCode    = "ru-RU",
            };
            var recognitionAudio = RecognitionAudio.FromBytes(audio);
            var response         = await speechClient.RecognizeAsync(recognitionConfig, recognitionAudio);

            _logger.Log(NLog.LogLevel.Info, response);

            return response.Results != null
                   ? response.Results.SelectMany(t => t.Alternatives).Select(t => t.Transcript).FirstOrDefault()
                   : null;
        }
Example #6
        // Not an actual test... just examples
        public void FactoryMethods()
        {
            // Sample: FactoryMethods
            RecognitionAudio audio1 = RecognitionAudio.FromFile("Sound/SpeechSample.flac");
            RecognitionAudio audio2 = RecognitionAudio.FromUri("https://.../HostedSpeech.flac");
            RecognitionAudio audio3 = RecognitionAudio.FromStorageUri("gs://my-bucket/my-file");

            byte[]           bytes  = ReadAudioData(); // For example, from a database
            RecognitionAudio audio4 = RecognitionAudio.FromBytes(bytes);

            using (Stream stream = OpenAudioStream()) // Any regular .NET stream
            {
                RecognitionAudio audio5 = RecognitionAudio.FromStream(stream);
            }
            // End sample
        }
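
Any of the RecognitionAudio values above can be passed straight to SpeechClient.Recognize; a minimal sketch, assuming a 16 kHz FLAC file:

            var client = SpeechClient.Create();
            var audio  = RecognitionAudio.FromFile("Sound/SpeechSample.flac");
            var response = client.Recognize(new RecognitionConfig
            {
                Encoding        = RecognitionConfig.Types.AudioEncoding.Flac,
                SampleRateHertz = 16000, // assumed; the FLAC header also carries the real rate
                LanguageCode    = "en-US",
            }, audio);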
Example #7
        public string Recognize(byte[] stream)
        {
            string messageresult = string.Empty;

            try
            {
                var speech = SpeechClient.Create();

                var response = speech.Recognize(new RecognitionConfig()
                {
                    Encoding        = RecognitionConfig.Types.AudioEncoding.Linear16,
                    SampleRateHertz = 16000,
                    LanguageCode    = "pl-PL",
                }, RecognitionAudio.FromBytes(stream));

                foreach (var result in response.Results)
                {
                    foreach (var alternative in result.Alternatives)
                    {
                        messageresult += alternative.Transcript;
                    }
                }

                return messageresult;
            }
            catch (Exception e)
            {
                // Walk the inner-exception chain instead of hand-nesting null checks.
                for (var inner = e.InnerException; inner != null; inner = inner.InnerException)
                {
                    messageresult += inner.Message;
                }
                messageresult += e.Message;
                return messageresult;
            }
        }
Example #8
        public async Task<string> RecognizeFromFile(byte[] audio)
        {
            if (_disabled)
            {
                return("Speech recognition is currently disabled");
            }

            var recognitionAudio = RecognitionAudio.FromBytes(audio);

            RecognizeResponse response = await _speechClient.RecognizeAsync(_config, recognitionAudio);

            // string.Join yields an empty string for an empty result set, where Aggregate would throw.
            var recognized = string.Join(" ",
                response.Results.SelectMany(result => result.Alternatives.Select(alternative => alternative.Transcript)));

            return recognized;
        }
Example #9
        static void Recognize(byte[] recording, SpeechClient speech, RecognitionConfig config, ref string s)
        {
            s = "";
            var response = speech.Recognize(config, RecognitionAudio.FromBytes(recording));

            foreach (var result in response.Results)
            {
                foreach (var alternative in result.Alternatives)
                {
                    //Console.WriteLine(alternative.Transcript);
                    s += alternative.Transcript;
                }
                // Separate consecutive results with a space, but not after the last one.
                if (result != response.Results.Last())
                {
                    s += " ";
                }
            }
        }
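
A usage sketch for the ref-parameter helper above; the file path and configuration values are assumptions:

            var speech = SpeechClient.Create();
            var config = new RecognitionConfig
            {
                Encoding        = RecognitionConfig.Types.AudioEncoding.Linear16,
                SampleRateHertz = 16000,
                LanguageCode    = "en-US",
            };
            string transcript = string.Empty;
            Recognize(File.ReadAllBytes("recording.raw"), speech, config, ref transcript);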
Example #10
        // TODO: move to the Google service
        public async Task<string> flacToText(byte[] audio)
        {
            // Send the audio to Google for recognition
            var speechClient      = SpeechClient.Create();
            var recognitionConfig = new RecognitionConfig()
            {
                //EnableAutomaticPunctuation = true,
                Encoding        = RecognitionConfig.Types.AudioEncoding.Flac,
                LanguageCode    = "ru-Ru",
                Model           = "default",
                SampleRateHertz = 48000,
            };
            var recognitionAudio = RecognitionAudio.FromBytes(audio);
            var response         = await speechClient.RecognizeAsync(recognitionConfig, recognitionAudio);

            _logger.Log(NLog.LogLevel.Info, response);

            return response.Results != null
                   ? response.Results.SelectMany(t => t.Alternatives).Select(t => t.Transcript).FirstOrDefault()
                   : null;
        }
Example #11
        /// <summary>
        /// Pipeline function that handles incoming pipeline packages of audio bytes.
        /// Transcribes the audio to text and sends the text down the pipeline.
        /// </summary>
        /// <param name="audio">The audio buffer to transcribe.</param>
        /// <param name="e">The envelope carrying the originating time of the message.</param>
        protected override void Receive(AudioBuffer audio, Envelope e)
        {
            if (audio.Data.Length > 0)
            {
                var response = speech.Recognize(new RecognitionConfig()
                {
                    Encoding              = RecognitionConfig.Types.AudioEncoding.Linear16,
                    SampleRateHertz       = 16000,
                    LanguageCode          = this.AudioLanguage,
                    EnableWordTimeOffsets = true,
                }, RecognitionAudio.FromBytes(audio.Data));

                if (response.Results.Count > 0)
                {
                    string transcribedAudio = response.Results.First().Alternatives.First().Transcript;
                    this.Out.Post(transcribedAudio, e.OriginatingTime);
                }
            }
        }
Example #12
        public string Recognize()
        {
            if (Recognizer.longerAudioList.Count < 3200)
            {
                return "ERROR";
            }
            RecognitionAudio  audio    = RecognitionAudio.FromBytes(Recognizer.longerAudioList.ToArray());
            RecognizeResponse response = client.Recognize(config, audio);

            Console.WriteLine(response);
            Recognizer.longerAudioList.Clear();

            try
            {
                return response.Results[0].Alternatives[0].Transcript;
            }
            catch (Exception)
            {
                // No recognition results were returned.
                return "ERROR";
            }
        }
Example #13
        public string GoogleSpeechRecognition(byte[] filedata, List<string> KeyWordList)
        {
            try
            {
                var speech = SpeechClient.Create();

                var speechContext = new SpeechContext();
                foreach (var key in KeyWordList)
                {
                    speechContext.Phrases.Add(key);
                }

                var response = speech.Recognize(new RecognitionConfig()
                {
                    Encoding        = RecognitionConfig.Types.AudioEncoding.Linear16,
                    SampleRateHertz = 16000,
                    LanguageCode    = "ko-KR",
                    Model           = "command_and_search",
                    SpeechContexts  = { speechContext }
                }, RecognitionAudio.FromBytes(filedata));

                string resultstring = "";
                foreach (var result in response.Results)
                {
                    foreach (var alternative in result.Alternatives)
                    {
                        resultstring = resultstring + " " + alternative.Transcript;
                    }
                }
                if (resultstring.Length > 1)
                {
                    resultstring = resultstring.Substring(1);
                }
                return(resultstring);
            }
            catch
            {
                return("");
            }
        }
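
Since SpeechContext.Phrases is a repeated field, the same hints can also be supplied with a collection initializer; a minimal sketch with hypothetical phrases:

            var speechContext = new SpeechContext { Phrases = { "turn on", "turn off" } };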
Example #14
        private async void OnDataAvailable(object sender, WaveInEventArgs e)
        {
            //Debug.WriteLine(nameof(OnDataAvailable) + ": Start");

            var audioRequest = new StreamingRecognizeRequest()
            {
                AudioContent = RecognitionAudio.FromBytes(e.Buffer, 0, e.BytesRecorded).Content
            };

            try
            {
                if (_call != null && _canWrite)
                {
                    await _call.RequestStream.WriteAsync(audioRequest);
                }
            }
            catch (Exception ex)
            {
                Debug.WriteLine(nameof(OnDataAvailable) + ": Failed to send data: " + ex.Message);
            }
            //Debug.WriteLine(nameof(OnDataAvailable) + ": End");
        }
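
The handler above assumes _call is an open gRPC duplex streaming call that was primed with a configuration-only request, as in Example #16; a sketch of that setup, with the client variable and config values assumed:

            _call = client.StreamingRecognize();
            await _call.RequestStream.WriteAsync(new StreamingRecognizeRequest
            {
                StreamingConfig = new StreamingRecognitionConfig
                {
                    Config = new RecognitionConfig
                    {
                        Encoding        = RecognitionConfig.Types.AudioEncoding.Linear16,
                        SampleRateHertz = 16000,
                        LanguageCode    = "en-US",
                    },
                    InterimResults = true,
                },
            });
            _canWrite = true;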
Example #15
        public async Task<RecognitionResponseModel> Recognize(byte[] file, string languageCode, CancellationToken cancellationToken = default)
        {
            var speech   = SpeechClient.Create();
            var response = await speech.RecognizeAsync(new RecognitionConfig()
            {
                Encoding        = RecognitionConfig.Types.AudioEncoding.OggOpus,
                LanguageCode    = languageCode, // use the caller-supplied language code
                SampleRateHertz = 48000
            }, RecognitionAudio.FromBytes(file), cancellationToken);

            var alternative = response?.Results?.FirstOrDefault()?.Alternatives?.FirstOrDefault();

            return new RecognitionResponseModel()
            {
                Transcript = alternative?.Transcript,
                Confidence = alternative?.Confidence ?? 0,
                Words = alternative?.Words.Select(p => new Models.WordInfo
                {
                    Confidence = p.Confidence,
                    EndTime    = p.EndTime.ToTimeSpan().ToString(),
                    SpeakerTag = p.SpeakerTag,
                    StartTime  = p.StartTime.ToTimeSpan().ToString(),
                    Word       = p.Word
                }).ToArray()
            };
        }
Example #16
        static void Main(string[] args)
        {
            // Create the credential from the service-account JSON file
            var credential = GoogleCredential.FromJson(File.ReadAllText("SpeechTest-4db378c087bb.json"));

            credential = credential.CreateScoped("https://www.googleapis.com/auth/cloud-platform");

            // Create a channel for connecting to the server
            var channel = new Channel("speech.googleapis.com:443", credential.ToChannelCredentials());

            // Create a client for using the Google Speech API
            var client = new Speech.SpeechClient(channel);

            // Streaming settings
            var streamingConfig = new StreamingRecognitionConfig
            {
                Config = new RecognitionConfig
                {
                    SampleRate   = 16000,
                    Encoding     = RecognitionConfig.Types.AudioEncoding.Linear16,
                    LanguageCode = "ja-JP",
                },
                InterimResults  = true,
                SingleUtterance = false,
            };

            // Start streaming
            using (var call = client.StreamingRecognize())
            {
                Console.WriteLine("-----------\nstart.\n");

                // Configure what happens when a response comes back from the Cloud Speech API
                var responseReaderTask = Task.Run(async () =>
                {
                    // Each MoveNext call yields one response's worth of data
                    while (await call.ResponseStream.MoveNext())
                    {
                        var note = call.ResponseStream.Current;

                        // If there is data, print the recognition result
                        if (note.Results != null && note.Results.Count > 0 &&
                            note.Results[0].Alternatives.Count > 0)
                        {
                            Console.WriteLine("result: " + note.Results[0].Alternatives[0].Transcript);
                        }
                    }
                });

                // Make the first call; initially send only the configuration data
                var initialRequest = new StreamingRecognizeRequest
                {
                    StreamingConfig = streamingConfig,
                };
                call.RequestStream.WriteAsync(initialRequest).Wait();

                // Create the recording model
                IAudioRecorder recorder = new RecordModel();

                // Whenever the recording model emits audio data, send it to the server right away
                recorder.RecordDataAvailabled += (sender, e) =>
                {
                    if (e.Length > 0)
                    {
                        // WriteAsync can only run one call at a time, so take special care in asynchronous code;
                        // a lock is taken here so that writes do not overlap
                        lock (recorder)
                        {
                            call.RequestStream.WriteAsync(new StreamingRecognizeRequest
                            {
                                AudioContent = RecognitionAudio.FromBytes(e.Buffer, 0, e.Length).Content,
                            }).Wait();
                        }
                    }
                };

                // Start recording
                recorder.Start();

                // Cloud Speech API streaming is limited to 60 seconds per call, so cut it off at 50 seconds
                var timer = new Timer(1000 * 50);
                timer.Start();

                // When 50 seconds have elapsed, actually cut the stream off
                timer.Elapsed += async (sender, e) =>
                {
                    recorder.Stop();
                    await call.RequestStream.CompleteAsync();
                };

                // Wait
                responseReaderTask.Wait();

                // Reaching this point means the API call has finished, so dispose of the timer
                timer.Dispose();
            }

            Console.WriteLine("\n-----------\nCompleted (Time out)");
            Console.ReadKey();
        }
Example #17
        internal string Recognize(byte[] speech)
        {
            var response = client.Recognize(config, RecognitionAudio.FromBytes(speech));

            return response.Results.Count != 0 ? response.Results[0].Alternatives[0].Transcript : "";
        }