public gSTT(string speechURI, string languageCode = LanguageCodes.Ukrainian.Ukraine, RecognitionConfig.Types.AudioEncoding audioEncoding = RecognitionConfig.Types.AudioEncoding.OggOpus)
{
    byte[] data;
    using (var client = new WebClient())
    {
        data = client.DownloadData(speechURI);
    }

    var response = SpeechClient.Create().Recognize(new RecognitionConfig()
    {
        Encoding = audioEncoding,
        SampleRateHertz = new TagLibFileAbstraction(speechURI, data).AudioSampleRate,
        LanguageCode = languageCode,
        EnableAutomaticPunctuation = true,
    }, RecognitionAudio.FromBytes(data));

    Result = string.Empty;
    foreach (var result in response.Results)
    {
        foreach (var alternative in result.Alternatives)
        {
            Result += alternative.Transcript;
        }
    }

    if (string.IsNullOrEmpty(Result))
    {
        throw new ArgumentNullException();
    }
}
public async Task<SpeechToTextViewModel> AsyncRecognize(byte[] file)
{
    var longOperation = await SpeechProperty().LongRunningRecognizeAsync(new RecognitionConfig()
    {
        Encoding = RecognitionConfig.Types.AudioEncoding.Flac,
        SampleRateHertz = 16000,
        LanguageCode = "en",
        EnableAutomaticPunctuation = true,
        EnableWordTimeOffsets = true
    }, RecognitionAudio.FromBytes(file));

    // Wait for the long-running operation to finish without blocking the thread.
    longOperation = await longOperation.PollUntilCompletedAsync();
    var response = longOperation.Result;

    foreach (var result in response.Results)
    {
        foreach (var alternative in result.Alternatives)
        {
            var count = alternative.Words.Count;
            _model.WordInfo = new WordInfo[count];
            for (var i = 0; i < count; i++)
            {
                _model.WordInfo[i] = alternative.Words[i];
            }
        }
    }

    return _model;
}
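// The members assumed by AsyncRecognize above are not shown in the snippet.
// A minimal, hypothetical sketch of what they might look like follows: the
// names SpeechProperty, _model and SpeechToTextViewModel come from the snippet,
// but their definitions here are assumptions.
private readonly SpeechToTextViewModel _model = new SpeechToTextViewModel();

private SpeechClient SpeechProperty() => SpeechClient.Create();

public class SpeechToTextViewModel
{
    public WordInfo[] WordInfo { get; set; }
}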
public async Task<string> Recognize(byte[] file)
{
    var speech = SpeechClient.Create();
    var response = await speech.RecognizeAsync(new RecognitionConfig()
    {
        Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
        LanguageCode = "en",
    }, RecognitionAudio.FromBytes(file));

    return response.Results?.FirstOrDefault()?.Alternatives?.FirstOrDefault()?.Transcript;
}
public async Task<dynamic> AnalyzeSpeechAsync(byte[] audio, int frequency, string language, int encoding)
{
    var client = SpeechClient.Create();
    return await client.RecognizeAsync(new RecognitionConfig
    {
        Encoding = (RecognitionConfig.Types.AudioEncoding)encoding,
        SampleRateHertz = frequency,
        LanguageCode = language
    }, RecognitionAudio.FromBytes(audio));
}
public async Task<string> wavToText(byte[] audio)
{
    var speechClient = SpeechClient.Create();
    var recognitionConfig = new RecognitionConfig()
    {
        Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
        SampleRateHertz = 48000,
        LanguageCode = "ru-RU",
    };
    var recognitionAudio = RecognitionAudio.FromBytes(audio);
    var response = await speechClient.RecognizeAsync(recognitionConfig, recognitionAudio);
    _logger.Log(NLog.LogLevel.Info, response);

    return response.Results != null
        ? response.Results.SelectMany(t => t.Alternatives).Select(t => t.Transcript).FirstOrDefault()
        : null;
}
// Not an actual test... just examples
public void FactoryMethods()
{
    // Sample: FactoryMethods
    RecognitionAudio audio1 = RecognitionAudio.FromFile("Sound/SpeechSample.flac");
    RecognitionAudio audio2 = RecognitionAudio.FromUri("https://.../HostedSpeech.flac");
    RecognitionAudio audio3 = RecognitionAudio.FromStorageUri("gs://my-bucket/my-file");

    byte[] bytes = ReadAudioData(); // For example, from a database
    RecognitionAudio audio4 = RecognitionAudio.FromBytes(bytes);

    using (Stream stream = OpenAudioStream()) // Any regular .NET stream
    {
        RecognitionAudio audio5 = RecognitionAudio.FromStream(stream);
    }
    // End sample
}
public string Recognize(byte[] stream)
{
    string messageresult = string.Empty;
    try
    {
        var speech = SpeechClient.Create();
        var response = speech.Recognize(new RecognitionConfig()
        {
            Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
            SampleRateHertz = 16000,
            LanguageCode = "pl",
        }, RecognitionAudio.FromBytes(stream));

        foreach (var result in response.Results)
        {
            foreach (var alternative in result.Alternatives)
            {
                messageresult += alternative.Transcript;
            }
        }
        return messageresult;
    }
    catch (Exception e)
    {
        // Collect the messages from the InnerException chain, then the outer message.
        for (var inner = e.InnerException; inner != null; inner = inner.InnerException)
        {
            messageresult += inner.Message;
        }
        messageresult += e.Message;
        return messageresult;
    }
}
public async Task<string> RecognizeFromFile(byte[] audio)
{
    if (_disabled)
    {
        return "Speech recognition is currently disabled";
    }

    var recognitionAudio = RecognitionAudio.FromBytes(audio);
    RecognizeResponse response = await _speechClient.RecognizeAsync(_config, recognitionAudio);

    var recognized = response.Results
        .SelectMany(result => result.Alternatives.Select(alternative => alternative.Transcript))
        .Aggregate((x, y) => x + " " + y);
    return recognized;
}
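// The _speechClient, _config and _disabled fields used by RecognizeFromFile are
// not part of the snippet. A minimal sketch of how they might be initialized is
// shown below; the encoding, sample rate and language code are assumptions.
private readonly SpeechClient _speechClient = SpeechClient.Create();
private readonly bool _disabled = false;
private readonly RecognitionConfig _config = new RecognitionConfig
{
    Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
    SampleRateHertz = 16000,
    LanguageCode = "en-US"
};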
static void Recognize(byte[] recording, SpeechClient speech, RecognitionConfig config, ref string s)
{
    s = "";
    var response = speech.Recognize(config, RecognitionAudio.FromBytes(recording));
    foreach (var result in response.Results)
    {
        foreach (var alternative in result.Alternatives)
        {
            s += alternative.Transcript;
        }
        // Separate consecutive results with a space.
        if (result != response.Results.Last())
        {
            s += " ";
        }
    }
}
//todo: move to google service
public async Task<string> flacToText(byte[] audio)
{
    //send to google
    var speechClient = SpeechClient.Create();
    var recognitionConfig = new RecognitionConfig()
    {
        //EnableAutomaticPunctuation = true,
        Encoding = RecognitionConfig.Types.AudioEncoding.Flac,
        LanguageCode = "ru-RU",
        Model = "default",
        SampleRateHertz = 48000,
    };
    var recognitionAudio = RecognitionAudio.FromBytes(audio);
    var response = await speechClient.RecognizeAsync(recognitionConfig, recognitionAudio);
    _logger.Log(NLog.LogLevel.Info, response);

    return response.Results != null
        ? response.Results.SelectMany(t => t.Alternatives).Select(t => t.Transcript).FirstOrDefault()
        : null;
}
/// <summary>
/// Pipeline function that will handle incoming pipeline packages of audio bytes.
/// Will translate audio bytes to text and send text down the pipeline.
/// </summary>
/// <param name="audio">Buffer of raw audio samples to transcribe.</param>
/// <param name="e">Envelope carrying the originating time of the message.</param>
protected override void Receive(AudioBuffer audio, Envelope e)
{
    if (audio.Data.Length > 0)
    {
        var response = speech.Recognize(new RecognitionConfig()
        {
            Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
            SampleRateHertz = 16000,
            LanguageCode = this.AudioLanguage,
            EnableWordTimeOffsets = true,
        }, RecognitionAudio.FromBytes(audio.Data));

        if (response.Results.Count > 0)
        {
            string transcribedAudio = response.Results.First().Alternatives.First().Transcript;
            this.Out.Post(transcribedAudio, e.OriginatingTime);
        }
    }
}
public string Recognize()
{
    // Require a minimal amount of buffered audio before calling the API.
    if (Recognizer.longerAudioList.Count < 3200)
    {
        return "ERROR";
    }

    RecognitionAudio audio5 = RecognitionAudio.FromBytes(Recognizer.longerAudioList.ToArray());
    RecognizeResponse response = client.Recognize(config, audio5);
    Console.WriteLine(response);
    Recognizer.longerAudioList.Clear();

    try
    {
        return response.Results[0].Alternatives[0].Transcript;
    }
    catch (Exception)
    {
        return "ERROR";
    }
}
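// Recognizer.longerAudioList is external to the snippet above; a hypothetical
// sketch is shown here. The buffer is assumed to accumulate raw audio bytes
// between calls, while client and config would be created once, much like the
// _speechClient/_config sketch shown earlier.
public static class Recognizer
{
    public static readonly List<byte> longerAudioList = new List<byte>();
}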
public string GoogleSpeechRecognition(byte[] filedata, List<string> KeyWordList)
{
    try
    {
        var speech = SpeechClient.Create();

        var Speechcontext = new SpeechContext();
        foreach (var Key in KeyWordList)
        {
            Speechcontext.Phrases.Add(Key);
        }

        var response = speech.Recognize(new RecognitionConfig()
        {
            Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
            SampleRateHertz = 16000,
            LanguageCode = "ko",
            Model = "command_and_search",
            SpeechContexts = { Speechcontext }
        }, RecognitionAudio.FromBytes(filedata));

        string resultstring = "";
        foreach (var result in response.Results)
        {
            foreach (var alternative in result.Alternatives)
            {
                resultstring = resultstring + " " + alternative.Transcript;
            }
        }

        if (resultstring.Length > 1)
        {
            resultstring = resultstring.Substring(1);
        }
        return resultstring;
    }
    catch
    {
        return "";
    }
}
private async void OnDataAvailable(object sender, WaveInEventArgs e)
{
    //Debug.WriteLine(nameof(OnDataAvailable) + ": Start");
    var audioRequest = new StreamingRecognizeRequest()
    {
        AudioContent = RecognitionAudio.FromBytes(e.Buffer, 0, e.BytesRecorded).Content
    };

    try
    {
        if (_call != null && _canWrite)
        {
            await _call.RequestStream.WriteAsync(audioRequest);
        }
    }
    catch (Exception ex)
    {
        Debug.WriteLine(nameof(OnDataAvailable) + ": Failed to send data: " + ex.Message);
    }
    //Debug.WriteLine(nameof(OnDataAvailable) + ": End");
}
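// _call and _canWrite are defined outside this handler. A sketch of one way
// they might be set up is shown below, assuming _call is the raw gRPC duplex
// streaming call (as the use of RequestStream suggests); the configuration
// values and the StartStreamingAsync name are assumptions.
private AsyncDuplexStreamingCall<StreamingRecognizeRequest, StreamingRecognizeResponse> _call;
private volatile bool _canWrite;

private async Task StartStreamingAsync(Speech.SpeechClient client)
{
    _call = client.StreamingRecognize();
    // The first request carries only the configuration; audio follows later.
    await _call.RequestStream.WriteAsync(new StreamingRecognizeRequest
    {
        StreamingConfig = new StreamingRecognitionConfig
        {
            Config = new RecognitionConfig
            {
                Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                SampleRateHertz = 16000,
                LanguageCode = "en-US"
            },
            InterimResults = true
        }
    });
    _canWrite = true;
}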
public async Task<RecognitionResponseModel> Recognize(byte[] file, string languageCode, CancellationToken cancellationToken = default)
{
    var speech = SpeechClient.Create();
    var response = await speech.RecognizeAsync(new RecognitionConfig()
    {
        Encoding = RecognitionConfig.Types.AudioEncoding.OggOpus,
        LanguageCode = languageCode, // use the caller-supplied language code
        SampleRateHertz = 48000
    }, RecognitionAudio.FromBytes(file), cancellationToken);

    var alternative = response?.Results?.FirstOrDefault()?.Alternatives?.FirstOrDefault();
    return new RecognitionResponseModel()
    {
        Transcript = alternative?.Transcript,
        Confidence = alternative?.Confidence ?? 0,
        Words = alternative?.Words.Select(p => new Models.WordInfo
        {
            Confidence = p.Confidence,
            EndTime = p.EndTime.ToTimeSpan().ToString(),
            SpeakerTag = p.SpeakerTag,
            StartTime = p.StartTime.ToTimeSpan().ToString(),
            Word = p.Word
        }).ToArray()
    };
}
static void Main(string[] args)
{
    // Create the credential
    var credential = GoogleCredential.FromJson(File.ReadAllText("SpeechTest-4db378c087bb.json"));
    credential = credential.CreateScoped("https://www.googleapis.com/auth/cloud-platform");

    // Create a channel for connecting to the server
    var channel = new Channel("speech.googleapis.com:443", credential.ToChannelCredentials());

    // Create a client for the Google Speech API
    var client = new Speech.SpeechClient(channel);

    // Streaming configuration
    var streamingConfig = new StreamingRecognitionConfig
    {
        Config = new RecognitionConfig
        {
            SampleRate = 16000,
            Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
            LanguageCode = "ja-JP",
        },
        InterimResults = true,
        SingleUtterance = false,
    };

    // Start streaming
    using (var call = client.StreamingRecognize())
    {
        Console.WriteLine("-----------\nstart.\n");

        // Set up what happens when the Cloud Speech API returns a response
        var responseReaderTask = Task.Run(async () =>
        {
            // Each call to MoveNext yields one response worth of data
            while (await call.ResponseStream.MoveNext())
            {
                var note = call.ResponseStream.Current;
                // If there is data, print the recognition result
                if (note.Results != null && note.Results.Count > 0 && note.Results[0].Alternatives.Count > 0)
                {
                    Console.WriteLine("result: " + note.Results[0].Alternatives[0].Transcript);
                }
            }
        });

        // Make the initial call; the first request sends only the configuration
        var initialRequest = new StreamingRecognizeRequest
        {
            StreamingConfig = streamingConfig,
        };
        call.RequestStream.WriteAsync(initialRequest).Wait();

        // Create the recording model
        IAudioRecorder recorder = new RecordModel();

        // Whenever the recorder produces audio data, send it straight to the server
        recorder.RecordDataAvailabled += (sender, e) =>
        {
            if (e.Length > 0)
            {
                // WriteAsync allows only one call at a time, so take extra care with asynchronous code.
                // A lock is used here so that writes do not overlap.
                lock (recorder)
                {
                    call.RequestStream.WriteAsync(new StreamingRecognizeRequest
                    {
                        AudioContent = RecognitionAudio.FromBytes(e.Buffer, 0, e.Length).Content,
                    }).Wait();
                }
            }
        };

        // Start recording
        recorder.Start();

        // Cloud Speech API streaming is limited to 60 seconds per call, so cut it off at 50 seconds
        var timer = new Timer(1000 * 50);
        timer.Start();

        // Code that actually stops streaming once 50 seconds have elapsed
        timer.Elapsed += async (sender, e) =>
        {
            recorder.Stop();
            await call.RequestStream.CompleteAsync();
        };

        // Wait
        responseReaderTask.Wait();

        // Reaching this point means the API call has finished, so dispose of the timer
        timer.Dispose();
    }

    Console.WriteLine("\n-----------\nCompleted (Time out)");
    Console.ReadKey();
}
internal string Recognize(byte[] speech)
{
    var response = client.Recognize(config, RecognitionAudio.FromBytes(speech));
    return response.Results.Count != 0 ? response.Results[0].Alternatives[0].Transcript : "";
}
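// A hypothetical caller for the method above, assuming it lives in a class
// named SpeechRecognizer whose client and config fields are set up in its
// constructor; the class name and file path are placeholders.
var recognizer = new SpeechRecognizer();
byte[] speechBytes = File.ReadAllBytes("recording.raw");
Console.WriteLine(recognizer.Recognize(speechBytes));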