/// <summary>
/// Recognizes speech in the supplied raw audio via the Yandex SpeechKit API
/// and returns the text of the first recognition variant.
/// </summary>
/// <param name="voiceData">Raw PCM 16 kHz audio bytes to recognize.</param>
/// <returns>The text of the first (best) recognition variant.</returns>
/// <exception cref="Exception">
/// Thrown on transport/HTTP failure, when recognition fails, or when the
/// request is cancelled.
/// </exception>
public async Task<string> Invork(byte[] voiceData)
{
    var apiSetttings = new SpeechKitClientOptions($"{AppConfig.YandexSpeechApiKey}", "MashaWebApi", Guid.Empty, "server");
    using (var client = new SpeechKitClient(apiSetttings))
    {
        var speechRecognitionOptions = new SpeechRecognitionOptions(SpeechModel.Queries, RecognitionAudioFormat.Pcm16K, _language);
        try
        {
            // Dispose the stream when the request completes (it was leaked before).
            using (Stream mediaStream = new MemoryStream(voiceData))
            {
                // NOTE(review): cancellationToken is not a parameter here — it is
                // presumably a field of the enclosing type; confirm its lifetime.
                var result = await client.SpeechToTextAsync(speechRecognitionOptions, mediaStream, cancellationToken).ConfigureAwait(false);

                if (result.TransportStatus != TransportStatus.Ok || result.StatusCode != HttpStatusCode.OK)
                {
                    throw new Exception("YandexSpeechKit error: " + result.TransportStatus.ToString());
                }

                if (!result.Result.Success)
                {
                    throw new Exception("Unable to recognize speech");
                }

                var utterances = result.Result.Variants;

                // Guard against an empty variant list so we fail with a clear
                // message instead of an InvalidOperationException from First().
                if (utterances == null || !utterances.Any())
                {
                    throw new Exception("Unable to recognize speech");
                }

                return utterances.First().Text;
            }
        }
        catch (OperationCanceledException ex)
        {
            // Preserve the original exception as the inner exception so the
            // cancellation stack trace is not lost (it was discarded before).
            throw new Exception(ex.Message, ex);
        }
    }
}
/// <summary>
/// Applies new recognition options by restarting the active recognition
/// session. No-op when recognition is not currently running.
/// </summary>
/// <param name="options">The options to restart recognition with.</param>
public async Task UpdateOptions(SpeechRecognitionOptions options)
{
    if (!IsRunning)
    {
        // Not running: there is no session to restart.
        return;
    }

    Logger.LogInformation("UpdateOptions");

    try
    {
        await StopInt(false);
        await Start(options);
    }
    catch (Exception e)
    {
        // Restart failed; notify subscribers that recognition stopped,
        // attaching the cause.
        Logger.LogError(e, "Could not restart recognition from UpdateOptions");
        Stopped?.Invoke(this, new SpeechRecognitionStoppedEvent { Exception = e });
    }
}
/// <summary>
/// Handles an incoming Telegram voice message: downloads the voice file,
/// sends it to Yandex SpeechKit for recognition and replies with the
/// recognized text (or a Russian-language error message on failure).
/// </summary>
/// <param name="message">Incoming Telegram message carrying a voice attachment.</param>
private static async Task CheckMessagesAsync(Taikandi.Telebot.Types.Message message)
{
    try
    {
        // Await the Telebot calls instead of blocking on them with
        // GetAwaiter().GetResult() — blocking inside an async method risks
        // deadlock and starves the thread pool.
        var file = await _telebot.GetFileAsync(message.Voice.FileId).ConfigureAwait(false);
        await _telebot.DownloadFileAsync(file, Path.GetFullPath(@".\\voice.ogg"), overwrite: true).ConfigureAwait(false);

        // SECURITY(review): API key is hard-coded in source — move it to
        // configuration/secret storage and rotate the exposed key.
        var apiSetttings = new SpeechKitClientOptions("1774531e-6a7c-4973-878c-d84d121d9ae1", "Key #1", Guid.Empty, "pc");

        // Dispose both the file stream (it was leaked before) and the client.
        using (var stream = new FileStream(@".\\voice.ogg", FileMode.Open))
        using (var client = new SpeechKitClient(apiSetttings))
        {
            var speechRecognitionOptions = new SpeechRecognitionOptions(SpeechModel.Queries, RecognitionAudioFormat.Ogg, RecognitionLanguage.Russian);
            try
            {
                var result = await client.SpeechToTextAsync(speechRecognitionOptions, stream, CancellationToken.None).ConfigureAwait(false);

                if (result.TransportStatus != TransportStatus.Ok || result.StatusCode != HttpStatusCode.OK)
                {
                    await TelegramMessager._telebot.SendMessageAsync(message.Chat.Id, "Ошибка передачи").ConfigureAwait(false);
                    return;
                }

                if (!result.Result.Success)
                {
                    await TelegramMessager._telebot.SendMessageAsync(message.Chat.Id, "Ошибка распознавания").ConfigureAwait(false);
                    return;
                }

                var utterances = result.Result.Variants;
                string text = utterances[0].Text;

                // Capitalize the first letter of the recognized text.
                text = text.First().ToString().ToUpper() + text.Substring(1);

                await TelegramMessager._telebot.SendMessageAsync(message.Chat.Id, text).ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                // Recognition was cancelled; nothing to report to the user.
            }
        }
    }
    catch (Exception)
    {
        // Deliberate best-effort: a failure handling one message must not
        // crash the bot's polling loop. TODO(review): log the exception
        // instead of swallowing it silently.
    }
}
/// <summary>
/// Converts a voice message to text using the Yandex SpeechKit API,
/// returning the recognition variant with the highest confidence.
/// </summary>
/// <param name="message">Voice message carrying raw WAV audio and a language.</param>
/// <returns>A <see cref="TextMessage"/> with the most confident transcription.</returns>
/// <exception cref="Exceptions.InvalidMessageException">The message language is unsupported.</exception>
/// <exception cref="Exception">Transport/recognition failure, cancellation, or no variants returned.</exception>
public async System.Threading.Tasks.Task<TextMessage> ProcessAsync(VoiceMessage message)
{
    switch (message.Language)
    {
        case Core.Enums.Language.English:
            this._language = RecognitionLanguage.English;
            break;
        case Core.Enums.Language.Russian:
            this._language = RecognitionLanguage.Russian;
            break;
        default:
            throw new Exceptions.InvalidMessageException(message.Id, "Invalid Language: " + message.Language.ToString());
    }

    var apiSetttings = new SpeechKitClientOptions($"{YandexCompmnentConfig.YandexSpeechApiKey}", "MashaWebApi", Guid.Empty, "server");
    using (var client = new SpeechKitClient(apiSetttings))
    using (var mediaStream = new MemoryStream(message.Vioce)) // dispose the stream (it was leaked before)
    {
        // BUG FIX: use the language resolved by the switch above instead of
        // hard-coded Russian — English messages were always sent as Russian.
        var speechRecognitionOptions = new SpeechRecognitionOptions(SpeechModel.Queries, RecognitionAudioFormat.Wav, _language);
        try
        {
            // NOTE(review): cancellationToken is presumably a field of the
            // enclosing type; confirm.
            var result = await client.SpeechToTextAsync(speechRecognitionOptions, mediaStream, cancellationToken).ConfigureAwait(false);

            // These failures were previously ignored (empty blocks), which led
            // to a NullReferenceException further down; fail loudly instead.
            if (result.TransportStatus != TransportStatus.Ok || result.StatusCode != HttpStatusCode.OK)
            {
                throw new Exception("YandexSpeechKit error: " + result.TransportStatus.ToString());
            }

            if (!result.Result.Success)
            {
                throw new Exception("Unable to recognize speech");
            }

            var utterances = result.Result.Variants;
            if (utterances.Count > 0)
            {
                // Pick the variant with the highest confidence.
                var max = utterances[0];
                foreach (var item in utterances)
                {
                    if (item.Confidence > max.Confidence)
                    {
                        max = item;
                    }
                }

                return new TextMessage()
                {
                    Id = message.Id,
                    Language = message.Language,
                    Text = max.Text
                };
            }

            throw new Exception("invalid answer"); // typo fixed: was "invdlid answer"
        }
        catch (OperationCanceledException ex)
        {
            // The old message ("invdlid answer") was misleading here; keep the
            // cancellation as the inner exception so its stack trace survives.
            throw new Exception("Speech recognition was cancelled", ex);
        }
    }
}
/// <summary>
/// Starts continuous speech recognition against the Azure Speech service with
/// the given options: builds the speech/language configuration, wires up the
/// recognition and cancellation events (including credential rotation on
/// quota/auth failures), then starts recognition and the audio source.
/// </summary>
/// <param name="options">Languages and optional boost phrases for recognition.</param>
/// <exception cref="Exception">Rethrows any failure to start recognition.</exception>
public async Task Start(SpeechRecognitionOptions options)
{
    SpeechRecognizer recognizer = null;
    try
    {
        Logger.LogInformation("Starting speech recognition");

        var credentials = this.Credentials;
        var speechConfig = SpeechConfig.FromEndpoint(new Uri($"wss://{credentials.Region}.stt.speech.microsoft.com/speech/universal/v2"), credentials.SubscriptionKey);
        speechConfig.SetProfanity(ProfanityOption.Raw);

        if (options.Languages.Count > 1)
        {
            //enable continuous language detection when we have more than 1 language
            //this seems kind of buggy though, at times the speech recognition just simply doesn't work at all when this is enabled
            speechConfig.SetProperty(PropertyId.SpeechServiceConnection_ContinuousLanguageIdPriority, "Latency");
        }

        var languageConfig = AutoDetectSourceLanguageConfig.FromLanguages(options.Languages.Select(lang =>
        {
            //convert language selections
            if (lang.Length == 2)
            {
                //two-letter code. select some default five-letter code instead.
                if (lang == "en")
                {
                    lang = "en-US";
                }
                else
                {
                    lang = lang + "-" + lang.ToUpperInvariant();
                }
            }
            return lang;
        }).ToArray());

        recognizer = new SpeechRecognizer(speechConfig, languageConfig, AudioConfig);

        //set up the special phrases if any
        if (options.Phrases?.Count > 0)
        {
            var phrases = PhraseListGrammar.FromRecognizer(recognizer);
            foreach (var phrase in options.Phrases)
            {
                phrases.AddPhrase(phrase);
            }
        }

        //prepare events
        recognizer.Canceled += (sender, e) =>
        {
            SpeechRecognizer = null;
            Dispose(Disposables);
            if (e.ErrorCode == CancellationErrorCode.Forbidden || e.ErrorCode == CancellationErrorCode.AuthenticationFailure)
            {
                //out of quota (or invalid key, try the next one anyway)
                int credentialsIndexCurrent = CredentialsIndex;
                if (NextCredentials())
                {
                    // FIX: the interpolated string previously spanned a raw
                    // newline in source, which does not compile; use \n.
                    Logger.LogInformation($"Out of quota for credentials {credentialsIndexCurrent}.\nRestarting with {CredentialsIndex}");
                    Threading.Tasks.FireAndForget(() => Start(options));
                    return;
                }
            }
            if (e.Reason != CancellationReason.EndOfStream && e.Reason != CancellationReason.CancelledByUser)
            {
                // typo fixed in log message: "erroCode" -> "errorCode"
                Logger.LogWarning($"Recognition stopped. reason={e.Reason}, errorCode={e.ErrorCode}, details={e.ErrorDetails}");
            }
            Stopped?.Invoke(this, new SpeechRecognitionStoppedEvent() { Message = $"{e.ErrorCode}: {e.ErrorDetails}" });
        };
        recognizer.Recognizing += (sender, e) => { OnSpeechEvent(e, false); };
        recognizer.Recognized += (sender, e) => { OnSpeechEvent(e, true); };
        recognizer.SpeechEndDetected += (sender, e) => { StreamAudioNoiseGate?.OnAudioStop(); };

        //start recognizing
        await recognizer.StartContinuousRecognitionAsync();

        //start our audio source
        if (!IsRunning && StreamAudioSource != null)
        {
            await StreamAudioSource.Start();
        }
    }
    catch (Exception e)
    {
        Logger.LogError(e, "Could not start continuous recognition");
        recognizer?.Dispose();
        throw; // rethrow without resetting the stack trace
    }

    SpeechRecognizer = recognizer;
    IsRunning = true;
    Disposables.Add(recognizer);
}