/// <summary>
/// Streams audio to the speech service and relays partial and final recognition
/// results to the supplied callbacks.
/// </summary>
/// <param name="stream">The audio stream to recognize.</param>
/// <param name="locale">The recognition locale (e.g. "en-US").</param>
/// <param name="mode">Selects the long-dictation or short-phrase service endpoint.</param>
/// <param name="partialResult">Invoked for each partial recognition step.</param>
/// <param name="finalResult">Invoked for each final recognition result.</param>
/// <returns>A task that completes when recognition finishes.</returns>
public async Task DecodeAudioAsync(Stream stream, string locale, PhraseMode mode, Func<RecognitionStep, Task> partialResult, Func<RecognitionFinalResult, Task> finalResult)
{
    // Pick the endpoint matching the requested phrase mode.
    var serviceUrl = mode == PhraseMode.LongDictation ? LongDictationUrl : ShortPhraseUrl;
    string subscriptionKey = configuration.GetSection("AudioToTextService.Core.AudioDecoder")["subscriptionKey"];

    // Bundle locale, endpoint and authorization into the client preferences.
    var preferences = new Preferences(locale, serviceUrl, new CognitiveServicesAuthorizationProvider(subscriptionKey));

    using (var speechClient = new SpeechClient(preferences))
    {
        // Forward intermediate and final hypotheses straight to the caller's delegates.
        speechClient.SubscribeToPartialResult(args => partialResult(ToStep(args)));
        speechClient.SubscribeToRecognitionResult(args => finalResult(ToFinalResult(args)));

        // Request metadata describing the (sample) client device and application.
        var deviceMetadata = new DeviceMetadata(DeviceType.Near, DeviceFamily.Desktop, NetworkType.Ethernet, OsName.Windows, "1607", "Dell", "T3600");
        var applicationMetadata = new ApplicationMetadata("SampleApp", "1.0.0");
        var requestMetadata = new RequestMetadata(Guid.NewGuid(), deviceMetadata, applicationMetadata, "SampleAppService");

        await speechClient.RecognizeAsync(new SpeechInput(stream, requestMetadata), this.cts.Token).ConfigureAwait(false);
    }
}
/// <summary>
/// Recognizes speech content from raw audio bytes.
/// </summary>
/// <param name="audioBytes">The raw audio data (WAV).</param>
/// <param name="locale">The locale of the speech, e.g. "zh-CN".</param>
/// <returns>A task that completes when recognition finishes; results are delivered
/// through <see cref="OnPartialResult"/> and <see cref="OnRecognitionResult"/>.</returns>
public async Task RunDiscernAsBytes(byte[] audioBytes, string locale)
{
    // Clips longer than 15 seconds must use the long-dictation endpoint.
    Uri serviceUrl = GetWavPlayTime(audioBytes) > 15 ? LongUrl : ShortUrl;

    var preferences = new Preferences(locale, serviceUrl, new AsrCheckKeyProvider(apiKey)); // client configuration
    using (var speechClient = new SpeechClient(preferences)) // create the speech client
    {
        speechClient.SubscribeToPartialResult(this.OnPartialResult);
        speechClient.SubscribeToRecognitionResult(this.OnRecognitionResult);
        using (Stream stream = new MemoryStream(audioBytes))
        {
            var deviceMetadata = new DeviceMetadata(DeviceType.Near, DeviceFamily.Desktop, NetworkType.Ethernet, OsName.Windows, "1607", "Dell", "T3600");
            var applicationMetadata = new ApplicationMetadata("SampleApp", "1.0.0");
            var requestMetadata = new RequestMetadata(Guid.NewGuid(), deviceMetadata, applicationMetadata, "SampleAppService");
            try
            {
                await speechClient.RecognizeAsync(new SpeechInput(stream, requestMetadata), this.cts.Token).ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                // Cancellation through this.cts is an expected way to stop recognition.
            }
            catch (Exception ex)
            {
                // Best-effort recognition: do not crash the caller, but record the
                // failure instead of silently swallowing it (the original empty
                // catch hid all errors, including configuration/auth problems).
                System.Diagnostics.Trace.TraceError("Speech recognition failed: {0}", ex);
            }
        }
    }
}
/// <summary>
/// Sends a speech recognition request to the speech service
/// </summary>
/// <param name="audioFile">The audio file.</param>
/// <param name="locale">The locale.</param>
/// <param name="serviceUrl">The service URL.</param>
/// <param name="subscriptionKey">The subscription key.</param>
/// <param name="resultFile">Path of the file the result handlers write to; created (or truncated) here.</param>
/// <returns>
/// A task
/// </returns>
public async Task Run(string audioFile, string locale, Uri serviceUrl, string subscriptionKey, string resultFile)
{
    // create the preferences object
    var preferences = new Preferences(locale, serviceUrl, new CognitiveServicesAuthorizationProvider(subscriptionKey));

    ResultFile = resultFile;
    // File.CreateText returns an open StreamWriter; dispose it immediately — we only
    // want to create/truncate the file here. (The original leaked the writer handle,
    // which could keep the file locked against the result handlers.)
    File.CreateText(ResultFile).Dispose();

    // Create a speech client
    using (var speechClient = new SpeechClient(preferences))
    {
        speechClient.SubscribeToPartialResult(this.OnPartialResult);
        speechClient.SubscribeToRecognitionResult(this.OnRecognitionResult);

        // create an audio content and pass it a stream.
        using (var audio = new FileStream(audioFile, FileMode.Open, FileAccess.Read))
        {
            var deviceMetadata = new DeviceMetadata(DeviceType.Near, DeviceFamily.Desktop, NetworkType.Ethernet, OsName.Windows, "1607", "Dell", "T3600");
            var applicationMetadata = new ApplicationMetadata("SampleApp", "1.0.0");
            var requestMetadata = new RequestMetadata(Guid.NewGuid(), deviceMetadata, applicationMetadata, "SampleAppService");

            await speechClient.RecognizeAsync(new SpeechInput(audio, requestMetadata), this.cts.Token).ConfigureAwait(false);
        }
    }
}
/// <summary>
/// Transcribes one audio segment on a worker task, accumulating recognized text into
/// paragraph-separated blocks and reporting percentage progress while it runs.
/// Returns a tuple of (audio file path, transcribed text).
/// </summary>
/// <param name="index">Slot in the shared <c>percentages</c> array this segment updates.</param>
/// <param name="fileName">Display name passed to <c>reportProgress</c>.</param>
/// <param name="audioFile">Path of the audio segment to transcribe.</param>
/// <param name="totalTime">Total duration of the whole recording, used to compute percent.</param>
/// <param name="token">Cancellation token checked inside the partial-result callback.</param>
/// <param name="user">User identifier forwarded to <c>reportProgress</c>.</param>
private Task <Tuple <string, string> > transcribeAudioSegement(int index, string fileName, string audioFile, TimeSpan totalTime, CancellationToken token, string user)
{
    return(Task.Factory.StartNew <Tuple <string, string> >(() =>
    {
        // NOTE(review): subscription key and endpoint are hardcoded in source — move to
        // secure configuration; a key committed to source control must be rotated.
        var preferences = new Preferences("en-US", new Uri("wss://5ba5d066af03405ba71e84ba3bc4d185.api.cris.ai/ws/cris/speech/recognize/continuous"), new CognitiveServicesAuthorizationProvider("36677b4f10da4d2a946af66da757ef0b"));
        DateTime lastReportTime = DateTime.Now;     // throttles progress reports to mReportInterval
        DateTime lastDetectionTime = DateTime.Now;  // time of the last final phrase, used for paragraph breaks
        int runonLength = 0;                        // characters appended since the last paragraph break
        StringBuilder text = new StringBuilder();
        using (var speechClient = new SpeechClient(preferences))
        {
            // Partial results only drive progress reporting; the text itself comes from final results.
            speechClient.SubscribeToPartialResult((args) =>
            {
                return Task.Factory.StartNew(() =>
                {
                    token.ThrowIfCancellationRequested();
                    if (DateTime.Now - lastReportTime >= mReportInterval)
                    {
                        // MediaTime appears to be in 100-ns-scale units: * 0.00001 converts toward
                        // seconds before dividing by total duration — TODO confirm the unit.
                        var percent = (int)(args.MediaTime * 0.00001 / totalTime.TotalSeconds);
                        percentages[index] = percent;
                        // Cap the summed progress at 99 until completion; show a 50-char text preview.
                        reportProgress(fileName, Math.Min(99, percentages.Sum()), args.DisplayText.Substring(0, Math.Min(args.DisplayText.Length, 50)) + "...", user).Wait();
                        lastReportTime = DateTime.Now;
                    }
                });
            });
            speechClient.SubscribeToRecognitionResult((args) =>
            {
                return Task.Factory.StartNew(() =>
                {
                    if (args.Phrases.Count > 0)
                    {
                        // Take the last phrase as the best hypothesis — TODO confirm ordering.
                        string bestText = args.Phrases[args.Phrases.Count - 1].DisplayText;
                        runonLength += bestText.Length;
                        // Start a new paragraph after a >=5s pause or ~1800 chars, but only once
                        // at least 250 chars have accumulated (avoids tiny fragments).
                        if ((DateTime.Now - lastDetectionTime >= TimeSpan.FromSeconds(5) || runonLength >= 1800) && runonLength >= 250)
                        {
                            text.Append("\r\n\r\n ");
                            runonLength = 0;
                        }
                        // Insert a space after ./,/? when not already followed by a space.
                        text.Append(Regex.Replace(bestText, "(?<=[\\.,?])(?![$ ])", " "));
                        lastDetectionTime = DateTime.Now;
                    }
                });
            });
            using (var audio = new FileStream(audioFile, FileMode.Open, FileAccess.Read))
            {
                var deviceMetadata = new DeviceMetadata(DeviceType.Near, DeviceFamily.Desktop,
                                                        NetworkType.Ethernet, OsName.Windows, "1607", "Dell", "T3600");
                var applicationMetadata = new ApplicationMetadata("TranscriptionApp", "1.0.0");
                var requestMetadata = new RequestMetadata(Guid.NewGuid(), deviceMetadata, applicationMetadata, "TranscriptionService");
                // NOTE(review): recognition is cancelled via mTokensource.Token while the callbacks
                // check the `token` parameter — confirm these are intended to be different tokens.
                // Blocking .Wait() is acceptable here only because we are already on a worker task.
                speechClient.RecognizeAsync(new SpeechInput(audio, requestMetadata), mTokensource.Token).Wait();
                return new Tuple <string, string>(audioFile, text.ToString());
            }
        }
    }));
}
/// <summary>
/// Downloads an OGA audio file, converts it to WAV, and runs pt-BR speech recognition on it.
/// </summary>
/// <param name="contentUrl">URL of the OGA audio content to download.</param>
/// <returns>The recognized text on success; otherwise a pt-BR error message including the status.</returns>
public static async Task <string> ReconhecerFala(string contentUrl)
{
    RecognitionStatus status = RecognitionStatus.None;
    string stringResult = string.Empty;

    byte[] wavData;
    // Dispose the WebClient once the download is done (the original leaked it).
    using (var wc = new WebClient())
    {
        var converter = new OgaToWavConverter();
        var ogaData = await wc.DownloadDataTaskAsync(contentUrl);
        wavData = converter.Convert(ogaData);
    }

    var preferences = new Preferences("pt-BR", new Uri(@"wss://speech.platform.bing.com/api/service/recognition"), new CognitiveServicesAuthorizationProvider(CognitiveServicesAuthorizationProvider.API_KEY));
    using (var speechClient = new SpeechClient(preferences))
    using (var audioStream = new MemoryStream(wavData)) // dispose the input stream (original leaked it)
    {
        // The callbacks capture the locals above; the last partial result is kept as the text.
        speechClient.SubscribeToPartialResult(
            result =>
        {
            stringResult = result.DisplayText;
            return(Task.FromResult(true));
        });
        speechClient.SubscribeToRecognitionResult(
            result =>
        {
            status = result.RecognitionStatus;
            return(Task.FromResult(true));
        });

        var deviceMetadata = new DeviceMetadata(DeviceType.Near, DeviceFamily.Unknown, NetworkType.Unknown, OsName.Windows, "10", "IBM", "ThinkCenter");
        var applicationMetadata = new ApplicationMetadata("WorkshopAtentoBot", "1.0");
        var requestMetada = new RequestMetadata(Guid.NewGuid(), deviceMetadata, applicationMetadata, "ReconhecimentoFalaService");

        await speechClient.RecognizeAsync(new SpeechInput(audioStream, requestMetada), CancellationToken.None).ConfigureAwait(false);
    }

    // The result callback may lag slightly behind RecognizeAsync completing; poll briefly,
    // but bound the wait (~10 s) so a missing callback cannot hang the caller forever
    // (the original looped indefinitely while status stayed None).
    for (int attempts = 0; status == RecognitionStatus.None && attempts < 50; attempts++)
    {
        await Task.Delay(200).ConfigureAwait(false);
    }

    if (status == RecognitionStatus.Success)
    {
        return(stringResult);
    }
    else
    {
        return($"Ocorreu um erro no reconhecimento de fala. Status = {status}");
    }
}
/// <summary>
/// Downloads an audio file and sends it to the speech service for transcription.
/// </summary>
/// <param name="audioFile">URL of the audio file to download and transcribe.</param>
/// <param name="locale">The recognition locale; defaults to "en-GB".</param>
/// <returns>
/// The fallback transcription when websockets are unsupported on this platform;
/// otherwise an empty string (recognition results are delivered through the
/// subscribed <see cref="OnPartialResult"/>/<see cref="OnRecognitionResult"/> handlers).
/// </returns>
public async Task <string> TranslateToText(string audioFile, string locale = "en-GB")
{
    var preferences = new Preferences(locale, _serviceUrl, new CognitiveServicesAuthorizationProvider(_subscriptionKey));

    // Create a speech client
    using (var speechClient = new SpeechClient(preferences))
    {
        speechClient.SubscribeToPartialResult(this.OnPartialResult);
        speechClient.SubscribeToRecognitionResult(this.OnRecognitionResult);
        try
        {
            // create an audio content and pass it a stream.
            using (var downloadStream = new WebClient())
                using (var audio = new MemoryStream(downloadStream.DownloadData(audioFile)))
                {
                    var deviceMetadata = new DeviceMetadata(DeviceType.Near, DeviceFamily.Desktop, NetworkType.Ethernet, OsName.Windows, "1607", "Dell", "T3600");
                    var applicationMetadata = new ApplicationMetadata("SampleApp", "1.0.0");
                    var requestMetadata = new RequestMetadata(Guid.NewGuid(), deviceMetadata, applicationMetadata, "SampleAppService");

                    await speechClient.RecognizeAsync(new SpeechInput(audio, requestMetadata), this.cts.Token)
                    .ConfigureAwait(false);
                }
        }
        catch (PlatformNotSupportedException)
        {
            // fallback for when websockets are not supported
            return(await TranslateToTextFallback(audioFile).ConfigureAwait(false));
        }
        catch (Exception ex)
        {
            // Preserve the original best-effort contract (fall through to string.Empty),
            // but record the error instead of silently swallowing it as the original
            // `catch (Exception e)` with an `is` check did.
            System.Diagnostics.Trace.TraceError("TranslateToText failed: {0}", ex);
        }
    }
    return(string.Empty);
}