/// <summary>
/// Streams the supplied audio to the Cognitive Services speech endpoint and forwards
/// partial and final recognition results to the caller-supplied async callbacks.
/// </summary>
/// <param name="stream">Readable audio stream (expected WAV — TODO confirm with converter).</param>
/// <param name="locale">Recognition locale passed to the service.</param>
/// <param name="mode">Selects the short-phrase or long-dictation endpoint.</param>
/// <param name="partialResult">Invoked for each partial recognition hypothesis.</param>
/// <param name="finalResult">Invoked for each final phrase result.</param>
public async Task DecodeAudioAsync(
    Stream stream,
    string locale,
    PhraseMode mode,
    Func<RecognitionStep, Task> partialResult,
    Func<RecognitionFinalResult, Task> finalResult)
{
    // Long dictation and short phrase recognition use different service endpoints.
    string endpoint = mode == PhraseMode.LongDictation ? LongDictationUrl : ShortPhraseUrl;

    // NOTE(review): the section name contains dots rather than ':' separators — this only
    // works if the configuration really stores a single flat key with this exact name;
    // verify against the app settings file.
    string subscriptionKey =
        configuration.GetSection("AudioToTextService.Core.AudioDecoder")["subscriptionKey"];

    // Bundle endpoint, locale and authorization into the client preferences.
    var authorization = new CognitiveServicesAuthorizationProvider(subscriptionKey);
    var preferences = new Preferences(locale, endpoint, authorization);

    // Create the speech client for the lifetime of this single recognition request.
    using (var speechClient = new SpeechClient(preferences))
    {
        // Relay SDK events to the caller's async handlers, mapping SDK args to our DTOs.
        speechClient.SubscribeToPartialResult(args => partialResult(ToStep(args)));
        speechClient.SubscribeToRecognitionResult(args => finalResult(ToFinalResult(args)));

        // Request metadata describing the (sample) client device and application.
        var device = new DeviceMetadata(
            DeviceType.Near,
            DeviceFamily.Desktop,
            NetworkType.Ethernet,
            OsName.Windows,
            "1607",
            "Dell",
            "T3600");
        var application = new ApplicationMetadata("SampleApp", "1.0.0");
        var request = new RequestMetadata(Guid.NewGuid(), device, application, "SampleAppService");

        // Stream the audio and await completion; cancellation comes from the service's token.
        await speechClient
            .RecognizeAsync(new SpeechInput(stream, request), this.cts.Token)
            .ConfigureAwait(false);
    }
}
/// <summary>
/// Console command handler: converts <paramref name="inputFile"/> to WAV, runs speech
/// recognition on it, and prints partial and final results to the console.
/// </summary>
/// <param name="config">Application configuration forwarded to the converter and decoder.</param>
/// <param name="inputFile">Path of the audio file to decode.</param>
/// <param name="mode">Short-phrase or long-dictation recognition mode.</param>
/// <param name="locale">Recognition locale.</param>
/// <returns>Process exit code: 0 on success, 1 when the input file does not exist.</returns>
public async Task<int> Handle(IConfigurationRoot config, string inputFile, PhraseMode mode, string locale)
{
    // Guard clause: bail out early with a non-zero exit code for a missing file.
    if (!File.Exists(inputFile))
    {
        Console.WriteLine("Invalid input file");
        return 1;
    }

    // Prints each partial hypothesis as it arrives.
    Func<RecognitionStep, Task> onPartial = args =>
    {
        Console.WriteLine("--- Partial result received by OnPartialResult ---");
        // Print the partial response recognition hypothesis.
        Console.WriteLine(args.DisplayText);
        Console.WriteLine();
        return CompletedTask;
    };

    // Prints the status and every recognized phrase of a final result.
    Func<RecognitionFinalResult, Task> onFinal = args =>
    {
        Console.WriteLine();
        Console.WriteLine("--- Phrase result received by OnRecognitionResult ---");
        // Print the recognition status.
        Console.WriteLine("***** Phrase Recognition Status = [{0}] ***", args.RecognitionStatus);
        if (args.Phrases != null)
        {
            foreach (var result in args.Phrases)
            {
                // Print the recognition phrase display text.
                Console.WriteLine("{0} (Confidence:{1})", result.DisplayText, result.Confidence);
            }
        }
        Console.WriteLine();
        return CompletedTask;
    };

    using (FileStream istream = new FileStream(inputFile, FileMode.Open, FileAccess.Read))
    {
        Console.WriteLine("Converting audio");
        using (Stream wavStream = await new WavAudioConverter(config).ConvertAsync(istream))
        {
            Console.WriteLine("Converted audio");
            Console.WriteLine("Decoding audio");
            await new AudioDecoderService(config).DecodeAudioAsync(wavStream, locale, mode, onPartial, onFinal);
            Console.WriteLine("Audio decoded");
        }
    }

    return 0;
}
/// <summary>
/// Accepts a single uploaded audio file, converts it to WAV, and streams speech
/// recognition results back to the client as newline-delimited JSON.
/// </summary>
/// <param name="culture">Recognition locale (e.g. "en-US"); required.</param>
/// <returns>
/// 400 Bad Request when <paramref name="culture"/> is missing or the request does not
/// contain exactly one file; otherwise the body is written incrementally and an
/// <see cref="EmptyResult"/> is returned.
/// </returns>
public async System.Threading.Tasks.Task <ActionResult> Post(string culture)
{
    // Guard clause: require a culture and exactly one uploaded file.
    if (String.IsNullOrEmpty(culture) || Request.Files.Count != 1)
    {
        return new HttpStatusCodeResult(HttpStatusCode.BadRequest);
    }

    var file = Request.Files[0];

    // Disable response buffering so each recognition result reaches the client
    // as soon as it is flushed, rather than after the whole request completes.
    Response.Buffer = false;
    Response.ContentType = "text/plain";

    using (WavStream wavStream = await new WavAudioConverter(configuration).ConvertAsync(file.InputStream))
    {
        /*
         * ShortPhrase mode: An utterance up to 15 seconds long. As data is sent to the server,
         * the client receives multiple partial results and one final best result.
         *
         * LongDictation mode: An utterance up to 10 minutes long. As data is sent to the server,
         * the client receives multiple partial results and multiple final results, based on where the
         * server indicates sentence pauses.
         */
        PhraseMode mode = (wavStream.AudioLength > MaxShortAudioLength
            ? PhraseMode.LongDictation
            : PhraseMode.ShortPhrase);

        await new AudioDecoderService(configuration).DecodeAudioAsync(
            wavStream,
            culture,
            mode,
            (args) =>
            {
                // One JSON object per line for each partial hypothesis, flushed immediately.
                Response.Write(JsonConvert.SerializeObject(args));
                Response.Write(Environment.NewLine);
                return Response.FlushAsync();
            },
            (args) =>
            {
                // One JSON object per line for each final phrase result, flushed immediately.
                Response.Write(JsonConvert.SerializeObject(args));
                Response.Write(Environment.NewLine);
                return Response.FlushAsync();
            });
    }

    // FIX: previously returned null, relying on MVC silently substituting an empty
    // result; be explicit, since the response body has already been streamed above.
    return new EmptyResult();
}