public RecognitionDemo(string region, string key, string locale, int millisecondsPerFrame)
{
    _disposed = false;
    _millisecondsPerFrame = millisecondsPerFrame;
    SpeechConfig config = SpeechConfig.FromSubscription(key, region);
    config.SpeechRecognitionLanguage = locale;
    config.OutputFormat = OutputFormat.Detailed;
    _audioInput = CreateAudioInputStream();
    _recognizer = new SpeechRecognizer(config, AudioConfig.FromStreamInput(_audioInput));
    _audioCapture = CreateAudioCaptureDevice();
    _audio = new FileStream("audio.raw", FileMode.Create);
    _transcript = new StreamWriter(new FileStream("transcript.txt", FileMode.Create), Encoding.UTF8);
    _stopwatch = new Stopwatch();
    _framesCaptured = 0;
    _intermediateResultsReceived = 0;
    _finalResultsReceived = 0;
    _identicalResults = 0;
    _lastResult = null;
}
public static async Task SpeakerVerify(SpeechConfig config, VoiceProfile profile, Dictionary<string, string> profileMapping, string file)
{
    var model = SpeakerVerificationModel.FromProfile(profile);
    Console.WriteLine($"Verifying {file} ...");
    try
    {
        var speakerRecognizer = new SpeakerRecognizer(config, AudioConfig.FromWavFileInput(file));
        var result = await speakerRecognizer.RecognizeOnceAsync(model);
        Console.WriteLine($"Verified voice profile for speaker {profileMapping[result.ProfileId]}, score is {result.Score}");
        if (result.Score >= 0.5)
        {
            File.Copy(file, Path.Combine(settings[SettingIndex.ResDir], Path.GetFileName(file)), true);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Exception caught: " + ex.Message);
    }
}
public async Task<IAudioClip> Synthesize(string text)
{
    var stream = AudioOutputStream.CreatePullStream(AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1));

    // Generate voice data into the stream
    using (var streamConfig = AudioConfig.FromStreamOutput(stream))
    using (var synthesizer = new SpeechSynthesizer(_config, streamConfig))
    using (var result = await synthesizer.SpeakTextAsync(text))
    {
        if (result.Reason == ResultReason.Canceled)
        {
            var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
            throw new TaskCanceledException($"{cancellation.Reason}: {cancellation.ErrorDetails}");
        }
    }

    // Create a clip which consumes this audio data
    return new AudioOutputStreamClip($"TTS:`{text}`", stream, new WaveFormat(16000, 16, 1));
}
/// <summary>Speech-to-text: recognize from an in-memory stream.</summary>
public static async Task<string> RecognizeFromStreamAsync(string inputFileName)
{
    var config = SpeechConfig.FromSubscription(subscriptionKey, region);
    using var reader = new BinaryReader(File.OpenRead(inputFileName));
    using var audioInputStream = AudioInputStream.CreatePushStream();
    using var audioConfig = AudioConfig.FromStreamInput(audioInputStream);
    using var recognizer = new SpeechRecognizer(config, audioConfig);

    byte[] readBytes;
    do
    {
        readBytes = reader.ReadBytes(1024);
        audioInputStream.Write(readBytes, readBytes.Length);
    } while (readBytes.Length > 0);
    audioInputStream.Close(); // signal end of stream so recognition can complete

    var result = await recognizer.RecognizeOnceAsync();
    return result.Text;
}
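// Note (an assumption worth stating): CreatePushStream() with no arguments
// assumes 16 kHz, 16-bit, mono PCM input. A sketch for other WAV layouts;
// the 44.1 kHz stereo values below are illustrative, not from the snippet above.
static PushAudioInputStream CreateStereoPushStream()
{
    var format = AudioStreamFormat.GetWaveFormatPCM(44100, 16, 2);
    return AudioInputStream.CreatePushStream(format);
}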
// This is from the Google API documentation
private string speak(string text, string number)
{
    var client = TextToSpeechClient.Create();

    // The input to be synthesized, can be provided as text or SSML.
    var input = new SynthesisInput
    {
        Text = text
        //Text = "This is a demonstration of the Google Cloud Text-to-Speech API"
    };

    // Build the voice request.
    var voiceSelection = new VoiceSelectionParams
    {
        LanguageCode = "en-US",
        SsmlGender = SsmlVoiceGender.Female
    };

    // Specify the type of audio file.
    var audioConfig = new AudioConfig
    {
        AudioEncoding = AudioEncoding.Mp3
    };

    // Perform the text-to-speech request.
    var response = client.SynthesizeSpeech(input, voiceSelection, audioConfig);

    // Write the response to the output file; disposing the stream flushes and closes it.
    string outputPath = Path.Combine(currentDir, number + "output.mp3");
    using (var output = File.Create(outputPath))
    {
        response.AudioContent.WriteTo(output);
    }
    Console.WriteLine($"Audio content written to file \"{number}output.mp3\"");
    return outputPath;
}
public static async Task<string> RecognizeSpeechAsync()
{
    Debug.WriteLine("Starting Speech2Text service...");
    // NOTE: never ship a real subscription key in source; placeholder shown here.
    var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "westus");
    config.SpeechRecognitionLanguage = "es-MX";
    using (var audioInput = AudioConfig.FromWavFileInput(@"D:\VS Projects\Ignite\Consultant\Consultant\Speech.wav"))
    using (var recognizer = new SpeechRecognizer(config, audioInput))
    {
        Debug.WriteLine("Recognizing first result...");
        var result = await recognizer.RecognizeOnceAsync();
        if (result.Reason == ResultReason.RecognizedSpeech)
        {
            Debug.WriteLine($"We recognized: {result.Text}");
            return result.Text;
        }
        else if (result.Reason == ResultReason.NoMatch)
        {
            Debug.WriteLine("NOMATCH: Speech could not be recognized.");
            return "Not recognized";
        }
        else if (result.Reason == ResultReason.Canceled)
        {
            var cancellation = CancellationDetails.FromResult(result);
            Debug.WriteLine($"CANCELED: Reason={cancellation.Reason}");
            if (cancellation.Reason == CancellationReason.Error)
            {
                Debug.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                Debug.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
                Debug.WriteLine("CANCELED: Did you update the subscription info?");
            }
            return "Canceled";
        }
        return "";
    }
}
private void button1_Click(object sender, EventArgs e)
{
    TextToSpeechClient client = TextToSpeechClient.Create();

    // Set the text input to be synthesized.
    SynthesisInput input = new SynthesisInput
    {
        Text = "Just putting something which makes no sense to read stuff"
    };

    // Build the voice request, select the language code ("en-US"),
    // and the SSML voice gender ("neutral").
    VoiceSelectionParams voice = new VoiceSelectionParams
    {
        LanguageCode = "en-US",
        SsmlGender = SsmlVoiceGender.Neutral
    };

    // Select the type of audio file you want returned.
    AudioConfig config = new AudioConfig
    {
        AudioEncoding = AudioEncoding.Mp3
    };

    // Perform the Text-to-Speech request, passing the text input
    // with the selected voice parameters and audio file type.
    var response = client.SynthesizeSpeech(new SynthesizeSpeechRequest
    {
        Input = input,
        Voice = voice,
        AudioConfig = config
    });

    // Write the binary AudioContent of the response to an MP3 file.
    using (Stream output = File.Create("sample.mp3"))
    {
        response.AudioContent.WriteTo(output);
        Console.WriteLine("Audio content written to file 'sample.mp3'");
    }
}
private byte[] GetGoogleSpeech(string speechText, string languageCode)
{
    string path = Path.Combine(Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), @"SpellingApp-7fc0cf8b5885.json");
    var credential = GoogleCredential.FromFile(path);
    var channel = new Grpc.Core.Channel(TextToSpeechClient.DefaultEndpoint.ToString(), credential.ToChannelCredentials());
    TextToSpeechClient client = TextToSpeechClient.Create(channel);

    // Set the text input to be synthesized.
    SynthesisInput input = new SynthesisInput
    {
        Text = speechText
    };

    // Build the voice request: select the caller-supplied language code
    // and the SSML voice gender ("neutral").
    VoiceSelectionParams voice = new VoiceSelectionParams
    {
        LanguageCode = languageCode,
        SsmlGender = SsmlVoiceGender.Neutral
    };

    // Select the type of audio file you want returned.
    AudioConfig config = new AudioConfig
    {
        AudioEncoding = AudioEncoding.Mp3
    };

    // Perform the Text-to-Speech request, passing the text input
    // with the selected voice parameters and audio file type.
    var response = client.SynthesizeSpeech(new SynthesizeSpeechRequest
    {
        Input = input,
        Voice = voice,
        AudioConfig = config
    });

    // Return the binary AudioContent of the response.
    return response.AudioContent.ToByteArray();
}
public async Task ContinuousRecognitionAsync()
{
    var speechConfig = SpeechConfig.FromSubscription("myKey", "northeurope");
    speechConfig.SpeechRecognitionLanguage = "ro-RO";
    speechConfig.EnableDictation();
    using var audioConfig = AudioConfig.FromDefaultMicrophoneInput();
    using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);
    var stopRecognition = new TaskCompletionSource<int>();
    Console.OutputEncoding = Encoding.UTF8;

    recognizer.Recognizing += (s, e) =>
    {
        Console.WriteLine("Text=" + e.Result.Text);
    };
    recognizer.Recognized += (s, e) =>
    {
        if (e.Result.Reason == ResultReason.RecognizedSpeech)
        {
            Console.WriteLine("Final Text=" + e.Result.Text);
        }
        else
        {
            Console.WriteLine("Speech not found!");
        }
    };
    recognizer.Canceled += (s, e) =>
    {
        Console.WriteLine("Reason=" + e.Reason);
    };
    recognizer.SessionStopped += (s, e) =>
    {
        Console.WriteLine("\nSession Stopped!");
        stopRecognition.TrySetResult(0);
    };

    await recognizer.StartContinuousRecognitionAsync();
    await stopRecognition.Task; // await the stop signal instead of blocking with Task.WaitAny
    await recognizer.StopContinuousRecognitionAsync();
}
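// Sketch: an alternative wait with a timeout safety net, in case SessionStopped
// never fires. The five-minute limit is an illustrative choice, not from the code above.
private static async Task<bool> WaitForStopAsync(Task stopSignal)
{
    var finished = await Task.WhenAny(stopSignal, Task.Delay(TimeSpan.FromMinutes(5)));
    return finished == stopSignal; // false means the wait timed out
}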
// "seslendir" (Turkish for "vocalize") speaks the text using a voice in the
// language opposite to the detected one.
public void seslendir(string metin, string detected)
{
    mediaPlayer.Close();
    string lang = "";
    if (detected == "tr")
    {
        lang = "en-US"; // the original "en-EN" is not a valid BCP-47 language code
    }
    if (detected == "en")
    {
        lang = "tr-TR";
    }
    VoiceSelectionParams voice = new VoiceSelectionParams
    {
        LanguageCode = lang,
        SsmlGender = SsmlVoiceGender.Male
    };
    AudioConfig config = new AudioConfig
    {
        AudioEncoding = AudioEncoding.Mp3
    };
    SynthesisInput input = new SynthesisInput
    {
        Text = metin
    };
    var response = client.SynthesizeSpeech(new SynthesizeSpeechRequest
    {
        Input = input,
        Voice = voice,
        AudioConfig = config
    });
    string outputPath = "C:\\Users\\corx\\source\\repos\\Selami\\Selami\\ses\\sample.mp3";
    using (Stream output = File.Create(outputPath))
    {
        response.AudioContent.WriteTo(output);
    }
    mediaPlayer.Open(new Uri(outputPath));
    mediaPlayer.Play();
}
public async Task<Result<LuisResult>> Recognize(string filePath)
{
    // LUIS credentials
    var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourRegion");
    config.SpeechRecognitionLanguage = "pt-BR";
    using (var audioInput = AudioConfig.FromWavFileInput(filePath))
    using (var recognizer = new IntentRecognizer(config, audioInput))
    {
        var model = LanguageUnderstandingModel.FromAppId("YourLuisAppId");
        recognizer.AddIntent(model, "intent.iot.device_off", "device_off");
        recognizer.AddIntent(model, "intent.iot.device_on", "device_on");

        var result = await recognizer.RecognizeOnceAsync().ConfigureAwait(false);
        if (result.Reason == ResultReason.RecognizedIntent)
        {
            var js = new DataContractJsonSerializer(typeof(LuisResult));
            var ms = new MemoryStream(Encoding.UTF8.GetBytes(result.Properties.GetProperty(PropertyId.LanguageUnderstandingServiceResponse_JsonResult)));
            return new Result<LuisResult>(js.ReadObject(ms) as LuisResult);
        }
        else if (result.Reason == ResultReason.NoMatch)
        {
            return new Result<LuisResult>(null, false, "Falha no reconhecimento do áudio!");
        }
        else if (result.Reason == ResultReason.Canceled)
        {
            var cancellation = CancellationDetails.FromResult(result);
            if (cancellation.Reason == CancellationReason.Error)
            {
                return new Result<LuisResult>(null, false, $"Motivo: {cancellation.Reason}. Detalhes: {cancellation.ErrorDetails}");
            }
            return new Result<LuisResult>(null, false, $"Motivo: {cancellation.Reason}.");
        }
    }
    return new Result<LuisResult>(null, false, "Erro desconhecido!");
}
public static void CreateSpeechFile(string inputText, string filePath)
{
    TextToSpeechClient client = TextToSpeechClient.Create();

    // Set the text input to be synthesized.
    SynthesisInput input = new SynthesisInput
    {
        Text = inputText
    };

    // Build the voice request, select the language code ("en-US"),
    // and the SSML voice gender ("neutral").
    VoiceSelectionParams voice = new VoiceSelectionParams
    {
        LanguageCode = "en-US",
        SsmlGender = SsmlVoiceGender.Neutral
    };

    // Select the type of audio file you want returned.
    AudioConfig config = new AudioConfig
    {
        AudioEncoding = AudioEncoding.Mp3
    };

    // Perform the Text-to-Speech request, passing the text input
    // with the selected voice parameters and audio file type.
    var response = client.SynthesizeSpeech(new SynthesizeSpeechRequest
    {
        Input = input,
        Voice = voice,
        AudioConfig = config
    });

    // Write the binary AudioContent of the response to an MP3 file.
    using (Stream output = File.Create(filePath))
    {
        response.AudioContent.WriteTo(output);
    }
}
public async Task<Stream> SynthesizeTextToStreamAsync(IVoice voice, string text)
{
    var input = new SynthesisInput
    {
        Text = text
    };
    var config = new AudioConfig
    {
        AudioEncoding = AudioEncoding.Mp3
    };
    var response = await Client.SynthesizeSpeechAsync(new SynthesizeSpeechRequest
    {
        Input = input,
        Voice = new VoiceSelectionParams
        {
            Name = voice.Name,
            LanguageCode = voice.Language
        },
        AudioConfig = config,
    });
    return new MemoryStream(response.AudioContent.ToByteArray());
}
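// Hedged usage sketch: persist the MP3 stream returned by
// SynthesizeTextToStreamAsync above to disk. The path is illustrative.
private static async Task SaveMp3Async(Stream mp3, string path)
{
    using (var file = File.Create(path))
    {
        await mp3.CopyToAsync(file);
    }
}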
public static async Task VerificationEnroll(SpeechConfig config, Dictionary<string, string> profileMapping)
{
    using (var client = new VoiceProfileClient(config))
    using (var profile = await client.CreateProfileAsync(VoiceProfileType.TextIndependentVerification, "en-us"))
    using (var audioInput = AudioConfig.FromWavFileInput(settings[SettingIndex.ExampleAudio]))
    {
        Console.WriteLine($"Enrolling profile id {profile.Id}.");
        // give the profile a human-readable display name
        profileMapping.Add(profile.Id, "Test speaker");

        VoiceProfileEnrollmentResult result = await client.EnrollProfileAsync(profile, audioInput);
        if (result != null)
        {
            if (result.Reason == ResultReason.EnrolledVoiceProfile)
            {
                string[] files = Directory.GetFiles(settings[SettingIndex.SourceDir], "*.wav", SearchOption.TopDirectoryOnly);
                foreach (string file in files)
                {
                    await SpeakerVerify(config, profile, profileMapping, file);
                }
            }
            else if (result.Reason == ResultReason.Canceled)
            {
                var cancellation = VoiceProfileEnrollmentCancellationDetails.FromResult(result);
                Console.WriteLine($"CANCELED {profile.Id}: ErrorCode={cancellation.ErrorCode} ErrorDetails={cancellation.ErrorDetails}");
            }
            await client.DeleteProfileAsync(profile);
        }
        else
        {
            Console.WriteLine("Profile enrollment error");
        }
    }
}
public static string ConvertAudioToOpus(FileConfig fileConfig)
{
    AudioConfig audioConfig = fileConfig.AudioConfig;
    string tmp = Config.Temp;
    string audiofile = FileUtility.RandomName(tmp) + ".opus";

    // Quality values below 1 are treated as a fraction of 400 kbps;
    // anything else is taken as an absolute bitrate.
    int bitrate;
    if (audioConfig.Quality < 1)
    {
        bitrate = (int)(audioConfig.Quality * 400);
    }
    else
    {
        bitrate = (int)audioConfig.Quality;
    }

    var eac3to = Path.Combine(Environment.CurrentDirectory, Eac3toExecute);
    var opusenc = Path.Combine(Environment.CurrentDirectory, OpusEnc);
    string bat = $"{eac3to.Maohao()} {fileConfig.VedioFileFullName.Maohao()} {audioConfig.Tracker}: {audioConfig.CommandLineArgs} stdout.wav | {opusenc.Maohao()} --ignorelength --bitrate {bitrate} --vbr - {audiofile.Maohao()}";
    ProcessCmd.RunBat(bat, Config.Temp);
    return audiofile;
}
// Example parameters: sampleRate = 11050 * 2, bitRate (bits per sample) = 16, channels = 1
public async UniTask STT(string wavFilepath, int sampleRate, int bitRate, int channels)
{
    var speechConfig = SpeechConfig.FromSubscription(subscription_key, region);
    speechConfig.SpeechRecognitionLanguage = location;

    using var reader = new BinaryReader(File.OpenRead(wavFilepath));
    var audioStreamFormat = AudioStreamFormat.GetWaveFormatPCM((uint)sampleRate, (byte)bitRate, (byte)channels);
    using var audioInputStream = AudioInputStream.CreatePushStream(audioStreamFormat);
    using var audioConfig = AudioConfig.FromStreamInput(audioInputStream);
    using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);

    byte[] readBytes;
    do
    {
        readBytes = reader.ReadBytes(1024);
        audioInputStream.Write(readBytes, readBytes.Length);
    } while (readBytes.Length > 0);
    audioInputStream.Close(); // signal end of stream

    var result = await recognizer.RecognizeOnceAsync();
    Debug.Log($"Recognized Line : = {result.Text}");
}
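// Sketch (a Unity-side helper that is an assumption beyond the snippet above):
// convert AudioClip samples (floats in [-1, 1]) into the 16-bit little-endian
// PCM bytes that a push stream like the one above expects.
static byte[] ClipToPcm16(UnityEngine.AudioClip clip)
{
    var samples = new float[clip.samples * clip.channels];
    clip.GetData(samples, 0);
    var bytes = new byte[samples.Length * 2];
    for (int i = 0; i < samples.Length; i++)
    {
        short s = (short)(UnityEngine.Mathf.Clamp(samples[i], -1f, 1f) * short.MaxValue);
        bytes[i * 2] = (byte)(s & 0xff);          // low byte first (little-endian)
        bytes[i * 2 + 1] = (byte)((s >> 8) & 0xff);
    }
    return bytes;
}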
public static async Task SynthesisToAudioFileAsync()
{
    var sub = ConfigurationManager.AppSettings["sub"];
    var region = ConfigurationManager.AppSettings["region"] ?? "southcentralus";
    var outFilename = ConfigurationManager.AppSettings["filename"] ?? "voice.wav";
    var text = ConfigurationManager.AppSettings["text"];
    var voice = ConfigurationManager.AppSettings["voice"] ?? "en-US-JessaNeural";

    var config = SpeechConfig.FromSubscription(sub, region);
    using (var fileOutput = AudioConfig.FromWavFileOutput(outFilename))
    using (var synthesizer = new SpeechSynthesizer(config, fileOutput))
    {
        // Note: the SSML 1.0 namespace URI uses http, not https.
        var xmlSpeech = @"<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>" +
                        @"<voice name='" + voice + @"'>" + text + @"</voice></speak>";
        var result = await synthesizer.SpeakSsmlAsync(xmlSpeech);
        if (result.Reason == ResultReason.SynthesizingAudioCompleted)
        {
            Console.WriteLine($"Speech synthesized to [{outFilename}] for text [{text}]");
        }
        else if (result.Reason == ResultReason.Canceled)
        {
            var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
            Console.WriteLine($"CANCELED: {cancellation.Reason}");
            if (cancellation.Reason == CancellationReason.Error)
            {
                Console.WriteLine($"CANCELED: {cancellation.ErrorCode}\n{cancellation.ErrorDetails}");
            }
        }
    }
}
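// Sketch: when the text comes from user input, escape it before splicing it
// into SSML so characters like '<' and '&' cannot break the XML.
// System.Security.SecurityElement.Escape handles the five XML entities.
static string BuildSsml(string voiceName, string text) =>
    "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>"
    + $"<voice name='{System.Security.SecurityElement.Escape(voiceName)}'>"
    + System.Security.SecurityElement.Escape(text)
    + "</voice></speak>";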
public async Task<byte[]> TranslateTextToWav(string text)
{
    // Instantiate a client
    var client = TextToSpeechClient.Create();

    // Set the text input to be synthesized.
    var input = new SynthesisInput
    {
        Text = text
    };

    // Build the voice request: Swedish ("sv-SE") with a female SSML voice gender.
    var voice = new VoiceSelectionParams
    {
        LanguageCode = "sv-SE",
        SsmlGender = SsmlVoiceGender.Female
    };

    // Select the type of audio returned: LINEAR16 (PCM in a WAV container).
    var config = new AudioConfig
    {
        AudioEncoding = AudioEncoding.Linear16
    };

    // Perform the Text-to-Speech request, passing the text input
    // with the selected voice parameters and audio file type.
    var response = await client.SynthesizeSpeechAsync(new SynthesizeSpeechRequest
    {
        Input = input,
        Voice = voice,
        AudioConfig = config
    });

    var memStream = new System.IO.MemoryStream();
    response.AudioContent.WriteTo(memStream);
    return memStream.ToArray();
}
private async Task<string> RecognizeSpeechAsync(string uri)
{
    var subscriptionKey = Environment.GetEnvironmentVariable("SPEECH_SUBSCRIPTION_KEY");
    var serviceRegion = Environment.GetEnvironmentVariable("SPEECH_SERVICE_REGION");
    var config = SpeechConfig.FromSubscription(subscriptionKey, serviceRegion);
    using (var audioInput = AudioConfig.FromWavFileInput(uri))
    using (var recognizer = new SpeechRecognizer(config, audioInput))
    {
        _log.LogInformation("Recognizing first result...");
        var result = await recognizer.RecognizeOnceAsync();
        if (result.Reason == ResultReason.RecognizedSpeech)
        {
            _log.LogInformation($"We recognized: {result.Text}");
        }
        else if (result.Reason == ResultReason.NoMatch)
        {
            _log.LogInformation("NOMATCH: Speech could not be recognized.");
        }
        else if (result.Reason == ResultReason.Canceled)
        {
            var cancellation = CancellationDetails.FromResult(result);
            _log.LogInformation($"CANCELED: Reason={cancellation.Reason}");
            if (cancellation.Reason == CancellationReason.Error)
            {
                _log.LogInformation($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                _log.LogInformation($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
                _log.LogInformation("CANCELED: Did you update the subscription info?");
            }
        }
        return result.Text;
    }
}
protected async Task<string> GetGoogleSynthSpeech(string text, TTSVoice voicePreference, TTSPitch pitchPreference, string filename = null)
{
    VoiceSelectionParams voice = voicePreference.GetGoogleVoiceSelectionParams();
    AudioConfig config = new AudioConfig
    {
        AudioEncoding = AudioEncoding.Mp3,
        Pitch = pitchPreference.GetSemitoneShift()
    };

    // TTS input as SSML
    SynthesisInput input = new SynthesisInput
    {
        Ssml = PrepareGoogleSSML(text)
    };

    // Perform the Text-to-Speech request, passing the text input
    // with the selected voice parameters and audio file type.
    GoogleSynthesizeSpeechResponse response = await googleClient.SynthesizeSpeechAsync(input, voice, config);

    // Write the binary AudioContent of the response to file.
    string filepath = string.IsNullOrWhiteSpace(filename)
        ? Path.Combine(TTSFilesPath, $"{Guid.NewGuid()}.mp3")
        : Path.Combine(TTSFilesPath, $"{filename}.mp3");

    using (Stream file = new FileStream(filepath, FileMode.Create))
    {
        response.AudioContent.WriteTo(file);
    }
    return filepath;
}
/// <summary>
/// Creates a recognizer with the baseline model and selected language:
/// creates a config with the subscription key and selected region;
/// if the input source is an audio file, creates the recognizer with the audio file, otherwise with the default mic;
/// then waits on RunRecognition.
/// </summary>
private async Task CreateRecognizer(byte[] channel)
{
    // TODO: support specifying a different region.
    var config = SpeechConfig.FromSubscription(this.SubscriptionKey, this.Region);
    config.SpeechRecognitionLanguage = this.RecognitionLanguage;
    config.OutputFormat = OutputFormat.Detailed;

    PushAudioInputStream pushStream = AudioInputStream.CreatePushStream();
    pushStream.Write(channel);
    pushStream.Close();

    using (var audioInput = AudioConfig.FromStreamInput(pushStream))
    using (var basicRecognizer = new SpeechRecognizer(config, audioInput))
    {
        await this.RunRecognizer(basicRecognizer, stopBaseRecognitionTaskCompletionSource).ConfigureAwait(false);
    }
}
public async Task<string> AudioToTextAsync(byte[] pcm)
{
    var guid = Guid.NewGuid();
    if (!Text.ContainsKey(guid))
    {
        Text[guid] = null;
    }

    // Build out the speech recognizer
    using (var pushStream = AudioInputStream.CreatePushStream(AudioStreamFormat.GetDefaultInputFormat()))
    using (var audioInput = AudioConfig.FromStreamInput(pushStream))
    using (var recognizer = new SpeechRecognizer(SpeechConfig, audioInput))
    {
        // Subscribe to speech recognizer events.
        recognizer.SessionStarted += OnSpeechRecognitionSessionStarted;
        recognizer.Recognizing += OnSpeechRecognizing;
        recognizer.Recognized += (s, e) => OnSpeechRecognized(s, e, guid);
        recognizer.Canceled += OnSpeechCanceled;
        recognizer.SessionStopped += OnSpeechRecognitionSessionStopped;

        // Start continuous recognition. StopContinuousRecognitionAsync() stops it later.
        await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

        // Send the PCM data to the speech recognizer and signal end of stream.
        pushStream.Write(pcm);
        pushStream.Close();

        // Wait for completion; Task.WaitAny keeps the task rooted.
        Task.WaitAny(StopRecognition.Task);

        // Stop recognition.
        await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
        return Text[guid];
    }
}
public async Task SendDirectLineSpeechVoiceMessage()
{
    GetEnvironmentVars();

    // Create a Dialog Service Config for use with the Direct Line Speech Connector
    var config = DialogServiceConfig.FromBotSecret(speechBotSecret, speechSubscription, speechRegion);
    config.SpeechRecognitionLanguage = "en-us";
    config.SetProperty(PropertyId.Conversation_From_Id, FromUser);

    // Create a new Dialog Service Connector for the above configuration and register to receive events
    var connector = new DialogServiceConnector(config, AudioConfig.FromWavFileInput(soundFilePath));
    connector.ActivityReceived += Connector_ActivityReceived;

    // Open a connection to the Direct Line Speech channel. Not awaited because
    // the call blocks until the connection closes.
#pragma warning disable CS4014 // Because this call is not awaited, execution of the current method continues before the call is completed
    connector.ConnectAsync();
#pragma warning restore CS4014

    // Send the message activity to the bot.
    await connector.ListenOnceAsync();

    // Give the bot time to respond.
    System.Threading.Thread.Sleep(1000);

    // Read the bot's message.
    var botAnswer = messages.LastOrDefault();

    // Cleanup
    await connector.DisconnectAsync();
    connector.Dispose();

    // Assert
    Assert.IsNotNull(botAnswer);
    Assert.AreEqual(string.Format("You said '{0}'", soundFileMessage), botAnswer.Message);
}
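// Sketch: the fixed one-second sleep above is a race. An event-driven wait is
// more reliable; "activityReceived" is a hypothetical field that
// Connector_ActivityReceived would complete via activityReceived.TrySetResult(true).
private readonly TaskCompletionSource<bool> activityReceived =
    new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);

private Task WaitForBotReplyAsync() =>
    Task.WhenAny(activityReceived.Task, Task.Delay(5000)); // 5-second cap, illustrative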
/// <summary>
/// Common routine for transcribing an audio file.
/// </summary>
/// <param name="apiKey">The subscription key.</param>
/// <param name="region">The region of the resource.</param>
/// <param name="reader">BinaryReader instance for reading the input stream.</param>
/// <returns>A Task returning the transcribed speech.</returns>
private async Task<string> TranscribeAudioCommonAsync(Secret apiKey, string region, BinaryReader reader)
{
    string transcript = null;
    using (BinaryAudioStreamReader streamReader = new BinaryAudioStreamReader(reader))
    {
        AudioStreamFormat audioStreamFormat = ReadWaveHeader(reader);
        AudioConfig audioConfig = AudioConfig.FromStreamInput(streamReader, audioStreamFormat);
        SpeechConfig speechConfig = SpeechConfig.FromSubscription(apiKey.Value, region);

        _speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);
        _speechRecognizer.Recognized += Recognized;
        _speechRecognizer.Canceled += Canceled;
        _speechRecognizer.SessionStopped += SessionStopped;
        _speechRecognizer.Canceled += SessionStopped; // a cancellation also ends the session

        await _speechRecognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);
        Task.WaitAny(new[] { _stopRecognition.Task });
        await _speechRecognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);

        if (!string.IsNullOrWhiteSpace(_cancellationDetails))
        {
            throw new TranscriberCanceledException($"Azure Speech cancellation error: {_cancellationDetails}");
        }

        transcript = _transcriptBuilder.ToString();
        if (string.IsNullOrWhiteSpace(transcript))
        {
            throw new TranscriberEmptyTranscriptException("Azure Speech returned blank transcript!");
        }
    }
    return transcript;
}
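// The ReadWaveHeader helper used above is not shown. A minimal sketch,
// assuming a canonical 44-byte PCM WAV header (RIFF/fmt/data chunks in
// order, no extension chunks); real files may need chunk scanning.
static AudioStreamFormat ReadWaveHeader(BinaryReader reader)
{
    reader.ReadBytes(4);                       // "RIFF"
    reader.ReadInt32();                        // overall file size
    reader.ReadBytes(4);                       // "WAVE"
    reader.ReadBytes(4);                       // "fmt "
    reader.ReadInt32();                        // fmt chunk size (16 for PCM)
    reader.ReadInt16();                        // audio format (1 = PCM)
    short channels = reader.ReadInt16();
    int sampleRate = reader.ReadInt32();
    reader.ReadInt32();                        // byte rate
    reader.ReadInt16();                        // block align
    short bitsPerSample = reader.ReadInt16();
    reader.ReadBytes(4);                       // "data"
    reader.ReadInt32();                        // data chunk size
    return AudioStreamFormat.GetWaveFormatPCM((uint)sampleRate, (byte)bitsPerSample, (byte)channels);
}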
/// <summary>
/// Creates a class-level Speech Recognizer for a specific language using Azure credentials
/// and hooks up lifecycle and recognition events
/// </summary>
void CreateSpeechRecognizer()
{
    if (string.IsNullOrEmpty(SpeechServiceAPIKey))
    {
        finalString = "You forgot to obtain Cognitive Services Speech credentials and insert them in this app."
            + Environment.NewLine
            + "See the README file and/or the instructions in the Awake() function for more info before proceeding.";
        errorString = "ERROR: Missing service credentials";
        UnityEngine.Debug.LogFormat(errorString);
        return;
    }
    UnityEngine.Debug.LogFormat("Creating Speech Recognizer.");
    // finalString = "Initializing speech recognition, please wait...";
    finalString = "Start: ";

    if (recognizer == null)
    {
        // NOTE: never ship a real subscription key in source; placeholder shown here.
        SpeechConfig sconfig = SpeechConfig.FromSubscription("YourSubscriptionKey", "westus2");
        sconfig.SpeechRecognitionLanguage = fromLanguage;
        audioStream = new MicToAudioStream();
        AudioConfig aconfig = AudioConfig.FromStreamInput(audioStream, AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1));
        recognizer = new SpeechRecognizer(sconfig, aconfig);
        if (recognizer != null)
        {
            // Subscribes to speech events.
            recognizer.Recognizing += RecognizingHandler;
            recognizer.Recognized += RecognizedHandler;
            recognizer.SpeechStartDetected += SpeechStartDetectedHandler;
            recognizer.SpeechEndDetected += SpeechEndDetectedHandler;
            recognizer.Canceled += CanceledHandler;
            recognizer.SessionStarted += SessionStartedHandler;
            recognizer.SessionStopped += SessionStoppedHandler;
        }
    }
    UnityEngine.Debug.LogFormat("CreateSpeechRecognizer exit");
}
IEnumerator ShowYakuOneByOne()
{
    yield return new WaitForSeconds(1.0f);
    var yakuArr = currentAgari.hanteiYakus;
    for (int i = 0; i < yakuArr.Length; i++)
    {
        var yaku = yakuArr[i];
        string yakuName = yaku.getYakuNameKey();
        UIYakuItem item;
        if (yaku.isYakuman())
        {
            item = CreateYakuItem_Yakuman(yakuName, yaku.isDoubleYakuman());
        }
        else
        {
            item = CreateYakuItem(yakuName, yaku.getHanSuu());
        }
        item.transform.parent = yakuRoot;
        item.transform.localScale = yakuItemPrefab.transform.localScale;
        item.transform.localPosition = new Vector3(yakuItemPosOffset.x, yakuItemPosOffset.y * (i + 1), 0f);
        _yakuItems.Add(item);
        AudioManager.Get().PlaySFX(AudioConfig.GetSEPath(ESeType.Yaku));
        yield return new WaitForSeconds(yakuDisplayTime);
    }
    yield return new WaitForSeconds(yakuDisplayTime * 0.5f);
    ShowTotalScrote();
}
public string generateVoice(string text)
{
    var client = TextToSpeechClient.Create();
    var inputText = new SynthesisInput
    {
        Text = text + " Thank you for using Rela."
    };
    var voiceParameters = new VoiceSelectionParams
    {
        LanguageCode = "en-US",
        SsmlGender = SsmlVoiceGender.Female
    };
    var audioParams = new AudioConfig
    {
        AudioEncoding = AudioEncoding.Mp3
    };
    var response = client.SynthesizeSpeech(inputText, voiceParameters, audioParams);
    // Return the MP3 bytes as a Base64 string (e.g. for embedding in a JSON response).
    string audioContent = response.AudioContent.ToBase64();
    return audioContent;
}
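// Hypothetical consumer of generateVoice: decode the Base64 payload back into
// MP3 bytes, e.g. on a client that received it in a JSON response.
static void SaveBase64Audio(string audioContent, string path)
{
    byte[] mp3Bytes = System.Convert.FromBase64String(audioContent);
    System.IO.File.WriteAllBytes(path, mp3Bytes);
}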
private async Task InitializeSpeechConnectorAsync()
{
    audioConfig = AudioConfig.FromDefaultMicrophoneInput();
    var config = CustomCommandsConfig.FromSubscription(Constants.CustomCommandsAppId, Constants.SubscriptionKey, Constants.Region);
    config.Language = Constants.Language;

    // Create a new Dialog Service Connector for the above configuration and register to receive events
    connector = new DialogServiceConnector(config, audioConfig);
    connector.ActivityReceived += Connector_ActivityReceived;
    connector.Recognizing += Connector_Recognizing;
    connector.Recognized += Connector_Recognized;
    connector.Canceled += Connector_Canceled;
    connector.SessionStarted += Connector_SessionStarted;
    connector.SessionStopped += Connector_SessionStopped;

    // Open a connection to Direct Line Speech channel
    await connector.ConnectAsync();

    // Start listening for the keyword using the bundled model file
    var keywordRecognitionModel = KeywordRecognitionModel.FromFile(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Computer.table"));
    _ = connector.StartKeywordRecognitionAsync(keywordRecognitionModel);
}
public async Task<string> MicrophoneInput(e_language curLanguage)
{
    if (speechConfig != null)
    {
        switch (curLanguage)
        {
            case e_language.English:
                speechConfig.SpeechRecognitionLanguage = CAzureLanguage.English_UnitedState;
                break;
            case e_language.Korean:
                speechConfig.SpeechRecognitionLanguage = CAzureLanguage.Korean_Korea;
                break;
        }
        using AudioConfig audioConfig = AudioConfig.FromDefaultMicrophoneInput();
        using SpeechRecognizer recognizer = new SpeechRecognizer(speechConfig, audioConfig);
        SpeechRecognitionResult result = await recognizer.RecognizeOnceAsync();
        return result.Text;
    }
    return "";
}
public static async Task SpeakAsync(string txt, QuestToSpeech.Voice voice, string filePath, AzureAPIConfig config)
{
    SpeechConfig speechConfig = SpeechConfig.FromSubscription(config.Key, config.Region);
    speechConfig.SpeechSynthesisVoiceName = voice.Name;
    speechConfig.SpeechSynthesisLanguage = voice.LangCode;

    using (AudioConfig fileOutput = AudioConfig.FromWavFileOutput(filePath))
    using (SpeechSynthesizer tts = new SpeechSynthesizer(speechConfig, fileOutput))
    using (SpeechSynthesisResult result = await tts.SpeakTextAsync(txt))
    {
        if (result.Reason == ResultReason.Canceled)
        {
            var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
            if (cancellation.Reason == CancellationReason.Error)
            {
                throw new Exception(string.Format("API Error (Code: {0}): {1}", cancellation.ErrorCode, cancellation.ErrorDetails));
            }
        }
    }
}