public void StartWithAudioOutputStream(AudioOutputStream audioOutputStream)
{
    _audioOutputStream = audioOutputStream;

    // Speaker
    AudioSource audioSource = GetComponent<AudioSource>();

    // Create audio source if needed.
    if (audioSource == null) {
        audioSource = gameObject.AddComponent<AudioSource>();
        audioSource.spatialize   = true;
        audioSource.spatialBlend = 1.0f;
    }

    // TODO: Do we want AudioClip's sample rate to match OPUS? That means Unity is left with doing any resampling. We might be able to do the resampling better ourselves.
    // TODO: We can probably specify a shorter clip length here since it's autogenerated now.
    if (_audioClip == null) {
        _audioClip = AudioClip.Create("Normcore Audio Stream", 48000, 1, 48000, true, (float[] data) => {
            for (int i = 0; i < data.Length; i++)
                data[i] = 1.0f;
        });
    }

    audioSource.enabled = true;
    audioSource.loop    = true;
    audioSource.clip    = _audioClip;
    audioSource.pitch   = 1.0f;
    audioSource.spatializePostEffects = true;
    audioSource.Play();
}
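// Why the clip is filled with 1.0f: Unity passes the playing clip's samples into
// OnAudioFilterRead, so an all-ones looping clip acts as a unity-gain carrier that the
// decoded stream audio can be multiplied into, preserving any volume/spatial gain Unity
// has already applied (note the snippet sets spatializePostEffects = true). A sketch of
// that companion callback follows; the GetAudioData call is hypothetical, and the actual
// Normcore stream API may differ.
private void OnAudioFilterRead(float[] data, int channels)
{
    if (_audioOutputStream == null) {
        Array.Clear(data, 0, data.Length);
        return;
    }

    // Hypothetical: fill a mono buffer with decoded samples for this frame.
    float[] samples = new float[data.Length / channels];
    _audioOutputStream.GetAudioData(samples);

    // Multiply into the carrier: data[] currently holds the clip's 1.0f samples scaled
    // by the AudioSource's gain, so this yields the stream audio at that gain.
    for (int i = 0; i < data.Length; i++) {
        data[i] *= samples[i / channels];
    }
}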
private async Task<byte[]> SynthesisWithByteStreamToByteArrayAsync(string ssmlInput, SpeechConfig config)
{
    var callback = new PushAudioOutputStreamSampleCallback();

    using var stream = AudioOutputStream.CreatePushStream(callback);
    using var streamConfig = AudioConfig.FromStreamOutput(stream);
    using var synthesizer = new SpeechSynthesizer(config, streamConfig);
    using var result = await synthesizer.SpeakSsmlAsync(ssmlInput);

    if (result.Reason == ResultReason.SynthesizingAudioCompleted)
    {
        return callback.GetAudioData();
    }
    else if (result.Reason == ResultReason.Canceled)
    {
        var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
        Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");

        if (cancellation.Reason == CancellationReason.Error)
        {
            Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
            Console.WriteLine($"CANCELED: ErrorDetails=[{cancellation.ErrorDetails}]");
            Console.WriteLine($"CANCELED: Did you update the subscription info?");
        }
    }

    return null;
}
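// PushAudioOutputStreamSampleCallback (used above and in the push-stream sample further
// down) is not shown in this listing. A minimal sketch, modeled on the Azure Speech SDK
// samples (types from Microsoft.CognitiveServices.Speech.Audio): it buffers every chunk
// the synthesizer pushes and returns the accumulated bytes on demand.
public class PushAudioOutputStreamSampleCallback : PushAudioOutputStreamCallback
{
    private byte[] _audioData = new byte[0];

    // Called by the Speech SDK each time a chunk of synthesized audio is available.
    public override uint Write(byte[] dataBuffer)
    {
        int oldSize = _audioData.Length;
        Array.Resize(ref _audioData, oldSize + dataBuffer.Length);
        Array.Copy(dataBuffer, 0, _audioData, oldSize, dataBuffer.Length);
        return (uint)dataBuffer.Length;
    }

    // Returns everything written so far.
    public byte[] GetAudioData()
    {
        return _audioData;
    }
}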
private void ConnectRemoteAudioStream()
{
    // Remote player: look up the audio stream and create an audio output.
    int clientID = model.clientID;
    int streamID = model.streamID;

    // Ignore invalid model state.
    if (clientID < 0 || streamID < 0)
        return;

    // Find the AudioOutputStream.
    AudioOutputStream audioOutputStream = room.GetAudioOutputStream(clientID, streamID);
    if (audioOutputStream != null) {
        _audioOutput = gameObject.AddComponent<AudioOutput>();
        _audioOutput.mute = mute;
        _audioOutput.StartWithAudioOutputStream(audioOutputStream);
    } else {
        Debug.LogError($"RealtimeAvatarVoice: Unable to find matching audio stream for avatar (clientID: {clientID}, streamID: {streamID}).");
    }
}
// Speech synthesis to pull audio output stream.
public static async Task SynthesisToPullAudioOutputStreamAsync()
{
    // Creates an instance of a speech config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");

    // Creates an audio output stream.
    using (var stream = AudioOutputStream.CreatePullStream())
    {
        // Creates a speech synthesizer using audio stream output.
        using (var streamConfig = AudioConfig.FromStreamOutput(stream))
        using (var synthesizer = new SpeechSynthesizer(config, streamConfig))
        {
            while (true)
            {
                // Receives text from console input and synthesizes it to the pull audio output stream.
                Console.WriteLine("Enter some text that you want to synthesize, or enter empty text to exit.");
                Console.Write("> ");
                string text = Console.ReadLine();
                if (string.IsNullOrEmpty(text))
                {
                    break;
                }

                using (var result = await synthesizer.SpeakTextAsync(text))
                {
                    if (result.Reason == ResultReason.SynthesizingAudioCompleted)
                    {
                        Console.WriteLine($"Speech synthesized for text [{text}], and the audio was written to the output stream.");
                    }
                    else if (result.Reason == ResultReason.Canceled)
                    {
                        var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
                        Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");

                        if (cancellation.Reason == CancellationReason.Error)
                        {
                            Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                            Console.WriteLine($"CANCELED: ErrorDetails=[{cancellation.ErrorDetails}]");
                            Console.WriteLine($"CANCELED: Did you update the subscription info?");
                        }
                    }
                }
            }
        }

        // Reads (pulls) data from the stream; the pull stream has buffered everything synthesized above.
        byte[] buffer = new byte[32000];
        uint filledSize = 0;
        uint totalSize = 0;
        while ((filledSize = stream.Read(buffer)) > 0)
        {
            Console.WriteLine($"{filledSize} bytes received.");
            totalSize += filledSize;
        }
        Console.WriteLine($"Received {totalSize} bytes in total.");
    }
}
// Speech synthesis to pull audio output stream.
public async Task SpeakWithSDKPlugin(string message)
{
    Synthesize cortana = new Synthesize();

    // Creates an instance of a speech config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechConfig.FromSubscription(SpeechServiceAPIKey, SpeechServiceRegion);
    config.SpeechSynthesisLanguage = cortana.GetVoiceLocale(voiceName);
    config.SpeechSynthesisVoiceName = cortana.ConvertVoiceNametoString(voiceName);

    // Creates an audio output stream.
    using (var stream = AudioOutputStream.CreatePullStream())
    {
        // Creates a speech synthesizer using audio stream output.
        using (var streamConfig = AudioConfig.FromStreamOutput(stream))
        using (var synthesizer = new SpeechSynthesizer(config, streamConfig))
        {
            using (var result = await synthesizer.SpeakTextAsync(message))
            {
                if (result.Reason == ResultReason.SynthesizingAudioCompleted)
                {
                    var audiodata = result.AudioData;
                    Debug.Log("Speech synthesized for text, and the audio was written to the output stream.");

                    int sampleCount = 0;
                    int frequency = 16000;
                    var unityData = FixedRAWAudioToUnityAudio(audiodata, 1, 16, out sampleCount);

                    // Convert the data to a Unity audio clip.
                    Debug.Log($"Converting audio data of size {unityData.Length} to Unity audio clip with {sampleCount} samples at frequency {frequency}.");
                    var clip = ToClip("Speech", unityData, sampleCount, frequency);

                    // Set the clip on the audio source.
                    audioSource.clip = clip;

                    // Play audio.
                    Debug.Log("Triggering playback of audio clip on AudioSource.");
                    audioSource.Play();
                }
                else if (result.Reason == ResultReason.Canceled)
                {
                    var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
                    Debug.Log($"CANCELED: Reason={cancellation.Reason}");

                    if (cancellation.Reason == CancellationReason.Error)
                    {
                        Debug.Log($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                        Debug.Log($"CANCELED: ErrorDetails=[{cancellation.ErrorDetails}]");
                        Debug.Log($"CANCELED: Did you update the subscription info?");
                    }
                }
            }
        }
    }
}
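// ToClip and FixedRAWAudioToUnityAudio are helpers referenced above but not included in
// this listing. Minimal sketches, assuming 16-bit mono PCM output (as configured above):
private static float[] FixedRAWAudioToUnityAudio(byte[] pcmData, int channels, int bitsPerSample, out int sampleCount)
{
    // Convert little-endian 16-bit samples to floats in [-1, 1].
    sampleCount = pcmData.Length / (bitsPerSample / 8);
    var data = new float[sampleCount];
    for (int i = 0; i < sampleCount; i++)
    {
        data[i] = BitConverter.ToInt16(pcmData, i * 2) / 32768f;
    }
    return data;
}

private static AudioClip ToClip(string name, float[] samples, int sampleCount, int frequency)
{
    // One channel, non-streaming clip; SetData copies the float samples in.
    var clip = AudioClip.Create(name, sampleCount, 1, frequency, false);
    clip.SetData(samples, 0);
    return clip;
}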
public TranslationEngine(IConfiguration config, IHubContext<TranslationHub> hub)
{
    _hub = hub;
    _config = config;

    _translationConfig = SpeechTranslationConfig.FromSubscription(_config["SUBSCRIPTION_KEY"], _config["REGION"]);
    _speechConfig = SpeechTranslationConfig.FromSubscription(_config["SUBSCRIPTION_KEY"], _config["REGION"]);

    _audioInput = AudioConfig.FromStreamInput(_inputStream);
    _audioOutputStream = AudioOutputStream.CreatePullStream();
    _output = AudioConfig.FromStreamOutput(_audioOutputStream);
}
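// The fields wired up by this constructor are not shown in the listing. Plausible
// declarations (assumed, not from the original source; _inputStream is assumed to be a
// push stream fed with incoming client audio):
private readonly IHubContext<TranslationHub> _hub;
private readonly IConfiguration _config;
private readonly SpeechTranslationConfig _translationConfig;
private readonly SpeechTranslationConfig _speechConfig;
private readonly PushAudioInputStream _inputStream = AudioInputStream.CreatePushStream();
private readonly AudioConfig _audioInput;
private readonly PullAudioOutputStream _audioOutputStream;
private readonly AudioConfig _output;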
public void Stop()
{
    AudioSource audioSource = GetComponent<AudioSource>();
    if (audioSource != null)
        audioSource.Stop();

    _audioOutputStream = null;
}
private SpeechSynthesizer BuildAzureSpeechSynthesizer()
{
    // Create an audio config to tell the Azure Speech SDK to return speech output as a memory stream
    // using its default output format (16kHz, 16bit, mono).
    var audioConfig = AudioConfig.FromStreamOutput(
        AudioOutputStream.CreatePullStream(AudioStreamFormat.GetDefaultOutputFormat()));

    // Create an instance of the Azure Speech SDK speech synthesizer.
    return new SpeechSynthesizer(SpeechConfig, audioConfig);
}
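// A possible call site for BuildAzureSpeechSynthesizer (the SpeechConfig property it
// reads is assumed to exist on the same class). Even with a stream-based AudioConfig,
// the complete audio is also exposed on the result object once synthesis finishes.
private async Task<byte[]> SynthesizeToBytesAsync(string text)
{
    using var synthesizer = BuildAzureSpeechSynthesizer();
    using var result = await synthesizer.SpeakTextAsync(text);
    return result.Reason == ResultReason.SynthesizingAudioCompleted ? result.AudioData : null;
}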
public void Stop()
{
    if (_audioSource != null) {
        _audioSource.Stop();
        Destroy(_audioSource);
        _audioSource = null;
    }

    _audioOutputStream = null;
}
// Speech synthesis to push audio output stream.
public static async Task SynthesisToPushAudioOutputStreamAsync()
{
    // Creates an instance of a speech config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");

    // Creates an instance of a custom class inherited from PushAudioOutputStreamCallback.
    var callback = new PushAudioOutputStreamSampleCallback();

    // Creates an audio output stream from the callback.
    using (var stream = AudioOutputStream.CreatePushStream(callback))
    {
        // Creates a speech synthesizer using audio stream output.
        using (var streamConfig = AudioConfig.FromStreamOutput(stream))
        using (var synthesizer = new SpeechSynthesizer(config, streamConfig))
        {
            while (true)
            {
                // Receives text from console input and synthesizes it to the push audio output stream.
                Console.WriteLine("Enter some text that you want to synthesize, or enter empty text to exit.");
                Console.Write("> ");
                string text = Console.ReadLine();
                if (string.IsNullOrEmpty(text))
                {
                    break;
                }

                using (var result = await synthesizer.SpeakTextAsync(text))
                {
                    if (result.Reason == ResultReason.SynthesizingAudioCompleted)
                    {
                        Console.WriteLine($"Speech synthesized for text [{text}], and the audio was written to the output stream.");
                    }
                    else if (result.Reason == ResultReason.Canceled)
                    {
                        var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
                        Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");

                        if (cancellation.Reason == CancellationReason.Error)
                        {
                            Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                            Console.WriteLine($"CANCELED: ErrorDetails=[{cancellation.ErrorDetails}]");
                            Console.WriteLine($"CANCELED: Did you update the subscription info?");
                        }
                    }
                }
            }
        }

        Console.WriteLine($"Received {callback.GetAudioData().Length} bytes in total.");
    }
}
public void StartWithAudioOutputStream(AudioOutputStream audioOutputStream)
{
    _audioOutputStream = audioOutputStream;

    // Speaker
    // TODO: Do we want AudioClip's sample rate to match OPUS? That means Unity is left with doing any resampling. We might be able to do the resampling better ourselves.
    // TODO: We can probably specify a shorter clip length here since it's autogenerated now.
    _audioSource.loop = true;
    _audioSource.clip = AudioClip.Create("Audio Stream", 48000, 1, 48000, true, (float[] data) => {
        for (int i = 0; i < data.Length; i++)
            data[i] = 1.0f;
    });
    _audioSource.outputAudioMixerGroup = audioMixerGroup;
    _audioSource.Play();
}
public async Task<Stream> RenderSpeechAsync(string content)
{
    var audioStream = AudioOutputStream.CreatePullStream();
    var audioConfig = AudioConfig.FromStreamOutput(audioStream);

    using var synthesizer = new SpeechSynthesizer(_configuration, audioConfig);
    using var result = await synthesizer.SpeakTextAsync(content);

    if (result.Reason == ResultReason.SynthesizingAudioCompleted)
    {
        var stream = new MemoryStream();
        stream.Write(result.AudioData, 0, result.AudioData.Length);
        stream.Position = 0; // Rewind so callers can read the audio from the start.
        return stream;
    }

    var details = SpeechSynthesisCancellationDetails.FromResult(result);
    throw new RenderSpeechException(details.ErrorDetails);
}
private async Task<KeyValuePair<string, string>?> CreateAndUploadSpeech(int episodeId, SrStoredEpisode storedEpisode, string text, string language, string voice)
{
    var speechConfig = _speechConfigFactory.Get();
    speechConfig.SpeechSynthesisLanguage = language;
    speechConfig.SpeechSynthesisVoiceName = voice;
    speechConfig.SetSpeechSynthesisOutputFormat(SpeechSynthesisOutputFormat.Audio24Khz160KBitRateMonoMp3);

    using var stream = new MemoryStream();
    using var audioStream = AudioOutputStream.CreatePushStream(new AudioPushAudioOutputStreamCallback(stream));
    using var fileOutput = AudioConfig.FromStreamOutput(audioStream);
    using var synthesizer = new SpeechSynthesizer(speechConfig, fileOutput);

    var result = await synthesizer.SpeakTextAsync(text);

    if (result.Reason == ResultReason.SynthesizingAudioCompleted)
    {
        _logger.LogInformation($"Created speech for episode {episodeId}");
        var uploadedBlob = await UploadSpeech(storedEpisode, stream, voice);
        _logger.LogInformation($"Uploaded speech for episode {episodeId}");
        return uploadedBlob;
    }

    if (result.Reason == ResultReason.Canceled)
    {
        var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
        _logger.LogError($"Error creating speech for episode {episodeId}: Reason={cancellation.Reason}");

        if (cancellation.Reason == CancellationReason.Error)
        {
            // Expect some texts to be too long, etc.
            _logger.LogError(
                $"Error creating speech for episode {episodeId}: ErrorCode={cancellation.ErrorCode}; ErrorDetails=[{cancellation.ErrorDetails}]");
        }

        return null;
    }

    throw new Exception($"Unknown result status for speech: {result.Reason}");
}
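// AudioPushAudioOutputStreamCallback is referenced above but not part of this listing.
// A minimal sketch, assuming it simply forwards each pushed chunk into the MemoryStream
// that UploadSpeech later reads:
public class AudioPushAudioOutputStreamCallback : PushAudioOutputStreamCallback
{
    private readonly Stream _target;

    public AudioPushAudioOutputStreamCallback(Stream target)
    {
        _target = target;
    }

    // Append each synthesized MP3 chunk to the backing stream.
    public override uint Write(byte[] dataBuffer)
    {
        _target.Write(dataBuffer, 0, dataBuffer.Length);
        return (uint)dataBuffer.Length;
    }
}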
public async Task<IAudioClip> Synthesize(string text)
{
    var stream = AudioOutputStream.CreatePullStream();

    // Generate voice data into the stream.
    using (var streamConfig = AudioConfig.FromStreamOutput(stream))
    using (var synthesizer = new SpeechSynthesizer(_config, streamConfig))
    {
        using var result = await synthesizer.SpeakTextAsync(text);
        if (result.Reason == ResultReason.Canceled)
        {
            var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
            throw new TaskCanceledException($"{cancellation.Reason}: {cancellation.ErrorDetails}");
        }
    }

    // Create a clip which consumes this audio data.
    return new AudioOutputStreamClip($"TTS:`{text}`", stream, new WaveFormat(16000, 16, 1));
}
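// AudioOutputStreamClip (above) is a custom type that consumes the pull stream. For
// reference, draining a PullAudioOutputStream eagerly looks roughly like this
// (hypothetical helper, not the clip's actual implementation):
private static byte[] DrainPullStream(PullAudioOutputStream stream)
{
    using var buffered = new MemoryStream();
    var chunk = new byte[4096];
    uint read;
    while ((read = stream.Read(chunk)) > 0)
    {
        buffered.Write(chunk, 0, (int)read);
    }
    return buffered.ToArray();
}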
protected virtual MappedAudioStream MapAudioStream(FFmpegConfig config, AudioStreamInfo sourceStream, AudioOutputStream outputStream)
{
    var result = new MappedAudioStream()
    {
        Input = GetStreamInput(sourceStream),
        Codec = new Codec(GetAudioCodecName(config, outputStream.Format))
    };

    if (outputStream.Mixdown.HasValue)
    {
        result.ChannelCount = AudioUtility.GetChannelCount(outputStream.Mixdown.Value);
    }

    if (outputStream.Quality.HasValue)
    {
        result.Bitrate = $"{outputStream.Quality:0}k";
    }

    return result;
}
public void Stop()
{
    AudioSource audioSource = GetComponent<AudioSource>();
    if (audioSource != null) {
        audioSource.Stop();
        if (audioSource.clip == _audioClip)
            audioSource.clip = null;
    }

    if (_audioClip != null) {
        Destroy(_audioClip);
        _audioClip = null;
    }

    _audioOutputStream = null;
}
public async Task Speak(string text, BufferedWaveProvider waveProvider, int rate)
{
    var fmt = new System.Speech.AudioFormat.SpeechAudioFormatInfo(
        waveProvider.WaveFormat.SampleRate,
        (System.Speech.AudioFormat.AudioBitsPerSample)waveProvider.WaveFormat.BitsPerSample,
        (System.Speech.AudioFormat.AudioChannel)waveProvider.WaveFormat.Channels);

    // Creates an instance of a speech config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechConfig.FromSubscription(Key, Region);
    config.SpeechSynthesisLanguage = Language;
    config.SpeechSynthesisVoiceName = Voice;

    // Creates an audio output stream matching the wave provider's PCM format.
    using (var stream = AudioOutputStream.CreatePullStream(
        AudioStreamFormat.GetWaveFormatPCM(
            (uint)waveProvider.WaveFormat.SampleRate,
            (byte)waveProvider.WaveFormat.BitsPerSample,
            (byte)waveProvider.WaveFormat.Channels)))
    {
        // Creates a speech synthesizer using audio stream output.
        using (var streamConfig = AudioConfig.FromStreamOutput(stream))
        using (var synthesizer = new SpeechSynthesizer(config, streamConfig))
        {
            using (var result = await synthesizer.SpeakTextAsync(text))
            {
                if (result.Reason == ResultReason.SynthesizingAudioCompleted)
                {
                    //Console.WriteLine($"Speech synthesized for text [{text}], and the audio was written to output stream.");
                }
                else if (result.Reason == ResultReason.Canceled)
                {
                    var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
                    OnLog?.Invoke($"CANCELED: Reason={cancellation.Reason}");

                    if (cancellation.Reason == CancellationReason.Error)
                    {
                        OnLog?.Invoke($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                        OnLog?.Invoke($"CANCELED: ErrorDetails=[{cancellation.ErrorDetails}]");
                        OnLog?.Invoke($"CANCELED: Did you update the subscription info?");
                    }
                }
            }
        }

        /*
        using (var reader = new WaveFileReader(new PullStream(stream)))
        {
            var newFormat = new WaveFormat(waveProvider.WaveFormat.SampleRate, waveProvider.WaveFormat.BitsPerSample, waveProvider.WaveFormat.Channels);
            using (var conversionStream = new WaveFormatConversionStream(newFormat, reader))
            {
                //WaveFileWriter.CreateWaveFile("output.wav", conversionStream);
                byte[] buffer = new byte[32000];
                int filledSize = 0;
                int totalSize = 0;
                while ((filledSize = conversionStream.Read(buffer, 0, buffer.Length)) > 0)
                {
                    waveProvider.AddSamples(buffer, 0, (int)filledSize);
                    //Console.WriteLine($"{filledSize} bytes received.");
                    totalSize += filledSize;
                }
            }
        }
        */

        // Reads (pulls) data from the stream and feeds it to the wave provider.
        byte[] buffer = new byte[32000];
        uint filledSize = 0;
        uint totalSize = 0;
        while ((filledSize = stream.Read(buffer)) > 0)
        {
            waveProvider.AddSamples(buffer, 0, (int)filledSize);
            //Console.WriteLine($"{filledSize} bytes received.");
            totalSize += filledSize;
        }
    }
}
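// A possible call site for Speak, using NAudio to play the buffered audio. The wave
// format must match the PCM format requested from the pull stream above; the rate
// argument's semantics aren't shown in the original, so 0 is a placeholder here.
public async Task SpeakToDefaultDevice(string text)
{
    var waveProvider = new BufferedWaveProvider(new WaveFormat(16000, 16, 1));

    using var waveOut = new WaveOutEvent();
    waveOut.Init(waveProvider);
    waveOut.Play();

    await Speak(text, waveProvider, rate: 0);

    // Let the buffered audio finish playing before the device is disposed.
    while (waveProvider.BufferedBytes > 0)
    {
        await Task.Delay(100);
    }
}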
void ConnectAudioStream()
{
    // Delete the old audio stream.
    DisconnectAudioStream();

    if (_model == null)
        return;

    if (isOwnedLocally) {
        // Local player: create a microphone stream.
        // First check if this platform supports our native microphone wrapper (lower latency + native echo cancellation if available).
        _microphoneSampleRate = 48000;
        _microphoneChannels   = 1;

        // Check for the Oculus native microphone device API.
        bool foundOculusMicrophoneDevice = false;
        if (OculusMicrophoneDevice.IsOculusPlatformAvailable()) {
            foundOculusMicrophoneDevice = OculusMicrophoneDevice.IsOculusPlatformInitialized();
            if (!foundOculusMicrophoneDevice && Application.platform == RuntimePlatform.Android)
                Debug.LogWarning("Normcore: Oculus Platform SDK found, but it's not initialized. Oculus Quest native echo cancellation will be unavailable.");
        }

        if (foundOculusMicrophoneDevice) {
            // Create Oculus microphone device.
            _oculusMicrophoneDevice = new OculusMicrophoneDevice();
            _oculusMicrophoneDevice.Start();
            _microphoneSampleRate = 48000;
            _microphoneChannels   = 1;
        } else if (Native.Microphone.PlatformSupported()) {
            _nativeMicrophoneDevice = new Native.Microphone();

            // If we failed to connect to the local microphone, bail.
            if (!_nativeMicrophoneDevice.Start()) {
                Debug.LogError("Failed to connect to default microphone device. Make sure it is plugged in and functioning properly.");
                _nativeMicrophoneDevice.Dispose();
                _nativeMicrophoneDevice = null;
                return;
            }

            _microphoneSampleRate = _nativeMicrophoneDevice.SampleRate();
            _microphoneChannels   = _nativeMicrophoneDevice.Channels();
        } else {
            // Create a microphone device.
            _unityMicrophoneDevice = MicrophoneDevice.Start("");

            // If we failed to connect to the local microphone, bail.
            if (_unityMicrophoneDevice == null) {
                Debug.LogError("Failed to connect to default microphone device. Make sure it is plugged in and functioning properly.");
                return;
            }

            _unityMicrophoneDeviceDataReader = new AudioDeviceDataReader(_unityMicrophoneDevice);
            _microphoneSampleRate = _unityMicrophoneDevice.sampleRate;
            _microphoneChannels   = _unityMicrophoneDevice.numberOfChannels;
        }

        // Compute the frame size (10 ms of samples) from the sample rate of the microphone we received.
        _microphoneFrameSize = _microphoneSampleRate / 100;

        // Create microphone stream with this sample rate (stream will automatically resample to 48000 before encoding with OPUS).
        _microphoneStream = room.CreateAudioInputStream(true, _microphoneSampleRate, _microphoneChannels);

        // Audio preprocessor: create it for all platforms except iOS. iOS provides a nice built-in one.
        bool createAudioPreprocessor = Application.platform != RuntimePlatform.IPhonePlayer;
        if (createAudioPreprocessor) {
            // Turn on echo cancellation for mobile devices.
            bool echoCancellation = Application.isMobilePlatform && Application.platform != RuntimePlatform.IPhonePlayer;

            _audioPreprocessor = new AudioPreprocessor(_microphoneSampleRate, _microphoneFrameSize, // Input stream
                                                       true,             // Automatic gain control
                                                       true,             // Noise suppression
                                                       true,             // Reverb suppression
                                                       echoCancellation, // Echo cancellation
                                                       AudioSettings.outputSampleRate, 2, 0.28f);

            if (echoCancellation) {
                // Find the audio listener in the scene so we can perform echo cancellation with it.
                AudioListener[] audioListeners = FindObjectsOfType<AudioListener>();
                if (audioListeners.Length <= 0) {
                    Debug.LogWarning("RealtimeAvatarVoice: Unable to find any AudioListeners in the scene. RealtimeAvatarVoice will not be able to perform echo cancellation.");
                } else {
                    AudioListener audioListener = audioListeners[0];
                    if (audioListeners.Length > 1)
                        Debug.LogWarning("RealtimeAvatarVoice: Multiple AudioListeners found in the scene. Performing echo cancellation with the first one: " + audioListener.gameObject.name);

                    _audioPreprocessorPlaybackListener = audioListener.gameObject.AddComponent<AudioPreprocessorPlaybackListener>();
                    _audioPreprocessorPlaybackListener.audioPreprocessor = _audioPreprocessor;
                }
            }
        }
    } else {
        // Remote player: look up the audio stream and create an audio output.
        int clientID = _model.clientID;
        int streamID = _model.streamID;
        if (clientID >= 0 && streamID >= 0) {
            // Find the AudioOutputStream.
            AudioOutputStream audioOutputStream = room.GetAudioOutputStream(clientID, streamID);
            if (audioOutputStream != null) {
                _audioOutput = gameObject.AddComponent<AudioOutput>();
                _audioOutput.mute = mute;
                _audioOutput.StartWithAudioOutputStream(audioOutputStream);
            } else {
                Debug.LogError("RealtimeAvatarVoice: Unable to find matching audio stream for avatar (clientID: " + clientID + ", streamID: " + streamID + ").");
            }
        }
    }
}