/// <inheritdoc/>
public async Task<AudioClip> ConvertTextToSpeech(string text)
{
    string filename = Configuration.GetUniqueTextToSpeechFilename(text);
    string filePath = GetPathToFile(filename);
    AudioClip audioClip;

    if (IsFileCached(filePath))
    {
        // Cached file holds raw 16-bit PCM; convert to floats for the AudioClip.
        // NOTE(review): assumes the cached audio is mono at 48 kHz — confirm this
        // matches whatever CacheAudio wrote.
        byte[] bytes = GetCachedFile(filePath);
        float[] sound = TextToSpeechUtils.ShortsInByteArrayToFloats(bytes);

        audioClip = AudioClip.Create(text, channels: 1, frequency: 48000, lengthSamples: sound.Length, stream: false);
        audioClip.SetData(sound, 0);
    }
    else
    {
        audioClip = await FallbackProvider.ConvertTextToSpeech(text);

        // Fix: validate BEFORE caching. The original cached a potentially null clip
        // (when SaveAudioFilesToStreamingAssets was enabled) and only failed afterwards,
        // leaving a bogus entry in the streaming-assets cache.
        if (audioClip == null)
        {
            throw new CouldNotLoadAudioFileException("AudioClip is null.");
        }

        if (Configuration.SaveAudioFilesToStreamingAssets)
        {
            CacheAudio(audioClip, filePath);
        }
    }

    // Kept as a safety net for the cached branch as well.
    if (audioClip == null)
    {
        throw new CouldNotLoadAudioFileException("AudioClip is null.");
    }

    return audioClip;
}
/// <summary>
/// Synthesizes <paramref name="text"/> to a temporary file via SAPI, reads the
/// result back as float samples, strips synthesis artifacts, and removes the file.
/// </summary>
/// <param name="text">Text to synthesize (inserted into the SSML template).</param>
/// <param name="outputPath">Path of the temporary audio file to write and then delete.</param>
/// <param name="language">Language tag inserted into the SSML template.</param>
/// <param name="voice">Voice name inserted into the SSML template.</param>
/// <returns>Artifact-free float sample data of the synthesized speech.</returns>
private float[] Synthesize(string text, string outputPath, string language, string voice)
{
    // Despite the fact that SpVoice.AudioOutputStream accepts values of type ISpeechBaseStream,
    // the single type of a stream that is actually working is a SpFileStream.
    SpFileStream stream = PrepareFileStreamToWrite(outputPath);
    try
    {
        SpVoice synthesizer = new SpVoice { AudioOutputStream = stream };
        string ssmlText = string.Format(ssmlTemplate, language, voice, text);
        synthesizer.Speak(ssmlText, SpeechVoiceSpeakFlags.SVSFIsXML);
        synthesizer.WaitUntilDone(-1);
    }
    finally
    {
        // Fix: the original leaked the file handle (and left the temp file locked)
        // whenever Speak/WaitUntilDone threw, because Close() was never reached.
        stream.Close();
    }

    byte[] data = File.ReadAllBytes(outputPath);
    float[] sampleData = TextToSpeechUtils.ShortsInByteArrayToFloats(data);
    float[] cleanData = RemoveArtifacts(sampleData);
    ClearCache(outputPath);
    return cleanData;
}