public RecognitionDemo(string region, string key, string locale, int millisecondsPerFrame)
        {
            _disposed             = false;
            _millisecondsPerFrame = millisecondsPerFrame;
            SpeechConfig config = SpeechConfig.FromSubscription(key, region);

            config.SpeechRecognitionLanguage = locale;
            config.OutputFormat = OutputFormat.Detailed;
            _audioInput         = CreateAudioInputStream();
            _recognizer         = new SpeechRecognizer(config, AudioConfig.FromStreamInput(_audioInput));
            _audioCapture       = CreateAudioCaptureDevice();
            _audio      = new FileStream("audio.raw", FileMode.Create);
            _transcript = new StreamWriter(new FileStream("transcript.txt", FileMode.Create), Encoding.UTF8);
            _stopwatch  = new Stopwatch();

            _framesCaptured = 0;
            _intermediateResultsReceived = 0;
            _finalResultsReceived        = 0;
            _identicalResults            = 0;
            _lastResult = null;
        }
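The CreateAudioInputStream and CreateAudioCaptureDevice helpers are defined elsewhere and not shown here. A minimal sketch of the former, assuming 16 kHz, 16-bit, mono PCM capture, might look like:

        // Hypothetical helper (format values are assumptions): a push stream that the
        // capture device writes raw PCM frames into.
        private static PushAudioInputStream CreateAudioInputStream()
        {
            return AudioInputStream.CreatePushStream(AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1));
        }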
Example #2
        public static async Task SpeakerVerify(SpeechConfig config, VoiceProfile profile, Dictionary <string, string> profileMapping, string file)
        {
            var model = SpeakerVerificationModel.FromProfile(profile);

            Console.WriteLine($"Veryifying {file} ...");
            try
            {
                var speakerRecognizer = new SpeakerRecognizer(config, AudioConfig.FromWavFileInput(file));
                var result            = await speakerRecognizer.RecognizeOnceAsync(model);

                Console.WriteLine($"Verified voice profile for speaker {profileMapping[result.ProfileId]}, score is {result.Score}");
                if (result.Score >= 0.5)
                {
                    File.Copy(file, Path.Combine(settings[SettingIndex.ResDir], Path.GetFileName(file)), true);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine("Exception caught: " + ex.Message);
            }
        }
Example #3
        public async Task <IAudioClip> Synthesize(string text)
        {
            var stream = AudioOutputStream.CreatePullStream(AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1));

            //Generate voice data into stream
            using (var streamConfig = AudioConfig.FromStreamOutput(stream))
                using (var synthesizer = new SpeechSynthesizer(_config, streamConfig))
                {
                    using (var result = await synthesizer.SpeakTextAsync(text))
                    {
                        if (result.Reason == ResultReason.Canceled)
                        {
                            var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
                            throw new TaskCanceledException($"{cancellation.Reason}: {cancellation.ErrorDetails}");
                        }
                    }
                }

            //Create a clip which consumes this audio data
            return(new AudioOutputStreamClip($"TTS:`{text}`", stream, new WaveFormat(16000, 16, 1)));
        }
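AudioOutputStreamClip is an application-specific type that is not shown here. Whatever consumes the clip ultimately drains the pull stream, roughly like this sketch:

            // Consumption sketch: read PCM chunks out of the pull stream until it is exhausted.
            var buffer = new byte[4096];
            uint bytesRead;
            while ((bytesRead = stream.Read(buffer)) > 0)
            {
                // hand `bytesRead` bytes of 16 kHz/16-bit/mono PCM to the audio pipeline
            }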
Example #4
        /// <summary>语音转文字 从内存流识别</summary>
        public static async Task <string> RecognizeFromStreamAsync(string inputFileName)
        {
            var config = SpeechConfig.FromSubscription(subscriptionKey, region);

            using var reader = new BinaryReader(File.OpenRead(inputFileName));

            using var audioInputStream = AudioInputStream.CreatePushStream();
            using var audioConfig      = AudioConfig.FromStreamInput(audioInputStream);
            using var recognizer       = new SpeechRecognizer(config, audioConfig);

            byte[] readBytes;
            do
            {
                readBytes = reader.ReadBytes(1024);
                audioInputStream.Write(readBytes, readBytes.Length);
            } while (readBytes.Length > 0);
            audioInputStream.Close(); // signal end of stream so recognition does not wait for more audio

            var result = await recognizer.RecognizeOnceAsync();

            return(result.Text);
        }
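A call site for this helper might look like the following (the file name is illustrative). Note that the default push-stream format is 16 kHz, 16-bit, mono PCM, so the input audio should match:

            string text = await RecognizeFromStreamAsync("whatstheweatherlike.wav");
            Console.WriteLine($"RECOGNIZED: {text}");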
Example #5
        // Adapted from the Google Cloud Text-to-Speech API documentation.
        private string speak(string text, string number)
        {
            var client = TextToSpeechClient.Create();

            // The input to be synthesized; can be provided as text or SSML.
            var input = new SynthesisInput
            {
                Text = text
            };

            // Build the voice request.
            var voiceSelection = new VoiceSelectionParams
            {
                LanguageCode = "en-US",
                SsmlGender   = SsmlVoiceGender.Female
            };

            // Specify the type of audio file.
            var audioConfig = new AudioConfig
            {
                AudioEncoding = AudioEncoding.Mp3
            };

            // Perform the text-to-speech request.
            var response = client.SynthesizeSpeech(input, voiceSelection, audioConfig);

            // Write the response to the output file.
            string outputPath = Path.Combine(currentDir, number + "output.mp3");
            using (var output = File.Create(outputPath))
            {
                response.AudioContent.WriteTo(output);
            }
            Console.WriteLine($"Audio content written to file \"{number}output.mp3\"");
            return(outputPath);
        }
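TextToSpeechClient.Create() resolves credentials from the environment. A sketch of pointing it at a service-account key before the first call (the path is illustrative):

            // Assumed setup step: Google client libraries read this variable at client creation time.
            Environment.SetEnvironmentVariable("GOOGLE_APPLICATION_CREDENTIALS", @"C:\keys\tts-service-account.json");
            var client = TextToSpeechClient.Create();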
Example #6
        public static async Task <string> RecognizeSpeechAsync()
        {
            Debug.WriteLine("Starting Speech2Text service...");
            var config = SpeechConfig.FromSubscription("d882cca2d3b44735b0760cbaece4b340", "westus");

            config.SpeechRecognitionLanguage = "es-MX";
            using (var audioInput = AudioConfig.FromWavFileInput(@"D:\VS Projects\Ignite\Consultant\Consultant\Speech.wav"))
            {
                using (var recognizer = new SpeechRecognizer(config, audioInput))
                {
                    Debug.WriteLine("Recognizing first result...");
                    var result = await recognizer.RecognizeOnceAsync();

                    if (result.Reason == ResultReason.RecognizedSpeech)
                    {
                        Debug.WriteLine($"We recognized: {result.Text}");
                        return(result.Text);
                    }
                    else if (result.Reason == ResultReason.NoMatch)
                    {
                        Debug.WriteLine($"NOMATCH: Speech could not be recognized.");
                        return("Not recognized");
                    }
                    else if (result.Reason == ResultReason.Canceled)
                    {
                        var cancellation = CancellationDetails.FromResult(result);
                        Debug.WriteLine($"CANCELED: Reason={cancellation.Reason}");

                        if (cancellation.Reason == CancellationReason.Error)
                        {
                            Debug.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                            Debug.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
                            Debug.WriteLine($"CANCELED: Did you update the subscription info?");
                        }
                        return("Canceled");
                    }
                    return("");
                }
            }
        }
Example #7
        private void button1_Click(object sender, EventArgs e)
        {
            TextToSpeechClient client = TextToSpeechClient.Create();

            // Set the text input to be synthesized.
            SynthesisInput input = new SynthesisInput
            {
                Text = "Just putting something which makes nosense to read stuff"
            };

            // Build the voice request, select the language code ("en-US"),
            // and the SSML voice gender ("neutral").
            VoiceSelectionParams voice = new VoiceSelectionParams
            {
                LanguageCode = "en-US",
                SsmlGender   = SsmlVoiceGender.Neutral
            };

            // Select the type of audio file you want returned.
            AudioConfig config = new AudioConfig
            {
                AudioEncoding = AudioEncoding.Mp3
            };

            // Perform the Text-to-Speech request, passing the text input
            // with the selected voice parameters and audio file type
            var response = client.SynthesizeSpeech(new SynthesizeSpeechRequest
            {
                Input       = input,
                Voice       = voice,
                AudioConfig = config
            });

            // Write the binary AudioContent of the response to an MP3 file.
            using (Stream output = File.Create("sample.mp3"))
            {
                response.AudioContent.WriteTo(output);
                Console.WriteLine($"Audio content written to file 'sample.mp3'");
            }
        }
Example #8
        private byte[] GetGoogleSpeech(string speechText, string languageCode)
        {
            string             path       = Path.Combine(Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), @"SpellingApp-7fc0cf8b5885.json");
            var                credential = GoogleCredential.FromFile(path);
            var                channel    = new Grpc.Core.Channel(TextToSpeechClient.DefaultEndpoint.ToString(), credential.ToChannelCredentials());
            TextToSpeechClient client     = TextToSpeechClient.Create(channel);

            // Set the text input to be synthesized.
            SynthesisInput input = new SynthesisInput
            {
                Text = speechText
            };

            // Build the voice request, select the language code ("en-US"),
            // and the SSML voice gender ("neutral").
            VoiceSelectionParams voice = new VoiceSelectionParams
            {
                LanguageCode = languageCode,
                SsmlGender   = SsmlVoiceGender.Neutral
            };

            // Select the type of audio file you want returned.
            AudioConfig config = new AudioConfig
            {
                AudioEncoding = AudioEncoding.Mp3
            };

            // Perform the Text-to-Speech request, passing the text input
            // with the selected voice parameters and audio file type
            var response = client.SynthesizeSpeech(new SynthesizeSpeechRequest
            {
                Input       = input,
                Voice       = voice,
                AudioConfig = config
            });

            // Return the binary AudioContent of the response as an MP3 byte array.
            return(response.AudioContent.ToByteArray());
        }
Example #9
        public async Task ContinuousRecognition()
        {
            var speechConfig = SpeechConfig.FromSubscription("myKey", "northeurope");

            using var audioConfig = AudioConfig.FromDefaultMicrophoneInput();
            speechConfig.SpeechRecognitionLanguage = "ro-RO";
            speechConfig.EnableDictation();
            using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);
            var stopRecognition = new TaskCompletionSource <int>();

            Console.OutputEncoding  = Encoding.UTF8;
            recognizer.Recognizing += (s, e) =>
            {
                Console.WriteLine("Text=" + e.Result.Text);
            };
            recognizer.Recognized += (s, e) =>
            {
                if (e.Result.Reason == ResultReason.RecognizedSpeech)
                {
                    Console.WriteLine("Final Text=" + e.Result.Text);
                }
                else
                {
                    Console.WriteLine("Speech not found!");
                }
            };
            recognizer.Canceled += (s, e) =>
            {
                Console.WriteLine("Reason=" + e.Reason);
            };
            recognizer.SessionStopped += (s, e) =>
            {
                Console.WriteLine("\n Session Stopped!");
                stopRecognition.TrySetResult(0);
            };
            await recognizer.StartContinuousRecognitionAsync();

            Task.WaitAny(new[] { stopRecognition.Task });
            await recognizer.StopContinuousRecognitionAsync();
        }
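Because the method is already async, the blocking Task.WaitAny can be replaced with a plain await on the completion source; a sketch:

            await recognizer.StartContinuousRecognitionAsync();
            await stopRecognition.Task; // completes when the SessionStopped handler fires
            await recognizer.StopContinuousRecognitionAsync();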
Example #10
        // seslendir ("read aloud" in Turkish): synthesizes the text in the opposite language and plays it back.
        public void seslendir(string metin, string detected)
        {
            mediaPlayer.Close();
            string lang = "";

            if (detected == "tr")
            {
                lang = "en-US"; // note: "en-EN" is not a valid BCP-47 code
            }
            if (detected == "en")
            {
                lang = "tr-TR";
            }
            VoiceSelectionParams voice = new VoiceSelectionParams
            {
                LanguageCode = lang,
                SsmlGender   = SsmlVoiceGender.Male
            };
            AudioConfig config = new AudioConfig
            {
                AudioEncoding = AudioEncoding.Mp3
            };
            SynthesisInput input = new SynthesisInput
            {
                Text = metin
            };
            var response = client.SynthesizeSpeech(new SynthesizeSpeechRequest
            {
                Input       = input,
                Voice       = voice,
                AudioConfig = config
            });

            using (Stream output = File.Create("C:\\Users\\corx\\source\\repos\\Selami\\Selami\\ses\\sample.mp3"))
            {
                response.AudioContent.WriteTo(output);
            }
            mediaPlayer.Open(new Uri("C:\\Users\\corx\\source\\repos\\Selami\\Selami\\ses\\sample.mp3"));
            mediaPlayer.Play();
        }
Example #11
        public async Task <Result <LuisResult> > Recognize(string filePath)
        {
            // LUIS credentials
            var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourRegion");

            config.SpeechRecognitionLanguage = "pt-br";

            using (var audioInput = AudioConfig.FromWavFileInput(filePath))
            {
                using (var recognizer = new IntentRecognizer(config, audioInput))
                {
                    var model = LanguageUnderstandingModel.FromAppId("YourLuisAppId");
                    recognizer.AddIntent(model, "intent.iot.device_off", "device_off");
                    recognizer.AddIntent(model, "intent.iot.device_on", "device_on");

                    var result = await recognizer.RecognizeOnceAsync().ConfigureAwait(false);

                    if (result.Reason == ResultReason.RecognizedIntent)
                    {
                        var js = new DataContractJsonSerializer(typeof(LuisResult));
                        var ms = new MemoryStream(Encoding.UTF8.GetBytes(result.Properties.GetProperty(PropertyId.LanguageUnderstandingServiceResponse_JsonResult)));
                        return(new Result <LuisResult>((js.ReadObject(ms) as LuisResult)));
                    }
                    else if (result.Reason == ResultReason.NoMatch)
                    {
                        return(new Result <LuisResult>(null, false, "Falha no reconhecimento do áudio!"));
                    }
                    else if (result.Reason == ResultReason.Canceled)
                    {
                        var cancellation = CancellationDetails.FromResult(result);
                        if (cancellation.Reason == CancellationReason.Error)
                        {
                            return(new Result <LuisResult>(null, false, $"Motivo: {cancellation.Reason}. Detalhes: {cancellation.ErrorDetails}"));
                        }
                        return(new Result <LuisResult>(null, false, $"Motivo: {cancellation.Reason}."));
                    }
                }
            }
            return(new Result <LuisResult>(null, false, "Erro desconhecido!"));
        }
Example #12
        public static void CreateSpeechFile(string inputText, string filePath)
        {
            TextToSpeechClient client = TextToSpeechClient.Create();

            // Set the text input to be synthesized.
            SynthesisInput input = new SynthesisInput
            {
                Text = inputText
            };

            // Build the voice request, select the language code ("en-US"),
            // and the SSML voice gender ("neutral").
            VoiceSelectionParams voice = new VoiceSelectionParams
            {
                LanguageCode = "en-US",
                SsmlGender   = SsmlVoiceGender.Neutral
            };

            // Select the type of audio file you want returned.
            AudioConfig config = new AudioConfig
            {
                AudioEncoding = AudioEncoding.Mp3
            };

            // Perform the Text-to-Speech request, passing the text input
            // with the selected voice parameters and audio file type
            var response = client.SynthesizeSpeech(new SynthesizeSpeechRequest
            {
                Input       = input,
                Voice       = voice,
                AudioConfig = config
            });

            // Write the binary AudioContent of the response to an MP3 file.
            using (Stream output = File.Create(filePath))
            {
                response.AudioContent.WriteTo(output);
            }
        }
Example #13
        public async Task <Stream> SynthesizeTextToStreamAsync(IVoice voice, string text)
        {
            var input = new SynthesisInput {
                Text = text
            };

            var config = new AudioConfig {
                AudioEncoding = AudioEncoding.Mp3
            };

            var response = await Client.SynthesizeSpeechAsync(new SynthesizeSpeechRequest {
                Input = input,
                Voice = new VoiceSelectionParams()
                {
                    Name         = voice.Name,
                    LanguageCode = voice.Language
                },
                AudioConfig = config,
            });

            return(new MemoryStream(response.AudioContent.ToByteArray()));
        }
Example #14
        public static async Task VerificationEnroll(SpeechConfig config, Dictionary <string, string> profileMapping)
        {
            using (var client = new VoiceProfileClient(config))
                using (var profile = await client.CreateProfileAsync(VoiceProfileType.TextIndependentVerification, "en-us"))
                {
                    using (var audioInput = AudioConfig.FromWavFileInput(settings[SettingIndex.ExampleAudio]))
                    {
                        Console.WriteLine($"Enrolling profile id {profile.Id}.");
                        // give the profile a human-readable display name
                        profileMapping.Add(profile.Id, "Test speaker");

                        VoiceProfileEnrollmentResult result = await client.EnrollProfileAsync(profile, audioInput);

                        if (result != null)
                        {
                            if (result.Reason == ResultReason.EnrolledVoiceProfile)
                            {
                                string[] files = Directory.GetFiles(settings[SettingIndex.SourceDir], "*.wav", SearchOption.TopDirectoryOnly);

                                foreach (string file in files)
                                {
                                    await SpeakerVerify(config, profile, profileMapping, file);
                                }
                            }
                            else if (result.Reason == ResultReason.Canceled)
                            {
                                var cancellation = VoiceProfileEnrollmentCancellationDetails.FromResult(result);
                                Console.WriteLine($"CANCELED {profile.Id}: ErrorCode={cancellation.ErrorCode} ErrorDetails={cancellation.ErrorDetails}");
                            }
                            await client.DeleteProfileAsync(profile);
                        }
                        else
                        {
                            Console.WriteLine("Profile enrollment error");
                        }
                    }
                }
        }
Example #15
        public static string ConvertAudioTOpus(FileConfig fileConfig)
        {
            AudioConfig audioConfig = fileConfig.AudioConfig;
            string      tmp         = Config.Temp;
            string      audiofile   = FileUtility.RandomName(tmp) + ".opus";
            int         bitrate     = 0;

            if (audioConfig.Quality < 1)
            {
                bitrate = (int)(audioConfig.Quality * 400);
            }
            else
            {
                bitrate = (int)audioConfig.Quality;
            }
            var    eac3to  = Path.Combine(Environment.CurrentDirectory, Eac3toExecute);
            var    opusenc = Path.Combine(Environment.CurrentDirectory, OpusEnc);
            string bat     = $"{eac3to.Maohao()} {fileConfig.VedioFileFullName.Maohao()}  {audioConfig.Tracker}: {audioConfig.CommandLineArgs} stdout.wav | {opusenc.Maohao()} --ignorelength --bitrate {bitrate} --vbr -  {audiofile.Maohao()}";

            ProcessCmd.RunBat(bat, Config.Temp);
            return(audiofile);
        }
Example #16
    // Sampling rate = 11050*2, bitRate = 16, channels = 1
    public async UniTask STT(string wavFilepath, int sampleRate, int bitRate, int channels)
    {
        var speechConfig = SpeechConfig.FromSubscription(subscription_key, region);

        speechConfig.SpeechRecognitionLanguage = location;
        var reader            = new BinaryReader(File.OpenRead(wavFilepath));
        var audioStreamFormat = AudioStreamFormat.GetWaveFormatPCM((uint)sampleRate, (byte)bitRate, (byte)channels);
        var audioInputStream  = AudioInputStream.CreatePushStream(audioStreamFormat);
        var audioConfig       = AudioConfig.FromStreamInput(audioInputStream);
        var recognizer        = new SpeechRecognizer(speechConfig, audioConfig);

        byte[] readBytes;
        do
        {
            readBytes = reader.ReadBytes(1024);
            audioInputStream.Write(readBytes, readBytes.Length);
        } while (readBytes.Length > 0);
        audioInputStream.Close(); // signal end of stream so RecognizeOnceAsync does not wait for more audio

        var result = await recognizer.RecognizeOnceAsync();

        Debug.Log($"Recognized Line : = {result.Text}");
    }
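A call matching the sample-rate comment above might look like this (the file path is illustrative):

        await STT("Assets/Recordings/question.wav", 11050 * 2, 16, 1);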
Example #17
        public static async Task SynthesisToAudioFileAsync()
        {
            var sub         = ConfigurationManager.AppSettings["sub"];
            var region      = ConfigurationManager.AppSettings["region"] ?? "southcentralus";
            var outFilename = ConfigurationManager.AppSettings["filename"] ?? "voice.wav";
            var text        = ConfigurationManager.AppSettings["text"];
            var voice       = ConfigurationManager.AppSettings["voice"] ?? "en-US-JessaNeural";

            var config = SpeechConfig.FromSubscription(sub, region);

            using (var fileOutput = AudioConfig.FromWavFileOutput(outFilename))
            {
                using (var synthesizer = new SpeechSynthesizer(config, fileOutput))
                {
                    var xmlSpeech =
                        @"<speak version='1.0' xmlns='https://www.w3.org/2001/10/synthesis' xml:lang='en-US'>" +
                        @"<voice name='" + voice + "'>" +
                        text +
                        @"</voice></speak>";

                    var result = await synthesizer.SpeakSsmlAsync(xmlSpeech);

                    if (result.Reason == ResultReason.SynthesizingAudioCompleted)
                    {
                        Console.WriteLine($"Speech synthesized to [{outFilename}] for text [{text}]");
                    }
                    else if (result.Reason == ResultReason.Canceled)
                    {
                        var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
                        Console.WriteLine($"CANCELED: {cancellation.Reason}");

                        if (cancellation.Reason == CancellationReason.Error)
                        {
                            Console.WriteLine($"CANCELED: {cancellation.ErrorCode}\n{cancellation.ErrorDetails}");
                        }
                    }
                }
            }
        }
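When no SSML markup is needed, the same output can be produced by setting the voice name on the config and calling SpeakTextAsync; a sketch:

            var config = SpeechConfig.FromSubscription(sub, region);
            config.SpeechSynthesisVoiceName = voice; // e.g. "en-US-JessaNeural"
            using (var synthesizer = new SpeechSynthesizer(config, fileOutput))
            {
                var result = await synthesizer.SpeakTextAsync(text);
            }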
Example #18
        public async Task <byte[]> TranslateTextToWav(string text)
        {
            // Instantiate a client
            var client = TextToSpeechClient.Create();

            // Set the text input to be synthesized.
            var input = new SynthesisInput
            {
                Text = text
            };

            // Build the voice request, select the language code ("en-US"),
            // and the SSML voice gender ("neutral").
            var voice = new VoiceSelectionParams
            {
                LanguageCode = "sv-SE",
                SsmlGender   = SsmlVoiceGender.Female
            };

            // Select the type of audio file you want returned.
            var config = new AudioConfig
            {
                AudioEncoding = AudioEncoding.Linear16
            };

            // Perform the Text-to-Speech request, passing the text input
            // with the selected voice parameters and audio file type
            var response = client.SynthesizeSpeech(new SynthesizeSpeechRequest
            {
                Input       = input,
                Voice       = voice,
                AudioConfig = config
            });

            var memStream = new System.IO.MemoryStream();

            response.AudioContent.WriteTo(memStream);
            return(memStream.ToArray());
        }
Example #19
        private async Task <string> RecognizeSpeechAsync(string uri)
        {
            var subscriptionKey = Environment.GetEnvironmentVariable("SPEECH_SUBSCRIPTION_KEY");
            var serviceRegion   = Environment.GetEnvironmentVariable("SPEECH_SERVICE_REGION");

            var config = SpeechConfig.FromSubscription(subscriptionKey, serviceRegion);

            using (var audioInput = AudioConfig.FromWavFileInput(uri))
            {
                using (var recognizer = new SpeechRecognizer(config, audioInput))
                {
                    _log.LogInformation("Recognizing first result...");
                    var result = await recognizer.RecognizeOnceAsync();

                    if (result.Reason == ResultReason.RecognizedSpeech)
                    {
                        _log.LogInformation($"We recognized: {result.Text}");
                    }
                    else if (result.Reason == ResultReason.NoMatch)
                    {
                        _log.LogInformation($"NOMATCH: Speech could not be recognized.");
                    }
                    else if (result.Reason == ResultReason.Canceled)
                    {
                        var cancellation = CancellationDetails.FromResult(result);
                        _log.LogInformation($"CANCELED: Reason={cancellation.Reason}");

                        if (cancellation.Reason == CancellationReason.Error)
                        {
                            _log.LogInformation($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                            _log.LogInformation($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
                            _log.LogInformation($"CANCELED: Did you update the subscription info?");
                        }
                    }

                    return(result.Text);
                }
            }
        }
Example #20
        protected async Task <string> GetGoogleSynthSpeech(string text, TTSVoice voicePreference, TTSPitch pitchPreference, string filename = null)
        {
            VoiceSelectionParams voice = voicePreference.GetGoogleVoiceSelectionParams();

            AudioConfig config = new AudioConfig
            {
                AudioEncoding = AudioEncoding.Mp3,
                Pitch         = pitchPreference.GetSemitoneShift()
            };

            //TTS
            SynthesisInput input = new SynthesisInput
            {
                Ssml = PrepareGoogleSSML(text)
            };

            // Perform the Text-to-Speech request, passing the text input
            // with the selected voice parameters and audio file type
            GoogleSynthesizeSpeechResponse response = await googleClient.SynthesizeSpeechAsync(input, voice, config);

            // Write the binary AudioContent of the response to file.
            string filepath;

            if (string.IsNullOrWhiteSpace(filename))
            {
                filepath = Path.Combine(TTSFilesPath, $"{Guid.NewGuid()}.mp3");
            }
            else
            {
                filepath = Path.Combine(TTSFilesPath, $"{filename}.mp3");
            }

            using (Stream file = new FileStream(filepath, FileMode.Create))
            {
                response.AudioContent.WriteTo(file);
            }

            return(filepath);
        }
Example #21
        /// <summary>
        /// Creates Recognizer with baseline model and selected language:
        /// Creates a config with subscription key and selected region
        /// If input source is audio file, creates recognizer with audio file otherwise with default mic
        /// Waits on RunRecognition.
        /// </summary>
        private async Task CreateRecognizer(byte[] channel)
        {
            // TODO: support users to specify a different region.
            var config = SpeechConfig.FromSubscription(this.SubscriptionKey, this.Region);

            config.SpeechRecognitionLanguage = this.RecognitionLanguage;
            config.OutputFormat = OutputFormat.Detailed;

            SpeechRecognizer basicRecognizer;

            PushAudioInputStream pushStream = AudioInputStream.CreatePushStream();

            pushStream.Write(channel);
            pushStream.Close();
            using (var audioInput = AudioConfig.FromStreamInput(pushStream))
            {
                using (basicRecognizer = new SpeechRecognizer(config, audioInput))
                {
                    await this.RunRecognizer(basicRecognizer, stopBaseRecognitionTaskCompletionSource).ConfigureAwait(false);
                }
            }
        }
Example #22
        public async Task <string> AudioToTextAsync(byte[] pcm)
        {
            var guid = Guid.NewGuid();

            if (!Text.ContainsKey(guid))
            {
                Text[guid] = null;
            }

            // Build out the speech recognizer
            using (var pushStream = AudioInputStream.CreatePushStream(AudioStreamFormat.GetDefaultInputFormat()))
                using (var audioInput = AudioConfig.FromStreamInput(pushStream))
                    using (var recognizer = new SpeechRecognizer(SpeechConfig, audioInput))
                    {
                        // Subscribe to speech recognizer events.
                        recognizer.SessionStarted += OnSpeechRecognitionSessionStarted;
                        recognizer.Recognizing    += OnSpeechRecognizing;
                        recognizer.Recognized     += (s, e) => OnSpeechRecognized(s, e, guid);
                        recognizer.Canceled       += OnSpeechCanceled;
                        recognizer.SessionStopped += OnSpeechRecognitionSessionStopped;

                        // Start continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
                        await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                        // Send the pcm data to the speech recognizer
                        pushStream.Write(pcm);
                        pushStream.Close();

                        // Wait for completion.
                        // Use Task.WaitAny to keep the task rooted.
                        Task.WaitAny(StopRecognition.Task);

                        // Stop recognition.
                        await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);

                        return(Text[guid]);
                    }
        }
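StopRecognition appears to be a shared, class-level TaskCompletionSource, so concurrent calls to AudioToTextAsync would race on it. A hypothetical per-call variant, assuming the SessionStopped handler can reach it:

                        var stopRecognition = new TaskCompletionSource<int>();
                        recognizer.SessionStopped += (s, e) => stopRecognition.TrySetResult(0);
                        // ... start recognition and push the audio as above, then:
                        await stopRecognition.Task;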
Example #23
        public async Task SendDirectLineSpeechVoiceMessage()
        {
            GetEnvironmentVars();

            // Create a Dialog Service Config for use with the Direct Line Speech Connector
            var config = DialogServiceConfig.FromBotSecret(speechBotSecret, speechSubscription, speechRegion);

            config.SpeechRecognitionLanguage = "en-us";
            config.SetProperty(PropertyId.Conversation_From_Id, FromUser);

            // Create a new Dialog Service Connector for the above configuration and register to receive events
            var connector = new DialogServiceConnector(config, AudioConfig.FromWavFileInput(soundFilePath));

            connector.ActivityReceived += Connector_ActivityReceived;

            // Open a connection to Direct Line Speech channel. No await because the call will block until the connection closes.
#pragma warning disable CS4014 // Because this call is not awaited, execution of the current method continues before the call is completed
            connector.ConnectAsync();
#pragma warning restore CS4014 // Because this call is not awaited, execution of the current method continues before the call is completed

            // Send the voice message from the audio file and listen for the bot's reply.
            await connector.ListenOnceAsync();

            // Give the bot time to respond.
            await Task.Delay(1000);

            // Read the bot's message.
            var botAnswer = messages.LastOrDefault();

            // Cleanup
            await connector.DisconnectAsync();

            connector.Dispose();

            // Assert
            Assert.IsNotNull(botAnswer);
            Assert.AreEqual(string.Format("You said '{0}'", soundFileMessage), botAnswer.Message);
        }
Example #24
        /// <summary>
        /// Common routine for transcribing an audio file.
        /// </summary>
        /// <param name="apiKey">The subscription key.</param>
        /// <param name="region">The region of the resource.</param>
        /// <param name="reader">BinaryReader instance for reading the input stream.</param>
        /// <returns>A Task returning the transcribed speech.</returns>
        private async Task <string> TranscribeAudioCommonAsync(Secret apiKey, string region, BinaryReader reader)
        {
            string transcript = null;

            using (BinaryAudioStreamReader streamReader = new BinaryAudioStreamReader(reader))
            {
                AudioStreamFormat audioStreamFormat = ReadWaveHeader(reader);
                AudioConfig       audioConfig       = AudioConfig.FromStreamInput(streamReader, audioStreamFormat);
                SpeechConfig      speechConfig      = SpeechConfig.FromSubscription(apiKey.Value, region);

                _speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);

                _speechRecognizer.Recognized     += Recognized;
                _speechRecognizer.Canceled       += Canceled;
                _speechRecognizer.SessionStopped += SessionStopped;
                _speechRecognizer.Canceled       += SessionStopped;

                await _speechRecognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                Task.WaitAny(new[] { _stopRecognition.Task });

                await _speechRecognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);

                if (!string.IsNullOrWhiteSpace(_cancellationDetails))
                {
                    throw new TranscriberCanceledException($"Azure Speech cancellation error: {_cancellationDetails}");
                }

                transcript = _transcriptBuilder.ToString();

                if (string.IsNullOrWhiteSpace(transcript))
                {
                    throw new TranscriberEmptyTranscriptException("Azure Speech returned blank transcript!");
                }
            }

            return(transcript);
        }
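BinaryAudioStreamReader and ReadWaveHeader are helpers defined elsewhere. A minimal sketch of what ReadWaveHeader could do, assuming a canonical PCM RIFF header and no extra chunks:

        private static AudioStreamFormat ReadWaveHeader(BinaryReader reader)
        {
            reader.ReadChars(4);            // "RIFF"
            reader.ReadInt32();             // overall size
            reader.ReadChars(4);            // "WAVE"
            reader.ReadChars(4);            // "fmt "
            int fmtSize = reader.ReadInt32();
            reader.ReadInt16();             // audio format (1 = PCM)
            short channels      = reader.ReadInt16();
            int   sampleRate    = reader.ReadInt32();
            reader.ReadInt32();             // byte rate
            reader.ReadInt16();             // block align
            short bitsPerSample = reader.ReadInt16();
            reader.ReadBytes(fmtSize - 16); // skip any fmt extension
            // A robust parser would also scan forward to the "data" chunk here.
            return AudioStreamFormat.GetWaveFormatPCM((uint)sampleRate, (byte)bitsPerSample, (byte)channels);
        }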
Example #25
        /// <summary>
        /// Creates a class-level Speech Recognizer for a specific language using Azure credentials
        /// and hooks-up lifecycle & recognition events
        /// </summary>
        void CreateSpeechRecognizer()
        {
            if (SpeechServiceAPIKey.Length == 0 || SpeechServiceAPIKey == String.Empty)
            {
                finalString = "You forgot to obtain Cognitive Services Speech credentials and inserting them in this app." + Environment.NewLine +
                              "See the README file and/or the instructions in the Awake() function for more info before proceeding.";
                errorString = "ERROR: Missing service credentials";
                UnityEngine.Debug.LogFormat(errorString);
                return;
            }
            UnityEngine.Debug.LogFormat("Creating Speech Recognizer.");
            // finalString = "Initializing speech recognition, please wait...";
            finalString = "Start: ";

            if (recognizer == null)
            {
                SpeechConfig sconfig = SpeechConfig.FromSubscription("b9bdc34702c1439589daf92475e8f827", "westus2");
                sconfig.SpeechRecognitionLanguage = fromLanguage;

                audioStream = new MicToAudioStream();
                AudioConfig aconfig = AudioConfig.FromStreamInput(audioStream, AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1));

                recognizer = new SpeechRecognizer(sconfig, aconfig);

                if (recognizer != null)
                {
                    // Subscribes to speech events.
                    recognizer.Recognizing         += RecognizingHandler;
                    recognizer.Recognized          += RecognizedHandler;
                    recognizer.SpeechStartDetected += SpeechStartDetectedHandler;
                    recognizer.SpeechEndDetected   += SpeechEndDetectedHandler;
                    recognizer.Canceled            += CanceledHandler;
                    recognizer.SessionStarted      += SessionStartedHandler;
                    recognizer.SessionStopped      += SessionStoppedHandler;
                }
            }
            UnityEngine.Debug.LogFormat("CreateSpeechRecognizer exit");
        }
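MicToAudioStream is a custom class that is not shown; it would derive from the SDK's PullAudioInputStreamCallback, roughly like this skeleton:

        // Skeleton only: the real class presumably buffers microphone samples from Unity's audio thread.
        public class MicToAudioStream : PullAudioInputStreamCallback
        {
            public override int Read(byte[] dataBuffer, uint size)
            {
                // Copy up to `size` bytes of 16 kHz/16-bit/mono PCM into dataBuffer and
                // return the number of bytes written; returning 0 ends the stream.
                return 0;
            }
        }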
Example #26
    IEnumerator ShowYakuOneByOne()
    {
        yield return(new WaitForSeconds(1.0f));

        var yakuArr = currentAgari.hanteiYakus;

        for (int i = 0; i < yakuArr.Length; i++)
        {
            var yaku = yakuArr[i];

            string yakuName = yaku.getYakuNameKey();

            UIYakuItem item;

            if (yaku.isYakuman())
            {
                item = CreateYakuItem_Yakuman(yakuName, yaku.isDoubleYakuman());
            }
            else
            {
                item = CreateYakuItem(yakuName, yaku.getHanSuu());
            }

            item.transform.parent        = yakuRoot;
            item.transform.localScale    = yakuItemPrefab.transform.localScale;
            item.transform.localPosition = new Vector3(yakuItemPosOffset.x, yakuItemPosOffset.y * (i + 1), 0f);

            _yakuItems.Add(item);

            AudioManager.Get().PlaySFX(AudioConfig.GetSEPath(ESeType.Yaku));

            yield return(new WaitForSeconds(yakuDisplayTime));
        }

        yield return(new WaitForSeconds(yakuDisplayTime * 0.5f));

        ShowTotalScrote();
    }
Example #27
        public string generateVoice(string text)
        {
            var client    = TextToSpeechClient.Create();
            var inputText = new SynthesisInput
            {
                Text = text + " Thank you for using Rela."
            };
            var voiceParameters = new VoiceSelectionParams
            {
                LanguageCode = "en-US",
                SsmlGender   = SsmlVoiceGender.Female
            };

            var audioParams = new AudioConfig
            {
                AudioEncoding = AudioEncoding.Mp3
            };

            var    response     = client.SynthesizeSpeech(inputText, voiceParameters, audioParams);
            string audioContent = response.AudioContent.ToBase64();

            return(audioContent);
        }
Example #28
        private async Task InitializeSpeechConnectorAsync()
        {
            audioConfig = AudioConfig.FromDefaultMicrophoneInput();
            var config = CustomCommandsConfig.FromSubscription(Constants.CustomCommandsAppId, Constants.SubscriptionKey, Constants.Region);

            config.Language = Constants.Language;

            // Create a new Dialog Service Connector for the above configuration and register to receive events
            connector = new DialogServiceConnector(config, audioConfig);
            connector.ActivityReceived += Connector_ActivityReceived;
            connector.Recognizing      += Connector_Recognizing;
            connector.Recognized       += Connector_Recognized;
            connector.Canceled         += Connector_Canceled;
            connector.SessionStarted   += Connector_SessionStarted;
            connector.SessionStopped   += Connector_SessionStopped;

            // Open a connection to Direct Line Speech channel
            await connector.ConnectAsync();

            var keywordRecognitionModel = KeywordRecognitionModel.FromFile(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Computer.table"));

            _ = connector.StartKeywordRecognitionAsync(keywordRecognitionModel);
        }
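The matching teardown would stop keyword spotting and close the connection; a sketch:

            await connector.StopKeywordRecognitionAsync();
            await connector.DisconnectAsync();
            connector.Dispose();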
Example #29
        public async Task <string> MicrophoneInput(e_language curLanguage)
        {
            if (speechConfig != null)
            {
                switch (curLanguage)
                {
                case e_language.English:
                    speechConfig.SpeechRecognitionLanguage = CAzureLanguage.English_UnitedState;
                    break;

                case e_language.Korean:
                    speechConfig.SpeechRecognitionLanguage = CAzureLanguage.Korean_Korea;
                    break;
                }

                using AudioConfig      audioConfig = AudioConfig.FromDefaultMicrophoneInput();
                using SpeechRecognizer recognizer  = new SpeechRecognizer(speechConfig, audioConfig);
                SpeechRecognitionResult result     = await recognizer.RecognizeOnceAsync();

                return(result.Text);
            }
            return("");
        }
Example #30
        public static async Task SpeakAsync(string txt, QuestToSpeech.Voice voice, string filePath, AzureAPIConfig config)
        {
            SpeechConfig speechConfig = SpeechConfig.FromSubscription(config.Key, config.Region);

            speechConfig.SpeechSynthesisVoiceName = voice.Name;
            speechConfig.SpeechSynthesisLanguage  = voice.LangCode;

            using (AudioConfig fileOutput = AudioConfig.FromWavFileOutput(filePath)) {
                using (SpeechSynthesizer tts = new SpeechSynthesizer(speechConfig, fileOutput)) {
                    using (SpeechSynthesisResult result = await tts.SpeakTextAsync(txt)) {
                        if (result.Reason == ResultReason.Canceled)
                        {
                            var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);

                            if (cancellation.Reason == CancellationReason.Error)
                            {
                                throw new Exception(string.Format("API Error (Code: {0}): {1}", cancellation.ErrorCode, cancellation.ErrorDetails));
                            }
                        }
                    }
                }
            }
        }