private void SetupTranscriptionAndTranslationService()
{
    try
    {
        var lCognitiveKey = _settings.AzureCognitiveKey;
        var lCognitiveRegion = _settings.AzureCognitiveRegion;

        // Log only the first half of the key so the full secret never lands in the event log.
        _eventPublisher.Publish("MySTT Setup", $"Got region: {lCognitiveRegion}, key starting from: {lCognitiveKey?.Substring(0, lCognitiveKey.Length / 2)}");

        this.mTransSpeechConfig = SpeechTranslationConfig.FromSubscription(lCognitiveKey, lCognitiveRegion);
        var fromLanguage = "en-US";
        var toLanguages = new List<string> { "el-GR" };
        //var toLanguages = new List<string> { "ru-RU" };
        this.mTransSpeechConfig.SpeechRecognitionLanguage = fromLanguage;
        toLanguages.ForEach(this.mTransSpeechConfig.AddTargetLanguage);

        this.mInputStream = AudioInputStream.CreatePushStream(AudioStreamFormat.GetWaveFormatPCM(SAMPLESPERSECOND, BITSPERSAMPLE, NUMBEROFCHANNELS));
        this.mAudioConfig = AudioConfig.FromStreamInput(this.mInputStream);
        this.mTranslationRecognizer = new TranslationRecognizer(this.mTransSpeechConfig, this.mAudioConfig);

        this.mTranslationRecognizer.Recognizing += this.MSpeechRecognizer_Recognizing;
        this.mTranslationRecognizer.Recognized += this.MSpeechRecognizer_Recognized;
        this.mTranslationRecognizer.SpeechEndDetected += this.MSpeechRecognizer_SpeechEndDetected;

        this.StartRecognisionIfNeeded();
    }
    catch (Exception ex)
    {
        _eventPublisher.Publish("MySTT Setup - Failed", $"Failed to initialize: {ex.Message}");
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="ContinuousSpeechRecognizer"/> class.
/// </summary>
/// <param name="pipeline">The pipeline in which to create the component.</param>
/// <param name="subscriptionKey">The subscription key for the Azure speech resource.</param>
/// <param name="region">The service region of the Azure speech resource.</param>
public ContinuousSpeechRecognizer(Pipeline pipeline, string subscriptionKey, string region)
    : base(pipeline)
{
    var config = SpeechConfig.FromSubscription(subscriptionKey, region);
    this.pushStream = AudioInputStream.CreatePushStream();
    this.audioInput = AudioConfig.FromStreamInput(this.pushStream);
    this.recognizer = new SpeechRecognizer(config, this.audioInput);
}
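The constructor above only wires the push stream into the recognizer; how the stream is fed and recognition is started is left to the rest of the component. The following is a minimal, self-contained sketch (not part of the original class) of the usual driving pattern: write PCM bytes into the push stream while continuous recognition runs, then close the stream so the service can emit final results. The buffer source, the default 16 kHz 16-bit mono format, and the fixed delay are assumptions for illustration.

using System;
using System.Threading.Tasks;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;

// Hypothetical driver: feeds raw PCM bytes into a push stream and prints final results.
public static class PushStreamRecognitionSketch
{
    public static async Task RunAsync(string subscriptionKey, string region, byte[] pcmAudio)
    {
        var config = SpeechConfig.FromSubscription(subscriptionKey, region);
        using var pushStream = AudioInputStream.CreatePushStream();   // defaults to 16 kHz, 16-bit, mono PCM
        using var audioInput = AudioConfig.FromStreamInput(pushStream);
        using var recognizer = new SpeechRecognizer(config, audioInput);

        recognizer.Recognized += (s, e) =>
        {
            if (e.Result.Reason == ResultReason.RecognizedSpeech)
            {
                Console.WriteLine(e.Result.Text);
            }
        };

        await recognizer.StartContinuousRecognitionAsync();
        pushStream.Write(pcmAudio);   // in a real pipeline this is called once per captured audio frame
        pushStream.Close();           // signals end of stream so the final result can be produced
        await Task.Delay(TimeSpan.FromSeconds(5));   // crude wait for final results in this sketch
        await recognizer.StopContinuousRecognitionAsync();
    }
}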
void Start()
{
    if (outputText == null)
    {
        UnityEngine.Debug.LogError("outputText property is null! Assign a UI Text element to it.");
    }
    else if (recoButton == null)
    {
        message = "recoButton property is null! Assign a UI Button to it.";
        UnityEngine.Debug.LogError(message);
    }
    else
    {
        // Continue with normal initialization, Text and Button objects are present.
#if PLATFORM_ANDROID
        // Request to use the microphone, cf.
        // https://docs.unity3d.com/Manual/android-RequestingPermissions.html
        message = "Waiting for mic permission";
        if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
        {
            Permission.RequestUserPermission(Permission.Microphone);
        }
#elif PLATFORM_IOS
        if (!Application.HasUserAuthorization(UserAuthorization.Microphone))
        {
            Application.RequestUserAuthorization(UserAuthorization.Microphone);
        }
#else
        micPermissionGranted = true;
        message = "Click button to recognize speech";
#endif
        grabacionCompleta = new StringBuilder(200);

        config = SpeechConfig.FromSubscription("b899f4a3bc2b4b30b3e690476b1af952", "westus");
        config.SpeechRecognitionLanguage = "es-ES";

        pushStream = AudioInputStream.CreatePushStream();
        audioInput = AudioConfig.FromStreamInput(pushStream);
        recognizer = new SpeechRecognizer(config, audioInput);
        recognizer.Recognizing += RecognizingHandler;
        recognizer.Recognized += RecognizedHandler;
        recognizer.Canceled += CanceledHandler;

        recoButton.onClick.AddListener(ButtonClick);
        foreach (var device in Microphone.devices)
        {
            Debug.Log("DeviceName: " + device);
        }
        audioSource = GameObject.Find("MyAudioSource").GetComponent<AudioSource>();
    }
}
void ConfigureSpeechRecognizer()
{
    _speechConfig = SpeechConfig.FromSubscription(SubscriptionKey, SubscriptionRegion);
    _speechConfig.SpeechRecognitionLanguage = "es-US";
    _speechConfig.OutputFormat = OutputFormat.Detailed;

    _pushStream = AudioInputStream.CreatePushStream();
    _audioInput = AudioConfig.FromStreamInput(_pushStream);
    _speechRecognizer = new SpeechRecognizer(_speechConfig, _audioInput);
    _speechRecognizer.Recognizing += SpeechRecognizingHandler;
    _speechRecognizer.Recognized += SpeechRecognizedHandler;
    _speechRecognizer.Canceled += SpeechCanceledHandler;

    _audioSource = GameObject.Find("AudioSource").GetComponent<AudioSource>();
    _audioSource.loop = false;
    _audioSource.playOnAwake = false;
}
void Start()
{
    if (outputText == null)
    {
        UnityEngine.Debug.LogError("outputText property is null! Assign a UI Text element to it.");
    }
    else if (recoButton == null)
    {
        _message = "recoButton property is null! Assign a UI Button to it.";
        UnityEngine.Debug.LogError(_message);
    }
    else
    {
        // Continue with normal initialization, Text and Button objects are present.
#if PLATFORM_ANDROID
        // Request to use the microphone, cf.
        // https://docs.unity3d.com/Manual/android-RequestingPermissions.html
        _message = "Waiting for mic permission";
        if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
        {
            Permission.RequestUserPermission(Permission.Microphone);
        }
#elif PLATFORM_IOS
        if (!Application.HasUserAuthorization(UserAuthorization.Microphone))
        {
            Application.RequestUserAuthorization(UserAuthorization.Microphone);
        }
#else
        _micPermissionGranted = true;
        _message = "Click button to recognize speech";
#endif
        _config = SpeechTranslationConfig.FromSubscription(SubscriptionKey, SubscriptionRegion);
        _config.SpeechRecognitionLanguage = "es-US";
        _config.AddTargetLanguage("en-US");

        _pushStream = AudioInputStream.CreatePushStream();
        _audioInput = AudioConfig.FromStreamInput(_pushStream);
        _recognizer = new TranslationRecognizer(_config, _audioInput);
        _recognizer.Recognizing += RecognizingHandler;
        _recognizer.Recognized += RecognizedHandler;
        _recognizer.Canceled += CanceledHandler;

        foreach (var device in Microphone.devices)
        {
            Debug.Log("DeviceName: " + device);
        }
        _audioSource = GameObject.Find("AudioSource").GetComponent<AudioSource>();
    }
}
public async Task Start()
{
    var config = SpeechConfig.FromSubscription(_projectSettings.AzureSpeechServiceSubscriptionKey, _projectSettings.AzureSpeechServiceRegionName);

    var audioFormat = AudioStreamFormat.GetWaveFormatPCM(8000, 16, 1);
    _inputStream = AudioInputStream.CreatePushStream(audioFormat);
    _audioInput = AudioConfig.FromStreamInput(_inputStream);

    _recognizer = new SpeechRecognizer(config, _audioInput);
    _recognizer.SessionStarted += RecognizerStarted;
    _recognizer.Recognized += RecognizerRecognized;
    _recognizer.Canceled += RecognizerCancelled;

    await _recognizer.StartContinuousRecognitionAsync();
}
public AzureSpeechRecognizer(string key, string region, WaveStream stream)
{
    var speechConfig = SpeechConfig.FromSubscription(key, region);
    this.stream = NormalizeStream(stream);
    this.pushStream = AudioInputStream.CreatePushStream();
    this.recognizer = new SpeechRecognizer(speechConfig, AudioConfig.FromStreamInput(this.pushStream));
    this.resultId = Guid.NewGuid().ToString();
    this.lockObj = new object();

    this.recognizer.Recognized += (snd, evt) =>
    {
        string id = null;
        lock (this.lockObj)
        {
            id = this.resultId;
            this.resultId = Guid.NewGuid().ToString();
        }

        if (!string.IsNullOrWhiteSpace(evt.Result.Text))
        {
            this.SpeechRecognized?.Invoke(this, new RecognitionEventArgs(evt, id));
        }
    };

    this.recognizer.Recognizing += (snd, evt) =>
    {
        string id = null;
        lock (this.lockObj)
        {
            id = this.resultId;
        }

        this.SpeechPredicted?.Invoke(this, new RecognitionEventArgs(evt, id));
    };

    this.recognizer.Canceled += (snd, evt) =>
    {
        Debug.WriteLine("lost recognizer");
    };
}
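This constructor normalizes the incoming WaveStream and wires the events, but the audio pump itself is not shown. Below is a hedged, self-contained sketch of one way such a WaveStream might be fed into the push stream while continuous recognition runs; the helper name, buffer size, and the short drain delay are assumptions, not part of the original class.

using System;
using System.Threading.Tasks;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using NAudio.Wave;

// Hypothetical helper: pumps a (already normalized) WaveStream into a push stream.
public static class WaveStreamPumpSketch
{
    public static async Task RecognizeAsync(WaveStream stream, PushAudioInputStream pushStream, SpeechRecognizer recognizer)
    {
        await recognizer.StartContinuousRecognitionAsync();

        var buffer = new byte[4096];
        int read;
        while ((read = stream.Read(buffer, 0, buffer.Length)) > 0)
        {
            pushStream.Write(buffer, read);   // push only the bytes actually read
        }

        pushStream.Close();                   // end of stream lets the service produce final results
        await Task.Delay(TimeSpan.FromSeconds(2));   // crude drain before stopping, for illustration only
        await recognizer.StopContinuousRecognitionAsync();
    }
}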
public RecognitionDemo(string region, string key, string locale, int millisecondsPerFrame)
{
    _disposed = false;
    _millisecondsPerFrame = millisecondsPerFrame;

    SpeechConfig config = SpeechConfig.FromSubscription(key, region);
    config.SpeechRecognitionLanguage = locale;
    config.OutputFormat = OutputFormat.Detailed;

    _audioInput = CreateAudioInputStream();
    _recognizer = new SpeechRecognizer(config, AudioConfig.FromStreamInput(_audioInput));
    _audioCapture = CreateAudioCaptureDevice();
    _audio = new FileStream("audio.raw", FileMode.Create);
    _transcript = new StreamWriter(new FileStream("transcript.txt", FileMode.Create), Encoding.UTF8);
    _stopwatch = new Stopwatch();
    _framesCaptured = 0;
    _intermediateResultsReceived = 0;
    _finalResultsReceived = 0;
    _identicalResults = 0;
    _lastResult = null;
}
/// <summary>
/// Creates Recognizer with baseline model and selected language:
/// Creates a config with subscription key and selected region.
/// If the input source is an audio file, creates the recognizer with the audio file, otherwise with the default mic.
/// Waits on RunRecognition.
/// </summary>
private async Task CreateRecognizer(byte[] channel)
{
    // TODO: support users specifying a different region.
    var config = SpeechConfig.FromSubscription(this.SubscriptionKey, this.Region);
    config.SpeechRecognitionLanguage = this.RecognitionLanguage;
    config.OutputFormat = OutputFormat.Detailed;

    SpeechRecognizer basicRecognizer;
    PushAudioInputStream pushStream = AudioInputStream.CreatePushStream();
    pushStream.Write(channel);
    pushStream.Close();

    using (var audioInput = AudioConfig.FromStreamInput(pushStream))
    {
        using (basicRecognizer = new SpeechRecognizer(config, audioInput))
        {
            await this.RunRecognizer(basicRecognizer, stopBaseRecognitionTaskCompletionSource).ConfigureAwait(false);
        }
    }
}
private async Task InjectStreamIntoRecognizerAsync(PushAudioInputStream audioInputStream, BlobClient blobStream)
{
    using (var stream = await blobStream.OpenReadAsync())
    {
        var decoder = new OpusDecoder(16000, 1);
        var opus = new OpusOggReadStream(decoder, stream);

        while (opus.HasNextPacket)
        {
            short[] packet = opus.DecodeNextPacket();
            if (packet != null)
            {
                for (int i = 0; i < packet.Length; i++)
                {
                    var bytes = BitConverter.GetBytes(packet[i]);
                    audioInputStream.Write(bytes, bytes.Length);
                }
            }
        }
    }

    audioInputStream.Close();
}
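The per-sample BitConverter call above produces correct 16-bit little-endian PCM, but it allocates a two-byte array per sample. A hedged alternative, sketched below under the assumption of a little-endian host (which matches what BitConverter emits on typical .NET targets), converts each decoded packet with a single bulk copy; the helper name is hypothetical and not part of the original code.

using System;
using Microsoft.CognitiveServices.Speech.Audio;

// Hypothetical helper: pushes one decoded 16-bit PCM packet into the recognizer stream
// with one Buffer.BlockCopy instead of a per-sample BitConverter call.
public static class PcmPacketWriter
{
    public static void WritePacket(PushAudioInputStream audioInputStream, short[] packet)
    {
        if (packet == null || packet.Length == 0)
        {
            return;
        }

        var bytes = new byte[packet.Length * sizeof(short)];
        Buffer.BlockCopy(packet, 0, bytes, 0, bytes.Length);   // copy all samples at once, native byte order
        audioInputStream.Write(bytes, bytes.Length);
    }
}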
/// <summary>
/// Sets up the initial state needed for Direct Line Speech, including creation of the
/// underlying DialogServiceConnector and wiring of its events.
/// </summary>
/// <param name="keywordFile"> The keyword file to be loaded as part of initialization.</param>
/// <returns> A task that completes once initialization is complete. </returns>
public Task InitializeAsync(StorageFile keywordFile)
{
    Contract.Requires(keywordFile != null);

    var configRefreshRequired = this.TryRefreshConfigValues();
    var refreshConnector = configRefreshRequired || (this.keywordFilePath != keywordFile.Path);

    if (LocalSettingsHelper.SetProperty != null)
    {
        this.enableKwsLogging = true;
    }

    if (this.enableKwsLogging)
    {
        refreshConnector = true;
        this.enableKwsLogging = false;
    }

    if (refreshConnector)
    {
        var newConnectorConfiguration = this.CreateConfiguration();

        this.ConfirmationModel = KeywordRecognitionModel.FromFile(keywordFile.Path);
        this.keywordFilePath = keywordFile.Path;
        this.ConnectorConfiguration = newConnectorConfiguration;
        this.connectorInputStream = AudioInputStream.CreatePushStream();

        this.connector?.Dispose();
        this.connector = new DialogServiceConnector(
            this.ConnectorConfiguration,
            AudioConfig.FromStreamInput(this.connectorInputStream));

        this.connector.SessionStarted += (s, e) => this.SessionStarted?.Invoke(e.SessionId);
        this.connector.SessionStopped += (s, e) => this.SessionStopped?.Invoke(e.SessionId);

        this.connector.Recognizing += (s, e) =>
        {
            switch (e.Result.Reason)
            {
                case ResultReason.RecognizingKeyword:
                    this.logger.Log(LogMessageLevel.SignalDetection, $"Local model recognized keyword \"{e.Result.Text}\"");
                    this.KeywordRecognizing?.Invoke(e.Result.Text);
                    this.secondStageConfirmed = true;
                    break;
                case ResultReason.RecognizingSpeech:
                    this.logger.Log(LogMessageLevel.SignalDetection, $"Recognized speech in progress: \"{e.Result.Text}\"");
                    this.SpeechRecognizing?.Invoke(e.Result.Text);
                    break;
                default:
                    throw new InvalidOperationException();
            }
        };

        this.connector.Recognized += (s, e) =>
        {
            KwsPerformanceLogger.KwsEventFireTime = TimeSpan.FromTicks(DateTime.Now.Ticks);
            switch (e.Result.Reason)
            {
                case ResultReason.RecognizedKeyword:
                    var thirdStageStartTime = KwsPerformanceLogger.KwsStartTime.Ticks;
                    thirdStageStartTime = DateTime.Now.Ticks;
                    this.logger.Log(LogMessageLevel.SignalDetection, $"Cloud model recognized keyword \"{e.Result.Text}\"");
                    this.KeywordRecognized?.Invoke(e.Result.Text);
                    this.kwsPerformanceLogger.LogSignalReceived("SWKWS", "A", "3", KwsPerformanceLogger.KwsEventFireTime.Ticks, thirdStageStartTime, DateTime.Now.Ticks);
                    this.secondStageConfirmed = false;
                    break;
                case ResultReason.RecognizedSpeech:
                    this.logger.Log(LogMessageLevel.SignalDetection, $"Recognized final speech: \"{e.Result.Text}\"");
                    this.SpeechRecognized?.Invoke(e.Result.Text);
                    break;
                case ResultReason.NoMatch:
                    // If a KeywordRecognized handler is available, this is a final stage
                    // keyword verification rejection.
                    this.logger.Log(LogMessageLevel.SignalDetection, $"Cloud model rejected keyword");
                    if (this.secondStageConfirmed)
                    {
                        var thirdStageStartTimeRejected = KwsPerformanceLogger.KwsStartTime.Ticks;
                        thirdStageStartTimeRejected = DateTime.Now.Ticks;
                        this.kwsPerformanceLogger.LogSignalReceived("SWKWS", "R", "3", KwsPerformanceLogger.KwsEventFireTime.Ticks, thirdStageStartTimeRejected, DateTime.Now.Ticks);
                        this.secondStageConfirmed = false;
                    }

                    this.KeywordRecognized?.Invoke(null);
                    break;
                default:
                    throw new InvalidOperationException();
            }
        };

        this.connector.Canceled += (s, e) =>
        {
            var code = (int)e.ErrorCode;
            var message = $"{e.Reason.ToString()}: {e.ErrorDetails}";
            this.ErrorReceived?.Invoke(new DialogErrorInformation(code, message));
        };

        this.connector.ActivityReceived += (s, e) =>
        {
            // Note: the contract of when to end a turn is unique to your dialog system. In this sample,
            // it's assumed that receiving a message activity without audio marks the end of a turn. Your
            // dialog system may have a different contract!
            var wrapper = new ActivityWrapper(e.Activity);

            if (wrapper.Type == ActivityWrapper.ActivityType.Event)
            {
                if (!this.startEventReceived)
                {
                    this.startEventReceived = true;
                    return;
                }
                else
                {
                    this.startEventReceived = false;
                }
            }

            var payload = new DialogResponse(
                messageBody: e.Activity,
                messageMedia: e.HasAudio ? new DirectLineSpeechAudioOutputStream(e.Audio, LocalSettingsHelper.OutputFormat) : null,
                shouldEndTurn: (e.Audio == null && wrapper.Type == ActivityWrapper.ActivityType.Message) || wrapper.Type == ActivityWrapper.ActivityType.Event,
                shouldStartNewTurn: wrapper.InputHint == ActivityWrapper.InputHintType.ExpectingInput);

            this.DialogResponseReceived?.Invoke(payload);
        };
    }

    return Task.FromResult(0);
}
/// <summary>
/// Initializes the connection to the Bot.
/// </summary>
/// <param name="settings">Application settings object, built from the input JSON file supplied as run-time argument.</param>
public void InitConnector(AppSettings settings)
{
    DialogServiceConfig config;
    this.BotReplyList = new List<BotReply>();
    this.stopWatch = new Stopwatch();
    this.appsettings = settings;

    if (!string.IsNullOrWhiteSpace(this.appsettings.CustomCommandsAppId))
    {
        // NOTE: Custom commands is a preview Azure Service.
        // Set the custom commands configuration object based on three items:
        // - The Custom commands application ID
        // - Cognitive services speech subscription key.
        // - The Azure region of the subscription key (e.g. "westus").
        config = CustomCommandsConfig.FromSubscription(this.appsettings.CustomCommandsAppId, this.appsettings.SpeechSubscriptionKey, this.appsettings.SpeechRegion);
    }
    else
    {
        // Set the bot framework configuration object based on two items:
        // - Cognitive services speech subscription key. It is needed for billing and is tied to the bot registration.
        // - The Azure region of the subscription key (e.g. "westus").
        config = BotFrameworkConfig.FromSubscription(this.appsettings.SpeechSubscriptionKey, this.appsettings.SpeechRegion);
    }

    if (this.appsettings.SpeechSDKLogEnabled)
    {
        // Speech SDK has verbose logging to local file, which may be useful when reporting issues.
        config.SetProperty(PropertyId.Speech_LogFilename, $"{this.appsettings.OutputFolder}SpeechSDKLog-{DateTime.Now.ToString("yyyy-MM-dd-HH-mm-ss", CultureInfo.CurrentCulture)}.log");
    }

    if (!string.IsNullOrWhiteSpace(this.appsettings.SRLanguage))
    {
        // Set the speech recognition language. If not set, the default is "en-us".
        config.Language = this.appsettings.SRLanguage;
    }

    if (!string.IsNullOrWhiteSpace(this.appsettings.CustomSREndpointId))
    {
        // Set your custom speech end-point id here, as given to you by the speech portal https://speech.microsoft.com/portal.
        // Otherwise the standard speech end-point will be used.
        config.SetServiceProperty("cid", this.appsettings.CustomSREndpointId, ServicePropertyChannel.UriQueryParameter);

        // Custom Speech does not support cloud Keyword Verification at the moment. If this is not done, there will be an error
        // from the service and connection will close. Remove line below when supported.
        config.SetProperty("KeywordConfig_EnableKeywordVerification", "false");
    }

    if (!string.IsNullOrWhiteSpace(this.appsettings.CustomVoiceDeploymentIds))
    {
        // Set one or more IDs associated with the custom TTS voice your bot will use.
        // The format of the string is one or more GUIDs separated by comma (no spaces). You get these GUIDs from
        // your custom TTS on the speech portal https://speech.microsoft.com/portal.
        config.SetProperty(PropertyId.Conversation_Custom_Voice_Deployment_Ids, this.appsettings.CustomVoiceDeploymentIds);
    }

    this.timeout = this.appsettings.Timeout;

    if (!string.IsNullOrWhiteSpace(this.appsettings.KeywordRecognitionModel))
    {
        this.kwsTable = KeywordRecognitionModel.FromFile(this.appsettings.KeywordRecognitionModel);
    }

    if (this.appsettings.SetPropertyId != null)
    {
        foreach (KeyValuePair<string, JToken> setPropertyIdPair in this.appsettings.SetPropertyId)
        {
            config.SetProperty(setPropertyIdPair.Key, setPropertyIdPair.Value.ToString());
        }
    }

    if (this.appsettings.SetPropertyString != null)
    {
        foreach (KeyValuePair<string, JToken> setPropertyStringPair in this.appsettings.SetPropertyString)
        {
            config.SetProperty(setPropertyStringPair.Key.ToString(CultureInfo.CurrentCulture), setPropertyStringPair.Value.ToString());
        }
    }

    if (this.appsettings.SetServiceProperty != null)
    {
        foreach (KeyValuePair<string, JToken> setServicePropertyPair in this.appsettings.SetServiceProperty)
        {
            config.SetServiceProperty(setServicePropertyPair.Key.ToString(CultureInfo.CurrentCulture), setServicePropertyPair.Value.ToString(), ServicePropertyChannel.UriQueryParameter);
        }
    }

    if (this.appsettings.RealTimeAudio)
    {
        config.SetProperty("SPEECH-AudioThrottleAsPercentageOfRealTime", "100");
        config.SetProperty("SPEECH-TransmitLengthBeforeThrottleMs", "0");
    }

    if (this.connector != null)
    {
        // Then dispose the object
        this.connector.Dispose();
        this.connector = null;
    }

    this.pushAudioInputStream = AudioInputStream.CreatePushStream();
    this.connector = new DialogServiceConnector(config, AudioConfig.FromStreamInput(this.pushAudioInputStream));

    if (this.appsettings.BotGreeting)
    {
        // Starting the timer to calculate latency for Bot Greeting.
        this.stopWatch.Restart();
    }

    this.AttachHandlers();
}
/// <summary>
/// Sets up the initial state needed for Direct Line Speech, including creation of the
/// underlying DialogServiceConnector and wiring of its events.
/// </summary>
/// <param name="keywordFile"> The keyword file to be loaded as part of initialization.</param>
/// <returns> A task that completes once initialization is complete. </returns>
public Task InitializeAsync(StorageFile keywordFile)
{
    Contract.Requires(keywordFile != null);

    // Default values -- these can be updated
    this.ConnectorConfiguration = this.CreateConfiguration();
    this.ConfirmationModel = KeywordRecognitionModel.FromFile(keywordFile.Path);

    this.connectorInputStream = AudioInputStream.CreatePushStream();
    this.connector = new DialogServiceConnector(
        this.ConnectorConfiguration,
        AudioConfig.FromStreamInput(this.connectorInputStream));

    this.connector.SessionStarted += (s, e) => this.SessionStarted?.Invoke(e.SessionId);
    this.connector.SessionStopped += (s, e) => this.SessionStopped?.Invoke(e.SessionId);

    this.connector.Recognizing += (s, e) =>
    {
        switch (e.Result.Reason)
        {
            case ResultReason.RecognizingKeyword:
                this.logger.Log($"Local model recognized keyword \"{e.Result.Text}\"");
                this.KeywordRecognizing?.Invoke(e.Result.Text);
                break;
            case ResultReason.RecognizingSpeech:
                this.logger.Log($"Recognized speech in progress: \"{e.Result.Text}\"");
                this.SpeechRecognizing?.Invoke(e.Result.Text);
                break;
            default:
                throw new InvalidOperationException();
        }
    };

    this.connector.Recognized += (s, e) =>
    {
        switch (e.Result.Reason)
        {
            case ResultReason.RecognizedKeyword:
                this.logger.Log($"Cloud model recognized keyword \"{e.Result.Text}\"");
                this.KeywordRecognized?.Invoke(e.Result.Text);
                break;
            case ResultReason.RecognizedSpeech:
                this.logger.Log($"Recognized final speech: \"{e.Result.Text}\"");
                this.SpeechRecognized?.Invoke(e.Result.Text);
                break;
            case ResultReason.NoMatch:
                // If a KeywordRecognized handler is available, this is a final stage
                // keyword verification rejection.
                this.logger.Log($"Cloud model rejected keyword");
                this.KeywordRecognized?.Invoke(null);
                break;
            default:
                throw new InvalidOperationException();
        }
    };

    this.connector.Canceled += (s, e) =>
    {
        var code = (int)e.ErrorCode;
        var message = $"{e.Reason.ToString()}: {e.ErrorDetails}";
        this.ErrorReceived?.Invoke(new DialogErrorInformation(code, message));
    };

    this.connector.ActivityReceived += (s, e) =>
    {
        // Note: the contract of when to end a turn is unique to your dialog system. In this sample,
        // it's assumed that receiving a message activity without audio marks the end of a turn. Your
        // dialog system may have a different contract!
        var wrapper = new ActivityWrapper(e.Activity);
        var payload = new DialogResponse(
            messageBody: e.Activity,
            messageMedia: e.HasAudio ? new DirectLineSpeechAudioOutputStream(e.Audio, LocalSettingsHelper.OutputFormat) : null,
            shouldEndTurn: e.Audio == null && wrapper.Type == ActivityWrapper.ActivityType.Message,
            shouldStartNewTurn: wrapper.InputHint == ActivityWrapper.InputHintType.ExpectingInput);

        this.logger.Log($"Connector activity received");
        this.DialogResponseReceived?.Invoke(payload);
    };

    return Task.FromResult(0);
}
public MessageSendingStream(PushAudioInputStream pushStream)
{
    this.pushStream = pushStream;
    this.memoryBuffer = new InMemoryRandomAccessStream();
}