/// <summary>
/// Stops the continuous recognition session on the underlying recognizer,
/// logs the shutdown, and finalizes the transcript for this conversation.
/// </summary>
public async Task StopContinuousRecognitionAsync()
{
    // Ask the Azure recognizer to stop pulling audio from the push stream.
    await speechRecognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);

    Logger.LogInformation($"Session stopped for {ConversationId}:{SpeakerType.ToString()}");

    // Serialize and upload whatever segments were recognized.
    await OnCompletedSpeechAsync().ConfigureAwait(false);
}
// NOTE(review): the Naver API keys below are hard-coded and committed to
// source control — move them to secure configuration and rotate them.
/// <summary>
/// Requests synthesized speech for <paramref name="speech"/> from the Naver
/// Clova Voice TTS endpoint and plays the returned MP3 on <c>audioSource</c>,
/// restarting the "Feedback" coroutine for the new clip.
/// </summary>
/// <param name="speech">Text to synthesize.</param>
/// <param name="speaker">Voice to use; the enum name is sent as the "speaker" form field.</param>
IEnumerator Say(string speech, SpeakerType speaker = SpeakerType.mijin)
{
    Debug.Log("[SpeechRenderrer::Say]" + speech);

    string url = "https://naveropenapi.apigw.ntruss.com/voice/v1/tts";

    Dictionary<string, string> headers = new Dictionary<string, string>();
    headers.Add("X-NCP-APIGW-API-KEY-ID", "4lk8cmcq67");
    headers.Add("X-NCP-APIGW-API-KEY", "Dnv1bksb2Trwh7DIbahih3QxFR9FOtAEdN1fPZz2");

    WWWForm form = new WWWForm();
    form.AddField("speaker", speaker.ToString());
    form.AddField("speed", "0");
    form.AddField("text", speech);
    byte[] rawData = form.data;

    using (WWW ttsRequest = new WWW(url, rawData, headers))
    {
        yield return ttsRequest;

        // Bug fix: the original logged the error but still fell through and
        // tried to decode/play the (invalid) response bytes. Bail out instead.
        if (ttsRequest.error != null)
        {
            Debug.Log(ttsRequest.error);
            yield break;
        }

        audioSource.clip = GetAudioClipFromMP3ByteArray(ttsRequest.bytes);
        audioSource.Play();

        // Restart the mouth/feedback animation in sync with the new clip.
        StopCoroutine("Feedback");
        StartCoroutine("Feedback");
    }

    yield return null;
}
/// <summary>
/// Configures Azure Cognitive Services continuous speech recognition over a
/// push audio stream and wires the recognizer lifecycle/result event handlers.
/// </summary>
/// <param name="config">Configuration holding the Azure.Cognitive.Speech.* settings.</param>
/// <param name="logger">Logger for recognition lifecycle and diagnostic messages.</param>
/// <exception cref="ArgumentNullException"><paramref name="config"/> or <paramref name="logger"/> is null.</exception>
/// <exception cref="InvalidOperationException">The speech configuration could not be created.</exception>
public SpeechRecognition(IConfiguration config, ILogger<SpeechRecognition> logger)
{
    if (config == null) { throw new ArgumentNullException(nameof(config)); }
    if (logger == null) { throw new ArgumentNullException(nameof(logger)); }

    this.Logger = logger;
    this.Config = config;

    var speechKey = config["Azure.Cognitive.Speech.Key"];
    var speechRegion = config["Azure.Cognitive.Speech.Region"];
    var speechLanguage = config["Azure.Cognitive.Speech.Language"];
    var speechEndpointId = config["Azure.Cognitive.Speech.EndpointId"];

    listSegment = new List<SegmentResult>();

    speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);

    // Bug fix: the original guarded recognizer creation with `if (speechConfig
    // != null)` but then attached event handlers unconditionally, which would
    // have thrown a NullReferenceException had the guard ever been taken.
    // Fail fast with a clear error instead.
    if (speechConfig == null)
    {
        throw new InvalidOperationException("Unable to create the Azure speech configuration.");
    }

    speechConfig.SpeechRecognitionLanguage = speechLanguage;
    speechConfig.OutputFormat = OutputFormat.Detailed;
    if (!string.IsNullOrEmpty(speechEndpointId))
    {
        // Optional Custom Speech model endpoint.
        speechConfig.EndpointId = speechEndpointId;
    }

    PushStream = AudioInputStream.CreatePushStream();
    audioConfig = AudioConfig.FromStreamInput(PushStream);
    speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);

    // Intermediate (partial) hypotheses — debug only.
    speechRecognizer.Recognizing += (s, e) =>
    {
        Logger.LogDebug($"Recognizing: Text={e.Result.Text}");
    };

    // Final phrase results: accumulate one SegmentResult per recognized phrase.
    speechRecognizer.Recognized += (s, e) =>
    {
        if (e.Result.Reason == ResultReason.RecognizedSpeech)
        {
            // Building transcription.
            var listBest = new List<NBest>();
            foreach (var best in e.Result.Best())
            {
                listBest.Add(new NBest()
                {
                    Confidence = best.Confidence,
                    Lexical = best.LexicalForm,
                    Itn = best.NormalizedForm,
                    MaskedItn = best.MaskedNormalizedForm,
                    Display = best.Text,
                    Sentiment = null,
                    Words = null
                });
            }

            var segment = new SegmentResult()
            {
                ChannelNumber = null,
                SpeakerId = (long)SpeakerType,
                Offset = e.Result.OffsetInTicks,
                Duration = e.Result.Duration.Ticks,
                OffsetInSeconds = new TimeSpan(e.Result.OffsetInTicks).TotalSeconds,
                DurationInSeconds = e.Result.Duration.TotalSeconds,
                // NOTE(review): local wall-clock time; consider DateTime.UtcNow
                // if these timestamps are compared across machines.
                SpeechCompletedTimeStamp = DateTime.Now,
                NBest = listBest.ToArray()
            };
            listSegment.Add(segment);
        }
        else if (e.Result.Reason == ResultReason.NoMatch)
        {
            Logger.LogDebug($"NoMatch: Speech could not be recognized.");
        }
    };

    speechRecognizer.Canceled += (s, e) =>
    {
        Logger.LogInformation($"Canceled: Reason={e.Reason}");
        if (e.Reason == CancellationReason.Error)
        {
            Logger.LogDebug($"Canceled: ErrorCode={e.ErrorCode}");
            Logger.LogDebug($"Canceled: ErrorDetails={e.ErrorDetails}");
            Logger.LogDebug($"Canceled: Did you update the subscription info?");
        }
    };

    speechRecognizer.SessionStarted += (s, e) =>
    {
        Logger.LogInformation($"Session started for {ConversationId}:{SpeakerType.ToString()}");
    };

    speechRecognizer.SessionStopped += (s, e) =>
    {
        Logger.LogInformation($"Session stopped for {ConversationId}:{SpeakerType.ToString()}");
        Logger.LogInformation("Stop recognition.");
    };
}
/// <summary>
/// Serializes the accumulated recognition segments to JSON and uploads the
/// transcript to Azure blob storage, deleting the local temp file on success.
/// Upload/delete IO failures are logged, not rethrown (best effort).
/// </summary>
public async Task OnCompletedSpeechAsync()
{
    var culture = CultureInfo.InvariantCulture;
    var timestamp = StartTime.ToString("s", culture);

    // Shared "<conversation>/<conversation>-<speaker>-<timestamp>" prefix for
    // both the referenced audio file name and the transcript blob name
    // (previously computed twice from the same inputs).
    var baseName = $"{ConversationId}/{ConversationId}-{culture.TextInfo.ToLower(SpeakerType.ToString())}-{timestamp}";
    var audioFileName = $"{baseName}.wav";

    var audioFileResult = new AudioFileResult()
    {
        AudioFileName = audioFileName,
        AudioFileUrl = audioFileName,
        SegmentResults = listSegment.ToArray()
    };
    var json = new RootObject() { AudioFileResults = new AudioFileResult[] { audioFileResult } }.ToJson();

    // Bug fix: %LocalAppData% is unset on non-Windows hosts, which made
    // Path.Combine throw ArgumentNullException; fall back to the temp dir.
    var localDirectory = Environment.GetEnvironmentVariable("LocalAppData") ?? Path.GetTempPath();
    var outFilePath = Path.Combine(localDirectory, $"{Guid.NewGuid()}.json");

    try
    {
        File.WriteAllText(outFilePath, json);
        if (File.Exists(outFilePath))
        {
            var blobName = $"{baseName}.json";
            await AzureStorageHelper.UploadTranscriptFileAsync(
                outFilePath,
                Config["Azure.Storage.ConnectionString"],
                Config["Azure.Storage.Container.Transcript"],
                blobName).ConfigureAwait(false);
            File.Delete(outFilePath);
            Logger.LogInformation($"Successfully uploaded transcript file for {ConversationId}:{SpeakerType.ToString()}.");
        }
    }
    catch (IOException ex)
    {
        Logger.LogError(ex, $"Issue when uploading (or deleting) transcript file for {ConversationId}:{SpeakerType.ToString()}.");
    }
}
/// <summary>
/// Streams audio frames from the client WebSocket into the speech recognizer's
/// push stream while mirroring them to a local WAV file, then uploads that WAV
/// file to Azure blob storage. Upload/delete IO failures are logged, not rethrown.
/// </summary>
/// <param name="socket">Open WebSocket delivering raw PCM audio; may be null (nothing is received).</param>
/// <param name="conversationId">Conversation the audio belongs to.</param>
/// <param name="speakerType">Which speaker this socket carries.</param>
public async Task ReceiveAsync(WebSocket socket, string conversationId, SpeakerType speakerType)
{
    // PCM format, 16000 samples per second, 16 bits per sample, 1 channel (mono)
    var outFormat = new WaveFormat(16000, 16, 1);

    // Bug fix: %LocalAppData% is unset on non-Windows hosts; fall back to the
    // temp directory rather than letting Path.Combine throw.
    var localDirectory = Environment.GetEnvironmentVariable("LocalAppData") ?? Path.GetTempPath();
    var outFilePath = Path.Combine(localDirectory, $"{Guid.NewGuid()}.wav");
    var startTime = DateTime.Now;

    using (var outFileWriter = new WaveFileWriter(outFilePath, outFormat))
    {
        await speech.StartContinuousRecognitionAsync(conversationId, speakerType, startTime).ConfigureAwait(false);

        var socketBuffer = new byte[Settings.ReceiveBufferSize];
        if (socket != null)
        {
            var result = await socket.ReceiveAsync(new ArraySegment<byte>(socketBuffer), CancellationToken.None).ConfigureAwait(false);
            while (!result.CloseStatus.HasValue)
            {
                // Mirror every frame to disk, and feed non-empty frames to the recognizer.
                outFileWriter.Write(socketBuffer, 0, result.Count);
                if (result.Count > 0)
                {
                    speech.PushStream.Write(socketBuffer, result.Count);
                }
                result = await socket.ReceiveAsync(new ArraySegment<byte>(socketBuffer), CancellationToken.None).ConfigureAwait(false);
            }

            await speech.StopContinuousRecognitionAsync().ConfigureAwait(false);
            await socket.CloseAsync(result.CloseStatus.Value, result.CloseStatusDescription, CancellationToken.None).ConfigureAwait(false);
        }
        // Redundant explicit Close() removed: disposing the writer via `using`
        // finalizes the WAV header and closes the file.
    }

    try
    {
        if (File.Exists(outFilePath))
        {
            var culture = CultureInfo.InvariantCulture;
            var timestamp = startTime.ToString("s", culture);
            var blobName = $"{conversationId}/{conversationId}-{culture.TextInfo.ToLower(speakerType.ToString())}-{timestamp}.wav";
            await AzureStorageHelper.UploadAudioFileAsync(
                outFilePath,
                config["Azure.Storage.ConnectionString"],
                config["Azure.Storage.Container.Audio"],
                blobName).ConfigureAwait(false);
            File.Delete(outFilePath);
            logger.LogInformation($"Successfully uploaded audio file for {conversationId}:{speakerType.ToString()}.");
        }
    }
    catch (IOException ex)
    {
        logger.LogError(ex, $"Issue when uploading (or deleting) audio file for {conversationId}:{speakerType.ToString()}.");
    }
}