Code Example #1
        public async Task StopContinuousRecognitionAsync()
        {
            await speechRecognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);

            Logger.LogInformation($"Session stopped for {ConversationId}:{SpeakerType.ToString()}");
            await OnCompletedSpeechAsync().ConfigureAwait(false);
        }
Code Example #2
    IEnumerator Say(string speech, SpeakerType speaker = SpeakerType.mijin)
    {
        Debug.Log("[SpeechRenderrer::Say]" + speech);
        //#if UNITY_ANDROID
        //        string uriSpeech = Application.persistentDataPath + "/tts.mp3";
        //#else
        //        string uriSpeech = Application.dataPath + "/tts.mp3";
        //#endif
        //        File.Delete(uriSpeech);

        //ServicePointManager.ServerCertificateValidationCallback = Validator;

        string url = "https://naveropenapi.apigw.ntruss.com/voice/v1/tts";
        //HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        //request.Headers.Add("X-NCP-APIGW-API-KEY-ID", "4lk8cmcq67");
        //request.Headers.Add("X-NCP-APIGW-API-KEY", "Dnv1bksb2Trwh7DIbahih3QxFR9FOtAEdN1fPZz2");
        //request.Method = "POST";
        //byte[] byteDataParams = Encoding.UTF8.GetBytes("speaker=jinho&speed=0&text=" + speech);
        //request.ContentType = "application/x-www-form-urlencoded";
        //request.ContentLength = byteDataParams.Length;
        //Stream st = request.GetRequestStream();
        //st.Write(byteDataParams, 0, byteDataParams.Length);
        //st.Close();
        //HttpWebResponse response = (HttpWebResponse)request.GetResponse();
        //string status = response.StatusCode.ToString();
        ////Console.WriteLine("status=" + status);
        //using (Stream output = File.OpenWrite(uriSpeech))
        //using (Stream input = response.GetResponseStream())
        //{
        //    input.CopyTo(output);
        //}

        //WWW mp3Open = new WWW(uriSpeech);
        //while (mp3Open.isDone)
        //{
        //    yield return null;
        //}

        //byte[] mp3bytes = File.ReadAllBytes(uriSpeech);
        //audioSource.clip = Utils.GetAudioClipFromMP3ByteArray(mp3bytes);
        //audioSource.Play();

        //StopCoroutine("Feedback");
        //StartCoroutine("Feedback");

        WWWForm form = new WWWForm();
        Dictionary<string, string> headers = new Dictionary<string, string>();

        headers.Add("X-NCP-APIGW-API-KEY-ID", "4lk8cmcq67");
        headers.Add("X-NCP-APIGW-API-KEY", "Dnv1bksb2Trwh7DIbahih3QxFR9FOtAEdN1fPZz2");
        //form.AddField("speaker", "jinho");
        form.AddField("speaker", speaker.ToString());
        form.AddField("speed", "0");
        form.AddField("text", speech);

        byte[] rawData = form.data;
        using (WWW ttsRequest = new WWW(url, rawData, headers))
        {
            yield return ttsRequest;

            if (ttsRequest.error != null)
            {
                Debug.Log(ttsRequest.error);
                // Bail out rather than trying to decode an empty/invalid response.
                yield break;
            }

            audioSource.clip = GetAudioClipFromMP3ByteArray(ttsRequest.bytes);
            audioSource.Play();

            StopCoroutine("Feedback");
            StartCoroutine("Feedback");
        }

        yield return null;
    }
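
A minimal way to drive the coroutine above from Unity might look like the sketch below; the Start method placement and the sample sentence are assumptions, while StartCoroutine, the Say signature, and SpeakerType.mijin come from the example itself.

    // Hedged usage sketch: assumes this Start method lives in the same MonoBehaviour
    // that defines Say and the audioSource field used above.
    void Start()
    {
        // Kick off Naver TTS for a sample sentence with the default "mijin" voice.
        StartCoroutine(Say("안녕하세요", SpeakerType.mijin));
    }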
Code Example #3
        public SpeechRecognition(IConfiguration config, ILogger<SpeechRecognition> logger)
        {
            if (config == null)
            {
                throw new ArgumentNullException(nameof(config));
            }

            if (logger == null)
            {
                throw new ArgumentNullException(nameof(logger));
            }

            this.Logger = logger;
            this.Config = config;

            var speechKey        = config["Azure.Cognitive.Speech.Key"];
            var speechRegion     = config["Azure.Cognitive.Speech.Region"];
            var speechLanguage   = config["Azure.Cognitive.Speech.Language"];
            var speechEndpointId = config["Azure.Cognitive.Speech.EndpointId"];

            listSegment = new List<SegmentResult>();

            speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);

            if (speechConfig != null)
            {
                speechConfig.SpeechRecognitionLanguage = speechLanguage;
                speechConfig.OutputFormat = OutputFormat.Detailed;

                if (!string.IsNullOrEmpty(speechEndpointId))
                {
                    speechConfig.EndpointId = speechEndpointId;
                }

                PushStream       = AudioInputStream.CreatePushStream();
                audioConfig      = AudioConfig.FromStreamInput(PushStream);
                speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);
            }

            speechRecognizer.Recognizing += (s, e) =>
            {
                Logger.LogDebug($"Recognizing: Text={e.Result.Text}");
            };

            speechRecognizer.Recognized += (s, e) =>
            {
                if (e.Result.Reason == ResultReason.RecognizedSpeech)
                {
                    // Building transcription.
                    var listBest = new List<NBest>();
                    foreach (var best in e.Result.Best())
                    {
                        listBest.Add(new NBest()
                        {
                            Confidence = best.Confidence,
                            Lexical    = best.LexicalForm,
                            Itn        = best.NormalizedForm,
                            MaskedItn  = best.MaskedNormalizedForm,
                            Display    = best.Text,
                            Sentiment  = null,
                            Words      = null
                        });
                    }

                    var segment = new SegmentResult()
                    {
                        ChannelNumber            = null,
                        SpeakerId                = (long)SpeakerType,
                        Offset                   = e.Result.OffsetInTicks,
                        Duration                 = e.Result.Duration.Ticks,
                        OffsetInSeconds          = new TimeSpan(e.Result.OffsetInTicks).TotalSeconds,
                        DurationInSeconds        = e.Result.Duration.TotalSeconds,
                        SpeechCompletedTimeStamp = DateTime.Now,
                        NBest = listBest.ToArray()
                    };

                    listSegment.Add(segment);
                }
                else if (e.Result.Reason == ResultReason.NoMatch)
                {
                    Logger.LogDebug($"NoMatch: Speech could not be recognized.");
                }
            };

            speechRecognizer.Canceled += (s, e) =>
            {
                Logger.LogInformation($"Canceled: Reason={e.Reason}");

                if (e.Reason == CancellationReason.Error)
                {
                    Logger.LogDebug($"Canceled: ErrorCode={e.ErrorCode}");
                    Logger.LogDebug($"Canceled: ErrorDetails={e.ErrorDetails}");
                    Logger.LogDebug($"Canceled: Did you update the subscription info?");
                }
            };

            speechRecognizer.SessionStarted += (s, e) =>
            {
                Logger.LogInformation($"Session started for {ConversationId}:{SpeakerType.ToString()}");
            };

            speechRecognizer.SessionStopped += (s, e) =>
            {
                Logger.LogInformation($"Session stopped for {ConversationId}:{SpeakerType.ToString()}");
                Logger.LogInformation("Stop recognition.");
            };
        }
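
A rough usage sketch of this class is shown below. The variable names and the sample PCM file are illustrative only; StartContinuousRecognitionAsync(conversationId, speakerType, startTime) and PushStream.Write(buffer, count) mirror how the class is driven in code example #5, and StopContinuousRecognitionAsync is code example #1.

        // Hedged usage sketch; "config", "logger", "conversationId", "speakerType"
        // and the sample PCM file are assumptions, not part of the project code above.
        var speech = new SpeechRecognition(config, logger);
        await speech.StartContinuousRecognitionAsync(conversationId, speakerType, DateTime.Now);

        // Feed raw 16 kHz, 16-bit, mono PCM into the recognizer's push stream
        // (the same format that code example #5 receives over the WebSocket).
        byte[] chunk = File.ReadAllBytes("sample.pcm");
        speech.PushStream.Write(chunk, chunk.Length);

        // Stop recognition; per code examples #1 and #4 this also uploads the transcript JSON.
        await speech.StopContinuousRecognitionAsync();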
Code Example #4
        public async Task OnCompletedSpeechAsync()
        {
            var culture       = CultureInfo.InvariantCulture;
            var timestamp     = StartTime.ToString("s", culture);
            var audioFileName = $"{ConversationId}/{ConversationId}-{culture.TextInfo.ToLower(SpeakerType.ToString())}-{timestamp}.wav";

            var audioFileResult = new AudioFileResult()
            {
                AudioFileName  = audioFileName,
                AudioFileUrl   = audioFileName,
                SegmentResults = listSegment.ToArray()
            };

            var json = new RootObject()
            {
                AudioFileResults = new AudioFileResult[] {
                    audioFileResult
                }
            }.ToJson();

            var localDirectory = Environment.GetEnvironmentVariable("LocalAppData");
            var outFilePath    = Path.Combine(localDirectory, $"{Guid.NewGuid()}.json");

            try
            {
                File.WriteAllText(outFilePath, json);

                if (File.Exists(outFilePath))
                {
                    var blobName = $"{ConversationId}/{ConversationId}-{culture.TextInfo.ToLower(SpeakerType.ToString())}-{timestamp}.json";
                    await AzureStorageHelper.UploadTranscriptFileAsync(outFilePath, Config["Azure.Storage.ConnectionString"], Config["Azure.Storage.Container.Transcript"], blobName).ConfigureAwait(false);

                    File.Delete(outFilePath);

                    Logger.LogInformation($"Successfully uploaded transcript file for {ConversationId}:{SpeakerType.ToString()}.");
                }
            }
            catch (IOException ex)
            {
                Logger.LogError(ex, $"Issue when uploading (or deleting) transcript file for {ConversationId}:{SpeakerType.ToString()}.");
            }
        }
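
AzureStorageHelper is project-specific code that is not included in this listing. Purely as an illustration, a minimal implementation of UploadTranscriptFileAsync built on the Azure.Storage.Blobs package could look like the sketch below; the actual helper may differ.

    using System.Threading.Tasks;
    using Azure.Storage.Blobs;

    // Hedged sketch only: the parameter order matches the call sites in code examples #4 and #5,
    // but this is not the project's actual AzureStorageHelper implementation.
    public static class AzureStorageHelper
    {
        public static async Task UploadTranscriptFileAsync(
            string filePath, string connectionString, string containerName, string blobName)
        {
            var container = new BlobContainerClient(connectionString, containerName);
            await container.CreateIfNotExistsAsync().ConfigureAwait(false);

            // Overwrite any existing blob with the local transcript file.
            await container.GetBlobClient(blobName)
                           .UploadAsync(filePath, overwrite: true)
                           .ConfigureAwait(false);
        }
    }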
Code Example #5
        public async Task ReceiveAsync(WebSocket socket, string conversationId, SpeakerType speakerType)
        {
            // PCM format, 16000 samples per second, 16 bits per sample, 1 channel (mono)
            var outFormat      = new WaveFormat(16000, 16, 1);
            var localDirectory = Environment.GetEnvironmentVariable("LocalAppData");
            var outFilePath    = Path.Combine(localDirectory, $"{Guid.NewGuid()}.wav");
            var startTime      = DateTime.Now;

            using (var outFileWriter = new WaveFileWriter(outFilePath, outFormat))
            {
                await speech.StartContinuousRecognitionAsync(conversationId, speakerType, startTime).ConfigureAwait(false);

                var socketBuffer = new byte[Settings.ReceiveBufferSize];

                if (socket != null)
                {
                    var result = await socket.ReceiveAsync(new ArraySegment<byte>(socketBuffer), CancellationToken.None).ConfigureAwait(false);

                    while (!result.CloseStatus.HasValue)
                    {
                        outFileWriter.Write(socketBuffer, 0, result.Count);

                        if (result.Count > 0)
                        {
                            speech.PushStream.Write(socketBuffer, result.Count);
                        }

                        result = await socket.ReceiveAsync(new ArraySegment<byte>(socketBuffer), CancellationToken.None).ConfigureAwait(false);
                    }

                    await speech.StopContinuousRecognitionAsync().ConfigureAwait(false);

                    await socket.CloseAsync(result.CloseStatus.Value, result.CloseStatusDescription, CancellationToken.None).ConfigureAwait(false);
                }

                outFileWriter.Close();
            }

            try
            {
                if (File.Exists(outFilePath))
                {
                    var culture   = CultureInfo.InvariantCulture;
                    var timestamp = startTime.ToString("s", culture);

                    var blobName = $"{conversationId}/{conversationId}-{culture.TextInfo.ToLower(speakerType.ToString())}-{timestamp}.wav";
                    await AzureStorageHelper.UploadAudioFileAsync(outFilePath, config["Azure.Storage.ConnectionString"], config["Azure.Storage.Container.Audio"], blobName).ConfigureAwait(false);

                    File.Delete(outFilePath);

                    logger.LogInformation($"Successfully uploaded audio file for {conversationId}:{speakerType.ToString()}.");
                }
            }
            catch (IOException ex)
            {
                logger.LogError(ex, $"Issue when uploading (or deleting) audio file for {conversationId}:{speakerType.ToString()}.");
            }
        }
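
Finally, ReceiveAsync needs a WebSocket endpoint to be invoked from. The following is a hedged sketch of how it might be wired into the ASP.NET Core middleware pipeline; the route, the query-parameter names, and the handler variable are assumptions rather than part of the original project.

        // Hedged wiring sketch (e.g. in Startup.Configure). "handler" is an instance of the
        // class that defines ReceiveAsync; the route and query parameters are illustrative.
        app.UseWebSockets();
        app.Use(async (context, next) =>
        {
            if (context.Request.Path == "/ws/audio" && context.WebSockets.IsWebSocketRequest)
            {
                using var socket = await context.WebSockets.AcceptWebSocketAsync();

                // Identify the conversation and the speaker for this audio stream.
                var conversationId = context.Request.Query["conversationId"].ToString();
                var speakerType    = Enum.Parse<SpeakerType>(context.Request.Query["speakerType"].ToString());

                await handler.ReceiveAsync(socket, conversationId, speakerType);
            }
            else
            {
                await next();
            }
        });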