Пример #1
0
        /// <summary>
        /// Runs a long-running recognition request for an audio object stored in
        /// Google Cloud Storage and blocks until the operation has completed.
        /// </summary>
        /// <param name="transParams">
        /// Supplies the bucket name, object name, language code, phrase hints and
        /// the minimum/maximum speaker counts used for diarization.
        /// </param>
        /// <returns>The result of the completed long-running recognize operation.</returns>
        public LongRunningRecognizeResponse TranscribeInCloud(TranscribeParameters transParams)
        {
            // The audio is referenced by its gs:// URI rather than uploaded inline.
            var audioUri = $"gs://{transParams.GoogleCloudBucketName}/{transParams.objectName}";
            var audio    = RecognitionAudio.FromStorageUri(audioUri);

            var diarization = new SpeakerDiarizationConfig
            {
                EnableSpeakerDiarization = true,
                MinSpeakerCount          = transParams.MinSpeakerCount,
                MaxSpeakerCount          = transParams.MaxSpeakerCount,
            };

            // The request is fixed to FLAC at 44100 Hz, with word time offsets and
            // automatic punctuation switched on.
            var config = new RecognitionConfig
            {
                Encoding                   = RecognitionConfig.Types.AudioEncoding.Flac,
                SampleRateHertz            = 44100,
                EnableWordTimeOffsets      = true,
                LanguageCode               = transParams.language,
                EnableAutomaticPunctuation = true,
                DiarizationConfig          = diarization,
            };

            // A single speech context carries the caller-supplied phrases.
            var phraseHints = new SpeechContext();
            phraseHints.Phrases.Add(transParams.phrases);
            config.SpeechContexts.Add(phraseHints);

            // speechClient is declared outside this method (not visible here).
            var operation = speechClient.LongRunningRecognize(config, audio);

            // Synchronously poll until the operation finishes, then hand back its result.
            return operation.PollUntilCompleted().Result;
        }
Пример #2
0
        /**
         * (Re-)initializes the Cloud-based streaming speech recognizer.
         *
         * Resets the speaker-ID buffer position, opens a new streaming call on
         * speechClient, sends the initial configuration request, and starts a
         * background task that writes every non-empty transcript to the debug
         * output (optionally followed by speaker recognition). Finally resets
         * the cumulative recognition time counter.
         */
        private void ReInitStreamRecognizer()
        {
            lock (speakerIdBufferLock)
            {
                speakerIdBufferPos = 0;
            }

            // Keep the stream created by THIS call in a local. The background
            // reader below must stay bound to it: reading the recogStream field
            // instead would make an older reader switch to the newer stream after
            // a later re-initialization and race with that stream's own reader.
            var stream = speechClient.StreamingRecognize();
            recogStream = stream;

            var diarizationConfig = new SpeakerDiarizationConfig()
            {
                EnableSpeakerDiarization = ENABLE_SPEAKER_DIARIZATION,
                MaxSpeakerCount          = MAX_SPEAKER_COUNT,
                MinSpeakerCount          = MIN_SPEAKER_COUNT,
            };

            // The first request on the stream carries only the configuration.
            // NOTE(review): the returned task is not awaited, so a failed write is
            // not observed here and callers may write before it completes — confirm
            // this is acceptable for the callers of this method.
            stream.WriteAsync(new StreamingRecognizeRequest()
            {
                StreamingConfig = new StreamingRecognitionConfig()
                {
                    Config = new RecognitionConfig()
                    {
                        Encoding          = RecognitionConfig.Types.AudioEncoding.Linear16,
                        AudioChannelCount = 1,
                        SampleRateHertz   = audioFormat.SampleRate,
                        LanguageCode      = LANGUAGE_CODE,
                        DiarizationConfig = diarizationConfig,
                    },
                    SingleUtterance = false,
                },
            });

            // NOTE(review): this task is fire-and-forget; an exception thrown
            // inside it ends the loop without being reported to anyone.
            Task.Run(async() =>
            {
                // Fetch the response stream once and reuse it for both
                // MoveNextAsync() and Current.
                var responseStream = stream.GetResponseStream();
                while (await responseStream.MoveNextAsync())
                {
                    foreach (var result in responseStream.Current.Results)
                    {
                        if (result.Alternatives.Count == 0)
                        {
                            continue;
                        }
                        // Identify the alternative with the highest confidence.
                        SpeechRecognitionAlternative bestAlt = null;
                        foreach (var alternative in result.Alternatives)
                        {
                            if (bestAlt == null || alternative.Confidence > bestAlt.Confidence)
                            {
                                bestAlt = alternative;
                            }
                        }
                        string transcript = bestAlt.Transcript.Trim();
                        if (transcript.Length == 0)
                        {
                            continue;
                        }
                        string transcriptInfo =
                            $"Speech transcript: {DateTime.Now}: \"" +
                            $"{transcript}\" (confidence={bestAlt.Confidence})";
                        // The speaker tag is taken from the last word. Skip it when
                        // the alternative has no words, since indexing at -1 would
                        // throw and terminate this background loop.
                        if (ENABLE_SPEAKER_DIARIZATION && bestAlt.Words.Count > 0)
                        {
                            int speakerTag  = bestAlt.Words[bestAlt.Words.Count - 1].SpeakerTag;
                            transcriptInfo += $" (speakerTag={speakerTag})";
                        }
                        Debug.WriteLine(transcriptInfo);
                        if (ENABLE_SPEAKER_DIARIZATION && ENABLE_SPEAKER_ID)
                        {
                            recognizeSpeaker(transcript, bestAlt);
                        }
                    }
                }
            });
            cummulativeRecogSeconds = 0f;
        }