/**
 * Transcribes an audio object stored in Google Cloud Storage using a
 * long-running recognition request, blocking until the operation completes.
 *
 * The request is sent as FLAC at 44100 Hz with word time offsets, automatic
 * punctuation and speaker diarization enabled.
 *
 * transParams supplies the bucket name, object name, language code,
 * min/max speaker counts and speech-context phrases for the request.
 *
 * Returns the result of the completed long-running operation.
 */
public LongRunningRecognizeResponse TranscribeInCloud(TranscribeParameters transParams)
{
    // Note: this method relies on the speechClient declared outside of it
    // rather than creating a client of its own.
    RecognitionAudio audio = RecognitionAudio.FromStorageUri(
        $"gs://{transParams.GoogleCloudBucketName}/{transParams.objectName}");

    // Speaker diarization is always on; the speaker-count bounds come from the caller.
    var diarization = new SpeakerDiarizationConfig
    {
        EnableSpeakerDiarization = true,
        MinSpeakerCount = transParams.MinSpeakerCount,
        MaxSpeakerCount = transParams.MaxSpeakerCount
    };

    var recognitionConfig = new RecognitionConfig
    {
        Encoding = RecognitionConfig.Types.AudioEncoding.Flac,
        SampleRateHertz = 44100,
        EnableWordTimeOffsets = true,
        LanguageCode = transParams.language,
        EnableAutomaticPunctuation = true,
        DiarizationConfig = diarization,
        SpeechContexts =
        {
            new SpeechContext { Phrases = { transParams.phrases } }
        }
    };

    // Start the operation, then block until the service reports completion.
    var pendingOperation = speechClient.LongRunningRecognize(recognitionConfig, audio);
    var finishedOperation = pendingOperation.PollUntilCompleted();
    return finishedOperation.Result;
}
/**
 * (Re-)initializes the Cloud-based streaming speech recognizer.
 *
 * Steps, in order:
 *   1. Resets the speaker-ID buffer position (under speakerIdBufferLock).
 *   2. Opens a new streaming-recognize call and stores it in recogStream.
 *   3. Sends the streaming configuration as the first request on that call.
 *   4. Starts a background task that reads recognition responses, logs the
 *      highest-confidence transcript of each result and, when enabled,
 *      forwards it to recognizeSpeaker.
 *   5. Resets cummulativeRecogSeconds to zero.
 *
 * Failures of the configuration write or of the response stream are logged
 * via Debug.WriteLine by the background task; they are not rethrown to the
 * caller of this method.
 */
private void ReInitStreamRecognizer()
{
    lock (speakerIdBufferLock)
    {
        speakerIdBufferPos = 0;
    }

    // Keep the new call in a local as well: the reader task below must stay
    // bound to THIS stream even if recogStream is replaced by a later
    // re-initialization. Reading the field inside the task would make an old
    // task start consuming the new stream alongside the new task.
    var stream = speechClient.StreamingRecognize();
    recogStream = stream;

    SpeakerDiarizationConfig diarizationConfig = new SpeakerDiarizationConfig()
    {
        EnableSpeakerDiarization = ENABLE_SPEAKER_DIARIZATION,
        MaxSpeakerCount = MAX_SPEAKER_COUNT,
        MinSpeakerCount = MIN_SPEAKER_COUNT,
    };

    // The configuration request is issued synchronously here so that it is
    // the first message on the call. The returned task is kept and awaited
    // in the reader task so that a failed write is observed, not dropped.
    var configWrite = stream.WriteAsync(new StreamingRecognizeRequest()
    {
        StreamingConfig = new StreamingRecognitionConfig()
        {
            Config = new RecognitionConfig()
            {
                Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
                AudioChannelCount = 1,
                SampleRateHertz = audioFormat.SampleRate,
                LanguageCode = LANGUAGE_CODE,
                DiarizationConfig = diarizationConfig,
            },
            SingleUtterance = false,
        },
    });

    Task.Run(async () =>
    {
        try
        {
            await configWrite;

            var responses = stream.GetResponseStream();
            while (await responses.MoveNextAsync())
            {
                foreach (var result in responses.Current.Results)
                {
                    if (result.Alternatives.Count == 0)
                    {
                        continue;
                    }

                    // Identify the alternative with the highest confidence.
                    SpeechRecognitionAlternative bestAlt = null;
                    foreach (var alternative in result.Alternatives)
                    {
                        if (bestAlt == null || alternative.Confidence > bestAlt.Confidence)
                        {
                            bestAlt = alternative;
                        }
                    }

                    string transcript = bestAlt.Transcript.Trim();
                    if (transcript.Length == 0)
                    {
                        continue;
                    }

                    string transcriptInfo =
                        $"Speech transcript: {DateTime.Now}: \"" +
                        $"{transcript}\" (confidence={bestAlt.Confidence})";

                    // An alternative may carry no word-level info; indexing
                    // the last word would then throw and end this task.
                    bool hasWords = bestAlt.Words.Count > 0;
                    if (ENABLE_SPEAKER_DIARIZATION && hasWords)
                    {
                        // The speaker tag is taken from the last word of the alternative.
                        int speakerTag = bestAlt.Words[bestAlt.Words.Count - 1].SpeakerTag;
                        transcriptInfo += $" (speakerTag={speakerTag})";
                    }
                    Debug.WriteLine(transcriptInfo);

                    // Speaker identification is skipped when there are no
                    // words, matching the cases the previous code could reach.
                    if (ENABLE_SPEAKER_DIARIZATION && ENABLE_SPEAKER_ID && hasWords)
                    {
                        recognizeSpeaker(transcript, bestAlt);
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // This is the top of a fire-and-forget task: nothing awaits it,
            // so log the failure here instead of leaving it unobserved.
            Debug.WriteLine($"Speech recognition stream terminated: {ex}");
        }
    });

    cummulativeRecogSeconds = 0f;
}