Example #1
0
        /// <summary>
        /// Transcribes each audio input in <paramref name="target"/> via the long-running
        /// recognize API and writes every result's alternatives to a local text file.
        /// Inputs that fail with an RPC error are logged and skipped.
        /// </summary>
        /// <param name="target">Audio inputs to transcribe (Cloud Storage URIs).</param>
        /// <returns><c>true</c> (boxed) once the whole batch has been processed.</returns>
        public object AnalyzeSoundToText(List <RecognitionAudio> target)
        {
            foreach (var sound in target)
            {
                try
                {
                    var response       = client.LongRunningRecognize(this.recognitionConfig, sound);
                    var responseResult = response.PollUntilCompleted();

                    var i = 0;
                    // Derive a filesystem-safe name from the audio's "uri" field,
                    // e.g. "gs://bucket/file" -> "__bucket_file".
                    var soundFilename = JObject.Parse(sound.ToString())["uri"].ToString().Replace("gs:", "").Replace("/", "_");

                    foreach (var result in responseResult.Result.Results)
                    {
                        var timestamp  = SpeechConnector.UnixTimeNow();
                        var outputFile = $"result_{soundFilename}_{timestamp}_{i}.txt";
                        // The using statement disposes (flushes and closes) the writer;
                        // no explicit Close() is needed.
                        using (StreamWriter writer = new StreamWriter(outputFile))
                        {
                            foreach (var alternative in result.Alternatives)
                            {
                                writer.WriteLine(alternative.Transcript);
                            }
                        }
                        // Report the actual file name (the old message appended a stray "_"
                        // before ".txt" that did not match the file written above).
                        Console.WriteLine($"The result saved the file -> {outputFile}");
                        i++;
                    }
                }
                catch (Grpc.Core.RpcException e)
                {
                    Console.WriteLine(e);
                    Console.WriteLine("Something wrong :( Skipping this file");
                }
            }
            return(true);
        }
Example #2
0
        /// <summary>
        /// Runs a long-running speech-recognition job against a FLAC file stored in
        /// Google Cloud Storage and blocks until the transcription completes.
        /// </summary>
        /// <param name="transParams">Bucket, object name, language, phrase hints and speaker-count bounds.</param>
        /// <returns>The completed recognition response.</returns>
        public LongRunningRecognizeResponse TranscribeInCloud(TranscribeParameters transParams)
        {
            // var speechClient = SpeechClient.Create();

            var storageUri = $"gs://{transParams.GoogleCloudBucketName}/{transParams.objectName}";
            var audio      = RecognitionAudio.FromStorageUri(storageUri);

            var diarization = new SpeakerDiarizationConfig
            {
                EnableSpeakerDiarization = true,
                MinSpeakerCount          = transParams.MinSpeakerCount,
                MaxSpeakerCount          = transParams.MaxSpeakerCount
            };

            var config = new RecognitionConfig
            {
                Encoding                   = RecognitionConfig.Types.AudioEncoding.Flac,
                SampleRateHertz            = 44100,
                EnableWordTimeOffsets      = true,
                LanguageCode               = transParams.language,
                EnableAutomaticPunctuation = true,
                DiarizationConfig          = diarization,
                SpeechContexts             =
                {
                    new SpeechContext { Phrases = { transParams.phrases } }
                }
            };

            // Kick off the operation and poll until the service reports completion.
            var completed = speechClient.LongRunningRecognize(config, audio).PollUntilCompleted();

            return completed.Result;
        }
        /// <summary>
        /// Transcribes the audio at <paramref name="fileUri"/> (a Cloud Storage URI) and
        /// returns the top alternative of every result joined into one string.
        /// </summary>
        /// <param name="fileUri">Storage URI of the audio, e.g. "gs://bucket/audio.flac".</param>
        /// <returns>The combined transcript; empty when the service returns no results.</returns>
        public string GetSpeechText(string fileUri)
        {
            var response = _client
                           .LongRunningRecognize(_config, RecognitionAudio.FromStorageUri(fileUri))
                           .PollUntilCompleted();

            // string.Join tolerates an empty result set, whereas Aggregate would throw
            // InvalidOperationException when the recognizer produces no results.
            // The separator " " + NewLine reproduces the original Aggregate output.
            return string.Join($" {Environment.NewLine}",
                               response.Result.Results
                               .Select(x => x.Alternatives.First().Transcript));
        }
        /// <summary>
        /// Asynchronously recognize and transcribe a long audio file stored in Cloud Storage,
        /// yielding progress updates while the long-running operation is polled.
        /// </summary>
        /// <param name="storageUri">The storage URI for the audio.</param>
        /// <param name="encoding">Optional audio encoding type.</param>
        /// <param name="sampleRateHertz">Optional audio sample rate in hertz.</param>
        /// <param name="languageCode">Optional language code of the audio i.e. "en-US".</param>
        /// <returns>
        /// An <see cref="IAsyncEnumerable{T}" /> that yields (progress, null) whenever the
        /// reported progress changes, and finally (progress, transcription) once the
        /// operation completes.
        /// </returns>
        public async IAsyncEnumerable <(int Progress, IReadOnlyList <SpeechRecognitionAlternative> Transcription)> LongRunningRecognizeAsync(
            string storageUri,
            AudioEncoding encoding = AudioEncoding.Linear16,
            int sampleRateHertz    = 16000,
            string languageCode    = "en-US")
        {
            var config = new RecognitionConfig()
            {
                Encoding                   = encoding,
                SampleRateHertz            = sampleRateHertz,
                LanguageCode               = languageCode,
                EnableAutomaticPunctuation = true,
                DiarizationConfig          = new SpeakerDiarizationConfig()
                {
                    EnableSpeakerDiarization = true,
                },
            };

            var longOperation       = _client.LongRunningRecognize(config, RecognitionAudio.FromStorageUri(storageUri));
            var lastProgressPercent = 0;

            while (true)
            {
                // longOperation is never null here: it is assigned at creation and by every poll.
                if (longOperation.IsCompleted)
                {
                    var response = longOperation.Result;
                    // Only keep alternatives carrying word-level detail.
                    var wordAlternatives = response.Results.SelectMany(q => q.Alternatives).Where(q => q.Words.Count > 0);

                    yield return(longOperation.Metadata.ProgressPercent, wordAlternatives.ToList());

                    yield break;
                }

                longOperation = await longOperation.PollOnceAsync();

                var progressPercent = longOperation.Metadata.ProgressPercent;
                if (progressPercent != lastProgressPercent)
                {
                    // Only emit progress percent if it has changed.
                    lastProgressPercent = progressPercent;
                    yield return(progressPercent, null);
                }

                // Delay 5s before polling again so we don't flood the API with polling requests.
                await Task.Delay(5000);
            }
        }
Example #5
0
        /// <summary>Snippet for LongRunningRecognize</summary>
        public void LongRunningRecognize_RequestObject()
        {
            // Snippet: LongRunningRecognize(LongRunningRecognizeRequest,CallSettings)
            // Create client
            SpeechClient speechClient = SpeechClient.Create();

            // Initialize request argument(s)
            var config = new RecognitionConfig
            {
                Encoding        = RecognitionConfig.Types.AudioEncoding.Flac,
                SampleRateHertz = 44100,
                LanguageCode    = "en-US",
            };
            var audio = new RecognitionAudio
            {
                Uri = "gs://bucket_name/file_name.flac",
            };
            var request = new LongRunningRecognizeRequest
            {
                Config = config,
                Audio  = audio,
            };

            // Make the request
            Operation <LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response = speechClient.LongRunningRecognize(request);

            // Poll until the returned long-running operation is complete
            var completedResponse = response.PollUntilCompleted();
            // Retrieve the operation result
            LongRunningRecognizeResponse result = completedResponse.Result;

            // Or get the name of the operation
            string operationName = response.Name;
            // This name can be stored, then the long-running operation retrieved later by name
            var retrievedResponse = speechClient.PollOnceLongRunningRecognize(operationName);

            // Check if the retrieved long-running operation has completed
            if (retrievedResponse.IsCompleted)
            {
                // If it has completed, then access the result
                LongRunningRecognizeResponse retrievedResult = retrievedResponse.Result;
            }
            // End snippet
        }
Example #6
0
        /// <summary>
        /// Print confidence level for individual words in a transcription of a short audio file
        /// Separating different speakers in an audio file recording
        /// </summary>
        /// <param name="localFilePath">Path to local audio file, e.g. /path/audio.wav</param>
        public static void SampleLongRunningRecognize(string localFilePath)
        {
            SpeechClient speechClient = SpeechClient.Create();

            // string localFilePath = "resources/commercial_mono.wav"
            var recognitionConfig = new RecognitionConfig
            {
                // If enabled, each word in the first alternative of each result will be
                // tagged with a speaker tag to identify the speaker.
                EnableSpeakerDiarization = true,
                // Optional. Specifies the estimated number of speakers in the conversation.
                DiarizationSpeakerCount = 2,
                // The language of the supplied audio
                LanguageCode = "en-US",
            };
            var recognitionAudio = new RecognitionAudio
            {
                Content = ByteString.CopyFrom(File.ReadAllBytes(localFilePath)),
            };
            var request = new LongRunningRecognizeRequest
            {
                Config = recognitionConfig,
                Audio  = recognitionAudio,
            };

            // Poll until the returned long-running operation is complete
            var response = speechClient.LongRunningRecognize(request).PollUntilCompleted().Result;

            foreach (var result in response.Results)
            {
                // First alternative has words tagged with speakers
                var alternative = result.Alternatives[0];
                Console.WriteLine($"Transcript: {alternative.Transcript}");

                // Print the speakerTag of each word
                foreach (var word in alternative.Words)
                {
                    Console.WriteLine($"Word: {word.Word}");
                    Console.WriteLine($"Speaker tag: {word.SpeakerTag}");
                }
            }
        }
        /// <summary>Snippet for LongRunningRecognize</summary>
        public void LongRunningRecognize_RequestObject()
        {
            // Snippet: LongRunningRecognize(LongRunningRecognizeRequest, CallSettings)
            // Create client
            SpeechClient speechClient = SpeechClient.Create();

            // Initialize request argument(s) with empty config and audio placeholders
            var request = new LongRunningRecognizeRequest
            {
                Config = new RecognitionConfig(),
                Audio  = new RecognitionAudio(),
            };

            // Make the request
            var response = speechClient.LongRunningRecognize(request);

            // Poll until the returned long-running operation is complete
            var completedResponse = response.PollUntilCompleted();
            // Retrieve the operation result
            LongRunningRecognizeResponse result = completedResponse.Result;

            // Or get the name of the operation
            string operationName = response.Name;
            // This name can be stored, then the long-running operation retrieved later by name
            var retrievedResponse = speechClient.PollOnceLongRunningRecognize(operationName);

            // Check if the retrieved long-running operation has completed
            if (retrievedResponse.IsCompleted)
            {
                // If it has completed, then access the result
                LongRunningRecognizeResponse retrievedResult = retrievedResponse.Result;
            }
            // End snippet
        }