/// <summary>
/// Runs long-running speech recognition on each audio item and writes every
/// result's alternative transcripts to a local text file.
/// </summary>
/// <param name="target">Audio inputs to transcribe; each is expected to carry a "gs://" storage URI.</param>
/// <returns><c>true</c> once all inputs have been processed (files that fail with an RPC error are skipped).</returns>
public object AnalyzeSoundToText(List<RecognitionAudio> target)
{
    foreach (var sound in target)
    {
        try
        {
            var operation = client.LongRunningRecognize(this.recognitionConfig, sound);
            var completed = operation.PollUntilCompleted();

            // Derive a filesystem-safe name from the audio's storage URI
            // (e.g. "gs://bucket/file.flac" -> "__bucket_file.flac").
            var soundFilename = JObject.Parse(sound.ToString())["uri"].ToString()
                .Replace("gs:", "").Replace("/", "_");

            var i = 0;
            foreach (var result in completed.Result.Results)
            {
                var timestamp = SpeechConnector.UnixTimeNow();
                var outputFile = $"result_{soundFilename}_{timestamp}_{i}.txt";
                // The using block disposes (and flushes/closes) the writer;
                // the previous explicit Close() inside it was redundant.
                using (StreamWriter writer = new StreamWriter(outputFile))
                {
                    foreach (var alternative in result.Alternatives)
                    {
                        writer.WriteLine(alternative.Transcript);
                    }
                }
                // BUG FIX: the logged name previously contained a stray underscore
                // ("..._{i}_.txt") that did not match the file actually written.
                Console.WriteLine($"The result saved the file -> {outputFile}");
                i++;
            }
        }
        catch (Grpc.Core.RpcException e)
        {
            Console.WriteLine(e);
            Console.WriteLine("Something wrong :( Skipping this file");
        }
    }
    return true;
}
/// <summary>
/// Transcribes a FLAC audio object stored in Google Cloud Storage, with speaker
/// diarization, word time offsets, and automatic punctuation enabled, blocking
/// until the long-running recognition operation completes.
/// </summary>
/// <param name="transParams">Bucket/object location, language, speaker bounds, and hint phrases.</param>
/// <returns>The completed recognition response.</returns>
public LongRunningRecognizeResponse TranscribeInCloud(TranscribeParameters transParams)
{
    string storageUri = "gs://" + transParams.GoogleCloudBucketName + "/" + transParams.objectName;
    RecognitionAudio audio = RecognitionAudio.FromStorageUri(storageUri);

    var diarization = new SpeakerDiarizationConfig
    {
        EnableSpeakerDiarization = true,
        MinSpeakerCount = transParams.MinSpeakerCount,
        MaxSpeakerCount = transParams.MaxSpeakerCount,
    };

    var config = new RecognitionConfig
    {
        Encoding = RecognitionConfig.Types.AudioEncoding.Flac,
        SampleRateHertz = 44100,
        EnableWordTimeOffsets = true,
        LanguageCode = transParams.language,
        EnableAutomaticPunctuation = true,
        DiarizationConfig = diarization,
        SpeechContexts = { new SpeechContext { Phrases = { transParams.phrases } } },
    };

    // Kick off the operation and poll until the service reports completion.
    var completed = speechClient.LongRunningRecognize(config, audio).PollUntilCompleted();
    return completed.Result;
}
/// <summary>
/// Transcribes the audio at the given storage URI and returns the top
/// alternative of each result, joined by " " + newline.
/// </summary>
/// <param name="fileUri">Google Cloud Storage URI of the audio, e.g. "gs://bucket/file.wav".</param>
/// <returns>The combined transcript; an empty string when recognition produced no results.</returns>
public string GetSpeechText(string fileUri)
{
    var response = _client
        .LongRunningRecognize(_config, RecognitionAudio.FromStorageUri(fileUri))
        .PollUntilCompleted();

    // string.Join produces the same separator as the previous
    // Aggregate($"{x} {Environment.NewLine}{y}") chain, but returns ""
    // for an empty result set instead of throwing InvalidOperationException.
    return string.Join(
        $" {Environment.NewLine}",
        response.Result.Results.Select(r => r.Alternatives.First().Transcript));
}
/// <summary>
/// Asynchronously recognize and transcribe a long audio file.
/// <para>
/// See <a href="https://cloud.google.com/speech-to-text/docs/async-recognize">Transcribe long audio files</a>
/// for background on asynchronous (long-running) recognition.
/// </para>
/// </summary>
/// <param name="storageUri">The storage URI for the audio.</param>
/// <param name="encoding">Optional audio encoding type.</param>
/// <param name="sampleRateHertz">Optional audio sample rate in hertz.</param>
/// <param name="languageCode">Optional language code of the audio i.e. "en-US".</param>
/// <returns>
/// An <see cref="IAsyncEnumerable{T}" /> where each iterator returns a progress percent and, on the
/// final iteration only, the recognition alternatives that carry word-level results.
/// </returns>
public async IAsyncEnumerable<(int Progress, IReadOnlyList<SpeechRecognitionAlternative> Transcription)> LongRunningRecognizeAsync(
    string storageUri,
    AudioEncoding encoding = AudioEncoding.Linear16,
    int sampleRateHertz = 16000,
    string languageCode = "en-US")
{
    var config = new RecognitionConfig()
    {
        Encoding = encoding,
        SampleRateHertz = sampleRateHertz,
        LanguageCode = languageCode,
        EnableAutomaticPunctuation = true,
        DiarizationConfig = new SpeakerDiarizationConfig()
        {
            EnableSpeakerDiarization = true,
        },
    };

    var longOperation = _client.LongRunningRecognize(config, RecognitionAudio.FromStorageUri(storageUri));
    var lastProgressPercent = 0;
    while (true)
    {
        // NOTE: longOperation is never null here (assigned above and by PollOnceAsync),
        // so the previous `longOperation != null` guard was dead code.
        if (longOperation.IsCompleted)
        {
            var response = longOperation.Result;
            // Only alternatives that contain word-level detail are useful to callers.
            var wordAlternatives = response.Results
                .SelectMany(r => r.Alternatives)
                .Where(a => a.Words.Count > 0);
            yield return (longOperation.Metadata.ProgressPercent, wordAlternatives.ToList());
            yield break;
        }

        longOperation = await longOperation.PollOnceAsync();

        var progressPercent = longOperation.Metadata.ProgressPercent;
        if (progressPercent != lastProgressPercent)
        {
            // Only emit progress percent if it has changed.
            lastProgressPercent = progressPercent;
            yield return (progressPercent, null);
        }

        // Delay 5s before polling again so we don't flood the API with polling requests.
        await Task.Delay(5000);
    }
}
/// <summary>Snippet for LongRunningRecognize</summary>
public void LongRunningRecognize_RequestObject()
{
    // Snippet: LongRunningRecognize(LongRunningRecognizeRequest,CallSettings)
    // Create client
    SpeechClient speechClient = SpeechClient.Create();
    // Initialize request argument(s)
    var request = new LongRunningRecognizeRequest
    {
        Config = new RecognitionConfig
        {
            Encoding = RecognitionConfig.Types.AudioEncoding.Flac,
            SampleRateHertz = 44100,
            LanguageCode = "en-US",
        },
        Audio = new RecognitionAudio
        {
            Uri = "gs://bucket_name/file_name.flac",
        },
    };
    // Make the request, then poll until the long-running operation completes
    Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> operation =
        speechClient.LongRunningRecognize(request);
    Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> finished =
        operation.PollUntilCompleted();
    // Retrieve the operation result
    LongRunningRecognizeResponse result = finished.Result;
    // Alternatively, store the operation name and retrieve the operation later by name
    string operationName = operation.Name;
    Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> retrieved =
        speechClient.PollOnceLongRunningRecognize(operationName);
    // Check if the retrieved long-running operation has completed
    if (retrieved.IsCompleted)
    {
        // If it has completed, then access the result
        LongRunningRecognizeResponse retrievedResult = retrieved.Result;
    }
    // End snippet
}
/// <summary>
/// Separates different speakers in an audio file recording and prints the
/// speaker tag assigned to each word in the transcript.
/// (The previous summary claimed this printed word confidence levels, but the
/// code never reads confidence — it reads <c>SpeakerTag</c>.)
/// </summary>
/// <param name="localFilePath">Path to local audio file, e.g. /path/audio.wav</param>
public static void SampleLongRunningRecognize(string localFilePath)
{
    SpeechClient speechClient = SpeechClient.Create();
    // string localFilePath = "resources/commercial_mono.wav"
    LongRunningRecognizeRequest request = new LongRunningRecognizeRequest
    {
        Config = new RecognitionConfig
        {
            // If enabled, each word in the first alternative of each result will be
            // tagged with a speaker tag to identify the speaker.
            EnableSpeakerDiarization = true,
            // Optional. Specifies the estimated number of speakers in the conversation.
            DiarizationSpeakerCount = 2,
            // The language of the supplied audio
            LanguageCode = "en-US",
        },
        Audio = new RecognitionAudio
        {
            Content = ByteString.CopyFrom(File.ReadAllBytes(localFilePath)),
        },
    };
    // Poll until the returned long-running operation is complete
    LongRunningRecognizeResponse response =
        speechClient.LongRunningRecognize(request).PollUntilCompleted().Result;
    foreach (var result in response.Results)
    {
        // Skip results without alternatives (previously an unchecked [0] access).
        if (result.Alternatives.Count == 0)
        {
            continue;
        }
        // First alternative has words tagged with speakers
        SpeechRecognitionAlternative alternative = result.Alternatives[0];
        Console.WriteLine($"Transcript: {alternative.Transcript}");
        // Print the speakerTag of each word
        foreach (var word in alternative.Words)
        {
            Console.WriteLine($"Word: {word.Word}");
            Console.WriteLine($"Speaker tag: {word.SpeakerTag}");
        }
    }
}
/// <summary>Snippet for LongRunningRecognize</summary>
public void LongRunningRecognize_RequestObject()
{
    // Snippet: LongRunningRecognize(LongRunningRecognizeRequest, CallSettings)
    // Create client
    SpeechClient speechClient = SpeechClient.Create();
    // Initialize request argument(s)
    var request = new LongRunningRecognizeRequest
    {
        Config = new RecognitionConfig(),
        Audio = new RecognitionAudio(),
    };
    // Make the request and wait for the long-running operation to finish
    Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> operation =
        speechClient.LongRunningRecognize(request);
    Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> finished =
        operation.PollUntilCompleted();
    // Retrieve the operation result
    LongRunningRecognizeResponse result = finished.Result;
    // The operation name can be stored, then the long-running operation retrieved later by name
    string operationName = operation.Name;
    Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> retrieved =
        speechClient.PollOnceLongRunningRecognize(operationName);
    // Check if the retrieved long-running operation has completed
    if (retrieved.IsCompleted)
    {
        // If it has completed, then access the result
        LongRunningRecognizeResponse retrievedResult = retrieved.Result;
    }
    // End snippet
}