/// <summary>Snippet for LongRunningRecognize</summary>
public void LongRunningRecognize_RequestObject()
{
    // Snippet: LongRunningRecognize(LongRunningRecognizeRequest, CallSettings)
    // Create client
    SpeechClient speechClient = SpeechClient.Create();
    // Initialize request argument(s)
    LongRunningRecognizeRequest request = new LongRunningRecognizeRequest
    {
        Config = new RecognitionConfig(),
        Audio = new RecognitionAudio(),
    };
    // Make the request
    Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response = speechClient.LongRunningRecognize(request);
    // Poll until the returned long-running operation is complete
    Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> completedResponse = response.PollUntilCompleted();
    // Retrieve the operation result
    LongRunningRecognizeResponse result = completedResponse.Result;
    // Or get the name of the operation
    string operationName = response.Name;
    // This name can be stored, then the long-running operation retrieved later by name
    Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> retrievedResponse = speechClient.PollOnceLongRunningRecognize(operationName);
    // Check if the retrieved long-running operation has completed
    if (retrievedResponse.IsCompleted)
    {
        // If it has completed, then access the result
        LongRunningRecognizeResponse retrievedResult = retrievedResponse.Result;
    }
    // End snippet
}
/// <summary>Snippet for LongRunningRecognizeAsync</summary>
public async Task LongRunningRecognizeAsync()
{
    // Snippet: LongRunningRecognizeAsync(RecognitionConfig, RecognitionAudio, CallSettings)
    // Additional: LongRunningRecognizeAsync(RecognitionConfig, RecognitionAudio, CancellationToken)
    // Create client
    SpeechClient speechClient = await SpeechClient.CreateAsync();
    // Initialize request argument(s)
    RecognitionConfig config = new RecognitionConfig();
    RecognitionAudio audio = new RecognitionAudio();
    // Make the request
    Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response = await speechClient.LongRunningRecognizeAsync(config, audio);
    // Poll until the returned long-running operation is complete
    Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> completedResponse = await response.PollUntilCompletedAsync();
    // Retrieve the operation result
    LongRunningRecognizeResponse result = completedResponse.Result;
    // Or get the name of the operation
    string operationName = response.Name;
    // This name can be stored, then the long-running operation retrieved later by name
    Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> retrievedResponse = await speechClient.PollOnceLongRunningRecognizeAsync(operationName);
    // Check if the retrieved long-running operation has completed
    if (retrievedResponse.IsCompleted)
    {
        // If it has completed, then access the result
        LongRunningRecognizeResponse retrievedResult = retrievedResponse.Result;
    }
    // End snippet
}
public LongRunningRecognizeResponse UploadAndTranscribeInCloud(TranscribeParameters transParams)
{
    UploadToCloudIfNeeded(transParams);
    LongRunningRecognizeResponse response = TranscribeInCloud(transParams);
    return response;
}
static void SimplifyRaw(string responseFile, string simplified)
{
    // Clean up from last run
    File.Delete(simplified);
    string priorResponse = File.ReadAllText(responseFile);
    LongRunningRecognizeResponse beforeFix = JsonConvert.DeserializeObject<LongRunningRecognizeResponse>(priorResponse);
    Transcribed_Dto afterFix = TransformResponse.Simpify(beforeFix.Results);
    string afterFixString = JsonConvert.SerializeObject(afterFix, Formatting.Indented);
    File.WriteAllText(simplified, afterFixString);
}
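A minimal usage sketch for SimplifyRaw; both file paths are hypothetical examples, assuming a raw response was previously saved to disk (e.g. by TranscribeAudioFile below):

// Hypothetical usage; the paths are examples only.
SimplifyRaw("out/raw-response.json", "out/simplified.json");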
private async Task<string> Transcript(string originUri, string sourceUri)
{
    var bucketLoader = new BucketLoader();
    (string audioInBucketUri, string objectName) = bucketLoader.UploadFileFromLocal(sourceUri);
    SpeechTranscripter transcripter = new SpeechTranscripter();
    LongRunningRecognizeResponse response = await transcripter.Recognize(audioInBucketUri, new RecognizeConfiguration());
    bucketLoader.DeleteObject(new[] { objectName });
    string transcriptId = ObjectId.GenerateNewId().ToString();
    await RecognitionResponseProcessor.FindSamples(transcriptId, response, sourceUri, originUri);
    return transcriptId;
}
public Transcribed_Dto TranscribeAudioFile(TranscribeParameters transParams, string rawResponseFile = null)
{
    LongRunningRecognizeResponse response = UploadAndTranscribeInCloud(transParams);
    // Save the raw response, if we were passed a file path.
    // (The parameter defaults to null, so check for null or empty rather than "" only.)
    if (!string.IsNullOrEmpty(rawResponseFile))
    {
        string responseString = JsonConvert.SerializeObject(response, Formatting.Indented);
        File.WriteAllText(rawResponseFile, responseString);
    }
    Transcribed_Dto resp = TransformResponse.Simpify(response.Results);
    return TransformResponse.FixSpeakerTags(resp);
}
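A hedged usage sketch; the owning class name Transcriber and the GetTranscribeParameters factory are hypothetical, assumed only for illustration:

// Hypothetical usage; Transcriber and GetTranscribeParameters are assumed names.
var transcriber = new Transcriber();
TranscribeParameters transParams = GetTranscribeParameters();
Transcribed_Dto transcript = transcriber.TranscribeAudioFile(transParams, "out/raw-response.json");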
/// <summary>Snippet for LongRunningRecognizeAsync</summary>
public async Task LongRunningRecognizeAsync_RequestObject()
{
    // Snippet: LongRunningRecognizeAsync(LongRunningRecognizeRequest, CallSettings)
    // Create client
    SpeechClient speechClient = await SpeechClient.CreateAsync();
    // Initialize request argument(s)
    LongRunningRecognizeRequest request = new LongRunningRecognizeRequest
    {
        Config = new RecognitionConfig
        {
            Encoding = RecognitionConfig.Types.AudioEncoding.Flac,
            SampleRateHertz = 44100,
            LanguageCode = "en-US",
        },
        Audio = new RecognitionAudio
        {
            Uri = "gs://bucket_name/file_name.flac",
        },
    };
    // Make the request
    Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response = await speechClient.LongRunningRecognizeAsync(request);
    // Poll until the returned long-running operation is complete
    Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> completedResponse = await response.PollUntilCompletedAsync();
    // Retrieve the operation result
    LongRunningRecognizeResponse result = completedResponse.Result;
    // Or get the name of the operation
    string operationName = response.Name;
    // This name can be stored, then the long-running operation retrieved later by name
    Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> retrievedResponse = await speechClient.PollOnceLongRunningRecognizeAsync(operationName);
    // Check if the retrieved long-running operation has completed
    if (retrievedResponse.IsCompleted)
    {
        // If it has completed, then access the result
        LongRunningRecognizeResponse retrievedResult = retrievedResponse.Result;
    }
    // End snippet
}
public void LongRunningRecognize()
{
    // Snippet: LongRunningRecognize(RecognitionConfig, RecognitionAudio, CallSettings)
    // Create client
    SpeechClient speechClient = SpeechClient.Create();
    // Initialize request argument(s)
    RecognitionConfig config = new RecognitionConfig
    {
        Encoding = RecognitionConfig.Types.AudioEncoding.Flac,
        SampleRateHertz = 44100,
        LanguageCode = "en-US",
    };
    RecognitionAudio audio = new RecognitionAudio
    {
        Uri = "gs://bucket_name/file_name.flac",
    };
    // Make the request
    Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response = speechClient.LongRunningRecognize(config, audio);
    // Poll until the returned long-running operation is complete
    Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> completedResponse = response.PollUntilCompleted();
    // Retrieve the operation result
    LongRunningRecognizeResponse result = completedResponse.Result;
    // Or get the name of the operation
    string operationName = response.Name;
    // This name can be stored, then the long-running operation retrieved later by name
    Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> retrievedResponse = speechClient.PollOnceLongRunningRecognize(operationName);
    // Check if the retrieved long-running operation has completed
    if (retrievedResponse.IsCompleted)
    {
        // If it has completed, then access the result
        LongRunningRecognizeResponse retrievedResult = retrievedResponse.Result;
    }
    // End snippet
}
public async Task<LongRunningRecognizeResponse> Recognize(string storageUri, RecognizeConfiguration configuration = null)
{
    configuration ??= new RecognizeConfiguration();
    var speech = SpeechClient.Create();
    var audio = RecognitionAudio.FromStorageUri(storageUri);
    var longOperation = await speech.LongRunningRecognizeAsync(new RecognitionConfig
    {
        Encoding = configuration.AudioEncoding,
        SampleRateHertz = configuration.SampleRateHertz,
        LanguageCode = configuration.LanguageCode,
        DiarizationConfig = new SpeakerDiarizationConfig
        {
            EnableSpeakerDiarization = true,
            MinSpeakerCount = configuration.MinSpeakerCount,
            MaxSpeakerCount = configuration.MaxSpeakerCount
        },
        Metadata = new RecognitionMetadata
        {
            OriginalMediaType = RecognitionMetadata.Types.OriginalMediaType.Video
        }
    }, audio);
    longOperation = await longOperation.PollUntilCompletedAsync();
    LongRunningRecognizeResponse response = longOperation.Result;
    foreach (var result in response.Results)
    {
        foreach (var alternative in result.Alternatives)
        {
            Console.WriteLine($"Transcript: {alternative.Transcript}");
        }
    }
    return response;
}
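A minimal usage sketch for Recognize; the gs:// URI is a hypothetical example, and the default RecognizeConfiguration is assumed to carry usable encoding and language settings:

// Hypothetical usage; the bucket URI is an example only.
var transcripter = new SpeechTranscripter();
LongRunningRecognizeResponse response = await transcripter.Recognize(
    "gs://my-bucket/interview.flac",
    new RecognizeConfiguration());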
/// <summary>
/// Separates different speakers in an audio file recording and prints the
/// speaker tag assigned to each word in the transcript.
/// </summary>
/// <param name="localFilePath">Path to local audio file, e.g. /path/audio.wav</param>
public static void SampleLongRunningRecognize(string localFilePath)
{
    SpeechClient speechClient = SpeechClient.Create();
    // string localFilePath = "resources/commercial_mono.wav"
    LongRunningRecognizeRequest request = new LongRunningRecognizeRequest
    {
        Config = new RecognitionConfig
        {
            // If enabled, each word in the first alternative of each result will be
            // tagged with a speaker tag to identify the speaker.
            EnableSpeakerDiarization = true,
            // Optional. Specifies the estimated number of speakers in the conversation.
            DiarizationSpeakerCount = 2,
            // The language of the supplied audio
            LanguageCode = "en-US",
        },
        Audio = new RecognitionAudio
        {
            Content = ByteString.CopyFrom(File.ReadAllBytes(localFilePath)),
        },
    };
    // Poll until the returned long-running operation is complete
    LongRunningRecognizeResponse response = speechClient.LongRunningRecognize(request).PollUntilCompleted().Result;
    foreach (var result in response.Results)
    {
        // First alternative has words tagged with speakers
        SpeechRecognitionAlternative alternative = result.Alternatives[0];
        Console.WriteLine($"Transcript: {alternative.Transcript}");
        // Print the speakerTag of each word
        foreach (var word in alternative.Words)
        {
            Console.WriteLine($"Word: {word.Word}");
            Console.WriteLine($"Speaker tag: {word.SpeakerTag}");
        }
    }
}
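A minimal usage sketch, reusing the sample file path from the comment above:

SampleLongRunningRecognize("resources/commercial_mono.wav");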
TranscribeResponse GetLongTranscribeResponse(LongRunningRecognizeResponse response)
{
    return GetTranscribeResponse(response.Results);
}
public async Task FindSamples(string transcriptId, LongRunningRecognizeResponse recognizeResponse, string sourceUri, string originUri)
{
    // LastOrDefault returns null on an empty result set instead of throwing.
    var result = recognizeResponse.Results.LastOrDefault();
    if (result == null)
    {
        throw new ArgumentException("Empty recognition response. Cannot find samples.");
    }
    var words = result.Alternatives.Last().Words;
    int currentSpeakerTag = -1;
    List<WordInfo> currentSampleWords = new List<WordInfo>();
    List<Sample> samples = new List<Sample>();
    foreach (WordInfo wordInfo in words)
    {
        if (currentSpeakerTag == -1)
        {
            // First word: start the first sample.
            currentSpeakerTag = wordInfo.SpeakerTag;
            currentSampleWords.Add(wordInfo);
        }
        else if (currentSpeakerTag != wordInfo.SpeakerTag)
        {
            // Speaker changed: save the finished sample, then start a new one
            // that begins with the current word (previously this word was dropped).
            await AddSample();
            currentSampleWords.Clear();
            currentSpeakerTag = wordInfo.SpeakerTag;
            currentSampleWords.Add(wordInfo);
        }
        else
        {
            currentSampleWords.Add(wordInfo);
        }
    }
    // Save the last sample
    await AddSample();
    var samplesToSave = new SamplesCollection
    {
        samples = samples,
        transcriptId = transcriptId,
        VideoUri = originUri
    };
    var connectionString = Environment.GetEnvironmentVariable("MONGO_CONNECT_STR");
    var database = CosmosUtils.ConnectToDatabase(connectionString, "Samples");
    var collection = database.GetCollection<SamplesCollection>("Samples");
    await CosmosUtils.AddDocumentAsync(collection, samplesToSave);
    foreach (var invoiceEntity in await CosmosUtils.GetAllAsync(collection))
    {
        Console.WriteLine(invoiceEntity.transcriptId);
    }

    async Task AddSample()
    {
        // Order by the full timestamp, not just the Nanos component, so words
        // spanning whole-second boundaries sort correctly.
        var orderedWords = currentSampleWords.OrderBy(w => w.StartTime.ToTimeSpan()).ToList();
        var firstWord = orderedWords.First();
        var lastWord = orderedWords.Last();
        Duration duration = lastWord.EndTime - firstWord.StartTime;
        string trimmedFile = AudioTrimmer.SaveTrimmed(
            (int)(firstWord.StartTime.Seconds * 1000) + firstWord.StartTime.Nanos / 1_000_000,
            (int)(lastWord.EndTime.Seconds * 1000) + lastWord.EndTime.Nanos / 1_000_000,
            sourceUri);
        string blobName = await StorageLoader.PutIntoBlob(trimmedFile);
        samples.Add(new Sample
        {
            duration = (int)(duration.Seconds * 1000) + duration.Nanos / 1_000_000,
            wordCount = orderedWords.Count,
            speakerId = currentSpeakerTag,
            startTime = firstWord.StartTime.ToTimeSpan().ToString("g"),
            endTime = lastWord.EndTime.ToTimeSpan().ToString("g"),
            storageUri = $"{StorageLoader.BlobServiceClient.Uri}{StorageLoader.BlobName}/{blobName}",
            text = string.Join(' ', currentSampleWords.Select(w => w.Word))
        });
    }
}
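The repeated Seconds/Nanos arithmetic in AddSample can also be expressed through the Protobuf Duration.ToTimeSpan() conversion; a minimal sketch of a hypothetical helper:

using Google.Protobuf.WellKnownTypes;

// Hypothetical helper; equivalent to the manual Seconds/Nanos millisecond arithmetic above.
static int ToMilliseconds(Duration d) => (int)d.ToTimeSpan().TotalMilliseconds;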