/// <summary>Snippet for LongRunningRecognize</summary>
        public void LongRunningRecognize_RequestObject()
        {
            // Snippet: LongRunningRecognize(LongRunningRecognizeRequest, CallSettings)
            // Create the client used to issue the request.
            SpeechClient client = SpeechClient.Create();

            // Build a request with empty placeholder config/audio for the snippet.
            LongRunningRecognizeRequest request = new LongRunningRecognizeRequest
            {
                Config = new RecognitionConfig(),
                Audio = new RecognitionAudio(),
            };

            // Start the long-running recognition.
            Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> operation = client.LongRunningRecognize(request);

            // Block until the operation finishes server-side, then read its result.
            Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> completed = operation.PollUntilCompleted();
            LongRunningRecognizeResponse result = completed.Result;

            // Alternatively, store the operation name...
            string operationName = operation.Name;
            // ...and resume polling later using only that name.
            Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> resumed = client.PollOnceLongRunningRecognize(operationName);

            if (resumed.IsCompleted)
            {
                // Result is only valid once the operation has completed.
                LongRunningRecognizeResponse retrievedResult = resumed.Result;
            }
            // End snippet
        }
        /// <summary>Snippet for LongRunningRecognizeAsync</summary>
        public async Task LongRunningRecognizeAsync()
        {
            // Snippet: LongRunningRecognizeAsync(RecognitionConfig, RecognitionAudio, CallSettings)
            // Additional: LongRunningRecognizeAsync(RecognitionConfig, RecognitionAudio, CancellationToken)
            // Create the client asynchronously.
            SpeechClient client = await SpeechClient.CreateAsync();

            // Empty placeholder arguments for the snippet.
            RecognitionConfig config = new RecognitionConfig();
            RecognitionAudio audio = new RecognitionAudio();

            // Kick off the long-running recognition.
            Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> operation =
                await client.LongRunningRecognizeAsync(config, audio);

            // Await server-side completion of the operation, then read its result.
            Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> completed =
                await operation.PollUntilCompletedAsync();
            LongRunningRecognizeResponse result = completed.Result;

            // Alternatively, store the operation name...
            string operationName = operation.Name;
            // ...and poll again later using only that name.
            Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> resumed =
                await client.PollOnceLongRunningRecognizeAsync(operationName);

            if (resumed.IsCompleted)
            {
                // Result is only safe to read once the operation has completed.
                LongRunningRecognizeResponse retrievedResult = resumed.Result;
            }
            // End snippet
        }
        // ---- Exemplo n.º 3 (scraped example separator; "0" was the site's vote count) ----
        /// <summary>
        /// Ensures the audio described by <paramref name="transParams"/> is uploaded,
        /// then runs the cloud transcription and returns the raw response.
        /// </summary>
        public LongRunningRecognizeResponse UploadAndTranscribeInCloud(TranscribeParameters transParams)
        {
            // Upload is a no-op when the audio already exists in cloud storage.
            UploadToCloudIfNeeded(transParams);

            return TranscribeInCloud(transParams);
        }
        // ---- Exemplo n.º 4 (scraped example separator; "0" was the site's vote count) ----
        /// <summary>
        /// Reads a previously saved raw LongRunningRecognize JSON response, simplifies it,
        /// and writes the simplified DTO as indented JSON to <paramref name="simplified"/>.
        /// </summary>
        static void SimplifyRaw(string responseFile, string simplified)
        {
            // Remove any output left over from a previous run.
            File.Delete(simplified);

            // Load the raw response that was saved earlier.
            LongRunningRecognizeResponse rawResponse =
                JsonConvert.DeserializeObject<LongRunningRecognizeResponse>(File.ReadAllText(responseFile));

            // Reduce the raw results to the simplified DTO and persist it.
            Transcribed_Dto simplifiedDto = TransformResponse.Simpify(rawResponse.Results);
            File.WriteAllText(simplified, JsonConvert.SerializeObject(simplifiedDto, Formatting.Indented));
        }
        // ---- Exemplo n.º 5 (scraped example separator; "0" was the site's vote count) ----
        /// <summary>
        /// Uploads a local audio file to the bucket, transcribes it, extracts samples,
        /// and returns the generated transcript id.
        /// </summary>
        private async Task <string> Transcript(string originUri, string sourceUri)
        {
            // Push the local audio into the bucket; keep the object name for cleanup.
            var bucketLoader = new BucketLoader();
            (string audioInBucketUri, string objectName) = bucketLoader.UploadFileFromLocal(sourceUri);

            // Run speech recognition against the uploaded audio with default settings.
            var transcripter = new SpeechTranscripter();
            LongRunningRecognizeResponse response = await transcripter.Recognize(audioInBucketUri, new RecognizeConfiguration());

            // The bucket copy is no longer needed once recognition has finished.
            bucketLoader.DeleteObject(new[] { objectName });

            // Persist recognition samples under a fresh transcript id.
            string transcriptId = ObjectId.GenerateNewId().ToString();
            await RecognitionResponseProcessor.FindSamples(transcriptId, response, sourceUri, originUri);

            return transcriptId;
        }
        // ---- Exemplo n.º 6 (scraped example separator; "0" was the site's vote count) ----
        /// <summary>
        /// Transcribes an audio file in the cloud and returns the simplified transcription
        /// with corrected speaker tags.
        /// </summary>
        /// <param name="transParams">Upload/transcription settings for the audio file.</param>
        /// <param name="rawResponseFile">
        /// Optional path where the raw JSON response is saved; pass null or "" to skip saving.
        /// </param>
        /// <returns>The simplified transcription DTO.</returns>
        public Transcribed_Dto TranscribeAudioFile(TranscribeParameters transParams, string rawResponseFile = null)
        {
            LongRunningRecognizeResponse response = UploadAndTranscribeInCloud(transParams);

            // Save the raw response, if we were passed a file path.
            // Fix: the parameter defaults to null, so the previous `!= ""` check let
            // null through and File.WriteAllText(null, ...) threw ArgumentNullException.
            if (!string.IsNullOrEmpty(rawResponseFile))
            {
                string responseString = JsonConvert.SerializeObject(response, Formatting.Indented);
                File.WriteAllText(rawResponseFile, responseString);
            }

            Transcribed_Dto resp = TransformResponse.Simpify(response.Results);

            return(TransformResponse.FixSpeakerTags(resp));
        }
        // ---- Exemplo n.º 7 (scraped example separator; "0" was the site's vote count) ----
        /// <summary>Snippet for LongRunningRecognizeAsync</summary>
        public async Task LongRunningRecognizeAsync_RequestObject()
        {
            // Snippet: LongRunningRecognizeAsync(LongRunningRecognizeRequest,CallSettings)
            // Create the client asynchronously.
            SpeechClient client = await SpeechClient.CreateAsync();

            // FLAC audio at 44.1 kHz, US English, referenced from Cloud Storage.
            LongRunningRecognizeRequest request = new LongRunningRecognizeRequest
            {
                Config = new RecognitionConfig
                {
                    Encoding = RecognitionConfig.Types.AudioEncoding.Flac,
                    SampleRateHertz = 44100,
                    LanguageCode = "en-US",
                },
                Audio = new RecognitionAudio
                {
                    Uri = "gs://bucket_name/file_name.flac",
                },
            };

            // Start the long-running recognition.
            Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> operation =
                await client.LongRunningRecognizeAsync(request);

            // Await server-side completion of the operation, then read its result.
            Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> completed =
                await operation.PollUntilCompletedAsync();
            LongRunningRecognizeResponse result = completed.Result;

            // Alternatively, store the operation name...
            string operationName = operation.Name;
            // ...and poll again later using only that name.
            Operation<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> resumed =
                await client.PollOnceLongRunningRecognizeAsync(operationName);

            if (resumed.IsCompleted)
            {
                // Result is only safe to read once the operation has completed.
                LongRunningRecognizeResponse retrievedResult = resumed.Result;
            }
            // End snippet
        }
        // ---- Exemplo n.º 8 (scraped example separator; "0" was the site's vote count) ----
        /// <summary>Snippet for LongRunningRecognize with separate config and audio arguments.</summary>
        public void LongRunningRecognize()
        {
            // Snippet: LongRunningRecognize(RecognitionConfig,RecognitionAudio,CallSettings)
            // Create the client.
            SpeechClient client = SpeechClient.Create();

            // FLAC audio at 44.1 kHz, US English, referenced from Cloud Storage.
            RecognitionConfig config = new RecognitionConfig
            {
                Encoding = RecognitionConfig.Types.AudioEncoding.Flac,
                SampleRateHertz = 44100,
                LanguageCode = "en-US",
            };
            RecognitionAudio audio = new RecognitionAudio
            {
                Uri = "gs://bucket_name/file_name.flac",
            };

            // Start the long-running recognition.
            Operation<LongRunningRecognizeResponse> operation =
                client.LongRunningRecognize(config, audio);

            // Block until the operation finishes server-side, then read its result.
            Operation<LongRunningRecognizeResponse> completed =
                operation.PollUntilCompleted();
            LongRunningRecognizeResponse result = completed.Result;

            // Alternatively, store the operation name...
            string operationName = operation.Name;
            // ...and resume polling later using only that name.
            Operation<LongRunningRecognizeResponse> resumed =
                client.PollOnceLongRunningRecognize(operationName);

            if (resumed.IsCompleted)
            {
                // Result is only valid once the operation has completed.
                LongRunningRecognizeResponse retrievedResult = resumed.Result;
            }
            // End snippet
        }
        // ---- Exemplo n.º 9 (scraped example separator; "0" was the site's vote count) ----
        /// <summary>
        /// Runs diarization-enabled speech recognition against audio already stored in a
        /// Cloud Storage bucket, prints each transcript alternative, and returns the response.
        /// </summary>
        /// <param name="storageUri">gs:// URI of the audio object to transcribe.</param>
        /// <param name="configuration">Optional recognition settings; defaults are used when null.</param>
        public async Task <LongRunningRecognizeResponse> Recognize(string storageUri, RecognizeConfiguration configuration = null)
        {
            // Fall back to default recognition settings when none are supplied.
            configuration ??= new RecognizeConfiguration();

            var speech = SpeechClient.Create();
            var audio = RecognitionAudio.FromStorageUri(storageUri);

            // Build the recognition config from the supplied settings, with speaker
            // diarization enabled and the source flagged as video media.
            var recognitionConfig = new RecognitionConfig()
            {
                Encoding = configuration.AudioEncoding,
                SampleRateHertz = configuration.SampleRateHertz,
                LanguageCode = configuration.LanguageCode,
                DiarizationConfig = new SpeakerDiarizationConfig()
                {
                    EnableSpeakerDiarization = true,
                    MinSpeakerCount = configuration.MinSpeakerCount,
                    MaxSpeakerCount = configuration.MaxSpeakerCount
                },
                Metadata = new RecognitionMetadata()
                {
                    OriginalMediaType = RecognitionMetadata.Types.OriginalMediaType.Video
                }
            };

            // Start recognition and wait for the long-running operation to finish.
            var longOperation = await speech.LongRunningRecognizeAsync(recognitionConfig, audio);
            longOperation = await longOperation.PollUntilCompletedAsync();

            LongRunningRecognizeResponse response = longOperation.Result;

            // Echo every transcript alternative to the console.
            foreach (var result in response.Results)
            {
                foreach (var alternative in result.Alternatives)
                {
                    Console.WriteLine($"Transcript: { alternative.Transcript}");
                }
            }
            return response;
        }
        // ---- Exemplo n.º 10 (scraped example separator; "0" was the site's vote count) ----
        /// <summary>
        /// Print confidence level for individual words in a transcription of a short audio file
        /// Separating different speakers in an audio file recording
        /// </summary>
        /// <param name="localFilePath">Path to local audio file, e.g. /path/audio.wav</param>
        public static void SampleLongRunningRecognize(string localFilePath)
        {
            SpeechClient speechClient = SpeechClient.Create();

            // Diarization-enabled config: each word in the first alternative gets a
            // speaker tag; we hint that roughly two speakers are present.
            var config = new RecognitionConfig
            {
                EnableSpeakerDiarization = true,
                DiarizationSpeakerCount = 2,
                LanguageCode = "en-US",
            };
            // Embed the local file's bytes directly in the request.
            var audio = new RecognitionAudio
            {
                Content = ByteString.CopyFrom(File.ReadAllBytes(localFilePath)),
            };
            var request = new LongRunningRecognizeRequest
            {
                Config = config,
                Audio = audio,
            };

            // Start the operation and block until transcription is complete.
            LongRunningRecognizeResponse response = speechClient.LongRunningRecognize(request).PollUntilCompleted().Result;

            foreach (var result in response.Results)
            {
                // Only the first alternative carries speaker-tagged words.
                SpeechRecognitionAlternative alternative = result.Alternatives[0];
                Console.WriteLine($"Transcript: {alternative.Transcript}");
                foreach (var word in alternative.Words)
                {
                    Console.WriteLine($"Word: {word.Word}");
                    Console.WriteLine($"Speaker tag: {word.SpeakerTag}");
                }
            }
        }
 /// <summary>Maps a long-running recognition response onto the shared transcribe response DTO.</summary>
 TranscribeResponse GetLongTranscribeResponse(LongRunningRecognizeResponse response) =>
     GetTranscribeResponse(response.Results);
        // ---- Exemplo n.º 12 (scraped example separator; "0" was the site's vote count) ----
        public async Task FindSamples(string transcriptId, LongRunningRecognizeResponse recognizeResponse, string sourceUri, string originUri)
        {
            var result = recognizeResponse.Results.Last();

            if (result == null)
            {
                throw new ArgumentException("Empty recognition response. Cannot find samples.");
            }

            var             words              = result.Alternatives.Last().Words;
            int             currentSpeakerTag  = -1;
            List <WordInfo> currentSampleWords = new List <WordInfo>();
            List <Sample>   samples            = new List <Sample>();

            foreach (WordInfo wordInfo in words)
            {
                if (currentSpeakerTag == -1)
                {
                    currentSpeakerTag = wordInfo.SpeakerTag;
                    currentSampleWords.Add(wordInfo);
                }
                else
                {
                    if (currentSpeakerTag != wordInfo.SpeakerTag) // save new sample
                    {
                        await AddSample();

                        //switch speaker
                        currentSampleWords.Clear();
                        currentSpeakerTag = wordInfo.SpeakerTag;
                    }
                    else
                    {
                        currentSampleWords.Add(wordInfo);
                    }
                }
            }

            //last sample
            await AddSample();

            var samplesToSave = new SamplesCollection()
            {
                samples      = samples,
                transcriptId = transcriptId,
                VideoUri     = originUri
            };

            var connectionString = Environment.GetEnvironmentVariable("MONGO_CONNECT_STR");
            var database         = CosmosUtils.ConnectToDatabase(connectionString, "Samples");
            var collection       = database.GetCollection <SamplesCollection>("Samples");
            await CosmosUtils.AddDocumentAsync(collection, samplesToSave);

            foreach (var invoiceEntity in await CosmosUtils.GetAllAsync(collection))
            {
                Console.WriteLine(invoiceEntity.transcriptId);
            }

            async Task AddSample()
            {
                var      orderedWords = currentSampleWords.OrderBy(w => w.StartTime.Nanos);
                var      firstWord    = orderedWords.First();
                var      lastWord     = orderedWords.Last();
                Duration duration     = orderedWords.Last().EndTime - orderedWords.First().StartTime;
                string   trimmedFile  = AudioTrimmer.SaveTrimmed(
                    (int)(firstWord.StartTime.Seconds * 1000) + firstWord.StartTime.Nanos / 1000_000,
                    (int)(lastWord.EndTime.Seconds * 1000) + lastWord.EndTime.Nanos / 1000_000,
                    sourceUri);
                string blobName = await StorageLoader.PutIntoBlob(trimmedFile);

                samples.Add(new Sample()
                {
                    duration   = (int)(duration.Seconds * 1000) + duration.Nanos / 1000_000,
                    wordCount  = orderedWords.Count(),
                    speakerId  = currentSpeakerTag,
                    startTime  = firstWord.StartTime.ToTimeSpan().ToString("g"),
                    endTime    = lastWord.EndTime.ToTimeSpan().ToString("g"),
                    storageUri = $"{StorageLoader.BlobServiceClient.Uri}{StorageLoader.BlobName}/{blobName}",
                    text       = string.Join(' ', currentSampleWords.Select(w => w.Word))
                });
            }