/// <summary>
/// Initializes the underlying Google speech service for the given language.
/// </summary>
/// <param name="language">The language the speech service should recognize.</param>
public override void Initialize(Enums.Language language)
{
    // NOTE(review): commented-out Yandex/WaveNet service wiring was removed as dead code.
    // The dead code also contained a copy-paste bug (it constructed a GoogleSpeechService
    // for the Yandex field) — reintroduce from version control with care if ever needed.
    _googleSpeechService = new GoogleSpeechService();
    _googleSpeechService.Initialize(language);
}
/// <summary>
/// Run the program service: transcribes the configured audio file via Google
/// Speech-to-Text and writes the result to .json and .txt files next to the executable.
/// The uploaded audio object is deleted from the bucket afterwards.
/// </summary>
/// <returns>A task representing the asynchronous run.</returns>
public async Task RunAsync()
{
    if (!TryInitializeServices())
    {
        return;
    }

    if (!File.Exists(_options.AudioPath))
    {
        _logger.Error("The audio file at path {audioPath} does not exist.", _options.AudioPath);
        return;
    }

    _logger.Information("Starting transcription for {audioPath}.", _options.AudioPath);

    // Retrieve audio metadata and match it against the supported formats.
    var codec = _taglibService.GetAudioCodec(_options.AudioPath);
    var sampleRate = _taglibService.GetAudioSampleRate(_options.AudioPath);
    var encoding = ResolveEncoding(codec);

    // Asynchronously create the bucket if it doesn't already exist.
    if (await _storageService.GetBucketAsync(_options.Bucket) is null)
    {
        var bucket = await _storageService.CreateBucketAsync(_options.Bucket);
        if (bucket is null)
        {
            throw new InvalidOperationException("Unable to create bucket.");
        }

        _logger.Information("Bucket {bucketName} was created.", _options.Bucket);
    }

    // Asynchronously upload the audio. A fresh GUID-based object name avoids
    // collisions between runs; the returned object handle was unused and is discarded.
    _logger.Information("Uploading audio to bucket {bucketName}.", _options.Bucket);
    var objectName = $"{Guid.NewGuid()}{Path.GetExtension(_options.AudioPath)}";
    await _storageService.UploadAsync(_options.Bucket, objectName, _options.AudioPath);
    var uploadedAudioUri = $"gs://{_options.Bucket}/{objectName}";
    _logger.Information("Uploaded audio to {audioUri}.", uploadedAudioUri);

    // Asynchronously transcribe the audio.
    try
    {
        _logger.Information("Transcription started.");

        IReadOnlyList<SpeechRecognitionAlternative> transcription = null;
        await foreach (var result in _speechService.LongRunningRecognizeAsync(uploadedAudioUri, encoding, sampleRate, _options.LanguageCode))
        {
            if (result.Progress < 100)
            {
                _logger.Information("Transcription progress {progress}%.", result.Progress);
                continue;
            }

            transcription = result.Transcription;
        }

        _logger.Information("Transcription completed.");

        // Guard: if the recognizer never reported 100% progress, transcription is
        // still null; the original code crashed on SelectMany and logged a generic
        // "Transcription failed." Report the real cause instead.
        if (transcription is null)
        {
            _logger.Error("Transcription produced no completed result.");
        }
        else
        {
            WriteTranscriptionFiles(transcription, uploadedAudioUri);
        }
    }
    catch (Exception ex)
    {
        _logger.Error(ex, "Transcription failed.");
    }

    // Asynchronously delete the uploaded audio (bool switch replaced with if/else).
    if (await _storageService.DeleteAsync(_options.Bucket, objectName))
    {
        _logger.Information("Deleted uploaded audio.");
    }
    else
    {
        _logger.Information("Failed to delete uploaded audio.");
    }
}

/// <summary>
/// Lazily constructs the speech, storage and taglib services.
/// Logs and returns false if any of them fails to start.
/// </summary>
/// <returns>True when all services are available; otherwise false.</returns>
private bool TryInitializeServices()
{
    if (_speechService is null)
    {
        try
        {
            _speechService = new GoogleSpeechService(_options.CredentialsPath);
        }
        catch (Exception ex)
        {
            _logger.Error(ex, "Failed to start the speech service.");
            return false;
        }
    }

    if (_storageService is null)
    {
        try
        {
            _storageService = new GoogleStorageService(_options.CredentialsPath);
        }
        catch (Exception ex)
        {
            _logger.Error(ex, "Failed to start the storage service.");
            return false;
        }
    }

    if (_taglibService is null)
    {
        try
        {
            _taglibService = new TaglibService();
        }
        catch (Exception ex)
        {
            _logger.Error(ex, "Failed to start the taglib service.");
            return false;
        }
    }

    return true;
}

/// <summary>
/// Maps a TagLib codec description to a supported speech encoding.
/// </summary>
/// <param name="codec">The codec object returned by the taglib service.</param>
/// <returns>The matching <see cref="AudioEncoding"/>.</returns>
/// <exception cref="NotImplementedException">Thrown when the codec is not supported.</exception>
private static AudioEncoding ResolveEncoding(object codec) => codec switch
{
    TagLib.Riff.WaveFormatEx _ => AudioEncoding.Linear16,
    TagLib.Flac.StreamHeader _ => AudioEncoding.Flac,
    _ => throw new NotImplementedException("The codec is not supported."),
};

/// <summary>
/// Groups the transcription by speaker and writes the .json and .txt output
/// files next to the running executable.
/// </summary>
/// <param name="transcription">The completed recognition alternatives.</param>
/// <param name="uploadedAudioUri">The gs:// URI the audio was uploaded to.</param>
private void WriteTranscriptionFiles(IReadOnlyList<SpeechRecognitionAlternative> transcription, string uploadedAudioUri)
{
    // Analyze transcription by speaker: adjacent words with the same non-zero
    // speaker tag form one text block.
    var textBlocks = new List<TranscribedTextBlock>();
    var wordsBySpeakerTag = transcription.SelectMany(q => q.Words).Where(q => q.SpeakerTag != 0).GroupAdjacent(q => q.SpeakerTag);
    foreach (var group in wordsBySpeakerTag)
    {
        textBlocks.Add(new TranscribedTextBlock()
        {
            SpeakerTag = group.Key,
            Text = string.Join(" ", group.Select(x => x.Word.ToString()))
        });
    }

    var outputDirectory = Path.GetDirectoryName(Process.GetCurrentProcess().MainModule.FileName);
    var baseName = $"Transcription-{Path.GetFileNameWithoutExtension(_options.AudioPath)}";

    // Write to .json file. UtcNow replaces DateTime.Now so the persisted
    // timestamp is unambiguous across time zones.
    var transcribedFile = new TranscribedFile()
    {
        AudioPath = _options.AudioPath,
        AudioUri = uploadedAudioUri,
        Created = DateTime.UtcNow,
        TextBlocks = textBlocks.ToArray()
    };
    File.WriteAllText(Path.Combine(outputDirectory, $"{baseName}.json"), JsonConvert.SerializeObject(transcribedFile));

    // Write to .txt file, one line per speaker block.
    var text = string.Join("\n", textBlocks.Select(q => $"Speaker {q.SpeakerTag}: {q.Text}"));
    File.WriteAllText(Path.Combine(outputDirectory, $"{baseName}.txt"), text);
}