예제 #1
0
        //private YandexSpeechKitService _yandexSpeechKitServiceService;
        //private WaweNetService _waweNetService;
        /// <summary>
        /// Creates the Google speech service and initializes it for the given language.
        /// </summary>
        /// <param name="language">Language forwarded to the Google speech service's <c>Initialize</c> call.</param>
        public override void Initialize(Enums.Language language)
        {
            // The field is assigned before Initialize runs, so it is set even if Initialize throws.
            _googleSpeechService = new GoogleSpeechService();
            _googleSpeechService.Initialize(language);

            // Disabled: only the Google service is set up; the Yandex SpeechKit and WaweNet
            // services below are commented out.
            // NOTE(review): the first disabled line assigns a GoogleSpeechService to the Yandex
            // field - presumably it should construct a YandexSpeechKitService; confirm before re-enabling.
            //_yandexSpeechKitServiceService = new GoogleSpeechService();
            //_yandexSpeechKitServiceService.Initialize(language);
            //_waweNetService = new WaweNetService();
            //_waweNetService.Initialize(language);
        }
예제 #2
0
        /// <summary>
        /// Run the program service: lazily create the speech, storage and taglib services,
        /// upload the audio file at <c>_options.AudioPath</c> to the bucket <c>_options.Bucket</c>,
        /// transcribe it, write the result next to the running executable as
        /// <c>Transcription-{name}.json</c> and <c>Transcription-{name}.txt</c>, and finally
        /// delete the uploaded audio object.
        /// </summary>
        /// <remarks>
        /// Failures while creating a service, a missing audio file, and any exception raised
        /// during transcription or while writing the output files are logged and do not propagate.
        /// </remarks>
        /// <returns>A task.</returns>
        /// <exception cref="NotImplementedException">
        /// The codec is neither <c>TagLib.Riff.WaveFormatEx</c> nor <c>TagLib.Flac.StreamHeader</c>.
        /// </exception>
        /// <exception cref="InvalidOperationException">The bucket did not exist and could not be created.</exception>
        public async Task RunAsync()
        {
            // Each service is only created when it has not been supplied already;
            // a failure to create any of them aborts the run after logging.
            if (_speechService is null)
            {
                try
                {
                    _speechService = new GoogleSpeechService(_options.CredentialsPath);
                }
                catch (Exception ex)
                {
                    _logger.Error(ex, "Failed to start the speech service.");
                    return;
                }
            }

            if (_storageService is null)
            {
                try
                {
                    _storageService = new GoogleStorageService(_options.CredentialsPath);
                }
                catch (Exception ex)
                {
                    _logger.Error(ex, "Failed to start the storage service.");
                    return;
                }
            }

            if (_taglibService is null)
            {
                try
                {
                    _taglibService = new TaglibService();
                }
                catch (Exception ex)
                {
                    _logger.Error(ex, "Failed to start the taglib service.");
                    return;
                }
            }

            if (!File.Exists(_options.AudioPath))
            {
                _logger.Error("The audio file at path {audioPath} does not exist.", _options.AudioPath);
                return;
            }

            _logger.Information("Starting transcription for {audioPath}.", _options.AudioPath);

            // Retrieve audio metadata.
            var codec      = _taglibService.GetAudioCodec(_options.AudioPath);
            var sampleRate = _taglibService.GetAudioSampleRate(_options.AudioPath);

            // Match audio metadata against supported formats.
            AudioEncoding encoding = codec switch
            {
                TagLib.Riff.WaveFormatEx _ => AudioEncoding.Linear16,
                TagLib.Flac.StreamHeader _ => AudioEncoding.Flac,
                _ => throw new NotImplementedException("The codec is not supported."),
            };

            // Asynchronously create the bucket if it doesn't already exist.
            if (await _storageService.GetBucketAsync(_options.Bucket) is null)
            {
                var bucket = await _storageService.CreateBucketAsync(_options.Bucket);

                if (bucket is null)
                {
                    throw new InvalidOperationException("Unable to create bucket.");
                }

                _logger.Information("Bucket {bucketName} was created.", _options.Bucket);
            }

            // Asynchronously upload the audio under a fresh GUID-based object name
            // that keeps the original file extension.
            _logger.Information("Uploading audio to bucket {bucketName}.", _options.Bucket);
            var objectName = $"{Guid.NewGuid()}{Path.GetExtension(_options.AudioPath)}";
            await _storageService.UploadAsync(_options.Bucket, objectName, _options.AudioPath);

            var uploadedAudioUri = $"gs://{_options.Bucket}/{objectName}";

            _logger.Information("Uploaded audio to {audioUri}.", uploadedAudioUri);

            // Asynchronously transcribe the audio.
            try
            {
                _logger.Information("Transcription started.");
                IReadOnlyList<SpeechRecognitionAlternative> transcription = null;
                await foreach (var result in _speechService.LongRunningRecognizeAsync(uploadedAudioUri, encoding, sampleRate, _options.LanguageCode))
                {
                    if (result.Progress < 100)
                    {
                        _logger.Information("Transcription progress {progress}%.", result.Progress);
                        continue;
                    }

                    transcription = result.Transcription;
                }

                // The stream may end without ever reporting 100% progress; fail with a clear
                // message instead of dereferencing null below.
                if (transcription is null)
                {
                    throw new InvalidOperationException("Transcription finished without returning a result.");
                }

                _logger.Information("Transcription completed.");

                // Analyze transcription by speaker: words with speaker tag 0 are skipped and
                // adjacent words sharing a speaker tag are merged into one text block.
                var textBlocks = transcription
                    .SelectMany(alternative => alternative.Words)
                    .Where(word => word.SpeakerTag != 0)
                    .GroupAdjacent(word => word.SpeakerTag)
                    .Select(group => new TranscribedTextBlock()
                    {
                        SpeakerTag = group.Key,
                        Text       = string.Join(" ", group.Select(word => word.Word.ToString()))
                    })
                    .ToArray();

                // Both output files go next to the main module of the current process.
                // The Process handle is disposable, so resolve the directory once and release it.
                string outputDirectory;
                using (var currentProcess = Process.GetCurrentProcess())
                {
                    outputDirectory = Path.GetDirectoryName(currentProcess.MainModule.FileName);
                }

                var outputBaseName = $"Transcription-{Path.GetFileNameWithoutExtension(_options.AudioPath)}";

                // Write to .json file.
                var transcribedFile = new TranscribedFile()
                {
                    AudioPath  = _options.AudioPath,
                    AudioUri   = uploadedAudioUri,
                    Created    = DateTime.Now,
                    TextBlocks = textBlocks
                };
                var json     = JsonConvert.SerializeObject(transcribedFile);
                var jsonPath = Path.Combine(outputDirectory, $"{outputBaseName}.json");
                File.WriteAllText(jsonPath, json);

                // Write to .txt file.
                var text     = string.Join("\n", textBlocks.Select(q => $"Speaker {q.SpeakerTag}: {q.Text}"));
                var textPath = Path.Combine(outputDirectory, $"{outputBaseName}.txt");
                File.WriteAllText(textPath, text);
            }
            catch (Exception ex)
            {
                // Swallowed on purpose so the uploaded audio is still cleaned up below.
                _logger.Error(ex, "Transcription failed.");
            }

            // Asynchronously delete the uploaded audio.
            switch (await _storageService.DeleteAsync(_options.Bucket, objectName))
            {
            case true:
                _logger.Information("Deleted uploaded audio.");
                break;

            case false:
                _logger.Information("Failed to delete uploaded audio.");
                break;
            }
        }