/// <summary>
        /// Runs printed-text recognition on the uploaded blob referenced by
        /// <c>response.Request.UploadBlobUrl</c>, stores the recognized text in a ".txt" blob
        /// in the "transcripts" container, and records the transcript and its blob URL on
        /// <paramref name="response"/>.
        /// </summary>
        /// <param name="response">
        /// Conversion state. <c>Request.UploadBlobUrl</c> and <c>Request.BlobName</c> are read;
        /// <c>TranscriptBlobUrl</c>, <c>Transcript</c> and (on failure) <c>ErrorMessage</c> are written.
        /// </param>
        /// <returns>
        /// The same <paramref name="response"/> instance. Exceptions are not propagated: they are
        /// written to the console and their full text is stored in <c>ErrorMessage</c>.
        /// </returns>
        public async Task<ConvertResponse> ExtractTextComputerVision(ConvertResponse response)
        {
            try
            {
                var subscriptionKey = _config["computerVision:key"];
                var serviceEndpoint = _config["computerVision:endpoint"];

                var credentials  = new ApiKeyServiceClientCredentials(subscriptionKey);
                var visionClient = new ComputerVisionClient(credentials);
                visionClient.Endpoint = serviceEndpoint;

                // The configured "storage:sas" value is appended verbatim to the blob URL
                // (presumably a SAS query string granting read access - confirm against config).
                var imageUrl  = response.Request.UploadBlobUrl + _config["storage:sas"];
                var ocrResult = await visionClient.RecognizePrintedTextAsync(true, imageUrl);

                // Flatten regions -> lines -> words: each word is followed by a single space,
                // each recognized line is terminated by a newline.
                var builder = new StringBuilder();
                foreach (var ocrRegion in ocrResult.Regions)
                {
                    foreach (var ocrLine in ocrRegion.Lines)
                    {
                        foreach (var ocrWord in ocrLine.Words)
                        {
                            builder.Append(ocrWord.Text).Append(" ");
                        }

                        builder.AppendLine();
                    }
                }

                // The transcript blob reuses the uploaded blob's base name with a ".txt" extension.
                var baseName = Path.GetFileNameWithoutExtension(response.Request.BlobName);
                var txtName  = baseName + ".txt";

                var storage        = new BlobStorageClient(_config);
                var transcriptBlob = storage.GetBlobBlock("transcripts", txtName);

                // The response fields are populated before the upload is attempted.
                response.TranscriptBlobUrl = transcriptBlob.Uri.AbsoluteUri;
                response.Transcript        = builder.ToString().Trim();

                await transcriptBlob.UploadTextAsync(response.Transcript);
            }
            catch (Exception ex)
            {
                var details = ex.ToString();

                Console.WriteLine(details);
                Console.WriteLine(ex.Message);
                response.ErrorMessage = details;
            }

            return response;
        }
        /// <summary>
        /// Submits the uploaded blob referenced by <c>response.Request.UploadBlobUrl</c> to the
        /// batch read operation, polls until the operation succeeds or fails, stores the
        /// recognized text in a ".txt" blob in the "transcripts" container, and records the
        /// transcript and its blob URL on <paramref name="response"/>.
        /// </summary>
        /// <param name="response">
        /// Conversion state. <c>Request.UploadBlobUrl</c> and <c>Request.BlobName</c> are read;
        /// <c>TranscriptBlobUrl</c>, <c>Transcript</c> and (on failure) <c>ErrorMessage</c> are written.
        /// </param>
        /// <returns>
        /// The same <paramref name="response"/> instance. Exceptions are not propagated: they are
        /// written to the console and their full text is stored in <c>ErrorMessage</c>. A failed
        /// or timed-out read operation also only sets <c>ErrorMessage</c>.
        /// </returns>
        public async Task<ConvertResponse> ExtractTextBatchRead(ConvertResponse response)
        {
            // Poll every 500 ms, but give up after roughly five minutes so that a read
            // operation that never reaches a terminal state cannot hang the caller forever.
            const int pollDelayMilliseconds = 500;
            const int maxPollAttempts       = 600;

            try
            {
                var key      = _config["computerVision:key"];
                var endpoint = _config["computerVision:endpoint"];
                ComputerVisionClient computerVision = new ComputerVisionClient(new ApiKeyServiceClientCredentials(key))
                {
                    Endpoint = endpoint
                };

                // The configured "storage:sas" value is appended verbatim to the blob URL
                // (presumably a SAS query string granting read access - confirm against config).
                var operationInfo = await computerVision.BatchReadFileAsync(response.Request.UploadBlobUrl + _config["storage:sas"]);

                // The operation id is the last path segment of the returned operation location.
                var operationId = operationInfo.OperationLocation.Split('/').Last();

                var result       = new ReadOperationResult();
                var pollAttempts = 0;

                while (result.Status != TextOperationStatusCodes.Failed && result.Status != TextOperationStatusCodes.Succeeded)
                {
                    if (pollAttempts >= maxPollAttempts)
                    {
                        response.ErrorMessage = "Text extraction timed out while waiting for the read operation to complete.";
                        return response;
                    }

                    pollAttempts++;
                    await Task.Delay(pollDelayMilliseconds);

                    result = await computerVision.GetReadOperationResultAsync(operationId);
                }

                if (result.Status == TextOperationStatusCodes.Failed)
                {
                    response.ErrorMessage = "Text translation failed.";
                    return response;
                }

                var text = new StringBuilder();
                foreach (var page in result.RecognitionResults)
                {
                    foreach (var line in page.Lines)
                    {
                        // Every line is prefixed with a newline; the leading one is removed
                        // by the Trim() applied when the transcript is stored below.
                        text.Append(Environment.NewLine).Append(line.Text);
                    }
                }

                Console.WriteLine();
                Console.WriteLine(text.ToString());

                // The transcript blob reuses the uploaded blob's base name with a ".txt" extension.
                var transcriptBlobName = Path.GetFileNameWithoutExtension(response.Request.BlobName) + ".txt";

                var blobClient = new BlobStorageClient(_config);
                var textBlob   = blobClient.GetBlobBlock("transcripts", transcriptBlobName);

                // The response fields are populated before the upload is attempted.
                response.TranscriptBlobUrl = textBlob.Uri.AbsoluteUri;
                response.Transcript        = text.ToString().Trim();

                await textBlob.UploadTextAsync(response.Transcript);
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
                Console.WriteLine(ex.Message);
                response.ErrorMessage = ex.ToString();
            }

            return response;
        }
        // Example #3
        // 0
        /// <summary>
        /// Converts <c>response.Transcript</c> to speech: fetches an access token from the
        /// speech service, posts an SSML document built from the transcript and the request's
        /// voice/prosody settings, and uploads the returned WAV audio to the "speechresults"
        /// blob container under the request's blob name with a ".wav" extension.
        /// </summary>
        /// <param name="response">
        /// Conversion state. <c>Request</c> (blob name, speed, pitch, volume, voice) and
        /// <c>Transcript</c> are read. <c>Transcript</c> is rewritten with every "&amp;" replaced
        /// by "and"; <c>SpeechAudioBlobUrl</c> and <c>IsSuccessful</c> are set on success and
        /// <c>ErrorMessage</c> on failure.
        /// </param>
        /// <returns>
        /// The same <paramref name="response"/> instance. Exceptions are not propagated: they are
        /// written to the console and their full text is stored in <c>ErrorMessage</c>.
        /// </returns>
        public async Task<ConvertResponse> ConvertTextToSpeechSsml(ConvertResponse response)
        {
            // Transcripts longer than this many characters are shortened before synthesis.
            const int maxChars = 990;

            try
            {
                var r = response.Request;

                // Path.ChangeExtension swaps only the trailing extension and appends ".wav" when
                // the name has none. The previous string.Replace(ext, ".wav") replaced every
                // occurrence of the extension text and threw on extension-less names.
                var waveName = Path.ChangeExtension(r.BlobName, ".wav");

                string accessToken;
                using (var client = new HttpClient())
                {
                    client.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", _config["speech:key"]);
                    var tokenFetchUri = $"https://{_config["speech:region"]}.api.cognitive.microsoft.com/sts/v1.0/issueToken";

                    using (var tokenResponse = await client.PostAsync(new Uri(tokenFetchUri), null).ConfigureAwait(false))
                    {
                        // Fail here rather than sending an error body as the bearer token.
                        tokenResponse.EnsureSuccessStatusCode();

                        accessToken = await tokenResponse.Content.ReadAsStringAsync().ConfigureAwait(false);
                    }
                }

                var host    = $"https://{_config["speech:region"]}.tts.speech.microsoft.com/cognitiveservices/v1";
                var prosody = $"<prosody rate='{r.Speed}.00%' pitch='{r.Pitch}' volume='{r.Volume}'>";

                response.Transcript = response.Transcript.Replace("&", "and");

                var transcript = response.Transcript;
                if (transcript.Length > maxChars)
                {
                    var intro = "This transcript is too long and has been shortened automatically." + Environment.NewLine;
                    transcript = intro + transcript.Substring(0, maxChars - intro.Length);
                }

                // The transcript is embedded as XML text, so characters such as '<' and '>'
                // must be escaped or the SSML document becomes malformed.
                var ssmlText = System.Security.SecurityElement.Escape(transcript);

                string body = $@"<speak version='1.0' xmlns='https://www.w3.org/2001/10/synthesis' xml:lang='en-US'>
					<voice name='Microsoft Server Speech Text to Speech Voice (en-US, {r.VoiceToUse})'>{prosody}" +
                              ssmlText + "</prosody></voice></speak>";

                Console.WriteLine();
                Console.WriteLine(body);

                using (var client = new HttpClient())
                {
                    using (var request = new HttpRequestMessage())
                    {
                        request.Method     = HttpMethod.Post;
                        request.RequestUri = new Uri(host);
                        request.Content    = new StringContent(body, Encoding.UTF8, "application/ssml+xml");
                        request.Headers.Add("Authorization", "Bearer " + accessToken);
                        request.Headers.Add("Connection", "Keep-Alive");
                        request.Headers.Add("User-Agent", _config["speech:resourceName"]);
                        request.Headers.Add("X-Microsoft-OutputFormat", "riff-24khz-16bit-mono-pcm");

                        Console.WriteLine("Calling the TTS service. Please wait... \n");
                        using (var speechResponse = await client.SendAsync(request).ConfigureAwait(false))
                        {
                            speechResponse.EnsureSuccessStatusCode();
                            using (var dataStream = await speechResponse.Content.ReadAsStreamAsync().ConfigureAwait(false))
                            {
                                // A unique temp file per call keeps concurrent conversions from
                                // overwriting each other's audio.
                                var fileName = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N") + ".wav");

                                try
                                {
                                    Console.WriteLine("Your speech file is being written to temp file...");
                                    using (var fileStream = new FileStream(fileName, FileMode.Create, FileAccess.Write, FileShare.None))
                                    {
                                        await dataStream.CopyToAsync(fileStream).ConfigureAwait(false);
                                    }

                                    var blobClient = new BlobStorageClient(_config);
                                    var audioBlob  = blobClient.GetBlobBlock("speechresults", waveName);
                                    await audioBlob.UploadFromFileAsync(fileName);

                                    response.SpeechAudioBlobUrl = audioBlob.Uri.AbsoluteUri;
                                    response.IsSuccessful       = true;
                                }
                                finally
                                {
                                    // Remove the temp file even when the copy or upload fails.
                                    File.Delete(fileName);
                                }
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
                Console.WriteLine(ex.Message);
                response.ErrorMessage = ex.ToString();
            }

            return response;
        }