// Loads the sample transcript JSON from disk, deserializes it into a SpeechTranscript,
// renders it through TranscriptionToHtml.ToHtml, and checks that the rendered output
// is non-empty and begins with an "<html lang=" tag (compared case-insensitively).
public void GetHTMLFromJson()
{
    // Arrange: read and deserialize the fixture.
    var sampleJson = File.ReadAllText(@"testFiles/transcriptSample.json");
    var sampleTranscript = JsonConvert.DeserializeObject<SpeechTranscript>(sampleJson);

    // Act: render the transcript as HTML.
    var renderedHtml = TranscriptionToHtml.ToHtml(sampleTranscript, "testfile");

    // Assert: something was produced and it looks like an HTML document.
    var hasContent = !string.IsNullOrEmpty(renderedHtml);
    Assert.IsTrue(hasContent);

    var startsWithHtmlTag = renderedHtml.StartsWith("<html lang=", StringComparison.OrdinalIgnoreCase);
    Assert.IsTrue(startsWithHtmlTag);
}
/// <summary>
/// Handles a succeeded transcription job: downloads every transcription result file,
/// optionally adds sentiment and redacts entities, writes the result as JSON (and
/// optionally HTML) to blob storage, optionally stores it in the SQL database, writes
/// an error report if anything was collected, processes the transcription report file,
/// and finally deletes the transcription from the service.
/// </summary>
/// <param name="transcriptionLocation">Location of the transcription; its last '/'-separated segment is parsed as the job GUID.</param>
/// <param name="subscriptionKey">Key passed to the <c>BatchClient</c> calls that list files and delete the transcription.</param>
/// <param name="serviceBusMessage">Message supplying the locale and whether a custom model was used.</param>
/// <param name="jobName">Job name used for logging, the HTML output, the error report name, and as fallback file name in the database.</param>
/// <param name="log">Logger for progress and error messages.</param>
/// <returns>A task that completes when all processing and the final delete have finished.</returns>
private static async Task ProcessSucceededTranscriptionAsync(string transcriptionLocation, string subscriptionKey, TranscriptionStartedMessage serviceBusMessage, string jobName, ILogger log)
{
    log.LogInformation($"Got succeeded transcription for job {jobName}");

    var jsonContainer = FetchTranscriptionEnvironmentVariables.JsonResultOutputContainer;
    var textAnalyticsKey = FetchTranscriptionEnvironmentVariables.TextAnalyticsKey;
    var textAnalyticsRegion = FetchTranscriptionEnvironmentVariables.TextAnalyticsRegion;
    var sentimentSetting = FetchTranscriptionEnvironmentVariables.SentimentAnalysisSetting;
    var entityRedactionSetting = FetchTranscriptionEnvironmentVariables.EntityRedactionSetting;

    var transcriptionFiles = await BatchClient.GetTranscriptionFilesAsync(transcriptionLocation, subscriptionKey, log).ConfigureAwait(false);
    log.LogInformation("Received transcription files.");

    // Materialize once: the list is inspected for its size and then iterated.
    var resultFiles = transcriptionFiles.Values.Where(t => t.Kind == TranscriptionFileKind.Transcription).ToList();
    var containsMultipleTranscriptions = resultFiles.Count > 1;

    var textAnalyticsInfoProvided = !string.IsNullOrEmpty(textAnalyticsKey)
        && !string.IsNullOrEmpty(textAnalyticsRegion)
        && !textAnalyticsRegion.Equals("none", StringComparison.OrdinalIgnoreCase);

    var textAnalytics = textAnalyticsInfoProvided
        ? new TextAnalytics(serviceBusMessage.Locale, textAnalyticsKey, textAnalyticsRegion, log)
        : null;

    var sentimentRequested = sentimentSetting != SentimentAnalysisSetting.None;
    var entityRedactionRequested = entityRedactionSetting != EntityRedactionSetting.None;

    // Without key/region there is no TextAnalytics instance; skip those steps instead of
    // failing with a NullReferenceException for every file.
    if (textAnalytics == null && (sentimentRequested || entityRedactionRequested))
    {
        log.LogWarning("Text analytics was requested but no key/region is configured; skipping sentiment analysis and entity redaction.");
    }

    var generalErrorsStringBuilder = new StringBuilder();

    foreach (var resultFile in resultFiles)
    {
        log.LogInformation($"Getting result for file {resultFile.Name}");

        var transcriptionResult = await BatchClient.GetSpeechTranscriptFromSasAsync(resultFile.Links.ContentUrl, log).ConfigureAwait(false);

        if (string.IsNullOrEmpty(transcriptionResult.Source))
        {
            var errorMessage = "Transcription source is unknown, skipping evaluation.";
            log.LogError(errorMessage);
            generalErrorsStringBuilder.AppendLine(errorMessage);
            continue;
        }

        var fileName = StorageConnector.GetFileNameFromUri(new Uri(transcriptionResult.Source));

        // Text analytics only runs when every recognized phrase succeeded.
        var allPhrasesSucceeded = transcriptionResult.RecognizedPhrases != null
            && transcriptionResult.RecognizedPhrases.All(phrase => phrase.RecognitionStatus.Equals("Success", StringComparison.Ordinal));

        if (allPhrasesSucceeded && textAnalytics != null)
        {
            var textAnalyticsErrors = new List<string>();

            if (sentimentRequested)
            {
                var sentimentErrors = await textAnalytics.AddSentimentToTranscriptAsync(transcriptionResult, sentimentSetting).ConfigureAwait(false);
                textAnalyticsErrors.AddRange(sentimentErrors);
            }

            if (entityRedactionRequested)
            {
                var entityRedactionErrors = await textAnalytics.RedactEntitiesAsync(transcriptionResult, entityRedactionSetting).ConfigureAwait(false);
                textAnalyticsErrors.AddRange(entityRedactionErrors);
            }

            if (textAnalyticsErrors.Any())
            {
                var distinctErrors = textAnalyticsErrors.Distinct();
                var errorMessage = $"File {(string.IsNullOrEmpty(fileName) ? "unknown" : fileName)}:\n{string.Join('\n', distinctErrors)}";
                generalErrorsStringBuilder.AppendLine(errorMessage);
            }
        }

        // Store the (possibly enriched) transcript as JSON, omitting null-valued properties.
        var editedTranscriptionResultJson = JsonConvert.SerializeObject(
            transcriptionResult,
            Newtonsoft.Json.Formatting.Indented,
            new JsonSerializerSettings { NullValueHandling = NullValueHandling.Ignore });

        var jsonFileName = $"{fileName}.json";
        await StorageConnectorInstance.WriteTextFileToBlobAsync(editedTranscriptionResultJson, jsonContainer, jsonFileName, log).ConfigureAwait(false);

        if (FetchTranscriptionEnvironmentVariables.CreateHtmlResultFile)
        {
            var htmlContainer = FetchTranscriptionEnvironmentVariables.HtmlResultOutputContainer;
            var htmlFileName = $"{fileName}.html";
            var displayResults = TranscriptionToHtml.ToHtml(transcriptionResult, jobName);
            await StorageConnectorInstance.WriteTextFileToBlobAsync(displayResults, htmlContainer, htmlFileName, log).ConfigureAwait(false);
        }

        if (FetchTranscriptionEnvironmentVariables.UseSqlDatabase)
        {
            var duration = XmlConvert.ToTimeSpan(transcriptionResult.Duration);
            var approximatedCost = CostEstimation.GetCostEstimation(
                duration,
                transcriptionResult.CombinedRecognizedPhrases.Count(),
                serviceBusMessage.UsesCustomModel,
                sentimentSetting,
                entityRedactionSetting);

            // With several result files a fresh id is generated per file; otherwise the
            // id is taken from the last segment of the transcription location.
            var jobId = containsMultipleTranscriptions
                ? Guid.NewGuid()
                : new Guid(transcriptionLocation.Split('/').LastOrDefault());

            var dbConnectionString = FetchTranscriptionEnvironmentVariables.DatabaseConnectionString;
            using var dbConnector = new DatabaseConnector(log, dbConnectionString);
            await dbConnector.StoreTranscriptionAsync(
                jobId,
                serviceBusMessage.Locale,
                string.IsNullOrEmpty(fileName) ? jobName : fileName,
                (float)approximatedCost,
                transcriptionResult).ConfigureAwait(false);
        }
    }

    var generalErrors = generalErrorsStringBuilder.ToString();
    if (!string.IsNullOrEmpty(generalErrors))
    {
        var errorTxtname = $"jobs/{jobName}.txt";
        await StorageConnectorInstance.WriteTextFileToBlobAsync(
            generalErrors,
            FetchTranscriptionEnvironmentVariables.ErrorReportOutputContainer,
            errorTxtname,
            log).ConfigureAwait(false);
    }

    // FirstOrDefault yields null when no report file is present; skip report processing
    // in that case so the transcription is still deleted below.
    var reportFile = transcriptionFiles.Values.FirstOrDefault(t => t.Kind == TranscriptionFileKind.TranscriptionReport);
    if (reportFile != null)
    {
        var reportFileContent = await BatchClient.GetTranscriptionReportFileFromSasAsync(reportFile.Links.ContentUrl, log).ConfigureAwait(false);
        await ProcessReportFileAsync(reportFileContent, log).ConfigureAwait(false);
    }
    else
    {
        log.LogWarning($"No transcription report file found for job {jobName}.");
    }

    // Await instead of blocking on the task with GetAwaiter().GetResult().
    await BatchClient.DeleteTranscriptionAsync(transcriptionLocation, subscriptionKey, log).ConfigureAwait(false);
}
/// <summary>
/// Handles a succeeded transcription job: downloads and deserializes every transcription
/// result file, optionally adds sentiment and redacts entities, writes the result as JSON
/// (and optionally HTML) to blob storage, optionally stores it in the SQL database,
/// writes collected errors to the error report container, and finally deletes the
/// transcription from the service.
/// </summary>
/// <param name="client">Batch client used to list the transcription files and to delete the transcription.</param>
/// <param name="serviceBusMessage">Message supplying the locale, the sentiment/redaction flags and whether a custom model was used.</param>
/// <param name="transcriptionId">Id of the transcription; also used as database job id when there is a single result file.</param>
/// <param name="jobName">Job name used for logging, the HTML output, the error report name, and as fallback file name in the database.</param>
/// <param name="log">Logger for progress and error messages.</param>
/// <returns>A task that completes when all processing and the final delete have finished.</returns>
internal static async Task ProcessSucceededTranscriptionAsync(BatchClient client, PostTranscriptionServiceBusMessage serviceBusMessage, Guid transcriptionId, string jobName, ILogger log)
{
    log.LogInformation($"Got succeeded transcription for job {jobName}");

    var jsonContainer = FetchTranscriptionEnvironmentVariables.JsonResultOutputContainer;

    var transcriptionFiles = await client.GetTranscriptionFilesAsync(transcriptionId).ConfigureAwait(false);

    // Materialize once: the list is inspected for its size and then iterated.
    var resultFiles = transcriptionFiles.Values.Where(t => t.Kind == TranscriptionFileKind.Transcription).ToList();
    var containsMultipleTranscriptions = resultFiles.Count > 1;

    var textAnalyticsKey = FetchTranscriptionEnvironmentVariables.TextAnalyticsKey;
    var textAnalyticsRegion = FetchTranscriptionEnvironmentVariables.TextAnalyticsRegion;

    var textAnalyticsInfoProvided = !string.IsNullOrEmpty(textAnalyticsKey)
        && !string.IsNullOrEmpty(textAnalyticsRegion)
        && !textAnalyticsRegion.Equals("none", StringComparison.OrdinalIgnoreCase);

    var textAnalytics = textAnalyticsInfoProvided
        ? new TextAnalytics(serviceBusMessage.Locale, textAnalyticsKey, textAnalyticsRegion, log)
        : null;

    // Without key/region there is no TextAnalytics instance; skip those steps instead of
    // failing with a NullReferenceException for every file.
    if (textAnalytics == null && (serviceBusMessage.AddSentimentAnalysis || serviceBusMessage.AddEntityRedaction))
    {
        log.LogWarning("Text analytics was requested but no key/region is configured; skipping sentiment analysis and entity redaction.");
    }

    var generalErrorsStringBuilder = new StringBuilder();

    foreach (var resultFile in resultFiles)
    {
        // Stays empty until the source URI has been parsed; the catch handlers rely on that.
        var fileName = string.Empty;

        try
        {
            string transcriptionResultJson;
            using (var webClient = new WebClient())
            {
                // Asynchronous download so the calling thread is not blocked on network I/O.
                transcriptionResultJson = await webClient.DownloadStringTaskAsync(resultFile.Links.ContentUrl).ConfigureAwait(false);
            }

            var transcriptionResult = JsonConvert.DeserializeObject<SpeechTranscript>(transcriptionResultJson);
            fileName = StorageUtilities.GetFileNameFromUri(new Uri(transcriptionResult.Source));
            log.LogInformation($"Filename is {fileName}");

            // Skip files that have no phrases at all or in which no phrase succeeded.
            if (transcriptionResult.RecognizedPhrases == null
                || transcriptionResult.RecognizedPhrases.All(phrase => !phrase.RecognitionStatus.Equals("Success", StringComparison.Ordinal)))
            {
                continue;
            }

            var textAnalyticsErrors = new List<string>();

            if (textAnalytics != null && serviceBusMessage.AddSentimentAnalysis)
            {
                var sentimentErrors = await textAnalytics.AddSentimentToTranscriptAsync(transcriptionResult).ConfigureAwait(false);
                textAnalyticsErrors.AddRange(sentimentErrors);
            }

            if (textAnalytics != null && serviceBusMessage.AddEntityRedaction)
            {
                var entityRedactionErrors = await textAnalytics.RedactEntitiesAsync(transcriptionResult).ConfigureAwait(false);
                textAnalyticsErrors.AddRange(entityRedactionErrors);
            }

            var editedTranscriptionResultJson = JsonConvert.SerializeObject(transcriptionResult, Newtonsoft.Json.Formatting.Indented);

            // Store transcript json:
            var jsonFileName = $"{fileName}.json";
            await StorageUtilities.WriteTextFileToBlobAsync(
                FetchTranscriptionEnvironmentVariables.AzureWebJobsStorage,
                editedTranscriptionResultJson,
                jsonContainer,
                jsonFileName,
                log).ConfigureAwait(false);

            if (FetchTranscriptionEnvironmentVariables.CreateHtmlResultFile)
            {
                var htmlContainer = FetchTranscriptionEnvironmentVariables.HtmlResultOutputContainer;
                var htmlFileName = $"{fileName}.html";
                var displayResults = TranscriptionToHtml.ToHTML(transcriptionResult, jobName);
                await StorageUtilities.WriteTextFileToBlobAsync(
                    FetchTranscriptionEnvironmentVariables.AzureWebJobsStorage,
                    displayResults,
                    htmlContainer,
                    htmlFileName,
                    log).ConfigureAwait(false);
            }

            if (textAnalyticsErrors.Any())
            {
                var distinctErrors = textAnalyticsErrors.Distinct();
                var errorMessage = $"File {(string.IsNullOrEmpty(fileName) ? "unknown" : fileName)}:\n{string.Join('\n', distinctErrors)}";
                generalErrorsStringBuilder.AppendLine(errorMessage);
            }

            if (FetchTranscriptionEnvironmentVariables.UseSqlDatabase)
            {
                var duration = XmlConvert.ToTimeSpan(transcriptionResult.Duration);
                var approximatedCost = CostEstimation.GetCostEstimation(
                    duration,
                    transcriptionResult.CombinedRecognizedPhrases.Count(),
                    serviceBusMessage.UsesCustomModel,
                    serviceBusMessage.AddSentimentAnalysis,
                    serviceBusMessage.AddEntityRedaction);

                var dbConnectionString = FetchTranscriptionEnvironmentVariables.DatabaseConnectionString;
                using var dbConnector = new DatabaseConnector(log, dbConnectionString);

                // With several result files a fresh id is generated per file.
                await dbConnector.StoreTranscriptionAsync(
                    containsMultipleTranscriptions ? Guid.NewGuid() : transcriptionId,
                    serviceBusMessage.Locale,
                    string.IsNullOrEmpty(fileName) ? jobName : fileName,
                    (float)approximatedCost,
                    transcriptionResult).ConfigureAwait(false);
            }
        }
        catch (ArgumentNullException e) when (string.IsNullOrEmpty(fileName))
        {
            // Raised before a file name could be determined: record the failure in the
            // job-level report and move on to the next file.
            var errorMessage = $"Transcription file name is unknown, failed with message: {e.Message}";
            log.LogError(errorMessage);
            generalErrorsStringBuilder.AppendLine(errorMessage);
        }
        catch (Exception e) when (e is JsonException || e is SqlException)
        {
            // NOTE(review): fileName is only assigned after deserialization, so a
            // deserialization failure writes this report to ".txt" — confirm that is intended.
            var errorTxtname = fileName + ".txt";
            var errorMessage = $"Transcription result processing failed with exception: {e.Message}";
            log.LogError(errorMessage);
            await StorageUtilities.WriteTextFileToBlobAsync(
                FetchTranscriptionEnvironmentVariables.AzureWebJobsStorage,
                errorMessage,
                FetchTranscriptionEnvironmentVariables.ErrorReportOutputContainer,
                errorTxtname,
                log).ConfigureAwait(false);
        }

        // Any other exception is not caught by the filters above and propagates to the caller.
    }

    var errors = generalErrorsStringBuilder.ToString();
    if (!string.IsNullOrEmpty(errors))
    {
        var errorTxtname = jobName + ".txt";
        await StorageUtilities.WriteTextFileToBlobAsync(
            FetchTranscriptionEnvironmentVariables.AzureWebJobsStorage,
            errors,
            FetchTranscriptionEnvironmentVariables.ErrorReportOutputContainer,
            errorTxtname,
            log).ConfigureAwait(false);
    }

    // Delete trace from service
    await client.DeleteTranscriptionAsync(transcriptionId).ConfigureAwait(false);
}