/// <summary>
/// Synchronous entry point: deserializes a raw service bus payload and runs the
/// transcript fetch for it.
/// </summary>
/// <param name="message">Serialized service bus message. A null or empty value short-circuits to <c>false</c>.</param>
/// <param name="log">Logger passed through to <c>TranscriptionProcessor.GetTranscripts</c>.</param>
/// <returns>
/// <c>false</c> when <paramref name="message"/> is null or empty; otherwise the value produced by
/// <c>TranscriptionProcessor.GetTranscripts</c>.
/// </returns>
/// <remarks>
/// The fetch is awaited by reading <c>Task.Result</c>, so the calling thread blocks until it completes
/// and any failure of the task surfaces wrapped in an <see cref="AggregateException"/>.
/// </remarks>
public static bool ProcessMessage(string message, ILogger log)
{
    // Nothing to process without a payload.
    if (string.IsNullOrEmpty(message))
    {
        return false;
    }

    var deserializedMessage = PostTranscriptionServiceBusMessage.DeserializeMessage(message);
    var fetchTask = TranscriptionProcessor.GetTranscripts(deserializedMessage, log);

    return fetchTask.Result;
}
/// <summary>
/// Looks up the transcription referenced by a service bus message and acts on its current status:
/// failed and succeeded transcriptions are processed, running and not-started ones are re-enqueued
/// so that they are checked again later.
/// </summary>
/// <param name="serviceBusMessage">Message describing the transcription job (location, subscription, retry settings).</param>
/// <param name="log">Logger used for progress and retry information.</param>
/// <returns>
/// <c>false</c> when the request was throttled or timed out and the message was re-enqueued;
/// <c>true</c> otherwise, including for statuses that are not handled by the switch.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="serviceBusMessage"/> is null.</exception>
/// <exception cref="WebException">The service call failed for a reason other than throttling or timeout.</exception>
public static async Task<bool> GetTranscripts(PostTranscriptionServiceBusMessage serviceBusMessage, ILogger log)
{
    if (serviceBusMessage == null)
    {
        throw new ArgumentNullException(nameof(serviceBusMessage));
    }

    var jobName = serviceBusMessage.JobName;

    // The transcription id is the last path segment of the transcription location.
    var transcriptionId = serviceBusMessage.TranscriptionLocation.Split('/').LastOrDefault();
    var transcriptionGuid = new Guid(transcriptionId);

    log.LogInformation($"Received transcription {transcriptionGuid} with name {jobName} from service bus message.");

    var serviceBusConnectionString = FetchTranscriptionEnvironmentVariables.FetchTranscriptionServiceBusConnectionString;

    var reenqueueingTimeInSeconds = serviceBusMessage.ReenqueueingTimeInSeconds;
    log.LogInformation($"Re-enqueueing time for messages: {reenqueueingTimeInSeconds} seconds.");

    log.LogInformation($"Subscription location: {serviceBusMessage.Subscription.LocationUri.AbsoluteUri}");

    var client = new BatchClient(serviceBusMessage.Subscription.SubscriptionKey, serviceBusMessage.Subscription.LocationUri.AbsoluteUri, log);

    try
    {
        var transcription = await client.GetTranscriptionAsync(transcriptionGuid).ConfigureAwait(false);

        switch (transcription.Status)
        {
            case "Failed":
                await ProcessFailedTranscriptionAsync(client, transcription, transcriptionGuid, jobName, log).ConfigureAwait(false);
                break;

            case "Succeeded":
                await ProcessSucceededTranscriptionAsync(client, serviceBusMessage, transcriptionGuid, jobName, log).ConfigureAwait(false);
                break;

            case "Running":
                var runningMessage = serviceBusMessage.RetryMessage();
                log.LogInformation("Transcription running, retrying message - retry count: " + runningMessage.RetryCount);

                // Awaited rather than blocked on: this method is already asynchronous.
                await ServiceBusUtilities.SendServiceBusMessageAsync(serviceBusConnectionString, runningMessage.CreateMessageString(), log, reenqueueingTimeInSeconds).ConfigureAwait(false);
                break;

            case "NotStarted":
                var notStartedMessage = serviceBusMessage.RetryMessage();
                var initialDelayInSeconds = serviceBusMessage.InitialDelayInSeconds;

                // If the transcription is not started yet, the job will take at least length of the audio:
                var notStartedReenqueueingTime = Math.Max(initialDelayInSeconds, reenqueueingTimeInSeconds);

                log.LogInformation("Transcription not started, retrying message - retry count: " + notStartedMessage.RetryCount);
                await ServiceBusUtilities.SendServiceBusMessageAsync(serviceBusConnectionString, notStartedMessage.CreateMessageString(), log, notStartedReenqueueingTime).ConfigureAwait(false);
                break;
        }
    }
    catch (WebException e)
    {
        // WebException.Response is null when no response was received at all, so it must be
        // type-checked before reading the status code; otherwise a NullReferenceException
        // would hide the original failure.
        if (e.Response is HttpWebResponse httpResponse && BatchClient.IsThrottledOrTimeoutStatusCode(httpResponse.StatusCode))
        {
            var timeoutMessage = serviceBusMessage.RetryMessage();
            log.LogInformation("Timeout or throttled, retrying message - retry count: " + timeoutMessage.RetryCount);
            await ServiceBusUtilities.SendServiceBusMessageAsync(serviceBusConnectionString, timeoutMessage.CreateMessageString(), log, reenqueueingTimeInSeconds).ConfigureAwait(false);
            return false;
        }

        throw;
    }
    catch (TimeoutException e)
    {
        var timeoutMessage = serviceBusMessage.RetryMessage();
        log.LogInformation($"Timeout - re-enqueueing fetch transcription message. Exception message: {e.Message}");
        await ServiceBusUtilities.SendServiceBusMessageAsync(serviceBusConnectionString, timeoutMessage.CreateMessageString(), log, reenqueueingTimeInSeconds).ConfigureAwait(false);
        return false;
    }

    return true;
}
/// <summary>
/// Handles a succeeded transcription: downloads each transcription result file, optionally adds
/// sentiment analysis and entity redaction, stores the JSON (and optionally HTML and SQL) output,
/// writes collected errors to the error report container, and finally deletes the transcription
/// from the service.
/// </summary>
/// <param name="client">Batch client used to list result files and delete the transcription.</param>
/// <param name="serviceBusMessage">Job message carrying the locale and the processing options.</param>
/// <param name="transcriptionId">Id of the succeeded transcription.</param>
/// <param name="jobName">Job name, used for logging, HTML output and the error report file name.</param>
/// <param name="log">Logger.</param>
/// <returns>A task that completes once all result files are processed and the transcription is deleted.</returns>
internal static async Task ProcessSucceededTranscriptionAsync(BatchClient client, PostTranscriptionServiceBusMessage serviceBusMessage, Guid transcriptionId, string jobName, ILogger log)
{
    // Recorded in the error report when analysis is requested but no Text Analytics client exists.
    const string textAnalyticsNotConfiguredError = "Text analytics was requested but no Text Analytics key/region is configured.";

    log.LogInformation($"Got succeeded transcription for job {jobName}");

    var jsonContainer = FetchTranscriptionEnvironmentVariables.JsonResultOutputContainer;

    var transcriptionFiles = await client.GetTranscriptionFilesAsync(transcriptionId).ConfigureAwait(false);

    // Materialized once: the sequence is both counted and iterated below.
    var resultFiles = transcriptionFiles.Values.Where(t => t.Kind == TranscriptionFileKind.Transcription).ToList();
    var containsMultipleTranscriptions = resultFiles.Count > 1;

    var textAnalyticsKey = FetchTranscriptionEnvironmentVariables.TextAnalyticsKey;
    var textAnalyticsRegion = FetchTranscriptionEnvironmentVariables.TextAnalyticsRegion;

    var textAnalyticsInfoProvided = !string.IsNullOrEmpty(textAnalyticsKey)
        && !string.IsNullOrEmpty(textAnalyticsRegion)
        && !textAnalyticsRegion.Equals("none", StringComparison.OrdinalIgnoreCase);

    // Null when key/region are missing; every use below must check for that.
    var textAnalytics = textAnalyticsInfoProvided ? new TextAnalytics(serviceBusMessage.Locale, textAnalyticsKey, textAnalyticsRegion, log) : null;

    var generalErrorsStringBuilder = new StringBuilder();

    foreach (var resultFile in resultFiles)
    {
        var fileName = string.Empty;

        try
        {
            string transcriptionResultJson;
            using (var webClient = new WebClient())
            {
                // Asynchronous download so the thread is not blocked inside an async method.
                transcriptionResultJson = await webClient.DownloadStringTaskAsync(resultFile.Links.ContentUrl).ConfigureAwait(false);
            }

            var transcriptionResult = JsonConvert.DeserializeObject<SpeechTranscript>(transcriptionResultJson);
            fileName = StorageUtilities.GetFileNameFromUri(new Uri(transcriptionResult.Source));
            log.LogInformation($"Filename is {fileName}");

            // Skip results that contain no successfully recognized phrase.
            if (transcriptionResult.RecognizedPhrases == null || transcriptionResult.RecognizedPhrases.All(phrase => !phrase.RecognitionStatus.Equals("Success", StringComparison.Ordinal)))
            {
                continue;
            }

            var textAnalyticsErrors = new List<string>();

            if (serviceBusMessage.AddSentimentAnalysis)
            {
                if (textAnalytics != null)
                {
                    var sentimentErrors = await textAnalytics.AddSentimentToTranscriptAsync(transcriptionResult).ConfigureAwait(false);
                    textAnalyticsErrors.AddRange(sentimentErrors);
                }
                else
                {
                    // Report instead of failing the whole job with a NullReferenceException.
                    textAnalyticsErrors.Add(textAnalyticsNotConfiguredError);
                }
            }

            if (serviceBusMessage.AddEntityRedaction)
            {
                if (textAnalytics != null)
                {
                    var entityRedactionErrors = await textAnalytics.RedactEntitiesAsync(transcriptionResult).ConfigureAwait(false);
                    textAnalyticsErrors.AddRange(entityRedactionErrors);
                }
                else
                {
                    textAnalyticsErrors.Add(textAnalyticsNotConfiguredError);
                }
            }

            var editedTranscriptionResultJson = JsonConvert.SerializeObject(transcriptionResult, Newtonsoft.Json.Formatting.Indented);

            // Store transcript json:
            var jsonFileName = $"{fileName}.json";
            await StorageUtilities.WriteTextFileToBlobAsync(FetchTranscriptionEnvironmentVariables.AzureWebJobsStorage, editedTranscriptionResultJson, jsonContainer, jsonFileName, log).ConfigureAwait(false);

            if (FetchTranscriptionEnvironmentVariables.CreateHtmlResultFile)
            {
                var htmlContainer = FetchTranscriptionEnvironmentVariables.HtmlResultOutputContainer;
                var htmlFileName = $"{fileName}.html";
                var displayResults = TranscriptionToHtml.ToHTML(transcriptionResult, jobName);
                await StorageUtilities.WriteTextFileToBlobAsync(FetchTranscriptionEnvironmentVariables.AzureWebJobsStorage, displayResults, htmlContainer, htmlFileName, log).ConfigureAwait(false);
            }

            if (textAnalyticsErrors.Any())
            {
                var distinctErrors = textAnalyticsErrors.Distinct();
                var errorMessage = $"File {(string.IsNullOrEmpty(fileName) ? "unknown" : fileName)}:\n{string.Join('\n', distinctErrors)}";

                generalErrorsStringBuilder.AppendLine(errorMessage);
            }

            if (FetchTranscriptionEnvironmentVariables.UseSqlDatabase)
            {
                var duration = XmlConvert.ToTimeSpan(transcriptionResult.Duration);
                var approximatedCost = CostEstimation.GetCostEstimation(
                    duration,
                    transcriptionResult.CombinedRecognizedPhrases.Count(),
                    serviceBusMessage.UsesCustomModel,
                    serviceBusMessage.AddSentimentAnalysis,
                    serviceBusMessage.AddEntityRedaction);

                var dbConnectionString = FetchTranscriptionEnvironmentVariables.DatabaseConnectionString;
                using var dbConnector = new DatabaseConnector(log, dbConnectionString);

                // With several result files each one is stored under its own new id;
                // a single result is stored under the transcription id.
                await dbConnector.StoreTranscriptionAsync(
                    containsMultipleTranscriptions ? Guid.NewGuid() : transcriptionId,
                    serviceBusMessage.Locale,
                    string.IsNullOrEmpty(fileName) ? jobName : fileName,
                    (float)approximatedCost,
                    transcriptionResult).ConfigureAwait(false);
            }
        }
        catch (ArgumentNullException e) when (string.IsNullOrEmpty(fileName))
        {
            // The file name could not be determined; record the error and move on to the next file.
            var errorMessage = $"Transcription file name is unknown, failed with message: {e.Message}";
            log.LogError(errorMessage);

            generalErrorsStringBuilder.AppendLine(errorMessage);
        }
        catch (Exception e) when (e is JsonException || e is SqlException)
        {
            // Parsing or database failures get their own per-file error report.
            var errorTxtname = fileName + ".txt";
            var errorMessage = $"Transcription result processing failed with exception: {e.Message}";
            log.LogError(errorMessage);

            await StorageUtilities.WriteTextFileToBlobAsync(
                FetchTranscriptionEnvironmentVariables.AzureWebJobsStorage,
                errorMessage,
                FetchTranscriptionEnvironmentVariables.ErrorReportOutputContainer,
                errorTxtname,
                log).ConfigureAwait(false);
        }
    }

    var errors = generalErrorsStringBuilder.ToString();
    if (!string.IsNullOrEmpty(errors))
    {
        var errorTxtname = jobName + ".txt";

        await StorageUtilities.WriteTextFileToBlobAsync(
            FetchTranscriptionEnvironmentVariables.AzureWebJobsStorage,
            errors,
            FetchTranscriptionEnvironmentVariables.ErrorReportOutputContainer,
            errorTxtname,
            log).ConfigureAwait(false);
    }

    // Delete trace from service
    await client.DeleteTranscriptionAsync(transcriptionId).ConfigureAwait(false);
}
/// <summary>
/// Creates one batch transcription job for the audio files referenced by the given messages and,
/// when the job was created, sends a message to the fetch-transcription queue.
/// Throttled or timed-out requests are re-enqueued on the start queue; other failures are written
/// to the failed-job log.
/// </summary>
/// <param name="messages">Service bus messages whose bodies are UTF-8 encoded JSON <c>ServiceBusMessage</c> payloads.</param>
/// <param name="jobName">Name of the transcription job; also used in log output and failure reporting.</param>
/// <param name="startDateTime">Start time, forwarded in the fetch message formatted with the invariant culture.</param>
/// <returns>A task that completes when the job was started and announced, re-enqueued, or reported as failed.</returns>
private async Task StartBatchTranscriptionJobAsync(IEnumerable<Message> messages, string jobName, DateTime startDateTime)
{
    // Delay, in seconds, before a throttled or timed-out start request is retried.
    const int retryDelayInSeconds = 2;

    if (messages == null || !messages.Any())
    {
        Logger.LogError("Invalid service bus message(s).");
        return;
    }

    // Puts all incoming messages back on the start-transcription queue.
    async Task ReenqueueStartMessagesAsync()
    {
        var startTranscriptionSBConnectionString = StartTranscriptionEnvironmentVariables.StartTranscriptionServiceBusConnectionString;

        foreach (var startMessage in messages)
        {
            await ServiceBusUtilities.SendServiceBusMessageAsync(startTranscriptionSBConnectionString, startMessage, Logger, retryDelayInSeconds).ConfigureAwait(false);
        }
    }

    var locationString = string.Empty;

    // Deferred query: the message bodies are deserialized when the sequence is enumerated.
    var serviceBusMessages = messages.Select(message => JsonConvert.DeserializeObject<ServiceBusMessage>(Encoding.UTF8.GetString(message.Body)));
    var modelIds = new List<Guid>();

    try
    {
        var properties = GetTranscriptionPropertyBag();
        modelIds = GetModelIds();

        var audioFileUrls = new List<string>();

        foreach (var serviceBusMessage in serviceBusMessages)
        {
            var audioFileUrl = await StorageUtilities.CreateSASAsync(StartTranscriptionEnvironmentVariables.AzureWebJobsStorage, serviceBusMessage.Data.Url, Logger).ConfigureAwait(false);
            audioFileUrls.Add(audioFileUrl);
        }

        var client = new BatchClient(Subscription.SubscriptionKey, Subscription.LocationUri.AbsoluteUri, Logger);
        var transcriptionLocation = await client.PostTranscriptionAsync(
            jobName,
            "StartByTimerTranscription",
            Locale,
            properties,
            audioFileUrls,
            modelIds).ConfigureAwait(false);

        Logger.LogInformation($"Location: {transcriptionLocation}");
        locationString = transcriptionLocation.ToString();
    }
    catch (WebException e)
    {
        // WebException.Response is null when no response was received, so it is type-checked
        // before the status code is read.
        if (e.Response is HttpWebResponse httpResponse && BatchClient.IsThrottledOrTimeoutStatusCode(httpResponse.StatusCode))
        {
            Logger.LogError($"Throttled or timeout while creating post, re-enqueueing transcription start. Message: {e.Message}");
            await ReenqueueStartMessagesAsync().ConfigureAwait(false);
            return;
        }

        Logger.LogError($"Failed with Webexception. Write message for job with name {jobName} to report file.");

        var exceptionMessage = e.Message;

        // Append the response body when there is one; without a response only the
        // exception message is reported.
        var responseStream = e.Response?.GetResponseStream();
        if (responseStream != null)
        {
            using (var reader = new StreamReader(responseStream))
            {
                var responseMessage = await reader.ReadToEndAsync().ConfigureAwait(false);
                exceptionMessage += "\n" + responseMessage;
            }
        }

        await WriteFailedJobLogToStorageAsync(serviceBusMessages, exceptionMessage, jobName).ConfigureAwait(false);
        return;
    }
    catch (TimeoutException e)
    {
        Logger.LogError($"Timeout while creating post, re-enqueueing transcription start. Message: {e.Message}");
        await ReenqueueStartMessagesAsync().ConfigureAwait(false);
        return;
    }
    catch (Exception e)
    {
        Logger.LogError($"Failed with Exception {e} and message {e.Message}. Write message for job with name {jobName} to report file.");
        await WriteFailedJobLogToStorageAsync(serviceBusMessages, e.Message, jobName).ConfigureAwait(false);
        return;
    }

    var reenqueueingTimeInSeconds = MapDatasetSizeToReenqueueingTimeInSeconds(StartTranscriptionEnvironmentVariables.AudioDatasetSize);

    var transcriptionMessage = new PostTranscriptionServiceBusMessage(
        Subscription,
        locationString,
        jobName,
        startDateTime.ToString(CultureInfo.InvariantCulture),
        Locale,
        modelIds.Any(),
        StartTranscriptionEnvironmentVariables.AddSentimentAnalysis,
        StartTranscriptionEnvironmentVariables.AddEntityRedaction,
        0,
        reenqueueingTimeInSeconds,
        reenqueueingTimeInSeconds);

    var fetchTranscriptionSBConnectionString = StartTranscriptionEnvironmentVariables.FetchTranscriptionServiceBusConnectionString;

    // Only report whether the setting is present: connection strings typically embed
    // an access key and must not end up in the logs.
    Logger.LogInformation($"FetchTranscriptionServiceBusConnectionString from settings is {(string.IsNullOrEmpty(fetchTranscriptionSBConnectionString) ? "not set" : "set")}.");

    try
    {
        await ServiceBusUtilities.SendServiceBusMessageAsync(fetchTranscriptionSBConnectionString, transcriptionMessage.CreateMessageString(), Logger, reenqueueingTimeInSeconds).ConfigureAwait(false);

        // Logged only after the send succeeded, so a failed send is not reported as success.
        Logger.LogInformation($"Fetch transcription queue informed about job at: {jobName}");
    }
    catch (Exception e)
    {
        Logger.LogError($"Failed with Exception {e} and message {e.Message}. Write message for job with name {jobName} to report file.");
        await WriteFailedJobLogToStorageAsync(serviceBusMessages, e.Message, jobName).ConfigureAwait(false);
    }
}