/// <summary>
/// Records a failed job in storage. The error message is written to
/// "jobs/{jobName}.txt" in the error report container; then, for every audio
/// file referenced by the message, the same text is written to
/// "{fileName}.txt" and the audio file is moved from the audio input
/// container to the error files container.
/// </summary>
/// <param name="transcriptionStartedMessage">Message whose <c>AudioFileInfos</c> are processed.</param>
/// <param name="errorMessage">Text written to the job report and to every per-file report.</param>
/// <param name="jobName">Job name used to build the job-level report path.</param>
/// <param name="log">Logger passed to the storage calls and used for storage errors.</param>
private static async Task WriteFailedJobLogToStorageAsync(TranscriptionStartedMessage transcriptionStartedMessage, string errorMessage, string jobName, ILogger log)
        {
            var reportContainer = FetchTranscriptionEnvironmentVariables.ErrorReportOutputContainer;

            // Job-level report; a failure here is not caught and propagates to the caller.
            await StorageConnectorInstance.WriteTextFileToBlobAsync(errorMessage, reportContainer, $"jobs/{jobName}.txt", log).ConfigureAwait(false);

            foreach (var audio in transcriptionStartedMessage.AudioFileInfos)
            {
                var audioName = StorageConnector.GetFileNameFromUri(new Uri(audio.FileUrl));

                try
                {
                    await StorageConnectorInstance.WriteTextFileToBlobAsync(errorMessage, reportContainer, audioName + ".txt", log).ConfigureAwait(false);

                    var sourceContainer = FetchTranscriptionEnvironmentVariables.AudioInputContainer;
                    var targetContainer = FetchTranscriptionEnvironmentVariables.ErrorFilesOutputContainer;
                    await StorageConnectorInstance.MoveFileAsync(sourceContainer, audioName, targetContainer, audioName, log).ConfigureAwait(false);
                }
                catch (StorageException storageException)
                {
                    // Per-file storage errors are logged and the loop moves on to the next file.
                    log.LogError($"Storage Exception {storageException} while writing error log to file and moving result");
                }
            }
        }
Example #2
0
        /// <summary>
        /// Increments the message's failed-execution counter and then either
        /// re-sends the message to the fetch queue (while the counter does not
        /// exceed the retry limit) or writes the failure log and deletes the
        /// transcription.
        /// </summary>
        /// <param name="message">Message being retried; its <c>FailedExecutionCounter</c> is mutated.</param>
        /// <param name="error">Error text written to storage when the job is given up.</param>
        /// <param name="jobName">Job name forwarded to the failure log writer.</param>
        /// <param name="transcriptionLocation">Location passed to the delete call.</param>
        /// <param name="subscriptionKey">Key passed to the delete call.</param>
        /// <param name="log">Logger.</param>
        private static async Task RetryOrFailJobAsync(TranscriptionStartedMessage message, string error, string jobName, string transcriptionLocation, string subscriptionKey, ILogger log)
        {
            message.FailedExecutionCounter += 1;
            var retryDelay = GetMessageDelayTime(message.PollingCounter);

            var retriesExhausted = message.FailedExecutionCounter > FetchTranscriptionEnvironmentVariables.RetryLimit;

            if (!retriesExhausted)
            {
                log.LogInformation("Retrying..");
                var payload = message.CreateMessageString();
                await ServiceBusUtilities.SendServiceBusMessageAsync(FetchQueueClientInstance, payload, log, retryDelay).ConfigureAwait(false);
                return;
            }

            // Retry budget used up: persist the failure, then remove the transcription.
            await WriteFailedJobLogToStorageAsync(message, error, jobName, log).ConfigureAwait(false);
            await BatchClient.DeleteTranscriptionAsync(transcriptionLocation, subscriptionKey, log).ConfigureAwait(false);
        }
        /// <summary>
        /// Service Bus triggered entry point for the "fetch_transcription_queue"
        /// queue. Deserializes the incoming message and hands it to
        /// <c>TranscriptionProcessor.ProcessTranscriptionJobAsync</c>.
        /// </summary>
        /// <param name="message">Raw queue message; a null or empty value is logged and ignored.</param>
        /// <param name="log">Logger; must not be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="log"/> is null.</exception>
        public static async Task Run([ServiceBusTrigger("fetch_transcription_queue", Connection = "AzureServiceBus")] string message, ILogger log)
        {
            if (log is null)
            {
                throw new ArgumentNullException(nameof(log));
            }

            log.LogInformation($"C# Service bus triggered function executed at: {DateTime.Now}");

            var hasPayload = !string.IsNullOrEmpty(message);

            if (!hasPayload)
            {
                log.LogInformation($"Found invalid service bus message: {message}. Stopping execution.");
                return;
            }

            await TranscriptionProcessor
                .ProcessTranscriptionJobAsync(TranscriptionStartedMessage.DeserializeMessage(message), log)
                .ConfigureAwait(false);
        }
        /// <summary>
        /// Creates one batch transcription for the audio files referenced by
        /// <paramref name="messages"/> and, on success, sends a
        /// <c>TranscriptionStartedMessage</c> to the fetch queue with an initial
        /// fetching delay.
        /// </summary>
        /// <remarks>
        /// Error handling: a throttled/timeout <see cref="WebException"/> and a
        /// <see cref="TimeoutException"/> go through <c>RetryOrFailMessagesAsync</c>;
        /// every other failure is written to storage via
        /// <c>WriteFailedJobLogToStorageAsync</c>. In all cases the exception is rethrown.
        /// </remarks>
        /// <param name="messages">Service bus messages whose bodies are UTF-8 JSON; null or empty input is logged and ignored.</param>
        /// <param name="jobName">Name given to the transcription and used in log/error output.</param>
        private async Task StartBatchTranscriptionJobAsync(IEnumerable <Message> messages, string jobName)
        {
            if (messages == null || !messages.Any())
            {
                Logger.LogError($"Invalid service bus message(s).");
                return;
            }

            var fetchingDelay = GetInitialFetchingDelay(messages.Count());

            // Deferred query: the bodies are deserialized each time this sequence is enumerated.
            var serviceBusMessages = messages.Select(message => JsonConvert.DeserializeObject <ServiceBusMessage>(Encoding.UTF8.GetString(message.Body)));

            try
            {
                var properties = GetTranscriptionPropertyBag();

                var sasUrls        = new List <string>();
                var audioFileInfos = new List <AudioFileInfo>();

                foreach (var serviceBusMessage in serviceBusMessages)
                {
                    var sasUrl = StorageConnectorInstance.CreateSas(serviceBusMessage.Data.Url);
                    sasUrls.Add(sasUrl);
                    audioFileInfos.Add(new AudioFileInfo(serviceBusMessage.Data.Url.AbsoluteUri, serviceBusMessage.RetryCount));
                }

                ModelIdentity modelIdentity = null;

                // A custom model is only used when the configured id parses as a GUID.
                if (Guid.TryParse(StartTranscriptionEnvironmentVariables.CustomModelId, out var customModelId))
                {
                    modelIdentity = ModelIdentity.Create(StartTranscriptionEnvironmentVariables.AzureSpeechServicesRegion, customModelId);
                }

                var transcriptionDefinition = TranscriptionDefinition.Create(jobName, "StartByTimerTranscription", Locale, sasUrls, properties, modelIdentity);

                var transcriptionLocation = await BatchClient.PostTranscriptionAsync(
                    transcriptionDefinition,
                    HostName,
                    SubscriptionKey,
                    Logger).ConfigureAwait(false);

                Logger.LogInformation($"Location: {transcriptionLocation}");

                var transcriptionMessage = new TranscriptionStartedMessage(
                    transcriptionLocation.AbsoluteUri,
                    jobName,
                    Locale,
                    modelIdentity != null,
                    audioFileInfos,
                    0,
                    0);

                await ServiceBusUtilities.SendServiceBusMessageAsync(FetchQueueClientInstance, transcriptionMessage.CreateMessageString(), Logger, fetchingDelay).ConfigureAwait(false);
            }
            catch (WebException e)
            {
                // WebException.Response is null when no HTTP response was received
                // (e.g. connection failure). Dereferencing it unguarded would throw a
                // NullReferenceException here and hide the original error.
                var httpResponse = e.Response as HttpWebResponse;

                if (httpResponse != null && BatchClient.IsThrottledOrTimeoutStatusCode(httpResponse.StatusCode))
                {
                    var errorMessage = $"Throttled or timeout while creating post. Error Message: {e.Message}";
                    Logger.LogError(errorMessage);
                    await RetryOrFailMessagesAsync(messages, errorMessage).ConfigureAwait(false);
                }
                else
                {
                    var errorMessage = $"Start Transcription in job with name {jobName} failed with WebException {e} and message {e.Message}";
                    Logger.LogError(errorMessage);

                    // Append the response body only when a response (and stream) is available.
                    var responseStream = e.Response?.GetResponseStream();

                    if (responseStream != null)
                    {
                        using (var reader = new StreamReader(responseStream))
                        {
                            var responseMessage = await reader.ReadToEndAsync().ConfigureAwait(false);

                            errorMessage += "\nResponse message:" + responseMessage;
                        }
                    }

                    await WriteFailedJobLogToStorageAsync(serviceBusMessages, errorMessage, jobName).ConfigureAwait(false);
                }

                throw;
            }
            catch (TimeoutException e)
            {
                var errorMessage = $"Timeout while creating post, re-enqueueing transcription start. Message: {e.Message}";
                Logger.LogError(errorMessage);
                await RetryOrFailMessagesAsync(messages, errorMessage).ConfigureAwait(false);

                throw;
            }
            catch (Exception e)
            {
                var errorMessage = $"Start Transcription in job with name {jobName} failed with exception {e} and message {e.Message}";
                Logger.LogError(errorMessage);
                await WriteFailedJobLogToStorageAsync(serviceBusMessages, errorMessage, jobName).ConfigureAwait(false);

                throw;
            }

            Logger.LogInformation($"Fetch transcription queue successfully informed about job at: {jobName}");
        }
Example #5
0
        /// <summary>
        /// Polls the transcription referenced by <paramref name="serviceBusMessage"/>
        /// once and acts on its status: "Failed" and "Succeeded" are handed to their
        /// dedicated handlers, "Running" and "NotStarted" re-send the message to the
        /// fetch queue with a delay. Any other status is ignored.
        /// </summary>
        /// <remarks>
        /// The message's <c>PollingCounter</c> is incremented before polling. A
        /// throttled/timeout <see cref="WebException"/> and a
        /// <see cref="TimeoutException"/> re-send the message; all other exceptions go
        /// through <c>RetryOrFailJobAsync</c>. Every caught exception is rethrown.
        /// </remarks>
        /// <param name="serviceBusMessage">Message describing the started transcription; must not be null.</param>
        /// <param name="log">Logger.</param>
        /// <exception cref="ArgumentNullException"><paramref name="serviceBusMessage"/> is null.</exception>
        public static async Task ProcessTranscriptionJobAsync(TranscriptionStartedMessage serviceBusMessage, ILogger log)
        {
            if (serviceBusMessage == null)
            {
                throw new ArgumentNullException(nameof(serviceBusMessage));
            }

            var speechKey = FetchTranscriptionEnvironmentVariables.AzureSpeechServicesKey;
            var name      = serviceBusMessage.JobName;
            var location  = serviceBusMessage.TranscriptionLocation;

            log.LogInformation($"Received transcription at {location} with name {name} from service bus message.");

            serviceBusMessage.PollingCounter += 1;
            var pollDelay = GetMessageDelayTime(serviceBusMessage.PollingCounter);

            // Puts the (already updated) message back on the fetch queue after the polling delay.
            async Task RequeueAsync()
            {
                await ServiceBusUtilities.SendServiceBusMessageAsync(FetchQueueClientInstance, serviceBusMessage.CreateMessageString(), log, pollDelay).ConfigureAwait(false);
            }

            // Logs the failure and lets RetryOrFailJobAsync decide between retry and giving up.
            async Task RetryOrFailAsync(string failure)
            {
                log.LogError($"{failure}");
                await RetryOrFailJobAsync(serviceBusMessage, failure, name, location, speechKey, log).ConfigureAwait(false);
            }

            try
            {
                var transcription = await BatchClient.GetTranscriptionAsync(location, speechKey, log).ConfigureAwait(false);

                log.LogInformation($"Polled {serviceBusMessage.PollingCounter} time(s) for results in total, delay job for {pollDelay.TotalMinutes} minutes if not completed.");

                switch (transcription.Status)
                {
                case "NotStarted":
                    log.LogInformation($"Transcription not started, polling again after {pollDelay.TotalMinutes} minutes.");
                    await RequeueAsync().ConfigureAwait(false);
                    break;

                case "Running":
                    log.LogInformation($"Transcription running, polling again after {pollDelay.TotalMinutes} minutes.");
                    await RequeueAsync().ConfigureAwait(false);
                    break;

                case "Succeeded":
                    await ProcessSucceededTranscriptionAsync(location, speechKey, serviceBusMessage, name, log).ConfigureAwait(false);
                    break;

                case "Failed":
                    await ProcessFailedTranscriptionAsync(location, speechKey, serviceBusMessage, transcription, name, log).ConfigureAwait(false);
                    break;
                }
            }
            catch (WebException webException)
            {
                var isTransient = webException.Response != null &&
                                  BatchClient.IsThrottledOrTimeoutStatusCode(((HttpWebResponse)webException.Response).StatusCode);

                if (isTransient)
                {
                    log.LogInformation("Timeout or throttled, retrying message.");
                    await RequeueAsync().ConfigureAwait(false);
                }
                else
                {
                    await RetryOrFailAsync($"Fetch Transcription in job with name {name} failed with WebException {webException} and message {webException.Message}.").ConfigureAwait(false);
                }

                throw;
            }
            catch (TimeoutException timeoutException)
            {
                log.LogInformation($"Timeout - re-enqueueing fetch transcription message. Exception message: {timeoutException.Message}");
                await RequeueAsync().ConfigureAwait(false);

                throw;
            }
            catch (Exception exception)
            {
                await RetryOrFailAsync($"Fetch Transcription in job with name {name} failed with Exception {exception} and message {exception.Message}.").ConfigureAwait(false);

                throw;
            }
        }
Example #6
0
        /// <summary>
        /// Handles a transcription job that finished with status "Succeeded".
        /// For every transcription result file it optionally applies text analytics
        /// (sentiment, entity redaction), writes the result as JSON (and optionally
        /// HTML) to storage and optionally stores it in the SQL database. Collected
        /// errors are written to "jobs/{jobName}.txt"; finally the report file is
        /// processed and the transcription is deleted.
        /// </summary>
        /// <param name="transcriptionLocation">Location of the transcription; its last path segment is parsed as the job GUID when there is a single result.</param>
        /// <param name="subscriptionKey">Key passed to the batch client calls.</param>
        /// <param name="serviceBusMessage">Message providing the locale and the custom-model flag.</param>
        /// <param name="jobName">Job name used for logging, HTML output and the error report path.</param>
        /// <param name="log">Logger.</param>
        private static async Task ProcessSucceededTranscriptionAsync(string transcriptionLocation, string subscriptionKey, TranscriptionStartedMessage serviceBusMessage, string jobName, ILogger log)
        {
            log.LogInformation($"Got succeeded transcription for job {jobName}");

            var jsonContainer       = FetchTranscriptionEnvironmentVariables.JsonResultOutputContainer;
            var textAnalyticsKey    = FetchTranscriptionEnvironmentVariables.TextAnalyticsKey;
            var textAnalyticsRegion = FetchTranscriptionEnvironmentVariables.TextAnalyticsRegion;

            var transcriptionFiles = await BatchClient.GetTranscriptionFilesAsync(transcriptionLocation, subscriptionKey, log).ConfigureAwait(false);

            log.LogInformation($"Received transcription files.");

            // Materialized once so the filter is not re-evaluated for the count check and the loop.
            var resultFiles = transcriptionFiles.Values.Where(t => t.Kind == TranscriptionFileKind.Transcription).ToList();
            var containsMultipleTranscriptions = resultFiles.Count > 1;

            var textAnalyticsInfoProvided = !string.IsNullOrEmpty(textAnalyticsKey) &&
                                            !string.IsNullOrEmpty(textAnalyticsRegion) &&
                                            !textAnalyticsRegion.Equals("none", StringComparison.OrdinalIgnoreCase);

            // Null when no usable key/region is configured; every use below is guarded.
            var textAnalytics = textAnalyticsInfoProvided ? new TextAnalytics(serviceBusMessage.Locale, textAnalyticsKey, textAnalyticsRegion, log) : null;

            var generalErrorsStringBuilder = new StringBuilder();

            foreach (var resultFile in resultFiles)
            {
                log.LogInformation($"Getting result for file {resultFile.Name}");
                var transcriptionResult = await BatchClient.GetSpeechTranscriptFromSasAsync(resultFile.Links.ContentUrl, log).ConfigureAwait(false);

                if (string.IsNullOrEmpty(transcriptionResult.Source))
                {
                    var errorMessage = $"Transcription source is unknown, skipping evaluation.";
                    log.LogError(errorMessage);

                    generalErrorsStringBuilder.AppendLine(errorMessage);
                    continue;
                }

                var fileName = StorageConnector.GetFileNameFromUri(new Uri(transcriptionResult.Source));

                // Text analytics only runs when every recognized phrase has status "Success".
                if (transcriptionResult.RecognizedPhrases != null && transcriptionResult.RecognizedPhrases.All(phrase => phrase.RecognitionStatus.Equals("Success", StringComparison.Ordinal)))
                {
                    var textAnalyticsErrors = new List <string>();

                    // textAnalytics is null when key/region are missing even though a setting
                    // is enabled; skip instead of failing with a NullReferenceException.
                    if (textAnalytics != null && FetchTranscriptionEnvironmentVariables.SentimentAnalysisSetting != SentimentAnalysisSetting.None)
                    {
                        var sentimentErrors = await textAnalytics.AddSentimentToTranscriptAsync(transcriptionResult, FetchTranscriptionEnvironmentVariables.SentimentAnalysisSetting).ConfigureAwait(false);

                        textAnalyticsErrors.AddRange(sentimentErrors);
                    }

                    if (textAnalytics != null && FetchTranscriptionEnvironmentVariables.EntityRedactionSetting != EntityRedactionSetting.None)
                    {
                        var entityRedactionErrors = await textAnalytics.RedactEntitiesAsync(transcriptionResult, FetchTranscriptionEnvironmentVariables.EntityRedactionSetting).ConfigureAwait(false);

                        textAnalyticsErrors.AddRange(entityRedactionErrors);
                    }

                    if (textAnalyticsErrors.Any())
                    {
                        var distinctErrors = textAnalyticsErrors.Distinct();
                        var errorMessage   = $"File {(string.IsNullOrEmpty(fileName) ? "unknown" : fileName)}:\n{string.Join('\n', distinctErrors)}";

                        generalErrorsStringBuilder.AppendLine(errorMessage);
                    }
                }

                var editedTranscriptionResultJson = JsonConvert.SerializeObject(
                    transcriptionResult,
                    Newtonsoft.Json.Formatting.Indented,
                    new JsonSerializerSettings
                {
                    NullValueHandling = NullValueHandling.Ignore
                });

                var jsonFileName = $"{fileName}.json";
                await StorageConnectorInstance.WriteTextFileToBlobAsync(editedTranscriptionResultJson, jsonContainer, jsonFileName, log).ConfigureAwait(false);

                if (FetchTranscriptionEnvironmentVariables.CreateHtmlResultFile)
                {
                    var htmlContainer  = FetchTranscriptionEnvironmentVariables.HtmlResultOutputContainer;
                    var htmlFileName   = $"{fileName}.html";
                    var displayResults = TranscriptionToHtml.ToHtml(transcriptionResult, jobName);
                    await StorageConnectorInstance.WriteTextFileToBlobAsync(displayResults, htmlContainer, htmlFileName, log).ConfigureAwait(false);
                }

                if (FetchTranscriptionEnvironmentVariables.UseSqlDatabase)
                {
                    var duration         = XmlConvert.ToTimeSpan(transcriptionResult.Duration);
                    var approximatedCost = CostEstimation.GetCostEstimation(
                        duration,
                        transcriptionResult.CombinedRecognizedPhrases.Count(),
                        serviceBusMessage.UsesCustomModel,
                        FetchTranscriptionEnvironmentVariables.SentimentAnalysisSetting,
                        FetchTranscriptionEnvironmentVariables.EntityRedactionSetting);

                    // With several results per job each one gets a fresh id; a single result
                    // reuses the GUID parsed from the last segment of the transcription location.
                    var jobId = containsMultipleTranscriptions ? Guid.NewGuid() : new Guid(transcriptionLocation.Split('/').LastOrDefault());
                    var dbConnectionString = FetchTranscriptionEnvironmentVariables.DatabaseConnectionString;
                    using var dbConnector = new DatabaseConnector(log, dbConnectionString);
                    await dbConnector.StoreTranscriptionAsync(
                        jobId,
                        serviceBusMessage.Locale,
                        string.IsNullOrEmpty(fileName)?jobName : fileName,
                        (float)approximatedCost,
                        transcriptionResult).ConfigureAwait(false);
                }
            }

            var generalErrors = generalErrorsStringBuilder.ToString();

            if (!string.IsNullOrEmpty(generalErrors))
            {
                var errorTxtname = $"jobs/{jobName}.txt";

                await StorageConnectorInstance.WriteTextFileToBlobAsync(
                    generalErrors,
                    FetchTranscriptionEnvironmentVariables.ErrorReportOutputContainer,
                    errorTxtname,
                    log).ConfigureAwait(false);
            }

            var reportFile = transcriptionFiles.Values.FirstOrDefault(t => t.Kind == TranscriptionFileKind.TranscriptionReport);

            // FirstOrDefault yields null when no report file exists; guard before dereferencing.
            if (reportFile?.Links?.ContentUrl != null)
            {
                var reportFileContent = await BatchClient.GetTranscriptionReportFileFromSasAsync(reportFile.Links.ContentUrl, log).ConfigureAwait(false);

                await ProcessReportFileAsync(reportFileContent, log).ConfigureAwait(false);
            }
            else
            {
                log.LogWarning($"No transcription report file found for job {jobName}, skipping report processing.");
            }

            // Awaited rather than blocked on with GetAwaiter().GetResult().
            await BatchClient.DeleteTranscriptionAsync(transcriptionLocation, subscriptionKey, log).ConfigureAwait(false);
        }
Example #7
0
        /// <summary>
        /// Handles a transcription job that finished with status "Failed". Writes a
        /// job-level error report (including the serialized report file when one is
        /// available) to "jobs/{jobName}.txt". Each audio file of the job is then
        /// either re-sent to the start queue or given a per-file error report and
        /// moved to the error files container. Finally the transcription is deleted.
        /// </summary>
        /// <param name="transcriptionLocation">Location passed to the batch client calls.</param>
        /// <param name="subscriptionKey">Key passed to the batch client calls.</param>
        /// <param name="serviceBusMessage">Message whose <c>AudioFileInfos</c> are processed.</param>
        /// <param name="transcription">Failed transcription; error code/message fall back to "unknown" when absent.</param>
        /// <param name="jobName">Job name used in log text and report paths.</param>
        /// <param name="log">Logger.</param>
        private static async Task ProcessFailedTranscriptionAsync(string transcriptionLocation, string subscriptionKey, TranscriptionStartedMessage serviceBusMessage, Transcription transcription, string jobName, ILogger log)
        {
            var failure     = transcription?.Properties?.Error;
            var failureCode = failure?.Code ?? "unknown";
            var failureText = failure?.Message ?? "unknown";
            var summary     = $"Got failed transcription for job {jobName} with error {failureText} (Error code: {failureCode}).";

            log.LogInformation(summary);

            var files = await BatchClient.GetTranscriptionFilesAsync(transcriptionLocation, subscriptionKey, log).ConfigureAwait(false);

            var jobReport  = summary;
            var reportFile = files.Values.FirstOrDefault(t => t.Kind == TranscriptionFileKind.TranscriptionReport);

            if (reportFile?.Links?.ContentUrl != null)
            {
                var reportContent = await BatchClient.GetTranscriptionReportFileFromSasAsync(reportFile.Links.ContentUrl, log).ConfigureAwait(false);

                jobReport += $"\nReport file: \n {JsonConvert.SerializeObject(reportContent)}";
            }

            var reportContainer = FetchTranscriptionEnvironmentVariables.ErrorReportOutputContainer;
            await StorageConnectorInstance.WriteTextFileToBlobAsync(jobReport, reportContainer, $"jobs/{jobName}.txt", log).ConfigureAwait(false);

            var errorIsRetryable = IsRetryableError(failureCode);

            foreach (var audio in serviceBusMessage.AudioFileInfos)
            {
                var audioUri  = new Uri(audio.FileUrl);
                var audioName = StorageConnector.GetFileNameFromUri(audioUri);

                var shouldRetry = errorIsRetryable && audio.RetryCount < FetchTranscriptionEnvironmentVariables.RetryLimit;

                if (!shouldRetry)
                {
                    // Give up on this file: write its error report and move the audio aside.
                    var fileReport = $"Failed transcription with name {audioName} in job {jobName} after {audio.RetryCount} retries with error: {failureText} (Error: {failureCode}).";
                    await StorageConnectorInstance.WriteTextFileToBlobAsync(fileReport, reportContainer, $"{audioName}.txt", log).ConfigureAwait(false);

                    await StorageConnectorInstance.MoveFileAsync(
                        FetchTranscriptionEnvironmentVariables.AudioInputContainer,
                        audioName,
                        FetchTranscriptionEnvironmentVariables.ErrorFilesOutputContainer,
                        audioName,
                        log).ConfigureAwait(false);

                    continue;
                }

                log.LogInformation($"Retrying transcription with name {audioName} - retry count: {audio.RetryCount}");

                // Re-submit the file to the start queue with an incremented retry count.
                var retryRequest = new ServiceBusMessage
                {
                    Data       = new Data { Url = audioUri },
                    EventType  = "BlobCreated",
                    RetryCount = audio.RetryCount + 1
                };

                var retryJson    = JsonConvert.SerializeObject(retryRequest);
                var retryMessage = new Message(Encoding.UTF8.GetBytes(retryJson));
                await ServiceBusUtilities.SendServiceBusMessageAsync(StartQueueClientInstance, retryMessage, log, TimeSpan.FromMinutes(1)).ConfigureAwait(false);
            }

            await BatchClient.DeleteTranscriptionAsync(transcriptionLocation, subscriptionKey, log).ConfigureAwait(false);
        }
        public static async Task ProcessTranscriptionJobAsync(TranscriptionStartedMessage serviceBusMessage, ILogger log)
        {
            if (serviceBusMessage == null)
            {
                throw new ArgumentNullException(nameof(serviceBusMessage));
            }

            var subscriptionKey       = FetchTranscriptionEnvironmentVariables.AzureSpeechServicesKey;
            var jobName               = serviceBusMessage.JobName;
            var transcriptionLocation = serviceBusMessage.TranscriptionLocation;

            log.LogInformation($"Received transcription at {transcriptionLocation} with name {jobName} from service bus message.");

            var messageDelayTime = GetMessageDelayTime(serviceBusMessage.PollingCounter);

            serviceBusMessage.PollingCounter += 1;

            try
            {
                var transcription = await BatchClient.GetTranscriptionAsync(transcriptionLocation, subscriptionKey).ConfigureAwait(false);

                log.LogInformation($"Polled {serviceBusMessage.PollingCounter} time(s) for results in total, delay job for {messageDelayTime.TotalMinutes} minutes if not completed.");
                switch (transcription.Status)
                {
                case "Failed":
                    await ProcessFailedTranscriptionAsync(transcriptionLocation, subscriptionKey, serviceBusMessage, transcription, jobName, log).ConfigureAwait(false);

                    break;

                case "Succeeded":
                    await ProcessSucceededTranscriptionAsync(transcriptionLocation, subscriptionKey, serviceBusMessage, jobName, log).ConfigureAwait(false);

                    break;

                case "Running":
                    log.LogInformation($"Transcription running, polling again after {messageDelayTime.TotalMinutes} minutes.");
                    await ServiceBusUtilities.SendServiceBusMessageAsync(FetchServiceBusSender, serviceBusMessage.CreateMessageString(), log, messageDelayTime).ConfigureAwait(false);

                    break;

                case "NotStarted":
                    log.LogInformation($"Transcription not started, polling again after {messageDelayTime.TotalMinutes} minutes.");
                    await ServiceBusUtilities.SendServiceBusMessageAsync(FetchServiceBusSender, serviceBusMessage.CreateMessageString(), log, messageDelayTime).ConfigureAwait(false);

                    break;
                }
            }
            catch (TransientFailureException e)
            {
                await RetryOrFailJobAsync(
                    serviceBusMessage,
                    $"Exception {e} in job {jobName} at {transcriptionLocation}: {e.Message}",
                    jobName,
                    transcriptionLocation,
                    subscriptionKey,
                    log,
                    isThrottled : false).ConfigureAwait(false);
            }
            catch (TimeoutException e)
            {
                await RetryOrFailJobAsync(
                    serviceBusMessage,
                    $"TimeoutException {e} in job {jobName} at {transcriptionLocation}: {e.Message}",
                    jobName,
                    transcriptionLocation,
                    subscriptionKey,
                    log,
                    isThrottled : false).ConfigureAwait(false);
            }
            catch (Exception e)
            {
                HttpStatusCode?httpStatusCode = null;
                if (e is HttpStatusCodeException statusCodeException && statusCodeException.HttpStatusCode.HasValue)
                {
                    httpStatusCode = statusCodeException.HttpStatusCode.Value;
                }