Пример #1
0
        internal static async Task ProcessFailedTranscriptionAsync(BatchClient client, Transcription transcription, Guid transcriptionId, string jobName, ILogger log)
        {
            log.LogInformation($"Got failed transcription for job {jobName}");

            var report       = "Unknown Error.";
            var errorTxtname = jobName + ".txt";

            var transcriptionFiles = await client.GetTranscriptionFilesAsync(transcriptionId).ConfigureAwait(false);

            var reportFile        = transcriptionFiles.Values.Where(t => t.Kind == TranscriptionFileKind.TranscriptionReport).FirstOrDefault();
            var reportFileContent = string.Empty;

            if (reportFile?.Links?.ContentUrl != null)
            {
                using var webClient = new WebClient();
                reportFileContent   = webClient.DownloadString(reportFile.Links.ContentUrl);
                report = reportFileContent;
            }
            else if (transcription.Properties?.Error?.Message != null)
            {
                report  = transcription.Properties.Error.Message;
                report += string.IsNullOrEmpty(transcription.Properties.Error.Code) ? string.Empty : $" (Error: {transcription.Properties.Error.Code}).";
            }

            var errorOutputContainer = FetchTranscriptionEnvironmentVariables.ErrorReportOutputContainer;
            await StorageUtilities.WriteTextFileToBlobAsync(FetchTranscriptionEnvironmentVariables.AzureWebJobsStorage, report, errorOutputContainer, errorTxtname, log).ConfigureAwait(false);

            if (!string.IsNullOrEmpty(reportFileContent))
            {
                await ProcessReportFileAsync(reportFileContent, log).ConfigureAwait(false);
            }

            await client.DeleteTranscriptionAsync(transcriptionId).ConfigureAwait(false);
        }
Пример #2
0
        private static async Task RetryOrFailJobAsync(TranscriptionStartedMessage message, string error, string jobName, string transcriptionLocation, string subscriptionKey, ILogger log)
        {
            message.FailedExecutionCounter += 1;
            var messageDelayTime = GetMessageDelayTime(message.PollingCounter);

            if (message.FailedExecutionCounter > FetchTranscriptionEnvironmentVariables.RetryLimit)
            {
                await WriteFailedJobLogToStorageAsync(message, error, jobName, log).ConfigureAwait(false);

                await BatchClient.DeleteTranscriptionAsync(transcriptionLocation, subscriptionKey, log).ConfigureAwait(false);
            }
            else
            {
                log.LogInformation($"Retrying..");
                await ServiceBusUtilities.SendServiceBusMessageAsync(FetchQueueClientInstance, message.CreateMessageString(), log, messageDelayTime).ConfigureAwait(false);
            }
        }
Пример #3
0
        private static async Task ProcessSucceededTranscriptionAsync(string transcriptionLocation, string subscriptionKey, TranscriptionStartedMessage serviceBusMessage, string jobName, ILogger log)
        {
            log.LogInformation($"Got succeeded transcription for job {jobName}");

            var jsonContainer       = FetchTranscriptionEnvironmentVariables.JsonResultOutputContainer;
            var textAnalyticsKey    = FetchTranscriptionEnvironmentVariables.TextAnalyticsKey;
            var textAnalyticsRegion = FetchTranscriptionEnvironmentVariables.TextAnalyticsRegion;

            var transcriptionFiles = await BatchClient.GetTranscriptionFilesAsync(transcriptionLocation, subscriptionKey, log).ConfigureAwait(false);

            log.LogInformation($"Received transcription files.");
            var resultFiles = transcriptionFiles.Values.Where(t => t.Kind == TranscriptionFileKind.Transcription);
            var containsMultipleTranscriptions = resultFiles.Skip(1).Any();

            var textAnalyticsInfoProvided = !string.IsNullOrEmpty(textAnalyticsKey) &&
                                            !string.IsNullOrEmpty(textAnalyticsRegion) &&
                                            !textAnalyticsRegion.Equals("none", StringComparison.OrdinalIgnoreCase);

            var textAnalytics = textAnalyticsInfoProvided ? new TextAnalytics(serviceBusMessage.Locale, textAnalyticsKey, textAnalyticsRegion, log) : null;

            var generalErrorsStringBuilder = new StringBuilder();

            foreach (var resultFile in resultFiles)
            {
                log.LogInformation($"Getting result for file {resultFile.Name}");
                var transcriptionResult = await BatchClient.GetSpeechTranscriptFromSasAsync(resultFile.Links.ContentUrl, log).ConfigureAwait(false);

                if (string.IsNullOrEmpty(transcriptionResult.Source))
                {
                    var errorMessage = $"Transcription source is unknown, skipping evaluation.";
                    log.LogError(errorMessage);

                    generalErrorsStringBuilder.AppendLine(errorMessage);
                    continue;
                }

                var fileName = StorageConnector.GetFileNameFromUri(new Uri(transcriptionResult.Source));

                if (transcriptionResult.RecognizedPhrases != null && transcriptionResult.RecognizedPhrases.All(phrase => phrase.RecognitionStatus.Equals("Success", StringComparison.Ordinal)))
                {
                    var textAnalyticsErrors = new List <string>();

                    if (FetchTranscriptionEnvironmentVariables.SentimentAnalysisSetting != SentimentAnalysisSetting.None)
                    {
                        var sentimentErrors = await textAnalytics.AddSentimentToTranscriptAsync(transcriptionResult, FetchTranscriptionEnvironmentVariables.SentimentAnalysisSetting).ConfigureAwait(false);

                        textAnalyticsErrors.AddRange(sentimentErrors);
                    }

                    if (FetchTranscriptionEnvironmentVariables.EntityRedactionSetting != EntityRedactionSetting.None)
                    {
                        var entityRedactionErrors = await textAnalytics.RedactEntitiesAsync(transcriptionResult, FetchTranscriptionEnvironmentVariables.EntityRedactionSetting).ConfigureAwait(false);

                        textAnalyticsErrors.AddRange(entityRedactionErrors);
                    }

                    if (textAnalyticsErrors.Any())
                    {
                        var distinctErrors = textAnalyticsErrors.Distinct();
                        var errorMessage   = $"File {(string.IsNullOrEmpty(fileName) ? "unknown" : fileName)}:\n{string.Join('\n', distinctErrors)}";

                        generalErrorsStringBuilder.AppendLine(errorMessage);
                    }
                }

                var editedTranscriptionResultJson = JsonConvert.SerializeObject(
                    transcriptionResult,
                    Newtonsoft.Json.Formatting.Indented,
                    new JsonSerializerSettings
                {
                    NullValueHandling = NullValueHandling.Ignore
                });

                var jsonFileName = $"{fileName}.json";
                await StorageConnectorInstance.WriteTextFileToBlobAsync(editedTranscriptionResultJson, jsonContainer, jsonFileName, log).ConfigureAwait(false);

                if (FetchTranscriptionEnvironmentVariables.CreateHtmlResultFile)
                {
                    var htmlContainer  = FetchTranscriptionEnvironmentVariables.HtmlResultOutputContainer;
                    var htmlFileName   = $"{fileName}.html";
                    var displayResults = TranscriptionToHtml.ToHtml(transcriptionResult, jobName);
                    await StorageConnectorInstance.WriteTextFileToBlobAsync(displayResults, htmlContainer, htmlFileName, log).ConfigureAwait(false);
                }

                if (FetchTranscriptionEnvironmentVariables.UseSqlDatabase)
                {
                    var duration         = XmlConvert.ToTimeSpan(transcriptionResult.Duration);
                    var approximatedCost = CostEstimation.GetCostEstimation(
                        duration,
                        transcriptionResult.CombinedRecognizedPhrases.Count(),
                        serviceBusMessage.UsesCustomModel,
                        FetchTranscriptionEnvironmentVariables.SentimentAnalysisSetting,
                        FetchTranscriptionEnvironmentVariables.EntityRedactionSetting);

                    var jobId = containsMultipleTranscriptions ? Guid.NewGuid() : new Guid(transcriptionLocation.Split('/').LastOrDefault());
                    var dbConnectionString = FetchTranscriptionEnvironmentVariables.DatabaseConnectionString;
                    using var dbConnector = new DatabaseConnector(log, dbConnectionString);
                    await dbConnector.StoreTranscriptionAsync(
                        jobId,
                        serviceBusMessage.Locale,
                        string.IsNullOrEmpty(fileName)?jobName : fileName,
                        (float)approximatedCost,
                        transcriptionResult).ConfigureAwait(false);
                }
            }

            var generalErrors = generalErrorsStringBuilder.ToString();

            if (!string.IsNullOrEmpty(generalErrors))
            {
                var errorTxtname = $"jobs/{jobName}.txt";

                await StorageConnectorInstance.WriteTextFileToBlobAsync(
                    generalErrors,
                    FetchTranscriptionEnvironmentVariables.ErrorReportOutputContainer,
                    errorTxtname,
                    log).ConfigureAwait(false);
            }

            var reportFile        = transcriptionFiles.Values.Where(t => t.Kind == TranscriptionFileKind.TranscriptionReport).FirstOrDefault();
            var reportFileContent = await BatchClient.GetTranscriptionReportFileFromSasAsync(reportFile.Links.ContentUrl, log).ConfigureAwait(false);

            await ProcessReportFileAsync(reportFileContent, log).ConfigureAwait(false);

            BatchClient.DeleteTranscriptionAsync(transcriptionLocation, subscriptionKey, log).ConfigureAwait(false).GetAwaiter().GetResult();
        }
Пример #4
0
        private static async Task ProcessFailedTranscriptionAsync(string transcriptionLocation, string subscriptionKey, TranscriptionStartedMessage serviceBusMessage, Transcription transcription, string jobName, ILogger log)
        {
            var safeErrorCode    = transcription?.Properties?.Error?.Code ?? "unknown";
            var safeErrorMessage = transcription?.Properties?.Error?.Message ?? "unknown";
            var logMessage       = $"Got failed transcription for job {jobName} with error {safeErrorMessage} (Error code: {safeErrorCode}).";

            log.LogInformation(logMessage);

            var transcriptionFiles = await BatchClient.GetTranscriptionFilesAsync(transcriptionLocation, subscriptionKey, log).ConfigureAwait(false);

            var errorReportOutput = logMessage;
            var reportFile        = transcriptionFiles.Values.Where(t => t.Kind == TranscriptionFileKind.TranscriptionReport).FirstOrDefault();

            if (reportFile?.Links?.ContentUrl != null)
            {
                var reportFileContent = await BatchClient.GetTranscriptionReportFileFromSasAsync(reportFile.Links.ContentUrl, log).ConfigureAwait(false);

                errorReportOutput += $"\nReport file: \n {JsonConvert.SerializeObject(reportFileContent)}";
            }

            var errorOutputContainer = FetchTranscriptionEnvironmentVariables.ErrorReportOutputContainer;
            await StorageConnectorInstance.WriteTextFileToBlobAsync(errorReportOutput, errorOutputContainer, $"jobs/{jobName}.txt", log).ConfigureAwait(false);

            var retryAudioFile = IsRetryableError(safeErrorCode);

            foreach (var audio in serviceBusMessage.AudioFileInfos)
            {
                var fileName = StorageConnector.GetFileNameFromUri(new Uri(audio.FileUrl));

                if (retryAudioFile && audio.RetryCount < FetchTranscriptionEnvironmentVariables.RetryLimit)
                {
                    log.LogInformation($"Retrying transcription with name {fileName} - retry count: {audio.RetryCount}");
                    var sbMessage = new ServiceBusMessage
                    {
                        Data = new Data
                        {
                            Url = new Uri(audio.FileUrl)
                        },
                        EventType  = "BlobCreated",
                        RetryCount = audio.RetryCount + 1
                    };

                    var audioFileMessage = new Message(Encoding.UTF8.GetBytes(JsonConvert.SerializeObject(sbMessage)));
                    await ServiceBusUtilities.SendServiceBusMessageAsync(StartQueueClientInstance, audioFileMessage, log, TimeSpan.FromMinutes(1)).ConfigureAwait(false);
                }
                else
                {
                    var message = $"Failed transcription with name {fileName} in job {jobName} after {audio.RetryCount} retries with error: {safeErrorMessage} (Error: {safeErrorCode}).";
                    await StorageConnectorInstance.WriteTextFileToBlobAsync(message, errorOutputContainer, $"{fileName}.txt", log).ConfigureAwait(false);

                    await StorageConnectorInstance.MoveFileAsync(
                        FetchTranscriptionEnvironmentVariables.AudioInputContainer,
                        fileName,
                        FetchTranscriptionEnvironmentVariables.ErrorFilesOutputContainer,
                        fileName,
                        log).ConfigureAwait(false);
                }
            }

            await BatchClient.DeleteTranscriptionAsync(transcriptionLocation, subscriptionKey, log).ConfigureAwait(false);
        }
Пример #5
0
        internal static async Task ProcessSucceededTranscriptionAsync(BatchClient client, PostTranscriptionServiceBusMessage serviceBusMessage, Guid transcriptionId, string jobName, ILogger log)
        {
            log.LogInformation($"Got succeeded transcription for job {jobName}");

            var jsonContainer = FetchTranscriptionEnvironmentVariables.JsonResultOutputContainer;

            var transcriptionFiles = await client.GetTranscriptionFilesAsync(transcriptionId).ConfigureAwait(false);

            var resultFiles = transcriptionFiles.Values.Where(t => t.Kind == TranscriptionFileKind.Transcription);
            var containsMultipleTranscriptions = resultFiles.Skip(1).Any();

            var textAnalyticsKey    = FetchTranscriptionEnvironmentVariables.TextAnalyticsKey;
            var textAnalyticsRegion = FetchTranscriptionEnvironmentVariables.TextAnalyticsRegion;

            var textAnalyticsInfoProvided = !string.IsNullOrEmpty(textAnalyticsKey) &&
                                            !string.IsNullOrEmpty(textAnalyticsRegion) &&
                                            !textAnalyticsRegion.Equals("none", StringComparison.OrdinalIgnoreCase);

            var textAnalytics = textAnalyticsInfoProvided ? new TextAnalytics(serviceBusMessage.Locale, textAnalyticsKey, textAnalyticsRegion, log) : null;

            var generalErrorsStringBuilder = new StringBuilder();

            foreach (var resultFile in resultFiles)
            {
                var fileName = string.Empty;

                try
                {
                    var transcriptionResultJson = string.Empty;
                    using (var webClient = new WebClient())
                    {
                        transcriptionResultJson = webClient.DownloadString(resultFile.Links.ContentUrl);
                    }

                    var transcriptionResult = JsonConvert.DeserializeObject <SpeechTranscript>(transcriptionResultJson);
                    fileName = StorageUtilities.GetFileNameFromUri(new Uri(transcriptionResult.Source));
                    log.LogInformation($"Filename is {fileName}");

                    if (transcriptionResult.RecognizedPhrases == null || transcriptionResult.RecognizedPhrases.All(phrase => !phrase.RecognitionStatus.Equals("Success", StringComparison.Ordinal)))
                    {
                        continue;
                    }

                    var textAnalyticsErrors = new List <string>();

                    if (serviceBusMessage.AddSentimentAnalysis)
                    {
                        var sentimentErrors = await textAnalytics.AddSentimentToTranscriptAsync(transcriptionResult).ConfigureAwait(false);

                        textAnalyticsErrors.AddRange(sentimentErrors);
                    }

                    if (serviceBusMessage.AddEntityRedaction)
                    {
                        var entityRedactionErrors = await textAnalytics.RedactEntitiesAsync(transcriptionResult).ConfigureAwait(false);

                        textAnalyticsErrors.AddRange(entityRedactionErrors);
                    }

                    var editedTranscriptionResultJson = JsonConvert.SerializeObject(transcriptionResult, Newtonsoft.Json.Formatting.Indented);

                    // Store transcript json:
                    var jsonFileName = $"{fileName}.json";
                    await StorageUtilities.WriteTextFileToBlobAsync(FetchTranscriptionEnvironmentVariables.AzureWebJobsStorage, editedTranscriptionResultJson, jsonContainer, jsonFileName, log).ConfigureAwait(false);

                    if (FetchTranscriptionEnvironmentVariables.CreateHtmlResultFile)
                    {
                        var htmlContainer  = FetchTranscriptionEnvironmentVariables.HtmlResultOutputContainer;
                        var htmlFileName   = $"{fileName}.html";
                        var displayResults = TranscriptionToHtml.ToHTML(transcriptionResult, jobName);
                        await StorageUtilities.WriteTextFileToBlobAsync(FetchTranscriptionEnvironmentVariables.AzureWebJobsStorage, displayResults, htmlContainer, htmlFileName, log).ConfigureAwait(false);
                    }

                    if (textAnalyticsErrors.Any())
                    {
                        var distinctErrors = textAnalyticsErrors.Distinct();
                        var errorMessage   = $"File {(string.IsNullOrEmpty(fileName) ? "unknown" : fileName)}:\n{string.Join('\n', distinctErrors)}";

                        generalErrorsStringBuilder.AppendLine(errorMessage);
                    }

                    if (FetchTranscriptionEnvironmentVariables.UseSqlDatabase)
                    {
                        var duration         = XmlConvert.ToTimeSpan(transcriptionResult.Duration);
                        var approximatedCost = CostEstimation.GetCostEstimation(
                            duration,
                            transcriptionResult.CombinedRecognizedPhrases.Count(),
                            serviceBusMessage.UsesCustomModel,
                            serviceBusMessage.AddSentimentAnalysis,
                            serviceBusMessage.AddEntityRedaction);

                        var dbConnectionString = FetchTranscriptionEnvironmentVariables.DatabaseConnectionString;
                        using var dbConnector = new DatabaseConnector(log, dbConnectionString);
                        await dbConnector.StoreTranscriptionAsync(
                            containsMultipleTranscriptions?Guid.NewGuid() : transcriptionId,
                                serviceBusMessage.Locale,
                                string.IsNullOrEmpty(fileName)?jobName : fileName,
                                (float)approximatedCost,
                                transcriptionResult).ConfigureAwait(false);
                    }
                }
                catch (Exception e)
                {
                    if (string.IsNullOrEmpty(fileName) && e is ArgumentNullException)
                    {
                        var errorMessage = $"Transcription file name is unknown, failed with message: {e.Message}";
                        log.LogError(errorMessage);

                        generalErrorsStringBuilder.AppendLine(errorMessage);

                        continue;
                    }
                    else if (e is JsonException || e is SqlException)
                    {
                        var errorTxtname = fileName + ".txt";
                        var errorMessage = $"Transcription result processing failed with exception: {e.Message}";
                        log.LogError(errorMessage);

                        await StorageUtilities.WriteTextFileToBlobAsync(
                            FetchTranscriptionEnvironmentVariables.AzureWebJobsStorage,
                            errorMessage,
                            FetchTranscriptionEnvironmentVariables.ErrorReportOutputContainer,
                            errorTxtname,
                            log).ConfigureAwait(false);

                        continue;
                    }

                    throw;
                }
            }

            var errors = generalErrorsStringBuilder.ToString();

            if (!string.IsNullOrEmpty(errors))
            {
                var errorTxtname = jobName + ".txt";

                await StorageUtilities.WriteTextFileToBlobAsync(
                    FetchTranscriptionEnvironmentVariables.AzureWebJobsStorage,
                    errors,
                    FetchTranscriptionEnvironmentVariables.ErrorReportOutputContainer,
                    errorTxtname,
                    log).ConfigureAwait(false);
            }

            // Delete trace from service
            client.DeleteTranscriptionAsync(transcriptionId).ConfigureAwait(false).GetAwaiter().GetResult();
        }
        private async static Task TranscribeAsync(BatchClient client, Uri[] audioFiles, bool isContainerUrls)
        {
            Console.WriteLine("Deleting all existing completed transcriptions.");

            // get all transcriptions for the subscription
            PaginatedTranscriptions paginatedTranscriptions = null;

            do
            {
                if (paginatedTranscriptions == null)
                {
                    paginatedTranscriptions = await client.GetTranscriptionsAsync().ConfigureAwait(false);
                }
                else
                {
                    paginatedTranscriptions = await client.GetTranscriptionsAsync(paginatedTranscriptions.NextLink).ConfigureAwait(false);
                }

                // delete all pre-existing completed transcriptions. If transcriptions are still running or not started, they will not be deleted
                foreach (var transcriptionToDelete in paginatedTranscriptions.Values)
                {
                    // delete a transcription
                    await client.DeleteTranscriptionAsync(transcriptionToDelete.Self).ConfigureAwait(false);

                    Console.WriteLine($"Deleted transcription {transcriptionToDelete.Self}");
                }
            }while (paginatedTranscriptions.NextLink != null);

            // <transcriptiondefinition>
            var newTranscription = new Transcription
            {
                DisplayName = DisplayName,
                Locale      = Locale,
                ContentUrls = audioFiles,
                //ContentContainerUrl = ContentAzureBlobContainer,
                Model      = CustomModel,
                Properties = new TranscriptionProperties
                {
                    DestinationContainerUrl      = null,
                    IsWordLevelTimestampsEnabled = false,
                    TimeToLive = TimeSpan.FromDays(1)
                }
            };

            if (isContainerUrls)
            {
                newTranscription.ContentUrls         = null;
                newTranscription.ContentContainerUrl = audioFiles[0];
                newTranscription.Properties.DestinationContainerUrl = audioFiles[1];
            }

            newTranscription = await client.CreateTranscriptionAsync(newTranscription).ConfigureAwait(false);

            Console.WriteLine($"Created transcription {newTranscription.Self}");
            // </transcriptiondefinition>

            // get the transcription Id from the location URI
            var createdTranscriptions = new List <Uri> {
                newTranscription.Self
            };

            Console.WriteLine("Checking status.");

            // get the status of our transcriptions periodically and log results
            int completed = 0, running = 0, notStarted = 0;

            while (completed < 1)
            {
                completed = 0; running = 0; notStarted = 0;

                // get all transcriptions for the user
                paginatedTranscriptions = null;
                do
                {
                    // <transcriptionstatus>
                    if (paginatedTranscriptions == null)
                    {
                        paginatedTranscriptions = await client.GetTranscriptionsAsync().ConfigureAwait(false);
                    }
                    else
                    {
                        paginatedTranscriptions = await client.GetTranscriptionsAsync(paginatedTranscriptions.NextLink).ConfigureAwait(false);
                    }

                    // delete all pre-existing completed transcriptions. If transcriptions are still running or not started, they will not be deleted
                    foreach (var transcription in paginatedTranscriptions.Values)
                    {
                        switch (transcription.Status)
                        {
                        case "Failed":
                        case "Succeeded":
                            // we check to see if it was one of the transcriptions we created from this client.
                            if (!createdTranscriptions.Contains(transcription.Self))
                            {
                                // not created form here, continue
                                continue;
                            }

                            completed++;

                            // if the transcription was successful, check the results
                            if (transcription.Status == "Succeeded")
                            {
                                Console.WriteLine($"\t\tTranscirption {transcription.DisplayName} Succeeded.");

                                // BUGBUG:  TODO:  The destination container public access level is set to OFF.
                                // Hence the call to GetTranscriptionResultAsync(Uri) is failing with "resource not found" error.
                                // so disabling the code below.

                                // Alternatively, checking for isContainerUrls & use audioFiles[1] to get the container and change its public access level.
                                if (false)
                                {
                                    var paginatedfiles = await client.GetTranscriptionFilesAsync(transcription.Links.Files).ConfigureAwait(false);

                                    var resultFile = paginatedfiles.Values.FirstOrDefault(f => f.Kind == ArtifactKind.Transcription);
                                    var result     = await client.GetTranscriptionResultAsync(new Uri(resultFile.Links.ContentUrl)).ConfigureAwait(false);

                                    Console.WriteLine("Transcription succeeded. Results: ");
                                    Console.WriteLine(JsonConvert.SerializeObject(result, SpeechJsonContractResolver.WriterSettings));
                                }
                            }
                            else
                            {
                                Console.WriteLine("Transcription failed. Status: {0}", transcription.Properties.Error.Message);
                            }

                            break;

                        case "Running":
                            running++;
                            break;

                        case "NotStarted":
                            notStarted++;
                            break;
                        }
                    }

                    // for each transcription in the list we check the status
                    Console.WriteLine(string.Format("Transcriptions status: {0} completed, {1} running, {2} not started yet", completed, running, notStarted));
                }while (paginatedTranscriptions.NextLink != null);

                // </transcriptionstatus>
                // check again after 1 minute
                await Task.Delay(TimeSpan.FromMinutes(1)).ConfigureAwait(false);
            }

            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }