Ejemplo n.º 1
0
        /// <summary>
        /// Runs OCR on the configured sample photo, writes the recognized lines to a text file,
        /// runs language detection over those lines, sends the OCR text to the translator and
        /// writes the translated texts to a second text file.
        /// </summary>
        /// <param name="toLanguage">
        /// Target language passed to the translator and embedded in the output file name.
        /// Defaults to <c>"en-US"</c>.
        /// </param>
        /// <returns>A task that completes when all output files have been written.</returns>
        /// <remarks>
        /// When the translator response is not a JSON array it is parsed as a JSON object and
        /// stored to the "-translated-" JSON file; if that object contains an "error" member the
        /// error code and message are printed and the method returns without producing the
        /// translated text file.
        /// </remarks>
        internal async Task ReadImageOcrTextAndTranslate(string toLanguage = "en-US")
        {
            string inputImageFilePath = Path.Combine(
                HelperFunctions.GetSampleDataFullPath(customSettings.SampleDataFolders.PhotosToAnalyzeFolder),
                customSettings.SampleIndividualFiles.PhotoFileToProcess
                );

            // All output files share the input file name (including its extension) as prefix.
            string fileNamePrefix  = Path.GetFileName(inputImageFilePath);
            string outFolder       = HelperFunctions.GetSampleDataFullPath(customSettings.SampleDataFolders.AnalyzedImagesFolder);
            string outBaseFilePath = Path.Combine(outFolder, fileNamePrefix);

            // Ensure the destination folder exists (no-op when it already does).
            Directory.CreateDirectory(outFolder);

            // --- Step 1: OCR - text extraction ---
            Console.WriteLine($"Extracting Text using Vision OCR from {inputImageFilePath}...");

            ComputerVisionClient visionClient = ComputerVision.Authenticate(
                customSettings.ComputerVisionSettings.Endpoint,
                customSettings.ComputerVisionSettings.Key);

            string ocrFilePath = outBaseFilePath + "-ReadOcrResults.txt";

            var ocrResult = await ComputerVision.RecognizeTextFromImageLocal(visionClient, inputImageFilePath, false);

            var ocrLineTexts = ComputerVisionHelper.GetOcrResultLineTexts(ocrResult);
            await File.WriteAllLinesAsync(ocrFilePath, ocrLineTexts);

            Console.WriteLine($"Generated OCR output file {ocrFilePath}.");
            Console.WriteLine();

            // --- Step 2: language detection using the Text Analytics API ---
            TextAnalyticsClient textClient = TextAnalytics.GetClient(
                customSettings.TextAnalyticsSettings.Key,
                customSettings.TextAnalyticsSettings.Endpoint
                );

            Console.WriteLine("Detect the language from generated OCR text using TextAnalytics...");

            // The detected languages are not consumed below; the call is kept (and awaited) so the
            // detection step still runs exactly as before, and the result is discarded explicitly.
            _ = await TextAnalytics.DetectLanguageBatchAsync(textClient, ocrLineTexts);

            Console.WriteLine();

            // --- Step 3: translate the extracted OCR text to the requested language ---
            Console.WriteLine($"Now translate the generated OCR file to {toLanguage}...");

            // Read the text back from the file that was just written, so the translator receives
            // exactly what is stored on disk (one OCR line per text line).
            string ocrText = await File.ReadAllTextAsync(ocrFilePath);

            string translatedText = await JournalHelper.Translator.Translate.TranslateTextRequestAsync(
                customSettings.TranslatorConfigSettings.Key,
                customSettings.TranslatorConfigSettings.Endpoint,
                toLanguage,
                ocrText
                );

            string outTranslatedFilePath = outBaseFilePath + "-translated-" + toLanguage + ".json";

            // A response that is not a JSON array is handled as a JSON object (possibly an error
            // payload). Ordinal comparison: this is a structural check, not a linguistic one.
            if (!translatedText.StartsWith("[", StringComparison.Ordinal))
            {
                Console.WriteLine($"Storing the generated translation output to file: {outTranslatedFilePath}... ");
                var json = JObject.Parse(translatedText);
                Helper.WriteToJsonFile <JObject>(outTranslatedFilePath, json);

                if (json.ContainsKey("error"))
                {
                    Console.WriteLine($"\t\t\tTRANSLATOR ERROR: {json["error"]["code"]}");
                    Console.WriteLine($"\t\t\tMESSAGE: {json["error"]["message"]}");
                    return;
                }
            }

            // --- Step 4: store the plain translated texts next to the JSON output path ---
            string txtFile = outTranslatedFilePath + ".txt";

            Console.WriteLine($"Generating txt file with translated texts - {txtFile}");

            IEnumerable <string> texts = JournalHelper.Translator.Translate.GetTranslatedTexts(translatedText);
            await File.WriteAllLinesAsync(txtFile, texts);

            Console.WriteLine();
        }