/// <summary>
/// Translates a plain text document in UTF-8 encoding to the target language.
/// The document is translated in place: its content is replaced by the translation.
/// </summary>
/// <param name="fullNameForDocumentToProcess">Source document file name; overwritten with the translated lines.</param>
/// <param name="sourceLanguage">From language</param>
/// <param name="targetLanguage">To language</param>
private static void ProcessTextDocument(string fullNameForDocumentToProcess, string sourceLanguage, string targetLanguage)
{
    var document = File.ReadAllLines(fullNameForDocumentToProcess, Encoding.UTF8);
    List<string> lstTexts = new List<string>(document);
    var batches = SplitList(lstTexts, TranslationServiceFacade.maxelements, TranslationServiceFacade.maxrequestsize);

    // Translate every batch BEFORE touching the file on disk. Previously the file was
    // deleted up front, so a failing translation call left the document deleted or
    // half-written, and an empty document was deleted without being recreated.
    List<string> translatedLines = new List<string>(document.Length);
    foreach (var batch in batches)
    {
        string[] translated = TranslationServiceFacade.TranslateArray(batch.ToArray(), sourceLanguage, targetLanguage);
        translatedLines.AddRange(translated);
    }

    // Single overwrite of the original file with the complete translation.
    File.WriteAllLines(fullNameForDocumentToProcess, translatedLines, Encoding.UTF8);
}
/// <summary>
/// Initializes a new instance of the CtfToTmx class and declares the
/// command-line arguments it accepts (Tmx, From, To, User, Rating).
/// </summary>
/// <param name="Logger">The logger handed to the base class and to the argument list.</param>
public CtfToTmx(ConsoleLogger Logger)
    : base(Logger)
{
    TranslationServiceFacade.Initialize();

    // Without a ready translation service no arguments are registered at all.
    bool serviceReady = TranslationServiceFacade.IsTranslationServiceReady();
    if (!serviceReady)
    {
        this.Logger.WriteLine(
            LogLevel.Error,
            "Invalid translation service credentials. Use \"DocumentTranslatorCmd setcredentials\", or use the Document Translator Settings option.");
        return;
    }

    this.TmxDocument = new SimpleStringArgument("Tmx", true, new[] { ',' }, "TMX Document to create.");

    // Each language argument receives its own snapshot of the available language codes.
    string[] sourceLanguageCodes = TranslationServiceFacade.AvailableLanguages.Keys.ToArray();
    this.sourceLanguage = new Argument(
        "From",
        false,
        new[] { "" },
        sourceLanguageCodes,
        true,
        "The source language code. Must be a valid Microsoft Translator language code, and the same as language code used in the CTF store. Mapped to TMX language code in TmxLangMap.csv.");

    string[] targetLanguageCodes = TranslationServiceFacade.AvailableLanguages.Keys.ToArray();
    this.targetLanguage = new Argument(
        "To",
        false,
        new[] { "" },
        targetLanguageCodes,
        true,
        "The target language code. Must be a valid Microsoft Translator language code, and the same as language code used in the CTF store. Mapped to TMX language code in TmxLangMap.csv.");

    this.user = new Argument(
        "User",
        false,
        "If provided, filter by the given user. The default user name for data uploaded using this tool is TmxUpload. Download is not filtered by default.");

    this.rating = new Argument(
        "Rating",
        false,
        "The download can be filtered by rating. Default is no filtering.");

    this.Arguments = new ArgumentList(
        new[] { this.TmxDocument, this.sourceLanguage, this.targetLanguage, this.user, this.rating },
        Logger);
}
/// <summary>
/// Initializes a new instance of the TranslateXML class and declares the
/// command-line arguments it accepts (XML, Elements, from, to, generate).
/// </summary>
/// <param name="Logger">
/// The logger handed to the base class and to the argument list.
/// </param>
public TranslateXML(ConsoleLogger Logger)
    : base(Logger)
{
    TranslationServiceFacade.Initialize();

    // Bail out before registering any argument when the service cannot be used.
    bool serviceReady = TranslationServiceFacade.IsTranslationServiceReady();
    if (!serviceReady)
    {
        this.Logger.WriteLine(
            LogLevel.Error,
            "Invalid translation service credentials. Use \"DocumentTranslatorCmd setcredentials\", or use the Document Translator Settings option.");
        return;
    }

    this.xmltotranslate = new Argument("XML", true, "The XML file in need of translation");
    this.elementsdispositioncsv = new Argument("Elements", true, "CSV file listing the elements to translate");

    // The language list is fetched by blocking on the task, once per argument.
    string[] fromLanguageCodes = AvailableLanguages.GetLanguages().Result.Keys.ToArray();
    this.fromlanguage = new Argument(
        "from",
        false,
        new[] { "Auto-Detect" },
        fromLanguageCodes,
        true,
        "The source language. Auto-detect if no language specified.");

    string[] toLanguageCodes = AvailableLanguages.GetLanguages().Result.Keys.ToArray();
    this.tolanguage = new SimpleStringArgument(
        "to",
        true,
        new string[] { },
        toLanguageCodes,
        new[] { ',' },
        "The target language code, or comma-separated list of language codes.");

    this.generatecsv = new BooleanArgument(
        "generate",
        false,
        false,
        "Set to true if you want to generate a list of elements.");

    this.Arguments = new ArgumentList(
        new[] { this.xmltotranslate, this.elementsdispositioncsv, this.fromlanguage, this.tolanguage, this.generatecsv },
        Logger);
}
/// <summary>
/// Applies the credential-related command-line values (reset, Azure key, category,
/// cloud, region) to the translation service facade, saves them, and reports
/// whether the service is ready afterwards.
/// </summary>
/// <returns>
/// <c>false</c> when applying or saving the credentials threw; otherwise <c>true</c>,
/// including the case where the saved key turns out to be invalid.
/// </returns>
public override bool Execute()
{
    try
    {
        string resetValue = this.Reset.ValueString;
        if (!string.IsNullOrEmpty(resetValue))
        {
            TranslationServiceFacade.ResetCredentials();
        }

        // Only values that were actually supplied overwrite the stored settings.
        string azureKeyValue = this.AzureKey.ValueString;
        if (!string.IsNullOrEmpty(azureKeyValue))
        {
            TranslationServiceFacade.AzureKey = azureKeyValue;
        }

        string categoryValue = this.categoryID.ValueString;
        if (!string.IsNullOrEmpty(categoryValue))
        {
            TranslationServiceFacade.CategoryID = categoryValue;
        }

        string cloudValue = this.Cloud.ValueString;
        if (!string.IsNullOrEmpty(cloudValue))
        {
            TranslationServiceFacade.AzureCloud = cloudValue;
        }

        string regionValue = this.Region.ValueString;
        if (!string.IsNullOrEmpty(regionValue))
        {
            TranslationServiceFacade.AzureRegion = regionValue;
        }

        TranslationServiceFacade.SaveCredentials();
    }
    catch (Exception failure)
    {
        this.Logger.WriteException(failure);

        // Waits for the user to press Enter before reporting the failure.
        Console.ReadLine();
        return false;
    }

    this.Logger.WriteLine(LogLevel.Msg, "Credentials saved.");

    bool serviceReady = TranslationServiceFacade.IsTranslationServiceReady();
    if (!serviceReady)
    {
        this.Logger.WriteLine(LogLevel.Error, "API Key is invalid. Check that the key is for a resource in this cloud, in this region.");
    }
    else
    {
        this.Logger.WriteLine(LogLevel.Msg, "Translator service is ready to use.");
    }

    return true;
}
/// <summary>
/// Initializes a new instance of the <see cref="TranslateDocuments" /> class and
/// declares the command-line arguments it accepts (Documents, from, to).
/// </summary>
/// <param name="Logger">
/// The logger handed to the base class and to the argument list.
/// </param>
public TranslateDocuments(ConsoleLogger Logger)
    : base(Logger)
{
    // Missing credentials are reported here; the readiness check below decides
    // whether construction continues.
    try
    {
        TranslationServiceFacade.Initialize();
    }
    catch (CredentialsMissingException missingCredentials)
    {
        this.Logger.WriteLine(LogLevel.Error, missingCredentials.Message);
    }

    bool serviceReady = TranslationServiceFacade.IsTranslationServiceReady();
    if (!serviceReady)
    {
        this.Logger.WriteLine(
            LogLevel.Error,
            "Invalid translation service credentials. Use \"DocumentTranslatorCmd setcredentials\"");
        return;
    }

    this.sourceDocuments = new SimpleStringArgument(
        "Documents",
        true,
        new[] { ',' },
        "Document to translate, or list of documents separated by comma, or a wildcard. Wildcard recurses through subfolders.");

    string[] sourceLanguageCodes = TranslationServiceFacade.AvailableLanguages.Keys.ToArray();
    this.sourceLanguage = new Argument(
        "from",
        false,
        new[] { "Auto-Detect" },
        sourceLanguageCodes,
        true,
        "The source language. Auto-detect if no language specified.");

    string[] targetLanguageCodes = TranslationServiceFacade.AvailableLanguages.Keys.ToArray();
    this.targetLanguages = new SimpleStringArgument(
        "to",
        true,
        new string[] { },
        targetLanguageCodes,
        new[] { ',' },
        "The target language code, or comma-separated list of language codes.");

    this.Arguments = new ArgumentList(
        new[] { this.sourceDocuments, this.sourceLanguage, this.targetLanguages },
        Logger);
}
/// <summary>
/// Initializes a new instance of the <see cref="DocumentTranslation" /> class:
/// initializes the translation service facade, fills the language list, resets the
/// view state, applies the stored default languages and subscribes to
/// <c>AccountValidationEvent</c>.
/// </summary>
public DocumentTranslation()
{
    TranslationServiceFacade.Initialize();
    PopulateAvailableLanguages();

    // Initial view state.
    ShowProgressBar = false;
    IsGoButtonEnabled = false;
    TargetFolder = string.Empty;
    SelectedTargetLanguage = string.Empty;

    // Language selections come from the stored application settings.
    SelectedSourceLanguage =
        TranslationAssistant.DocumentTranslationInterface.Properties.DocumentTranslator.Default.DefaultSourceLanguage;
    SelectedTargetLanguage =
        TranslationAssistant.DocumentTranslationInterface.Properties.DocumentTranslator.Default.DefaultTargetLanguage;

    StatusText = string.Empty;
    if (TranslationServiceFacade.IsTranslationServiceReady())
    {
        StatusText = Properties.Resources.Common_SelectDocuments;
    }

    PopulateReadyToTranslateMessage(TranslationServiceFacade.IsTranslationServiceReady());

    // Unsubscribe before subscribing so the handler is not registered twice.
    SingletonEventAggregator.Instance
        .GetEvent<AccountValidationEvent>()
        .Unsubscribe(PopulateReadyToTranslateMessage);
    SingletonEventAggregator.Instance
        .GetEvent<AccountValidationEvent>()
        .Subscribe(PopulateReadyToTranslateMessage);
}
/// <summary>
/// Initializes a new instance of the <see cref="DocumentTranslation" /> class:
/// initializes the translation service facade, fills the language and translate-mode
/// lists, and resets the view state.
/// </summary>
public DocumentTranslation()
{
    TranslationServiceFacade.Initialize();
    PopulateAvailableLanguages();
    PopulateTranslateMode();

    // Initial view state.
    ShowProgressBar = false;
    IsGoButtonEnabled = false;
    TargetFolder = string.Empty;
    SelectedTargetLanguage = string.Empty;

    // Restoring the stored defaults is currently disabled:
    //this.SelectedSourceLanguage = Properties.DocumentTranslator.Default.DefaultSourceLanguage;
    //this.SelectedTargetLanguage = Properties.DocumentTranslator.Default.DefaultTargetLanguage;
    //this.SelectedTranslateMode = TranslateModeList[Properties.DocumentTranslator.Default.DefaultTranslateMode]; //0=plain text, 1=HTML
    //this.IgnoreHiddenContent = Properties.DocumentTranslator.Default.IgnoreHiddenContent;

    StatusText = string.Empty;

    // The "ready" prompt and message are only shown when the service is usable.
    bool serviceReady = TranslationServiceFacade.IsTranslationServiceReady();
    if (serviceReady)
    {
        StatusText = Properties.Resources.Common_SelectDocuments;
        PopulateReadyToTranslateMessage(true);
    }
}
/// <summary>
/// Builds a tab-separated text listing of the available languages. For every
/// language code in <c>TranslationServiceFacade.AvailableLanguages</c> the language
/// list is requested again with that code as argument, and one row per entry of the
/// resulting list is written.
/// </summary>
/// <returns>The complete listing as a single string.</returns>
public static async Task<string> GetAllLanguages()
{
    // Kick off the request first; the writer is created while it is in flight.
    Task initialRequest = TranslationServiceFacade.GetLanguages();
    StringWriter listing = new StringWriter();
    await initialRequest;

    Dictionary<string, string> baseLanguages = TranslationServiceFacade.AvailableLanguages;

    listing.WriteLine("{0}\t{1}\t{2}", "Language", "Language Code", "Display Name");
    listing.WriteLine("------------------------------------------------------");

    // Iterate over a snapshot: the facade's language list is re-requested inside the loop.
    List<KeyValuePair<string, string>> snapshot = baseLanguages.ToList();
    foreach (KeyValuePair<string, string> outer in snapshot)
    {
        await TranslationServiceFacade.GetLanguages(outer.Key);

        foreach (KeyValuePair<string, string> inner in TranslationServiceFacade.AvailableLanguages)
        {
            listing.WriteLine("{0}\t{1}\t{2}", outer.Key, inner.Key, inner.Value);
        }

        listing.WriteLine("\n\n");
    }

    return listing.ToString();
}
/// <summary>
/// Create a CSV file with the alignment information as the third column. Original in 1st, translation in 2nd and alignment in 3rd column.
/// Source document must be UTF-8 encoded text file.
/// The CSV is written next to the source document as "&lt;source name&gt;.&lt;target language code&gt;.csv".
/// </summary>
/// <param name="fullNameForDocumentToProcess">Source document name</param>
/// <param name="sourceLanguage">From language</param>
/// <param name="targetLanguage">To language</param>
public static void CreateAlignmentCSV(string fullNameForDocumentToProcess, string sourceLanguage, string targetLanguage)
{
    var document = File.ReadAllLines(fullNameForDocumentToProcess, Encoding.UTF8);
    List<string> lstTexts = new List<string>(document);
    var batches = SplitList(lstTexts, TranslationServiceFacade.maxelements, TranslationServiceFacade.maxrequestsize);

    string sourceLanguageCode = TranslationServiceFacade.LanguageNameToLanguageCode(sourceLanguage);
    string targetLanguageCode = TranslationServiceFacade.LanguageNameToLanguageCode(targetLanguage);

    // The using block guarantees the CSV is flushed and its handle released even when
    // GetAlignments throws part-way through; previously the writer leaked in that case.
    using (var textfile = File.CreateText(fullNameForDocumentToProcess + "." + targetLanguageCode + ".csv"))
    {
        // Header row: upper-cased language codes plus the alignment column title.
        textfile.WriteLine(
            "\"{0}\",\"{1}\",\"{2}\"",
            sourceLanguageCode.ToUpperInvariant(),
            targetLanguageCode.ToUpperInvariant(),
            "Word Alignment");

        foreach (var batch in batches)
        {
            string[] alignments = null;
            string[] translated = TranslationServiceFacade.GetAlignments(batch.ToArray(), sourceLanguage, targetLanguage, ref alignments);

            for (int i = 0; i < batch.Count(); i++)
            {
                // Embedded double quotes are doubled for the source and translated
                // columns; the alignment column is written as returned.
                textfile.WriteLine(
                    "\"{0}\",\"{1}\",\"{2}\"",
                    batch[i].Replace("\"", "\"\""),
                    translated[i].Replace("\"", "\"\""),
                    alignments[i]);
            }
        }
    }
}
/// <summary>
/// Creates an alignment CSV for every source document and every target language
/// given on the command line. File specifications containing '*' are expanded
/// recursively through subfolders.
/// </summary>
/// <returns>
/// <c>true</c> when all documents were processed; <c>false</c> as soon as processing
/// of any document/language pair throws.
/// </returns>
public override bool Execute()
{
    int documentcount = 0;
    List<String> listoffiles = new List<string>();

    // Expand wildcard, if name specification contains *
    if (this.sourceDocuments.Values.ToArray().Any(file => file.ToString().Contains("*")))
    {
        foreach (string filename in this.sourceDocuments.Values.ToArray())
        {
            int lastBackslashPosition = filename.LastIndexOf('\\') + 1;
            string path = filename.Substring(0, lastBackslashPosition);
            string filenameOnly = filename.Substring(lastBackslashPosition);

            // A specification without a directory part (e.g. "*.txt") yields an empty
            // path, which Directory.GetFiles rejects with an ArgumentException.
            // Search from the current directory in that case.
            if (path.Length == 0)
            {
                path = Directory.GetCurrentDirectory();
            }

            String[] filelist = Directory.GetFiles(path, filenameOnly, SearchOption.AllDirectories);
            listoffiles.AddRange(filelist);
        }
    }
    else
    {
        // No * in the file name: take the comma-separated names as they are.
        foreach (var file in this.sourceDocuments.ValueString.Split(','))
        {
            listoffiles.Add(file);
        }
    }

    try
    {
        var model = new CommentTranslationModel
        {
            SourceLanguage = this.sourceLanguage.ValueString ?? "Auto-Detect",
            TargetLanguage = this.targetLanguages.ValueString
        };

        foreach (var file in listoffiles)
        {
            // NOTE(review): a missing file is only logged here; processing still
            // continues with it and fails in CreateAlignmentCSV below, which aborts
            // the whole run. Confirm whether skipping the file is the intended behavior.
            if (!File.Exists(file))
            {
                Logger.WriteLine(LogLevel.Error, String.Format("Specified document {0} does not exist. ", file));
            }

            foreach (var language in this.targetLanguages.Values)
            {
                try
                {
                    this.Logger.WriteLine(
                        LogLevel.Msg,
                        string.Format("Getting alignments for document {0} to language {1}.", file, language));

                    model.TargetPath = file;

                    // "Auto-Detect" is passed through unchanged; any other value is
                    // looked up in the facade's language table.
                    var sourceLanguageExpanded =
                        String.IsNullOrEmpty(this.sourceLanguage.ValueString)
                        || this.sourceLanguage.ValueString.Equals("Auto-Detect")
                            ? "Auto-Detect"
                            : TranslationServiceFacade.AvailableLanguages[this.sourceLanguage.ValueString];

                    string languagename = TranslationServiceFacade.LanguageCodeToLanguageName(language.ToString());

                    DocumentTranslationManager.CreateAlignmentCSV(file, sourceLanguageExpanded, languagename);

                    this.Logger.WriteLine(
                        LogLevel.Msg,
                        string.Format("-- Aligned document name {0} to language {1}.", file, language));
                    documentcount++;
                }
                catch (Exception ex)
                {
                    this.Logger.WriteLine(
                        LogLevel.Error,
                        string.Format(
                            "Error while processing file: {0} to language {1} with error: {2}",
                            model.TargetPath,
                            language,
                            ex.Message));

                    // Rethrown so the outer handler aborts the run.
                    throw;
                }
            }
        }
    }
    catch (Exception ex)
    {
        this.Logger.WriteException(ex);

        // Waits for the user to press Enter before reporting the failure.
        Console.ReadLine();
        return false;
    }

    this.Logger.WriteLine(LogLevel.Msg, string.Format("Documents aligned successfully: {0}.", documentcount));
    return true;
}
/// <summary>
/// Translates the text elements of the main document body of a Word document in place.
/// Only text elements with more than one character are translated.
/// </summary>
/// <param name="outputDocumentFullName">Full name of the document; it is opened for editing.</param>
/// <param name="sourceLanguage">From language</param>
/// <param name="targetLanguage">To language</param>
/// <exception cref="AggregateException">One or more batches failed to translate or to apply.</exception>
private static void ProcessWordDocument(
    string outputDocumentFullName,
    string sourceLanguage,
    string targetLanguage)
{
    List<DocumentFormat.OpenXml.Wordprocessing.Text> texts = new List<DocumentFormat.OpenXml.Wordprocessing.Text>();
    using (WordprocessingDocument doc = WordprocessingDocument.Open(outputDocumentFullName, true))
    {
        var body = doc.MainDocumentPart.Document.Body;
        texts.AddRange(
            body.Descendants<DocumentFormat.OpenXml.Wordprocessing.Text>()
                .Where(text => !String.IsNullOrEmpty(text.Text) && text.Text.Length > 1));

        var exceptions = new ConcurrentQueue<Exception>();

        // Extract Text for Translation
        var batch = texts.Select(text => text.Text);

        // Do Translation
        var batches = SplitList(batch, 99, 9000);
        Parallel.For(
            0,
            batches.Count(),
            new ParallelOptions { MaxDegreeOfParallelism = 1 },
            l =>
            {
                try
                {
                    var translationOutput = TranslationServiceFacade.TranslateArray(
                        batches[l].ToArray(),
                        sourceLanguage,
                        targetLanguage);

                    // Position of this batch's first element within 'texts'.
                    int batchStartIndexInDocument = 0;
                    for (int i = 0; i < l; i++)
                    {
                        batchStartIndexInDocument = batchStartIndexInDocument + batches[i].Count();
                    }

                    // Apply translated batch to document. The list is indexed directly;
                    // the former Take(n).Last() walked the list from the start for every
                    // element (quadratic) and silently overwrote the last element when
                    // the index ran past the end. An out-of-range index now surfaces as
                    // an exception collected below.
                    for (int j = 0; j < translationOutput.Length; j++)
                    {
                        texts[batchStartIndexInDocument + j].Text = translationOutput[j];
                    }
                }
                catch (Exception ex)
                {
                    exceptions.Enqueue(ex);
                }
            });

        // Throw the exceptions here after the loop completes.
        if (!exceptions.IsEmpty)
        {
            throw new AggregateException(exceptions);
        }

        //doc.MainDocumentPart.PutXDocument();
    }
}
/// <summary>
/// Translates a document and provides links to the original as well as the translated document.
/// Both files are copied to SharePoint and the local copies are deleted afterwards.
/// </summary>
/// <param name="storageContainerName">Name of the storage container (not used by this method)</param>
/// <param name="storageFileName">Name of the storage file (original document for translation)</param>
/// <param name="originalLanguage">The language of the original file</param>
/// <param name="translationLanguage">The language for translating the document</param>
/// <returns>The SharePoint URLs of the original and of the translated document.</returns>
public DocumentLinks TranslateFile(string storageContainerName, string storageFileName, string originalLanguage, string translationLanguage)
{
    var localFileName = storageFileName;

    try
    {
        TranslationServiceFacade.Initialize(ConfigurationManager.AppSettings["ApiKey"]);
        DocumentTranslationManager.DoTranslation(localFileName, false, originalLanguage, translationLanguage);
    }
    catch (Exception ex)
    {
        _loggingService.Error("Error in TranslationServiceFacade.Initialize or DocumentTranslationManager.DoTranslation", ex);
        throw;
    }

    string originalFileUrl;
    string translatedFileUrl;
    string translatedDocumentName;

    try
    {
        // Map the language name back to its code.
        var languageCode = TranslationServiceFacade.AvailableLanguages
            .Where(p => p.Value == translationLanguage)
            .Select(p => p.Key)
            .FirstOrDefault();
        var extension = Helper.GetExtension(storageFileName);

        // Insert the language code in front of the LAST ".extension" only. A plain
        // string.Replace rewrote every occurrence, corrupting paths in which the same
        // text also appears earlier (e.g. a folder named "reports.docx").
        // NOTE(review): assumes the translated file is named "<name>.<code>.<ext>",
        // as the original Replace implied - confirm against DoTranslation.
        var extensionSuffix = $".{extension}";
        int suffixIndex = localFileName.LastIndexOf(extensionSuffix, System.StringComparison.Ordinal);
        translatedDocumentName = suffixIndex < 0
            ? localFileName
            : localFileName.Substring(0, suffixIndex)
              + $".{languageCode}.{extension}"
              + localFileName.Substring(suffixIndex + extensionSuffix.Length);

        // Move original file to SharePoint
        originalFileUrl = _sharePointManagementService.CopyFileToSharePoint(localFileName);

        // Move translated file to SharePoint
        translatedFileUrl = _sharePointManagementService.CopyFileToSharePoint(translatedDocumentName);
    }
    catch (Exception ex)
    {
        _loggingService.Error("Error in TranslationServiceFacade.AvailableLanguages.Wher or Helper.GetExtension or _sharePointManagementService.CopyFileToSharePoint", ex);
        throw;
    }

    try
    {
        // Delete original file
        if (System.IO.File.Exists(localFileName))
        {
            System.IO.File.Delete(localFileName);
        }

        // Delete translated file
        if (System.IO.File.Exists(translatedDocumentName))
        {
            System.IO.File.Delete(translatedDocumentName);
        }
    }
    catch (Exception ex)
    {
        _loggingService.Error("Error in System.IO.File.Exists or System.IO.File.Delete", ex);
        throw;
    }

    return new DocumentLinks
    {
        OriginalDocument = originalFileUrl,
        TranslatedDocument = translatedFileUrl
    };
}
/// <summary>
/// Translates slide texts, notes texts and slide comments of a PowerPoint document in place.
/// </summary>
/// <param name="outputDocumentFullName">Full name of the presentation; it is opened for editing.</param>
/// <param name="sourceLanguage">From language</param>
/// <param name="targetLanguage">To language</param>
/// <exception cref="AggregateException">One or more comment batches failed to translate or to apply.</exception>
private static void ProcessPowerPointDocument(string outputDocumentFullName, string sourceLanguage, string targetLanguage)
{
    using (PresentationDocument doc = PresentationDocument.Open(outputDocumentFullName, true))
    {
        //doc.PresentationPart.PutXDocument();
        List<DocumentFormat.OpenXml.Drawing.Text> texts = new List<DocumentFormat.OpenXml.Drawing.Text>();
        List<DocumentFormat.OpenXml.Drawing.Text> notes = new List<DocumentFormat.OpenXml.Drawing.Text>();
        List<DocumentFormat.OpenXml.Presentation.Comment> lstComments = new List<DocumentFormat.OpenXml.Presentation.Comment>();

        var slideParts = doc.PresentationPart.SlideParts;
        if (slideParts != null)
        {
            // Collect slide texts, comments and notes texts from every slide.
            foreach (var slidePart in slideParts)
            {
                if (slidePart.Slide != null)
                {
                    var slide = slidePart.Slide;
                    ExtractTextContent(texts, slide);

                    var commentsPart = slidePart.SlideCommentsPart;
                    if (commentsPart != null)
                    {
                        lstComments.AddRange(commentsPart.CommentList.Cast<DocumentFormat.OpenXml.Presentation.Comment>());
                    }

                    var notesPart = slidePart.NotesSlidePart;
                    if (notesPart != null)
                    {
                        ExtractTextContent(notes, notesPart.NotesSlide);
                    }
                }
            }

            ReplaceTextsWithTranslation(texts, sourceLanguage, targetLanguage);
            ReplaceTextsWithTranslation(notes, sourceLanguage, targetLanguage);

            if (lstComments.Count > 0)
            {
                // Extract Text for Translation
                var batch = lstComments.Select(text => text.InnerText);

                // Do Translation
                var batchesComments = SplitList(batch, TranslationServiceFacade.maxelements, TranslationServiceFacade.maxrequestsize);

                // Use ConcurrentQueue to enable safe enqueueing from multiple threads.
                var exceptions = new ConcurrentQueue<Exception>();

                Parallel.For(
                    0,
                    batchesComments.Count(),
                    new ParallelOptions { MaxDegreeOfParallelism = 1 },
                    l =>
                    {
                        try
                        {
                            var translationOutput = TranslationServiceFacade.TranslateArray(
                                batchesComments[l].ToArray(),
                                sourceLanguage,
                                targetLanguage);

                            // Position of this batch's first element within 'lstComments'.
                            int batchStartIndexInDocument = 0;
                            for (int i = 0; i < l; i++)
                            {
                                batchStartIndexInDocument = batchStartIndexInDocument + batchesComments[i].Count();
                            }

                            // Apply translated batch to document. Direct indexing replaces
                            // Take(n).Last(), which rescanned the list for every comment
                            // (quadratic) and hid an out-of-range index by overwriting
                            // the last comment.
                            for (int j = 0; j < translationOutput.Length; j++)
                            {
                                var commentPart = lstComments[batchStartIndexInDocument + j];
                                commentPart.Text = new DocumentFormat.OpenXml.Presentation.Text
                                {
                                    Text = translationOutput[j]
                                };
                            }
                        }
                        catch (Exception ex)
                        {
                            exceptions.Enqueue(ex);
                        }
                    });

                // Throw the exceptions here after the loop completes.
                if (!exceptions.IsEmpty)
                {
                    throw new AggregateException(exceptions);
                }
            }
        }

        //doc.PresentationPart.PutXDocument();
    }
}
/// <summary>
/// Entry point of the custom activity (V2). Reads "activity.json" and
/// "linkedServices.json" from the working directory, connects to the blob container
/// named in the activity's extended properties, and calls ProcessTextDocument for
/// every block blob found in it with "en" as target language.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Console.WriteLine("Start to execute custom activity V2");

    // Parse activity and reference objects info from input files
    dynamic activity = JsonConvert.DeserializeObject(File.ReadAllText("activity.json"));
    dynamic linkedServices = JsonConvert.DeserializeObject(File.ReadAllText("linkedServices.json"));

    // Extract Connection String from LinkedService
    dynamic storageLinkedService = ((JArray)linkedServices).First(
        service => "BatchStorageLinkedService".Equals(((dynamic)service).name.ToString()));
    string connectionString = storageLinkedService.properties.typeProperties.connectionString.value;

    // Extract InputFilePath & OutputFilePath from ExtendedProperties.
    // In ADFv2, Input & Output Datasets are not required for Custom Activity. In this
    // sample the settings are stored in ExtendedProperty of the Custom Activity like
    // below. You are not required to get the information from Datasets.
    //   "extendedProperties": {
    //       "InputContainer": "incoming",
    //       "OutputFolder": "translated",
    //       "TranslateServiceKey": "key goes here"
    //   }
    string azureKey = activity.typeProperties.extendedProperties.TranslateServiceKey;   // "cognitive services key"
    string outputPath = activity.typeProperties.extendedProperties.OutputFolder;        // "translated"
    string inputContainer = activity.typeProperties.extendedProperties.InputContainer;  // "incoming"

    // V1 Logger is no longer required as your executable can directly write to STDOUT
    Console.WriteLine("InputContainer: {0}, OutputFolderPath: {1}", inputContainer, outputPath);

    // Extract Input & Output Dataset.
    // If you would like to continue using Datasets, pass the Datasets in
    // referenceObjects of the Custom Activity JSON payload like below:
    //   "referenceObjects": {
    //       "linkedServices": [
    //           { "referenceName": "BatchStorageLinkedService", "type": "LinkedServiceReference" }
    //       ],
    //       "datasets": [
    //           { "referenceName": "InputDataset", "type": "DatasetReference" },
    //           { "referenceName": "OutputDataset", "type": "DatasetReference" }
    //       ]
    //   }
    // Then you can use following code to get the folder and file info instead:
    //   dynamic datasets = JsonConvert.DeserializeObject(File.ReadAllText("datasets.json"));
    //   dynamic inputDataset = ((JArray)datasets).First(_ => ((dynamic)_).name.ToString().StartsWith("InputDataset"));
    //   dynamic outputDataset = ((JArray)datasets).First(_ => ((dynamic)_).name.ToString().StartsWith("OutputDataset"));
    //   string inputFolderPath = inputDataset.properties.typeProperties.folderPath;
    //   string outputFolderPath = outputDataset.properties.typeProperties.folderPath;
    //   string outputFile = outputDataset.properties.typeProperties.fileName;
    //   string outputFilePath = outputFolderPath + "/" + outputFile;
    // Once needed info is prepared, core business logic down below remains the same.

    Console.WriteLine("initializing blob services...");

    // Blob Storage References: create the storage client from the connection string
    // and resolve the input container.
    CloudStorageAccount storageAccount = CloudStorageAccount.Parse(connectionString);
    CloudBlobClient blobClient = storageAccount.CreateCloudBlobClient();
    CloudBlobContainer container = blobClient.GetContainerReference(inputContainer);
    Console.WriteLine("success...");

    Console.WriteLine("initializing cortana intelligence services...");

    // Cognitive Services Initialization
    TranslationServiceFacade.AzureKey = azureKey;
    TranslationServiceFacade.SaveCredentials();
    TranslationServiceFacade.Initialize();
    Console.WriteLine("success...");

    Console.WriteLine("finding blobs...");
    foreach (IListBlobItem item in container.ListBlobs(null, true))
    {
        // Only items whose exact runtime type is CloudBlockBlob are processed.
        if (item.GetType() != typeof(CloudBlockBlob))
        {
            continue;
        }

        CloudBlockBlob blob = (CloudBlockBlob)item;
        Console.WriteLine("found file {0}", blob.Name);
        ProcessTextDocument(blob, string.Empty, "en", outputPath);
    }
}
/// <summary>
/// Translates the caption file named by <c>filename</c> and overwrites it with the result.
/// Lines are sorted into header, timing markup and content; the content is
/// concatenated, translated as one string, split again using the offsets returned by
/// the sentence breaker, and written back after the header lines.
/// </summary>
/// <param name="tolangcode">Translate to language</param>
/// <returns>Number of lines written to the output file.</returns>
public async Task<int> Translate(string tolangcode)
{
    // Read into the Markup, Content and Header lists.
    bool timingSeen = false;
    using (StreamReader reader = new StreamReader(filename))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            string trimmed = line.Trim();
            if (trimmed.Length == 0)
            {
                // Blank lines are dropped.
                continue;
            }

            // A line starting with a digit and containing "-->" is a timing line.
            if (Char.IsDigit(trimmed[0]) && line.Contains("-->"))
            {
                Markup.Add(line);
                timingSeen = true;
            }
            else if (timingSeen)
            {
                Content.Add(line);
            }
            else
            {
                // Everything before the first timing line belongs to the header.
                Header.Add(line);
            }
        }
    }

    // Concatenate the content lines, each followed by a single space.
    StringBuilder joined = new StringBuilder();
    foreach (string contentLine in Content)
    {
        joined.Append(contentLine + " ");
    }

    // Detect the source language from three lines around the middle, when there are
    // more than three content lines; otherwise no source language is passed.
    string fromlangcode = null;
    if (Content.Count > 3)
    {
        int middle = Content.Count / 2;
        string sample = Content[middle] + Content[middle - 1] + Content[middle + 1];
        fromlangcode = await TranslationServiceFacade.DetectAsync(sample, true);
    }

    string translation = await TranslationServiceFacade.TranslateStringAsync(joined.ToString(), fromlangcode, tolangcode);
    joined.Clear();

    // Compose the resulting file: header first, then one line per returned offset,
    // each offset being used as the length of the next slice.
    List<int> sentenceLengths = await TranslationServiceFacade.BreakSentencesAsync(translation, tolangcode);
    List<string> outputLines = new List<string>();
    outputLines.AddRange(Header);

    int position = 0;
    foreach (int length in sentenceLengths)
    {
        outputLines.Add(translation.Substring(position, length));
        position += length;
    }

    using (StreamWriter output = new StreamWriter(filename))
    {
        foreach (string outputLine in outputLines)
        {
            output.WriteLine(outputLine);
        }
    }

    return outputLines.Count;
}
/// <summary>
/// Dispatches a document to the translation routine matching its file extension
/// (.docx, .xlsx, .pptx, .txt/.text, .html/.htm, .srt, .md/.markdown, .vtt/.webvtt).
/// A file with any other extension is left untouched.
/// </summary>
/// <param name="fullNameForDocumentToProcess">The full name for document to process.</param>
/// <param name="sourceLanguage">The source language.</param>
/// <param name="targetLanguage">The target language.</param>
/// <param name="ignoreHidden">Passed through to the Word, Excel and PowerPoint processors.</param>
/// <exception cref="Exception">
/// Any failure is logged and rethrown. An <see cref="AggregateException" /> is converted
/// into an <see cref="Exception" /> whose message concatenates the inner messages and
/// whose InnerException is the original aggregate.
/// </exception>
private static void DoTranslationInternal(
    string fullNameForDocumentToProcess,
    string sourceLanguage,
    string targetLanguage,
    bool ignoreHidden = false)
{
    try
    {
        // Lower-case once and compare ordinally: extension matching is not linguistic.
        string lowerName = fullNameForDocumentToProcess.ToLowerInvariant();

        if (lowerName.EndsWith(".docx", StringComparison.Ordinal))
        {
            ProcessWordDocument(fullNameForDocumentToProcess, sourceLanguage, targetLanguage, ignoreHidden);
        }
        else if (lowerName.EndsWith(".xlsx", StringComparison.Ordinal))
        {
            ProcessExcelDocument(fullNameForDocumentToProcess, sourceLanguage, targetLanguage, ignoreHidden);
        }
        else if (lowerName.EndsWith(".pptx", StringComparison.Ordinal))
        {
            ProcessPowerPointDocument(fullNameForDocumentToProcess, sourceLanguage, targetLanguage, ignoreHidden);
        }
        else if (lowerName.EndsWith(".txt", StringComparison.Ordinal)
                 || lowerName.EndsWith(".text", StringComparison.Ordinal))
        {
            ProcessTextDocument(fullNameForDocumentToProcess, sourceLanguage, targetLanguage);
        }
        else if (lowerName.EndsWith(".html", StringComparison.Ordinal)
                 || lowerName.EndsWith(".htm", StringComparison.Ordinal))
        {
            HTMLTranslationManager.DoTranslation(fullNameForDocumentToProcess, sourceLanguage, targetLanguage);
        }
        else if (lowerName.EndsWith(".srt", StringComparison.Ordinal))
        {
            VTTTranslate vTTTranslate = new VTTTranslate(fullNameForDocumentToProcess, sourceLanguage, VTTTranslate.Filetype.srt);

            // Blocks until the asynchronous translation finishes; failures arrive as
            // an AggregateException and are handled below.
            _ = vTTTranslate.Translate(TranslationServiceFacade.LanguageNameToLanguageCode(targetLanguage)).Result;
        }
        else if (lowerName.EndsWith(".md", StringComparison.Ordinal)
                 || lowerName.EndsWith(".markdown", StringComparison.Ordinal))
        {
            MDTranslationManager.DoTranslation(fullNameForDocumentToProcess, sourceLanguage, targetLanguage);
        }
        else if (lowerName.EndsWith(".vtt", StringComparison.Ordinal)
                 || lowerName.EndsWith(".webvtt", StringComparison.Ordinal))
        {
            VTTTranslate vTTTranslate = new VTTTranslate(fullNameForDocumentToProcess, sourceLanguage);
            _ = vTTTranslate.Translate(TranslationServiceFacade.LanguageNameToLanguageCode(targetLanguage)).Result;
        }
    }
    catch (AggregateException ae)
    {
        var errorMessage = String.Empty;
        foreach (var ex in ae.InnerExceptions)
        {
            errorMessage = errorMessage + " " + ex.Message;
            LoggingManager.LogError(string.Format("{0}:{1}", fullNameForDocumentToProcess, ex.Message + ex.StackTrace));
        }

        // Keep the aggregate as inner exception so callers can still reach the
        // original exceptions and their stack traces; the message is unchanged.
        throw new Exception(errorMessage, ae);
    }
    catch (Exception ex)
    {
        LoggingManager.LogError(
            string.Format("{0}:{1}", fullNameForDocumentToProcess, ex.Message + ex.StackTrace));
        throw;
    }
}
/// <summary>
/// Translates an Excel workbook in place: the shared strings (plain and rich-text runs),
/// the table column names, and the worksheet comments.
/// </summary>
/// <param name="outputDocumentFullName">Full name of the workbook; it is opened for editing.</param>
/// <param name="sourceLanguage">From language</param>
/// <param name="targetLanguage">To language</param>
/// <exception cref="AggregateException">One or more batches failed to translate or to apply.</exception>
private static void ProcessExcelDocument(
    string outputDocumentFullName,
    string sourceLanguage,
    string targetLanguage)
{
    using (SpreadsheetDocument document = SpreadsheetDocument.Open(outputDocumentFullName, true))
    {
        //document.WorkbookPart.SharedStringTablePart.PutXDocument();
        List<DocumentFormat.OpenXml.Spreadsheet.Text> lstTexts = new List<DocumentFormat.OpenXml.Spreadsheet.Text>();

        // A missing shared string table part used to cause a NullReferenceException
        // here; it is now treated as "no shared strings".
        var sharedStringTablePart = document.WorkbookPart.SharedStringTablePart;
        if (sharedStringTablePart != null && sharedStringTablePart.SharedStringTable != null)
        {
            foreach (SharedStringItem si in sharedStringTablePart.SharedStringTable.Elements<SharedStringItem>())
            {
                if (si != null && si.Text != null && !String.IsNullOrEmpty(si.Text.Text))
                {
                    // Plain shared string.
                    lstTexts.Add(si.Text);
                }
                else if (si != null)
                {
                    // Rich-text shared string: collect the text of every non-empty run.
                    lstTexts.AddRange(
                        si.Elements<DocumentFormat.OpenXml.Spreadsheet.Run>()
                            .Where(item => (item != null && item.Text != null && !String.IsNullOrEmpty(item.Text.Text)))
                            .Select(item => item.Text));
                }
            }
        }

        var batch = lstTexts.Select(item => item.Text);
        IEnumerable<string> values = batch as string[] ?? batch.ToArray();

        var batches = SplitList(values, TranslationServiceFacade.maxelements, TranslationServiceFacade.maxrequestsize);
        string[] translated = new string[values.Count()];

        var exceptions = new ConcurrentQueue<Exception>();

        Parallel.For(
            0,
            batches.Count(),
            new ParallelOptions { MaxDegreeOfParallelism = 1 },
            l =>
            {
                try
                {
                    var translationOutput = TranslationServiceFacade.TranslateArray(
                        batches[l].ToArray(),
                        sourceLanguage,
                        targetLanguage);

                    // Position of this batch's first element within 'lstTexts'.
                    int batchStartIndexInDocument = 0;
                    for (int i = 0; i < l; i++)
                    {
                        batchStartIndexInDocument = batchStartIndexInDocument + batches[i].Count();
                    }

                    // Apply translated batch to document
                    for (int j = 0; j < translationOutput.Length; j++)
                    {
                        int indexInDocument = batchStartIndexInDocument + j;
                        var newValue = translationOutput[j];
                        translated[indexInDocument] = newValue;
                        lstTexts[indexInDocument].Text = newValue;
                    }
                }
                catch (Exception ex)
                {
                    exceptions.Enqueue(ex);
                }
            });

        if (!exceptions.IsEmpty)
        {
            throw new AggregateException(exceptions);
        }

        // Refresh all the shared string references.
        // NOTE(review): the column Id is used as a 1-based index into the translated
        // shared strings - confirm this mapping is intended.
        foreach (var table in document.WorkbookPart.GetPartsOfType<WorksheetPart>().Select(part => part.TableDefinitionParts).SelectMany(tables => tables))
        {
            foreach (TableColumn col in table.Table.TableColumns)
            {
                col.Name = translated[int.Parse(col.Id) - 1];
            }

            table.Table.Save();
        }

        // Update comments
        WorkbookPart workBookPart = document.WorkbookPart;
        List<DocumentFormat.OpenXml.Spreadsheet.Comment> lstComments = new List<DocumentFormat.OpenXml.Spreadsheet.Comment>();
        foreach (WorksheetCommentsPart commentsPart in workBookPart.WorksheetParts.SelectMany(sheet => sheet.GetPartsOfType<WorksheetCommentsPart>()))
        {
            lstComments.AddRange(commentsPart.Comments.CommentList.Cast<Comment>());
        }

        var batchComments = lstComments.Select(item => item.InnerText);
        var batchesComments = SplitList(batchComments, TranslationServiceFacade.maxelements, TranslationServiceFacade.maxrequestsize);

        Parallel.For(
            0,
            batchesComments.Count(),
            new ParallelOptions { MaxDegreeOfParallelism = 1 },
            l =>
            {
                try
                {
                    var translationOutput = TranslationServiceFacade.TranslateArray(
                        batchesComments[l].ToArray(),
                        sourceLanguage,
                        targetLanguage);

                    // The offset must be summed over the COMMENT batches. It was summed
                    // over the shared-string batches, which placed translations on the
                    // wrong comments (or failed) once there was more than one comment batch.
                    int batchStartIndexInDocument = 0;
                    for (int i = 0; i < l; i++)
                    {
                        batchStartIndexInDocument = batchStartIndexInDocument + batchesComments[i].Count();
                    }

                    // Direct indexing replaces Take(n).Last(), which rescanned the list
                    // for every comment and hid an out-of-range index by overwriting
                    // the last comment.
                    for (int j = 0; j < translationOutput.Length; j++)
                    {
                        var currentComment = lstComments[batchStartIndexInDocument + j];
                        currentComment.CommentText = new CommentText
                        {
                            Text = new DocumentFormat.OpenXml.Spreadsheet.Text
                            {
                                Text = translationOutput[j]
                            }
                        };
                    }
                }
                catch (Exception ex)
                {
                    exceptions.Enqueue(ex);
                }
            });

        // Throw the exceptions here after the loop completes.
        if (!exceptions.IsEmpty)
        {
            throw new AggregateException(exceptions);
        }
    }
}
/// <summary>
/// Reads the TMX document, validates its segment pairs, splits them into sentence-level
/// translation units, and either writes those units to the CTF store or lists them on screen.
/// Rejected segments are written to a separate ".errors." TMX file.
/// </summary>
/// <returns>
/// The <see cref="bool" />: <c>true</c> when processing ran to completion, <c>false</c> when a
/// validation check failed (the reason is logged and the intermediate SNT files are deleted).
/// </returns>
public override bool Execute()
{
    // Segments are exempt from the length-ratio check unless one side exceeds this many characters.
    const int minLengthForRatioCheck = 15;
    const float maxLengthRatio = 3f;
    // Share of all segments that may violate a check before the whole file is rejected.
    const double maxViolationShare = 0.10;

    string SntFileName = Path.GetTempPath() + "_TmxUpload.snt";

    string uservalue = user.ValueString;
    if (uservalue == string.Empty)
    {
        uservalue = "TmxUpload";
    }

    string ratingvalue = rating.ValueString;
    if (ratingvalue == string.Empty)
    {
        ratingvalue = "6";
    }

    TmxFile TmxIn = new TmxFile(this.TmxDocument.ValueString);
    string[] sntFilenames = TmxIn.WriteToSNTFiles(SntFileName);
    if (sntFilenames.Length != 2)
    {
        Logger.WriteLine(LogLevel.Error, "More than 2 languages in the TMX file. Must have exactly 2.");
        deleteSNTfiles(sntFilenames);
        return false;
    }

    TranslationMemory TM = new TranslationMemory();
    TM.sourceLangID = this.sourceLanguage.ValueString.ToLowerInvariant();
    TM.targetLangID = this.targetLanguage.ValueString.ToLowerInvariant();

    // Read language names from Tmx: the language code is the part of the SNT file name after the last '_'.
    string TmxSourceLanguage = Path.GetFileNameWithoutExtension(sntFilenames[0]);
    TmxSourceLanguage = TmxSourceLanguage.Substring(TmxSourceLanguage.LastIndexOf('_') + 1).ToLowerInvariant();
    string TmxTargetLanguage = Path.GetFileNameWithoutExtension(sntFilenames[1]);
    TmxTargetLanguage = TmxTargetLanguage.Substring(TmxTargetLanguage.LastIndexOf('_') + 1).ToLowerInvariant();

    if (TmxSourceLanguage.Substring(0, 2) != TM.sourceLangID)
    {
        Logger.WriteLine(LogLevel.Error, "Source language mismatch between command line {0} and TMX language {1}. Please edit TmxLangMap.csv to fix. Aborting.", TM.sourceLangID, TmxSourceLanguage);
        deleteSNTfiles(sntFilenames);
        return false;
    }

    if (TmxTargetLanguage.Substring(0, 2) != TM.targetLangID)
    {
        Logger.WriteLine(LogLevel.Error, "Target language mismatch between command line {0} and TMX language {1}. Please edit TmxLangMap.csv to fix. Aborting.", TM.targetLangID, TmxTargetLanguage);
        deleteSNTfiles(sntFilenames);
        return false;
    }

    string[] sntSource = File.ReadAllLines(sntFilenames[0]);
    string[] sntTarget = File.ReadAllLines(sntFilenames[1]);
    if (sntSource.Length != sntTarget.Length)
    {
        Logger.WriteLine(LogLevel.Error, "Unequal number of segments. The TMX must have the same number of segments in the two given languages.");
        deleteSNTfiles(sntFilenames);
        return false;
    }

    Logger.WriteLine(LogLevel.None, "{0} translation units read.", sntSource.Length);

    // Resolved once, before the error file is opened: every translation unit gets the same values.
    int ratingNumber = int.Parse(ratingvalue);
    string userName = uservalue.ToUpperInvariant();

    // 24-hour UTC timestamp, so the literal "Z" suffix is truthful and names do not repeat after 12 hours.
    string timestamp = DateTime.UtcNow.ToString("yyyyMMddTHHmmssZ", System.Globalization.CultureInfo.InvariantCulture);
    TmxWriter ErrorTmx = new TmxWriter(
        Path.GetFileNameWithoutExtension(this.TmxDocument.ValueString) + ".errors." + TmxSourceLanguage + "_" + TmxTargetLanguage + "." + timestamp + ".tmx",
        TmxSourceLanguage,
        TmxTargetLanguage);

    // try/finally guarantees the error TMX is closed on the early "return false" paths as well.
    try
    {
        //Load into TM and perform error check on each line.
        int ratioViolationCount = 0;    //counts number of ratio violations
        int sntCountViolationCount = 0; //counts number of unequal sentence count violation.
        for (int sntLineIndex = 0; sntLineIndex < sntSource.Length; sntLineIndex++)
        {
            //show a progress message.
            if ((sntLineIndex % 10) == 0)
            {
                Logger.WriteLine(LogLevel.Debug, "{0} of {1} sentences aligned and error checked.", sntLineIndex, sntSource.Length);
            }

            string sourceSegment = sntSource[sntLineIndex];
            string targetSegment = sntTarget[sntLineIndex];

            //Length discrepancy check. Floating-point division keeps the fractional part, and an
            //empty target segment no longer raises DivideByZeroException.
            //NOTE(review): the ratio is source/target only, so a target much longer than its source is not flagged - confirm intent.
            float ratio;
            if (targetSegment.Length == 0)
            {
                ratio = sourceSegment.Length == 0 ? 0f : float.PositiveInfinity;
            }
            else
            {
                ratio = (float)sourceSegment.Length / targetSegment.Length;
            }

            //The short-segment exemption looks at the current segment's length, not the segment count.
            if ((ratio > maxLengthRatio) && ((sourceSegment.Length > minLengthForRatioCheck) || (targetSegment.Length > minLengthForRatioCheck)))
            {
                //skip the segment, and add to error.tmx
                Logger.WriteLine(LogLevel.Debug, "Length ratio exceeded. Segment skipped: {0}", TruncateForLog(sourceSegment));
                ratioViolationCount++;
                ErrorTmx.TmxWriteSegment(sourceSegment, targetSegment, TmxSourceLanguage, TmxTargetLanguage, TmxWriter.TUError.lengthratio);

                if (((double)ratioViolationCount / sntSource.Length) > maxViolationShare)
                {
                    Logger.WriteLine(LogLevel.Error, "Length ratio exceeded for 10% of segments. Probably not a translation. Aborting.");
                    deleteSNTfiles(sntFilenames);
                    return false;
                }

                continue;
            }

            //TODO: special handling of bpt/ept
            sourceSegment = System.Net.WebUtility.HtmlDecode(sourceSegment);
            targetSegment = System.Net.WebUtility.HtmlDecode(targetSegment);

            //throw away segments with tags
            bool sourceHasTag = sourceSegment.Contains("<") && sourceSegment.Contains(">");
            bool targetHasTag = targetSegment.Contains("<") && targetSegment.Contains(">");
            if (sourceHasTag && targetHasTag)
            {
                Logger.WriteLine(LogLevel.Debug, "Tagged segment. Segment skipped: {0}", TruncateForLog(sourceSegment));
                ErrorTmx.TmxWriteSegment(sourceSegment, targetSegment, TmxSourceLanguage, TmxTargetLanguage, TmxWriter.TUError.tagging);
                continue;
            }

            //Encode the remaining <>&
            sourceSegment = System.Net.WebUtility.HtmlEncode(sourceSegment);
            targetSegment = System.Net.WebUtility.HtmlEncode(targetSegment);

            int[] sourceSentLengths = TranslationServiceFacade.BreakSentences(sourceSegment, TM.sourceLangID);
            int[] targetSentLengths = TranslationServiceFacade.BreakSentences(targetSegment, TM.targetLangID);

            //unequal sentence count violation check
            if (sourceSentLengths.Length != targetSentLengths.Length)
            {
                sntCountViolationCount++;
                Logger.WriteLine(LogLevel.Debug, "Unequal number of sentences in segment. Segment skipped: {0}", TruncateForLog(sourceSegment));
                ErrorTmx.TmxWriteSegment(sourceSegment, targetSegment, TmxSourceLanguage, TmxTargetLanguage, TmxWriter.TUError.sentencecountmismatch);

                if (((double)sntCountViolationCount / sntSource.Length) > maxViolationShare)
                {
                    Logger.WriteLine(LogLevel.Error, "Unequal sentence count exceeded for 10% of segments. Probably not a translation. Aborting.");
                    deleteSNTfiles(sntFilenames);
                    return false;
                }

                continue;
            }

            //Split multiple sentences. Each start offset is the running sum of the preceding sentence lengths.
            int startIndexSrc = 0;
            int startIndexTgt = 0;
            for (int j = 0; j < sourceSentLengths.Length; j++)
            {
                TranslationUnit TU = new TranslationUnit();
                TU.strSource = sourceSegment.Substring(startIndexSrc, sourceSentLengths[j]);
                TU.strTarget = targetSegment.Substring(startIndexTgt, targetSentLengths[j]);
                startIndexSrc += sourceSentLengths[j];
                startIndexTgt += targetSentLengths[j];
                TU.rating = ratingNumber;
                TU.user = userName;
                TM.Add(TU);
            }
        }
    }
    finally
    {
        ErrorTmx.Dispose();
    }

    //Add the whole TM list to CTF, if a CTF write was requested.
    if (boolWrite.ValueString.ToLowerInvariant() == "true")
    {
        int SentenceCount = 0;
        foreach (TranslationUnit TU in TM)
        {
            TranslationServiceFacade.AddTranslation(TU.strSource, TU.strTarget, TM.sourceLangID, TM.targetLangID, TU.rating, TU.user);
            if ((SentenceCount % 10) == 0)
            {
                Logger.WriteLine(LogLevel.Debug, "{0} of {1} sentences written. Continuing...", SentenceCount, sntSource.Length);
            }

            //Do not change the sleep time. This is slow and needs to be slow - the AddTranslation method is designed for interactive use.
            Thread.Sleep(500);
            SentenceCount++;
        }

        Logger.WriteLine(LogLevel.Msg, "{0} sentences written to CTF. Write complete. ", SentenceCount);
    }
    else
    {
        //Just list the entire TM on screen.
        foreach (TranslationUnit TU in TM)
        {
            Logger.WriteLine(LogLevel.None, "{0} || {1}", TU.strSource, TU.strTarget);
        }
    }

    return true;
}

/// <summary>
/// Returns at most the first 60 characters of <paramref name="text"/>, for use in log messages.
/// </summary>
private static string TruncateForLog(string text)
{
    const int maxPreviewLength = 60;
    return text.Substring(0, Math.Min(maxPreviewLength, text.Length));
}
/// <summary>
/// Translates a Word document in place. The markup is simplified first, then every non-empty
/// text node of the body, headers and footers is translated and overwritten.
/// </summary>
/// <param name="outputDocumentFullName">Full path of the document; it is opened for editing and modified in place.</param>
/// <param name="sourceLanguage">From language</param>
/// <param name="targetLanguage">To language</param>
/// <exception cref="AggregateException">
/// Thrown after all batches were attempted if any batch failed; it carries every collected exception.
/// </exception>
private static void ProcessWordDocument(
    string outputDocumentFullName,
    string sourceLanguage,
    string targetLanguage)
{
    // Pass 1: simplify the markup with the settings below, in its own open/close cycle of the document.
    using (WordprocessingDocument doc = WordprocessingDocument.Open(outputDocumentFullName, true))
    {
        OpenXmlPowerTools.SimplifyMarkupSettings settings = new OpenXmlPowerTools.SimplifyMarkupSettings
        {
            AcceptRevisions = true,
            NormalizeXml = true, //setting this to false reduces translation quality, but if true some documents have XML format errors when opening
            RemoveBookmarks = true,
            RemoveComments = true,
            RemoveContentControls = true,
            RemoveEndAndFootNotes = true,
            RemoveFieldCodes = true,
            RemoveGoBackBookmark = true,
            // RemoveHyperlinks is not set here, so it keeps the library default.
            RemoveLastRenderedPageBreak = true,
            RemoveMarkupForDocumentComparison = true,
            RemovePermissions = false,
            RemoveProof = true,
            RemoveRsidInfo = true,
            RemoveSmartTags = true,
            RemoveSoftHyphens = true,
            RemoveWebHidden = true,
            ReplaceTabsWithSpaces = false
        };
        OpenXmlPowerTools.MarkupSimplifier.SimplifyMarkup(doc, settings);
    }

    // Pass 2: collect the text nodes, translate them in batches, and write the results back.
    using (WordprocessingDocument doc = WordprocessingDocument.Open(outputDocumentFullName, true))
    {
        Func<DocumentFormat.OpenXml.Wordprocessing.Text, bool> hasContent = text => !String.IsNullOrEmpty(text.Text);

        List<DocumentFormat.OpenXml.Wordprocessing.Text> texts = new List<DocumentFormat.OpenXml.Wordprocessing.Text>();

        var body = doc.MainDocumentPart.Document.Body;
        texts.AddRange(body.Descendants<DocumentFormat.OpenXml.Wordprocessing.Text>().Where(hasContent));

        foreach (var header in doc.MainDocumentPart.HeaderParts.Select(p => p.Header))
        {
            texts.AddRange(header.Descendants<DocumentFormat.OpenXml.Wordprocessing.Text>().Where(hasContent));
        }

        foreach (var footer in doc.MainDocumentPart.FooterParts.Select(p => p.Footer))
        {
            texts.AddRange(footer.Descendants<DocumentFormat.OpenXml.Wordprocessing.Text>().Where(hasContent));
        }

        var exceptions = new ConcurrentQueue<Exception>();

        // Extract Text for Translation. Snapshot the strings before any node is overwritten.
        IEnumerable<string> batch = texts.Select(text => text.Text).ToArray();

        // Do Translation
        var batches = SplitList(batch, TranslationServiceFacade.maxelements, TranslationServiceFacade.maxrequestsize);
        Parallel.For(
            0,
            batches.Count(),
            new ParallelOptions { MaxDegreeOfParallelism = 1 },
            l =>
            {
                try
                {
                    var translationOutput = TranslationServiceFacade.TranslateArray(
                        batches[l].ToArray(),
                        sourceLanguage,
                        targetLanguage);

                    // Position of this batch's first element within the flat text list.
                    int batchStartIndexInDocument = 0;
                    for (int i = 0; i < l; i++)
                    {
                        batchStartIndexInDocument += batches[i].Count();
                    }

                    // Apply translated batch to document
                    for (int j = 0; j < translationOutput.Length; j++)
                    {
                        // Direct indexing instead of Take(n).Last(): constant-time, and an
                        // out-of-range position is reported through the exception queue
                        // rather than silently overwriting the last text node.
                        texts[batchStartIndexInDocument + j].Text = translationOutput[j];
                    }
                }
                catch (Exception ex)
                {
                    exceptions.Enqueue(ex);
                }
            });

        // Throw the exceptions here after the loop completes.
        if (!exceptions.IsEmpty)
        {
            throw new AggregateException(exceptions);
        }
    }
}
/// <summary>
/// Blob-triggered function: reads a blob from "to-be-translated", translates its content according
/// to the file extension (html, htm or txt), and writes the result to the same name under "translated".
/// </summary>
/// <param name="InputStream">Content of the triggering blob.</param>
/// <param name="OutputText">Writer for the output blob; it is closed before the function returns.</param>
/// <param name="name">Blob name, bound from the trigger path.</param>
/// <param name="log">Logger for start, finish, warning and error messages.</param>
/// <returns>A task that completes once the output has been written and the finish message logged.</returns>
public static async Task Run(
    [BlobTrigger("to-be-translated/{name}", Connection = "AzureWebJobsStorage")] Stream InputStream,
    [Blob("translated/{name}", FileAccess.Write, Connection = "AzureWebJobsStorage")] TextWriter OutputText,
    string name,
    ILogger log)
{
    var watch = System.Diagnostics.Stopwatch.StartNew();
    Status status = Status.Success;
    string TranslatedContent = string.Empty;
    try
    {
        log.LogInformation($"STARTED: AutoTranslateBlob function for blob Name:{name} of Size: {InputStream.Length} Bytes");
        OutputText.WriteLine("");

        //Get Environment Variables
        string ToLang = GetEnvironmentVariable("ToLang");
        string FromLang = GetEnvironmentVariable("FromLang");
        string AzureKey = GetEnvironmentVariable("AzureTranslateKey");
        string CategoryID = GetEnvironmentVariable("CategoryID");

        // Text after the last '.', lower-cased culture-independently so matching does not vary by locale.
        string FileExtension = name.Split('.').Last().ToLowerInvariant();

        TranslationServiceFacade.LoadCredentials(AzureKey, CategoryID);
        TranslationServiceFacade.Initialize(true);

        //ReadFile
        string ContentToBeTranslated = await new StreamReader(InputStream).ReadToEndAsync();

        //Translate
        switch (FileExtension)
        {
            case "html":
            case "htm":
                TranslatedContent = HTMLTranslationManager.DoContentTranslation(ContentToBeTranslated, FromLang, ToLang);
                break;
            case "txt":
                TranslatedContent = DocumentTranslationManager.ProcessTextDocument(ContentToBeTranslated, FromLang, ToLang);
                break;
            default:
                // The output stays empty for other file types; record why instead of failing silently.
                log.LogWarning("Unsupported file extension '{Extension}' for blob {Name}; no translation was performed.", FileExtension, name);
                break;
        }
    }
    catch (Exception e)
    {
        status = Status.Failure;
        // Pass the exception object so the stack trace is logged, not just the message.
        log.LogError(e, "Exception: {Message}", e.Message);
    }
    finally
    {
        //Save to Blob (an empty string when translation failed or the file type is unsupported)
        await OutputText.WriteAsync(TranslatedContent);
        OutputText.Close();

        watch.Stop();
        var elapsedMs = watch.ElapsedMilliseconds;
        log.LogInformation($"FINISHED with {status}: AutoTranslateBlob function for blob:{name} \n ExecutionTime: {elapsedMs} Ms.");
    }
}