/// <summary>
/// Translates the text of a Word document in place.
/// </summary>
/// <remarks>
/// The file is opened once to simplify its markup, then opened again to collect every
/// non-empty text node from the body, the headers and the footers. The collected strings are
/// split into batches with <c>SplitList</c>, each batch is sent to
/// <c>TranslationServiceFacade.TranslateArray</c>, and each result is written back to the text
/// node it came from.
/// </remarks>
/// <param name="outputDocumentFullName">Path of the document, opened read/write.</param>
/// <param name="sourceLanguage">Source language passed through to the translation service.</param>
/// <param name="targetLanguage">Target language passed through to the translation service.</param>
/// <exception cref="AggregateException">
/// At least one batch failed; contains every exception captured while processing the batches.
/// </exception>
private static void ProcessWordDocument(
    string outputDocumentFullName,
    string sourceLanguage,
    string targetLanguage)
{
    using (WordprocessingDocument doc = WordprocessingDocument.Open(outputDocumentFullName, true))
    {
        OpenXmlPowerTools.SimplifyMarkupSettings settings = new OpenXmlPowerTools.SimplifyMarkupSettings
        {
            AcceptRevisions = true,
            // Setting this to false reduces translation quality, but if true some documents
            // have XML format errors when opening.
            NormalizeXml = true,
            RemoveBookmarks = true,
            RemoveComments = true,
            RemoveContentControls = true,
            RemoveEndAndFootNotes = true,
            RemoveFieldCodes = true,
            RemoveGoBackBookmark = true,
            //RemoveHyperlinks = false,
            RemoveLastRenderedPageBreak = true,
            RemoveMarkupForDocumentComparison = true,
            RemovePermissions = false,
            RemoveProof = true,
            RemoveRsidInfo = true,
            RemoveSmartTags = true,
            RemoveSoftHyphens = true,
            RemoveWebHidden = true,
            ReplaceTabsWithSpaces = false
        };
        OpenXmlPowerTools.MarkupSimplifier.SimplifyMarkup(doc, settings);
    }

    List<DocumentFormat.OpenXml.Wordprocessing.Text> texts = new List<DocumentFormat.OpenXml.Wordprocessing.Text>();
    using (WordprocessingDocument doc = WordprocessingDocument.Open(outputDocumentFullName, true))
    {
        // Collect the non-empty text nodes in a fixed order: body, then headers, then footers.
        var body = doc.MainDocumentPart.Document.Body;
        texts.AddRange(body.Descendants<DocumentFormat.OpenXml.Wordprocessing.Text>()
            .Where(text => !String.IsNullOrEmpty(text.Text)));

        var headers = doc.MainDocumentPart.HeaderParts.Select(p => p.Header);
        foreach (var header in headers)
        {
            texts.AddRange(header.Descendants<DocumentFormat.OpenXml.Wordprocessing.Text>()
                .Where(text => !String.IsNullOrEmpty(text.Text)));
        }

        var footers = doc.MainDocumentPart.FooterParts.Select(p => p.Footer);
        foreach (var footer in footers)
        {
            texts.AddRange(footer.Descendants<DocumentFormat.OpenXml.Wordprocessing.Text>()
                .Where(text => !String.IsNullOrEmpty(text.Text)));
        }

        var exceptions = new ConcurrentQueue<Exception>();

        // Extract text for translation
        var batch = texts.Select(text => text.Text);

        // Do translation
        var batches = SplitList(batch, TranslationServiceFacade.maxelements, TranslationServiceFacade.maxrequestsize);

        // Work out once where each batch starts inside `texts`, instead of re-summing the
        // sizes of all preceding batches inside every iteration.
        int batchCount = batches.Count();
        int[] batchStartIndexes = new int[batchCount];
        int runningTotal = 0;
        for (int i = 0; i < batchCount; i++)
        {
            batchStartIndexes[i] = runningTotal;
            runningTotal += batches[i].Count();
        }

        Parallel.For(
            0,
            batchCount,
            new ParallelOptions { MaxDegreeOfParallelism = 1 },
            l =>
            {
                try
                {
                    var translationOutput = TranslationServiceFacade.TranslateArray(
                        batches[l].ToArray(),
                        sourceLanguage,
                        targetLanguage);

                    // Apply the translated batch to the document. Direct indexing replaces the
                    // former Take(n).Last() scan, which walked the list for every string.
                    // If the service returns more strings than text nodes remain, the index
                    // is out of range and the error is captured below rather than silently
                    // overwriting the last text node.
                    int batchStartIndexInDocument = batchStartIndexes[l];
                    for (int j = 0; j < translationOutput.Length; j++)
                    {
                        texts[batchStartIndexInDocument + j].Text = translationOutput[j];
                    }
                }
                catch (Exception ex)
                {
                    // Keep going with the remaining batches; failures are reported together.
                    exceptions.Enqueue(ex);
                }
            });

        // Throw the exceptions here after the loop completes.
        if (!exceptions.IsEmpty)
        {
            throw new AggregateException(exceptions);
        }
    }
}
/// <summary>
/// Creates <paramref name="fileName"/> from the configured Word template and fills it in.
/// </summary>
/// <remarks>
/// Copies <c>Config.current.szWordTemplate</c> over <paramref name="fileName"/>, simplifies the
/// markup, runs <c>ReplaceVar</c> on every paragraph of the main document and of the first
/// header part and first footer part (when present), and calls <c>AddInTable</c> for every
/// table row whose first cell's first paragraph is the <c>${PictureTable}</c> placeholder.
/// Exceptions are not propagated: the error text is stored in <c>szError</c> and logged.
/// </remarks>
/// <param name="fileName">Path of the document to create; an existing file is overwritten.</param>
/// <param name="varDict">Dictionary handed to <c>ReplaceVar</c> and <c>AddInTable</c>.</param>
/// <param name="dictBilder">Dictionary handed to <c>AddInTable</c>.</param>
/// <returns><c>true</c> when the document was processed and saved; <c>false</c> when an exception occurred.</returns>
public static Boolean FillTable(string fileName, Dictionary<string, string> varDict, Dictionary<string, BildMitKommentar> dictBilder)
{
    Boolean bRet = false;
    szError = "";
    try
    {
        File.Copy(Config.current.szWordTemplate, fileName, true);
        using (WordprocessingDocument wordDocument = WordprocessingDocument.Open(fileName, true))
        {
            OpenXmlPowerTools.SimplifyMarkupSettings settings = new OpenXmlPowerTools.SimplifyMarkupSettings
            {
                RemoveComments = true,
                RemoveContentControls = true,
                RemoveEndAndFootNotes = true,
                RemoveFieldCodes = false,
                RemoveLastRenderedPageBreak = true,
                RemovePermissions = true,
                RemoveProof = true,
                RemoveRsidInfo = true,
                RemoveSmartTags = true,
                RemoveSoftHyphens = true,
                // ReplaceTabsWithSpaces = true,
                RemoveGoBackBookmark = true,
                RemoveBookmarks = true,
                RemoveMarkupForDocumentComparison = true,
                RemoveWebHidden = true
            };
            OpenXmlPowerTools.MarkupSimplifier.SimplifyMarkup(wordDocument, settings);

            MainDocumentPart mainPart = wordDocument.MainDocumentPart;

            foreach (var para in mainPart.Document.Descendants<Paragraph>())
            {
                ReplaceVar(para, varDict);
            }

            // A template is not required to have a header or footer; skip the part when it is
            // missing instead of failing the whole fill with a NullReferenceException.
            var headerPart = mainPart.HeaderParts.FirstOrDefault();
            if (headerPart != null && headerPart.Header != null)
            {
                foreach (var para in headerPart.Header.Descendants<Paragraph>())
                {
                    ReplaceVar(para, varDict);
                }
            }

            var footerPart = mainPart.FooterParts.FirstOrDefault();
            if (footerPart != null && footerPart.Footer != null)
            {
                foreach (var para in footerPart.Footer.Descendants<Paragraph>())
                {
                    ReplaceVar(para, varDict);
                }
            }

            // Snapshot tables and rows before iterating: AddInTable receives the table and the
            // row and presumably edits them, which must not disturb a live enumeration.
            var tableList = mainPart.Document.Descendants<Table>().ToList();
            foreach (var table in tableList)
            {
                var rowList = table.Descendants<TableRow>().ToList();
                foreach (var tr in rowList)
                {
                    var tc = tr.Descendants<TableCell>().FirstOrDefault();
                    if (tc == null)
                    {
                        continue; // row without cells cannot hold the placeholder
                    }

                    // get the first para
                    var p = tc.Descendants<Paragraph>().FirstOrDefault();
                    if (p == null)
                    {
                        continue; // cell without a paragraph cannot hold the placeholder
                    }

                    if (p.InnerText.Trim().Equals("${PictureTable}", StringComparison.InvariantCultureIgnoreCase))
                    {
                        AddInTable(mainPart, table, tr, dictBilder, varDict);
                    }
                }
            }

            wordDocument.Save();
            bRet = true;
        }
    }
    catch (Exception ex)
    {
        szError = "FillTable: " + ex.ToString();
        Logging.AddError(szError);
    }
    return bRet;
}
/// <summary>
/// Translates a Word document held in a memory stream, in place.
///
/// Based on method `ProcessWordDocument` (line 726 onwards) in
/// TranslationAssistant.Business/DocumentTranslationManager.cs in
/// MicrosoftTranslator/DocumentTranslator
/// </summary>
/// <param name="memoryStream">Stream containing the document; it is opened read/write.</param>
/// <param name="textTranslator">Translator that receives the extracted strings.</param>
/// <param name="to">Target language passed through to <paramref name="textTranslator"/>.</param>
/// <param name="from">Source language passed through to <paramref name="textTranslator"/>; defaults to <c>null</c>.</param>
/// <param name="ignoreHidden">
/// When <c>true</c>, text nodes whose parent element contains a <c>Vanish</c> element are left untranslated.
/// </param>
/// <returns>A task that completes once the translations have been written back to the document.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="memoryStream"/> or <paramref name="textTranslator"/> is <c>null</c>.
/// </exception>
public async Task TranslateDocument(MemoryStream memoryStream, ITextTranslator textTranslator, string to, string from = null, bool ignoreHidden = false)
{
    // Validate before touching the stream, so a missing translator cannot leave the caller
    // with a document that was simplified but never translated.
    if (memoryStream == null)
    {
        throw new ArgumentNullException(nameof(memoryStream));
    }

    if (textTranslator == null)
    {
        throw new ArgumentNullException(nameof(textTranslator));
    }

    using (WordprocessingDocument doc = WordprocessingDocument.Open(memoryStream, true))
    {
        // Simplify the Word document mark-up
        OpenXmlPowerTools.SimplifyMarkupSettings settings = new OpenXmlPowerTools.SimplifyMarkupSettings
        {
            AcceptRevisions = true,
            // Setting this to false reduces translation quality, but if true some documents
            // have XML format errors when opening.
            NormalizeXml = true,
            RemoveBookmarks = true,
            RemoveComments = true,
            RemoveContentControls = true,
            RemoveEndAndFootNotes = true,
            RemoveFieldCodes = true,
            RemoveGoBackBookmark = true,
            //RemoveHyperlinks = false,
            RemoveLastRenderedPageBreak = true,
            RemoveMarkupForDocumentComparison = true,
            RemovePermissions = false,
            RemoveProof = true,
            RemoveRsidInfo = true,
            RemoveSmartTags = true,
            RemoveSoftHyphens = true,
            RemoveWebHidden = true,
            ReplaceTabsWithSpaces = false
        };
        OpenXmlPowerTools.MarkupSimplifier.SimplifyMarkup(doc, settings);
    }

    var texts = new List<Text>();

    // NOTE(review): the document is reopened rather than reused after SimplifyMarkup,
    // presumably so the text nodes are read from the simplified markup. Confirm before merging
    // the two using-blocks.
    using (WordprocessingDocument doc = WordprocessingDocument.Open(memoryStream, true))
    {
        // Find all text nodes in the document (body, headers & footers)
        var body = doc.MainDocumentPart.Document.Body;
        texts.AddRange(body.Descendants<Text>().Where(text => !String.IsNullOrEmpty(text.Text)));

        var headers = doc.MainDocumentPart.HeaderParts.Select(p => p.Header);
        foreach (var header in headers)
        {
            texts.AddRange(header.Descendants<Text>().Where(text => !String.IsNullOrEmpty(text.Text)));
        }

        var footers = doc.MainDocumentPart.FooterParts.Select(p => p.Footer);
        foreach (var footer in footers)
        {
            texts.AddRange(footer.Descendants<Text>().Where(text => !String.IsNullOrEmpty(text.Text)));
        }

        if (ignoreHidden)
        {
            texts.RemoveAll(t => t.Parent.Descendants<Vanish>().Any());
        }

        // Extract text strings for translation. Materialized so the translator gets a stable
        // snapshot instead of a deferred query that is re-evaluated against the text nodes
        // each time it is enumerated.
        var values = texts.Select(text => text.Text).ToList();

        // Do translation
        var translations = await textTranslator.TranslateTexts(values, to, from);

        // Apply translations to the document by walking both sequences in step; stops at the
        // end of whichever sequence is shorter.
        using (var translationsEnumerator = translations.GetEnumerator())
        {
            for (int i = 0; i < texts.Count && translationsEnumerator.MoveNext(); i++)
            {
                texts[i].Text = translationsEnumerator.Current;
            }
        }
    }
}
/// <summary>
/// Creates a "_cleaned" copy of a docx file next to the original, simplifies its markup and
/// extracts the strings to translate from it.
/// </summary>
/// <param name="fullPath">Full path of the source docx file.</param>
/// <param name="ignoreHidden">Passed through to <c>textExtractor.ExtractText</c>.</param>
/// <returns>
/// Data describing the cleaned copy, with the extracted strings and a status message
/// ("Success", a failure note, or the message of an exception raised while building the copy);
/// <c>null</c> when <c>validator</c> rejects <paramref name="fullPath"/>.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="fullPath"/> is null or empty.</exception>
public DocxConversionData CreateCleanedCopy(string fullPath, bool ignoreHidden)
{
    if (string.IsNullOrEmpty(fullPath))
    {
        // The single-string constructor takes the parameter NAME, not the message; use the
        // two-argument overload so both are reported correctly.
        throw new ArgumentNullException(nameof(fullPath), "path variable cannot be null or empty");
    }

    // ensure we have a valid path to a docx file
    if (!validator.IsValid(fullPath))
    {
        return null;
    }

    var origFileData = new DocxConversionData(fullPath);
    var cleanedFileName = origFileData.FileName + "_cleaned" + origFileData.FileExtension;
    var cleanedFileData = new DocxConversionData(Path.Join(origFileData.FilePath, cleanedFileName));

    try
    {
        // ensure there is not an orphaned copy already on disk
        File.Delete(cleanedFileData.FullPath);

        // Make a copy of the original using the "cleaned" file name
        File.Copy(origFileData.FullPath, cleanedFileData.FullPath);

        // Clean document of extra tags
        using (WordprocessingDocument doc = WordprocessingDocument.Open(cleanedFileData.FullPath, true))
        {
            OpenXmlPowerTools.SimplifyMarkupSettings settings = new OpenXmlPowerTools.SimplifyMarkupSettings
            {
                AcceptRevisions = true,
                // Setting this to false reduces translation quality, but if true some
                // documents have XML format errors when opening.
                NormalizeXml = false,
                RemoveBookmarks = true,
                RemoveComments = true,
                RemoveContentControls = true,
                RemoveEndAndFootNotes = true,
                RemoveFieldCodes = false, // FieldCode remove pagination
                RemoveGoBackBookmark = true,
                //RemoveHyperlinks = false,
                RemoveLastRenderedPageBreak = true,
                RemoveMarkupForDocumentComparison = true,
                RemovePermissions = false,
                RemoveProof = true,
                RemoveRsidInfo = true,
                RemoveSmartTags = true,
                RemoveSoftHyphens = true,
                RemoveWebHidden = true,
                ReplaceTabsWithSpaces = false
            };
            OpenXmlPowerTools.MarkupSimplifier.SimplifyMarkup(doc, settings);

            // Extract strings for translation. Materialized while the document is still open,
            // so the stored strings are plain values rather than a deferred query over
            // elements of a document that is disposed at the end of this block.
            var textBlocks = textExtractor.ExtractText(doc, ignoreHidden);
            cleanedFileData.Strings2Translate = textBlocks.Select(para => para.InnerText).ToList();
        }

        // Total the number of strings for translation
        cleanedFileData.TotalStrings2Translate = cleanedFileData.Strings2Translate.Count();

        if (File.Exists(cleanedFileData.FullPath))
        {
            cleanedFileData.Messages.Add("Success");
        }
        else
        {
            cleanedFileData.Messages.Add("Docx data failed to build properly.");
        }
    }
    catch (Exception e)
    {
        // Failures are reported through the message list instead of being thrown.
        cleanedFileData.Messages.Add(e.Message);
    }

    return cleanedFileData;
}