/// <summary>
/// Translates the text of a Word document in place. The markup is simplified first, then the
/// non-empty text nodes of the body, every header and every footer are collected, sent to the
/// translation service in batches, and each translation is written back to the node it came from.
/// </summary>
/// <param name="outputDocumentFullName">Path of the document; it is opened for editing and modified in place.</param>
/// <param name="sourceLanguage">Source language, passed through to <c>TranslationServiceFacade.TranslateArray</c>.</param>
/// <param name="targetLanguage">Target language, passed through to <c>TranslationServiceFacade.TranslateArray</c>.</param>
/// <exception cref="AggregateException">
/// Thrown after all batches were attempted when at least one batch failed; wraps every collected exception.
/// </exception>
private static void ProcessWordDocument(
            string outputDocumentFullName,
            string sourceLanguage,
            string targetLanguage)
        {
            // Pass 1: simplify the markup and let the document be written back when it is disposed.
            using (WordprocessingDocument doc = WordprocessingDocument.Open(outputDocumentFullName, true))
            {
                OpenXmlPowerTools.SimplifyMarkupSettings settings = new OpenXmlPowerTools.SimplifyMarkupSettings
                {
                    AcceptRevisions       = true,
                    NormalizeXml          = true, //setting this to false reduces translation quality, but if true some documents have XML format errors when opening
                    RemoveBookmarks       = true,
                    RemoveComments        = true,
                    RemoveContentControls = true,
                    RemoveEndAndFootNotes = true,
                    RemoveFieldCodes      = true,
                    RemoveGoBackBookmark  = true,
                    //RemoveHyperlinks = false,
                    RemoveLastRenderedPageBreak       = true,
                    RemoveMarkupForDocumentComparison = true,
                    RemovePermissions     = false,
                    RemoveProof           = true,
                    RemoveRsidInfo        = true,
                    RemoveSmartTags       = true,
                    RemoveSoftHyphens     = true,
                    RemoveWebHidden       = true,
                    ReplaceTabsWithSpaces = false
                };
                OpenXmlPowerTools.MarkupSimplifier.SimplifyMarkup(doc, settings);
            }

            List<DocumentFormat.OpenXml.Wordprocessing.Text> texts = new List<DocumentFormat.OpenXml.Wordprocessing.Text>();

            // IsNullOrEmpty already covers the zero-length case, so no separate Length check is needed.
            Func<DocumentFormat.OpenXml.Wordprocessing.Text, bool> hasContent = text => !String.IsNullOrEmpty(text.Text);

            // Pass 2: reopen the simplified document, translate and write the results back.
            using (WordprocessingDocument doc = WordprocessingDocument.Open(outputDocumentFullName, true))
            {
                // Collection order (body, headers, footers) defines the index of every text node;
                // the translations are mapped back by that index below.
                var body = doc.MainDocumentPart.Document.Body;
                texts.AddRange(body.Descendants<DocumentFormat.OpenXml.Wordprocessing.Text>().Where(hasContent));

                var headers = doc.MainDocumentPart.HeaderParts.Select(p => p.Header);
                foreach (var header in headers)
                {
                    texts.AddRange(header.Descendants<DocumentFormat.OpenXml.Wordprocessing.Text>().Where(hasContent));
                }

                var footers = doc.MainDocumentPart.FooterParts.Select(p => p.Footer);
                foreach (var footer in footers)
                {
                    texts.AddRange(footer.Descendants<DocumentFormat.OpenXml.Wordprocessing.Text>().Where(hasContent));
                }

                var exceptions = new ConcurrentQueue<Exception>();

                // Extract Text for Translation
                var batch = texts.Select(text => text.Text);

                // Do Translation
                var batches = SplitList(batch, TranslationServiceFacade.maxelements, TranslationServiceFacade.maxrequestsize);

                // Index in 'texts' of the first element of each batch, computed once up front
                // instead of re-summing the sizes of all preceding batches for every batch.
                int batchCount = batches.Count();
                int[] batchStartIndexes = new int[batchCount];
                for (int i = 1; i < batchCount; i++)
                {
                    batchStartIndexes[i] = batchStartIndexes[i - 1] + batches[i - 1].Count();
                }

                Parallel.For(
                    0,
                    batchCount,
                    new ParallelOptions {
                    MaxDegreeOfParallelism = 1
                },
                    l =>
                {
                    try
                    {
                        var translationOutput = TranslationServiceFacade.TranslateArray(
                            batches[l].ToArray(),
                            sourceLanguage,
                            targetLanguage);
                        int batchStartIndexInDocument = batchStartIndexes[l];

                        // Apply translated batch to document. Direct indexing replaces the former
                        // Take(n).Last() lookup, which walked the list from the start for every element.
                        // A translation count that overruns the collected texts now raises an
                        // out-of-range error (reported through the AggregateException below).
                        for (int j = 0; j < translationOutput.Length; j++)
                        {
                            texts[batchStartIndexInDocument + j].Text = translationOutput[j];
                        }
                    }
                    catch (Exception ex)
                    {
                        // Collect instead of aborting so the remaining batches are still attempted.
                        exceptions.Enqueue(ex);
                    }
                });

                // Throw the exceptions here after the loop completes.
                if (exceptions.Count > 0)
                {
                    throw new AggregateException(exceptions);
                }

                //doc.MainDocumentPart.PutXDocument();
            }
        }
Пример #2
0
        /// <summary>
        /// Creates <paramref name="fileName"/> as a copy of the Word template configured in
        /// <c>Config.current.szWordTemplate</c> (overwriting an existing file), simplifies its markup,
        /// runs <c>ReplaceVar</c> over every paragraph of the main document, the first header and the
        /// first footer, and calls <c>AddInTable</c> for every table row whose first cell starts with a
        /// paragraph reading <c>${PictureTable}</c> (case-insensitive, surrounding whitespace ignored).
        /// </summary>
        /// <param name="fileName">Path of the document to create and fill.</param>
        /// <param name="varDict">Passed to <c>ReplaceVar</c> and <c>AddInTable</c>.</param>
        /// <param name="dictBilder">Passed to <c>AddInTable</c>.</param>
        /// <returns>
        /// <c>true</c> when the document was processed and saved; <c>false</c> when any exception occurred,
        /// in which case the exception text is stored in <c>szError</c> and logged.
        /// </returns>
        public static Boolean FillTable(string fileName, Dictionary <string, string> varDict, Dictionary <string, BildMitKommentar> dictBilder)
        {
            Boolean bRet = false;

            szError = "";
            try
            {
                File.Copy(Config.current.szWordTemplate, fileName, true);
                using (WordprocessingDocument wordDocument
                           = WordprocessingDocument.Open(fileName, true))
                {
                    OpenXmlPowerTools.SimplifyMarkupSettings settings = new OpenXmlPowerTools.SimplifyMarkupSettings
                    {
                        RemoveComments              = true,
                        RemoveContentControls       = true,
                        RemoveEndAndFootNotes       = true,
                        RemoveFieldCodes            = false,
                        RemoveLastRenderedPageBreak = true,
                        RemovePermissions           = true,
                        RemoveProof       = true,
                        RemoveRsidInfo    = true,
                        RemoveSmartTags   = true,
                        RemoveSoftHyphens = true,
                        // ReplaceTabsWithSpaces = true,
                        RemoveGoBackBookmark = true,
                        RemoveBookmarks      = true,
                        RemoveMarkupForDocumentComparison = true,
                        RemoveWebHidden = true
                    };
                    OpenXmlPowerTools.MarkupSimplifier.SimplifyMarkup(wordDocument, settings);


                    MainDocumentPart mainPart = wordDocument.MainDocumentPart;

                    foreach (var para in mainPart.Document.Descendants <Paragraph>())
                    {
                        ReplaceVar(para, varDict);
                    }

                    // A template without a header or footer has no such part; skip it instead of
                    // failing the whole fill with a NullReferenceException.
                    var headerPart = mainPart.HeaderParts.FirstOrDefault();
                    if (headerPart != null)
                    {
                        foreach (var para in headerPart.Header.Descendants <Paragraph>())
                        {
                            ReplaceVar(para, varDict);
                        }
                    }

                    var footerPart = mainPart.FooterParts.FirstOrDefault();
                    if (footerPart != null)
                    {
                        foreach (var para in footerPart.Footer.Descendants <Paragraph>())
                        {
                            ReplaceVar(para, varDict);
                        }
                    }

                    // Snapshot tables and rows before iterating: AddInTable receives the table and the
                    // row and presumably changes the table's rows (defined elsewhere - confirm), which
                    // must not happen underneath a lazy Descendants enumeration.
                    var tableList = mainPart.Document.Descendants <Table>().ToList();
                    foreach (var table in tableList)
                    {
                        var rowList = table.Descendants <TableRow>().ToList();
                        foreach (var tr in rowList)
                        {
                            var tc = tr.Descendants <TableCell>().FirstOrDefault();
                            if (tc == null)
                            {
                                continue; // row without cells cannot hold the placeholder
                            }

                            // get the first para
                            var p = tc.Descendants <Paragraph>().FirstOrDefault();
                            if (p == null)
                            {
                                continue; // cell without paragraphs cannot hold the placeholder
                            }

                            if (p.InnerText.Trim().Equals("${PictureTable}", StringComparison.InvariantCultureIgnoreCase))
                            {
                                AddInTable(mainPart, table, tr, dictBilder, varDict);
                            }
                        }
                    }
                    wordDocument.Save();
                    bRet = true;
                }
            }
            catch (Exception ex)
            {
                // Deliberate best-effort: report the failure through szError, the log and the return value.
                szError = "FillTable: " + ex.ToString();
                Logging.AddError(szError);
            }
            return(bRet);
        }
Пример #3
0
        /// <summary>
        /// Translates a Word document held in a memory stream, in place.
        ///
        /// The document markup is simplified first; then every non-empty text node of the body,
        /// the headers and the footers is handed to the translator and overwritten with the
        /// translation returned for it.
        ///
        /// Based on method `ProcessWordDocument` (line 726 onwards) in
        /// TranslationAssistant.Business/DocumentTranslationManager.cs in
        /// MicrosoftTranslator/DocumentTranslator
        /// </summary>
        /// <param name="memoryStream">Stream containing the document; opened for editing twice.</param>
        /// <param name="textTranslator">Translator whose <c>TranslateTexts</c> receives the collected strings.</param>
        /// <param name="to">Target language, passed through to the translator.</param>
        /// <param name="from">Source language, passed through to the translator; defaults to <c>null</c>.</param>
        /// <param name="ignoreHidden">
        /// When <c>true</c>, text nodes whose parent element contains a <c>Vanish</c> descendant are left untranslated.
        /// </param>
        public async Task TranslateDocument(MemoryStream memoryStream, ITextTranslator textTranslator, string to, string from = null, bool ignoreHidden = false)
        {
            // Simplify the Word document mark-up
            var simplifySettings = new OpenXmlPowerTools.SimplifyMarkupSettings
            {
                AcceptRevisions                   = true,
                NormalizeXml                      = true, //setting this to false reduces translation quality, but if true some documents have XML format errors when opening
                RemoveBookmarks                   = true,
                RemoveComments                    = true,
                RemoveContentControls             = true,
                RemoveEndAndFootNotes             = true,
                RemoveFieldCodes                  = true,
                RemoveGoBackBookmark              = true,
                RemoveLastRenderedPageBreak       = true,
                RemoveMarkupForDocumentComparison = true,
                RemovePermissions                 = false,
                RemoveProof                       = true,
                RemoveRsidInfo                    = true,
                RemoveSmartTags                   = true,
                RemoveSoftHyphens                 = true,
                RemoveWebHidden                   = true,
                ReplaceTabsWithSpaces             = false
            };

            using (var simplifyPass = WordprocessingDocument.Open(memoryStream, true))
            {
                OpenXmlPowerTools.MarkupSimplifier.SimplifyMarkup(simplifyPass, simplifySettings);
            }

            // Shared filter: only text nodes that actually carry characters are translated.
            Func<Text, bool> hasContent = node => !String.IsNullOrEmpty(node.Text);

            using (var document = WordprocessingDocument.Open(memoryStream, true))
            {
                var mainPart = document.MainDocumentPart;

                // Gather the text nodes in a fixed order: body, then headers, then footers.
                var textNodes = new List<Text>();
                textNodes.AddRange(mainPart.Document.Body.Descendants<Text>().Where(hasContent));

                foreach (var headerPart in mainPart.HeaderParts)
                {
                    textNodes.AddRange(headerPart.Header.Descendants<Text>().Where(hasContent));
                }

                foreach (var footerPart in mainPart.FooterParts)
                {
                    textNodes.AddRange(footerPart.Footer.Descendants<Text>().Where(hasContent));
                }

                if (ignoreHidden)
                {
                    textNodes.RemoveAll(node => node.Parent.Descendants<Vanish>().Any());
                }

                // Hand the raw strings to the translator.
                var translated = await textTranslator.TranslateTexts(textNodes.Select(node => node.Text), to, from);

                // Pair each node with the translation at the same position and overwrite it;
                // pairing stops as soon as either sequence runs out.
                var pairs = textNodes.Zip(translated, (node, value) => new { Node = node, Value = value });
                foreach (var pair in pairs)
                {
                    pair.Node.Text = pair.Value;
                }
            }
        }
Пример #4
0
        /// <summary>
        /// Copies the .docx file at <paramref name="fullPath"/> to a sibling file whose name has
        /// "_cleaned" appended, simplifies the markup of that copy and extracts the strings to translate.
        /// </summary>
        /// <param name="fullPath">Full path of the original document.</param>
        /// <param name="ignoreHidden">Passed through to <c>textExtractor.ExtractText</c>.</param>
        /// <returns>
        /// <c>null</c> when <c>validator</c> rejects the path; otherwise the data object describing the
        /// cleaned copy. Its <c>Messages</c> receives "Success", a failure notice, or the message of any
        /// exception raised while copying or cleaning (such exceptions are not rethrown).
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="fullPath"/> is null or empty.</exception>
        public DocxConversionData CreateCleanedCopy(string fullPath, bool ignoreHidden)
        {
            if (string.IsNullOrEmpty(fullPath))
            {
                // The single-string constructor takes the parameter NAME, not the message;
                // pass both so the exception reports the right parameter and text.
                throw new ArgumentNullException(nameof(fullPath), "path variable cannot be null or empty");
            }

            // ensure we have a valid path to a docx file
            if (!validator.IsValid(fullPath))
            {
                return(null);
            }

            var origFileData = new DocxConversionData(fullPath);

            var cleanedFileName = origFileData.FileName + "_cleaned" + origFileData.FileExtension;
            var cleanedFileData = new DocxConversionData(Path.Join(origFileData.FilePath, cleanedFileName));

            try
            {
                // ensure there is not an orphaned copy already on disk
                File.Delete(cleanedFileData.FullPath);

                // Make a copy of the original using the "cleaned" file name
                File.Copy(origFileData.FullPath, cleanedFileData.FullPath);

                //Clean Document of extra tags
                using (WordprocessingDocument doc = WordprocessingDocument.Open(cleanedFileData.FullPath, true))
                {
                    OpenXmlPowerTools.SimplifyMarkupSettings settings = new OpenXmlPowerTools.SimplifyMarkupSettings
                    {
                        AcceptRevisions       = true,
                        NormalizeXml          = false, //setting this to false reduces translation quality, but if true some documents have XML format errors when opening
                        RemoveBookmarks       = true,
                        RemoveComments        = true,
                        RemoveContentControls = true,
                        RemoveEndAndFootNotes = true,
                        RemoveFieldCodes      = false, //FieldCode remove pagination
                        RemoveGoBackBookmark  = true,
                        //RemoveHyperlinks = false,
                        RemoveLastRenderedPageBreak       = true,
                        RemoveMarkupForDocumentComparison = true,
                        RemovePermissions     = false,
                        RemoveProof           = true,
                        RemoveRsidInfo        = true,
                        RemoveSmartTags       = true,
                        RemoveSoftHyphens     = true,
                        RemoveWebHidden       = true,
                        ReplaceTabsWithSpaces = false
                    };

                    OpenXmlPowerTools.MarkupSimplifier.SimplifyMarkup(doc, settings);

                    //Extract Strings for translation
                    var textBlocks = textExtractor.ExtractText(doc, ignoreHidden);
                    //var textBlocks = textExtractor.ExtractParagraph(doc, ignoreHidden);

                    // Materialize while the document is still open: Select is deferred, so the query
                    // would otherwise first run (in Count() below and in every later consumer) only
                    // after 'doc' has been disposed, and would be re-evaluated on each enumeration.
                    cleanedFileData.Strings2Translate = textBlocks.Select(para => para.InnerText).ToList();
                }

                //Total the number of strings for translation
                cleanedFileData.TotalStrings2Translate = cleanedFileData.Strings2Translate.Count();

                if (File.Exists(cleanedFileData.FullPath))
                {
                    cleanedFileData.Messages.Add("Success");
                }
                else
                {
                    cleanedFileData.Messages.Add("Docx data failed to build properly.");
                }
            }
            catch (Exception e)
            {
                // Best-effort by design: the failure is reported to the caller through Messages.
                cleanedFileData.Messages.Add(e.Message);
            }

            return(cleanedFileData);
        }