Esempio n. 1
0
        /// <summary>
        /// Sends the content of the supplied drawing text nodes to the translator and
        /// writes each returned string back into the node it was read from.
        ///
        /// Based on method `ReplaceTextsWithTranslation` (line 667 onwards) in
        /// TranslationAssistant.Business/DocumentTranslationManager.cs in
        /// MicrosoftTranslator/DocumentTranslator
        /// </summary>
        /// <param name="texts">Text nodes whose <c>Text</c> is overwritten in place.</param>
        /// <param name="textTranslator">Translator whose <c>TranslateTexts</c> is awaited.</param>
        /// <param name="to">Forwarded to <c>TranslateTexts</c> as its <c>to</c> argument (presumably the target language).</param>
        /// <param name="from">Forwarded to <c>TranslateTexts</c> as its <c>from</c> argument (presumably the source language).</param>
        private static async Task ReplaceTextsWithTranslation(List<DocumentFormat.OpenXml.Drawing.Text> texts, ITextTranslator textTranslator, string to, string from)
        {
            // No nodes: leave without calling the translator at all
            if (!texts.Any())
            {
                return;
            }

            // Project the nodes to their string content (lazily) and translate
            var originals = texts.Select(node => node.Text);
            var translated = await textTranslator.TranslateTexts(originals, to, from);

            // Advance both sequences in lock-step; the loop ends as soon as either
            // one is exhausted, so surplus nodes keep their original text
            using (var nodeCursor = texts.GetEnumerator())
            using (var translationCursor = translated.GetEnumerator())
            {
                while (nodeCursor.MoveNext() && translationCursor.MoveNext())
                {
                    nodeCursor.Current.Text = translationCursor.Current;
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Translates a Word document in place. The mark-up is simplified first, then the
        /// text nodes of the body, headers and footers are collected, translated in a
        /// single call and overwritten with their translations.
        ///
        /// Based on method `ProcessWordDocument` (line 726 onwards) in
        /// TranslationAssistant.Business/DocumentTranslationManager.cs in
        /// MicrosoftTranslator/DocumentTranslator
        /// </summary>
        /// <param name="memoryStream">Stream containing the document. It is opened twice in editable mode: once for the simplification pass and once for the translation pass.</param>
        /// <param name="textTranslator">Translator whose <c>TranslateTexts</c> is awaited.</param>
        /// <param name="to">Forwarded to <c>TranslateTexts</c> as its <c>to</c> argument (presumably the target language).</param>
        /// <param name="from">Forwarded to <c>TranslateTexts</c> as its <c>from</c> argument (presumably the source language); defaults to null.</param>
        /// <param name="ignoreHidden">When true, text nodes whose parent element contains a <c>Vanish</c> descendant are left untranslated.</param>
        public async Task TranslateDocument(MemoryStream memoryStream, ITextTranslator textTranslator, string to, string from = null, bool ignoreHidden = false)
        {
            using (WordprocessingDocument doc = WordprocessingDocument.Open(memoryStream, true))
            {
                // Simplify the Word document mark-up
                OpenXmlPowerTools.SimplifyMarkupSettings settings = new OpenXmlPowerTools.SimplifyMarkupSettings
                {
                    AcceptRevisions       = true,
                    NormalizeXml          = true, //setting this to false reduces translation quality, but if true some documents have XML format errors when opening
                    RemoveBookmarks       = true,
                    RemoveComments        = true,
                    RemoveContentControls = true,
                    RemoveEndAndFootNotes = true,
                    RemoveFieldCodes      = true,
                    RemoveGoBackBookmark  = true,
                    //RemoveHyperlinks = false,
                    RemoveLastRenderedPageBreak       = true,
                    RemoveMarkupForDocumentComparison = true,
                    RemovePermissions     = false,
                    RemoveProof           = true,
                    RemoveRsidInfo        = true,
                    RemoveSmartTags       = true,
                    RemoveSoftHyphens     = true,
                    RemoveWebHidden       = true,
                    ReplaceTabsWithSpaces = false
                };
                OpenXmlPowerTools.MarkupSimplifier.SimplifyMarkup(doc, settings);
            }

            using (WordprocessingDocument doc = WordprocessingDocument.Open(memoryStream, true))
            {
                var mainPart = doc.MainDocumentPart;

                // Root elements to scan, in order: body, then headers, then footers
                var roots = new List<DocumentFormat.OpenXml.OpenXmlElement>();
                roots.Add(mainPart.Document.Body);
                roots.AddRange(mainPart.HeaderParts.Select(p => p.Header));
                roots.AddRange(mainPart.FooterParts.Select(p => p.Footer));

                // Find all non-empty text nodes below those roots
                var texts = new List<Text>();
                foreach (var root in roots)
                {
                    // A missing root element has nothing to scan; skip it instead of failing
                    if (root == null)
                    {
                        continue;
                    }

                    texts.AddRange(root.Descendants<Text>().Where(text => !String.IsNullOrEmpty(text.Text)));
                }

                if (ignoreHidden)
                {
                    texts.RemoveAll(t => t.Parent.Descendants<Vanish>().Any());
                }

                // Nothing to translate: avoid a needless translator call
                if (texts.Count == 0)
                {
                    return;
                }

                // Extract text strings for translation (materialized once so the
                // translator does not re-walk the document nodes)
                var values = texts.Select(text => text.Text).ToList();

                // Do translation
                var translations = await textTranslator.TranslateTexts(values, to, from);

                // Apply translations by walking both sequences in lock-step; the loop
                // ends when either runs out, so surplus nodes keep their original text
                using (var textsEnumerator = texts.GetEnumerator())
                using (var translationsEnumerator = translations.GetEnumerator())
                {
                    while (textsEnumerator.MoveNext() && translationsEnumerator.MoveNext())
                    {
                        textsEnumerator.Current.Text = translationsEnumerator.Current;
                    }
                }
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Translates a PowerPoint document in place: slide text, notes text and
        /// slide comments are each collected, translated and written back.
        ///
        /// Based on method `ProcessPowerPointDocument` (line 569 onwards) in
        /// TranslationAssistant.Business/DocumentTranslationManager.cs in
        /// MicrosoftTranslator/DocumentTranslator
        /// </summary>
        /// <param name="memoryStream">Stream containing the presentation; opened in editable mode.</param>
        /// <param name="textTranslator">Translator whose <c>TranslateTexts</c> is awaited.</param>
        /// <param name="to">Forwarded to the translator as its <c>to</c> argument (presumably the target language).</param>
        /// <param name="from">Forwarded to the translator as its <c>from</c> argument (presumably the source language); defaults to null.</param>
        public async Task TranslateDocument(MemoryStream memoryStream, ITextTranslator textTranslator, string to, string from = null)
        {
            using (PresentationDocument doc = PresentationDocument.Open(memoryStream, true))
            {
                var slideTexts    = new List<DocumentFormat.OpenXml.Drawing.Text>();
                var noteTexts     = new List<DocumentFormat.OpenXml.Drawing.Text>();
                var slideComments = new List<Comment>();

                var slideParts = doc.PresentationPart.SlideParts;
                if (slideParts == null)
                {
                    return;
                }

                // Gather text items, comments and notes from every slide that has content
                foreach (var slidePart in slideParts)
                {
                    var slide = slidePart.Slide;
                    if (slide == null)
                    {
                        continue;
                    }

                    ExtractTextContent(slideTexts, slide);

                    var commentsPart = slidePart.SlideCommentsPart;
                    if (commentsPart != null)
                    {
                        slideComments.AddRange(commentsPart.CommentList.Cast<Comment>());
                    }

                    var notesPart = slidePart.NotesSlidePart;
                    if (notesPart != null)
                    {
                        ExtractTextContent(noteTexts, notesPart.NotesSlide);
                    }
                }

                // Slide text first, then notes
                await ReplaceTextsWithTranslation(slideTexts, textTranslator, to, from);
                await ReplaceTextsWithTranslation(noteTexts, textTranslator, to, from);

                // Comments are handled last, and only when at least one exists
                if (!slideComments.Any())
                {
                    return;
                }

                // Lazily project each comment to its inner text and translate
                var commentStrings = slideComments.Select(comment => comment.InnerText);
                var translatedComments = await textTranslator.TranslateTexts(commentStrings, to, from);

                // Pair comments with translations in lock-step; stops when either runs out.
                // Each comment receives a fresh presentation Text element holding the translation.
                using (var commentCursor = slideComments.GetEnumerator())
                using (var translationCursor = translatedComments.GetEnumerator())
                {
                    while (commentCursor.MoveNext() && translationCursor.MoveNext())
                    {
                        var replacement = new DocumentFormat.OpenXml.Presentation.Text
                        {
                            Text = translationCursor.Current
                        };
                        commentCursor.Current.Text = replacement;
                    }
                }
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Translates an Excel document in place: shared strings (plain and rich-text
        /// runs), table column names and worksheet comments.
        ///
        /// Based on method `ProcessExcelDocument` (line 424 onwards) in
        /// TranslationAssistant.Business/DocumentTranslationManager.cs in
        /// MicrosoftTranslator/DocumentTranslator
        /// </summary>
        /// <param name="memoryStream">Stream containing the workbook; opened in editable mode.</param>
        /// <param name="textTranslator">Translator whose <c>TranslateTexts</c> is awaited.</param>
        /// <param name="to">Forwarded to <c>TranslateTexts</c> as its <c>to</c> argument (presumably the target language).</param>
        /// <param name="from">Forwarded to <c>TranslateTexts</c> as its <c>from</c> argument (presumably the source language); defaults to null.</param>
        public async Task TranslateDocument(MemoryStream memoryStream, ITextTranslator textTranslator, string to, string from = null)
        {
            using (SpreadsheetDocument document = SpreadsheetDocument.Open(memoryStream, true))
            {
                WorkbookPart workbookPart = document.WorkbookPart;

                // Find all string items in the spreadsheet. The shared string part is
                // null-checked so a workbook without one is skipped instead of failing.
                var texts = new List<DocumentFormat.OpenXml.Spreadsheet.Text>();
                var sharedStringPart = workbookPart.SharedStringTablePart;
                if (sharedStringPart != null && sharedStringPart.SharedStringTable != null)
                {
                    foreach (SharedStringItem si in sharedStringPart.SharedStringTable.Elements<SharedStringItem>())
                    {
                        if (si.Text != null && !String.IsNullOrEmpty(si.Text.Text))
                        {
                            // Plain string item
                            texts.Add(si.Text);
                        }
                        else
                        {
                            // Rich-text item: collect the text of each non-empty run
                            texts.AddRange(si.Elements<DocumentFormat.OpenXml.Spreadsheet.Run>()
                                           .Where(item => item.Text != null && !String.IsNullOrEmpty(item.Text.Text))
                                           .Select(item => item.Text));
                        }
                    }
                }

                // Translate the strings once and keep the result in a list, because it is
                // read again below for every table column
                var translations = new List<string>();
                if (texts.Count > 0)
                {
                    var textValues = texts.Select(item => item.Text).ToList();
                    var translatedTexts = await textTranslator.TranslateTexts(textValues, to, from);
                    translations.AddRange(translatedTexts);

                    // Replace each original text with its translation; if the translator
                    // returned fewer items, the surplus nodes keep their original text
                    int pairCount = Math.Min(texts.Count, translations.Count);
                    for (int i = 0; i < pairCount; i++)
                    {
                        texts[i].Text = translations[i];
                    }
                }

                // Refresh all the shared string references (only when something was translated).
                if (translations.Count > 0)
                {
                    var tables = workbookPart.GetPartsOfType<WorksheetPart>()
                                 .SelectMany(part => part.TableDefinitionParts);
                    foreach (var table in tables)
                    {
                        foreach (TableColumn col in table.Table.TableColumns)
                        {
                            // NOTE(review): the column id is used as a 1-based position into the
                            // translated shared strings (falling back to the last one when the id
                            // exceeds the count), as inherited from the upstream method - confirm
                            // this mapping matches the workbook's shared-string order.
                            col.Name = translations.Take(int.Parse(col.Id)).Last();
                        }

                        table.Table.Save();
                    }
                }

                // Find all comments
                var comments = new List<DocumentFormat.OpenXml.Spreadsheet.Comment>();
                foreach (var commentsPart in workbookPart.WorksheetParts.SelectMany(sheet => sheet.GetPartsOfType<WorksheetCommentsPart>()))
                {
                    comments.AddRange(commentsPart.Comments.CommentList.Cast<DocumentFormat.OpenXml.Spreadsheet.Comment>());
                }

                if (comments.Count > 0)
                {
                    // Extract text for translation
                    var commentValues = comments.Select(item => item.InnerText).ToArray();

                    // Do the translation
                    var translatedComments = await textTranslator.TranslateTexts(commentValues, to, from);

                    // Pair each comment with its own translation. This must enumerate
                    // translatedComments, not the shared-string translations.
                    using (var commentsEnumerator = comments.GetEnumerator())
                    using (var translationEnumerator = translatedComments.GetEnumerator())
                    {
                        while (commentsEnumerator.MoveNext() && translationEnumerator.MoveNext())
                        {
                            var text = translationEnumerator.Current;
                            commentsEnumerator.Current.CommentText = new CommentText
                            {
                                Text = new DocumentFormat.OpenXml.Spreadsheet.Text
                                {
                                    Text = text
                                }
                            };
                        }
                    }
                }
            }
        }