/// <summary>
/// Writes the translated strings from <paramref name="jsonFileData"/> back into the
/// text blocks extracted from the docx file described by <paramref name="docxFileData"/>.
/// </summary>
/// <param name="jsonFileData">Translation payload; entry <c>j</c> of <c>JsonData["lines"]</c> replaces text block <c>j</c>.</param>
/// <param name="docxFileData">Supplies the docx <c>FullPath</c> and <c>TotalStrings2Translate</c>, the number of blocks to replace.</param>
/// <param name="ignoreHidden">Passed through unchanged to the text extraction service.</param>
/// <returns>The text of every extracted block after the replacements have been applied.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="docxFileData"/> or <paramref name="jsonFileData"/> is <c>null</c>.
/// </exception>
public IEnumerable<string> Merge(JsonConversionData jsonFileData, DocxConversionData docxFileData, Boolean ignoreHidden = false)
{
    if (docxFileData == null)
    {
        throw new ArgumentNullException(nameof(docxFileData), "DocxConversionData cannot be null.");
    }

    if (jsonFileData == null)
    {
        throw new ArgumentNullException(nameof(jsonFileData), "JsonFileData cannot be null.");
    }

    // The second argument opens the package in editable mode.
    using (WordprocessingDocument doc = WordprocessingDocument.Open(docxFileData.FullPath, true))
    {
        // Materialize once: the extraction result is indexed repeatedly below, and
        // re-enumerating it per iteration (Take(n).Last()) was quadratic.
        var textBlocks = textExtractionService.ExtractText(doc, ignoreHidden).ToList();

        // Bound the loop by the number of blocks actually found. Previously a
        // TotalStrings2Translate larger than the block count silently overwrote the
        // last block with each surplus translation.
        for (int j = 0; j < docxFileData.TotalStrings2Translate && j < textBlocks.Count; j++)
        {
            textBlocks[j].Text = (string)jsonFileData.JsonData["lines"][j];
        }

        // Snapshot the strings while the document is still open so the caller never
        // enumerates a deferred query after the document has been disposed.
        return textBlocks.Select(block => block.Text).ToList();
    }
}
/// <summary>
/// Copies the docx at <paramref name="fullPath"/> to a sibling file whose name ends in
/// <c>_cleaned</c>, simplifies the copy's markup, and extracts its strings for translation.
/// </summary>
/// <param name="fullPath">Path of the original docx file.</param>
/// <param name="ignoreHidden">Passed through unchanged to the text extractor.</param>
/// <returns>
/// Conversion data for the cleaned copy, or <c>null</c> when the validator rejects
/// <paramref name="fullPath"/>. Its <c>Messages</c> list receives "Success", a failure
/// note, or the message of any exception raised while copying, cleaning or extracting.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="fullPath"/> is <c>null</c> or empty.</exception>
public DocxConversionData CreateCleanedCopy(string fullPath, bool ignoreHidden)
{
    if (string.IsNullOrEmpty(fullPath))
    {
        // The single-string constructor takes the parameter name, not the message,
        // so pass both explicitly.
        throw new ArgumentNullException(nameof(fullPath), "path variable cannot be null or empty");
    }

    // Ensure we have a valid path to a docx file.
    if (!validator.IsValid(fullPath))
    {
        return null;
    }

    var origFileData = new DocxConversionData(fullPath);
    var cleanedFileName = origFileData.FileName + "_cleaned" + origFileData.FileExtension;
    var cleanedFileData = new DocxConversionData(Path.Join(origFileData.FilePath, cleanedFileName));

    try
    {
        // Ensure there is not an orphaned copy already on disk.
        File.Delete(cleanedFileData.FullPath);

        // Make a copy of the original using the "cleaned" file name.
        File.Copy(origFileData.FullPath, cleanedFileData.FullPath);

        List<string> extractedStrings;

        // Clean the document of extra tags.
        using (WordprocessingDocument doc = WordprocessingDocument.Open(cleanedFileData.FullPath, true))
        {
            var settings = new OpenXmlPowerTools.SimplifyMarkupSettings
            {
                AcceptRevisions = true,
                // Setting this to false reduces translation quality, but if true
                // some documents have XML format errors when opening.
                NormalizeXml = false,
                RemoveBookmarks = true,
                RemoveComments = true,
                RemoveContentControls = true,
                RemoveEndAndFootNotes = true,
                // Removing field codes removes pagination.
                RemoveFieldCodes = false,
                RemoveGoBackBookmark = true,
                // RemoveHyperlinks is not set here and keeps the library default.
                RemoveLastRenderedPageBreak = true,
                RemoveMarkupForDocumentComparison = true,
                RemovePermissions = false,
                RemoveProof = true,
                RemoveRsidInfo = true,
                RemoveSmartTags = true,
                RemoveSoftHyphens = true,
                RemoveWebHidden = true,
                ReplaceTabsWithSpaces = false
            };
            OpenXmlPowerTools.MarkupSimplifier.SimplifyMarkup(doc, settings);

            // Extract the strings for translation. Materialize them while the document
            // is still open: a deferred query would otherwise be enumerated (by the
            // count below and by later consumers) after the document is disposed.
            extractedStrings = textExtractor.ExtractText(doc, ignoreHidden)
                                            .Select(para => para.InnerText)
                                            .ToList();
        }

        cleanedFileData.Strings2Translate = extractedStrings;

        // Total the number of strings for translation.
        cleanedFileData.TotalStrings2Translate = extractedStrings.Count;

        if (File.Exists(cleanedFileData.FullPath))
        {
            cleanedFileData.Messages.Add("Success");
        }
        else
        {
            cleanedFileData.Messages.Add("Docx data failed to build properly.");
        }
    }
    catch (Exception e)
    {
        // Best effort: report the failure through Messages instead of throwing.
        cleanedFileData.Messages.Add(e.Message);
    }

    return cleanedFileData;
}