/// <summary>
/// Produces HTML for one specific node rather than for the whole Word document.
/// Counterpart of the overload
/// ConvertToHtml(WordprocessingDocument, HtmlConverterSettings, Func{ImageInfo, XElement}),
/// which converts the root of the main document part.
/// </summary>
/// <param name="wordDoc">
/// The document that owns the node. It is handed to RevisionAccepter.AcceptRevisions and
/// MarkupSimplifier.SimplifyMarkup before the transform runs.
/// </param>
/// <param name="node">The node to convert to HTML. It is cast to XElement, so it must be an element.</param>
/// <param name="htmlConverterSettings">Conversion settings; ConvertFormatting must be false.</param>
/// <returns>The result of ConvertToHtmlTransform, cast to XElement.</returns>
/// <exception cref="InvalidSettingsException">
/// Thrown when htmlConverterSettings.ConvertFormatting is true.
/// </exception>
public static XElement ConvertToHtml(WordprocessingDocument wordDoc, XNode node, HtmlConverterSettings htmlConverterSettings)
{
    InitEntityMap();

    if (htmlConverterSettings.ConvertFormatting)
    {
        throw new InvalidSettingsException("Conversion with formatting is not supported");
    }

    RevisionAccepter.AcceptRevisions(wordDoc);

    // Field codes are the only markup category explicitly kept by this configuration.
    SimplifyMarkupSettings simplifyOptions = new SimplifyMarkupSettings();
    simplifyOptions.RemoveComments = true;
    simplifyOptions.RemoveContentControls = true;
    simplifyOptions.RemoveEndAndFootNotes = true;
    simplifyOptions.RemoveFieldCodes = false;
    simplifyOptions.RemoveLastRenderedPageBreak = true;
    simplifyOptions.RemovePermissions = true;
    simplifyOptions.RemoveProof = true;
    simplifyOptions.RemoveRsidInfo = true;
    simplifyOptions.RemoveSmartTags = true;
    simplifyOptions.RemoveSoftHyphens = true;
    simplifyOptions.ReplaceTabsWithSpaces = true;
    MarkupSimplifier.SimplifyMarkup(wordDoc, simplifyOptions);

    XElement targetElement = (XElement)node;
    AnnotateHyperlinkContent(targetElement);

    // No image handler is supplied for single-node conversion.
    var transformed = ConvertToHtmlTransform(wordDoc, htmlConverterSettings, node, null);
    return (XElement)transformed;
}
/// <summary>
/// Converts the root element of the main document part of a Word document into HTML.
/// </summary>
/// <param name="wordDoc">
/// The document to convert. It is handed to RevisionAccepter.AcceptRevisions and
/// MarkupSimplifier.SimplifyMarkup before the transform runs.
/// </param>
/// <param name="htmlConverterSettings">Conversion settings; ConvertFormatting must be false.</param>
/// <param name="imageHandler">Callback forwarded unchanged to ConvertToHtmlTransform.</param>
/// <returns>The result of ConvertToHtmlTransform, cast to XElement.</returns>
/// <exception cref="InvalidSettingsException">
/// Thrown when htmlConverterSettings.ConvertFormatting is true.
/// </exception>
/// <remarks>
/// The tree returned by ConvertToHtmlTransform contains XEntity objects (the XEntity class is
/// defined in PtOpenXmlUtil.cs). See
/// http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
/// for a detailed explanation. Any further transformation of the returned tree must handle
/// those objects correctly, or entities will not be serialized properly.
/// </remarks>
public static XElement ConvertToHtml(WordprocessingDocument wordDoc, HtmlConverterSettings htmlConverterSettings, Func<ImageInfo, XElement> imageHandler)
{
    InitEntityMap();

    if (htmlConverterSettings.ConvertFormatting)
    {
        throw new InvalidSettingsException("Conversion with formatting is not supported");
    }

    RevisionAccepter.AcceptRevisions(wordDoc);

    // Field codes are the only markup category explicitly kept by this configuration.
    SimplifyMarkupSettings simplifyOptions = new SimplifyMarkupSettings();
    simplifyOptions.RemoveComments = true;
    simplifyOptions.RemoveContentControls = true;
    simplifyOptions.RemoveEndAndFootNotes = true;
    simplifyOptions.RemoveFieldCodes = false;
    simplifyOptions.RemoveLastRenderedPageBreak = true;
    simplifyOptions.RemovePermissions = true;
    simplifyOptions.RemoveProof = true;
    simplifyOptions.RemoveRsidInfo = true;
    simplifyOptions.RemoveSmartTags = true;
    simplifyOptions.RemoveSoftHyphens = true;
    simplifyOptions.ReplaceTabsWithSpaces = true;
    MarkupSimplifier.SimplifyMarkup(wordDoc, simplifyOptions);

    // The root is read only after simplification has been applied to the document.
    XElement documentRoot = wordDoc.MainDocumentPart.GetXDocument().Root;
    AnnotateHyperlinkContent(documentRoot);

    var transformed = ConvertToHtmlTransform(wordDoc, htmlConverterSettings, documentRoot, imageHandler);
    return (XElement)transformed;
}
/// <summary>
/// Instance-level convenience wrapper that forwards this object and the given settings
/// to MarkupSimplifier.SimplifyMarkup.
/// </summary>
/// <param name="settings">The simplification settings passed through unchanged.</param>
/// <returns>The document returned by MarkupSimplifier.SimplifyMarkup.</returns>
public WmlDocument SimplifyMarkup(SimplifyMarkupSettings settings)
{
    WmlDocument simplified = MarkupSimplifier.SimplifyMarkup(this, settings);
    return simplified;
}
/// <summary>
/// Prepares a document for further processing in two passes: first the document is opened
/// and closed with markup-compatibility processing enabled, then it is reopened to be
/// validated, simplified, and annotated.
/// </summary>
/// <param name="source">The document to pre-process; its bytes are copied, not modified in place.</param>
/// <param name="startingIdForFootnotesEndnotes">
/// Value forwarded to ChangeFootnoteEndnoteReferencesToUniqueRange.
/// </param>
/// <returns>A new WmlDocument with the same file name, built from the processed bytes.</returns>
private static WmlDocument PreProcessMarkup(WmlDocument source, int startingIdForFootnotesEndnotes)
{
    // Pass 1: open and close to get rid of MC content.
    using (var firstPassStream = new MemoryStream())
    {
        byte[] originalBytes = source.DocumentByteArray;
        firstPassStream.Write(originalBytes, 0, originalBytes.Length);

        var mcOpenSettings = new OpenSettings();
        mcOpenSettings.MarkupCompatibilityProcessSettings = new MarkupCompatibilityProcessSettings(
            MarkupCompatibilityProcessMode.ProcessAllParts,
            FileFormatVersions.Office2007);

        using (WordprocessingDocument package = WordprocessingDocument.Open(firstPassStream, true, mcOpenSettings))
        {
            // Contrary to what you might think, looking at the API, it is necessary to access
            // the root element of each part to cause the SDK to process MC markup.
            OpenXmlPartRootElement mainRoot = package.MainDocumentPart.RootElement;

            if (package.MainDocumentPart.FootnotesPart != null)
            {
                OpenXmlPartRootElement footnotesRoot = package.MainDocumentPart.FootnotesPart.RootElement;
            }

            if (package.MainDocumentPart.EndnotesPart != null)
            {
                OpenXmlPartRootElement endnotesRoot = package.MainDocumentPart.EndnotesPart.RootElement;
            }
        }

        // The package has been closed at this point, so the stream holds the processed bytes.
        source = new WmlDocument(source.FileName, firstPassStream.ToArray());
    }

    // Pass 2: reopen the result of pass 1, then validate, simplify, and annotate it.
    using (var secondPassStream = new MemoryStream())
    {
        byte[] processedBytes = source.DocumentByteArray;
        secondPassStream.Write(processedBytes, 0, processedBytes.Length);

        var mcOpenSettings = new OpenSettings();
        mcOpenSettings.MarkupCompatibilityProcessSettings = new MarkupCompatibilityProcessSettings(
            MarkupCompatibilityProcessMode.ProcessAllParts,
            FileFormatVersions.Office2007);

        using (WordprocessingDocument package = WordprocessingDocument.Open(secondPassStream, true, mcOpenSettings))
        {
            TestForInvalidContent(package);
            RemoveExistingPowerToolsMarkup(package);

            // Removing content controls, field codes, and bookmarks is a no-no for many use
            // cases. Content controls are needed, e.g., on the title page. Field codes are
            // required for automatic cross-references, which in turn require bookmarks.
            // TODO: Revisit
            var simplifyOptions = new SimplifyMarkupSettings();
            simplifyOptions.RemoveBookmarks = true;
            simplifyOptions.AcceptRevisions = false;
            simplifyOptions.RemoveComments = true;
            simplifyOptions.RemoveContentControls = true;
            simplifyOptions.RemoveFieldCodes = true;
            simplifyOptions.RemoveGoBackBookmark = true;
            simplifyOptions.RemoveLastRenderedPageBreak = true;
            simplifyOptions.RemovePermissions = true;
            simplifyOptions.RemoveProof = true;
            simplifyOptions.RemoveSmartTags = true;
            simplifyOptions.RemoveSoftHyphens = true;
            simplifyOptions.RemoveHyperlinks = true;
            MarkupSimplifier.SimplifyMarkup(package, simplifyOptions);

            ChangeFootnoteEndnoteReferencesToUniqueRange(package, startingIdForFootnotesEndnotes);
            AddUnidsToMarkupInContentParts(package);
            AddFootnotesEndnotesParts(package);
            FillInEmptyFootnotesEndnotes(package);
        }

        return new WmlDocument(source.FileName, secondPassStream.ToArray());
    }
}
/// <summary>
/// Merges the comments of two documents that contain the same content into one document.
/// </summary>
/// <param name="document1">The first document; it also serves as the base of the merged result.</param>
/// <param name="document2">The second document, whose comments are renumbered and merged in.</param>
/// <param name="ensureLocked">
/// When true, both documents must be locked so that only commenting is allowed
/// (DocSecurity value "8"); otherwise an exception is thrown. The check only runs when
/// both documents actually contain comments.
/// </param>
/// <returns>
/// A copy of <paramref name="document1"/> when <paramref name="document2"/> has no comments part
/// or no comments; a copy of <paramref name="document2"/> when <paramref name="document1"/> has
/// no comments part or no comments; otherwise the merged document.
/// </returns>
/// <exception cref="CommentMergerDifferingContentsException">
/// Thrown when DocumentComparer.CompareDocuments reports that the documents differ.
/// </exception>
/// <exception cref="CommentMergerUnlockedDocumentException">
/// Thrown when <paramref name="ensureLocked"/> is true and either document is not locked,
/// including the case where the document carries no DocSecurity information at all.
/// </exception>
public static WmlDocument MergeComments(WmlDocument document1, WmlDocument document2, bool ensureLocked)
{
    // Work on copies so that simplification and renumbering never touch the caller's documents.
    WmlDocument cDoc1 = new WmlDocument(document1);
    WmlDocument cDoc2 = new WmlDocument(document2);
    using (OpenXmlMemoryStreamDocument streamDoc1 = new OpenXmlMemoryStreamDocument(cDoc1))
    using (WordprocessingDocument doc1 = streamDoc1.GetWordprocessingDocument())
    using (OpenXmlMemoryStreamDocument streamDoc2 = new OpenXmlMemoryStreamDocument(cDoc2))
    using (WordprocessingDocument doc2 = streamDoc2.GetWordprocessingDocument())
    {
        SimplifyMarkupSettings mss = new SimplifyMarkupSettings()
        {
            RemoveProof = true,
            RemoveRsidInfo = true,
            RemoveGoBackBookmark = true,
        };
        MarkupSimplifier.SimplifyMarkup(doc1, mss);
        MarkupSimplifier.SimplifyMarkup(doc2, mss);

        // If documents don't contain the same content, then don't attempt to merge comments.
        bool same = DocumentComparer.CompareDocuments(doc1, doc2);
        if (!same)
        {
            throw new CommentMergerDifferingContentsException(
                "Documents do not contain the same content");
        }

        var commentsPart1 = doc1.MainDocumentPart.WordprocessingCommentsPart;
        var commentsPart2 = doc2.MainDocumentPart.WordprocessingCommentsPart;

        // Nothing to merge from document2 (this also covers "neither has a comments part").
        if (commentsPart2 == null)
        {
            return new WmlDocument(document1);
        }

        // Only document2 has a comments part.
        if (commentsPart1 == null)
        {
            return new WmlDocument(document2);
        }

        // If either of the documents have no comments, then return the other one.
        if (!commentsPart1.GetXDocument().Root.Elements(W.comment).Any())
        {
            return new WmlDocument(document2);
        }
        if (!commentsPart2.GetXDocument().Root.Elements(W.comment).Any())
        {
            return new WmlDocument(document1);
        }

        if (ensureLocked)
        {
            // If either document is not locked (allowing only commenting), don't attempt to
            // merge comments.
            if (!IsLockedForCommentingOnly(doc1))
            {
                throw new CommentMergerUnlockedDocumentException(
                    "Document1 is not locked");
            }
            if (!IsLockedForCommentingOnly(doc2))
            {
                throw new CommentMergerUnlockedDocumentException(
                    "Document2 is not locked");
            }
        }

        RenumberCommentsInDoc2(doc1, doc2);

        WmlDocument destDoc = new WmlDocument(document1);
        using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(destDoc))
        {
            using (WordprocessingDocument destWDoc = streamDoc.GetWordprocessingDocument())
            {
                // Merge the comments part: all comments of doc1 followed by the renumbered
                // comments of doc2.
                XDocument commentsPartXDoc = new XDocument(
                    new XElement(W.comments,
                        new XAttribute(XNamespace.Xmlns + "w", W.w),
                        commentsPart1.GetXDocument().Root.Elements(),
                        commentsPart2.GetXDocument().Root.Elements()));
                destWDoc.MainDocumentPart.WordprocessingCommentsPart.PutXDocument(commentsPartXDoc);

                MergeCommentsInPart(doc1.MainDocumentPart, doc2.MainDocumentPart,
                    destWDoc.MainDocumentPart, commentsPartXDoc);
            }

            // The destination package is closed before the modified bytes are read back.
            return streamDoc.GetModifiedWmlDocument();
        }
    }
}

/// <summary>
/// Reports whether the document's extended file properties carry the DocSecurity value "8".
/// A missing properties part, missing root, or missing DocSecurity element counts as
/// "not locked" instead of causing a NullReferenceException.
/// </summary>
private static bool IsLockedForCommentingOnly(WordprocessingDocument doc)
{
    const string lockedForCommentsValue = "8";

    var propertiesPart = doc.ExtendedFilePropertiesPart;
    if (propertiesPart == null)
    {
        return false;
    }

    XElement propertiesRoot = propertiesPart.GetXDocument().Root;
    if (propertiesRoot == null)
    {
        return false;
    }

    XElement docSecurity = propertiesRoot.Element(EP.DocSecurity);
    return docSecurity != null && docSecurity.Value == lockedForCommentsValue;
}