/// <summary>
/// Produces HTML for one specific node rather than for the whole word document.
/// Note: this overload exists only for that purpose. For whole-document conversion see:
/// public static XElement ConvertToHtml(WordprocessingDocument wordDoc, HtmlConverterSettings htmlConverterSettings, Func&lt;ImageInfo, XElement&gt; imageHandler)
/// </summary>
/// <param name="wordDoc">
/// The document; it is passed to RevisionAccepter.AcceptRevisions and
/// MarkupSimplifier.SimplifyMarkup before the transform runs.
/// </param>
/// <param name="node">The node to convert to HTML. It is cast to XElement for hyperlink annotation.</param>
/// <param name="htmlConverterSettings">Conversion settings. ConvertFormatting must be false.</param>
/// <returns>The result of ConvertToHtmlTransform, cast to XElement.</returns>
public static XElement ConvertToHtml(WordprocessingDocument wordDoc, XNode node, HtmlConverterSettings htmlConverterSettings)
{
    InitEntityMap();

    // Formatting conversion is rejected up front, before the document is touched.
    if (htmlConverterSettings.ConvertFormatting)
    {
        throw new InvalidSettingsException("Conversion with formatting is not supported");
    }

    RevisionAccepter.AcceptRevisions(wordDoc);

    // Field codes are the only markup category explicitly kept by these settings.
    var simplifySettings = new SimplifyMarkupSettings
    {
        RemoveComments = true,
        RemoveContentControls = true,
        RemoveEndAndFootNotes = true,
        RemoveFieldCodes = false,
        RemoveLastRenderedPageBreak = true,
        RemovePermissions = true,
        RemoveProof = true,
        RemoveRsidInfo = true,
        RemoveSmartTags = true,
        RemoveSoftHyphens = true,
        ReplaceTabsWithSpaces = true,
    };
    MarkupSimplifier.SimplifyMarkup(wordDoc, simplifySettings);

    XElement elementToAnnotate = (XElement)node;
    AnnotateHyperlinkContent(elementToAnnotate);

    // No image handler is supplied for single-node conversion.
    object transformed = ConvertToHtmlTransform(wordDoc, htmlConverterSettings, node, null);
    return (XElement)transformed;
}
/// <summary>
/// Runs the simplifications selected in <paramref name="settings"/> over every content part of
/// the document and, when present, over its style-definitions and styles-with-effects parts.
/// </summary>
/// <param name="doc">The document to simplify.</param>
/// <param name="settings">
/// Flags selecting what to do. When RemoveMarkupForDocumentComparison is set, this method also
/// sets RemoveRsidInfo to true on the object that was passed in.
/// </param>
public static void SimplifyMarkup(WordprocessingDocument doc, SimplifyMarkupSettings settings)
{
    if (settings.RemoveMarkupForDocumentComparison)
    {
        // Comparison mode forces rsid removal before anything else reads the flag.
        settings.RemoveRsidInfo = true;
        RemoveElementsForDocumentComparison(doc);
    }

    if (settings.RemoveRsidInfo)
    {
        RemoveRsidInfoInSettings(doc);
    }

    if (settings.AcceptRevisions)
    {
        RevisionAccepter.AcceptRevisions(doc);
    }

    foreach (var contentPart in doc.ContentParts())
    {
        SimplifyMarkupForPart(contentPart, settings);
    }

    // The two style parts are handled separately from the content parts, and only when they exist.
    var mainPart = doc.MainDocumentPart;

    var styleDefinitions = mainPart.StyleDefinitionsPart;
    if (styleDefinitions != null)
    {
        SimplifyMarkupForPart(styleDefinitions, settings);
    }

    var stylesWithEffects = mainPart.StylesWithEffectsPart;
    if (stylesWithEffects != null)
    {
        SimplifyMarkupForPart(stylesWithEffects, settings);
    }
}
/// <summary>
/// Calls TransformPartToSingleCharacterRuns on every content part of the document.
/// </summary>
/// <param name="doc">The document to transform; it must not contain tracked revisions.</param>
/// <exception cref="OpenXmlPowerToolsException">
/// Thrown when RevisionAccepter.HasTrackedRevisions reports tracked revisions in the document.
/// </exception>
public static void TransformToSingleCharacterRuns(WordprocessingDocument doc)
{
    bool hasTrackedRevisions = RevisionAccepter.HasTrackedRevisions(doc);
    if (hasTrackedRevisions)
    {
        throw new OpenXmlPowerToolsException(
            "Transforming a document to single character runs is not supported for a document with tracked revisions.");
    }

    foreach (var contentPart in doc.ContentParts())
    {
        TransformPartToSingleCharacterRuns(contentPart);
    }
}
/// <summary>
/// Runs WmlSearchAndReplaceInXDocument over the main document part, every header part, every
/// footer part, and the endnotes and footnotes parts when they exist, writing each part back
/// with PutXDocument after it has been processed.
/// </summary>
/// <param name="wordDoc">The document to modify.</param>
/// <param name="search">The search text, passed through to WmlSearchAndReplaceInXDocument.</param>
/// <param name="replace">The replacement text, passed through to WmlSearchAndReplaceInXDocument.</param>
/// <param name="matchCase">
/// Passed through to WmlSearchAndReplaceInXDocument; presumably selects case-sensitive matching.
/// </param>
/// <exception cref="InvalidDataException">
/// Thrown when the document contains tracked revisions, or when its settings part contains a
/// trackRevisions element.
/// </exception>
public static void SearchAndReplace(WordprocessingDocument wordDoc, string search, string replace, bool matchCase)
{
    if (RevisionAccepter.HasTrackedRevisions(wordDoc))
    {
        throw new InvalidDataException(
            "Search and replace will not work with documents " +
            "that contain revision tracking.");
    }

    var mainPart = wordDoc.MainDocumentPart;

    // DocumentSettingsPart is null when the package has no settings part. Such a document has
    // no trackRevisions element to find, so skip the check instead of dereferencing null.
    var settingsPart = mainPart.DocumentSettingsPart;
    if (settingsPart != null &&
        settingsPart.GetXDocument().Descendants(W.trackRevisions).Any())
    {
        throw new InvalidDataException("Revision tracking is turned on for document.");
    }

    XDocument xDoc = mainPart.GetXDocument();
    WmlSearchAndReplaceInXDocument(xDoc, search, replace, matchCase);
    mainPart.PutXDocument();

    foreach (var part in mainPart.HeaderParts)
    {
        xDoc = part.GetXDocument();
        WmlSearchAndReplaceInXDocument(xDoc, search, replace, matchCase);
        part.PutXDocument();
    }

    foreach (var part in mainPart.FooterParts)
    {
        xDoc = part.GetXDocument();
        WmlSearchAndReplaceInXDocument(xDoc, search, replace, matchCase);
        part.PutXDocument();
    }

    // Endnotes and footnotes parts are optional, so each is processed only if it exists.
    var endnotesPart = mainPart.EndnotesPart;
    if (endnotesPart != null)
    {
        xDoc = endnotesPart.GetXDocument();
        WmlSearchAndReplaceInXDocument(xDoc, search, replace, matchCase);
        endnotesPart.PutXDocument();
    }

    var footnotesPart = mainPart.FootnotesPart;
    if (footnotesPart != null)
    {
        xDoc = footnotesPart.GetXDocument();
        WmlSearchAndReplaceInXDocument(xDoc, search, replace, matchCase);
        footnotesPart.PutXDocument();
    }
}
/// <summary>
/// Runs SimplifyMarkupForPart over every content part of the document and, when present, over
/// its style-definitions and styles-with-effects parts.
/// </summary>
/// <param name="doc">The document to simplify.</param>
/// <param name="settings">
/// Flags selecting what to do. When AcceptRevisions is set, revisions are accepted before any
/// part is simplified.
/// </param>
public static void SimplifyMarkup(WordprocessingDocument doc, SimplifyMarkupSettings settings)
{
    bool acceptFirst = settings.AcceptRevisions;
    if (acceptFirst)
    {
        RevisionAccepter.AcceptRevisions(doc);
    }

    foreach (var contentPart in doc.ContentParts())
    {
        SimplifyMarkupForPart(contentPart, settings);
    }

    // The two style parts are handled separately from the content parts, and only when they exist.
    var mainPart = doc.MainDocumentPart;

    var styleDefinitions = mainPart.StyleDefinitionsPart;
    if (styleDefinitions != null)
    {
        SimplifyMarkupForPart(styleDefinitions, settings);
    }

    var stylesWithEffects = mainPart.StylesWithEffectsPart;
    if (stylesWithEffects != null)
    {
        SimplifyMarkupForPart(stylesWithEffects, settings);
    }
}
/// <summary>
/// Converts the root element of the main document part into HTML.
/// </summary>
/// <param name="wordDoc">
/// The document; it is passed to RevisionAccepter.AcceptRevisions and
/// MarkupSimplifier.SimplifyMarkup before the transform runs.
/// </param>
/// <param name="htmlConverterSettings">Conversion settings. ConvertFormatting must be false.</param>
/// <param name="imageHandler">Image callback, passed through to ConvertToHtmlTransform.</param>
/// <returns>The result of ConvertToHtmlTransform, cast to XElement.</returns>
public static XElement ConvertToHtml(WordprocessingDocument wordDoc, HtmlConverterSettings htmlConverterSettings, Func<ImageInfo, XElement> imageHandler)
{
    InitEntityMap();

    // Formatting conversion is rejected up front, before the document is touched.
    if (htmlConverterSettings.ConvertFormatting)
    {
        throw new InvalidSettingsException("Conversion with formatting is not supported");
    }

    RevisionAccepter.AcceptRevisions(wordDoc);

    // Field codes are the only markup category explicitly kept by these settings.
    var simplifySettings = new SimplifyMarkupSettings
    {
        RemoveComments = true,
        RemoveContentControls = true,
        RemoveEndAndFootNotes = true,
        RemoveFieldCodes = false,
        RemoveLastRenderedPageBreak = true,
        RemovePermissions = true,
        RemoveProof = true,
        RemoveRsidInfo = true,
        RemoveSmartTags = true,
        RemoveSoftHyphens = true,
        ReplaceTabsWithSpaces = true,
    };
    MarkupSimplifier.SimplifyMarkup(wordDoc, simplifySettings);

    // The root is read only after simplification has run on the document.
    var mainXDocument = wordDoc.MainDocumentPart.GetXDocument();
    XElement rootElement = mainXDocument.Root;
    AnnotateHyperlinkContent(rootElement);

    // Note: the tree returned by ConvertToHtmlTransform contains objects of type XEntity,
    // a class defined in PtOpenXmlUtil.cs. See
    // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
    // for a detailed explanation.
    //
    // If you transform the returned XML tree any further, you must do so correctly,
    // or the entities will not be serialized properly.
    return (XElement)ConvertToHtmlTransform(wordDoc, htmlConverterSettings, rootElement, imageHandler);
}
/// <summary>
/// Copies the template's bytes into a memory stream, opens them as an editable
/// WordprocessingDocument, runs ProcessTemplatePart on every content part, and returns the
/// resulting bytes wrapped in a new WmlDocument.
/// </summary>
/// <param name="templateDoc">The template; only its DocumentByteArray is read.</param>
/// <param name="data">The data element handed to ProcessTemplatePart for each part.</param>
/// <param name="templateError">
/// Set to the HasError value of the TemplateError instance shared by all ProcessTemplatePart calls.
/// </param>
/// <returns>A new WmlDocument named "TempFileName.docx" holding the stream contents.</returns>
/// <exception cref="OpenXmlPowerToolsException">
/// Thrown when the template contains tracked revisions.
/// </exception>
public static WmlDocument AssembleDocument(WmlDocument templateDoc, XElement data, out bool templateError)
{
    byte[] templateBytes = templateDoc.DocumentByteArray;

    using (var stream = new MemoryStream())
    {
        stream.Write(templateBytes, 0, templateBytes.Length);

        using (var document = WordprocessingDocument.Open(stream, true))
        {
            if (RevisionAccepter.HasTrackedRevisions(document))
            {
                throw new OpenXmlPowerToolsException("Invalid DocumentAssembler template - contains tracked revisions");
            }

            // A single error tracker is shared across all parts.
            var errorTracker = new TemplateError();
            foreach (var contentPart in document.ContentParts())
            {
                ProcessTemplatePart(data, errorTracker, contentPart);
            }

            templateError = errorTracker.HasError;
        }

        // The bytes are read only after the document has been disposed.
        return new WmlDocument("TempFileName.docx", stream.ToArray());
    }
}
/// <summary>
/// Replaces the content between the start and end markers of the named bookmark in the main
/// document part, then writes the part back with PutXDocument.
/// </summary>
/// <param name="doc">The document to modify.</param>
/// <param name="bookmarkName">The w:name of the bookmark; the first matching bookmarkStart is used.</param>
/// <param name="replacementText">The text handed to ReplaceInsertElement.</param>
/// <exception cref="BookmarkReplacerException">
/// Thrown when the bookmark is missing, has no matching end marker, sits in a math formula,
/// hyperlink, simple field or smart tag, has start and end markers under different parents, or
/// when the document has tracked revisions or content controls.
/// </exception>
public static void ReplaceBookmarkText(WordprocessingDocument doc, string bookmarkName, string replacementText)
{
    XDocument xDoc = doc.MainDocumentPart.GetXDocument();

    XElement bookmark = xDoc.Descendants(W.bookmarkStart)
        .FirstOrDefault(d => (string)d.Attribute(W.name) == bookmarkName);
    if (bookmark == null)
    {
        throw new BookmarkReplacerException(
            "Document doesn't contain bookmark.");
    }
    if (bookmark.Parent.Name.Namespace == M.m)
    {
        throw new BookmarkReplacerException(
            "Replacing text in math formulas is not supported.");
    }
    if (RevisionAccepter.HasTrackedRevisions(doc))
    {
        throw new BookmarkReplacerException(
            "Replacing bookmark text in documents that have tracked revisions is not supported.");
    }
    if (xDoc.Descendants(W.sdt).Any())
    {
        throw new BookmarkReplacerException(
            "Replacing bookmark text in documents that have content controls is not supported.");
    }

    // All further work happens on the tree returned by FlattenParagraphsTransform, so the
    // bookmark markers are looked up again in that tree.
    XElement newRoot = (XElement)FlattenParagraphsTransform(xDoc.Root);
    XElement startBookmarkElement = newRoot.Descendants(W.bookmarkStart)
        .FirstOrDefault(d => (string)d.Attribute(W.name) == bookmarkName);
    int bookmarkId = (int)startBookmarkElement.Attribute(W.id);

    // The nullable cast lets a bookmarkEnd without an id simply fail to match instead of throwing.
    XElement endBookmarkElement = newRoot.Descendants(W.bookmarkEnd)
        .FirstOrDefault(d => (int?)d.Attribute(W.id) == bookmarkId);
    if (endBookmarkElement == null)
    {
        // Without this guard the checks below would dereference null.
        throw new BookmarkReplacerException(
            "Bookmark doesn't have a matching end. Can't replace text.");
    }

    if (startBookmarkElement.Ancestors(W.hyperlink).Any() ||
        endBookmarkElement.Ancestors(W.hyperlink).Any())
    {
        throw new BookmarkReplacerException(
            "Bookmark is within a hyperlink. Can't replace text.");
    }
    if (startBookmarkElement.Ancestors(W.fldSimple).Any() ||
        endBookmarkElement.Ancestors(W.fldSimple).Any())
    {
        throw new BookmarkReplacerException(
            "Bookmark is within a simple field. Can't replace text.");
    }
    if (startBookmarkElement.Ancestors(W.smartTag).Any() ||
        endBookmarkElement.Ancestors(W.smartTag).Any())
    {
        throw new BookmarkReplacerException(
            "Bookmark is within a smart tag. Can't replace text.");
    }
    if (startBookmarkElement.Parent != endBookmarkElement.Parent)
    {
        throw new BookmarkReplacerException(
            "Bookmark start and end not at same levels. Can't replace text.");
    }

    XElement parentElement = startBookmarkElement.Parent;

    // Materialized once: the sequence is consumed twice below, and the tree is not modified
    // until ReplaceNodes runs.
    var elementsBetweenBookmarks = startBookmarkElement
        .ElementsAfterSelf()
        .TakeWhile(e => e != endBookmarkElement)
        .ToList();

    // New child list for the parent, in order:
    //   1. everything before the start marker;
    //   2. the start marker;
    //   3. an "Insert" placeholder in the custom namespace, carrying the rPr of the first run
    //      found between the markers (no content when there is none);
    //   4. the elements between the markers that are not paragraphs, runs, or tables;
    //   5. the end marker and everything after it.
    var newElements = parentElement
        .Elements()
        .TakeWhile(e => e != startBookmarkElement)
        .Concat(new[]
        {
            startBookmarkElement,
            new XElement(BookmarkReplacerCustomNamespace + "Insert",
                elementsBetweenBookmarks
                    .Where(e => e.Name == W.r)
                    .Take(1)
                    .Elements(W.rPr)
                    .FirstOrDefault()),
        })
        .Concat(elementsBetweenBookmarks.Where(e =>
            e.Name != W.p && e.Name != W.r && e.Name != W.tbl))
        .Concat(new[] { endBookmarkElement })
        .Concat(endBookmarkElement.ElementsAfterSelf());

    // ReplaceNodes snapshots the new content before removing the old children, so a lazy query
    // over the parent's current children is safe here.
    parentElement.ReplaceNodes(newElements);

    // Order matters: the tree is unflattened, then the placeholder is handled by
    // ReplaceInsertElement (presumably swapped for the replacement text), and then
    // DemoteRunChildrenOfBodyTransform is applied.
    newRoot = (XElement)UnflattenParagraphsTransform(newRoot);
    newRoot = (XElement)ReplaceInsertElement(newRoot, replacementText);
    newRoot = (XElement)DemoteRunChildrenOfBodyTransform(newRoot);

    xDoc.Elements().First().ReplaceWith(newRoot);
    doc.MainDocumentPart.PutXDocument();
}