/// <summary>
/// Converts a <see cref="WmlDocument"/> to XHTML, passing the supplied image handler
/// through to the conversion.
/// </summary>
/// <param name="doc">The document to convert; it is opened through an <c>OpenXmlMemoryStreamDocument</c>.</param>
/// <param name="htmlConverterSettings">Settings forwarded unchanged to the WordprocessingDocument overload.</param>
/// <param name="imageHandler">Image callback forwarded unchanged to the WordprocessingDocument overload.</param>
/// <returns>The XHTML element produced by the WordprocessingDocument overload.</returns>
public XElement ConvertToHtml(WmlDocument doc, HtmlConverterSettings htmlConverterSettings, Func<ImageInfo, XElement> imageHandler)
{
    // Both wrappers are disposed (inner first) once the conversion result has been built.
    using (OpenXmlMemoryStreamDocument memoryDocument = new OpenXmlMemoryStreamDocument(doc))
    using (WordprocessingDocument wordDocument = memoryDocument.GetWordprocessingDocument())
    {
        XElement html = ConvertToHtml(wordDocument, htmlConverterSettings, imageHandler);
        return html;
    }
}
/// <summary>
/// Converts a <see cref="WmlDocument"/> to XHTML without supplying an image handler.
/// </summary>
/// <param name="doc">The document to convert; it is opened through an <c>OpenXmlMemoryStreamDocument</c>.</param>
/// <param name="htmlConverterSettings">Settings forwarded unchanged to the WordprocessingDocument overload.</param>
/// <returns>The XHTML element produced by the two-argument WordprocessingDocument overload.</returns>
public XElement ConvertToHtml(WmlDocument doc, HtmlConverterSettings htmlConverterSettings)
{
    // Both wrappers are disposed (inner first) once the conversion result has been built.
    using (OpenXmlMemoryStreamDocument memoryDocument = new OpenXmlMemoryStreamDocument(doc))
    using (WordprocessingDocument wordDocument = memoryDocument.GetWordprocessingDocument())
    {
        XElement html = ConvertToHtml(wordDocument, htmlConverterSettings);
        return html;
    }
}
/// <summary>
/// Converts an open <see cref="WordprocessingDocument"/> to XHTML.
/// </summary>
/// <remarks>
/// The document passed in is modified: revisions are accepted and the markup is
/// simplified before the main document part is transformed.
/// </remarks>
/// <param name="wordDoc">The document to convert.</param>
/// <param name="htmlConverterSettings">Conversion settings; <c>ConvertFormatting</c> must be false.</param>
/// <param name="imageHandler">Callback passed on to the transform for image markup.</param>
/// <returns>The root XHTML element produced by <c>ConvertToHtmlTransform</c>.</returns>
/// <exception cref="InvalidSettingsException">
/// Thrown when <c>htmlConverterSettings.ConvertFormatting</c> is set.
/// </exception>
public XElement ConvertToHtml(WordprocessingDocument wordDoc, HtmlConverterSettings htmlConverterSettings, Func<ImageInfo, XElement> imageHandler)
{
    InitEntityMap();

    if (htmlConverterSettings.ConvertFormatting)
    {
        throw new InvalidSettingsException("Conversion with formatting is not supported");
    }

    RevisionAccepter.AcceptRevisions(wordDoc);

    // Strip markup the transform does not handle; field codes are deliberately kept.
    var simplifySettings = new SimplifyMarkupSettings();
    simplifySettings.RemoveComments = true;
    simplifySettings.RemoveContentControls = true;
    simplifySettings.RemoveEndAndFootNotes = true;
    simplifySettings.RemoveFieldCodes = false;
    simplifySettings.RemoveLastRenderedPageBreak = true;
    simplifySettings.RemovePermissions = true;
    simplifySettings.RemoveProof = true;
    simplifySettings.RemoveRsidInfo = true;
    simplifySettings.RemoveSmartTags = true;
    simplifySettings.RemoveSoftHyphens = true;
    simplifySettings.ReplaceTabsWithSpaces = true;
    MarkupSimplifier.SimplifyMarkup(wordDoc, simplifySettings);

    XElement documentRoot = wordDoc.MainDocumentPart.GetXDocument().Root;
    AnnotateHyperlinkContent(documentRoot);

    // Note: the tree returned by ConvertToHtmlTransform contains objects of type
    // XEntity, a class defined in PtOpenXmlUtil.cs. See
    // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
    // for a detailed explanation.
    //
    // Any further transformation of that tree has to account for those objects,
    // otherwise entities will not be serialized properly.
    object transformed = ConvertToHtmlTransform(wordDoc, htmlConverterSettings, documentRoot, imageHandler);
    return (XElement)transformed;
}
/// <summary>
/// Converts an open <see cref="WordprocessingDocument"/> to XHTML without an image handler.
/// </summary>
/// <param name="wordDoc">The document to convert.</param>
/// <param name="htmlConverterSettings">Settings forwarded unchanged to the three-argument overload.</param>
/// <returns>The XHTML element produced by the three-argument overload.</returns>
public XElement ConvertToHtml(WordprocessingDocument wordDoc, HtmlConverterSettings htmlConverterSettings)
{
    // A null handler makes the transform drop w:drawing and w:pict elements.
    Func<ImageInfo, XElement> noImageHandler = null;
    return ConvertToHtml(wordDoc, htmlConverterSettings, noImageHandler);
}
/// <summary>
/// Recursively transforms a WordprocessingML node into XHTML content.
/// </summary>
/// <remarks>
/// Elements that have no rule below, and nodes that are not elements, are dropped
/// (the method returns null for them).
/// </remarks>
/// <param name="wordDoc">The document being converted; used for styles, hyperlink relationships, list items and images.</param>
/// <param name="settings">Conversion settings; <c>PageTitle</c>, <c>Css</c> and <c>CssClassPrefix</c> are read here.</param>
/// <param name="node">The node to transform.</param>
/// <param name="imageHandler">Callback handed to <c>ProcessImage</c>; when null, images are dropped.</param>
/// <returns>
/// An XHTML element, entity-converted text, a lazy sequence of transformed children,
/// or null when the node produces no output.
/// </returns>
private object ConvertToHtmlTransform(WordprocessingDocument wordDoc, HtmlConverterSettings settings, XNode node, Func<ImageInfo, XElement> imageHandler)
{
    var element = node as XElement;
    if (element == null)
    {
        // Only elements are transformed.
        return null;
    }

    // Transform w:document to h:html with a generated h:head.
    if (element.Name == W.document)
    {
        return new XElement(Xhtml.Html,
            new XElement(Xhtml.Head,
                new XElement(Xhtml.Meta,
                    new XAttribute(HtmlNoNamespace.HttpEquiv, "Content-Type"),
                    new XAttribute(HtmlNoNamespace.Content, "text/html; charset=windows-1252")),
                new XElement(Xhtml.Meta,
                    new XAttribute(HtmlNoNamespace.Name, "Generator"),
                    new XAttribute(HtmlNoNamespace.Content, "PowerTools for Open XML")),
                settings.PageTitle != null
                    ? new XElement(Xhtml.Title, settings.PageTitle)
                    : null,
                settings.Css != null
                    ? new XElement(Xhtml.Style,
                        new XComment(Environment.NewLine + settings.Css + Environment.NewLine))
                    : null),
            TransformChildElements(wordDoc, settings, element, imageHandler));
    }

    // Transform the w:body element to the XHTML h:body element.
    if (element.Name == W.body)
    {
        return new XElement(Xhtml.Body,
            TransformChildElements(wordDoc, settings, element, imageHandler));
    }

    // Transform w:p to a heading (when its style has an outline level) or to h:p.
    if (element.Name == W.p)
    {
        return ConvertParagraphToHtml(wordDoc, settings, element, imageHandler);
    }

    // Transform every hyperlink that carries a relationship id to the XHTML h:a element.
    if (element.Name == W.hyperlink && element.Attribute(R.id) != null)
    {
        return ConvertHyperlinkToHtml(wordDoc, settings, element, imageHandler);
    }

    // Transform runs annotated with field information into links; the first field
    // argument is used as the link target.
    var fieldInfo = element.Annotation<FieldInfo>();
    if (fieldInfo != null && element.Name == W.r && fieldInfo.Arguments.Length > 0)
    {
        return new XElement(Xhtml.A,
            new XAttribute(HtmlNoNamespace.Href, fieldInfo.Arguments[0]),
            ConvertEntities(element.Elements(W.t)
                .Select(s => (string)s).StringConcatenate()));
    }

    // Transform contents of runs.
    if (element.Name == W.r)
    {
        return TransformChildElements(wordDoc, settings, element, imageHandler);
    }

    // Transform every w:t element to entity-converted text.
    if (element.Name == W.t)
    {
        return ConvertEntities(element.Value);
    }

    // Transform w:br and w:cr to h:br.
    if (element.Name == W.br || element.Name == W.cr)
    {
        return new XElement(Xhtml.Br);
    }

    // Transform w:noBreakHyphen to '-'.
    if (element.Name == W.noBreakHyphen)
    {
        return new XText("-");
    }

    // Transform w:tbl to h:table with a fixed border of 1.
    if (element.Name == W.tbl)
    {
        return new XElement(Xhtml.Table,
            new XAttribute(HtmlNoNamespace.Border, 1),
            TransformChildElements(wordDoc, settings, element, imageHandler));
    }

    // Transform w:tr to h:tr.
    if (element.Name == W.tr)
    {
        return new XElement(Xhtml.Tr,
            TransformChildElements(wordDoc, settings, element, imageHandler));
    }

    // Transform w:tc to h:td.
    if (element.Name == W.tc)
    {
        return new XElement(Xhtml.Td,
            TransformChildElements(wordDoc, settings, element, imageHandler));
    }

    // Transform images; without a handler they are dropped.
    if (element.Name == W.drawing || element.Name == W.pict)
    {
        if (imageHandler == null)
        {
            return null;
        }

        return ProcessImage(wordDoc, element, imageHandler);
    }

    // Any element that has not been transformed above is removed.
    return null;
}

/// <summary>
/// Lazily transforms each child element of <paramref name="parent"/> with
/// <c>ConvertToHtmlTransform</c>.
/// </summary>
private IEnumerable<object> TransformChildElements(WordprocessingDocument wordDoc, HtmlConverterSettings settings, XElement parent, Func<ImageInfo, XElement> imageHandler)
{
    return parent.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, imageHandler));
}

/// <summary>
/// Returns the root element of the style definitions part, or null when the document
/// has no style definitions part (or the part has no root element).
/// </summary>
private XElement GetStyleDefinitionsRoot(WordprocessingDocument wordDoc)
{
    var stylesPart = wordDoc.MainDocumentPart.StyleDefinitionsPart;
    if (stylesPart == null)
    {
        // Guard: dereferencing a missing styles part would otherwise throw
        // NullReferenceException for every paragraph in the document.
        return null;
    }

    return stylesPart.GetXDocument().Root;
}

/// <summary>
/// Transforms a w:p element into a heading element when its paragraph style defines an
/// outline level, and into h:p otherwise.
/// </summary>
private object ConvertParagraphToHtml(WordprocessingDocument wordDoc, HtmlConverterSettings settings, XElement paragraph, Func<ImageInfo, XElement> imageHandler)
{
    var styleId = (string)paragraph.Elements(W.pPr).Elements(W.pStyle)
        .Attributes(W.val).FirstOrDefault();
    var stylesRoot = GetStyleDefinitionsRoot(wordDoc);

    // A style with an outline level turns the paragraph into a heading one level deeper
    // (outline level 0 becomes h1).
    if (stylesRoot != null)
    {
        var style = stylesRoot.Elements(W.style)
            .FirstOrDefault(s => (string)s.Attribute(W.styleId) == styleId);
        if (style != null)
        {
            var outlineLevel = (int?)style.Elements(W.pPr)
                .Elements(W.outlineLvl).Attributes(W.val).FirstOrDefault();
            if (outlineLevel != null)
            {
                // NOTE(review): the level is not clamped, so outline levels above 5 yield
                // element names beyond h6. Kept as-is; confirm whether that is intended.
                return new XElement(Xhtml.xhtml + string.Format("h{0}", outlineLevel + 1),
                    settings.CssClassPrefix != null
                        ? new XAttribute(HtmlNoNamespace.Class, settings.CssClassPrefix + styleId)
                        : null,
                    ConvertEntities(ListItemRetriever.RetrieveListItem(wordDoc, paragraph, null)),
                    TransformChildElements(wordDoc, settings, paragraph, imageHandler));
            }
        }
    }

    // Plain paragraph: when it names no style, fall back to the default paragraph style
    // so the class attribute can still be emitted.
    var classStyleId = styleId;
    if (classStyleId == null && stylesRoot != null)
    {
        var defaultStyle = stylesRoot.Elements(W.style).FirstOrDefault(e =>
            (string)e.Attribute(W.type) == "paragraph" &&
            (string)e.Attribute(W._default) == "1");
        if (defaultStyle != null)
        {
            classStyleId = (string)defaultStyle.Attributes(W.styleId).FirstOrDefault();
        }
    }

    return new XElement(Xhtml.P,
        classStyleId != null && settings.CssClassPrefix != null
            ? new XAttribute(HtmlNoNamespace.Class, settings.CssClassPrefix + classStyleId)
            : null,
        ConvertEntities(ListItemRetriever.RetrieveListItem(wordDoc, paragraph, null)),
        TransformChildElements(wordDoc, settings, paragraph, imageHandler));
}

/// <summary>
/// Transforms a w:hyperlink element with a relationship id into h:a. When the
/// relationship cannot be resolved, the hyperlink's children are transformed instead.
/// </summary>
private object ConvertHyperlinkToHtml(WordprocessingDocument wordDoc, HtmlConverterSettings settings, XElement hyperlink, Func<ImageInfo, XElement> imageHandler)
{
    try
    {
        var relationshipId = (string)hyperlink.Attribute(R.id);

        // FirstOrDefault rather than First: a relationship id with no matching
        // relationship must not abort the whole conversion.
        var relationship = wordDoc.MainDocumentPart.HyperlinkRelationships
            .FirstOrDefault(x => x.Id == relationshipId);
        if (relationship != null)
        {
            return new XElement(Xhtml.A,
                new XAttribute(HtmlNoNamespace.Href, relationship.Uri),
                ConvertEntities(hyperlink.Elements(W.r)
                    .Elements(W.t)
                    .Select(s => (string)s).StringConcatenate()));
        }
    }
    catch (UriFormatException)
    {
        // Fall through: emit the link content without the anchor.
    }

    return TransformChildElements(wordDoc, settings, hyperlink, imageHandler);
}