/// <summary>
/// Converts the Word document held in <paramref name="stream"/> to XHTML and returns
/// the serialized XHTML <c>body</c> element.
/// </summary>
/// <param name="stream">Stream containing the document; <c>stream.Length</c> bytes are read from it.</param>
/// <returns>The <c>body</c> element of the converted document, rendered with <c>ToStringNewLineOnAttributes</c>.</returns>
public string ParseDocument(Stream stream)
{
    XNamespace xhtmlNamespace = "http://www.w3.org/1999/xhtml";
    var documentBytes = stream.ToByteArray((int) stream.Length);

    using (var buffer = new MemoryStream())
    {
        // The document is opened from an in-memory copy with the editable flag set.
        buffer.Write(documentBytes, 0, documentBytes.Length);

        using (var wordDocument = WordprocessingDocument.Open(buffer, true))
        {
            XElement html = HtmlConverter.ConvertToHtml(wordDocument, new HtmlConverterSettings());

            // Note: the XHTML returned by the converter contains objects of type XEntity
            // (defined in PtOpenXmlUtil.cs). See
            // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
            // Any further transformation of that tree must keep those entities intact,
            // otherwise they do not serialize properly.
            return html.Element(xhtmlNamespace + "body").ToStringNewLineOnAttributes();
        }
    }
}
/// <summary>
/// Converts a single node of a Word document, rather than the whole document, to XHTML.
/// This is the node-level counterpart of the overload that takes an image handler and
/// converts the root of the main document part.
/// </summary>
/// <param name="wordDoc">The document that is passed to the revision accepter, the markup simplifier and the transform.</param>
/// <param name="node">The node to convert to HTML; it is cast to <see cref="XElement"/> for hyperlink annotation.</param>
/// <param name="htmlConverterSettings">Conversion settings; <c>ConvertFormatting</c> must be <c>false</c>.</param>
/// <returns>The result of the transform, cast to <see cref="XElement"/>.</returns>
/// <exception cref="InvalidSettingsException">Thrown when <c>ConvertFormatting</c> is set.</exception>
public static XElement ConvertToHtml(WordprocessingDocument wordDoc, XNode node, HtmlConverterSettings htmlConverterSettings)
{
    InitEntityMap();

    if (htmlConverterSettings.ConvertFormatting)
        throw new InvalidSettingsException("Conversion with formatting is not supported");

    RevisionAccepter.AcceptRevisions(wordDoc);

    // Simplify the markup before conversion; field codes are the only thing kept.
    MarkupSimplifier.SimplifyMarkup(
        wordDoc,
        new SimplifyMarkupSettings
        {
            RemoveComments = true,
            RemoveContentControls = true,
            RemoveEndAndFootNotes = true,
            RemoveFieldCodes = false,
            RemoveLastRenderedPageBreak = true,
            RemovePermissions = true,
            RemoveProof = true,
            RemoveRsidInfo = true,
            RemoveSmartTags = true,
            RemoveSoftHyphens = true,
            ReplaceTabsWithSpaces = true,
        });

    var annotatedElement = (XElement)node;
    AnnotateHyperlinkContent(annotatedElement);

    // No image handler is supplied for node-level conversion.
    object converted = ConvertToHtmlTransform(wordDoc, htmlConverterSettings, node, null);
    return (XElement)converted;
}
/// <summary>
/// Converts a <c>WmlDocument</c> to XHTML by opening it as a
/// <c>WordprocessingDocument</c> and delegating to the overload that accepts one.
/// </summary>
/// <param name="doc">The document to convert.</param>
/// <param name="htmlConverterSettings">Conversion settings, passed through unchanged.</param>
/// <param name="imageHandler">Image callback, passed through unchanged.</param>
/// <returns>The XHTML element produced by the delegated overload.</returns>
public static XElement ConvertToHtml(WmlDocument doc, HtmlConverterSettings htmlConverterSettings, Func<ImageInfo, XElement> imageHandler)
{
    using (var memoryDocument = new OpenXmlMemoryStreamDocument(doc))
    using (WordprocessingDocument wordDocument = memoryDocument.GetWordprocessingDocument())
    {
        XElement xhtml = ConvertToHtml(wordDocument, htmlConverterSettings, imageHandler);
        return xhtml;
    }
}
/// <summary>
/// Converts a <c>WmlDocument</c> to XHTML by opening it as a
/// <c>WordprocessingDocument</c> and delegating to the overload that accepts one.
/// </summary>
/// <param name="doc">The document to convert.</param>
/// <param name="htmlConverterSettings">Conversion settings, passed through unchanged.</param>
/// <returns>The XHTML element produced by the delegated overload.</returns>
public static XElement ConvertToHtml(WmlDocument doc, HtmlConverterSettings htmlConverterSettings)
{
    using (var memoryDocument = new OpenXmlMemoryStreamDocument(doc))
    using (WordprocessingDocument wordDocument = memoryDocument.GetWordprocessingDocument())
    {
        XElement xhtml = ConvertToHtml(wordDocument, htmlConverterSettings);
        return xhtml;
    }
}
/// <summary>
/// Opens <paramref name="doc"/> as a <c>WordprocessingDocument</c> and converts it to XHTML
/// using the overload that works on an open document.
/// </summary>
/// <param name="doc">The in-memory document to convert.</param>
/// <param name="htmlConverterSettings">Settings forwarded to the delegated overload.</param>
/// <returns>The converted XHTML element.</returns>
public static XElement ConvertToHtml(WmlDocument doc, HtmlConverterSettings htmlConverterSettings)
{
    using (var streamDocument = new OpenXmlMemoryStreamDocument(doc))
    using (WordprocessingDocument opened = streamDocument.GetWordprocessingDocument())
    {
        XElement result = ConvertToHtml(opened, htmlConverterSettings);
        return result;
    }
}
/// <summary>
/// Opens <paramref name="doc"/> as a <c>WordprocessingDocument</c> and converts it to XHTML
/// using the overload that works on an open document and an image handler.
/// </summary>
/// <param name="doc">The in-memory document to convert.</param>
/// <param name="htmlConverterSettings">Settings forwarded to the delegated overload.</param>
/// <param name="imageHandler">Image callback forwarded to the delegated overload.</param>
/// <returns>The converted XHTML element.</returns>
public static XElement ConvertToHtml(WmlDocument doc, HtmlConverterSettings htmlConverterSettings, Func<ImageInfo, XElement> imageHandler)
{
    using (var streamDocument = new OpenXmlMemoryStreamDocument(doc))
    using (WordprocessingDocument opened = streamDocument.GetWordprocessingDocument())
    {
        XElement result = ConvertToHtml(opened, htmlConverterSettings, imageHandler);
        return result;
    }
}
/// <summary>
/// Converts the main document part of <paramref name="wordDoc"/> to XHTML without formatting.
/// </summary>
/// <param name="wordDoc">The document that is passed to the revision accepter, the markup simplifier and the transform.</param>
/// <param name="htmlConverterSettings">Conversion settings; <c>ConvertFormatting</c> must be <c>false</c>.</param>
/// <param name="imageHandler">Callback forwarded to the transform for images.</param>
/// <returns>The result of the transform, cast to <see cref="XElement"/>.</returns>
/// <exception cref="InvalidSettingsException">Thrown when <c>ConvertFormatting</c> is set.</exception>
public static XElement ConvertToHtml(WordprocessingDocument wordDoc, HtmlConverterSettings htmlConverterSettings, Func<ImageInfo, XElement> imageHandler)
{
    InitEntityMap();

    if (htmlConverterSettings.ConvertFormatting)
        throw new InvalidSettingsException("Conversion with formatting is not supported");

    RevisionAccepter.AcceptRevisions(wordDoc);

    // Simplify the markup before conversion; field codes are the only thing kept.
    MarkupSimplifier.SimplifyMarkup(
        wordDoc,
        new SimplifyMarkupSettings
        {
            RemoveComments = true,
            RemoveContentControls = true,
            RemoveEndAndFootNotes = true,
            RemoveFieldCodes = false,
            RemoveLastRenderedPageBreak = true,
            RemovePermissions = true,
            RemoveProof = true,
            RemoveRsidInfo = true,
            RemoveSmartTags = true,
            RemoveSoftHyphens = true,
            ReplaceTabsWithSpaces = true,
        });

    XElement documentRoot = wordDoc.MainDocumentPart.GetXDocument().Root;
    AnnotateHyperlinkContent(documentRoot);

    // Note: the tree returned by ConvertToHtmlTransform contains XEntity objects
    // (the XEntity class is defined in PtOpenXmlUtil.cs). See
    // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
    // Callers that transform the returned tree further must keep those objects
    // intact, or entities will not be serialized properly.
    object converted = ConvertToHtmlTransform(wordDoc, htmlConverterSettings, documentRoot, imageHandler);
    return (XElement)converted;
}
/// <summary>
/// Produces formatting-free XHTML for the root element of the main document part.
/// </summary>
/// <param name="wordDoc">Document handed to the revision accepter, the markup simplifier and the transform.</param>
/// <param name="htmlConverterSettings">Conversion settings; setting <c>ConvertFormatting</c> is rejected.</param>
/// <param name="imageHandler">Image callback handed to the transform.</param>
/// <returns>The transform result cast to <see cref="XElement"/>.</returns>
/// <exception cref="InvalidSettingsException">Thrown when <c>ConvertFormatting</c> is set.</exception>
public static XElement ConvertToHtml(WordprocessingDocument wordDoc, HtmlConverterSettings htmlConverterSettings, Func<ImageInfo, XElement> imageHandler)
{
    InitEntityMap();

    if (htmlConverterSettings.ConvertFormatting)
    {
        throw new InvalidSettingsException("Conversion with formatting is not supported");
    }

    RevisionAccepter.AcceptRevisions(wordDoc);

    var simplification = new SimplifyMarkupSettings
    {
        RemoveComments = true,
        RemoveContentControls = true,
        RemoveEndAndFootNotes = true,
        RemoveFieldCodes = false,
        RemoveLastRenderedPageBreak = true,
        RemovePermissions = true,
        RemoveProof = true,
        RemoveRsidInfo = true,
        RemoveSmartTags = true,
        RemoveSoftHyphens = true,
        ReplaceTabsWithSpaces = true,
    };
    MarkupSimplifier.SimplifyMarkup(wordDoc, simplification);

    XElement root = wordDoc.MainDocumentPart.GetXDocument().Root;
    AnnotateHyperlinkContent(root);

    // The returned tree holds XEntity objects (see PtOpenXmlUtil.cs and
    // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx).
    // Any further transformation of it must be done correctly, or entities
    // will not be serialized properly.
    return (XElement)ConvertToHtmlTransform(wordDoc, htmlConverterSettings, root, imageHandler);
}
/// <summary>
/// Groups adjacent elements by their paragraph border and indentation markup and converts
/// each group: bordered runs are wrapped in a <c>div</c> annotated with the border style,
/// tables are transformed one by one, and everything else goes straight to
/// <c>GroupAndVerticallySpaceNumberedParagraphs</c>.
/// </summary>
/// <param name="wordDoc">The document being converted.</param>
/// <param name="settings">Conversion settings passed on to the nested conversions.</param>
/// <param name="elements">The sibling elements to group and convert.</param>
/// <returns>A list with one converted object per group.</returns>
private static object CreateBorderDivs(WordprocessingDocument wordDoc, HtmlConverterSettings settings, IEnumerable<XElement> elements)
{
    return elements
        .GroupAdjacent(child =>
        {
            // Key: serialized w:pBdr (+ serialized w:ind), "table" for tables, empty otherwise.
            var borders = child.Elements(W.pPr).Elements(W.pBdr).FirstOrDefault();
            if (borders == null)
                return child.Name == W.tbl ? "table" : string.Empty;

            var indent = child.Elements(W.pPr).Elements(W.ind).FirstOrDefault();
            var indentText = indent == null
                ? string.Empty
                : indent.ToString(SaveOptions.DisableFormatting);
            return borders.ToString(SaveOptions.DisableFormatting) + indentText;
        })
        .Select(run =>
        {
            if (run.Key == string.Empty)
                return (object) GroupAndVerticallySpaceNumberedParagraphs(wordDoc, settings, run, 0m);

            if (run.Key == "table")
                return run.Select(table => ConvertToHtmlTransform(wordDoc, settings, table, false, 0));

            // Bordered group: the first paragraph's properties describe the whole group.
            var paragraphProperties = run.First().Elements(W.pPr).First();
            var paragraphBorders = paragraphProperties.Element(W.pBdr);

            var borderStyle = new Dictionary<string, string>();
            GenerateBorderStyle(paragraphBorders, W.top, borderStyle, BorderType.Paragraph);
            GenerateBorderStyle(paragraphBorders, W.right, borderStyle, BorderType.Paragraph);
            GenerateBorderStyle(paragraphBorders, W.bottom, borderStyle, BorderType.Paragraph);
            GenerateBorderStyle(paragraphBorders, W.left, borderStyle, BorderType.Paragraph);

            var marginLeft = 0m;
            var indentation = paragraphProperties.Element(W.ind);
            if (indentation != null)
            {
                // Both attribute values are divided by 1440 and reported in inches.
                var left = (decimal?) indentation.Attribute(W.left)/1440m ?? 0m;
                var hanging = -(decimal?) indentation.Attribute(W.hanging)/1440m ?? 0m;
                marginLeft = left + hanging;

                var marginText = marginLeft > 0m
                    ? string.Format(NumberFormatInfo.InvariantInfo, "{0:0.00}in", marginLeft)
                    : "0";
                borderStyle.AddIfMissing("margin-left", marginText);
            }

            var wrapper = new XElement(Xhtml.div,
                GroupAndVerticallySpaceNumberedParagraphs(wordDoc, settings, run, marginLeft));
            wrapper.AddAnnotation(borderStyle);
            return wrapper;
        })
        .ToList();
}
/// <summary>
/// Recursive transform from a WordprocessingML node to its XHTML counterpart.
/// Dispatches on the element name; nodes that are not elements, and elements with no
/// matching rule, yield <c>null</c>.
/// </summary>
/// <param name="wordDoc">The document being converted.</param>
/// <param name="settings">Conversion settings (page title, image handler, ...).</param>
/// <param name="node">The node to transform.</param>
/// <param name="suppressTrailingWhiteSpace">Forwarded to paragraph processing.</param>
/// <param name="currentMarginLeft">Left margin forwarded to paragraph, table and content-control processing.</param>
/// <returns>An XHTML node, a sequence of nodes, or <c>null</c>.</returns>
private static object ConvertToHtmlTransform(WordprocessingDocument wordDoc, HtmlConverterSettings settings, XNode node, bool suppressTrailingWhiteSpace, decimal currentMarginLeft)
{
    var element = node as XElement;
    if (element == null) return null;

    XName name = element.Name;

    // Children are always transformed with trailing-whitespace suppression off.
    Func<XElement, object> transformChild =
        child => ConvertToHtmlTransform(wordDoc, settings, child, false, currentMarginLeft);

    // w:document -> h:html. The h:head follows the W3C's recommended layout: charset
    // (HTML5 form), the title (always present, possibly empty), then other meta tags.
    if (name == W.document)
    {
        var head = new XElement(Xhtml.head,
            new XElement(Xhtml.meta, new XAttribute("charset", "UTF-8")),
            new XElement(Xhtml.title, new XText(settings.PageTitle ?? string.Empty)),
            new XElement(Xhtml.meta,
                new XAttribute("name", "Generator"),
                new XAttribute("content", "PowerTools for Open XML")));

        return new XElement(Xhtml.html, head, element.Elements().Select(transformChild));
    }

    // w:body -> h:body.
    if (name == W.body)
        return new XElement(Xhtml.body, CreateSectionDivs(wordDoc, settings, element));

    // w:p -> h:h1-h6 or h:p (unless the previous paragraph has a style separator).
    if (name == W.p)
        return ProcessParagraph(wordDoc, settings, element, suppressTrailingWhiteSpace, currentMarginLeft);

    if (name == W.hyperlink)
    {
        // Hyperlink with a relationship id -> h:a pointing at the relationship's URI.
        XAttribute relationshipId = element.Attribute(R.id);
        if (relationshipId != null)
        {
            try
            {
                var target = wordDoc.MainDocumentPart
                    .HyperlinkRelationships
                    .First(rel => rel.Id == (string)relationshipId)
                    .Uri;

                return new XElement(Xhtml.a,
                    new XAttribute("href", target),
                    element.Elements(W.r).Select(run => ConvertRun(wordDoc, settings, run)));
            }
            catch (UriFormatException)
            {
                // Fall back to the plain content of the hyperlink.
                return element.Elements().Select(transformChild);
            }
        }

        // Hyperlink to a bookmark -> h:a.
        if (element.Attribute(W.anchor) != null)
            return ProcessHyperlinkToBookmark(wordDoc, settings, element);
    }

    // Contents of runs.
    if (name == W.r)
        return ConvertRun(wordDoc, settings, element);

    // w:bookmarkStart -> anchor.
    if (name == W.bookmarkStart)
        return ProcessBookmarkStart(element);

    // w:t -> text node. No entity conversion is needed in a UTF-8 document, and no
    // entities are needed for significant whitespace because the text nodes are
    // wrapped in <span> elements within which all whitespace is significant.
    if (name == W.t)
        return new XText(element.Value);

    // Symbols -> span holding a numeric character entity.
    if (name == W.sym)
    {
        var hexCode = (string)element.Attribute(W._char);
        var codePoint = Convert.ToInt32(hexCode, 16);
        return new XElement(Xhtml.span, new XEntity(string.Format("#{0}", codePoint)));
    }

    // Tabs that have the pt:TabWidth attribute set.
    if (name == W.tab)
        return ProcessTab(element);

    // w:br / w:cr -> h:br.
    if (name == W.br || name == W.cr)
        return ProcessBreak(element);

    // w:noBreakHyphen -> '-'.
    if (name == W.noBreakHyphen)
        return new XText("-");

    // w:tbl -> h:tbl.
    if (name == W.tbl)
        return ProcessTable(wordDoc, settings, element, currentMarginLeft);

    // w:tr -> h:tr.
    if (name == W.tr)
        return ProcessTableRow(wordDoc, settings, element, currentMarginLeft);

    // w:tc -> h:td.
    if (name == W.tc)
        return ProcessTableCell(wordDoc, settings, element);

    // Images.
    if (name == W.drawing || name == W.pict || name == W._object)
        return ProcessImage(wordDoc, element, settings.ImageHandler);

    // Content controls.
    if (name == W.sdt)
        return ProcessContentControl(wordDoc, settings, element, currentMarginLeft);

    // Smart tags and simple fields.
    if (name == W.smartTag || name == W.fldSimple)
        return CreateBorderDivs(wordDoc, settings, element.Elements());

    // Anything else is ignored.
    return null;
}
/// <summary>
/// Converts this instance to XHTML by delegating to <c>HtmlConverter.ConvertToHtml</c>.
/// </summary>
/// <param name="htmlConverterSettings">Conversion settings, passed through unchanged.</param>
/// <returns>The XHTML element returned by the converter.</returns>
public XElement ConvertToHtml(HtmlConverterSettings htmlConverterSettings)
{
    XElement xhtml = HtmlConverter.ConvertToHtml(this, htmlConverterSettings);
    return xhtml;
}
/*
 * Run properties listed as handled:
 *   b, bdr, caps, color, dstrike, highlight, i, position, rFonts, shd,
 *   smallCaps, spacing, strike, sz, u, vanish, vertAlign
 *
 * Run properties listed as not handled:
 *   em, emboss, fitText, imprint, kern, outline, shadow, w
 */
/// <summary>
/// Converts a <c>w:r</c> element. Runs without <c>w:rPr</c> yield their transformed children;
/// runs marked <c>w:webHidden</c> yield <c>null</c>; otherwise the content is optionally wrapped
/// in <c>sup</c>/<c>sub</c> and in a styled <c>span</c>.
/// </summary>
/// <param name="wordDoc">The document being converted.</param>
/// <param name="settings">Conversion settings passed on to the nested transform.</param>
/// <param name="run">The <c>w:r</c> element to convert.</param>
/// <returns>A sequence of transformed children, a wrapping element, or <c>null</c>.</returns>
private static object ConvertRun(WordprocessingDocument wordDoc, HtmlConverterSettings settings, XElement run)
{
    var runProperties = run.Element(W.rPr);

    if (runProperties == null)
    {
        return run.Elements().Select(child => ConvertToHtmlTransform(wordDoc, settings, child, false, 0m));
    }

    // Hide all content that contains the w:rPr/w:webHidden element.
    if (runProperties.Element(W.webHidden) != null)
    {
        return null;
    }

    var style = DefineRunStyle(run);
    object content = run.Elements().Select(child => ConvertToHtmlTransform(wordDoc, settings, child, false, 0m));

    // Wrap the content in h:sup or h:sub when w:vertAlign asks for it; the wrapper
    // is only used if it actually ended up with child nodes.
    if (runProperties.Element(W.vertAlign) != null)
    {
        var alignment = (string)runProperties.Elements(W.vertAlign).Attributes(W.val).FirstOrDefault();

        XElement wrapper = null;
        if (alignment == "superscript")
        {
            wrapper = new XElement(Xhtml.sup, content);
        }
        else if (alignment == "subscript")
        {
            wrapper = new XElement(Xhtml.sub, content);
        }

        if (wrapper != null && wrapper.Nodes().Any())
        {
            content = wrapper;
        }
    }

    var langAttribute = GetLangAttribute(run);

    XEntity startMark;
    XEntity endMark;
    DetermineRunMarks(run, runProperties, style, out startMark, out endMark);

    bool needsSpan = style.Any() || langAttribute != null || startMark != null;
    if (!needsSpan)
    {
        return content;
    }

    style.AddIfMissing("margin", "0");
    style.AddIfMissing("padding", "0");

    var span = new XElement(Xhtml.span, langAttribute, startMark, content, endMark);
    span.AddAnnotation(style);
    return span;
}
/// <summary>
/// Converts the Word document at <paramref name="file"/> to an HTML file. Images are saved
/// into a sibling "&lt;name&gt;_files" directory, which is created when the first image is handled.
/// </summary>
/// <param name="file">Path of the document to convert.</param>
/// <param name="outputDirectory">
/// Directory that receives the HTML file. When null or empty, the output file name is used
/// as-is (i.e. relative to the current directory).
/// </param>
/// <exception cref="OpenXmlPowerToolsException">
/// Thrown when <paramref name="outputDirectory"/> is given but does not exist.
/// </exception>
public static void ConvertToHtml(string file, string outputDirectory)
{
    var fi = new FileInfo(file);
    byte[] byteArray = File.ReadAllBytes(fi.FullName);
    using (MemoryStream memoryStream = new MemoryStream())
    {
        memoryStream.Write(byteArray, 0, byteArray.Length);
        using (WordprocessingDocument wDoc = WordprocessingDocument.Open(memoryStream, true))
        {
            // Swap the extension instead of text-replacing ".docx": a name such as
            // "Report.DOCX" or "report.docm" would otherwise be left unchanged, so the
            // output file name would equal the input file name.
            var destFileName = new FileInfo(Path.ChangeExtension(fi.Name, ".html"));
            if (!string.IsNullOrEmpty(outputDirectory))
            {
                DirectoryInfo di = new DirectoryInfo(outputDirectory);
                if (!di.Exists)
                {
                    throw new OpenXmlPowerToolsException("Output directory does not exist");
                }
                destFileName = new FileInfo(Path.Combine(di.FullName, destFileName.Name));
            }

            // Strip the actual extension rather than a hard-coded five characters.
            var imageDirectoryName = destFileName.FullName.Substring(
                0, destFileName.FullName.Length - destFileName.Extension.Length) + "_files";
            int imageCounter = 0;

            // The core properties part may be absent; fall back to the file path for the title.
            var corePropertiesPart = wDoc.CoreFilePropertiesPart;
            string pageTitle = corePropertiesPart != null
                ? (string)corePropertiesPart.GetXDocument().Descendants(DC.title).FirstOrDefault()
                : null;
            if (pageTitle == null)
            {
                pageTitle = fi.FullName;
            }

            HtmlConverterSettings settings = new HtmlConverterSettings()
            {
                PageTitle = pageTitle,
                FabricateCssClasses = true,
                CssClassPrefix = "pt-",
                RestrictToSupportedLanguages = false,
                RestrictToSupportedNumberingFormats = false,
                ImageHandler = imageInfo =>
                {
                    DirectoryInfo localDirInfo = new DirectoryInfo(imageDirectoryName);
                    if (!localDirInfo.Exists)
                    {
                        localDirInfo.Create();
                    }
                    ++imageCounter;

                    // A content type without a subtype is treated like any other
                    // unexpected format: no markup is produced for it.
                    string[] contentTypeParts = imageInfo.ContentType.Split('/');
                    if (contentTypeParts.Length < 2)
                    {
                        return null;
                    }

                    // Invariant lower-casing: the comparison below is against fixed ASCII
                    // tokens and must not depend on the current culture.
                    string extension = contentTypeParts[1].ToLowerInvariant();
                    ImageFormat imageFormat = null;
                    if (extension == "png")
                    {
                        // Convert png to gif.
                        extension = "gif";
                        imageFormat = ImageFormat.Gif;
                    }
                    else if (extension == "gif")
                    {
                        imageFormat = ImageFormat.Gif;
                    }
                    else if (extension == "bmp")
                    {
                        imageFormat = ImageFormat.Bmp;
                    }
                    else if (extension == "jpeg")
                    {
                        imageFormat = ImageFormat.Jpeg;
                    }
                    else if (extension == "tiff")
                    {
                        // Convert tiff to gif.
                        extension = "gif";
                        imageFormat = ImageFormat.Gif;
                    }
                    else if (extension == "x-wmf")
                    {
                        extension = "wmf";
                        imageFormat = ImageFormat.Wmf;
                    }

                    // If the image format isn't one that we expect, ignore it,
                    // and don't return markup for the link.
                    if (imageFormat == null)
                    {
                        return null;
                    }

                    string imageFileName = imageDirectoryName + "/image" + imageCounter.ToString() + "." + extension;
                    try
                    {
                        imageInfo.Bitmap.Save(imageFileName, imageFormat);
                    }
                    catch (System.Runtime.InteropServices.ExternalException)
                    {
                        // The image could not be saved; skip it.
                        return null;
                    }

                    XElement img = new XElement(Xhtml.img,
                        new XAttribute(NoNamespace.src, imageFileName),
                        imageInfo.ImgStyleAttribute,
                        imageInfo.AltText != null
                            ? new XAttribute(NoNamespace.alt, imageInfo.AltText)
                            : null);
                    return img;
                }
            };

            XElement html = HtmlConverter.ConvertToHtml(wDoc, settings);

            // Note: the xhtml returned by ConvertToHtmlTransform contains objects of type
            // XEntity. PtOpenXmlUtil.cs defines the XEntity class. See
            // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
            // for a detailed explanation.
            //
            // If you further transform the XML tree returned by ConvertToHtmlTransform, you
            // must do it correctly, or entities will not be serialized properly.
            var htmlString = html.ToString(SaveOptions.DisableFormatting);
            File.WriteAllText(destFileName.FullName, htmlString, Encoding.UTF8);
        }
    }
}
/// <summary>
/// Wraps the children of <paramref name="element"/> in one <c>div</c> per run of adjacent
/// children whose <c>SectionAnnotation</c> section elements serialize identically.
/// A <c>dir="rtl"</c> attribute is added when the section has an enabled <c>w:bidi</c>.
/// </summary>
/// <param name="wordDoc">The document being converted.</param>
/// <param name="settings">Conversion settings passed on to <c>CreateBorderDivs</c>.</param>
/// <param name="element">The element whose children are grouped into sections.</param>
/// <returns>A deferred sequence of <c>div</c> elements.</returns>
private static object CreateSectionDivs(WordprocessingDocument wordDoc, HtmlConverterSettings settings, XElement element)
{
    // Note: a paging HTML converter would need to handle new sections with page breaks
    // (and the w:rtlGutter element) here. Conflating adjacent sections with identical
    // formatting is not an issue for the non-paging transform.
    return element
        .Elements()
        .GroupAdjacent(child => child.Annotation<SectionAnnotation>().SectionElement.ToString())
        .Select(section =>
        {
            var sectionElement = section.First().Annotation<SectionAnnotation>().SectionElement;

            // w:bidi counts as enabled when w:val is missing or evaluates to true.
            bool isRightToLeft = sectionElement
                .Elements(W.bidi)
                .Any(b => b.Attribute(W.val) == null || b.Attribute(W.val).ToBoolean() == true);

            XAttribute direction = isRightToLeft ? new XAttribute("dir", "rtl") : null;
            return new XElement(Xhtml.div, direction, CreateBorderDivs(wordDoc, settings, section));
        });
}
/// <summary>
/// Turns the style dictionaries annotated on elements of <paramref name="xhtml"/> into real
/// markup: either inline <c>style</c> attributes, or (when <c>FabricateCssClasses</c> is set)
/// <c>class</c> attributes plus generated CSS rules placed in a <c>style</c> element.
/// </summary>
/// <param name="htmlConverterSettings">Supplies <c>FabricateCssClasses</c>, <c>CssClassPrefix</c> and <c>AdditionalCss</c>.</param>
/// <param name="xhtml">Root of the XHTML tree; it is modified in place.</param>
private static void ReifyStylesAndClasses(HtmlConverterSettings htmlConverterSettings, XElement xhtml)
{
    if (!htmlConverterSettings.FabricateCssClasses)
    {
        // Inline mode: serialize each annotated dictionary into a style attribute.
        foreach (var node in xhtml.DescendantsAndSelf())
        {
            var annotated = node.Annotation<Dictionary<string, string>>();
            if (annotated == null)
                continue;

            var inlineCss = annotated
                .Where(pair => pair.Key != "PtStyleName")
                .OrderBy(pair => pair.Key)
                .Select(pair => string.Format("{0}: {1};", pair.Key, pair.Value))
                .StringConcatenate();

            var freshAttribute = new XAttribute("style", inlineCss);
            var existingAttribute = node.Attribute("style");
            if (existingAttribute == null)
                node.Add(freshAttribute);
            else
                existingAttribute.Value += inlineCss;
        }
        return;
    }

    // Class mode: elements with the same name and the same set of styles share one class.
    var groups = xhtml
        .DescendantsAndSelf()
        .Select(d => new
        {
            Element = d,
            Styles = d.Annotation<Dictionary<string, string>>(),
        })
        .Where(x => x.Styles != null)
        .Select(x => new
        {
            Element = x.Element,
            Styles = x.Styles,
            StylesString = x.Element.Name.LocalName + "|" +
                x.Styles
                    .OrderBy(kv => kv.Key)
                    .Select(kv => string.Format("{0}:{1};", kv.Key, kv.Value))
                    .StringConcatenate(),
        })
        .GroupBy(x => x.StylesString)
        .ToList();

    var takenClassNames = new HashSet<string>();
    var css = new StringBuilder();
    css.Append(Environment.NewLine);

    // The counter starts at 1000000 and its first digit is dropped when it is turned
    // into text, so generated suffixes are "000000", "000001", ...
    int counter = 1000000;

    foreach (var group in groups)
    {
        var representative = group.First();
        var representativeStyles = representative.Styles;
        var localName = representative.Element.Name.LocalName;

        string className;
        string styleName;
        if (representativeStyles.TryGetValue("PtStyleName", out styleName))
        {
            className = htmlConverterSettings.CssClassPrefix + styleName;
            if (takenClassNames.Contains(className))
            {
                // Name already used by a different style set: disambiguate with the counter.
                className = className + "-" + counter.ToString().Substring(1);
                counter++;
            }
        }
        else
        {
            className = htmlConverterSettings.CssClassPrefix + counter.ToString().Substring(1);
            counter++;
        }
        takenClassNames.Add(className);

        css.Append(localName).Append(".").Append(className).Append(" {").Append(Environment.NewLine);
        foreach (var declaration in representativeStyles.Where(kv => kv.Key != "PtStyleName"))
        {
            css.Append(" ").Append(declaration.Key).Append(":").Append(declaration.Value).Append(";").Append(Environment.NewLine);
        }
        css.Append("}").Append(Environment.NewLine);

        var classAttribute = new XAttribute("class", className);
        foreach (var member in group)
        {
            member.Element.Add(classAttribute);
        }
    }

    var stylesheet = css.ToString() + htmlConverterSettings.AdditionalCss;
    var styleElement = xhtml.Descendants(Xhtml.style).FirstOrDefault();
    if (styleElement != null)
    {
        styleElement.Value = stylesheet;
        return;
    }

    // No style element yet: create one and attach it to the head, if there is a head.
    styleElement = new XElement(Xhtml.style, stylesheet);
    var head = xhtml.Element(Xhtml.head);
    if (head != null)
    {
        head.Add(styleElement);
    }
}
// Converts a w:p element together with any following sibling w:p elements that are
// chained to it by a style separator (a w:specVanish element). The chained siblings
// are converted to h:span elements and appended to the element (e.g., h:h2) created
// for the first paragraph, instead of becoming paragraphs of their own.
private static object ProcessParagraph(WordprocessingDocument wordDoc, HtmlConverterSettings settings, XElement element, bool suppressTrailingWhiteSpace, decimal currentMarginLeft)
{
    // A paragraph that follows a style separator has already been emitted as part
    // of the preceding paragraph, so there is nothing to do for it here.
    var precedingParagraph = element.ElementsBeforeSelf(W.p).LastOrDefault();
    if (HasStyleSeparator(precedingParagraph))
    {
        return null;
    }

    var elementName = GetParagraphElementName(element, wordDoc);
    var isBidi = IsBidi(element);
    var paragraph = (XElement) ConvertParagraph(wordDoc, settings, element, elementName,
        suppressTrailingWhiteSpace, currentMarginLeft, isBidi);

    // Paragraph conversion can leave empty spans behind; they are invalid in HTML5
    // and are therefore removed.
    paragraph.Elements(Xhtml.span).Where(span => span.IsEmpty).Remove();

    // Follow the chain of style separators, appending each next paragraph as a span.
    for (var current = element; HasStyleSeparator(current); )
    {
        current = current.ElementsAfterSelf(W.p).FirstOrDefault();
        if (current == null)
        {
            break;
        }

        elementName = Xhtml.span;
        isBidi = IsBidi(current);
        paragraph.Add(ConvertParagraph(wordDoc, settings, current, elementName,
            suppressTrailingWhiteSpace, currentMarginLeft, isBidi));
    }

    return paragraph;
}
/// <summary>
/// Converts a <c>w:tc</c> element to an <c>h:td</c> element. Computes <c>rowspan</c> for cells that
/// start a vertical merge, returns <c>null</c> for cells that continue one, and collects
/// alignment, width, border and shading styles into a dictionary annotated on the cell.
/// </summary>
/// <param name="wordDoc">The document being converted.</param>
/// <param name="settings">Conversion settings passed on to <c>CreateBorderDivs</c>.</param>
/// <param name="element">The <c>w:tc</c> element to convert.</param>
/// <returns>The <c>h:td</c> element, or <c>null</c> for a continued vertically merged cell.</returns>
private static object ProcessTableCell(WordprocessingDocument wordDoc, HtmlConverterSettings settings, XElement element)
{
    var style = new Dictionary<string, string>();
    XAttribute colSpan = null;
    XAttribute rowSpan = null;

    var tcPr = element.Element(W.tcPr);
    if (tcPr != null)
    {
        // This cell starts a vertical merge: count how many following rows continue it.
        if ((string) tcPr.Elements(W.vMerge).Attributes(W.val).FirstOrDefault() == "restart")
        {
            // Positions are plain sibling counts (rows before this row, cells before this cell).
            // NOTE(review): the cell index does not account for w:gridSpan in other rows — confirm
            // this is acceptable for the documents being converted.
            var currentRow = element.Parent.ElementsBeforeSelf(W.tr).Count();
            var currentCell = element.ElementsBeforeSelf(W.tc).Count();
            var tbl = element.Parent.Parent;
            int rowSpanCount = 1;
            currentRow += 1;
            while (true)
            {
                // Stop at the end of the table, at a row that has no cell at this index,
                // at a cell without w:vMerge, or at a cell that restarts a new merge.
                var row = tbl.Elements(W.tr).Skip(currentRow).FirstOrDefault();
                if (row == null) break;
                var cell2 = row.Elements(W.tc).Skip(currentCell).FirstOrDefault();
                if (cell2 == null) break;
                if (cell2.Elements(W.tcPr).Elements(W.vMerge).FirstOrDefault() == null) break;
                if ((string) cell2.Elements(W.tcPr).Elements(W.vMerge).Attributes(W.val).FirstOrDefault() == "restart") break;
                currentRow += 1;
                rowSpanCount += 1;
            }
            rowSpan = new XAttribute("rowspan", rowSpanCount);
        }

        // A cell that has w:vMerge without val="restart" continues a merge started above;
        // no td is produced for it.
        if (tcPr.Element(W.vMerge) != null && (string) tcPr.Elements(W.vMerge).Attributes(W.val).FirstOrDefault() != "restart")
            return null;

        // Map w:vAlign to the CSS vertical-align value; unrecognized values map to "middle".
        if (tcPr.Element(W.vAlign) != null)
        {
            var vAlignVal = (string) tcPr.Elements(W.vAlign).Attributes(W.val).FirstOrDefault();
            if (vAlignVal == "top")
                style.AddIfMissing("vertical-align", "top");
            else if (vAlignVal == "center")
                style.AddIfMissing("vertical-align", "middle");
            else if (vAlignVal == "bottom")
                style.AddIfMissing("vertical-align", "bottom");
            else
                style.AddIfMissing("vertical-align", "middle");
        }
        // Default alignment. Presumably AddIfMissing leaves an existing key untouched, so this
        // only applies when w:vAlign was absent — verify against the AddIfMissing helper.
        style.AddIfMissing("vertical-align", "top");

        // Width of type "dxa": the w:w value is divided by 20 and emitted in pt.
        // NOTE(review): the (int) cast throws if the w:w attribute is missing — confirm it is always present.
        if ((string) tcPr.Elements(W.tcW).Attributes(W.type).FirstOrDefault() == "dxa")
        {
            decimal width = (int) tcPr.Elements(W.tcW).Attributes(W._w).FirstOrDefault();
            style.AddIfMissing("width", string.Format(NumberFormatInfo.InvariantInfo, "{0}pt", width/20m));
        }
        // Width of type "pct": the w:w value is divided by 50 and emitted as a percentage.
        if ((string) tcPr.Elements(W.tcW).Attributes(W.type).FirstOrDefault() == "pct")
        {
            decimal width = (int) tcPr.Elements(W.tcW).Attributes(W._w).FirstOrDefault();
            style.AddIfMissing("width", string.Format(NumberFormatInfo.InvariantInfo, "{0:0.0}%", width/50m));
        }

        // Cell borders, one call per side.
        var tcBorders = tcPr.Element(W.tcBorders);
        GenerateBorderStyle(tcBorders, W.top, style, BorderType.Cell);
        GenerateBorderStyle(tcBorders, W.right, style, BorderType.Cell);
        GenerateBorderStyle(tcBorders, W.bottom, style, BorderType.Cell);
        GenerateBorderStyle(tcBorders, W.left, style, BorderType.Cell);

        // Shading.
        CreateStyleFromShd(style, tcPr.Element(W.shd));

        // w:gridSpan becomes the colspan attribute.
        var gridSpan = tcPr.Elements(W.gridSpan).Attributes(W.val).Select(a => (int?) a).FirstOrDefault();
        if (gridSpan != null)
            colSpan = new XAttribute("colspan", (int) gridSpan);
    }

    style.AddIfMissing("padding-top", "0");
    style.AddIfMissing("padding-bottom", "0");

    // The style dictionary is attached as an annotation rather than written as an attribute.
    var cell = new XElement(Xhtml.td,
        rowSpan,
        colSpan,
        CreateBorderDivs(wordDoc, settings, element.Elements()));
    cell.AddAnnotation(style);
    return cell;
}
/// <summary>
/// Converts <paramref name="wordDoc"/> to XHTML: accepts revisions, simplifies the markup,
/// assembles formatting, runs the preparation passes, transforms the root element of the
/// main document part and finally reifies the collected styles.
/// </summary>
/// <param name="wordDoc">The document to convert; it is passed to every preparation pass.</param>
/// <param name="htmlConverterSettings">Conversion settings.</param>
/// <returns>The transform result cast to <see cref="XElement"/>, after style reification.</returns>
public static XElement ConvertToHtml(WordprocessingDocument wordDoc, HtmlConverterSettings htmlConverterSettings)
{
    InitEntityMap();

    RevisionAccepter.AcceptRevisions(wordDoc);

    // Unlike the other removals, field codes are kept and tabs are not replaced with spaces.
    MarkupSimplifier.SimplifyMarkup(
        wordDoc,
        new SimplifyMarkupSettings
        {
            RemoveComments = true,
            RemoveContentControls = true,
            RemoveEndAndFootNotes = true,
            RemoveFieldCodes = false,
            RemoveLastRenderedPageBreak = true,
            RemovePermissions = true,
            RemoveProof = true,
            RemoveRsidInfo = true,
            RemoveSmartTags = true,
            RemoveSoftHyphens = true,
            RemoveGoBackBookmark = true,
            ReplaceTabsWithSpaces = false,
        });

    FormattingAssembler.AssembleFormatting(
        wordDoc,
        new FormattingAssemblerSettings
        {
            RemoveStyleNamesFromParagraphAndRunProperties = false,
            ClearStyles = false,
            RestrictToSupportedLanguages = htmlConverterSettings.RestrictToSupportedLanguages,
            RestrictToSupportedNumberingFormats = htmlConverterSettings.RestrictToSupportedNumberingFormats,
            CreateHtmlConverterAnnotationAttributes = true,
            OrderElementsPerStandard = false,
            ListItemRetrieverSettings = new ListItemRetrieverSettings()
            {
                ListItemTextImplementations = htmlConverterSettings.ListItemImplementations,
            },
        });

    // Preparation passes; the order is kept exactly as written.
    InsertAppropriateNonbreakingSpaces(wordDoc);
    CalculateSpanWidthForTabs(wordDoc);
    ReverseTableBordersForRtlTables(wordDoc);
    AdjustTableBorders(wordDoc);

    XElement documentRoot = wordDoc.MainDocumentPart.GetXDocument().Root;

    FieldRetriever.AnnotateWithFieldInfo(wordDoc.MainDocumentPart);
    AnnotateForSections(wordDoc);

    var converted = (XElement)ConvertToHtmlTransform(wordDoc, htmlConverterSettings, documentRoot, false, 0m);
    ReifyStylesAndClasses(htmlConverterSettings, converted);

    // Note: the tree returned by ConvertToHtmlTransform contains XEntity objects
    // (the XEntity class is defined in PtOpenXmlUtil.cs). See
    // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
    // Callers that transform the returned tree further must do it correctly,
    // or entities will not be serialized properly.
    return converted;
}
/// <summary>
/// Recursive transform from a WordprocessingML node to XHTML without formatting.
/// Dispatches on the element name; nodes that are not elements, and elements with no
/// matching rule, yield <c>null</c> and are thereby removed from the output.
/// </summary>
/// <param name="wordDoc">The document being converted; its styles part (if any) is consulted for paragraphs.</param>
/// <param name="settings">Supplies <c>PageTitle</c>, <c>Css</c> and <c>CssClassPrefix</c>.</param>
/// <param name="node">The node to transform.</param>
/// <param name="imageHandler">Callback for images; when <c>null</c>, images are dropped.</param>
/// <returns>An XHTML node, a sequence of nodes, or <c>null</c>.</returns>
private static object ConvertToHtmlTransform(WordprocessingDocument wordDoc, HtmlConverterSettings settings, XNode node, Func<ImageInfo, XElement> imageHandler)
{
    XElement element = node as XElement;
    if (element == null)
    {
        return null;
    }

    // Transform the w:document element to the XHTML h:html element.
    if (element.Name == W.document)
    {
        return new XElement(Xhtml.html,
            new XElement(Xhtml.head,
                new XElement(Xhtml.meta,
                    new XAttribute(HtmlNoNamespace.http_equiv, "Content-Type"),
                    new XAttribute(HtmlNoNamespace.content, "text/html; charset=windows-1252")),
                new XElement(Xhtml.meta,
                    new XAttribute(HtmlNoNamespace.name, "Generator"),
                    new XAttribute(HtmlNoNamespace.content, "PowerTools for Open XML")),
                settings.PageTitle != null
                    ? new XElement(Xhtml.title, settings.PageTitle)
                    : null,
                settings.Css != null
                    ? new XElement(Xhtml.style,
                        new XComment(Environment.NewLine + settings.Css + Environment.NewLine))
                    : null),
            element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, imageHandler)));
    }

    // Transform the w:body element to the XHTML h:body element.
    if (element.Name == W.body)
    {
        return new XElement(Xhtml.body,
            element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, imageHandler)));
    }

    if (element.Name == W.p)
    {
        string styleId = (string)element.Elements(W.pPr).Elements(W.pStyle)
            .Attributes(W.val).FirstOrDefault();

        // The styles part is optional; treat a missing part (or an empty one) as
        // "no styles defined" instead of dereferencing null.
        var stylesPart = wordDoc.MainDocumentPart.StyleDefinitionsPart;
        XElement stylesRoot = stylesPart != null ? stylesPart.GetXDocument().Root : null;
        var styles = stylesRoot != null
            ? stylesRoot.Elements(W.style)
            : Enumerable.Empty<XElement>();

        // A paragraph whose style carries an outline level becomes a heading of the
        // corresponding level. This takes care of headings of every level.
        XElement style = styles.FirstOrDefault(s => (string)s.Attribute(W.styleId) == styleId);
        int? outlineLevel = style != null
            ? (int?)style.Elements(W.pPr).Elements(W.outlineLvl).Attributes(W.val).FirstOrDefault()
            : null;
        if (outlineLevel != null)
        {
            return new XElement(Xhtml.xhtml + string.Format("h{0}", outlineLevel + 1),
                settings.CssClassPrefix != null
                    ? new XAttribute(HtmlNoNamespace._class, settings.CssClassPrefix + styleId)
                    : null,
                ConvertEntities(ListItemRetriever.RetrieveListItem(wordDoc, element, null)),
                element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, imageHandler)));
        }

        // Otherwise transform w:p to h:p. A paragraph without an explicit style uses the
        // id of the default paragraph style, if the document declares one; when it does
        // not, the paragraph is simply emitted without a class attribute.
        if (styleId == null)
        {
            XElement defaultParagraphStyle = styles.FirstOrDefault(s =>
                (string)s.Attribute(W.type) == "paragraph" &&
                (string)s.Attribute(W._default) == "1");
            if (defaultParagraphStyle != null)
            {
                styleId = (string)defaultParagraphStyle.Attribute(W.styleId);
            }
        }

        return new XElement(Xhtml.p,
            styleId != null && settings.CssClassPrefix != null
                ? new XAttribute(HtmlNoNamespace._class, settings.CssClassPrefix + styleId)
                : null,
            ConvertEntities(ListItemRetriever.RetrieveListItem(wordDoc, element, null)),
            element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, imageHandler)));
    }

    // Transform every hyperlink in the document to the XHTML h:A element.
    if (element.Name == W.hyperlink && element.Attribute(R.id) != null)
    {
        try
        {
            return new XElement(Xhtml.A,
                new XAttribute(HtmlNoNamespace.href,
                    wordDoc.MainDocumentPart
                        .HyperlinkRelationships
                        .Where(x => x.Id == (string)element.Attribute(R.id))
                        .First()
                        .Uri),
                ConvertEntities(element.Elements(W.r)
                    .Elements(W.t)
                    .Select(s => (string)s).StringConcatenate()));
        }
        catch (UriFormatException)
        {
            // Fall back to the plain content of the hyperlink.
            return element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, imageHandler));
        }
    }

    // Transform runs annotated with field info that has at least one argument into
    // an h:A element whose href is the first argument.
    if (element.Name == W.r && element.Annotation<FieldInfo>() != null &&
        element.Annotation<FieldInfo>().Arguments.Length > 0)
    {
        FieldInfo fieldInfo = element.Annotation<FieldInfo>();
        return new XElement(Xhtml.A,
            new XAttribute(HtmlNoNamespace.href, fieldInfo.Arguments[0]),
            ConvertEntities(element.Elements(W.t)
                .Select(s => (string)s).StringConcatenate()));
    }

    // Transform contents of runs.
    if (element.Name == W.r)
    {
        return element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, imageHandler));
    }

    // Transform every w:t element to a text node.
    if (element.Name == W.t)
    {
        return ConvertEntities(element.Value);
    }

    // Transform w:br to h:br.
    if (element.Name == W.br || element.Name == W.cr)
    {
        return new XElement(Xhtml.br);
    }

    // Transform w:noBreakHyphen to '-'
    if (element.Name == W.noBreakHyphen)
    {
        return new XText("-");
    }

    // Transform w:tbl to h:tbl.
    if (element.Name == W.tbl)
    {
        return new XElement(Xhtml.table,
            new XAttribute(HtmlNoNamespace.border, 1),
            element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, imageHandler)));
    }

    // Transform w:tr to h:tr.
    if (element.Name == W.tr)
    {
        return new XElement(Xhtml.tr,
            element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, imageHandler)));
    }

    // Transform w:tc to h:td.
    if (element.Name == W.tc)
    {
        return new XElement(Xhtml.td,
            element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, imageHandler)));
    }

    // Transform images; without a handler they are dropped.
    if (element.Name == W.drawing || element.Name == W.pict)
    {
        if (imageHandler == null)
        {
            return null;
        }
        return ProcessImage(wordDoc, element, imageHandler);
    }

    // The following removes any nodes that haven't been transformed.
    return null;
}
/// <summary>
/// Converts a word-processing document to XHTML by forwarding to the
/// three-argument overload with <c>null</c> as the third argument.
/// </summary>
/// <param name="wordDoc">The document to convert.</param>
/// <param name="htmlConverterSettings">Settings passed through unchanged.</param>
/// <returns>The element produced by the three-argument overload.</returns>
public static XElement ConvertToHtml(WordprocessingDocument wordDoc,
    HtmlConverterSettings htmlConverterSettings)
{
    XElement html = ConvertToHtml(wordDoc, htmlConverterSettings, null);
    return html;
}
/// <summary>
/// Recursively transforms one WordprocessingML node into XHTML content.
/// </summary>
/// <param name="wordDoc">Document that owns the node; its style definitions and
/// hyperlink relationships are consulted during the transform.</param>
/// <param name="settings">Supplies the page title, CSS text and CSS class prefix.</param>
/// <param name="node">The node to transform. Anything that is not an
/// <see cref="XElement"/> yields <c>null</c>.</param>
/// <param name="imageHandler">Callback handed to ProcessImage for w:drawing / w:pict;
/// when <c>null</c>, images are dropped.</param>
/// <returns>
/// An <see cref="XElement"/>, an <see cref="XText"/>, a lazy sequence of transformed
/// children, whatever ConvertEntities / ProcessImage return, or <c>null</c> for
/// elements that have no mapping.
/// </returns>
private static object ConvertToHtmlTransform(WordprocessingDocument wordDoc,
    HtmlConverterSettings settings, XNode node,
    Func<ImageInfo, XElement> imageHandler)
{
    XElement element = node as XElement;
    if (element == null)
        return null;

    // Transform the w:document element to the XHTML h:html element (head + converted children).
    if (element.Name == W.document)
        return new XElement(Xhtml.html,
            new XElement(Xhtml.head,
                new XElement(Xhtml.meta,
                    new XAttribute(HtmlNoNamespace.http_equiv, "Content-Type"),
                    new XAttribute(HtmlNoNamespace.content, "text/html; charset=windows-1252")),
                new XElement(Xhtml.meta,
                    new XAttribute(HtmlNoNamespace.name, "Generator"),
                    new XAttribute(HtmlNoNamespace.content, "PowerTools for Open XML")),
                settings.PageTitle != null ?
                    new XElement(Xhtml.title, settings.PageTitle) : null,
                settings.Css != null ?
                    new XElement(Xhtml.style,
                        new XComment(Environment.NewLine + settings.Css + Environment.NewLine)) : null
            ),
            element.Elements().Select(e => ConvertToHtmlTransform(
                wordDoc, settings, e, imageHandler))
        );

    // Transform the w:body element to the XHTML h:body element.
    if (element.Name == W.body)
        return new XElement(Xhtml.body,
            element.Elements().Select(e => ConvertToHtmlTransform(
                wordDoc, settings, e, imageHandler)));

    if (element.Name == W.p)
    {
        string styleId = (string)element.Elements(W.pPr).Elements(W.pStyle)
            .Attributes(W.val).FirstOrDefault();

        // Transform every paragraph with a style that has paragraph properties
        // that has an outline level into the same level of heading.
        XElement style = wordDoc.MainDocumentPart.StyleDefinitionsPart
            .GetXDocument().Root.Elements(W.style)
            .Where(s => (string)s.Attribute(W.styleId) == styleId)
            .FirstOrDefault();
        if (style != null)
        {
            int? outlineLevel = (int?)style.Elements(W.pPr)
                .Elements(W.outlineLvl).Attributes(W.val).FirstOrDefault();

            // XHTML only defines h1..h6, so only outline levels 0..5 become headings;
            // any other level is rendered as an ordinary paragraph below.
            if (outlineLevel != null && outlineLevel >= 0 && outlineLevel <= 5)
            {
                return new XElement(Xhtml.xhtml + string.Format("h{0}", outlineLevel + 1),
                    settings.CssClassPrefix != null ?
                        new XAttribute(HtmlNoNamespace._class,
                            settings.CssClassPrefix + styleId) : null,
                    ConvertEntities(ListItemRetriever.RetrieveListItem(wordDoc, element, null)),
                    element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, imageHandler)));
            }
        }

        // Transform w:p to h:p.
        if (styleId == null)
        {
            // Fall back to the default paragraph style. Take(1) keeps "first matching
            // style" semantics while staying null-safe when no default style exists.
            styleId = (string)wordDoc.MainDocumentPart.StyleDefinitionsPart
                .GetXDocument().Root.Elements(W.style)
                .Where(e => (string)e.Attribute(W.type) == "paragraph" &&
                    (string)e.Attribute(W._default) == "1")
                .Take(1)
                .Attributes(W.styleId)
                .FirstOrDefault();
        }
        XElement z = new XElement(Xhtml.p,
            styleId != null ?
                (
                    settings.CssClassPrefix != null ?
                        new XAttribute(HtmlNoNamespace._class,
                            settings.CssClassPrefix + styleId) : null
                ) : null,
            ConvertEntities(ListItemRetriever.RetrieveListItem(wordDoc, element, null)),
            element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, imageHandler)));
        return z;
    }

    // Transform every hyperlink in the document to the XHTML h:A element.
    if (element.Name == W.hyperlink && element.Attribute(R.id) != null)
    {
        try
        {
            return new XElement(Xhtml.A,
                new XAttribute(HtmlNoNamespace.href,
                    wordDoc.MainDocumentPart
                        .HyperlinkRelationships
                        .Where(x => x.Id == (string)element.Attribute(R.id))
                        .First()
                        .Uri
                ),
                ConvertEntities(element.Elements(W.r)
                    .Elements(W.t)
                    .Select(s => (string)s).StringConcatenate())
            );
        }
        catch (UriFormatException)
        {
            // The target is not a usable URI: emit the link content without an anchor.
            return element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, imageHandler));
        }
    }

    if (element.Name == W.r)
    {
        // Transform contents of runs that are part of a hyperlink
        // (runs annotated with a FieldInfo that carries at least one argument).
        FieldInfo fieldInfo = element.Annotation<FieldInfo>();
        if (fieldInfo != null && fieldInfo.Arguments.Length > 0)
        {
            return new XElement(Xhtml.A,
                new XAttribute(HtmlNoNamespace.href, fieldInfo.Arguments[0]),
                ConvertEntities(element.Elements(W.t)
                    .Select(s => (string)s).StringConcatenate())
            );
        }

        // Transform contents of runs.
        return element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, imageHandler));
    }

    // Transform every w:t element to a text node.
    if (element.Name == W.t)
        return ConvertEntities(element.Value);

    // Transform w:br to h:br.
    if (element.Name == W.br || element.Name == W.cr)
        return new XElement(Xhtml.br);

    // Transform w:noBreakHyphen to '-'
    if (element.Name == W.noBreakHyphen)
        return new XText("-");

    // Transform w:tbl to h:tbl.
    if (element.Name == W.tbl)
        return new XElement(Xhtml.table,
            new XAttribute(HtmlNoNamespace.border, 1),
            element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, imageHandler)));

    // Transform w:tr to h:tr.
    if (element.Name == W.tr)
        return new XElement(Xhtml.tr,
            element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, imageHandler)));

    // Transform w:tc to h:td.
    if (element.Name == W.tc)
        return new XElement(Xhtml.td,
            element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, imageHandler)));

    // Transform images.
    if (element.Name == W.drawing || element.Name == W.pict)
    {
        if (imageHandler == null)
            return null;
        return ProcessImage(wordDoc, element, imageHandler);
    }

    // The following removes any nodes that haven't been transformed.
    return null;
}
/// <summary>
/// Builds an XHTML anchor for a w:hyperlink that targets a bookmark.
/// </summary>
/// <param name="wordDoc">Document passed through to ConvertRun.</param>
/// <param name="settings">Settings passed through to ConvertRun.</param>
/// <param name="element">The w:hyperlink element; its w:anchor attribute names the target.</param>
/// <returns>The anchor element, annotated with a style dictionary.</returns>
private static object ProcessHyperlinkToBookmark(WordprocessingDocument wordDoc,
    HtmlConverterSettings settings, XElement element)
{
    // The href is a fragment identifier built from the w:anchor attribute.
    string fragment = "#" + (string)element.Attribute(W.anchor);
    var convertedRuns = element.Elements(W.r)
        .Select(r => ConvertRun(wordDoc, settings, r));

    var anchor = new XElement(Xhtml.a,
        new XAttribute("href", fragment),
        convertedRuns);

    // The style dictionary travels with the element as an annotation.
    var css = new Dictionary<string, string>
    {
        { "text-decoration", "none" },
    };
    anchor.AddAnnotation(css);

    return anchor;
}
/// <summary>
/// Groups adjacent elements that share numbering (or a contextual-spacing style) and
/// converts them, flagging every element except the last of such a group.
/// </summary>
/// <param name="wordDoc">Document passed through to ConvertToHtmlTransform.</param>
/// <param name="settings">Settings passed through to ConvertToHtmlTransform.</param>
/// <param name="elements">The sibling elements to group and convert.</param>
/// <param name="currentMarginLeft">Left margin passed through to ConvertToHtmlTransform.</param>
/// <returns>One lazily evaluated sequence of converted content per adjacent group.</returns>
private static IEnumerable<object> GroupAndVerticallySpaceNumberedParagraphs(
    WordprocessingDocument wordDoc, HtmlConverterSettings settings,
    IEnumerable<XElement> elements, decimal currentMarginLeft)
{
    // Key: "num:<id>" for numbered items, "sty:<style>" for styled paragraphs with
    // w:contextualSpacing, and "" for everything else.
    Func<XElement, string> groupKey = para =>
    {
        var numId = (string)para.Attribute(PtOpenXml.pt + "AbstractNumId");
        if (numId != null)
            return "num:" + numId;

        bool hasContextualSpacing = para.Elements(W.pPr).Elements(W.contextualSpacing).Any();
        if (!hasContextualSpacing)
            return "";

        var styleName = (string)para.Elements(W.pPr).Elements(W.pStyle)
            .Attributes(W.val).FirstOrDefault();
        return styleName == null ? "" : "sty:" + styleName;
    };

    var adjacentGroups = elements.GroupAdjacent(groupKey).ToList();

    var converted = adjacentGroups.Select(group =>
    {
        // Ungrouped content: the flag is false for every element.
        if (group.Key == "")
            return group.Select(para =>
                ConvertToHtmlTransform(wordDoc, settings, para, false, currentMarginLeft));

        // Grouped content: the flag is true for all but the last element.
        int lastIndex = group.Count() - 1;
        return group.Select((para, index) =>
            ConvertToHtmlTransform(wordDoc, settings, para, index != lastIndex, currentMarginLeft));
    });

    return converted.Cast<object>();
}
/// <summary>
/// Converts the content of a w:sdt content control.
/// </summary>
/// <param name="wordDoc">Document passed through to the converters.</param>
/// <param name="settings">Settings passed through to the converters.</param>
/// <param name="element">The w:sdt element.</param>
/// <param name="currentMarginLeft">Left margin used for run-level content.</param>
/// <returns>
/// A list of converted children when the control sits inside a paragraph; otherwise
/// the result of CreateBorderDivs over the control's content.
/// </returns>
private static object ProcessContentControl(WordprocessingDocument wordDoc,
    HtmlConverterSettings settings, XElement element, decimal currentMarginLeft)
{
    var sdtChildren = element.Elements(W.sdtContent).Elements();

    // Only ancestors up to the nearest w:txbxContent are considered when deciding
    // whether the control lives inside a paragraph.
    bool insideParagraph = element.Ancestors()
        .TakeWhile(a => a.Name != W.txbxContent)
        .Any(a => a.Name == W.p);

    if (!insideParagraph)
        return CreateBorderDivs(wordDoc, settings, sdtChildren);

    return sdtChildren
        .Select(child => ConvertToHtmlTransform(wordDoc, settings, child, false, currentMarginLeft))
        .ToList();
}
/// <summary>
/// Recursively transforms one WordprocessingML node into XHTML content. Styling is
/// attached to produced elements as a Dictionary&lt;string, string&gt; annotation.
/// </summary>
/// <param name="wordDoc">Document that owns the node; its style definitions and
/// hyperlink relationships are consulted.</param>
/// <param name="settings">Supplies the page title and the image handler.</param>
/// <param name="node">The node to transform. Anything that is not an
/// <see cref="XElement"/> yields <c>null</c>.</param>
/// <param name="suppressTrailingWhiteSpace">Forwarded to ConvertParagraph for w:p.</param>
/// <param name="currentMarginLeft">Forwarded to ConvertParagraph and to recursive calls.</param>
/// <returns>
/// An XHTML element, an array or sequence of nodes, a text node, or <c>null</c> for
/// elements that have no mapping.
/// </returns>
private static object ConvertToHtmlTransform(WordprocessingDocument wordDoc,
    HtmlConverterSettings settings, XNode node, bool suppressTrailingWhiteSpace,
    decimal currentMarginLeft)
{
    XElement element = node as XElement;
    if (element == null)
        return null;

    // CSS lengths must use '.' as the decimal separator regardless of the thread culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    if (element.Name == W.document)
        return new XElement(Xhtml.html,
            new XElement(Xhtml.head,
                new XElement(Xhtml.meta,
                    new XAttribute("http-equiv", "Content-Type"),
                    new XAttribute("content", "text/html; charset=utf-8")),
                new XElement(Xhtml.meta,
                    new XAttribute("name", "Generator"),
                    new XAttribute("content", "PowerTools for Open XML")),
                settings.PageTitle != null ?
                    new XElement(Xhtml.title, new XText(settings.PageTitle)) : null
            ),
            element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, currentMarginLeft))
        );

    // Transform the w:body element to the XHTML h:body element.
    if (element.Name == W.body)
    {
        var sectionDivContent = new XElement(Xhtml.body, CreateSectionDivs(wordDoc, settings, element));
        return sectionDivContent;
    }

    if (element.Name == W.p)
    {
        var bidi = element
            .Elements(W.pPr)
            .Elements(W.bidi)
            .Where(b => b.Attribute(W.val) == null || b.Attribute(W.val).ToBoolean() == true)
            .FirstOrDefault();
        var isBidi = bidi != null;

        string styleId = (string)element.Elements(W.pPr).Elements(W.pStyle)
            .Attributes(W.val).FirstOrDefault();

        // Default to h:p. Only a resolvable style with outline level 0..5 upgrades the
        // paragraph to h1..h6. A style ID that is not present in the styles part must
        // still produce a paragraph rather than dropping the content.
        XName elementName = Xhtml.p;
        if (styleId != null)
        {
            XElement style = wordDoc.MainDocumentPart.StyleDefinitionsPart
                .GetXDocument().Root.Elements(W.style)
                .Where(s => (string)s.Attribute(W.styleId) == styleId)
                .FirstOrDefault();
            if (style != null)
            {
                int? outlineLevel = (int?)style.Elements(W.pPr)
                    .Elements(W.outlineLvl).Attributes(W.val).FirstOrDefault();
                if (outlineLevel != null && outlineLevel <= 5)
                    elementName = Xhtml.xhtml + string.Format("h{0}", outlineLevel + 1);
            }
        }
        return ConvertParagraph(wordDoc, settings, element, elementName,
            suppressTrailingWhiteSpace, currentMarginLeft, isBidi);
    }

    // Transform hyperlinks to the XHTML h:A element.
    if (element.Name == W.hyperlink && element.Attribute(R.id) != null)
    {
        try
        {
            return new XElement(Xhtml.A,
                new XAttribute("href",
                    wordDoc.MainDocumentPart
                        .HyperlinkRelationships
                        .Where(x => x.Id == (string)element.Attribute(R.id))
                        .First()
                        .Uri
                ),
                element.Elements(W.r).Select(run => ConvertRun(wordDoc, settings, run))
            );
        }
        catch (UriFormatException)
        {
            // The target is not a usable URI: emit the link content without an anchor.
            return element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, currentMarginLeft));
        }
    }

    // Transform hyperlinks to bookmarks to the XHTML h:A element.
    if (element.Name == W.hyperlink && element.Attribute(W.anchor) != null)
    {
        var style = new Dictionary<string, string>();
        var a = new XElement(Xhtml.A,
            new XAttribute("href", "#" + (string)element.Attribute(W.anchor)),
            element
                .Elements(W.r)
                .Select(run => ConvertRun(wordDoc, settings, run)));
        style.Add("text-decoration", "none");
        a.AddAnnotation(style);
        return a;
    }

    // Transform contents of runs.
    if (element.Name == W.r)
        return ConvertRun(wordDoc, settings, element);

    // Transform w:bookmarkStart into anchor
    if (element.Name == W.bookmarkStart)
    {
        var name = (string)element.Attribute(W.name);
        if (name != null)
        {
            var style = new Dictionary<string, string>();
            var a = new XElement(Xhtml.A,
                new XAttribute("id", name),
                new XText(""));
            style.Add("text-decoration", "none");
            a.AddAnnotation(style);
            return a;
        }
        // A bookmark without a name falls through and ends up as null.
    }

    // Transform every w:t element to a text node.
    if (element.Name == W.t)
    {
        var textWithEntities = ConvertEntities(element.Value);
        return textWithEntities;
    }

    // Transform symbols to spans
    if (element.Name == W.sym)
    {
        var cs = (string)element.Attribute(W._char);
        var c = Convert.ToInt32(cs, 16);
        var symbolSpan = new XElement(Xhtml.span, new XEntity(string.Format("#{0}", ((int)c).ToString())));
        return symbolSpan;
    }

    // Transform tabs that have the pt:TabWidth attribute set
    if (element.Name == W.tab)
    {
        var tabWidthAtt = element.Attribute(PtOpenXml.TabWidth);
        if (tabWidthAtt != null)
        {
            var leader = (string)element.Attribute(PtOpenXml.Leader);
            var tabWidth = (decimal)tabWidthAtt;
            var style = new Dictionary<string, string>();
            XElement span;
            if (leader != null)
            {
                var leaderChar = ".";
                if (leader == "hyphen")
                    leaderChar = "-";
                else if (leader == "dot")
                    leaderChar = ".";
                else if (leader == "underscore")
                    leaderChar = "_";

                // Measure one leader character in the font of the run that holds the tab,
                // falling back to the paragraph's font attribute.
                var runContainingTabToReplace = element.Ancestors(W.r).First();
                var fontNameAtt = runContainingTabToReplace.Attribute(PtOpenXml.pt + "FontName");
                if (fontNameAtt == null)
                    fontNameAtt = runContainingTabToReplace.Ancestors(W.p).First()
                        .Attribute(PtOpenXml.pt + "FontName");

                var dummyRun = new XElement(W.r,
                    fontNameAtt,
                    runContainingTabToReplace.Elements(W.rPr),
                    new XElement(W.t, leaderChar));

                var widthOfLeaderChar = CalcWidthOfRunInTwips(dummyRun);

                bool forceArial = false;
                if (widthOfLeaderChar == 0)
                {
                    // Width could not be determined: retry the measurement with Arial.
                    dummyRun = new XElement(W.r,
                        new XAttribute(PtOpenXml.FontName, "Arial"),
                        runContainingTabToReplace.Elements(W.rPr),
                        new XElement(W.t, leaderChar));
                    widthOfLeaderChar = CalcWidthOfRunInTwips(dummyRun);
                    forceArial = true;
                }

                if (widthOfLeaderChar != 0)
                {
                    var numberOfLeaderChars = (int)(Math.Floor((tabWidth * 1440) / widthOfLeaderChar));
                    if (numberOfLeaderChars < 0)
                        numberOfLeaderChars = 0;
                    span = new XElement(Xhtml.span,
                        " " + "".PadRight(numberOfLeaderChars, leaderChar[0]) + " ");
                    style.Add("margin", "0 0 0 0");
                    style.Add("padding", "0 0 0 0");
                    style.Add("width", string.Format(invariant, "{0:0.00}in", tabWidth));
                    style.Add("text-align", "center");
                    if (forceArial)
                        style.Add("font-family", "Arial");
                }
                else
                {
                    span = new XElement(Xhtml.span, " ");
                    style.Add("margin", "0 0 0 0");
                    style.Add("padding", "0 0 0 0");
                    style.Add("width", string.Format(invariant, "{0:0.00}in", tabWidth));
                    style.Add("text-align", "center");
                    if (leader == "underscore")
                    {
                        style.Add("text-decoration", "underline");
                    }
                }
            }
            else
            {
#if false
                var bidi = element
                    .Ancestors(W.p)
                    .Take(1)
                    .Elements(W.pPr)
                    .Elements(W.bidi)
                    .Where(b => b.Attribute(W.val) == null || b.Attribute(W.val).ToBoolean() == true)
                    .FirstOrDefault();
                var isBidi = bidi != null;
                if (isBidi)
                    span = new XElement(Xhtml.span, new XEntity("#x200f")); // RLM
                else
                    span = new XElement(Xhtml.span, new XEntity("#x200e")); // LRM
#else
                span = new XElement(Xhtml.span, new XEntity("nbsp"));
#endif
                style.Add("margin", string.Format(invariant, "0 0 0 {0:0.00}in", tabWidth));
                style.Add("padding", "0 0 0 0");
            }
            span.AddAnnotation(style);
            return span;
        }
        // A tab without pt:TabWidth falls through and ends up as null.
    }

    // Transform w:br to h:br.
    if (element.Name == W.br || element.Name == W.cr)
    {
        XElement span = null;
        var tabWidth = (decimal?)element.Attribute(PtOpenXml.TabWidth);
        if (tabWidth != null)
        {
            span = new XElement(Xhtml.span);
            var style = new Dictionary<string, string>();
            style.Add("margin", string.Format(invariant, "0 0 0 {0:0.00}in", tabWidth));
            style.Add("padding", "0 0 0 0");
            span.AddAnnotation(style);
        }

        var paragraph = element.Ancestors(W.p).FirstOrDefault();
        bool isBidi = false;
        if (paragraph != null)
        {
            var bidi = paragraph
                .Elements(W.pPr)
                .Elements(W.bidi)
                .Where(b => b.Attribute(W.val) == null || b.Attribute(W.val).ToBoolean() == true)
                .FirstOrDefault();
            isBidi = bidi != null;
        }

        var br = new XElement(Xhtml.br);
        XEntity zeroWidthChar = null;
        if (isBidi)
            zeroWidthChar = new XEntity("#x200f"); // RLM
        else
            zeroWidthChar = new XEntity("#x200e"); // LRM

        return new object[]
        {
            br,
            zeroWidthChar,
            span,
        };
    }

    // Transform w:noBreakHyphen to '-'
    if (element.Name == W.noBreakHyphen)
        return new XText("-");

    // Transform w:tbl to h:tbl.
    if (element.Name == W.tbl)
    {
        var style = new Dictionary<string, string>();
        style.AddIfMissing("border-collapse", "collapse");
        style.AddIfMissing("border", "none");
        var bidiVisual = element.Elements(W.tblPr).Elements(W.bidiVisual).FirstOrDefault();
        var tblW = element.Elements(W.tblPr).Elements(W.tblW).FirstOrDefault();
        if (tblW != null)
        {
            var type = (string)tblW.Attribute(W.type);
            if (type != null && type == "pct")
            {
                // The w:w value is divided by 50 to obtain a percentage.
                var w = (int)tblW.Attribute(W._w);
                style.AddIfMissing("width", (w / 50).ToString() + "%");
            }
        }
        var tblInd = element.Elements(W.tblPr).Elements(W.tblInd).FirstOrDefault();
        if (tblInd != null)
        {
            var tblIndType = (string)tblInd.Attribute(W.type);
            if (tblIndType != null)
            {
                if (tblIndType == "dxa")
                {
                    var width = (decimal?)tblInd.Attribute(W._w);
                    if (width != null)
                    {
                        style.AddIfMissing("margin-left", string.Format(invariant, "{0}pt", width / 20m));
                    }
                }
            }
        }
        XAttribute tableDirection = null;
        if (bidiVisual != null)
        {
            tableDirection = new XAttribute("dir", "rtl");
        }
        else
        {
            tableDirection = new XAttribute("dir", "ltr");
        }
        style.AddIfMissing("margin-bottom", ".001pt");
        var table = new XElement(Xhtml.table,
            new XAttribute("border", "1"),
            new XAttribute("cellspacing", 0),
            new XAttribute("cellpadding", 0),
            tableDirection,
            element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, currentMarginLeft)));
        table.AddAnnotation(style);
        var jc = (string)element.Elements(W.tblPr).Elements(W.jc).Attributes(W.val).FirstOrDefault();
        if (jc == null)
            jc = "left";
        XAttribute dir = null;
        XAttribute jcToUse = null;
        if (bidiVisual != null)
        {
            // Left and right are mirrored for visually right-to-left tables.
            dir = new XAttribute("dir", "rtl");
            if (jc == "left")
                jcToUse = new XAttribute("align", "right");
            else if (jc == "right")
                jcToUse = new XAttribute("align", "left");
            else if (jc == "center")
                jcToUse = new XAttribute("align", "center");
        }
        else
        {
            jcToUse = new XAttribute("align", jc);
        }
        var tableDiv = new XElement(Xhtml.div,
            dir,
            jcToUse,
            table);
        return tableDiv;
    }

    // Transform w:tr to h:tr.
    if (element.Name == W.tr)
    {
        var style = new Dictionary<string, string>();
        int? trHeight = (int?)element.Elements(W.trPr).Elements(W.trHeight).Attributes(W.val).FirstOrDefault();
        if (trHeight != null)
            style.AddIfMissing("height", string.Format(invariant, "{0}in", (decimal)trHeight / 1440m));
        var htmlRow = new XElement(Xhtml.tr,
            element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, currentMarginLeft)));
        if (style.Any())
            htmlRow.AddAnnotation(style);
        return htmlRow;
    }

    // Transform w:tc to h:td.
    if (element.Name == W.tc)
    {
        var style = new Dictionary<string, string>();
        XAttribute colSpan = null;
        XAttribute rowSpan = null;

        var tcPr = element.Element(W.tcPr);
        if (tcPr != null)
        {
            if ((string)tcPr.Elements(W.vMerge).Attributes(W.val).FirstOrDefault() == "restart")
            {
                // Count the cells in the same column position of the following rows that
                // continue this vertical merge; that count becomes the rowspan.
                var currentRow = element.Parent.ElementsBeforeSelf(W.tr).Count();
                var currentCell = element.ElementsBeforeSelf(W.tc).Count();
                var tbl = element.Parent.Parent;
                int rowSpanCount = 1;
                currentRow += 1;
                while (true)
                {
                    var row = tbl.Elements(W.tr).Skip(currentRow).FirstOrDefault();
                    if (row == null)
                        break;
                    var cell2 = row.Elements(W.tc).Skip(currentCell).FirstOrDefault();
                    if (cell2 == null)
                        break;
                    if (cell2.Elements(W.tcPr).Elements(W.vMerge).FirstOrDefault() == null)
                        break;
                    if ((string)cell2.Elements(W.tcPr).Elements(W.vMerge).Attributes(W.val).FirstOrDefault() == "restart")
                        break;
                    currentRow += 1;
                    rowSpanCount += 1;
                }
                rowSpan = new XAttribute("rowspan", rowSpanCount);
            }

            // A continuation cell of a vertical merge is covered by the rowspan above it.
            if (tcPr.Element(W.vMerge) != null && (string)tcPr.Elements(W.vMerge).Attributes(W.val).FirstOrDefault() != "restart")
                return null;

            if (tcPr.Element(W.vAlign) != null)
            {
                var vAlignVal = (string)tcPr.Elements(W.vAlign).Attributes(W.val).FirstOrDefault();
                if (vAlignVal == "top")
                    style.AddIfMissing("vertical-align", "top");
                else if (vAlignVal == "center")
                    style.AddIfMissing("vertical-align", "middle");
                else if (vAlignVal == "bottom")
                    style.AddIfMissing("vertical-align", "bottom");
                else
                    style.AddIfMissing("vertical-align", "middle");
            }
            style.AddIfMissing("vertical-align", "top");

            if ((string)tcPr.Elements(W.tcW).Attributes(W.type).FirstOrDefault() == "dxa")
            {
                decimal width = (int)tcPr.Elements(W.tcW).Attributes(W._w).FirstOrDefault();
                style.AddIfMissing("width", string.Format(invariant, "{0}pt", width / 20m));
            }
            if ((string)tcPr.Elements(W.tcW).Attributes(W.type).FirstOrDefault() == "pct")
            {
                decimal width = (int)tcPr.Elements(W.tcW).Attributes(W._w).FirstOrDefault();
                style.AddIfMissing("width", string.Format(invariant, "{0:0.0}%", width / 50m));
            }

            var tcBorders = tcPr.Element(W.tcBorders);
            GenerateBorderStyle(tcBorders, W.top, style, BorderType.Cell);
            GenerateBorderStyle(tcBorders, W.right, style, BorderType.Cell);
            GenerateBorderStyle(tcBorders, W.bottom, style, BorderType.Cell);
            GenerateBorderStyle(tcBorders, W.left, style, BorderType.Cell);

            CreateStyleFromShd(style, tcPr.Element(W.shd));

            var gridSpan = (int?)tcPr.Elements(W.gridSpan).Attributes(W.val).Select(a => (int?)a).FirstOrDefault();
            if (gridSpan != null)
                colSpan = new XAttribute("colspan", (int)gridSpan);
        }
        style.AddIfMissing("padding-top", "0in");
        style.AddIfMissing("padding-bottom", "0in");

        var cell = new XElement(Xhtml.td,
            rowSpan,
            colSpan,
            CreateBorderDivs(wordDoc, settings, element.Elements()));
        cell.AddAnnotation(style);
        return cell;
    }

    // Transform images
    if (element.Name == W.drawing || element.Name == W.pict || element.Name == W._object)
    {
        if (settings.ImageHandler == null)
            return null;
        return ProcessImage(wordDoc, element, settings.ImageHandler);
    }

    // Transform content controls.
    if (element.Name == W.sdt)
    {
        // Elements(...).Elements() stays safe when w:sdtContent is missing.
        var sdtChildren = element.Elements(W.sdtContent).Elements();
        var relevantAncestors = element.Ancestors().TakeWhile(a => a.Name != W.txbxContent);
        var isRunLevelContentControl = relevantAncestors.Any(a => a.Name == W.p);
        if (isRunLevelContentControl)
        {
            var o = sdtChildren
                .Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, currentMarginLeft))
                .ToList();
            return o;
        }
        else
        {
            var o = CreateBorderDivs(wordDoc, settings, sdtChildren);
            return o;
        }
    }

    if (element.Name == W.smartTag || element.Name == W.fldSimple)
    {
        var o = CreateBorderDivs(wordDoc, settings, element.Elements());
        return o;
    }

    // Anything that has not been transformed is removed.
    return null;
}
/// <summary>
/// Converts a w:tbl element into an XHTML table wrapped in an aligning div.
/// </summary>
/// <param name="wordDoc">Document passed through to ConvertToHtmlTransform.</param>
/// <param name="settings">Settings passed through to ConvertToHtmlTransform.</param>
/// <param name="element">The w:tbl element.</param>
/// <param name="currentMarginLeft">Left margin passed through to ConvertToHtmlTransform.</param>
/// <returns>A div element that carries direction/alignment and contains the table.</returns>
private static object ProcessTable(WordprocessingDocument wordDoc, HtmlConverterSettings settings,
    XElement element, decimal currentMarginLeft)
{
    var css = new Dictionary<string, string>();
    css.AddIfMissing("border-collapse", "collapse");
    css.AddIfMissing("border", "none");

    bool isBidiVisual = element.Elements(W.tblPr).Elements(W.bidiVisual).FirstOrDefault() != null;

    // Percentage width: the w:w value is divided by 50 (integer division).
    var tblW = element.Elements(W.tblPr).Elements(W.tblW).FirstOrDefault();
    if (tblW != null && (string)tblW.Attribute(W.type) == "pct")
    {
        int rawWidth = (int)tblW.Attribute(W._w);
        css.AddIfMissing("width", (rawWidth / 50) + "%");
    }

    // Indentation given in "dxa" units becomes a left margin in points (value / 20).
    var tblInd = element.Elements(W.tblPr).Elements(W.tblInd).FirstOrDefault();
    if (tblInd != null && (string)tblInd.Attribute(W.type) == "dxa")
    {
        var indent = (decimal?)tblInd.Attribute(W._w);
        if (indent != null)
        {
            string marginLeft = indent > 0m
                ? string.Format(NumberFormatInfo.InvariantInfo, "{0}pt", indent / 20m)
                : "0";
            css.AddIfMissing("margin-left", marginLeft);
        }
    }

    var tableDirection = new XAttribute("dir", isBidiVisual ? "rtl" : "ltr");
    css.AddIfMissing("margin-bottom", ".001pt");

    var table = new XElement(Xhtml.table,
        // TODO: Revisit and make sure the omission is covered by appropriate CSS.
        // new XAttribute("border", "1"),
        // new XAttribute("cellspacing", 0),
        // new XAttribute("cellpadding", 0),
        tableDirection,
        element.Elements().Select(child =>
            ConvertToHtmlTransform(wordDoc, settings, child, false, currentMarginLeft)));
    table.AddAnnotation(css);

    var jc = (string)element.Elements(W.tblPr).Elements(W.jc).Attributes(W.val).FirstOrDefault() ?? "left";

    XAttribute wrapperDir = null;
    XAttribute wrapperAlign = null;
    if (isBidiVisual)
    {
        // Left and right are mirrored for visually right-to-left tables; any other
        // value than left/right/center produces no align attribute.
        wrapperDir = new XAttribute("dir", "rtl");
        switch (jc)
        {
            case "left":
                wrapperAlign = new XAttribute("align", "right");
                break;
            case "right":
                wrapperAlign = new XAttribute("align", "left");
                break;
            case "center":
                wrapperAlign = new XAttribute("align", "center");
                break;
        }
    }
    else
    {
        wrapperAlign = new XAttribute("align", jc);
    }

    return new XElement(Xhtml.div, wrapperDir, wrapperAlign, table);
}
/// <summary>
/// Converts a word-processing document to XHTML by forwarding to the
/// three-argument overload with <c>null</c> as the third argument.
/// </summary>
/// <param name="wordDoc">The document to convert.</param>
/// <param name="htmlConverterSettings">Settings passed through unchanged.</param>
/// <returns>The element produced by the three-argument overload.</returns>
public static XElement ConvertToHtml(WordprocessingDocument wordDoc,
    HtmlConverterSettings htmlConverterSettings)
{
    XElement converted = ConvertToHtml(wordDoc, htmlConverterSettings, null);
    return converted;
}
/// <summary>
/// Converts a w:tr element into an XHTML tr element.
/// </summary>
/// <param name="wordDoc">Document passed through to ConvertToHtmlTransform.</param>
/// <param name="settings">Settings passed through to ConvertToHtmlTransform.</param>
/// <param name="element">The w:tr element.</param>
/// <param name="currentMarginLeft">Left margin passed through to ConvertToHtmlTransform.</param>
/// <returns>The tr element, annotated with a style dictionary when a row height is set.</returns>
private static object ProcessTableRow(WordprocessingDocument wordDoc, HtmlConverterSettings settings,
    XElement element, decimal currentMarginLeft)
{
    var css = new Dictionary<string, string>();

    // The w:trHeight value is divided by 1440 and emitted in inches.
    XAttribute heightAttribute = element.Elements(W.trPr)
        .Elements(W.trHeight)
        .Attributes(W.val)
        .FirstOrDefault();
    int? heightValue = (int?)heightAttribute;
    if (heightValue != null)
    {
        string heightCss = string.Format(NumberFormatInfo.InvariantInfo,
            "{0:0.00}in", heightValue.Value / 1440m);
        css.AddIfMissing("height", heightCss);
    }

    var convertedCells = element.Elements()
        .Select(child => ConvertToHtmlTransform(wordDoc, settings, child, false, currentMarginLeft));
    var row = new XElement(Xhtml.tr, convertedCells);

    // Only annotate the row when there is something to style.
    if (css.Count > 0)
        row.AddAnnotation(css);

    return row;
}
/// <summary>
/// Converts this instance to XHTML by forwarding to
/// <c>HtmlConverter.ConvertToHtml</c> with this instance as the first argument.
/// </summary>
/// <param name="htmlConverterSettings">Settings passed through unchanged.</param>
/// <param name="imageHandler">Image callback passed through unchanged.</param>
/// <returns>The element produced by <c>HtmlConverter.ConvertToHtml</c>.</returns>
public XElement ConvertToHtml(HtmlConverterSettings htmlConverterSettings,
    Func<ImageInfo, XElement> imageHandler)
{
    XElement html = HtmlConverter.ConvertToHtml(this, htmlConverterSettings, imageHandler);
    return html;
}
/*
 * Handle:
 * - b
 * - bdr
 * - caps
 * - color
 * - dstrike
 * - highlight
 * - i
 * - position
 * - rFonts
 * - shd
 * - smallCaps
 * - spacing
 * - strike
 * - sz
 * - u
 * - vanish
 * - vertAlign
 *
 * Don't handle:
 * - em
 * - emboss
 * - fitText
 * - imprint
 * - kern
 * - outline
 * - shadow
 * - w
 *
 */

/// <summary>
/// Converts a w:r run into XHTML content, translating its run properties into CSS
/// (attached as a Dictionary&lt;string, string&gt; annotation) and wrapper elements.
/// </summary>
/// <param name="wordDoc">Document passed through to ConvertToHtmlTransform.</param>
/// <param name="settings">Settings passed through to ConvertToHtmlTransform.</param>
/// <param name="run">The w:r element to convert.</param>
/// <returns>
/// <c>null</c> for runs marked w:webHidden; the lazily converted children when the run
/// has no w:rPr; otherwise the converted content, wrapped in a span when any style,
/// w:rtl or a non-default language applies.
/// </returns>
private static object ConvertRun(WordprocessingDocument wordDoc,
    HtmlConverterSettings settings, XElement run)
{
    // CSS lengths must use '.' as the decimal separator regardless of the thread culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var style = new Dictionary<string, string>();
    var sn = (string)run.Attribute(PtOpenXml.StyleName);
    if (sn != null)
        style.Add("PtStyleName", sn);

    var rPr = run.Element(W.rPr);
    if (rPr == null)
    {
        object content2 = run.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, 0m));
        return content2;
    }

    // hide all content that contains the w:rPr/w:webHidden element
    if (rPr.Element(W.webHidden) != null)
        return null;

    // W.bdr
    if (rPr.Element(W.bdr) != null && (string)rPr.Elements(W.bdr).Attributes(W.val).FirstOrDefault() != "none")
    {
        style.AddIfMissing("border", "solid windowtext 1.0pt");
        style.AddIfMissing("padding", "0in");
    }

    // W.color
    string color = (string)rPr.Elements(W.color).Attributes(W.val).FirstOrDefault();
    if (color != null)
        CreateColorProperty("color", color, style);

    // W.highlight
    string highlight = (string)rPr.Elements(W.highlight).Attributes(W.val).FirstOrDefault();
    if (highlight != null)
        CreateColorProperty("background", highlight, style);

    // W.shd
    string shade = (string)rPr.Elements(W.shd).Attributes(W.fill).FirstOrDefault();
    if (shade != null)
        CreateColorProperty("background", shade, style);

    // Pt.FontName (a w:sym child overrides the run's font)
    string font = null;
    if (run.Element(W.sym) != null)
        font = (string)run.Elements(W.sym).Attributes(W.font).FirstOrDefault();
    else
        font = (string)run.Attributes(PtOpenXml.FontName).FirstOrDefault();
    if (font != null)
        CreateFontCssProperty(font, style);

    // W.sz (w:szCs is used for runs whose language type is "bidi"); value / 2 is emitted in pt
    var languageType = (string)run.Attribute(PtOpenXml.LanguageType);
    decimal? sz = null;
    if (languageType == "bidi")
        sz = (decimal?)rPr.Elements(W.szCs).Attributes(W.val).FirstOrDefault();
    else
        sz = (decimal?)rPr.Elements(W.sz).Attributes(W.val).FirstOrDefault();
    if (sz != null)
        style.AddIfMissing("font-size", string.Format(invariant, "{0}pt", sz / 2.0m));

    // W.caps
    if (getBoolProp(rPr, W.caps))
        style.AddIfMissing("text-transform", "uppercase");

    // W.smallCaps
    if (getBoolProp(rPr, W.smallCaps))
        style.AddIfMissing("font-variant", "small-caps");

    // W.spacing (value / 20 is emitted in pt)
    decimal? spacingInTwips = (decimal?)rPr.Elements(W.spacing).Attributes(W.val).FirstOrDefault();
    if (spacingInTwips != null)
        style.AddIfMissing("letter-spacing", string.Format(invariant, "{0}pt", spacingInTwips / 20));

    // W.position (value / 2 is emitted in pt; a positive value moves the text up)
    decimal? position = (decimal?)rPr.Elements(W.position).Attributes(W.val).FirstOrDefault();
    if (position != null)
    {
        style.AddIfMissing("position", "relative");
        style.AddIfMissing("top", string.Format(invariant, "{0}pt", -(position / 2)));
    }

    // W.vanish
    if (getBoolProp(rPr, W.vanish))
        style.AddIfMissing("display", "none");

    object content = run.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, 0m));

    // Each wrapper below is only kept when it ends up with child nodes, so empty
    // runs do not produce empty formatting elements.

    // W.u
    if (rPr.Element(W.u) != null && (string)rPr.Elements(W.u).Attributes(W.val).FirstOrDefault() != "none")
    {
        var newContent = new XElement(Xhtml.u, content);
        if (newContent.Nodes().Any())
            content = newContent;
        style.AddIfMissing("text-decoration", "underline");
    }

    // W.i
    if (getBoolProp(rPr, W.i))
    {
        var newContent = new XElement(Xhtml.i, content);
        if (newContent.Nodes().Any())
            content = newContent;
        style.AddIfMissing("font-style", "italic");
    }

    // W.b
    if (getBoolProp(rPr, W.b))
    {
        var newContent = new XElement(Xhtml.b, content);
        if (newContent.Nodes().Any())
            content = newContent;
        style.AddIfMissing("font-weight", "bold");
    }
    else
    {
        style.AddIfMissing("font-weight", "normal");
    }

    // W.strike
    if (getBoolProp(rPr, W.strike) || getBoolProp(rPr, W.dstrike))
    {
        var newContent = new XElement(Xhtml.s, content);
        if (newContent.Nodes().Any())
            content = newContent;
        style.AddIfMissing("text-decoration", "line-through");
    }

    // W.vertAlign
    if (rPr.Element(W.vertAlign) != null && (string)rPr.Elements(W.vertAlign).Attributes(W.val).FirstOrDefault() == "superscript")
    {
        var newContent = new XElement(Xhtml.sup, content);
        if (newContent.Nodes().Any())
            content = newContent;
    }

    if (rPr.Element(W.vertAlign) != null && (string)rPr.Elements(W.vertAlign).Attributes(W.val).FirstOrDefault() == "subscript")
    {
        var newContent = new XElement(Xhtml.sub, content);
        if (newContent.Nodes().Any())
            content = newContent;
    }

    var rtl = rPr.Element(W.rtl);
    var isRtl = rtl != null;

    // A run that has no w:p ancestor is treated as being in a non-bidi paragraph.
    var paragraph = run.Ancestors(W.p).FirstOrDefault();
    bool paraIsBidi = false;
    if (paragraph != null)
    {
        var paraBidi = paragraph
            .Elements(W.pPr)
            .Elements(W.bidi)
            .Where(b => b.Attribute(W.val) == null || b.Attribute(W.val).ToBoolean() == true)
            .FirstOrDefault();
        paraIsBidi = paraBidi != null;
    }

    string lang = null;
    if (languageType == "western")
        lang = (string)rPr.Elements(W.lang).Attributes(W.val).FirstOrDefault();
    else if (languageType == "bidi")
        lang = (string)rPr.Elements(W.lang).Attributes(W.bidi).FirstOrDefault();
    else if (languageType == "eastAsia")
        lang = (string)rPr.Elements(W.lang).Attributes(W.eastAsia).FirstOrDefault();

    // only do the following for text runs.
    XEntity runStartMark = null;
    XEntity runEndMark = null;

    // Can't add directional marks if the font-family is symbol - they are visible, and display as a ?
    string fontFamily;
    bool addDirectionalMarks = !(style.TryGetValue("font-family", out fontFamily)
        && string.Equals(fontFamily, "symbol", StringComparison.OrdinalIgnoreCase));

    if (addDirectionalMarks && run.Element(W.t) != null)
    {
        if (isRtl)
        {
            runStartMark = new XEntity("#x200f"); // RLM
            runEndMark = new XEntity("#x200f"); // RLM
        }
        else if (paraIsBidi)
        {
            runStartMark = new XEntity("#x200e"); // LRM
            runEndMark = new XEntity("#x200e"); // LRM
        }
    }

    string defaultLanguage = "en-US"; // todo need to get defaultLanguage
    if (lang == null)
        lang = defaultLanguage;

    // The lang attribute is only emitted when it differs from the default language.
    XAttribute langAttribute = lang == defaultLanguage ? null : new XAttribute("lang", lang);

    if (style.Any() || isRtl || langAttribute != null)
    {
        style.AddIfMissing("margin", "0in");
        style.AddIfMissing("padding", "0in");
        var xe = new XElement(Xhtml.span,
            langAttribute,
            runStartMark,
            content,
            runEndMark);

        xe.AddAnnotation(style);
        content = xe;
    }
    return content;
}
/// <summary>
/// Converts a document to an XHTML string while holding <c>_convertLocker</c>, with
/// the thread culture temporarily switched to one that uses '.' as the decimal separator.
/// </summary>
/// <param name="doc">The document to convert.</param>
/// <returns>The XHTML produced by <c>HtmlConverter.ConvertToHtml</c>, serialized with
/// <c>ToStringNewLineOnAttributes</c>.</returns>
private string _ConvertToHtml(WordprocessingDocument doc)
{
    lock (_convertLocker)
    {
        // No ImageHandler is configured here. A handler that saved pictures to disk
        // used to be sketched at this point but was disabled.
        HtmlConverterSettings settings = new HtmlConverterSettings();

        CultureInfo originalCulture = CultureInfo.CurrentCulture;
        CultureInfo dotDecimalCulture = (CultureInfo)originalCulture.Clone();
        dotDecimalCulture.NumberFormat.NumberDecimalSeparator = ".";

        XElement html;
        Thread.CurrentThread.CurrentCulture = dotDecimalCulture;
        try
        {
            html = HtmlConverter.ConvertToHtml(doc, settings);
        }
        finally
        {
            // Always restore the caller's culture, even when the conversion throws.
            Thread.CurrentThread.CurrentCulture = originalCulture;
        }

        return html.ToStringNewLineOnAttributes();
    }
}
/// <summary>
/// Groups adjacent elements by their paragraph-border markup and converts each group.
/// The grouping key is the serialized <c>w:pBdr</c> (plus <c>w:ind</c> when present) for
/// bordered paragraphs, <c>"table"</c> for <c>w:tbl</c> elements and <c>""</c> otherwise.
/// Bordered groups are wrapped in a <c>div</c> annotated with the border style.
/// </summary>
/// <param name="wordDoc">Document passed through to the nested conversion calls.</param>
/// <param name="settings">Converter settings passed through to the nested conversion calls.</param>
/// <param name="elements">The sibling elements to group and convert.</param>
/// <returns>A list with one converted entry per group.</returns>
private static object CreateBorderDivs(WordprocessingDocument wordDoc, HtmlConverterSettings settings, IEnumerable<XElement> elements)
{
    return elements.GroupAdjacent(e =>
        {
            if (e.Elements(W.pPr).Elements(W.pBdr).Any())
            {
                var pBdr = e.Element(W.pPr).Element(W.pBdr);
                var indStr = "";
                var ind = e.Element(W.pPr).Element(W.ind);
                if (ind != null)
                    indStr = ind.ToString(SaveOptions.DisableFormatting);
                // Paragraphs share a div only when border AND indentation markup match.
                return pBdr.ToString(SaveOptions.DisableFormatting) + indStr;
            }
            else if (e.Name == W.tbl)
            {
                return "table";
            }
            else
            {
                return ""; // empty string means no pBdr
            }
        })
        .Select(g =>
        {
            if (g.Key == "")
            {
                var o = GroupAndVerticallySpaceNumberedParagraphs(wordDoc, settings, g, 0m);
                return (object)o;
            }
            if (g.Key == "table")
            {
                var o = g.Select(gc => ConvertToHtmlTransform(wordDoc, settings, gc, false, 0));
                return o;
            }

            var pPr = g.First().Element(W.pPr);
            var pBdr = pPr.Element(W.pBdr);
            Dictionary<string, string> style = new Dictionary<string, string>();
            GenerateBorderStyle(pBdr, W.top, style, BorderType.Paragraph);
            GenerateBorderStyle(pBdr, W.right, style, BorderType.Paragraph);
            GenerateBorderStyle(pBdr, W.bottom, style, BorderType.Paragraph);
            GenerateBorderStyle(pBdr, W.left, style, BorderType.Paragraph);

            var ind = pPr.Element(W.ind);
            decimal currentMarginLeft = 0m;
            if (ind != null)
            {
                decimal? left = (decimal?)ind.Attribute(W.left);
                decimal leftInInches = 0;
                if (left != null)
                    leftInInches = (decimal)left / 1440;

                decimal? hanging = (decimal?)ind.Attribute(W.hanging);
                decimal hangingInInches = 0;
                if (hanging != null)
                    hangingInInches = -(decimal)hanging / 1440;

                currentMarginLeft = leftInInches + hangingInInches;

                // CSS lengths need '.' as the decimal separator whatever the thread culture is,
                // so format with the invariant culture.
                style.AddIfMissing(
                    "margin-left",
                    string.Format(System.Globalization.CultureInfo.InvariantCulture, "{0:0.00}in", currentMarginLeft));
            }

            var div = new XElement(Xhtml.div,
                GroupAndVerticallySpaceNumberedParagraphs(wordDoc, settings, g, currentMarginLeft));
            div.AddAnnotation(style);
            return div;
        })
        .ToList();
}
/// <summary>
/// Converts an in-memory word-processing document to a complete HTML5 document string.
/// Images are not written anywhere; each supported image is emitted as an <c>img</c>
/// element whose <c>src</c> is <c>/image{n}.{extension}</c>.
/// </summary>
/// <param name="byteArray">The raw bytes of the document to convert.</param>
/// <returns>The serialized HTML document, preceded by an HTML5 doctype declaration.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="byteArray"/> is null.</exception>
public static string ConvertToHtml(byte[] byteArray)
{
    if (byteArray == null)
        throw new System.ArgumentNullException("byteArray");

    using (MemoryStream memoryStream = new MemoryStream())
    {
        // Copy into an expandable stream: the document is opened in read/write mode.
        memoryStream.Write(byteArray, 0, byteArray.Length);
        using (WordprocessingDocument wDoc = WordprocessingDocument.Open(memoryStream, true))
        {
            int imageCounter = 0;

            var pageTitle = "RoboBrailleDoc";
            var part = wDoc.CoreFilePropertiesPart;
            if (part != null)
            {
                pageTitle = (string)part.GetXDocument().Descendants(DC.title).FirstOrDefault() ?? "RoboBrailleDoc";
            }

            // TODO: Determine max-width from size of content area.
            HtmlConverterSettings settings = new HtmlConverterSettings()
            {
                AdditionalCss = "body { margin: 1cm auto; max-width: 20cm; padding: 0; }",
                PageTitle = pageTitle,
                FabricateCssClasses = true,
                CssClassPrefix = "pt-",
                RestrictToSupportedLanguages = false,
                RestrictToSupportedNumberingFormats = false,
                ImageHandler = imageInfo =>
                {
                    // The counter advances for every image, supported or not.
                    ++imageCounter;

                    // A content type without a subtype cannot be mapped; treat it as unsupported.
                    string contentType = imageInfo.ContentType;
                    if (contentType == null || contentType.IndexOf('/') < 0)
                        return null;

                    // Invariant lowering: MIME subtypes are not linguistic text (a culture-sensitive
                    // ToLower() maps "GIF" to "gıf" under Turkish cultures).
                    string extension = contentType.Split('/')[1].ToLowerInvariant();
                    switch (extension)
                    {
                        case "png":
                        case "gif":
                        case "bmp":
                        case "jpeg":
                            break;
                        case "tiff":
                            // Tiff images are referenced as gif.
                            extension = "gif";
                            break;
                        case "x-wmf":
                            extension = "wmf";
                            break;
                        default:
                            // If the image format isn't one that we expect, ignore it,
                            // and don't return markup for the link.
                            return null;
                    }

                    string imageFileName = "/image" + imageCounter.ToString() + "." + extension;

                    XElement img = new XElement(Xhtml.img,
                        new XAttribute(NoNamespace.src, imageFileName),
                        imageInfo.ImgStyleAttribute,
                        imageInfo.AltText != null ?
                            new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);
                    return img;
                }
            };
            XElement htmlElement = HtmlConverter.ConvertToHtml(wDoc, settings);

            // Produce HTML document with <!DOCTYPE html > declaration to tell the browser
            // we are using HTML5.
            var html = new XDocument(
                new XDocumentType("html", null, null, null),
                htmlElement);

            // Note: the xhtml returned by ConvertToHtmlTransform contains objects of type
            // XEntity. PtOpenXmlUtil.cs defines the XEntity class. See
            // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
            // for a detailed explanation.
            //
            // If you further transform the XML tree returned by ConvertToHtmlTransform, you
            // must do it correctly, or entities will not be serialized properly.
            return html.ToString(SaveOptions.DisableFormatting);
        }
    }
}
/// <summary>
/// Converts a sequence of elements, grouping adjacent ones by the lowest field id found in
/// their <c>Stack&lt;FieldRetriever.FieldElementTypeInfo&gt;</c> annotation. Groups that belong
/// to a HYPERLINK field become a single anchor element; everything else is converted
/// element by element.
/// </summary>
/// <param name="wordDoc">Document passed through to the nested conversion calls.</param>
/// <param name="settings">Converter settings passed through to the nested conversion calls.</param>
/// <param name="elements">The elements to convert.</param>
/// <returns>A list holding one converted entry per group.</returns>
private static object ConvertContentThatCanContainFields(WordprocessingDocument wordDoc, HtmlConverterSettings settings, IEnumerable<XElement> elements)
{
    var fieldGroups = elements
        .GroupAdjacent(element =>
        {
            var fieldStack = element.Annotation<Stack<FieldRetriever.FieldElementTypeInfo>>();
            if (fieldStack != null && fieldStack.Any())
            {
                int lowestFieldId = fieldStack.Select(info => info.Id).Min();
                return lowestFieldId;
            }
            // Elements outside of any field share the null key.
            return (int?)null;
        })
        .ToList();

    var converted = fieldGroups
        .Select(fieldGroup =>
        {
            if (fieldGroup.Key == null)
            {
                var plainContent = fieldGroup
                    .Select(node => ConvertToHtmlTransform(wordDoc, settings, node, false, 0m))
                    .ToList();
                return (object)plainContent;
            }

            // The field instruction is looked up from the outermost ancestor of the group.
            var outermostAncestor = fieldGroup.First().Ancestors().Last();
            var instruction = FieldRetriever
                .InstrText(outermostAncestor, (int)fieldGroup.Key)
                .TrimStart('{')
                .TrimEnd('}');
            var field = FieldRetriever.ParseField(instruction);

            if (field.FieldType != "HYPERLINK")
                return fieldGroup.Select(node => ConvertToHtmlTransform(wordDoc, settings, node, false, 0m));

            // A hyperlink without arguments still yields an anchor, just without href
            // (null content is ignored by the XElement constructor).
            XAttribute href = null;
            if (field.Arguments.Length > 0)
                href = new XAttribute("href", field.Arguments[0]);

            var convertedRuns = fieldGroup
                .DescendantsAndSelf(W.r)
                .Select(run => ConvertRun(wordDoc, settings, run));
            return new XElement(Xhtml.A, href, convertedRuns);
        })
        .ToList();

    return converted;
}
/*
 * Notes on line spacing
 *
 * the w:line and w:lineRule attributes control spacing between lines - including between lines within a paragraph
 *
 * If w:spacing w:lineRule="auto" then
 *   w:spacing w:line is a percentage where 240 == 100%
 *
 *   (line value / 240) * 100 = percentage of line
 *
 * If w:spacing w:lineRule="exact" or w:lineRule="atLeast" then
 *   w:spacing w:line is in twips
 *   1440 = exactly one inch from line to line
 *
 * Handle
 * - ind
 * - jc
 * - numPr
 * - pBdr
 * - shd
 * - spacing
 * - textAlignment
 *
 * Don't Handle (yet)
 * - adjustRightInd?
 * - autoSpaceDE
 * - autoSpaceDN
 * - bidi
 * - contextualSpacing
 * - divId
 * - framePr
 * - keepLines
 * - keepNext
 * - kinsoku
 * - mirrorIndents
 * - overflowPunct
 * - pageBreakBefore
 * - snapToGrid
 * - suppressAutoHyphens
 * - suppressLineNumbers
 * - suppressOverlap
 * - tabs
 * - textBoxTightWrap
 * - textDirection
 * - topLinePunct
 * - widowControl
 * - wordWrap
 *
 */
/// <summary>
/// Converts a paragraph to an element named <paramref name="elementName"/>, annotated with
/// the style returned by <c>DefineParagraphStyle</c>. When a run containing a tab exists,
/// the qualifying runs before it are converted through <c>TransformElementsPrecedingTab</c>
/// and only the content after the tab run goes through the field-aware conversion.
/// </summary>
/// <param name="wordDoc">Document passed through to the nested conversion calls.</param>
/// <param name="settings">Converter settings passed through to the nested conversion calls.</param>
/// <param name="paragraph">The paragraph element to convert.</param>
/// <param name="elementName">Name of the element to create.</param>
/// <param name="suppressTrailingWhiteSpace">Forwarded to <c>DefineParagraphStyle</c>.</param>
/// <param name="currentMarginLeft">Forwarded to <c>DefineParagraphStyle</c>.</param>
/// <param name="isBidi">When true the element gets <c>dir="rtl"</c> and a leading RLM entity.</param>
/// <returns>The created element.</returns>
private static object ConvertParagraph(WordprocessingDocument wordDoc, HtmlConverterSettings settings,
    XElement paragraph, XName elementName, bool suppressTrailingWhiteSpace, decimal currentMarginLeft, bool isBidi)
{
    var style = DefineParagraphStyle(paragraph, elementName, suppressTrailingWhiteSpace, currentMarginLeft, isBidi);
    var rtl = isBidi ? new XAttribute("dir", "rtl") : new XAttribute("dir", "ltr");
    var firstMark = isBidi ? new XEntity("#x200f") : null; // RLM

    // Analyze initial runs to see whether we have a tab, in which case we will render
    // a span with a defined width and ignore the tab rather than rendering the text
    // preceding the tab and the tab as a span with a computed width.
    var firstTabRun = paragraph
        .Elements(W.r)
        .FirstOrDefault(run => run.Elements(W.tab).Any());
    var elementsPrecedingTab = firstTabRun != null
        ? paragraph.Elements(W.r).TakeWhile(e => e != firstTabRun)
            .Where(e => e.Elements().Any(c => c.Attributes(PtOpenXml.TabWidth).Any())).ToList()
        : Enumerable.Empty<XElement>().ToList();

    // TODO: Revisit
    // For the time being, if a hyperlink field precedes the tab, we'll render it as before.
    // NOTE(review): elementsPrecedingTab already holds w:r elements, so .Elements(W.r) looks
    // for runs nested inside runs - confirm this is intended.
    // The field keyword is matched ordinally and case-insensitively so that the result does
    // not depend on the thread culture (e.g. Turkish casing of 'i').
    var hyperlinkPrecedesTab = elementsPrecedingTab
        .Elements(W.r)
        .Elements(W.instrText)
        .Select(e => e.Value)
        .Any(value => value != null &&
            value.TrimStart().StartsWith("HYPERLINK", System.StringComparison.OrdinalIgnoreCase));
    if (hyperlinkPrecedesTab)
    {
        var paraElement1 = new XElement(elementName,
            rtl,
            firstMark,
            ConvertContentThatCanContainFields(wordDoc, settings, paragraph.Elements()));
        paraElement1.AddAnnotation(style);
        return paraElement1;
    }

    var txElementsPrecedingTab = TransformElementsPrecedingTab(wordDoc, settings, elementsPrecedingTab, firstTabRun);
    // Skip(1) drops the tab run itself; everything before it is skipped as well.
    var elementsSucceedingTab = firstTabRun != null
        ? paragraph.Elements().SkipWhile(e => e != firstTabRun).Skip(1)
        : paragraph.Elements();
    var paraElement = new XElement(elementName,
        rtl,
        firstMark,
        txElementsPrecedingTab,
        ConvertContentThatCanContainFields(wordDoc, settings, elementsSucceedingTab));
    paraElement.AddAnnotation(style);

    return paraElement;
}
/// <summary>
/// Converts this instance to HTML by delegating to <c>HtmlConverter.ConvertToHtml</c>.
/// </summary>
/// <param name="htmlConverterSettings">Settings forwarded unchanged to the converter.</param>
/// <returns>The element produced by the converter.</returns>
public XElement ConvertToHtml(HtmlConverterSettings htmlConverterSettings)
{
    XElement converted = HtmlConverter.ConvertToHtml(this, htmlConverterSettings);
    return converted;
}
/// <summary>
/// Converts the elements that precede the first tab of a paragraph and gives the result a
/// fixed width equal to the summed <c>PtOpenXml.TabWidth</c> of those elements' children
/// plus the width of the tab itself.
/// </summary>
/// <param name="wordDoc">Document passed through to <c>ConvertToHtmlTransform</c>.</param>
/// <param name="settings">Converter settings passed through to <c>ConvertToHtmlTransform</c>.</param>
/// <param name="elementsPrecedingTab">The elements located before the tab run.</param>
/// <param name="firstTabRun">The run containing the tab, or null when there is none.</param>
/// <returns>
/// An empty list when there is nothing to convert; the single converted item (styled in place
/// when it is an element); or, for several items, a list holding one wrapping <c>span</c>.
/// </returns>
private static List<object> TransformElementsPrecedingTab(WordprocessingDocument wordDoc, HtmlConverterSettings settings,
    List<XElement> elementsPrecedingTab, XElement firstTabRun)
{
    var tabWidth = firstTabRun != null
        ? (decimal?) firstTabRun.Elements(W.tab).Attributes(PtOpenXml.TabWidth).FirstOrDefault() ?? 0m
        : 0m;
    var precedingElementsWidth = elementsPrecedingTab
        .Elements()
        .Where(c => c.Attributes(PtOpenXml.TabWidth).Any())
        .Select(e => (decimal) e.Attribute(PtOpenXml.TabWidth))
        .Sum();
    var totalWidth = precedingElementsWidth + tabWidth;
    var width = string.Format(NumberFormatInfo.InvariantInfo, "{0:0.000}in", totalWidth);

    var txElementsPrecedingTab = elementsPrecedingTab
        .Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, 0m))
        .ToList();

    if (txElementsPrecedingTab.Count > 1)
    {
        // Several items share one fixed-width inline block. The wrapper has to be what is
        // returned: it used to be built and then discarded, so the width was never applied.
        var span = new XElement(Xhtml.span, txElementsPrecedingTab);
        var spanStyle = new Dictionary<string, string>
        {
            { "display", "inline-block" },
            { "text-indent", "0" },
            { "width", width }
        };
        span.AddAnnotation(spanStyle);
        return new List<object> { span };
    }

    if (txElementsPrecedingTab.Count == 1)
    {
        var element = txElementsPrecedingTab[0] as XElement;
        if (element != null)
        {
            var spanStyle = element.Annotation<Dictionary<string, string>>();
            if (spanStyle == null)
            {
                // The converted element may carry no style annotation yet; create one
                // instead of dereferencing null.
                spanStyle = new Dictionary<string, string>();
                element.AddAnnotation(spanStyle);
            }
            spanStyle.AddIfMissing("display", "inline-block");
            spanStyle.AddIfMissing("text-indent", "0");
            spanStyle.AddIfMissing("width", width);
        }
    }

    return txElementsPrecedingTab;
}
/*
 * Notes on line spacing
 *
 * the w:line and w:lineRule attributes control spacing between lines - including between lines within a paragraph
 *
 * If w:spacing w:lineRule="auto" then
 *   w:spacing w:line is a percentage where 240 == 100%
 *
 *   (line value / 240) * 100 = percentage of line
 *
 * If w:spacing w:lineRule="exact" or w:lineRule="atLeast" then
 *   w:spacing w:line is in twips
 *   1440 = exactly one inch from line to line
 *
 * Handle
 * - ind
 * - jc
 * - numPr
 * - pBdr
 * - shd
 * - spacing
 * - textAlignment
 *
 * Don't Handle (yet)
 * - adjustRightInd?
 * - autoSpaceDE
 * - autoSpaceDN
 * - bidi
 * - contextualSpacing
 * - divId
 * - framePr
 * - keepLines
 * - keepNext
 * - kinsoku
 * - mirrorIndents
 * - overflowPunct
 * - pageBreakBefore
 * - snapToGrid
 * - suppressAutoHyphens
 * - suppressLineNumbers
 * - suppressOverlap
 * - tabs
 * - textBoxTightWrap
 * - textDirection
 * - topLinePunct
 * - widowControl
 * - wordWrap
 *
 */
/// <summary>
/// Converts a paragraph to an element named <paramref name="elementName"/> and annotates it
/// with a CSS style dictionary derived from the paragraph properties (spacing, indentation,
/// justification, shading, font, font size and vertical text alignment).
/// </summary>
/// <param name="wordDoc">Document passed through to the content conversion.</param>
/// <param name="settings">Converter settings passed through to the content conversion.</param>
/// <param name="paragraph">The paragraph element to convert.</param>
/// <param name="elementName">Name of the element to create.</param>
/// <param name="suppressTrailingWhiteSpace">When true the bottom margin from w:spacing is forced to 0.</param>
/// <param name="currentMarginLeft">Left margin (in inches) already applied by a container; subtracted from the paragraph's own left indent.</param>
/// <param name="isBidi">When true, left/right are mirrored and the element gets <c>dir="rtl"</c> plus a leading RLM entity.</param>
/// <returns>The created element.</returns>
private static object ConvertParagraph(WordprocessingDocument wordDoc, HtmlConverterSettings settings,
    XElement paragraph, XName elementName, bool suppressTrailingWhiteSpace, decimal currentMarginLeft, bool isBidi)
{
    // CSS numbers need '.' as the decimal separator whatever the thread culture is.
    var css = System.Globalization.CultureInfo.InvariantCulture;

    var style = new Dictionary<string, string>();

    var sn = (string)paragraph.Attribute(PtOpenXml.StyleName);
    if (sn != null)
        style.Add("PtStyleName", sn);

    XElement pPr = paragraph.Element(W.pPr);
    if (pPr != null)
    {
        var spacing = pPr.Element(W.spacing);
        if (spacing != null)
        {
            // w:before / w:after are divided by 20 to obtain points.
            var spacingBefore = (decimal?)spacing.Attribute(W.before);
            if (spacingBefore != null)
                style.AddIfMissing("margin-top", string.Format(css, "{0}pt", spacingBefore / 20.0m));

            var lineRule = (string)spacing.Attribute(W.lineRule);
            if (lineRule == "auto" || lineRule == "exact" || lineRule == "atLeast")
            {
                // A lineRule without w:line is ignored rather than failing on the missing attribute.
                var line = (decimal?)spacing.Attribute(W.line);
                if (line != null)
                {
                    if (lineRule == "auto")
                    {
                        if (line.Value != 240m)
                        {
                            var pct = (line.Value / 240m) * 100m;
                            style.Add("line-height", string.Format(css, "{0:0.0}%", pct));
                        }
                    }
                    else
                    {
                        var points = line.Value / 20m;
                        // "atLeast" values below 14pt are not emitted.
                        if (lineRule == "exact" || points >= 14m)
                            style.Add("line-height", string.Format(css, "{0:0.0}pt", points));
                    }
                }
            }

            decimal? spacingAfter;
            if (suppressTrailingWhiteSpace)
                spacingAfter = 0;
            else
                spacingAfter = (decimal?)spacing.Attribute(W.after);
            if (spacingAfter != null)
                style.AddIfMissing("margin-bottom", string.Format(css, "{0}pt", spacingAfter / 20.0m));
        }

        var ind = pPr.Element(W.ind);
        if (ind != null)
        {
            // Indentation values are divided by 1440 to obtain inches.
            decimal? left = (decimal?)ind.Attribute(W.left);
            if (left != null)
            {
                decimal leftInInches = (decimal)left / 1440 - currentMarginLeft;
                style.AddIfMissing(isBidi ? "margin-right" : "margin-left",
                    string.Format(css, "{0:0.00}in", leftInInches));
            }

            decimal? right = (decimal?)ind.Attribute(W.right);
            if (right != null)
            {
                decimal rightInInches = (decimal)right / 1440;
                style.AddIfMissing(isBidi ? "margin-left" : "margin-right",
                    string.Format(css, "{0:0.00}in", rightInInches));
            }

            decimal? firstLine = (decimal?)ind.Attribute(W.firstLine);
            if (firstLine != null)
            {
                decimal firstLineInInches = (decimal)firstLine / 1440m;
                style.AddIfMissing("text-indent", string.Format(css, "{0:0.00}in", firstLineInInches));
            }

            // A hanging indent becomes a negative text-indent; firstLine wins when both are present.
            decimal? hanging = (decimal?)ind.Attribute(W.hanging);
            if (hanging != null)
            {
                decimal hangingInInches = (decimal)-hanging / 1440m;
                style.AddIfMissing("text-indent", string.Format(css, "{0:0.00}in", hangingInInches));
            }
        }

        // todo need to handle
        // - both
        // - mediumKashida
        // - distribute
        // - numTab
        // - highKashida
        // - lowKashida
        // - thaiDistribute

        var jcVal = (string)pPr.Elements(W.jc).Attributes(W.val).FirstOrDefault();
        if (jcVal == null)
            jcVal = "left";
        if (jcVal == "left")
            style.AddIfMissing("text-align", isBidi ? "right" : "left");
        else if (jcVal == "right")
            style.AddIfMissing("text-align", isBidi ? "left" : "right");
        else if (jcVal == "center")
            style.AddIfMissing("text-align", "center");
        else if (jcVal == "both")
            style.AddIfMissing("text-align", "justify");

        CreateStyleFromShd(style, pPr.Element(W.shd));

        // Pt.FontName
        string font = (string)paragraph.Attributes(PtOpenXml.FontName).FirstOrDefault();
        if (font != null)
            CreateFontCssProperty(font, style);

        // W.sz
        decimal? sz = null;
        var languageType = (string)paragraph.Attribute(PtOpenXml.LanguageType);
        if (languageType == "bidi")
            sz = (decimal?)pPr.Elements(W.rPr).Elements(W.szCs).Attributes(W.val).FirstOrDefault();
        else
            sz = (decimal?)pPr.Elements(W.rPr).Elements(W.sz).Attributes(W.val).FirstOrDefault();

        // Materialized once so the descendant walk is not repeated for Any() and Max().
        var sizesOfAllRunsInParagraph = paragraph
            .DescendantsTrimmed(W.txbxContent)
            .Select(run =>
            {
                if (run.Name != W.r)
                    return null;
                var runLanguageType = (string)run.Attribute(PtOpenXml.LanguageType);
                if (runLanguageType == "bidi")
                {
                    var runCsSz = (decimal?)run
                        .Elements(W.rPr)
                        .Elements(W.szCs)
                        .Attributes(W.val)
                        .FirstOrDefault();
                    return runCsSz;
                }
                else
                {
                    var runSz = (decimal?)run
                        .Elements(W.rPr)
                        .Elements(W.sz)
                        .Attributes(W.val)
                        .FirstOrDefault();
                    return runSz;
                }
            })
            .Where(runSz => runSz != null)
            .ToList();
        // The largest explicit run size overrides the paragraph-level size.
        if (sizesOfAllRunsInParagraph.Any())
            sz = sizesOfAllRunsInParagraph.Select(runSz => runSz.Value).Max();
        // w:sz / w:szCs are divided by 2 to obtain points.
        if (sz != null)
            style.AddIfMissing("font-size", string.Format(css, "{0}pt", sz / 2.0m));

        var languageTypeOfAllRunsInParagraph = paragraph
            .DescendantsTrimmed(W.txbxContent)
            .Select(run =>
            {
                if (run.Name != W.r)
                    return null;
                var runLanguageType = (string)run.Attribute(PtOpenXml.LanguageType);
                return runLanguageType;
            })
            .Where(runLanguage => runLanguage != null);
        if (!languageTypeOfAllRunsInParagraph.Any(lt => lt == "bidi"))
            style.AddIfMissing("line-height", "108%");

        // vertical text alignment as of December 2013 does not work in any major browsers.
        var verticalTextAlignment = (string)pPr.Elements(W.textAlignment).Attributes(W.val).FirstOrDefault();
        if (verticalTextAlignment != null && verticalTextAlignment != "auto")
        {
            if (verticalTextAlignment == "top")
                style.AddIfMissing("vertical-align", "top");
            else if (verticalTextAlignment == "center")
                style.AddIfMissing("vertical-align", "middle");
            else if (verticalTextAlignment == "baseline")
                style.AddIfMissing("vertical-align", "baseline");
            else if (verticalTextAlignment == "bottom")
                style.AddIfMissing("vertical-align", "bottom");
        }

        // Defaults for any margin that was not set above.
        style.AddIfMissing("margin-top", "0pt");
        style.AddIfMissing("margin-left", "0pt");
        style.AddIfMissing("margin-right", "0pt");
        style.AddIfMissing("margin-bottom", ".001pt");
    }

    XAttribute rtl = null;
    XEntity firstMark = null;
    if (isBidi)
    {
        rtl = new XAttribute("dir", "rtl");
        firstMark = new XEntity("#x200f"); // RLM
    }
    else
    {
        rtl = new XAttribute("dir", "ltr");
    }

    var paraElement = new XElement(elementName,
        rtl,
        firstMark,
        ConvertContentThatCanContainFields(wordDoc, settings, paragraph.Elements()));
    paraElement.AddAnnotation(style);
    return paraElement;
}
/// <summary>
/// Converts this instance to HTML by delegating to <c>HtmlConverter.ConvertToHtml</c>,
/// passing along the supplied image handler.
/// </summary>
/// <param name="htmlConverterSettings">Settings forwarded unchanged to the converter.</param>
/// <param name="imageHandler">Image callback forwarded unchanged to the converter.</param>
/// <returns>The element produced by the converter.</returns>
public XElement ConvertToHtml(HtmlConverterSettings htmlConverterSettings, Func<ImageInfo, XElement> imageHandler)
{
    XElement converted = HtmlConverter.ConvertToHtml(this, htmlConverterSettings, imageHandler);
    return converted;
}