/// <summary>
/// Converts the Word document held in <paramref name="stream"/> to XHTML and returns
/// the serialized body element of the result.
/// </summary>
/// <param name="stream">Stream containing the document; its Length is used to size the copy.</param>
/// <returns>The XHTML body element rendered via ToStringNewLineOnAttributes.</returns>
public string ParseDocument(Stream stream)
        {
            XNamespace xhtmlNamespace = "http://www.w3.org/1999/xhtml";

            // Work on a private in-memory copy; the package is opened in editable mode below.
            var documentBytes = stream.ToByteArray((int) stream.Length);
            using (var buffer = new MemoryStream())
            {
                buffer.Write(documentBytes, 0, documentBytes.Length);
                using (var wordDocument = WordprocessingDocument.Open(buffer, true))
                {
                    var converterSettings = new HtmlConverterSettings();
                    XElement convertedHtml = HtmlConverter.ConvertToHtml(wordDocument, converterSettings);

                    // The converted tree contains XEntity objects (defined in PtOpenXmlUtil.cs); see
                    // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
                    // Any further transformation of the tree has to preserve them, otherwise
                    // entities are not serialized properly.
                    var bodyElement = convertedHtml.Element(xhtmlNamespace + "body");
                    return bodyElement.ToStringNewLineOnAttributes();
                }
            }
        }
Example #2
0
        /// <summary>
        /// Converts a specific node instead of the whole word document into HTML.
        /// Note: this method is added for the above purpose. See the other method:
        /// public static XElement ConvertToHtml(WordprocessingDocument wordDoc, HtmlConverterSettings htmlConverterSettings, Func&lt;ImageInfo, XElement&gt; imageHandler)
        /// </summary>
        /// <param name="wordDoc">
        /// The document that owns <paramref name="node"/>. RevisionAccepter.AcceptRevisions and
        /// MarkupSimplifier.SimplifyMarkup are applied to it before the conversion.
        /// </param>
        /// <param name="node">The node to convert to HTML. It is cast to XElement.</param>
        /// <param name="htmlConverterSettings">
        /// Conversion settings. ConvertFormatting must be false.
        /// </param>
        /// <returns>The result of ConvertToHtmlTransform for <paramref name="node"/>, cast to XElement.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="htmlConverterSettings"/> is null.</exception>
        /// <exception cref="InvalidSettingsException">ConvertFormatting is set on the settings.</exception>
        public static XElement ConvertToHtml(WordprocessingDocument wordDoc, XNode node,
                                             HtmlConverterSettings htmlConverterSettings)
        {
            // Fail with a descriptive exception instead of a NullReferenceException
            // when the settings are read below.
            if (htmlConverterSettings == null)
            {
                throw new ArgumentNullException("htmlConverterSettings");
            }

            InitEntityMap();
            if (htmlConverterSettings.ConvertFormatting)
            {
                throw new InvalidSettingsException("Conversion with formatting is not supported");
            }

            RevisionAccepter.AcceptRevisions(wordDoc);
            SimplifyMarkupSettings settings = new SimplifyMarkupSettings
            {
                RemoveComments              = true,
                RemoveContentControls       = true,
                RemoveEndAndFootNotes       = true,
                RemoveFieldCodes            = false,
                RemoveLastRenderedPageBreak = true,
                RemovePermissions           = true,
                RemoveProof                 = true,
                RemoveRsidInfo              = true,
                RemoveSmartTags             = true,
                RemoveSoftHyphens           = true,
                ReplaceTabsWithSpaces       = true,
            };

            MarkupSimplifier.SimplifyMarkup(wordDoc, settings);

            // Only the supplied node is annotated and transformed; no image handler is passed.
            AnnotateHyperlinkContent((XElement)node);
            return (XElement)ConvertToHtmlTransform(wordDoc, htmlConverterSettings, node, null);
        }
Example #3
0
 /// <summary>
 /// Opens <paramref name="doc"/> as a WordprocessingDocument and converts it to XHTML,
 /// forwarding the settings and image handler to the WordprocessingDocument overload.
 /// </summary>
 public static XElement ConvertToHtml(WmlDocument doc, HtmlConverterSettings htmlConverterSettings, Func <ImageInfo, XElement> imageHandler)
 {
     // Both disposables are released in reverse order once the conversion has returned.
     using (var memoryStreamDocument = new OpenXmlMemoryStreamDocument(doc))
     using (WordprocessingDocument wordDocument = memoryStreamDocument.GetWordprocessingDocument())
     {
         return ConvertToHtml(wordDocument, htmlConverterSettings, imageHandler);
     }
 }
Example #4
0
 /// <summary>
 /// Opens <paramref name="doc"/> as a WordprocessingDocument and converts it to XHTML
 /// using the supplied settings.
 /// </summary>
 public static XElement ConvertToHtml(WmlDocument doc, HtmlConverterSettings htmlConverterSettings)
 {
     using (var memoryStreamDocument = new OpenXmlMemoryStreamDocument(doc))
     using (WordprocessingDocument wordDocument = memoryStreamDocument.GetWordprocessingDocument())
     {
         return ConvertToHtml(wordDocument, htmlConverterSettings);
     }
 }
 /// <summary>
 /// Converts a WmlDocument to XHTML by wrapping it in an in-memory
 /// WordprocessingDocument and delegating to the WordprocessingDocument overload.
 /// </summary>
 public static XElement ConvertToHtml(WmlDocument doc, HtmlConverterSettings htmlConverterSettings)
 {
     using (var inMemoryDocument = new OpenXmlMemoryStreamDocument(doc))
     using (WordprocessingDocument package = inMemoryDocument.GetWordprocessingDocument())
     {
         XElement xhtml = ConvertToHtml(package, htmlConverterSettings);
         return xhtml;
     }
 }
 /// <summary>
 /// Converts a WmlDocument to XHTML by wrapping it in an in-memory
 /// WordprocessingDocument and delegating, together with the image handler,
 /// to the WordprocessingDocument overload.
 /// </summary>
 public static XElement ConvertToHtml(WmlDocument doc, HtmlConverterSettings htmlConverterSettings, Func<ImageInfo, XElement> imageHandler)
 {
     using (var inMemoryDocument = new OpenXmlMemoryStreamDocument(doc))
     using (WordprocessingDocument package = inMemoryDocument.GetWordprocessingDocument())
     {
         XElement xhtml = ConvertToHtml(package, htmlConverterSettings, imageHandler);
         return xhtml;
     }
 }
Example #7
0
        /// <summary>
        /// Converts the main document part of <paramref name="wordDoc"/> to XHTML.
        /// Revisions are accepted and the markup is simplified on the document first.
        /// </summary>
        /// <exception cref="InvalidSettingsException">ConvertFormatting is set on the settings.</exception>
        public static XElement ConvertToHtml(WordprocessingDocument wordDoc,
                                             HtmlConverterSettings htmlConverterSettings, Func <ImageInfo, XElement> imageHandler)
        {
            InitEntityMap();
            if (htmlConverterSettings.ConvertFormatting)
            {
                throw new InvalidSettingsException("Conversion with formatting is not supported");
            }

            RevisionAccepter.AcceptRevisions(wordDoc);

            // Everything is stripped except field codes.
            var simplifierSettings = new SimplifyMarkupSettings();
            simplifierSettings.RemoveComments              = true;
            simplifierSettings.RemoveContentControls       = true;
            simplifierSettings.RemoveEndAndFootNotes       = true;
            simplifierSettings.RemoveFieldCodes            = false;
            simplifierSettings.RemoveLastRenderedPageBreak = true;
            simplifierSettings.RemovePermissions           = true;
            simplifierSettings.RemoveProof                 = true;
            simplifierSettings.RemoveRsidInfo              = true;
            simplifierSettings.RemoveSmartTags             = true;
            simplifierSettings.RemoveSoftHyphens           = true;
            simplifierSettings.ReplaceTabsWithSpaces       = true;
            MarkupSimplifier.SimplifyMarkup(wordDoc, simplifierSettings);

            XElement documentRoot = wordDoc.MainDocumentPart.GetXDocument().Root;
            AnnotateHyperlinkContent(documentRoot);

            // The tree returned by ConvertToHtmlTransform contains XEntity objects
            // (defined in PtOpenXmlUtil.cs); see
            // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
            // Callers that transform the tree further must preserve them, or entities
            // will not be serialized properly.
            return (XElement)ConvertToHtmlTransform(wordDoc, htmlConverterSettings,
                                                    documentRoot, imageHandler);
        }
        /// <summary>
        /// Produces XHTML for the root element of the main document part of
        /// <paramref name="wordDoc"/>, after accepting revisions and simplifying the markup.
        /// </summary>
        /// <exception cref="InvalidSettingsException">ConvertFormatting is set on the settings.</exception>
        public static XElement ConvertToHtml(WordprocessingDocument wordDoc,
            HtmlConverterSettings htmlConverterSettings, Func<ImageInfo, XElement> imageHandler)
        {
            InitEntityMap();
            if (htmlConverterSettings.ConvertFormatting)
            {
                throw new InvalidSettingsException("Conversion with formatting is not supported");
            }

            RevisionAccepter.AcceptRevisions(wordDoc);
            MarkupSimplifier.SimplifyMarkup(wordDoc, new SimplifyMarkupSettings
            {
                RemoveComments = true,
                RemoveContentControls = true,
                RemoveEndAndFootNotes = true,
                RemoveFieldCodes = false,
                RemoveLastRenderedPageBreak = true,
                RemovePermissions = true,
                RemoveProof = true,
                RemoveRsidInfo = true,
                RemoveSmartTags = true,
                RemoveSoftHyphens = true,
                ReplaceTabsWithSpaces = true,
            });

            XElement mainRoot = wordDoc.MainDocumentPart.GetXDocument().Root;
            AnnotateHyperlinkContent(mainRoot);

            // Note: the returned tree holds XEntity objects (see PtOpenXmlUtil.cs and
            // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx).
            // Further transformations must keep them intact, or entities will not be
            // serialized properly.
            object transformed = ConvertToHtmlTransform(wordDoc, htmlConverterSettings,
                mainRoot, imageHandler);
            return (XElement)transformed;
        }
        // Groups adjacent elements by their paragraph border (plus indentation) and wraps each
        // bordered group in an h:div carrying the border style as an annotation. Tables and
        // elements without a paragraph border are converted without a wrapping div.
        private static object CreateBorderDivs(WordprocessingDocument wordDoc, HtmlConverterSettings settings, IEnumerable<XElement> elements)
        {
            // Grouping key: serialized w:pBdr (+ w:ind) for bordered paragraphs,
            // "table" for w:tbl, and the empty string for everything else.
            var borderGroups = elements.GroupAdjacent(e =>
            {
                var borders = e.Elements(W.pPr).Elements(W.pBdr).FirstOrDefault();
                if (borders == null)
                {
                    return e.Name == W.tbl ? "table" : string.Empty;
                }

                var indent = e.Elements(W.pPr).Elements(W.ind).FirstOrDefault();
                var indentText = indent != null
                    ? indent.ToString(SaveOptions.DisableFormatting)
                    : string.Empty;
                return borders.ToString(SaveOptions.DisableFormatting) + indentText;
            });

            return borderGroups
                .Select(g =>
                {
                    if (g.Key == string.Empty)
                    {
                        return (object) GroupAndVerticallySpaceNumberedParagraphs(wordDoc, settings, g, 0m);
                    }
                    if (g.Key == "table")
                    {
                        return g.Select(table => ConvertToHtmlTransform(wordDoc, settings, table, false, 0));
                    }

                    // Bordered group: derive the div style from the first paragraph's properties.
                    var paragraphProperties = g.First().Elements(W.pPr).First();
                    var paragraphBorders = paragraphProperties.Element(W.pBdr);
                    var divStyle = new Dictionary<string, string>();
                    foreach (var side in new[] { W.top, W.right, W.bottom, W.left })
                    {
                        GenerateBorderStyle(paragraphBorders, side, divStyle, BorderType.Paragraph);
                    }

                    var marginLeft = 0m;
                    var indentation = paragraphProperties.Element(W.ind);
                    if (indentation != null)
                    {
                        // Attribute values are divided by 1440 to obtain inches.
                        var left = (decimal?) indentation.Attribute(W.left)/1440m ?? 0;
                        var hanging = -(decimal?) indentation.Attribute(W.hanging)/1440m ?? 0;
                        marginLeft = left + hanging;

                        var marginText = marginLeft > 0m
                            ? string.Format(NumberFormatInfo.InvariantInfo, "{0:0.00}in", marginLeft)
                            : "0";
                        divStyle.AddIfMissing("margin-left", marginText);
                    }

                    var borderDiv = new XElement(Xhtml.div,
                        GroupAndVerticallySpaceNumberedParagraphs(wordDoc, settings, g, marginLeft));
                    borderDiv.AddAnnotation(divStyle);
                    return borderDiv;
                })
            .ToList();
        }
        // Recursive transform from WordprocessingML elements to XHTML content.
        // Returns null for non-element nodes and for elements that are not handled.
        private static object ConvertToHtmlTransform(WordprocessingDocument wordDoc,
            HtmlConverterSettings settings, XNode node,
            bool suppressTrailingWhiteSpace,
            decimal currentMarginLeft)
        {
            var element = node as XElement;
            if (element == null)
            {
                return null;
            }

            var name = element.Name;

            // w:document -> h:html. The h:head follows the W3C's recommended layout:
            // charset first (HTML5 form), then the title (always present, possibly
            // empty), then the remaining meta tags.
            if (name == W.document)
            {
                var title = new XElement(Xhtml.title, new XText(settings.PageTitle ?? string.Empty));
                var head = new XElement(Xhtml.head,
                    new XElement(Xhtml.meta, new XAttribute("charset", "UTF-8")),
                    title,
                    new XElement(Xhtml.meta,
                        new XAttribute("name", "Generator"),
                        new XAttribute("content", "PowerTools for Open XML")));
                var children = element.Elements()
                    .Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, currentMarginLeft));
                return new XElement(Xhtml.html, head, children);
            }

            // w:body -> h:body.
            if (name == W.body)
                return new XElement(Xhtml.body, CreateSectionDivs(wordDoc, settings, element));

            // w:p -> h:h1..h6 or h:p (unless the previous paragraph has a style separator).
            if (name == W.p)
                return ProcessParagraph(wordDoc, settings, element, suppressTrailingWhiteSpace, currentMarginLeft);

            if (name == W.hyperlink)
            {
                // External hyperlink (relationship id) -> h:a with href.
                var relationshipAttribute = element.Attribute(R.id);
                if (relationshipAttribute != null)
                {
                    try
                    {
                        var relationshipId = (string)relationshipAttribute;
                        return new XElement(Xhtml.a,
                            new XAttribute("href",
                                wordDoc.MainDocumentPart
                                    .HyperlinkRelationships
                                    .First(x => x.Id == relationshipId)
                                    .Uri
                                ),
                            element.Elements(W.r).Select(run => ConvertRun(wordDoc, settings, run))
                            );
                    }
                    catch (UriFormatException)
                    {
                        // Malformed target: emit the link's content without the anchor.
                        return element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, currentMarginLeft));
                    }
                }

                // Hyperlink to a bookmark -> h:a.
                if (element.Attribute(W.anchor) != null)
                    return ProcessHyperlinkToBookmark(wordDoc, settings, element);

                // A hyperlink with neither attribute is ignored.
                return null;
            }

            // Contents of runs.
            if (name == W.r)
                return ConvertRun(wordDoc, settings, element);

            // w:bookmarkStart -> anchor.
            if (name == W.bookmarkStart)
                return ProcessBookmarkStart(element);

            // w:t -> text node. No entity conversion is needed in a UTF-8 document, and
            // no &nbsp; entities are needed for significant whitespace because the text
            // nodes are wrapped in <span> elements in which all whitespace is significant.
            if (name == W.t)
                return new XText(element.Value);

            // w:sym -> h:span holding a numeric character entity.
            if (name == W.sym)
            {
                var codePoint = Convert.ToInt32((string)element.Attribute(W._char), 16);
                return new XElement(Xhtml.span, new XEntity(string.Format("#{0}", codePoint)));
            }

            // Tabs that have the pt:TabWidth attribute set.
            if (name == W.tab)
                return ProcessTab(element);

            // w:br / w:cr -> h:br.
            if (name == W.br || name == W.cr)
                return ProcessBreak(element);

            // w:noBreakHyphen -> '-'.
            if (name == W.noBreakHyphen)
                return new XText("-");

            // Tables: w:tbl, w:tr, w:tc.
            if (name == W.tbl)
                return ProcessTable(wordDoc, settings, element, currentMarginLeft);
            if (name == W.tr)
                return ProcessTableRow(wordDoc, settings, element, currentMarginLeft);
            if (name == W.tc)
                return ProcessTableCell(wordDoc, settings, element);

            // Images.
            if (name == W.drawing || name == W.pict || name == W._object)
                return ProcessImage(wordDoc, element, settings.ImageHandler);

            // Content controls.
            if (name == W.sdt)
                return ProcessContentControl(wordDoc, settings, element, currentMarginLeft);

            // Smart tags and simple fields: convert their children.
            if (name == W.smartTag || name == W.fldSimple)
                return CreateBorderDivs(wordDoc, settings, element.Elements());

            // Anything else is ignored.
            return null;
        }
Example #11
0
 /// <summary>
 /// Converts this instance to XHTML by delegating to HtmlConverter.ConvertToHtml.
 /// </summary>
 public XElement ConvertToHtml(HtmlConverterSettings htmlConverterSettings)
 {
     XElement xhtml = HtmlConverter.ConvertToHtml(this, htmlConverterSettings);
     return xhtml;
 }
        /*
         * Handle:
         * - b
         * - bdr
         * - caps
         * - color
         * - dstrike
         * - highlight
         * - i
         * - position
         * - rFonts
         * - shd
         * - smallCaps
         * - spacing
         * - strike
         * - sz
         * - u
         * - vanish
         * - vertAlign
         *
         * Don't handle:
         * - em
         * - emboss
         * - fitText
         * - imprint
         * - kern
         * - outline
         * - shadow
         * - w
         *
         */

        // Converts a w:r element. Runs without w:rPr yield their converted children directly;
        // runs marked w:webHidden yield nothing; other runs are wrapped in h:sup / h:sub
        // and/or an annotated h:span as required.
        private static object ConvertRun(WordprocessingDocument wordDoc, HtmlConverterSettings settings, XElement run)
        {
            var runProperties = run.Element(W.rPr);
            if (runProperties == null)
            {
                return run.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, 0m));
            }

            // Content whose w:rPr contains w:webHidden is not emitted at all.
            if (runProperties.Element(W.webHidden) != null)
            {
                return null;
            }

            var style = DefineRunStyle(run);
            object content = run.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, 0m));

            // Superscript / subscript wrapping; the wrapper is kept only if it has content.
            if (runProperties.Element(W.vertAlign) != null)
            {
                var alignment = (string)runProperties.Elements(W.vertAlign).Attributes(W.val).FirstOrDefault();
                XElement wrapper = null;
                if (alignment == "superscript")
                {
                    wrapper = new XElement(Xhtml.sup, content);
                }
                else if (alignment == "subscript")
                {
                    wrapper = new XElement(Xhtml.sub, content);
                }

                if (wrapper != null && wrapper.Nodes().Any())
                {
                    content = wrapper;
                }
            }

            var langAttribute = GetLangAttribute(run);

            XEntity startMark;
            XEntity endMark;
            DetermineRunMarks(run, runProperties, style, out startMark, out endMark);

            // No span is needed when there is no style, language, or start mark to carry.
            if (!style.Any() && langAttribute == null && startMark == null)
            {
                return content;
            }

            style.AddIfMissing("margin", "0");
            style.AddIfMissing("padding", "0");
            var span = new XElement(Xhtml.span,
                langAttribute,
                startMark,
                content,
                endMark);
            span.AddAnnotation(style);
            return span;
        }
        /// <summary>
        /// Converts the Word document at <paramref name="file"/> to an HTML file named after
        /// the document with an ".html" extension. Images are saved into a sibling directory
        /// named after the output file with a "_files" suffix.
        /// </summary>
        /// <param name="file">Path of the document to convert.</param>
        /// <param name="outputDirectory">
        /// Existing directory that receives the output. When null or empty, the output file
        /// name is used as a relative path.
        /// </param>
        /// <exception cref="OpenXmlPowerToolsException">
        /// <paramref name="outputDirectory"/> is specified but does not exist.
        /// </exception>
        public static void ConvertToHtml(string file, string outputDirectory)
        {
            var fi = new FileInfo(file);

            byte[] byteArray = File.ReadAllBytes(fi.FullName);
            using (MemoryStream memoryStream = new MemoryStream())
            {
                memoryStream.Write(byteArray, 0, byteArray.Length);
                using (WordprocessingDocument wDoc = WordprocessingDocument.Open(memoryStream, true))
                {
                    // Swap the extension instead of replacing the literal text ".docx": this
                    // also handles ".DOCX", ".docm", etc., and guarantees the name ends in ".html".
                    var destFileName = new FileInfo(Path.ChangeExtension(fi.Name, ".html"));
                    if (!string.IsNullOrEmpty(outputDirectory))
                    {
                        DirectoryInfo di = new DirectoryInfo(outputDirectory);
                        if (!di.Exists)
                        {
                            throw new OpenXmlPowerToolsException("Output directory does not exist");
                        }
                        destFileName = new FileInfo(Path.Combine(di.FullName, destFileName.Name));
                    }

                    // Output path without its extension, plus "_files".
                    var imageDirectoryName = Path.ChangeExtension(destFileName.FullName, null) + "_files";
                    int imageCounter       = 0;

                    // The core properties part is optional; fall back to the file path when
                    // the part or its title is missing.
                    string pageTitle = null;
                    var corePropertiesPart = wDoc.CoreFilePropertiesPart;
                    if (corePropertiesPart != null)
                    {
                        pageTitle = (string)corePropertiesPart.GetXDocument().Descendants(DC.title).FirstOrDefault();
                    }
                    if (pageTitle == null)
                    {
                        pageTitle = fi.FullName;
                    }

                    HtmlConverterSettings settings = new HtmlConverterSettings()
                    {
                        PageTitle                           = pageTitle,
                        FabricateCssClasses                 = true,
                        CssClassPrefix                      = "pt-",
                        RestrictToSupportedLanguages        = false,
                        RestrictToSupportedNumberingFormats = false,
                        ImageHandler                        = imageInfo =>
                        {
                            DirectoryInfo localDirInfo = new DirectoryInfo(imageDirectoryName);
                            if (!localDirInfo.Exists)
                            {
                                localDirInfo.Create();
                            }
                            ++imageCounter;

                            // A content type without a subtype cannot be mapped to a format.
                            string[] contentTypeParts = imageInfo.ContentType.Split('/');
                            if (contentTypeParts.Length < 2)
                            {
                                return null;
                            }

                            // Invariant lower-casing: the subtype is a protocol token, not
                            // user text (culture-sensitive casing breaks "GIF" under tr-TR).
                            string      extension = contentTypeParts[1].ToLowerInvariant();
                            ImageFormat imageFormat;
                            switch (extension)
                            {
                                case "png":
                                case "tiff":
                                    // png and tiff images are both saved as gif.
                                    extension   = "gif";
                                    imageFormat = ImageFormat.Gif;
                                    break;
                                case "gif":
                                    imageFormat = ImageFormat.Gif;
                                    break;
                                case "bmp":
                                    imageFormat = ImageFormat.Bmp;
                                    break;
                                case "jpeg":
                                    imageFormat = ImageFormat.Jpeg;
                                    break;
                                case "x-wmf":
                                    extension   = "wmf";
                                    imageFormat = ImageFormat.Wmf;
                                    break;
                                default:
                                    // If the image format isn't one that we expect, ignore it,
                                    // and don't return markup for the link.
                                    return null;
                            }

                            string imageFileName = imageDirectoryName + "/image" +
                                                   imageCounter.ToString() + "." + extension;
                            try
                            {
                                imageInfo.Bitmap.Save(imageFileName, imageFormat);
                            }
                            catch (System.Runtime.InteropServices.ExternalException)
                            {
                                // The image could not be written; omit it from the output.
                                return null;
                            }
                            XElement img = new XElement(Xhtml.img,
                                                        new XAttribute(NoNamespace.src, imageFileName),
                                                        imageInfo.ImgStyleAttribute,
                                                        imageInfo.AltText != null ?
                                                        new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);
                            return img;
                        }
                    };
                    XElement html = HtmlConverter.ConvertToHtml(wDoc, settings);

                    // Note: the xhtml returned by ConvertToHtmlTransform contains objects of type
                    // XEntity.  PtOpenXmlUtil.cs defines the XEntity class.  See
                    // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
                    // for a detailed explanation.
                    //
                    // If you further transform the XML tree returned by ConvertToHtmlTransform, you
                    // must do it correctly, or entities will not be serialized properly.

                    var htmlString = html.ToString(SaveOptions.DisableFormatting);
                    File.WriteAllText(destFileName.FullName, htmlString, Encoding.UTF8);
                }
            }
        }
        // Wraps the children of the given element in one h:div per run of adjacent children
        // whose section annotation serializes identically; right-to-left sections get dir="rtl".
        private static object CreateSectionDivs(WordprocessingDocument wordDoc, HtmlConverterSettings settings, XElement element)
        {
            // note: when building a paging html converter, need to attend to new sections with page breaks here.
            // Adjacent sections with identical formatting are conflated, which is not an issue
            // for the non-paging transform.
            var sections = element
                .Elements()
                .GroupAdjacent(e => e.Annotation<SectionAnnotation>().SectionElement.ToString());

            // note: when creating a paging html converter, need to pay attention to w:rtlGutter element.
            return sections.Select(g =>
            {
                var sectionAnnotation = g.First().Annotation<SectionAnnotation>();

                // w:bidi counts when it has no w:val or when w:val converts to true.
                var isRightToLeft = sectionAnnotation
                    .SectionElement
                    .Elements(W.bidi)
                    .Any(b => b.Attribute(W.val) == null || b.Attribute(W.val).ToBoolean() == true);

                // A null attribute is ignored by the XElement constructor.
                return new XElement(Xhtml.div,
                    isRightToLeft ? new XAttribute("dir", "rtl") : null,
                    CreateBorderDivs(wordDoc, settings, g));
            });
        }
 // Turns the Dictionary<string, string> style annotations on the XHTML tree into markup:
 // either generated CSS classes plus an h:style element (FabricateCssClasses), or inline
 // style attributes. The "PtStyleName" entry only names classes and is never emitted as CSS.
 private static void ReifyStylesAndClasses(HtmlConverterSettings htmlConverterSettings, XElement xhtml)
 {
     if (!htmlConverterSettings.FabricateCssClasses)
     {
         // Inline mode: append the sorted declarations to each element's style attribute.
         foreach (var node in xhtml.DescendantsAndSelf())
         {
             var annotation = node.Annotation<Dictionary<string, string>>();
             if (annotation == null)
                 continue;

             var inlineCss = annotation
                 .Where(p => p.Key != "PtStyleName")
                 .OrderBy(p => p.Key)
                 .Select(e => string.Format("{0}: {1};", e.Key, e.Value))
                 .StringConcatenate();

             var existingStyle = node.Attribute("style");
             if (existingStyle != null)
                 existingStyle.Value += inlineCss;
             else
                 node.Add(new XAttribute("style", inlineCss));
         }
         return;
     }

     // Class mode: elements with the same name and the same declarations share one class.
     var styleGroups = xhtml
         .DescendantsAndSelf()
         .Select(d => new
         {
             Element = d,
             Styles = d.Annotation<Dictionary<string, string>>(),
         })
         .Where(z => z.Styles != null)
         .Select(p => new
         {
             Element = p.Element,
             Styles = p.Styles,
             StylesString = p.Element.Name.LocalName + "|" + p.Styles.OrderBy(k => k.Key).Select(s => string.Format("{0}:{1};", s.Key, s.Value)).StringConcatenate(),
         })
         .GroupBy(p => p.StylesString)
         .ToList();

     var takenClassNames = new HashSet<string>();
     var css = new StringBuilder();
     css.Append(Environment.NewLine);

     // Counter starts at 1000000 and its first digit is dropped, giving six-digit suffixes.
     int nextSuffix = 1000000;
     foreach (var styleGroup in styleGroups)
     {
         var representative = styleGroup.First();
         var declarations = representative.Styles;

         string className;
         string styleName;
         if (declarations.TryGetValue("PtStyleName", out styleName))
         {
             className = htmlConverterSettings.CssClassPrefix + styleName;
             if (takenClassNames.Contains(className))
             {
                 // Same style name with different declarations: disambiguate with a suffix.
                 className = htmlConverterSettings.CssClassPrefix +
                     styleName + "-" +
                     nextSuffix.ToString().Substring(1);
                 nextSuffix++;
             }
         }
         else
         {
             className = htmlConverterSettings.CssClassPrefix +
                 nextSuffix.ToString().Substring(1);
             nextSuffix++;
         }
         takenClassNames.Add(className);

         css.Append(representative.Element.Name.LocalName)
             .Append(".")
             .Append(className)
             .Append(" {")
             .Append(Environment.NewLine);
         foreach (var declaration in declarations.Where(s => s.Key != "PtStyleName"))
         {
             css.Append("    ")
                 .Append(declaration.Key)
                 .Append(":")
                 .Append(declaration.Value)
                 .Append(";")
                 .Append(Environment.NewLine);
         }
         css.Append("}").Append(Environment.NewLine);

         var classAttribute = new XAttribute("class", className);
         foreach (var member in styleGroup)
             member.Element.Add(classAttribute);
     }

     // Store the CSS in the first existing h:style element, or add one to h:head if present.
     var cssText = css.ToString() + htmlConverterSettings.AdditionalCss;
     var styleElement = xhtml
         .Descendants(Xhtml.style)
         .FirstOrDefault();
     if (styleElement != null)
     {
         styleElement.Value = cssText;
         return;
     }

     var head = xhtml.Element(Xhtml.head);
     if (head != null)
         head.Add(new XElement(Xhtml.style, cssText));
 }
        /// <summary>
        /// Transforms a w:p element, together with any following sibling w:p element(s) that
        /// are chained to it by a style separator (i.e., a w:specVanish element). The chained
        /// siblings are rendered as h:span elements and appended to the element created for
        /// the first paragraph (e.g., an h:h2) rather than becoming h:p elements of their own.
        /// </summary>
        /// <returns>
        /// The converted element, or null when the preceding w:p sibling has a style separator
        /// (in which case this paragraph was already rendered with that one).
        /// </returns>
        private static object ProcessParagraph(WordprocessingDocument wordDoc, HtmlConverterSettings settings,
            XElement element, bool suppressTrailingWhiteSpace, decimal currentMarginLeft)
        {
            // A paragraph directly after a style separator was already emitted as a
            // span of its predecessor, so it contributes nothing here.
            if (HasStyleSeparator(element.ElementsBeforeSelf(W.p).LastOrDefault()))
            {
                return null;
            }

            var htmlName = GetParagraphElementName(element, wordDoc);
            var result = (XElement) ConvertParagraph(wordDoc, settings, element, htmlName,
                suppressTrailingWhiteSpace, currentMarginLeft, IsBidi(element));

            // Empty spans are invalid in HTML5; drop any that the conversion produced.
            result.Elements(Xhtml.span).Where(child => child.IsEmpty).Remove();

            // Walk forward through the chained paragraphs, appending each one as a span.
            for (var current = element; HasStyleSeparator(current);)
            {
                current = current.ElementsAfterSelf(W.p).FirstOrDefault();
                if (current == null)
                {
                    break;
                }

                result.Add(ConvertParagraph(wordDoc, settings, current, Xhtml.span,
                    suppressTrailingWhiteSpace, currentMarginLeft, IsBidi(current)));
            }

            return result;
        }
        /// <summary>
        /// Converts a w:tc element into an h:td element, translating its cell properties
        /// (vertical merge, vertical alignment, width, borders, shading, grid span) into
        /// rowspan/colspan attributes and a style dictionary attached as an annotation.
        /// </summary>
        /// <param name="wordDoc">The document being converted; passed through to the content conversion.</param>
        /// <param name="settings">Converter settings; passed through to the content conversion.</param>
        /// <param name="element">The w:tc element to convert.</param>
        /// <returns>
        /// The h:td element, or null when the cell merely continues a vertical merge that was
        /// started by a cell in an earlier row.
        /// </returns>
        private static object ProcessTableCell(WordprocessingDocument wordDoc, HtmlConverterSettings settings, XElement element)
        {
            var style = new Dictionary<string, string>();
            XAttribute colSpan = null;
            XAttribute rowSpan = null;

            var tcPr = element.Element(W.tcPr);
            if (tcPr != null)
            {
                if ((string) tcPr.Elements(W.vMerge).Attributes(W.val).FirstOrDefault() == "restart")
                {
                    // This cell starts a vertical merge: count how many of the following rows
                    // carry a continuing w:vMerge in the cell at the same position.
                    var currentRow = element.Parent.ElementsBeforeSelf(W.tr).Count();
                    var currentCell = element.ElementsBeforeSelf(W.tc).Count();
                    var tbl = element.Parent.Parent;
                    int rowSpanCount = 1;
                    currentRow += 1;
                    while (true)
                    {
                        var row = tbl.Elements(W.tr).Skip(currentRow).FirstOrDefault();
                        if (row == null)
                            break;
                        var cell2 = row.Elements(W.tc).Skip(currentCell).FirstOrDefault();
                        if (cell2 == null)
                            break;
                        // No w:vMerge at all: the merge ended in the previous row.
                        if (cell2.Elements(W.tcPr).Elements(W.vMerge).FirstOrDefault() == null)
                            break;
                        // Another "restart": a new, separate merge begins here.
                        if ((string) cell2.Elements(W.tcPr).Elements(W.vMerge).Attributes(W.val).FirstOrDefault() == "restart")
                            break;
                        currentRow += 1;
                        rowSpanCount += 1;
                    }
                    rowSpan = new XAttribute("rowspan", rowSpanCount);
                }

                // A w:vMerge that is not a "restart" continues a merge from above; that
                // cell is covered by the rowspan of the starting cell, so emit nothing.
                if (tcPr.Element(W.vMerge) != null &&
                    (string) tcPr.Elements(W.vMerge).Attributes(W.val).FirstOrDefault() != "restart")
                    return null;

                if (tcPr.Element(W.vAlign) != null)
                {
                    var vAlignVal = (string) tcPr.Elements(W.vAlign).Attributes(W.val).FirstOrDefault();
                    if (vAlignVal == "top")
                        style.AddIfMissing("vertical-align", "top");
                    else if (vAlignVal == "center")
                        style.AddIfMissing("vertical-align", "middle");
                    else if (vAlignVal == "bottom")
                        style.AddIfMissing("vertical-align", "bottom");
                    else
                        // Any other (or missing) value is rendered as "middle".
                        style.AddIfMissing("vertical-align", "middle");
                }
                // Fallback for cells without w:vAlign (AddIfMissing is presumed to leave an
                // already-present entry untouched).
                style.AddIfMissing("vertical-align", "top");

                var widthType = (string) tcPr.Elements(W.tcW).Attributes(W.type).FirstOrDefault();
                if (widthType == "dxa" || widthType == "pct")
                {
                    // Read w:w as a nullable int: a w:tcW that declares a type but omits w:w
                    // must not abort the whole conversion, so the width is simply left out.
                    var widthValue = (int?) tcPr.Elements(W.tcW).Attributes(W._w).FirstOrDefault();
                    if (widthValue != null)
                    {
                        decimal width = widthValue.Value;
                        if (widthType == "dxa")
                            // dxa value divided by 20 is emitted as points.
                            style.AddIfMissing("width", string.Format(NumberFormatInfo.InvariantInfo, "{0}pt", width/20m));
                        else
                            // pct value divided by 50 is emitted as a percentage.
                            style.AddIfMissing("width", string.Format(NumberFormatInfo.InvariantInfo, "{0:0.0}%", width/50m));
                    }
                }

                var tcBorders = tcPr.Element(W.tcBorders);
                GenerateBorderStyle(tcBorders, W.top, style, BorderType.Cell);
                GenerateBorderStyle(tcBorders, W.right, style, BorderType.Cell);
                GenerateBorderStyle(tcBorders, W.bottom, style, BorderType.Cell);
                GenerateBorderStyle(tcBorders, W.left, style, BorderType.Cell);

                CreateStyleFromShd(style, tcPr.Element(W.shd));

                var gridSpan = tcPr.Elements(W.gridSpan).Attributes(W.val).Select(a => (int?) a).FirstOrDefault();
                if (gridSpan != null)
                    colSpan = new XAttribute("colspan", (int) gridSpan);
            }
            style.AddIfMissing("padding-top", "0");
            style.AddIfMissing("padding-bottom", "0");

            // Null rowSpan/colSpan arguments are ignored by the XElement constructor.
            var cell = new XElement(Xhtml.td,
                rowSpan,
                colSpan,
                CreateBorderDivs(wordDoc, settings, element.Elements()));
            cell.AddAnnotation(style);
            return cell;
        }
        /// <summary>
        /// Converts the main document part of <paramref name="wordDoc"/> to an XHTML tree.
        /// The document is modified in place on the way: revisions are accepted, the markup
        /// is simplified and formatting is assembled before the transform runs.
        /// </summary>
        /// <param name="wordDoc">The document to convert.</param>
        /// <param name="htmlConverterSettings">Conversion options.</param>
        /// <returns>The root element produced by the transform.</returns>
        /// <remarks>
        /// The returned tree contains objects of type XEntity (defined in PtOpenXmlUtil.cs). See
        /// http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
        /// for a detailed explanation. If you transform the returned tree further, you must do
        /// it correctly, or entities will not be serialized properly.
        /// </remarks>
        public static XElement ConvertToHtml(WordprocessingDocument wordDoc, HtmlConverterSettings htmlConverterSettings)
        {
            InitEntityMap();
            RevisionAccepter.AcceptRevisions(wordDoc);

            // Step 1: strip markup that has no HTML counterpart.
            var markupOptions = new SimplifyMarkupSettings();
            markupOptions.RemoveComments = true;
            markupOptions.RemoveContentControls = true;
            markupOptions.RemoveEndAndFootNotes = true;
            markupOptions.RemoveFieldCodes = false;
            markupOptions.RemoveLastRenderedPageBreak = true;
            markupOptions.RemovePermissions = true;
            markupOptions.RemoveProof = true;
            markupOptions.RemoveRsidInfo = true;
            markupOptions.RemoveSmartTags = true;
            markupOptions.RemoveSoftHyphens = true;
            markupOptions.RemoveGoBackBookmark = true;
            markupOptions.ReplaceTabsWithSpaces = false;
            MarkupSimplifier.SimplifyMarkup(wordDoc, markupOptions);

            // Step 2: assemble the formatting, keeping style names for later CSS class generation.
            var formattingOptions = new FormattingAssemblerSettings();
            formattingOptions.RemoveStyleNamesFromParagraphAndRunProperties = false;
            formattingOptions.ClearStyles = false;
            formattingOptions.RestrictToSupportedLanguages = htmlConverterSettings.RestrictToSupportedLanguages;
            formattingOptions.RestrictToSupportedNumberingFormats = htmlConverterSettings.RestrictToSupportedNumberingFormats;
            formattingOptions.CreateHtmlConverterAnnotationAttributes = true;
            formattingOptions.OrderElementsPerStandard = false;
            var listItemOptions = new ListItemRetrieverSettings();
            listItemOptions.ListItemTextImplementations = htmlConverterSettings.ListItemImplementations;
            formattingOptions.ListItemRetrieverSettings = listItemOptions;
            FormattingAssembler.AssembleFormatting(wordDoc, formattingOptions);

            // Step 3: document-wide preparation passes.
            InsertAppropriateNonbreakingSpaces(wordDoc);
            CalculateSpanWidthForTabs(wordDoc);
            ReverseTableBordersForRtlTables(wordDoc);
            AdjustTableBorders(wordDoc);

            var mainPart = wordDoc.MainDocumentPart;
            var documentRoot = mainPart.GetXDocument().Root;
            FieldRetriever.AnnotateWithFieldInfo(wordDoc.MainDocumentPart);
            AnnotateForSections(wordDoc);

            // Step 4: transform, then turn the collected style annotations into CSS.
            var xhtml = (XElement)ConvertToHtmlTransform(wordDoc, htmlConverterSettings,
                documentRoot, false, 0m);
            ReifyStylesAndClasses(htmlConverterSettings, xhtml);
            return xhtml;
        }
Example #19
0
        /// <summary>
        /// Recursively transforms a WordprocessingML node into XHTML content.
        /// </summary>
        /// <param name="wordDoc">
        /// The document that owns <paramref name="node"/>; its style definitions and hyperlink
        /// relationships are consulted during the transform.
        /// </param>
        /// <param name="settings">Supplies the page title, the CSS and the CSS class prefix.</param>
        /// <param name="node">The node to transform.</param>
        /// <param name="imageHandler">
        /// Passed to ProcessImage for w:drawing and w:pict elements; when null, those
        /// elements are dropped.
        /// </param>
        /// <returns>
        /// An XElement, an XText, the result of ConvertEntities, a lazy sequence of transformed
        /// children, or null for nodes that have no transformation (which removes them).
        /// </returns>
        private static object ConvertToHtmlTransform(WordprocessingDocument wordDoc,
                                                     HtmlConverterSettings settings, XNode node,
                                                     Func<ImageInfo, XElement> imageHandler)
        {
            XElement element = node as XElement;
            if (element == null)
            {
                return null;
            }

            // Transform the w:document element to the XHTML h:html element with its h:head.
            if (element.Name == W.document)
            {
                return new XElement(Xhtml.html,
                    new XElement(Xhtml.head,
                        new XElement(Xhtml.meta,
                            new XAttribute(HtmlNoNamespace.http_equiv, "Content-Type"),
                            new XAttribute(HtmlNoNamespace.content,
                                "text/html; charset=windows-1252")),
                        new XElement(Xhtml.meta,
                            new XAttribute(HtmlNoNamespace.name, "Generator"),
                            new XAttribute(HtmlNoNamespace.content,
                                "PowerTools for Open XML")),
                        settings.PageTitle != null
                            ? new XElement(Xhtml.title, settings.PageTitle)
                            : null,
                        settings.Css != null
                            ? new XElement(Xhtml.style,
                                new XComment(Environment.NewLine + settings.Css + Environment.NewLine))
                            : null),
                    element.Elements().Select(e => ConvertToHtmlTransform(
                        wordDoc, settings, e, imageHandler)));
            }

            // Transform the w:body element to the XHTML h:body element.
            if (element.Name == W.body)
            {
                return new XElement(Xhtml.body,
                    element.Elements().Select(e => ConvertToHtmlTransform(
                        wordDoc, settings, e, imageHandler)));
            }

            if (element.Name == W.p)
            {
                string styleId = (string)element.Elements(W.pPr).Elements(W.pStyle)
                    .Attributes(W.val).FirstOrDefault();

                // The styles part is optional in a package; treat a missing part as
                // "no styles defined" instead of dereferencing null.
                var stylesPart = wordDoc.MainDocumentPart.StyleDefinitionsPart;
                var styleDefinitions = stylesPart != null
                    ? stylesPart.GetXDocument().Root.Elements(W.style)
                    : Enumerable.Empty<XElement>();

                // Transform every paragraph with a style that has paragraph properties
                // that has an outline level into the same level of heading.  This takes
                // care of transforming headings of every level.
                XElement style = styleDefinitions
                    .FirstOrDefault(s => (string)s.Attribute(W.styleId) == styleId);
                if (style != null)
                {
                    int? outlineLevel = (int?)style.Elements(W.pPr)
                        .Elements(W.outlineLvl).Attributes(W.val).FirstOrDefault();
                    if (outlineLevel != null)
                    {
                        return new XElement(Xhtml.xhtml + string.Format("h{0}", outlineLevel + 1),
                            settings.CssClassPrefix != null
                                ? new XAttribute(HtmlNoNamespace._class,
                                    settings.CssClassPrefix + styleId)
                                : null,
                            ConvertEntities(ListItemRetriever.RetrieveListItem(wordDoc,
                                element, null)),
                            element.Elements().Select(e => ConvertToHtmlTransform(wordDoc,
                                settings, e, imageHandler)));
                    }
                }

                // Transform w:p to h:p.  A paragraph without an explicit style takes the
                // id of the default paragraph style, when the document defines one.
                if (styleId == null)
                {
                    XElement defaultParagraphStyle = styleDefinitions
                        .FirstOrDefault(s => (string)s.Attribute(W.type) == "paragraph" &&
                                             (string)s.Attribute(W._default) == "1");
                    if (defaultParagraphStyle != null)
                    {
                        styleId = (string)defaultParagraphStyle.Attribute(W.styleId);
                    }
                }

                return new XElement(Xhtml.p,
                    styleId != null && settings.CssClassPrefix != null
                        ? new XAttribute(HtmlNoNamespace._class,
                            settings.CssClassPrefix + styleId)
                        : null,
                    ConvertEntities(ListItemRetriever.RetrieveListItem(wordDoc,
                        element, null)),
                    element.Elements().Select(e => ConvertToHtmlTransform(wordDoc,
                        settings, e, imageHandler)));
            }

            // Transform every hyperlink in the document to the XHTML h:A element.
            if (element.Name == W.hyperlink && element.Attribute(R.id) != null)
            {
                string relationshipId = (string)element.Attribute(R.id);
                try
                {
                    var relationship = wordDoc.MainDocumentPart
                        .HyperlinkRelationships
                        .FirstOrDefault(x => x.Id == relationshipId);
                    if (relationship != null)
                    {
                        return new XElement(Xhtml.A,
                            new XAttribute(HtmlNoNamespace.href, relationship.Uri),
                            ConvertEntities(element.Elements(W.r)
                                .Elements(W.t)
                                .Select(t => (string)t).StringConcatenate()));
                    }
                }
                catch (UriFormatException)
                {
                    // Malformed target: fall through and emit the link content without an anchor.
                }

                // Either the target URI is malformed or r:id refers to a relationship that
                // does not exist; keep the content, drop the link.
                return element.Elements().Select(e => ConvertToHtmlTransform(wordDoc,
                    settings, e, imageHandler));
            }

            if (element.Name == W.r)
            {
                // Transform contents of runs that are part of a hyperlink (runs annotated
                // with field information that carries at least one argument).
                FieldInfo fieldInfo = element.Annotation<FieldInfo>();
                if (fieldInfo != null && fieldInfo.Arguments.Length > 0)
                {
                    return new XElement(Xhtml.A,
                        new XAttribute(HtmlNoNamespace.href, fieldInfo.Arguments[0]),
                        ConvertEntities(element.Elements(W.t)
                            .Select(t => (string)t).StringConcatenate()));
                }

                // Transform contents of runs.
                return element.Elements().Select(e => ConvertToHtmlTransform(wordDoc,
                    settings, e, imageHandler));
            }

            // Transform every w:t element to a text node.
            if (element.Name == W.t)
            {
                return ConvertEntities(element.Value);
            }

            // Transform w:br to h:br.
            if (element.Name == W.br || element.Name == W.cr)
            {
                return new XElement(Xhtml.br);
            }

            // Transform w:noBreakHyphen to '-'
            if (element.Name == W.noBreakHyphen)
            {
                return new XText("-");
            }

            // Transform w:tbl to h:tbl.
            if (element.Name == W.tbl)
            {
                return new XElement(Xhtml.table,
                    new XAttribute(HtmlNoNamespace.border, 1),
                    element.Elements().Select(e => ConvertToHtmlTransform(wordDoc,
                        settings, e, imageHandler)));
            }

            // Transform w:tr to h:tr.
            if (element.Name == W.tr)
            {
                return new XElement(Xhtml.tr,
                    element.Elements().Select(e => ConvertToHtmlTransform(wordDoc,
                        settings, e, imageHandler)));
            }

            // Transform w:tc to h:td.
            if (element.Name == W.tc)
            {
                return new XElement(Xhtml.td,
                    element.Elements().Select(e => ConvertToHtmlTransform(wordDoc,
                        settings, e, imageHandler)));
            }

            // Transform images.
            if (element.Name == W.drawing || element.Name == W.pict)
            {
                if (imageHandler == null)
                {
                    return null;
                }
                return ProcessImage(wordDoc, element, imageHandler);
            }

            // The following removes any nodes that haven't been transformed.
            return null;
        }
 /// <summary>
 /// Convenience overload: forwards to the three-argument ConvertToHtml overload,
 /// passing null as the third argument.
 /// </summary>
 /// <param name="wordDoc">The document to convert; passed through unchanged.</param>
 /// <param name="htmlConverterSettings">Conversion settings; passed through unchanged.</param>
 /// <returns>Whatever the three-argument overload returns.</returns>
 public static XElement ConvertToHtml(WordprocessingDocument wordDoc,
     HtmlConverterSettings htmlConverterSettings)
 {
     return ConvertToHtml(wordDoc, htmlConverterSettings, null);
 }
        /// <summary>
        /// Recursively transforms a WordprocessingML node into XHTML content.
        /// </summary>
        /// <param name="wordDoc">
        /// The document that owns <paramref name="node"/>; its style definitions and hyperlink
        /// relationships are consulted during the transform.
        /// </param>
        /// <param name="settings">Supplies the page title, the CSS and the CSS class prefix.</param>
        /// <param name="node">The node to transform.</param>
        /// <param name="imageHandler">
        /// Passed to ProcessImage for w:drawing and w:pict elements; when null, those
        /// elements are dropped.
        /// </param>
        /// <returns>
        /// An XElement, an XText, the result of ConvertEntities, a lazy sequence of transformed
        /// children, or null for nodes that have no transformation (which removes them).
        /// </returns>
        private static object ConvertToHtmlTransform(WordprocessingDocument wordDoc,
            HtmlConverterSettings settings, XNode node,
            Func<ImageInfo, XElement> imageHandler)
        {
            XElement element = node as XElement;
            if (element == null)
                return null;

            // Transform the w:document element to the XHTML h:html element with its h:head.
            if (element.Name == W.document)
                return new XElement(Xhtml.html,
                    new XElement(Xhtml.head,
                        new XElement(Xhtml.meta,
                            new XAttribute(HtmlNoNamespace.http_equiv, "Content-Type"),
                            new XAttribute(HtmlNoNamespace.content,
                                "text/html; charset=windows-1252")),
                        new XElement(Xhtml.meta,
                            new XAttribute(HtmlNoNamespace.name, "Generator"),
                            new XAttribute(HtmlNoNamespace.content,
                                "PowerTools for Open XML")),
                        settings.PageTitle != null ? new XElement(Xhtml.title,
                            settings.PageTitle) : null,
                        settings.Css != null ? new XElement(Xhtml.style,
                            new XComment(Environment.NewLine +
                                settings.Css + Environment.NewLine)) : null
                    ),
                    element.Elements().Select(e => ConvertToHtmlTransform(
                        wordDoc, settings, e, imageHandler))
                );

            // Transform the w:body element to the XHTML h:body element.
            if (element.Name == W.body)
                return new XElement(Xhtml.body,
                    element.Elements().Select(e => ConvertToHtmlTransform(
                        wordDoc, settings, e, imageHandler)));

            if (element.Name == W.p)
            {
                string styleId = (string)element.Elements(W.pPr).Elements(W.pStyle)
                    .Attributes(W.val).FirstOrDefault();

                // The styles part is optional in a package; treat a missing part as
                // "no styles defined" instead of dereferencing null.
                var stylesPart = wordDoc.MainDocumentPart.StyleDefinitionsPart;
                var styleDefinitions = stylesPart != null
                    ? stylesPart.GetXDocument().Root.Elements(W.style)
                    : Enumerable.Empty<XElement>();

                // Transform every paragraph with a style that has paragraph properties
                // that has an outline level into the same level of heading.  This takes
                // care of transforming headings of every level.
                XElement style = styleDefinitions
                    .Where(s => (string)s.Attribute(W.styleId) == styleId)
                    .FirstOrDefault();
                if (style != null)
                {
                    int? outlineLevel = (int?)style.Elements(W.pPr)
                        .Elements(W.outlineLvl).Attributes(W.val).FirstOrDefault();
                    if (outlineLevel != null)
                    {
                        return new XElement(Xhtml.xhtml + string.Format("h{0}",
                            outlineLevel + 1),
                            settings.CssClassPrefix != null ?
                                new XAttribute(HtmlNoNamespace._class,
                                    settings.CssClassPrefix + styleId) : null,
                            ConvertEntities(ListItemRetriever.RetrieveListItem(wordDoc,
                                element, null)),
                            element.Elements().Select(e => ConvertToHtmlTransform(wordDoc,
                                settings, e, imageHandler)));
                    }
                }

                // Transform w:p to h:p.  A paragraph without an explicit style takes the
                // id of the default paragraph style, when the document defines one.
                if (styleId == null)
                {
                    XElement defaultParagraphStyle = styleDefinitions
                        .Where(s => (string)s.Attribute(W.type) == "paragraph" &&
                            (string)s.Attribute(W._default) == "1")
                        .FirstOrDefault();
                    if (defaultParagraphStyle != null)
                        styleId = (string)defaultParagraphStyle.Attribute(W.styleId);
                }

                XElement z = new XElement(Xhtml.p,
                    styleId != null && settings.CssClassPrefix != null ?
                        new XAttribute(HtmlNoNamespace._class,
                            settings.CssClassPrefix + styleId) : null,
                    ConvertEntities(ListItemRetriever.RetrieveListItem(wordDoc,
                        element, null)),
                    element.Elements().Select(e => ConvertToHtmlTransform(wordDoc,
                        settings, e, imageHandler)));
                return z;
            }

            // Transform every hyperlink in the document to the XHTML h:A element.
            if (element.Name == W.hyperlink && element.Attribute(R.id) != null)
            {
                string relationshipId = (string)element.Attribute(R.id);
                try
                {
                    var relationship = wordDoc.MainDocumentPart
                        .HyperlinkRelationships
                        .Where(x => x.Id == relationshipId)
                        .FirstOrDefault();
                    if (relationship != null)
                        return new XElement(Xhtml.A,
                            new XAttribute(HtmlNoNamespace.href, relationship.Uri),
                            ConvertEntities(element.Elements(W.r)
                                .Elements(W.t)
                                .Select(t => (string)t).StringConcatenate())
                        );
                }
                catch (UriFormatException)
                {
                    // Malformed target: fall through and emit the link content without an anchor.
                }

                // Either the target URI is malformed or r:id refers to a relationship that
                // does not exist; keep the content, drop the link.
                return element.Elements().Select(e => ConvertToHtmlTransform(wordDoc,
                    settings, e, imageHandler));
            }

            if (element.Name == W.r)
            {
                // Transform contents of runs that are part of a hyperlink (runs annotated
                // with field information that carries at least one argument).
                FieldInfo fieldInfo = element.Annotation<FieldInfo>();
                if (fieldInfo != null && fieldInfo.Arguments.Length > 0)
                    return new XElement(Xhtml.A,
                        new XAttribute(HtmlNoNamespace.href, fieldInfo.Arguments[0]),
                        ConvertEntities(element.Elements(W.t)
                            .Select(t => (string)t).StringConcatenate())
                    );

                // Transform contents of runs.
                return element.Elements().Select(e => ConvertToHtmlTransform(wordDoc,
                    settings, e, imageHandler));
            }

            // Transform every w:t element to a text node.
            if (element.Name == W.t)
                return ConvertEntities(element.Value);

            // Transform w:br to h:br.
            if (element.Name == W.br || element.Name == W.cr)
                return new XElement(Xhtml.br);

            // Transform w:noBreakHyphen to '-'
            if (element.Name == W.noBreakHyphen)
                return new XText("-");

            // Transform w:tbl to h:tbl.
            if (element.Name == W.tbl)
                return new XElement(Xhtml.table,
                    new XAttribute(HtmlNoNamespace.border, 1),
                    element.Elements().Select(e => ConvertToHtmlTransform(wordDoc,
                        settings, e, imageHandler)));

            // Transform w:tr to h:tr.
            if (element.Name == W.tr)
                return new XElement(Xhtml.tr,
                    element.Elements().Select(e => ConvertToHtmlTransform(wordDoc,
                        settings, e, imageHandler)));

            // Transform w:tc to h:td.
            if (element.Name == W.tc)
                return new XElement(Xhtml.td,
                    element.Elements().Select(e => ConvertToHtmlTransform(wordDoc,
                        settings, e, imageHandler)));

            // Transform images.
            if (element.Name == W.drawing || element.Name == W.pict)
            {
                if (imageHandler == null)
                    return null;
                return ProcessImage(wordDoc, element, imageHandler);
            }

            // The following removes any nodes that haven't been transformed.
            return null;
        }
 /// <summary>
 /// Builds an h:a element whose href is "#" followed by the w:anchor attribute of
 /// <paramref name="element"/>, containing the converted w:r children, and annotated
 /// with a style dictionary that sets text-decoration to none.
 /// </summary>
 private static object ProcessHyperlinkToBookmark(WordprocessingDocument wordDoc, HtmlConverterSettings settings, XElement element)
 {
     // Lazy sequence; it is enumerated when the XElement below is constructed.
     var convertedRuns = element.Elements(W.r).Select(run => ConvertRun(wordDoc, settings, run));

     var link = new XElement(Xhtml.a,
         new XAttribute("href", "#" + (string) element.Attribute(W.anchor)),
         convertedRuns);

     link.AddAnnotation(new Dictionary<string, string>
     {
         { "text-decoration", "none" }
     });
     return link;
 }
 /// <summary>
 /// Groups adjacent elements that share the same abstract numbering id, or the same
 /// paragraph style when w:contextualSpacing is present, and transforms every element.
 /// Within such a group, trailing white space is suppressed for every element except
 /// the last one; elements outside any group are transformed without suppression.
 /// </summary>
 /// <returns>One lazily evaluated sequence of transformed content per group.</returns>
 private static IEnumerable<object> GroupAndVerticallySpaceNumberedParagraphs(WordprocessingDocument wordDoc, HtmlConverterSettings settings,
     IEnumerable<XElement> elements, decimal currentMarginLeft)
 {
     // Grouping key: "num:<id>" for numbered items, "sty:<style>" for styled paragraphs
     // with contextual spacing, and "" for everything else.
     Func<XElement, string> groupKey = candidate =>
     {
         var numberingId = (string)candidate.Attribute(PtOpenXml.pt + "AbstractNumId");
         if (numberingId != null)
             return "num:" + numberingId;

         var hasContextualSpacing = candidate.Elements(W.pPr).Elements(W.contextualSpacing).Any();
         if (!hasContextualSpacing)
             return "";

         var paragraphStyle = (string)candidate.Elements(W.pPr).Elements(W.pStyle).Attributes(W.val).FirstOrDefault();
         return paragraphStyle != null ? "sty:" + paragraphStyle : "";
     };

     var groups = elements.GroupAdjacent(groupKey).ToList();

     var transformed = groups.Select(group =>
     {
         // Only keyed groups get vertical spacing treatment; the count is needed
         // (and computed) for those groups alone.
         var isSpacedGroup = group.Key != "";
         var lastIndex = isSpacedGroup ? group.Count() - 1 : -1;
         return group.Select((member, index) =>
             ConvertToHtmlTransform(wordDoc, settings, member,
                 isSpacedGroup && index != lastIndex, currentMarginLeft));
     });

     return transformed.Cast<object>();
 }
 /// <summary>
 /// Converts the children of a content control's w:sdtContent.
 /// </summary>
 /// <remarks>
 /// The control is treated as run-level when a w:p appears among its ancestors before any
 /// w:txbxContent ancestor is reached. Run-level content is converted child by child;
 /// any other content is handed to <c>CreateBorderDivs</c>.
 /// </remarks>
 /// <param name="wordDoc">Document handed through to the converters.</param>
 /// <param name="settings">Converter settings handed through to the converters.</param>
 /// <param name="element">The content control element.</param>
 /// <param name="currentMarginLeft">Left margin used when converting run-level children.</param>
 /// <returns>
 /// A list of converted children for a run-level control, otherwise the result of
 /// <c>CreateBorderDivs</c>.
 /// </returns>
 private static object ProcessContentControl(WordprocessingDocument wordDoc, HtmlConverterSettings settings,
     XElement element, decimal currentMarginLeft)
 {
     var contentChildren = element.Elements(W.sdtContent).Elements();

     var insideParagraph = element.Ancestors()
         .TakeWhile(ancestor => ancestor.Name != W.txbxContent)
         .Any(ancestor => ancestor.Name == W.p);

     if (!insideParagraph)
         return CreateBorderDivs(wordDoc, settings, contentChildren);

     return contentChildren
         .Select(child => ConvertToHtmlTransform(wordDoc, settings, child, false, currentMarginLeft))
         .ToList();
 }
        /// <summary>
        /// Recursively transforms one WordprocessingML node into its XHTML counterpart.
        /// </summary>
        /// <remarks>
        /// Dispatches on the element name (w:document, w:body, w:p, w:hyperlink, w:r,
        /// w:bookmarkStart, w:t, w:sym, w:tab, w:br/w:cr, w:noBreakHyphen, w:tbl, w:tr, w:tc,
        /// images, w:sdt, w:smartTag, w:fldSimple). Nodes that are not elements, and elements
        /// that match none of the branches, yield <c>null</c> and so are removed from the output.
        /// CSS lengths are formatted with the invariant culture so the decimal separator is
        /// always ".".
        /// </remarks>
        /// <param name="wordDoc">Document supplying styles, hyperlink relationships and images.</param>
        /// <param name="settings">Converter settings (page title, image handler).</param>
        /// <param name="node">The node to transform.</param>
        /// <param name="suppressTrailingWhiteSpace">Handed to <c>ConvertParagraph</c> for w:p elements.</param>
        /// <param name="currentMarginLeft">Handed to <c>ConvertParagraph</c> and to recursive calls.</param>
        /// <returns>
        /// An <see cref="XElement"/>, an <see cref="XText"/>, a sequence or array of converted
        /// objects, whatever a helper returns, or <c>null</c> when the node is not transformed.
        /// </returns>
        private static object ConvertToHtmlTransform(WordprocessingDocument wordDoc,
            HtmlConverterSettings settings, XNode node,
            bool suppressTrailingWhiteSpace,
            decimal currentMarginLeft)
        {
            XElement element = node as XElement;
            if (element != null)
            {
                // CSS requires "." as the decimal separator regardless of the thread culture.
                var invariant = System.Globalization.CultureInfo.InvariantCulture;

                if (element.Name == W.document)
                    return new XElement(Xhtml.html,
                        new XElement(Xhtml.head,
                            new XElement(Xhtml.meta,
                                new XAttribute("http-equiv", "Content-Type"),
                                new XAttribute("content", "text/html; charset=utf-8")),
                            new XElement(Xhtml.meta,
                                new XAttribute("name", "Generator"),
                                new XAttribute("content", "PowerTools for Open XML")),
                            settings.PageTitle != null ? new XElement(Xhtml.title,
                                new XText(settings.PageTitle)) : null
                        ),
                        element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, currentMarginLeft))
                    );

                // Transform the w:body element to the XHTML h:body element.
                if (element.Name == W.body)
                {
                    var sectionDivContent = new XElement(Xhtml.body,
                        CreateSectionDivs(wordDoc, settings, element));
                    return sectionDivContent;
                }

                // Transform w:p to h:p, or to h:h1..h:h6 when the paragraph style has an outline level.
                if (element.Name == W.p)
                {
                    var bidi = element
                        .Elements(W.pPr)
                        .Elements(W.bidi)
                        .Where(b => b.Attribute(W.val) == null || b.Attribute(W.val).ToBoolean() == true)
                        .FirstOrDefault();
                    var isBidi = bidi != null;
                    string styleId = (string)element.Elements(W.pPr).Elements(W.pStyle)
                        .Attributes(W.val).FirstOrDefault();

                    // Default to h:p. This also covers a style id with no matching definition
                    // (or a document without a styles part), so such paragraphs are still emitted
                    // instead of falling through to the final "return null".
                    XName elementName = Xhtml.p;
                    if (styleId != null)
                    {
                        var stylesPart = wordDoc.MainDocumentPart.StyleDefinitionsPart;
                        XElement style = stylesPart == null
                            ? null
                            : stylesPart.GetXDocument().Root.Elements(W.style)
                                .Where(s => (string)s.Attribute(W.styleId) == styleId)
                                .FirstOrDefault();
                        if (style != null)
                        {
                            int? outlineLevel = (int?)style.Elements(W.pPr)
                                .Elements(W.outlineLvl).Attributes(W.val).FirstOrDefault();
                            // Outline levels 0..5 map onto h1..h6.
                            if (outlineLevel != null && outlineLevel <= 5)
                                elementName = Xhtml.xhtml + string.Format("h{0}",
                                    outlineLevel + 1);
                        }
                    }
                    return ConvertParagraph(wordDoc, settings, element, elementName, suppressTrailingWhiteSpace, currentMarginLeft, isBidi);
                }

                // Transform hyperlinks to the XHTML h:A element.
                if (element.Name == W.hyperlink && element.Attribute(R.id) != null)
                {
                    try
                    {
                        return new XElement(Xhtml.A,
                            new XAttribute("href",
                                wordDoc.MainDocumentPart
                                    .HyperlinkRelationships
                                    .Where(x => x.Id == (string)element.Attribute(R.id))
                                    .First()
                                    .Uri
                            ),
                            element.Elements(W.r).Select(run => ConvertRun(wordDoc, settings, run))
                        );
                    }
                    catch (UriFormatException)
                    {
                        // The target is not a valid URI: emit the link's content without the anchor.
                        return element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, currentMarginLeft));
                    }
                }

                // Transform hyperlinks to bookmarks to the XHTML h:A element.
                if (element.Name == W.hyperlink && element.Attribute(W.anchor) != null)
                {
                    var style = new Dictionary<string, string>();
                    var a = new XElement(Xhtml.A,
                        new XAttribute("href", "#" + (string)element.Attribute(W.anchor)),
                        element
                            .Elements(W.r)
                            .Select(run => ConvertRun(wordDoc, settings, run)));
                    style.Add("text-decoration", "none");
                    a.AddAnnotation(style);
                    return a;
                }

                // Transform contents of runs.
                if (element.Name == W.r)
                    return ConvertRun(wordDoc, settings, element);

                // Transform w:bookmarkStart into anchor
                if (element.Name == W.bookmarkStart)
                {
                    var name = (string)element.Attribute(W.name);
                    if (name != null)
                    {
                        var style = new Dictionary<string, string>();
                        var a = new XElement(Xhtml.A,
                            new XAttribute("id", name),
                            new XText(""));
                        style.Add("text-decoration", "none");
                        a.AddAnnotation(style);
                        return a;
                    }
                }

                // Transform every w:t element to a text node.
                if (element.Name == W.t)
                {
                    var textWithEntities = ConvertEntities(element.Value);
                    return textWithEntities;
                }

                // Transform symbols to spans
                if (element.Name == W.sym)
                {
                    var cs = (string)element.Attribute(W._char);
                    var c = Convert.ToInt32(cs, 16);
                    var symbolSpan = new XElement(Xhtml.span, new XEntity(string.Format("#{0}", ((int)c).ToString())));
                    return symbolSpan;
                }

                // Transform tabs that have the pt:TabWidth attribute set
                if (element.Name == W.tab)
                {
                    var tabWidthAtt = element.Attribute(PtOpenXml.TabWidth);
                    if (tabWidthAtt != null)
                    {
                        var leader = (string)element.Attribute(PtOpenXml.Leader);
                        var tabWidth = (decimal)tabWidthAtt;
                        var style = new Dictionary<string, string>();
                        XElement span;
                        if (leader != null)
                        {
                            // Any leader value other than "hyphen" or "underscore" renders as dots.
                            var leaderChar = ".";
                            if (leader == "hyphen")
                                leaderChar = "-";
                            else if (leader == "dot")
                                leaderChar = ".";
                            else if (leader == "underscore")
                                leaderChar = "_";

                            var runContainingTabToReplace = element.Ancestors(W.r).First();
                            var fontNameAtt = runContainingTabToReplace.Attribute(PtOpenXml.pt + "FontName");
                            if (fontNameAtt == null)
                                fontNameAtt = runContainingTabToReplace.Ancestors(W.p).First()
                                    .Attribute(PtOpenXml.pt + "FontName");

                            // Measure a single leader character in the run's font.
                            var dummyRun = new XElement(W.r,
                                fontNameAtt,
                                runContainingTabToReplace.Elements(W.rPr),
                                new XElement(W.t, leaderChar));

                            var widthOfLeaderChar = CalcWidthOfRunInTwips(dummyRun);

                            // A zero width means the measurement failed; retry with Arial.
                            bool forceArial = false;
                            if (widthOfLeaderChar == 0)
                            {
                                dummyRun = new XElement(W.r,
                                    new XAttribute(PtOpenXml.FontName, "Arial"),
                                    runContainingTabToReplace.Elements(W.rPr),
                                    new XElement(W.t, leaderChar));
                                widthOfLeaderChar = CalcWidthOfRunInTwips(dummyRun);
                                forceArial = true;
                            }

                            if (widthOfLeaderChar != 0)
                            {
                                // tabWidth is in inches; 1440 twips per inch.
                                var numberOfLeaderChars = (int)(Math.Floor((tabWidth * 1440) / widthOfLeaderChar));
                                if (numberOfLeaderChars < 0)
                                    numberOfLeaderChars = 0;
                                span = new XElement(Xhtml.span,
                                    " " + "".PadRight(numberOfLeaderChars, leaderChar[0]) + " ");
                                style.Add("margin", "0 0 0 0");
                                style.Add("padding", "0 0 0 0");
                                style.Add("width", string.Format(invariant, "{0:0.00}in", tabWidth));
                                style.Add("text-align", "center");
                                if (forceArial)
                                    style.Add("font-family", "Arial");
                            }
                            else
                            {
                                span = new XElement(Xhtml.span, " ");
                                style.Add("margin", "0 0 0 0");
                                style.Add("padding", "0 0 0 0");
                                style.Add("width", string.Format(invariant, "{0:0.00}in", tabWidth));
                                style.Add("text-align", "center");
                                if (leader == "underscore")
                                {
                                    style.Add("text-decoration", "underline");
                                }
                            }
                        }
                        else
                        {
#if false
                            var bidi = element
                                .Ancestors(W.p)
                                .Take(1)
                                .Elements(W.pPr)
                                .Elements(W.bidi)
                                .Where(b => b.Attribute(W.val) == null || b.Attribute(W.val).ToBoolean() == true)
                                .FirstOrDefault();
                            var isBidi = bidi != null;
                            if (isBidi)
                                span = new XElement(Xhtml.span, new XEntity("#x200f")); // RLM
                            else
                                span = new XElement(Xhtml.span, new XEntity("#x200e")); // LRM
#else
                            span = new XElement(Xhtml.span, new XEntity("nbsp"));
#endif
                            style.Add("margin", string.Format(invariant, "0 0 0 {0:0.00}in", tabWidth));
                            style.Add("padding", "0 0 0 0");
                        }
                        span.AddAnnotation(style);
                        return span;
                    }
                }

                // Transform w:br to h:br.
                if (element.Name == W.br || element.Name == W.cr)
                {
                    XElement span = null;
                    var tabWidth = (decimal?)element.Attribute(PtOpenXml.TabWidth);
                    if (tabWidth != null)
                    {
                        span = new XElement(Xhtml.span);
                        var style = new Dictionary<string, string>();
                        style.Add("margin", string.Format(invariant, "0 0 0 {0:0.00}in", tabWidth));
                        style.Add("padding", "0 0 0 0");
                        span.AddAnnotation(style);
                    }
                    var paragraph = element.Ancestors(W.p).FirstOrDefault();
                    bool isBidi = false;
                    if (paragraph != null)
                    {
                        var bidi = paragraph
                            .Elements(W.pPr)
                            .Elements(W.bidi)
                            .Where(b => b.Attribute(W.val) == null || b.Attribute(W.val).ToBoolean() == true)
                            .FirstOrDefault();
                        isBidi = bidi != null;
                    }
                    var br = new XElement(Xhtml.br);
                    XEntity zeroWidthChar = null;
                    if (isBidi)
                        zeroWidthChar = new XEntity("#x200f"); // RLM
                    else
                        zeroWidthChar = new XEntity("#x200e"); // LRM
                    // span stays null when the break has no pt:TabWidth.
                    return new object[] {
                        br,
                        zeroWidthChar,
                        span,
                    };
                }

                // Transform w:noBreakHyphen to '-'
                if (element.Name == W.noBreakHyphen)
                    return new XText("-");

                // Transform w:tbl to h:tbl.
                if (element.Name == W.tbl)
                {
                    var style = new Dictionary<string, string>();
                    style.AddIfMissing("border-collapse", "collapse");
                    style.AddIfMissing("border", "none");
                    var bidiVisual = element.Elements(W.tblPr).Elements(W.bidiVisual).FirstOrDefault();
                    var tblW = element.Elements(W.tblPr).Elements(W.tblW).FirstOrDefault();
                    if (tblW != null)
                    {
                        var type = (string)tblW.Attribute(W.type);
                        if (type != null && type == "pct")
                        {
                            // Percentage widths are stored in fiftieths of a percent.
                            var w = (int)tblW.Attribute(W._w);
                            style.AddIfMissing("width", (w / 50).ToString() + "%");
                        }
                    }
                    var tblInd = element.Elements(W.tblPr).Elements(W.tblInd).FirstOrDefault();
                    if (tblInd != null)
                    {
                        var tblIndType = (string)tblInd.Attribute(W.type);
                        if (tblIndType != null)
                        {
                            if (tblIndType == "dxa")
                            {
                                var width = (decimal?)tblInd.Attribute(W._w);
                                if (width != null)
                                {
                                    // Twentieths of a point to points.
                                    style.AddIfMissing("margin-left", string.Format(invariant, "{0}pt", width / 20m));
                                }
                            }
                        }
                    }
                    XAttribute tableDirection = null;
                    if (bidiVisual != null)
                    {
                        tableDirection = new XAttribute("dir", "rtl");
                    }
                    else
                    {
                        tableDirection = new XAttribute("dir", "ltr");
                    }
                    style.AddIfMissing("margin-bottom", ".001pt");
                    var table = new XElement(Xhtml.table,
                        new XAttribute("border", "1"),
                        new XAttribute("cellspacing", 0),
                        new XAttribute("cellpadding", 0),
                        tableDirection,
                        element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, currentMarginLeft)));
                    table.AddAnnotation(style);
                    var jc = (string)element.Elements(W.tblPr).Elements(W.jc).Attributes(W.val).FirstOrDefault();
                    if (jc == null)
                        jc = "left";
                    XAttribute dir = null;
                    XAttribute jcToUse = null;
                    if (bidiVisual != null)
                    {
                        // In a right-to-left table, left and right justification are mirrored.
                        dir = new XAttribute("dir", "rtl");
                        if (jc == "left")
                            jcToUse = new XAttribute("align", "right");
                        else if (jc == "right")
                            jcToUse = new XAttribute("align", "left");
                        else if (jc == "center")
                            jcToUse = new XAttribute("align", "center");
                    }
                    else
                    {
                        jcToUse = new XAttribute("align", jc);
                    }
                    var tableDiv = new XElement(Xhtml.div,
                        dir,
                        jcToUse,
                        table);
                    return tableDiv;
                }

                // Transform w:tr to h:tr.
                if (element.Name == W.tr)
                {
                    var style = new Dictionary<string, string>();
                    int? trHeight = (int?)element.Elements(W.trPr).Elements(W.trHeight).Attributes(W.val).FirstOrDefault();
                    if (trHeight != null)
                        style.AddIfMissing("height", string.Format(invariant, "{0}in", (decimal)trHeight / 1440m));
                    var htmlRow = new XElement(Xhtml.tr,
                        element.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, currentMarginLeft)));
                    if (style.Any())
                        htmlRow.AddAnnotation(style);
                    return htmlRow;
                }

                // Transform w:tc to h:td.
                if (element.Name == W.tc)
                {
                    var style = new Dictionary<string, string>();
                    XAttribute colSpan = null;
                    XAttribute rowSpan = null;

                    var tcPr = element.Element(W.tcPr);
                    if (tcPr != null)
                    {
                        if ((string)tcPr.Elements(W.vMerge).Attributes(W.val).FirstOrDefault() == "restart")
                        {
                            // Count the cells directly below, at the same cell index, that
                            // continue this vertical merge; that count is the rowspan.
                            var currentRow = element.Parent.ElementsBeforeSelf(W.tr).Count();
                            var currentCell = element.ElementsBeforeSelf(W.tc).Count();
                            var tbl = element.Parent.Parent;
                            int rowSpanCount = 1;
                            currentRow += 1;
                            while (true)
                            {
                                var row = tbl.Elements(W.tr).Skip(currentRow).FirstOrDefault();
                                if (row == null)
                                    break;
                                var cell2 = row.Elements(W.tc).Skip(currentCell).FirstOrDefault();
                                if (cell2 == null)
                                    break;
                                if (cell2.Elements(W.tcPr).Elements(W.vMerge).FirstOrDefault() == null)
                                    break;
                                if ((string)cell2.Elements(W.tcPr).Elements(W.vMerge).Attributes(W.val).FirstOrDefault() == "restart")
                                    break;
                                currentRow += 1;
                                rowSpanCount += 1;
                            }
                            rowSpan = new XAttribute("rowspan", rowSpanCount);
                        }

                        // A continuation cell of a vertical merge is covered by the rowspan above it.
                        if (tcPr.Element(W.vMerge) != null && (string)tcPr.Elements(W.vMerge).Attributes(W.val).FirstOrDefault() != "restart")
                            return null;

                        if (tcPr.Element(W.vAlign) != null)
                        {
                            var vAlignVal = (string)tcPr.Elements(W.vAlign).Attributes(W.val).FirstOrDefault();
                            if (vAlignVal == "top")
                                style.AddIfMissing("vertical-align", "top");
                            else if (vAlignVal == "center")
                                style.AddIfMissing("vertical-align", "middle");
                            else if (vAlignVal == "bottom")
                                style.AddIfMissing("vertical-align", "bottom");
                            else
                                style.AddIfMissing("vertical-align", "middle");
                        }
                        style.AddIfMissing("vertical-align", "top");

                        // w:tcW may carry a type without a w:w value; skip the width in that
                        // case rather than throwing on the cast of a missing attribute.
                        var tcWType = (string)tcPr.Elements(W.tcW).Attributes(W.type).FirstOrDefault();
                        var tcWValue = (int?)tcPr.Elements(W.tcW).Attributes(W._w).FirstOrDefault();
                        if (tcWValue != null)
                        {
                            if (tcWType == "dxa")
                                style.AddIfMissing("width", string.Format(invariant, "{0}pt", tcWValue.Value / 20m));
                            else if (tcWType == "pct")
                                style.AddIfMissing("width", string.Format(invariant, "{0:0.0}%", tcWValue.Value / 50m));
                        }

                        var tcBorders = tcPr.Element(W.tcBorders);
                        GenerateBorderStyle(tcBorders, W.top, style, BorderType.Cell);
                        GenerateBorderStyle(tcBorders, W.right, style, BorderType.Cell);
                        GenerateBorderStyle(tcBorders, W.bottom, style, BorderType.Cell);
                        GenerateBorderStyle(tcBorders, W.left, style, BorderType.Cell);

                        CreateStyleFromShd(style, tcPr.Element(W.shd));

                        var gridSpan = (int?)tcPr.Elements(W.gridSpan).Attributes(W.val).Select(a => (int?)a).FirstOrDefault();
                        if (gridSpan != null)
                            colSpan = new XAttribute("colspan", (int)gridSpan);
                    }
                    style.AddIfMissing("padding-top", "0in");
                    style.AddIfMissing("padding-bottom", "0in");

                    var cell = new XElement(Xhtml.td,
                        rowSpan,
                        colSpan,
                        CreateBorderDivs(wordDoc, settings, element.Elements()));
                    cell.AddAnnotation(style);
                    return cell;
                }

                // Transform images
                if (element.Name == W.drawing || element.Name == W.pict || element.Name == W._object)
                {
                    if (settings.ImageHandler == null)
                        return null;
                    return ProcessImage(wordDoc, element, settings.ImageHandler);
                }

                // Transform content controls by converting the children of w:sdtContent.
                if (element.Name == W.sdt)
                {
                    var relevantAncestors = element.Ancestors().TakeWhile(a => a.Name != W.txbxContent);
                    var isRunLevelContentControl = relevantAncestors.Any(a => a.Name == W.p);
                    // Elements(...).Elements() yields an empty sequence when w:sdtContent is absent.
                    var sdtChildren = element.Elements(W.sdtContent).Elements();
                    if (isRunLevelContentControl)
                    {
                        var o = sdtChildren.Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, currentMarginLeft))
                            .ToList();
                        return o;
                    }
                    else
                    {
                        var o = CreateBorderDivs(wordDoc, settings, sdtChildren);
                        return o;
                    }
                }

                if (element.Name == W.smartTag || element.Name == W.fldSimple)
                {
                    var o = CreateBorderDivs(wordDoc, settings, element.Elements());
                    return o;
                }

                // Anything not handled above is removed from the output.
                return null;
            }
            return null;
        }
 /// <summary>
 /// Converts a w:tbl element into an XHTML table wrapped in a div.
 /// </summary>
 /// <remarks>
 /// The table's style annotation receives border-collapse, border, an optional percentage
 /// width (w:tblW of type "pct"), an optional margin-left (w:tblInd of type "dxa") and
 /// margin-bottom. The table gets dir="rtl" when w:bidiVisual is present and dir="ltr"
 /// otherwise. The wrapping div carries the alignment taken from w:jc (default "left");
 /// for a right-to-left table, left and right are swapped and the div also gets dir="rtl".
 /// </remarks>
 /// <param name="wordDoc">Document handed through to the converter.</param>
 /// <param name="settings">Converter settings handed through to the converter.</param>
 /// <param name="element">The table element.</param>
 /// <param name="currentMarginLeft">Left margin handed through when converting child elements.</param>
 /// <returns>The div element that contains the converted table.</returns>
 private static object ProcessTable(WordprocessingDocument wordDoc, HtmlConverterSettings settings, XElement element, decimal currentMarginLeft)
 {
     var tableProperties = element.Elements(W.tblPr);

     var css = new Dictionary<string, string>();
     css.AddIfMissing("border-collapse", "collapse");
     css.AddIfMissing("border", "none");

     var isRightToLeft = tableProperties.Elements(W.bidiVisual).FirstOrDefault() != null;

     // Percentage widths are stored in fiftieths of a percent.
     var widthElement = tableProperties.Elements(W.tblW).FirstOrDefault();
     if (widthElement != null && (string)widthElement.Attribute(W.type) == "pct")
     {
         var fiftieths = (int)widthElement.Attribute(W._w);
         css.AddIfMissing("width", (fiftieths / 50) + "%");
     }

     // Indents of type "dxa" are divided by 20 to obtain points.
     var indentElement = tableProperties.Elements(W.tblInd).FirstOrDefault();
     if (indentElement != null && (string)indentElement.Attribute(W.type) == "dxa")
     {
         var indent = (decimal?)indentElement.Attribute(W._w);
         if (indent != null)
         {
             string marginLeft;
             if (indent > 0m)
                 marginLeft = string.Format(NumberFormatInfo.InvariantInfo, "{0}pt", indent / 20m);
             else
                 marginLeft = "0";
             css.AddIfMissing("margin-left", marginLeft);
         }
     }

     css.AddIfMissing("margin-bottom", ".001pt");

     var htmlTable = new XElement(Xhtml.table,
         // TODO: Revisit and make sure the omission is covered by appropriate CSS.
         // new XAttribute("border", "1"),
         // new XAttribute("cellspacing", 0),
         // new XAttribute("cellpadding", 0),
         new XAttribute("dir", isRightToLeft ? "rtl" : "ltr"),
         element.Elements().Select(child => ConvertToHtmlTransform(wordDoc, settings, child, false, currentMarginLeft)));
     htmlTable.AddAnnotation(css);

     var justification = (string)tableProperties.Elements(W.jc).Attributes(W.val).FirstOrDefault() ?? "left";

     XAttribute wrapperDir = null;
     XAttribute wrapperAlign = null;
     if (isRightToLeft)
     {
         wrapperDir = new XAttribute("dir", "rtl");
         switch (justification)
         {
             case "left":
                 wrapperAlign = new XAttribute("align", "right");
                 break;
             case "right":
                 wrapperAlign = new XAttribute("align", "left");
                 break;
             case "center":
                 wrapperAlign = new XAttribute("align", "center");
                 break;
         }
     }
     else
     {
         wrapperAlign = new XAttribute("align", justification);
     }

     // Null attributes are ignored by the XElement constructor.
     return new XElement(Xhtml.div, wrapperDir, wrapperAlign, htmlTable);
 }
Example #27
0
 /// <summary>
 /// Converts a document to XHTML by delegating to the three-argument
 /// <c>ConvertToHtml</c> overload with <c>null</c> as the third argument.
 /// </summary>
 /// <param name="wordDoc">The document to convert.</param>
 /// <param name="htmlConverterSettings">Settings forwarded unchanged to the overload.</param>
 /// <returns>The element returned by the three-argument overload.</returns>
 public static XElement ConvertToHtml(WordprocessingDocument wordDoc,
                                      HtmlConverterSettings htmlConverterSettings)
 {
     var html = ConvertToHtml(wordDoc, htmlConverterSettings, null);
     return html;
 }
 /// <summary>
 /// Converts a w:tr element into an XHTML tr element containing its converted children.
 /// </summary>
 /// <remarks>
 /// When w:trPr/w:trHeight has a w:val, it is divided by 1440 and added to the row's style
 /// annotation as a "height" in inches, formatted with two decimals and the invariant
 /// number format. No annotation is attached when the style dictionary is empty.
 /// </remarks>
 /// <param name="wordDoc">Document handed through to the converter.</param>
 /// <param name="settings">Converter settings handed through to the converter.</param>
 /// <param name="element">The table row element.</param>
 /// <param name="currentMarginLeft">Left margin handed through when converting child elements.</param>
 /// <returns>The converted row element.</returns>
 private static object ProcessTableRow(WordprocessingDocument wordDoc, HtmlConverterSettings settings, XElement element,
     decimal currentMarginLeft)
 {
     var rowStyle = new Dictionary<string, string>();

     var heightInTwips = (int?) element.Elements(W.trPr).Elements(W.trHeight).Attributes(W.val).FirstOrDefault();
     if (heightInTwips.HasValue)
     {
         var heightInInches = (decimal) heightInTwips.Value / 1440m;
         rowStyle.AddIfMissing("height",
             string.Format(NumberFormatInfo.InvariantInfo, "{0:0.00}in", heightInInches));
     }

     var convertedChildren = element.Elements()
         .Select(child => ConvertToHtmlTransform(wordDoc, settings, child, false, currentMarginLeft));
     var row = new XElement(Xhtml.tr, convertedChildren);

     if (rowStyle.Count > 0)
         row.AddAnnotation(rowStyle);
     return row;
 }
 /// <summary>
 /// Instance convenience wrapper: converts this object to XHTML by passing it as the
 /// first argument to <c>HtmlConverter.ConvertToHtml</c>.
 /// </summary>
 /// <param name="htmlConverterSettings">Settings forwarded unchanged.</param>
 /// <param name="imageHandler">Image handler forwarded unchanged.</param>
 /// <returns>The element returned by <c>HtmlConverter.ConvertToHtml</c>.</returns>
 public XElement ConvertToHtml(HtmlConverterSettings htmlConverterSettings, Func<ImageInfo, XElement> imageHandler)
 {
     var html = HtmlConverter.ConvertToHtml(this, htmlConverterSettings, imageHandler);
     return html;
 }
        /*
         * Handle:
         * - b
         * - bdr
         * - caps
         * - color
         * - dstrike
         * - highlight
         * - i
         * - position
         * - rFonts
         * - shd
         * - smallCaps
         * - spacing
         * - strike
         * - sz
         * - u
         * - vanish
         * - vertAlign
         *
         * Don't handle:
         * - em
         * - emboss
         * - fitText
         * - imprint
         * - kern
         * - outline
         * - shadow
         * - w
         * 
         */

        /// <summary>
        /// Converts a w:r (run) element to HTML content. Run properties found in w:rPr are
        /// translated into CSS entries (collected in a dictionary that is attached to the
        /// resulting span as an annotation) and into wrapper elements (u, i, b, s, sup, sub).
        /// </summary>
        /// <param name="wordDoc">The document being converted; passed through to ConvertToHtmlTransform.</param>
        /// <param name="settings">The converter settings; passed through to ConvertToHtmlTransform.</param>
        /// <param name="run">The w:r element to convert.</param>
        /// <returns>
        /// null when the run is marked w:webHidden; the transformed child content when the run
        /// has no w:rPr; otherwise the transformed content, wrapped in a span carrying the style
        /// dictionary whenever there is any style, an rtl marker or a non-default language.
        /// </returns>
        private static object ConvertRun(WordprocessingDocument wordDoc, HtmlConverterSettings settings, XElement run)
        {
            // CSS numbers must use '.' as the decimal separator, so never format them with the
            // culture of the current thread.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var style = new Dictionary<string, string>();
            var sn = (string)run.Attribute(PtOpenXml.StyleName);
            if (sn != null)
                style.Add("PtStyleName", sn);

            var rPr = run.Element(W.rPr);

            // Without run properties there is nothing to style: return the converted children as-is.
            if (rPr == null)
            {
                object content2 = run.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, 0m));
                return content2;
            }

            // hide all content that contains the w:rPr/w:webHidden element
            if (rPr.Element(W.webHidden) != null)
                return null;

            // W.bdr
            if (rPr.Element(W.bdr) != null && (string)rPr.Elements(W.bdr).Attributes(W.val).FirstOrDefault() != "none")
            {
                style.AddIfMissing("border", "solid windowtext 1.0pt");
                style.AddIfMissing("padding", "0in");
            }

            // W.color
            string color = (string)rPr.Elements(W.color).Attributes(W.val).FirstOrDefault();
            if (color != null)
                CreateColorProperty("color", color, style);

            // W.highlight
            string highlight = (string)rPr.Elements(W.highlight).Attributes(W.val).FirstOrDefault();
            if (highlight != null)
                CreateColorProperty("background", highlight, style);

            // W.shd
            string shade = (string)rPr.Elements(W.shd).Attributes(W.fill).FirstOrDefault();
            if (shade != null)
                CreateColorProperty("background", shade, style);

            // Pt.FontName - a w:sym child carries its own font, otherwise use the font recorded on the run.
            string font = null;
            if (run.Element(W.sym) != null)
                font = (string)run.Elements(W.sym).Attributes(W.font).FirstOrDefault();
            else
                font = (string)run.Attributes(PtOpenXml.FontName).FirstOrDefault();
            if (font != null)
                CreateFontCssProperty(font, style);

            // W.sz - bidi runs read w:szCs, all others w:sz; the value is halved to produce points.
            var languageType = (string)run.Attribute(PtOpenXml.LanguageType);
            decimal? sz = null;
            if (languageType == "bidi")
                sz = (decimal?)rPr.Elements(W.szCs).Attributes(W.val).FirstOrDefault();
            else
                sz = (decimal?)rPr.Elements(W.sz).Attributes(W.val).FirstOrDefault();
            if (sz != null)
                style.AddIfMissing("font-size", string.Format(invariant, "{0}pt", sz / 2.0m));

            // W.caps
            if (getBoolProp(rPr, W.caps))
                style.AddIfMissing("text-transform", "uppercase");

            // W.smallCaps
            if (getBoolProp(rPr, W.smallCaps))
                style.AddIfMissing("font-variant", "small-caps");

            // W.spacing
            decimal? spacingInTwips = (decimal?)rPr.Elements(W.spacing).Attributes(W.val).FirstOrDefault();
            if (spacingInTwips != null)
                style.AddIfMissing("letter-spacing", string.Format(invariant, "{0}pt", spacingInTwips / 20));

            // W.position - the sign is flipped because the value is emitted as the CSS "top" offset.
            decimal? position = (decimal?)rPr.Elements(W.position).Attributes(W.val).FirstOrDefault();
            if (position != null)
            {
                style.AddIfMissing("position", "relative");
                style.AddIfMissing("top", string.Format(invariant, "{0}pt", -(position / 2)));
            }

            // W.vanish
            if (getBoolProp(rPr, W.vanish))
                style.AddIfMissing("display", "none");

            object content = run.Elements().Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, 0m));

            // Each formatting wrapper below is only kept when it actually ends up with child nodes.

            // W.u
            if (rPr.Element(W.u) != null && (string)rPr.Elements(W.u).Attributes(W.val).FirstOrDefault() != "none")
            {
                var newContent = new XElement(Xhtml.u, content);
                if (newContent.Nodes().Any())
                    content = newContent;
                style.AddIfMissing("text-decoration", "underline");
            }

            // W.i
            if (getBoolProp(rPr, W.i))
            {
                var newContent = new XElement(Xhtml.i, content);
                if (newContent.Nodes().Any())
                    content = newContent;
                style.AddIfMissing("font-style", "italic");
            }

            // W.b
            if (getBoolProp(rPr, W.b))
            {
                var newContent = new XElement(Xhtml.b, content);
                if (newContent.Nodes().Any())
                    content = newContent;
                style.AddIfMissing("font-weight", "bold");
            }
            else
            {
                style.AddIfMissing("font-weight", "normal");
            }

            // W.strike
            if (getBoolProp(rPr, W.strike) || getBoolProp(rPr, W.dstrike))
            {
                var newContent = new XElement(Xhtml.s, content);
                if (newContent.Nodes().Any())
                    content = newContent;
                style.AddIfMissing("text-decoration", "line-through");
            }

            // W.vertAlign
            var vertAlign = rPr.Element(W.vertAlign) != null
                ? (string)rPr.Elements(W.vertAlign).Attributes(W.val).FirstOrDefault()
                : null;
            if (vertAlign == "superscript")
            {
                var newContent = new XElement(Xhtml.sup, content);
                if (newContent.Nodes().Any())
                    content = newContent;
            }
            else if (vertAlign == "subscript")
            {
                var newContent = new XElement(Xhtml.sub, content);
                if (newContent.Nodes().Any())
                    content = newContent;
            }

            var isRtl = rPr.Element(W.rtl) != null;

            // A run that is not inside a w:p has no paragraph-level bidi setting; treat it as not bidi
            // instead of dereferencing a null paragraph.
            var paragraph = run.Ancestors(W.p).FirstOrDefault();
            var paraIsBidi = paragraph != null && paragraph
                .Elements(W.pPr)
                .Elements(W.bidi)
                .Any(b => b.Attribute(W.val) == null || b.Attribute(W.val).ToBoolean() == true);

            string lang = null;
            if (languageType == "western")
                lang = (string)rPr.Elements(W.lang).Attributes(W.val).FirstOrDefault();
            else if (languageType == "bidi")
                lang = (string)rPr.Elements(W.lang).Attributes(W.bidi).FirstOrDefault();
            else if (languageType == "eastAsia")
                lang = (string)rPr.Elements(W.lang).Attributes(W.eastAsia).FirstOrDefault();

            // only do the following for text runs.
            XEntity runStartMark = null;
            XEntity runEndMark = null;

            // Can't add directional marks if the font-family is symbol - they are visible, and display as a ?
            string fontFamily;
            bool addDirectionalMarks =
                !(style.TryGetValue("font-family", out fontFamily)
                  && string.Equals(fontFamily, "symbol", System.StringComparison.OrdinalIgnoreCase));
            if (addDirectionalMarks && run.Element(W.t) != null)
            {
                if (isRtl)
                {
                    runStartMark = new XEntity("#x200f"); // RLM
                    runEndMark = new XEntity("#x200f"); // RLM
                }
                else if (paraIsBidi)
                {
                    runStartMark = new XEntity("#x200e"); // LRM
                    runEndMark = new XEntity("#x200e"); // LRM
                }
            }

            string defaultLanguage = "en-US"; // todo need to get defaultLanguage
            if (lang == null)
                lang = defaultLanguage;

            // The lang attribute is only emitted when it differs from the default language.
            XAttribute langAttribute = lang == defaultLanguage ? null : new XAttribute("lang", lang);

            if (style.Any() || isRtl || langAttribute != null)
            {
                style.AddIfMissing("margin", "0in");
                style.AddIfMissing("padding", "0in");
                var xe = new XElement(Xhtml.span,
                    langAttribute,
                    runStartMark,
                    content,
                    runEndMark);

                // The CSS dictionary travels with the span as an annotation.
                xe.AddAnnotation(style);
                content = xe;
            }
            return content;
        }
Example #31
0
        /// <summary>
        /// Converts the given document to an HTML string using default converter settings.
        /// The conversion runs under <c>_convertLocker</c> and with a thread culture whose
        /// decimal separator is forced to ".", because the converter formats numbers with the
        /// current culture.
        /// </summary>
        /// <param name="doc">The document to convert.</param>
        /// <returns>The HTML produced by HtmlConverter, serialized with ToStringNewLineOnAttributes.</returns>
        private string _ConvertToHtml(WordprocessingDocument doc)
        {
            lock(_convertLocker)
            {
                HtmlConverterSettings settings = new HtmlConverterSettings
                {
                    #region import images
                    //                        ImageHandler = imageInfo =>
                    //                        {
                    //                            DirectoryInfo localDirInfo = new DirectoryInfo(imageDirectoryName);
                    //                            if (!localDirInfo.Exists)
                    //                                localDirInfo.Create();
                    //                            ++imageCounter;
                    //                            string extension = imageInfo.ContentType.Split('/')[1].ToLower();
                    //
                    //                            ImageFormat imageFormat = null;
                    //                            switch (extension)
                    //                            {
                    //                                case "jpeg":
                    //                                    // Convert the .jpeg file to a .png file.
                    //                                    extension = "png";
                    //                                    imageFormat = ImageFormat.Png;
                    //                                    break;
                    //                                case "bmp":
                    //                                    imageFormat = ImageFormat.Bmp;
                    //                                    break;
                    //                                case "png":
                    //                                    imageFormat = ImageFormat.Png;
                    //                                    break;
                    //                                case "tiff":
                    //                                    imageFormat = ImageFormat.Tiff;
                    //                                    break;
                    //                            }
                    //
                    //                            if (imageFormat == null) return null;
                    //
                    //                            string imageFileName = String.Format("{0}{1}/img.{2}", imageDirectoryName, imageCounter, extension);
                    //                            try
                    //                            {
                    //                                imageInfo.Bitmap.Save(imageFileName, imageFormat);
                    //                            }
                    //                            catch (ExternalException)
                    //                            {
                    //                                return null;
                    //                            }
                    //
                    //                            XElement img = new XElement(Xhtml.img,
                    //                                new XAttribute(NoNamespace.src, imageFileName),
                    //                                imageInfo.ImgStyleAttribute,
                    //                                imageInfo.AltText != null ?
                    //                                    new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);
                    //
                    //                            return img;
                    //                        }
                    #endregion
                };

                // Remember the caller's culture so it can be put back no matter how the conversion ends.
                CultureInfo originalCulture = Thread.CurrentThread.CurrentCulture;

                // Work on a clone: the current culture instance may be read-only or shared.
                CultureInfo conversionCulture = (CultureInfo)originalCulture.Clone();
                conversionCulture.NumberFormat.NumberDecimalSeparator = ".";

                XElement html;
                try
                {
                    Thread.CurrentThread.CurrentCulture = conversionCulture;
                    html = HtmlConverter.ConvertToHtml(doc, settings);
                }
                finally
                {
                    // Restore even when the conversion throws, so the thread is never left
                    // running with the temporary culture.
                    Thread.CurrentThread.CurrentCulture = originalCulture;
                }

                return html.ToStringNewLineOnAttributes();
            }
        }
        /// <summary>
        /// Groups adjacent elements that share the same paragraph border (w:pBdr) and indent
        /// (w:ind) markup and wraps each bordered group in a div whose style dictionary
        /// (attached as an annotation) describes the border and left margin. Tables and
        /// elements without a paragraph border are converted without a wrapping div.
        /// </summary>
        /// <param name="wordDoc">The document being converted; passed through to the conversion helpers.</param>
        /// <param name="settings">The converter settings; passed through to the conversion helpers.</param>
        /// <param name="elements">The block-level elements to group and convert.</param>
        /// <returns>A list with one converted entry per group of adjacent elements.</returns>
        private static object CreateBorderDivs(WordprocessingDocument wordDoc, HtmlConverterSettings settings, IEnumerable<XElement> elements)
        {
            return elements.GroupAdjacent(e =>
                {
                    // The grouping key is the serialized border + indent markup, so only
                    // neighbours with identical borders and indents share a div.
                    if (e.Elements(W.pPr).Elements(W.pBdr).Any())
                    {
                        var pBdr = e.Element(W.pPr).Element(W.pBdr);
                        var indStr = "";
                        var ind = e.Element(W.pPr).Element(W.ind);
                        if (ind != null)
                            indStr = ind.ToString(SaveOptions.DisableFormatting);
                        return pBdr.ToString(SaveOptions.DisableFormatting) + indStr;
                    }
                    else if (e.Name == W.tbl)
                    {
                        return "table";
                    }
                    else
                    {
                        return ""; // empty string means no pBdr
                    }
                })
                .Select(g =>
                {
                    if (g.Key == "")
                    {
                        var o = GroupAndVerticallySpaceNumberedParagraphs(wordDoc, settings, g, 0m);
                        return (object)o;
                    }
                    if (g.Key == "table")
                    {
                        var o = g.Select(gc => ConvertToHtmlTransform(wordDoc,
                            settings, gc, false, 0));
                        return o;
                    }

                    // Bordered group: the first element's properties describe the whole group,
                    // because every member produced the same grouping key.
                    var pPr = g.First().Element(W.pPr);
                    var pBdr = pPr.Element(W.pBdr);
                    Dictionary<string, string> style = new Dictionary<string, string>();
                    GenerateBorderStyle(pBdr, W.top, style, BorderType.Paragraph);
                    GenerateBorderStyle(pBdr, W.right, style, BorderType.Paragraph);
                    GenerateBorderStyle(pBdr, W.bottom, style, BorderType.Paragraph);
                    GenerateBorderStyle(pBdr, W.left, style, BorderType.Paragraph);
                    var ind = pPr.Element(W.ind);
                    decimal currentMarginLeft = 0m;
                    if (ind != null)
                    {
                        // Attribute values are divided by 1440 to obtain inches; a hanging
                        // indent is subtracted from the left indent.
                        decimal? left = (decimal?)ind.Attribute(W.left);
                        decimal leftInInches = 0;
                        if (left != null)
                            leftInInches = (decimal)left / 1440;
                        decimal? hanging = (decimal?)ind.Attribute(W.hanging);
                        decimal hangingInInches = 0;
                        if (hanging != null)
                            hangingInInches = -(decimal)hanging / 1440;
                        currentMarginLeft = leftInInches + hangingInInches;

                        // Format with the invariant culture: CSS requires '.' as the decimal
                        // separator regardless of the culture of the current thread.
                        style.AddIfMissing("margin-left", string.Format(
                            System.Globalization.CultureInfo.InvariantCulture, "{0:0.00}in", currentMarginLeft));
                    }

                    var div = new XElement(Xhtml.div,
                        GroupAndVerticallySpaceNumberedParagraphs(wordDoc, settings, g, currentMarginLeft));

                    div.AddAnnotation(style);
                    return div;
                })
            .ToList();
        }
        /// <summary>
        /// Converts the bytes of a word-processing document into an HTML5 document string.
        /// Images are referenced as "/image{n}.{ext}" and their bitmaps are collected in a
        /// local list; nothing is written to disk.
        /// </summary>
        /// <param name="byteArray">The raw bytes of the document to convert.</param>
        /// <returns>The serialized HTML document, including the &lt;!DOCTYPE html&gt; declaration.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="byteArray"/> is null.</exception>
        public static string ConvertToHtml(byte[] byteArray)
        {
            if (byteArray == null)
                throw new System.ArgumentNullException("byteArray");

            string result = null;
            List<Bitmap> documentPictures = new List<Bitmap>();
            using (MemoryStream memoryStream = new MemoryStream())
            {
                // The document is opened as editable, so it gets its own expandable copy of the bytes.
                memoryStream.Write(byteArray, 0, byteArray.Length);
                using (WordprocessingDocument wDoc = WordprocessingDocument.Open(memoryStream, true))
                {
                    //var destFileName = new FileInfo(fi.Name.Replace(".docx", ".html"));
                    //if (outputDirectory != null && outputDirectory != string.Empty)
                    //{
                    //    DirectoryInfo di = new DirectoryInfo(outputDirectory);
                    //    if (!di.Exists)
                    //    {
                    //        throw new OpenXmlPowerToolsException("Output directory does not exist");
                    //    }
                    //    destFileName = new FileInfo(Path.Combine(di.FullName, destFileName.Name));
                    //}
                    //var imageDirectoryName = destFileName.FullName.Substring(0, destFileName.FullName.Length - 5) + "_files";
                    int imageCounter = 0;

                    // Use the dc:title from the core properties when present, else a fixed fallback title.
                    var pageTitle = "RoboBrailleDoc";
                    var part = wDoc.CoreFilePropertiesPart;
                    if (part != null)
                    {
                        pageTitle = (string)part.GetXDocument().Descendants(DC.title).FirstOrDefault() ?? "RoboBrailleDoc";
                    }

                    // TODO: Determine max-width from size of content area.
                    HtmlConverterSettings settings = new HtmlConverterSettings()
                    {
                        AdditionalCss = "body { margin: 1cm auto; max-width: 20cm; padding: 0; }",
                        PageTitle = pageTitle,
                        FabricateCssClasses = true,
                        CssClassPrefix = "pt-",
                        RestrictToSupportedLanguages = false,
                        RestrictToSupportedNumberingFormats = false,
                        ImageHandler = imageInfo =>
                        {
                            //DirectoryInfo localDirInfo = new DirectoryInfo(imageDirectoryName);
                            //if (!localDirInfo.Exists)
                            //    localDirInfo.Create();
                            ++imageCounter;

                            // A content type without a "type/subtype" shape cannot be mapped to an
                            // image format; ignore the image instead of failing the whole conversion.
                            string contentType = imageInfo.ContentType;
                            string[] contentTypeParts = contentType != null ? contentType.Split('/') : null;
                            if (contentTypeParts == null || contentTypeParts.Length < 2)
                                return null;

                            // Invariant lower-casing: culture-sensitive casing (e.g. Turkish) would
                            // turn "TIFF" into a string that matches none of the cases below.
                            string extension = contentTypeParts[1].ToLowerInvariant();
                            ImageFormat imageFormat = null;
                            switch (extension)
                            {
                                case "png":
                                    imageFormat = ImageFormat.Png;
                                    break;
                                case "gif":
                                    imageFormat = ImageFormat.Gif;
                                    break;
                                case "bmp":
                                    imageFormat = ImageFormat.Bmp;
                                    break;
                                case "jpeg":
                                    imageFormat = ImageFormat.Jpeg;
                                    break;
                                case "tiff":
                                    // Convert tiff to gif.
                                    extension = "gif";
                                    imageFormat = ImageFormat.Gif;
                                    break;
                                case "x-wmf":
                                    extension = "wmf";
                                    imageFormat = ImageFormat.Wmf;
                                    break;
                            }

                            // If the image format isn't one that we expect, ignore it,
                            // and don't return markup for the link.
                            if (imageFormat == null)
                                return null;
                            try
                            {
                                //imageInfo.Bitmap.Save(imageFileName, imageFormat);
                                documentPictures.Add(imageInfo.Bitmap);
                            }
                            catch (ExternalException)
                            {
                                return null;
                            }
                            string imageFileName = "/image" + imageCounter.ToString() + "." + extension;
                            XElement img = new XElement(Xhtml.img,
                                new XAttribute(NoNamespace.src, imageFileName),
                                imageInfo.ImgStyleAttribute,
                                imageInfo.AltText != null ?
                                    new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);
                            return img;
                        }
                    };
                    XElement htmlElement = HtmlConverter.ConvertToHtml(wDoc, settings);

                    // Produce HTML document with <!DOCTYPE html > declaration to tell the browser
                    // we are using HTML5.
                    var html = new XDocument(
                        new XDocumentType("html", null, null, null),
                        htmlElement);

                    // Note: the xhtml returned by ConvertToHtmlTransform contains objects of type
                    // XEntity.  PtOpenXmlUtil.cs define the XEntity class.  See
                    // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
                    // for detailed explanation.
                    //
                    // If you further transform the XML tree returned by ConvertToHtmlTransform, you
                    // must do it correctly, or entities will not be serialized properly.

                    var htmlString = html.ToString(SaveOptions.DisableFormatting);
                    //File.WriteAllText(destFileName.FullName, htmlString, Encoding.UTF8);
                    result = htmlString;
                }
            }
            return result;
        }
        /// <summary>
        /// Converts a sequence of elements that may belong to fields. Adjacent elements are
        /// grouped by the lowest field id found in their field-stack annotation; groups that
        /// belong to a HYPERLINK field are rendered as an anchor element, every other group
        /// is converted element by element.
        /// </summary>
        /// <param name="wordDoc">The document being converted; passed through to the conversion helpers.</param>
        /// <param name="settings">The converter settings; passed through to the conversion helpers.</param>
        /// <param name="elements">The elements to convert.</param>
        /// <returns>A list holding one converted entry per group of adjacent elements.</returns>
        private static object ConvertContentThatCanContainFields(WordprocessingDocument wordDoc, HtmlConverterSettings settings,
            IEnumerable<XElement> elements)
        {
            // Key each element by the smallest field id on its stack (null when it is not in a field).
            var fieldGroups = elements
                .GroupAdjacent(element =>
                {
                    var fieldStack = element.Annotation<Stack<FieldRetriever.FieldElementTypeInfo>>();
                    return fieldStack != null && fieldStack.Any()
                        ? fieldStack.Select(info => info.Id).Min()
                        : (int?)null;
                })
                .ToList();

            var converted = fieldGroups
                .Select(fieldGroup =>
                {
                    // Content outside of any field: plain conversion, materialized right away.
                    if (fieldGroup.Key == null)
                    {
                        return (object)fieldGroup
                            .Select(node => ConvertToHtmlTransform(wordDoc, settings, node, false, 0m))
                            .ToList();
                    }

                    // The field instruction is looked up from the outermost ancestor of the group.
                    var root = fieldGroup.First().Ancestors().Last();
                    var instruction = FieldRetriever.InstrText(root, (int)fieldGroup.Key)
                        .TrimStart('{')
                        .TrimEnd('}');
                    var field = FieldRetriever.ParseField(instruction);

                    // Any field other than HYPERLINK is converted like ordinary content.
                    if (field.FieldType != "HYPERLINK")
                        return fieldGroup.Select(node => ConvertToHtmlTransform(wordDoc, settings, node, false, 0m));

                    // The first field argument, when there is one, becomes the link target.
                    var href = field.Arguments.Length > 0
                        ? new XAttribute("href", field.Arguments[0])
                        : null;
                    var anchor = new XElement(Xhtml.A,
                        href,
                        fieldGroup.DescendantsAndSelf(W.r).Select(r => ConvertRun(wordDoc, settings, r)));
                    return anchor;
                })
                .ToList();

            return converted;
        }
        /*
         * Notes on line spacing
         *
         * the w:line and w:lineRule attributes control spacing between lines - including between lines within a paragraph
         *
         * If w:spacing w:lineRule="auto" then
         *   w:spacing w:line is a percentage where 240 == 100%
         *
         *   (line value / 240) * 100 = percentage of line
         *
         * If w:spacing w:lineRule="exact" or w:lineRule="atLeast" then
         *   w:spacing w:line is in twips
         *   1440 = exactly one inch from line to line
         *
         * Handle
         * - ind
         * - jc
         * - numPr
         * - pBdr
         * - shd
         * - spacing
         * - textAlignment
         *
         * Don't Handle (yet)
         * - adjustRightInd?
         * - autoSpaceDE
         * - autoSpaceDN
         * - bidi
         * - contextualSpacing
         * - divId
         * - framePr
         * - keepLines
         * - keepNext
         * - kinsoku
         * - mirrorIndents
         * - overflowPunct
         * - pageBreakBefore
         * - snapToGrid
         * - suppressAutoHyphens
         * - suppressLineNumbers
         * - suppressOverlap
         * - tabs
         * - textBoxTightWrap
         * - textDirection
         * - topLinePunct
         * - widowControl
         * - wordWrap
         *
         */

        /// <summary>
        /// Converts a paragraph into an HTML element named <paramref name="elementName"/>. The
        /// element gets a dir attribute, an optional leading right-to-left mark, and the
        /// paragraph style dictionary as an annotation. Runs preceding the first tab run are
        /// handed to TransformElementsPrecedingTab, and the tab run itself is not rendered.
        /// </summary>
        /// <param name="wordDoc">The document being converted; passed through to the conversion helpers.</param>
        /// <param name="settings">The converter settings; passed through to the conversion helpers.</param>
        /// <param name="paragraph">The paragraph element to convert.</param>
        /// <param name="elementName">The name of the HTML element to create.</param>
        /// <param name="suppressTrailingWhiteSpace">Passed through to DefineParagraphStyle.</param>
        /// <param name="currentMarginLeft">Passed through to DefineParagraphStyle.</param>
        /// <param name="isBidi">True to emit dir="rtl" and a leading RLM entity; false for dir="ltr".</param>
        /// <returns>The HTML element created for the paragraph.</returns>
        private static object ConvertParagraph(WordprocessingDocument wordDoc, HtmlConverterSettings settings,
            XElement paragraph, XName elementName, bool suppressTrailingWhiteSpace, decimal currentMarginLeft, bool isBidi)
        {
            var style = DefineParagraphStyle(paragraph, elementName, suppressTrailingWhiteSpace, currentMarginLeft, isBidi);
            var rtl = isBidi ? new XAttribute("dir", "rtl") : new XAttribute("dir", "ltr");
            var firstMark = isBidi ? new XEntity("#x200f") : null;

            // Analyze initial runs to see whether we have a tab, in which case we will render
            // a span with a defined width and ignore the tab rather than rendering the text
            // preceding the tab and the tab as a span with a computed width.
            var firstTabRun = paragraph
                .Elements(W.r)
                .FirstOrDefault(run => run.Elements(W.tab).Any());
            var elementsPrecedingTab = firstTabRun != null
                ? paragraph.Elements(W.r).TakeWhile(e => e != firstTabRun)
                    .Where(e => e.Elements().Any(c => c.Attributes(PtOpenXml.TabWidth).Any())).ToList()
                : Enumerable.Empty<XElement>().ToList();

            // TODO: Revisit
            // For the time being, if a hyperlink field precedes the tab, we'll render it as before.
            // NOTE(review): elementsPrecedingTab already holds w:r elements, so .Elements(W.r)
            // only looks at w:r children of runs - this check may never match; confirm whether
            // .Elements(W.instrText) directly on the runs was intended.
            // The field keyword is compared ordinally and case-insensitively so that the result
            // does not depend on the culture of the current thread.
            var hyperlinkPrecedesTab = elementsPrecedingTab
                .Elements(W.r)
                .Elements(W.instrText)
                .Select(e => e.Value)
                .Any(value => value != null
                    && value.TrimStart().StartsWith("HYPERLINK", System.StringComparison.OrdinalIgnoreCase));
            if (hyperlinkPrecedesTab)
            {
                var paraElement1 = new XElement(elementName,
                    rtl,
                    firstMark,
                    ConvertContentThatCanContainFields(wordDoc, settings, paragraph.Elements()));
                paraElement1.AddAnnotation(style);
                return paraElement1;
            }

            var txElementsPrecedingTab = TransformElementsPrecedingTab(wordDoc, settings, elementsPrecedingTab, firstTabRun);

            // Everything after the first tab run is converted normally; Skip(1) drops the tab run itself.
            var elementsSucceedingTab = firstTabRun != null
                ? paragraph.Elements().SkipWhile(e => e != firstTabRun).Skip(1)
                : paragraph.Elements();
            var paraElement = new XElement(elementName,
                rtl,
                firstMark,
                txElementsPrecedingTab,
                ConvertContentThatCanContainFields(wordDoc, settings, elementsSucceedingTab));
            paraElement.AddAnnotation(style);

            return paraElement;
        }
 /// <summary>
 /// Converts this instance to HTML by delegating to HtmlConverter.ConvertToHtml.
 /// </summary>
 /// <param name="htmlConverterSettings">The settings handed to the converter.</param>
 /// <returns>The element returned by HtmlConverter.ConvertToHtml.</returns>
 public XElement ConvertToHtml(HtmlConverterSettings htmlConverterSettings)
 {
     XElement convertedHtml = HtmlConverter.ConvertToHtml(this, htmlConverterSettings);
     return convertedHtml;
 }
        /// <summary>
        /// Converts the runs that precede the first tab of a paragraph and gives them a fixed
        /// width equal to the sum of their recorded widths plus the width of the tab. Several
        /// converted items are wrapped in a single inline-block span; a single converted
        /// element receives the width on its own style dictionary.
        /// </summary>
        /// <param name="wordDoc">The document being converted; passed through to ConvertToHtmlTransform.</param>
        /// <param name="settings">The converter settings; passed through to ConvertToHtmlTransform.</param>
        /// <param name="elementsPrecedingTab">The elements that precede the first tab run.</param>
        /// <param name="firstTabRun">The run holding the first tab, or null when there is none.</param>
        /// <returns>
        /// A list with the wrapping span when more than one item was converted; otherwise the
        /// list of converted items (empty, or the single styled item).
        /// </returns>
        private static List<object> TransformElementsPrecedingTab(WordprocessingDocument wordDoc, HtmlConverterSettings settings,
            List<XElement> elementsPrecedingTab, XElement firstTabRun)
        {
            var tabWidth = firstTabRun != null
                ? (decimal?) firstTabRun.Elements(W.tab).Attributes(PtOpenXml.TabWidth).FirstOrDefault() ?? 0m
                : 0m;
            var precedingElementsWidth = elementsPrecedingTab
                .Elements()
                .Where(c => c.Attributes(PtOpenXml.TabWidth).Any())
                .Select(e => (decimal) e.Attribute(PtOpenXml.TabWidth))
                .Sum();
            var totalWidth = precedingElementsWidth + tabWidth;
            var widthCss = string.Format(NumberFormatInfo.InvariantInfo, "{0:0.000}in", totalWidth);

            var txElementsPrecedingTab = elementsPrecedingTab
                .Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, 0m))
                .ToList();
            if (txElementsPrecedingTab.Count > 1)
            {
                var span = new XElement(Xhtml.span, txElementsPrecedingTab);
                var spanStyle = new Dictionary<string, string>
                {
                    { "display", "inline-block" },
                    { "text-indent", "0" },
                    { "width", widthCss }
                };
                span.AddAnnotation(spanStyle);

                // Return the span itself. The converted elements are now children of the span,
                // so handing back the bare list would drop the fixed width and make LINQ to XML
                // clone the elements when the caller adds them to the paragraph.
                return new List<object> { span };
            }

            if (txElementsPrecedingTab.Count == 1)
            {
                var element = txElementsPrecedingTab[0] as XElement;
                if (element != null)
                {
                    // Reuse the element's style dictionary, creating one when the element has
                    // none yet instead of dereferencing a null annotation.
                    var spanStyle = element.Annotation<Dictionary<string, string>>();
                    if (spanStyle == null)
                    {
                        spanStyle = new Dictionary<string, string>();
                        element.AddAnnotation(spanStyle);
                    }
                    spanStyle.AddIfMissing("display", "inline-block");
                    spanStyle.AddIfMissing("text-indent", "0");
                    spanStyle.AddIfMissing("width", widthCss);
                }
            }
            return txElementsPrecedingTab;
        }
        /*
         * Notes on line spacing
         * 
         * the w:line and w:lineRule attributes control spacing between lines - including between lines within a paragraph
         * 
         * If w:spacing w:lineRule="auto" then
         *   w:spacing w:line is a percentage where 240 == 100%
         *   
         *   (line value / 240) * 100 = percentage of line 
         *   
         * If w:spacing w:lineRule="exact" or w:lineRule="atLeast" then
         *   w:spacing w:line is in twips
         *   1440 = exactly one inch from line to line
         *
         * Handle
         * - ind
         * - jc
         * - numPr
         * - pBdr
         * - shd
         * - spacing
         * - textAlignment
         *  
         * Don't Handle (yet)
         * - adjustRightInd?
         * - autoSpaceDE
         * - autoSpaceDN
         * - bidi
         * - contextualSpacing
         * - divId
         * - framePr
         * - keepLines
         * - keepNext
         * - kinsoku
         * - mirrorIndents
         * - overflowPunct
         * - pageBreakBefore
         * - snapToGrid
         * - suppressAutoHyphens
         * - suppressLineNumbers
         * - suppressOverlap
         * - tabs
         * - textBoxTightWrap
         * - textDirection
         * - topLinePunct
         * - widowControl
         * - wordWrap
         * 
         */

        /// <summary>
        /// Builds the HTML element for a single paragraph and attaches the CSS properties derived
        /// from the paragraph's <c>w:pPr</c> (spacing, indentation, justification, shading, font,
        /// font size, line height, vertical alignment) to it as a
        /// <c>Dictionary&lt;string, string&gt;</c> annotation.
        /// </summary>
        /// <param name="wordDoc">Document being converted; forwarded to the content converter.</param>
        /// <param name="settings">Converter settings; forwarded to the content converter.</param>
        /// <param name="paragraph">The paragraph element to convert.</param>
        /// <param name="elementName">Name of the HTML element to create for the paragraph.</param>
        /// <param name="suppressTrailingWhiteSpace">
        /// When true and the paragraph has a <c>w:spacing</c> element, <c>margin-bottom</c> is
        /// emitted as 0pt instead of being derived from <c>w:after</c>.
        /// </param>
        /// <param name="currentMarginLeft">Value, in inches, subtracted from the paragraph's left indent.</param>
        /// <param name="isBidi">
        /// When true the element gets <c>dir="rtl"</c> and a leading RLM entity, left/right
        /// indents are swapped, and left/right justification is mirrored.
        /// </param>
        /// <returns>The created <see cref="XElement"/> carrying the style dictionary as an annotation.</returns>
        private static object ConvertParagraph(WordprocessingDocument wordDoc, HtmlConverterSettings settings,
            XElement paragraph, XName elementName, bool suppressTrailingWhiteSpace, decimal currentMarginLeft, bool isBidi)
        {
            // CSS requires '.' as the decimal separator. Formatting with the thread culture would
            // emit values such as "12,5pt" on comma-decimal locales, which browsers reject.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var style = new Dictionary<string, string>();
            var sn = (string)paragraph.Attribute(PtOpenXml.StyleName);
            if (sn != null)
                style.Add("PtStyleName", sn);

            XElement pPr = paragraph.Element(W.pPr);
            if (pPr != null)
            {
                var spacing = pPr.Element(W.spacing);
                if (spacing != null)
                {
                    // w:before / w:after are divided by 20 to obtain points.
                    var spacingBefore = (decimal?)spacing.Attribute(W.before);
                    if (spacingBefore != null)
                        style.AddIfMissing("margin-top", string.Format(invariant, "{0}pt", spacingBefore / 20.0m));

                    var lineRule = (string)spacing.Attribute(W.lineRule);
                    if (lineRule == "auto" || lineRule == "exact" || lineRule == "atLeast")
                    {
                        // A lineRule without a w:line value carries no usable line height; skip it
                        // rather than throwing on the missing attribute.
                        var line = (decimal?)spacing.Attribute(W.line);
                        if (line != null)
                        {
                            if (lineRule == "auto")
                            {
                                // 240 corresponds to 100%, which needs no explicit line-height.
                                if (line.Value != 240m)
                                {
                                    var pct = (line.Value / 240m) * 100m;
                                    style.Add("line-height", string.Format(invariant, "{0:0.0}%", pct));
                                }
                            }
                            else if (lineRule == "exact")
                            {
                                var points = line.Value / 20m;
                                style.Add("line-height", string.Format(invariant, "{0:0.0}pt", points));
                            }
                            else
                            {
                                // atLeast: only emitted when the minimum is 14pt or more.
                                var points = line.Value / 20m;
                                if (points >= 14m)
                                    style.Add("line-height", string.Format(invariant, "{0:0.0}pt", points));
                            }
                        }
                    }

                    decimal? spacingAfter;
                    if (suppressTrailingWhiteSpace)
                        spacingAfter = 0;
                    else
                        spacingAfter = (decimal?)spacing.Attribute(W.after) /*+ addToSpacing*/;

                    if (spacingAfter != null)
                        style.AddIfMissing("margin-bottom", string.Format(invariant, "{0}pt", spacingAfter / 20.0m));

                }

                var ind = pPr.Element(W.ind);
                if (ind != null)
                {
                    // Indent values are divided by 1440 to obtain inches.
                    decimal? left = (decimal?)ind.Attribute(W.left);
                    if (left != null)
                    {
                        decimal leftInInches = (decimal)left / 1440 - currentMarginLeft;
                        style.AddIfMissing(isBidi ? "margin-right" : "margin-left",
                            string.Format(invariant, "{0:0.00}in", leftInInches));
                    }

                    decimal? right = (decimal?)ind.Attribute(W.right);
                    if (right != null)
                    {
                        decimal rightInInches = (decimal)right / 1440;
                        style.AddIfMissing(isBidi ? "margin-left" : "margin-right",
                            string.Format(invariant, "{0:0.00}in", rightInInches));
                    }

                    // firstLine takes precedence over hanging because both map to text-indent
                    // and only the first one added is kept.
                    decimal? firstLine = (decimal?)ind.Attribute(W.firstLine);
                    if (firstLine != null)
                    {
                        decimal firstLineInInches = (decimal)firstLine / 1440m;
                        style.AddIfMissing("text-indent", string.Format(invariant, "{0:0.00}in", firstLineInInches));
                    }
                    decimal? hanging = (decimal?)ind.Attribute(W.hanging);
                    if (hanging != null)
                    {
                        decimal hangingInInches = (decimal)-hanging / 1440m;
                        style.AddIfMissing("text-indent", string.Format(invariant, "{0:0.00}in", hangingInInches));
                    }
                }

                // todo need to handle
                // - both
                // - mediumKashida
                // - distribute
                // - numTab
                // - highKashida
                // - lowKashida
                // - thaiDistribute

                var jcVal = (string)pPr.Elements(W.jc).Attributes(W.val).FirstOrDefault();
                if (jcVal == null)
                {
                    jcVal = "left";
                }
                if (jcVal == "left")
                {
                    if (isBidi)
                        style.AddIfMissing("text-align", "right");
                    else
                        style.AddIfMissing("text-align", "left");
                }
                else if (jcVal == "right")
                {
                    if (isBidi)
                        style.AddIfMissing("text-align", "left");
                    else
                        style.AddIfMissing("text-align", "right");
                }
                else if (jcVal == "center")
                    style.AddIfMissing("text-align", "center");
                else if (jcVal == "both")
                    style.AddIfMissing("text-align", "justify");

                CreateStyleFromShd(style, pPr.Element(W.shd));

                // Pt.FontName
                string font = (string)paragraph.Attributes(PtOpenXml.FontName).FirstOrDefault();
                if (font != null)
                    CreateFontCssProperty(font, style);

                // W.sz
                decimal? sz = null;

                var languageType = (string)paragraph.Attribute(PtOpenXml.LanguageType);
                if (languageType == "bidi")
                    sz = (decimal?)pPr.Elements(W.rPr).Elements(W.szCs).Attributes(W.val).FirstOrDefault();
                else
                    sz = (decimal?)pPr.Elements(W.rPr).Elements(W.sz).Attributes(W.val).FirstOrDefault();

                // Walk the paragraph once and reuse the run list for both the font-size and the
                // language-type checks below.
                var runs = paragraph
                    .DescendantsTrimmed(W.txbxContent)
                    .Where(e => e.Name == W.r)
                    .ToList();

                // The largest explicit run size, when any run declares one, overrides the
                // paragraph-level size. Runs tagged "bidi" use w:szCs instead of w:sz.
                var sizesOfAllRunsInParagraph = runs
                    .Select(run =>
                    {
                        var runLanguageType = (string)run.Attribute(PtOpenXml.LanguageType);
                        var sizeName = runLanguageType == "bidi" ? W.szCs : W.sz;
                        return (decimal?)run
                            .Elements(W.rPr)
                            .Elements(sizeName)
                            .Attributes(W.val)
                            .FirstOrDefault();
                    })
                    .Where(runSz => runSz != null)
                    .ToList();

                if (sizesOfAllRunsInParagraph.Count > 0)
                    sz = sizesOfAllRunsInParagraph.Max();

                // Size values are halved to obtain points.
                if (sz != null)
                    style.AddIfMissing("font-size", string.Format(invariant, "{0}pt", sz / 2.0m));

                // Default line height is only applied when no run in the paragraph is bidi.
                var anyBidiRun = runs
                    .Any(run => (string)run.Attribute(PtOpenXml.LanguageType) == "bidi");

                if (!anyBidiRun)
                    style.AddIfMissing("line-height", "108%");

                // vertical text alignment as of December 2013 does not work in any major browsers.
                var verticalTextAlignment = (string)pPr.Elements(W.textAlignment).Attributes(W.val).FirstOrDefault();
                if (verticalTextAlignment != null && verticalTextAlignment != "auto")
                {
                    if (verticalTextAlignment == "top")
                        style.AddIfMissing("vertical-align", "top");
                    else if (verticalTextAlignment == "center")
                        style.AddIfMissing("vertical-align", "middle");
                    else if (verticalTextAlignment == "baseline")
                        style.AddIfMissing("vertical-align", "baseline");
                    else if (verticalTextAlignment == "bottom")
                        style.AddIfMissing("vertical-align", "bottom");
                }

                // Fallback margins for anything not set above.
                style.AddIfMissing("margin-top", "0pt");
                style.AddIfMissing("margin-left", "0pt");
                style.AddIfMissing("margin-right", "0pt");
                style.AddIfMissing("margin-bottom", ".001pt");

            }
            XAttribute rtl = null;
            XEntity firstMark = null;
            if (isBidi)
            {
                rtl = new XAttribute("dir", "rtl");
                firstMark = new XEntity("#x200f"); // RLM
            }
            else
            {
                rtl = new XAttribute("dir", "ltr");
            }
            var paraElement = new XElement(elementName,
                rtl,
                firstMark,
                ConvertContentThatCanContainFields(wordDoc, settings, paragraph.Elements()));
            paraElement.AddAnnotation(style);
            return paraElement;
        }
Example #39
0
 /// <summary>
 /// Instance-level convenience wrapper: forwards to the static
 /// <c>HtmlConverter.ConvertToHtml</c>, passing this object as the first argument.
 /// </summary>
 /// <param name="htmlConverterSettings">Settings forwarded unchanged to the converter.</param>
 /// <param name="imageHandler">Image callback forwarded unchanged to the converter.</param>
 /// <returns>The element returned by <c>HtmlConverter.ConvertToHtml</c>.</returns>
 public XElement ConvertToHtml(HtmlConverterSettings htmlConverterSettings, Func <ImageInfo, XElement> imageHandler)
 {
     var convertedHtml = HtmlConverter.ConvertToHtml(this, htmlConverterSettings, imageHandler);
     return convertedHtml;
 }