コード例 #1
0
 /// <summary>
 /// Converts a <see cref="WmlDocument"/> to HTML by opening it through an
 /// <see cref="OpenXmlMemoryStreamDocument"/> and forwarding to the
 /// <see cref="WordprocessingDocument"/> overload.
 /// </summary>
 /// <param name="doc">Document to convert.</param>
 /// <param name="htmlConverterSettings">Settings forwarded unchanged to the <see cref="WordprocessingDocument"/> overload.</param>
 /// <param name="imageHandler">Image callback forwarded unchanged to the <see cref="WordprocessingDocument"/> overload.</param>
 /// <returns>The element produced by the <see cref="WordprocessingDocument"/> overload.</returns>
 public XElement ConvertToHtml(WmlDocument doc, HtmlConverterSettings htmlConverterSettings, Func <ImageInfo, XElement> imageHandler)
 {
     // Both wrappers are disposed (inner first) once the result has been produced.
     using (var memoryDocument = new OpenXmlMemoryStreamDocument(doc))
     using (WordprocessingDocument wordDocument = memoryDocument.GetWordprocessingDocument())
     {
         XElement html = ConvertToHtml(wordDocument, htmlConverterSettings, imageHandler);
         return html;
     }
 }
コード例 #2
0
 /// <summary>
 /// Converts a <see cref="WmlDocument"/> to HTML by opening it through an
 /// <see cref="OpenXmlMemoryStreamDocument"/> and forwarding to the two-argument
 /// <see cref="WordprocessingDocument"/> overload.
 /// </summary>
 /// <param name="doc">Document to convert.</param>
 /// <param name="htmlConverterSettings">Settings forwarded unchanged to the <see cref="WordprocessingDocument"/> overload.</param>
 /// <returns>The element produced by the <see cref="WordprocessingDocument"/> overload.</returns>
 public XElement ConvertToHtml(WmlDocument doc, HtmlConverterSettings htmlConverterSettings)
 {
     // Both wrappers are disposed (inner first) once the result has been produced.
     using (var memoryDocument = new OpenXmlMemoryStreamDocument(doc))
     using (WordprocessingDocument wordDocument = memoryDocument.GetWordprocessingDocument())
     {
         XElement html = ConvertToHtml(wordDocument, htmlConverterSettings);
         return html;
     }
 }
コード例 #3
0
        /// <summary>
        /// Converts an open <see cref="WordprocessingDocument"/> to an XHTML tree.
        /// </summary>
        /// <remarks>
        /// Before transforming, <paramref name="wordDoc"/> is passed to
        /// <c>RevisionAccepter.AcceptRevisions</c> and <c>MarkupSimplifier.SimplifyMarkup</c>
        /// (presumably both modify the document in place - confirm against their implementations).
        /// </remarks>
        /// <param name="wordDoc">Document whose main document part is transformed.</param>
        /// <param name="htmlConverterSettings">Conversion settings; <c>ConvertFormatting</c> must be <c>false</c>.</param>
        /// <param name="imageHandler">Image callback forwarded to <c>ConvertToHtmlTransform</c>.</param>
        /// <returns>The result of <c>ConvertToHtmlTransform</c> for the main document root, cast to <see cref="XElement"/>.</returns>
        /// <exception cref="InvalidSettingsException">
        /// <paramref name="htmlConverterSettings"/> requests conversion with formatting.
        /// </exception>
        public XElement ConvertToHtml(WordprocessingDocument wordDoc,
                                      HtmlConverterSettings htmlConverterSettings, Func <ImageInfo, XElement> imageHandler)
        {
            InitEntityMap();

            bool formattingRequested = htmlConverterSettings.ConvertFormatting;
            if (formattingRequested)
            {
                throw new InvalidSettingsException("Conversion with formatting is not supported");
            }

            RevisionAccepter.AcceptRevisions(wordDoc);

            // Strip markup the transform has no mapping for; field codes are kept.
            var simplifySettings = new SimplifyMarkupSettings();
            simplifySettings.RemoveComments              = true;
            simplifySettings.RemoveContentControls       = true;
            simplifySettings.RemoveEndAndFootNotes       = true;
            simplifySettings.RemoveFieldCodes            = false;
            simplifySettings.RemoveLastRenderedPageBreak = true;
            simplifySettings.RemovePermissions           = true;
            simplifySettings.RemoveProof                 = true;
            simplifySettings.RemoveRsidInfo              = true;
            simplifySettings.RemoveSmartTags             = true;
            simplifySettings.RemoveSoftHyphens           = true;
            simplifySettings.ReplaceTabsWithSpaces       = true;
            MarkupSimplifier.SimplifyMarkup(wordDoc, simplifySettings);

            var mainXDocument     = wordDoc.MainDocumentPart.GetXDocument();
            XElement documentRoot = mainXDocument.Root;
            AnnotateHyperlinkContent(documentRoot);

            object transformed = ConvertToHtmlTransform(wordDoc, htmlConverterSettings,
                                                        documentRoot, imageHandler);

            // Note: the tree returned by ConvertToHtmlTransform contains XEntity objects
            // (the XEntity class is defined in PtOpenXmlUtil.cs).  See
            // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
            // for a detailed explanation.
            //
            // Any further transformation of this tree must handle those objects correctly,
            // or entities will not be serialized properly.
            return (XElement)transformed;
        }
コード例 #4
0
 /// <summary>
 /// Converts an open <see cref="WordprocessingDocument"/> to an XHTML tree without an
 /// image handler, by forwarding to the three-argument overload with a <c>null</c> callback.
 /// </summary>
 /// <param name="wordDoc">Document to convert.</param>
 /// <param name="htmlConverterSettings">Settings forwarded unchanged.</param>
 /// <returns>The element produced by the three-argument overload.</returns>
 public XElement ConvertToHtml(WordprocessingDocument wordDoc,
                               HtmlConverterSettings htmlConverterSettings)
 {
     // No image callback is supplied on this path.
     Func <ImageInfo, XElement> noImageHandler = null;
     XElement html = ConvertToHtml(wordDoc, htmlConverterSettings, noImageHandler);
     return html;
 }
コード例 #5
0
        /// <summary>
        /// Recursively transforms a WordprocessingML node into its XHTML counterpart.
        /// </summary>
        /// <remarks>
        /// Mappings, checked in this order: w:document to html (with a generated head),
        /// w:body to body, w:p to h1..hN (when its style declares an outline level) or p,
        /// w:hyperlink and field-annotated w:r to a, w:r to its transformed children,
        /// w:t to text, w:br / w:cr to br, w:noBreakHyphen to "-", w:tbl / w:tr / w:tc to
        /// table / tr / td, and w:drawing / w:pict to whatever <c>ProcessImage</c> returns.
        /// Any other element is dropped.
        /// </remarks>
        /// <param name="wordDoc">Document consulted for style definitions, hyperlink relationships, list items and images.</param>
        /// <param name="settings">Supplies <c>PageTitle</c>, <c>Css</c> and <c>CssClassPrefix</c>.</param>
        /// <param name="node">Node to transform; anything that is not an <see cref="XElement"/> yields <c>null</c>.</param>
        /// <param name="imageHandler">Callback handed to <c>ProcessImage</c>; when <c>null</c>, drawings and pictures are dropped.</param>
        /// <returns>
        /// An <see cref="XElement"/>, an <see cref="XText"/>, the result of <c>ConvertEntities</c>,
        /// a lazily evaluated sequence of transformed children, or <c>null</c> when the node has no mapping.
        /// </returns>
        private object ConvertToHtmlTransform(WordprocessingDocument wordDoc,
                                              HtmlConverterSettings settings, XNode node,
                                              Func <ImageInfo, XElement> imageHandler)
        {
            var element = node as XElement;
            if (element == null)
            {
                return null;
            }

            // Transform the w:document element to the XHTML h:html element with a generated head.
            if (element.Name == W.document)
            {
                return new XElement(Xhtml.Html,
                                    new XElement(Xhtml.Head,
                                                 new XElement(Xhtml.Meta,
                                                              new XAttribute(HtmlNoNamespace.HttpEquiv, "Content-Type"),
                                                              new XAttribute(HtmlNoNamespace.Content,
                                                                             "text/html; charset=windows-1252")),
                                                 new XElement(Xhtml.Meta,
                                                              new XAttribute(HtmlNoNamespace.Name, "Generator"),
                                                              new XAttribute(HtmlNoNamespace.Content,
                                                                             "PowerTools for Open XML")),
                                                 // Null content is ignored by XElement, so the optional parts simply vanish.
                                                 settings.PageTitle != null
                                                     ? new XElement(Xhtml.Title, settings.PageTitle)
                                                     : null,
                                                 settings.Css != null
                                                     ? new XElement(Xhtml.Style,
                                                                    new XComment(Environment.NewLine +
                                                                                 settings.Css + Environment.NewLine))
                                                     : null),
                                    TransformChildElements(wordDoc, settings, element, imageHandler));
            }

            // Transform the w:body element to the XHTML h:body element.
            if (element.Name == W.body)
            {
                return new XElement(Xhtml.Body,
                                    TransformChildElements(wordDoc, settings, element, imageHandler));
            }

            // Transform w:p.  A paragraph whose style declares an outline level becomes a
            // heading of the matching level; every other paragraph becomes h:p.
            if (element.Name == W.p)
            {
                var styleId = (string)element.Elements(W.pPr).Elements(W.pStyle)
                              .Attributes(W.val).FirstOrDefault();

                // Null when the document has no style definitions part (or it has no root).
                XElement stylesRoot = GetStyleDefinitionsRoot(wordDoc);

                if (stylesRoot != null)
                {
                    var style = stylesRoot.Elements(W.style)
                                .FirstOrDefault(s => (string)s.Attribute(W.styleId) == styleId);
                    if (style != null)
                    {
                        var outlineLevel = (int?)style.Elements(W.pPr)
                                           .Elements(W.outlineLvl).Attributes(W.val).FirstOrDefault();
                        if (outlineLevel != null)
                        {
                            // The heading number is the outline level plus one.
                            return new XElement(Xhtml.xhtml + string.Format("h{0}", outlineLevel + 1),
                                                settings.CssClassPrefix != null
                                                    ? new XAttribute(HtmlNoNamespace.Class,
                                                                     settings.CssClassPrefix + styleId)
                                                    : null,
                                                ConvertEntities(ListItemRetriever.RetrieveListItem(wordDoc,
                                                                                                   element, null)),
                                                TransformChildElements(wordDoc, settings, element, imageHandler));
                        }
                    }
                }

                // No explicit style: fall back to the document's default paragraph style, if any.
                if (styleId == null && stylesRoot != null)
                {
                    var defaultStyle = stylesRoot.Elements(W.style)
                                       .FirstOrDefault(e => (string)e.Attribute(W.type) == "paragraph" &&
                                                       (string)e.Attribute(W._default) == "1");
                    if (defaultStyle != null)
                    {
                        styleId = (string)defaultStyle.Attributes(W.styleId).FirstOrDefault();
                    }
                }

                XAttribute classAttribute = null;
                if (styleId != null && settings.CssClassPrefix != null)
                {
                    classAttribute = new XAttribute(HtmlNoNamespace.Class,
                                                    settings.CssClassPrefix + styleId);
                }

                return new XElement(Xhtml.P,
                                    classAttribute,
                                    ConvertEntities(ListItemRetriever.RetrieveListItem(wordDoc,
                                                                                       element, null)),
                                    TransformChildElements(wordDoc, settings, element, imageHandler));
            }

            // Transform every hyperlink in the document to the XHTML h:A element.
            if (element.Name == W.hyperlink && element.Attribute(R.id) != null)
            {
                try
                {
                    var relationshipId = (string)element.Attribute(R.id);
                    var relationship   = wordDoc.MainDocumentPart.HyperlinkRelationships
                                         .FirstOrDefault(x => x.Id == relationshipId);
                    if (relationship != null)
                    {
                        return new XElement(Xhtml.A,
                                            new XAttribute(HtmlNoNamespace.Href, relationship.Uri),
                                            ConvertEntities(element.Elements(W.r)
                                                            .Elements(W.t)
                                                            .Select(s => (string)s).StringConcatenate()));
                    }
                }
                catch (UriFormatException)
                {
                    // Malformed target: fall through and emit the link content without an anchor.
                }

                // Reached for a malformed URI or a dangling relationship id: keep the
                // hyperlink's content but do not wrap it in an anchor.
                return TransformChildElements(wordDoc, settings, element, imageHandler);
            }

            // Transform contents of runs that are part of a hyperlink (runs carrying a
            // FieldInfo annotation with at least one argument).
            var fieldInfo = element.Annotation <FieldInfo>();
            if (fieldInfo != null && element.Name == W.r && fieldInfo.Arguments.Length > 0)
            {
                return new XElement(Xhtml.A,
                                    new XAttribute(HtmlNoNamespace.Href, fieldInfo.Arguments[0]),
                                    ConvertEntities(element.Elements(W.t)
                                                    .Select(s => (string)s).StringConcatenate()));
            }

            // Transform contents of runs.
            if (element.Name == W.r)
            {
                return TransformChildElements(wordDoc, settings, element, imageHandler);
            }

            // Transform every w:t element to a text node.
            if (element.Name == W.t)
            {
                return ConvertEntities(element.Value);
            }

            // Transform w:br and w:cr to h:br.
            if (element.Name == W.br || element.Name == W.cr)
            {
                return new XElement(Xhtml.Br);
            }

            // Transform w:noBreakHyphen to '-'
            if (element.Name == W.noBreakHyphen)
            {
                return new XText("-");
            }

            // Transform w:tbl to h:table.
            if (element.Name == W.tbl)
            {
                return new XElement(Xhtml.Table,
                                    new XAttribute(HtmlNoNamespace.Border, 1),
                                    TransformChildElements(wordDoc, settings, element, imageHandler));
            }

            // Transform w:tr to h:tr.
            if (element.Name == W.tr)
            {
                return new XElement(Xhtml.Tr,
                                    TransformChildElements(wordDoc, settings, element, imageHandler));
            }

            // Transform w:tc to h:td.
            if (element.Name == W.tc)
            {
                return new XElement(Xhtml.Td,
                                    TransformChildElements(wordDoc, settings, element, imageHandler));
            }

            // Transform images; without a handler they are dropped.
            if (element.Name == W.drawing || element.Name == W.pict)
            {
                if (imageHandler == null)
                {
                    return null;
                }
                return ProcessImage(wordDoc, element, imageHandler);
            }

            // Any element that has not been transformed above is removed.
            return null;
        }

        /// <summary>
        /// Builds the lazily evaluated sequence obtained by running
        /// <c>ConvertToHtmlTransform</c> over each child element of <paramref name="parent"/>.
        /// </summary>
        private object TransformChildElements(WordprocessingDocument wordDoc,
                                              HtmlConverterSettings settings, XElement parent,
                                              Func <ImageInfo, XElement> imageHandler)
        {
            return parent.Elements().Select(e => ConvertToHtmlTransform(
                                                wordDoc, settings, e, imageHandler));
        }

        /// <summary>
        /// Returns the root element of the document's style definitions, or <c>null</c>
        /// when the main document part has no style definitions part.
        /// </summary>
        private static XElement GetStyleDefinitionsRoot(WordprocessingDocument wordDoc)
        {
            var stylesPart = wordDoc.MainDocumentPart.StyleDefinitionsPart;
            if (stylesPart == null)
            {
                return null;
            }
            return stylesPart.GetXDocument().Root;
        }