Пример #1
0
 public static void SimplifyMarkup(WordprocessingDocument doc,
                                   SimplifyMarkupSettings settings)
 {
     if (settings.RemoveMarkupForDocumentComparison)
     {
         settings.RemoveRsidInfo = true;
         RemoveElementsForDocumentComparison(doc);
     }
     if (settings.RemoveRsidInfo)
     {
         RemoveRsidInfoInSettings(doc);
     }
     if (settings.AcceptRevisions)
     {
         RevisionAccepter.AcceptRevisions(doc);
     }
     foreach (var part in doc.ContentParts())
     {
         SimplifyMarkupForPart(part, settings);
     }
     if (doc.MainDocumentPart.StyleDefinitionsPart != null)
     {
         SimplifyMarkupForPart(doc.MainDocumentPart.StyleDefinitionsPart, settings);
     }
     if (doc.MainDocumentPart.StylesWithEffectsPart != null)
     {
         SimplifyMarkupForPart(doc.MainDocumentPart.StylesWithEffectsPart, settings);
     }
 }
Пример #2
0
        /// <summary>
        /// Converts a specific node instead of the whole word document into HTML.
        /// Note: this method is added for the above purpose. See the other method:
        /// public static XElement ConvertToHtml(WordprocessingDocument wordDoc, HtmlConverterSettings htmlConverterSettings, Func<ImageInfo, XElement> imageHandler)
        /// </summary>
        /// <param name="wordDoc"></param>
        /// <param name="node">The node to convert to HTML.</param>
        /// <param name="htmlConverterSettings"></param>
        /// <returns></returns>
        public static XElement ConvertToHtml(WordprocessingDocument wordDoc, XNode node,
                                             HtmlConverterSettings htmlConverterSettings)
        {
            InitEntityMap();
            if (htmlConverterSettings.ConvertFormatting)
            {
                throw new InvalidSettingsException("Conversion with formatting is not supported");
            }
            RevisionAccepter.AcceptRevisions(wordDoc);
            SimplifyMarkupSettings settings = new SimplifyMarkupSettings
            {
                RemoveComments              = true,
                RemoveContentControls       = true,
                RemoveEndAndFootNotes       = true,
                RemoveFieldCodes            = false,
                RemoveLastRenderedPageBreak = true,
                RemovePermissions           = true,
                RemoveProof           = true,
                RemoveRsidInfo        = true,
                RemoveSmartTags       = true,
                RemoveSoftHyphens     = true,
                ReplaceTabsWithSpaces = true,
            };

            MarkupSimplifier.SimplifyMarkup(wordDoc, settings);
            AnnotateHyperlinkContent((XElement)node);
            XElement xhtml = (XElement)ConvertToHtmlTransform(wordDoc, htmlConverterSettings,
                                                              node, null);

            return(xhtml);
        }
Пример #3
0
 public static void SimplifyMarkup(WordprocessingDocument doc,
                                   SimplifyMarkupSettings settings)
 {
     if (settings.AcceptRevisions)
     {
         RevisionAccepter.AcceptRevisions(doc);
     }
     foreach (var part in doc.ContentParts())
     {
         SimplifyMarkupForPart(part, settings);
     }
     if (doc.MainDocumentPart.StyleDefinitionsPart != null)
     {
         SimplifyMarkupForPart(doc.MainDocumentPart.StyleDefinitionsPart, settings);
     }
     if (doc.MainDocumentPart.StylesWithEffectsPart != null)
     {
         SimplifyMarkupForPart(doc.MainDocumentPart.StylesWithEffectsPart, settings);
     }
 }
Пример #4
0
        public static XElement ConvertToHtml(WordprocessingDocument wordDoc,
                                             HtmlConverterSettings htmlConverterSettings, Func <ImageInfo, XElement> imageHandler)
        {
            InitEntityMap();
            if (htmlConverterSettings.ConvertFormatting)
            {
                throw new InvalidSettingsException("Conversion with formatting is not supported");
            }
            RevisionAccepter.AcceptRevisions(wordDoc);
            SimplifyMarkupSettings settings = new SimplifyMarkupSettings
            {
                RemoveComments              = true,
                RemoveContentControls       = true,
                RemoveEndAndFootNotes       = true,
                RemoveFieldCodes            = false,
                RemoveLastRenderedPageBreak = true,
                RemovePermissions           = true,
                RemoveProof           = true,
                RemoveRsidInfo        = true,
                RemoveSmartTags       = true,
                RemoveSoftHyphens     = true,
                ReplaceTabsWithSpaces = true,
            };

            MarkupSimplifier.SimplifyMarkup(wordDoc, settings);
            XElement rootElement = wordDoc.MainDocumentPart.GetXDocument().Root;

            AnnotateHyperlinkContent(rootElement);
            XElement xhtml = (XElement)ConvertToHtmlTransform(wordDoc, htmlConverterSettings,
                                                              rootElement, imageHandler);

            // Note: the xhtml returned by ConvertToHtmlTransform contains objects of type
            // XEntity.  PtOpenXmlUtil.cs define the XEntity class.  See
            // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
            // for detailed explanation.
            //
            // If you further transform the XML tree returned by ConvertToHtmlTransform, you
            // must do it correctly, or entities will not be serialized properly.

            return(xhtml);
        }