Esempio n. 1
0
        /// <summary>
        /// Converts a single node, rather than the whole word-processing document, to HTML.
        /// A companion overload that takes an image handler converts the root of the main
        /// document part instead.
        /// </summary>
        /// <remarks>
        /// Before the transform runs, <c>RevisionAccepter.AcceptRevisions</c> and
        /// <c>MarkupSimplifier.SimplifyMarkup</c> are applied to <paramref name="wordDoc"/>.
        /// No image handler is passed to the transform (the argument is <c>null</c>).
        /// </remarks>
        /// <param name="wordDoc">The document that is prepared and handed to the HTML transform.</param>
        /// <param name="node">
        /// The node to convert to HTML. It is cast to <see cref="XElement"/> for hyperlink
        /// annotation, so a node of any other type fails that cast.
        /// </param>
        /// <param name="htmlConverterSettings">
        /// Conversion settings. <c>ConvertFormatting</c> must be <c>false</c>.
        /// </param>
        /// <returns>The result of the HTML transform, cast to <see cref="XElement"/>.</returns>
        /// <exception cref="InvalidSettingsException">
        /// Thrown when <c>htmlConverterSettings.ConvertFormatting</c> is <c>true</c>.
        /// </exception>
        public static XElement ConvertToHtml(WordprocessingDocument wordDoc, XNode node,
                                             HtmlConverterSettings htmlConverterSettings)
        {
            InitEntityMap();

            // Formatting conversion is rejected up front, before the document is touched.
            if (htmlConverterSettings.ConvertFormatting)
            {
                throw new InvalidSettingsException("Conversion with formatting is not supported");
            }

            RevisionAccepter.AcceptRevisions(wordDoc);

            // Everything except field codes is stripped prior to conversion.
            SimplifyMarkupSettings simplifySettings = new SimplifyMarkupSettings();
            simplifySettings.RemoveComments = true;
            simplifySettings.RemoveContentControls = true;
            simplifySettings.RemoveEndAndFootNotes = true;
            simplifySettings.RemoveFieldCodes = false;
            simplifySettings.RemoveLastRenderedPageBreak = true;
            simplifySettings.RemovePermissions = true;
            simplifySettings.RemoveProof = true;
            simplifySettings.RemoveRsidInfo = true;
            simplifySettings.RemoveSmartTags = true;
            simplifySettings.RemoveSoftHyphens = true;
            simplifySettings.ReplaceTabsWithSpaces = true;
            MarkupSimplifier.SimplifyMarkup(wordDoc, simplifySettings);

            XElement nodeAsElement = (XElement)node;
            AnnotateHyperlinkContent(nodeAsElement);

            return (XElement)ConvertToHtmlTransform(wordDoc, htmlConverterSettings, node, null);
        }
Esempio n. 2
0
        /// <summary>
        /// Converts the root element of the main document part of
        /// <paramref name="wordDoc"/> to HTML.
        /// </summary>
        /// <remarks>
        /// Before the transform runs, <c>RevisionAccepter.AcceptRevisions</c> and
        /// <c>MarkupSimplifier.SimplifyMarkup</c> are applied to <paramref name="wordDoc"/>.
        /// </remarks>
        /// <param name="wordDoc">The document to convert.</param>
        /// <param name="htmlConverterSettings">
        /// Conversion settings. <c>ConvertFormatting</c> must be <c>false</c>.
        /// </param>
        /// <param name="imageHandler">Image callback forwarded unchanged to the HTML transform.</param>
        /// <returns>The result of the HTML transform, cast to <see cref="XElement"/>.</returns>
        /// <exception cref="InvalidSettingsException">
        /// Thrown when <c>htmlConverterSettings.ConvertFormatting</c> is <c>true</c>.
        /// </exception>
        public static XElement ConvertToHtml(WordprocessingDocument wordDoc,
                                             HtmlConverterSettings htmlConverterSettings, Func<ImageInfo, XElement> imageHandler)
        {
            InitEntityMap();

            // Formatting conversion is rejected up front, before the document is touched.
            if (htmlConverterSettings.ConvertFormatting)
            {
                throw new InvalidSettingsException("Conversion with formatting is not supported");
            }

            RevisionAccepter.AcceptRevisions(wordDoc);

            // Everything except field codes is stripped prior to conversion.
            SimplifyMarkupSettings simplifySettings = new SimplifyMarkupSettings();
            simplifySettings.RemoveComments = true;
            simplifySettings.RemoveContentControls = true;
            simplifySettings.RemoveEndAndFootNotes = true;
            simplifySettings.RemoveFieldCodes = false;
            simplifySettings.RemoveLastRenderedPageBreak = true;
            simplifySettings.RemovePermissions = true;
            simplifySettings.RemoveProof = true;
            simplifySettings.RemoveRsidInfo = true;
            simplifySettings.RemoveSmartTags = true;
            simplifySettings.RemoveSoftHyphens = true;
            simplifySettings.ReplaceTabsWithSpaces = true;
            MarkupSimplifier.SimplifyMarkup(wordDoc, simplifySettings);

            // The root is fetched only after simplification has run.
            XElement documentRoot = wordDoc.MainDocumentPart.GetXDocument().Root;
            AnnotateHyperlinkContent(documentRoot);

            // Note: the tree returned by ConvertToHtmlTransform contains XEntity objects
            // (the XEntity class is defined in PtOpenXmlUtil.cs).  For a detailed explanation see
            // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
            //
            // Any further transformation of the returned XML tree must be done correctly,
            // or entities will not be serialized properly.
            return (XElement)ConvertToHtmlTransform(wordDoc, htmlConverterSettings,
                                                    documentRoot, imageHandler);
        }
Esempio n. 3
0
 /// <summary>
 /// Runs <c>MarkupSimplifier.SimplifyMarkup</c> on this document with the given settings.
 /// </summary>
 /// <param name="settings">The simplification options forwarded to the simplifier.</param>
 /// <returns>The document returned by <c>MarkupSimplifier.SimplifyMarkup</c>.</returns>
 public WmlDocument SimplifyMarkup(SimplifyMarkupSettings settings)
 {
     WmlDocument simplified = MarkupSimplifier.SimplifyMarkup(this, settings);
     return simplified;
 }
Esempio n. 4
0
        /// <summary>
        /// Prepares a document in two passes: the first opens and closes the package with
        /// markup-compatibility processing enabled; the second validates the content, strips
        /// existing PowerTools markup, simplifies the markup, and normalizes footnote and
        /// endnote data.
        /// </summary>
        /// <param name="source">The document to pre-process.</param>
        /// <param name="startingIdForFootnotesEndnotes">
        /// Passed to <c>ChangeFootnoteEndnoteReferencesToUniqueRange</c>.
        /// </param>
        /// <returns>
        /// A new <c>WmlDocument</c> built from the bytes produced by the second pass, carrying
        /// the file name of the first-pass result.
        /// </returns>
        private static WmlDocument PreProcessMarkup(WmlDocument source, int startingIdForFootnotesEndnotes)
        {
            WmlDocument mcProcessed;

            // Pass 1: open and close to get rid of MC content.
            using (var pass1Stream = new MemoryStream())
            {
                byte[] originalBytes = source.DocumentByteArray;
                pass1Stream.Write(originalBytes, 0, originalBytes.Length);

                var pass1OpenSettings = new OpenSettings();
                pass1OpenSettings.MarkupCompatibilityProcessSettings = new MarkupCompatibilityProcessSettings(
                    MarkupCompatibilityProcessMode.ProcessAllParts,
                    FileFormatVersions.Office2007);

                using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(pass1Stream, true, pass1OpenSettings))
                {
                    // Contrary to what you might think, looking at the API, it is necessary to
                    // access the root element of each part to cause the SDK to process MC markup.
                    var mainPart = wordDoc.MainDocumentPart;
                    OpenXmlPartRootElement mainRoot = mainPart.RootElement;

                    var footnotesPart = mainPart.FootnotesPart;
                    if (footnotesPart != null)
                    {
                        OpenXmlPartRootElement footnotesRoot = footnotesPart.RootElement;
                    }

                    var endnotesPart = mainPart.EndnotesPart;
                    if (endnotesPart != null)
                    {
                        OpenXmlPartRootElement endnotesRoot = endnotesPart.RootElement;
                    }
                }

                // The bytes are captured only after the package has been closed.
                mcProcessed = new WmlDocument(source.FileName, pass1Stream.ToArray());
            }

            // Pass 2: reopen the first-pass result and do the actual pre-processing.
            using (var pass2Stream = new MemoryStream())
            {
                byte[] mcProcessedBytes = mcProcessed.DocumentByteArray;
                pass2Stream.Write(mcProcessedBytes, 0, mcProcessedBytes.Length);

                var pass2OpenSettings = new OpenSettings();
                pass2OpenSettings.MarkupCompatibilityProcessSettings = new MarkupCompatibilityProcessSettings(
                    MarkupCompatibilityProcessMode.ProcessAllParts,
                    FileFormatVersions.Office2007);

                using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(pass2Stream, true, pass2OpenSettings))
                {
                    TestForInvalidContent(wordDoc);
                    RemoveExistingPowerToolsMarkup(wordDoc);

                    // Removing content controls, field codes, and bookmarks is a no-no for many
                    // use cases.  Content controls are needed, e.g., on the title page.  Field
                    // codes are required for automatic cross-references, which require bookmarks.
                    // TODO: Revisit
                    var simplifySettings = new SimplifyMarkupSettings();
                    simplifySettings.RemoveBookmarks = true;
                    simplifySettings.AcceptRevisions = false;
                    simplifySettings.RemoveComments = true;
                    simplifySettings.RemoveContentControls = true;
                    simplifySettings.RemoveFieldCodes = true;
                    simplifySettings.RemoveGoBackBookmark = true;
                    simplifySettings.RemoveLastRenderedPageBreak = true;
                    simplifySettings.RemovePermissions = true;
                    simplifySettings.RemoveProof = true;
                    simplifySettings.RemoveSmartTags = true;
                    simplifySettings.RemoveSoftHyphens = true;
                    simplifySettings.RemoveHyperlinks = true;
                    MarkupSimplifier.SimplifyMarkup(wordDoc, simplifySettings);

                    ChangeFootnoteEndnoteReferencesToUniqueRange(wordDoc, startingIdForFootnotesEndnotes);
                    AddUnidsToMarkupInContentParts(wordDoc);
                    AddFootnotesEndnotesParts(wordDoc);
                    FillInEmptyFootnotesEndnotes(wordDoc);
                }

                // As in pass 1, the bytes are read once the package has been closed.
                return new WmlDocument(mcProcessed.FileName, pass2Stream.ToArray());
            }
        }
        /// <summary>
        /// Merges the comments of two documents that have the same content into one document.
        /// </summary>
        /// <remarks>
        /// Work is done on copies of both inputs.  The copies are simplified (proofing, rsid
        /// info, and the GoBack bookmark removed) and compared with
        /// <c>DocumentComparer.CompareDocuments</c>.  If only one document has a comments part
        /// containing comments, a copy of that document is returned without merging; if neither
        /// has a comments part, a copy of <paramref name="document1"/> is returned.  The lock
        /// check requested by <paramref name="ensureLocked"/> is performed only when both
        /// documents actually contain comments.
        /// </remarks>
        /// <param name="document1">The first document; it is also the basis of the merged result.</param>
        /// <param name="document2">The second document, whose comments are renumbered and merged in.</param>
        /// <param name="ensureLocked">
        /// When <c>true</c>, both documents must have a <c>DocSecurity</c> value of "8" in their
        /// extended file properties (locked, allowing only commenting).
        /// </param>
        /// <returns>A new document containing the comments of both inputs, or a copy of one input.</returns>
        /// <exception cref="CommentMergerDifferingContentsException">
        /// Thrown when the two documents do not contain the same content.
        /// </exception>
        /// <exception cref="CommentMergerUnlockedDocumentException">
        /// Thrown when <paramref name="ensureLocked"/> is <c>true</c> and either document is not
        /// locked, including when it has no extended file properties part or no
        /// <c>DocSecurity</c> element at all.
        /// </exception>
        public static WmlDocument MergeComments(WmlDocument document1, WmlDocument document2,
                                                bool ensureLocked)
        {
            WmlDocument cDoc1 = new WmlDocument(document1);
            WmlDocument cDoc2 = new WmlDocument(document2);

            using (OpenXmlMemoryStreamDocument streamDoc1 = new OpenXmlMemoryStreamDocument(cDoc1))
            using (WordprocessingDocument doc1 = streamDoc1.GetWordprocessingDocument())
            using (OpenXmlMemoryStreamDocument streamDoc2 = new OpenXmlMemoryStreamDocument(cDoc2))
            using (WordprocessingDocument doc2 = streamDoc2.GetWordprocessingDocument())
            {
                SimplifyMarkupSettings mss = new SimplifyMarkupSettings()
                {
                    RemoveProof          = true,
                    RemoveRsidInfo       = true,
                    RemoveGoBackBookmark = true,
                };
                MarkupSimplifier.SimplifyMarkup(doc1, mss);
                MarkupSimplifier.SimplifyMarkup(doc2, mss);

                // If documents don't contain the same content, then don't attempt to merge comments.
                bool same = DocumentComparer.CompareDocuments(doc1, doc2);
                if (!same)
                {
                    throw new CommentMergerDifferingContentsException(
                              "Documents do not contain the same content");
                }

                var commentsPart1 = doc1.MainDocumentPart.WordprocessingCommentsPart;
                var commentsPart2 = doc2.MainDocumentPart.WordprocessingCommentsPart;

                // Without a comments part in document 2 there is nothing to merge in, whether
                // or not document 1 has one.
                if (commentsPart2 == null)
                {
                    return new WmlDocument(document1);
                }

                // Only document 2 has a comments part.
                if (commentsPart1 == null)
                {
                    return new WmlDocument(document2);
                }

                // If either of the documents have no comments, then return the other one.
                if (!commentsPart1.GetXDocument().Root.Elements(W.comment).Any())
                {
                    return new WmlDocument(document2);
                }
                if (!commentsPart2.GetXDocument().Root.Elements(W.comment).Any())
                {
                    return new WmlDocument(document1);
                }

                if (ensureLocked)
                {
                    // If either document is not locked (allowing only commenting), don't attempt to
                    // merge comments.
                    if (!IsLockedForCommenting(doc1))
                    {
                        throw new CommentMergerUnlockedDocumentException(
                                  "Document1 is not locked");
                    }
                    if (!IsLockedForCommenting(doc2))
                    {
                        throw new CommentMergerUnlockedDocumentException(
                                  "Document2 is not locked");
                    }
                }

                RenumberCommentsInDoc2(doc1, doc2);

                WmlDocument destDoc = new WmlDocument(document1);

                using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(destDoc))
                {
                    using (WordprocessingDocument destWDoc = streamDoc.GetWordprocessingDocument())
                    {
                        // Merge the comments part.  The comment elements are re-read here, after
                        // renumbering, rather than reused from the checks above.
                        XDocument commentsPartXDoc = new XDocument(
                            new XElement(W.comments,
                                         new XAttribute(XNamespace.Xmlns + "w", W.w),
                                         doc1.MainDocumentPart.WordprocessingCommentsPart.GetXDocument().Root.Elements(),
                                         doc2.MainDocumentPart.WordprocessingCommentsPart.GetXDocument().Root.Elements()));
                        destWDoc.MainDocumentPart.WordprocessingCommentsPart.PutXDocument(commentsPartXDoc);

                        MergeCommentsInPart(doc1.MainDocumentPart, doc2.MainDocumentPart,
                                            destWDoc.MainDocumentPart, commentsPartXDoc);
                    }

                    // The modified document is retrieved after the destination package is closed.
                    return streamDoc.GetModifiedWmlDocument();
                }
            }
        }

        /// <summary>
        /// Reports whether the document's extended file properties contain a
        /// <c>DocSecurity</c> element whose value is "8".  A missing part, a missing root
        /// element, or a missing <c>DocSecurity</c> element counts as "not locked" instead of
        /// causing a null dereference.
        /// </summary>
        /// <param name="doc">The document to inspect.</param>
        /// <returns><c>true</c> when <c>DocSecurity</c> is present and equals "8".</returns>
        private static bool IsLockedForCommenting(WordprocessingDocument doc)
        {
            var extendedPropertiesPart = doc.ExtendedFilePropertiesPart;
            if (extendedPropertiesPart == null)
            {
                return false;
            }

            XElement propertiesRoot = extendedPropertiesPart.GetXDocument().Root;
            if (propertiesRoot == null)
            {
                return false;
            }

            XElement docSecurity = propertiesRoot.Element(EP.DocSecurity);
            return docSecurity != null && docSecurity.Value == "8";
        }