/// <summary>
        /// Recursively rebuilds <paramref name="node"/>, unwrapping smart tags and/or content
        /// controls when the corresponding switches on
        /// <paramref name="simplifyMarkupSettings"/> are set.
        /// </summary>
        /// <param name="node">Node to transform; anything that is not an element is returned as is.</param>
        /// <param name="simplifyMarkupSettings">Supplies the RemoveSmartTags and RemoveContentControls switches.</param>
        /// <returns>
        /// The unchanged node, a rebuilt element, or a lazily evaluated sequence containing the
        /// transformed children that replace an unwrapped element.
        /// </returns>
        private static object RemoveCustomXmlAndContentControlsTransform(
            XNode node, SimplifyMarkupSettings simplifyMarkupSettings)
        {
            var current = node as XElement;
            if (current == null)
            {
                return node;
            }

            // w:smartTag is replaced by its transformed child elements.
            if (simplifyMarkupSettings.RemoveSmartTags && current.Name == W.smartTag)
            {
                return from child in current.Elements()
                       select RemoveCustomXmlAndContentControlsTransform(child, simplifyMarkupSettings);
            }

            // w:sdt is replaced by the transformed child elements of its w:sdtContent.
            if (simplifyMarkupSettings.RemoveContentControls && current.Name == W.sdt)
            {
                return from content in current.Elements(W.sdtContent)
                       from child in content.Elements()
                       select RemoveCustomXmlAndContentControlsTransform(child, simplifyMarkupSettings);
            }

            // Every other element is copied with its attributes and transformed child nodes.
            var transformedNodes =
                from child in current.Nodes()
                select RemoveCustomXmlAndContentControlsTransform(child, simplifyMarkupSettings);

            return new XElement(current.Name, current.Attributes(), transformedNodes);
        }
 /// <summary>
 /// Simplifies the markup of an in-memory document and returns the modified document.
 /// </summary>
 /// <param name="doc">Document wrapped in an OpenXmlMemoryStreamDocument for editing.</param>
 /// <param name="settings">Switches passed on to the WordprocessingDocument overload.</param>
 /// <returns>The value of GetModifiedWmlDocument() after simplification.</returns>
 public static WmlDocument SimplifyMarkup(WmlDocument doc, SimplifyMarkupSettings settings)
 {
     using (var memoryDocument = new OpenXmlMemoryStreamDocument(doc))
     {
         using (WordprocessingDocument package = memoryDocument.GetWordprocessingDocument())
         {
             SimplifyMarkup(package, settings);
         }

         // The package has been disposed by the time the modified document is requested.
         return memoryDocument.GetModifiedWmlDocument();
     }
 }
        /// <summary>
        /// Applies the simplifications selected in <paramref name="settings"/> to an open
        /// document: content parts first, then the style parts, then optional removal of the
        /// comment parts.
        /// </summary>
        /// <remarks>
        /// When RemoveMarkupForDocumentComparison is set, RemoveRsidInfo is switched on in the
        /// caller's <paramref name="settings"/> object as a side effect.
        /// </remarks>
        /// <param name="doc">Open document that is modified in place.</param>
        /// <param name="settings">Switches that select what is removed or accepted.</param>
        public static void SimplifyMarkup(WordprocessingDocument doc, SimplifyMarkupSettings settings)
        {
            if (settings.RemoveMarkupForDocumentComparison)
            {
                settings.RemoveRsidInfo = true;
                RemoveElementsForDocumentComparison(doc);
            }

            if (settings.RemoveRsidInfo)
            {
                RemoveRsidInfoInSettings(doc);
            }

            if (settings.AcceptRevisions)
            {
                RevisionAccepter.AcceptRevisions(doc);
            }

            foreach (OpenXmlPart contentPart in doc.ContentParts())
            {
                SimplifyMarkupForPart(contentPart, settings);
            }

            var mainPart = doc.MainDocumentPart;

            // The two style parts are simplified with the same settings when they exist.
            var styleDefinitions = mainPart.StyleDefinitionsPart;
            if (styleDefinitions != null)
            {
                SimplifyMarkupForPart(styleDefinitions, settings);
            }

            var stylesWithEffects = mainPart.StylesWithEffectsPart;
            if (stylesWithEffects != null)
            {
                SimplifyMarkupForPart(stylesWithEffects, settings);
            }

            if (!settings.RemoveComments)
            {
                return;
            }

            // Removing comments also deletes the comments part and the extended comments part.
            WordprocessingCommentsPart comments = mainPart.WordprocessingCommentsPart;
            if (comments != null)
            {
                mainPart.DeletePart(comments);
            }

            WordprocessingCommentsExPart commentsEx = mainPart.WordprocessingCommentsExPart;
            if (commentsEx != null)
            {
                mainPart.DeletePart(commentsEx);
            }
        }
        /// <summary>
        /// Runs the transforms selected by <paramref name="settings"/> over the XML of one part
        /// and writes the result back to the part with PutXDocument.
        /// </summary>
        /// <remarks>
        /// The per-element transforms are repeated until a pass no longer changes the tree
        /// (checked with XNode.DeepEquals).
        /// </remarks>
        /// <param name="part">Part whose XDocument is read, transformed, and replaced.</param>
        /// <param name="settings">Switches that select which transforms run.</param>
        private static void SimplifyMarkupForPart(OpenXmlPart part, SimplifyMarkupSettings settings)
        {
            var parameters = new SimplifyMarkupParameters();

            // The id of the "_GoBack" bookmark is only looked up when the part being processed
            // is the main document part; for every other part GoBackId keeps its default value.
            if (part.ContentType == "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml")
            {
                var doc = (WordprocessingDocument)part.OpenXmlPackage;
                if (settings.RemoveGoBackBookmark)
                {
                    XElement goBackBookmark = doc
                                              .MainDocumentPart
                                              .GetXDocument()
                                              .Descendants(W.bookmarkStart)
                                              .FirstOrDefault(bm => (string)bm.Attribute(W.name) == "_GoBack");
                    if (goBackBookmark != null)
                    {
                        parameters.GoBackId = (int)goBackBookmark.Attribute(W.id);
                    }
                }
            }

            XDocument xdoc    = part.GetXDocument();
            XElement  newRoot = xdoc.Root;

            // Need to do this first to enable simplifying hyperlinks.
            if (settings.RemoveContentControls || settings.RemoveSmartTags)
            {
                newRoot = (XElement)RemoveCustomXmlAndContentControlsTransform(newRoot, settings);
            }

            // This may touch many elements, so needs to be its own transform.
            if (settings.RemoveRsidInfo)
            {
                newRoot = (XElement)RemoveRsidTransform(newRoot);
            }

            // Snapshot used to detect when a pass of the loop below changes nothing.
            var prevNewRoot = new XDocument(newRoot);

            while (true)
            {
                // Every switch that SimplifyMarkupTransform acts on must appear here.
                // RemoveSoftHyphens and RemoveLastRenderedPageBreak were previously missing, so
                // enabling only those two never invoked the transform and they had no effect.
                if (settings.RemoveComments ||
                    settings.RemoveEndAndFootNotes ||
                    settings.ReplaceTabsWithSpaces ||
                    settings.RemoveFieldCodes ||
                    settings.RemovePermissions ||
                    settings.RemoveProof ||
                    settings.RemoveSoftHyphens ||
                    settings.RemoveLastRenderedPageBreak ||
                    settings.RemoveBookmarks ||
                    settings.RemoveWebHidden ||
                    settings.RemoveGoBackBookmark ||
                    settings.RemoveHyperlinks)
                {
                    newRoot = (XElement)SimplifyMarkupTransform(newRoot, settings, parameters);
                }

                // Remove runs and run properties that have become empty due to previous transforms.
                newRoot = (XElement)RemoveEmptyRunsAndRunPropertiesTransform(newRoot);

                // Merge adjacent runs that have identical run properties.
                newRoot = (XElement)MergeAdjacentRunsTransform(newRoot);

                // Merge adjacent instrText elements.
                newRoot = (XElement)MergeAdjacentInstrText(newRoot);

                // Separate run children into separate runs
                newRoot = (XElement)SeparateRunChildrenIntoSeparateRuns(newRoot);

                // Stop once a full pass leaves the tree identical to the previous snapshot.
                if (XNode.DeepEquals(prevNewRoot.Root, newRoot))
                {
                    break;
                }

                prevNewRoot = new XDocument(newRoot);
            }

            if (settings.NormalizeXml)
            {
                // Namespace declarations (and mc:Ignorable) added to the normalized root when
                // the root does not already carry an attribute with the same name.
                XAttribute[] nsAttrs =
                {
                    new XAttribute(XNamespace.Xmlns + "wpc",   WPC.wpc),
                    new XAttribute(XNamespace.Xmlns + "mc",    MC.mc),
                    new XAttribute(XNamespace.Xmlns + "o",     O.o),
                    new XAttribute(XNamespace.Xmlns + "r",     R.r),
                    new XAttribute(XNamespace.Xmlns + "m",     M.m),
                    new XAttribute(XNamespace.Xmlns + "v",     VML.vml),
                    new XAttribute(XNamespace.Xmlns + "wp14",  WP14.wp14),
                    new XAttribute(XNamespace.Xmlns + "wp",    WP.wp),
                    new XAttribute(XNamespace.Xmlns + "w10",   W10.w10),
                    new XAttribute(XNamespace.Xmlns + "w",     W.w),
                    new XAttribute(XNamespace.Xmlns + "w14",   W14.w14),
                    new XAttribute(XNamespace.Xmlns + "w15",   W15.w15),
                    new XAttribute(XNamespace.Xmlns + "w16se", W16SE.w16se),
                    new XAttribute(XNamespace.Xmlns + "wpg",   WPG.wpg),
                    new XAttribute(XNamespace.Xmlns + "wpi",   WPI.wpi),
                    new XAttribute(XNamespace.Xmlns + "wne",   WNE.wne),
                    new XAttribute(XNamespace.Xmlns + "wps",   WPS.wps),
                    new XAttribute(MC.Ignorable,               "w14 wp14 w15 w16se"),
                };

                XDocument newXDoc = Normalize(new XDocument(newRoot), null);
                newRoot = newXDoc.Root;
                if (newRoot != null)
                {
                    foreach (XAttribute nsAttr in nsAttrs)
                    {
                        if (newRoot.Attribute(nsAttr.Name) == null)
                        {
                            newRoot.Add(nsAttr);
                        }
                    }
                }

                part.PutXDocument(newXDoc);
            }
            else
            {
                part.PutXDocument(new XDocument(newRoot));
            }
        }
        // lastRenderedPageBreak, permEnd, permStart, proofErr, noProof
        // softHyphen:
        // Remove when simplifying.

        // fldSimple, fldData, fldChar, instrText:
        // For hyperlinks, generate same in XHtml.  Other than hyperlinks, do the following:
        // - collapse fldSimple
        // - remove fldSimple, fldData, fldChar, instrText.

        /// <summary>
        /// Recursively rebuilds <paramref name="node"/>, dropping, replacing, or unwrapping
        /// elements according to the switches in <paramref name="settings"/>.
        /// </summary>
        /// <param name="node">Node to transform; non-element nodes are returned unchanged.</param>
        /// <param name="settings">Switches that select which elements are removed or unwrapped.</param>
        /// <param name="parameters">Carries GoBackId, the id compared against bookmark ids when RemoveGoBackBookmark is set.</param>
        /// <returns>
        /// <c>null</c> when the element is removed, a replacement element, a lazily evaluated
        /// sequence of transformed children when the element is unwrapped, or a rebuilt copy
        /// of the element otherwise.
        /// </returns>
        private static object SimplifyMarkupTransform(
            XNode node,
            SimplifyMarkupSettings settings,
            SimplifyMarkupParameters parameters)
        {
            var element = node as XElement;

            if (element == null)
            {
                return node;
            }

            if (settings.RemovePermissions &&
                ((element.Name == W.permEnd) ||
                 (element.Name == W.permStart)))
            {
                return null;
            }

            if (settings.RemoveProof &&
                ((element.Name == W.proofErr) ||
                 (element.Name == W.noProof)))
            {
                return null;
            }

            if (settings.RemoveSoftHyphens &&
                (element.Name == W.softHyphen))
            {
                return null;
            }

            if (settings.RemoveLastRenderedPageBreak &&
                (element.Name == W.lastRenderedPageBreak))
            {
                return null;
            }

            if (settings.RemoveBookmarks &&
                ((element.Name == W.bookmarkStart) ||
                 (element.Name == W.bookmarkEnd)))
            {
                return null;
            }

            if (settings.RemoveGoBackBookmark &&
                ((element.Name == W.bookmarkStart) ||
                 (element.Name == W.bookmarkEnd)))
            {
                // A bookmark without w:id cannot match GoBackId; skip it rather than letting
                // the explicit XAttribute-to-int conversion throw on a null attribute.
                XAttribute bookmarkId = element.Attribute(W.id);
                if ((bookmarkId != null) && ((int)bookmarkId == parameters.GoBackId))
                {
                    return null;
                }
            }

            if (settings.RemoveWebHidden &&
                (element.Name == W.webHidden))
            {
                return null;
            }

            // A w:tab directly inside a run becomes a w:t holding one preserved space.
            if (settings.ReplaceTabsWithSpaces &&
                (element.Name == W.tab) &&
                (element.Parent != null && element.Parent.Name == W.r))
            {
                return new XElement(W.t, new XAttribute(XNamespace.Xml + "space", "preserve"), " ");
            }

            if (settings.RemoveComments &&
                ((element.Name == W.commentRangeStart) ||
                 (element.Name == W.commentRangeEnd) ||
                 (element.Name == W.commentReference) ||
                 (element.Name == W.annotationRef)))
            {
                return null;
            }

            // The string conversion yields null for a missing w:val, so a w:rStyle without
            // that attribute is kept instead of causing a NullReferenceException.
            if (settings.RemoveComments &&
                (element.Name == W.rStyle) &&
                ((string)element.Attribute(W.val) == "CommentReference"))
            {
                return null;
            }

            if (settings.RemoveEndAndFootNotes &&
                ((element.Name == W.endnoteReference) ||
                 (element.Name == W.footnoteReference)))
            {
                return null;
            }

            if (settings.RemoveFieldCodes)
            {
                // w:fldSimple is collapsed: it is replaced by its transformed child elements.
                if (element.Name == W.fldSimple)
                {
                    return element.Elements().Select(e => SimplifyMarkupTransform(e, settings, parameters));
                }

                if ((element.Name == W.fldData) ||
                    (element.Name == W.fldChar) ||
                    (element.Name == W.instrText))
                {
                    return null;
                }
            }

            // w:hyperlink is unwrapped the same way as w:fldSimple: its children are run
            // through the transform too, so they are simplified in the same pass.
            if (settings.RemoveHyperlinks &&
                (element.Name == W.hyperlink))
            {
                return element.Elements().Select(e => SimplifyMarkupTransform(e, settings, parameters));
            }

            return new XElement(element.Name,
                                element.Attributes(),
                                element.Nodes().Select(n => SimplifyMarkupTransform(n, settings, parameters)));
        }
 /// <summary>
 /// Instance convenience wrapper that passes this document and
 /// <paramref name="settings"/> to MarkupSimplifier.SimplifyMarkup.
 /// </summary>
 /// <param name="settings">Switches forwarded unchanged to the simplifier.</param>
 /// <returns>The document returned by MarkupSimplifier.SimplifyMarkup.</returns>
 public WmlDocument SimplifyMarkup(SimplifyMarkupSettings settings)
 {
     WmlDocument simplified = MarkupSimplifier.SimplifyMarkup(this, settings);
     return simplified;
 }
Beispiel #7
0
        /// <summary>
        /// Prepares a document in two passes over in-memory copies: the first opens it with
        /// markup-compatibility processing and touches the part root elements, the second
        /// validates, simplifies, and annotates the markup.
        /// </summary>
        /// <param name="source">Document whose bytes are copied and processed.</param>
        /// <param name="startingIdForFootnotesEndnotes">Passed to ChangeFootnoteEndnoteReferencesToUniqueRange.</param>
        /// <returns>A new WmlDocument with the source's file name and the processed bytes.</returns>
        private static WmlDocument PreProcessMarkup(WmlDocument source, int startingIdForFootnotesEndnotes)
        {
            WmlDocument mcProcessed;

            // Pass 1: open and close to get rid of MC content.
            using (var firstPass = new MemoryStream())
            {
                var originalBytes = source.DocumentByteArray;
                firstPass.Write(originalBytes, 0, originalBytes.Length);

                var firstOpenSettings = new OpenSettings();
                firstOpenSettings.MarkupCompatibilityProcessSettings = new MarkupCompatibilityProcessSettings(
                    MarkupCompatibilityProcessMode.ProcessAllParts,
                    FileFormatVersions.Office2007);

                using (WordprocessingDocument package = WordprocessingDocument.Open(firstPass, true, firstOpenSettings))
                {
                    // Contrary to what you might think, looking at the API, it is necessary to
                    // access the root element of each part to cause the SDK to process MC markup.
                    OpenXmlPartRootElement mainRoot = package.MainDocumentPart.RootElement;

                    var footnotes = package.MainDocumentPart.FootnotesPart;
                    if (footnotes != null)
                    {
                        OpenXmlPartRootElement footnotesRoot = footnotes.RootElement;
                    }

                    var endnotes = package.MainDocumentPart.EndnotesPart;
                    if (endnotes != null)
                    {
                        OpenXmlPartRootElement endnotesRoot = endnotes.RootElement;
                    }
                }

                mcProcessed = new WmlDocument(source.FileName, firstPass.ToArray());
            }

            // Pass 2: reopen the result of pass 1 with the same MC processing settings and
            // run the clean-up and annotation steps on it.
            using (var secondPass = new MemoryStream())
            {
                var processedBytes = mcProcessed.DocumentByteArray;
                secondPass.Write(processedBytes, 0, processedBytes.Length);

                var secondOpenSettings = new OpenSettings();
                secondOpenSettings.MarkupCompatibilityProcessSettings = new MarkupCompatibilityProcessSettings(
                    MarkupCompatibilityProcessMode.ProcessAllParts,
                    FileFormatVersions.Office2007);

                using (WordprocessingDocument package = WordprocessingDocument.Open(secondPass, true, secondOpenSettings))
                {
                    TestForInvalidContent(package);
                    RemoveExistingPowerToolsMarkup(package);

                    // Removing content controls, field codes, and bookmarks is a no-no for many
                    // use cases. Content controls are needed, e.g., on the title page. Field
                    // codes are required for automatic cross-references, which need bookmarks.
                    // TODO: Revisit
                    var simplifierSettings = new SimplifyMarkupSettings
                    {
                        RemoveBookmarks             = true,
                        AcceptRevisions             = false,
                        RemoveComments              = true,
                        RemoveContentControls       = true,
                        RemoveFieldCodes            = true,
                        RemoveGoBackBookmark        = true,
                        RemoveLastRenderedPageBreak = true,
                        RemovePermissions           = true,
                        RemoveProof                 = true,
                        RemoveSmartTags             = true,
                        RemoveSoftHyphens           = true,
                        RemoveHyperlinks            = true
                    };

                    MarkupSimplifier.SimplifyMarkup(package, simplifierSettings);
                    ChangeFootnoteEndnoteReferencesToUniqueRange(package, startingIdForFootnotesEndnotes);
                    AddUnidsToMarkupInContentParts(package);
                    AddFootnotesEndnotesParts(package);
                    FillInEmptyFootnotesEndnotes(package);
                }

                return new WmlDocument(mcProcessed.FileName, secondPass.ToArray());
            }
        }