Example #1
0
        /// <summary>
        /// Recursively unwraps smart tags (w:smartTag) and content controls (w:sdt),
        /// replacing each wrapper with its transformed child content.
        /// </summary>
        /// <param name="node">Node to transform; non-element nodes are returned unchanged.</param>
        /// <param name="simplifyMarkupSettings">
        /// Settings object; only RemoveSmartTags and RemoveContentControls are read here.
        /// </param>
        /// <returns>
        /// For an unwrapped element, the sequence of its transformed children; for any
        /// other element, a rebuilt copy with transformed child nodes; otherwise the node.
        /// </returns>
        private static object RemoveCustomXmlAndContentControlsTransform(
            XNode node, SimplifyMarkupSettings simplifyMarkupSettings)
        {
            XElement element = node as XElement;

            if (element == null)
            {
                return node;
            }

            if (simplifyMarkupSettings.RemoveSmartTags &&
                element.Name == W.smartTag)
            {
                return element
                       .Elements()
                       .Select(e =>
                               RemoveCustomXmlAndContentControlsTransform(e,
                                                                          simplifyMarkupSettings));
            }

            if (simplifyMarkupSettings.RemoveContentControls &&
                element.Name == W.sdt)
            {
                // Elements(W.sdtContent) yields an empty sequence when the control has no
                // content child, so such a control is dropped instead of throwing.
                return element
                       .Elements(W.sdtContent)
                       .Elements()
                       .Select(e =>
                               RemoveCustomXmlAndContentControlsTransform(e,
                                                                          simplifyMarkupSettings));
            }

            // Rebuild every other element so that wrappers nested anywhere below it are
            // unwrapped as well; without this step only the node passed in is inspected.
            return new XElement(element.Name,
                                element.Attributes(),
                                element.Nodes().Select(n =>
                                                       RemoveCustomXmlAndContentControlsTransform(n,
                                                                                                  simplifyMarkupSettings)));
        }
 /// <summary>
 /// Opens <paramref name="doc"/> through an in-memory stream wrapper, simplifies the
 /// resulting word-processing package with <paramref name="settings"/>, and returns
 /// the modified document reported by the wrapper.
 /// </summary>
 public static WmlDocument SimplifyMarkup(WmlDocument doc, SimplifyMarkupSettings settings)
 {
     using (OpenXmlMemoryStreamDocument memoryDocument = new OpenXmlMemoryStreamDocument(doc))
     {
         // The package is disposed before the modified document is requested.
         using (WordprocessingDocument package = memoryDocument.GetWordprocessingDocument())
         {
             SimplifyMarkup(package, settings);
         }

         WmlDocument simplified = memoryDocument.GetModifiedWmlDocument();
         return simplified;
     }
 }
Example #3
0
 /// <summary>
 /// Simplifies the markup of <paramref name="doc"/> according to
 /// <paramref name="settings"/> by working on an in-memory copy of the package, and
 /// returns the modified document produced by that copy.
 /// </summary>
 public static WmlDocument SimplifyMarkup(WmlDocument doc, SimplifyMarkupSettings settings)
 {
     using (OpenXmlMemoryStreamDocument buffer = new OpenXmlMemoryStreamDocument(doc))
     {
         // Inner scope: the word-processing package is closed before the result is read.
         using (WordprocessingDocument wordDocument = buffer.GetWordprocessingDocument())
         {
             SimplifyMarkup(wordDocument, settings);
         }

         WmlDocument result = buffer.GetModifiedWmlDocument();
         return result;
     }
 }
 /// <summary>
 /// Simplifies an open word-processing package in place: optionally accepts
 /// revisions, then simplifies every content part and, when present, the style
 /// definitions part and the styles-with-effects part.
 /// </summary>
 public static void SimplifyMarkup(WordprocessingDocument doc,
     SimplifyMarkupSettings settings)
 {
     if (settings.AcceptRevisions)
     {
         RevisionAccepter.AcceptRevisions(doc);
     }

     foreach (var contentPart in doc.ContentParts())
     {
         SimplifyMarkupForPart(contentPart, settings);
     }

     var styleDefinitions = doc.MainDocumentPart.StyleDefinitionsPart;
     if (styleDefinitions != null)
     {
         SimplifyMarkupForPart(styleDefinitions, settings);
     }

     var stylesWithEffects = doc.MainDocumentPart.StylesWithEffectsPart;
     if (stylesWithEffects != null)
     {
         SimplifyMarkupForPart(stylesWithEffects, settings);
     }
 }
Example #5
0
 /// <summary>
 /// Applies <paramref name="settings"/> to <paramref name="doc"/> in place.
 /// Revisions are accepted first when requested; afterwards each content part is
 /// simplified, followed by the two optional style parts of the main document part.
 /// </summary>
 public static void SimplifyMarkup(WordprocessingDocument doc,
                                   SimplifyMarkupSettings settings)
 {
     if (settings.AcceptRevisions)
         RevisionAccepter.AcceptRevisions(doc);

     foreach (var currentPart in doc.ContentParts())
         SimplifyMarkupForPart(currentPart, settings);

     // Style parts are optional, so each one is simplified only when it exists.
     var definitionsPart = doc.MainDocumentPart.StyleDefinitionsPart;
     if (definitionsPart != null)
         SimplifyMarkupForPart(definitionsPart, settings);

     var effectsPart = doc.MainDocumentPart.StylesWithEffectsPart;
     if (effectsPart != null)
         SimplifyMarkupForPart(effectsPart, settings);
 }
 /// <summary>
 /// Instance convenience wrapper: forwards this document and
 /// <paramref name="settings"/> to <c>MarkupSimplifier.SimplifyMarkup</c> and
 /// returns its result.
 /// </summary>
 public WmlDocument SimplifyMarkup(SimplifyMarkupSettings settings)
 {
     WmlDocument simplified = MarkupSimplifier.SimplifyMarkup(this, settings);
     return simplified;
 }
        // Element-level simplification transform.
        //
        // lastRenderedPageBreak, permEnd, permStart, proofErr, noProof, softHyphen:
        //   removed when the corresponding setting is enabled.
        // fldSimple, fldData, fldChar, instrText (RemoveFieldCodes):
        //   - fldSimple is collapsed: it is replaced by its transformed child elements.
        //   - fldData, fldChar and instrText are removed.
        //   Original author note: for hyperlinks, generate same in XHtml. No
        //   hyperlink-specific handling is visible in this method.
        //
        // Returns null for a removed element, a sequence for a collapsed fldSimple, a
        // rebuilt copy for any other element, and the node itself for non-elements.
        private static object SimplifyMarkupTransform(
            XNode node,
            SimplifyMarkupSettings settings,
            SimplifyMarkupParameters parameters)
        {
            XElement element = node as XElement;
            if (element == null)
            {
                return node;
            }

            if (settings.RemovePermissions &&
                (element.Name == W.permEnd ||
                 element.Name == W.permStart))
            {
                return null;
            }

            if (settings.RemoveProof &&
                (element.Name == W.proofErr ||
                 element.Name == W.noProof))
            {
                return null;
            }

            if (settings.RemoveSoftHyphens &&
                element.Name == W.softHyphen)
            {
                return null;
            }

            if (settings.RemoveLastRenderedPageBreak &&
                element.Name == W.lastRenderedPageBreak)
            {
                return null;
            }

            bool isBookmarkMarker =
                element.Name == W.bookmarkStart ||
                element.Name == W.bookmarkEnd;

            if (settings.RemoveBookmarks && isBookmarkMarker)
            {
                return null;
            }

            if (settings.RemoveGoBackBookmark && isBookmarkMarker)
            {
                // The nullable cast yields null for a marker without w:id, where the
                // plain (int) cast would throw.
                int? bookmarkId = (int?)element.Attribute(W.id);
                if (bookmarkId.HasValue && bookmarkId.Value == parameters.GoBackId)
                {
                    return null;
                }
            }

            if (settings.RemoveWebHidden &&
                element.Name == W.webHidden)
            {
                return null;
            }

            // Only tabs that sit directly inside a run are replaced; a parentless
            // element is skipped rather than dereferenced.
            if (settings.ReplaceTabsWithSpaces &&
                element.Name == W.tab &&
                element.Parent != null &&
                element.Parent.Name == W.r)
            {
                return new XElement(W.t,
                    new XAttribute(XNamespace.Xml + "space", "preserve"),
                    " ");
            }

            if (settings.RemoveComments &&
                (element.Name == W.commentRangeStart ||
                 element.Name == W.commentRangeEnd ||
                 element.Name == W.commentReference ||
                 element.Name == W.annotationRef))
            {
                return null;
            }

            // The string cast is null-safe, so an rStyle without w:val is simply kept.
            if (settings.RemoveComments &&
                element.Name == W.rStyle &&
                (string)element.Attribute(W.val) == "CommentReference")
            {
                return null;
            }

            if (settings.RemoveEndAndFootNotes &&
                (element.Name == W.endnoteReference ||
                 element.Name == W.footnoteReference))
            {
                return null;
            }

            if (settings.RemoveFieldCodes)
            {
                if (element.Name == W.fldSimple)
                {
                    return element.Elements().Select(e =>
                        SimplifyMarkupTransform(e, settings, parameters));
                }

                if (element.Name == W.fldData ||
                    element.Name == W.fldChar ||
                    element.Name == W.instrText)
                {
                    return null;
                }
            }

            // Everything else: copy the element and transform its child nodes.
            return new XElement(element.Name,
                element.Attributes(),
                element.Nodes().Select(n =>
                    SimplifyMarkupTransform(n, settings, parameters)));
        }
        /// <summary>
        /// Runs the configured simplification transforms over the XML of one part and
        /// writes the resulting document back to that part.
        /// </summary>
        /// <param name="part">Part whose XDocument is read, transformed and replaced.</param>
        /// <param name="settings">Flags selecting which transforms are applied.</param>
        private static void SimplifyMarkupForPart(
            OpenXmlPart part,
            SimplifyMarkupSettings settings)
        {
            SimplifyMarkupParameters parameters = new SimplifyMarkupParameters();
            if (part.ContentType == "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml")
            {
                WordprocessingDocument doc = (WordprocessingDocument)part.OpenXmlPackage;
                if (settings.RemoveGoBackBookmark)
                {
                    // Look up the id of the "_GoBack" bookmark so the element transform
                    // can match both its start and end markers.
                    var goBackBookmark = doc
                        .MainDocumentPart
                        .GetXDocument()
                        .Root
                        .Descendants(W.bookmarkStart)
                        .FirstOrDefault(bm => (string)bm.Attribute(W.name) == "_GoBack");
                    if (goBackBookmark != null)
                    {
                        // A bookmark without w:id leaves GoBackId untouched instead of
                        // throwing from the (int) cast.
                        XAttribute goBackIdAttribute = goBackBookmark.Attribute(W.id);
                        if (goBackIdAttribute != null)
                        {
                            parameters.GoBackId = (int)goBackIdAttribute;
                        }
                    }
                }
            }

            XDocument xdoc = part.GetXDocument();
            XElement newRoot = xdoc.Root;

            // Need to do this first to enable simplifying hyperlinks.
            if (settings.RemoveContentControls ||
                settings.RemoveSmartTags)
            {
                newRoot = (XElement)
                    RemoveCustomXmlAndContentControlsTransform(
                        newRoot, settings);
            }

            // This may touch many elements, so needs to be its own
            // transform.
            if (settings.RemoveRsidInfo)
            {
                newRoot = (XElement)RemoveRsidTransform(newRoot);
            }

            // Repeat the passes until one of them leaves the tree unchanged.
            XDocument prevNewRoot = new XDocument(newRoot);
            while (true)
            {
                // RemoveSoftHyphens and RemoveLastRenderedPageBreak are handled by
                // SimplifyMarkupTransform too, so they must also trigger it; otherwise
                // they have no effect when they are the only flags set.
                if (settings.RemoveComments ||
                    settings.RemoveEndAndFootNotes ||
                    settings.ReplaceTabsWithSpaces ||
                    settings.RemoveFieldCodes ||
                    settings.RemovePermissions ||
                    settings.RemoveProof ||
                    settings.RemoveSoftHyphens ||
                    settings.RemoveLastRenderedPageBreak ||
                    settings.RemoveBookmarks ||
                    settings.RemoveWebHidden ||
                    settings.RemoveGoBackBookmark)
                {
                    newRoot = (XElement)SimplifyMarkupTransform(newRoot,
                        settings, parameters);
                }

                // Remove runs and run properties that have become empty due to previous
                // transforms.
                newRoot = (XElement)
                    RemoveEmptyRunsAndRunPropertiesTransform(newRoot);

                // Merge adjacent runs that have identical run properties.
                newRoot = (XElement)MergeAdjacentRunsTransform(newRoot);

                // Merge adjacent instrText elements.
                newRoot = (XElement)MergeAdjacentInstrText(newRoot);

                if (XNode.DeepEquals(prevNewRoot.Root, newRoot))
                {
                    break;
                }

                prevNewRoot = new XDocument(newRoot);
            }

            if (settings.NormalizeXml)
            {
                XAttribute[] ns_attrs =
                {
                    new XAttribute(XNamespace.Xmlns + "wpc", WPC.wpc),
                    new XAttribute(XNamespace.Xmlns + "mc", MC.mc),
                    new XAttribute(XNamespace.Xmlns + "o", O.o),
                    new XAttribute(XNamespace.Xmlns + "r", R.r),
                    new XAttribute(XNamespace.Xmlns + "m", M.m),
                    new XAttribute(XNamespace.Xmlns + "v", VML.vml),
                    new XAttribute(XNamespace.Xmlns + "wp14", WP14.wp14),
                    new XAttribute(XNamespace.Xmlns + "wp", WP.wp),
                    new XAttribute(XNamespace.Xmlns + "w10", W10.w10),
                    new XAttribute(XNamespace.Xmlns + "w", W.w),
                    new XAttribute(XNamespace.Xmlns + "w14", W14.w14),
                    new XAttribute(XNamespace.Xmlns + "wpg", WPG.wpg),
                    new XAttribute(XNamespace.Xmlns + "wpi", WPI.wpi),
                    new XAttribute(XNamespace.Xmlns + "wne", WNE.wne),
                    new XAttribute(XNamespace.Xmlns + "wps", WPS.wps),
                    new XAttribute(MC.Ignorable, "w14 wp14"),
                };

                XDocument newXDoc = Normalize(new XDocument(newRoot), null);

                // Add each listed attribute that the normalized root does not already carry.
                foreach (var nsatt in ns_attrs)
                {
                    if (newXDoc.Root.Attribute(nsatt.Name) == null)
                    {
                        newXDoc.Root.Add(nsatt);
                    }
                }
                part.PutXDocument(newXDoc);
            }
            else
            {
                part.PutXDocument(new XDocument(newRoot));
            }
        }
        /// <summary>
        /// Recursive transform that strips smart-tag (w:smartTag) and content-control
        /// (w:sdt) wrappers while keeping their transformed content.
        /// </summary>
        /// <param name="node">Node to transform; anything that is not an element is returned as is.</param>
        /// <param name="simplifyMarkupSettings">
        /// Settings object; RemoveSmartTags and RemoveContentControls decide which
        /// wrappers are unwrapped.
        /// </param>
        /// <returns>
        /// A sequence of transformed children when the element is unwrapped, a rebuilt
        /// element when it is kept, or the original node for non-elements.
        /// </returns>
        private static object RemoveCustomXmlAndContentControlsTransform(
            XNode node, SimplifyMarkupSettings simplifyMarkupSettings)
        {
            XElement element = node as XElement;
            if (element != null)
            {
                if (simplifyMarkupSettings.RemoveSmartTags &&
                    element.Name == W.smartTag)
                {
                    return element
                        .Elements()
                        .Select(e =>
                            RemoveCustomXmlAndContentControlsTransform(e,
                                simplifyMarkupSettings));
                }

                if (simplifyMarkupSettings.RemoveContentControls &&
                    element.Name == W.sdt)
                {
                    // Elements(W.sdtContent) is empty when the control has no content
                    // child, which avoids the null dereference Element(...) would cause.
                    return element
                        .Elements(W.sdtContent)
                        .Elements()
                        .Select(e =>
                            RemoveCustomXmlAndContentControlsTransform(e,
                                simplifyMarkupSettings));
                }

                // Descend into every other element; without this, wrappers below the
                // node passed in are never visited.
                return new XElement(element.Name,
                    element.Attributes(),
                    element.Nodes().Select(n =>
                        RemoveCustomXmlAndContentControlsTransform(n,
                            simplifyMarkupSettings)));
            }
            return node;
        }
Example #10
0
        /// <summary>
        /// Applies the simplification transforms selected by <paramref name="settings"/>
        /// to the XML of <paramref name="part"/> and stores the result back in the part.
        /// </summary>
        /// <param name="part">Part whose XDocument is read, transformed and replaced.</param>
        /// <param name="settings">Flags selecting which transforms are applied.</param>
        private static void SimplifyMarkupForPart(
            OpenXmlPart part,
            SimplifyMarkupSettings settings)
        {
            SimplifyMarkupParameters parameters = new SimplifyMarkupParameters();

            if (part.ContentType == "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml")
            {
                WordprocessingDocument doc = (WordprocessingDocument)part.OpenXmlPackage;
                if (settings.RemoveGoBackBookmark)
                {
                    // Find the "_GoBack" bookmark start so its id can be handed to the
                    // element transform.
                    var goBackBookmark = doc
                                         .MainDocumentPart
                                         .GetXDocument()
                                         .Root
                                         .Descendants(W.bookmarkStart)
                                         .FirstOrDefault(bm => (string)bm.Attribute(W.name) == "_GoBack");
                    if (goBackBookmark != null)
                    {
                        // Skip the assignment when w:id is absent; casting a missing
                        // attribute to int would throw.
                        XAttribute goBackIdAttribute = goBackBookmark.Attribute(W.id);
                        if (goBackIdAttribute != null)
                        {
                            parameters.GoBackId = (int)goBackIdAttribute;
                        }
                    }
                }
            }


            XDocument xdoc    = part.GetXDocument();
            XElement  newRoot = xdoc.Root;

            // Need to do this first to enable simplifying hyperlinks.
            if (settings.RemoveContentControls ||
                settings.RemoveSmartTags)
            {
                newRoot = (XElement)
                          RemoveCustomXmlAndContentControlsTransform(
                    newRoot, settings);
            }

            // This may touch many elements, so needs to be its own
            // transform.
            if (settings.RemoveRsidInfo)
            {
                newRoot = (XElement)RemoveRsidTransform(newRoot);
            }

            XDocument prevNewRoot = new XDocument(newRoot);

            // Keep running the passes until the tree stops changing.
            while (true)
            {
                // RemoveSoftHyphens and RemoveLastRenderedPageBreak are included because
                // SimplifyMarkupTransform acts on them; leaving them out of this guard
                // makes them no-ops when no other flag is set.
                if (settings.RemoveComments ||
                    settings.RemoveEndAndFootNotes ||
                    settings.ReplaceTabsWithSpaces ||
                    settings.RemoveFieldCodes ||
                    settings.RemovePermissions ||
                    settings.RemoveProof ||
                    settings.RemoveSoftHyphens ||
                    settings.RemoveLastRenderedPageBreak ||
                    settings.RemoveBookmarks ||
                    settings.RemoveWebHidden ||
                    settings.RemoveGoBackBookmark)
                {
                    newRoot = (XElement)SimplifyMarkupTransform(newRoot,
                                                                settings, parameters);
                }

                // Remove runs and run properties that have become empty due to previous
                // transforms.
                newRoot = (XElement)
                          RemoveEmptyRunsAndRunPropertiesTransform(newRoot);

                // Merge adjacent runs that have identical run properties.
                newRoot = (XElement)MergeAdjacentRunsTransform(newRoot);

                // Merge adjacent instrText elements.
                newRoot = (XElement)MergeAdjacentInstrText(newRoot);

                if (XNode.DeepEquals(prevNewRoot.Root, newRoot))
                {
                    break;
                }

                prevNewRoot = new XDocument(newRoot);
            }

            if (settings.NormalizeXml)
            {
                XAttribute[] ns_attrs =
                {
                    new XAttribute(XNamespace.Xmlns + "wpc",  WPC.wpc),
                    new XAttribute(XNamespace.Xmlns + "mc",   MC.mc),
                    new XAttribute(XNamespace.Xmlns + "o",    O.o),
                    new XAttribute(XNamespace.Xmlns + "r",    R.r),
                    new XAttribute(XNamespace.Xmlns + "m",    M.m),
                    new XAttribute(XNamespace.Xmlns + "v",    VML.vml),
                    new XAttribute(XNamespace.Xmlns + "wp14", WP14.wp14),
                    new XAttribute(XNamespace.Xmlns + "wp",   WP.wp),
                    new XAttribute(XNamespace.Xmlns + "w10",  W10.w10),
                    new XAttribute(XNamespace.Xmlns + "w",    W.w),
                    new XAttribute(XNamespace.Xmlns + "w14",  W14.w14),
                    new XAttribute(XNamespace.Xmlns + "wpg",  WPG.wpg),
                    new XAttribute(XNamespace.Xmlns + "wpi",  WPI.wpi),
                    new XAttribute(XNamespace.Xmlns + "wne",  WNE.wne),
                    new XAttribute(XNamespace.Xmlns + "wps",  WPS.wps),
                    new XAttribute(MC.Ignorable,              "w14 wp14"),
                };

                XDocument newXDoc = Normalize(new XDocument(newRoot), null);

                // Only attributes missing from the normalized root are added.
                foreach (var nsatt in ns_attrs)
                {
                    if (newXDoc.Root.Attribute(nsatt.Name) == null)
                    {
                        newXDoc.Root.Add(nsatt);
                    }
                }
                part.PutXDocument(newXDoc);
            }
            else
            {
                part.PutXDocument(new XDocument(newRoot));
            }
        }
Example #11
0
        // lastRenderedPageBreak, permEnd, permStart, proofErr, noProof
        // softHyphen:
        // Removed when the corresponding setting is enabled.

        // fldSimple, fldData, fldChar, instrText (RemoveFieldCodes):
        // - fldSimple is collapsed into its transformed child elements.
        // - fldData, fldChar and instrText are removed.
        // Original author note: for hyperlinks, generate same in XHtml. No
        // hyperlink-specific handling is visible in this method.

        // Result: null for a removed element, a sequence for a collapsed fldSimple, a
        // rebuilt copy for any other element, the node itself when it is not an element.
        private static object SimplifyMarkupTransform(
            XNode node,
            SimplifyMarkupSettings settings,
            SimplifyMarkupParameters parameters)
        {
            XElement element = node as XElement;

            if (element != null)
            {
                XName name = element.Name;

                if (settings.RemovePermissions &&
                    (name == W.permEnd ||
                     name == W.permStart))
                {
                    return(null);
                }

                if (settings.RemoveProof &&
                    (name == W.proofErr ||
                     name == W.noProof))
                {
                    return(null);
                }

                if (settings.RemoveSoftHyphens &&
                    name == W.softHyphen)
                {
                    return(null);
                }

                if (settings.RemoveLastRenderedPageBreak &&
                    name == W.lastRenderedPageBreak)
                {
                    return(null);
                }

                if (settings.RemoveBookmarks &&
                    (name == W.bookmarkStart ||
                     name == W.bookmarkEnd))
                {
                    return(null);
                }

                if (settings.RemoveGoBackBookmark &&
                    (name == W.bookmarkStart ||
                     name == W.bookmarkEnd))
                {
                    // A marker without w:id gives null here; the plain (int) cast
                    // would throw on the missing attribute.
                    int? markerId = (int?)element.Attribute(W.id);
                    if (markerId.HasValue && markerId.Value == parameters.GoBackId)
                    {
                        return(null);
                    }
                }

                if (settings.RemoveWebHidden &&
                    name == W.webHidden)
                {
                    return(null);
                }

                // Tabs are replaced only directly inside a run; the parent is checked
                // for null so a parentless element does not throw.
                if (settings.ReplaceTabsWithSpaces && name == W.tab &&
                    element.Parent != null &&
                    element.Parent.Name == W.r)
                {
                    return(new XElement(W.t,
                                        new XAttribute(XNamespace.Xml + "space", "preserve"),
                                        " "));
                }

                if (settings.RemoveComments &&
                    (name == W.commentRangeStart ||
                     name == W.commentRangeEnd ||
                     name == W.commentReference ||
                     name == W.annotationRef))
                {
                    return(null);
                }

                // The string cast tolerates a missing w:val attribute.
                if (settings.RemoveComments &&
                    name == W.rStyle &&
                    (string)element.Attribute(W.val) == "CommentReference")
                {
                    return(null);
                }

                if (settings.RemoveEndAndFootNotes &&
                    (name == W.endnoteReference ||
                     name == W.footnoteReference))
                {
                    return(null);
                }

                if (settings.RemoveFieldCodes)
                {
                    if (name == W.fldSimple)
                    {
                        return(element.Elements().Select(e =>
                                                         SimplifyMarkupTransform(e, settings, parameters)));
                    }
                    if (name == W.fldData ||
                        name == W.fldChar ||
                        name == W.instrText)
                    {
                        return(null);
                    }
                }

                // Default: copy the element and transform each child node.
                return(new XElement(name,
                                    element.Attributes(),
                                    element.Nodes().Select(n =>
                                                           SimplifyMarkupTransform(n, settings, parameters))));
            }
            return(node);
        }
Example #12
0
 /// <summary>
 /// Simplifies this document by delegating to <c>MarkupSimplifier.SimplifyMarkup</c>
 /// with <paramref name="settings"/>, and returns whatever that call returns.
 /// </summary>
 public WmlDocument SimplifyMarkup(SimplifyMarkupSettings settings)
 {
     WmlDocument result = MarkupSimplifier.SimplifyMarkup(this, settings);
     return result;
 }