Ejemplo n.º 1
0
        /// <summary>
        /// Lets the user pick one or more documents and runs the markup simplifier on each
        /// of them, using the options currently ticked on the form's check boxes.
        /// </summary>
        /// <param name="sender">Control that raised the click event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void btnApply_Click(object sender, EventArgs e)
        {
            // The dialog is disposable; release it deterministically once the selection is known.
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Multiselect = true;

                // Only process files when the user explicitly confirmed the selection.
                if (ofd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                foreach (var item in ofd.FileNames)
                {
                    // Each file is opened for editing and closed again before the next one is touched.
                    using (WordprocessingDocument doc =
                               WordprocessingDocument.Open(item, true))
                    {
                        SimplifyMarkupSettings settings = new SimplifyMarkupSettings
                        {
                            RemoveContentControls       = cbRemoveContentControls.Checked,
                            RemoveSmartTags             = cbRemoveSmartTags.Checked,
                            RemoveRsidInfo              = cbRemoveRsidInfo.Checked,
                            RemoveComments              = cbRemoveComments.Checked,
                            RemoveEndAndFootNotes       = cbRemoveEndAndFootNotes.Checked,
                            ReplaceTabsWithSpaces       = cbReplaceTabsWithSpaces.Checked,
                            RemoveFieldCodes            = cbRemoveFieldCodes.Checked,
                            RemovePermissions           = cbRemovePermissions.Checked,
                            RemoveProof                 = cbRemoveProof.Checked,
                            RemoveSoftHyphens           = cbRemoveSoftHyphens.Checked,
                            RemoveLastRenderedPageBreak = cbRemoveLastRenderedPageBreak.Checked,
                            RemoveBookmarks             = cbRemoveBookmarks.Checked,
                            RemoveWebHidden             = cbRemoveWebHidden.Checked,
                            NormalizeXml                = cbNormalize.Checked,
                        };
                        Clippit.MarkupSimplifier.SimplifyMarkup(doc, settings);
                    }
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Verifies that simplifying with <c>RemoveContentControls</c> removes every
        /// <c>w:sdt</c> element and that the first descendant of the body is a <c>w:p</c>.
        /// </summary>
        public void CanRemoveContentControls()
        {
            XDocument sourceXml = XDocument.Parse(SdtDocumentXmlString);

            // Precondition: the fixture really contains a content control.
            bool hasContentControl = sourceXml.Descendants(W.sdt).Any();
            Assert.True(hasContentControl);

            using (MemoryStream buffer = new MemoryStream())
            {
                using (WordprocessingDocument package = WordprocessingDocument.Create(buffer, DocumentType))
                {
                    MainDocumentPart mainPart = package.AddMainDocumentPart();
                    mainPart.PutXDocument(sourceXml);

                    var options = new SimplifyMarkupSettings { RemoveContentControls = true };
                    MarkupSimplifier.SimplifyMarkup(package, options);

                    // Re-read the part to inspect what the simplifier produced.
                    XDocument simplifiedXml = mainPart.GetXDocument();
                    XElement firstBodyDescendant = simplifiedXml.Descendants(W.body).Descendants().First();

                    Assert.False(simplifiedXml.Descendants(W.sdt).Any());
                    Assert.Equal(W.p, firstBodyDescendant.Name);
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Verifies that simplifying with <c>RemoveGoBackBookmark</c> leaves no
        /// <c>w:bookmarkStart</c> or <c>w:bookmarkEnd</c> elements in the fixture document.
        /// </summary>
        public void CanRemoveGoBackBookmarks()
        {
            XDocument sourceXml = XDocument.Parse(GoBackBookmarkDocumentXmlString);

            // Precondition: the fixture has the "_GoBack" bookmark with id 0 and its matching end marker.
            var startsBefore = sourceXml.Descendants(W.bookmarkStart);
            Assert.Contains(startsBefore,
                            start => start.Attribute(W.name).Value == "_GoBack" && start.Attribute(W.id).Value == "0");
            var endsBefore = sourceXml.Descendants(W.bookmarkEnd);
            Assert.Contains(endsBefore, end => end.Attribute(W.id).Value == "0");

            using (MemoryStream buffer = new MemoryStream())
            {
                using (WordprocessingDocument package = WordprocessingDocument.Create(buffer, DocumentType))
                {
                    MainDocumentPart mainPart = package.AddMainDocumentPart();
                    mainPart.PutXDocument(sourceXml);

                    var options = new SimplifyMarkupSettings { RemoveGoBackBookmark = true };
                    MarkupSimplifier.SimplifyMarkup(package, options);

                    // Re-read the part to inspect what the simplifier produced.
                    XDocument simplifiedXml = mainPart.GetXDocument();
                    Assert.False(simplifiedXml.Descendants(W.bookmarkStart).Any());
                    Assert.False(simplifiedXml.Descendants(W.bookmarkEnd).Any());
                }
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Command-line entry point: prints usage text when no arguments are given, then
        /// simplifies every document named on the command line using the stored default settings.
        /// </summary>
        /// <param name="args">Paths of the documents to simplify.</param>
        private static void Main(string[] args)
        {
            bool noInputFiles = args.Length == 0;
            if (noInputFiles)
            {
                Console.WriteLine("Example output files are in a DateTime stamped directory in ./bin/debug.  The directory name is ExampleOutput-yy-mm-dd-hhmmss.");
                Console.WriteLine("If you are building in release mode, they will, of course, be in ./bin/release.");
                Console.WriteLine("MarkupSimplifierApp.exe 1.docx 2.docx");
            }

            for (int index = 0; index < args.Length; index++)
            {
                // Open each document for editing; it is closed before the next one is processed.
                using (var document = WordprocessingDocument.Open(args[index], true))
                {
                    var options = new SimplifyMarkupSettings
                    {
                        RemoveContentControls = Settings.Default.RemoveContentControls,
                        RemoveSmartTags = Settings.Default.RemoveSmartTags,
                        RemoveRsidInfo = Settings.Default.RemoveRsidInfo,
                        RemoveComments = Settings.Default.RemoveComments,
                        RemoveEndAndFootNotes = Settings.Default.RemoveEndAndFootNotes,
                        ReplaceTabsWithSpaces = Settings.Default.ReplaceTabsWithSpaces,
                        RemoveFieldCodes = Settings.Default.RemoveFieldCodes,
                        RemovePermissions = Settings.Default.RemovePermissions,
                        RemoveProof = Settings.Default.RemoveProof,
                        RemoveSoftHyphens = Settings.Default.RemoveSoftHyphens,
                        RemoveLastRenderedPageBreak = Settings.Default.RemoveLastRenderedPageBreak,
                        RemoveBookmarks = Settings.Default.RemoveBookmarks,
                        RemoveWebHidden = Settings.Default.RemoveWebHidden,
                        NormalizeXml = Settings.Default.NormalizeXml,
                    };

                    MarkupSimplifier.SimplifyMarkup(document, options);
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Loads a Word template into memory, simplifies its markup, applies the given
        /// replacements to the raw XML text of the main document part and returns the
        /// bytes of the resulting document package.
        /// </summary>
        /// <param name="templatePath">
        /// Template path, combined with the directory configured under the "doctemplate"
        /// app setting (a leading "~" in that setting is resolved via <c>Server.MapPath</c>).
        /// </param>
        /// <param name="items">
        /// Replacements to apply in order. <c>TextToReplace</c> is used as a regular-expression
        /// pattern; entries whose pattern is null or empty are skipped, and a null
        /// <c>ReplacementText</c> is treated as an empty string.
        /// </param>
        /// <returns>The modified document package as a byte array.</returns>
        public byte[] GetWordReplacedTextUsingPlaintext(string templatePath, List <WordReplacement> items)
        {
            var pathdir = ConfigurationManager.AppSettings["doctemplate"].ToString();

            if (pathdir.StartsWith("~"))
            {
                pathdir = HttpContext.Current.Server.MapPath(pathdir);
            }

            var path = Path.Combine(pathdir, templatePath);

            using (MemoryStream templateStream = new MemoryStream())
            {
                // Read access is all the template needs, and the using block closes the
                // file even when the copy throws.
                using (FileStream fileStream = new FileStream(path, FileMode.Open, FileAccess.Read))
                {
                    fileStream.CopyStream(templateStream);
                }

                using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(templateStream, true))
                {
                    wordDoc.ChangeDocumentType(WordprocessingDocumentType.Document);

                    SimplifyMarkupSettings settings = new SimplifyMarkupSettings
                    {
                        RemoveProof    = true,
                        RemoveRsidInfo = true,
                        NormalizeXml   = true,
                    };

                    MarkupSimplifier.SimplifyMarkup(wordDoc, settings);

                    string docText;
                    using (StreamReader sr = new StreamReader(wordDoc.MainDocumentPart.GetStream()))
                    {
                        docText = sr.ReadToEnd();
                    }

                    foreach (var item in items)
                    {
                        if (!string.IsNullOrEmpty(item.TextToReplace))
                        {
                            Regex regexText = new Regex(item.TextToReplace);
                            docText = regexText.Replace(docText, item.ReplacementText ?? "");
                        }
                    }

                    // Write the replaced XML back into the main document part (recreating its
                    // stream), not onto the package's backing stream, which would corrupt the
                    // package and leave the part unchanged.
                    using (StreamWriter sw = new StreamWriter(wordDoc.MainDocumentPart.GetStream(FileMode.Create)))
                    {
                        sw.Write(docText);
                    }
                }

                // Snapshot the bytes only after the document has been disposed, so that
                // everything has been flushed into the memory stream.
                return templateStream.ToArray();
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Loads an external (Word) XML file from a storage medium. The file is first copied
        /// to the user's temp folder, the copy is opened for editing, and its markup is simplified.
        /// </summary>
        /// <param name="filePath">Specifies the path of the file the user wants to load.</param>
        /// <returns>
        /// Returns true if the file could be copied, opened and simplified; otherwise false, in
        /// which case the error message field is set and the temporary copy is removed.
        /// </returns>
        public bool LoadXmlFile(string filePath)
        {
            //// Tracks the document opened by THIS call so that a failure never closes a
            //// document that an earlier call left open.
            WordprocessingDocument openedDocument = null;

            try
            {
                //// Copy the use-case file to the user's temp folder so the original is not the one opened with write access
                //// Path.GetFileName handles every directory separator the platform accepts.
                string fileName    = Path.GetFileName(filePath);
                string newFilePath = Path.Combine(Path.GetTempPath(), fileName);
                File.Copy(filePath, newFilePath, true);
                this.useCaseFilePath = newFilePath;

                //// Open and load the use-case file
                openedDocument   = WordprocessingDocument.Open(this.useCaseFilePath, true);
                this.useCaseFile = openedDocument;

                //// Set and apply the settings for the opened use-case file
                SimplifyMarkupSettings settings = new SimplifyMarkupSettings
                {
                    AcceptRevisions                   = false,
                    RemoveContentControls             = true,
                    RemoveSmartTags                   = true,
                    RemoveRsidInfo                    = true,
                    RemoveComments                    = true,
                    RemoveEndAndFootNotes             = true,
                    ReplaceTabsWithSpaces             = true,
                    RemoveFieldCodes                  = false,
                    RemovePermissions                 = true,
                    RemoveProof                       = true,
                    RemoveSoftHyphens                 = true,
                    RemoveLastRenderedPageBreak       = true,
                    RemoveBookmarks                   = true,
                    RemoveWebHidden                   = true,
                    RemoveGoBackBookmark              = true,
                    RemoveMarkupForDocumentComparison = true,
                    NormalizeXml                      = true,
                };
                MarkupSimplifier.SimplifyMarkup(this.useCaseFile, settings);

                return true;
            }
            catch (Exception ex)
            {
                //// General error while loading the use-case file

                //// Set the error message
                this.errorMessage = "Fehler beim Einlesen der UseCase-Datei: " + ex.Message.ToString();

                //// Close the document first; an open document would keep the temporary file
                //// locked and make the delete below fail.
                if (openedDocument != null)
                {
                    openedDocument.Dispose();
                    this.useCaseFile = null;
                }

                //// Delete the temporary file from the user's temp folder
                if (File.Exists(this.useCaseFilePath))
                {
                    File.Delete(this.useCaseFilePath);
                }

                return false;
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Opens the document at <paramref name="path"/> for editing, accepts tracked
        /// revisions and removes comments, then returns the main document part's XML as text.
        /// </summary>
        /// <param name="path">Path of the document to process.</param>
        /// <returns>String form of the main document part's XML element after simplification.</returns>
        public static string FinishReview(string path)
        {
            using (WordprocessingDocument document = WordprocessingDocument.Open(path, true))
            {
                var options = new SimplifyMarkupSettings { AcceptRevisions = true, RemoveComments = true };
                MarkupSimplifier.SimplifyMarkup(document, options);

                // Serialize while the document is still open; it is closed on leaving the block.
                var mainPartXml = document.MainDocumentPart.GetXElement();
                return mainPartXml.ToString();
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Builds a document that embeds <paramref name="html"/> as an XHTML altChunk, has it
        /// re-saved through <c>WordAutomationUtilities.OpenAndSaveAs</c>, and then simplifies the
        /// re-saved document and applies <c>RemoveDivTransform</c> to its main part.
        /// </summary>
        /// <param name="newAltChunkBeforeFi">Destination for the copy of the blank template that receives the altChunk.</param>
        /// <param name="newAltChunkAfterFi">File produced by the open-and-save-as step; it is post-processed in place.</param>
        /// <param name="html">XHTML content written into the alternative-format import part.</param>
        /// <exception cref="IOException">
        /// The re-saved document still could not be opened after the maximum number of attempts.
        /// </exception>
        public static void DoConversionViaWord(FileInfo newAltChunkBeforeFi, FileInfo newAltChunkAfterFi, XElement html)
        {
            // The blank template is a file, so it is modeled with FileInfo.
            var blankAltChunkFi = new FileInfo(Path.Combine(TestUtil.SourceDir.FullName, "Blank-altchunk.docx"));

            File.Copy(blankAltChunkFi.FullName, newAltChunkBeforeFi.FullName);
            using (WordprocessingDocument myDoc = WordprocessingDocument.Open(newAltChunkBeforeFi.FullName, true))
            {
                string                      altChunkId = "AltChunkId1";
                MainDocumentPart            mainPart   = myDoc.MainDocumentPart;
                AlternativeFormatImportPart chunk      = mainPart.AddAlternativeFormatImportPart(
                    "application/xhtml+xml", altChunkId);
                using (Stream chunkStream = chunk.GetStream(FileMode.Create, FileAccess.Write))
                {
                    using (StreamWriter stringStream = new StreamWriter(chunkStream))
                    {
                        stringStream.Write(html.ToString());
                    }
                }

                // Reference the imported part from the very start of the body.
                XElement altChunk = new XElement(W.altChunk,
                                                 new XAttribute(R.id, altChunkId)
                                                 );
                XDocument mainDocumentXDoc = myDoc.MainDocumentPart.GetXDocument();
                mainDocumentXDoc.Root
                .Element(W.body)
                .AddFirst(altChunk);
                myDoc.MainDocumentPart.PutXDocument();
            }

            WordAutomationUtilities.OpenAndSaveAs(newAltChunkBeforeFi.FullName, newAltChunkAfterFi.FullName);

            // Opening the result can fail with an IOException (presumably while the file is
            // still held by Word - TODO confirm). Retry for a bounded time so that a permanent
            // failure, e.g. the output file never being created, surfaces instead of looping forever.
            const int maxAttempts     = 200;
            const int retryDelayMsecs = 50;

            for (int attempt = 1; ; attempt++)
            {
                try
                {
                    using (WordprocessingDocument wDoc = WordprocessingDocument.Open(newAltChunkAfterFi.FullName, true))
                    {
                        SimplifyMarkupSettings settings2 = new SimplifyMarkupSettings
                        {
                            RemoveMarkupForDocumentComparison = true,
                        };
                        MarkupSimplifier.SimplifyMarkup(wDoc, settings2);
                        XElement newRoot = (XElement)RemoveDivTransform(wDoc.MainDocumentPart.GetXDocument().Root);
                        wDoc.MainDocumentPart.GetXDocument().Root.ReplaceWith(newRoot);
                        wDoc.MainDocumentPart.PutXDocumentWithFormatting();
                    }
                    break;
                }
                catch (IOException)
                {
                    if (attempt >= maxAttempts)
                    {
                        throw;
                    }

                    System.Threading.Thread.Sleep(retryDelayMsecs);
                }
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Simplifies and saves the document behind a list-view item, then marks the item
        /// green ("Saved") on success or red with the exception message on failure.
        /// </summary>
        /// <param name="lvi">List-view item whose <c>Tag</c> is read as the <c>FileInfo</c> of the document.</param>
        private void CleanAndSaveItem(ListViewItem lvi)
        {
            var target = lvi.Tag as FileInfo;

            // Only items still flagged "Unsaved" are processed (prevents double saving),
            // and only when the file passes the target check.
            if (lvi.ToolTipText != "Unsaved" || !IsValidTarget(target))
            {
                return;
            }

            try
            {
                using (WordprocessingDocument document = WordprocessingDocument.Open(target.FullName, true))
                {
                    var options = new SimplifyMarkupSettings
                    {
                        AcceptRevisions = true,
                        // NormalizeXml merges runs in a paragraph that have similar formatting. Turning it
                        // off reduces translation quality, but with it on some documents have XML format
                        // errors when opening.
                        NormalizeXml = true,
                        RemoveBookmarks = true,
                        RemoveComments = true,
                        RemoveContentControls = true,
                        RemoveEndAndFootNotes = true,
                        RemoveFieldCodes = false,
                        RemoveGoBackBookmark = true,
                        RemoveHyperlinks = false,
                        RemoveLastRenderedPageBreak = true,
                        RemoveMarkupForDocumentComparison = true,
                        RemovePermissions = false,
                        RemoveProof = true,
                        RemoveRsidInfo = true,
                        RemoveSmartTags = true,
                        RemoveSoftHyphens = true,
                        RemoveWebHidden = true,
                        ReplaceTabsWithSpaces = false
                    };

                    MarkupSimplifier.SimplifyMarkup(document, options);
                    document.Save();

                    lvi.BackColor = Color.Green;
                    lvi.ToolTipText = "Saved";
                }
            }
            catch (Exception failure)
            {
                // Surface the failure on the item itself.
                lvi.BackColor = Color.Red;
                lvi.ToolTipText = failure.Message;
            }
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Reads <paramref name="source"/> into memory, accepts revisions, simplifies the markup,
        /// runs the formatting assembler, and writes the resulting package to <paramref name="dest"/>.
        /// </summary>
        /// <param name="source">Document to read; it is only read, never modified.</param>
        /// <param name="dest">File that receives the processed document bytes.</param>
        public static void CopyFormattingAssembledDocx(FileInfo source, FileInfo dest)
        {
            byte[] sourceBytes = File.ReadAllBytes(source.FullName);

            using (var buffer = new MemoryStream())
            {
                buffer.Write(sourceBytes, 0, sourceBytes.Length);

                using (var package = WordprocessingDocument.Open(buffer, true))
                {
                    RevisionAccepter.AcceptRevisions(package);

                    var simplifyOptions = new SimplifyMarkupSettings
                    {
                        RemoveComments = true,
                        RemoveContentControls = true,
                        RemoveEndAndFootNotes = true,
                        RemoveFieldCodes = false,
                        RemoveLastRenderedPageBreak = true,
                        RemovePermissions = true,
                        RemoveProof = true,
                        RemoveRsidInfo = true,
                        RemoveSmartTags = true,
                        RemoveSoftHyphens = true,
                        RemoveGoBackBookmark = true,
                        ReplaceTabsWithSpaces = false,
                    };
                    MarkupSimplifier.SimplifyMarkup(package, simplifyOptions);

                    var listItemOptions = new ListItemRetrieverSettings()
                    {
                        ListItemTextImplementations = ListItemRetrieverSettings.DefaultListItemTextImplementations,
                    };
                    var assemblerOptions = new FormattingAssemblerSettings
                    {
                        RemoveStyleNamesFromParagraphAndRunProperties = false,
                        ClearStyles = false,
                        RestrictToSupportedLanguages = false,
                        RestrictToSupportedNumberingFormats = false,
                        CreateHtmlConverterAnnotationAttributes = true,
                        OrderElementsPerStandard = false,
                        ListItemRetrieverSettings = listItemOptions,
                    };
                    FormattingAssembler.AssembleFormatting(package, assemblerOptions);
                }

                // The package has been closed at this point, so the buffer holds the final bytes.
                File.WriteAllBytes(dest.FullName, buffer.ToArray());
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Converts a word-processing document to an XHTML element tree. Revisions are accepted
        /// and the markup is simplified on the passed-in document before the transform runs.
        /// </summary>
        /// <param name="wordDoc">Document to convert; it is modified by the revision-accept and simplify steps.</param>
        /// <param name="htmlConverterSettings">Conversion settings; <c>ConvertFormatting</c> must be false.</param>
        /// <param name="imageHandler">Callback passed through to the transform for producing an element from an image.</param>
        /// <returns>The XHTML element produced by the transform.</returns>
        /// <exception cref="InvalidSettingsException">Thrown when <c>ConvertFormatting</c> is requested.</exception>
        public XElement ConvertToHtml(WordprocessingDocument wordDoc,
                                      HtmlConverterSettings htmlConverterSettings, Func <ImageInfo, XElement> imageHandler)
        {
            InitEntityMap();

            bool formattingRequested = htmlConverterSettings.ConvertFormatting;
            if (formattingRequested)
            {
                throw new InvalidSettingsException("Conversion with formatting is not supported");
            }

            RevisionAccepter.AcceptRevisions(wordDoc);

            var simplifyOptions = new SimplifyMarkupSettings
            {
                RemoveComments = true,
                RemoveContentControls = true,
                RemoveEndAndFootNotes = true,
                RemoveFieldCodes = false,
                RemoveLastRenderedPageBreak = true,
                RemovePermissions = true,
                RemoveProof = true,
                RemoveRsidInfo = true,
                RemoveSmartTags = true,
                RemoveSoftHyphens = true,
                ReplaceTabsWithSpaces = true,
            };
            MarkupSimplifier.SimplifyMarkup(wordDoc, simplifyOptions);

            XElement documentRoot = wordDoc.MainDocumentPart.GetXDocument().Root;
            AnnotateHyperlinkContent(documentRoot);

            // Note: the tree returned by ConvertToHtmlTransform contains objects of type
            // XEntity, which is defined in PtOpenXmlUtil.cs.  See
            // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
            // for a detailed explanation.
            //
            // Any further transformation of that tree must be done correctly, or the
            // entities will not be serialized properly.
            object transformed = ConvertToHtmlTransform(wordDoc, htmlConverterSettings,
                                                        documentRoot, imageHandler);
            return (XElement)transformed;
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Copies the incoming document stream into memory, simplifies its markup and
        /// returns the HTML produced by <c>WmlToHtmlConverter</c> as a string.
        /// </summary>
        /// <param name="inputDoc">Stream containing the Word document; it is read from its current position.</param>
        /// <returns>String form of the converted HTML.</returns>
        private string ConvertDocxToHtml(Stream inputDoc)
        {
            // Work on an in-memory copy so the document can be opened for editing.
            using (MemoryStream buffer = new MemoryStream())
            {
                inputDoc.CopyTo(buffer);

                using (WordprocessingDocument package = WordprocessingDocument.Open(buffer, true))
                {
                    // Strip markup that is not needed for the HTML export.
                    var options = new SimplifyMarkupSettings
                    {
                        AcceptRevisions = true,
                        NormalizeXml = true,
                        RemoveBookmarks = true,
                        RemoveComments = true,
                        RemoveContentControls = true,
                        RemoveEndAndFootNotes = true,
                        RemoveFieldCodes = true,
                        RemoveGoBackBookmark = true,
                        RemoveHyperlinks = false,
                        RemoveLastRenderedPageBreak = true,
                        RemoveMarkupForDocumentComparison = true,
                        RemovePermissions = true,
                        RemoveProof = true,
                        RemoveRsidInfo = true,
                        RemoveSmartTags = true,
                        RemoveSoftHyphens = true,
                        RemoveWebHidden = true,
                        ReplaceTabsWithSpaces = true
                    };
                    MarkupSimplifier.SimplifyMarkup(package, options);

                    // Export with default converter settings.
                    var html = WmlToHtmlConverter.ConvertToHtml(package, new WmlToHtmlConverterSettings());
                    return html.ToString();
                }
            }
        }
Ejemplo n.º 13
0
        // private static object WrapInBraces(IEnumerable<XNode> nodes, XElement firstText, XElement lastText)
        // {
        //     return nodes.Select(node =>
        //     {
        //         XElement element = node as XElement;
        //         if (element != null)
        //         {
        //             if (element.Name == W.t)
        //             {
        //                 if (element == firstText)
        //                 {
        //                     var newText = "{" + element.Value;
        //                     if (element == lastText) // also last?
        //                     {
        //                         newText = newText + "}";
        //                     }
        //                     return new XElement(element.Name, element.Attributes(), newText);
        //                 }
        //                 if (element == lastText)
        //                 {
        //                     return new XElement(element.Name, element.Attributes(), element.Value + "}");
        //                 }
        //             }
        //             return new XElement(element.Name,
        //                 element.Attributes(),
        //                 WrapInBraces(element.Nodes(), firstText, lastText));
        //         }
        //         return node;
        //     });
        // }

        /// <summary>
        /// Strips markup from a template that later processing does not need, while
        /// keeping content controls, notes, field codes, permissions, soft hyphens and tabs.
        /// </summary>
        /// <param name="wordDoc">Template document to simplify.</param>
        private static void SimplifyTemplateMarkup(WordprocessingDocument wordDoc)
        {
            var options = new SimplifyMarkupSettings
            {
                RemoveComments = true,
                RemoveContentControls = false,
                RemoveEndAndFootNotes = false,
                RemoveFieldCodes = false,
                RemoveLastRenderedPageBreak = true,
                RemovePermissions = false,
                RemoveProof = true,
                RemoveRsidInfo = true,
                // todo: change RemoveSmartTags back to true once OXPT has been patched to make it work right
                RemoveSmartTags = false,
                RemoveSoftHyphens = false,
                ReplaceTabsWithSpaces = false,
                RemoveMarkupForDocumentComparison = true,
                RemoveWebHidden = true
            };

            MarkupSimplifier.SimplifyMarkup(wordDoc, options);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Runs the markup simplifier on the given document with a fixed set of removals,
        /// including bookmarks and hyperlinks; field codes are kept.
        /// </summary>
        /// <param name="originalDocument">Document to simplify.</param>
        private void SimplifyMarkup(WordprocessingDocument originalDocument)
        {
            var options = new SimplifyMarkupSettings
            {
                RemoveComments = true,
                RemoveContentControls = true,
                RemoveEndAndFootNotes = true,
                RemoveFieldCodes = false,
                RemoveLastRenderedPageBreak = true,
                RemovePermissions = true,
                RemoveProof = true,
                RemoveRsidInfo = true,
                RemoveSmartTags = true,
                RemoveSoftHyphens = true,
                ReplaceTabsWithSpaces = true,
                RemoveBookmarks = true,
                RemoveGoBackBookmark = true,
                RemoveHyperlinks = true,
            };

            MarkupSimplifier.SimplifyMarkup(originalDocument, options);
        }
        /// <summary>
        /// Removes markup that splits text into multiple pieces, so that a simple
        /// search and replace can work on the document afterwards.
        /// </summary>
        /// <param name="doc">Document to clean.</param>
        private static void CleanMarkup(WordprocessingDocument doc)
        {
            var options = new SimplifyMarkupSettings
            {
                RemoveComments = true,
                RemoveContentControls = true,
                RemoveEndAndFootNotes = true,
                RemoveFieldCodes = false,
                RemoveLastRenderedPageBreak = true,
                RemovePermissions = true,
                RemoveProof = true,
                RemoveRsidInfo = true,
                RemoveSmartTags = true,
                RemoveSoftHyphens = true,
                ReplaceTabsWithSpaces = true,
                RemoveBookmarks = true
            };

            MarkupSimplifier.SimplifyMarkup(doc, options);
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Copies the SmartTags.docx template to the results folder, removes smart tags from it in
        /// memory, saves the outcome as SmartTags-Removed.docx and asserts that no <c>w:smartTag</c>
        /// element remains while the saved document still reports validation errors.
        /// </summary>
        public void RemoveSmartTags()
        {
            // This document has an invalid smartTag element (apparently inserted by 3rd party software).
            string name = "SmartTags.docx";

            DirectoryInfo sourceDir = new DirectoryInfo("../../../../test/templates/");
            FileInfo templateFile = new FileInfo(Path.Combine(sourceDir.FullName, name));
            DirectoryInfo destDir = new DirectoryInfo("../../../../test/history/dot-net-results");
            string workingCopyPath = new FileInfo(Path.Combine(destDir.FullName, name)).FullName;
            string outPath = Path.Combine(destDir.FullName, "SmartTags-Removed.docx");

            templateFile.CopyTo(workingCopyPath, true);
            byte[] originalBytes = new WmlDocument(workingCopyPath).DocumentByteArray;

            WmlDocument transformedDoc = null;
            using (MemoryStream buffer = new MemoryStream())
            {
                buffer.Write(originalBytes, 0, originalBytes.Length);

                using (WordprocessingDocument package = WordprocessingDocument.Open(buffer, true))
                {
                    // We try to remove smart tags, but the (apparently) invalid one is not removed correctly.
                    var options = new SimplifyMarkupSettings { RemoveSmartTags = true };
                    MarkupSimplifier.SimplifyMarkup(package, options);
                }

                transformedDoc = new WmlDocument(outPath, buffer.ToArray());
                bool smartTagsRemain = transformedDoc.MainDocumentPart.Descendants(W.smartTag).Any();
                Assert.False(smartTagsRemain);
                transformedDoc.Save();
            }

            // The transformed document still has leftover bits of the invalid smart tag and should
            // therefore be invalid. (Consider whether SimplifyMarkup should be patched to remove
            // this apparently invalid smart tag correctly.)
            var validation = new Validator().ValidateDocument(outPath);

            // MS Word also complains about the validity of this document.
            Assert.True(validation.HasErrors);
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Verifies that simplifying with <c>RemoveSmartTags</c> removes every <c>w:smartTag</c>
        /// element and that the first <c>w:t</c> element carries the expected text.
        /// </summary>
        public void CanRemoveSmartTags()
        {
            XDocument sourceXml = XDocument.Parse(SmartTagDocumentXmlString);

            // Precondition: the fixture really contains a smart tag.
            bool hasSmartTag = sourceXml.Descendants(W.smartTag).Any();
            Assert.True(hasSmartTag);

            using (MemoryStream buffer = new MemoryStream())
            {
                using (WordprocessingDocument package = WordprocessingDocument.Create(buffer, DocumentType))
                {
                    MainDocumentPart mainPart = package.AddMainDocumentPart();
                    mainPart.PutXDocument(sourceXml);

                    var options = new SimplifyMarkupSettings { RemoveSmartTags = true };
                    MarkupSimplifier.SimplifyMarkup(package, options);

                    // Re-read the part to inspect what the simplifier produced.
                    XDocument simplifiedXml = mainPart.GetXDocument();
                    XElement firstText = simplifiedXml.Descendants(W.t).First();

                    Assert.False(simplifiedXml.Descendants(W.smartTag).Any());
                    Assert.Equal(SmartTagDocumentTextValue, firstText.Value);
                }
            }
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Loads a Word template into memory, simplifies its markup and applies the supplied
        /// replacements to every run found in the body, the headers and the footers.
        /// </summary>
        /// <param name="templatePath">
        /// Template path, combined with the directory configured in the <c>doctemplate</c> app setting
        /// (a leading <c>~</c> in that setting is resolved through <c>Server.MapPath</c>).
        /// </param>
        /// <param name="items">
        /// Replacements to apply. A replacement matches a text when the trimmed text contains
        /// <c>TextToReplace</c>, or, when <c>MatchWholeText</c> is set, when the text equals it exactly.
        /// Depending on <c>IsCheckBox</c> / <c>UseRun</c> the run content is replaced by clones of
        /// <c>Checkboxes</c>, by a clone of <c>Run</c>, or the text is substituted with <c>ReplacementText</c>.
        /// </param>
        /// <returns>The bytes of the modified document; the template file itself is not written to.</returns>
        public byte[] GetWordReplacedText(string templatePath, List<WordReplacement> items)
        {
            var pathdir = ConfigurationManager.AppSettings["doctemplate"];

            if (pathdir.StartsWith("~", StringComparison.Ordinal))
            {
                pathdir = HttpContext.Current.Server.MapPath(pathdir);
            }

            var path = Path.Combine(pathdir, templatePath);

            using (MemoryStream templateStream = new MemoryStream())
            {
                // The template is only read, so request read-only/shared access (works for read-only
                // files and concurrent callers); `using` releases the handle even if the copy fails.
                using (FileStream fileStream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
                {
                    fileStream.CopyStream(templateStream);
                }

                using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(templateStream, true))
                {
                    wordDoc.ChangeDocumentType(WordprocessingDocumentType.Document);

                    SimplifyMarkupSettings settings = new SimplifyMarkupSettings
                    {
                        RemoveComments                    = true,
                        RemoveContentControls             = true,
                        RemoveEndAndFootNotes             = true,
                        RemoveFieldCodes                  = false,
                        RemoveLastRenderedPageBreak       = true,
                        RemovePermissions                 = true,
                        RemoveProof                       = true,
                        RemoveRsidInfo                    = true,
                        RemoveSmartTags                   = true,
                        RemoveSoftHyphens                 = true,
                        ReplaceTabsWithSpaces             = true,
                        RemoveWebHidden                   = true,
                        RemoveMarkupForDocumentComparison = true
                    };

                    // Simplify first so placeholders are less likely to be split across several runs.
                    MarkupSimplifier.SimplifyMarkup(wordDoc, settings);

                    // Collect the runs up front: body first, then headers, then footers.
                    var body    = wordDoc.MainDocumentPart.Document.Body;
                    var runsall = body.Descendants<Run>().ToList();

                    foreach (var item in wordDoc.MainDocumentPart.HeaderParts)
                    {
                        runsall.AddRange(item.RootElement.Descendants<Run>());
                    }

                    foreach (var item in wordDoc.MainDocumentPart.FooterParts)
                    {
                        runsall.AddRange(item.RootElement.Descendants<Run>());
                    }

                    // Single definition of the matching rule used for both runs and text elements.
                    Func<WordReplacement, string, bool> matches = (item, candidate) =>
                        item.MatchWholeText
                            ? candidate == item.TextToReplace
                            : candidate?.Trim().Contains(item.TextToReplace) == true;

                    foreach (var r in runsall)
                    {
                        var runText = r.InnerText;

                        if (!items.Any(it => matches(it, runText)))
                        {
                            continue;
                        }

                        // The first matching replacement decides how the whole run is handled.
                        var replace = items.First(it => matches(it, runText));

                        if (replace.IsCheckBox)
                        {
                            // Swap the run content for clones of the check box elements.
                            r.RemoveAllChildren();
                            replace.Checkboxes.ForEach(c => {
                                r.Append(c.CloneNode(true));
                            });
                            continue;
                        }

                        // Enumerated lazily on purpose: the UseRun branch below empties the run
                        // while this loop is running.
                        // NOTE(review): confirm the SDK enumerator stops cleanly once the children are removed.
                        foreach (var text in r.Elements<Text>())
                        {
                            var textValue = text.InnerText;

                            if (!items.Any(it => matches(it, textValue)))
                            {
                                continue;
                            }

                            if (replace.UseRun)
                            {
                                try
                                {
                                    if (replace.Run != null)
                                    {
                                        // Carry the placeholder's formatting over to the replacement run.
                                        // NOTE(review): this appends to the caller-supplied Run each time the
                                        // replacement is used, and fails when the placeholder run has no
                                        // RunProperties - confirm that is intended.
                                        replace.Run.Append(r.RunProperties.CloneNode(true));
                                        r.RemoveAllChildren();
                                        r.Append(replace.Run.CloneNode(true));
                                    }
                                    else
                                    {
                                        r.RemoveAllChildren();
                                    }
                                }
                                catch (Exception exc)
                                {
                                    // Best effort: the run is left as it was, but the failure is no
                                    // longer swallowed silently.
                                    System.Diagnostics.Trace.TraceWarning(
                                        "GetWordReplacedText: failed to substitute run for placeholder '{0}': {1}",
                                        replace.TextToReplace, exc);
                                }
                            }
                            else
                            {
                                // The text element may match a different item than the run as a whole.
                                var wrd = items.First(it => matches(it, textValue));
                                text.Text = wrd.MatchWholeText ? wrd.ReplacementText : text.Text.Replace(wrd.TextToReplace, wrd.ReplacementText);
                            }
                        }
                    }

                    // Simplify again after the substitutions, then persist into the memory stream.
                    MarkupSimplifier.SimplifyMarkup(wordDoc, settings);
                    wordDoc.Save();
                }

                // Read the buffer only after the document has been disposed, so the package is
                // guaranteed to be completely written to the stream.
                return templateStream.ToArray();
            }
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Opens the document at <paramref name="filePath"/> for editing, simplifies its markup and
        /// replaces <c>{{texto:...}}</c> tags (body and footers) with their values and
        /// <c>{{imagem:...}}</c> placeholders (body) with images created by <c>AddImage</c>.
        /// </summary>
        /// <param name="filePath">Path of the document to modify in place.</param>
        /// <param name="keyValuePairs">
        /// Maps a complete tag (for example <c>{{texto:name}}</c>) to its replacement. Text tags are
        /// looked up using the lower-cased form produced by <c>ToLowerCase()</c>; image tags are looked
        /// up exactly as they appear in the document.
        /// </param>
        public void ReplaceTags(string filePath, Dictionary<string, string> keyValuePairs)
        {
            using (var wordDocument = WordprocessingDocument.Open(filePath, true))
            {
                SimplifyMarkupSettings settings = new SimplifyMarkupSettings
                {
                    RemoveComments              = true,
                    RemoveContentControls       = true,
                    RemoveFieldCodes            = false,
                    RemoveLastRenderedPageBreak = true,
                    RemovePermissions           = true,
                    RemoveProof                 = true,
                    RemoveRsidInfo              = true,
                    RemoveSmartTags             = true,
                    RemoveSoftHyphens           = true,
                    ReplaceTabsWithSpaces       = true,
                    RemoveBookmarks             = true,
                };
                MarkupSimplifier.SimplifyMarkup(wordDocument, settings);

                var mainPart = wordDocument.MainDocumentPart;

                // Text tags in the body: only runs that are direct children of the top-level
                // body elements are visited.
                foreach (var para in mainPart.Document.Body.ChildElements)
                {
                    foreach (var run in para.Elements<Run>())
                    {
                        foreach (var text in run.Elements<Text>())
                        {
                            ReplaceTextTags(text, keyValuePairs);
                        }
                    }
                }

                // Image placeholders: a text element whose whole content equals one of the keys.
                foreach (var keyValuePair in keyValuePairs)
                {
                    // Materialized so that removing placeholders does not disturb the enumeration.
                    var textPlaceHolders = wordDocument.MainDocumentPart.Document.Body.Descendants<Text>()
                                           .Where((x) => x.Text == keyValuePair.Key).ToList();

                    foreach (var textPlaceHolder in textPlaceHolders)
                    {
                        var parent = textPlaceHolder.Parent;

                        if (!(parent is Run))  // Parent should be a run element.
                        {
                            Console.Out.WriteLine("Parent is not run");
                            continue;
                        }

                        if (!textPlaceHolder.Text.Contains("{{imagem:", StringComparison.InvariantCultureIgnoreCase))
                        {
                            continue;
                        }

                        var matches = Regex.Matches(textPlaceHolder.Text, @"{{imagem:([A-Za-z0-9\-_]+)}}");
                        foreach (Match match in matches)
                        {
                            string image;
                            if (!keyValuePairs.TryGetValue(match.Value, out image))
                            {
                                continue;
                            }

                            var element = this.AddImage(wordDocument, parent, image);
                            textPlaceHolder.Parent.InsertAfterSelf(new DocumentFormat.OpenXml.Wordprocessing.Paragraph(element));
                            textPlaceHolder.Remove();

                            // The placeholder is detached now (its Parent is null); handling another
                            // match would dereference that null parent.
                            break;
                        }
                    }
                }

                // Text tags in the footers.
                foreach (var foot in mainPart.FooterParts)
                {
                    foreach (var currentText in foot.RootElement.Descendants<Text>())
                    {
                        ReplaceTextTags(currentText, keyValuePairs);
                    }
                }

                // Disposing the document at the end of the using block closes it; no explicit Close() is needed.
                mainPart.Document.Save();
            }
        }

        /// <summary>
        /// Replaces every <c>{{texto:...}}</c> tag of <paramref name="text"/> that has an entry in
        /// <paramref name="keyValuePairs"/>.
        /// </summary>
        private static void ReplaceTextTags(Text text, Dictionary<string, string> keyValuePairs)
        {
            var texto = text.Text.ToLowerCase();

            if (!texto.Contains("{{texto:", StringComparison.InvariantCultureIgnoreCase))
            {
                return;
            }

            foreach (Match match in Regex.Matches(texto, @"{{texto:([A-Za-z0-9\-_]+)}}"))
            {
                string value;
                if (keyValuePairs.TryGetValue(match.Value, out value))
                {
                    // The match comes from the lower-cased copy, so the replacement in the original
                    // text has to ignore case or tags written with capitals are never replaced.
                    text.Text = text.Text.Replace(match.Value, value, StringComparison.InvariantCultureIgnoreCase);
                }
            }
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Finds all the tags in the requested document.
        /// </summary>
        /// <param name="filePath">The Azure path of the file.</param>
        /// <returns>
        /// The distinct tag keys in the order they are first found, without the surrounding braces
        /// (for example <c>texto:name</c> or <c>imagem:logo</c>). Keys are taken from the text after
        /// it has been passed through <c>ToLowerCase()</c>.
        /// </returns>
        /// <remarks>
        /// NOTE(review): the document is opened for editing and simplified in place, so calling this
        /// method presumably rewrites the file when the document is disposed - confirm this is intended.
        /// </remarks>
        public IEnumerable<string> GetTags(string filePath)
        {
            var tags = new List<string>();

            using (var wordDocument = WordprocessingDocument.Open(filePath, true))
            {
                SimplifyMarkupSettings settings = new SimplifyMarkupSettings
                {
                    RemoveComments              = true,
                    RemoveContentControls       = true,
                    RemoveFieldCodes            = false,
                    RemoveLastRenderedPageBreak = true,
                    RemovePermissions           = true,
                    RemoveProof                 = true,
                    RemoveRsidInfo              = true,
                    RemoveSmartTags             = true,
                    RemoveSoftHyphens           = true,
                    ReplaceTabsWithSpaces       = true,
                    RemoveBookmarks             = true,
                };
                MarkupSimplifier.SimplifyMarkup(wordDocument, settings);

                var mainPart = wordDocument.MainDocumentPart;

                // Body: only runs that are direct children of the top-level body elements are scanned.
                foreach (var para in mainPart.Document.Body.ChildElements)
                {
                    foreach (var run in para.Elements<Run>())
                    {
                        foreach (var text in run.Elements<Text>())
                        {
                            var texto = text.Text.ToLowerCase();

                            // Both kinds are checked independently, so a text holding an image tag
                            // and a text tag reports both (as the footer scan does).
                            CollectTags(texto, "imagem", tags);
                            CollectTags(texto, "texto", tags);
                        }
                    }
                }

                // Footers: every text element of every footer part.
                foreach (var foot in mainPart.FooterParts)
                {
                    foreach (var currentText in foot.RootElement.Descendants<Text>())
                    {
                        var texto = currentText.Text.ToLowerCase();

                        CollectTags(texto, "texto", tags);
                        CollectTags(texto, "imagem", tags);
                    }
                }
            }

            return(tags);
        }

        /// <summary>
        /// Adds to <paramref name="tags"/> the key (<c>kind:name</c>) of every <c>{{kind:name}}</c>
        /// tag found in <paramref name="loweredText"/> that is not already in the list.
        /// </summary>
        private static void CollectTags(string loweredText, string kind, List<string> tags)
        {
            foreach (Match match in Regex.Matches(loweredText, "{{" + kind + @":([A-Za-z0-9\-_]+)}}"))
            {
                // Same value as the matched tag with "{{" and "}}" stripped.
                var key = kind + ":" + match.Groups[1].Value;

                if (!tags.Contains(key))
                {
                    tags.Add(key);
                }
            }
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Uses the MarkupSimplifier features of the OpenXML PowerTools to accept tracked revisions
        /// and strip proofing errors, RSID information and similar noise from a document, so its XML
        /// is cleaner for further processing. The simplified XML is also written to the console.
        /// </summary>
        /// <param name="docLocation">The absolute location of the docx file; the file is modified in place.</param>
        /// <param name="z">A namespace to be placed at the XML tags in the TransformToSimpleXml() method.</param>
        /// <param name="formatDocument">
        /// When true, the document is rewritten from the simplified XML through ReWriteDocument().
        /// </param>
        /// <exception cref="Exception">
        /// Thrown when the document cannot be simplified or rewritten; the original failure is
        /// available through <see cref="Exception.InnerException"/>.
        /// </exception>
        public static void SimplifyMarkup(string docLocation, string z, bool formatDocument)
        {
            try
            {
                XElement simplerXml;

                using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(docLocation, true))
                {
                    RevisionAccepter.AcceptRevisions(wordDoc);

                    // Defines which components are cleaned from the XML. See SimplifyMarkupSettings
                    // for all the available options.
                    SimplifyMarkupSettings settings = new SimplifyMarkupSettings
                    {
                        RemoveComments                    = true,
                        RemoveContentControls             = true,
                        RemoveEndAndFootNotes             = true,
                        RemoveFieldCodes                  = false,
                        RemoveLastRenderedPageBreak       = true,
                        RemovePermissions                 = true,
                        RemoveProof                       = true,
                        RemoveRsidInfo                    = true,
                        RemoveSmartTags                   = true,
                        RemoveSoftHyphens                 = true,
                        ReplaceTabsWithSpaces             = true,
                        NormalizeXml                      = false,
                        RemoveWebHidden                   = true,
                        RemoveMarkupForDocumentComparison = true,
                    };

                    MarkupSimplifier.SimplifyMarkup(wordDoc, settings);

                    // Id of the default paragraph style. It stays null when the document has no
                    // styles part, exactly as it already does when no default style is declared.
                    string defaultParagraphStyleId = null;
                    var stylesPart = wordDoc.MainDocumentPart.StyleDefinitionsPart;
                    if (stylesPart != null)
                    {
                        defaultParagraphStyleId = stylesPart.GetXDocument().Root.Elements(W.style)
                                                  .Where(e => (string)e.Attribute(W.type) == "paragraph" &&
                                                         (string)e.Attribute(W._default) == "1")
                                                  .Select(s => (string)s.Attribute(W.styleId))
                                                  .FirstOrDefault();
                    }

                    // All the paragraphs of the main part as one simplified XML node.
                    simplerXml = (XElement)TransformToSimpleXml(
                        wordDoc.MainDocumentPart.GetXDocument().Root,
                        defaultParagraphStyleId, z);
                    Console.WriteLine(simplerXml);

                    wordDoc.Save();
                }

                // The document is disposed (and the file released) before it is rewritten, so the
                // rewrite does not depend on an explicit Close() call inside the using block.
                if (formatDocument)
                {
                    Console.WriteLine("Reescrevendo o documento sem estilos");
                    try
                    {
                        ReWriteDocument(docLocation, simplerXml);
                        Console.WriteLine("Sucesso ao Reformatar o documento!");
                    }
                    catch (Exception e)
                    {
                        // Keep the original exception as InnerException so its type and stack trace survive.
                        throw new Exception(string.Format("Erro ao Reformatar o Arquivo: {0}", e.ToString()), e);
                    }
                }
            }
            catch (Exception e)
            {
                throw new Exception(string.Format("Não foi Possível simplificar o Arquivo. Erro: {0}", e.ToString()), e);
            }
        }