/// <summary>
/// Click handler for the Apply button: lets the user pick one or more files and runs
/// <c>MarkupSimplifier.SimplifyMarkup</c> on each of them, using the options currently
/// selected in the form's check boxes.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnApply_Click(object sender, EventArgs e)
{
    // OpenFileDialog is IDisposable, so release it once the selection has been read.
    using (OpenFileDialog ofd = new OpenFileDialog())
    {
        ofd.Multiselect = true;

        // Only process files when the user actually confirmed the dialog.
        if (ofd.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        foreach (var item in ofd.FileNames)
        {
            // Second argument 'true' opens the package as editable.
            using (WordprocessingDocument doc = WordprocessingDocument.Open(item, true))
            {
                // Built per document, mirroring the check boxes one-to-one.
                SimplifyMarkupSettings settings = new SimplifyMarkupSettings
                {
                    RemoveContentControls = cbRemoveContentControls.Checked,
                    RemoveSmartTags = cbRemoveSmartTags.Checked,
                    RemoveRsidInfo = cbRemoveRsidInfo.Checked,
                    RemoveComments = cbRemoveComments.Checked,
                    RemoveEndAndFootNotes = cbRemoveEndAndFootNotes.Checked,
                    ReplaceTabsWithSpaces = cbReplaceTabsWithSpaces.Checked,
                    RemoveFieldCodes = cbRemoveFieldCodes.Checked,
                    RemovePermissions = cbRemovePermissions.Checked,
                    RemoveProof = cbRemoveProof.Checked,
                    RemoveSoftHyphens = cbRemoveSoftHyphens.Checked,
                    RemoveLastRenderedPageBreak = cbRemoveLastRenderedPageBreak.Checked,
                    RemoveBookmarks = cbRemoveBookmarks.Checked,
                    RemoveWebHidden = cbRemoveWebHidden.Checked,
                    NormalizeXml = cbNormalize.Checked,
                };

                Clippit.MarkupSimplifier.SimplifyMarkup(doc, settings);
            }
        }
    }
}
/// <summary>
/// Checks that simplifying with <c>RemoveContentControls = true</c> leaves no <c>w:sdt</c>
/// element in the main part and that the first descendant of the body is a <c>w:p</c>.
/// </summary>
public void CanRemoveContentControls()
{
    XDocument xml = XDocument.Parse(SdtDocumentXmlString);

    // Precondition: the fixture really contains a content control.
    Assert.True(xml.Descendants(W.sdt).Any());

    using (MemoryStream buffer = new MemoryStream())
    {
        using (var package = WordprocessingDocument.Create(buffer, DocumentType))
        {
            var mainPart = package.AddMainDocumentPart();
            mainPart.PutXDocument(xml);

            MarkupSimplifier.SimplifyMarkup(
                package,
                new SimplifyMarkupSettings { RemoveContentControls = true });

            // Re-read the part so the assertions see the simplified markup.
            xml = mainPart.GetXDocument();
            XElement firstInBody = xml.Descendants(W.body).Descendants().First();

            Assert.False(xml.Descendants(W.sdt).Any());
            Assert.Equal(W.p, firstInBody.Name);
        }
    }
}
/// <summary>
/// Checks that simplifying with <c>RemoveGoBackBookmark = true</c> removes the
/// <c>_GoBack</c> bookmark start/end pair (id "0") present in the fixture document.
/// </summary>
public void CanRemoveGoBackBookmarks()
{
    XDocument xml = XDocument.Parse(GoBackBookmarkDocumentXmlString);

    // Preconditions: the fixture has the _GoBack bookmark start and its matching end.
    Predicate<XElement> isGoBackStart =
        e => e.Attribute(W.name).Value == "_GoBack" && e.Attribute(W.id).Value == "0";
    Predicate<XElement> isMatchingEnd =
        e => e.Attribute(W.id).Value == "0";

    Assert.Contains(xml.Descendants(W.bookmarkStart), isGoBackStart);
    Assert.Contains(xml.Descendants(W.bookmarkEnd), isMatchingEnd);

    using (MemoryStream buffer = new MemoryStream())
    {
        using (var package = WordprocessingDocument.Create(buffer, DocumentType))
        {
            var mainPart = package.AddMainDocumentPart();
            mainPart.PutXDocument(xml);

            MarkupSimplifier.SimplifyMarkup(
                package,
                new SimplifyMarkupSettings { RemoveGoBackBookmark = true });

            // Re-read the part so the assertions see the simplified markup.
            xml = mainPart.GetXDocument();

            Assert.False(xml.Descendants(W.bookmarkStart).Any());
            Assert.False(xml.Descendants(W.bookmarkEnd).Any());
        }
    }
}
/// <summary>
/// Console entry point. Prints usage text when no arguments are given; otherwise treats each
/// argument as a document path, opens it as editable and simplifies its markup using the
/// values stored in <c>Settings.Default</c>.
/// </summary>
/// <param name="args">Paths of the documents to process.</param>
private static void Main(string[] args)
{
    if (args.Length == 0)
    {
        Console.WriteLine("Example output files are in a DateTime stamped directory in ./bin/debug. The directory name is ExampleOutput-yy-mm-dd-hhmmss.");
        Console.WriteLine("If you are building in release mode, they will, of course, be in ./bin/release.");
        Console.WriteLine("MarkupSimplifierApp.exe 1.docx 2.docx");
        return; // nothing to process
    }

    foreach (string documentPath in args)
    {
        using var package = WordprocessingDocument.Open(documentPath, true);
        MarkupSimplifier.SimplifyMarkup(package, ReadConfiguredSettings());
    }

    // Builds a fresh settings object from the application settings for every document.
    SimplifyMarkupSettings ReadConfiguredSettings()
    {
        var cfg = Settings.Default;
        return new SimplifyMarkupSettings
        {
            RemoveContentControls = cfg.RemoveContentControls,
            RemoveSmartTags = cfg.RemoveSmartTags,
            RemoveRsidInfo = cfg.RemoveRsidInfo,
            RemoveComments = cfg.RemoveComments,
            RemoveEndAndFootNotes = cfg.RemoveEndAndFootNotes,
            ReplaceTabsWithSpaces = cfg.ReplaceTabsWithSpaces,
            RemoveFieldCodes = cfg.RemoveFieldCodes,
            RemovePermissions = cfg.RemovePermissions,
            RemoveProof = cfg.RemoveProof,
            RemoveSoftHyphens = cfg.RemoveSoftHyphens,
            RemoveLastRenderedPageBreak = cfg.RemoveLastRenderedPageBreak,
            RemoveBookmarks = cfg.RemoveBookmarks,
            RemoveWebHidden = cfg.RemoveWebHidden,
            NormalizeXml = cfg.NormalizeXml,
        };
    }
}
/// <summary>
/// Loads a Word template into memory, simplifies its markup, applies every replacement as a
/// regular-expression substitution over the raw XML text of the main document part, and
/// returns the bytes of the resulting package.
/// </summary>
/// <param name="templatePath">
/// Template path, combined with the folder configured in the <c>doctemplate</c> app setting
/// (a leading <c>~</c> in that setting is resolved through <c>Server.MapPath</c>).
/// </param>
/// <param name="items">
/// Replacements to apply. <c>TextToReplace</c> is used as a regex pattern; entries whose
/// <c>TextToReplace</c> is null or empty are skipped. A null <c>ReplacementText</c> is treated as "".
/// </param>
/// <returns>The modified document package as a byte array.</returns>
public byte[] GetWordReplacedTextUsingPlaintext(string templatePath, List<WordReplacement> items)
{
    var pathdir = ConfigurationManager.AppSettings["doctemplate"].ToString();
    if (pathdir.StartsWith("~"))
    {
        pathdir = HttpContext.Current.Server.MapPath(pathdir);
    }

    var path = Path.Combine(pathdir, templatePath);

    using (MemoryStream templateStream = new MemoryStream())
    {
        // The template is only read, so open it read-only (works for read-only files too)
        // and make sure the handle is released even if the copy fails.
        using (FileStream fileStream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            fileStream.CopyStream(templateStream);
        }

        using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(templateStream, true))
        {
            wordDoc.ChangeDocumentType(WordprocessingDocumentType.Document);

            // RemoveContentControls and RemoveMarkupForDocumentComparison were left
            // commented out (disabled) by the original author.
            SimplifyMarkupSettings settings = new SimplifyMarkupSettings
            {
                RemoveProof = true,
                RemoveRsidInfo = true,
                NormalizeXml = true,
            };
            MarkupSimplifier.SimplifyMarkup(wordDoc, settings);

            string docText;
            using (StreamReader sr = new StreamReader(wordDoc.MainDocumentPart.GetStream()))
            {
                docText = sr.ReadToEnd();
            }

            foreach (var item in items)
            {
                if (!string.IsNullOrEmpty(item.TextToReplace))
                {
                    Regex regexText = new Regex(item.TextToReplace);
                    docText = regexText.Replace(docText, item.ReplacementText ?? "");
                }
            }

            // Write the replaced XML back into the main document part (truncating the old
            // content). Writing into templateStream directly would put raw XML into the
            // package's backing stream instead of updating the part.
            using (StreamWriter sw = new StreamWriter(wordDoc.MainDocumentPart.GetStream(FileMode.Create)))
            {
                sw.Write(docText);
            }
        }

        // Take the bytes only after the package has been disposed, so everything is flushed.
        return templateStream.ToArray();
    }
}
/// <summary>
/// Loads an external (Word) XML file from a storage medium. The file is first copied to the
/// user's temp folder, the copy is opened as editable and its markup is simplified.
/// </summary>
/// <param name="filePath">Absolute path of the file the user wants to load.</param>
/// <returns>
/// True if the file could be copied, opened and simplified; otherwise false, in which case
/// <c>errorMessage</c> holds the failure text.
/// </returns>
public bool LoadXmlFile(string filePath)
{
    // Tracks the package opened by THIS call so the failure path can release it.
    WordprocessingDocument opened = null;

    try
    {
        // Work on a copy in the temp folder so the original is not opened with write access.
        // Path.GetFileName handles both '\' and '/' separators.
        string fileName = Path.GetFileName(filePath);
        string newFilePath = Path.Combine(Path.GetTempPath(), fileName);
        File.Copy(filePath, newFilePath, true);
        this.useCaseFilePath = newFilePath;

        // Open and load the use-case file.
        opened = WordprocessingDocument.Open(this.useCaseFilePath, true);
        this.useCaseFile = opened;

        // Configure and apply the simplification to the opened file.
        SimplifyMarkupSettings settings = new SimplifyMarkupSettings
        {
            AcceptRevisions = false,
            RemoveContentControls = true,
            RemoveSmartTags = true,
            RemoveRsidInfo = true,
            RemoveComments = true,
            RemoveEndAndFootNotes = true,
            ReplaceTabsWithSpaces = true,
            RemoveFieldCodes = false,
            RemovePermissions = true,
            RemoveProof = true,
            RemoveSoftHyphens = true,
            RemoveLastRenderedPageBreak = true,
            RemoveBookmarks = true,
            RemoveWebHidden = true,
            RemoveGoBackBookmark = true,
            RemoveMarkupForDocumentComparison = true,
            NormalizeXml = true,
        };
        MarkupSimplifier.SimplifyMarkup(this.useCaseFile, settings);

        return true;
    }
    catch (Exception ex)
    {
        // General error while loading the use-case file: record the message.
        this.errorMessage = "Fehler beim Einlesen der UseCase-Datei: " + ex.Message;

        // Close the package opened by this call first; otherwise the temp file may still
        // be held open and the delete below can fail.
        if (opened != null)
        {
            opened.Dispose();
            this.useCaseFile = null;
        }

        // Remove the temporary copy from the temp folder.
        if (File.Exists(this.useCaseFilePath))
        {
            File.Delete(this.useCaseFilePath);
        }

        return false;
    }
}
/// <summary>
/// Opens the document at <paramref name="path"/> as editable, simplifies it with revisions
/// accepted and comments removed, and returns the main document part's XML as a string.
/// </summary>
/// <param name="path">Path of the document to process.</param>
/// <returns>The string form of the main document part's root element after simplification.</returns>
public static string FinishReview(string path)
{
    using (WordprocessingDocument package = WordprocessingDocument.Open(path, true))
    {
        MarkupSimplifier.SimplifyMarkup(
            package,
            new SimplifyMarkupSettings
            {
                AcceptRevisions = true,
                RemoveComments = true
            });

        var root = package.MainDocumentPart.GetXElement();
        return root.ToString();
    }
}
/// <summary>
/// Round-trips an HTML fragment through Word. It copies the blank alt-chunk template to
/// <paramref name="newAltChunkBeforeFi"/>, embeds <paramref name="html"/> as an XHTML alt chunk
/// at the start of the body, lets <c>WordAutomationUtilities.OpenAndSaveAs</c> produce
/// <paramref name="newAltChunkAfterFi"/>, then simplifies that result and applies
/// <c>RemoveDivTransform</c> to its main document part.
/// </summary>
/// <param name="newAltChunkBeforeFi">Destination of the template copy that receives the alt chunk.</param>
/// <param name="newAltChunkAfterFi">File produced by the Word automation step; post-processed in place.</param>
/// <param name="html">HTML content to embed.</param>
public static void DoConversionViaWord(FileInfo newAltChunkBeforeFi, FileInfo newAltChunkAfterFi, XElement html)
{
    var blankAltChunkFi = new FileInfo(Path.Combine(TestUtil.SourceDir.FullName, "Blank-altchunk.docx"));
    File.Copy(blankAltChunkFi.FullName, newAltChunkBeforeFi.FullName);

    using (WordprocessingDocument myDoc = WordprocessingDocument.Open(newAltChunkBeforeFi.FullName, true))
    {
        string altChunkId = "AltChunkId1";
        MainDocumentPart mainPart = myDoc.MainDocumentPart;
        AlternativeFormatImportPart chunk = mainPart.AddAlternativeFormatImportPart(
            "application/xhtml+xml", altChunkId);

        using (Stream chunkStream = chunk.GetStream(FileMode.Create, FileAccess.Write))
        using (StreamWriter stringStream = new StreamWriter(chunkStream))
        {
            stringStream.Write(html.ToString());
        }

        // Reference the imported part from the very start of the body.
        XElement altChunk = new XElement(W.altChunk,
            new XAttribute(R.id, altChunkId));
        XDocument mainDocumentXDoc = myDoc.MainDocumentPart.GetXDocument();
        mainDocumentXDoc.Root
            .Element(W.body)
            .AddFirst(altChunk);
        myDoc.MainDocumentPart.PutXDocument();
    }

    WordAutomationUtilities.OpenAndSaveAs(newAltChunkBeforeFi.FullName, newAltChunkAfterFi.FullName);

    // Retry while opening fails with an IOException (presumably the file is still held by
    // Word - TODO confirm).
    while (true)
    {
        try
        {
            using (WordprocessingDocument wDoc = WordprocessingDocument.Open(newAltChunkAfterFi.FullName, true))
            {
                SimplifyMarkupSettings settings2 = new SimplifyMarkupSettings
                {
                    RemoveMarkupForDocumentComparison = true,
                };
                MarkupSimplifier.SimplifyMarkup(wDoc, settings2);

                XElement newRoot = (XElement)RemoveDivTransform(wDoc.MainDocumentPart.GetXDocument().Root);
                wDoc.MainDocumentPart.GetXDocument().Root.ReplaceWith(newRoot);
                wDoc.MainDocumentPart.PutXDocumentWithFormatting();
            }

            break;
        }
        catch (FileNotFoundException)
        {
            // A missing output file never appears by waiting; fail instead of spinning forever.
            throw;
        }
        catch (DirectoryNotFoundException)
        {
            throw;
        }
        catch (IOException)
        {
            System.Threading.Thread.Sleep(50);
        }
    }
}
/// <summary>
/// Simplifies and saves the document attached to a list-view item. Items whose tooltip is not
/// "Unsaved", or whose file fails <c>IsValidTarget</c>, are skipped. On success the item turns
/// green with tooltip "Saved"; on any exception it turns red and shows the exception message.
/// </summary>
/// <param name="lvi">List-view item whose <c>Tag</c> holds the target <c>FileInfo</c>.</param>
private void CleanAndSaveItem(ListViewItem lvi)
{
    //listView1.EnsureVisible(lvi.Index);
    FileInfo target = lvi.Tag as FileInfo;

    // The "Unsaved" tooltip check prevents double saving.
    if (lvi.ToolTipText != "Unsaved" || !IsValidTarget(target))
    {
        return;
    }

    try
    {
        using (var package = WordprocessingDocument.Open(target.FullName, true))
        {
            var options = new SimplifyMarkupSettings
            {
                // Original author's note: setting this to false reduces translation quality,
                // but if true some documents have XML format errors when opening.
                AcceptRevisions = true,
                // Original author's note: merges runs in a paragraph with similar formatting.
                NormalizeXml = true,

                RemoveBookmarks = true,
                RemoveComments = true,
                RemoveContentControls = true,
                RemoveEndAndFootNotes = true,
                RemoveGoBackBookmark = true,
                RemoveLastRenderedPageBreak = true,
                RemoveMarkupForDocumentComparison = true,
                RemoveProof = true,
                RemoveRsidInfo = true,
                RemoveSmartTags = true,
                RemoveSoftHyphens = true,
                RemoveWebHidden = true,

                RemoveFieldCodes = false, // was "true" at some point per the original comment
                RemoveHyperlinks = false,
                RemovePermissions = false,
                ReplaceTabsWithSpaces = false
            };

            MarkupSimplifier.SimplifyMarkup(package, options);
            // OpenXmlPowerTools.WmlComparer.Compare
            package.Save();

            lvi.BackColor = Color.Green;
            lvi.ToolTipText = "Saved";
        }
    }
    catch (Exception failure)
    {
        lvi.BackColor = Color.Red;
        lvi.ToolTipText = failure.Message;
        //Console.WriteLine("Error in File: " + file.FullName + ". " + ex.Message);
    }
}
/// <summary>
/// Reads <paramref name="source"/> into memory, accepts its revisions, simplifies the markup,
/// runs <c>FormattingAssembler.AssembleFormatting</c> on it and writes the resulting package
/// bytes to <paramref name="dest"/>.
/// </summary>
/// <param name="source">Document to read.</param>
/// <param name="dest">File that receives the processed document.</param>
public static void CopyFormattingAssembledDocx(FileInfo source, FileInfo dest)
{
    byte[] original = File.ReadAllBytes(source.FullName);

    using (var buffer = new MemoryStream())
    {
        // Written into an empty stream (rather than wrapping the array) so it stays resizable.
        buffer.Write(original, 0, original.Length);

        using (var package = WordprocessingDocument.Open(buffer, true))
        {
            RevisionAccepter.AcceptRevisions(package);

            var simplifyOptions = new SimplifyMarkupSettings
            {
                RemoveComments = true,
                RemoveContentControls = true,
                RemoveEndAndFootNotes = true,
                RemoveGoBackBookmark = true,
                RemoveLastRenderedPageBreak = true,
                RemovePermissions = true,
                RemoveProof = true,
                RemoveRsidInfo = true,
                RemoveSmartTags = true,
                RemoveSoftHyphens = true,
                RemoveFieldCodes = false,
                ReplaceTabsWithSpaces = false,
            };
            MarkupSimplifier.SimplifyMarkup(package, simplifyOptions);

            var listItemOptions = new ListItemRetrieverSettings()
            {
                ListItemTextImplementations = ListItemRetrieverSettings.DefaultListItemTextImplementations,
            };
            var assemblerOptions = new FormattingAssemblerSettings
            {
                RemoveStyleNamesFromParagraphAndRunProperties = false,
                ClearStyles = false,
                RestrictToSupportedLanguages = false,
                RestrictToSupportedNumberingFormats = false,
                CreateHtmlConverterAnnotationAttributes = true,
                OrderElementsPerStandard = false,
                ListItemRetrieverSettings = listItemOptions,
            };
            FormattingAssembler.AssembleFormatting(package, assemblerOptions);
        }

        // The package is disposed at this point; persist what it left in the stream.
        File.WriteAllBytes(dest.FullName, buffer.ToArray());
    }
}
/// <summary>
/// Converts a Word document to an XHTML element tree. Revisions are accepted and the markup
/// is simplified on <paramref name="wordDoc"/> before the transform runs.
/// </summary>
/// <param name="wordDoc">Document to convert; it is modified by the preparation steps.</param>
/// <param name="htmlConverterSettings">Conversion settings; <c>ConvertFormatting</c> must be false.</param>
/// <param name="imageHandler">Callback passed through to the transform for images.</param>
/// <returns>The XHTML produced by <c>ConvertToHtmlTransform</c>, cast to <c>XElement</c>.</returns>
/// <exception cref="InvalidSettingsException">
/// Thrown when <c>htmlConverterSettings.ConvertFormatting</c> is true.
/// </exception>
public XElement ConvertToHtml(WordprocessingDocument wordDoc, HtmlConverterSettings htmlConverterSettings, Func<ImageInfo, XElement> imageHandler)
{
    InitEntityMap();

    if (htmlConverterSettings.ConvertFormatting)
    {
        throw new InvalidSettingsException("Conversion with formatting is not supported");
    }

    RevisionAccepter.AcceptRevisions(wordDoc);

    MarkupSimplifier.SimplifyMarkup(
        wordDoc,
        new SimplifyMarkupSettings
        {
            RemoveComments = true,
            RemoveContentControls = true,
            RemoveEndAndFootNotes = true,
            RemoveLastRenderedPageBreak = true,
            RemovePermissions = true,
            RemoveProof = true,
            RemoveRsidInfo = true,
            RemoveSmartTags = true,
            RemoveSoftHyphens = true,
            ReplaceTabsWithSpaces = true,
            RemoveFieldCodes = false,
        });

    XElement documentRoot = wordDoc.MainDocumentPart.GetXDocument().Root;
    AnnotateHyperlinkContent(documentRoot);

    // Note (from the original author): the XHTML returned by ConvertToHtmlTransform contains
    // objects of type XEntity, a class defined in PtOpenXmlUtil.cs. See
    // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
    // for a detailed explanation.
    //
    // If you further transform the XML tree returned here, you must do it correctly,
    // or entities will not be serialized properly.
    return (XElement)ConvertToHtmlTransform(wordDoc, htmlConverterSettings, documentRoot, imageHandler);
}
/// <summary>
/// Copies a .docx stream into memory, strips unnecessary markup from it and returns the
/// HTML produced by <c>WmlToHtmlConverter.ConvertToHtml</c> with default converter settings.
/// </summary>
/// <param name="inputDoc">Stream containing the Word document; it is read from its current position.</param>
/// <returns>The converted document as an HTML string.</returns>
private string ConvertDocxToHtml(Stream inputDoc)
{
    // Work on an in-memory copy of the input stream.
    using (MemoryStream buffer = new MemoryStream())
    {
        inputDoc.CopyTo(buffer);

        using (var package = WordprocessingDocument.Open(buffer, true))
        {
            // Remove unnecessary markup; only hyperlinks are explicitly kept.
            var cleanup = new SimplifyMarkupSettings
            {
                RemoveHyperlinks = false,

                AcceptRevisions = true,
                NormalizeXml = true,
                RemoveBookmarks = true,
                RemoveComments = true,
                RemoveContentControls = true,
                RemoveEndAndFootNotes = true,
                RemoveFieldCodes = true,
                RemoveGoBackBookmark = true,
                RemoveLastRenderedPageBreak = true,
                RemoveMarkupForDocumentComparison = true,
                RemovePermissions = true,
                RemoveProof = true,
                RemoveRsidInfo = true,
                RemoveSmartTags = true,
                RemoveSoftHyphens = true,
                RemoveWebHidden = true,
                ReplaceTabsWithSpaces = true
            };
            MarkupSimplifier.SimplifyMarkup(package, cleanup);

            // Export to HTML.
            var html = WmlToHtmlConverter.ConvertToHtml(package, new WmlToHtmlConverterSettings());
            return html.ToString();
        }
    }
}
// Legacy helper, kept commented out by the original author:
//
// private static object WrapInBraces(IEnumerable<XNode> nodes, XElement firstText, XElement lastText)
// {
//     return nodes.Select(node =>
//     {
//         XElement element = node as XElement;
//         if (element != null)
//         {
//             if (element.Name == W.t)
//             {
//                 if (element == firstText)
//                 {
//                     var newText = "{" + element.Value;
//                     if (element == lastText) // also last?
//                     {
//                         newText = newText + "}";
//                     }
//                     return new XElement(element.Name, element.Attributes(), newText);
//                 }
//                 if (element == lastText)
//                 {
//                     return new XElement(element.Name, element.Attributes(), element.Value + "}");
//                 }
//             }
//             return new XElement(element.Name,
//                 element.Attributes(),
//                 WrapInBraces(element.Nodes(), firstText, lastText));
//         }
//         return node;
//     });
// }

/// <summary>
/// Strips markup from a template document to eliminate unnecessary work later on. Comments,
/// last-rendered page breaks, proofing marks, rsid info, document-comparison markup and
/// web-hidden content are removed; the other listed options are explicitly set to false.
/// </summary>
/// <param name="wordDoc">Template document to simplify in place.</param>
private static void SimplifyTemplateMarkup(WordprocessingDocument wordDoc)
{
    var options = new SimplifyMarkupSettings
    {
        // Removed:
        RemoveComments = true,
        RemoveLastRenderedPageBreak = true,
        RemoveProof = true,
        RemoveRsidInfo = true,
        RemoveMarkupForDocumentComparison = true,
        RemoveWebHidden = true,

        // Not removed:
        RemoveContentControls = false,
        RemoveEndAndFootNotes = false,
        RemoveFieldCodes = false,
        RemovePermissions = false,
        RemoveSmartTags = false, // todo: change this back to true once we have patched OXPT to make it work right
        RemoveSoftHyphens = false,
        ReplaceTabsWithSpaces = false
    };

    MarkupSimplifier.SimplifyMarkup(wordDoc, options);
}
/// <summary>
/// Runs <c>MarkupSimplifier.SimplifyMarkup</c> on the given document with a fixed option set:
/// every option listed below is enabled except <c>RemoveFieldCodes</c>.
/// </summary>
/// <param name="originalDocument">Document to simplify in place.</param>
private void SimplifyMarkup(WordprocessingDocument originalDocument)
{
    var options = new SimplifyMarkupSettings
    {
        RemoveFieldCodes = false,

        RemoveBookmarks = true,
        RemoveComments = true,
        RemoveContentControls = true,
        RemoveEndAndFootNotes = true,
        RemoveGoBackBookmark = true,
        RemoveHyperlinks = true,
        RemoveLastRenderedPageBreak = true,
        RemovePermissions = true,
        RemoveProof = true,
        RemoveRsidInfo = true,
        RemoveSmartTags = true,
        RemoveSoftHyphens = true,
        ReplaceTabsWithSpaces = true,
    };

    MarkupSimplifier.SimplifyMarkup(originalDocument, options);
}
/// <summary>
/// Removes markup that, per the original author, breaks the text up into multiple pieces and
/// thereby prevents simple search and replace. Field codes are kept.
/// </summary>
/// <param name="doc">Document to clean in place.</param>
private static void CleanMarkup(WordprocessingDocument doc)
{
    MarkupSimplifier.SimplifyMarkup(
        doc,
        new SimplifyMarkupSettings
        {
            RemoveFieldCodes = false,

            RemoveBookmarks = true,
            RemoveComments = true,
            RemoveContentControls = true,
            RemoveEndAndFootNotes = true,
            RemoveLastRenderedPageBreak = true,
            RemovePermissions = true,
            RemoveProof = true,
            RemoveRsidInfo = true,
            RemoveSmartTags = true,
            RemoveSoftHyphens = true,
            ReplaceTabsWithSpaces = true
        });
}
/// <summary>
/// Runs smart-tag removal on "SmartTags.docx", a document that (per the original author)
/// contains an invalid smartTag element apparently inserted by third-party software. The test
/// asserts that no <c>w:smartTag</c> elements remain and that the saved result still fails validation.
/// </summary>
public void RemoveSmartTags()
{
    const string fixtureName = "SmartTags.docx";

    var templatesDir = new DirectoryInfo("../../../../test/templates/");
    var resultsDir = new DirectoryInfo("../../../../test/history/dot-net-results");

    string workingCopyPath = new FileInfo(Path.Combine(resultsDir.FullName, fixtureName)).FullName;
    string outPath = Path.Combine(resultsDir.FullName, "SmartTags-Removed.docx");

    // Work on a copy of the fixture placed in the results folder.
    var fixture = new FileInfo(Path.Combine(templatesDir.FullName, fixtureName));
    fixture.CopyTo(workingCopyPath, true);

    byte[] inputBytes = new WmlDocument(workingCopyPath).DocumentByteArray;

    using (var buffer = new MemoryStream())
    {
        buffer.Write(inputBytes, 0, inputBytes.Length);

        using (var package = WordprocessingDocument.Open(buffer, true))
        {
            // We try to remove smart tags, but the (apparently) invalid one is not removed correctly.
            MarkupSimplifier.SimplifyMarkup(package, new SimplifyMarkupSettings { RemoveSmartTags = true });
        }

        var transformedDoc = new WmlDocument(outPath, buffer.ToArray());
        Assert.False(transformedDoc.MainDocumentPart.Descendants(W.smartTag).Any());
        transformedDoc.Save();
    }

    // The transformed document still has leftover bits of the invalid smart tag, and should
    // therefore be invalid. (Consider whether it would be appropriate to patch SimplifyMarkup
    // to correctly remove this apparently invalid smart tag?)
    var validation = new Validator().ValidateDocument(outPath);

    // MS Word also complains about the validity of this document.
    Assert.True(validation.HasErrors);
}
/// <summary>
/// Checks that simplifying with <c>RemoveSmartTags = true</c> leaves no <c>w:smartTag</c>
/// element and that the first <c>w:t</c> element carries the expected text value.
/// </summary>
public void CanRemoveSmartTags()
{
    XDocument xml = XDocument.Parse(SmartTagDocumentXmlString);

    // Precondition: the fixture really contains a smart tag.
    Assert.True(xml.Descendants(W.smartTag).Any());

    using (MemoryStream buffer = new MemoryStream())
    {
        using (var package = WordprocessingDocument.Create(buffer, DocumentType))
        {
            var mainPart = package.AddMainDocumentPart();
            mainPart.PutXDocument(xml);

            MarkupSimplifier.SimplifyMarkup(
                package,
                new SimplifyMarkupSettings { RemoveSmartTags = true });

            // Re-read the part so the assertions see the simplified markup.
            xml = mainPart.GetXDocument();
            XElement firstText = xml.Descendants(W.t).First();

            Assert.False(xml.Descendants(W.smartTag).Any());
            Assert.Equal(SmartTagDocumentTextValue, firstText.Value);
        }
    }
}
/// <summary>
/// Loads a Word template into memory, simplifies its markup and applies the given replacements
/// run by run across the body, headers and footers. It then simplifies again, saves, and
/// returns the bytes of the resulting package.
/// </summary>
/// <remarks>
/// For each run, the first item matching the run's inner text decides the mode:
/// <list type="bullet">
/// <item>check-box items replace the run's children with clones of <c>Checkboxes</c>;</item>
/// <item><c>UseRun</c> items replace the run's children with a clone of <c>Run</c>
/// (or just clear the run when <c>Run</c> is null);</item>
/// <item>otherwise each matching text element gets the first item matching that element applied.</item>
/// </list>
/// An item matches a string when <c>MatchWholeText</c> is set and the string equals
/// <c>TextToReplace</c>, or when it is not set and the trimmed string contains <c>TextToReplace</c>.
/// </remarks>
/// <param name="templatePath">
/// Template path, combined with the folder configured in the <c>doctemplate</c> app setting
/// (a leading <c>~</c> in that setting is resolved through <c>Server.MapPath</c>).
/// </param>
/// <param name="items">Replacements to apply.</param>
/// <returns>The modified document package as a byte array.</returns>
public byte[] GetWordReplacedText(string templatePath, List<WordReplacement> items)
{
    var pathdir = ConfigurationManager.AppSettings["doctemplate"].ToString();
    if (pathdir.StartsWith("~"))
    {
        pathdir = HttpContext.Current.Server.MapPath(pathdir);
    }

    var path = Path.Combine(pathdir, templatePath);

    // Shared matching rule for run-level and text-level lookups.
    Func<WordReplacement, string, bool> matches = (candidate, value) =>
        !candidate.MatchWholeText
            ? value?.Trim().Contains(candidate.TextToReplace) == true
            : value == candidate.TextToReplace;

    using (MemoryStream templateStream = new MemoryStream())
    {
        // The template is only read, so open it read-only and always release the handle.
        using (FileStream fileStream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            fileStream.CopyStream(templateStream);
        }

        using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(templateStream, true))
        {
            wordDoc.ChangeDocumentType(WordprocessingDocumentType.Document);

            SimplifyMarkupSettings settings = new SimplifyMarkupSettings
            {
                RemoveComments = true,
                RemoveContentControls = true,
                RemoveEndAndFootNotes = true,
                RemoveFieldCodes = false,
                RemoveLastRenderedPageBreak = true,
                RemovePermissions = true,
                RemoveProof = true,
                RemoveRsidInfo = true,
                RemoveSmartTags = true,
                RemoveSoftHyphens = true,
                ReplaceTabsWithSpaces = true,
                RemoveWebHidden = true,
                RemoveMarkupForDocumentComparison = true
            };
            MarkupSimplifier.SimplifyMarkup(wordDoc, settings);

            // Collect every run up front: body first, then headers, then footers.
            var body = wordDoc.MainDocumentPart.Document.Body;
            var runsall = body.Descendants<Run>().ToList();
            foreach (var header in wordDoc.MainDocumentPart.HeaderParts)
            {
                runsall.AddRange(header.RootElement.Descendants<Run>());
            }

            foreach (var footer in wordDoc.MainDocumentPart.FooterParts)
            {
                runsall.AddRange(footer.RootElement.Descendants<Run>());
            }

            foreach (var r in runsall)
            {
                string runText = r.InnerText;
                int runItemIndex = items.FindIndex(t => matches(t, runText));
                if (runItemIndex < 0)
                {
                    continue;
                }

                var replace = items[runItemIndex];

                if (replace.IsCheckBox)
                {
                    r.RemoveAllChildren();
                    replace.Checkboxes.ForEach(c => { r.Append(c.CloneNode(true)); });
                    continue;
                }

                foreach (var text in r.Elements<Text>())
                {
                    string elementText = text.InnerText;
                    int textItemIndex = items.FindIndex(it => matches(it, elementText));
                    if (textItemIndex < 0)
                    {
                        continue;
                    }

                    if (replace.UseRun)
                    {
                        try
                        {
                            if (replace.Run != null)
                            {
                                replace.Run.Append(r.RunProperties.CloneNode(true));
                                r.RemoveAllChildren();
                                r.Append(replace.Run.CloneNode(true));
                            }
                            else
                            {
                                r.RemoveAllChildren();
                            }
                        }
                        catch (Exception exc)
                        {
                            // Best effort, as in the original: a failing run replacement
                            // (e.g. a run without RunProperties) must not abort the whole
                            // document. Record it instead of swallowing it silently.
                            System.Diagnostics.Trace.TraceWarning(
                                "GetWordReplacedText: run replacement skipped: {0}", exc.Message);
                        }
                    }
                    else
                    {
                        var wrd = items[textItemIndex];
                        text.Text = wrd.MatchWholeText
                            ? wrd.ReplacementText
                            : text.Text.Replace(wrd.TextToReplace, wrd.ReplacementText);
                    }
                }
            }

            // NOTE(review): the edits above go through the typed SDK object model, while
            // MarkupSimplifier appears to work on the part's XDocument - confirm that this
            // second pass sees (and does not overwrite) the replacements.
            MarkupSimplifier.SimplifyMarkup(wordDoc, settings);
            wordDoc.Save();
        }

        // Take the bytes only after the package has been disposed, so everything is flushed.
        return templateStream.ToArray();
    }
}
/// <summary>
/// Opens the document at <paramref name="filePath"/> as editable, simplifies its markup and
/// replaces template tags using <paramref name="keyValuePairs"/>:
/// <list type="number">
/// <item><c>{{texto:...}}</c> tags in runs that are direct children of top-level body elements;</item>
/// <item>text elements in the body whose whole text equals a key and holds an
/// <c>{{imagem:...}}</c> tag, which are replaced by a paragraph wrapping the element
/// returned by <c>AddImage</c>;</item>
/// <item><c>{{texto:...}}</c> tags in all footer text elements.</item>
/// </list>
/// </summary>
/// <param name="filePath">Path of the document to modify in place.</param>
/// <param name="keyValuePairs">
/// Maps full tags to their replacement value. Text tags are looked up by the lower-cased
/// match (<c>ToLowerCase()</c> is applied before matching); image tags by the match as written.
/// </param>
public void ReplaceTags(string filePath, Dictionary<string, string> keyValuePairs)
{
    const string textTagPattern = @"{{texto:([A-Za-z0-9\-_]+)}}";
    const string imageTagPattern = @"{{imagem:([A-Za-z0-9\-_]+)}}";

    // Replaces every known {{texto:...}} tag inside one text element. Matching runs on the
    // lower-cased text, so the replacement itself must ignore case - for body AND footer.
    void ReplaceTextTags(Text target)
    {
        var texto = target.Text.ToLowerCase();
        if (!texto.Contains("{{texto:", StringComparison.InvariantCultureIgnoreCase))
        {
            return;
        }

        foreach (Match match in Regex.Matches(texto, textTagPattern))
        {
            if (keyValuePairs.TryGetValue(match.Value, out var value))
            {
                target.Text = target.Text.Replace(match.Value, value, StringComparison.InvariantCultureIgnoreCase);
            }
        }
    }

    using (var wordDocument = WordprocessingDocument.Open(filePath, true))
    {
        SimplifyMarkupSettings settings = new SimplifyMarkupSettings
        {
            RemoveComments = true,
            RemoveContentControls = true,
            RemoveFieldCodes = false,
            RemoveLastRenderedPageBreak = true,
            RemovePermissions = true,
            RemoveProof = true,
            RemoveRsidInfo = true,
            RemoveSmartTags = true,
            RemoveSoftHyphens = true,
            ReplaceTabsWithSpaces = true,
            RemoveBookmarks = true,
        };
        MarkupSimplifier.SimplifyMarkup(wordDocument, settings);

        var mainPart = wordDocument.MainDocumentPart;

        // Pass 1: text tags in the body. (Image handling at this point was disabled by the
        // original author; images are handled in pass 2.)
        foreach (var para in mainPart.Document.Body.ChildElements)
        {
            foreach (var run in para.Elements<Run>())
            {
                foreach (var text in run.Elements<Text>())
                {
                    ReplaceTextTags(text);
                }
            }
        }

        // Pass 2: image tags - text elements whose whole text equals one of the keys.
        foreach (var keyValuePair in keyValuePairs)
        {
            var textPlaceHolders = wordDocument.MainDocumentPart.Document.Body.Descendants<Text>()
                .Where((x) => x.Text == keyValuePair.Key).ToList();

            foreach (var textPlaceHolder in textPlaceHolders)
            {
                var parent = textPlaceHolder.Parent;
                if (!(parent is Run)) // Parent should be a run element.
                {
                    Console.Out.WriteLine("Parent is not run");
                    continue;
                }

                if (!textPlaceHolder.Text.Contains("{{imagem:", StringComparison.InvariantCultureIgnoreCase))
                {
                    continue;
                }

                bool imageInserted = false;
                foreach (Match match in Regex.Matches(textPlaceHolder.Text, imageTagPattern))
                {
                    if (keyValuePairs.TryGetValue(match.Value, out var image))
                    {
                        var element = this.AddImage(wordDocument, parent, image);

                        // Use the captured parent: the placeholder's own Parent is gone once
                        // it has been removed.
                        parent.InsertAfterSelf(new DocumentFormat.OpenXml.Wordprocessing.Paragraph(element));
                        imageInserted = true;
                    }
                }

                // Remove the placeholder once, after all of its matches were handled.
                if (imageInserted)
                {
                    textPlaceHolder.Remove();
                }
            }
        }

        // Pass 3: text tags in the footers.
        foreach (var foot in mainPart.FooterParts)
        {
            foreach (var currentText in foot.RootElement.Descendants<Text>())
            {
                ReplaceTextTags(currentText);
            }
        }

        mainPart.Document.Save();
        wordDocument.Close();
    }
}
/// <summary>
/// Finds all tags in the requested document. The markup is simplified first; then
/// <c>{{imagem:...}}</c> and <c>{{texto:...}}</c> tags are collected from runs that are direct
/// children of top-level body elements and from every text element of the footers.
/// </summary>
/// <remarks>
/// Text is lower-cased (<c>ToLowerCase()</c>) before matching, so tags are returned in lower
/// case, without the surrounding braces, and without duplicates.
/// NOTE(review): the document is opened as editable, so the simplification is presumably
/// persisted to the file - confirm that this side effect is intended for a read operation.
/// </remarks>
/// <param name="filePath">The Azure path of the file.</param>
/// <returns>The distinct tags, in order of first appearance.</returns>
public IEnumerable<string> GetTags(string filePath)
{
    const string imageTagPattern = @"{{imagem:([A-Za-z0-9\-_]+)}}";
    const string textTagPattern = @"{{texto:([A-Za-z0-9\-_]+)}}";

    var tags = new List<string>();

    // Adds every not-yet-seen tag matching 'pattern' in 'loweredText', braces stripped.
    void Collect(string loweredText, string pattern)
    {
        foreach (Match match in Regex.Matches(loweredText, pattern))
        {
            var key = match.Value.Replace("{{", string.Empty).Replace("}}", string.Empty);
            if (!tags.Contains(key))
            {
                tags.Add(key);
            }
        }
    }

    using (var wordDocument = WordprocessingDocument.Open(filePath, true))
    {
        SimplifyMarkupSettings settings = new SimplifyMarkupSettings
        {
            RemoveComments = true,
            RemoveContentControls = true,
            RemoveFieldCodes = false,
            RemoveLastRenderedPageBreak = true,
            RemovePermissions = true,
            RemoveProof = true,
            RemoveRsidInfo = true,
            RemoveSmartTags = true,
            RemoveSoftHyphens = true,
            ReplaceTabsWithSpaces = true,
            RemoveBookmarks = true,
        };
        MarkupSimplifier.SimplifyMarkup(wordDocument, settings);

        var mainPart = wordDocument.MainDocumentPart;

        foreach (var para in mainPart.Document.Body.ChildElements)
        {
            foreach (var run in para.Elements<Run>())
            {
                foreach (var text in run.Elements<Text>())
                {
                    var texto = text.Text.ToLowerCase();

                    // Both tag kinds are collected, matching what the footer loop does; a
                    // text element may contain image and text tags at the same time.
                    Collect(texto, imageTagPattern);
                    Collect(texto, textTagPattern);
                }
            }
        }

        foreach (var foot in mainPart.FooterParts)
        {
            foreach (var currentText in foot.RootElement.Descendants<Text>())
            {
                var texto = currentText.Text.ToLowerCase();
                Collect(texto, textTagPattern);
                Collect(texto, imageTagPattern);
            }
        }
    }

    return tags;
}
/// <summary>
/// Uses the MarkupSimplifier features of OpenXmlPowerTools to accept revisions and strip
/// proofing errors, RSID information and other noise from the document, making the XML
/// cleaner for processing by other APIs. The simplified XML is printed to the console and,
/// optionally, the document is rewritten from it.
/// </summary>
/// <param name="docLocation">The absolute location of the docx file.</param>
/// <param name="z">A namespace passed to <c>TransformToSimpleXml</c> to be placed on the XML tags.</param>
/// <param name="formatDocument">When true, <c>ReWriteDocument</c> is called with the simplified XML.</param>
/// <exception cref="Exception">
/// Thrown when any step fails. The message contains the original exception's text and the
/// original exception is available through <c>InnerException</c>.
/// </exception>
public static void SimplifyMarkup(string docLocation, string z, bool formatDocument)
{
    try
    {
        using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(docLocation, true))
        {
            RevisionAccepter.AcceptRevisions(wordDoc);

            // Defines which components are cleaned from the XML; see the
            // SimplifyMarkupSettings definition for all available options.
            SimplifyMarkupSettings settings = new SimplifyMarkupSettings
            {
                RemoveComments = true,
                RemoveContentControls = true,
                RemoveEndAndFootNotes = true,
                RemoveFieldCodes = false,
                RemoveLastRenderedPageBreak = true,
                RemovePermissions = true,
                RemoveProof = true,
                RemoveRsidInfo = true,
                RemoveSmartTags = true,
                RemoveSoftHyphens = true,
                ReplaceTabsWithSpaces = true,
                NormalizeXml = false,
                RemoveWebHidden = true,
                RemoveMarkupForDocumentComparison = true,
            };
            MarkupSimplifier.SimplifyMarkup(wordDoc, settings);

            // Id of the document's default paragraph style (null when none is marked default).
            string defaultParagraphStyleId = wordDoc.MainDocumentPart
                .StyleDefinitionsPart.GetXDocument().Root.Elements(W.style)
                .Where(e => (string)e.Attribute(W.type) == "paragraph" &&
                            (string)e.Attribute(W._default) == "1")
                .Select(s => (string)s.Attribute(W.styleId))
                .FirstOrDefault();

            // Transform the main document part into the simplified XML.
            XElement simplerXml = (XElement)TransformToSimpleXml(
                wordDoc.MainDocumentPart.GetXDocument().Root,
                defaultParagraphStyleId,
                z);
            Console.WriteLine(simplerXml);

            // Save and close before the optional rewrite below works on the same path.
            wordDoc.Save();
            wordDoc.Close();

            if (formatDocument)
            {
                Console.WriteLine("Reescrevendo o documento sem estilos");
                try
                {
                    ReWriteDocument(docLocation, simplerXml);
                    Console.WriteLine("Sucesso ao Reformatar o documento!");
                }
                catch (Exception e)
                {
                    // Keep the original exception attached so its type and stack trace survive.
                    throw new Exception(string.Format("Erro ao Reformatar o Arquivo: {0}", e.ToString()), e);
                }
            }
        }
    }
    catch (Exception e)
    {
        // Keep the original exception attached so its type and stack trace survive.
        throw new Exception(string.Format("Não foi Possível simplificar o Arquivo. Erro: {0}", e.ToString()), e);
    }
}