/// <summary>
/// Sample entry point: copies the test document to "Modified.docx" in the working
/// directory, replaces every period followed by spaces with a period plus a newline,
/// then passes each paragraph through TransformEnvironmentNewLineToParagraph.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // A timestamped directory is created here; nothing in this method writes into it.
    DateTime now = DateTime.Now;
    string outputDirectoryName = string.Format("ExampleOutput-{0:00}-{1:00}-{2:00}-{3:00}{4:00}{5:00}", now.Year - 2000, now.Month, now.Day, now.Hour, now.Minute, now.Second);
    new DirectoryInfo(outputDirectoryName).Create();

    FileInfo template = new FileInfo("../../TestDocument.docx");
    FileInfo target = new FileInfo("Modified.docx");

    // File.Copy (without overwrite) fails on an existing file, so clear any previous output first.
    if (target.Exists)
    {
        target.Delete();
    }

    File.Copy(template.FullName, target.FullName);

    using (WordprocessingDocument document = WordprocessingDocument.Open(target.FullName, true))
    {
        var mainXml = document.MainDocumentPart.GetXDocument();

        // Deferred query: it is evaluated by Replace and again by the loop below.
        IEnumerable<XElement> paragraphs = mainXml.Descendants(W.p);

        // "\x020" is the space character, so this matches a period followed by one or more spaces.
        Regex sentenceGap = new Regex("[.]\x020+");
        OpenXmlRegex.Replace(paragraphs, sentenceGap, "." + Environment.NewLine, null);

        foreach (XElement paragraph in paragraphs)
        {
            // Swap the paragraph's children for the children of its transformed copy.
            XElement transformed = (XElement)TransformEnvironmentNewLineToParagraph(paragraph);
            paragraph.ReplaceNodes(transformed.Nodes());
        }

        // Write the cached, modified XDocument back to the main document part.
        document.MainDocumentPart.PutXDocument();
    }
}
/// <summary>
/// Checks that a tracked-changes replacement rewrites both quoted phrases in the first
/// paragraph and that each rewritten phrase appears inside a w:ins child of that paragraph.
/// </summary>
public void CanReplaceTextWithQuotationMarksAndTrackedChanges()
{
    const string textBefore = "Text can be enclosed in “normal double quotes” and in «double angle quotation marks».";
    const string textAfter = "Text can be enclosed in ‘changed normal double quotes’ and in ‘changed double angle quotation marks’.";

    // Precondition: the fixture's first paragraph reads as expected.
    XDocument partDocument = XDocument.Parse(QuotationMarksAndTrackedChangesDocumentXmlString);
    var originalText = InnerText(partDocument.Descendants(W.p).First());
    Assert.Equal(textBefore, originalText);

    using (var stream = new MemoryStream())
    using (var wordDocument = WordprocessingDocument.Create(stream, DocumentType))
    {
        wordDocument.AddMainDocumentPart().PutXDocument(partDocument);

        // Left quote, captured words, right quote.
        string pattern = string.Format("{0}(?<words>{1}){2}", LeftDoubleQuotationMarks, Words, RightDoubleQuotationMarks);
        var replacedCount = OpenXmlRegex.Replace(
            partDocument.Descendants(W.p),
            new Regex(pattern),
            "‘changed ${words}’",
            null,
            true,
            "John Doe");

        XElement firstParagraph = partDocument.Descendants(W.p).First();
        var replacedText = InnerText(firstParagraph);

        Assert.Equal(2, replacedCount);
        Assert.Equal(textAfter, replacedText);

        // Each replacement must be present as its own insertion element.
        Assert.Contains(firstParagraph.Elements(W.ins), e => InnerText(e) == "‘changed normal double quotes’");
        Assert.Contains(firstParagraph.Elements(W.ins), e => InnerText(e) == "‘changed double angle quotation marks’");
    }
}
/// <summary>
/// Sample entry point: copies the test document to "Modified.docx" in the working
/// directory, replaces every period followed by spaces with a period plus a newline,
/// then passes each paragraph through TransformEnvironmentNewLineToParagraph.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    FileInfo template = new FileInfo("../../TestDocument.docx");
    FileInfo target = new FileInfo("Modified.docx");

    // File.Copy (without overwrite) fails on an existing file, so clear any previous output first.
    if (target.Exists)
    {
        target.Delete();
    }

    File.Copy(template.FullName, target.FullName);

    using (WordprocessingDocument document = WordprocessingDocument.Open(target.FullName, true))
    {
        var mainXml = document.MainDocumentPart.GetXDocument();

        // Deferred query: it is evaluated by Replace and again by the loop below.
        IEnumerable<XElement> paragraphs = mainXml.Descendants(W.p);

        // "\x020" is the space character, so this matches a period followed by one or more spaces.
        Regex sentenceGap = new Regex("[.]\x020+");
        OpenXmlRegex.Replace(paragraphs, sentenceGap, "." + Environment.NewLine, null);

        foreach (XElement paragraph in paragraphs)
        {
            // Swap the paragraph's children for the children of its transformed copy.
            XElement transformed = (XElement)TransformEnvironmentNewLineToParagraph(paragraph);
            paragraph.ReplaceNodes(transformed.Nodes());
        }

        // Write the cached, modified XDocument back to the main document part.
        document.MainDocumentPart.PutXDocument();
    }
}
/// <summary>
/// Checks that a tracked-changes replacement in the last paragraph of the fields fixture
/// swaps the "Schedule C (Performance Framework)" phrase for "Exhibit 4" exactly once
/// and leaves the field placeholders in the paragraph text untouched.
/// </summary>
public void CanReplaceTextWithFields()
{
    const string textBefore = "As stated in Article {__1} and this Section {__1.1}, this is described in Schedule C (Performance Framework).";
    const string textAfter = "As stated in Article {__1} and this Section {__1.1}, this is described in Exhibit 4.";

    // Precondition: the fixture's last paragraph reads as expected.
    XDocument partDocument = XDocument.Parse(FieldsDocumentXmlString);
    var originalText = InnerText(partDocument.Descendants(W.p).Last());
    Assert.Equal(textBefore, originalText);

    using (var stream = new MemoryStream())
    using (var wordDocument = WordprocessingDocument.Create(stream, DocumentType))
    {
        wordDocument.AddMainDocumentPart().PutXDocument(partDocument);

        // Parentheses are escaped so they match literally.
        var phrase = new Regex(@"Schedule C \(Performance Framework\)");
        var replacedCount = OpenXmlRegex.Replace(
            partDocument.Descendants(W.p),
            phrase,
            "Exhibit 4",
            null,
            true,
            "John Doe");

        var replacedText = InnerText(partDocument.Descendants(W.p).Last());

        Assert.Equal(1, replacedCount);
        Assert.Equal(textAfter, replacedText);
    }
}
/// <summary>
/// Checks that a plain (untracked) replacement rewrites both quoted phrases in the
/// first paragraph of the quotation-marks fixture.
/// </summary>
public void CanReplaceTextWithQuotationMarks()
{
    const string textBefore = "Text can be enclosed in “normal double quotes” and in «double angle quotation marks».";
    const string textAfter = "Text can be enclosed in ‘changed normal double quotes’ and in ‘changed double angle quotation marks’.";

    // Precondition: the fixture's first paragraph reads as expected.
    var partDocument = XDocument.Parse(QuotationMarksDocumentXmlString);
    var originalText = InnerText(partDocument.Descendants(W.p).First());
    Assert.Equal(textBefore, originalText);

    using (var stream = new MemoryStream())
    {
        using (var wordDocument = WordprocessingDocument.Create(stream, DocumentType))
        {
            wordDocument.AddMainDocumentPart().PutXDocument(partDocument);

            // Left quote, captured words, right quote.
            var pattern = string.Format("{0}(?<words>{1}){2}", LeftDoubleQuotationMarks, Words, RightDoubleQuotationMarks);
            var replacedCount = OpenXmlRegex.Replace(
                partDocument.Descendants(W.p),
                new Regex(pattern),
                "‘changed ${words}’",
                null);

            var replacedText = InnerText(partDocument.Descendants(W.p).First());

            Assert.Equal(2, replacedCount);
            Assert.Equal(textAfter, replacedText);
        }
    }
}
/// <summary>
/// Copies the test presentation into TempDir and, on every slide of the copy, replaces
/// "Hello" with "H e l l o", logs the per-slide replacement count, strips all xml:space
/// attributes and writes the slide XML back.
/// </summary>
public void PowerPointSample()
{
    string sourcePath = new FileInfo(GetFilePath("TestPresentation.pptx")).FullName;
    string targetPath = new FileInfo(Path.Combine(TempDir, "Modified-TestPresentation.pptx")).FullName;
    File.Copy(sourcePath, targetPath);

    var hello = new Regex("Hello");

    using (var presentation = PresentationDocument.Open(targetPath, true))
    {
        foreach (var slidePart in presentation.PresentationPart.SlideParts)
        {
            var slideXml = slidePart.GetXDocument();

            // Replace content in the slide's text paragraphs.
            var replaced = OpenXmlRegex.Replace(slideXml.Descendants(A.p), hello, "H e l l o", null);
            Log.WriteLine("Example #18 Replaced: {0}", replaced);

            // PowerPoint 2007 compatibility (PowerPoint only, not Word): xml:space="preserve" has to be
            // removed everywhere. The trade-off is that leading or trailing spaces in a run are
            // stripped on load, which can affect content and layout.
            slideXml.Descendants().Attributes(XNamespace.Xml + "space").Remove();

            slidePart.PutXDocument();
        }
    }
}
/// <summary>
/// Checks that a tracked-changes replacement of the symbol U+F021 by U+F028 happens once
/// in the first paragraph and that the inserted content contains a Wingdings w:sym element
/// whose character code is "F028".
/// </summary>
public void CanReplaceTextWithSymbolsAndTrackedChanges()
{
    const string textBefore = "We can also use symbols such as \uF021 or \uF028.";
    const string textAfter = "We can also use symbols such as \uF028 or \uF028.";

    // Precondition: the fixture's first paragraph reads as expected.
    XDocument partDocument = XDocument.Parse(SymbolsAndTrackedChangesDocumentXmlString);
    var originalText = InnerText(partDocument.Descendants(W.p).First());
    Assert.Equal(textBefore, originalText);

    using (var stream = new MemoryStream())
    using (var wordDocument = WordprocessingDocument.Create(stream, DocumentType))
    {
        wordDocument.AddMainDocumentPart().PutXDocument(partDocument);

        var symbolPattern = new Regex(@"[\uF021]");
        var replacedCount = OpenXmlRegex.Replace(
            partDocument.Descendants(W.p),
            symbolPattern,
            "\uF028",
            null,
            true,
            "John Doe");

        XElement firstParagraph = partDocument.Descendants(W.p).First();
        var replacedText = InnerText(firstParagraph);

        Assert.Equal(1, replacedCount);
        Assert.Equal(textAfter, replacedText);

        // At least one insertion must carry the replacement as a Wingdings symbol element.
        Assert.Contains(
            firstParagraph.Descendants(W.ins),
            ins => ins.Descendants(W.sym).Any(
                sym => sym.Attribute(W.font).Value == "Wingdings" &&
                       sym.Attribute(W._char).Value == "F028"));
    }
}
/// <summary>
/// Clones a presentation file and applies a set of regex substitutions to the text
/// paragraphs (a:p) of every slide in the clone.
/// </summary>
/// <param name="arquivoOrigem">Source file path, handed to UtilitarioArquivo.ClonarArquivo.</param>
/// <param name="arquivoDestino">Destination file path; the clone is opened for editing and modified in place.</param>
/// <param name="substituicoes">Substitutions to apply; each provides Chave (the pattern) and Valor (the replacement text).</param>
/// <returns>The sum of the counts returned by OpenXmlRegex.Replace over all slides and substitutions.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="substituicoes"/> is null.</exception>
public static int SubstituirPorRegex(string arquivoOrigem, string arquivoDestino, IEnumerable<SubstituicaoRegex> substituicoes)
{
    // Fail before touching the file system instead of part-way through the first slide.
    if (substituicoes == null)
    {
        throw new System.ArgumentNullException(nameof(substituicoes));
    }

    // Snapshot the sequence so a lazily evaluated source is enumerated once, not once per slide.
    var listaSubstituicoes = new List<SubstituicaoRegex>(substituicoes);

    UtilitarioArquivo.ClonarArquivo(arquivoOrigem, arquivoDestino);

    int quantidadeSubstituicoes = 0;

    using (PresentationDocument presentation = PresentationDocument.Open(arquivoDestino, isEditable: true))
    {
        // With nothing to substitute the slides are left untouched (no attribute stripping, no write-back).
        if (listaSubstituicoes.Count > 0)
        {
            foreach (SlidePart slidePart in presentation.PresentationPart.SlideParts)
            {
                XDocument xDocument = slidePart.GetXDocument();

                foreach (SubstituicaoRegex substituicao in listaSubstituicoes)
                {
                    // Replace content in the slide's text paragraphs.
                    IEnumerable<XElement> content = xDocument.Descendants(A.p);
                    quantidadeSubstituicoes += OpenXmlRegex.Replace(content, substituicao.Chave, substituicao.Valor, doReplacement: null);
                }

                // PowerPoint 2007 compatibility (PowerPoint only, not Word): xml:space="preserve" has to be
                // removed everywhere. The trade-off is that leading or trailing spaces in a run are
                // stripped on load, which can affect content and layout.
                // This and the write-back run once per slide, after all substitutions, rather than
                // once per substitution.
                xDocument.Descendants().Attributes(XNamespace.Xml + "space").Remove();
                slidePart.PutXDocument();
            }
        }
    }

    return quantidadeSubstituicoes;
}
/// <summary>
/// Builds an in-memory Word document from the given run texts, replaces the placeholder
/// with the replacement text and asserts the outcome. The document is also written to
/// disk before and after the replacement so both versions can be inspected.
/// </summary>
/// <param name="example">Label used as the prefix of the two output file names.</param>
/// <param name="propName">Placeholder to replace; used directly as the regular-expression pattern.</param>
/// <param name="runTexts">Texts passed to CreateWordprocessingDocument to build the test document.</param>
/// <param name="replacement">Text that replaces the placeholder.</param>
public void Replace_PlaceholderInOneOrMoreRuns_SuccessfullyReplaced(
    string example,
    string propName,
    IEnumerable<string> runTexts,
    string replacement)
{
    using (MemoryStream stream = CreateWordprocessingDocument(runTexts))
    {
        // Snapshot of the input document, for manual inspection.
        byte[] before = stream.ToArray();
        File.WriteAllBytes($"{example} before Replacing.docx", before);

        using (var wordDocument = WordprocessingDocument.Open(stream, true))
        {
            var mainPart = wordDocument.MainDocumentPart;

            // Root element (a w:document here). GetXElement() is a shortcut for
            // GetXDocument().Root; the root is cached so PutXDocument() can write it back later.
            XElement root = mainPart.GetXElement();

            // The replacement text is passed as an argument; Replace modifies the root in place.
            OpenXmlRegex.Replace(root.Descendants(W.p), new Regex(propName), replacement, null);

            // Persist the changed root element to the main document part.
            mainPart.PutXDocument();
        }

        // Verify the result.
        AssertReplacementWasSuccessful(stream, replacement);

        // Snapshot of the output document, for manual inspection.
        byte[] after = stream.ToArray();
        File.WriteAllBytes($"{example} after Replacing.docx", after);
    }
}
/// <summary>
/// Clones a Word document and applies a set of regex substitutions to the paragraphs (w:p)
/// of the clone's main document part.
/// </summary>
/// <param name="arquivoOrigem">Source file path, handed to UtilitarioArquivo.ClonarArquivo.</param>
/// <param name="arquivoDestino">Destination file path; the clone is opened for editing and modified in place.</param>
/// <param name="substituicoes">Substitutions to apply; each provides Chave (the pattern) and Valor (the replacement text).</param>
/// <returns>The sum of the counts returned by OpenXmlRegex.Replace over all substitutions.</returns>
public static int SubstituirPorRegex(string arquivoOrigem, string arquivoDestino, IEnumerable<SubstituicaoRegex> substituicoes)
{
    UtilitarioArquivo.ClonarArquivo(arquivoOrigem, arquivoDestino);

    var total = 0;

    using (var documento = WordprocessingDocument.Open(arquivoDestino, isEditable: true))
    {
        var parteXml = documento.MainDocumentPart.GetXDocument();

        // Deferred query over the cached XDocument; re-evaluated by each Replace call.
        var paragrafos = parteXml.Descendants(W.p);

        foreach (var item in substituicoes)
        {
            var feitas = OpenXmlRegex.Replace(paragrafos, item.Chave, item.Valor, doReplacement: null);
            total += feitas;
        }

        // Single write-back after all substitutions have been applied.
        documento.MainDocumentPart.PutXDocument();
    }

    return total;
}
/// <summary>
/// Copies the test document into TempDir, replaces every period followed by spaces with
/// a period plus a newline, then passes each paragraph through
/// TransformEnvironmentNewLineToParagraph and writes the result back.
/// </summary>
public void WordSample2()
{
    string sourcePath = new FileInfo(GetFilePath("TestDocument.docx")).FullName;
    string targetPath = new FileInfo(Path.Combine(TempDir, "Modified.docx")).FullName;
    File.Copy(sourcePath, targetPath);

    using (var document = WordprocessingDocument.Open(targetPath, true))
    {
        var mainXml = document.MainDocumentPart.GetXDocument();

        // Deferred query: it is evaluated by Replace and again by the loop below.
        var paragraphs = mainXml.Descendants(W.p);

        // "\x020" is the space character, so this matches a period followed by one or more spaces.
        var sentenceGap = new Regex("[.]\x020+");
        OpenXmlRegex.Replace(paragraphs, sentenceGap, "." + Environment.NewLine, null);

        foreach (var paragraph in paragraphs)
        {
            // Swap the paragraph's children for the children of its transformed copy.
            var transformed = (XElement)TransformEnvironmentNewLineToParagraph(paragraph);
            paragraph.ReplaceNodes(transformed.Nodes());
        }

        document.MainDocumentPart.PutXDocument();
    }
}
/// <summary>
/// Sample entry point that walks through eighteen OpenXmlRegex scenarios: matching,
/// replacing and deleting text in individual paragraphs of a copy of the test document
/// (with and without tracked changes), followed by a replacement on every slide of a copy
/// of the test presentation. Both copies are written to a freshly created timestamped
/// directory and each scenario reports its count on the console.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    var n = DateTime.Now;
    var tempDi = new DirectoryInfo(string.Format("ExampleOutput-{0:00}-{1:00}-{2:00}-{3:00}{4:00}{5:00}", n.Year - 2000, n.Month, n.Day, n.Hour, n.Minute, n.Second));
    tempDi.Create();

    var sourceDoc = new FileInfo("../../TestDocument.docx");
    var newDoc = new FileInfo(Path.Combine(tempDi.FullName, "Modified.docx"));
    File.Copy(sourceDoc.FullName, newDoc.FullName);
    using (WordprocessingDocument wDoc = WordprocessingDocument.Open(newDoc.FullName, true))
    {
        var xDoc = wDoc.MainDocumentPart.GetXDocument();

        // Selects the single paragraph at the given zero-based position (deferred query).
        Func<int, IEnumerable<XElement>> paragraphAt = index => xDoc.Descendants(W.p).Skip(index).Take(1);

        int count;

        // Match content (paragraph 1)
        count = OpenXmlRegex.Match(paragraphAt(0), new Regex("Video"));
        Console.WriteLine("Example #1 Count: {0}", count);

        // Match content, case insensitive (paragraph 1)
        count = OpenXmlRegex.Match(paragraphAt(0), new Regex("video", RegexOptions.IgnoreCase));
        Console.WriteLine("Example #2 Count: {0}", count);

        // Match content, with callback (paragraph 1); the callback reports each hit itself
        OpenXmlRegex.Match(
            paragraphAt(0),
            new Regex("video", RegexOptions.IgnoreCase),
            (element, match) => Console.WriteLine("Example #3 Found value: >{0}<", match.Value));

        // Replace content, beginning of paragraph (paragraph 2)
        count = OpenXmlRegex.Replace(paragraphAt(1), new Regex("^Video provides"), "Audio gives", null);
        Console.WriteLine("Example #4 Replaced: {0}", count);

        // Replace content, middle of paragraph (paragraph 3)
        count = OpenXmlRegex.Replace(paragraphAt(2), new Regex("powerful"), "good", null);
        Console.WriteLine("Example #5 Replaced: {0}", count);

        // Replace content, end of paragraph (paragraph 4)
        count = OpenXmlRegex.Replace(paragraphAt(3), new Regex(" [a-z.]*$"), " super good point!", null);
        Console.WriteLine("Example #6 Replaced: {0}", count);

        // Delete content, beginning of paragraph (paragraph 5)
        count = OpenXmlRegex.Replace(paragraphAt(4), new Regex("^Video provides"), "", null);
        Console.WriteLine("Example #7 Deleted: {0}", count);

        // Delete content, middle of paragraph (paragraph 6)
        count = OpenXmlRegex.Replace(paragraphAt(5), new Regex("powerful "), "", null);
        Console.WriteLine("Example #8 Deleted: {0}", count);

        // Delete content, end of paragraph (paragraph 7)
        count = OpenXmlRegex.Replace(paragraphAt(6), new Regex("[.]$"), "", null);
        Console.WriteLine("Example #9 Deleted: {0}", count);

        // Replace content in inserted text, same author (paragraph 8)
        // (label corrected: this example replaces text, it does not delete it)
        count = OpenXmlRegex.Replace(paragraphAt(7), new Regex("Video"), "Audio", null, true, "Eric White");
        Console.WriteLine("Example #10 Replaced: {0}", count);

        // Delete content in inserted text, same author (paragraph 9)
        count = OpenXmlRegex.Replace(paragraphAt(8), new Regex("powerful "), "", null, true, "Eric White");
        Console.WriteLine("Example #11 Deleted: {0}", count);

        // Replace content partially in inserted text, same author (paragraph 10)
        count = OpenXmlRegex.Replace(paragraphAt(9), new Regex("Video provides "), "Audio gives ", null, true, "Eric White");
        Console.WriteLine("Example #12 Replaced: {0}", count);

        // Delete content partially in inserted text, same author (paragraph 11)
        count = OpenXmlRegex.Replace(paragraphAt(10), new Regex(" to help you prove your point"), "", null, true, "Eric White");
        Console.WriteLine("Example #13 Deleted: {0}", count);

        // Replace content in inserted text, different author (paragraph 12)
        // (label corrected: this example replaces text, it does not delete it)
        count = OpenXmlRegex.Replace(paragraphAt(11), new Regex("Video"), "Audio", null, true, "John Doe");
        Console.WriteLine("Example #14 Replaced: {0}", count);

        // Delete content in inserted text, different author (paragraph 13)
        count = OpenXmlRegex.Replace(paragraphAt(12), new Regex("powerful "), "", null, true, "John Doe");
        Console.WriteLine("Example #15 Deleted: {0}", count);

        // Replace content partially in inserted text, different author (paragraph 14)
        count = OpenXmlRegex.Replace(paragraphAt(13), new Regex("Video provides "), "Audio gives ", null, true, "John Doe");
        Console.WriteLine("Example #16 Replaced: {0}", count);

        // Delete content partially in inserted text, different author (paragraph 15)
        count = OpenXmlRegex.Replace(paragraphAt(14), new Regex(" to help you prove your point"), "", null, true, "John Doe");
        Console.WriteLine("Example #17 Deleted: {0}", count);

        // Write the modified XDocument back to the main document part.
        wDoc.MainDocumentPart.PutXDocument();
    }

    var sourcePres = new FileInfo("../../TestPresentation.pptx");
    var newPres = new FileInfo(Path.Combine(tempDi.FullName, "Modified.pptx"));
    File.Copy(sourcePres.FullName, newPres.FullName);
    using (PresentationDocument pDoc = PresentationDocument.Open(newPres.FullName, true))
    {
        foreach (var slidePart in pDoc.PresentationPart.SlideParts)
        {
            var xDoc = slidePart.GetXDocument();

            // Replace content
            int count = OpenXmlRegex.Replace(xDoc.Descendants(A.p), new Regex("Hello"), "H e l l o", null);
            Console.WriteLine("Example #18 Replaced: {0}", count);

            // If you absolutely want to preserve compatibility with PowerPoint 2007, then you will need
            // to strip the xml:space="preserve" attribute throughout.
            // This is an issue for PowerPoint only, not Word, and for 2007 only.
            // The side-effect of this is that if a run has space at the beginning or end of it, the
            // space will be stripped upon loading, and content/layout will be affected.
            xDoc.Descendants().Attributes(XNamespace.Xml + "space").Remove();

            slidePart.PutXDocument();
        }
    }
}
/// <summary>
/// Creates the merged output document: the template body is repeated and merged once per
/// record (using the "next" delimiters in the template to find the per-record regions),
/// leftover delimiters and unmerged regions are cleaned up, and the result is stored as a
/// new temporary <see cref="BinaryFile"/>.
/// </summary>
/// <param name="mergeTemplate">The merge template; its TemplateBinaryFileId identifies the template file.</param>
/// <param name="mergeObjectList">The merge object list; each entry is exposed to the template as "Row".</param>
/// <param name="globalMergeFields">The global merge fields, added to the merge data of every record.</param>
/// <returns>The saved output file, or null when the template's binary file cannot be found.</returns>
public override BinaryFile CreateDocument(MergeTemplate mergeTemplate, List<object> mergeObjectList, Dictionary<string, object> globalMergeFields)
{
    this.Exceptions = new List<Exception>();
    BinaryFile outputBinaryFile = null;

    var rockContext = new RockContext();
    var binaryFileService = new BinaryFileService(rockContext);

    var templateBinaryFile = binaryFileService.Get(mergeTemplate.TemplateBinaryFileId);
    if (templateBinaryFile == null)
    {
        return null;
    }

    // Start by creating a new document with the contents of the Template (so that Styles, etc get included)
    XDocument sourceTemplateDocX;

    // NOTE: On using multiple IDisposable, see https://stackoverflow.com/a/12603126/1755417 and https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/keywords/using-statement
    using (MemoryStream sourceTemplateStream = new MemoryStream(), outputDocStream = new MemoryStream())
    {
        // NOTE(review): ContentStream is read twice; this presumably relies on the getter returning a
        // fresh (or rewound) stream each time - confirm against BinaryFile.
        templateBinaryFile.ContentStream.CopyTo(outputDocStream);
        outputDocStream.Seek(0, SeekOrigin.Begin);

        // now that we have the outputdoc started, simplify the sourceTemplate
        templateBinaryFile.ContentStream.CopyTo(sourceTemplateStream);
        sourceTemplateStream.Seek(0, SeekOrigin.Begin);

        // The simplified package is only needed to obtain its main part as an XDocument, so it is
        // disposed as soon as that has been read (previously it was never disposed).
        using (WordprocessingDocument simplifiedDoc = WordprocessingDocument.Open(sourceTemplateStream, true))
        {
            MarkupSimplifier.SimplifyMarkup(simplifiedDoc, this.simplifyMarkupSettingsAll);
            sourceTemplateDocX = simplifiedDoc.MainDocumentPart.GetXDocument();
        }

        //// simplify any nodes that have Lava in it that might not have been caught by the MarkupSimplifier
        //// MarkupSimplifier only merges superfluous runs that are children of a paragraph
        OpenXmlRegex.Match(
            sourceTemplateDocX.Elements(),
            this.lavaRegEx,
            (x, m) =>
            {
                // Distinct parents of every run ("r") element below the matched element.
                var runParents = x.DescendantNodes().OfType<XElement>()
                    .Where(a => a.Parent != null && a.Name != null)
                    .Where(a => a.Name.LocalName == "r")
                    .Select(a => a.Parent)
                    .Distinct()
                    .ToList();

                foreach (var nonParagraphRunsParent in runParents)
                {
                    if (lavaRegEx.IsMatch(nonParagraphRunsParent.Value))
                    {
                        // Move the runs into a temporary paragraph so the run merger can process them,
                        // then put the merged runs back under the original parent.
                        var tempParent = XElement.Parse(new Paragraph().OuterXml);
                        tempParent.Add(nonParagraphRunsParent.Nodes());
                        tempParent = MarkupSimplifier.MergeAdjacentSuperfluousRuns(tempParent);
                        nonParagraphRunsParent.ReplaceNodes(tempParent.Nodes());
                    }
                }
            });

        XElement lastLavaNode = sourceTemplateDocX.DescendantNodes().OfType<XElement>().LastOrDefault(a => lavaRegEx.IsMatch(a.Value));

        // ensure there is a { Next } indicator after the last lava node in the template
        if (lastLavaNode != null)
        {
            var nextRecordMatch = nextRecordRegEx.Match(lastLavaNode.Value);

            // Append when there is no { next } at all, or when there is one but other content follows it
            // (Regex.Match never returns null, so only Success needs checking).
            if (!nextRecordMatch.Success || !lastLavaNode.Value.EndsWith(nextRecordMatch.Value))
            {
                lastLavaNode.Value += " {% next %} ";
            }
        }

        // Decided on the first template pass and reused for all later passes.
        bool? allSameParent = null;

        using (WordprocessingDocument outputDoc = WordprocessingDocument.Open(outputDocStream, true))
        {
            var xdoc = outputDoc.MainDocumentPart.GetXDocument();
            var outputBodyNode = xdoc.DescendantNodes().OfType<XElement>().FirstOrDefault(a => a.Name.LocalName.Equals("body"));
            outputBodyNode.RemoveNodes();

            int recordIndex = 0;
            int? lastRecordIndex = null;
            int recordCount = mergeObjectList.Count;
            while (recordIndex < recordCount)
            {
                if (lastRecordIndex.HasValue && lastRecordIndex == recordIndex)
                {
                    // something went wrong, so throw to avoid spinning infinitely
                    throw new Exception("Unexpected unchanged recordIndex");
                }

                lastRecordIndex = recordIndex;

                // Each pass works on a fresh copy of the simplified template XML.
                // (A per-pass MemoryStream copy of the template used to be made here; it was never read.)
                var tempMergeTemplateX = new XDocument(sourceTemplateDocX);
                var tempMergeTemplateBodyNode = tempMergeTemplateX.DescendantNodes().OfType<XElement>().FirstOrDefault(a => a.Name.LocalName.Equals("body"));

                // find all the Nodes that have a {% next %}.
                List<XElement> nextIndicatorNodes = new List<XElement>();

                OpenXmlRegex.Match(
                    tempMergeTemplateX.Elements(),
                    this.nextRecordRegEx,
                    (x, m) =>
                    {
                        nextIndicatorNodes.Add(x);
                    });

                allSameParent = allSameParent ?? nextIndicatorNodes.Count > 1 && nextIndicatorNodes.Select(a => a.Parent).Distinct().Count() == 1;

                List<XContainer> recordContainerNodes = new List<XContainer>();

                foreach (var nextIndicatorNodeParent in nextIndicatorNodes.Select(a => a.Parent).Where(a => a != null))
                {
                    XContainer recordContainerNode = nextIndicatorNodeParent;
                    if (!allSameParent.Value)
                    {
                        // go up the parent nodes until we have more than one "Next" descendent so that we know what to consider our record container
                        while (recordContainerNode.Parent != null)
                        {
                            if (this.nextRecordRegEx.Matches(recordContainerNode.Parent.Value).Count == 1)
                            {
                                // still just the one "next" indicator, so go out another parent
                                recordContainerNode = recordContainerNode.Parent;
                            }
                            else
                            {
                                // we went too far up the parents and found multiple "next" children, so use this node as the recordContainerNode
                                break;
                            }
                        }
                    }

                    if (!recordContainerNodes.Contains(recordContainerNode))
                    {
                        recordContainerNodes.Add(recordContainerNode);
                    }
                }

                foreach (var recordContainerNode in recordContainerNodes)
                {
                    //// loop thru each of the recordContainerNodes
                    //// If we have more records than nodes, we'll jump out to the outer "while" and append another template and keep going

                    XContainer mergedXRecord;

                    var recordContainerNodeXml = recordContainerNode.ToString(SaveOptions.DisableFormatting | SaveOptions.OmitDuplicateNamespaces).ReplaceWordChars();

                    if (recordIndex >= recordCount)
                    {
                        // out of records, so clear out any remaining template nodes that haven't been merged
                        string xml = recordContainerNodeXml;
                        mergedXRecord = XElement.Parse(xml) as XContainer;
                        OpenXmlRegex.Replace(mergedXRecord.Nodes().OfType<XElement>(), this.regExDot, string.Empty, (a, b) => { return true; });

                        recordIndex++;
                    }
                    else
                    {
                        //// just in case they have shared parent node, or if there is trailing {{ next }} after the last lava
                        //// on the page, split the XML for each record and reassemble it when done
                        List<string> xmlChunks = this.nextRecordRegEx.Split(recordContainerNodeXml).ToList();

                        string mergedXml = string.Empty;

                        foreach (var xml in xmlChunks)
                        {
                            bool incRecordIndex = true;
                            if (lavaRegEx.IsMatch(xml))
                            {
                                if (recordIndex < recordCount)
                                {
                                    try
                                    {
                                        var wordMergeObjects = new LavaDataDictionary();
                                        wordMergeObjects.Add("Row", mergeObjectList[recordIndex]);

                                        foreach (var field in globalMergeFields)
                                        {
                                            wordMergeObjects.Add(field.Key, field.Value);
                                        }

                                        var resolvedXml = xml.ResolveMergeFields(wordMergeObjects, true, true);
                                        mergedXml += resolvedXml;
                                        if (resolvedXml == xml)
                                        {
                                            // there weren't any MergeFields after all, so don't move to the next record
                                            incRecordIndex = false;
                                        }
                                    }
                                    catch (Exception ex)
                                    {
                                        // if ResolveMergeFields failed, log the exception, then just return the orig xml
                                        this.Exceptions.Add(ex);
                                        mergedXml += xml;
                                    }

                                    if (incRecordIndex)
                                    {
                                        recordIndex++;
                                    }
                                }
                                else
                                {
                                    // out of records, so put a special '{% next_empty %}' that we can use to clear up unmerged parts of the template
                                    mergedXml += " {% next_empty %} " + xml;
                                }
                            }
                            else
                            {
                                mergedXml += xml;
                            }
                        }

                        mergedXRecord = XElement.Parse(mergedXml) as XContainer;
                    }

                    // remove the orig nodes and replace with merged nodes
                    recordContainerNode.RemoveNodes();
                    recordContainerNode.Add(mergedXRecord.Nodes().OfType<XElement>());

                    var mergedRecordContainer = XElement.Parse(recordContainerNode.ToString(SaveOptions.DisableFormatting));
                    if (recordContainerNode.Parent != null)
                    {
                        // the recordContainerNode is some child/descendent of <body>
                        recordContainerNode.ReplaceWith(mergedRecordContainer);
                    }
                    else
                    {
                        // the recordContainerNode is the <body>
                        recordContainerNode.RemoveNodes();
                        recordContainerNode.Add(mergedRecordContainer.Nodes());

                        if (recordIndex < recordCount)
                        {
                            // add page break
                            var pageBreakXml = new Paragraph(new Run(new Break() { Type = BreakValues.Page })).OuterXml;
                            var pageBreak = XElement.Parse(pageBreakXml, LoadOptions.None);
                            var lastParagraph = recordContainerNode.Nodes().OfType<XElement>().Where(a => a.Name.LocalName == "p").LastOrDefault();
                            if (lastParagraph != null)
                            {
                                lastParagraph.AddAfterSelf(pageBreak);

                                // Add page formatting for the page before the page break.
                                var lastSectPr = recordContainerNode.Nodes().OfType<XElement>().Where(a => a.Name.LocalName == "sectPr").LastOrDefault();
                                if (lastSectPr != null)
                                {
                                    var paragraphPropertiesXml = new Paragraph(new ParagraphProperties(new SectionProperties(lastSectPr.ToString()))).OuterXml;
                                    var paragraphProperties = XElement.Parse(paragraphPropertiesXml, LoadOptions.None);
                                    pageBreak.AddAfterSelf(paragraphProperties);
                                }
                            }
                        }
                    }
                }

                outputBodyNode.Add(tempMergeTemplateBodyNode.Nodes());
            }

            // remove all the 'next' delimiters
            OpenXmlRegex.Replace(outputBodyNode.Nodes().OfType<XElement>(), this.nextRecordRegEx, string.Empty, (xx, mm) => { return true; });

            // find all the 'next_empty' delimiters that we might have added and clear out the content in the paragraph nodes that follow
            OpenXmlRegex.Match(
                outputBodyNode.Nodes().OfType<XElement>(),
                this.nextEmptyRecordRegEx,
                (xx, mm) =>
                {
                    var afterSiblings = xx.ElementsAfterSelf().ToList();

                    // get all the paragraph elements after the 'next_empty' node and clear out the content
                    var nodesToClean = afterSiblings.Where(a => a.Name.LocalName == "p").ToList();

                    // if the next_empty node has lava, clean that up too
                    var xxContent = xx.ToString();
                    if (lavaRegEx.IsMatch(xxContent))
                    {
                        nodesToClean.Add(xx);
                    }

                    foreach (var node in nodesToClean)
                    {
                        // remove all child nodes from each paragraph node
                        if (node.HasElements)
                        {
                            node.RemoveNodes();
                        }
                    }
                });

            // remove all the 'next_empty' delimiters
            OpenXmlRegex.Replace(outputBodyNode.Nodes().OfType<XElement>(), this.nextEmptyRecordRegEx, string.Empty, (xx, mm) => { return true; });

            // remove all but the last SectionProperties element (there should only be one per section (body))
            var sectPrItems = outputBodyNode.Nodes().OfType<XElement>().Where(a => a.Name.LocalName == "sectPr");

            // Look up the last element once instead of re-scanning the body for every candidate.
            var finalSectPr = sectPrItems.LastOrDefault();
            foreach (var extra in sectPrItems.Where(a => a != finalSectPr).ToList())
            {
                extra.Remove();
            }

            // renumber all the ids to make sure they are unique
            var idAttrs = xdoc.DescendantNodes().OfType<XElement>().Where(a => a.HasAttributes).Select(a => a.Attribute("id")).Where(s => s != null);
            int lastId = 1;
            foreach (var attr in idAttrs)
            {
                attr.Value = lastId.ToString();
                lastId++;
            }

            LavaDataDictionary globalMergeHash = new LavaDataDictionary();
            foreach (var field in globalMergeFields)
            {
                globalMergeHash.Add(field.Key, field.Value);
            }

            HeaderFooterGlobalMerge(outputDoc, globalMergeHash);

            // sweep thru any remaining un-merged body parts for any Lava having to do with Global merge fields
            foreach (var bodyTextPart in outputDoc.MainDocumentPart.Document.Body.Descendants<Text>())
            {
                string nodeText = bodyTextPart.Text.ReplaceWordChars();
                if (lavaRegEx.IsMatch(nodeText))
                {
                    bodyTextPart.Text = nodeText.ResolveMergeFields(globalMergeHash, true, true);
                }
            }

            // remove the last pagebreak
            MarkupSimplifier.SimplifyMarkup(outputDoc, new SimplifyMarkupSettings { RemoveLastRenderedPageBreak = true });

            // If you want to see validation errors
            /*
             * var validator = new OpenXmlValidator();
             * var errors = validator.Validate( outputDoc ).ToList();
             */
        }

        outputBinaryFile = new BinaryFile();
        outputBinaryFile.IsTemporary = true;

        // NOTE(review): outputDocStream is disposed when this using block ends; this presumably relies on
        // the content being consumed by the ContentStream setter or by SaveChanges below - confirm.
        outputBinaryFile.ContentStream = outputDocStream;
        outputBinaryFile.FileName = "MergeTemplateOutput" + Path.GetExtension(templateBinaryFile.FileName);
        outputBinaryFile.MimeType = templateBinaryFile.MimeType;
        outputBinaryFile.BinaryFileTypeId = new BinaryFileTypeService(rockContext).Get(Rock.SystemGuid.BinaryFiletype.DEFAULT.AsGuid()).Id;

        binaryFileService.Add(outputBinaryFile);
        rockContext.SaveChanges();
    }

    return outputBinaryFile;
}
/// <summary>
/// Sample entry point: fixes a typo ("thee" to "the") on every slide of the main
/// presentation, combines it with a second presentation via PresentationBuilder, replaces
/// the trademark placeholder on the second slide of the combined deck and saves the result
/// as "Modified.pptx" in a freshly created timestamped directory.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    DateTime now = DateTime.Now;
    DirectoryInfo outputDirectory = new DirectoryInfo(string.Format("ExampleOutput-{0:00}-{1:00}-{2:00}-{3:00}{4:00}{5:00}", now.Year - 2000, now.Month, now.Day, now.Hour, now.Minute, now.Second));
    outputDirectory.Create();

    // Load both presentations as byte arrays, simulating retrieval from some source
    // such as a SharePoint server.
    byte[] mainBytes = File.ReadAllBytes("../../Presentation1.pptx");
    byte[] hiddenBytes = File.ReadAllBytes("../../HiddenPresentation.pptx");

    // Step 1: replace "thee" with "the" on every slide of the main presentation.
    PmlDocument correctedMain;
    using (var memoryDoc = new OpenXmlMemoryStreamDocument(new PmlDocument("Main.pptx", mainBytes)))
    {
        using (PresentationDocument deck = memoryDoc.GetPresentationDocument())
        {
            var presentationXml = deck.PresentationPart.GetXDocument();
            var slideIds = presentationXml.Root.Elements(P.sldIdLst).Elements(P.sldId);
            foreach (var slideIdElement in slideIds)
            {
                // Resolve the slide part through its relationship id.
                var relationshipId = (string)slideIdElement.Attribute(R.id);
                var slide = deck.PresentationPart.GetPartById(relationshipId);

                var textParagraphs = slide.GetXDocument().Descendants(A.p).ToList();
                OpenXmlRegex.Replace(textParagraphs, new Regex("thee"), "the", null);
                slide.PutXDocument();
            }
        }

        correctedMain = memoryDoc.GetModifiedPmlDocument();
    }

    // Step 2: combine the two presentations into a single presentation.
    var sources = new List<SlideSource>()
    {
        new SlideSource(correctedMain, 0, 1, true),
        new SlideSource(new PmlDocument("Hidden.pptx", hiddenBytes), true),
        new SlideSource(correctedMain, 1, true),
    };
    PmlDocument combined = PresentationBuilder.BuildPresentation(sources);

    // Step 3: replace <# TRADEMARK #> with AdventureWorks (c) on the second slide only.
    PmlDocument finalPresentation;
    using (var memoryDoc = new OpenXmlMemoryStreamDocument(combined))
    {
        using (PresentationDocument deck = memoryDoc.GetPresentationDocument())
        {
            var presentationXml = deck.PresentationPart.GetXDocument();
            var secondSlideId = presentationXml.Root.Elements(P.sldIdLst).Elements(P.sldId).Skip(1).Take(1);
            foreach (var slideIdElement in secondSlideId)
            {
                var relationshipId = (string)slideIdElement.Attribute(R.id);
                var slide = deck.PresentationPart.GetPartById(relationshipId);

                var textParagraphs = slide.GetXDocument().Descendants(A.p).ToList();
                OpenXmlRegex.Replace(textParagraphs, new Regex("<# TRADEMARK #>"), "AdventureWorks (c)", null);
                slide.PutXDocument();
            }
        }

        finalPresentation = memoryDoc.GetModifiedPmlDocument();
    }

    // The PmlDocument (essentially a byte array) can now be saved wherever it is needed.
    finalPresentation.SaveAs(Path.Combine(outputDirectory.FullName, "Modified.pptx"));
}
/// <summary>
/// Recursively rebuilds an XML tree while registering recognized fields with
/// <paramref name="fieldAccumulator"/>. Content controls (w:sdt) without an alias whose
/// text is a field get their w:tag set to the assigned field id; paragraphs (w:p)
/// containing embedded field markers have those markers replaced by wrapped runs;
/// w:lastRenderedPageBreak elements are dropped.
/// </summary>
/// <param name="node">Node to transform. Non-element nodes are returned unchanged.</param>
/// <param name="fieldAccumulator">Receives every field found, plus block begin/end notifications for paragraphs.</param>
/// <returns>
/// The transformed element, the original node when it is not an element,
/// or null for w:lastRenderedPageBreak.
/// </returns>
private static object IdentifyAndTransformFields(XNode node, FieldAccumulator fieldAccumulator)
{
    var element = node as XElement;
    if (element == null)
    {
        return node;
    }

    if (element.Name == W.sdt)
    {
        var alias = (string)element.Elements(W.sdtPr).Elements(W.alias).Attributes(W.val).FirstOrDefault();
        if (string.IsNullOrEmpty(alias))
        {
            var controlText = element
                .DescendantsTrimmed(W.txbxContent)
                .Where(e => e.Name == W.t)
                .Select(t => (string)t)
                .StringConcatenate()
                .CleanUpInvalidCharacters();

            if (FieldRecognizer.IsField(controlText, out controlText))
            {
                var rebuiltControl = new XElement(element.Name, element.Attributes());

                // Move the existing w:sdtPr (detaching it from the source element) or create a new one.
                var properties = element.Elements(W.sdtPr).FirstOrDefault();
                if (properties != null)
                {
                    properties.Remove();
                }
                else
                {
                    properties = new XElement(W.sdtPr);
                }
                rebuiltControl.Add(properties);

                var tag = properties.Elements(W.tag).FirstOrDefault();
                if (tag == null)
                {
                    tag = new XElement(W.tag);
                    properties.Add(tag);
                }

                // The tag value carries the id handed out by the accumulator.
                tag.SetAttributeValue(W.val, fieldAccumulator.AddField(controlText));

                // w:sdtPr was detached above, so this adds only the remaining child nodes.
                rebuiltControl.Add(element.Nodes());
                return rebuiltControl;
            }
        }
        // Any other content control falls through to the generic recursive copy below.
    }
    else if (element.Name == W.p)
    {
        fieldAccumulator.BeginBlock();

        var paragraphText = element
            .DescendantsTrimmed(W.txbxContent)
            .Where(e => e.Name == W.t)
            .Select(t => (string)t)
            .StringConcatenate()
            .Trim();

        int embedCount = CountSubstring(FieldRecognizer.EmbedBegin, paragraphText);
        bool wholeParagraphIsOneEmbed =
            embedCount == 1
            && paragraphText.StartsWith(FieldRecognizer.EmbedBegin + FieldRecognizer.FieldBegin)
            && paragraphText.EndsWith(FieldRecognizer.FieldEnd + FieldRecognizer.EmbedEnd);

        if (wholeParagraphIsOneEmbed)
        {
            // Fast path: strip the outer embed delimiters and test what is left.
            var soleField = paragraphText
                .Substring(
                    FieldRecognizer.EmbedBegin.Length,
                    paragraphText.Length - FieldRecognizer.EmbedBegin.Length - FieldRecognizer.EmbedEnd.Length)
                .Trim();

            if (FieldRecognizer.IsField(soleField, out soleField))
            {
                var soleFieldId = fieldAccumulator.AddField(soleField);
                fieldAccumulator.EndBlock();

                // The replacement run takes the paragraph-level run properties, if present.
                var paragraphProps = element.Elements(W.pPr).FirstOrDefault();
                var paragraphRunProps = paragraphProps?.Elements(W.rPr).FirstOrDefault();
                var fieldRun = new XElement(W.r,
                    paragraphRunProps,
                    new XElement(W.t, FieldRecognizer.FieldBegin + soleField + FieldRecognizer.FieldEnd));

                return new XElement(element.Name,
                    element.Attributes(),
                    element.Elements(W.pPr),
                    CCTWrap(soleFieldId, fieldRun));
            }
            // else fall through to (slower) case
        }

        if (paragraphText.Contains(FieldRecognizer.EmbedBegin + FieldRecognizer.FieldBegin))
        {
            fieldAccumulator.RegisterNonFieldContentInBlock();

            var wrappedFields = new List<XElement>();
            var placeholder = Guid.NewGuid().ToString();
            var embedPattern = new Regex(
                Regex.Escape(FieldRecognizer.EmbedBegin)
                + "\\s*"
                + Regex.Escape(FieldRecognizer.FieldBegin)
                + ".*?"
                + Regex.Escape(FieldRecognizer.FieldEnd)
                + "\\s*"
                + Regex.Escape(FieldRecognizer.EmbedEnd));

            // Each accepted match is swapped for the placeholder text, and the wrapped
            // replacement run is queued in the same order.
            var replacedCount = OpenXmlRegex.Replace(new[] { element }, embedPattern, placeholder, (para, match) =>
            {
                // unrecognized codes/elements returned as \u0001; strip these
                var matched = match.Value.Trim().Replace("\u0001", "");
                var candidate = matched
                    .Substring(
                        FieldRecognizer.EmbedBegin.Length,
                        matched.Length - FieldRecognizer.EmbedBegin.Length - FieldRecognizer.EmbedEnd.Length)
                    .CleanUpInvalidCharacters();

                if (!FieldRecognizer.IsField(candidate, out candidate))
                {
                    return(false);
                }

                wrappedFields.Add(CCWrap(new XElement(W.r,
                    new XElement(W.t, FieldRecognizer.FieldBegin + candidate + FieldRecognizer.FieldEnd))));
                return(true);
            }, false);

            if (replacedCount > 0)
            {
                var paragraphCopy = new XElement(element);
                foreach (var wrapped in wrappedFields)
                {
                    // Always the first remaining placeholder run, which matches the queue order.
                    var placeholderRun = paragraphCopy.Descendants(W.r).FirstOrDefault(
                        rn => rn.Value == placeholder && rn.Parent.Name != Templater.OD.Content);
                    if (placeholderRun == null)
                    {
                        throw new InvalidOperationException("Internal error");
                    }

                    // Carry the placeholder run's formatting over to the wrapped run.
                    var runProps = placeholderRun.Elements(W.rPr).FirstOrDefault();
                    if (runProps != null)
                    {
                        runProps.Remove();
                        wrapped.Elements(W.sdtContent).First().Elements(W.r).First().AddFirst(runProps);
                    }

                    placeholderRun.ReplaceWith(wrapped);
                }

                var coalesced = WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(paragraphCopy);
                var transformed = IdentifyAndTransformFields(coalesced, fieldAccumulator);
                fieldAccumulator.EndBlock();
                return transformed;
            }
        }

        // Ordinary paragraph: children are transformed eagerly so that nested
        // processing finishes before the block is closed.
        var transformedChildren = element.Nodes()
            .Select(n => IdentifyAndTransformFields(n, fieldAccumulator))
            .ToArray();
        fieldAccumulator.EndBlock();
        return new XElement(element.Name, element.Attributes(), transformedChildren);
    }
    else if (element.Name == W.lastRenderedPageBreak)
    {
        // documents assembled from templates will almost always change pagination, so remove Word's pagination hints
        // (also because they're not handled cleanly by OXPT)
        return null;
    }

    // Generic case: copy the element and run every child node through this transform.
    return new XElement(element.Name,
        element.Attributes(),
        element.Nodes().Select(n => IdentifyAndTransformFields(n, fieldAccumulator)));
}
/// <summary>
/// Runs the OpenXmlRegex match/replace examples against a copy of TestDocument.docx
/// placed in <c>TempDir</c>, logging the count reported by each example, and writes
/// the modified main document part back at the end.
/// </summary>
public void WordSample1()
{
    var sourceDoc = new FileInfo(GetFilePath("TestDocument.docx"));
    var newDoc = new FileInfo(Path.Combine(TempDir, "Modified.docx"));
    File.Copy(sourceDoc.FullName, newDoc.FullName, true);

    using var wDoc = WordprocessingDocument.Open(newDoc.FullName, true);
    var xDoc = wDoc.MainDocumentPart.GetXDocument();

    // Match content (paragraph 1)
    var content = xDoc.Descendants(W.p).Take(1);
    var regex = new Regex("Video");
    var count = OpenXmlRegex.Match(content, regex);
    Log.WriteLine("Example #1 Count: {0}", count);

    // Match content, case insensitive (paragraph 1)
    content = xDoc.Descendants(W.p).Take(1);
    regex = new Regex("video", RegexOptions.IgnoreCase);
    count = OpenXmlRegex.Match(content, regex);
    Log.WriteLine("Example #2 Count: {0}", count);

    // Match content, with callback (paragraph 1)
    content = xDoc.Descendants(W.p).Take(1);
    regex = new Regex("video", RegexOptions.IgnoreCase);
    OpenXmlRegex.Match(content, regex, (element, match) =>
        Log.WriteLine("Example #3 Found value: >{0}<", match.Value));

    // Replace content, beginning of paragraph (paragraph 2)
    content = xDoc.Descendants(W.p).Skip(1).Take(1);
    regex = new Regex("^Video provides");
    count = OpenXmlRegex.Replace(content, regex, "Audio gives", null);
    Log.WriteLine("Example #4 Replaced: {0}", count);

    // Replace content, middle of paragraph (paragraph 3)
    content = xDoc.Descendants(W.p).Skip(2).Take(1);
    regex = new Regex("powerful");
    count = OpenXmlRegex.Replace(content, regex, "good", null);
    Log.WriteLine("Example #5 Replaced: {0}", count);

    // Replace content, end of paragraph (paragraph 4)
    content = xDoc.Descendants(W.p).Skip(3).Take(1);
    regex = new Regex(" [a-z.]*$");
    count = OpenXmlRegex.Replace(content, regex, " super good point!", null);
    Log.WriteLine("Example #6 Replaced: {0}", count);

    // Delete content, beginning of paragraph (paragraph 5)
    content = xDoc.Descendants(W.p).Skip(4).Take(1);
    regex = new Regex("^Video provides");
    count = OpenXmlRegex.Replace(content, regex, "", null);
    Log.WriteLine("Example #7 Deleted: {0}", count);

    // Delete content, middle of paragraph (paragraph 6)
    content = xDoc.Descendants(W.p).Skip(5).Take(1);
    regex = new Regex("powerful ");
    count = OpenXmlRegex.Replace(content, regex, "", null);
    Log.WriteLine("Example #8 Deleted: {0}", count);

    // Delete content, end of paragraph (paragraph 7)
    content = xDoc.Descendants(W.p).Skip(6).Take(1);
    regex = new Regex("[.]$");
    count = OpenXmlRegex.Replace(content, regex, "", null);
    Log.WriteLine("Example #9 Deleted: {0}", count);

    // Replace content in inserted text, same author (paragraph 8)
    // (this is a replacement, so the log line says "Replaced", not "Deleted")
    content = xDoc.Descendants(W.p).Skip(7).Take(1);
    regex = new Regex("Video");
    count = OpenXmlRegex.Replace(content, regex, "Audio", null, true, "Eric White");
    Log.WriteLine("Example #10 Replaced: {0}", count);

    // Delete content in inserted text, same author (paragraph 9)
    content = xDoc.Descendants(W.p).Skip(8).Take(1);
    regex = new Regex("powerful ");
    count = OpenXmlRegex.Replace(content, regex, "", null, true, "Eric White");
    Log.WriteLine("Example #11 Deleted: {0}", count);

    // Replace content partially in inserted text, same author (paragraph 10)
    content = xDoc.Descendants(W.p).Skip(9).Take(1);
    regex = new Regex("Video provides ");
    count = OpenXmlRegex.Replace(content, regex, "Audio gives ", null, true, "Eric White");
    Log.WriteLine("Example #12 Replaced: {0}", count);

    // Delete content partially in inserted text, same author (paragraph 11)
    content = xDoc.Descendants(W.p).Skip(10).Take(1);
    regex = new Regex(" to help you prove your point");
    count = OpenXmlRegex.Replace(content, regex, "", null, true, "Eric White");
    Log.WriteLine("Example #13 Deleted: {0}", count);

    // Replace content in inserted text, different author (paragraph 12)
    // (this is a replacement, so the log line says "Replaced", not "Deleted")
    content = xDoc.Descendants(W.p).Skip(11).Take(1);
    regex = new Regex("Video");
    count = OpenXmlRegex.Replace(content, regex, "Audio", null, true, "John Doe");
    Log.WriteLine("Example #14 Replaced: {0}", count);

    // Delete content in inserted text, different author (paragraph 13)
    content = xDoc.Descendants(W.p).Skip(12).Take(1);
    regex = new Regex("powerful ");
    count = OpenXmlRegex.Replace(content, regex, "", null, true, "John Doe");
    Log.WriteLine("Example #15 Deleted: {0}", count);

    // Replace content partially in inserted text, different author (paragraph 14)
    content = xDoc.Descendants(W.p).Skip(13).Take(1);
    regex = new Regex("Video provides ");
    count = OpenXmlRegex.Replace(content, regex, "Audio gives ", null, true, "John Doe");
    Log.WriteLine("Example #16 Replaced: {0}", count);

    // Delete content partially in inserted text, different author (paragraph 15)
    content = xDoc.Descendants(W.p).Skip(14).Take(1);
    regex = new Regex(" to help you prove your point");
    count = OpenXmlRegex.Replace(content, regex, "", null, true, "John Doe");
    Log.WriteLine("Example #17 Deleted: {0}", count);

    const string LeftDoubleQuotationMarks = @"[\u0022“„«»”]";
    const string Words = @"[\w\-&/]+(?:\s[\w\-&/]+)*";
    const string RightDoubleQuotationMarks = @"[\u0022”‟»«“]";

    // Replace content using replacement pattern (paragraph 16)
    content = xDoc.Descendants(W.p).Skip(15).Take(1);
    regex = new Regex($"{LeftDoubleQuotationMarks}(?<words>{Words}){RightDoubleQuotationMarks}");
    count = OpenXmlRegex.Replace(content, regex, "‘${words}’", null);
    Log.WriteLine("Example #18 Replaced: {0}", count);

    // Replace content using replacement pattern in partially inserted text (paragraph 17)
    content = xDoc.Descendants(W.p).Skip(16).Take(1);
    regex = new Regex($"{LeftDoubleQuotationMarks}(?<words>{Words}){RightDoubleQuotationMarks}");
    count = OpenXmlRegex.Replace(content, regex, "‘${words}’", null, true, "John Doe");
    Log.WriteLine("Example #19 Replaced: {0}", count);

    // Replace content using replacement pattern (paragraph 18)
    content = xDoc.Descendants(W.p).Skip(17).Take(1);
    regex = new Regex($"({LeftDoubleQuotationMarks})(video)({RightDoubleQuotationMarks})");
    count = OpenXmlRegex.Replace(content, regex, "$1audio$3", null, true, "John Doe");
    Log.WriteLine("Example #20 Replaced: {0}", count);

    // Recognize tabs (paragraph 19)
    content = xDoc.Descendants(W.p).Skip(18).Take(1);
    regex = new Regex(@"([1-9])\.\t");
    count = OpenXmlRegex.Replace(content, regex, "($1)\t", null);
    Log.WriteLine("Example #21 Replaced: {0}", count);

    // The next two examples deal with line breaks, i.e., the <w:br/> elements.
    // Note that you should use the U+000D (Carriage Return) character (i.e., '\r')
    // to match a <w:br/> (or <w:cr/>) and replace content with a <w:br/> element.
    // Depending on your platform, the end of line character(s) provided by
    // Environment.NewLine might be "\n" (Unix), "\r\n" (Windows), or "\r" (Mac).

    // Recognize tabs and insert line breaks (paragraph 20).
    content = xDoc.Descendants(W.p).Skip(19).Take(1);
    regex = new Regex($@"([1-9])\.{UnicodeMapper.HorizontalTabulation}");
    count = OpenXmlRegex.Replace(content, regex, $"Article $1{UnicodeMapper.CarriageReturn}", null);
    Log.WriteLine("Example #22 Replaced: {0}", count);

    // Recognize and remove line breaks (paragraph 21)
    content = xDoc.Descendants(W.p).Skip(20).Take(1);
    regex = new Regex($"{UnicodeMapper.CarriageReturn}");
    count = OpenXmlRegex.Replace(content, regex, " ", null);
    Log.WriteLine("Example #23 Replaced: {0}", count);

    // Remove soft hyphens (paragraph 22)
    // The paragraph is materialized once because two replacements run over it.
    var paras = xDoc.Descendants(W.p).Skip(21).Take(1).ToList();
    count = OpenXmlRegex.Replace(paras, new Regex($"{UnicodeMapper.SoftHyphen}"), "", null);
    count += OpenXmlRegex.Replace(paras, new Regex("use"), "no longer use", null);
    Log.WriteLine("Example #24 Replaced: {0}", count);

    // The next example deals with symbols (i.e., w:sym elements).
    // To work with symbols, you should acquire the Unicode values for the
    // symbols you wish to match or use in replacement patterns. The reason
    // is that UnicodeMapper will (a) mimic Microsoft Word in shifting the
    // Unicode values into the Unicode private use area (by adding U+F000)
    // and (b) use replacements for Unicode values that have been used in
    // conjunction with different fonts already (by adding U+E000).
    //
    // The replacement Unicode values will depend on the order in which
    // symbols are retrieved. Therefore, you should not rely on any fixed
    // assignment.
    //
    // In the example below, pencil will be represented by U+F021, whereas
    // spider (same value with different font) will be represented by U+E001.
    // If spider had been assigned first, spider would be U+F021 and pencil
    // would be U+E001.
    var oldPhone = UnicodeMapper.SymToChar("Wingdings", 40);
    var newPhone = UnicodeMapper.SymToChar("Wingdings", 41);
    var pencil = UnicodeMapper.SymToChar("Wingdings", 0x21);
    var spider = UnicodeMapper.SymToChar("Webdings", 0x21);

    // Replace or comment on symbols (paragraph 23)
    paras = xDoc.Descendants(W.p).Skip(22).Take(1).ToList();
    count = OpenXmlRegex.Replace(paras, new Regex($"{oldPhone}"), $"{newPhone} (replaced with new phone)", null);
    count += OpenXmlRegex.Replace(paras, new Regex($"({pencil})"), "$1 (same pencil)", null);
    count += OpenXmlRegex.Replace(paras, new Regex($"({spider})"), "$1 (same spider)", null);
    Log.WriteLine("Example #25 Replaced: {0}", count);

    // Write the modified XDocument back into the main document part.
    wDoc.MainDocumentPart.PutXDocument();
}
/// <summary>
/// Corrects "thee" to "the" in the main presentation, combines it with the hidden
/// presentation, replaces the trademark placeholder on the second slide of the
/// combined deck, and saves the result under <c>TempDir</c>.
/// </summary>
public void Sample2()
{
    string mainPath = GetFilePath("Sample2/Presentation1.pptx");
    string hiddenPath = GetFilePath("Sample2/HiddenPresentation.pptx");

    // First, load both presentations into byte arrays, simulating retrieving
    // presentations from some source such as a SharePoint server.
    byte[] mainBytes = File.ReadAllBytes(mainPath);
    byte[] hiddenBytes = File.ReadAllBytes(hiddenPath);

    // Next, replace "thee" with "the" in the main presentation.
    var typoPattern = new Regex("thee");
    PmlDocument correctedMain;
    using (var mainStream = new OpenXmlMemoryStreamDocument(new PmlDocument("Main.pptx", mainBytes)))
    {
        using (var mainDeck = mainStream.GetPresentationDocument())
        {
            var presentationXml = mainDeck.PresentationPart.GetXDocument();
            foreach (var idElement in presentationXml.Root.Elements(P.sldIdLst).Elements(P.sldId))
            {
                var part = mainDeck.PresentationPart.GetPartById((string)idElement.Attribute(R.id));
                var slideParagraphs = part.GetXDocument().Descendants(A.p).ToList();
                OpenXmlRegex.Replace(slideParagraphs, typoPattern, "the", null);
                part.PutXDocument();
            }
        }

        // Fetched only after the inner presentation document has been disposed.
        correctedMain = mainStream.GetModifiedPmlDocument();
    }

    // Combine the two presentations into a single presentation.
    var hiddenDocument = new PmlDocument("Hidden.pptx", hiddenBytes);
    var sources = new List<SlideSource>
    {
        new SlideSource(correctedMain, 0, 1, true),
        new SlideSource(hiddenDocument, true),
        new SlideSource(correctedMain, 1, true),
    };
    var combined = PresentationBuilder.BuildPresentation(sources);

    // Replace <# TRADEMARK #> with AdventureWorks (c), on the second slide only.
    var trademarkPattern = new Regex("<# TRADEMARK #>");
    PmlDocument finalPresentation;
    using (var combinedStream = new OpenXmlMemoryStreamDocument(combined))
    {
        using (var combinedDeck = combinedStream.GetPresentationDocument())
        {
            var presentationXml = combinedDeck.PresentationPart.GetXDocument();
            var secondSlideOnly = presentationXml.Root.Elements(P.sldIdLst).Elements(P.sldId).Skip(1).Take(1);
            foreach (var idElement in secondSlideOnly)
            {
                var part = combinedDeck.PresentationPart.GetPartById((string)idElement.Attribute(R.id));
                var slideParagraphs = part.GetXDocument().Descendants(A.p).ToList();
                OpenXmlRegex.Replace(slideParagraphs, trademarkPattern, "AdventureWorks (c)", null);
                part.PutXDocument();
            }
        }

        finalPresentation = combinedStream.GetModifiedPmlDocument();
    }

    // We now have a PmlDocument (which is essentially a byte array) that can be saved as necessary.
    string outputPath = Path.Combine(TempDir, "ModifiedCombinedPresentation.pptx");
    finalPresentation.SaveAs(outputPath);
}
/// <summary>
/// Sample entry point demonstrating OpenXmlRegex matching and replacing: first on a
/// copy of TestDocument.docx (examples #1 to #25), then on every slide of a copy of
/// TestPresentation.pptx (example #26). Copies are written to a time-stamped directory.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(string[] args)
{
    // The output directory name is derived from the current local time.
    DateTime n = DateTime.Now;
    var tempDi = new DirectoryInfo(
        $"ExampleOutput-{n.Year - 2000:00}-{n.Month:00}-{n.Day:00}-{n.Hour:00}{n.Minute:00}{n.Second:00}");
    tempDi.Create();

    var sourceDoc = new FileInfo("../../TestDocument.docx");
    var newDoc = new FileInfo(Path.Combine(tempDi.FullName, "Modified.docx"));
    File.Copy(sourceDoc.FullName, newDoc.FullName);

    using (WordprocessingDocument wDoc = WordprocessingDocument.Open(newDoc.FullName, true))
    {
        XDocument xDoc = wDoc.MainDocumentPart.GetXDocument();

        // Match content (paragraph 1)
        IEnumerable<XElement> content = xDoc.Descendants(W.p).Take(1);
        var regex = new Regex("Video");
        int count = OpenXmlRegex.Match(content, regex);
        Console.WriteLine("Example #1 Count: {0}", count);

        // Match content, case insensitive (paragraph 1)
        content = xDoc.Descendants(W.p).Take(1);
        regex = new Regex("video", RegexOptions.IgnoreCase);
        count = OpenXmlRegex.Match(content, regex);
        Console.WriteLine("Example #2 Count: {0}", count);

        // Match content, with callback (paragraph 1)
        content = xDoc.Descendants(W.p).Take(1);
        regex = new Regex("video", RegexOptions.IgnoreCase);
        OpenXmlRegex.Match(content, regex, (element, match) =>
            Console.WriteLine("Example #3 Found value: >{0}<", match.Value));

        // Replace content, beginning of paragraph (paragraph 2)
        content = xDoc.Descendants(W.p).Skip(1).Take(1);
        regex = new Regex("^Video provides");
        count = OpenXmlRegex.Replace(content, regex, "Audio gives", null);
        Console.WriteLine("Example #4 Replaced: {0}", count);

        // Replace content, middle of paragraph (paragraph 3)
        content = xDoc.Descendants(W.p).Skip(2).Take(1);
        regex = new Regex("powerful");
        count = OpenXmlRegex.Replace(content, regex, "good", null);
        Console.WriteLine("Example #5 Replaced: {0}", count);

        // Replace content, end of paragraph (paragraph 4)
        content = xDoc.Descendants(W.p).Skip(3).Take(1);
        regex = new Regex(" [a-z.]*$");
        count = OpenXmlRegex.Replace(content, regex, " super good point!", null);
        Console.WriteLine("Example #6 Replaced: {0}", count);

        // Delete content, beginning of paragraph (paragraph 5)
        content = xDoc.Descendants(W.p).Skip(4).Take(1);
        regex = new Regex("^Video provides");
        count = OpenXmlRegex.Replace(content, regex, "", null);
        Console.WriteLine("Example #7 Deleted: {0}", count);

        // Delete content, middle of paragraph (paragraph 6)
        content = xDoc.Descendants(W.p).Skip(5).Take(1);
        regex = new Regex("powerful ");
        count = OpenXmlRegex.Replace(content, regex, "", null);
        Console.WriteLine("Example #8 Deleted: {0}", count);

        // Delete content, end of paragraph (paragraph 7)
        content = xDoc.Descendants(W.p).Skip(6).Take(1);
        regex = new Regex("[.]$");
        count = OpenXmlRegex.Replace(content, regex, "", null);
        Console.WriteLine("Example #9 Deleted: {0}", count);

        // Replace content in inserted text, same author (paragraph 8)
        // (this is a replacement, so the output says "Replaced", not "Deleted")
        content = xDoc.Descendants(W.p).Skip(7).Take(1);
        regex = new Regex("Video");
        count = OpenXmlRegex.Replace(content, regex, "Audio", null, true, "Eric White");
        Console.WriteLine("Example #10 Replaced: {0}", count);

        // Delete content in inserted text, same author (paragraph 9)
        content = xDoc.Descendants(W.p).Skip(8).Take(1);
        regex = new Regex("powerful ");
        count = OpenXmlRegex.Replace(content, regex, "", null, true, "Eric White");
        Console.WriteLine("Example #11 Deleted: {0}", count);

        // Replace content partially in inserted text, same author (paragraph 10)
        content = xDoc.Descendants(W.p).Skip(9).Take(1);
        regex = new Regex("Video provides ");
        count = OpenXmlRegex.Replace(content, regex, "Audio gives ", null, true, "Eric White");
        Console.WriteLine("Example #12 Replaced: {0}", count);

        // Delete content partially in inserted text, same author (paragraph 11)
        content = xDoc.Descendants(W.p).Skip(10).Take(1);
        regex = new Regex(" to help you prove your point");
        count = OpenXmlRegex.Replace(content, regex, "", null, true, "Eric White");
        Console.WriteLine("Example #13 Deleted: {0}", count);

        // Replace content in inserted text, different author (paragraph 12)
        // (this is a replacement, so the output says "Replaced", not "Deleted")
        content = xDoc.Descendants(W.p).Skip(11).Take(1);
        regex = new Regex("Video");
        count = OpenXmlRegex.Replace(content, regex, "Audio", null, true, "John Doe");
        Console.WriteLine("Example #14 Replaced: {0}", count);

        // Delete content in inserted text, different author (paragraph 13)
        content = xDoc.Descendants(W.p).Skip(12).Take(1);
        regex = new Regex("powerful ");
        count = OpenXmlRegex.Replace(content, regex, "", null, true, "John Doe");
        Console.WriteLine("Example #15 Deleted: {0}", count);

        // Replace content partially in inserted text, different author (paragraph 14)
        content = xDoc.Descendants(W.p).Skip(13).Take(1);
        regex = new Regex("Video provides ");
        count = OpenXmlRegex.Replace(content, regex, "Audio gives ", null, true, "John Doe");
        Console.WriteLine("Example #16 Replaced: {0}", count);

        // Delete content partially in inserted text, different author (paragraph 15)
        content = xDoc.Descendants(W.p).Skip(14).Take(1);
        regex = new Regex(" to help you prove your point");
        count = OpenXmlRegex.Replace(content, regex, "", null, true, "John Doe");
        Console.WriteLine("Example #17 Deleted: {0}", count);

        const string leftDoubleQuotationMarks = @"[\u0022“„«»”]";
        const string words = @"[\w\-&/]+(?:\s[\w\-&/]+)*";
        const string rightDoubleQuotationMarks = @"[\u0022”‟»«“]";

        // Replace content using replacement pattern (paragraph 16)
        content = xDoc.Descendants(W.p).Skip(15).Take(1);
        regex = new Regex($"{leftDoubleQuotationMarks}(?<words>{words}){rightDoubleQuotationMarks}");
        count = OpenXmlRegex.Replace(content, regex, "‘${words}’", null);
        Console.WriteLine("Example #18 Replaced: {0}", count);

        // Replace content using replacement pattern in partially inserted text (paragraph 17)
        content = xDoc.Descendants(W.p).Skip(16).Take(1);
        regex = new Regex($"{leftDoubleQuotationMarks}(?<words>{words}){rightDoubleQuotationMarks}");
        count = OpenXmlRegex.Replace(content, regex, "‘${words}’", null, true, "John Doe");
        Console.WriteLine("Example #19 Replaced: {0}", count);

        // Replace content using replacement pattern (paragraph 18)
        content = xDoc.Descendants(W.p).Skip(17).Take(1);
        regex = new Regex($"({leftDoubleQuotationMarks})(video)({rightDoubleQuotationMarks})");
        count = OpenXmlRegex.Replace(content, regex, "$1audio$3", null, true, "John Doe");
        Console.WriteLine("Example #20 Replaced: {0}", count);

        // Recognize tabs (paragraph 19)
        content = xDoc.Descendants(W.p).Skip(18).Take(1);
        regex = new Regex(@"([1-9])\.\t");
        count = OpenXmlRegex.Replace(content, regex, "($1)\t", null);
        Console.WriteLine("Example #21 Replaced: {0}", count);

        // The next two examples deal with line breaks, i.e., the <w:br/> elements.
        // Note that you should use the U+000D (Carriage Return) character (i.e., '\r')
        // to match a <w:br/> (or <w:cr/>) and replace content with a <w:br/> element.
        // Depending on your platform, the end of line character(s) provided by
        // Environment.NewLine might be "\n" (Unix), "\r\n" (Windows), or "\r" (Mac).

        // Recognize tabs and insert line breaks (paragraph 20).
        content = xDoc.Descendants(W.p).Skip(19).Take(1);
        regex = new Regex($@"([1-9])\.{UnicodeMapper.HorizontalTabulation}");
        count = OpenXmlRegex.Replace(content, regex, $"Article $1{UnicodeMapper.CarriageReturn}", null);
        Console.WriteLine("Example #22 Replaced: {0}", count);

        // Recognize and remove line breaks (paragraph 21)
        content = xDoc.Descendants(W.p).Skip(20).Take(1);
        regex = new Regex($"{UnicodeMapper.CarriageReturn}");
        count = OpenXmlRegex.Replace(content, regex, " ", null);
        Console.WriteLine("Example #23 Replaced: {0}", count);

        // Remove soft hyphens (paragraph 22)
        // The paragraph is materialized once because two replacements run over it.
        List<XElement> paras = xDoc.Descendants(W.p).Skip(21).Take(1).ToList();
        count = OpenXmlRegex.Replace(paras, new Regex($"{UnicodeMapper.SoftHyphen}"), "", null);
        count += OpenXmlRegex.Replace(paras, new Regex("use"), "no longer use", null);
        Console.WriteLine("Example #24 Replaced: {0}", count);

        // The next example deals with symbols (i.e., w:sym elements).
        // To work with symbols, you should acquire the Unicode values for the
        // symbols you wish to match or use in replacement patterns. The reason
        // is that UnicodeMapper will (a) mimic Microsoft Word in shifting the
        // Unicode values into the Unicode private use area (by adding U+F000)
        // and (b) use replacements for Unicode values that have been used in
        // conjunction with different fonts already (by adding U+E000).
        //
        // The replacement Unicode values will depend on the order in which
        // symbols are retrieved. Therefore, you should not rely on any fixed
        // assignment.
        //
        // In the example below, pencil will be represented by U+F021, whereas
        // spider (same value with different font) will be represented by U+E001.
        // If spider had been assigned first, spider would be U+F021 and pencil
        // would be U+E001.
        char oldPhone = UnicodeMapper.SymToChar("Wingdings", 40);
        char newPhone = UnicodeMapper.SymToChar("Wingdings", 41);
        char pencil = UnicodeMapper.SymToChar("Wingdings", 0x21);
        char spider = UnicodeMapper.SymToChar("Webdings", 0x21);

        // Replace or comment on symbols (paragraph 23)
        paras = xDoc.Descendants(W.p).Skip(22).Take(1).ToList();
        count = OpenXmlRegex.Replace(paras, new Regex($"{oldPhone}"), $"{newPhone} (replaced with new phone)", null);
        count += OpenXmlRegex.Replace(paras, new Regex($"({pencil})"), "$1 (same pencil)", null);
        count += OpenXmlRegex.Replace(paras, new Regex($"({spider})"), "$1 (same spider)", null);
        Console.WriteLine("Example #25 Replaced: {0}", count);

        // Write the modified XDocument back into the main document part.
        wDoc.MainDocumentPart.PutXDocument();
    }

    var sourcePres = new FileInfo("../../TestPresentation.pptx");
    var newPres = new FileInfo(Path.Combine(tempDi.FullName, "Modified.pptx"));
    File.Copy(sourcePres.FullName, newPres.FullName);

    using (PresentationDocument pDoc = PresentationDocument.Open(newPres.FullName, true))
    {
        foreach (SlidePart slidePart in pDoc.PresentationPart.SlideParts)
        {
            XDocument xDoc = slidePart.GetXDocument();

            // Replace content
            // Labelled #26 because #18 is already used by a Word example above;
            // the line is printed once per slide.
            IEnumerable<XElement> content = xDoc.Descendants(A.p);
            var regex = new Regex("Hello");
            int count = OpenXmlRegex.Replace(content, regex, "H e l l o", null);
            Console.WriteLine("Example #26 Replaced: {0}", count);

            // If you absolutely want to preserve compatibility with PowerPoint 2007, then you will need
            // to strip the xml:space="preserve" attribute throughout.
            // This is an issue for PowerPoint only, not Word, and for 2007 only.
            // The side-effect of this is that if a run has space at the beginning or end of it, the space
            // will be stripped upon loading, and content/layout will be affected.
            xDoc.Descendants().Attributes(XNamespace.Xml + "space").Remove();

            slidePart.PutXDocument();
        }
    }
}
/// <summary>
/// Recursively rebuilds an XML tree, converting template metadata written as text
/// into metadata elements. Metadata is recognized in content controls (w:sdt) whose
/// text starts with "&lt;", and in paragraphs (w:p) containing "&lt;# ... #&gt;" spans.
/// </summary>
/// <param name="node">Node to transform. Non-element nodes are returned unchanged.</param>
/// <param name="data">Not read by this method; only forwarded to the recursive calls.</param>
/// <param name="te">Error tracker handed to the helpers that parse metadata or build error messages.</param>
/// <returns>
/// A transformed element, a sequence of runs (when erroneous metadata sits inside a
/// paragraph), or the original node when it is not an element.
/// </returns>
private static object TransformToMetadata(XNode node, XElement data, TemplateError te)
{
    XElement element = node as XElement;
    if (element == null)
    {
        return(node);
    }

    if (element.Name == W.sdt)
    {
        var alias = (string)element.Elements(W.sdtPr).Elements(W.alias).Attributes(W.val).FirstOrDefault();
        if (string.IsNullOrEmpty(alias) || s_AliasList.Contains(alias))
        {
            // Curly double quotes are normalized to straight quotes before the text is parsed.
            var ccContents = element
                .DescendantsTrimmed(W.txbxContent)
                .Where(e => e.Name == W.t)
                .Select(t => (string)t)
                .StringConcatenate()
                .Trim()
                .Replace('“', '"')
                .Replace('”', '"');

            // "<" is a syntactic delimiter, so compare ordinally rather than by culture.
            if (ccContents.StartsWith("<", StringComparison.Ordinal))
            {
                XElement xml = TransformXmlTextToMetadata(te, ccContents);
                if (xml.Name == W.p || xml.Name == W.r) // this means there was an error processing the XML.
                {
                    if (element.Parent.Name == W.p)
                    {
                        return(xml.Elements(W.r));
                    }
                    return(xml);
                }

                // An empty alias is treated like a missing one: the first check above already
                // accepts it, and it could never equal a metadata element name.
                if (!string.IsNullOrEmpty(alias) && xml.Name.LocalName != alias)
                {
                    if (element.Parent.Name == W.p)
                    {
                        return(CreateRunErrorMessage("Error: Content control alias does not match metadata element name", te));
                    }
                    else
                    {
                        return(CreateParaErrorMessage("Error: Content control alias does not match metadata element name", te));
                    }
                }

                xml.Add(element.Elements(W.sdtContent).Elements());
                return(xml);
            }
        }

        // Not a metadata content control: copy it and transform its children.
        return(new XElement(element.Name,
            element.Attributes(),
            element.Nodes().Select(n => TransformToMetadata(n, data, te))));
    }

    if (element.Name == W.p)
    {
        var paraContents = element
            .DescendantsTrimmed(W.txbxContent)
            .Where(e => e.Name == W.t)
            .Select(t => (string)t)
            .StringConcatenate()
            .Trim();

        // Count the "<#" openers with a single ordinal scan ("<#" cannot overlap itself).
        int occurances = 0;
        for (int at = paraContents.IndexOf("<#", StringComparison.Ordinal);
             at >= 0;
             at = paraContents.IndexOf("<#", at + 2, StringComparison.Ordinal))
        {
            occurances++;
        }

        // The length check rejects "<#>", which starts with "<#" and ends with "#>"
        // but is too short for the Substring call below.
        if (occurances == 1
            && paraContents.Length >= 4
            && paraContents.StartsWith("<#", StringComparison.Ordinal)
            && paraContents.EndsWith("#>", StringComparison.Ordinal))
        {
            var xmlText = paraContents.Substring(2, paraContents.Length - 4).Trim();
            XElement xml = TransformXmlTextToMetadata(te, xmlText);
            if (xml.Name == W.p || xml.Name == W.r)
            {
                return(xml);
            }
            xml.Add(element);
            return(xml);
        }

        if (paraContents.Contains("<#"))
        {
            List<RunReplacementInfo> runReplacementInfo = new List<RunReplacementInfo>();
            var thisGuid = Guid.NewGuid().ToString();
            var r = new Regex("<#.*?#>");

            // Every match is replaced by the GUID placeholder; the parse result (or the
            // error to show in its place) is queued in the same order.
            OpenXmlRegex.Replace(new[] { element }, r, thisGuid, (para, match) =>
            {
                var matchString = match.Value.Trim();
                var xmlText = matchString
                    .Substring(2, matchString.Length - 4)
                    .Trim()
                    .Replace('“', '"')
                    .Replace('”', '"');

                XElement xml;
                try
                {
                    xml = XElement.Parse(xmlText);
                }
                catch (XmlException ex)
                {
                    runReplacementInfo.Add(new RunReplacementInfo()
                    {
                        Xml = null,
                        XmlExceptionMessage = "XmlException: " + ex.Message,
                        SchemaValidationMessage = null,
                    });
                    return(true);
                }

                string schemaError = ValidatePerSchema(xml);
                if (schemaError != null)
                {
                    runReplacementInfo.Add(new RunReplacementInfo()
                    {
                        Xml = null,
                        XmlExceptionMessage = null,
                        SchemaValidationMessage = "Schema Validation Error: " + schemaError,
                    });
                    return(true);
                }

                runReplacementInfo.Add(new RunReplacementInfo()
                {
                    Xml = xml,
                    XmlExceptionMessage = null,
                    SchemaValidationMessage = null,
                });
                return(true);
            }, false);

            var newPara = new XElement(element);
            foreach (var rri in runReplacementInfo)
            {
                // Always the first remaining placeholder run, which matches the queue order.
                var runToReplace = newPara.Descendants(W.r).FirstOrDefault(
                    rn => rn.Value == thisGuid && rn.Parent.Name != PA.Content);
                if (runToReplace == null)
                {
                    throw new OpenXmlPowerToolsException("Internal error");
                }

                if (rri.XmlExceptionMessage != null)
                {
                    runToReplace.ReplaceWith(CreateRunErrorMessage(rri.XmlExceptionMessage, te));
                }
                else if (rri.SchemaValidationMessage != null)
                {
                    runToReplace.ReplaceWith(CreateRunErrorMessage(rri.SchemaValidationMessage, te));
                }
                else
                {
                    // The metadata element takes the placeholder run's position and
                    // receives a copy of that run as its content.
                    var newXml = new XElement(rri.Xml);
                    newXml.Add(runToReplace);
                    runToReplace.ReplaceWith(newXml);
                }
            }

            var coalescedParagraph = WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(newPara);
            return(coalescedParagraph);
        }
    }

    // Generic case (including paragraphs without metadata): copy the element
    // and transform its children.
    return(new XElement(element.Name,
        element.Attributes(),
        element.Nodes().Select(n => TransformToMetadata(n, data, te))));
}