/// <summary>
/// Sample entry point that exercises OpenXmlRegex against a Word document and a PowerPoint presentation.
/// Creates a time-stamped "ExampleOutput-..." directory, copies ../../TestDocument.docx and
/// ../../TestPresentation.pptx into it as Modified.docx / Modified.pptx, runs the match/replace
/// examples against the copies, and writes each example's count to the console.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Output directory name is derived from the current local time: ExampleOutput-yy-MM-dd-HHmmss
    var n = DateTime.Now;
    var tempDi = new DirectoryInfo(string.Format("ExampleOutput-{0:00}-{1:00}-{2:00}-{3:00}{4:00}{5:00}", n.Year - 2000, n.Month, n.Day, n.Hour, n.Minute, n.Second));
    tempDi.Create();

    // Work on a copy so the source document is never modified.
    var sourceDoc = new FileInfo("../../TestDocument.docx");
    var newDoc = new FileInfo(Path.Combine(tempDi.FullName, "Modified.docx"));
    File.Copy(sourceDoc.FullName, newDoc.FullName);

    using (WordprocessingDocument wDoc = WordprocessingDocument.Open(newDoc.FullName, true))
    {
        int count;
        Regex regex;
        IEnumerable<XElement> content;
        var xDoc = wDoc.MainDocumentPart.GetXDocument();

        // Each example re-queries xDoc.Descendants(W.p) so it always sees the current state of the tree.

        // Match content (paragraph 1)
        content = xDoc.Descendants(W.p).Take(1);
        regex = new Regex("Video");
        count = OpenXmlRegex.Match(content, regex);
        Console.WriteLine("Example #1 Count: {0}", count);

        // Match content, case insensitive (paragraph 1)
        content = xDoc.Descendants(W.p).Take(1);
        regex = new Regex("video", RegexOptions.IgnoreCase);
        count = OpenXmlRegex.Match(content, regex);
        Console.WriteLine("Example #2 Count: {0}", count);

        // Match content, with callback (paragraph 1)
        // The returned count is not reported for this example; the callback prints every match instead.
        content = xDoc.Descendants(W.p).Take(1);
        regex = new Regex("video", RegexOptions.IgnoreCase);
        OpenXmlRegex.Match(content, regex, (element, match) =>
            Console.WriteLine("Example #3 Found value: >{0}<", match.Value));

        // Replace content, beginning of paragraph (paragraph 2)
        content = xDoc.Descendants(W.p).Skip(1).Take(1);
        regex = new Regex("^Video provides");
        count = OpenXmlRegex.Replace(content, regex, "Audio gives", null);
        Console.WriteLine("Example #4 Replaced: {0}", count);

        // Replace content, middle of paragraph (paragraph 3)
        content = xDoc.Descendants(W.p).Skip(2).Take(1);
        regex = new Regex("powerful");
        count = OpenXmlRegex.Replace(content, regex, "good", null);
        Console.WriteLine("Example #5 Replaced: {0}", count);

        // Replace content, end of paragraph (paragraph 4)
        content = xDoc.Descendants(W.p).Skip(3).Take(1);
        regex = new Regex(" [a-z.]*$");
        count = OpenXmlRegex.Replace(content, regex, " super good point!", null);
        Console.WriteLine("Example #6 Replaced: {0}", count);

        // Delete content, beginning of paragraph (paragraph 5)
        content = xDoc.Descendants(W.p).Skip(4).Take(1);
        regex = new Regex("^Video provides");
        count = OpenXmlRegex.Replace(content, regex, "", null);
        Console.WriteLine("Example #7 Deleted: {0}", count);

        // Delete content, middle of paragraph (paragraph 6)
        content = xDoc.Descendants(W.p).Skip(5).Take(1);
        regex = new Regex("powerful ");
        count = OpenXmlRegex.Replace(content, regex, "", null);
        Console.WriteLine("Example #8 Deleted: {0}", count);

        // Delete content, end of paragraph (paragraph 7)
        content = xDoc.Descendants(W.p).Skip(6).Take(1);
        regex = new Regex("[.]$");
        count = OpenXmlRegex.Replace(content, regex, "", null);
        Console.WriteLine("Example #9 Deleted: {0}", count);

        // NOTE(review): in the examples below the trailing `true, "<author>"` arguments presumably
        // enable tracked revisions attributed to that author - confirm against OpenXmlRegex.Replace.

        // Replace content in inserted text, same author (paragraph 8)
        // (label fixed: this example replaces text, it does not delete it)
        content = xDoc.Descendants(W.p).Skip(7).Take(1);
        regex = new Regex("Video");
        count = OpenXmlRegex.Replace(content, regex, "Audio", null, true, "Eric White");
        Console.WriteLine("Example #10 Replaced: {0}", count);

        // Delete content in inserted text, same author (paragraph 9)
        content = xDoc.Descendants(W.p).Skip(8).Take(1);
        regex = new Regex("powerful ");
        count = OpenXmlRegex.Replace(content, regex, "", null, true, "Eric White");
        Console.WriteLine("Example #11 Deleted: {0}", count);

        // Replace content partially in inserted text, same author (paragraph 10)
        content = xDoc.Descendants(W.p).Skip(9).Take(1);
        regex = new Regex("Video provides ");
        count = OpenXmlRegex.Replace(content, regex, "Audio gives ", null, true, "Eric White");
        Console.WriteLine("Example #12 Replaced: {0}", count);

        // Delete content partially in inserted text, same author (paragraph 11)
        content = xDoc.Descendants(W.p).Skip(10).Take(1);
        regex = new Regex(" to help you prove your point");
        count = OpenXmlRegex.Replace(content, regex, "", null, true, "Eric White");
        Console.WriteLine("Example #13 Deleted: {0}", count);

        // Replace content in inserted text, different author (paragraph 12)
        // (label fixed: this example replaces text, it does not delete it)
        content = xDoc.Descendants(W.p).Skip(11).Take(1);
        regex = new Regex("Video");
        count = OpenXmlRegex.Replace(content, regex, "Audio", null, true, "John Doe");
        Console.WriteLine("Example #14 Replaced: {0}", count);

        // Delete content in inserted text, different author (paragraph 13)
        content = xDoc.Descendants(W.p).Skip(12).Take(1);
        regex = new Regex("powerful ");
        count = OpenXmlRegex.Replace(content, regex, "", null, true, "John Doe");
        Console.WriteLine("Example #15 Deleted: {0}", count);

        // Replace content partially in inserted text, different author (paragraph 14)
        content = xDoc.Descendants(W.p).Skip(13).Take(1);
        regex = new Regex("Video provides ");
        count = OpenXmlRegex.Replace(content, regex, "Audio gives ", null, true, "John Doe");
        Console.WriteLine("Example #16 Replaced: {0}", count);

        // Delete content partially in inserted text, different author (paragraph 15)
        content = xDoc.Descendants(W.p).Skip(14).Take(1);
        regex = new Regex(" to help you prove your point");
        count = OpenXmlRegex.Replace(content, regex, "", null, true, "John Doe");
        Console.WriteLine("Example #17 Deleted: {0}", count);

        // Write the modified XDocument back into the main document part.
        wDoc.MainDocumentPart.PutXDocument();
    }

    var sourcePres = new FileInfo("../../TestPresentation.pptx");
    var newPres = new FileInfo(Path.Combine(tempDi.FullName, "Modified.pptx"));
    File.Copy(sourcePres.FullName, newPres.FullName);

    using (PresentationDocument pDoc = PresentationDocument.Open(newPres.FullName, true))
    {
        foreach (var slidePart in pDoc.PresentationPart.SlideParts)
        {
            int count;
            Regex regex;
            IEnumerable<XElement> content;
            var xDoc = slidePart.GetXDocument();

            // Replace content
            content = xDoc.Descendants(A.p);
            regex = new Regex("Hello");
            count = OpenXmlRegex.Replace(content, regex, "H e l l o", null);
            Console.WriteLine("Example #18 Replaced: {0}", count);

            // If you absolutely want to preserve compatibility with PowerPoint 2007, then you will need to strip the xml:space="preserve" attribute throughout.
            // This is an issue for PowerPoint only, not Word, and for 2007 only.
            // The side-effect of this is that if a run has space at the beginning or end of it, the space will be stripped upon loading, and content/layout will be affected.
            xDoc.Descendants().Attributes(XNamespace.Xml + "space").Remove();

            slidePart.PutXDocument();
        }
    }
}
/// <summary>
/// Sample entry point that exercises OpenXmlRegex against a Word document and a PowerPoint presentation.
/// Creates a time-stamped "ExampleOutput-..." directory, copies ../../TestDocument.docx and
/// ../../TestPresentation.pptx into it as Modified.docx / Modified.pptx, runs the match/replace
/// examples against the copies, and writes each example's count to the console.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static void Main(string[] args)
{
    // Output directory name is derived from the current local time: ExampleOutput-yy-MM-dd-HHmmss
    DateTime n = DateTime.Now;
    var tempDi = new DirectoryInfo(
        $"ExampleOutput-{n.Year - 2000:00}-{n.Month:00}-{n.Day:00}-{n.Hour:00}{n.Minute:00}{n.Second:00}");
    tempDi.Create();

    // Work on a copy so the source document is never modified.
    var sourceDoc = new FileInfo("../../TestDocument.docx");
    var newDoc = new FileInfo(Path.Combine(tempDi.FullName, "Modified.docx"));
    File.Copy(sourceDoc.FullName, newDoc.FullName);

    using (WordprocessingDocument wDoc = WordprocessingDocument.Open(newDoc.FullName, true))
    {
        XDocument xDoc = wDoc.MainDocumentPart.GetXDocument();

        // Each example re-queries xDoc.Descendants(W.p) so it always sees the current state of the tree.

        // Match content (paragraph 1)
        IEnumerable<XElement> content = xDoc.Descendants(W.p).Take(1);
        var regex = new Regex("Video");
        int count = OpenXmlRegex.Match(content, regex);
        Console.WriteLine("Example #1 Count: {0}", count);

        // Match content, case insensitive (paragraph 1)
        content = xDoc.Descendants(W.p).Take(1);
        regex = new Regex("video", RegexOptions.IgnoreCase);
        count = OpenXmlRegex.Match(content, regex);
        Console.WriteLine("Example #2 Count: {0}", count);

        // Match content, with callback (paragraph 1)
        content = xDoc.Descendants(W.p).Take(1);
        regex = new Regex("video", RegexOptions.IgnoreCase);
        OpenXmlRegex.Match(content, regex, (element, match) =>
            Console.WriteLine("Example #3 Found value: >{0}<", match.Value));

        // Replace content, beginning of paragraph (paragraph 2)
        content = xDoc.Descendants(W.p).Skip(1).Take(1);
        regex = new Regex("^Video provides");
        count = OpenXmlRegex.Replace(content, regex, "Audio gives", null);
        Console.WriteLine("Example #4 Replaced: {0}", count);

        // Replace content, middle of paragraph (paragraph 3)
        content = xDoc.Descendants(W.p).Skip(2).Take(1);
        regex = new Regex("powerful");
        count = OpenXmlRegex.Replace(content, regex, "good", null);
        Console.WriteLine("Example #5 Replaced: {0}", count);

        // Replace content, end of paragraph (paragraph 4)
        content = xDoc.Descendants(W.p).Skip(3).Take(1);
        regex = new Regex(" [a-z.]*$");
        count = OpenXmlRegex.Replace(content, regex, " super good point!", null);
        Console.WriteLine("Example #6 Replaced: {0}", count);

        // Delete content, beginning of paragraph (paragraph 5)
        content = xDoc.Descendants(W.p).Skip(4).Take(1);
        regex = new Regex("^Video provides");
        count = OpenXmlRegex.Replace(content, regex, "", null);
        Console.WriteLine("Example #7 Deleted: {0}", count);

        // Delete content, middle of paragraph (paragraph 6)
        content = xDoc.Descendants(W.p).Skip(5).Take(1);
        regex = new Regex("powerful ");
        count = OpenXmlRegex.Replace(content, regex, "", null);
        Console.WriteLine("Example #8 Deleted: {0}", count);

        // Delete content, end of paragraph (paragraph 7)
        content = xDoc.Descendants(W.p).Skip(6).Take(1);
        regex = new Regex("[.]$");
        count = OpenXmlRegex.Replace(content, regex, "", null);
        Console.WriteLine("Example #9 Deleted: {0}", count);

        // NOTE(review): in the examples below the trailing `true, "<author>"` arguments presumably
        // enable tracked revisions attributed to that author - confirm against OpenXmlRegex.Replace.

        // Replace content in inserted text, same author (paragraph 8)
        // (label fixed: this example replaces text, it does not delete it)
        content = xDoc.Descendants(W.p).Skip(7).Take(1);
        regex = new Regex("Video");
        count = OpenXmlRegex.Replace(content, regex, "Audio", null, true, "Eric White");
        Console.WriteLine("Example #10 Replaced: {0}", count);

        // Delete content in inserted text, same author (paragraph 9)
        content = xDoc.Descendants(W.p).Skip(8).Take(1);
        regex = new Regex("powerful ");
        count = OpenXmlRegex.Replace(content, regex, "", null, true, "Eric White");
        Console.WriteLine("Example #11 Deleted: {0}", count);

        // Replace content partially in inserted text, same author (paragraph 10)
        content = xDoc.Descendants(W.p).Skip(9).Take(1);
        regex = new Regex("Video provides ");
        count = OpenXmlRegex.Replace(content, regex, "Audio gives ", null, true, "Eric White");
        Console.WriteLine("Example #12 Replaced: {0}", count);

        // Delete content partially in inserted text, same author (paragraph 11)
        content = xDoc.Descendants(W.p).Skip(10).Take(1);
        regex = new Regex(" to help you prove your point");
        count = OpenXmlRegex.Replace(content, regex, "", null, true, "Eric White");
        Console.WriteLine("Example #13 Deleted: {0}", count);

        // Replace content in inserted text, different author (paragraph 12)
        // (label fixed: this example replaces text, it does not delete it)
        content = xDoc.Descendants(W.p).Skip(11).Take(1);
        regex = new Regex("Video");
        count = OpenXmlRegex.Replace(content, regex, "Audio", null, true, "John Doe");
        Console.WriteLine("Example #14 Replaced: {0}", count);

        // Delete content in inserted text, different author (paragraph 13)
        content = xDoc.Descendants(W.p).Skip(12).Take(1);
        regex = new Regex("powerful ");
        count = OpenXmlRegex.Replace(content, regex, "", null, true, "John Doe");
        Console.WriteLine("Example #15 Deleted: {0}", count);

        // Replace content partially in inserted text, different author (paragraph 14)
        content = xDoc.Descendants(W.p).Skip(13).Take(1);
        regex = new Regex("Video provides ");
        count = OpenXmlRegex.Replace(content, regex, "Audio gives ", null, true, "John Doe");
        Console.WriteLine("Example #16 Replaced: {0}", count);

        // Delete content partially in inserted text, different author (paragraph 15)
        content = xDoc.Descendants(W.p).Skip(14).Take(1);
        regex = new Regex(" to help you prove your point");
        count = OpenXmlRegex.Replace(content, regex, "", null, true, "John Doe");
        Console.WriteLine("Example #17 Deleted: {0}", count);

        // Building blocks for the quotation-mark examples: a character class of opening double
        // quotation marks, one or more whitespace-separated words, and a class of closing marks.
        const string leftDoubleQuotationMarks = @"[\u0022“„«»”]";
        const string words = @"[\w\-&/]+(?:\s[\w\-&/]+)*";
        const string rightDoubleQuotationMarks = @"[\u0022”‟»«“]";

        // Replace content using replacement pattern (paragraph 16)
        content = xDoc.Descendants(W.p).Skip(15).Take(1);
        regex = new Regex($"{leftDoubleQuotationMarks}(?<words>{words}){rightDoubleQuotationMarks}");
        count = OpenXmlRegex.Replace(content, regex, "‘${words}’", null);
        Console.WriteLine("Example #18 Replaced: {0}", count);

        // Replace content using replacement pattern in partially inserted text (paragraph 17)
        content = xDoc.Descendants(W.p).Skip(16).Take(1);
        regex = new Regex($"{leftDoubleQuotationMarks}(?<words>{words}){rightDoubleQuotationMarks}");
        count = OpenXmlRegex.Replace(content, regex, "‘${words}’", null, true, "John Doe");
        Console.WriteLine("Example #19 Replaced: {0}", count);

        // Replace content using replacement pattern (paragraph 18)
        content = xDoc.Descendants(W.p).Skip(17).Take(1);
        regex = new Regex($"({leftDoubleQuotationMarks})(video)({rightDoubleQuotationMarks})");
        count = OpenXmlRegex.Replace(content, regex, "$1audio$3", null, true, "John Doe");
        Console.WriteLine("Example #20 Replaced: {0}", count);

        // Recognize tabs (paragraph 19)
        content = xDoc.Descendants(W.p).Skip(18).Take(1);
        regex = new Regex(@"([1-9])\.\t");
        count = OpenXmlRegex.Replace(content, regex, "($1)\t", null);
        Console.WriteLine("Example #21 Replaced: {0}", count);

        // The next two examples deal with line breaks, i.e., the <w:br/> elements.
        // Note that you should use the U+000D (Carriage Return) character (i.e., '\r')
        // to match a <w:br/> (or <w:cr/>) and replace content with a <w:br/> element.
        // Depending on your platform, the end of line character(s) provided by
        // Environment.NewLine might be "\n" (Unix), "\r\n" (Windows), or "\r" (Mac).

        // Recognize tabs and insert line breaks (paragraph 20).
        content = xDoc.Descendants(W.p).Skip(19).Take(1);
        regex = new Regex($@"([1-9])\.{UnicodeMapper.HorizontalTabulation}");
        count = OpenXmlRegex.Replace(content, regex, $"Article $1{UnicodeMapper.CarriageReturn}", null);
        Console.WriteLine("Example #22 Replaced: {0}", count);

        // Recognize and remove line breaks (paragraph 21)
        content = xDoc.Descendants(W.p).Skip(20).Take(1);
        regex = new Regex($"{UnicodeMapper.CarriageReturn}");
        count = OpenXmlRegex.Replace(content, regex, " ", null);
        Console.WriteLine("Example #23 Replaced: {0}", count);

        // Remove soft hyphens (paragraph 22)
        // The paragraph is materialized once because two replacements run against it.
        List<XElement> paras = xDoc.Descendants(W.p).Skip(21).Take(1).ToList();
        count = OpenXmlRegex.Replace(paras, new Regex($"{UnicodeMapper.SoftHyphen}"), "", null);
        count += OpenXmlRegex.Replace(paras, new Regex("use"), "no longer use", null);
        Console.WriteLine("Example #24 Replaced: {0}", count);

        // The next example deals with symbols (i.e., w:sym elements).
        // To work with symbols, you should acquire the Unicode values for the
        // symbols you wish to match or use in replacement patterns. The reason
        // is that UnicodeMapper will (a) mimic Microsoft Word in shifting the
        // Unicode values into the Unicode private use area (by adding U+F000)
        // and (b) use replacements for Unicode values that have been used in
        // conjunction with different fonts already (by adding U+E000).
        //
        // The replacement Unicode values will depend on the order in which
        // symbols are retrieved. Therefore, you should not rely on any fixed
        // assignment.
        //
        // In the example below, pencil will be represented by U+F021, whereas
        // spider (same value with different font) will be represented by U+E001.
        // If spider had been assigned first, spider would be U+F021 and pencil
        // would be U+E001.
        char oldPhone = UnicodeMapper.SymToChar("Wingdings", 40);
        char newPhone = UnicodeMapper.SymToChar("Wingdings", 41);
        char pencil = UnicodeMapper.SymToChar("Wingdings", 0x21);
        char spider = UnicodeMapper.SymToChar("Webdings", 0x21);

        // Replace or comment on symbols (paragraph 23)
        paras = xDoc.Descendants(W.p).Skip(22).Take(1).ToList();
        count = OpenXmlRegex.Replace(paras, new Regex($"{oldPhone}"), $"{newPhone} (replaced with new phone)", null);
        count += OpenXmlRegex.Replace(paras, new Regex($"({pencil})"), "$1 (same pencil)", null);
        count += OpenXmlRegex.Replace(paras, new Regex($"({spider})"), "$1 (same spider)", null);
        Console.WriteLine("Example #25 Replaced: {0}", count);

        // Write the modified XDocument back into the main document part.
        wDoc.MainDocumentPart.PutXDocument();
    }

    var sourcePres = new FileInfo("../../TestPresentation.pptx");
    var newPres = new FileInfo(Path.Combine(tempDi.FullName, "Modified.pptx"));
    File.Copy(sourcePres.FullName, newPres.FullName);

    using (PresentationDocument pDoc = PresentationDocument.Open(newPres.FullName, true))
    {
        foreach (SlidePart slidePart in pDoc.PresentationPart.SlideParts)
        {
            XDocument xDoc = slidePart.GetXDocument();

            // Replace content
            // (label renumbered: #18 through #25 are already used by the Word examples above)
            IEnumerable<XElement> content = xDoc.Descendants(A.p);
            var regex = new Regex("Hello");
            int count = OpenXmlRegex.Replace(content, regex, "H e l l o", null);
            Console.WriteLine("Example #26 Replaced: {0}", count);

            // If you absolutely want to preserve compatibility with PowerPoint 2007, then you will need to strip the xml:space="preserve" attribute throughout.
            // This is an issue for PowerPoint only, not Word, and for 2007 only.
            // The side-effect of this is that if a run has space at the beginning or end of it, the space will be stripped upon loading, and content/layout will be affected.
            xDoc.Descendants().Attributes(XNamespace.Xml + "space").Remove();

            slidePart.PutXDocument();
        }
    }
}
/// <summary>
/// Creates the merged Word document: the template is simplified once, then cloned and merged
/// repeatedly until every object in <paramref name="mergeObjectList"/> has been consumed, and the
/// result is stored as a new temporary <see cref="BinaryFile"/>.
/// Exceptions raised while resolving merge fields for a chunk are collected in
/// <c>Exceptions</c> instead of being thrown; the unmerged chunk is kept in that case.
/// </summary>
/// <param name="mergeTemplate">The merge template.</param>
/// <param name="mergeObjectList">The merge object list; each entry is exposed to the template as "Row".</param>
/// <param name="globalMergeFields">The global merge fields, added to every record's merge objects and used for the header/footer and final body sweep.</param>
/// <returns>
/// The saved temporary <see cref="BinaryFile"/> holding the merged document, or <c>null</c> when
/// the template's binary file cannot be found.
/// </returns>
/// <exception cref="Exception">Thrown when a pass over the template does not advance the record index (guards against an endless loop).</exception>
public override BinaryFile CreateDocument(MergeTemplate mergeTemplate, List<object> mergeObjectList, Dictionary<string, object> globalMergeFields)
{
    this.Exceptions = new List<Exception>();
    BinaryFile outputBinaryFile = null;

    var rockContext = new RockContext();
    var binaryFileService = new BinaryFileService(rockContext);

    var templateBinaryFile = binaryFileService.Get(mergeTemplate.TemplateBinaryFileId);
    if (templateBinaryFile == null)
    {
        return null;
    }

    // Start by creating a new document with the contents of the Template (so that Styles, etc get included)
    XDocument sourceTemplateDocX;

    // NOTE: On using multiple IDisposable, see https://stackoverflow.com/a/12603126/1755417 and https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/keywords/using-statement
    using (MemoryStream sourceTemplateStream = new MemoryStream(), outputDocStream = new MemoryStream())
    {
        // NOTE(review): ContentStream is copied twice; this presumably relies on the property
        // yielding the content from the start on each access - confirm against BinaryFile.
        templateBinaryFile.ContentStream.CopyTo(outputDocStream);
        outputDocStream.Seek(0, SeekOrigin.Begin);

        // now that we have the outputdoc started, simplify the sourceTemplate
        templateBinaryFile.ContentStream.CopyTo(sourceTemplateStream);
        sourceTemplateStream.Seek(0, SeekOrigin.Begin);

        // The package is only needed to simplify the markup and hand out the XDocument, so it is
        // disposed right away (it was previously never disposed). All later work uses the
        // in-memory XDocument only.
        using (var simplifiedDoc = WordprocessingDocument.Open(sourceTemplateStream, true))
        {
            MarkupSimplifier.SimplifyMarkup(simplifiedDoc, this.simplifyMarkupSettingsAll);
            sourceTemplateDocX = simplifiedDoc.MainDocumentPart.GetXDocument();
        }

        //// simplify any nodes that have Lava in it that might not have been caught by the MarkupSimplifier
        //// MarkupSimplifier only merges superfluous runs that are children of a paragraph
        OpenXmlRegex.Match(
            sourceTemplateDocX.Elements(),
            this.lavaRegEx,
            (x, m) =>
            {
                // every distinct parent of an "r" (run) element below the matched element
                var runParents = x.DescendantNodes().OfType<XElement>()
                    .Where(a => a.Parent != null && a.Name != null)
                    .Where(a => (a.Name.LocalName == "r"))
                    .Select(a => a.Parent)
                    .Distinct()
                    .ToList();

                foreach (var nonParagraphRunsParent in runParents)
                {
                    if (lavaRegEx.IsMatch(nonParagraphRunsParent.Value))
                    {
                        // move the nodes into a temporary paragraph so the run merge can be applied, then move them back
                        var tempParent = XElement.Parse(new Paragraph().OuterXml);
                        tempParent.Add(nonParagraphRunsParent.Nodes());
                        tempParent = MarkupSimplifier.MergeAdjacentSuperfluousRuns(tempParent);
                        nonParagraphRunsParent.ReplaceNodes(tempParent.Nodes());
                    }
                }
            });

        XElement lastLavaNode = sourceTemplateDocX.DescendantNodes().OfType<XElement>().LastOrDefault(a => lavaRegEx.IsMatch(a.Value));

        // ensure there is a { Next } indicator after the last lava node in the template
        if (lastLavaNode != null)
        {
            var nextRecordMatch = nextRecordRegEx.Match(lastLavaNode.Value);

            // Append when the last lava node has no { next } at all, or when it has one but there is
            // stuff after it (just in case). Regex.Match never returns null, so Success is the only check needed.
            if (!nextRecordMatch.Success || !lastLavaNode.Value.EndsWith(nextRecordMatch.Value))
            {
                lastLavaNode.Value += " {% next %} ";
            }
        }

        // decided on the first pass only, then reused for every later copy of the template
        bool? allSameParent = null;

        using (WordprocessingDocument outputDoc = WordprocessingDocument.Open(outputDocStream, true))
        {
            var xdoc = outputDoc.MainDocumentPart.GetXDocument();
            var outputBodyNode = xdoc.DescendantNodes().OfType<XElement>().FirstOrDefault(a => a.Name.LocalName.Equals("body"));
            outputBodyNode.RemoveNodes();

            int recordIndex = 0;
            int? lastRecordIndex = null;
            int recordCount = mergeObjectList.Count;
            while (recordIndex < recordCount)
            {
                if (lastRecordIndex.HasValue && lastRecordIndex == recordIndex)
                {
                    // something went wrong, so throw to avoid spinning infinitely
                    throw new Exception("Unexpected unchanged recordIndex");
                }

                lastRecordIndex = recordIndex;

                // Each pass merges into a fresh deep copy of the simplified template.
                // (The previous per-pass copy of sourceTemplateStream into a temporary stream was
                // never read, so it has been removed.)
                var tempMergeTemplateX = new XDocument(sourceTemplateDocX);
                var tempMergeTemplateBodyNode = tempMergeTemplateX.DescendantNodes().OfType<XElement>().FirstOrDefault(a => a.Name.LocalName.Equals("body"));

                // find all the Nodes that have a {% next %}.
                List<XElement> nextIndicatorNodes = new List<XElement>();

                OpenXmlRegex.Match(
                    tempMergeTemplateX.Elements(),
                    this.nextRecordRegEx,
                    (x, m) =>
                    {
                        nextIndicatorNodes.Add(x);
                    });

                allSameParent = allSameParent ?? nextIndicatorNodes.Count > 1 && nextIndicatorNodes.Select(a => a.Parent).Distinct().Count() == 1;

                List<XContainer> recordContainerNodes = new List<XContainer>();

                foreach (var nextIndicatorNodeParent in nextIndicatorNodes.Select(a => a.Parent).Where(a => a != null))
                {
                    XContainer recordContainerNode = nextIndicatorNodeParent;
                    if (!allSameParent.Value)
                    {
                        // go up the parent nodes until we have more than one "Next" descendent so that we know what to consider our record container
                        while (recordContainerNode.Parent != null)
                        {
                            if (this.nextRecordRegEx.Matches(recordContainerNode.Parent.Value).Count == 1)
                            {
                                // still just the one "next" indicator, so go out another parent
                                recordContainerNode = recordContainerNode.Parent;
                            }
                            else
                            {
                                // we went too far up the parents and found multiple "next" children, so use this node as the recordContainerNode
                                break;
                            }
                        }
                    }

                    if (!recordContainerNodes.Contains(recordContainerNode))
                    {
                        recordContainerNodes.Add(recordContainerNode);
                    }
                }

                foreach (var recordContainerNode in recordContainerNodes)
                {
                    //// loop thru each of the recordContainerNodes
                    //// If we have more records than nodes, we'll jump out to the outer "while" and append another template and keep going
                    XContainer mergedXRecord;

                    var recordContainerNodeXml = recordContainerNode.ToString(SaveOptions.DisableFormatting | SaveOptions.OmitDuplicateNamespaces).ReplaceWordChars();

                    if (recordIndex >= recordCount)
                    {
                        // out of records, so clear out any remaining template nodes that haven't been merged
                        string xml = recordContainerNodeXml;
                        mergedXRecord = XElement.Parse(xml) as XContainer;
                        OpenXmlRegex.Replace(mergedXRecord.Nodes().OfType<XElement>(), this.regExDot, string.Empty, (a, b) => { return true; });

                        recordIndex++;
                    }
                    else
                    {
                        //// just in case they have shared parent node, or if there is trailing {{ next }} after the last lava
                        //// on the page, split the XML for each record and reassemble it when done
                        List<string> xmlChunks = this.nextRecordRegEx.Split(recordContainerNodeXml).ToList();

                        string mergedXml = string.Empty;

                        foreach (var xml in xmlChunks)
                        {
                            bool incRecordIndex = true;
                            if (lavaRegEx.IsMatch(xml))
                            {
                                if (recordIndex < recordCount)
                                {
                                    try
                                    {
                                        var wordMergeObjects = new LavaDataDictionary();
                                        wordMergeObjects.Add("Row", mergeObjectList[recordIndex]);

                                        foreach (var field in globalMergeFields)
                                        {
                                            wordMergeObjects.Add(field.Key, field.Value);
                                        }

                                        var resolvedXml = xml.ResolveMergeFields(wordMergeObjects, true, true);
                                        mergedXml += resolvedXml;
                                        if (resolvedXml == xml)
                                        {
                                            // there weren't any MergeFields after all, so don't move to the next record
                                            incRecordIndex = false;
                                        }
                                    }
                                    catch (Exception ex)
                                    {
                                        // if ResolveMergeFields failed, log the exception, then just return the orig xml
                                        this.Exceptions.Add(ex);
                                        mergedXml += xml;
                                    }

                                    if (incRecordIndex)
                                    {
                                        recordIndex++;
                                    }
                                }
                                else
                                {
                                    // out of records, so put a special '{% next_empty %}' that we can use to clear up unmerged parts of the template
                                    mergedXml += " {% next_empty %} " + xml;
                                }
                            }
                            else
                            {
                                mergedXml += xml;
                            }
                        }

                        mergedXRecord = XElement.Parse(mergedXml) as XContainer;
                    }

                    // remove the orig nodes and replace with merged nodes
                    recordContainerNode.RemoveNodes();
                    recordContainerNode.Add(mergedXRecord.Nodes().OfType<XElement>());

                    var mergedRecordContainer = XElement.Parse(recordContainerNode.ToString(SaveOptions.DisableFormatting));
                    if (recordContainerNode.Parent != null)
                    {
                        // the recordContainerNode is some child/descendent of <body>
                        recordContainerNode.ReplaceWith(mergedRecordContainer);
                    }
                    else
                    {
                        // the recordContainerNode is the <body>
                        recordContainerNode.RemoveNodes();
                        recordContainerNode.Add(mergedRecordContainer.Nodes());

                        if (recordIndex < recordCount)
                        {
                            // add page break
                            var pageBreakXml = new Paragraph(new Run(new Break() { Type = BreakValues.Page })).OuterXml;
                            var pageBreak = XElement.Parse(pageBreakXml, LoadOptions.None);
                            var lastParagraph = recordContainerNode.Nodes().OfType<XElement>().Where(a => a.Name.LocalName == "p").LastOrDefault();
                            if (lastParagraph != null)
                            {
                                lastParagraph.AddAfterSelf(pageBreak);

                                // Add page formatting for the page before the page break.
                                var lastSectPr = recordContainerNode.Nodes().OfType<XElement>().Where(a => a.Name.LocalName == "sectPr").LastOrDefault();
                                if (lastSectPr != null)
                                {
                                    var paragraphPropertiesXml = new Paragraph(new ParagraphProperties(new SectionProperties(lastSectPr.ToString()))).OuterXml;
                                    var paragraphProperties = XElement.Parse(paragraphPropertiesXml, LoadOptions.None);
                                    pageBreak.AddAfterSelf(paragraphProperties);
                                }
                            }
                        }
                    }
                }

                outputBodyNode.Add(tempMergeTemplateBodyNode.Nodes());
            }

            // remove all the 'next' delimiters
            OpenXmlRegex.Replace(outputBodyNode.Nodes().OfType<XElement>(), this.nextRecordRegEx, string.Empty, (xx, mm) => { return true; });

            // find all the 'next_empty' delimiters that we might have added and clear out the content in the paragraph nodes that follow
            OpenXmlRegex.Match(
                outputBodyNode.Nodes().OfType<XElement>(),
                this.nextEmptyRecordRegEx,
                (xx, mm) =>
                {
                    var afterSiblings = xx.ElementsAfterSelf().ToList();

                    // get all the paragraph elements after the 'next_empty' node and clear out the content
                    var nodesToClean = afterSiblings.Where(a => a.Name.LocalName == "p").ToList();

                    // if the next_empty node has lava, clean that up too
                    var xxContent = xx.ToString();
                    if (lavaRegEx.IsMatch(xxContent))
                    {
                        nodesToClean.Add(xx);
                    }

                    foreach (var node in nodesToClean)
                    {
                        // remove all child nodes from each paragraph node
                        if (node.HasElements)
                        {
                            node.RemoveNodes();
                        }
                    }
                });

            // remove all the 'next_empty' delimiters
            OpenXmlRegex.Replace(outputBodyNode.Nodes().OfType<XElement>(), this.nextEmptyRecordRegEx, string.Empty, (xx, mm) => { return true; });

            // remove all but the last SectionProperties element (there should only be one per section (body))
            // The last element is looked up once up front instead of once per candidate.
            var sectPrItems = outputBodyNode.Nodes().OfType<XElement>().Where(a => a.Name.LocalName == "sectPr");
            var lastSectPrItem = sectPrItems.LastOrDefault();
            foreach (var extra in sectPrItems.Where(a => a != lastSectPrItem).ToList())
            {
                extra.Remove();
            }

            // renumber all the ids to make sure they are unique
            var idAttrs = xdoc.DescendantNodes().OfType<XElement>().Where(a => a.HasAttributes).Select(a => a.Attribute("id")).Where(s => s != null);
            int lastId = 1;
            foreach (var attr in idAttrs)
            {
                attr.Value = lastId.ToString();
                lastId++;
            }

            LavaDataDictionary globalMergeHash = new LavaDataDictionary();
            foreach (var field in globalMergeFields)
            {
                globalMergeHash.Add(field.Key, field.Value);
            }

            HeaderFooterGlobalMerge(outputDoc, globalMergeHash);

            // sweep thru any remaining un-merged body parts for any Lava having to do with Global merge fields
            foreach (var bodyTextPart in outputDoc.MainDocumentPart.Document.Body.Descendants<Text>())
            {
                string nodeText = bodyTextPart.Text.ReplaceWordChars();
                if (lavaRegEx.IsMatch(nodeText))
                {
                    bodyTextPart.Text = nodeText.ResolveMergeFields(globalMergeHash, true, true);
                }
            }

            // remove the last pagebreak
            MarkupSimplifier.SimplifyMarkup(outputDoc, new SimplifyMarkupSettings { RemoveLastRenderedPageBreak = true });

            // If you want to see validation errors
            /*
             * var validator = new OpenXmlValidator();
             * var errors = validator.Validate( outputDoc ).ToList();
             */
        }

        // NOTE(review): ContentStream is set to outputDocStream, which this using block disposes on
        // exit. SaveChanges() runs before that, but confirm no caller reads ContentStream from the
        // returned object afterwards.
        outputBinaryFile = new BinaryFile();
        outputBinaryFile.IsTemporary = true;
        outputBinaryFile.ContentStream = outputDocStream;
        outputBinaryFile.FileName = "MergeTemplateOutput" + Path.GetExtension(templateBinaryFile.FileName);
        outputBinaryFile.MimeType = templateBinaryFile.MimeType;
        outputBinaryFile.BinaryFileTypeId = new BinaryFileTypeService(rockContext).Get(Rock.SystemGuid.BinaryFiletype.DEFAULT.AsGuid()).Id;

        binaryFileService.Add(outputBinaryFile);
        rockContext.SaveChanges();
    }

    return outputBinaryFile;
}
/// <summary>
/// Runs a series of <c>OpenXmlRegex.Match</c> and <c>OpenXmlRegex.Replace</c> examples
/// against paragraphs 1 through 23 of a working copy of <c>TestDocument.docx</c>.
/// </summary>
/// <remarks>
/// The source document is copied to <c>Modified.docx</c> under <c>TempDir</c> (overwriting an
/// existing copy), each example logs the count returned by the call, and the modified
/// XDocument is put back into the main document part at the end.
/// </remarks>
public void WordSample1()
{
    var sourceDoc = new FileInfo(GetFilePath("TestDocument.docx"));
    var newDoc = new FileInfo(Path.Combine(TempDir, "Modified.docx"));
    File.Copy(sourceDoc.FullName, newDoc.FullName, true);

    using var wDoc = WordprocessingDocument.Open(newDoc.FullName, true);
    var xDoc = wDoc.MainDocumentPart.GetXDocument();

    // Selects the single paragraph at the given zero-based position. The query is
    // deferred, so it is evaluated against the current state of xDoc when consumed.
    IEnumerable<XElement> ParagraphAt(int index) => xDoc.Descendants(W.p).Skip(index).Take(1);

    // Match content (paragraph 1)
    var count = OpenXmlRegex.Match(ParagraphAt(0), new Regex("Video"));
    Log.WriteLine("Example #1 Count: {0}", count);

    // Match content, case insensitive (paragraph 1)
    count = OpenXmlRegex.Match(ParagraphAt(0), new Regex("video", RegexOptions.IgnoreCase));
    Log.WriteLine("Example #2 Count: {0}", count);

    // Match content, with callback (paragraph 1)
    OpenXmlRegex.Match(
        ParagraphAt(0),
        new Regex("video", RegexOptions.IgnoreCase),
        (element, match) => Log.WriteLine("Example #3 Found value: >{0}<", match.Value));

    // Replace content, beginning of paragraph (paragraph 2)
    count = OpenXmlRegex.Replace(ParagraphAt(1), new Regex("^Video provides"), "Audio gives", null);
    Log.WriteLine("Example #4 Replaced: {0}", count);

    // Replace content, middle of paragraph (paragraph 3)
    count = OpenXmlRegex.Replace(ParagraphAt(2), new Regex("powerful"), "good", null);
    Log.WriteLine("Example #5 Replaced: {0}", count);

    // Replace content, end of paragraph (paragraph 4)
    count = OpenXmlRegex.Replace(ParagraphAt(3), new Regex(" [a-z.]*$"), " super good point!", null);
    Log.WriteLine("Example #6 Replaced: {0}", count);

    // Delete content, beginning of paragraph (paragraph 5)
    count = OpenXmlRegex.Replace(ParagraphAt(4), new Regex("^Video provides"), "", null);
    Log.WriteLine("Example #7 Deleted: {0}", count);

    // Delete content, middle of paragraph (paragraph 6)
    count = OpenXmlRegex.Replace(ParagraphAt(5), new Regex("powerful "), "", null);
    Log.WriteLine("Example #8 Deleted: {0}", count);

    // Delete content, end of paragraph (paragraph 7)
    count = OpenXmlRegex.Replace(ParagraphAt(6), new Regex("[.]$"), "", null);
    Log.WriteLine("Example #9 Deleted: {0}", count);

    // Replace content in inserted text, same author (paragraph 8)
    count = OpenXmlRegex.Replace(ParagraphAt(7), new Regex("Video"), "Audio", null, true, "Eric White");
    Log.WriteLine("Example #10 Replaced: {0}", count);

    // Delete content in inserted text, same author (paragraph 9)
    count = OpenXmlRegex.Replace(ParagraphAt(8), new Regex("powerful "), "", null, true, "Eric White");
    Log.WriteLine("Example #11 Deleted: {0}", count);

    // Replace content partially in inserted text, same author (paragraph 10)
    count = OpenXmlRegex.Replace(ParagraphAt(9), new Regex("Video provides "), "Audio gives ", null, true, "Eric White");
    Log.WriteLine("Example #12 Replaced: {0}", count);

    // Delete content partially in inserted text, same author (paragraph 11)
    count = OpenXmlRegex.Replace(ParagraphAt(10), new Regex(" to help you prove your point"), "", null, true, "Eric White");
    Log.WriteLine("Example #13 Deleted: {0}", count);

    // Replace content in inserted text, different author (paragraph 12)
    count = OpenXmlRegex.Replace(ParagraphAt(11), new Regex("Video"), "Audio", null, true, "John Doe");
    Log.WriteLine("Example #14 Replaced: {0}", count);

    // Delete content in inserted text, different author (paragraph 13)
    count = OpenXmlRegex.Replace(ParagraphAt(12), new Regex("powerful "), "", null, true, "John Doe");
    Log.WriteLine("Example #15 Deleted: {0}", count);

    // Replace content partially in inserted text, different author (paragraph 14)
    count = OpenXmlRegex.Replace(ParagraphAt(13), new Regex("Video provides "), "Audio gives ", null, true, "John Doe");
    Log.WriteLine("Example #16 Replaced: {0}", count);

    // Delete content partially in inserted text, different author (paragraph 15)
    count = OpenXmlRegex.Replace(ParagraphAt(14), new Regex(" to help you prove your point"), "", null, true, "John Doe");
    Log.WriteLine("Example #17 Deleted: {0}", count);

    const string LeftDoubleQuotationMarks = @"[\u0022“„«»”]";
    const string Words = @"[\w\-&/]+(?:\s[\w\-&/]+)*";
    const string RightDoubleQuotationMarks = @"[\u0022”‟»«“]";

    // Same pattern for examples #18 and #19: a run of words between double quotation
    // marks, captured in the named group "words".
    var quotedWords = new Regex($"{LeftDoubleQuotationMarks}(?<words>{Words}){RightDoubleQuotationMarks}");

    // Replace content using replacement pattern (paragraph 16)
    count = OpenXmlRegex.Replace(ParagraphAt(15), quotedWords, "‘${words}’", null);
    Log.WriteLine("Example #18 Replaced: {0}", count);

    // Replace content using replacement pattern in partially inserted text (paragraph 17)
    count = OpenXmlRegex.Replace(ParagraphAt(16), quotedWords, "‘${words}’", null, true, "John Doe");
    Log.WriteLine("Example #19 Replaced: {0}", count);

    // Replace content using replacement pattern (paragraph 18)
    var quotedVideo = new Regex($"({LeftDoubleQuotationMarks})(video)({RightDoubleQuotationMarks})");
    count = OpenXmlRegex.Replace(ParagraphAt(17), quotedVideo, "$1audio$3", null, true, "John Doe");
    Log.WriteLine("Example #20 Replaced: {0}", count);

    // Recognize tabs (paragraph 19)
    count = OpenXmlRegex.Replace(ParagraphAt(18), new Regex(@"([1-9])\.\t"), "($1)\t", null);
    Log.WriteLine("Example #21 Replaced: {0}", count);

    // The next two examples deal with line breaks, i.e., the <w:br/> elements.
    // Note that you should use the U+000D (Carriage Return) character (i.e., '\r')
    // to match a <w:br/> (or <w:cr/>) and replace content with a <w:br/> element.
    // Depending on your platform, the end of line character(s) provided by
    // Environment.NewLine might be "\n" (Unix), "\r\n" (Windows), or "\r" (Mac).

    // Recognize tabs and insert line breaks (paragraph 20).
    count = OpenXmlRegex.Replace(
        ParagraphAt(19),
        new Regex($@"([1-9])\.{UnicodeMapper.HorizontalTabulation}"),
        $"Article $1{UnicodeMapper.CarriageReturn}",
        null);
    Log.WriteLine("Example #22 Replaced: {0}", count);

    // Recognize and remove line breaks (paragraph 21)
    count = OpenXmlRegex.Replace(ParagraphAt(20), new Regex($"{UnicodeMapper.CarriageReturn}"), " ", null);
    Log.WriteLine("Example #23 Replaced: {0}", count);

    // Remove soft hyphens (paragraph 22)
    // The paragraph is materialized once because two replacements run on it.
    var paras = ParagraphAt(21).ToList();
    count = OpenXmlRegex.Replace(paras, new Regex($"{UnicodeMapper.SoftHyphen}"), "", null);
    count += OpenXmlRegex.Replace(paras, new Regex("use"), "no longer use", null);
    Log.WriteLine("Example #24 Replaced: {0}", count);

    // The next example deals with symbols (i.e., w:sym elements).
    // To work with symbols, you should acquire the Unicode values for the
    // symbols you wish to match or use in replacement patterns. The reason
    // is that UnicodeMapper will (a) mimic Microsoft Word in shifting the
    // Unicode values into the Unicode private use area (by adding U+F000)
    // and (b) use replacements for Unicode values that have been used in
    // conjunction with different fonts already (by adding U+E000).
    //
    // The replacement Unicode values will depend on the order in which
    // symbols are retrieved. Therefore, you should not rely on any fixed
    // assignment.
    //
    // In the example below, pencil will be represented by U+F021, whereas
    // spider (same value with different font) will be represented by U+E001.
    // If spider had been assigned first, spider would be U+F021 and pencil
    // would be U+E001.
    var oldPhone = UnicodeMapper.SymToChar("Wingdings", 40);
    var newPhone = UnicodeMapper.SymToChar("Wingdings", 41);
    var pencil = UnicodeMapper.SymToChar("Wingdings", 0x21);
    var spider = UnicodeMapper.SymToChar("Webdings", 0x21);

    // Replace or comment on symbols (paragraph 23)
    paras = ParagraphAt(22).ToList();
    count = OpenXmlRegex.Replace(paras, new Regex($"{oldPhone}"), $"{newPhone} (replaced with new phone)", null);
    count += OpenXmlRegex.Replace(paras, new Regex($"({pencil})"), "$1 (same pencil)", null);
    count += OpenXmlRegex.Replace(paras, new Regex($"({spider})"), "$1 (same spider)", null);
    Log.WriteLine("Example #25 Replaced: {0}", count);

    // Write the modified XDocument back to the main document part.
    wDoc.MainDocumentPart.PutXDocument();
}