/// <summary>
/// Recursively rebuilds a tree, coalescing adjacent runs with identical formatting
/// inside every w:p element it encounters.
/// </summary>
/// <param name="node">The node to transform.</param>
/// <returns>
/// <paramref name="node"/> itself when it is not an element; the result of
/// <c>WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting</c> when it is a w:p element;
/// otherwise a new element with the same name and attributes whose child nodes
/// have been passed through this transform.
/// </returns>
private static object MergeAdjacentRunsTransform(XNode node)
{
    var current = node as XElement;
    if (current == null)
    {
        // Text, comments and other non-element nodes are passed through untouched.
        return node;
    }

    if (current.Name != W.p)
    {
        // Not a paragraph: copy the element shell and keep descending.
        var rebuiltChildren = current.Nodes().Select(child => MergeAdjacentRunsTransform(child));
        return new XElement(current.Name, current.Attributes(), rebuiltChildren);
    }

    // Paragraphs are handed to the coalescing helper as a whole; no further descent happens here.
    return WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(current);
}
/// <summary>
/// Recursively transforms a WordprocessingML tree, searching the text of each paragraph
/// for <paramref name="regex"/> and either reporting or replacing the matches.
/// </summary>
/// <param name="node">The node to transform.</param>
/// <param name="regex">The pattern matched against the concatenated run text of a paragraph.</param>
/// <param name="replacement">
/// Replacement pattern passed to <c>Match.Result</c>. When <c>null</c> nothing is replaced:
/// <paramref name="callback"/> (if any) is invoked for every match and the original paragraph is returned.
/// </param>
/// <param name="callback">
/// Optional callback invoked with the original paragraph and the match. In replace mode a
/// <c>false</c> result skips that match.
/// </param>
/// <param name="trackRevisions">
/// When <c>true</c>, new text is wrapped in w:ins and replaced runs are wrapped in (or converted to) w:del
/// instead of being removed.
/// </param>
/// <param name="revisionTrackingAuthor">Author written to the w:ins / w:del elements that are created.</param>
/// <param name="replInfo">Accumulator whose <c>Count</c> is increased by the number of matches found in each matching paragraph.</param>
/// <param name="coalesceContent">
/// When <c>true</c>, the resulting paragraph is passed through
/// <c>WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting</c>.
/// </param>
/// <returns>
/// The node itself for non-elements; an element for paragraphs and generic elements;
/// a lazy sequence of single-character runs for w:r; a list of objects for a w:ins that contains runs.
/// </returns>
private static object WmlSearchAndReplaceTransform(XNode node, Regex regex, string replacement,
    Func<XElement, Match, bool> callback, bool trackRevisions, string revisionTrackingAuthor,
    ReplaceInternalInfo replInfo, bool coalesceContent)
{
    if (!(node is XElement element))
    {
        return node;
    }

    // Recursion shorthand: every nested call forwards the same search settings.
    object Recurse(XNode subject) =>
        WmlSearchAndReplaceTransform(subject, regex, replacement, callback, trackRevisions,
            revisionTrackingAuthor, replInfo, coalesceContent);

    // A run takes part in the search unless it sits directly inside a w:del element.
    bool IsSearchableRun(XElement candidate) =>
        candidate.Name == W.r && (candidate.Parent == null || candidate.Parent.Name != W.del);

    // True when the element's direct parent is a w:ins element.
    bool IsDirectlyInsideIns(XElement candidate) =>
        candidate.Parent != null && candidate.Parent.Name == W.ins;

    // For a run wrapped in w:ins the wrapper is the node to act on; otherwise the run itself.
    XNode RunOrInsWrapper(XElement target) =>
        IsDirectlyInsideIns(target) ? (XNode)target.Parent : target;

    // Value written to w:date: current UTC time in the "s" format followed by "Z".
    string RevisionDate() => DateTime.UtcNow.ToString("s") + "Z";

    if (element.Name == W.p)
    {
        XElement paragraph = element;

        // Cheap pre-check on the unsplit paragraph so that non-matching paragraphs are not split into runs.
        string probeText = paragraph
            .DescendantsTrimmed(W.txbxContent)
            .Where(d => IsSearchableRun(d))
            .Select(UnicodeMapper.RunToString)
            .StringConcatenate();

        if (!regex.IsMatch(probeText))
        {
            // No match in this paragraph: keep text-bearing children as they are and only
            // descend into the remaining children.
            object KeepOrDescend(XNode item)
            {
                var e = item as XElement;
                if (e == null)
                {
                    return item;
                }

                bool keepAsIs =
                    e.Name == W.pPr
                    || ((e.Name == W.r) && e.Elements(W.t).Any())
                    || e.Elements(W.tab).Any()
                    || ((e.Name == W.ins) && e.Elements(W.r).Elements(W.t).Any());
                if (keepAsIs)
                {
                    return e;
                }

                return Recurse(e);
            }

            var unmatchedParagraph = new XElement(W.p,
                paragraph.Attributes(),
                paragraph.Nodes().Select(item => KeepOrDescend(item)));

            if (coalesceContent)
            {
                return WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(unmatchedParagraph);
            }

            return unmatchedParagraph;
        }

        // The paragraph matches: rebuild it through this transform (the w:r branch below splits
        // text into one run per character) and pair every searchable run with its text.
        var paragraphWithSplitRuns = new XElement(W.p,
            paragraph.Attributes(),
            paragraph.Nodes().Select(item => Recurse(item)));

        var textAndRun = paragraphWithSplitRuns
            .DescendantsTrimmed(W.txbxContent)
            .Where(d => IsSearchableRun(d))
            .Select(r => new { Text = UnicodeMapper.RunToString(r), Run = r })
            .ToList();

        string content = textAndRun.Select(pair => pair.Text).StringConcatenate();
        XElement[] alignedRuns = textAndRun.Select(pair => pair.Run).ToArray();

        MatchCollection matches = regex.Matches(content);
        replInfo.Count += matches.Count;

        // Match-only mode: report each match (if anyone listens) and hand back the original paragraph.
        if (replacement == null)
        {
            if (callback != null)
            {
                foreach (Match reported in matches.Cast<Match>())
                {
                    callback(paragraph, reported);
                }
            }

            return paragraph;
        }

        // Replace mode.
        foreach (Match match in matches.Cast<Match>())
        {
            if (match.Length == 0)
            {
                continue;
            }

            if (callback != null && !callback(paragraph, match))
            {
                continue;
            }

            // The match's index and length are used directly as positions in the run array;
            // this presumes every aligned run contributed exactly one character to the content.
            List<XElement> matchedRuns = alignedRuns
                .Skip(match.Index)
                .Take(match.Length)
                .ToList();

            XElement firstRun = matchedRuns.First();

            // Saved before any mutation so that the new text inherits the first run's formatting.
            XElement firstRunProperties = firstRun.Elements(W.rPr).FirstOrDefault();

            if (trackRevisions)
            {
                if (replacement != "")
                {
                    // We coalesce runs as some methods, e.g., in DocumentAssembler,
                    // will try to find the replacement string even though they
                    // set coalesceContent to false.
                    string insertedText = match.Result(replacement);
                    List<XElement> insertedRuns =
                        UnicodeMapper.StringToCoalescedRunList(insertedText, firstRunProperties);
                    var insertion = new XElement(W.ins,
                        new XAttribute(W.author, revisionTrackingAuthor),
                        new XAttribute(W.date, RevisionDate()),
                        insertedRuns);

                    RunOrInsWrapper(firstRun).AddBeforeSelf(insertion);
                }

                foreach (XElement run in matchedRuns)
                {
                    if (!IsDirectlyInsideIns(run))
                    {
                        // Plain run: replace it with a w:del wrapping its deleted-text form.
                        var deletion = new XElement(W.del,
                            new XAttribute(W.author, revisionTrackingAuthor),
                            new XAttribute(W.date, RevisionDate()),
                            TransformToDelText(run));
                        run.ReplaceWith(deletion);
                        continue;
                    }

                    XElement parentIns = run.Parent;
                    XElement host = parentIns.Parent;
                    if (host == null)
                    {
                        continue;
                    }

                    List<XElement> rebuiltChildren;
                    if ((string)parentIns.Attributes(W.author).FirstOrDefault() == revisionTrackingAuthor)
                    {
                        // Insertion by the same author: drop the w:ins entirely.
                        rebuiltChildren = host
                            .Elements()
                            .Where(c => c != parentIns)
                            .ToList();
                    }
                    else
                    {
                        // Insertion by another author: keep the w:ins but mark its content as deleted.
                        rebuiltChildren = host
                            .Elements()
                            .Select(c => c == parentIns
                                ? new XElement(W.ins,
                                    parentIns.Attributes(),
                                    new XElement(W.del,
                                        new XAttribute(W.author, revisionTrackingAuthor),
                                        new XAttribute(W.date, RevisionDate()),
                                        parentIns.Elements().Select(TransformToDelText)))
                                : c)
                            .ToList();
                    }

                    host.ReplaceNodes(rebuiltChildren);
                }
            }
            else
            {
                // Not tracking revisions: remove every matched run but the first ...
                foreach (XElement surplusRun in matchedRuns.Skip(1).ToList())
                {
                    RunOrInsWrapper(surplusRun).Remove();
                }

                // We coalesce runs as some methods, e.g., in DocumentAssembler,
                // will try to find the replacement string even though they
                // set coalesceContent to false.
                string replacedText = match.Result(replacement);
                List<XElement> replacementRuns =
                    UnicodeMapper.StringToCoalescedRunList(replacedText, firstRunProperties);

                // ... and swap the first one (or its w:ins wrapper) for the replacement runs.
                RunOrInsWrapper(firstRun).ReplaceWith(replacementRuns);
            }
        }

        if (coalesceContent)
        {
            return WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(paragraphWithSplitRuns);
        }

        return paragraphWithSplitRuns;
    }

    if (element.Name == W.ins && element.Elements(W.r).Any())
    {
        // Transform each child first; any child that came back as a sequence of runs is
        // re-wrapped so that every run gets its own w:ins carrying the original attributes.
        List<object> transformedChildren = element
            .Elements()
            .Select(item => Recurse(item))
            .ToList();

        List<object> rewrapped = transformedChildren
            .Select(result =>
            {
                var runs = result as IEnumerable<XElement>;
                if (runs == null)
                {
                    return result;
                }

                return (object)runs.Select(single => new XElement(W.ins, element.Attributes(), single));
            })
            .ToList();

        return rewrapped;
    }

    if (element.Name == W.r)
    {
        // Split the run: one run per character of each w:t, one run per other non-rPr child.
        // Every produced run receives the original run's w:rPr.
        return element.Elements()
            .Where(part => part.Name != W.rPr)
            .SelectMany(part => part.Name == W.t
                ? ((string)part).Select(ch =>
                    new XElement(W.r,
                        element.Elements(W.rPr),
                        new XElement(W.t, XmlUtil.GetXmlSpaceAttribute(ch), ch)))
                : new[] { new XElement(W.r, element.Elements(W.rPr), part) });
    }

    // Any other element: copy the shell and keep descending.
    return new XElement(element.Name,
        element.Attributes(),
        element.Nodes().Select(item => Recurse(item)));
}
/// <summary>
/// Recursively copies a WordprocessingML tree, turning template metadata that was typed as text
/// into XML elements.
/// </summary>
/// <remarks>
/// Three shapes are recognized:
/// <list type="bullet">
/// <item>a w:sdt whose alias is missing, empty or contained in <c>s_AliasList</c> and whose text starts with "&lt;";</item>
/// <item>a w:p whose whole trimmed text is one "&lt;# ... #&gt;" directive;</item>
/// <item>a w:p that contains "&lt;# ... #&gt;" directives next to other text.</item>
/// </list>
/// In the last case <c>OpenXmlRegex.Replace</c> is run on <paramref name="node"/> itself before it is copied.
/// </remarks>
/// <param name="node">The node to transform.</param>
/// <param name="data">Not read by this method; only forwarded to the recursive calls.</param>
/// <param name="te">Error accumulator forwarded to the metadata parser and to the error-message helpers.</param>
/// <returns>
/// The node itself for non-elements; otherwise the metadata element, error content, or a copy of the
/// element with transformed children.
/// </returns>
/// <exception cref="OpenXmlPowerToolsException">
/// A placeholder run written for an inline directive could not be found again.
/// </exception>
private static object TransformToMetadata(XNode node, XElement data, TemplateError te)
{
    if (!(node is XElement element))
    {
        return node;
    }

    if (element.Name == W.sdt)
    {
        var alias = (string)element.Elements(W.sdtPr).Elements(W.alias).Attributes(W.val).FirstOrDefault();
        var hasAlias = !string.IsNullOrEmpty(alias);

        if (!hasAlias || s_AliasList.Contains(alias))
        {
            var ccContents = element
                .DescendantsTrimmed(W.txbxContent)
                .Where(e => e.Name == W.t)
                .Select(t => (string)t)
                .StringConcatenate()
                .Trim()
                .Replace('“', '"')
                .Replace('”', '"');

            // Ordinal: this is a syntactic check, not a linguistic one.
            if (ccContents.StartsWith("<", StringComparison.Ordinal))
            {
                var xml = TransformXmlTextToMetadata(te, ccContents);

                // A content control directly inside a paragraph is run-level; anything else
                // (including a parentless element) is handled as block-level.
                var isRunLevel = element.Parent?.Name == W.p;

                if (xml.Name == W.p || xml.Name == W.r)
                {
                    // This means there was an error processing the XML.
                    if (isRunLevel)
                    {
                        return xml.Elements(W.r);
                    }

                    return xml;
                }

                // An empty alias is accepted above as "no alias", so it must not be
                // compared against the metadata element name here either.
                if (hasAlias && xml.Name.LocalName != alias)
                {
                    if (isRunLevel)
                    {
                        return CreateRunErrorMessage("Error: Content control alias does not match metadata element name", te);
                    }

                    return CreateParaErrorMessage("Error: Content control alias does not match metadata element name", te);
                }

                xml.Add(element.Elements(W.sdtContent).Elements());
                return xml;
            }
        }

        // Not a metadata content control: copy it and keep descending.
        return new XElement(element.Name,
            element.Attributes(),
            element.Nodes().Select(n => TransformToMetadata(n, data, te)));
    }

    if (element.Name == W.p)
    {
        var paraContents = element
            .DescendantsTrimmed(W.txbxContent)
            .Where(e => e.Name == W.t)
            .Select(t => (string)t)
            .StringConcatenate()
            .Trim();

        // The paragraph is a lone directive when it starts with "<#", ends with "#>" and has no
        // second "<#". The length guard keeps "<#>" (where the two markers overlap) from reaching
        // Substring with a negative length. The search for a second opener starts at index 2
        // because index 1 holds '#' and therefore cannot begin another "<#".
        var isSingleDirective =
            paraContents.Length >= 4
            && paraContents.StartsWith("<#", StringComparison.Ordinal)
            && paraContents.EndsWith("#>", StringComparison.Ordinal)
            && paraContents.IndexOf("<#", 2, StringComparison.Ordinal) < 0;

        if (isSingleDirective)
        {
            var xmlText = paraContents.Substring(2, paraContents.Length - 4).Trim();
            var xml = TransformXmlTextToMetadata(te, xmlText);
            if (xml.Name == W.p || xml.Name == W.r)
            {
                // Error content is returned in place of the paragraph.
                return xml;
            }

            xml.Add(element);
            return xml;
        }

        if (paraContents.Contains("<#"))
        {
            // Inline directives: replace each one with a placeholder run carrying a GUID, remember
            // what belongs in its place, then swap the placeholders for the real content below.
            var runReplacementInfo = new List<RunReplacementInfo>();
            var thisGuid = Guid.NewGuid().ToString();
            var r = new Regex("<#.*?#>");

            OpenXmlRegex.Replace(new[] { element }, r, thisGuid, (para, match) =>
            {
                var matchString = match.Value.Trim();
                var xmlText = matchString
                    .Substring(2, matchString.Length - 4)
                    .Trim()
                    .Replace('“', '"')
                    .Replace('”', '"');

                XElement parsed;
                try
                {
                    parsed = XElement.Parse(xmlText);
                }
                catch (XmlException ex)
                {
                    runReplacementInfo.Add(new RunReplacementInfo()
                    {
                        Xml = null,
                        XmlExceptionMessage = "XmlException: " + ex.Message,
                        SchemaValidationMessage = null,
                    });
                    return true;
                }

                var schemaError = ValidatePerSchema(parsed);
                if (schemaError != null)
                {
                    runReplacementInfo.Add(new RunReplacementInfo()
                    {
                        Xml = null,
                        XmlExceptionMessage = null,
                        SchemaValidationMessage = "Schema Validation Error: " + schemaError,
                    });
                    return true;
                }

                runReplacementInfo.Add(new RunReplacementInfo()
                {
                    Xml = parsed,
                    XmlExceptionMessage = null,
                    SchemaValidationMessage = null,
                });

                // Every branch returns true, so each match gets a placeholder and the list
                // holds exactly one entry per match, in match order.
                return true;
            }, false);

            var newPara = new XElement(element);
            foreach (var rri in runReplacementInfo)
            {
                // Next placeholder that has not been consumed yet (consumed ones end up
                // as children of a PA.Content element or are replaced outright).
                var runToReplace = newPara
                    .Descendants(W.r)
                    .FirstOrDefault(rn => rn.Value == thisGuid && rn.Parent.Name != PA.Content);
                if (runToReplace == null)
                {
                    throw new OpenXmlPowerToolsException("Internal error");
                }

                if (rri.XmlExceptionMessage != null)
                {
                    runToReplace.ReplaceWith(CreateRunErrorMessage(rri.XmlExceptionMessage, te));
                }
                else if (rri.SchemaValidationMessage != null)
                {
                    runToReplace.ReplaceWith(CreateRunErrorMessage(rri.SchemaValidationMessage, te));
                }
                else
                {
                    var newXml = new XElement(rri.Xml);
                    newXml.Add(runToReplace);
                    runToReplace.ReplaceWith(newXml);
                }
            }

            var coalescedParagraph = WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(newPara);
            return coalescedParagraph;
        }
    }

    return new XElement(element.Name,
        element.Attributes(),
        element.Nodes().Select(n => TransformToMetadata(n, data, te)));
}
/// <summary>
/// Recursively copies a WordprocessingML tree, registering every template field it finds with
/// <paramref name="fieldAccumulator"/> and tagging or wrapping the field in a content control.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item>A w:sdt without alias whose text is a field gets the field id written to its w:tag.
/// The w:sdtPr of the source element is detached from the source and reused in the result.</item>
/// <item>Each w:p is bracketed by <c>BeginBlock</c>/<c>EndBlock</c> on the accumulator. A paragraph
/// consisting of exactly one embedded field is rebuilt around a single wrapped run; a paragraph
/// with embedded fields next to other text has each field replaced via <c>OpenXmlRegex.Replace</c>
/// (which operates on the source paragraph) and is then transformed again.</item>
/// <item>w:lastRenderedPageBreak elements are dropped.</item>
/// </list>
/// </remarks>
/// <param name="node">The node to transform.</param>
/// <param name="fieldAccumulator">Receives the fields and block boundaries encountered.</param>
/// <returns>
/// The node itself for non-elements, <c>null</c> for w:lastRenderedPageBreak, otherwise a new element.
/// </returns>
/// <exception cref="InvalidOperationException">
/// A placeholder run written for an embedded field could not be found again.
/// </exception>
private static object IdentifyAndTransformFields(XNode node, FieldAccumulator fieldAccumulator)
{
    if (!(node is XElement element))
    {
        return node;
    }

    if (element.Name == W.sdt)
    {
        var alias = (string)element.Elements(W.sdtPr).Elements(W.alias).Attributes(W.val).FirstOrDefault();
        if (string.IsNullOrEmpty(alias))
        {
            var ccContents = element
                .DescendantsTrimmed(W.txbxContent)
                .Where(e => e.Name == W.t)
                .Select(t => (string)t)
                .StringConcatenate()
                .CleanUpInvalidCharacters();

            if (FieldRecognizer.IsField(ccContents, out ccContents))
            {
                var newCC = new XElement(element.Name, element.Attributes());

                // Reuse the existing properties element (detached from the source so that it is
                // not copied a second time by Add(element.Nodes()) below) or start a fresh one.
                var props = element.Elements(W.sdtPr).FirstOrDefault();
                if (props == null)
                {
                    props = new XElement(W.sdtPr);
                }
                else
                {
                    props.Remove();
                }

                newCC.Add(props);

                var tagElem = props.Elements(W.tag).FirstOrDefault();
                if (tagElem == null)
                {
                    tagElem = new XElement(W.tag);
                    props.Add(tagElem);
                }

                var fieldId = fieldAccumulator.AddField(ccContents);
                tagElem.SetAttributeValue(W.val, fieldId);
                newCC.Add(element.Nodes());
                return newCC;
            }
        }

        // Aliased or non-field content control: copy it and keep descending.
        return new XElement(element.Name,
            element.Attributes(),
            element.Nodes().Select(n => IdentifyAndTransformFields(n, fieldAccumulator)));
    }

    if (element.Name == W.p)
    {
        fieldAccumulator.BeginBlock();

        var paraContents = element
            .DescendantsTrimmed(W.txbxContent)
            .Where(e => e.Name == W.t)
            .Select(t => (string)t)
            .StringConcatenate()
            .Trim();

        var fieldOpener = FieldRecognizer.EmbedBegin + FieldRecognizer.FieldBegin;
        var fieldCloser = FieldRecognizer.FieldEnd + FieldRecognizer.EmbedEnd;

        int occurances = CountSubstring(FieldRecognizer.EmbedBegin, paraContents);

        // Ordinal comparison: the Substring below cuts by the delimiters' ordinal lengths, so the
        // prefix/suffix test has to agree with it character for character.
        if (occurances == 1
            && paraContents.StartsWith(fieldOpener, StringComparison.Ordinal)
            && paraContents.EndsWith(fieldCloser, StringComparison.Ordinal))
        {
            var content = paraContents
                .Substring(
                    FieldRecognizer.EmbedBegin.Length,
                    paraContents.Length - FieldRecognizer.EmbedBegin.Length - FieldRecognizer.EmbedEnd.Length)
                .Trim();

            if (FieldRecognizer.IsField(content, out content))
            {
                var fieldId = fieldAccumulator.AddField(content);
                fieldAccumulator.EndBlock();

                // The replacement run takes its formatting from the paragraph-level run properties, if any.
                var ppr = element.Elements(W.pPr).FirstOrDefault();
                var rpr = (ppr != null) ? ppr.Elements(W.rPr).FirstOrDefault() : null;
                XElement r = new XElement(W.r,
                    rpr,
                    new XElement(W.t, FieldRecognizer.FieldBegin + content + FieldRecognizer.FieldEnd));

                return new XElement(element.Name,
                    element.Attributes(),
                    element.Elements(W.pPr),
                    CCTWrap(fieldId, r));
            }

            // else fall through to (slower) case
        }

        if (paraContents.Contains(fieldOpener))
        {
            fieldAccumulator.RegisterNonFieldContentInBlock();

            // Replace every embedded field with a placeholder run carrying a GUID, remember the
            // content control that belongs in its place, then swap them in below.
            var runReplacementInfo = new List<XElement>();
            var placeholderText = Guid.NewGuid().ToString();
            var r = new Regex(
                Regex.Escape(FieldRecognizer.EmbedBegin)
                + "\\s*"
                + Regex.Escape(FieldRecognizer.FieldBegin)
                + ".*?"
                + Regex.Escape(FieldRecognizer.FieldEnd)
                + "\\s*"
                + Regex.Escape(FieldRecognizer.EmbedEnd));

            var replacedCount = OpenXmlRegex.Replace(new[] { element }, r, placeholderText, (para, match) =>
            {
                // unrecognized codes/elements returned as \u0001; strip these
                var matchString = match.Value.Trim().Replace("\u0001", "");
                var content = matchString
                    .Substring(
                        FieldRecognizer.EmbedBegin.Length,
                        matchString.Length - FieldRecognizer.EmbedBegin.Length - FieldRecognizer.EmbedEnd.Length)
                    .CleanUpInvalidCharacters();

                if (FieldRecognizer.IsField(content, out content))
                {
                    runReplacementInfo.Add(CCWrap(new XElement(W.r,
                        new XElement(W.t, FieldRecognizer.FieldBegin + content + FieldRecognizer.FieldEnd))));
                    return true;
                }

                // Not a field after all: leave this match untouched.
                return false;
            }, false);

            if (replacedCount > 0)
            {
                var newPara = new XElement(element);
                foreach (var elem in runReplacementInfo)
                {
                    var runToReplace = newPara
                        .Descendants(W.r)
                        .FirstOrDefault(rn => rn.Value == placeholderText && rn.Parent.Name != Templater.OD.Content);
                    if (runToReplace == null)
                    {
                        throw new InvalidOperationException("Internal error");
                    }

                    // Carry the placeholder's run formatting over to the run inside the content control.
                    var rpr = runToReplace.Elements(W.rPr).FirstOrDefault();
                    if (rpr != null)
                    {
                        rpr.Remove();
                        elem.Elements(W.sdtContent).First().Elements(W.r).First().AddFirst(rpr);
                    }

                    runToReplace.ReplaceWith(elem);
                }

                var coalescedParagraph = WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(newPara);

                // Second pass over the rewritten paragraph; it opens and closes its own block
                // on the accumulator before the block opened above is closed.
                var transformedContent = IdentifyAndTransformFields(coalescedParagraph, fieldAccumulator);
                fieldAccumulator.EndBlock();
                return transformedContent;
            }
        }

        // Materialize the children before closing the block so that anything they register
        // happens while the block is still open.
        var transformedParaContent = element
            .Nodes()
            .Select(n => IdentifyAndTransformFields(n, fieldAccumulator))
            .ToArray();
        fieldAccumulator.EndBlock();
        return new XElement(element.Name, element.Attributes(), transformedParaContent);
    }

    if (element.Name == W.lastRenderedPageBreak)
    {
        // documents assembled from templates will almost always change pagination, so remove Word's pagination hints
        // (also because they're not handled cleanly by OXPT)
        return null;
    }

    return new XElement(element.Name,
        element.Attributes(),
        element.Nodes().Select(n => IdentifyAndTransformFields(n, fieldAccumulator)));
}