/// <summary>
/// Recursively rebuilds an XML tree, handing every <c>w:p</c> element to
/// <c>WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting</c>.
/// </summary>
/// <param name="node">The node to transform.</param>
/// <returns>
/// The node itself when it is not an element; the coalescing helper's result when it is a
/// <c>w:p</c> element; otherwise a new element with the same name and attributes whose
/// child nodes have been transformed by this method.
/// </returns>
private static object MergeAdjacentRunsTransform(XNode node)
{
    var element = node as XElement;
    if (element == null)
    {
        // Text, comments, etc. are passed through untouched.
        return node;
    }

    if (element.Name != W.p)
    {
        // Not a paragraph: copy the element shell and recurse into its children.
        var transformedChildren = element.Nodes().Select(child => MergeAdjacentRunsTransform(child));
        return new XElement(element.Name, element.Attributes(), transformedChildren);
    }

    // Paragraphs are delegated wholesale to the coalescing helper.
    return WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(element);
}
/// <summary>
/// Recursive transform that searches the text of each <c>w:p</c> element for
/// <paramref name="regex"/> and either reports the matches through
/// <paramref name="callback"/> or replaces them with <paramref name="replacement"/>.
/// </summary>
/// <param name="node">Node to transform; non-element nodes are returned unchanged.</param>
/// <param name="regex">Pattern matched against the concatenated run text of a paragraph.</param>
/// <param name="replacement">
/// Replacement pattern passed to <c>Match.Result</c>. When <c>null</c> the method only
/// reports matches via <paramref name="callback"/> and returns the original paragraph.
/// </param>
/// <param name="callback">
/// Optional. In replace mode a <c>false</c> return value skips that match; in match-only
/// mode the return value is ignored.
/// </param>
/// <param name="trackRevisions">
/// When true, replaced text is wrapped in <c>w:del</c> and new text in <c>w:ins</c>
/// instead of being edited directly.
/// </param>
/// <param name="revisionTrackingAuthor">Value written to the <c>w:author</c> attribute of generated revisions.</param>
/// <param name="replInfo">Accumulator whose <c>Count</c> is increased by the number of regex matches found.</param>
/// <param name="coalesceContent">
/// When true, a rebuilt paragraph is passed through
/// <c>WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting</c> before being returned.
/// </param>
/// <returns>
/// An <c>XNode</c>/<c>XElement</c> for most inputs; a sequence of single-content runs for a
/// <c>w:r</c> element; a <c>List&lt;object&gt;</c> for a <c>w:ins</c> element that contains runs.
/// </returns>
private static object WmlSearchAndReplaceTransform(XNode node, Regex regex, string replacement, Func <XElement, Match, bool> callback, bool trackRevisions, string revisionTrackingAuthor, ReplaceInternalInfo replInfo, bool coalesceContent)
{
    var element = node as XElement;
    if (element == null) { return(node); }
    if (element.Name == W.p)
    {
        XElement paragraph = element;
        // Cheap pre-check: concatenate the text of all runs that are not direct children of
        // w:del. NOTE(review): DescendantsTrimmed(W.txbxContent) presumably stops descending
        // at text-box content - confirm against the helper.
        string preliminaryContent = paragraph
            .DescendantsTrimmed(W.txbxContent)
            .Where(d => d.Name == W.r && (d.Parent == null || d.Parent.Name != W.del))
            .Select(UnicodeMapper.RunToString)
            .StringConcatenate();
        if (regex.IsMatch(preliminaryContent))
        {
            // Rebuild the paragraph through this same transform. The w:r branch at the bottom
            // of this method splits every run into single-content runs, so that runs can be
            // addressed by position below.
            var paragraphWithSplitRuns = new XElement(W.p, paragraph.Attributes(), paragraph.Nodes().Select(n => WmlSearchAndReplaceTransform(n, regex, replacement, callback, trackRevisions, revisionTrackingAuthor, replInfo, coalesceContent)));
            IEnumerable <XElement> runsTrimmed = paragraphWithSplitRuns
                .DescendantsTrimmed(W.txbxContent)
                .Where(d => d.Name == W.r && (d.Parent == null || d.Parent.Name != W.del));
            var charsAndRuns = runsTrimmed
                .Select(r => new { Ch = UnicodeMapper.RunToString(r), r })
                .ToList();
            string content = charsAndRuns.Select(t => t.Ch).StringConcatenate();
            // alignedRuns[i] is used as "the run for character i of content".
            // NOTE(review): this relies on RunToString yielding exactly one character per
            // split run - confirm against UnicodeMapper.
            XElement[] alignedRuns = charsAndRuns.Select(t => t.r).ToArray();
            MatchCollection matchCollection = regex.Matches(content);
            // Counted before any callback filtering or zero-length skipping below.
            replInfo.Count += matchCollection.Count;
            // Process Match
            if (replacement == null)
            {
                // Match-only mode: the original (unsplit) paragraph is returned untouched.
                if (callback == null) { return(paragraph); }
                foreach (Match match in matchCollection.Cast <Match>()) { callback(paragraph, match); }
                return(paragraph);
            }
            // Process Replace
            foreach (Match match in matchCollection.Cast <Match>())
            {
                if (match.Length == 0) { continue; }
                // The callback can veto an individual replacement by returning false.
                if ((callback != null) && !callback(paragraph, match)) { continue; }
                List <XElement> runCollection = alignedRuns
                    .Skip(match.Index)
                    .Take(match.Length)
                    .ToList(); // uses the Skip / Take special semantics of array to implement efficient finding of sub array
                XElement firstRun = runCollection.First();
                XElement firstRunProperties = firstRun.Elements(W.rPr).FirstOrDefault(); // save away first run properties
                if (trackRevisions)
                {
                    if (replacement != "")
                    {
                        // We coalesce runs as some methods, e.g., in DocumentAssembler,
                        // will try to find the replacement string even though they
                        // set coalesceContent to false.
                        string newTextValue = match.Result(replacement);
                        List <XElement> newRuns = UnicodeMapper.StringToCoalescedRunList(newTextValue, firstRunProperties);
                        var newIns = new XElement(W.ins, new XAttribute(W.author, revisionTrackingAuthor), new XAttribute(W.date, DateTime.UtcNow.ToString("s") + "Z"), newRuns);
                        // Insert the new text before the first matched run, or before its
                        // enclosing w:ins when the run sits inside one.
                        if (firstRun.Parent != null && firstRun.Parent.Name == W.ins) { firstRun.Parent.AddBeforeSelf(newIns); } else { firstRun.AddBeforeSelf(newIns); }
                    }
                    foreach (XElement run in runCollection)
                    {
                        bool isInIns = run.Parent != null && run.Parent.Name == W.ins;
                        if (isInIns)
                        {
                            XElement parentIns = run.Parent;
                            XElement grandParentParagraph = parentIns.Parent;
                            // A null grandparent means the w:ins is not attached to a parent
                            // (for example, already removed by an earlier iteration); skip it.
                            if (grandParentParagraph != null)
                            {
                                if ((string)parentIns.Attributes(W.author).FirstOrDefault() == revisionTrackingAuthor)
                                {
                                    // Insertion by the same author: drop the whole w:ins.
                                    // Only child *elements* of the grandparent are carried over.
                                    List <XElement> parentInsSiblings = grandParentParagraph
                                        .Elements()
                                        .Where(c => c != parentIns)
                                        .ToList();
                                    grandParentParagraph.ReplaceNodes(parentInsSiblings);
                                }
                                else
                                {
                                    // Insertion by another author: keep the w:ins but wrap its
                                    // child elements (via TransformToDelText) in a w:del.
                                    List <XElement> parentInsSiblings = grandParentParagraph
                                        .Elements()
                                        .Select(c => c == parentIns ? new XElement(W.ins, parentIns.Attributes(), new XElement(W.del, new XAttribute(W.author, revisionTrackingAuthor), new XAttribute(W.date, DateTime.UtcNow.ToString("s") + "Z"), parentIns.Elements().Select(TransformToDelText))) : c)
                                        .ToList();
                                    grandParentParagraph.ReplaceNodes(parentInsSiblings);
                                }
                            }
                        }
                        else
                        {
                            // Plain run: replace it with a w:del wrapping its deleted-text form.
                            var delRun = new XElement(W.del, new XAttribute(W.author, revisionTrackingAuthor), new XAttribute(W.date, DateTime.UtcNow.ToString("s") + "Z"), TransformToDelText(run));
                            run.ReplaceWith(delRun);
                        }
                    }
                }
                else // not tracked revisions
                {
                    // Remove every matched run except the first (or its enclosing w:ins) ...
                    foreach (XElement runToDelete in runCollection.Skip(1).ToList())
                    {
                        if (runToDelete.Parent != null && runToDelete.Parent.Name == W.ins) { runToDelete.Parent.Remove(); } else { runToDelete.Remove(); }
                    }
                    // We coalesce runs as some methods, e.g., in DocumentAssembler,
                    // will try to find the replacement string even though they
                    // set coalesceContent to false.
                    string newTextValue = match.Result(replacement);
                    List <XElement> newRuns = UnicodeMapper.StringToCoalescedRunList(newTextValue, firstRunProperties);
                    // ... then swap the first run (or its enclosing w:ins) for the new runs.
                    if (firstRun.Parent != null && firstRun.Parent.Name == W.ins) { firstRun.Parent.ReplaceWith(newRuns); } else { firstRun.ReplaceWith(newRuns); }
                }
            }
            return(coalesceContent ? WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(paragraphWithSplitRuns) : paragraphWithSplitRuns);
        }
        // No match in this paragraph: rebuild it, handing some children to the new element as-is
        // and recursing into the rest.
        var newParagraph = new XElement(W.p, paragraph.Attributes(), paragraph.Nodes().Select(n => {
            var e = n as XElement;
            if (e == null) { return(n); }
            if (e.Name == W.pPr) { return(e); }
            // NOTE(review): as parenthesized, the w:tab test applies to ANY element, not only
            // to w:r - confirm this is intended.
            if (((e.Name == W.r) && e.Elements(W.t).Any()) || e.Elements(W.tab).Any()) { return(e); }
            if ((e.Name == W.ins) && e.Elements(W.r).Elements(W.t).Any()) { return(e); }
            return(WmlSearchAndReplaceTransform(e, regex, replacement, callback, trackRevisions, revisionTrackingAuthor, replInfo, coalesceContent));
        }));
        return(coalesceContent ? WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(newParagraph) // CoalesceContent(newParagraph)
            : newParagraph);
    }
    if (element.Name == W.ins && element.Elements(W.r).Any())
    {
        // Transform each child element; results that are run sequences (as produced by the
        // w:r branch below) are re-wrapped so that every run gets its own copy of this w:ins.
        List <object> collectionOfCollections = element
            .Elements()
            .Select(n => WmlSearchAndReplaceTransform(n, regex, replacement, callback, trackRevisions, revisionTrackingAuthor, replInfo, coalesceContent))
            .ToList();
        List <object> collectionOfIns = collectionOfCollections
            .Select(c => {
                var elements = c as IEnumerable <XElement>;
                return(elements != null ? elements.Select(ixc => new XElement(W.ins, element.Attributes(), ixc)) : c);
            })
            .ToList();
        return(collectionOfIns);
    }
    if (element.Name == W.r)
    {
        // Split the run: each character of a w:t becomes its own run, and every other
        // non-rPr child becomes its own run; each new run receives the original rPr elements.
        // The returned sequence is lazily evaluated.
        return(element.Elements()
            .Where(e => e.Name != W.rPr)
            .Select(e => e.Name == W.t ? ((string)e).Select(c => new XElement(W.r, element.Elements(W.rPr), new XElement(W.t, XmlUtil.GetXmlSpaceAttribute(c), c))) : new[] { new XElement(W.r, element.Elements(W.rPr), e) })
            .SelectMany(t => t));
    }
    // Any other element: copy name and attributes, recurse into child nodes.
    return(new XElement(element.Name, element.Attributes(), element.Nodes()
        .Select(n => WmlSearchAndReplaceTransform(n, regex, replacement, callback, trackRevisions, revisionTrackingAuthor, replInfo, coalesceContent))));
}
/// <summary>
/// Recursive transform that converts template metadata, written either inside content
/// controls (<c>w:sdt</c>) or between <c>&lt;#</c> and <c>#&gt;</c> in paragraph text, into
/// metadata XML elements wrapped around the corresponding document content.
/// </summary>
/// <param name="node">Node to transform; non-element nodes are returned unchanged.</param>
/// <param name="data">Not read by this method; only forwarded to recursive calls.</param>
/// <param name="te">Error tracker handed to the metadata-parsing and error-message helpers.</param>
/// <returns>
/// The transformed node: a metadata element, an error run/paragraph (or a sequence of error
/// runs for an inline content control), or a rebuilt copy of the element.
/// </returns>
/// <exception cref="OpenXmlPowerToolsException">
/// Thrown when a placeholder run inserted for a <c>&lt;# ... #&gt;</c> match cannot be found again.
/// </exception>
private static object TransformToMetadata(XNode node, XElement data, TemplateError te)
{
    if (!(node is XElement element))
    {
        return node;
    }

    if (element.Name == W.sdt)
    {
        var alias = (string)element.Elements(W.sdtPr).Elements(W.alias).Attributes(W.val).FirstOrDefault();
        if (string.IsNullOrEmpty(alias) || s_AliasList.Contains(alias))
        {
            // Text of the content control, with typographic quotes normalized so that the
            // text can be parsed as XML.
            var ccContents = element
                .DescendantsTrimmed(W.txbxContent)
                .Where(e => e.Name == W.t)
                .Select(t => (string)t)
                .StringConcatenate()
                .Trim()
                .Replace('“', '"')
                .Replace('”', '"');

            if (ccContents.StartsWith("<", StringComparison.Ordinal))
            {
                var xml = TransformXmlTextToMetadata(te, ccContents);

                // A content control directly inside a paragraph is inline. A parentless
                // element is treated as block-level instead of throwing.
                var isInline = element.Parent?.Name == W.p;

                if (xml.Name == W.p || xml.Name == W.r) // this means there was an error processing the XML.
                {
                    if (isInline)
                    {
                        return xml.Elements(W.r);
                    }

                    return xml;
                }

                if (alias != null && xml.Name.LocalName != alias)
                {
                    const string aliasMismatch = "Error: Content control alias does not match metadata element name";
                    if (isInline)
                    {
                        return CreateRunErrorMessage(aliasMismatch, te);
                    }

                    return CreateParaErrorMessage(aliasMismatch, te);
                }

                xml.Add(element.Elements(W.sdtContent).Elements());
                return xml;
            }
        }

        // Not a metadata content control: rebuild it and recurse into its children.
        return new XElement(element.Name, element.Attributes(), element.Nodes().Select(n => TransformToMetadata(n, data, te)));
    }

    if (element.Name == W.p)
    {
        var paraContents = element
            .DescendantsTrimmed(W.txbxContent)
            .Where(e => e.Name == W.t)
            .Select(t => (string)t)
            .StringConcatenate()
            .Trim();

        // Count "<#" occurrences in a single linear pass (the delimiter cannot overlap itself).
        var occurrences = 0;
        for (var idx = paraContents.IndexOf("<#", StringComparison.Ordinal);
             idx >= 0;
             idx = paraContents.IndexOf("<#", idx + 2, StringComparison.Ordinal))
        {
            occurrences++;
        }

        // Whole paragraph is one metadata tag. The length guard rejects "<#>", where the
        // opening and closing delimiters share the '#', which would otherwise make the
        // Substring length negative.
        if (occurrences == 1
            && paraContents.Length >= 4
            && paraContents.StartsWith("<#", StringComparison.Ordinal)
            && paraContents.EndsWith("#>", StringComparison.Ordinal))
        {
            var xmlText = paraContents.Substring(2, paraContents.Length - 4).Trim();
            var xml = TransformXmlTextToMetadata(te, xmlText);
            if (xml.Name == W.p || xml.Name == W.r)
            {
                return xml;
            }

            xml.Add(element);
            return xml;
        }

        if (occurrences > 0)
        {
            // Metadata embedded in running text: each "<# ... #>" match is replaced with a
            // unique placeholder string, and the parse result for each match is recorded in
            // order so the placeholders can be substituted afterwards.
            var runReplacementInfo = new List<RunReplacementInfo>();
            var thisGuid = Guid.NewGuid().ToString();
            var r = new Regex("<#.*?#>");
            OpenXmlRegex.Replace(new[] { element }, r, thisGuid, (para, match) =>
            {
                var matchString = match.Value.Trim();
                var xmlText = matchString.Substring(2, matchString.Length - 4).Trim().Replace('“', '"').Replace('”', '"');

                XElement parsed;
                try
                {
                    parsed = XElement.Parse(xmlText);
                }
                catch (XmlException e)
                {
                    runReplacementInfo.Add(new RunReplacementInfo()
                    {
                        Xml = null,
                        XmlExceptionMessage = "XmlException: " + e.Message,
                        SchemaValidationMessage = null,
                    });
                    return true;
                }

                var schemaError = ValidatePerSchema(parsed);
                if (schemaError != null)
                {
                    runReplacementInfo.Add(new RunReplacementInfo()
                    {
                        Xml = null,
                        XmlExceptionMessage = null,
                        SchemaValidationMessage = "Schema Validation Error: " + schemaError,
                    });
                    return true;
                }

                runReplacementInfo.Add(new RunReplacementInfo()
                {
                    Xml = parsed,
                    XmlExceptionMessage = null,
                    SchemaValidationMessage = null,
                });
                return true;
            }, false);

            // NOTE(review): this copy is expected to contain the placeholder runs, i.e.
            // OpenXmlRegex.Replace presumably edits `element` in place - confirm.
            var newPara = new XElement(element);
            foreach (var rri in runReplacementInfo)
            {
                // The first remaining placeholder run belongs to the current entry.
                var runToReplace = newPara.Descendants(W.r).FirstOrDefault(rn => rn.Value == thisGuid && rn.Parent.Name != PA.Content);
                if (runToReplace == null)
                {
                    throw new OpenXmlPowerToolsException("Internal error");
                }

                if (rri.XmlExceptionMessage != null)
                {
                    runToReplace.ReplaceWith(CreateRunErrorMessage(rri.XmlExceptionMessage, te));
                }
                else if (rri.SchemaValidationMessage != null)
                {
                    runToReplace.ReplaceWith(CreateRunErrorMessage(rri.SchemaValidationMessage, te));
                }
                else
                {
                    var newXml = new XElement(rri.Xml);
                    newXml.Add(runToReplace);
                    runToReplace.ReplaceWith(newXml);
                }
            }

            return WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(newPara);
        }
    }

    // Any other element (or a paragraph without metadata): rebuild and recurse.
    return new XElement(element.Name, element.Attributes(), element.Nodes().Select(n => TransformToMetadata(n, data, te)));
}
/// <summary>
/// Compares and consolidates two test documents, saves both results in a per-test temp
/// directory, and fails if the consolidated document has validation errors whose
/// descriptions are not listed in <c>ExpectedErrors</c>.
/// </summary>
/// <param name="testId">Unique test id; used as the name of the per-test temp directory.</param>
/// <param name="name1">File name of the original document inside the test files directory.</param>
/// <param name="name2">File name of the revised document inside the test files directory.</param>
public void WC002_Consolidate_Bulk_Test(string testId, string name1, string name2)
{
    var sourceDir = new DirectoryInfo("../../../../TestFiles/");
    var source1Docx = new FileInfo(Path.Combine(sourceDir.FullName, name1));
    var source2Docx = new FileInfo(Path.Combine(sourceDir.FullName, name2));

    var rootTempDir = TestUtil.TempDir;
    var thisTestTempDir = new DirectoryInfo(Path.Combine(rootTempDir.FullName, testId));
    if (thisTestTempDir.Exists)
    {
        Assert.True(false, "Duplicate test id: " + testId);
    }
    else
    {
        thisTestTempDir.Create();
    }

    var source1CopiedToDestDocx = new FileInfo(Path.Combine(thisTestTempDir.FullName, source1Docx.Name));
    var source2CopiedToDestDocx = new FileInfo(Path.Combine(thisTestTempDir.FullName, source2Docx.Name));

    // Copies a source document into the temp directory with its template link broken,
    // unless the destination already exists.
    void CopyWithoutTemplateLink(FileInfo source, FileInfo destination)
    {
        if (destination.Exists)
        {
            return;
        }

        var original = new WmlDocument(source.FullName);
        WordprocessingMLUtil.BreakLinkToTemplate(original).SaveAs(destination.FullName);
    }

    CopyWithoutTemplateLink(source1Docx, source1CopiedToDestDocx);
    CopyWithoutTemplateLink(source2Docx, source2CopiedToDestDocx);

    var before = source1CopiedToDestDocx.Name.Replace(".docx", "");
    var after = source2CopiedToDestDocx.Name.Replace(".docx", "");
    var docxWithRevisionsFi = new FileInfo(Path.Combine(thisTestTempDir.FullName, before + "-COMPARE-" + after + ".docx"));
    var docxConsolidatedFi = new FileInfo(Path.Combine(thisTestTempDir.FullName, before + "-CONSOLIDATED-" + after + ".docx"));

    var source1Wml = new WmlDocument(source1CopiedToDestDocx.FullName);
    var source2Wml = new WmlDocument(source2CopiedToDestDocx.FullName);
    var settings = new WmlComparerSettings();

    var comparedWml = OpenXmlPowerTools.WmlComparer.WmlComparer.Compare(source1Wml, source2Wml, settings);
    WordprocessingMLUtil.BreakLinkToTemplate(comparedWml).SaveAs(docxWithRevisionsFi.FullName);

    var revisedDocInfo = new List<WmlRevisedDocumentInfo>()
    {
        new WmlRevisedDocumentInfo()
        {
            RevisedDocument = source2Wml,
            Color = Color.LightBlue,
            Revisor = "Revised by Eric White",
        }
    };
    var consolidatedWml = OpenXmlPowerTools.WmlComparer.WmlComparer.Consolidate(
        source1Wml,
        revisedDocInfo,
        settings);
    WordprocessingMLUtil.BreakLinkToTemplate(consolidatedWml).SaveAs(docxConsolidatedFi.FullName);

    var validationErrors = "";
    using (var ms = new MemoryStream())
    {
        ms.Write(consolidatedWml.DocumentByteArray, 0, consolidatedWml.DocumentByteArray.Length);
        using var wDoc = WordprocessingDocument.Open(ms, true);
        var validator = new OpenXmlValidator();

        // Materialize once: the filtered query is lazy, so enumerating it for both an
        // emptiness check and the report would run the filter (and possibly validation) twice.
        var errors = validator.Validate(wDoc)
            .Where(e => !ExpectedErrors.Contains(e.Description))
            .ToList();

        if (errors.Count > 0)
        {
            var ind = " ";
            var sb = new StringBuilder();
            foreach (var err in errors)
            {
                sb.AppendLine("Error");
                sb.AppendLine(ind + "ErrorType: " + err.ErrorType.ToString());
                sb.AppendLine(ind + "Description: " + err.Description);
                sb.AppendLine(ind + "Part: " + err.Part.Uri.ToString());
                sb.AppendLine(ind + "XPath: " + err.Path.XPath);
            }

            validationErrors = sb.ToString();
        }
    }

    if (!string.IsNullOrEmpty(validationErrors))
    {
        Assert.True(false, validationErrors);
    }
}
/// <summary>
/// Consolidates an original document with the revised documents described by
/// <paramref name="revisedDocumentsXml"/>, saves the result in a per-test temp directory,
/// and fails if the consolidated document has validation errors whose descriptions are
/// not listed in <c>ExpectedErrors</c>.
/// </summary>
/// <param name="testId">Unique test id; used as the name of the per-test temp directory.</param>
/// <param name="originalName">File name of the original document inside the test files directory.</param>
/// <param name="revisedDocumentsXml">
/// XML whose child elements each carry a <c>DocName</c> element and optional
/// <c>Color</c> and <c>Revisor</c> elements describing one revised document.
/// </param>
public void WC001_Consolidate(string testId, string originalName, string revisedDocumentsXml)
{
    var sourceDir = new DirectoryInfo("../../../../TestFiles/");
    var originalDocx = new FileInfo(Path.Combine(sourceDir.FullName, originalName));

    var rootTempDir = TestUtil.TempDir;
    var thisTestTempDir = new DirectoryInfo(Path.Combine(rootTempDir.FullName, testId));
    if (thisTestTempDir.Exists)
    {
        Assert.True(false, "Duplicate test id: " + testId);
    }
    else
    {
        thisTestTempDir.Create();
    }

    var originalCopiedToDestDocx = new FileInfo(Path.Combine(thisTestTempDir.FullName, originalDocx.Name));
    if (!originalCopiedToDestDocx.Exists)
    {
        var wml1 = new WmlDocument(originalDocx.FullName);
        var wml2 = WordprocessingMLUtil.BreakLinkToTemplate(wml1);
        wml2.SaveAs(originalCopiedToDestDocx.FullName);
    }

    // Copy every revised document into the temp directory (template link broken) and
    // describe it for the consolidation step. ToList() forces the copies to happen here.
    var revisedDocumentsXElement = XElement.Parse(revisedDocumentsXml);
    var revisedDocumentsArray = revisedDocumentsXElement
        .Elements()
        .Select(z =>
        {
            var revisedDocx = new FileInfo(Path.Combine(sourceDir.FullName, z.Element("DocName").Value));
            var revisedCopiedToDestDocx = new FileInfo(Path.Combine(thisTestTempDir.FullName, revisedDocx.Name));
            var wml1 = new WmlDocument(revisedDocx.FullName);
            var wml2 = WordprocessingMLUtil.BreakLinkToTemplate(wml1);
            wml2.SaveAs(revisedCopiedToDestDocx.FullName);
            return new WmlRevisedDocumentInfo()
            {
                RevisedDocument = new WmlDocument(revisedCopiedToDestDocx.FullName),
                Color = ColorParser.FromName(z.Element("Color")?.Value),
                Revisor = z.Element("Revisor")?.Value,
            };
        })
        .ToList();

    var consolidatedDocxName = originalCopiedToDestDocx.Name.Replace(".docx", "-Consolidated.docx");
    var consolidatedDocumentFi = new FileInfo(Path.Combine(thisTestTempDir.FullName, consolidatedDocxName));

    var source1Wml = new WmlDocument(originalCopiedToDestDocx.FullName);
    var settings = new WmlComparerSettings { DebugTempFileDi = thisTestTempDir };
    var consolidatedWml = OpenXmlPowerTools.WmlComparer.WmlComparer.Consolidate(
        source1Wml,
        revisedDocumentsArray,
        settings);
    var wml3 = WordprocessingMLUtil.BreakLinkToTemplate(consolidatedWml);
    wml3.SaveAs(consolidatedDocumentFi.FullName);

    var validationErrors = "";
    using (var ms = new MemoryStream())
    {
        ms.Write(consolidatedWml.DocumentByteArray, 0, consolidatedWml.DocumentByteArray.Length);
        using var wDoc = WordprocessingDocument.Open(ms, true);
        var validator = new OpenXmlValidator();

        // Materialize once: the filtered query is lazy, so enumerating it for both an
        // emptiness check and the report would run the filter (and possibly validation) twice.
        var errors = validator.Validate(wDoc)
            .Where(e => !ExpectedErrors.Contains(e.Description))
            .ToList();

        if (errors.Count > 0)
        {
            var ind = " ";
            var sb = new StringBuilder();
            foreach (var err in errors)
            {
                sb.AppendLine("Error");
                sb.AppendLine(ind + "ErrorType: " + err.ErrorType.ToString());
                sb.AppendLine(ind + "Description: " + err.Description);
                sb.AppendLine(ind + "Part: " + err.Part.Uri.ToString());
                sb.AppendLine(ind + "XPath: " + err.Path.XPath);
            }

            validationErrors = sb.ToString();
        }
    }

    if (!string.IsNullOrEmpty(validationErrors))
    {
        Assert.True(false, validationErrors);
    }
}
/// <summary>
/// Recursive transform that finds template fields, either in content controls
/// (<c>w:sdt</c>) without an alias or delimited in paragraph text by the
/// <c>FieldRecognizer</c> markers, registers each one with
/// <paramref name="fieldAccumulator"/>, and tags or wraps the corresponding content.
/// </summary>
/// <param name="node">Node to transform; non-element nodes are returned unchanged.</param>
/// <param name="fieldAccumulator">
/// Receives <c>BeginBlock</c>/<c>EndBlock</c> calls around each paragraph, an
/// <c>AddField</c> call per recognized field, and <c>RegisterNonFieldContentInBlock</c>
/// for paragraphs that contain embedded field markers.
/// </param>
/// <returns>
/// The transformed node, or <c>null</c> for <c>w:lastRenderedPageBreak</c> elements.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown when a placeholder run inserted for a matched field cannot be found again.
/// </exception>
private static object IdentifyAndTransformFields(XNode node, FieldAccumulator fieldAccumulator)
{
    XElement element = node as XElement;
    if (element != null)
    {
        if (element.Name == W.sdt)
        {
            var alias = (string)element.Elements(W.sdtPr).Elements(W.alias).Attributes(W.val).FirstOrDefault();
            // Only content controls without an alias (missing or empty) are field candidates.
            if (alias == null || alias == "")
            {
                var ccContents = element
                    .DescendantsTrimmed(W.txbxContent)
                    .Where(e => e.Name == W.t)
                    .Select(t => (string)t)
                    .StringConcatenate()
                    .CleanUpInvalidCharacters();
                // IsField overwrites ccContents through its out parameter.
                if (FieldRecognizer.IsField(ccContents, out ccContents))
                {
                    //var isBlockLevel = element.Element(W.sdtContent).Elements(W.p).FirstOrDefault() != null;
                    var newCC = new XElement(element.Name, element.Attributes());
                    var props = element.Elements(W.sdtPr).FirstOrDefault();
                    // NOTE: an existing w:sdtPr is detached from the INPUT element here (the
                    // input tree is mutated), so it is not added again by element.Nodes() below.
                    if (props == null) { props = new XElement(W.sdtPr); } else { props.Remove(); }
                    newCC.Add(props);
                    // Ensure a w:tag exists, then store the field id in its w:val attribute.
                    var tagElem = props.Elements(W.tag).FirstOrDefault();
                    if (tagElem == null) { tagElem = new XElement(W.tag); props.Add(tagElem); }
                    var fieldId = fieldAccumulator.AddField(ccContents);
                    tagElem.SetAttributeValue(W.val, fieldId);
                    newCC.Add(element.Nodes());
                    return(newCC);
                }
                return(new XElement(element.Name, element.Attributes(), element.Nodes().Select(n => IdentifyAndTransformFields(n, fieldAccumulator))));
            }
            return(new XElement(element.Name, element.Attributes(), element.Nodes().Select(n => IdentifyAndTransformFields(n, fieldAccumulator))));
        }
        if (element.Name == W.p)
        {
            // Every non-throwing path through this branch calls EndBlock before returning.
            fieldAccumulator.BeginBlock();
            var paraContents = element
                .DescendantsTrimmed(W.txbxContent)
                .Where(e => e.Name == W.t)
                .Select(t => (string)t)
                .StringConcatenate()
                .Trim();
            int occurances = CountSubstring(FieldRecognizer.EmbedBegin, paraContents);
            // Fast path: the paragraph text is a single embedded field and nothing else.
            if (occurances == 1 && paraContents.StartsWith(FieldRecognizer.EmbedBegin + FieldRecognizer.FieldBegin) && paraContents.EndsWith(FieldRecognizer.FieldEnd + FieldRecognizer.EmbedEnd))
            {
                // Strip the outer embed markers only; the field markers stay for IsField.
                var content = paraContents
                    .Substring(FieldRecognizer.EmbedBegin.Length, paraContents.Length - FieldRecognizer.EmbedBegin.Length - FieldRecognizer.EmbedEnd.Length)
                    .Trim();
                if (FieldRecognizer.IsField(content, out content))
                {
                    var fieldId = fieldAccumulator.AddField(content);
                    fieldAccumulator.EndBlock();
                    // The replacement run takes the w:rPr found inside the paragraph's w:pPr, if any.
                    var ppr = element.Elements(W.pPr).FirstOrDefault();
                    var rpr = (ppr != null) ? ppr.Elements(W.rPr).FirstOrDefault() : null;
                    XElement r = new XElement(W.r, rpr, new XElement(W.t, FieldRecognizer.FieldBegin + content + FieldRecognizer.FieldEnd));
                    // The new paragraph keeps only w:pPr plus the wrapped field run.
                    return(new XElement(element.Name, element.Attributes(), element.Elements(W.pPr), CCTWrap(fieldId, r) ));
                }
                // else fall through to (slower) case
            }
            // Slow path: one or more fields embedded in other paragraph text.
            if (paraContents.Contains(FieldRecognizer.EmbedBegin + FieldRecognizer.FieldBegin))
            {
                fieldAccumulator.RegisterNonFieldContentInBlock();
                var runReplacementInfo = new List <XElement>();
                // Unique placeholder text substituted for each match so the runs can be found again.
                var placeholderText = Guid.NewGuid().ToString();
                // EmbedBegin, optional whitespace, FieldBegin, lazily any text, FieldEnd,
                // optional whitespace, EmbedEnd - with the markers regex-escaped.
                var r = new Regex( Regex.Escape(FieldRecognizer.EmbedBegin) + "\\s*" + Regex.Escape(FieldRecognizer.FieldBegin) + ".*?" + Regex.Escape(FieldRecognizer.FieldEnd) + "\\s*" + Regex.Escape(FieldRecognizer.EmbedEnd));
                var replacedCount = OpenXmlRegex.Replace(new[] { element }, r, placeholderText, (para, match) => {
                    var matchString = match.Value.Trim().Replace("\u0001", ""); // unrecognized codes/elements returned as \u0001; strip these
                    var content = matchString.Substring( FieldRecognizer.EmbedBegin.Length, matchString.Length - FieldRecognizer.EmbedBegin.Length - FieldRecognizer.EmbedEnd.Length ).CleanUpInvalidCharacters();
                    // Only recognized fields are queued for replacement; returning false
                    // rejects the match.
                    if (FieldRecognizer.IsField(content, out content)) { runReplacementInfo.Add(CCWrap(new XElement(W.r, new XElement(W.t, FieldRecognizer.FieldBegin + content + FieldRecognizer.FieldEnd)))); return(true); }
                    return(false);
                }, false);
                if (replacedCount > 0)
                {
                    // NOTE(review): this copy is expected to contain the placeholder runs, i.e.
                    // OpenXmlRegex.Replace presumably edits `element` in place - confirm.
                    var newPara = new XElement(element);
                    foreach (var elem in runReplacementInfo)
                    {
                        // The first remaining placeholder run belongs to the current entry.
                        var runToReplace = newPara.Descendants(W.r).FirstOrDefault(rn => rn.Value == placeholderText && rn.Parent.Name != Templater.OD.Content);
                        if (runToReplace == null) { throw new InvalidOperationException("Internal error"); }
                        else
                        {
                            // Move the placeholder run's formatting onto the first run inside
                            // the wrapper's w:sdtContent.
                            var rpr = runToReplace.Elements(W.rPr).FirstOrDefault();
                            if (rpr != null) { rpr.Remove(); elem.Elements(W.sdtContent).First().Elements(W.r).First().AddFirst(rpr); }
                            runToReplace.ReplaceWith(elem);
                        }
                    }
                    var coalescedParagraph = WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(newPara);
                    // Re-run the transform on the result; that call issues its own
                    // BeginBlock/EndBlock pair nested inside this one.
                    var transformedContent = IdentifyAndTransformFields(coalescedParagraph, fieldAccumulator);
                    fieldAccumulator.EndBlock();
                    return(transformedContent);
                }
            }
            // No field handled above: transform children eagerly (ToArray) so that all
            // accumulator calls made by the recursion happen before EndBlock.
            var transformedParaContent = element.Nodes().Select(n => IdentifyAndTransformFields(n, fieldAccumulator)).ToArray();
            fieldAccumulator.EndBlock();
            return(new XElement(element.Name, element.Attributes(), transformedParaContent));
        }
        if (element.Name == W.lastRenderedPageBreak)
        {
            // documents assembled from templates will almost always change pagination, so remove Word's pagination hints
            // (also because they're not handled cleanly by OXPT)
            return(null);
        }
        // Any other element: copy name and attributes, recurse into child nodes.
        return(new XElement(element.Name, element.Attributes(), element.Nodes().Select(n => IdentifyAndTransformFields(n, fieldAccumulator))));
    }
    return(node);
}