// Example entry point: consolidates the revisions made by two reviewers ("Bob" and "Mary")
// against a common original document and writes the result into a freshly created,
// timestamp-named output directory under the current working directory.
static void Main(string[] args)
{
    // The directory name encodes the local time as yy-MM-dd-HHmmss.
    DateTime now = DateTime.Now;
    string outputDirName = string.Format(
        "ExampleOutput-{0:00}-{1:00}-{2:00}-{3:00}{4:00}{5:00}",
        now.Year - 2000,
        now.Month,
        now.Day,
        now.Hour,
        now.Minute,
        now.Second);
    var outputDir = new DirectoryInfo(outputDirName);
    outputDir.Create();

    var original = new WmlDocument("../../Original.docx");

    // One entry per reviewer; each carries the reviewer name and the color assigned to it.
    var revisedByBob = new WmlRevisedDocumentInfo
    {
        RevisedDocument = new WmlDocument("../../RevisedByBob.docx"),
        Revisor = "Bob",
        Color = Color.LightBlue,
    };
    var revisedByMary = new WmlRevisedDocumentInfo
    {
        RevisedDocument = new WmlDocument("../../RevisedByMary.docx"),
        Revisor = "Mary",
        Color = Color.LightYellow,
    };
    var revisedDocuments = new List<WmlRevisedDocumentInfo> { revisedByBob, revisedByMary };

    var comparerSettings = new WmlComparerSettings();
    WmlDocument consolidated = WmlComparer.Consolidate(original, revisedDocuments, comparerSettings);

    string consolidatedPath = Path.Combine(outputDir.FullName, "Consolidated.docx");
    consolidated.SaveAs(consolidatedPath);
}
// Recurses into the child elements of `element`, skipping any child whose name is listed in
// `childElementPropertyNames`. When `childElementPropertyNames` is null, every child is processed.
// Atoms produced by the recursion are appended to `comparisonUnitAtomList`.
private static void AnnotateElementWithProps(
    OpenXmlPart part,
    XElement element,
    List<ComparisonUnitAtom> comparisonUnitAtomList,
    XName[] childElementPropertyNames,
    WmlComparerSettings settings)
{
    foreach (XElement child in element.Elements())
    {
        bool isPropertyChild =
            childElementPropertyNames != null &&
            childElementPropertyNames.Contains(child.Name);
        if (isPropertyChild)
        {
            continue;
        }

        CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
    }
}
// Example entry point: compares two documents, saves the comparison result into a freshly
// created, timestamp-named output directory, and prints every revision found in the result.
static void Main(string[] args)
{
    // The directory name encodes the local time as yy-MM-dd-HHmmss.
    DateTime now = DateTime.Now;
    string outputDirName = string.Format(
        "ExampleOutput-{0:00}-{1:00}-{2:00}-{3:00}{4:00}{5:00}",
        now.Year - 2000,
        now.Month,
        now.Day,
        now.Hour,
        now.Minute,
        now.Second);
    var outputDir = new DirectoryInfo(outputDirName);
    outputDir.Create();

    var comparerSettings = new WmlComparerSettings();
    var firstSource = new WmlDocument("../../Source1.docx");
    var secondSource = new WmlDocument("../../Source2.docx");

    WmlDocument compared = WmlComparer.Compare(firstSource, secondSource, comparerSettings);
    compared.SaveAs(Path.Combine(outputDir.FullName, "Compared.docx"));

    // Dump author, type and text of each revision, separated by a blank line.
    foreach (var revision in WmlComparer.GetRevisions(compared, comparerSettings))
    {
        Console.WriteLine("Author: " + revision.Author);
        Console.WriteLine("Revision type: " + revision.RevisionType);
        Console.WriteLine("Revision text: " + revision.Text);
        Console.WriteLine();
    }
}
// Computes a SHA1 hash for every block-level element (w:p, w:tbl, w:tr) of `sourceAfterProc`
// and records it, as a PtOpenXml.CorrelatedSHA1Hash attribute, on the element of `source` that
// carries the same PtOpenXml.Unid. Returns a new WmlDocument built from the annotated copy of
// `source`; blocks without a Unid, or whose Unid is absent from `source`, are left unannotated.
// Throws ArgumentException when either main document part has no root element.
private static WmlDocument HashBlockLevelContent(
    WmlDocument source,
    WmlDocument sourceAfterProc,
    WmlComparerSettings settings)
{
    // Block-level elements are the only ones that receive a correlated hash.
    bool IsBlockLevel(XElement candidate)
    {
        return candidate.Name == W.p || candidate.Name == W.tbl || candidate.Name == W.tr;
    }

    using (var sourceStream = new MemoryStream())
    using (var processedStream = new MemoryStream())
    {
        sourceStream.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length);
        processedStream.Write(sourceAfterProc.DocumentByteArray, 0, sourceAfterProc.DocumentByteArray.Length);

        using (WordprocessingDocument sourceDoc = WordprocessingDocument.Open(sourceStream, true))
        using (WordprocessingDocument processedDoc = WordprocessingDocument.Open(processedStream, true))
        {
            // Index the block-level elements of the unmodified source by their Unid.
            XElement sourceRoot = sourceDoc.MainDocumentPart.GetXDocument().Root;
            if (sourceRoot == null)
            {
                throw new ArgumentException();
            }

            Dictionary<string, XElement> sourceBlocksByUnid = sourceRoot
                .Descendants()
                .Where(IsBlockLevel)
                .ToDictionary(block => (string)block.Attribute(PtOpenXml.Unid));

            XElement processedRoot = processedDoc.MainDocumentPart.GetXDocument().Root;
            if (processedRoot == null)
            {
                throw new ArgumentException();
            }

            foreach (XElement processedBlock in processedRoot.Descendants().Where(IsBlockLevel))
            {
                // Hash a normalized clone, with the default WordprocessingML namespace
                // declaration removed from the serialized text.
                var hashableClone = (XElement)CloneBlockLevelContentForHashing(
                    processedDoc.MainDocumentPart,
                    processedBlock,
                    true,
                    settings);
                string serialized = hashableClone
                    .ToString(SaveOptions.DisableFormatting)
                    .Replace(" xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"", "");
                string hash = WmlComparerUtil.SHA1HashStringForUTF8String(serialized);

                var unid = (string)processedBlock.Attribute(PtOpenXml.Unid);
                if (unid == null)
                {
                    continue;
                }

                if (sourceBlocksByUnid.TryGetValue(unid, out XElement correlatedBlock))
                {
                    correlatedBlock.Add(new XAttribute(PtOpenXml.CorrelatedSHA1Hash, hash));
                }
            }

            sourceDoc.MainDocumentPart.PutXDocument();
        }

        // Read the stream only after both packages are disposed, so pending changes are flushed.
        return new WmlDocument(source.FileName, sourceStream.ToArray());
    }
}
// Debugging aid: writes `source` as `name` into settings.DebugTempFileDi, but only when
// SaveIntermediateFilesForDebugging is set and a debug directory has been configured.
private static void SaveDocumentIfDesired(WmlDocument source, string name, WmlComparerSettings settings)
{
    if (!SaveIntermediateFilesForDebugging || settings.DebugTempFileDi == null)
    {
        return;
    }

    string combinedPath = Path.Combine(settings.DebugTempFileDi.FullName, name);
    var target = new FileInfo(combinedPath);
    source.SaveAs(target.FullName);
}
// Walks `contentParent` recursively and returns the comparison unit atoms collected on the way,
// in the order in which the recursion appends them.
private static List<ComparisonUnitAtom> CreateComparisonUnitAtomListInternal(
    OpenXmlPart part,
    XElement contentParent,
    WmlComparerSettings settings)
{
    var atoms = new List<ComparisonUnitAtom>();
    CreateComparisonUnitAtomListRecurse(part, contentParent, atoms, settings);
    return atoms;
}
// Builds the string that is hashed for a content element: the element's local name followed by
// its text value. When settings.CaseInsensitive is set, the text is upper-cased with
// settings.CultureInfo first, so that case differences do not change the hash input.
private static string GetSha1HashStringForElement(XElement contentElement, WmlComparerSettings settings)
{
    string value = contentElement.Value;
    string normalized = settings.CaseInsensitive
        ? value.ToUpper(settings.CultureInfo)
        : value;

    return contentElement.Name.LocalName + normalized;
}
/// <summary>
/// Creates an atom for <paramref name="contentElement"/>, deriving its correlation status from
/// the revision tracking element found among its ancestors and its SHA1 hash from either a
/// pre-computed attribute or the element's name and text.
/// </summary>
/// <param name="contentElement">The content element this atom represents.</param>
/// <param name="ancestorElements">Ancestors of the content element, stored as given.</param>
/// <param name="part">The part the content element belongs to.</param>
/// <param name="settings">Comparer settings; used when the hash has to be computed.</param>
public ComparisonUnitAtom(
    XElement contentElement,
    XElement[] ancestorElements,
    OpenXmlPart part,
    WmlComparerSettings settings)
{
    ContentElement = contentElement;
    AncestorElements = ancestorElements;
    Part = part;
    RevTrackElement = GetRevisionTrackingElementFromAncestors(contentElement, AncestorElements);

    // No tracking element means unchanged content; w:del / w:ins map to Deleted / Inserted.
    // Any other tracking element leaves CorrelationStatus at its current value.
    if (RevTrackElement == null)
    {
        CorrelationStatus = CorrelationStatus.Equal;
    }
    else if (RevTrackElement.Name == W.del)
    {
        CorrelationStatus = CorrelationStatus.Deleted;
    }
    else if (RevTrackElement.Name == W.ins)
    {
        CorrelationStatus = CorrelationStatus.Inserted;
    }

    // Prefer a hash already stored on the element; otherwise compute one.
    var storedHash = (string)contentElement.Attribute(PtOpenXml.SHA1Hash);
    SHA1Hash = storedHash
        ?? WmlComparerUtil.SHA1HashStringForUTF8String(GetSha1HashStringForElement(ContentElement, settings));
}
// Prepares `contentParent` (content verification, Unid assignment, moving the last sectPr into
// the last paragraph) and then returns the comparison unit atoms of its content as an array.
internal static ComparisonUnitAtom[] CreateComparisonUnitAtomList(
    OpenXmlPart part,
    XElement contentParent,
    WmlComparerSettings settings)
{
    VerifyNoInvalidContent(contentParent);
    AssignUnidToAllElements(contentParent); // add the Guid id to every element
    MoveLastSectPrIntoLastParagraph(contentParent);

    List<ComparisonUnitAtom> collected = CreateComparisonUnitAtomListInternal(part, contentParent, settings);
    ComparisonUnitAtom[] atoms = collected.ToArray();

    // Debug-only dump of the atom list.
    if (False)
    {
        var dump = new StringBuilder();
        foreach (ComparisonUnitAtom atom in atoms)
        {
            dump.Append(atom + Environment.NewLine);
        }

        string dumpText = dump.ToString();
        TestUtil.NotePad(dumpText);
    }

    return atoms;
}
// The following method must be made internal if we ever turn this part of the partial class
// into its own class.
//
// Turns a flat array of atoms into hierarchical comparison units in four steps:
//   1. assign every atom an integer grouping key (see the lambda below),
//   2. group adjacent atoms that share a key (each group becomes a ComparisonUnitWord),
//   3. attach to each word the "localName:Unid" strings of those ancestors of its first atom
//      whose names are in ComparisonGroupingElements,
//   4. hand the result to GetHierarchicalComparisonUnits, starting at level 0.
// The `if (False)` blocks are debug dumps; NOTE(review): `False` is declared outside this block
// and is presumably a constant false flag - confirm.
private static ComparisonUnit[] GetComparisonUnitList(
    ComparisonUnitAtom[] comparisonUnitAtomList,
    WmlComparerSettings settings)
{
    var seed = new Atgbw { Key = null, ComparisonUnitAtomMember = null, NextIndex = 0 };
    // NOTE(review): Rollup is defined elsewhere; from its use here it appears to pass each atom,
    // the previously produced Atgbw and the atom's index to the lambda - confirm.
    IEnumerable <Atgbw> groupingKey = comparisonUnitAtomList
        .Rollup(seed, (sr, prevAtgbw, i) =>
        {
            int?key;
            int nextIndex = prevAtgbw.NextIndex;
            if (sr.ContentElement.Name == W.t)
            {
                string chr = sr.ContentElement.Value;
                // Indexes the first character; an empty w:t value would throw here.
                char ch = chr[0];
                if (ch == '.' || ch == ',')
                {
                    // A '.' or ',' that touches a digit on either side stays in the current group
                    // instead of acting as a separator.
                    var beforeIsDigit = false;
                    if (i > 0)
                    {
                        ComparisonUnitAtom prev = comparisonUnitAtomList[i - 1];
                        if (prev.ContentElement.Name == W.t && char.IsDigit(prev.ContentElement.Value[0]))
                        {
                            beforeIsDigit = true;
                        }
                    }
                    var afterIsDigit = false;
                    if (i < comparisonUnitAtomList.Length - 1)
                    {
                        ComparisonUnitAtom next = comparisonUnitAtomList[i + 1];
                        if (next.ContentElement.Name == W.t && char.IsDigit(next.ContentElement.Value[0]))
                        {
                            afterIsDigit = true;
                        }
                    }
                    if (beforeIsDigit || afterIsDigit)
                    {
                        key = nextIndex;
                    }
                    else
                    {
                        // Incrementing before and after gives this atom a key of its own and
                        // makes the following atom start a new group.
                        nextIndex++;
                        key = nextIndex;
                        nextIndex++;
                    }
                }
                else if (settings.WordSeparators.Contains(ch))
                {
                    // Word separators are isolated in a group of their own.
                    nextIndex++;
                    key = nextIndex;
                    nextIndex++;
                }
                else
                {
                    key = nextIndex;
                }
            }
            else if (WordBreakElements.Contains(sr.ContentElement.Name))
            {
                // Word-break elements are isolated in the same way as separators.
                nextIndex++;
                key = nextIndex;
                nextIndex++;
            }
            else
            {
                key = nextIndex;
            }
            return(new Atgbw { Key = key, ComparisonUnitAtomMember = sr, NextIndex = nextIndex });
        })
        .ToArray();
    // Debug dump: one line per key followed by the atom it was assigned to.
    if (False)
    {
        var sb = new StringBuilder();
        foreach (Atgbw item in groupingKey)
        {
            sb.Append(item.Key + Environment.NewLine);
            sb.Append(" " + item.ComparisonUnitAtomMember.ToString(0) + Environment.NewLine);
        }
        string sbs = sb.ToString();
        TestUtil.NotePad(sbs);
    }
    // Adjacent atoms with the same key form one group.
    IEnumerable <IGrouping <int?, Atgbw> > groupedByWords = groupingKey
        .GroupAdjacent(gc => gc.Key)
        .ToArray();
    // Debug dump of the groups.
    if (False)
    {
        var sb = new StringBuilder();
        foreach (IGrouping <int?, Atgbw> group in groupedByWords)
        {
            sb.Append("Group ===== " + @group.Key + Environment.NewLine);
            foreach (Atgbw gc in @group)
            {
                sb.Append(" " + gc.ComparisonUnitAtomMember.ToString(0) + Environment.NewLine);
            }
        }
        string sbs = sb.ToString();
        TestUtil.NotePad(sbs);
    }
    // Each group becomes a word; its hierarchy is taken from the FIRST atom of the group only.
    WithHierarchicalGroupingKey[] withHierarchicalGroupingKey = groupedByWords
        .Select(g =>
        {
            string[] hierarchicalGroupingArray = g
                .First()
                .ComparisonUnitAtomMember
                .AncestorElements
                .Where(a => ComparisonGroupingElements.Contains(a.Name))
                .Select(a => a.Name.LocalName + ":" + (string)a.Attribute(PtOpenXml.Unid))
                .ToArray();
            return(new WithHierarchicalGroupingKey { ComparisonUnitWord = new ComparisonUnitWord(g.Select(gc => gc.ComparisonUnitAtomMember)), HierarchicalGroupingArray = hierarchicalGroupingArray });
        }
        )
        .ToArray();
    // Debug dump of the words together with their grouping arrays.
    if (False)
    {
        var sb = new StringBuilder();
        foreach (WithHierarchicalGroupingKey group in withHierarchicalGroupingKey)
        {
            sb.Append("Grouping Array: " + @group.HierarchicalGroupingArray.Select(gam => gam + " - ").StringConcatenate() + Environment.NewLine);
            foreach (ComparisonUnit gc in @group.ComparisonUnitWord.Contents)
            {
                sb.Append(" " + gc.ToString(0) + Environment.NewLine);
            }
        }
        string sbs = sb.ToString();
        TestUtil.NotePad(sbs);
    }
    ComparisonUnit[] cul = GetHierarchicalComparisonUnits(withHierarchicalGroupingKey, 0).ToArray();
    // Debug dump of the final hierarchy.
    if (False)
    {
        string str = ComparisonUnit.ComparisonUnitListToString(cul);
        TestUtil.NotePad(str);
    }
    return(cul);
}
/// <summary>
/// Returns the tracked revisions (insertions and deletions) found in <paramref name="source"/>:
/// first those of the main document part, then those of the footnotes part, then those of the
/// endnotes part.
/// </summary>
/// <remarks>
/// The document bytes are copied into a memory stream and all processing happens on that copy.
/// Adjacent atoms with the same correlation status and the same revision tracking attributes
/// (ignoring w:id and the PowerTools Unid) are reported as a single revision.
/// </remarks>
/// <param name="source">The document whose tracked revisions are listed.</param>
/// <param name="settings">Comparer settings passed on to the atom-list creation.</param>
/// <returns>One <see cref="WmlComparerRevision"/> per run of adjacent, identically tracked atoms.</returns>
/// <exception cref="OpenXmlPowerToolsException">
/// The main document part has no w:body element.
/// </exception>
public static List<WmlComparerRevision> GetRevisions(WmlDocument source, WmlComparerSettings settings)
{
    using (var ms = new MemoryStream())
    {
        ms.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length);
        using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true))
        {
            TestForInvalidContent(wDoc);
            RemoveExistingPowerToolsMarkup(wDoc);

            // Fail with the same exception the footnote/endnote path uses instead of letting a
            // missing body surface later as a NullReferenceException.
            XElement contentParent = wDoc.MainDocumentPart.GetXDocument().Root?.Element(W.body)
                ?? throw new OpenXmlPowerToolsException("Invalid document.");

            ComparisonUnitAtom[] atomList =
                CreateComparisonUnitAtomList(wDoc.MainDocumentPart, contentParent, settings);

            // Debug-only dump of the atom list.
            if (False)
            {
                var sb = new StringBuilder();
                foreach (ComparisonUnitAtom item in atomList)
                {
                    sb.Append(item + Environment.NewLine);
                }

                string sbs = sb.ToString();
                TestUtil.NotePad(sbs);
            }

            // The grouping key is the status name, followed (for non-equal atoms) by the
            // serialized tracking element stripped of its id attributes, so that atoms from
            // different authors or dates do not end up in the same group.
            List<IGrouping<string, ComparisonUnitAtom>> grouped = atomList
                .GroupAdjacent(a =>
                {
                    string key = a.CorrelationStatus.ToString();
                    if (a.CorrelationStatus != CorrelationStatus.Equal)
                    {
                        var rt = new XElement(a.RevTrackElement.Name,
                            new XAttribute(XNamespace.Xmlns + "w",
                                "http://schemas.openxmlformats.org/wordprocessingml/2006/main"),
                            a.RevTrackElement.Attributes().Where(a2 => a2.Name != W.id && a2.Name != PtOpenXml.Unid));
                        key += rt.ToString(SaveOptions.DisableFormatting);
                    }

                    return key;
                })
                .ToList();

            List<IGrouping<string, ComparisonUnitAtom>> revisions = grouped
                .Where(k => k.Key != "Equal")
                .ToList();

            // Debug-only dump of the revision group keys.
            if (False)
            {
                var sb = new StringBuilder();
                foreach (IGrouping<string, ComparisonUnitAtom> item in revisions)
                {
                    sb.Append(item.Key + Environment.NewLine);
                }

                string sbs = sb.ToString();
                TestUtil.NotePad(sbs);
            }

            List<WmlComparerRevision> mainDocPartRevisionList = revisions
                .Select(rg =>
                {
                    var rev = new WmlComparerRevision();

                    // Keys are machine-generated, so compare ordinally rather than by culture.
                    if (rg.Key.StartsWith("Inserted", StringComparison.Ordinal))
                    {
                        rev.RevisionType = WmlComparerRevisionType.Inserted;
                    }
                    else if (rg.Key.StartsWith("Deleted", StringComparison.Ordinal))
                    {
                        rev.RevisionType = WmlComparerRevisionType.Deleted;
                    }

                    // Author, date and content element are taken from the first atom of the group.
                    ComparisonUnitAtom firstAtom = rg.First();
                    XElement revTrackElement = firstAtom.RevTrackElement;
                    rev.RevisionXElement = revTrackElement;
                    rev.Author = (string)revTrackElement.Attribute(W.author);
                    rev.ContentXElement = firstAtom.ContentElement;
                    rev.Date = (string)revTrackElement.Attribute(W.date);
                    rev.PartUri = wDoc.MainDocumentPart.Uri;
                    rev.PartContentType = wDoc.MainDocumentPart.ContentType;

                    // Text is the concatenated atom values; paragraph properties stand for a line break.
                    if (!RevElementsWithNoText.Contains(rev.ContentXElement.Name))
                    {
                        rev.Text = rg
                            .Select(rgc => rgc.ContentElement.Name == W.pPr ? NewLine : rgc.ContentElement.Value)
                            .StringConcatenate();
                    }

                    return rev;
                })
                .ToList();

            IEnumerable<WmlComparerRevision> footnotesRevisionList =
                GetFootnoteEndnoteRevisionList(wDoc.MainDocumentPart.FootnotesPart, W.footnote, settings);
            IEnumerable<WmlComparerRevision> endnotesRevisionList =
                GetFootnoteEndnoteRevisionList(wDoc.MainDocumentPart.EndnotesPart, W.endnote, settings);

            List<WmlComparerRevision> finalRevisionList = mainDocPartRevisionList
                .Concat(footnotesRevisionList)
                .Concat(endnotesRevisionList)
                .ToList();

            return finalRevisionList;
        }
    }
}
// Lists the tracked revisions (insertions and deletions) inside every footnote or endnote of
// `footnotesEndnotesPart`. `footnoteEndnoteElementName` selects which child elements of the
// part's root are treated as notes. Returns an empty sequence when the part is null and throws
// OpenXmlPowerToolsException when the part has no root element.
private static IEnumerable<WmlComparerRevision> GetFootnoteEndnoteRevisionList(
    OpenXmlPart footnotesEndnotesPart,
    XName footnoteEndnoteElementName,
    WmlComparerSettings settings)
{
    if (footnotesEndnotesPart == null)
    {
        return Enumerable.Empty<WmlComparerRevision>();
    }

    XElement partRoot = footnotesEndnotesPart.GetXDocument().Root;
    if (partRoot == null)
    {
        throw new OpenXmlPowerToolsException("Invalid document.");
    }

    // Status name, plus (for non-equal atoms) the serialized tracking element without its id
    // attributes; atoms sharing this key and being adjacent form one revision.
    string GroupingKeyFor(ComparisonUnitAtom atom)
    {
        string groupingKey = atom.CorrelationStatus.ToString();
        if (atom.CorrelationStatus == CorrelationStatus.Equal)
        {
            return groupingKey;
        }

        IEnumerable<XAttribute> trackingAttributes = atom.RevTrackElement
            .Attributes()
            .Where(attribute => attribute.Name != W.id && attribute.Name != PtOpenXml.Unid);
        var strippedTrackingElement = new XElement(
            atom.RevTrackElement.Name,
            new XAttribute(XNamespace.Xmlns + "w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main"),
            trackingAttributes);

        return groupingKey + strippedTrackingElement.ToString(SaveOptions.DisableFormatting);
    }

    var collectedRevisions = new List<WmlComparerRevision>();

    foreach (XElement note in partRoot.Elements(footnoteEndnoteElementName))
    {
        ComparisonUnitAtom[] noteAtoms = CreateComparisonUnitAtomList(footnotesEndnotesPart, note, settings);

        // Debug-only dump of the atom list.
        if (False)
        {
            var dump = new StringBuilder();
            foreach (ComparisonUnitAtom atom in noteAtoms)
            {
                dump.Append(atom + Environment.NewLine);
            }

            string dumpText = dump.ToString();
            TestUtil.NotePad(dumpText);
        }

        List<IGrouping<string, ComparisonUnitAtom>> adjacentGroups = noteAtoms
            .GroupAdjacent(atom => GroupingKeyFor(atom))
            .ToList();

        foreach (IGrouping<string, ComparisonUnitAtom> revisionGroup in adjacentGroups)
        {
            if (revisionGroup.Key == "Equal")
            {
                continue;
            }

            var revision = new WmlComparerRevision();
            if (revisionGroup.Key.StartsWith("Inserted"))
            {
                revision.RevisionType = WmlComparerRevisionType.Inserted;
            }
            else if (revisionGroup.Key.StartsWith("Deleted"))
            {
                revision.RevisionType = WmlComparerRevisionType.Deleted;
            }

            // Author, date and content element are taken from the first atom of the group.
            ComparisonUnitAtom leadingAtom = revisionGroup.First();
            XElement trackingElement = leadingAtom.RevTrackElement;
            revision.RevisionXElement = trackingElement;
            revision.Author = (string)trackingElement.Attribute(W.author);
            revision.ContentXElement = leadingAtom.ContentElement;
            revision.Date = (string)trackingElement.Attribute(W.date);
            revision.PartUri = footnotesEndnotesPart.Uri;
            revision.PartContentType = footnotesEndnotesPart.ContentType;

            // Text is the concatenated atom values; paragraph properties stand for a line break.
            if (!RevElementsWithNoText.Contains(revision.ContentXElement.Name))
            {
                IEnumerable<string> pieces = revisionGroup.Select(member =>
                    member.ContentElement.Name == W.pPr ? NewLine : member.ContentElement.Value);
                revision.Text = pieces.StringConcatenate();
            }

            collectedRevisions.Add(revision);
        }
    }

    return collectedRevisions;
}
// prohibit
// - altChunk
// - subDoc
// - contentPart

// Produces a normalized copy of `node` that is suitable for hashing, so that markup details
// which do not affect content (rsid attributes, PowerTools attributes, run/paragraph
// properties, bookmarks, relationship ids) do not change the hash.
//
// NOTE(review): an earlier comment here said that this "strips all text nodes ... leaving only
// the structure". The code below does not do that: text nodes are kept, and are upper-cased
// when settings.CaseInsensitive is set.
//
// Return value, by input:
//   - w:bookmarkStart, w:bookmarkEnd, w:pPr, w:rPr: null (dropped from the clone)
//   - w:r: a sequence of new w:r elements, one per non-rPr child of the run
//   - any other element: a new XElement
//   - XText when settings.CaseInsensitive is set: a new upper-cased XText
//   - anything else: the original node itself (not a copy)
private static object CloneBlockLevelContentForHashing(
    OpenXmlPart mainDocumentPart,
    XNode node,
    bool includeRelatedParts,
    WmlComparerSettings settings)
{
    if (node is XElement element)
    {
        // Elements that never take part in the hash.
        if (element.Name == W.bookmarkStart || element.Name == W.bookmarkEnd || element.Name == W.pPr || element.Name == W.rPr)
        {
            return(null);
        }
        if (element.Name == W.p)
        {
            // First pass: clone the paragraph without rsid* and PowerTools attributes.
            var clonedPara = new XElement(element.Name,
                element.Attributes().Where(a => a.Name != W.rsid && a.Name != W.rsidDel && a.Name != W.rsidP && a.Name != W.rsidR && a.Name != W.rsidRDefault && a.Name != W.rsidRPr && a.Name != W.rsidSect && a.Name != W.rsidTr && a.Name.Namespace != PtOpenXml.pt),
                element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
            // Second pass: adjacent runs that contain nothing but a single w:t are merged into
            // one run, so that the way text is split across runs does not affect the hash.
            IEnumerable <IGrouping <bool, XElement> > groupedRuns = clonedPara
                .Elements()
                .GroupAdjacent(e => e.Name == W.r && e.Elements().Count() == 1 && e.Element(W.t) != null);
            // Note that the rebuilt paragraph carries no attributes at all.
            var clonedParaWithGroupedRuns = new XElement(element.Name,
                groupedRuns.Select(g =>
                {
                    if (g.Key)
                    {
                        string text = g.Select(t => t.Value).StringConcatenate();
                        if (settings.CaseInsensitive)
                        {
                            text = text.ToUpper(settings.CultureInfo);
                        }
                        var newRun = (object)new XElement(W.r, new XElement(W.t, text));
                        return(newRun);
                    }
                    // Groups of other elements are passed through unchanged.
                    return(g);
                }));
            return(clonedParaWithGroupedRuns);
        }
        if (element.Name == W.r)
        {
            // Every non-rPr child is wrapped in a run of its own; the paragraph branch above
            // relies on this when it merges single-w:t runs.
            IEnumerable <XElement> clonedRuns = element
                .Elements()
                .Where(e => e.Name != W.rPr)
                .Select(rc => new XElement(W.r, CloneBlockLevelContentForHashing(mainDocumentPart, rc, includeRelatedParts, settings)));
            return(clonedRuns);
        }
        // Tables: only rows, cells, and (inside w:tcPr) w:gridSpan survive.
        if (element.Name == W.tbl)
        {
            var clonedTable = new XElement(W.tbl,
                element.Elements(W.tr).Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
            return(clonedTable);
        }
        if (element.Name == W.tr)
        {
            var clonedRow = new XElement(W.tr,
                element.Elements(W.tc).Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
            return(clonedRow);
        }
        if (element.Name == W.tc)
        {
            var clonedCell = new XElement(W.tc,
                element.Elements().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
            return(clonedCell);
        }
        if (element.Name == W.tcPr)
        {
            var clonedCellProps = new XElement(W.tcPr,
                element.Elements(W.gridSpan).Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
            return(clonedCellProps);
        }
        if (element.Name == W.gridSpan)
        {
            // The value is copied into an attribute named "val" in no namespace.
            var clonedGridSpan = new XElement(W.gridSpan,
                new XAttribute("val", (string)element.Attribute(W.val)));
            return(clonedGridSpan);
        }
        if (element.Name == W.txbxContent)
        {
            var clonedTextbox = new XElement(W.txbxContent,
                element.Elements().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
            return(clonedTextbox);
        }
        if (includeRelatedParts)
        {
            if (ComparisonUnitWord.ElementsWithRelationshipIds.Contains(element.Name))
            {
                // Relationship ids are replaced by something that identifies the target's
                // content (a hash of the part, or the target URI), since the ids themselves
                // may differ between documents.
                var newElement = new XElement(element.Name,
                    element.Attributes()
                        .Where(a => a.Name.Namespace != PtOpenXml.pt)
                        .Where(a => !AttributesToTrimWhenCloning.Contains(a.Name))
                        .Select(a =>
                        {
                            if (!ComparisonUnitWord.RelationshipAttributeNames.Contains(a.Name))
                            {
                                return(a);
                            }
                            var rId = (string)a;
                            // could be an hyperlink relationship
                            try
                            {
                                OpenXmlPart oxp = mainDocumentPart.GetPartById(rId);
                                if (oxp == null)
                                {
                                    throw new FileFormatException("Invalid WordprocessingML Document");
                                }
                                // The hash is cached on the part as an annotation, so each
                                // part is read at most once.
                                var anno = oxp.Annotation <PartSHA1HashAnnotation>();
                                if (anno != null)
                                {
                                    return(new XAttribute(a.Name, anno.Hash));
                                }
                                // Only parts whose content type does not end in "xml" are
                                // hashed; for the others control falls through to the
                                // return(null) below and the attribute is dropped.
                                if (!oxp.ContentType.EndsWith("xml"))
                                {
                                    using (Stream str = oxp.GetStream())
                                    {
                                        byte[] ba;
                                        using (var br = new BinaryReader(str))
                                        {
                                            ba = br.ReadBytes((int)str.Length);
                                        }
                                        string sha1 = WmlComparerUtil.SHA1HashStringForByteArray(ba);
                                        oxp.AddAnnotation(new PartSHA1HashAnnotation(sha1));
                                        return(new XAttribute(a.Name, sha1));
                                    }
                                }
                            }
                            catch (ArgumentOutOfRangeException)
                            {
                                // NOTE(review): presumably GetPartById throws this when the id
                                // does not refer to a part - confirm against the SDK in use.
                                HyperlinkRelationship hr = mainDocumentPart.HyperlinkRelationships.FirstOrDefault(z => z.Id == rId);
                                if (hr != null)
                                {
                                    string str = hr.Uri.ToString();
                                    return(new XAttribute(a.Name, str));
                                }
                                // could be an external relationship
                                ExternalRelationship er = mainDocumentPart.ExternalRelationships.FirstOrDefault(z => z.Id == rId);
                                if (er != null)
                                {
                                    string str = er.Uri.ToString();
                                    return(new XAttribute(a.Name, str));
                                }
                                return(new XAttribute(a.Name, "NULL Relationship"));
                            }
                            return(null);
                        }),
                    element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
                return(newElement);
            }
        }
        // The remaining special cases drop attributes that are excluded from the hash for the
        // respective element.
        if (element.Name == VML.shape)
        {
            return(new XElement(element.Name,
                element.Attributes()
                    .Where(a => a.Name.Namespace != PtOpenXml.pt)
                    .Where(a => a.Name != "style" && a.Name != "id" && a.Name != "type"),
                element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))));
        }
        if (element.Name == O.OLEObject)
        {
            var o = new XElement(element.Name,
                element.Attributes()
                    .Where(a => a.Name.Namespace != PtOpenXml.pt)
                    .Where(a => a.Name != "ObjectID" && a.Name != R.id),
                element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
            return(o);
        }
        if (element.Name == W._object)
        {
            var o = new XElement(element.Name,
                element.Attributes()
                    .Where(a => a.Name.Namespace != PtOpenXml.pt),
                element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
            return(o);
        }
        if (element.Name == WP.docPr)
        {
            return(new XElement(element.Name,
                element.Attributes()
                    .Where(a => a.Name.Namespace != PtOpenXml.pt && a.Name != "id"),
                element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))));
        }
        // Default: clone the element without PowerTools attributes and without the attributes
        // listed in AttributesToTrimWhenCloning.
        return(new XElement(element.Name,
            element.Attributes()
                .Where(a => a.Name.Namespace != PtOpenXml.pt)
                .Where(a => !AttributesToTrimWhenCloning.Contains(a.Name)),
            element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))));
    }
    // Non-element nodes: text is upper-cased for case-insensitive comparison; everything else
    // is returned as is.
    if (settings.CaseInsensitive)
    {
        if (node is XText xt)
        {
            string newText = xt.Value.ToUpper(settings.CultureInfo);
            return(new XText(newText));
        }
    }
    return(node);
}
/// <summary>
/// Compares <paramref name="source1"/> with <paramref name="source2"/> and returns the document
/// produced by the comparison. Markup in the original document is pre-processed as part of it.
/// </summary>
/// <param name="source1">The original document.</param>
/// <param name="source2">The revised document.</param>
/// <param name="settings">Comparer settings.</param>
/// <returns>The document produced by the internal comparison routine.</returns>
public static WmlDocument Compare(WmlDocument source1, WmlDocument source2, WmlComparerSettings settings) =>
    CompareInternal(source1, source2, settings, true);
// Debugging aid: saves cleaned copies (PowerTools markup and rsid information removed by
// CleanPowerToolsAndRsid) of the source and of the produced document. Does nothing unless
// SaveIntermediateFilesForDebugging is set and a debug directory has been configured.
private static void SaveCleanedDocuments(WmlDocument source1, WmlDocument producedDocument, WmlComparerSettings settings)
{
    bool debugOutputEnabled = SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null;
    if (!debugOutputEnabled)
    {
        return;
    }

    SaveDocumentIfDesired(CleanPowerToolsAndRsid(source1), "Cleaned-Source.docx", settings);
    SaveDocumentIfDesired(CleanPowerToolsAndRsid(producedDocument), "Cleaned-Produced.docx", settings);
}
// Debugging aid: saves the current content of both working streams as the "Step5" intermediate
// files. Does nothing unless SaveIntermediateFilesForDebugging is set and a debug directory has
// been configured.
private static void SaveDocumentsAfterProducingDocument(MemoryStream ms1, MemoryStream ms2, WmlComparerSettings settings)
{
    if (!SaveIntermediateFilesForDebugging || settings.DebugTempFileDi == null)
    {
        return;
    }

    var afterFirst = new WmlDocument("after1.docx", ms1);
    SaveDocumentIfDesired(afterFirst, "Source1-Step5-AfterProducingDocument.docx", settings);

    var afterSecond = new WmlDocument("after2.docx", ms2);
    SaveDocumentIfDesired(afterSecond, "Source2-Step5-AfterProducingDocument.docx", settings);
}
// Recursively walks `element` and appends comparison unit atoms to `comparisonUnitAtomList`:
//   - w:body, w:footnote, w:endnote: recurse into all children;
//   - w:p: recurse into all children except w:pPr, then add one atom for the paragraph
//     properties (an empty w:pPr is synthesized when the paragraph has none);
//   - w:r: recurse into all children except w:rPr;
//   - w:t, w:delText: add one atom per character;
//   - allowable run children and w:object: add one atom for the element itself;
//   - elements listed in RecursionElements: recurse, skipping their property children;
//   - elements listed in ElementsToThrowAway: ignored;
//   - anything else: recurse into all children.
private static void CreateComparisonUnitAtomListRecurse(
    OpenXmlPart part,
    XElement element,
    List<ComparisonUnitAtom> comparisonUnitAtomList,
    WmlComparerSettings settings)
{
    // Ancestors of the atom, outermost first, starting below the body / footnotes / endnotes
    // container and including `element` itself. A fresh array is built on every call.
    XElement[] BuildAncestorChain()
    {
        return element
            .AncestorsAndSelf()
            .TakeWhile(a => a.Name != W.body && a.Name != W.footnotes && a.Name != W.endnotes)
            .Reverse()
            .ToArray();
    }

    void RecurseInto(IEnumerable<XElement> children)
    {
        foreach (XElement child in children)
        {
            CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
        }
    }

    XName elementName = element.Name;

    if (elementName == W.body || elementName == W.footnote || elementName == W.endnote)
    {
        RecurseInto(element.Elements());
        return;
    }

    if (elementName == W.p)
    {
        RecurseInto(element.Elements().Where(e => e.Name != W.pPr));

        // The paragraph mark is represented by the paragraph properties, which come last.
        XElement paragraphProperties = element.Element(W.pPr) ?? new XElement(W.pPr);
        comparisonUnitAtomList.Add(
            new ComparisonUnitAtom(paragraphProperties, BuildAncestorChain(), part, settings));
        return;
    }

    if (elementName == W.r)
    {
        RecurseInto(element.Elements().Where(e => e.Name != W.rPr));
        return;
    }

    if (elementName == W.t || elementName == W.delText)
    {
        // Text is split into single-character elements, one atom each.
        foreach (char character in element.Value)
        {
            var singleCharElement = new XElement(element.Name, character);
            comparisonUnitAtomList.Add(
                new ComparisonUnitAtom(singleCharElement, BuildAncestorChain(), part, settings));
        }

        return;
    }

    if (AllowableRunChildren.Contains(elementName) || elementName == W._object)
    {
        comparisonUnitAtomList.Add(
            new ComparisonUnitAtom(element, BuildAncestorChain(), part, settings));
        return;
    }

    RecursionInfo recursionInfo = RecursionElements.FirstOrDefault(z => z.ElementName == element.Name);
    if (recursionInfo != null)
    {
        AnnotateElementWithProps(part, element, comparisonUnitAtomList, recursionInfo.ChildElementPropertyNames, settings);
        return;
    }

    if (!ElementsToThrowAway.Contains(elementName))
    {
        AnnotateElementWithProps(part, element, comparisonUnitAtomList, null, settings);
    }
}
// Compares `source1` (original) with `source2` (revised) and returns the document produced by
// ProduceDocumentWithTrackedRevisions.
//
// `preProcessMarkupInOriginal` controls whether `source1` is run through PreProcessMarkup;
// `source2` always is. Intermediate documents are written through SaveDocumentIfDesired, which
// only saves when debugging output is enabled.
private static WmlDocument CompareInternal(
    WmlDocument source1,
    WmlDocument source2,
    WmlComparerSettings settings,
    bool preProcessMarkupInOriginal)
{
    // The two documents are given different id offsets (+1000 / +2000).
    if (preProcessMarkupInOriginal)
    {
        source1 = PreProcessMarkup(source1, settings.StartingIdForFootnotesEndnotes + 1000);
    }

    source2 = PreProcessMarkup(source2, settings.StartingIdForFootnotesEndnotes + 2000);

    SaveDocumentIfDesired(source1, "Source1-Step1-PreProcess.docx", settings);
    SaveDocumentIfDesired(source2, "Source2-Step1-PreProcess.docx", settings);

    // at this point, both source1 and source2 have unid on every element. These are the values that will
    // enable reassembly of the XML tree. But we need other values.

    // In source1:
    // - accept tracked revisions
    // - determine hash code for every block-level element
    // - save as attribute on every element

    // - accept tracked revisions and reject tracked revisions leave the unids alone, where possible.
    // - after accepting and calculating the hash, then can use the unids to find the right block-level
    //   element in the unmodified source1, and install the hash

    // In source2:
    // - reject tracked revisions
    // - determine hash code for every block-level element
    // - save as an attribute on every element

    // - after rejecting and calculating the hash, then can use the unids to find the right block-level element
    //   in the unmodified source2, and install the hash

    // - sometimes after accepting or rejecting tracked revisions, several paragraphs will get coalesced into a
    //   single paragraph due to paragraph marks being inserted / deleted.
    // - in this case, some paragraphs will not get a hash injected onto them.
    // - if a paragraph doesn't have a hash, then it will never correspond to another paragraph, and such
    //   issues will need to be resolved in the normal execution of the LCS algorithm.
    // - note that when we do propagate the unid through for the first paragraph.

    // Establish correlation between the two.
    // Find the longest common sequence of block-level elements where hash codes are the same.
    // this sometimes will be every block level element in the document. Or sometimes will be just a fair
    // number of them.

    // at the start of doing the LCS algorithm, we will match up content, and put them in corresponding unknown
    // correlated comparison units. Those paragraphs will only ever be matched to their corresponding paragraph.
    // then the algorithm can proceed as usual.

    // need to call ChangeFootnoteEndnoteReferencesToUniqueRange before creating the wmlResult document, so that
    // the same GUID ids are used for footnote and endnote references in both the 'after' document, and in the
    // result document.

    WmlDocument source1AfterAccepting = RevisionProcessor.AcceptRevisions(source1);
    WmlDocument source2AfterRejecting = RevisionProcessor.RejectRevisions(source2);

    SaveDocumentIfDesired(source1AfterAccepting, "Source1-Step2-AfterAccepting.docx", settings);
    SaveDocumentIfDesired(source2AfterRejecting, "Source2-Step2-AfterRejecting.docx", settings);

    // this creates the correlated hash codes that enable us to match up ranges of paragraphs based on
    // accepting in source1, rejecting in source2
    source1 = HashBlockLevelContent(source1, source1AfterAccepting, settings);
    source2 = HashBlockLevelContent(source2, source2AfterRejecting, settings);

    SaveDocumentIfDesired(source1, "Source1-Step3-AfterHashing.docx", settings);
    SaveDocumentIfDesired(source2, "Source2-Step3-AfterHashing.docx", settings);

    // Accept revisions in before, and after
    source1 = RevisionProcessor.AcceptRevisions(source1);
    source2 = RevisionProcessor.AcceptRevisions(source2);

    SaveDocumentIfDesired(source1, "Source1-Step4-AfterAccepting.docx", settings);
    SaveDocumentIfDesired(source2, "Source2-Step4-AfterAccepting.docx", settings);

    // after accepting revisions, some unids may have been removed by revision accepter, along with the
    // correlatedSHA1Hash codes, this is as it should be.
    // but need to go back in and add guids to paragraphs that have had them removed.
    using (var ms = new MemoryStream())
    {
        ms.Write(source2.DocumentByteArray, 0, source2.DocumentByteArray.Length);
        using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true))
        {
            AddUnidsToMarkupInContentParts(wDoc);
        }

        // Persist the result. Previously the modified stream was discarded here, so the step
        // described above never reached source2. The stream is read only after the package has
        // been disposed, so that its changes are flushed.
        source2 = new WmlDocument(source2.FileName, ms.ToArray());
    }

    var wmlResult = new WmlDocument(source1);
    using (var ms1 = new MemoryStream())
    using (var ms2 = new MemoryStream())
    {
        ms1.Write(source1.DocumentByteArray, 0, source1.DocumentByteArray.Length);
        ms2.Write(source2.DocumentByteArray, 0, source2.DocumentByteArray.Length);
        WmlDocument producedDocument;
        using (WordprocessingDocument wDoc1 = WordprocessingDocument.Open(ms1, true))
        using (WordprocessingDocument wDoc2 = WordprocessingDocument.Open(ms2, true))
        {
            producedDocument = ProduceDocumentWithTrackedRevisions(settings, wmlResult, wDoc1, wDoc2);
        }

        // Debug output only; both calls are no-ops unless intermediate files are requested.
        SaveDocumentsAfterProducingDocument(ms1, ms2, settings);
        SaveCleanedDocuments(source1, producedDocument, settings);

        return producedDocument;
    }
}