public ComparisonUnitWord(IEnumerable <ComparisonUnitAtom> comparisonUnitAtomList) { Contents = comparisonUnitAtomList.OfType <ComparisonUnit>().ToList(); string sha1String = Contents.Select(c => c.SHA1Hash).StringConcatenate(); SHA1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(sha1String); }
private static IEnumerable <ComparisonUnit> GetHierarchicalComparisonUnits( IEnumerable <WithHierarchicalGroupingKey> input, int level) { IEnumerable <IGrouping <string, WithHierarchicalGroupingKey> > grouped = input .GroupAdjacent( whgk => level >= whgk.HierarchicalGroupingArray.Length ? "" : whgk.HierarchicalGroupingArray[level]); List <ComparisonUnit> retList = grouped .Select(gc => { if (gc.Key == "") { return((IEnumerable <ComparisonUnit>)gc.Select(whgk => whgk.ComparisonUnitWord).ToList()); } string[] spl = gc.Key.Split(':'); ComparisonUnitGroupType groupType = WmlComparerUtil.ComparisonUnitGroupTypeFromLocalName(spl[0]); IEnumerable <ComparisonUnit> childHierarchicalComparisonUnits = GetHierarchicalComparisonUnits(gc, level + 1); var newCompUnitGroup = new ComparisonUnitGroup(childHierarchicalComparisonUnits, groupType, level); return(new[] { newCompUnitGroup }); }) .SelectMany(m => m) .ToList(); return(retList); }
private static WmlDocument HashBlockLevelContent( WmlDocument source, WmlDocument sourceAfterProc, WmlComparerSettings settings) { using (var msSource = new MemoryStream()) using (var msAfterProc = new MemoryStream()) { msSource.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length); msAfterProc.Write(sourceAfterProc.DocumentByteArray, 0, sourceAfterProc.DocumentByteArray.Length); using (WordprocessingDocument wDocSource = WordprocessingDocument.Open(msSource, true)) using (WordprocessingDocument wDocAfterProc = WordprocessingDocument.Open(msAfterProc, true)) { // create Unid dictionary for source XDocument sourceMainXDoc = wDocSource.MainDocumentPart.GetXDocument(); XElement sourceMainRoot = sourceMainXDoc.Root ?? throw new ArgumentException(); Dictionary <string, XElement> sourceUnidDict = sourceMainRoot .Descendants() .Where(d => d.Name == W.p || d.Name == W.tbl || d.Name == W.tr) .ToDictionary(d => (string)d.Attribute(PtOpenXml.Unid)); XDocument afterProcMainXDoc = wDocAfterProc.MainDocumentPart.GetXDocument(); XElement afterProcMainRoot = afterProcMainXDoc.Root ?? throw new ArgumentException(); IEnumerable <XElement> blockLevelElements = afterProcMainRoot .Descendants() .Where(d => d.Name == W.p || d.Name == W.tbl || d.Name == W.tr); foreach (XElement blockLevelContent in blockLevelElements) { var cloneBlockLevelContentForHashing = (XElement)CloneBlockLevelContentForHashing( wDocAfterProc.MainDocumentPart, blockLevelContent, true, settings); string shaString = cloneBlockLevelContentForHashing .ToString(SaveOptions.DisableFormatting) .Replace(" xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"", ""); string sha1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(shaString); var thisUnid = (string)blockLevelContent.Attribute(PtOpenXml.Unid); if (thisUnid != null) { if (sourceUnidDict.ContainsKey(thisUnid)) { XElement correlatedBlockLevelContent = sourceUnidDict[thisUnid]; correlatedBlockLevelContent.Add(new XAttribute(PtOpenXml.CorrelatedSHA1Hash, sha1Hash)); } } } wDocSource.MainDocumentPart.PutXDocument(); } var sourceWithCorrelatedSHA1Hash = new WmlDocument(source.FileName, msSource.ToArray()); return(sourceWithCorrelatedSHA1Hash); } }
public ComparisonUnitAtom( XElement contentElement, XElement[] ancestorElements, OpenXmlPart part, WmlComparerSettings settings) { ContentElement = contentElement; AncestorElements = ancestorElements; Part = part; RevTrackElement = GetRevisionTrackingElementFromAncestors(contentElement, AncestorElements); if (RevTrackElement == null) { CorrelationStatus = CorrelationStatus.Equal; } else { if (RevTrackElement.Name == W.del) { CorrelationStatus = CorrelationStatus.Deleted; } else if (RevTrackElement.Name == W.ins) { CorrelationStatus = CorrelationStatus.Inserted; } } var sha1Hash = (string)contentElement.Attribute(PtOpenXml.SHA1Hash); if (sha1Hash != null) { SHA1Hash = sha1Hash; } else { string shaHashString = GetSha1HashStringForElement(ContentElement, settings); SHA1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(shaHashString); } }
// prohibit // - altChunk // - subDoc // - contentPart // This strips all text nodes from the XML tree, thereby leaving only the structure. private static object CloneBlockLevelContentForHashing( OpenXmlPart mainDocumentPart, XNode node, bool includeRelatedParts, WmlComparerSettings settings) { if (node is XElement element) { if (element.Name == W.bookmarkStart || element.Name == W.bookmarkEnd || element.Name == W.pPr || element.Name == W.rPr) { return(null); } if (element.Name == W.p) { var clonedPara = new XElement(element.Name, element.Attributes().Where(a => a.Name != W.rsid && a.Name != W.rsidDel && a.Name != W.rsidP && a.Name != W.rsidR && a.Name != W.rsidRDefault && a.Name != W.rsidRPr && a.Name != W.rsidSect && a.Name != W.rsidTr && a.Name.Namespace != PtOpenXml.pt), element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); IEnumerable <IGrouping <bool, XElement> > groupedRuns = clonedPara .Elements() .GroupAdjacent(e => e.Name == W.r && e.Elements().Count() == 1 && e.Element(W.t) != null); var clonedParaWithGroupedRuns = new XElement(element.Name, groupedRuns.Select(g => { if (g.Key) { string text = g.Select(t => t.Value).StringConcatenate(); if (settings.CaseInsensitive) { text = text.ToUpper(settings.CultureInfo); } var newRun = (object)new XElement(W.r, new XElement(W.t, text)); return(newRun); } return(g); })); return(clonedParaWithGroupedRuns); } if (element.Name == W.r) { IEnumerable <XElement> clonedRuns = element .Elements() .Where(e => e.Name != W.rPr) .Select(rc => new XElement(W.r, CloneBlockLevelContentForHashing(mainDocumentPart, rc, includeRelatedParts, settings))); return(clonedRuns); } if (element.Name == W.tbl) { var clonedTable = new XElement(W.tbl, element.Elements(W.tr).Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); return(clonedTable); } if (element.Name == W.tr) { var clonedRow = new XElement(W.tr, element.Elements(W.tc).Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); return(clonedRow); } if (element.Name == W.tc) { var clonedCell = new XElement(W.tc, element.Elements().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); return(clonedCell); } if (element.Name == W.tcPr) { var clonedCellProps = new XElement(W.tcPr, element.Elements(W.gridSpan).Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); return(clonedCellProps); } if (element.Name == W.gridSpan) { var clonedGridSpan = new XElement(W.gridSpan, new XAttribute("val", (string)element.Attribute(W.val))); return(clonedGridSpan); } if (element.Name == W.txbxContent) { var clonedTextbox = new XElement(W.txbxContent, element.Elements().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); return(clonedTextbox); } if (includeRelatedParts) { if (ComparisonUnitWord.ElementsWithRelationshipIds.Contains(element.Name)) { var newElement = new XElement(element.Name, element.Attributes() .Where(a => a.Name.Namespace != PtOpenXml.pt) .Where(a => !AttributesToTrimWhenCloning.Contains(a.Name)) .Select(a => { if (!ComparisonUnitWord.RelationshipAttributeNames.Contains(a.Name)) { return(a); } var rId = (string)a; // could be an hyperlink relationship try { OpenXmlPart oxp = mainDocumentPart.GetPartById(rId); if (oxp == null) { throw new FileFormatException("Invalid WordprocessingML Document"); } var anno = oxp.Annotation <PartSHA1HashAnnotation>(); if (anno != null) { return(new XAttribute(a.Name, anno.Hash)); } if (!oxp.ContentType.EndsWith("xml")) { using (Stream str = oxp.GetStream()) { byte[] ba; using (var br = new BinaryReader(str)) { ba = br.ReadBytes((int)str.Length); } string sha1 = WmlComparerUtil.SHA1HashStringForByteArray(ba); oxp.AddAnnotation(new PartSHA1HashAnnotation(sha1)); return(new XAttribute(a.Name, sha1)); } } } catch (ArgumentOutOfRangeException) { HyperlinkRelationship hr = mainDocumentPart.HyperlinkRelationships.FirstOrDefault(z => z.Id == rId); if (hr != null) { string str = hr.Uri.ToString(); return(new XAttribute(a.Name, str)); } // could be an external relationship ExternalRelationship er = mainDocumentPart.ExternalRelationships.FirstOrDefault(z => z.Id == rId); if (er != null) { string str = er.Uri.ToString(); return(new XAttribute(a.Name, str)); } return(new XAttribute(a.Name, "NULL Relationship")); } return(null); }), element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); return(newElement); } } if (element.Name == VML.shape) { return(new XElement(element.Name, element.Attributes() .Where(a => a.Name.Namespace != PtOpenXml.pt) .Where(a => a.Name != "style" && a.Name != "id" && a.Name != "type"), element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)))); } if (element.Name == O.OLEObject) { var o = new XElement(element.Name, element.Attributes() .Where(a => a.Name.Namespace != PtOpenXml.pt) .Where(a => a.Name != "ObjectID" && a.Name != R.id), element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); return(o); } if (element.Name == W._object) { var o = new XElement(element.Name, element.Attributes() .Where(a => a.Name.Namespace != PtOpenXml.pt), element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); return(o); } if (element.Name == WP.docPr) { return(new XElement(element.Name, element.Attributes() .Where(a => a.Name.Namespace != PtOpenXml.pt && a.Name != "id"), element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)))); } return(new XElement(element.Name, element.Attributes() .Where(a => a.Name.Namespace != PtOpenXml.pt) .Where(a => !AttributesToTrimWhenCloning.Contains(a.Name)), element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)))); } if (settings.CaseInsensitive) { if (node is XText xt) { string newText = xt.Value.ToUpper(settings.CultureInfo); return(new XText(newText)); } } return(node); }