示例#1
0
        /// <summary>
        /// Example entry point: consolidates the revisions of two reviewers against a common
        /// original document and saves the result into a newly created, timestamped directory.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The directory name encodes the current local time as yy-MM-dd-HHmmss.
            DateTime now = DateTime.Now;
            string outputDirName = string.Format("ExampleOutput-{0:00}-{1:00}-{2:00}-{3:00}{4:00}{5:00}", now.Year - 2000, now.Month, now.Day, now.Hour, now.Minute, now.Second);
            var outputDir = new DirectoryInfo(outputDirName);
            outputDir.Create();

            // Load the baseline first, then each reviewer's copy.
            var baseline = new WmlDocument("../../Original.docx");

            var bobRevision = new WmlRevisedDocumentInfo();
            bobRevision.RevisedDocument = new WmlDocument("../../RevisedByBob.docx");
            bobRevision.Revisor = "Bob";
            bobRevision.Color = Color.LightBlue;

            var maryRevision = new WmlRevisedDocumentInfo();
            maryRevision.RevisedDocument = new WmlDocument("../../RevisedByMary.docx");
            maryRevision.Revisor = "Mary";
            maryRevision.Color = Color.LightYellow;

            var revisedDocuments = new List<WmlRevisedDocumentInfo>();
            revisedDocuments.Add(bobRevision);
            revisedDocuments.Add(maryRevision);

            var comparerSettings = new WmlComparerSettings();
            WmlDocument consolidated = WmlComparer.Consolidate(baseline, revisedDocuments, comparerSettings);

            string outputPath = Path.Combine(outputDir.FullName, "Consolidated.docx");
            consolidated.SaveAs(outputPath);
        }
        /// <summary>
        /// Passes each child element of <paramref name="element"/> to
        /// <c>CreateComparisonUnitAtomListRecurse</c>, skipping children whose name appears in
        /// <paramref name="childElementPropertyNames"/>.
        /// </summary>
        /// <param name="part">Part that owns <paramref name="element"/>; forwarded unchanged.</param>
        /// <param name="element">Element whose direct children are processed.</param>
        /// <param name="comparisonUnitAtomList">List forwarded to the recursive call for every child.</param>
        /// <param name="childElementPropertyNames">Names of children to skip, or null to process all children.</param>
        /// <param name="settings">Comparer settings; forwarded unchanged.</param>
        private static void AnnotateElementWithProps(
            OpenXmlPart part,
            XElement element,
            List <ComparisonUnitAtom> comparisonUnitAtomList,
            XName[] childElementPropertyNames,
            WmlComparerSettings settings)
        {
            // Start from every child; narrow the (still lazy) sequence only when a filter was supplied.
            IEnumerable<XElement> children = element.Elements();
            if (childElementPropertyNames != null)
            {
                children = children.Where(child => !childElementPropertyNames.Contains(child.Name));
            }

            foreach (XElement child in children)
            {
                CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
            }
        }
示例#3
0
        /// <summary>
        /// Example entry point: compares two documents, saves the comparison result into a newly
        /// created, timestamped directory, and prints every revision found in the result.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The directory name encodes the current local time as yy-MM-dd-HHmmss.
            DateTime now = DateTime.Now;
            string outputDirName = string.Format("ExampleOutput-{0:00}-{1:00}-{2:00}-{3:00}{4:00}{5:00}", now.Year - 2000, now.Month, now.Day, now.Hour, now.Minute, now.Second);
            var outputDir = new DirectoryInfo(outputDirName);
            outputDir.Create();

            var comparerSettings = new WmlComparerSettings();
            var firstSource = new WmlDocument("../../Source1.docx");
            var secondSource = new WmlDocument("../../Source2.docx");
            WmlDocument compared = WmlComparer.Compare(firstSource, secondSource, comparerSettings);

            string outputPath = Path.Combine(outputDir.FullName, "Compared.docx");
            compared.SaveAs(outputPath);

            // Print one blank-line-separated record per revision.
            foreach (var revision in WmlComparer.GetRevisions(compared, comparerSettings))
            {
                Console.WriteLine("Author: " + revision.Author);
                Console.WriteLine("Revision type: " + revision.RevisionType);
                Console.WriteLine("Revision text: " + revision.Text);
                Console.WriteLine();
            }
        }
        /// <summary>
        /// Computes a SHA1 hash for every paragraph, table and table row of
        /// <paramref name="sourceAfterProc"/> and records it, as a <c>PtOpenXml.CorrelatedSHA1Hash</c>
        /// attribute, on the element of <paramref name="source"/> that carries the same
        /// <c>PtOpenXml.Unid</c>.
        /// </summary>
        /// <param name="source">Document whose block-level elements receive the hash attribute.</param>
        /// <param name="sourceAfterProc">Processed counterpart of <paramref name="source"/> that is hashed.</param>
        /// <param name="settings">Comparer settings, forwarded to the cloning step that prepares content for hashing.</param>
        /// <returns>A new document, built from the annotated copy of <paramref name="source"/>.</returns>
        /// <exception cref="ArgumentException">The main document part of either document has no root element.</exception>
        private static WmlDocument HashBlockLevelContent(
            WmlDocument source,
            WmlDocument sourceAfterProc,
            WmlComparerSettings settings)
        {
            using (var msSource = new MemoryStream())
            using (var msAfterProc = new MemoryStream())
            {
                // Work on in-memory copies of both packages.
                msSource.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length);
                msAfterProc.Write(sourceAfterProc.DocumentByteArray, 0, sourceAfterProc.DocumentByteArray.Length);

                using (WordprocessingDocument wDocSource = WordprocessingDocument.Open(msSource, true))
                using (WordprocessingDocument wDocAfterProc = WordprocessingDocument.Open(msAfterProc, true))
                {
                    // create Unid dictionary for source
                    XDocument sourceMainXDoc = wDocSource.MainDocumentPart.GetXDocument();
                    XElement sourceMainRoot = sourceMainXDoc.Root
                        ?? throw new ArgumentException("The main document part has no root element.", nameof(source));
                    Dictionary<string, XElement> sourceUnidDict = sourceMainRoot
                        .Descendants()
                        .Where(d => d.Name == W.p || d.Name == W.tbl || d.Name == W.tr)
                        .ToDictionary(d => (string)d.Attribute(PtOpenXml.Unid));

                    XDocument afterProcMainXDoc = wDocAfterProc.MainDocumentPart.GetXDocument();
                    XElement afterProcMainRoot = afterProcMainXDoc.Root
                        ?? throw new ArgumentException("The main document part has no root element.", nameof(sourceAfterProc));
                    IEnumerable<XElement> blockLevelElements = afterProcMainRoot
                        .Descendants()
                        .Where(d => d.Name == W.p || d.Name == W.tbl || d.Name == W.tr);

                    foreach (XElement blockLevelContent in blockLevelElements)
                    {
                        var cloneBlockLevelContentForHashing = (XElement)CloneBlockLevelContentForHashing(
                            wDocAfterProc.MainDocumentPart,
                            blockLevelContent,
                            true,
                            settings);

                        // Drop the default WordprocessingML namespace declaration from the serialized text before hashing.
                        string shaString = cloneBlockLevelContentForHashing
                            .ToString(SaveOptions.DisableFormatting)
                            .Replace(" xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"", "");

                        string sha1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(shaString);
                        var thisUnid = (string)blockLevelContent.Attribute(PtOpenXml.Unid);

                        // Single dictionary lookup; elements without a Unid or without a counterpart are skipped.
                        if (thisUnid != null && sourceUnidDict.TryGetValue(thisUnid, out XElement correlatedBlockLevelContent))
                        {
                            correlatedBlockLevelContent.Add(new XAttribute(PtOpenXml.CorrelatedSHA1Hash, sha1Hash));
                        }
                    }

                    wDocSource.MainDocumentPart.PutXDocument();
                }

                // Read the stream only after both packages have been disposed above.
                var sourceWithCorrelatedSHA1Hash = new WmlDocument(source.FileName, msSource.ToArray());
                return sourceWithCorrelatedSHA1Hash;
            }
        }
 /// <summary>
 /// Saves <paramref name="source"/> under <paramref name="name"/> inside
 /// <c>settings.DebugTempFileDi</c>, but only when <c>SaveIntermediateFilesForDebugging</c> is set
 /// and that directory is not null; otherwise does nothing.
 /// </summary>
 private static void SaveDocumentIfDesired(WmlDocument source, string name, WmlComparerSettings settings)
 {
     // Nothing to do unless debugging output is switched on and a target directory is configured.
     if (!SaveIntermediateFilesForDebugging || settings.DebugTempFileDi == null)
     {
         return;
     }

     string combinedPath = Path.Combine(settings.DebugTempFileDi.FullName, name);
     var target = new FileInfo(combinedPath);
     source.SaveAs(target.FullName);
 }
        /// <summary>
        /// Creates an empty atom list, hands it to <c>CreateComparisonUnitAtomListRecurse</c> together
        /// with <paramref name="contentParent"/>, and returns that same list.
        /// </summary>
        private static List <ComparisonUnitAtom> CreateComparisonUnitAtomListInternal(
            OpenXmlPart part,
            XElement contentParent,
            WmlComparerSettings settings)
        {
            // The recursive walker receives this list and is expected to fill it in place.
            List<ComparisonUnitAtom> atoms = new List<ComparisonUnitAtom>();
            CreateComparisonUnitAtomListRecurse(part, contentParent, atoms, settings);

            return atoms;
        }
        /// <summary>
        /// Builds the string that is later hashed for <paramref name="contentElement"/>: the element's
        /// local name followed by its text value. The text is upper-cased with
        /// <c>settings.CultureInfo</c> when <c>settings.CaseInsensitive</c> is set.
        /// Note that this method returns the un-hashed string; it does not compute the hash itself.
        /// </summary>
        private static string GetSha1HashStringForElement(XElement contentElement, WmlComparerSettings settings)
        {
            string rawText = contentElement.Value;
            string hashText = settings.CaseInsensitive
                ? rawText.ToUpper(settings.CultureInfo)
                : rawText;

            return contentElement.Name.LocalName + hashText;
        }
        /// <summary>
        /// Creates an atom for <paramref name="contentElement"/>, deriving its correlation status from
        /// the revision-tracking element found among its ancestors and taking its SHA1 hash either from
        /// the element's <c>PtOpenXml.SHA1Hash</c> attribute or, when absent, by hashing the element.
        /// </summary>
        /// <param name="contentElement">The content element this atom represents.</param>
        /// <param name="ancestorElements">Ancestors of the content element.</param>
        /// <param name="part">The part that contains the content element.</param>
        /// <param name="settings">Comparer settings used when the hash has to be computed.</param>
        public ComparisonUnitAtom(
            XElement contentElement,
            XElement[] ancestorElements,
            OpenXmlPart part,
            WmlComparerSettings settings)
        {
            ContentElement   = contentElement;
            AncestorElements = ancestorElements;
            Part             = part;
            RevTrackElement  = GetRevisionTrackingElementFromAncestors(contentElement, AncestorElements);

            // No tracking element => Equal; w:del => Deleted; w:ins => Inserted.
            // Any other tracking element leaves CorrelationStatus at its existing value.
            if (RevTrackElement == null)
            {
                CorrelationStatus = CorrelationStatus.Equal;
            }
            else if (RevTrackElement.Name == W.del)
            {
                CorrelationStatus = CorrelationStatus.Deleted;
            }
            else if (RevTrackElement.Name == W.ins)
            {
                CorrelationStatus = CorrelationStatus.Inserted;
            }

            // Prefer a hash already stored on the element; compute one only when there is none.
            var storedHash = (string)contentElement.Attribute(PtOpenXml.SHA1Hash);
            SHA1Hash = storedHash
                ?? WmlComparerUtil.SHA1HashStringForUTF8String(GetSha1HashStringForElement(ContentElement, settings));
        }
        /// <summary>
        /// Runs the preparation steps on <paramref name="contentParent"/> (content verification, Unid
        /// assignment, moving the last sectPr) and then returns the atoms produced for it as an array.
        /// </summary>
        internal static ComparisonUnitAtom[] CreateComparisonUnitAtomList(
            OpenXmlPart part,
            XElement contentParent,
            WmlComparerSettings settings)
        {
            // Preparation steps, kept in their original order.
            VerifyNoInvalidContent(contentParent);
            AssignUnidToAllElements(contentParent); // add the Guid id to every element
            MoveLastSectPrIntoLastParagraph(contentParent);

            List<ComparisonUnitAtom> atoms = CreateComparisonUnitAtomListInternal(part, contentParent, settings);
            ComparisonUnitAtom[] atomArray = atoms.ToArray();

            if (False)
            {
                // Debugging aid: dump one atom per line.
                string dump = string.Concat(atomArray.Select(atom => atom + Environment.NewLine));
                TestUtil.NotePad(dump);
            }

            return atomArray;
        }
        // The following method must be made internal if we ever turn this part of the partial class
        // into its own class.
        //
        // Turns a flat array of atoms into comparison units in four steps:
        //   1. assign every atom an integer key, so that atoms sharing a key form one word,
        //   2. group adjacent atoms that have the same key,
        //   3. wrap each group in a ComparisonUnitWord and attach the "name:unid" strings of the
        //      first atom's ancestors that are listed in ComparisonGroupingElements,
        //   4. hand the result to GetHierarchicalComparisonUnits (starting at level 0).
        // The "if (False)" sections are debugging dumps.
        private static ComparisonUnit[] GetComparisonUnitList(
            ComparisonUnitAtom[] comparisonUnitAtomList,
            WmlComparerSettings settings)
        {
            // Initial state for the rollup: no key, no atom, index counter at 0.
            var seed = new Atgbw
            {
                Key = null,
                ComparisonUnitAtomMember = null,
                NextIndex = 0
            };

            // NOTE(review): Rollup is defined elsewhere; from the way the lambda uses its arguments,
            // sr is the current atom, prevAtgbw the result produced for the previous atom, and i the
            // current index - confirm against the extension method.
            IEnumerable <Atgbw> groupingKey = comparisonUnitAtomList
                                              .Rollup(seed, (sr, prevAtgbw, i) =>
            {
                int?key;
                int nextIndex = prevAtgbw.NextIndex;
                if (sr.ContentElement.Name == W.t)
                {
                    // assumes every w:t atom has a non-empty value (chr[0] throws otherwise) - TODO confirm
                    string chr = sr.ContentElement.Value;
                    char ch    = chr[0];
                    if (ch == '.' || ch == ',')
                    {
                        // A '.' or ',' that has a digit directly before or after it keeps the
                        // current key, i.e. it stays in the same word as its neighbours.
                        var beforeIsDigit = false;
                        if (i > 0)
                        {
                            ComparisonUnitAtom prev = comparisonUnitAtomList[i - 1];
                            if (prev.ContentElement.Name == W.t && char.IsDigit(prev.ContentElement.Value[0]))
                            {
                                beforeIsDigit = true;
                            }
                        }

                        var afterIsDigit = false;
                        if (i < comparisonUnitAtomList.Length - 1)
                        {
                            ComparisonUnitAtom next = comparisonUnitAtomList[i + 1];
                            if (next.ContentElement.Name == W.t && char.IsDigit(next.ContentElement.Value[0]))
                            {
                                afterIsDigit = true;
                            }
                        }

                        if (beforeIsDigit || afterIsDigit)
                        {
                            key = nextIndex;
                        }
                        else
                        {
                            // Increment before and after taking the key: this atom gets a key of
                            // its own, and the next atom starts with a fresh index.
                            nextIndex++;
                            key = nextIndex;
                            nextIndex++;
                        }
                    }
                    else if (settings.WordSeparators.Contains(ch))
                    {
                        // A configured word separator also gets a key of its own.
                        nextIndex++;
                        key = nextIndex;
                        nextIndex++;
                    }
                    else
                    {
                        // Any other character continues the current word.
                        key = nextIndex;
                    }
                }
                else if (WordBreakElements.Contains(sr.ContentElement.Name))
                {
                    // Elements listed in WordBreakElements get a key of their own as well.
                    nextIndex++;
                    key = nextIndex;
                    nextIndex++;
                }
                else
                {
                    key = nextIndex;
                }

                return(new Atgbw
                {
                    Key = key,
                    ComparisonUnitAtomMember = sr,
                    NextIndex = nextIndex
                });
            })
                                              .ToArray();

            if (False)
            {
                var sb = new StringBuilder();
                foreach (Atgbw item in groupingKey)
                {
                    sb.Append(item.Key + Environment.NewLine);
                    sb.Append("    " + item.ComparisonUnitAtomMember.ToString(0) + Environment.NewLine);
                }

                string sbs = sb.ToString();
                TestUtil.NotePad(sbs);
            }

            // Group neighbouring atoms by the key computed above (GroupAdjacent is defined elsewhere).
            IEnumerable <IGrouping <int?, Atgbw> > groupedByWords = groupingKey
                                                                    .GroupAdjacent(gc => gc.Key)
                                                                    .ToArray();

            if (False)
            {
                var sb = new StringBuilder();
                foreach (IGrouping <int?, Atgbw> group in groupedByWords)
                {
                    sb.Append("Group ===== " + @group.Key + Environment.NewLine);
                    foreach (Atgbw gc in @group)
                    {
                        sb.Append("    " + gc.ComparisonUnitAtomMember.ToString(0) + Environment.NewLine);
                    }
                }

                string sbs = sb.ToString();
                TestUtil.NotePad(sbs);
            }

            // For each word, record which grouping ancestors (by local name and Unid) contain it;
            // only the first atom of the group is consulted.
            WithHierarchicalGroupingKey[] withHierarchicalGroupingKey = groupedByWords
                                                                        .Select(g =>
            {
                string[] hierarchicalGroupingArray = g
                                                     .First()
                                                     .ComparisonUnitAtomMember
                                                     .AncestorElements
                                                     .Where(a => ComparisonGroupingElements.Contains(a.Name))
                                                     .Select(a => a.Name.LocalName + ":" + (string)a.Attribute(PtOpenXml.Unid))
                                                     .ToArray();

                return(new WithHierarchicalGroupingKey
                {
                    ComparisonUnitWord = new ComparisonUnitWord(g.Select(gc => gc.ComparisonUnitAtomMember)),
                    HierarchicalGroupingArray = hierarchicalGroupingArray
                });
            }
                                                                                )
                                                                        .ToArray();

            if (False)
            {
                var sb = new StringBuilder();
                foreach (WithHierarchicalGroupingKey group in withHierarchicalGroupingKey)
                {
                    sb.Append("Grouping Array: " +
                              @group.HierarchicalGroupingArray.Select(gam => gam + " - ").StringConcatenate() +
                              Environment.NewLine);
                    foreach (ComparisonUnit gc in @group.ComparisonUnitWord.Contents)
                    {
                        sb.Append("    " + gc.ToString(0) + Environment.NewLine);
                    }
                }

                string sbs = sb.ToString();
                TestUtil.NotePad(sbs);
            }

            // Build the final comparison units from the words and their grouping arrays, starting at level 0.
            ComparisonUnit[] cul = GetHierarchicalComparisonUnits(withHierarchicalGroupingKey, 0).ToArray();

            if (False)
            {
                string str = ComparisonUnit.ComparisonUnitListToString(cul);
                TestUtil.NotePad(str);
            }

            return(cul);
        }
        // the following gets a flattened list of ComparisonUnitAtoms, with status indicated in each ComparisonUnitAtom: Deleted, Inserted, or Equal

        // for any deleted or inserted rows, we go into the w:trPr properties, and add the appropriate w:ins or w:del element, and therefore
        // when generating the document, the appropriate row will be marked as deleted or inserted.

        /// <summary>
        /// Lists the tracked revisions found in <paramref name="source"/>: first those of the main
        /// document part, then those of the footnotes part, then those of the endnotes part.
        /// The document is processed on an in-memory copy of its bytes.
        /// </summary>
        /// <param name="source">The document to inspect.</param>
        /// <param name="settings">Comparer settings used while building the atom list.</param>
        /// <returns>One entry per run of adjacent non-equal atoms that share the same revision-tracking markup.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
        /// <exception cref="OpenXmlPowerToolsException">The main document part has no w:body element.</exception>
        public static List <WmlComparerRevision> GetRevisions(WmlDocument source, WmlComparerSettings settings)
        {
            if (source == null)
            {
                throw new ArgumentNullException(nameof(source));
            }

            using (var ms = new MemoryStream())
            {
                ms.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length);
                using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true))
                {
                    TestForInvalidContent(wDoc);
                    RemoveExistingPowerToolsMarkup(wDoc);

                    // Fail with a descriptive error instead of passing a null body further down.
                    XElement contentParent = wDoc.MainDocumentPart.GetXDocument().Root?.Element(W.body)
                        ?? throw new OpenXmlPowerToolsException("Invalid document.");

                    // CreateComparisonUnitAtomList already returns an array; no extra copy is needed.
                    ComparisonUnitAtom[] atomList =
                        CreateComparisonUnitAtomList(wDoc.MainDocumentPart, contentParent, settings);

                    if (False)
                    {
                        var sb = new StringBuilder();
                        foreach (ComparisonUnitAtom item in atomList)
                        {
                            sb.Append(item + Environment.NewLine);
                        }
                        string sbs = sb.ToString();
                        TestUtil.NotePad(sbs);
                    }

                    // Key = correlation status, plus (for non-equal atoms) the serialized revision-tracking
                    // element without its w:id and Unid attributes, so that adjacent atoms belonging to the
                    // same logical revision end up in the same group.
                    List<IGrouping<string, ComparisonUnitAtom>> grouped = atomList
                        .GroupAdjacent(a =>
                        {
                            string key = a.CorrelationStatus.ToString();
                            if (a.CorrelationStatus != CorrelationStatus.Equal)
                            {
                                var rt = new XElement(a.RevTrackElement.Name,
                                    new XAttribute(XNamespace.Xmlns + "w",
                                        "http://schemas.openxmlformats.org/wordprocessingml/2006/main"),
                                    a.RevTrackElement.Attributes().Where(a2 => a2.Name != W.id && a2.Name != PtOpenXml.Unid));
                                key += rt.ToString(SaveOptions.DisableFormatting);
                            }

                            return key;
                        })
                        .ToList();

                    List<IGrouping<string, ComparisonUnitAtom>> revisions = grouped
                        .Where(k => k.Key != "Equal")
                        .ToList();

                    if (False)
                    {
                        var sb = new StringBuilder();
                        foreach (IGrouping<string, ComparisonUnitAtom> item in revisions)
                        {
                            sb.Append(item.Key + Environment.NewLine);
                        }

                        string sbs = sb.ToString();
                        TestUtil.NotePad(sbs);
                    }

                    List<WmlComparerRevision> mainDocPartRevisionList = revisions
                        .Select(rg =>
                        {
                            var rev = new WmlComparerRevision();

                            // The key is machine-generated, so compare it ordinally rather than by culture.
                            if (rg.Key.StartsWith("Inserted", StringComparison.Ordinal))
                            {
                                rev.RevisionType = WmlComparerRevisionType.Inserted;
                            }
                            else if (rg.Key.StartsWith("Deleted", StringComparison.Ordinal))
                            {
                                rev.RevisionType = WmlComparerRevisionType.Deleted;
                            }

                            // The first atom of the group supplies the revision metadata.
                            ComparisonUnitAtom firstAtom = rg.First();
                            XElement revTrackElement = firstAtom.RevTrackElement;
                            rev.RevisionXElement = revTrackElement;
                            rev.Author = (string)revTrackElement.Attribute(W.author);
                            rev.ContentXElement = firstAtom.ContentElement;
                            rev.Date = (string)revTrackElement.Attribute(W.date);
                            rev.PartUri = wDoc.MainDocumentPart.Uri;
                            rev.PartContentType = wDoc.MainDocumentPart.ContentType;

                            // Text is the concatenated content of the group; a w:pPr atom contributes NewLine.
                            if (!RevElementsWithNoText.Contains(rev.ContentXElement.Name))
                            {
                                rev.Text = rg
                                    .Select(rgc => rgc.ContentElement.Name == W.pPr ? NewLine : rgc.ContentElement.Value)
                                    .StringConcatenate();
                            }

                            return rev;
                        })
                        .ToList();

                    IEnumerable<WmlComparerRevision> footnotesRevisionList =
                        GetFootnoteEndnoteRevisionList(wDoc.MainDocumentPart.FootnotesPart, W.footnote, settings);
                    IEnumerable<WmlComparerRevision> endnotesRevisionList =
                        GetFootnoteEndnoteRevisionList(wDoc.MainDocumentPart.EndnotesPart, W.endnote, settings);

                    List<WmlComparerRevision> finalRevisionList = mainDocPartRevisionList
                        .Concat(footnotesRevisionList)
                        .Concat(endnotesRevisionList)
                        .ToList();

                    return finalRevisionList;
                }
            }
        }
        /// <summary>
        /// Lists the tracked revisions found in the notes of a footnotes or endnotes part.
        /// </summary>
        /// <param name="footnotesEndnotesPart">The part to inspect; null yields an empty result.</param>
        /// <param name="footnoteEndnoteElementName">Name of the note elements under the part's root (the callers in this file pass W.footnote or W.endnote).</param>
        /// <param name="settings">Comparer settings used while building the atom lists.</param>
        /// <returns>The revisions of all notes, in document order of the notes.</returns>
        /// <exception cref="OpenXmlPowerToolsException">The part has no root element.</exception>
        private static IEnumerable <WmlComparerRevision> GetFootnoteEndnoteRevisionList(
            OpenXmlPart footnotesEndnotesPart,
            XName footnoteEndnoteElementName,
            WmlComparerSettings settings)
        {
            if (footnotesEndnotesPart == null)
            {
                return Enumerable.Empty<WmlComparerRevision>();
            }

            XDocument xDoc = footnotesEndnotesPart.GetXDocument();
            IEnumerable<XElement> footnotesEndnotes =
                xDoc.Root?.Elements(footnoteEndnoteElementName) ?? throw new OpenXmlPowerToolsException("Invalid document.");

            var revisionsForPart = new List<WmlComparerRevision>();

            foreach (XElement fn in footnotesEndnotes)
            {
                // CreateComparisonUnitAtomList already returns an array; no extra copy is needed.
                ComparisonUnitAtom[] atomList = CreateComparisonUnitAtomList(footnotesEndnotesPart, fn, settings);

                if (False)
                {
                    var sb = new StringBuilder();
                    foreach (ComparisonUnitAtom item in atomList)
                    {
                        sb.Append(item + Environment.NewLine);
                    }

                    string sbs = sb.ToString();
                    TestUtil.NotePad(sbs);
                }

                // Key = correlation status, plus (for non-equal atoms) the serialized revision-tracking
                // element without its w:id and Unid attributes.
                List<IGrouping<string, ComparisonUnitAtom>> grouped = atomList
                    .GroupAdjacent(a =>
                    {
                        string key = a.CorrelationStatus.ToString();
                        if (a.CorrelationStatus != CorrelationStatus.Equal)
                        {
                            var rt = new XElement(a.RevTrackElement.Name,
                                new XAttribute(XNamespace.Xmlns + "w",
                                    "http://schemas.openxmlformats.org/wordprocessingml/2006/main"),
                                a.RevTrackElement.Attributes().Where(a2 => a2.Name != W.id && a2.Name != PtOpenXml.Unid));

                            key += rt.ToString(SaveOptions.DisableFormatting);
                        }

                        return key;
                    })
                    .ToList();

                List<IGrouping<string, ComparisonUnitAtom>> revisions = grouped
                    .Where(k => k.Key != "Equal")
                    .ToList();

                IEnumerable<WmlComparerRevision> thisNoteRevisionList = revisions
                    .Select(rg =>
                    {
                        var rev = new WmlComparerRevision();

                        // The key is machine-generated, so compare it ordinally rather than by culture.
                        if (rg.Key.StartsWith("Inserted", StringComparison.Ordinal))
                        {
                            rev.RevisionType = WmlComparerRevisionType.Inserted;
                        }
                        else if (rg.Key.StartsWith("Deleted", StringComparison.Ordinal))
                        {
                            rev.RevisionType = WmlComparerRevisionType.Deleted;
                        }

                        // The first atom of the group supplies the revision metadata.
                        ComparisonUnitAtom firstAtom = rg.First();
                        XElement revTrackElement = firstAtom.RevTrackElement;
                        rev.RevisionXElement = revTrackElement;
                        rev.Author = (string)revTrackElement.Attribute(W.author);
                        rev.ContentXElement = firstAtom.ContentElement;
                        rev.Date = (string)revTrackElement.Attribute(W.date);
                        rev.PartUri = footnotesEndnotesPart.Uri;
                        rev.PartContentType = footnotesEndnotesPart.ContentType;

                        // Text is the concatenated content of the group; a w:pPr atom contributes NewLine.
                        if (!RevElementsWithNoText.Contains(rev.ContentXElement.Name))
                        {
                            rev.Text = rg
                                .Select(rgc => rgc.ContentElement.Name == W.pPr ? NewLine : rgc.ContentElement.Value)
                                .StringConcatenate();
                        }

                        return rev;
                    });

                // AddRange enumerates the lazy projection immediately.
                revisionsForPart.AddRange(thisNoteRevisionList);
            }

            return revisionsForPart;
        }
        // prohibit
        // - altChunk
        // - subDoc
        // - contentPart

        // This strips all text nodes from the XML tree, thereby leaving only the structure.

        private static object CloneBlockLevelContentForHashing(
            OpenXmlPart mainDocumentPart,
            XNode node,
            bool includeRelatedParts,
            WmlComparerSettings settings)
        {
            if (node is XElement element)
            {
                if (element.Name == W.bookmarkStart ||
                    element.Name == W.bookmarkEnd ||
                    element.Name == W.pPr ||
                    element.Name == W.rPr)
                {
                    return(null);
                }

                if (element.Name == W.p)
                {
                    var clonedPara = new XElement(element.Name,
                                                  element.Attributes().Where(a => a.Name != W.rsid &&
                                                                             a.Name != W.rsidDel &&
                                                                             a.Name != W.rsidP &&
                                                                             a.Name != W.rsidR &&
                                                                             a.Name != W.rsidRDefault &&
                                                                             a.Name != W.rsidRPr &&
                                                                             a.Name != W.rsidSect &&
                                                                             a.Name != W.rsidTr &&
                                                                             a.Name.Namespace != PtOpenXml.pt),
                                                  element.Nodes().Select(n =>
                                                                         CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));

                    IEnumerable <IGrouping <bool, XElement> > groupedRuns = clonedPara
                                                                            .Elements()
                                                                            .GroupAdjacent(e => e.Name == W.r &&
                                                                                           e.Elements().Count() == 1 &&
                                                                                           e.Element(W.t) != null);

                    var clonedParaWithGroupedRuns = new XElement(element.Name,
                                                                 groupedRuns.Select(g =>
                    {
                        if (g.Key)
                        {
                            string text = g.Select(t => t.Value).StringConcatenate();
                            if (settings.CaseInsensitive)
                            {
                                text = text.ToUpper(settings.CultureInfo);
                            }
                            var newRun = (object)new XElement(W.r,
                                                              new XElement(W.t,
                                                                           text));
                            return(newRun);
                        }

                        return(g);
                    }));

                    return(clonedParaWithGroupedRuns);
                }

                if (element.Name == W.r)
                {
                    IEnumerable <XElement> clonedRuns = element
                                                        .Elements()
                                                        .Where(e => e.Name != W.rPr)
                                                        .Select(rc => new XElement(W.r,
                                                                                   CloneBlockLevelContentForHashing(mainDocumentPart, rc, includeRelatedParts, settings)));
                    return(clonedRuns);
                }

                if (element.Name == W.tbl)
                {
                    var clonedTable = new XElement(W.tbl,
                                                   element.Elements(W.tr).Select(n =>
                                                                                 CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
                    return(clonedTable);
                }

                if (element.Name == W.tr)
                {
                    var clonedRow = new XElement(W.tr,
                                                 element.Elements(W.tc).Select(n =>
                                                                               CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
                    return(clonedRow);
                }

                if (element.Name == W.tc)
                {
                    var clonedCell = new XElement(W.tc,
                                                  element.Elements().Select(n =>
                                                                            CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
                    return(clonedCell);
                }

                if (element.Name == W.tcPr)
                {
                    var clonedCellProps = new XElement(W.tcPr,
                                                       element.Elements(W.gridSpan).Select(n =>
                                                                                           CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
                    return(clonedCellProps);
                }

                if (element.Name == W.gridSpan)
                {
                    var clonedGridSpan = new XElement(W.gridSpan,
                                                      new XAttribute("val", (string)element.Attribute(W.val)));
                    return(clonedGridSpan);
                }

                if (element.Name == W.txbxContent)
                {
                    var clonedTextbox = new XElement(W.txbxContent,
                                                     element.Elements().Select(n =>
                                                                               CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
                    return(clonedTextbox);
                }

                if (includeRelatedParts)
                {
                    if (ComparisonUnitWord.ElementsWithRelationshipIds.Contains(element.Name))
                    {
                        var newElement = new XElement(element.Name,
                                                      element.Attributes()
                                                      .Where(a => a.Name.Namespace != PtOpenXml.pt)
                                                      .Where(a => !AttributesToTrimWhenCloning.Contains(a.Name))
                                                      .Select(a =>
                        {
                            if (!ComparisonUnitWord.RelationshipAttributeNames.Contains(a.Name))
                            {
                                return(a);
                            }

                            var rId = (string)a;

                            // could be an hyperlink relationship
                            try
                            {
                                OpenXmlPart oxp = mainDocumentPart.GetPartById(rId);
                                if (oxp == null)
                                {
                                    throw new FileFormatException("Invalid WordprocessingML Document");
                                }

                                var anno = oxp.Annotation <PartSHA1HashAnnotation>();
                                if (anno != null)
                                {
                                    return(new XAttribute(a.Name, anno.Hash));
                                }

                                if (!oxp.ContentType.EndsWith("xml"))
                                {
                                    using (Stream str = oxp.GetStream())
                                    {
                                        byte[] ba;
                                        using (var br = new BinaryReader(str))
                                        {
                                            ba = br.ReadBytes((int)str.Length);
                                        }

                                        string sha1 = WmlComparerUtil.SHA1HashStringForByteArray(ba);
                                        oxp.AddAnnotation(new PartSHA1HashAnnotation(sha1));
                                        return(new XAttribute(a.Name, sha1));
                                    }
                                }
                            }
                            catch (ArgumentOutOfRangeException)
                            {
                                HyperlinkRelationship hr =
                                    mainDocumentPart.HyperlinkRelationships.FirstOrDefault(z => z.Id == rId);
                                if (hr != null)
                                {
                                    string str = hr.Uri.ToString();
                                    return(new XAttribute(a.Name, str));
                                }

                                // could be an external relationship
                                ExternalRelationship er =
                                    mainDocumentPart.ExternalRelationships.FirstOrDefault(z => z.Id == rId);
                                if (er != null)
                                {
                                    string str = er.Uri.ToString();
                                    return(new XAttribute(a.Name, str));
                                }

                                return(new XAttribute(a.Name, "NULL Relationship"));
                            }

                            return(null);
                        }),
                                                      element.Nodes().Select(n =>
                                                                             CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
                        return(newElement);
                    }
                }

                if (element.Name == VML.shape)
                {
                    return(new XElement(element.Name,
                                        element.Attributes()
                                        .Where(a => a.Name.Namespace != PtOpenXml.pt)
                                        .Where(a => a.Name != "style" && a.Name != "id" && a.Name != "type"),
                                        element.Nodes().Select(n =>
                                                               CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))));
                }

                if (element.Name == O.OLEObject)
                {
                    var o = new XElement(element.Name,
                                         element.Attributes()
                                         .Where(a => a.Name.Namespace != PtOpenXml.pt)
                                         .Where(a => a.Name != "ObjectID" && a.Name != R.id),
                                         element.Nodes().Select(n =>
                                                                CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
                    return(o);
                }

                if (element.Name == W._object)
                {
                    var o = new XElement(element.Name,
                                         element.Attributes()
                                         .Where(a => a.Name.Namespace != PtOpenXml.pt),
                                         element.Nodes().Select(n =>
                                                                CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings)));
                    return(o);
                }

                if (element.Name == WP.docPr)
                {
                    return(new XElement(element.Name,
                                        element.Attributes()
                                        .Where(a => a.Name.Namespace != PtOpenXml.pt && a.Name != "id"),
                                        element.Nodes().Select(n =>
                                                               CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))));
                }

                return(new XElement(element.Name,
                                    element.Attributes()
                                    .Where(a => a.Name.Namespace != PtOpenXml.pt)
                                    .Where(a => !AttributesToTrimWhenCloning.Contains(a.Name)),
                                    element.Nodes().Select(n =>
                                                           CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))));
            }

            if (settings.CaseInsensitive)
            {
                if (node is XText xt)
                {
                    string newText = xt.Value.ToUpper(settings.CultureInfo);
                    return(new XText(newText));
                }
            }

            return(node);
        }
 /// <summary>
 /// Compares two documents and returns the document produced by the comparison pipeline.
 /// </summary>
 /// <param name="source1">The first (original) document.</param>
 /// <param name="source2">The second (revised) document.</param>
 /// <param name="settings">Comparer settings forwarded unchanged to <c>CompareInternal</c>.</param>
 /// <returns>Whatever <c>CompareInternal</c> returns when asked to pre-process markup in the original.</returns>
 public static WmlDocument Compare(WmlDocument source1, WmlDocument source2, WmlComparerSettings settings)
     => CompareInternal(source1, source2, settings, preProcessMarkupInOriginal: true);
        /// <summary>
        /// Debugging aid: passes the source and the produced document through
        /// <c>CleanPowerToolsAndRsid</c> and hands each result to <c>SaveDocumentIfDesired</c>.
        /// Does nothing unless <c>SaveIntermediateFilesForDebugging</c> is set and
        /// <c>settings.DebugTempFileDi</c> is non-null.
        /// </summary>
        /// <param name="source1">Document saved (after cleaning) as "Cleaned-Source.docx".</param>
        /// <param name="producedDocument">Document saved (after cleaning) as "Cleaned-Produced.docx".</param>
        /// <param name="settings">Comparer settings; supplies the debug directory.</param>
        private static void SaveCleanedDocuments(WmlDocument source1, WmlDocument producedDocument, WmlComparerSettings settings)
        {
            // Bail out early when debugging output is switched off or has nowhere to go.
            if (!SaveIntermediateFilesForDebugging || settings.DebugTempFileDi == null)
            {
                return;
            }

            // Clean-then-save each document in turn (source first, produced second).
            SaveDocumentIfDesired(CleanPowerToolsAndRsid(source1), "Cleaned-Source.docx", settings);
            SaveDocumentIfDesired(CleanPowerToolsAndRsid(producedDocument), "Cleaned-Produced.docx", settings);
        }
 /// <summary>
 /// Debugging aid: wraps the two working streams in <c>WmlDocument</c> instances and hands them
 /// to <c>SaveDocumentIfDesired</c> under the "Step5-AfterProducingDocument" names.
 /// Does nothing unless <c>SaveIntermediateFilesForDebugging</c> is set and
 /// <c>settings.DebugTempFileDi</c> is non-null.
 /// </summary>
 /// <param name="ms1">Stream holding the first source document.</param>
 /// <param name="ms2">Stream holding the second source document.</param>
 /// <param name="settings">Comparer settings; supplies the debug directory.</param>
 private static void SaveDocumentsAfterProducingDocument(MemoryStream ms1, MemoryStream ms2, WmlComparerSettings settings)
 {
     // Bail out early when debugging output is switched off or has nowhere to go.
     if (!SaveIntermediateFilesForDebugging || settings.DebugTempFileDi == null)
     {
         return;
     }

     var afterSource1 = new WmlDocument("after1.docx", ms1);
     SaveDocumentIfDesired(afterSource1, "Source1-Step5-AfterProducingDocument.docx", settings);

     var afterSource2 = new WmlDocument("after2.docx", ms2);
     SaveDocumentIfDesired(afterSource2, "Source2-Step5-AfterProducingDocument.docx", settings);
 }
        /// <summary>
        /// Recursively walks <paramref name="element"/> and appends comparison atoms to
        /// <paramref name="comparisonUnitAtomList"/>, dispatching on the element name:
        /// <list type="bullet">
        /// <item>w:body / w:footnote / w:endnote: recurse into every child element.</item>
        /// <item>w:p: recurse into every child except w:pPr, then append one atom for the
        /// paragraph's w:pPr (a new empty w:pPr element when the paragraph has none).</item>
        /// <item>w:r: recurse into every child except w:rPr.</item>
        /// <item>w:t / w:delText: append one atom per <c>char</c> of the element's text.</item>
        /// <item>Names in <c>AllowableRunChildren</c>, or w:object: append one atom for the element itself.</item>
        /// <item>Names listed in <c>RecursionElements</c>: delegate to <c>AnnotateElementWithProps</c>
        /// with that entry's <c>ChildElementPropertyNames</c>.</item>
        /// <item>Names in <c>ElementsToThrowAway</c>: ignored.</item>
        /// <item>Anything else: delegate to <c>AnnotateElementWithProps</c> with no property names.</item>
        /// </list>
        /// </summary>
        /// <param name="part">Part passed through to every created atom and to nested calls.</param>
        /// <param name="element">Element to process.</param>
        /// <param name="comparisonUnitAtomList">List that receives the created atoms, in document order.</param>
        /// <param name="settings">Comparer settings passed through to every created atom and to nested calls.</param>
        private static void CreateComparisonUnitAtomListRecurse(
            OpenXmlPart part,
            XElement element,
            List<ComparisonUnitAtom> comparisonUnitAtomList,
            WmlComparerSettings settings)
        {
            XName name = element.Name;

            // Ancestors-and-self of the current element, outermost first, cut off below the
            // enclosing w:body / w:footnotes / w:endnotes. A new array is built on every call.
            XElement[] AncestorChain() => element
                .AncestorsAndSelf()
                .TakeWhile(a => a.Name != W.body && a.Name != W.footnotes && a.Name != W.endnotes)
                .Reverse()
                .ToArray();

            // Appends one atom whose content is the given element.
            void AddAtom(XElement content) =>
                comparisonUnitAtomList.Add(new ComparisonUnitAtom(content, AncestorChain(), part, settings));

            // Recurses into each of the given children, in order.
            void RecurseInto(IEnumerable<XElement> children)
            {
                foreach (XElement child in children)
                {
                    CreateComparisonUnitAtomListRecurse(part, child, comparisonUnitAtomList, settings);
                }
            }

            // Containers: just descend.
            if (name == W.body || name == W.footnote || name == W.endnote)
            {
                RecurseInto(element.Elements());
                return;
            }

            // Paragraph: content first, then the paragraph properties as a trailing atom.
            if (name == W.p)
            {
                RecurseInto(element.Elements().Where(e => e.Name != W.pPr));

                // A paragraph without w:pPr still contributes an (empty) w:pPr atom.
                XElement paragraphProperties = element.Element(W.pPr) ?? new XElement(W.pPr);
                AddAtom(paragraphProperties);
                return;
            }

            // Run: descend into everything except the run properties.
            if (name == W.r)
            {
                RecurseInto(element.Elements().Where(e => e.Name != W.rPr));
                return;
            }

            // Text: one atom per char. Iteration is over UTF-16 code units, so a surrogate
            // pair yields two atoms.
            if (name == W.t || name == W.delText)
            {
                string text = element.Value;
                foreach (char c in text)
                {
                    AddAtom(new XElement(name, c));
                }

                return;
            }

            // Other run-level content is kept whole as a single atom.
            if (AllowableRunChildren.Contains(name) || name == W._object)
            {
                AddAtom(element);
                return;
            }

            RecursionInfo recursionInfo = RecursionElements.FirstOrDefault(z => z.ElementName == name);

            // Only elements without a recursion entry can be thrown away.
            if (recursionInfo == null && ElementsToThrowAway.Contains(name))
            {
                return;
            }

            // With a recursion entry its property names are used; otherwise null is passed.
            AnnotateElementWithProps(
                part,
                element,
                comparisonUnitAtomList,
                recursionInfo?.ChildElementPropertyNames,
                settings);
        }
        /// <summary>
        /// Runs the comparison pipeline: pre-processes the documents, computes correlated
        /// block-level hashes (accepting revisions in source1, rejecting them in source2),
        /// accepts revisions in both, and returns the result of
        /// <c>ProduceDocumentWithTrackedRevisions</c>. Intermediate documents are passed to
        /// <c>SaveDocumentIfDesired</c> after each step.
        /// </summary>
        /// <param name="source1">The first (original) document.</param>
        /// <param name="source2">The second (revised) document.</param>
        /// <param name="settings">Comparer settings; also supplies the starting id for footnotes/endnotes.</param>
        /// <param name="preProcessMarkupInOriginal">
        /// When true, <paramref name="source1"/> is run through <c>PreProcessMarkup</c>;
        /// <paramref name="source2"/> always is.
        /// </param>
        /// <returns>The document produced by <c>ProduceDocumentWithTrackedRevisions</c>.</returns>
        private static WmlDocument CompareInternal(
            WmlDocument source1,
            WmlDocument source2,
            WmlComparerSettings settings,
            bool preProcessMarkupInOriginal)
        {
            // ---- Step 1: pre-process markup -------------------------------------------------
            // The two documents get different id offsets (+1000 / +2000) on top of the
            // configured starting id.
            if (preProcessMarkupInOriginal)
            {
                source1 = PreProcessMarkup(source1, settings.StartingIdForFootnotesEndnotes + 1000);
            }

            source2 = PreProcessMarkup(source2, settings.StartingIdForFootnotesEndnotes + 2000);

            SaveDocumentIfDesired(source1, "Source1-Step1-PreProcess.docx", settings);
            SaveDocumentIfDesired(source2, "Source2-Step1-PreProcess.docx", settings);

            // Design notes (carried over from the original author):
            //
            // At this point both source1 and source2 have a unid on every element. Those values
            // enable reassembly of the XML tree, but further values are needed.
            //
            // source1:
            //   - accept tracked revisions
            //   - determine a hash code for every block-level element
            //   - save it as an attribute on every element
            // source2:
            //   - reject tracked revisions
            //   - determine a hash code for every block-level element
            //   - save it as an attribute on every element
            //
            // Accepting and rejecting tracked revisions leave the unids alone where possible, so
            // after accepting/rejecting and calculating the hash, the unids can be used to find
            // the right block-level element in the unmodified document and install the hash there.
            //
            // Sometimes, after accepting or rejecting tracked revisions, several paragraphs get
            // coalesced into a single paragraph because paragraph marks were inserted / deleted.
            // In that case some paragraphs will not get a hash injected. A paragraph without a
            // hash will never correspond to another paragraph, and such issues are resolved in
            // the normal execution of the LCS algorithm. (The original note indicates that the
            // unid is propagated through for the first paragraph.)
            //
            // Establishing the correlation: find the longest common sequence of block-level
            // elements whose hash codes are equal. Sometimes that is every block-level element
            // in the document, sometimes just a fair number of them.
            //
            // At the start of the LCS algorithm, matching content is put into corresponding
            // "unknown" correlated comparison units. Those paragraphs will only ever be matched
            // to their corresponding paragraph; then the algorithm proceeds as usual.
            //
            // ChangeFootnoteEndnoteReferencesToUniqueRange needs to be called before creating
            // the wmlResult document, so that the same GUID ids are used for footnote and
            // endnote references in both the 'after' document and the result document.

            // ---- Step 2: accept in source1, reject in source2 -------------------------------
            WmlDocument acceptedSource1 = RevisionProcessor.AcceptRevisions(source1);
            WmlDocument rejectedSource2 = RevisionProcessor.RejectRevisions(source2);

            SaveDocumentIfDesired(acceptedSource1, "Source1-Step2-AfterAccepting.docx", settings);
            SaveDocumentIfDesired(rejectedSource2, "Source2-Step2-AfterRejecting.docx", settings);

            // ---- Step 3: correlated hashes --------------------------------------------------
            // These hash codes make it possible to match up ranges of paragraphs based on
            // accepting in source1 and rejecting in source2.
            source1 = HashBlockLevelContent(source1, acceptedSource1, settings);
            source2 = HashBlockLevelContent(source2, rejectedSource2, settings);

            SaveDocumentIfDesired(source1, "Source1-Step3-AfterHashing.docx", settings);
            SaveDocumentIfDesired(source2, "Source2-Step3-AfterHashing.docx", settings);

            // ---- Step 4: accept revisions in both documents ---------------------------------
            source1 = RevisionProcessor.AcceptRevisions(source1);
            source2 = RevisionProcessor.AcceptRevisions(source2);

            SaveDocumentIfDesired(source1, "Source1-Step4-AfterAccepting.docx", settings);
            SaveDocumentIfDesired(source2, "Source2-Step4-AfterAccepting.docx", settings);

            // After accepting revisions, some unids may have been removed by the revision
            // accepter, along with the correlated SHA1 hash codes; that is as it should be.
            // Guids need to be added back to paragraphs that have had them removed.
            //
            // NOTE(review): the stream modified here is never read back into source2, so the
            // added unids appear to be discarded when the block ends -- confirm whether this
            // is intentional before changing it.
            using (var unidStream = new MemoryStream())
            {
                unidStream.Write(source2.DocumentByteArray, 0, source2.DocumentByteArray.Length);
                using (WordprocessingDocument unidDoc = WordprocessingDocument.Open(unidStream, true))
                {
                    AddUnidsToMarkupInContentParts(unidDoc);
                }
            }

            // ---- Step 5: produce the result -------------------------------------------------
            var wmlResult = new WmlDocument(source1);

            using (var stream1 = new MemoryStream())
            using (var stream2 = new MemoryStream())
            {
                stream1.Write(source1.DocumentByteArray, 0, source1.DocumentByteArray.Length);
                stream2.Write(source2.DocumentByteArray, 0, source2.DocumentByteArray.Length);

                WmlDocument produced;
                using (WordprocessingDocument doc1 = WordprocessingDocument.Open(stream1, true))
                using (WordprocessingDocument doc2 = WordprocessingDocument.Open(stream2, true))
                {
                    produced = ProduceDocumentWithTrackedRevisions(settings, wmlResult, doc1, doc2);
                }

                // Debug output is written after both packages are disposed but while the
                // backing streams are still in scope.
                SaveDocumentsAfterProducingDocument(stream1, stream2, settings);
                SaveCleanedDocuments(source1, produced, settings);

                return produced;
            }
        }