Exemplo n.º 1
0
        /// <summary>
        /// Decides whether our and their versions of a node differ in a way that counts as a change,
        /// given the version (if any) in the common ancestor.
        /// </summary>
        /// <param name="ours">Our version of the node, or null when we do not have it.</param>
        /// <param name="theirs">Their version of the node, or null when they do not have it.</param>
        /// <param name="ancestor">The common ancestor's version, or null when it did not exist there.</param>
        /// <returns>
        /// When both sides are present: true unless XmlUtilities.AreXmlElementsEqual reports them equal.
        /// When at least one side is missing: false if there is no ancestor, otherwise true exactly
        /// when one side still has the node.
        /// </returns>
        private static bool AnyChanges(XmlNode ours, XmlNode theirs, XmlNode ancestor)
        {
            var bothSidesPresent = ours != null && theirs != null;
            if (bothSidesPresent)
            {
                // Either both added it (no ancestor) or both kept it; only identical content is change-free.
                return !XmlUtilities.AreXmlElementsEqual(ours, theirs);
            }

            if (ancestor == null)
            {
                // At most one side added the node, so the merge won't mess with it.
                return false;
            }

            // The ancestor had the node and at least one side deleted it:
            // that is a difference unless both sides deleted it.
            return ours != null || theirs != null;
        }
        /// <summary>
        /// Resets the "val" attribute of <paramref name="otherDateTimeNode"/> to the ancestor's value when,
        /// once the two "val" values are made equal, the parents of the two nodes compare as equal
        /// (i.e. the timestamp was the only change). Otherwise the other node keeps its own value.
        /// </summary>
        /// <param name="ancestorDateTimeNode">Date/time node from the common ancestor; may be null.</param>
        /// <param name="otherDateTimeNode">
        /// Corresponding date/time node from one side of the merge; may be null.
        /// Its "val" attribute may be overwritten.
        /// </param>
        private static void RestoreOriginalIfTimestampIsTheOnlyChange(XmlNode ancestorDateTimeNode, XmlNode otherDateTimeNode)
        {
            if (ancestorDateTimeNode == null || otherDateTimeNode == null)
            {
                return;
            }

            // XmlNode.Attributes is null for non-element nodes and the indexer returns null when the
            // attribute is absent. Without a "val" on both sides there is nothing to compare or restore.
            var ancestorAttributes = ancestorDateTimeNode.Attributes;
            var otherAttributes    = otherDateTimeNode.Attributes;

            if (ancestorAttributes == null || otherAttributes == null)
            {
                return;
            }

            var ancestorAttr = ancestorAttributes["val"];
            var otherAttr    = otherAttributes["val"];

            if (ancestorAttr == null || otherAttr == null)
            {
                return;
            }

            // Values that are the same are not of interest.
            if (ancestorAttr.Value == otherAttr.Value)
            {
                return;
            }

            // Temporarily give the other node the ancestor's timestamp so that the parents can be
            // compared with the timestamp difference taken out of the picture.
            var originalOtherValue = otherAttr.Value;

            otherAttr.Value = ancestorAttr.Value;

            if (XmlUtilities.AreXmlElementsEqual(ancestorDateTimeNode.ParentNode, otherDateTimeNode.ParentNode))
            {
                return;                 // Only change was the timestamp, so keep the ancestor's value.
            }

            // Something else changed as well, so put the other node's own timestamp back.
            otherAttr.Value = originalOtherValue;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Two UTF-8 encoded elements whose 'class' and 'guid' attribute values differ must be reported
        /// as unequal, whichever one is passed first.
        /// </summary>
        public void NonEquivalentByteArraysAreNotEqual()
        {
            const string ourXml   = @"<rt class='ScrTxtPara' guid='0030a77d-63cd-4d51-b26a-27bac7d64f17' ownerguid='046d6079-2337-425f-a8bd-b0af047fb5e5' />";
            const string theirXml = @"<rt class='LexEntry' guid='0030a77d-63cd-4d51-b26a-27bac7d64f18' ownerguid='046d6079-2337-425f-a8bd-b0af047fb5e5' />";

            var ourBytes   = Encoding.UTF8.GetBytes(ourXml);
            var theirBytes = Encoding.UTF8.GetBytes(theirXml);

            // Check both argument orders.
            Assert.That(XmlUtilities.AreXmlElementsEqual(ourBytes, theirBytes), Is.False, "ours == theirs");
            Assert.That(XmlUtilities.AreXmlElementsEqual(theirBytes, ourBytes), Is.False, "theirs == ours");
        }
 /// <summary>
 /// Asserts that XmlUtilities.AreXmlElementsEqual, applied to the two inputs after each has been
 /// passed through RemoveDeclaration, returns <paramref name="expectedToMatch"/>.
 /// </summary>
 /// <param name="expectedToMatch">The equality result the comparison is expected to produce.</param>
 /// <param name="source">First XML string.</param>
 /// <param name="target">Second XML string.</param>
 private static void CompareResults(bool expectedToMatch, string source, string target)
 {
     var preparedSource = RemoveDeclaration(source);
     var preparedTarget = RemoveDeclaration(target);
     var actuallyMatch  = XmlUtilities.AreXmlElementsEqual(preparedSource, preparedTarget);

     Assert.AreEqual(expectedToMatch, actuallyMatch);
 }
Exemplo n.º 5
0
        /// <summary>
        /// Click handler that times 100000 calls of XmlUtilities.AreXmlElementsEqual for each of four input
        /// representations of the same element (byte arrays, strings, XmlNodes and XmlInputs) and shows
        /// the elapsed milliseconds and ticks for each in a message box.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void RunLoopClicked(object sender, EventArgs e)
        {
            const int    iterations = 100000;
            const string xml        = "<element />";
            var          report     = new StringBuilder();

            // 1. Raw UTF-8 bytes.
            var bytesWatch = new Stopwatch();
            var oursAsBytes   = Encoding.UTF8.GetBytes(xml);
            var theirsAsBytes = Encoding.UTF8.GetBytes(xml);

            bytesWatch.Start();
            for (var pass = 0; pass < iterations; pass++)
            {
                XmlUtilities.AreXmlElementsEqual(oursAsBytes, theirsAsBytes);
            }
            bytesWatch.Stop();
            report.AppendFormat("Time to check (as bytes): {0}ms; {1}ticks.", bytesWatch.ElapsedMilliseconds, bytesWatch.ElapsedTicks)
                  .AppendLine();

            // 2. Strings.
            var stringWatch = new Stopwatch();

            stringWatch.Start();
            for (var pass = 0; pass < iterations; pass++)
            {
                XmlUtilities.AreXmlElementsEqual(xml, xml);
            }
            stringWatch.Stop();
            report.AppendFormat("Time to check (as string): {0}ms; {1}ticks.", stringWatch.ElapsedMilliseconds, stringWatch.ElapsedTicks)
                  .AppendLine();

            // 3. XmlNodes, both created against the same XmlDocument.
            var nodeFactory   = new XmlDocument();
            var oursAsNode    = XmlUtilities.GetDocumentNodeFromRawXml(xml, nodeFactory);
            var theirsAsNode  = XmlUtilities.GetDocumentNodeFromRawXml(xml, nodeFactory);
            var nodeWatch     = new Stopwatch();

            nodeWatch.Start();
            for (var pass = 0; pass < iterations; pass++)
            {
                XmlUtilities.AreXmlElementsEqual(oursAsNode, theirsAsNode);
            }
            nodeWatch.Stop();
            report.AppendFormat("Time to check (as XmlNode): {0}ms; {1}ticks.", nodeWatch.ElapsedMilliseconds, nodeWatch.ElapsedTicks)
                  .AppendLine();

            // 4. XmlInput wrappers.
            var oursAsInput   = new XmlInput(xml);
            var theirsAsInput = new XmlInput(xml);
            var inputWatch    = new Stopwatch();

            inputWatch.Start();
            for (var pass = 0; pass < iterations; pass++)
            {
                XmlUtilities.AreXmlElementsEqual(oursAsInput, theirsAsInput);
            }
            inputWatch.Stop();
            // The last line deliberately has no trailing newline.
            report.AppendFormat("Time to check (as XmlInput): {0}ms; {1}ticks.", inputWatch.ElapsedMilliseconds, inputWatch.ElapsedTicks);

            MessageBox.Show(report.ToString());
        }
Exemplo n.º 6
0
        /// <summary>
        /// A self-closed element and an empty element with an explicit end tag, carrying the same
        /// attribute, must compare as equal in both argument orders.
        /// </summary>
        public void ClosedNodeAndEmptyNodeWithAttrsAreEqual()
        {
            const string selfClosed      = @"<foo attr='val' />";
            const string emptyWithEndTag = @"<foo attr='val'></foo>";

            Assert.True(XmlUtilities.AreXmlElementsEqual(selfClosed, emptyWithEndTag));
            Assert.True(XmlUtilities.AreXmlElementsEqual(emptyWithEndTag, selfClosed));
        }
Exemplo n.º 7
0
        /// <summary>
        /// A self-closed element and the same element containing text must compare as unequal
        /// in both argument orders.
        /// </summary>
        public void ClosedNodeAndNewTextAreNotEqual()
        {
            const string selfClosed = @"<foo />";
            const string withText   = @"<foo>New foo text.</foo>";

            Assert.IsFalse(XmlUtilities.AreXmlElementsEqual(selfClosed, withText));
            Assert.IsFalse(XmlUtilities.AreXmlElementsEqual(withText, selfClosed));
        }
Exemplo n.º 8
0
        /// <summary>
        /// With identical attributes on both, a self-closed element and one containing text must still
        /// compare as unequal in both argument orders.
        /// </summary>
        public void ClosedNodeAndNewTextWithAttributesAreNotEqual()
        {
            const string selfClosed = @"<foo attr='val' />";
            const string withText   = @"<foo attr='val' >New foo text.</foo>";

            Assert.That(XmlUtilities.AreXmlElementsEqual(selfClosed, withText), Is.False);
            Assert.That(XmlUtilities.AreXmlElementsEqual(withText, selfClosed), Is.False);
        }
Exemplo n.º 9
0
        /// <summary>
        /// When our node is null and theirs is not, ReplaceOursWithTheirs must hand back (through the
        /// ref argument) a node that belongs to our parent's document and is equal to their node.
        /// </summary>
        public void ReplaceOursWithTheirs_OursNullTheirsNot()
        {
            var     ourParent   = CreateTestNode(@"<parent></parent>");
            var     theirParent = CreateTestNode(@"<parent><child>theirs</child></parent>");
            XmlNode theirChild  = theirParent.FirstChild;
            XmlNode ourChild    = null;            // we have no child to start with

            XmlUtilities.ReplaceOursWithTheirs(ourParent, ref ourChild, theirChild);

            Assert.AreSame(ourParent.OwnerDocument, ourChild.OwnerDocument);
            Assert.That(XmlUtilities.AreXmlElementsEqual(theirChild, ourChild), Is.True, "theirs != ours");
        }
Exemplo n.º 10
0
        /// <summary>
        /// Treats two entries as the same when their modified dates (per LiftUtils.GetModifiedDate) are equal
        /// and not the default DateTime; otherwise falls back to comparing their outer XML with
        /// XmlUtilities.AreXmlElementsEqual.
        /// </summary>
        /// <param name="alphaEntry">First entry node.</param>
        /// <param name="betaEntry">Second entry node.</param>
        /// <returns>True when the entries are considered the same by either test.</returns>
        private static bool AreTheSame(XmlNode alphaEntry, XmlNode betaEntry)
        {
            //review: why do we need to actually parse these dates?  Could we just do a string comparison?
            var modifiedDatesMatch = LiftUtils.GetModifiedDate(betaEntry) == LiftUtils.GetModifiedDate(alphaEntry);

            // A default date is not trusted as evidence of sameness.
            if (modifiedDatesMatch && LiftUtils.GetModifiedDate(betaEntry) != default(DateTime))
            {
                return true;
            }

            var alphaXml = alphaEntry.OuterXml;
            var betaXml  = betaEntry.OuterXml;

            return XmlUtilities.AreXmlElementsEqual(alphaXml, betaXml);
        }
Exemplo n.º 11
0
        /// <summary>
        /// When both sides have a child, ReplaceOursWithTheirs must leave (through the ref argument) a node
        /// that belongs to our parent's document and is equal to their node.
        /// </summary>
        public void ReplaceOursWithTheirs_OursNotNullTheirsNotNull()
        {
            var     ourParent   = CreateTestNode(@"<parent><child>mine</child></parent>");
            var     theirParent = CreateTestNode(@"<parent><child>theirs</child></parent>");
            XmlNode ourChild    = ourParent.FirstChild;
            XmlNode theirChild  = theirParent.FirstChild;

            XmlUtilities.ReplaceOursWithTheirs(ourParent, ref ourChild, theirChild);

            Assert.AreSame(ourParent.OwnerDocument, ourChild.OwnerDocument, "Returned node not in inserted into our parent document");
            Assert.That(XmlUtilities.AreXmlElementsEqual(theirChild, ourChild), Is.True, "theirs != ours");
        }
Exemplo n.º 12
0
        /// <summary>
        /// The difference between a self-closed child and the same child containing text must be detected
        /// even when it sits inside an otherwise identical parent; checked in both argument orders.
        /// </summary>
        public void NestedClosedNodeAndTextWithAttributesAreNotEqual()
        {
            const string withClosedChild = @"<foo attr='val'>
<bar attr='val'/>
</foo>";
            const string withTextChild = @"<foo attr='val'>
<bar attr='val'>new stuff.</bar>
</foo>";

            Assert.That(XmlUtilities.AreXmlElementsEqual(withClosedChild, withTextChild), Is.False);
            Assert.That(XmlUtilities.AreXmlElementsEqual(withTextChild, withClosedChild), Is.False);
        }
Exemplo n.º 13
0
        /// <summary>
        /// Two separately encoded UTF-8 byte arrays holding the same multi-line element must compare
        /// as equal in both argument orders.
        /// </summary>
        public void EquivalentByteArraysAreEqual()
        {
            const string ourXml = @"<rt class='ScrTxtPara' guid='0030a77d-63cd-4d51-b26a-27bac7d64f17' ownerguid='046d6079-2337-425f-a8bd-b0af047fb5e5'>
<Contents>
<Str>
<Run ws='tuz' />
</Str>
</Contents>
<ParseIsCurrent val='False' />
</rt>";
            const string theirXml = @"<rt class='ScrTxtPara' guid='0030a77d-63cd-4d51-b26a-27bac7d64f17' ownerguid='046d6079-2337-425f-a8bd-b0af047fb5e5'>
<Contents>
<Str>
<Run ws='tuz' />
</Str>
</Contents>
<ParseIsCurrent val='False' />
</rt>";

            var ourBytes   = Encoding.UTF8.GetBytes(ourXml);
            var theirBytes = Encoding.UTF8.GetBytes(theirXml);

            Assert.That(XmlUtilities.AreXmlElementsEqual(ourBytes, theirBytes), Is.True, "ours != theirs");
            Assert.That(XmlUtilities.AreXmlElementsEqual(theirBytes, ourBytes), Is.True, "theirs != ours");
        }
Exemplo n.º 14
0
        /// <summary>
        /// Two documents that differ only in how the empty Run element is written
        /// (explicit end tag versus self-closed) must compare as equal in both argument orders.
        /// </summary>
        public void MoveToFirstAttributeFix_HasElementsEqual()
        {
            // Run is written with an explicit end tag here ...
            const string runWithEndTag = @"<rt class='ScrTxtPara' guid='0030a77d-63cd-4d51-b26a-27bac7d64f17' ownerguid='046d6079-2337-425f-a8bd-b0af047fb5e5'>
<Contents>
<Str>
<Run ws='tuz'></Run>
</Str>
</Contents>
<ParseIsCurrent val='False' />
<StyleRules>
<Prop namedStyle='Section Head' />
</StyleRules>
<Translations>
<objsur guid='fe6f0999-ecb9-403f-abab-e934318542bc' t='o' />
</Translations>
</rt>";
            // ... and self-closed here; everything else is the same.
            const string runSelfClosed = @"<rt class='ScrTxtPara' guid='0030a77d-63cd-4d51-b26a-27bac7d64f17' ownerguid='046d6079-2337-425f-a8bd-b0af047fb5e5'>
<Contents>
<Str>
<Run ws='tuz' />
</Str>
</Contents>
<ParseIsCurrent val='False' />
<StyleRules>
<Prop namedStyle='Section Head' />
</StyleRules>
<Translations>
<objsur guid='fe6f0999-ecb9-403f-abab-e934318542bc' t='o' />
</Translations>
</rt>";

            Assert.True(XmlUtilities.AreXmlElementsEqual(runWithEndTag, runSelfClosed));
            Assert.True(XmlUtilities.AreXmlElementsEqual(runSelfClosed, runWithEndTag));
        }
Exemplo n.º 15
0
        /// <summary>
        /// Smaller variant of the same check: two documents that differ only in how the empty Run element
        /// is written (explicit end tag versus self-closed) must compare as equal in both argument orders.
        /// </summary>
        public void MoreMinimal_MoveToFirstAttributeFix_HasElementsEqual()
        {
            // Run is written with an explicit end tag here ...
            const string runWithEndTag = @"<rt class='ScrTxtPara' guid='0030a77d-63cd-4d51-b26a-27bac7d64f17' ownerguid='046d6079-2337-425f-a8bd-b0af047fb5e5'>
<Contents>
<Str>
<Run ws='tuz'></Run>
</Str>
</Contents>
<ParseIsCurrent val='False' />
</rt>";
            // ... and self-closed here.
            const string runSelfClosed = @"<rt class='ScrTxtPara' guid='0030a77d-63cd-4d51-b26a-27bac7d64f17' ownerguid='046d6079-2337-425f-a8bd-b0af047fb5e5'>
<Contents>
<Str>
<Run ws='tuz' />
</Str>
</Contents>
<ParseIsCurrent val='False' />
</rt>";

            Assert.True(XmlUtilities.AreXmlElementsEqual(runWithEndTag, runSelfClosed));
            Assert.True(XmlUtilities.AreXmlElementsEqual(runSelfClosed, runWithEndTag));
        }
Exemplo n.º 16
0
        /// <summary>
        /// Pre-processes the ldml files of a merge so that a changed generation date alone does not
        /// overwhelm the user with conflict reports.
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// 1. Load our, their and the common ancestor file; a file that is missing or whose text does not
        ///    contain "&lt;ldml&gt;" is treated as absent. Nothing is done unless both ours and theirs load.
        /// 2. Add a type="standard" attribute to the node returned by GetDefaultCollationNode for each
        ///    document that has one (the ancestor only if ours or theirs has one too).
        /// 3. Overwrite the generation date of ours and theirs with the ancestor's raw value, compare each
        ///    root with the ancestor's root, and then either move the older side up to the newer of the two
        ///    original dates (nothing else changed) or stamp both sides with the current UTC time.
        /// 4. Save only the documents flagged as changed back to their original paths.
        /// </remarks>
        /// <param name="mergeOrder">
        /// Supplies pathToOurs, pathToTheirs and pathToCommonAncestor; changed documents are saved back to those paths.
        /// </param>
        /// <param name="addedCollationAttr">
        /// Set to true when the type attribute was added to our or their default collation node; false otherwise
        /// (including when this method returns early).
        /// </param>
        private static void PreMergeFile(MergeOrder mergeOrder, out bool addedCollationAttr)
        {
            addedCollationAttr = false;
            // Each document is null when its file does not exist or its text lacks the "<ldml>" marker.
            var ourDoc    = File.Exists(mergeOrder.pathToOurs) && File.ReadAllText(mergeOrder.pathToOurs).Contains("<ldml>") ? XDocument.Load(mergeOrder.pathToOurs) : null;
            var theirDoc  = File.Exists(mergeOrder.pathToTheirs) && File.ReadAllText(mergeOrder.pathToTheirs).Contains("<ldml>") ? XDocument.Load(mergeOrder.pathToTheirs) : null;
            var commonDoc = File.Exists(mergeOrder.pathToCommonAncestor) && File.ReadAllText(mergeOrder.pathToCommonAncestor).Contains("<ldml>") ? XDocument.Load(mergeOrder.pathToCommonAncestor) : null;

            // Both sides are required; a missing common ancestor is tolerated below.
            if (ourDoc == null || theirDoc == null)
            {
                return;
            }

            // ldml files are fairly large, so track which documents actually changed and save only those.
            bool commonNeedsSave, oursNeedsSave, theirsNeedsSave;

            commonNeedsSave = oursNeedsSave = theirsNeedsSave = false;

            // Add optional key attr and default value on 'collation' element that has no 'type' attr.
            var ourDocDefaultCollation   = GetDefaultCollationNode(ourDoc);
            var theirDocDefaultCollation = GetDefaultCollationNode(theirDoc);

            if (commonDoc != null)
            {
                var commonDocDefaultCollation = GetDefaultCollationNode(commonDoc);
                if (commonDocDefaultCollation != null)
                {
                    if (ourDocDefaultCollation != null || theirDocDefaultCollation != null)
                    {
                        // add type attribute to the commonDoc only when we are certain it will also be added to at least one modified document
                        commonDocDefaultCollation.Add(new XAttribute("type", "standard"));
                        commonNeedsSave = true;
                    }
                }
            }
            if (ourDocDefaultCollation != null)
            {
                ourDocDefaultCollation.Add(new XAttribute("type", "standard"));
                oursNeedsSave      = true;
                addedCollationAttr = true;
            }
            if (theirDocDefaultCollation != null)
            {
                theirDocDefaultCollation.Add(new XAttribute("type", "standard"));
                theirsNeedsSave    = true;
                addedCollationAttr = true;
            }

            // If there is no commonDoc then the results will be DateTime.MinValue and null
            string ancestorRawGenDate;

            // Only the raw string is needed for the ancestor; the returned value is discarded.
            GetGenDate(commonDoc, out ancestorRawGenDate);

            string theirRawGenDate;
            var    theirGenDate = GetGenDate(theirDoc, out theirRawGenDate);

            string ourRawGenDate;
            var    ourGenDate = GetGenDate(ourDoc, out ourRawGenDate);

            // Overwrite both generation dates with the ancestor's so the date itself cannot make the
            // equality tests below fail.
            // If there was no common we will set the date to empty.
            // The xml equality test will fail and the results will be correct
            theirDoc.Root?.Element("identity")?.Element("generation")?.Attribute("date")?.SetValue(ancestorRawGenDate ?? string.Empty);
            ourDoc.Root?.Element("identity")?.Element("generation")?.Attribute("date")?.SetValue(ancestorRawGenDate ?? string.Empty);
            // if only the generation date has changed just use the newest timestamp
            if (XmlUtilities.AreXmlElementsEqual(commonDoc?.Root, theirDoc.Root) &&
                XmlUtilities.AreXmlElementsEqual(commonDoc?.Root, ourDoc.Root))
            {
                // Pre-merge <generation> date attr to newest
                // (the expression below evaluates to the later of the two dates)
                var mostRecentGenDate = ourGenDate == theirGenDate ? ourGenDate : ourGenDate > theirGenDate ? ourGenDate : theirGenDate;

                // NOTE(review): a side whose date already equals mostRecentGenDate is not touched here, so
                // in memory its date attribute still holds the ancestor value written above; it only reaches
                // disk if its NeedsSave flag was set by the collation step - confirm this is intended.
                if (theirGenDate != mostRecentGenDate)
                {
                    theirDoc.Root?.Element("identity")?.Element("generation")?.Attribute("date")?.SetValue(mostRecentGenDate);
                    theirsNeedsSave = true;
                }

                if (ourGenDate != mostRecentGenDate)
                {
                    ourDoc.Root?.Element("identity")?.Element("generation")?.Attribute("date")?.SetValue(mostRecentGenDate);
                    oursNeedsSave = true;
                }
            }
            else
            {
                // Some real content changed so update to the current time to represent the merge
                var mostRecentGenDateRaw = DateTimeProvider.Current.UtcNow.ToISO8601TimeFormatWithUTCString();
                theirDoc.Root?.Element("identity")?.Element("generation")?.Attribute("date")?.SetValue(mostRecentGenDateRaw);
                ourDoc.Root?.Element("identity")?.Element("generation")?.Attribute("date")?.SetValue(mostRecentGenDateRaw);
                oursNeedsSave = theirsNeedsSave = true;
            }

            // Write back only what changed, each document to the path it was loaded from.
            if (commonNeedsSave)
            {
                commonDoc?.Save(mergeOrder.pathToCommonAncestor);
            }

            if (theirsNeedsSave)
            {
                theirDoc?.Save(mergeOrder.pathToTheirs);
            }

            if (oursNeedsSave)
            {
                ourDoc?.Save(mergeOrder.pathToOurs);
            }
        }
Exemplo n.º 17
0
        /// <summary>
        /// Report the differences between two versions of files in the repository.
        /// </summary>
        /// <remarks>
        /// Every key present in both indexes is removed from <paramref name="childIndex"/> during the comparison,
        /// so the caller's dictionary is modified; whatever is left in it at the end is reported as an addition.
        /// Values are decoded as UTF-8 before being compared.
        /// </remarks>
        /// <param name="parentFileInRevision">The parent revision's file, passed on to the change reports.</param>
        /// <param name="parentIndex">Raw XML bytes of each record in the parent, by key.</param>
        /// <param name="childFileInRevision">The child revision's file, passed on to the change reports.</param>
        /// <param name="childIndex">Raw XML bytes of each record in the child, by key. Modified by this call.</param>
        /// <returns>Zero or more change reports.</returns>
        public static IEnumerable <IChangeReport> ReportDifferences(
            FileInRevision parentFileInRevision, Dictionary <string, byte[]> parentIndex,
            FileInRevision childFileInRevision, Dictionary <string, byte[]> childIndex)
        {
            const string deletedAttr   = "dateDeleted=";
            var          changeReports = new List <IChangeReport>();
            var          enc           = Encoding.UTF8;
            var          parentDoc     = new XmlDocument();
            var          childDoc      = new XmlDocument();

            foreach (var kvpParent in parentIndex)
            {
                var    parentKey = kvpParent.Key;
                byte[] childValue;

                if (!childIndex.TryGetValue(parentKey, out childValue))
                {
                    // The record is gone from the child.
                    // Don't report deletions where there was a tombstone, but then someone removed the entry (which is what FLEx does).
                    var removedParentStr = enc.GetString(kvpParent.Value);
                    if (!removedParentStr.Contains(deletedAttr))
                    {
                        // Route tested
                        changeReports.Add(new XmlDeletionChangeReport(
                                              parentFileInRevision,
                                              XmlUtilities.GetDocumentNodeFromRawXml(removedParentStr, parentDoc),
                                              null));           // No child node: it was deleted.
                    }
                    continue;
                }

                // Seen in both revisions, so it cannot be an addition.
                childIndex.Remove(parentKey);

                // It is faster to turn both into strings and compare those than to compare byte by byte.
                var parentStr = enc.GetString(kvpParent.Value);
                var childStr  = enc.GetString(childValue);
                if (parentStr == childStr)
                {
                    continue;                     // Route tested
                }

                // May have added 'dateDeleted' attr, in which case treat it as deleted, not changed.
                // NB: This is only for Lift diffing, not FW diffing,
                // so figure a way to have the client do this kind of check.
                if (childStr.Contains(deletedAttr))
                {
                    // Only report it as deleted, if it is not already marked as deleted in the parent.
                    if (!parentStr.Contains(deletedAttr))
                    {
                        // Route tested
                        changeReports.Add(new XmlDeletionChangeReport(
                                              parentFileInRevision,
                                              XmlUtilities.GetDocumentNodeFromRawXml(parentStr, parentDoc),
                                              XmlUtilities.GetDocumentNodeFromRawXml(childStr, childDoc)));
                    }
                    continue;
                }

                try
                {
                    // Textually different, but possibly still equal as XML.
                    if (XmlUtilities.AreXmlElementsEqual(new XmlInput(childStr), new XmlInput(parentStr)))
                    {
                        continue;                         // Route tested
                    }
                }
                catch (Exception error)
                {
                    // Deliberately broad: a failed comparison is reported as its own kind of change
                    // instead of aborting the whole diff.
                    // Route not tested, and I don't know how to get XmlUtilities.AreXmlElementsEqual to throw.
                    changeReports.Add(new ErrorDeterminingChangeReport(
                                          parentFileInRevision,
                                          childFileInRevision,
                                          XmlUtilities.GetDocumentNodeFromRawXml(parentStr, parentDoc),
                                          XmlUtilities.GetDocumentNodeFromRawXml(childStr, childDoc),
                                          error));
                    continue;
                }

                // NB: This comment is from the class description of XmlChangedRecordReport
                // This may only be useful for quick, high-level identification that an entry changed,
                // leaving *what* changed to a second pass, if needed by the user
                // I (RBR), believe this can overproduce, otherwise useless change reports in a merge, if the merger uses it.
                // Route tested
                changeReports.Add(new XmlChangedRecordReport(
                                      parentFileInRevision,
                                      childFileInRevision,
                                      XmlUtilities.GetDocumentNodeFromRawXml(parentStr, parentDoc),
                                      XmlUtilities.GetDocumentNodeFromRawXml(childStr, childDoc)));
            }

            // Values that are still in childIndex are new,
            // since values that were not new have been removed by now.
            foreach (var child in childIndex.Values)
            {
                // Route tested
                changeReports.Add(new XmlAdditionChangeReport(
                                      childFileInRevision,
                                      XmlUtilities.GetDocumentNodeFromRawXml(enc.GetString(child), childDoc)));
            }

            return(changeReports);
        }
Exemplo n.º 18
0
        private void TestFileButtonClicked(object sender, EventArgs e)
        {
            var sb         = new StringBuilder();
            var currentSet = new HashSet <string>();

            var liftDoc = XDocument.Load(_liftPathname);

#if !ORIGINAL
            //liftDoc.Root.Element("header").Remove();
            foreach (var entryElement in liftDoc.Root.Elements("entry").ToArray())
            {
                foreach (var gonerChild in entryElement.Elements().Where(child => child.Name.LocalName != "variant").ToArray())
                {
                    gonerChild.Remove();
                }
                if (entryElement.Elements("variant").Count() < 2)
                {
                    entryElement.Remove();
                }

                // Check variant elements
                currentSet.Clear();
                var duplicateVariantsAndCounts = new Dictionary <string, List <XElement> >();
                {
                    foreach (var variantElement in entryElement.Elements("variant"))
                    {
                        var             currentStr = variantElement.ToString();
                        List <XElement> dups;
                        if (!duplicateVariantsAndCounts.TryGetValue(currentStr, out dups))
                        {
                            duplicateVariantsAndCounts.Add(currentStr, new List <XElement> {
                                variantElement
                            });
                        }
                        else
                        {
                            dups.AddRange(from duplicateVariantsAndCountTempKvp in duplicateVariantsAndCounts
                                          where XmlUtilities.AreXmlElementsEqual(duplicateVariantsAndCountTempKvp.Key, currentStr)
                                          select variantElement);
                        }
                    }
                }
                foreach (var variantKvp in duplicateVariantsAndCounts.Where(variantKvp => variantKvp.Value.Count == 1))
                {
                    currentSet.Add(variantKvp.Key);
                }
                foreach (var key in currentSet)
                {
                    duplicateVariantsAndCounts[key][0].Remove();
                    duplicateVariantsAndCounts.Remove(key);
                }
                if (duplicateVariantsAndCounts.Count > 0)
                {
                    entryElement.Attributes().Where(attr => attr.Name.LocalName != "guid").Remove();
                    entryElement.Add(new XAttribute("TOTALDUPVARIANTCOUNT", entryElement.Elements("variant").Count()));
                    foreach (var dupList in duplicateVariantsAndCounts.Values)
                    {
                        for (var i = 1; i < dupList.Count; ++i)
                        {
                            dupList[i].Remove();
                        }
                        dupList[0].Add(new XAttribute("DUPVARCOUNT", dupList.Count));
                    }
                }
            }
            foreach (var gonnerEntry in liftDoc.Root.Elements("entry").Where(entry => !entry.HasElements).ToArray())
            {
                gonnerEntry.Remove();
            }

            liftDoc.Root.Attributes().Remove();
            liftDoc.Root.Add(new XAttribute("ENTRIESWITHDUPVARCOUNT", liftDoc.Root.Elements("entry").Count()));

            liftDoc.Save(_liftPathname.Replace(".lift", "-variants-new.lift"));
#else
#if false
            // Check out header element.
            // For now, only work with root/header/fields/field (<form> elelments).
            foreach (var headerFieldElement in liftDoc.Root.Element("header").Element("fields").Elements("field"))
            {
                var fieldTagAttrValue = headerFieldElement.Attribute("tag").Value;
                currentSet.Clear();
                foreach (var headerFieldFormAttrValue in headerFieldElement.Elements("form").Select(formAltElement => formAltElement.Attribute("lang").Value))
                {
                    if (currentSet.Contains(headerFieldFormAttrValue) || currentSet.Contains(headerFieldFormAttrValue.ToLowerInvariant()))
                    {
                        sb.AppendFormat("Found header field form element with duplicate 'lang' attribute '{0}' in header field with tag '{1}'", headerFieldFormAttrValue, fieldTagAttrValue);
                        sb.AppendLine();
                    }
                    else
                    {
                        currentSet.Add(headerFieldFormAttrValue);
                    }
                }
            }
#endif

            foreach (var entryElement in liftDoc.Root.Elements("entry"))
            {
                if (entryElement.Attribute("dateDeleted") != null)
                {
                    continue;
                }

                var entryGuid = entryElement.Attribute("guid").Value;

#if false
                // 1. Check out "form' alts in:

                /*
                 * <lexical-unit>
                 * <form
                 * lang="azj-Latn">
                 * <text>asqır</text>
                 * </form>
                 * </lexical-unit>
                 */
                currentSet.Clear();
                var lexUnit = entryElement.Element("lexical-unit");
                if (lexUnit != null)
                {
                    foreach (var formLang in entryElement.Element("lexical-unit").Elements("form").Select(formAltElement => formAltElement.Attribute("lang").Value))
                    {
                        if (currentSet.Contains(formLang) || currentSet.Contains(formLang.ToLowerInvariant()))
                        {
                            sb.AppendFormat("Found lexical-unit form element with duplicate 'lang' attribute '{0}' in entry with guid '{1}'", formLang, entryGuid);
                            sb.AppendLine();
                        }
                        else
                        {
                            currentSet.Add(formLang);
                        }
                    }
                }

                // 2. Check out form alts in:

                /*
                 * <citation>
                 * <form
                 * lang="azj-Latn">
                 * <text>asqırmaq</text>
                 * </form>
                 * </citation>
                 */
                currentSet.Clear();
                var citElement = entryElement.Element("citation");
                if (citElement != null)
                {
                    foreach (var formLang in entryElement.Element("citation").Elements("form").Select(formAltElement => formAltElement.Attribute("lang").Value))
                    {
                        if (currentSet.Contains(formLang) || currentSet.Contains(formLang.ToLowerInvariant()))
                        {
                            sb.AppendFormat("Found citation form element with duplicate 'lang' attribute '{0}' in entry with guid '{1}'", formLang, entryGuid);
                            sb.AppendLine();
                        }
                        else
                        {
                            currentSet.Add(formLang);
                        }
                    }
                }

                // Check out dups in entry level:
#endif
                // Check variant elements
                currentSet.Clear();
                var duplicateVariantsAndCounts = new Dictionary <string, int>(StringComparer.InvariantCulture);
                {
                    foreach (var variantElement in entryElement.Elements("variant"))
                    {
                        var currentStr = variantElement.ToString();
                        if (!duplicateVariantsAndCounts.ContainsKey(currentStr))
                        {
                            duplicateVariantsAndCounts.Add(currentStr, 1);
                            continue;
                        }
                        var duplicateVariantsAndCountsTemp = new Dictionary <string, int>(duplicateVariantsAndCounts);
                        foreach (var duplicateVariantsAndCountTempKvp in duplicateVariantsAndCountsTemp)
                        {
                            var currentCount = duplicateVariantsAndCounts[duplicateVariantsAndCountTempKvp.Key];
                            if (XmlUtilities.AreXmlElementsEqual(duplicateVariantsAndCountTempKvp.Key, currentStr))
                            {
                                duplicateVariantsAndCounts[duplicateVariantsAndCountTempKvp.Key] = currentCount + 1;
                            }
                        }
                    }
                }
                foreach (var variantKvp in duplicateVariantsAndCounts.Where(variantKvp => variantKvp.Value == 1))
                {
                    currentSet.Add(variantKvp.Key);
                }
                foreach (var key in currentSet)
                {
                    duplicateVariantsAndCounts.Remove(key);
                }
                if (duplicateVariantsAndCounts.Count > 0)
                {
                    sb.AppendFormat("Found duplicate variant element(s) in entry with guid '{0}'", entryGuid);
                    sb.AppendLine();
                    foreach (var variantKvp in duplicateVariantsAndCounts)
                    {
                        sb.AppendFormat("Duplicate variant element count '{0}' for:", variantKvp.Value);
                        sb.AppendLine();
                        sb.Append(variantKvp.Key);
                        sb.AppendLine();
                    }
                }

#if false
                // type attr is a key, so assume multiple entry field elements
                // Assume repeating <form> elements in the <field> element.

                /*
                 * <field type="scientific-name">
                 * <form lang="ru"><text>Rutilus rutilus</text></form>
                 * </field>
                 */
                currentSet.Clear();
                foreach (var entryFieldElement in entryElement.Elements("field"))
                {
                    var typeAttrValue = entryFieldElement.Attribute("type").Value;
                    if (currentSet.Contains(typeAttrValue) || currentSet.Contains(typeAttrValue.ToLowerInvariant()))
                    {
                        sb.AppendFormat("Found field element with duplicate 'type' attribute '{0}' in entry with guid '{1}'", typeAttrValue, entryGuid);
                        sb.AppendLine();
                    }
                    else
                    {
                        currentSet.Add(typeAttrValue);
                    }

                    // Now check for dup lang attrs on form elements.
                    var fieldFormSet = new HashSet <string>();
                    foreach (var fieldFormAttrValue in entryFieldElement.Elements("form").Select(formAltElement => formAltElement.Attribute("lang").Value))
                    {
                        if (fieldFormSet.Contains(fieldFormAttrValue) || fieldFormSet.Contains(fieldFormAttrValue.ToLowerInvariant()))
                        {
                            sb.AppendFormat("Found field element with duplicate 'lang' attribute in field of type '{0}' with a form 'lang' of '{1}' in entry with guid '{2}'", typeAttrValue, fieldFormAttrValue, entryGuid);
                            sb.AppendLine();
                        }
                        else
                        {
                            fieldFormSet.Add(fieldFormAttrValue);
                        }
                    }
                }

                // Check out dup form lang attrs in label of illustration:
                // Assume:
                //	1. multiple <illustration> elements per entry,
                //	2. multiple <label> elements per <illustration> element, and
                //	3. multiple <form> elements per <label> (Only testable keyed element.)

                /*
                 * <illustration href="Rutilusrutilus38cm_2143x1060.JPG">
                 * <label>
                 * <form lang="azj-Latn"><text>külmə</text></form>
                 * <form lang="en"><text>roach, common</text></form>
                 * <form lang="ru"><text>плотва</text></form>
                 * </label>
                 * </illustration>
                 */
                foreach (var illustrationElement in entryElement.Elements("illustration"))
                {
                    foreach (var labelElement in illustrationElement.Elements("label"))
                    {
                        currentSet.Clear();
                        foreach (var labelFormAttrValue in labelElement.Elements("form").Select(formAltElement => formAltElement.Attribute("lang").Value))
                        {
                            if (currentSet.Contains(labelFormAttrValue) || currentSet.Contains(labelFormAttrValue.ToLowerInvariant()))
                            {
                                sb.AppendFormat("Found field element with duplicate 'lang' attribute in some label of some illustration with a 'lang' attribute of '{0}' in entry with guid '{1}'", labelFormAttrValue, entryGuid);
                                sb.AppendLine();
                            }
                            else
                            {
                                currentSet.Add(labelFormAttrValue);
                            }
                        }
                    }
                }

                // Check out duplicate sense ids (the sense id attr is what is used in the lift merge code for finding a matching sense.)
                // But a dup guid is just as bad, so report it, too. But then, a sense may not have a guid attr.
                currentSet.Clear();
                foreach (var senseElement in entryElement.Elements("sense"))
                {
                    var senseId = senseElement.Attribute("id").Value;
                    if (currentSet.Contains(senseId) || currentSet.Contains(senseId.ToLowerInvariant()))
                    {
                        sb.AppendFormat("Found sense element with duplicate id attribute '{0}' in entry with guid '{1}'", senseId, entryGuid);
                        sb.AppendLine();
                    }
                    else
                    {
                        currentSet.Add(senseId);
                    }

                    // Check out duplicate glosses.

                    /*
                     * <gloss
                     * lang="en">
                     * <text>to sneeze</text>
                     * </gloss>
                     * <gloss
                     * lang="ru">
                     * <text>чихать</text>
                     * </gloss>
                     */
                    var glossSet = new HashSet <string>();
                    foreach (var glossLangAttrValue in senseElement.Elements("gloss").Select(glossElement => glossElement.Attribute("lang").Value))
                    {
                        if (glossSet.Contains(glossLangAttrValue) || glossSet.Contains(glossLangAttrValue.ToLowerInvariant()))
                        {
                            sb.AppendFormat("Found gloss element with duplicate lang attribute '{0}' in sense with id '{1}' in entry with guid '{2}'", glossLangAttrValue, senseId, entryGuid);
                            sb.AppendLine();
                        }
                        else
                        {
                            glossSet.Add(glossLangAttrValue);
                        }
                    }

                    // Check out duplicate definition forms

                    /*
                     * <definition>
                     * <form
                     * lang="en">
                     * <text>to sneeze</text>
                     * </form>
                     * <form
                     * lang="ru">
                     * <text>чихать</text>
                     * </form>
                     * </definition>
                     */
                    var definitionFormsSet = new HashSet <string>();
                    foreach (var definitionFormLangAttrValue in senseElement.Elements("definition").Elements("form").Select(glossElement => glossElement.Attribute("lang").Value))
                    {
                        if (definitionFormsSet.Contains(definitionFormLangAttrValue) || definitionFormsSet.Contains(definitionFormLangAttrValue.ToLowerInvariant()))
                        {
                            sb.AppendFormat("Found definition form element with duplicate lang attribute '{0}' in sense with id '{1}' in entry with guid '{2}'", definitionFormLangAttrValue, senseId, entryGuid);
                            sb.AppendLine();
                        }
                        else
                        {
                            definitionFormsSet.Add(definitionFormLangAttrValue);
                        }
                    }

                    // Check out examples.
                    // Assumptions:
                    //	1. There can be multiple examples.
                    //	2. Each example can have multiple forms.
                    //	3. Each example can have multiple translation elements each of which can have multiple form elements.
                    // The assumptions may not hold, but they may flush out more dups.

                    /*
                     * <example>
                     * <form lang="azj-Latn"><text></text></form>
                     * <translation>
                     * <form lang="en"><text></text></form>
                     * </translation>
                     * </example>
                     */
                    foreach (var exampleElement in senseElement.Elements("example"))
                    {
                        var exampleFormsSet = new HashSet <string>();
                        foreach (var exampleFormLangAttrValue in exampleElement.Elements("form").Select(exampleFormElement => exampleFormElement.Attribute("lang").Value))
                        {
                            if (exampleFormsSet.Contains(exampleFormLangAttrValue) || exampleFormsSet.Contains(exampleFormLangAttrValue.ToLowerInvariant()))
                            {
                                sb.AppendFormat("Found example form element with duplicate lang attribute '{0}' in some example in the sense with id '{1}' in entry with guid '{2}'", exampleFormLangAttrValue, senseId, entryGuid);
                                sb.AppendLine();
                            }
                            else
                            {
                                exampleFormsSet.Add(exampleFormLangAttrValue);
                            }
                        }
                        foreach (var exampleTranslationElement in exampleElement.Elements("translation"))
                        {
                            var exampleTranslationFormsSet = new HashSet <string>();
                            foreach (var exampleTranslationFormLangAttrValue in exampleTranslationElement.Elements("form").Select(exampleTranlationFormElement => exampleTranlationFormElement.Attribute("lang").Value))
                            {
                                if (exampleTranslationFormsSet.Contains(exampleTranslationFormLangAttrValue) || exampleTranslationFormsSet.Contains(exampleTranslationFormLangAttrValue.ToLowerInvariant()))
                                {
                                    sb.AppendFormat("Found example translation form element with duplicate lang attribute '{0}' in some example's translation in the sense with id '{1}' in entry with guid '{2}'", exampleTranslationFormLangAttrValue, senseId, entryGuid);
                                    sb.AppendLine();
                                }
                                else
                                {
                                    exampleTranslationFormsSet.Add(exampleTranslationFormLangAttrValue);
                                }
                            }
                        }
                    }
                }
#endif
            }
            var results = sb.ToString();
            if (String.IsNullOrEmpty(results))
            {
                Console.WriteLine("No dups yet.");
            }
            else
            {
                Console.WriteLine("Found dups.");
                Console.Write(results);
            }
#endif
        }
Exemplo n.º 19
0
        /// <summary>
        /// Compares every second-level element of the original file
        /// (<c>_srcFwdataPathname + ".orig"</c>) with its counterpart in the rebuilt file
        /// (<c>_srcFwdataPathname</c>) and appends one line to <paramref name="sb"/> for
        /// each discrepancy found.
        /// </summary>
        /// <remarks>
        /// Discrepancies reported are: elements whose XML differs (both versions are also
        /// written to problem files in <c>_workingDir</c>), elements present only in the
        /// rebuilt file, and elements present only in the original file.
        /// The forced garbage collections are done while the timer is not running, so they
        /// are excluded from the measured time.
        /// </remarks>
        /// <param name="verifyTimer">Stopwatch that accumulates the time spent verifying.</param>
        /// <param name="sb">Receives the human-readable discrepancy report.</param>
        private void Verify(Stopwatch verifyTimer, StringBuilder sb)
        {
            GC.Collect(2, GCCollectionMode.Forced);
            verifyTimer.Start();
            GetFreshMdc(); // Want it fresh.

            // Pass 1: cache every record of the original file. Keys are the lower-cased guid
            // (the comparer is case-insensitive as well), or AdditionalFieldsTag for the
            // optional first element.
            var origData = new Dictionary<string, byte[]>(StringComparer.InvariantCultureIgnoreCase);
            using (var fastSplitterOrig = new FastXmlElementSplitter(_srcFwdataPathname + ".orig"))
            {
                var foundOrigOptionalFirstElement = false;
                var testedForOrigOptionalFirstElement = false;
                foreach (var origRecord in fastSplitterOrig.GetSecondLevelElementBytes(SharedConstants.AdditionalFieldsTag, SharedConstants.RtTag))
                {
                    // Only the very first record is tested for being the optional first element.
                    if (!testedForOrigOptionalFirstElement)
                    {
                        foundOrigOptionalFirstElement = FLExProjectSplitter.IsOptionalFirstElement(origRecord);
                        testedForOrigOptionalFirstElement = true;
                    }
                    if (foundOrigOptionalFirstElement)
                    {
                        origData.Add(SharedConstants.AdditionalFieldsTag, origRecord);
                        foundOrigOptionalFirstElement = false;
                        continue;
                    }
                    var origAttrs = XmlUtils.GetAttributes(origRecord, new HashSet<string> { SharedConstants.GuidStr });
                    origData.Add(origAttrs[SharedConstants.GuidStr].ToLowerInvariant(), origRecord);
                }
            }

            // Keep the forced collection out of the measured time.
            verifyTimer.Stop();
            GC.Collect(2, GCCollectionMode.Forced);
            verifyTimer.Start();

            // Pass 2: walk the rebuilt file and match each record against the cached original.
            using (var fastSplitterNew = new FastXmlElementSplitter(_srcFwdataPathname))
            {
                // NB: The main input file *does* have to deal with the optional first element.
                var foundNewOptionalFirstElement = false;
                var testedForNewOptionalFirstElement = false;
                foreach (var newRecordAsBytes in fastSplitterNew.GetSecondLevelElementBytes(SharedConstants.AdditionalFieldsTag, SharedConstants.RtTag))
                {
                    if (!testedForNewOptionalFirstElement)
                    {
                        foundNewOptionalFirstElement = FLExProjectSplitter.IsOptionalFirstElement(newRecordAsBytes);
                        testedForNewOptionalFirstElement = true;
                    }

                    byte[] origRecAsBytes;
                    string srcGuid = null; // Stays null for the optional first element.
                    if (foundNewOptionalFirstElement)
                    {
                        foundNewOptionalFirstElement = false;
                        if (!origData.TryGetValue(SharedConstants.AdditionalFieldsTag, out origRecAsBytes))
                        {
                            // Report instead of throwing KeyNotFoundException, so the rest of
                            // the file still gets verified.
                            sb.Append("Custom properties are in the rebuilt fwdata file, but not in the original.");
                            sb.AppendLine();
                            continue;
                        }
                        origData.Remove(SharedConstants.AdditionalFieldsTag);
                    }
                    else
                    {
                        var attrValues = XmlUtils.GetAttributes(newRecordAsBytes, new HashSet<string> { SharedConstants.GuidStr, SharedConstants.Class });
                        srcGuid = attrValues[SharedConstants.GuidStr];
                        if (!origData.TryGetValue(srcGuid, out origRecAsBytes))
                        {
                            // Mirror of the "not in rebuilt file" report at the end of this method.
                            sb.AppendFormat("Object with guid '{0}' of class '{1}' is in the rebuilt fwdata file, but not in the original.", srcGuid, attrValues[SharedConstants.Class]);
                            sb.AppendLine();
                            continue;
                        }
                        origData.Remove(srcGuid);
                        if (attrValues[SharedConstants.Class] == "WfiWordform")
                        {
                            // Zero the original's Checksum value before comparing.
                            // NOTE(review): presumably the rebuilt file always carries "0" here - confirm.
                            var wfElement = Utilities.CreateFromBytes(origRecAsBytes);
                            var csProp = wfElement.Element("Checksum");
                            if (csProp != null)
                            {
                                csProp.Attribute(SharedConstants.Val).Value = "0";
                                origRecAsBytes = SharedConstants.Utf8.GetBytes(wfElement.ToString());
                            }
                        }
                    }

                    // The raw bytes are compared directly: an earlier version built XmlNodes
                    // for every record first, which was noted as being far too slow.
                    if (XmlUtilities.AreXmlElementsEqual(origRecAsBytes, newRecordAsBytes))
                    {
                        continue;
                    }

                    if (srcGuid == null)
                    {
                        WriteProblemDataFile(Path.Combine(_workingDir, "CustomProperties-SRC.txt"), origRecAsBytes);
                        WriteProblemDataFile(Path.Combine(_workingDir, "CustomProperties-TRG.txt"), newRecordAsBytes);
                        sb.Append("Main src and trg custom properties are different in the resulting xml.");
                    }
                    else
                    {
                        WriteProblemDataFile(Path.Combine(_workingDir, srcGuid + "-SRC.txt"), origRecAsBytes);
                        WriteProblemDataFile(Path.Combine(_workingDir, srcGuid + "-TRG.txt"), newRecordAsBytes);
                        sb.AppendFormat("Main src and trg object with guid '{0}' are different in the resulting xml.", srcGuid);
                    }
                    sb.AppendLine();
                }
            }

            // Whatever is still cached was never matched by a record in the rebuilt file.
            if (origData.Count > 0)
            {
                sb.AppendFormat("Hmm, there are {0} more <rt> elements in the original than in the rebuilt fwdata file.", origData.Count);
                sb.AppendLine();
                foreach (var attrs in origData.Values.Select(byteData => XmlUtils.GetAttributes(byteData, new HashSet<string> { SharedConstants.GuidStr, SharedConstants.Class })))
                {
                    sb.AppendFormat("\t\t'{0}' of class '{1}' is not in rebuilt file.", attrs[SharedConstants.GuidStr], attrs[SharedConstants.Class]);
                    sb.AppendLine();
                }
            }
            verifyTimer.Stop();
        }