/// <summary>
/// Decides whether 'ours' and 'theirs' differ from each other in a way that matters.
/// </summary>
/// <param name="ours">Our version of the node; null when we do not have it.</param>
/// <param name="theirs">Their version of the node; null when they do not have it.</param>
/// <param name="ancestor">The common ancestor's version; null when the ancestor did not have it.</param>
/// <returns>
/// False when at most one side added the node (no ancestor) or when both sides deleted it;
/// true when exactly one side deleted a node the ancestor had;
/// otherwise the negated result of comparing the two surviving nodes.
/// </returns>
private static bool AnyChanges(XmlNode ours, XmlNode theirs, XmlNode ancestor)
{
	var oursMissing = ours == null;
	var theirsMissing = theirs == null;

	if (oursMissing || theirsMissing)
	{
		// Without an ancestor a missing side means a one-sided add, which the merge won't mess with.
		// With an ancestor a missing side is a deletion: it only counts as a difference
		// when the other side did not delete as well.
		return ancestor != null && !(oursMissing && theirsMissing);
	}

	// Both sides have the node (both added it, or both kept it): a problem unless they are identical.
	return !XmlUtilities.AreXmlElementsEqual(ours, theirs);
}
/// <summary>
/// If the only difference between the parents of the two date/time nodes is the 'val'
/// attribute of the date/time node itself, reset the other node's 'val' to the ancestor's value.
/// Otherwise the other node is left with its own value.
/// </summary>
/// <param name="ancestorDateTimeNode">The date/time node from the common ancestor; may be null.</param>
/// <param name="otherDateTimeNode">The corresponding node from one of the modified versions; may be null.</param>
private static void RestoreOriginalIfTimestampIsTheOnlyChange(XmlNode ancestorDateTimeNode, XmlNode otherDateTimeNode)
{
	if (ancestorDateTimeNode == null || otherDateTimeNode == null)
	{
		return;
	}

	// XmlNode.Attributes is null for anything that is not an element.
	var ancestorAttributes = ancestorDateTimeNode.Attributes;
	var otherAttributes = otherDateTimeNode.Attributes;
	if (ancestorAttributes == null || otherAttributes == null)
	{
		return;
	}

	// The indexer returns null when there is no 'val' attribute; nothing to compare in that case.
	var ancestorAttr = ancestorAttributes["val"];
	var otherAttr = otherAttributes["val"];
	if (ancestorAttr == null || otherAttr == null)
	{
		return;
	}

	// Values that are the same are not of interest.
	if (ancestorAttr.Value == otherAttr.Value)
	{
		return;
	}

	// Get parents of both nodes
	var ancestorDateTimeNodeParent = ancestorDateTimeNode.ParentNode;
	var otherDateTimeNodeParent = otherDateTimeNode.ParentNode;

	// Temporarily give the other node the ancestor's value, so the parents can be compared
	// with the timestamp difference taken out of the picture.
	var originalOtherValue = otherAttr.Value;
	otherAttr.Value = ancestorAttr.Value;
	if (XmlUtilities.AreXmlElementsEqual(ancestorDateTimeNodeParent, otherDateTimeNodeParent))
	{
		return; // Only change was the timestamp, so keep the ancestor's value.
	}

	// Something else changed as well, so put the original value back.
	otherAttr.Value = originalOtherValue;
}
public void NonEquivalentByteArraysAreNotEqual()
{
	// Two elements that differ in their 'class' and 'guid' attributes, handed over as UTF-8 bytes,
	// must compare as different in both argument orders.
	const string ourXml = @"<rt class='ScrTxtPara' guid='0030a77d-63cd-4d51-b26a-27bac7d64f17' ownerguid='046d6079-2337-425f-a8bd-b0af047fb5e5' />";
	const string theirXml = @"<rt class='LexEntry' guid='0030a77d-63cd-4d51-b26a-27bac7d64f18' ownerguid='046d6079-2337-425f-a8bd-b0af047fb5e5' />";
	var ours = Encoding.UTF8.GetBytes(ourXml);
	var theirs = Encoding.UTF8.GetBytes(theirXml);

	var oursVersusTheirs = XmlUtilities.AreXmlElementsEqual(ours, theirs);
	Assert.That(oursVersusTheirs, Is.False, "ours == theirs");

	var theirsVersusOurs = XmlUtilities.AreXmlElementsEqual(theirs, ours);
	Assert.That(theirsVersusOurs, Is.False, "theirs == ours");
}
/// <summary>
/// Asserts that comparing the two XML strings (each first passed through RemoveDeclaration)
/// gives the expected outcome.
/// </summary>
/// <param name="expectedToMatch">True when the two inputs are expected to compare as equal.</param>
/// <param name="source">First XML string.</param>
/// <param name="target">Second XML string.</param>
private static void CompareResults(bool expectedToMatch, string source, string target)
{
	var strippedSource = RemoveDeclaration(source);
	var strippedTarget = RemoveDeclaration(target);
	var actuallyMatch = XmlUtilities.AreXmlElementsEqual(strippedSource, strippedTarget);

	Assert.AreEqual(expectedToMatch, actuallyMatch);
}
/// <summary>
/// Click handler that times 100000 calls of XmlUtilities.AreXmlElementsEqual for each of four
/// input representations (byte arrays, strings, XmlNodes and XmlInputs) of the same tiny element,
/// then shows the elapsed milliseconds and ticks of each run in a message box.
/// </summary>
private void RunLoopClicked(object sender, EventArgs e)
{
	const int iterations = 100000;
	const string data = "<element />";
	var report = new StringBuilder();

	// As byte arrays.
	var ourBytes = Encoding.UTF8.GetBytes(data);
	var theirBytes = Encoding.UTF8.GetBytes(data);
	var bytesTimer = Stopwatch.StartNew();
	for (var pass = 0; pass < iterations; pass++)
	{
		XmlUtilities.AreXmlElementsEqual(ourBytes, theirBytes);
	}
	bytesTimer.Stop();
	report.AppendFormat("Time to check (as bytes): {0}ms; {1}ticks.", bytesTimer.ElapsedMilliseconds, bytesTimer.ElapsedTicks);
	report.AppendLine();

	// As strings.
	var stringTimer = Stopwatch.StartNew();
	for (var pass = 0; pass < iterations; pass++)
	{
		XmlUtilities.AreXmlElementsEqual(data, data);
	}
	stringTimer.Stop();
	report.AppendFormat("Time to check (as string): {0}ms; {1}ticks.", stringTimer.ElapsedMilliseconds, stringTimer.ElapsedTicks);
	report.AppendLine();

	// As XmlNodes; the nodes are created before the clock starts.
	var doc = new XmlDocument();
	var ourNode = XmlUtilities.GetDocumentNodeFromRawXml(data, doc);
	var theirNode = XmlUtilities.GetDocumentNodeFromRawXml(data, doc);
	var xmlNodeTimer = Stopwatch.StartNew();
	for (var pass = 0; pass < iterations; pass++)
	{
		XmlUtilities.AreXmlElementsEqual(ourNode, theirNode);
	}
	xmlNodeTimer.Stop();
	report.AppendFormat("Time to check (as XmlNode): {0}ms; {1}ticks.", xmlNodeTimer.ElapsedMilliseconds, xmlNodeTimer.ElapsedTicks);
	report.AppendLine();

	// As XmlInputs; again created before the clock starts.
	var ourInput = new XmlInput(data);
	var theirInput = new XmlInput(data);
	var xmlInputTimer = Stopwatch.StartNew();
	for (var pass = 0; pass < iterations; pass++)
	{
		XmlUtilities.AreXmlElementsEqual(ourInput, theirInput);
	}
	xmlInputTimer.Stop();
	report.AppendFormat("Time to check (as XmlInput): {0}ms; {1}ticks.", xmlInputTimer.ElapsedMilliseconds, xmlInputTimer.ElapsedTicks);

	MessageBox.Show(report.ToString());
}
public void ClosedNodeAndEmptyNodeWithAttrsAreEqual()
{
	// A self-closed element and an element with separate, empty open/close tags
	// carry the same attribute and must compare as equal in both argument orders.
	const string selfClosed = @"<foo attr='val' />";
	const string openAndClosed = @"<foo attr='val'></foo>";

	var forward = XmlUtilities.AreXmlElementsEqual(selfClosed, openAndClosed);
	Assert.True(forward);

	var backward = XmlUtilities.AreXmlElementsEqual(openAndClosed, selfClosed);
	Assert.True(backward);
}
public void ClosedNodeAndNewTextAreNotEqual()
{
	// An empty element versus the same element with text content:
	// these must compare as different in both argument orders.
	const string emptyElement = @"<foo />";
	const string elementWithText = @"<foo>New foo text.</foo>";

	var forward = XmlUtilities.AreXmlElementsEqual(emptyElement, elementWithText);
	Assert.IsFalse(forward);

	var backward = XmlUtilities.AreXmlElementsEqual(elementWithText, emptyElement);
	Assert.IsFalse(backward);
}
public void ClosedNodeAndNewTextWithAttributesAreNotEqual()
{
	// Same attribute on both sides, but only one side has text content:
	// these must compare as different in both argument orders.
	const string emptyElement = @"<foo attr='val' />";
	const string elementWithText = @"<foo attr='val' >New foo text.</foo>";

	var forward = XmlUtilities.AreXmlElementsEqual(emptyElement, elementWithText);
	Assert.That(forward, Is.False);

	var backward = XmlUtilities.AreXmlElementsEqual(elementWithText, emptyElement);
	Assert.That(backward, Is.False);
}
public void ReplaceOursWithTheirs_OursNullTheirsNot()
{
	// We have no child at all; they have one.
	var ourParent = CreateTestNode(@"<parent></parent>");
	var theirParent = CreateTestNode(@"<parent><child>theirs</child></parent>");
	XmlNode theirs = theirParent.FirstChild;
	XmlNode ours = null;

	XmlUtilities.ReplaceOursWithTheirs(ourParent, ref ours, theirs);

	// The node handed back must live in our document and match theirs.
	Assert.AreSame(ourParent.OwnerDocument, ours.OwnerDocument);
	var replacementMatchesTheirs = XmlUtilities.AreXmlElementsEqual(theirs, ours);
	Assert.That(replacementMatchesTheirs, Is.True, "theirs != ours");
}
/// <summary>
/// Decides whether two entries can be treated as the same.
/// </summary>
/// <param name="alphaEntry">One entry.</param>
/// <param name="betaEntry">The other entry.</param>
/// <returns>
/// True when both entries report the same modified date and that date is not default(DateTime);
/// otherwise the result of comparing the outer XML of the two entries.
/// </returns>
private static bool AreTheSame(XmlNode alphaEntry, XmlNode betaEntry)
{
	//review: why do we need to actually parse these dates? Could we just do a string comparison?
	// Each date is fetched exactly once; the result is reused for both parts of the test.
	var betaModified = LiftUtils.GetModifiedDate(betaEntry);
	var alphaModified = LiftUtils.GetModifiedDate(alphaEntry);

	// A default date is not taken as proof of equality; fall through to the XML comparison.
	if (betaModified == alphaModified && !(betaModified == default(DateTime)))
	{
		return true;
	}

	return XmlUtilities.AreXmlElementsEqual(alphaEntry.OuterXml, betaEntry.OuterXml);
}
public void ReplaceOursWithTheirs_OursNotNullTheirsNotNull()
{
	// Both sides have a child, with different content.
	var ourParent = CreateTestNode(@"<parent><child>mine</child></parent>");
	var theirParent = CreateTestNode(@"<parent><child>theirs</child></parent>");
	XmlNode theirs = theirParent.FirstChild;
	XmlNode ours = ourParent.FirstChild;

	XmlUtilities.ReplaceOursWithTheirs(ourParent, ref ours, theirs);

	// The node handed back must live in our document and match theirs.
	Assert.AreSame(ourParent.OwnerDocument, ours.OwnerDocument, "Returned node not in inserted into our parent document");
	var replacementMatchesTheirs = XmlUtilities.AreXmlElementsEqual(theirs, ours);
	Assert.That(replacementMatchesTheirs, Is.True, "theirs != ours");
}
public void NestedClosedNodeAndTextWithAttributesAreNotEqual()
{
	// The difference (text content in the nested 'bar' element) is one level down;
	// the outer elements must still compare as different in both argument orders.
	const string nestedEmpty = @"<foo attr='val'> <bar attr='val'/> </foo>";
	const string nestedWithText = @"<foo attr='val'> <bar attr='val'>new stuff.</bar> </foo>";

	var forward = XmlUtilities.AreXmlElementsEqual(nestedEmpty, nestedWithText);
	Assert.That(forward, Is.False);

	var backward = XmlUtilities.AreXmlElementsEqual(nestedWithText, nestedEmpty);
	Assert.That(backward, Is.False);
}
public void EquivalentByteArraysAreEqual()
{
	// Both sides are encoded from the same XML text, but into two separate byte arrays.
	const string xml = @"<rt class='ScrTxtPara' guid='0030a77d-63cd-4d51-b26a-27bac7d64f17' ownerguid='046d6079-2337-425f-a8bd-b0af047fb5e5'> <Contents> <Str> <Run ws='tuz' /> </Str> </Contents> <ParseIsCurrent val='False' /> </rt>";
	var ours = Encoding.UTF8.GetBytes(xml);
	var theirs = Encoding.UTF8.GetBytes(xml);

	var oursVersusTheirs = XmlUtilities.AreXmlElementsEqual(ours, theirs);
	Assert.That(oursVersusTheirs, Is.True, "ours != theirs");

	var theirsVersusOurs = XmlUtilities.AreXmlElementsEqual(theirs, ours);
	Assert.That(theirsVersusOurs, Is.True, "theirs != ours");
}
public void MoveToFirstAttributeFix_HasElementsEqual()
{
	// The two inputs differ only in how the empty 'Run' element is written
	// (<Run ws='tuz'></Run> versus <Run ws='tuz' />), with further sibling elements after it.
	const string withOpenCloseRun = @"<rt class='ScrTxtPara' guid='0030a77d-63cd-4d51-b26a-27bac7d64f17' ownerguid='046d6079-2337-425f-a8bd-b0af047fb5e5'> <Contents> <Str> <Run ws='tuz'></Run> </Str> </Contents> <ParseIsCurrent val='False' /> <StyleRules> <Prop namedStyle='Section Head' /> </StyleRules> <Translations> <objsur guid='fe6f0999-ecb9-403f-abab-e934318542bc' t='o' /> </Translations> </rt>";
	const string withSelfClosedRun = @"<rt class='ScrTxtPara' guid='0030a77d-63cd-4d51-b26a-27bac7d64f17' ownerguid='046d6079-2337-425f-a8bd-b0af047fb5e5'> <Contents> <Str> <Run ws='tuz' /> </Str> </Contents> <ParseIsCurrent val='False' /> <StyleRules> <Prop namedStyle='Section Head' /> </StyleRules> <Translations> <objsur guid='fe6f0999-ecb9-403f-abab-e934318542bc' t='o' /> </Translations> </rt>";

	var forward = XmlUtilities.AreXmlElementsEqual(withOpenCloseRun, withSelfClosedRun);
	Assert.True(forward);

	var backward = XmlUtilities.AreXmlElementsEqual(withSelfClosedRun, withOpenCloseRun);
	Assert.True(backward);
}
public void MoreMinimal_MoveToFirstAttributeFix_HasElementsEqual()
{
	// Smaller variant of the same case: the inputs differ only in how the empty 'Run'
	// element is written (<Run ws='tuz'></Run> versus <Run ws='tuz' />).
	const string withOpenCloseRun = @"<rt class='ScrTxtPara' guid='0030a77d-63cd-4d51-b26a-27bac7d64f17' ownerguid='046d6079-2337-425f-a8bd-b0af047fb5e5'> <Contents> <Str> <Run ws='tuz'></Run> </Str> </Contents> <ParseIsCurrent val='False' /> </rt>";
	const string withSelfClosedRun = @"<rt class='ScrTxtPara' guid='0030a77d-63cd-4d51-b26a-27bac7d64f17' ownerguid='046d6079-2337-425f-a8bd-b0af047fb5e5'> <Contents> <Str> <Run ws='tuz' /> </Str> </Contents> <ParseIsCurrent val='False' /> </rt>";

	var forward = XmlUtilities.AreXmlElementsEqual(withOpenCloseRun, withSelfClosedRun);
	Assert.True(forward);

	var backward = XmlUtilities.AreXmlElementsEqual(withSelfClosedRun, withOpenCloseRun);
	Assert.True(backward);
}
/// <summary>
/// Prepares the ldml files named by the merge order before the real merge runs:
/// it adds a type='standard' attribute to the default collation element where
/// GetDefaultCollationNode finds one, and it pre-merges the generation date,
/// so the user is not flooded with conflict reports about dates.
/// </summary>
/// <remarks>
/// Nothing is done unless both our file and their file exist and contain the text "&lt;ldml&gt;".
/// Files are only written back when something in them was changed.
/// </remarks>
/// <param name="mergeOrder">Supplies the paths to our, their and the common ancestor file.</param>
/// <param name="addedCollationAttr">Set to true when the 'type' attribute was added to our or their document.</param>
private static void PreMergeFile(MergeOrder mergeOrder, out bool addedCollationAttr)
{
	addedCollationAttr = false;

	XDocument ourDoc = null;
	if (File.Exists(mergeOrder.pathToOurs) && File.ReadAllText(mergeOrder.pathToOurs).Contains("<ldml>"))
	{
		ourDoc = XDocument.Load(mergeOrder.pathToOurs);
	}
	XDocument theirDoc = null;
	if (File.Exists(mergeOrder.pathToTheirs) && File.ReadAllText(mergeOrder.pathToTheirs).Contains("<ldml>"))
	{
		theirDoc = XDocument.Load(mergeOrder.pathToTheirs);
	}
	XDocument commonDoc = null;
	if (File.Exists(mergeOrder.pathToCommonAncestor) && File.ReadAllText(mergeOrder.pathToCommonAncestor).Contains("<ldml>"))
	{
		commonDoc = XDocument.Load(mergeOrder.pathToCommonAncestor);
	}

	if (ourDoc == null || theirDoc == null)
	{
		return;
	}

	// ldml files are kind of big now, so only documents that were actually changed get saved.
	var commonNeedsSave = false;
	var oursNeedsSave = false;
	var theirsNeedsSave = false;

	// Add optional key attr and default value on 'collation' element that has no 'type' attr.
	var ourDocDefaultCollation = GetDefaultCollationNode(ourDoc);
	var theirDocDefaultCollation = GetDefaultCollationNode(theirDoc);
	if (commonDoc != null)
	{
		var commonDocDefaultCollation = GetDefaultCollationNode(commonDoc);
		// Add the type attribute to the commonDoc only when we are certain
		// it will also be added to at least one modified document.
		if (commonDocDefaultCollation != null
			&& (ourDocDefaultCollation != null || theirDocDefaultCollation != null))
		{
			commonDocDefaultCollation.Add(new XAttribute("type", "standard"));
			commonNeedsSave = true;
		}
	}
	if (ourDocDefaultCollation != null)
	{
		ourDocDefaultCollation.Add(new XAttribute("type", "standard"));
		oursNeedsSave = true;
		addedCollationAttr = true;
	}
	if (theirDocDefaultCollation != null)
	{
		theirDocDefaultCollation.Add(new XAttribute("type", "standard"));
		theirsNeedsSave = true;
		addedCollationAttr = true;
	}

	// If there is no commonDoc then the results will be DateTime.MinValue and null
	string ancestorRawGenDate;
	GetGenDate(commonDoc, out ancestorRawGenDate);
	string theirRawGenDate;
	var theirGenDate = GetGenDate(theirDoc, out theirRawGenDate);
	string ourRawGenDate;
	var ourGenDate = GetGenDate(ourDoc, out ourRawGenDate);

	// Give both documents the ancestor's date, so the comparison below ignores the date.
	// If there was no common we will set the date to empty.
	// The xml equality test will fail and the results will be correct
	var neutralGenDate = ancestorRawGenDate ?? string.Empty;
	var theirDateAttr = theirDoc.Root?.Element("identity")?.Element("generation")?.Attribute("date");
	theirDateAttr?.SetValue(neutralGenDate);
	var ourDateAttr = ourDoc.Root?.Element("identity")?.Element("generation")?.Attribute("date");
	ourDateAttr?.SetValue(neutralGenDate);

	var onlyGenDateChanged = XmlUtilities.AreXmlElementsEqual(commonDoc?.Root, theirDoc.Root)
		&& XmlUtilities.AreXmlElementsEqual(commonDoc?.Root, ourDoc.Root);
	if (!onlyGenDateChanged)
	{
		// Some real content changed so update to the current time to represent the merge
		var mostRecentGenDateRaw = DateTimeProvider.Current.UtcNow.ToISO8601TimeFormatWithUTCString();
		theirDateAttr?.SetValue(mostRecentGenDateRaw);
		ourDateAttr?.SetValue(mostRecentGenDateRaw);
		oursNeedsSave = true;
		theirsNeedsSave = true;
	}
	else
	{
		// Only the generation date has changed: pre-merge the <generation> date attr to the newest.
		var mostRecentGenDate = theirGenDate;
		if (ourGenDate == theirGenDate || ourGenDate > theirGenDate)
		{
			mostRecentGenDate = ourGenDate;
		}

		if (theirGenDate != mostRecentGenDate)
		{
			theirDateAttr?.SetValue(mostRecentGenDate);
			theirsNeedsSave = true;
		}
		if (ourGenDate != mostRecentGenDate)
		{
			ourDateAttr?.SetValue(mostRecentGenDate);
			oursNeedsSave = true;
		}
	}

	if (commonNeedsSave)
	{
		commonDoc?.Save(mergeOrder.pathToCommonAncestor);
	}
	if (theirsNeedsSave)
	{
		theirDoc.Save(mergeOrder.pathToTheirs);
	}
	if (oursNeedsSave)
	{
		ourDoc.Save(mergeOrder.pathToOurs);
	}
}
/// <summary>
/// Report the differences between two versions of files in the repository.
/// </summary>
/// <remarks>
/// Every key of <paramref name="parentIndex"/> that is also found in <paramref name="childIndex"/>
/// is removed from <paramref name="childIndex"/> while the comparison runs; whatever is left
/// in it afterwards is reported as an addition.
/// </remarks>
/// <param name="parentFileInRevision">The parent revision of the file.</param>
/// <param name="parentIndex">The parent's records as UTF-8 bytes, by key.</param>
/// <param name="childFileInRevision">The child revision of the file.</param>
/// <param name="childIndex">The child's records as UTF-8 bytes, by key. Modified by this method.</param>
/// <returns>Zero or more change reports.</returns>
public static IEnumerable<IChangeReport> ReportDifferences(
	FileInRevision parentFileInRevision, Dictionary<string, byte[]> parentIndex,
	FileInRevision childFileInRevision, Dictionary<string, byte[]> childIndex)
{
	const string deletedAttr = "dateDeleted=";
	var utf8 = Encoding.UTF8;
	var parentDoc = new XmlDocument();
	var childDoc = new XmlDocument();
	var reports = new List<IChangeReport>();

	foreach (var parentEntry in parentIndex)
	{
		byte[] childBytes;
		if (!childIndex.TryGetValue(parentEntry.Key, out childBytes))
		{
			// The child no longer has this record.
			// Don't report deletions where there was a tombstone, but then someone removed the entry (which is what FLEx does)
			var orphanedParentXml = utf8.GetString(parentEntry.Value);
			if (!orphanedParentXml.Contains(deletedAttr))
			{
				// Route tested
				reports.Add(new XmlDeletionChangeReport(
					parentFileInRevision,
					XmlUtilities.GetDocumentNodeFromRawXml(orphanedParentXml, parentDoc),
					null)); // Child Node? How can we put it in, if it was deleted?
			}
			continue; // Route tested (tombstone case)
		}

		childIndex.Remove(parentEntry.Key);

		// Turning both into strings and comparing those was found to be faster
		// than comparing the bytes one by one.
		var parentXml = utf8.GetString(parentEntry.Value);
		var childXml = utf8.GetString(childBytes);
		if (parentXml == childXml)
		{
			continue; // Route tested
		}

		// May have added 'dateDeleted' attr, in which case treat it as deleted, not changed.
		// NB: This is only for Lift diffing, not FW diffing,
		// so figure a way to have the client do this kind of check.
		if (childXml.Contains(deletedAttr))
		{
			// Only report it as deleted, if it is not already marked as deleted in the parent.
			if (!parentXml.Contains(deletedAttr))
			{
				// Route tested
				reports.Add(new XmlDeletionChangeReport(
					parentFileInRevision,
					XmlUtilities.GetDocumentNodeFromRawXml(parentXml, parentDoc),
					XmlUtilities.GetDocumentNodeFromRawXml(childXml, childDoc)));
			}
			continue;
		}

		bool equivalentXml;
		try
		{
			equivalentXml = XmlUtilities.AreXmlElementsEqual(new XmlInput(childXml), new XmlInput(parentXml));
		}
		catch (Exception error)
		{
			// Route not tested, and I don't know how to get XmlUtilities.AreXmlElementsEqual to throw.
			reports.Add(new ErrorDeterminingChangeReport(
				parentFileInRevision,
				childFileInRevision,
				XmlUtilities.GetDocumentNodeFromRawXml(parentXml, parentDoc),
				XmlUtilities.GetDocumentNodeFromRawXml(childXml, childDoc),
				error));
			continue;
		}
		if (equivalentXml)
		{
			continue; // Route tested
		}

		// NB: This comment is from the class description of XmlChangedRecordReport
		// This may only be useful for quick, high-level identification that an entry changed,
		// leaving *what* changed to a second pass, if needed by the user
		// I (RBR), believe this can overproduce, otherwise useless change reports in a merge, if the merger uses it.
		// Route tested
		reports.Add(new XmlChangedRecordReport(
			parentFileInRevision,
			childFileInRevision,
			XmlUtilities.GetDocumentNodeFromRawXml(parentXml, parentDoc),
			XmlUtilities.GetDocumentNodeFromRawXml(childXml, childDoc)));
	}

	// Values that are still in childIndex are new,
	// since values that were not new have been removed by now.
	foreach (var addedBytes in childIndex.Values)
	{
		// Route tested
		reports.Add(new XmlAdditionChangeReport(
			childFileInRevision,
			XmlUtilities.GetDocumentNodeFromRawXml(utf8.GetString(addedBytes), childDoc)));
	}

	return reports;
}
/// <summary>
/// Click handler that looks for duplicate 'variant' elements in the entries of the lift file
/// at _liftPathname.
/// </summary>
/// <remarks>
/// Unless ORIGINAL is defined, the loaded document is cut down to the entries that contain
/// duplicated variants, annotated with counts, and saved next to the original under a
/// "-variants-new.lift" name. When ORIGINAL is defined, the duplicates are instead described
/// as text and written to the console. The regions under '#if false' are disabled checks.
/// </remarks>
private void TestFileButtonClicked(object sender, EventArgs e)
{
	var sb = new StringBuilder();
	var currentSet = new HashSet<string>();
	var liftDoc = XDocument.Load(_liftPathname);
#if !ORIGINAL
	//liftDoc.Root.Element("header").Remove();
	foreach (var entry in liftDoc.Root.Elements("entry").ToArray())
	{
		// Strip everything that is not a <variant> out of the entry.
		var nonVariantChildren = entry.Elements().Where(child => child.Name.LocalName != "variant").ToArray();
		foreach (var nonVariantChild in nonVariantChildren)
		{
			nonVariantChild.Remove();
		}

		// Fewer than two variants cannot contain a duplicate.
		if (entry.Elements("variant").Count() < 2)
		{
			entry.Remove();
		}

		// Check variant elements: group them by their serialized text.
		currentSet.Clear();
		var variantsByText = new Dictionary<string, List<XElement>>();
		foreach (var variant in entry.Elements("variant"))
		{
			var variantText = variant.ToString();
			List<XElement> sameTextVariants;
			if (variantsByText.TryGetValue(variantText, out sameTextVariants))
			{
				// Adds this variant once for every known text that is XML-equal to it.
				sameTextVariants.AddRange(variantsByText
					.Where(known => XmlUtilities.AreXmlElementsEqual(known.Key, variantText))
					.Select(known => variant));
			}
			else
			{
				variantsByText.Add(variantText, new List<XElement> { variant });
			}
		}

		// Variants seen only once are not duplicates: drop them from the entry and the table.
		foreach (var group in variantsByText)
		{
			if (group.Value.Count == 1)
			{
				currentSet.Add(group.Key);
			}
		}
		foreach (var uniqueText in currentSet)
		{
			variantsByText[uniqueText][0].Remove();
			variantsByText.Remove(uniqueText);
		}

		if (variantsByText.Count == 0)
		{
			continue;
		}

		// Keep only the guid on the entry and record how many variants are left in it.
		entry.Attributes().Where(attr => attr.Name.LocalName != "guid").Remove();
		entry.Add(new XAttribute("TOTALDUPVARIANTCOUNT", entry.Elements("variant").Count()));
		foreach (var duplicates in variantsByText.Values)
		{
			// Keep the first of each group, tagged with the size of the group.
			for (var idx = 1; idx < duplicates.Count; ++idx)
			{
				duplicates[idx].Remove();
			}
			duplicates[0].Add(new XAttribute("DUPVARCOUNT", duplicates.Count));
		}
	}

	// Entries that ended up with no child elements are of no interest.
	foreach (var emptyEntry in liftDoc.Root.Elements("entry").Where(entry => !entry.HasElements).ToArray())
	{
		emptyEntry.Remove();
	}
	liftDoc.Root.Attributes().Remove();
	liftDoc.Root.Add(new XAttribute("ENTRIESWITHDUPVARCOUNT", liftDoc.Root.Elements("entry").Count()));
	liftDoc.Save(_liftPathname.Replace(".lift", "-variants-new.lift"));
#else
#if false
	// Check out header element.
	// For now, only work with root/header/fields/field (<form> elements).
	foreach (var headerFieldElement in liftDoc.Root.Element("header").Element("fields").Elements("field"))
	{
		var fieldTagAttrValue = headerFieldElement.Attribute("tag").Value;
		currentSet.Clear();
		foreach (var headerFieldFormAttrValue in headerFieldElement.Elements("form").Select(formAltElement => formAltElement.Attribute("lang").Value))
		{
			if (currentSet.Contains(headerFieldFormAttrValue) || currentSet.Contains(headerFieldFormAttrValue.ToLowerInvariant()))
			{
				sb.AppendFormat("Found header field form element with duplicate 'lang' attribute '{0}' in header field with tag '{1}'", headerFieldFormAttrValue, fieldTagAttrValue);
				sb.AppendLine();
			}
			else
			{
				currentSet.Add(headerFieldFormAttrValue);
			}
		}
	}
#endif
	foreach (var entryElement in liftDoc.Root.Elements("entry"))
	{
		if (entryElement.Attribute("dateDeleted") != null)
		{
			continue;
		}
		var entryGuid = entryElement.Attribute("guid").Value;
#if false
		// 1. Check out 'form' alts in:
		/*
		 * <lexical-unit>
		 * <form
		 * lang="azj-Latn">
		 * <text>asqır</text>
		 * </form>
		 * </lexical-unit>
		 */
		currentSet.Clear();
		var lexUnit = entryElement.Element("lexical-unit");
		if (lexUnit != null)
		{
			foreach (var formLang in entryElement.Element("lexical-unit").Elements("form").Select(formAltElement => formAltElement.Attribute("lang").Value))
			{
				if (currentSet.Contains(formLang) || currentSet.Contains(formLang.ToLowerInvariant()))
				{
					sb.AppendFormat("Found lexical-unit form element with duplicate 'lang' attribute '{0}' in entry with guid '{1}'", formLang, entryGuid);
					sb.AppendLine();
				}
				else
				{
					currentSet.Add(formLang);
				}
			}
		}
		// 2. Check out form alts in:
		/*
		 * <citation>
		 * <form
		 * lang="azj-Latn">
		 * <text>asqırmaq</text>
		 * </form>
		 * </citation>
		 */
		currentSet.Clear();
		var citElement = entryElement.Element("citation");
		if (citElement != null)
		{
			foreach (var formLang in entryElement.Element("citation").Elements("form").Select(formAltElement => formAltElement.Attribute("lang").Value))
			{
				if (currentSet.Contains(formLang) || currentSet.Contains(formLang.ToLowerInvariant()))
				{
					sb.AppendFormat("Found citation form element with duplicate 'lang' attribute '{0}' in entry with guid '{1}'", formLang, entryGuid);
					sb.AppendLine();
				}
				else
				{
					currentSet.Add(formLang);
				}
			}
		}
		// Check out dups in entry level:
#endif
		// Check variant elements
		currentSet.Clear();
		var duplicateVariantsAndCounts = new Dictionary<string, int>(StringComparer.InvariantCulture);
		{
			foreach (var variantElement in entryElement.Elements("variant"))
			{
				var currentStr = variantElement.ToString();
				if (!duplicateVariantsAndCounts.ContainsKey(currentStr))
				{
					duplicateVariantsAndCounts.Add(currentStr, 1);
					continue;
				}
				// Work from a copy, since the counts are updated while walking the keys.
				var duplicateVariantsAndCountsTemp = new Dictionary<string, int>(duplicateVariantsAndCounts);
				foreach (var duplicateVariantsAndCountTempKvp in duplicateVariantsAndCountsTemp)
				{
					var currentCount = duplicateVariantsAndCounts[duplicateVariantsAndCountTempKvp.Key];
					if (XmlUtilities.AreXmlElementsEqual(duplicateVariantsAndCountTempKvp.Key, currentStr))
					{
						duplicateVariantsAndCounts[duplicateVariantsAndCountTempKvp.Key] = currentCount + 1;
					}
				}
			}
		}
		foreach (var variantKvp in duplicateVariantsAndCounts.Where(variantKvp => variantKvp.Value == 1))
		{
			currentSet.Add(variantKvp.Key);
		}
		foreach (var key in currentSet)
		{
			duplicateVariantsAndCounts.Remove(key);
		}
		if (duplicateVariantsAndCounts.Count > 0)
		{
			sb.AppendFormat("Found duplicate variant element(s) in entry with guid '{0}'", entryGuid);
			sb.AppendLine();
			foreach (var variantKvp in duplicateVariantsAndCounts)
			{
				sb.AppendFormat("Duplicate variant element count '{0}' for:", variantKvp.Value);
				sb.AppendLine();
				sb.Append(variantKvp.Key);
				sb.AppendLine();
			}
		}
#if false
		// type attr is a key, so assume multiple entry field elements
		// Assume repeating <form> elements in the <field> element.
		/*
		 * <field type="scientific-name">
		 * <form lang="ru"><text>Rutilus rutilus</text></form>
		 * </field>
		 */
		currentSet.Clear();
		foreach (var entryFieldElement in entryElement.Elements("field"))
		{
			var typeAttrValue = entryFieldElement.Attribute("type").Value;
			if (currentSet.Contains(typeAttrValue) || currentSet.Contains(typeAttrValue.ToLowerInvariant()))
			{
				sb.AppendFormat("Found field element with duplicate 'type' attribute '{0}' in entry with guid '{1}'", typeAttrValue, entryGuid);
				sb.AppendLine();
			}
			else
			{
				currentSet.Add(typeAttrValue);
			}
			// Now check for dup lang attrs on form elements.
			var fieldFormSet = new HashSet<string>();
			foreach (var fieldFormAttrValue in entryFieldElement.Elements("form").Select(formAltElement => formAltElement.Attribute("lang").Value))
			{
				if (fieldFormSet.Contains(fieldFormAttrValue) || fieldFormSet.Contains(fieldFormAttrValue.ToLowerInvariant()))
				{
					sb.AppendFormat("Found field element with duplicate 'lang' attribute in field of type '{0}' with a form 'lang' of '{1}' in entry with guid '{2}'", typeAttrValue, fieldFormAttrValue, entryGuid);
					sb.AppendLine();
				}
				else
				{
					fieldFormSet.Add(fieldFormAttrValue);
				}
			}
		}
		// Check out dup form lang attrs in label of illustration:
		// Assume:
		// 1. multiple <illustration> elements per entry,
		// 2. multiple <label> elements per <illustration> element, and
		// 3. multiple <form> elements per <label> (Only testable keyed element.)
		/*
		 * <illustration href="Rutilusrutilus38cm_2143x1060.JPG">
		 * <label>
		 * <form lang="azj-Latn"><text>külmə</text></form>
		 * <form lang="en"><text>roach, common</text></form>
		 * <form lang="ru"><text>плотва</text></form>
		 * </label>
		 * </illustration>
		 */
		foreach (var illustrationElement in entryElement.Elements("illustration"))
		{
			foreach (var labelElement in illustrationElement.Elements("label"))
			{
				currentSet.Clear();
				foreach (var labelFormAttrValue in labelElement.Elements("form").Select(formAltElement => formAltElement.Attribute("lang").Value))
				{
					if (currentSet.Contains(labelFormAttrValue) || currentSet.Contains(labelFormAttrValue.ToLowerInvariant()))
					{
						sb.AppendFormat("Found field element with duplicate 'lang' attribute in some label of some illustration with a 'lang' attribute of '{0}' in entry with guid '{1}'", labelFormAttrValue, entryGuid);
						sb.AppendLine();
					}
					else
					{
						currentSet.Add(labelFormAttrValue);
					}
				}
			}
		}
		// Check out duplicate sense ids (the sense id attr is what is used in the lift merge code for finding a matching sense.)
		// But a dup guid is just as bad, so report it, too. But then, a sense may not have a guid attr.
		currentSet.Clear();
		foreach (var senseElement in entryElement.Elements("sense"))
		{
			var senseId = senseElement.Attribute("id").Value;
			if (currentSet.Contains(senseId) || currentSet.Contains(senseId.ToLowerInvariant()))
			{
				sb.AppendFormat("Found sense element with duplicate id attribute '{0}' in entry with guid '{1}'", senseId, entryGuid);
				sb.AppendLine();
			}
			else
			{
				currentSet.Add(senseId);
			}
			// Check out duplicate glosses.
			/*
			 * <gloss
			 * lang="en">
			 * <text>to sneeze</text>
			 * </gloss>
			 * <gloss
			 * lang="ru">
			 * <text>чихать</text>
			 * </gloss>
			 */
			var glossSet = new HashSet<string>();
			foreach (var glossLangAttrValue in senseElement.Elements("gloss").Select(glossElement => glossElement.Attribute("lang").Value))
			{
				if (glossSet.Contains(glossLangAttrValue) || glossSet.Contains(glossLangAttrValue.ToLowerInvariant()))
				{
					sb.AppendFormat("Found gloss element with duplicate lang attribute '{0}' in sense with id '{1}' in entry with guid '{2}'", glossLangAttrValue, senseId, entryGuid);
					sb.AppendLine();
				}
				else
				{
					glossSet.Add(glossLangAttrValue);
				}
			}
			// Check out duplicate definition forms
			/*
			 * <definition>
			 * <form
			 * lang="en">
			 * <text>to sneeze</text>
			 * </form>
			 * <form
			 * lang="ru">
			 * <text>чихать</text>
			 * </form>
			 * </definition>
			 */
			var definitionFormsSet = new HashSet<string>();
			foreach (var definitionFormLangAttrValue in senseElement.Elements("definition").Elements("form").Select(glossElement => glossElement.Attribute("lang").Value))
			{
				if (definitionFormsSet.Contains(definitionFormLangAttrValue) || definitionFormsSet.Contains(definitionFormLangAttrValue.ToLowerInvariant()))
				{
					sb.AppendFormat("Found definition form element with duplicate lang attribute '{0}' in sense with id '{1}' in entry with guid '{2}'", definitionFormLangAttrValue, senseId, entryGuid);
					sb.AppendLine();
				}
				else
				{
					definitionFormsSet.Add(definitionFormLangAttrValue);
				}
			}
			// Check out examples.
			// Assumptions:
			// 1. There can be multiple examples.
			// 2. Each example can have multiple forms.
			// 3. Each example can have multiple translation elements each of which can have multiple form elements.
			// The assumptions may not hold, but they may flush out more dups.
			/*
			 * <example>
			 * <form lang="azj-Latn"><text></text></form>
			 * <translation>
			 * <form lang="en"><text></text></form>
			 * </translation>
			 * </example>
			 */
			foreach (var exampleElement in senseElement.Elements("example"))
			{
				var exampleFormsSet = new HashSet<string>();
				foreach (var exampleFormLangAttrValue in exampleElement.Elements("form").Select(exampleFormElement => exampleFormElement.Attribute("lang").Value))
				{
					if (exampleFormsSet.Contains(exampleFormLangAttrValue) || exampleFormsSet.Contains(exampleFormLangAttrValue.ToLowerInvariant()))
					{
						sb.AppendFormat("Found example form element with duplicate lang attribute '{0}' in some example in the sense with id '{1}' in entry with guid '{2}'", exampleFormLangAttrValue, senseId, entryGuid);
						sb.AppendLine();
					}
					else
					{
						exampleFormsSet.Add(exampleFormLangAttrValue);
					}
				}
				foreach (var exampleTranslationElement in exampleElement.Elements("translation"))
				{
					var exampleTranslationFormsSet = new HashSet<string>();
					foreach (var exampleTranslationFormLangAttrValue in exampleTranslationElement.Elements("form").Select(exampleTranlationFormElement => exampleTranlationFormElement.Attribute("lang").Value))
					{
						if (exampleTranslationFormsSet.Contains(exampleTranslationFormLangAttrValue) || exampleTranslationFormsSet.Contains(exampleTranslationFormLangAttrValue.ToLowerInvariant()))
						{
							sb.AppendFormat("Found example translation form element with duplicate lang attribute '{0}' in some example's translation in the sense with id '{1}' in entry with guid '{2}'", exampleTranslationFormLangAttrValue, senseId, entryGuid);
							sb.AppendLine();
						}
						else
						{
							exampleTranslationFormsSet.Add(exampleTranslationFormLangAttrValue);
						}
					}
				}
			}
		}
#endif
	}
	var results = sb.ToString();
	if (String.IsNullOrEmpty(results))
	{
		Console.WriteLine("No dups yet.");
	}
	else
	{
		Console.WriteLine("Found dups.");
		Console.Write(results);
	}
#endif
}
/// <summary>
/// Compares every second-level record of the rebuilt fwdata file (_srcFwdataPathname) with the
/// matching record of the ".orig" copy, and appends a line to <paramref name="sb"/> for every
/// record that differs or that exists only in the original.
/// </summary>
/// <remarks>
/// Records that differ are also written to "-SRC.txt" / "-TRG.txt" files in _workingDir.
/// The timer is stopped around the forced garbage collection between the two passes.
/// </remarks>
/// <param name="verifyTimer">Stopwatch that accumulates the time spent verifying.</param>
/// <param name="sb">Receives the descriptions of the problems that were found.</param>
private void Verify(Stopwatch verifyTimer, StringBuilder sb)
{
	GC.Collect(2, GCCollectionMode.Forced);
	verifyTimer.Start();
	GetFreshMdc(); // Want it fresh.

	// Pass 1: index the original records, by lower-cased guid
	// (the optional first element goes in under its tag name).
	var origData = new Dictionary<string, byte[]>(StringComparer.InvariantCultureIgnoreCase);
	using (var origSplitter = new FastXmlElementSplitter(_srcFwdataPathname + ".orig"))
	{
		var atFirstOrigRecord = true;
		foreach (var origRecord in origSplitter.GetSecondLevelElementBytes(SharedConstants.AdditionalFieldsTag, SharedConstants.RtTag))
		{
			// Only the very first record is tested for being the optional first element.
			var isOptionalFirstElement = atFirstOrigRecord && FLExProjectSplitter.IsOptionalFirstElement(origRecord);
			atFirstOrigRecord = false;
			if (isOptionalFirstElement)
			{
				origData.Add(SharedConstants.AdditionalFieldsTag, origRecord);
				continue;
			}

			var guidAttr = XmlUtils.GetAttributes(origRecord, new HashSet<string> { SharedConstants.GuidStr });
			origData.Add(guidAttr[SharedConstants.GuidStr].ToLowerInvariant(), origRecord);
		}
	}

	verifyTimer.Stop();
	GC.Collect(2, GCCollectionMode.Forced);
	verifyTimer.Start();

	// Pass 2: walk the rebuilt file and tick every record off against the original.
	using (var newSplitter = new FastXmlElementSplitter(_srcFwdataPathname))
	{
		// NB: The main input file *does* have to deal with the optional first element.
		var atFirstNewRecord = true;
		foreach (var newRecordAsBytes in newSplitter.GetSecondLevelElementBytes(SharedConstants.AdditionalFieldsTag, SharedConstants.RtTag))
		{
			var isOptionalFirstElement = atFirstNewRecord && FLExProjectSplitter.IsOptionalFirstElement(newRecordAsBytes);
			atFirstNewRecord = false;

			byte[] origRecAsBytes;
			string srcGuid = null; // stays null for the optional first element
			if (isOptionalFirstElement)
			{
				origRecAsBytes = origData[SharedConstants.AdditionalFieldsTag];
				origData.Remove(SharedConstants.AdditionalFieldsTag);
			}
			else
			{
				var attrValues = XmlUtils.GetAttributes(newRecordAsBytes, new HashSet<string> { SharedConstants.GuidStr, SharedConstants.Class });
				srcGuid = attrValues[SharedConstants.GuidStr];
				origRecAsBytes = origData[srcGuid];
				origData.Remove(srcGuid);
				if (attrValues[SharedConstants.Class] == "WfiWordform")
				{
					// The original's Checksum value is set to "0" before the comparison.
					var wfElement = Utilities.CreateFromBytes(origRecAsBytes);
					var csProp = wfElement.Element("Checksum");
					if (csProp != null)
					{
						csProp.Attribute(SharedConstants.Val).Value = "0";
						origRecAsBytes = SharedConstants.Utf8.GetBytes(wfElement.ToString());
					}
				}
			}

			// The raw bytes are compared directly. Building XmlNodes for every record first
			// was tried and was way too slow, since the nodes always had to be made.
			if (XmlUtilities.AreXmlElementsEqual(origRecAsBytes, newRecordAsBytes))
			{
				continue;
			}

			if (srcGuid != null)
			{
				WriteProblemDataFile(Path.Combine(_workingDir, srcGuid + "-SRC.txt"), origRecAsBytes);
				WriteProblemDataFile(Path.Combine(_workingDir, srcGuid + "-TRG.txt"), newRecordAsBytes);
				sb.AppendFormat("Main src and trg object with guid '{0}' are different in the resulting xml.", srcGuid);
			}
			else
			{
				WriteProblemDataFile(Path.Combine(_workingDir, "CustomProperties-SRC.txt"), origRecAsBytes);
				WriteProblemDataFile(Path.Combine(_workingDir, srcGuid + "CustomProperties-TRG.txt"), newRecordAsBytes);
				sb.Append("Main src and trg custom properties are different in the resulting xml.");
			}
			sb.AppendLine();
		}
	}

	// Whatever was not ticked off exists only in the original.
	if (origData.Count > 0)
	{
		sb.AppendFormat("Hmm, there are {0} more <rt> elements in the original than in the rebuilt fwdata file.", origData.Count);
		sb.AppendLine();
		foreach (var leftoverBytes in origData.Values)
		{
			var attrs = XmlUtils.GetAttributes(leftoverBytes, new HashSet<string> { SharedConstants.GuidStr, SharedConstants.Class });
			sb.AppendFormat("\t\t'{0}' of class '{1}' is not in rebuilt file.", attrs[SharedConstants.GuidStr], attrs[SharedConstants.Class]);
			sb.AppendLine();
		}
	}
	verifyTimer.Stop();
}