/// <summary>
/// Reads the XML file at <paramref name="pathname"/> one second-level element at a time and writes the
/// processed elements to <paramref name="writer"/>.
/// </summary>
/// <remarks>
/// The optional first element is run through SortCustomPropertiesRecord and written immediately. Every
/// other record is run through SortMainElement and handed to a BigDataSorter under the value of its
/// "guid" attribute; after the whole file has been read, the sorter's results are written through
/// WriteElement. The output order of the main records is whatever BigDataSorter.WriteResults produces
/// (presumably ordered by guid - confirm against BigDataSorter).
/// </remarks>
/// <param name="sortableProperties">Passed through unchanged to SortMainElement for every main record.</param>
/// <param name="writer">Destination for the custom property declarations and the sorted records.</param>
/// <param name="pathname">Path of the XML file to split and sort.</param>
internal static void SortEntireFile(Dictionary<string, Dictionary<string, HashSet<string>>> sortableProperties, XmlWriter writer, string pathname)
{
    var readerSettings = new XmlReaderSettings { IgnoreWhitespace = true };

    // Step 2: Sort and rewrite file.
    using (var splitter = new FastXmlElementSplitter(pathname))
    {
        var guidSorter = new BigDataSorter();
        bool isOptionalFirstElement;
        var secondLevelElements = splitter.GetSecondLevelElementStrings(OptionalFirstElementTag, StartTag, out isOptionalFirstElement);

        foreach (var elementText in secondLevelElements)
        {
            if (!isOptionalFirstElement)
            {
                // Step 2B: Sort main CmObject record.
                var sortedObject = SortMainElement(sortableProperties, elementText);
                // Read the guid before serializing, so a record without a guid fails at this point.
                var guid = sortedObject.Attribute("guid").Value;
                guidSorter.Add(guid, Utf8.GetBytes(sortedObject.ToString()));
                continue;
            }

            // Step 2A: Write out custom property declaration(s).
            WriteElement(writer, SortCustomPropertiesRecord(elementText));
            // Clear the flag so that every remaining element takes the main-record path.
            isOptionalFirstElement = false;
        }

        guidSorter.WriteResults(sortedBytes => WriteElement(writer, readerSettings, sortedBytes));
    }
}
/// <summary>
/// Sort the provided lift file into canonical order.
///
/// The root element's attributes, the optional header and the entries are rewritten in place; entries
/// are emitted in the order of their lower-cased "guid" attribute (culture-invariant, case-insensitive).
/// </summary>
/// <remarks>
/// All work is done on a temporary copy of the file. The original is overwritten only after the sorted
/// document has been written completely to that copy.
/// </remarks>
/// <param name="liftPathname">
/// The assumed main lift file in a folder. It is checked to be non-empty, to have a ".lift" extension
/// (any casing) and to exist on disk.
/// </param>
public static void SortLiftFile(string liftPathname)
{
    Guard.AgainstNullOrEmptyString(liftPathname, "liftPathname");
    var extension = Path.GetExtension(liftPathname).ToLowerInvariant();
    Guard.Against(extension != ".lift", "Unexpected file extension");
    Guard.Against<FileNotFoundException>(!File.Exists(liftPathname), "Lift file does not exist.");

    using (var workingCopy = new TempFile(File.ReadAllText(liftPathname), Utf8))
    {
        var rootAttributes = SortRootElementAttributes(workingCopy.Path);
        var entriesByGuid = new SortedDictionary<string, XElement>(StringComparer.InvariantCultureIgnoreCase);
        XElement headerElement = null;

        // Pass 1: read every second-level element, normalize it, and collect it in memory.
        using (var splitter = new FastXmlElementSplitter(workingCopy.Path))
        {
            bool isHeader;
            var records = splitter.GetSecondLevelElementStrings("header", "entry", out isHeader);
            foreach (var record in records)
            {
                XElement current = FixBadTextElements(record);
                SortAttributes(current);

                if (isHeader)
                {
                    // Clear the flag so every following element is treated as an entry.
                    isHeader = false;
                    headerElement = current;
                    SortHeader(headerElement);
                    continue;
                }

                var guid = current.Attribute("guid").Value.ToLowerInvariant();
                // NOTE(review): an entry whose guid has already been seen is skipped here, so
                // GetUniqueKey is only ever called with a key that is not yet present. Confirm
                // whether later duplicates are really meant to be dropped.
                if (entriesByGuid.ContainsKey(guid))
                {
                    continue;
                }

                SortEntry(current);
                entriesByGuid.Add(GetUniqueKey(entriesByGuid.Keys, guid), current);
            }
        }

        // Pass 2: overwrite the temporary copy with the sorted document.
        using (var writer = XmlWriter.Create(workingCopy.Path, CanonicalXmlSettings.CreateXmlWriterSettings()))
        {
            writer.WriteStartDocument();
            writer.WriteStartElement("lift");

            foreach (var attribute in rootAttributes)
            {
                var nameParts = attribute.Key.Split(':');
                if (nameParts.Length <= 1)
                {
                    writer.WriteAttributeString(attribute.Key, attribute.Value);
                    continue;
                }

                // A key containing ':' is written as prefix + local name with no namespace URI given.
                writer.WriteAttributeString(nameParts[0], nameParts[1], null, attribute.Value);
            }

            if (headerElement != null)
            {
                WriteElement(writer, headerElement);
            }

            foreach (var entry in entriesByGuid.Values)
            {
                WriteElement(writer, entry);
            }

            writer.WriteEndElement();
            writer.WriteEndDocument();
        }

        File.Copy(workingCopy.Path, liftPathname, true);
    }
}
// This test may be uncommented to try the splitter on some particular file which causes problems.
//[Test]
//public void SplitterParsesProblemFile()
//{
//    using (var fastXmlElementSplitter = new FastXmlElementSplitter(@"D:\DownLoads\y.lift"))
//    {
//        bool foundOptionalFirstElement;
//        fastXmlElementSplitter.GetSecondLevelElementBytes("header", "entry", out foundOptionalFirstElement)
//            .ToList();
//    }
//}

/// <summary>
/// Writes <paramref name="hasRecordsInput"/> to a temporary file, opens a FastXmlElementSplitter on it and
/// verifies the split results with <see cref="ProcessContent"/>. The temporary file is always deleted.
/// </summary>
/// <param name="hasRecordsInput">XML text to write to the temporary file.</param>
/// <param name="expectedCount">Number of second-level elements the splitter is expected to return.</param>
/// <param name="firstElementMarker">Name of the optional first element, passed to the splitter.</param>
/// <param name="recordMarker">Name of the record elements, passed to the splitter.</param>
/// <param name="enc">Encoding used to write the file and decode the returned bytes; UTF-8 when null.</param>
private static void CheckGoodFile(string hasRecordsInput, int expectedCount, string firstElementMarker, string recordMarker, Encoding enc = null)
{
    enc = enc ?? Encoding.UTF8;

    var goodPathname = Path.GetTempFileName();
    try
    {
        File.WriteAllText(goodPathname, hasRecordsInput, enc);
        using (var fastXmlElementSplitter = new FastXmlElementSplitter(goodPathname))
        {
            // Same checks as before, now shared with ProcessContent instead of being duplicated here.
            ProcessContent(fastXmlElementSplitter, expectedCount, firstElementMarker, recordMarker, enc);
        }
    }
    finally
    {
        File.Delete(goodPathname);
    }
}

/// <summary>
/// Splits the content behind <paramref name="fastXmlElementSplitter"/> twice, once as bytes and once as
/// strings, and asserts that both passes return <paramref name="expectedCount"/> elements, that each byte
/// result decodes (with <paramref name="enc"/>) to the matching string result, and that each string
/// parses as XML.
/// </summary>
/// <param name="fastXmlElementSplitter">Splitter that is already opened on the content to check.</param>
/// <param name="expectedCount">Number of second-level elements each pass is expected to return.</param>
/// <param name="firstElementMarker">Name of the optional first element, passed to the splitter.</param>
/// <param name="recordMarker">Name of the record elements, passed to the splitter.</param>
/// <param name="enc">Encoding used to decode the byte results; must not be null.</param>
private static void ProcessContent(FastXmlElementSplitter fastXmlElementSplitter, int expectedCount, string firstElementMarker, string recordMarker, Encoding enc)
{
    bool foundOptionalFirstElement;

    var elementBytes = fastXmlElementSplitter
        .GetSecondLevelElementBytes(firstElementMarker, recordMarker, out foundOptionalFirstElement)
        .ToList();
    Assert.AreEqual(expectedCount, elementBytes.Count);

    var elementStrings = fastXmlElementSplitter
        .GetSecondLevelElementStrings(firstElementMarker, recordMarker, out foundOptionalFirstElement)
        .ToList();
    Assert.AreEqual(expectedCount, elementStrings.Count);

    for (var i = 0; i < elementStrings.Count; ++i)
    {
        var currentStr = elementStrings[i];
        Assert.AreEqual(currentStr, enc.GetString(elementBytes[i]));

        // The result is not needed; parsing is only a well-formedness check (Parse throws on bad XML).
        XElement.Parse(currentStr);
    }
}
/// <summary>
/// Reads the XML file at <paramref name="pathname"/> and returns its second-level elements keyed by
/// identifier, reporting problems to <paramref name="mainMergeEventListener"/> as warnings.
/// </summary>
/// <remarks>
/// Keys are compared with StringComparer.InvariantCultureIgnoreCase. The optional first element is stored
/// under the lower-cased <paramref name="firstElementMarker"/>; every other record is stored under the
/// value of its <paramref name="identifierAttribute"/> attribute. Records for which a "dateDeleted" value
/// is found (tombstones) are skipped silently. Records without an identifier, and records whose key is
/// already present, are skipped with a warning, so the first occurrence of a key wins.
/// </remarks>
/// <param name="mainMergeEventListener">Receives a MergeWarning for every skipped or duplicate record.</param>
/// <param name="mergeStrategy">Supplies the strategies handed to RemoveAmbiguousChildren.</param>
/// <param name="pathname">Path of the XML file to read; also used as a prefix in warning messages.</param>
/// <param name="removeAmbiguousChildren">When true, each stored record is first passed through RemoveAmbiguousChildren.</param>
/// <param name="firstElementMarker">Name of the optional first element.</param>
/// <param name="recordStartingTag">Name of the record elements.</param>
/// <param name="identifierAttribute">Name of the attribute whose value identifies a record.</param>
/// <returns>The records of the file, keyed as described above.</returns>
private static Dictionary<string, string> MakeRecordDictionary(IMergeEventListener mainMergeEventListener, IMergeStrategy mergeStrategy, string pathname, bool removeAmbiguousChildren, string firstElementMarker, string recordStartingTag, string identifierAttribute)
{
    var records = new Dictionary<string, string>(EstimatedObjectCount(pathname), StringComparer.InvariantCultureIgnoreCase);
    using (var fastSplitter = new FastXmlElementSplitter(pathname))
    {
        bool foundOptionalFirstElement;
        foreach (var record in fastSplitter.GetSecondLevelElementStrings(firstElementMarker, recordStartingTag, out foundOptionalFirstElement))
        {
            string key;
            if (foundOptionalFirstElement)
            {
                // Clear the flag so every following element is treated as an ordinary record.
                foundOptionalFirstElement = false;

                key = firstElementMarker.ToLowerInvariant();
                if (records.ContainsKey(key))
                {
                    mainMergeEventListener.WarningOccurred(
                        new MergeWarning(string.Format("{0}: There is more than one optional first element '{1}'", pathname, key)));
                    continue;
                }
            }
            else
            {
                var attrValues = XmlUtils.GetAttributes(record, new HashSet<string> {"dateDeleted", identifierAttribute});

                // Eat tombstones.
                // (Presumably GetAttributes yields null for an attribute that is absent - confirm.)
                if (attrValues["dateDeleted"] != null)
                    continue;

                key = attrValues[identifierAttribute];
                // Check the identifier VALUE, not the attribute name: a record without an identifier
                // must be reported here, otherwise a null key reaches ContainsKey below and throws.
                if (string.IsNullOrEmpty(key))
                {
                    mainMergeEventListener.WarningOccurred(
                        new MergeWarning(string.Format("{0}: There was no identifier for the record", pathname)));
                    continue;
                }

                if (records.ContainsKey(key))
                {
                    mainMergeEventListener.WarningOccurred(
                        new MergeWarning(string.Format("{0}: There is more than one element with the identifier '{1}'", pathname, key)));
                    continue;
                }
            }

            // Store the record, optionally cleaned of ambiguous children first.
            string recordToStore = record;
            if (removeAmbiguousChildren)
            {
                recordToStore = RemoveAmbiguousChildren(mainMergeEventListener, mergeStrategy.GetStrategies(), record);
            }
            records.Add(key, recordToStore);
        }
    }
    return records;
}