/// <summary>
/// Walks the node tree starting at <paramref name="current"/> and registers one column on
/// <paramref name="instruction"/> for the node's text (when it has any) and one for each of
/// its attributes, then recurses into every child that is not flagged as repetitive.
/// </summary>
/// <param name="instruction">Instruction object that receives the discovered columns.</param>
/// <param name="current">Node whose text, attributes and children are inspected.</param>
/// <param name="addedColumns">Column names already in use; passed on to <c>AddAttribute</c> to keep names unique.</param>
/// <param name="relativeTo">Node passed to <c>GetXPath</c> as the base for the generated column XPaths.</param>
private static void CollectColumns(XmlReadInstructions instruction, Node current, HashSet<string> addedColumns, Node relativeTo) {
    if (current.HasText) {
        AddAttribute(instruction, current.Name, current.GetXPath(relativeTo), addedColumns);
    }

    foreach (var attr in current.AttributeList) {
        // Attribute names containing ':' are skipped
        // (presumably namespace declarations / prefixed attributes - confirm against Node.Attribute).
        if (attr.Contains(":")) {
            continue;
        }

        // Kept inside the loop on purpose: GetXPath is only evaluated for nodes that
        // actually contribute a column, exactly as before.
        string xpath = current.GetXPath(relativeTo);

        // Ordinal comparison: XPath text is structural, not linguistic, so the
        // separator check must not depend on the current culture.
        if (!String.IsNullOrEmpty(xpath) && !xpath.EndsWith("/", StringComparison.Ordinal)) {
            xpath += "/";
        }

        xpath += "@" + attr;
        AddAttribute(instruction, attr, xpath, addedColumns);
    }

    foreach (var child in current.ChildList) {
        // Children flagged as repetitive are not descended into from here.
        if (child.IsRepetetive) {
            continue;
        }

        CollectColumns(instruction, child, addedColumns, relativeTo);
    }
}
/// <summary>
/// Adds a column with the given XPath to <paramref name="instruction"/>, choosing a name that
/// is not yet present in <paramref name="addedColumns"/>.
/// </summary>
/// <param name="instruction">Instruction object whose <c>Columns</c> list receives the new column.</param>
/// <param name="name">Preferred column name. If it is taken, the first free of name2, name3, ... is used.</param>
/// <param name="xpath">XPath stored on the new column.</param>
/// <param name="addedColumns">Names already handed out; the chosen name is added to it.</param>
private static void AddAttribute(XmlReadInstructions instruction, string name, string xpath, HashSet<string> addedColumns) {
    string uniqueName = name;
    int suffix = 2;

    // HashSet.Add returns false when the value is already present, so this loop both
    // probes for a free name and reserves it once found.
    while (!addedColumns.Add(uniqueName)) {
        uniqueName = name + suffix;
        suffix++;
    }

    instruction.Columns.Add(new XmlColumn {
        Name = uniqueName,
        XPath = xpath,
    });
}
/// <summary>
/// Reads the whole XML stream from <paramref name="reader"/>, records its element/attribute
/// structure in a tree of <c>Node</c> objects, and produces one <c>XmlReadInstructions</c> per
/// detected result set (an element whose <c>CurrentCount</c> exceeds 1). When a repetitive
/// element has a repetitive ancestor, only the inner one is kept as a result set. If nothing
/// repeats, a single instruction for the root ("/") is returned.
/// </summary>
/// <param name="reader">XML reader to consume; it is read until <c>Read()</c> returns false.</param>
/// <param name="globalUniqueColumnNames">
/// When true, column names are kept unique across all returned instructions;
/// when false, uniqueness is only enforced within each instruction.
/// </param>
/// <returns>The list of read instructions, one per result set.</returns>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
public static List<XmlReadInstructions> AnalyseXmlReader(System.Xml.XmlReader reader, bool globalUniqueColumnNames) {
    if (reader == null) {
        throw new ArgumentNullException("reader");
    }

    var root = new Node();
    var current = root;
    var resultSets = new Dictionary<string, Node>();

    while (reader.Read()) {
        switch (reader.NodeType) {
            case XmlNodeType.Element:
                current = current.SubElement(reader.Name);

                if (reader.HasAttributes) {
                    reader.MoveToFirstAttribute();
                    do {
                        current.Attribute(reader.Name);
                    } while (reader.MoveToNextAttribute());

                    // Return to the owning element so IsEmptyElement below refers to it.
                    reader.MoveToElement();
                }

                // An element seen more than once is a result-set candidate.
                if (current.CurrentCount > 1) {
                    string xpath = current.AbsXPath;
                    if (!resultSets.ContainsKey(xpath)) {
                        resultSets[xpath] = current;
                        current.IsRepetetive = true;
                    }
                }

                // Empty elements (<a/>) produce no EndElement, so step back up immediately.
                if (reader.IsEmptyElement) {
                    current = current.Parent;
                }
                break;

            case XmlNodeType.Text:
                if (!String.IsNullOrWhiteSpace(reader.Value)) {
                    current.HasText = true;
                }
                break;

            case XmlNodeType.XmlDeclaration:
            case XmlNodeType.ProcessingInstruction:
            case XmlNodeType.Comment:
                continue;

            case XmlNodeType.EndElement:
                current = current.Parent;
                break;
        }
    }

    // Remove repetitive parents so that only the innermost repetitive elements remain.
    // Iterate over a snapshot because entries are removed from the dictionary inside the loop.
    foreach (var resultSet in resultSets.Values.ToList()) {
        var node = resultSet.Parent;
        while (node != null && !node.IsRepetetive) {
            node = node.Parent;
        }

        if (node != null) {
            resultSets.Remove(node.AbsXPath);
            node.IsRepetetive = false;
        }
    }

    // Nothing repeats: treat the whole document as a single result set.
    if (!resultSets.Any()) {
        resultSets["/"] = root;
    }

    var res = new List<XmlReadInstructions>();
    var addedColumns = new HashSet<string>();
    var collectionNames = new HashSet<string>();

    foreach (var resultSet in resultSets.Values) {
        var instruction = new XmlReadInstructions();
        instruction.XPath = resultSet.AbsXPath ?? "/";

        // Make the collection name unique by appending 2, 3, ... when it is already taken.
        string collectionName = resultSet.Name;
        if (collectionNames.Contains(collectionName)) {
            int index = 2;
            while (collectionNames.Contains(collectionName + index)) {
                index++;
            }
            collectionName = collectionName + index;
        }

        // Record the name; without this the uniqueness check above can never trigger.
        collectionNames.Add(collectionName);
        instruction.CollectionName = collectionName;

        if (!globalUniqueColumnNames) {
            addedColumns.Clear();
        }

        // First the columns reachable from the root without entering repetitive elements,
        // then the columns of the result-set element itself.
        CollectColumns(instruction, root, addedColumns, resultSet);
        if (resultSet != root) {
            CollectColumns(instruction, resultSet, addedColumns, resultSet);
        }

        res.Add(instruction);
    }

    return res;
}