/// <summary>
/// Adds a column to <paramref name="instruction"/>, choosing a name that is not yet
/// present in <paramref name="addedColumns"/>.
/// </summary>
/// <param name="instruction">Read instruction whose <c>Columns</c> list receives the new column.</param>
/// <param name="name">Preferred column name; a numeric suffix (2, 3, ...) is appended if it is already taken.</param>
/// <param name="xpath">XPath stored on the new column.</param>
/// <param name="addedColumns">Set of column names already in use; the chosen name is added to it.</param>
private static void AddAttribute(XmlReadInstructions instruction, string name, string xpath, HashSet<string> addedColumns)
{
    // Probe "name", then "name2", "name3", ... until a free name is found.
    string uniqueName = name;
    for (int suffix = 2; addedColumns.Contains(uniqueName); suffix++)
    {
        uniqueName = name + suffix;
    }

    addedColumns.Add(uniqueName);

    var column = new XmlColumn { Name = uniqueName, XPath = xpath };
    instruction.Columns.Add(column);
}
/// <summary>
/// Recursively registers columns on <paramref name="instruction"/> for <paramref name="current"/>
/// and for every descendant that is not flagged as repetitive.
/// </summary>
/// <param name="instruction">Read instruction that receives the columns.</param>
/// <param name="current">Node to inspect: a column is added for its text (if any) and for each of its attributes.</param>
/// <param name="addedColumns">Column names already in use; passed through to <c>AddAttribute</c> for de-duplication.</param>
/// <param name="relativeTo">Node passed to <c>GetXPath</c> when building each column's XPath.</param>
private static void CollectColumns(XmlReadInstructions instruction, Node current, HashSet<string> addedColumns, Node relativeTo)
{
    // The element's own text becomes a column named after the element.
    if (current.HasText)
    {
        AddAttribute(instruction, current.Name, current.GetXPath(relativeTo), addedColumns);
    }

    foreach (var attr in current.AttributeList)
    {
        // Attribute names containing ':' are skipped
        // (presumably namespace-prefixed names such as xmlns:* - confirm against callers).
        if (attr.Contains(":"))
        {
            continue;
        }

        string xpath = current.GetXPath(relativeTo);

        // Ordinal comparison: the check is about the literal '/' separator, so it must not
        // depend on the current culture or on linguistic rules that ignore zero-width characters.
        if (!String.IsNullOrEmpty(xpath) && !xpath.EndsWith("/", StringComparison.Ordinal))
        {
            xpath += "/";
        }

        xpath += "@" + attr;
        AddAttribute(instruction, attr, xpath, addedColumns);
    }

    foreach (var child in current.ChildList)
    {
        // Children flagged as repetitive are not descended into.
        if (child.IsRepetetive)
        {
            continue;
        }

        CollectColumns(instruction, child, addedColumns, relativeTo);
    }
}
/// <summary>
/// Reads the whole XML stream from <paramref name="reader"/>, builds a tree of <c>Node</c>
/// objects describing its element structure, and derives one <c>XmlReadInstructions</c>
/// per result set. An element becomes a result-set candidate when its <c>CurrentCount</c>
/// exceeds 1; a candidate is dropped again when it is the nearest repetitive ancestor of
/// another candidate. If there is no result set at all, the root node is used with XPath "/".
/// </summary>
/// <param name="reader">XML reader to consume; it is read to the end.</param>
/// <param name="globalUniqueColumnNames">
/// When <c>true</c>, column names are kept unique across all returned instructions;
/// when <c>false</c>, the set of used names is cleared for each instruction.
/// </param>
/// <returns>One instruction per detected result set, each with a distinct <c>CollectionName</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is <c>null</c>.</exception>
public static List<XmlReadInstructions> AnalyseXmlReader(System.Xml.XmlReader reader, bool globalUniqueColumnNames)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    var root = new Node();
    var current = root;
    var resultSets = new Dictionary<string, Node>();

    while (reader.Read())
    {
        switch (reader.NodeType)
        {
            case XmlNodeType.Element:
                current = current.SubElement(reader.Name);

                if (reader.HasAttributes)
                {
                    reader.MoveToFirstAttribute();
                    do
                    {
                        current.Attribute(reader.Name);
                    }
                    while (reader.MoveToNextAttribute());

                    // Go back to the element so IsEmptyElement below is evaluated on it,
                    // not on the last attribute.
                    reader.MoveToElement();
                }

                // NOTE(review): CurrentCount presumably counts occurrences of this element
                // under its parent - confirm against Node.
                if (current.CurrentCount > 1)
                {
                    string xpath = current.AbsXPath;
                    if (!resultSets.ContainsKey(xpath))
                    {
                        resultSets[xpath] = current;
                        current.IsRepetetive = true;
                    }
                }

                // An empty element (<a/>) produces no EndElement node, so step back up here.
                if (reader.IsEmptyElement)
                {
                    current = current.Parent;
                }
                break;

            case XmlNodeType.Text:
                if (!String.IsNullOrWhiteSpace(reader.Value))
                {
                    current.HasText = true;
                }
                break;

            case XmlNodeType.XmlDeclaration:
            case XmlNodeType.ProcessingInstruction:
            case XmlNodeType.Comment:
                continue;

            case XmlNodeType.EndElement:
                current = current.Parent;
                break;
        }
    }

    // For each candidate, find its nearest repetitive ancestor and drop that ancestor
    // from the result sets. Iterate over a snapshot because the dictionary is modified in the loop.
    foreach (var resultSet in resultSets.Values.ToList())
    {
        var node = resultSet.Parent;
        while (node != null && !node.IsRepetetive)
        {
            node = node.Parent;
        }

        if (node != null)
        {
            resultSets.Remove(node.AbsXPath);
            node.IsRepetetive = false;
        }
    }

    // No repeating element found: treat the whole document as a single result set.
    if (!resultSets.Any())
    {
        resultSets["/"] = root;
    }

    var res = new List<XmlReadInstructions>();
    var addedColumns = new HashSet<string>();
    var collectionNames = new HashSet<string>();

    foreach (var resultSet in resultSets.Values)
    {
        var instruction = new XmlReadInstructions();
        instruction.XPath = resultSet.AbsXPath ?? "/";

        // Make the collection name unique by appending 2, 3, ... when it is already taken.
        string collectionName = resultSet.Name;
        if (collectionNames.Contains(collectionName))
        {
            int index = 2;
            while (collectionNames.Contains(collectionName + index))
            {
                index++;
            }
            collectionName = collectionName + index;
        }

        // Record the chosen name; without this the uniqueness check above can never trigger.
        collectionNames.Add(collectionName);
        instruction.CollectionName = collectionName;

        if (!globalUniqueColumnNames)
        {
            addedColumns.Clear();
        }

        // Columns reachable from the root first (non-repetitive nodes only),
        // then the columns of the result-set element itself.
        CollectColumns(instruction, root, addedColumns, resultSet);
        if (resultSet != root)
        {
            CollectColumns(instruction, resultSet, addedColumns, resultSet);
        }

        res.Add(instruction);
    }

    return res;
}