/// <summary>
/// Parses markup text into an <see cref="XDocument"/>.
/// </summary>
/// <param name="text">The markup to parse.</param>
/// <returns>
/// The populated document. If the text (after the counter's TrimStart) does not begin
/// with "&lt;", the document is returned empty. If parsing throws, the message is written
/// to the debug output and whatever was built so far is returned.
/// </returns>
public XDocument ParseDocument(string text)
{
    XDocument result = new XDocument();

    try
    {
        StringCounter cursor = new StringCounter(text);
        cursor.TrimStart();

        // Anything that does not open with a tag is not treated as markup.
        if (!cursor.StartsWith("<"))
        {
            return result;
        }

        // Each call consumes one top-level construct and attaches it to the document.
        while (!cursor.IsAtEnd)
        {
            this.ParseNode(cursor, result);
        }

        // More than one top-level element: gather them under a synthetic "root" element.
        XElement[] topLevel = MyHelper.EnumToArray(result.Elements());
        if (topLevel.Length > 1)
        {
            XElement wrapper = new XElement(XName.Get("root"));
            for (int k = 0; k < topLevel.Length; k++)
            {
                wrapper.Add(topLevel[k]);
            }

            result.Elements().Remove();
            result.Add(wrapper);
        }
    }
    catch (Exception failure)
    {
        // Best effort: report the problem and fall through to return the partial document.
        System.Diagnostics.Debug.WriteLine(failure.Message);
    }

    return result;
}
/// <summary>
/// Parses markup text into an <see cref="XParseDocument"/>.
/// </summary>
/// <param name="text">The markup to parse.</param>
/// <returns>
/// The populated document. If the text (after the counter's TrimStart) does not begin
/// with "&lt;", the document is returned empty. If parsing throws, the message is written
/// to the debug output and whatever was built so far is returned.
/// </returns>
public XParseDocument ParseDocument(string text)
{
    XParseDocument result = new XParseDocument();

    try
    {
        StringCounter cursor = new StringCounter(text);
        cursor.TrimStart();

        // Anything that does not open with a tag is not treated as markup.
        if (!cursor.StartsWith("<"))
        {
            return result;
        }

        // Each call consumes one top-level construct and attaches it to the document.
        while (!cursor.IsAtEnd)
        {
            this.ParseNode(cursor, result);
        }

        // More than one top-level element: gather them under a synthetic "root" element.
        XParseElement[] topLevel = MyHelper.EnumToArray(result.Elements());
        if (topLevel.Length > 1)
        {
            XParseElement wrapper = new XParseElement(XParseName.Get("root"));
            for (int k = 0; k < topLevel.Length; k++)
            {
                wrapper.Add(topLevel[k]);
            }

            result.Elements().Remove();
            result.Add(wrapper);
        }
    }
    catch (Exception failure)
    {
        // Best effort: report the problem and fall through to return the partial document.
        System.Diagnostics.Debug.WriteLine(failure.Message);
    }

    return result;
}
/// <summary>
/// Parses one construct at the counter's current position and attaches it to
/// <paramref name="parent"/>: an "&lt;?xml ... ?&gt;" declaration, a "&lt;!...&gt;" construct
/// (stored as a comment), or an element with its attributes and content.
/// </summary>
/// <param name="sc">Counter over the input text; its Index is advanced past what was consumed.</param>
/// <param name="parent">
/// Container receiving the parsed node. Comments and declarations tolerate null; an element
/// is added unconditionally.
/// </param>
/// <returns>The parsed element, or null when a comment or declaration was consumed.</returns>
/// <exception cref="Exception">
/// The position does not start with '&lt;', the tag name is invalid, or an end tag has no
/// parsable name outside a script element.
/// </exception>
/// <remarks>
/// Look-ahead reads such as <c>sc.Value[i + 1]</c> are not bounds-checked, so truncated input
/// can raise an index exception. NOTE(review): the code relies on <c>sc.NewIndex(i)</c>
/// repositioning this same counter, since <c>sc.Index</c> is read back right after - confirm.
/// </remarks>
private XContainer ParseNode(StringCounter sc, XContainer parent)
{
    sc.TrimStart();
    if (sc.StartValue != '<')
    {
        throw new Exception("Invalid Start Tag. A node has to start with [<].");
    }

    sc.Index = sc.Index + 1;

    XContainer node = null;
    bool isComment = false;
    bool isDeclaration = false;
    int breakOff = 0; // number of dashes after "<!"; also the number of dashes expected before the closing '>'
    XParseName name = null;

    if (sc.StartsWith("?xml"))
    {
        // Declaration
        isDeclaration = true;
        sc.Index += 4;
    }
    else if (sc.StartValue == '!')
    {
        // Comment (any "<!..." construct is collected as a comment)
        isComment = true;
        if (sc.Value[sc.Index + 1] == '-')
        {
            breakOff = 1;
        }
        if (sc.Value[sc.Index + 2] == '-')
        {
            breakOff = 2;
        }
        sc.Index += breakOff + 1;
        sc.TrimStart();
    }
    else if (IsValidTagChar(sc.StartValue))
    {
        // Name
        name = ParseName(sc);
        if (name == null)
        {
            throw new Exception("Invalid Node Name.");
        }
        node = new XParseElement(name);
    }
    else
    {
        throw new Exception("Invalid Node Name.");
    }

    // "br" is always treated as an element without content.
    bool elementAtEnd = name != null && name.LocalName == "br";
    string comment = string.Empty;
    string declVer = string.Empty;
    string declEnc = string.Empty;
    string declSta = string.Empty;

    // Header: attributes of an element, body of a comment, or pseudo-attributes of a declaration.
    for (int i = sc.Index; i < sc.Value.Length; i++)
    {
        if (isComment)
        {
            bool closes =
                (breakOff == 2 && sc.Value[i] == '-' && sc.Value[i + 1] == '-' && sc.Value[i + 2] == '>')
                || (breakOff == 0 && sc.Value[i] == '>')
                || (breakOff == 1 && sc.Value[i] == '-' && sc.Value[i + 1] == '>');

            if (closes)
            {
                if (parent != null)
                {
                    parent.Add(new XComment(comment));
                }
                sc.Index = i + breakOff + 1; // step over the dashes and the '>'
                break;
            }

            comment += sc.Value[i];
        }
        else if (isDeclaration)
        {
            if (sc.Value[i] == '?' && sc.Value[i + 1] == '>')
            {
                if (parent != null && parent is XParseDocument)
                {
                    ((XParseDocument)parent).Declaration = new XDeclaration(declVer, declEnc, declSta);
                }
                sc.Index = i + 2;
                break;
            }
            else if (IsValidTagChar(sc.Value[i]))
            {
                XParseAttribute att = ParseAttribute(sc.NewIndex(i));
                i = sc.Index - 1; // the loop increment lands on the first unread character
                if (att != null)
                {
                    // Ordinal comparison: culture-sensitive lowering breaks "ENCODING"/"VERSION" under tr-TR.
                    string attName = att.Name.LocalName;
                    if (string.Equals(attName, "version", StringComparison.OrdinalIgnoreCase))
                    {
                        declVer = att.Value;
                    }
                    else if (string.Equals(attName, "encoding", StringComparison.OrdinalIgnoreCase))
                    {
                        declEnc = att.Value;
                    }
                    else if (string.Equals(attName, "standalone", StringComparison.OrdinalIgnoreCase))
                    {
                        declSta = att.Value;
                    }
                }
            }
        }
        else if (sc.Value[i] != ' ')
        {
            // Node attributes
            if (sc.Value[i] == '>')
            {
                sc.Index = i + 1;
                break;
            }
            else if (sc.Value[i] == '/' && sc.Value[i + 1] == '>')
            {
                elementAtEnd = true;
                // Position relative to i, not sc.Index: skipped spaces do not advance sc.Index,
                // so "+= 2" left the counter on '>' for inputs such as "<br />".
                sc.Index = i + 2;
                break;
            }
            else if (IsValidTagChar(sc.Value[i]))
            {
                XParseAttribute att = ParseAttribute(sc.NewIndex(i));
                i = sc.Index - 1;
                if (att != null)
                {
                    node.Add(att);
                }
            }
        }
    }

    if (node != null)
    {
        parent.Add(node);
    }

    // Content
    if (node != null && !elementAtEnd)
    {
        string innerText = string.Empty;
        string fullName = name.ToString();

        for (int i = sc.Index; i < sc.Value.Length; i++)
        {
            if (sc.Value[i] == '<')
            {
                // Flush text gathered since the previous tag.
                if (innerText != string.Empty)
                {
                    node.Add(XmlParser.DecodeXml(innerText));
                    innerText = string.Empty;
                }

                if (sc.Value[i + 1] == '/')
                {
                    // End tag
                    XParseName endName = ParseName(sc.NewIndex(i + 2));
                    if (endName != null)
                    {
                        if (CompareXName(endName, name))
                        {
                            // Actual end name: skip "</" + name + ">"
                            sc.Index = i + 3 + fullName.Length;
                            break;
                        }
                        else if (name.LocalName != "script")
                        {
                            // Other end name: does it close the parent or one of its ancestors?
                            XContainer par =
                                (parent is XParseElement && XmlParser.CompareXName(((XParseElement)parent).Name, endName))
                                    ? parent
                                    : (XContainer)this.FindParent(parent, endName);

                            if (par != null)
                            {
                                if (name.LocalName != "form")
                                {
                                    // This element was never closed: hand its child elements to the
                                    // parent and let the caller process the end tag at i.
                                    XParseElement[] enm = MyHelper.EnumToArray(node.Elements());
                                    foreach (XParseElement elem in enm)
                                    {
                                        parent.Add(elem);
                                    }
                                    node.Elements().Remove();
                                    sc.Index = i;
                                    break;
                                }
                                else
                                {
                                    // A form ignores the foreign end tag and keeps collecting content.
                                    sc.Index = i + endName.ToString().Length + 3;
                                    i = sc.Index - 1;
                                }
                            }
                            else
                            {
                                // Orphan end tag: skip all of "</name>". The previous "+ 2" stopped on
                                // the '>' and let it leak into the inner text.
                                sc.Index = i + endName.ToString().Length + 3;
                                i = sc.Index - 1;
                            }
                        }
                    }
                    else if (fullName == "script")
                    {
                        // Script text is skipped.
                    }
                    else
                    {
                        throw new Exception("Invalid End Name.");
                    }
                }
                else if (fullName == "script")
                {
                    // Script text is skipped.
                }
                else
                {
                    // Start tag of a child construct
                    this.ParseNode(sc.NewIndex(i), node);
                    i = sc.Index - 1;
                }
            }
            else if (fullName != "script" && (sc.Value[i] != ' ' || innerText != string.Empty))
            {
                // Inner text; spaces before the first text character are dropped.
                innerText += sc.Value[i];
            }
        }
    }

    sc.TrimStart();
    return node;
}
/// <summary>
/// Parses one construct at the counter's current position and attaches it to
/// <paramref name="parent"/>: an "&lt;?xml ... ?&gt;" declaration, a "&lt;!...&gt;" construct
/// (stored as a comment), or an element with its attributes and content.
/// </summary>
/// <param name="sc">Counter over the input text; its Index is advanced past what was consumed.</param>
/// <param name="parent">
/// Container receiving the parsed node. Comments and declarations tolerate null; an element
/// is added unconditionally.
/// </param>
/// <returns>The parsed element, or null when a comment or declaration was consumed.</returns>
/// <exception cref="Exception">
/// The position does not start with '&lt;', the tag name is invalid, or an end tag has no
/// parsable name outside a script element.
/// </exception>
/// <remarks>
/// Look-ahead reads such as <c>sc.Value[i + 1]</c> are not bounds-checked, so truncated input
/// can raise an index exception. NOTE(review): the code relies on <c>sc.NewIndex(i)</c>
/// repositioning this same counter, since <c>sc.Index</c> is read back right after - confirm.
/// </remarks>
private XContainer ParseNode(StringCounter sc, XContainer parent)
{
    sc.TrimStart();
    if (sc.StartValue != '<')
    {
        throw new Exception("Invalid Start Tag. A node has to start with [<].");
    }

    sc.Index = sc.Index + 1;

    XContainer node = null;
    bool isComment = false;
    bool isDeclaration = false;
    int breakOff = 0; // number of dashes after "<!"; also the number of dashes expected before the closing '>'
    XName name = null;

    if (sc.StartsWith("?xml"))
    {
        // Declaration
        isDeclaration = true;
        sc.Index += 4;
    }
    else if (sc.StartValue == '!')
    {
        // Comment (any "<!..." construct is collected as a comment)
        isComment = true;
        if (sc.Value[sc.Index + 1] == '-')
        {
            breakOff = 1;
        }
        if (sc.Value[sc.Index + 2] == '-')
        {
            breakOff = 2;
        }
        sc.Index += breakOff + 1;
        sc.TrimStart();
    }
    else if (IsValidTagChar(sc.StartValue))
    {
        // Name
        name = ParseName(sc);
        if (name == null)
        {
            throw new Exception("Invalid Node Name.");
        }
        node = new XElement(name);
    }
    else
    {
        throw new Exception("Invalid Node Name.");
    }

    // "br" is always treated as an element without content.
    bool elementAtEnd = name != null && name.LocalName == "br";
    string comment = string.Empty;
    string declVer = string.Empty;
    string declEnc = string.Empty;
    string declSta = string.Empty;

    // Header: attributes of an element, body of a comment, or pseudo-attributes of a declaration.
    for (int i = sc.Index; i < sc.Value.Length; i++)
    {
        if (isComment)
        {
            bool closes =
                (breakOff == 2 && sc.Value[i] == '-' && sc.Value[i + 1] == '-' && sc.Value[i + 2] == '>')
                || (breakOff == 0 && sc.Value[i] == '>')
                || (breakOff == 1 && sc.Value[i] == '-' && sc.Value[i + 1] == '>');

            if (closes)
            {
                if (parent != null)
                {
                    parent.Add(new XComment(comment));
                }
                sc.Index = i + breakOff + 1; // step over the dashes and the '>'
                break;
            }

            comment += sc.Value[i];
        }
        else if (isDeclaration)
        {
            if (sc.Value[i] == '?' && sc.Value[i + 1] == '>')
            {
                if (parent != null && parent is XDocument)
                {
                    ((XDocument)parent).Declaration = new XDeclaration(declVer, declEnc, declSta);
                }
                sc.Index = i + 2;
                break;
            }
            else if (IsValidTagChar(sc.Value[i]))
            {
                XAttribute att = ParseAttribute(sc.NewIndex(i));
                i = sc.Index - 1; // the loop increment lands on the first unread character
                if (att != null)
                {
                    // Ordinal comparison: culture-sensitive lowering breaks "ENCODING"/"VERSION" under tr-TR.
                    string attName = att.Name.LocalName;
                    if (string.Equals(attName, "version", StringComparison.OrdinalIgnoreCase))
                    {
                        declVer = att.Value;
                    }
                    else if (string.Equals(attName, "encoding", StringComparison.OrdinalIgnoreCase))
                    {
                        declEnc = att.Value;
                    }
                    else if (string.Equals(attName, "standalone", StringComparison.OrdinalIgnoreCase))
                    {
                        declSta = att.Value;
                    }
                }
            }
        }
        else if (sc.Value[i] != ' ')
        {
            // Node attributes
            if (sc.Value[i] == '>')
            {
                sc.Index = i + 1;
                break;
            }
            else if (sc.Value[i] == '/' && sc.Value[i + 1] == '>')
            {
                elementAtEnd = true;
                // Position relative to i, not sc.Index: skipped spaces do not advance sc.Index,
                // so "+= 2" left the counter on '>' for inputs such as "<br />".
                sc.Index = i + 2;
                break;
            }
            else if (IsValidTagChar(sc.Value[i]))
            {
                XAttribute att = ParseAttribute(sc.NewIndex(i));
                i = sc.Index - 1;
                if (att != null)
                {
                    node.Add(att);
                }
            }
        }
    }

    if (node != null)
    {
        parent.Add(node);
    }

    // Content
    if (node != null && !elementAtEnd)
    {
        string innerText = string.Empty;
        string fullName = name.ToString();

        for (int i = sc.Index; i < sc.Value.Length; i++)
        {
            if (sc.Value[i] == '<')
            {
                // Flush text gathered since the previous tag.
                if (innerText != string.Empty)
                {
                    node.Add(XmlParser.DecodeXml(innerText));
                    innerText = string.Empty;
                }

                if (sc.Value[i + 1] == '/')
                {
                    // End tag
                    XName endName = ParseName(sc.NewIndex(i + 2));
                    if (endName != null)
                    {
                        if (CompareXName(endName, name))
                        {
                            // Actual end name: skip "</" + name + ">"
                            sc.Index = i + 3 + fullName.Length;
                            break;
                        }
                        else if (name.LocalName != "script")
                        {
                            // Other end name: does it close the parent or one of its ancestors?
                            XContainer par =
                                (parent is XElement && XmlParser.CompareXName(((XElement)parent).Name, endName))
                                    ? parent
                                    : (XContainer)this.FindParent(parent, endName);

                            if (par != null)
                            {
                                if (name.LocalName != "form")
                                {
                                    // This element was never closed: hand its child elements to the
                                    // parent and let the caller process the end tag at i.
                                    XElement[] enm = MyHelper.EnumToArray(node.Elements());
                                    foreach (XElement elem in enm)
                                    {
                                        parent.Add(elem);
                                    }
                                    node.Elements().Remove();
                                    sc.Index = i;
                                    break;
                                }
                                else
                                {
                                    // A form ignores the foreign end tag and keeps collecting content.
                                    sc.Index = i + endName.ToString().Length + 3;
                                    i = sc.Index - 1;
                                }
                            }
                            else
                            {
                                // Orphan end tag: skip all of "</name>". The previous "+ 2" stopped on
                                // the '>' and let it leak into the inner text.
                                sc.Index = i + endName.ToString().Length + 3;
                                i = sc.Index - 1;
                            }
                        }
                    }
                    else if (fullName == "script")
                    {
                        // Script text is skipped.
                    }
                    else
                    {
                        throw new Exception("Invalid End Name.");
                    }
                }
                else if (fullName == "script")
                {
                    // Script text is skipped.
                }
                else
                {
                    // Start tag of a child construct
                    this.ParseNode(sc.NewIndex(i), node);
                    i = sc.Index - 1;
                }
            }
            else if (fullName != "script" && (sc.Value[i] != ' ' || innerText != string.Empty))
            {
                // Inner text; spaces before the first text character are dropped.
                innerText += sc.Value[i];
            }
        }
    }

    sc.TrimStart();
    return node;
}