/// <summary>
/// Builds an element by storing the four supplied values in the instance
/// members of the same names.
/// </summary>
/// <param name="_type">Value stored in <c>_type</c>.</param>
/// <param name="_params">Value stored in <c>_params</c>.</param>
/// <param name="_content">Value stored in <c>_content</c>.</param>
/// <param name="_parent">Value stored in <c>_parent</c>; defaults to <c>null</c> when omitted.</param>
public HTML_ELEMENT(string _type, string _params, string _content, HTML_ELEMENT _parent = null)
{
    // The parameters shadow the members, hence the explicit "this." qualifier.
    this._parent = _parent;
    this._content = _content;
    this._params = _params;
    this._type = _type;
}
/// <summary>
/// Splits one already-isolated tag (everything from '&lt;' up to and including
/// '&gt;') into a <c>Tag</c> group and a <c>Params</c> group holding whatever
/// follows the tag name. Built once and shared, because constructing a
/// <see cref="RegexOptions.Compiled"/> regex on every call is expensive.
/// </summary>
// NOTE(review): "[\S^<>]" is a positive class (any non-whitespace character, plus
// the literals '^', '<' and '>'); "[^\s<>]" may have been intended. Left unchanged
// because the contents of the opener/closer tables are not visible from here.
private static readonly Regex html_tag_regex = new Regex(
    @"^((<)(?<Tag>[\S^<>]+)(\s*)(?<Params>.*)(>))?",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

/// <summary>
/// Scans <paramref name="html"/> for tags and checks that opening and closing
/// tags are balanced, using <c>html_spec_openers</c> and <c>html_spec_closers</c>
/// to classify each tag name. Progress and mismatches are reported through
/// <c>Log.WriteTime</c> and <c>Debug.WriteLine</c>.
/// </summary>
/// <param name="html">The markup to scan. Text outside of tags is skipped.</param>
/// <returns>
/// A newly constructed <c>HTMLDom</c>. The elements identified while scanning
/// are currently not attached to it.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="html"/> is <c>null</c>.
/// </exception>
public static HTMLDom Parse_HTML(string html)
{
    if (html == null)
    {
        throw new System.ArgumentNullException("html");
    }

    HTMLDom Dom = new HTMLDom();

    // Tag names that have been opened but not yet closed.
    Stack<string> MS = new Stack<string>();

    int position = 0;
    while (position < html.Length)
    {
        // Anything before the next '<' is not a tag and is skipped.
        int tagStart = html.IndexOf('<', position);
        if (tagStart < 0)
        {
            break;
        }

        // A tag runs up to and including the next '>'; if there is none, it
        // runs to the end of the input.
        int closingBracket = html.IndexOf('>', tagStart + 1);
        int tagEnd = closingBracket < 0 ? html.Length - 1 : closingBracket;
        position = tagEnd + 1;

        string p = html.Substring(tagStart, tagEnd - tagStart + 1).ToLowerInvariant();
        Debug.WriteLine(p);

        // Break the isolated tag into its name and trailing text.
        Match M = html_tag_regex.Match(p);

        // Named lookup instead of a positional index: .NET numbers named groups
        // after all unnamed ones, so "Tag" is group 5 in this pattern.
        Group tagGroup = M.Groups["Tag"];
        string tag = tagGroup.Value;

        if (html_spec_openers.Contains(tag))
        {
            if (object.Equals(html_spec_openers[tag], tag))
            {
                // The opener maps to itself: the element is complete without a
                // separate closing tag.
                string _element_type = tag;
                // NOTE(review): the element is created but never attached to Dom.
                HTML_ELEMENT _element = new HTML_ELEMENT(_element_type, "", "");
                Log.WriteTime("Indentified element type=" + _element_type);
            }
            else
            {
                // An opening tag that expects a closer later: remember it.
                Log.WriteTime("Pushed onto MStack='" + tag + "'");
                MS.Push(tag);
            }
        }
        else if (html_spec_closers.Contains(tag))
        {
            if (MS.Count == 0)
            {
                // Stack<T>.Peek throws on an empty stack; a stray closing tag is
                // reported and skipped instead of aborting the whole parse.
                Log.WriteTime("ERROR: Closing tag '" + tag + "' has no open tag on the MatchStack");
            }
            else
            {
                // Check that this closer matches the most recently opened tag.
                string closer_check = MS.Peek();
                Log.WriteTime("Checked HTML DOM potential match: Stack.Peek()='" + closer_check + "', Regex Match='" + tag + "', Openers,Closers='," + html_spec_closers[tag] + "," + html_spec_openers[closer_check]);

                if (object.Equals(html_spec_openers[closer_check], tag))
                {
                    MS.Pop();
                    Log.WriteTime("\tSuccess");
                    Debug.Assert(html_spec_closers[tag] is string);
                    string _element_type = (string)html_spec_closers[tag];
                    Log.WriteTime("Indentified element type=" + _element_type);
                    // NOTE(review): the element is created but never attached to Dom.
                    HTML_ELEMENT _element = new HTML_ELEMENT(_element_type, "", "");
                }
            }
        }
        else
        {
            Log.WriteTime("Failed to parse '" + tag + "' with name '" + tagGroup.Name + "'.");
        }

        Debug.WriteLine("\t" + tag + " :: " + (html_spec_openers.Contains(tag) ? html_spec_openers[tag] : html_spec_closers[tag]));
    }

    // Anything still on the stack was opened but never closed.
    if (MS.Count != 0)
    {
        Log.WriteTime("ERROR: Failed to fully unwind MatchStack with " + MS.Count + " remaining");
        Log.WriteTime("\tUnwinding Stack...");
        foreach (string er in MS)
        {
            Log.WriteTime("\t\t" + er);
        }
    }

    return Dom;
}