/// <summary>
/// Splits <paramref name="html"/> into its consecutive leading elements and appends
/// the markup of each one to <paramref name="eleList"/>.
/// </summary>
/// <remarks>
/// Elements are peeled off one at a time: the leading element is extracted, appended to
/// the list, removed from the working text (together with any DOCTYPE declaration), and
/// the process repeats until only whitespace remains or no further tag name is found.
/// </remarks>
/// <param name="html">Markup to split.</param>
/// <param name="eleList">List receiving the extracted element strings; it is appended to, never reassigned.</param>
/// <exception cref="Exception">
/// Thrown when the markup cannot be consumed after the first element was extracted.
/// The underlying failure is available through <see cref="Exception.InnerException"/>.
/// </exception>
private void GetElementsStringList(string html, ref List<string> eleList)
{
    // Failures while extracting the very first element propagate unwrapped.
    string eleHtml;
    if (!TryExtractLeadingElementString(html, out eleHtml))
    {
        return;
    }

    try
    {
        // Iterative rather than recursive so that documents with many sibling
        // elements cannot exhaust the call stack.
        while (true)
        {
            eleList.Add(eleHtml);

            // Remove only the occurrence that was just extracted. Removing every
            // occurrence would drop identical sibling elements and would also cut
            // copies of this markup out of elements that have not been extracted yet.
            var remaining = RemoveFirstOccurrence(html, eleHtml);
            remaining = Regex.Replace(remaining, @"<\!DOCTYPE.*?>", "");

            // Both steps above can only shorten the text, so an unchanged length means
            // nothing was consumed and another pass would repeat forever.
            if (remaining.Length == html.Length)
            {
                throw new InvalidOperationException("The extracted element could not be removed from the remaining markup.");
            }

            html = remaining;

            if (Regex.IsMatch(html, @"^\s*$"))
            {
                return;
            }

            if (!TryExtractLeadingElementString(html, out eleHtml))
            {
                return;
            }
        }
    }
    catch (Exception ex)
    {
        // Keep the original failure as the inner exception so the real cause is not lost.
        throw new Exception("SORRY,您的HTML格式不能解析!!!", ex);
    }
}

/// <summary>
/// Extracts the markup of the first element found in <paramref name="html"/>.
/// </summary>
/// <param name="html">Markup to inspect.</param>
/// <param name="eleHtml">
/// Receives the extracted markup. It may be empty when the opening tag is never terminated.
/// </param>
/// <returns><c>false</c> when no tag name can be found in <paramref name="html"/>; otherwise <c>true</c>.</returns>
private bool TryExtractLeadingElementString(string html, out string eleHtml)
{
    eleHtml = null;

    var info = new HtmlInfo
    {
        TagName = Regex.Match(html, @"(?<=\<\s{0,5}|\<)([a-z,A-Z]+|h\d{1})(?=\>|\s)", RegexOptions.IgnoreCase).Value
    };

    if (string.IsNullOrEmpty(info.TagName))
    {
        return false;
    }

    // Regex for the opening tag of the current element.
    var currentTagBeginReg = @"<\s{0,10}" + info.TagName + @".*?>";
    // Regex for the closing tag of the current element.
    var currentTagEndReg = @"\<\/" + info.TagName + @"\>";

    // Case 1: <a/>        single (self-closing) tag
    // Case 2: <a></a>     opening tag with a matching closing tag
    // Case 3: <a>         malformed, no closing tag
    // Case 4: <!--[if ... [endif]-->   conditional comment
    //
    // NOTE(review): the single-tag pattern can run past the end of the opening tag when a
    // later tag on the same line is self-closing (e.g. "<div><img src="a"/>" matches as a
    // whole). Left unchanged here; confirm whether callers rely on it.
    var singleTag = Regex.Match(html, @"<\s{0,10}" + info.TagName + "[^<].*?/>");
    if (singleTag.Success)
    {
        eleHtml = singleTag.Value;
    }
    else if (!Regex.IsMatch(html, currentTagEndReg))
    {
        // No closing tag anywhere in the text.
        if (Regex.IsMatch(html, @"\s{0,10}\<\!\-\-\[if"))
        {
            // Presumably returns the span from the conditional-comment opener to its
            // "[endif]-->" terminator; GetElementString is defined elsewhere - confirm.
            eleHtml = GetElementString(html, @"\s{0,10}\<\!\-\-\[if", @"\[endif\]\-\-\>", 1);
        }
        else
        {
            // Take just the opening tag; Singleline lets it span line breaks.
            eleHtml = Regex.Match(html, currentTagBeginReg, RegexOptions.Singleline).Value;
        }
    }
    else
    {
        eleHtml = GetElementString(html, currentTagBeginReg, currentTagEndReg, 1);
    }

    return true;
}

/// <summary>
/// Returns <paramref name="text"/> with the first ordinal occurrence of
/// <paramref name="fragment"/> removed, or unchanged when it does not occur.
/// </summary>
/// <exception cref="ArgumentException"><paramref name="fragment"/> is null or empty.</exception>
private static string RemoveFirstOccurrence(string text, string fragment)
{
    if (string.IsNullOrEmpty(fragment))
    {
        throw new ArgumentException("The fragment to remove must not be null or empty.", "fragment");
    }

    var index = text.IndexOf(fragment, StringComparison.Ordinal);
    return index < 0 ? text : text.Remove(index, fragment.Length);
}