/// <summary>
/// Parses an HTML string and returns the resulting document.
/// </summary>
/// <param name="content">The HTML text to parse.</param>
/// <returns>The DOM document exposed by the parsing context once reading has finished.</returns>
public static Document Parse(string content)
{
    // Reading always starts with a tag reader.
    ReaderBase current = new TagReader();

    // The context carries the source text plus a callback that forwards to HtmlParser.Parse.
    // NOTE(review): what Context uses the callback for is not visible here - confirm against Context.
    Context parsingContext = new Context(content, html => HtmlParser.Parse(html));

    // Attach the context to the first reader.
    current.Context = parsingContext;

    // Each Read() call hands back the reader that should run next;
    // once IsNull() reports true for the returned reader, everything has been read.
    do
    {
        current = current.Read();
    }
    while (!current.IsNull());

    // The document is taken from the context, not from a reader.
    return parsingContext.Document;
}
/// <summary>
/// Matches the next tag or comment at or after the current position, records it in the
/// DOM being built, and returns the reader that should continue from there.
/// </summary>
/// <returns>
/// The reader produced by whichever branch handled the match: an <c>AttributeReader</c>
/// when the tag name is followed by whitespace, the result of <c>EndTag</c> or
/// <c>CloseTag</c> when the tag is already complete, or an <c>EndingReader</c> when the
/// pattern no longer matches.
/// </returns>
internal override ReaderBase Read()
{
    // Look for the next tag or comment, starting at the current position.
    Match match = TagReader.Pattern.Match(this.Content, this.Position);

    // No further tag or comment: hand over to the ending reader.
    if (!match.Success)
    {
        return this.Pass<EndingReader>();
    }

    // Advance the reading position for this match.
    this.Ascend(match);

    // Comment: add the text node for this match first, then the comment node itself.
    Group comment = match.Groups["comment"];
    if (comment.Success)
    {
        this.AddText(match);
        this.CurrentNode.appendChild(new Comment(match.Index, this.Position - 1));
        return this.EndTag(Comment.NodeName);
    }

    // Open tag.
    Group open = match.Groups["open"];
    if (open.Success)
    {
        return this.ReadOpenTag(match, open);
    }

    // Neither a comment nor an open tag, so the match is treated as a close tag.
    return this.ReadCloseTag(match);
}

/// <summary>
/// Handles a matched open tag: creates the element, attaches it to the current node and
/// decides which reader continues.
/// </summary>
/// <param name="match">The successful pattern match for the tag.</param>
/// <param name="open">The "open" group of <paramref name="match"/>; its value is the tag name.</param>
/// <returns>The reader that should run next.</returns>
private ReaderBase ReadOpenTag(Match match, Group open)
{
    // "space": the tag name is followed by whitespace, i.e. the tag is not finished yet.
    Group space = match.Groups["space"];
    // "double": the open tag ended as a double (paired) tag.
    Group xdouble = match.Groups["double"];

    // Add the text node for this match.
    this.AddText(match);

    // Create the element and record where it starts in the source.
    Element element = this.Document.createElement(open.Value);
    element.Begin = match.Index;

    // Close same-named open elements first (table-type / enum-type rules), then attach
    // the new element. Both the "finished" and "unfinished" paths need these two steps.
    TagReader closer = this.CloseImpliedTags(element);
    this.CurrentNode.appendChild(element);

    // Tag not finished yet: the attribute reader takes over. The reader returned by an
    // implied close is deliberately not used on this path (same as the original code).
    if (space.Success)
    {
        return this.Pass<AttributeReader>().Set(element);
    }

    // Tag already finished: its inner content starts right here.
    element.InnerBegin = this.Position;

    if (xdouble.Success && !element.IsSingle)
    {
        // A paired tag for an element that is not single-tag: open it.
        this.OpenTag(element);
    }
    else
    {
        // Either not a paired tag, or a single-tag element written as one:
        // the element ends immediately before the current position.
        element.InnerEnd = this.Position - 1;
        element.End = element.InnerEnd;
    }

    // Prefer the reader returned by an implied close; otherwise end the tag normally.
    if (closer.IsNull())
    {
        return this.EndTag(element.Name);
    }

    return closer;
}

/// <summary>
/// Closes an already-marked element with the same name as <paramref name="element"/>
/// when the table-type or enum-type rule applies.
/// </summary>
/// <param name="element">The element that is about to be attached.</param>
/// <returns>
/// The reader returned by the last <c>CloseTag</c> call that was made, or <c>null</c>
/// when no tag was closed.
/// </returns>
private TagReader CloseImpliedTags(Element element)
{
    TagReader closer = null;

    // Table-type elements: close when MarkStackExists reports the name.
    // NOTE(review): presumably "anywhere on the mark stack" - confirm against MarkStackExists.
    if (element.IsTable && this.MarkStackExists(element.Name))
    {
        // The same position (one before the previous reader's position) is passed
        // for both position arguments of CloseTag.
        int end = this.Previous.Position - 1;
        closer = this.CloseTag(element.Name, end, end);
    }

    // Enum-type elements: close when MarkStackLast reports the name.
    // NOTE(review): presumably "on top of the mark stack" - confirm against MarkStackLast.
    if (element.IsEnum && this.MarkStackLast(element.Name))
    {
        int end = this.Previous.Position - 1;
        closer = this.CloseTag(element.Name, end, end);
    }

    return closer;
}

/// <summary>
/// Handles a matched close tag.
/// </summary>
/// <param name="match">The successful pattern match for the tag.</param>
/// <returns>The reader that should run next.</returns>
private ReaderBase ReadCloseTag(Match match)
{
    // Name captured from the close tag.
    string name = match.Groups["close"].Value;
    // "space": the tag name is followed by whitespace, i.e. the close tag is not finished yet.
    Group closeSpace = match.Groups["space"];

    // Add the text node for this match.
    this.AddText(match);

    // Close tag not finished yet: let the attribute reader consume the rest of it.
    if (closeSpace.Success)
    {
        return this.Pass<AttributeReader>().Set(name, match.Index);
    }

    // Close tag finished: close the element, ending just before the tag starts.
    return this.CloseTag(name, match.Index - 1);
}