Ejemplo n.º 1
0
        /// <summary>
        /// Parses an HTML string and returns the resulting document.
        /// </summary>
        /// <param name="content">The HTML text to parse.</param>
        /// <returns>The DOM document exposed by the reader context once reading has finished.</returns>
        public static Document Parse(string content)
        {
            // Reading starts with a tag reader.
            ReaderBase current = new TagReader();

            // The context carries the input text plus a callback into HtmlParser.Parse.
            Context readerContext = new Context(content, c => HtmlParser.Parse(c));
            current.Context = readerContext;

            // Each Read() call hands back the reader to run next;
            // reading is complete as soon as no reader is returned.
            do
            {
                current = current.Read();
            }
            while (!current.IsNull());

            return readerContext.Document;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads the next tag or comment from the content, starting at the current position.
        /// </summary>
        /// <returns>The reader that should continue after this step.</returns>
        internal override ReaderBase Read()
        {
            // Look for the next tag or comment.
            Match found = TagReader.Pattern.Match(this.Content, this.Position);

            // Nothing matched: hand over to the ending reader.
            if (!found.Success)
            {
                return this.Pass<EndingReader>();
            }

            // Move the read position forward for this match.
            this.Ascend(found);

            // --- Comment ---
            if (found.Groups["comment"].Success)
            {
                // Add the text node, then the comment node itself.
                this.AddText(found);
                this.CurrentNode.appendChild(new Comment(found.Index, this.Position - 1));

                // End the tag.
                return this.EndTag(Comment.NodeName);
            }

            Group openName = found.Groups["open"];

            // --- Closing tag ---
            if (!openName.Success)
            {
                // Captured closing-tag name and the "space" capture.
                string closingName = found.Groups["close"].Value;
                Group closingSpace = found.Groups["space"];

                // Add the text node.
                this.AddText(found);

                if (closingSpace.Success)
                {
                    // The closing tag is not finished yet: continue with the attribute reader.
                    return this.Pass<AttributeReader>().Set(closingName, found.Index);
                }

                // The closing tag is finished: close it.
                return this.CloseTag(closingName, found.Index - 1);
            }

            // --- Opening tag ---
            // "space" captured: the tag is not finished yet.
            Group unfinished = found.Groups["space"];
            // "double" captured: the tag was closed as a double (paired) tag.
            Group pairedEnd = found.Groups["double"];

            // Add the text node.
            this.AddText(found);

            // Create the element and record where it begins.
            Element element = this.Document.createElement(openName.Value);
            element.Begin = found.Index;

            // Reader returned by an implicit close below, if any. It is only
            // consulted when the tag is already finished.
            TagReader afterImplicitClose = null;

            // Table-type elements: close a same-named element found in the mark stack.
            if (element.IsTable && this.MarkStackExists(element.Name))
            {
                int tableEnd = this.Previous.Position - 1;

                afterImplicitClose = this.CloseTag(element.Name, tableEnd, tableEnd);
            }

            // Enum-type elements: close a same-named element that is last in the mark stack.
            if (element.IsEnum && this.MarkStackLast(element.Name))
            {
                int enumEnd = this.Previous.Position - 1;

                afterImplicitClose = this.CloseTag(element.Name, enumEnd, enumEnd);
            }

            // Attach the element to the current node (after any implicit close above).
            this.CurrentNode.appendChild(element);

            // The tag is not finished yet: read its attributes next.
            if (unfinished.Success)
            {
                return this.Pass<AttributeReader>().Set(element);
            }

            // The tag is finished.
            element.InnerBegin = this.Position;

            if (pairedEnd.Success && !element.IsSingle)
            {
                // A double tag on a non-single element: open it.
                this.OpenTag(element);
            }
            else
            {
                // A single tag, or a single-tag element: it ends right here.
                element.InnerEnd = this.Position - 1;

                element.End = element.InnerEnd;
            }

            // Prefer the reader produced by an implicit close, if there was one.
            if (!afterImplicitClose.IsNull())
            {
                return afterImplicitClose;
            }

            // Otherwise end the tag.
            return this.EndTag(element.Name);
        }