Example #1
0
        /// <summary>
        /// 从表示节点的字符串解析出节点
        /// </summary>
        /// <param name="text"></param>
        /// <returns></returns>
        public static HtmlNode ParseNode(string text)
        {
            int type = GetNodeStringType(text);

            if (type == 3)
            {
                return(HtmlNode.CreateTextNode(text));          //文本节点
            }
            string tagName = HtmlNode.GetNodeName(text);

            if (type == 0)
            {
                HtmlNode node = new HtmlNode(tagName);
                node.Attributes = ParseAttribute(text);
                return(node);
            }
            else if (type == 2)
            {
                HtmlNode node = HtmlNode.CreateClosedNode(tagName);
                node.Attributes = ParseAttribute(text);
                return(node);
            }
            else
            {
                return(null);
            }
        }
Example #2
0
        private static NodeString ParseNodeStringByName(string text, ref int index, string nodeName)
        {
            if (text == "")
            {
                return(null);
            }
            NodeString nodeStr = new NodeString();

            while (index < text.Length)
            {
                int start = text.IndexOf('<', index);
                if (start == -1)
                {
                    return(null);
                }
                int end = text.IndexOf('>', start);
                if (end == -1)
                {
                    return(null);
                }
                int length = end - start + 1;
                if (length < 3)
                {
                    continue;
                }
                else
                {
                    string tagBlock = text.Substring(start, length);
                    string tagName  = HtmlNode.GetNodeName(tagBlock);
                    index = end + 1;                                                                  //索引前进
                    if (HtmlNode.IsValidHtmlNode(tagName) && tagName.ToLower() == nodeName.ToLower()) //检验标签名是否合法
                    {
                        nodeStr.Type      = HtmlNode.GetNodeStringType(tagBlock);
                        nodeStr.NodeBlock = tagBlock;
                        return(nodeStr);
                    }
                }
            }
            return(null);
        }
Example #3
0
        public static List <HtmlNode> ParseNodeByName(string text, string nodeName)
        {
            if (text == "")
            {
                return(null);
            }
            List <HtmlNode> nodeList  = new List <HtmlNode>();
            int             nodeIndex = 0;

            while (nodeIndex < text.Length - 1)
            {
                NodeString nodeStr = ParseNodeStringByName(text, ref nodeIndex, nodeName);
                if (nodeStr == null)
                {
                    break;
                }
                else if (nodeStr.Type == 2)
                {
                    nodeList.Add(HtmlNode.ParseNode(nodeStr.NodeBlock));
                }
                else if (nodeStr.Type == 0)
                {
                    int         tmpIndex = nodeIndex;
                    string      tagName  = HtmlNode.GetNodeName(nodeStr.NodeBlock);
                    NodeString  tmpNode  = null;
                    Stack <int> cntStack = new Stack <int>();
                    cntStack.Push(0);
                    while (cntStack.Count != 0)
                    {
                        if (tmpIndex >= text.Length)
                        {
                            break;                         //到文本结尾仍未找到结束标签,则跳出
                        }
                        tmpNode = ParseNodeString(text, ref tmpIndex);
                        string tmpTagName = HtmlNode.GetNodeName(tmpNode.NodeBlock);
                        if (tmpTagName.ToLower() == nodeName.ToLower())
                        {
                            if (tmpNode.Type == 0)
                            {
                                cntStack.Push(0);
                            }
                            else if (tmpNode.Type == 1)
                            {
                                cntStack.Pop();
                            }
                        }
                    }
                    if (cntStack.Count == 0)//找到结束标签
                    {
                        HtmlNode node      = HtmlNode.ParseNode(nodeStr.NodeBlock);
                        string   innerNode = text.Substring(nodeIndex, tmpIndex - nodeIndex - tmpNode.NodeBlock.Length);
                        if (innerNode != "")//递归解析子节点
                        {
                            node.Childs = ParseNode(innerNode);
                        }
                        nodeList.Add(node);
                        nodeIndex = tmpIndex;
                    }
                    else
                    {
                        break;
                    }
                }
            }//循环结束
            return(nodeList);
        }
Example #4
0
        /// <summary>
        /// 解析第一个节点(包括文本节点)
        /// </summary>
        /// <param name="text"></param>
        /// <param name="index"></param>
        /// <returns></returns>
        private static NodeString ParseNodeString(string text, ref int index)
        {
            if (text == "")
            {
                return(null);
            }
            NodeString nodeStr = new NodeString();
            int        start   = text.IndexOf('<', index);

            if (start == -1)//文本节点
            {
                nodeStr.Type      = 3;
                nodeStr.NodeBlock = text.Substring(index);
                index             = text.Length - 1;
                return(nodeStr);
            }
            else if (start > index)//前面有文本节点
            {
                nodeStr.Type      = 3;
                nodeStr.NodeBlock = text.Substring(index, start - index);//保存文本节点
                index             = start;
                int        tmpIndex = index;
                NodeString tmp      = ParseNodeString(text, ref tmpIndex);
                if (tmp.Type == 3)
                {
                    nodeStr.NodeBlock = nodeStr.NodeBlock + tmp.NodeBlock;
                    index             = tmpIndex;
                }
                //如果不是文本节点,则还原index到上一次位置
                return(nodeStr);
            }
            int end = text.IndexOf('>', start); //若Tag内出现>会出错

            if (end == -1)                      //无>标记表示为文本节点
            {
                index             = text.Length - 1;
                nodeStr.Type      = 3;
                nodeStr.NodeBlock = text.Substring(index);
                return(nodeStr);
            }
            int length = end - start + 1;

            if (length < 3)//一个标签长度最少为3
            {
                nodeStr.Type      = 3;
                nodeStr.NodeBlock = text.Substring(index, length);
                index             = end + 1;
                int        tmpIndex = index;
                NodeString tmp      = ParseNodeString(text, ref tmpIndex);
                if (tmp.Type == 3)
                {
                    nodeStr.NodeBlock = nodeStr.NodeBlock + tmp.NodeBlock;
                    index             = tmpIndex;
                }
                return(nodeStr); //若文本里出现<>继续搜索
            }
            else//解析到类常规标签,可能是<123>等不合法标签
            {
                string tagBlock = text.Substring(start, length);
                string tagName  = HtmlNode.GetNodeName(tagBlock);
                if (HtmlNode.IsValidHtmlNode(tagName)) //检验标签名是否合法
                {
                    index             = end + 1;       //索引前进
                    nodeStr.Type      = HtmlNode.GetNodeStringType(tagBlock);
                    nodeStr.NodeBlock = tagBlock;
                    return(nodeStr);
                }
                else//标签不合法则视为文本
                {
                    nodeStr.Type      = 3;
                    nodeStr.NodeBlock = text.Substring(index, length);
                    index             = end + 1;
                    int        tmpIndex = index;
                    NodeString tmp      = ParseNodeString(text, ref tmpIndex);
                    if (tmp.Type == 3)
                    {
                        nodeStr.NodeBlock = nodeStr.NodeBlock + tmp.NodeBlock;
                        index             = tmpIndex;
                    }
                    return(nodeStr); //若文本里出现<>继续搜索
                }
            }
        }
Example #5
0
        /// <summary>
        /// 解析传入的字符串,并返回最上层节点列表
        /// </summary>
        /// <param name="context"></param>
        /// <returns></returns>
        public static List <HtmlNode> ParseNode(string text)
        {
            if (text == "")
            {
                return(null);
            }
            List <HtmlNode> nodeList  = new List <HtmlNode>();
            int             nodeIndex = 0;

            while (nodeIndex < text.Length - 1)
            {
                NodeString nodeStr = ParseNodeString(text, ref nodeIndex);
                if (nodeStr.Type == 3 || nodeStr.Type == 2)//文本节点和闭合标签
                {
                    nodeList.Add(HtmlNode.ParseNode(nodeStr.NodeBlock));
                }
                else if (nodeStr.Type == 0)//处理开放节点,寻找结束标记
                {
                    int         tmpIndex = nodeIndex;
                    string      tagName  = HtmlNode.GetNodeName(nodeStr.NodeBlock);
                    NodeString  tmpNode  = null;
                    Stack <int> cntStack = new Stack <int>();
                    cntStack.Push(0);
                    while (cntStack.Count != 0)
                    {
                        if (tmpIndex >= text.Length)
                        {
                            break;                        //到文本结尾仍未找到结束标签,则跳出
                        }
                        tmpNode = ParseNodeString(text, ref tmpIndex);
                        string tmpTagName = HtmlNode.GetNodeName(tmpNode.NodeBlock);
                        if (tmpTagName.ToLower() == tagName.ToLower())
                        {
                            if (tmpNode.Type == 0)
                            {
                                cntStack.Push(0);
                            }
                            else if (tmpNode.Type == 1)
                            {
                                cntStack.Pop();
                            }
                        }
                    }
                    if (cntStack.Count == 0)//找到结束标签
                    {
                        HtmlNode node      = HtmlNode.ParseNode(nodeStr.NodeBlock);
                        string   innerNode = text.Substring(nodeIndex, tmpIndex - nodeIndex - tmpNode.NodeBlock.Length);
                        if (innerNode != "")//递归解析子节点
                        {
                            node.Childs = ParseNode(innerNode);
                        }
                        nodeList.Add(node);
                        nodeIndex = tmpIndex;
                    }
                    else
                    {
                        HtmlDocParseExeption err = new HtmlDocParseExeption("Bad Document,Miss enclosed tag for " + nodeStr.NodeBlock);
                        throw err;
                    }
                }
            }//循环结束
            return(nodeList);
        }