예제 #1
0
파일: HtmlDoc.cs 프로젝트: alvaka/ucml
        /// <summary>
        /// Parses the given markup string and returns the list of top-level nodes.
        /// For every open tag the matching closing tag is located (nested tags with the
        /// same name are tracked), and the markup between them is parsed recursively
        /// into the node's <c>Childs</c>.
        /// </summary>
        /// <param name="text">Markup text to parse.</param>
        /// <returns>
        /// The top-level nodes in document order, or <c>null</c> when
        /// <paramref name="text"/> is null or empty.
        /// </returns>
        /// <exception cref="HtmlDocParseExeption">
        /// Thrown when an open tag has no matching closing tag before the end of the text.
        /// </exception>
        public static List<HtmlNode> ParseNode(string text)
        {
            // Null is treated like the empty string instead of failing on text.Length below.
            if (string.IsNullOrEmpty(text))
            {
                return null;
            }

            List<HtmlNode> nodeList = new List<HtmlNode>();
            int nodeIndex = 0;

            // NOTE(review): the bound stops one character short of the end, so a trailing
            // single character is never handed to ParseNodeString - confirm this is intended.
            while (nodeIndex < text.Length - 1)
            {
                NodeString nodeStr = ParseNodeString(text, ref nodeIndex);
                if (nodeStr.Type == 3 || nodeStr.Type == 2) // text node or self-contained (closed) tag
                {
                    nodeList.Add(HtmlNode.ParseNode(nodeStr.NodeBlock));
                }
                else if (nodeStr.Type == 0) // open tag: scan forward for its closing tag
                {
                    int tmpIndex = nodeIndex;
                    string tagName = HtmlNode.GetNodeName(nodeStr.NodeBlock);
                    NodeString tmpNode = null;

                    // Number of still-unclosed tags named tagName; starts at 1 for the tag just read.
                    int depth = 1;
                    while (depth != 0)
                    {
                        if (tmpIndex >= text.Length)
                        {
                            break; // reached the end of the text without finding the closing tag
                        }

                        tmpNode = ParseNodeString(text, ref tmpIndex);
                        string tmpTagName = HtmlNode.GetNodeName(tmpNode.NodeBlock);

                        // Ordinal, case-insensitive match: tag names are identifiers, so the
                        // comparison must not depend on the current culture (e.g. Turkish 'I').
                        if (string.Equals(tmpTagName, tagName, System.StringComparison.OrdinalIgnoreCase))
                        {
                            if (tmpNode.Type == 0)
                            {
                                depth++; // nested open tag with the same name
                            }
                            else if (tmpNode.Type == 1) // presumably a closing tag - it ends one nesting level
                            {
                                depth--;
                            }
                        }
                    }

                    if (depth == 0) // matching closing tag found
                    {
                        HtmlNode node = HtmlNode.ParseNode(nodeStr.NodeBlock);

                        // Inner markup = everything after the open tag up to, but excluding,
                        // the closing tag block that was consumed last.
                        string innerNode = text.Substring(nodeIndex, tmpIndex - nodeIndex - tmpNode.NodeBlock.Length);
                        if (innerNode != "") // parse child nodes recursively
                        {
                            node.Childs = ParseNode(innerNode);
                        }

                        nodeList.Add(node);
                        nodeIndex = tmpIndex; // continue after the closing tag
                    }
                    else
                    {
                        throw new HtmlDocParseExeption("Bad Document,Miss enclosed tag for " + nodeStr.NodeBlock);
                    }
                }
            } // end of main loop

            return nodeList;
        }
예제 #2
0
파일: HtmlDoc.cs 프로젝트: alvaka/ucml
        /// <summary>
        /// Parses the given markup string and returns the list of top-level nodes.
        /// For every open tag the matching closing tag is located (nested tags with the
        /// same name are tracked), and the markup between them is parsed recursively
        /// into the node's <c>Childs</c>.
        /// </summary>
        /// <param name="text">Markup text to parse.</param>
        /// <returns>
        /// The top-level nodes in document order, or <c>null</c> when
        /// <paramref name="text"/> is null or empty.
        /// </returns>
        /// <exception cref="HtmlDocParseExeption">
        /// Thrown when an open tag has no matching closing tag before the end of the text.
        /// </exception>
        public static List<HtmlNode> ParseNode(string text)
        {
            // Null is treated like the empty string instead of failing on text.Length below.
            if (string.IsNullOrEmpty(text)) return null;

            List<HtmlNode> nodeList = new List<HtmlNode>();
            int nodeIndex = 0;

            // NOTE(review): the bound stops one character short of the end, so a trailing
            // single character is never handed to ParseNodeString - confirm this is intended.
            while (nodeIndex < text.Length - 1)
            {
                NodeString nodeStr = ParseNodeString(text, ref nodeIndex);
                if (nodeStr.Type == 3 || nodeStr.Type == 2) // text node or self-contained (closed) tag
                {
                    nodeList.Add(HtmlNode.ParseNode(nodeStr.NodeBlock));
                }
                else if (nodeStr.Type == 0) // open tag: scan forward for its closing tag
                {
                    int tmpIndex = nodeIndex;
                    string tagName = HtmlNode.GetNodeName(nodeStr.NodeBlock);
                    NodeString tmpNode = null;

                    // Number of still-unclosed tags named tagName; starts at 1 for the tag just read.
                    int depth = 1;
                    while (depth != 0)
                    {
                        if (tmpIndex >= text.Length) break; // end of text reached, closing tag not found

                        tmpNode = ParseNodeString(text, ref tmpIndex);
                        string tmpTagName = HtmlNode.GetNodeName(tmpNode.NodeBlock);

                        // Ordinal, case-insensitive match: tag names are identifiers, so the
                        // comparison must not depend on the current culture (e.g. Turkish 'I').
                        if (string.Equals(tmpTagName, tagName, System.StringComparison.OrdinalIgnoreCase))
                        {
                            if (tmpNode.Type == 0) depth++;      // nested open tag with the same name
                            else if (tmpNode.Type == 1) depth--; // presumably a closing tag - ends one level
                        }
                    }

                    if (depth == 0) // matching closing tag found
                    {
                        HtmlNode node = HtmlNode.ParseNode(nodeStr.NodeBlock);

                        // Inner markup = everything after the open tag up to, but excluding,
                        // the closing tag block that was consumed last.
                        string innerNode = text.Substring(nodeIndex, tmpIndex - nodeIndex - tmpNode.NodeBlock.Length);
                        if (innerNode != "") // parse child nodes recursively
                        {
                            node.Childs = ParseNode(innerNode);
                        }

                        nodeList.Add(node);
                        nodeIndex = tmpIndex; // continue after the closing tag
                    }
                    else
                    {
                        throw new HtmlDocParseExeption("Bad Document,Miss enclosed tag for " + nodeStr.NodeBlock);
                    }
                }
            } // end of main loop

            return nodeList;
        }