/// <summary>
/// Scans <paramref name="text"/> from <paramref name="index"/> for the first
/// <c>&lt;...&gt;</c> tag whose name passes <c>HtmlNode.IsValidHtmlNode</c> and equals
/// <paramref name="nodeName"/> (compared case-insensitively).
/// </summary>
/// <param name="text">The markup to scan. A null or empty string yields <c>null</c>.</param>
/// <param name="index">
/// Position to start scanning at. It is moved to just after the closing <c>&gt;</c> of every
/// tag that is examined, so on a successful return it points right after the matched tag.
/// </param>
/// <param name="nodeName">The tag name to look for.</param>
/// <returns>
/// A <c>NodeString</c> holding the matched tag text and its type, or <c>null</c> when no
/// further <c>&lt;</c> / <c>&gt;</c> pair exists or the end of the text is reached without a match.
/// </returns>
private static NodeString ParseNodeStringByName(string text, ref int index, string nodeName)
{
    if (string.IsNullOrEmpty(text))
    {
        return null;
    }

    while (index < text.Length)
    {
        int start = text.IndexOf('<', index);
        if (start == -1)
        {
            return null;
        }

        int end = text.IndexOf('>', start);
        if (end == -1)
        {
            return null;
        }

        // Advance past this candidate before any 'continue'. Previously a span shorter
        // than 3 characters (e.g. "<>") looped forever because index never moved.
        index = end + 1;

        int length = end - start + 1;
        if (length < 3)
        {
            // Too short to be a tag (a tag needs at least "<x>"); keep searching.
            continue;
        }

        string tagBlock = text.Substring(start, length);
        string tagName = HtmlNode.GetNodeName(tagBlock);

        // Accept only tag names that are valid and match the requested name.
        // Ordinal comparison avoids culture-specific casing surprises and tolerates a null nodeName.
        if (HtmlNode.IsValidHtmlNode(tagName)
            && string.Equals(tagName, nodeName, System.StringComparison.OrdinalIgnoreCase))
        {
            NodeString nodeStr = new NodeString();
            nodeStr.Type = HtmlNode.GetNodeStringType(tagBlock);
            nodeStr.NodeBlock = tagBlock;
            return nodeStr;
        }
    }

    return null;
}
/// <summary>
/// Parses the first node located at <paramref name="index"/>; a run of plain text
/// counts as a node too (a text node, type 3).
/// </summary>
/// <param name="text">The markup to parse. A null or empty string yields <c>null</c>.</param>
/// <param name="index">
/// Position to start parsing at. On return it has been moved past the consumed content:
/// just after the closing <c>&gt;</c> for a tag, at the next <c>&lt;</c> when text is followed
/// by a tag, or to <c>text.Length - 1</c> when the remainder of the input was consumed as text.
/// </param>
/// <returns>
/// A <c>NodeString</c> describing either a tag (type taken from
/// <c>HtmlNode.GetNodeStringType</c>) or a text run (type 3); <c>null</c> only for null/empty input.
/// </returns>
private static NodeString ParseNodeString(string text, ref int index)
{
    if (string.IsNullOrEmpty(text))
    {
        return null;
    }

    NodeString nodeStr = new NodeString();
    int start = text.IndexOf('<', index);

    if (start == -1)
    {
        // No '<' left: everything from index onward is one text node.
        nodeStr.Type = 3;
        nodeStr.NodeBlock = text.Substring(index);
        // NOTE(review): index is parked on the last character rather than at text.Length;
        // kept as-is because callers may rely on it - confirm against the calling loop.
        index = text.Length - 1;
        return nodeStr;
    }

    if (start > index)
    {
        // Text precedes the next '<': capture it, then see whether what follows is text as well.
        nodeStr.Type = 3;
        nodeStr.NodeBlock = text.Substring(index, start - index);
        index = start;
        AppendFollowingText(text, ref index, nodeStr);
        return nodeStr;
    }

    // From here on start == index. A '>' appearing inside the tag itself will end it early.
    int end = text.IndexOf('>', start);
    if (end == -1)
    {
        // No closing '>': the remainder is text. The substring must be taken BEFORE index
        // is moved; doing it afterwards returned only the final character.
        nodeStr.Type = 3;
        nodeStr.NodeBlock = text.Substring(index);
        index = text.Length - 1;
        return nodeStr;
    }

    int length = end - start + 1;

    // A tag is at least 3 characters long ("<x>"); longer spans may still be invalid, e.g. "<123>".
    if (length >= 3)
    {
        string tagBlock = text.Substring(start, length);
        string tagName = HtmlNode.GetNodeName(tagBlock);
        if (HtmlNode.IsValidHtmlNode(tagName))
        {
            index = end + 1;
            nodeStr.Type = HtmlNode.GetNodeStringType(tagBlock);
            nodeStr.NodeBlock = tagBlock;
            return nodeStr;
        }
    }

    // "<>" or an invalid tag name: treat the span as text and keep collecting following text.
    nodeStr.Type = 3;
    nodeStr.NodeBlock = text.Substring(index, length);
    index = end + 1;
    AppendFollowingText(text, ref index, nodeStr);
    return nodeStr;
}

/// <summary>
/// Looks ahead from <paramref name="index"/>; if the next node is a text node (type 3), appends
/// its content to <paramref name="nodeStr"/> and advances <paramref name="index"/> past it.
/// Otherwise leaves both untouched so the next call re-parses the tag.
/// </summary>
private static void AppendFollowingText(string text, ref int index, NodeString nodeStr)
{
    int lookahead = index;
    NodeString next = ParseNodeString(text, ref lookahead);
    if (next.Type == 3)
    {
        nodeStr.NodeBlock = nodeStr.NodeBlock + next.NodeBlock;
        index = lookahead;
    }
}