/// <summary>
/// Parses <paramref name="sourceHtml"/> and reassembles an HTML document string from
/// the first node matched by a "TR" tag-name filter.
/// </summary>
/// <remarks>
/// The matched node's first child is emitted verbatim right after the opening html tag.
/// The first child of the matched node's second child acts as the wrapper: its text
/// becomes the wrapper's opening tag, and only its tag children are copied into the body.
/// The wrapper is always closed with a literal div end tag.
/// </remarks>
/// <param name="sourceHtml">HTML text to parse; every Environment.NewLine is removed first.</param>
/// <returns>The reassembled markup.</returns>
private string HtmlText(string sourceHtml)
{
    string flattened = sourceHtml.Replace(System.Environment.NewLine, "");
    hParser.Parser parser = hParser.Parser.CreateParser(flattened, "utf-8");

    hParser.NodeFilter rowFilter = new TagNameFilter("TR");
    hParser.INode root = parser.Parse(rowFilter)[0];

    string headHtml = root.Children[0].ToHtml();
    hParser.INode wrapper = root.Children[1].Children[0];

    // Collect the markup of the wrapper's tag children; non-tag nodes are skipped.
    StringBuilder innerHtml = new StringBuilder();
    for (int index = 0; index < wrapper.Children.Count; index++)
    {
        hParser.INode child = wrapper.Children[index];
        if (child is hParser.ITag)
        {
            innerHtml.Append(child.ToHtml());
        }
    }

    return new StringBuilder()
        .Append("<html>")
        .Append(headHtml)
        .Append("<body>")
        .Append("<" + wrapper.GetText() + ">")
        .Append(innerHtml.ToString())
        .Append("</div>")
        .Append("</body>")
        .Append("</html>")
        .ToString();
}
/// <summary>
/// Walks <paramref name="node"/>, its descendants, and every following sibling (with
/// their descendants) in document order. For each opening tag whose start position is
/// not yet in <c>start</c>, appends a summary (tag name, ID, name, class, start
/// position) to <c>parseResult</c> and records the position in <c>start</c>.
/// </summary>
/// <remarks>
/// Siblings are handled by the loop and only children by recursion, so every node is
/// visited exactly once. Recursing into each sibling as well would make every sibling
/// re-walk all of its own following siblings, which grows exponentially with the
/// number of siblings even though the <c>start</c> check hides the duplicates.
/// </remarks>
/// <param name="node">Node to start from; a null node is ignored.</param>
private void paserData(hParser.INode node)
{
    for (hParser.INode current = node; current != null; current = current.NextSibling)
    {
        hParser.ITag tag = getTag(current);
        if (tag != null && !tag.IsEndTag() && !start.Contains(tag.StartPosition))
        {
            object oId = tag.GetAttribute("ID");
            object oName = tag.GetAttribute("name");
            object oClass = tag.GetAttribute("class");
            parseResult += tag.TagName + ":\r\nID:" + oId + " Name:" + oName + " Class:" + oClass
                + " StartPosition:" + tag.StartPosition.ToString() + "\r\n";
            start.Add(tag.StartPosition);
        }

        // Child nodes: the recursive call walks the first child and all of its siblings.
        if (current.Children != null && current.Children.Count > 0)
        {
            paserData(current.FirstChild);
        }
    }
}
/// <summary>
/// Appends a description of a table row's cells to <c>parseResult</c>.
/// </summary>
/// <remarks>
/// When the row has header cells, only the TH cells with non-empty text are reported.
/// Otherwise the row's child nodes are scanned and each TD cell whose text is neither
/// empty nor a lone "\n" is reported. Each entry contains the tag name, text, inner
/// HTML, and start/end positions.
/// </remarks>
/// <param name="node">Node expected to be a table row; anything else (including null) is ignored.</param>
private void parserTR(hParser.INode node)
{
    hParser.Tags.TableRow tagTR = getTagRow(node);
    if (tagTR == null)
    {
        // Not a table row: nothing to report (previously a NullReferenceException).
        return;
    }

    // Read the header collection once instead of on every loop test.
    var headers = tagTR.Headers;
    if (headers != null && headers.Count() > 0)
    {
        int headerCount = headers.Count();
        for (int i = 0; i < headerCount; i++)
        {
            var header = headers[i] as hParser.Tags.TableHeader;
            // th — the null check mirrors the one applied to td cells below.
            if (header != null && header.TagName == "TH" && !string.IsNullOrEmpty(header.StringText))
            {
                parseResult += header.TagName + ":\r\nStringText:" + header.StringText + " ChildrenHTML:" + header.ChildrenHTML
                    + " StartPosition:" + header.StartPosition.ToString() + " EndPosition:" + header.EndPosition.ToString() + "\r\n";
            }
        }

        // Rows with headers never fall through to the td scan.
        return;
    }

    // The TD cells are among the row's child nodes.
    var cells = tagTR.ChildrenAsNodeArray;
    if (cells == null)
    {
        return;
    }

    int cellCount = cells.Count();
    for (int i = 0; i < cellCount; i++)
    {
        var colum = cells[i] as hParser.Tags.TableColumn;
        // td
        if (colum != null && colum.TagName == "TD" && !string.IsNullOrEmpty(colum.StringText) && colum.StringText != "\n")
        {
            parseResult += colum.TagName + ":\r\nStringText:" + colum.StringText + " ChildrenHTML:" + colum.ChildrenHTML
                + " StartPosition:" + colum.StartPosition.ToString() + " EndPosition:" + colum.EndPosition.ToString() + "\r\n";
        }
    }
}
/// <summary>
/// Returns <paramref name="node"/> viewed as a tag, or null when it is null or not a tag.
/// </summary>
/// <param name="node">Node to inspect; may be null.</param>
/// <returns>The node as <c>hParser.ITag</c>, or null.</returns>
private hParser.ITag getTag(hParser.INode node)
{
    // "as" already yields null for a null reference and for a non-matching type.
    return node as hParser.ITag;
}
/// <summary>
/// Returns <paramref name="node"/> viewed as a table row, or null when it is null or
/// not a table row.
/// </summary>
/// <param name="node">Node to inspect; may be null.</param>
/// <returns>The node as <c>hParser.Tags.TableRow</c>, or null.</returns>
private hParser.Tags.TableRow getTagRow(hParser.INode node)
{
    // "as" already yields null for a null reference and for a non-matching type.
    return node as hParser.Tags.TableRow;
}
/// <summary>
/// Returns the table row behind <paramref name="node"/> when it is a data row with at
/// least one meaningful TD cell; otherwise returns null.
/// </summary>
/// <remarks>
/// A row is rejected when the node is not a table row, when the row has any header
/// cells, or when none of its child nodes is a TD cell whose text is neither empty nor
/// a lone "\n".
/// </remarks>
/// <param name="node">Node expected to be a table row; may be null.</param>
/// <returns>The row when it qualifies, otherwise null.</returns>
private hParser.Tags.TableRow parserTR(hParser.INode node)
{
    hParser.Tags.TableRow tagTR = getTagRow(node);
    if (tagTR == null)
    {
        // Not a table row: treated like any other non-qualifying node.
        return null;
    }

    // Rows that carry header cells are never considered valid data rows.
    var headers = tagTR.Headers;
    if (headers != null && headers.Count() > 0)
    {
        return null;
    }

    // The TD cells are among the row's child nodes.
    var cells = tagTR.ChildrenAsNodeArray;
    if (cells == null)
    {
        return null;
    }

    int cellCount = cells.Count();
    for (int i = 0; i < cellCount; i++)
    {
        var colum = cells[i] as hParser.Tags.TableColumn;
        // td — the first cell with real text is enough to accept the row.
        if (colum != null && colum.TagName == "TD" && !string.IsNullOrEmpty(colum.StringText) && colum.StringText != "\n")
        {
            return tagTR;
        }
    }

    return null;
}