/// <summary>
/// Renders a single parsed node as HTML text.
/// </summary>
/// <param name="html">Source HTML that the node's <c>Index</c>/<c>Length</c> refer to.</param>
/// <param name="node">The node to render.</param>
/// <returns>
/// A start tag with its attributes for element nodes, a closing tag for end-tag nodes,
/// the result of <c>HtmlUtility.NormalizeText</c> for text nodes (nothing when it returns
/// null), or the raw source slice for comment nodes. Any other node type trips a debug
/// assertion and yields an empty string.
/// </returns>
public static string ToString(string html, HtmlNode node)
{
    StringBuilder output = new StringBuilder(128);

    switch (node.NodeType)
    {
        case HtmlNodeType.Element:
        {
            string elementName = HtmlTags.GetTagName(node.TagId);
            output.Append("<");
            output.Append(elementName);

            if (node.Attributes != null)
            {
                foreach (AttributeItem attribute in node.Attributes)
                {
                    // Values that contain a double quote are wrapped in single quotes instead.
                    string pattern = attribute.Value.Contains("\"")
                        ? " {0}='{1}'"
                        : " {0}=\"{1}\"";
                    output.AppendFormat(pattern, attribute.Name, attribute.Value);
                }
            }

            output.Append(">");
            break;
        }

        case HtmlNodeType.EndTag:
        {
            string closingName = HtmlTags.GetTagName(node.TagId);
            output.AppendFormat("</{0}>", closingName);
            break;
        }

        case HtmlNodeType.Text:
        {
            string normalized = HtmlUtility.NormalizeText(html, node.Index, node.Length);
            if (normalized != null)
            {
                output.Append(normalized);
            }
            break;
        }

        case HtmlNodeType.Comment:
        {
            // Comments are copied verbatim from the source text.
            string raw = html.Substring(node.Index, node.Length);
            output.Append(raw);
            break;
        }

        default:
            // No other node type is expected to reach this method.
            Debug.Assert(false);
            break;
    }

    return output.ToString();
}
/// <summary>
/// Builds a compact, single-string outline of the nodes in a range, widened by one
/// node on each side when such a neighbour exists.
/// </summary>
/// <param name="startTag">Index of the first node of the range.</param>
/// <param name="endTag">Index of the last node of the range (inclusive).</param>
/// <returns>
/// The concatenated outline: elements appear as <c>&lt;tag&gt;</c> or
/// <c>&lt;tag class=value&gt;</c> when a class attribute value is present, end tags as
/// <c>&lt;/tag&gt;</c>, and each text node as a single underscore. Other node types
/// contribute nothing.
/// </returns>
private string GetNodeStack(int startTag, int endTag)
{
    // Include one node of context after the range, if there is one.
    if (endTag < parsedHtml.Nodes.Count - 1)
    {
        endTag++;
    }

    // Include one node of context before the range, if there is one.
    if (startTag > 0)
    {
        startTag--;
    }

    // A builder avoids re-copying the whole outline on every appended node.
    StringBuilder nodeStack = new StringBuilder();

    for (int index = startTag; index <= endTag; index++)
    {
        HtmlNode node = GetNode(index);
        switch (node.NodeType)
        {
            case HtmlNodeType.Element:
                // Look the class attribute up once and reuse the value.
                var classValue = node.GetAttributeValue(HtmlAttributeId.Class);
                nodeStack.Append("<").Append(HtmlTags.GetTagName(node.TagId));
                if (classValue != null)
                {
                    nodeStack.Append(" class=").Append(classValue);
                }
                nodeStack.Append(">");
                break;

            case HtmlNodeType.EndTag:
                nodeStack.Append("</").Append(HtmlTags.GetTagName(node.TagId)).Append(">");
                break;

            case HtmlNodeType.Text:
                // Text content is irrelevant to the outline; mark its position only.
                nodeStack.Append("_");
                break;
        }
    }

    return nodeStack.ToString();
}
/// <summary>
/// Scans the parsed nodes for list items marked <c>class="title"</c> and writes one
/// tab-separated "category, url" line per such item to a fixed output file.
/// </summary>
/// <param name="html">Source HTML that the nodes' <c>Index</c>/<c>Length</c> refer to.</param>
/// <param name="nodes">Parsed nodes of <paramref name="html"/>, in document order.</param>
/// <remarks>
/// The category is the last text node seen inside the item, and the URL is the site
/// prefix plus the last <c>href</c> value seen on an anchor inside the item.
/// The writer is disposed even when processing throws, so the output file handle
/// is never leaked.
/// </remarks>
public static void RootRightpath(string html, List<HtmlNode> nodes)
{
    // Tag names are identifiers, so compare them ordinally; a culture-sensitive
    // ToLower() would turn "LI" into a dotless-i form under Turkish cultures.
    const System.StringComparison ignoreCase = System.StringComparison.OrdinalIgnoreCase;

    string L1_path = @"D:\265.com\L1\RightL1Categories.txt";
    string site = @"http://www.265.com";

    bool insideTitleItem = false;
    string url = "", category = "";

    using (StreamWriter sw = new StreamWriter(L1_path))
    {
        for (int i = 0; i < nodes.Count; i++)
        {
            HtmlNode node = nodes[i];

            if (node.NodeType == HtmlNodeType.Element)
            {
                if (node.Attributes == null)
                {
                    continue;
                }

                string tagName = HtmlTags.GetTagName(node.TagId);

                if (string.Equals(tagName, "li", ignoreCase))
                {
                    foreach (AttributeItem attribute in node.Attributes)
                    {
                        if (attribute.Name == "class" && attribute.Value == "title")
                        {
                            insideTitleItem = true;
                            break;
                        }
                    }
                }
                else if (insideTitleItem && string.Equals(tagName, "a", ignoreCase))
                {
                    // No break: when several href attributes exist, the last one wins.
                    foreach (AttributeItem attribute in node.Attributes)
                    {
                        if (attribute.Name == "href")
                        {
                            url = site + attribute.Value;
                        }
                    }
                }
            }
            else if (node.NodeType == HtmlNodeType.Text)
            {
                if (insideTitleItem)
                {
                    category = html.Substring(node.Index, node.Length);
                }
            }
            else if (node.NodeType == HtmlNodeType.EndTag)
            {
                string tagName = HtmlTags.GetTagName(node.TagId);
                if (insideTitleItem && string.Equals(tagName, "li", ignoreCase))
                {
                    insideTitleItem = false;

                    // NOTE(review): category/url are not reset between items, so an item
                    // without text or without an anchor re-emits the previous values.
                    // Confirm whether that carry-over is intended.
                    sw.WriteLine("{0}\t{1}", category, url);
                    sw.Flush();
                }
            }
        }
    }
}
/// <summary>
/// Extracts sub-category links from the parsed nodes and writes one tab-separated
/// "category, url" line per list item to <paramref name="filepath"/>.
/// </summary>
/// <param name="html">Source HTML that the nodes' <c>Index</c>/<c>Length</c> refer to.</param>
/// <param name="nodes">Parsed nodes of <paramref name="html"/>, in document order.</param>
/// <param name="filepath">Path of the output file; it is created or overwritten.</param>
/// <remarks>
/// A <c>div</c> with <c>class="titleCS"</c> announces a category whose name is the next
/// text node. From then on, or after a <c>ul</c> with <c>class="listUrl"</c>, every
/// closing <c>li</c> emits the current category with the last anchor <c>href</c> seen,
/// until the closing <c>ul</c>. The writer is disposed even when processing throws.
/// </remarks>
public static void Subpath(string html, List<HtmlNode> nodes, string filepath)
{
    // Tag names are identifiers, so compare them ordinally rather than through a
    // culture-sensitive ToLower().
    const System.StringComparison ignoreCase = System.StringComparison.OrdinalIgnoreCase;

    // Scanner states.
    const int Idle = 0;           // outside any section of interest
    const int AwaitingTitle = 1;  // saw the title div; the next text node is the category
    const int InsideList = 2;     // collecting links until the closing ul

    int state = Idle;
    string category = "", url = "";

    using (StreamWriter sw = new StreamWriter(filepath))
    {
        for (int i = 0; i < nodes.Count; i++)
        {
            HtmlNode node = nodes[i];

            if (node.NodeType == HtmlNodeType.Element)
            {
                if (node.Attributes == null)
                {
                    continue;
                }

                string tagName = HtmlTags.GetTagName(node.TagId);

                if (string.Equals(tagName, "div", ignoreCase))
                {
                    foreach (AttributeItem attribute in node.Attributes)
                    {
                        if (attribute.Name == "class" && attribute.Value == "titleCS")
                        {
                            state = AwaitingTitle;
                            break;
                        }
                    }
                }
                else if (string.Equals(tagName, "ul", ignoreCase))
                {
                    foreach (AttributeItem attribute in node.Attributes)
                    {
                        if (attribute.Name == "class" && attribute.Value == "listUrl")
                        {
                            state = InsideList;
                            break;
                        }
                    }
                }
                else if (state == InsideList && string.Equals(tagName, "a", ignoreCase))
                {
                    // No break: when several href attributes exist, the last one wins.
                    foreach (AttributeItem attribute in node.Attributes)
                    {
                        if (attribute.Name == "href")
                        {
                            url = attribute.Value;
                        }
                    }
                }
            }
            else if (node.NodeType == HtmlNodeType.Text)
            {
                if (state == AwaitingTitle)
                {
                    // The first text after the title div names the category.
                    category = html.Substring(node.Index, node.Length);
                    state = InsideList;
                }
            }
            else if (node.NodeType == HtmlNodeType.EndTag)
            {
                if (state != InsideList)
                {
                    continue;
                }

                string tagName = HtmlTags.GetTagName(node.TagId);
                if (string.Equals(tagName, "li", ignoreCase))
                {
                    sw.WriteLine("{0}\t{1}", category, url);
                    sw.Flush();
                }
                else if (string.Equals(tagName, "ul", ignoreCase))
                {
                    state = Idle;
                }
            }
        }
    }
}