/// <summary>
/// Parses an HTML string into a new document and appends that document's
/// root node to <c>HtmlNodes</c>.
/// </summary>
/// <param name="rawContent">HTML markup to parse.</param>
public void LoadRaw(string rawContent)
{
    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(rawContent);

    // Only the document root is stored; existing entries are left in place.
    HtmlNodes.Add(document.DocumentNode);
}
/// <summary>
/// Loads an HTML file into a new document and appends that document's
/// root node to <c>HtmlNodes</c>.
/// </summary>
/// <param name="filePath">Path of the HTML file to load.</param>
public void LoadFromFile(string filePath)
{
    HtmlDocument document = new HtmlDocument();
    document.Load(filePath);

    // Only the document root is stored; existing entries are left in place.
    HtmlNodes.Add(document.DocumentNode);
}
/// <summary>
/// Returns the outer HTML of every node in <c>HtmlNodes</c>, in collection order.
/// </summary>
/// <returns>
/// A new list with one <c>OuterHtml</c> string per node; an empty list when
/// <c>HtmlNodes</c> is null or contains no nodes.
/// </returns>
public IList<string> ToStringList()
{
    bool hasNodes = HtmlNodes != null && HtmlNodes.Count != 0;
    if (!hasNodes)
    {
        return new List<string>();
    }

    var markup = new List<string>();
    foreach (var node in HtmlNodes)
    {
        markup.Add(node.OuterHtml);
    }

    return markup;
}
/// <summary>
/// Searches the descendants of this node depth-first (a child is tested before
/// its own children) and stores the first match in <paramref name="result"/>.
/// The node the method is called on is not tested itself.
/// </summary>
/// <param name="where">
/// Predicate a node must satisfy; when null, the first child visited is treated as a match.
/// </param>
/// <param name="result">
/// Receives the first matching node. If it is already non-null the search stops
/// without changing it.
/// </param>
public void RecursionAllChildNodes(Func<HtmlNodes, bool> where, ref HtmlNodes result)
{
    foreach (var child in Child)
    {
        // A match found earlier (by a previous sibling's subtree or by the caller) ends the search.
        if (result != null)
        {
            return;
        }

        bool isMatch = where == null || where(child);
        if (isMatch)
        {
            result = child;
            return;
        }

        // No match at this child: descend into its subtree before moving to the next sibling.
        child.RecursionAllChildNodes(where, ref result);
    }
}
/// <summary>
/// Parses an HTML string and returns the first node that satisfies a predicate.
/// </summary>
/// <param name="html">HTML markup to parse.</param>
/// <param name="selectXPath">
/// XPath of the node to start the search from; when null the search starts at the document root.
/// </param>
/// <param name="where">
/// Predicate that returns true for a matching node; when null the start node itself is returned.
/// </param>
/// <returns>
/// The start node if it matches; otherwise the node reported by
/// <c>RecursionAllChildNodes</c> for the start node's descendants. Returns null when
/// nothing matches, when <paramref name="selectXPath"/> selects no node, or when
/// parsing or searching throws (the exception message is written to the console).
/// </returns>
public static HtmlNodes GetHtmlNodesWhere(this string html, string selectXPath = null, Func<HtmlNodes, bool> where = null)
{
    var doc = new HtmlDocument();
    try
    {
        doc.LoadHtml(html);

        var node = selectXPath == null
            ? doc.DocumentNode
            : doc.DocumentNode.SelectSingleNode(selectXPath);

        // SelectSingleNode yields null when the XPath matches nothing; report
        // "not found" directly instead of wrapping a null node.
        if (node == null)
        {
            return null;
        }

        var hd = new HtmlNodes(node);
        if (where == null || where(hd))
        {
            return hd;
        }

        HtmlNodes result = null;
        hd.RecursionAllChildNodes(where, ref result);
        return result;
    }
    catch (Exception ex)
    {
        // Best-effort lookup: failures are reported on the console and surfaced as null.
        Console.WriteLine(ex.Message);
    }

    return null;
}
/// <summary>
/// Removes every node from <c>HtmlNodes</c>.
/// </summary>
public void Clear()
{
    HtmlNodes.Clear();
}