/// <summary>
        /// Parses <paramref name="rawContent"/> as HTML and appends the root node of the
        /// resulting document to <c>HtmlNodes</c>.
        /// </summary>
        /// <param name="rawContent">HTML markup to parse.</param>
        public void LoadRaw(string rawContent)
        {
            var document = new HtmlDocument();
            document.LoadHtml(rawContent);

            // Only the document's root node is stored.
            var rootNode = document.DocumentNode;
            HtmlNodes.Add(rootNode);
        }
        /// <summary>
        /// Loads an HTML document from <paramref name="filePath"/> and appends its root
        /// node to <c>HtmlNodes</c>.
        /// </summary>
        /// <param name="filePath">Path handed to <c>HtmlDocument.Load</c>.</param>
        public void LoadFromFile(string filePath)
        {
            var document = new HtmlDocument();
            document.Load(filePath);

            // Only the document's root node is stored.
            var rootNode = document.DocumentNode;
            HtmlNodes.Add(rootNode);
        }
        /// <summary>
        /// Returns the <c>OuterHtml</c> of every entry in <c>HtmlNodes</c>, in order.
        /// </summary>
        /// <returns>
        /// A new list of markup strings; an empty list when <c>HtmlNodes</c> is null or
        /// contains no entries.
        /// </returns>
        public IList <string> ToStringList()
        {
            bool hasNodes = HtmlNodes != null && HtmlNodes.Count != 0;
            if (!hasNodes)
            {
                return new List <string>();
            }

            var markup = from node in HtmlNodes
                         select node.OuterHtml;

            return markup.ToList();
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Walks the nodes in <c>Child</c>, testing each node before descending into it,
 /// and stores the first node accepted by <paramref name="where"/> in
 /// <paramref name="result"/>.
 /// </summary>
 /// <param name="where">
 /// Predicate a node must satisfy; when null, the first node visited is accepted.
 /// </param>
 /// <param name="result">
 /// Receives the matching node. If it is already non-null, the walk stops without
 /// changing it.
 /// </param>
 public void RecursionAllChildNodes(Func <HtmlNodes, bool> where, ref HtmlNodes result)
 {
     foreach (var child in Child)
     {
         // A match was recorded (here or by a nested call): stop walking.
         if (result != null)
         {
             break;
         }

         bool accepted = where == null || where (child);
         if (!accepted)
         {
             // Not a match itself, so look inside this child.
             child.RecursionAllChildNodes(where, ref result);
             continue;
         }

         result = child;
         break;
     }
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Finds a tag in the given HTML page.
        /// </summary>
        /// <param name="html">HTML markup to parse.</param>
        /// <param name="selectXPath">XPath of the node where the search starts; null starts from the document root.</param>
        /// <param name="where">Decides whether a tag qualifies; returning true means it matches. Null accepts the start node.</param>
        /// <returns>
        /// The start node if <paramref name="where"/> is null or accepts it; otherwise the
        /// node found by <c>RecursionAllChildNodes</c>, or null when nothing matches.
        /// Also null when any exception is thrown (its message is written to the console).
        /// </returns>
        public static HtmlNodes GetHtmlNodesWhere(this string html, string selectXPath = null, Func <HtmlNodes, bool> where = null)
        {
            var document = new HtmlDocument();

            try
            {
                document.LoadHtml(html);

                var startNode = selectXPath != null
                    ? document.DocumentNode.SelectSingleNode(selectXPath)
                    : document.DocumentNode;
                var start = new HtmlNodes(startNode);

                if (where != null && !where (start))
                {
                    // The start node itself was rejected, so search beneath it.
                    HtmlNodes match = null;
                    start.RecursionAllChildNodes(where, ref match);
                    return match;
                }

                return start;
            }
            catch (Exception error)
            {
                // Best-effort: report the failure and signal "not found".
                Console.WriteLine(error.Message);
                return null;
            }
        }
 /// <summary>
 /// Empties <c>HtmlNodes</c> by calling its <c>Clear</c> method.
 /// </summary>
 public void Clear() => HtmlNodes.Clear();