/// <summary>
/// Downloads the page at <paramref name="url"/>, parses it, and records every
/// non-blank text node (skipping script/style elements and anything nested
/// under head, script or style) in a new <see cref="ArticleData"/>.
/// The parsed document and the resulting data are also stored in the current
/// session under the keys "doc" and "articleData".
/// </summary>
/// <param name="url">Address of the page handed to the downloader.</param>
/// <returns>
/// The collected text entries keyed by their position in the XPath result;
/// contains no entries when the page has no matching text nodes.
/// </returns>
public ArticleData Begin(string url)
{
    HttpDownloader downloader = new HttpDownloader(url, null, null);
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(downloader.GetPage());

    ArticleData aData = new ArticleData();

    // SelectNodes yields null (not an empty collection) when nothing matches,
    // so guard before touching Count.
    HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//*[not(ancestor::head or self::script or self::style or ancestor::script or ancestor::style)]/text()[normalize-space()]");
    if (nodes != null)
    {
        for (int i = 0; i < nodes.Count; i++)
        {
            HtmlNode node = nodes[i];
            string inner = node.InnerHtml;
            string parentName = node.ParentNode.Name;

            // Only real headings (h1..h6) get wrapped; a plain "starts with h"
            // test would also tag <html>, <header>, <hgroup>, etc.
            bool isHeading = parentName.Length == 2
                && (parentName[0] == 'h' || parentName[0] == 'H')
                && parentName[1] >= '1'
                && parentName[1] <= '6';

            if (isHeading)
            {
                // Text is surrounded by the same marker on both sides, e.g. "[H2]Title[H2]".
                string marker = "[H" + parentName.Substring(1) + "]";
                inner = marker + inner + marker;
            }

            // NOTE(review): the second NodeData argument (0) is presumably the
            // initial "type" later read by AddSpans - confirm against NodeData.
            aData.Texts.Add(i, new NodeData(inner, 0));
        }
    }

    // Kept in session so later calls can work on the same parsed document.
    HttpContext.Current.Session["doc"] = doc;
    HttpContext.Current.Session["articleData"] = aData;
    return aData;
}
/// <summary>
/// Wraps every non-blank text node of <paramref name="doc"/> (same XPath
/// selection as used when the texts were collected) in a
/// <c>&lt;span class="data-type-N"&gt;</c>, where N is the <c>type</c> of the
/// entry in <paramref name="data"/> with the same index.
/// </summary>
/// <param name="data">Article data whose <c>Texts</c> entries are looked up by node index.</param>
/// <param name="doc">Document that is modified in place.</param>
private void AddSpans(ArticleData data, HtmlDocument doc)
{
    HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//*[not(ancestor::head or self::script or self::style or ancestor::script or ancestor::style)]/text()[normalize-space()]");

    // SelectNodes yields null when no node matches; nothing to wrap then.
    if (nodes == null)
    {
        return;
    }

    for (int i = 0; i < nodes.Count; i++)
    {
        HtmlNode node = nodes[i];

        // Index i must line up with the key used when data.Texts was filled.
        string replaceHtml = "<span class=\"data-type-" + data.Texts[i].type + "\">" + node.OuterHtml + "</span>";

        // NOTE(review): ParentNode of the created node is the root of the
        // temporary fragment, not the span itself - presumably deliberate so
        // the whole parsed fragment is inserted; confirm before simplifying.
        HtmlNode replacementNode = HtmlNode.CreateNode(replaceHtml).ParentNode;
        node.ParentNode.ReplaceChild(replacementNode, node);
    }
}