Esempio n. 1
0
 /// <summary>
 /// Loads the page obtained through <c>HttpDownloader</c> for <paramref name="url"/>,
 /// collects every non-blank text node outside of head/script/style, and records
 /// each one in a new <c>ArticleData</c> keyed by its position in the match list.
 /// Text whose parent element is a heading (h1-h6) is wrapped in "[Hn]" markers.
 /// The parsed document and the resulting data are also stored in the current
 /// session under the keys "doc" and "articleData".
 /// </summary>
 /// <param name="url">Address of the page to fetch and analyse.</param>
 /// <returns>The populated <c>ArticleData</c>; its Texts collection is left empty when the page has no matching text nodes.</returns>
 public ArticleData Begin(string url)
 {
     HttpDownloader downloader = new HttpDownloader(url, null, null);
     HtmlDocument doc = new HtmlDocument();
     doc.LoadHtml(downloader.GetPage());
     ArticleData aData = new ArticleData();
     HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//*[not(ancestor::head or self::script or self::style or ancestor::script or ancestor::style)]/text()[normalize-space()]");
     // HtmlAgilityPack's SelectNodes yields null (not an empty collection) when
     // nothing matches, so guard before touching Count.
     int count = nodes == null ? 0 : nodes.Count;
     for (int i = 0; i < count; i++)
     {
         HtmlNode node = nodes[i];
         string inner = node.InnerHtml;
         string parentName = node.ParentNode.Name;
         // Only h1..h6 count as headings. A plain "starts with h" test would also
         // catch html, header, hr, hgroup, ... and produce bogus markers.
         bool isHeading = parentName.Length == 2
             && (parentName[0] == 'h' || parentName[0] == 'H')
             && parentName[1] >= '1' && parentName[1] <= '6';
         if (isHeading)
         {
             string marker = "[H" + parentName.Substring(1) + "]";
             inner = marker + inner + marker;
         }
         // NOTE(review): the second NodeData argument (0) looks like an initial
         // type/classification value - confirm against NodeData's constructor.
         aData.Texts.Add(i, new NodeData(inner, 0));
     }
     HttpContext.Current.Session["doc"] = doc;
     HttpContext.Current.Session["articleData"] = aData;
     return aData;
 }
Esempio n. 2
0
 /// <summary>
 /// Wraps every non-blank text node of <paramref name="doc"/> (outside of
 /// head/script/style) in a <c>&lt;span class="data-type-N"&gt;</c> element, where N is
 /// the <c>type</c> of the entry at the same index in <paramref name="data"/>.Texts.
 /// The document is modified in place.
 /// </summary>
 /// <param name="data">Article data whose Texts entries are indexed in the same order as the text nodes matched here.</param>
 /// <param name="doc">Parsed document to annotate.</param>
 private void AddSpans(ArticleData data, HtmlDocument doc)
 {
     HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//*[not(ancestor::head or self::script or self::style or ancestor::script or ancestor::style)]/text()[normalize-space()]");
     // HtmlAgilityPack's SelectNodes yields null (not an empty collection) when
     // nothing matches; with no text nodes there is nothing to wrap.
     if (nodes == null)
     {
         return;
     }
     for (int i = 0; i < nodes.Count; i++)
     {
         HtmlNode node = nodes[i];
         string replaceHtml = "<span class=\"data-type-" + data.Texts[i].type + "\">" + node.OuterHtml + "</span>";
         // NOTE(review): the replacement is the parent of the freshly created span
         // (the temporary fragment root), not the span itself - kept as in the
         // original; confirm this is intentional.
         HtmlNode replacementNode = HtmlNode.CreateNode(replaceHtml).ParentNode;
         node.ParentNode.ReplaceChild(replacementNode, node);
     }
 }