/// <summary>
/// Runs <c>SpiderTool.GetDataList</c> against two list-page templates
/// (news.163.com and women.sohu.com) and asserts that each run yields
/// more than one <c>DetailLink</c>.
/// </summary>
/// <remarks>
/// NOTE(review): presumably GetDataList fetches the live pages, so this test
/// depends on network access and on the current markup of both sites — confirm.
/// </remarks>
public void testSpider()
{
    // One log buffer is shared by both runs, as the spider appends to whatever it is given.
    StringBuilder spiderLog = new StringBuilder();

    // The second and third arguments look like the begin/end markers of the
    // list region inside the page — TODO confirm against getTemplate.
    SpiderTemplate neteaseTemplate = getTemplate(
        "http://news.163.com",
        "<div class=\"content\" style=\"zoom:1;\">",
        "<h2>图片新闻</h2>");
    List<DetailLink> neteaseLinks = SpiderTool.GetDataList(neteaseTemplate, spiderLog);
    Assert.Greater(neteaseLinks.Count, 1);

    SpiderTemplate sohuTemplate = getTemplate(
        "http://women.sohu.com/love-story/",
        "<div class=\"f14list\">",
        "<div class=\"pages\">");
    List<DetailLink> sohuLinks = SpiderTool.GetDataList(sohuTemplate, spiderLog);
    Assert.Greater(sohuLinks.Count, 1);
}
/// <summary>
/// Builds a <c>SpiderTemplate</c> from the posted form, runs
/// <c>SpiderTool.GetDataList</c> with it, and writes the outcome as JSON.
/// </summary>
/// <remarks>
/// When at least one link is found, the list itself is rendered via <c>renderJson</c>.
/// When none is found, a JSON object with <c>IsValid = false</c> plus the list URL
/// and both patterns that were used is echoed instead.
/// </remarks>
public void GetList()
{
    SpiderTemplate template = ctx.PostValue<SpiderTemplate>();

    // Disabled legacy approach: the body pattern used to be composed as
    // PostHtmlAll("listBeginCode") + ".+?" + PostHtmlAll("listEndCode").
    // The pattern is now posted directly as "ListBodyPattern".
    String postedBodyPattern = ctx.PostHtmlAll("ListBodyPattern");
    String postedLinkPattern = ctx.PostHtmlAll("ListPattern");

    template.ListBodyPattern = postedBodyPattern;

    // Fall back to the configured default link pattern when none was posted.
    template.ListPattern = strUtil.IsNullOrEmpty(postedLinkPattern)
        ? SpiderConfig.ListLinkPattern
        : postedLinkPattern;

    template.ListEncoding = ctx.Post("listEncoding");

    StringBuilder spiderLog = new StringBuilder();
    List<DetailLink> links = SpiderTool.GetDataList(template, spiderLog);

    if (links.Count != 0)
    {
        renderJson(links);
        return;
    }

    // Nothing matched: report the inputs that produced the empty result.
    Dictionary<String, Object> failure = new Dictionary<String, Object>();
    failure["IsValid"] = false;
    failure["listUrl"] = template.ListUrl;
    failure["patternBody"] = template.ListBodyPattern;
    failure["patternLinks"] = template.ListPattern;

    echoJson(JsonString.Convert(failure));
}