/// <summary>
/// Creates the controller and instantiates each collaborating service
/// directly (no dependency injection is used here).
/// </summary>
public ImportController()
{
    // Instances are created in the same order as before, in case any of
    // the constructors have side effects.
    templateService = new SpiderTemplateService();
    importService = new SportImportService();
    spiderTool = new SpiderTool();
    appService = new SiteAppService();
    userService = new UserService();
}
/// <summary>
/// Builds a template for each of two pages via <c>getTemplate</c>, runs
/// <c>SpiderTool.GetDataList</c> on it, and asserts that more than one
/// item comes back each time.
/// </summary>
/// <remarks>
/// NOTE(review): presumably this fetches the live pages, so it depends on
/// network access and on the current markup of those sites - confirm.
/// </remarks>
public void testSpider()
{
    // One log buffer is shared by both runs, as in the original.
    StringBuilder spiderLog = new StringBuilder();

    // Scenario 1: news.163.com
    SpiderTemplate template = getTemplate(
        "http://news.163.com",
        "<div class=\"content\" style=\"zoom:1;\">",
        "<h2>图片新闻</h2>");
    List<DetailLink> links = SpiderTool.GetDataList(template, spiderLog);
    Assert.Greater(links.Count, 1);

    // Scenario 2: women.sohu.com
    template = getTemplate(
        "http://women.sohu.com/love-story/",
        "<div class=\"f14list\">",
        "<div class=\"pages\">");
    links = SpiderTool.GetDataList(template, spiderLog);
    Assert.Greater(links.Count, 1);
}
/// <summary>
/// Stores a <c>Spider</c> visit record when the user agent is recognised by
/// <c>SpiderTool.UaSelect</c>.
/// </summary>
/// <param name="url">Value stored in the record's <c>Url</c>.</param>
/// <param name="userAgent">
/// User-agent string passed to <c>SpiderTool.UaSelect</c> and stored in the
/// record's <c>UserAgent</c>.
/// </param>
/// <param name="code">Value stored in the record's <c>Code</c>.</param>
/// <returns>A task that completes once the insert has finished, or
/// immediately when nothing is recorded.</returns>
public async Task AddRecord(string url, string userAgent, int code)
{
    string spiderName = SpiderTool.UaSelect(userAgent);

    // UaSelect returned no name: nothing is recorded for this request.
    if (spiderName is null)
    {
        return;
    }

    // spiderName is guaranteed non-null here, so the former
    // `?? "未知"` fallback could never be taken and has been removed.
    await _spiderRepository.InsertAsync(new Spider()
    {
        Ip = _clientInfoProvider.ClientIpAddress,
        Name = spiderName,
        Url = url,
        Code = code,
        UserAgent = userAgent,
        CreationTime = DateTime.Now
    });
}
/// <summary>
/// Builds a <c>SpiderTemplate</c> from the posted form, runs
/// <c>SpiderTool.GetDataList</c> on it, and writes the outcome as JSON.
/// </summary>
/// <remarks>
/// When at least one item is found the list itself is rendered. Otherwise
/// a diagnostic object is echoed containing <c>IsValid = false</c>, the
/// list URL and both patterns that were used.
/// </remarks>
public void GetList()
{
    SpiderTemplate template = ctx.PostValue<SpiderTemplate>();

    // The body pattern is taken verbatim from the "ListBodyPattern" field.
    // (An earlier variant assembled it from separate begin/end markers
    // joined with ".+?"; that code was already disabled.)
    String bodyPattern = ctx.PostHtmlAll("ListBodyPattern");
    String linkPattern = ctx.PostHtmlAll("ListPattern");

    template.ListBodyPattern = bodyPattern;

    // Fall back to the configured default when no link pattern was posted.
    if (strUtil.IsNullOrEmpty(linkPattern))
    {
        linkPattern = SpiderConfig.ListLinkPattern;
    }
    template.ListPattern = linkPattern;

    template.ListEncoding = ctx.Post("listEncoding");

    StringBuilder spiderLog = new StringBuilder();
    List<DetailLink> links = SpiderTool.GetDataList(template, spiderLog);

    if (links.Count != 0)
    {
        renderJson(links);
        return;
    }

    // Nothing found: report the inputs that were used.
    Dictionary<String, Object> failure = new Dictionary<String, Object>();
    failure.Add("IsValid", false);
    failure.Add("listUrl", template.ListUrl);
    failure.Add("patternBody", template.ListBodyPattern);
    failure.Add("patternLinks", template.ListPattern);
    echoJson(JsonString.Convert(failure));
}