/// <summary>
/// Fetches one page of search results for the given city and keyword and returns a
/// <see cref="JobInfo"/> for every result anchor that carries all summary fields,
/// enriched with whatever the job's detail page provides.
/// </summary>
/// <param name="city">City name; replaced by its <c>_cityDic</c> entry when one exists.</param>
/// <param name="kw">Search keyword; URL-escaped before being placed in the search URL.</param>
/// <param name="pagenum">Page number substituted into the search URL.</param>
/// <returns>The jobs found; empty when the search page comes back null or empty.</returns>
public List<JobInfo> CrawlerJob(string city, string kw, string pagenum)
{
    var jobInfos = new List<JobInfo>();

    string mappedCity;
    if (_cityDic.TryGetValue(city, out mappedCity))
    {
        city = mappedCity;
    }

    // Escape the keyword so characters such as '#', '&' or '+' cannot truncate or corrupt the URL.
    var url = string.Format(SouUrl, System.Uri.EscapeDataString(kw ?? string.Empty), city, pagenum);
    var html = GetHtml(url);
    if (string.IsNullOrEmpty(html))
    {
        return jobInfos;
    }

    var anchors = htmlParser.Parse(html).QuerySelectorAll("ul.sojob-result-list li a");
    foreach (var anchor in anchors)
    {
        var titleAttr = anchor.Attributes["title"];
        var hrefAttr = anchor.Attributes["href"];
        var salaryNode = anchor.QuerySelector("dt.salary");
        var companyNode = anchor.QuerySelector("dt.company");
        var cityNode = anchor.QuerySelector("dt.city span");
        var dateNode = anchor.QuerySelector("dt.date span");

        // The selector matches every anchor inside a result item. An anchor that lacks any
        // summary field is skipped instead of being dereferenced and aborting the crawl.
        if (titleAttr == null || hrefAttr == null || salaryNode == null
            || companyNode == null || cityNode == null || dateNode == null)
        {
            continue;
        }

        var link = hrefAttr.Value;
        var jobInfo = new JobInfo
        {
            JobTitle = titleAttr.Value,
            JobLink = link,
            JobAddress = cityNode.InnerHtml,
            JobCompany = companyNode.InnerHtml,
            PublishDate = dateNode.InnerHtml,
            JobSalary = salaryNode.InnerHtml
        };

        // The detail page is optional enrichment: the summary is kept even when it is unavailable.
        var jobHtml = GetHtml(link);
        if (!string.IsNullOrEmpty(jobHtml))
        {
            var detailDocument = htmlParser.Parse(jobHtml);

            var baseInfoNode = detailDocument.QuerySelector("div.job-title-left");
            if (baseInfoNode != null)
            {
                jobInfo.JobBaseInfo = baseInfoNode.InnerHtml;
            }

            // Sections are told apart by the heading text they contain.
            foreach (var section in detailDocument.QuerySelectorAll("div.job-main.main-message"))
            {
                var sectionHtml = section.InnerHtml;
                if (sectionHtml.Contains("薪酬福利"))
                {
                    jobInfo.JobWelfare = sectionHtml;
                }
                else if (sectionHtml.Contains("职位描述"))
                {
                    jobInfo.JobDetail = sectionHtml;
                }
            }
        }

        jobInfos.Add(jobInfo);
    }

    return jobInfos;
}
/// <summary>
/// Posts a search request for the given city and keyword and returns one
/// <see cref="JobInfo"/> per result row (<c>tr</c> elements with class <c>tr0</c>),
/// enriched with salary, welfare and description from the job's detail page.
/// </summary>
/// <param name="city">City name; replaced by its <c>_cityDic</c> entry when one exists.</param>
/// <param name="kw">Search keyword; URL-encoded before being placed in the search URL.</param>
/// <param name="pagenum">Page number substituted into the search URL.</param>
/// <returns>
/// The jobs collected so far. Exceptions are logged through <c>_loger</c> rather than
/// propagated, so the list may be partial or empty.
/// </returns>
public List<JobInfo> CrawlerJob(string city, string kw, string pagenum)
{
    var jobInfos = new List<JobInfo>();
    try
    {
        string mappedCity;
        if (_cityDic.TryGetValue(city, out mappedCity))
        {
            city = mappedCity;
        }

        var url = string.Format(SouUrl, city, HttpUtility.UrlEncode(kw), pagenum);
        var html = PostHtml(url, "");
        if (string.IsNullOrEmpty(html))
        {
            return jobInfos;
        }

        var resultList = htmlParser.Parse(html).QuerySelector("div.resultListDiv");
        if (resultList == null)
        {
            // Surface the missing container through the catch below with a meaningful message.
            throw new System.InvalidOperationException("Search response does not contain div.resultListDiv.");
        }

        var rows = resultList.QuerySelectorAll("tr").Where(x => x.ClassName == "tr0");
        foreach (var row in rows)
        {
            // Each row is handled in isolation so that one bad row or one failed
            // detail request does not discard the remaining rows of the page.
            try
            {
                var titleLink = row.QuerySelector("td.td1 a");
                if (titleLink == null)
                {
                    continue;
                }

                var hrefAttr = titleLink.Attributes["href"];
                if (hrefAttr == null)
                {
                    continue;
                }

                var jobInfo = new JobInfo();
                jobInfo.JobTitle = titleLink.InnerHtml;
                jobInfo.JobLink = hrefAttr.Value;

                var companyLink = row.QuerySelector("td.td2 a");
                if (companyLink != null)
                {
                    jobInfo.JobCompany = companyLink.InnerHtml;
                }

                var addressSpan = row.QuerySelector("td.td3 span");
                if (addressSpan != null)
                {
                    jobInfo.JobAddress = addressSpan.InnerHtml;
                }

                var dateSpan = row.QuerySelector("td.td4 span");
                if (dateSpan != null)
                {
                    jobInfo.PublishDate = dateSpan.InnerHtml;
                }

                // The detail page is optional enrichment; the row summary is kept without it.
                var jobHtml = GetHtml(jobInfo.JobLink);
                if (!string.IsNullOrEmpty(jobHtml))
                {
                    var detailDocument = htmlParser.Parse(jobHtml);

                    var salaryCell = detailDocument.QuerySelector("td.txt_2.jobdetail_xsfw_color");
                    if (salaryCell != null)
                    {
                        jobInfo.JobSalary = salaryCell.InnerHtml;
                    }

                    var welfareBlock = detailDocument.QuerySelector("div.jobdetail_divRight_span");
                    if (welfareBlock != null)
                    {
                        jobInfo.JobWelfare = welfareBlock.InnerHtml.Trim();
                    }

                    var detailCell = detailDocument.QuerySelector("td.wordBreakNormal.job_detail");
                    if (detailCell != null)
                    {
                        jobInfo.JobDetail = detailCell.InnerHtml.Trim();
                    }
                }

                jobInfos.Add(jobInfo);
            }
            catch (System.Exception rowError)
            {
                _loger.Error(rowError);
            }
        }
    }
    catch (System.Exception e)
    {
        _loger.Error(e);
    }

    return jobInfos;
}
/// <summary>
/// Fetches one page of search results and returns one <see cref="JobInfo"/> per
/// <c>table.newlist</c> element that contains a title link, enriched with welfare,
/// base info and description text taken from the job's detail page.
/// </summary>
/// <param name="city">City name; URL-encoded before being placed in the search URL.</param>
/// <param name="kw">Search keyword; URL-encoded before being placed in the search URL.</param>
/// <param name="pagenum">Page number substituted into the search URL.</param>
/// <returns>The jobs found; empty when the search page comes back null or empty.</returns>
public List<JobInfo> CrawlerJob(string city, string kw, string pagenum)
{
    var jobInfos = new List<JobInfo>();

    // Encode the keyword like the city, so characters such as '#' or '&' cannot truncate the URL.
    var url = string.Format(SouUrl, HttpUtility.UrlEncode(city), HttpUtility.UrlEncode(kw), pagenum);
    var html = GetHtml(url);
    if (string.IsNullOrEmpty(html))
    {
        return jobInfos;
    }

    foreach (var table in parser.Parse(html).QuerySelectorAll("table.newlist"))
    {
        // Tables without a title link are not job entries.
        var titleLink = table.QuerySelector("td.zwmc a");
        if (titleLink == null)
        {
            continue;
        }

        var hrefAttr = titleLink.Attributes["href"];
        if (hrefAttr == null)
        {
            continue;
        }

        var companyLink = table.QuerySelector("td.gsmc a");
        var salaryCell = table.QuerySelector("td.zwyx");
        var addressCell = table.QuerySelector("td.gzdd");
        var dateSpan = table.QuerySelector("td.gxsj span");

        var link = hrefAttr.Value;
        var jobInfo = new JobInfo
        {
            JobTitle = titleLink.InnerHtml,
            JobAddress = addressCell != null ? addressCell.InnerHtml : null,
            JobCompany = companyLink != null ? companyLink.InnerHtml : null,
            JobSalary = salaryCell != null ? salaryCell.InnerHtml : null,
            JobLink = link,
            PublishDate = dateSpan != null ? dateSpan.InnerHtml : null
        };

        // Every detail section is optional: a missing one leaves its field unset
        // instead of throwing and losing the whole result list.
        var detailHtml = GetHtml(link);
        if (!string.IsNullOrEmpty(detailHtml))
        {
            var detailDocument = parser.Parse(detailHtml);

            var welfareBox = detailDocument.QuerySelector("div .welfare-tab-box");
            if (welfareBox != null)
            {
                jobInfo.JobWelfare = StripMarkup(welfareBox.InnerHtml, "<span>", "</span>");
            }

            var leftPane = detailDocument.QuerySelector("div.terminalpage-left");
            var baseInfoList = leftPane != null ? leftPane.QuerySelector("ul.terminal-ul") : null;
            if (baseInfoList != null)
            {
                jobInfo.JobBaseInfo = StripMarkup(
                    baseInfoList.InnerHtml,
                    "<li>", "</li>", "<span>", "</span>", "<strong>", "</strong>");
            }

            var descriptionBox = detailDocument.QuerySelector("div.tab-inner-cont");
            if (descriptionBox != null)
            {
                jobInfo.JobDetail = StripMarkup(
                    descriptionBox.InnerHtml,
                    "<p>", "</p>", "<br/>", "<br>", "</br>", "<h2>", "</h2>", " ");
            }
        }

        jobInfos.Add(jobInfo);
    }

    return jobInfos;
}

/// <summary>Removes every occurrence of each token from <paramref name="text"/>, in the order given.</summary>
private static string StripMarkup(string text, params string[] tokens)
{
    foreach (var token in tokens)
    {
        text = text.Replace(token, "");
    }

    return text;
}
/// <summary>
/// Posts a job search request, deserializes the JSON response into <c>LaGouJobs</c> and
/// maps every result to a <see cref="JobInfo"/>. The job description is then read from
/// the <c>dd.job_bt</c> element of the job's lagou.com detail page when it is available.
/// </summary>
/// <param name="city">City name; URL-encoded into the request URL.</param>
/// <param name="kw">Search keyword; URL-encoded and sent as the <c>kd</c> form field.</param>
/// <param name="pagenum">Page number, sent as the <c>pn</c> form field.</param>
/// <returns>
/// The jobs collected so far. Failures are written to the trace listeners rather than
/// propagated, so the list may be partial or empty.
/// </returns>
public List<JobInfo> CrawlerJob(string city, string kw, string pagenum)
{
    var jobInfos = new List<JobInfo>();
    try
    {
        string url = string.Format(Url, HttpUtility.UrlEncode(city), HttpUtility.UrlEncode("全职"));
        // pn is the page number.
        var postdata = string.Format("first={0}&pn={1}&kd={2}", false, pagenum, HttpUtility.UrlEncode(kw));
        string resultjson = PostHtml(url, postdata);
        if (string.IsNullOrEmpty(resultjson))
        {
            return jobInfos;
        }

        var laGouJobs = JsonConvert.DeserializeObject<LaGouJobs>(resultjson);
        if (laGouJobs == null || !laGouJobs.Success
            || laGouJobs.Content == null || laGouJobs.Content.Result == null)
        {
            return jobInfos;
        }

        foreach (var item in laGouJobs.Content.Result)
        {
            if (item == null)
            {
                continue;
            }

            var jobInfo = new JobInfo();
            jobInfo.JobAddress = item.City;
            jobInfo.JobLink = string.Format("http://www.lagou.com/jobs/{0}.html", item.PositionId);
            jobInfo.JobTitle = item.PositionName;
            jobInfo.JobCompany = item.CompanyShortName;
            jobInfo.JobSalary = item.Salary;
            // string.Join throws on a null list, so a missing label list contributes nothing.
            jobInfo.JobWelfare = item.PositionAdvantage
                + (item.CompanyLabelList != null ? string.Join(",", item.CompanyLabelList) : string.Empty);
            jobInfo.PublishDate = item.CreateTime;
            jobInfo.CompanyType = item.IndustryField + "," + item.FinanceStage;
            jobInfo.JobBaseInfo = string.Format("经验{0},{1}以上,{2}", item.WorkYear, item.Education, item.JobNature);

            // The JSON already describes the job completely. A failing or re-laid-out detail
            // page must not discard this job or the ones after it.
            try
            {
                string detailhtml = GetHtml(jobInfo.JobLink);
                if (!string.IsNullOrEmpty(detailhtml))
                {
                    var detailDocument = _parser.Parse(detailhtml);
                    var detailList = detailDocument.QuerySelector("dl.job_detail");
                    var description = detailList != null ? detailList.QuerySelector("dd.job_bt") : null;
                    if (description != null)
                    {
                        jobInfo.JobDetail = description.InnerHtml;
                    }
                }
            }
            catch (Exception detailError)
            {
                System.Diagnostics.Trace.TraceError(
                    "CrawlerJob: failed to load job detail from {0}: {1}", jobInfo.JobLink, detailError);
            }

            jobInfos.Add(jobInfo);
        }
    }
    catch (Exception e)
    {
        // Still best-effort (callers get what was collected), but no longer silent.
        System.Diagnostics.Trace.TraceError("CrawlerJob: job search failed: {0}", e);
    }

    return jobInfos;
}