Example #1
0
        /// <summary>
        /// Runs one search (keyword, city, page) against <c>SouUrl</c>, turns each result
        /// anchor into a <see cref="JobInfo"/>, and then loads the linked detail page to
        /// fill in the base-info, welfare and description sections.
        /// </summary>
        /// <param name="city">City name; replaced by its <c>_cityDic</c> entry when one exists.</param>
        /// <param name="kw">Search keyword, placed into the search URL as given.</param>
        /// <param name="pagenum">Page number placed into the search URL.</param>
        /// <returns>
        /// The jobs found on the page. The list is empty when the search page could not be
        /// fetched. Fields whose markup is missing are left unset instead of throwing.
        /// </returns>
        public List<JobInfo> CrawlerJob(string city, string kw, string pagenum)
        {
            var jobInfos = new List<JobInfo>();

            if (_cityDic.ContainsKey(city))
            {
                city = _cityDic[city];
            }

            var url = string.Format(SouUrl, kw, city, pagenum);
            var html = GetHtml(url);
            if (string.IsNullOrEmpty(html))
            {
                return jobInfos;
            }

            var htmlDocument = htmlParser.Parse(html);
            var elements = htmlDocument.QuerySelectorAll("ul.sojob-result-list li a");
            foreach (var element in elements)
            {
                // The selector matches every anchor inside a result item. An anchor that
                // lacks either attribute cannot be turned into a job entry, so skip it
                // rather than dereference a missing attribute.
                var titleAttribute = element.Attributes["title"];
                var hrefAttribute = element.Attributes["href"];
                if (titleAttribute == null || hrefAttribute == null || string.IsNullOrEmpty(hrefAttribute.Value))
                {
                    continue;
                }

                var link = hrefAttribute.Value;

                // QuerySelector yields null when nothing matches; keep the job and leave
                // the corresponding field unset in that case.
                var salaryNode = element.QuerySelector("dt.salary");
                var companyNode = element.QuerySelector("dt.company");
                var cityNode = element.QuerySelector("dt.city span");
                var dateNode = element.QuerySelector("dt.date span");

                var jobInfo = new JobInfo()
                {
                    JobTitle = titleAttribute.Value,
                    JobLink = link,
                    JobAddress = cityNode != null ? cityNode.InnerHtml : null,
                    JobCompany = companyNode != null ? companyNode.InnerHtml : null,
                    PublishDate = dateNode != null ? dateNode.InnerHtml : null,
                    JobSalary = salaryNode != null ? salaryNode.InnerHtml : null
                };

                // A failed detail fetch must not discard what the list page already gave us.
                var jobhtml = GetHtml(link);
                if (!string.IsNullOrEmpty(jobhtml))
                {
                    var document = htmlParser.Parse(jobhtml);

                    var baseInfoNode = document.QuerySelector("div.job-title-left");
                    if (baseInfoNode != null)
                    {
                        jobInfo.JobBaseInfo = baseInfoNode.InnerHtml;
                    }

                    foreach (var section in document.QuerySelectorAll("div.job-main.main-message"))
                    {
                        var sectionHtml = section.InnerHtml;

                        // "薪酬福利" = compensation and benefits section.
                        if (sectionHtml.Contains("薪酬福利"))
                        {
                            jobInfo.JobWelfare = sectionHtml;
                            continue;
                        }

                        // "职位描述" = job description section.
                        if (sectionHtml.Contains("职位描述"))
                        {
                            jobInfo.JobDetail = sectionHtml;
                        }
                    }
                }

                jobInfos.Add(jobInfo);
            }

            return jobInfos;
        }
Example #2
0
        /// <summary>
        /// Posts one search (city, keyword, page) to <c>SouUrl</c>, builds a
        /// <see cref="JobInfo"/> for every <c>tr.tr0</c> row of the result table and
        /// enriches each one with salary, welfare and description read from the job's
        /// own detail page.
        /// </summary>
        /// <param name="city">City name; replaced by its <c>_cityDic</c> entry when one exists.</param>
        /// <param name="kw">Search keyword; URL-encoded before being placed in the search URL.</param>
        /// <param name="pagenum">Page number placed into the search URL.</param>
        /// <returns>
        /// The jobs collected before any failure. Exceptions are logged through
        /// <c>_loger</c> and not rethrown.
        /// </returns>
        public List<JobInfo> CrawlerJob(string city, string kw, string pagenum)
        {
            var jobInfos = new List<JobInfo>();
            try
            {
                if (_cityDic.ContainsKey(city))
                {
                    city = _cityDic[city];
                }

                var searchUrl = string.Format(SouUrl, city, HttpUtility.UrlEncode(kw), pagenum);
                var listHtml = PostHtml(searchUrl, "");
                if (string.IsNullOrEmpty(listHtml))
                {
                    return jobInfos;
                }

                var listDocument = htmlParser.Parse(listHtml);
                var resultRows = listDocument
                    .QuerySelector("div.resultListDiv")
                    .QuerySelectorAll("tr")
                    .Where(row => row.ClassName == "tr0");

                foreach (var row in resultRows)
                {
                    var job = new JobInfo();

                    // Title text and link both come from the same anchor.
                    var titleAnchor = row.QuerySelector("td.td1 a");
                    job.JobTitle = titleAnchor.InnerHtml;
                    job.JobLink = titleAnchor.Attributes["href"].Value;
                    job.JobCompany = row.QuerySelector("td.td2 a").InnerHtml;
                    job.JobAddress = row.QuerySelector("td.td3 span").InnerHtml;
                    job.PublishDate = row.QuerySelector("td.td4 span").InnerHtml;

                    // The remaining fields only exist on the detail page and are optional.
                    var detailDocument = htmlParser.Parse(GetHtml(job.JobLink));

                    var salaryCell = detailDocument.QuerySelector("td.txt_2.jobdetail_xsfw_color");
                    if (salaryCell != null)
                    {
                        job.JobSalary = salaryCell.InnerHtml;
                    }

                    var welfareBlock = detailDocument.QuerySelector("div.jobdetail_divRight_span");
                    if (welfareBlock != null)
                    {
                        job.JobWelfare = welfareBlock.InnerHtml.Trim();
                    }

                    var descriptionCell = detailDocument.QuerySelector("td.wordBreakNormal.job_detail");
                    if (descriptionCell != null)
                    {
                        job.JobDetail = descriptionCell.InnerHtml.Trim();
                    }

                    jobInfos.Add(job);
                }
            }
            catch (System.Exception e)
            {
                _loger.Error(e);
            }

            return jobInfos;
        }
Example #3
0
        /// <summary>
        /// Runs one search (city, keyword, page) against <c>SouUrl</c>, builds a
        /// <see cref="JobInfo"/> for every <c>table.newlist</c> entry that carries a job
        /// title link, and enriches it with welfare, base info and description taken
        /// from the job's detail page.
        /// </summary>
        /// <param name="city">City name; URL-encoded before being placed in the search URL.</param>
        /// <param name="kw">Search keyword, placed into the search URL as given.</param>
        /// <param name="pagenum">Page number placed into the search URL.</param>
        /// <returns>
        /// The jobs found on the page. The list is empty when the search page could not be
        /// fetched. Fields whose markup is missing are left unset instead of throwing.
        /// </returns>
        public List<JobInfo> CrawlerJob(string city, string kw, string pagenum)
        {
            var jobInfos = new List<JobInfo>();

            // NOTE(review): city is URL-encoded but kw is not - confirm whether that is intended.
            var url = string.Format(SouUrl, HttpUtility.UrlEncode(city), kw, pagenum);
            var html = GetHtml(url);
            if (string.IsNullOrEmpty(html))
            {
                return jobInfos;
            }

            var document = parser.Parse(html);
            var elements = document.QuerySelectorAll("table.newlist");
            foreach (var element in elements)
            {
                // Tables without a title link are not job rows.
                var titleAnchor = element.QuerySelector("td.zwmc a");
                if (titleAnchor == null)
                {
                    continue;
                }

                var hrefAttribute = titleAnchor.Attributes["href"];
                var link = hrefAttribute != null ? hrefAttribute.Value : null;

                // QuerySelector yields null when nothing matches; keep the row and leave
                // the corresponding field unset in that case.
                var companyNode = element.QuerySelector("td.gsmc a");
                var salaryNode = element.QuerySelector("td.zwyx");
                var addressNode = element.QuerySelector("td.gzdd");
                var dateNode = element.QuerySelector("td.gxsj span");

                var jobInfo = new JobInfo()
                {
                    JobTitle = titleAnchor.InnerHtml,
                    JobAddress = addressNode != null ? addressNode.InnerHtml : null,
                    JobCompany = companyNode != null ? companyNode.InnerHtml : null,
                    JobSalary = salaryNode != null ? salaryNode.InnerHtml : null,
                    JobLink = link,
                    PublishDate = dateNode != null ? dateNode.InnerHtml : null
                };

                // A missing link or failed detail fetch must not discard the list-page data.
                var detail = string.IsNullOrEmpty(link) ? null : GetHtml(link);
                if (!string.IsNullOrEmpty(detail))
                {
                    var htmlDocument = parser.Parse(detail);

                    var welfareNode = htmlDocument.QuerySelector("div .welfare-tab-box");
                    if (welfareNode != null)
                    {
                        jobInfo.JobWelfare = welfareNode.InnerHtml
                            .Replace("<span>", "")
                            .Replace("</span>", "");
                    }

                    var leftPane = htmlDocument.QuerySelector("div.terminalpage-left");
                    var baseInfoNode = leftPane != null ? leftPane.QuerySelector("ul.terminal-ul") : null;
                    if (baseInfoNode != null)
                    {
                        jobInfo.JobBaseInfo = baseInfoNode.InnerHtml
                            .Replace("<li>", "")
                            .Replace("</li>", "")
                            .Replace("<span>", "")
                            .Replace("</span>", "")
                            .Replace("<strong>", "")
                            .Replace("</strong>", "");
                    }

                    var descriptionNode = htmlDocument.QuerySelector("div.tab-inner-cont");
                    if (descriptionNode != null)
                    {
                        jobInfo.JobDetail = descriptionNode.InnerHtml
                            .Replace("<p>", "")
                            .Replace("</p>", "")
                            .Replace("<br/>", "")
                            .Replace("<br>", "")
                            .Replace("</br>", "")
                            .Replace("<h2>", "")
                            .Replace("</h2>", "")
                            .Replace(" ", "");
                    }
                }

                jobInfos.Add(jobInfo);
            }

            return jobInfos;
        }
Example #4
0
        /// <summary>
        /// Posts one search (city, keyword, page) to <c>Url</c>, deserializes the JSON
        /// response into <see cref="LaGouJobs"/> and maps every result item to a
        /// <see cref="JobInfo"/>. The description is read from the job's HTML page at
        /// <c>http://www.lagou.com/jobs/{PositionId}.html</c>.
        /// </summary>
        /// <param name="city">City name; URL-encoded into the search URL.</param>
        /// <param name="kw">Search keyword; URL-encoded into the POST body as <c>kd</c>.</param>
        /// <param name="pagenum">Page number; sent in the POST body as <c>pn</c>.</param>
        /// <returns>
        /// The jobs collected before any failure. Exceptions are written to the trace
        /// output and not rethrown, so callers still get a best-effort result.
        /// </returns>
        public List<JobInfo> CrawlerJob(string city, string kw, string pagenum)
        {
            var jobInfos = new List<JobInfo>();
            try
            {
                // "全职" = full-time.
                string url = string.Format(Url, HttpUtility.UrlEncode(city), HttpUtility.UrlEncode("全职"));
                var postdata = string.Format("first={0}&pn={1}&kd={2}", false, pagenum, HttpUtility.UrlEncode(kw)); // pn = page number
                string resultjson = PostHtml(url, postdata);
                if (string.IsNullOrEmpty(resultjson))
                {
                    return jobInfos;
                }

                var laGouJobs = JsonConvert.DeserializeObject<LaGouJobs>(resultjson);
                if (laGouJobs == null || !laGouJobs.Success
                    || laGouJobs.Content == null || laGouJobs.Content.Result == null)
                {
                    return jobInfos;
                }

                foreach (var item in laGouJobs.Content.Result)
                {
                    var jobInfo = new JobInfo();
                    jobInfo.JobAddress = item.City;
                    jobInfo.JobLink = string.Format("http://www.lagou.com/jobs/{0}.html", item.PositionId);
                    jobInfo.JobTitle = item.PositionName;
                    jobInfo.JobCompany = item.CompanyShortName;
                    jobInfo.JobSalary = item.Salary;

                    // string.Join throws on a null sequence; an item without labels
                    // should not abort the rest of the page.
                    var companyLabels = item.CompanyLabelList != null
                        ? string.Join(",", item.CompanyLabelList)
                        : string.Empty;
                    jobInfo.JobWelfare = item.PositionAdvantage + companyLabels;

                    jobInfo.PublishDate = item.CreateTime;
                    jobInfo.CompanyType = item.IndustryField + "," + item.FinanceStage;
                    jobInfo.JobBaseInfo = string.Format("经验{0},{1}以上,{2}", item.WorkYear, item.Education, item.JobNature);

                    string detailhtml = GetHtml(jobInfo.JobLink);
                    if (!string.IsNullOrEmpty(detailhtml))
                    {
                        IHtmlDocument document = _parser.Parse(detailhtml);

                        // Either level may be absent; QuerySelector yields null when nothing matches.
                        IElement detailList = document.QuerySelector("dl.job_detail");
                        IElement element = detailList != null ? detailList.QuerySelector("dd.job_bt") : null;
                        if (element != null)
                        {
                            jobInfo.JobDetail = element.InnerHtml;
                        }
                    }

                    jobInfos.Add(jobInfo);
                }
            }
            catch (Exception e)
            {
                // Keep the best-effort contract, but leave a trace instead of
                // silently discarding the failure.
                System.Diagnostics.Trace.TraceError("CrawlerJob failed: {0}", e);
            }

            return jobInfos;
        }