示例#1
0
        /// <summary>
        /// Runs a job search: builds the query string from the configured filters,
        /// downloads the first result page, reads the total page count from it and
        /// then downloads every remaining page. Each page's HTML is handed to
        /// <c>GetJobInfoFromPage</c>; once all pages are processed <c>GetJobEnd</c>
        /// is invoked with <c>(null, null)</c>.
        /// </summary>
        /// <remarks>
        /// This method overwrites <c>workAddress</c> and <c>keyWord</c> with their
        /// URL-encoded values and appends the query string to <c>url</c>.
        /// NOTE(review): because of that, a second call on the same instance would
        /// encode twice and append the query twice - confirm instances are single-use.
        /// </remarks>
        /// <exception cref="Exception">
        /// Any failure is rethrown as a plain <c>Exception</c> carrying the original
        /// message; the original exception is attached as <c>InnerException</c>.
        /// </exception>
        public void GetJobInfoList()
        {
            try
            {
                Encoding utf8 = Encoding.GetEncoding("utf-8");
                StringBuilder condition = new StringBuilder();

                workAddress = HttpUtility.UrlEncode(workAddress, utf8);
                condition.Append("jl=" + workAddress);
                if (!string.IsNullOrEmpty(keyWord))
                {
                    keyWord = HttpUtility.UrlEncode(keyWord, utf8);
                    condition.Append("&kw=" + keyWord);
                }
                condition.Append("&sm=1");
                // NOTE(review): the upper bound is sent as "sf" and the lower bound as
                // "st" - confirm this matches the parameter meaning on the target site.
                if (!string.IsNullOrEmpty(upperSalary))
                {
                    condition.Append("&sf=" + upperSalary);
                }
                if (!string.IsNullOrEmpty(lowerSalary))
                {
                    condition.Append("&st=" + lowerSalary);
                }

                url = url + condition.ToString();
                string html = GetHtmlCode.GetByget(url, "utf-8");
                GetJobInfoFromPage(html);

                // Page count: the digits that follow the pager's onkeypress handler arguments.
                string pageCountRegexStr = "(?<=onkeypress=\"zlapply.searchjob.enter2Page\\(this,event,)\\d+";
                // Regex.Match rejects null input, so a failed download is treated as
                // "no pager found". TryParse leaves pageCount at 0 when nothing matched.
                string pageCountStr = Regex.Match(html ?? string.Empty, pageCountRegexStr).Value;
                int pageCount;
                int.TryParse(pageCountStr, out pageCount);

                // The first page was processed above; continue from page 2.
                for (int page = 2; page <= pageCount; page++)
                {
                    string pageUrl = url + string.Format("&p={0}", page);
                    html = GetHtmlCode.GetByget(pageUrl, "utf-8");
                    GetJobInfoFromPage(html);
                }

                if (GetJobEnd != null)
                {
                    GetJobEnd(null, null);
                }
            }
            catch (Exception exMsg)
            {
                // Keep the exception type and message callers already see, but attach the
                // original so its type and stack trace are not lost.
                throw new Exception(exMsg.Message, exMsg);
            }
        }
示例#2
0
        /// <summary>
        /// Runs a job search: builds the query string from <c>workAddressId</c> and the
        /// optional <c>keyWord</c>, downloads the first result page, derives the number
        /// of pages from the total job count found in that page, then downloads every
        /// remaining page. Each page's HTML is handed to <c>GetJobInfoFromPage</c>;
        /// once all pages are processed <c>GetJobEnd</c> is invoked with <c>(null, null)</c>.
        /// </summary>
        /// <remarks>
        /// This method overwrites <c>keyWord</c> with its gb2312 URL-encoded value and
        /// appends the query string to <c>url</c>.
        /// NOTE(review): a second call on the same instance would therefore encode
        /// twice and append the query twice - confirm instances are single-use.
        /// </remarks>
        /// <exception cref="Exception">
        /// Any failure is rethrown as a plain <c>Exception</c> carrying the original
        /// message; the original exception is attached as <c>InnerException</c>.
        /// </exception>
        public void GetJobInfoList()
        {
            try
            {
                StringBuilder condition = new StringBuilder();
                condition.Append("jobarea=" + workAddressId);
                if (!string.IsNullOrEmpty(keyWord))
                {
                    keyWord = System.Web.HttpUtility.UrlEncode(keyWord, Encoding.GetEncoding("gb2312"));
                    condition.Append("&keyword=" + keyWord);
                }
                condition.Append("&keywordtype=2");

                url = url + condition.ToString();
                string html = GetHtmlCode.GetByget(url, "gb2312");
                GetJobInfoFromPage(html);

                // Total job count: the value of the "jobid_count" input on the result page.
                string pageCountRegexStr = "(?<=name=\"jobid_count\"\\s*?value=\")\\d+(?=\">)";
                // Regex.Match rejects null input, so a failed download is treated as
                // "no count found". TryParse leaves totalJobs at 0 when nothing matched,
                // instead of throwing as int.Parse did on a result page without the input.
                string totalJobsStr = Regex.Match(html ?? string.Empty, pageCountRegexStr).Value;
                int totalJobs;
                int.TryParse(totalJobsStr, out totalJobs);

                // Ceiling division by 30 (presumably the number of results per page - confirm).
                int pageCount = (totalJobs + 29) / 30;

                // The first page was processed above; continue from page 2.
                for (int page = 2; page <= pageCount; page++)
                {
                    string pageUrl = url + string.Format("&curr_page={0}", page);
                    html = GetHtmlCode.GetByget(pageUrl, "gb2312");
                    GetJobInfoFromPage(html);
                }

                if (GetJobEnd != null)
                {
                    GetJobEnd(null, null);
                }
            }
            catch (Exception exMsg)
            {
                // Keep the exception type and message callers already see, but attach the
                // original so its type and stack trace are not lost.
                throw new Exception(exMsg.Message, exMsg);
            }
        }
示例#3
0
 /// <summary>
 /// Runs a job search: builds the query string from <c>workAddressId</c> and the
 /// optional <c>keyWord</c>, then downloads result pages one after another and
 /// hands each page's HTML to <c>GetJobInfoFromPage</c>. Paging stops at the first
 /// empty download or after 100 requests, whichever comes first.
 /// </summary>
 /// <remarks>
 /// This method overwrites <c>keyWord</c> with its UTF-8 URL-encoded value.
 /// NOTE(review): a second call on the same instance would therefore encode the
 /// keyword twice - confirm instances are single-use.
 /// </remarks>
 /// <exception cref="Exception">
 /// Any failure is rethrown as a plain <c>Exception</c> carrying the original
 /// message; the original exception is attached as <c>InnerException</c>.
 /// </exception>
 public void GetJobInfoList()
 {
     try
     {
         StringBuilder condition = new StringBuilder();
         condition.AppendFormat("dqs={0}", workAddressId); // region
         condition.Append("&searchField=3");               // industry (per the original note)
         if (!string.IsNullOrEmpty(keyWord))
         {
             keyWord = HttpUtility.UrlEncode(keyWord, Encoding.GetEncoding("utf-8"));
             condition.Append("&key=" + keyWord); // job-title keyword
         }
         condition.Append("&pubTime=30");         // publish-time filter

         // The query does not change between pages, so build the base URL once.
         string baseUrl = url + condition.ToString();

         for (int page = 0; page < 100; page++)
         {
             // The first request carries no page parameter; later ones add curPage.
             string xurl = baseUrl;
             if (page > 0)
             {
                 xurl = baseUrl + "&curPage=" + page;
             }

             string html = GetHtmlCode.GetByget(xurl, "utf-8");
             if (string.IsNullOrEmpty(html))
             {
                 break;
             }
             GetJobInfoFromPage(html);
         }
     }
     catch (Exception exMsg)
     {
         // Keep the exception type and message callers already see, but attach the
         // original so its type and stack trace are not lost.
         throw new Exception(exMsg.Message, exMsg);
     }
 }
示例#4
0
        /// <summary>
        /// Downloads one job detail page (gb2312), strips it down to its filtered,
        /// whitespace-free body and extracts the job fields into a <c>JobInfo</c>,
        /// which is then reported through <c>GetJobEnd(pageStr, info)</c>.
        /// </summary>
        /// <remarks>
        /// Returns without reporting anything when the download is empty, or when
        /// <c>mustKey</c> is set and the filtered body does not contain it. A field
        /// whose pattern does not match is left as an empty string.
        /// </remarks>
        /// <param name="url">Address of the job detail page; also stored in <c>info.Url</c>.</param>
        /// <exception cref="Exception">
        /// Any failure is rethrown as a plain <c>Exception</c> carrying the original
        /// message; the original exception is attached as <c>InnerException</c>.
        /// </exception>
        private void GetJobInfoFromUrl(string url)
        {
            try
            {
                JobInfo info = new JobInfo();

                string pageStr = GetHtmlCode.GetByget(url, "gb2312");
                if (string.IsNullOrEmpty(pageStr))
                {
                    return;
                }

                pageStr = pageStr.Replace("\r\n", ""); // drop line breaks

                // Keep only the <body>...</body> part of the page.
                string body = string.Empty;
                Match bodyMatch = Regex.Match(pageStr, @"(?is)<body.*?</body>");
                if (bodyMatch.Success)
                {
                    // pageStr no longer contains "\r\n", so only the "<tr >" fix-up is needed.
                    body = bodyMatch.Value.Replace("<tr >", "<tr>");
                }

                // Remove styles, scripts and other unrelated markup.
                foreach (var filter in Filters)
                {
                    body = Regex.Replace(body, filter[0], filter[1]);
                }

                if (!string.IsNullOrEmpty(mustKey) && !body.Contains(mustKey))
                {
                    return;
                }

                // All whitespace is removed, which is why the patterns below read
                // "<tdclass=" rather than "<td class=".
                body = Regex.Replace(body, "\\s", "");

                info.Url = url;

                // Position title (the header cell spans either 2 or 3 columns).
                string position = Regex.Match(body, "<tdclass=\"sr_bt\"colspan=\"2\">(.*?)</td>").Value;
                if (string.IsNullOrEmpty(position))
                {
                    position = Regex.Match(body, "<tdclass=\"sr_bt\"colspan=\"3\">(.*?)</td>").Value;
                }
                info.Position = CutBetween(position, ">", "</", false);

                // Company name.
                // NOTE(review): the leading '.' in this pattern is an unescaped regex
                // wildcard - confirm whether "\\." was intended.
                string company = Regex.Match(body, ".html\">(.*?)</a>").Value;
                info.Company = CutBetween(company, ">", "</a>", false);

                // Work location.
                string address = Regex.Match(body, "工作地点:</td><tdclass=\"txt_2\">(.*?)</td>").Value;
                info.Address = CutBetween(address, "\">", "</td>", true);

                // Company nature, falling back to the company industry label.
                string nature = Regex.Match(body, "公司性质:</strong>&nbsp;&nbsp;(.*?)<br><br><strong>").Value;
                if (string.IsNullOrEmpty(nature))
                {
                    nature = Regex.Match(body, "公司行业:</strong>&nbsp;&nbsp;(.*?)<br><br><strong>").Value;
                }
                info.Nature = CutBetween(nature, "</strong>&nbsp;&nbsp;", "<br>", false);

                // Company size.
                string scale = Regex.Match(body, "公司规模:</strong>&nbsp;&nbsp;(.*?)</td>").Value;
                info.Scale = CutBetween(scale, "</strong>&nbsp;&nbsp;", "</td>", false);

                // Work experience.
                string experience = Regex.Match(body, "工作年限:</td><tdclass=\"txt_2\">(.*?)</td>").Value;
                info.Experience = CutBetween(experience, "\">", "</td>", true);

                // Education.
                string education = Regex.Match(body, "学&nbsp;&nbsp;&nbsp;&nbsp;历:</td><tdclass=\"txt_2\">(.*?)</td>").Value;
                info.Education = CutBetween(education, "\">", "</td>", true);

                // Salary range.
                string salary = Regex.Match(body, "薪水范围:</td><tdclass=\"txt_2\">(.*?)</td>").Value;
                info.Salary = CutBetween(salary, "\">", "</td>", true);

                // Publish date.
                string time = Regex.Match(body, "发布日期:</td><tdclass=\"txt_2\">(.*?)</td>").Value;
                info.Time = CutBetween(time, "\">", "</td>", true);

                if (GetJobEnd != null)
                {
                    GetJobEnd(pageStr, info);
                }
            }
            catch (Exception exMsg)
            {
                // Keep the exception type and message callers already see, but attach the
                // original so its type and stack trace are not lost.
                throw new Exception(exMsg.Message, exMsg);
            }
        }

        /// <summary>
        /// Returns the part of <paramref name="matched"/> that starts right after the
        /// first <paramref name="openToken"/> and ends right before
        /// <paramref name="closeToken"/>. Comparisons are ordinal.
        /// </summary>
        /// <param name="matched">Text of a regex match; may be null or empty.</param>
        /// <param name="openToken">Delimiter that precedes the wanted text.</param>
        /// <param name="closeToken">Delimiter that follows the wanted text.</param>
        /// <param name="closeFromEnd">
        /// True to use the last occurrence of <paramref name="closeToken"/>, false to use the first.
        /// </param>
        /// <returns>
        /// The enclosed text, or an empty string when <paramref name="matched"/> is
        /// null/empty, a delimiter is missing, or the closing delimiter precedes the
        /// end of the opening one.
        /// </returns>
        private static string CutBetween(string matched, string openToken, string closeToken, bool closeFromEnd)
        {
            if (string.IsNullOrEmpty(matched))
            {
                return "";
            }

            int openAt = matched.IndexOf(openToken, StringComparison.Ordinal);
            if (openAt < 0)
            {
                return "";
            }

            int start = openAt + openToken.Length;
            int end = closeFromEnd
                ? matched.LastIndexOf(closeToken, StringComparison.Ordinal)
                : matched.IndexOf(closeToken, StringComparison.Ordinal);

            // An inverted or missing range yields "" rather than an ArgumentOutOfRangeException.
            return end < start ? "" : matched.Substring(start, end - start);
        }
示例#5
0
        /// <summary>
        /// Downloads one job detail page (utf-8), strips it down to its filtered,
        /// whitespace-free body and extracts the job fields into a <c>JobInfo</c>,
        /// which is then reported through <c>GetJobEnd(pageStr, info)</c>.
        /// </summary>
        /// <remarks>
        /// Returns without reporting anything when the download is empty, or when
        /// <c>mustKey</c> is set and the filtered body does not contain it. A field
        /// whose pattern does not match is left as an empty string. Company nature
        /// and company size each have a second pattern that is tried when the first
        /// one yields nothing.
        /// </remarks>
        /// <param name="url">Address of the job detail page; also stored in <c>info.Url</c>.</param>
        /// <exception cref="Exception">
        /// Any failure is rethrown as a plain <c>Exception</c> carrying the original
        /// message; the original exception is attached as <c>InnerException</c>.
        /// </exception>
        private void GetJobInfoFromUrl(string url)
        {
            try
            {
                JobInfo info = new JobInfo();

                string pageStr = GetHtmlCode.GetByget(url, "utf-8");
                if (string.IsNullOrEmpty(pageStr))
                {
                    return;
                }

                pageStr = pageStr.Replace("\r\n", ""); // drop line breaks

                // Keep only the <body>...</body> part of the page.
                string body = string.Empty;
                Match bodyMatch = Regex.Match(pageStr, @"(?is)<body.*?</body>");
                if (bodyMatch.Success)
                {
                    // pageStr no longer contains "\r\n", so only the "<tr >" fix-up is needed.
                    body = bodyMatch.Value.Replace("<tr >", "<tr>");
                }

                // Remove styles, scripts and other unrelated markup.
                foreach (var filter in Filters)
                {
                    body = Regex.Replace(body, filter[0], filter[1]);
                }

                if (!string.IsNullOrEmpty(mustKey) && !body.Contains(mustKey))
                {
                    return;
                }

                // All whitespace is removed, which is why the patterns below read
                // "<h1title=" rather than "<h1 title=".
                body = Regex.Replace(body, "\\s", "");

                info.Url = url;

                // Position title.
                string position = Regex.Match(body, "<h1title=([\\s\\S]+?)>(.*?)</h1>").Value;
                info.Position = TextBetween(position, ">", "</", false);

                // Company name.
                string company = Regex.Match(body, "</h1><h3>(.*?)</h3>").Value;
                info.Company = TextBetween(company, "<h3>", "</h3>", false);

                // Work location.
                string address = Regex.Match(body, "<divclass=\"resumeclearfix\"><span>(.*?)</span>").Value;
                info.Address = TextBetween(address, "<span>", "</", false);

                // Company nature, with a fallback to the alternative layout.
                string nature = Regex.Match(body, "<li><span>企业性质:</span>(.*?)</li>").Value;
                info.Nature = TextBetween(nature, "</span>", "</li>", false);
                if (string.IsNullOrEmpty(info.Nature))
                {
                    string natureAlt = Regex.Match(body, "<br><span>性质:</span>(.*?)<br>").Value;
                    info.Nature = TextBetween(natureAlt, "</span>", "<br>", true);
                }

                // Company size, with a fallback to the alternative layout.
                string scale = Regex.Match(body, "<li><span>企业规模:</span>(.*?)</li>").Value;
                info.Scale = TextBetween(scale, "</span>", "</li>", false);
                if (string.IsNullOrEmpty(info.Scale))
                {
                    string scaleAlt = Regex.Match(body, "<br><span>规模:</span>(.*?)<br>").Value;
                    // Only Scale is assigned here: the earlier chained assignment also
                    // overwrote the Nature value extracted above.
                    info.Scale = TextBetween(scaleAlt, "</span>", "<br>", true);
                }

                // Work experience.
                string experience = Regex.Match(body, "<spanclass=\"noborder\">(.*?)</span>").Value;
                info.Experience = TextBetween(experience, ">", "</", false);

                // Minimum education.
                string education = Regex.Match(body, "</span><span>(.*?)</span><spanclass=\"noborder\">").Value;
                info.Education = TextBetween(education, "<span>", "</span><spanclass=", false);

                // Monthly salary.
                string salary = Regex.Match(body, "<pclass=\"job-main-title\">(.*?)<").Value;
                info.Salary = TextBetween(salary, ">", "<", true);

                // Publish time.
                string time = Regex.Match(body, "<pclass=\"release-time\">发布时间:<em>(.*?)</em></p>").Value;
                info.Time = TextBetween(time, "<em>", "</em>", false);

                if (GetJobEnd != null)
                {
                    GetJobEnd(pageStr, info);
                }
            }
            catch (Exception exMsg)
            {
                // Keep the exception type and message callers already see, but attach the
                // original so its type and stack trace are not lost.
                throw new Exception(exMsg.Message, exMsg);
            }
        }

        /// <summary>
        /// Returns the part of <paramref name="matched"/> that starts right after the
        /// first <paramref name="openToken"/> and ends right before
        /// <paramref name="closeToken"/>. Comparisons are ordinal.
        /// </summary>
        /// <param name="matched">Text of a regex match; may be null or empty.</param>
        /// <param name="openToken">Delimiter that precedes the wanted text.</param>
        /// <param name="closeToken">Delimiter that follows the wanted text.</param>
        /// <param name="closeFromEnd">
        /// True to use the last occurrence of <paramref name="closeToken"/>, false to use the first.
        /// </param>
        /// <returns>
        /// The enclosed text, or an empty string when <paramref name="matched"/> is
        /// null/empty, a delimiter is missing, or the closing delimiter precedes the
        /// end of the opening one.
        /// </returns>
        private static string TextBetween(string matched, string openToken, string closeToken, bool closeFromEnd)
        {
            if (string.IsNullOrEmpty(matched))
            {
                return "";
            }

            int openAt = matched.IndexOf(openToken, StringComparison.Ordinal);
            if (openAt < 0)
            {
                return "";
            }

            int start = openAt + openToken.Length;
            int end = closeFromEnd
                ? matched.LastIndexOf(closeToken, StringComparison.Ordinal)
                : matched.IndexOf(closeToken, StringComparison.Ordinal);

            // An inverted or missing range yields "" rather than an ArgumentOutOfRangeException.
            return end < start ? "" : matched.Substring(start, end - start);
        }