/// <summary>
/// Scrapes staff records from a listing page. The markup in <paramref name="html"/> is parsed and,
/// when it contains exactly one table with id "dgConstBid", its rows are walked from index 1;
/// only rows with exactly 6 columns are handled.
/// For such a row the method reads plain text from the row's columns, takes the Link of the first
/// ATag found under column 1, strips "GoDetail('" and "');" from it and appends the remainder to the
/// Detail_LWDZ.aspx?ID_NUMBER= address, then downloads that page with Encoding.Default.
/// When the download throws, the exception is reported through Logger.Error and the row is skipped.
/// From the downloaded page it takes tables whose borderColor is "#cccccc", collects the text of TD
/// cells with width "76%", and applies the "任职企业编号:" pattern; if the pattern matches several
/// times, the last match is the one left in CorpCode.
/// The record is created with ToolDb.GenCorpStaff (string.Empty is passed in the fourth position
/// instead of the computed IdNum) and stored with ToolDb.SaveEntity(corpStaff, this.ExistCompareFields).
/// </summary>
/// <param name="url">Not referenced anywhere in the method body.</param>
/// <param name="list">Appears only in the "// list.Add(corpStaff);" comment text.</param>
/// <param name="html">Markup that is parsed and searched for the "dgConstBid" table.</param>
/// <param name="crawlAll">Appears only in the "// if (!crawlAll ...) return;" comment text.</param>
/// <remarks>
/// NOTE(review): this method is stored on two physical lines, so the text alone does not show where
/// each "//" comment ends. Whether the CorpName assignment from column 2 and "list.Add(corpStaff)"
/// are live statements or commented-out code must be confirmed against the upstream file.
/// NOTE(review): the Logger.Error message labels its first value "人员姓名:" but concatenates CorpName,
/// not Name; this looks like a copy/paste slip - confirm before relying on the log text.
/// NOTE(review): tabTag is assigned from ctxNode[0] but never read afterwards.
/// </remarks>
private void GetCorpStaffSzjsjMethod(string url, IList list, string html, bool crawlAll) { Parser parser = new Parser(new Lexer(html)); NodeList aNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "dgConstBid"))); if (aNodes != null && aNodes.Count == 1 && aNodes[0] is TableTag) { TableTag table = (TableTag)aNodes[0]; for (int i = 1; i < table.Rows.Length; i++) { if (table.Rows[i].Columns.Length == 6) { Type typs = typeof(ATag); string Name = string.Empty, Sex = string.Empty, CredType = string.Empty, IdNum = string.Empty, CorpName = string.Empty, CorpCode = string.Empty, CertCode = string.Empty, CertGrade = string.Empty, RegLevel = string.Empty, RegCode = string.Empty, AuthorUnit = string.Empty, PersonType = string.Empty, Province = string.Empty, City = string.Empty, CreateTime = string.Empty, InfoSource = string.Empty, Url = string.Empty, Profession = string.Empty; Name = table.Rows[i].Columns[1].ToPlainTextString().Trim().Replace(" ", ""); //Sex = table.Rows[i].Columns[1].ToPlainTextString().Trim().Replace(" ", ""); string urlSpilt = (table.Rows[i].Columns[1].Children.SearchFor(typs, true)[0] as ATag).Link; string idnum = urlSpilt.Replace("GoDetail('", "").Replace("');", ""); //urlSpilt.Substring(urlSpilt.IndexOf("('"), (urlSpilt.Length - 2)); IdNum = idnum.Replace("&am", "").Replace("&a", "").Replace("p;c", "").Replace("cate", "").Replace("cat", "").Replace("ate", ""); // CorpName = table.Rows[i].Columns[2].ToPlainTextString().Trim().Replace(" ", ""); CorpCode = CorpName; CertCode = table.Rows[i].Columns[4].ToPlainTextString().Trim().Replace(" ", ""); Profession = table.Rows[i].Columns[5].ToPlainTextString().Trim().Replace(" ", ""); PersonType = table.Rows[i].Columns[3].ToPlainTextString().Trim().Replace(" ", ""); Url = "http://61.144.226.2/ryxx/Detail_LWDZ.aspx?ID_NUMBER=" + idnum; string ctxhtml = string.Empty; try { ctxhtml = ToolWeb.GetHtmlByUrl(Url, Encoding.Default); } catch (Exception ex) { 
Logger.Error("人员姓名:" + CorpName + ",证件号:" + IdNum + "所在单位:" + CorpName + "," + Url + ";" + ex); continue; } Parser parserCtx = new Parser(new Lexer(ctxhtml)); NodeList ctxNode = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("borderColor", "#cccccc"))); TableTag tabTag = ctxNode[0] as TableTag; string text = ctxNode.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("TD"), new HasAttributeFilter("width", "76%")), true).AsString().Replace(" ", ""); string strSpilt = "任职企业编号:.*?\r\n"; MatchCollection mc = Regex.Matches(text, strSpilt); foreach (Match m in mc) { CorpCode = m.ToString().Replace("任职企业编号:", "").Replace("\r\n", ""); } CorpStaff corpStaff = ToolDb.GenCorpStaff(Name, Sex, CredType, string.Empty, CorpName, CorpCode, CertCode, RegLevel, RegCode, AuthorUnit, PersonType, CertGrade, "广东省", "深圳市区", "深圳市住房和建设局", Url, Profession, "", "", "", ""); // list.Add(corpStaff); ToolDb.SaveEntity(corpStaff, this.ExistCompareFields); // if (!crawlAll && list.Count >= this.MaxCount) return; } } parser.Reset(); } }
/// <summary>
/// Crawls the staff listing at <c>SiteUrl</c> page by page, opens each row's detail page on
/// jzsc.mohurd.gov.cn and stores one <c>CorpStaff</c> record per row through
/// <c>ToolDb.SaveEntity</c>.
/// </summary>
/// <param name="crawlAll">Not read by this implementation; every page is always crawled.</param>
/// <returns>
/// Always <c>null</c>: records are persisted as they are found rather than returned.
/// </returns>
/// <remarks>
/// Listing pages after the first, and every detail page, are requested up to three times with a
/// six-minute sleep before each retry; a page or row that still fails is skipped.
/// After every 28 saved records the method sleeps for six minutes.
/// </remarks>
protected override IList ExecuteCrawl(bool crawlAll)
{
    const int PageSize = 15;                 // rows per listing page; also posted as "$pgsz"
    const int MaxAttempts = 3;               // one initial request plus two retries
    const int RetryDelayMs = 60 * 1000 * 6;  // six minutes
    const int RecordsPerPause = 28;

    string html;
    try
    {
        html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
    }
    catch
    {
        // Without the first listing page there is nothing to crawl.
        return null;
    }

    int totalCount = 0; // value read after "total" in the pager block; posted back as "$total"
    int pageInt = 1;
    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(
        new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "clearfix")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            // NOTE(review): both Replace arguments are identical as written, which makes this call
            // a no-op; presumably one of them was a full-width comma - confirm the file encoding.
            string temp = pageNode.AsString().Replace(",", ",");
            string page = temp.GetRegexBegEnd("total", ",").GetReplace("\":");
            totalCount = int.Parse(page);

            // Round up, so an exact multiple of the page size does not add an empty extra page.
            int pages = (totalCount + PageSize - 1) / PageSize;
            pageInt = pages < 1 ? 1 : pages;
        }
        catch
        {
            // Best effort: when the total cannot be read, only the first page is crawled.
        }
    }

    int count = 0;
    for (int p = 1; p <= pageInt; p++)
    {
        if (p > 1)
        {
            // NOTE(review): the page number is written at error level; presumably a progress trace.
            Logger.Error(p);
            NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                new string[] { "$total", "$reload", "$pg", "$pgsz" },
                new string[] { totalCount.ToString(), "0", p.ToString(), PageSize.ToString() });

            bool pageFetched = false;
            for (int attempt = 1; attempt <= MaxAttempts && !pageFetched; attempt++)
            {
                if (attempt > 1)
                {
                    Thread.Sleep(RetryDelayMs);
                }
                try
                {
                    html = ToolWeb.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8);
                    pageFetched = true;
                }
                catch
                {
                    // Swallowed on purpose: the loop retries and the page is skipped after the last attempt.
                }
            }
            if (!pageFetched)
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(
            new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "table_box responsive personal")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }
        TableTag table = listNode[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        // The first and the last row of the table are not read as records.
        for (int i = 1; i < table.RowCount - 1; i++)
        {
            TableRow tr = table.Rows[i];
            if (tr.Columns.Length < 6)
            {
                // Columns 1..5 are read below; a shorter row cannot be a record.
                continue;
            }

            string Sex = string.Empty, CredType = string.Empty, CorpName = string.Empty,
                   CorpCode = string.Empty, CertCode = string.Empty, RegLevel = string.Empty,
                   AuthorUnit = string.Empty, Profession = string.Empty, staffNum = string.Empty,
                   IssuanceTime = string.Empty, Organ = string.Empty;

            string Name = tr.Columns[1].ToNodePlainString();
            string IdNum = tr.Columns[2].ToNodePlainString();
            string CertGrade = tr.Columns[3].ToNodePlainString();
            string RegCode = tr.Columns[4].ToNodePlainString();
            string PersonType = tr.Columns[5].ToNodePlainString();

            ATag aTag = tr.Columns[1].GetATag();
            if (aTag == null)
            {
                // No link means no detail page address can be built for this row.
                continue;
            }
            string Url = "http://jzsc.mohurd.gov.cn" + aTag.Link;

            string htmldtl = string.Empty;
            bool detailFetched = false;
            for (int attempt = 1; attempt <= MaxAttempts && !detailFetched; attempt++)
            {
                if (attempt > 1)
                {
                    Thread.Sleep(RetryDelayMs);
                }
                try
                {
                    htmldtl = ToolWeb.GetHtmlByUrl(Url, Encoding.UTF8).GetJsString();
                    detailFetched = true;
                }
                catch
                {
                    // Swallowed on purpose: the loop retries and the row is skipped after the last attempt.
                }
            }
            if (!detailFetched)
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "activeTinyTabContent")));
            if (dtlNode != null && dtlNode.Count > 0)
            {
                string ctx = dtlNode.AsHtml().GetReplace("</dd>", "\r\n").ToCtxString();
                Sex = ctx.GetRegex("性别");
            }

            // Rewind the same parser to run a second query over the detail page.
            parser.Reset();
            dtlNode = parser.ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "regcert_tab")));
            if (dtlNode != null && dtlNode.Count > 0)
            {
                string ctx = dtlNode.AsHtml().GetReplace("</dd>", "\r\n").ToCtxString();
                CertCode = ctx.GetRegex("证书编号");
                ATag nameTag = dtlNode.GetATag(1);
                if (nameTag != null)
                {
                    CorpName = nameTag.LinkText.ToNodeString();
                }
            }

            CorpStaff corpStaff = ToolDb.GenCorpStaff(Name, Sex, CredType, IdNum, CorpName, CorpCode, CertCode, RegLevel, RegCode, AuthorUnit, PersonType, CertGrade, "全国", "", "中华人民共和国住房和城乡建设部建筑市场监管司", Url, Profession, staffNum, IssuanceTime, Organ, "");
            ToolDb.SaveEntity(corpStaff, this.ExistCompareFields, this.ExistsUpdate);

            count++;
            if (count >= RecordsPerPause)
            {
                count = 0;
                Thread.Sleep(RetryDelayMs);
            }
        }
    }
    return null;
}
/// <summary>
/// Crawls the certificate listing at <c>SiteUrl</c> (an ASP.NET pager posted back with the page's
/// view state), and for each row downloads two pages under
/// http://113.108.219.40/PlatForm/SearchCenter/: the one linked from column 1 (read for "性别")
/// and the one linked from column 2 (read for issue date, certificate state and issuing organ).
/// One <c>CorpStaff</c> record per row is stored through <c>ToolDb.SaveEntity</c>.
/// </summary>
/// <param name="crawlAll">Not read by this implementation; every page is always crawled.</param>
/// <returns>
/// Always <c>null</c>: records are persisted as they are found rather than returned.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    const string DetailBase = "http://113.108.219.40/PlatForm/SearchCenter/";

    string html;
    try
    {
        html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch
    {
        // Without the first listing page there is nothing to crawl.
        return null;
    }

    int pageInt = 1;
    Parser parser = new Parser(new Lexer(html));
    NodeList pageList = parser.ExtractAllNodesThatMatch(
        new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_AspNetPager1")));
    if (pageList != null && pageList.Count > 0)
    {
        try
        {
            // The pager text carries a number between "共" and "条"; it is divided by 15 per page.
            string temp = pageList[0].ToPlainTextString().GetRegexBegEnd("共", "条");
            int page = int.Parse(temp);
            int result = page / 15;
            if (page % 15 != 0)
            {
                pageInt = result + 1;
            }
            else
            {
                pageInt = result;
            }
        }
        catch
        {
            pageInt = 1;
        }
    }

    // The page index is advanced only by the loop header. Overriding it inside the body would
    // skip pages, and would never terminate once the page count reaches the override value.
    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                string viewState = ToolWeb.GetAspNetViewState(html);
                NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                    new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "ctl00$ContentPlaceHolder1$txtName", "ctl00$ContentPlaceHolder1$txtIdNum", "ctl00$ContentPlaceHolder1$txtEmpName", "ctl00$ContentPlaceHolder1$txtEMP_ORG_CODE", "ctl00$ContentPlaceHolder1$txtCertNum", "ctl00$ContentPlaceHolder1$rdoIsDock" },
                    new string[] { "ctl00$ContentPlaceHolder1$AspNetPager1", i.ToString(), viewState, "", "", "", "", "", "0" });
                html = ToolWeb.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8);
            }
            catch
            {
                // html still holds the previous page; skip instead of importing that page again.
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(
            new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "dataTable")));
        if (nodeList == null || nodeList.Count == 0)
        {
            continue;
        }
        TableTag table = nodeList[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        for (int j = 1; j < table.RowCount; j++)
        {
            TableRow tr = table.Rows[j];
            if (tr.ColumnCount < 7)
            {
                // Columns 1..6 are read below; a shorter row cannot be a record.
                continue;
            }

            string Sex = string.Empty, CredType = string.Empty, IdNum = string.Empty,
                   CorpCode = string.Empty, RegLevel = string.Empty, AuthorUnit = string.Empty,
                   Profession = string.Empty;

            string Name = tr.Columns[1].ToNodePlainString();
            string RegCode = tr.Columns[2].ToNodePlainString();
            string CertCode = tr.Columns[3].ToNodePlainString();
            string CorpName = tr.Columns[5].ToNodePlainString();
            string PersonType = tr.Columns[4].ToNodePlainString();
            string CertGrade = tr.Columns[6].ToNodePlainString();

            string Url = DetailBase + tr.Columns[2].GetATagHref();
            string sexUrl = DetailBase + tr.Columns[1].GetATagHref();

            try
            {
                string htl = ToolWeb.GetHtmlByUrl(sexUrl, Encoding.UTF8);
                parser = new Parser(new Lexer(htl));
                NodeList dtlList = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                if (dtlList != null && dtlList.Count > 0)
                {
                    TableTag personTable = dtlList[0] as TableTag;
                    if (personTable != null)
                    {
                        Sex = FlattenLabelValueTable(personTable).GetRegex(new string[] { "性别" });
                    }
                }
            }
            catch
            {
                // Best effort: the record is still saved when this page cannot be read; Sex stays empty.
            }

            string htldtl;
            try
            {
                htldtl = ToolWeb.GetHtmlByUrl(Url, Encoding.UTF8);
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htldtl));
            NodeList dtList = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
            if (dtList == null || dtList.Count == 0)
            {
                // As before, nothing is saved when the certificate page has no table.
                continue;
            }
            TableTag certTable = dtList[0] as TableTag;
            if (certTable == null)
            {
                continue;
            }

            string ctx = FlattenLabelValueTable(certTable);
            string IssuanceTime = ctx.GetRegex(new string[] { "签发日期", "日期" });
            string CertState = ctx.GetRegex(new string[] { "证书状态" });
            string Organ = ctx.GetRegex(new string[] { "发证机关" });
            string staffNum = CertGrade.GetLevel();

            CorpStaff corpStaff = ToolDb.GenCorpStaff(Name, Sex, CredType, IdNum, CorpName, CorpCode, CertCode, RegLevel, RegCode, AuthorUnit, PersonType, CertGrade, "广东省", "广东地区", "广东省住房和城乡建设厅", Url, Profession, staffNum, IssuanceTime, Organ, CertState);
            ToolDb.SaveEntity(corpStaff, this.ExistCompareFields, this.ExistsUpdate);
        }
    }
    return null;
}

/// <summary>
/// Turns a table whose cells alternate between label and value into text made of
/// "label:value" entries separated by "\r\n", the form the GetRegex lookups above are applied to.
/// </summary>
/// <param name="tab">Table to flatten; must not be null.</param>
/// <returns>The concatenated text; empty when the table has no cells.</returns>
private static string FlattenLabelValueTable(TableTag tab)
{
    StringBuilder ctx = new StringBuilder();
    for (int k = 0; k < tab.RowCount; k++)
    {
        TableRow row = tab.Rows[k];
        for (int d = 0; d < row.ColumnCount; d++)
        {
            string cell = row.Columns[d].ToNodePlainString();
            if ((d + 1) % 2 == 0)
            {
                // Even position (2nd, 4th, ...): a value, which ends the entry.
                ctx.Append(cell).Append("\r\n");
            }
            else
            {
                // Odd position: a label. Colons already in it are removed before one is appended.
                // NOTE(review): the two Replace calls are identical as written; presumably one of
                // them targeted the full-width colon - confirm the file encoding.
                ctx.Append(cell.Replace(":", "").Replace(":", "")).Append(":");
            }
        }
    }
    return ctx.ToString();
}
/// <summary>
/// Crawls the practitioner listing that starts at <c>SiteUrl</c> and continues through
/// http://119.145.135.38/fscx/web/tab4List.do, opens each row's tab4Detail.do page and stores one
/// <c>CorpStaff</c> record per row through <c>ToolDb.SaveEntity</c>.
/// </summary>
/// <param name="crawlAll">Not read by this implementation; every page is always crawled.</param>
/// <returns>
/// Always <c>null</c>: records are persisted as they are found rather than returned.
/// </returns>
/// <remarks>
/// A listing page that cannot be downloaded is skipped, in the same way as a detail page that
/// cannot be downloaded, so one failed request does not abort the whole crawl.
/// </remarks>
protected override IList ExecuteCrawl(bool crawlAll)
{
    string html;
    try
    {
        html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.Default);
    }
    catch
    {
        // Without the first listing page there is nothing to crawl.
        return null;
    }

    int pageInt = 1;
    Parser parser = new Parser(new Lexer(html));
    NodeList pageList = parser.ExtractAllNodesThatMatch(
        new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ContentPlaceHolder1_aspnetPager1")));
    if (pageList != null && pageList.Count > 0)
    {
        try
        {
            // The page count is the number between "/" and "页" in the pager text.
            string pagerText = pageList[0].ToPlainTextString().GetRegexBegEnd("/", "页");
            pageInt = int.Parse(pagerText);
        }
        catch
        {
            pageInt = 1;
        }
    }

    // Loop-invariant: picks the "?id=..." query part out of a row's onclick target.
    Regex regexLink = new Regex(@"\?id=[^&]+");

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                new string[] { "searchStr", "currentPage", "pageSize", "tab", "kind" },
                new string[] { string.Empty, i.ToString(), "15", "4", "zyxx" });
            try
            {
                html = ToolWeb.GetHtmlByUrl("http://119.145.135.38/fscx/web/tab4List.do", nvc, Encoding.Default);
            }
            catch
            {
                // Skip this page; html still holds the previous page and must not be imported twice.
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(
            new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "data-table2")));
        if (nodeList == null || nodeList.Count == 0)
        {
            continue;
        }

        // When several tables carry the class, the second one is the one that is read.
        TableTag table = (nodeList.Count > 1 ? nodeList[1] : nodeList[0]) as TableTag;
        if (table == null)
        {
            continue;
        }

        for (int j = 1; j < table.RowCount; j++)
        {
            TableRow tr = table.Rows[j];
            if (tr.ColumnCount < 4)
            {
                // Columns 0..3 are read below; a shorter row cannot be a record.
                continue;
            }

            string Sex = string.Empty, CredType = string.Empty, IdNum = string.Empty,
                   CorpCode = string.Empty, CertGrade = string.Empty, RegLevel = string.Empty,
                   RegCode = string.Empty, AuthorUnit = string.Empty, PersonType = string.Empty,
                   Profession = string.Empty, staffNum = string.Empty, Organ = string.Empty;

            string Name = tr.Columns[0].ToNodePlainString();
            string CorpName = tr.Columns[1].ToNodePlainString();
            string CertCode = tr.Columns[2].ToNodePlainString().Replace(".", "");
            string IssuanceTime = tr.Columns[3].ToPlainTextString().GetDateRegex();

            string target = tr.GetAttribute("onclick").GetRegexBegEnd("'", "'");
            string ids = regexLink.Match(target).Value;
            string Url = "http://119.145.135.38/fscx/web/tab4Detail.do" + ids;

            string htldtl;
            try
            {
                htldtl = ToolWeb.GetHtmlByUrl(Url, Encoding.Default);
            }
            catch
            {
                continue;
            }

            // First table under div#tabs-1: alternating label/value cells.
            parser = new Parser(new Lexer(htldtl));
            NodeList dtList = parser.ExtractAllNodesThatMatch(
                new AndFilter(
                    new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "tabs-1")), true),
                    new TagNameFilter("table")));
            if (dtList != null && dtList.Count > 0)
            {
                TableTag infoTable = dtList[0] as TableTag;
                if (infoTable != null)
                {
                    StringBuilder ctx = new StringBuilder();
                    for (int d = 0; d < infoTable.RowCount; d++)
                    {
                        TableRow infoRow = infoTable.Rows[d];
                        for (int k = 0; k < infoRow.ColumnCount; k++)
                        {
                            // Odd positions are labels (followed by ':'), even positions are values.
                            ctx.Append(infoRow.Columns[k].ToNodePlainString());
                            ctx.Append((k + 1) % 2 == 0 ? "\r\n" : ":");
                        }
                    }
                    string info = ctx.ToString();
                    Sex = info.GetRegex("性别");
                    CorpCode = info.GetRegex("所在单位机构代码");
                    PersonType = info.GetRegex("专业");
                }
            }

            // First table under div#tabs1-1: one row per certificate.
            parser = new Parser(new Lexer(htldtl));
            NodeList cDtList = parser.ExtractAllNodesThatMatch(
                new AndFilter(
                    new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "tabs1-1")), true),
                    new TagNameFilter("table")));
            if (cDtList != null && cDtList.Count > 0)
            {
                TableTag certTable = cDtList[0] as TableTag;
                if (certTable != null)
                {
                    for (int k = 1; k < certTable.RowCount; k++)
                    {
                        TableRow dr = certTable.Rows[k];
                        if (dr.ColumnCount < 6)
                        {
                            // Columns 0..5 are read for a matching row.
                            continue;
                        }
                        string code = dr.Columns[0].ToNodePlainString();
                        if (!code.Contains(CertCode))
                        {
                            continue;
                        }

                        // The row's code contains the listing's code: adopt the row's values.
                        // There is no break, so a later matching row overrides an earlier one.
                        CertCode = code;
                        CredType = dr.Columns[2].ToNodePlainString();
                        CertGrade = dr.Columns[3].ToNodePlainString();
                        string type = dr.Columns[4].ToNodePlainString();
                        if (!string.IsNullOrEmpty(type))
                        {
                            PersonType = type;
                        }
                        Organ = dr.Columns[5].ToNodePlainString();
                        staffNum = CertGrade.GetLevel();
                    }
                }
            }

            // "-" and "/" are placeholders, not real values.
            if (PersonType == "-" || PersonType == "/")
            {
                PersonType = string.Empty;
            }

            CorpStaff corpStaff = ToolDb.GenCorpStaff(Name, Sex, CredType, IdNum, CorpName, CorpCode, CertCode, RegLevel, RegCode, AuthorUnit, PersonType, CertGrade, "广东省", "佛山市", "佛山市住房和城乡建设管理局", Url, Profession, staffNum, IssuanceTime, Organ, "");
            ToolDb.SaveEntity(corpStaff, this.ExistCompareFields, this.ExistsUpdate);
        }
    }
    return null;
}
/// <summary>
/// Crawls ten staff categories published under http://61.144.226.2:8001/web/. For each category
/// it walks the listing pages, opens every row's detail page to read "任职企业编号", and stores one
/// <c>CorpStaff</c> record per row through <c>ToolDb.SaveEntity</c>.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, the crawl stops as soon as <c>MaxCount</c> records of one category have been
/// saved; when <c>true</c>, everything is crawled.
/// </param>
/// <returns>
/// <c>null</c> when the crawl stopped at the <c>MaxCount</c> cap; otherwise the (empty) list
/// created at the start, since records are persisted rather than collected.
/// </returns>
/// <remarks>
/// The cap is checked after a record has been saved, so the record that reaches the cap is stored
/// and exactly <c>MaxCount</c> records are saved before the crawl stops.
/// </remarks>
protected override IList ExecuteCrawl(bool crawlAll)
{
    const string SmallProjectLeader = "小型项目负责人";

    IList list = new List<CorpStaff>();
    int count = 1;

    // Category name (also stored as the record's PersonType) -> first listing page.
    Hashtable has = new Hashtable();
    has.Add("注册建造工程师", "http://61.144.226.2:8001/web/personAction.do?method=getPersonList&category=2");
    has.Add("注册建筑工程师", "http://61.144.226.2:8001/web/personAction.do?method=getPersonList&category=5");
    has.Add("注册结构工程师", "http://61.144.226.2:8001/web/personAction.do?method=getPersonList&category=6");
    has.Add("注册监理工程师", "http://61.144.226.2:8001/web/personAction.do?method=getPersonList&category=3");
    has.Add("水利监理工程师", "http://61.144.226.2:8001/web/sljlAction.do?method=getSljlList&pageSize=50");
    has.Add("注册造价工程师", "http://61.144.226.2:8001/web/personAction.do?method=getPersonList&category=4");
    has.Add("小型项目负责人", "http://61.144.226.2:8001/web/xxxmAction.do?method=getXxxmList");
    has.Add("质量主任", "http://61.144.226.2:8001/web/personAction.do?method=getPersonList&category=7");
    has.Add("安全主任", "http://61.144.226.2:8001/web/personAction.do?method=getPersonList&category=8");
    has.Add("劳务队长", "http://61.144.226.2:8001/web/lwdzAction.do?method=getLwdzList");

    foreach (string item in has.Keys)
    {
        int sqlCount = 0; // records saved for the current category
        int pageInt = 1;
        string htl;
        try
        {
            if (item == SmallProjectLeader)
            {
                // This category is requested with pageSize=3000 instead of its table entry.
                htl = ToolWeb.GetHtmlByUrl("http://61.144.226.2:8001/web/xxxmAction.do?pageSize=3000&page=1&backUrl=&page=136&method=getXxxmList&method=getXxxmList&personname=&personname=&orgName=&orgName=", Encoding.Default);
            }
            else
            {
                htl = ToolWeb.GetHtmlByUrl(has[item].ToString(), Encoding.Default);
            }
        }
        catch
        {
            // The category cannot be crawled without its first page; move on to the next one.
            continue;
        }

        Parser parser = new Parser(new Lexer(htl));
        NodeList pageNode = parser.ExtractAllNodesThatMatch(
            new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("id", "lx")));
        if (pageNode != null && pageNode.Count > 0)
        {
            try
            {
                // The page count is the "page=" value in the href of the a#lx link.
                string pageText = pageNode.GetATagHref().GetRegexBegEnd("page=", "&");
                pageInt = int.Parse(pageText);
            }
            catch
            {
                // Best effort: when the count cannot be read, only the first page is crawled.
            }
        }

        for (int i = 1; i <= pageInt; i++)
        {
            if (i > 1)
            {
                if (item == SmallProjectLeader)
                {
                    // Only the first (pageSize=3000) request is made for this category.
                    break;
                }
                try
                {
                    htl = ToolWeb.GetHtmlByUrl(has[item].ToString() + "&page=" + i.ToString(), Encoding.Default);
                }
                catch
                {
                    continue;
                }
            }

            parser = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "bean")));
            if (nodeList == null || nodeList.Count == 0)
            {
                continue;
            }
            TableTag table = nodeList[0] as TableTag;
            if (table == null)
            {
                continue;
            }

            for (int j = 1; j < table.RowCount; j++)
            {
                string Name = string.Empty, Sex = string.Empty, CredType = string.Empty,
                       IdNum = string.Empty, CorpName = string.Empty, CertCode = string.Empty,
                       CertGrade = string.Empty, RegLevel = string.Empty, RegCode = string.Empty,
                       AuthorUnit = string.Empty, Profession = string.Empty;

                TableRow tr = table.Rows[j];

                // The listing layout differs per category. The keys are exact and none is a
                // substring of another, so a switch selects the same branch as the Contains tests.
                switch (item)
                {
                    case "注册建造工程师":
                    case "注册建筑工程师":
                    case "注册结构工程师":
                        Name = tr.Columns[1].ToNodePlainString();
                        CorpName = tr.Columns[2].ToNodePlainString();
                        CertCode = tr.Columns[4].ToNodePlainString();
                        CertGrade = tr.Columns[5].ToNodePlainString();
                        break;
                    case "水利监理工程师":
                        Name = tr.Columns[1].ToNodePlainString();
                        CertCode = tr.Columns[3].ToNodePlainString();
                        Profession = tr.Columns[4].ToNodePlainString();
                        break;
                    case "注册监理工程师":
                    case "注册造价工程师":
                    case "劳务队长":
                        Name = tr.Columns[1].ToNodePlainString();
                        CorpName = tr.Columns[2].ToNodePlainString();
                        CertCode = tr.Columns[4].ToNodePlainString();
                        break;
                    case "小型项目负责人":
                        Name = tr.Columns[1].ToNodePlainString();
                        CorpName = tr.Columns[2].ToNodePlainString();
                        CertCode = tr.Columns[4].ToNodePlainString();
                        Profession = tr.Columns[5].ToNodePlainString();
                        break;
                    case "质量主任":
                    case "安全主任":
                        Name = tr.Columns[1].ToNodePlainString();
                        CorpName = tr.Columns[2].ToNodePlainString();
                        break;
                }
                string PersonType = item;

                // The detail address is the argument of the doView('...') onclick handler.
                string tempUrl = "http://61.144.226.2:8001/web/" + tr.Columns[1].GetATagValue("onclick").Replace("doView", "").Replace("(", "").Replace(")", "").Replace("'", "");

                string htmldtl = string.Empty;
                try
                {
                    htmldtl = ToolWeb.GetHtmlByUrl(tempUrl, Encoding.Default);
                }
                catch
                {
                    // Best effort: the record is still saved from the listing data; CorpCode stays empty.
                }

                StringBuilder ctx = new StringBuilder();
                // Every "th" in the page text is turned into "td", which makes header cells show up
                // as ordinary columns (it also rewrites any other text containing "th").
                parser = new Parser(new Lexer(htmldtl.Replace("th", "td")));
                NodeList dtlNode = parser.ExtractAllNodesThatMatch(
                    new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "infoTableL")));
                if (dtlNode != null && dtlNode.Count > 0)
                {
                    TableTag tableDtl = dtlNode[0] as TableTag;
                    if (tableDtl != null)
                    {
                        for (int k = 0; k < tableDtl.RowCount; k++)
                        {
                            TableRow detailRow = tableDtl.Rows[k];
                            for (int d = 0; d < detailRow.ColumnCount; d++)
                            {
                                // NOTE(review): the two Replace calls are identical as written;
                                // presumably one targeted the full-width colon - confirm the encoding.
                                string cell = detailRow.Columns[d].ToNodePlainString().Replace(":", "").Replace(":", "");
                                // First cell of a row is the label; every other cell ends a line.
                                ctx.Append(cell).Append(d == 0 ? ":" : "\r\n");
                            }
                        }
                    }
                }

                string CorpCode = ctx.ToString().GetRegex("任职企业编号");
                string staffNum = CertGrade.GetLevel();

                CorpStaff corpStaff = ToolDb.GenCorpStaff(Name, Sex, CredType, IdNum, CorpName, CorpCode, CertCode, RegLevel, RegCode, AuthorUnit, PersonType, CertGrade, "广东省", "深圳市", "深圳市住房和建设局", tempUrl, Profession, staffNum, "", "", "");
                ToolDb.SaveEntity(corpStaff, this.ExistCompareFields, this.ExistsUpdate);

                // Checked after saving, so the record that reaches the cap is not thrown away.
                sqlCount++;
                if (!crawlAll && sqlCount >= this.MaxCount)
                {
                    return null;
                }

                // The counter starts at 1, so the pause happens after every 99 saved records.
                count++;
                if (count >= 100)
                {
                    count = 1;
                    Thread.Sleep(480000);
                }
            }
        }
    }
    return list;
}