/// <summary>
/// Back-fills the <c>PrjCode</c> column of <c>NoticeInfo</c> rows that have notice text but no project code.
/// The code is extracted from the notice text and written back with one UPDATE per row.
/// </summary>
/// <param name="crawlAll">Not used by this implementation.</param>
/// <returns>An empty list; all results are written directly to the database.</returns>
protected override IList ExecuteCrawl(bool crawlAll) {
    IList list = new List<NoticeInfo>();

    // Rows without a project code whose content is non-empty and is not just the
    // "see attachment" placeholder text ('见附件' / '详见附件').
    string sql = "select Id,PrjCode,InfoCtx,InfoUrl from NoticeInfo where (PrjCode='' or PrjCode is null) ";
    sql += " and convert(varchar(max), InfoCtx) <> '见附件' and convert(varchar(max), InfoCtx)<>'详见附件' ";
    sql += " and datalength (InfoCtx)<>0 and datalength (InfoCtx) is not null";

    DataTable dt = ToolCoreDb.GetDbData(sql);
    if (dt == null) {
        return list;
    }

    foreach (DataRow row in dt.Rows) {
        string ctx = Convert.ToString(row["InfoCtx"]);
        string prjCode = ctx.GetNoticePrjCode();
        if (string.IsNullOrEmpty(prjCode)) {
            // Fallback: take the text between the "工程编号" and "工程名称" labels and strip colons and whitespace.
            prjCode = ctx.GetRegexBegEnd("工程编号", "工程名称").Replace(":", "").Replace(":", "").Replace("\r", "").Replace("\n", "").Replace(" ", "").Replace("\t", "");
        }

        // prjCode comes from crawled text: double any single quote so it cannot terminate
        // the SQL string literal (broken statement / SQL injection).
        string safePrjCode = prjCode.Replace("'", "''");
        string safeId = row["Id"].ToString().Replace("'", "''");
        string update = "update NoticeInfo set PrjCode='" + safePrjCode + "' where Id='" + safeId + "'";
        ToolCoreDb.ExecuteSql(update);
    }

    return list;
}
/// <summary>
/// Crawls the paged company list at <c>SiteUrl</c>, loads each company's detail page, replaces any
/// previously stored <c>CorpInfo</c> row with the same name and type, and then stores the company,
/// its leaders and the data added by <c>AddCorpQual</c>, <c>AddCorpTecStaff</c> and <c>GetOffAddress</c>.
/// </summary>
/// <param name="crawlAll">Not used by this implementation.</param>
/// <returns>Always <c>null</c>; results are written directly to the database.</returns>
protected override IList ExecuteCrawl(bool crawlAll) {
    string html = string.Empty;
    string cookiestr = string.Empty;
    string viewState = string.Empty;
    string eventValidation = string.Empty;
    int pageInt = 1;

    try {
        html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
    } catch {
        return null;
    }

    // The text between a ',' and the following ')' in the href of the last pager link is parsed as the page count.
    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ContentPlaceHolder1_AspNetPager1")), true), new TagNameFilter("a")));
    if (pageNode != null && pageNode.Count > 0) {
        try {
            string temp = pageNode[pageNode.Count - 1].GetATagHref().Replace("'", "").Replace(")", "kdxx").Replace(",", "xxdk");
            pageInt = int.Parse(temp.GetRegexBegEnd("xxdk", "kdxx"));
        } catch {
            // Best effort: when the pager cannot be parsed only the first page is crawled.
        }
    }

    for (int i = 1; i <= pageInt; i++) {
        if (i > 1) {
            try {
                if (i == 2) {
                    // The hidden postback fields are taken once, from the first page, and reused for every later page.
                    viewState = ToolWeb.GetAspNetViewState(html);
                    eventValidation = ToolWeb.GetAspNetEventValidation(html);
                }
                NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                    new string[] {
                        "ctl00$ContentPlaceHolder1$ScriptManager1",
                        "ctl00$ContentPlaceHolder1$txtORGNAME",
                        "ctl00$ContentPlaceHolder1$txtORGCODE",
                        "ctl00$ContentPlaceHolder1$txtPNAME",
                        "ctl00$ContentPlaceHolder1$txtIDNUM",
                        "ctl00$ContentPlaceHolder1$txtHIREERORGNAME",
                        "ctl00$ContentPlaceHolder1$txtHIREERORGCODE",
                        "ctl00$ContentPlaceHolder1$ddlRegType",
                        "ctl00$ContentPlaceHolder1$ddlTitle",
                        "ctl00$ContentPlaceHolder1$ddlABC",
                        "ctl00$ContentPlaceHolder1$ddlCert",
                        "__VIEWSTATE",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__EVENTVALIDATION",
                        "__ASYNCPOST"
                    },
                    new string[] {
                        "ctl00$ContentPlaceHolder1$UpdatePanel1|ctl00$ContentPlaceHolder1$AspNetPager1",
                        "", "", "", "", "", "", "", "", "", "",
                        viewState,
                        "ctl00$ContentPlaceHolder1$AspNetPager1",
                        i.ToString(),
                        eventValidation,
                        "true"
                    });
                html = ToolWeb.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
            } catch {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "data-grid")));
        if (nodeList == null || nodeList.Count == 0) {
            continue;
        }
        TableTag table = nodeList[0] as TableTag;
        if (table == null) {
            continue;
        }

        // Row 0 is skipped (the loop starts at 1).
        for (int j = 1; j < table.RowCount; j++) {
            TableRow tr = table.Rows[j];
            // Columns 0-3 are read below; without this guard a shorter row would throw and abort the whole crawl.
            if (tr.ColumnCount < 4) {
                continue;
            }

            string corpCode = string.Empty, businessType = string.Empty, linkPhone = string.Empty, fax = string.Empty,
                   email = string.Empty, corpSite = string.Empty, isoQualNum = string.Empty, isoEnvironNum = string.Empty,
                   offAdr = string.Empty;

            string corpName = tr.Columns[0].ToNodePlainString();
            string linkMan = tr.Columns[1].ToNodePlainString();
            string techLeader = tr.Columns[2].ToNodePlainString();
            string localLeader = tr.Columns[3].ToNodePlainString();

            // The detail path is the first quoted argument of the OpenWin('...') onclick handler.
            string cUrl = tr.Columns[0].GetATagValue("onclick").Replace("OpenWin('", "");
            int quotePos = cUrl.IndexOf("'");
            if (quotePos > 0) {
                cUrl = "http://113.108.219.40/intogd/" + cUrl.Remove(quotePos);
            }

            string htmldtl;
            try {
                htmldtl = ToolWeb.GetHtmlByUrl(cUrl, Encoding.UTF8);
            } catch {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "data-table")));
            if (dtlNode == null || dtlNode.Count == 0) {
                continue;
            }
            TableTag dtlTable = dtlNode[0] as TableTag;
            if (dtlTable == null) {
                continue;
            }

            // Flatten the detail table into "label:value\r\n" text: "td-left" cells are followed by ':', all others by a line break.
            StringBuilder ctxBuilder = new StringBuilder();
            for (int k = 0; k < dtlTable.RowCount; k++) {
                for (int d = 0; d < dtlTable.Rows[k].ColumnCount; d++) {
                    TableColumn col = dtlTable.Rows[k].Columns[d];
                    bool isLabel = col.GetAttribute("class") == "td-left";
                    ctxBuilder.Append(col.ToNodePlainString()).Append(isLabel ? ":" : "\r\n");
                }
            }
            string ctx = ctxBuilder.ToString();

            string regDate = ctx.GetRegex("成立时间,注册时间").GetDateRegex();
            string regFund = ctx.GetRegex("注册资本");
            string businessCode = ctx.GetRegex("营业执照注册号");
            string corpType = "外地进粤企业";
            string corpAddress = ctx.GetRegex("注册详细地址");
            if (!string.IsNullOrEmpty(regFund) && !regFund.Contains("万")) {
                regFund += "万";
            }

            CorpInfo corp = ToolDb.GenCorpInfo(corpName, corpCode, corpAddress, regDate, regFund, businessCode, businessType, linkMan, linkPhone, fax, email, corpSite, corpType, "广东省", "广东地区", "广东省住房和城乡建设厅", cUrl, isoQualNum, isoEnvironNum, offAdr);

            // The company name is crawled text: double single quotes so it cannot break out of the SQL literal.
            string strSql = string.Format("select Id from CorpInfo where CorpName='{0}' and CorpType='{1}'", (corp.CorpName ?? string.Empty).Replace("'", "''"), corp.CorpType);
            DataTable dt = ToolCoreDb.GetDbData(strSql);
            if (dt != null && dt.Rows.Count > 0) {
                // Remove the previously stored company and its dependent rows before saving the fresh copy.
                string id = dt.Rows[0]["Id"].ToString();
                ToolCoreDb.ExecuteSql(string.Format("delete from CorpInstitution where CorpId='{0}'", id));
                ToolCoreDb.ExecuteSql(string.Format("delete from CorpQual where CorpId='{0}'", id));
                ToolCoreDb.ExecuteSql(string.Format("delete from CorpLeader where CorpId='{0}'", id));
                ToolCoreDb.ExecuteSql(string.Format("delete from CorpTecStaff where CorpId='{0}'", id));
                ToolCoreDb.ExecuteSql(string.Format("delete from CorpInfo where Id='{0}'", id));
            }

            if (!ToolDb.SaveEntity(corp, this.ExistCompareFields)) {
                continue;
            }

            if (!string.IsNullOrEmpty(linkMan)) {
                CorpLeader leader = ToolDb.GenCorpLeader(corp.Id, linkMan, "", "企业法定代表人", cUrl);
                ToolDb.SaveEntity(leader, "");
            }
            if (!string.IsNullOrEmpty(techLeader)) {
                CorpLeader leader = ToolDb.GenCorpLeader(corp.Id, techLeader, "", "技术负责人", cUrl);
                ToolDb.SaveEntity(leader, "");
            }
            if (!string.IsNullOrEmpty(localLeader)) {
                CorpLeader leader = ToolDb.GenCorpLeader(corp.Id, localLeader, "", "驻粤负责人", cUrl);
                ToolDb.SaveEntity(leader, "");
            }

            AddCorpQual(corp, htmldtl);
            AddCorpTecStaff(corp, htmldtl);
            GetOffAddress(htmldtl, cUrl, corp);
        }
    }

    return null;
}
/// <summary>
/// Crawls the paged company list at <c>SiteUrl</c> (form POST per page), loads each company's detail
/// page, replaces any previously stored <c>CorpInfo</c> row with the same name, type and source, and
/// then calls the <c>AddCorp*</c> helpers for the saved company.
/// </summary>
/// <param name="crawlAll">Not used by this implementation.</param>
/// <returns>Always <c>null</c>; results are written directly to the database.</returns>
protected override IList ExecuteCrawl(bool crawlAll) {
    // NOTE(review): the page loop starts at a hard-coded page 320 rather than 1, so earlier pages are
    // never crawled. This looks like a leftover from resuming an interrupted run; the value is kept
    // unchanged here - confirm and reset to 1 if a full crawl is intended.
    const int firstPage = 320;
    const string infoUrl = "http://portal.szjs.gov.cn:8888/publicShow/corpDetail.html";

    int count = 1;
    int pageInt = 1;
    string html;
    try {
        html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
    } catch {
        return null;
    }

    // The page count is the text between "总页数" and "页" in the pager div.
    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("style", "text-align:center;padding-bottom:10px;")));
    if (pageNode != null && pageNode.Count > 0) {
        string temp = pageNode[0].ToNodePlainString();
        try {
            temp = temp.GetRegexBegEnd("总页数", "页");
            pageInt = int.Parse(temp.Replace(":", ""));
        } catch {
            // Best effort: when the pager cannot be parsed the page count stays at 1.
        }
    }

    for (int i = firstPage; i <= pageInt; i++) {
        if (i > 1) {
            NameValueCollection nvc = ToolWeb.GetNameValueCollection(new string[] { "param", "corpType", "corp_name", "page" }, new string[] { "", "1", "", i.ToString() });
            string pageHtml;
            if (!TryPostWithRetry(this.SiteUrl, nvc, out pageHtml)) {
                continue;
            }
            html = pageHtml;
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "bean")));
        if (nodeList == null || nodeList.Count == 0) {
            continue;
        }
        TableTag table = nodeList[0] as TableTag;
        if (table == null) {
            continue;
        }

        // Row 0 is skipped (the loop starts at 1).
        for (int j = 1; j < table.RowCount; j++) {
            TableRow tr = table.Rows[j];
            // Columns 1-3 are read below; without this guard a shorter row would throw and abort the whole crawl.
            if (tr.ColumnCount < 4) {
                continue;
            }

            string corpAddress = string.Empty, regDate = string.Empty, regFund = string.Empty, businessCode = string.Empty,
                   businessType = string.Empty, linkPhone = string.Empty, fax = string.Empty, email = string.Empty,
                   corpSite = string.Empty, cUrl = string.Empty, isoQualNum = string.Empty, isoEnvironNum = string.Empty,
                   offAdr = string.Empty;

            string corpName = tr.Columns[1].ToNodePlainString();
            string corpCode = tr.Columns[2].ToNodePlainString();
            string linkMan = tr.Columns[3].ToNodePlainString();
            string href = tr.Columns[1].GetATagHref();

            string[] postParams = null;
            NameValueCollection dtlNvc = null;
            try {
                // The link is a corpDetail('<param>','<orgCode>') call; strip the call syntax and split the arguments.
                string temp = href.Replace("corpDetail", "").Replace("(", "").Replace(")", "").Replace("'", "");
                postParams = temp.Split(',');
                dtlNvc = ToolWeb.GetNameValueCollection(new string[] { "param", "corpType", "orgCode" }, new string[] { postParams[0], "1", postParams[1] });
                cUrl = infoUrl + string.Format("?param={0}&corpType=1&orgCode={1}", postParams[0], corpCode);
            } catch {
                continue;
            }

            // Fix: the retry attempts used to discard the downloaded page, so a successful retry still
            // produced a company record without any detail fields.
            string htmldtl;
            if (!TryPostWithRetry(infoUrl, dtlNvc, out htmldtl)) {
                continue;
            }

            // Every "th" substring is rewritten to "td" so header cells are returned as table columns.
            // This is a plain text replacement and also affects any other "th" in the page.
            parser = new Parser(new Lexer(htmldtl.Replace("th", "td")));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("align", "center")));
            TableTag tabledtl = (dtlNode != null && dtlNode.Count > 0) ? dtlNode[0] as TableTag : null;
            if (tabledtl != null) {
                // Flatten the table into "label:value\r\n" text: the first cell of a row is followed by ':', the others by a line break.
                StringBuilder ctxBuilder = new StringBuilder();
                for (int d = 0; d < tabledtl.RowCount; d++) {
                    for (int k = 0; k < tabledtl.Rows[d].ColumnCount; k++) {
                        ctxBuilder.Append(tabledtl.Rows[d].Columns[k].ToNodePlainString()).Append(k == 0 ? ":" : "\r\n");
                    }
                }
                string ctx = ctxBuilder.ToString();
                linkPhone = ctx.GetRegex("联系电话");
                fax = ctx.GetRegex("传真");
                email = ctx.GetRegex("电子邮箱");
                corpAddress = ctx.GetRegex("注册地址");
                regFund = ctx.GetRegex("注册资金");
                regDate = ctx.GetRegex("设立时间");
            }

            CorpInfo info = ToolDb.GenCorpInfo(corpName, corpCode, corpAddress, regDate, regFund, businessCode, businessType, linkMan, linkPhone, fax, email, corpSite, "建筑业企业", "广东省", "深圳市", "深圳市住房和建设局", cUrl, isoQualNum, isoEnvironNum, offAdr);

            // The company name is crawled text: double single quotes so it cannot break out of the SQL literal.
            object obj = ToolDb.ExecuteScalar(string.Format("select Id from CorpInfo where CorpName='{0}' and CorpType='{1}' and InfoSource='{2}'", (info.CorpName ?? string.Empty).Replace("'", "''"), info.CorpType, info.InfoSource));

            // The counters stay 0 when no stored company exists, so every child loader runs for a new company.
            int qualCount = 0, leaderCount = 0, awardCount = 0, punishCount = 0, seclicCount = 0,
                seclicstaffCount = 0, tecstaffCount = 0, resultCount = 0, infoCount = 0;
            if (obj != null && obj.ToString() != "") {
                // Remove the stored company and its dependent rows. CorpCert and CorpDevice are not touched
                // (the corresponding statements were disabled in the previous version of this code).
                string id = obj.ToString();
                qualCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpQual where CorpId='{0}'", id));
                leaderCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpLeader where CorpId='{0}'", id));
                awardCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpAward where CorpId='{0}'", id));
                punishCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpPunish where CorpId='{0}'", id));
                seclicCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpSecLic where CorpId='{0}'", id));
                seclicstaffCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpSecLicStaff where CorpId='{0}'", id));
                tecstaffCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpTecStaff where CorpId='{0}'", id));
                resultCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpResults where CorpId='{0}'", id));
                infoCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpInfo where Id='{0}'", id));
            }

            // A result of -1 is treated as "delete did not succeed" (presumably ExecuteSql's error value - confirm):
            // the company is not re-saved if its old row could not be removed, and a child table is only
            // re-filled if its old rows were removed.
            if (infoCount != -1 && ToolDb.SaveEntity(info, string.Empty)) {
                if (qualCount != -1) {
                    AddCorpQual(info, postParams[0], "1");
                }
                if (awardCount != -1) {
                    AddCorpAward(info, postParams[0], "1");
                }
                if (punishCount != -1) {
                    AddCorpPunish(info, postParams[0], "1");
                }
                if (resultCount != -1) {
                    AddCorpResults(info, postParams[0], "1");
                }
                if (seclicCount != -1) {
                    AddCorpSecLic(info, postParams[0], "1");
                }
                if (seclicstaffCount != -1) {
                    AddCorpSecLicStaff(info, postParams[0], "1");
                }
                if (tecstaffCount != -1) {
                    AddCorpTecStaff(info, postParams[0], "1");
                }
                if (leaderCount != -1) {
                    AddCorpLeader(info, postParams[0], "1");
                }
            }

            // Pause for ten minutes after every 89 processed companies.
            count++;
            if (count >= 90) {
                count = 1;
                Thread.Sleep(10 * 60 * 1000);
            }
        }
    }

    ToolCoreDb.ExecuteProcedure();
    string sql = "update a set a.FkId= c.Id FROM AttenCorp a left join CorpInfo c on c.CorpName=A.CorpName";
    ToolDb.ExecuteSql(sql);
    return null;
}

/// <summary>
/// POSTs <paramref name="form"/> to <paramref name="url"/>, making up to three attempts and sleeping
/// 12, 8 and 8 minutes after the first, second and third failure respectively.
/// </summary>
/// <param name="url">Address to post to.</param>
/// <param name="form">Form fields to send.</param>
/// <param name="html">The downloaded page on success; an empty string when all attempts failed.</param>
/// <returns><c>true</c> when one of the attempts succeeded.</returns>
private static bool TryPostWithRetry(string url, NameValueCollection form, out string html) {
    int[] pauseMinutes = { 12, 8, 8 };
    foreach (int minutes in pauseMinutes) {
        try {
            html = ToolWeb.GetHtmlByUrl(url, form, Encoding.UTF8);
            return true;
        } catch {
            Thread.Sleep(minutes * 60 * 1000);
        }
    }
    html = string.Empty;
    return false;
}
/// <summary>
/// Crawls the paged company list at <c>SiteUrl</c> (GET with a <c>page</c> query parameter), loads each
/// company's detail page, replaces any previously stored <c>CorpInfo</c> row with the same name, type
/// and source, and then calls the <c>AddCorp*</c> helpers with the detail HTML.
/// </summary>
/// <param name="crawlAll">Not used by this implementation.</param>
/// <returns>Always <c>null</c>; results are written directly to the database.</returns>
protected override IList ExecuteCrawl(bool crawlAll) {
    int count = 1;
    int pageInt = 1;
    string html;
    try {
        html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
    } catch {
        return null;
    }

    // The page count is the "page=" value in the href of the link with id "lx".
    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("id", "lx")));
    if (pageNode != null && pageNode.Count > 0) {
        try {
            string temp = pageNode.GetATagHref().GetRegexBegEnd("page=", "&");
            pageInt = int.Parse(temp);
        } catch {
            pageInt = 1;
        }
    }

    for (int i = 1; i <= pageInt; i++) {
        if (i > 1) {
            try {
                html = ToolWeb.GetHtmlByUrl(this.SiteUrl + "&page=" + i.ToString(), Encoding.Default);
            } catch {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "bean")));
        if (nodeList == null || nodeList.Count == 0) {
            continue;
        }
        TableTag table = nodeList[0] as TableTag;
        if (table == null) {
            continue;
        }

        // Row 0 is skipped (the loop starts at 1).
        for (int j = 1; j < table.RowCount; j++) {
            TableRow tr = table.Rows[j];
            // Columns 1-3 are read below; without this guard a shorter row would throw and abort the whole crawl.
            if (tr.ColumnCount < 4) {
                continue;
            }

            string corpAddress = string.Empty, regDate = string.Empty, regFund = string.Empty, businessCode = string.Empty,
                   businessType = string.Empty, linkPhone = string.Empty, fax = string.Empty, email = string.Empty,
                   corpSite = string.Empty, isoQualNum = string.Empty, isoEnvironNum = string.Empty, offAdr = string.Empty;

            string corpName = tr.Columns[1].ToNodePlainString();
            string corpCode = tr.Columns[2].ToNodePlainString();
            string linkMan = tr.Columns[3].ToNodePlainString();
            string href = tr.Columns[1].GetATagValue("onclick");

            string cUrl;
            string htmldtl;
            try {
                // The onclick handler is a doView('<qybh>','<orgcode>') call; strip the call syntax and split the arguments.
                string temp = href.Replace("doView", "").Replace("(", "").Replace(")", "").Replace("'", "");
                string[] url = temp.Split(',');
                cUrl = "http://61.144.226.2:8001/web/enterprs/unitInfoAction.do?method=toView&qybh=" + url[0] + "&certType=1&orgcode=" + url[1];
                htmldtl = ToolWeb.GetHtmlByUrl(cUrl, Encoding.Default);
            } catch {
                continue;
            }

            // Every "th" substring is rewritten to "td" so header cells are returned as table columns.
            // This is a plain text replacement and also affects any other "th" in the page.
            parser = new Parser(new Lexer(htmldtl.Replace("th", "td")));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "infoTableL")));
            TableTag tabledtl = (dtlNode != null && dtlNode.Count > 0) ? dtlNode[0] as TableTag : null;
            if (tabledtl != null) {
                // Flatten the table into "label:value\r\n" text: the first cell of a row is followed by ':', the others by a line break.
                StringBuilder ctxBuilder = new StringBuilder();
                for (int d = 0; d < tabledtl.RowCount; d++) {
                    for (int k = 0; k < tabledtl.Rows[d].ColumnCount; k++) {
                        ctxBuilder.Append(tabledtl.Rows[d].Columns[k].ToNodePlainString()).Append(k == 0 ? ":" : "\r\n");
                    }
                }
                string ctx = ctxBuilder.ToString();
                linkPhone = ctx.GetRegex("联系电话");
                fax = ctx.GetRegex("传真");
                email = ctx.GetRegex("电子邮箱");
                corpAddress = ctx.GetRegex("注册地址");
                regFund = ctx.GetRegex("注册资金");
                regDate = ctx.GetRegex("设立时间");
            }

            CorpInfo info = ToolDb.GenCorpInfo(corpName, corpCode, corpAddress, regDate, regFund, businessCode, businessType, linkMan, linkPhone, fax, email, corpSite, "设计与施工一体化企业", "广东省", "深圳市", "深圳市住房和建设局", cUrl, isoQualNum, isoEnvironNum, offAdr);

            // The company name is crawled text: double single quotes so it cannot break out of the SQL literal.
            object obj = ToolDb.ExecuteScalar(string.Format("select Id from CorpInfo where CorpName='{0}' and CorpType='{1}' and InfoSource='{2}'", (info.CorpName ?? string.Empty).Replace("'", "''"), info.CorpType, info.InfoSource));

            // The counters stay 0 when no stored company exists, so every child loader runs for a new company.
            int qualCount = 0, leaderCount = 0, awardCount = 0, certCount = 0, punishCount = 0, seclicCount = 0,
                seclicstaffCount = 0, tecstaffCount = 0, deviceCount = 0, resultCount = 0, infoCount = 0;
            if (obj != null && obj.ToString() != "") {
                // Remove the stored company and its dependent rows before saving the fresh copy.
                string id = obj.ToString();
                qualCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpQual where CorpId='{0}'", id));
                leaderCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpLeader where CorpId='{0}'", id));
                awardCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpAward where CorpId='{0}'", id));
                certCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpCert where CorpId='{0}'", id));
                punishCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpPunish where CorpId='{0}'", id));
                seclicCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpSecLic where CorpId='{0}'", id));
                seclicstaffCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpSecLicStaff where CorpId='{0}'", id));
                tecstaffCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpTecStaff where CorpId='{0}'", id));
                deviceCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpDevice where CorpId='{0}'", id));
                resultCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpResults where CorpId='{0}'", id));
                infoCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpInfo where Id='{0}'", id));
            }

            // A result of -1 is treated as "delete did not succeed" (presumably ExecuteSql's error value - confirm):
            // the company is not re-saved if its old row could not be removed, and a child table is only
            // re-filled if its old rows were removed.
            if (infoCount != -1 && ToolDb.SaveEntity(info, string.Empty)) {
                if (qualCount != -1) {
                    AddCorpQual(info, htmldtl);
                }
                if (awardCount != -1) {
                    AddCorpAward(info, htmldtl);
                }
                if (certCount != -1) {
                    AddCorpCert(info, htmldtl);
                }
                if (deviceCount != -1) {
                    AddCorpDevice(info, htmldtl);
                }
                if (punishCount != -1) {
                    AddCorpPunish(info, htmldtl);
                }
                if (resultCount != -1) {
                    AddCorpResults(info, htmldtl);
                }
                if (seclicCount != -1) {
                    AddCorpSecLic(info, htmldtl);
                }
                if (seclicstaffCount != -1) {
                    AddCorpSecLicStaff(info, htmldtl);
                }
                if (tecstaffCount != -1) {
                    AddCorpTecStaff(info, htmldtl);
                }
                if (leaderCount != -1) {
                    AddCorpLeader(info, htmldtl);
                }
            }

            // Pause for 700 seconds after every 89 processed companies.
            count++;
            if (count >= 90) {
                count = 1;
                Thread.Sleep(700000);
            }
        }
    }

    ToolCoreDb.ExecuteProcedure();
    string sql = "update a set a.FkId= c.Id FROM AttenCorp a left join CorpInfo c on c.CorpName=A.CorpName";
    ToolDb.ExecuteSql(sql);
    return null;
}
/// <summary>
/// Crawls the JSON company grid (100 rows per page, name filter "公司"), loads each company's detail
/// view, replaces any previously stored <c>CorpInfo</c> row with the same name, type and source, and
/// then calls <c>AddCorpQual</c>, <c>AddCorpLeader</c> and <c>AddCorpStaff</c> for the saved company.
/// </summary>
/// <param name="crawlAll">Not used by this implementation.</param>
/// <returns>Always <c>null</c>; results are written directly to the database.</returns>
protected override IList ExecuteCrawl(bool crawlAll) {
    string newUrl = "http://202.104.65.182:8081/G2/gfmweb/web-enterprise!list.do?data&filter_params_=enterpriseId,rowNum,enterpriseBaseId,enterpriseName,organizationCode&defined_operations_=&nocheck_operations_=&";
    string nd = ToolHtml.GetDateTimeLong(DateTime.Now).ToString();
    string[] formKeys = new string[] { "gridSearch", "nd", "PAGESIZE", "PAGE", "sortField", "sortDirection", "searchVal", "_enterpriseName_like", "entTypeCodes" };

    NameValueCollection nvc = ToolWeb.GetNameValueCollection(formKeys, new string[] { "true", nd, "100", "1", "", "asc", "1", "公司", "" });
    string html;
    try {
        html = ToolWeb.GetHtmlByUrl(newUrl, nvc, Encoding.UTF8);
    } catch {
        return null;
    }

    // The "total" field of the first response is used as the number of pages to request.
    JavaScriptSerializer serializer = new JavaScriptSerializer();
    Dictionary<string, object> smsTypeJson = (Dictionary<string, object>)serializer.DeserializeObject(html);
    int pageInt = int.Parse(smsTypeJson["total"].ToString());

    for (int i = 1; i <= pageInt; i++) {
        if (i > 1) {
            nvc = ToolWeb.GetNameValueCollection(formKeys, new string[] { "true", nd, "100", i.ToString(), "", "asc", "1", "公司", "" });
            try {
                html = ToolWeb.GetHtmlByUrl(newUrl, nvc, Encoding.UTF8);
                smsTypeJson = (Dictionary<string, object>)serializer.DeserializeObject(html);
            } catch {
                continue;
            }
        }

        object[] objList = (object[])smsTypeJson["data"];
        foreach (object obj in objList) {
            Dictionary<string, object> dic = obj as Dictionary<string, object>;
            if (dic == null) {
                // Not a JSON object: skip the entry instead of failing on the first field access.
                continue;
            }

            string businessType = string.Empty, isoQualNum = string.Empty, isoEnvironNum = string.Empty, corpType = string.Empty;

            string corpName = Convert.ToString(dic["enterpriseName"]);
            string corpCode = Convert.ToString(dic["organizationCode"]);
            string enterpriseId = Convert.ToString(dic["enterpriseId"]);
            string cUrl = "http://202.104.65.182:8081/G2/webdrive/web-enterprise!view.do?enterpriseId=" + enterpriseId;

            string htmldtl;
            try {
                htmldtl = ToolWeb.GetHtmlByUrl(cUrl).GetJsString();
            } catch {
                continue;
            }

            string corpAddress = ToolHtml.GetHtmlInputValue(htmldtl, "_M.registerAddress");
            string regDate = ToolHtml.GetHtmlInputValue(htmldtl, "_M.registerTime");
            string regFund = ToolHtml.GetHtmlInputValue(htmldtl, "_M.licenseCapital");
            if (!string.IsNullOrEmpty(regFund)) {
                regFund += "万元";
            }
            string businessCode = ToolHtml.GetHtmlInputValue(htmldtl, "_M.licenseRegistrationCode");
            string corpSite = ToolHtml.GetHtmlInputValue(htmldtl, "_M.firmWebsite");
            string linkMan = ToolHtml.GetHtmlInputValue(htmldtl, "_M.name");
            string email = ToolHtml.GetHtmlInputValue(htmldtl, "_M.email");
            string linkPhone = ToolHtml.GetHtmlInputValue(htmldtl, "_M.tel");
            string fax = ToolHtml.GetHtmlInputValue(htmldtl, "_M.fax");

            // The company type is the comma-joined text of the "g2-cell col-sm-6" divs from index 2 on,
            // leaving out empty cells and cells whose text parses as a date.
            Parser parser = new Parser(new Lexer(htmldtl));
            NodeList typeNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "g2-cell col-sm-6")));
            if (typeNode != null && typeNode.Count > 0) {
                List<string> typeParts = new List<string>();
                for (int j = 2; j < typeNode.Count; j++) {
                    string semp = typeNode[j].ToNodePlainString();
                    DateTime ignored;
                    if (string.IsNullOrEmpty(semp) || DateTime.TryParse(semp, out ignored)) {
                        continue;
                    }
                    typeParts.Add(semp);
                }
                corpType = string.Join(",", typeParts.ToArray());
            }

            CorpInfo info = ToolDb.GenCorpInfo(corpName, corpCode, corpAddress, regDate, regFund, businessCode, businessType, linkMan, linkPhone, fax, email, corpSite, corpType, "广东省", "广东地区", "广东省住房和城乡建设厅", cUrl, isoQualNum, isoEnvironNum, string.Empty);

            // Name and type are crawled text: double single quotes so they cannot break out of the SQL literals.
            string exisSql = string.Format("select Id from CorpInfo where CorpName='{0}' and CorpType='{1}' and InfoSource='{2}'",
                (info.CorpName ?? string.Empty).Replace("'", "''"),
                (info.CorpType ?? string.Empty).Replace("'", "''"),
                (info.InfoSource ?? string.Empty).Replace("'", "''"));
            string corpId = Convert.ToString(ToolDb.ExecuteScalar(exisSql));

            if (!string.IsNullOrEmpty(corpId)) {
                // Replace the stored company: delete it and its dependent rows, then save the fresh copy.
                int infoCount = ToolDb.ExecuteSql(string.Format("delete from CorpInfo where Id='{0}'", corpId));
                int qualCount = ToolDb.ExecuteSql(string.Format("delete from CorpQual where CorpId='{0}'", corpId));
                int leaderCount = ToolDb.ExecuteSql(string.Format("delete from CorpLeader where CorpId='{0}'", corpId));
                int tecstaffCount = ToolDb.ExecuteSql(string.Format("delete from CorpSecLicStaff where CorpId='{0}'", corpId));

                // Fix: child rows used to be added even when the company itself was not re-saved, leaving
                // rows that reference a CorpInfo that does not exist. They are now only added after a
                // successful save, as is already done for a new company below.
                if (infoCount <= 0 || !ToolDb.SaveEntity(info, "")) {
                    continue;
                }

                if (qualCount >= 0) {
                    try {
                        AddCorpQual(info, enterpriseId);
                    } catch (Exception ex) {
                        Logger.Error(ex);
                    }
                }
                if (leaderCount >= 0) {
                    try {
                        AddCorpLeader(info, enterpriseId);
                    } catch (Exception ex) {
                        Logger.Error(ex);
                    }
                }
                if (tecstaffCount >= 0) {
                    try {
                        AddCorpStaff(info, enterpriseId);
                    } catch (Exception ex) {
                        Logger.Error(ex);
                    }
                }
            } else if (ToolDb.SaveEntity(info, "")) {
                try {
                    AddCorpLeader(info, enterpriseId);
                } catch (Exception ex) {
                    Logger.Error(ex);
                }
                try {
                    AddCorpQual(info, enterpriseId);
                } catch (Exception ex) {
                    Logger.Error(ex);
                }
                try {
                    AddCorpStaff(info, enterpriseId);
                } catch (Exception ex) {
                    Logger.Error(ex);
                }
            }
        }
    }

    ToolCoreDb.ExecuteProcedure();
    return null;
}
/// <summary>
/// Queries the search form at <c>SiteUrl</c> once for every combination of enterprise-category
/// option (read from the <c>ddlENT_SORT_ID</c> select) and hard-coded qualification level, pages
/// through the <c>tab_ent</c> result table, downloads each company's detail page and its
/// qualification-certificate pages, and stores the company, its leaders and its qualifications
/// through <c>ToolDb</c>. An existing company with the same name and URL is deleted (together with
/// its CorpQual and CorpLeader rows) before the fresh record is saved.
/// </summary>
/// <param name="crawlAll">Not read by this implementation.</param>
/// <returns>Always <c>null</c>; all results are persisted as a side effect.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    const string detailBaseUrl = "http://113.108.219.40/PlatForm/SearchCenter/";
    // Divisor used to turn the "共…条" total into a page count.
    const int rowsPerPage = 15;
    // At most this many result pages are requested per search.
    const int maxPagesPerSearch = 20;

    string html = string.Empty;
    string cookiestr = string.Empty;
    string viewState = string.Empty;
    int pageInt = 1;

    try
    {
        html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.UTF8, ref cookiestr);
    }
    catch
    {
        return null;
    }

    string[] levelNode = new string[] { "特级", "特级(旧标准)", "一级", "一级(旧标准)", "二级", "二级(旧标准)", "三级", "三级(旧标准)", "暂定三级(旧标准)", "不分等级" };

    Parser parser = new Parser(new Lexer(html));
    NodeList typeNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_ddlENT_SORT_ID")), true), new TagNameFilter("option")));
    if (typeNode != null && typeNode.Count > 0)
    {
        // The first option is skipped (presumably a placeholder entry — confirm against the page).
        for (int t = 1; t < typeNode.Count; t++)
        {
            OptionTag opTag = typeNode[t] as OptionTag;
            if (opTag == null)
            {
                continue;
            }
            string opValue = opTag.GetAttribute("value");

            // NOTE(review): this loop starts at index 1, so levelNode[0] ("特级") is never queried.
            // Kept as in the original; confirm whether that is intentional.
            for (int l = 1; l < levelNode.Length; l++)
            {
                string leveVlaue = levelNode[l];

                // The form echoes a random number in a hidden input that has to be posted back.
                parser = new Parser(new Lexer(html));
                NodeList inputNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("input"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_ValidateCode1_txtRanNum")));
                string valiCode = string.Empty;
                if (inputNode != null && inputNode.Count > 0)
                {
                    InputTag ranNumTag = inputNode[0] as InputTag;
                    if (ranNumTag != null)
                    {
                        valiCode = ranNumTag.GetAttribute("value");
                    }
                }

                viewState = ToolWeb.GetAspNetViewState(html);
                NameValueCollection typeNvc = ToolWeb.GetNameValueCollection(
                    new string[] { "ctl00_ContentPlaceHolder1_toolkitScriptManager1_HiddenField", "__EVENTTARGET", "__EVENTARGUMENT", "__LASTFOCUS", "__VIEWSTATE", "ctl00$ContentPlaceHolder1$ddlENT_SORT_ID", "ctl00$ContentPlaceHolder1$ddlRank", "ctl00$ContentPlaceHolder1$txtEnt_name", "ctl00$ContentPlaceHolder1$ValidateCode1$txtValidateCode", "ctl00$ContentPlaceHolder1$ValidateCode1$txtRanNum", "ctl00$ContentPlaceHolder1$btnsearch" },
                    new string[] { "", "", "", "", viewState, opValue, leveVlaue, "", valiCode, valiCode, "搜 索" });
                try
                {
                    html = ToolWeb.GetHtmlByUrl(SiteUrl, typeNvc, Encoding.UTF8, ref cookiestr);
                }
                catch
                {
                    continue;
                }

                // Reset for every search so a result without a pager does not inherit the page
                // count of the previous category/level combination.
                pageInt = 1;
                parser = new Parser(new Lexer(html));
                NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_AspNetPager1")));
                if (pageList != null && pageList.Count > 0)
                {
                    try
                    {
                        string totalText = pageList[0].ToPlainTextString().GetRegexBegEnd("共", "条");
                        int total = int.Parse(totalText);
                        int fullPages = total / rowsPerPage;
                        pageInt = (total % rowsPerPage != 0) ? fullPages + 1 : fullPages;
                    }
                    catch
                    {
                        pageInt = 1;
                    }
                }

                for (int i = 1; i <= pageInt && i <= maxPagesPerSearch; i++)
                {
                    if (i > 1)
                    {
                        try
                        {
                            parser = new Parser(new Lexer(html));
                            NodeList pageInputNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("input"), new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_ValidateCode1_txtRanNum")));
                            string pageValiCode = string.Empty;
                            if (pageInputNode != null && pageInputNode.Count > 0)
                            {
                                pageValiCode = (pageInputNode[0] as InputTag).GetAttribute("value");
                            }
                            viewState = ToolWeb.GetAspNetViewState(html);
                            NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                                new string[] { "ctl00$ContentPlaceHolder1$ddlENT_SORT_ID", "ctl00$ContentPlaceHolder1$ddlRank", "ctl00$ContentPlaceHolder1$txtEnt_name", "ctl00$ContentPlaceHolder1$ValidateCode1$txtRanNum", "ctl00$ContentPlaceHolder1$ValidateCode1$txtValidateCode", "ctl00_ContentPlaceHolder1_toolkitScriptManager1_HiddenField", "__EVENTARGUMENT", "__EVENTTARGET", "__LASTFOCUS", "__VIEWSTATE" },
                                new string[] { opValue, leveVlaue, "", pageValiCode, "", "", i.ToString(), "ctl00$ContentPlaceHolder1$AspNetPager1", "", viewState });
                            html = ToolWeb.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                        }
                        catch
                        {
                            continue;
                        }
                    }

                    parser = new Parser(new Lexer(html));
                    NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "tab_ent")));
                    if (nodeList == null || nodeList.Count == 0)
                    {
                        continue;
                    }
                    TableTag table = nodeList[0] as TableTag;
                    if (table == null)
                    {
                        continue;
                    }

                    // Walk the result rows; row 0 is skipped.
                    for (int j = 1; j < table.RowCount; j++)
                    {
                        string CorpName = string.Empty, CorpCode = string.Empty, CorpAddress = string.Empty, RegDate = string.Empty, RegFund = string.Empty,
                               BusinessCode = string.Empty, BusinessType = string.Empty, LinkMan = string.Empty, LinkPhone = string.Empty, Fax = string.Empty,
                               Email = string.Empty, CorpSite = string.Empty, cUrl = string.Empty, ISOQualNum = string.Empty, ISOEnvironNum = string.Empty,
                               corpType = string.Empty, corpMgr = string.Empty, businessMgr = string.Empty, tecMgr = string.Empty;
                        string CorpLevey = string.Empty;

                        TableRow tr = table.Rows[j];
                        // Columns 1..4 are read below; a shorter row would otherwise throw and
                        // abort the whole crawl.
                        if (tr.ColumnCount < 5)
                        {
                            continue;
                        }

                        string qualStr = tr.Columns[2].ToHtml();
                        CorpName = tr.Columns[1].ToNodePlainString();
                        CorpLevey = tr.Columns[3].ToNodePlainString();
                        cUrl = detailBaseUrl + tr.Columns[1].GetATagHref();

                        // Every link in column 4 points at one qualification-certificate page.
                        List<string> quaList = new List<string>();
                        parser = new Parser(new Lexer(tr.Columns[4].ToHtml()));
                        NodeList quaNodeList = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (quaNodeList != null && quaNodeList.Count > 0)
                        {
                            for (int q = 0; q < quaNodeList.Count; q++)
                            {
                                quaList.Add(detailBaseUrl + quaNodeList[q].GetATagHref());
                            }
                        }
                        string quaUrl = detailBaseUrl + tr.Columns[4].GetATagHref();

                        string htldtl = string.Empty;
                        try
                        {
                            htldtl = ToolWeb.GetHtmlByUrl(cUrl, Encoding.UTF8);
                        }
                        catch
                        {
                            continue;
                        }

                        parser = new Parser(new Lexer(htldtl));
                        NodeList dtlList = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                        if (dtlList == null || dtlList.Count == 0)
                        {
                            continue;
                        }
                        TableTag tab = dtlList[0] as TableTag;
                        if (tab == null)
                        {
                            continue;
                        }

                        // Flatten the first table into "label:value\r\n" lines: cells at even
                        // positions are labels, cells at odd positions are values.
                        StringBuilder ctxBuilder = new StringBuilder();
                        for (int k = 0; k < tab.RowCount; k++)
                        {
                            for (int d = 0; d < tab.Rows[k].ColumnCount; d++)
                            {
                                string cell = tab.Rows[k].Columns[d].ToNodePlainString();
                                if ((d + 1) % 2 == 0)
                                {
                                    ctxBuilder.Append(cell).Append("\r\n");
                                }
                                else
                                {
                                    ctxBuilder.Append(cell.Replace(":", "").Replace(":", "")).Append(":");
                                }
                            }
                        }
                        string ctx = ctxBuilder.ToString();

                        corpType = ctx.GetRegex(new string[] { "企业类型", "类型" });
                        CorpAddress = ctx.GetRegex(new string[] { "企业注册地址", "地址" });
                        BusinessCode = ctx.GetRegex(new string[] { "营业执照注册号", "注册号" });
                        RegDate = ctx.GetRegex(new string[] { "成立时间", "成立日期", "时间", "日期" }).GetDateRegex();
                        LinkMan = ctx.GetRegex(new string[] { "企业法定代表人", "法定代表人" });
                        RegFund = ctx.GetRegex(new string[] { "注册资金", "资金" });
                        // Only add the unit when an amount was actually found; otherwise a bare
                        // "万" would be stored as the registered capital.
                        if (!string.IsNullOrEmpty(RegFund) && !RegFund.Contains("万"))
                        {
                            RegFund += "万";
                        }
                        corpMgr = ctx.GetRegex(new string[] { "企业经理" });
                        if (corpMgr.Contains("暂无"))
                        {
                            corpMgr = string.Empty;
                        }
                        businessMgr = ctx.GetRegex(new string[] { "经营负责人" });
                        if (businessMgr.Contains("暂无"))
                        {
                            businessMgr = string.Empty;
                        }
                        tecMgr = ctx.GetRegex(new string[] { "技术负责人" });
                        if (tecMgr.Contains("暂无"))
                        {
                            tecMgr = string.Empty;
                        }

                        CorpInfo info = ToolDb.GenCorpInfo(CorpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, corpType, "广东省", "广东地区", "广东省住房和城乡建设厅", cUrl, ISOQualNum, ISOEnvironNum, string.Empty);

                        // The values come from scraped HTML, so embedded single quotes are doubled
                        // before they are placed inside SQL string literals.
                        string sqlName = (info.CorpName ?? string.Empty).Replace("'", "''");
                        string sqlUrl = (info.Url ?? string.Empty).Replace("'", "''");
                        string strSql = string.Format("select Id from CorpInfo where CorpName='{0}' and Url='{1}'", sqlName, sqlUrl);
                        object obj = ToolDb.ExecuteScalar(strSql);
                        if (obj != null && obj.ToString() != "")
                        {
                            // Remove the previous snapshot of this company before re-inserting it.
                            ToolDb.ExecuteSql(string.Format("delete from CorpQual where CorpId='{0}'", obj));
                            ToolDb.ExecuteSql(string.Format("delete from CorpLeader where CorpId='{0}'", obj));
                            ToolCoreDb.ExecuteSql(string.Format("delete from CorpInfo where Id='{0}'", obj));
                        }

                        if (!ToolDb.SaveEntity(info, string.Empty))
                        {
                            continue;
                        }

                        if (!string.IsNullOrEmpty(LinkMan))
                        {
                            CorpLeader leader = ToolDb.GenCorpLeader(info.Id, LinkMan, "", "企业法定代表人", cUrl);
                            ToolDb.SaveEntity(leader, string.Empty);
                        }
                        if (!string.IsNullOrEmpty(corpMgr))
                        {
                            CorpLeader leader = ToolDb.GenCorpLeader(info.Id, corpMgr, "", "企业经理", cUrl);
                            ToolDb.SaveEntity(leader, string.Empty);
                        }
                        if (!string.IsNullOrEmpty(businessMgr))
                        {
                            CorpLeader leader = ToolDb.GenCorpLeader(info.Id, businessMgr, "", "经营负责人", cUrl);
                            ToolDb.SaveEntity(leader, string.Empty);
                        }
                        if (!string.IsNullOrEmpty(tecMgr))
                        {
                            CorpLeader leader = ToolDb.GenCorpLeader(info.Id, tecMgr, "", "技术负责人", cUrl);
                            ToolDb.SaveEntity(leader, string.Empty);
                        }

                        if (string.IsNullOrEmpty(qualStr))
                        {
                            continue;
                        }

                        // quaCtx deliberately keeps accumulating across certificate pages, exactly
                        // as the original did.
                        string quaCtx = string.Empty;
                        for (int c = 0; c < quaList.Count; c++)
                        {
                            string quaHtl = string.Empty;
                            try
                            {
                                quaHtl = ToolWeb.GetHtmlByUrl(quaList[c], Encoding.UTF8);
                            }
                            catch
                            {
                                // Nothing can be extracted from a page that failed to download.
                                continue;
                            }

                            parser = new Parser(new Lexer(quaHtl));
                            NodeList quaNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                            if (quaNode != null && quaNode.Count > 0)
                            {
                                TableTag quaTable = quaNode[0] as TableTag;
                                for (int k = 0; k < quaTable.RowCount; k++)
                                {
                                    for (int d = 0; d < quaTable.Rows[k].ColumnCount; d++)
                                    {
                                        string temp = quaTable.Rows[k].Columns[d].ToNodePlainString();
                                        if ((d + 1) % 2 == 0)
                                        {
                                            quaCtx += temp + "\r\n";
                                        }
                                        else
                                        {
                                            quaCtx += temp.Replace(":", "").Replace(":", "") + ":";
                                        }
                                    }
                                }
                            }

                            // The individual qualifications are listed in span#lblQuaInfo,
                            // separated by <br> variants; normalise them to a single delimiter.
                            string qualctx = string.Empty;
                            parser.Reset();
                            NodeList spanNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "lblQuaInfo")));
                            if (spanNode != null && spanNode.Count > 0)
                            {
                                qualctx = spanNode.ToHtml().GetReplace("<br/>,<br />,<br>,</br>", "∈").ToCtxString();
                            }
                            string str = qualctx.ToLower().Replace("<br/>", "∈").Replace("</br>", "∈").Replace("<br>", "∈");
                            str = Regex.Replace(str, "<[^>]*>", "");
                            string[] qual = str.Split('∈');

                            for (int q = 0; q < qual.Length; q++)
                            {
                                if (string.IsNullOrEmpty(qual[q]) || qual[q] == "--")
                                {
                                    continue;
                                }

                                string QualName = string.Empty, quaCode = string.Empty, QualSeq = string.Empty, qualNum = string.Empty,
                                       QualLevel = string.Empty, ValidDate = string.Empty, LicDate = string.Empty, LicUnit = string.Empty, quaType = string.Empty;

                                LicDate = quaCtx.GetRegex("发证日期,发证时间").GetDateRegex();
                                LicUnit = quaCtx.GetRegex("发证机关,发证机构");
                                ValidDate = quaCtx.GetRegex("证书有效期").GetDateRegex();
                                quaType = quaCtx.GetRegex("证书类型");

                                // "name .../level": everything after the first '/' is the level;
                                // without a '/', the level from the result row is used.
                                string value = qual[q];
                                int len = value.IndexOf("/");
                                if (len != -1)
                                {
                                    QualLevel = value.Substring(len, value.Length - len).Replace("/", "");
                                    value = value.Remove(len);
                                }
                                else
                                {
                                    QualLevel = CorpLevey;
                                }

                                string[] dtl = value.Split(' ');
                                QualName = dtl[0].Trim();
                                if (string.IsNullOrEmpty(QualName))
                                {
                                    QualName = dtl[dtl.Length - 1];
                                }
                                quaCode = quaCtx.GetRegex("证书编号");

                                for (int ty = 1; ty < dtl.Length; ty++)
                                {
                                    quaType += dtl[ty].Trim() + ",";
                                }
                                if (!string.IsNullOrEmpty(quaType) && quaType.Contains(","))
                                {
                                    // Drop the trailing separator only when there is one, and never
                                    // index into a string that has become empty (a lone "," used to
                                    // throw IndexOutOfRangeException here).
                                    if (quaType[quaType.Length - 1] == ',')
                                    {
                                        quaType = quaType.Substring(0, quaType.Length - 1);
                                    }
                                    if (quaType.Length > 0 && (quaType[0] == ',' || quaType[0] == ','))
                                    {
                                        quaType = quaType.Substring(1, quaType.Length - 1);
                                    }
                                }

                                qualNum = QualLevel.GetLevel();
                                CorpQual corpQual = ToolDb.GenCorpQual(info.Id, QualName, quaCode, QualSeq, quaType, QualLevel, ValidDate, LicDate, LicUnit, quaUrl, qualNum, "广东省", "广东地区");
                                ToolDb.SaveEntity(corpQual, string.Empty);
                            }
                        }
                    }
                }
            }
        }
    }

    ToolCoreDb.ExecuteProcedure();
    return null;
}
/// <summary>
/// Crawls the company search on www.dgjs.gov.cn. For every combination of the options in the
/// <c>selected2</c> and <c>selected</c> drop-downs of the page at <c>SiteUrl</c> it posts the search
/// form, pages through the <c>jsxmtb</c> result table and stores one <c>CorpInfo</c> per row.
/// When the <c>selected2</c> option text contains "手册", the row's detail page is downloaded as
/// well and the company's qualifications, leaders and technical staff are stored; otherwise only
/// the name, address and contact from the list row are stored.
/// </summary>
/// <param name="crawlAll">Not read by this implementation.</param>
/// <returns>Always <c>null</c>; all results are persisted as a side effect.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    const string siteBaseUrl = "http://www.dgjs.gov.cn/dgweb/";
    const string searchUrl = "http://www.dgjs.gov.cn/dgweb/search.do";

    string html = string.Empty;
    string cookiestr = string.Empty;
    int pageInt = 1;

    try
    {
        html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.UTF8, ref cookiestr);
    }
    catch
    {
        return null;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList enttypeNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("name", "selected2")), true), new TagNameFilter("option")));
    parser.Reset();
    NodeList typeNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("name", "selected")), true), new TagNameFilter("option")));
    if (enttypeNode == null || enttypeNode.Count == 0 || typeNode == null || typeNode.Count == 0)
    {
        return null;
    }

    for (int t = 0; t < enttypeNode.Count; t++)
    {
        OptionTag entOption = enttypeNode[t] as OptionTag;
        if (entOption == null)
        {
            continue;
        }
        string entTag = entOption.GetAttribute("value");
        string entText = enttypeNode[t].ToNodePlainString();

        for (int d = 0; d < typeNode.Count; d++)
        {
            OptionTag typeOption = typeNode[d] as OptionTag;
            if (typeOption == null)
            {
                continue;
            }
            string typeTag = typeOption.GetAttribute("value");
            string corpType = typeNode[d].ToNodePlainString();

            // For the second selected2 option the first three company types are overridden with
            // fixed codes and names instead of the values read from the page.
            if (t == 1 && d == 0) { typeTag = "16"; corpType = "房地产开发企业"; }
            if (t == 1 && d == 1) { typeTag = "17"; corpType = "预拌商品混凝土企业"; }
            if (t == 1 && d == 2) { typeTag = "19"; corpType = "建筑业施工企业"; }

            try
            {
                NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                    new string[] { "pageMethod", "method", "selected2", "selected", "_state", "keyword", "currentPage", "currentPage_temp" },
                    new string[] { "", "searchHandBook", entTag, typeTag, "1", "", "1", "1" });
                html = ToolWeb.GetHtmlByUrl(searchUrl, nvc, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                continue;
            }

            // Reset for every search so a result whose pager is missing or unparsable does not
            // inherit the page count of the previous option combination.
            pageInt = 1;
            parser = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "mainNextPage")));
            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "页").Replace("\r", "").Replace("\t", "").Replace("\n", "");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    pageInt = 1;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                            new string[] { "pageMethod", "method", "selected2", "selected", "_state", "keyword", "currentPage", "currentPage_temp" },
                            new string[] { "next", "searchHandBook", entTag, typeTag, "1", "", (i - 1).ToString(), i.ToString() });
                        html = ToolWeb.GetHtmlByUrl(searchUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList tableNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("align", "center")), true), new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "jsxmtb"))));
                if (tableNode == null || tableNode.Count == 0)
                {
                    continue;
                }
                TableTag table = tableNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // For t == 1 the loop runs one step further because the non-handbook branch
                // below reads Rows[j - 1] rather than Rows[j].
                int lastRow = (t == 1) ? table.RowCount : table.RowCount - 1;
                for (int j = 2; j <= lastRow; j++)
                {
                    string CorpName = string.Empty, CorpCode = string.Empty, CorpAddress = string.Empty, RegDate = string.Empty, RegFund = string.Empty,
                           BusinessCode = string.Empty, BusinessType = string.Empty, LinkMan = string.Empty, LinkPhone = string.Empty, Fax = string.Empty,
                           Email = string.Empty, CorpSite = string.Empty, cUrl = string.Empty, ISOQualNum = string.Empty, ISOEnvironNum = string.Empty;
                    TableRow tr = null;

                    if (entText.Contains("手册"))
                    {
                        // ---- Credit handbook entries: list row plus detail page ----
                        if (j >= table.RowCount)
                        {
                            continue;
                        }
                        tr = table.Rows[j];

                        // The detail link sits in column 10 for guarantee companies and in
                        // column 9 otherwise; skip rows too short to contain it.
                        int linkColumn = corpType.Contains("担保企业") ? 10 : 9;
                        if (tr.ColumnCount <= linkColumn)
                        {
                            continue;
                        }

                        CorpName = tr.Columns[1].ToNodePlainString();
                        LinkMan = tr.Columns[3].ToNodePlainString();
                        CorpAddress = tr.Columns[5].ToNodePlainString();
                        cUrl = siteBaseUrl + tr.Columns[linkColumn].GetATagHref();

                        string htlDtl = string.Empty;
                        try
                        {
                            htlDtl = ToolWeb.GetHtmlByUrl(cUrl, Encoding.UTF8).GetJsString();
                        }
                        catch
                        {
                            continue;
                        }

                        // Header cells are rewritten to data cells before parsing.
                        // NOTE(review): this is a plain text replace and also rewrites "th"
                        // inside other words and attributes (e.g. "width") — confirm harmless.
                        parser = new Parser(new Lexer(htlDtl.Replace("th", "td").Replace("TH", "TD")));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "dgjsj")), true), new TagNameFilter("table")));
                        bool hasDetailTables = dtlNode != null && dtlNode.Count > 1;
                        if (hasDetailTables)
                        {
                            // Flatten the first table into "label:value\r\n" lines, ignoring
                            // empty cells.
                            StringBuilder ctxBuilder = new StringBuilder();
                            TableTag dtlTable = dtlNode[0] as TableTag;
                            for (int c = 1; c < dtlTable.RowCount; c++)
                            {
                                for (int v = 0; v < dtlTable.Rows[c].ColumnCount; v++)
                                {
                                    string cell = dtlTable.Rows[c].Columns[v].ToNodePlainString();
                                    if (string.IsNullOrEmpty(cell))
                                    {
                                        continue;
                                    }
                                    ctxBuilder.Append(cell).Append((v + 1) % 2 == 0 ? "\r\n" : ":");
                                }
                            }
                            string ctx = ctxBuilder.ToString();
                            RegDate = ctx.GetRegex("设立时间,设立日期");
                            LinkPhone = ctx.GetRegex("联系电话");
                            Fax = ctx.GetRegex("传真");
                            Email = ctx.GetRegex("电子邮箱");
                            BusinessType = ctx.GetRegex("经济性质");
                            BusinessCode = ctx.GetRegex("营业执照注册号");
                        }

                        CorpInfo info = ToolDb.GenCorpInfo(CorpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, corpType, "广东省", "东莞市", "东莞市住房和城乡建设局", cUrl, ISOQualNum, ISOEnvironNum, string.Empty);
                        if (string.IsNullOrEmpty(CorpName.GetNotChina()))
                        {
                            continue;
                        }

                        // Scraped text is placed inside SQL string literals, so embedded single
                        // quotes are doubled first.
                        string strSql = string.Format("select Id from CorpInfo where CorpName='{0}' and InfoSource='{1}' and CorpType='{2}'",
                            (info.CorpName ?? string.Empty).Replace("'", "''"),
                            (info.InfoSource ?? string.Empty).Replace("'", "''"),
                            (info.CorpType ?? string.Empty).Replace("'", "''"));
                        object obj = ToolDb.ExecuteScalar(strSql);
                        if (obj != null && obj.ToString() != "")
                        {
                            // Remove the previous snapshot of this company and its child rows.
                            ToolDb.ExecuteSql(string.Format("delete from CorpQual where CorpId='{0}'", obj));
                            ToolDb.ExecuteSql(string.Format("delete from CorpLeader where CorpId='{0}'", obj));
                            ToolDb.ExecuteSql(string.Format("delete from CorpTecStaff where CorpId='{0}'", obj));
                            ToolCoreDb.ExecuteSql(string.Format("delete from CorpInfo where Id='{0}'", obj));
                        }

                        if (!ToolDb.SaveEntity(info, string.Empty))
                        {
                            continue;
                        }

                        object corpId = ToolDb.ExecuteScalar("select Id from CorpInfo where Url='" + (info.Url ?? string.Empty).Replace("'", "''") + "' and InfoSource='东莞市住房和城乡建设局' ");
                        ToolDb.ExecuteSql("delete from CorpQual where CorpId='" + corpId + "'");

                        // ---- Qualifications: second table of the detail page ----
                        // Only read it when it exists; the original dereferenced dtlNode[1]
                        // even when the check above had failed.
                        TableTag quaTable = hasDetailTables ? dtlNode[1] as TableTag : null;
                        if (quaTable != null)
                        {
                            for (int q = 2; q < quaTable.RowCount; q++)
                            {
                                TableRow quaTr = quaTable.Rows[q];
                                if (quaTr.ColumnCount < 7)
                                {
                                    continue;
                                }
                                string QualSeq = string.Empty, LicDate = string.Empty;
                                string CorpId = info.Id;
                                string QualName = quaTr.Columns[0].ToNodePlainString();
                                string QualLevel = quaTr.Columns[1].ToNodePlainString();
                                string QualCode = quaTr.Columns[5].ToNodePlainString();
                                string LicUnit = quaTr.Columns[6].ToNodePlainString();
                                string QualType = quaTr.Columns[0].ToNodePlainString();
                                string ValidDate = quaTr.Columns[3].ToPlainTextString().GetDateRegex();
                                string qualNum = QualLevel.GetLevel();
                                CorpQual qual = ToolDb.GenCorpQual(CorpId, QualName, QualCode, QualSeq, QualType, QualLevel, ValidDate, LicDate, LicUnit, cUrl, qualNum, "广东省", "东莞市");
                                ToolDb.SaveEntity(qual, "");
                            }
                        }

                        // ---- Leaders: follow the "负责人" link inside div#head2 ----
                        parser = new Parser(new Lexer(htlDtl));
                        NodeList leaderNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "head2")));
                        if (leaderNode != null && leaderNode.Count > 0)
                        {
                            ToolDb.ExecuteSql("delete from CorpLeader where CorpId='" + corpId + "'");

                            // Probe links 1..4 and stop at the first whose text mentions "负责人".
                            ATag leaderTag = null;
                            for (int a = 1; a <= 4; a++)
                            {
                                leaderTag = leaderNode.GetATag(a);
                                if (leaderTag != null && leaderTag.LinkText.Contains("负责人"))
                                {
                                    break;
                                }
                            }

                            if (leaderTag != null && leaderTag.LinkText.Contains("负责人"))
                            {
                                string leaderUrl = siteBaseUrl + leaderTag.Link;
                                string leaderDtl = string.Empty;
                                try
                                {
                                    leaderDtl = ToolWeb.GetHtmlByUrl(leaderUrl, Encoding.UTF8).GetJsString();
                                }
                                catch
                                {
                                    // Best effort: an empty page simply yields no leader rows.
                                }
                                parser = new Parser(new Lexer(leaderDtl));
                                NodeList leaderDtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "dgjsj")), true), new TagNameFilter("table")));
                                if (leaderDtlNode != null && leaderDtlNode.Count > 0)
                                {
                                    TableTag leaderTable = leaderDtlNode[0] as TableTag;
                                    for (int l = 3; l < leaderTable.RowCount; l++)
                                    {
                                        TableRow leaderTr = leaderTable.Rows[l];
                                        // Rows whose markup contains "none" are skipped.
                                        if (leaderTr.ToHtml().ToLower().Contains("none"))
                                        {
                                            continue;
                                        }
                                        string LeaderName = string.Empty, LeaderDuty = string.Empty, LeaderType = string.Empty;
                                        try
                                        {
                                            LeaderName = leaderTr.Columns[0].ToNodePlainString();
                                            LeaderDuty = leaderTr.Columns[4].ToNodePlainString();
                                            LeaderType = leaderTr.Columns[1].ToNodePlainString();
                                        }
                                        catch
                                        {
                                            // Short rows keep whatever fields could be read.
                                        }
                                        if (!string.IsNullOrEmpty(LeaderName))
                                        {
                                            CorpLeader corpLeader = ToolDb.GenCorpLeader(info.Id, LeaderName, LeaderDuty, LeaderType, leaderUrl);
                                            ToolDb.SaveEntity(corpLeader, string.Empty);
                                        }
                                    }
                                }
                            }
                        }

                        // ---- Technical staff: follow the "技术" link inside div#head2 ----
                        parser = new Parser(new Lexer(htlDtl));
                        NodeList tecNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "head2")));
                        if (tecNode != null && tecNode.Count > 0)
                        {
                            ToolDb.ExecuteSql("delete from CorpTecStaff where CorpId='" + corpId + "'");

                            // Probe links 1..5 and stop at the first whose text mentions "技术".
                            ATag tecTag = null;
                            for (int a = 1; a <= 5; a++)
                            {
                                tecTag = tecNode.GetATag(a);
                                if (tecTag != null && tecTag.LinkText.Contains("技术"))
                                {
                                    break;
                                }
                            }

                            if (tecTag != null && tecTag.LinkText.Contains("技术"))
                            {
                                string tecUrl = siteBaseUrl + tecTag.Link;
                                string tecDtl = string.Empty;
                                try
                                {
                                    tecDtl = ToolWeb.GetHtmlByUrl(tecUrl, Encoding.UTF8).GetJsString();
                                }
                                catch
                                {
                                    // Best effort: an empty page simply yields no staff rows.
                                }
                                parser = new Parser(new Lexer(tecDtl));
                                NodeList tecDtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "maintable")));
                                if (tecDtlNode != null && tecDtlNode.Count > 0)
                                {
                                    TableTag tecTable = tecDtlNode[0] as TableTag;
                                    // The first two rows and the last row are not read.
                                    for (int l = 2; l < tecTable.RowCount - 1; l++)
                                    {
                                        TableRow tecTr = tecTable.Rows[l];
                                        string StaffName = string.Empty, IdCard = string.Empty, CertLevel = string.Empty, CertNo = string.Empty, stffType = string.Empty;
                                        try
                                        {
                                            StaffName = tecTr.Columns[1].ToNodePlainString();
                                            stffType = tecTr.Columns[6].ToNodePlainString();
                                            if (stffType == "/")
                                            {
                                                stffType = null;
                                            }
                                            CertNo = tecTr.Columns[8].ToNodePlainString();
                                        }
                                        catch
                                        {
                                            // Short rows keep whatever fields could be read.
                                        }
                                        if (!string.IsNullOrEmpty(StaffName))
                                        {
                                            CorpTecStaff staff = ToolDb.GenCorpTecStaff(info.Id, StaffName, IdCard, CertLevel, CertNo, tecUrl, stffType);
                                            ToolDb.SaveEntity(staff, string.Empty);
                                        }
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        // ---- Qualification-certificate companies: list row only ----
                        tr = table.Rows[j - 1];
                        try
                        {
                            CorpName = tr.Columns[0].ToNodePlainString();
                            CorpAddress = tr.Columns[1].ToNodePlainString();
                            LinkMan = tr.Columns[2].ToNodePlainString();
                            CorpInfo info1 = ToolDb.GenCorpInfo(CorpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, corpType, "广东省", "东莞市", "东莞市住房和城乡建设局", cUrl, ISOQualNum, ISOEnvironNum, string.Empty);
                            if (!string.IsNullOrEmpty(CorpName.GetNotChina()))
                            {
                                string strSql = string.Format("select Id from CorpInfo where CorpName='{0}' and InfoSource='{1}' and CorpType='{2}'",
                                    (info1.CorpName ?? string.Empty).Replace("'", "''"),
                                    (info1.InfoSource ?? string.Empty).Replace("'", "''"),
                                    (info1.CorpType ?? string.Empty).Replace("'", "''"));
                                object obj = ToolDb.ExecuteScalar(strSql);
                                if (obj != null && obj.ToString() != "")
                                {
                                    ToolCoreDb.ExecuteSql(string.Format("delete from CorpInfo where Id='{0}'", obj));
                                }
                                ToolDb.SaveEntity(info1, string.Empty);
                            }
                        }
                        catch (Exception)
                        {
                            // Deliberate best effort, as in the original: a malformed row must not
                            // stop the crawl of the remaining rows.
                        }
                    }
                }
            }
        }
    }

    return null;
}
/// <summary>
/// Crawls the paged company list at <c>SiteUrl</c> (table <c>resultset</c>), downloads the
/// credit-info page of every listed company, extracts the company fields from the page's
/// JavaScript and hands the resulting <c>CorpInfo</c> to <c>AddCorpInfo</c>. A company already
/// stored under the same name, type, code and source is deleted first. After every 90 processed
/// detail pages the crawl sleeps for 11 minutes.
/// </summary>
/// <param name="crawlAll">Not read by this implementation.</param>
/// <returns>
/// The list created at the start of the method; nothing in this method adds to it, so it is
/// returned empty. Results are persisted as a side effect.
/// </returns>
protected override System.Collections.IList ExecuteCrawl(bool crawlAll)
{
    const string detailUrlPrefix = "http://61.144.226.3:92/WyjgInsideWeb/e-business/prg/signup/CreditInfoBase.jsp?EventID=CreditInfo&QYFRDM=";
    // Throttle: pause after this many processed detail pages ...
    const int requestsPerBatch = 90;
    // ... for this long (11 minutes, in milliseconds).
    const int batchPauseMs = 11 * 60 * 1000;

    IList list = new List<CorpInfo>();
    string html = string.Empty;
    int pageInt = 1, sqlCount = 0;

    try
    {
        html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
    }
    catch
    {
        return list;
    }

    // The page count is read from the "共…页" text of the TABLEPANE table; when it cannot be
    // parsed only the first page is processed.
    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "TABLEPANE")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            string temp = pageNode.AsString().GetRegexBegEnd("共", "页");
            pageInt = int.Parse(temp);
        }
        catch
        {
            // Keep the default of one page.
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = ToolWeb.GetHtmlByUrl(this.SiteUrl + "&pages=" + i, Encoding.Default);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "resultset")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }
        TableTag table = listNode[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        // The first and the last row of the table are not read.
        for (int j = 1; j < table.RowCount - 1; j++)
        {
            TableRow tr = table.Rows[j];
            // Columns 2..6 are read below; a shorter row would otherwise throw and abort the
            // whole crawl.
            if (tr.ColumnCount < 7)
            {
                continue;
            }

            string CorpName = string.Empty, CorpCode = string.Empty, CorpAddress = string.Empty, RegDate = string.Empty, RegFund = string.Empty,
                   BusinessCode = string.Empty, BusinessType = string.Empty, LinkMan = string.Empty, LinkPhone = string.Empty, Fax = string.Empty,
                   Email = string.Empty, CorpSite = string.Empty, cUrl = string.Empty, ISOQualNum = string.Empty, ISOEnvironNum = string.Empty,
                   OffAdr = string.Empty, corpType = string.Empty;

            CorpName = tr.Columns[2].ToNodePlainString();
            if (string.IsNullOrWhiteSpace(CorpName))
            {
                continue;
            }
            CorpCode = tr.Columns[3].ToNodePlainString();
            corpType = "物业管理企业";
            LinkMan = tr.Columns[5].ToNodePlainString();
            LinkPhone = tr.Columns[6].ToNodePlainString();
            cUrl = detailUrlPrefix + CorpCode;

            string htmldtl = string.Empty;
            try
            {
                htmldtl = ToolWeb.GetHtmlByUrl(cUrl, Encoding.Default);
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("SCRIPT"), new HasAttributeFilter("language", "javascript")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            // The detail page assigns its values in script ("<field>.value = '...';"); each
            // field is cut out between "<field>.value" and the next ';' and then stripped of
            // '=' and quotes.
            string ctx = dtlNode.AsString();
            string[] assignmentNoise = new string[] { "=", "'" };
            CorpAddress = ctx.GetRegexBegEnd("qyxxdz.value", ";").GetReplace(assignmentNoise);
            CorpSite = ctx.GetRegexBegEnd("qywz.value", ";").GetReplace(assignmentNoise);
            Email = ctx.GetRegexBegEnd("dzxx.value", ";").GetReplace(assignmentNoise);
            BusinessCode = ctx.GetRegexBegEnd("yyzzzch.value", ";").GetReplace(assignmentNoise);
            RegFund = ctx.GetRegexBegEnd("zczb.value", ";").GetReplace(new string[] { "=", "'", "人民币" });
            RegDate = ctx.GetRegexBegEnd("qyclsj.value", ";").GetReplace(assignmentNoise);
            BusinessType = ctx.GetRegexBegEnd("qydjzclx.value", ";").GetReplace(assignmentNoise);

            CorpInfo info = ToolDb.GenCorpInfo(CorpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, corpType, "广东省", "深圳市", "深圳市住房和建设局", cUrl, ISOQualNum, ISOEnvironNum, OffAdr);

            // Scraped text is placed inside SQL string literals, so embedded single quotes are
            // doubled first.
            string existsSql = string.Format("select Id from CorpInfo where CorpName='{0}' and CorpType='{1}' and CorpCode='{2}' and InfoSource='{3}'",
                (info.CorpName ?? string.Empty).Replace("'", "''"),
                (info.CorpType ?? string.Empty).Replace("'", "''"),
                (info.CorpCode ?? string.Empty).Replace("'", "''"),
                (info.InfoSource ?? string.Empty).Replace("'", "''"));
            string result = Convert.ToString(ToolDb.ExecuteScalar(existsSql));

            if (string.IsNullOrEmpty(result))
            {
                AddCorpInfo(info, ctx);
            }
            else
            {
                string delQual = string.Format("delete from CorpQual where CorpId='{0}'", result);
                string delCorp = string.Format("delete from CorpInfo where Id='{0}'", result);
                int delResult = 0;
                // NOTE(review): the company is only refreshed when the CorpQual delete reports
                // a value > 0; an existing company for which that delete reports 0 is neither
                // deleted nor re-added. Confirm the return contract of ExecuteSql before
                // relaxing this to >= 0.
                if (ToolCoreDb.ExecuteSql(delQual) > 0)
                {
                    delResult = ToolCoreDb.ExecuteSql(delCorp);
                }
                if (delResult > 0)
                {
                    AddCorpInfo(info, ctx);
                }
            }

            sqlCount++;
            if (sqlCount >= requestsPerBatch)
            {
                sqlCount = 0;
                Thread.Sleep(batchPauseMs);
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the paged company list starting at <c>SiteUrl</c>, opens each company's
/// detail page (<c>tab3Detail.do</c>), and stores the company record together with
/// its qualification, safety-licence, results and punishment tables.
/// </summary>
/// <remarks>
/// An existing <c>CorpInfo</c> row with the same name and info source is deleted
/// (including its child rows) before the fresh record is saved. Failures to fetch a
/// list page or a detail page are skipped so one bad request does not abort the run.
/// </remarks>
/// <param name="crawlAll">Not read by this implementation.</param>
/// <returns>
/// Always <c>null</c>; all results are written to the database as a side effect.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    const string listUrl = "http://119.145.135.38/fscx/web/tab3List.do";
    const string detailUrl = "http://119.145.135.38/fscx/web/tab3Detail.do";

    string html;
    try
    {
        html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.Default);
    }
    catch
    {
        // Without the first page there is nothing to crawl.
        return null;
    }

    // Page count is the text between "/" and "页" in the pager div; fall back to one page.
    int pageInt = 1;
    Parser parser = new Parser(new Lexer(html));
    NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ContentPlaceHolder1_aspnetPager1")));
    if (pageList != null && pageList.Count > 0)
    {
        try
        {
            string pageText = pageList[0].ToPlainTextString().GetRegexBegEnd("/", "页");
            if (!int.TryParse(pageText, out pageInt))
            {
                pageInt = 1;
            }
        }
        catch
        {
            pageInt = 1;
        }
    }

    // Extracts the "?id=..." query fragment from a row's onclick handler; built once for all rows.
    Regex regexLink = new Regex(@"\?id=[^&]+");

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                new string[] { "searchStr", "currentPage", "pageSize", "tab", "kind" },
                new string[] { string.Empty, i.ToString(), "15", "2", "zzxx" });
            try
            {
                html = ToolWeb.GetHtmlByUrl(listUrl, nvc, Encoding.Default);
            }
            catch
            {
                // Best effort: a failed page request skips that page instead of aborting the crawl.
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "data-table2")));
        if (nodeList == null || nodeList.Count == 0)
        {
            continue;
        }

        // When more than one matching table is present the second one is used.
        TableTag table = (nodeList.Count > 1 ? nodeList[1] : nodeList[0]) as TableTag;
        if (table == null)
        {
            continue;
        }

        // Row 0 is skipped (presumably the header row).
        for (int j = 1; j < table.RowCount; j++)
        {
            TableRow tr = table.Rows[j];
            if (tr.ColumnCount < 2)
            {
                // Not a data row: name and type cells are both required.
                continue;
            }

            string onclick = tr.GetAttribute("onclick");
            if (string.IsNullOrEmpty(onclick))
            {
                // No handler means no detail id to follow.
                continue;
            }

            string corpName = tr.Columns[0].ToNodePlainString();
            string corpType = tr.Columns[1].ToNodePlainString();

            string linkText = onclick.GetRegexBegEnd("'", "'");
            string cUrl = detailUrl + regexLink.Match(linkText).Value;

            string htldtl;
            try
            {
                htldtl = ToolWeb.GetHtmlByUrl(cUrl, Encoding.Default);
            }
            catch
            {
                continue;
            }

            NodeList dtList = ExtractTablesUnderDiv(htldtl, "tabs-1");
            if (dtList == null || dtList.Count == 0)
            {
                continue;
            }

            TableTag tab = dtList[0] as TableTag;
            if (tab == null)
            {
                continue;
            }

            // Flatten the detail table into "label:value\r\n" text: odd-positioned cells are
            // followed by ":" and even-positioned cells by a line break.
            StringBuilder ctxBuilder = new StringBuilder();
            for (int d = 0; d < tab.RowCount; d++)
            {
                for (int k = 0; k < tab.Rows[d].ColumnCount; k++)
                {
                    ctxBuilder.Append(tab.Rows[d].Columns[k].ToNodePlainString());
                    ctxBuilder.Append((k + 1) % 2 == 0 ? "\r\n" : ":");
                }
            }
            string ctx = ctxBuilder.ToString();

            string corpCode = ctx.GetRegex("组织机构代码,机构代码");
            string businessCode = ctx.GetRegex("营业执照注册号");
            string businessType = ctx.GetRegex("注册经济类别");
            string regFund = ctx.GetRegex("注册资本(万元),注册资本,注册资金", false).Replace("(万元)", "").Replace("(万)", "").Replace("万元", "").Replace("万", "");
            string regDate = ctx.GetRegex("成立日期,成立时间,设立日期,设立时间");
            string corpAddress = ctx.GetRegex("注册地址");
            string linkMan = ctx.GetRegex("法定代表人,联系人");
            string linkPhone = ctx.GetRegex("联系电话");
            string fax = ctx.GetRegex("传真");
            string email = ctx.GetRegex("电子邮箱");
            string corpSite = ctx.GetRegex("企业网址");

            if (regDate.Contains("000"))
            {
                regDate = "";
            }

            // Every "万" was stripped above, so the unit is always re-appended exactly once.
            // NOTE(review): an empty amount therefore becomes "万" - confirm that is intended.
            regFund += "万";

            CorpInfo info = ToolDb.GenCorpInfo(corpName, corpCode, corpAddress, regDate, regFund, businessCode, businessType, linkMan, linkPhone, fax, email, corpSite, corpType, "广东省", "佛山市", "佛山市住房和城乡建设管理局", cUrl, string.Empty, string.Empty, string.Empty);

            // The name comes from scraped HTML: double single quotes so it cannot break the SQL literal.
            string strSql = string.Format(
                "select Id from CorpInfo where CorpName='{0}' and InfoSource='{1}'",
                Convert.ToString((object)info.CorpName).Replace("'", "''"),
                Convert.ToString((object)info.InfoSource).Replace("'", "''"));
            object obj = ToolDb.ExecuteScalar(strSql);
            if (obj != null && obj.ToString() != "")
            {
                // Replace strategy: remove the old record's child rows, then the record itself.
                ToolDb.ExecuteSql(string.Format("delete from CorpQual where CorpId='{0}'", obj));
                ToolDb.ExecuteSql(string.Format("delete from CorpResults where CorpId='{0}'", obj));
                ToolDb.ExecuteSql(string.Format("delete from CorpSecLic where CorpId='{0}'", obj));
                ToolDb.ExecuteSql(string.Format("delete from CorpPunish where CorpId='{0}'", obj));
                ToolCoreDb.ExecuteSql(string.Format("delete from CorpInfo where Id='{0}'", obj));
            }

            if (!ToolDb.SaveEntity(info, string.Empty))
            {
                continue;
            }

            NodeList qualList = ExtractTablesUnderDiv(htldtl, "tabs1-1");
            if (qualList != null && qualList.Count > 0)
            {
                AddQual(qualList[0] as TableTag, info.Id, info.Url);
            }

            NodeList secLicList = ExtractTablesUnderDiv(htldtl, "tabs5-1");
            if (secLicList != null && secLicList.Count > 0)
            {
                AddCorpSecLic(secLicList[0] as TableTag, info.Id, info.Url);
            }

            NodeList resultsList = ExtractTablesUnderDiv(htldtl, "tabs2-1");
            if (resultsList != null && resultsList.Count > 0)
            {
                AddCorpResults(resultsList[0] as TableTag, info.Id, info.Url);
            }

            NodeList punishList = ExtractTablesUnderDiv(htldtl, "tabs7-1");
            if (punishList != null && punishList.Count > 0)
            {
                AddCorpPunish(punishList[0] as TableTag, info.Id, info.Url);
            }
        }
    }

    ToolCoreDb.ExecuteProcedure();
    return null;
}

/// <summary>
/// Parses <paramref name="html"/> from scratch and returns the <c>table</c> nodes matched
/// by a parent filter for a <c>div</c> whose <c>id</c> attribute equals
/// <paramref name="divId"/> (HasParentFilter is invoked with <c>true</c>).
/// </summary>
/// <param name="html">Detail-page markup to search.</param>
/// <param name="divId">Value of the container div's <c>id</c> attribute.</param>
/// <returns>The node list produced by the parser for that filter.</returns>
private static NodeList ExtractTablesUnderDiv(string html, string divId)
{
    Parser tableParser = new Parser(new Lexer(html));
    return tableParser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", divId)), true), new TagNameFilter("table")));
}