/// <summary>
/// Downloads the page at <paramref name="infoUrl"/>, takes the first table element found in it
/// and saves one <c>CorpTecStaff</c> record per data row.
/// </summary>
/// <param name="info">Company the rows belong to; <c>info.Id</c> and <c>info.Url</c> are stored on every record.</param>
/// <param name="infoUrl">Address of the page that contains the staff table.</param>
protected void AddCorpTecStaff(CorpInfo info, string infoUrl)
{
    string htmldtl;
    try
    {
        htmldtl = ToolWeb.GetHtmlByUrl(infoUrl);
    }
    catch
    {
        // Best effort: when the page cannot be downloaded there is nothing to import.
        return;
    }

    if (string.IsNullOrEmpty(htmldtl))
    {
        return;
    }

    Parser parser = new Parser(new Lexer(htmldtl));
    NodeList nodeList = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
    if (nodeList == null || nodeList.Count == 0)
    {
        return;
    }

    // Check the cast result instead of dereferencing it blindly.
    TableTag table = nodeList[0] as TableTag;
    if (table == null)
    {
        return;
    }

    // Rows 0 and 1 are skipped (presumably heading rows - confirm against the page layout).
    for (int i = 2; i < table.RowCount; i++)
    {
        TableRow tr = table.Rows[i];
        if (tr.ColumnCount <= 1)
        {
            // A row with at most one cell ends the data section.
            break;
        }

        if (tr.ColumnCount < 6)
        {
            // Cells 1..5 are read below; skip short rows instead of failing on the index.
            continue;
        }

        string staffName = tr.Columns[1].ToNodePlainString();
        string idCard = tr.Columns[2].ToNodePlainString();
        string certLevel = tr.Columns[3].ToNodePlainString();
        string certNo = tr.Columns[4].ToNodePlainString();
        string staffType = tr.Columns[5].ToNodePlainString();

        CorpTecStaff staff = ToolDb.GenCorpTecStaff(info.Id, staffName, idCard, certLevel, certNo, info.Url, staffType);
        ToolDb.SaveEntity(staff, string.Empty);
    }
}
/// <summary>
/// Returns the province list. The copy stored at <c>ToolFile.WebCityPath</c> is used when it
/// holds at least one entry; otherwise the list is downloaded from the region index service,
/// ordered by <c>RegionName</c> and written back to that path.
/// </summary>
/// <returns>
/// The provinces, or an empty list when the download fails or the payload does not contain
/// the expected json / category / provinces structure.
/// </returns>
/// <remarks>Malformed JSON still surfaces as the serializer's exception.</remarks>
protected List<ProvInfo> GetCity()
{
    List<ProvInfo> citys = ToolFile.Deserialize<ProvInfo>(ToolFile.WebCityPath);
    if (citys != null && citys.Count > 0)
    {
        return citys;
    }

    citys = new List<ProvInfo>();
    string url = "http://jzsc.mohurd.gov.cn/asite/region/index";
    string html;
    try
    {
        html = ToolWeb.GetHtmlByUrl(url);
    }
    catch
    {
        // Best effort: without the page there is nothing to parse or cache.
        return citys;
    }

    if (string.IsNullOrEmpty(html))
    {
        return citys;
    }

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    Dictionary<string, object> root = serializer.DeserializeObject(html) as Dictionary<string, object>;

    // Walk json -> category -> provinces one level at a time so that a missing or
    // differently-typed node yields an empty result instead of a null dereference.
    object jsonNode, categoryNode, provincesNode;
    Dictionary<string, object> json = null;
    Dictionary<string, object> category = null;
    object[] provinces = null;
    if (root != null && root.TryGetValue("json", out jsonNode))
    {
        json = jsonNode as Dictionary<string, object>;
    }
    if (json != null && json.TryGetValue("category", out categoryNode))
    {
        category = categoryNode as Dictionary<string, object>;
    }
    if (category != null && category.TryGetValue("provinces", out provincesNode))
    {
        provinces = provincesNode as object[];
    }
    if (provinces == null)
    {
        return citys;
    }

    foreach (object obj in provinces)
    {
        Dictionary<string, object> dic = obj as Dictionary<string, object>;
        if (dic == null)
        {
            continue;
        }

        ProvInfo info = new ProvInfo();
        info.RegionId = Convert.ToString(dic["region_id"]);
        info.RegionName = Convert.ToString(dic["region_name"]);
        info.RegionFullName = Convert.ToString(dic["region_fullname"]);
        citys.Add(info);
    }

    citys = citys.OrderBy(x => x.RegionName).ToList();
    if (citys.Count > 0)
    {
        // Never cache an empty list: it would be treated as "no cache" on the next call anyway.
        ToolFile.Serialize<ProvInfo>(citys, ToolFile.WebCityPath);
    }

    return citys;
}
/// <summary>
/// Crawls the listing at <c>SiteUrl</c> (table with class "list_table"), follows the link in
/// cell 3 of every data row to its detail page and saves one <c>CorpBehavior</c> record for
/// each detail page that contains the table with id "Table1".
/// </summary>
/// <param name="crawlAll">Not consulted by this implementation.</param>
/// <returns>An empty list; records are written directly through <c>ToolDb.SaveEntity</c>.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    string htl;
    try
    {
        htl = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.Default);
    }
    catch (Exception)
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(htl));
    NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "list_table")));
    if (nodeList == null || nodeList.Count == 0)
    {
        return list;
    }

    TableTag table = nodeList[0] as TableTag;
    if (table == null)
    {
        return list;
    }

    int sqlCount = 0;
    for (int j = 1; j < table.RowCount; j++)
    {
        // Same stop condition as before (saving continues while sqlCount <= MaxCount), but it is
        // evaluated before the detail page is requested so no download is wasted.
        if (sqlCount > this.MaxCount)
        {
            return list;
        }

        TableRow tr = table.Rows[j];
        if (tr.ColumnCount < 5)
        {
            // Cells 1..4 are read below; a short row must not abort the whole crawl.
            continue;
        }

        string corpName = tr.Columns[1].ToPlainTextString().Trim();
        string corpType = tr.Columns[2].ToPlainTextString().Trim();
        string behavior = tr.Columns[3].ToPlainTextString().Trim();
        string beginDate = tr.Columns[4].ToPlainTextString().Trim();

        NodeList anchors = tr.Columns[3].SearchFor(typeof(ATag), true);
        ATag aTag = (anchors != null && anchors.Count > 0) ? anchors[0] as ATag : null;
        if (aTag == null || aTag.Link == null)
        {
            // No link means no detail page for this row.
            continue;
        }

        string infoUrl = "http://61.144.226.2/CXDA_BLXW/Detail.aspx?Doc_ID=" + aTag.Link.Replace("GoAttachView('", "").Replace("');", "").Trim();

        string htmldetail;
        try
        {
            // NOTE(review): the literal "= 602;" looks like a mangled numeric character
            // reference for the replacement character - confirm against the live page.
            htmldetail = ToolWeb.GetHtmlByUrl(ToolWeb.UrlEncode(infoUrl), Encoding.GetEncoding("GB2312")).Replace("= 602;", "罚");
        }
        catch (Exception)
        {
            continue;
        }

        Parser dtlparser = new Parser(new Lexer(htmldetail));
        NodeList dtnode = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "Table1"), new TagNameFilter("table")));
        if (dtnode == null || dtnode.Count == 0)
        {
            continue;
        }

        string behaviorCtx = dtnode.AsString().Replace("\t", "").Replace(" ", "").Replace("\r\n", "").Trim();
        CorpBehavior info = ToolDb.GenCorpBehavior(corpName, corpType, behavior, behaviorCtx, infoUrl, string.Empty, string.Empty, beginDate);
        ToolDb.SaveEntity(info, this.ExistCompareFields);
        sqlCount++;
    }

    return list;
}
/// <summary>
/// Runs a two-request login exchange against http://localhost:7434/Home/Login: a GET that
/// returns the "mrsa", "ersa" and "auths_Token" values as JSON, followed by a POST that sends
/// them back together with the encrypted user name and password. The POST response is discarded.
/// </summary>
/// <remarks>
/// NOTE(review): the encryption key, the account name and the password are hard-coded here;
/// this looks like a development scratch method - confirm before shipping.
/// </remarks>
protected void SetTemp()
{
    KdAzdgHelper cipher = new KdAzdgHelper("KdNszj.Bdimp.WebApi.AzdgKEY");
    string sessionCookies = string.Empty;

    // Constructed but not referenced again in this method.
    IWebHttpClient spareClient = new WebHttpClient();

    string loginAddress = new Uri("http://localhost:7434/Home/Login").ToString();

    // Step 1: fetch the handshake values; the cookie string is filled in by the helper.
    string handshakeJson = ToolWeb.GetHtmlByUrl(loginAddress, Encoding.UTF8, ref sessionCookies);
    Dictionary<string, string> handshake = JsonConvert.DeserializeObject<Dictionary<string, string>>(handshakeJson);
    string mrsa = handshake["mrsa"];
    string ersa = handshake["ersa"];
    string authsToken = handshake["auths_Token"];

    // Step 2: encrypt the credentials (the password is MD5-hashed first) and post the form.
    string encryptedName = cipher.Encrypt("adminer");
    string passwordDigest = DESEncrypt.GenerateMD5("1");
    string encryptedPassword = cipher.Encrypt(passwordDigest);

    string[] fieldNames = new string[] { "userName", "userPwd", "mrsa", "ersa", "auths_Token" };
    string[] fieldValues = new string[] { encryptedName, encryptedPassword, mrsa, ersa, authsToken };
    NameValueCollection form = ToolWeb.GetNameValueCollection(fieldNames, fieldValues);

    string loginResponse = ToolWeb.GetHtmlByUrl(loginAddress, form, Encoding.UTF8, ref sessionCookies);
}
/// <summary>
/// Downloads the page at <paramref name="infoUrl"/>, takes the first table element found in it
/// and saves one <c>CorpQual</c> record per data row. The combined name in cell 3 is split
/// into a qualification name and a level.
/// </summary>
/// <param name="info">Company the rows belong to; its Id, Url, Province and City are stored on every record.</param>
/// <param name="infoUrl">Address of the page that contains the qualification table.</param>
protected void AddCorpQual(CorpInfo info, string infoUrl)
{
    string htmldtl;
    try
    {
        htmldtl = ToolWeb.GetHtmlByUrl(infoUrl);
    }
    catch
    {
        // Best effort: when the page cannot be downloaded there is nothing to import.
        return;
    }

    if (string.IsNullOrEmpty(htmldtl))
    {
        return;
    }

    Parser parser = new Parser(new Lexer(htmldtl));
    NodeList nodeList = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
    if (nodeList == null || nodeList.Count == 0)
    {
        return;
    }

    TableTag table = nodeList[0] as TableTag;
    if (table == null)
    {
        return;
    }

    for (int i = 1; i < table.RowCount; i++)
    {
        TableRow tr = table.Rows[i];
        if (tr.ColumnCount <= 1)
        {
            // A row with at most one cell ends the data section.
            break;
        }

        if (tr.ColumnCount < 7)
        {
            // Cells 1..6 are read below; skip short rows instead of failing on the index.
            continue;
        }

        string qualType = tr.Columns[1].ToNodePlainString();
        string qualCode = tr.Columns[2].ToNodePlainString();
        string name = tr.Columns[3].ToNodePlainString();
        string licDate = tr.Columns[4].ToPlainTextString().GetDateRegex();
        string validDate = tr.Columns[5].ToPlainTextString().GetDateRegex();
        string licUnit = tr.Columns[6].ToNodePlainString();

        string qualName = string.Empty;
        string qualLevel = string.Empty;
        if (name.Contains("不分"))
        {
            // Everything before the marker is the name; the level is the fixed "ungraded" value.
            qualName = name.Remove(name.IndexOf("不分"));
            qualLevel = "不分级";
        }
        else if (name.Contains("暂定级"))
        {
            // Provisional grade is stored with the same level value as the branch above.
            qualName = name.Remove(name.IndexOf("暂定级"));
            qualLevel = "不分级";
        }
        else if (!string.IsNullOrWhiteSpace(name) && name.Length > 2)
        {
            // Otherwise the last two characters are the level and the rest is the name.
            qualLevel = name.Substring(name.Length - 2, 2);
            qualName = name.Remove(name.Length - 2, 2);
        }

        if (qualType.Contains("监理"))
        {
            qualName = qualName + "监理";
        }

        string qualNum = qualLevel.GetLevel();

        // The sequence number is not available on this page and is stored empty.
        CorpQual qual = ToolDb.GenCorpQual(info.Id, qualName, qualCode, string.Empty, qualType, qualLevel, validDate, licDate, licUnit, info.Url, qualNum, info.Province, info.City);
        ToolDb.SaveEntity(qual, string.Empty);
    }
}
/// <summary>
/// Requests the staff list of the enterprise identified by <paramref name="enterpriseId"/>,
/// then for every person requests the certificate list and saves one <c>CorpTecStaff</c>
/// record using the first certificate's "certificateCode" and "major" values.
/// </summary>
/// <param name="info">Company the staff belong to; <c>info.Id</c> and <c>info.Url</c> are stored on every record.</param>
/// <param name="enterpriseId">Value placed in the enterpriseId query parameter of the staff list request.</param>
/// <remarks>
/// A person whose certificate request fails is skipped. A person whose certificate response
/// has no usable "data" array is still saved, with empty certificate fields.
/// </remarks>
protected void AddCorpStaff(CorpInfo info, string enterpriseId)
{
    string url = "http://202.104.65.182:8081/G2/webdrive/web-person-info.do?enterpriseId=" + enterpriseId + "&enterpriseBaseId=&data&filter_params_=rowNum,personId,personBaseId,name,isPause,isDel&defined_operations_=&nocheck_operations_=&";
    string nd = ToolHtml.GetDateTimeLong(DateTime.Now).ToString();
    NameValueCollection nvc = ToolWeb.GetNameValueCollection(
        new string[] { "gridSearch", "nd", "PAGESIZE", "PAGE", "sortField", "sortDirection" },
        new string[] { "false", nd, "1000", "1", "", "asc" });

    string strJson;
    try
    {
        strJson = ToolWeb.GetHtmlByUrl(url, nvc);
    }
    catch
    {
        return;
    }

    JavaScriptSerializer serializer = new JavaScriptSerializer();

    // The payload may be empty or not an object; treat both as "no staff" instead of crashing.
    Dictionary<string, object> listJson = serializer.DeserializeObject(strJson) as Dictionary<string, object>;
    object dataNode;
    if (listJson == null || !listJson.TryGetValue("data", out dataNode))
    {
        return;
    }

    object[] objList = dataNode as object[];
    if (objList == null)
    {
        return;
    }

    foreach (object obj in objList)
    {
        Dictionary<string, object> dic = obj as Dictionary<string, object>;
        if (dic == null)
        {
            continue;
        }

        string staffName = Convert.ToString(dic["name"]);
        string certNo = string.Empty;
        string staffType = string.Empty;

        string dtlUrl = "http://202.104.65.182:8081/G2/webdrive/web-person-certificate.do?personId=" + dic["personId"] + "&actionFlag=view&data&filter_params_=rowNum,personBaseId,personId,certificateId,certificateType,registerLevel,certificateCode,certificatePhotoetch,gardenMajor,issuer,major,pmTitle,issueDate,registerValidEnd&defined_operations_=&nocheck_operations_=view&";

        string dtlJson;
        try
        {
            // The certificate request is posted with the same form values as the list request.
            dtlJson = ToolWeb.GetHtmlByUrl(dtlUrl, nvc);
        }
        catch
        {
            continue;
        }

        Dictionary<string, object> dtlDic = serializer.DeserializeObject(dtlJson) as Dictionary<string, object>;
        object dtlDataNode;
        object[] dtlObjList = null;
        if (dtlDic != null && dtlDic.TryGetValue("data", out dtlDataNode))
        {
            dtlObjList = dtlDataNode as object[];
        }

        if (dtlObjList != null && dtlObjList.Length > 0)
        {
            // Only the first certificate is used.
            Dictionary<string, object> dicDtl = dtlObjList[0] as Dictionary<string, object>;
            if (dicDtl != null)
            {
                certNo = Convert.ToString(dicDtl["certificateCode"]);
                staffType = Convert.ToString(dicDtl["major"]);
            }
        }

        // ID card number and certificate level are not provided by these responses and stay empty.
        CorpTecStaff staff = ToolDb.GenCorpTecStaff(info.Id, staffName, string.Empty, string.Empty, certNo, info.Url, staffType);
        ToolDb.SaveEntity(staff, string.Empty);
    }
}
/// <summary>
/// Loads the page at <c>SiteUrl</c>, and for every data row of the table with id "GridView1"
/// replays the ASP.NET postback found in cell 8, then hands the response to <c>Save</c> for
/// sections 1..14 except 7. Finally, when <c>sqlcount</c> exceeds 100, rows of
/// <c>CorpCreditjd</c> created before today are marked as not new.
/// </summary>
/// <param name="crawlAll">Passed through to <c>Save</c>.</param>
/// <returns>The list instance that was handed to <c>Save</c>.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    string html = string.Empty;
    try
    {
        html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch (Exception ex)
    {
        // Keep going with an empty page so that the final clean-up step still runs.
        Logger.Error(ex.ToString());
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "GridView1"), new TagNameFilter("table")));
    TableTag table = (nodeList != null && nodeList.Count > 0) ? nodeList[0] as TableTag : null;
    if (table != null)
    {
        // Both hidden-field values come from the listing page, which does not change per row.
        string view = ToolWeb.GetAspNetViewState(html);
        string even = ToolWeb.GetAspNetEventValidation(html);

        for (int i = 1; i < table.RowCount; i++)
        {
            Winista.Text.HtmlParser.Tags.TableRow tr = table.Rows[i];
            if (tr.ColumnCount < 9)
            {
                // Cell 8 holds the postback link; rows without it cannot be followed.
                continue;
            }

            NodeList anchors = tr.Columns[8].SearchFor(typeof(ATag), true);
            ATag alink = (anchors != null && anchors.Count > 0) ? anchors[0] as ATag : null;
            if (alink == null || alink.Link == null)
            {
                continue;
            }

            string alin = alink.Link.Replace("__doPostBack('", "").Replace("','')", "");
            NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "GridViewPaging1$txtGridViewPagingForwardTo", "__VIEWSTATEENCRYPTED", "__EVENTVALIDATION" },
                new string[] { alin, "", view, "1", "", even });

            string cookies = string.Empty;
            string bidhtml;
            try
            {
                bidhtml = ToolWeb.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookies);
            }
            catch (Exception ex)
            {
                // Skip the row: otherwise the previous row's response would be saved again.
                Logger.Error(ex.ToString());
                continue;
            }

            for (int l = 1; l <= 14; l++)
            {
                if (l == 7)
                {
                    continue;
                }

                Save(l, bidhtml, list, crawlAll);
            }
        }
    }

    if (sqlcount > 100)
    {
        string sql = string.Format("update CorpCreditjd set IsNew='0' where CreateTime<'{0}'", DateTime.Now.ToString("yyyy-MM-dd"));
        ToolDb.ExecuteSql(sql);
    }

    return list;
}
/// <summary>
/// Parses <paramref name="html"/> for the table with id "dgConstBid" and, when exactly one such
/// table is found, walks its rows from index 1, handling only rows that have exactly six cells.
/// For each such row it reads the person name from cell 1, takes the link of the first anchor in
/// that cell, strips the "GoDetail('" / "');" wrapper to obtain the identifier, downloads
/// "http://61.144.226.2/ryxx/Detail_LWDZ.aspx?ID_NUMBER=" + identifier with Encoding.Default,
/// extracts the text after "任职企业编号:" from the detail page into CorpCode (the last match wins),
/// builds a CorpStaff through ToolDb.GenCorpStaff and saves it with this.ExistCompareFields.
/// </summary>
/// <param name="url">Not referenced in the visible body (the local named Url is a different variable).</param>
/// <param name="list">Only appears in statements that follow a "//" marker - see the review note below.</param>
/// <param name="html">Markup of the listing page to parse.</param>
/// <param name="crawlAll">Only appears in a statement that follows a "//" marker - see the review note below.</param>
/// <remarks>
/// NOTE(review): this method is stored on collapsed physical lines, so every "//" comment
/// swallows the rest of its line. Which statements were originally commented out (the CorpName
/// assignment from cell 2, list.Add(corpStaff), the MaxCount early return) cannot be determined
/// from this text - restore the line breaks from version control before changing the logic.
/// NOTE(review): the failure log passes CorpName after the "人员姓名:" label; Name was probably
/// intended - confirm.
/// NOTE(review): IdNum is computed but an empty string is passed to GenCorpStaff in its place.
/// </remarks>
private void GetCorpStaffSzjsjMethod(string url, IList list, string html, bool crawlAll) { Parser parser = new Parser(new Lexer(html)); NodeList aNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "dgConstBid"))); if (aNodes != null && aNodes.Count == 1 && aNodes[0] is TableTag) { TableTag table = (TableTag)aNodes[0]; for (int i = 1; i < table.Rows.Length; i++) { if (table.Rows[i].Columns.Length == 6) { Type typs = typeof(ATag); string Name = string.Empty, Sex = string.Empty, CredType = string.Empty, IdNum = string.Empty, CorpName = string.Empty, CorpCode = string.Empty, CertCode = string.Empty, CertGrade = string.Empty, RegLevel = string.Empty, RegCode = string.Empty, AuthorUnit = string.Empty, PersonType = string.Empty, Province = string.Empty, City = string.Empty, CreateTime = string.Empty, InfoSource = string.Empty, Url = string.Empty, Profession = string.Empty; Name = table.Rows[i].Columns[1].ToPlainTextString().Trim().Replace(" ", ""); //Sex = table.Rows[i].Columns[1].ToPlainTextString().Trim().Replace(" ", ""); string urlSpilt = (table.Rows[i].Columns[1].Children.SearchFor(typs, true)[0] as ATag).Link; string idnum = urlSpilt.Replace("GoDetail('", "").Replace("');", ""); //urlSpilt.Substring(urlSpilt.IndexOf("('"), (urlSpilt.Length - 2)); IdNum = idnum.Replace("&am", "").Replace("&a", "").Replace("p;c", "").Replace("cate", "").Replace("cat", "").Replace("ate", ""); // CorpName = table.Rows[i].Columns[2].ToPlainTextString().Trim().Replace(" ", ""); CorpCode = CorpName; CertCode = table.Rows[i].Columns[4].ToPlainTextString().Trim().Replace(" ", ""); Profession = table.Rows[i].Columns[5].ToPlainTextString().Trim().Replace(" ", ""); PersonType = table.Rows[i].Columns[3].ToPlainTextString().Trim().Replace(" ", ""); Url = "http://61.144.226.2/ryxx/Detail_LWDZ.aspx?ID_NUMBER=" + idnum; string ctxhtml = string.Empty; try { ctxhtml = ToolWeb.GetHtmlByUrl(Url, Encoding.Default); } catch (Exception ex) { 
Logger.Error("人员姓名:" + CorpName + ",证件号:" + IdNum + "所在单位:" + CorpName + "," + Url + ";" + ex); continue; } Parser parserCtx = new Parser(new Lexer(ctxhtml)); NodeList ctxNode = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("borderColor", "#cccccc"))); TableTag tabTag = ctxNode[0] as TableTag; string text = ctxNode.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("TD"), new HasAttributeFilter("width", "76%")), true).AsString().Replace(" ", ""); string strSpilt = "任职企业编号:.*?\r\n"; MatchCollection mc = Regex.Matches(text, strSpilt); foreach (Match m in mc) { CorpCode = m.ToString().Replace("任职企业编号:", "").Replace("\r\n", ""); } CorpStaff corpStaff = ToolDb.GenCorpStaff(Name, Sex, CredType, string.Empty, CorpName, CorpCode, CertCode, RegLevel, RegCode, AuthorUnit, PersonType, CertGrade, "广东省", "深圳市区", "深圳市住房和建设局", Url, Profession, "", "", "", ""); // list.Add(corpStaff); ToolDb.SaveEntity(corpStaff, this.ExistCompareFields); // if (!crawlAll && list.Count >= this.MaxCount) return; } } parser.Reset(); } }
/// <summary>
/// Requests the leader list of the enterprise identified by <paramref name="enterpriseId"/>
/// and saves one <c>CorpLeader</c> record per entry of the response's "data" array.
/// </summary>
/// <param name="info">Company the leaders belong to; <c>info.Id</c> and <c>info.Url</c> are stored on every record.</param>
/// <param name="enterpriseId">Value placed in the enterpriseId query parameter of the request.</param>
protected void AddCorpLeader(CorpInfo info, string enterpriseId)
{
    string url = "http://202.104.65.182:8081/G2/webdrive/web-enterprise-leader.do?enterpriseId=" + enterpriseId + "&data&filter_params_=rowNum,leaderId,name,title,safetyLicenseCode,safetyLicenseIssuer,safetyLicenseValidEnd&defined_operations_=&nocheck_operations_=&";
    string nd = ToolHtml.GetDateTimeLong(DateTime.Now).ToString();
    NameValueCollection nvc = ToolWeb.GetNameValueCollection(
        new string[] { "gridSearch", "nd", "PAGESIZE", "PAGE", "sortField", "sortDirection" },
        new string[] { "false", nd, "1000", "1", "", "asc" });

    string strJson;
    try
    {
        strJson = ToolWeb.GetHtmlByUrl(url, nvc);
    }
    catch
    {
        return;
    }

    JavaScriptSerializer serializer = new JavaScriptSerializer();

    // The payload may be empty or not an object; treat both as "no leaders" instead of crashing.
    Dictionary<string, object> leaderJson = serializer.DeserializeObject(strJson) as Dictionary<string, object>;
    object dataNode;
    if (leaderJson == null || !leaderJson.TryGetValue("data", out dataNode))
    {
        return;
    }

    object[] objList = dataNode as object[];
    if (objList == null)
    {
        return;
    }

    foreach (object obj in objList)
    {
        Dictionary<string, object> dic = obj as Dictionary<string, object>;
        if (dic == null)
        {
            continue;
        }

        string leaderName = Convert.ToString(dic["name"]);

        // The response's "title" value is stored as the leader type; duty and phone stay empty.
        string leaderType = Convert.ToString(dic["title"]);

        CorpLeader corpLeader = ToolDb.GenCorpLeader(info.Id, leaderName, string.Empty, leaderType, info.Url, string.Empty);
        ToolDb.SaveEntity(corpLeader, string.Empty);
    }
}
/// <summary>
/// Downloads the page at <paramref name="infoUrl"/>, takes the first table element found in it
/// and saves one <c>CorpPrompt</c> record per data row, always with the "is good" flag set to false.
/// </summary>
/// <param name="info">Company the rows belong to; its Province, City and Id are stored on every record.</param>
/// <param name="infoUrl">Address of the page that contains the table; also stored on every record.</param>
protected void AddCorpPrompt(CorpInfo info, string infoUrl)
{
    string htmldtl;
    try
    {
        htmldtl = ToolWeb.GetHtmlByUrl(infoUrl);
    }
    catch
    {
        // Best effort: when the page cannot be downloaded there is nothing to import.
        return;
    }

    if (string.IsNullOrEmpty(htmldtl))
    {
        return;
    }

    Parser parser = new Parser(new Lexer(htmldtl));
    NodeList nodeList = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
    if (nodeList == null || nodeList.Count == 0)
    {
        return;
    }

    TableTag table = nodeList[0] as TableTag;
    if (table == null)
    {
        return;
    }

    for (int i = 1; i < table.RowCount; i++)
    {
        TableRow tr = table.Rows[i];
        if (tr.ColumnCount <= 1)
        {
            // A row with at most one cell ends the data section.
            break;
        }

        if (tr.ColumnCount < 5)
        {
            // Cells 0..4 are read below; skip short rows instead of failing on the index.
            continue;
        }

        string recordCode = tr.Columns[0].ToNodePlainString();
        string recordName = tr.Columns[1].ToNodePlainString();
        string recordInfo = tr.Columns[2].ToNodePlainString();
        string implUnit = tr.Columns[3].ToNodePlainString();
        string beginDate = tr.Columns[4].ToPlainTextString().GetDateRegex();
        bool isGood = false;

        // The third argument (area) is not available on this page and is passed empty.
        CorpPrompt corp = ToolDb.GetCorpPrompt(info.Province, info.City, "", info.Id, recordCode, recordName, recordInfo, implUnit, beginDate, isGood, infoUrl);
        ToolDb.SaveEntity(corp, string.Empty);
    }
}
/// <summary>
/// Downloads the page at <paramref name="infoUrl"/>, takes the first table element found in it
/// and saves one <c>CorpResults</c> record per data row. Only project code, name, address,
/// class and build unit are read from the row; every other field is stored empty.
/// </summary>
/// <param name="info">Company the rows belong to; <c>info.Id</c> and <c>info.Url</c> are stored on every record.</param>
/// <param name="infoUrl">Address of the page that contains the project table.</param>
protected void AddCorpResults(CorpInfo info, string infoUrl)
{
    string htmldtl;
    try
    {
        htmldtl = ToolWeb.GetHtmlByUrl(infoUrl);
    }
    catch
    {
        // Best effort: when the page cannot be downloaded there is nothing to import.
        return;
    }

    if (string.IsNullOrEmpty(htmldtl))
    {
        return;
    }

    Parser parser = new Parser(new Lexer(htmldtl));
    NodeList nodeList = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
    if (nodeList == null || nodeList.Count == 0)
    {
        return;
    }

    TableTag table = nodeList[0] as TableTag;
    if (table == null)
    {
        return;
    }

    // Fields the page does not provide; kept as named values so the long argument list stays readable.
    string grantDate = string.Empty, chargeDept = string.Empty, prjClassLevel = string.Empty,
           buildArea = string.Empty, inviteArea = string.Empty, prospUnit = string.Empty,
           designUnit = string.Empty, superUnit = string.Empty, constUnit = string.Empty,
           prjStartDate = string.Empty, prjEndDate = string.Empty;

    for (int i = 1; i < table.RowCount; i++)
    {
        TableRow tr = table.Rows[i];
        if (tr.ColumnCount <= 1)
        {
            // A row with at most one cell ends the data section.
            break;
        }

        if (tr.ColumnCount < 6)
        {
            // Cells 1..5 are read below; skip short rows instead of failing on the index.
            continue;
        }

        string prjCode = tr.Columns[1].ToNodePlainString();
        string prjName = tr.Columns[2].ToNodePlainString();
        string prjAddress = tr.Columns[3].ToNodePlainString();
        string prjClass = tr.Columns[4].ToNodePlainString();
        string buildUnit = tr.Columns[5].ToNodePlainString();

        CorpResults result = ToolDb.GenCorpResults(info.Id, prjName, prjCode, buildUnit, grantDate, prjAddress, chargeDept, prjClassLevel, prjClass, buildArea, inviteArea, prospUnit, designUnit, superUnit, constUnit, prjStartDate, prjEndDate, info.Url);
        ToolDb.SaveEntity(result, string.Empty);
    }
}
/// <summary>
/// Crawls the listing at <c>SiteUrl</c> (table with class "list_table"), follows the link in
/// cell 2 of every data row to its detail page and saves one <c>CorpWarning</c> record for each
/// detail page that contains the table with id "Table8".
/// </summary>
/// <param name="crawlAll">Not consulted by this implementation.</param>
/// <returns>An empty list; records are written directly through <c>ToolDb.SaveEntity</c>.</returns>
/// <remarks>
/// NOTE(review): the saved <c>CorpWarning</c> is created with its default constructor and none
/// of the values parsed below are copied onto it. That looks unintended, but the entity's
/// members are not visible from here, so the parsing is kept and the wiring is left to be confirmed.
/// </remarks>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    string htl;
    try
    {
        htl = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.Default);
    }
    catch (Exception)
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(htl));
    NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "list_table")));
    if (nodeList == null || nodeList.Count == 0)
    {
        return list;
    }

    TableTag table = nodeList[0] as TableTag;
    if (table == null)
    {
        return list;
    }

    // Built once; the pattern does not depend on the row.
    Regex regDate = new Regex(@"\d{4}-\d{1,2}-\d{1,2}");
    int sqlCount = 0;

    for (int j = 1; j < table.RowCount; j++)
    {
        // Same stop condition as before (saving continues while sqlCount <= MaxCount), but it is
        // evaluated before the detail page is requested so no download is wasted.
        if (sqlCount > this.MaxCount)
        {
            return list;
        }

        TableRow tr = table.Rows[j];
        if (tr.ColumnCount < 4)
        {
            // Cells 1..3 are read below; a short row must not abort the whole crawl.
            continue;
        }

        NodeList anchors = tr.Columns[2].SearchFor(typeof(ATag), true);
        ATag aTag = (anchors != null && anchors.Count > 0) ? anchors[0] as ATag : null;
        NodeList images = tr.Columns[1].SearchFor(typeof(ImageTag), true);
        ImageTag img = (images != null && images.Count > 0) ? images[0] as ImageTag : null;
        if (aTag == null || aTag.Link == null || img == null || img.ImageURL == null)
        {
            // Rows without both the detail link and the colour icon cannot be interpreted.
            continue;
        }

        string WarningName = tr.Columns[2].ToPlainTextString().Trim();
        string DateStage = "1"; // 0 stands for half a year
        string Color = img.ImageURL.Contains("yellow") ? "1" : "0"; // 0 stands for red, 1 for yellow
        string PrjName = tr.Columns[3].ToPlainTextString().Trim();
        string UrlInfo = "http://61.144.226.2/jsxx/zjjsdetail.aspx?ID=" + aTag.Link.Replace("GoView(", "").Replace(");", "").Trim();

        string htmldetail;
        try
        {
            // NOTE(review): the literal "= 602;" looks like a mangled numeric character
            // reference for the replacement character - confirm against the live page.
            htmldetail = ToolWeb.GetHtmlByUrl(ToolWeb.UrlEncode(UrlInfo), Encoding.GetEncoding("GB2312")).Replace("= 602;", "罚");
        }
        catch (Exception)
        {
            continue;
        }

        Parser dtlparser = new Parser(new Lexer(htmldetail));
        NodeList dtnode = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "Table8"), new TagNameFilter("table")));
        if (dtnode == null || dtnode.Count == 0)
        {
            continue;
        }

        string WarnCtx = dtnode.AsString().Replace("\t", "").Replace(" ", "").Replace("\r\n", "").Trim();

        // Start from an empty value for every row so a date never leaks from the previous row.
        string Begindate = string.Empty;
        if (WarnCtx.Contains("警示开始日期"))
        {
            Begindate = WarnCtx.Substring(WarnCtx.IndexOf("警示开始日期")).Replace("警示开始日期:", "").Trim();
        }
        Begindate = regDate.Match(Begindate).Value.Trim();

        CorpWarning info = new CorpWarning();
        ToolDb.SaveEntity(info, this.ExistCompareFields);
        sqlCount++;
    }

    return list;
}
/// <summary>
/// Crawls every page of the company listing at <c>SiteUrl</c> (table with id "bean"). For each
/// row it downloads the company detail page, builds a <c>CorpInfo</c>, deletes any previously
/// stored company with the same name / type / source together with its child rows, saves the
/// new company and re-imports the child sections from the detail page. After all pages it
/// runs <c>ToolCoreDb.ExecuteProcedure</c> and relinks <c>AttenCorp.FkId</c>.
/// </summary>
/// <param name="crawlAll">Not consulted by this implementation.</param>
/// <returns>Always <c>null</c>; all results are written directly to the database.</returns>
/// <remarks>
/// The crawl pauses for 700000 ms after every 89 processed companies.
/// A child section is re-imported only when its delete statement did not return -1.
/// </remarks>
protected override IList ExecuteCrawl(bool crawlAll)
{
    int count = 1;
    int pageInt = 1;
    string html;
    try
    {
        html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
    }
    catch
    {
        return null;
    }

    // The page count is taken from the "page=" value of the link with id "lx"; default is one page.
    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("id", "lx")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            pageInt = int.Parse(pageNode.GetATagHref().GetRegexBegEnd("page=", "&"));
        }
        catch
        {
            pageInt = 1;
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = ToolWeb.GetHtmlByUrl(this.SiteUrl + "&page=" + i.ToString(), Encoding.Default);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "bean")));
        if (nodeList == null || nodeList.Count == 0)
        {
            continue;
        }

        TableTag table = nodeList[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        for (int j = 1; j < table.RowCount; j++)
        {
            TableRow tr = table.Rows[j];
            if (tr.ColumnCount < 4)
            {
                // Cells 1..3 are read below; a short row must not abort the whole crawl.
                continue;
            }

            string CorpName = tr.Columns[1].ToNodePlainString();
            string CorpCode = tr.Columns[2].ToNodePlainString();
            string LinkMan = tr.Columns[3].ToNodePlainString();
            string CorpAddress = string.Empty, RegDate = string.Empty, RegFund = string.Empty,
                   LinkPhone = string.Empty, Fax = string.Empty, Email = string.Empty;

            // Not provided by the listing or the detail page; stored empty.
            string BusinessCode = string.Empty, BusinessType = string.Empty, CorpSite = string.Empty,
                   ISOQualNum = string.Empty, ISOEnvironNum = string.Empty, OffAdr = string.Empty;

            string href = tr.Columns[1].GetATagValue("onclick");
            string cUrl;
            string htmldtl;
            try
            {
                // The onclick value has the form doView('a','b'); the two arguments become query values.
                string[] url = href.Replace("doView", "").Replace("(", "").Replace(")", "").Replace("'", "").Split(',');
                cUrl = "http://61.144.226.2:8001/web/enterprs/unitInfoAction.do?method=toView&qybh=" + url[0] + "&certType=1&orgcode=" + url[1];
                htmldtl = ToolWeb.GetHtmlByUrl(cUrl, Encoding.Default);
            }
            catch
            {
                continue;
            }

            // NOTE(review): this replaces every "th" substring, not only th tags - confirm that
            // label/value text containing "th" is not affected.
            parser = new Parser(new Lexer(htmldtl.Replace("th", "td")));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "infoTableL")));
            TableTag tabledtl = (dtlNode != null && dtlNode.Count > 0) ? dtlNode[0] as TableTag : null;
            if (tabledtl != null)
            {
                // Flatten the table into "label:value\r\n" text so the fields can be pulled out by label.
                StringBuilder ctxBuilder = new StringBuilder();
                for (int d = 0; d < tabledtl.RowCount; d++)
                {
                    for (int k = 0; k < tabledtl.Rows[d].ColumnCount; k++)
                    {
                        string cell = tabledtl.Rows[d].Columns[k].ToNodePlainString();
                        ctxBuilder.Append(cell).Append(k == 0 ? ":" : "\r\n");
                    }
                }

                string ctx = ctxBuilder.ToString();
                LinkPhone = ctx.GetRegex("联系电话");
                Fax = ctx.GetRegex("传真");
                Email = ctx.GetRegex("电子邮箱");
                CorpAddress = ctx.GetRegex("注册地址");
                RegFund = ctx.GetRegex("注册资金");
                RegDate = ctx.GetRegex("设立时间");
            }

            CorpInfo info = ToolDb.GenCorpInfo(CorpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, "设计与施工一体化企业", "广东省", "深圳市", "深圳市住房和建设局", cUrl, ISOQualNum, ISOEnvironNum, OffAdr);

            // The company name comes from a crawled page, so single quotes are doubled before the
            // values are placed inside SQL string literals.
            string lookupSql = string.Format(
                "select Id from CorpInfo where CorpName='{0}' and CorpType='{1}' and InfoSource='{2}'",
                (info.CorpName ?? string.Empty).Replace("'", "''"),
                (info.CorpType ?? string.Empty).Replace("'", "''"),
                (info.InfoSource ?? string.Empty).Replace("'", "''"));
            object obj = ToolDb.ExecuteScalar(lookupSql);

            int qualCount = 0, leaderCount = 0, awardCount = 0, certCount = 0, punishCount = 0, seclicCount = 0, seclicstaffCount = 0, tecstaffCount = 0, deviceCount = 0, resultCount = 0, infoCount = 0;
            if (obj != null && obj.ToString() != "")
            {
                // The company already exists: remove its child rows first, then the company row.
                string id = obj.ToString();
                qualCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpQual where CorpId='{0}'", id));
                leaderCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpLeader where CorpId='{0}'", id));
                awardCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpAward where CorpId='{0}'", id));
                certCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpCert where CorpId='{0}'", id));
                punishCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpPunish where CorpId='{0}'", id));
                seclicCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpSecLic where CorpId='{0}'", id));
                seclicstaffCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpSecLicStaff where CorpId='{0}'", id));
                tecstaffCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpTecStaff where CorpId='{0}'", id));
                deviceCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpDevice where CorpId='{0}'", id));
                resultCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpResults where CorpId='{0}'", id));
                infoCount = ToolCoreDb.ExecuteSql(string.Format("delete from CorpInfo where Id='{0}'", id));
            }

            // All counters stay 0 when nothing had to be deleted, so one set of checks covers both
            // the "new company" and the "replaced company" case.
            if (infoCount != -1 && ToolDb.SaveEntity(info, string.Empty))
            {
                if (qualCount != -1) { AddCorpQual(info, htmldtl); }
                if (awardCount != -1) { AddCorpAward(info, htmldtl); }
                if (certCount != -1) { AddCorpCert(info, htmldtl); }
                if (deviceCount != -1) { AddCorpDevice(info, htmldtl); }
                if (punishCount != -1) { AddCorpPunish(info, htmldtl); }
                if (resultCount != -1) { AddCorpResults(info, htmldtl); }
                if (seclicCount != -1) { AddCorpSecLic(info, htmldtl); }
                if (seclicstaffCount != -1) { AddCorpSecLicStaff(info, htmldtl); }
                if (tecstaffCount != -1) { AddCorpTecStaff(info, htmldtl); }
                if (leaderCount != -1) { AddCorpLeader(info, htmldtl); }
            }

            // Throttle: pause after every 89 processed companies.
            count++;
            if (count >= 90)
            {
                count = 1;
                Thread.Sleep(700000);
            }
        }
    }

    ToolCoreDb.ExecuteProcedure();
    string sql = "update a set a.FkId= c.Id FROM AttenCorp a left join CorpInfo c on c.CorpName=A.CorpName";
    ToolDb.ExecuteSql(sql);
    return null;
}
/// <summary>
/// Imports the company's qualification certificates: pages through the certificate list
/// service and, for every certificate, requests its detail entries and saves one
/// <c>CorpQual</c> record per entry.
/// </summary>
/// <param name="info">Company being imported; <c>info.CorpCode</c> is sent as "orgCode", <c>info.Id</c> and <c>info.Url</c> are stored on every record.</param>
/// <param name="param">Value sent as the "param" form field of both requests.</param>
/// <param name="corpType">Value sent as the "corpType" form field of the list request.</param>
/// <remarks>
/// A page or certificate whose request fails twice, or whose response does not have the
/// expected JSON shape, is skipped.
/// </remarks>
protected void AddCorpQual(CorpInfo info, string param, string corpType)
{
    string url = "http://portal.szjs.gov.cn:8888/publicShow/queryCertificateInfo.html";
    string urlDtl = "http://portal.szjs.gov.cn:8888/publicShow/queryCertificateDetail.html";
    string[] postParams = new string[] { "param", "corpType", "orgCode", "page" };
    JavaScriptSerializer java = new JavaScriptSerializer();

    string html;
    NameValueCollection nvc = ToolWeb.GetNameValueCollection(postParams, new string[] { param, corpType, info.CorpCode, "1" });
    if (!TryPostQualQuery(url, nvc, out html))
    {
        return;
    }

    Dictionary<string, object> jsonResults = java.DeserializeObject(html) as Dictionary<string, object>;
    if (jsonResults == null)
    {
        return;
    }

    // "totalPage" is only trusted when it is present and an int; otherwise a single page is assumed.
    int pageInt = 1;
    object totalPage;
    if (jsonResults.TryGetValue("totalPage", out totalPage) && totalPage is int)
    {
        pageInt = (int)totalPage;
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            // Page 1 was already fetched above; later pages are requested on demand.
            nvc = ToolWeb.GetNameValueCollection(postParams, new string[] { param, corpType, info.CorpCode, i.ToString() });
            if (!TryPostQualQuery(url, nvc, out html))
            {
                continue;
            }

            jsonResults = java.DeserializeObject(html) as Dictionary<string, object>;
            if (jsonResults == null)
            {
                continue;
            }
        }

        object recordsNode;
        object[] dicQuals = jsonResults.TryGetValue("records", out recordsNode) ? recordsNode as object[] : null;
        if (dicQuals == null)
        {
            continue;
        }

        foreach (object dicQual in dicQuals)
        {
            Dictionary<string, object> dic = dicQual as Dictionary<string, object>;
            if (dic == null)
            {
                continue;
            }

            string qualType = Convert.ToString(dic["name"]);
            string qualCode = Convert.ToString(dic["cert_no"]);
            string certType = Convert.ToString(dic["cert_type"]);
            string certId = Convert.ToString(dic["cert_id"]);

            // The detail request sends the certificate's own type in the "corpType" field.
            NameValueCollection dtlNvc = ToolWeb.GetNameValueCollection(new string[] { "param", "corpType", "cert_id" }, new string[] { param, certType, certId });
            string htmldtl;
            if (!TryPostQualQuery(urlDtl, dtlNvc, out htmldtl))
            {
                continue;
            }

            object[] dtlQuals = java.DeserializeObject(htmldtl) as object[];
            if (dtlQuals == null)
            {
                continue;
            }

            foreach (object objQual in dtlQuals)
            {
                Dictionary<string, object> dicDtl = objQual as Dictionary<string, object>;
                if (dicDtl == null)
                {
                    continue;
                }

                string qualName = Convert.ToString(dicDtl["name1"]);
                string qualLevel = Convert.ToString(dicDtl["name2"]);
                string licUnit = Convert.ToString(dicDtl["appr_org"]);
                string licDate = Convert.ToString(dicDtl["appr_date"]);
                string validDate = Convert.ToString(dicDtl["valid_period"]);
                string qualNum = qualLevel.GetLevel();

                // The sequence number is not part of the response and is stored empty.
                CorpQual qual = ToolDb.GenCorpQual(info.Id, qualName, qualCode, string.Empty, qualType, qualLevel, validDate, licDate, licUnit, info.Url, qualNum, "广东省", "深圳市");
                ToolDb.SaveEntity(qual, string.Empty);
            }
        }
    }
}

/// <summary>
/// Posts <paramref name="form"/> to <paramref name="address"/> as UTF-8. After a failure it
/// waits 12 minutes and tries once more; after a second failure it waits 8 more minutes and gives up.
/// </summary>
/// <returns><c>true</c> when a response was received; otherwise <c>false</c> with an empty <paramref name="response"/>.</returns>
private bool TryPostQualQuery(string address, NameValueCollection form, out string response)
{
    try
    {
        response = ToolWeb.GetHtmlByUrl(address, form, Encoding.UTF8);
        return true;
    }
    catch
    {
        Thread.Sleep(12 * 60 * 1000);
    }

    try
    {
        response = ToolWeb.GetHtmlByUrl(address, form, Encoding.UTF8);
        return true;
    }
    catch
    {
        Thread.Sleep(8 * 60 * 1000);
        response = string.Empty;
        return false;
    }
}
/// <summary>
/// Enterprise safety-personnel certificates: queries the safety personnel of a
/// company page by page and saves one <c>CorpSecLicStaff</c> entity per record.
/// </summary>
/// <param name="info">Company being processed; its <c>CorpCode</c> is posted as <c>orgCode</c> and its <c>Id</c>/<c>Url</c> are stored on each saved row.</param>
/// <param name="param">Value posted as the <c>param</c> form field.</param>
/// <param name="corpType">Value posted as the <c>corpType</c> form field.</param>
protected void AddCorpSecLicStaff(CorpInfo info, string param, string corpType)
{
    string url = "http://portal.szjs.gov.cn:8888/publicShow/queryPersonSafe.html";
    string[] postParams = new string[] { "param", "corpType", "orgCode", "page" };
    string[] postValues = new string[] { param, corpType, info.CorpCode, "1" };
    NameValueCollection nvc = ToolWeb.GetNameValueCollection(postParams, postValues);

    string html = string.Empty;
    try
    {
        html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
    }
    catch
    {
        // One retry after a long back-off; give up on this company if it fails again.
        Thread.Sleep(12 * 60 * 1000);
        try
        {
            html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
        }
        catch
        {
            Thread.Sleep(8 * 60 * 1000);
            return;
        }
    }

    JavaScriptSerializer java = new JavaScriptSerializer();

    // An empty body deserializes to null; a non-object payload cannot be paged either.
    Dictionary<string, object> jsonResults = java.DeserializeObject(html) as Dictionary<string, object>;
    if (jsonResults == null)
    {
        return;
    }

    int pageInt = 1;
    object totalPage;
    if (jsonResults.TryGetValue("totalPage", out totalPage) && totalPage is int)
    {
        pageInt = (int)totalPage;
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            postValues = new string[] { param, corpType, info.CorpCode, i.ToString() };
            nvc = ToolWeb.GetNameValueCollection(postParams, postValues);
            try
            {
                html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
            }
            catch
            {
                Thread.Sleep(12 * 60 * 1000);
                try
                {
                    html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
                }
                catch
                {
                    Thread.Sleep(8 * 60 * 1000);
                    continue;
                }
            }

            jsonResults = java.DeserializeObject(html) as Dictionary<string, object>;
            if (jsonResults == null)
            {
                continue;
            }
        }

        object recordsValue;
        object[] dicRecords = jsonResults.TryGetValue("records", out recordsValue) ? recordsValue as object[] : null;
        if (dicRecords == null)
        {
            continue;
        }

        foreach (object dicRecord in dicRecords)
        {
            Dictionary<string, object> dic = (Dictionary<string, object>)dicRecord;
            string PersonName = Convert.ToString(dic["name"]);
            string PersonCertNo = Convert.ToString(dic["lics_id"]);
            string GrantUnit = Convert.ToString(dic["issue_dept"]);
            string GrantDate = Convert.ToString(dic["issue_date"]);

            CorpSecLicStaff SecLicStaff = ToolDb.GenCorpSecLicStaff(info.Id, PersonName, PersonCertNo, GrantUnit, GrantDate, info.Url);
            ToolDb.SaveEntity(SecLicStaff, string.Empty);
        }
    }
}
/// <summary>
/// Crawls the paged ASP.NET company list at <c>SiteUrl</c>. For every listed company the
/// detail page is fetched, an existing <c>CorpInfo</c> row with the same name and type is
/// deleted together with its dependent rows, and the company, its leaders, qualifications,
/// technical staff and office address are saved again.
/// </summary>
/// <param name="crawlAll">Not read by this implementation.</param>
/// <returns>Always <c>null</c>; entities are saved directly instead of being returned.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    string html = string.Empty;
    string cookiestr = string.Empty;
    string viewState = string.Empty;
    string eventValidation = string.Empty;
    int pageInt = 1;
    try
    {
        html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
    }
    catch
    {
        return (null);
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ContentPlaceHolder1_AspNetPager1")), true), new TagNameFilter("a")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            // The last pager link's href carries the final page number between ',' and ')'.
            string temp = pageNode[pageNode.Count - 1].GetATagHref().Replace("'", "").Replace(")", "kdxx").Replace(",", "xxdk");
            pageInt = int.Parse(temp.GetRegexBegEnd("xxdk", "kdxx"));
        }
        catch
        {
            // Pager could not be parsed: only the first page is processed.
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                if (i == 2)
                {
                    // Hidden ASP.NET fields are taken once, from the first page.
                    viewState = ToolWeb.GetAspNetViewState(html);
                    eventValidation = ToolWeb.GetAspNetEventValidation(html);
                }
                NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                    new string[] { "ctl00$ContentPlaceHolder1$ScriptManager1", "ctl00$ContentPlaceHolder1$txtORGNAME", "ctl00$ContentPlaceHolder1$txtORGCODE", "ctl00$ContentPlaceHolder1$txtPNAME", "ctl00$ContentPlaceHolder1$txtIDNUM", "ctl00$ContentPlaceHolder1$txtHIREERORGNAME", "ctl00$ContentPlaceHolder1$txtHIREERORGCODE", "ctl00$ContentPlaceHolder1$ddlRegType", "ctl00$ContentPlaceHolder1$ddlTitle", "ctl00$ContentPlaceHolder1$ddlABC", "ctl00$ContentPlaceHolder1$ddlCert", "__VIEWSTATE", "__EVENTTARGET", "__EVENTARGUMENT", "__EVENTVALIDATION", "__ASYNCPOST" },
                    new string[] { "ctl00$ContentPlaceHolder1$UpdatePanel1|ctl00$ContentPlaceHolder1$AspNetPager1", "", "", "", "", "", "", "", "", "", "", viewState, "ctl00$ContentPlaceHolder1$AspNetPager1", i.ToString(), eventValidation, "true" }
                    );
                html = ToolWeb.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "data-grid")));
        if (nodeList == null || nodeList.Count == 0)
        {
            continue;
        }

        TableTag table = nodeList[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        // Row 0 is skipped (the loop has always started at 1).
        for (int j = 1; j < table.RowCount; j++)
        {
            TableRow tr = table.Rows[j];
            if (tr.ColumnCount < 4)
            {
                // Columns 0..3 are read below; a shorter row cannot be a company row.
                continue;
            }

            string CorpName = string.Empty, CorpCode = string.Empty, CorpAddress = string.Empty, RegDate = string.Empty, RegFund = string.Empty, BusinessCode = string.Empty, BusinessType = string.Empty, LinkMan = string.Empty, LinkPhone = string.Empty, Fax = string.Empty, Email = string.Empty, CorpSite = string.Empty, cUrl = string.Empty, ISOQualNum = string.Empty, ISOEnvironNum = string.Empty, OffAdr = string.Empty;

            CorpName = tr.Columns[0].ToNodePlainString();
            LinkMan = tr.Columns[1].ToNodePlainString();

            // The detail address is the first argument of the OpenWin('...') onclick handler.
            cUrl = tr.Columns[0].GetATagValue("onclick").Replace("OpenWin('", "");
            if (cUrl.IndexOf("'") > 0)
            {
                cUrl = "http://113.108.219.40/intogd/" + cUrl.Remove(cUrl.IndexOf("'"));
            }

            string htmldtl = string.Empty;
            try
            {
                htmldtl = ToolWeb.GetHtmlByUrl(cUrl, Encoding.UTF8);
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "data-table")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            TableTag dtlTable = dtlNode[0] as TableTag;
            if (dtlTable == null)
            {
                continue;
            }

            // Flatten the detail table into "label:value\r\n" text for the regex helpers.
            StringBuilder ctxBuilder = new StringBuilder();
            for (int k = 0; k < dtlTable.RowCount; k++)
            {
                for (int d = 0; d < dtlTable.Rows[k].ColumnCount; d++)
                {
                    TableColumn col = dtlTable.Rows[k].Columns[d];
                    if (col.GetAttribute("class") == "td-left")
                    {
                        ctxBuilder.Append(col.ToNodePlainString()).Append(":");
                    }
                    else
                    {
                        ctxBuilder.Append(col.ToNodePlainString()).Append("\r\n");
                    }
                }
            }
            string ctx = ctxBuilder.ToString();

            RegDate = ctx.GetRegex("成立时间,注册时间").GetDateRegex();
            RegFund = ctx.GetRegex("注册资本");
            BusinessCode = ctx.GetRegex("营业执照注册号");
            string CorpType = "外地进粤企业";
            CorpAddress = ctx.GetRegex("注册详细地址");
            if (!string.IsNullOrEmpty(RegFund) && !RegFund.Contains("万"))
            {
                RegFund += "万";
            }

            CorpInfo corp = ToolDb.GenCorpInfo(CorpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, CorpType, "广东省", "广东地区", "广东省住房和城乡建设厅", cUrl, ISOQualNum, ISOEnvironNum, OffAdr);

            // The company name is scraped text: double any single quote so it cannot
            // terminate the SQL string literal.
            string strSql = string.Format(
                "select Id from CorpInfo where CorpName='{0}' and CorpType='{1}'",
                (corp.CorpName ?? string.Empty).Replace("'", "''"),
                (corp.CorpType ?? string.Empty).Replace("'", "''"));
            DataTable dt = ToolCoreDb.GetDbData(strSql);
            if (dt != null && dt.Rows.Count > 0)
            {
                // Remove the previous snapshot (dependent rows first, then the company).
                string id = dt.Rows[0]["Id"].ToString();
                ToolCoreDb.ExecuteSql(string.Format("delete from CorpInstitution where CorpId='{0}'", id));
                ToolCoreDb.ExecuteSql(string.Format("delete from CorpQual where CorpId='{0}'", id));
                ToolCoreDb.ExecuteSql(string.Format("delete from CorpLeader where CorpId='{0}'", id));
                ToolCoreDb.ExecuteSql(string.Format("delete from CorpTecStaff where CorpId='{0}'", id));
                ToolCoreDb.ExecuteSql(string.Format("delete from CorpInfo where Id='{0}'", id));
            }

            if (ToolDb.SaveEntity(corp, this.ExistCompareFields))
            {
                if (!string.IsNullOrEmpty(LinkMan))
                {
                    CorpLeader leader = ToolDb.GenCorpLeader(corp.Id, LinkMan, "", "企业法定代表人", cUrl);
                    ToolDb.SaveEntity(leader, "");
                }

                string techLeader = tr.Columns[2].ToNodePlainString();
                if (!string.IsNullOrEmpty(techLeader))
                {
                    CorpLeader leader = ToolDb.GenCorpLeader(corp.Id, techLeader, "", "技术负责人", cUrl);
                    ToolDb.SaveEntity(leader, "");
                }

                string localLeader = tr.Columns[3].ToNodePlainString();
                if (!string.IsNullOrEmpty(localLeader))
                {
                    CorpLeader leader = ToolDb.GenCorpLeader(corp.Id, localLeader, "", "驻粤负责人", cUrl);
                    ToolDb.SaveEntity(leader, "");
                }

                AddCorpQual(corp, htmldtl);
                AddCorpTecStaff(corp, htmldtl);
                GetOffAddress(htmldtl, cUrl, corp);
            }
        }
    }
    return (null);
}
/// <summary>
/// Crawls the administrative-punishment JSON feed: reads the total row count from the first
/// response, walks every page of 10 rows and saves one <c>CorpPunish</c> entity (plus its
/// attached document) per entry of the <c>DataList</c> array.
/// </summary>
/// <param name="crawlAll">Not read by this implementation; the number of save attempts is bounded by <c>MaxCount</c>.</param>
/// <returns>The (never populated) result list; entities are saved directly.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    string htl = string.Empty;
    int sqlCount = 0;
    int pageInt = 1;
    try
    {
        htl = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch
    {
        return (list);
    }

    if (htl.Contains("RowCount"))
    {
        try
        {
            // Everything after "RowCount" is reduced to the bare number.
            int index = htl.IndexOf("RowCount");
            string pageStr = htl.Substring(index, htl.Length - index).Replace("RowCount", "").Replace("}", "").Replace(":", "").Replace("\"", "");

            // Round UP to whole pages of 10 rows. Convert.ToInt32(decimal) rounds to the
            // nearest integer, so "round then add one" over-counted (27 rows gave 4 pages).
            pageInt = (int)Math.Ceiling(decimal.Parse(pageStr) / 10);
        }
        catch
        {
            // Row count not parseable: only the first page is processed.
        }
    }

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                htl = ToolWeb.GetHtmlByUrl("http://www.szjs.gov.cn/build/build.ashx?_=1353579439242&menu=%E8%A1%8C%E6%94%BF%E5%A4%84%E7%BD%9A&pageSize=10&pageIndex=" + i.ToString() + "&fileOrg=&fileDate=&fileId=&unitName=&timp=", Encoding.UTF8);
            }
            catch
            {
                // Skip the page; falling through would re-process the previous page's data.
                continue;
            }
        }

        Dictionary<string, object> smsTypeJson = serializer.DeserializeObject(htl) as Dictionary<string, object>;
        if (smsTypeJson == null)
        {
            // Empty or non-object response.
            continue;
        }

        object dataList;
        object[] array = smsTypeJson.TryGetValue("DataList", out dataList) ? dataList as object[] : null;
        if (array == null)
        {
            continue;
        }

        foreach (object obj2 in array)
        {
            string DocNo = string.Empty, PunishType = string.Empty, GrantUnit = string.Empty, DocDate = string.Empty, PunishCtx = string.Empty, GrantName = string.Empty, InfoUrl = string.Empty;
            try
            {
                Dictionary<string, object> dicSmsType = (Dictionary<string, object>)obj2;
                DocNo = Convert.ToString(dicSmsType["FileId"]);
                PunishType = Convert.ToString(dicSmsType["PunTypeText"]);
                GrantUnit = Convert.ToString(dicSmsType["UnitName"]);
                DocDate = Convert.ToString(dicSmsType["ServiceDate"]);
                InfoUrl = "http://www.szjs.gov.cn/PUNhtml/" + Convert.ToString(dicSmsType["PunDoc"]);

                CorpPunish info = ToolDb.GenCorpPunish(string.Empty, DocNo, PunishType, GrantUnit, DocDate, PunishCtx, InfoUrl, GrantName, "1");
                if (sqlCount <= this.MaxCount)
                {
                    if (ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        string file = Convert.ToString(dicSmsType["PunDoc"]);
                        AddBaseFile(InfoUrl, file, info);
                    }
                    sqlCount++;
                }
                else
                {
                    return (list);
                }
            }
            catch
            {
                // A malformed entry must not stop the remaining entries.
                continue;
            }
        }
    }
    return (list);
}
/// <summary>
/// Walks the paged warning table (element id "bean") reachable from <c>SiteUrl</c> and
/// collects one <c>CorpWarning</c> per data row.
/// </summary>
/// <param name="crawlAll">When false, collection stops once <c>MaxCount</c> items were gathered.</param>
/// <returns>The collected warnings; empty when the first page cannot be downloaded.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList warnings = new List<CorpWarning>();
    int throttleCounter = 1;
    int lastPage = 1;
    string pageHtml = string.Empty;

    try
    {
        pageHtml = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.Default);
    }
    catch
    {
        return warnings;
    }

    // The pager link with id "lx" carries the last page number in its "page=" argument.
    Parser parser = new Parser(new Lexer(pageHtml));
    NodeList pagerLinks = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("id", "lx")));
    if (pagerLinks != null && pagerLinks.Count > 0)
    {
        try
        {
            lastPage = int.Parse(pagerLinks.GetATagHref().GetRegexBegEnd("page=", "&"));
        }
        catch
        {
        }
    }

    for (int pageNo = 1; pageNo <= lastPage; pageNo++)
    {
        if (pageNo > 1)
        {
            try
            {
                pageHtml = ToolWeb.GetHtmlByUrl(this.SiteUrl + "&page=" + pageNo.ToString(), Encoding.Default);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(pageHtml));
        NodeList tables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "bean")));
        if (tables == null || tables.Count <= 0)
        {
            continue;
        }

        TableTag grid = tables[0] as TableTag;
        int rowIndex = 1; // row 0 is skipped
        while (rowIndex < grid.RowCount)
        {
            TableRow row = grid.Rows[rowIndex];
            string warningName = row.Columns[2].ToNodePlainString();
            string totalScore = row.Columns[3].ToNodePlainString();
            string prjNumber = row.Columns[4].ToNodePlainString();
            string resultScore = row.Columns[5].ToNodePlainString();
            string corpType = row.Columns[6].ToNodePlainString();
            string publicEndDate = row.Columns[7].ToPlainTextString().GetDateRegex();

            CorpWarning warning = ToolDb.GenCorpWarning(
                "广东省", "深圳市区", "",
                string.Empty,            // code
                warningName,
                string.Empty,            // deliveryDate
                "半年度黄色警示",          // warningType
                string.Empty,            // punishmentType
                prjNumber, totalScore, resultScore, corpType, publicEndDate,
                string.Empty,            // warningEndDate
                string.Empty,            // prjName
                string.Empty,            // badInfo
                "深圳市住房和建设局",       // msgType
                "0");                    // color
            warnings.Add(warning);

            if (!crawlAll && warnings.Count >= this.MaxCount)
            {
                return warnings;
            }

            // Pause for eight minutes each time the counter reaches 200.
            throttleCounter++;
            if (throttleCounter >= 200)
            {
                throttleCounter = 1;
                Thread.Sleep(8 * 60 * 1000);
            }

            rowIndex++;
        }
    }

    return warnings;
}
/// <summary>
/// Crawls the paged registered-personnel list at <c>SiteUrl</c> (15 rows per page), opens
/// each person's detail page to read gender, certificate number and company name, and saves
/// one <c>CorpStaff</c> entity per person.
/// </summary>
/// <param name="crawlAll">Not read by this implementation.</param>
/// <returns>Always <c>null</c>; entities are saved directly instead of being returned.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    int count = 0;
    string html = string.Empty;
    int pageInt = 1;
    try
    {
        html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
    }
    catch
    {
        return (null);
    }

    // Despite its name this holds the total number of records reported by the pager.
    int totalPage = 0;
    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "clearfix")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            string temp = pageNode.AsString().Replace(",", ",");
            string page = temp.GetRegexBegEnd("total", ",").GetReplace("\":");
            totalPage = int.Parse(page);

            // Ceiling division: "total / 15 + 1" asked for one page too many whenever the
            // total was an exact multiple of the page size.
            pageInt = Math.Max(1, (totalPage + 14) / 15);
        }
        catch
        {
            // Pager could not be parsed: only the first page is processed.
        }
    }

    for (int p = 1; p <= pageInt; p++)
    {
        if (p > 1)
        {
            Logger.Error(p);
            NameValueCollection nvc = ToolWeb.GetNameValueCollection(new string[] { "$total", "$reload", "$pg", "$pgsz" }, new string[] { totalPage.ToString(), "0", p.ToString(), "15" });

            // Up to three attempts, waiting six minutes before the second and third.
            bool pageLoaded = false;
            for (int attempt = 1; attempt <= 3 && !pageLoaded; attempt++)
            {
                try
                {
                    if (attempt > 1)
                    {
                        Thread.Sleep(60 * 1000 * 6);
                    }
                    html = ToolWeb.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8);
                    pageLoaded = true;
                }
                catch
                {
                    // Best effort: fall through to the next attempt.
                }
            }
            if (!pageLoaded)
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "table_box responsive personal")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }

        TableTag table = listNode[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        // The first and the last row are skipped (loop bounds kept from the original).
        for (int i = 1; i < table.RowCount - 1; i++)
        {
            TableRow tr = table.Rows[i];
            if (tr.ColumnCount < 6)
            {
                // Columns 1..5 are read below.
                continue;
            }

            string Name = string.Empty, Sex = string.Empty, CredType = string.Empty, IdNum = string.Empty, CorpName = string.Empty, CorpCode = string.Empty, CertCode = string.Empty, CertGrade = string.Empty, RegLevel = string.Empty, RegCode = string.Empty, AuthorUnit = string.Empty, PersonType = string.Empty, Url = string.Empty, Profession = string.Empty, staffNum = string.Empty, IssuanceTime = string.Empty, Organ = string.Empty;

            Name = tr.Columns[1].ToNodePlainString();
            IdNum = tr.Columns[2].ToNodePlainString();
            CertGrade = tr.Columns[3].ToNodePlainString();
            RegCode = tr.Columns[4].ToNodePlainString();
            PersonType = tr.Columns[5].ToNodePlainString();

            ATag aTag = tr.Columns[1].GetATag();
            if (aTag == null)
            {
                // No detail link in the name cell: nothing to follow.
                continue;
            }
            Url = "http://jzsc.mohurd.gov.cn" + aTag.Link;

            string htmldtl = string.Empty;
            bool detailLoaded = false;
            for (int attempt = 1; attempt <= 3 && !detailLoaded; attempt++)
            {
                try
                {
                    if (attempt > 1)
                    {
                        Thread.Sleep(60 * 1000 * 6);
                    }
                    htmldtl = ToolWeb.GetHtmlByUrl(Url, Encoding.UTF8).GetJsString();
                    detailLoaded = true;
                }
                catch
                {
                    // Best effort: fall through to the next attempt.
                }
            }
            if (!detailLoaded)
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "activeTinyTabContent")));
            if (dtlNode != null && dtlNode.Count > 0)
            {
                string ctx = dtlNode.AsHtml().GetReplace("</dd>", "\r\n").ToCtxString();
                Sex = ctx.GetRegex("性别");
            }

            parser.Reset();
            dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "regcert_tab")));
            if (dtlNode != null && dtlNode.Count > 0)
            {
                string ctx = dtlNode.AsHtml().GetReplace("</dd>", "\r\n").ToCtxString();
                CertCode = ctx.GetRegex("证书编号");
                ATag nameTag = dtlNode.GetATag(1);
                if (nameTag != null)
                {
                    CorpName = nameTag.LinkText.ToNodeString();
                }
            }

            CorpStaff corpStaff = ToolDb.GenCorpStaff(Name, Sex, CredType, IdNum, CorpName, CorpCode, CertCode, RegLevel, RegCode, AuthorUnit, PersonType, CertGrade, "全国", "", "中华人民共和国住房和城乡建设部建筑市场监管司", Url, Profession, staffNum, IssuanceTime, Organ, "");
            ToolDb.SaveEntity(corpStaff, this.ExistCompareFields, this.ExistsUpdate);

            // Pause for six minutes after every 28 saved persons.
            count++;
            if (count >= 28)
            {
                count = 0;
                Thread.Sleep(60 * 1000 * 6);
            }
        }
    }
    return (null);
}
/// <summary>
/// Crawls the tender-notice list of www.jxsggzy.cn: reads the page count from the pager at
/// <c>SiteUrl</c>, walks the numbered list pages, opens every notice and collects one
/// <c>InviteInfo</c> per notice that has an "article-info" block.
/// </summary>
/// <param name="crawlAll">When false, collection stops once <c>MaxCount</c> items were gathered.</param>
/// <returns>The collected notices; empty when the first page cannot be downloaded.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<InviteInfo>();
    int pageInt = 1;
    string html = string.Empty;
    string cookiestr = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
    }
    catch
    {
        return (list);
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("li"), new HasAttributeFilter("class", "wb-page-li")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            string temp = pageNode.AsString().GetRegexBegEnd("/", "\r");
            pageInt = int.Parse(temp);
        }
        catch
        {
            // Pager could not be parsed: only the first page is processed.
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        // Kept in a local: assigning the page address to SiteUrl left the crawler's
        // configured start URL pointing at the last list page after a run.
        string pageUrl = "http://www.jxsggzy.cn/web/jyxx/002002/002002002/" + i + ".html";
        try
        {
            html = ToolWeb.GetHtmlByUrl(pageUrl, Encoding.UTF8, ref cookiestr);
        }
        catch
        {
            continue;
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("li"), new HasAttributeFilter("class", "ewb-list-node clearfix")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }

        for (int j = 0; j < listNode.Count; j++)
        {
            string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty, area = string.Empty;

            ATag aTag = listNode[j].GetATag();
            if (aTag == null)
            {
                // List entry without a link: nothing to follow.
                continue;
            }

            prjName = aTag.GetAttribute("title");
            if (string.IsNullOrWhiteSpace(prjName))
            {
                prjName = aTag.LinkText;
            }
            beginDate = listNode[j].ToPlainTextString().GetDateRegex();

            // A title whose third character is 县/区/市 starts with a three-character area
            // name. The length check avoids indexing past the end of short titles.
            if (prjName != null && prjName.Length >= 3
                && (prjName[2].Equals('县') || prjName[2].Equals('区') || prjName[2].Equals('市')))
            {
                area = prjName.Substring(0, 3);
            }

            InfoUrl = "http://www.jxsggzy.cn" + aTag.Link;
            string htmldtl = string.Empty;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "article-info")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            HtmlTxt = dtlNode.AsHtml();
            inviteCtx = HtmlTxt.ToCtxString();
            prjAddress = inviteCtx.GetAddressRegex();
            if (string.IsNullOrWhiteSpace(prjAddress))
            {
                prjAddress = inviteCtx.GetRegexBegEnd("地址:", "地址:");
            }

            buildUnit = inviteCtx.GetBuildRegex();
            if (buildUnit.Contains("单位章"))
            {
                buildUnit = string.Empty;
            }
            if (buildUnit.Contains("联系人"))
            {
                buildUnit = buildUnit.Remove(buildUnit.IndexOf("联系人"));
            }

            code = inviteCtx.GetCodeRegex();
            inviteType = "交通工程";
            specType = "政府采购";
            msgType = "江西省公共资源交易中心";

            InviteInfo info = ToolDb.GenInviteInfo("江西省", "江西省及地市", area, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
            list.Add(info);
            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return (list);
            }
        }
    }
    return (list);
}
/// <summary>
/// Crawls an ASP.NET grid once per <c>DropDownList2</c> option (1..11 and 999999999): posts
/// the option, hands the first result page to <c>DealHtml</c>, reads the page count from the
/// paging label and then posts for every following page.
/// </summary>
/// <param name="crawlAll">Passed to <c>DealHtml</c>; when false the crawl stops between categories once <c>MaxCount</c> items are in the list.</param>
/// <returns>The list filled by <c>DealHtml</c>; empty when the start page cannot be downloaded.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    string html = string.Empty;
    string html1 = string.Empty;
    IList list = new ArrayList();
    try
    {
        html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch (Exception ex)
    {
        Logger.Error(ex);
        return (list);
    }

    Parser parser = new Parser(new Lexer(html));
    string cookiestr = string.Empty;
    string viewState = ToolWeb.GetAspNetViewState(parser);
    string eventValidation = ToolWeb.GetAspNetEventValidation(parser);

    for (int i = 1; i <= 12; i++)
    {
        // The twelfth iteration posts the special option value 999999999.
        string ddlIndex = i.ToString();
        if (i == 12)
        {
            ddlIndex = "999999999";
        }

        NameValueCollection nvc = ToolWeb.GetNameValueCollection(
            new string[] { "ScriptManager1", "__EVENTTARGET", "__EVENTARGUMENT", "__LASTFOCUS", "__VIEWSTATE", "drpRpt", "DropDownList2", "txtCorpName", "DropDownList1", "GridViewPaging1$txtGridViewPagingForwardTo", "__EVENTVALIDATION" },
            new string[] { "UpdatePanel1|DropDownList2", "DropDownList2", string.Empty, string.Empty, viewState, "419425", ddlIndex, string.Empty, "-1", "1", eventValidation });
        try
        {
            html = ToolWeb.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
            html1 = html;
            // Process the first page.
            DealHtml(list, html, crawlAll, ddlIndex);
        }
        catch (Exception ex)
        {
            Logger.Error(ex);
        }
        if (!crawlAll && list.Count >= this.MaxCount)
        {
            return (list);
        }

        // Read the page count from the paging label.
        int pageInt = 1;
        parser = new Parser(new Lexer(html));
        NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "GridViewPaging1_lblGridViewPagingDesc")));

        // Count is checked as well: an empty (non-null) result used to be dereferenced
        // outside any try block when the label was missing.
        if (tdNodes != null && tdNodes.Count > 0)
        {
            string pageTemp = tdNodes[0].ToPlainTextString().Trim();
            try
            {
                pageTemp = pageTemp.Substring(pageTemp.IndexOf("页,共")).Replace("页,共", string.Empty).Replace("页", string.Empty);
                pageInt = int.Parse(pageTemp);
            }
            catch (Exception ex)
            {
                Logger.Error(ex);
            }
        }
        parser.Reset();

        // Process the following pages.
        if (pageInt > 1)
        {
            // html1 does not change inside the page loop, so the hidden fields are read once.
            string viewStatePage = ToolWeb.GetAspNetViewState(html1);
            string eventValidationPage = ToolWeb.GetAspNetEventValidation(html1);

            for (int j = 2; j <= pageInt; j++)
            {
                NameValueCollection nvcPage = null;
                if (j == 14 && ddlIndex.Equals("2"))
                {
                    // NOTE(review): page 14 of option "2" is skipped and page 15 is requested
                    // with the "forward to page" button instead; the reason is not visible
                    // here. Confirm before changing.
                    j++;
                    nvcPage = ToolWeb.GetNameValueCollection(
                        new string[] { "ScriptManager1", "__EVENTTARGET", "__EVENTARGUMENT", "__LASTFOCUS", "__VIEWSTATE", "drpRpt", "DropDownList2", "txtCorpName", "DropDownList1", "GridViewPaging1$txtGridViewPagingForwardTo", "__EVENTVALIDATION", "GridViewPaging1$btnForwardToPage" },
                        new string[] { "UpdatePanel1|GridViewPaging1$btnForwardToPage", string.Empty, string.Empty, string.Empty, viewStatePage, "419425", ddlIndex, string.Empty, "-1", j.ToString(), eventValidationPage, "Go" });
                }
                else
                {
                    nvcPage = ToolWeb.GetNameValueCollection(
                        new string[] { "ScriptManager1", "__EVENTTARGET", "__EVENTARGUMENT", "__LASTFOCUS", "__VIEWSTATE", "drpRpt", "DropDownList2", "txtCorpName", "DropDownList1", "GridViewPaging1$txtGridViewPagingForwardTo", "__EVENTVALIDATION", "GridViewPaging1$btnNext.x", "GridViewPaging1$btnNext.y" },
                        new string[] { "UpdatePanel1|GridViewPaging1$btnNext", string.Empty, string.Empty, string.Empty, viewStatePage, "419425", ddlIndex, string.Empty, "-1", (j - 1).ToString(), eventValidationPage, "6", "10" });
                }
                try
                {
                    html = ToolWeb.GetHtmlByUrl(SiteUrl, nvcPage, Encoding.UTF8, ref cookiestr);
                    // Process a following page.
                    DealHtml(list, html, crawlAll, ddlIndex);
                }
                catch (Exception ex)
                {
                    Logger.Error(ex);
                    continue;
                }
            }
        }
        if (!crawlAll && list.Count >= this.MaxCount)
        {
            return (list);
        }
    }
    return (list);
}
/// <summary>
/// Enterprise technical staff: queries the technical personnel of a company page by page
/// and saves one <c>CorpTecStaff</c> entity per record.
/// </summary>
/// <param name="info">Company being processed; its <c>CorpCode</c> is posted as <c>orgCode</c> and its <c>Id</c>/<c>Url</c> are stored on each saved row.</param>
/// <param name="param">Value posted as the <c>param</c> form field.</param>
/// <param name="corpType">Value posted as the <c>corpType</c> form field.</param>
protected void AddCorpTecStaff(CorpInfo info, string param, string corpType)
{
    string url = "http://portal.szjs.gov.cn:8888/publicShow/queryTechnology.html";
    string[] postParams = new string[] { "param", "corpType", "orgCode", "page" };
    string[] postValues = new string[] { param, corpType, info.CorpCode, "1" };
    NameValueCollection nvc = ToolWeb.GetNameValueCollection(postParams, postValues);

    string html = string.Empty;
    try
    {
        html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
    }
    catch
    {
        // One retry after a long back-off; give up on this company if it fails again.
        Thread.Sleep(12 * 60 * 1000);
        try
        {
            html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
        }
        catch
        {
            Thread.Sleep(8 * 60 * 1000);
            return;
        }
    }

    JavaScriptSerializer java = new JavaScriptSerializer();

    // An empty body deserializes to null; a non-object payload cannot be paged either.
    Dictionary<string, object> jsonResults = java.DeserializeObject(html) as Dictionary<string, object>;
    if (jsonResults == null)
    {
        return;
    }

    int pageInt = 1;
    object totalPage;
    if (jsonResults.TryGetValue("totalPage", out totalPage) && totalPage is int)
    {
        pageInt = (int)totalPage;
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            postValues = new string[] { param, corpType, info.CorpCode, i.ToString() };
            nvc = ToolWeb.GetNameValueCollection(postParams, postValues);
            try
            {
                html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
            }
            catch
            {
                // Same schedule as the first-page request above: wait 12 minutes, retry once,
                // and cool down 8 minutes only when the retry fails as well (the second
                // sleep used to run before the retry).
                Thread.Sleep(12 * 60 * 1000);
                try
                {
                    html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
                }
                catch
                {
                    Thread.Sleep(8 * 60 * 1000);
                    continue;
                }
            }

            jsonResults = java.DeserializeObject(html) as Dictionary<string, object>;
            if (jsonResults == null)
            {
                continue;
            }
        }

        object recordsValue;
        object[] dicRecords = jsonResults.TryGetValue("records", out recordsValue) ? recordsValue as object[] : null;
        if (dicRecords == null)
        {
            continue;
        }

        foreach (object dicRecord in dicRecords)
        {
            Dictionary<string, object> dic = (Dictionary<string, object>)dicRecord;
            string StaffName = Convert.ToString(dic["name"]);
            string stffType = Convert.ToString(dic["typename"]);
            string CertNo = Convert.ToString(dic["alt_cert_id"]);
            string CertLevel = Convert.ToString(dic["alt_qual_lv"]);
            string IdCard = Convert.ToString(dic["id_number"]);

            CorpTecStaff staff = ToolDb.GenCorpTecStaff(info.Id, StaffName, IdCard, CertLevel, CertNo, info.Url, stffType);
            ToolDb.SaveEntity(staff, string.Empty);
        }
    }
}
/// <summary>
/// Crawls the paged "list-table" of system-integration qualifications at <c>SiteUrl</c>.
/// For each row the company is looked up by name: an unknown company is created together
/// with its qualification, a known company only gets the qualification saved.
/// </summary>
/// <param name="crawlAll">Not read by this implementation.</param>
/// <returns><c>null</c> when the first page cannot be downloaded, otherwise the (never populated) result list; entities are saved directly.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<CorpInfo>();
    string html = string.Empty;
    int pageInt = 1, count = 0;
    try
    {
        html = ToolWeb.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
    }
    catch
    {
        return (null);
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("align", "center")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            // The pager text yields both the record count and the page count; both are
            // posted back with every following page request.
            string countTemp = pageNode.AsString().GetRegexBegEnd("\r", "条").Replace(" ", "").Replace("\r", "").Replace("\n", "");
            string temp = pageNode.AsString().GetRegexBegEnd("/", "页").Replace(" ", "");
            pageInt = int.Parse(temp);
            count = int.Parse(countTemp);
        }
        catch
        {
            // Pager could not be parsed: only the first page is processed.
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                new string[] { "datainfo_id", "datainfo_action", "count", "pages", "page", "dwmc", "zzdj", "zsbh", "szss" },
                new string[] { string.Empty, string.Empty, count.ToString(), pageInt.ToString(), i.ToString(), string.Empty, string.Empty, string.Empty, string.Empty }
                );
            try
            {
                html = ToolWeb.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "list-table")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }

        TableTag table = listNode[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        // Row 0 is skipped (the loop has always started at 1).
        for (int j = 1; j < table.RowCount; j++)
        {
            TableRow tr = table.Rows[j];
            if (tr.ColumnCount < 7)
            {
                // Columns 1..6 are read below.
                continue;
            }

            string QualName = string.Empty, QualCode = string.Empty, QualSeq = string.Empty, qualNum = string.Empty, QualType = string.Empty, QualLevel = string.Empty, ValidDate = string.Empty, LicDate = string.Empty, LicUnit = string.Empty;
            QualName = "计算机信息系统集成";
            QualCode = tr.Columns[3].ToNodePlainString();
            QualLevel = tr.Columns[2].ToNodePlainString();
            LicDate = tr.Columns[5].ToPlainTextString().GetDateRegex();
            qualNum = QualLevel.GetLevel();
            string corpName = tr.Columns[1].ToNodePlainString();
            string city = tr.Columns[6].ToNodePlainString();

            // The company name is scraped text: double any single quote so it cannot
            // terminate the SQL string literal.
            string safeCorpName = (corpName ?? string.Empty).Replace("'", "''");
            object isCorp = ToolDb.ExecuteScalar("select Id from CorpInfo where CorpName='" + safeCorpName + "'");

            if (isCorp == null || isCorp.ToString() == "")
            {
                // Unknown company: create it with the few fields this list provides.
                string CorpCode = string.Empty, CorpAddress = string.Empty, RegDate = string.Empty, RegFund = string.Empty, BusinessCode = string.Empty, BusinessType = string.Empty, LinkMan = string.Empty, LinkPhone = string.Empty, Fax = string.Empty, Email = string.Empty, CorpSite = string.Empty, ISOQualNum = string.Empty, ISOEnvironNum = string.Empty, OffAdr = string.Empty;
                CorpInfo info = ToolDb.GenCorpInfo(corpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, string.Empty, city, city, "中华人民共和国工业和信息化部", this.SiteUrl, ISOQualNum, ISOEnvironNum, OffAdr);
                if (ToolDb.SaveEntity(info, null))
                {
                    CorpQual qual = ToolDb.GenCorpQual(info.Id, QualName, QualCode, QualSeq, QualType, QualLevel, ValidDate, LicDate, LicUnit, info.Url, qualNum, city, city);
                    ToolDb.SaveEntity(qual, "");
                }
            }
            else
            {
                // Known company: save the qualification using the given compare fields.
                CorpQual qual = ToolDb.GenCorpQual(isCorp.ToString(), QualName, QualCode, QualSeq, QualType, QualLevel, ValidDate, LicDate, LicUnit, this.SiteUrl, qualNum, city, city);
                ToolDb.SaveEntity(qual, "QualCode,CorpId,QualName", true);
            }
        }
    }
    return (list);
}
/// <summary>
/// Enterprise performance records: queries the performance list of a company page by page
/// and saves one <c>CorpResults</c> entity per returned record.
/// </summary>
/// <remarks>
/// NOTE(review): the content of each record is not read; every saved row carries the
/// placeholder text "业绩" and today's date. Confirm whether the real fields should be mapped.
/// </remarks>
/// <param name="info">Company being processed; its <c>CorpCode</c> is posted as <c>orgCode</c> and its <c>Id</c>/<c>Url</c> are stored on each saved row.</param>
/// <param name="param">Value posted as the <c>param</c> form field.</param>
/// <param name="corpType">Value posted as the <c>corpType</c> form field.</param>
protected void AddCorpResults(CorpInfo info, string param, string corpType)
{
    string url = "http://portal.szjs.gov.cn:8888/publicShow/queryPerformance.html";
    string[] postParams = new string[] { "param", "corpType", "orgCode", "page" };
    string[] postValues = new string[] { param, corpType, info.CorpCode, "1" };
    NameValueCollection nvc = ToolWeb.GetNameValueCollection(postParams, postValues);

    string html = string.Empty;
    try
    {
        html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
    }
    catch
    {
        // One retry after a long back-off; give up on this company if it fails again.
        Thread.Sleep(12 * 60 * 1000);
        try
        {
            html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
        }
        catch
        {
            Thread.Sleep(8 * 60 * 1000);
            return;
        }
    }

    JavaScriptSerializer java = new JavaScriptSerializer();

    // An empty body deserializes to null; a non-object payload cannot be paged either.
    Dictionary<string, object> jsonResults = java.DeserializeObject(html) as Dictionary<string, object>;
    if (jsonResults == null)
    {
        return;
    }

    int pageInt = 1;
    object totalPage;
    if (jsonResults.TryGetValue("totalPage", out totalPage) && totalPage is int)
    {
        pageInt = (int)totalPage;
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            postValues = new string[] { param, corpType, info.CorpCode, i.ToString() };
            nvc = ToolWeb.GetNameValueCollection(postParams, postValues);
            try
            {
                html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
            }
            catch
            {
                Thread.Sleep(12 * 60 * 1000);
                try
                {
                    html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
                }
                catch
                {
                    Thread.Sleep(8 * 60 * 1000);
                    continue;
                }
            }

            jsonResults = java.DeserializeObject(html) as Dictionary<string, object>;
            if (jsonResults == null)
            {
                continue;
            }
        }

        object recordsValue;
        object[] dicRecords = jsonResults.TryGetValue("records", out recordsValue) ? recordsValue as object[] : null;
        if (dicRecords == null)
        {
            continue;
        }

        foreach (object dicRecord in dicRecords)
        {
            string PrjAddress = string.Empty, ChargeDept = string.Empty, PrjClassLevel = string.Empty, PrjClass = string.Empty, BuildArea = string.Empty, InviteArea = string.Empty, ProspUnit = string.Empty, DesignUnit = string.Empty, SuperUnit = string.Empty, ConstUnit = string.Empty, PrjStartDate = string.Empty, PrjEndDate = string.Empty;
            string PrjName = "业绩";
            string PrjCode = "业绩";
            string BuildUnit = "业绩";
            string GrantDate = DateTime.Today.ToString();

            CorpResults result = ToolDb.GenCorpResults(info.Id, PrjName, PrjCode, BuildUnit, GrantDate, PrjAddress, ChargeDept, PrjClassLevel, PrjClass, BuildArea, InviteArea, ProspUnit, DesignUnit, SuperUnit, ConstUnit, PrjStartDate, PrjEndDate, info.Url);
            ToolDb.SaveEntity(result, string.Empty);
        }
    }
}
/// <summary>
/// Crawls the paged announcement list at <c>SiteUrl</c>, opens the detail page of
/// every list entry and saves one <c>BidInfo</c> per entry whose detail page contains
/// the <c>dbDetailFV</c> table.
/// </summary>
/// <remarks>
/// The page count is read from the last option of the <c>myPages_input</c> select and
/// defaults to 1. List pages and detail pages that cannot be downloaded are skipped.
/// </remarks>
/// <param name="crawlAll">Not read by this implementation.</param>
/// <returns>
/// A list that is always empty: records are persisted directly through
/// <c>ToolDb.SaveEntity</c> and are never added to the returned list.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<BidInfo>();
    string html = string.Empty;
    int pageInt = 1;
    try
    {
        html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("id", "myPages_input")), true), new TagNameFilter("option")));
    if (pageList != null && pageList.Count > 0)
    {
        try
        {
            OptionTag opt = pageList[pageList.Count - 1] as OptionTag;
            string temp = opt.GetAttribute("value");
            pageInt = int.Parse(temp);
        }
        catch
        {
            pageInt = 1;
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = ToolWeb.GetHtmlByUrl(this.SiteUrl + "&pageNo=" + i.ToString(), Encoding.UTF8);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("dl"), new HasAttributeFilter("class", "i-news")), true), new TagNameFilter("dd")));
        if (nodeList == null || nodeList.Count == 0)
        {
            continue;
        }

        for (int j = 0; j < nodeList.Count; j++)
        {
            string endDate = string.Empty, otherType = string.Empty, prjMgr = string.Empty;

            string beginDate = nodeList[j].ToPlainTextString().GetDateRegex();
            string prjName = nodeList[j].ToNodePlainString();
            if (!string.IsNullOrEmpty(beginDate))
            {
                prjName = prjName.Replace(beginDate, "");
            }
            prjName = prjName.Replace("[", "").Replace("]", "");
            prjName = prjName.GetBidPrjName();
            string bidType = prjName.GetInviteBidType();
            string InfoUrl = "http://www.chinapsp.cn/cn/info.aspx" + nodeList[j].GetATagHref();

            string htldtl = string.Empty;
            try
            {
                htldtl = ToolWeb.GetHtmlByUrl(InfoUrl, Encoding.UTF8);
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htldtl));
            NodeList dtList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "dbDetailFV")));
            if (dtList == null || dtList.Count == 0)
            {
                continue;
            }

            string HtmlTxt = dtList.AsHtml();
            string bidCtx = HtmlTxt.ToLower().Replace("<tr>", "\r\n").Replace("</tr>", "\r\n").ToCtxString();

            if (prjName.Contains("招标编号") || prjName.Contains("项目编号"))
            {
                // Cut the name at the first marker found, in this priority order:
                // half-width "(", full-width "（" (written as \uFF08), then the
                // number labels themselves.
                int cut = prjName.IndexOf("(", StringComparison.Ordinal);
                if (cut == -1)
                {
                    cut = prjName.IndexOf("\uFF08", StringComparison.Ordinal);
                }
                if (cut == -1)
                {
                    cut = prjName.IndexOf("招标编号", StringComparison.Ordinal);
                }
                if (cut == -1)
                {
                    cut = prjName.IndexOf("项目编号", StringComparison.Ordinal);
                }
                if (cut != -1)
                {
                    prjName = prjName.Remove(cut);
                }
            }

            string buildUnit = bidCtx.GetBuildRegex();
            // Computed as in the original code, but not passed on to GenBidInfo.
            string prjAddress = bidCtx.Replace(" ", "").GetAddressRegex();
            string bidUnit = bidCtx.GetBidRegex();

            string bidMoney = ScaleBidMoneyToTenThousands(bidCtx.GetMoneyRegex());
            if (bidMoney == "0")
            {
                bidMoney = bidCtx.GetMoneyRegex(null, true);
                if (string.IsNullOrEmpty(bidMoney))
                {
                    bidMoney = "0";
                }
            }
            bidMoney = ScaleBidMoneyToTenThousands(bidMoney);
            if (bidMoney == "0")
            {
                bidMoney = bidCtx.ToLower().GetMoneyRegex(new string[] { "rmb" });
            }

            if (string.IsNullOrEmpty(bidUnit) && bidMoney == "0")
            {
                if (bidCtx.Contains("采购失败") || bidCtx.Contains("本项目招标失败"))
                {
                    bidUnit = "没有中标商";
                    bidMoney = "0";
                }
            }

            string code = bidCtx.GetCodeRegex().GetChina();
            string specType = "其他";
            string msgType = "广东采联采购招标有限公司";
            prjName = prjName.GetBidPrjName();

            BidInfo info = ToolDb.GenBidInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
            ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx, " and LastModifier ='00000000000000000000000000000000'");
        }
    }

    return list;
}

/// <summary>
/// Divides an amount greater than 10000 by 10000; any other text, including text that
/// is empty or does not parse as a decimal, is returned unchanged instead of throwing.
/// </summary>
private static string ScaleBidMoneyToTenThousands(string bidMoney)
{
    decimal money;
    if (!string.IsNullOrEmpty(bidMoney) && decimal.TryParse(bidMoney, out money) && money > 10000)
    {
        return Convert.ToString(money / 10000);
    }
    return bidMoney;
}
/// <summary>
/// Enterprise award information: pages through the <c>queryPrizes.html</c> endpoint
/// and saves one <c>CorpAward</c> row for every record returned.
/// </summary>
/// <remarks>
/// Each record supplies <c>award_name</c>, <c>award_date</c>, <c>award_lvl</c>,
/// <c>award_org</c> and <c>rel_prj</c>.
/// A page whose request fails is retried once after a 12-minute pause; if the retry
/// also fails the method pauses a further 8 minutes and skips that page. A page whose
/// response is not a JSON object, or has no <c>records</c> array, is skipped as well,
/// as is any record that is not a JSON object.
/// </remarks>
/// <param name="info">Enterprise whose <c>CorpCode</c> is posted as <c>orgCode</c> and whose <c>Id</c> and <c>Url</c> are stored on each saved row.</param>
/// <param name="param">Value posted as the <c>param</c> form field.</param>
/// <param name="corpType">Value posted as the <c>corpType</c> form field.</param>
protected void AddCorpAward(CorpInfo info, string param, string corpType)
{
    const int RetryDelayMs = 12 * 60 * 1000;
    const int GiveUpDelayMs = 8 * 60 * 1000;

    string url = "http://portal.szjs.gov.cn:8888/publicShow/queryPrizes.html";
    string[] postParams = new string[] { "param", "corpType", "orgCode", "page" };
    JavaScriptSerializer serializer = new JavaScriptSerializer();

    // Only the first page's response can raise this above 1.
    int pageCount = 1;

    for (int page = 1; page <= pageCount; page++)
    {
        string[] postValues = new string[] { param, corpType, info.CorpCode, page.ToString() };
        NameValueCollection nvc = ToolWeb.GetNameValueCollection(postParams, postValues);

        string html = string.Empty;
        try
        {
            html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
        }
        catch
        {
            Thread.Sleep(RetryDelayMs);
            try
            {
                html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
            }
            catch
            {
                Thread.Sleep(GiveUpDelayMs);
                // If this was page 1, pageCount is still 1 and the loop ends here.
                continue;
            }
        }

        Dictionary<string, object> json = null;
        try
        {
            json = serializer.DeserializeObject(html) as Dictionary<string, object>;
        }
        catch (ArgumentException)
        {
            // Response is not valid JSON; handled by the null check below.
        }
        catch (InvalidOperationException)
        {
            // Response could not be deserialized; handled by the null check below.
        }

        if (json == null)
        {
            continue;
        }

        if (page == 1)
        {
            object totalPage;
            // Same acceptance rule as an unboxing cast: only a boxed int is used.
            if (json.TryGetValue("totalPage", out totalPage) && totalPage is int)
            {
                pageCount = (int)totalPage;
            }
        }

        object recordsValue;
        object[] records = json.TryGetValue("records", out recordsValue) ? recordsValue as object[] : null;
        if (records == null)
        {
            continue;
        }

        foreach (object record in records)
        {
            Dictionary<string, object> dic = record as Dictionary<string, object>;
            if (dic == null)
            {
                continue;
            }

            string AwardName = Convert.ToString(dic["award_name"]);
            string AwardDate = Convert.ToString(dic["award_date"]);
            string AwardLevel = Convert.ToString(dic["award_lvl"]);
            string GrantUnit = Convert.ToString(dic["award_org"]);
            string ProjectName = Convert.ToString(dic["rel_prj"]);

            CorpAward award = ToolDb.GenCorpAward(info.Id, AwardName, AwardDate, AwardLevel, GrantUnit, ProjectName, info.Url);
            ToolDb.SaveEntity(award, string.Empty);
        }
    }
}
/// <summary>
/// Saves the enterprise's technical/management staff: posts the ASP.NET "step3"
/// postback to <c>info.Url</c>, reads the resulting <c>data-grid</c> table and saves
/// one <c>CorpTecStaff</c> row per grid row.
/// </summary>
/// <remarks>
/// For each staff member the linked detail page is downloaded and the certificate
/// number is taken from the label "职称证号" in the second <c>width="100%"</c> table of
/// that page. If the detail page cannot be downloaded or does not have that table,
/// the staff row is still saved with an empty certificate number.
/// Only the grid returned by the step3 postback is processed; paging through further
/// grid pages is not implemented.
/// </remarks>
/// <param name="info">Enterprise whose <c>Url</c> receives the postback and whose <c>Id</c> and <c>Url</c> are stored on each saved row.</param>
/// <param name="html">Previously downloaded enterprise page from which the ASP.NET view state and event validation values are read.</param>
private void AddCorpTecStaff(CorpInfo info, string html)
{
    string cookiestr = string.Empty;
    string viewState = ToolWeb.GetAspNetViewState(html);
    string eventValidation = ToolWeb.GetAspNetEventValidation(html);

    NameValueCollection nvc = ToolWeb.GetNameValueCollection(new string[] {
        "ctl00$MainContent$ScriptManager1",
        "__EVENTTARGET",
        "__EVENTARGUMENT",
        "__VIEWSTATE",
        "__EVENTVALIDATION",
        "__ASYNCPOST"
    }, new string[] {
        "ctl00$MainContent$UpdatePanel1|ctl00$MainContent$step3",
        "ctl00$MainContent$step3",
        "",
        viewState,
        eventValidation,
        "true"
    });

    string htmldtl = string.Empty;
    try
    {
        htmldtl = ToolWeb.GetHtmlByUrl(info.Url, nvc, Encoding.UTF8, ref cookiestr);
    }
    catch
    {
        // Best effort: an empty document is parsed below and yields no grid.
    }

    Parser parser = new Parser(new Lexer(htmldtl));
    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "data-grid")));
    if (dtlNode == null || dtlNode.Count == 0)
    {
        return;
    }

    TableTag table = dtlNode[0] as TableTag;
    if (table == null)
    {
        return;
    }

    // Row 0 is skipped, as in the original loop.
    for (int j = 1; j < table.RowCount; j++)
    {
        TableRow tr = table.Rows[j];
        if (tr.ColumnCount < 2)
        {
            // The name and type columns are both required.
            continue;
        }

        string IdCard = string.Empty, CertLevel = string.Empty, CertNo = string.Empty;
        string StaffName = tr.Columns[0].ToNodePlainString();
        string stffType = tr.Columns[1].ToNodePlainString();
        string aHref = "http://113.108.219.40/intogd/Open/" + tr.Columns[0].GetATagHref();

        string staffDtl = string.Empty;
        try
        {
            staffDtl = ToolWeb.GetHtmlByUrl(aHref, Encoding.UTF8);
        }
        catch
        {
            // Best effort: the staff row is still saved without a certificate number.
        }

        parser = new Parser(new Lexer(staffDtl));
        NodeList staffNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%")));

        // The second matching table is read, so at least two must be present.
        TableTag dtlTable = (staffNode != null && staffNode.Count > 1) ? staffNode[1] as TableTag : null;
        if (dtlTable != null)
        {
            // Flatten the table to "label:value\r\n" text so the value can be looked up by label.
            StringBuilder ctx = new StringBuilder();
            for (int k = 0; k < dtlTable.RowCount; k++)
            {
                TableRow dtlRow = dtlTable.Rows[k];
                for (int d = 0; d < dtlRow.ColumnCount; d++)
                {
                    TableColumn col = dtlRow.Columns[d];
                    ctx.Append(col.ToNodePlainString());
                    ctx.Append(col.GetAttribute("class") == "td-left" ? ":" : "\r\n");
                }
            }
            CertNo = ctx.ToString().GetRegex("职称证号");
        }

        CorpTecStaff staff = ToolDb.GenCorpTecStaff(info.Id, StaffName, IdCard, CertLevel, CertNo, info.Url, stffType);
        ToolDb.SaveEntity(staff, string.Empty);
    }
}
/// <summary>
/// Enterprise penalty information: pages through the <c>queryPunish.html</c> endpoint
/// and saves one <c>CorpPunish</c> row for every record returned.
/// </summary>
/// <remarks>
/// Each record supplies <c>file_id</c>, <c>pun_type_text</c>, <c>file_org</c> and
/// <c>file_date</c>; the penalty content is saved empty and the last argument of
/// <c>GenCorpPunish</c> is always "0".
/// A page whose request fails is retried once after a 12-minute pause; if the retry
/// also fails the method pauses a further 8 minutes and skips that page. A page whose
/// response is not a JSON object, or has no <c>records</c> array, is skipped as well,
/// as is any record that is not a JSON object.
/// </remarks>
/// <param name="info">Enterprise whose <c>CorpCode</c> is posted as <c>orgCode</c> and whose <c>Id</c> and <c>Url</c> are stored on each saved row.</param>
/// <param name="param">Value posted as the <c>param</c> form field.</param>
/// <param name="corpType">Value posted as the <c>corpType</c> form field.</param>
protected void AddCorpPunish(CorpInfo info, string param, string corpType)
{
    const int RetryDelayMs = 12 * 60 * 1000;
    const int GiveUpDelayMs = 8 * 60 * 1000;

    string url = "http://portal.szjs.gov.cn:8888/publicShow/queryPunish.html";
    string[] postParams = new string[] { "param", "corpType", "orgCode", "page" };
    JavaScriptSerializer serializer = new JavaScriptSerializer();

    // Only the first page's response can raise this above 1.
    int pageCount = 1;

    for (int page = 1; page <= pageCount; page++)
    {
        string[] postValues = new string[] { param, corpType, info.CorpCode, page.ToString() };
        NameValueCollection nvc = ToolWeb.GetNameValueCollection(postParams, postValues);

        string html = string.Empty;
        try
        {
            html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
        }
        catch
        {
            Thread.Sleep(RetryDelayMs);
            try
            {
                html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
            }
            catch
            {
                Thread.Sleep(GiveUpDelayMs);
                // If this was page 1, pageCount is still 1 and the loop ends here.
                continue;
            }
        }

        Dictionary<string, object> json = null;
        try
        {
            json = serializer.DeserializeObject(html) as Dictionary<string, object>;
        }
        catch (ArgumentException)
        {
            // Response is not valid JSON; handled by the null check below.
        }
        catch (InvalidOperationException)
        {
            // Response could not be deserialized; handled by the null check below.
        }

        if (json == null)
        {
            continue;
        }

        if (page == 1)
        {
            object totalPage;
            // Same acceptance rule as an unboxing cast: only a boxed int is used.
            if (json.TryGetValue("totalPage", out totalPage) && totalPage is int)
            {
                pageCount = (int)totalPage;
            }
        }

        object recordsValue;
        object[] records = json.TryGetValue("records", out recordsValue) ? recordsValue as object[] : null;
        if (records == null)
        {
            continue;
        }

        foreach (object record in records)
        {
            Dictionary<string, object> dic = record as Dictionary<string, object>;
            if (dic == null)
            {
                continue;
            }

            string DocNo = Convert.ToString(dic["file_id"]);
            string PunishType = Convert.ToString(dic["pun_type_text"]);
            string GrantUnit = Convert.ToString(dic["file_org"]);
            string DocDate = Convert.ToString(dic["file_date"]);
            string PunishCtx = string.Empty;

            CorpPunish punish = ToolDb.GenCorpPunish(info.Id, DocNo, PunishType, GrantUnit, DocDate, PunishCtx, info.Url, "0");
            ToolDb.SaveEntity(punish, string.Empty);
        }
    }
}
/// <summary>
/// Gets the office address and saves branch-institution information: posts the
/// ASP.NET "step2" postback to <paramref name="url"/>, then for every row of the
/// resulting <c>data-grid</c> table downloads the linked detail page and saves a
/// <c>CorpInstitution</c> built from it.
/// </summary>
/// <param name="html">Previously downloaded page from which the ASP.NET view state and event validation values are read.</param>
/// <param name="url">Address that receives the step2 postback.</param>
/// <param name="info">Enterprise whose <c>Id</c> is stored on each saved institution.</param>
/// <returns>
/// The "详细地址" value of the first detail page for which that lookup is non-empty;
/// an empty string when no page provides one.
/// </returns>
private string GetOffAddress(string html, string url, CorpInfo info)
{
    string cookie = string.Empty;
    string aspViewState = ToolWeb.GetAspNetViewState(html);
    string aspEventValidation = ToolWeb.GetAspNetEventValidation(html);
    string officeAddress = string.Empty;

    string[] formKeys = new string[] { "ctl00$MainContent$ScriptManager1", "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "__EVENTVALIDATION", "__ASYNCPOST" };
    string[] formValues = new string[] { "ctl00$MainContent$UpdatePanel1|ctl00$MainContent$step2", "ctl00$MainContent$step2", "", aspViewState, aspEventValidation, "true" };
    NameValueCollection form = ToolWeb.GetNameValueCollection(formKeys, formValues);

    string gridHtml = string.Empty;
    try
    {
        gridHtml = ToolWeb.GetHtmlByUrl(url, form, Encoding.UTF8, ref cookie);
    }
    catch
    {
    }

    Parser parser = new Parser(new Lexer(gridHtml));
    NodeList grids = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "data-grid")));
    if (grids == null || grids.Count <= 0)
    {
        return officeAddress;
    }

    TableTag grid = grids[0] as TableTag;
    for (int rowIndex = 1; rowIndex < grid.RowCount; rowIndex++)
    {
        TableRow row = grid.Rows[rowIndex];

        // Values taken from the grid row itself.
        string branchUrl = "http://113.108.219.40/intogd/Open/" + row.Columns[0].GetATagHref();
        string branchName = row.Columns[0].ToNodePlainString();
        string totalReMan = row.Columns[2].ToNodePlainString();
        string techReMan = row.Columns[3].ToNodePlainString();
        string qualityReMan = row.Columns[4].ToNodePlainString();
        string safeReMan = row.Columns[5].ToNodePlainString();

        string branchHtml = string.Empty;
        try
        {
            branchHtml = ToolWeb.GetHtmlByUrl(branchUrl, Encoding.UTF8);
        }
        catch
        {
            continue;
        }

        parser = new Parser(new Lexer(branchHtml));
        NodeList infoTables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%")));
        if (infoTables == null || infoTables.Count <= 1)
        {
            continue;
        }

        // Values looked up by label in the flattened second info table.
        string ctx = JoinBranchInfoCells(infoTables[1] as TableTag);
        if (string.IsNullOrEmpty(officeAddress))
        {
            officeAddress = ctx.GetRegex("详细地址");
        }
        string corpCode = ctx.GetRegex("组织机构代码");
        string location = ctx.GetRegex("所在地");
        string dtlAddress = ctx.GetRegex("详细地址");
        string postalCode = ctx.GetRegex("邮政编码");
        string resInstitution = ctx.GetRegex("驻粤负责机构");
        string linkMan = ctx.Replace(" ", "").GetRegex("联系人");
        string fax = ctx.GetRegex("传真号码");
        string linkPhone = ctx.GetRegex("联系电话");
        string businessCode = ctx.GetRegex("营业执照注册号").Replace("分", "");
        string regDate = ctx.GetRegex("设立时间").GetDateRegex();
        string email = ctx.GetRegex("邮箱");

        string safetyCode = string.Empty;
        string totalSafetyCode = string.Empty;
        string techSafetyCode = string.Empty;
        string qualitySafetyCode = string.Empty;

        parser.Reset();
        NodeList dataTables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "data-table")));
        if (dataTables != null && dataTables.Count > 2)
        {
            string totalCtx;
            string techCtx;
            SplitBranchSafetyCells(dataTables[2] as TableTag, out totalCtx, out techCtx);
            totalSafetyCode = ToolHtml.GetRegexStringNot(totalCtx, new string[] { "安全生产考核合格证号(A证)" });
            techSafetyCode = ToolHtml.GetRegexStringNot(techCtx, new string[] { "安全生产考核合格证号(A证)" });
        }
        if (dataTables != null && dataTables.Count > 4)
        {
            string safetyCtx;
            string qualityCtx;
            SplitBranchSafetyCells(dataTables[4] as TableTag, out safetyCtx, out qualityCtx);
            safetyCode = ToolHtml.GetRegexStringNot(safetyCtx, new string[] { "安全生产考核合格证号(A或B证)" });
            qualitySafetyCode = ToolHtml.GetRegexStringNot(qualityCtx, new string[] { "安全生产考核合格证号" });
        }

        CorpInstitution entity = ToolDb.GenCorpInstitution("广东省", "广东地区", info.Id, branchName, corpCode, location, dtlAddress, postalCode, resInstitution, linkMan, linkPhone, fax, businessCode, regDate, email, safetyCode, totalReMan, techReMan, safeReMan, qualityReMan, branchUrl, totalSafetyCode, techSafetyCode, qualitySafetyCode);
        ToolDb.SaveEntity(entity, string.Empty);
    }

    return officeAddress;
}

/// <summary>
/// Concatenates every cell of the table in row order: a cell with class "td-left" is
/// followed by ":", any other cell by a line break.
/// </summary>
private static string JoinBranchInfoCells(TableTag table)
{
    StringBuilder text = new StringBuilder();
    for (int r = 0; r < table.RowCount; r++)
    {
        TableRow row = table.Rows[r];
        for (int c = 0; c < row.ColumnCount; c++)
        {
            TableColumn cell = row.Columns[c];
            string separator = cell.GetAttribute("class") == "td-left" ? ":" : "\r\n";
            text.Append(cell.ToNodePlainString()).Append(separator);
        }
    }
    return text.ToString();
}

/// <summary>
/// Same flattening as for the info table, but split by column position: cells in the
/// first two columns go to <paramref name="leadingCells"/>, cells from the third
/// column onward go to <paramref name="trailingCells"/>.
/// </summary>
private static void SplitBranchSafetyCells(TableTag table, out string leadingCells, out string trailingCells)
{
    StringBuilder leading = new StringBuilder();
    StringBuilder trailing = new StringBuilder();
    for (int r = 0; r < table.RowCount; r++)
    {
        TableRow row = table.Rows[r];
        for (int c = 0; c < row.ColumnCount; c++)
        {
            TableColumn cell = row.Columns[c];
            StringBuilder target = c >= 2 ? trailing : leading;
            string separator = cell.GetAttribute("class") == "td-left" ? ":" : "\r\n";
            target.Append(cell.ToNodePlainString()).Append(separator);
        }
    }
    leadingCells = leading.ToString();
    trailingCells = trailing.ToString();
}
/// <summary>
/// Enterprise safety licences: pages through the <c>querySafeProduction.html</c>
/// endpoint and saves one <c>CorpSecLic</c> row for every record returned.
/// </summary>
/// <remarks>
/// Each record supplies <c>lics_id</c>, <c>lics_range</c>, <c>valid_start_date</c>,
/// <c>valid_end_date</c> and <c>issue_dept</c>. A licence description longer than
/// 1000 bytes in the system default encoding is saved as an empty string.
/// A page whose request fails is retried once after a 12-minute pause; if the retry
/// also fails the method pauses a further 8 minutes and skips that page. A page whose
/// response is not a JSON object, or has no <c>records</c> array, is skipped as well,
/// as is any record that is not a JSON object.
/// </remarks>
/// <param name="info">Enterprise whose <c>CorpCode</c> is posted as <c>orgCode</c> and whose <c>Id</c> and <c>Url</c> are stored on each saved row.</param>
/// <param name="param">Value posted as the <c>param</c> form field.</param>
/// <param name="corpType">Value posted as the <c>corpType</c> form field.</param>
protected void AddCorpSecLic(CorpInfo info, string param, string corpType)
{
    const int RetryDelayMs = 12 * 60 * 1000;
    const int GiveUpDelayMs = 8 * 60 * 1000;
    const int MaxDescriptionBytes = 1000;

    string url = "http://portal.szjs.gov.cn:8888/publicShow/querySafeProduction.html";
    string[] postParams = new string[] { "param", "corpType", "orgCode", "page" };
    JavaScriptSerializer serializer = new JavaScriptSerializer();

    // Only the first page's response can raise this above 1.
    int pageCount = 1;

    for (int page = 1; page <= pageCount; page++)
    {
        string[] postValues = new string[] { param, corpType, info.CorpCode, page.ToString() };
        NameValueCollection nvc = ToolWeb.GetNameValueCollection(postParams, postValues);

        string html = string.Empty;
        try
        {
            html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
        }
        catch
        {
            Thread.Sleep(RetryDelayMs);
            try
            {
                html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
            }
            catch
            {
                Thread.Sleep(GiveUpDelayMs);
                // If this was page 1, pageCount is still 1 and the loop ends here.
                continue;
            }
        }

        Dictionary<string, object> json = null;
        try
        {
            json = serializer.DeserializeObject(html) as Dictionary<string, object>;
        }
        catch (ArgumentException)
        {
            // Response is not valid JSON; handled by the null check below.
        }
        catch (InvalidOperationException)
        {
            // Response could not be deserialized; handled by the null check below.
        }

        if (json == null)
        {
            continue;
        }

        if (page == 1)
        {
            object totalPage;
            // Same acceptance rule as an unboxing cast: only a boxed int is used.
            if (json.TryGetValue("totalPage", out totalPage) && totalPage is int)
            {
                pageCount = (int)totalPage;
            }
        }

        object recordsValue;
        object[] records = json.TryGetValue("records", out recordsValue) ? recordsValue as object[] : null;
        if (records == null)
        {
            continue;
        }

        foreach (object record in records)
        {
            Dictionary<string, object> dic = record as Dictionary<string, object>;
            if (dic == null)
            {
                continue;
            }

            string SecLicCode = Convert.ToString(dic["lics_id"]);
            string SecLicDesc = Convert.ToString(dic["lics_range"]);
            string ValidStartDate = Convert.ToString(dic["valid_start_date"]);
            string ValidStartEnd = Convert.ToString(dic["valid_end_date"]);
            string SecLicUnit = Convert.ToString(dic["issue_dept"]);

            if (Encoding.Default.GetByteCount(SecLicDesc) > MaxDescriptionBytes)
            {
                SecLicDesc = string.Empty;
            }

            CorpSecLic seclic = ToolDb.GenCorpSecLic(info.Id, SecLicCode, SecLicDesc, ValidStartDate, ValidStartEnd, SecLicUnit, info.Url);
            ToolDb.SaveEntity(seclic, string.Empty);
        }
    }
}