/// <summary>
/// Downloads the personnel list of one enterprise and saves a <c>CorpTecStaff</c>
/// entity for every person returned.
/// </summary>
/// <remarks>
/// For each person a second request is sent to the certificate endpoint. Only the
/// first certificate entry is read; it supplies the certificate number
/// (<c>certificateCode</c>) and the staff type (<c>major</c>). The ID-card and
/// certificate-level values are always passed as empty strings.
/// A failed list request ends the method silently. A failed or unreadable
/// certificate response skips that person only. A list response that is not valid
/// JSON still raises the serializer's exception to the caller.
/// </remarks>
/// <param name="info">Enterprise whose <c>Id</c> and <c>Url</c> are copied onto each staff entity.</param>
/// <param name="enterpriseId">Remote enterprise identifier inserted into the list request URL.</param>
protected void AddCorpStaff(CorpInfo info, string enterpriseId)
{
    string url = "http://202.104.65.182:8081/G2/webdrive/web-person-info.do?enterpriseId=" + enterpriseId + "&enterpriseBaseId=&data&filter_params_=rowNum,personId,personBaseId,name,isPause,isDel&defined_operations_=&nocheck_operations_=&";
    string nd = ToolHtml.GetDateTimeLong(DateTime.Now).ToString();
    NameValueCollection nvc = ToolWeb.GetNameValueCollection(
        new string[] { "gridSearch", "nd", "PAGESIZE", "PAGE", "sortField", "sortDirection" },
        new string[] { "false", nd, "1000", "1", "", "asc" });

    string strJson;
    try
    {
        strJson = ToolWeb.GetHtmlByUrl(url, nvc);
    }
    catch
    {
        // Best effort: without the list there is nothing to store.
        return;
    }

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    Dictionary<string, object> listJson = serializer.DeserializeObject(strJson) as Dictionary<string, object>;
    object listData;
    if (listJson == null || !listJson.TryGetValue("data", out listData))
    {
        return;
    }

    object[] people = listData as object[];
    if (people == null)
    {
        return;
    }

    foreach (object person in people)
    {
        Dictionary<string, object> dic = person as Dictionary<string, object>;
        if (dic == null)
        {
            // Skip entries that are not JSON objects instead of dereferencing null.
            continue;
        }

        string staffName = Convert.ToString(dic["name"]);
        string idCard = string.Empty;
        string certLevel = string.Empty;
        string certNo = string.Empty;
        string staffType = string.Empty;

        string dtlUrl = "http://202.104.65.182:8081/G2/webdrive/web-person-certificate.do?personId=" + dic["personId"] + "&actionFlag=view&data&filter_params_=rowNum,personBaseId,personId,certificateId,certificateType,registerLevel,certificateCode,certificatePhotoetch,gardenMajor,issuer,major,pmTitle,issueDate,registerValidEnd&defined_operations_=&nocheck_operations_=view&";

        Dictionary<string, object> dtlJson;
        try
        {
            // The paging form values of the list request are reused for the detail request.
            dtlJson = serializer.DeserializeObject(ToolWeb.GetHtmlByUrl(dtlUrl, nvc)) as Dictionary<string, object>;
        }
        catch
        {
            // An unreachable or unreadable certificate response skips this person only;
            // previously an unreadable response aborted every remaining person.
            continue;
        }

        object dtlData = null;
        if (dtlJson != null)
        {
            dtlJson.TryGetValue("data", out dtlData);
        }

        object[] certificates = dtlData as object[];
        if (certificates != null && certificates.Length > 0)
        {
            Dictionary<string, object> firstCertificate = certificates[0] as Dictionary<string, object>;
            if (firstCertificate != null)
            {
                certNo = Convert.ToString(firstCertificate["certificateCode"]);
                staffType = Convert.ToString(firstCertificate["major"]);
            }
        }

        CorpTecStaff staff = ToolDb.GenCorpTecStaff(info.Id, staffName, idCard, certLevel, certNo, info.Url, staffType);
        ToolDb.SaveEntity(staff, string.Empty);
    }
}
/// <summary>
/// Crawls bid-closing notices whose closing time lies between 59 days ago and
/// tomorrow, and converts every returned row into a <c>NoticeInfo</c>.
/// </summary>
/// <remarks>
/// The request URL is <c>SiteUrl + MaxCount</c> followed by the <c>jbTimeMin</c> /
/// <c>jbTimeMax</c> bounds. Everything outside the outermost <c>{ … }</c> of the
/// response is discarded before it is parsed as JSON. Every property of the root
/// object except <c>total</c> is read as an array of rows; properties that are not
/// arrays are ignored.
/// </remarks>
/// <param name="crawlAll">When false, crawling stops as soon as <c>MaxCount</c> notices are collected.</param>
/// <returns>
/// The collected notices (possibly empty), or <c>null</c> when the page request fails.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<NoticeInfo>();

    DateTime endDate = DateTime.Today.AddDays(1);
    DateTime startDate = endDate.AddDays(-60);
    long startTime = ToolHtml.GetDateTimeLong(startDate);
    long endTime = ToolHtml.GetDateTimeLong(endDate);
    string infoUrl = this.SiteUrl + this.MaxCount + "&jbTimeMin=" + startTime + "&jbTimeMax=" + endTime;

    string html;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.UTF8);
    }
    catch
    {
        return null;
    }

    if (string.IsNullOrEmpty(html))
    {
        return list;
    }

    // Keep only the JSON object; a response without one used to crash in Substring.
    int startIndex = html.IndexOf("{");
    int endIndex = html.LastIndexOf("}");
    if (startIndex < 0 || endIndex < startIndex)
    {
        return list;
    }
    html = html.Substring(startIndex, (endIndex + 1) - startIndex);

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    Dictionary<string, object> json = serializer.DeserializeObject(html) as Dictionary<string, object>;
    if (json == null)
    {
        return list;
    }

    foreach (KeyValuePair<string, object> entry in json)
    {
        if (entry.Key == "total")
        {
            continue;
        }

        object[] rows = entry.Value as object[];
        if (rows == null)
        {
            // Scalar or null properties carry no rows.
            continue;
        }

        foreach (object row in rows)
        {
            Dictionary<string, object> dic = row as Dictionary<string, object>;
            if (dic == null)
            {
                continue;
            }

            string infoUrlValue = "http://jyzx.cb.gov.cn/jyxx/jbxx/";
            string prjCode = Convert.ToString(dic["bdBH"]);
            string infoTitle = Convert.ToString(dic["bdName"]);
            string infoType = "截标信息";
            string publishTime = Convert.ToString(dic["jbTime2"]);
            string prjType = Convert.ToString(dic["gcLeiXing2"]);
            string buildUnit = string.Empty;

            // The same four fields are rendered once as HTML and once as plain text.
            string htmlTxt = string.Format("标段编号:{0}</br>标段名称:{1}</br>工程类型:{2}</br>截标时间:{3}", prjCode, infoTitle, prjType, publishTime);
            string infoCtx = string.Format("标段编号:{0}\r\n标段名称:{1}\r\n工程类型:{2}\r\n截标时间:{3}", prjCode, infoTitle, prjType, publishTime);

            NoticeInfo info = ToolDb.GenNoticeInfo("广东省", "深圳龙岗区工程", "龙岗区", string.Empty, infoTitle, infoType, infoCtx, publishTime, string.Empty, "深圳市建设工程交易中心龙岗分中心", infoUrlValue, prjCode, buildUnit, string.Empty, string.Empty, prjType, string.Empty, htmlTxt);
            list.Add(info);

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls meeting announcements for the next 30 days and converts every entry of
/// the response's <c>rows</c> array into a <c>MeetInfo</c>.
/// </summary>
/// <remarks>
/// When at least one meeting was collected, the stored <c>MeetInfo</c> rows of this
/// source (same <c>City</c> and <c>InfoUrl</c>) whose <c>BeginDate</c> lies inside the
/// crawled window are deleted before the list is returned — presumably so the
/// caller can store the fresh list in their place (confirm against the caller).
/// </remarks>
/// <param name="crawlAll">When false, crawling stops as soon as <c>MaxCount</c> meetings are collected.</param>
/// <returns>
/// The collected meetings (possibly empty), or <c>null</c> when the page request fails.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<MeetInfo>();

    DateTime startDate = DateTime.Today;
    DateTime endDate = startDate.AddDays(30);
    long startTime = ToolHtml.GetDateTimeLong(startDate);
    long endTime = ToolHtml.GetDateTimeLong(endDate);
    string Url = this.SiteUrl + this.MaxCount + "&startTime=" + startTime + "&endTime=" + endTime;
    string infUrl = "http://www.bajsjy.com/jyxx/hyxx/";

    string html;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(Url);
    }
    catch
    {
        return null;
    }

    if (string.IsNullOrEmpty(html))
    {
        return list;
    }

    // Keep only the JSON object; a response without one used to crash in Substring.
    int startIndex = html.IndexOf("{");
    int endIndex = html.LastIndexOf("}");
    if (startIndex < 0 || endIndex < startIndex)
    {
        return list;
    }
    html = html.Substring(startIndex, (endIndex + 1) - startIndex);

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    Dictionary<string, object> json = serializer.DeserializeObject(html) as Dictionary<string, object>;
    object rowsValue = null;
    if (json != null)
    {
        json.TryGetValue("rows", out rowsValue);
    }

    object[] rows = rowsValue as object[];
    if (rows == null)
    {
        // No "rows" array: nothing to collect and nothing to replace.
        return list;
    }

    foreach (object row in rows)
    {
        Dictionary<string, object> dic = row as Dictionary<string, object>;
        if (dic == null)
        {
            continue;
        }

        string prjCode = Convert.ToString(dic["bdBH"]);
        string meetTime = Convert.ToString(dic["huiYiStartTime2"]);
        string prjName = Convert.ToString(dic["bdName"]);
        string place = Convert.ToString(dic["huiYiDiDianName"]);
        string meetName = Convert.ToString(dic["huiYiLeiXingName"]);

        MeetInfo info = ToolDb.GenMeetInfo("广东省", "深圳宝安区工程", string.Empty, string.Empty, prjName, place, meetName, meetTime, string.Empty, "深圳市建设工程交易中心宝安分中心", infUrl, prjCode, string.Empty, string.Empty, string.Empty);
        list.Add(info);

        if (!crawlAll && list.Count >= this.MaxCount)
        {
            // The list is never empty here, so the delete below runs exactly as it
            // did in the former early-return path.
            break;
        }
    }

    if (list.Count > 0)
    {
        // Delete the stored rows of the crawled window.
        string bDate = startDate.ToString("yyyy-MM-dd"), eDate = endDate.ToString("yyyy-MM-dd");
        string sqlwhere = " where City='深圳宝安区工程' and InfoUrl='" + infUrl + "' and BeginDate>='" + bDate + "' and BeginDate<='" + eDate + "'";
        string delMeetSql = "delete from MeetInfo " + sqlwhere;
        ToolDb.ExecuteSql(delMeetSql);
    }

    return list;
}
/// <summary>
/// Downloads the leader list of one enterprise and saves a <c>CorpLeader</c>
/// entity for every entry returned.
/// </summary>
/// <remarks>
/// The entry's <c>name</c> becomes the leader name and its <c>title</c> is passed as
/// the leader type; duty and phone are always passed as empty strings.
/// A failed request ends the method silently. A response that is not valid JSON
/// still raises the serializer's exception to the caller.
/// </remarks>
/// <param name="info">Enterprise whose <c>Id</c> and <c>Url</c> are copied onto each leader entity.</param>
/// <param name="enterpriseId">Remote enterprise identifier inserted into the request URL.</param>
protected void AddCorpLeader(CorpInfo info, string enterpriseId)
{
    string url = "http://202.104.65.182:8081/G2/webdrive/web-enterprise-leader.do?enterpriseId=" + enterpriseId + "&data&filter_params_=rowNum,leaderId,name,title,safetyLicenseCode,safetyLicenseIssuer,safetyLicenseValidEnd&defined_operations_=&nocheck_operations_=&";
    string nd = ToolHtml.GetDateTimeLong(DateTime.Now).ToString();
    NameValueCollection nvc = ToolWeb.GetNameValueCollection(
        new string[] { "gridSearch", "nd", "PAGESIZE", "PAGE", "sortField", "sortDirection" },
        new string[] { "false", nd, "1000", "1", "", "asc" });

    string strJson;
    try
    {
        strJson = ToolWeb.GetHtmlByUrl(url, nvc);
    }
    catch
    {
        // Best effort: without the list there is nothing to store.
        return;
    }

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    Dictionary<string, object> listJson = serializer.DeserializeObject(strJson) as Dictionary<string, object>;
    object listData;
    if (listJson == null || !listJson.TryGetValue("data", out listData))
    {
        return;
    }

    object[] leaders = listData as object[];
    if (leaders == null)
    {
        return;
    }

    foreach (object leader in leaders)
    {
        Dictionary<string, object> dic = leader as Dictionary<string, object>;
        if (dic == null)
        {
            // Skip entries that are not JSON objects instead of dereferencing null.
            continue;
        }

        string leaderName = Convert.ToString(dic["name"]);
        string leaderType = Convert.ToString(dic["title"]);
        string leaderDuty = string.Empty;
        string phone = string.Empty;

        CorpLeader corpLeader = ToolDb.GenCorpLeader(info.Id, leaderName, leaderDuty, leaderType, info.Url, phone);
        ToolDb.SaveEntity(corpLeader, string.Empty);
    }
}
/// <summary>
/// Crawls meeting announcements for the next 30 days and converts every returned
/// row into a <c>MeetInfo</c>.
/// </summary>
/// <remarks>
/// Every property of the root JSON object except <c>total</c> is read as an array
/// of rows; properties that are not arrays are ignored. When at least one meeting
/// was collected, the stored <c>MeetInfo</c> rows of this source (same <c>City</c>
/// and <c>InfoUrl</c>) whose <c>BeginDate</c> lies inside the crawled window are
/// deleted before the list is returned — presumably so the caller can store the
/// fresh list in their place (confirm against the caller).
/// </remarks>
/// <param name="crawlAll">When false, crawling stops as soon as <c>MaxCount</c> meetings are collected.</param>
/// <returns>
/// The collected meetings (possibly empty), or <c>null</c> when the page request fails.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<MeetInfo>();

    DateTime startDate = DateTime.Today;
    DateTime endDate = startDate.AddDays(30);
    long startTime = ToolHtml.GetDateTimeLong(startDate);
    long endTime = ToolHtml.GetDateTimeLong(endDate);

    // The end bound was previously appended as "&endTime<value>" (no '='), so it
    // was never sent as a parameter value.
    string infoUrl = this.SiteUrl + this.MaxCount + "&startTime=" + startTime + "&endTime=" + endTime;

    string html;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.UTF8);
    }
    catch
    {
        // A swallowed failure used to continue into Substring(-1, ...) and throw.
        return null;
    }

    if (string.IsNullOrEmpty(html))
    {
        return list;
    }

    // Keep only the JSON object contained in the response.
    int startIndex = html.IndexOf("{");
    int endIndex = html.LastIndexOf("}");
    if (startIndex < 0 || endIndex < startIndex)
    {
        return list;
    }
    html = html.Substring(startIndex, (endIndex + 1) - startIndex);

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    Dictionary<string, object> json = serializer.DeserializeObject(html) as Dictionary<string, object>;
    if (json == null)
    {
        return list;
    }

    bool limitReached = false;
    foreach (KeyValuePair<string, object> entry in json)
    {
        if (entry.Key == "total")
        {
            continue;
        }

        object[] rows = entry.Value as object[];
        if (rows == null)
        {
            // Scalar or null properties carry no rows.
            continue;
        }

        foreach (object row in rows)
        {
            Dictionary<string, object> dic = row as Dictionary<string, object>;
            if (dic == null)
            {
                continue;
            }

            string prjName = Convert.ToString(dic["bdName"]).GetReplace("<br/>");
            string meetTime = Convert.ToString(dic["huiYiStartTime2"]);
            string meetName = Convert.ToString(dic["huiYiLeiXingName"]);
            string place = Convert.ToString(dic["huiYiDiDianName"]);
            string prjCode = Convert.ToString(dic["bdBH"]);
            string builUnit = string.Empty;

            MeetInfo info = ToolDb.GenMeetInfo("广东省", "深圳龙岗区工程", string.Empty, string.Empty, prjName, place, meetName, meetTime, string.Empty, "深圳市建设工程交易中心龙岗分中心", SiteUrl, prjCode, builUnit, string.Empty, string.Empty);
            list.Add(info);

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                limitReached = true;
                break;
            }
        }

        if (limitReached)
        {
            break;
        }
    }

    if (list.Count > 0)
    {
        // Delete the stored rows of the crawled window.
        string bDate = startDate.ToString("yyyy-MM-dd"), eDate = endDate.ToString("yyyy-MM-dd");
        string sqlwhere = " where City='深圳龙岗区工程' and InfoUrl='" + SiteUrl + "' and BeginDate>='" + bDate + "' and BeginDate<='" + eDate + "'";
        string delMeetSql = "delete from MeetInfo " + sqlwhere;
        ToolDb.ExecuteSql(delMeetSql);
    }

    return list;
}
/// <summary>
/// Crawls bid-closing notices whose closing time lies within the next seven days
/// and converts every returned row into a <c>NoticeInfo</c>.
/// </summary>
/// <remarks>
/// The request asks for <c>MaxCount + 20</c> entries. Every property of the root
/// JSON object except <c>total</c> is read as an array of rows; properties that
/// are not arrays are ignored. When at least one notice was collected, stored
/// <c>NoticeInfo</c> rows of the same type and source whose <c>PublistTime</c> lies
/// in the seven-day window are deleted before the list is returned.
/// NOTE(review): the two delete statements embed <c>DateTime</c> values through
/// <c>string.Format</c>, so their text follows the current culture — confirm the
/// database accepts that format.
/// </remarks>
/// <param name="crawlAll">When false, crawling stops as soon as <c>MaxCount</c> notices are collected.</param>
/// <returns>
/// The collected notices (possibly empty), or <c>null</c> when the page request fails.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<NoticeInfo>();
    DateTime beginDate = DateTime.Today;

    string html;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + (MaxCount + 20) + "&jbTimeMin=" + ToolHtml.GetDateTimeLong(beginDate) + "&jbTimeMax=" + ToolHtml.GetDateTimeLong(beginDate.AddDays(7).Date));
    }
    catch
    {
        return null;
    }

    if (string.IsNullOrEmpty(html))
    {
        return list;
    }

    // Keep only the JSON object; a response without one used to crash in Substring.
    int startIndex = html.IndexOf("{");
    int endIndex = html.LastIndexOf("}");
    if (startIndex < 0 || endIndex < startIndex)
    {
        return list;
    }
    html = html.Substring(startIndex, (endIndex + 1) - startIndex);

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    Dictionary<string, object> json = serializer.DeserializeObject(html) as Dictionary<string, object>;
    if (json == null)
    {
        return list;
    }

    foreach (KeyValuePair<string, object> entry in json)
    {
        if (entry.Key == "total")
        {
            continue;
        }

        object[] rows = entry.Value as object[];
        if (rows == null)
        {
            // Scalar or null properties carry no rows.
            continue;
        }

        foreach (object row in rows)
        {
            Dictionary<string, object> dic = row as Dictionary<string, object>;
            if (dic == null)
            {
                continue;
            }

            string prjCode = Convert.ToString(dic["bdBH"]);
            string infoTitle = Convert.ToString(dic["bdName"]);
            string prjType = Convert.ToString(dic["gcLeiXing2"]);
            string publishTime = Convert.ToString(dic["jbTime2"]);
            string infoType = "截标信息";
            string infoUrl = string.Empty;   // no detail page is linked for these notices
            string buildUnit = string.Empty;
            string bgType = string.Empty;

            // The HTML and the plain-text content are the same string.
            string infoCtx = "工程编号:" + prjCode + "\r\n工程名称:" + infoTitle + "\r\n工程类型:" + prjType + "\r\n截标日期:" + publishTime;
            string htmlTxt = infoCtx;

            NoticeInfo info = ToolDb.GenNoticeInfo("广东省", "深圳市工程", string.Empty, string.Empty, infoTitle, infoType, infoCtx, publishTime, string.Empty, MsgTypeCosnt.ShenZhenMsgType, infoUrl, prjCode, buildUnit, string.Empty, string.Empty, prjType, bgType, htmlTxt);
            list.Add(info);

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                // This path filters on the type and source stored on the generated
                // notice, whereas the final delete below uses the literal values.
                string limitDelSql = string.Format("delete from NoticeInfo where InfoType='{0}' and PublistTime>='{1}' and PublistTime<='{2}' and InfoSource='{3}'", info.InfoType, beginDate, beginDate.AddDays(7), info.InfoSource);
                ToolDb.ExecuteSql(limitDelSql);
                return list;
            }
        }
    }

    if (list.Count > 0)
    {
        string delSql = string.Format("delete from NoticeInfo where InfoType='截标信息' and PublistTime>='{0}' and PublistTime<='{1}' and InfoSource='{2}'", beginDate, beginDate.AddDays(7), MsgTypeCosnt.ShenZhenMsgType);
        ToolDb.ExecuteSql(delSql);
    }

    return list;
}
/// <summary>
/// Crawls the paged procurement-result listing and builds one <c>BidInfo</c> per
/// listed item from the item's detail page.
/// </summary>
/// <remarks>
/// The first response is a JSON array: element 0 holds the items of page 1 and
/// element 1 holds an object whose <c>page_all</c> value determines the number of
/// pages requested (<c>page_all / 18 + 1</c>). Later pages are requested from the
/// <c>category-ajax.php</c> endpoint and read from element 0 of their response.
/// For every item the detail page is downloaded, the <c>div#center</c> element is
/// extracted, and the winning supplier and amount are read from rows 2, 4, … 12 of
/// the first <c>table[border=1]</c>, stopping at the first row whose amount is not "0".
/// Items whose detail page cannot be fetched or has no <c>div#center</c> are skipped.
/// Each item is processed once; the previous version repeated the whole page once
/// per property of the page-info object.
/// </remarks>
/// <param name="crawlAll">When false, crawling stops as soon as <c>MaxCount</c> items are collected.</param>
/// <returns>The collected items; empty when the first request fails or has an unexpected shape.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();

    // Read the first page; it also carries the paging information.
    string html;
    try
    {
        string stamp = ToolHtml.GetDateTimeLong(DateTime.Now.ToUniversalTime()).ToString();
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + stamp, Encoding.Default);
    }
    catch (Exception ex)
    {
        Logger.Error(ex.ToString());
        // Without the first page there is no paging information to work with.
        return list;
    }

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    object[] firstPage = serializer.DeserializeObject(html) as object[];
    object[] pageInfoItems = (firstPage != null && firstPage.Length > 1) ? firstPage[1] as object[] : null;
    Dictionary<string, object> pageInfo = (pageInfoItems != null && pageInfoItems.Length > 0) ? pageInfoItems[0] as Dictionary<string, object> : null;
    if (pageInfo == null)
    {
        Logger.Error("Unexpected listing response: paging information not found.");
        return list;
    }

    object pageAllValue;
    int pageAll;
    pageInfo.TryGetValue("page_all", out pageAllValue);
    if (!int.TryParse(Convert.ToString(pageAllValue), out pageAll))
    {
        // Unreadable page count: only the page already downloaded is processed.
        pageAll = 0;
    }
    int pageInt = pageAll / 18 + 1;

    // Strips <script> blocks and <?xml .../> fragments from detail pages.
    Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");

    for (int i = 1; i <= pageInt; i++)
    {
        object[] pageJson = firstPage;
        if (i > 1)
        {
            try
            {
                string pageUrl = "http://www.nmgp.gov.cn/category/category-ajax.php?type_name=3&byf_page=" + i + "&fun=cggg&_=1509441711785";
                html = this.ToolWebSite.GetHtmlByUrl(pageUrl, Encoding.UTF8);
            }
            catch (Exception ex)
            {
                Logger.Error("分页");
                Logger.Error(ex.ToString());
                continue;
            }
            pageJson = serializer.DeserializeObject(html) as object[];
        }

        object[] rows = (pageJson != null && pageJson.Length > 0) ? pageJson[0] as object[] : null;
        if (rows == null)
        {
            continue;
        }

        foreach (object row in rows)
        {
            Dictionary<string, object> dic = row as Dictionary<string, object>;
            if (dic == null)
            {
                continue;
            }

            string endDate = Convert.ToString(dic["ENDDATE"]).GetDateRegex("yyyy-MM-dd");
            string prjName = Convert.ToString(dic["TITLE"]);
            string InfoUrl = "http://www.nmgp.gov.cn/ay_post/post.php?tb_id=3&p_id=" + Convert.ToString(dic["wp_mark_id"]);

            string htmldtl;
            try
            {
                // NOTE(review): verify the text replaced here (a plain space as written,
                // or a non-breaking space / entity lost in transcription).
                htmldtl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.UTF8).Replace(" ", "");
            }
            catch (Exception)
            {
                continue;
            }
            htmldtl = regexHtml.Replace(htmldtl, "");

            Parser dtlParser = new Parser(new Lexer(htmldtl));
            NodeList nodesDtl = dtlParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "center")));
            if (nodesDtl == null || nodesDtl.Count == 0)
            {
                continue;
            }

            string beginDate = string.Empty;
            string bidUnit = string.Empty;
            string bidMoney = string.Empty;

            Parser centerParser = new Parser(new Lexer(nodesDtl.ToHtml()));
            NodeList begNode = centerParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "center")));
            if (begNode != null && begNode.Count > 0)
            {
                beginDate = begNode.AsString().GetDateRegex("yyyy年MM月dd日");
            }

            centerParser.Reset();
            NodeList dtlTable = centerParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("border", "1")));
            if (dtlTable != null && dtlTable.Count > 0)
            {
                TableTag tableDtl = dtlTable[0] as TableTag;
                if (tableDtl != null)
                {
                    // Row 2 is always read when present; rows 4..12 are tried only
                    // while the amount found so far is "0".
                    for (int rowIndex = 2; rowIndex <= 12 && rowIndex < tableDtl.RowCount; rowIndex += 2)
                    {
                        if (rowIndex > 2 && bidMoney != "0")
                        {
                            break;
                        }
                        string ctx = tableDtl.Rows[rowIndex].ToPlainTextString();
                        bidUnit = ctx.GetRegexBegEnd("供应商:", ";");
                        bidMoney = ctx.GetRegexBegEnd("中标金额:", "。").GetMoney();
                    }
                }
            }

            string HtmlTxt = nodesDtl.ToHtml();
            string bidCtx = HtmlTxt.ToCtxString();

            // NOTE(review): an earlier lookup of the code via
            // GetRegex("批准文件编号,工程编号,项目编号") was immediately overwritten by this
            // one, and a project address was computed but never passed on; both dead
            // computations were dropped — confirm neither was meant as a fallback.
            string code = bidCtx.GetRegexBegEnd("批准文件编号:", "二");

            string buildUnit = bidCtx.GetBuildRegex();
            if (string.IsNullOrEmpty(buildUnit))
            {
                buildUnit = bidCtx.GetRegexBegEnd("代理机构名称:", "地址");
            }

            string msgType = "内蒙古自治区政府采购中心";
            string specType = "政府采购";
            string otherType = string.Empty;
            string prjMgr = string.Empty;
            string bidType = ToolHtml.GetInviteTypes(prjName);
            prjName = ToolDb.GetPrjName(prjName);

            BidInfo info = ToolDb.GenBidInfo("内蒙古自治区", "内蒙古自治区及盟市", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
            list.Add(info);

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the enterprise listing (name filter "公司", 100 entries per page), reads
/// each enterprise's detail page and stores the enterprise together with its
/// qualifications, leaders and staff.
/// </summary>
/// <remarks>
/// The value of <c>total</c> in the first response is used as the number of pages
/// to request. NOTE(review): confirm that <c>total</c> is a page count and not a
/// record count.
/// For every enterprise a <c>CorpInfo</c> is generated. If a row with the same
/// name, type and source already exists, that row and its <c>CorpQual</c>,
/// <c>CorpLeader</c> and <c>CorpSecLicStaff</c> rows are deleted before the new data
/// is saved. NOTE(review): <c>AddCorpStaff</c> is what repopulates staff — verify it
/// writes to the table cleared here.
/// Text placed into the existence query has single quotes doubled, because the
/// values originate from the crawled site.
/// The results are written directly to the database; once the crawl loop has run,
/// <c>ToolCoreDb.ExecuteProcedure()</c> is called.
/// </remarks>
/// <param name="crawlAll">Not used by this crawler.</param>
/// <returns>Always <c>null</c>.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    string newUrl = "http://202.104.65.182:8081/G2/gfmweb/web-enterprise!list.do?data&filter_params_=enterpriseId,rowNum,enterpriseBaseId,enterpriseName,organizationCode&defined_operations_=&nocheck_operations_=&";
    string[] formKeys = new string[] { "gridSearch", "nd", "PAGESIZE", "PAGE", "sortField", "sortDirection", "searchVal", "_enterpriseName_like", "entTypeCodes" };
    string nd = ToolHtml.GetDateTimeLong(DateTime.Now).ToString();
    NameValueCollection nvc = ToolWeb.GetNameValueCollection(formKeys, new string[] { "true", nd, "100", "1", "", "asc", "1", "公司", "" });

    string html;
    try
    {
        html = ToolWeb.GetHtmlByUrl(newUrl, nvc, Encoding.UTF8);
    }
    catch
    {
        return null;
    }

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    Dictionary<string, object> pageJson = (Dictionary<string, object>)serializer.DeserializeObject(html);
    int pageInt = int.Parse(pageJson["total"].ToString());

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            nvc = ToolWeb.GetNameValueCollection(formKeys, new string[] { "true", nd, "100", i.ToString(), "", "asc", "1", "公司", "" });
            try
            {
                html = ToolWeb.GetHtmlByUrl(newUrl, nvc, Encoding.UTF8);
                pageJson = (Dictionary<string, object>)serializer.DeserializeObject(html);
            }
            catch
            {
                continue;
            }
        }

        object dataValue = null;
        if (pageJson != null)
        {
            pageJson.TryGetValue("data", out dataValue);
        }
        object[] objList = dataValue as object[];
        if (objList == null)
        {
            // A page without a "data" array has no enterprises to process.
            continue;
        }

        foreach (object obj in objList)
        {
            Dictionary<string, object> dic = obj as Dictionary<string, object>;
            if (dic == null)
            {
                continue;
            }

            string CorpName = Convert.ToString(dic["enterpriseName"]);
            string CorpCode = Convert.ToString(dic["organizationCode"]);
            string enterpriseId = Convert.ToString(dic["enterpriseId"]);
            string cUrl = "http://202.104.65.182:8081/G2/webdrive/web-enterprise!view.do?enterpriseId=" + enterpriseId;

            string htmldtl;
            try
            {
                htmldtl = ToolWeb.GetHtmlByUrl(cUrl).GetJsString();
            }
            catch
            {
                continue;
            }

            string CorpAddress = ToolHtml.GetHtmlInputValue(htmldtl, "_M.registerAddress");
            string RegDate = ToolHtml.GetHtmlInputValue(htmldtl, "_M.registerTime");
            string RegFund = ToolHtml.GetHtmlInputValue(htmldtl, "_M.licenseCapital");
            if (!string.IsNullOrEmpty(RegFund))
            {
                RegFund += "万元";
            }
            string BusinessCode = ToolHtml.GetHtmlInputValue(htmldtl, "_M.licenseRegistrationCode");
            string CorpSite = ToolHtml.GetHtmlInputValue(htmldtl, "_M.firmWebsite");
            string LinkMan = ToolHtml.GetHtmlInputValue(htmldtl, "_M.name");
            string Email = ToolHtml.GetHtmlInputValue(htmldtl, "_M.email");
            string LinkPhone = ToolHtml.GetHtmlInputValue(htmldtl, "_M.tel");
            string Fax = ToolHtml.GetHtmlInputValue(htmldtl, "_M.fax");
            string BusinessType = string.Empty;
            string ISOQualNum = string.Empty;
            string ISOEnvironNum = string.Empty;

            // The enterprise type is the comma-joined text of the matching cells from
            // the third one on, leaving out empty cells and cells that parse as a date.
            string corpType = string.Empty;
            Parser parser = new Parser(new Lexer(htmldtl));
            NodeList typeNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "g2-cell col-sm-6")));
            if (typeNode != null && typeNode.Count > 0)
            {
                List<string> typeParts = new List<string>();
                for (int j = 2; j < typeNode.Count; j++)
                {
                    string cellText = typeNode[j].ToNodePlainString();
                    DateTime ignoredDate;
                    if (string.IsNullOrEmpty(cellText) || DateTime.TryParse(cellText, out ignoredDate))
                    {
                        continue;
                    }
                    typeParts.Add(cellText);
                }
                corpType = string.Join(",", typeParts.ToArray());
            }

            CorpInfo info = ToolDb.GenCorpInfo(CorpName, CorpCode, CorpAddress, RegDate, RegFund, BusinessCode, BusinessType, LinkMan, LinkPhone, Fax, Email, CorpSite, corpType, "广东省", "广东地区", "广东省住房和城乡建设厅", cUrl, ISOQualNum, ISOEnvironNum, string.Empty);

            string exisSql = string.Format("select Id from CorpInfo where CorpName='{0}' and CorpType='{1}' and InfoSource='{2}'", EscapeSqlText(info.CorpName), EscapeSqlText(info.CorpType), EscapeSqlText(info.InfoSource));
            string corpId = Convert.ToString(ToolDb.ExecuteScalar(exisSql));

            if (!string.IsNullOrEmpty(corpId))
            {
                // Replace the stored enterprise: remove it and its dependent rows first.
                int infoCount = ToolDb.ExecuteSql(string.Format("delete from CorpInfo where Id='{0}'", corpId));
                int qualCount = ToolDb.ExecuteSql(string.Format("delete from CorpQual where CorpId='{0}'", corpId));
                int leaderCount = ToolDb.ExecuteSql(string.Format("delete from CorpLeader where CorpId='{0}'", corpId));
                int tecstaffCount = ToolDb.ExecuteSql(string.Format("delete from CorpSecLicStaff where CorpId='{0}'", corpId));

                if (infoCount > 0)
                {
                    ToolDb.SaveEntity(info, "");
                }
                if (qualCount >= 0)
                {
                    try { AddCorpQual(info, enterpriseId); }
                    catch (Exception ex) { Logger.Error(ex); }
                }
                if (leaderCount >= 0)
                {
                    try { AddCorpLeader(info, enterpriseId); }
                    catch (Exception ex) { Logger.Error(ex); }
                }
                if (tecstaffCount >= 0)
                {
                    try { AddCorpStaff(info, enterpriseId); }
                    catch (Exception ex) { Logger.Error(ex); }
                }
            }
            else if (ToolDb.SaveEntity(info, ""))
            {
                try { AddCorpLeader(info, enterpriseId); }
                catch (Exception ex) { Logger.Error(ex); }
                try { AddCorpQual(info, enterpriseId); }
                catch (Exception ex) { Logger.Error(ex); }
                try { AddCorpStaff(info, enterpriseId); }
                catch (Exception ex) { Logger.Error(ex); }
            }
        }
    }

    ToolCoreDb.ExecuteProcedure();
    return null;
}

/// <summary>
/// Renders a value as text for use inside a single-quoted SQL literal by doubling
/// every single quote; <c>null</c> becomes an empty string.
/// </summary>
private static string EscapeSqlText(object value)
{
    return Convert.ToString(value).Replace("'", "''");
}
/// <summary>
/// Downloads the qualification certificates of one enterprise and saves a
/// <c>CorpQual</c> entity for every item listed under each certificate.
/// </summary>
/// <remarks>
/// The first request lists the certificates; for each one a second request lists
/// its items. An item's <c>contentCodeName</c> is used both as the qualification
/// name and type, and the <c>desc</c> value of its <c>mainItem</c> object (when
/// present) as the sequence. A failed certificate request ends the method silently;
/// a failed or unreadable item response skips that certificate only. A certificate
/// list that is not valid JSON still raises the serializer's exception to the caller.
/// </remarks>
/// <param name="info">Enterprise whose <c>Id</c> and <c>Url</c> are copied onto each qualification entity.</param>
/// <param name="enterpriseId">Remote enterprise identifier inserted into the certificate request URL.</param>
protected void AddCorpQual(CorpInfo info, string enterpriseId)
{
    string nd = ToolHtml.GetDateTimeLong(DateTime.Now).ToString();
    string url = "http://202.104.65.182:8081/G2/webdrive/web-enterprise-qualification.do?enterpriseId=" + enterpriseId + "&data&filter_params_=rowNum,qualificationId,enterpriseBaseId,enterpriseId,qualificationBeforeId,qualificationCode,validEnd&defined_operations_=&nocheck_operations_=&";
    NameValueCollection nvc = ToolWeb.GetNameValueCollection(
        new string[] { "gridSearch", "nd", "PAGESIZE", "PAGE", "sortField", "sortDirection" },
        new string[] { "false", nd, "100", "1", "", "asc" });

    string strJson;
    try
    {
        strJson = ToolWeb.GetHtmlByUrl(url, nvc);
    }
    catch
    {
        // Best effort: without the certificate list there is nothing to store.
        return;
    }

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    Dictionary<string, object> listJson = serializer.DeserializeObject(strJson) as Dictionary<string, object>;
    object listData;
    if (listJson == null || !listJson.TryGetValue("data", out listData))
    {
        return;
    }

    object[] certificates = listData as object[];
    if (certificates == null)
    {
        return;
    }

    foreach (object certificate in certificates)
    {
        Dictionary<string, object> dic = certificate as Dictionary<string, object>;
        if (dic == null)
        {
            // Skip entries that are not JSON objects instead of dereferencing null.
            continue;
        }

        string qualificationCode = Convert.ToString(dic["qualificationCode"]);
        string qualificationId = Convert.ToString(dic["qualificationId"]);
        string dtlUrl = "http://202.104.65.182:8081/G2/webdrive/none/web-enterprise-qualification-item.do?qualificationId=" + qualificationId + "&actionFlag=&data&filter_params_=rowNum,qualificationItemId,qualificationId,contentCodeValue,qualificationCode,contentCodeName,qualificationLevelName,mainItem,certificateIssuer,certificateDate,validDate&defined_operations_=&nocheck_operations_=&";

        Dictionary<string, object> dtlJson;
        try
        {
            // The paging form values of the certificate request are reused here.
            dtlJson = serializer.DeserializeObject(ToolWeb.GetHtmlByUrl(dtlUrl, nvc)) as Dictionary<string, object>;
        }
        catch
        {
            // An unreachable or unreadable item response skips this certificate only;
            // previously an unreadable response aborted every remaining certificate.
            continue;
        }

        object dtlData = null;
        if (dtlJson != null)
        {
            dtlJson.TryGetValue("data", out dtlData);
        }

        object[] items = dtlData as object[];
        if (items == null)
        {
            continue;
        }

        foreach (object item in items)
        {
            Dictionary<string, object> dicDtl = item as Dictionary<string, object>;
            if (dicDtl == null)
            {
                continue;
            }

            string qualName = Convert.ToString(dicDtl["contentCodeName"]);
            string qualType = qualName;
            string qualLevel = Convert.ToString(dicDtl["qualificationLevelName"]);
            string licUnit = Convert.ToString(dicDtl["certificateIssuer"]);
            string licDate = Convert.ToString(dicDtl["certificateDate"]);
            string validDate = Convert.ToString(dicDtl["validDate"]);
            string qualNum = qualLevel.GetLevel();

            string qualSeq = string.Empty;
            Dictionary<string, object> mainItem = dicDtl["mainItem"] as Dictionary<string, object>;
            if (mainItem != null)
            {
                qualSeq = Convert.ToString(mainItem["desc"]);
            }

            CorpQual qual = ToolDb.GenCorpQual(info.Id, qualName, qualificationCode, qualSeq, qualType, qualLevel, validDate, licDate, licUnit, info.Url, qualNum, "广东省", "广东地区");
            ToolDb.SaveEntity(qual, string.Empty);
        }
    }
}