/// <summary>
/// Crawls the paged HTML contract list at <c>SiteUrl</c>, follows each row's detail link,
/// and builds one <c>ProjectConpact</c> record per row whose detail page contains a
/// <c>div.sub_detailed</c> block.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
/// </param>
/// <returns>
/// The records collected so far. Empty when the first list page cannot be fetched or the
/// page count in the footer cannot be parsed.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<ProjectConpact>();
    string htl = string.Empty;
    int page = 1;

    try
    {
        htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch (Exception)
    {
        return list;
    }

    // The page count is read from the "pagefooter" div of the first page.
    Parser parser = new Parser(new Lexer(htl));
    NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pagefooter")));
    // Null must be tested before Count is read.
    if (tdNodes != null && tdNodes.Count > 0)
    {
        try
        {
            string temp = tdNodes.AsString().GetRegexBegEnd(",共有", "页");
            page = int.Parse(temp);
        }
        catch
        {
            return list;
        }
    }

    // Loop-invariant: extracts the numeric part of the contract price.
    Regex regBidMoney = new Regex(@"[0-9]+[.]{0,1}[0-9]+");

    for (int i = 1; i <= page; i++)
    {
        if (i > 1)
        {
            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "&web_cur_page=" + i, Encoding.UTF8);
            }
            catch
            {
                // Skip this page; falling through would re-parse the previous page's HTML
                // and add its records a second time.
                continue;
            }
        }

        parser = new Parser(new Lexer(htl));
        NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "table1")));
        if (tableNodeList == null || tableNodeList.Count == 0)
        {
            continue;
        }

        // The last matching table is the one used as the data grid.
        TableTag table = tableNodeList[tableNodeList.Count - 1] as TableTag;
        if (table == null)
        {
            continue;
        }

        // Row 0 is skipped (treated as the header row).
        for (int j = 1; j < table.RowCount; j++)
        {
            string pUrl = string.Empty, pSubcontractCode = string.Empty, pSubcontractName = string.Empty,
                   pSubcontractCompany = string.Empty, pInfoSource = string.Empty, pRecordDate = string.Empty,
                   pCompactPrice = string.Empty, pCompactType = string.Empty, pBuildUnit = string.Empty,
                   pPrjCode = string.Empty, PrjName = string.Empty, pPrjMgrQual = string.Empty,
                   pPrjMgrName = string.Empty, pContUnit = string.Empty, pCreatetime = string.Empty;

            TableRow tr = table.Rows[j];
            // Columns 1..4 are read below; skip malformed/short rows instead of throwing.
            if (tr == null || tr.ColumnCount < 5)
            {
                continue;
            }

            pBuildUnit = tr.Columns[1].ToPlainTextString().Trim();
            pContUnit = tr.Columns[2].ToPlainTextString().Trim();
            pCompactType = tr.Columns[3].ToPlainTextString().Trim();
            pRecordDate = tr.Columns[4].ToPlainTextString().Trim();

            NodeList linkNodes = tr.Columns[1].SearchFor(typeof(ATag), true);
            if (linkNodes == null || linkNodes.Count == 0)
            {
                continue;
            }
            ATag aTag = linkNodes[0] as ATag;
            if (aTag == null || aTag.Link == null)
            {
                continue;
            }

            // The link is a GoDetail('...') script call; strip the wrapper to get the path.
            pUrl = "http://www.cb.gov.cn" + aTag.Link.Replace("GoDetail('", "").Replace("');", "");

            string htmldetail = string.Empty;
            try
            {
                htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(pUrl), Encoding.UTF8).Replace("<br/>", "\r\n").Trim();
            }
            catch (Exception)
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldetail));
            NodeList dtList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "sub_detailed")));
            if (dtList == null || dtList.Count == 0)
            {
                // No detail block: the row is not recorded.
                continue;
            }

            parser = new Parser(new Lexer(dtList.AsHtml()));
            NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
            if (tableNode != null && tableNode.Count > 0)
            {
                TableTag tag = tableNode[0] as TableTag;
                if (tag != null)
                {
                    // Flatten the detail table into "label:value\r\n" lines:
                    // odd cells are labels, even cells are values.
                    StringBuilder infoBuilder = new StringBuilder();
                    for (int r = 0; r < tag.RowCount; r++)
                    {
                        for (int c = 0; c < tag.Rows[r].ColumnCount; c++)
                        {
                            string temp = tag.Rows[r].Columns[c].ToNodePlainString().GetReplace(":,:");
                            if ((c + 1) % 2 == 0)
                            {
                                infoBuilder.Append(temp).Append("\r\n");
                            }
                            else
                            {
                                infoBuilder.Append(temp).Append(":");
                            }
                        }
                    }
                    pInfoSource = infoBuilder.ToString();
                }
            }

            PrjName = pInfoSource.GetRegex("工程名称");
            pSubcontractCode = pInfoSource.GetRegex("分包工程编号");
            pSubcontractName = pInfoSource.GetRegex("分包工程名称");
            pSubcontractCompany = pInfoSource.GetRegex("分包工程发包单位");
            pCompactPrice = pInfoSource.GetRegex("合同价款");
            pPrjMgrQual = pInfoSource.GetRegex("项目经理资格");
            pPrjMgrName = pInfoSource.GetRegex("项目经理名称");

            // Normalise the price to units of 万: values already in 万 keep their number,
            // otherwise the number is divided by 10000 (anything below 0.1 becomes "0").
            if (pCompactPrice.Contains("万"))
            {
                pCompactPrice = pCompactPrice.Remove(pCompactPrice.IndexOf("万")).Trim();
                pCompactPrice = regBidMoney.Match(pCompactPrice).Value;
            }
            else
            {
                try
                {
                    pCompactPrice = (decimal.Parse(regBidMoney.Match(pCompactPrice).Value) / 10000).ToString();
                    if (decimal.Parse(pCompactPrice) < decimal.Parse("0.1"))
                    {
                        pCompactPrice = "0";
                    }
                }
                catch (Exception)
                {
                    pCompactPrice = "0";
                }
            }

            ProjectConpact info = ToolDb.GenProjectConpact("广东省", pUrl, "深圳市龙岗区", pSubcontractCode, pSubcontractName, pSubcontractCompany, pInfoSource, pRecordDate, pCompactPrice, pCompactType, pBuildUnit, pPrjCode, PrjName, pPrjMgrQual, pPrjMgrName, pContUnit, pCreatetime, "深圳市龙岗区住房和建设局");
            list.Add(info);
            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls contract records from the JSON list service at
/// <c>http://htjg.szjs.gov.cn/web/webService/getContractList.json</c> (500 records per request),
/// fetches each record's detail JSON by id, and builds one <c>ProjectConpact</c> per record.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
/// </param>
/// <returns>
/// The records collected so far. Empty when the first list request or its JSON parsing fails.
/// </returns>
/// <remarks>
/// After every 199 successfully added records the method sleeps for ten minutes
/// (<c>Thread.Sleep(600 * 1000)</c>) before continuing.
/// </remarks>
protected override IList ExecuteCrawl(bool crawlAll)
{
    const int pageSize = 500;
    string newUrl = "http://htjg.szjs.gov.cn/web/webService/getContractList.json";
    string dtlUrl = "http://htjg.szjs.gov.cn/web/webService/getContractById.json";

    IList list = new List<ProjectConpact>();
    string htl = string.Empty;
    int pageInt = 1, count = 1;
    JavaScriptSerializer serializer = new JavaScriptSerializer();
    Dictionary<string, object> smsTypeJson = null;

    try
    {
        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "pageNumber", "limit" }, new string[] { "1", "500" });
        htl = this.ToolWebSite.GetHtmlByUrl(newUrl, nvc);
        smsTypeJson = (Dictionary<string, object>)serializer.DeserializeObject(htl);
        int totalCount = Convert.ToInt32(smsTypeJson["total"]);
        // Ceiling division: an exact multiple of the page size must not add an extra page.
        // At least one pass is kept so the already-fetched first page is always processed.
        pageInt = Math.Max(1, (totalCount + pageSize - 1) / pageSize);
    }
    catch (Exception)
    {
        return list;
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                // The field name must match the first request ("pageNumber"); the previous
                // misspelling meant the page number was never sent under that name.
                NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "pageNumber", "limit" }, new string[] { i.ToString(), "500" });
                htl = this.ToolWebSite.GetHtmlByUrl(newUrl, nvc);
                smsTypeJson = (Dictionary<string, object>)serializer.DeserializeObject(htl);
            }
            catch
            {
                continue;
            }
        }

        if (smsTypeJson == null)
        {
            continue;
        }

        foreach (KeyValuePair<string, object> obj in smsTypeJson)
        {
            if (obj.Key == "total")
            {
                continue;
            }

            // Only array-valued entries carry records; skip anything else instead of throwing.
            object[] array = obj.Value as object[];
            if (array == null)
            {
                continue;
            }

            foreach (object arrValue in array)
            {
                Dictionary<string, object> dic = arrValue as Dictionary<string, object>;
                if (dic == null)
                {
                    continue;
                }

                ProjectConpact info;
                try
                {
                    string id = Convert.ToString(dic["id"]);
                    string PrjName = Convert.ToString(dic["itemname"]);
                    string pBuildUnit = Convert.ToString(dic["const_org"]);
                    string pContUnit = Convert.ToString(dic["corp_name"]);
                    string pRecordDate = Convert.ToString(dic["status_time"]);
                    string pPrjMgrName = string.Empty;
                    string pUrl = "http://htjg.szjs.gov.cn/web/contractdetail.jsp?id=" + id;

                    NameValueCollection dtlNvc = this.ToolWebSite.GetNameValueCollection(new string[] { "id" }, new string[] { id });
                    string htmldtl = this.ToolWebSite.GetHtmlByUrl(dtlUrl, dtlNvc);
                    Dictionary<string, object> dtlDic = (Dictionary<string, object>)serializer.DeserializeObject(htmldtl);

                    string pInfoSource = string.Format("合同类型:{0}\r\n发包方式:{1}\r\n工程项目编码:{2}\r\n工程项目名称:{3}\r\n 工程标段名称:{4}\r\n建设单位:{5}\r\n分包工程编号:{6}\r\n分包工程名称:{7}\r\n发包单位:{8}\r\n承包单位:{9}\r\n合同价:{10}\r\n备案日期:{11}\r\n", Convert.ToString(dtlDic["pact_type"]), Convert.ToString(dtlDic["appl_method"]), Convert.ToString(dtlDic["itemcode"]), PrjName, Convert.ToString(dtlDic["prj_name"]), pBuildUnit, Convert.ToString(dtlDic["fb_prj_id"]), Convert.ToString(dtlDic["fb_prj_name"]), Convert.ToString(dtlDic["fbr_org"]), pContUnit, Convert.ToString(dtlDic["contract_price"]), pRecordDate);

                    // Fields are read back out of the formatted text.
                    // NOTE(review): several targets do not match their label (contracting method
                    // -> pSubcontractCompany, section name -> pCreatetime, employer ->
                    // pPrjMgrQual); kept as in the original mapping - confirm this is intended.
                    string pCompactType = pInfoSource.GetRegex("合同类型");
                    string pSubcontractCompany = pInfoSource.GetRegex("发包方式");
                    string pPrjCode = pInfoSource.GetRegex("工程项目编码");
                    string pCreatetime = pInfoSource.GetRegex("工程标段名称");
                    string pSubcontractCode = pInfoSource.GetRegex("分包工程编号");
                    string pSubcontractName = pInfoSource.GetRegex("分包工程名称");
                    string pPrjMgrQual = pInfoSource.GetRegex("发包单位");
                    string pCompactPrice = pInfoSource.GetRegex("合同价");

                    info = ToolDb.GenProjectConpact("广东省", pUrl, "深圳市区", pSubcontractCode, pSubcontractName, pSubcontractCompany, pInfoSource, pRecordDate, pCompactPrice, pCompactType, pBuildUnit, pPrjCode, PrjName, pPrjMgrQual, pPrjMgrName, pContUnit, pCreatetime, "深圳市住房和建设局");
                }
                catch (Exception)
                {
                    // Best effort per record: a missing key, failed detail request or malformed
                    // detail JSON skips this record instead of aborting the whole crawl.
                    continue;
                }

                list.Add(info);
                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return list;
                }

                // Throttle: pause ten minutes after each batch of records.
                count++;
                if (count >= 200)
                {
                    count = 1;
                    Thread.Sleep(600 * 1000);
                }
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the ASP.NET paged result grid (<c>ctl00_Main_gv_ZbResult</c>) at <c>SiteUrl</c>,
/// opens each row's detail page, and builds one <c>ProjectConpact</c> record per row whose
/// detail page contains a <c>div.data_con</c> block.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
/// </param>
/// <returns>
/// The records collected so far. Empty when the first page cannot be fetched or the page
/// count label cannot be parsed.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    string htl = string.Empty;
    string cookiestr = string.Empty;
    string viewState = string.Empty;
    string eventValidation = string.Empty;
    int page = 1;

    try
    {
        htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(this.SiteUrl), Encoding.UTF8, ref cookiestr);
        // Captured from the first page and re-posted with every paging request.
        viewState = this.ToolWebSite.GetAspNetViewState(htl);
        eventValidation = this.ToolWebSite.GetAspNetEventValidation(htl);
    }
    catch (Exception)
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(htl));
    NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_Main_paging_LblPageCount")));
    // Null must be tested before Count is read.
    if (tdNodes != null && tdNodes.Count > 0)
    {
        try
        {
            page = int.Parse(tdNodes[0].ToPlainTextString().Trim());
        }
        catch
        {
            return list;
        }
    }

    for (int i = 1; i <= page; i++)
    {
        if (i > 1)
        {
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "ctl00$ScriptManager1", "__EVENTTARGET", "__EVENTARGUMENT", "ctl00$Main$ddl_type", "ctl00$Main$txt_Title", "ctl00$Main$paging$txtPageIndex", "__VIEWSTATE", "__VIEWSTATEENCRYPTED", "__EVENTVALIDATION", "__ASYNCPOST", "ctl00$Main$paging$btnForward.x", "ctl00$Main$paging$btnForward.y" }, new string[] { "ctl00$UpdatePanel1|ctl00$Main$paging$btnNext", "", "", "1", "", i.ToString(), viewState, "", eventValidation, "true", "6", "6" });
            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(htl));
        NodeList tableList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_Main_gv_ZbResult")));
        if (tableList == null || tableList.Count == 0)
        {
            continue;
        }

        TableTag table = tableList[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        // Row 0 is skipped (treated as the header row).
        for (int j = 1; j < table.RowCount; j++)
        {
            string pUrl = string.Empty, pSubcontractCode = string.Empty, pSubcontractName = string.Empty,
                   pSubcontractCompany = string.Empty, pInfoSource = string.Empty, pRecordDate = string.Empty,
                   pCompactPrice = string.Empty, pCompactType = string.Empty, pBuildUnit = string.Empty,
                   pPrjCode = string.Empty, PrjName = string.Empty, pPrjMgrQual = string.Empty,
                   pPrjMgrName = string.Empty, pContUnit = string.Empty, pCreatetime = string.Empty;

            TableRow tr = table.Rows[j];
            // Columns 2 and 3 are read below; skip short rows (e.g. pager rows) instead of throwing.
            if (tr == null || tr.ColumnCount < 4)
            {
                continue;
            }

            pPrjCode = tr.Columns[2].ToPlainTextString().Trim();
            PrjName = tr.Columns[3].ToNodePlainString();

            // The detail URL is embedded in the row's ondblclick handler between the first
            // quote and the closing parenthesis; rows without the handler cannot be followed.
            string dblClick = tr.GetAttribute("ondblclick");
            if (string.IsNullOrEmpty(dblClick))
            {
                continue;
            }
            // Decode HTML-escaped ampersands so the query string is usable
            // (the previous Replace("&", "&") was a no-op).
            pUrl = "http://www.szbajs.gov.cn/SiteManage/" + dblClick.Replace("&amp;", "&").Replace(")", "kdxx").GetRegexBegEnd("'", "kdxx").Replace("'", "");

            string htmldetail = string.Empty;
            try
            {
                htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(pUrl), Encoding.UTF8);
            }
            catch (Exception)
            {
                continue;
            }

            Parser parserdetail = new Parser(new Lexer(htmldetail));
            NodeList dtnode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "data_con")));
            if (dtnode == null || dtnode.Count == 0)
            {
                // No detail block: the row is not recorded.
                continue;
            }

            string rawText = dtnode.AsString();

            // Strip spaces and collapse every run of blank lines to a single line break.
            string ctx = rawText.Replace(" ", "");
            while (ctx.Contains("\r\n\r\n"))
            {
                ctx = ctx.Replace("\r\n\r\n", "\r\n");
            }
            // Pull the project name up onto the same line as its label.
            ctx = ctx.Replace("工程名称:\r\n", "工程名称:");
            pInfoSource = ctx;

            if (string.IsNullOrEmpty(PrjName))
            {
                PrjName = ctx.GetRegex("工程名称");
            }
            pBuildUnit = ctx.GetRegex("建设单位");
            pContUnit = ctx.GetRegex("分包施工单位");
            pPrjMgrName = ctx.GetRegex("施工单位联系人");
            pCompactPrice = ctx.GetRegex("工程造价");
            pCompactType = "专业分包合同";
            pSubcontractCompany = ctx.GetRegex("总包施工单位");
            // The start date is read from the unnormalised text, as in the original.
            pRecordDate = rawText.GetRegex("合同开工日期").GetDateRegex();
            pSubcontractCode = pPrjCode;
            pSubcontractName = PrjName;

            ProjectConpact info = ToolDb.GenProjectConpact("广东省", pUrl, "深圳市宝安区", pSubcontractCode, pSubcontractName, pSubcontractCompany, pInfoSource, pRecordDate, pCompactPrice, pCompactType, pBuildUnit, pPrjCode, PrjName, pPrjMgrQual, pPrjMgrName, pContUnit, pCreatetime, "深圳市宝安区建设局");
            list.Add(info);
            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls contract filing records from the JSON list at <c>SiteUrl</c> (20 records per page,
/// later pages fetched from <c>build.ashx</c>), opens each record's HTML detail page, and
/// builds one <c>ProjectConpact</c> per entry of the payload's <c>DataList</c> array.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> records have been collected.
/// </param>
/// <returns>
/// The records collected so far. Empty when the first list request fails.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    const int pageSize = 20;

    IList list = new ArrayList();
    string htl = string.Empty;
    int page = 1;

    try
    {
        htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch (Exception)
    {
        return list;
    }

    // Derive the page count from the "RowCount" value in the first response. The number is
    // located directly after the key so it no longer has to be the last field in the payload.
    Match rowCountMatch = Regex.Match(htl, "RowCount\"?\\s*:\\s*\"?([0-9]+)");
    int rowCount;
    if (rowCountMatch.Success && int.TryParse(rowCountMatch.Groups[1].Value, out rowCount))
    {
        // Integer ceiling division. The previous Convert.ToInt32(decimal) rounded to nearest
        // before adding one, over-counting pages whenever the fraction was .5 or more.
        page = (rowCount + pageSize - 1) / pageSize;
    }

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    // Loop-invariant: extracts the numeric part of the contract price.
    Regex regBidMoney = new Regex(@"[0-9]+[.]{0,1}[0-9]+");

    for (int i = 1; i <= page; i++)
    {
        if (i > 1)
        {
            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl("http://www.szjs.gov.cn/build/build.ashx?_=1352582850568&menu=%E9%A1%B9%E7%9B%AE%E4%BF%A1%E6%81%AF&type=%E6%96%BD%E5%B7%A5%E7%9B%91%E7%90%86%E5%90%88%E5%90%8C%E5%A4%87%E6%A1%88&pageSize=20&pageIndex=" + i.ToString(), Encoding.UTF8);
            }
            catch (Exception)
            {
                continue;
            }
        }

        Dictionary<string, object> smsTypeJson;
        try
        {
            smsTypeJson = serializer.DeserializeObject(htl) as Dictionary<string, object>;
        }
        catch (Exception)
        {
            // A malformed page must not abort the whole crawl.
            continue;
        }
        if (smsTypeJson == null)
        {
            continue;
        }

        foreach (KeyValuePair<string, object> obj in smsTypeJson)
        {
            if (obj.Key != "DataList")
            {
                continue;
            }

            object[] array = obj.Value as object[];
            if (array == null)
            {
                continue;
            }

            foreach (object obj2 in array)
            {
                Dictionary<string, object> dicSmsType = obj2 as Dictionary<string, object>;
                if (dicSmsType == null)
                {
                    continue;
                }

                string pUrl = string.Empty, pSubcontractCode = string.Empty, pSubcontractName = string.Empty,
                       pSubcontractCompany = string.Empty, pInfoSource = string.Empty, pRecordDate = string.Empty,
                       pCompactPrice = string.Empty, pCompactType = string.Empty, pBuildUnit = string.Empty,
                       pPrjCode = string.Empty, PrjName = string.Empty, pPrjMgrQual = string.Empty,
                       pPrjMgrName = string.Empty, pContUnit = string.Empty, pCreatetime = string.Empty;

                try
                {
                    string noid = Convert.ToString(dicSmsType["Nid"]);
                    PrjName = Convert.ToString(dicSmsType["PrjName"]);
                    pBuildUnit = Convert.ToString(dicSmsType["ConstOrg"]);
                    pContUnit = Convert.ToString(dicSmsType["CorpName"]);
                    pCompactType = Convert.ToString(dicSmsType["PactType"]);
                    pRecordDate = Convert.ToString(dicSmsType["IssueDate"]);
                    pUrl = "http://www.szjs.gov.cn/build/htba_detail.aspx?id=" + noid;

                    string htmldetail = string.Empty;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(pUrl), Encoding.UTF8).Trim();
                    }
                    catch (Exception)
                    {
                        continue;
                    }

                    Parser parser = new Parser(new Lexer(htmldetail));
                    NodeList dtList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "js-table mar-l-4")));
                    TableTag table = (dtList != null && dtList.Count > 0) ? dtList[0] as TableTag : null;
                    if (table != null)
                    {
                        // Flatten each detail-table row into one whitespace-free text line.
                        StringBuilder infoBuilder = new StringBuilder();
                        for (int j = 0; j < table.RowCount; j++)
                        {
                            TableRow dr = table.Rows[j];
                            for (int k = 0; k < dr.ColumnCount; k++)
                            {
                                infoBuilder.Append(dr.Columns[k].ToPlainTextString().Trim().Replace(" ", "").Replace("\r", "").Replace("\n", ""));
                            }
                            infoBuilder.Append("\r\n");
                        }
                        pInfoSource = infoBuilder.ToString();

                        Regex regexsubcode = new Regex(@"分包工程编号(:|:)[^\r\n]+\r\n");
                        pSubcontractCode = regexsubcode.Match(pInfoSource).Value.Replace("分包工程编号:", "").Trim();
                        Regex regexsubname = new Regex(@"分包工程名称(:|:)[^\r\n]+\r\n");
                        pSubcontractName = regexsubname.Match(pInfoSource).Value.Replace("分包工程名称:", "").Trim();
                        Regex regexsubcom = new Regex(@"分包工程发包单位(:|:)[^\r\n]+\r\n");
                        pSubcontractCompany = regexsubcom.Match(pInfoSource).Value.Replace("分包工程发包单位:", "").Trim();
                        Regex regpCompactPrice = new Regex(@"合同价(:|:)[^\r\n]+\r\n");
                        pCompactPrice = regpCompactPrice.Match(pInfoSource).Value.Replace("合同价:", "").Trim();
                        Regex regpPrjMgrQual = new Regex(@"项目经理资格(:|:)[^\r\n]+\r\n");
                        pPrjMgrQual = regpPrjMgrQual.Match(pInfoSource).Value.Replace("项目经理资格:", "").Trim();
                        Regex regpPrjMgrName = new Regex(@"项目经理名称(:|:)[^\r\n]+\r\n");
                        pPrjMgrName = regpPrjMgrName.Match(pInfoSource).Value.Replace("项目经理名称:", "").Trim();
                        Regex regpPrjCode = new Regex(@"(工程编号|总包工程编号)(:|:)[^\r\n]+\r\n");
                        pPrjCode = regpPrjCode.Match(pInfoSource).Value.Replace("总包工程编号", "").Replace("工程编号", "").Replace(":", "").Replace(":", "").Replace("总包", "").Trim();

                        // Normalise the price to units of 万: values already in 万 keep their
                        // number, otherwise the number is divided by 10000 (below 0.1 -> "0").
                        if (pCompactPrice.Contains("万"))
                        {
                            pCompactPrice = pCompactPrice.Remove(pCompactPrice.IndexOf("万")).Trim();
                            pCompactPrice = regBidMoney.Match(pCompactPrice).Value;
                        }
                        else
                        {
                            try
                            {
                                pCompactPrice = (decimal.Parse(regBidMoney.Match(pCompactPrice).Value) / 10000).ToString();
                                if (decimal.Parse(pCompactPrice) < decimal.Parse("0.1"))
                                {
                                    pCompactPrice = "0";
                                }
                            }
                            catch (Exception)
                            {
                                pCompactPrice = "0";
                            }
                        }
                    }

                    // A record is stored even when the detail table is missing; in that case
                    // only the fields taken from the list JSON are populated.
                    ProjectConpact info = ToolDb.GenProjectConpact("广东省", pUrl, "深圳市区", pSubcontractCode, pSubcontractName, pSubcontractCompany, pInfoSource, pRecordDate, pCompactPrice, pCompactType, pBuildUnit, pPrjCode, PrjName, pPrjMgrQual, pPrjMgrName, pContUnit, pCreatetime, "深圳市住房和建设局");
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
                catch
                {
                    // Best effort per record: any failure skips just this record.
                    continue;
                }
            }
        }
    }

    return list;
}