/// <summary>
/// Crawls the paged construction-licence list at <c>SiteUrl</c>. For every data row of the
/// <c>tblPrjConstBid</c> table it posts the row's codes to the detail endpoint, flattens the
/// detail table into "label:value" lines and builds one <c>ProjectLic</c> from them.
/// </summary>
/// <param name="crawlAll">
/// When false, the crawl returns as soon as <c>MaxCount</c> records have been collected.
/// </param>
/// <returns>
/// The collected records. The list is empty when the first list page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<ProjectLic>();
    int pageInt = 1, count = 0;
    string htl = string.Empty;

    // Fields this crawler never populates; they are handed to GenProjectLic as empty strings.
    string pBuildManager = string.Empty, pConstUnitManager = string.Empty, pSuperUnitManager = string.Empty,
           pProspUnitManager = string.Empty, pDesignUnitManager = string.Empty, pSpecialPerson = string.Empty,
           pPrjDesc = string.Empty, pCreatetime = string.Empty;

    // Every record is fetched by posting to this same endpoint.
    const string detailUrl = "http://portal.szjs.gov.cn:8888/gongshi/sgxkz.html";

    try
    {
        htl = ToolHtml.GetHtmlByUrlEncode(SiteUrl, Encoding.UTF8);
    }
    catch (Exception)
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(htl));
    NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "pageLinkTd")));
    if (tdNodes != null && tdNodes.Count > 0)
    {
        try
        {
            string temp = tdNodes.AsString().ToNodeString();
            // Strip both the ASCII and the full-width colon that may follow the label.
            string s = temp.GetRegexBegEnd("总页数", "页").Replace(":", "").Replace("：", "");
            pageInt = int.Parse(s);
        }
        catch (Exception)
        {
            // Best effort: if the pager text cannot be parsed, only the first page is crawled.
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            // Page 1 was already downloaded above; later pages are requested by form post.
            NameValueCollection pageForm = this.ToolWebSite.GetNameValueCollection(
                new string[] { "page", "qymc", "ann_serial", "pro_name" },
                new string[] { i.ToString(), "", "", "" });
            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, pageForm, Encoding.UTF8);
            }
            catch (Exception)
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(htl));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "tblPrjConstBid")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }

        TableTag table = (TableTag)listNode[0];

        // Row 0 and the last row are skipped (presumably header and pager rows — confirm against the page).
        for (int j = 1; j < table.RowCount - 1; j++)
        {
            TableRow tr = table.Rows[j];
            if (tr.ColumnCount < 5)
            {
                // A malformed/short row must not abort the whole crawl.
                continue;
            }

            string pPrjLicCode = tr.Columns[0].ToNodePlainString();
            string pPrjCode = tr.Columns[1].ToNodePlainString();
            string pPrjName = tr.Columns[2].ToNodePlainString();
            string pBuildUnit = tr.Columns[3].ToNodePlainString();
            string PrjLicDate = tr.Columns[4].ToPlainTextString().GetDateRegex();
            string pUrl = detailUrl;

            NameValueCollection detailForm = this.ToolWebSite.GetNameValueCollection(
                new string[] { "instanceGuid", "yxtywlsh" },
                new string[] { pPrjCode, pPrjLicCode });

            string htmldetl;
            try
            {
                htmldetl = this.ToolWebSite.GetHtmlByUrl(pUrl, detailForm, Encoding.UTF8);
            }
            catch (Exception)
            {
                continue;
            }

            Parser parserdetail = new Parser(new Lexer(htmldetl));
            NodeList dtnode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "tblPrjConstBid")));
            if (dtnode == null || dtnode.Count == 0)
            {
                continue;
            }

            TableTag dtlTag = dtnode[0] as TableTag;
            if (dtlTag == null)
            {
                continue;
            }

            // Flatten the detail table: even cells are treated as labels ("label:"),
            // odd cells as values terminated by a line break.
            StringBuilder infoBuilder = new StringBuilder();
            for (int rowIndex = 1; rowIndex < dtlTag.RowCount; rowIndex++)
            {
                TableRow detailRow = dtlTag.Rows[rowIndex];
                for (int colIndex = 0; colIndex < detailRow.ColumnCount; colIndex++)
                {
                    infoBuilder.Append(detailRow.Columns[colIndex].ToNodePlainString());
                    infoBuilder.Append(colIndex % 2 == 0 ? ":" : "\r\n");
                }
            }
            string pInfoSource = infoBuilder.ToString();

            string pPrjStartDate = pInfoSource.GetRegex("合同开工日期");
            string PrjEndDate = pInfoSource.GetRegex("合同竣工日期");
            string pDesignUnit = pInfoSource.GetRegex("设计单位,建设单位 ");
            string pBuildAddress = pInfoSource.GetRegex("工程地址,建设地址");
            string pBuildScale = pInfoSource.GetRegex("建筑面积,建设规模");
            string pSuperUnit = pInfoSource.GetRegex("监理单位");
            string pConstUnit = pInfoSource.GetRegex("施工单位");
            string pLicUnit = pInfoSource.GetRegex("发证机关");
            string pProspUnit = pInfoSource.GetRegex("勘察单位");
            string pPrjPrice = pInfoSource.GetRegex("合同价格");
            string pPrjManager = pInfoSource.GetRegex("项目经理,项目负责人");

            if (string.IsNullOrEmpty(pLicUnit))
            {
                pLicUnit = "深圳市住房和建设局";
            }

            ProjectLic info = ToolDb.GenProjectLic(pPrjName, pBuildUnit, pBuildAddress, pBuildManager, pBuildScale, pPrjPrice,
                pPrjStartDate, PrjEndDate, pConstUnit, pConstUnitManager, pSuperUnit, pSuperUnitManager, pProspUnit,
                pProspUnitManager, pDesignUnit, pDesignUnitManager, pPrjManager, pSpecialPerson, pLicUnit, pPrjLicCode,
                PrjLicDate, pPrjDesc, "广东省", "深圳市区", pInfoSource, pUrl, pCreatetime, pPrjCode, "深圳市住房和建设局");
            list.Add(info);

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }

            // Throttle: pause for ten minutes once the counter reaches 200, then restart it at 1.
            count++;
            if (count >= 200)
            {
                count = 1;
                Thread.Sleep(600 * 1000);
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the paged construction-licence list at <c>SiteUrl</c> (pages are addressed with the
/// <c>web_cur_page</c> query parameter), follows the link in each row to its detail page and
/// builds one <c>ProjectLic</c> per row whose detail table could be parsed.
/// </summary>
/// <param name="crawlAll">
/// When false, the crawl returns as soon as <c>MaxCount</c> records have been collected.
/// </param>
/// <returns>
/// The collected records. The list is empty when the first page cannot be downloaded or the
/// page count in the footer cannot be parsed.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<ProjectLic>();
    string htl = string.Empty;
    int page = 1;

    // Fields this crawler never populates; they are handed to GenProjectLic as empty strings.
    string pBuildManager = string.Empty, pConstUnitManager = string.Empty, pSuperUnitManager = string.Empty,
           pProspUnitManager = string.Empty, pDesignUnitManager = string.Empty, pSpecialPerson = string.Empty,
           pPrjLicCode = string.Empty, pPrjDesc = string.Empty, pCreatetime = string.Empty;

    // Digits with an optional fractional part. The previous pattern required at least
    // two digits and therefore returned nothing for single-digit amounts such as "5".
    Regex regBidMoney = new Regex(@"[0-9]+(\.[0-9]+)?");

    try
    {
        htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(htl));
    NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pagefooter")));
    // Null must be tested before Count is read.
    if (tdNodes != null && tdNodes.Count > 0)
    {
        try
        {
            string temp = tdNodes.AsString().GetRegexBegEnd(",共有", "页");
            page = int.Parse(temp);
        }
        catch
        {
            return list;
        }
    }

    for (int i = 1; i <= page; i++)
    {
        if (i > 1)
        {
            // Page 1 was already downloaded above.
            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "&web_cur_page=" + i, Encoding.UTF8);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(htl));
        NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "table1")));
        if (tableNodeList == null || tableNodeList.Count == 0)
        {
            continue;
        }

        // The list is taken from the last table carrying class "table1".
        TableTag table = tableNodeList[tableNodeList.Count - 1] as TableTag;
        if (table == null)
        {
            continue;
        }

        for (int j = 1; j < table.RowCount; j++)
        {
            TableRow tr = table.Rows[j];
            if (tr.ColumnCount < 5)
            {
                // A malformed/short row must not abort the whole crawl.
                continue;
            }

            string pPrjName = tr.Columns[3].ToPlainTextString().Trim();
            string pPrjCode = tr.Columns[2].ToPlainTextString().Trim();
            string PrjLicDate = tr.Columns[1].ToPlainTextString().Trim();
            string pBuildUnit = tr.Columns[4].ToPlainTextString().Trim();

            // The detail link lives in the third cell; skip rows that carry no anchor.
            NodeList links = tr.Columns[2].SearchFor(typeof(ATag), true);
            if (links == null || links.Count == 0)
            {
                continue;
            }
            ATag aTag = links[0] as ATag;
            if (aTag == null)
            {
                continue;
            }

            // The link is a GoDetail('...') script call; unwrap it to get the relative path.
            string pUrl = "http://www.cb.gov.cn" + aTag.Link.Replace("GoDetail('", "").Replace("');", "");

            string htmldetail;
            try
            {
                htmldetail = this.ToolWebSite.GetHtmlByUrl(pUrl, Encoding.UTF8).Replace("<br/>", "\r\n").Trim();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldetail));
            NodeList dtList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "table1")));
            if (dtList == null || dtList.Count == 0)
            {
                continue;
            }

            string pInfoSource = dtList.AsHtml().GetReplace("</p>,</br>", "\r\n").ToCtxString();
            string pDesignUnit = pInfoSource.GetRegex("设计单位");
            string pBuildAddress = pInfoSource.GetRegex("工程地址,工程地点");
            string pBuildScale = pInfoSource.GetRegex("建筑面积,建设规模");
            string pSuperUnit = pInfoSource.GetRegex("监理单位");
            string pConstUnit = pInfoSource.GetRegex("施工单位");
            string pLicUnit = pInfoSource.GetRegex("发证机关");
            string pProspUnit = pInfoSource.GetRegex("勘察单位");
            string pPrjManager = pInfoSource.GetRegex("项目经理,项目负责人");
            string pPrjStartDate = pInfoSource.GetRegex("计划开工日期");
            string PrjEndDate = pInfoSource.GetRegex("计划竣工日期");
            string pPrjPrice = pInfoSource.GetRegex("工程造价");

            if (pPrjPrice.Contains("万"))
            {
                // Amount is already expressed with the "万" (ten-thousand) unit: keep the number before it.
                pPrjPrice = pPrjPrice.Remove(pPrjPrice.IndexOf("万")).Trim();
                pPrjPrice = regBidMoney.Match(pPrjPrice).Value;
            }
            else
            {
                try
                {
                    // No "万" unit: divide by 10000; anything below 0.1 is recorded as "0".
                    decimal scaled = decimal.Parse(pPrjPrice) / 10000;
                    pPrjPrice = scaled < 0.1m ? "0" : scaled.ToString();
                }
                catch (Exception)
                {
                    pPrjPrice = "0";
                }
            }

            if (string.IsNullOrEmpty(pLicUnit))
            {
                pLicUnit = "深圳市龙岗区住房和建设局";
            }

            ProjectLic info = ToolDb.GenProjectLic(pPrjName, pBuildUnit, pBuildAddress, pBuildManager, pBuildScale, pPrjPrice,
                pPrjStartDate, PrjEndDate, pConstUnit, pConstUnitManager, pSuperUnit, pSuperUnitManager, pProspUnit,
                pProspUnitManager, pDesignUnit, pDesignUnitManager, pPrjManager, pSpecialPerson, pLicUnit, pPrjLicCode,
                PrjLicDate, pPrjDesc, "广东省", "深圳市龙岗区", pInfoSource, pUrl, pCreatetime, pPrjCode, "深圳市龙岗区住房和建设局");
            list.Add(info);

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the paged construction-licence grid (<c>ctl00_Main_GV_New</c>) at <c>SiteUrl</c>.
/// Pages after the first are requested with an ASP.NET async postback of the "forward" paging
/// button; each grid row's <c>ondblclick</c> attribute yields the detail page, from whose
/// <c>data_con</c> block one <c>ProjectLic</c> is built.
/// </summary>
/// <param name="crawlAll">
/// When false, the crawl returns as soon as <c>MaxCount</c> records have been collected.
/// </param>
/// <returns>
/// The collected records. The list is empty when the first page cannot be downloaded or the
/// page-count label cannot be parsed.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    string htl = string.Empty;
    string viewState = string.Empty;
    string eventValidation = string.Empty;
    int page = 1;

    // Fields this crawler never populates; they are handed to GenProjectLic as empty strings.
    string pBuildManager = string.Empty, pConstUnitManager = string.Empty, pSuperUnitManager = string.Empty,
           pProspUnitManager = string.Empty, pDesignUnitManager = string.Empty, pSpecialPerson = string.Empty,
           pPrjLicCode = string.Empty, pPrjDesc = string.Empty, pCreatetime = string.Empty;

    try
    {
        htl = ToolHtml.GetHtmlByUrlEncode(SiteUrl, Encoding.UTF8);
        viewState = this.ToolWebSite.GetAspNetViewState(htl);
        eventValidation = this.ToolWebSite.GetAspNetEventValidation(htl);
    }
    catch (Exception)
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(htl));
    NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_Main_paging_LblPageCount")));
    if (tdNodes != null && tdNodes.Count > 0)
    {
        try
        {
            page = int.Parse(tdNodes[0].ToPlainTextString().Trim());
        }
        catch
        {
            return list;
        }
    }

    for (int i = 1; i <= page; i++)
    {
        if (i > 1)
        {
            // The view state / event validation captured from the first page are replayed on
            // every postback. An earlier revision (formerly kept here as commented-out code)
            // posted via ctl00$Main$paging$btnNext and re-read both tokens from each response.
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                new string[]
                {
                    "ctl00$ScriptManager1",
                    "__EVENTTARGET",
                    "__EVENTARGUMENT",
                    "ctl00$Main$ddl_type",
                    "ctl00$Main$txt_Title",
                    "ctl00$Main$paging$txtPageIndex",
                    "__VIEWSTATE",
                    "__VIEWSTATEGENERATOR",
                    "__VIEWSTATEENCRYPTED",
                    "__EVENTVALIDATION",
                    "__ASYNCPOST",
                    "ctl00$Main$paging$btnForward.x",
                    "ctl00$Main$paging$btnForward.y"
                },
                new string[]
                {
                    "ctl00$UpdatePanel1|ctl00$Main$paging$btnForward",
                    string.Empty,
                    string.Empty,
                    "1",
                    string.Empty,
                    i.ToString(),
                    viewState,
                    "19AE96F3",
                    "",
                    eventValidation,
                    "true",
                    "7",
                    "9"
                });
            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8);
            }
            catch (Exception)
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(htl));
        NodeList tableList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_Main_GV_New")));
        if (tableList == null || tableList.Count == 0)
        {
            continue;
        }

        TableTag table = (TableTag)tableList[0];
        for (int j = 1; j < table.RowCount; j++)
        {
            TableRow tr = table.Rows[j];
            if (tr.ColumnCount < 5)
            {
                // A malformed/short row must not abort the whole crawl.
                continue;
            }

            string pPrjName = tr.Columns[2].ToPlainTextString().Trim();
            string pPrjCode = tr.Columns[1].ToPlainTextString().Trim();
            string pBuildUnit = tr.Columns[3].ToPlainTextString().Trim();
            string PrjLicDate = tr.Columns[4].ToPlainTextString().Trim();

            // The detail address is the quoted argument of the row's ondblclick handler.
            string dblClick = tr.GetAttribute("ondblclick");
            if (string.IsNullOrEmpty(dblClick))
            {
                continue;
            }

            // Decode "&amp;" (the previous Replace("&", "&") was a no-op), then mark the closing
            // parenthesis so the text between the first quote and that marker can be cut out.
            string pUrl = "http://www.szbajs.gov.cn/SiteManage/"
                + dblClick.Replace("&amp;", "&").Replace(")", "kdxx").GetRegexBegEnd("'", "kdxx").Replace("'", "");

            string htmldetail;
            try
            {
                htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(pUrl), Encoding.UTF8);
            }
            catch (Exception)
            {
                continue;
            }

            Parser parserdetail = new Parser(new Lexer(htmldetail));
            NodeList dtnode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "data_con")));
            if (dtnode == null || dtnode.Count == 0)
            {
                continue;
            }

            string ctx = dtnode.AsString().Replace(" ", "");
            string pInfoSource = ctx;
            string pDesignUnit = ctx.GetRegex(new string[] { "设计单位" });
            string pBuildAddress = ctx.GetRegex(new string[] { "工程地址", "工程地点" });
            string pBuildScale = ctx.GetRegex(new string[] { "建设规模", "建筑面积" });
            string pSuperUnit = ctx.GetRegex(new string[] { "监理单位" });
            string pConstUnit = ctx.GetRegex(new string[] { "施工单位" });
            string pLicUnit = ctx.GetRegex(new string[] { "发证机关" });
            string pProspUnit = ctx.GetRegex(new string[] { "勘察单位" });
            string pPrjManager = ctx.GetRegex(new string[] { "项目经理", "项目负责人" });
            string pPrjStartDate = ctx.GetRegex(new string[] { "计划开工日期" });
            string PrjEndDate = ctx.GetRegex(new string[] { "计划竣工日期" });
            string pPrjPrice = ctx.GetMoneyRegex(new string[] { "工程造价" });

            if (string.IsNullOrEmpty(PrjLicDate))
            {
                // Fall back to the issue date on the detail page. The lookup result used to be
                // discarded, leaving the date empty.
                PrjLicDate = ctx.GetRegex("发证日期");
            }
            if (string.IsNullOrEmpty(pLicUnit))
            {
                pLicUnit = "深圳市宝安区建设局";
            }

            ProjectLic info = ToolDb.GenProjectLic(pPrjName, pBuildUnit, pBuildAddress, pBuildManager, pBuildScale, pPrjPrice,
                pPrjStartDate, PrjEndDate, pConstUnit, pConstUnitManager, pSuperUnit, pSuperUnitManager, pProspUnit,
                pProspUnitManager, pDesignUnit, pDesignUnitManager, pPrjManager, pSpecialPerson, pLicUnit, pPrjLicCode,
                PrjLicDate, pPrjDesc, "广东省", "深圳市宝安区", pInfoSource, pUrl, pCreatetime, pPrjCode, "深圳市宝安区建设局");
            list.Add(info);

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the JSON licence list served at <c>SiteUrl</c> (20 records per page, later pages via
/// the <c>build.ashx</c> endpoint). For each entry of the <c>DataList</c> array it downloads the
/// detail page, flattens the detail table into one line per row and builds a <c>ProjectLic</c>.
/// </summary>
/// <param name="crawlAll">
/// When false, the crawl returns as soon as <c>MaxCount</c> records have been collected.
/// </param>
/// <returns>
/// The collected records. The list is empty when the first page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    string htl = string.Empty;
    string cookiestr = string.Empty;
    int page = 1;

    // Fields this crawler never populates; they are handed to GenProjectLic as empty strings.
    string pBuildManager = string.Empty, pConstUnitManager = string.Empty, pSuperUnitManager = string.Empty,
           pProspUnitManager = string.Empty, pDesignUnitManager = string.Empty, pSpecialPerson = string.Empty,
           pPrjLicCode = string.Empty, pPrjDesc = string.Empty, pCreatetime = string.Empty;

    // Digits with an optional fractional part. The previous pattern required at least
    // two digits and therefore returned nothing for single-digit amounts such as "5".
    Regex regBidMoney = new Regex(@"[0-9]+(\.[0-9]+)?");
    JavaScriptSerializer serializer = new JavaScriptSerializer();

    try
    {
        htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch (Exception)
    {
        return list;
    }

    if (htl.Contains("RowCount"))
    {
        try
        {
            // Everything after "RowCount" is taken as the number once the JSON punctuation is
            // removed, so this only parses when RowCount is the last property of the payload.
            int index = htl.IndexOf("RowCount");
            string pageStr = htl.Substring(index).Replace("RowCount", "").Replace("}", "").Replace(":", "").Replace("\"", "");

            // Round up to whole pages of 20. The previous Convert.ToInt32(b) + 1 rounded to the
            // nearest integer first and over-counted a page whenever the fraction was above one half.
            page = (int)Math.Ceiling(decimal.Parse(pageStr) / 20m);
        }
        catch
        {
            // Best effort: if the total cannot be parsed, only the first page is crawled.
        }
    }

    for (int i = 1; i <= page; i++)
    {
        if (i > 1)
        {
            // Page 1 was already downloaded above.
            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl("http://www.szjs.gov.cn/build/build.ashx?_=1352593922281&menu=%E9%A1%B9%E7%9B%AE%E4%BF%A1%E6%81%AF&type=%E6%96%BD%E5%B7%A5%E8%AE%B8%E5%8F%AF&pageSize=20&pageIndex=" + i.ToString(), Encoding.UTF8);
            }
            catch
            {
                continue;
            }
        }

        // A page that is not valid JSON, or has no DataList array, is skipped instead of
        // aborting the crawl and losing the records collected so far.
        Dictionary<string, object> smsTypeJson;
        try
        {
            smsTypeJson = serializer.DeserializeObject(htl) as Dictionary<string, object>;
        }
        catch (Exception)
        {
            continue;
        }
        if (smsTypeJson == null)
        {
            continue;
        }

        object dataListValue;
        if (!smsTypeJson.TryGetValue("DataList", out dataListValue))
        {
            continue;
        }
        object[] array = dataListValue as object[];
        if (array == null)
        {
            continue;
        }

        foreach (object obj2 in array)
        {
            Dictionary<string, object> dicSmsType = obj2 as Dictionary<string, object>;
            if (dicSmsType == null)
            {
                continue;
            }

            try
            {
                string pPrjCode = Convert.ToString(dicSmsType["AnnSerial"]);
                string pPrjName = Convert.ToString(dicSmsType["PrjName"]);
                string pBuildUnit = Convert.ToString(dicSmsType["ConstOrg"]);
                string PrjLicDate = Convert.ToString(dicSmsType["IssueDate"]);
                string pUrl = "http://www.szjs.gov.cn/build/sgxk_detail.aspx?id=" + pPrjCode;

                string htmldetail;
                try
                {
                    htmldetail = this.ToolWebSite.GetHtmlByUrl(pUrl, Encoding.UTF8, ref cookiestr).Trim();
                }
                catch (Exception)
                {
                    continue;
                }

                Parser parser = new Parser(new Lexer(htmldetail));
                NodeList dtList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "js-table mar-l-4")));
                if (dtList == null || dtList.Count == 0)
                {
                    continue;
                }

                TableTag table = dtList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // One output line per table row: the cell texts are concatenated with their
                // own line breaks removed, and the row is terminated with CRLF.
                StringBuilder infoBuilder = new StringBuilder();
                for (int j = 0; j < table.RowCount; j++)
                {
                    TableRow dr = table.Rows[j];
                    for (int k = 0; k < dr.ColumnCount; k++)
                    {
                        infoBuilder.Append(dr.Columns[k].ToPlainTextString().Trim().Replace("\r", "").Replace("\n", ""));
                    }
                    infoBuilder.Append("\r\n");
                }
                string pInfoSource = infoBuilder.ToString();

                string pDesignUnit = ExtractLabeledValue(pInfoSource, "设计单位");
                string pBuildAddress = ExtractLabeledValue(pInfoSource, "工程地址", "工程地点");
                string pBuildScale = ExtractLabeledValue(pInfoSource, "建筑面积", "建设规模");
                string pSuperUnit = ExtractLabeledValue(pInfoSource, "监理单位");
                string pConstUnit = ExtractLabeledValue(pInfoSource, "施工单位");
                string pLicUnit = ExtractLabeledValue(pInfoSource, "发证机关");
                string pProspUnit = ExtractLabeledValue(pInfoSource, "勘察单位");
                string pPrjManager = ExtractLabeledValue(pInfoSource, "项目经理", "项目负责人");
                string pPrjStartDate = ExtractLabeledValue(pInfoSource, "计划开工日期");
                string PrjEndDate = ExtractLabeledValue(pInfoSource, "计划竣工日期");
                string pPrjPrice = ExtractLabeledValue(pInfoSource, "工程造价");

                if (pPrjPrice.Contains("万"))
                {
                    // Amount is already expressed with the "万" (ten-thousand) unit: keep the number before it.
                    pPrjPrice = pPrjPrice.Remove(pPrjPrice.IndexOf("万")).Trim();
                    pPrjPrice = regBidMoney.Match(pPrjPrice).Value;
                }
                else
                {
                    try
                    {
                        // No "万" unit: take the number and divide by 10000; anything below 0.1
                        // is recorded as "0". The previous code re-applied the label regex to the
                        // already-stripped value, which never matched, so this branch always gave "0".
                        decimal scaled = decimal.Parse(regBidMoney.Match(pPrjPrice).Value) / 10000;
                        pPrjPrice = scaled < 0.1m ? "0" : scaled.ToString();
                    }
                    catch (Exception)
                    {
                        pPrjPrice = "0";
                    }
                }

                if (string.IsNullOrEmpty(pLicUnit))
                {
                    pLicUnit = "深圳市住房和建设局";
                }

                ProjectLic info = ToolDb.GenProjectLic(pPrjName, pBuildUnit, pBuildAddress, pBuildManager, pBuildScale, pPrjPrice,
                    pPrjStartDate, PrjEndDate, pConstUnit, pConstUnitManager, pSuperUnit, pSuperUnitManager, pProspUnit,
                    pProspUnitManager, pDesignUnit, pDesignUnitManager, pPrjManager, pSpecialPerson, pLicUnit, pPrjLicCode,
                    PrjLicDate, pPrjDesc, "广东省", "深圳市区", pInfoSource, pUrl, pCreatetime, pPrjCode, "深圳市住房和建设局");
                list.Add(info);

                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return list;
                }
            }
            catch
            {
                // Any failure on a single entry (missing key, parse error) skips that entry only.
                continue;
            }
        }
    }

    return list;
}

/// <summary>
/// Finds the first line of <paramref name="text"/> of the form "label:value" (ASCII or
/// full-width colon, line terminated by CRLF) for any of the given labels and returns the value
/// with the labels, colons and '/' characters removed and surrounding whitespace trimmed.
/// </summary>
/// <param name="text">Flattened detail text, one "label:value" pair per CRLF-terminated line.</param>
/// <param name="labels">Alternative label spellings; the first line matching any of them wins.</param>
/// <returns>The cleaned value, or an empty string when no line matches.</returns>
private static string ExtractLabeledValue(string text, params string[] labels)
{
    StringBuilder pattern = new StringBuilder("(");
    for (int i = 0; i < labels.Length; i++)
    {
        if (i > 0)
        {
            pattern.Append('|');
        }
        pattern.Append(Regex.Escape(labels[i]));
    }
    pattern.Append(@")[:：][^\r\n]+\r\n");

    string value = Regex.Match(text, pattern.ToString()).Value;
    foreach (string label in labels)
    {
        value = value.Replace(label, "");
    }

    // '/' and ':' are removed everywhere in the value, not just at the label boundary
    // (so a date such as 2012/11/10 becomes 20121110), matching the previous behaviour.
    return value.Replace("/", "").Replace(":", "").Replace("：", "").Trim();
}