/// <summary>
/// Crawls the paged announcement grid at <c>SiteUrl</c>, downloads the detail page of
/// every grid row, builds an <c>InviteInfo</c> from it and stores it through
/// <c>ToolDb.SaveEntity</c>. When a record is saved, the links inside the announcement
/// body that qualify as attachments are downloaded and stored as well.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, the crawl stops as soon as <c>MaxCount</c> rows have been processed.
/// </param>
/// <returns>
/// The (always empty) list when the crawl finishes or when the first page cannot be
/// downloaded; <c>null</c> when the <c>MaxCount</c> limit ends the crawl early.
/// Records are persisted directly and are never added to the list.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    int sqlCount = 0;
    int pageInt = 1;
    string html;
    try
    {
        html = ToolHtml.GetHtmlByUrlEncode(SiteUrl, Encoding.UTF8);
    }
    catch (Exception ex)
    {
        Logger.Error(ex.ToString());
        return list;
    }

    // Determine the page count from the pager text; stay on a single page when it cannot be read.
    Parser parser = new Parser(new Lexer(html));
    NodeList sNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("cellspacing", "2"), new TagNameFilter("table")));
    if (sNode != null && sNode.Count > 0)
    {
        Match pageMatch = new Regex(@",共[^页]+页,").Match(sNode.AsString());
        int parsedPages;
        if (int.TryParse(pageMatch.Value.Replace(",共", "").Replace("页,", "").Trim(), out parsedPages))
        {
            pageInt = parsedPages;
        }
    }

    // These patterns do not depend on the row, so build them once.
    Regex regPrjAdd = new Regex(@"(工程地点|工程地址):[^\r\n]+[\r\n]{1}");
    Regex regInvType = new Regex(@"[^\r\n]+[\r\n]{1}");
    // Trailing notice sections (changed 2013-11-19): everything from the marker onwards is cut off.
    string[] noticeMarkers = new string[] { "重要提示", "温馨提示" };
    Regex[] noticeCutters = new Regex[]
    {
        new Regex(@"([.\S\s]*)(?=重要提示)"),
        new Regex(@"([.\S\s]*)(?=温馨提示)")
    };

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            // Request page i by posting back the hidden form fields taken from the page currently held.
            string viewState = this.ToolWebSite.GetAspNetViewState(html);
            string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "__VIEWSTATEENCRYPTED", "__EVENTVALIDATION", "ctl00$hdnPageCount" },
                new string[] { "ctl00$Content$GridView1", "Page$" + i.ToString(), viewState, "", eventValidation, pageInt.ToString() });
            html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8);
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_Content_GridView1"), new TagNameFilter("table")));
        if (nodeList == null || nodeList.Count == 0)
        {
            continue;
        }

        TableTag table = nodeList[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        // The first and the last row are skipped (presumably header and pager rows — confirm against the site).
        for (int j = 1; j < table.RowCount - 1; j++)
        {
            TableRow tr = table.Rows[j];
            string code = tr.Columns[1].ToPlainTextString().Trim();
            string prjName = tr.Columns[2].ToPlainTextString().Trim();
            string buildUnit = tr.Columns[3].ToPlainTextString().Trim();
            string beginDate = tr.Columns[5].ToPlainTextString().Trim();
            string endDate = tr.Columns[6].ToPlainTextString().Trim();

            // Without a link in the title cell there is no detail page to read; skip the row instead of crashing.
            ATag aTag = tr.Columns[2].Children[0] as ATag;
            if (aTag == null)
            {
                continue;
            }
            string infoUrl = "http://www.szjsjy.com.cn/BusinessInfo/" + aTag.Link;

            string htmlTxt;
            string htmldetail;
            try
            {
                // Download once and derive both views (raw HTML fragment and line-broken text) from the same response.
                // NOTE(review): the literal removed here is reproduced as found; if it is an ordinary space,
                // tag names and attributes in the raw HTML get glued together — confirm whether "&nbsp;" was intended.
                string rawDetail = ToolHtml.GetHtmlByUrlEncode(infoUrl, Encoding.UTF8).Replace(" ", "");

                Parser htmlParser = new Parser(new Lexer(rawDetail.Trim()));
                NodeList htmlNodes = htmlParser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "lblXXNR"), new TagNameFilter("span")));
                htmlTxt = htmlNodes.AsHtml();

                htmldetail = rawDetail.Replace("</br>", "\r\n").Replace("<br>", "\r\n");
            }
            catch (Exception ex)
            {
                // A single unreachable or malformed detail page must not stop the crawl.
                Logger.Error(ex.ToString());
                continue;
            }

            Parser dtlparser = new Parser(new Lexer(htmldetail));
            NodeList dtnode = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "lblXXNR"), new TagNameFilter("span")));
            string inviteCtx = dtnode.AsString().Replace(" ", "");

            string prjAddress = regPrjAdd.Match(inviteCtx).Value.Replace("工程地点:", "").Replace("工程地址:", "").Trim();
            string msgType = "深圳市建设工程交易中心";
            string specType = "建设工程";
            string remark = string.Empty;

            // The invitation type is derived from the first line of the announcement text.
            string inviteType = ToolHtml.GetInviteTypes(regInvType.Match(inviteCtx).Value);

            for (int k = 0; k < noticeMarkers.Length; k++)
            {
                if (inviteCtx.Contains(noticeMarkers[k]))
                {
                    inviteCtx = noticeCutters[k].Match(inviteCtx).Value;
                }
            }

            InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳市工程", string.Empty, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, string.Empty, infoUrl, htmlTxt);

            if (!crawlAll && sqlCount >= this.MaxCount)
            {
                // Kept as null (not the empty list) so existing callers see the same result as before.
                return null;
            }
            sqlCount++;

            if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
            {
                continue;
            }

            // Attachments are looked up only after the record itself was saved.
            dtlparser.Reset();
            NodeList dlNodes = dtlparser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (dlNodes == null || dlNodes.Count == 0)
            {
                continue;
            }

            for (int f = 0; f < dlNodes.Count; f++)
            {
                ATag fileTag = dlNodes[f] as ATag;
                if (fileTag == null || !fileTag.IsAtagAttach())
                {
                    continue;
                }

                try
                {
                    string attachUrl = fileTag.Link.Replace("..", "http://www.szjsjy.com.cn");
                    BaseAttach attach = ToolHtml.GetBaseAttach(attachUrl, fileTag.LinkText, info.Id, "SiteManage\\Files\\InviteAttach\\");
                    if (attach != null)
                    {
                        ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                    }
                }
                catch (Exception ex)
                {
                    // Attachments are best-effort: record the failure and carry on with the next link.
                    Logger.Error(ex.ToString());
                }
            }
        }
    }

    return list;
}
/// <summary>
/// Walks the paged table <c>tblPrjConstBid</c> at <c>SiteUrl</c>, posts each row's
/// codes to the detail URL, flattens the returned detail table into "label:value" lines
/// and turns the result into <c>ProjectLic</c> records.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, the method returns as soon as the list holds <c>MaxCount</c> records.
/// </param>
/// <returns>
/// The collected <c>ProjectLic</c> records; an empty list when the first page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList results = new List<ProjectLic>();
    int totalPages = 1;
    int throttleCounter = 0;
    string pageHtml = string.Empty;

    try
    {
        pageHtml = ToolHtml.GetHtmlByUrlEncode(SiteUrl, Encoding.UTF8);
    }
    catch (Exception)
    {
        return results;
    }

    // Read the page count from the pager cell; the single-page default survives any parse failure.
    Parser pageParser = new Parser(new Lexer(pageHtml));
    NodeList pagerCells = pageParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "pageLinkTd")));
    if (pagerCells != null && pagerCells.Count > 0)
    {
        try
        {
            string pagerText = pagerCells.AsString().ToNodeString();
            string pagesText = pagerText.GetRegexBegEnd("总页数", "页").Replace(":", "");
            totalPages = int.Parse(pagesText);
        }
        catch (Exception)
        {
        }
    }

    for (int pageNo = 1; pageNo <= totalPages; pageNo++)
    {
        if (pageNo > 1)
        {
            NameValueCollection pageForm = this.ToolWebSite.GetNameValueCollection(
                new string[] { "page", "qymc", "ann_serial", "pro_name" },
                new string[] { pageNo.ToString(), "", "", "" });
            try
            {
                pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, pageForm, Encoding.UTF8);
            }
            catch (Exception)
            {
                // A page that cannot be fetched is skipped.
                continue;
            }
        }

        pageParser = new Parser(new Lexer(pageHtml));
        NodeList gridNodes = pageParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "tblPrjConstBid")));
        if (gridNodes == null || gridNodes.Count <= 0)
        {
            continue;
        }

        TableTag grid = (TableTag)gridNodes[0];
        // The first and the last row of the grid are not data rows for this loop.
        for (int rowNo = 1; rowNo < grid.RowCount - 1; rowNo++)
        {
            // Slots that this crawler never fills are passed on as empty strings.
            string blank = string.Empty;

            TableRow row = grid.Rows[rowNo];
            string licCode = row.Columns[0].ToNodePlainString();
            string prjCode = row.Columns[1].ToNodePlainString();
            string prjName = row.Columns[2].ToNodePlainString();
            string buildUnit = row.Columns[3].ToNodePlainString();
            string licDate = row.Columns[4].ToPlainTextString().GetDateRegex();
            string detailUrl = "http://portal.szjs.gov.cn:8888/gongshi/sgxkz.html";

            NameValueCollection detailForm = this.ToolWebSite.GetNameValueCollection(
                new string[] { "instanceGuid", "yxtywlsh" },
                new string[] { prjCode, licCode });

            string detailHtml = string.Empty;
            try
            {
                detailHtml = this.ToolWebSite.GetHtmlByUrl(detailUrl, detailForm, Encoding.UTF8);
            }
            catch (Exception)
            {
                continue;
            }

            Parser detailParser = new Parser(new Lexer(detailHtml));
            NodeList detailNodes = detailParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "tblPrjConstBid")));
            if (detailNodes == null || detailNodes.Count <= 0)
            {
                continue;
            }

            // Flatten the detail table: even cells become the label, odd cells the value that ends the line.
            TableTag detailTable = detailNodes[0] as TableTag;
            StringBuilder detailText = new StringBuilder();
            for (int r = 1; r < detailTable.RowCount; r++)
            {
                for (int c = 0; c < detailTable.Rows[r].ColumnCount; c++)
                {
                    detailText.Append(detailTable.Rows[r].Columns[c].ToNodePlainString());
                    detailText.Append(c % 2 == 0 ? ":" : "\r\n");
                }
            }
            string infoSource = detailText.ToString();

            string startDate = infoSource.GetRegex("合同开工日期");
            string endDate = infoSource.GetRegex("合同竣工日期");
            string designUnit = infoSource.GetRegex("设计单位,建设单位 ");
            string buildAddress = infoSource.GetRegex("工程地址,建设地址");
            string buildScale = infoSource.GetRegex("建筑面积,建设规模");
            string superUnit = infoSource.GetRegex("监理单位");
            string constUnit = infoSource.GetRegex("施工单位");
            string licUnit = infoSource.GetRegex("发证机关");
            string prospUnit = infoSource.GetRegex("勘察单位");
            string prjPrice = infoSource.GetRegex("合同价格");
            string prjManager = infoSource.GetRegex("项目经理,项目负责人");

            if (string.IsNullOrEmpty(licUnit))
            {
                licUnit = "深圳市住房和建设局";
            }

            ProjectLic record = ToolDb.GenProjectLic(
                prjName, buildUnit, buildAddress, blank, buildScale, prjPrice,
                startDate, endDate, constUnit, blank, superUnit, blank,
                prospUnit, blank, designUnit, blank, prjManager, blank,
                licUnit, licCode, licDate, blank, "广东省", "深圳市区",
                infoSource, detailUrl, blank, prjCode, "深圳市住房和建设局");
            results.Add(record);

            if (!crawlAll && results.Count >= this.MaxCount)
            {
                return results;
            }

            // Pause for ten minutes once the counter reaches 200, then restart it at 1.
            throttleCounter++;
            if (throttleCounter >= 200)
            {
                throttleCounter = 1;
                Thread.Sleep(600 * 1000);
            }
        }
    }

    return results;
}
/// <summary>
/// Reads the grid <c>ctl00_cph_context_GridView1</c> from <c>SiteUrl</c>, downloads the
/// detail page behind every row, stores a <c>NotifyInfo</c> for each detail page that has
/// content and, when the record is saved, stores the files listed in the page's download grid.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, the crawl stops once <c>MaxCount</c> records have been handed to the database layer.
/// </param>
/// <returns>Always <c>null</c>; records are persisted directly through <c>ToolDb.SaveEntity</c>.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    const string siteRoot = "http://www.dgzb.com.cn/DGJYWEB/SiteManage/";

    int totalPages = 1;
    int handled = 0;
    string listHtml = string.Empty;

    try
    {
        listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch (Exception)
    {
        return null;
    }

    // Determine the page count from the paging description label.
    Parser listParser = new Parser(new Lexer(listHtml));
    NodeList pagingLabel = listParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_cph_context_GridViewPaingTwo1_lblGridViewPagingDesc")));
    if (pagingLabel != null && pagingLabel.Count > 0)
    {
        try
        {
            totalPages = Convert.ToInt32(pagingLabel.AsString().GetRegexBegEnd("共", "页"));
        }
        catch
        {
            totalPages = 1;
        }
    }

    // NOTE(review): listHtml is never downloaded again inside this loop, so every iteration
    // parses the markup of the first page — confirm whether a request for page "pageNo" is missing.
    for (int pageNo = 1; pageNo <= totalPages; pageNo++)
    {
        listParser = new Parser(new Lexer(listHtml));
        NodeList gridNodes = listParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_GridView1")));
        if (gridNodes == null || gridNodes.Count <= 0)
        {
            continue;
        }

        TableTag grid = gridNodes[0] as TableTag;
        // Row 0 is skipped; every following row is treated as a data row.
        for (int rowNo = 1; rowNo < grid.RowCount; rowNo++)
        {
            TableRow row = grid.Rows[rowNo];
            string infoType = "办事指南";
            string infoSource = string.Empty;
            string title = row.Columns[1].ToNodePlainString();
            string published = row.Columns[2].ToPlainTextString().GetDateRegex();
            string detailUrl = siteRoot + row.Columns[1].GetATagHref();

            // A failed download leaves the detail HTML empty; the content filter below then finds nothing.
            string detailHtml = string.Empty;
            try
            {
                detailHtml = ToolHtml.GetHtmlByUrlEncode(detailUrl, Encoding.UTF8);
            }
            catch
            {
            }

            Parser detailParser = new Parser(new Lexer(detailHtml));
            NodeList contentCells = detailParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "line")));
            if (contentCells == null || contentCells.Count <= 0)
            {
                continue;
            }

            string contentHtml = contentCells.AsHtml();
            string contentText = contentCells.AsString();
            string msgType = MsgTypeCosnt.DongGuanMsgType;
            NotifyInfo info = ToolDb.GenNotifyInfo(title, published, infoSource, msgType, detailUrl, contentHtml, "广东省", "东莞市区", string.Empty, contentText, infoType);

            if (!crawlAll && handled >= this.MaxCount)
            {
                return null;
            }
            handled++;

            if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
            {
                continue;
            }

            // The record was saved: look for the download grid on the same detail page.
            Parser fileParser = new Parser(new Lexer(detailHtml));
            NodeList fileGrids = fileParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_DownLoadFiles1_GridView1")));
            if (fileGrids == null || fileGrids.Count <= 0)
            {
                continue;
            }

            TableTag fileGrid = fileGrids[0] as TableTag;
            for (int fileNo = 1; fileNo < fileGrid.RowCount; fileNo++)
            {
                TableRow fileRow = fileGrid.Rows[fileNo];
                ATag fileLink = fileRow.Columns[1].GetATag();
                if (!fileLink.IsAtagAttach())
                {
                    continue;
                }

                // Attachments are best-effort: a failure on one file does not affect the others.
                try
                {
                    BaseAttach attach = ToolHtml.GetBaseAttach(siteRoot + fileLink.Link, fileLink.LinkText, info.Id);
                    if (attach != null)
                    {
                        ToolDb.SaveEntity(attach, string.Empty);
                    }
                }
                catch
                {
                }
            }
        }
    }

    return null;
}
/// <summary>
/// Crawls the paged grid <c>ctl00_Main_GV_New</c> at <c>SiteUrl</c>, opens the detail page
/// referenced by each row's <c>ondblclick</c> attribute and builds a <c>ProjectLic</c>
/// record from the text of the detail page's <c>data_con</c> block.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, the method returns as soon as the list holds <c>MaxCount</c> records.
/// </param>
/// <returns>
/// The collected <c>ProjectLic</c> records; an empty list when the first page cannot be
/// downloaded or its page count cannot be parsed.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    string htl;
    string viewState;
    string eventValidation;
    int page = 1;

    try
    {
        htl = ToolHtml.GetHtmlByUrlEncode(SiteUrl, Encoding.UTF8);
        // NOTE(review): both tokens are taken from the first page only and reused for every later postback.
        viewState = this.ToolWebSite.GetAspNetViewState(htl);
        eventValidation = this.ToolWebSite.GetAspNetEventValidation(htl);
    }
    catch (Exception)
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(htl));
    NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_Main_paging_LblPageCount")));
    if (tdNodes != null && tdNodes.Count > 0)
    {
        try
        {
            page = int.Parse(tdNodes[0].ToPlainTextString().Trim());
        }
        catch (Exception)
        {
            // An unreadable page count ends the crawl with an empty result.
            return list;
        }
    }

    for (int i = 1; i <= page; i++)
    {
        if (i > 1)
        {
            // Asynchronous postback of the pager's "forward" button with the requested page index.
            // NOTE(review): the __VIEWSTATEGENERATOR value is hard-coded — confirm it is still valid for the site.
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                new string[]
                {
                    "ctl00$ScriptManager1",
                    "__EVENTTARGET",
                    "__EVENTARGUMENT",
                    "ctl00$Main$ddl_type",
                    "ctl00$Main$txt_Title",
                    "ctl00$Main$paging$txtPageIndex",
                    "__VIEWSTATE",
                    "__VIEWSTATEGENERATOR",
                    "__VIEWSTATEENCRYPTED",
                    "__EVENTVALIDATION",
                    "__ASYNCPOST",
                    "ctl00$Main$paging$btnForward.x",
                    "ctl00$Main$paging$btnForward.y"
                },
                new string[]
                {
                    "ctl00$UpdatePanel1|ctl00$Main$paging$btnForward",
                    string.Empty,
                    string.Empty,
                    "1",
                    string.Empty,
                    i.ToString(),
                    viewState,
                    "19AE96F3",
                    "",
                    eventValidation,
                    "true",
                    "7",
                    "9"
                });
            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8);
            }
            catch (Exception)
            {
                // A page that cannot be fetched is skipped.
                continue;
            }
        }

        parser = new Parser(new Lexer(htl));
        NodeList tableList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_Main_GV_New")));
        if (tableList == null || tableList.Count == 0)
        {
            continue;
        }

        TableTag table = (TableTag)tableList[0];
        for (int j = 1; j < table.RowCount; j++)
        {
            TableRow tr = table.Rows[j];

            // Rows without the five expected cells or without a detail link (e.g. a pager row)
            // are skipped instead of aborting the whole crawl with an exception.
            string dblClick = tr.GetAttribute("ondblclick");
            if (tr.ColumnCount < 5 || string.IsNullOrEmpty(dblClick))
            {
                continue;
            }

            string pPrjCode = tr.Columns[1].ToPlainTextString().Trim();
            string pPrjName = tr.Columns[2].ToPlainTextString().Trim();
            string pBuildUnit = tr.Columns[3].ToPlainTextString().Trim();
            string PrjLicDate = tr.Columns[4].ToPlainTextString().Trim();

            // The relative detail address is the first quoted argument of the handler.
            // Any HTML-encoded ampersand is decoded first (the former Replace("&", "&") did nothing);
            // ")" is swapped for a sentinel so the text between the first quote and it can be cut out.
            string relativeUrl = dblClick.Replace("&amp;", "&").Replace(")", "kdxx").GetRegexBegEnd("'", "kdxx").Replace("'", "");
            string pUrl = "http://www.szbajs.gov.cn/SiteManage/" + relativeUrl;

            string htmldetail;
            try
            {
                htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(pUrl), Encoding.UTF8);
            }
            catch (Exception)
            {
                continue;
            }

            Parser parserdetail = new Parser(new Lexer(htmldetail));
            NodeList dtnode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "data_con")));
            if (dtnode == null || dtnode.Count == 0)
            {
                continue;
            }

            string ctx = dtnode.AsString().Replace(" ", "");
            string pInfoSource = ctx;
            string pDesignUnit = ctx.GetRegex(new string[] { "设计单位" });
            string pBuildAddress = ctx.GetRegex(new string[] { "工程地址", "工程地点" });
            string pBuildScale = ctx.GetRegex(new string[] { "建设规模", "建筑面积" });
            string pSuperUnit = ctx.GetRegex(new string[] { "监理单位" });
            string pConstUnit = ctx.GetRegex(new string[] { "施工单位" });
            string pLicUnit = ctx.GetRegex(new string[] { "发证机关" });
            string pProspUnit = ctx.GetRegex(new string[] { "勘察单位" });
            string pPrjManager = ctx.GetRegex(new string[] { "项目经理", "项目负责人" });
            string pPrjStartDate = ctx.GetRegex(new string[] { "计划开工日期" });
            string PrjEndDate = ctx.GetRegex(new string[] { "计划竣工日期" });
            string pPrjPrice = ctx.GetMoneyRegex(new string[] { "工程造价" });

            // Fall back to the date on the detail page when the grid cell is empty
            // (the lookup result used to be thrown away).
            if (string.IsNullOrEmpty(PrjLicDate))
            {
                PrjLicDate = ctx.GetRegex("发证日期");
            }
            if (string.IsNullOrEmpty(pLicUnit))
            {
                pLicUnit = "深圳市宝安区建设局";
            }

            // Fields this crawler never extracts are passed on as empty strings.
            string pBuildManager = string.Empty, pConstUnitManager = string.Empty, pSuperUnitManager = string.Empty,
                   pProspUnitManager = string.Empty, pDesignUnitManager = string.Empty, pSpecialPerson = string.Empty,
                   pPrjLicCode = string.Empty, pPrjDesc = string.Empty, pCreatetime = string.Empty;

            ProjectLic info = ToolDb.GenProjectLic(pPrjName, pBuildUnit, pBuildAddress, pBuildManager, pBuildScale, pPrjPrice, pPrjStartDate, PrjEndDate, pConstUnit, pConstUnitManager, pSuperUnit, pSuperUnitManager, pProspUnit, pProspUnitManager, pDesignUnit, pDesignUnitManager, pPrjManager, pSpecialPerson, pLicUnit, pPrjLicCode, PrjLicDate, pPrjDesc, "广东省", "深圳市宝安区", pInfoSource, pUrl, pCreatetime, pPrjCode, "深圳市宝安区建设局");
            list.Add(info);

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Walks the paged table <c>tblPrjConstBid</c> at <c>SiteUrl</c> and converts every data
/// row into a <c>ProjectFinish</c> record using only the values shown in the list itself.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, the method returns as soon as the list holds <c>MaxCount</c> records.
/// </param>
/// <returns>
/// The collected <c>ProjectFinish</c> records; an empty list when the first page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList results = new List<ProjectFinish>();
    int totalPages = 1;
    int throttleCounter = 1;
    string pageHtml = string.Empty;

    try
    {
        pageHtml = ToolHtml.GetHtmlByUrlEncode(SiteUrl, Encoding.UTF8);
    }
    catch (Exception)
    {
        return results;
    }

    // Read the page count from the pager cell; the single-page default survives any parse failure.
    Parser pageParser = new Parser(new Lexer(pageHtml));
    NodeList pagerCells = pageParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "pageLinkTd")));
    if (pagerCells != null && pagerCells.Count > 0)
    {
        try
        {
            string pagerText = pagerCells.AsString().ToNodeString();
            string pagesText = pagerText.GetRegexBegEnd("总页数", "页").Replace(":", "");
            totalPages = int.Parse(pagesText);
        }
        catch (Exception)
        {
        }
    }

    for (int pageNo = 1; pageNo <= totalPages; pageNo++)
    {
        if (pageNo > 1)
        {
            NameValueCollection pageForm = this.ToolWebSite.GetNameValueCollection(
                new string[] { "page", "qymc", "ann_serial", "pro_name" },
                new string[] { pageNo.ToString(), "", "", "" });
            try
            {
                pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, pageForm, Encoding.UTF8);
            }
            catch (Exception)
            {
                // A page that cannot be fetched is skipped.
                continue;
            }
        }

        pageParser = new Parser(new Lexer(pageHtml));
        NodeList gridNodes = pageParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "tblPrjConstBid")));
        if (gridNodes == null || gridNodes.Count <= 0)
        {
            continue;
        }

        TableTag grid = (TableTag)gridNodes[0];
        // The first and the last row of the grid are not data rows for this loop.
        for (int rowNo = 1; rowNo < grid.RowCount - 1; rowNo++)
        {
            // Slots that this crawler never fills are passed on as empty strings.
            string blank = string.Empty;

            TableRow row = grid.Rows[rowNo];
            string prjCode = row.Columns[0].ToNodePlainString();
            string prjName = row.Columns[1].ToNodePlainString();
            string buildUnit = row.Columns[2].ToNodePlainString();
            string endDate = row.Columns[3].ToNodePlainString().GetDateRegex();

            // No record unit is read from the page, so the default authority is always used.
            string recordUnit = "深圳市住房和建设局";

            ProjectFinish record = ToolDb.GenProjectFinish(
                "广东省", blank, "深圳市区", blank, endDate,
                blank, blank, blank, blank, blank,
                buildUnit, prjCode, prjName, recordUnit, blank,
                "深圳市住房和建设局", blank);
            results.Add(record);

            if (!crawlAll && results.Count >= this.MaxCount)
            {
                return results;
            }

            // Pause for ten minutes once the counter reaches 200, then restart it at 1.
            throttleCounter++;
            if (throttleCounter >= 200)
            {
                throttleCounter = 1;
                Thread.Sleep(600 * 1000);
            }
        }
    }

    return results;
}