/// <summary>
/// Crawls the invitation listing at <c>SiteUrl</c>, opens the detail page linked from each
/// listing row, flattens the detail table into "label:value" lines and converts the result
/// into <c>InviteInfo</c> records.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, crawling stops as soon as the result list holds at least
/// <c>MaxCount</c> items; when <c>true</c>, every listing page is processed.
/// </param>
/// <returns>
/// The collected <c>InviteInfo</c> items. The list is empty when the first listing page
/// cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<InviteInfo>();
    string html = string.Empty;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch (Exception)
    {
        // Without the first listing page there is nothing to crawl.
        return list;
    }

    // Best-effort page-count detection from the pager text; falls back to a single page.
    int pageInt = 1;
    Parser parser = new Parser(new Lexer(html));
    NodeList pagerNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class", "input-group-addon")));
    if (pagerNodes != null && pagerNodes.Count > 0)
    {
        try
        {
            string pagerText = pagerNodes.AsString();
            string reTemp = pagerText.GetRegexBegEnd("共", "项");
            string pageTemp = pagerText.GetRegexBegEnd("项", "页").GetReplace("共,项,页," + reTemp + ",,");

            int parsedPages;
            if (int.TryParse(pageTemp, out parsedPages))
            {
                pageInt = parsedPages;
            }
        }
        catch (Exception)
        {
            // Pager text was not in the expected shape: keep the default of one page.
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        // Page 1 was already downloaded above; later pages are requested with "?pi=<i - 1>".
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "?pi=" + (i - 1), Encoding.UTF8);
            }
            catch
            {
                // A page that fails to download is skipped; the remaining pages are still tried.
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "inside_table")));
        if (nodeList == null || nodeList.Count == 0)
        {
            continue;
        }

        TableTag table = (TableTag)nodeList[0];

        // Row 0 is skipped (presumably the header row — confirm against the site).
        for (int j = 1; j < table.RowCount; j++)
        {
            TableRow tr = table.Rows[j];

            // Columns 1..3 are read below; a shorter row would otherwise throw
            // IndexOutOfRangeException and abort the whole crawl.
            if (tr.ColumnCount < 4)
            {
                continue;
            }

            // These fields are not available from this site and are passed through empty.
            string code = string.Empty;
            string endDate = string.Empty;
            string remark = string.Empty;

            string prjName = tr.Columns[1].ToPlainTextString().Trim();
            string buildUnit = tr.Columns[2].ToPlainTextString().Trim();
            string beginDate = tr.Columns[3].ToPlainTextString().Trim();
            string InfoUrl = "http://www.bajsjy.com/" + tr.Columns[1].GetATagHref();

            string htmldetail = string.Empty;
            try
            {
                // <th> cells are rewritten to <td> before parsing, presumably so that header
                // cells are counted as ordinary columns by the table parser.
                // NOTE(review): the last Replace literal is reproduced verbatim from the original;
                // if it is a plain ASCII space it also strips tag/attribute separators — confirm
                // whether "&nbsp;" (or U+00A0) was intended.
                htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace("<th", "<td").Replace("</th>", "</td>").Replace(" ", "");
            }
            catch (Exception)
            {
                // Detail page unavailable: skip this row.
                continue;
            }

            Parser parserdetail = new Parser(new Lexer(htmldetail));
            NodeList nodeDetailList = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "inside_table")));
            if (nodeDetailList == null || nodeDetailList.Count == 0)
            {
                continue;
            }

            string HtmlTxt = nodeDetailList.AsHtml();
            TableTag tabledetail = (TableTag)nodeDetailList[0];

            // Flatten the detail table into "label:value" lines.
            StringBuilder ctxBuilder = new StringBuilder();
            for (int r = 0; r < tabledetail.RowCount; r++)
            {
                TableRow trdetail = tabledetail.Rows[r];

                // Cells are consumed as (label, value) pairs. The bound "c + 1 < ColumnCount"
                // keeps the value index in range, so a row with an odd number of cells no longer
                // throws; its unpaired trailing cell is ignored.
                for (int c = 0; c + 1 < trdetail.ColumnCount; c += 2)
                {
                    string label = trdetail.Columns[c].ToPlainTextString().Trim();
                    string value = trdetail.Columns[c + 1].ToPlainTextString().Trim();

                    NodeList inptList = trdetail.Columns[c + 1].SearchFor(typeof(InputTag), true);
                    NodeList selList = trdetail.Columns[c + 1].SearchFor(typeof(SelectTag), true);

                    if (inptList != null && inptList.Count > 0)
                    {
                        if (inptList.Count > 1)
                        {
                            // Several inputs in one cell: take the value of the checked one
                            // (the last checked input wins if there are several).
                            for (int inp = 0; inp < inptList.Count; inp++)
                            {
                                InputTag candidate = (InputTag)inptList[inp];
                                if (candidate.GetAttribute("checked") == "checked")
                                {
                                    value = candidate.GetAttribute("value");
                                }
                            }
                        }
                        else
                        {
                            InputTag single = (InputTag)inptList[0];
                            value = single.GetAttribute("value");
                        }
                    }

                    if (selList != null && selList.Count > 0)
                    {
                        // A <select> in the cell overrides the value with its selected option text.
                        SelectTag selTag = (SelectTag)selList[0];
                        NodeList opList = new NodeList();
                        selTag.CollectInto(opList, new HasAttributeFilter("selected", "selected"));
                        value = opList.AsString();
                    }

                    ctxBuilder.Append(label).Append(":").Append(value).Append("\r\n");
                }
            }

            string inviteCtx = ctxBuilder.ToString();

            string prjAddress = Regex.Match(inviteCtx, @"工程地址:[^\r\n]+\r\n").Value.Replace("工程地址:", "").Trim();
            string oType = Regex.Match(inviteCtx, @"工程类型:[^\r\n]+\r\n").Value.Replace("工程类型:", "").Trim();

            string otherType = string.Empty;
            if (oType.Contains("房建"))
            {
                otherType = "房建及工业民用建筑";
            }
            else if (oType.Contains("市政"))
            {
                otherType = "市政工程";
            }
            else if (oType.Contains("园林绿化"))
            {
                otherType = "园林绿化工程";
            }
            else if (oType.Contains("装饰") || oType.Contains("装修"))
            {
                otherType = "装饰装修工程";
            }
            else if (oType.Contains("电力"))
            {
                otherType = "电力工程";
            }
            else if (oType.Contains("水利"))
            {
                otherType = "水利工程";
            }

            // NOTE(review): this check is not part of the else-if chain above, so it overrides
            // any earlier match. Preserved from the original — confirm this is intended.
            if (oType.Contains("环保"))
            {
                otherType = "环保工程";
            }

            string msgType = "深圳市建设工程交易中心宝安分中心";
            string specType = "建设工程";
            string bidType = ToolHtml.GetInviteTypes(prjName);

            InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳宝安区工程", "宝安区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, bidType, specType, otherType, InfoUrl, HtmlTxt);
            list.Add(info);

            // Incremental crawls stop once enough items have been collected.
            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}