protected override IList ExecuteCrawl(bool crawlAll) { IList list = new ArrayList(); string htl = string.Empty; string cookiestr = string.Empty; string viewState = string.Empty; int page = 1; string eventValidation = string.Empty; try { htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default, ref cookiestr); } catch (Exception ex) { return(list); } Parser parser = new Parser(new Lexer(htl)); NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "scott")), true), new TagNameFilter("a"))); Regex regexPage = new Regex(@"共\d+页"); try { Regex numpage = new Regex(@"[0-9]+[.]{0,1}[0-9]+"); ATag link = (ATag)nodeList[nodeList.Count - 1]; page = Convert.ToInt32(numpage.Match(link.Link).Value.Trim()); } catch (Exception) { } for (int i = 1; i <= page; i++) { if (i > 1) { viewState = this.ToolWebSite.GetAspNetViewState(htl); eventValidation = this.ToolWebSite.GetAspNetEventValidation(htl); NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "newtitle", "totalRows", "pageNO" }, new string[] { string.Empty, "0", i.ToString() }); try { htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.Default, ref cookiestr).Replace("<th", "<td").Replace("</th>", "</td>").Replace(" ", ""); } catch (Exception ex) { continue; } } parser = new Parser(new Lexer(htl)); NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "cnewslist"))); if (tableNodeList != null && tableNodeList.Count > 0) { TableTag table = (TableTag)tableNodeList[0]; for (int j = 1; j < table.RowCount - 2; j++) { string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, inviteCon = string.Empty, InfoUrl = string.Empty, CreateTime = string.Empty, msgType = string.Empty, otherType = string.Empty, img = string.Empty, HtmlTxt = string.Empty; TableRow tr = table.Rows[j]; prjName = tr.Columns[0].ToPlainTextString().Trim(); endDate = tr.Columns[2].ToPlainTextString().Trim(); ATag aTag = tr.Columns[0].SearchFor(typeof(ATag), true)[0] as ATag; InfoUrl = aTag.Link; ImageTag image = aTag.SearchFor(typeof(ImageTag), true)[0] as ImageTag; //beginDate = DateTime.Now.Date.ToString(); //if (image == null) //{ // beginDate = endDate; // endDate = string.Empty; //} string htmldetail = string.Empty; try { htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.Default).Replace("<th", "<td").Replace("</th>", "</td>").Replace("</TH>", "</td>").Replace("<TH", "<td").Replace(" ", ""); } catch (Exception) { Logger.Error("InviteZhuHaiJS"); continue; } Parser parserdetail = new Parser(new Lexer(htmldetail)); NodeList dtnode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "borderTB"))); if (dtnode.Count > 0) { HtmlTxt = dtnode.AsHtml(); TableTag tabletwo = (TableTag)dtnode[0]; for (int row = 0; row < tabletwo.RowCount; row++) { TableRow r = tabletwo.Rows[row]; for (int k = 0; k < r.ColumnCount; k++) { string st = string.Empty; string st1 = string.Empty; st = r.Columns[k].ToPlainTextString().Trim(); if (k + 1 < r.ColumnCount) { st1 = r.Columns[k + 1].ToPlainTextString().Trim(); } inviteCtx += st + ":" + st1 + "\r\n"; if (k + 1 <= r.ColumnCount) { k++; } } } Regex regBuidUnit = new Regex(@"(招标人|招标人/招标代理)(:|:)[^\r\n]+\r\n"); buildUnit = regBuidUnit.Match(inviteCtx).Value.Replace("招标人:", "").Replace("招标人/招标代理:", "").Trim(); Regex regPrjAddr = new Regex(@"(建设地点|项目地址|建设单位)(:|:)[^\r\n]+\r\n"); prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("建设单位:", "").Replace("建设地点:", "").Replace("项目地址", "").Replace(":", "").Trim(); if (Encoding.Default.GetByteCount(prjAddress) > 200 || prjAddress == "") { prjAddress = "见招标信息"; } Regex regcode = new Regex(@"项目编号(:|:)[^\r\n]+\r\n"); code = regcode.Match(inviteCtx).Value.Replace("项目编号:", "").Replace(":", "").Trim(); beginDate = inviteCtx.GetRegex("报名时间").GetDateRegex(); if (string.IsNullOrEmpty(beginDate) || DateTime.Parse(beginDate) > DateTime.Now) { beginDate = DateTime.Now.ToString("yyyy-MM-dd"); } msgType = "珠海市建设工程交易中心"; specType = "建设工程"; inviteCtx = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = st1 ns = ", "").Replace("urn:schemas-microsoft-com:office:smarttags", "").Replace("/>", "").Trim(); inviteCtx = inviteCtx.Replace("<?", "").Replace("xml:namespace prefix = o ns = ", "").Replace("urn:schemas-microsoft-com:office:office", "").Replace("/>", "").Trim(); Regex regInvType = new Regex(@"[^\r\n]+[\r\n]{1}"); if (buildUnit == "") { buildUnit = ""; } inviteType = ToolHtml.GetInviteTypes(prjName); InviteInfo info = ToolDb.GenInviteInfo("广东省", "珠海市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt); list.Add(info); parserdetail.Reset(); NodeList nodeListtwo = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "Noprint")), true), new TagNameFilter("a"))); if (nodeListtwo.Count > 0) { ATag aTa3g = nodeListtwo[0] as ATag; BaseAttach attach = ToolDb.GenBaseAttach("工作议程(点击下载)", info.Id, aTa3g.Link); base.AttachList.Add(attach); } if (!crawlAll && list.Count >= this.MaxCount) { return(list); } } } } } return(list); }