/// <summary>
/// Crawls every listing configured in <c>DicSiteUrl</c> and builds one
/// <c>InviteInfo</c> record per announcement found on the listing pages.
/// Attachment links found in each announcement body are appended to
/// <c>base.AttachList</c> as a side effect.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, crawling of a listing stops as soon as <c>MaxCount</c>
/// records have been collected for that listing (the counter restarts for each
/// listing). When <c>true</c>, every page of every listing is walked.
/// </param>
/// <returns>The collected <c>InviteInfo</c> records; empty when nothing could be fetched.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<InviteInfo>();

    foreach (string area in this.DicSiteUrl.Keys)
    {
        this.CrawlAreaListing(area, this.DicSiteUrl[area], crawlAll, list);
    }

    return list;
}

/// <summary>
/// Walks all pages of a single listing and appends the records it yields to <paramref name="list"/>.
/// </summary>
/// <param name="area">Listing name (the key in <c>DicSiteUrl</c>).</param>
/// <param name="listUrl">URL of the listing's first page.</param>
/// <param name="crawlAll">See <see cref="ExecuteCrawl"/>.</param>
/// <param name="list">Result list that receives the records.</param>
private void CrawlAreaListing(string area, string listUrl, bool crawlAll, IList list)
{
    string html;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(listUrl, Encoding.UTF8);
    }
    catch (Exception)
    {
        // A listing whose first page cannot be fetched is skipped, the same way
        // unreachable follow-up pages and detail pages are skipped below, so one
        // broken listing does not discard the remaining ones.
        return;
    }

    int pageCount = this.ReadPageCount(html);
    int count = 0;

    for (int page = 1; page <= pageCount; page++)
    {
        // Page 1 is already in hand; later pages are requested with "&page=N".
        if (page > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(listUrl + "&page=" + page.ToString(), Encoding.UTF8);
            }
            catch (Exception)
            {
                continue;
            }
        }

        // Rows are <li> elements whose parent is a <ul> that sits inside <div class="nav_list">.
        Parser parser = new Parser(new Lexer(html));
        NodeList rows = parser.ExtractAllNodesThatMatch(
            new AndFilter(
                new HasParentFilter(
                    new AndFilter(
                        new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "nav_list"))),
                        new TagNameFilter("ul"))),
                new TagNameFilter("li")));
        if (rows == null || rows.Count == 0)
        {
            continue;
        }

        for (int t = 0; t < rows.Count; t++)
        {
            if (!this.CrawlListItem(area, rows, t, list))
            {
                continue;
            }

            count++;
            if (!crawlAll && count >= this.MaxCount)
            {
                // Per-listing limit reached; the caller moves on to the next listing.
                return;
            }
        }
    }
}

/// <summary>
/// Reads the total page count from the <c>li.pageintro</c> pager element of a listing page.
/// </summary>
/// <param name="html">HTML of the listing's first page.</param>
/// <returns>The parsed page count, or 1 when the pager is missing or unreadable.</returns>
private int ReadPageCount(string html)
{
    Parser parser = new Parser(new Lexer(html));
    NodeList pager = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("li"), new HasAttributeFilter("class", "pageintro")));
    if (pager == null || pager.Count == 0)
    {
        return 1;
    }

    try
    {
        string pageText = pager.AsString().ToCtxString().GetRegexBegEnd("页共", "页");
        int parsed;
        if (int.TryParse(pageText, out parsed))
        {
            return parsed;
        }
    }
    catch (Exception)
    {
        // Best effort: if the text helpers fail on unexpected markup, fall back to a single page.
    }

    return 1;
}

/// <summary>
/// Builds the record for one listing row: downloads its detail page, extracts the
/// fields, adds the record to <paramref name="list"/> and registers its attachments.
/// </summary>
/// <param name="area">Listing name the row belongs to.</param>
/// <param name="rows">The listing rows of the current page.</param>
/// <param name="index">Index of the row to process.</param>
/// <param name="list">Result list that receives the record.</param>
/// <returns><c>true</c> when a record was added; <c>false</c> when the detail page could not be fetched.</returns>
private bool CrawlListItem(string area, NodeList rows, int index, IList list)
{
    bool isConstruction = area == "建设工程招标公告";

    // Only the construction-tender listing takes its end date from the row text.
    string endDate = string.Empty;
    if (isConstruction)
    {
        endDate = rows[index].ToNodePlainString().GetDateRegex();
    }

    string prjName = rows[index].GetATagValue("title");
    string infoUrl = "http://p.zsjyzx.gov.cn" + rows[index].GetATagHref();

    // A trailing "s" is appended so GetRegexBegEnd has an end delimiter after "articalID=".
    // NOTE(review): an id that itself contains "s" may be cut short, depending on how the helper matches - confirm.
    string articleId = (infoUrl + "s").GetRegexBegEnd("articalID=", "s");
    string detailUrl = "http://p.zsjyzx.gov.cn/port/Application/NewPage/ggnr.jsp?articalID=" + articleId;

    string detailHtml;
    try
    {
        // Fetched once; line-break tags become CRLF before parsing.
        // NOTE(review): the first Replace removes a single whitespace character; confirm whether a non-breaking space was intended.
        detailHtml = this.ToolWebSite.GetHtmlByUrl(detailUrl, Encoding.UTF8)
            .Replace(" ", "")
            .Replace("</br>", "\r\n")
            .Replace("<br>", "\r\n")
            .Replace("<br/>", "\r\n");
    }
    catch (Exception)
    {
        return false;
    }

    Parser detailParser = new Parser(new Lexer(detailHtml));
    NodeList detailNodes = detailParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "details_1")));
    string htmlTxt = detailNodes.AsHtml();

    // For the other listings, paragraph ends are turned into line breaks before the text is extracted.
    string inviteCtx = isConstruction
        ? htmlTxt.ToCtxString()
        : htmlTxt.Replace("</p>", "\r\n").ToCtxString();

    // Build unit: drop anything from the agency / address labels onward, then fall back to the "采购人" label.
    string buildUnit = inviteCtx.GetBuildRegex();
    buildUnit = CutAtMarker(buildUnit, "招标代理机构");
    buildUnit = CutAtMarker(buildUnit, "地址");
    if (string.IsNullOrWhiteSpace(buildUnit))
    {
        buildUnit = inviteCtx.GetRegex("采购人");
    }

    // Address: try successively more generic labels, then drop anything from "购买" onward.
    string prjAddress = inviteCtx.GetAddressRegex();
    if (string.IsNullOrWhiteSpace(prjAddress))
    {
        prjAddress = inviteCtx.GetRegex("地址");
    }
    if (string.IsNullOrWhiteSpace(prjAddress))
    {
        prjAddress = inviteCtx.GetRegex("联系地址");
    }
    prjAddress = CutAtMarker(prjAddress, "购买");

    string beginDate = inviteCtx.GetRegexBegEnd("时间:", "点击");
    string code = CutAtMarker(inviteCtx.GetCodeRegex(), "采购");

    string msgType = "中山市公共资源交易中心";
    string specType = isConstruction ? "建设工程" : "政府采购";
    string inviteType = ToolHtml.GetInviteTypes(prjName);
    // The construction listing is stored with an empty area name; the others keep the listing name.
    string areaName = isConstruction ? "" : area;

    InviteInfo info = ToolDb.GenInviteInfo("广东省", "中山市区", areaName, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, string.Empty, msgType, inviteType, specType, string.Empty, infoUrl, htmlTxt);
    list.Add(info);

    this.CollectAttachments(info, htmlTxt);
    return true;
}

/// <summary>
/// Scans the announcement body for attachment links and adds a <c>BaseAttach</c>
/// entry to <c>base.AttachList</c> for each of them.
/// </summary>
/// <param name="info">Record the attachments belong to (its <c>Id</c> is stored on each attachment).</param>
/// <param name="bodyHtml">HTML of the announcement body.</param>
private void CollectAttachments(InviteInfo info, string bodyHtml)
{
    Parser parser = new Parser(new Lexer(bodyHtml));
    NodeList anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
    if (anchors == null || anchors.Count == 0)
    {
        return;
    }

    for (int k = 0; k < anchors.Count; k++)
    {
        ATag anchor = anchors[k] as ATag;
        if (anchor == null || !anchor.IsAtagAttach())
        {
            continue;
        }

        string link;
        if (anchor.Link.IndexOf("http", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            link = anchor.Link;
        }
        else
        {
            // Otherwise the target is taken from the onclick handler: strip the
            // window.open wrapper and prefix the site root.
            string onclick = anchor.GetATagValue("onclick");
            string relative = onclick.Replace("','_black')", "").Replace("javascript:window.open('", "");
            link = "http://p.zsjyzx.gov.cn" + relative;
        }

        // Links longer than 500 bytes (in the default encoding) are skipped.
        if (Encoding.Default.GetByteCount(link) > 500)
        {
            continue;
        }

        BaseAttach attach = ToolDb.GenBaseAttach(anchor.LinkText, info.Id, link);
        base.AttachList.Add(attach);
    }
}

/// <summary>
/// Returns <paramref name="value"/> truncated just before the first ordinal occurrence
/// of <paramref name="marker"/>, or unchanged when the marker is absent or the value is null/empty.
/// </summary>
private static string CutAtMarker(string value, string marker)
{
    if (string.IsNullOrEmpty(value))
    {
        return value;
    }

    // Single ordinal search: avoids the Contains (ordinal) / IndexOf (culture-sensitive)
    // mismatch that could hand -1 to Remove.
    int at = value.IndexOf(marker, StringComparison.Ordinal);
    return at >= 0 ? value.Remove(at) : value;
}