/// <summary>
/// Crawls the announcement list on www.gy-center.net (list.jhtml, cid=97) page by page,
/// downloads the detail page of every list entry and converts it into a BidInfo record.
/// Attachment links found in the detail content are added to <c>base.AttachList</c>.
/// </summary>
/// <param name="crawlAll">
/// When false, the method returns as soon as the number of collected records reaches <c>MaxCount</c>.
/// </param>
/// <returns>
/// The collected BidInfo records. An empty list is returned when the first list page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    int pageInt = 1;
    string html = string.Empty;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
    }
    catch (Exception)
    {
        // Without the first list page there is nothing to crawl.
        return list;
    }

    // The pager lives in <div id="yema">; the page count is the text between "/" and "页".
    Parser parser = new Parser(new Lexer(html));
    NodeList noList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "yema")));
    if (noList != null && noList.Count > 0)
    {
        string pagerText = noList.AsString();
        try
        {
            Regex reg = new Regex(@"/[^页]+页");
            string result = reg.Match(pagerText).Value.Replace("页", "").Replace("/", "");
            pageInt = Convert.ToInt32(result);
        }
        catch
        {
            // Pager text not in the expected shape: fall back to a single page.
            pageInt = 1;
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl("http://www.gy-center.net/announce/list.jhtml?visi_id=&cid=97&chid=&gid=&thistype=&searchcid=&keyword=&action=yes&interval=&page=" + i.ToString(), Encoding.Default);
            }
            catch
            {
                // Skip list pages that cannot be downloaded.
                continue;
            }
        }

        // List entries are the <li> children of a <ul> inside <div id="tab01">.
        parser = new Parser(new Lexer(html));
        NodeList dtlList = parser.ExtractAllNodesThatMatch(
            new AndFilter(
                new HasParentFilter(
                    new AndFilter(
                        new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "tab01"))),
                        new TagNameFilter("ul"))),
                new TagNameFilter("li")));
        if (dtlList == null || dtlList.Count == 0)
        {
            continue;
        }

        // NOTE(review): the last <li> is intentionally not visited (bound is Count - 1);
        // presumably it is not an announcement entry - confirm against the live page.
        for (int j = 0; j < dtlList.Count - 1; j++)
        {
            string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty,
                   code = string.Empty, beginDate = string.Empty, endDate = string.Empty, bidType = string.Empty,
                   specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty,
                   prjAddress = string.Empty, prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

            string itemText = dtlList[j].ToPlainTextString();
            string itemHtml = dtlList[j].ToHtml();
            prjName = ToolHtml.GetHtmlAtagValue("title", itemHtml);
            beginDate = ToolHtml.GetRegexDateTime(itemText);
            InfoUrl = "http://www.gy-center.net/announce/" + ToolHtml.GetHtmlAtagValue("href", itemHtml);

            string htlDtl = string.Empty;
            try
            {
                htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                // Drop <script> blocks before parsing the detail page.
                htlDtl = System.Text.RegularExpressions.Regex.Replace(htlDtl, "(<script)[\\s\\S]*?(</script>)", "");
            }
            catch
            {
                // Skip entries whose detail page cannot be downloaded.
                continue;
            }

            parser = new Parser(new Lexer(htlDtl));
            NodeList htlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "r_content_right_main")));
            if (htlList == null || htlList.Count == 0)
            {
                continue;
            }

            HtmlTxt = htlList.ToHtml();
            // Strip tags and normalise whitespace / line breaks of the announcement text.
            bidCtx = Regex.Replace(HtmlTxt, "<[^>]*>", "").Replace(" ", "").Replace(" ", "").Replace("\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\t\t", "").Replace("\r\r", "\r").Replace("\n\n", "\n");
            bidType = ToolHtml.GetInviteTypes(prjName);

            // Turn the first "MsoNormalTable" into "header:value" lines, pairing row 0 with row 1.
            string bidStr = string.Empty;
            parser = new Parser(new Lexer(HtmlTxt));
            NodeList bidList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "MsoNormalTable")));
            if (bidList != null && bidList.Count > 0)
            {
                try
                {
                    TableTag tab = bidList[0] as TableTag;
                    if (tab != null && tab.RowCount > 1)
                    {
                        TableRow headerRow = tab.Rows[0];
                        TableRow valueRow = tab.Rows[1];
                        // At most the first seven columns are used; a single-column table is ignored.
                        int columnCount = Math.Min(headerRow.ColumnCount, 7);
                        if (columnCount > 1)
                        {
                            for (int col = 0; col < columnCount; col++)
                            {
                                bidStr += headerRow.Columns[col].ToPlainTextString().ToNodeString() + ":" + valueRow.Columns[col].ToPlainTextString().ToNodeString() + "\r\n";
                            }
                        }
                    }
                }
                catch (Exception)
                {
                    // Best effort: an irregular table (e.g. fewer value cells than header cells)
                    // simply leaves bidStr with whatever pairs were read so far.
                }
            }

            buildUnit = ToolHtml.GetRegexString(bidCtx, ToolHtml.BuildRegex);
            prjAddress = ToolHtml.GetRegexString(bidCtx, ToolHtml.AddressRegex);
            code = ToolHtml.GetRegexString(bidCtx, ToolHtml.CodeRegex);

            // Winning bidder: announcement text first, then the table text, then the "确认 ... 为" phrase.
            bidUnit = ToolHtml.GetRegexString(bidCtx, ToolHtml.BidRegex);
            if (string.IsNullOrEmpty(bidUnit))
            {
                bidUnit = ToolHtml.GetRegexString(bidStr.Replace(" ", ""), ToolHtml.BidRegex, false);
            }
            if (string.IsNullOrEmpty(bidUnit))
            {
                bidUnit = bidCtx.GetRegexBegEnd("确认", "为");
            }

            // Bid amount: regex extraction first, then the text between "¥" and "元".
            bidMoney = ToolHtml.GetRegexString(bidCtx, ToolHtml.MoneyRegex);
            bidMoney = ToolHtml.GetRegexMoney(bidMoney);
            if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
            {
                bidMoney = bidCtx.GetRegexBegEnd("¥", "元").GetMoney();
            }

            buildUnit = ToolHtml.GetSubString(buildUnit, 150);
            prjAddress = ToolHtml.GetSubString(prjAddress, 150);
            code = ToolHtml.GetSubString(code, 50);
            bidUnit = ToolHtml.GetSubString(bidUnit, 150);
            bidUnit = ToolHtml.GetStringTemp(bidUnit);
            buildUnit = ToolHtml.GetStringTemp(buildUnit);

            if (string.IsNullOrEmpty(code))
            {
                code = "见中标信息";
            }
            if (string.IsNullOrEmpty(prjAddress))
            {
                prjAddress = "见中标信息";
            }
            specType = "其他";
            msgType = "工网在线";

            BidInfo info = ToolDb.GenBidInfo("广东省", "电网专项工程", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
            list.Add(info);

            // Register every attachment-like link of the detail content for this record.
            parser = new Parser(new Lexer(HtmlTxt));
            NodeList nodeAtag = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (nodeAtag != null && nodeAtag.Count > 0)
            {
                for (int c = 0; c < nodeAtag.Count; c++)
                {
                    ATag a = nodeAtag[c] as ATag;
                    if (a == null)
                    {
                        // Node is not an anchor tag object; nothing to attach.
                        continue;
                    }
                    if (a.Link.IsAtagAttach())
                    {
                        // NOTE(review): the attachment host differs from the crawled site
                        // (www.gy-center.net) - confirm this prefix is intended.
                        string alink = "http://www.bidding.csg.cn/" + a.Link;
                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText.Replace(" ", ""), info.Id, alink);
                        base.AttachList.Add(attach);
                    }
                }
            }

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the list pages under www.haizhu.gov.cn/site/jsj/bszx/ztbgl/, downloads the detail
/// page of every list entry and converts it into an InviteInfo record. Attachment links found
/// in the detail content are added to <c>base.AttachList</c>.
/// </summary>
/// <param name="crawlAll">
/// When false, the method returns as soon as the number of collected records reaches <c>MaxCount</c>.
/// </param>
/// <returns>
/// The collected InviteInfo records. An empty list is returned when the first list page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    int pageInt = 1;
    string html = string.Empty;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch (Exception)
    {
        // Without the first list page there is nothing to crawl.
        return list;
    }

    // The pager text is taken from <div class="foot">; the page count sits between "共" and "页".
    Parser parser = new Parser(new Lexer(html.GetJsString()));
    NodeList sNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "foot")));
    if (sNode != null && sNode.Count > 0)
    {
        try
        {
            string pagerText = sNode.AsString().Trim();
            Regex reg = new Regex(@"共[^页]+页");
            // The match includes the "共" / "页" markers; they must be removed before the
            // number can be parsed (parsing the raw match always failed).
            string pageText = reg.Match(pagerText).Value.Replace("共", "").Replace("页", "").Trim();
            pageInt = Convert.ToInt32(pageText);
        }
        catch
        {
            // Pager text not in the expected shape: keep the historical fallback.
            pageInt = 18;
        }
    }

    // Page 1 is the already downloaded SiteUrl; page i (i > 1) is index_{i-1}.html.
    // Starting at 1 avoids processing the first page twice.
    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl("http://www.haizhu.gov.cn/site/jsj/bszx/ztbgl/index_" + (i - 1).ToString() + ".html", Encoding.UTF8);
            }
            catch
            {
                // Skip list pages that cannot be downloaded.
                continue;
            }
        }

        // List entries are the <li> children of a <ul> inside <div class="body">.
        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(
            new AndFilter(
                new HasParentFilter(
                    new AndFilter(
                        new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "body"))),
                        new TagNameFilter("ul"))),
                new TagNameFilter("li")));
        if (nodeList == null || nodeList.Count == 0)
        {
            continue;
        }

        // NOTE(review): the first seven <li> nodes are skipped; presumably they are not
        // announcement entries - confirm against the live page.
        for (int j = 7; j < nodeList.Count; j++)
        {
            string code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty,
                   inviteCtx = string.Empty, inviteType = string.Empty, specType = string.Empty, beginDate = string.Empty,
                   endDate = string.Empty, remark = string.Empty, InfoUrl = string.Empty, msgType = string.Empty,
                   otherType = string.Empty, HtmlTxt = string.Empty;

            INode iNode = nodeList[j];
            string itemHtml = iNode.ToHtml();
            prjName = ToolHtml.GetHtmlAtagValue("title", itemHtml, null, 1);
            beginDate = iNode.ToPlainTextString().GetDateRegex();

            // First six characters of the link (after removing "./jsgczbgl/") are reused
            // below as the folder of attachment URLs.
            string aLinks = string.Empty;
            try
            {
                aLinks = itemHtml.GetATag(1).Link.Replace("./jsgczbgl/", "");
                aLinks = aLinks.Substring(0, 6);
            }
            catch
            {
                // Best effort: a short or missing link leaves aLinks as it is.
            }

            InfoUrl = "http://www.haizhu.gov.cn/site/jsj/bszx/ztbgl/" + itemHtml.GetATag(1).Link.Replace("./", "");
            inviteType = prjName.GetInviteBidType();

            string htlDtl = string.Empty;
            try
            {
                htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8);
                htlDtl = htlDtl.GetJsString();
            }
            catch
            {
                // Skip entries whose detail page cannot be downloaded.
                continue;
            }

            parser = new Parser(new Lexer(htlDtl));
            NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "img")));
            if (dtlList == null || dtlList.Count == 0)
            {
                continue;
            }

            HtmlTxt = dtlList.ToHtml();
            inviteCtx = HtmlTxt.Replace("<p>", "\r\n").Replace("</p>", "\r\n").ToCtxString();
            prjAddress = inviteCtx.GetAddressRegex();
            buildUnit = inviteCtx.GetBuildRegex();
            code = inviteCtx.GetCodeRegex();
            msgType = "广州市海珠区建设和园林绿化局";
            specType = "建设工程";

            InviteInfo info = ToolDb.GenInviteInfo("广东省", "广州市区", "海珠区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
            list.Add(info);

            // Register every attachment-like link of the detail content for this record.
            parser = new Parser(new Lexer(HtmlTxt));
            NodeList aList = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aList != null && aList.Count > 0)
            {
                for (int c = 0; c < aList.Count; c++)
                {
                    ATag a = aList[c] as ATag;
                    if (a == null)
                    {
                        // Node is not an anchor tag object; nothing to attach.
                        continue;
                    }
                    if (a.Link.IsAtagAttach())
                    {
                        string alink = "http://www.haizhu.gov.cn/site/jsj/bszx/ztbgl/jsgczbgl/" + aLinks + "/" + a.Link.Replace("./", "");
                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText.Replace(" ", "").Replace(";", "").Replace(";", ""), info.Id, alink);
                        base.AttachList.Add(attach);
                    }
                }
            }

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Gets an attribute value of an anchor tag found in the HTML of a node list.
/// </summary>
/// <param name="value">Node list whose <c>ToHtml()</c> output is searched.</param>
/// <param name="strName">Name of the anchor-tag attribute to read; defaults to "href".</param>
/// <param name="i">
/// Forwarded unchanged as the last argument of <c>ToolHtml.GetHtmlAtagValue</c>;
/// presumably selects which anchor is read (TODO confirm against that helper).
/// </param>
/// <returns>The result of <c>ToolHtml.GetHtmlAtagValue</c> for the given arguments.</returns>
public static string GetATagValue(this NodeList value, string strName = "href", int i = 0)
{
    string html = value.ToHtml();
    string attributeValue = ToolHtml.GetHtmlAtagValue(strName, html, null, i);
    return attributeValue;
}
/// <summary>
/// Gets an attribute value of an anchor tag found in an HTML string.
/// </summary>
/// <param name="value">HTML text that is searched.</param>
/// <param name="strName">Name of the anchor-tag attribute to read; defaults to "href".</param>
/// <param name="i">
/// Forwarded unchanged as the last argument of <c>ToolHtml.GetHtmlAtagValue</c>;
/// presumably selects which anchor is read (TODO confirm against that helper).
/// </param>
/// <returns>The result of <c>ToolHtml.GetHtmlAtagValue</c> for the given arguments.</returns>
public static string GetATagValue(this string value, string strName = "href", int i = 0)
{
    string attributeValue = ToolHtml.GetHtmlAtagValue(strName, value, null, i);
    return attributeValue;
}
/// <summary>
/// Crawls the announcement list on www.gy-center.net (list.jhtml, cid=76) page by page,
/// downloads the detail page of every list entry and converts it into an InviteInfo record.
/// </summary>
/// <param name="crawlAll">
/// When false, the method returns as soon as the number of collected records reaches <c>MaxCount</c>.
/// </param>
/// <returns>
/// The collected InviteInfo records. An empty list is returned when the first list page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList records = new ArrayList();
    int pageCount = 1;
    string listHtml = string.Empty;

    try
    {
        listHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
    }
    catch (Exception)
    {
        // Without the first list page there is nothing to crawl.
        return records;
    }

    // The pager lives in <div id="yema">; the page count is the text between "/" and "页".
    Parser parser = new Parser(new Lexer(listHtml));
    NodeList pagerNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "yema")));
    if (pagerNodes != null && pagerNodes.Count > 0)
    {
        string pagerText = pagerNodes.AsString();
        try
        {
            string matched = Regex.Match(pagerText, @"/[^页]+页").Value;
            string digits = matched.Replace("页", "").Replace("/", "");
            pageCount = Convert.ToInt32(digits);
        }
        catch
        {
            // Pager text not in the expected shape: fall back to a single page.
            pageCount = 1;
        }
    }

    for (int page = 1; page <= pageCount; page++)
    {
        if (page > 1)
        {
            try
            {
                listHtml = this.ToolWebSite.GetHtmlByUrl("http://www.gy-center.net/announce/list.jhtml?visi_id=&cid=76&chid=&gid=&thistype=&searchcid=&keyword=&action=yes&interval=&page=" + page.ToString(), Encoding.Default);
            }
            catch
            {
                // Skip list pages that cannot be downloaded.
                continue;
            }
        }

        // List entries are the <li> children of a <ul> inside <div id="tab01">.
        parser = new Parser(new Lexer(listHtml));
        NodeList items = parser.ExtractAllNodesThatMatch(
            new AndFilter(
                new HasParentFilter(
                    new AndFilter(
                        new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "tab01"))),
                        new TagNameFilter("ul"))),
                new TagNameFilter("li")));
        if (items == null || items.Count == 0)
        {
            continue;
        }

        // The last <li> is not visited (bound is Count - 1), exactly as before.
        int lastExclusive = items.Count - 1;
        for (int index = 0; index < lastExclusive; index++)
        {
            // These values are never filled by this crawler and are passed on empty.
            string endDate = string.Empty;
            string remark = string.Empty;
            string otherType = string.Empty;

            string itemText = items[index].ToPlainTextString();
            string itemHtml = items[index].ToHtml();
            string prjName = ToolHtml.GetHtmlAtagValue("title", itemHtml);
            string beginDate = ToolHtml.GetRegexDateTime(itemText);
            string InfoUrl = "http://www.gy-center.net/announce/" + ToolHtml.GetHtmlAtagValue("href", itemHtml);

            string detailHtml;
            try
            {
                detailHtml = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                // Drop <script> blocks before parsing the detail page.
                detailHtml = Regex.Replace(detailHtml, "(<script)[\\s\\S]*?(</script>)", "");
            }
            catch
            {
                // Skip entries whose detail page cannot be downloaded.
                continue;
            }

            parser = new Parser(new Lexer(detailHtml));
            NodeList content = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "r_content_right_main")));
            if (content == null || content.Count == 0)
            {
                continue;
            }

            string HtmlTxt = content.ToHtml();

            // Strip tags and normalise whitespace / line breaks of the announcement text.
            string inviteCtx = Regex.Replace(HtmlTxt, "<[^>]*>", "")
                .Replace(" ", "")
                .Replace(" ", "")
                .Replace("\t\t", "")
                .Replace("\n", "\r\n")
                .Replace("\r\n\r\n", "\r\n")
                .Replace("\r\n\r\n", "\r\n");

            string inviteType = ToolHtml.GetInviteTypes(prjName);
            string prjAddress = ToolHtml.GetRegexString(inviteCtx, ToolHtml.AddressRegex);
            string buildUnit = ToolHtml.GetRegexString(inviteCtx, ToolHtml.BuildRegex);
            string code = ToolHtml.GetRegexString(inviteCtx, ToolHtml.CodeRegex);

            prjAddress = ToolHtml.GetSubString(prjAddress, 150);
            buildUnit = ToolHtml.GetSubString(buildUnit, 150);
            code = ToolHtml.GetSubString(code, 50);

            // Placeholders for values that could not be extracted.
            if (string.IsNullOrEmpty(code))
            {
                code = "见招标信息";
            }
            if (string.IsNullOrEmpty(prjAddress))
            {
                prjAddress = "见招标信息";
            }

            string specType = "其他";
            string msgType = "工网在线";
            if (string.IsNullOrEmpty(buildUnit))
            {
                buildUnit = "工网在线";
            }

            InviteInfo info = ToolDb.GenInviteInfo("广东省", "电网专项工程", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
            records.Add(info);

            bool limitReached = records.Count >= this.MaxCount;
            if (!crawlAll && limitReached)
            {
                return records;
            }
        }
    }

    return records;
}
/// <summary>
/// Crawls the list pages under www.conghua.gov.cn/zgch/zbzb/ and downloads the detail page of
/// every table row. Rows whose title contains "中标", "结果" or "候选单位公示" become BidInfo
/// records; all other rows become InviteInfo records.
/// </summary>
/// <param name="crawlAll">
/// When false, the method returns as soon as the number of collected records reaches <c>MaxCount</c>.
/// </param>
/// <returns>
/// The collected InviteInfo / BidInfo records. An empty list is returned when the first list page
/// cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();

    // Determine the page count.
    int pageInt = 1;
    string html = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.UTF8);
    }
    catch (Exception)
    {
        // Without the first list page there is nothing to crawl.
        return list;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList sNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new TagNameFilter("div")), new HasAttributeFilter("id", "page_div")));
    if (sNode != null && sNode.Count > 0)
    {
        string page = ToolHtml.GetRegexString(sNode.AsString(), "共", "页");
        if (!int.TryParse(page, out pageInt))
        {
            // Pager text not in the expected shape: keep the historical fallback.
            pageInt = 7;
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                // Decode follow-up list pages with the same encoding as the first list page
                // and the detail pages (was Encoding.Default, which is machine dependent).
                html = this.ToolWebSite.GetHtmlByUrl("http://www.conghua.gov.cn/zgch/zbzb/list_" + i.ToString() + ".shtml", Encoding.UTF8);
            }
            catch (Exception)
            {
                // Skip list pages that cannot be downloaded.
                continue;
            }
        }

        // The announcement list is the first <table> directly inside <div class="list_list">.
        parser = new Parser(new Lexer(html));
        sNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "list_list"))), new TagNameFilter("table")));
        if (sNode == null || sNode.Count == 0)
        {
            continue;
        }

        TableTag table = sNode[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        for (int j = 0; j < table.RowCount; j++)
        {
            TableRow tr = table.Rows[j];
            string rowHtml = tr.ToHtml();
            string projectName = ToolHtml.GetHtmlAtagValue("title", rowHtml);
            bool isBidResult = projectName.Contains("中标") || projectName.Contains("结果") || projectName.Contains("候选单位公示");

            // Values that this crawler never fills; they are passed on empty.
            string code = string.Empty, prjAddress = string.Empty, endDate = string.Empty,
                   remark = string.Empty, otherType = string.Empty;

            // Steps shared by both record kinds: type, date, detail URL, detail download.
            string prjName = projectName;
            string infoType = ToolHtml.GetInviteTypes(projectName);
            string beginDate = ToolHtml.GetRegexDateTime(tr.Columns[1].ToPlainTextString());
            string InfoUrl = "http://www.conghua.gov.cn" + ToolHtml.GetHtmlAtagValue("href", rowHtml).Replace("..", "");

            string htmlDtl = string.Empty;
            try
            {
                htmlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8);
                htmlDtl = ToolHtml.GetRegexHtlTxt(htmlDtl);
            }
            catch
            {
                // Skip rows whose detail page cannot be downloaded.
                continue;
            }

            parser = new Parser(new Lexer(htmlDtl));
            NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "zoomcon")));
            if (dtlList == null || dtlList.Count == 0)
            {
                continue;
            }

            string HtmlTxt = dtlList.ToHtml();
            string msgType = "广州建设工程交易中心";
            string specType = "建设工程";

            if (!isBidResult)
            {
                string inviteCtx = dtlList.AsString().Replace(" ", "");
                string buildUnit = ToolHtml.GetRegexString(inviteCtx, ToolHtml.BuildRegex, true);
                if (!string.IsNullOrEmpty(buildUnit) && buildUnit.Contains(" "))
                {
                    // Keep only the part before the first blank.
                    buildUnit = buildUnit.Remove(buildUnit.IndexOf(" "));
                }
                buildUnit = ToolHtml.GetSubString(buildUnit, 150);

                // Default type when none could be derived from the title.
                string inviteType = string.IsNullOrEmpty(infoType) ? "小型工程" : infoType;
                if (string.IsNullOrEmpty(buildUnit))
                {
                    buildUnit = "广州建设工程交易中心";
                }

                InviteInfo info = ToolDb.GenInviteInfo("广东省", "广州市区", "从化市", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                list.Add(info);
            }
            else
            {
                string bidUnit = string.Empty, bidMoney = string.Empty, prjMgr = string.Empty;

                string bidCtx = dtlList.AsString();
                string buildUnit = ToolHtml.GetRegexString(bidCtx, ToolHtml.BuildRegex, true);
                buildUnit = ToolHtml.GetSubString(buildUnit, 150);

                // Default type when none could be derived from the title. The original ternary
                // was inverted: it overwrote recognised types and left empty ones empty.
                string bidType = string.IsNullOrEmpty(infoType) ? "小型工程" : infoType;

                // Flatten the first table of the content into "column0:column1" lines and
                // extract bidder, amount and project manager from that text.
                parser = new Parser(new Lexer(HtmlTxt));
                NodeList bidNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                if (bidNode != null && bidNode.Count > 0)
                {
                    string ctx = string.Empty;
                    TableTag bidTable = bidNode[0] as TableTag;
                    try
                    {
                        for (int r = 0; r < bidTable.RowCount; r++)
                        {
                            ctx += bidTable.Rows[r].Columns[0].ToNodePlainString() + ":";
                            ctx += bidTable.Rows[r].Columns[1].ToNodePlainString() + "\r\n";
                        }
                    }
                    catch (Exception)
                    {
                        // Best effort: an irregular table (e.g. a single-cell row) stops the
                        // flattening and keeps whatever text was collected so far.
                    }

                    bidUnit = ctx.GetRegex("单位名称,承包意向人名称");
                    bidMoney = ctx.GetMoneyRegex();
                    prjMgr = ctx.GetMgrRegex();
                    if (!string.IsNullOrEmpty(prjMgr) && prjMgr.Contains("/"))
                    {
                        // Keep only the part before the first "/".
                        prjMgr = prjMgr.Remove(prjMgr.IndexOf("/"));
                    }
                }

                if (string.IsNullOrEmpty(buildUnit))
                {
                    buildUnit = "广州建设工程交易中心";
                }

                BidInfo info = ToolDb.GenBidInfo("广东省", "广州市区", "从化市", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                list.Add(info);
            }

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}