/// <summary>
/// Crawls the paged listing behind <c>SiteUrl</c> and builds one <c>BidInfo</c> (plus attachment
/// records) for every listing row whose detail page on www.hbggzy.cn contains a <c>td</c> with
/// id "TDContent".
/// </summary>
/// <param name="crawlAll">
/// When false, the method returns as soon as the result list holds at least <c>MaxCount</c> items.
/// </param>
/// <returns>
/// The collected items; the list is empty when the first listing page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList results = new List<BidInfo>();
    string cookie = string.Empty;
    string listingHtml = string.Empty;
    try
    {
        listingHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookie);
    }
    catch
    {
        return results;
    }

    // The page total is parsed from the pager cell text found between "总页数" and "当前页".
    int totalPages = 1;
    Parser parser = new Parser(new Lexer(listingHtml));
    NodeList pagerCells = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("nowrap", "true")));
    if (pagerCells != null && pagerCells.Count > 0)
    {
        try
        {
            string pageCountText = pagerCells.AsString().GetRegexBegEnd("总页数", "当前页").Replace(":", "");
            totalPages = int.Parse(pageCountText);
        }
        catch
        {
            // Unparseable pager text: stay with a single page.
        }
    }

    for (int pageNo = 1; pageNo <= totalPages; pageNo++)
    {
        if (pageNo > 1)
        {
            // Later pages are requested by posting the pager event back with the current view state.
            string viewState = this.ToolWebSite.GetAspNetViewState(listingHtml);
            NameValueCollection form = this.ToolWebSite.GetNameValueCollection(
                new string[] { "__VIEWSTATE", "__EVENTTARGET", "__EVENTARGUMENT" },
                new string[] { viewState, "MoreInfoList1$Pager", pageNo.ToString() });
            try
            {
                listingHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, form, Encoding.Default, ref cookie);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(listingHtml));
        NodeList gridNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
        if (gridNodes == null || gridNodes.Count < 1)
        {
            continue;
        }

        TableTag grid = gridNodes[0] as TableTag;
        for (int rowIndex = 0; rowIndex < grid.RowCount; rowIndex++)
        {
            // These fields are never filled in by this crawler and reach GenBidInfo empty.
            string area = string.Empty;
            string endDate = string.Empty;
            string otherType = string.Empty;
            string prjMgr = string.Empty;

            TableRow row = grid.Rows[rowIndex];
            ATag titleLink = row.Columns[1].GetATag();
            string prjName = titleLink.GetAttribute("title");
            string beginDate = row.Columns[2].ToPlainTextString().GetDateRegex();
            string infoUrl = "http://www.hbggzy.cn" + titleLink.Link;

            string detailHtml = string.Empty;
            try
            {
                detailHtml = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(detailHtml));
            NodeList contentNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
            if (contentNodes == null || contentNodes.Count < 1)
            {
                continue;
            }

            string htmlTxt = contentNodes.AsHtml();
            string bidCtx = htmlTxt.GetReplace("</p>,<br />,<br/>", "\r\n").ToCtxString().GetReplace("\t", "\r\n");
            // The address is extracted but not handed to GenBidInfo below.
            string prjAddress = bidCtx.GetAddressRegex();
            string buildUnit = bidCtx.GetBuildRegex();
            string bidUnit = bidCtx.GetBidRegex();
            string bidMoney = bidCtx.GetMoneyRegex();

            if (string.IsNullOrWhiteSpace(bidUnit))
            {
                // Fallback: flatten the first embedded table into "label:value" lines and
                // run the bidder / money extraction on that text instead.
                parser = new Parser(new Lexer(htmlTxt));
                NodeList innerTables = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                if (innerTables != null && innerTables.Count > 0)
                {
                    TableTag innerTable = innerTables[0] as TableTag;
                    string flattened = string.Empty;
                    int r = 0;
                    while (r < innerTable.RowCount)
                    {
                        for (int c = 0; c < innerTable.Rows[r].ColumnCount; c++)
                        {
                            string cellText = innerTable.Rows[r].Columns[c].ToNodePlainString();
                            if (string.IsNullOrWhiteSpace(cellText))
                            {
                                continue;
                            }

                            bool isWinnerHeader = cellText.Contains("中标人") || cellText.Contains("中标单位");
                            if (!isWinnerHeader)
                            {
                                // Odd-positioned cells act as labels, even-positioned cells end the line.
                                string terminator = (c + 1) % 2 == 0 ? "\r\n" : ":";
                                flattened += cellText.GetReplace(":,:") + terminator;
                                continue;
                            }

                            try
                            {
                                // Header cell and its right-hand neighbour are paired with the cells directly below them.
                                flattened += cellText.GetReplace(":,:") + ":" + innerTable.Rows[r + 1].Columns[c].ToNodePlainString() + "\r\n";
                                flattened += innerTable.Rows[r].Columns[c + 1].ToNodePlainString().GetReplace(":,:") + ":" + innerTable.Rows[r + 1].Columns[c + 1].ToNodePlainString() + "\r\n";
                            }
                            catch
                            {
                                // Missing neighbour cells: keep whatever was appended so far.
                            }

                            // The row below was consumed as values, so skip it as well.
                            r++;
                            break;
                        }

                        r++;
                    }

                    bidUnit = flattened.GetBidRegex();
                    bidMoney = flattened.GetMoneyRegex();
                }
            }

            // Cut trailing text off the builder name.
            if (buildUnit.Contains("公司"))
            {
                buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
            }
            foreach (string marker in new string[] { "地址", "联系", "指挥部" })
            {
                if (buildUnit.Contains(marker))
                {
                    buildUnit = buildUnit.Remove(buildUnit.IndexOf(marker));
                }
            }

            string code = bidCtx.GetCodeRegex().GetCodeDel().GetReplace(".");

            bool bidUnitLooksWrong = bidUnit.Contains("日历天")
                || bidUnit.Contains("预期中标")
                || bidUnit.Contains("投标人")
                || bidUnit.Contains("中标价");
            if (bidUnitLooksWrong)
            {
                bidUnit = string.Empty;
            }

            string msgType = "湖北省公共资源交易中心";
            string specType = "政府采购";
            string bidType = "交通工程";
            buildUnit = buildUnit.Replace(" ", "");

            BidInfo info = ToolDb.GenBidInfo("湖北省", "湖北省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, infoUrl, prjMgr, htmlTxt);
            results.Add(info);

            // Register every attachment-like anchor found in the detail content.
            parser = new Parser(new Lexer(htmlTxt));
            NodeList anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (anchors != null && anchors.Count > 0)
            {
                for (int k = 0; k < anchors.Count; k++)
                {
                    ATag anchor = anchors[k] as ATag;
                    if (!anchor.IsAtagAttach())
                    {
                        continue;
                    }

                    string link = anchor.Link.ToLower().Contains("http")
                        ? anchor.Link
                        : "http://www.hbggzy.cn/" + anchor.Link;
                    BaseAttach attach = ToolDb.GenBaseAttach(anchor.LinkText, info.Id, link);
                    base.AttachList.Add(attach);
                }
            }

            if (!crawlAll && results.Count >= this.MaxCount)
            {
                return results;
            }
        }
    }

    return results;
}
/// <summary>
/// Crawls the listing at <c>SiteUrl</c> and its numbered follow-up pages on www.szns.gov.cn,
/// opens each row's detail page and builds one <c>BidInfo</c> (plus attachment records) per
/// detail page that yields content.
/// </summary>
/// <remarks>
/// The page loop starts at page 1 so that the listing already downloaded from <c>SiteUrl</c> is
/// parsed as well; follow-up pages are fetched from the fixed "0239458d-{n}.html" address.
/// </remarks>
/// <param name="crawlAll">
/// When false, the method returns as soon as the result list holds at least <c>MaxCount</c> items.
/// </param>
/// <returns>
/// The collected items; the list is empty when the first listing page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<BidInfo>();
    string html = string.Empty;
    int pageInt = 1;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
    }
    catch
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "Normal")));
    if (pageNode != null && pageNode.Count > 0)
    {
        // Page count = total records / records per page + 1, both read from the pager text.
        string pagerText = pageNode.AsString();
        string temp = pagerText.GetRegexBegEnd("总记录数:", ",每页显示");
        string sum = pagerText.GetRegexBegEnd("每页显示", "条记录");
        try
        {
            pageInt = int.Parse(temp) / int.Parse(sum) + 1;
        }
        catch
        {
            // Unparseable pager text: stay with a single page.
        }
    }

    // Start at 1: the first page's HTML is already in 'html' and must be parsed too.
    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                string pageUrl = "http://www.szns.gov.cn/jyj/xxgk6/qt74/zbgs/0239458d-" + i + ".html";
                html = this.ToolWebSite.GetHtmlByUrl(pageUrl);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("style", "width:100%;border-collapse:collapse;")));
        if (listNode == null || listNode.Count < 1)
        {
            continue;
        }

        TableTag table = listNode[0] as TableTag;
        for (int j = 0; j < table.RowCount; j++)
        {
            string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty,
                   code = string.Empty, beginDate = string.Empty, endDate = string.Empty, bidType = string.Empty,
                   specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty,
                   prjAddress = string.Empty, prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

            TableRow tr = table.Rows[j];
            ATag aTag = tr.Columns[0].GetATag();
            prjName = aTag.GetAttribute("title");
            string rowHtml = tr.ChildrenHTML.ToString();
            beginDate = rowHtml.GetRegexBegEnd("<span>", "</span>").GetDateRegex();
            // Un-escape ampersands in the href so the query string is usable as a URL.
            InfoUrl = "http://www.szns.gov.cn" + aTag.Link.GetReplace("&amp;", "&");

            string htmldtl = string.Empty;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "page_con")));
            if (dtlNode == null || dtlNode.Count < 1)
            {
                // No dedicated content container: fall back to the whole body.
                parser.Reset();
                dtlNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("body"));
            }
            if (dtlNode == null || dtlNode.Count < 1)
            {
                continue;
            }

            HtmlTxt = dtlNode.AsHtml();
            bidCtx = HtmlTxt.GetReplace("<br/>,</p>,<br>,<br />", "\r\n").ToCtxString();
            buildUnit = bidCtx.GetBuildRegex();
            // The address is extracted but not handed to GenBidInfo below.
            prjAddress = bidCtx.GetAddressRegex();
            code = bidCtx.GetCodeRegex().GetCodeDel();
            bidUnit = bidCtx.GetBidRegex();
            if (string.IsNullOrEmpty(bidUnit))
            {
                bidUnit = bidCtx.GetRegex("成交候选人,第一中标候选人名称").GetReplace("名称");
            }
            bidMoney = bidCtx.GetMoneyRegex();
            if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
            {
                bidMoney = bidCtx.GetRegexBegEnd("预中标价:", "\r").GetMoney();
            }
            prjMgr = bidCtx.GetMgrRegex();

            if (string.IsNullOrEmpty(bidUnit))
            {
                // Fallback: a two-row table (header row + value row) is flattened into
                // "header:value" lines and the extraction is retried on that text.
                parser = new Parser(new Lexer(HtmlTxt));
                NodeList tableNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("style", "border-collapse: collapse; width: 98%;")));
                if (tableNode != null && tableNode.Count > 0)
                {
                    TableTag dtlTable = tableNode[0] as TableTag;
                    string ctx = string.Empty;
                    if (dtlTable.RowCount == 2)
                    {
                        try
                        {
                            for (int r = 0; r < dtlTable.Rows[0].ColumnCount; r++)
                            {
                                ctx += dtlTable.Rows[0].Columns[r].ToNodePlainString() + ":";
                                ctx += dtlTable.Rows[1].Columns[r].ToNodePlainString() + "\r\n";
                            }
                        }
                        catch
                        {
                            // Rows of unequal width: keep what was flattened so far.
                        }

                        bidUnit = ctx.GetBidRegex();
                        if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
                        {
                            bidMoney = ctx.GetRegex("中标金额(万元)");
                        }
                        if (string.IsNullOrEmpty(bidMoney))
                        {
                            bidMoney = ctx.GetMoneyRegex();
                        }
                        if (string.IsNullOrEmpty(prjMgr))
                        {
                            prjMgr = ctx.GetMgrRegex();
                        }
                    }
                }
            }

            try
            {
                // Amounts above 100000 are divided by 10000 before being stored.
                if (decimal.Parse(bidMoney) > 100000)
                {
                    bidMoney = (decimal.Parse(bidMoney) / 10000).ToString();
                }
            }
            catch
            {
                // Non-numeric amount: leave it untouched.
            }

            if (!string.IsNullOrEmpty(bidUnit) && bidUnit[0] == '为')
            {
                bidUnit = bidUnit.Substring(1, bidUnit.Length - 1);
            }

            specType = "政府采购";
            bidType = prjName.GetInviteBidType();
            msgType = "深圳市南山区教育局";
            BidInfo info = ToolDb.GenBidInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
            list.Add(info);

            // Register every attachment-like anchor found in the detail content.
            parser = new Parser(new Lexer(HtmlTxt));
            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode != null && aNode.Count > 0)
            {
                for (int a = 0; a < aNode.Count; a++)
                {
                    ATag file = aNode[a].GetATag();
                    if (file.IsAtagAttach())
                    {
                        string link = file.Link;
                        if (!link.ToLower().Contains("http"))
                        {
                            link = "http://exoa.nsjy.com" + file.Link;
                        }
                        base.AttachList.Add(ToolDb.GenBaseAttach(file.LinkText, info.Id, link));
                    }
                }
            }

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the paged grid "gv_List" behind <c>SiteUrl</c>, opens each row's detail page on
/// www.ynzb.com.cn and builds one <c>BidInfo</c> (plus attachment records) per detail page that
/// contains a table with <c>cellspacing="1"</c>.
/// </summary>
/// <param name="crawlAll">
/// When false, the method returns as soon as the result list holds at least <c>MaxCount</c> items.
/// </param>
/// <returns>
/// The collected items; the list is empty when the first listing page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList results = new List<BidInfo>();
    string listingHtml = string.Empty;
    try
    {
        listingHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
    }
    catch
    {
        return results;
    }

    // The page total is the text of the span with id "LblPageCount".
    int totalPages = 1;
    Parser parser = new Parser(new Lexer(listingHtml));
    NodeList pageCountNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "LblPageCount")));
    if (pageCountNodes != null && pageCountNodes.Count > 0)
    {
        try
        {
            totalPages = int.Parse(pageCountNodes.AsString());
        }
        catch
        {
            // Unparseable page count: stay with a single page.
        }
    }

    for (int pageNo = 1; pageNo <= totalPages; pageNo++)
    {
        if (pageNo > 1)
        {
            // Later pages are requested by posting the "go to page" form back to the same URL.
            string viewState = this.ToolWebSite.GetAspNetViewState(listingHtml);
            string eventValidation = this.ToolWebSite.GetAspNetEventValidation(listingHtml);
            NameValueCollection form = this.ToolWebSite.GetNameValueCollection(
                new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "TBuildInc", "TFindContractorName", "SArea", "SCCSort", "txtGO", "__EVENTVALIDATION" },
                new string[] { "lbtnGO", "", viewState, "", "", "0", "", pageNo.ToString(), eventValidation });
            try
            {
                listingHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, form);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(listingHtml));
        NodeList gridNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "gv_List")));
        if (gridNodes == null || gridNodes.Count < 1)
        {
            continue;
        }

        TableTag grid = gridNodes[0] as TableTag;
        for (int rowIndex = 0; rowIndex < grid.RowCount; rowIndex++)
        {
            // These fields are never filled in by this crawler and reach GenBidInfo empty.
            string endDate = string.Empty;
            string bidType = string.Empty;
            string otherType = string.Empty;

            TableRow row = grid.Rows[rowIndex];

            // Cell text containing ".." is replaced by the "title" attribute of the cell's span.
            string buildUnit = row.Columns[1].ToNodePlainString();
            if (buildUnit.Contains(".."))
            {
                buildUnit = row.Columns[1].GetSpan().GetAttribute("title");
            }

            ATag titleLink = row.Columns[2].GetATag();
            string prjName = titleLink.GetAttribute("title");

            string bidUnit = row.Columns[3].ToNodePlainString();
            if (bidUnit.Contains(".."))
            {
                bidUnit = row.Columns[3].GetSpan().GetAttribute("title");
            }

            string beginDate = row.Columns[4].ToNodePlainString().GetDateRegex();
            string area = row.Columns[5].ToNodePlainString();
            string infoUrl = "http://www.ynzb.com.cn/" + titleLink.Link;

            string detailHtml = string.Empty;
            try
            {
                detailHtml = ToolHtml.GetHtmlByUrl(this.SiteUrl, infoUrl, Encoding.UTF8).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(detailHtml));
            NodeList detailTables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellspacing", "1")));
            if (detailTables == null || detailTables.Count < 1)
            {
                continue;
            }

            string htmlTxt = detailTables.AsHtml();

            // Flatten the detail table: the first row contributes its first cell as a line,
            // every other row is emitted as "odd cell:even cell" pairs, one pair per line.
            TableTag detailTable = detailTables[0] as TableTag;
            StringBuilder ctxBuilder = new StringBuilder();
            for (int r = 0; r < detailTable.RowCount; r++)
            {
                if (r == 0)
                {
                    ctxBuilder.Append(detailTable.Rows[r].Columns[0].ToNodePlainString()).Append("\r\n");
                    continue;
                }

                for (int c = 0; c < detailTable.Rows[r].ColumnCount; c++)
                {
                    ctxBuilder.Append(detailTable.Rows[r].Columns[c].ToNodePlainString());
                    ctxBuilder.Append((c + 1) % 2 == 0 ? "\r\n" : ":");
                }
            }
            string bidCtx = ctxBuilder.ToString();

            // The address is extracted but not handed to GenBidInfo below.
            string prjAddress = bidCtx.GetAddressRegex();
            string prjMgr = bidCtx.GetMgrRegex();
            if (prjMgr.Contains("/") || prjMgr.Contains("-"))
            {
                prjMgr = string.Empty;
            }
            string bidMoney = bidCtx.GetMoneyRegex();
            string code = bidCtx.GetCodeRegex();
            string specType = "建设工程";
            string msgType = "云南省住房和城乡建设厅";

            BidInfo info = ToolDb.GenBidInfo("云南省", "云南省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, infoUrl, prjMgr, htmlTxt);
            results.Add(info);

            // Register every attachment-like anchor found in the detail table.
            parser = new Parser(new Lexer(htmlTxt));
            NodeList anchors = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (anchors != null && anchors.Count > 0)
            {
                for (int k = 0; k < anchors.Count; k++)
                {
                    ATag anchor = anchors[k] as ATag;
                    if (!anchor.IsAtagAttach())
                    {
                        continue;
                    }

                    string link = anchor.Link.ToLower().Contains("http")
                        ? anchor.Link
                        : "http://www.ynzb.com.cn/" + anchor.Link;
                    BaseAttach attach = ToolDb.GenBaseAttach(anchor.LinkText, info.Id, link);
                    base.AttachList.Add(attach);
                }
            }

            if (!crawlAll && results.Count >= this.MaxCount)
            {
                return results;
            }
        }
    }

    return results;
}
/// <summary>
/// Crawls the listing at <c>SiteUrl</c> and its "index_{n}.html" follow-up pages on
/// www.szns.gov.cn, opens each entry's detail page and builds one <c>BidInfo</c> per detail page
/// that contains a <c>div</c> with class "hyxzf2".
/// </summary>
/// <remarks>
/// The method always returns the result list (never null), and a follow-up page that fails to
/// download is skipped instead of the previous page being parsed a second time.
/// </remarks>
/// <param name="crawlAll">
/// When false, the method returns as soon as the result list holds at least <c>MaxCount</c> items.
/// </param>
/// <returns>
/// The collected items; the list is empty when the first listing page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    string htl = string.Empty;
    string cookiestr = string.Empty;
    int page = 1;
    try
    {
        htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.UTF8, ref cookiestr);
    }
    catch (Exception)
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(htl));
    NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "hyxma03")));
    Regex regexPage = new Regex(@"共\d+页");
    try
    {
        page = int.Parse(regexPage.Match(nodeList.AsString()).Value.Trim(new char[] { '共', '页' }));
    }
    catch (Exception)
    {
        // Unparseable pager text: stay with a single page.
    }

    // Patterns are loop-invariant, so they are built once up front.
    Regex regDate = new Regex(@"\d{4}-\d{1,2}-\d{1,2}");
    Regex regeximg = new Regex(@"<img[^>]*>"); // strips images
    Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
    Regex regBuidUnit = new Regex(@"(招标人|建设单位)(:|:)[^\r\n]+\r\n");
    Regex regCode = new Regex(@"工程编号(:|:)[^\r\n]+\r\n");
    Regex regBidUnit = new Regex(@"中标人(:|:)[^\r\n]+\r\n");
    Regex regMoney = new Regex(@"(中标价|中标价格)(:|:)[^\r\n]+\r\n");
    Regex regBidMoney = new Regex(@"[0-9]+[.]{0,1}[0-9]+");
    Regex regprjMgr = new Regex(@"(总监|建造师|建造师(总监))(:|:)[^\r\n]+\r\n");

    for (int i = 1; i <= page; i++)
    {
        if (i > 1)
        {
            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode("http://www.szns.gov.cn/publish/main/1/19/26/zbtbxx/5466/index_" + i.ToString() + ".html"), Encoding.UTF8);
            }
            catch (Exception)
            {
                // 'htl' still holds the previous page; parsing it again would duplicate its items.
                continue;
            }
        }

        parser = new Parser(new Lexer(htl));
        NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "hyxdianbeijing")));
        if (tableNodeList.Count == 0)
        {
            continue;
        }

        // The j-th anchor found under the entry divs is paired with the j-th entry div.
        NodeList entryAnchors = tableNodeList.SearchFor(typeof(ATag), true);
        for (int j = 0; j < tableNodeList.Count; j++)
        {
            ATag aTag = entryAnchors[j] as ATag;
            string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty,
                   code = string.Empty, beginDate = string.Empty, endDate = string.Empty, bidType = string.Empty,
                   specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty,
                   prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

            prjName = aTag.LinkText;
            beginDate = regDate.Match(tableNodeList[j].ToPlainTextString()).Value.Trim();
            InfoUrl = "http://www.szns.gov.cn" + aTag.Link.Replace("amp;", "").Trim();

            string htmldetail = string.Empty;
            try
            {
                htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.UTF8).Replace(" ", "");
            }
            catch (Exception)
            {
                continue;
            }

            Parser parserdetail = new Parser(new Lexer(htmldetail));
            NodeList dtnode = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "hyxzf2")));
            if (dtnode.Count == 0)
            {
                continue;
            }

            HtmlTxt = regeximg.Replace(dtnode.AsHtml(), "");
            bidCtx = dtnode.AsString().Replace("\n", "\r\n").Replace(" ", "").Trim();
            bidCtx = regexHtml.Replace(bidCtx, "");

            buildUnit = regBuidUnit.Match(bidCtx).Value.Replace("招标人:", "").Replace("建设单位:", "").Trim();
            code = regCode.Match(bidCtx).Value.Replace("工程编号:", "").Trim();
            bidUnit = regBidUnit.Match(bidCtx).Value.Replace("中标人:", "").Trim();
            bidMoney = regMoney.Match(bidCtx).Value.Replace("中标价:", "").Replace("中标价格:", "").Replace(",", "").Trim();

            if (bidMoney.Contains("万"))
            {
                // Amount is already expressed with "万": keep the number in front of it.
                bidMoney = bidMoney.Remove(bidMoney.IndexOf("万")).Trim();
                bidMoney = regBidMoney.Match(bidMoney).Value;
            }
            else
            {
                try
                {
                    // Otherwise divide by 10000; results below 0.1 are stored as "0".
                    bidMoney = (decimal.Parse(regBidMoney.Match(bidMoney).Value) / 10000).ToString();
                    if (decimal.Parse(bidMoney) < decimal.Parse("0.1"))
                    {
                        bidMoney = "0";
                    }
                }
                catch (Exception)
                {
                    bidMoney = "0";
                }
            }

            prjMgr = regprjMgr.Match(bidCtx).Value.Replace("建造师:", "").Replace("总监:", "").Replace("建造师(总监):", "").Trim();
            msgType = "深圳市南山区政府采购及招标中心";
            specType = "建设工程";
            // The bid type is derived from the raw title before the title itself is normalised.
            bidType = ToolHtml.GetInviteTypes(prjName);
            prjName = ToolDb.GetPrjName(prjName);

            BidInfo info = ToolDb.GenBidInfo("广东省", "深圳区及街道工程", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
            list.Add(info);

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    // Return what was collected; returning null here would throw away a complete crawl.
    return list;
}
/// <summary>
/// Crawls the paged grid "ctl00_cph_context_GridView1" behind <c>SiteUrl</c>, opens each row's
/// detail page on www.dgzb.com.cn and builds one <c>BidInfo</c> (plus attachment records) per row.
/// </summary>
/// <remarks>
/// All pages from 1 through the parsed page count are visited, including the last one (and the
/// only one when the listing has a single page). Each detail page is downloaded once; both the
/// stored HTML and the plain-text context are derived from that single response.
/// </remarks>
/// <param name="crawlAll">
/// When false, the method returns as soon as the result list holds at least <c>MaxCount</c> items.
/// </param>
/// <returns>
/// The collected items; the list is empty when the first listing page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();

    // Determine the page count.
    int pageInt = 1;
    string html = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.UTF8);
    }
    catch (Exception)
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList sNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_cph_context_GridViewPaingTwo1_lblGridViewPagingDesc"), new TagNameFilter("span")));
    string pageString = sNode.AsString();
    Regex regexPage = new Regex(@"共[^页]+页");
    Match pageMatch = regexPage.Match(pageString);
    try
    {
        pageInt = int.Parse(pageMatch.Value.Replace("共", "").Replace("页", "").Trim());
    }
    catch (Exception)
    {
        // Unparseable pager text: stay with a single page.
    }

    // Patterns are loop-invariant, so they are built once up front.
    Regex regPrjAdd = new Regex(@"(工程地点|工程地址):[^\r\n]+[\r\n]{1}");
    Regex regMoney = new Regex(@"(中标价|中标值):[^元]+元{1}");
    Regex regBidMoney = new Regex(@"[0-9]+[.]{0,1}[0-9]+");
    Regex regBidUnit = new Regex(@"(中标人|中标单位):[^\r\n]+[\r\n]{1}");
    Regex regprjMgr = new Regex(@"(项目经理):[^\r\n]+[\r\n]{1}");

    string cookiestr = string.Empty;
    // Inclusive upper bound: the last page must be crawled as well.
    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            // Page i is requested by posting the "next" button with (i - 1) in the forward-to box.
            string viewState = this.ToolWebSite.GetAspNetViewState(html);
            string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__LASTFOCUS", "__VIEWSTATE", "__EVENTVALIDATION", "ctl00$cph_context$drp_selSeach", "ctl00$cph_context$txt_strWhere", "ctl00$cph_context$drp_Rq", "ctl00$cph_context$GridViewPaingTwo1$txtGridViewPagingForwardTo", "ctl00$cph_context$GridViewPaingTwo1$btnNext.x", "ctl00$cph_context$GridViewPaingTwo1$btnNext.y" },
                new string[] { string.Empty, string.Empty, string.Empty, viewState, eventValidation, "1", string.Empty, "3", (i - 1).ToString(), "9", "7" });
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
            }
            catch (Exception)
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_cph_context_GridView1"), new TagNameFilter("table")));
        if (nodeList == null || nodeList.Count < 1)
        {
            continue;
        }

        TableTag table = nodeList[0] as TableTag;
        // Row 0 is skipped; data rows start at index 1.
        for (int j = 1; j < table.RowCount; j++)
        {
            string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty,
                   code = string.Empty, beginDate = string.Empty, endDate = string.Empty, bidType = string.Empty,
                   specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty,
                   prjAddress = string.Empty, prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

            TableRow tr = table.Rows[j] as TableRow;
            code = tr.Columns[1].ToPlainTextString().Trim();
            prjName = tr.Columns[2].ToPlainTextString().Trim();
            buildUnit = tr.Columns[3].ToPlainTextString().Trim();

            // The date cell holds "begin - end"; the end part may be missing.
            string[] dateParts = tr.Columns[4].ToPlainTextString().Trim().GetReplace(" - ", "&").Split('&');
            beginDate = dateParts[0].Trim();
            if (dateParts.Length > 1)
            {
                endDate = dateParts[1].Trim();
            }

            ATag aTag = tr.Columns[2].SearchFor(typeof(ATag), true)[0] as ATag;
            InfoUrl = "http://www.dgzb.com.cn:8080/dgjyweb/sitemanage/" + aTag.Link;

            string htmldetail = string.Empty;
            try
            {
                // One download feeds both views of the page: the stored HTML fragment and the
                // line-broken variant used for text extraction.
                string strippedDetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace(" ", "");
                Parser dtlparserHTML = new Parser(new Lexer(strippedDetail.Trim()));
                NodeList dtnodeHTML = dtlparserHTML.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_cph_context_span_MetContent"), new TagNameFilter("span")));
                HtmlTxt = dtnodeHTML.AsHtml();
                htmldetail = strippedDetail.Replace("</br>", "\r\n").Replace("<br>", "\r\n").Replace("<br/>", "\r\n");
            }
            catch (Exception)
            {
                continue;
            }

            Parser dtlparser = new Parser(new Lexer(htmldetail));
            NodeList dtnode = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_cph_context_span_MetContent"), new TagNameFilter("span")));
            bidCtx = dtnode.AsString().Replace(" ", "");

            // The address is extracted but not handed to GenBidInfo below.
            prjAddress = regPrjAdd.Match(bidCtx).Value.Replace("工程地点:", "").Replace("工程地址:", "").Trim();
            msgType = "东莞市建设工程交易中心";
            specType = "建设工程";

            bidMoney = regMoney.Match(bidCtx).Value.Replace("中标价:", "").Replace("中标值:", "").Replace("元", "").Trim();
            if (!string.IsNullOrEmpty(regBidMoney.Match(bidMoney).Value))
            {
                // "万" also covers the "万元" / "万美元" spellings.
                if (bidMoney.Contains("万"))
                {
                    bidMoney = regBidMoney.Match(bidMoney).Value;
                }
                else
                {
                    try
                    {
                        // Otherwise divide by 10000; results below 0.1 are stored as "0".
                        bidMoney = (decimal.Parse(regBidMoney.Match(bidMoney).Value) / 10000).ToString();
                        if (decimal.Parse(bidMoney) < decimal.Parse("0.1"))
                        {
                            bidMoney = "0";
                        }
                    }
                    catch (Exception)
                    {
                        bidMoney = "0";
                    }
                }
            }

            bidUnit = regBidUnit.Match(bidCtx).Value.Replace("中标人:", "").Replace("中标单位:", "").Trim();
            prjMgr = regprjMgr.Match(bidCtx).Value.Replace("项目经理:", "").Trim();
            prjName = ToolDb.GetPrjName(prjName);
            bidType = ToolHtml.GetInviteTypes(prjName);

            if (!string.IsNullOrEmpty(bidUnit))
            {
                // Keep only the first comma-separated bidder.
                string[] unit = bidUnit.Split(',');
                if (unit.Length > 0)
                {
                    bidUnit = unit[0];
                }
            }
            if (Encoding.Default.GetByteCount(bidUnit) > 150)
            {
                bidUnit = string.Empty;
            }

            BidInfo info = ToolDb.GenBidInfo("广东省", "东莞市区", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
            list.Add(info);

            // Attachments are listed in a separate grid on the same detail page.
            // NOTE(review): attachment links are built without the ":8080" port that InfoUrl uses — confirm.
            dtlparser.Reset();
            NodeList fileNode = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_cph_context_DownLoadFiles1_GridView2"), new TagNameFilter("table")));
            if (fileNode != null && fileNode.Count > 0 && fileNode[0] is TableTag)
            {
                TableTag fileTable = fileNode[0] as TableTag;
                for (int f = 1; f < fileTable.Rows.Length; f++)
                {
                    BaseAttach attach = ToolDb.GenBaseAttach(fileTable.Rows[f].Columns[1].ToPlainTextString().Trim(), info.Id, "http://www.dgzb.com.cn/dgjyweb/sitemanage/" + (fileTable.Rows[f].Columns[1].SearchFor(typeof(ATag), true)[0] as ATag).Link);
                    base.AttachList.Add(attach);
                }
            }

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// No-op override: the body is empty, so <paramref name="oBid"/> and <paramref name="oAsk"/> are ignored.
/// </summary>
/// <param name="oBid">Bid-side argument; unused.</param>
/// <param name="oAsk">Ask-side argument; unused.</param>
public override void BestBidAskQuote(BidInfo oBid, AskInfo oAsk)
{
}
/// <summary>
/// Crawls the paged "zhongbiao" listing of www.szsszx.com, opens each entry's detail page and
/// builds one <c>BidInfo</c> per entry.
/// </summary>
/// <remarks>
/// A follow-up listing page that fails to download is skipped, so items collected from earlier
/// pages are still returned. Each detail page is downloaded once; both the stored HTML and the
/// plain-text context are derived from that single response.
/// </remarks>
/// <param name="crawlAll">
/// When false, the method returns as soon as the result list holds at least <c>MaxCount</c> items.
/// </param>
/// <returns>
/// The collected items; the list is empty when the first listing page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();

    // Determine the page count.
    int pageInt = 1;
    string html = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode("http://www.szsszx.com/tender/pager?key=zhongbiao&pagenumber=20&pageindex=1"), Encoding.UTF8);
    }
    catch (Exception)
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pagelist")));
    if (tdNodes != null && tdNodes.Count > 0)
    {
        // The pager text is matched against "1/<total>".
        string pageTemp = tdNodes.AsString().Replace(" ", "");
        Regex regpage = new Regex(@"1/\d+");
        try
        {
            pageInt = int.Parse(regpage.Match(pageTemp).Value.Replace("1/", ""));
        }
        catch (Exception)
        {
            // Unparseable pager text: stay with a single page.
        }
    }

    // Patterns are loop-invariant, so they are built once up front.
    Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
    Regex regCode = new Regex(@"招标编号(:|:)[^\r\n]+\r\n");
    Regex regbuildUnit = new Regex(@"(采购人|采购单位|采购代理机构)(:|:)[^\r\n]+\r\n");
    Regex regBidMoneystr = new Regex(@"(中标价|价格|金额)(:|:)[^\r\n]+\r\n");
    Regex regBidMoney = new Regex(@"[0-9]+[.]{0,1}[0-9]+");

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode("http://www.szsszx.com/tender/pager?key=zhongbiao&pagenumber=20&pageindex=" + i.ToString()), Encoding.UTF8);
            }
            catch (Exception)
            {
                // Skip this page rather than abort the crawl and lose what was collected.
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(new TagNameFilter("li"));
        if (nodeList == null || nodeList.Count < 1)
        {
            continue;
        }

        for (int j = 0; j < nodeList.Count; j++)
        {
            string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty,
                   code = string.Empty, beginDate = string.Empty, endDate = string.Empty, bidType = string.Empty,
                   specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty,
                   prjAddress = string.Empty, prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;

            // Each list item provides the title link and a span with the bracketed date.
            CompositeTag obj = nodeList[j] as CompositeTag;
            ATag aTag = obj.SearchFor(typeof(ATag), true)[0] as ATag;
            Span dateSpan = obj.SearchFor(typeof(Span), true)[0] as Span;
            prjName = aTag.GetAttribute("title");
            beginDate = dateSpan.ToPlainTextString().Trim(new char[] { '[', ']' });
            InfoUrl = "http://www.szsszx.com" + aTag.Link;

            string htmldetail = string.Empty;
            try
            {
                // One download feeds both views of the page: the stored HTML fragment and the
                // line-broken, script-free variant used for text extraction.
                string strippedDetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace(" ", "");
                Parser dtlparserHTML = new Parser(new Lexer(strippedDetail.Trim()));
                NodeList dtnodeHTML = dtlparserHTML.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("class", "news-content"), new TagNameFilter("div")));
                HtmlTxt = dtnodeHTML.AsHtml();
                htmldetail = strippedDetail.Replace("</br>", "\r\n").Replace("<br>", "\r\n");
                htmldetail = regexHtml.Replace(htmldetail, "");
            }
            catch (Exception)
            {
                continue;
            }

            Parser dtlparser = new Parser(new Lexer(htmldetail));
            NodeList dtnode = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("class", "news-content"), new TagNameFilter("div")));
            bidCtx = dtnode.ToHtml().ToCtxString();
            bidCtx = Regex.Replace(bidCtx, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase).Replace("<", "").Replace(">", "").Replace("\n\n\n\t", "\r\n").Replace("\n\n", "\r\n");

            code = regCode.Match(bidCtx).Value.Replace("招标编号", "").Replace(":", "").Replace(":", "").Trim();
            if (Encoding.Default.GetByteCount(code) > 50)
            {
                code = "";
            }

            buildUnit = regbuildUnit.Match(bidCtx).Value.Replace("采购人", "").Replace("采购单位", "").Replace("采购代理机构", "").Replace(":", "").Replace(":", "").Trim();
            // The address is extracted but not handed to GenBidInfo below.
            prjAddress = bidCtx.GetAddressRegex();
            bidUnit = bidCtx.GetBidRegex();

            string monerystr = regBidMoneystr.Match(bidCtx).Value.Replace("中标价", "").Replace("价格", "").Replace("金额", "").Replace("万元整", "").Replace(":", "").Replace(":", "").Replace(",", "").Replace(",", "").Trim();
            if (!string.IsNullOrEmpty(regBidMoney.Match(monerystr).Value))
            {
                // NOTE(review): "万元整" was already removed above, so the last condition is always true — confirm intent.
                if ((monerystr.Contains("万元") || monerystr.Contains("万美元")) && !monerystr.Contains("万元整"))
                {
                    bidMoney = regBidMoney.Match(monerystr).Value;
                }
                else
                {
                    try
                    {
                        // Otherwise divide by 10000; results below 0.1 are stored as "0".
                        bidMoney = (decimal.Parse(regBidMoney.Match(monerystr).Value) / 10000).ToString();
                        if (decimal.Parse(bidMoney) < decimal.Parse("0.1"))
                        {
                            bidMoney = "0";
                        }
                    }
                    catch (Exception)
                    {
                        bidMoney = "0";
                    }
                }
            }

            specType = "其他";
            msgType = "深圳市深水水务咨询有限公司";
            prjName = ToolDb.GetPrjName(prjName);
            bidType = ToolHtml.GetInviteTypes(prjName);

            BidInfo info = ToolDb.GenBidInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
            list.Add(info);

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls award notices for "鄂州市公共资源交易中心". The response of <c>SiteUrl</c> is treated as
/// JSON (the text between its first '{' and its last '}'); every entry of its "rows" array is
/// expanded with a second JSON request keyed by the row's "yuanXiTongId" and stored as a
/// <c>BidInfo</c>.
/// </summary>
/// <param name="crawlAll">
/// When false, the method returns as soon as the list holds <c>MaxCount</c> records.
/// </param>
/// <returns>
/// The collected records. The list is empty (never null) when the listing cannot be downloaded,
/// contains no JSON object, or has no "rows" array.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    // After every RecordsPerPause stored records the crawl sleeps for PauseMilliseconds.
    const int RecordsPerPause = 50;
    const int PauseMilliseconds = 1000 * 60 * 5;

    IList list = new List<BidInfo>();
    int count = 0;

    string html;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch
    {
        // Report "nothing crawled" as an empty list instead of null.
        return list;
    }
    if (string.IsNullOrEmpty(html))
    {
        return list;
    }

    // Cut the JSON object out of whatever wraps it; without both braces there is nothing to parse.
    int startIndex = html.IndexOf("{");
    int endIndex = html.LastIndexOf("}");
    if (startIndex < 0 || endIndex < startIndex)
    {
        return list;
    }
    html = html.Substring(startIndex, (endIndex + 1) - startIndex);

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    Dictionary<string, object> listing = serializer.DeserializeObject(html) as Dictionary<string, object>;
    object rowsValue;
    if (listing == null || !listing.TryGetValue("rows", out rowsValue))
    {
        return list;
    }
    object[] rows = rowsValue as object[];
    if (rows == null)
    {
        return list;
    }

    foreach (object row in rows)
    {
        Dictionary<string, object> dic = row as Dictionary<string, object>;
        if (dic == null)
        {
            continue;
        }

        string endDate = string.Empty, otherType = string.Empty;
        string prjName = Convert.ToString(dic["title"]);
        string beginDate = Convert.ToString(dic["faBuStartTimeText"]).GetDateRegex();
        object guid = dic["yuanXiTongId"];

        // Detail record of the notice, delivered as JSON.
        string foUrl = "http://www.ezztb.gov.cn//jyw/jyw/queryZbgs.do?guid=" + guid;
        string detailJson;
        try
        {
            detailJson = this.ToolWebSite.GetHtmlByUrl(foUrl, Encoding.UTF8);
        }
        catch
        {
            continue;
        }

        Dictionary<string, object> dics;
        try
        {
            dics = serializer.DeserializeObject(detailJson) as Dictionary<string, object>;
        }
        catch
        {
            continue;
        }
        if (dics == null)
        {
            continue;
        }

        // The public page is requested as well; its content is not read below, but a record
        // whose page cannot be downloaded is skipped.
        string InfoUrl = "http://www.ezztb.gov.cn/jiaoyixingxi/zbgs_view.html?guid=" + guid;
        try
        {
            this.ToolWebSite.GetHtmlByUrl(InfoUrl);
        }
        catch
        {
            continue;
        }

        Dictionary<string, object> bd = dics["bd"] as Dictionary<string, object>;
        if (bd == null)
        {
            continue;
        }
        Dictionary<string, object> gcx = bd["gc"] as Dictionary<string, object>;
        Dictionary<string, object> xm = bd["xm"] as Dictionary<string, object>;
        if (gcx == null || xm == null)
        {
            continue;
        }

        string code = Convert.ToString(gcx["gcBH"]);
        string buildUnit = Convert.ToString(gcx["zbRName"]);
        string bidUnit = Convert.ToString(dics["tbrName"]);
        string prjMgr = Convert.ToString(dics["xiangMuJiLi"]);
        string bidMoney = Convert.ToString(dics["zhongBiaoJE"]);
        string prName = Convert.ToString(gcx["gcName"]);
        string bdName = Convert.ToString(bd["bdName"]);
        string xmBh = Convert.ToString(xm["xm_BH"]);
        string xmMc = Convert.ToString(xm["xm_Name"]);
        string startTime = Convert.ToString(dics["zbgsStartTime"]);
        startTime = ToolHtml.GetDateTimeByLong(Convert.ToInt64(startTime)).ToString("yyyy-MM-dd HH:mm");
        string endTime = Convert.ToString(dics["zbgsEndTime"]);
        endTime = ToolHtml.GetDateTimeByLong(Convert.ToInt64(endTime)).ToString("yyyy-MM-dd HH:mm");
        string zbrAndLht = Convert.ToString(dics["zbrAndLht"]);
        string zbdlJG = Convert.ToString(dics["zbdlJG"]);
        string zbFangShi = Convert.ToString(dics["zbFangShi"]);
        string zhongBiaoGQ = Convert.ToString(dics["zhongBiaoGQ"]);
        string ziGeDengJi = Convert.ToString(dics["ziGeDengJi"]);
        string ziGeZhengShu = Convert.ToString(dics["ziGeZhengShu"]);

        // The stored HTML is a two-column table synthesised from the JSON fields
        // (bidMoney appears here exactly as delivered, before any unit conversion).
        string[,] tableRows =
        {
            { "招标项目编号:", code },
            { "招标项目名称:", prName },
            { "标段名称:", bdName },
            { "项目编号:", xmBh },
            { "项目名称:", xmMc },
            { "公示时间:", startTime + "至" + endTime },
            { "招标人:", zbrAndLht },
            { "招标代理机构:", zbdlJG },
            { "招标方式:", zbFangShi },
            { "中标人:", bidUnit },
            { "中标价:", bidMoney },
            { "中标工期:", zhongBiaoGQ },
            { "项目经理:", prjMgr },
            { "资格等级:", ziGeDengJi },
            { "资格证书:", ziGeZhengShu }
        };
        StringBuilder tableHtml = new StringBuilder("<table>");
        for (int r = 0; r < tableRows.GetLength(0); r++)
        {
            tableHtml.Append("<tr><th>").Append(tableRows[r, 0]).Append("</th><td>")
                     .Append(tableRows[r, 1]).Append("</td></tr>");
        }
        tableHtml.Append("</table>");
        string HtmlTxt = tableHtml.ToString();
        string bidCtx = HtmlTxt.Replace("</td>", "\r\n").ToCtxString();

        // NOTE(review): amounts above 100000 are divided by 1000000 — confirm the divisor is
        // intended (it is kept unchanged here).
        decimal amount;
        if (decimal.TryParse(bidMoney, out amount) && amount > 100000)
        {
            bidMoney = (amount / 1000000).ToString();
        }

        string msgType = "鄂州市公共资源交易中心";
        string specType = "政府采购";
        string bidType = prjName.GetInviteBidType();
        buildUnit = buildUnit.Replace(" ", "");

        BidInfo info = ToolDb.GenBidInfo("湖北省", "湖北省及地市", "鄂州市", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
        list.Add(info);
        count++;

        // NOTE(review): attachments are searched in the synthesised table above, not in the
        // downloaded notice page — confirm this is the intended source.
        Parser parser = new Parser(new Lexer(HtmlTxt));
        NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
        if (aNode != null && aNode.Count > 0)
        {
            for (int k = 0; k < aNode.Count; k++)
            {
                ATag a = aNode[k] as ATag;
                if (a != null && a.IsAtagAttach())
                {
                    string link = a.Link.ToLower().Contains("http")
                        ? a.Link
                        : "http://www.ezztb.gov.cn/" + a.Link;
                    BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                    base.AttachList.Add(attach);
                }
            }
        }

        if (count >= RecordsPerPause)
        {
            Thread.Sleep(PauseMilliseconds);
            count = 0;
        }
        if (!crawlAll && list.Count >= this.MaxCount)
        {
            return list;
        }
    }
    return list;
}
/// <summary>
/// Walks the paged award-notice list under www.szqh.gov.cn, downloads every linked notice and
/// converts the content of its <c>div</c> with id "content" into a <c>BidInfo</c> record.
/// </summary>
/// <param name="crawlAll">
/// When false, the method returns as soon as the list holds <c>MaxCount</c> records.
/// </param>
/// <returns>The collected records; empty when the first list page cannot be downloaded.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<BidInfo>();
    int pageInt = 1;
    string html = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
    }
    catch
    {
        return list;
    }

    // The text of the last pager link is taken as the page count; it stays 1 if it is not a number.
    Parser pagerParser = new Parser(new Lexer(html));
    NodeList pagerLinks = pagerParser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "page")), true), new TagNameFilter("a")));
    if (pagerLinks != null && pagerLinks.Count > 0)
    {
        string lastLinkText = pagerLinks[pagerLinks.Count - 1].ToNodePlainString();
        try
        {
            pageInt = int.Parse(lastLinkText);
        }
        catch
        {
        }
    }

    for (int pageNo = 1; pageNo <= pageInt; pageNo++)
    {
        if (pageNo > 1)
        {
            // Page N of the list is published as index_{N-1}.shtml.
            try
            {
                int fileIndex = pageNo - 1;
                html = this.ToolWebSite.GetHtmlByUrl("http://www.szqh.gov.cn/sygnan/xxgk/xxgkml/zbcg/zbgs/index_" + fileIndex + ".shtml");
            }
            catch
            {
                continue;
            }
        }

        Parser listParser = new Parser(new Lexer(html));
        NodeList entries = listParser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "gl-news-box-02")), true), new TagNameFilter("li")));
        if (entries == null || entries.Count == 0)
        {
            continue;
        }

        for (int entryIndex = 0; entryIndex < entries.Count; entryIndex++)
        {
            INode entry = entries[entryIndex];
            ATag aTag = entry.GetATag();
            if (aTag == null)
            {
                continue;
            }

            string endDate = string.Empty, otherType = string.Empty, area = string.Empty;
            string prjName = aTag.GetAttribute("title");
            string beginDate = entry.ToPlainTextString().GetDateRegex();
            string InfoUrl = "http://www.szqh.gov.cn/sygnan/xxgk/xxgkml/zbcg/zbgs/" + aTag.Link.GetReplace("./", "");

            string htmldtl;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
            }
            catch
            {
                continue;
            }

            Parser detailParser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = detailParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "content")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            string HtmlTxt = dtlNode.AsHtml();
            string bidCtx = HtmlTxt.ToLower().ToCtxString();
            string buildUnit = bidCtx.GetBuildRegex();

            string code = bidCtx.GetCodeRegex().GetCodeDel();
            if (string.IsNullOrWhiteSpace(code))
            {
                code = bidCtx.GetRegexBegEnd("编号:", ")");
            }

            string bidUnit = bidCtx.GetBidRegex();
            if (string.IsNullOrEmpty(bidUnit))
            {
                bidUnit = bidCtx.GetRegex("中标候选人名称,中签单位,第一成交候选人,成交候选人,中标(成交)供应商1,中标人");
            }

            // Three attempts at the amount, each used only when the previous one gave "0" or nothing.
            string bidMoney = bidCtx.GetMoneyRegex();
            if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
            {
                bidMoney = bidCtx.GetMoneyRegex(null, true);
            }
            if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
            {
                bidMoney = bidCtx.GetRegex("总额").GetMoney();
            }

            string prjMgr = bidCtx.GetMgrRegex();

            if (string.IsNullOrWhiteSpace(bidUnit))
            {
                // No winner in the running text: look at the MsoNormalTable tables, pairing each
                // cell of the first row (label) with the cell below it in the second row (value).
                Parser tableParser = new Parser(new Lexer(HtmlTxt));
                NodeList wordTables = tableParser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("class", "MsoNormalTable"), new TagNameFilter("table")));
                if (wordTables != null && wordTables.Count > 0)
                {
                    // First table whose header row mentions an award but not candidates.
                    for (int t = 0; t < wordTables.Count; t++)
                    {
                        TableTag awardTable = wordTables[t] as TableTag;
                        string headerHtml = awardTable.Rows[0].ToHtml();
                        bool mentionsAward = headerHtml.Contains("中标") || headerHtml.Contains("成交");
                        if (!mentionsAward || headerHtml.Contains("候选"))
                        {
                            continue;
                        }

                        string pairs = string.Empty;
                        for (int col = 0; col < awardTable.Rows[0].ColumnCount; col++)
                        {
                            try
                            {
                                pairs += awardTable.Rows[0].Columns[col].ToNodePlainString() + ":";
                                pairs += awardTable.Rows[1].Columns[col].ToNodePlainString() + "\r\n";
                            }
                            catch
                            {
                            }
                        }

                        bidUnit = pairs.GetBidRegex();
                        if (string.IsNullOrEmpty(bidUnit))
                        {
                            bidUnit = pairs.GetRegex("中标(成交)供应商");
                        }
                        break;
                    }

                    // First table whose header row carries a quotation column.
                    for (int t = 0; t < wordTables.Count; t++)
                    {
                        TableTag priceTable = wordTables[t] as TableTag;
                        string headerHtml = priceTable.Rows[0].ToHtml();
                        if (!headerHtml.Contains("投标报价") && !headerHtml.Contains("总报价"))
                        {
                            continue;
                        }

                        string pairs = string.Empty;
                        for (int col = 0; col < priceTable.Rows[0].ColumnCount; col++)
                        {
                            try
                            {
                                pairs += priceTable.Rows[0].Columns[col].ToNodePlainString() + ":";
                                pairs += priceTable.Rows[1].Columns[col].ToNodePlainString() + "\r\n";
                            }
                            catch
                            {
                            }
                        }

                        string unit = pairs.GetBidRegex();
                        // NOTE(review): the fallback writes to bidUnit, not to unit — confirm this is intended.
                        if (string.IsNullOrEmpty(unit))
                        {
                            bidUnit = pairs.GetRegex("中标(成交)供应商");
                        }
                        if (!string.IsNullOrEmpty(bidUnit) && bidUnit == unit)
                        {
                            bidMoney = pairs.GetMoneyRegex(null, false, "万元");
                        }
                        break;
                    }
                }
            }

            // Values above 100000 are divided by 10000; unparsable values are left untouched.
            try
            {
                decimal parsedMoney = Convert.ToDecimal(bidMoney);
                if (parsedMoney > 100000)
                {
                    bidMoney = (parsedMoney / 10000).ToString();
                }
            }
            catch
            {
            }

            if (bidUnit.Contains("金额"))
            {
                bidUnit = "";
            }
            // NOTE(review): as written this replacement maps "&" to itself — confirm the intended literals.
            if (bidUnit.Contains("&"))
            {
                bidUnit = bidUnit.Replace("&", "&");
            }

            string specType = "建设工程";
            string bidType = prjName.GetInviteBidType();
            string msgType = "深圳市前海深港现代服务业合作区管理局";

            BidInfo info = ToolDb.GenBidInfo("广东省", "深圳区及街道工程", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
            list.Add(info);
            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }
    return list;
}
/// <summary>
/// Crawls the paged notice grid (table id "MoreInfoList1_DataGrid1") served at <c>SiteUrl</c>
/// for "四川省公共资源交易中心". Pages after the first are requested by posting the grid's
/// view state, CSRF token and pager event back to <c>SiteUrl</c>. Each linked notice becomes a
/// <c>BidInfo</c>; attachment links found in the notice body are added to <c>AttachList</c>.
/// </summary>
/// <param name="crawlAll">
/// When false, the method returns as soon as the list holds <c>MaxCount</c> records.
/// </param>
/// <returns>The collected records; empty when the first page cannot be downloaded.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<BidInfo>();
    int pageInt = 1;
    string html = string.Empty;
    string cookiestr = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8, ref cookiestr);
    }
    catch
    {
        return list;
    }

    // Page count is read from the pager cell; it stays 1 when the text cannot be parsed.
    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("vAlign", "bottom")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            string temp = pageNode.AsString().GetRegexBegEnd("总页数:", "当前");
            pageInt = int.Parse(temp);
        }
        catch
        {
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            // Post-back built from the hidden fields of the page currently held in html.
            string viewState = this.ToolWebSite.GetAspNetViewState(html);
            string csrfToken = ToolHtml.GetHtmlInputValue(html, "__CSRFTOKEN");
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                new string[] { "__CSRFTOKEN", "__VIEWSTATE", "__EVENTTARGET", "__EVENTARGUMENT" },
                new string[] { csrfToken, viewState, "MoreInfoList1$Pager", i.ToString() });
            try
            {
                cookiestr = cookiestr.GetReplace(new string[] { "path=/;", "HttpOnly", "," });
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }
        TableTag table = listNode[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        for (int j = 0; j < table.RowCount; j++)
        {
            string endDate = string.Empty, otherType = string.Empty, prjMgr = string.Empty;

            TableRow tr = table.Rows[j];
            // Rows without a title link or a date column cannot describe a notice; skip them
            // instead of letting one odd row abort the whole crawl.
            if (tr.ColumnCount < 3)
            {
                continue;
            }
            ATag aTag = tr.Columns[1].GetATag();
            if (aTag == null)
            {
                continue;
            }

            string prjName = aTag.GetAttribute("title");
            string beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
            string InfoUrl = "http://www.spprec.com" + aTag.Link;

            string htmldtl;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "ivs_content")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            string HtmlTxt = dtlNode.AsHtml();
            string bidCtx = HtmlTxt.GetReplace("<br />,<br/>,<br>,</p>", "\r\n").ToCtxString();

            // When the notice body contains a table, the text used for extraction is rebuilt
            // from the first table as "label:value" lines.
            parser = new Parser(new Lexer(HtmlTxt));
            NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
            if (tableNode != null && tableNode.Count > 0)
            {
                StringBuilder flattened = new StringBuilder();
                TableTag htmlTable = tableNode[0] as TableTag;
                for (int r = 0; r < htmlTable.RowCount; r++)
                {
                    // Rows 8 and 9 are only read as the value rows of row 7 below.
                    if (r == 8 || r == 9)
                    {
                        continue;
                    }
                    for (int c = 0; c < htmlTable.Rows[r].ColumnCount; c++)
                    {
                        string temp = htmlTable.Rows[r].Columns[c].ToNodePlainString();
                        if (r == 7)
                        {
                            // Row 7 cells are labels; the value is taken from the same column two
                            // rows down, or one row down when that cell does not exist. The value
                            // is resolved before anything is appended so the label is written once.
                            string value = null;
                            bool hasValue = false;
                            try
                            {
                                value = htmlTable.Rows[r + 2].Columns[c].ToNodePlainString();
                                hasValue = true;
                            }
                            catch
                            {
                                try
                                {
                                    value = htmlTable.Rows[r + 1].Columns[c].ToNodePlainString();
                                    hasValue = true;
                                }
                                catch
                                {
                                    // Neither row has this column: keep the bare label.
                                }
                            }
                            flattened.Append(temp).Append(":");
                            if (hasValue)
                            {
                                flattened.Append(value).Append("\r\n");
                            }
                        }
                        else
                        {
                            if (string.IsNullOrEmpty(temp))
                            {
                                continue;
                            }
                            // Odd-positioned cells are labels, even-positioned cells end the line.
                            flattened.Append(temp).Append((c + 1) % 2 == 0 ? "\r\n" : ":");
                        }
                    }
                }
                bidCtx = flattened.ToString();
            }

            string buildUnit = bidCtx.GetBuildRegex().GetReplace("/");
            string code = bidCtx.GetCodeRegex().GetChina().GetCodeDel();
            string bidUnit = bidCtx.GetRegex("中标候选人名称").GetReplace("/");
            if (string.IsNullOrEmpty(bidUnit))
            {
                bidUnit = bidCtx.GetBidRegex().GetReplace("/");
            }
            if (bidUnit.Contains("投标报价"))
            {
                bidUnit = "";
            }
            string bidMoney = bidCtx.GetMoneyRegex();
            string msgType = "四川省公共资源交易中心";
            string bidType = "建设工程";
            string specType = bidType;

            BidInfo info = ToolDb.GenBidInfo("四川省", "四川省及地市", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
            list.Add(info);

            // Attachment links inside the notice body; relative links are resolved against the site root.
            parser = new Parser(new Lexer(HtmlTxt));
            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode != null && aNode.Count > 0)
            {
                for (int k = 0; k < aNode.Count; k++)
                {
                    ATag tag = aNode[k] as ATag;
                    if (tag != null && tag.IsAtagAttach())
                    {
                        string link = tag.Link.ToLower().Contains("http")
                            ? tag.Link
                            : "http://www.spprec.com" + tag.Link;
                        BaseAttach attach = ToolDb.GenBaseAttach(tag.LinkText, info.Id, link);
                        base.AttachList.Add(attach);
                    }
                }
            }

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }
    return list;
}
/// <summary>
/// Crawls the notice list of "惠东县白盆珠镇人民政府" (pages are requested as
/// <c>SiteUrl + "&amp;page=N"</c>). A notice whose title contains "中标", "成交" or "结果" is stored
/// as a <c>BidInfo</c>; every other notice is stored as an <c>InviteInfo</c>. When the notice
/// body contains an image, the first image is saved as an attachment unless a record with the
/// same <c>InfoUrl</c> already exists; attachment links are added to <c>AttachList</c>.
/// </summary>
/// <param name="crawlAll">
/// When false, the method returns as soon as the list holds <c>MaxCount</c> records.
/// </param>
/// <returns>
/// A list mixing <c>BidInfo</c> and <c>InviteInfo</c> records; empty when the first page cannot
/// be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    int pageInt = 1;
    string html = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
    }
    catch
    {
        return list;
    }

    // The second-to-last pager link carries the page count; any failure falls back to one page.
    Parser parser = new Parser(new Lexer(html));
    NodeList sNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "0h120")), true), new TagNameFilter("a")));
    if (sNode != null && sNode.Count > 0)
    {
        try
        {
            string temp = sNode[sNode.Count - 2].ToNodePlainString();
            pageInt = Convert.ToInt32(temp.GetReplace("[,]"));
        }
        catch
        {
            pageInt = 1;
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + i, Encoding.Default);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "0h120")));
        if (viewList == null || viewList.Count == 0)
        {
            continue;
        }

        for (int j = 0; j < viewList.Count; j++)
        {
            TableTag table = viewList[j] as TableTag;
            ATag aTag = viewList[j].GetATag();
            if (table == null || aTag == null)
            {
                continue;
            }

            string endDate = string.Empty, remark = string.Empty, otherType = string.Empty, prjMgr = string.Empty;
            string prjName = aTag.GetAttribute("title");
            string beginDate = table.ToNodePlainString().GetDateRegex();
            string InfoUrl = "http://baipenzhu.huidong.gov.cn/" + aTag.Link;

            string htlDtl;
            try
            {
                htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htlDtl));
            NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "fontzoom")));
            if (dtl == null || dtl.Count == 0)
            {
                continue;
            }

            string HtmlTxt = dtl.AsHtml();

            // The listing title decides the record type; it is evaluated before the name is refined.
            bool isAward = prjName.Contains("中标") || prjName.Contains("成交") || prjName.Contains("结果");

            // ---- extraction shared by both record types ----
            string ctx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();

            // A bracketed part of the title is taken as the name; for tenders this is skipped
            // when the title also contains "《".
            if (prjName.Contains("(") && (isAward || !prjName.Contains("《")))
            {
                prjName = prjName.GetRegexBegEnd("(", ")");
            }
            string tempName = ctx.GetRegex("工程名称,项目名称");
            if (!string.IsNullOrEmpty(tempName))
            {
                prjName = tempName;
            }

            string noticeType = prjName.GetInviteBidType();
            string code = ctx.GetCodeRegex().GetCodeDel();
            string buildUnit = ctx.GetBuildRegex();
            // Cut the owner at "招标代理", then keep everything up to and including the first "公司".
            if (buildUnit.Contains("招标代理"))
            {
                buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
            }
            if (buildUnit.Contains("公司"))
            {
                buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
            }

            // First image of the body: its src is prefixed with the site root and the stored
            // HTML (lower-cased) is rewritten to that URL.
            Parser imgParser = new Parser(new Lexer(HtmlTxt.ToLower()));
            NodeList imgNode = imgParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
            string src = string.Empty;
            if (imgNode != null && imgNode.Count > 0)
            {
                string imgUrl = (imgNode[0] as ImageTag).GetAttribute("src");
                src = "http://baipenzhu.huidong.gov.cn/" + imgUrl;
                HtmlTxt = HtmlTxt.ToLower().GetReplace(imgUrl, src);
            }

            string msgType = "惠东县白盆珠镇人民政府";
            string specType = "政府采购";

            if (isAward)
            {
                string bidUnit = ctx.GetBidRegex();
                if (string.IsNullOrEmpty(bidUnit))
                {
                    bidUnit = ctx.GetRegex("中标候选公司,中标候选人");
                }

                string bidMoney = ctx.GetMoneyRegex();
                if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                {
                    string priceText = ctx.GetReplace("元\r\n,元;\r\n", "元kdxx").GetRegexBegEnd("中标价", "kdxx");
                    bidMoney = priceText.GetMoney("万元");
                }
                // Values above 100000 are divided by 10000; unparsable values are left untouched.
                decimal amount;
                if (decimal.TryParse(bidMoney, out amount) && amount > 100000)
                {
                    bidMoney = (amount / 10000).ToString();
                }

                BidInfo info = ToolDb.GenBidInfo("广东省", "惠州市区", "惠东县", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, ctx, string.Empty, msgType, noticeType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                list.Add(info);

                if (!string.IsNullOrEmpty(src))
                {
                    // The URL comes from a scraped page, so quotes are doubled before it is
                    // placed inside the SQL string literal.
                    string sql = string.Format("select Id from BidInfo where InfoUrl='{0}'", (info.InfoUrl ?? string.Empty).Replace("'", "''"));
                    object obj = ToolDb.ExecuteScalar(sql);
                    if (obj == null || obj.ToString() == "")
                    {
                        try
                        {
                            BaseAttach attach = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                            if (attach != null)
                            {
                                ToolDb.SaveEntity(attach, "");
                            }
                        }
                        catch
                        {
                            // Saving the image is best effort; the record itself is already in the list.
                        }
                    }
                }

                parser = new Parser(new Lexer(HtmlTxt));
                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (aNode != null && aNode.Count > 0)
                {
                    for (int k = 0; k < aNode.Count; k++)
                    {
                        ATag a = aNode[k].GetATag();
                        if (a.IsAtagAttach())
                        {
                            string link = a.Link.ToLower().Contains("http")
                                ? a.Link
                                : "http://baipenzhu.huidong.gov.cn/" + a.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }
                }
            }
            else
            {
                string prjAddress = ctx.GetAddressRegex();

                InviteInfo info = ToolDb.GenInviteInfo("广东省", "惠州市区", "惠东县", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, ctx, remark, msgType, noticeType, specType, otherType, InfoUrl, HtmlTxt);
                list.Add(info);

                if (!string.IsNullOrEmpty(src))
                {
                    // The URL comes from a scraped page, so quotes are doubled before it is
                    // placed inside the SQL string literal.
                    string sql = string.Format("select Id from InviteInfo where InfoUrl='{0}'", (info.InfoUrl ?? string.Empty).Replace("'", "''"));
                    object obj = ToolDb.ExecuteScalar(sql);
                    if (obj == null || obj.ToString() == "")
                    {
                        try
                        {
                            BaseAttach attach = ToolHtml.GetBaseAttach(src, prjName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                            if (attach != null)
                            {
                                ToolDb.SaveEntity(attach, "");
                            }
                        }
                        catch
                        {
                            // Saving the image is best effort; the record itself is already in the list.
                        }
                    }
                }

                parser = new Parser(new Lexer(HtmlTxt));
                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (aNode != null && aNode.Count > 0)
                {
                    for (int k = 0; k < aNode.Count; k++)
                    {
                        ATag a = aNode[k].GetATag();
                        if (a.IsAtagAttach())
                        {
                            string link = a.Link.ToLower().Contains("http")
                                ? a.Link
                                : "http://baipenzhu.huidong.gov.cn/" + a.Link;
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }
                }
            }

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }
    return list;
}
/// <summary>
/// Crawls the paged result list of "深圳市振东招标代理有限公司" (www.szzdzb.cn). Every data row of
/// the list table yields a code and a title; the linked detail page supplies the publication
/// date, the winning bidder and the amount for a <c>BidInfo</c> record.
/// </summary>
/// <param name="crawlAll">
/// When false, the method returns as soon as the list holds <c>MaxCount</c> records.
/// </param>
/// <returns>The collected records; empty when the first page cannot be downloaded.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<BidInfo>();

    // Determine the page count.
    int pageInt = 1;
    string html = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("bgColor", "#EEF4F9")));
    if (tdNodes != null && tdNodes.Count > 0)
    {
        string pageTemp = tdNodes.AsString().Replace(" ", "").Replace(" ", "").Trim();
        Regex regpage = new Regex(@"1/[0-9]+页");
        try
        {
            // The pager reads "1/N页"; N is the page count. No match leaves the count at 1.
            pageInt = int.Parse(regpage.Match(pageTemp).Value.Split('/')[1].Replace("页", "").Trim());
        }
        catch (Exception)
        {
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl("http://www.szzdzb.cn/Product-index-id-11-p-" + i + ".html", Encoding.UTF8);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "hangao27"))), new TagNameFilter("table")));
        if (nodeList == null || nodeList.Count == 0)
        {
            continue;
        }
        TableTag table = nodeList[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        // Row 0 is not read as data.
        for (int j = 1; j < table.RowCount; j++)
        {
            string buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty,
                   endDate = string.Empty, otherType = string.Empty, prjMgr = string.Empty;

            TableRow tr = table.Rows[j];
            if (tr.ColumnCount < 2)
            {
                continue;
            }
            string code = tr.Columns[0].ToPlainTextString().Trim();
            string prjName = tr.Columns[1].ToPlainTextString().Trim();

            // A row without a link has no detail page to read; skip it instead of failing.
            NodeList anchors = tr.Columns[1].SearchFor(typeof(ATag), true);
            ATag aTag = (anchors != null && anchors.Count > 0) ? anchors[0] as ATag : null;
            if (aTag == null)
            {
                continue;
            }
            string InfoUrl = "http://www.szzdzb.cn" + aTag.Link;

            string htmldetail;
            try
            {
                htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace(" ", "").GetJsString();
            }
            catch
            {
                continue;
            }

            Parser dtlparser = new Parser(new Lexer(htmldetail));
            NodeList dtnode = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "hangao27"))), new TagNameFilter("table")));
            if (dtnode == null || dtnode.Count == 0)
            {
                continue;
            }

            string HtmlTxt = dtnode.AsHtml();
            string bidCtx = HtmlTxt.ToLower().GetReplace("</p>,<br/>", "\r\n").ToCtxString();
            string beginDate = bidCtx.GetRegex("发布时间").GetDateRegex();

            // Both notice layouts keep their figures in the first table under td#pContent.
            parser = new Parser(new Lexer(HtmlTxt));
            NodeList nodeTab = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "pContent"))), new TagNameFilter("table")));
            TableTag tabNode = (nodeTab != null && nodeTab.Count > 0) ? nodeTab[0] as TableTag : null;

            if (bidCtx.Contains("确定中标供应商"))
            {
                // The row after the "确定中标供应商" row holds the bidder, the one after that the amount.
                if (tabNode != null)
                {
                    for (int r = 0; r < tabNode.RowCount; r++)
                    {
                        try
                        {
                            if (tabNode.Rows[r].ToNodePlainString().Contains("确定中标供应商"))
                            {
                                bidUnit = tabNode.Rows[r + 1].Columns[1].ToNodePlainString();
                                bidMoney = tabNode.Rows[r + 2].Columns[1].ToNodePlainString().Replace(",", "").Replace(",", "").GetMoney("万元");
                                break;
                            }
                        }
                        catch
                        {
                            // Missing rows or cells: keep scanning, the text fallbacks below still apply.
                        }
                    }
                }
                if (string.IsNullOrWhiteSpace(bidUnit))
                {
                    bidUnit = bidCtx.GetBidRegex();
                }
                if (bidMoney == "0" || string.IsNullOrWhiteSpace(bidMoney))
                {
                    bidMoney = bidCtx.Replace(",", "").Replace(",", "").GetMoneyRegex();
                }
            }
            else
            {
                bidUnit = bidCtx.GetBidRegex(new string[] { "第一备选供应商" });

                // The amount is taken from the table row that mentions the bidder. Without a
                // bidder name there is nothing to match: an empty string is contained in every
                // row and would select the first row regardless of its content.
                if (tabNode != null && !string.IsNullOrEmpty(bidUnit))
                {
                    for (int r = 0; r < tabNode.RowCount; r++)
                    {
                        try
                        {
                            if (tabNode.Rows[r].ToNodePlainString().Contains(bidUnit))
                            {
                                bidMoney = tabNode.Rows[r].Columns[2].ToNodePlainString().Replace(",", "").Replace(",", "").GetMoney();
                                break;
                            }
                        }
                        catch
                        {
                            // Row without a third cell: try the next row.
                        }
                    }
                }
            }

            string specType = "其他";
            string msgType = "深圳市振东招标代理有限公司";
            // The type is derived from the raw list title, before the title is normalised.
            string bidType = ToolHtml.GetInviteTypes(prjName);
            prjName = ToolDb.GetPrjName(prjName);

            BidInfo info = ToolDb.GenBidInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
            list.Add(info);
            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }
    return list;
}
/// <summary>
/// Crawls the paged notice list of "汕头建设网" (www.stjs.gov.cn). Rows whose title cell contains
/// "中标" or "结果" are stored as <c>BidInfo</c>; all other rows are stored as <c>InviteInfo</c>.
/// The fields are extracted from the first width="100%" table of the detail page when there is
/// one, otherwise from the plain text of the detail block.
/// </summary>
/// <param name="crawlAll">
/// When false, the method returns as soon as the list holds <c>MaxCount</c> records.
/// </param>
/// <returns>
/// A list mixing <c>BidInfo</c> and <c>InviteInfo</c> records; empty when the first page cannot
/// be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    int pageInt = 1;
    string html = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default);
    }
    catch (Exception)
    {
        return list;
    }

    // Page count: the text between "/" and "下一页" in the pager cell; any failure means one page.
    Parser pagerParser = new Parser(new Lexer(html));
    NodeList pagerCells = pagerParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("width", "700")));
    if (pagerCells != null && pagerCells.Count > 0)
    {
        try
        {
            string pagerText = pagerCells.AsString().ToNodeString();
            Regex pagerPattern = new Regex(@"/[^下一页]+下一页");
            string pageCountText = pagerPattern.Match(pagerText).Value.Replace("/", "").Replace("下一页", "");
            pageInt = Convert.ToInt32(pageCountText);
        }
        catch
        {
            pageInt = 1;
        }
    }

    for (int pageNo = 1; pageNo <= pageInt; pageNo++)
    {
        if (pageNo > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl("http://www.stjs.gov.cn/zbtb/qxzhaobiaonews.asp?page=" + pageNo.ToString(), Encoding.Default);
            }
            catch
            {
                continue;
            }
        }

        Parser listParser = new Parser(new Lexer(html));
        NodeList listTables = listParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "705")));
        if (listTables == null || listTables.Count == 0)
        {
            continue;
        }

        // The last width="705" table on the page is the one that is read.
        TableTag listTable = listTables[listTables.Count - 1] as TableTag;
        for (int rowIndex = 0; rowIndex < listTable.RowCount; rowIndex++)
        {
            TableRow tr = listTable.Rows[rowIndex];
            string prjType = tr.Columns[1].ToNodePlainString();
            bool isResult = prjType.Contains("中标") || prjType.Contains("结果");

            string endDate = string.Empty, remark = string.Empty, otherType = string.Empty, prjMgr = string.Empty;

            // ---- steps common to both record types (only the stripped title tag differs) ----
            string prjName = prjType.Replace(isResult ? "[中标公示]" : "[招标公告]", "");
            string noticeType = prjName.GetInviteBidType();
            string beginDate = tr.Columns[2].ToNodePlainString().Replace(".", "-");
            string InfoUrl = "http://www.stjs.gov.cn/" + tr.Columns[1].GetATagHref().Replace("../", "");

            string detailHtml;
            try
            {
                detailHtml = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                detailHtml = detailHtml.GetJsString();
            }
            catch
            {
                continue;
            }

            Parser detailParser = new Parser(new Lexer(detailHtml));
            NodeList detailTables = detailParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "680")));
            if (detailTables == null || detailTables.Count == 0)
            {
                continue;
            }

            string HtmlTxt = detailTables.ToHtml();
            string msgType = "汕头建设网";
            string specType = "建设工程";

            if (isResult)
            {
                string bidCtx = HtmlTxt.ToCtxString();

                Parser fieldParser = new Parser(new Lexer(HtmlTxt));
                NodeList fieldTables = fieldParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%")));

                // Text the fields are extracted from: the flattened field table if present,
                // otherwise the plain text of the whole detail block.
                string source = bidCtx;
                if (fieldTables != null && fieldTables.Count > 0)
                {
                    TableTag fieldTable = fieldTables[0] as TableTag;
                    StringBuilder pairs = new StringBuilder();
                    for (int k = 0; k < fieldTable.RowCount; k++)
                    {
                        for (int d = 0; d < fieldTable.Rows[k].ColumnCount; d++)
                        {
                            // Even-indexed cells are labels, odd-indexed cells are their values.
                            string cell = fieldTable.Rows[k].Columns[d].ToPlainTextString().Replace(" ", "").Replace("\n", "").Replace("\r", "");
                            pairs.Append(cell + (d % 2 == 0 ? ":" : "\r\n"));
                        }
                    }
                    source = pairs.ToString();
                }

                string bidUnit = source.GetBidRegex();
                string bidMoney = source.GetMoneyRegex();
                string buildUnit = source.GetBuildRegex();
                string prjAddress = source.GetAddressRegex();
                string code = source.GetCodeRegex();

                BidInfo info = ToolDb.GenBidInfo("广东省", "汕头市区", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, noticeType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                list.Add(info);
            }
            else
            {
                // Six passes of blank-line collapsing, exactly as many as the text receives.
                string inviteCtx = HtmlTxt.ToCtxString();
                for (int pass = 0; pass < 6; pass++)
                {
                    inviteCtx = inviteCtx.Replace("\r\n\r\n", "\r\n");
                }

                Parser fieldParser = new Parser(new Lexer(HtmlTxt));
                NodeList fieldTables = fieldParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%")));

                string source = inviteCtx;
                if (fieldTables != null && fieldTables.Count > 0)
                {
                    TableTag fieldTable = fieldTables[0] as TableTag;
                    StringBuilder pairs = new StringBuilder();
                    for (int k = 0; k < fieldTable.RowCount; k++)
                    {
                        for (int d = 0; d < fieldTable.Rows[k].ColumnCount; d++)
                        {
                            // Even-indexed cells are labels, odd-indexed cells are their values.
                            string cell = fieldTable.Rows[k].Columns[d].ToPlainTextString().Replace(" ", "").Replace("\r\n", "");
                            pairs.Append(cell + (d % 2 == 0 ? ":" : "\r\n"));
                        }
                    }
                    source = pairs.ToString();
                }

                string buildUnit = source.GetBuildRegex();
                string code = source.GetCodeRegex().Replace("/", "");
                string prjAddress = source.GetAddressRegex();

                InviteInfo info = ToolDb.GenInviteInfo("广东省", "汕头市区", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, noticeType, specType, otherType, InfoUrl, HtmlTxt);
                list.Add(info);
            }

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }
    return list;
}
/// <summary>
/// Crawls the paged notice list that starts at <c>SiteUrl</c> and builds one record for every
/// list entry whose detail page contains a <c>div</c> with class <c>news_content</c>.
/// </summary>
/// <remarks>
/// Entries whose list text contains "中标" or "结果" are stored as <c>BidInfo</c>; every other
/// entry is stored as <c>InviteInfo</c>. List pages and detail pages that fail to download are skipped.
/// </remarks>
/// <param name="crawlAll">When <c>false</c>, the crawl stops as soon as the list holds <c>MaxCount</c> records.</param>
/// <returns>The collected records; empty when the first list page cannot be downloaded.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    int pageInt = 1;
    string html = string.Empty;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default);
    }
    catch (Exception)
    {
        // Without the first list page there is nothing to crawl.
        return list;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList sNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("vAlign", "bottom")));
    if (sNode != null && sNode.Count > 0)
    {
        try
        {
            // The pager text is matched as ",共N页"; stripping the wrapper leaves the page count N.
            Regex reg = new Regex(@",共[^页]+页");
            string page = reg.Match(sNode.AsString()).Value.Replace(",共", "").Replace("页", "");
            pageInt = Convert.ToInt32(page);
        }
        catch
        {
            // Page count not found or not numeric: fall back to a single page.
            pageInt = 1;
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl("http://www.ydszj.gov.cn/channel/138006/content?pageNo=" + i.ToString(), Encoding.Default);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("style", "height:350px;"))), new HasAttributeFilter("li")));
        if (nodeList == null || nodeList.Count == 0)
        {
            continue;
        }

        for (int j = 0; j < nodeList.Count; j++)
        {
            var item = nodeList[j];
            string itemText = item.ToPlainTextString();
            bool isBidResult = itemText.Contains("中标") || itemText.Contains("结果");

            // The list text holds the title and the date; the title is the text with the date removed.
            // string.Replace throws ArgumentException for an empty search string, so the date is
            // only stripped when one was actually found.
            string beginDate = itemText.GetDateRegex();
            string prjName = string.IsNullOrEmpty(beginDate) ? itemText : itemText.Replace(beginDate, "");
            string noticeType = prjName.GetInviteBidType();
            string InfoUrl = "http://www.ydszj.gov.cn" + item.GetATagHref();

            string htlDtl;
            try
            {
                htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                htlDtl = htlDtl.GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htlDtl));
            NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "news_content")));
            if (dtlList == null || dtlList.Count == 0)
            {
                continue;
            }

            string HtmlTxt = dtlList.ToHtml();
            string ctx = HtmlTxt.ToCtxString();
            string msgType = "英德市住房和城乡建设局";
            string specType = "建设工程";

            // Fields this site does not provide are passed as empty strings.
            string endDate = string.Empty, remark = string.Empty, otherType = string.Empty, prjMgr = string.Empty;

            if (isBidResult)
            {
                string bidUnit = ctx.GetBidRegex();
                string buildUnit = ctx.GetBuildRegex();
                string code = ctx.GetCodeRegex();
                string bidMoney = ctx.GetMoneyRegex();

                BidInfo info = ToolDb.GenBidInfo("广东省", "清远市区", "英德市", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, ctx, string.Empty, msgType, noticeType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                list.Add(info);
            }
            else
            {
                string buildUnit = ctx.GetBuildRegex();
                string code = ctx.GetCodeRegex();
                string prjAddress = ctx.GetAddressRegex();

                InviteInfo info = ToolDb.GenInviteInfo("广东省", "清远市区", "英德市", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, ctx, remark, msgType, noticeType, specType, otherType, InfoUrl, HtmlTxt);
                list.Add(info);
            }

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Empty override: nothing is done with the supplied bid.
/// </summary>
/// <param name="oInfo">Bid record; not read by this implementation.</param>
public override void BidQuote(BidInfo oInfo)
{
    // No-op.
}
/// <summary>
/// Parses one list page, downloads the detail page linked from each row and appends one
/// <c>BidInfo</c> per row to <paramref name="list"/>. Links on the detail page that point at
/// <c>http://www.ebc.huizhou.gov.cn/index/loadNewsFile</c> are added to <c>AttachList</c>.
/// </summary>
/// <param name="list">Collection that receives the generated records.</param>
/// <param name="html">Markup of the list page; rows are read from the first <c>table</c> with class <c>lefttable</c>.</param>
/// <param name="crawlAll">When <c>false</c>, processing stops once <paramref name="list"/> holds <c>MaxCount</c> records.</param>
public void DealHtml(IList list, string html, bool crawlAll)
{
    Parser parserDtl = new Parser(new Lexer(html));
    NodeList aNodes = parserDtl.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "lefttable")));
    if (aNodes == null || aNodes.Count == 0)
    {
        return;
    }

    TableTag table = aNodes[0] as TableTag;
    if (table == null)
    {
        return;
    }

    // Built once: none of these patterns depends on the row being processed.
    Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
    Regex regexCode = new Regex("(工程编号|项目编号):[^\r\n]+[\r\n]{1}");
    Regex regexWinner = new Regex("(中标人|中标单位):[^\r\n]+[\r\n]{1}");
    Regex regexOwner = new Regex("(招标人|建设单位):[^\r\n]+[\r\n]{1}");
    Regex regexMoney = new Regex("(中标价|其中标价为|中标价格):[^\r\n]+[\r\n]{1}");
    Regex regBidMoney = new Regex(@"[0-9]+[.]{0,1}[0-9]+");
    Regex regexCtx = new Regex("<!--[^<]+-->");
    Regex regDate = new Regex(@"\d{4}-\d{1,2}-\d{1,2}");
    Regex regBeDate = new Regex(@"\d{4}年\d{1,2}月\d{1,2}日");

    // The first and the last row of the table are not processed.
    for (int t = 1; t < table.RowCount - 1; t++)
    {
        // winner: value labelled 中标人/中标单位; owner: value labelled 招标人/建设单位.
        string code = string.Empty, winner = string.Empty, owner = string.Empty,
               bidMoney = string.Empty, ctx = string.Empty, HtmlTxt = string.Empty;

        TableRow tr = table.Rows[t] as TableRow;
        if (tr == null || tr.ColumnCount < 3)
        {
            // Columns 1 and 2 are read below; a shorter row cannot be a data row.
            continue;
        }

        NodeList rowLinks = tr.SearchFor(typeof(ATag), true);
        ATag aTag = (rowLinks != null && rowLinks.Count > 0) ? rowLinks[0] as ATag : null;
        if (aTag == null)
        {
            // No detail link in this row.
            continue;
        }

        string InfoUrl = aTag.Link;
        // NOTE(review): Replace(" ", " ") as written swaps a space for a space; possibly the first
        // argument was a non-breaking space or "&nbsp;" originally — confirm against the repository.
        string prjName = tr.Columns[1].ToPlainTextString().Replace("\r\n", "").Replace("\t", "").Replace(" ", " ").Trim();
        string endDate = tr.Columns[2].ToPlainTextString().Replace("\r\n", "").Replace("\t", "").Replace(" ", " ").Trim();

        string htmlDtl;
        try
        {
            htmlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
        }
        catch (Exception)
        {
            continue;
        }

        htmlDtl = regexHtml.Replace(htmlDtl, "");
        Parser parserCtx = new Parser(new Lexer(htmlDtl));
        NodeList ctxNode = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "printTb lefttable")));
        if (ctxNode != null && ctxNode.Count > 0)
        {
            Parser parserdiv = new Parser(new Lexer(htmlDtl));
            NodeList buttonNodes = parserdiv.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "biuuu_button")));

            // Drop the button block from the stored markup. string.Replace rejects an empty
            // search string, so this is skipped when the block is absent.
            string buttonHtml = buttonNodes != null ? buttonNodes.AsHtml() : string.Empty;
            HtmlTxt = ctxNode.AsHtml();
            if (!string.IsNullOrEmpty(buttonHtml))
            {
                HtmlTxt = HtmlTxt.Replace(buttonHtml, "");
            }
            HtmlTxt = HtmlTxt.Trim();

            TableTag tabTag = ctxNode[0] as TableTag;
            string tableText = tabTag.ToPlainTextString();

            Match matchCode = regexCode.Match(tableText);
            if (matchCode.Value.Length > 0)
            {
                code = matchCode.Value.Substring(matchCode.Value.IndexOf(":") + 1).Replace("\r\n", "").Replace("\t", "").Replace(" ", " ").Trim();
            }

            Match matchWinner = regexWinner.Match(tableText);
            if (matchWinner.Value.Length > 0)
            {
                winner = matchWinner.Value.Substring(matchWinner.Value.IndexOf(":") + 1).Replace("\r\n", "").Replace("\t", "").Replace(" ", " ").Trim();
            }

            Match matchOwner = regexOwner.Match(tableText);
            if (matchOwner.Value.Length > 0)
            {
                owner = matchOwner.Value.Replace("招标人:", "").Replace("建设单位:", "").Replace("\r\n", "").Replace("\t", "").Replace(" ", " ").Trim();
            }

            Match matchMoney = regexMoney.Match(tableText);
            if (matchMoney.Value.Length > 0)
            {
                bidMoney = matchMoney.Value.Replace("中标价:", "").Replace("其中标价为:", "").Replace("中标价格:", "").Replace("\r", "");
            }

            if (bidMoney.Contains("万"))
            {
                // Keep only the number in front of "万".
                bidMoney = bidMoney.Remove(bidMoney.IndexOf("万")).Trim();
                bidMoney = regBidMoney.Match(bidMoney).Value;
            }
            else
            {
                // Amounts without "万" are divided by 10000; results below 0.1, and values
                // that cannot be parsed, are stored as "0".
                decimal amount;
                if (decimal.TryParse(regBidMoney.Match(bidMoney).Value, out amount))
                {
                    decimal scaled = amount / 10000;
                    bidMoney = scaled < 0.1m ? "0" : scaled.ToString();
                }
                else
                {
                    bidMoney = "0";
                }
            }

            if (Encoding.Default.GetByteCount(code) > 50)
            {
                code = "";
            }

            // Substring(0, 150) throws when the string has fewer than 150 characters, which the
            // byte-count guard alone does not rule out; only cut when there is something to cut.
            if (Encoding.Default.GetByteCount(winner) > 150 && winner.Length > 150)
            {
                winner = winner.Substring(0, 150);
            }

            // NOTE(review): the original guard was "Length > 75" while cutting at 150 characters,
            // which threw for lengths 76..149. The cut position is kept; confirm the intended limit.
            if (owner.Length > 150)
            {
                owner = owner.Substring(0, 150);
            }

            ctx = tabTag.Rows[2].Columns[0].ToPlainTextString().Replace(" ", " ").Replace("\r\n\r\n\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");
            if (ctx.Length > 0)
            {
                ctx = regexCtx.Replace(ctx, "");
            }
        }

        parserCtx.Reset();
        ctxNode = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "toptd_bai")));
        string headerText = ctxNode != null ? ctxNode.AsString() : string.Empty;

        // Start date: prefer the date after "公示开始时间" in the body text, otherwise the
        // yyyy-M-d date found in the header cell.
        string headerDate = regDate.Match(headerText).Value.Trim();
        string beginDate = headerDate;
        if (ctx.Contains("公示开始时间"))
        {
            beginDate = regBeDate.Match(ctx.Substring(ctx.IndexOf("公示开始时间"))).Value.Trim();
        }
        if (beginDate == "")
        {
            beginDate = headerDate;
        }

        prjName = ToolDb.GetPrjName(prjName);
        string bidType = ToolHtml.GetInviteTypes(prjName);
        BidInfo info = ToolDb.GenBidInfo("广东省", "惠州市区", "大亚湾区", string.Empty, code, prjName, owner, beginDate, winner, beginDate, endDate, ctx, string.Empty, "惠州市建设工程交易中心", bidType, "建设工程", string.Empty, bidMoney, InfoUrl, string.Empty, HtmlTxt);
        list.Add(info);

        // Rewind before scanning the same document again, as is done before the header-cell scan.
        // NOTE(review): without the rewind this scan presumably resumes where the previous one
        // stopped and finds no anchors — confirm against the parser library.
        parserCtx.Reset();
        ctxNode = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("target", "_blank")));
        NodeList aTagNodes = ctxNode != null ? ctxNode.SearchFor(typeof(ATag), true) : null;
        if (aTagNodes != null)
        {
            for (int a = 0; a < aTagNodes.Count; a++)
            {
                ATag fileTag = aTagNodes[a] as ATag;
                if (fileTag != null && fileTag.Link.Contains("http://www.ebc.huizhou.gov.cn/index/loadNewsFile"))
                {
                    BaseAttach attach = ToolDb.GenBaseAttach(fileTag.ToPlainTextString(), info.Id, fileTag.Link);
                    base.AttachList.Add(attach);
                }
            }
        }

        if (!crawlAll && list.Count >= this.MaxCount)
        {
            return;
        }
    }
}
/// <summary>
/// Forwards a best-bid quote to <c>ProcessBestQuote</c>, tagging it with the
/// <c>BestBidPrice</c> and <c>BestBidVolume</c> level-1 fields.
/// </summary>
/// <param name="info">Quote whose symbol, exchange, price, size, order count, update type and timestamps are passed on.</param>
private void SessionHolderOnBestBidQuote(BidInfo info)
{
    ProcessBestQuote(
        info.Symbol,
        info.Exchange,
        info.Price,
        info.Size,
        info.NumOrders,
        info.UpdateType,
        info.Ssboe,
        info.Usecs,
        Level1Fields.BestBidPrice,
        Level1Fields.BestBidVolume);
}
/// <summary>
/// Walks the paged result list at <c>SiteUrl</c> (following pages via <c>?Paging=N</c>) and
/// creates a <c>BidInfo</c> for each row whose detail page contains the <c>TDContent</c> cell.
/// </summary>
/// <param name="crawlAll">When <c>false</c>, the crawl ends once <c>MaxCount</c> records have been collected.</param>
/// <returns>The list of collected <c>BidInfo</c> records.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList results = new List<BidInfo>();
    string pageHtml = string.Empty;
    int totalPages = 1;

    try
    {
        pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
    }
    catch
    {
        // A failed first request is ignored; pageHtml stays empty and processing continues.
    }

    Parser parser = new Parser(new Lexer(pageHtml));
    NodeList pagerCells = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "huifont")));
    if (pagerCells != null && pagerCells.Count > 0)
    {
        try
        {
            // The pager text has the "1/" prefix removed before it is parsed as the page count.
            totalPages = int.Parse(pagerCells[0].ToNodePlainString().GetReplace("1/"));
        }
        catch
        {
        }
    }

    for (int pageNo = 1; pageNo <= totalPages; pageNo++)
    {
        if (pageNo > 1)
        {
            try
            {
                pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "?Paging=" + pageNo, Encoding.Default);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(pageHtml));
        NodeList grids = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "98%")));
        if (grids == null || grids.Count == 0)
        {
            continue;
        }

        TableTag grid = grids[0] as TableTag;
        for (int rowIdx = 0; rowIdx < grid.RowCount; rowIdx++)
        {
            TableRow row = grid.Rows[rowIdx];
            ATag anchor = row.GetATag();
            if (anchor == null)
            {
                continue;
            }

            // Values that this site never fills are passed through as empty strings.
            string endDate = string.Empty, otherType = string.Empty, area = string.Empty;

            string title = anchor.GetAttribute("title");
            string publishDate = row.Columns[2].ToPlainTextString().GetDateRegex();
            string detailUrl = "http://ggzy.xjbt.gov.cn" + anchor.Link;

            string detailHtml;
            try
            {
                detailHtml = this.ToolWebSite.GetHtmlByUrl(detailUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(detailHtml));
            NodeList contentCells = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
            if (contentCells == null || contentCells.Count == 0)
            {
                continue;
            }

            string contentHtml = contentCells.AsHtml().ToLower().GetReplace("<a href='http://22'>22</a>.");
            string contentText = contentHtml.ToLower().GetReplace("</p>,<br/>", "\r\n").ToCtxString();

            string owner = contentText.GetBuildRegex();
            string projectCode = contentText.GetCodeRegex().GetCodeDel();

            string winner = contentText.GetBidRegex();
            if (string.IsNullOrEmpty(winner))
            {
                winner = contentText.GetRegex("第一中标候选单位为,第一名");
            }

            string money = contentText.GetMoneyRegex();
            if (money == "0" || string.IsNullOrEmpty(money))
            {
                money = contentText.GetMoneyRegex(null, true);
            }

            string manager = contentText.GetMgrRegex();

            if (string.IsNullOrEmpty(winner))
            {
                // Fallback: flatten the last table of the content into "label:value" lines
                // and run the same extractors over that text.
                parser = new Parser(new Lexer(contentHtml));
                NodeList innerTables = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                if (innerTables != null && innerTables.Count > 0)
                {
                    TableTag lastTable = innerTables[innerTables.Count - 1] as TableTag;
                    StringBuilder flat = new StringBuilder();
                    for (int r = 0; r < lastTable.RowCount; r++)
                    {
                        for (int c = 0; c < lastTable.Rows[r].ColumnCount; c++)
                        {
                            string cell = lastTable.Rows[r].Columns[c].ToNodePlainString();
                            bool endsLine = c % 2 == 1 || cell.Contains("工程师") || cell.Contains("注册证号");
                            if (endsLine)
                            {
                                flat.Append(cell + "\r\n");
                            }
                            else
                            {
                                flat.Append(cell.GetReplace(":,:") + ":");
                            }
                        }
                    }

                    string tableText = flat.ToString().GetReplace("单位名称\r\n", "单位名称:");

                    winner = tableText.GetBidRegex();
                    if (string.IsNullOrEmpty(winner))
                    {
                        winner = tableText.GetRegex("单位名称");
                    }
                    if (money == "0" || string.IsNullOrEmpty(money))
                    {
                        money = tableText.GetMoneyRegex();
                    }
                    if (string.IsNullOrEmpty(manager))
                    {
                        manager = tableText.GetMgrRegex();
                    }
                    if (string.IsNullOrEmpty(manager))
                    {
                        manager = tableText.GetRegex("建造师姓名");
                    }
                    if (string.IsNullOrEmpty(projectCode))
                    {
                        projectCode = tableText.GetCodeRegex();
                    }
                    if (string.IsNullOrEmpty(owner))
                    {
                        owner = tableText.GetBuildRegex();
                    }
                }
            }

            // Cut the winner name right after the first "公司" and before any "小写".
            if (winner.Contains("公司"))
            {
                winner = winner.Remove(winner.IndexOf("公司")) + "公司";
            }
            if (winner.Contains("小写"))
            {
                winner = winner.Remove(winner.IndexOf("小写"));
            }

            // Amounts above 100000 are divided by 10000; unparsable values are left untouched.
            decimal amount;
            if (decimal.TryParse(money, out amount) && amount > 100000)
            {
                money = (amount / 10000).ToString();
            }

            string source = "新疆生产建设兵团公共资源交易中心";
            string specType = "政府采购";
            string bidType = "建设工程";

            BidInfo record = ToolDb.GenBidInfo("新疆维吾尔自治区", "新疆维吾尔自治区及地市", area, string.Empty, projectCode, title, owner, publishDate, winner, publishDate, endDate, contentText, string.Empty, source, bidType, specType, otherType, money, detailUrl, manager, contentHtml);
            results.Add(record);

            if (!crawlAll && results.Count >= this.MaxCount)
            {
                return results;
            }
        }
    }

    return results;
}
/// <summary>
/// Crawls the paged result grid (<c>table#GV1</c>) at <c>SiteUrl</c>. Pages after the first are
/// requested by posting the ASP.NET pager form back to the same URL. A <c>BidInfo</c> is created for
/// every grid row whose detail page contains <c>span#Lb_nr</c>.
/// </summary>
/// <param name="crawlAll">When <c>false</c>, the crawl stops as soon as the list holds <c>MaxCount</c> records.</param>
/// <returns>The collected records; empty when the first page cannot be downloaded.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<BidInfo>();
    int pageInt = 1;
    string html = string.Empty;
    string cookiestr = string.Empty;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8, ref cookiestr);
    }
    catch
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "pager")), true), new TagNameFilter("a")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            // The first number in the href of the last pager link is taken as the page count.
            Regex reg = new Regex(@"[0-9]+");
            string temp = reg.Match(pageNode[pageNode.Count - 1].GetATagHref().Replace("'", "")).Value;
            pageInt = int.Parse(temp);
        }
        catch
        {
            // Keep the single-page default when the pager cannot be read.
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            // The postback fields are taken from the most recently downloaded page.
            string viewState = this.ToolWebSite.GetAspNetViewState(html);
            string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                new string[] { "__VIEWSTATE", "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATEENCRYPTED", "__EVENTVALIDATION", "hsa1$DD_LX", "hsa1$wd", "pager_input" },
                new string[] { viewState, "pager", i.ToString(), "", eventValidation, "综合搜索", "", (i - 1).ToString() });
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GV1")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }

        TableTag table = listNode[0] as TableTag;

        // The first and the last row of the grid are not processed.
        for (int j = 1; j < table.RowCount - 1; j++)
        {
            // Fields this site does not provide are passed as empty strings.
            string endDate = string.Empty, prjMgr = string.Empty, otherType = string.Empty;

            TableRow tr = table.Rows[j];
            string prjName = tr.Columns[0].ToNodePlainString();
            string beginDate = tr.Columns[1].ToPlainTextString();

            string href = tr.Columns[0].GetATagHref();
            if (string.IsNullOrEmpty(href))
            {
                // No detail link in this row.
                continue;
            }

            // Query-string separators may arrive HTML-encoded as "&amp;" (presumably because the
            // href is read from raw markup); decode them so the detail URL is valid.
            string InfoUrl = "http://www.ycsggzy.cn/" + href.Replace("&amp;", "&");

            string htmldtl;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "Lb_nr")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            string HtmlTxt = dtlNode.ToHtml();
            string bidCtx = HtmlTxt.ToCtxString();

            string bidUnit = bidCtx.GetBidRegex();
            if (string.IsNullOrWhiteSpace(bidUnit))
            {
                bidUnit = bidCtx.GetBidRegex(null, false);
            }
            if (string.IsNullOrWhiteSpace(bidUnit))
            {
                bidUnit = bidCtx.GetRegex("第一名");
            }
            // The extractors are treated as possibly returning null (the checks above allow for it),
            // so normalise before calling instance methods on the value.
            bidUnit = (bidUnit ?? string.Empty).Replace("?", "");

            string bidMoney = bidCtx.GetMoneyRegex(null, false, "万元");

            if (string.IsNullOrWhiteSpace(bidUnit))
            {
                // Fallback: flatten the first table of the content into "label:value" lines
                // and retry the extraction on that text.
                parser = new Parser(new Lexer(HtmlTxt));
                NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                if (tableNode != null && tableNode.Count > 0)
                {
                    string ctx = string.Empty;
                    TableTag tab = tableNode[0] as TableTag;
                    for (int c = 0; c < tab.RowCount; c++)
                    {
                        if (tab.Rows[c].ColumnCount < 2)
                        {
                            continue;
                        }
                        ctx += tab.Rows[c].Columns[0].ToNodePlainString() + ":";
                        ctx += tab.Rows[c].Columns[1].ToNodePlainString() + "\r\n";
                    }

                    bidUnit = ctx.GetBidRegex();
                    if (string.IsNullOrWhiteSpace(bidMoney) || bidMoney == "0")
                    {
                        bidMoney = ctx.GetMoneyRegex(null, false, "万元");
                    }
                }
            }

            bidUnit = (bidUnit ?? string.Empty).Replace(" ", "");
            if (bidUnit.Contains("公司"))
            {
                // Cut the name right after the first "公司".
                bidUnit = bidUnit.Remove(bidUnit.IndexOf("公司")) + "公司";
            }

            string code = bidCtx.GetCodeRegex();
            string buildUnit = bidCtx.GetBuildRegex();
            string bidType = prjName.GetInviteBidType();
            string specType = "政府采购";
            string msgType = "银川市公共资源交易中心";

            BidInfo info = ToolDb.GenBidInfo("宁夏回族自治区", "宁夏回族自治区及地市", "银川市", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
            list.Add(info);

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the paged notice list (<c>div.newsList li</c>) at <c>SiteUrl</c>. For every entry the
/// notice body is requested from <c>/Notice/NoticeContent</c> using the <c>id</c> taken from the
/// entry link, and a <c>BidInfo</c> is created when the response contains <c>div.new_detail</c>.
/// </summary>
/// <param name="crawlAll">When <c>false</c>, the crawl stops as soon as the list holds <c>MaxCount</c> records.</param>
/// <returns>The collected records; empty when the first page cannot be downloaded.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<BidInfo>();
    int pageInt = 1;
    string html = string.Empty;

    // Labels tried on flattened table text when the generic extractors find nothing.
    const string winnerLabels = "成交候选人,中标单位名称,第一中标候选人,第一成交候选人";
    const string managerLabels = "拟任总监,拟任项目经理";

    // The project-manager value is cut at the first occurrence of each marker, in this order.
    // NOTE(review): "(" appears twice exactly as in the original statements; possibly one of them
    // was a full-width parenthesis — confirm against the repository.
    string[] mgrCutMarkers = { "联系", "电话", "2", "(", "(", "二", "注册", "业绩", "I" };

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
    }
    catch
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pagination f_right")), true), new TagNameFilter("a")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            // The page count is read from the onclick handler of the last pager link.
            string temp = pageNode[pageNode.Count - 1].GetATagValue("onclick").GetRegexBegEnd("Info", ",").GetReplace("(");
            pageInt = int.Parse(temp);
        }
        catch
        {
            // Keep the single-page default when the pager cannot be read.
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&pageSize=15&pageNum=" + i);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "newsList")), true), new TagNameFilter("li")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }

        for (int j = 0; j < listNode.Count; j++)
        {
            INode node = listNode[j];
            ATag aTag = node.GetATag();
            if (aTag == null)
            {
                continue;
            }

            // Fields this site does not provide are passed as empty strings.
            string endDate = string.Empty, otherType = string.Empty;

            // The area is the text between the first pair of brackets in the entry text.
            string area = node.ToNodePlainString().GetReplace("[", "【").GetReplace("]", "】").GetRegexBegEnd("【", "】");
            string prjName = aTag.GetAttribute("title");
            string beginDate = node.ToPlainTextString().GetDateRegex();
            string InfoUrl = "http://www.ahtba.org.cn" + aTag.Link.GetReplace("amp;");

            // The notice id is everything after "id=" in the link. IndexOf returns -1 when the
            // marker is missing, which would make Substring throw, so such entries are skipped.
            int idPos = aTag.Link.IndexOf("id=");
            if (idPos < 0)
            {
                continue;
            }
            string id = aTag.Link.Substring(idPos).GetReplace("id=");

            string htmldtl;
            try
            {
                NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "id" }, new string[] { id });
                htmldtl = this.ToolWebSite.GetHtmlByUrl("http://www.ahtba.org.cn/Notice/NoticeContent", nvc).GetJsString();
            }
            catch
            {
                // The notice body could not be fetched; there is nothing to parse for this entry.
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "new_detail")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            string HtmlTxt = dtlNode.AsHtml();
            string bidCtx = HtmlTxt.ToLower().GetReplace("</p>,<br/>,<br>", "\r\n").ToCtxString();
            string buildUnit = bidCtx.GetBuildRegex();
            string code = bidCtx.GetCodeRegex().GetCodeDel();

            string bidUnit = bidCtx.GetBidRegex();
            if (string.IsNullOrEmpty(bidUnit))
            {
                bidUnit = bidCtx.GetRegex("中标候选人名称,中签单位,第一成交候选人,成交候选人");
            }

            string bidMoney = bidCtx.GetMoneyRegex();
            if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
            {
                bidMoney = bidCtx.GetMoneyRegex(null, true);
            }
            if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
            {
                bidMoney = bidCtx.GetRegex("总额").GetMoney();
            }

            string prjMgr = bidCtx.GetMgrRegex();

            if (string.IsNullOrEmpty(bidUnit))
            {
                // Fallback: flatten every table except those with class "table_detail" into
                // "label:value" lines and retry. Each processed table overwrites bidUnit.
                parser = new Parser(new Lexer(HtmlTxt));
                NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                if (tableNode != null && tableNode.Count > 0)
                {
                    for (int t = 0; t < tableNode.Count; t++)
                    {
                        TableTag tag = tableNode[t] as TableTag;
                        string classStr = tag.GetAttribute("class");
                        if (!string.IsNullOrEmpty(classStr) && classStr.ToLower().Contains("table_detail"))
                        {
                            continue;
                        }

                        string ctx = string.Empty;
                        for (int r = 0; r < tag.RowCount; r++)
                        {
                            for (int c = 0; c < tag.Rows[r].ColumnCount; c++)
                            {
                                string temp = tag.Rows[r].Columns[c].ToNodePlainString().GetReplace(":,:");
                                // Even-indexed cells are treated as labels, odd-indexed cells as values.
                                if ((c + 1) % 2 == 0)
                                {
                                    ctx += temp + "\r\n";
                                }
                                else
                                {
                                    ctx += temp + ":";
                                }
                            }
                        }

                        bidUnit = ctx.GetBidRegex();
                        if (string.IsNullOrEmpty(bidUnit))
                        {
                            bidUnit = ctx.GetRegex(winnerLabels);
                        }
                        if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
                        {
                            bidMoney = ctx.GetMoneyRegex();
                        }
                        if (string.IsNullOrEmpty(prjMgr))
                        {
                            prjMgr = ctx.GetMgrRegex();
                        }
                        if (string.IsNullOrEmpty(prjMgr))
                        {
                            prjMgr = ctx.GetRegex(managerLabels);
                        }

                        if (!bidUnit.Contains("公司"))
                        {
                            // Second attempt: pair the cells of row 4 (labels) with the cells of
                            // row 5 (values), starting at column 1. Best effort: any failure here
                            // leaves the values found so far.
                            ctx = string.Empty;
                            try
                            {
                                for (int r = 1; r < tag.Rows[4].ColumnCount; r++)
                                {
                                    string temp = tag.Rows[4].Columns[r].ToNodePlainString().GetReplace(":,:");
                                    ctx += temp + ":";
                                    ctx += tag.Rows[5].Columns[r].ToNodePlainString().GetReplace(":,:") + "\r\n";
                                }

                                bidUnit = ctx.GetBidRegex(null, true, 200);
                                if (string.IsNullOrEmpty(bidUnit))
                                {
                                    bidUnit = ctx.GetRegex(winnerLabels);
                                }
                                if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
                                {
                                    bidMoney = ctx.GetMoneyRegex();
                                }
                                if (string.IsNullOrEmpty(prjMgr))
                                {
                                    prjMgr = ctx.GetMgrRegex();
                                }
                                if (string.IsNullOrEmpty(prjMgr))
                                {
                                    prjMgr = ctx.GetRegex(managerLabels);
                                }
                            }
                            catch
                            {
                            }
                        }
                    }
                }
            }

            // Amounts above 100000 are divided by 10000; unparsable values are left untouched.
            try
            {
                if (Convert.ToDecimal(bidMoney) > 100000)
                {
                    bidMoney = (decimal.Parse(bidMoney) / 10000).ToString();
                }
            }
            catch
            {
            }

            foreach (string marker in mgrCutMarkers)
            {
                if (prjMgr.Contains(marker))
                {
                    prjMgr = prjMgr.Remove(prjMgr.IndexOf(marker));
                }
            }
            if (prjMgr.Contains("投标") || prjMgr.IsNumber())
            {
                prjMgr = "";
            }

            if (bidUnit.Contains("公司"))
            {
                // Cut the name right after the first "公司".
                bidUnit = bidUnit.Remove(bidUnit.IndexOf("公司")) + "公司";
            }
            bidUnit = bidUnit.GetReplace("名称,1,、I标段");
            prjMgr = prjMgr.GetReplace("1,、,一,第一中标人,第一中标,第中标人,第名,I标段,第中标候选人,标段").GetCodeDel();

            string bidType = "建设工程";
            string specType = bidType;
            string msgType = "安徽省发展和改革委员会";

            BidInfo info = ToolDb.GenBidInfo("安徽省", "安徽省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
            list.Add(info);

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Walks the paged grid (<c>table#DataGrid1</c>) at <c>SiteUrl</c>, posting the ASP.NET form back
/// for every page after the first, and creates a <c>BidInfo</c> for each row whose detail page
/// contains a <c>table</c> with class <c>hei_text</c>. Attachment links found in that table are
/// added to <c>AttachList</c>.
/// </summary>
/// <param name="crawlAll">When <c>false</c>, the crawl ends once <c>MaxCount</c> records have been collected.</param>
/// <returns>The collected records; empty when the first page cannot be downloaded.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList results = new List<BidInfo>();
    string pageHtml = string.Empty;
    string cookies = string.Empty;
    int totalPages = 1;

    try
    {
        pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookies);
    }
    catch
    {
        return results;
    }

    Parser parser = new Parser(new Lexer(pageHtml));
    NodeList pageCountSpans = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "lblPageCount")));
    if (pageCountSpans != null && pageCountSpans.Count > 0)
    {
        try
        {
            totalPages = int.Parse(pageCountSpans[0].ToNodePlainString());
        }
        catch
        {
        }
    }

    for (int pageNo = 1; pageNo <= totalPages; pageNo++)
    {
        if (pageNo > 1)
        {
            // The same form is posted for every further page, carrying the state fields
            // of the most recently downloaded page.
            string[] formKeys = { "__EVENTTARGET", "__EVENTARGUMENT", "__LASTFOCUS", "__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION", "gcbh_Text_Box", "gcmc_TextBox", "num_TextBox", "ImageButton3.x", "ImageButton3.y" };
            string viewState = this.ToolWebSite.GetAspNetViewState(pageHtml);
            string eventValidation = this.ToolWebSite.GetAspNetEventValidation(pageHtml);
            string[] formValues = { "", "", "", viewState, "B0108473", eventValidation, "", "", "", "5", "12" };
            NameValueCollection form = this.ToolWebSite.GetNameValueCollection(formKeys, formValues);
            try
            {
                pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, form, Encoding.Default, ref cookies);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(pageHtml));
        NodeList grids = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "DataGrid1")));
        if (grids == null || grids.Count == 0)
        {
            continue;
        }

        TableTag grid = grids[0] as TableTag;
        for (int rowIdx = 0; rowIdx < grid.RowCount; rowIdx++)
        {
            TableRow row = grid.Rows[rowIdx];
            ATag anchor = row.Columns[1].GetATag();
            if (anchor == null)
            {
                continue;
            }

            // Values that this site never fills are passed through as empty strings.
            string endDate = string.Empty, otherType = string.Empty;

            string title = anchor.LinkText.GetReplace(" ");
            string publishDate = row.Columns[2].ToPlainTextString().GetDateRegex();
            string detailUrl = "http://www.bcactc.com/home/gcxx/" + anchor.Link;

            string detailHtml;
            try
            {
                detailHtml = this.ToolWebSite.GetHtmlByUrl(detailUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(detailHtml));
            NodeList detailTables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "hei_text")));
            if (detailTables == null || detailTables.Count == 0)
            {
                continue;
            }

            TableTag detailTable = detailTables[0] as TableTag;
            string contentHtml = detailTable.ToHtml();

            // Flatten the detail table cell by cell. Rows for which IsTable is true are handed
            // to GetTableBid; otherwise cells alternate between "label:" and "value\r\n".
            StringBuilder flat = new StringBuilder();
            for (int r = 0; r < detailTable.RowCount; r++)
            {
                for (int c = 0; c < detailTable.Rows[r].ColumnCount; c++)
                {
                    string cellText = detailTable.Rows[r].Columns[c].ToHtml().GetReplace("<br>,<br/>", "\r\n").ToCtxString();
                    if (!cellText.Contains("\r\n"))
                    {
                        cellText = detailTable.Rows[r].Columns[c].ToNodePlainString();
                    }

                    if (IsTable(detailTable.Rows[r].ToHtml()))
                    {
                        flat.Append(GetTableBid(detailTable.Rows[r].ToHtml()));
                        continue;
                    }

                    if (c % 2 == 1)
                    {
                        flat.Append(cellText + "\r\n");
                    }
                    else
                    {
                        flat.Append(cellText.GetReplace(":,:") + ":");
                    }
                }
            }
            string contentText = flat.ToString().GetReplace(":\r\n", ":");

            string projectCode = contentText.GetCodeRegex();

            string owner = contentText.GetBuildRegex();
            if (string.IsNullOrEmpty(owner))
            {
                owner = contentText.GetRegex("建设单位名称");
            }

            string winner = contentText.GetBidRegex();
            if (string.IsNullOrEmpty(winner))
            {
                winner = contentText.GetRegex("中标侯选人");
            }

            string money = contentText.GetMoneyRegex();
            if (money == "0" || string.IsNullOrEmpty(money))
            {
                money = contentText.GetRegex("中标合同额").GetMoney();
            }

            string manager = contentText.GetMgrRegex();
            string source = "北京市建设工程发包承包交易中心";
            string specType = "建设工程";
            string bidType = "勘察设计";

            BidInfo record = ToolDb.GenBidInfo("北京市", "北京市区", "", string.Empty, projectCode, title, owner, publishDate, winner, publishDate, endDate, contentText, string.Empty, source, bidType, specType, otherType, money, detailUrl, manager, contentHtml);
            results.Add(record);

            // Register every attachment link found in the detail table.
            parser = new Parser(new Lexer(contentHtml));
            NodeList links = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (links != null && links.Count > 0)
            {
                for (int k = 0; k < links.Count; k++)
                {
                    ATag fileLink = links[k] as ATag;
                    if (!fileLink.IsAtagAttach())
                    {
                        continue;
                    }

                    // Links that do not contain "http" are prefixed with the site root.
                    string fileUrl = fileLink.Link.ToLower().Contains("http")
                        ? fileLink.Link
                        : "http://www.bcactc.com/" + fileLink.Link;
                    BaseAttach attach = ToolDb.GenBaseAttach(fileLink.LinkText, record.Id, fileUrl);
                    base.AttachList.Add(attach);
                }
            }

            if (!crawlAll && results.Count >= this.MaxCount)
            {
                return results;
            }
        }
    }

    return results;
}
/// <summary>
/// Crawls the index page at <c>SiteUrl</c>, reads the entries from the last
/// <c>&lt;xml&gt;</c> island embedded in it, downloads each entry's detail page and
/// turns it into a <c>BidInfo</c> (title contains "中标") or an <c>InviteInfo</c>
/// (any other title). Attachment links found in the detail HTML are added to
/// <c>base.AttachList</c>.
/// </summary>
/// <param name="crawlAll">When false, returns as soon as <c>MaxCount</c> records were collected.</param>
/// <returns>
/// The collected records. Empty when the index page cannot be fetched or does
/// not contain an <c>&lt;xml&gt;…&lt;/xml&gt;</c> section.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();

    // Fetch the index page; one retry, mirroring the original nested try/catch.
    string html = null;
    for (int attempt = 0; attempt < 2 && string.IsNullOrEmpty(html); attempt++)
    {
        try
        {
            html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
        }
        catch
        {
            // Best effort: a failed attempt is retried once, then we give up below.
        }
    }
    if (string.IsNullOrEmpty(html))
    {
        return list;
    }

    // Guard the Substring call: LastIndexOf returns -1 when a marker is missing,
    // which previously raised ArgumentOutOfRangeException.
    int startIndex = html.LastIndexOf("<xml");
    int endIndex = html.LastIndexOf("</xml>");
    if (startIndex < 0 || endIndex < startIndex)
    {
        return list;
    }

    // Rename the custom XML elements to ordinary HTML tags so the tag-name
    // filters below can find them. "infourl" must be replaced before "info".
    // NOTE(review): ToLower() also lower-cases the relative URLs inside the island — confirm the server is case-insensitive.
    string xmlstr = html.Substring(startIndex, endIndex - startIndex).ToLower()
        .GetReplace("infourl", "span")
        .GetReplace("info", "div")
        .GetReplace("publishedtime", "p");

    Parser parser = new Parser(new Lexer(xmlstr));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("div"));
    if (pageNode == null || pageNode.Count == 0)
    {
        return list;
    }

    for (int i = 0; i < pageNode.Count; i++)
    {
        parser = new Parser(new Lexer(pageNode[i].ToHtml()));
        NodeList dateNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("p"));
        parser.Reset();
        NodeList urlNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("span"));

        // Entries without a date or URL element cannot be processed; skip them
        // instead of failing on dateNode[0] / urlNode[0].
        if (dateNode == null || dateNode.Count == 0 || urlNode == null || urlNode.Count == 0)
        {
            continue;
        }

        string beginDate = dateNode[0].ToPlainTextString().GetDateRegex();
        string infoUrl = "http://www.zhongtang.gov.cn/business/htmlfiles/" + urlNode[0].ToPlainTextString();

        // Fetch the detail page; one retry, then skip this entry.
        string htmldtl = null;
        for (int attempt = 0; attempt < 2 && htmldtl == null; attempt++)
        {
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString();
            }
            catch
            {
                // Best effort: retried once, entry is skipped below if still missing.
            }
        }
        if (htmldtl == null)
        {
            continue;
        }

        parser = new Parser(new Lexer(htmldtl));
        NodeList titleNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "zoomtitl")));
        string prjName = string.Empty;
        if (titleNode != null && titleNode.Count > 0)
        {
            prjName = titleNode[0].ToNodePlainString().GetReplace(" ");
        }

        parser.Reset();
        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("width", "778")));
        if (dtlNode == null || dtlNode.Count == 0)
        {
            continue;
        }

        // Values shared by both record kinds.
        string htmlTxt = dtlNode[0].ToHtml();
        string ctx = htmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();
        string buildUnit = ctx.GetBuildRegex();
        string code = ctx.GetCodeRegex().GetCodeDel();
        string endDate = string.Empty;
        string otherType = string.Empty;
        string specType = "政府采购";
        string msgType = "东莞市中堂镇政府";

        if (prjName.Contains("中标"))
        {
            // Bid-result announcement.
            string bidUnit = ctx.GetBidRegex();
            string bidMoney = ctx.GetRegex("中标值").GetMoney();
            if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
            {
                bidMoney = ctx.GetMoneyRegex();
            }

            // Amounts below 1 are treated as "no amount"; unparsable text is left as is.
            decimal amount;
            if (decimal.TryParse(bidMoney, out amount) && amount < 1)
            {
                bidMoney = "0";
            }

            string prjMgr = ctx.GetMgrRegex();
            string bidType = prjName.GetInviteBidType();

            BidInfo info = ToolDb.GenBidInfo("广东省", "东莞市区", "中堂镇", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, ctx, string.Empty, msgType, bidType, specType, otherType, bidMoney, infoUrl, prjMgr, htmlTxt);
            list.Add(info);

            parser = new Parser(new Lexer(htmlTxt));
            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode != null)
            {
                for (int k = 0; k < aNode.Count; k++)
                {
                    ATag a = aNode[k].GetATag();
                    if (a == null || !a.IsAtagAttach())
                    {
                        continue;
                    }

                    // Relative links are resolved against the site root.
                    string link = a.Link.ToLower().Contains("http") ? a.Link : "http://www.zhongtang.gov.cn/" + a.Link;
                    base.AttachList.Add(ToolDb.GenBaseAttach(a.LinkText, info.Id, link));
                }
            }
        }
        else
        {
            // Invitation / tender announcement.
            string prjAddress = ctx.GetAddressRegex();
            string remark = string.Empty;

            // Cut trailing agency / address text that the regex captured along with the unit name.
            int cut = buildUnit.IndexOf("招标代理", StringComparison.Ordinal);
            if (cut >= 0)
            {
                buildUnit = buildUnit.Remove(cut);
            }
            cut = buildUnit.IndexOf("地址", StringComparison.Ordinal);
            if (cut >= 0)
            {
                buildUnit = buildUnit.Remove(cut);
            }

            string inviteType = prjName.GetInviteBidType();

            InviteInfo info = ToolDb.GenInviteInfo("广东省", "东莞市区", "中堂镇", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, ctx, remark, msgType, inviteType, specType, otherType, infoUrl, htmlTxt);
            list.Add(info);

            parser = new Parser(new Lexer(htmlTxt));
            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode != null)
            {
                for (int k = 0; k < aNode.Count; k++)
                {
                    ATag a = aNode[k].GetATag();
                    if (a == null || !a.IsAtagAttach())
                    {
                        continue;
                    }

                    // Relative links are resolved against the site root.
                    string link = a.Link.ToLower().Contains("http") ? a.Link : "http://www.zhongtang.gov.cn/" + a.Link;
                    base.AttachList.Add(ToolDb.GenBaseAttach(a.LinkText, info.Id, link));
                }
            }
        }

        if (!crawlAll && list.Count >= this.MaxCount)
        {
            return list;
        }
    }

    return list;
}
/// <summary>
/// Walks the paged listing that starts at <c>SiteUrl</c>, opens every linked
/// detail page and builds one <c>BidInfo</c> per page that exposes a
/// <c>td#Content</c> cell. Attachment anchors inside that cell are registered
/// on <c>base.AttachList</c>.
/// </summary>
/// <param name="crawlAll">When false, returns once <c>MaxCount</c> records have been gathered.</param>
/// <returns>The gathered records; empty when the first page cannot be downloaded.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList results = new List<BidInfo>();
    int totalPages = 1;
    string pageHtml = string.Empty;

    try
    {
        pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
    }
    catch
    {
        return results;
    }

    // Page count is read from the pager box text between "共" and "页"; stays 1 if that fails.
    Parser parser = new Parser(new Lexer(pageHtml));
    NodeList pagerNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pagebox")));
    if (pagerNodes != null && pagerNodes.Count > 0)
    {
        try
        {
            totalPages = int.Parse(pagerNodes.AsString().GetRegexBegEnd("共", "页"));
        }
        catch
        {
        }
    }

    for (int page = 1; page <= totalPages; page++)
    {
        if (page > 1)
        {
            try
            {
                pageHtml = this.ToolWebSite.GetHtmlByUrl("http://www.ynbidding.net/classlist.aspx?no-cache=0.7657945008653804&id=032430863326&id=://www.ynbidding.net/list&page=" + page + "&_=", Encoding.Default);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(pageHtml));
        NodeList listingTables = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%")));
        if (listingTables == null || listingTables.Count <= 0)
        {
            continue;
        }

        TableTag listing = listingTables[0] as TableTag;

        // Row 0 is skipped; rows without an anchor are ignored.
        for (int rowIndex = 1; rowIndex < listing.RowCount; rowIndex++)
        {
            TableRow row = listing.Rows[rowIndex];
            ATag anchor = row.GetATag();
            if (anchor == null)
            {
                continue;
            }

            string endDate = string.Empty;
            string prjMgr = string.Empty;
            string otherType = string.Empty;
            string area = string.Empty;

            string prjName = anchor.LinkText;
            string beginDate = row.Columns[0].ToNodePlainString().GetDateRegex("yyyy/MM/dd");
            string detailUrl = "http://www.ynbidding.net" + anchor.Link;

            string detailPage;
            try
            {
                detailPage = this.ToolWebSite.GetHtmlByUrl(detailUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(detailPage));
            NodeList contentCells = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "Content")));
            if (contentCells == null || contentCells.Count <= 0)
            {
                continue;
            }

            string contentHtml = contentCells.AsHtml();
            string bidCtx = contentHtml.ToCtxString();
            string buildUnit = bidCtx.GetBuildRegex();
            // Extracted as in the original flow; the value is not passed on to GenBidInfo.
            string prjAddress = bidCtx.GetAddressRegex();
            string code = bidCtx.GetCodeRegex();
            string bidMoney = bidCtx.GetMoneyRegex();
            string bidUnit = bidCtx.GetBidRegex();

            if (string.IsNullOrWhiteSpace(bidUnit))
            {
                bidUnit = bidCtx.GetRegex("第一候选供应商");
            }
            if (bidUnit.Contains("第一"))
            {
                bidUnit = string.Empty;
            }

            if (string.IsNullOrWhiteSpace(bidUnit))
            {
                // Fallback: read the second table of the content as a header row + value row.
                Parser tableParser = new Parser(new Lexer(contentHtml));
                NodeList innerTables = tableParser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                if (innerTables != null && innerTables.Count > 1)
                {
                    string pairs = string.Empty;
                    TableTag second = innerTables[1] as TableTag;
                    if (second.RowCount > 1)
                    {
                        for (int col = 0; col < second.Rows[0].ColumnCount; col++)
                        {
                            try
                            {
                                pairs += second.Rows[0].Columns[col].ToNodePlainString().GetReplace("sourcefromwww.ynbidding.net") + ":";
                                pairs += second.Rows[1].Columns[col].ToNodePlainString().GetReplace("sourcefromwww.ynbidding.net") + "\r\n";
                            }
                            catch
                            {
                            }
                        }
                    }

                    bidUnit = pairs.GetBidRegex();
                    if (string.IsNullOrWhiteSpace(bidUnit))
                    {
                        bidUnit = pairs.GetRegex("投标单位,单位名称,投标人名称");
                    }
                    if (bidMoney == "0" || string.IsNullOrWhiteSpace(bidMoney))
                    {
                        bidMoney = pairs.GetMoneyRegex();
                    }
                    if (bidMoney == "0" || string.IsNullOrWhiteSpace(bidMoney))
                    {
                        bidMoney = pairs.GetMoneyString().GetMoney();
                    }
                }
            }

            // Keep the name only up to and including the first "公司".
            if (bidUnit.Contains("公司"))
            {
                bidUnit = bidUnit.Remove(bidUnit.IndexOf("公司")) + "公司";
            }
            if (bidUnit.Contains("第一"))
            {
                bidUnit = string.Empty;
            }

            string bidType = prjName.GetInviteBidType();
            string specType = "建设工程";
            string msgType = "云南省发展和改革委员会";

            BidInfo record = ToolDb.GenBidInfo("云南省", "云南省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, detailUrl, prjMgr, contentHtml);
            results.Add(record);

            Parser linkParser = new Parser(new Lexer(contentHtml));
            NodeList anchors = linkParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (anchors != null && anchors.Count > 0)
            {
                for (int k = 0; k < anchors.Count; k++)
                {
                    ATag fileAnchor = anchors[k] as ATag;
                    if (!fileAnchor.IsAtagAttach())
                    {
                        continue;
                    }

                    string link;
                    if (fileAnchor.Link.ToLower().Contains("http"))
                    {
                        link = fileAnchor.Link;
                    }
                    else
                    {
                        link = "http://www.ynbidding.net/" + fileAnchor.Link;
                    }

                    BaseAttach attach = ToolDb.GenBaseAttach(fileAnchor.LinkText, record.Id, link);
                    base.AttachList.Add(attach);
                }
            }

            if (!crawlAll && results.Count >= this.MaxCount)
            {
                return results;
            }
        }
    }

    return results;
}
/// <summary>
/// Crawls the paged listing at <c>SiteUrl</c>. Every anchor inside the
/// <c>td[height=200]</c> listing cell carries its metadata in the <c>title</c>
/// attribute (labels "文章标题", "招标代码", "更新时间"); the linked detail page
/// is downloaded and converted into a <c>BidInfo</c>.
/// </summary>
/// <param name="crawlAll">When false, returns as soon as <c>MaxCount</c> records were collected.</param>
/// <returns>
/// The collected records. An empty list (never <c>null</c>) when the first
/// page cannot be fetched, in line with the other crawlers in this file.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<BidInfo>();
    int pageInt = 1;
    string html;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
    }
    catch
    {
        // Previously returned null here; callers of the sibling crawlers get an empty list.
        return list;
    }

    try
    {
        // The pager renders "<current>/<total>" inside <strong>; strip the current-page part.
        string pager = html.GetRegexBegEnd("<strong>", "</strong>").GetReplace("<fontcolor=red>1</font>/");
        pageInt = int.Parse(pager);
    }
    catch
    {
        // Best effort: fall back to a single page when the pager cannot be read.
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&SpecialID=0&page=" + i, Encoding.Default);
            }
            catch
            {
                continue;
            }
        }

        Parser parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("height", "200")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }

        parser = new Parser(new Lexer(listNode.AsHtml()));
        NodeList linkNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
        if (linkNode == null || linkNode.Count == 0)
        {
            continue;
        }

        for (int j = 0; j < linkNode.Count; j++)
        {
            ATag aTag = linkNode[j] as ATag;
            if (aTag == null)
            {
                continue;
            }

            // All listing metadata lives in the title attribute; anchors without it are not entries.
            string title = aTag.GetAttribute("title");
            if (string.IsNullOrEmpty(title))
            {
                continue;
            }

            string endDate = string.Empty;
            string otherType = string.Empty;
            string area = string.Empty;

            string prjName = title.GetRegex("文章标题");
            string code = title.GetRegex("招标代码");
            string beginDate = title.GetRegex("更新时间").GetDateRegex("yyyy/MM/dd");
            string infoUrl = "http://www.lntb.gov.cn/" + aTag.Link;

            string htmldtl;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("height", "200")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            string htmlTxt = dtlNode.AsHtml();
            string bidCtx = htmlTxt.ToLower().GetReplace("</p>,<br/>,<br>", "\r\n").ToCtxString();
            string buildUnit = bidCtx.GetBuildRegex();

            // Winning bidder: try progressively looser extractions.
            string bidUnit = bidCtx.GetBidRegex().GetReplace("名称");
            if (string.IsNullOrEmpty(bidUnit))
            {
                bidUnit = bidCtx.GetRegex("中标人名称").GetReplace("名称");
            }
            if (string.IsNullOrEmpty(bidUnit))
            {
                bidUnit = bidCtx.GetReplace("中标人名称:,:中标人名称,中标人:,中标人:", "\r\n").GetBidRegex().GetReplace("名称");
            }
            if (string.IsNullOrEmpty(bidUnit))
            {
                // Last resort: flatten the last table into "label:value" lines and search those.
                parser = new Parser(new Lexer(htmlTxt));
                NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                TableTag tag = (tableNode != null && tableNode.Count > 0) ? tableNode[tableNode.Count - 1] as TableTag : null;
                if (tag != null)
                {
                    StringBuilder pairs = new StringBuilder();
                    for (int r = 0; r < tag.RowCount; r++)
                    {
                        for (int c = 0; c < tag.Rows[r].ColumnCount; c++)
                        {
                            string cell = tag.Rows[r].Columns[c].ToNodePlainString().GetReplace(":,:");
                            // Odd columns are labels, even columns are values.
                            pairs.Append(cell).Append((c + 1) % 2 == 0 ? "\r\n" : ":");
                        }
                    }

                    string ctx = pairs.ToString();
                    bidUnit = ctx.GetBidRegex().GetReplace("名称");
                    if (string.IsNullOrEmpty(bidUnit))
                    {
                        bidUnit = ctx.GetRegex("中标人名称").GetReplace("名称");
                    }
                }
            }

            string bidMoney = bidCtx.GetMoneyRegex();
            string prjMgr = bidCtx.GetMgrRegex();

            // Trim the owner name to end at "公司" and drop any trailing address text.
            int cut = buildUnit.IndexOf("公司", StringComparison.Ordinal);
            if (cut >= 0)
            {
                buildUnit = buildUnit.Remove(cut) + "公司";
            }
            cut = buildUnit.IndexOf("地址", StringComparison.Ordinal);
            if (cut >= 0)
            {
                buildUnit = buildUnit.Remove(cut);
            }

            string msgType = "辽宁省招标投标协调管理办公室";
            string specType = "建设工程";
            string bidType = "建设工程";

            BidInfo info = ToolDb.GenBidInfo("辽宁省", "辽宁省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, infoUrl, prjMgr, htmlTxt);
            list.Add(info);

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the paged announcement list that starts at <c>SiteUrl</c> (later
/// pages are requested by POST from <c>queryMoreInfoList.do</c>), downloads
/// each linked detail page and converts its <c>div.zw_c_c_cont</c> content into
/// a <c>BidInfo</c>. Attachment anchors are registered on <c>base.AttachList</c>.
/// </summary>
/// <param name="crawlAll">When false, returns as soon as <c>MaxCount</c> records were collected.</param>
/// <returns>The collected records; empty when the first page cannot be fetched.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<BidInfo>();
    int pageInt = 1;
    string html;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
    }
    catch
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("form"), new HasAttributeFilter("name", "qPageForm")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            // The second-to-last pager link is "turnOverPage(<n>);"; strip it down to the number.
            NodeList pagerLinks = new Parser(new Lexer(pageNode.ToHtml())).ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (pagerLinks != null && pagerLinks.Count > 0)
            {
                string temp = pagerLinks[pagerLinks.Count - 2].GetATagHref().Replace("turnOverPage", "").Replace("(", "").Replace(")", "").Replace(";", "");
                pageInt = int.Parse(temp);
            }
        }
        catch
        {
            // Best effort: fall back to a single page when the pager cannot be read.
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "channelCode", "pageIndex", "pageSize", "pointPageIndexId" }, new string[] { "0008", i.ToString(), "15", "1" });
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl("http://qingyuan.gdgpo.com/queryMoreInfoList.do", nvc, Encoding.UTF8);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "m_m_c_list")), true), new TagNameFilter("li")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }

        for (int j = 0; j < listNode.Count; j++)
        {
            // Skip list items that lack the expected anchor instead of failing on a null reference.
            ATag aTag = listNode[j].GetATag(1);
            if (aTag == null)
            {
                continue;
            }

            string endDate = string.Empty;
            string prjMgr = string.Empty;
            string otherType = string.Empty;

            string prjName = aTag.GetAttribute("title");
            string beginDate = listNode[j].ToPlainTextString().GetDateRegex();
            string infoUrl = "http://qingyuan.gdgpo.com" + aTag.Link;

            string htmldtl;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.UTF8).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "zw_c_c_cont")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            string htmlTxt = dtlNode.AsHtml().Replace("<br", "\r\n<br");
            string bidCtx = htmlTxt.Replace("</p>", "\r\n").Replace("</pre>", "\r\n").ToCtxString();
            string buildUnit = bidCtx.GetBuildRegex();

            string code = bidCtx.Replace("(招标编号", "000000").GetCodeRegex().GetCodeDel();
            if (string.IsNullOrEmpty(code))
            {
                code = bidCtx.GetRegex("招标编号", true, 50).GetCodeDel();
            }

            string bidUnit = bidCtx.GetBidRegex().GetBidUnitDel();

            // Amount: a cascade of progressively looser extractions, each tried only
            // while the previous ones produced nothing (empty or "0").
            string bidMoney = bidCtx.GetMoneyString();
            if (bidMoney.Contains("("))
            {
                bidMoney = bidMoney.Remove(bidMoney.IndexOf("(")).GetMoney();
            }
            else
            {
                bidMoney = bidMoney.GetMoney();
            }

            if (bidMoney == "0")
            {
                bidMoney = bidCtx.GetMoneyString(null, true);
                if (bidMoney.Contains("("))
                {
                    bidMoney = bidMoney.Remove(bidMoney.IndexOf("(")).GetMoney();
                }
                else if (bidMoney.Contains("大写"))
                {
                    bidMoney = bidMoney.Remove(bidMoney.IndexOf("大写")).GetMoney();
                }
                else
                {
                    bidMoney = bidMoney.GetMoney();
                }
            }

            if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
            {
                bidMoney = bidCtx.GetMoneyString(null, true);
                if (bidMoney.Contains("大写"))
                {
                    bidMoney = bidMoney.Remove(bidMoney.IndexOf("大写")).GetMoney();
                }
                else
                {
                    bidMoney = bidMoney.GetMoney("万元");
                }
            }

            if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
            {
                bidMoney = bidCtx.GetMoneyString(null, true).GetMoney();
            }
            if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
            {
                bidMoney = bidCtx.GetMoneyString(new string[] { "¥", "$" }, false).GetMoney();
            }
            if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
            {
                bidMoney = bidCtx.GetRegex("金额人民币", false).Replace(",", "").Replace(",", "").GetMoney();
            }

            // Values above 10000 are divided by 10000. TryParse keeps a non-numeric
            // amount from aborting the whole crawl (the old decimal.Parse was unguarded).
            decimal amount;
            if (!string.IsNullOrEmpty(bidMoney) && bidMoney != "0" && decimal.TryParse(bidMoney, out amount) && amount > 10000)
            {
                bidMoney = (amount / 10000).ToString();
            }

            if (string.IsNullOrEmpty(bidUnit))
            {
                bidUnit = bidCtx.GetRegex("1、单位名称,1、名称");
            }
            if (bidUnit.Contains("名称"))
            {
                bidUnit = bidUnit.Replace("名称", "");
            }
            if (bidUnit == "/")
            {
                bidUnit = string.Empty;
            }

            string bidType = prjName.GetInviteBidType();
            string msgType = "清远市政府采购";
            string specType = "政府采购";

            BidInfo info = ToolDb.GenBidInfo("广东省", "清远市区", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, infoUrl, prjMgr, htmlTxt);

            parser = new Parser(new Lexer(htmlTxt));
            NodeList fileNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (fileNode != null)
            {
                for (int k = 0; k < fileNode.Count; k++)
                {
                    ATag fileAtag = fileNode[k].GetATag();
                    if (fileAtag == null || !fileAtag.IsAtagAttach())
                    {
                        continue;
                    }

                    string fileName = fileAtag.LinkText.ToNodeString().Replace(" ", "");
                    string fileLink = fileAtag.Link;
                    if (!fileLink.ToLower().Contains("http"))
                    {
                        // NOTE(review): attachments resolve against gdgpo.gov.cn while pages use gdgpo.com — confirm this host is intended.
                        fileLink = "http://qingyuan.gdgpo.gov.cn" + fileAtag.Link;
                    }
                    base.AttachList.Add(ToolDb.GenBaseAttach(fileName, info.Id, fileLink));
                }
            }

            list.Add(info);
            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls one ASP.NET paged listing per entry returned by <c>GetCitys()</c>
/// (key = area name, value = listing URL). Each row of the
/// <c>MoreInfoList1_DataGrid1</c> table is followed to its detail page and
/// converted into a <c>BidInfo</c>; attachment anchors are registered on
/// <c>base.AttachList</c>.
/// </summary>
/// <param name="crawlAll">
/// When false, at most <c>MaxCount</c> records are collected per city before
/// moving on to the next city.
/// </param>
/// <returns>The records collected so far; the crawl stops at the first city whose initial page cannot be fetched.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<BidInfo>();
    Dictionary<string, string> citys = this.GetCitys();
    if (citys == null)
    {
        return list;
    }

    foreach (KeyValuePair<string, string> city in citys)
    {
        string area = city.Key;
        string cityUrl = city.Value;
        int count = 0;
        int pageInt = 1;
        string html;
        string cookiestr = string.Empty;
        // Replaces the former "goto Funcs": set once this city's quota is reached.
        bool quotaReached = false;

        try
        {
            html = this.ToolWebSite.GetHtmlByUrl(cityUrl, Encoding.UTF8, ref cookiestr);
        }
        catch
        {
            // NOTE(review): a failure for one city ends the whole crawl (original behaviour kept) — confirm "continue" is not intended.
            return list;
        }

        Parser parser = new Parser(new Lexer(html));
        NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("nowrap", "true")));
        if (pageNode != null && pageNode.Count > 0)
        {
            try
            {
                string temp = pageNode.AsString().GetRegexBegEnd("总页数", "当前页").Replace(":", "");
                pageInt = int.Parse(temp);
            }
            catch
            {
                // Best effort: fall back to a single page when the pager cannot be read.
            }
        }

        for (int i = 1; i <= pageInt && !quotaReached; i++)
        {
            if (i > 1)
            {
                // Post back the ASP.NET state fields of the current page to request page i.
                string viewState = this.ToolWebSite.GetAspNetViewState(html);
                string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                string viewStateGenerator = ToolHtml.GetHtmlInputValue(html, "__VIEWSTATEGENERATOR");
                NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTTARGET", "__EVENTARGUMENT", "__EVENTVALIDATION", "MoreInfoList1$txtTitle" }, new string[] { viewState, viewStateGenerator, "MoreInfoList1$Pager", i.ToString(), eventValidation, "" });
                try
                {
                    html = this.ToolWebSite.GetHtmlByUrl(cityUrl, nvc, Encoding.UTF8, ref cookiestr);
                }
                catch
                {
                    continue;
                }
            }

            parser = new Parser(new Lexer(html));
            NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
            TableTag table = (listNode != null && listNode.Count > 0) ? listNode[0] as TableTag : null;
            if (table == null)
            {
                continue;
            }

            // Row 0 is skipped.
            for (int j = 1; j < table.RowCount; j++)
            {
                TableRow tr = table.Rows[j];

                // Rows that lack the title/date columns or the link are skipped instead of throwing.
                if (tr.ColumnCount < 3)
                {
                    continue;
                }
                ATag aTag = tr.Columns[1].GetATag();
                if (aTag == null)
                {
                    continue;
                }

                string endDate = string.Empty;
                string otherType = string.Empty;

                string prjName = aTag.GetAttribute("title").GetReplace("【正在报名】,【报名结束】");
                string beginDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                string infoUrl = "http://www.gxzbtb.cn" + aTag.Link;

                string htmldtl;
                try
                {
                    htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.UTF8).GetJsString();
                }
                catch
                {
                    continue;
                }

                parser = new Parser(new Lexer(htmldtl));
                NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "TDContent")));
                if (dtlNode == null || dtlNode.Count == 0)
                {
                    continue;
                }

                string htmlTxt = dtlNode.AsHtml();
                string bidCtx = htmlTxt.GetReplace(new string[] { "<br/>", "<br />", "<br>" }, "\r\n").ToCtxString();
                string prjAddress = bidCtx.GetAddressRegex();
                string buildUnit = bidCtx.GetBuildRegex();
                string bidUnit = bidCtx.GetBidRegex();
                string bidMoney = bidCtx.GetMoneyRegex();
                string prjMgr = bidCtx.GetMgrRegex();
                string code = bidCtx.GetCodeRegex().GetCodeDel();

                if (string.IsNullOrEmpty(bidUnit))
                {
                    parser = new Parser(new Lexer(htmlTxt));
                    NodeList bidNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                    TableTag bidTable = (bidNode != null && bidNode.Count > 0) ? bidNode[0] as TableTag : null;
                    if (bidTable != null)
                    {
                        // First attempt: read the table as "label:value" pairs laid out left to right.
                        string ctx = string.Empty;
                        for (int r = 0; r < bidTable.RowCount; r++)
                        {
                            for (int c = 0; c < bidTable.Rows[r].ColumnCount; c++)
                            {
                                ctx += bidTable.Rows[r].Columns[c].ToNodePlainString() + ((c + 1) % 2 == 0 ? "\r\n" : ":");
                            }
                        }

                        bidUnit = ctx.GetBidRegex();
                        if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                        {
                            bidMoney = ctx.GetMoneyString().GetMoney("万元");
                        }
                        if (string.IsNullOrEmpty(prjAddress))
                        {
                            prjAddress = ctx.GetAddressRegex();
                        }
                        if (string.IsNullOrEmpty(buildUnit))
                        {
                            buildUnit = ctx.GetBuildRegex();
                        }
                        if (string.IsNullOrEmpty(code))
                        {
                            code = ctx.GetCodeRegex().GetCodeDel();
                        }

                        // A "bidder" containing these words is a mis-captured label, not a name.
                        if (bidUnit.Contains("推荐") || bidUnit.Contains("中标") || bidUnit.Contains("地址"))
                        {
                            bidUnit = string.Empty;
                        }

                        if (string.IsNullOrEmpty(bidUnit) && bidTable.RowCount > 1)
                        {
                            // Second attempt: treat row 0 as headers and row 1 as the values.
                            ctx = string.Empty;
                            for (int d = 0; d < bidTable.Rows[0].ColumnCount; d++)
                            {
                                ctx += bidTable.Rows[0].Columns[d].ToNodePlainString() + ":";
                                try
                                {
                                    ctx += bidTable.Rows[1].Columns[d].ToNodePlainString() + "\r\n";
                                }
                                catch
                                {
                                    // Row 1 may have fewer cells than the header row; leave the label without a value.
                                }
                            }

                            bidUnit = ctx.GetBidRegex();
                            if (string.IsNullOrEmpty(bidMoney) || bidMoney == "0")
                            {
                                bidMoney = ctx.GetMoneyString().GetMoney();
                            }
                            if (string.IsNullOrEmpty(prjAddress))
                            {
                                prjAddress = ctx.GetAddressRegex();
                            }
                            if (string.IsNullOrEmpty(buildUnit))
                            {
                                buildUnit = ctx.GetBuildRegex();
                            }
                            if (string.IsNullOrEmpty(code))
                            {
                                code = ctx.GetCodeRegex().GetCodeDel();
                            }
                        }
                    }
                }

                // Values above 10000 are divided by 10000; unparsable amounts are left untouched.
                decimal amount;
                if (decimal.TryParse(bidMoney, out amount) && amount > 10000)
                {
                    bidMoney = (amount / 10000).ToString();
                }

                bidUnit = bidUnit.Replace("名称", "").Replace("单位", "").Replace("№", "").Replace("1", "").Replace("2", "").Replace("联合体", "").Replace("(", "");
                // Cut the name after the first organisation suffix found.
                if (bidUnit.Contains("公司"))
                {
                    bidUnit = bidUnit.Remove(bidUnit.IndexOf("公司")) + "公司";
                }
                if (bidUnit.Contains("研究院"))
                {
                    bidUnit = bidUnit.Remove(bidUnit.IndexOf("研究院")) + "研究院";
                }
                if (bidUnit.Contains("研究所"))
                {
                    bidUnit = bidUnit.Remove(bidUnit.IndexOf("研究所")) + "研究所";
                }

                string bidType = "房建市政";
                string specType = "建设工程";
                string msgType = "广西壮族自治区公共资源交易中心";

                BidInfo info = ToolDb.GenBidInfo("广西壮族自治区", "广西壮族自治区及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, infoUrl, prjMgr, htmlTxt);
                list.Add(info);
                count++;

                parser = new Parser(new Lexer(htmlTxt));
                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (aNode != null)
                {
                    for (int k = 0; k < aNode.Count; k++)
                    {
                        ATag a = aNode[k] as ATag;
                        if (a == null || !a.IsAtagAttach())
                        {
                            continue;
                        }

                        string link;
                        if (a.Link.ToLower().Contains("http"))
                        {
                            link = a.Link;
                        }
                        else
                        {
                            link = "http://www.gxzbtb.cn/" + a.Link.GetReplace("../,./");
                        }

                        // Links longer than 500 bytes are dropped.
                        if (Encoding.Default.GetByteCount(link) > 500)
                        {
                            continue;
                        }

                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                        base.AttachList.Add(attach);
                    }
                }

                if (!crawlAll && count >= this.MaxCount)
                {
                    // Per-city quota reached: leave both loops and continue with the next city.
                    quotaReached = true;
                    break;
                }
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the paged listing at <c>SiteUrl</c> (later pages are requested by
/// POST with a <c>pageno</c> field), follows the link in the first column of
/// each row of the last <c>table.list</c>, and builds a <c>BidInfo</c> from the
/// <c>div#page1</c> section of the detail page. Most fields are read from the
/// <c>table#zbcon</c> inside that section.
/// </summary>
/// <param name="crawlAll">When false, returns as soon as <c>MaxCount</c> records were collected.</param>
/// <returns>The collected records; empty when the first page cannot be fetched.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<BidInfo>();
    int pageInt = 1;
    string html;
    string cookiestr = string.Empty;

    // Built once instead of once per detail page.
    Regex regDate = new Regex(@"\d{4}年\d{1,2}月\d{1,2}");

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
    }
    catch
    {
        return list;
    }

    try
    {
        string temp = html.ToCtxString().GetRegexBegEnd("第1/", "页");
        pageInt = int.Parse(temp);
    }
    catch
    {
        // Best effort: fall back to a single page when the pager cannot be read.
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "pageno", "mode", "linkname" }, new string[] { i.ToString(), "query", "currinfo" });
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                continue;
            }
        }

        Parser parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "list")));
        TableTag table = (listNode != null && listNode.Count > 0) ? listNode[listNode.Count - 1] as TableTag : null;
        if (table == null)
        {
            continue;
        }

        for (int j = 0; j < table.RowCount; j++)
        {
            TableRow tr = table.Rows[j];

            // Rows without cells or without a link are not entries.
            if (tr.ColumnCount == 0)
            {
                continue;
            }
            ATag aTag = tr.Columns[0].GetATag();
            if (aTag == null)
            {
                continue;
            }

            string buildUnit = string.Empty;
            string bidUnit = string.Empty;
            string bidMoney = string.Empty;
            string beginDate = string.Empty;
            string endDate = string.Empty;
            string bidCtx = string.Empty;
            string prjMgr = string.Empty;
            string otherType = string.Empty;

            string prjName = aTag.GetAttribute("title");
            string infoUrl = "http://www.nxzb.com.cn/" + aTag.Link;

            string htmldtl;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString();
            }
            catch
            {
                continue;
            }

            // NOTE(review): this replaces every "th" in the lower-cased page (not only <th> tags), e.g. inside "width" — confirm that is acceptable.
            parser = new Parser(new Lexer(htmldtl.ToLower().GetReplace("th", "td")));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "page1")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            string htmlTxt = dtlNode[dtlNode.Count - 1].ToHtml();
            string code = htmlTxt.ToCtxString().GetCodeRegex().GetCodeDel();

            parser = new Parser(new Lexer(htmlTxt));
            NodeList tableNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "zbcon")));
            TableTag tag = (tableNode != null && tableNode.Count > 0) ? tableNode[0] as TableTag : null;
            if (tag != null)
            {
                StringBuilder pairs = new StringBuilder();
                for (int r = 0; r < tag.RowCount; r++)
                {
                    int columnCount = tag.Rows[r].ColumnCount;

                    // The first cell of the first row is taken as the winning bidder.
                    if (r == 0 && columnCount > 0)
                    {
                        bidUnit = tag.Rows[r].Columns[0].ToNodePlainString();
                    }

                    // Flatten the table into "label:value" lines (odd column = label, even = value).
                    for (int c = 0; c < columnCount; c++)
                    {
                        string cell = tag.Rows[r].Columns[c].ToNodePlainString().GetReplace(":,:");
                        pairs.Append(cell).Append((c + 1) % 2 == 0 ? "\r\n" : ":");
                    }
                }

                bidCtx = pairs.ToString();
                bidMoney = bidCtx.GetMoneyRegex();
                prjMgr = bidCtx.GetMgrRegex();
                buildUnit = bidCtx.GetBuildRegex();
                beginDate = regDate.Match(bidCtx).Value;
            }

            // Values above 100000 are divided by 10000; unparsable amounts are left untouched.
            // NOTE(review): the threshold (100000) differs from the divisor (10000) — confirm this is intended.
            decimal amount;
            if (decimal.TryParse(bidMoney, out amount) && amount > 100000)
            {
                bidMoney = (amount / 10000).ToString();
            }

            string msgType = "宁夏建设工程招标投标管理中心";
            string specType = "建设工程";
            string bidType = "建设工程";

            BidInfo info = ToolDb.GenBidInfo("宁夏回族自治区", "宁夏回族自治区及地市", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, infoUrl, prjMgr, htmlTxt);
            list.Add(info);

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Click handler for the submit button: creates a bid through the API from the
/// form inputs and, when creation succeeds, stores the extended info,
/// documentation, contact info and price list for that bid.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void submit_ServerClick(object sender, EventArgs e)
{
    using (var apiclient = new API.Service1())
    {
        int tempVal;

        // Create the bid itself.
        var newBid = new BidInfo
        {
            BidderName = budNavn.Value,
            // Fix: the CVR number was parsed from plHverdageKoersel (a price-list field)
            // instead of the CVR input.
            CVR = int.TryParse(budCVR.Value, out tempVal) ? tempVal : 0
        };
        // NOTE(review): budNr is not sent to the API — confirm whether BidInfo should carry it.

        BidInfo createdBid = apiclient.CreateBidInfo(newBid);
        if (createdBid == null)
        {
            return;
        }

        // Save extended info. Numeric inputs that do not parse are stored as null.
        var exp = new ExpandedBidInfo
        {
            GarantiVognNummer = int.TryParse(uiGarantiVognNr.Value, out tempVal) ? tempVal : (int?)null,
            RegSerieNummer = int.TryParse(uiRegSerieNr.Value, out tempVal) ? tempVal : (int?)null,
            SecondaryOS = uiSekundærOs.Value,
            TelefonNummer = int.TryParse(uiTelefon.Value, out tempVal) ? tempVal : (int?)null,
            VognType = int.TryParse(uiVognType.Value, out tempVal) ? tempVal : (int?)null,
            VognloebsNummer = int.TryParse(uiVognløbNr.Value, out tempVal) ? tempVal : (int?)null
        };
        apiclient.UpdateExpandedBifInfo(createdBid, exp);

        // Save documentation info.
        // NOTE(review): docTilladelseNr is not sent to the API — confirm whether Documentation should carry it.
        DateTime tempDate;
        var docu = new Documentation
        {
            Tilladelse_Gyldig = DateTime.TryParse(docTilladelseGyldig.Value, out tempDate) ? tempDate : (DateTime?)null,
            Tilladelse_Type = docTilladelseType.Value,
            TrafikSelskab = docTrafikSelskab.Value,
            UdstedendeMyndighed = docUdstende.Value,
            RegistreringsNummer = dokRegnr.Value
        };
        apiclient.UpdateDocumentation(createdBid, docu);

        // TODO: equipment (the usBarnestol* / usTrappe* inputs) is not persisted yet;
        // the UpdateEquipment call was never enabled.

        // Save contact info.
        var con = new ContactInfo
        {
            City = kontaktBy.Value,
            Kommune = kontaktKommune.Value,
            Postnummer = int.TryParse(kontaktPostnummer.Value, out tempVal) ? tempVal : (int?)null,
            Vejnavn = kontaktVejnavn.Value,
            Vejnummer = int.TryParse(kontaktVejnummer.Value, out tempVal) ? tempVal : (int?)null
        };
        apiclient.UpdateContactInfo(createdBid, con);

        // Save price list.
        var pList = new PriceList
        {
            HverdagAftenNatKoersel = int.TryParse(plHverdagAftenNatKoersel.Value, out tempVal) ? tempVal : (int?)null,
            HverdagAftenNatOpstartsGebyr = int.TryParse(plHverdagAftenNatOpstartsGebyr.Value, out tempVal) ? tempVal : (int?)null,
            HverdagAftenNatVentetid = int.TryParse(plHverdagAftenNatVentetid.Value, out tempVal) ? tempVal : (int?)null,
            HverdageKoersel = int.TryParse(plHverdageKoersel.Value, out tempVal) ? tempVal : (int?)null,
            HverdageOpstartsGebyr = int.TryParse(plHverdageOpstartsGebyr.Value, out tempVal) ? tempVal : (int?)null,
            HverdageVenteTid = int.TryParse(plHverdageVenteTid.Value, out tempVal) ? tempVal : (int?)null,
            PrisPerLoeft_Trappemaskine = int.TryParse(plPrisPerLoeft_Trappemaskine.Value, out tempVal) ? tempVal : (int?)null,
            WeekendHelligdagKoersel = int.TryParse(plWeekendHelligdagKoersel.Value, out tempVal) ? tempVal : (int?)null,
            WeekendHelligdagOpstartsGebyr = int.TryParse(plWeekendHelligdagOpstartsGebyr.Value, out tempVal) ? tempVal : (int?)null,
            WeekendHelligdagVentetid = int.TryParse(plWeekendHelligdagVentetid.Value, out tempVal) ? tempVal : (int?)null,
            YderligInfo = plYderligInfo.Value
        };
        apiclient.UpdatePricelist(createdBid, pList);
    }
}
/// <summary>
/// Crawls the notice list published under http://www.topway.com.cn/6/13/ and turns every notice
/// whose detail page contains the expected content block into a <c>BidInfo</c> record.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, the method returns as soon as the result list holds <c>MaxCount</c> records.
/// </param>
/// <returns>
/// The records collected so far; an empty list when the first listing page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList results = new List<BidInfo>();

    string listingHtml;
    try
    {
        listingHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch (Exception)
    {
        return results;
    }

    // Page count: read from the second-to-last link inside div#pagination.
    int pageTotal = 1;
    NodeList pagerLinks = new Parser(new Lexer(listingHtml)).ExtractAllNodesThatMatch(
        new AndFilter(
            new HasParentFilter(
                new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "pagination")),
                true),
            new TagNameFilter("a")));
    if (pagerLinks != null && pagerLinks.Count > 0)
    {
        try
        {
            string pageToken = pagerLinks[pagerLinks.Count - 2].GetATag().Link;
            pageToken = pageToken.Replace("31", "");
            pageToken = pageToken.Replace(".htm", "");
            pageTotal = int.Parse(pageToken);
        }
        catch
        {
            // Any failure while reading the pager falls back to a fixed page count.
            pageTotal = 27;
        }
    }

    // Begin/end markers tried in order until one of them yields the winning bidder.
    string[,] winnerMarkers =
    {
        { "第一中标候选人为", "," },
        { "公司确定", "为第一部分改造" },
        { "确定中标人是", "。" },
        { "公司确定", "为本次" }
    };

    // Offset 0 is the page already downloaded above; offset n > 0 is "default_n.htm".
    for (int pageOffset = 0; pageOffset < pageTotal; pageOffset++)
    {
        if (pageOffset > 0)
        {
            try
            {
                listingHtml = this.ToolWebSite.GetHtmlByUrl("http://www.topway.com.cn/6/13/default_" + pageOffset.ToString() + ".htm", Encoding.UTF8);
            }
            catch
            {
                continue;
            }
        }

        NodeList items = new Parser(new Lexer(listingHtml)).ExtractAllNodesThatMatch(
            new AndFilter(
                new HasParentFilter(
                    new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("id", "newsList")),
                    true),
                new TagNameFilter("li")));
        if (items == null || items.Count < 1)
        {
            continue;
        }

        for (int index = 0; index < items.Count; index++)
        {
            var item = items[index];
            string prjName = item.GetATagValue("title");
            string beginDate = item.ToPlainTextString().GetDateRegex();
            string infoUrl = "http://www.topway.com.cn/6/13/" + item.GetATagHref();

            string detailHtml;
            try
            {
                detailHtml = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.UTF8).GetJsString();
            }
            catch
            {
                continue;
            }

            NodeList detail = new Parser(new Lexer(detailHtml)).ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "fwRight float_l marginL_10")));
            if (detail == null || detail.Count < 1)
            {
                continue;
            }

            string htmlTxt = detail.AsHtml().Replace("<br>", "\r\n").Replace("<br/>", "\r\n").Replace("<br />", "\r\n");
            string bidCtx = htmlTxt.ToCtxString();
            string code = bidCtx.GetRegexBegEnd("招标编号为", "的", 50).Replace("“", "").Replace("”", "");

            string bidUnit = string.Empty;
            for (int m = 0; m < winnerMarkers.GetLength(0) && string.IsNullOrEmpty(bidUnit); m++)
            {
                bidUnit = bidCtx.GetRegexBegEnd(winnerMarkers[m, 0], winnerMarkers[m, 1]);
            }

            string specType = "其他";
            string bidType = prjName.GetInviteBidType();
            string msgType = "深圳市天威视讯股份有限公司";

            // These fields are never filled in by this crawler and are passed on empty.
            string buildUnit = string.Empty, endDate = string.Empty, otherType = string.Empty, bidMoney = string.Empty, prjMgr = string.Empty;

            BidInfo info = ToolDb.GenBidInfo("广东省", "深圳社会招标", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, infoUrl, prjMgr, htmlTxt);
            results.Add(info);

            if (!crawlAll && results.Count >= this.MaxCount)
            {
                return results;
            }
        }
    }

    return results;
}
/// <summary>
/// Handles a best-bid update: writes the quote's dump to the debug log, then publishes it as a
/// bid tick stamped with the current date and time.
/// </summary>
/// <param name="oInfo">The best-bid quote; its symbol, price, size and exchange are copied into the tick.</param>
public override void BestBidQuote(BidInfo oInfo)
{
    StringBuilder dump = new StringBuilder();
    oInfo.Dump(dump);
    debug(dump);

    // Built with NewBid (previously NewAsk): this is the bid-side callback and the exchange is
    // stored in the bid-exchange field `be`, so the price/size belong on the bid side of the tick.
    Tick tick = TickImpl.NewBid(oInfo.Symbol, (decimal)oInfo.Price, oInfo.Size);
    tick.be = oInfo.Exchange;
    tick.date = Util.ToTLDate();
    tick.time = Util.ToTLTime();
    tl.newTick(tick);
}
/// <summary>
/// Crawls the announcement lists served by http://www.hljztb.com/ajaxtools.ashx, one category at a
/// time, building a <c>BidInfo</c> for every announcement whose detail page contains the
/// <c>lblFZBContent</c> span and registering the attachments linked from that content.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, each category stops once <c>MaxCount</c> records have been taken from it.
/// </param>
/// <returns>
/// All records collected; returned immediately if the first page request of a category fails.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    string endpoint = "http://www.hljztb.com/ajaxtools.ashx";

    // Category display name -> value sent in the "sort" form field.
    Dictionary<string, string> categories = new Dictionary<string, string>
    {
        { "勘察设计", "18101" },
        { "施工", "18102" },
        { "监理", "18103" },
        { "设备", "18104" }
    };

    IList results = new List<BidInfo>();

    foreach (KeyValuePair<string, string> category in categories)
    {
        int collected = 0;
        bool quotaReached = false;

        NameValueCollection form = this.ToolWebSite.GetNameValueCollection(
            new string[] { "dopost", "pagesize", "CategoryID", "sort", "keyword", "pageno" },
            new string[] { "product_list", "5", "6", category.Value, "", "1" });

        string json;
        try
        {
            json = this.ToolWebSite.GetHtmlByUrl(endpoint, form);
        }
        catch
        {
            return results;
        }

        Dictionary<string, object> listPage =
            (Dictionary<string, object>)((Dictionary<string, object>)new JavaScriptSerializer().DeserializeObject(json))["listpage"];
        int pageTotal = Convert.ToInt32(listPage["pagecount"]);

        // Page 1 is already loaded; later pages are requested inside the loop.
        for (int page = 1; page <= pageTotal && !quotaReached; page++)
        {
            if (page > 1)
            {
                form = this.ToolWebSite.GetNameValueCollection(
                    new string[] { "dopost", "pagesize", "CategoryID", "sort", "keyword", "pageno" },
                    new string[] { "product_list", "5", "6", category.Value, "", page.ToString() });
                try
                {
                    json = this.ToolWebSite.GetHtmlByUrl(endpoint, form);
                }
                catch
                {
                    continue;
                }

                listPage =
                    (Dictionary<string, object>)((Dictionary<string, object>)new JavaScriptSerializer().DeserializeObject(json))["listpage"];
            }

            object[] entries = (object[])listPage["listdata"];
            foreach (object rawEntry in entries)
            {
                Dictionary<string, object> entry = (Dictionary<string, object>)rawEntry;

                // The title carries the area between 【 and 】; the remainder is the project name.
                string rawTitle = Convert.ToString(entry["Name"]);
                string area = rawTitle.GetRegexBegEnd("【", "】");
                string prjName = rawTitle.GetReplace("【" + area + "】");
                string beginDate = Convert.ToString(entry["FTime"]);
                string infoUrl = "http://www.hljztb.com/" + entry["SUrl"];

                string detailHtml;
                try
                {
                    detailHtml = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString();
                }
                catch
                {
                    continue;
                }

                NodeList content = new Parser(new Lexer(detailHtml)).ExtractAllNodesThatMatch(
                    new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "lblFZBContent")));
                if (content == null || content.Count < 1)
                {
                    continue;
                }

                string htmlTxt = content.AsHtml();
                string bidCtx = htmlTxt.GetReplace("<br/>,<br>,</p>", "\r\n").ToCtxString();

                // Extracted like the other fields but not passed to GenBidInfo below.
                string prjAddress = bidCtx.GetAddressRegex();
                string buildUnit = bidCtx.GetBuildRegex();

                string bidUnit = bidCtx.GetBidRegex();
                if (string.IsNullOrEmpty(bidUnit))
                {
                    bidUnit = bidCtx.GetRegex("第一名,预中标单位");
                }

                string bidMoney = bidCtx.GetMoneyRegex();
                if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
                {
                    bidMoney = bidCtx.GetRegex("中标造价,造价,预 中 标 价,预中标价格").GetMoney();
                }

                string prjMgr = bidCtx.GetMgrRegex();
                string code = bidCtx.GetCodeRegex().GetCodeDel();

                // Owner name: keep everything up to and including "公司", then cut at contact/address labels.
                if (buildUnit.Contains("公司"))
                {
                    buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
                }
                foreach (string cutMarker in new string[] { "联系", "地址" })
                {
                    if (buildUnit.Contains(cutMarker))
                    {
                        buildUnit = buildUnit.Remove(buildUnit.IndexOf(cutMarker));
                    }
                }

                if (string.IsNullOrEmpty(code))
                {
                    code = bidCtx.GetRegex("编码");
                }

                // Winner name: truncate right after the first occurrence of each organisation suffix.
                foreach (string suffix in new string[] { "公司", "研究院", "研究所" })
                {
                    if (bidUnit.Contains(suffix))
                    {
                        bidUnit = bidUnit.Remove(bidUnit.IndexOf(suffix)) + suffix;
                    }
                }

                string bidType = category.Key;
                string specType = "建设工程";
                string msgType = "黑龙江住房和城乡建设厅";

                // Never filled in by this crawler; passed on empty.
                string endDate = string.Empty, otherType = string.Empty;

                BidInfo info = ToolDb.GenBidInfo("黑龙江省", "黑龙江省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, infoUrl, prjMgr, htmlTxt);
                results.Add(info);
                collected++;

                // Register every attachment link found in the announcement body.
                NodeList anchors = new Parser(new Lexer(htmlTxt)).ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (anchors != null && anchors.Count > 0)
                {
                    for (int a = 0; a < anchors.Count; a++)
                    {
                        ATag anchor = anchors[a] as ATag;
                        if (!anchor.IsAtagAttach())
                        {
                            continue;
                        }

                        string link = anchor.Link.ToLower().Contains("http")
                            ? anchor.Link
                            : "http://www.hljztb.com/" + anchor.Link.GetReplace("../,./");

                        // Links longer than 500 bytes in the default encoding are not stored.
                        if (Encoding.Default.GetByteCount(link) > 500)
                        {
                            continue;
                        }

                        base.AttachList.Add(ToolDb.GenBaseAttach(anchor.LinkText, info.Id, link));
                    }
                }

                if (!crawlAll && collected >= this.MaxCount)
                {
                    // Leaves both the entry loop and the page loop; the next category starts afresh.
                    quotaReached = true;
                    break;
                }
            }
        }
    }

    return results;
}
/// <summary>
/// Writes the dump of a bid quote to standard output.
/// </summary>
/// <param name="oInfo">The quote whose <c>Dump</c> output is printed.</param>
public override void BidQuote(BidInfo oInfo)
{
    StringBuilder dump = new StringBuilder();
    oInfo.Dump(dump);
    Console.Out.Write(dump);
}
/// <summary>
/// Crawls the paged listing at <c>SiteUrl</c> ("&amp;page=N", zero-based) and, for every listing row,
/// downloads http://www.sdcin.com.cn/viewzbgg.php?id=... and converts its detail table into a
/// <c>BidInfo</c> record.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, the method returns as soon as the result list holds <c>MaxCount</c> records.
/// </param>
/// <returns>
/// The records collected so far; an empty list when the first listing page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    int pageInt = 1;
    string html;

    try
    {
        // Only &nbsp; entities and non-breaking/ideographic spaces are stripped. Removing ordinary
        // spaces from raw markup would fuse tag names with their attributes ("<select id=" becomes
        // "<selectid=") and none of the tag filters below could match.
        html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl + "&page=0"), Encoding.Default)
            .Replace("&nbsp;", "").Replace("\u00A0", "").Replace("\u3000", "");
    }
    catch (Exception)
    {
        return list;
    }

    // The number of <option> entries in the page selector is the page count.
    Parser parser = new Parser(new Lexer(html));
    NodeList sNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("id", "page_PageList")));
    if (sNode != null && sNode.Count > 0)
    {
        SelectTag select = sNode[0] as SelectTag;
        if (select != null)
        {
            pageInt = select.OptionTags.Length;
        }
    }

    // Extracts the "id=..." part of a listing link; built once instead of once per row.
    Regex regexLink = new Regex(@"id=[^-]+");

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "&page=" + (i - 1).ToString(), Encoding.Default);
            }
            catch (Exception)
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        sNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("tr"), new HasAttributeFilter("onmouseover", "this.style.backgroundColor=\"#EFFCD0\";")));
        if (sNode == null || sNode.Count == 0)
        {
            continue;
        }

        for (int n = 0; n < sNode.Count; n++)
        {
            string endDate = string.Empty, otherType = string.Empty;

            // A row that is not a two-column table row with a link is skipped rather than
            // allowed to abort the whole crawl with a null reference.
            TableRow tr = sNode[n] as TableRow;
            if (tr == null || tr.ColumnCount < 2)
            {
                continue;
            }

            string prjName = tr.Columns[0].ToPlainTextString().Trim();
            string bidUnit = tr.Columns[1].ToPlainTextString().Trim();

            NodeList anchors = tr.Columns[0].SearchFor(typeof(ATag), true);
            ATag aTag = (anchors != null && anchors.Count > 0) ? anchors[0] as ATag : null;
            if (aTag == null)
            {
                continue;
            }

            string InfoUrl = "http://www.sdcin.com.cn/viewzbgg.php?" + regexLink.Match(aTag.Link).Value;

            string htmldetail;
            try
            {
                // Same targeted space stripping as for the listing page (see above).
                htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default)
                    .Replace("&nbsp;", "").Replace("\u00A0", "").Replace("\u3000", "")
                    .GetJsString();
            }
            catch (Exception)
            {
                continue;
            }

            Parser dtlparser = new Parser(new Lexer(htmldetail));
            NodeList dtnode = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "98%")));
            if (dtnode == null || dtnode.Count == 0)
            {
                continue;
            }

            string HtmlTxt = dtnode.AsHtml();
            TableTag table = dtnode[0] as TableTag;
            if (table == null)
            {
                continue;
            }

            // Flatten the detail table into "label:value\r\n" text. The first cell of each row is the
            // label (its own ASCII or full-width colons are removed), every further cell ends a line.
            StringBuilder ctx = new StringBuilder();
            for (int k = 0; k < table.RowCount; k++)
            {
                TableRow row = table.Rows[k];
                for (int d = 0; d < row.ColumnCount; d++)
                {
                    string cell = row.Columns[d].ToNodePlainString();
                    if (d == 0)
                    {
                        ctx.Append(cell.Replace(":", "").Replace("：", "")).Append(":");
                    }
                    else
                    {
                        ctx.Append(cell).Append("\r\n");
                    }
                }
            }

            string bidCtx = ctx.ToString();

            string buildUnit = bidCtx.GetBuildRegex();
            if (string.IsNullOrEmpty(buildUnit))
            {
                buildUnit = bidCtx.GetRegex("招标代理");
            }

            string bidMoney = bidCtx.GetMoneyRegex();

            // No date is read from the listing or the detail page, so the crawl date is recorded.
            string beginDate = DateTime.Now.ToString("yyyy-MM-dd");

            string prjMgr = bidCtx.GetMgrRegex();
            string code = bidCtx.GetCodeRegex();
            string msgType = "佛山市顺德区建设工程交易中心";
            string specType = "建设工程";
            prjName = ToolDb.GetPrjName(prjName);
            string bidType = ToolHtml.GetInviteTypes(prjName);

            BidInfo info = ToolDb.GenBidInfo("广东省", "佛山市区", "顺德区", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
            list.Add(info);

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// No-op handler: the body is empty, so the supplied bid quote is ignored.
/// NOTE(review): presumably subscribed to a session holder's bid-quote notification; confirm at the subscription site.
/// </summary>
/// <param name="info">The bid quote passed by the caller; not used.</param>
private void SessionHolderOnBidQuote(BidInfo info) { }
/// <summary>
/// Crawls the article list of zyjy.huizhou.gov.cn (first page from <c>SiteUrl</c>, later pages via
/// "pageNo=N"), downloads each article and extracts code, owner, winner, amount and project
/// manager from the text of its <c>divZoom</c> block into a <c>BidInfo</c> record.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, the method returns as soon as the result list holds <c>MaxCount</c> records.
/// </param>
/// <returns>
/// The records collected so far; an empty list when the first listing page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    int pageInt = 1;
    string html;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch (Exception)
    {
        return list;
    }

    // Page count: the text between "/" and "页" in the pagination block; any failure means one page.
    Parser parser = new Parser(new Lexer(html));
    NodeList sNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pagination")));
    if (sNode != null && sNode.Count > 0)
    {
        try
        {
            string temp = sNode.AsString().Replace(" ", "");
            Regex reg = new Regex(@"/[^页]+页");
            pageInt = Convert.ToInt32(reg.Match(temp).Value.Replace("/", "").Replace("页", ""));
        }
        catch
        {
            pageInt = 1;
        }
    }

    // All patterns are constant, so they are built once instead of once per article.
    // Label separators accept both the ASCII and the full-width colon.
    Regex regDate = new Regex(@"\d{4}-\d{1,2}-\d{1,2}");
    Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>|<style[^<]*</style>|<xml[^<]*</xml>");
    Regex regPrjCode = new Regex(@"(工程编号|项目编号|招标编号|中标编号|编号)[:：][^\r\n]+\r\n");
    Regex regBuidUnit = new Regex(@"(建设单位|招标人|承包人|招标单位|招标方|招标代理机构)[:：][^\r\n]+\r\n");
    Regex regMoney = new Regex(@"(中标价|投标价|总投资|发包价|投标报价|价格|金额|总价)[:：]?[^\r\n]+\r\n");
    Regex regBidUnit = new Regex(@"(成交供应商|中标供应商|第一候选人|中标候选人|中标单位|中标人|中标方)[:：][^\r\n]+\r\n");
    // The parentheses of "项目经理(或建造师)" are matched literally (ASCII or full-width); unescaped
    // they formed a regex group and the label as written on the page could never match.
    Regex regprjMgr = new Regex(@"(项目经理姓名|项目经理[（(]或建造师[）)]|项目经理|项目负责人|项目总监|建造师|总工程师|监理师)[:：][^\r\n]+\r\n");
    // Integer with optional fraction. The previous pattern required at least two digits and
    // therefore dropped single-digit amounts such as "5万".
    Regex regBidMoney = new Regex(@"[0-9]+(?:[.][0-9]+)?");

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl("http://zyjy.huizhou.gov.cn/pages/cms/hzggzyjyzx/html/artList.html?cataId=a000dc84e53b4dc88e1e05d15d7c90f7&pageNo=" + i.ToString(), Encoding.UTF8);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "div_list"))), new TagNameFilter("ul")));
        if (viewList == null || viewList.Count == 0)
        {
            continue;
        }

        // Row j is paired with the j-th link found in the whole list, as before; the search is
        // done once per page instead of once per row.
        NodeList anchors = viewList.SearchFor(typeof(ATag), true);

        for (int j = 0; j < viewList.Count; j++)
        {
            string endDate = string.Empty, otherType = string.Empty;

            if (anchors == null || j >= anchors.Count)
            {
                // Fewer links than rows: no later row can have a link either.
                break;
            }

            ATag aTag = anchors[j] as ATag;
            if (aTag == null)
            {
                continue;
            }

            string beginDate = regDate.Match(viewList[j].ToPlainTextString()).Value;
            string prjName = aTag.GetAttribute("title");
            string InfoUrl = "http://zyjy.huizhou.gov.cn" + aTag.Link;

            string htmDtl;
            try
            {
                htmDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8);
                htmDtl = regexHtml.Replace(htmDtl, "");
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmDtl));
            NodeList dtl = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "divZoom")));
            if (dtl == null || dtl.Count == 0)
            {
                continue;
            }

            string HtmlTxt = dtl.AsHtml();
            string bidCtx = HtmlTxt.ToCtxString();

            // If the article embeds an iframe, the text of the tables on the framed page is put in
            // front of the article text. This is best effort: any failure leaves bidCtx unchanged.
            NodeList ifrm = new Parser(new Lexer(htmDtl)).ExtractAllNodesThatMatch(new TagNameFilter("iframe"));
            IFrameTag frame = (ifrm != null && ifrm.Count > 0) ? ifrm[0] as IFrameTag : null;
            if (frame != null)
            {
                try
                {
                    string url = frame.GetAttribute("src");
                    string htm = this.ToolWebSite.GetHtmlByUrl(url, Encoding.Default);
                    NodeList tabNode = new Parser(new Lexer(htm)).ExtractAllNodesThatMatch(new TagNameFilter("table"));
                    string ctx = tabNode.AsHtml().ToCtxString().Replace("\r\n\t\r\n\t", "\r\n\t").Replace("\r\n\t\r\n\t", "\r\n\t").Replace("\r\n\t\r\n\t", "\r\n\t");
                    bidCtx = ctx + bidCtx;
                }
                catch
                {
                }
            }

            // Every field is matched against the same space-free copy of the text; after a match
            // the label words and separators are removed (in this order) so only the value is left.
            string compactCtx = bidCtx.Replace(" ", "");

            string code = regPrjCode.Match(compactCtx).Value.Replace("工程编号", "").Replace("项目编号", "").Replace("招标编号", "").Replace("中标编号", "").Replace("编号", "").Replace(":", "").Replace("：", "").Trim();

            string buildUnit = regBuidUnit.Match(compactCtx).Value.Replace("招标代理机构", "").Replace("建设单位", "").Replace("招标人", "").Replace("承包人", "").Replace("招标单位", "").Replace("招标方", "").Replace(":", "").Replace("：", "").Trim();

            string bidMoney = regMoney.Match(compactCtx).Value.Replace("中标价", "").Replace("总投资", "").Replace("发包价", "").Replace("总价", "").Replace("投标报价", "").Replace("投标价", "").Replace("价格", "").Replace("金额", "").Replace(":", "").Replace("：", "").Replace(",", "").Replace("，", "").Trim();

            string bidUnit = regBidUnit.Match(compactCtx).Value.Replace("成交供应商", "").Replace("中标供应商", "").Replace("中标候选人", "").Replace("第一候选人", "").Replace("中标单位", "").Replace("中标人", "").Replace("中标方", "").Replace(":", "").Replace("：", "").Trim();

            string prjMgr = regprjMgr.Match(compactCtx).Value.Replace("项目经理(或建造师)", "").Replace("项目经理（或建造师）", "").Replace("项目经理姓名", "").Replace("总工程师", "").Replace("项目经理", "").Replace("项目总监", "").Replace("建造师", "").Replace("监理师", "").Replace("项目负责人", "").Replace(":", "").Replace("：", "").Trim();

            if (bidMoney.Contains("万"))
            {
                // Already expressed in units of 10,000: keep the number in front of "万".
                bidMoney = regBidMoney.Match(bidMoney.Remove(bidMoney.IndexOf("万")).Trim()).Value;
            }
            else
            {
                // Otherwise convert to units of 10,000; unparsable or very small amounts become "0".
                // The matched text always uses '.', so it is parsed and formatted culture-invariantly.
                decimal amount;
                if (decimal.TryParse(regBidMoney.Match(bidMoney).Value, System.Globalization.NumberStyles.Number, System.Globalization.CultureInfo.InvariantCulture, out amount))
                {
                    decimal inTenThousands = amount / 10000;
                    bidMoney = inTenThousands < 0.1m ? "0" : inTenThousands.ToString(System.Globalization.CultureInfo.InvariantCulture);
                }
                else
                {
                    bidMoney = "0";
                }
            }

            if (prjMgr.Contains("资格"))
            {
                prjMgr = prjMgr.Remove(prjMgr.IndexOf("资格"));
            }

            bidUnit = ToolHtml.GetStringTemp(bidUnit).Replace(";", "");
            buildUnit = ToolHtml.GetSubString(buildUnit, 150);
            bidUnit = ToolHtml.GetSubString(bidUnit, 150);
            code = ToolHtml.GetSubString(code, 50);
            prjMgr = ToolHtml.GetSubString(prjMgr, 50);

            string msgType = "惠州市公共资源交易中心";
            string specType = "建设工程";
            if (string.IsNullOrEmpty(buildUnit))
            {
                buildUnit = "惠州市公共资源交易中心";
            }

            string bidType = ToolHtml.GetInviteTypes(prjName);

            BidInfo info = ToolDb.GenBidInfo("广东省", "惠州市区", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
            list.Add(info);

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Forwards a bid quote to the client's <c>BidQuote</c> handler, wrapped with the receiver dump
/// and the <c>MarketDataError</c> error callback, using <c>SafeInvoke</c>.
/// </summary>
/// <param name="info">The bid quote to forward.</param>
public override void BidQuote(BidInfo info)
{
    var handler = _client.BidQuote
        .WithDump(_receiver)
        .WithError(MarketDataError);

    handler.SafeInvoke(info);
}
/* ----------------------------------------------------------- */

/// <summary>
/// Writes the dump of a bid quote to the debug output.
/// </summary>
/// <param name="oInfo">The quote whose <c>Dump</c> output is logged.</param>
public override void BidQuote(BidInfo oInfo)
{
    StringBuilder dump = new StringBuilder();
    oInfo.Dump(dump);
    debug(dump);
}